2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
46 #include "check/original.h"
47 #include "check/lowmem.h"
48 #include "check/common.h"
54 TASK_NOTHING, /* have to be the last element */
59 enum task_position tp;
61 struct task_info *info;
65 u64 total_csum_bytes = 0;
66 u64 total_btree_bytes = 0;
67 u64 total_fs_tree_bytes = 0;
68 u64 total_extent_tree_bytes = 0;
69 u64 btree_space_waste = 0;
70 u64 data_bytes_allocated = 0;
71 u64 data_bytes_referenced = 0;
72 LIST_HEAD(duplicate_extents);
73 LIST_HEAD(delete_items);
75 int init_extent_tree = 0;
76 int check_data_csum = 0;
77 struct btrfs_fs_info *global_info;
78 struct task_ctx ctx = { 0 };
79 struct cache_tree *roots_info_cache = NULL;
81 enum btrfs_check_mode {
85 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
88 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
90 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
92 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
93 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
94 struct data_backref *back1 = to_data_backref(ext1);
95 struct data_backref *back2 = to_data_backref(ext2);
97 WARN_ON(!ext1->is_data);
98 WARN_ON(!ext2->is_data);
100 /* parent and root are a union, so this covers both */
101 if (back1->parent > back2->parent)
103 if (back1->parent < back2->parent)
106 /* This is a full backref and the parents match. */
107 if (back1->node.full_backref)
110 if (back1->owner > back2->owner)
112 if (back1->owner < back2->owner)
115 if (back1->offset > back2->offset)
117 if (back1->offset < back2->offset)
120 if (back1->found_ref && back2->found_ref) {
121 if (back1->disk_bytenr > back2->disk_bytenr)
123 if (back1->disk_bytenr < back2->disk_bytenr)
126 if (back1->bytes > back2->bytes)
128 if (back1->bytes < back2->bytes)
135 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
137 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
138 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
139 struct tree_backref *back1 = to_tree_backref(ext1);
140 struct tree_backref *back2 = to_tree_backref(ext2);
142 WARN_ON(ext1->is_data);
143 WARN_ON(ext2->is_data);
145 /* parent and root are a union, so this covers both */
146 if (back1->parent > back2->parent)
148 if (back1->parent < back2->parent)
154 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
156 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
157 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
159 if (ext1->is_data > ext2->is_data)
162 if (ext1->is_data < ext2->is_data)
165 if (ext1->full_backref > ext2->full_backref)
167 if (ext1->full_backref < ext2->full_backref)
171 return compare_data_backref(node1, node2);
173 return compare_tree_backref(node1, node2);
177 static void *print_status_check(void *p)
179 struct task_ctx *priv = p;
180 const char work_indicator[] = { '.', 'o', 'O', 'o' };
182 static char *task_position_string[] = {
184 "checking free space cache",
188 task_period_start(priv->info, 1000 /* 1s */);
190 if (priv->tp == TASK_NOTHING)
194 printf("%s [%c]\r", task_position_string[priv->tp],
195 work_indicator[count % 4]);
198 task_period_wait(priv->info);
203 static int print_status_return(void *p)
/*
 * Map a check-mode name to its enum btrfs_check_mode value.
 *
 * "lowmem" selects CHECK_MODE_LOWMEM; both "orig" and "original" select
 * CHECK_MODE_ORIGINAL. Any other string yields CHECK_MODE_UNKNOWN.
 * Matching uses strcmp(), so it is exact and case-sensitive.
 */
211 static enum btrfs_check_mode parse_check_mode(const char *str)
213 if (strcmp(str, "lowmem") == 0)
214 return CHECK_MODE_LOWMEM;
/* Two spellings are accepted for the original mode. */
215 if (strcmp(str, "orig") == 0)
216 return CHECK_MODE_ORIGINAL;
217 if (strcmp(str, "original") == 0)
218 return CHECK_MODE_ORIGINAL;
220 return CHECK_MODE_UNKNOWN;
223 /* Compatibility helper that lets older code be reused */
224 static u64 first_extent_gap(struct rb_root *holes)
226 struct file_extent_hole *hole;
228 if (RB_EMPTY_ROOT(holes))
231 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
235 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
237 struct file_extent_hole *hole1;
238 struct file_extent_hole *hole2;
240 hole1 = rb_entry(node1, struct file_extent_hole, node);
241 hole2 = rb_entry(node2, struct file_extent_hole, node);
243 if (hole1->start > hole2->start)
245 if (hole1->start < hole2->start)
247 /* Now hole1->start == hole2->start */
248 if (hole1->len >= hole2->len)
250 * Hole 1 will be merge center
251 * Same hole will be merged later
254 /* Hole 2 will be merge center */
259 * Add a hole to the record
261 * This also merges holes on behalf of copy_file_extent_holes(),
262 * which ensures there won't be any contiguous holes.
264 static int add_file_extent_hole(struct rb_root *holes,
267 struct file_extent_hole *hole;
268 struct file_extent_hole *prev = NULL;
269 struct file_extent_hole *next = NULL;
271 hole = malloc(sizeof(*hole));
276 /* Since compare will not return 0, no -EEXIST will happen */
277 rb_insert(holes, &hole->node, compare_hole);
279 /* simple merge with previous hole */
280 if (rb_prev(&hole->node))
281 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
283 if (prev && prev->start + prev->len >= hole->start) {
284 hole->len = hole->start + hole->len - prev->start;
285 hole->start = prev->start;
286 rb_erase(&prev->node, holes);
291 /* iterate merge with next holes */
293 if (!rb_next(&hole->node))
295 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
297 if (hole->start + hole->len >= next->start) {
298 if (hole->start + hole->len <= next->start + next->len)
299 hole->len = next->start + next->len -
301 rb_erase(&next->node, holes);
310 static int compare_hole_range(struct rb_node *node, void *data)
312 struct file_extent_hole *hole;
315 hole = (struct file_extent_hole *)data;
318 hole = rb_entry(node, struct file_extent_hole, node);
319 if (start < hole->start)
321 if (start >= hole->start && start < hole->start + hole->len)
327 * Delete a hole in the record
329 * This will do the hole split and is much more restrictive than add.
331 static int del_file_extent_hole(struct rb_root *holes,
334 struct file_extent_hole *hole;
335 struct file_extent_hole tmp;
340 struct rb_node *node;
347 node = rb_search(holes, &tmp, compare_hole_range, NULL);
350 hole = rb_entry(node, struct file_extent_hole, node);
351 if (start + len > hole->start + hole->len)
355 * Now there will be no overlap, delete the hole and re-add the
356 * split(s) if they exist.
358 if (start > hole->start) {
359 prev_start = hole->start;
360 prev_len = start - hole->start;
363 if (hole->start + hole->len > start + len) {
364 next_start = start + len;
365 next_len = hole->start + hole->len - start - len;
368 rb_erase(node, holes);
371 ret = add_file_extent_hole(holes, prev_start, prev_len);
376 ret = add_file_extent_hole(holes, next_start, next_len);
383 static int copy_file_extent_holes(struct rb_root *dst,
386 struct file_extent_hole *hole;
387 struct rb_node *node;
390 node = rb_first(src);
392 hole = rb_entry(node, struct file_extent_hole, node);
393 ret = add_file_extent_hole(dst, hole->start, hole->len);
396 node = rb_next(node);
401 static void free_file_extent_holes(struct rb_root *holes)
403 struct rb_node *node;
404 struct file_extent_hole *hole;
406 node = rb_first(holes);
408 hole = rb_entry(node, struct file_extent_hole, node);
409 rb_erase(node, holes);
411 node = rb_first(holes);
415 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
/*
 * Record that @root takes part in transaction @trans.
 *
 * Nothing happens when root->last_trans already equals trans->transid.
 * Otherwise the root is flagged via track_dirty, last_trans is updated to
 * the transaction id, commit_root is pointed at the root's current node,
 * and extent_buffer_get() is called on that node.
 */
417 static void record_root_in_trans(struct btrfs_trans_handle *trans,
418 struct btrfs_root *root)
420 if (root->last_trans != trans->transid) {
421 root->track_dirty = 1;
422 root->last_trans = trans->transid;
423 root->commit_root = root->node;
/* NOTE(review): presumably takes the reference held by commit_root -- confirm */
424 extent_buffer_get(root->node);
428 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
430 struct device_record *rec1;
431 struct device_record *rec2;
433 rec1 = rb_entry(node1, struct device_record, node);
434 rec2 = rb_entry(node2, struct device_record, node);
435 if (rec1->devid > rec2->devid)
437 else if (rec1->devid < rec2->devid)
443 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
445 struct inode_record *rec;
446 struct inode_backref *backref;
447 struct inode_backref *orig;
448 struct inode_backref *tmp;
449 struct orphan_data_extent *src_orphan;
450 struct orphan_data_extent *dst_orphan;
455 rec = malloc(sizeof(*rec));
457 return ERR_PTR(-ENOMEM);
458 memcpy(rec, orig_rec, sizeof(*rec));
460 INIT_LIST_HEAD(&rec->backrefs);
461 INIT_LIST_HEAD(&rec->orphan_extents);
462 rec->holes = RB_ROOT;
464 list_for_each_entry(orig, &orig_rec->backrefs, list) {
465 size = sizeof(*orig) + orig->namelen + 1;
466 backref = malloc(size);
471 memcpy(backref, orig, size);
472 list_add_tail(&backref->list, &rec->backrefs);
474 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
475 dst_orphan = malloc(sizeof(*dst_orphan));
480 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
481 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
483 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
490 rb = rb_first(&rec->holes);
492 struct file_extent_hole *hole;
494 hole = rb_entry(rb, struct file_extent_hole, node);
500 if (!list_empty(&rec->backrefs))
501 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
502 list_del(&orig->list);
506 if (!list_empty(&rec->orphan_extents))
507 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
508 list_del(&orig->list);
517 static void print_orphan_data_extents(struct list_head *orphan_extents,
520 struct orphan_data_extent *orphan;
522 if (list_empty(orphan_extents))
524 printf("The following data extent is lost in tree %llu:\n",
526 list_for_each_entry(orphan, orphan_extents, list) {
527 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
528 orphan->objectid, orphan->offset, orphan->disk_bytenr,
533 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
535 u64 root_objectid = root->root_key.objectid;
536 int errors = rec->errors;
540 /* For reloc root errors, print the objectid of the corresponding fs root */
541 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
542 root_objectid = root->root_key.offset;
543 fprintf(stderr, "reloc");
545 fprintf(stderr, "root %llu inode %llu errors %x",
546 (unsigned long long) root_objectid,
547 (unsigned long long) rec->ino, rec->errors);
549 if (errors & I_ERR_NO_INODE_ITEM)
550 fprintf(stderr, ", no inode item");
551 if (errors & I_ERR_NO_ORPHAN_ITEM)
552 fprintf(stderr, ", no orphan item");
553 if (errors & I_ERR_DUP_INODE_ITEM)
554 fprintf(stderr, ", dup inode item");
555 if (errors & I_ERR_DUP_DIR_INDEX)
556 fprintf(stderr, ", dup dir index");
557 if (errors & I_ERR_ODD_DIR_ITEM)
558 fprintf(stderr, ", odd dir item");
559 if (errors & I_ERR_ODD_FILE_EXTENT)
560 fprintf(stderr, ", odd file extent");
561 if (errors & I_ERR_BAD_FILE_EXTENT)
562 fprintf(stderr, ", bad file extent");
563 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
564 fprintf(stderr, ", file extent overlap");
565 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
566 fprintf(stderr, ", file extent discount");
567 if (errors & I_ERR_DIR_ISIZE_WRONG)
568 fprintf(stderr, ", dir isize wrong");
569 if (errors & I_ERR_FILE_NBYTES_WRONG)
570 fprintf(stderr, ", nbytes wrong");
571 if (errors & I_ERR_ODD_CSUM_ITEM)
572 fprintf(stderr, ", odd csum item");
573 if (errors & I_ERR_SOME_CSUM_MISSING)
574 fprintf(stderr, ", some csum missing");
575 if (errors & I_ERR_LINK_COUNT_WRONG)
576 fprintf(stderr, ", link count wrong");
577 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
578 fprintf(stderr, ", orphan file extent");
579 fprintf(stderr, "\n");
580 /* Print the orphan extents if needed */
581 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
582 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
584 /* Print the holes if needed */
585 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
586 struct file_extent_hole *hole;
587 struct rb_node *node;
590 node = rb_first(&rec->holes);
591 fprintf(stderr, "Found file extent holes:\n");
594 hole = rb_entry(node, struct file_extent_hole, node);
595 fprintf(stderr, "\tstart: %llu, len: %llu\n",
596 hole->start, hole->len);
597 node = rb_next(node);
600 fprintf(stderr, "\tstart: 0, len: %llu\n",
602 root->fs_info->sectorsize));
/*
 * Write a textual description of every REF_ERR_* bit set in @errors to
 * stderr.
 *
 * Each set bit is tested independently and printed as ", <description>",
 * in the fixed order below. A single newline is always written at the end,
 * even when no bit is set. The leading ", " suggests the caller has already
 * started the output line -- confirm against the call sites.
 */
606 static void print_ref_error(int errors)
608 if (errors & REF_ERR_NO_DIR_ITEM)
609 fprintf(stderr, ", no dir item");
610 if (errors & REF_ERR_NO_DIR_INDEX)
611 fprintf(stderr, ", no dir index");
612 if (errors & REF_ERR_NO_INODE_REF)
613 fprintf(stderr, ", no inode ref");
614 if (errors & REF_ERR_DUP_DIR_ITEM)
615 fprintf(stderr, ", dup dir item");
616 if (errors & REF_ERR_DUP_DIR_INDEX)
617 fprintf(stderr, ", dup dir index");
618 if (errors & REF_ERR_DUP_INODE_REF)
619 fprintf(stderr, ", dup inode ref");
620 if (errors & REF_ERR_INDEX_UNMATCH)
621 fprintf(stderr, ", index mismatch");
622 if (errors & REF_ERR_FILETYPE_UNMATCH)
623 fprintf(stderr, ", filetype mismatch");
624 if (errors & REF_ERR_NAME_TOO_LONG)
625 fprintf(stderr, ", name too long");
626 if (errors & REF_ERR_NO_ROOT_REF)
627 fprintf(stderr, ", no root ref");
628 if (errors & REF_ERR_NO_ROOT_BACKREF)
629 fprintf(stderr, ", no root backref");
630 if (errors & REF_ERR_DUP_ROOT_REF)
631 fprintf(stderr, ", dup root ref");
632 if (errors & REF_ERR_DUP_ROOT_BACKREF)
633 fprintf(stderr, ", dup root backref");
/* Terminate the error line. */
634 fprintf(stderr, "\n");
637 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
640 struct ptr_node *node;
641 struct cache_extent *cache;
642 struct inode_record *rec = NULL;
645 cache = lookup_cache_extent(inode_cache, ino, 1);
647 node = container_of(cache, struct ptr_node, cache);
649 if (mod && rec->refs > 1) {
650 node->data = clone_inode_rec(rec);
651 if (IS_ERR(node->data))
657 rec = calloc(1, sizeof(*rec));
659 return ERR_PTR(-ENOMEM);
661 rec->extent_start = (u64)-1;
663 INIT_LIST_HEAD(&rec->backrefs);
664 INIT_LIST_HEAD(&rec->orphan_extents);
665 rec->holes = RB_ROOT;
667 node = malloc(sizeof(*node));
670 return ERR_PTR(-ENOMEM);
672 node->cache.start = ino;
673 node->cache.size = 1;
676 if (ino == BTRFS_FREE_INO_OBJECTID)
679 ret = insert_cache_extent(inode_cache, &node->cache);
681 return ERR_PTR(-EEXIST);
686 static void free_orphan_data_extents(struct list_head *orphan_extents)
688 struct orphan_data_extent *orphan;
690 while (!list_empty(orphan_extents)) {
691 orphan = list_entry(orphan_extents->next,
692 struct orphan_data_extent, list);
693 list_del(&orphan->list);
698 static void free_inode_rec(struct inode_record *rec)
700 struct inode_backref *backref;
705 while (!list_empty(&rec->backrefs)) {
706 backref = to_inode_backref(rec->backrefs.next);
707 list_del(&backref->list);
710 free_orphan_data_extents(&rec->orphan_extents);
711 free_file_extent_holes(&rec->holes);
715 static int can_free_inode_rec(struct inode_record *rec)
717 if (!rec->errors && rec->checked && rec->found_inode_item &&
718 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
723 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
724 struct inode_record *rec)
726 struct cache_extent *cache;
727 struct inode_backref *tmp, *backref;
728 struct ptr_node *node;
731 if (!rec->found_inode_item)
734 filetype = imode_to_type(rec->imode);
735 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
736 if (backref->found_dir_item && backref->found_dir_index) {
737 if (backref->filetype != filetype)
738 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
739 if (!backref->errors && backref->found_inode_ref &&
740 rec->nlink == rec->found_link) {
741 list_del(&backref->list);
747 if (!rec->checked || rec->merging)
750 if (S_ISDIR(rec->imode)) {
751 if (rec->found_size != rec->isize)
752 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
753 if (rec->found_file_extent)
754 rec->errors |= I_ERR_ODD_FILE_EXTENT;
755 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
756 if (rec->found_dir_item)
757 rec->errors |= I_ERR_ODD_DIR_ITEM;
758 if (rec->found_size != rec->nbytes)
759 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
760 if (rec->nlink > 0 && !no_holes &&
761 (rec->extent_end < rec->isize ||
762 first_extent_gap(&rec->holes) < rec->isize))
763 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
766 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
767 if (rec->found_csum_item && rec->nodatasum)
768 rec->errors |= I_ERR_ODD_CSUM_ITEM;
769 if (rec->some_csum_missing && !rec->nodatasum)
770 rec->errors |= I_ERR_SOME_CSUM_MISSING;
773 BUG_ON(rec->refs != 1);
774 if (can_free_inode_rec(rec)) {
775 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
776 node = container_of(cache, struct ptr_node, cache);
777 BUG_ON(node->data != rec);
778 remove_cache_extent(inode_cache, &node->cache);
784 static int check_orphan_item(struct btrfs_root *root, u64 ino)
786 struct btrfs_path path;
787 struct btrfs_key key;
790 key.objectid = BTRFS_ORPHAN_OBJECTID;
791 key.type = BTRFS_ORPHAN_ITEM_KEY;
794 btrfs_init_path(&path);
795 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
796 btrfs_release_path(&path);
802 static int process_inode_item(struct extent_buffer *eb,
803 int slot, struct btrfs_key *key,
804 struct shared_node *active_node)
806 struct inode_record *rec;
807 struct btrfs_inode_item *item;
809 rec = active_node->current;
810 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
811 if (rec->found_inode_item) {
812 rec->errors |= I_ERR_DUP_INODE_ITEM;
815 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
816 rec->nlink = btrfs_inode_nlink(eb, item);
817 rec->isize = btrfs_inode_size(eb, item);
818 rec->nbytes = btrfs_inode_nbytes(eb, item);
819 rec->imode = btrfs_inode_mode(eb, item);
820 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
822 rec->found_inode_item = 1;
824 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
825 maybe_free_inode_rec(&active_node->inode_cache, rec);
829 static struct inode_backref *get_inode_backref(struct inode_record *rec,
831 int namelen, u64 dir)
833 struct inode_backref *backref;
835 list_for_each_entry(backref, &rec->backrefs, list) {
836 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
838 if (backref->dir != dir || backref->namelen != namelen)
840 if (memcmp(name, backref->name, namelen))
845 backref = malloc(sizeof(*backref) + namelen + 1);
848 memset(backref, 0, sizeof(*backref));
850 backref->namelen = namelen;
851 memcpy(backref->name, name, namelen);
852 backref->name[namelen] = '\0';
853 list_add_tail(&backref->list, &rec->backrefs);
857 static int add_inode_backref(struct cache_tree *inode_cache,
858 u64 ino, u64 dir, u64 index,
859 const char *name, int namelen,
860 u8 filetype, u8 itemtype, int errors)
862 struct inode_record *rec;
863 struct inode_backref *backref;
865 rec = get_inode_rec(inode_cache, ino, 1);
867 backref = get_inode_backref(rec, name, namelen, dir);
870 backref->errors |= errors;
871 if (itemtype == BTRFS_DIR_INDEX_KEY) {
872 if (backref->found_dir_index)
873 backref->errors |= REF_ERR_DUP_DIR_INDEX;
874 if (backref->found_inode_ref && backref->index != index)
875 backref->errors |= REF_ERR_INDEX_UNMATCH;
876 if (backref->found_dir_item && backref->filetype != filetype)
877 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
879 backref->index = index;
880 backref->filetype = filetype;
881 backref->found_dir_index = 1;
882 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
884 if (backref->found_dir_item)
885 backref->errors |= REF_ERR_DUP_DIR_ITEM;
886 if (backref->found_dir_index && backref->filetype != filetype)
887 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
889 backref->filetype = filetype;
890 backref->found_dir_item = 1;
891 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
892 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
893 if (backref->found_inode_ref)
894 backref->errors |= REF_ERR_DUP_INODE_REF;
895 if (backref->found_dir_index && backref->index != index)
896 backref->errors |= REF_ERR_INDEX_UNMATCH;
898 backref->index = index;
900 backref->ref_type = itemtype;
901 backref->found_inode_ref = 1;
906 maybe_free_inode_rec(inode_cache, rec);
910 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
911 struct cache_tree *dst_cache)
913 struct inode_backref *backref;
918 list_for_each_entry(backref, &src->backrefs, list) {
919 if (backref->found_dir_index) {
920 add_inode_backref(dst_cache, dst->ino, backref->dir,
921 backref->index, backref->name,
922 backref->namelen, backref->filetype,
923 BTRFS_DIR_INDEX_KEY, backref->errors);
925 if (backref->found_dir_item) {
927 add_inode_backref(dst_cache, dst->ino,
928 backref->dir, 0, backref->name,
929 backref->namelen, backref->filetype,
930 BTRFS_DIR_ITEM_KEY, backref->errors);
932 if (backref->found_inode_ref) {
933 add_inode_backref(dst_cache, dst->ino,
934 backref->dir, backref->index,
935 backref->name, backref->namelen, 0,
936 backref->ref_type, backref->errors);
940 if (src->found_dir_item)
941 dst->found_dir_item = 1;
942 if (src->found_file_extent)
943 dst->found_file_extent = 1;
944 if (src->found_csum_item)
945 dst->found_csum_item = 1;
946 if (src->some_csum_missing)
947 dst->some_csum_missing = 1;
948 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
949 ret = copy_file_extent_holes(&dst->holes, &src->holes);
954 BUG_ON(src->found_link < dir_count);
955 dst->found_link += src->found_link - dir_count;
956 dst->found_size += src->found_size;
957 if (src->extent_start != (u64)-1) {
958 if (dst->extent_start == (u64)-1) {
959 dst->extent_start = src->extent_start;
960 dst->extent_end = src->extent_end;
962 if (dst->extent_end > src->extent_start)
963 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
964 else if (dst->extent_end < src->extent_start) {
965 ret = add_file_extent_hole(&dst->holes,
967 src->extent_start - dst->extent_end);
969 if (dst->extent_end < src->extent_end)
970 dst->extent_end = src->extent_end;
974 dst->errors |= src->errors;
975 if (src->found_inode_item) {
976 if (!dst->found_inode_item) {
977 dst->nlink = src->nlink;
978 dst->isize = src->isize;
979 dst->nbytes = src->nbytes;
980 dst->imode = src->imode;
981 dst->nodatasum = src->nodatasum;
982 dst->found_inode_item = 1;
984 dst->errors |= I_ERR_DUP_INODE_ITEM;
992 static int splice_shared_node(struct shared_node *src_node,
993 struct shared_node *dst_node)
995 struct cache_extent *cache;
996 struct ptr_node *node, *ins;
997 struct cache_tree *src, *dst;
998 struct inode_record *rec, *conflict;
1003 if (--src_node->refs == 0)
1005 if (src_node->current)
1006 current_ino = src_node->current->ino;
1008 src = &src_node->root_cache;
1009 dst = &dst_node->root_cache;
1011 cache = search_cache_extent(src, 0);
1013 node = container_of(cache, struct ptr_node, cache);
1015 cache = next_cache_extent(cache);
1018 remove_cache_extent(src, &node->cache);
1021 ins = malloc(sizeof(*ins));
1023 ins->cache.start = node->cache.start;
1024 ins->cache.size = node->cache.size;
1028 ret = insert_cache_extent(dst, &ins->cache);
1029 if (ret == -EEXIST) {
1030 conflict = get_inode_rec(dst, rec->ino, 1);
1031 BUG_ON(IS_ERR(conflict));
1032 merge_inode_recs(rec, conflict, dst);
1034 conflict->checked = 1;
1035 if (dst_node->current == conflict)
1036 dst_node->current = NULL;
1038 maybe_free_inode_rec(dst, conflict);
1039 free_inode_rec(rec);
1046 if (src == &src_node->root_cache) {
1047 src = &src_node->inode_cache;
1048 dst = &dst_node->inode_cache;
1052 if (current_ino > 0 && (!dst_node->current ||
1053 current_ino > dst_node->current->ino)) {
1054 if (dst_node->current) {
1055 dst_node->current->checked = 1;
1056 maybe_free_inode_rec(dst, dst_node->current);
1058 dst_node->current = get_inode_rec(dst, current_ino, 1);
1059 BUG_ON(IS_ERR(dst_node->current));
1064 static void free_inode_ptr(struct cache_extent *cache)
1066 struct ptr_node *node;
1067 struct inode_record *rec;
1069 node = container_of(cache, struct ptr_node, cache);
1071 free_inode_rec(rec);
1075 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1077 static struct shared_node *find_shared_node(struct cache_tree *shared,
1080 struct cache_extent *cache;
1081 struct shared_node *node;
1083 cache = lookup_cache_extent(shared, bytenr, 1);
1085 node = container_of(cache, struct shared_node, cache);
1091 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1094 struct shared_node *node;
1096 node = calloc(1, sizeof(*node));
1099 node->cache.start = bytenr;
1100 node->cache.size = 1;
1101 cache_tree_init(&node->root_cache);
1102 cache_tree_init(&node->inode_cache);
1105 ret = insert_cache_extent(shared, &node->cache);
1110 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1111 struct walk_control *wc, int level)
1113 struct shared_node *node;
1114 struct shared_node *dest;
1117 if (level == wc->active_node)
1120 BUG_ON(wc->active_node <= level);
1121 node = find_shared_node(&wc->shared, bytenr);
1123 ret = add_shared_node(&wc->shared, bytenr, refs);
1125 node = find_shared_node(&wc->shared, bytenr);
1126 wc->nodes[level] = node;
1127 wc->active_node = level;
1131 if (wc->root_level == wc->active_node &&
1132 btrfs_root_refs(&root->root_item) == 0) {
1133 if (--node->refs == 0) {
1134 free_inode_recs_tree(&node->root_cache);
1135 free_inode_recs_tree(&node->inode_cache);
1136 remove_cache_extent(&wc->shared, &node->cache);
1142 dest = wc->nodes[wc->active_node];
1143 splice_shared_node(node, dest);
1144 if (node->refs == 0) {
1145 remove_cache_extent(&wc->shared, &node->cache);
1151 static int leave_shared_node(struct btrfs_root *root,
1152 struct walk_control *wc, int level)
1154 struct shared_node *node;
1155 struct shared_node *dest;
1158 if (level == wc->root_level)
1161 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1165 BUG_ON(i >= BTRFS_MAX_LEVEL);
1167 node = wc->nodes[wc->active_node];
1168 wc->nodes[wc->active_node] = NULL;
1169 wc->active_node = i;
1171 dest = wc->nodes[wc->active_node];
1172 if (wc->active_node < wc->root_level ||
1173 btrfs_root_refs(&root->root_item) > 0) {
1174 BUG_ON(node->refs <= 1);
1175 splice_shared_node(node, dest);
1177 BUG_ON(node->refs < 2);
1186 * 1 - if the root with id child_root_id is a child of root parent_root_id
1187 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1188 * has other root(s) as parent(s)
1189 * 2 - if the root child_root_id doesn't have any parent roots
1191 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1194 struct btrfs_path path;
1195 struct btrfs_key key;
1196 struct extent_buffer *leaf;
1200 btrfs_init_path(&path);
1202 key.objectid = parent_root_id;
1203 key.type = BTRFS_ROOT_REF_KEY;
1204 key.offset = child_root_id;
1205 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1209 btrfs_release_path(&path);
1213 key.objectid = child_root_id;
1214 key.type = BTRFS_ROOT_BACKREF_KEY;
1216 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1222 leaf = path.nodes[0];
1223 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1224 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1227 leaf = path.nodes[0];
1230 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1231 if (key.objectid != child_root_id ||
1232 key.type != BTRFS_ROOT_BACKREF_KEY)
1237 if (key.offset == parent_root_id) {
1238 btrfs_release_path(&path);
1245 btrfs_release_path(&path);
1248 return has_parent ? 0 : 2;
1251 static int process_dir_item(struct extent_buffer *eb,
1252 int slot, struct btrfs_key *key,
1253 struct shared_node *active_node)
1263 struct btrfs_dir_item *di;
1264 struct inode_record *rec;
1265 struct cache_tree *root_cache;
1266 struct cache_tree *inode_cache;
1267 struct btrfs_key location;
1268 char namebuf[BTRFS_NAME_LEN];
1270 root_cache = &active_node->root_cache;
1271 inode_cache = &active_node->inode_cache;
1272 rec = active_node->current;
1273 rec->found_dir_item = 1;
1275 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1276 total = btrfs_item_size_nr(eb, slot);
1277 while (cur < total) {
1279 btrfs_dir_item_key_to_cpu(eb, di, &location);
1280 name_len = btrfs_dir_name_len(eb, di);
1281 data_len = btrfs_dir_data_len(eb, di);
1282 filetype = btrfs_dir_type(eb, di);
1284 rec->found_size += name_len;
1285 if (cur + sizeof(*di) + name_len > total ||
1286 name_len > BTRFS_NAME_LEN) {
1287 error = REF_ERR_NAME_TOO_LONG;
1289 if (cur + sizeof(*di) > total)
1291 len = min_t(u32, total - cur - sizeof(*di),
1298 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1300 if (key->type == BTRFS_DIR_ITEM_KEY &&
1301 key->offset != btrfs_name_hash(namebuf, len)) {
1302 rec->errors |= I_ERR_ODD_DIR_ITEM;
1303 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1304 key->objectid, key->offset, namebuf, len, filetype,
1305 key->offset, btrfs_name_hash(namebuf, len));
1308 if (location.type == BTRFS_INODE_ITEM_KEY) {
1309 add_inode_backref(inode_cache, location.objectid,
1310 key->objectid, key->offset, namebuf,
1311 len, filetype, key->type, error);
1312 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1313 add_inode_backref(root_cache, location.objectid,
1314 key->objectid, key->offset,
1315 namebuf, len, filetype,
1319 "unknown location type %d in DIR_ITEM[%llu %llu]\n",
1320 location.type, key->objectid, key->offset);
1321 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1322 key->objectid, key->offset, namebuf,
1323 len, filetype, key->type, error);
1326 len = sizeof(*di) + name_len + data_len;
1327 di = (struct btrfs_dir_item *)((char *)di + len);
1330 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1331 rec->errors |= I_ERR_DUP_DIR_INDEX;
1336 static int process_inode_ref(struct extent_buffer *eb,
1337 int slot, struct btrfs_key *key,
1338 struct shared_node *active_node)
1346 struct cache_tree *inode_cache;
1347 struct btrfs_inode_ref *ref;
1348 char namebuf[BTRFS_NAME_LEN];
1350 inode_cache = &active_node->inode_cache;
1352 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1353 total = btrfs_item_size_nr(eb, slot);
1354 while (cur < total) {
1355 name_len = btrfs_inode_ref_name_len(eb, ref);
1356 index = btrfs_inode_ref_index(eb, ref);
1358 /* inode_ref + namelen should not cross item boundary */
1359 if (cur + sizeof(*ref) + name_len > total ||
1360 name_len > BTRFS_NAME_LEN) {
1361 if (total < cur + sizeof(*ref))
1364 /* Still try to read out the remaining part */
1365 len = min_t(u32, total - cur - sizeof(*ref),
1367 error = REF_ERR_NAME_TOO_LONG;
1373 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1374 add_inode_backref(inode_cache, key->objectid, key->offset,
1375 index, namebuf, len, 0, key->type, error);
1377 len = sizeof(*ref) + name_len;
1378 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Record every extended inode ref stored in the item at @slot of @eb.
 *
 * Each entry's parent, index and name are handed to add_inode_backref() for
 * @active_node's inode cache; the parent directory comes from the entry
 * itself rather than from key->offset.
 *
 * NOTE(review): only the BTRFS_NAME_LEN bound on name_len is visible here;
 * confirm whether an item-boundary check is needed as well.
 */
1384 static int process_inode_extref(struct extent_buffer *eb,
1385 int slot, struct btrfs_key *key,
1386 struct shared_node *active_node)
1395 struct cache_tree *inode_cache;
1396 struct btrfs_inode_extref *extref;
1397 char namebuf[BTRFS_NAME_LEN];
1399 inode_cache = &active_node->inode_cache;
1401 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1402 total = btrfs_item_size_nr(eb, slot);
1403 while (cur < total) {
1404 name_len = btrfs_inode_extref_name_len(eb, extref);
1405 index = btrfs_inode_extref_index(eb, extref);
1406 parent = btrfs_inode_extref_parent(eb, extref);
/* Names longer than the buffer are truncated and flagged */
1407 if (name_len <= BTRFS_NAME_LEN) {
1411 len = BTRFS_NAME_LEN;
1412 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes start right after the fixed-size extref header */
1414 read_extent_buffer(eb, namebuf,
1415 (unsigned long)(extref + 1), len);
1416 add_inode_backref(inode_cache, key->objectid, parent,
1417 index, namebuf, len, 0, key->type, error);
/* Step over this entry: fixed header plus the on-disk name length */
1419 len = sizeof(*extref) + name_len;
1420 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Account the file extent item at @slot of @eb against the inode record
 * currently tracked by @active_node.
 *
 * Updates the record's extent range, flags overlaps and bad extent fields in
 * rec->errors, accumulates rec->found_size and queries checksum coverage
 * through count_csum_range().
 */
1427 static int process_file_extent(struct btrfs_root *root,
1428 struct extent_buffer *eb,
1429 int slot, struct btrfs_key *key,
1430 struct shared_node *active_node)
1432 struct inode_record *rec;
1433 struct btrfs_file_extent_item *fi;
1435 u64 disk_bytenr = 0;
1436 u64 extent_offset = 0;
/* sectorsize - 1: used below to test and round up to sector alignment */
1437 u64 mask = root->fs_info->sectorsize - 1;
1441 rec = active_node->current;
1442 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1443 rec->found_file_extent = 1;
/* extent_start still holds (u64)-1: begin tracking from this key's offset */
1445 if (rec->extent_start == (u64)-1) {
1446 rec->extent_start = key->offset;
1447 rec->extent_end = key->offset;
/* Compare the end of the range seen so far with this extent's start */
1450 if (rec->extent_end > key->offset)
1451 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1452 else if (rec->extent_end < key->offset) {
1453 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1454 key->offset - rec->extent_end);
1459 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1460 extent_type = btrfs_file_extent_type(eb, fi);
/* Inline extents: length comes from the item, then rounded up to a sector */
1462 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1463 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1465 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1466 rec->found_size += num_bytes;
1467 num_bytes = (num_bytes + mask) & ~mask;
1468 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1469 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1470 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1471 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1472 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned */
1473 if (num_bytes == 0 || (num_bytes & mask))
1474 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced range must not reach past ram_bytes */
1475 if (num_bytes + extent_offset >
1476 btrfs_file_extent_ram_bytes(eb, fi))
1477 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not be compressed, encrypted or encoded */
1478 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1479 (btrfs_file_extent_compression(eb, fi) ||
1480 btrfs_file_extent_encryption(eb, fi) ||
1481 btrfs_file_extent_other_encoding(eb, fi)))
1482 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents with a non-zero disk_bytenr add to found_size */
1483 if (disk_bytenr > 0)
1484 rec->found_size += num_bytes;
1486 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1488 rec->extent_end = key->offset + num_bytes;
1491 * The data reloc tree will copy full extents into its inode and then
1492 * copy the corresponding csums. Because the extent it copied could be
1493 * a preallocated extent that hasn't been written to yet there may be no
1494 * csums to copy, ergo we won't have csums for our file extent. This is
1495 * ok so just don't bother checking csums if the inode belongs to the
1498 if (disk_bytenr > 0 &&
1499 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* For compressed extents the on-disk length is used for the csum lookup */
1501 if (btrfs_file_extent_compression(eb, fi))
1502 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1504 disk_bytenr += extent_offset;
1506 ret = count_csum_range(root->fs_info, disk_bytenr, num_bytes,
/* REG extents record csum presence; the PREALLOC branch can flag odd csums */
1510 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1512 rec->found_csum_item = 1;
1513 if (found < num_bytes)
1514 rec->some_csum_missing = 1;
1515 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1517 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Feed every item of leaf @eb to the handler matching its key type.
 *
 * Items are attributed to the inode record stored in the active shared node
 * of @wc; whenever the objectid moves past the current record, that record
 * is marked checked and a record for the new objectid is looked up.
 */
1523 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1524 struct walk_control *wc)
1526 struct btrfs_key key;
1530 struct cache_tree *inode_cache;
1531 struct shared_node *active_node;
/* Special case: the active node is the root level and the root has 0 refs */
1533 if (wc->root_level == wc->active_node &&
1534 btrfs_root_refs(&root->root_item) == 0)
1537 active_node = wc->nodes[wc->active_node];
1538 inode_cache = &active_node->inode_cache;
1539 nritems = btrfs_header_nritems(eb);
1540 for (i = 0; i < nritems; i++) {
1541 btrfs_item_key_to_cpu(eb, &key, i);
/* Free space objects and orphan items get separate treatment here */
1543 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1545 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Switch records once the key's objectid is past the current inode */
1548 if (active_node->current == NULL ||
1549 active_node->current->ino < key.objectid) {
1550 if (active_node->current) {
1551 active_node->current->checked = 1;
1552 maybe_free_inode_rec(inode_cache,
1553 active_node->current);
1555 active_node->current = get_inode_rec(inode_cache,
1557 BUG_ON(IS_ERR(active_node->current));
/* Dir items and dir indexes share one handler */
1560 case BTRFS_DIR_ITEM_KEY:
1561 case BTRFS_DIR_INDEX_KEY:
1562 ret = process_dir_item(eb, i, &key, active_node);
1564 case BTRFS_INODE_REF_KEY:
1565 ret = process_inode_ref(eb, i, &key, active_node);
1567 case BTRFS_INODE_EXTREF_KEY:
1568 ret = process_inode_extref(eb, i, &key, active_node);
1570 case BTRFS_INODE_ITEM_KEY:
1571 ret = process_inode_item(eb, i, &key, active_node);
1573 case BTRFS_EXTENT_DATA_KEY:
1574 ret = process_file_extent(root, eb, i, &key,
/* Forward declarations of helpers called by process_one_leaf_v2() */
1584 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1585 struct extent_buffer *eb, struct node_refs *nrefs,
1586 u64 level, int check_all);
1587 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1588 unsigned int ext_ref);
1591 * Returns >0 Found error, not fatal, should continue
1592 * Returns <0 Fatal error, must exit the whole check
1593 * Returns 0 No errors found
/*
 * Check the inodes found in the leaf at path->nodes[0] with
 * check_inode_item(), accumulating its error bits, and refresh @nrefs for
 * the levels of @path once the path has moved on to a different leaf.
 */
1595 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1596 struct node_refs *nrefs, int *level, int ext_ref)
1598 struct extent_buffer *cur = path->nodes[0];
1599 struct btrfs_key key;
1603 int root_level = btrfs_header_level(root->node);
1605 int ret = 0; /* Final return value */
1606 int err = 0; /* Positive error bitmap */
/* Remember which leaf we started on, to detect a leaf switch later */
1608 cur_bytenr = cur->start;
1610 /* skip to first inode item or the first inode number change */
1611 nritems = btrfs_header_nritems(cur);
1612 for (i = 0; i < nritems; i++) {
1613 btrfs_item_key_to_cpu(cur, &key, i);
1615 first_ino = key.objectid;
1616 if (key.type == BTRFS_INODE_ITEM_KEY ||
1617 (first_ino && first_ino != key.objectid))
1621 path->slots[0] = nritems;
/* Error bits from each checked inode are OR-ed into err */
1627 err |= check_inode_item(root, path, ext_ref);
1629 /* modify cur since check_inode_item may change path */
1630 cur = path->nodes[0];
1632 if (err & LAST_ITEM)
1635 /* still have inode items in this leaf */
1636 if (cur->start == cur_bytenr)
1640 * we have switched to another leaf, above nodes may
1641 * have changed, here walk down the path, if a node
1642 * or leaf is shared, check whether we can skip this
1645 for (i = root_level; i >= 0; i--) {
1646 if (path->nodes[i]->start == nrefs->bytenr[i])
1649 ret = update_nodes_refs(root, path->nodes[i]->start,
1650 path->nodes[i], nrefs, i, 0);
1654 if (!nrefs->need_check[i]) {
/* Release every buffer held below *level and clear its path entry */
1660 for (i = 0; i < *level; i++) {
1661 free_extent_buffer(path->nodes[i]);
1662 path->nodes[i] = NULL;
/*
 * Issue readahead for the child blocks referenced by @node, starting at
 * pointer @slot and running to the node's last pointer.
 */
1671 static void reada_walk_down(struct btrfs_root *root,
1672 struct extent_buffer *node, int slot)
1674 struct btrfs_fs_info *fs_info = root->fs_info;
1681 level = btrfs_header_level(node);
1685 nritems = btrfs_header_nritems(node);
1686 for (i = slot; i < nritems; i++) {
/* Each pointer supplies the child's bytenr and its expected generation */
1687 bytenr = btrfs_node_blockptr(node, i);
1688 ptr_gen = btrfs_node_ptr_generation(node, i);
1689 readahead_tree_block(fs_info, bytenr, ptr_gen);
1694 * Check the child node/leaf by the following condition:
1695 * 1. the first item key of the node/leaf should be the same with the one
1697 * 2. block in parent node should match the child node/leaf.
1698 * 3. generation of parent node and child's header should be consistent.
1700 * Or the child node/leaf pointed by the key in parent is not valid.
1702 * We hope to check leaf owner too, but since subvol may share leaves,
1703 * which makes leaf owner check not so strong, key check should be
1704 * sufficient enough for that case.
1706 static int check_child_node(struct extent_buffer *parent, int slot,
1707 struct extent_buffer *child)
1709 struct btrfs_key parent_key;
1710 struct btrfs_key child_key;
1713 btrfs_node_key_to_cpu(parent, &parent_key, slot);
1714 if (btrfs_header_level(child) == 0)
1715 btrfs_item_key_to_cpu(child, &child_key, 0);
1717 btrfs_node_key_to_cpu(child, &child_key, 0);
1719 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1722 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1723 parent_key.objectid, parent_key.type, parent_key.offset,
1724 child_key.objectid, child_key.type, child_key.offset);
1726 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1728 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1729 btrfs_node_blockptr(parent, slot),
1730 btrfs_header_bytenr(child));
1732 if (btrfs_node_ptr_generation(parent, slot) !=
1733 btrfs_header_generation(child)) {
1735 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1736 btrfs_header_generation(child),
1737 btrfs_node_ptr_generation(parent, slot));
1743 * For a tree node or leaf that is shared, we don't need to iterate it
1744 * in every fs or file tree check. Here we find all its root ids, and only check
1745 * it in the fs or file tree which has the smallest root id.
/*
 * Decide whether @root is the tree that should check a block, given the
 * list @roots of root ids associated with it.
 *
 * Lists with zero or one entry are handled first; otherwise root->objectid
 * is compared with the value of the first node in the list's rb-tree.
 * Presumably returns 1 when @root must check the block and 0 when it can
 * be skipped -- the return statements are not shown here; confirm.
 */
1747 static int need_check(struct btrfs_root *root, struct ulist *roots)
1749 struct rb_node *node;
1750 struct ulist_node *u;
1753 * @roots can be empty if it belongs to tree reloc tree
1754 * In that case, we should always check the leaf, as we can't use
1755 * the tree owner to ensure some other root will check it.
1757 if (roots->nnodes == 1 || roots->nnodes == 0)
/* rb_first() yields the entry whose value is compared below */
1760 node = rb_first(&roots->root);
1761 u = rb_entry(node, struct ulist_node, rb_node);
1763 * current root id is not smallest, we skip it and let it be checked
1764 * in the fs or file tree who hash the smallest root id.
1766 if (root->objectid != u->val)
/*
 * Work out whether tree block @eb should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF and set or clear that bit in *flags_ret.
 *
 * A few cheap tests on @root and @eb come first; otherwise the block's
 * extent item is looked up in the extent tree and its flags and inline
 * references are examined.
 */
1772 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
1775 struct btrfs_root *extent_root = root->fs_info->extent_root;
1776 struct btrfs_root_item *ri = &root->root_item;
1777 struct btrfs_extent_inline_ref *iref;
1778 struct btrfs_extent_item *ei;
1779 struct btrfs_key key;
1780 struct btrfs_path *path = NULL;
1791 * Except file/reloc tree, we can not have FULL BACKREF MODE
1793 if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* Shortcuts: the root block itself, a RELOC-flagged block, an owned block */
1797 if (eb->start == btrfs_root_bytenr(ri))
1800 if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
1803 owner = btrfs_header_owner(eb);
1804 if (owner == root->objectid)
1807 path = btrfs_alloc_path();
/* Search with the largest offset, then step back to the extent item */
1811 key.objectid = btrfs_header_bytenr(eb);
1813 key.offset = (u64)-1;
1815 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
1822 ret = btrfs_previous_extent_item(extent_root, path,
1828 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* From here on eb is the extent tree leaf, not the block passed in */
1830 eb = path->nodes[0];
1831 slot = path->slots[0];
1832 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
1834 flags = btrfs_extent_flags(eb, ei);
1835 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
/* Inline refs follow the extent item and run to the end of the item */
1838 ptr = (unsigned long)(ei + 1);
1839 end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
/* EXTENT_ITEM keys carry a tree_block_info before the inline refs */
1841 if (key.type == BTRFS_EXTENT_ITEM_KEY)
1842 ptr += sizeof(struct btrfs_tree_block_info);
1845 /* Reached extent item ends normally */
1849 /* Beyond extent item end, wrong item size */
1851 error("extent item at bytenr %llu slot %d has wrong size",
1856 iref = (struct btrfs_extent_inline_ref *)ptr;
1857 offset = btrfs_extent_inline_ref_offset(eb, iref);
1858 type = btrfs_extent_inline_ref_type(eb, iref);
/* Look for a TREE_BLOCK_REF whose offset is the block's header owner */
1860 if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
1862 ptr += btrfs_extent_inline_ref_size(type);
1866 *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
1870 *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
1872 btrfs_free_path(path);
1877 * for a tree node or leaf, we record its reference count, so later if we still
1878 * process this node or leaf, don't need to compute its reference count again.
1880 * @bytenr if @bytenr == (u64)-1, only update nrefs->full_backref[level]
/*
 * Refresh the cached per-level state in @nrefs for the tree block at
 * @bytenr: reference count, need_check, checked and full_backref.
 *
 * When @bytenr is (u64)-1 the extent lookup is skipped; with @check_all and
 * a non-NULL @eb the full_backref state is derived from
 * calc_extent_flag_v2().
 */
1882 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1883 struct extent_buffer *eb, struct node_refs *nrefs,
1884 u64 level, int check_all)
1886 struct ulist *roots;
1889 int root_level = btrfs_header_level(root->node);
/* Same block as the one already cached for this level */
1893 if (nrefs->bytenr[level] == bytenr)
1896 if (bytenr != (u64)-1) {
1897 /* the return value of this function seems a mistake */
1898 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1899 level, 1, &refs, &flags);
1901 if (ret < 0 && !check_all)
/* Start a fresh cache entry for this level */
1904 nrefs->bytenr[level] = bytenr;
1905 nrefs->refs[level] = refs;
1906 nrefs->full_backref[level] = 0;
1907 nrefs->checked[level] = 0;
1910 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
1915 check = need_check(root, roots);
1917 nrefs->need_check[level] = check;
1920 nrefs->need_check[level] = 1;
/* The root level is always checked */
1922 if (level == root_level) {
1923 nrefs->need_check[level] = 1;
1926 * The node refs may have not been
1927 * updated if upper needs checking (the
1928 * lowest root_objectid) the node can
1931 nrefs->need_check[level] =
1932 nrefs->need_check[level + 1];
1938 if (check_all && eb) {
1939 calc_extent_flag_v2(root, eb, &flags);
1940 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
1941 nrefs->full_backref[level] = 1;
1948 * @level if @level == -1 means extent data item
1949 * else normal tree block.
/*
 * Decide from the cached reference counts in @nrefs how strictly an extent
 * at @level should be checked.
 *
 * @level is validated against the root level first; then every level above
 * @level up to the root is inspected for a reference count greater than 1.
 */
1951 static int should_check_extent_strictly(struct btrfs_root *root,
1952 struct node_refs *nrefs, int level)
1954 int root_level = btrfs_header_level(root->node);
/* Accepted range is -1 (extent data item) up to root_level */
1956 if (level > root_level || level < -1)
1958 if (level == root_level)
1961 * if the upper node is marked full backref, it should contain shared
1962 * backref of the parent (except owner == root->objectid).
/* Walk upwards, starting one level above @level */
1964 while (++level <= root_level)
1965 if (nrefs->refs[level] > 1)
/*
 * Descend from path->nodes[*level] towards the leaves, one child at a time.
 *
 * Reference counts are taken from @nrefs when cached, otherwise looked up
 * and stored there.  Leaves go to process_one_leaf(); children are read,
 * compared with their parent pointer by check_child_node() and validated
 * with btrfs_check_leaf()/btrfs_check_node() before the path steps into them.
 */
1971 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1972 struct walk_control *wc, int *level,
1973 struct node_refs *nrefs)
1975 enum btrfs_tree_block_status status;
1978 struct btrfs_fs_info *fs_info = root->fs_info;
1979 struct extent_buffer *next;
1980 struct extent_buffer *cur;
1984 WARN_ON(*level < 0);
1985 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached ref count of the starting block when it matches */
1987 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1988 refs = nrefs->refs[*level];
1991 ret = btrfs_lookup_extent_info(NULL, root,
1992 path->nodes[*level]->start,
1993 *level, 1, &refs, NULL);
1998 nrefs->bytenr[*level] = path->nodes[*level]->start;
1999 nrefs->refs[*level] = refs;
2003 ret = enter_shared_node(root, path->nodes[*level]->start,
2011 while (*level >= 0) {
2012 WARN_ON(*level < 0);
2013 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2014 cur = path->nodes[*level];
/* The header level must agree with the level we believe we are at */
2016 if (btrfs_header_level(cur) != *level)
2019 if (path->slots[*level] >= btrfs_header_nritems(cur))
2022 ret = process_one_leaf(root, cur, wc);
/* Child pointer and expected generation of the current slot */
2027 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2028 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
/* Same caching scheme for the child, one level down */
2030 if (bytenr == nrefs->bytenr[*level - 1]) {
2031 refs = nrefs->refs[*level - 1];
2033 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2034 *level - 1, 1, &refs, NULL);
2038 nrefs->bytenr[*level - 1] = bytenr;
2039 nrefs->refs[*level - 1] = refs;
2044 ret = enter_shared_node(root, bytenr, refs,
2047 path->slots[*level]++;
/* Try the cached buffer first; fall back to readahead plus a real read */
2052 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2053 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2054 free_extent_buffer(next);
2055 reada_walk_down(root, cur, path->slots[*level]);
2056 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2057 if (!extent_buffer_uptodate(next)) {
2058 struct btrfs_key node_key;
/* Unreadable child: record it as a corrupt extent */
2060 btrfs_node_key_to_cpu(path->nodes[*level],
2062 path->slots[*level]);
2063 btrfs_add_corrupt_extent_record(root->fs_info,
2065 path->nodes[*level]->start,
2066 root->fs_info->nodesize,
2073 ret = check_child_node(cur, path->slots[*level], next);
2075 free_extent_buffer(next);
2080 if (btrfs_is_leaf(next))
2081 status = btrfs_check_leaf(root, NULL, next);
2083 status = btrfs_check_node(root, NULL, next);
2084 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2085 free_extent_buffer(next);
/* Step down: the child replaces whatever the path held at the lower level */
2090 *level = *level - 1;
2091 free_extent_buffer(path->nodes[*level]);
2092 path->nodes[*level] = next;
2093 path->slots[*level] = 0;
/* Park the slot at nritems of the current block */
2096 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2101 * Update global fs information.
/*
 * Add the block at path->nodes[level] to the global byte counters:
 * total_btree_bytes always, total_fs_tree_bytes and total_extent_tree_bytes
 * depending on the root and the block's owner, and btree_space_waste from
 * the unused space in the block.
 */
2103 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2107 struct extent_buffer *eb = path->nodes[level];
2109 total_btree_bytes += eb->len;
2110 if (fs_root_objectid(root->objectid))
2111 total_fs_tree_bytes += eb->len;
2112 if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2113 total_extent_tree_bytes += eb->len;
/* Waste is the leaf's free space ... */
2116 btree_space_waste += btrfs_leaf_free_space(root, eb);
/* ... or the unused key pointers, each costing sizeof(struct btrfs_key_ptr) */
2118 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root->fs_info) -
2119 btrfs_header_nritems(eb));
2120 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2125 * This function only handles BACKREF_MISSING,
2126 * If corresponding extent item exists, increase the ref, else insert an extent
2129 * Returns error bits after repair.
/*
 * Repair a missing backref for tree block @node.
 *
 * Nothing is done unless BACKREF_MISSING is set in @err.  The block's
 * extent item is searched in the extent tree; an extent item can be
 * inserted with a reference count of 0, and a reference is then added with
 * btrfs_inc_extent_ref().  @nrefs is updated to match.
 *
 * NOTE(review): the identifier spelled "©_key" below looks like a
 * mis-encoded "&copy_key" (see the copy_key declaration) -- confirm against
 * a clean copy of this file.
 */
2131 static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
2132 struct btrfs_root *root,
2133 struct extent_buffer *node,
2134 struct node_refs *nrefs, int level, int err)
2136 struct btrfs_fs_info *fs_info = root->fs_info;
2137 struct btrfs_root *extent_root = fs_info->extent_root;
2138 struct btrfs_path path;
2139 struct btrfs_extent_item *ei;
2140 struct btrfs_tree_block_info *bi;
2141 struct btrfs_key key;
2142 struct extent_buffer *eb;
2143 u32 size = sizeof(*ei);
2144 u32 node_size = root->fs_info->nodesize;
2145 int insert_extent = 0;
2146 int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2147 int root_level = btrfs_header_level(root->node);
2152 u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
2155 if ((err & BACKREF_MISSING) == 0)
/*
 * NOTE(review): level == BTRFS_MAX_LEVEL passes this WARN_ON, yet level is
 * used to index the nrefs arrays below -- confirm the intended bound.
 */
2158 WARN_ON(level > BTRFS_MAX_LEVEL);
2161 btrfs_init_path(&path);
2162 bytenr = btrfs_header_bytenr(node);
2163 owner = btrfs_header_owner(node);
2164 generation = btrfs_header_generation(node);
2166 key.objectid = bytenr;
2168 key.offset = (u64)-1;
2170 /* Search for the extent item */
2171 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2177 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
2181 /* calculate if the extent item flag is full backref or not */
2182 if (nrefs->full_backref[level] != 0)
2183 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2185 /* insert an extent item */
2186 if (insert_extent) {
2187 struct btrfs_disk_key copy_key;
2189 generation = btrfs_header_generation(node);
/* A full-backref parent and a foreign owner also imply FULL_BACKREF */
2191 if (level < root_level && nrefs->full_backref[level + 1] &&
2192 owner != root->objectid) {
2193 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2196 key.objectid = bytenr;
/* Without skinny metadata the item carries an extra tree_block_info */
2197 if (!skinny_metadata) {
2198 key.type = BTRFS_EXTENT_ITEM_KEY;
2199 key.offset = node_size;
2200 size += sizeof(*bi);
2202 key.type = BTRFS_METADATA_ITEM_KEY;
2206 btrfs_release_path(&path);
2207 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
2213 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
/* The new item starts with 0 refs */
2215 btrfs_set_extent_refs(eb, ei, 0);
2216 btrfs_set_extent_generation(eb, ei, generation);
2217 btrfs_set_extent_flags(eb, ei, flags);
2219 if (!skinny_metadata) {
2220 bi = (struct btrfs_tree_block_info *)(ei + 1);
2221 memset_extent_buffer(eb, 0, (unsigned long)bi,
2223 btrfs_set_disk_key_objectid(©_key, root->objectid);
2224 btrfs_set_disk_key_type(©_key, 0);
2225 btrfs_set_disk_key_offset(©_key, 0);
2227 btrfs_set_tree_block_level(eb, bi, level);
2228 btrfs_set_tree_block_key(eb, bi, ©_key);
2230 btrfs_mark_buffer_dirty(eb);
2231 printf("Added an extent item [%llu %u]\n", bytenr, node_size);
2232 btrfs_update_block_group(extent_root, bytenr, node_size, 1, 0);
/* Keep the cached state in step with the item just written */
2234 nrefs->refs[level] = 0;
2235 nrefs->full_backref[level] =
2236 flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
2237 btrfs_release_path(&path);
/* Use the block one level up as parent under the same conditions as above */
2240 if (level < root_level && nrefs->full_backref[level + 1] &&
2241 owner != root->objectid)
2242 parent = nrefs->bytenr[level + 1];
2244 /* increase the ref */
2245 ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
2246 parent, root->objectid, level, 0);
2248 nrefs->refs[level]++;
2250 btrfs_release_path(&path);
2253 "failed to repair tree block ref start %llu root %llu due to %s",
2254 bytenr, root->objectid, strerror(-ret));
2256 printf("Added one tree block ref start %llu %s %llu\n",
2257 bytenr, parent ? "parent" : "root",
2258 parent ? parent : root->objectid);
/* Clear BACKREF_MISSING from the error bits */
2259 err &= ~BACKREF_MISSING;
/*
 * Forward declarations of checkers used by walk_down_tree_v2().
 * check_inode_item() is already declared earlier in this file with the same
 * prototype.
 */
2265 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2266 unsigned int ext_ref);
2267 static int check_tree_block_ref(struct btrfs_root *root,
2268 struct extent_buffer *eb, u64 bytenr,
2269 int level, u64 owner, struct node_refs *nrefs);
2270 static int check_leaf_items(struct btrfs_trans_handle *trans,
2271 struct btrfs_root *root, struct btrfs_path *path,
2272 struct node_refs *nrefs, int account_bytes);
2275 * @trans just for lowmem repair mode
2276 * @check_all if not 0 then check all tree block backrefs and items
2277 * 0 then just check relationship of items in fs tree(s)
2279 * Returns >0 Found error, should continue
2280 * Returns <0 Fatal error, must exit the whole check
2281 * Returns 0 No errors found
/*
 * Descend from path->nodes[*level] towards the leaves, checking each block
 * on the way.
 *
 * For every block visited: its tree block ref is checked (and can be
 * repaired), bytes are accounted, the block itself is validated, leaves are
 * handed to process_one_leaf_v2() or check_leaf_items(), and children are
 * read, verified with check_child_node() and entered.  Cached per-level
 * state lives in @nrefs and is refreshed through update_nodes_refs().
 */
2283 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2284 struct btrfs_root *root, struct btrfs_path *path,
2285 int *level, struct node_refs *nrefs, int ext_ref,
2289 enum btrfs_tree_block_status status;
2292 struct btrfs_fs_info *fs_info = root->fs_info;
2293 struct extent_buffer *next;
2294 struct extent_buffer *cur;
/* Passed to check_leaf_items(); set once the current block was accounted */
2298 int account_file_data = 0;
2300 WARN_ON(*level < 0);
2301 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2303 ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2304 path->nodes[*level], nrefs, *level, check_all);
2308 while (*level >= 0) {
2309 WARN_ON(*level < 0);
2310 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2311 cur = path->nodes[*level];
2312 bytenr = btrfs_header_bytenr(cur);
2313 check = nrefs->need_check[*level];
/* The header level must agree with the level we believe we are at */
2315 if (btrfs_header_level(cur) != *level)
2318 * Update bytes accounting and check tree block ref
2319 * NOTE: Doing accounting and check before checking nritems
2320 * is necessary because of empty node/leaf.
2322 if ((check_all && !nrefs->checked[*level]) ||
2323 (!check_all && nrefs->need_check[*level])) {
2324 ret = check_tree_block_ref(root, cur,
2325 btrfs_header_bytenr(cur), btrfs_header_level(cur),
2326 btrfs_header_owner(cur), nrefs);
2329 ret = repair_tree_block_ref(trans, root,
2330 path->nodes[*level], nrefs, *level, ret);
/* Only blocks that need checking and have references are accounted */
2333 if (check_all && nrefs->need_check[*level] &&
2334 nrefs->refs[*level]) {
2335 account_bytes(root, path, *level);
2336 account_file_data = 1;
2338 nrefs->checked[*level] = 1;
2341 if (path->slots[*level] >= btrfs_header_nritems(cur))
2344 /* Don't forget to check leaf/node validation */
2346 /* skip duplicate check */
2347 if (check || !check_all) {
2348 ret = btrfs_check_leaf(root, NULL, cur);
2349 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2357 ret = process_one_leaf_v2(root, path, nrefs,
2360 ret = check_leaf_items(trans, root, path,
2361 nrefs, account_file_data);
2365 if (check || !check_all) {
2366 ret = btrfs_check_node(root, NULL, cur);
2367 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
/* Child pointer and expected generation of the current slot */
2374 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2375 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2377 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2382 * check all trees in check_chunks_and_extent_v2
2383 * check shared node once in check_fs_roots
2385 if (!check_all && !nrefs->need_check[*level - 1]) {
2386 path->slots[*level]++;
/* Try the cached buffer first; fall back to readahead plus a real read */
2390 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2391 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2392 free_extent_buffer(next);
2393 reada_walk_down(root, cur, path->slots[*level]);
2394 next = read_tree_block(fs_info, bytenr, ptr_gen);
2395 if (!extent_buffer_uptodate(next)) {
2396 struct btrfs_key node_key;
/* Unreadable child: record it as a corrupt extent */
2398 btrfs_node_key_to_cpu(path->nodes[*level],
2400 path->slots[*level]);
2401 btrfs_add_corrupt_extent_record(fs_info,
2402 &node_key, path->nodes[*level]->start,
2403 fs_info->nodesize, *level);
2409 ret = check_child_node(cur, path->slots[*level], next);
2414 if (btrfs_is_leaf(next))
2415 status = btrfs_check_leaf(root, NULL, next);
2417 status = btrfs_check_node(root, NULL, next);
2418 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2419 free_extent_buffer(next);
/* Step down: the child replaces whatever the path held at the lower level */
2424 *level = *level - 1;
2425 free_extent_buffer(path->nodes[*level]);
2426 path->nodes[*level] = next;
2427 path->slots[*level] = 0;
2428 account_file_data = 0;
/* bytenr (u64)-1: refresh the cached state for the new block from its buffer */
2430 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
/*
 * Climb @path from *level looking for a level that still has a slot to the
 * right of the current one.  The buffer at *level can be released on the
 * way up, and the shared node is left through leave_shared_node() when
 * *level equals wc->active_node.
 */
2435 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2436 struct walk_control *wc, int *level)
2439 struct extent_buffer *leaf;
2441 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* "leaf" is simply the block at level i; it may be an internal node */
2442 leaf = path->nodes[i];
2443 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2448 free_extent_buffer(path->nodes[*level]);
2449 path->nodes[*level] = NULL;
2450 BUG_ON(*level > wc->active_node);
2451 if (*level == wc->active_node)
2452 leave_shared_node(root, wc, *level);
/*
 * Climb @path from *level looking for a level that still has a slot to the
 * right of the current one.  The buffer at *level can be released and its
 * path entry cleared on the way up.
 */
2459 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2463 struct extent_buffer *leaf;
2465 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* "leaf" is simply the block at level i; it may be an internal node */
2466 leaf = path->nodes[i];
2467 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2472 free_extent_buffer(path->nodes[*level]);
2473 path->nodes[*level] = NULL;
/*
 * Sanity-check the inode record @rec of a root directory.
 *
 * The tests cover: an inode item without recorded errors, nlink of 1 with no
 * found links, a non-empty backref list whose first entry has an inode ref
 * with index 0 and the name "..", and no dir index or dir item for that
 * backref.  Presumably any failing test rejects the record -- the return
 * statements are not shown here; confirm.
 */
2480 static int check_root_dir(struct inode_record *rec)
2482 struct inode_backref *backref;
2485 if (!rec->found_inode_item || rec->errors)
2487 if (rec->nlink != 1 || rec->found_link != 0)
2489 if (list_empty(&rec->backrefs))
/* Only the first backref on the list is examined */
2491 backref = to_inode_backref(rec->backrefs.next);
2492 if (!backref->found_inode_ref)
2494 if (backref->index != 0 || backref->namelen != 2 ||
2495 memcmp(backref->name, "..", 2))
2497 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size field of inode @rec->ino in @root with rec->found_size
 * and clear I_ERR_DIR_ISIZE_WRONG from the record.
 */
2504 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2505 struct btrfs_root *root, struct btrfs_path *path,
2506 struct inode_record *rec)
2508 struct btrfs_inode_item *ei;
2509 struct btrfs_key key;
/* Search with the largest offset for this inode's INODE_ITEM key */
2512 key.objectid = rec->ino;
2513 key.type = BTRFS_INODE_ITEM_KEY;
2514 key.offset = (u64)-1;
2516 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Slot 0 is a special case for the search result */
2520 if (!path->slots[0]) {
2527 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* The item found must belong to the inode being repaired */
2528 if (key.objectid != rec->ino) {
2533 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2534 struct btrfs_inode_item);
2535 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2536 btrfs_mark_buffer_dirty(path->nodes[0]);
2537 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2538 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2539 root->root_key.objectid);
2541 btrfs_release_path(path);
/*
 * Add an orphan item for inode @rec->ino to @root and clear
 * I_ERR_NO_ORPHAN_ITEM from the record.  @path is released after the
 * insertion attempt.
 */
2545 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2546 struct btrfs_root *root,
2547 struct btrfs_path *path,
2548 struct inode_record *rec)
2552 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2553 btrfs_release_path(path);
2555 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field of inode @rec->ino in @root with rec->found_size
 * and clear I_ERR_FILE_NBYTES_WRONG from the record.
 */
2559 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2560 struct btrfs_root *root,
2561 struct btrfs_path *path,
2562 struct inode_record *rec)
2564 struct btrfs_inode_item *ei;
2565 struct btrfs_key key;
2568 key.objectid = rec->ino;
2569 key.type = BTRFS_INODE_ITEM_KEY;
2572 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2579 /* Since ret == 0, no need to check anything */
2580 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2581 struct btrfs_inode_item);
2582 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2583 btrfs_mark_buffer_dirty(path->nodes[0]);
2584 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2585 printf("reset nbytes for ino %llu root %llu\n",
2586 rec->ino, root->root_key.objectid);
2588 btrfs_release_path(path);
/*
 * Insert the DIR_INDEX item that @backref says is missing for inode @rec.
 *
 * The item is keyed (backref->dir, DIR_INDEX, backref->index), points at
 * @rec's inode item and carries backref->name.  The change is committed in
 * its own transaction.  Afterwards the backref is marked as having a dir
 * index and the parent directory's record gets its found_size and
 * I_ERR_DIR_ISIZE_WRONG state updated.
 */
2592 static int add_missing_dir_index(struct btrfs_root *root,
2593 struct cache_tree *inode_cache,
2594 struct inode_record *rec,
2595 struct inode_backref *backref)
2597 struct btrfs_path path;
2598 struct btrfs_trans_handle *trans;
2599 struct btrfs_dir_item *dir_item;
2600 struct extent_buffer *leaf;
2601 struct btrfs_key key;
2602 struct btrfs_disk_key disk_key;
2603 struct inode_record *dir_rec;
2604 unsigned long name_ptr;
/* Item payload: the fixed dir item header followed by the name */
2605 u32 data_size = sizeof(*dir_item) + backref->namelen;
2608 trans = btrfs_start_transaction(root, 1);
2610 return PTR_ERR(trans);
2612 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2613 (unsigned long long)rec->ino);
2615 btrfs_init_path(&path);
2616 key.objectid = backref->dir;
2617 key.type = BTRFS_DIR_INDEX_KEY;
2618 key.offset = backref->index;
2619 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2622 leaf = path.nodes[0];
2623 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* The dir item's location key is the inode item of @rec */
2625 disk_key.objectid = cpu_to_le64(rec->ino);
2626 disk_key.type = BTRFS_INODE_ITEM_KEY;
2627 disk_key.offset = 0;
2629 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2630 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2631 btrfs_set_dir_data_len(leaf, dir_item, 0);
2632 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is stored right after the dir item header */
2633 name_ptr = (unsigned long)(dir_item + 1);
2634 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2635 btrfs_mark_buffer_dirty(leaf);
2636 btrfs_release_path(&path);
/* NOTE(review): the commit result is not used here -- confirm that is intended */
2637 btrfs_commit_transaction(trans, root);
2639 backref->found_dir_index = 1;
2640 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2641 BUG_ON(IS_ERR(dir_rec));
/* The new index entry adds its name length to the directory's found size */
2644 dir_rec->found_size += backref->namelen;
2645 if (dir_rec->found_size == dir_rec->isize &&
2646 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2647 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2648 if (dir_rec->found_size != dir_rec->isize)
2649 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by @backref from @root.
 *
 * The entry is looked up by directory, name and index; it is then removed
 * either as a whole item or as a single name inside the item.  The work is
 * done in its own transaction, which is committed on both paths.
 */
2654 static int delete_dir_index(struct btrfs_root *root,
2655 struct inode_backref *backref)
2657 struct btrfs_trans_handle *trans;
2658 struct btrfs_dir_item *di;
2659 struct btrfs_path path;
2662 trans = btrfs_start_transaction(root, 1);
2664 return PTR_ERR(trans);
2666 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2667 (unsigned long long)backref->dir,
2668 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2669 (unsigned long long)root->objectid);
2671 btrfs_init_path(&path);
2672 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2673 backref->name, backref->namelen,
2674 backref->index, -1);
/* Early exit path: release and commit without deleting anything */
2677 btrfs_release_path(&path);
2678 btrfs_commit_transaction(trans, root);
/* Two removal forms: the whole item, or just this name within the item */
2685 ret = btrfs_del_item(trans, root, &path);
2687 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2689 btrfs_release_path(&path);
2690 btrfs_commit_transaction(trans, root);
2694 static int __create_inode_item(struct btrfs_trans_handle *trans,
2695 struct btrfs_root *root, u64 ino, u64 size,
2696 u64 nbytes, u64 nlink, u32 mode)
2698 struct btrfs_inode_item ii;
2699 time_t now = time(NULL);
2702 btrfs_set_stack_inode_size(&ii, size);
2703 btrfs_set_stack_inode_nbytes(&ii, nbytes);
2704 btrfs_set_stack_inode_nlink(&ii, nlink);
2705 btrfs_set_stack_inode_mode(&ii, mode);
2706 btrfs_set_stack_inode_generation(&ii, trans->transid);
2707 btrfs_set_stack_timespec_nsec(&ii.atime, 0);
2708 btrfs_set_stack_timespec_sec(&ii.ctime, now);
2709 btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
2710 btrfs_set_stack_timespec_sec(&ii.mtime, now);
2711 btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
2712 btrfs_set_stack_timespec_sec(&ii.otime, 0);
2713 btrfs_set_stack_timespec_nsec(&ii.otime, 0);
2715 ret = btrfs_insert_inode(trans, root, ino, &ii);
2718 warning("root %llu inode %llu recreating inode item, this may "
2719 "be incomplete, please check permissions and content after "
2720 "the fsck completes.\n", (unsigned long long)root->objectid,
2721 (unsigned long long)ino);
/*
 * Create an inode item for @ino through __create_inode_item() with size,
 * nbytes and nlink all 0.  The mode is a directory with 0755 when filetype
 * is BTRFS_FT_DIR and a regular file with 0755 otherwise.
 */
2726 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2727 struct btrfs_root *root, u64 ino,
2730 u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2732 return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
/*
 * Recreate the inode item for @rec in @root inside its own transaction.
 *
 * The link count is 1 for a root directory and rec->found_link otherwise.
 * A record with a dir item becomes a directory sized rec->found_size; any
 * other record becomes a regular file sized rec->extent_end.  Both get mode
 * bits 0755.
 */
2735 static int create_inode_item(struct btrfs_root *root,
2736 struct inode_record *rec, int root_dir)
2738 struct btrfs_trans_handle *trans;
2744 trans = btrfs_start_transaction(root, 1);
2745 if (IS_ERR(trans)) {
2746 ret = PTR_ERR(trans);
2750 nlink = root_dir ? 1 : rec->found_link;
2751 if (rec->found_dir_item) {
/* Ambiguous record: dir item and file extents together; treat as directory */
2752 if (rec->found_file_extent)
2753 fprintf(stderr, "root %llu inode %llu has both a dir "
2754 "item and extents, unsure if it is a dir or a "
2755 "regular file so setting it as a directory\n",
2756 (unsigned long long)root->objectid,
2757 (unsigned long long)rec->ino);
2758 mode = S_IFDIR | 0755;
2759 size = rec->found_size;
/* NOTE(review): this condition is the exact negation of the one above */
2760 } else if (!rec->found_dir_item) {
2761 size = rec->extent_end;
2762 mode = S_IFREG | 0755;
2765 ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
/* NOTE(review): the commit result is not used here -- confirm that is intended */
2767 btrfs_commit_transaction(trans, root);
/*
 * Walk every backref of @rec and repair inconsistencies between the dir
 * index, dir item and inode ref that describe one link to the inode.
 *
 * The function runs in two flavours selected by a delete flag (declared on
 * a signature line not shown here). Cases visible in this excerpt:
 *  - not deleting, @rec is the root directory and has no inode item:
 *    recreate the inode item via create_inode_item(root, rec, 1);
 *  - index 0 of the root directory is left alone;
 *  - a dir index without an inode ref, or with an inode ref but flagged
 *    REF_ERR_INDEX_UNMATCH: delete_dir_index() and drop the backref from
 *    the list (the leading part of that condition is not visible);
 *  - not deleting, dir item and inode ref present but dir index missing:
 *    add_missing_dir_index(); a backref that afterwards has dir item, dir
 *    index, inode ref and no errors is removed from the list;
 *  - not deleting, only the inode ref present: after check_dir_conflict(),
 *    insert the dir item via btrfs_insert_dir_item() in a new transaction;
 *  - not deleting, all three present without index mismatch but the inode
 *    item is missing: create_inode_item(root, rec, 0).
 *
 * Returns ret when it is non-zero, otherwise the repaired counter. How ret
 * and repaired are updated between the steps is on lines not shown.
 */
2771 static int repair_inode_backrefs(struct btrfs_root *root,
2772 struct inode_record *rec,
2773 struct cache_tree *inode_cache,
2776 struct inode_backref *tmp, *backref;
2777 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2781 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2782 if (!delete && rec->ino == root_dirid) {
2783 if (!rec->found_inode_item) {
2784 ret = create_inode_item(root, rec, 1);
2791 /* Index 0 for root dir's are special, don't mess with it */
2792 if (rec->ino == root_dirid && backref->index == 0)
2796 ((backref->found_dir_index && !backref->found_inode_ref) ||
2797 (backref->found_dir_index && backref->found_inode_ref &&
2798 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2799 ret = delete_dir_index(root, backref);
2803 list_del(&backref->list);
2808 if (!delete && !backref->found_dir_index &&
2809 backref->found_dir_item && backref->found_inode_ref) {
2810 ret = add_missing_dir_index(root, inode_cache, rec,
2815 if (backref->found_dir_item &&
2816 backref->found_dir_index) {
2817 if (!backref->errors &&
2818 backref->found_inode_ref) {
2819 list_del(&backref->list);
2826 if (!delete && (!backref->found_dir_index &&
2827 !backref->found_dir_item &&
2828 backref->found_inode_ref)) {
2829 struct btrfs_trans_handle *trans;
2830 struct btrfs_key location;
2832 ret = check_dir_conflict(root, backref->name,
2838 * let nlink fixing routine to handle it,
2839 * which can do it better.
2844 location.objectid = rec->ino;
2845 location.type = BTRFS_INODE_ITEM_KEY;
2846 location.offset = 0;
2848 trans = btrfs_start_transaction(root, 1);
2849 if (IS_ERR(trans)) {
2850 ret = PTR_ERR(trans);
2853 fprintf(stderr, "adding missing dir index/item pair "
2855 (unsigned long long)rec->ino);
2856 ret = btrfs_insert_dir_item(trans, root, backref->name,
2858 backref->dir, &location,
2859 imode_to_type(rec->imode),
2862 btrfs_commit_transaction(trans, root);
2866 if (!delete && (backref->found_inode_ref &&
2867 backref->found_dir_index &&
2868 backref->found_dir_item &&
2869 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2870 !rec->found_inode_item)) {
2871 ret = create_inode_item(root, rec, 0);
2878 return ret ? ret : repaired;
/*
 * Lookup order used by find_file_type():
 *  1. if the inode item was found, the type is derived from rec->imode via
 *     imode_to_type();
 *  2. otherwise the filetype of the first backref that has a dir index or a
 *     dir item is used.
 * The result is written through @type.
 */
2882 * To determine the file type for nlink/inode_item repair
2884 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2885 * Return -ENOENT if file type is not found.
2887 static int find_file_type(struct inode_record *rec, u8 *type)
2889 struct inode_backref *backref;
2891 /* For inode item recovered case */
2892 if (rec->found_inode_item) {
2893 *type = imode_to_type(rec->imode);
2897 list_for_each_entry(backref, &rec->backrefs, list) {
2898 if (backref->found_dir_index || backref->found_dir_item) {
2899 *type = backref->filetype;
/*
 * find_file_name() takes the name from the first backref that has any of
 * dir index, dir item or inode ref. The name bytes are copied with memcpy()
 * into @name and the length is stored in @namelen.
 *
 * NOTE(review): no terminating NUL is written and no size check on @name is
 * visible here; the caller has to supply a buffer of at least
 * backref->namelen bytes.
 */
2907 * To determine the file name for nlink repair
2909 * Return 0 if file name is found, set name and namelen.
2910 * Return -ENOENT if file name is not found.
2912 static int find_file_name(struct inode_record *rec,
2913 char *name, int *namelen)
2915 struct inode_backref *backref;
2917 list_for_each_entry(backref, &rec->backrefs, list) {
2918 if (backref->found_dir_index || backref->found_dir_item ||
2919 backref->found_inode_ref) {
2920 memcpy(name, backref->name, backref->namelen);
2921 *namelen = backref->namelen;
/*
 * reset_nlink() rebuilds the link count of @rec from scratch. Steps visible
 * in this excerpt:
 *  1. rec->found_link is zeroed;
 *  2. btrfs_unlink() is called for every backref, valid or not;
 *  3. backrefs that lack any of dir index / dir item / inode ref are removed
 *     from the list so that they are not re-added later;
 *  4. the INODE_ITEM of rec->ino is looked up with btrfs_search_slot()
 *     (cow = 1) and its on-disk nlink is set to 0;
 *  5. every remaining backref is re-added with btrfs_add_link().
 * The error checks between these steps and the accounting of
 * rec->found_link are on lines not included in this excerpt.
 */
2928 /* Reset the nlink of the inode to the correct one */
2929 static int reset_nlink(struct btrfs_trans_handle *trans,
2930 struct btrfs_root *root,
2931 struct btrfs_path *path,
2932 struct inode_record *rec)
2934 struct inode_backref *backref;
2935 struct inode_backref *tmp;
2936 struct btrfs_key key;
2937 struct btrfs_inode_item *inode_item;
2940 /* We don't believe this either, reset it and iterate backref */
2941 rec->found_link = 0;
2943 /* Remove all backref including the valid ones */
2944 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2945 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2946 backref->index, backref->name,
2947 backref->namelen, 0);
2951 /* remove invalid backref, so it won't be added back */
2952 if (!(backref->found_dir_index &&
2953 backref->found_dir_item &&
2954 backref->found_inode_ref)) {
2955 list_del(&backref->list);
2962 /* Set nlink to 0 */
2963 key.objectid = rec->ino;
2964 key.type = BTRFS_INODE_ITEM_KEY;
2966 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2973 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2974 struct btrfs_inode_item);
2975 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2976 btrfs_mark_buffer_dirty(path->nodes[0]);
2977 btrfs_release_path(path);
2980 * Add back valid inode_ref/dir_item/dir_index,
2981 * add_link() will handle the nlink inc, so new nlink must be correct
2983 list_for_each_entry(backref, &rec->backrefs, list) {
2984 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2985 backref->name, backref->namelen,
2986 backref->filetype, &backref->index, 1, 0);
2991 btrfs_release_path(path);
/*
 * Find the highest inode objectid currently present in @root and store it
 * through highest_ino (declared on a signature line not shown here).
 *
 * The search key uses objectid BTRFS_LAST_FREE_OBJECTID and type
 * BTRFS_INODE_ITEM_KEY, so btrfs_search_slot() positions the path at the
 * end of the inode range; the key in the slot just before that position is
 * then read and its objectid reported. @path is initialised on entry and
 * released before returning.
 *
 * NOTE(review): the item at path->slots[0] - 1 is read; whether slot 0 is
 * guarded against depends on a condition that is not visible here.
 * NOTE(review): the consequence of *highest_ino reaching
 * BTRFS_LAST_FREE_OBJECTID is on a line not shown.
 */
2995 static int get_highest_inode(struct btrfs_trans_handle *trans,
2996 struct btrfs_root *root,
2997 struct btrfs_path *path,
3000 struct btrfs_key key, found_key;
3003 btrfs_init_path(path);
3004 key.objectid = BTRFS_LAST_FREE_OBJECTID;
3006 key.type = BTRFS_INODE_ITEM_KEY;
3007 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3009 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3010 path->slots[0] - 1);
3011 *highest_ino = found_key.objectid;
3014 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3016 btrfs_release_path(path);
/*
 * Implementation notes for link_inode_to_lostfound():
 *  - @path is released first, then get_highest_inode() supplies the starting
 *    value of lost_found_ino, which is passed by address to btrfs_mkdir()
 *    together with BTRFS_FIRST_FREE_OBJECTID as the parent;
 *  - @ino is linked into that directory under @namebuf / @name_len with
 *    btrfs_add_link();
 *  - while the link attempt returns -EEXIST, a suffix is appended to
 *    @namebuf in place with snprintf() (the format string is on a line not
 *    shown; the existing comment describes it as .INO) and the link is
 *    retried, as long as the longer name still fits in BTRFS_NAME_LEN;
 *  - progress and failures are reported with printf() and error().
 * @namebuf is modified by this function, so the caller must pass a writable
 * buffer of at least BTRFS_NAME_LEN bytes (assumed from the bound used in
 * the snprintf() call below).
 */
3021 * Link inode to dir 'lost+found'. Increase @ref_count.
3023 * Returns 0 means success.
3024 * Returns <0 means failure.
3026 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
3027 struct btrfs_root *root,
3028 struct btrfs_path *path,
3029 u64 ino, char *namebuf, u32 name_len,
3030 u8 filetype, u64 *ref_count)
3032 char *dir_name = "lost+found";
3037 btrfs_release_path(path);
3038 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3043 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3044 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3047 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3050 ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3051 namebuf, name_len, filetype, NULL, 1, 0);
3053 * Add ".INO" suffix several times to handle case where
3054 * "FILENAME.INO" is already taken by another file.
3056 while (ret == -EEXIST) {
3058 * Conflicting file name, add ".INO" as suffix * +1 for '.'
3060 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
3064 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3066 name_len += count_digits(ino) + 1;
3067 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3068 name_len, filetype, NULL, 1, 0);
3071 error("failed to link the inode %llu to %s dir: %s",
3072 ino, dir_name, strerror(-ret));
3077 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3078 name_len, namebuf, dir_name);
3080 btrfs_release_path(path);
3082 error("failed to move file '%.*s' to '%s' dir", name_len,
/*
 * Repair a wrong link count on @rec.
 *
 * The file name and type are captured first, because reset_nlink() removes
 * invalid backrefs from the record. If no name can be recovered the decimal
 * inode number is used as the name; if no type can be recovered
 * BTRFS_FT_REG_FILE is used. After reset_nlink(), an inode left with
 * found_link == 0 is linked into lost+found via link_inode_to_lostfound().
 *
 * I_ERR_LINK_COUNT_WRONG is cleared on the way out; the existing comment
 * near the end explains that this avoids revisiting the same inode forever.
 *
 * NOTE(review): namebuf is BTRFS_NAME_LEN bytes and the fallback name is
 * written with sprintf(), which also stores a terminating NUL. A u64 in
 * decimal needs at most 21 bytes including the NUL, so this fits only if
 * BTRFS_NAME_LEN is at least 21 (its value is not visible here).
 * NOTE(review): &rec->found_link is cast to u64 pointer; confirm that the
 * field really has that width.
 */
3087 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3088 struct btrfs_root *root,
3089 struct btrfs_path *path,
3090 struct inode_record *rec)
3092 char namebuf[BTRFS_NAME_LEN] = {0};
3095 int name_recovered = 0;
3096 int type_recovered = 0;
3100 * Get file name and type first before these invalid inode ref
3101 * are deleted by remove_all_invalid_backref()
3103 name_recovered = !find_file_name(rec, namebuf, &namelen);
3104 type_recovered = !find_file_type(rec, &type);
3106 if (!name_recovered) {
3107 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3108 rec->ino, rec->ino);
3109 namelen = count_digits(rec->ino);
3110 sprintf(namebuf, "%llu", rec->ino);
3113 if (!type_recovered) {
3114 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3116 type = BTRFS_FT_REG_FILE;
3120 ret = reset_nlink(trans, root, path, rec);
3123 "Failed to reset nlink for inode %llu: %s\n",
3124 rec->ino, strerror(-ret));
3128 if (rec->found_link == 0) {
3129 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3130 namebuf, namelen, type,
3131 (u64 *)&rec->found_link);
3135 printf("Fixed the nlink of inode %llu\n", rec->ino);
3138 * Clear the flag anyway, or we will loop forever for the same inode
3139 * as it will not be removed from the bad inode list and the dead loop
3142 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3143 btrfs_release_path(path);
/*
 * How find_normal_file_extent() works, as far as this excerpt shows:
 * a read-only btrfs_search_slot() (no transaction, no cow) looks for an
 * EXTENT_DATA key of @ino; if the slot is past the last item of the leaf
 * the search moves on with btrfs_next_leaf(). Items are examined while the
 * key still belongs to @ino and is of type BTRFS_EXTENT_DATA_KEY, and an
 * extent whose type is not BTRFS_FILE_EXTENT_INLINE is treated as the hit.
 * The local path is released before returning.
 * The loop construct and the returned values are on lines not shown.
 */
3148 * Check if there is any normal(reg or prealloc) file extent for given
3150 * This is used to determine the file type when neither its dir_index/item or
3151 * inode_item exists.
3153 * This will *NOT* report error, if any error happens, just consider it does
3154 * not have any normal file extent.
3156 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3158 struct btrfs_path path;
3159 struct btrfs_key key;
3160 struct btrfs_key found_key;
3161 struct btrfs_file_extent_item *fi;
3165 btrfs_init_path(&path);
3167 key.type = BTRFS_EXTENT_DATA_KEY;
3170 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3175 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3176 ret = btrfs_next_leaf(root, &path);
3183 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3185 if (found_key.objectid != ino ||
3186 found_key.type != BTRFS_EXTENT_DATA_KEY)
3188 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3189 struct btrfs_file_extent_item);
3190 type = btrfs_file_extent_type(path.nodes[0], fi);
3191 if (type != BTRFS_FILE_EXTENT_INLINE) {
3197 btrfs_release_path(&path);
/*
 * Map a BTRFS_FT_* directory entry type to the matching S_IF* file type
 * bits by table lookup. Only the seven types listed in the table have an
 * explicit entry.
 *
 * NOTE(review): @type indexes the table directly and no range check is
 * visible. A value larger than the biggest designated index would read past
 * the end of the array; callers that pass a type read from disk should be
 * checked for that.
 */
3201 static u32 btrfs_type_to_imode(u8 type)
3203 static u32 imode_by_btrfs_type[] = {
3204 [BTRFS_FT_REG_FILE] = S_IFREG,
3205 [BTRFS_FT_DIR] = S_IFDIR,
3206 [BTRFS_FT_CHRDEV] = S_IFCHR,
3207 [BTRFS_FT_BLKDEV] = S_IFBLK,
3208 [BTRFS_FT_FIFO] = S_IFIFO,
3209 [BTRFS_FT_SOCK] = S_IFSOCK,
3210 [BTRFS_FT_SYMLINK] = S_IFLNK,
3213 return imode_by_btrfs_type[(type)];
/*
 * Rebuild the inode item of @rec when it is missing.
 *
 * The file type is taken from find_file_type() when possible. Otherwise it
 * is guessed in this order:
 *  - a file extent was recorded and find_normal_file_extent() confirms a
 *    non-inline extent: regular file;
 *  - a dir item was recorded: directory;
 *  - the record has orphan extents: regular file;
 *  - nothing matches: regular file, after printing a notice.
 * btrfs_new_inode() then creates the item with mode combined with the S_IF*
 * bits from btrfs_type_to_imode().
 *
 * On the visible tail the record is updated: found_dir_item is set to 1,
 * imode is filled in, I_ERR_NO_INODE_ITEM is cleared and
 * I_ERR_LINK_COUNT_WRONG is set so that the nlink repair runs afterwards.
 * The initial value of mode and the error path are on lines not shown.
 */
3216 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3217 struct btrfs_root *root,
3218 struct btrfs_path *path,
3219 struct inode_record *rec)
3223 int type_recovered = 0;
3226 printf("Trying to rebuild inode:%llu\n", rec->ino);
3228 type_recovered = !find_file_type(rec, &filetype);
3231 * Try to determine inode type if type not found.
3233 * For found regular file extent, it must be FILE.
3234 * For found dir_item/index, it must be DIR.
3236 * For undetermined one, use FILE as fallback.
3239 * 1. If found backref(inode_index/item is already handled) to it,
3241 * Need new inode-inode ref structure to allow search for that.
3243 if (!type_recovered) {
3244 if (rec->found_file_extent &&
3245 find_normal_file_extent(root, rec->ino)) {
3247 filetype = BTRFS_FT_REG_FILE;
3248 } else if (rec->found_dir_item) {
3250 filetype = BTRFS_FT_DIR;
3251 } else if (!list_empty(&rec->orphan_extents)) {
3253 filetype = BTRFS_FT_REG_FILE;
3255 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3258 filetype = BTRFS_FT_REG_FILE;
3262 ret = btrfs_new_inode(trans, root, rec->ino,
3263 mode | btrfs_type_to_imode(filetype));
3268 * Here inode rebuild is done, we only rebuild the inode item,
3269 * don't repair the nlink(like move to lost+found).
3270 * That is the job of nlink repair.
3272 * We just fill the record and return
3274 rec->found_dir_item = 1;
3275 rec->imode = mode | btrfs_type_to_imode(filetype);
3277 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3278 /* Ensure the inode_nlinks repair function will be called */
3279 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach the orphan data extents recorded on @rec to the inode.
 *
 * For each orphan extent, btrfs_get_extent() first probes the file range
 * (offset, disk_len) for an existing file extent. Two outcomes are visible:
 *  - conflict: a message is printed and the orphan extent is released with
 *    btrfs_free_extent() on the extent root;
 *  - otherwise: btrfs_insert_file_extent() adds a file extent that uses
 *    disk_len both as on-disk and as logical length (the extent is assumed
 *    to be uncompressed, see the existing comment), rec->found_size grows by
 *    disk_len, and the covered range is removed from rec->holes.
 * I_ERR_FILE_NBYTES_WRONG is cleared once found_size equals nbytes,
 * I_ERR_FILE_EXTENT_DISCOUNT once no hole is left, and
 * I_ERR_FILE_EXTENT_ORPHAN after the loop. Processed entries are unlinked
 * from the list; freeing them and the error exits are on lines not shown.
 */
3284 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3285 struct btrfs_root *root,
3286 struct btrfs_path *path,
3287 struct inode_record *rec)
3289 struct orphan_data_extent *orphan;
3290 struct orphan_data_extent *tmp;
3293 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3295 * Check for conflicting file extents
3297 * Here we don't know whether the extents is compressed or not,
3298 * so we can only assume it not compressed nor data offset,
3299 * and use its disk_len as extent length.
3301 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3302 orphan->offset, orphan->disk_len, 0);
3303 btrfs_release_path(path);
3308 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3309 orphan->disk_bytenr, orphan->disk_len);
3310 ret = btrfs_free_extent(trans,
3311 root->fs_info->extent_root,
3312 orphan->disk_bytenr, orphan->disk_len,
3313 0, root->objectid, orphan->objectid,
3318 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3319 orphan->offset, orphan->disk_bytenr,
3320 orphan->disk_len, orphan->disk_len);
3324 /* Update file size info */
3325 rec->found_size += orphan->disk_len;
3326 if (rec->found_size == rec->nbytes)
3327 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3329 /* Update the file extent hole info too */
3330 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3334 if (RB_EMPTY_ROOT(&rec->holes))
3335 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3337 list_del(&orphan->list);
3340 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps between file extents of @rec with explicit hole extents.
 *
 * Holes are taken one at a time from the rec->holes rb-tree: each one is
 * written with btrfs_punch_hole() and then removed from the tree with
 * del_file_extent_hole(); I_ERR_FILE_EXTENT_DISCOUNT is cleared as soon as
 * the tree is empty. A separate branch handles a file that lost all of its
 * file extents by punching one hole from offset 0 up to isize rounded up to
 * the sector size (the condition guarding that branch is not shown).
 * A summary line is printed at the end.
 */
3345 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3346 struct btrfs_root *root,
3347 struct btrfs_path *path,
3348 struct inode_record *rec)
3350 struct rb_node *node;
3351 struct file_extent_hole *hole;
3355 node = rb_first(&rec->holes);
3359 hole = rb_entry(node, struct file_extent_hole, node);
3360 ret = btrfs_punch_hole(trans, root, rec->ino,
3361 hole->start, hole->len);
3364 ret = del_file_extent_hole(&rec->holes, hole->start,
3368 if (RB_EMPTY_ROOT(&rec->holes))
3369 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3370 node = rb_first(&rec->holes);
3372 /* special case for a file losing all its file extent */
3374 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3375 round_up(rec->isize,
3376 root->fs_info->sectorsize));
3380 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3381 rec->ino, root->objectid);
/*
 * Dispatch the per-inode repair routines for every repairable error bit set
 * in rec->errors, all inside one transaction that reserves 7 items (the
 * existing comment below breaks that number down for the nlink repair).
 *
 * Records without any of the repairable bits are filtered out first. The
 * repairs then run in a fixed order, and each later step is skipped once an
 * earlier one has failed: missing inode item, orphan extents, extent
 * discount (holes), directory isize, orphan item, link count, nbytes.
 * A repair may change rec->errors, and every step tests the current value.
 *
 * The transaction is committed afterwards regardless of ret; the result of
 * btrfs_commit_transaction() is not consumed on the line shown.
 */
3386 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3388 struct btrfs_trans_handle *trans;
3389 struct btrfs_path path;
3392 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3393 I_ERR_NO_ORPHAN_ITEM |
3394 I_ERR_LINK_COUNT_WRONG |
3395 I_ERR_NO_INODE_ITEM |
3396 I_ERR_FILE_EXTENT_ORPHAN |
3397 I_ERR_FILE_EXTENT_DISCOUNT|
3398 I_ERR_FILE_NBYTES_WRONG)))
3402 * For nlink repair, it may create a dir and add link, so
3403 * 2 for parent(256)'s dir_index and dir_item
3404 * 2 for lost+found dir's inode_item and inode_ref
3405 * 1 for the new inode_ref of the file
3406 * 2 for lost+found dir's dir_index and dir_item for the file
3408 trans = btrfs_start_transaction(root, 7);
3410 return PTR_ERR(trans);
3412 btrfs_init_path(&path);
3413 if (rec->errors & I_ERR_NO_INODE_ITEM)
3414 ret = repair_inode_no_item(trans, root, &path, rec);
3415 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3416 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3417 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3418 ret = repair_inode_discount_extent(trans, root, &path, rec);
3419 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3420 ret = repair_inode_isize(trans, root, &path, rec);
3421 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3422 ret = repair_inode_orphan_item(trans, root, &path, rec);
3423 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3424 ret = repair_inode_nlinks(trans, root, &path, rec);
3425 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3426 ret = repair_inode_nbytes(trans, root, &path, rec);
3427 btrfs_commit_transaction(trans, root);
3428 btrfs_release_path(&path);
/*
 * Validate all inode records collected for @root and, when the global
 * repair flag is set, try to fix them. Every record in @inode_cache is
 * consumed by the final loop.
 *
 * Flow visible in this excerpt:
 *  1. A root with zero refs is not checked further; a warning is printed if
 *     its cache is unexpectedly non-empty.
 *  2. In repair mode the cache is walked and repair_inode_backrefs() is
 *     called for each record that has backrefs. The walk is staged (see the
 *     existing comment: delete invalid backrefs first, add correct ones
 *     afterwards); the stage bookkeeping is only partly visible.
 *  3. The root directory record is fetched and checked with
 *     check_root_dir(). One branch recreates a missing root directory with
 *     btrfs_make_root_dir() in its own transaction; another reports it as
 *     not found. The conditions selecting these branches are not shown.
 *  4. Remaining records are removed from the cache one by one. Records for
 *     the root directory and BTRFS_ORPHAN_OBJECTID are freed right away.
 *     I_ERR_NO_ORPHAN_ITEM is cleared after check_orphan_item() (condition
 *     not shown). Missing inode items and link count mismatches are flagged,
 *     try_repair_inode() is called, and whatever is still wrong is printed
 *     together with each unresolved backref before the record is freed.
 *
 * Returns -1 if the error counter is positive, otherwise 0.
 */
3432 static int check_inode_recs(struct btrfs_root *root,
3433 struct cache_tree *inode_cache)
3435 struct cache_extent *cache;
3436 struct ptr_node *node;
3437 struct inode_record *rec;
3438 struct inode_backref *backref;
3443 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3445 if (btrfs_root_refs(&root->root_item) == 0) {
3446 if (!cache_tree_empty(inode_cache))
3447 fprintf(stderr, "warning line %d\n", __LINE__);
3452 * We need to repair backrefs first because we could change some of the
3453 * errors in the inode recs.
3455 * We also need to go through and delete invalid backrefs first and then
3456 * add the correct ones second. We do this because we may get EEXIST
3457 * when adding back the correct index because we hadn't yet deleted the
3460 * For example, if we were missing a dir index then the directories
3461 * isize would be wrong, so if we fixed the isize to what we thought it
3462 * would be and then fixed the backref we'd still have a invalid fs, so
3463 * we need to add back the dir index and then check to see if the isize
3468 if (stage == 3 && !err)
3471 cache = search_cache_extent(inode_cache, 0);
3472 while (repair && cache) {
3473 node = container_of(cache, struct ptr_node, cache);
3475 cache = next_cache_extent(cache);
3477 /* Need to free everything up and rescan */
3479 remove_cache_extent(inode_cache, &node->cache);
3481 free_inode_rec(rec);
3485 if (list_empty(&rec->backrefs))
3488 ret = repair_inode_backrefs(root, rec, inode_cache,
3502 rec = get_inode_rec(inode_cache, root_dirid, 0);
3503 BUG_ON(IS_ERR(rec));
3505 ret = check_root_dir(rec);
3507 fprintf(stderr, "root %llu root dir %llu error\n",
3508 (unsigned long long)root->root_key.objectid,
3509 (unsigned long long)root_dirid);
3510 print_inode_error(root, rec);
3515 struct btrfs_trans_handle *trans;
3517 trans = btrfs_start_transaction(root, 1);
3518 if (IS_ERR(trans)) {
3519 err = PTR_ERR(trans);
3524 "root %llu missing its root dir, recreating\n",
3525 (unsigned long long)root->objectid);
3527 ret = btrfs_make_root_dir(trans, root, root_dirid);
3530 btrfs_commit_transaction(trans, root);
3534 fprintf(stderr, "root %llu root dir %llu not found\n",
3535 (unsigned long long)root->root_key.objectid,
3536 (unsigned long long)root_dirid);
3540 cache = search_cache_extent(inode_cache, 0);
3543 node = container_of(cache, struct ptr_node, cache);
3545 remove_cache_extent(inode_cache, &node->cache);
3547 if (rec->ino == root_dirid ||
3548 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3549 free_inode_rec(rec);
3553 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3554 ret = check_orphan_item(root, rec->ino);
3556 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3557 if (can_free_inode_rec(rec)) {
3558 free_inode_rec(rec);
3563 if (!rec->found_inode_item)
3564 rec->errors |= I_ERR_NO_INODE_ITEM;
3565 if (rec->found_link != rec->nlink)
3566 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3568 ret = try_repair_inode(root, rec);
3569 if (ret == 0 && can_free_inode_rec(rec)) {
3570 free_inode_rec(rec);
3576 if (!(repair && ret == 0))
3578 print_inode_error(root, rec);
3579 list_for_each_entry(backref, &rec->backrefs, list) {
3580 if (!backref->found_dir_item)
3581 backref->errors |= REF_ERR_NO_DIR_ITEM;
3582 if (!backref->found_dir_index)
3583 backref->errors |= REF_ERR_NO_DIR_INDEX;
3584 if (!backref->found_inode_ref)
3585 backref->errors |= REF_ERR_NO_INODE_REF;
3586 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3587 " namelen %u name %s filetype %d errors %x",
3588 (unsigned long long)backref->dir,
3589 (unsigned long long)backref->index,
3590 backref->namelen, backref->name,
3591 backref->filetype, backref->errors);
3592 print_ref_error(backref->errors);
3594 free_inode_rec(rec);
3596 return (error > 0) ? -1 : 0;
/*
 * Look up the root_record for objectid in @root_cache, creating it on a
 * miss (objectid is declared on a signature line not shown here).
 *
 * A new record is zero-allocated with calloc(), gets an empty backref list
 * and is inserted into the cache as an extent that starts at objectid with
 * size 1.
 *
 * Returns the record, ERR_PTR(-ENOMEM) when the allocation fails, or
 * ERR_PTR(-EEXIST) when the insertion fails.
 * NOTE(review): whether the new record is freed before returning -EEXIST is
 * on a line not shown; confirm to rule out a leak.
 */
3599 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3602 struct cache_extent *cache;
3603 struct root_record *rec = NULL;
3606 cache = lookup_cache_extent(root_cache, objectid, 1);
3608 rec = container_of(cache, struct root_record, cache);
3610 rec = calloc(1, sizeof(*rec));
3612 return ERR_PTR(-ENOMEM);
3613 rec->objectid = objectid;
3614 INIT_LIST_HEAD(&rec->backrefs);
3615 rec->cache.start = objectid;
3616 rec->cache.size = 1;
3618 ret = insert_cache_extent(root_cache, &rec->cache);
3620 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec that matches (@ref_root, @dir, @name), or create
 * one and append it to rec->backrefs.
 *
 * Matching compares ref_root, dir and namelen first and then the name bytes
 * with memcmp(); @index is not part of the match, it is only stored in a
 * newly created entry. A new entry is allocated with room for the name plus
 * a terminating NUL, which is written explicitly after the copy.
 *
 * The handling of a failed calloc() and the assignment of the dir field are
 * on lines not shown in this excerpt.
 */
3625 static struct root_backref *get_root_backref(struct root_record *rec,
3626 u64 ref_root, u64 dir, u64 index,
3627 const char *name, int namelen)
3629 struct root_backref *backref;
3631 list_for_each_entry(backref, &rec->backrefs, list) {
3632 if (backref->ref_root != ref_root || backref->dir != dir ||
3633 backref->namelen != namelen)
3635 if (memcmp(name, backref->name, namelen))
3640 backref = calloc(1, sizeof(*backref) + namelen + 1);
3643 backref->ref_root = ref_root;
3645 backref->index = index;
3646 backref->namelen = namelen;
3647 memcpy(backref->name, name, namelen);
3648 backref->name[namelen] = '\0';
3649 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Destructor for one root_record stored in a cache tree: every backref is
 * unlinked from rec->backrefs until the list is empty. The calls that
 * release the backrefs and the record itself are on lines not shown here.
 *
 * The FREE_EXTENT_CACHE_BASED_TREE() line after the function passes
 * free_root_record as the per-entry callback for the root_recs tree; the
 * macro is defined elsewhere (presumably it generates the tree-freeing
 * helper - confirm against its definition).
 */
3653 static void free_root_record(struct cache_extent *cache)
3655 struct root_record *rec;
3656 struct root_backref *backref;
3658 rec = container_of(cache, struct root_record, cache);
3659 while (!list_empty(&rec->backrefs)) {
3660 backref = to_root_backref(rec->backrefs.next);
3661 list_del(&backref->list);
3668 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one piece of evidence (dir item, dir index, root ref or root
 * backref) that subvolume @root_id is referenced from @ref_root under
 * directory @dir with the given @name.
 *
 * The matching root_backref is looked up or created and @errors is OR-ed
 * into it. For every item type except BTRFS_DIR_ITEM_KEY the index is
 * cross-checked: if another index-carrying item was already seen and its
 * index differs, REF_ERR_INDEX_UNMATCH is set; the branch that stores the
 * index is only partly visible. Then the found_* flag for @item_type is set;
 * a second ROOT_REF or ROOT_BACKREF for the same entry is flagged as a
 * duplicate.
 *
 * A backref becomes reachable once both the forward ref (ROOT_REF) and the
 * dir item have been seen. The updates of rec->found_ref happen on lines
 * not shown in this excerpt.
 */
3670 static int add_root_backref(struct cache_tree *root_cache,
3671 u64 root_id, u64 ref_root, u64 dir, u64 index,
3672 const char *name, int namelen,
3673 int item_type, int errors)
3675 struct root_record *rec;
3676 struct root_backref *backref;
3678 rec = get_root_rec(root_cache, root_id);
3679 BUG_ON(IS_ERR(rec));
3680 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3683 backref->errors |= errors;
3685 if (item_type != BTRFS_DIR_ITEM_KEY) {
3686 if (backref->found_dir_index || backref->found_back_ref ||
3687 backref->found_forward_ref) {
3688 if (backref->index != index)
3689 backref->errors |= REF_ERR_INDEX_UNMATCH;
3691 backref->index = index;
3695 if (item_type == BTRFS_DIR_ITEM_KEY) {
3696 if (backref->found_forward_ref)
3698 backref->found_dir_item = 1;
3699 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3700 backref->found_dir_index = 1;
3701 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3702 if (backref->found_forward_ref)
3703 backref->errors |= REF_ERR_DUP_ROOT_REF;
3704 else if (backref->found_dir_item)
3706 backref->found_forward_ref = 1;
3707 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3708 if (backref->found_back_ref)
3709 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3710 backref->found_back_ref = 1;
3715 if (backref->found_forward_ref && backref->found_dir_item)
3716 backref->reachable = 1;
/*
 * Move the subvolume references collected while scanning @root from
 * @src_cache (inode records) into @dst_cache (root records).
 *
 * For the tree-reloc root the source cache is simply freed. Otherwise each
 * record is removed from @src_cache, is_child_root() is consulted for
 * rec->ino, and every backref that carries a dir item and/or a dir index is
 * replayed into @dst_cache through add_root_backref() with the matching key
 * type. A backref with an inode ref is not expected here (BUG_ON). The
 * record is freed afterwards.
 *
 * How the result of is_child_root() steers the loop is on lines not shown.
 */
3720 static int merge_root_recs(struct btrfs_root *root,
3721 struct cache_tree *src_cache,
3722 struct cache_tree *dst_cache)
3724 struct cache_extent *cache;
3725 struct ptr_node *node;
3726 struct inode_record *rec;
3727 struct inode_backref *backref;
3730 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3731 free_inode_recs_tree(src_cache);
3736 cache = search_cache_extent(src_cache, 0);
3739 node = container_of(cache, struct ptr_node, cache);
3741 remove_cache_extent(src_cache, &node->cache);
3744 ret = is_child_root(root, root->objectid, rec->ino);
3750 list_for_each_entry(backref, &rec->backrefs, list) {
3751 BUG_ON(backref->found_inode_ref);
3752 if (backref->found_dir_item)
3753 add_root_backref(dst_cache, rec->ino,
3754 root->root_key.objectid, backref->dir,
3755 backref->index, backref->name,
3756 backref->namelen, BTRFS_DIR_ITEM_KEY,
3758 if (backref->found_dir_index)
3759 add_root_backref(dst_cache, rec->ino,
3760 root->root_key.objectid, backref->dir,
3761 backref->index, backref->name,
3762 backref->namelen, BTRFS_DIR_INDEX_KEY,
3766 free_inode_rec(rec);
/*
 * Verify that every fs tree recorded in @root_cache is properly referenced.
 *
 * Pass 1 (reachability): starting from the record of
 * BTRFS_FS_TREE_OBJECTID, the cache is scanned and a backref stays
 * reachable only while the root it comes from still has found_ref > 0;
 * otherwise its reachable flag is cleared. The loop that repeats this scan
 * and the found_ref bookkeeping are only partly visible. As the existing
 * fixme notes, circular references are not detected.
 *
 * Pass 2 (reporting): a tree in the free-objectid range with found_ref == 0
 * is looked up with check_orphan_item() in the tree root; one that has no
 * root item is treated as meaningless, otherwise it is reported as not
 * referenced. For the other records the missing pieces of each backref are
 * turned into REF_ERR_* bits and reachable backrefs with errors are printed.
 *
 * Returns 1 if any error was counted, otherwise 0.
 */
3773 static int check_root_refs(struct btrfs_root *root,
3774 struct cache_tree *root_cache)
3776 struct root_record *rec;
3777 struct root_record *ref_root;
3778 struct root_backref *backref;
3779 struct cache_extent *cache;
3785 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3786 BUG_ON(IS_ERR(rec));
3789 /* fixme: this can not detect circular references */
3792 cache = search_cache_extent(root_cache, 0);
3796 rec = container_of(cache, struct root_record, cache);
3797 cache = next_cache_extent(cache);
3799 if (rec->found_ref == 0)
3802 list_for_each_entry(backref, &rec->backrefs, list) {
3803 if (!backref->reachable)
3806 ref_root = get_root_rec(root_cache,
3808 BUG_ON(IS_ERR(ref_root));
3809 if (ref_root->found_ref > 0)
3812 backref->reachable = 0;
3814 if (rec->found_ref == 0)
3820 cache = search_cache_extent(root_cache, 0);
3824 rec = container_of(cache, struct root_record, cache);
3825 cache = next_cache_extent(cache);
3827 if (rec->found_ref == 0 &&
3828 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3829 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3830 ret = check_orphan_item(root->fs_info->tree_root,
3836 * If we don't have a root item then we likely just have
3837 * a dir item in a snapshot for this root but no actual
3838 * ref key or anything so it's meaningless.
3840 if (!rec->found_root_item)
3843 fprintf(stderr, "fs tree %llu not referenced\n",
3844 (unsigned long long)rec->objectid);
3848 if (rec->found_ref > 0 && !rec->found_root_item)
3850 list_for_each_entry(backref, &rec->backrefs, list) {
3851 if (!backref->found_dir_item)
3852 backref->errors |= REF_ERR_NO_DIR_ITEM;
3853 if (!backref->found_dir_index)
3854 backref->errors |= REF_ERR_NO_DIR_INDEX;
3855 if (!backref->found_back_ref)
3856 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3857 if (!backref->found_forward_ref)
3858 backref->errors |= REF_ERR_NO_ROOT_REF;
3859 if (backref->reachable && backref->errors)
3866 fprintf(stderr, "fs tree %llu refs %u %s\n",
3867 (unsigned long long)rec->objectid, rec->found_ref,
3868 rec->found_root_item ? "" : "not found");
3870 list_for_each_entry(backref, &rec->backrefs, list) {
3871 if (!backref->reachable)
3873 if (!backref->errors && rec->found_root_item)
3875 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3876 " index %llu namelen %u name %s errors %x\n",
3877 (unsigned long long)backref->ref_root,
3878 (unsigned long long)backref->dir,
3879 (unsigned long long)backref->index,
3880 backref->namelen, backref->name,
3882 print_ref_error(backref->errors);
3885 return errors > 0 ? 1 : 0;
/*
 * Decode one ROOT_REF or ROOT_BACKREF item at @slot of @eb and feed it into
 * @root_cache via add_root_backref().
 *
 * dirid, sequence (index) and name length are read from the item, and the
 * name that follows the btrfs_root_ref structure is copied into a local
 * buffer of BTRFS_NAME_LEN bytes. A name longer than BTRFS_NAME_LEN is
 * truncated to that length and flagged with REF_ERR_NAME_TOO_LONG.
 *
 * The two key types store the pair of roots in opposite order, so for a
 * ROOT_REF key->offset is passed as the referenced root and key->objectid as
 * the referencing one, while the other branch passes them the other way
 * round.
 */
3888 static int process_root_ref(struct extent_buffer *eb, int slot,
3889 struct btrfs_key *key,
3890 struct cache_tree *root_cache)
3896 struct btrfs_root_ref *ref;
3897 char namebuf[BTRFS_NAME_LEN];
3900 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3902 dirid = btrfs_root_ref_dirid(eb, ref);
3903 index = btrfs_root_ref_sequence(eb, ref);
3904 name_len = btrfs_root_ref_name_len(eb, ref);
3906 if (name_len <= BTRFS_NAME_LEN) {
3910 len = BTRFS_NAME_LEN;
3911 error = REF_ERR_NAME_TOO_LONG;
3913 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3915 if (key->type == BTRFS_ROOT_REF_KEY) {
3916 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3917 index, namebuf, len, key->type, error);
3919 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3920 index, namebuf, len, key->type, error);
/*
 * Destructor for one btrfs_corrupt_block stored in a cache tree: the
 * containing structure is recovered from the embedded cache_extent; the
 * call that releases it is on a line not shown here.
 *
 * The FREE_EXTENT_CACHE_BASED_TREE() line after the function passes
 * free_corrupt_block as the per-entry callback for the corrupt_blocks tree;
 * the macro is defined elsewhere (presumably it generates the tree-freeing
 * helper - confirm against its definition).
 */
3925 static void free_corrupt_block(struct cache_extent *cache)
3927 struct btrfs_corrupt_block *corrupt;
3929 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3933 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
/*
 * repair_btree() works in two passes over @corrupt_blocks inside a single
 * transaction; nothing is done when the tree is empty.
 *
 * Pass 1: for every recorded corrupt block the parent slot is located with
 * btrfs_search_slot() at path.lowest_level = corrupt->level and ins_len 0
 * (no balancing, see the existing comment), the child block pointer is read,
 * the pointer is removed with btrfs_del_ptr() and the extent of the removed
 * block is released with btrfs_free_extent() using nodesize as its length.
 *
 * Pass 2: the same keys are searched again with ins_len -1 so that
 * btrfs_search_slot() rebalances the tree around the removed pointers.
 *
 * The transaction is committed and the path released at the end; the error
 * exits between the steps are on lines not shown in this excerpt.
 */
3936 * Repair the btree of the given root.
3938 * The fix is to remove the node key in corrupt_blocks cache_tree.
3939 * and rebalance the tree.
3940 * After the fix, the btree should be writeable.
3942 static int repair_btree(struct btrfs_root *root,
3943 struct cache_tree *corrupt_blocks)
3945 struct btrfs_trans_handle *trans;
3946 struct btrfs_path path;
3947 struct btrfs_corrupt_block *corrupt;
3948 struct cache_extent *cache;
3949 struct btrfs_key key;
3954 if (cache_tree_empty(corrupt_blocks))
3957 trans = btrfs_start_transaction(root, 1);
3958 if (IS_ERR(trans)) {
3959 ret = PTR_ERR(trans);
3960 fprintf(stderr, "Error starting transaction: %s\n",
3964 btrfs_init_path(&path);
3965 cache = first_cache_extent(corrupt_blocks);
3967 corrupt = container_of(cache, struct btrfs_corrupt_block,
3969 level = corrupt->level;
3970 path.lowest_level = level;
3971 key.objectid = corrupt->key.objectid;
3972 key.type = corrupt->key.type;
3973 key.offset = corrupt->key.offset;
3976 * Here we don't want to do any tree balance, since it may
3977 * cause a balance with corrupted brother leaf/node,
3978 * so ins_len set to 0 here.
3979 * Balance will be done after all corrupt node/leaf is deleted.
3981 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3984 offset = btrfs_node_blockptr(path.nodes[level],
3987 /* Remove the ptr */
3988 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3992 * Remove the corresponding extent
3993 * return value is not concerned.
3995 btrfs_release_path(&path);
3996 ret = btrfs_free_extent(trans, root, offset,
3997 root->fs_info->nodesize, 0,
3998 root->root_key.objectid, level - 1, 0);
3999 cache = next_cache_extent(cache);
4002 /* Balance the btree using btrfs_search_slot() */
4003 cache = first_cache_extent(corrupt_blocks);
4005 corrupt = container_of(cache, struct btrfs_corrupt_block,
4007 memcpy(&key, &corrupt->key, sizeof(key));
4008 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
4011 /* return will always >0 since it won't find the item */
4013 btrfs_release_path(&path);
4014 cache = next_cache_extent(cache);
4017 btrfs_commit_transaction(trans, root);
4018 btrfs_release_path(&path);
4022 static int check_fs_root(struct btrfs_root *root,
4023 struct cache_tree *root_cache,
4024 struct walk_control *wc)
4030 struct btrfs_path path;
4031 struct shared_node root_node;
4032 struct root_record *rec;
4033 struct btrfs_root_item *root_item = &root->root_item;
4034 struct cache_tree corrupt_blocks;
4035 struct orphan_data_extent *orphan;
4036 struct orphan_data_extent *tmp;
4037 enum btrfs_tree_block_status status;
4038 struct node_refs nrefs;
4041 * Reuse the corrupt_block cache tree to record corrupted tree block
4043 * Unlike the usage in extent tree check, here we do it in a per
4044 * fs/subvol tree base.
4046 cache_tree_init(&corrupt_blocks);
4047 root->fs_info->corrupt_blocks = &corrupt_blocks;
4049 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4050 rec = get_root_rec(root_cache, root->root_key.objectid);
4051 BUG_ON(IS_ERR(rec));
4052 if (btrfs_root_refs(root_item) > 0)
4053 rec->found_root_item = 1;
4056 btrfs_init_path(&path);
4057 memset(&root_node, 0, sizeof(root_node));
4058 cache_tree_init(&root_node.root_cache);
4059 cache_tree_init(&root_node.inode_cache);
4060 memset(&nrefs, 0, sizeof(nrefs));
4062 /* Move the orphan extent record to corresponding inode_record */
4063 list_for_each_entry_safe(orphan, tmp,
4064 &root->orphan_data_extents, list) {
4065 struct inode_record *inode;
4067 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4069 BUG_ON(IS_ERR(inode));
4070 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4071 list_move(&orphan->list, &inode->orphan_extents);
4074 level = btrfs_header_level(root->node);
4075 memset(wc->nodes, 0, sizeof(wc->nodes));
4076 wc->nodes[level] = &root_node;
4077 wc->active_node = level;
4078 wc->root_level = level;
4080 /* We may not have checked the root block, lets do that now */
4081 if (btrfs_is_leaf(root->node))
4082 status = btrfs_check_leaf(root, NULL, root->node);
4084 status = btrfs_check_node(root, NULL, root->node);
4085 if (status != BTRFS_TREE_BLOCK_CLEAN)
4088 if (btrfs_root_refs(root_item) > 0 ||
4089 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4090 path.nodes[level] = root->node;
4091 extent_buffer_get(root->node);
4092 path.slots[level] = 0;
4094 struct btrfs_key key;
4095 struct btrfs_disk_key found_key;
4097 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4098 level = root_item->drop_level;
4099 path.lowest_level = level;
4100 if (level > btrfs_header_level(root->node) ||
4101 level >= BTRFS_MAX_LEVEL) {
4102 error("ignoring invalid drop level: %u", level);
4105 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4108 btrfs_node_key(path.nodes[level], &found_key,
4110 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4111 sizeof(found_key)));
4115 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4121 wret = walk_up_tree(root, &path, wc, &level);
4128 btrfs_release_path(&path);
4130 if (!cache_tree_empty(&corrupt_blocks)) {
4131 struct cache_extent *cache;
4132 struct btrfs_corrupt_block *corrupt;
4134 printf("The following tree block(s) is corrupted in tree %llu:\n",
4135 root->root_key.objectid);
4136 cache = first_cache_extent(&corrupt_blocks);
4138 corrupt = container_of(cache,
4139 struct btrfs_corrupt_block,
4141 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4142 cache->start, corrupt->level,
4143 corrupt->key.objectid, corrupt->key.type,
4144 corrupt->key.offset);
4145 cache = next_cache_extent(cache);
4148 printf("Try to repair the btree for root %llu\n",
4149 root->root_key.objectid);
4150 ret = repair_btree(root, &corrupt_blocks);
4152 fprintf(stderr, "Failed to repair btree: %s\n",
4155 printf("Btree for root %llu is fixed\n",
4156 root->root_key.objectid);
4160 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4164 if (root_node.current) {
4165 root_node.current->checked = 1;
4166 maybe_free_inode_rec(&root_node.inode_cache,
4170 err = check_inode_recs(root, &root_node.inode_cache);
4174 free_corrupt_blocks_tree(&corrupt_blocks);
4175 root->fs_info->corrupt_blocks = NULL;
4176 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Walk the items of the tree root and check every fs/subvolume tree found
 * there: ROOT_ITEMs whose objectid passes fs_root_objectid() are read and
 * handed to check_fs_root(), ROOT_REF/ROOT_BACKREF items are handed to
 * process_root_ref().
 *
 * NOTE(review): the loop head, the restart jump and the return statement are
 * not visible in this excerpt; the comments below only describe visible lines.
 */
4180 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4181 struct cache_tree *root_cache)
4183 struct btrfs_path path;
4184 struct btrfs_key key;
4185 struct walk_control wc;
4186 struct extent_buffer *leaf, *tree_node;
4187 struct btrfs_root *tmp_root;
4188 struct btrfs_root *tree_root = fs_info->tree_root;
/* Progress reporting: switch the shared task context to the fs roots stage. */
4192 if (ctx.progress_enabled) {
4193 ctx.tp = TASK_FS_ROOTS;
4194 task_start(ctx.info);
4198 * Just in case we made any changes to the extent tree that weren't
4199 * reflected into the free space cache yet.
4202 reset_cached_block_groups(fs_info);
4203 memset(&wc, 0, sizeof(wc));
4204 cache_tree_init(&wc.shared);
4205 btrfs_init_path(&path);
4210 key.type = BTRFS_ROOT_ITEM_KEY;
4211 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/*
 * Remember the tree root node; if it differs later the collected root
 * records are dropped and the path released (presumably to start over).
 */
4216 tree_node = tree_root->node;
4218 if (tree_node != tree_root->node) {
4219 free_root_recs_tree(root_cache);
4220 btrfs_release_path(&path);
4223 leaf = path.nodes[0];
/* Slot ran past the end of this leaf: advance to the next leaf. */
4224 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4225 ret = btrfs_next_leaf(tree_root, &path);
4231 leaf = path.nodes[0];
4233 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4234 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4235 fs_root_objectid(key.objectid)) {
/* Reloc trees are read without the root cache and freed again below. */
4236 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4237 tmp_root = btrfs_read_fs_root_no_cache(
4240 key.offset = (u64)-1;
4241 tmp_root = btrfs_read_fs_root(
4244 if (IS_ERR(tmp_root)) {
4248 ret = check_fs_root(tmp_root, root_cache, &wc);
/* -EAGAIN: drop what was collected so far and release the path. */
4249 if (ret == -EAGAIN) {
4250 free_root_recs_tree(root_cache);
4251 btrfs_release_path(&path);
4256 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4257 btrfs_free_fs_root(tmp_root);
4258 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4259 key.type == BTRFS_ROOT_BACKREF_KEY) {
4260 process_root_ref(leaf, path.slots[0], &key,
4267 btrfs_release_path(&path);
/* After freeing, a non-empty shared cache is only reported as a warning. */
4269 free_extent_cache_tree(&wc.shared);
4270 if (!cache_tree_empty(&wc.shared))
4271 fprintf(stderr, "warning line %d\n", __LINE__);
4273 task_stop(ctx.info);
4279 * Find the @index according by @ino and name.
4280 * Notice:time efficiency is O(N)
4282 * @root: the root of the fs/file tree
4283 * @index_ret: the index as return value
4284 * @namebuf: the name to match
4285 * @name_len: the length of name to match
4286 * @file_type: the file_type of INODE_ITEM to match
4288 * Returns 0 if found and *@index_ret will be modified with right value
4289 * Returns< 0 not found and *@index_ret will be (u64)-1
4291 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4292 u64 *index_ret, char *namebuf, u32 name_len,
4295 struct btrfs_path path;
4296 struct extent_buffer *node;
4297 struct btrfs_dir_item *di;
4298 struct btrfs_key key;
4299 struct btrfs_key location;
4300 char name[BTRFS_NAME_LEN] = {0};
4312 /* search from the last index */
4313 key.objectid = dirid;
4314 key.offset = (u64)-1;
4315 key.type = BTRFS_DIR_INDEX_KEY;
4317 btrfs_init_path(&path);
4318 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4323 ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4326 *index_ret = (64)-1;
4329 /* Check whether inode_id/filetype/name match */
4330 node = path.nodes[0];
4331 slot = path.slots[0];
4332 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4333 total = btrfs_item_size_nr(node, slot);
4334 while (cur < total) {
4336 len = btrfs_dir_name_len(node, di);
4337 data_len = btrfs_dir_data_len(node, di);
4339 btrfs_dir_item_key_to_cpu(node, di, &location);
4340 if (location.objectid != location_id ||
4341 location.type != BTRFS_INODE_ITEM_KEY ||
4342 location.offset != 0)
4345 filetype = btrfs_dir_type(node, di);
4346 if (file_type != filetype)
4349 if (len > BTRFS_NAME_LEN)
4350 len = BTRFS_NAME_LEN;
4352 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4353 if (len != name_len || strncmp(namebuf, name, len))
4356 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4357 *index_ret = key.offset;
4361 len += sizeof(*di) + data_len;
4362 di = (struct btrfs_dir_item *)((char *)di + len);
4368 btrfs_release_path(&path);
4373 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4374 * INODE_REF/INODE_EXTREF match.
4376 * @root: the root of the fs/file tree
4377 * @key: the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4378 * value while find index
4379 * @location_key: location key of the struct btrfs_dir_item to match
4380 * @name: the name to match
4381 * @namelen: the length of name
4382 * @file_type: the type of file to match
4384 * Return 0 if no error occurred.
4385 * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4386 * DIR_ITEM/DIR_INDEX
4387 * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4388 * and DIR_ITEM/DIR_INDEX mismatch
/*
 * Look up the DIR_ITEM/DIR_INDEX addressed by @key and compare its entries
 * (location key, file type, name) with the expected values.
 *
 * NOTE(review): declarations, gotos and the return statement are not visible
 * in this excerpt; the comments below only describe visible lines.
 */
4390 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4391 struct btrfs_key *location_key, char *name,
4392 u32 namelen, u8 file_type)
4394 struct btrfs_path path;
4395 struct extent_buffer *node;
4396 struct btrfs_dir_item *di;
4397 struct btrfs_key location;
4398 char namebuf[BTRFS_NAME_LEN] = {0};
/*
 * A DIR_INDEX key with offset (u64)-1 means the index is unknown: let
 * find_dir_index() fill key->offset by scanning the directory.
 */
4407 /* get the index by traversing all index */
4408 if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4409 ret = find_dir_index(root, key->objectid,
4410 location_key->objectid, &key->offset,
4411 name, namelen, file_type);
4413 ret = DIR_INDEX_MISSING;
4417 btrfs_init_path(&path);
4418 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
/* The *_MISSING value depends on which kind of key was looked up. */
4420 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4425 /* Check whether inode_id/filetype/name match */
4426 node = path.nodes[0];
4427 slot = path.slots[0];
4428 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4429 total = btrfs_item_size_nr(node, slot);
4430 while (cur < total) {
/* Start each entry as a mismatch; which value depends on the key type. */
4431 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4432 DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4434 len = btrfs_dir_name_len(node, di);
4435 data_len = btrfs_dir_data_len(node, di);
4437 btrfs_dir_item_key_to_cpu(node, di, &location);
4438 if (location.objectid != location_key->objectid ||
4439 location.type != location_key->type ||
4440 location.offset != location_key->offset)
4443 filetype = btrfs_dir_type(node, di);
4444 if (file_type != filetype)
/*
 * Clamp the name to the local buffer size.
 * NOTE(review): the warning prints len after it was clamped, i.e. always
 * BTRFS_NAME_LEN rather than the on-disk length - confirm this is intended.
 */
4447 if (len > BTRFS_NAME_LEN) {
4448 len = BTRFS_NAME_LEN;
4449 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4451 key->type == BTRFS_DIR_ITEM_KEY ?
4452 "DIR_ITEM" : "DIR_INDEX",
4453 key->objectid, key->offset, len);
4455 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4457 if (len != namelen || strncmp(namebuf, name, len))
/* Step to the next entry packed in the same item. */
4463 len += sizeof(*di) + data_len;
4464 di = (struct btrfs_dir_item *)((char *)di + len);
4469 btrfs_release_path(&path);
4474 * Prints inode ref error message
4476 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4477 u64 index, const char *namebuf, int name_len,
4478 u8 filetype, int err)
4483 /* root dir error */
4484 if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4486 "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4487 root->objectid, key->objectid, key->offset, namebuf);
4492 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4493 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4494 root->objectid, key->offset,
4495 btrfs_name_hash(namebuf, name_len),
4496 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4498 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4499 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4500 root->objectid, key->offset, index,
4501 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4506 * Insert the missing inode item.
4508 * Returns 0 means success.
4509 * Returns <0 means error.
/*
 * Start a transaction, search for the INODE_ITEM key of @ino and create the
 * inode item through create_inode_item_lowmem().
 *
 * NOTE(review): the remaining key setup, the labels and the return statement
 * are not visible in this excerpt.
 */
4511 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4514 struct btrfs_key key;
4515 struct btrfs_trans_handle *trans;
4516 struct btrfs_path path;
4520 key.type = BTRFS_INODE_ITEM_KEY;
4523 btrfs_init_path(&path);
4524 trans = btrfs_start_transaction(root, 1);
4525 if (IS_ERR(trans)) {
/* Search with a transaction, ins_len 1 and cow 1, i.e. prepared to insert. */
4530 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
/* Both a search error (<0) and an already existing item (0) skip the insert. */
4531 if (ret < 0 || !ret)
4534 /* insert inode item */
4535 create_inode_item_lowmem(trans, root, ino, filetype);
4538 btrfs_commit_transaction(trans, root);
4541 error("failed to repair root %llu INODE ITEM[%llu] missing",
4542 root->objectid, ino);
4543 btrfs_release_path(&path);
4548 * The ternary means dir item, dir index and relative inode ref.
4549 * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4550 * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4552 * If two of three is missing or mismatched, delete the existing one.
4553 * If one of three is missing or mismatched, add the missing one.
4555 * returns 0 means success.
4556 * returns not 0 means on error;
/*
 * Repair the (dir item, dir index, inode ref) triple of one name: depending
 * on the error bits in the error bitmap the name is either removed with
 * btrfs_unlink() or re-added with btrfs_add_link(), inside one transaction.
 *
 * NOTE(review): the stage counting, the branch selecting unlink vs. add_link
 * and the return statement are not visible in this excerpt.
 */
4558 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4559 u64 index, char *name, int name_len, u8 filetype,
4562 struct btrfs_trans_handle *trans;
4567 * stage shall be one of following valild values:
4568 * 0: Fine, nothing to do.
4569 * 1: One of three is wrong, so add missing one.
4570 * 2: Two of three is wrong, so delete existed one.
/* One check per member of the triple: dir index, dir item, inode ref. */
4572 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4574 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4576 if (err & (INODE_REF_MISSING))
4579 /* stage must be smllarer than 3 */
4582 trans = btrfs_start_transaction(root, 1);
4584 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4589 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4590 filetype, &index, 1, 1);
4594 btrfs_commit_transaction(trans, root);
4597 error("fail to repair inode %llu name %s filetype %u",
4598 ino, name, filetype);
/* The success message says "Delete" for stage 2 and "Add" otherwise. */
4600 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
4601 stage == 2 ? "Delete" : "Add",
4602 ino, name, filetype);
4608 * Traverse the given INODE_REF and call find_dir_item() to find related
4609 * DIR_ITEM/DIR_INDEX.
4611 * @root: the root of the fs/file tree
4612 * @ref_key: the key of the INODE_REF
4613 * @path the path provides node and slot
4614 * @refs: the count of INODE_REF
4615 * @mode: the st_mode of INODE_ITEM
4616 * @name_ret: returns with the first ref's name
4617 * @name_len_ret: len of the name_ret
4619 * Return 0 if no error occurred.
/*
 * Walk every name packed in one INODE_REF item and verify that the matching
 * DIR_INDEX and DIR_ITEM exist in the parent directory (ref_key->offset).
 * With the global repair flag set, broken names are handed to
 * repair_ternary_lowmem(); otherwise they are reported.
 *
 * NOTE(review): several declarations, labels, loop/goto lines and the return
 * statement are not visible in this excerpt.
 */
4621 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4622 struct btrfs_path *path, char *name_ret,
4623 u32 *namelen_ret, u64 *refs_ret, int mode)
4625 struct btrfs_key key;
4626 struct btrfs_key location;
4627 struct btrfs_inode_ref *ref;
4628 struct extent_buffer *node;
4629 char namebuf[BTRFS_NAME_LEN] = {0};
4639 int need_research = 0;
4647 /* since after repair, path and the dir item may be changed */
4648 if (need_research) {
4650 btrfs_release_path(path);
4651 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
4652 /* the item was deleted, let path point to the last checked item */
4654 if (path->slots[0] == 0)
4655 btrfs_prev_leaf(root, path);
/* Every dir entry of this inode must point at (objectid, INODE_ITEM, 0). */
4663 location.objectid = ref_key->objectid;
4664 location.type = BTRFS_INODE_ITEM_KEY;
4665 location.offset = 0;
4666 node = path->nodes[0];
4667 slot = path->slots[0];
/* namebuf is a char array, so the division by sizeof(*namebuf) is by 1. */
4669 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4670 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4671 total = btrfs_item_size_nr(node, slot);
4674 /* Update inode ref count */
4677 index = btrfs_inode_ref_index(node, ref);
4678 name_len = btrfs_inode_ref_name_len(node, ref);
/* Names longer than BTRFS_NAME_LEN are read trimmed, with a warning. */
4680 if (name_len <= BTRFS_NAME_LEN) {
4683 len = BTRFS_NAME_LEN;
4684 warning("root %llu INODE_REF[%llu %llu] name too long",
4685 root->objectid, ref_key->objectid, ref_key->offset);
4688 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4690 /* copy the first name found to name_ret */
4691 if (refs == 1 && name_ret) {
4692 memcpy(name_ret, namebuf, len);
4696 /* Check root dir ref */
4697 if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4698 if (index != 0 || len != strlen("..") ||
4699 strncmp("..", namebuf, len) ||
4700 ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4701 /* set err bits then repair will delete the ref */
4702 err |= DIR_INDEX_MISSING;
4703 err |= DIR_ITEM_MISSING;
4708 /* Find related DIR_INDEX */
4709 key.objectid = ref_key->offset;
4710 key.type = BTRFS_DIR_INDEX_KEY;
4712 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4713 imode_to_type(mode));
4715 /* Find related dir_item */
4716 key.objectid = ref_key->offset;
4717 key.type = BTRFS_DIR_ITEM_KEY;
4718 key.offset = btrfs_name_hash(namebuf, len);
4719 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4720 imode_to_type(mode));
/*
 * NOTE(review): repair and error printing receive the on-disk name_len, not
 * the trimmed len, although namebuf holds at most BTRFS_NAME_LEN bytes.
 */
4722 if (tmp_err && repair) {
4723 ret = repair_ternary_lowmem(root, ref_key->offset,
4724 ref_key->objectid, index, namebuf,
4725 name_len, imode_to_type(mode),
4732 print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4733 imode_to_type(mode), tmp_err);
/* Step to the next name using the on-disk name length. */
4735 len = sizeof(*ref) + name_len;
4736 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4747 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4748 * DIR_ITEM/DIR_INDEX.
4750 * @root: the root of the fs/file tree
4751 * @ref_key: the key of the INODE_EXTREF
4752 * @refs: the count of INODE_EXTREF
4753 * @mode: the st_mode of INODE_ITEM
4755 * Return 0 if no error occurred.
/*
 * Walk every name packed in one INODE_EXTREF item and verify that the
 * matching DIR_INDEX and DIR_ITEM exist in the parent directory recorded in
 * the extref.
 *
 * NOTE(review): several declarations, loop/goto lines and the return
 * statement are not visible in this excerpt.
 */
4757 static int check_inode_extref(struct btrfs_root *root,
4758 struct btrfs_key *ref_key,
4759 struct extent_buffer *node, int slot, u64 *refs,
4762 struct btrfs_key key;
4763 struct btrfs_key location;
4764 struct btrfs_inode_extref *extref;
4765 char namebuf[BTRFS_NAME_LEN] = {0};
/* Every dir entry of this inode must point at (objectid, INODE_ITEM, 0). */
4775 location.objectid = ref_key->objectid;
4776 location.type = BTRFS_INODE_ITEM_KEY;
4777 location.offset = 0;
4779 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4780 total = btrfs_item_size_nr(node, slot);
4783 /* update inode ref count */
4785 name_len = btrfs_inode_extref_name_len(node, extref);
4786 index = btrfs_inode_extref_index(node, extref);
4787 parent = btrfs_inode_extref_parent(node, extref);
/* Names longer than BTRFS_NAME_LEN are read trimmed, with a warning. */
4788 if (name_len <= BTRFS_NAME_LEN) {
4791 len = BTRFS_NAME_LEN;
4792 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4793 root->objectid, ref_key->objectid, ref_key->offset);
4795 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
/*
 * NOTE(review): strncmp() is bounded by the on-disk name_len rather than the
 * trimmed len, unlike the equivalent check in check_inode_ref() - confirm.
 */
4797 /* Check root dir ref name */
4798 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4799 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4800 root->objectid, ref_key->objectid, ref_key->offset,
4802 err |= ROOT_DIR_ERROR;
/*
 * NOTE(review): mode is passed to find_dir_item() as the file type as-is,
 * while check_inode_ref() passes imode_to_type(mode) - confirm which is meant.
 */
4805 /* find related dir_index */
4806 key.objectid = parent;
4807 key.type = BTRFS_DIR_INDEX_KEY;
4809 ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4812 /* find related dir_item */
4813 key.objectid = parent;
4814 key.type = BTRFS_DIR_ITEM_KEY;
4815 key.offset = btrfs_name_hash(namebuf, len);
4816 ret = find_dir_item(root, &key, &location, namebuf, len, mode);
/* Step to the next name using the on-disk name length. */
4819 len = sizeof(*extref) + name_len;
4820 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4830 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4831 * DIR_ITEM/DIR_INDEX match.
4832 * Return with @index_ret.
4834 * @root: the root of the fs/file tree
4835 * @key: the key of the INODE_REF/INODE_EXTREF
4836 * @name: the name in the INODE_REF/INODE_EXTREF
4837 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4838 * @index_ret: the index in the INODE_REF/INODE_EXTREF,
4839 * value (u64)-1 means do not check index
4840 * @ext_ref: the EXTENDED_IREF feature
4842 * Return 0 if no error occurred.
4843 * Return >0 for error bitmap
/*
 * Search the INODE_REF addressed by @key for an entry with the given name
 * (and index, unless *@index_ret is (u64)-1); afterwards @key is rewritten
 * into an INODE_EXTREF key and the same comparison is done on the extref
 * item.
 *
 * NOTE(review): several declarations, labels, gotos and the return statement
 * are not visible in this excerpt, so the exact conditions under which the
 * EXTREF part runs cannot be stated from here.
 */
4845 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4846 char *name, int namelen, u64 *index_ret,
4847 unsigned int ext_ref)
4849 struct btrfs_path path;
4850 struct btrfs_inode_ref *ref;
4851 struct btrfs_inode_extref *extref;
4852 struct extent_buffer *node;
4853 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4866 btrfs_init_path(&path);
4867 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4869 ret = INODE_REF_MISSING;
4873 node = path.nodes[0];
4874 slot = path.slots[0];
4876 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4877 total = btrfs_item_size_nr(node, slot);
4879 /* Iterate all entry of INODE_REF */
4880 while (cur < total) {
4881 ret = INODE_REF_MISSING;
4883 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4884 ref_index = btrfs_inode_ref_index(node, ref);
/* (u64)-1 in *index_ret disables the index comparison. */
4885 if (*index_ret != (u64)-1 && *index_ret != ref_index)
/* Warn when the entry would overrun the item or the name buffer. */
4888 if (cur + sizeof(*ref) + ref_namelen > total ||
4889 ref_namelen > BTRFS_NAME_LEN) {
4890 warning("root %llu INODE %s[%llu %llu] name too long",
4892 key->type == BTRFS_INODE_REF_KEY ?
4894 key->objectid, key->offset);
4896 if (cur + sizeof(*ref) > total)
4898 len = min_t(u32, total - cur - sizeof(*ref),
4904 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4907 if (len != namelen || strncmp(ref_namebuf, name, len))
4910 *index_ret = ref_index;
4914 len = sizeof(*ref) + ref_namelen;
4915 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4920 /* Skip if not support EXTENDED_IREF feature */
4924 btrfs_release_path(&path);
4925 btrfs_init_path(&path);
/*
 * Rewrite the caller's key in place: the parent directory id moves out of
 * key->offset, which becomes the extref hash of (parent, name).
 */
4927 dir_id = key->offset;
4928 key->type = BTRFS_INODE_EXTREF_KEY;
4929 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4931 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4933 ret = INODE_REF_MISSING;
4937 node = path.nodes[0];
4938 slot = path.slots[0];
4940 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4942 total = btrfs_item_size_nr(node, slot);
4944 /* Iterate all entry of INODE_EXTREF */
4945 while (cur < total) {
4946 ret = INODE_REF_MISSING;
4948 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4949 ref_index = btrfs_inode_extref_index(node, extref);
4950 parent = btrfs_inode_extref_parent(node, extref);
4951 if (*index_ret != (u64)-1 && *index_ret != ref_index)
/* Only entries whose parent is the wanted directory are considered. */
4954 if (parent != dir_id)
4957 if (ref_namelen <= BTRFS_NAME_LEN) {
4960 len = BTRFS_NAME_LEN;
4961 warning("root %llu INODE %s[%llu %llu] name too long",
4963 key->type == BTRFS_INODE_REF_KEY ?
4965 key->objectid, key->offset);
4967 read_extent_buffer(node, ref_namebuf,
4968 (unsigned long)(extref + 1), len);
4970 if (len != namelen || strncmp(ref_namebuf, name, len))
4973 *index_ret = ref_index;
4978 len = sizeof(*extref) + ref_namelen;
4979 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4984 btrfs_release_path(&path);
4988 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
4989 u64 ino, u64 index, const char *namebuf,
4990 int name_len, u8 filetype, int err)
4992 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
4993 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
4994 root->objectid, key->objectid, key->offset, namebuf,
4996 err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
4999 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
5000 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
5001 root->objectid, key->objectid, index, namebuf, filetype,
5002 err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5005 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
5007 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
5008 root->objectid, ino, index, namebuf, filetype,
5009 err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
5012 if (err & INODE_REF_MISSING)
5014 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5015 root->objectid, ino, key->objectid, namebuf, filetype);
5020 * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5022 * Returns error after repair
/*
 * Dispatch the repairs for one directory entry and clear the error bits of
 * the repairs that were carried out.
 *
 * NOTE(review): the checks on the helper results and the return statement
 * are not visible in this excerpt.
 */
5024 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5025 u64 index, u8 filetype, char *namebuf, u32 name_len,
/* A missing inode item is recreated first. */
5030 if (err & INODE_ITEM_MISSING) {
5031 ret = repair_inode_item_missing(root, ino, filetype);
5033 err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
/* Any error bit other than the INODE_ITEM ones goes to the ternary repair. */
5036 if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5037 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5038 name_len, filetype, err);
5040 err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5041 err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5042 err &= ~(INODE_REF_MISSING);
/*
 * Walk the items of key type @type that belong to directory @ino, starting
 * from the highest offset and moving backwards with btrfs_previous_item(),
 * and step through the entries packed in each item.
 *
 * NOTE(review): the accumulation into the output parameter, the loop
 * structure and the return statement are not visible in this excerpt.
 */
5048 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5051 struct btrfs_key key;
5052 struct btrfs_path path;
5054 struct btrfs_dir_item *di;
5064 key.offset = (u64)-1;
5066 btrfs_init_path(&path);
5067 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5072 /* if found, go to spacial case */
5077 ret = btrfs_previous_item(root, &path, ino, type);
5085 di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5087 total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5089 while (cur < total) {
/* Name lengths above BTRFS_NAME_LEN are clamped before being used. */
5090 len = btrfs_dir_name_len(path.nodes[0], di);
5091 if (len > BTRFS_NAME_LEN)
5092 len = BTRFS_NAME_LEN;
5095 len += btrfs_dir_data_len(path.nodes[0], di);
5097 di = (struct btrfs_dir_item *)((char *)di + len);
5103 btrfs_release_path(&path);
/*
 * Compute *@size for directory @ino as the sum of the values produced by
 * __count_dir_isize() for its DIR_ITEM and its DIR_INDEX items.
 *
 * NOTE(review): the error checks between the calls and the return statement
 * are not visible in this excerpt.
 */
5107 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5114 ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5118 ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5122 *size = item_size + index_size;
5126 error("failed to count root %llu INODE[%llu] root size",
5127 root->objectid, ino);
5132 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5133 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5135 * @root: the root of the fs/file tree
5136 * @key: the key of the INODE_REF/INODE_EXTREF
5138 * @size: the st_size of the INODE_ITEM
5139 * @ext_ref: the EXTENDED_IREF feature
5141 * Return 0 if no error occurred.
5142 * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
/*
 * Walk every entry packed in one DIR_ITEM/DIR_INDEX item: validate name and
 * hash, look up the INODE_ITEM it points at and compare the file type, then
 * cross-check the INODE_REF/INODE_EXTREF and the sibling DIR_INDEX/DIR_ITEM.
 * With the global repair flag set, broken entries go to repair_dir_item().
 *
 * NOTE(review): several declarations, labels, gotos and the return statement
 * are not visible in this excerpt.
 */
5144 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5145 struct btrfs_path *path, u64 *size,
5146 unsigned int ext_ref)
5148 struct btrfs_dir_item *di;
5149 struct btrfs_inode_item *ii;
5150 struct btrfs_key key;
5151 struct btrfs_key location;
5152 struct extent_buffer *node;
5154 char namebuf[BTRFS_NAME_LEN] = {0};
5166 int need_research = 0;
5169 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5170 * ignore index check.
5172 if (di_key->type == BTRFS_DIR_INDEX_KEY)
5173 index = di_key->offset;
5180 /* since after repair, path and the dir item may be changed */
5181 if (need_research) {
5183 err |= DIR_COUNT_AGAIN;
5184 btrfs_release_path(path);
5185 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5186 /* the item was deleted, let path point the last checked item */
5188 if (path->slots[0] == 0)
5189 btrfs_prev_leaf(root, path);
5197 node = path->nodes[0];
5198 slot = path->slots[0];
5200 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5201 total = btrfs_item_size_nr(node, slot);
/* namebuf is a char array, so the division by sizeof(*namebuf) is by 1. */
5202 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5204 while (cur < total) {
5205 data_len = btrfs_dir_data_len(node, di);
5208 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5210 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5211 di_key->objectid, di_key->offset, data_len);
5213 name_len = btrfs_dir_name_len(node, di);
/* Names longer than BTRFS_NAME_LEN are read trimmed, with a warning. */
5214 if (name_len <= BTRFS_NAME_LEN) {
5217 len = BTRFS_NAME_LEN;
5218 warning("root %llu %s[%llu %llu] name too long",
5220 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5221 di_key->objectid, di_key->offset);
/* The caller's size counter grows by the on-disk name length. */
5223 (*size) += name_len;
5224 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5226 filetype = btrfs_dir_type(node, di);
/* For a DIR_ITEM the key offset must equal the hash of the name. */
5228 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5229 di_key->offset != btrfs_name_hash(namebuf, len)) {
5231 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5232 root->objectid, di_key->objectid, di_key->offset,
5233 namebuf, len, filetype, di_key->offset,
5234 btrfs_name_hash(namebuf, len));
5237 btrfs_dir_item_key_to_cpu(node, di, &location);
5238 /* Ignore related ROOT_ITEM check */
5239 if (location.type == BTRFS_ROOT_ITEM_KEY)
5242 btrfs_release_path(path);
5243 /* Check relative INODE_ITEM(existence/filetype) */
5244 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5246 tmp_err |= INODE_ITEM_MISSING;
5250 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5251 struct btrfs_inode_item);
5252 mode = btrfs_inode_mode(path->nodes[0], ii);
5253 if (imode_to_type(mode) != filetype) {
5254 tmp_err |= INODE_ITEM_MISMATCH;
5258 /* Check relative INODE_REF/INODE_EXTREF */
5259 key.objectid = location.objectid;
5260 key.type = BTRFS_INODE_REF_KEY;
5261 key.offset = di_key->objectid;
5262 tmp_err |= find_inode_ref(root, &key, namebuf, len,
/*
 * NOTE(review): key.type was set to BTRFS_INODE_REF_KEY above and
 * find_inode_ref() may change it to BTRFS_INODE_EXTREF_KEY, so from the
 * visible assignments the test below can never be true; di_key->type looks
 * like the intended operand - confirm.
 */
5265 /* check relative INDEX/ITEM */
5266 key.objectid = di_key->objectid;
5267 if (key.type == BTRFS_DIR_ITEM_KEY) {
5268 key.type = BTRFS_DIR_INDEX_KEY;
5271 key.type = BTRFS_DIR_ITEM_KEY;
5272 key.offset = btrfs_name_hash(namebuf, name_len);
5275 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5276 name_len, filetype);
5277 /* find_dir_item may find index */
5278 if (key.type == BTRFS_DIR_INDEX_KEY)
5282 if (tmp_err && repair) {
5283 ret = repair_dir_item(root, di_key->objectid,
5284 location.objectid, index,
5285 imode_to_type(mode), namebuf,
/* A result different from tmp_err means the repair changed something. */
5287 if (ret != tmp_err) {
5292 btrfs_release_path(path);
5293 print_dir_item_err(root, di_key, location.objectid, index,
5294 namebuf, name_len, filetype, tmp_err);
/* Step to the next entry using the on-disk name and data lengths. */
5296 len = sizeof(*di) + name_len + data_len;
5297 di = (struct btrfs_dir_item *)((char *)di + len);
5300 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5301 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5302 root->objectid, di_key->objectid,
/* Re-search di_key so the caller's path points at the checked item again. */
5309 btrfs_release_path(path);
5310 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
/*
 * NOTE(review): a negative errno is OR-ed into err, which is otherwise used
 * as a bitmap of error flags - confirm callers cope with that.
 */
5312 err |= ret > 0 ? -ENOENT : ret;
5317 * Wrapper function of btrfs_punch_hole.
5319 * Returns 0 means success.
5320 * Returns not 0 means error.
/*
 * Run btrfs_punch_hole() for inode @ino at @start inside its own
 * transaction and report the outcome.
 *
 * NOTE(review): the IS_ERR check, the if/else around the two messages and the
 * final return are not visible in this excerpt.
 */
5322 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5325 struct btrfs_trans_handle *trans;
5328 trans = btrfs_start_transaction(root, 1);
5330 return PTR_ERR(trans);
5332 ret = btrfs_punch_hole(trans, root, ino, start, len);
5334 error("failed to add hole [%llu, %llu] in inode [%llu]",
5337 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
5340 btrfs_commit_transaction(trans, root);
5345 * Check file extent datasum/hole, update the size of the file extents,
5346 * check and update the last offset of the file extent.
5348 * @root: the root of fs/file tree.
5349 * @fkey: the key of the file extent.
5350 * @nodatasum: INODE_NODATASUM feature.
5351 * @size: the sum of all EXTENT_DATA items size for this inode.
5352 * @end: the offset of the last extent.
5354 * Return 0 if no error occurred.
/*
 * Check one EXTENT_DATA item: inline extents are validated by size, regular
 * and prealloc extents are checked against the csum tree and against the
 * expected file offset (*@end); *@size and *@end are advanced by the extent
 * length.
 *
 * NOTE(review): several declarations, else branches and return statements
 * are not visible in this excerpt.
 */
5356 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5357 struct extent_buffer *node, int slot,
5358 unsigned int nodatasum, u64 *size, u64 *end)
5360 struct btrfs_file_extent_item *fi;
5363 u64 extent_num_bytes;
5365 u64 csum_found; /* In byte size, sectorsize aligned */
5366 u64 search_start; /* Logical range start we search for csum */
5367 u64 search_len; /* Logical range len we search for csum */
5368 unsigned int extent_type;
5369 unsigned int is_hole;
5374 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5376 /* Check inline extent */
5377 extent_type = btrfs_file_extent_type(node, fi);
5378 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5379 struct btrfs_item *e = btrfs_item_nr(slot);
5380 u32 item_inline_len;
5382 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5383 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5384 compressed = btrfs_file_extent_compression(node, fi);
5385 if (extent_num_bytes == 0) {
5387 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5388 root->objectid, fkey->objectid, fkey->offset);
5389 err |= FILE_EXTENT_ERROR;
/* The inline size is only compared with the item payload when uncompressed. */
5391 if (!compressed && extent_num_bytes != item_inline_len) {
5393 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5394 root->objectid, fkey->objectid, fkey->offset,
5395 extent_num_bytes, item_inline_len);
5396 err |= FILE_EXTENT_ERROR;
5398 *end += extent_num_bytes;
5399 *size += extent_num_bytes;
5403 /* Check extent type */
5404 if (extent_type != BTRFS_FILE_EXTENT_REG &&
5405 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5406 err |= FILE_EXTENT_ERROR;
5407 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5408 root->objectid, fkey->objectid, fkey->offset);
5412 /* Check REG_EXTENT/PREALLOC_EXTENT */
5413 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5414 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5415 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5416 extent_offset = btrfs_file_extent_offset(node, fi);
5417 compressed = btrfs_file_extent_compression(node, fi);
/* A hole is an extent with neither a disk address nor a disk length. */
5418 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5421 * Check EXTENT_DATA csum
5423 * For plain (uncompressed) extent, we should only check the range
5424 * we're referring to, as it's possible that part of prealloc extent
5425 * has been written, and has csum:
5427 * |<--- Original large preallocated extent A ---->|
5428 * |<- Prealloc File Extent ->|<- Regular Extent ->|
5431 * For compressed extent, we should check the whole range.
5434 search_start = disk_bytenr + extent_offset;
5435 search_len = extent_num_bytes;
5437 search_start = disk_bytenr;
5438 search_len = disk_num_bytes;
5440 ret = count_csum_range(root->fs_info, search_start, search_len, &csum_found);
/*
 * Three csum problems are told apart below: csums on a nodatasum inode,
 * missing csums on a regular non-hole extent, csums on a prealloc extent.
 */
5441 if (csum_found > 0 && nodatasum) {
5442 err |= ODD_CSUM_ITEM;
5443 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5444 root->objectid, fkey->objectid, fkey->offset);
5445 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5446 !is_hole && (ret < 0 || csum_found < search_len)) {
5447 err |= CSUM_ITEM_MISSING;
5448 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5449 root->objectid, fkey->objectid, fkey->offset,
5450 csum_found, search_len);
5451 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5452 err |= ODD_CSUM_ITEM;
5453 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5454 root->objectid, fkey->objectid, fkey->offset, csum_found);
/*
 * Without the no_holes feature the extent must start exactly where the
 * previous one ended; the gap [*end, fkey->offset) is what gets punched.
 */
5457 /* Check EXTENT_DATA hole */
5458 if (!no_holes && *end != fkey->offset) {
5460 ret = punch_extent_hole(root, fkey->objectid,
5461 *end, fkey->offset - *end);
5462 if (!repair || ret) {
5463 err |= FILE_EXTENT_ERROR;
5465 "root %llu EXTENT_DATA[%llu %llu] gap exists, expected: EXTENT_DATA[%llu %llu]",
5466 root->objectid, fkey->objectid, fkey->offset,
5467 fkey->objectid, *end);
5471 *end += extent_num_bytes;
5473 *size += extent_num_bytes;
5479 * Set inode item nbytes to @nbytes
5481 * Returns 0 on success
5482 * Returns != 0 on error
/*
 * Lowmem repair helper: store @nbytes into the nbytes field of the
 * INODE_ITEM of inode @ino in @root, inside its own transaction.
 *
 * The key that @path points at on entry is remembered in research_key and
 * looked up again at the end, so @path is repositioned for the caller.
 */
5484 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5485 struct btrfs_path *path,
5486 u64 ino, u64 nbytes)
5488 struct btrfs_trans_handle *trans;
5489 struct btrfs_inode_item *ii;
5490 struct btrfs_key key;
5491 struct btrfs_key research_key;
/* Remember the caller's position before @path is released and reused. */
5495 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5498 key.type = BTRFS_INODE_ITEM_KEY;
5501 trans = btrfs_start_transaction(root, 1);
5502 if (IS_ERR(trans)) {
5503 ret = PTR_ERR(trans);
5508 btrfs_release_path(path);
/* Look the inode item up within the transaction so it can be modified. */
5509 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5517 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5518 struct btrfs_inode_item);
5519 btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5520 btrfs_mark_buffer_dirty(path->nodes[0]);
5522 btrfs_commit_transaction(trans, root);
5525 error("failed to set nbytes in inode %llu root %llu",
5526 ino, root->root_key.objectid);
/* The newline belongs at the end of the message, not in the middle of it. */
5528 printf("Set nbytes in inode item %llu root %llu to %llu\n", ino,
5529 root->root_key.objectid, nbytes);
/* Put @path back on the item the caller was looking at. */
5532 btrfs_release_path(path);
5533 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5540 * Set directory inode isize to @isize.
5542 * Returns 0 on success.
5543 * Returns != 0 on error.
/*
 * Lowmem repair helper: store isize into the size field of the INODE_ITEM
 * of a directory inode, inside its own transaction.
 *
 * The key that @path points at on entry is remembered in research_key and
 * looked up again at the end, so @path is repositioned for the caller.
 */
5545 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5546 struct btrfs_path *path,
5549 struct btrfs_trans_handle *trans;
5550 struct btrfs_inode_item *ii;
5551 struct btrfs_key key;
5552 struct btrfs_key research_key;
/* Remember the caller's position before @path is released and reused. */
5556 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5559 key.type = BTRFS_INODE_ITEM_KEY;
5562 trans = btrfs_start_transaction(root, 1);
5563 if (IS_ERR(trans)) {
5564 ret = PTR_ERR(trans);
5569 btrfs_release_path(path);
/* Look the inode item up within the transaction so it can be modified. */
5570 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5578 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5579 struct btrfs_inode_item);
5580 btrfs_set_inode_size(path->nodes[0], ii, isize);
5581 btrfs_mark_buffer_dirty(path->nodes[0]);
5583 btrfs_commit_transaction(trans, root);
5586 error("failed to set isize in inode %llu root %llu",
5587 ino, root->root_key.objectid);
5589 printf("Set isize in inode %llu root %llu to %llu\n",
5590 ino, root->root_key.objectid, isize);
/* Put @path back on the item the caller was looking at. */
5592 btrfs_release_path(path);
5593 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5600 * Wrapper function for btrfs_add_orphan_item().
5602 * Returns 0 on success.
5603 * Returns != 0 on error.
/*
 * Lowmem repair helper: add an orphan item for inode @ino in @root by
 * calling btrfs_add_orphan_item() inside its own transaction, then search
 * for the key @path pointed at on entry so the caller's position is kept.
 */
5605 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5606 struct btrfs_path *path, u64 ino)
5608 struct btrfs_trans_handle *trans;
5609 struct btrfs_key research_key;
/* Remember the caller's position before @path is released and reused. */
5613 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5615 trans = btrfs_start_transaction(root, 1);
5616 if (IS_ERR(trans)) {
5617 ret = PTR_ERR(trans);
5622 btrfs_release_path(path);
5623 ret = btrfs_add_orphan_item(trans, root, path, ino);
5625 btrfs_commit_transaction(trans, root);
5628 error("failed to add inode %llu as orphan item root %llu",
5629 ino, root->root_key.objectid);
5631 printf("Added inode %llu as orphan item root %llu\n",
5632 ino, root->root_key.objectid);
/* Put @path back on the item the caller was looking at. */
5634 btrfs_release_path(path);
5635 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5641 /* Set inode_item nlink to @ref_count.
5642 * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
5644 * Returns 0 on success
/*
 * Lowmem repair helper: make the nlink field of the INODE_ITEM of inode
 * @ino agree with @ref_count.
 *
 * When @ref_count is 0 the inode is first linked into lost+found through
 * link_inode_to_lostfound(), which receives &ref_count and may update it.
 * If no usable @name/@namelen is supplied, the decimal inode number is
 * used as the file name instead.
 *
 * The key that @path points at on entry is saved in old_key and searched
 * for again at the end, so @path is repositioned for the caller.
 */
5646 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
5647 struct btrfs_path *path, u64 ino,
5648 const char *name, u32 namelen,
5649 u64 ref_count, u8 filetype, u64 *nlink)
5651 struct btrfs_trans_handle *trans;
5652 struct btrfs_inode_item *ii;
5653 struct btrfs_key key;
5654 struct btrfs_key old_key;
5655 char namebuf[BTRFS_NAME_LEN] = {0};
/* Remember the caller's position before @path is released and reused. */
5661 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
5663 if (name && namelen) {
5664 ASSERT(namelen <= BTRFS_NAME_LEN);
5665 memcpy(namebuf, name, namelen);
/* No name known: fall back to the inode number printed in decimal. */
5668 sprintf(namebuf, "%llu", ino);
5669 name_len = count_digits(ino);
5670 printf("Can't find file name for inode %llu, use %s instead\n",
5674 trans = btrfs_start_transaction(root, 1);
5675 if (IS_ERR(trans)) {
5676 ret = PTR_ERR(trans);
5680 btrfs_release_path(path);
5681 /* if refs is 0, put it into lostfound */
5682 if (ref_count == 0) {
5683 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
5684 name_len, filetype, &ref_count);
5689 /* reset inode_item's nlink to ref_count */
5691 key.type = BTRFS_INODE_ITEM_KEY;
5694 btrfs_release_path(path);
5695 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5701 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5702 struct btrfs_inode_item);
5703 btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
5704 btrfs_mark_buffer_dirty(path->nodes[0]);
5709 btrfs_commit_transaction(trans, root);
/*
 * The messages read "inode %llu root %llu", so the inode number has to be
 * passed before the root objectid.
 */
5713 "fail to repair nlink of inode %llu root %llu name %s filetype %u",
5714 ino, root->objectid, namebuf, filetype);
5716 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
5717 ino, root->objectid, namebuf, filetype);
/* Put @path back on the item the caller was looking at. */
5720 btrfs_release_path(path);
5721 ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
5728 * Check INODE_ITEM and related ITEMs (the same inode number)
5729 * 1. check link count
5730 * 2. check inode ref/extref
5731 * 3. check dir item/index
5733 * @ext_ref: the EXTENDED_IREF feature
5735 * Return 0 if no error occurred.
5736 * Return >0 if an error occurred or the end of the traversal was hit (reported as an error bitmap)
/*
 * Check the INODE_ITEM that @path points at, then step through the
 * following items, comparing each key's objectid with the inode's and
 * dispatching on the key type (INODE_REF, INODE_EXTREF, DIR_ITEM/DIR_INDEX,
 * EXTENT_DATA, XATTR_ITEM; anything else is reported as unknown).
 *
 * After the walk, the nlink/isize/nbytes values read from the inode item
 * are compared with what the per-item checks collected.  Where the global
 * repair flag is consulted, the matching repair helper is called and the
 * error is recorded only if repair is off or the helper failed.
 */
5738 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5739 unsigned int ext_ref)
5741 struct extent_buffer *node;
5742 struct btrfs_inode_item *ii;
5743 struct btrfs_key key;
5744 struct btrfs_key last_key;
5753 u64 extent_size = 0;
5755 unsigned int nodatasum;
5759 char namebuf[BTRFS_NAME_LEN] = {0};
5762 node = path->nodes[0];
5763 slot = path->slots[0];
5765 btrfs_item_key_to_cpu(node, &key, slot);
5766 inode_id = key.objectid;
/* Items of the orphan objectid are not inode items; just step past them. */
5768 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5769 ret = btrfs_next_item(root, path);
/* Snapshot the fields of the inode item that are verified after the walk. */
5775 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5776 isize = btrfs_inode_size(node, ii);
5777 nbytes = btrfs_inode_nbytes(node, ii);
5778 mode = btrfs_inode_mode(node, ii);
5779 dir = imode_to_type(mode) == BTRFS_FT_DIR;
5780 nlink = btrfs_inode_nlink(node, ii);
5781 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
/* Keep the key of the current item so it can be searched for again later. */
5784 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
5785 ret = btrfs_next_item(root, path);
5787 /* out will fill 'err' using current statistics */
5789 } else if (ret > 0) {
5794 node = path->nodes[0];
5795 slot = path->slots[0];
5796 btrfs_item_key_to_cpu(node, &key, slot);
/* An item with a different objectid no longer belongs to this inode. */
5797 if (key.objectid != inode_id)
5801 case BTRFS_INODE_REF_KEY:
5802 ret = check_inode_ref(root, &key, path, namebuf,
5803 &name_len, &refs, mode);
5806 case BTRFS_INODE_EXTREF_KEY:
5807 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5808 warning("root %llu EXTREF[%llu %llu] isn't supported",
5809 root->objectid, key.objectid,
5811 ret = check_inode_extref(root, &key, node, slot, &refs,
5815 case BTRFS_DIR_ITEM_KEY:
5816 case BTRFS_DIR_INDEX_KEY:
5818 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5819 root->objectid, inode_id,
5820 imode_to_type(mode), key.objectid,
5823 ret = check_dir_item(root, &key, path, &size, ext_ref);
5826 case BTRFS_EXTENT_DATA_KEY:
5828 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5829 root->objectid, inode_id, key.objectid,
5832 ret = check_file_extent(root, &key, node, slot,
5833 nodatasum, &extent_size,
5837 case BTRFS_XATTR_ITEM_KEY:
5840 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5841 key.objectid, key.type, key.offset);
/* When LAST_ITEM is set, reposition @path on the last key that was saved. */
5846 if (err & LAST_ITEM) {
5847 btrfs_release_path(path);
5848 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
5853 /* verify INODE_ITEM nlink/isize/nbytes */
/* In repair mode a pending DIR_COUNT_AGAIN request recounts the dir size. */
5855 if (repair && (err & DIR_COUNT_AGAIN)) {
5856 err &= ~DIR_COUNT_AGAIN;
5857 count_dir_isize(root, inode_id, &size);
5860 if ((nlink != 1 || refs != 1) && repair) {
5861 ret = repair_inode_nlinks_lowmem(root, path, inode_id,
5862 namebuf, name_len, refs, imode_to_type(mode),
5867 err |= LINK_COUNT_ERROR;
5868 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5869 root->objectid, inode_id, nlink);
5873 * Just a warning, as dir inode nbytes is just an
5874 * instructive value.
5876 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5877 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5878 root->objectid, inode_id,
5879 root->fs_info->nodesize);
5882 if (isize != size) {
5884 ret = repair_dir_isize_lowmem(root, path,
5886 if (!repair || ret) {
5889 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5890 root->objectid, inode_id, isize, size);
/* Link count stored in the inode item versus the refs counted in the walk. */
5894 if (nlink != refs) {
5896 ret = repair_inode_nlinks_lowmem(root, path,
5897 inode_id, namebuf, name_len, refs,
5898 imode_to_type(mode), &nlink);
5899 if (!repair || ret) {
5900 err |= LINK_COUNT_ERROR;
5902 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5903 root->objectid, inode_id, nlink, refs);
5905 } else if (!nlink) {
5907 ret = repair_inode_orphan_item_lowmem(root,
5909 if (!repair || ret) {
5911 error("root %llu INODE[%llu] is orphan item",
5912 root->objectid, inode_id);
/* With holes required, an inode without bytes must be covered up to isize. */
5916 if (!nbytes && !no_holes && extent_end < isize) {
5918 ret = punch_extent_hole(root, inode_id,
5919 extent_end, isize - extent_end);
5920 if (!repair || ret) {
5921 err |= NBYTES_ERROR;
5923 "root %llu INODE[%llu] size %llu should have a file extent hole",
5924 root->objectid, inode_id, isize);
/* nbytes in the inode item versus the sum gathered by check_file_extent(). */
5928 if (nbytes != extent_size) {
5930 ret = repair_inode_nbytes_lowmem(root, path,
5931 inode_id, extent_size);
5932 if (!repair || ret) {
5933 err |= NBYTES_ERROR;
5935 "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5936 root->objectid, inode_id, nbytes,
/* @path was put back on the last item above; step past it before leaving. */
5942 if (err & LAST_ITEM)
5943 btrfs_next_item(root, path);
5948 * Insert the missing inode item and inode ref.
5950 * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref dir.
5951 * Root dir should be handled specially because root dir is the root of fs.
5953 * returns err (>0 or 0) after repair
/*
 * Try to repair the first inode (BTRFS_FIRST_FREE_OBJECTID) of @root based
 * on the bits set in @err:
 *  - INODE_REF_MISSING: insert an INODE_REF named ".." that points from the
 *    first inode to itself, inside a transaction;
 *  - INODE_ITEM_MISSING: call repair_inode_item_missing() with a directory
 *    file type.
 * A bit is cleared from @err in the branch that handles it.
 */
5955 static int repair_fs_first_inode(struct btrfs_root *root, int err)
5957 struct btrfs_trans_handle *trans;
5958 struct btrfs_key key;
5959 struct btrfs_path path;
5960 int filetype = BTRFS_FT_DIR;
5963 btrfs_init_path(&path);
5965 if (err & INODE_REF_MISSING) {
/* The first inode refers to itself: objectid and offset are the same. */
5966 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5967 key.type = BTRFS_INODE_REF_KEY;
5968 key.offset = BTRFS_FIRST_FREE_OBJECTID;
5970 trans = btrfs_start_transaction(root, 1);
5971 if (IS_ERR(trans)) {
5972 ret = PTR_ERR(trans);
5976 btrfs_release_path(&path);
5977 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
5981 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
5982 BTRFS_FIRST_FREE_OBJECTID,
5983 BTRFS_FIRST_FREE_OBJECTID, 0);
5987 printf("Add INODE_REF[%llu %llu] name %s\n",
5988 BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
5990 err &= ~INODE_REF_MISSING;
5993 error("fail to insert first inode's ref");
5994 btrfs_commit_transaction(trans, root);
5997 if (err & INODE_ITEM_MISSING) {
5998 ret = repair_inode_item_missing(root,
5999 BTRFS_FIRST_FREE_OBJECTID, filetype);
6002 err &= ~INODE_ITEM_MISSING;
6006 error("fail to repair first inode");
6007 btrfs_release_path(&path);
6012 * check first root dir's inode_item and inode_ref
6014 * returns 0 means no error
6015 * returns >0 means error
6016 * returns <0 means fatal error
/*
 * Check that @root has an INODE_ITEM for BTRFS_FIRST_FREE_OBJECTID whose
 * mode is a directory, and look up its ".." INODE_REF via find_inode_ref().
 * Problems are collected as bits in err; repair_fs_first_inode() is given
 * the chance to clear them before the remaining ones are reported.
 *
 * The final return is ret when ret is negative, otherwise the error bits.
 */
6018 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
6020 struct btrfs_path path;
6021 struct btrfs_key key;
6022 struct btrfs_inode_item *ii;
6028 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6029 key.type = BTRFS_INODE_ITEM_KEY;
6032 /* For root being dropped, we don't need to check first inode */
6033 if (btrfs_root_refs(&root->root_item) == 0 &&
6034 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
6035 BTRFS_FIRST_FREE_OBJECTID)
6038 btrfs_init_path(&path);
6039 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6044 err |= INODE_ITEM_MISSING;
/* The inode item exists: the first inode of a tree must be a directory. */
6046 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6047 struct btrfs_inode_item);
6048 mode = btrfs_inode_mode(path.nodes[0], ii);
6049 if (imode_to_type(mode) != BTRFS_FT_DIR)
6050 err |= INODE_ITEM_MISMATCH;
6053 /* lookup first inode ref */
6054 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6055 key.type = BTRFS_INODE_REF_KEY;
6056 /* special index value */
6059 ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6065 btrfs_release_path(&path);
6068 err = repair_fs_first_inode(root, err);
/* Report whatever is still flagged at this point. */
6070 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6071 error("root dir INODE_ITEM is %s",
6072 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6073 if (err & INODE_REF_MISSING)
6074 error("root dir INODE_REF is missing");
6076 return ret < 0 ? ret : err;
/*
 * Look up a tree backref of @rec in rec->backref_tree.
 *
 * A stack-allocated tree_backref is filled in as the search key and handed
 * to rb_search() with compare_extent_backref as the comparator.  The visible
 * lines set match.parent and full_backref = 1 for the @parent case; the
 * result starts out as NULL and is set from the node that was found.
 */
6079 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6080 u64 parent, u64 root)
6082 struct rb_node *node;
6083 struct tree_backref *back = NULL;
6084 struct tree_backref match = {
6091 match.parent = parent;
6092 match.node.full_backref = 1;
6097 node = rb_search(&rec->backref_tree, &match.node.node,
6098 (rb_compare_keys)compare_extent_backref, NULL);
6100 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Look up a data backref of @rec in rec->backref_tree.
 *
 * A stack-allocated data_backref is filled in as the search key (found_ref
 * and disk_bytenr are among the visible initializers) and handed to
 * rb_search() with compare_extent_backref as the comparator.  The visible
 * lines set match.parent and full_backref = 1 for the @parent case; the
 * result starts out as NULL and is set from the node that was found.
 */
6105 static struct data_backref *find_data_backref(struct extent_record *rec,
6106 u64 parent, u64 root,
6107 u64 owner, u64 offset,
6109 u64 disk_bytenr, u64 bytes)
6111 struct rb_node *node;
6112 struct data_backref *back = NULL;
6113 struct data_backref match = {
6120 .found_ref = found_ref,
6121 .disk_bytenr = disk_bytenr,
6125 match.parent = parent;
6126 match.node.full_backref = 1;
6131 node = rb_search(&rec->backref_tree, &match.node.node,
6132 (rb_compare_keys)compare_extent_backref, NULL);
6134 back = to_data_backref(rb_node_to_extent_backref(node));
6139 * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
6140 * blocks and integrity of fs tree items.
6142 * @root: the root of the tree to be checked.
6143 * @ext_ref whether feature EXTENDED_IREF is enabled or not.
6144 * @check_all if NOT 0 means check the tree (including tree)'s treeblocks.
6145 * otherwise means check fs tree(s) items relationship and
6146 * @root MUST be a fs tree root.
6147 * Returns 0 represents OK.
6148 * Returns not 0 represents error.
/*
 * Walk @root with walk_down_tree_v2() / walk_up_tree_v2().
 *
 * The first inode is checked up front through check_fs_first_inode().  The
 * walk then starts either at the root node (slot 0), or, for a root with no
 * refs and a non-zero drop_progress key, at the position that key and
 * drop_level describe.
 */
6150 static int check_btrfs_root(struct btrfs_trans_handle *trans,
6151 struct btrfs_root *root, unsigned int ext_ref,
6155 struct btrfs_path path;
6156 struct node_refs nrefs;
6157 struct btrfs_root_item *root_item = &root->root_item;
6162 memset(&nrefs, 0, sizeof(nrefs));
6165 * We need to manually check the first inode item (256)
6166 * As the following traversal function will only start from
6167 * the first inode item in the leaf, if inode item (256) is
6168 * missing we will skip it forever.
6170 ret = check_fs_first_inode(root, ext_ref);
6176 level = btrfs_header_level(root->node);
6177 btrfs_init_path(&path);
/* Live root, or nothing dropped yet: begin the walk at the root node. */
6179 if (btrfs_root_refs(root_item) > 0 ||
6180 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6181 path.nodes[level] = root->node;
6182 path.slots[level] = 0;
/* The path now holds root->node as well, hence the extra reference. */
6183 extent_buffer_get(root->node);
6185 struct btrfs_key key;
/* Partially dropped root: resume from the recorded drop progress. */
6187 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6188 level = root_item->drop_level;
6189 path.lowest_level = level;
6190 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6197 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6198 ext_ref, check_all);
6202 /* if ret is negative, walk shall stop */
6208 ret = walk_up_tree_v2(root, &path, &level);
6210 /* Normal exit, reset ret to err */
6217 btrfs_release_path(&path);
6222 * Iterate all items in the tree and call check_inode_item() to check.
6224 * @root: the root of the tree to be checked.
6225 * @ext_ref: the EXTENDED_IREF feature
6227 * Return 0 if no error found.
6228 * Return <0 for error.
/*
 * Reset the cached block groups of the filesystem, then run
 * check_btrfs_root() on @root with a NULL transaction and 0 as the last
 * argument, returning its result unchanged.
 */
6230 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6232 reset_cached_block_groups(root->fs_info);
6233 return check_btrfs_root(NULL, root, ext_ref, 0);
6237 * Find the relative ref for root_ref and root_backref.
6239 * @root: the root of the root tree.
6240 * @ref_key: the key of the root ref.
6242 * Return 0 if no error occurred.
/*
 * Verify that the ROOT_REF / ROOT_BACKREF item at @node/@slot has a matching
 * counterpart of the opposite type.
 *
 * The counterpart key swaps objectid and offset of @ref_key and flips the
 * key type.  A missing counterpart sets ROOT_REF_MISSING; a counterpart
 * whose dirid, sequence, name length or name differs sets ROOT_REF_MISMATCH.
 */
6244 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6245 struct extent_buffer *node, int slot)
6247 struct btrfs_path path;
6248 struct btrfs_key key;
6249 struct btrfs_root_ref *ref;
6250 struct btrfs_root_ref *backref;
6251 char ref_name[BTRFS_NAME_LEN] = {0};
6252 char backref_name[BTRFS_NAME_LEN] = {0};
6258 u32 backref_namelen;
6263 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6264 ref_dirid = btrfs_root_ref_dirid(node, ref);
6265 ref_seq = btrfs_root_ref_sequence(node, ref);
6266 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* Never copy more than the local name buffer can hold. */
6268 if (ref_namelen <= BTRFS_NAME_LEN) {
6271 len = BTRFS_NAME_LEN;
6272 warning("%s[%llu %llu] ref_name too long",
6273 ref_key->type == BTRFS_ROOT_REF_KEY ?
6274 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
/* The name is read from directly behind the btrfs_root_ref structure. */
6277 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6279 /* Find relative root_ref */
6280 key.objectid = ref_key->offset;
/* The sum minus the current type yields the other of the two key types. */
6281 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6282 key.offset = ref_key->objectid;
6284 btrfs_init_path(&path);
6285 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6287 err |= ROOT_REF_MISSING;
6288 error("%s[%llu %llu] couldn't find relative ref",
6289 ref_key->type == BTRFS_ROOT_REF_KEY ?
6290 "ROOT_REF" : "ROOT_BACKREF",
6291 ref_key->objectid, ref_key->offset);
6295 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6296 struct btrfs_root_ref);
6297 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6298 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6299 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
/* Same length clamp for the counterpart's name. */
6301 if (backref_namelen <= BTRFS_NAME_LEN) {
6302 len = backref_namelen;
6304 len = BTRFS_NAME_LEN;
6305 warning("%s[%llu %llu] ref_name too long",
6306 key.type == BTRFS_ROOT_REF_KEY ?
6307 "ROOT_REF" : "ROOT_BACKREF",
6308 key.objectid, key.offset);
6310 read_extent_buffer(path.nodes[0], backref_name,
6311 (unsigned long)(backref + 1), len);
/* Every field of the two items has to agree, including the name bytes. */
6313 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6314 ref_namelen != backref_namelen ||
6315 strncmp(ref_name, backref_name, len)) {
6316 err |= ROOT_REF_MISMATCH;
6317 error("%s[%llu %llu] mismatch relative ref",
6318 ref_key->type == BTRFS_ROOT_REF_KEY ?
6319 "ROOT_REF" : "ROOT_BACKREF",
6320 ref_key->objectid, ref_key->offset);
6323 btrfs_release_path(&path);
6328 * Check all fs/file tree in low_memory mode.
6330 * 1. for fs tree root item, call check_fs_root_v2()
6331 * 2. for fs tree root ref/backref, call check_root_ref()
6333 * Return 0 if no error occurred.
/*
 * Iterate over the items of the tree root starting at
 * BTRFS_FS_TREE_OBJECTID:
 *  - a ROOT_ITEM whose objectid passes fs_root_objectid() is read and
 *    checked with check_fs_root_v2();
 *  - a ROOT_REF / ROOT_BACKREF is checked with check_root_ref().
 * Keys with an objectid above BTRFS_LAST_FREE_OBJECTID are tested for
 * separately.
 */
6335 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6337 struct btrfs_root *tree_root = fs_info->tree_root;
6338 struct btrfs_root *cur_root = NULL;
6339 struct btrfs_path path;
6340 struct btrfs_key key;
6341 struct extent_buffer *node;
6342 unsigned int ext_ref;
6347 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6349 btrfs_init_path(&path);
6350 key.objectid = BTRFS_FS_TREE_OBJECTID;
6352 key.type = BTRFS_ROOT_ITEM_KEY;
6354 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6358 } else if (ret > 0) {
6364 node = path.nodes[0];
6365 slot = path.slots[0];
6366 btrfs_item_key_to_cpu(node, &key, slot);
6367 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6369 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6370 fs_root_objectid(key.objectid)) {
/* Relocation trees are read without the cache and freed again below. */
6371 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6372 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6375 key.offset = (u64)-1;
6376 cur_root = btrfs_read_fs_root(fs_info, &key);
6379 if (IS_ERR(cur_root)) {
6380 error("Fail to read fs/subvol tree: %lld",
6386 ret = check_fs_root_v2(cur_root, ext_ref);
6389 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6390 btrfs_free_fs_root(cur_root);
6391 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6392 key.type == BTRFS_ROOT_BACKREF_KEY) {
6393 ret = check_root_ref(tree_root, &key, node, slot);
6397 ret = btrfs_next_item(tree_root, &path);
6407 btrfs_release_path(&path);
/*
 * Run the fs roots check for the selected check mode: check_fs_roots_v2()
 * when check_mode is CHECK_MODE_LOWMEM, check_fs_roots() as the other call.
 * A "checking fs roots" line goes to stderr unless progress reporting is
 * enabled.
 */
6411 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6412 struct cache_tree *root_cache)
6416 if (!ctx.progress_enabled)
6417 fprintf(stderr, "checking fs roots\n");
6418 if (check_mode == CHECK_MODE_LOWMEM)
6419 ret = check_fs_roots_v2(fs_info);
6421 ret = check_fs_roots(fs_info, root_cache);
/*
 * Go over every backref stored in rec->backref_tree and test it for:
 *  - not having been found in the extent tree,
 *  - a tree backref that was never referenced back,
 *  - a data backref whose found_ref differs from num_refs,
 *  - a data backref whose disk_bytenr or bytes differ from the record.
 * The found references are also totalled and compared with rec->refs.
 * Each condition has a diagnostic printed to stderr.
 * NOTE(review): print_errs presumably gates those diagnostics; the guarding
 * lines are not visible here - confirm against the full source.
 */
6426 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6428 struct extent_backref *back, *tmp;
6429 struct tree_backref *tback;
6430 struct data_backref *dback;
6434 rbtree_postorder_for_each_entry_safe(back, tmp,
6435 &rec->backref_tree, node) {
6436 if (!back->found_extent_tree) {
6440 if (back->is_data) {
6441 dback = to_data_backref(back);
6442 fprintf(stderr, "Data backref %llu %s %llu"
6443 " owner %llu offset %llu num_refs %lu"
6444 " not found in extent tree\n",
6445 (unsigned long long)rec->start,
6446 back->full_backref ?
6448 back->full_backref ?
6449 (unsigned long long)dback->parent:
6450 (unsigned long long)dback->root,
6451 (unsigned long long)dback->owner,
6452 (unsigned long long)dback->offset,
6453 (unsigned long)dback->num_refs);
6455 tback = to_tree_backref(back);
6456 fprintf(stderr, "Tree backref %llu parent %llu"
6457 " root %llu not found in extent tree\n",
6458 (unsigned long long)rec->start,
6459 (unsigned long long)tback->parent,
6460 (unsigned long long)tback->root);
/* A tree backref recorded in the extent tree that nothing points back to. */
6463 if (!back->is_data && !back->found_ref) {
6467 tback = to_tree_backref(back);
6468 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6469 (unsigned long long)rec->start,
6470 back->full_backref ? "parent" : "root",
6471 back->full_backref ?
6472 (unsigned long long)tback->parent :
6473 (unsigned long long)tback->root, back);
6475 if (back->is_data) {
6476 dback = to_data_backref(back);
/* Per-backref count: references found versus references recorded. */
6477 if (dback->found_ref != dback->num_refs) {
6481 fprintf(stderr, "Incorrect local backref count"
6482 " on %llu %s %llu owner %llu"
6483 " offset %llu found %u wanted %u back %p\n",
6484 (unsigned long long)rec->start,
6485 back->full_backref ?
6487 back->full_backref ?
6488 (unsigned long long)dback->parent:
6489 (unsigned long long)dback->root,
6490 (unsigned long long)dback->owner,
6491 (unsigned long long)dback->offset,
6492 dback->found_ref, dback->num_refs, back);
6494 if (dback->disk_bytenr != rec->start) {
6498 fprintf(stderr, "Backref disk bytenr does not"
6499 " match extent record, bytenr=%llu, "
6500 "ref bytenr=%llu\n",
6501 (unsigned long long)rec->start,
6502 (unsigned long long)dback->disk_bytenr);
6505 if (dback->bytes != rec->nr) {
6509 fprintf(stderr, "Backref bytes do not match "
6510 "extent backref, bytenr=%llu, ref "
6511 "bytes=%llu, backref bytes=%llu\n",
6512 (unsigned long long)rec->start,
6513 (unsigned long long)rec->nr,
6514 (unsigned long long)dback->bytes);
/* Accumulate the global reference count over all backrefs. */
6517 if (!back->is_data) {
6520 dback = to_data_backref(back);
6521 found += dback->found_ref;
6524 if (found != rec->refs) {
6528 fprintf(stderr, "Incorrect global backref count "
6529 "on %llu found %llu wanted %llu\n",
6530 (unsigned long long)rec->start,
6531 (unsigned long long)found,
6532 (unsigned long long)rec->refs);
/*
 * Per-node callback handed to rb_free_nodes(): converts the rb_node back to
 * its extent_backref.
 * NOTE(review): presumably the backref is then freed; that statement is not
 * visible here - confirm against the full source.
 */
6538 static void __free_one_backref(struct rb_node *node)
6540 struct extent_backref *back = rb_node_to_extent_backref(node);
/*
 * Release every node of rec->backref_tree by passing the tree to
 * rb_free_nodes() with __free_one_backref as the per-node callback.
 */
6545 static void free_all_extent_backrefs(struct extent_record *rec)
6547 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Empty @extent_cache: take the first cache extent, remove it from the
 * cache and release the backrefs of the extent_record that embeds it.
 * NOTE(review): the loop construct and the release of the record itself are
 * not visible here - confirm against the full source.
 */
6550 static void free_extent_record_cache(struct cache_tree *extent_cache)
6552 struct cache_extent *cache;
6553 struct extent_record *rec;
6556 cache = first_cache_extent(extent_cache);
/* The cache_extent is a member of the extent_record; recover the owner. */
6559 rec = container_of(cache, struct extent_record, cache);
6560 remove_cache_extent(extent_cache, cache);
6561 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing is left to verify on it: the
 * content and owner ref were checked, the refs from the extent item equal
 * the counted refs (and are non-zero), there are no duplicates,
 * all_backpointers_checked() returns 0, and none of the bad_full_backref /
 * crossing_stripes / wrong_chunk_type flags is set.
 */
6566 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6567 struct extent_record *rec)
6569 if (rec->content_checked && rec->owner_ref_checked &&
6570 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6571 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6572 !rec->bad_full_backref && !rec->crossing_stripes &&
6573 !rec->wrong_chunk_type) {
6574 remove_cache_extent(extent_cache, &rec->cache);
6575 free_all_extent_backrefs(rec);
6576 list_del_init(&rec->list);
/*
 * Check that the owner recorded in the header of @buf is backed by a
 * reference.
 *
 * First the backrefs of @rec are scanned for one whose root equals the
 * header owner.  Otherwise the owner's tree is read and searched for the
 * first key of @buf one level above @buf; the block counts as found when
 * the parent's block pointer at that slot equals buf->start.
 *
 * Returns 0 when found, 1 otherwise (final statement of the function).
 */
6582 static int check_owner_ref(struct btrfs_root *root,
6583 struct extent_record *rec,
6584 struct extent_buffer *buf)
6586 struct extent_backref *node, *tmp;
6587 struct tree_backref *back;
6588 struct btrfs_root *ref_root;
6589 struct btrfs_key key;
6590 struct btrfs_path path;
6591 struct extent_buffer *parent;
6596 rbtree_postorder_for_each_entry_safe(node, tmp,
6597 &rec->backref_tree, node) {
6600 if (!node->found_ref)
6602 if (node->full_backref)
6604 back = to_tree_backref(node);
6605 if (btrfs_header_owner(buf) == back->root)
6608 BUG_ON(rec->is_root);
6610 /* try to find the block by search corresponding fs tree */
6611 key.objectid = btrfs_header_owner(buf);
6612 key.type = BTRFS_ROOT_ITEM_KEY;
6613 key.offset = (u64)-1;
6615 ref_root = btrfs_read_fs_root(root->fs_info, &key);
6616 if (IS_ERR(ref_root))
6619 level = btrfs_header_level(buf);
/* The first key of @buf is the search key, read as an item or a node key. */
6621 btrfs_item_key_to_cpu(buf, &key, 0);
6623 btrfs_node_key_to_cpu(buf, &key, 0);
6625 btrfs_init_path(&path);
/* Stop the search one level above @buf so its parent pointer is visible. */
6626 path.lowest_level = level + 1;
6627 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6631 parent = path.nodes[level + 1];
6632 if (parent && buf->start == btrfs_node_blockptr(parent,
6633 path.slots[level + 1]))
6636 btrfs_release_path(&path);
6637 return found ? 0 : 1;
/*
 * Scan the backrefs of @rec, testing each one for full_backref and for a
 * tree backref whose root is BTRFS_EXTENT_TREE_OBJECTID.
 * NOTE(review): the statements taken on each test and the return values are
 * not visible here - confirm against the full source.
 */
6640 static int is_extent_tree_record(struct extent_record *rec)
6642 struct extent_backref *node, *tmp;
6643 struct tree_backref *back;
6646 rbtree_postorder_for_each_entry_safe(node, tmp,
6647 &rec->backref_tree, node) {
6650 back = to_tree_backref(node);
6651 if (node->full_backref)
6653 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Look up the extent record covering [start, start + len) in @extent_cache
 * and pass it through is_extent_tree_record(); on the path that reaches the
 * end, the range is registered with btrfs_add_corrupt_extent_record()
 * together with the record's parent key.
 */
6660 static int record_bad_block_io(struct btrfs_fs_info *info,
6661 struct cache_tree *extent_cache,
6664 struct extent_record *rec;
6665 struct cache_extent *cache;
6666 struct btrfs_key key;
6668 cache = lookup_cache_extent(extent_cache, start, len);
6672 rec = container_of(cache, struct extent_record, cache);
6673 if (!is_extent_tree_record(rec))
6676 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6677 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of @buf.
 *
 * For a node (header level != 0) the two btrfs_key_ptr entries are swapped
 * and the low keys are fixed up through btrfs_fixup_low_keys().  For a leaf
 * the item data is copied out into temporary buffers and written back, the
 * item offsets and sizes are exchanged, and the two keys are set through
 * btrfs_set_item_key_unsafe().
 */
6680 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6681 struct extent_buffer *buf, int slot)
6683 if (btrfs_header_level(buf)) {
6684 struct btrfs_key_ptr ptr1, ptr2;
6686 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6687 sizeof(struct btrfs_key_ptr));
6688 read_extent_buffer(buf, &ptr2,
6689 btrfs_node_key_ptr_offset(slot + 1),
6690 sizeof(struct btrfs_key_ptr));
6691 write_extent_buffer(buf, &ptr1,
6692 btrfs_node_key_ptr_offset(slot + 1),
6693 sizeof(struct btrfs_key_ptr));
6694 write_extent_buffer(buf, &ptr2,
6695 btrfs_node_key_ptr_offset(slot),
6696 sizeof(struct btrfs_key_ptr));
6698 struct btrfs_disk_key key;
/* Re-read the first key of the node and push it into the parent levels. */
6699 btrfs_node_key(buf, &key, 0);
6700 btrfs_fixup_low_keys(root, path, &key,
6701 btrfs_header_level(buf) + 1);
6704 struct btrfs_item *item1, *item2;
6705 struct btrfs_key k1, k2;
6706 char *item1_data, *item2_data;
6707 u32 item1_offset, item2_offset, item1_size, item2_size;
6709 item1 = btrfs_item_nr(slot);
6710 item2 = btrfs_item_nr(slot + 1);
6711 btrfs_item_key_to_cpu(buf, &k1, slot);
6712 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6713 item1_offset = btrfs_item_offset(buf, item1);
6714 item2_offset = btrfs_item_offset(buf, item2);
6715 item1_size = btrfs_item_size(buf, item1);
6716 item2_size = btrfs_item_size(buf, item2);
6718 item1_data = malloc(item1_size);
6721 item2_data = malloc(item2_size);
6727 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6728 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): each write below uses the other item's size as its length,
 * while the source buffer was allocated with its own item's size - confirm
 * this is safe when item1_size and item2_size differ.
 */
6730 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6731 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6735 btrfs_set_item_offset(buf, item1, item2_offset);
6736 btrfs_set_item_offset(buf, item2, item1_offset);
6737 btrfs_set_item_size(buf, item1, item2_size);
6738 btrfs_set_item_size(buf, item2, item1_size);
/* The key setter works on path->slots[0], so aim it at each slot in turn. */
6740 path->slots[0] = slot;
6741 btrfs_set_item_key_unsafe(root, path, &k2);
6742 path->slots[0] = slot + 1;
6743 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Walk the block at path->lowest_level and compare each key with its
 * successor (node keys or item keys, both reads are present); the pair is
 * tested with btrfs_comp_cpu_keys(), swap_values() is applied to the slot
 * and the buffer is marked dirty.
 * NOTE(review): the statement guarded by the "< 0" test is not visible;
 * presumably pairs already in order are skipped - confirm.
 */
6748 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6750 struct extent_buffer *buf;
6751 struct btrfs_key k1, k2;
6753 int level = path->lowest_level;
6756 buf = path->nodes[level];
6757 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6759 btrfs_node_key_to_cpu(buf, &k1, i);
6760 btrfs_node_key_to_cpu(buf, &k2, i + 1);
6762 btrfs_item_key_to_cpu(buf, &k1, i);
6763 btrfs_item_key_to_cpu(buf, &k2, i + 1);
6765 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6767 ret = swap_values(root, path, buf, i);
6770 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * The key type is first tested against the types listed below (DIR_INDEX,
 * EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF, EXTENT_DATA_REF).  The item
 * headers behind @slot are then moved down by one, nritems is decremented,
 * btrfs_fixup_low_keys() is called with the first item key, and the buffer
 * is marked dirty.
 */
6776 static int delete_bogus_item(struct btrfs_root *root,
6777 struct btrfs_path *path,
6778 struct extent_buffer *buf, int slot)
6780 struct btrfs_key key;
6781 int nritems = btrfs_header_nritems(buf);
6783 btrfs_item_key_to_cpu(buf, &key, slot);
6785 /* These are all the keys we can deal with missing. */
6786 if (key.type != BTRFS_DIR_INDEX_KEY &&
6787 key.type != BTRFS_EXTENT_ITEM_KEY &&
6788 key.type != BTRFS_METADATA_ITEM_KEY &&
6789 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6790 key.type != BTRFS_EXTENT_DATA_REF_KEY)
6793 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6794 (unsigned long long)key.objectid, key.type,
6795 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array left by the removed slot. */
6796 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6797 btrfs_item_nr_offset(slot + 1),
6798 sizeof(struct btrfs_item) *
6799 (nritems - slot - 1));
6800 btrfs_set_header_nritems(buf, nritems - 1);
6802 struct btrfs_disk_key disk_key;
6804 btrfs_item_key(buf, &disk_key, 0);
6805 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6807 btrfs_mark_buffer_dirty(buf);
/*
 * Repair the data offsets of the items in the leaf at path->nodes[0].
 *
 * Item 0 is expected to end at BTRFS_LEAF_DATA_SIZE(); every later item is
 * expected to end where the previous item's data starts.  An item ending
 * past its expected end is handed to delete_bogus_item(); an item ending
 * short of it is shifted up by the difference and its offset is rewritten.
 */
6811 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6813 struct extent_buffer *buf;
6817 /* We should only get this for leaves */
6818 BUG_ON(path->lowest_level);
6819 buf = path->nodes[0];
6821 for (i = 0; i < btrfs_header_nritems(buf); i++) {
6822 unsigned int shift = 0, offset;
/* First item: compare its end with the end of the leaf data area. */
6824 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6825 BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
6826 if (btrfs_item_end_nr(buf, i) >
6827 BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
6828 ret = delete_bogus_item(root, path, buf, i);
6831 fprintf(stderr, "item is off the end of the "
6832 "leaf, can't fix\n");
6836 shift = BTRFS_LEAF_DATA_SIZE(root->fs_info) -
6837 btrfs_item_end_nr(buf, i);
/* Later items: compare the end with the start of the previous item. */
6838 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6839 btrfs_item_offset_nr(buf, i - 1)) {
6840 if (btrfs_item_end_nr(buf, i) >
6841 btrfs_item_offset_nr(buf, i - 1)) {
6842 ret = delete_bogus_item(root, path, buf, i);
6845 fprintf(stderr, "items overlap, can't fix\n");
6849 shift = btrfs_item_offset_nr(buf, i - 1) -
6850 btrfs_item_end_nr(buf, i);
6855 printf("Shifting item nr %d by %u bytes in block %llu\n",
6856 i, shift, (unsigned long long)buf->start);
6857 offset = btrfs_item_offset_nr(buf, i);
/* Move the item data up by shift bytes, then record the new offset. */
6858 memmove_extent_buffer(buf,
6859 btrfs_leaf_data(buf) + offset + shift,
6860 btrfs_leaf_data(buf) + offset,
6861 btrfs_item_size_nr(buf, i));
6862 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6864 btrfs_mark_buffer_dirty(buf);
6868 * We may have moved things, in which case we want to exit so we don't
6869 * write those changes out. Once we have proper abort functionality in
6870 * progs this can be changed to something nicer.
6877 * Attempt to fix basic block failures. If we can't fix it for whatever reason
6878 * then just return -EIO.
/*
 * Attempt an in-place repair of tree block @buf.
 *
 * @root:   supplies fs_info for the root lookups
 * @buf:    the tree block that failed validation
 * @status: failure reported for @buf; the first check singles out
 *          BTRFS_TREE_BLOCK_BAD_KEY_ORDER and
 *          BTRFS_TREE_BLOCK_INVALID_OFFSETS, the two cases dispatched below
 *
 * Every root id returned by btrfs_find_all_roots() for buf->start is read
 * with btrfs_read_fs_root(), a transaction is started on it, and the block
 * is looked up again with btrfs_search_slot() using its first key and its
 * own level as path.lowest_level.  fix_key_order() or fix_item_offset() then
 * works on that path and the transaction is committed.
 *
 * NOTE(review): none of the three btrfs_commit_transaction() calls has its
 * return value checked.
 */
6880 static int try_to_fix_bad_block(struct btrfs_root *root,
6881 struct extent_buffer *buf,
6882 enum btrfs_tree_block_status status)
6884 struct btrfs_trans_handle *trans;
6885 struct ulist *roots;
6886 struct ulist_node *node;
6887 struct btrfs_root *search_root;
6888 struct btrfs_path path;
6889 struct ulist_iterator iter;
6890 struct btrfs_key root_key, key;
6893 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6894 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6897 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6901 btrfs_init_path(&path);
6902 ULIST_ITER_INIT(&iter);
6903 while ((node = ulist_next(roots, &iter))) {
/* node->val is used as the objectid of the root item to look up. */
6904 root_key.objectid = node->val;
6905 root_key.type = BTRFS_ROOT_ITEM_KEY;
6906 root_key.offset = (u64)-1;
6908 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6915 trans = btrfs_start_transaction(search_root, 0);
6916 if (IS_ERR(trans)) {
6917 ret = PTR_ERR(trans);
/*
 * Stop the search at the level of the damaged block.  skip_check_block is
 * set, presumably so the lookup does not reject the block being repaired.
 */
6921 path.lowest_level = btrfs_header_level(buf);
6922 path.skip_check_block = 1;
6923 if (path.lowest_level)
6924 btrfs_node_key_to_cpu(buf, &key, 0);
6926 btrfs_item_key_to_cpu(buf, &key, 0);
6927 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6930 btrfs_commit_transaction(trans, search_root);
6933 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6934 ret = fix_key_order(search_root, &path);
6935 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6936 ret = fix_item_offset(search_root, &path);
6938 btrfs_commit_transaction(trans, search_root);
6941 btrfs_release_path(&path);
6942 btrfs_commit_transaction(trans, search_root);
6945 btrfs_release_path(&path);
/*
 * Validate one tree block and update its extent record.
 *
 * @root:         passed to the block checkers and to check_owner_ref()
 * @extent_cache: cache holding the extent_record that covers @buf
 * @buf:          tree block to check
 * @flags:        block flags; BTRFS_BLOCK_FLAG_FULL_BACKREF marks the owner
 *                reference as checked without calling check_owner_ref()
 *
 * The record found for [buf->start, buf->len) gets the block generation,
 * the objectid of the first key (when the block has items) and the level.
 * btrfs_check_leaf() or btrfs_check_node() then validates the block against
 * rec->parent_key; a block that is not clean is passed to
 * try_to_fix_bad_block().  Finally content_checked and owner_ref_checked are
 * updated and maybe_free_extent_rec() is called.
 */
6949 static int check_block(struct btrfs_root *root,
6950 struct cache_tree *extent_cache,
6951 struct extent_buffer *buf, u64 flags)
6953 struct extent_record *rec;
6954 struct cache_extent *cache;
6955 struct btrfs_key key;
6956 enum btrfs_tree_block_status status;
6960 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6963 rec = container_of(cache, struct extent_record, cache);
6964 rec->generation = btrfs_header_generation(buf);
6966 level = btrfs_header_level(buf);
6967 if (btrfs_header_nritems(buf) > 0) {
6970 btrfs_item_key_to_cpu(buf, &key, 0);
6972 btrfs_node_key_to_cpu(buf, &key, 0);
6974 rec->info_objectid = key.objectid;
6976 rec->info_level = level;
6978 if (btrfs_is_leaf(buf))
6979 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6981 status = btrfs_check_node(root, &rec->parent_key, buf);
6983 if (status != BTRFS_TREE_BLOCK_CLEAN) {
/*
 * NOTE(review): try_to_fix_bad_block() is declared to return int, yet its
 * result is stored in the enum "status" and compared with
 * BTRFS_TREE_BLOCK_CLEAN.
 */
6985 status = try_to_fix_bad_block(root, buf, status);
6986 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6988 fprintf(stderr, "bad block %llu\n",
6989 (unsigned long long)buf->start);
6992 * Signal to callers we need to start the scan over
6993 * again since we'll have cowed blocks.
6998 rec->content_checked = 1;
6999 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7000 rec->owner_ref_checked = 1;
7002 ret = check_owner_ref(root, rec, buf);
7004 rec->owner_ref_checked = 1;
7008 maybe_free_extent_rec(extent_cache, rec);
/*
 * Walk rec->backrefs looking for a tree backref that matches the caller.
 *
 * Entries are compared either by back->parent against @parent, with the
 * full_backref flag of the entry taken into account, or by back->root
 * against @root.
 * NOTE(review): which of the two comparisons applies to a given call, and
 * what is returned on a match or a miss, is not visible in this listing;
 * presumably @parent != 0 selects the parent comparison - confirm.
 */
7013 static struct tree_backref *find_tree_backref(struct extent_record *rec,
7014 u64 parent, u64 root)
7016 struct list_head *cur = rec->backrefs.next;
7017 struct extent_backref *node;
7018 struct tree_backref *back;
7020 while(cur != &rec->backrefs) {
7021 node = to_extent_backref(cur);
7025 back = to_tree_backref(node);
7027 if (!node->full_backref)
7029 if (parent == back->parent)
7032 if (node->full_backref)
7034 if (back->root == root)
/*
 * Allocate a tree_backref for @rec with malloc().
 *
 * Only the embedded ref->node is zeroed with memset(); the remaining fields
 * are assigned individually.  One variant stores @parent and sets
 * node.full_backref to 1, the other clears node.full_backref to 0.
 * NOTE(review): the condition choosing between the two variants is not
 * visible here; presumably it is @parent != 0, with @root stored in the
 * other case - confirm.
 */
7042 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
7043 u64 parent, u64 root)
7045 struct tree_backref *ref = malloc(sizeof(*ref));
7049 memset(&ref->node, 0, sizeof(ref->node));
7051 ref->parent = parent;
7052 ref->node.full_backref = 1;
7055 ref->node.full_backref = 0;
/*
 * Walk rec->backrefs looking for a data backref that matches the caller.
 *
 * Entries are compared either by back->parent against @parent, with the
 * full_backref flag of the entry taken into account, or by the triple
 * (root, owner, offset).  For a triple match there is an extra test: when
 * both the caller (found_ref) and the entry (node->found_ref) have been seen
 * in a file extent and back->bytes or back->disk_bytenr differ from @bytes
 * or @disk_bytenr, the entry is treated specially.
 * NOTE(review): the statement guarded by that extra test is not visible
 * here; presumably the entry is skipped so the caller creates a separate
 * backref - confirm.
 */
7062 static struct data_backref *find_data_backref(struct extent_record *rec,
7063 u64 parent, u64 root,
7064 u64 owner, u64 offset,
7066 u64 disk_bytenr, u64 bytes)
7068 struct list_head *cur = rec->backrefs.next;
7069 struct extent_backref *node;
7070 struct data_backref *back;
7072 while(cur != &rec->backrefs) {
7073 node = to_extent_backref(cur);
7077 back = to_data_backref(node);
7079 if (!node->full_backref)
7081 if (parent == back->parent)
7084 if (node->full_backref)
7086 if (back->root == root && back->owner == owner &&
7087 back->offset == offset) {
7088 if (found_ref && node->found_ref &&
7089 (back->bytes != bytes ||
7090 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data_backref for @rec with malloc().
 *
 * The embedded ref->node is zeroed and flagged is_data.  One variant stores
 * @parent and sets node.full_backref to 1; the other stores @offset (and
 * presumably root and owner - those assignments are not visible here) and
 * clears node.full_backref.  ref->bytes is set to max_size, and
 * rec->max_size is raised to max_size when that is larger.
 */
7100 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7101 u64 parent, u64 root,
7102 u64 owner, u64 offset,
7105 struct data_backref *ref = malloc(sizeof(*ref));
7109 memset(&ref->node, 0, sizeof(ref->node));
7110 ref->node.is_data = 1;
7113 ref->parent = parent;
7116 ref->node.full_backref = 1;
7120 ref->offset = offset;
7121 ref->node.full_backref = 0;
7123 ref->bytes = max_size;
/* Keep the record size estimate at least as large as this backref. */
7126 if (max_size > rec->max_size)
7127 rec->max_size = max_size;
7131 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the flags of the block group returned by
 * btrfs_lookup_first_block_group() for rec->start do not fit the record:
 *  - a data record (rec->metadata == 0) requires BTRFS_BLOCK_GROUP_DATA;
 *  - a metadata record requires SYSTEM or METADATA; when rec->backref_tree
 *    is not empty its first backref narrows this down: a backref whose root
 *    is BTRFS_CHUNK_TREE_OBJECTID expects SYSTEM, any other root expects
 *    METADATA.  A data backref on a metadata record is itself flagged.
 */
7132 static void check_extent_type(struct extent_record *rec)
7134 struct btrfs_block_group_cache *bg_cache;
7136 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7140 /* data extent, check chunk directly*/
7141 if (!rec->metadata) {
7142 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7143 rec->wrong_chunk_type = 1;
7147 /* metadata extent, check the obvious case first */
7148 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7149 BTRFS_BLOCK_GROUP_METADATA))) {
7150 rec->wrong_chunk_type = 1;
7155 * Check SYSTEM extent, as it's also marked as metadata, we can only
7156 * make sure it's a SYSTEM extent by its backref
7158 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7159 struct extent_backref *node;
7160 struct tree_backref *tback;
7163 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7164 if (node->is_data) {
7165 /* tree block shouldn't have data backref */
7166 rec->wrong_chunk_type = 1;
7169 tback = container_of(node, struct tree_backref, node);
7171 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7172 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7174 bg_type = BTRFS_BLOCK_GROUP_METADATA;
7175 if (!(bg_cache->flags & bg_type))
7176 rec->wrong_chunk_type = 1;
7181 * Allocate a new extent record, fill default values from @tmpl and insert into
7182 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7183 * the cache, otherwise it fails.
/*
 * Build a fresh extent_record from @tmpl and insert it into @extent_cache.
 *
 * tmpl->max_size must be non-zero (BUG_ON).  The fields start, max_size,
 * found_rec, content_checked, owner_ref_checked, metadata, is_root, refs,
 * extent_item_refs, parent_generation and parent_key are copied from the
 * template; the duplicate counter and the bad_full_backref,
 * crossing_stripes and wrong_chunk_type flags start at 0 and
 * flag_block_full_backref at FLAG_UNSET.  After a successful insert the
 * global bytes_used grows by rec->nr and the stripe crossing and chunk type
 * checks are run.
 */
7185 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7186 struct extent_record *tmpl)
7188 struct extent_record *rec;
7191 BUG_ON(tmpl->max_size == 0);
7192 rec = malloc(sizeof(*rec));
7195 rec->start = tmpl->start;
7196 rec->max_size = tmpl->max_size;
7197 rec->nr = max(tmpl->nr, tmpl->max_size);
7198 rec->found_rec = tmpl->found_rec;
7199 rec->content_checked = tmpl->content_checked;
7200 rec->owner_ref_checked = tmpl->owner_ref_checked;
7201 rec->num_duplicates = 0;
7202 rec->metadata = tmpl->metadata;
7203 rec->flag_block_full_backref = FLAG_UNSET;
7204 rec->bad_full_backref = 0;
7205 rec->crossing_stripes = 0;
7206 rec->wrong_chunk_type = 0;
7207 rec->is_root = tmpl->is_root;
7208 rec->refs = tmpl->refs;
7209 rec->extent_item_refs = tmpl->extent_item_refs;
7210 rec->parent_generation = tmpl->parent_generation;
7211 INIT_LIST_HEAD(&rec->backrefs);
7212 INIT_LIST_HEAD(&rec->dups);
7213 INIT_LIST_HEAD(&rec->list);
7214 rec->backref_tree = RB_ROOT;
7215 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
/*
 * NOTE(review): the cache entry is sized with tmpl->nr while rec->nr above
 * is max(tmpl->nr, tmpl->max_size), so the two can differ when
 * max_size > nr - confirm this is intended.
 */
7216 rec->cache.start = tmpl->start;
7217 rec->cache.size = tmpl->nr;
7218 ret = insert_cache_extent(extent_cache, &rec->cache);
7223 bytes_used += rec->nr;
7226 rec->crossing_stripes = check_crossing_stripes(global_info,
7227 rec->start, global_info->nodesize);
7228 check_extent_type(rec);
7233 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7235 * - refs - if found, increase refs
7236 * - is_root - if found, set
7237 * - content_checked - if found, set
7238 * - owner_ref_checked - if found, set
7240 * If not found, create a new one, initialize and insert.
/*
 * Merge @tmpl into the extent record covering [tmpl->start, tmpl->nr), or
 * create a new record through add_extent_rec_nolookup() when none exists.
 *
 * For an existing record:
 *  - when the template comes from an extent item (tmpl->found_rec) and
 *    either starts elsewhere or the record already has one, the record is
 *    queued on duplicate_extents and a small extent_record describing the
 *    template is appended to rec->dups;
 *  - extent_item_refs is taken from the template unless this was a
 *    duplicate; an already set value is reported on stderr;
 *  - content_checked, owner_ref_checked, parent_key, parent_generation and
 *    max_size are updated from the template;
 *  - the stripe crossing and chunk type checks are rerun and
 *    maybe_free_extent_rec() is called.
 */
7242 static int add_extent_rec(struct cache_tree *extent_cache,
7243 struct extent_record *tmpl)
7245 struct extent_record *rec;
7246 struct cache_extent *cache;
7250 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7252 rec = container_of(cache, struct extent_record, cache);
7256 rec->nr = max(tmpl->nr, tmpl->max_size);
7259 * We need to make sure to reset nr to whatever the extent
7260 * record says was the real size, this way we can compare it to
7263 if (tmpl->found_rec) {
7264 if (tmpl->start != rec->start || rec->found_rec) {
7265 struct extent_record *tmp;
7268 if (list_empty(&rec->list))
7269 list_add_tail(&rec->list,
7270 &duplicate_extents);
7273 * We have to do this song and dance in case we
7274 * find an extent record that falls inside of
7275 * our current extent record but does not have
7276 * the same objectid.
7278 tmp = malloc(sizeof(*tmp));
7281 tmp->start = tmpl->start;
7282 tmp->max_size = tmpl->max_size;
7285 tmp->metadata = tmpl->metadata;
7286 tmp->extent_item_refs = tmpl->extent_item_refs;
7287 INIT_LIST_HEAD(&tmp->list);
7288 list_add_tail(&tmp->list, &rec->dups);
7289 rec->num_duplicates++;
7296 if (tmpl->extent_item_refs && !dup) {
7297 if (rec->extent_item_refs) {
7298 fprintf(stderr, "block %llu rec "
7299 "extent_item_refs %llu, passed %llu\n",
7300 (unsigned long long)tmpl->start,
7301 (unsigned long long)
7302 rec->extent_item_refs,
7303 (unsigned long long)tmpl->extent_item_refs);
7305 rec->extent_item_refs = tmpl->extent_item_refs;
/* Flags from the template are only ever set here, never cleared. */
7309 if (tmpl->content_checked)
7310 rec->content_checked = 1;
7311 if (tmpl->owner_ref_checked)
7312 rec->owner_ref_checked = 1;
7313 memcpy(&rec->parent_key, &tmpl->parent_key,
7314 sizeof(tmpl->parent_key));
7315 if (tmpl->parent_generation)
7316 rec->parent_generation = tmpl->parent_generation;
7317 if (rec->max_size < tmpl->max_size)
7318 rec->max_size = tmpl->max_size;
7321 * A metadata extent can't cross stripe_len boundary, otherwise
7322 * kernel scrub won't be able to handle it.
7323 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7327 rec->crossing_stripes = check_crossing_stripes(
7328 global_info, rec->start,
7329 global_info->nodesize);
7330 check_extent_type(rec);
7331 maybe_free_extent_rec(extent_cache, rec);
7335 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree block backref for the extent at @bytenr.
 *
 * @extent_cache: cache of extent records
 * @bytenr:       start of the referenced tree block
 * @parent:       parent block, used for the backref lookup and allocation
 * @root:         owning root, used for the backref lookup and allocation
 * @found_ref:    selects which of the two "seen" flags is updated:
 *                node.found_ref or node.found_extent_tree
 *                (NOTE(review): the branch condition itself is not visible
 *                here; presumably non-zero means found_ref - confirm)
 *
 * When no extent record covers @bytenr a template record is inserted first.
 * The backref is looked up with find_tree_backref() and allocated with
 * alloc_tree_backref() when missing.  A flag that is already set is
 * reported on stderr as "Extent back ref already exists".  The backref is
 * inserted into rec->backref_tree, then the chunk type check and
 * maybe_free_extent_rec() run.
 */
7340 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7341 u64 parent, u64 root, int found_ref)
7343 struct extent_record *rec;
7344 struct tree_backref *back;
7345 struct cache_extent *cache;
7347 bool insert = false;
7349 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7351 struct extent_record tmpl;
7353 memset(&tmpl, 0, sizeof(tmpl));
7354 tmpl.start = bytenr;
7359 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7363 /* really a bug in cache_extent implement now */
7364 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7369 rec = container_of(cache, struct extent_record, cache);
7370 if (rec->start != bytenr) {
7372 * Several cause, from unaligned bytenr to over lapping extents
7377 back = find_tree_backref(rec, parent, root);
7379 back = alloc_tree_backref(rec, parent, root);
7386 if (back->node.found_ref) {
7387 fprintf(stderr, "Extent back ref already exists "
7388 "for %llu parent %llu root %llu \n",
7389 (unsigned long long)bytenr,
7390 (unsigned long long)parent,
7391 (unsigned long long)root);
7393 back->node.found_ref = 1;
7395 if (back->node.found_extent_tree) {
7396 fprintf(stderr, "Extent back ref already exists "
7397 "for %llu parent %llu root %llu \n",
7398 (unsigned long long)bytenr,
7399 (unsigned long long)parent,
7400 (unsigned long long)root);
7402 back->node.found_extent_tree = 1;
/* A failed rb_insert() only triggers a warning here. */
7405 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7406 compare_extent_backref));
7407 check_extent_type(rec);
7408 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data extent backref for the extent at @bytenr.
 *
 * @extent_cache: cache of extent records
 * @bytenr:       start of the referenced data extent
 * @parent:       parent block, used for the backref lookup and allocation
 * @root:         owning root
 * @owner:        owner objectid within @root
 * @offset:       file offset of the reference
 * @num_refs:     reference count; stored in back->num_refs on the extent
 *                tree path and required to be 1 on the file extent path
 * @found_ref:    passed to find_data_backref(); also selects between the
 *                file extent path (found_ref counters) and the extent tree
 *                path (found_extent_tree)
 *                (NOTE(review): the branch condition is not visible here -
 *                confirm)
 * @max_size:     extent size; raises rec->max_size and is compared with
 *                back->bytes
 *
 * When no extent record covers @bytenr a template record with this
 * max_size is inserted first.  On the file extent path a mismatch of
 * back->bytes or back->disk_bytenr updates both fields, and the node is
 * erased from rec->backref_tree so it can be reinserted; the record is then
 * marked content_checked and owner_ref_checked.
 */
7412 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7413 u64 parent, u64 root, u64 owner, u64 offset,
7414 u32 num_refs, int found_ref, u64 max_size)
7416 struct extent_record *rec;
7417 struct data_backref *back;
7418 struct cache_extent *cache;
7420 bool insert = false;
7422 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7424 struct extent_record tmpl;
7426 memset(&tmpl, 0, sizeof(tmpl));
7427 tmpl.start = bytenr;
7429 tmpl.max_size = max_size;
7431 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7435 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7440 rec = container_of(cache, struct extent_record, cache);
7441 if (rec->max_size < max_size)
7442 rec->max_size = max_size;
7445 * If found_ref is set then max_size is the real size and must match the
7446 * existing refs. So if we have already found a ref then we need to
7447 * make sure that this ref matches the existing one, otherwise we need
7448 * to add a new backref so we can notice that the backrefs don't match
7449 * and we need to figure out who is telling the truth. This is to
7450 * account for that awful fsync bug I introduced where we'd end up with
7451 * a btrfs_file_extent_item that would have its length include multiple
7452 * prealloc extents or point inside of a prealloc extent.
7454 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7457 back = alloc_data_backref(rec, parent, root, owner, offset,
7464 BUG_ON(num_refs != 1);
7465 if (back->node.found_ref)
7466 BUG_ON(back->bytes != max_size);
7467 back->node.found_ref = 1;
7468 back->found_ref += 1;
7469 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7470 back->bytes = max_size;
7471 back->disk_bytenr = bytenr;
7473 /* Need to reinsert if not already in the tree */
7475 rb_erase(&back->node.node, &rec->backref_tree);
7480 rec->content_checked = 1;
7481 rec->owner_ref_checked = 1;
7483 if (back->node.found_extent_tree) {
7484 fprintf(stderr, "Extent back ref already exists "
7485 "for %llu parent %llu root %llu "
7486 "owner %llu offset %llu num_refs %lu\n",
7487 (unsigned long long)bytenr,
7488 (unsigned long long)parent,
7489 (unsigned long long)root,
7490 (unsigned long long)owner,
7491 (unsigned long long)offset,
7492 (unsigned long)num_refs);
7494 back->num_refs = num_refs;
7495 back->node.found_extent_tree = 1;
7498 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7499 compare_extent_backref));
7501 maybe_free_extent_rec(extent_cache, rec);
/*
 * Register the range [bytenr, bytenr + size) in @seen and then in @pending.
 *
 * NOTE(review): the result of the second add_cache_extent() call is not
 * captured; how the first result is handled is not visible in this listing.
 */
7505 static int add_pending(struct cache_tree *pending,
7506 struct cache_tree *seen, u64 bytenr, u32 size)
7509 ret = add_cache_extent(seen, bytenr, size);
7512 add_cache_extent(pending, bytenr, size);
/*
 * Choose the next block ranges to process and store them in @bits.
 *
 * @pending: ranges waiting to be processed
 * @reada:   ranges queued for readahead; its first entry takes priority and
 *           is stored alone in bits[0]
 * @nodes:   queued tree nodes; searched starting near @last
 * @last:    position of the previous pick; the search in @nodes starts
 *           32768 bytes before it when @last is larger than that
 * @bits:    output array
 * @bits_nr: capacity of @bits
 *
 * Entries are copied from @nodes (from node_start, else from offset 0), or
 * from the start of @pending when @nodes yields nothing, until the source
 * runs out or @bits_nr entries are stored.  When more than 8 slots remain
 * free, entries of @pending that follow bits[0] are appended; the gap test
 * against 32768 bytes presumably ends that scan - confirm.
 *
 * NOTE(review): node_start is unsigned long while @last is u64; where
 * unsigned long is 32 bits wide the initialisation narrows the value.
 */
7516 static int pick_next_pending(struct cache_tree *pending,
7517 struct cache_tree *reada,
7518 struct cache_tree *nodes,
7519 u64 last, struct block_info *bits, int bits_nr,
7522 unsigned long node_start = last;
7523 struct cache_extent *cache;
7526 cache = search_cache_extent(reada, 0);
7528 bits[0].start = cache->start;
7529 bits[0].size = cache->size;
7534 if (node_start > 32768)
7535 node_start -= 32768;
7537 cache = search_cache_extent(nodes, node_start);
7539 cache = search_cache_extent(nodes, 0);
7542 cache = search_cache_extent(pending, 0);
7547 bits[ret].start = cache->start;
7548 bits[ret].size = cache->size;
7549 cache = next_cache_extent(cache);
7551 } while (cache && ret < bits_nr);
7557 bits[ret].start = cache->start;
7558 bits[ret].size = cache->size;
7559 cache = next_cache_extent(cache);
7561 } while (cache && ret < bits_nr);
/* Spare capacity left: also pull in pending ranges close behind bits[0]. */
7563 if (bits_nr - ret > 8) {
7564 u64 lookup = bits[0].start + bits[0].size;
7565 struct cache_extent *next;
7566 next = search_cache_extent(pending, lookup);
7568 if (next->start - lookup > 32768)
7570 bits[ret].start = next->start;
7571 bits[ret].size = next->size;
7572 lookup = next->start + next->size;
7576 next = next_cache_extent(next);
/*
 * Per-entry callback used by free_chunk_cache_tree(): unlinks the
 * chunk_record embedding @cache from its "list" and "dextents" lists.
 */
7584 static void free_chunk_record(struct cache_extent *cache)
7586 struct chunk_record *rec;
7588 rec = container_of(cache, struct chunk_record, cache);
7589 list_del_init(&rec->list);
7590 list_del_init(&rec->dextents);
/* Release every entry of @chunk_cache, using free_chunk_record() per entry. */
7594 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
7596 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node callback handed to FREE_RB_BASED_TREE(): resolves the
 * device_record that embeds @node.
 */
7599 static void free_device_record(struct rb_node *node)
7601 struct device_record *rec;
7603 rec = container_of(node, struct device_record, node);
/*
 * Macro expansion taking the name "device_cache" and the per-node callback
 * free_device_record().
 * NOTE(review): presumably this defines the function that tears down the
 * device record rb-tree - confirm against the macro definition.
 */
7607 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the cache tree of @tree and append it to
 * tree->block_groups.
 */
7609 int insert_block_group_record(struct block_group_tree *tree,
7610 struct block_group_record *bg_rec)
7614 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7618 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Per-entry callback used by free_block_group_tree(): unlinks the
 * block_group_record embedding @cache from its list.
 */
7622 static void free_block_group_record(struct cache_extent *cache)
7624 struct block_group_record *rec;
7626 rec = container_of(cache, struct block_group_record, cache);
7627 list_del_init(&rec->list);
/* Release every entry of tree->tree, using free_block_group_record(). */
7631 void free_block_group_tree(struct block_group_tree *tree)
7633 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the cache tree of @tree with insert_cache_extent2()
 * and queue it on both orphan lists (no_chunk_orphans and
 * no_device_orphans).
 */
7636 int insert_device_extent_record(struct device_extent_tree *tree,
7637 struct device_extent_record *de_rec)
7642 * Device extent is a bit different from the other extents, because
7643 * the extents which belong to the different devices may have the
7644 * same start and size, so we need use the special extent cache
7645 * search/insert functions.
7647 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7651 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7652 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Per-entry callback used by free_device_extent_tree(): unlinks the
 * device_extent_record embedding @cache from chunk_list and device_list
 * when it is still linked on them.
 */
7656 static void free_device_extent_record(struct cache_extent *cache)
7658 struct device_extent_record *rec;
7660 rec = container_of(cache, struct device_extent_record, cache);
7661 if (!list_empty(&rec->chunk_list))
7662 list_del_init(&rec->chunk_list);
7663 if (!list_empty(&rec->device_list))
7664 list_del_init(&rec->device_list);
/* Release every entry of tree->tree, using free_device_extent_record(). */
7668 void free_device_extent_tree(struct device_extent_tree *tree)
7670 cache_tree_free_extents(&tree->tree, free_device_extent_record);
/*
 * Only built when BTRFS_COMPAT_EXTENT_TREE_V0 is defined.
 *
 * Turn the v0 extent ref item at @slot of @leaf into a backref.  A ref
 * whose objectid is below BTRFS_FIRST_FREE_OBJECTID becomes a tree backref;
 * any other ref becomes a data backref with key.offset as parent, zero
 * root/owner/offset, the v0 ref count as num_refs and found_ref and
 * max_size both 0.
 */
7673 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7674 static int process_extent_ref_v0(struct cache_tree *extent_cache,
7675 struct extent_buffer *leaf, int slot)
7677 struct btrfs_extent_ref_v0 *ref0;
7678 struct btrfs_key key;
7681 btrfs_item_key_to_cpu(leaf, &key, slot);
7682 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
7683 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
7684 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
7687 ret = add_data_backref(extent_cache, key.objectid, key.offset,
7688 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the chunk item at @slot of @leaf.
 *
 * The record is allocated zeroed with room for num_stripes stripes
 * (btrfs_chunk_record_size()).  "memory allocation failed" is printed when
 * calloc() fails.  The cache range is [key->offset, chunk length); key,
 * generation and all chunk attributes are copied, followed by devid, offset
 * and dev_uuid for every stripe.
 */
7694 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7695 struct btrfs_key *key,
7698 struct btrfs_chunk *ptr;
7699 struct chunk_record *rec;
7702 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7703 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7705 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7707 fprintf(stderr, "memory allocation failed\n");
7711 INIT_LIST_HEAD(&rec->list);
7712 INIT_LIST_HEAD(&rec->dextents);
7715 rec->cache.start = key->offset;
7716 rec->cache.size = btrfs_chunk_length(leaf, ptr);
7718 rec->generation = btrfs_header_generation(leaf);
7720 rec->objectid = key->objectid;
7721 rec->type = key->type;
7722 rec->offset = key->offset;
7724 rec->length = rec->cache.size;
7725 rec->owner = btrfs_chunk_owner(leaf, ptr);
7726 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7727 rec->type_flags = btrfs_chunk_type(leaf, ptr);
7728 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7729 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7730 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7731 rec->num_stripes = num_stripes;
7732 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* Copy the per-stripe device id, offset and device uuid. */
7734 for (i = 0; i < rec->num_stripes; ++i) {
7735 rec->stripes[i].devid =
7736 btrfs_stripe_devid_nr(leaf, ptr, i);
7737 rec->stripes[i].offset =
7738 btrfs_stripe_offset_nr(leaf, ptr, i);
7739 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7740 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb and add it to @chunk_cache.
 *
 * The item is first checked with btrfs_check_chunk_valid(); an invalid
 * chunk is reported with error() as "is not valid, ignore it".  A valid
 * chunk is converted by btrfs_new_chunk_record() and inserted; a failed
 * insert is reported as "Chunk[...] existed."
 */
7747 static int process_chunk_item(struct cache_tree *chunk_cache,
7748 struct btrfs_key *key, struct extent_buffer *eb,
7751 struct chunk_record *rec;
7752 struct btrfs_chunk *chunk;
7755 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7757 * Do extra check for this chunk item,
7759 * It's still possible one can craft a leaf with CHUNK_ITEM, with
7760 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7761 * and owner<->key_type check.
7763 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7766 error("chunk(%llu, %llu) is not valid, ignore it",
7767 key->offset, btrfs_chunk_length(eb, chunk));
7770 rec = btrfs_new_chunk_record(eb, key, slot);
7771 ret = insert_cache_extent(chunk_cache, &rec->cache);
7773 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7774 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it
 * into the rb-tree @dev_cache, ordered by device_record_compare().
 *
 * The record is allocated with malloc(); "memory allocation failed" is
 * printed when that fails.  A failed insert is reported as
 * "Device[...] existed."
 *
 * NOTE(review): rec->devid is first set from key->offset and later
 * overwritten with btrfs_device_id(eb, ptr); the first assignment looks
 * redundant.
 */
7781 static int process_device_item(struct rb_root *dev_cache,
7782 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7784 struct btrfs_dev_item *ptr;
7785 struct device_record *rec;
7788 ptr = btrfs_item_ptr(eb,
7789 slot, struct btrfs_dev_item);
7791 rec = malloc(sizeof(*rec));
7793 fprintf(stderr, "memory allocation failed\n");
7797 rec->devid = key->offset;
7798 rec->generation = btrfs_header_generation(eb);
7800 rec->objectid = key->objectid;
7801 rec->type = key->type;
7802 rec->offset = key->offset;
7804 rec->devid = btrfs_device_id(eb, ptr);
7805 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7806 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7808 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7810 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a zero-initialised block_group_record from the block group item at
 * @slot of @leaf.
 *
 * The cache range is [key->objectid, key->offset); generation, key fields
 * and the on-disk block group flags are copied and the list head is
 * initialised.  "memory allocation failed" is printed when calloc() fails.
 */
7817 struct block_group_record *
7818 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7821 struct btrfs_block_group_item *ptr;
7822 struct block_group_record *rec;
7824 rec = calloc(1, sizeof(*rec));
7826 fprintf(stderr, "memory allocation failed\n");
7830 rec->cache.start = key->objectid;
7831 rec->cache.size = key->offset;
7833 rec->generation = btrfs_header_generation(leaf);
7835 rec->objectid = key->objectid;
7836 rec->type = key->type;
7837 rec->offset = key->offset;
7839 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7840 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7842 INIT_LIST_HEAD(&rec->list);
/*
 * Convert the block group item at @slot of @eb into a record and insert it
 * into @block_group_cache; a failed insert is reported as
 * "Block Group[...] existed."
 */
7847 static int process_block_group_item(struct block_group_tree *block_group_cache,
7848 struct btrfs_key *key,
7849 struct extent_buffer *eb, int slot)
7851 struct block_group_record *rec;
7854 rec = btrfs_new_block_group_record(eb, key, slot);
7855 ret = insert_block_group_record(block_group_cache, rec);
7857 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7858 rec->objectid, rec->offset);
/*
 * Build a zero-initialised device_extent_record from the dev extent item at
 * @slot of @leaf.
 *
 * The cache entry is keyed by key->objectid with start key->offset and size
 * equal to the dev extent length; generation, key fields and the chunk
 * identification are copied and both list heads are initialised.
 * "memory allocation failed" is printed when calloc() fails.
 */
7865 struct device_extent_record *
7866 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7867 struct btrfs_key *key, int slot)
7869 struct device_extent_record *rec;
7870 struct btrfs_dev_extent *ptr;
7872 rec = calloc(1, sizeof(*rec));
7874 fprintf(stderr, "memory allocation failed\n");
7878 rec->cache.objectid = key->objectid;
7879 rec->cache.start = key->offset;
7881 rec->generation = btrfs_header_generation(leaf);
7883 rec->objectid = key->objectid;
7884 rec->type = key->type;
7885 rec->offset = key->offset;
7887 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7888 rec->chunk_objecteid =
7889 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7891 btrfs_dev_extent_chunk_offset(leaf, ptr);
7892 rec->length = btrfs_dev_extent_length(leaf, ptr);
7893 rec->cache.size = rec->length;
7895 INIT_LIST_HEAD(&rec->chunk_list);
7896 INIT_LIST_HEAD(&rec->device_list);
/*
 * Convert the dev extent item at @slot of @eb into a record and insert it
 * into @dev_extent_cache; a failed insert is reported as
 * "Device extent[...] existed."
 */
7902 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7903 struct btrfs_key *key, struct extent_buffer *eb,
7906 struct device_extent_record *rec;
7909 rec = btrfs_new_device_extent_record(eb, key, slot);
7910 ret = insert_device_extent_record(dev_extent_cache, rec);
7913 "Device extent[%llu, %llu, %llu] existed.\n",
7914 rec->objectid, rec->offset, rec->length);
/*
 * Parse the EXTENT_ITEM or METADATA_ITEM at @slot of @eb into the extent
 * cache, including all of its inline backrefs.
 *
 * @root:         supplies fs_info (nodesize, sectorsize)
 * @extent_cache: cache receiving the extent record and its backrefs
 * @eb:           leaf holding the item
 * @slot:         slot of the item inside @eb
 *
 * Steps:
 *  - the extent length is nodesize for METADATA_ITEM keys and key.offset
 *    otherwise;
 *  - an extent whose bytenr is not sectorsize aligned is reported and
 *    ignored;
 *  - an item smaller than struct btrfs_extent_item is handled as a v0 item
 *    when BTRFS_COMPAT_EXTENT_TREE_V0 is defined;
 *  - metadata extents must be exactly nodesize long and data extents
 *    sectorsize aligned, otherwise they are reported and ignored;
 *  - an extent record is added, then every inline ref between the end of
 *    the extent item (plus btrfs_tree_block_info for tree blocks stored
 *    under EXTENT_ITEM keys) and the end of the item is dispatched by type.
 *
 * NOTE(review): the results of add_extent_rec() and of both
 * add_data_backref() calls in the inline ref loop are not captured, unlike
 * the add_tree_backref() calls.
 */
7921 static int process_extent_item(struct btrfs_root *root,
7922 struct cache_tree *extent_cache,
7923 struct extent_buffer *eb, int slot)
7925 struct btrfs_extent_item *ei;
7926 struct btrfs_extent_inline_ref *iref;
7927 struct btrfs_extent_data_ref *dref;
7928 struct btrfs_shared_data_ref *sref;
7929 struct btrfs_key key;
7930 struct extent_record tmpl;
7935 u32 item_size = btrfs_item_size_nr(eb, slot);
7941 btrfs_item_key_to_cpu(eb, &key, slot);
7943 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7945 num_bytes = root->fs_info->nodesize;
7947 num_bytes = key.offset;
7950 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7951 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7952 key.objectid, root->fs_info->sectorsize);
7955 if (item_size < sizeof(*ei)) {
7956 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7957 struct btrfs_extent_item_v0 *ei0;
7958 if (item_size != sizeof(*ei0)) {
7960 "invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d",
7961 key.objectid, key.type, key.offset,
7962 btrfs_header_bytenr(eb), slot);
7965 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7966 refs = btrfs_extent_refs_v0(eb, ei0);
7970 memset(&tmpl, 0, sizeof(tmpl));
7971 tmpl.start = key.objectid;
7972 tmpl.nr = num_bytes;
7973 tmpl.extent_item_refs = refs;
7974 tmpl.metadata = metadata;
7976 tmpl.max_size = num_bytes;
7978 return add_extent_rec(extent_cache, &tmpl);
7981 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7982 refs = btrfs_extent_refs(eb, ei);
7983 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7987 if (metadata && num_bytes != root->fs_info->nodesize) {
7988 error("ignore invalid metadata extent, length %llu does not equal to %u",
7989 num_bytes, root->fs_info->nodesize);
7992 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7993 error("ignore invalid data extent, length %llu is not aligned to %u",
7994 num_bytes, root->fs_info->sectorsize);
7998 memset(&tmpl, 0, sizeof(tmpl));
7999 tmpl.start = key.objectid;
8000 tmpl.nr = num_bytes;
8001 tmpl.extent_item_refs = refs;
8002 tmpl.metadata = metadata;
8004 tmpl.max_size = num_bytes;
8005 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs start right after the extent item; tree blocks stored under
 * an EXTENT_ITEM key carry a btrfs_tree_block_info first.
 */
8007 ptr = (unsigned long)(ei + 1);
8008 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
8009 key.type == BTRFS_EXTENT_ITEM_KEY)
8010 ptr += sizeof(struct btrfs_tree_block_info);
8012 end = (unsigned long)ei + item_size;
8014 iref = (struct btrfs_extent_inline_ref *)ptr;
8015 type = btrfs_extent_inline_ref_type(eb, iref);
8016 offset = btrfs_extent_inline_ref_offset(eb, iref);
8018 case BTRFS_TREE_BLOCK_REF_KEY:
8019 ret = add_tree_backref(extent_cache, key.objectid,
8023 "add_tree_backref failed (extent items tree block): %s",
8026 case BTRFS_SHARED_BLOCK_REF_KEY:
8027 ret = add_tree_backref(extent_cache, key.objectid,
8031 "add_tree_backref failed (extent items shared block): %s",
8034 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref structure overlays the offset field of the inline ref. */
8035 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8036 add_data_backref(extent_cache, key.objectid, 0,
8037 btrfs_extent_data_ref_root(eb, dref),
8038 btrfs_extent_data_ref_objectid(eb,
8040 btrfs_extent_data_ref_offset(eb, dref),
8041 btrfs_extent_data_ref_count(eb, dref),
8044 case BTRFS_SHARED_DATA_REF_KEY:
/* The shared data ref follows the inline ref header. */
8045 sref = (struct btrfs_shared_data_ref *)(iref + 1);
8046 add_data_backref(extent_cache, key.objectid, offset,
8048 btrfs_shared_data_ref_count(eb, sref),
8052 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
8053 key.objectid, key.type, num_bytes);
8056 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Verify that [offset, offset + bytes) of block group @cache is present in
 * its free space cache as exactly one entry, then remove that entry.
 *
 * Super block copies are carved out of the range first: for every super
 * mirror, btrfs_rmap_block() yields the logical addresses of its stripes
 * inside the block group.  A stripe overlapping the start of the range
 * moves offset forward; one overlapping the end shortens bytes; one in the
 * middle splits the range, with the left part checked by a recursive call
 * and the right part continuing here.
 *
 * The remaining range must be found by btrfs_find_free_space() with
 * identical offset and bytes; mismatches are reported on stderr.  A
 * matching entry is unlinked from the free space ctl.
 */
8063 static int check_cache_range(struct btrfs_root *root,
8064 struct btrfs_block_group_cache *cache,
8065 u64 offset, u64 bytes)
8067 struct btrfs_free_space *entry;
8073 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
8074 bytenr = btrfs_sb_offset(i);
8075 ret = btrfs_rmap_block(root->fs_info,
8076 cache->key.objectid, bytenr, 0,
8077 &logical, &nr, &stripe_len);
/* Stripe entirely before or entirely after the range: the two tests below. */
8082 if (logical[nr] + stripe_len <= offset)
8084 if (offset + bytes <= logical[nr])
8086 if (logical[nr] == offset) {
8087 if (stripe_len >= bytes) {
8091 bytes -= stripe_len;
8092 offset += stripe_len;
8093 } else if (logical[nr] < offset) {
8094 if (logical[nr] + stripe_len >=
8099 bytes = (offset + bytes) -
8100 (logical[nr] + stripe_len);
8101 offset = logical[nr] + stripe_len;
8104 * Could be tricky, the super may land in the
8105 * middle of the area we're checking. First
8106 * check the easiest case, it's at the end.
8108 if (logical[nr] + stripe_len >=
8110 bytes = logical[nr] - offset;
8114 /* Check the left side */
8115 ret = check_cache_range(root, cache,
8117 logical[nr] - offset);
8123 /* Now we continue with the right side */
8124 bytes = (offset + bytes) -
8125 (logical[nr] + stripe_len);
8126 offset = logical[nr] + stripe_len;
8133 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8135 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8136 offset, offset+bytes);
8140 if (entry->offset != offset) {
8141 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8146 if (entry->bytes != bytes) {
8147 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8148 bytes, entry->bytes, offset);
8152 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the loaded free space of block group @cache against the
 * extent tree.
 *
 * The extent root is scanned from max(block group start,
 * BTRFS_SUPER_INFO_OFFSET) for EXTENT_ITEM and METADATA_ITEM keys.  "last"
 * tracks the end of the previous extent (key.offset long for EXTENT_ITEM,
 * nodesize long for METADATA_ITEM); every gap between "last" and the next
 * extent, and the tail gap up to the end of the block group, is verified
 * with check_cache_range().  Entries still left in free_space_offset after
 * the walk are reported on stderr.
 */
8157 static int verify_space_cache(struct btrfs_root *root,
8158 struct btrfs_block_group_cache *cache)
8160 struct btrfs_path path;
8161 struct extent_buffer *leaf;
8162 struct btrfs_key key;
8166 root = root->fs_info->extent_root;
8168 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8170 btrfs_init_path(&path);
8171 key.objectid = last;
8173 key.type = BTRFS_EXTENT_ITEM_KEY;
8174 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8179 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8180 ret = btrfs_next_leaf(root, &path);
8188 leaf = path.nodes[0];
8189 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Past the end of this block group. */
8190 if (key.objectid >= cache->key.offset + cache->key.objectid)
8192 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8193 key.type != BTRFS_METADATA_ITEM_KEY) {
/* Extent starts exactly at "last": no gap to verify, just advance. */
8198 if (last == key.objectid) {
8199 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8200 last = key.objectid + key.offset;
8202 last = key.objectid + root->fs_info->nodesize;
8207 ret = check_cache_range(root, cache, last,
8208 key.objectid - last);
8211 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8212 last = key.objectid + key.offset;
8214 last = key.objectid + root->fs_info->nodesize;
/* Tail of the block group behind the last extent. */
8218 if (last < cache->key.objectid + cache->key.offset)
8219 ret = check_cache_range(root, cache, last,
8220 cache->key.objectid +
8221 cache->key.offset - last);
8224 btrfs_release_path(&path);
8227 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8228 fprintf(stderr, "There are still entries left in the space "
/*
 * Verify the free space cache (or free space tree) of every block group.
 *
 * A cache generation that is set (not -1ULL) but differs from the super
 * block generation is reported as "will be invalidated".  Otherwise each
 * block group found from BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE
 * onwards gets a free space ctl (initialised on first use), has its free
 * space loaded - from the free space tree when the FREE_SPACE_TREE
 * compat_ro bit is set, from the space cache otherwise - and is checked
 * with verify_space_cache().
 *
 * Returns -EINVAL when the local "error" counter is non-zero, 0 otherwise.
 */
8236 static int check_space_cache(struct btrfs_root *root)
8238 struct btrfs_block_group_cache *cache;
8239 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8243 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8244 btrfs_super_generation(root->fs_info->super_copy) !=
8245 btrfs_super_cache_generation(root->fs_info->super_copy)) {
8246 printf("cache and super generation don't match, space cache "
8247 "will be invalidated\n");
8251 if (ctx.progress_enabled) {
8252 ctx.tp = TASK_FREE_SPACE;
8253 task_start(ctx.info);
8257 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* Continue the next lookup behind this block group. */
8261 start = cache->key.objectid + cache->key.offset;
8262 if (!cache->free_space_ctl) {
8263 if (btrfs_init_free_space_ctl(cache,
8264 root->fs_info->sectorsize)) {
8269 btrfs_remove_free_space_cache(cache);
8272 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8273 ret = exclude_super_stripes(root, cache);
8275 fprintf(stderr, "could not exclude super stripes: %s\n",
8280 ret = load_free_space_tree(root->fs_info, cache);
8281 free_excluded_extents(root, cache);
8283 fprintf(stderr, "could not load free space tree: %s\n",
8290 ret = load_free_space_cache(root->fs_info, cache);
8295 ret = verify_space_cache(root, cache);
8297 fprintf(stderr, "cache appears valid but isn't %Lu\n",
8298 cache->key.objectid);
8303 task_stop(ctx.info);
8305 return error ? -EINVAL : 0;
/*
 * Verify the data checksums of the range [bytenr, bytenr + num_bytes).
 *
 * @root:        used for its fs_info
 * @bytenr:      start byte number of the data range
 * @num_bytes:   length of the range; tested against fs_info->sectorsize
 * @leaf_offset: offset inside @eb of the first expected checksum
 * @eb:          extent buffer the expected checksums are read from
 *               (presumably the csum tree leaf -- confirm at the caller)
 *
 * The data is read with read_extent_data() into a buffer of num_bytes bytes
 * and checksummed one sectorsize block at a time.  On a mismatch a
 * diagnostic is printed and the number of copies is looked up to decide
 * whether another mirror is available.
 */
8308 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8309 u64 num_bytes, unsigned long leaf_offset,
8310 struct extent_buffer *eb) {
8312 struct btrfs_fs_info *fs_info = root->fs_info;
8314 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8316 unsigned long csum_offset;
8320 u64 data_checked = 0;
/* Ranges that are not a multiple of the sector size are treated specially. */
8326 if (num_bytes % fs_info->sectorsize)
8329 data = malloc(num_bytes);
8333 while (offset < num_bytes) {
8336 read_len = num_bytes - offset;
8337 /* read as much space once a time */
8338 ret = read_extent_data(fs_info, data + offset,
8339 bytenr + offset, &read_len, mirror);
/*
 * NOTE(review): the comment below says "4k", but the loop advances by
 * fs_info->sectorsize, so the unit is one sector whatever its size.
 */
8343 /* verify every 4k data's checksum */
8344 while (data_checked < read_len) {
8346 tmp = offset + data_checked;
8348 csum = btrfs_csum_data((char *)data + tmp,
8349 csum, fs_info->sectorsize);
8350 btrfs_csum_final(csum, (u8 *)&csum);
/* One csum_size-sized entry is stored per sectorsize block of data. */
8352 csum_offset = leaf_offset +
8353 tmp / fs_info->sectorsize * csum_size;
8354 read_extent_buffer(eb, (char *)&csum_expected,
8355 csum_offset, csum_size);
8356 /* try another mirror */
8357 if (csum != csum_expected) {
8358 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8359 mirror, bytenr + tmp,
8360 csum, csum_expected);
8361 num_copies = btrfs_num_copies(root->fs_info,
8363 if (mirror < num_copies - 1) {
8368 data_checked += fs_info->sectorsize;
/*
 * Check that the range [bytenr, bytenr + num_bytes) is covered by
 * EXTENT_ITEMs in the extent tree.
 *
 * The search starts at the key (bytenr, EXTENT_ITEM, -1) and then steps
 * back over the items around that position.  Every extent item that
 * overlaps the range shrinks the [bytenr, num_bytes] window.  If part of the
 * window is left over and 'ret' is zero, "There are no extents for csum
 * range" is printed.
 *
 * An extent that lies inside the window splits it in two: the right part is
 * checked by a recursive call, the left part stays in this invocation.
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 */
8377 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8380 struct btrfs_path path;
8381 struct extent_buffer *leaf;
8382 struct btrfs_key key;
8385 btrfs_init_path(&path);
8386 key.objectid = bytenr;
8387 key.type = BTRFS_EXTENT_ITEM_KEY;
8388 key.offset = (u64)-1;
8391 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8394 fprintf(stderr, "Error looking up extent record %d\n", ret);
8395 btrfs_release_path(&path);
8398 if (path.slots[0] > 0) {
8401 ret = btrfs_prev_leaf(root, &path);
8404 } else if (ret > 0) {
8411 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8414 * Block group items come before extent items if they have the same
8415 * bytenr, so walk back one more just in case. Dear future traveller,
8416 * first congrats on mastering time travel. Now if it's not too much
8417 * trouble could you go back to 2006 and tell Chris to make the
8418 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8419 * EXTENT_ITEM_KEY please?
8421 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8422 if (path.slots[0] > 0) {
8425 ret = btrfs_prev_leaf(root, &path);
8428 } else if (ret > 0) {
8433 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8437 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8438 ret = btrfs_next_leaf(root, &path);
8440 fprintf(stderr, "Error going to next leaf "
8442 btrfs_release_path(&path);
8448 leaf = path.nodes[0];
8449 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8450 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent ends before the range starts. */
8454 if (key.objectid + key.offset < bytenr) {
/* Extent starts beyond the end of the range. */
8458 if (key.objectid > bytenr + num_bytes)
/* Extent starts exactly at the range start: consume its length. */
8461 if (key.objectid == bytenr) {
8462 if (key.offset >= num_bytes) {
8466 num_bytes -= key.offset;
8467 bytenr += key.offset;
/* Extent starts before the range: keep only what lies past its end. */
8468 } else if (key.objectid < bytenr) {
8469 if (key.objectid + key.offset >= bytenr + num_bytes) {
8473 num_bytes = (bytenr + num_bytes) -
8474 (key.objectid + key.offset);
8475 bytenr = key.objectid + key.offset;
8477 if (key.objectid + key.offset < bytenr + num_bytes) {
8478 u64 new_start = key.objectid + key.offset;
8479 u64 new_bytes = bytenr + num_bytes - new_start;
8482 * Weird case, the extent is in the middle of
8483 * our range, we'll have to search one side
8484 * and then the other. Not sure if this happens
8485 * in real life, but no harm in coding it up
8486 * anyway just in case.
8488 btrfs_release_path(&path);
8489 ret = check_extent_exists(root, new_start,
8492 fprintf(stderr, "Right section didn't "
8496 num_bytes = key.objectid - bytenr;
8499 num_bytes = key.objectid - bytenr;
8506 if (num_bytes && !ret) {
8507 fprintf(stderr, "There are no extents for csum range "
8508 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8512 btrfs_release_path(&path);
/*
 * Walk the EXTENT_CSUM items of the csum tree (fs_info->csum_root).
 *
 * For every item the covered data length is derived from the item size:
 * item size / csum_size checksums, each covering one sectorsize block.
 * When check_data_csum is set the data itself is verified with
 * check_extent_csums(); otherwise that step is skipped.
 *
 * Csum items that follow each other without a gap are accumulated in
 * [offset, offset + num_bytes).  When an item does not start at
 * offset + num_bytes the accumulated range is passed to
 * check_extent_exists() and a new range is started at the item's offset.
 */
8516 static int check_csums(struct btrfs_root *root)
8518 struct btrfs_path path;
8519 struct extent_buffer *leaf;
8520 struct btrfs_key key;
8521 u64 offset = 0, num_bytes = 0;
8522 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8526 unsigned long leaf_offset;
/* From here on 'root' refers to the csum tree, not the root passed in. */
8528 root = root->fs_info->csum_root;
8529 if (!extent_buffer_uptodate(root->node)) {
8530 fprintf(stderr, "No valid csum tree found\n");
8534 btrfs_init_path(&path);
8535 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8536 key.type = BTRFS_EXTENT_CSUM_KEY;
8538 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8540 fprintf(stderr, "Error searching csum tree %d\n", ret);
8541 btrfs_release_path(&path);
8545 if (ret > 0 && path.slots[0])
8550 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8551 ret = btrfs_next_leaf(root, &path);
8553 fprintf(stderr, "Error going to next leaf "
8560 leaf = path.nodes[0];
8562 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8563 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Number of data bytes described by the checksums stored in this item. */
8568 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8569 csum_size) * root->fs_info->sectorsize;
8570 if (!check_data_csum)
8571 goto skip_csum_check;
8572 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8573 ret = check_extent_csums(root, key.offset, data_len,
8579 offset = key.offset;
/* Gap between csum items: check the range collected so far, then restart. */
8580 } else if (key.offset != offset + num_bytes) {
8581 ret = check_extent_exists(root, offset, num_bytes);
8583 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8584 "there is no extent record\n",
8585 offset, offset+num_bytes);
8588 offset = key.offset;
8591 num_bytes += data_len;
8595 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field in key order: objectid first,
 * then type, then offset.  Each later field is only looked at when the
 * earlier ones are equal.
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 * Judging by the "<" comparisons, a non-zero result presumably means that
 * @key sorts before @drop_key -- confirm.
 */
8599 static int is_dropped_key(struct btrfs_key *key,
8600 struct btrfs_key *drop_key) {
8601 if (key->objectid < drop_key->objectid)
8603 else if (key->objectid == drop_key->objectid) {
8604 if (key->type < drop_key->type)
8606 else if (key->type == drop_key->type) {
8607 if (key->offset < drop_key->offset)
8615 * Here are the rules for FULL_BACKREF.
8617 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8618 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8620 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
8621 * if it happened after the relocation occurred since we'll have dropped the
8622 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8623 * have no real way to know for sure.
8625 * We process the blocks one root at a time, and we start from the lowest root
8626 * objectid and go to the highest. So we can just lookup the owner backref for
8627 * the record and if we don't find it then we know it doesn't exist and we have
8630 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
8631 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8632 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether BTRFS_BLOCK_FLAG_FULL_BACKREF applies to @buf.
 *
 * @extent_cache: cache tree in which the extent record of @buf is looked up
 * @buf:          tree block being examined
 * @ri:           record of the root the block belongs to in this walk
 * The remaining parameter, 'flags', is written through a pointer: the
 * FULL_BACKREF bit is OR-ed into *flags when it applies.
 *
 * Inputs consulted, in order: ri->objectid, whether @buf is the root block
 * (buf->start == ri->bytenr), the RELOC header flag, the header owner, and
 * finally a tree backref lookup for the owner.
 *
 * Side effect: rec->bad_full_backref is set to 1 when a value already stored
 * in rec->flag_block_full_backref (other than FLAG_UNSET) disagrees with the
 * outcome reached here.
 */
8634 static int calc_extent_flag(struct cache_tree *extent_cache,
8635 struct extent_buffer *buf,
8636 struct root_item_record *ri,
8639 struct extent_record *rec;
8640 struct cache_extent *cache;
8641 struct tree_backref *tback;
8644 cache = lookup_cache_extent(extent_cache, buf->start, 1);
8645 /* we have added this extent before */
8649 rec = container_of(cache, struct extent_record, cache);
8652 * Except file/reloc tree, we can not have
8655 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8660 if (buf->start == ri->bytenr)
8663 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8666 owner = btrfs_header_owner(buf);
8667 if (owner == ri->objectid)
/* Look for a tree backref with parent 0 that names the header owner. */
8670 tback = find_tree_backref(rec, 0, owner);
/* Outcome "not full backref": a previously recorded non-zero flag is bad. */
8675 if (rec->flag_block_full_backref != FLAG_UNSET &&
8676 rec->flag_block_full_backref != 0)
8677 rec->bad_full_backref = 1;
/* Outcome "full backref": a previously recorded flag other than 1 is bad. */
8680 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8681 if (rec->flag_block_full_backref != FLAG_UNSET &&
8682 rec->flag_block_full_backref != 1)
8683 rec->bad_full_backref = 1;
/*
 * Print "Invalid key type(<type>) found in root(<root>)" to stderr.
 *
 * @key_type: key type, formatted by print_key_type()
 * @rootid:   root objectid, formatted by print_objectid()
 */
8687 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8689 fprintf(stderr, "Invalid key type(");
8690 print_key_type(stderr, 0, key_type);
8691 fprintf(stderr, ") found in root(");
8692 print_objectid(stderr, rootid, 0);
8693 fprintf(stderr, ")\n");
8697 * Check if the key is valid with its extent buffer.
8699 * This is an early check in case an invalid key exists in an extent buffer
8700 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check whether a key of type @key_type is allowed in the tree @rootid.
 *
 * @rootid:   objectid of the tree the key was found in
 * @key_type: type field of the key
 *
 * Visible rules: DEV_ITEM and CHUNK_ITEM belong to the chunk tree;
 * EXTENT_ITEM, METADATA_ITEM and BLOCK_GROUP_ITEM to the extent tree;
 * ROOT_ITEM to the root tree; DEV_EXTENT to the device tree.  A violation is
 * reported through report_mismatch_key_root().
 *
 * NOTE(review): the return values are not visible in this excerpt.
 */
8703 static int check_type_with_root(u64 rootid, u8 key_type)
8706 /* Only valid in chunk tree */
8707 case BTRFS_DEV_ITEM_KEY:
8708 case BTRFS_CHUNK_ITEM_KEY:
8709 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
/*
 * NOTE(review): BTRFS_CSUM_TREE_OBJECTID is a tree objectid, while every
 * other case label in this function is a key type.  Given the comment on the
 * next line, BTRFS_EXTENT_CSUM_KEY was probably intended here -- confirm
 * before relying on this check.
 */
8712 /* valid in csum and log tree */
8713 case BTRFS_CSUM_TREE_OBJECTID:
8714 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8718 case BTRFS_EXTENT_ITEM_KEY:
8719 case BTRFS_METADATA_ITEM_KEY:
8720 case BTRFS_BLOCK_GROUP_ITEM_KEY:
8721 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8724 case BTRFS_ROOT_ITEM_KEY:
8725 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8728 case BTRFS_DEV_EXTENT_KEY:
8729 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8735 report_mismatch_key_root(key_type, rootid);
/*
 * Pick the next batch of pending tree blocks, read the first one and record
 * everything it references.
 *
 * @root:              used for fs_info and passed on to the helpers
 * @bits:              array filled by pick_next_pending(); bits[0] is the
 *                     block processed by this call
 * @pending, @reada, @nodes, @seen:
 *                     cache trees tracking blocks still to visit, blocks
 *                     with readahead issued, interior nodes and blocks
 *                     already seen
 * @extent_cache:      extent records and their backrefs
 * @chunk_cache, @dev_cache, @block_group_cache, @dev_extent_cache:
 *                     receive chunk, device, block group and device extent
 *                     items found in leaves
 * @ri:                record of the root currently being walked
 *
 * Leaves: every item is dispatched on its key type.
 * Nodes:  an extent record and a tree backref are added for every child
 *         pointer, and the child is queued in 'nodes' or 'pending'.
 *
 * At the end the global btree byte counters are updated and the buffer is
 * released with free_extent_buffer().
 */
8739 static int run_next_block(struct btrfs_root *root,
8740 struct block_info *bits,
8743 struct cache_tree *pending,
8744 struct cache_tree *seen,
8745 struct cache_tree *reada,
8746 struct cache_tree *nodes,
8747 struct cache_tree *extent_cache,
8748 struct cache_tree *chunk_cache,
8749 struct rb_root *dev_cache,
8750 struct block_group_tree *block_group_cache,
8751 struct device_extent_tree *dev_extent_cache,
8752 struct root_item_record *ri)
8754 struct btrfs_fs_info *fs_info = root->fs_info;
8755 struct extent_buffer *buf;
8756 struct extent_record *rec = NULL;
8767 struct btrfs_key key;
8768 struct cache_extent *cache;
/* Select candidate blocks and issue readahead for them. */
8771 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8772 bits_nr, &reada_bits);
8777 for(i = 0; i < nritems; i++) {
8778 ret = add_cache_extent(reada, bits[i].start,
8783 /* fixme, get the parent transid */
8784 readahead_tree_block(fs_info, bits[i].start, 0);
8787 *last = bits[0].start;
8788 bytenr = bits[0].start;
8789 size = bits[0].size;
/* The block is about to be processed: drop it from the tracking trees. */
8791 cache = lookup_cache_extent(pending, bytenr, size);
8793 remove_cache_extent(pending, cache);
8796 cache = lookup_cache_extent(reada, bytenr, size);
8798 remove_cache_extent(reada, cache);
8801 cache = lookup_cache_extent(nodes, bytenr, size);
8803 remove_cache_extent(nodes, cache);
/* The extent record, when present, supplies the generation for the read. */
8806 cache = lookup_cache_extent(extent_cache, bytenr, size);
8808 rec = container_of(cache, struct extent_record, cache);
8809 gen = rec->parent_generation;
8812 /* fixme, get the real parent transid */
8813 buf = read_tree_block(root->fs_info, bytenr, gen);
8814 if (!extent_buffer_uptodate(buf)) {
8815 record_bad_block_io(root->fs_info,
8816 extent_cache, bytenr, size);
8820 nritems = btrfs_header_nritems(buf);
/*
 * Determine the block flags.  Unless the extent tree is being rebuilt
 * (init_extent_tree), they are looked up in the extent tree first;
 * calc_extent_flag() is the other source, and FULL_BACKREF is assumed when
 * it reports a failure.
 */
8823 if (!init_extent_tree) {
8824 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8825 btrfs_header_level(buf), 1, NULL,
8828 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8830 fprintf(stderr, "Couldn't calc extent flags\n");
8831 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8836 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8838 fprintf(stderr, "Couldn't calc extent flags\n");
8839 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8843 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8845 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8846 ri->objectid == btrfs_header_owner(buf)) {
8848 * Ok we got to this block from it's original owner and
8849 * we have FULL_BACKREF set. Relocation can leave
8850 * converted blocks over so this is altogether possible,
8851 * however it's not possible if the generation > the
8852 * last snapshot, so check for this case.
8854 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8855 btrfs_header_generation(buf) > ri->last_snapshot) {
8856 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8857 rec->bad_full_backref = 1;
8862 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8863 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8864 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8865 rec->bad_full_backref = 1;
8869 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8870 rec->flag_block_full_backref = 1;
8874 rec->flag_block_full_backref = 0;
8876 owner = btrfs_header_owner(buf);
8879 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: account the unused space, then handle each item by key type. */
8883 if (btrfs_is_leaf(buf)) {
8884 btree_space_waste += btrfs_leaf_free_space(root, buf);
8885 for (i = 0; i < nritems; i++) {
8886 struct btrfs_file_extent_item *fi;
8887 btrfs_item_key_to_cpu(buf, &key, i);
8889 * Check key type against the leaf owner.
8890 * Could filter quite a lot of early error if
8893 if (check_type_with_root(btrfs_header_owner(buf),
8895 fprintf(stderr, "ignoring invalid key\n");
8898 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8899 process_extent_item(root, extent_cache, buf,
8903 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8904 process_extent_item(root, extent_cache, buf,
8908 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8910 btrfs_item_size_nr(buf, i);
8913 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8914 process_chunk_item(chunk_cache, &key, buf, i);
8917 if (key.type == BTRFS_DEV_ITEM_KEY) {
8918 process_device_item(dev_cache, &key, buf, i);
8921 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8922 process_block_group_item(block_group_cache,
8926 if (key.type == BTRFS_DEV_EXTENT_KEY) {
8927 process_device_extent_item(dev_extent_cache,
8932 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8933 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8934 process_extent_ref_v0(extent_cache, buf, i);
/* TREE_BLOCK_REF: key.offset is passed in the root position, parent is 0. */
8941 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8942 ret = add_tree_backref(extent_cache,
8943 key.objectid, 0, key.offset, 0);
8946 "add_tree_backref failed (leaf tree block): %s",
/* SHARED_BLOCK_REF: key.offset is passed in the parent position, root is 0. */
8950 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8951 ret = add_tree_backref(extent_cache,
8952 key.objectid, key.offset, 0, 0);
8955 "add_tree_backref failed (leaf shared block): %s",
8959 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8960 struct btrfs_extent_data_ref *ref;
8961 ref = btrfs_item_ptr(buf, i,
8962 struct btrfs_extent_data_ref);
8963 add_data_backref(extent_cache,
8965 btrfs_extent_data_ref_root(buf, ref),
8966 btrfs_extent_data_ref_objectid(buf,
8968 btrfs_extent_data_ref_offset(buf, ref),
8969 btrfs_extent_data_ref_count(buf, ref),
8970 0, root->fs_info->sectorsize);
8973 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8974 struct btrfs_shared_data_ref *ref;
8975 ref = btrfs_item_ptr(buf, i,
8976 struct btrfs_shared_data_ref);
8977 add_data_backref(extent_cache,
8978 key.objectid, key.offset, 0, 0, 0,
8979 btrfs_shared_data_ref_count(buf, ref),
8980 0, root->fs_info->sectorsize);
/*
 * Orphan items: a copy of the key is queued on the global delete_items list
 * together with the owning root, except for the case tested first.
 */
8983 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8984 struct bad_item *bad;
8986 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8990 bad = malloc(sizeof(struct bad_item));
8993 INIT_LIST_HEAD(&bad->list);
8994 memcpy(&bad->key, &key,
8995 sizeof(struct btrfs_key));
8996 bad->root_id = owner;
8997 list_add_tail(&bad->list, &delete_items);
/* Everything below in the item loop deals with file extent items only. */
9000 if (key.type != BTRFS_EXTENT_DATA_KEY)
9002 fi = btrfs_item_ptr(buf, i,
9003 struct btrfs_file_extent_item);
9004 if (btrfs_file_extent_type(buf, fi) ==
9005 BTRFS_FILE_EXTENT_INLINE)
9007 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
9010 data_bytes_allocated +=
9011 btrfs_file_extent_disk_num_bytes(buf, fi);
9012 if (data_bytes_allocated < root->fs_info->sectorsize) {
9015 data_bytes_referenced +=
9016 btrfs_file_extent_num_bytes(buf, fi);
/* The backref offset is the file offset minus the offset into the extent. */
9017 add_data_backref(extent_cache,
9018 btrfs_file_extent_disk_bytenr(buf, fi),
9019 parent, owner, key.objectid, key.offset -
9020 btrfs_file_extent_offset(buf, fi), 1, 1,
9021 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Interior node: walk the child pointers. */
9025 struct btrfs_key first_key;
9027 first_key.objectid = 0;
9030 btrfs_item_key_to_cpu(buf, &first_key, 0);
9031 level = btrfs_header_level(buf);
9032 for (i = 0; i < nritems; i++) {
9033 struct extent_record tmpl;
9035 ptr = btrfs_node_blockptr(buf, i);
9036 size = root->fs_info->nodesize;
9037 btrfs_node_key_to_cpu(buf, &key, i);
/* Children covered by the root's drop progress get special treatment. */
9039 if ((level == ri->drop_level)
9040 && is_dropped_key(&key, &ri->drop_key)) {
9045 memset(&tmpl, 0, sizeof(tmpl));
9046 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
9047 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
9052 tmpl.max_size = size;
9053 ret = add_extent_rec(extent_cache, &tmpl);
9057 ret = add_tree_backref(extent_cache, ptr, parent,
9061 "add_tree_backref failed (non-leaf block): %s",
9067 add_pending(nodes, seen, ptr, size);
9069 add_pending(pending, seen, ptr, size);
/* Unused key pointer slots of the node count as wasted btree space. */
9072 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(fs_info) -
9073 nritems) * sizeof(struct btrfs_key_ptr);
/* Global statistics, split by the owner recorded in the block header. */
9075 total_btree_bytes += buf->len;
9076 if (fs_root_objectid(btrfs_header_owner(buf)))
9077 total_fs_tree_bytes += buf->len;
9078 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9079 total_extent_tree_bytes += buf->len;
9081 free_extent_buffer(buf);
/*
 * Queue the root block @buf for processing and create its extent record.
 *
 * @buf:          root tree block
 * @extent_cache: receives the extent record and the tree backref
 * @pending:      queue used when the header level of @buf is 0
 * @seen:         passed to add_pending() together with the chosen queue
 * @nodes:        queue used when the header level of @buf is above 0
 *
 * The tree backref is added with the block itself as parent for the reloc
 * tree and for blocks whose backref revision is older than
 * BTRFS_MIXED_BACKREF_REV; otherwise parent is 0 and 'objectid' is used.
 *
 * NOTE(review): the result of add_extent_rec() is not assigned on the
 * visible line.
 */
9085 static int add_root_to_pending(struct extent_buffer *buf,
9086 struct cache_tree *extent_cache,
9087 struct cache_tree *pending,
9088 struct cache_tree *seen,
9089 struct cache_tree *nodes,
9092 struct extent_record tmpl;
9095 if (btrfs_header_level(buf) > 0)
9096 add_pending(nodes, seen, buf->start, buf->len);
9098 add_pending(pending, seen, buf->start, buf->len);
9100 memset(&tmpl, 0, sizeof(tmpl));
9101 tmpl.start = buf->start;
9106 tmpl.max_size = buf->len;
9107 add_extent_rec(extent_cache, &tmpl);
9109 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9110 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9111 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9114 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9119 /* as we fix the tree, we might be deleting blocks that
9120 * we're tracking for repair. This hook makes sure we
9121 * remove any backrefs for blocks as we are fixing them.
/*
 * Hook called when an extent reference is dropped: it keeps the extent
 * records in fs_info->fsck_extent_cache in step with the change.
 *
 * The extent record covering [bytenr, bytenr + num_bytes) is looked up.
 * 'owner' >= BTRFS_FIRST_FREE_OBJECTID selects data backref handling,
 * anything else tree backref handling.  The matching backref has its
 * counters and found_* markers reduced, and the record is finally passed to
 * maybe_free_extent_rec().
 */
9123 static int free_extent_hook(struct btrfs_trans_handle *trans,
9124 struct btrfs_root *root,
9125 u64 bytenr, u64 num_bytes, u64 parent,
9126 u64 root_objectid, u64 owner, u64 offset,
9129 struct extent_record *rec;
9130 struct cache_extent *cache;
9132 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
9134 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9135 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9139 rec = container_of(cache, struct extent_record, cache);
9141 struct data_backref *back;
9142 back = find_data_backref(rec, parent, root_objectid, owner,
9143 offset, 1, bytenr, num_bytes);
/* Reduce the counts that came from real refs and from the extent tree. */
9146 if (back->node.found_ref) {
9147 back->found_ref -= refs_to_drop;
9149 rec->refs -= refs_to_drop;
9151 if (back->node.found_extent_tree) {
9152 back->num_refs -= refs_to_drop;
9153 if (rec->extent_item_refs)
9154 rec->extent_item_refs -= refs_to_drop;
/* A counter that reached zero clears the corresponding found_* marker. */
9156 if (back->found_ref == 0)
9157 back->node.found_ref = 0;
9158 if (back->num_refs == 0)
9159 back->node.found_extent_tree = 0;
/*
 * NOTE(review): the backref is erased only while found_ref is still set,
 * although found_ref is cleared above once the count reaches zero.
 * "!found_ref" may have been intended -- confirm.
 */
9161 if (!back->node.found_extent_tree && back->node.found_ref) {
9162 rb_erase(&back->node.node, &rec->backref_tree);
9166 struct tree_backref *back;
9167 back = find_tree_backref(rec, parent, root_objectid);
9170 if (back->node.found_ref) {
9173 back->node.found_ref = 0;
9175 if (back->node.found_extent_tree) {
9176 if (rec->extent_item_refs)
9177 rec->extent_item_refs--;
9178 back->node.found_extent_tree = 0;
/*
 * NOTE(review): on the visible lines found_ref is always 0 by this point,
 * so this condition looks like it can never be true -- confirm.
 */
9180 if (!back->node.found_extent_tree && back->node.found_ref) {
9181 rb_erase(&back->node.node, &rec->backref_tree);
9185 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent tree items whose key objectid equals 'bytenr'.
 *
 * @trans: transaction the deletions run in
 * @root:  used for fs_info (extent root, nodesize)
 * @path:  caller supplied path, released before the function is done
 *
 * The search key starts with offset (u64)-1.  Items of the extent item,
 * metadata item and backref key types are deleted one at a time, each
 * announced with "repair deleting extent record".  After an EXTENT_ITEM or
 * METADATA_ITEM is deleted, btrfs_update_block_group() is called.
 */
9190 static int delete_extent_records(struct btrfs_trans_handle *trans,
9191 struct btrfs_root *root,
9192 struct btrfs_path *path,
9195 struct btrfs_key key;
9196 struct btrfs_key found_key;
9197 struct extent_buffer *leaf;
9202 key.objectid = bytenr;
9204 key.offset = (u64)-1;
9207 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9214 if (path->slots[0] == 0)
9220 leaf = path->nodes[0];
9221 slot = path->slots[0];
9223 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9224 if (found_key.objectid != bytenr)
/*
 * An item of any other type is left alone: the search key is moved just
 * below it (previous offset for type 0, otherwise the previous type with
 * offset (u64)-1), presumably so that the next search skips it.
 */
9227 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9228 found_key.type != BTRFS_METADATA_ITEM_KEY &&
9229 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9230 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9231 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9232 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9233 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9234 btrfs_release_path(path);
9235 if (found_key.type == 0) {
9236 if (found_key.offset == 0)
9238 key.offset = found_key.offset - 1;
9239 key.type = found_key.type;
9241 key.type = found_key.type - 1;
9242 key.offset = (u64)-1;
9246 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9247 found_key.objectid, found_key.type, found_key.offset);
9249 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9252 btrfs_release_path(path);
/*
 * Size of the deleted extent: an EXTENT_ITEM stores it in the key offset,
 * for a METADATA_ITEM the nodesize is used.
 */
9254 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9255 found_key.type == BTRFS_METADATA_ITEM_KEY) {
9256 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9257 found_key.offset : root->fs_info->nodesize;
9259 ret = btrfs_update_block_group(root, bytenr,
9266 btrfs_release_path(path);
9271 * for a single backref, this will allocate a new extent
9272 * and add the backref to it.
/*
 * Write the extent item for @rec (when needed) and add the reference(s)
 * described by @back to it.
 *
 * @trans:     transaction the changes run in
 * @info:      fs_info; its extent_root is the tree being modified
 * @path:      caller supplied path, released after each step
 * @rec:       extent record supplying start, max_size, generation and the
 *             tree block info
 * @back:      backref to record; back->is_data selects the data path
 * @allocated: presumably tells whether the extent item already exists
 *             (the test that uses it is not visible here -- confirm)
 * @flags:     extra flags OR-ed into the extent flags of a tree block
 *
 * Extent item creation: an empty item keyed (rec->start, EXTENT_ITEM,
 * rec->max_size) is inserted with a ref count of 0; tree blocks also get a
 * btrfs_tree_block_info after the extent item.  The block group accounting
 * is then updated for the new extent.
 *
 * Reference creation: btrfs_inc_extent_ref() is called once per found ref
 * for data backrefs and once for a tree backref.
 */
9274 static int record_extent(struct btrfs_trans_handle *trans,
9275 struct btrfs_fs_info *info,
9276 struct btrfs_path *path,
9277 struct extent_record *rec,
9278 struct extent_backref *back,
9279 int allocated, u64 flags)
9282 struct btrfs_root *extent_root = info->extent_root;
9283 struct extent_buffer *leaf;
9284 struct btrfs_key ins_key;
9285 struct btrfs_extent_item *ei;
9286 struct data_backref *dback;
9287 struct btrfs_tree_block_info *bi;
9290 rec->max_size = max_t(u64, rec->max_size,
9294 u32 item_size = sizeof(*ei);
9297 item_size += sizeof(*bi);
9299 ins_key.objectid = rec->start;
9300 ins_key.offset = rec->max_size;
9301 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9303 ret = btrfs_insert_empty_item(trans, extent_root, path,
9304 &ins_key, item_size);
9308 leaf = path->nodes[0];
9309 ei = btrfs_item_ptr(leaf, path->slots[0],
9310 struct btrfs_extent_item);
/* The ref count starts at 0; references are added further down. */
9312 btrfs_set_extent_refs(leaf, ei, 0);
9313 btrfs_set_extent_generation(leaf, ei, rec->generation);
9315 if (back->is_data) {
9316 btrfs_set_extent_flags(leaf, ei,
9317 BTRFS_EXTENT_FLAG_DATA);
/*
 * NOTE(review): "©_key" in the calls below looks like "&copy_key" damaged by
 * HTML entity decoding ("&copy" rendered as the copyright sign), and the
 * declaration carries a stray second semicolon -- confirm against a pristine
 * copy of this file.
 */
9319 struct btrfs_disk_key copy_key;;
9321 bi = (struct btrfs_tree_block_info *)(ei + 1);
9322 memset_extent_buffer(leaf, 0, (unsigned long)bi,
9325 btrfs_set_disk_key_objectid(©_key,
9326 rec->info_objectid);
9327 btrfs_set_disk_key_type(©_key, 0);
9328 btrfs_set_disk_key_offset(©_key, 0);
9330 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9331 btrfs_set_tree_block_key(leaf, bi, ©_key);
9333 btrfs_set_extent_flags(leaf, ei,
9334 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9337 btrfs_mark_buffer_dirty(leaf);
9338 ret = btrfs_update_block_group(extent_root, rec->start,
9339 rec->max_size, 1, 0);
9342 btrfs_release_path(path);
9345 if (back->is_data) {
9349 dback = to_data_backref(back);
9350 if (back->full_backref)
9351 parent = dback->parent;
9355 for (i = 0; i < dback->found_ref; i++) {
9356 /* if parent != 0, we're doing a full backref
9357 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9358 * just makes the backref allocator create a data
9361 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9362 rec->start, rec->max_size,
9366 BTRFS_FIRST_FREE_OBJECTID :
9372 fprintf(stderr, "adding new data backref"
9373 " on %llu %s %llu owner %llu"
9374 " offset %llu found %d\n",
9375 (unsigned long long)rec->start,
9376 back->full_backref ?
9378 back->full_backref ?
9379 (unsigned long long)parent :
9380 (unsigned long long)dback->root,
9381 (unsigned long long)dback->owner,
9382 (unsigned long long)dback->offset,
9386 struct tree_backref *tback;
9388 tback = to_tree_backref(back);
9389 if (back->full_backref)
9390 parent = tback->parent;
9394 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9395 rec->start, rec->max_size,
9396 parent, tback->root, 0, 0);
9397 fprintf(stderr, "adding new tree backref on "
9398 "start %llu len %llu parent %llu root %llu\n",
9399 rec->start, rec->max_size, parent, tback->root);
9402 btrfs_release_path(path);
/*
 * Search the list @entries for the entry whose bytenr and bytes both equal
 * the given @bytenr and @bytes.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * callers test the result against NULL.
 */
9406 static struct extent_entry *find_entry(struct list_head *entries,
9407 u64 bytenr, u64 bytes)
9409 struct extent_entry *entry = NULL;
9411 list_for_each_entry(entry, entries, list) {
9412 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Choose, from the list @entries, the entry that is most likely correct,
 * based on the 'count' and 'broken' members of each entry.
 *
 * Visible rules: an entry whose 'broken' equals its 'count' is handled
 * separately, as its own comment explains.  While no candidate exists, the
 * larger count of the previous and current entry wins.  An existing
 * candidate with a smaller count than the current entry is handled by the
 * final branch, and a tie in count with the candidate is treated as
 * undecided.
 *
 * NOTE(review): several branches and the return statement are not visible
 * in this excerpt -- confirm the details against the full source.
 */
9419 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9421 struct extent_entry *entry, *best = NULL, *prev = NULL;
9423 list_for_each_entry(entry, entries, list) {
9425 * If there are as many broken entries as entries then we know
9426 * not to trust this particular entry.
9428 if (entry->broken == entry->count)
9432 * Special case, when there are only two entries and 'best' is
9442 * If our current entry == best then we can't be sure our best
9443 * is really the best, so we need to keep searching.
9445 if (best && best->count == entry->count) {
9451 /* Prev == entry, not good enough, have to keep searching */
9452 if (!prev->broken && prev->count == entry->count)
9456 best = (prev->count > entry->count) ? prev : entry;
9457 else if (best->count < entry->count)
/*
 * Rewrite the file extent item behind @dback so that it agrees with @entry.
 *
 * @info:  fs_info used to read the fs root named by dback->root
 * @path:  caller supplied path; released before the transaction starts and
 *         again after the commit
 * @dback: data backref whose file extent item is to be fixed
 * @entry: the (bytenr, bytes) pair that was chosen as correct
 *
 * Steps:
 *  1. Read the root and search from (dback->owner, EXTENT_DATA,
 *     dback->offset) for the file extent whose disk_bytenr and
 *     disk_num_bytes match @dback.
 *  2. Start a transaction and search for the same key again, this time with
 *     COW enabled.
 *  3. Set disk_bytenr, offset and disk_num_bytes from @entry; ram_bytes is
 *     only rewritten for uncompressed extents.
 *  4. Mark the leaf dirty and commit.
 *
 * Compressed extents whose start differs from @entry, and refs that reach
 * outside @entry, are reported with a request for a btrfs-image.
 *
 * Returns 'ret' if it is non-zero, otherwise the result of the commit.
 */
9465 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9466 struct data_backref *dback, struct extent_entry *entry)
9468 struct btrfs_trans_handle *trans;
9469 struct btrfs_root *root;
9470 struct btrfs_file_extent_item *fi;
9471 struct extent_buffer *leaf;
9472 struct btrfs_key key;
9476 key.objectid = dback->root;
9477 key.type = BTRFS_ROOT_ITEM_KEY;
9478 key.offset = (u64)-1;
9479 root = btrfs_read_fs_root(info, &key);
9481 fprintf(stderr, "Couldn't find root for our ref\n");
9486 * The backref points to the original offset of the extent if it was
9487 * split, so we need to search down to the offset we have and then walk
9488 * forward until we find the backref we're looking for.
9490 key.objectid = dback->owner;
9491 key.type = BTRFS_EXTENT_DATA_KEY;
9492 key.offset = dback->offset;
9493 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9495 fprintf(stderr, "Error looking up ref %d\n", ret);
9500 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9501 ret = btrfs_next_leaf(root, path);
9503 fprintf(stderr, "Couldn't find our ref, next\n");
9507 leaf = path->nodes[0];
9508 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9509 if (key.objectid != dback->owner ||
9510 key.type != BTRFS_EXTENT_DATA_KEY) {
9511 fprintf(stderr, "Couldn't find our ref, search\n");
9514 fi = btrfs_item_ptr(leaf, path->slots[0],
9515 struct btrfs_file_extent_item);
9516 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9517 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* This is the item to fix once both values match the backref. */
9519 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9524 btrfs_release_path(path);
9526 trans = btrfs_start_transaction(root, 1);
9528 return PTR_ERR(trans);
9531 * Ok we have the key of the file extent we want to fix, now we can cow
9532 * down to the thing and fix it.
9534 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9536 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9537 key.objectid, key.type, key.offset, ret);
9541 fprintf(stderr, "Well that's odd, we just found this key "
9542 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9547 leaf = path->nodes[0];
9548 fi = btrfs_item_ptr(leaf, path->slots[0],
9549 struct btrfs_file_extent_item);
9551 if (btrfs_file_extent_compression(leaf, fi) &&
9552 dback->disk_bytenr != entry->bytenr) {
9553 fprintf(stderr, "Ref doesn't match the record start and is "
9554 "compressed, please take a btrfs-image of this file "
9555 "system and send it to a btrfs developer so they can "
9556 "complete this functionality for bytenr %Lu\n",
9557 dback->disk_bytenr);
/*
 * Three cases follow: a broken backref with a different start simply takes
 * the entry's start; a ref that starts after the entry must not reach past
 * the entry's end; a ref that starts before the entry must reach the
 * entry's start once its offset is added.
 */
9562 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9563 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9564 } else if (dback->disk_bytenr > entry->bytenr) {
9565 u64 off_diff, offset;
9567 off_diff = dback->disk_bytenr - entry->bytenr;
9568 offset = btrfs_file_extent_offset(leaf, fi);
9569 if (dback->disk_bytenr + offset +
9570 btrfs_file_extent_num_bytes(leaf, fi) >
9571 entry->bytenr + entry->bytes) {
9572 fprintf(stderr, "Ref is past the entry end, please "
9573 "take a btrfs-image of this file system and "
9574 "send it to a btrfs developer, ref %Lu\n",
9575 dback->disk_bytenr);
9580 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9581 btrfs_set_file_extent_offset(leaf, fi, offset);
9582 } else if (dback->disk_bytenr < entry->bytenr) {
9585 offset = btrfs_file_extent_offset(leaf, fi);
9586 if (dback->disk_bytenr + offset < entry->bytenr) {
9587 fprintf(stderr, "Ref is before the entry start, please"
9588 " take a btrfs-image of this file system and "
9589 "send it to a btrfs developer, ref %Lu\n",
9590 dback->disk_bytenr);
/* Re-base the offset from the old extent start onto the entry's start. */
9595 offset += dback->disk_bytenr;
9596 offset -= entry->bytenr;
9597 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9598 btrfs_set_file_extent_offset(leaf, fi, offset);
9601 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9604 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9605 * only do this if we aren't using compression, otherwise it's a
9608 if (!btrfs_file_extent_compression(leaf, fi))
9609 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9611 printf("ram bytes may be wrong?\n");
9612 btrfs_mark_buffer_dirty(leaf);
9614 err = btrfs_commit_transaction(trans, root);
9615 btrfs_release_path(path);
9616 return ret ? ret : err;
/*
 * Make the data backrefs of @rec agree on a single (bytenr, bytes) extent.
 *
 * First walk of rec->backref_tree: each entry is tested for being a full
 * backref or a non-data backref, and for found_ref == 0.  The
 * (disk_bytenr, bytes) pair of a backref is looked up with find_entry() in
 * a local list of candidate entries; new candidates are allocated, zeroed,
 * filled in and appended to that list.  A backref that differs from
 * (rec->start, rec->nr), or that is marked broken, is tested for as well.
 *
 * find_most_right_entry() then picks the candidate called "best".  The
 * extent record's own (rec->start, rec->nr) is looked up among the
 * candidates and, on one branch, added as a new candidate before
 * find_most_right_entry() is called again.
 *
 * Second walk: a backref whose bytes and disk_bytenr are compared against
 * "best", and repair_ref() is called with "best" for the backrefs being
 * fixed up.
 *
 * Last, every candidate is unlinked from the temporary list.
 *
 * @info: filesystem being checked; forwarded to repair_ref()
 * @path: caller-provided path; forwarded to repair_ref()
 * @rec:  extent record whose backref_tree is walked
 *
 * NOTE(review): best->bytenr is dereferenced when comparing against
 * rec->start; presumably an earlier check guarantees best is non-NULL at
 * that point, confirm.
 */
9619 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9620 struct extent_record *rec)
9622 struct extent_backref *back, *tmp;
9623 struct data_backref *dback;
9624 struct extent_entry *entry, *best = NULL;
9627 int broken_entries = 0;
9632 * Metadata is easy and the backrefs should always agree on bytenr and
9633 * size, if not we've got bigger issues.
9638 rbtree_postorder_for_each_entry_safe(back, tmp,
9639 &rec->backref_tree, node) {
9640 if (back->full_backref || !back->is_data)
9643 dback = to_data_backref(back);
9646 * We only pay attention to backrefs that we found a real
9649 if (dback->found_ref == 0)
9653 * For now we only catch when the bytes don't match, not the
9654 * bytenr. We can easily do this at the same time, but I want
9655 * to have a fs image to test on before we just add repair
9656 * functionality willy-nilly so we know we won't screw up the
9660 entry = find_entry(&entries, dback->disk_bytenr,
9663 entry = malloc(sizeof(struct extent_entry));
9668 memset(entry, 0, sizeof(*entry));
9669 entry->bytenr = dback->disk_bytenr;
9670 entry->bytes = dback->bytes;
9671 list_add_tail(&entry->list, &entries);
9676 * If we only have on entry we may think the entries agree when
9677 * in reality they don't so we have to do some extra checking.
9679 if (dback->disk_bytenr != rec->start ||
9680 dback->bytes != rec->nr || back->broken)
9691 /* Yay all the backrefs agree, carry on good sir */
9692 if (nr_entries <= 1 && !mismatch)
9695 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9696 "%Lu\n", rec->start);
9699 * First we want to see if the backrefs can agree amongst themselves who
9700 * is right, so figure out which one of the entries has the highest
9703 best = find_most_right_entry(&entries);
9706 * Ok so we may have an even split between what the backrefs think, so
9707 * this is where we use the extent ref to see what it thinks.
9710 entry = find_entry(&entries, rec->start, rec->nr);
9711 if (!entry && (!broken_entries || !rec->found_rec)) {
9712 fprintf(stderr, "Backrefs don't agree with each other "
9713 "and extent record doesn't agree with anybody,"
9714 " so we can't fix bytenr %Lu bytes %Lu\n",
9715 rec->start, rec->nr);
9718 } else if (!entry) {
9720 * Ok our backrefs were broken, we'll assume this is the
9721 * correct value and add an entry for this range.
9723 entry = malloc(sizeof(struct extent_entry));
9728 memset(entry, 0, sizeof(*entry));
9729 entry->bytenr = rec->start;
9730 entry->bytes = rec->nr;
9731 list_add_tail(&entry->list, &entries);
9735 best = find_most_right_entry(&entries);
9737 fprintf(stderr, "Backrefs and extent record evenly "
9738 "split on who is right, this is going to "
9739 "require user input to fix bytenr %Lu bytes "
9740 "%Lu\n", rec->start, rec->nr);
9747 * I don't think this can happen currently as we'll abort() if we catch
9748 * this case higher up, but in case somebody removes that we still can't
9749 * deal with it properly here yet, so just bail out of that's the case.
9751 if (best->bytenr != rec->start) {
9752 fprintf(stderr, "Extent start and backref starts don't match, "
9753 "please use btrfs-image on this file system and send "
9754 "it to a btrfs developer so they can make fsck fix "
9755 "this particular case. bytenr is %Lu, bytes is %Lu\n",
9756 rec->start, rec->nr);
9762 * Ok great we all agreed on an extent record, let's go find the real
9763 * references and fix up the ones that don't match.
9765 rbtree_postorder_for_each_entry_safe(back, tmp,
9766 &rec->backref_tree, node) {
9767 if (back->full_backref || !back->is_data)
9770 dback = to_data_backref(back);
9773 * Still ignoring backrefs that don't have a real ref attached
9776 if (dback->found_ref == 0)
9779 if (dback->bytes == best->bytes &&
9780 dback->disk_bytenr == best->bytenr)
9783 ret = repair_ref(info, path, dback, best);
9789 * Ok we messed with the actual refs, which means we need to drop our
9790 * entire cache and go back and rescan. I know this is a huge pain and
9791 * adds a lot of extra work, but it's the only way to be safe. Once all
9792 * the backrefs agree we may not need to do anything to the extent
9797 while (!list_empty(&entries)) {
9798 entry = list_entry(entries.next, struct extent_entry, list);
9799 list_del_init(&entry->list);
/*
 * Swap @rec out of @extent_cache in favour of its first duplicate.
 *
 * @rec is tested up front for found_rec and for having more than one
 * duplicate.  Otherwise @rec is removed from the cache and the first record
 * on rec->dups ("good") is unlinked, re-initialised (list heads, cache
 * start/size, checked flags, duplicate count) and given @rec's ref count and
 * backrefs.
 *
 * The cache is then probed for records overlapping "good":
 *  - a record with found_rec set or with duplicates of its own is moved,
 *    together with those duplicates, onto good->dups, and "good" is put on
 *    the global duplicate_extents list if it is not on a list already;
 *  - any other record has its refs and backrefs merged into "good".
 * In both cases the overlapping record is removed from the cache.
 *
 * "good" is finally inserted into @extent_cache.
 *
 * @extent_cache: cache of extent records, modified in place
 * @rec:          record that carried the duplicate
 *
 * The final return statement yields 0 when "good" has duplicates and 1 when
 * it has none.
 */
9805 static int process_duplicates(struct cache_tree *extent_cache,
9806 struct extent_record *rec)
9808 struct extent_record *good, *tmp;
9809 struct cache_extent *cache;
9813 * If we found a extent record for this extent then return, or if we
9814 * have more than one duplicate we are likely going to need to delete
9817 if (rec->found_rec || rec->num_duplicates > 1)
9820 /* Shouldn't happen but just in case */
9821 BUG_ON(!rec->num_duplicates);
9824 * So this happens if we end up with a backref that doesn't match the
9825 * actual extent entry. So either the backref is bad or the extent
9826 * entry is bad. Either way we want to have the extent_record actually
9827 * reflect what we found in the extent_tree, so we need to take the
9828 * duplicate out and use that as the extent_record since the only way we
9829 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9831 remove_cache_extent(extent_cache, &rec->cache);
9833 good = to_extent_record(rec->dups.next);
9834 list_del_init(&good->list);
9835 INIT_LIST_HEAD(&good->backrefs);
9836 INIT_LIST_HEAD(&good->dups);
9837 good->cache.start = good->start;
9838 good->cache.size = good->nr;
9839 good->content_checked = 0;
9840 good->owner_ref_checked = 0;
9841 good->num_duplicates = 0;
9842 good->refs = rec->refs;
9843 list_splice_init(&rec->backrefs, &good->backrefs);
9845 cache = lookup_cache_extent(extent_cache, good->start,
9849 tmp = container_of(cache, struct extent_record, cache);
9852 * If we find another overlapping extent and it's found_rec is
9853 * set then it's a duplicate and we need to try and delete
9856 if (tmp->found_rec || tmp->num_duplicates > 0) {
9857 if (list_empty(&good->list))
9858 list_add_tail(&good->list,
9859 &duplicate_extents);
9860 good->num_duplicates += tmp->num_duplicates + 1;
9861 list_splice_init(&tmp->dups, &good->dups);
9862 list_del_init(&tmp->list);
9863 list_add_tail(&tmp->list, &good->dups);
9864 remove_cache_extent(extent_cache, &tmp->cache);
9869 * Ok we have another non extent item backed extent rec, so lets
9870 * just add it to this extent and carry on like we did above.
9872 good->refs += tmp->refs;
9873 list_splice_init(&tmp->backrefs, &good->backrefs);
9874 remove_cache_extent(extent_cache, &tmp->cache);
9877 ret = insert_cache_extent(extent_cache, &good->cache);
9880 return good->num_duplicates ? 0 : 1;
/*
 * Delete the extent items belonging to the duplicates of @rec.
 *
 * The duplicates on rec->dups are first compared pairwise against a
 * candidate "good" record (start, length and end offsets); a pair that is
 * not fully covered is reported on stderr.  Records to remove are gathered
 * on a local delete_list, and each one is looked up in the extent root by
 * (start, BTRFS_EXTENT_ITEM_KEY, nr) and deleted inside one transaction.
 *
 * @root: any root of the filesystem; only root->fs_info->extent_root is used
 * @rec:  record whose duplicates are processed
 *
 * The final return statement yields ret when it is non-zero and nr_del
 * otherwise.
 */
9883 static int delete_duplicate_records(struct btrfs_root *root,
9884 struct extent_record *rec)
9886 struct btrfs_trans_handle *trans;
9887 LIST_HEAD(delete_list);
9888 struct btrfs_path path;
9889 struct extent_record *tmp, *good, *n;
9892 struct btrfs_key key;
9894 btrfs_init_path(&path);
9897 /* Find the record that covers all of the duplicates. */
9898 list_for_each_entry(tmp, &rec->dups, list) {
9899 if (good->start < tmp->start)
9901 if (good->nr > tmp->nr)
9904 if (tmp->start + tmp->nr < good->start + good->nr) {
9905 fprintf(stderr, "Ok we have overlapping extents that "
9906 "aren't completely covered by each other, this "
9907 "is going to require more careful thought. "
9908 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9909 tmp->start, tmp->nr, good->start, good->nr);
/* @rec itself is queued on the local delete list as well. */
9916 list_add_tail(&rec->list, &delete_list);
9918 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9921 list_move_tail(&tmp->list, &delete_list);
/* From here on "root" refers to the extent root, whatever was passed in. */
9924 root = root->fs_info->extent_root;
9925 trans = btrfs_start_transaction(root, 1);
9926 if (IS_ERR(trans)) {
9927 ret = PTR_ERR(trans);
9931 list_for_each_entry(tmp, &delete_list, list) {
9932 if (tmp->found_rec == 0)
9934 key.objectid = tmp->start;
9935 key.type = BTRFS_EXTENT_ITEM_KEY;
9936 key.offset = tmp->nr;
9938 /* Shouldn't happen but just in case */
9939 if (tmp->metadata) {
9940 fprintf(stderr, "Well this shouldn't happen, extent "
9941 "record overlaps but is metadata? "
9942 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
/* Locate the extent item under the transaction, then remove it. */
9946 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9952 ret = btrfs_del_item(trans, root, &path);
9955 btrfs_release_path(&path);
9958 err = btrfs_commit_transaction(trans, root);
/* Unlink everything that is still on the local delete list. */
9962 while (!list_empty(&delete_list)) {
9963 tmp = to_extent_record(delete_list.next);
9964 list_del_init(&tmp->list);
/* Likewise for whatever remains on the record's duplicate list. */
9970 while (!list_empty(&rec->dups)) {
9971 tmp = to_extent_record(rec->dups.next);
9972 list_del_init(&tmp->list);
9976 btrfs_release_path(&path);
/* No error and nothing deleted: reset the duplicate count on @rec. */
9978 if (!ret && !nr_del)
9979 rec->num_duplicates = 0;
9981 return ret ? ret : nr_del;
/*
 * Try to attach real file extent information to the data backrefs of @rec
 * that do not have any yet.
 *
 * For each data backref that is not a full backref, found_ref is tested
 * first.  The root named by dback->root is read, and that root is searched
 * for the EXTENT_DATA item at (dback->owner, dback->offset).  The item's
 * disk bytenr and disk num bytes are read, and @extent_cache is probed at
 * that bytenr; a hit whose record has found_rec set is tested for.  The
 * backref then gets found_ref incremented and its disk_bytenr/bytes
 * overwritten with the values read from the item.
 *
 * @info:         filesystem used to read the backref's root
 * @path:         caller-provided path used for the search; released after use
 * @extent_cache: cache of extent records probed with the found bytenr
 * @rec:          record whose backref_tree is walked
 *
 * A root lookup error other than the -ENOENT case is returned to the caller
 * as PTR_ERR(root).
 */
9984 static int find_possible_backrefs(struct btrfs_fs_info *info,
9985 struct btrfs_path *path,
9986 struct cache_tree *extent_cache,
9987 struct extent_record *rec)
9989 struct btrfs_root *root;
9990 struct extent_backref *back, *tmp;
9991 struct data_backref *dback;
9992 struct cache_extent *cache;
9993 struct btrfs_file_extent_item *fi;
9994 struct btrfs_key key;
9998 rbtree_postorder_for_each_entry_safe(back, tmp,
9999 &rec->backref_tree, node) {
10000 /* Don't care about full backrefs (poor unloved backrefs) */
10001 if (back->full_backref || !back->is_data)
10004 dback = to_data_backref(back);
10006 /* We found this one, we don't need to do a lookup */
10007 if (dback->found_ref)
/* Key used to read the root that the backref names. */
10010 key.objectid = dback->root;
10011 key.type = BTRFS_ROOT_ITEM_KEY;
10012 key.offset = (u64)-1;
10014 root = btrfs_read_fs_root(info, &key);
10016 /* No root, definitely a bad ref, skip */
10017 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
10019 /* Other err, exit */
10021 return PTR_ERR(root);
/* Read-only search (no transaction handle) for the file extent item. */
10023 key.objectid = dback->owner;
10024 key.type = BTRFS_EXTENT_DATA_KEY;
10025 key.offset = dback->offset;
10026 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
10028 btrfs_release_path(path);
10031 /* Didn't find it, we can carry on */
/* Pull the on-disk location out of the item before releasing the path. */
10036 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
10037 struct btrfs_file_extent_item);
10038 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
10039 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
10040 btrfs_release_path(path);
10041 cache = lookup_cache_extent(extent_cache, bytenr, 1);
10043 struct extent_record *tmp;
10044 tmp = container_of(cache, struct extent_record, cache);
10047 * If we found an extent record for the bytenr for this
10048 * particular backref then we can't add it to our
10049 * current extent record. We only want to add backrefs
10050 * that don't have a corresponding extent item in the
10051 * extent tree since they likely belong to this record
10052 * and we need to fix it if it doesn't match bytenrs.
10054 if (tmp->found_rec)
10058 dback->found_ref += 1;
10059 dback->disk_bytenr = bytenr;
10060 dback->bytes = bytes;
10063 * Set this so the verify backref code knows not to trust the
10064 * values in this backref.
10073 * Record orphan data ref into corresponding root.
10075 * Return 0 if the extent item contains data ref and recorded.
10076 * Return 1 if the extent item contains no useful data ref
10077 * In that case, it may contain only shared_dataref or metadata backref
10078 * or the file extent exists (this should be handled by the extent bytenr
10079 * recovery routine)
10080 * Return <0 if something goes wrong.
/*
 * Queue an orphan_data_extent on the owning root for the data backrefs of
 * @rec that qualify.
 *
 * A backref is examined only if it is a data backref, is not a full
 * backref and has found_extent_tree set; found_ref is tested as well.  The
 * root named by dback->root is read and searched for the EXTENT_DATA item
 * at (dback->owner, dback->offset).  An orphan entry records the backref's
 * (root, owner, offset) together with the record's disk range taken from
 * rec->cache.start and rec->cache.size.
 *
 * @fs_info: filesystem used to resolve each backref's root
 * @rec:     extent record whose backref_tree is walked
 *
 * The visible return statement yields !recorded_data_ref, i.e. 0 once at
 * least one orphan has been queued and 1 otherwise.
 */
10082 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10083 struct extent_record *rec)
10085 struct btrfs_key key;
10086 struct btrfs_root *dest_root;
10087 struct extent_backref *back, *tmp;
10088 struct data_backref *dback;
10089 struct orphan_data_extent *orphan;
10090 struct btrfs_path path;
10091 int recorded_data_ref = 0;
10096 btrfs_init_path(&path);
10097 rbtree_postorder_for_each_entry_safe(back, tmp,
10098 &rec->backref_tree, node) {
10099 if (back->full_backref || !back->is_data ||
10100 !back->found_extent_tree)
10102 dback = to_data_backref(back);
10103 if (dback->found_ref)
10105 key.objectid = dback->root;
10106 key.type = BTRFS_ROOT_ITEM_KEY;
10107 key.offset = (u64)-1;
10109 dest_root = btrfs_read_fs_root(fs_info, &key);
10111 /* For non-exist root we just skip it */
10112 if (IS_ERR(dest_root) || !dest_root)
10115 key.objectid = dback->owner;
10116 key.type = BTRFS_EXTENT_DATA_KEY;
10117 key.offset = dback->offset;
10119 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10120 btrfs_release_path(&path);
10122 * For ret < 0, it's OK since the fs-tree may be corrupted,
10123 * we need to record it for inode/file extent rebuild.
10124 * For ret > 0, we record it only for file extent rebuild.
10125 * For ret == 0, the file extent exists but only bytenr
10126 * mismatch, let the original bytenr fix routine to handle,
10132 orphan = malloc(sizeof(*orphan));
10137 INIT_LIST_HEAD(&orphan->list);
10138 orphan->root = dback->root;
10139 orphan->objectid = dback->owner;
10140 orphan->offset = dback->offset;
10141 orphan->disk_bytenr = rec->cache.start;
10142 orphan->disk_len = rec->cache.size;
/*
 * New node first, list head second, so that every orphan recorded for this
 * root stays linked on dest_root->orphan_data_extents.
 */
10143 list_add(&orphan->list, &dest_root->orphan_data_extents);
10144 recorded_data_ref = 1;
10147 btrfs_release_path(&path);
10149 return !recorded_data_ref;
10155 * when an incorrect extent item is found, this will delete
10156 * all of the existing entries for it and recreate them
10157 * based on what the tree scan found.
/*
 * Visible sequence:
 *  1. for a non-metadata record whose refs differ from extent_item_refs,
 *     find_possible_backrefs() and then verify_backrefs() are run;
 *  2. a transaction is started on the extent root;
 *  3. delete_extent_records() removes the existing records;
 *  4. info->corrupt_blocks is probed for the record's range;
 *  5. record_extent() is called for backrefs of the record, with found_ref
 *     tested beforehand and rec->bad_full_backref cleared;
 *  6. the transaction is committed and a message is printed on stderr.
 *
 * BTRFS_BLOCK_FLAG_FULL_BACKREF is added to the flags handed to
 * record_extent() when rec->flag_block_full_backref is set.
 *
 * @info:         filesystem being repaired
 * @extent_cache: extent record cache, forwarded to find_possible_backrefs()
 * @rec:          extent record to rebuild
 */
10159 static int fixup_extent_refs(struct btrfs_fs_info *info,
10160 struct cache_tree *extent_cache,
10161 struct extent_record *rec)
10163 struct btrfs_trans_handle *trans = NULL;
10165 struct btrfs_path path;
10166 struct cache_extent *cache;
10167 struct extent_backref *back, *tmp;
10171 if (rec->flag_block_full_backref)
10172 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10174 btrfs_init_path(&path);
10175 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10177 * Sometimes the backrefs themselves are so broken they don't
10178 * get attached to any meaningful rec, so first go back and
10179 * check any of our backrefs that we couldn't find and throw
10180 * them into the list if we find the backref so that
10181 * verify_backrefs can figure out what to do.
10183 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10188 /* step one, make sure all of the backrefs agree */
10189 ret = verify_backrefs(info, &path, rec);
10193 trans = btrfs_start_transaction(info->extent_root, 1);
10194 if (IS_ERR(trans)) {
10195 ret = PTR_ERR(trans);
10199 /* step two, delete all the existing records */
10200 ret = delete_extent_records(trans, info->extent_root, &path,
10206 /* was this block corrupt? If so, don't add references to it */
10207 cache = lookup_cache_extent(info->corrupt_blocks,
10208 rec->start, rec->max_size);
10214 /* step three, recreate all the refs we did find */
10215 rbtree_postorder_for_each_entry_safe(back, tmp,
10216 &rec->backref_tree, node) {
10218 * if we didn't find any references, don't create a
10219 * new extent record
10221 if (!back->found_ref)
10224 rec->bad_full_backref = 0;
10225 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10233 int err = btrfs_commit_transaction(trans, info->extent_root);
10239 fprintf(stderr, "Repaired extent references for %llu\n",
10240 (unsigned long long)rec->start);
10242 btrfs_release_path(&path);
/*
 * Bring BTRFS_BLOCK_FLAG_FULL_BACKREF in the on-disk extent item of @rec in
 * line with rec->flag_block_full_backref.
 *
 * The item is looked up in the extent root inside a transaction, its flags
 * are read, the full-backref bit is set or cleared, the new flags are
 * written back, the leaf is marked dirty and the transaction is committed.
 *
 * @fs_info: filesystem; its extent_root is the tree that is modified
 * @rec:     extent record describing the item (start, metadata, info_level,
 *           max_size, flag_block_full_backref)
 *
 * A failure to start the transaction is returned as PTR_ERR(trans).
 */
10246 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10247 struct extent_record *rec)
10249 struct btrfs_trans_handle *trans;
10250 struct btrfs_root *root = fs_info->extent_root;
10251 struct btrfs_path path;
10252 struct btrfs_extent_item *ei;
10253 struct btrfs_key key;
/*
 * Metadata is keyed by (start, METADATA_ITEM, info_level); everything else
 * by (start, EXTENT_ITEM, max_size).
 */
10257 key.objectid = rec->start;
10258 if (rec->metadata) {
10259 key.type = BTRFS_METADATA_ITEM_KEY;
10260 key.offset = rec->info_level;
10262 key.type = BTRFS_EXTENT_ITEM_KEY;
10263 key.offset = rec->max_size;
10266 trans = btrfs_start_transaction(root, 0);
10268 return PTR_ERR(trans);
10270 btrfs_init_path(&path);
10271 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10273 btrfs_release_path(&path);
/* NOTE(review): the status of this commit is not captured. */
10274 btrfs_commit_transaction(trans, root);
10277 fprintf(stderr, "Didn't find extent for %llu\n",
10278 (unsigned long long)rec->start);
10279 btrfs_release_path(&path);
/* NOTE(review): the status of this commit is not captured either. */
10280 btrfs_commit_transaction(trans, root);
10284 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10285 struct btrfs_extent_item);
10286 flags = btrfs_extent_flags(path.nodes[0], ei);
10287 if (rec->flag_block_full_backref) {
10288 fprintf(stderr, "setting full backref on %llu\n",
10289 (unsigned long long)key.objectid);
10290 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10292 fprintf(stderr, "clearing full backref on %llu\n",
10293 (unsigned long long)key.objectid);
10294 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* Store the updated flags in the leaf and mark it dirty before committing. */
10296 btrfs_set_extent_flags(path.nodes[0], ei, flags);
10297 btrfs_mark_buffer_dirty(path.nodes[0]);
10298 btrfs_release_path(&path);
10299 ret = btrfs_commit_transaction(trans, root);
10301 fprintf(stderr, "Repaired extent flags for %llu\n",
10302 (unsigned long long)rec->start);
10307 /* right now we only prune from the extent allocation tree */
/*
 * Delete the pointer to one corrupt block from its parent node in the
 * extent tree.
 *
 * The extent root is searched for corrupt->key with path.lowest_level set
 * to corrupt->level + 1, so the node examined is path.nodes[level].  The
 * slot returned by the search is tried first; then every slot of that node
 * is compared against corrupt->cache.start.  The node being the extent
 * root's own node is tested for separately.  The matching pointer is
 * removed with btrfs_del_ptr().
 *
 * @trans:   transaction used for the search
 * @info:    filesystem whose extent_root is modified
 * @corrupt: corrupt block descriptor (key, level, cache.start)
 */
10308 static int prune_one_block(struct btrfs_trans_handle *trans,
10309 struct btrfs_fs_info *info,
10310 struct btrfs_corrupt_block *corrupt)
10313 struct btrfs_path path;
10314 struct extent_buffer *eb;
10318 int level = corrupt->level + 1;
10320 btrfs_init_path(&path);
10322 /* we want to stop at the parent to our busted block */
10323 path.lowest_level = level;
10325 ret = btrfs_search_slot(trans, info->extent_root,
10326 &corrupt->key, &path, -1, 1);
10331 eb = path.nodes[level];
10338 * hopefully the search gave us the block we want to prune,
10339 * lets try that first
10341 slot = path.slots[level];
10342 found = btrfs_node_blockptr(eb, slot);
10343 if (found == corrupt->cache.start)
10346 nritems = btrfs_header_nritems(eb);
10348 /* the search failed, lets scan this node and hope we find it */
10349 for (slot = 0; slot < nritems; slot++) {
10350 found = btrfs_node_blockptr(eb, slot);
10351 if (found == corrupt->cache.start)
10355 * we couldn't find the bad block. TODO, search all the nodes for pointers
10358 if (eb == info->extent_root->node) {
10363 btrfs_release_path(&path);
10368 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10369 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10372 btrfs_release_path(&path);
/*
 * Prune the entries recorded in info->corrupt_blocks from the extent tree.
 *
 * Entries are fetched with search_cache_extent() starting at offset 0; a
 * transaction is started on the extent root, prune_one_block() is called
 * for the entry and the entry is removed from the cache.
 *
 * @info: filesystem whose corrupt_blocks cache is drained
 *
 * A failure to start the transaction is returned as PTR_ERR(trans); the
 * visible final return is the result of committing the transaction.
 */
10376 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10378 struct btrfs_trans_handle *trans = NULL;
10379 struct cache_extent *cache;
10380 struct btrfs_corrupt_block *corrupt;
10383 cache = search_cache_extent(info->corrupt_blocks, 0);
10387 trans = btrfs_start_transaction(info->extent_root, 1);
10389 return PTR_ERR(trans);
10391 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/* NOTE(review): the result of prune_one_block() is not captured. */
10392 prune_one_block(trans, info, corrupt);
10393 remove_cache_extent(info->corrupt_blocks, cache);
10396 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Reset cached free-space state of @fs_info.
 *
 * Ranges marked EXTENT_DIRTY in fs_info->free_space_cache are located with
 * find_first_extent_bit() and cleared with clear_extent_dirty().  Block
 * groups are then stepped through with btrfs_lookup_first_block_group(),
 * moving "start" just past each group found (objectid + offset).
 *
 * NOTE(review): what is done to each block group is not visible in these
 * lines; confirm against the full function.
 */
10400 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10402 struct btrfs_block_group_cache *cache;
10407 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10408 &start, &end, EXTENT_DIRTY);
10411 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10416 cache = btrfs_lookup_first_block_group(fs_info, start);
10421 start = cache->key.objectid + cache->key.offset;
/*
 * Check every extent record in @extent_cache, report problems on stderr and
 * attempt repairs when the global "repair" flag allows it.
 *
 * Visible phases:
 *  - the range of every extent record, and of every entry in
 *    fs_info->corrupt_blocks, is marked dirty in fs_info->excluded_extents;
 *    prune_corrupt_blocks() and reset_cached_block_groups() are then called
 *    (the status returned by prune_corrupt_blocks() is not captured);
 *  - while repairing, each record on the global duplicate_extents list is
 *    passed to process_duplicates() and delete_duplicate_records();
 *  - each remaining record is checked for: multiple extent items, a ref
 *    count that differs from extent_item_refs (record_orphan_data_extents()
 *    is called in that case), backpointer mismatches, a failed owner ref
 *    check, a bad full backref, metadata crossing a stripe boundary, and a
 *    chunk type mismatch; fixup_extent_refs() and fixup_extent_flags() are
 *    the repair calls that appear;
 *  - each record is removed from the cache and its backrefs are freed; its
 *    range is cleared from excluded_extents when
 *    !init_extent_tree && repair && (!cur_err || fix);
 *  - at the end, a failure other than -EAGAIN is reported, and on another
 *    branch btrfs_fix_block_accounting() is run inside a transaction on the
 *    extent root.
 *
 * @root:         any root of the filesystem; root->fs_info is what is used
 * @extent_cache: cache of extent records, emptied as records are processed
 */
10425 static int check_extent_refs(struct btrfs_root *root,
10426 struct cache_tree *extent_cache)
10428 struct extent_record *rec;
10429 struct cache_extent *cache;
10436 * if we're doing a repair, we have to make sure
10437 * we don't allocate from the problem extents.
10438 * In the worst case, this will be all the
10439 * extents in the FS
10441 cache = search_cache_extent(extent_cache, 0);
10443 rec = container_of(cache, struct extent_record, cache);
10444 set_extent_dirty(root->fs_info->excluded_extents,
10446 rec->start + rec->max_size - 1);
10447 cache = next_cache_extent(cache);
10450 /* pin down all the corrupted blocks too */
10451 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10453 set_extent_dirty(root->fs_info->excluded_extents,
10455 cache->start + cache->size - 1);
10456 cache = next_cache_extent(cache);
10458 prune_corrupt_blocks(root->fs_info);
10459 reset_cached_block_groups(root->fs_info);
10462 reset_cached_block_groups(root->fs_info);
10465 * We need to delete any duplicate entries we find first otherwise we
10466 * could mess up the extent tree when we have backrefs that actually
10467 * belong to a different extent item and not the weird duplicate one.
10469 while (repair && !list_empty(&duplicate_extents)) {
10470 rec = to_extent_record(duplicate_extents.next);
10471 list_del_init(&rec->list);
10473 /* Sometimes we can find a backref before we find an actual
10474 * extent, so we need to process it a little bit to see if there
10475 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10476 * if this is a backref screwup. If we need to delete stuff
10477 * process_duplicates() will return 0, otherwise it will return
10480 if (process_duplicates(extent_cache, rec))
10482 ret = delete_duplicate_records(root, rec);
10486 * delete_duplicate_records will return the number of entries
10487 * deleted, so if it's greater than 0 then we know we actually
10488 * did something and we need to remove.
10501 cache = search_cache_extent(extent_cache, 0);
10504 rec = container_of(cache, struct extent_record, cache);
10505 if (rec->num_duplicates) {
10506 fprintf(stderr, "extent item %llu has multiple extent "
10507 "items\n", (unsigned long long)rec->start);
10511 if (rec->refs != rec->extent_item_refs) {
10512 fprintf(stderr, "ref mismatch on [%llu %llu] ",
10513 (unsigned long long)rec->start,
10514 (unsigned long long)rec->nr);
10515 fprintf(stderr, "extent item %llu, found %llu\n",
10516 (unsigned long long)rec->extent_item_refs,
10517 (unsigned long long)rec->refs);
10518 ret = record_orphan_data_extents(root->fs_info, rec);
10524 if (all_backpointers_checked(rec, 1)) {
10525 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10526 (unsigned long long)rec->start,
10527 (unsigned long long)rec->nr);
10531 if (!rec->owner_ref_checked) {
10532 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10533 (unsigned long long)rec->start,
10534 (unsigned long long)rec->nr);
10539 if (repair && fix) {
10540 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10546 if (rec->bad_full_backref) {
10547 fprintf(stderr, "bad full backref, on [%llu]\n",
10548 (unsigned long long)rec->start);
10550 ret = fixup_extent_flags(root->fs_info, rec);
10558 * Although it's not a extent ref's problem, we reuse this
10559 * routine for error reporting.
10560 * No repair function yet.
10562 if (rec->crossing_stripes) {
10564 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10565 rec->start, rec->start + rec->max_size);
10569 if (rec->wrong_chunk_type) {
10571 "bad extent [%llu, %llu), type mismatch with chunk\n",
10572 rec->start, rec->start + rec->max_size);
10577 remove_cache_extent(extent_cache, cache);
10578 free_all_extent_backrefs(rec);
10579 if (!init_extent_tree && repair && (!cur_err || fix))
10580 clear_extent_dirty(root->fs_info->excluded_extents,
10582 rec->start + rec->max_size - 1);
10587 if (ret && ret != -EAGAIN) {
10588 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10591 struct btrfs_trans_handle *trans;
10593 root = root->fs_info->extent_root;
10594 trans = btrfs_start_transaction(root, 1);
10595 if (IS_ERR(trans)) {
10596 ret = PTR_ERR(trans);
10600 ret = btrfs_fix_block_accounting(trans, root);
10603 ret = btrfs_commit_transaction(trans, root);
/*
 * Derive a stripe length from a chunk's type, length and stripe count.
 *
 *   RAID0:     length / num_stripes
 *   RAID10:    length * 2 / num_stripes
 *   RAID5:     length / (num_stripes - 1)
 *   RAID6:     length / (num_stripes - 2)
 *   otherwise: length
 *
 * @type:        chunk type flags, tested against BTRFS_BLOCK_GROUP_RAID* bits
 * @length:      chunk length
 * @num_stripes: number of stripes in the chunk
 *
 * NOTE(review): no check that the divisor is non-zero appears in these
 * lines (num_stripes of 0, 1 for RAID5, or 2 for RAID6); presumably callers
 * validate the chunk first, confirm.
 */
10615 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10619 if (type & BTRFS_BLOCK_GROUP_RAID0) {
10620 stripe_size = length;
10621 stripe_size /= num_stripes;
10622 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10623 stripe_size = length * 2;
10624 stripe_size /= num_stripes;
10625 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10626 stripe_size = length;
10627 stripe_size /= (num_stripes - 1);
10628 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10629 stripe_size = length;
10630 stripe_size /= (num_stripes - 2);
10632 stripe_size = length;
10634 return stripe_size;
10638 * Check the chunk with its block group/dev list ref:
10639 * Return 0 if all refs seem valid.
10640 * Return 1 if only part of the refs seem valid and a later check is needed to
10641 * rebuild refs, e.g. a missing block group that requires searching the extent tree.
10642 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * Cross-check one chunk record against the block group cache and the
 * device extent cache.
 *
 * Block group: the cache is probed with the chunk's range; a hit is
 * compared on length/offset/type flags, a mismatch or a miss is reported.
 * A matching block group record is unlinked from its list and stored in
 * chunk_rec->bg_rec.
 *
 * Device extents: for every stripe, the cache is probed at
 * (devid, offset, stripe length); a hit is compared on devid, offset,
 * chunk offset and length, a mismatch or a miss is reported.  A matching
 * device extent is moved onto chunk_rec->dextents.
 *
 * @chunk_rec:         chunk being verified
 * @block_group_cache: block group records keyed by range
 * @dev_extent_cache:  device extent records keyed by (devid, offset)
 */
10644 static int check_chunk_refs(struct chunk_record *chunk_rec,
10645 struct block_group_tree *block_group_cache,
10646 struct device_extent_tree *dev_extent_cache,
10649 struct cache_extent *block_group_item;
10650 struct block_group_record *block_group_rec;
10651 struct cache_extent *dev_extent_item;
10652 struct device_extent_record *dev_extent_rec;
10656 int metadump_v2 = 0;
10660 block_group_item = lookup_cache_extent(&block_group_cache->tree,
10662 chunk_rec->length);
10663 if (block_group_item) {
10664 block_group_rec = container_of(block_group_item,
10665 struct block_group_record,
10667 if (chunk_rec->length != block_group_rec->offset ||
10668 chunk_rec->offset != block_group_rec->objectid ||
10670 chunk_rec->type_flags != block_group_rec->flags)) {
10673 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10674 chunk_rec->objectid,
10679 chunk_rec->type_flags,
10680 block_group_rec->objectid,
10681 block_group_rec->type,
10682 block_group_rec->offset,
10683 block_group_rec->offset,
10684 block_group_rec->objectid,
10685 block_group_rec->flags);
/* Claim the block group: unlink it and attach it to this chunk. */
10688 list_del_init(&block_group_rec->list);
10689 chunk_rec->bg_rec = block_group_rec;
10694 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10695 chunk_rec->objectid,
10700 chunk_rec->type_flags);
/* Each stripe is looked up as a device extent of this length. */
10707 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10708 chunk_rec->num_stripes);
10709 for (i = 0; i < chunk_rec->num_stripes; ++i) {
10710 devid = chunk_rec->stripes[i].devid;
10711 offset = chunk_rec->stripes[i].offset;
10712 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10713 devid, offset, length);
10714 if (dev_extent_item) {
10715 dev_extent_rec = container_of(dev_extent_item,
10716 struct device_extent_record,
10718 if (dev_extent_rec->objectid != devid ||
10719 dev_extent_rec->offset != offset ||
10720 dev_extent_rec->chunk_offset != chunk_rec->offset ||
10721 dev_extent_rec->length != length) {
10724 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10725 chunk_rec->objectid,
10728 chunk_rec->stripes[i].devid,
10729 chunk_rec->stripes[i].offset,
10730 dev_extent_rec->objectid,
10731 dev_extent_rec->offset,
10732 dev_extent_rec->length);
/* Move the matched device extent onto this chunk's dextents list. */
10735 list_move(&dev_extent_rec->chunk_list,
10736 &chunk_rec->dextents);
10741 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10742 chunk_rec->objectid,
10745 chunk_rec->stripes[i].devid,
10746 chunk_rec->stripes[i].offset);
10753 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Run check_chunk_refs() on every chunk in @chunk_cache and sort the chunk
 * records by result.
 *
 * @chunk_cache:       chunk records to verify
 * @block_group_cache: block group records; those left on its block_groups
 *                     list afterwards are reported as having no chunk
 * @dev_extent_cache:  device extent records; those left on its
 *                     no_chunk_orphans list are reported as having no chunk
 * @good:    if non-NULL, receives chunks for which the check returned 0
 * @bad:     if non-NULL, receives chunks for which the check returned < 0
 * @rebuild: if non-NULL, receives chunks for which the check returned > 0
 * @silent:  forwarded to check_chunk_refs()
 */
10754 int check_chunks(struct cache_tree *chunk_cache,
10755 struct block_group_tree *block_group_cache,
10756 struct device_extent_tree *dev_extent_cache,
10757 struct list_head *good, struct list_head *bad,
10758 struct list_head *rebuild, int silent)
10760 struct cache_extent *chunk_item;
10761 struct chunk_record *chunk_rec;
10762 struct block_group_record *bg_rec;
10763 struct device_extent_record *dext_rec;
10767 chunk_item = first_cache_extent(chunk_cache);
10768 while (chunk_item) {
10769 chunk_rec = container_of(chunk_item, struct chunk_record,
10771 err = check_chunk_refs(chunk_rec, block_group_cache,
10772 dev_extent_cache, silent);
/* File the chunk on the caller's list that matches the sign of err. */
10775 if (err == 0 && good)
10776 list_add_tail(&chunk_rec->list, good);
10777 if (err > 0 && rebuild)
10778 list_add_tail(&chunk_rec->list, rebuild);
10779 if (err < 0 && bad)
10780 list_add_tail(&chunk_rec->list, bad);
10781 chunk_item = next_cache_extent(chunk_item);
/* Block groups still on this list were not claimed by any chunk. */
10784 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10787 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
/* Likewise for device extents that no chunk claimed. */
10795 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10799 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10800 dext_rec->objectid,
/*
 * Compare the sum of a device's extent lengths with the byte_used value
 * stored in its device record.
 *
 * Device extents are walked starting at (dev_rec->devid, 0); an extent
 * whose objectid differs from the devid is tested for.  Each counted
 * extent is unlinked from its device_list and its length is added to the
 * running total.  A total that differs from dev_rec->byte_used is reported.
 *
 * @dev_rec:    device record being verified
 * @dext_cache: device extent cache to walk
 */
10810 static int check_device_used(struct device_record *dev_rec,
10811 struct device_extent_tree *dext_cache)
10813 struct cache_extent *cache;
10814 struct device_extent_record *dev_extent_rec;
10815 u64 total_byte = 0;
10817 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10819 dev_extent_rec = container_of(cache,
10820 struct device_extent_record,
10822 if (dev_extent_rec->objectid != dev_rec->devid)
/* Unlink the extent from its device_list and add it to the total. */
10825 list_del_init(&dev_extent_rec->device_list);
10826 total_byte += dev_extent_rec->length;
10827 cache = next_cache_extent(cache);
10830 if (total_byte != dev_rec->byte_used) {
10832 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10833 total_byte, dev_rec->byte_used, dev_rec->objectid,
10834 dev_rec->type, dev_rec->offset);
10842 * Extra (optional) check for dev_item size to report possible problem on a new
/*
 * Emit warnings when a device's total_bytes is not a multiple of the
 * sector size.  Nothing is returned and nothing is modified.
 *
 * @devid:       device id, used only in the message
 * @total_bytes: size recorded for the device
 * @sectorsize:  alignment that total_bytes is tested against
 */
10845 static void check_dev_size_alignment(u64 devid, u64 total_bytes, u32 sectorsize)
10847 if (!IS_ALIGNED(total_bytes, sectorsize)) {
10849 "unaligned total_bytes detected for devid %llu, have %llu should be aligned to %u",
10850 devid, total_bytes, sectorsize);
10852 "this is OK for older kernel, but may cause kernel warning for newer kernels");
10853 warning("this can be fixed by 'btrfs rescue fix-device-size'");
10858 * Unlike device size alignment check above, some super total_bytes check
10859 * failure can lead to mount failure for newer kernel.
10861 * So this function will return the error for a fatal super total_bytes problem.
/*
 * Validate the superblock's total_bytes against the devices of @fs_info.
 *
 * total_bytes of every device on fs_info->fs_devices->devices is summed.
 * A super total_bytes smaller than that sum is reported with error().
 * For filesystems without the METADUMP / METADUMP_V2 super flags, an
 * unaligned super size, an unaligned device sum, or a difference between
 * the two is reported with warning().
 *
 * @fs_info: filesystem whose super_copy and device list are read
 */
10863 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
10865 struct btrfs_device *dev;
10866 struct list_head *dev_list = &fs_info->fs_devices->devices;
10867 u64 total_bytes = 0;
10868 u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
/* Sum the sizes of all devices on the list. */
10870 list_for_each_entry(dev, dev_list, dev_list)
10871 total_bytes += dev->total_bytes;
10873 /* Important check, which can cause unmountable fs */
10874 if (super_bytes < total_bytes) {
10875 error("super total bytes %llu smaller than real device(s) size %llu",
10876 super_bytes, total_bytes);
10877 error("mounting this fs may fail for newer kernels");
10878 error("this can be fixed by 'btrfs rescue fix-device-size'");
10883 * Optional check, just to make everything aligned and match with each
10886 * For a btrfs-image restored fs, we don't need to check it anyway.
10888 if (btrfs_super_flags(fs_info->super_copy) &
10889 (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
10891 if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
10892 !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
10893 super_bytes != total_bytes) {
10894 warning("minor unaligned/mismatch device size detected");
10896 "recommended to use 'btrfs rescue fix-device-size' to fix it");
10901 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Verify every device record in @dev_cache against its device extents.
 *
 * Each device record in the rb-tree goes through check_device_used() and
 * check_dev_size_alignment() (using global_info->sectorsize).  Device
 * extents left on dev_extent_cache->no_device_orphans are then reported as
 * not having a device.
 *
 * @dev_cache:        rb-tree of device records
 * @dev_extent_cache: device extent cache
 */
10902 static int check_devices(struct rb_root *dev_cache,
10903 struct device_extent_tree *dev_extent_cache)
10905 struct rb_node *dev_node;
10906 struct device_record *dev_rec;
10907 struct device_extent_record *dext_rec;
10911 dev_node = rb_first(dev_cache);
10913 dev_rec = container_of(dev_node, struct device_record, node);
10914 err = check_device_used(dev_rec, dev_extent_cache);
/* The alignment check only warns; it yields no status. */
10918 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
10919 global_info->sectorsize);
10920 dev_node = rb_next(dev_node);
10922 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10925 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10926 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record describing one tree root and append it to
 * @head.
 *
 * @head:          list the new record is appended to
 * @objectid:      stored in the record's objectid
 * @bytenr:        stored in the record's bytenr
 * @last_snapshot: stored in the record's last_snapshot
 * @level:         stored in the record's level
 * @drop_level:    stored in the record's drop_level
 * @drop_key:      copied into the record's drop_key
 *
 * NOTE(review): callers presumably may pass a NULL @drop_key; whether the
 * memcpy() is guarded is not visible in these lines, confirm.
 */
10933 static int add_root_item_to_list(struct list_head *head,
10934 u64 objectid, u64 bytenr, u64 last_snapshot,
10935 u8 level, u8 drop_level,
10936 struct btrfs_key *drop_key)
10939 struct root_item_record *ri_rec;
10940 ri_rec = malloc(sizeof(*ri_rec));
10943 ri_rec->bytenr = bytenr;
10944 ri_rec->objectid = objectid;
10945 ri_rec->level = level;
10946 ri_rec->drop_level = drop_level;
10947 ri_rec->last_snapshot = last_snapshot;
10949 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10950 list_add_tail(&ri_rec->list, head);
/*
 * Drain a list of root_item_record entries: the first entry is unlinked
 * repeatedly until @list is empty.
 *
 * @list: list of root_item_record entries linked through their "list" member
 */
10955 static void free_root_item_list(struct list_head *list)
10957 struct root_item_record *ri_rec;
10959 while (!list_empty(list)) {
10960 ri_rec = list_first_entry(list, struct root_item_record,
10962 list_del_init(&ri_rec->list);
/*
 * Process the root item records queued on @list.
 *
 * For each record, the tree block at rec->bytenr is read; a block that is
 * not up to date is released.  The block is handed to
 * add_root_to_pending() together with rec->objectid, run_next_block() is
 * called with the record, and the block is released and the record
 * unlinked from @list.  A further run_next_block() call with a NULL record
 * appears after the per-record work.
 *
 * @list: root_item_record entries to process; emptied as they are handled
 * @root: root whose fs_info is used to read tree blocks
 * @bits: block_info array forwarded to run_next_block()
 *
 * The remaining cache arguments (pending, seen, reada, nodes, extent_cache,
 * chunk_cache, dev_cache, block_group_cache, dev_extent_cache) are passed
 * through unchanged to add_root_to_pending() and run_next_block().
 */
10967 static int deal_root_from_list(struct list_head *list,
10968 struct btrfs_root *root,
10969 struct block_info *bits,
10971 struct cache_tree *pending,
10972 struct cache_tree *seen,
10973 struct cache_tree *reada,
10974 struct cache_tree *nodes,
10975 struct cache_tree *extent_cache,
10976 struct cache_tree *chunk_cache,
10977 struct rb_root *dev_cache,
10978 struct block_group_tree *block_group_cache,
10979 struct device_extent_tree *dev_extent_cache)
10984 while (!list_empty(list)) {
10985 struct root_item_record *rec;
10986 struct extent_buffer *buf;
10987 rec = list_entry(list->next,
10988 struct root_item_record, list);
/* Read the root's tree block; a block that is not up to date is dropped. */
10990 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10991 if (!extent_buffer_uptodate(buf)) {
10992 free_extent_buffer(buf);
10996 ret = add_root_to_pending(buf, extent_cache, pending,
10997 seen, nodes, rec->objectid);
11001 * To rebuild extent tree, we need deal with snapshot
11002 * one by one, otherwise we deal with node firstly which
11003 * can maximize readahead.
11006 ret = run_next_block(root, bits, bits_nr, &last,
11007 pending, seen, reada, nodes,
11008 extent_cache, chunk_cache,
11009 dev_cache, block_group_cache,
11010 dev_extent_cache, rec);
/* Done with this root: drop the buffer reference and unlink the record. */
11014 free_extent_buffer(buf);
11015 list_del(&rec->list);
11021 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
11022 reada, nodes, extent_cache, chunk_cache,
11023 dev_cache, block_group_cache,
11024 dev_extent_cache, NULL);
/*
 * Check chunks, extents and devices of the filesystem behind @fs_info.
 *
 * Visible steps: initialise the local caches, point @fs_info at them, queue
 * the tree root and the chunk root, queue every ROOT_ITEM found in the tree
 * root (roots whose drop_progress objectid is 0 go to normal_trees, the
 * others to dropping_trees), process both lists with deal_root_from_list(),
 * then call check_chunks(), check_extent_refs() and check_devices().
 *
 * NOTE(review): two cleanup sequences close the function; which path reaches
 * which one is not visible here.
 */
11034 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11036 struct rb_root dev_cache;
11037 struct cache_tree chunk_cache;
11038 struct block_group_tree block_group_cache;
11039 struct device_extent_tree dev_extent_cache;
11040 struct cache_tree extent_cache;
11041 struct cache_tree seen;
11042 struct cache_tree pending;
11043 struct cache_tree reada;
11044 struct cache_tree nodes;
11045 struct extent_io_tree excluded_extents;
11046 struct cache_tree corrupt_blocks;
11047 struct btrfs_path path;
11048 struct btrfs_key key;
11049 struct btrfs_key found_key;
11051 struct block_info *bits;
11053 struct extent_buffer *leaf;
11055 struct btrfs_root_item ri;
11056 struct list_head dropping_trees;
11057 struct list_head normal_trees;
11058 struct btrfs_root *root1;
11059 struct btrfs_root *root;
/* All caches live on the stack of this function and start out empty. */
11063 root = fs_info->fs_root;
11064 dev_cache = RB_ROOT;
11065 cache_tree_init(&chunk_cache);
11066 block_group_tree_init(&block_group_cache);
11067 device_extent_tree_init(&dev_extent_cache);
11069 cache_tree_init(&extent_cache);
11070 cache_tree_init(&seen);
11071 cache_tree_init(&pending);
11072 cache_tree_init(&nodes);
11073 cache_tree_init(&reada);
11074 cache_tree_init(&corrupt_blocks);
11075 extent_io_tree_init(&excluded_extents);
11076 INIT_LIST_HEAD(&dropping_trees);
11077 INIT_LIST_HEAD(&normal_trees);
/* Publish the local caches and the free hook through fs_info. */
11080 fs_info->excluded_extents = &excluded_extents;
11081 fs_info->fsck_extent_cache = &extent_cache;
11082 fs_info->free_extent_hook = free_extent_hook;
11083 fs_info->corrupt_blocks = &corrupt_blocks;
11087 bits = malloc(bits_nr * sizeof(struct block_info));
11093 if (ctx.progress_enabled) {
11094 ctx.tp = TASK_EXTENTS;
11095 task_start(ctx.info);
/* The tree root and the chunk root are queued first, by hand. */
11099 root1 = fs_info->tree_root;
11100 level = btrfs_header_level(root1->node);
11101 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11102 root1->node->start, 0, level, 0, NULL);
11105 root1 = fs_info->chunk_root;
11106 level = btrfs_header_level(root1->node);
11107 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11108 root1->node->start, 0, level, 0, NULL);
/* Walk the items of the tree root and queue every ROOT_ITEM found. */
11111 btrfs_init_path(&path);
11114 key.type = BTRFS_ROOT_ITEM_KEY;
11115 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
11119 leaf = path.nodes[0];
11120 slot = path.slots[0];
/* Past the last item of this leaf: move on to the next leaf. */
11121 if (slot >= btrfs_header_nritems(path.nodes[0])) {
11122 ret = btrfs_next_leaf(root, &path);
11125 leaf = path.nodes[0];
11126 slot = path.slots[0];
11128 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11129 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
11130 unsigned long offset;
/* Copy the on-disk root item into the local struct before reading it. */
11133 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
11134 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
11135 last_snapshot = btrfs_root_last_snapshot(&ri);
/* A zero drop_progress objectid sends the root to normal_trees. */
11136 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
11137 level = btrfs_root_level(&ri);
11138 ret = add_root_item_to_list(&normal_trees,
11139 found_key.objectid,
11140 btrfs_root_bytenr(&ri),
11141 last_snapshot, level,
11146 level = btrfs_root_level(&ri);
11147 objectid = found_key.objectid;
/* found_key is reused to carry the cpu form of drop_progress. */
11148 btrfs_disk_key_to_cpu(&found_key,
11149 &ri.drop_progress);
11150 ret = add_root_item_to_list(&dropping_trees,
11152 btrfs_root_bytenr(&ri),
11153 last_snapshot, level,
11154 ri.drop_level, &found_key);
11161 btrfs_release_path(&path);
11164 * check_block can return -EAGAIN if it fixes something, please keep
11165 * this in mind when dealing with return values from these functions, if
11166 * we get -EAGAIN we want to fall through and restart the loop.
11168 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11169 &seen, &reada, &nodes, &extent_cache,
11170 &chunk_cache, &dev_cache, &block_group_cache,
11171 &dev_extent_cache);
11173 if (ret == -EAGAIN)
11177 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11178 &pending, &seen, &reada, &nodes,
11179 &extent_cache, &chunk_cache, &dev_cache,
11180 &block_group_cache, &dev_extent_cache);
11182 if (ret == -EAGAIN)
/* With every tree walked, cross-check the collected records. */
11187 ret = check_chunks(&chunk_cache, &block_group_cache,
11188 &dev_extent_cache, NULL, NULL, NULL, 0);
11190 if (ret == -EAGAIN)
11195 ret = check_extent_refs(root, &extent_cache);
11197 if (ret == -EAGAIN)
11202 ret = check_devices(&dev_cache, &dev_extent_cache);
/* First cleanup sequence; this one also clears the pointers set on fs_info. */
11207 task_stop(ctx.info);
11209 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11210 extent_io_tree_cleanup(&excluded_extents);
11211 fs_info->fsck_extent_cache = NULL;
11212 fs_info->free_extent_hook = NULL;
11213 fs_info->corrupt_blocks = NULL;
11214 fs_info->excluded_extents = NULL;
11217 free_chunk_cache_tree(&chunk_cache);
11218 free_device_cache_tree(&dev_cache);
11219 free_block_group_tree(&block_group_cache);
11220 free_device_extent_tree(&dev_extent_cache);
11221 free_extent_cache_tree(&seen);
11222 free_extent_cache_tree(&pending);
11223 free_extent_cache_tree(&reada);
11224 free_extent_cache_tree(&nodes);
11225 free_root_item_list(&normal_trees);
11226 free_root_item_list(&dropping_trees);
/* Second cleanup sequence; this one also frees the extent records. */
11229 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11230 free_extent_cache_tree(&seen);
11231 free_extent_cache_tree(&pending);
11232 free_extent_cache_tree(&reada);
11233 free_extent_cache_tree(&nodes);
11234 free_chunk_cache_tree(&chunk_cache);
11235 free_block_group_tree(&block_group_cache);
11236 free_device_cache_tree(&dev_cache);
11237 free_device_extent_tree(&dev_extent_cache);
11238 free_extent_record_cache(&extent_cache);
11239 free_root_item_list(&normal_trees);
11240 free_root_item_list(&dropping_trees);
11241 extent_io_tree_cleanup(&excluded_extents);
/*
 * Check the type of the inline ref @iref stored in @eb.
 *
 * @eb:   extent buffer that holds the inline ref
 * @key:  key of the extent item, used only in the error message
 * @iref: inline ref whose type is inspected
 *
 * The four ref key types listed below share one arm. The error report that
 * follows them sets ret to UNKNOWN_TYPE (presumably the default arm -
 * confirm).
 */
11245 static int check_extent_inline_ref(struct extent_buffer *eb,
11246 struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
11249 u8 type = btrfs_extent_inline_ref_type(eb, iref);
11252 case BTRFS_TREE_BLOCK_REF_KEY:
11253 case BTRFS_EXTENT_DATA_REF_KEY:
11254 case BTRFS_SHARED_BLOCK_REF_KEY:
11255 case BTRFS_SHARED_DATA_REF_KEY:
11259 error("extent[%llu %u %llu] has unknown ref type: %d",
11260 key->objectid, key->type, key->offset, type);
11261 ret = UNKNOWN_TYPE;
11269 * Check backrefs of a tree block given by @bytenr or @eb.
11271 * @root: the root containing the @bytenr or @eb
11272 * @eb: tree block extent buffer, can be NULL
11273 * @bytenr: bytenr of the tree block to search
11274 * @level: tree level of the tree block
11275 * @owner: owner of the tree block
11277 * Return >0 for any error found and output error message
11278 * Return 0 for no error found
/*
 * Lookup order used below: the inline refs of the extent or metadata item
 * first, then a keyed TREE_BLOCK_REF item for root->objectid, finally a
 * SHARED_BLOCK_REF item. The backref lost message at the end is printed
 * only when @eb is not NULL.
 *
 * @nrefs: passed to should_check_extent_strictly() and read for the parent
 *         bytenr used in the final message
 */
11280 static int check_tree_block_ref(struct btrfs_root *root,
11281 struct extent_buffer *eb, u64 bytenr,
11282 int level, u64 owner, struct node_refs *nrefs)
11284 struct btrfs_key key;
11285 struct btrfs_root *extent_root = root->fs_info->extent_root;
11286 struct btrfs_path path;
11287 struct btrfs_extent_item *ei;
11288 struct btrfs_extent_inline_ref *iref;
11289 struct extent_buffer *leaf;
11294 int root_level = btrfs_header_level(root->node);
11296 u32 nodesize = root->fs_info->nodesize;
11305 btrfs_init_path(&path);
11306 key.objectid = bytenr;
11307 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11308 key.type = BTRFS_METADATA_ITEM_KEY;
11310 key.type = BTRFS_EXTENT_ITEM_KEY;
11311 key.offset = (u64)-1;
11313 /* Search for the backref in extent tree */
11314 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11316 err |= BACKREF_MISSING;
11319 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11321 err |= BACKREF_MISSING;
11325 leaf = path.nodes[0];
11326 slot = path.slots[0];
11327 btrfs_item_key_to_cpu(leaf, &key, slot);
11329 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * A METADATA_ITEM keeps the level in key.offset and its inline refs start
 * right after the extent item; otherwise the level is read from the
 * btrfs_tree_block_info that sits between the item and the inline refs.
 */
11331 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11332 skinny_level = (int)key.offset;
11333 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11335 struct btrfs_tree_block_info *info;
11337 info = (struct btrfs_tree_block_info *)(ei + 1);
11338 skinny_level = btrfs_tree_block_level(leaf, info);
11339 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11348 * Due to the feature of shared tree blocks, if the upper node
11349 * is a fs root or shared node, the extent of checked node may
11350 * not be updated until the next CoW.
11353 strict = should_check_extent_strictly(root, nrefs,
11355 if (!(btrfs_extent_flags(leaf, ei) &
11356 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11358 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11359 key.objectid, nodesize,
11360 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11361 err = BACKREF_MISMATCH;
11363 header_gen = btrfs_header_generation(eb);
11364 extent_gen = btrfs_extent_generation(leaf, ei);
11365 if (header_gen != extent_gen) {
11367 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11368 key.objectid, nodesize, header_gen,
11370 err = BACKREF_MISMATCH;
11372 if (level != skinny_level) {
11374 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11375 key.objectid, nodesize, level, skinny_level);
11376 err = BACKREF_MISMATCH;
11378 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11380 "extent[%llu %u] is referred by other roots than %llu",
11381 key.objectid, nodesize, root->objectid);
11382 err = BACKREF_MISMATCH;
11387 * Iterate the extent/metadata item to find the exact backref
/* The inline refs occupy the byte range from ptr up to end. */
11389 item_size = btrfs_item_size_nr(leaf, slot);
11390 ptr = (unsigned long)iref;
11391 end = (unsigned long)ei + item_size;
11393 while (ptr < end) {
11394 iref = (struct btrfs_extent_inline_ref *)ptr;
11395 type = btrfs_extent_inline_ref_type(leaf, iref);
11396 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11398 ret = check_extent_inline_ref(leaf, &key, iref);
11403 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11404 if (offset == root->objectid)
11406 if (!strict && owner == offset)
11408 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11410 * Backref of tree reloc root points to itself, no need
11411 * to check backref any more.
11413 * This may be an error of loop backref, but extent tree
11414 * checker should have already handled it.
11415 * Here we only need to avoid infinite iteration.
11417 if (offset == bytenr) {
11421 * Check if the backref points to valid
11424 found_ref = !check_tree_block_ref( root, NULL,
11425 offset, level + 1, owner,
11432 ptr += btrfs_extent_inline_ref_size(type);
11436 * Inlined extent item doesn't have what we need, check
11437 * TREE_BLOCK_REF_KEY
11440 btrfs_release_path(&path);
11441 key.objectid = bytenr;
11442 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11443 key.offset = root->objectid;
11445 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11450 * Finally check SHARED BLOCK REF, any found will be good
11451 * Here we're not doing comprehensive extent backref checking,
11452 * only need to ensure there is some extent referring to this
11456 btrfs_release_path(&path);
11457 key.objectid = bytenr;
11458 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
11459 key.offset = (u64)-1;
11461 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11463 err |= BACKREF_MISSING;
11466 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11468 err |= BACKREF_MISSING;
11474 err |= BACKREF_MISSING;
11476 btrfs_release_path(&path);
/*
 * For the message below: use the bytenr recorded one level up as parent
 * when that level is marked full_backref and the check is strict.
 */
11477 if (nrefs && strict &&
11478 level < root_level && nrefs->full_backref[level + 1])
11479 parent = nrefs->bytenr[level + 1];
11480 if (eb && (err & BACKREF_MISSING))
11482 "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11483 bytenr, nodesize, owner, level,
11484 parent ? "parent" : "root",
11485 parent ? parent : root->objectid);
11490 * If @err contains BACKREF_MISSING then add extent of the
11491 * file_extent_data_item.
11493 * Returns error bits after repair.
/*
 * Repair helper for the EXTENT_DATA item at slot 0 of @pathp: look up the
 * extent item for the data extent, insert an empty one through
 * btrfs_insert_empty_item() where needed, then add the backref with
 * btrfs_inc_extent_ref() and clear BACKREF_MISSING from err.
 *
 * @trans: transaction used for the insert and the ref increment
 * @root:  root passed to btrfs_inc_extent_ref(); its objectid is printed
 * @pathp: path whose nodes[0] and slots[0] locate the file extent item
 * @nrefs: full_backref[0] decides whether a parent bytenr is used
 */
11495 static int repair_extent_data_item(struct btrfs_trans_handle *trans,
11496 struct btrfs_root *root,
11497 struct btrfs_path *pathp,
11498 struct node_refs *nrefs,
11501 struct btrfs_file_extent_item *fi;
11502 struct btrfs_key fi_key;
11503 struct btrfs_key key;
11504 struct btrfs_extent_item *ei;
11505 struct btrfs_path path;
11506 struct btrfs_root *extent_root = root->fs_info->extent_root;
11507 struct extent_buffer *eb;
11519 eb = pathp->nodes[0];
11520 slot = pathp->slots[0];
11521 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11522 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
/* Inline extents and holes (disk_bytenr of 0) are singled out here. */
11524 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11525 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11528 file_offset = fi_key.offset;
11529 generation = btrfs_file_extent_generation(eb, fi);
11530 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11531 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11532 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Backref offset is the file offset minus the offset into the extent. */
11533 offset = file_offset - extent_offset;
11535 /* now repair only adds backref */
11536 if ((err & BACKREF_MISSING) == 0)
11539 /* search extent item */
11540 key.objectid = disk_bytenr;
11541 key.type = BTRFS_EXTENT_ITEM_KEY;
11542 key.offset = num_bytes;
11544 btrfs_init_path(&path);
11545 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11551 /* insert an extent item */
11553 key.objectid = disk_bytenr;
11554 key.type = BTRFS_EXTENT_ITEM_KEY;
11555 key.offset = num_bytes;
11556 size = sizeof(*ei);
11558 btrfs_release_path(&path);
11559 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
11563 eb = path.nodes[0];
11564 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
/*
 * The new extent item starts with a ref count of 0; the backref itself is
 * added by btrfs_inc_extent_ref() further down.
 */
11566 btrfs_set_extent_refs(eb, ei, 0);
11567 btrfs_set_extent_generation(eb, ei, generation);
11568 btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
11570 btrfs_mark_buffer_dirty(eb);
11571 ret = btrfs_update_block_group(extent_root, disk_bytenr,
11573 btrfs_release_path(&path);
/*
 * NOTE(review): eb was reassigned above to the extent tree leaf of the
 * local path, which has been released by now. Confirm that the leaf meant
 * for the parent bytenr here is the one from @pathp.
 */
11576 if (nrefs->full_backref[0])
11577 parent = btrfs_header_bytenr(eb);
11581 ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
11583 parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
11587 "failed to increase extent data backref[%llu %llu] root %llu",
11588 disk_bytenr, num_bytes, root->objectid);
11591 printf("Add one extent data backref [%llu %llu]\n",
11592 disk_bytenr, num_bytes);
11595 err &= ~BACKREF_MISSING;
11598 error("can't repair root %llu extent data item[%llu %llu]",
11599 root->objectid, disk_bytenr, num_bytes);
11604 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11606 * Return >0 any error found and output error message
11607 * Return 0 for no error found
/*
 * Check one EXTENT_DATA item: size alignment first, then the data backref,
 * looked up as an inline ref, as a keyed EXTENT_DATA_REF item and finally
 * as a keyed SHARED_DATA_REF item.
 *
 * @root:          supplies sectorsize, objectid and the extent root
 * @pathp:         nodes[0] and slots[0] locate the EXTENT_DATA item
 * @nrefs:         passed to should_check_extent_strictly()
 * @account_bytes: when non-zero, aligned sizes are added to
 *                 data_bytes_allocated and data_bytes_referenced
 */
11609 static int check_extent_data_item(struct btrfs_root *root,
11610 struct btrfs_path *pathp,
11611 struct node_refs *nrefs, int account_bytes)
11613 struct btrfs_file_extent_item *fi;
11614 struct extent_buffer *eb = pathp->nodes[0];
11615 struct btrfs_path path;
11616 struct btrfs_root *extent_root = root->fs_info->extent_root;
11617 struct btrfs_key fi_key;
11618 struct btrfs_key dbref_key;
11619 struct extent_buffer *leaf;
11620 struct btrfs_extent_item *ei;
11621 struct btrfs_extent_inline_ref *iref;
11622 struct btrfs_extent_data_ref *dref;
11625 u64 disk_num_bytes;
11626 u64 extent_num_bytes;
11633 int found_dbackref = 0;
11634 int slot = pathp->slots[0];
11639 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11640 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11642 /* Nothing to check for hole and inline data extents */
11643 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11644 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11647 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11648 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11649 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11650 offset = btrfs_file_extent_offset(eb, fi);
11652 /* Check unaligned disk_num_bytes and num_bytes */
11653 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11655 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11656 fi_key.objectid, fi_key.offset, disk_num_bytes,
11657 root->fs_info->sectorsize);
11658 err |= BYTES_UNALIGNED;
11659 } else if (account_bytes) {
11660 data_bytes_allocated += disk_num_bytes;
11662 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11664 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11665 fi_key.objectid, fi_key.offset, extent_num_bytes,
11666 root->fs_info->sectorsize);
11667 err |= BYTES_UNALIGNED;
11668 } else if (account_bytes) {
11669 data_bytes_referenced += extent_num_bytes;
/* Owner recorded in the header of the leaf that holds the file extent. */
11671 owner = btrfs_header_owner(eb);
11673 /* Check the extent item of the file extent in extent tree */
11674 btrfs_init_path(&path);
11675 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11676 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11677 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11679 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11683 leaf = path.nodes[0];
11684 slot = path.slots[0];
11685 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11687 extent_flags = btrfs_extent_flags(leaf, ei);
11689 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11691 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11692 disk_bytenr, disk_num_bytes,
11693 BTRFS_EXTENT_FLAG_DATA);
11694 err |= BACKREF_MISMATCH;
11697 /* Check data backref inside that extent item */
11698 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11699 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11700 ptr = (unsigned long)iref;
11701 end = (unsigned long)ei + item_size;
11702 strict = should_check_extent_strictly(root, nrefs, -1);
11704 while (ptr < end) {
11708 bool match = false;
11710 iref = (struct btrfs_extent_inline_ref *)ptr;
11711 type = btrfs_extent_inline_ref_type(leaf, iref);
/* The data ref struct is read in place of the offset field of the iref. */
11712 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11714 ret = check_extent_inline_ref(leaf, &dbref_key, iref);
11719 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11720 ref_root = btrfs_extent_data_ref_root(leaf, dref);
11721 ref_objectid = btrfs_extent_data_ref_objectid(leaf, dref);
11722 ref_offset = btrfs_extent_data_ref_offset(leaf, dref);
/* The ref offset must equal the file offset minus the extent offset. */
11724 if (ref_objectid == fi_key.objectid &&
11725 ref_offset == fi_key.offset - offset)
/* In non-strict mode a ref owned by the leaf owner is accepted as well. */
11727 if (ref_root == root->objectid && match)
11728 found_dbackref = 1;
11729 else if (!strict && owner == ref_root && match)
11730 found_dbackref = 1;
11731 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
11732 found_dbackref = !check_tree_block_ref(root, NULL,
11733 btrfs_extent_inline_ref_offset(leaf, iref),
11737 if (found_dbackref)
11739 ptr += btrfs_extent_inline_ref_size(type);
11742 if (!found_dbackref) {
11743 btrfs_release_path(&path);
11745 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
11746 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11747 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
/* The key offset is a hash over root objectid, inode objectid and offset. */
11748 dbref_key.offset = hash_extent_data_ref(root->objectid,
11749 fi_key.objectid, fi_key.offset - offset);
11751 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11752 &dbref_key, &path, 0, 0);
11754 found_dbackref = 1;
11758 btrfs_release_path(&path);
11761 * Neither inlined nor EXTENT_DATA_REF found, try
11762 * SHARED_DATA_REF as last chance.
11764 dbref_key.objectid = disk_bytenr;
11765 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
/* The key offset is the start of the leaf holding the file extent. */
11766 dbref_key.offset = eb->start;
11768 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11769 &dbref_key, &path, 0, 0);
11771 found_dbackref = 1;
11777 if (!found_dbackref)
11778 err |= BACKREF_MISSING;
11779 btrfs_release_path(&path);
11780 if (err & BACKREF_MISSING) {
11781 error("data extent[%llu %llu] backref lost",
11782 disk_bytenr, disk_num_bytes);
11788 * Get real tree block level for the case like shared block
11789 * Return >= 0 as tree level
11790 * Return <0 for error
/*
 * Look up the level of the tree block at @bytenr from two sources: the
 * extent tree item (key offset of a METADATA_ITEM, otherwise the
 * btrfs_tree_block_info) and the header of the block itself. The two values
 * are compared and the header level is returned on a visible line.
 *
 * @fs_info: supplies the extent root and is passed to read_tree_block()
 * @bytenr:  start of the tree block to query
 */
11792 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11794 struct extent_buffer *eb;
11795 struct btrfs_path path;
11796 struct btrfs_key key;
11797 struct btrfs_extent_item *ei;
11804 /* Search extent tree for extent generation and level */
11805 key.objectid = bytenr;
11806 key.type = BTRFS_METADATA_ITEM_KEY;
11807 key.offset = (u64)-1;
11809 btrfs_init_path(&path);
11810 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
11813 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11821 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11822 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11823 struct btrfs_extent_item);
11824 flags = btrfs_extent_flags(path.nodes[0], ei);
/* The extent item must carry the TREE_BLOCK flag to describe a tree block. */
11825 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11830 /* Get transid for later read_tree_block() check */
11831 transid = btrfs_extent_generation(path.nodes[0], ei);
11833 /* Get backref level as one source */
11834 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11835 backref_level = key.offset;
11837 struct btrfs_tree_block_info *info;
11839 info = (struct btrfs_tree_block_info *)(ei + 1);
11840 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11842 btrfs_release_path(&path);
11844 /* Get level from tree block as an alternative source */
11845 eb = read_tree_block(fs_info, bytenr, transid);
11846 if (!extent_buffer_uptodate(eb)) {
11847 free_extent_buffer(eb);
11850 header_level = btrfs_header_level(eb);
11851 free_extent_buffer(eb);
/* Compare the level from the extent tree with the one from the header. */
11853 if (header_level != backref_level)
11855 return header_level;
11858 btrfs_release_path(&path);
11863 * Check if a tree block backref is valid (points to a valid tree block)
11864 * if level == -1, level will be resolved
11865 * Return >0 for any error found and print error message
/*
 * Verify a tree block backref by walking root @root_id down to the first
 * key of the block at @bytenr and comparing the node found at @level with
 * the expected bytenr and level.
 *
 * @fs_info: filesystem to read roots and tree blocks from
 * @root_id: objectid of the root named by the backref
 * @bytenr:  start of the tree block the backref belongs to
 * @level:   expected level of the block
 */
11867 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11868 u64 bytenr, int level)
11870 struct btrfs_root *root;
11871 struct btrfs_key key;
11872 struct btrfs_path path;
11873 struct extent_buffer *eb;
11874 struct extent_buffer *node;
11875 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11879 /* Query level for level == -1 special case */
11881 level = query_tree_block_level(fs_info, bytenr);
11883 err |= REFERENCER_MISSING;
11887 key.objectid = root_id;
11888 key.type = BTRFS_ROOT_ITEM_KEY;
11889 key.offset = (u64)-1;
11891 root = btrfs_read_fs_root(fs_info, &key);
11892 if (IS_ERR(root)) {
11893 err |= REFERENCER_MISSING;
11897 /* Read out the tree block to get item/node key */
11898 eb = read_tree_block(fs_info, bytenr, 0);
11899 if (!extent_buffer_uptodate(eb)) {
11900 err |= REFERENCER_MISSING;
11901 free_extent_buffer(eb);
11905 /* Empty tree, no need to check key */
11906 if (!btrfs_header_nritems(eb) && !level) {
11907 free_extent_buffer(eb);
/* The first key is read with the node or the item accessor. */
11912 btrfs_node_key_to_cpu(eb, &key, 0);
11914 btrfs_item_key_to_cpu(eb, &key, 0);
11916 free_extent_buffer(eb);
11918 btrfs_init_path(&path);
/* Stop the descent at @level so that path.nodes[level] can be inspected. */
11919 path.lowest_level = level;
11920 /* Search with the first key, to ensure we can reach it */
11921 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11923 err |= REFERENCER_MISSING;
11927 node = path.nodes[level];
11928 if (btrfs_header_bytenr(node) != bytenr) {
11930 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
11931 bytenr, nodesize, bytenr,
11932 btrfs_header_bytenr(node));
11933 err |= REFERENCER_MISMATCH;
11935 if (btrfs_header_level(node) != level) {
11937 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
11938 bytenr, nodesize, level,
11939 btrfs_header_level(node));
11940 err |= REFERENCER_MISMATCH;
11944 btrfs_release_path(&path);
/* Two message variants follow, one without and one with the level. */
11946 if (err & REFERENCER_MISSING) {
11948 error("extent [%llu %d] lost referencer (owner: %llu)",
11949 bytenr, nodesize, root_id);
11952 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11953 bytenr, nodesize, root_id, level);
11960 * Check if tree block @eb is tree reloc root.
11961 * Return 0 if it's not or any problem happens
11962 * Return 1 if it's a tree reloc root
/*
 * Read the tree reloc root keyed by the owner of @eb and compare the bytenr
 * of its root node with the bytenr of @eb.
 *
 * @fs_info: filesystem to read the reloc root from
 * @eb:      tree block under test; bytenr and owner come from its header
 */
11964 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11965 struct extent_buffer *eb)
11967 struct btrfs_root *tree_reloc_root;
11968 struct btrfs_key key;
11969 u64 bytenr = btrfs_header_bytenr(eb);
11970 u64 owner = btrfs_header_owner(eb);
/* Root item key of the reloc tree: TREE_RELOC objectid, offset = owner. */
11973 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11974 key.offset = owner;
11975 key.type = BTRFS_ROOT_ITEM_KEY;
/* Read without the root cache; the root is freed again below. */
11977 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11978 if (IS_ERR(tree_reloc_root))
11981 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
11983 btrfs_free_fs_root(tree_reloc_root);
11988 * Check referencer for shared block backref
11989 * If level == -1, this function will resolve the level.
/*
 * Read the tree block at @parent and look for a block pointer equal to
 * @bytenr among its entries. REFERENCER_MISSING is returned, with an error
 * message, when no parent was found.
 *
 * @fs_info: filesystem to read from
 * @parent:  bytenr of the node named by the shared backref
 * @bytenr:  start of the tree block that should be referenced
 * @level:   level of that tree block
 */
11991 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11992 u64 parent, u64 bytenr, int level)
11994 struct extent_buffer *eb;
11996 int found_parent = 0;
11999 eb = read_tree_block(fs_info, parent, 0);
12000 if (!extent_buffer_uptodate(eb))
12004 level = query_tree_block_level(fs_info, bytenr);
12008 /* It's possible it's a tree reloc root */
12009 if (parent == bytenr) {
12010 if (is_tree_reloc_root(fs_info, eb))
/* The parent must sit exactly one level above the referenced block. */
12015 if (level + 1 != btrfs_header_level(eb))
/* Scan every block pointer of the parent node for @bytenr. */
12018 nr = btrfs_header_nritems(eb);
12019 for (i = 0; i < nr; i++) {
12020 if (bytenr == btrfs_node_blockptr(eb, i)) {
12026 free_extent_buffer(eb);
12027 if (!found_parent) {
12029 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
12030 bytenr, fs_info->nodesize, parent, level);
12031 return REFERENCER_MISSING;
12037 * Check referencer for normal (inlined) data ref
12038 * If len == 0, it will be resolved by searching in extent tree
/*
 * Count the file extents of inode @objectid in root @root_id that point at
 * the data extent at @bytenr with length @len and backref offset @offset,
 * then compare the count with @count. A mismatch is reported and returns
 * REFERENCER_MISSING.
 *
 * @fs_info:  filesystem to search
 * @root_id:  objectid of the root named by the data backref
 * @objectid: inode number named by the data backref
 * @offset:   backref offset (file offset minus file extent offset)
 * @bytenr:   start of the data extent
 * @len:      length of the data extent
 * @count:    reference count recorded in the backref
 */
12040 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
12041 u64 root_id, u64 objectid, u64 offset,
12042 u64 bytenr, u64 len, u32 count)
12044 struct btrfs_root *root;
12045 struct btrfs_root *extent_root = fs_info->extent_root;
12046 struct btrfs_key key;
12047 struct btrfs_path path;
12048 struct extent_buffer *leaf;
12049 struct btrfs_file_extent_item *fi;
12050 u32 found_count = 0;
/* Extent tree lookup of the EXTENT_ITEM for @bytenr. */
12055 key.objectid = bytenr;
12056 key.type = BTRFS_EXTENT_ITEM_KEY;
12057 key.offset = (u64)-1;
12059 btrfs_init_path(&path);
12060 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12063 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
12066 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12067 if (key.objectid != bytenr ||
12068 key.type != BTRFS_EXTENT_ITEM_KEY)
12071 btrfs_release_path(&path);
12073 key.objectid = root_id;
12074 key.type = BTRFS_ROOT_ITEM_KEY;
12075 key.offset = (u64)-1;
12076 btrfs_init_path(&path);
12078 root = btrfs_read_fs_root(fs_info, &key);
12082 key.objectid = objectid;
12083 key.type = BTRFS_EXTENT_DATA_KEY;
12085 * It can be nasty as data backref offset is
12086 * file offset - file extent offset, which is smaller or
12087 * equal to original backref offset. The only special case is
12088 * overflow. So we need to special check and do further search.
/* With the top bit of @offset set, the search starts at file offset 0. */
12090 key.offset = offset & (1ULL << 63) ? 0 : offset;
12092 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12097 * Search afterwards to get correct one
12098 * NOTE: As we must do a comprehensive check on the data backref to
12099 * make sure the dref count also matches, we must iterate all file
12100 * extents for that inode.
12103 leaf = path.nodes[0];
12104 slot = path.slots[0];
12106 if (slot >= btrfs_header_nritems(leaf) ||
12107 btrfs_header_owner(leaf) != root_id)
12109 btrfs_item_key_to_cpu(leaf, &key, slot);
12110 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
12112 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
12114 * Except normal disk bytenr and disk num bytes, we still
12115 * need to do extra check on dbackref offset as
12116 * dbackref offset = file_offset - file_extent_offset
12118 * Also, we must check the leaf owner.
12119 * In case of shared tree blocks (snapshots) we can inherit
12120 * leaves from source snapshot.
12121 * In that case, reference from source snapshot should not
12124 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
12125 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
12126 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
12127 offset && btrfs_header_owner(leaf) == root_id)
12131 ret = btrfs_next_item(root, &path);
12136 btrfs_release_path(&path);
/* The number of matching file extents must equal the recorded count. */
12137 if (found_count != count) {
12139 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
12140 bytenr, len, root_id, objectid, offset, count, found_count);
12141 return REFERENCER_MISSING;
12147 * Check if the referencer of a shared data backref exists
/*
 * Read the tree block at @parent and look through its items for an
 * EXTENT_DATA item whose disk bytenr equals @bytenr. REFERENCER_MISSING is
 * returned, with an error message, when no such item was found.
 *
 * @fs_info: filesystem to read from
 * @parent:  bytenr of the leaf named by the shared data backref
 * @bytenr:  start of the data extent that should be referenced
 */
12149 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
12150 u64 parent, u64 bytenr)
12152 struct extent_buffer *eb;
12153 struct btrfs_key key;
12154 struct btrfs_file_extent_item *fi;
12156 int found_parent = 0;
12159 eb = read_tree_block(fs_info, parent, 0);
12160 if (!extent_buffer_uptodate(eb))
12163 nr = btrfs_header_nritems(eb);
12164 for (i = 0; i < nr; i++) {
12165 btrfs_item_key_to_cpu(eb, &key, i);
/* Items of other types and inline file extents are tested for first. */
12166 if (key.type != BTRFS_EXTENT_DATA_KEY)
12169 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
12170 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
12173 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
12180 free_extent_buffer(eb);
12181 if (!found_parent) {
12182 error("shared extent %llu referencer lost (parent: %llu)",
12184 return REFERENCER_MISSING;
12190 * Only delete backref if REFERENCER_MISSING now
12192 * Returns <0 the extent was deleted
12193 * Returns >0 the backref was deleted but extent still exists, returned value
12194 * means error after repair
12195 * Returns 0 nothing happened
/*
 * Repair helper for one backref of an extent item: when @err carries
 * REFERENCER_MISSING or REFERENCER_MISMATCH the backref is dropped through
 * btrfs_free_extent(), then @path is released and searched again with the
 * key it pointed at on entry.
 *
 * @trans:         transaction used by btrfs_free_extent()
 * @root:          tree searched again for the old key
 * @path:          path at the extent item; repositioned before returning
 * @bytenr:        start of the extent
 * @num_bytes:     length of the extent
 * @parent:        passed through to btrfs_free_extent()
 * @root_objectid: passed through to btrfs_free_extent()
 * @owner:         passed through to btrfs_free_extent()
 * @offset:        passed through to btrfs_free_extent()
 * @err:           error bits found for this backref
 */
12197 static int repair_extent_item(struct btrfs_trans_handle *trans,
12198 struct btrfs_root *root, struct btrfs_path *path,
12199 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
12200 u64 owner, u64 offset, int err)
12202 struct btrfs_key old_key;
/* Remember the current key so the path can be searched again below. */
12206 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
/*
 * NOTE(review): the condition also accepts REFERENCER_MISMATCH, but only
 * REFERENCER_MISSING is cleared on a visible line - confirm that this is
 * intended.
 */
12208 if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
12209 /* delete the backref */
12210 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
12211 num_bytes, parent, root_objectid, owner, offset);
12214 err &= ~REFERENCER_MISSING;
12215 printf("Delete backref in extent [%llu %llu]\n",
12216 bytenr, num_bytes);
12218 error("fail to delete backref in extent [%llu %llu]",
12219 bytenr, num_bytes);
12223 /* btrfs_free_extent may delete the extent */
12224 btrfs_release_path(path);
12225 ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
12235 * This function will check a given extent item, including its backref and
12236 * itself (like crossing stripe boundary and type)
12238 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Check the extent item at slot 0 of @path: account its size in bytes_used,
 * test metadata for stripe crossing, then walk the inline backrefs and
 * dispatch each one to the matching referencer check.
 *
 * @trans:   transaction passed to repair_extent_item()
 * @fs_info: filesystem the item belongs to
 * @path:    nodes[0] and slots[0] locate the extent item
 */
12240 static int check_extent_item(struct btrfs_trans_handle *trans,
12241 struct btrfs_fs_info *fs_info,
12242 struct btrfs_path *path)
12244 struct btrfs_extent_item *ei;
12245 struct btrfs_extent_inline_ref *iref;
12246 struct btrfs_extent_data_ref *dref;
12247 struct extent_buffer *eb = path->nodes[0];
12250 int slot = path->slots[0];
12252 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12253 u32 item_size = btrfs_item_size_nr(eb, slot);
12263 struct btrfs_key key;
12267 btrfs_item_key_to_cpu(eb, &key, slot);
/* An EXTENT_ITEM key carries its length in key.offset; else use nodesize. */
12268 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12269 bytes_used += key.offset;
12270 num_bytes = key.offset;
12272 bytes_used += nodesize;
12273 num_bytes = nodesize;
12276 if (item_size < sizeof(*ei)) {
12278 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12279 * old thing when on disk format is still un-determined.
12280 * No need to care about it anymore
12282 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12286 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12287 flags = btrfs_extent_flags(eb, ei);
12289 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
12291 if (metadata && check_crossing_stripes(global_info, key.objectid,
12293 error("bad metadata [%llu, %llu) crossing stripe boundary",
12294 key.objectid, key.objectid + nodesize);
12295 err |= CROSSING_STRIPE_BOUNDARY;
/* ptr walks the inline refs that follow the extent item. */
12298 ptr = (unsigned long)(ei + 1);
12300 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12301 /* Old EXTENT_ITEM metadata */
12302 struct btrfs_tree_block_info *info;
12304 info = (struct btrfs_tree_block_info *)ptr;
12305 level = btrfs_tree_block_level(eb, info);
12306 ptr += sizeof(struct btrfs_tree_block_info);
12308 /* New METADATA_ITEM */
12309 level = key.offset;
12311 end = (unsigned long)ei + item_size;
12314 /* Reached extent item end normally */
12318 /* Beyond extent item end, wrong item size */
12320 err |= ITEM_SIZE_MISMATCH;
12321 error("extent item at bytenr %llu slot %d has wrong size",
12330 /* Now check every backref in this extent item */
12331 iref = (struct btrfs_extent_inline_ref *)ptr;
12332 type = btrfs_extent_inline_ref_type(eb, iref);
12333 offset = btrfs_extent_inline_ref_offset(eb, iref);
/* One arm per ref type; the inline ref offset is the root or the parent. */
12335 case BTRFS_TREE_BLOCK_REF_KEY:
12336 root_objectid = offset;
12338 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12342 case BTRFS_SHARED_BLOCK_REF_KEY:
12344 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12348 case BTRFS_EXTENT_DATA_REF_KEY:
12349 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12350 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12351 owner = btrfs_extent_data_ref_objectid(eb, dref);
12352 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12353 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12354 owner_offset, key.objectid, key.offset,
12355 btrfs_extent_data_ref_count(eb, dref));
12358 case BTRFS_SHARED_DATA_REF_KEY:
12360 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12364 error("extent[%llu %d %llu] has unknown ref type: %d",
12365 key.objectid, key.type, key.offset, type);
12366 ret = UNKNOWN_TYPE;
/* In repair mode the result of the check above is handed to the repair. */
12371 if (err && repair) {
12372 ret = repair_extent_item(trans, fs_info->extent_root, path,
12373 key.objectid, num_bytes, parent, root_objectid,
12374 owner, owner_offset, ret);
/* Step to the next inline ref; its size depends on the ref type. */
12383 ptr += btrfs_extent_inline_ref_size(type);
12391 * Check if a dev extent item is referred correctly by its chunk
/*
 * check_dev_extent_item() - verify that a DEV_EXTENT item is referenced by a
 * stripe of the chunk it names.
 *
 * @fs_info: filesystem info; supplies the chunk root
 * @eb:      leaf holding the dev extent item
 * @slot:    slot of the dev extent item inside @eb
 *
 * The chunk is looked up in the chunk tree by the (chunk_objectid,
 * CHUNK_ITEM, chunk_offset) recorded in the dev extent, validated with
 * btrfs_check_chunk_valid(), and its stripe length is compared with the dev
 * extent length.  The stripes are then scanned for one whose devid and offset
 * equal the dev extent key.  REFERENCER_MISSING is returned when found_chunk
 * stays zero.
 */
12393 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12394 struct extent_buffer *eb, int slot)
12396 struct btrfs_root *chunk_root = fs_info->chunk_root;
12397 struct btrfs_dev_extent *ptr;
12398 struct btrfs_path path;
12399 struct btrfs_key chunk_key;
12400 struct btrfs_key devext_key;
12401 struct btrfs_chunk *chunk;
12402 struct extent_buffer *l;
12406 int found_chunk = 0;
12409 btrfs_item_key_to_cpu(eb, &devext_key, slot);
12410 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12411 length = btrfs_dev_extent_length(eb, ptr);
/* Build the key of the chunk this dev extent claims to belong to */
12413 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12414 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12415 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12417 btrfs_init_path(&path);
12418 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12423 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12424 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
12429 if (btrfs_stripe_length(fs_info, l, chunk) != length)
12432 num_stripes = btrfs_chunk_num_stripes(l, chunk);
/* Look for a stripe that points back at this (devid, physical offset) */
12433 for (i = 0; i < num_stripes; i++) {
12434 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12435 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12437 if (devid == devext_key.objectid &&
12438 offset == devext_key.offset) {
12444 btrfs_release_path(&path);
12445 if (!found_chunk) {
12447 "device extent[%llu, %llu, %llu] did not find the related chunk",
12448 devext_key.objectid, devext_key.offset, length);
12449 return REFERENCER_MISSING;
12455 * Check if the used space is correct with the dev item
/*
 * check_dev_item() - compare the bytes_used field of a DEV_ITEM with the sum
 * of the lengths of all DEV_EXTENT items of that device.
 *
 * @fs_info: filesystem info; supplies the dev root and sectorsize
 * @eb:      leaf holding the dev item
 * @slot:    slot of the dev item inside @eb
 *
 * The dev tree is searched from (dev_id, DEV_EXTENT, ...) and walked with
 * btrfs_next_item(), adding each dev extent length to total.  The error path
 * after the initial search returns REFERENCER_MISSING; a difference between
 * used and total returns ACCOUNTING_MISMATCH.  The device total size is
 * finally passed to check_dev_size_alignment().
 */
12457 static int check_dev_item(struct btrfs_fs_info *fs_info,
12458 struct extent_buffer *eb, int slot)
12460 struct btrfs_root *dev_root = fs_info->dev_root;
12461 struct btrfs_dev_item *dev_item;
12462 struct btrfs_path path;
12463 struct btrfs_key key;
12464 struct btrfs_dev_extent *ptr;
12471 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12472 dev_id = btrfs_device_id(eb, dev_item);
12473 used = btrfs_device_bytes_used(eb, dev_item);
12474 total_bytes = btrfs_device_total_bytes(eb, dev_item);
12476 key.objectid = dev_id;
12477 key.type = BTRFS_DEV_EXTENT_KEY;
12480 btrfs_init_path(&path);
12481 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* key is reloaded from the dev item itself so the message names the dev item */
12483 btrfs_item_key_to_cpu(eb, &key, slot);
12484 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12485 key.objectid, key.type, key.offset);
12486 btrfs_release_path(&path);
12487 return REFERENCER_MISSING;
12490 /* Iterate dev_extents to calculate the used space of a device */
12492 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12495 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12496 if (key.objectid > dev_id)
12498 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12501 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12502 struct btrfs_dev_extent);
12503 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12505 ret = btrfs_next_item(dev_root, &path);
12509 btrfs_release_path(&path);
12511 if (used != total) {
/* NOTE(review): key is reloaded here but the message below prints BTRFS_ROOT_TREE_OBJECTID and BTRFS_DEV_EXTENT_KEY instead of the key fields - confirm which was intended */
12512 btrfs_item_key_to_cpu(eb, &key, slot);
12514 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12515 total, used, BTRFS_ROOT_TREE_OBJECTID,
12516 BTRFS_DEV_EXTENT_KEY, dev_id);
12517 return ACCOUNTING_MISMATCH;
12519 check_dev_size_alignment(dev_id, total_bytes, fs_info->sectorsize);
12525 * Check a block group item with its referencer (chunk) and its used space
12526 * with extent/metadata item
/*
 * check_block_group_item() - verify a BLOCK_GROUP_ITEM against its chunk and
 * against the extents recorded inside its byte range.
 *
 * @fs_info: filesystem info; supplies extent root, chunk root and nodesize
 * @eb:      leaf holding the block group item
 * @slot:    slot of the block group item inside @eb
 *
 * Step one looks up the chunk item keyed by the block group start and
 * compares its length (REFERENCER_MISSING / REFERENCER_MISMATCH).
 * Step two walks the extent tree from the block group start, accumulates the
 * size of every EXTENT_ITEM / METADATA_ITEM below the block group end into
 * total, and checks that data extents sit in DATA block groups and tree
 * blocks sit in SYSTEM or METADATA block groups (CHUNK_TYPE_MISMATCH).
 * Step three compares total with the used field (BG_ACCOUNTING_ERROR).
 */
12528 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12529 struct extent_buffer *eb, int slot)
12531 struct btrfs_root *extent_root = fs_info->extent_root;
12532 struct btrfs_root *chunk_root = fs_info->chunk_root;
12533 struct btrfs_block_group_item *bi;
12534 struct btrfs_block_group_item bg_item;
12535 struct btrfs_path path;
12536 struct btrfs_key bg_key;
12537 struct btrfs_key chunk_key;
12538 struct btrfs_key extent_key;
12539 struct btrfs_chunk *chunk;
12540 struct extent_buffer *leaf;
12541 struct btrfs_extent_item *ei;
12542 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12550 btrfs_item_key_to_cpu(eb, &bg_key, slot);
12551 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* Copy the item out of the extent buffer so its fields can be read from a local struct */
12552 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12553 used = btrfs_block_group_used(&bg_item);
12554 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk is looked up with the block group start as key offset */
12556 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12557 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12558 chunk_key.offset = bg_key.objectid;
12560 btrfs_init_path(&path);
12561 /* Search for the referencer chunk */
12562 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12565 "block group[%llu %llu] did not find the related chunk item",
12566 bg_key.objectid, bg_key.offset);
12567 err |= REFERENCER_MISSING;
12569 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12570 struct btrfs_chunk);
12571 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12574 "block group[%llu %llu] related chunk item length does not match",
12575 bg_key.objectid, bg_key.offset);
12576 err |= REFERENCER_MISMATCH;
12579 btrfs_release_path(&path);
12581 /* Search from the block group bytenr */
12582 extent_key.objectid = bg_key.objectid;
12583 extent_key.type = 0;
12584 extent_key.offset = 0;
12586 btrfs_init_path(&path);
12587 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12591 /* Iterate extent tree to account used space */
12593 leaf = path.nodes[0];
12595 /* Search slot can point to the last item beyond leaf nritems */
12596 if (path.slots[0] >= btrfs_header_nritems(leaf))
12599 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* Keys at or past the end of the block group range are outside this block group */
12600 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12603 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12604 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12606 if (extent_key.objectid < bg_key.objectid)
12609 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12612 total += extent_key.offset;
12614 ei = btrfs_item_ptr(leaf, path.slots[0],
12615 struct btrfs_extent_item);
12616 flags = btrfs_extent_flags(leaf, ei);
12617 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12618 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12620 "bad extent[%llu, %llu) type mismatch with chunk",
12621 extent_key.objectid,
12622 extent_key.objectid + extent_key.offset);
12623 err |= CHUNK_TYPE_MISMATCH;
12625 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12626 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12627 BTRFS_BLOCK_GROUP_METADATA))) {
12629 "bad extent[%llu, %llu) type mismatch with chunk",
12630 extent_key.objectid,
12631 extent_key.objectid + nodesize);
12632 err |= CHUNK_TYPE_MISMATCH;
12636 ret = btrfs_next_item(extent_root, &path);
12642 btrfs_release_path(&path);
12644 if (total != used) {
12646 "block group[%llu %llu] used %llu but extent items used %llu",
12647 bg_key.objectid, bg_key.offset, used, total);
12648 err |= BG_ACCOUNTING_ERROR;
12654 * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
12655 * FIXME: We still need to repair error of dev_item.
12657 * Returns error after repair.
/*
 * repair_chunk_item() - recreate the block group for a chunk whose block
 * group item was reported missing.
 *
 * @trans:      running transaction used for the insertion
 * @chunk_root: root whose fs_info is passed to btrfs_make_block_group()
 * @path:       path positioned on the chunk item (nodes[0] / slots[0])
 * @err:        error bits produced by the preceding chunk check
 *
 * Only REFERENCER_MISSING is handled: the block group is created from the
 * chunk type, the chunk key offset (start) and the chunk length.  One path
 * reports the failure, the other clears REFERENCER_MISSING from err and
 * prints a confirmation (the branch condition itself is not visible here).
 */
12659 static int repair_chunk_item(struct btrfs_trans_handle *trans,
12660 struct btrfs_root *chunk_root,
12661 struct btrfs_path *path, int err)
12663 struct btrfs_chunk *chunk;
12664 struct btrfs_key chunk_key;
12665 struct extent_buffer *eb = path->nodes[0];
12667 int slot = path->slots[0];
12671 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
/* Anything that is not a chunk item is left alone */
12672 if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
12674 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
/* path->nodes[0] is the same buffer as eb (see the initializer above) */
12675 type = btrfs_chunk_type(path->nodes[0], chunk);
12676 length = btrfs_chunk_length(eb, chunk);
12678 if (err & REFERENCER_MISSING) {
12679 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
12680 type, chunk_key.offset, length);
12682 error("fail to add block group item[%llu %llu]",
12683 chunk_key.offset, length);
12686 err &= ~REFERENCER_MISSING;
12687 printf("Added block group item[%llu %llu]\n",
12688 chunk_key.offset, length);
12697 * Check a chunk item.
12698 * Including checking all referred dev_extents and block group
/*
 * check_chunk_item() - verify a CHUNK_ITEM against its block group item and
 * against the dev extent of every stripe.
 *
 * @fs_info: filesystem info; supplies the extent root and dev root
 * @eb:      leaf holding the chunk item
 * @slot:    slot of the chunk item inside @eb
 *
 * The chunk is validated with btrfs_check_chunk_valid() (failure sets
 * BYTES_UNALIGNED | UNKNOWN_TYPE).  The block group keyed by
 * (chunk start, BLOCK_GROUP_ITEM, chunk length) is searched in the extent
 * tree and its flags compared with the chunk type.  For each stripe the dev
 * extent keyed by (devid, DEV_EXTENT, physical offset) is searched in the
 * dev tree and must name this chunk and carry the stripe length; otherwise
 * BACKREF_MISSING is set.
 */
12700 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12701 struct extent_buffer *eb, int slot)
12703 struct btrfs_root *extent_root = fs_info->extent_root;
12704 struct btrfs_root *dev_root = fs_info->dev_root;
12705 struct btrfs_path path;
12706 struct btrfs_key chunk_key;
12707 struct btrfs_key bg_key;
12708 struct btrfs_key devext_key;
12709 struct btrfs_chunk *chunk;
12710 struct extent_buffer *leaf;
12711 struct btrfs_block_group_item *bi;
12712 struct btrfs_block_group_item bg_item;
12713 struct btrfs_dev_extent *ptr;
12725 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12726 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12727 length = btrfs_chunk_length(eb, chunk);
12728 chunk_end = chunk_key.offset + length;
12729 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12732 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12734 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12737 type = btrfs_chunk_type(eb, chunk);
/* Key of the block group that should mirror this chunk: same start, same length */
12739 bg_key.objectid = chunk_key.offset;
12740 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12741 bg_key.offset = length;
12743 btrfs_init_path(&path);
12744 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
12747 "chunk[%llu %llu) did not find the related block group item",
12748 chunk_key.offset, chunk_end);
12749 err |= REFERENCER_MISSING;
12751 leaf = path.nodes[0];
12752 bi = btrfs_item_ptr(leaf, path.slots[0],
12753 struct btrfs_block_group_item);
12754 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
12756 if (btrfs_block_group_flags(&bg_item) != type) {
12758 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
12759 chunk_key.offset, chunk_end, type,
12760 btrfs_block_group_flags(&bg_item));
/* NOTE(review): a flags mismatch is recorded with the same bit as a missing block group - confirm this is intended */
12761 err |= REFERENCER_MISSING;
12765 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
12766 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
12767 for (i = 0; i < num_stripes; i++) {
/* The path is reset at the top of each iteration before the next dev extent search */
12768 btrfs_release_path(&path);
12769 btrfs_init_path(&path);
12770 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
12771 devext_key.type = BTRFS_DEV_EXTENT_KEY;
12772 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
12774 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
12777 goto not_match_dev;
12779 leaf = path.nodes[0];
12780 ptr = btrfs_item_ptr(leaf, path.slots[0],
12781 struct btrfs_dev_extent);
12782 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
12783 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
12784 if (objectid != chunk_key.objectid ||
12785 offset != chunk_key.offset ||
12786 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
12787 goto not_match_dev;
12790 err |= BACKREF_MISSING;
/* NOTE(review): this message prints chunk_key.objectid as the range start while the other messages in this function print chunk_key.offset - confirm */
12792 "chunk[%llu %llu) stripe %d did not find the related dev extent",
12793 chunk_key.objectid, chunk_end, i);
12796 btrfs_release_path(&path);
/*
 * delete_extent_tree_item() - delete the item the path currently points at.
 *
 * @trans: running transaction
 * @root:  tree that owns the item
 * @path:  path positioned on the item; it is released and searched again
 *
 * The key is saved first because the read-only path has to be dropped and
 * replaced by a search with ins_len -1 and cow 1 before btrfs_del_item() is
 * called.  The outcome is reported with error() or printf().
 */
12801 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
12802 struct btrfs_root *root,
12803 struct btrfs_path *path)
12805 struct btrfs_key key;
12808 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
12809 btrfs_release_path(path);
12810 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
12816 ret = btrfs_del_item(trans, root, path);
/* When the path sits on the first slot of a leaf, step back to the previous leaf */
12820 if (path->slots[0] == 0)
12821 btrfs_prev_leaf(root, path);
12826 error("failed to delete root %llu item[%llu, %u, %llu]",
12827 root->objectid, key.objectid, key.type, key.offset);
12829 printf("Deleted root %llu item[%llu, %u, %llu]\n",
12830 root->objectid, key.objectid, key.type, key.offset);
12835 * Main entry function to check known items and update related accounting info
/*
 * check_leaf_items() - dispatch the item under the path to the checker that
 * matches its key type, and to the matching repair helper where one exists.
 *
 * @trans:         transaction handle forwarded to the repair helpers
 * @root:          tree being walked
 * @path:          path positioned on the leaf item to check
 * @nrefs:         node reference bookkeeping forwarded to the extent data
 *                 checker and repairer
 * @account_bytes: forwarded to check_extent_data_item()
 *
 * A slot at or beyond the number of items in the leaf is reported as an
 * empty leaf.  Standalone backref items that come back with
 * REFERENCER_MISMATCH or REFERENCER_MISSING are removed through
 * delete_extent_tree_item(); block group items only on REFERENCER_MISSING.
 */
12837 static int check_leaf_items(struct btrfs_trans_handle *trans,
12838 struct btrfs_root *root, struct btrfs_path *path,
12839 struct node_refs *nrefs, int account_bytes)
12841 struct btrfs_fs_info *fs_info = root->fs_info;
12842 struct btrfs_key key;
12843 struct extent_buffer *eb;
12846 struct btrfs_extent_data_ref *dref;
12851 eb = path->nodes[0];
12852 slot = path->slots[0];
12853 if (slot >= btrfs_header_nritems(eb)) {
12855 error("empty leaf [%llu %u] root %llu", eb->start,
12856 root->fs_info->nodesize, root->objectid);
12862 btrfs_item_key_to_cpu(eb, &key, slot);
12866 case BTRFS_EXTENT_DATA_KEY:
12867 ret = check_extent_data_item(root, path, nrefs, account_bytes);
12869 ret = repair_extent_data_item(trans, root, path, nrefs,
12873 case BTRFS_BLOCK_GROUP_ITEM_KEY:
12874 ret = check_block_group_item(fs_info, eb, slot);
12876 ret & REFERENCER_MISSING)
12877 ret = delete_extent_tree_item(trans, root, path);
12880 case BTRFS_DEV_ITEM_KEY:
12881 ret = check_dev_item(fs_info, eb, slot);
12884 case BTRFS_CHUNK_ITEM_KEY:
12885 ret = check_chunk_item(fs_info, eb, slot);
12887 ret = repair_chunk_item(trans, root, path, ret);
12890 case BTRFS_DEV_EXTENT_KEY:
12891 ret = check_dev_extent_item(fs_info, eb, slot);
12894 case BTRFS_EXTENT_ITEM_KEY:
12895 case BTRFS_METADATA_ITEM_KEY:
12896 ret = check_extent_item(trans, fs_info, path);
/* Csum items are only accounted here: their item size is added to total_csum_bytes */
12899 case BTRFS_EXTENT_CSUM_KEY:
12900 total_csum_bytes += btrfs_item_size_nr(eb, slot);
12903 case BTRFS_TREE_BLOCK_REF_KEY:
12904 ret = check_tree_block_backref(fs_info, key.offset,
12907 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12908 ret = delete_extent_tree_item(trans, root, path);
12911 case BTRFS_EXTENT_DATA_REF_KEY:
12912 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
12913 ret = check_extent_data_backref(fs_info,
12914 btrfs_extent_data_ref_root(eb, dref),
12915 btrfs_extent_data_ref_objectid(eb, dref),
12916 btrfs_extent_data_ref_offset(eb, dref),
12918 btrfs_extent_data_ref_count(eb, dref));
12920 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12921 ret = delete_extent_tree_item(trans, root, path);
12924 case BTRFS_SHARED_BLOCK_REF_KEY:
12925 ret = check_shared_block_backref(fs_info, key.offset,
12928 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12929 ret = delete_extent_tree_item(trans, root, path);
12932 case BTRFS_SHARED_DATA_REF_KEY:
12933 ret = check_shared_data_backref(fs_info, key.offset,
12936 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12937 ret = delete_extent_tree_item(trans, root, path);
12951 * Low memory usage version check_chunks_and_extents.
/*
 * check_chunks_and_extents_v2() - low memory mode driver for the chunk and
 * extent checks.
 *
 * @fs_info: filesystem to check
 *
 * Order of work as visible here:
 *   1. a transaction is started on the extent root (presumably only when
 *      repairing - the guarding condition is not visible, confirm);
 *   2. the chunk root and the tree root are checked with check_btrfs_root();
 *   3. the tree root is walked from the EXTENT_TREE root item onwards and
 *      every ROOT_ITEM found is read and checked; roots with the TREE_RELOC
 *      objectid are read without caching and freed after the check;
 *   4. btrfs_fix_block_accounting() is run, BG_ACCOUNTING_ERROR is cleared,
 *      and the transaction is committed.
 */
12953 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
12955 struct btrfs_trans_handle *trans = NULL;
12956 struct btrfs_path path;
12957 struct btrfs_key old_key;
12958 struct btrfs_key key;
12959 struct btrfs_root *root1;
12960 struct btrfs_root *root;
12961 struct btrfs_root *cur_root;
12965 root = fs_info->fs_root;
12968 trans = btrfs_start_transaction(fs_info->extent_root, 1);
12969 if (IS_ERR(trans)) {
12970 error("failed to start transaction before check");
12971 return PTR_ERR(trans);
12975 root1 = root->fs_info->chunk_root;
12976 ret = check_btrfs_root(trans, root1, 0, 1);
12979 root1 = root->fs_info->tree_root;
12980 ret = check_btrfs_root(trans, root1, 0, 1);
12983 btrfs_init_path(&path);
/* Start the root item walk at the extent tree root item */
12984 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
12986 key.type = BTRFS_ROOT_ITEM_KEY;
12988 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
12990 error("cannot find extent tree in tree_root");
12995 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12996 if (key.type != BTRFS_ROOT_ITEM_KEY)
12999 key.offset = (u64)-1;
13001 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13002 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
13005 cur_root = btrfs_read_fs_root(root->fs_info, &key);
13006 if (IS_ERR(cur_root) || !cur_root) {
13007 error("failed to read tree: %lld", key.objectid);
13011 ret = check_btrfs_root(trans, cur_root, 0, 1);
/* Only the uncached reloc root is freed here */
13014 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13015 btrfs_free_fs_root(cur_root);
/* Re-position the path in the tree root using old_key (NOTE(review): the assignment of old_key is not visible here - confirm it holds the key of the current root item) */
13017 btrfs_release_path(&path);
13018 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
13019 &old_key, &path, 0, 0);
13023 ret = btrfs_next_item(root1, &path);
13029 /* if repair, update block accounting */
13031 ret = btrfs_fix_block_accounting(trans, root);
13035 err &= ~BG_ACCOUNTING_ERROR;
13039 btrfs_commit_transaction(trans, root->fs_info->extent_root);
13041 btrfs_release_path(&path);
/*
 * do_check_chunks_and_extents() - run the chunk and extent check in the
 * selected check mode.
 *
 * @fs_info: filesystem to check
 *
 * Prints a status line to stderr when progress reporting is disabled, runs
 * the low memory implementation when check_mode is CHECK_MODE_LOWMEM and the
 * original implementation otherwise, and, when repairing and the check
 * returned zero, also fixes device and super block sizes.
 */
13046 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
13050 if (!ctx.progress_enabled)
13051 fprintf(stderr, "checking extents\n");
13052 if (check_mode == CHECK_MODE_LOWMEM)
13053 ret = check_chunks_and_extents_v2(fs_info);
13055 ret = check_chunks_and_extents(fs_info);
13057 /* Also repair device size related problems */
13058 if (repair && !ret) {
13059 ret = btrfs_fix_device_and_super_size(fs_info);
/*
 * btrfs_fsck_reinit_root() - give a root a fresh, empty root node.
 *
 * @trans:     running transaction; its transid becomes the node generation
 * @root:      root to re-initialise
 * @overwrite: selects between reusing a block and allocating a new one
 *             (the exact branch condition is not visible here - confirm)
 *
 * The node header is zeroed and rebuilt (level, bytenr, generation, backref
 * revision, owner, fsid, chunk tree uuid) and the buffer is marked dirty.
 * When the new node has the same start as the old one, the root item is
 * updated in the tree root directly; the other visible path frees the old
 * buffer and puts the root on the dirty list.
 */
13066 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
13067 struct btrfs_root *root, int overwrite)
13069 struct extent_buffer *c;
13070 struct extent_buffer *old = root->node;
13073 struct btrfs_disk_key disk_key = {0,0,0};
13079 extent_buffer_get(c);
13082 c = btrfs_alloc_free_block(trans, root,
13083 root->fs_info->nodesize,
13084 root->root_key.objectid,
13085 &disk_key, level, 0, 0);
13088 extent_buffer_get(c);
/* Wipe the old header before filling in the fields of the new empty node */
13092 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
13093 btrfs_set_header_level(c, level);
13094 btrfs_set_header_bytenr(c, c->start);
13095 btrfs_set_header_generation(c, trans->transid);
13096 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
13097 btrfs_set_header_owner(c, root->root_key.objectid);
13099 write_extent_buffer(c, root->fs_info->fsid,
13100 btrfs_header_fsid(), BTRFS_FSID_SIZE);
13102 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
13103 btrfs_header_chunk_tree_uuid(c),
13106 btrfs_mark_buffer_dirty(c);
13108 * this case can happen in the following case:
13110 * 1.overwrite previous root.
13112 * 2.reinit reloc data root, this is because we skip pin
13113 * down reloc data tree before which means we can allocate
13114 * same block bytenr here.
13116 if (old->start == c->start) {
13117 btrfs_set_root_generation(&root->root_item,
13119 root->root_item.level = btrfs_header_level(root->node);
13120 ret = btrfs_update_root(trans, root->fs_info->tree_root,
13121 &root->root_key, &root->root_item);
13123 free_extent_buffer(c);
13127 free_extent_buffer(old);
13129 add_root_to_dirty_list(root);
/*
 * pin_down_tree_blocks() - recursively pin the tree block @eb and the tree
 * blocks reachable from it.
 *
 * @fs_info:   filesystem info; owns the pinned_extents tree
 * @eb:        tree block to pin and descend from
 * @tree_root: non-zero while walking the tree of tree roots; in that case
 *             ROOT_ITEMs found in leaves are followed into their own trees
 *             (with tree_root passed as 0)
 *
 * A block whose range is already marked EXTENT_DIRTY in pinned_extents is not
 * processed again.  In leaves, root items of the extent tree, the tree reloc
 * tree and the data reloc tree are skipped.  In nodes at level 1 of a
 * non tree-root walk the child pointers are pinned without reading the
 * children; otherwise each child is read and the function recurses.
 */
13133 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
13134 struct extent_buffer *eb, int tree_root)
13136 struct extent_buffer *tmp;
13137 struct btrfs_root_item *ri;
13138 struct btrfs_key key;
13140 int level = btrfs_header_level(eb);
13146 * If we have pinned this block before, don't pin it again.
13147 * This can not only avoid forever loop with broken filesystem
13148 * but also give us some speedups.
13150 if (test_range_bit(&fs_info->pinned_extents, eb->start,
13151 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
13154 btrfs_pin_extent(fs_info, eb->start, eb->len);
13156 nritems = btrfs_header_nritems(eb);
13157 for (i = 0; i < nritems; i++) {
13159 btrfs_item_key_to_cpu(eb, &key, i);
13160 if (key.type != BTRFS_ROOT_ITEM_KEY)
13162 /* Skip the extent root and reloc roots */
13163 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
13164 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
13165 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
13167 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
13168 bytenr = btrfs_disk_root_bytenr(eb, ri);
13171 * If at any point we start needing the real root we
13172 * will have to build a stump root for the root we are
13173 * in, but for now this doesn't actually use the root so
13174 * just pass in extent_root.
13176 tmp = read_tree_block(fs_info, bytenr, 0);
13177 if (!extent_buffer_uptodate(tmp)) {
13178 fprintf(stderr, "Error reading root block\n");
13181 ret = pin_down_tree_blocks(fs_info, tmp, 0);
13182 free_extent_buffer(tmp);
13186 bytenr = btrfs_node_blockptr(eb, i);
13188 /* If we aren't the tree root don't read the block */
13189 if (level == 1 && !tree_root) {
13190 btrfs_pin_extent(fs_info, bytenr,
13191 fs_info->nodesize);
13195 tmp = read_tree_block(fs_info, bytenr, 0);
13196 if (!extent_buffer_uptodate(tmp)) {
13197 fprintf(stderr, "Error reading tree block\n");
13200 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
13201 free_extent_buffer(tmp);
/*
 * pin_metadata_blocks() - pin the tree blocks of the chunk tree and then of
 * the tree of tree roots.
 *
 * @fs_info: filesystem whose metadata is pinned
 *
 * The chunk tree is walked with tree_root set to 0; the tree root is walked
 * with tree_root set to 1 so that the root items it holds are followed too.
 */
13210 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13214 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13218 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * reset_block_groups() - rebuild the in-memory block groups from the chunk
 * tree.
 *
 * @fs_info: filesystem whose block group state is rebuilt
 *
 * The available allocation bits for data, metadata and system are cleared,
 * then every CHUNK_ITEM in the chunk tree produces one in-memory block group
 * (type, start and length taken from the chunk) and the chunk range is set
 * dirty in free_space_cache.  Afterwards the block groups are iterated in
 * address order with btrfs_lookup_first_block_group().
 */
13221 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13223 struct btrfs_block_group_cache *cache;
13224 struct btrfs_path path;
13225 struct extent_buffer *leaf;
13226 struct btrfs_chunk *chunk;
13227 struct btrfs_key key;
13231 btrfs_init_path(&path);
13233 key.type = BTRFS_CHUNK_ITEM_KEY;
13235 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13237 btrfs_release_path(&path);
13242 * We do this in case the block groups were screwed up and had alloc
13243 * bits that aren't actually set on the chunks. This happens with
13244 * restored images every time and could happen in real life I guess.
13246 fs_info->avail_data_alloc_bits = 0;
13247 fs_info->avail_metadata_alloc_bits = 0;
13248 fs_info->avail_system_alloc_bits = 0;
13250 /* First we need to create the in-memory block groups */
13252 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13253 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13255 btrfs_release_path(&path);
13263 leaf = path.nodes[0];
13264 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13265 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13270 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13271 btrfs_add_block_group(fs_info, 0,
13272 btrfs_chunk_type(leaf, chunk), key.offset,
13273 btrfs_chunk_length(leaf, chunk));
13274 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13275 key.offset + btrfs_chunk_length(leaf, chunk));
13280 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Continue the lookup right after the block group just returned */
13284 start = cache->key.objectid + cache->key.offset;
13287 btrfs_release_path(&path);
/*
 * reset_balance() - remove the state of a pending balance from the tree root
 * and re-initialise the data reloc tree.
 *
 * @trans:   running transaction
 * @fs_info: filesystem being repaired
 *
 * Three phases are visible:
 *   1. the BALANCE item is searched and deleted (one path jumps straight to
 *      the reinit_data_reloc label);
 *   2. root items with the TREE_RELOC objectid are collected as a run of
 *      adjacent slots (del_slot / del_nr) and removed with btrfs_del_items();
 *   3. the data reloc tree is read, recorded in the transaction, given a new
 *      empty root node and a new root directory.
 */
13291 static int reset_balance(struct btrfs_trans_handle *trans,
13292 struct btrfs_fs_info *fs_info)
13294 struct btrfs_root *root = fs_info->tree_root;
13295 struct btrfs_path path;
13296 struct extent_buffer *leaf;
13297 struct btrfs_key key;
13298 int del_slot, del_nr = 0;
13302 btrfs_init_path(&path);
13303 key.objectid = BTRFS_BALANCE_OBJECTID;
13304 key.type = BTRFS_BALANCE_ITEM_KEY;
13306 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13311 goto reinit_data_reloc;
13316 ret = btrfs_del_item(trans, root, &path);
13319 btrfs_release_path(&path);
13321 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13322 key.type = BTRFS_ROOT_ITEM_KEY;
13324 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13328 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13333 ret = btrfs_del_items(trans, root, &path,
13340 btrfs_release_path(&path);
13343 ret = btrfs_search_slot(trans, root, &key, &path,
13350 leaf = path.nodes[0];
13351 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13352 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13354 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13359 del_slot = path.slots[0];
13368 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13372 btrfs_release_path(&path);
/* From here on root refers to the data reloc tree, no longer the tree root */
13375 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13376 key.type = BTRFS_ROOT_ITEM_KEY;
13377 key.offset = (u64)-1;
13378 root = btrfs_read_fs_root(fs_info, &key);
13379 if (IS_ERR(root)) {
13380 fprintf(stderr, "Error reading data reloc tree\n");
13381 ret = PTR_ERR(root);
13384 record_root_in_trans(trans, root);
13385 ret = btrfs_fsck_reinit_root(trans, root, 0);
13388 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13390 btrfs_release_path(&path);
/*
 * reinit_extent_tree() - rebuild the extent tree from scratch.
 *
 * @trans:   running transaction
 * @fs_info: filesystem being repaired
 *
 * Filesystems with the MIXED_GROUPS incompat flag are refused with a message.
 * Otherwise the visible sequence is: pin all metadata blocks, free and
 * rebuild the in-memory block groups, re-initialise the extent root node,
 * insert one block group item per in-memory block group into the new extent
 * tree (running btrfs_extent_post_op() as part of that loop), and finally
 * reset any pending balance.
 */
13394 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13395 struct btrfs_fs_info *fs_info)
13401 * The only reason we don't do this is because right now we're just
13402 * walking the trees we find and pinning down their bytes, we don't look
13403 * at any of the leaves. In order to do mixed groups we'd have to check
13404 * the leaves of any fs roots and pin down the bytes for any file
13405 * extents we find. Not hard but why do it if we don't have to?
13407 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13408 fprintf(stderr, "We don't support re-initing the extent tree "
13409 "for mixed block groups yet, please notify a btrfs "
13410 "developer you want to do this so they can add this "
13411 "functionality.\n");
13416 * first we need to walk all of the trees except the extent tree and pin
13417 * down the bytes that are in use so we don't overwrite any existing
13420 ret = pin_metadata_blocks(fs_info);
13422 fprintf(stderr, "error pinning down used bytes\n");
13427 * Need to drop all the block groups since we're going to recreate all
13430 btrfs_free_block_groups(fs_info);
13431 ret = reset_block_groups(fs_info);
13433 fprintf(stderr, "error resetting the block groups\n");
13437 /* Ok we can allocate now, reinit the extent root */
13438 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13440 fprintf(stderr, "extent root initialization failed\n");
13442 * When the transaction code is updated we should end the
13443 * transaction, but for now progs only knows about commit so
13444 * just return an error.
13450 * Now we have all the in-memory block groups setup so we can make
13451 * allocations properly, and the metadata we care about is safe since we
13452 * pinned all of it above.
13455 struct btrfs_block_group_cache *cache;
13457 cache = btrfs_lookup_first_block_group(fs_info, start);
13460 start = cache->key.objectid + cache->key.offset;
13461 ret = btrfs_insert_item(trans, fs_info->extent_root,
13462 &cache->key, &cache->item,
13463 sizeof(cache->item));
13465 fprintf(stderr, "Error adding block group\n");
13468 btrfs_extent_post_op(trans, fs_info->extent_root);
13471 ret = reset_balance(trans, fs_info);
13473 fprintf(stderr, "error resetting the pending balance\n");
/*
 * recow_extent_buffer() - force a copy-on-write of one metadata block.
 *
 * @root: any root of the filesystem; only root->fs_info is used before the
 *        variable is replaced by the owner root of @eb
 * @eb:   metadata block to rewrite
 *
 * The owner root is read using the owner recorded in the block header.  A
 * search with cow set to 1 and lowest_level set to the level of @eb is run
 * for the first key of the block inside a transaction, which is then
 * committed.
 */
13478 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13480 struct btrfs_path path;
13481 struct btrfs_trans_handle *trans;
13482 struct btrfs_key key;
13485 printf("Recowing metadata block %llu\n", eb->start);
13486 key.objectid = btrfs_header_owner(eb);
13487 key.type = BTRFS_ROOT_ITEM_KEY;
13488 key.offset = (u64)-1;
13490 root = btrfs_read_fs_root(root->fs_info, &key);
13491 if (IS_ERR(root)) {
13492 fprintf(stderr, "Couldn't find owner root %llu\n",
13494 return PTR_ERR(root);
13497 trans = btrfs_start_transaction(root, 1);
13499 return PTR_ERR(trans);
13501 btrfs_init_path(&path);
/* Stop the search at the level of eb so that this block is the one being cowed */
13502 path.lowest_level = btrfs_header_level(eb);
/* The first key comes from a node pointer for non-zero levels and from an item for a leaf */
13503 if (path.lowest_level)
13504 btrfs_node_key_to_cpu(eb, &key, 0);
13506 btrfs_item_key_to_cpu(eb, &key, 0);
13508 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13509 btrfs_commit_transaction(trans, root);
13510 btrfs_release_path(&path);
/*
 * delete_bad_item() - delete one item that was recorded as bad.
 *
 * @root: any root of the filesystem; only root->fs_info is used before the
 *        variable is replaced by the root named in @bad
 * @bad:  record carrying the owning root id and the key of the item
 *
 * The owning root is read from bad->root_id, a transaction is started, the
 * key is searched with ins_len -1 and cow 1, the item is deleted, and the
 * transaction is committed.
 */
13514 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13516 struct btrfs_path path;
13517 struct btrfs_trans_handle *trans;
13518 struct btrfs_key key;
13521 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13522 bad->key.type, bad->key.offset);
/* key is only used to look up the owning root; the item itself is found via bad->key */
13523 key.objectid = bad->root_id;
13524 key.type = BTRFS_ROOT_ITEM_KEY;
13525 key.offset = (u64)-1;
13527 root = btrfs_read_fs_root(root->fs_info, &key);
13528 if (IS_ERR(root)) {
13529 fprintf(stderr, "Couldn't find owner root %llu\n",
13531 return PTR_ERR(root);
13534 trans = btrfs_start_transaction(root, 1);
13536 return PTR_ERR(trans);
13538 btrfs_init_path(&path);
13539 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13545 ret = btrfs_del_item(trans, root, &path);
13547 btrfs_commit_transaction(trans, root);
13548 btrfs_release_path(&path);
/*
 * zero_log_tree() - drop the log tree reference from the super block.
 *
 * @root: root used to start and commit the transaction
 *
 * Inside a transaction the log_root and log_root_level fields of the super
 * block copy are set to 0; ret then takes the result of the commit.
 */
13552 static int zero_log_tree(struct btrfs_root *root)
13554 struct btrfs_trans_handle *trans;
13557 trans = btrfs_start_transaction(root, 1);
13558 if (IS_ERR(trans)) {
13559 ret = PTR_ERR(trans);
13562 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13563 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13564 ret = btrfs_commit_transaction(trans, root);
/*
 * populate_csum() - compute and insert checksums for one data extent.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree that receives the new csum items
 * @buf:       scratch buffer; callers in this file allocate sectorsize bytes
 * @start:     logical start of the extent
 *
 * The extent is processed one sector at a time: the data at start + offset is
 * read into @buf and handed to btrfs_csum_file_block(), then offset advances
 * by sectorsize until it reaches len.
 */
13568 static int populate_csum(struct btrfs_trans_handle *trans,
13569 struct btrfs_root *csum_root, char *buf, u64 start,
13572 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13577 while (offset < len) {
13578 sectorsize = fs_info->sectorsize;
13579 ret = read_extent_data(fs_info, buf, start + offset,
/* NOTE(review): the value start + len is passed as the third argument for every sector - verify its meaning against the btrfs_csum_file_block() prototype */
13583 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13584 start + offset, buf, sectorsize);
13587 offset += sectorsize;
/*
 * fill_csum_tree_from_one_fs_root() - add checksums for the regular file
 * extents of one fs or subvolume tree.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree that receives the csum items
 * @cur_root:  fs or subvolume tree to walk
 *
 * A sectorsize scratch buffer is allocated and the tree is walked item by
 * item.  Only EXTENT_DATA items of type BTRFS_FILE_EXTENT_REG are used; their
 * on-disk bytenr and on-disk length are passed to populate_csum().  A return
 * of -EEXIST from populate_csum() gets special handling (the handling itself
 * is not visible here).
 */
13592 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13593 struct btrfs_root *csum_root,
13594 struct btrfs_root *cur_root)
13596 struct btrfs_path path;
13597 struct btrfs_key key;
13598 struct extent_buffer *node;
13599 struct btrfs_file_extent_item *fi;
13606 buf = malloc(cur_root->fs_info->sectorsize);
13610 btrfs_init_path(&path);
13614 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13617 /* Iterate all regular file extents and fill its csum */
13619 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13621 if (key.type != BTRFS_EXTENT_DATA_KEY)
13623 node = path.nodes[0];
13624 slot = path.slots[0];
13625 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13626 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* Checksums cover the extent as stored on disk, so the disk bytenr and disk length are used */
13628 start = btrfs_file_extent_disk_bytenr(node, fi);
13629 len = btrfs_file_extent_disk_num_bytes(node, fi);
13631 ret = populate_csum(trans, csum_root, buf, start, len);
13632 if (ret == -EEXIST)
13638 * TODO: if next leaf is corrupted, jump to nearest next valid
13641 ret = btrfs_next_item(cur_root, &path);
13651 btrfs_release_path(&path);
/*
 * fill_csum_tree_from_fs() - rebuild checksums by walking every fs and
 * subvolume tree.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree that receives the csum items
 *
 * The tree of tree roots is searched from the FS_TREE root item and walked
 * with btrfs_next_item().  The checks on each key compare objectid against
 * BTRFS_LAST_FREE_OBJECTID, the type against ROOT_ITEM and is_fstree().
 * Each accepted root is read and passed to
 * fill_csum_tree_from_one_fs_root().
 */
13656 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13657 struct btrfs_root *csum_root)
13659 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13660 struct btrfs_path path;
13661 struct btrfs_root *tree_root = fs_info->tree_root;
13662 struct btrfs_root *cur_root;
13663 struct extent_buffer *node;
13664 struct btrfs_key key;
13668 btrfs_init_path(&path);
13669 key.objectid = BTRFS_FS_TREE_OBJECTID;
13671 key.type = BTRFS_ROOT_ITEM_KEY;
13672 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13681 node = path.nodes[0];
13682 slot = path.slots[0];
13683 btrfs_item_key_to_cpu(node, &key, slot);
13684 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13686 if (key.type != BTRFS_ROOT_ITEM_KEY)
13688 if (!is_fstree(key.objectid))
/* The offset is set to (u64)-1 before the key is handed to btrfs_read_fs_root() */
13690 key.offset = (u64)-1;
13692 cur_root = btrfs_read_fs_root(fs_info, &key);
13693 if (IS_ERR(cur_root) || !cur_root) {
13694 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13698 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13703 ret = btrfs_next_item(tree_root, &path);
13713 btrfs_release_path(&path);
/*
 * Fill the csum tree by scanning the extent tree.
 *
 * Searches csum_root->fs_info->extent_root for BTRFS_EXTENT_ITEM_KEY items
 * and allocates a sectorsize scratch buffer for populate_csum().  While
 * walking, btrfs_next_leaf() is used once the current slot reaches the leaf's
 * item count.  For each item the key type is compared with
 * BTRFS_EXTENT_ITEM_KEY and the extent flags are tested for
 * BTRFS_EXTENT_FLAG_DATA; populate_csum() is called with key.objectid as the
 * start of the range.
 *
 * The path is released with btrfs_release_path() on the exit paths visible
 * here.
 */
13717 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13718 struct btrfs_root *csum_root)
13720 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13721 struct btrfs_path path;
13722 struct btrfs_extent_item *ei;
13723 struct extent_buffer *leaf;
13725 struct btrfs_key key;
13728 btrfs_init_path(&path);
13730 key.type = BTRFS_EXTENT_ITEM_KEY;
13732 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
13734 btrfs_release_path(&path);
13738 buf = malloc(csum_root->fs_info->sectorsize);
13740 btrfs_release_path(&path);
13745 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13746 ret = btrfs_next_leaf(extent_root, &path);
13754 leaf = path.nodes[0];
13756 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13757 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
13762 ei = btrfs_item_ptr(leaf, path.slots[0],
13763 struct btrfs_extent_item);
13764 if (!(btrfs_extent_flags(leaf, ei) &
13765 BTRFS_EXTENT_FLAG_DATA)) {
13770 ret = populate_csum(trans, csum_root, buf, key.objectid,
13777 btrfs_release_path(&path);
13783 * Recalculate the csum and put it into the csum tree.
13785 * Extent tree init will wipe out all the extent info, so in that case we
13786 * can't depend on the extent tree and must use the fs trees instead. If
13787 * search_fs_tree is set, we will use fs/subvol trees to init the csum tree.
/*
 * Dispatcher: a non-zero search_fs_tree selects fill_csum_tree_from_fs(),
 * otherwise fill_csum_tree_from_extent() is used.  The callee's return value
 * is returned unchanged.
 */
13789 static int fill_csum_tree(struct btrfs_trans_handle *trans,
13790 struct btrfs_root *csum_root,
13791 int search_fs_tree)
13793 if (search_fs_tree)
13794 return fill_csum_tree_from_fs(trans, csum_root);
13796 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache.
 *
 * roots_info_cache is tested for NULL first.  While the cache tree is not
 * empty, its first cache_extent is removed from the tree and resolved to the
 * enclosing struct root_item_info via container_of().  Finally the tree
 * itself is freed and the global pointer is reset to NULL.
 */
13799 static void free_roots_info_cache(void)
13801 if (!roots_info_cache)
13804 while (!cache_tree_empty(roots_info_cache)) {
13805 struct cache_extent *entry;
13806 struct root_item_info *rii;
13808 entry = first_cache_extent(roots_info_cache);
13811 remove_cache_extent(roots_info_cache, entry);
13812 rii = container_of(entry, struct root_item_info, cache_extent);
13816 free(roots_info_cache);
13817 roots_info_cache = NULL;
/*
 * Populate roots_info_cache from the extent tree.
 *
 * The global cache tree is allocated and initialised if it does not exist
 * yet.  info->extent_root is then walked; btrfs_next_leaf() is used when the
 * slot reaches the leaf's item count.
 *
 * Only BTRFS_EXTENT_ITEM_KEY and BTRFS_METADATA_ITEM_KEY items are examined,
 * and for EXTENT_ITEM keys the BTRFS_EXTENT_FLAG_TREE_BLOCK flag is tested.
 * The first inline ref is located differently for the two key types:
 *   - METADATA_ITEM: directly after the extent item; level is key.offset.
 *   - EXTENT_ITEM:   after a struct btrfs_tree_block_info that follows the
 *                    extent item; level is read from that struct.
 * The inline ref type is compared with BTRFS_TREE_BLOCK_REF_KEY and the ref's
 * offset is used as the root id.
 *
 * One struct root_item_info per root id is stored in the cache, keyed as
 * start = root_id, size = 1, with level initialised to the placeholder
 * (u8)-1.  An entry takes this block's level, bytenr (key.objectid),
 * generation and node_count = 1 when the block's level is higher than the
 * recorded one or the entry still holds the placeholder; a block at the same
 * level as the recorded one is handled by a separate branch.
 */
13820 static int build_roots_info_cache(struct btrfs_fs_info *info)
13823 struct btrfs_key key;
13824 struct extent_buffer *leaf;
13825 struct btrfs_path path;
13827 if (!roots_info_cache) {
13828 roots_info_cache = malloc(sizeof(*roots_info_cache));
13829 if (!roots_info_cache)
13831 cache_tree_init(roots_info_cache);
13834 btrfs_init_path(&path);
13836 key.type = BTRFS_EXTENT_ITEM_KEY;
13838 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
13841 leaf = path.nodes[0];
13844 struct btrfs_key found_key;
13845 struct btrfs_extent_item *ei;
13846 struct btrfs_extent_inline_ref *iref;
13847 int slot = path.slots[0];
13852 struct cache_extent *entry;
13853 struct root_item_info *rii;
13855 if (slot >= btrfs_header_nritems(leaf)) {
13856 ret = btrfs_next_leaf(info->extent_root, &path);
13863 leaf = path.nodes[0];
13864 slot = path.slots[0];
13867 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13869 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
13870 found_key.type != BTRFS_METADATA_ITEM_KEY)
13873 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
13874 flags = btrfs_extent_flags(leaf, ei);
13876 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
13877 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
13880 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
13881 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
13882 level = found_key.offset;
13884 struct btrfs_tree_block_info *binfo;
13886 binfo = (struct btrfs_tree_block_info *)(ei + 1);
13887 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
13888 level = btrfs_tree_block_level(leaf, binfo);
13892 * For a root extent, it must be of the following type and the
13893 * first (and only one) iref in the item.
13895 type = btrfs_extent_inline_ref_type(leaf, iref);
13896 if (type != BTRFS_TREE_BLOCK_REF_KEY)
13899 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
13900 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13902 rii = malloc(sizeof(struct root_item_info));
13907 rii->cache_extent.start = root_id;
13908 rii->cache_extent.size = 1;
13909 rii->level = (u8)-1;
13910 entry = &rii->cache_extent;
13911 ret = insert_cache_extent(roots_info_cache, entry);
13914 rii = container_of(entry, struct root_item_info,
13918 ASSERT(rii->cache_extent.start == root_id);
13919 ASSERT(rii->cache_extent.size == 1);
13921 if (level > rii->level || rii->level == (u8)-1) {
13922 rii->level = level;
13923 rii->bytenr = found_key.objectid;
13924 rii->gen = btrfs_extent_generation(leaf, ei);
13925 rii->node_count = 1;
13926 } else if (level == rii->level) {
13934 btrfs_release_path(&path);
/*
 * Check the root item at path's current slot against roots_info_cache and,
 * unless read_only_mode is set, rewrite it in place.
 *
 * The cache is looked up by root_key->objectid.  Error messages are emitted
 * for a root with no cached extent items and for an entry whose node_count
 * is not 1.
 *
 * The root item is copied out of the leaf with read_extent_buffer().  If its
 * bytenr, level or generation differs from the cached values, a message with
 * the current and new values is printed, except when both read_only_mode and
 * the global repair flag are set (see the comment in the body).  A root item
 * whose generation is newer than the cached one is reported.  When
 * read_only_mode is zero, bytenr, level and generation are set from the
 * cache and the item is written back into the leaf with
 * write_extent_buffer().
 */
13939 static int maybe_repair_root_item(struct btrfs_path *path,
13940 const struct btrfs_key *root_key,
13941 const int read_only_mode)
13943 const u64 root_id = root_key->objectid;
13944 struct cache_extent *entry;
13945 struct root_item_info *rii;
13946 struct btrfs_root_item ri;
13947 unsigned long offset;
13949 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13952 "Error: could not find extent items for root %llu\n",
13953 root_key->objectid);
13957 rii = container_of(entry, struct root_item_info, cache_extent);
13958 ASSERT(rii->cache_extent.start == root_id);
13959 ASSERT(rii->cache_extent.size == 1);
13961 if (rii->node_count != 1) {
13963 "Error: could not find btree root extent for root %llu\n",
13968 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
13969 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
13971 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
13972 btrfs_root_level(&ri) != rii->level ||
13973 btrfs_root_generation(&ri) != rii->gen) {
13976 * If we're in repair mode but our caller told us to not update
13977 * the root item, i.e. just check if it needs to be updated, don't
13978 * print this message, since the caller will call us again shortly
13979 * for the same root item without read only mode (the caller will
13980 * open a transaction first).
13982 if (!(read_only_mode && repair))
13984 "%sroot item for root %llu,"
13985 " current bytenr %llu, current gen %llu, current level %u,"
13986 " new bytenr %llu, new gen %llu, new level %u\n",
13987 (read_only_mode ? "" : "fixing "),
13989 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
13990 btrfs_root_level(&ri),
13991 rii->bytenr, rii->gen, rii->level);
13993 if (btrfs_root_generation(&ri) > rii->gen) {
13995 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
13996 root_id, btrfs_root_generation(&ri), rii->gen);
14000 if (!read_only_mode) {
14001 btrfs_set_root_bytenr(&ri, rii->bytenr);
14002 btrfs_set_root_level(&ri, rii->level);
14003 btrfs_set_root_generation(&ri, rii->gen);
14004 write_extent_buffer(path->nodes[0], &ri,
14005 offset, sizeof(ri));
14015 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
14016 * caused read-only snapshots to be corrupted if they were created at a moment
14017 * when the source subvolume/snapshot had orphan items. The issue was that the
14018 * on-disk root items became incorrect, referring to the pre orphan cleanup root
14019 * node instead of the post orphan cleanup root node.
14020 * So this function, and its callees, just detect and fix those cases. Even
14021 * though the regression was for read-only snapshots, this function applies to
14022 * any snapshot/subvolume root.
14023 * This must be run before any other repair code - not doing so makes other
14024 * repair code delete or modify backrefs in the extent tree, for example, which
14025 * will result in an inconsistent fs after repairing the root items.
/*
 * Walk the root items in the tree root and check/repair each one with
 * maybe_repair_root_item().
 *
 * build_roots_info_cache() is run first.  The scan key starts at objectid
 * BTRFS_FIRST_FREE_OBJECTID with type BTRFS_ROOT_ITEM_KEY.  Items whose type
 * is not BTRFS_ROOT_ITEM_KEY, and the BTRFS_TREE_RELOC_OBJECTID root, are
 * tested for before the repair call.
 *
 * maybe_repair_root_item() is called in read-only mode while no transaction
 * handle is held (trans == NULL) and in update mode once one is.  The
 * `!trans && repair` test after the call covers a read-only pass made while
 * the global repair flag is set; the path is released there.  When the slot
 * reaches the end of a leaf, find_next_key() picks the key to continue from,
 * the path is released and btrfs_commit_transaction() is called.
 *
 * On the way out the roots info cache is freed, the path is released and
 * btrfs_commit_transaction() is called on info->tree_root.
 */
14027 static int repair_root_items(struct btrfs_fs_info *info)
14029 struct btrfs_path path;
14030 struct btrfs_key key;
14031 struct extent_buffer *leaf;
14032 struct btrfs_trans_handle *trans = NULL;
14035 int need_trans = 0;
14037 btrfs_init_path(&path);
14039 ret = build_roots_info_cache(info);
14043 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
14044 key.type = BTRFS_ROOT_ITEM_KEY;
14049 * Avoid opening and committing transactions if a leaf doesn't have
14050 * any root items that need to be fixed, so that we avoid rotating
14051 * backup roots unnecessarily.
14054 trans = btrfs_start_transaction(info->tree_root, 1);
14055 if (IS_ERR(trans)) {
14056 ret = PTR_ERR(trans);
14061 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
14065 leaf = path.nodes[0];
14068 struct btrfs_key found_key;
14070 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
14071 int no_more_keys = find_next_key(&path, &key);
14073 btrfs_release_path(&path);
14075 ret = btrfs_commit_transaction(trans,
14087 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14089 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
14091 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
14094 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
14098 if (!trans && repair) {
14101 btrfs_release_path(&path);
14111 free_roots_info_cache();
14112 btrfs_release_path(&path);
14114 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the free space cache of block groups and invalidate the cache
 * generation in the superblock.
 *
 * Block groups are looked up with btrfs_lookup_first_block_group() starting
 * from `current`; btrfs_clear_free_space_cache() is called on the group found
 * and `current` is advanced to the end of that group (key.objectid +
 * key.offset).
 *
 * Afterwards a transaction is started on fs_info->tree_root, the superblock
 * cache generation is set to (u64)-1 and the transaction is committed.  If
 * the transaction cannot be started, an error is logged and PTR_ERR(trans)
 * is returned.
 */
14121 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
14123 struct btrfs_trans_handle *trans;
14124 struct btrfs_block_group_cache *bg_cache;
14128 /* Clear all free space cache inodes and its extent data */
14130 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
14133 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
14136 current = bg_cache->key.objectid + bg_cache->key.offset;
14139 /* Don't forget to set cache_generation to -1 */
14140 trans = btrfs_start_transaction(fs_info->tree_root, 0);
14141 if (IS_ERR(trans)) {
14142 error("failed to update super block cache generation");
14143 return PTR_ERR(trans);
14145 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
14146 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Clear the free space cache variant selected by clear_version.
 *
 * clear_version == 1: if the FREE_SPACE_TREE compat_ro feature is set, the
 *   "free space cache v2 detected" message is emitted; the clearing itself
 *   is done by clear_free_space_cache(), with progress and failure messages
 *   printed around it.
 * clear_version == 2: if the FREE_SPACE_TREE compat_ro feature is not set,
 *   "no free space cache v2 to clear" is printed; the clearing itself is done
 *   by btrfs_clear_free_space_tree(), and a failure is reported together
 *   with its return code.
 */
14151 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
14156 if (clear_version == 1) {
14157 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14159 "free space cache v2 detected, use --clear-space-cache v2");
14163 printf("Clearing free space cache\n");
14164 ret = clear_free_space_cache(fs_info);
14166 error("failed to clear free space cache");
14169 printf("Free space cache cleared\n");
14171 } else if (clear_version == 2) {
14172 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14173 printf("no free space cache v2 to clear\n");
14177 printf("Clear free space cache v2\n");
14178 ret = btrfs_clear_free_space_tree(fs_info);
14180 error("failed to clear free space cache v2: %d", ret);
14183 printf("free space cache v2 cleared\n");
/*
 * Help text for "btrfs check": synopsis, short and long description, then
 * one string per option.  cmd_check() passes this table to usage().
 */
14190 const char * const cmd_check_usage[] = {
14191 "btrfs check [options] <device>",
14192 "Check structural integrity of a filesystem (unmounted).",
14193 "Check structural integrity of an unmounted filesystem. Verify internal",
14194 "trees' consistency and item connectivity. In the repair mode try to",
14195 "fix the problems found. ",
14196 "WARNING: the repair mode is considered dangerous",
14198 "-s|--super <superblock> use this superblock copy",
14199 "-b|--backup use the first valid backup root copy",
14200 "--force skip mount checks, repair is not possible",
14201 "--repair try to repair the filesystem",
14202 "--readonly run in read-only mode (default)",
14203 "--init-csum-tree create a new CRC tree",
14204 "--init-extent-tree create a new extent tree",
14205 "--mode <MODE> allows choice of memory/IO trade-offs",
14206 " where MODE is one of:",
14207 " original - read inodes and extents to memory (requires",
14208 " more memory, does less IO)",
14209 " lowmem - try to use less memory but read blocks again",
14211 "--check-data-csum verify checksums of data blocks",
14212 "-Q|--qgroup-report print a report on qgroup consistency",
14213 "-E|--subvol-extents <subvolid>",
14214 " print subvolume extents and sharing state",
14215 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
14216 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
14217 "-p|--progress indicate progress",
14218 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
14222 int cmd_check(int argc, char **argv)
14224 struct cache_tree root_cache;
14225 struct btrfs_root *root;
14226 struct btrfs_fs_info *info;
14229 u64 tree_root_bytenr = 0;
14230 u64 chunk_root_bytenr = 0;
14231 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14235 int init_csum_tree = 0;
14237 int clear_space_cache = 0;
14238 int qgroup_report = 0;
14239 int qgroups_repaired = 0;
14240 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
14245 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14246 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14247 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14248 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14249 GETOPT_VAL_FORCE };
14250 static const struct option long_options[] = {
14251 { "super", required_argument, NULL, 's' },
14252 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14253 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14254 { "init-csum-tree", no_argument, NULL,
14255 GETOPT_VAL_INIT_CSUM },
14256 { "init-extent-tree", no_argument, NULL,
14257 GETOPT_VAL_INIT_EXTENT },
14258 { "check-data-csum", no_argument, NULL,
14259 GETOPT_VAL_CHECK_CSUM },
14260 { "backup", no_argument, NULL, 'b' },
14261 { "subvol-extents", required_argument, NULL, 'E' },
14262 { "qgroup-report", no_argument, NULL, 'Q' },
14263 { "tree-root", required_argument, NULL, 'r' },
14264 { "chunk-root", required_argument, NULL,
14265 GETOPT_VAL_CHUNK_TREE },
14266 { "progress", no_argument, NULL, 'p' },
14267 { "mode", required_argument, NULL,
14269 { "clear-space-cache", required_argument, NULL,
14270 GETOPT_VAL_CLEAR_SPACE_CACHE},
14271 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14272 { NULL, 0, NULL, 0}
14275 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
14279 case 'a': /* ignored */ break;
14281 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
14284 num = arg_strtou64(optarg);
14285 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14287 "super mirror should be less than %d",
14288 BTRFS_SUPER_MIRROR_MAX);
14291 bytenr = btrfs_sb_offset(((int)num));
14292 printf("using SB copy %llu, bytenr %llu\n", num,
14293 (unsigned long long)bytenr);
14299 subvolid = arg_strtou64(optarg);
14302 tree_root_bytenr = arg_strtou64(optarg);
14304 case GETOPT_VAL_CHUNK_TREE:
14305 chunk_root_bytenr = arg_strtou64(optarg);
14308 ctx.progress_enabled = true;
14312 usage(cmd_check_usage);
14313 case GETOPT_VAL_REPAIR:
14314 printf("enabling repair mode\n");
14316 ctree_flags |= OPEN_CTREE_WRITES;
14318 case GETOPT_VAL_READONLY:
14321 case GETOPT_VAL_INIT_CSUM:
14322 printf("Creating a new CRC tree\n");
14323 init_csum_tree = 1;
14325 ctree_flags |= OPEN_CTREE_WRITES;
14327 case GETOPT_VAL_INIT_EXTENT:
14328 init_extent_tree = 1;
14329 ctree_flags |= (OPEN_CTREE_WRITES |
14330 OPEN_CTREE_NO_BLOCK_GROUPS);
14333 case GETOPT_VAL_CHECK_CSUM:
14334 check_data_csum = 1;
14336 case GETOPT_VAL_MODE:
14337 check_mode = parse_check_mode(optarg);
14338 if (check_mode == CHECK_MODE_UNKNOWN) {
14339 error("unknown mode: %s", optarg);
14343 case GETOPT_VAL_CLEAR_SPACE_CACHE:
14344 if (strcmp(optarg, "v1") == 0) {
14345 clear_space_cache = 1;
14346 } else if (strcmp(optarg, "v2") == 0) {
14347 clear_space_cache = 2;
14348 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14351 "invalid argument to --clear-space-cache, must be v1 or v2");
14354 ctree_flags |= OPEN_CTREE_WRITES;
14356 case GETOPT_VAL_FORCE:
14362 if (check_argc_exact(argc - optind, 1))
14363 usage(cmd_check_usage);
14365 if (ctx.progress_enabled) {
14366 ctx.tp = TASK_NOTHING;
14367 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14370 /* This check is the only reason for --readonly to exist */
14371 if (readonly && repair) {
14372 error("repair options are not compatible with --readonly");
14377 * experimental and dangerous
14379 if (repair && check_mode == CHECK_MODE_LOWMEM)
14380 warning("low-memory mode repair support is only partial");
14383 cache_tree_init(&root_cache);
14385 ret = check_mounted(argv[optind]);
14388 error("could not check mount status: %s",
14394 "%s is currently mounted, use --force if you really intend to check the filesystem",
14402 error("repair and --force is not yet supported");
14409 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14413 "filesystem mounted, continuing because of --force");
14415 /* A block device is mounted in exclusive mode by kernel */
14416 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14419 /* only allow partial opening under repair mode */
14421 ctree_flags |= OPEN_CTREE_PARTIAL;
14423 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14424 chunk_root_bytenr, ctree_flags);
14426 error("cannot open file system");
14432 global_info = info;
14433 root = info->fs_root;
14434 uuid_unparse(info->super_copy->fsid, uuidbuf);
14436 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14439 * Check the bare minimum before starting anything else that could rely
14440 * on it, namely the tree roots, any local consistency checks
14442 if (!extent_buffer_uptodate(info->tree_root->node) ||
14443 !extent_buffer_uptodate(info->dev_root->node) ||
14444 !extent_buffer_uptodate(info->chunk_root->node)) {
14445 error("critical roots corrupted, unable to check the filesystem");
14451 if (clear_space_cache) {
14452 ret = do_clear_free_space_cache(info, clear_space_cache);
14458 * repair mode will force us to commit transaction which
14459 * will make us fail to load log tree when mounting.
14461 if (repair && btrfs_super_log_root(info->super_copy)) {
14462 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14468 ret = zero_log_tree(root);
14471 error("failed to zero log tree: %d", ret);
14476 if (qgroup_report) {
14477 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14479 ret = qgroup_verify_all(info);
14486 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14487 subvolid, argv[optind], uuidbuf);
14488 ret = print_extent_state(info, subvolid);
14493 if (init_extent_tree || init_csum_tree) {
14494 struct btrfs_trans_handle *trans;
14496 trans = btrfs_start_transaction(info->extent_root, 0);
14497 if (IS_ERR(trans)) {
14498 error("error starting transaction");
14499 ret = PTR_ERR(trans);
14504 if (init_extent_tree) {
14505 printf("Creating a new extent tree\n");
14506 ret = reinit_extent_tree(trans, info);
14512 if (init_csum_tree) {
14513 printf("Reinitialize checksum tree\n");
14514 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14516 error("checksum tree initialization failed: %d",
14523 ret = fill_csum_tree(trans, info->csum_root,
14527 error("checksum tree refilling failed: %d", ret);
14532 * Ok now we commit and run the normal fsck, which will add
14533 * extent entries for all of the items it finds.
14535 ret = btrfs_commit_transaction(trans, info->extent_root);
14540 if (!extent_buffer_uptodate(info->extent_root->node)) {
14541 error("critical: extent_root, unable to check the filesystem");
14546 if (!extent_buffer_uptodate(info->csum_root->node)) {
14547 error("critical: csum_root, unable to check the filesystem");
14553 if (!init_extent_tree) {
14554 ret = repair_root_items(info);
14557 error("failed to repair root items: %s", strerror(-ret));
14561 fprintf(stderr, "Fixed %d roots.\n", ret);
14563 } else if (ret > 0) {
14565 "Found %d roots with an outdated root item.\n",
14568 "Please run a filesystem check with the option --repair to fix them.\n");
14575 ret = do_check_chunks_and_extents(info);
14579 "errors found in extent allocation tree or chunk allocation");
14581 /* Only re-check super size after we checked and repaired the fs */
14582 err |= !is_super_size_valid(info);
14584 if (!ctx.progress_enabled) {
14585 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14586 fprintf(stderr, "checking free space tree\n");
14588 fprintf(stderr, "checking free space cache\n");
14590 ret = check_space_cache(root);
14593 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14594 error("errors found in free space tree");
14596 error("errors found in free space cache");
14601 * We used to have to have these hole extents in between our real
14602 * extents so if we don't have this flag set we need to make sure there
14603 * are no gaps in the file extents for inodes, otherwise we can just
14604 * ignore it when this happens.
14606 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14607 ret = do_check_fs_roots(info, &root_cache);
14610 error("errors found in fs roots");
14614 fprintf(stderr, "checking csums\n");
14615 ret = check_csums(root);
14618 error("errors found in csum tree");
14622 fprintf(stderr, "checking root refs\n");
14623 /* For low memory mode, check_fs_roots_v2 handles root refs */
14624 if (check_mode != CHECK_MODE_LOWMEM) {
14625 ret = check_root_refs(root, &root_cache);
14628 error("errors found in root refs");
14633 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14634 struct extent_buffer *eb;
14636 eb = list_first_entry(&root->fs_info->recow_ebs,
14637 struct extent_buffer, recow);
14638 list_del_init(&eb->recow);
14639 ret = recow_extent_buffer(root, eb);
14642 error("fails to fix transid errors");
14647 while (!list_empty(&delete_items)) {
14648 struct bad_item *bad;
14650 bad = list_first_entry(&delete_items, struct bad_item, list);
14651 list_del_init(&bad->list);
14653 ret = delete_bad_item(root, bad);
14659 if (info->quota_enabled) {
14660 fprintf(stderr, "checking quota groups\n");
14661 ret = qgroup_verify_all(info);
14664 error("failed to check quota groups");
14668 ret = repair_qgroups(info, &qgroups_repaired);
14671 error("failed to repair quota groups");
14677 if (!list_empty(&root->fs_info->recow_ebs)) {
14678 error("transid errors in file system");
14683 printf("found %llu bytes used, ",
14684 (unsigned long long)bytes_used);
14686 printf("error(s) found\n");
14688 printf("no error found\n");
14689 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14690 printf("total tree bytes: %llu\n",
14691 (unsigned long long)total_btree_bytes);
14692 printf("total fs tree bytes: %llu\n",
14693 (unsigned long long)total_fs_tree_bytes);
14694 printf("total extent tree bytes: %llu\n",
14695 (unsigned long long)total_extent_tree_bytes);
14696 printf("btree space waste bytes: %llu\n",
14697 (unsigned long long)btree_space_waste);
14698 printf("file data blocks allocated: %llu\n referenced %llu\n",
14699 (unsigned long long)data_bytes_allocated,
14700 (unsigned long long)data_bytes_referenced);
14702 free_qgroup_counts();
14703 free_root_recs_tree(&root_cache);
14707 if (ctx.progress_enabled)
14708 task_deinit(ctx.info);