2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
46 #include "check/original.h"
47 #include "check/lowmem.h"
48 #include "check/common.h"
/*
 * Tail of the task-position enum, the members of the progress task context,
 * and the file-scope state used by the checker.
 * NOTE(review): the enum/struct opening lines are not visible at this point.
 */
54 TASK_NOTHING, /* have to be the last element */
/* Current position; print_status_check() uses it to select the status text. */
59 enum task_position tp;
/* Handle given to task_period_start() and task_period_wait(). */
61 struct task_info *info;
/* Byte counters, all starting at zero. */
65 u64 total_csum_bytes = 0;
66 u64 total_btree_bytes = 0;
67 u64 total_fs_tree_bytes = 0;
68 u64 total_extent_tree_bytes = 0;
69 u64 btree_space_waste = 0;
70 u64 data_bytes_allocated = 0;
71 u64 data_bytes_referenced = 0;
/* File-scope list heads, declared with LIST_HEAD(). */
72 LIST_HEAD(duplicate_extents);
73 LIST_HEAD(delete_items);
/* Option flags, off by default. */
75 int init_extent_tree = 0;
76 int check_data_csum = 0;
77 struct btrfs_fs_info *global_info;
/* Progress task context, zero-initialized. */
78 struct task_ctx ctx = { 0 };
79 struct cache_tree *roots_info_cache = NULL;
/* Check implementations; the default is an alias for the original mode. */
81 enum btrfs_check_mode {
85 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
/* Selected check mode; starts out as CHECK_MODE_DEFAULT. */
88 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
/*
 * Comparator for two data backrefs stored as rb-tree nodes.
 *
 * Both nodes are expected to be data backrefs (WARN_ON otherwise).  Keys are
 * compared in this order: parent (which shares storage with root, so a single
 * test covers both), owner, offset and, only when both backrefs have
 * found_ref set, disk_bytenr and bytes.  For a full backref the comparison
 * appears to stop once the parents match.
 *
 * NOTE(review): the return statements are not shown here; presumably the
 * usual positive/negative/zero comparator convention — confirm.
 */
90 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
92 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
93 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
94 struct data_backref *back1 = to_data_backref(ext1);
95 struct data_backref *back2 = to_data_backref(ext2);
97 WARN_ON(!ext1->is_data);
98 WARN_ON(!ext2->is_data);
100 /* parent and root are a union, so this covers both */
101 if (back1->parent > back2->parent)
103 if (back1->parent < back2->parent)
106 /* This is a full backref and the parents match. */
107 if (back1->node.full_backref)
110 if (back1->owner > back2->owner)
112 if (back1->owner < back2->owner)
115 if (back1->offset > back2->offset)
117 if (back1->offset < back2->offset)
/* Extent location and size only take part once both refs were found. */
120 if (back1->found_ref && back2->found_ref) {
121 if (back1->disk_bytenr > back2->disk_bytenr)
123 if (back1->disk_bytenr < back2->disk_bytenr)
126 if (back1->bytes > back2->bytes)
128 if (back1->bytes < back2->bytes)
/*
 * Comparator for two tree backrefs stored as rb-tree nodes.
 *
 * Both nodes are expected to be non-data backrefs (WARN_ON otherwise).  The
 * only key compared is parent, which shares storage with root.
 *
 * NOTE(review): the return statements are not shown here; presumably the
 * usual positive/negative/zero comparator convention — confirm.
 */
135 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
137 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
138 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
139 struct tree_backref *back1 = to_tree_backref(ext1);
140 struct tree_backref *back2 = to_tree_backref(ext2);
142 WARN_ON(ext1->is_data);
143 WARN_ON(ext2->is_data);
145 /* parent and root are a union, so this covers both */
146 if (back1->parent > back2->parent)
148 if (back1->parent < back2->parent)
/*
 * Top-level comparator for extent backrefs of either kind.
 *
 * Orders first by is_data, then by full_backref, and finally delegates to
 * compare_data_backref() or compare_tree_backref().
 *
 * NOTE(review): the test that selects between the two helpers is not shown;
 * presumably it is ext1->is_data — confirm.
 */
154 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
156 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
157 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
159 if (ext1->is_data > ext2->is_data)
162 if (ext1->is_data < ext2->is_data)
165 if (ext1->full_backref > ext2->full_backref)
167 if (ext1->full_backref < ext2->full_backref)
171 return compare_data_backref(node1, node2);
173 return compare_tree_backref(node1, node2);
/*
 * Progress-indicator task body.
 *
 * @p: the struct task_ctx describing the current check position.
 *
 * Starts a 1000 ms period on priv->info, prints the status text for priv->tp
 * followed by one of four spinner characters (chosen by count % 4) and a
 * carriage return, and waits for the next period with task_period_wait().
 * priv->tp is tested against TASK_NOTHING before anything is printed.
 *
 * NOTE(review): the loop structure and the return value are not shown here.
 */
177 static void *print_status_check(void *p)
179 struct task_ctx *priv = p;
180 const char work_indicator[] = { '.', 'o', 'O', 'o' };
/* Status texts, indexed by priv->tp. */
182 static char *task_position_string[] = {
184 "checking free space cache",
188 task_period_start(priv->info, 1000 /* 1s */);
190 if (priv->tp == TASK_NOTHING)
/* '\r' keeps the status on one terminal line. */
194 printf("%s [%c]\r", task_position_string[priv->tp],
195 work_indicator[count % 4]);
198 task_period_wait(priv->info);
/*
 * Takes an opaque pointer, like print_status_check().
 * NOTE(review): the body is not shown here; purpose and return value are to
 * be confirmed.
 */
203 static int print_status_return(void *p)
211 static enum btrfs_check_mode parse_check_mode(const char *str)
213 if (strcmp(str, "lowmem") == 0)
214 return CHECK_MODE_LOWMEM;
215 if (strcmp(str, "orig") == 0)
216 return CHECK_MODE_ORIGINAL;
217 if (strcmp(str, "original") == 0)
218 return CHECK_MODE_ORIGINAL;
220 return CHECK_MODE_UNKNOWN;
223 /* Compatibility helper so that older code can be reused. */
/*
 * Looks at the lowest hole in @holes (rb_first()); an empty tree is handled
 * separately up front.  Callers compare the result with an inode size or with
 * the result for another record.
 *
 * NOTE(review): the returned values are not shown here; presumably the start
 * of the first hole, and a "no gap" sentinel for an empty tree — confirm.
 */
224 static u64 first_extent_gap(struct rb_root *holes)
226 struct file_extent_hole *hole;
228 if (RB_EMPTY_ROOT(holes))
231 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * rb-tree comparator for file extent holes.
 *
 * Orders by start offset.  For equal starts the lengths decide which hole
 * becomes the "merge center" for the later merge.  add_file_extent_hole()
 * relies on this comparator never reporting equality.
 */
235 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
237 struct file_extent_hole *hole1;
238 struct file_extent_hole *hole2;
240 hole1 = rb_entry(node1, struct file_extent_hole, node);
241 hole2 = rb_entry(node2, struct file_extent_hole, node);
243 if (hole1->start > hole2->start)
245 if (hole1->start < hole2->start)
247 /* Now hole1->start == hole2->start */
248 if (hole1->len >= hole2->len)
250 * Hole 1 will be merge center
251 * Same hole will be merged later
254 /* Hole 2 will be merge center */
259 * Add a hole to the record
261 * This will do hole merge for copy_file_extent_holes(),
262 * which will ensure there won't be continuous holes.
/*
 * Insert a hole into @holes and merge it with its neighbours.
 *
 * A new file_extent_hole is allocated and inserted using compare_hole().  If
 * the previous hole touches or overlaps the new one, the new hole is extended
 * back to prev->start and prev is erased from the tree.  The following holes
 * are then merged one after another while they touch or overlap the growing
 * hole.  Callers pass the hole's start and length after @holes.
 *
 * NOTE(review): the remaining parameters, the allocation-failure path, the
 * freeing of erased nodes and the return value are not shown here.
 */
264 static int add_file_extent_hole(struct rb_root *holes,
267 struct file_extent_hole *hole;
268 struct file_extent_hole *prev = NULL;
269 struct file_extent_hole *next = NULL;
271 hole = malloc(sizeof(*hole));
276 /* Since compare will not return 0, no -EEXIST will happen */
277 rb_insert(holes, &hole->node, compare_hole);
279 /* simple merge with previous hole */
280 if (rb_prev(&hole->node))
281 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
283 if (prev && prev->start + prev->len >= hole->start) {
/* Keep the end of the new hole, move its start back to prev->start. */
284 hole->len = hole->start + hole->len - prev->start;
285 hole->start = prev->start;
286 rb_erase(&prev->node, holes);
291 /* iterate merge with next holes */
293 if (!rb_next(&hole->node))
295 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
297 if (hole->start + hole->len >= next->start) {
/* Only grow when next reaches at least as far as the current end. */
298 if (hole->start + hole->len <= next->start + next->len)
299 hole->len = next->start + next->len -
301 rb_erase(&next->node, holes);
/*
 * Search callback used with rb_search().
 *
 * @node: tree entry being tested.
 * @data: a struct file_extent_hole used as the search key.
 *
 * The key's start offset is tested against the entry's
 * [start, start + len) range.
 *
 * NOTE(review): `start` is assigned on a line not shown here; presumably it
 * is taken from the key hole.  Return values are not shown either — confirm.
 */
310 static int compare_hole_range(struct rb_node *node, void *data)
312 struct file_extent_hole *hole;
315 hole = (struct file_extent_hole *)data;
318 hole = rb_entry(node, struct file_extent_hole, node);
319 if (start < hole->start)
321 if (start >= hole->start && start < hole->start + hole->len)
327 * Delete a hole in the record
329 * This will do the hole split and is much more restrictive than add.
/*
 * Remove the range [start, start + len) from the hole that contains it.
 *
 * The containing hole is located with rb_search() and compare_hole_range().
 * A range that extends past the end of that hole is treated separately.
 * Otherwise the hole is erased, and whatever remains before and/or after the
 * removed range is re-inserted through add_file_extent_hole().
 *
 * NOTE(review): the remaining parameters, the error paths, the freeing of
 * the erased hole and the return value are not shown here.
 */
331 static int del_file_extent_hole(struct rb_root *holes,
334 struct file_extent_hole *hole;
335 struct file_extent_hole tmp;
340 struct rb_node *node;
347 node = rb_search(holes, &tmp, compare_hole_range, NULL);
350 hole = rb_entry(node, struct file_extent_hole, node);
351 if (start + len > hole->start + hole->len)
355 * Now there will be no overlap, delete the hole and re-add the
356 * split(s) if they exists.
/* Part of the hole that lies before the removed range. */
358 if (start > hole->start) {
359 prev_start = hole->start;
360 prev_len = start - hole->start;
/* Part of the hole that lies after the removed range. */
363 if (hole->start + hole->len > start + len) {
364 next_start = start + len;
365 next_len = hole->start + hole->len - start - len;
368 rb_erase(node, holes);
371 ret = add_file_extent_hole(holes, prev_start, prev_len);
376 ret = add_file_extent_hole(holes, next_start, next_len);
/*
 * Add every hole of the source tree to @dst.
 *
 * Walks the source from rb_first() with rb_next() and inserts each hole via
 * add_file_extent_hole(), so holes are merged in @dst as they are added.
 *
 * NOTE(review): the handling of `ret` and the return value are not shown.
 */
383 static int copy_file_extent_holes(struct rb_root *dst,
386 struct file_extent_hole *hole;
387 struct rb_node *node;
390 node = rb_first(src);
392 hole = rb_entry(node, struct file_extent_hole, node);
393 ret = add_file_extent_hole(dst, hole->start, hole->len);
396 node = rb_next(node);
/*
 * Empty the hole tree: repeatedly take the first node, erase it from
 * @holes and fetch the new first node.
 *
 * NOTE(review): the release of each hole's memory is not shown here;
 * presumably it is freed after rb_erase() — confirm.
 */
401 static void free_file_extent_holes(struct rb_root *holes)
403 struct rb_node *node;
404 struct file_extent_hole *hole;
406 node = rb_first(holes);
408 hole = rb_entry(node, struct file_extent_hole, node);
409 rb_erase(node, holes);
411 node = rb_first(holes);
/* Forward declaration so the function can be used before its definition. */
415 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
417 static void record_root_in_trans(struct btrfs_trans_handle *trans,
418 struct btrfs_root *root)
420 if (root->last_trans != trans->transid) {
421 root->track_dirty = 1;
422 root->last_trans = trans->transid;
423 root->commit_root = root->node;
424 extent_buffer_get(root->node);
/*
 * rb-tree comparator for device records; orders them by devid.
 *
 * NOTE(review): the return statements are not shown here; presumably the
 * usual positive/negative/zero comparator convention — confirm.
 */
428 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
430 struct device_record *rec1;
431 struct device_record *rec2;
433 rec1 = rb_entry(node1, struct device_record, node);
434 rec2 = rb_entry(node2, struct device_record, node);
435 if (rec1->devid > rec2->devid)
437 else if (rec1->devid < rec2->devid)
/*
 * Make a deep copy of an inode record.
 *
 * The record itself is copied with memcpy(), then its backref list, its
 * orphan-extent list and its hole tree are re-initialised and filled with
 * freshly allocated copies of the originals' entries.  Each backref copy
 * includes the name stored after the struct (namelen + 1 bytes).
 *
 * Returns ERR_PTR(-ENOMEM) when the record itself cannot be allocated.  The
 * tail of the function releases the partially built holes, backrefs and
 * orphan extents.
 *
 * NOTE(review): the orphan_extents cleanup loop reuses the struct
 * inode_backref iterators (orig/tmp) on a list whose entries are struct
 * orphan_data_extent.  list_for_each_entry_safe() derives each entry from
 * the iterator's type, so this is only correct if both structs place `list`
 * at the same offset — confirm, or use orphan_data_extent iterators.
 */
443 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
445 struct inode_record *rec;
446 struct inode_backref *backref;
447 struct inode_backref *orig;
448 struct inode_backref *tmp;
449 struct orphan_data_extent *src_orphan;
450 struct orphan_data_extent *dst_orphan;
455 rec = malloc(sizeof(*rec));
457 return ERR_PTR(-ENOMEM);
458 memcpy(rec, orig_rec, sizeof(*rec));
/* The memcpy() above copied the original's list/tree heads; reset them. */
460 INIT_LIST_HEAD(&rec->backrefs);
461 INIT_LIST_HEAD(&rec->orphan_extents);
462 rec->holes = RB_ROOT;
464 list_for_each_entry(orig, &orig_rec->backrefs, list) {
/* Struct plus the trailing name and its terminating byte. */
465 size = sizeof(*orig) + orig->namelen + 1;
466 backref = malloc(size);
471 memcpy(backref, orig, size);
472 list_add_tail(&backref->list, &rec->backrefs);
474 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
475 dst_orphan = malloc(sizeof(*dst_orphan));
480 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
481 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
483 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
/* Error path: tear down whatever was copied so far. */
490 rb = rb_first(&rec->holes);
492 struct file_extent_hole *hole;
494 hole = rb_entry(rb, struct file_extent_hole, node);
500 if (!list_empty(&rec->backrefs))
501 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
502 list_del(&orig->list);
506 if (!list_empty(&rec->orphan_extents))
507 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
508 list_del(&orig->list);
/*
 * Print the orphan data extents of one tree to stdout.
 *
 * Nothing is reported for an empty list.  Otherwise a heading naming the
 * tree is printed, followed by one line per extent with its inode, file
 * offset, disk bytenr and disk length.
 *
 * NOTE(review): the second parameter (the tree id used in the heading) is
 * declared on a line not shown here.
 */
517 static void print_orphan_data_extents(struct list_head *orphan_extents,
520 struct orphan_data_extent *orphan;
522 if (list_empty(orphan_extents))
524 printf("The following data extent is lost in tree %llu:\n",
526 list_for_each_entry(orphan, orphan_extents, list) {
527 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
528 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Report the error bits of an inode record on stderr.
 *
 * @root: tree the inode belongs to; for a reloc tree the id printed is
 *        root_key.offset and the line is prefixed with "reloc".
 * @rec:  inode record whose `errors` mask is decoded.
 *
 * Prints one line with the root id, inode number and raw error mask,
 * followed by a short text for every I_ERR_* bit that is set.  With
 * I_ERR_FILE_EXTENT_ORPHAN the orphan extents are listed as well; with
 * I_ERR_FILE_EXTENT_DISCOUNT the recorded holes are listed.
 *
 * NOTE(review): the condition under which the "start: 0" line is printed is
 * not shown here; presumably it is the fallback when no hole was recorded.
 */
533 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
535 u64 root_objectid = root->root_key.objectid;
536 int errors = rec->errors;
540 /* For reloc root errors, print the corresponding fs root objectid. */
541 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
542 root_objectid = root->root_key.offset;
543 fprintf(stderr, "reloc");
545 fprintf(stderr, "root %llu inode %llu errors %x",
546 (unsigned long long) root_objectid,
547 (unsigned long long) rec->ino, rec->errors);
549 if (errors & I_ERR_NO_INODE_ITEM)
550 fprintf(stderr, ", no inode item");
551 if (errors & I_ERR_NO_ORPHAN_ITEM)
552 fprintf(stderr, ", no orphan item");
553 if (errors & I_ERR_DUP_INODE_ITEM)
554 fprintf(stderr, ", dup inode item");
555 if (errors & I_ERR_DUP_DIR_INDEX)
556 fprintf(stderr, ", dup dir index");
557 if (errors & I_ERR_ODD_DIR_ITEM)
558 fprintf(stderr, ", odd dir item");
559 if (errors & I_ERR_ODD_FILE_EXTENT)
560 fprintf(stderr, ", odd file extent");
561 if (errors & I_ERR_BAD_FILE_EXTENT)
562 fprintf(stderr, ", bad file extent");
563 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
564 fprintf(stderr, ", file extent overlap");
565 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
566 fprintf(stderr, ", file extent discount");
567 if (errors & I_ERR_DIR_ISIZE_WRONG)
568 fprintf(stderr, ", dir isize wrong");
569 if (errors & I_ERR_FILE_NBYTES_WRONG)
570 fprintf(stderr, ", nbytes wrong");
571 if (errors & I_ERR_ODD_CSUM_ITEM)
572 fprintf(stderr, ", odd csum item");
573 if (errors & I_ERR_SOME_CSUM_MISSING)
574 fprintf(stderr, ", some csum missing");
575 if (errors & I_ERR_LINK_COUNT_WRONG)
576 fprintf(stderr, ", link count wrong");
577 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
578 fprintf(stderr, ", orphan file extent");
579 fprintf(stderr, "\n");
580 /* Print the orphan extents if needed */
581 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
582 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
584 /* Print the holes if needed */
585 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
586 struct file_extent_hole *hole;
587 struct rb_node *node;
590 node = rb_first(&rec->holes);
591 fprintf(stderr, "Found file extent holes:\n");
594 hole = rb_entry(node, struct file_extent_hole, node);
595 fprintf(stderr, "\tstart: %llu, len: %llu\n",
596 hole->start, hole->len);
597 node = rb_next(node);
600 fprintf(stderr, "\tstart: 0, len: %llu\n",
602 root->fs_info->sectorsize));
606 static void print_ref_error(int errors)
608 if (errors & REF_ERR_NO_DIR_ITEM)
609 fprintf(stderr, ", no dir item");
610 if (errors & REF_ERR_NO_DIR_INDEX)
611 fprintf(stderr, ", no dir index");
612 if (errors & REF_ERR_NO_INODE_REF)
613 fprintf(stderr, ", no inode ref");
614 if (errors & REF_ERR_DUP_DIR_ITEM)
615 fprintf(stderr, ", dup dir item");
616 if (errors & REF_ERR_DUP_DIR_INDEX)
617 fprintf(stderr, ", dup dir index");
618 if (errors & REF_ERR_DUP_INODE_REF)
619 fprintf(stderr, ", dup inode ref");
620 if (errors & REF_ERR_INDEX_UNMATCH)
621 fprintf(stderr, ", index mismatch");
622 if (errors & REF_ERR_FILETYPE_UNMATCH)
623 fprintf(stderr, ", filetype mismatch");
624 if (errors & REF_ERR_NAME_TOO_LONG)
625 fprintf(stderr, ", name too long");
626 if (errors & REF_ERR_NO_ROOT_REF)
627 fprintf(stderr, ", no root ref");
628 if (errors & REF_ERR_NO_ROOT_BACKREF)
629 fprintf(stderr, ", no root backref");
630 if (errors & REF_ERR_DUP_ROOT_REF)
631 fprintf(stderr, ", dup root ref");
632 if (errors & REF_ERR_DUP_ROOT_BACKREF)
633 fprintf(stderr, ", dup root backref");
634 fprintf(stderr, "\n");
/*
 * Look up the inode record for `ino` in @inode_cache, creating it if needed.
 *
 * Existing entry: when the caller intends to modify the record (`mod`) and
 * it is shared (refs > 1), the cache entry is switched to a private copy
 * made by clone_inode_rec(); a failed clone is detected with IS_ERR().
 *
 * Missing entry: a zeroed record is allocated with extent_start set to
 * (u64)-1 and empty backref/orphan lists and hole tree, wrapped in a new
 * ptr_node covering [ino, ino + 1) and inserted into the cache.
 * BTRFS_FREE_INO_OBJECTID gets special treatment before the insert.
 *
 * Returns ERR_PTR(-ENOMEM) on allocation failure and ERR_PTR(-EEXIST) on the
 * path following insert_cache_extent().
 *
 * NOTE(review): the remaining parameters and several branches (including
 * the successful return) are on lines not shown here.
 */
637 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
640 struct ptr_node *node;
641 struct cache_extent *cache;
642 struct inode_record *rec = NULL;
645 cache = lookup_cache_extent(inode_cache, ino, 1);
647 node = container_of(cache, struct ptr_node, cache);
/* Un-share the record before a caller modifies it. */
649 if (mod && rec->refs > 1) {
650 node->data = clone_inode_rec(rec);
651 if (IS_ERR(node->data))
657 rec = calloc(1, sizeof(*rec));
659 return ERR_PTR(-ENOMEM);
/* (u64)-1 marks "no extent seen yet"; merge_inode_recs() tests for it. */
661 rec->extent_start = (u64)-1;
663 INIT_LIST_HEAD(&rec->backrefs);
664 INIT_LIST_HEAD(&rec->orphan_extents);
665 rec->holes = RB_ROOT;
667 node = malloc(sizeof(*node));
670 return ERR_PTR(-ENOMEM);
672 node->cache.start = ino;
673 node->cache.size = 1;
676 if (ino == BTRFS_FREE_INO_OBJECTID)
679 ret = insert_cache_extent(inode_cache, &node->cache);
681 return ERR_PTR(-EEXIST);
/*
 * Drain an orphan-extent list: while the list is not empty, take its first
 * entry and unlink it with list_del().
 *
 * NOTE(review): the release of each entry's memory is not shown here;
 * presumably it is freed after list_del() — confirm.
 */
686 static void free_orphan_data_extents(struct list_head *orphan_extents)
688 struct orphan_data_extent *orphan;
690 while (!list_empty(orphan_extents)) {
691 orphan = list_entry(orphan_extents->next,
692 struct orphan_data_extent, list);
693 list_del(&orphan->list);
/*
 * Tear down an inode record: unlink every backref from rec->backrefs, then
 * release the orphan extents and the hole tree through their helpers.
 *
 * NOTE(review): reference-count handling and the freeing of the backrefs
 * and of the record itself are on lines not shown here.
 */
698 static void free_inode_rec(struct inode_record *rec)
700 struct inode_backref *backref;
705 while (!list_empty(&rec->backrefs)) {
706 backref = to_inode_backref(rec->backrefs.next);
707 list_del(&backref->list);
710 free_orphan_data_extents(&rec->orphan_extents);
711 free_file_extent_holes(&rec->holes);
/*
 * Test whether an inode record is completely verified: no errors, marked
 * checked, inode item found, link count equal to the links found, and no
 * backrefs left pending.
 *
 * NOTE(review): the return statements are not shown here; presumably
 * non-zero when all conditions hold — confirm.
 */
715 static int can_free_inode_rec(struct inode_record *rec)
717 if (!rec->errors && rec->checked && rec->found_inode_item &&
718 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Finish the consistency checks on an inode record and drop it from the
 * cache when nothing is wrong with it.
 *
 * Steps, in order:
 *  - needs rec->found_inode_item to proceed;
 *  - backrefs that have both a dir item and a dir index get their file type
 *    compared with the inode mode (REF_ERR_FILETYPE_UNMATCH on mismatch);
 *    a backref with no errors, an inode ref, and matching link counts is
 *    unlinked from the list;
 *  - needs rec->checked and !rec->merging to proceed;
 *  - directories: size and stray file extents are checked;
 *  - regular files and symlinks: stray dir items, nbytes, and (when the
 *    inode has links and no_holes is off) uncovered ranges are checked;
 *  - regular files and symlinks: csum presence is checked against nodatasum;
 *  - the record must be unshared (BUG_ON refs != 1); if
 *    can_free_inode_rec() agrees, its cache entry is removed.
 *
 * NOTE(review): the early-exit statements and the actual frees are on lines
 * not shown here, as are the declarations of `filetype` and `no_holes`.
 */
723 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
724 struct inode_record *rec)
726 struct cache_extent *cache;
727 struct inode_backref *tmp, *backref;
728 struct ptr_node *node;
731 if (!rec->found_inode_item)
734 filetype = imode_to_type(rec->imode);
735 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
736 if (backref->found_dir_item && backref->found_dir_index) {
737 if (backref->filetype != filetype)
738 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
739 if (!backref->errors && backref->found_inode_ref &&
740 rec->nlink == rec->found_link) {
741 list_del(&backref->list);
747 if (!rec->checked || rec->merging)
750 if (S_ISDIR(rec->imode)) {
751 if (rec->found_size != rec->isize)
752 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
753 if (rec->found_file_extent)
754 rec->errors |= I_ERR_ODD_FILE_EXTENT;
755 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
756 if (rec->found_dir_item)
757 rec->errors |= I_ERR_ODD_DIR_ITEM;
758 if (rec->found_size != rec->nbytes)
759 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
/* Extents must reach isize and no recorded hole may start before it. */
760 if (rec->nlink > 0 && !no_holes &&
761 (rec->extent_end < rec->isize ||
762 first_extent_gap(&rec->holes) < rec->isize))
763 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
766 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
767 if (rec->found_csum_item && rec->nodatasum)
768 rec->errors |= I_ERR_ODD_CSUM_ITEM;
769 if (rec->some_csum_missing && !rec->nodatasum)
770 rec->errors |= I_ERR_SOME_CSUM_MISSING;
773 BUG_ON(rec->refs != 1);
774 if (can_free_inode_rec(rec)) {
775 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
776 node = container_of(cache, struct ptr_node, cache);
777 BUG_ON(node->data != rec);
778 remove_cache_extent(inode_cache, &node->cache);
/*
 * Search @root for an orphan item: a key with objectid
 * BTRFS_ORPHAN_OBJECTID and type BTRFS_ORPHAN_ITEM_KEY.  The search is
 * read-only (no transaction, no insert/COW) and the path is released
 * afterwards.
 *
 * NOTE(review): the key offset assignment (presumably `ino`) and the
 * translation of the search result into the return value are not shown.
 */
784 static int check_orphan_item(struct btrfs_root *root, u64 ino)
786 struct btrfs_path path;
787 struct btrfs_key key;
790 key.objectid = BTRFS_ORPHAN_OBJECTID;
791 key.type = BTRFS_ORPHAN_ITEM_KEY;
794 btrfs_init_path(&path);
795 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
796 btrfs_release_path(&path);
/*
 * Record the contents of an INODE_ITEM in the active inode record.
 *
 * The active record must match key->objectid and must not be shared
 * (BUG_ON).  A second inode item for the same record is flagged with
 * I_ERR_DUP_INODE_ITEM.  Otherwise nlink, isize, nbytes and imode are read
 * from the item, the BTRFS_INODE_NODATASUM flag is inspected, and
 * found_inode_item is set.  I_ERR_NO_ORPHAN_ITEM can be raised before the
 * record is handed to maybe_free_inode_rec().
 *
 * NOTE(review): the condition guarding I_ERR_NO_ORPHAN_ITEM, the statement
 * under the NODATASUM test and the return values are not shown here.
 */
802 static int process_inode_item(struct extent_buffer *eb,
803 int slot, struct btrfs_key *key,
804 struct shared_node *active_node)
806 struct inode_record *rec;
807 struct btrfs_inode_item *item;
809 rec = active_node->current;
810 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
811 if (rec->found_inode_item) {
812 rec->errors |= I_ERR_DUP_INODE_ITEM;
815 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
816 rec->nlink = btrfs_inode_nlink(eb, item);
817 rec->isize = btrfs_inode_size(eb, item);
818 rec->nbytes = btrfs_inode_nbytes(eb, item);
819 rec->imode = btrfs_inode_mode(eb, item);
820 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
822 rec->found_inode_item = 1;
824 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
825 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find the backref of @rec that matches (@dir, name, @namelen), or create
 * one.
 *
 * Existing backrefs are compared by directory, name length and name bytes.
 * Records with ino == BTRFS_MULTIPLE_OBJECTIDS are special-cased inside the
 * search loop.  A new backref is allocated with room for the name plus a
 * terminating NUL, its header is zeroed, the name is copied in and
 * terminated, and it is appended to rec->backrefs.
 *
 * NOTE(review): the `name` parameter declaration, the allocation-failure
 * path and the return statements are on lines not shown here.
 */
829 static struct inode_backref *get_inode_backref(struct inode_record *rec,
831 int namelen, u64 dir)
833 struct inode_backref *backref;
835 list_for_each_entry(backref, &rec->backrefs, list) {
836 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
838 if (backref->dir != dir || backref->namelen != namelen)
840 if (memcmp(name, backref->name, namelen))
845 backref = malloc(sizeof(*backref) + namelen + 1);
/* Only the fixed-size header is zeroed; the name is filled in below. */
848 memset(backref, 0, sizeof(*backref));
850 backref->namelen = namelen;
851 memcpy(backref->name, name, namelen);
852 backref->name[namelen] = '\0';
853 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Record that a directory entry or inode ref pointing at inode @ino was seen.
 *
 * @inode_cache: cache holding the inode records.
 * @ino:         inode being referenced.
 * @dir:         directory (parent) the reference comes from.
 * @index:       directory index carried by the item.
 * @name, @namelen: entry name (not necessarily NUL-terminated).
 * @filetype:    file type stored in the directory entry.
 * @itemtype:    key type of the item being processed; selects the branch.
 * @errors:      REF_ERR_* bits already detected by the caller.
 *
 * The inode record is fetched for modification and the matching backref is
 * found or created.  Depending on @itemtype the backref is marked as having
 * a dir index, dir item or inode ref.  Duplicates raise the matching
 * REF_ERR_DUP_* bit, and disagreements with previously seen index or
 * filetype raise REF_ERR_INDEX_UNMATCH / REF_ERR_FILETYPE_UNMATCH.  Finally
 * the record is passed to maybe_free_inode_rec().
 *
 * NOTE(review): error handling for the two lookups, some statements inside
 * the branches and the return value are on lines not shown here.
 */
857 static int add_inode_backref(struct cache_tree *inode_cache,
858 u64 ino, u64 dir, u64 index,
859 const char *name, int namelen,
860 u8 filetype, u8 itemtype, int errors)
862 struct inode_record *rec;
863 struct inode_backref *backref;
865 rec = get_inode_rec(inode_cache, ino, 1);
867 backref = get_inode_backref(rec, name, namelen, dir);
870 backref->errors |= errors;
871 if (itemtype == BTRFS_DIR_INDEX_KEY) {
872 if (backref->found_dir_index)
873 backref->errors |= REF_ERR_DUP_DIR_INDEX;
874 if (backref->found_inode_ref && backref->index != index)
875 backref->errors |= REF_ERR_INDEX_UNMATCH;
876 if (backref->found_dir_item && backref->filetype != filetype)
877 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
879 backref->index = index;
880 backref->filetype = filetype;
881 backref->found_dir_index = 1;
882 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
884 if (backref->found_dir_item)
885 backref->errors |= REF_ERR_DUP_DIR_ITEM;
886 if (backref->found_dir_index && backref->filetype != filetype)
887 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
889 backref->filetype = filetype;
890 backref->found_dir_item = 1;
891 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
892 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
893 if (backref->found_inode_ref)
894 backref->errors |= REF_ERR_DUP_INODE_REF;
895 if (backref->found_dir_index && backref->index != index)
896 backref->errors |= REF_ERR_INDEX_UNMATCH;
898 backref->index = index;
/* Remember whether this came from an INODE_REF or an INODE_EXTREF. */
900 backref->ref_type = itemtype;
901 backref->found_inode_ref = 1;
906 maybe_free_inode_rec(inode_cache, rec);
/*
 * Fold the information gathered in @src into @dst.
 *
 * @src:       record being merged from.
 * @dst:       record being merged into; it lives in @dst_cache.
 * @dst_cache: cache used when replaying backrefs through
 *             add_inode_backref().
 *
 * Every backref of @src is replayed against dst->ino once for each kind of
 * evidence it carries (dir index, dir item, inode ref).  The found_* flags,
 * the link and size totals, the error mask and the extent range are
 * combined.  Extent ranges that overlap set I_ERR_FILE_EXTENT_OVERLAP; a gap
 * between dst's end and src's start is recorded as a hole.  Inode item
 * fields are copied when only @src has seen the inode item; the path for
 * both having seen it sets I_ERR_DUP_INODE_ITEM.
 *
 * NOTE(review): the maintenance of `dir_count`, the handling of `ret` and
 * the return value are on lines not shown here.
 */
910 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
911 struct cache_tree *dst_cache)
913 struct inode_backref *backref;
918 list_for_each_entry(backref, &src->backrefs, list) {
919 if (backref->found_dir_index) {
920 add_inode_backref(dst_cache, dst->ino, backref->dir,
921 backref->index, backref->name,
922 backref->namelen, backref->filetype,
923 BTRFS_DIR_INDEX_KEY, backref->errors);
925 if (backref->found_dir_item) {
927 add_inode_backref(dst_cache, dst->ino,
928 backref->dir, 0, backref->name,
929 backref->namelen, backref->filetype,
930 BTRFS_DIR_ITEM_KEY, backref->errors);
932 if (backref->found_inode_ref) {
933 add_inode_backref(dst_cache, dst->ino,
934 backref->dir, backref->index,
935 backref->name, backref->namelen, 0,
936 backref->ref_type, backref->errors);
940 if (src->found_dir_item)
941 dst->found_dir_item = 1;
942 if (src->found_file_extent)
943 dst->found_file_extent = 1;
944 if (src->found_csum_item)
945 dst->found_csum_item = 1;
946 if (src->some_csum_missing)
947 dst->some_csum_missing = 1;
/* Holes are only copied when src's first gap is lower than dst's. */
948 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
949 ret = copy_file_extent_holes(&dst->holes, &src->holes);
954 BUG_ON(src->found_link < dir_count);
955 dst->found_link += src->found_link - dir_count;
956 dst->found_size += src->found_size;
/* (u64)-1 means the record has not seen any file extent yet. */
957 if (src->extent_start != (u64)-1) {
958 if (dst->extent_start == (u64)-1) {
959 dst->extent_start = src->extent_start;
960 dst->extent_end = src->extent_end;
962 if (dst->extent_end > src->extent_start)
963 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
964 else if (dst->extent_end < src->extent_start) {
965 ret = add_file_extent_hole(&dst->holes,
967 src->extent_start - dst->extent_end);
969 if (dst->extent_end < src->extent_end)
970 dst->extent_end = src->extent_end;
974 dst->errors |= src->errors;
975 if (src->found_inode_item) {
976 if (!dst->found_inode_item) {
977 dst->nlink = src->nlink;
978 dst->isize = src->isize;
979 dst->nbytes = src->nbytes;
980 dst->imode = src->imode;
981 dst->nodatasum = src->nodatasum;
982 dst->found_inode_item = 1;
984 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Move the inode records collected in @src_node into @dst_node.
 *
 * One reference on @src_node is dropped first.  The root_cache is processed
 * and then, by switching `src`/`dst`, the inode_cache.  For every entry a
 * new ptr_node with the same range is inserted into the destination cache.
 * When the destination already has a record for that inode (-EEXIST), the
 * two are combined with merge_inode_recs(), the combined record is marked
 * checked, handed to maybe_free_inode_rec(), and the source record is
 * released.  Finally, if the source had a current inode that lies beyond
 * the destination's current one (or the destination has none), the
 * destination's current record is closed out and replaced by the record for
 * that inode.
 *
 * NOTE(review): what happens when the source reference count reaches zero,
 * how `rec` is obtained and shared between the two nodes, and the return
 * value are on lines not shown here.
 */
992 static int splice_shared_node(struct shared_node *src_node,
993 struct shared_node *dst_node)
995 struct cache_extent *cache;
996 struct ptr_node *node, *ins;
997 struct cache_tree *src, *dst;
998 struct inode_record *rec, *conflict;
1003 if (--src_node->refs == 0)
1005 if (src_node->current)
1006 current_ino = src_node->current->ino;
1008 src = &src_node->root_cache;
1009 dst = &dst_node->root_cache;
1011 cache = search_cache_extent(src, 0);
1013 node = container_of(cache, struct ptr_node, cache);
/* Advance before the current entry is possibly removed from src. */
1015 cache = next_cache_extent(cache);
1018 remove_cache_extent(src, &node->cache);
1021 ins = malloc(sizeof(*ins));
1023 ins->cache.start = node->cache.start;
1024 ins->cache.size = node->cache.size;
1028 ret = insert_cache_extent(dst, &ins->cache);
1029 if (ret == -EEXIST) {
1030 conflict = get_inode_rec(dst, rec->ino, 1);
1031 BUG_ON(IS_ERR(conflict));
1032 merge_inode_recs(rec, conflict, dst);
1034 conflict->checked = 1;
1035 if (dst_node->current == conflict)
1036 dst_node->current = NULL;
1038 maybe_free_inode_rec(dst, conflict);
1039 free_inode_rec(rec);
/* After the root cache, repeat the same work for the inode cache. */
1046 if (src == &src_node->root_cache) {
1047 src = &src_node->inode_cache;
1048 dst = &dst_node->inode_cache;
1052 if (current_ino > 0 && (!dst_node->current ||
1053 current_ino > dst_node->current->ino)) {
1054 if (dst_node->current) {
1055 dst_node->current->checked = 1;
1056 maybe_free_inode_rec(dst, dst_node->current);
1058 dst_node->current = get_inode_rec(dst, current_ino, 1);
1059 BUG_ON(IS_ERR(dst_node->current));
/*
 * Release callback for one cache entry of an inode-record tree: recovers
 * the enclosing ptr_node from @cache and releases its inode record with
 * free_inode_rec().
 *
 * NOTE(review): the assignment of `rec` (presumably node->data) and the
 * freeing of the ptr_node itself are on lines not shown here.
 */
1064 static void free_inode_ptr(struct cache_extent *cache)
1066 struct ptr_node *node;
1067 struct inode_record *rec;
1069 node = container_of(cache, struct ptr_node, cache);
1071 free_inode_rec(rec);
/*
 * Instantiates a tree-freeing helper based on free_inode_ptr().
 * NOTE(review): presumably this is what defines free_inode_recs_tree(),
 * which enter_shared_node() calls — confirm against the macro definition.
 */
1075 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the shared node registered for `bytenr` in @shared via
 * lookup_cache_extent() with a size of 1, and convert the cache entry back
 * to its enclosing struct shared_node.
 *
 * NOTE(review): the `bytenr` parameter declaration, the not-found path and
 * the return statements are on lines not shown here.
 */
1077 static struct shared_node *find_shared_node(struct cache_tree *shared,
1080 struct cache_extent *cache;
1081 struct shared_node *node;
1083 cache = lookup_cache_extent(shared, bytenr, 1);
1085 node = container_of(cache, struct shared_node, cache);
/*
 * Create a shared node for the block at @bytenr and insert it into @shared.
 *
 * The node is zero-allocated, keyed as [bytenr, bytenr + 1), and both of its
 * caches (root_cache, inode_cache) are initialised before insertion.
 *
 * NOTE(review): the use of @refs, the allocation-failure path and the
 * return value are on lines not shown here.
 */
1091 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1094 struct shared_node *node;
1096 node = calloc(1, sizeof(*node));
1099 node->cache.start = bytenr;
1100 node->cache.size = 1;
1101 cache_tree_init(&node->root_cache);
1102 cache_tree_init(&node->inode_cache);
1105 ret = insert_cache_extent(shared, &node->cache);
/*
 * Called when the tree walk reaches a shared block at @bytenr on @level.
 *
 * The case level == wc->active_node is handled first; otherwise the active
 * node must be above @level (BUG_ON).  The shared node for @bytenr is looked
 * up; on the add path it is created with @refs, stored in wc->nodes[level],
 * and @level becomes the active level.  On the remaining path, for a root
 * with zero refs whose root level is the active level, one reference on the
 * node is dropped and, at zero, both caches are freed and the node leaves
 * wc->shared.  Otherwise the node's records are spliced into the currently
 * active node, and the node is removed from wc->shared once its reference
 * count is zero.
 *
 * NOTE(review): the branch conditions between these steps, the freeing of
 * the node and the return values are on lines not shown here.
 */
1110 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1111 struct walk_control *wc, int level)
1113 struct shared_node *node;
1114 struct shared_node *dest;
1117 if (level == wc->active_node)
1120 BUG_ON(wc->active_node <= level);
1121 node = find_shared_node(&wc->shared, bytenr);
1123 ret = add_shared_node(&wc->shared, bytenr, refs);
1125 node = find_shared_node(&wc->shared, bytenr);
1126 wc->nodes[level] = node;
1127 wc->active_node = level;
1131 if (wc->root_level == wc->active_node &&
1132 btrfs_root_refs(&root->root_item) == 0) {
1133 if (--node->refs == 0) {
1134 free_inode_recs_tree(&node->root_cache);
1135 free_inode_recs_tree(&node->inode_cache);
1136 remove_cache_extent(&wc->shared, &node->cache);
1142 dest = wc->nodes[wc->active_node];
1143 splice_shared_node(node, dest);
1144 if (node->refs == 0) {
1145 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Called when the tree walk leaves the shared block on @level.
 *
 * The root level is handled first.  Otherwise the levels above @level are
 * scanned (the scan must end below BTRFS_MAX_LEVEL), the currently active
 * node is detached from wc->nodes, and the level found becomes the new
 * active level.  If that level is below the root level, or the root still
 * has references, the detached node must hold more than one reference and
 * its records are spliced into the new active node.
 *
 * NOTE(review): the loop body, the alternative branch after the second
 * BUG_ON and the return values are on lines not shown here.
 */
1151 static int leave_shared_node(struct btrfs_root *root,
1152 struct walk_control *wc, int level)
1154 struct shared_node *node;
1155 struct shared_node *dest;
1158 if (level == wc->root_level)
1161 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1165 BUG_ON(i >= BTRFS_MAX_LEVEL);
1167 node = wc->nodes[wc->active_node];
1168 wc->nodes[wc->active_node] = NULL;
1169 wc->active_node = i;
1171 dest = wc->nodes[wc->active_node];
1172 if (wc->active_node < wc->root_level ||
1173 btrfs_root_refs(&root->root_item) > 0) {
1174 BUG_ON(node->refs <= 1);
1175 splice_shared_node(node, dest);
1177 BUG_ON(node->refs < 2);
1186 * 1 - if the root with id child_root_id is a child of root parent_root_id
1187 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1188 * has other root(s) as parent(s)
1189 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Determine how root `child_root_id` relates to root @parent_root_id.
 *
 * First the tree root is searched for the forward reference
 * (parent_root_id, ROOT_REF, child_root_id).  Then the ROOT_BACKREF items
 * of the child are walked, moving to the next leaf when the slot runs past
 * the current one; a backref whose offset equals @parent_root_id ends the
 * walk.  If the walk finishes without a match, the result is 0 when some
 * backref was seen (has_parent) and 2 when none was.
 *
 * NOTE(review): the `child_root_id` parameter declaration, the handling of
 * the search results and the remaining return statements are on lines not
 * shown here.
 */
1191 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1194 struct btrfs_path path;
1195 struct btrfs_key key;
1196 struct extent_buffer *leaf;
1200 btrfs_init_path(&path);
1202 key.objectid = parent_root_id;
1203 key.type = BTRFS_ROOT_REF_KEY;
1204 key.offset = child_root_id;
1205 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1209 btrfs_release_path(&path);
1213 key.objectid = child_root_id;
1214 key.type = BTRFS_ROOT_BACKREF_KEY;
1216 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1222 leaf = path.nodes[0];
1223 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1224 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1227 leaf = path.nodes[0];
1230 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Left the child's ROOT_BACKREF items. */
1231 if (key.objectid != child_root_id ||
1232 key.type != BTRFS_ROOT_BACKREF_KEY)
1237 if (key.offset == parent_root_id) {
1238 btrfs_release_path(&path);
1245 btrfs_release_path(&path);
1248 return has_parent ? 0 : 2;
/*
 * Process a DIR_ITEM or DIR_INDEX item of the active inode (a directory).
 *
 * The item may pack several entries; each one is decoded in turn.  The name
 * length is added to the directory's found_size.  Entries whose name would
 * cross the item boundary or exceed BTRFS_NAME_LEN are flagged with
 * REF_ERR_NAME_TOO_LONG and read with a clamped length.  For DIR_ITEM keys
 * the key offset must equal the hash of the name, otherwise
 * I_ERR_ODD_DIR_ITEM is set and an error is logged.  Depending on the type
 * of the entry's location key, a backref is recorded in the inode cache
 * (INODE_ITEM), in the root cache (ROOT_ITEM), or, for unknown types, in
 * the inode cache under BTRFS_MULTIPLE_OBJECTIDS.  A DIR_INDEX item holding
 * more than one entry raises I_ERR_DUP_DIR_INDEX.
 *
 * NOTE(review): namebuf holds BTRFS_NAME_LEN bytes and no NUL terminator is
 * written in the lines shown, yet it is passed to a "%s" conversion in the
 * hash-mismatch message.  Unless a terminator is added elsewhere, that read
 * can run past the buffer; "%.*s" with `len` would be safe — confirm.
 *
 * NOTE(review): local declarations, loop bookkeeping (cur, nritems) and the
 * return value are on lines not shown here.
 */
1251 static int process_dir_item(struct extent_buffer *eb,
1252 int slot, struct btrfs_key *key,
1253 struct shared_node *active_node)
1263 struct btrfs_dir_item *di;
1264 struct inode_record *rec;
1265 struct cache_tree *root_cache;
1266 struct cache_tree *inode_cache;
1267 struct btrfs_key location;
1268 char namebuf[BTRFS_NAME_LEN];
1270 root_cache = &active_node->root_cache;
1271 inode_cache = &active_node->inode_cache;
1272 rec = active_node->current;
1273 rec->found_dir_item = 1;
1275 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1276 total = btrfs_item_size_nr(eb, slot);
1277 while (cur < total) {
1279 btrfs_dir_item_key_to_cpu(eb, di, &location);
1280 name_len = btrfs_dir_name_len(eb, di);
1281 data_len = btrfs_dir_data_len(eb, di);
1282 filetype = btrfs_dir_type(eb, di);
1284 rec->found_size += name_len;
1285 if (cur + sizeof(*di) + name_len > total ||
1286 name_len > BTRFS_NAME_LEN) {
1287 error = REF_ERR_NAME_TOO_LONG;
/* Not even the fixed-size header fits in what is left of the item. */
1289 if (cur + sizeof(*di) > total)
1291 len = min_t(u32, total - cur - sizeof(*di),
/* The name bytes directly follow the dir item header. */
1298 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1300 if (key->type == BTRFS_DIR_ITEM_KEY &&
1301 key->offset != btrfs_name_hash(namebuf, len)) {
1302 rec->errors |= I_ERR_ODD_DIR_ITEM;
1303 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1304 key->objectid, key->offset, namebuf, len, filetype,
1305 key->offset, btrfs_name_hash(namebuf, len));
1308 if (location.type == BTRFS_INODE_ITEM_KEY) {
1309 add_inode_backref(inode_cache, location.objectid,
1310 key->objectid, key->offset, namebuf,
1311 len, filetype, key->type, error);
1312 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1313 add_inode_backref(root_cache, location.objectid,
1314 key->objectid, key->offset,
1315 namebuf, len, filetype,
1319 "unknown location type %d in DIR_ITEM[%llu %llu]\n",
1320 location.type, key->objectid, key->offset);
1321 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1322 key->objectid, key->offset, namebuf,
1323 len, filetype, key->type, error);
/* Step to the next packed entry: header + name + data. */
1326 len = sizeof(*di) + name_len + data_len;
1327 di = (struct btrfs_dir_item *)((char *)di + len);
1330 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1331 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Process an INODE_REF item, which may pack several references.
 *
 * For each reference the name length and directory index are read.  A
 * reference whose header plus name would cross the item boundary, or whose
 * name exceeds BTRFS_NAME_LEN, is flagged with REF_ERR_NAME_TOO_LONG and its
 * name is read with a clamped length.  Each reference is recorded with
 * add_inode_backref() using key->objectid as the inode and key->offset as
 * the parent directory.
 *
 * NOTE(review): local declarations, the early exit when the header itself
 * does not fit, loop bookkeeping and the return value are on lines not
 * shown here.
 */
1336 static int process_inode_ref(struct extent_buffer *eb,
1337 int slot, struct btrfs_key *key,
1338 struct shared_node *active_node)
1346 struct cache_tree *inode_cache;
1347 struct btrfs_inode_ref *ref;
1348 char namebuf[BTRFS_NAME_LEN];
1350 inode_cache = &active_node->inode_cache;
1352 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1353 total = btrfs_item_size_nr(eb, slot);
1354 while (cur < total) {
1355 name_len = btrfs_inode_ref_name_len(eb, ref);
1356 index = btrfs_inode_ref_index(eb, ref);
1358 /* inode_ref + namelen should not cross item boundary */
1359 if (cur + sizeof(*ref) + name_len > total ||
1360 name_len > BTRFS_NAME_LEN) {
1361 if (total < cur + sizeof(*ref))
1364 /* Still try to read out the remaining part */
1365 len = min_t(u32, total - cur - sizeof(*ref),
1367 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes directly follow the inode ref header. */
1373 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1374 add_inode_backref(inode_cache, key->objectid, key->offset,
1375 index, namebuf, len, 0, key->type, error);
/* Step to the next packed reference: header + name. */
1377 len = sizeof(*ref) + name_len;
1378 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Process an INODE_EXTREF item, which may pack several extended references.
 *
 * For each reference the name length, directory index and parent directory
 * are read from the item itself.  Names longer than BTRFS_NAME_LEN are
 * truncated to that length and flagged with REF_ERR_NAME_TOO_LONG.  Each
 * reference is recorded with add_inode_backref() using key->objectid as the
 * inode.
 *
 * NOTE(review): local declarations, the branch taken for names that fit,
 * loop bookkeeping and the return value are on lines not shown here.
 */
1384 static int process_inode_extref(struct extent_buffer *eb,
1385 int slot, struct btrfs_key *key,
1386 struct shared_node *active_node)
1395 struct cache_tree *inode_cache;
1396 struct btrfs_inode_extref *extref;
1397 char namebuf[BTRFS_NAME_LEN];
1399 inode_cache = &active_node->inode_cache;
1401 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1402 total = btrfs_item_size_nr(eb, slot);
1403 while (cur < total) {
1404 name_len = btrfs_inode_extref_name_len(eb, extref);
1405 index = btrfs_inode_extref_index(eb, extref);
1406 parent = btrfs_inode_extref_parent(eb, extref);
1407 if (name_len <= BTRFS_NAME_LEN) {
1411 len = BTRFS_NAME_LEN;
1412 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes directly follow the extref header. */
1414 read_extent_buffer(eb, namebuf,
1415 (unsigned long)(extref + 1), len);
1416 add_inode_backref(inode_cache, key->objectid, parent,
1417 index, namebuf, len, 0, key->type, error);
/* Step to the next packed reference: header + name. */
1419 len = sizeof(*extref) + name_len;
1420 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Scan the csum tree of @root's filesystem for BTRFS_EXTENT_CSUM_KEY items
 * that overlap the byte range [@start, @start + @len).
 * NOTE(review): @found is presumably the output for the number of bytes that
 * have checksums; its update is not among the lines visible here -- confirm.
 */
1427 static int count_csum_range(struct btrfs_root *root, u64 start,
1428 u64 len, u64 *found)
1430 struct btrfs_key key;
1431 struct btrfs_path path;
1432 struct extent_buffer *leaf;
1437 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1439 btrfs_init_path(&path);
1441 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1443 key.type = BTRFS_EXTENT_CSUM_KEY;
1445 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* no exact match: look at the previous item, which may be a csum item too */
1449 if (ret > 0 && path.slots[0] > 0) {
1450 leaf = path.nodes[0];
1451 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1452 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1453 key.type == BTRFS_EXTENT_CSUM_KEY)
1458 leaf = path.nodes[0];
/* ran off the end of this leaf: continue in the next one */
1459 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1460 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1465 leaf = path.nodes[0];
1468 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1469 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1470 key.type != BTRFS_EXTENT_CSUM_KEY)
1473 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1474 if (key.offset >= start + len)
1477 if (key.offset > start)
/*
 * Each csum_size bytes of the item stand for one sector, so csum_end is the
 * end of the byte range this item has checksums for.
 */
1480 size = btrfs_item_size_nr(leaf, path.slots[0]);
1481 csum_end = key.offset + (size / csum_size) *
1482 root->fs_info->sectorsize;
1483 if (csum_end > start) {
1484 size = min(csum_end - start, len);
1493 btrfs_release_path(&path);
/*
 * Account one file extent item (@slot of @eb, key @key) against the current
 * inode record of @active_node: track the extent range seen so far, flag
 * overlapping extents, record holes, validate the fields of inline, regular
 * and preallocated extents, and compare the extent against the checksums
 * found by count_csum_range().
 */
1499 static int process_file_extent(struct btrfs_root *root,
1500 struct extent_buffer *eb,
1501 int slot, struct btrfs_key *key,
1502 struct shared_node *active_node)
1504 struct inode_record *rec;
1505 struct btrfs_file_extent_item *fi;
1507 u64 disk_bytenr = 0;
1508 u64 extent_offset = 0;
1509 u64 mask = root->fs_info->sectorsize - 1;
1513 rec = active_node->current;
1514 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1515 rec->found_file_extent = 1;
/* (u64)-1 marks a record that has not seen any file extent yet */
1517 if (rec->extent_start == (u64)-1) {
1518 rec->extent_start = key->offset;
1519 rec->extent_end = key->offset;
/* previous extent ends past this one's start: overlap; before it: a hole */
1522 if (rec->extent_end > key->offset)
1523 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1524 else if (rec->extent_end < key->offset) {
1525 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1526 key->offset - rec->extent_end);
1531 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1532 extent_type = btrfs_file_extent_type(eb, fi);
1534 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1535 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1537 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1538 rec->found_size += num_bytes;
/* round the inline length up to the next sector boundary */
1539 num_bytes = (num_bytes + mask) & ~mask;
1540 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1541 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1542 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1543 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1544 extent_offset = btrfs_file_extent_offset(eb, fi);
/* length must be non-zero and sector aligned */
1545 if (num_bytes == 0 || (num_bytes & mask))
1546 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1547 if (num_bytes + extent_offset >
1548 btrfs_file_extent_ram_bytes(eb, fi))
1549 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* a preallocated extent must not be compressed, encrypted or otherwise encoded */
1550 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1551 (btrfs_file_extent_compression(eb, fi) ||
1552 btrfs_file_extent_encryption(eb, fi) ||
1553 btrfs_file_extent_other_encoding(eb, fi)))
1554 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1555 if (disk_bytenr > 0)
1556 rec->found_size += num_bytes;
1558 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1560 rec->extent_end = key->offset + num_bytes;
1563 * The data reloc tree will copy full extents into its inode and then
1564 * copy the corresponding csums. Because the extent it copied could be
1565 * a preallocated extent that hasn't been written to yet there may be no
1566 * csums to copy, ergo we won't have csums for our file extent. This is
1567 * ok so just don't bother checking csums if the inode belongs to the
1570 if (disk_bytenr > 0 &&
1571 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* for a compressed extent the csum lookup uses the on-disk extent length */
1573 if (btrfs_file_extent_compression(eb, fi))
1574 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1576 disk_bytenr += extent_offset;
1578 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1581 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1583 rec->found_csum_item = 1;
1584 if (found < num_bytes)
1585 rec->some_csum_missing = 1;
1586 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1588 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Iterate over every item of leaf @eb and dispatch it by key type to the
 * matching process_* helper, keeping active_node->current pointing at the
 * inode record for the objectid being processed.
 */
1594 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1595 struct walk_control *wc)
1597 struct btrfs_key key;
1601 struct cache_tree *inode_cache;
1602 struct shared_node *active_node;
1604 if (wc->root_level == wc->active_node &&
1605 btrfs_root_refs(&root->root_item) == 0)
1608 active_node = wc->nodes[wc->active_node];
1609 inode_cache = &active_node->inode_cache;
1610 nritems = btrfs_header_nritems(eb);
1611 for (i = 0; i < nritems; i++) {
1612 btrfs_item_key_to_cpu(eb, &key, i);
1614 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1616 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/*
 * First item, or the objectid moved past the current record: mark the old
 * record as checked and fetch the record for the new objectid.
 */
1619 if (active_node->current == NULL ||
1620 active_node->current->ino < key.objectid) {
1621 if (active_node->current) {
1622 active_node->current->checked = 1;
1623 maybe_free_inode_rec(inode_cache,
1624 active_node->current);
1626 active_node->current = get_inode_rec(inode_cache,
1628 BUG_ON(IS_ERR(active_node->current));
1631 case BTRFS_DIR_ITEM_KEY:
1632 case BTRFS_DIR_INDEX_KEY:
1633 ret = process_dir_item(eb, i, &key, active_node);
1635 case BTRFS_INODE_REF_KEY:
1636 ret = process_inode_ref(eb, i, &key, active_node);
1638 case BTRFS_INODE_EXTREF_KEY:
1639 ret = process_inode_extref(eb, i, &key, active_node);
1641 case BTRFS_INODE_ITEM_KEY:
1642 ret = process_inode_item(eb, i, &key, active_node);
1644 case BTRFS_EXTENT_DATA_KEY:
1645 ret = process_file_extent(root, eb, i, &key,
/*
 * Forward declarations: process_one_leaf_v2() below calls both functions
 * before their definitions appear.
 */
1655 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1656 struct extent_buffer *eb, struct node_refs *nrefs,
1657 u64 level, int check_all);
1658 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1659 unsigned int ext_ref);
1662 * Returns >0 Found error, not fatal, should continue
1663 * Returns <0 Fatal error, must exit the whole check
1664 * Returns 0 No errors found
/*
 * Check the inodes stored in the leaf at path->nodes[0] through
 * check_inode_item().  When that call has moved @path to a different leaf,
 * walk the path from the root level down and refresh @nrefs for every node
 * whose bytenr changed.
 */
1666 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1667 struct node_refs *nrefs, int *level, int ext_ref)
1669 struct extent_buffer *cur = path->nodes[0];
1670 struct btrfs_key key;
1674 int root_level = btrfs_header_level(root->node);
1676 int ret = 0; /* Final return value */
1677 int err = 0; /* Positive error bitmap */
/* remember which leaf we started in, to detect a leaf switch later */
1679 cur_bytenr = cur->start;
1681 /* skip to first inode item or the first inode number change */
1682 nritems = btrfs_header_nritems(cur);
1683 for (i = 0; i < nritems; i++) {
1684 btrfs_item_key_to_cpu(cur, &key, i);
1686 first_ino = key.objectid;
1687 if (key.type == BTRFS_INODE_ITEM_KEY ||
1688 (first_ino && first_ino != key.objectid))
1692 path->slots[0] = nritems;
1698 err |= check_inode_item(root, path, ext_ref);
1700 /* modify cur since check_inode_item may change path */
1701 cur = path->nodes[0];
1703 if (err & LAST_ITEM)
1706 /* still have inode items in this leaf */
1707 if (cur->start == cur_bytenr)
1711 * we have switched to another leaf, above nodes may
1712 * have changed, here walk down the path, if a node
1713 * or leaf is shared, check whether we can skip this
1716 for (i = root_level; i >= 0; i--) {
1717 if (path->nodes[i]->start == nrefs->bytenr[i])
1720 ret = update_nodes_refs(root, path->nodes[i]->start,
1721 path->nodes[i], nrefs, i, 0);
1725 if (!nrefs->need_check[i]) {
/* drop the buffers of all levels below *level from the path */
1731 for (i = 0; i < *level; i++) {
1732 free_extent_buffer(path->nodes[i]);
1733 path->nodes[i] = NULL;
/*
 * Issue readahead for the child blocks that @node points to, starting at
 * @slot and running to the node's last item.
 */
1742 static void reada_walk_down(struct btrfs_root *root,
1743 struct extent_buffer *node, int slot)
1745 struct btrfs_fs_info *fs_info = root->fs_info;
1752 level = btrfs_header_level(node);
1756 nritems = btrfs_header_nritems(node);
1757 for (i = slot; i < nritems; i++) {
1758 bytenr = btrfs_node_blockptr(node, i);
1759 ptr_gen = btrfs_node_ptr_generation(node, i);
1760 readahead_tree_block(fs_info, bytenr, ptr_gen);
1765 * Check the child node/leaf by the following condition:
1766 * 1. the first item key of the node/leaf should be the same with the one
1768 * 2. block in parent node should match the child node/leaf.
1769 * 3. generation of parent node and child's header should be consistent.
1771 * Or the child node/leaf pointed by the key in parent is not valid.
1773 * We hope to check leaf owner too, but since subvol may share leaves,
1774 * which makes leaf owner check not so strong, key check should be
1775 * sufficient enough for that case.
/*
 * Verify that @child really is the block referenced from @slot of @parent:
 * its first key, its bytenr and its generation must match the key, block
 * pointer and pointer generation stored in the parent.  Every mismatch is
 * reported on stderr; "wanted" is always the value recorded in the parent and
 * "have" the value found in the child.
 */
1777 static int check_child_node(struct extent_buffer *parent, int slot,
1778 struct extent_buffer *child)
1780 struct btrfs_key parent_key;
1781 struct btrfs_key child_key;
1784 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* a leaf keeps item keys, an internal node keeps node keys */
1785 if (btrfs_header_level(child) == 0)
1786 btrfs_item_key_to_cpu(child, &child_key, 0);
1788 btrfs_node_key_to_cpu(child, &child_key, 0);
1790 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1793 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1794 parent_key.objectid, parent_key.type, parent_key.offset,
1795 child_key.objectid, child_key.type, child_key.offset);
1797 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1799 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1800 btrfs_node_blockptr(parent, slot),
1801 btrfs_header_bytenr(child));
1803 if (btrfs_node_ptr_generation(parent, slot) !=
1804 btrfs_header_generation(child)) {
/* wanted = generation stored in the parent pointer, have = child header */
1806 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1807 btrfs_node_ptr_generation(parent, slot),
1808 btrfs_header_generation(child));
1814 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
1815 * in every fs or file tree check. Here we find all its root ids, and only check
1816 * it in the fs or file tree which has the smallest root id.
/*
 * Decide whether @root is the tree that should check a block whose owning
 * root ids are listed in @roots.  Lists with zero or one entry are handled
 * up front; otherwise the first entry of the rb-tree is compared with
 * root->objectid.
 * NOTE(review): the return statements are not visible here; presumably
 * non-zero means "check it" -- confirm.
 */
1818 static int need_check(struct btrfs_root *root, struct ulist *roots)
1820 struct rb_node *node;
1821 struct ulist_node *u;
1824 * @roots can be empty if it belongs to tree reloc tree
1825 * In that case, we should always check the leaf, as we can't use
1826 * the tree owner to ensure some other root will check it.
1828 if (roots->nnodes == 1 || roots->nnodes == 0)
/* rb_first() yields the leftmost node of the tree */
1831 node = rb_first(&roots->root);
1832 u = rb_entry(node, struct ulist_node, rb_node);
1834 * current root id is not smallest, we skip it and let it be checked
1835 * in the fs or file tree who hash the smallest root id.
1837 if (root->objectid != u->val)
/*
 * Work out whether tree block @eb should carry BTRFS_BLOCK_FLAG_FULL_BACKREF
 * and set or clear that bit in *flags_ret accordingly.  Cheap checks on the
 * root id, the root bytenr, the RELOC header flag and the block owner come
 * first; only then is the extent tree searched and the inline refs of the
 * block's extent item walked.
 */
1843 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
1846 struct btrfs_root *extent_root = root->fs_info->extent_root;
1847 struct btrfs_root_item *ri = &root->root_item;
1848 struct btrfs_extent_inline_ref *iref;
1849 struct btrfs_extent_item *ei;
1850 struct btrfs_key key;
1851 struct btrfs_path *path = NULL;
1862 * Except file/reloc tree, we can not have FULL BACKREF MODE
1864 if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
1868 if (eb->start == btrfs_root_bytenr(ri))
1871 if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
1874 owner = btrfs_header_owner(eb);
1875 if (owner == root->objectid)
1878 path = btrfs_alloc_path();
/* search with the largest offset, then step back to the block's extent item */
1882 key.objectid = btrfs_header_bytenr(eb);
1884 key.offset = (u64)-1;
1886 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
1893 ret = btrfs_previous_extent_item(extent_root, path,
1899 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* from here on @eb refers to the extent tree leaf, not the block being judged */
1901 eb = path->nodes[0];
1902 slot = path->slots[0];
1903 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
1905 flags = btrfs_extent_flags(eb, ei);
1906 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
/* inline refs start behind the extent item and run to the end of the item */
1909 ptr = (unsigned long)(ei + 1);
1910 end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
/* an EXTENT_ITEM key additionally carries a tree block info before the refs */
1912 if (key.type == BTRFS_EXTENT_ITEM_KEY)
1913 ptr += sizeof(struct btrfs_tree_block_info);
1916 /* Reached extent item ends normally */
1920 /* Beyond extent item end, wrong item size */
1922 error("extent item at bytenr %llu slot %d has wrong size",
1927 iref = (struct btrfs_extent_inline_ref *)ptr;
1928 offset = btrfs_extent_inline_ref_offset(eb, iref);
1929 type = btrfs_extent_inline_ref_type(eb, iref);
1931 if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
1933 ptr += btrfs_extent_inline_ref_size(type);
1937 *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
1941 *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
1943 btrfs_free_path(path);
1948 * for a tree node or leaf, we record its reference count, so later if we still
1949 * process this node or leaf, don't need to compute its reference count again.
1951 * @bytenr if @bytenr == (u64)-1, only update nrefs->full_backref[level]
/*
 * Refresh the per-level cache in @nrefs for the tree block at @bytenr: its
 * reference count, whether this root has to check it (need_check) and, when
 * @check_all is set and @eb is given, whether it is in full backref mode.
 * Nothing is done when the cached bytenr for @level already equals @bytenr.
 */
1953 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1954 struct extent_buffer *eb, struct node_refs *nrefs,
1955 u64 level, int check_all)
1957 struct ulist *roots;
1960 int root_level = btrfs_header_level(root->node);
1964 if (nrefs->bytenr[level] == bytenr)
/* (u64)-1 skips the lookup; only the full backref state below is updated */
1967 if (bytenr != (u64)-1) {
1968 /* the return value of this function seems a mistake */
1969 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1970 level, 1, &refs, &flags);
1972 if (ret < 0 && !check_all)
/* start a fresh cache entry for this level */
1975 nrefs->bytenr[level] = bytenr;
1976 nrefs->refs[level] = refs;
1977 nrefs->full_backref[level] = 0;
1978 nrefs->checked[level] = 0;
1981 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
1986 check = need_check(root, roots);
1988 nrefs->need_check[level] = check;
1991 nrefs->need_check[level] = 1;
1993 if (level == root_level) {
1994 nrefs->need_check[level] = 1;
1997 * The node refs may have not been
1998 * updated if upper needs checking (the
1999 * lowest root_objectid) the node can
2002 nrefs->need_check[level] =
2003 nrefs->need_check[level + 1];
2009 if (check_all && eb) {
2010 calc_extent_flag_v2(root, eb, &flags);
2011 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2012 nrefs->full_backref[level] = 1;
2019 * @level if @level == -1 means extent data item
2020 * else normal tree block.
/*
 * Decide from the reference counts cached in @nrefs whether an extent at
 * @level of @root needs the strict check.  Levels outside [-1, root level]
 * and the root level itself are handled up front; otherwise every level above
 * @level is examined for a reference count greater than one.
 * NOTE(review): the returned values are not visible here -- confirm.
 */
2022 static int should_check_extent_strictly(struct btrfs_root *root,
2023 struct node_refs *nrefs, int level)
2025 int root_level = btrfs_header_level(root->node);
2027 if (level > root_level || level < -1)
2029 if (level == root_level)
2032 * if the upper node is marked full backref, it should contain shared
2033 * backref of the parent (except owner == root->objectid).
2035 while (++level <= root_level)
2036 if (nrefs->refs[level] > 1)
/*
 * Descend from path->nodes[*level] towards the leaves.  Leaves are handed to
 * process_one_leaf(); for internal nodes the child at the current slot is
 * read, validated with check_child_node() and btrfs_check_leaf()/
 * btrfs_check_node(), and made the new bottom of @path.  Reference counts are
 * looked up once per block and cached in @nrefs.
 */
2042 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2043 struct walk_control *wc, int *level,
2044 struct node_refs *nrefs)
2046 enum btrfs_tree_block_status status;
2049 struct btrfs_fs_info *fs_info = root->fs_info;
2050 struct extent_buffer *next;
2051 struct extent_buffer *cur;
2055 WARN_ON(*level < 0);
2056 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* reuse the cached ref count when @nrefs already describes this block */
2058 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2059 refs = nrefs->refs[*level];
2062 ret = btrfs_lookup_extent_info(NULL, root,
2063 path->nodes[*level]->start,
2064 *level, 1, &refs, NULL);
2069 nrefs->bytenr[*level] = path->nodes[*level]->start;
2070 nrefs->refs[*level] = refs;
2074 ret = enter_shared_node(root, path->nodes[*level]->start,
2082 while (*level >= 0) {
2083 WARN_ON(*level < 0);
2084 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2085 cur = path->nodes[*level];
2087 if (btrfs_header_level(cur) != *level)
2090 if (path->slots[*level] >= btrfs_header_nritems(cur))
2093 ret = process_one_leaf(root, cur, wc);
/* internal node: fetch pointer and generation of the child at this slot */
2098 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2099 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2101 if (bytenr == nrefs->bytenr[*level - 1]) {
2102 refs = nrefs->refs[*level - 1];
2104 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2105 *level - 1, 1, &refs, NULL);
2109 nrefs->bytenr[*level - 1] = bytenr;
2110 nrefs->refs[*level - 1] = refs;
2115 ret = enter_shared_node(root, bytenr, refs,
2118 path->slots[*level]++;
/* use the cached buffer when it is up to date, otherwise read from disk */
2123 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2124 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2125 free_extent_buffer(next);
2126 reada_walk_down(root, cur, path->slots[*level]);
2127 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2128 if (!extent_buffer_uptodate(next)) {
2129 struct btrfs_key node_key;
2131 btrfs_node_key_to_cpu(path->nodes[*level],
2133 path->slots[*level]);
2134 btrfs_add_corrupt_extent_record(root->fs_info,
2136 path->nodes[*level]->start,
2137 root->fs_info->nodesize,
2144 ret = check_child_node(cur, path->slots[*level], next);
2146 free_extent_buffer(next);
2151 if (btrfs_is_leaf(next))
2152 status = btrfs_check_leaf(root, NULL, next);
2154 status = btrfs_check_node(root, NULL, next);
2155 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2156 free_extent_buffer(next);
/* step one level down and make the child the new bottom of the path */
2161 *level = *level - 1;
2162 free_extent_buffer(path->nodes[*level]);
2163 path->nodes[*level] = next;
2164 path->slots[*level] = 0;
/* mark this level as fully consumed */
2167 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2172 * Update global fs information.
/*
 * Add the tree block at path->nodes[level] to the global byte counters:
 * total btree bytes, fs-tree bytes when @root is an fs root, extent-tree bytes
 * when the block is owned by the extent tree, and the unused space inside the
 * block as btree_space_waste.
 */
2174 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2178 struct extent_buffer *eb = path->nodes[level];
2180 total_btree_bytes += eb->len;
2181 if (fs_root_objectid(root->objectid))
2182 total_fs_tree_bytes += eb->len;
2183 if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2184 total_extent_tree_bytes += eb->len;
2187 btree_space_waste += btrfs_leaf_free_space(root, eb);
/* unused key pointer slots count as wasted space */
2189 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root->fs_info) -
2190 btrfs_header_nritems(eb));
2191 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2196 * This function only handles BACKREF_MISSING,
2197 * If corresponding extent item exists, increase the ref, else insert an extent
2200 * Returns error bits after repair.
/*
 * Repair a missing backref for tree block @node at @level.  Nothing is done
 * unless @err contains BACKREF_MISSING.  If an extent item has to be inserted
 * it is created with zero references, then one reference is added through
 * btrfs_inc_extent_ref().  On success BACKREF_MISSING is cleared from @err
 * and the cached ref count in @nrefs is updated.
 * NOTE(review): insert_extent is presumably set when
 * btrfs_previous_extent_item() finds no item; that line is not visible here.
 */
2202 static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
2203 struct btrfs_root *root,
2204 struct extent_buffer *node,
2205 struct node_refs *nrefs, int level, int err)
2207 struct btrfs_fs_info *fs_info = root->fs_info;
2208 struct btrfs_root *extent_root = fs_info->extent_root;
2209 struct btrfs_path path;
2210 struct btrfs_extent_item *ei;
2211 struct btrfs_tree_block_info *bi;
2212 struct btrfs_key key;
2213 struct extent_buffer *eb;
2214 u32 size = sizeof(*ei);
2215 u32 node_size = root->fs_info->nodesize;
2216 int insert_extent = 0;
2217 int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2218 int root_level = btrfs_header_level(root->node);
2223 u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
2226 if ((err & BACKREF_MISSING) == 0)
2229 WARN_ON(level > BTRFS_MAX_LEVEL);
2232 btrfs_init_path(&path);
2233 bytenr = btrfs_header_bytenr(node);
2234 owner = btrfs_header_owner(node);
2235 generation = btrfs_header_generation(node);
2237 key.objectid = bytenr;
2239 key.offset = (u64)-1;
2241 /* Search for the extent item */
2242 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2248 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
2252 /* calculate if the extent item flag is full backref or not */
2253 if (nrefs->full_backref[level] != 0)
2254 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2256 /* insert an extent item */
2257 if (insert_extent) {
2258 struct btrfs_disk_key copy_key;
2260 generation = btrfs_header_generation(node);
2262 if (level < root_level && nrefs->full_backref[level + 1] &&
2263 owner != root->objectid) {
2264 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2267 key.objectid = bytenr;
/* without skinny metadata the item also holds a btrfs_tree_block_info */
2268 if (!skinny_metadata) {
2269 key.type = BTRFS_EXTENT_ITEM_KEY;
2270 key.offset = node_size;
2271 size += sizeof(*bi);
2273 key.type = BTRFS_METADATA_ITEM_KEY;
2277 btrfs_release_path(&path);
2278 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
2284 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
/* start at zero refs; the reference itself is added further down */
2286 btrfs_set_extent_refs(eb, ei, 0);
2287 btrfs_set_extent_generation(eb, ei, generation);
2288 btrfs_set_extent_flags(eb, ei, flags);
2290 if (!skinny_metadata) {
2291 bi = (struct btrfs_tree_block_info *)(ei + 1);
2292 memset_extent_buffer(eb, 0, (unsigned long)bi,
2294 btrfs_set_disk_key_objectid(&copy_key, root->objectid);
2295 btrfs_set_disk_key_type(&copy_key, 0);
2296 btrfs_set_disk_key_offset(&copy_key, 0);
2298 btrfs_set_tree_block_level(eb, bi, level);
2299 btrfs_set_tree_block_key(eb, bi, &copy_key);
2301 btrfs_mark_buffer_dirty(eb);
2302 printf("Added an extent item [%llu %u]\n", bytenr, node_size);
2303 btrfs_update_block_group(extent_root, bytenr, node_size, 1, 0);
2305 nrefs->refs[level] = 0;
2306 nrefs->full_backref[level] =
2307 flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
2308 btrfs_release_path(&path);
/* parent in full backref mode and foreign owner: reference via the parent block */
2311 if (level < root_level && nrefs->full_backref[level + 1] &&
2312 owner != root->objectid)
2313 parent = nrefs->bytenr[level + 1];
2315 /* increase the ref */
2316 ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
2317 parent, root->objectid, level, 0);
2319 nrefs->refs[level]++;
2321 btrfs_release_path(&path);
2324 "failed to repair tree block ref start %llu root %llu due to %s",
2325 bytenr, root->objectid, strerror(-ret));
2327 printf("Added one tree block ref start %llu %s %llu\n",
2328 bytenr, parent ? "parent" : "root",
2329 parent ? parent : root->objectid);
2330 err &= ~BACKREF_MISSING;
/*
 * Forward declarations for helpers called by walk_down_tree_v2() below.
 * check_inode_item() is already declared with the same prototype earlier in
 * this file, so this second declaration is redundant but harmless.
 */
2336 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2337 unsigned int ext_ref);
2338 static int check_tree_block_ref(struct btrfs_root *root,
2339 struct extent_buffer *eb, u64 bytenr,
2340 int level, u64 owner, struct node_refs *nrefs);
2341 static int check_leaf_items(struct btrfs_trans_handle *trans,
2342 struct btrfs_root *root, struct btrfs_path *path,
2343 struct node_refs *nrefs, int account_bytes);
2346 * @trans just for lowmem repair mode
2347 * @check all if not 0 then check all tree block backrefs and items
2348 * 0 then just check relationship of items in fs tree(s)
2350 * Returns >0 Found error, should continue
2351 * Returns <0 Fatal error, must exit the whole check
2352 * Returns 0 No errors found
/*
 * Descend from path->nodes[*level] towards the leaves.  For each block the
 * tree block ref is checked (and repaired through repair_tree_block_ref()),
 * bytes are accounted when @check_all is set, and the block itself is
 * validated.  Leaves go to process_one_leaf_v2() or check_leaf_items(); for
 * internal nodes the child at the current slot is read, verified and made the
 * new bottom of @path.
 */
2354 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2355 struct btrfs_root *root, struct btrfs_path *path,
2356 int *level, struct node_refs *nrefs, int ext_ref,
2360 enum btrfs_tree_block_status status;
2363 struct btrfs_fs_info *fs_info = root->fs_info;
2364 struct extent_buffer *next;
2365 struct extent_buffer *cur;
2369 int account_file_data = 0;
2371 WARN_ON(*level < 0);
2372 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2374 ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2375 path->nodes[*level], nrefs, *level, check_all);
2379 while (*level >= 0) {
2380 WARN_ON(*level < 0);
2381 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2382 cur = path->nodes[*level];
2383 bytenr = btrfs_header_bytenr(cur);
2384 check = nrefs->need_check[*level];
2386 if (btrfs_header_level(cur) != *level)
2389 * Update bytes accounting and check tree block ref
2390 * NOTE: Doing accounting and check before checking nritems
2391 * is necessary because of empty node/leaf.
2393 if ((check_all && !nrefs->checked[*level]) ||
2394 (!check_all && nrefs->need_check[*level])) {
2395 ret = check_tree_block_ref(root, cur,
2396 btrfs_header_bytenr(cur), btrfs_header_level(cur),
2397 btrfs_header_owner(cur), nrefs);
2400 ret = repair_tree_block_ref(trans, root,
2401 path->nodes[*level], nrefs, *level, ret);
/* account a block only when this root checks it and it has references */
2404 if (check_all && nrefs->need_check[*level] &&
2405 nrefs->refs[*level]) {
2406 account_bytes(root, path, *level);
2407 account_file_data = 1;
2409 nrefs->checked[*level] = 1;
2412 if (path->slots[*level] >= btrfs_header_nritems(cur))
2415 /* Don't forget to check leaf/node validation */
2417 /* skip duplicate check */
2418 if (check || !check_all) {
2419 ret = btrfs_check_leaf(root, NULL, cur);
2420 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2428 ret = process_one_leaf_v2(root, path, nrefs,
2431 ret = check_leaf_items(trans, root, path,
2432 nrefs, account_file_data);
2436 if (check || !check_all) {
2437 ret = btrfs_check_node(root, NULL, cur);
2438 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
/* internal node: fetch pointer and generation of the child at this slot */
2445 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2446 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2448 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2453 * check all trees in check_chunks_and_extent_v2
2454 * check shared node once in check_fs_roots
2456 if (!check_all && !nrefs->need_check[*level - 1]) {
2457 path->slots[*level]++;
/* use the cached buffer when it is up to date, otherwise read from disk */
2461 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2462 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2463 free_extent_buffer(next);
2464 reada_walk_down(root, cur, path->slots[*level]);
2465 next = read_tree_block(fs_info, bytenr, ptr_gen);
2466 if (!extent_buffer_uptodate(next)) {
2467 struct btrfs_key node_key;
2469 btrfs_node_key_to_cpu(path->nodes[*level],
2471 path->slots[*level]);
2472 btrfs_add_corrupt_extent_record(fs_info,
2473 &node_key, path->nodes[*level]->start,
2474 fs_info->nodesize, *level);
2480 ret = check_child_node(cur, path->slots[*level], next);
2485 if (btrfs_is_leaf(next))
2486 status = btrfs_check_leaf(root, NULL, next);
2488 status = btrfs_check_node(root, NULL, next);
2489 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2490 free_extent_buffer(next);
/* step one level down and make the child the new bottom of the path */
2495 *level = *level - 1;
2496 free_extent_buffer(path->nodes[*level]);
2497 path->nodes[*level] = next;
2498 path->slots[*level] = 0;
2499 account_file_data = 0;
/*
 * bytenr (u64)-1: only the full backref state of the new level is refreshed.
 * NOTE(review): the return value of this call is not used.
 */
2501 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
/*
 * Climb @path from *level upwards looking for a node that still has an
 * unvisited slot.  Levels that are exhausted have their buffer released and
 * their path entry cleared, and a shared node is left through
 * leave_shared_node() when the level being dropped is wc->active_node.
 */
2506 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2507 struct walk_control *wc, int *level)
2510 struct extent_buffer *leaf;
2512 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2513 leaf = path->nodes[i];
/* another slot is left in this block */
2514 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2519 free_extent_buffer(path->nodes[*level]);
2520 path->nodes[*level] = NULL;
2521 BUG_ON(*level > wc->active_node);
2522 if (*level == wc->active_node)
2523 leave_shared_node(root, wc, *level);
/*
 * Climb @path from *level upwards looking for a node that still has an
 * unvisited slot; exhausted levels have their buffer released and their path
 * entry cleared.
 */
2530 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2534 struct extent_buffer *leaf;
2536 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2537 leaf = path->nodes[i];
/* another slot is left in this block */
2538 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2543 free_extent_buffer(path->nodes[*level]);
2544 path->nodes[*level] = NULL;
/*
 * Sanity-check the inode record of a root directory: it needs an inode item
 * and no recorded errors, nlink 1 with no found links, and a first backref
 * that is an inode ref named ".." with index 0 and without any dir item or
 * dir index.
 * NOTE(review): the statements taken when a test fails are not visible here.
 */
2551 static int check_root_dir(struct inode_record *rec)
2553 struct inode_backref *backref;
2556 if (!rec->found_inode_item || rec->errors)
2558 if (rec->nlink != 1 || rec->found_link != 0)
2560 if (list_empty(&rec->backrefs))
2562 backref = to_inode_backref(rec->backrefs.next);
2563 if (!backref->found_inode_ref)
2565 if (backref->index != 0 || backref->namelen != 2 ||
2566 memcmp(backref->name, "..", 2))
2568 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size field of the inode item of rec->ino with rec->found_size
 * and clear I_ERR_DIR_ISIZE_WRONG from the record.  The item is located by
 * searching for (ino, INODE_ITEM, -1) and inspecting the item the search
 * leads to; @path is released before returning.
 */
2575 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2576 struct btrfs_root *root, struct btrfs_path *path,
2577 struct inode_record *rec)
2579 struct btrfs_inode_item *ei;
2580 struct btrfs_key key;
2583 key.objectid = rec->ino;
2584 key.type = BTRFS_INODE_ITEM_KEY;
2585 key.offset = (u64)-1;
2587 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2591 if (!path->slots[0]) {
2598 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2599 if (key.objectid != rec->ino) {
2604 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2605 struct btrfs_inode_item);
2606 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2607 btrfs_mark_buffer_dirty(path->nodes[0]);
2608 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
/* %llu, not %Lu: the L length modifier belongs to long double conversions */
2609 printf("reset isize for dir %llu root %llu\n", rec->ino,
2610 root->root_key.objectid);
2612 btrfs_release_path(path);
/*
 * Add an orphan item for rec->ino through btrfs_add_orphan_item(), release
 * @path and clear I_ERR_NO_ORPHAN_ITEM from the record.
 */
2616 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2617 struct btrfs_root *root,
2618 struct btrfs_path *path,
2619 struct inode_record *rec)
2623 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2624 btrfs_release_path(path);
2626 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field of the inode item of rec->ino with
 * rec->found_size and clear I_ERR_FILE_NBYTES_WRONG from the record.  @path
 * is released before returning.
 */
2630 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2631 struct btrfs_root *root,
2632 struct btrfs_path *path,
2633 struct inode_record *rec)
2635 struct btrfs_inode_item *ei;
2636 struct btrfs_key key;
2639 key.objectid = rec->ino;
2640 key.type = BTRFS_INODE_ITEM_KEY;
2643 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2650 /* Since ret == 0, no need to check anything */
2651 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2652 struct btrfs_inode_item);
2653 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2654 btrfs_mark_buffer_dirty(path->nodes[0]);
2655 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2656 printf("reset nbytes for ino %llu root %llu\n",
2657 rec->ino, root->root_key.objectid);
2659 btrfs_release_path(path);
/*
 * Re-create the DIR_INDEX item described by @backref (directory, index and
 * name) pointing at inode rec->ino, inside its own transaction.  Afterwards
 * the backref is marked as having a dir index and the directory's record has
 * its found_size and I_ERR_DIR_ISIZE_WRONG state brought up to date.
 * NOTE(review): the return value of btrfs_commit_transaction() is not used in
 * the visible lines.
 */
2663 static int add_missing_dir_index(struct btrfs_root *root,
2664 struct cache_tree *inode_cache,
2665 struct inode_record *rec,
2666 struct inode_backref *backref)
2668 struct btrfs_path path;
2669 struct btrfs_trans_handle *trans;
2670 struct btrfs_dir_item *dir_item;
2671 struct extent_buffer *leaf;
2672 struct btrfs_key key;
2673 struct btrfs_disk_key disk_key;
2674 struct inode_record *dir_rec;
2675 unsigned long name_ptr;
2676 u32 data_size = sizeof(*dir_item) + backref->namelen;
2679 trans = btrfs_start_transaction(root, 1);
2681 return PTR_ERR(trans);
2683 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2684 (unsigned long long)rec->ino);
2686 btrfs_init_path(&path);
2687 key.objectid = backref->dir;
2688 key.type = BTRFS_DIR_INDEX_KEY;
2689 key.offset = backref->index;
2690 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2693 leaf = path.nodes[0];
2694 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* location key stored in the dir item: the inode item of rec->ino */
2696 disk_key.objectid = cpu_to_le64(rec->ino);
2697 disk_key.type = BTRFS_INODE_ITEM_KEY;
2698 disk_key.offset = 0;
2700 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2701 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2702 btrfs_set_dir_data_len(leaf, dir_item, 0);
2703 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* the name is stored right behind the fixed-size dir item */
2704 name_ptr = (unsigned long)(dir_item + 1);
2705 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2706 btrfs_mark_buffer_dirty(leaf);
2707 btrfs_release_path(&path);
2708 btrfs_commit_transaction(trans, root);
2710 backref->found_dir_index = 1;
2711 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2712 BUG_ON(IS_ERR(dir_rec));
/* the new index adds its name length to the directory's found size */
2715 dir_rec->found_size += backref->namelen;
2716 if (dir_rec->found_size == dir_rec->isize &&
2717 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2718 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2719 if (dir_rec->found_size != dir_rec->isize)
2720 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by @backref (directory, name and
 * index) from @root inside its own transaction.  The entry is looked up with
 * btrfs_lookup_dir_index(); depending on a condition not visible here either
 * the whole item or only the single name is removed.
 * NOTE(review): the return values of btrfs_commit_transaction() are not used
 * in the visible lines.
 */
2725 static int delete_dir_index(struct btrfs_root *root,
2726 struct inode_backref *backref)
2728 struct btrfs_trans_handle *trans;
2729 struct btrfs_dir_item *di;
2730 struct btrfs_path path;
2733 trans = btrfs_start_transaction(root, 1);
2735 return PTR_ERR(trans);
2737 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2738 (unsigned long long)backref->dir,
2739 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2740 (unsigned long long)root->objectid);
2742 btrfs_init_path(&path);
2743 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2744 backref->name, backref->namelen,
2745 backref->index, -1);
2748 btrfs_release_path(&path);
2749 btrfs_commit_transaction(trans, root);
2756 ret = btrfs_del_item(trans, root, &path);
2758 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2760 btrfs_release_path(&path);
2761 btrfs_commit_transaction(trans, root);
2765 static int __create_inode_item(struct btrfs_trans_handle *trans,
2766 struct btrfs_root *root, u64 ino, u64 size,
2767 u64 nbytes, u64 nlink, u32 mode)
2769 struct btrfs_inode_item ii;
2770 time_t now = time(NULL);
2773 btrfs_set_stack_inode_size(&ii, size);
2774 btrfs_set_stack_inode_nbytes(&ii, nbytes);
2775 btrfs_set_stack_inode_nlink(&ii, nlink);
2776 btrfs_set_stack_inode_mode(&ii, mode);
2777 btrfs_set_stack_inode_generation(&ii, trans->transid);
2778 btrfs_set_stack_timespec_nsec(&ii.atime, 0);
2779 btrfs_set_stack_timespec_sec(&ii.ctime, now);
2780 btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
2781 btrfs_set_stack_timespec_sec(&ii.mtime, now);
2782 btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
2783 btrfs_set_stack_timespec_sec(&ii.otime, 0);
2784 btrfs_set_stack_timespec_nsec(&ii.otime, 0);
2786 ret = btrfs_insert_inode(trans, root, ino, &ii);
2789 warning("root %llu inode %llu recreating inode item, this may "
2790 "be incomplete, please check permissions and content after "
2791 "the fsck completes.\n", (unsigned long long)root->objectid,
2792 (unsigned long long)ino);
2797 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2798 struct btrfs_root *root, u64 ino,
2801 u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2803 return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
2806 static int create_inode_item(struct btrfs_root *root,
2807 struct inode_record *rec, int root_dir)
2809 struct btrfs_trans_handle *trans;
2815 trans = btrfs_start_transaction(root, 1);
2816 if (IS_ERR(trans)) {
2817 ret = PTR_ERR(trans);
2821 nlink = root_dir ? 1 : rec->found_link;
2822 if (rec->found_dir_item) {
2823 if (rec->found_file_extent)
2824 fprintf(stderr, "root %llu inode %llu has both a dir "
2825 "item and extents, unsure if it is a dir or a "
2826 "regular file so setting it as a directory\n",
2827 (unsigned long long)root->objectid,
2828 (unsigned long long)rec->ino);
2829 mode = S_IFDIR | 0755;
2830 size = rec->found_size;
2831 } else if (!rec->found_dir_item) {
2832 size = rec->extent_end;
2833 mode = S_IFREG | 0755;
2836 ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
2838 btrfs_commit_transaction(trans, root);
/*
 * Walk rec->backrefs and repair inconsistent directory references of @rec.
 *
 * In the `delete` case, DIR_INDEX entries that lack an inode ref or carry
 * REF_ERR_INDEX_UNMATCH are removed through delete_dir_index(). In the
 * non-delete case the visible repairs are: recreating the inode item
 * (create_inode_item()), adding a missing DIR_INDEX
 * (add_missing_dir_index()), and inserting a missing DIR_ITEM/DIR_INDEX pair
 * (btrfs_insert_dir_item()) after check_dir_conflict().
 *
 * Returns ret when it is non-zero, otherwise the `repaired` counter.
 *
 * NOTE(review): short lines (braces, error checks, counter updates, part of
 * the signature) are not present in this listing; confirm the loop exits and
 * where `repaired` is incremented against the complete source.
 */
2842 static int repair_inode_backrefs(struct btrfs_root *root,
2843 struct inode_record *rec,
2844 struct cache_tree *inode_cache,
2847 struct inode_backref *tmp, *backref;
2848 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* _safe iteration: entries are unlinked with list_del() inside the loop */
2852 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2853 if (!delete && rec->ino == root_dirid) {
2854 if (!rec->found_inode_item) {
2855 ret = create_inode_item(root, rec, 1);
2862 /* Index 0 for root dir's are special, don't mess with it */
2863 if (rec->ino == root_dirid && backref->index == 0)
2867 ((backref->found_dir_index && !backref->found_inode_ref) ||
2868 (backref->found_dir_index && backref->found_inode_ref &&
2869 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2870 ret = delete_dir_index(root, backref);
2874 list_del(&backref->list);
/* DIR_ITEM and INODE_REF exist but the DIR_INDEX is missing: add it back */
2879 if (!delete && !backref->found_dir_index &&
2880 backref->found_dir_item && backref->found_inode_ref) {
2881 ret = add_missing_dir_index(root, inode_cache, rec,
2886 if (backref->found_dir_item &&
2887 backref->found_dir_index) {
2888 if (!backref->errors &&
2889 backref->found_inode_ref) {
2890 list_del(&backref->list);
/* Only the INODE_REF exists: recreate the DIR_ITEM/DIR_INDEX pair */
2897 if (!delete && (!backref->found_dir_index &&
2898 !backref->found_dir_item &&
2899 backref->found_inode_ref)) {
2900 struct btrfs_trans_handle *trans;
2901 struct btrfs_key location;
2903 ret = check_dir_conflict(root, backref->name,
2909 * let nlink fixing routine to handle it,
2910 * which can do it better.
2915 location.objectid = rec->ino;
2916 location.type = BTRFS_INODE_ITEM_KEY;
2917 location.offset = 0;
2919 trans = btrfs_start_transaction(root, 1);
2920 if (IS_ERR(trans)) {
2921 ret = PTR_ERR(trans);
2924 fprintf(stderr, "adding missing dir index/item pair "
2926 (unsigned long long)rec->ino);
2927 ret = btrfs_insert_dir_item(trans, root, backref->name,
2929 backref->dir, &location,
2930 imode_to_type(rec->imode),
2933 btrfs_commit_transaction(trans, root);
/* All three references agree but the inode item itself is missing */
2937 if (!delete && (backref->found_inode_ref &&
2938 backref->found_dir_index &&
2939 backref->found_dir_item &&
2940 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2941 !rec->found_inode_item)) {
2942 ret = create_inode_item(root, rec, 0);
2949 return ret ? ret : repaired;
2953 * To determine the file type for nlink/inode_item repair
2955 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2956 * Return -ENOENT if file type is not found.
2958 static int find_file_type(struct inode_record *rec, u8 *type)
2960 struct inode_backref *backref;
2962 /* For inode item recovered case */
2963 if (rec->found_inode_item) {
2964 *type = imode_to_type(rec->imode);
2968 list_for_each_entry(backref, &rec->backrefs, list) {
2969 if (backref->found_dir_index || backref->found_dir_item) {
2970 *type = backref->filetype;
2978 * To determine the file name for nlink repair
2980 * Return 0 if file name is found, set name and namelen.
2981 * Return -ENOENT if file name is not found.
2983 static int find_file_name(struct inode_record *rec,
2984 char *name, int *namelen)
2986 struct inode_backref *backref;
2988 list_for_each_entry(backref, &rec->backrefs, list) {
2989 if (backref->found_dir_index || backref->found_dir_item ||
2990 backref->found_inode_ref) {
2991 memcpy(name, backref->name, backref->namelen);
2992 *namelen = backref->namelen;
/*
 * Rebuild the link count of @rec from its backrefs.
 *
 * Visible sequence: rec->found_link is cleared, every backref is removed
 * with btrfs_unlink(), incomplete backrefs are dropped from the list, the
 * on-disk nlink is set to 0, and the remaining backrefs are added back with
 * btrfs_add_link().
 *
 * NOTE(review): short lines (error checks, braces, returns, any counter
 * update) are not present in this listing; confirm the error handling and
 * how rec->found_link is recomputed against the complete source.
 */
2999 /* Reset the nlink of the inode to the correct one */
3000 static int reset_nlink(struct btrfs_trans_handle *trans,
3001 struct btrfs_root *root,
3002 struct btrfs_path *path,
3003 struct inode_record *rec)
3005 struct inode_backref *backref;
3006 struct inode_backref *tmp;
3007 struct btrfs_key key;
3008 struct btrfs_inode_item *inode_item;
3011 /* We don't believe this either, reset it and iterate backref */
3012 rec->found_link = 0;
3014 /* Remove all backref including the valid ones */
3015 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3016 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
3017 backref->index, backref->name,
3018 backref->namelen, 0);
3022 /* remove invalid backref, so it won't be added back */
3023 if (!(backref->found_dir_index &&
3024 backref->found_dir_item &&
3025 backref->found_inode_ref)) {
3026 list_del(&backref->list);
3033 /* Set nlink to 0 */
3034 key.objectid = rec->ino;
3035 key.type = BTRFS_INODE_ITEM_KEY;
3037 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3044 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3045 struct btrfs_inode_item);
3046 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
3047 btrfs_mark_buffer_dirty(path->nodes[0]);
3048 btrfs_release_path(path);
3051 * Add back valid inode_ref/dir_item/dir_index,
3052 * add_link() will handle the nlink inc, so new nlink must be correct
3054 list_for_each_entry(backref, &rec->backrefs, list) {
3055 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
3056 backref->name, backref->namelen,
3057 backref->filetype, &backref->index, 1, 0);
3062 btrfs_release_path(path);
/*
 * Look up the highest inode objectid present in @root.
 *
 * Searches for (BTRFS_LAST_FREE_OBJECTID, BTRFS_INODE_ITEM_KEY) and reads the
 * key in the slot just before the returned position into the output pointer
 * `highest_ino`. The path is initialized on entry and released before
 * returning.
 *
 * NOTE(review): path->slots[0] - 1 is used without a visible check that the
 * slot is non-zero, and the guard around it is not present in this listing
 * (short lines are missing); confirm against the complete source.
 */
3066 static int get_highest_inode(struct btrfs_trans_handle *trans,
3067 struct btrfs_root *root,
3068 struct btrfs_path *path,
3071 struct btrfs_key key, found_key;
3074 btrfs_init_path(path);
3075 key.objectid = BTRFS_LAST_FREE_OBJECTID;
3077 key.type = BTRFS_INODE_ITEM_KEY;
3078 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3080 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3081 path->slots[0] - 1);
3082 *highest_ino = found_key.objectid;
/* A result at or above the last free objectid is treated specially here */
3085 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3087 btrfs_release_path(path);
/*
 * Link inode @ino into a "lost+found" directory of @root under the name in
 * @namebuf.
 *
 * Visible sequence: get_highest_inode() supplies the starting value of
 * lost_found_ino, btrfs_mkdir() is called for "lost+found" under
 * BTRFS_FIRST_FREE_OBJECTID, and btrfs_add_link() links the inode. While the
 * link attempt reports -EEXIST, a suffix is appended to @namebuf (bounded by
 * BTRFS_NAME_LEN) and the link is retried.
 *
 * @namebuf is modified in place; it is written up to BTRFS_NAME_LEN bytes, so
 * the caller's buffer has to be at least that large.
 *
 * NOTE(review): short lines (error checks, gotos, the suffix format argument,
 * the @ref_count update) are not present in this listing; confirm against
 * the complete source.
 */
3092 * Link inode to dir 'lost+found'. Increase @ref_count.
3094 * Returns 0 means success.
3095 * Returns <0 means failure.
3097 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
3098 struct btrfs_root *root,
3099 struct btrfs_path *path,
3100 u64 ino, char *namebuf, u32 name_len,
3101 u8 filetype, u64 *ref_count)
3103 char *dir_name = "lost+found";
3108 btrfs_release_path(path);
3109 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3114 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3115 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3118 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3121 ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3122 namebuf, name_len, filetype, NULL, 1, 0);
3124 * Add ".INO" suffix several times to handle case where
3125 * "FILENAME.INO" is already taken by another file.
3127 while (ret == -EEXIST) {
3129 * Conflicting file name, add ".INO" as suffix * +1 for '.'
3131 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
3135 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3137 name_len += count_digits(ino) + 1;
3138 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3139 name_len, filetype, NULL, 1, 0);
3142 error("failed to link the inode %llu to %s dir: %s",
3143 ino, dir_name, strerror(-ret));
3148 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3149 name_len, namebuf, dir_name);
3151 btrfs_release_path(path);
3153 error("failed to move file '%.*s' to '%s' dir", name_len,
/*
 * Repair a wrong link count on @rec.
 *
 * The file name and type are captured from the backrefs first (falling back
 * to the decimal inode number and BTRFS_FT_REG_FILE), then reset_nlink()
 * rebuilds the links. If no link is left afterwards (rec->found_link == 0)
 * the inode is handed to link_inode_to_lostfound(). I_ERR_LINK_COUNT_WRONG is
 * cleared and the path released at the end regardless.
 *
 * NOTE(review): the (u64 *) cast on &rec->found_link suggests found_link is
 * not declared as u64; if it is narrower, a 64-bit write through that pointer
 * would touch adjacent memory. Confirm against the struct definition.
 * NOTE(review): short lines (error checks, gotos, returns) are not present
 * in this listing; confirm control flow against the complete source.
 */
3158 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3159 struct btrfs_root *root,
3160 struct btrfs_path *path,
3161 struct inode_record *rec)
3163 char namebuf[BTRFS_NAME_LEN] = {0};
3166 int name_recovered = 0;
3167 int type_recovered = 0;
3171 * Get file name and type first before these invalid inode ref
3172 * are deleted by remove_all_invalid_backref()
/* The find_* helpers return 0 on success, hence the negation */
3174 name_recovered = !find_file_name(rec, namebuf, &namelen);
3175 type_recovered = !find_file_type(rec, &type);
3177 if (!name_recovered) {
3178 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3179 rec->ino, rec->ino);
3180 namelen = count_digits(rec->ino);
3181 sprintf(namebuf, "%llu", rec->ino);
3184 if (!type_recovered) {
3185 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3187 type = BTRFS_FT_REG_FILE;
3191 ret = reset_nlink(trans, root, path, rec);
3194 "Failed to reset nlink for inode %llu: %s\n",
3195 rec->ino, strerror(-ret));
3199 if (rec->found_link == 0) {
3200 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3201 namebuf, namelen, type,
3202 (u64 *)&rec->found_link);
3206 printf("Fixed the nlink of inode %llu\n", rec->ino);
3209 * Clear the flag anyway, or we will loop forever for the same inode
3210 * as it will not be removed from the bad inode list and the dead loop
3213 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3214 btrfs_release_path(path);
/*
 * Probe @root for a non-inline file extent item belonging to inode @ino.
 *
 * Performs a read-only search (no transaction, no COW) for an
 * EXTENT_DATA key, steps to the next leaf when the slot is past the end of
 * the current one, and inspects the extent type of the item found.
 *
 * NOTE(review): short lines (key setup for objectid/offset, gotos, the
 * return value) are not present in this listing; confirm against the
 * complete source.
 */
3219 * Check if there is any normal(reg or prealloc) file extent for given
3221 * This is used to determine the file type when neither its dir_index/item or
3222 * inode_item exists.
3224 * This will *NOT* report error, if any error happens, just consider it does
3225 * not have any normal file extent.
3227 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3229 struct btrfs_path path;
3230 struct btrfs_key key;
3231 struct btrfs_key found_key;
3232 struct btrfs_file_extent_item *fi;
3236 btrfs_init_path(&path);
3238 key.type = BTRFS_EXTENT_DATA_KEY;
3241 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3246 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3247 ret = btrfs_next_leaf(root, &path);
3254 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
/* The item must still belong to @ino and be an EXTENT_DATA item */
3256 if (found_key.objectid != ino ||
3257 found_key.type != BTRFS_EXTENT_DATA_KEY)
3259 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3260 struct btrfs_file_extent_item);
3261 type = btrfs_file_extent_type(path.nodes[0], fi);
3262 if (type != BTRFS_FILE_EXTENT_INLINE) {
3268 btrfs_release_path(&path);
3272 static u32 btrfs_type_to_imode(u8 type)
3274 static u32 imode_by_btrfs_type[] = {
3275 [BTRFS_FT_REG_FILE] = S_IFREG,
3276 [BTRFS_FT_DIR] = S_IFDIR,
3277 [BTRFS_FT_CHRDEV] = S_IFCHR,
3278 [BTRFS_FT_BLKDEV] = S_IFBLK,
3279 [BTRFS_FT_FIFO] = S_IFIFO,
3280 [BTRFS_FT_SOCK] = S_IFSOCK,
3281 [BTRFS_FT_SYMLINK] = S_IFLNK,
3284 return imode_by_btrfs_type[(type)];
/*
 * Rebuild the missing inode item of @rec.
 *
 * The file type comes from find_file_type() when possible; otherwise it is
 * chosen from what was found for the inode: a non-inline file extent or
 * orphan extents give BTRFS_FT_REG_FILE, a dir item gives BTRFS_FT_DIR, and
 * BTRFS_FT_REG_FILE is the final fallback. The item is created with
 * btrfs_new_inode(), I_ERR_NO_INODE_ITEM is cleared and
 * I_ERR_LINK_COUNT_WRONG is set so that the nlink repair runs afterwards.
 *
 * NOTE(review): short lines (the `mode` assignments, error checks, returns)
 * are not present in this listing; confirm against the complete source.
 */
3287 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3288 struct btrfs_root *root,
3289 struct btrfs_path *path,
3290 struct inode_record *rec)
3294 int type_recovered = 0;
3297 printf("Trying to rebuild inode:%llu\n", rec->ino);
/* find_file_type() returns 0 on success, hence the negation */
3299 type_recovered = !find_file_type(rec, &filetype);
3302 * Try to determine inode type if type not found.
3304 * For found regular file extent, it must be FILE.
3305 * For found dir_item/index, it must be DIR.
3307 * For undetermined one, use FILE as fallback.
3310 * 1. If found backref(inode_index/item is already handled) to it,
3312 * Need new inode-inode ref structure to allow search for that.
3314 if (!type_recovered) {
3315 if (rec->found_file_extent &&
3316 find_normal_file_extent(root, rec->ino)) {
3318 filetype = BTRFS_FT_REG_FILE;
3319 } else if (rec->found_dir_item) {
3321 filetype = BTRFS_FT_DIR;
3322 } else if (!list_empty(&rec->orphan_extents)) {
3324 filetype = BTRFS_FT_REG_FILE;
3326 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3329 filetype = BTRFS_FT_REG_FILE;
3333 ret = btrfs_new_inode(trans, root, rec->ino,
3334 mode | btrfs_type_to_imode(filetype));
3339 * Here inode rebuild is done, we only rebuild the inode item,
3340 * don't repair the nlink(like move to lost+found).
3341 * That is the job of nlink repair.
3343 * We just fill the record and return
3345 rec->found_dir_item = 1;
3346 rec->imode = mode | btrfs_type_to_imode(filetype);
3348 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3349 /* Ensure the inode_nlinks repair function will be called */
3350 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach (or discard) the orphan data extents queued on
 * rec->orphan_extents.
 *
 * For each orphan, btrfs_get_extent() is used to look for a conflicting file
 * extent in the same range. The visible code either frees the orphan with
 * btrfs_free_extent() (conflict message) or inserts it with
 * btrfs_insert_file_extent(), then updates rec->found_size, the hole tree
 * and the related error bits, and unlinks the orphan from the list.
 * I_ERR_FILE_EXTENT_ORPHAN is cleared at the end.
 *
 * NOTE(review): short lines (branch conditions, gotos, free() calls,
 * returns) are not present in this listing; confirm against the complete
 * source.
 */
3355 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3356 struct btrfs_root *root,
3357 struct btrfs_path *path,
3358 struct inode_record *rec)
3360 struct orphan_data_extent *orphan;
3361 struct orphan_data_extent *tmp;
3364 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3366 * Check for conflicting file extents
3368 * Here we don't know whether the extents is compressed or not,
3369 * so we can only assume it not compressed nor data offset,
3370 * and use its disk_len as extent length.
3372 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3373 orphan->offset, orphan->disk_len, 0);
3374 btrfs_release_path(path);
3379 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3380 orphan->disk_bytenr, orphan->disk_len);
3381 ret = btrfs_free_extent(trans,
3382 root->fs_info->extent_root,
3383 orphan->disk_bytenr, orphan->disk_len,
3384 0, root->objectid, orphan->objectid,
3389 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3390 orphan->offset, orphan->disk_bytenr,
3391 orphan->disk_len, orphan->disk_len);
3395 /* Update file size info */
3396 rec->found_size += orphan->disk_len;
3397 if (rec->found_size == rec->nbytes)
3398 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3400 /* Update the file extent hole info too */
3401 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3405 if (RB_EMPTY_ROOT(&rec->holes))
3406 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3408 list_del(&orphan->list);
3411 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps recorded in rec->holes for inode @rec.
 *
 * Starting from rb_first(), each recorded hole is filled with
 * btrfs_punch_hole() and then removed from the tree with
 * del_file_extent_hole(); I_ERR_FILE_EXTENT_DISCOUNT is cleared once the
 * tree is empty. A separate call covers the range
 * [0, round_up(isize, sectorsize)) for a file that lost all its extents.
 *
 * NOTE(review): short lines (loop header, error checks, the condition of the
 * special case, returns) are not present in this listing; confirm against
 * the complete source.
 */
3416 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3417 struct btrfs_root *root,
3418 struct btrfs_path *path,
3419 struct inode_record *rec)
3421 struct rb_node *node;
3422 struct file_extent_hole *hole;
3426 node = rb_first(&rec->holes);
3430 hole = rb_entry(node, struct file_extent_hole, node);
3431 ret = btrfs_punch_hole(trans, root, rec->ino,
3432 hole->start, hole->len);
3435 ret = del_file_extent_hole(&rec->holes, hole->start,
3439 if (RB_EMPTY_ROOT(&rec->holes))
3440 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* Restart from the first remaining hole after each removal */
3441 node = rb_first(&rec->holes);
3443 /* special case for a file losing all its file extent */
3445 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3446 round_up(rec->isize,
3447 root->fs_info->sectorsize));
3451 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3452 rec->ino, root->objectid);
/*
 * Run every applicable inode repair for @rec inside one transaction.
 *
 * Only the error bits tested in the first condition are handled here. The
 * repairs run in a fixed order (missing inode item, orphan extents, extent
 * holes, dir isize, orphan item, nlink, nbytes); each later step is skipped
 * once an earlier one has set ret to non-zero. The transaction is committed
 * and the path released afterwards in all cases.
 *
 * NOTE(review): short lines (the early return for unsupported errors, the
 * IS_ERR check, the final return) are not present in this listing; confirm
 * against the complete source.
 */
3457 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3459 struct btrfs_trans_handle *trans;
3460 struct btrfs_path path;
3463 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3464 I_ERR_NO_ORPHAN_ITEM |
3465 I_ERR_LINK_COUNT_WRONG |
3466 I_ERR_NO_INODE_ITEM |
3467 I_ERR_FILE_EXTENT_ORPHAN |
3468 I_ERR_FILE_EXTENT_DISCOUNT|
3469 I_ERR_FILE_NBYTES_WRONG)))
3473 * For nlink repair, it may create a dir and add link, so
3474 * 2 for parent(256)'s dir_index and dir_item
3475 * 2 for lost+found dir's inode_item and inode_ref
3476 * 1 for the new inode_ref of the file
3477 * 2 for lost+found dir's dir_index and dir_item for the file
3479 trans = btrfs_start_transaction(root, 7);
3481 return PTR_ERR(trans);
3483 btrfs_init_path(&path);
/* rec->errors is re-read at each step, so earlier repairs can add work */
3484 if (rec->errors & I_ERR_NO_INODE_ITEM)
3485 ret = repair_inode_no_item(trans, root, &path, rec);
3486 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3487 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3488 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3489 ret = repair_inode_discount_extent(trans, root, &path, rec);
3490 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3491 ret = repair_inode_isize(trans, root, &path, rec);
3492 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3493 ret = repair_inode_orphan_item(trans, root, &path, rec);
3494 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3495 ret = repair_inode_nlinks(trans, root, &path, rec);
3496 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3497 ret = repair_inode_nbytes(trans, root, &path, rec);
3498 btrfs_commit_transaction(trans, root);
3499 btrfs_release_path(&path);
/*
 * Verify, and when `repair` is set try to fix, every inode record collected
 * in @inode_cache for @root.
 *
 * Visible steps:
 *  - a root with zero refs only produces a warning when the cache is not
 *    empty;
 *  - in repair mode the backrefs of every record are repaired in stages via
 *    repair_inode_backrefs();
 *  - the root directory record is checked with check_root_dir(); a missing
 *    one is recreated with btrfs_make_root_dir() or reported as not found;
 *  - every remaining record is removed from the cache, flagged
 *    (I_ERR_NO_INODE_ITEM, I_ERR_LINK_COUNT_WRONG), passed to
 *    try_repair_inode() and, if still not clean, printed together with its
 *    unresolved backrefs before being freed.
 *
 * Returns -1 when `error` is positive, 0 otherwise.
 *
 * NOTE(review): short lines (loop headers, stage handling, error counting,
 * gotos, several conditions) are not present in this listing; confirm the
 * control flow against the complete source.
 */
3503 static int check_inode_recs(struct btrfs_root *root,
3504 struct cache_tree *inode_cache)
3506 struct cache_extent *cache;
3507 struct ptr_node *node;
3508 struct inode_record *rec;
3509 struct inode_backref *backref;
3514 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3516 if (btrfs_root_refs(&root->root_item) == 0) {
3517 if (!cache_tree_empty(inode_cache))
3518 fprintf(stderr, "warning line %d\n", __LINE__);
3523 * We need to repair backrefs first because we could change some of the
3524 * errors in the inode recs.
3526 * We also need to go through and delete invalid backrefs first and then
3527 * add the correct ones second. We do this because we may get EEXIST
3528 * when adding back the correct index because we hadn't yet deleted the
3531 * For example, if we were missing a dir index then the directories
3532 * isize would be wrong, so if we fixed the isize to what we thought it
3533 * would be and then fixed the backref we'd still have a invalid fs, so
3534 * we need to add back the dir index and then check to see if the isize
3539 if (stage == 3 && !err)
3542 cache = search_cache_extent(inode_cache, 0);
/* Backref repair pass: only entered in repair mode */
3543 while (repair && cache) {
3544 node = container_of(cache, struct ptr_node, cache);
3546 cache = next_cache_extent(cache);
3548 /* Need to free everything up and rescan */
3550 remove_cache_extent(inode_cache, &node->cache);
3552 free_inode_rec(rec);
3556 if (list_empty(&rec->backrefs))
3559 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Root directory handling */
3573 rec = get_inode_rec(inode_cache, root_dirid, 0);
3574 BUG_ON(IS_ERR(rec));
3576 ret = check_root_dir(rec);
3578 fprintf(stderr, "root %llu root dir %llu error\n",
3579 (unsigned long long)root->root_key.objectid,
3580 (unsigned long long)root_dirid);
3581 print_inode_error(root, rec);
3586 struct btrfs_trans_handle *trans;
3588 trans = btrfs_start_transaction(root, 1);
3589 if (IS_ERR(trans)) {
3590 err = PTR_ERR(trans);
3595 "root %llu missing its root dir, recreating\n",
3596 (unsigned long long)root->objectid);
3598 ret = btrfs_make_root_dir(trans, root, root_dirid);
3601 btrfs_commit_transaction(trans, root);
3605 fprintf(stderr, "root %llu root dir %llu not found\n",
3606 (unsigned long long)root->root_key.objectid,
3607 (unsigned long long)root_dirid);
/* Final pass: drain the cache, repairing or reporting each record */
3611 cache = search_cache_extent(inode_cache, 0);
3614 node = container_of(cache, struct ptr_node, cache);
3616 remove_cache_extent(inode_cache, &node->cache);
/* The root dir and the orphan objectid are not checked as regular inodes */
3618 if (rec->ino == root_dirid ||
3619 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3620 free_inode_rec(rec);
3624 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3625 ret = check_orphan_item(root, rec->ino);
3627 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3628 if (can_free_inode_rec(rec)) {
3629 free_inode_rec(rec);
3634 if (!rec->found_inode_item)
3635 rec->errors |= I_ERR_NO_INODE_ITEM;
3636 if (rec->found_link != rec->nlink)
3637 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3639 ret = try_repair_inode(root, rec);
3640 if (ret == 0 && can_free_inode_rec(rec)) {
3641 free_inode_rec(rec);
3647 if (!(repair && ret == 0))
3649 print_inode_error(root, rec);
/* Translate missing pieces into error bits before printing each backref */
3650 list_for_each_entry(backref, &rec->backrefs, list) {
3651 if (!backref->found_dir_item)
3652 backref->errors |= REF_ERR_NO_DIR_ITEM;
3653 if (!backref->found_dir_index)
3654 backref->errors |= REF_ERR_NO_DIR_INDEX;
3655 if (!backref->found_inode_ref)
3656 backref->errors |= REF_ERR_NO_INODE_REF;
3657 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3658 " namelen %u name %s filetype %d errors %x",
3659 (unsigned long long)backref->dir,
3660 (unsigned long long)backref->index,
3661 backref->namelen, backref->name,
3662 backref->filetype, backref->errors);
3663 print_ref_error(backref->errors);
3665 free_inode_rec(rec);
3667 return (error > 0) ? -1 : 0;
3670 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3673 struct cache_extent *cache;
3674 struct root_record *rec = NULL;
3677 cache = lookup_cache_extent(root_cache, objectid, 1);
3679 rec = container_of(cache, struct root_record, cache);
3681 rec = calloc(1, sizeof(*rec));
3683 return ERR_PTR(-ENOMEM);
3684 rec->objectid = objectid;
3685 INIT_LIST_HEAD(&rec->backrefs);
3686 rec->cache.start = objectid;
3687 rec->cache.size = 1;
3689 ret = insert_cache_extent(root_cache, &rec->cache);
3691 return ERR_PTR(-EEXIST);
3696 static struct root_backref *get_root_backref(struct root_record *rec,
3697 u64 ref_root, u64 dir, u64 index,
3698 const char *name, int namelen)
3700 struct root_backref *backref;
3702 list_for_each_entry(backref, &rec->backrefs, list) {
3703 if (backref->ref_root != ref_root || backref->dir != dir ||
3704 backref->namelen != namelen)
3706 if (memcmp(name, backref->name, namelen))
3711 backref = calloc(1, sizeof(*backref) + namelen + 1);
3714 backref->ref_root = ref_root;
3716 backref->index = index;
3717 backref->namelen = namelen;
3718 memcpy(backref->name, name, namelen);
3719 backref->name[namelen] = '\0';
3720 list_add_tail(&backref->list, &rec->backrefs);
3724 static void free_root_record(struct cache_extent *cache)
3726 struct root_record *rec;
3727 struct root_backref *backref;
3729 rec = container_of(cache, struct root_record, cache);
3730 while (!list_empty(&rec->backrefs)) {
3731 backref = to_root_backref(rec->backrefs.next);
3732 list_del(&backref->list);
3739 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one reference item (dir item, dir index, root ref or root backref,
 * selected by @item_type) pointing at subvolume @root_id from @ref_root.
 *
 * The matching root_record and root_backref are looked up or created, @errors
 * is OR-ed into the backref, and the found_* flag for @item_type is set.
 * For item types other than BTRFS_DIR_ITEM_KEY the index is compared with
 * the one already recorded and REF_ERR_INDEX_UNMATCH is flagged on mismatch.
 * Duplicate root refs / backrefs are flagged with REF_ERR_DUP_ROOT_REF and
 * REF_ERR_DUP_ROOT_BACKREF. A backref with both a forward ref and a dir item
 * is marked reachable.
 *
 * NOTE(review): short lines (else branches, the statements executed at the
 * `found_forward_ref` / `found_dir_item` checks, the return) are not present
 * in this listing; confirm against the complete source.
 */
3741 static int add_root_backref(struct cache_tree *root_cache,
3742 u64 root_id, u64 ref_root, u64 dir, u64 index,
3743 const char *name, int namelen,
3744 int item_type, int errors)
3746 struct root_record *rec;
3747 struct root_backref *backref;
3749 rec = get_root_rec(root_cache, root_id);
3750 BUG_ON(IS_ERR(rec));
3751 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3754 backref->errors |= errors;
3756 if (item_type != BTRFS_DIR_ITEM_KEY) {
3757 if (backref->found_dir_index || backref->found_back_ref ||
3758 backref->found_forward_ref) {
3759 if (backref->index != index)
3760 backref->errors |= REF_ERR_INDEX_UNMATCH;
3762 backref->index = index;
3766 if (item_type == BTRFS_DIR_ITEM_KEY) {
3767 if (backref->found_forward_ref)
3769 backref->found_dir_item = 1;
3770 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3771 backref->found_dir_index = 1;
3772 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3773 if (backref->found_forward_ref)
3774 backref->errors |= REF_ERR_DUP_ROOT_REF;
3775 else if (backref->found_dir_item)
3777 backref->found_forward_ref = 1;
3778 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3779 if (backref->found_back_ref)
3780 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3781 backref->found_back_ref = 1;
3786 if (backref->found_forward_ref && backref->found_dir_item)
3787 backref->reachable = 1;
/*
 * Drain the inode records in @src_cache and turn those that refer to child
 * roots of @root into root backrefs in @dst_cache.
 *
 * For the tree-reloc root the source records are simply freed. Otherwise
 * each record is removed from @src_cache, tested with is_child_root(), and
 * its dir item / dir index backrefs are forwarded to add_root_backref()
 * before the record is freed.
 *
 * NOTE(review): short lines (loop header, handling of the is_child_root()
 * result, trailing call arguments, returns) are not present in this listing;
 * confirm against the complete source.
 */
3791 static int merge_root_recs(struct btrfs_root *root,
3792 struct cache_tree *src_cache,
3793 struct cache_tree *dst_cache)
3795 struct cache_extent *cache;
3796 struct ptr_node *node;
3797 struct inode_record *rec;
3798 struct inode_backref *backref;
3801 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3802 free_inode_recs_tree(src_cache);
3807 cache = search_cache_extent(src_cache, 0);
3810 node = container_of(cache, struct ptr_node, cache);
3812 remove_cache_extent(src_cache, &node->cache);
3815 ret = is_child_root(root, root->objectid, rec->ino);
3821 list_for_each_entry(backref, &rec->backrefs, list) {
/* Records handled here are not expected to carry inode refs */
3822 BUG_ON(backref->found_inode_ref);
3823 if (backref->found_dir_item)
3824 add_root_backref(dst_cache, rec->ino,
3825 root->root_key.objectid, backref->dir,
3826 backref->index, backref->name,
3827 backref->namelen, BTRFS_DIR_ITEM_KEY,
3829 if (backref->found_dir_index)
3830 add_root_backref(dst_cache, rec->ino,
3831 root->root_key.objectid, backref->dir,
3832 backref->index, backref->name,
3833 backref->namelen, BTRFS_DIR_INDEX_KEY,
3837 free_inode_rec(rec);
/*
 * Check that every fs tree recorded in @root_cache is properly referenced.
 *
 * Visible steps:
 *  - the record of BTRFS_FS_TREE_OBJECTID is fetched first;
 *  - a pass over all records examines reachable backrefs whose referencing
 *    root record (get_root_rec()) may itself lack references, and clears
 *    backref->reachable for some of them (per the fixme, circular references
 *    are not detected);
 *  - a second pass reports unreferenced fs trees (after consulting
 *    check_orphan_item() on the tree root), fills in REF_ERR_NO_* bits for
 *    missing items and prints unresolved refs.
 *
 * Returns 1 when `errors` is positive, 0 otherwise.
 *
 * NOTE(review): short lines (loop headers, counters, continue/break, several
 * conditions) are not present in this listing; confirm the control flow
 * against the complete source.
 */
3844 static int check_root_refs(struct btrfs_root *root,
3845 struct cache_tree *root_cache)
3847 struct root_record *rec;
3848 struct root_record *ref_root;
3849 struct root_backref *backref;
3850 struct cache_extent *cache;
3856 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3857 BUG_ON(IS_ERR(rec));
3860 /* fixme: this can not detect circular references */
3863 cache = search_cache_extent(root_cache, 0);
3867 rec = container_of(cache, struct root_record, cache);
3868 cache = next_cache_extent(cache);
3870 if (rec->found_ref == 0)
3873 list_for_each_entry(backref, &rec->backrefs, list) {
3874 if (!backref->reachable)
3877 ref_root = get_root_rec(root_cache,
3879 BUG_ON(IS_ERR(ref_root));
3880 if (ref_root->found_ref > 0)
3883 backref->reachable = 0;
3885 if (rec->found_ref == 0)
/* Reporting pass */
3891 cache = search_cache_extent(root_cache, 0);
3895 rec = container_of(cache, struct root_record, cache);
3896 cache = next_cache_extent(cache);
/* Only objectids in the free objectid range are examined here */
3898 if (rec->found_ref == 0 &&
3899 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3900 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3901 ret = check_orphan_item(root->fs_info->tree_root,
3907 * If we don't have a root item then we likely just have
3908 * a dir item in a snapshot for this root but no actual
3909 * ref key or anything so it's meaningless.
3911 if (!rec->found_root_item)
3914 fprintf(stderr, "fs tree %llu not referenced\n",
3915 (unsigned long long)rec->objectid);
3919 if (rec->found_ref > 0 && !rec->found_root_item)
3921 list_for_each_entry(backref, &rec->backrefs, list) {
3922 if (!backref->found_dir_item)
3923 backref->errors |= REF_ERR_NO_DIR_ITEM;
3924 if (!backref->found_dir_index)
3925 backref->errors |= REF_ERR_NO_DIR_INDEX;
3926 if (!backref->found_back_ref)
3927 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3928 if (!backref->found_forward_ref)
3929 backref->errors |= REF_ERR_NO_ROOT_REF;
3930 if (backref->reachable && backref->errors)
3937 fprintf(stderr, "fs tree %llu refs %u %s\n",
3938 (unsigned long long)rec->objectid, rec->found_ref,
3939 rec->found_root_item ? "" : "not found");
3941 list_for_each_entry(backref, &rec->backrefs, list) {
3942 if (!backref->reachable)
3944 if (!backref->errors && rec->found_root_item)
3946 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3947 " index %llu namelen %u name %s errors %x\n",
3948 (unsigned long long)backref->ref_root,
3949 (unsigned long long)backref->dir,
3950 (unsigned long long)backref->index,
3951 backref->namelen, backref->name,
3953 print_ref_error(backref->errors);
3956 return errors > 0 ? 1 : 0;
3959 static int process_root_ref(struct extent_buffer *eb, int slot,
3960 struct btrfs_key *key,
3961 struct cache_tree *root_cache)
3967 struct btrfs_root_ref *ref;
3968 char namebuf[BTRFS_NAME_LEN];
3971 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3973 dirid = btrfs_root_ref_dirid(eb, ref);
3974 index = btrfs_root_ref_sequence(eb, ref);
3975 name_len = btrfs_root_ref_name_len(eb, ref);
3977 if (name_len <= BTRFS_NAME_LEN) {
3981 len = BTRFS_NAME_LEN;
3982 error = REF_ERR_NAME_TOO_LONG;
3984 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3986 if (key->type == BTRFS_ROOT_REF_KEY) {
3987 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3988 index, namebuf, len, key->type, error);
3990 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3991 index, namebuf, len, key->type, error);
3996 static void free_corrupt_block(struct cache_extent *cache)
3998 struct btrfs_corrupt_block *corrupt;
4000 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
4004 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
/*
 * Drop every tree block recorded in @corrupt_blocks from the btree of @root.
 *
 * Nothing is done for an empty cache. Otherwise, inside one transaction:
 *  pass 1 - for each corrupt block, search down to its level without any
 *           balancing (ins_len 0), remove the parent pointer with
 *           btrfs_del_ptr() and free the block's extent;
 *  pass 2 - search again for each recorded key with ins_len -1 so that
 *           btrfs_search_slot() rebalances the tree.
 * The transaction is committed and the path released at the end.
 *
 * NOTE(review): short lines (loop headers, error checks, gotos, returns) are
 * not present in this listing; confirm the error handling against the
 * complete source.
 */
4007 * Repair the btree of the given root.
4009 * The fix is to remove the node key in corrupt_blocks cache_tree.
4010 * and rebalance the tree.
4011 * After the fix, the btree should be writeable.
4013 static int repair_btree(struct btrfs_root *root,
4014 struct cache_tree *corrupt_blocks)
4016 struct btrfs_trans_handle *trans;
4017 struct btrfs_path path;
4018 struct btrfs_corrupt_block *corrupt;
4019 struct cache_extent *cache;
4020 struct btrfs_key key;
4025 if (cache_tree_empty(corrupt_blocks))
4028 trans = btrfs_start_transaction(root, 1);
4029 if (IS_ERR(trans)) {
4030 ret = PTR_ERR(trans);
4031 fprintf(stderr, "Error starting transaction: %s\n",
4035 btrfs_init_path(&path);
4036 cache = first_cache_extent(corrupt_blocks);
4038 corrupt = container_of(cache, struct btrfs_corrupt_block,
4040 level = corrupt->level;
/* Stop the search at the level of the corrupt block */
4041 path.lowest_level = level;
4042 key.objectid = corrupt->key.objectid;
4043 key.type = corrupt->key.type;
4044 key.offset = corrupt->key.offset;
4047 * Here we don't want to do any tree balance, since it may
4048 * cause a balance with corrupted brother leaf/node,
4049 * so ins_len set to 0 here.
4050 * Balance will be done after all corrupt node/leaf is deleted.
4052 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
/* Remember the block address before its pointer is removed */
4055 offset = btrfs_node_blockptr(path.nodes[level],
4058 /* Remove the ptr */
4059 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
4063 * Remove the corresponding extent
4064 * return value is not concerned.
4066 btrfs_release_path(&path);
4067 ret = btrfs_free_extent(trans, root, offset,
4068 root->fs_info->nodesize, 0,
4069 root->root_key.objectid, level - 1, 0);
4070 cache = next_cache_extent(cache);
4073 /* Balance the btree using btrfs_search_slot() */
4074 cache = first_cache_extent(corrupt_blocks);
4076 corrupt = container_of(cache, struct btrfs_corrupt_block,
4078 memcpy(&key, &corrupt->key, sizeof(key));
4079 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
4082 /* return will always >0 since it won't find the item */
4084 btrfs_release_path(&path);
4085 cache = next_cache_extent(cache);
4088 btrfs_commit_transaction(trans, root);
4089 btrfs_release_path(&path);
4093 static int check_fs_root(struct btrfs_root *root,
4094 struct cache_tree *root_cache,
4095 struct walk_control *wc)
4101 struct btrfs_path path;
4102 struct shared_node root_node;
4103 struct root_record *rec;
4104 struct btrfs_root_item *root_item = &root->root_item;
4105 struct cache_tree corrupt_blocks;
4106 struct orphan_data_extent *orphan;
4107 struct orphan_data_extent *tmp;
4108 enum btrfs_tree_block_status status;
4109 struct node_refs nrefs;
4112 * Reuse the corrupt_block cache tree to record corrupted tree block
4114 * Unlike the usage in extent tree check, here we do it in a per
4115 * fs/subvol tree base.
4117 cache_tree_init(&corrupt_blocks);
4118 root->fs_info->corrupt_blocks = &corrupt_blocks;
4120 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4121 rec = get_root_rec(root_cache, root->root_key.objectid);
4122 BUG_ON(IS_ERR(rec));
4123 if (btrfs_root_refs(root_item) > 0)
4124 rec->found_root_item = 1;
4127 btrfs_init_path(&path);
4128 memset(&root_node, 0, sizeof(root_node));
4129 cache_tree_init(&root_node.root_cache);
4130 cache_tree_init(&root_node.inode_cache);
4131 memset(&nrefs, 0, sizeof(nrefs));
4133 /* Move the orphan extent record to corresponding inode_record */
4134 list_for_each_entry_safe(orphan, tmp,
4135 &root->orphan_data_extents, list) {
4136 struct inode_record *inode;
4138 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4140 BUG_ON(IS_ERR(inode));
4141 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4142 list_move(&orphan->list, &inode->orphan_extents);
4145 level = btrfs_header_level(root->node);
4146 memset(wc->nodes, 0, sizeof(wc->nodes));
4147 wc->nodes[level] = &root_node;
4148 wc->active_node = level;
4149 wc->root_level = level;
4151 /* We may not have checked the root block, lets do that now */
4152 if (btrfs_is_leaf(root->node))
4153 status = btrfs_check_leaf(root, NULL, root->node);
4155 status = btrfs_check_node(root, NULL, root->node);
4156 if (status != BTRFS_TREE_BLOCK_CLEAN)
4159 if (btrfs_root_refs(root_item) > 0 ||
4160 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4161 path.nodes[level] = root->node;
4162 extent_buffer_get(root->node);
4163 path.slots[level] = 0;
4165 struct btrfs_key key;
4166 struct btrfs_disk_key found_key;
4168 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4169 level = root_item->drop_level;
4170 path.lowest_level = level;
4171 if (level > btrfs_header_level(root->node) ||
4172 level >= BTRFS_MAX_LEVEL) {
4173 error("ignoring invalid drop level: %u", level);
4176 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4179 btrfs_node_key(path.nodes[level], &found_key,
4181 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4182 sizeof(found_key)));
4186 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4192 wret = walk_up_tree(root, &path, wc, &level);
4199 btrfs_release_path(&path);
4201 if (!cache_tree_empty(&corrupt_blocks)) {
4202 struct cache_extent *cache;
4203 struct btrfs_corrupt_block *corrupt;
4205 printf("The following tree block(s) is corrupted in tree %llu:\n",
4206 root->root_key.objectid);
4207 cache = first_cache_extent(&corrupt_blocks);
4209 corrupt = container_of(cache,
4210 struct btrfs_corrupt_block,
4212 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4213 cache->start, corrupt->level,
4214 corrupt->key.objectid, corrupt->key.type,
4215 corrupt->key.offset);
4216 cache = next_cache_extent(cache);
4219 printf("Try to repair the btree for root %llu\n",
4220 root->root_key.objectid);
4221 ret = repair_btree(root, &corrupt_blocks);
4223 fprintf(stderr, "Failed to repair btree: %s\n",
4226 printf("Btree for root %llu is fixed\n",
4227 root->root_key.objectid);
4231 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4235 if (root_node.current) {
4236 root_node.current->checked = 1;
4237 maybe_free_inode_rec(&root_node.inode_cache,
4241 err = check_inode_recs(root, &root_node.inode_cache);
4245 free_corrupt_blocks_tree(&corrupt_blocks);
4246 root->fs_info->corrupt_blocks = NULL;
4247 free_orphan_data_extents(&root->orphan_data_extents);
4251 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4252 struct cache_tree *root_cache)
4254 struct btrfs_path path;
4255 struct btrfs_key key;
4256 struct walk_control wc;
4257 struct extent_buffer *leaf, *tree_node;
4258 struct btrfs_root *tmp_root;
4259 struct btrfs_root *tree_root = fs_info->tree_root;
4263 if (ctx.progress_enabled) {
4264 ctx.tp = TASK_FS_ROOTS;
4265 task_start(ctx.info);
4269 * Just in case we made any changes to the extent tree that weren't
4270 * reflected into the free space cache yet.
4273 reset_cached_block_groups(fs_info);
4274 memset(&wc, 0, sizeof(wc));
4275 cache_tree_init(&wc.shared);
4276 btrfs_init_path(&path);
4281 key.type = BTRFS_ROOT_ITEM_KEY;
4282 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4287 tree_node = tree_root->node;
4289 if (tree_node != tree_root->node) {
4290 free_root_recs_tree(root_cache);
4291 btrfs_release_path(&path);
4294 leaf = path.nodes[0];
4295 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4296 ret = btrfs_next_leaf(tree_root, &path);
4302 leaf = path.nodes[0];
4304 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4305 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4306 fs_root_objectid(key.objectid)) {
4307 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4308 tmp_root = btrfs_read_fs_root_no_cache(
4311 key.offset = (u64)-1;
4312 tmp_root = btrfs_read_fs_root(
4315 if (IS_ERR(tmp_root)) {
4319 ret = check_fs_root(tmp_root, root_cache, &wc);
4320 if (ret == -EAGAIN) {
4321 free_root_recs_tree(root_cache);
4322 btrfs_release_path(&path);
4327 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4328 btrfs_free_fs_root(tmp_root);
4329 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4330 key.type == BTRFS_ROOT_BACKREF_KEY) {
4331 process_root_ref(leaf, path.slots[0], &key,
4338 btrfs_release_path(&path);
4340 free_extent_cache_tree(&wc.shared);
4341 if (!cache_tree_empty(&wc.shared))
4342 fprintf(stderr, "warning line %d\n", __LINE__);
4344 task_stop(ctx.info);
4350 * Find the @index according by @ino and name.
4351 * Notice:time efficiency is O(N)
4353 * @root: the root of the fs/file tree
4354 * @index_ret: the index as return value
4355 * @namebuf: the name to match
4356 * @name_len: the length of name to match
4357 * @file_type: the file_type of INODE_ITEM to match
4359 * Returns 0 if found and *@index_ret will be modified with right value
4360 * Returns< 0 not found and *@index_ret will be (u64)-1
4362 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4363 u64 *index_ret, char *namebuf, u32 name_len,
4366 struct btrfs_path path;
4367 struct extent_buffer *node;
4368 struct btrfs_dir_item *di;
4369 struct btrfs_key key;
4370 struct btrfs_key location;
4371 char name[BTRFS_NAME_LEN] = {0};
4383 /* search from the last index */
4384 key.objectid = dirid;
4385 key.offset = (u64)-1;
4386 key.type = BTRFS_DIR_INDEX_KEY;
4388 btrfs_init_path(&path);
4389 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4394 ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4397 *index_ret = (64)-1;
4400 /* Check whether inode_id/filetype/name match */
4401 node = path.nodes[0];
4402 slot = path.slots[0];
4403 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4404 total = btrfs_item_size_nr(node, slot);
4405 while (cur < total) {
4407 len = btrfs_dir_name_len(node, di);
4408 data_len = btrfs_dir_data_len(node, di);
4410 btrfs_dir_item_key_to_cpu(node, di, &location);
4411 if (location.objectid != location_id ||
4412 location.type != BTRFS_INODE_ITEM_KEY ||
4413 location.offset != 0)
4416 filetype = btrfs_dir_type(node, di);
4417 if (file_type != filetype)
4420 if (len > BTRFS_NAME_LEN)
4421 len = BTRFS_NAME_LEN;
4423 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4424 if (len != name_len || strncmp(namebuf, name, len))
4427 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4428 *index_ret = key.offset;
4432 len += sizeof(*di) + data_len;
4433 di = (struct btrfs_dir_item *)((char *)di + len);
4439 btrfs_release_path(&path);
4444 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4445 * INODE_REF/INODE_EXTREF match.
4447 * @root: the root of the fs/file tree
4448 * @key: the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4449 * value while find index
4450 * @location_key: location key of the struct btrfs_dir_item to match
4451 * @name: the name to match
4452 * @namelen: the length of name
4453 * @file_type: the type of file to match
4455 * Return 0 if no error occurred.
4456 * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4457 * DIR_ITEM/DIR_INDEX
4458 * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4459 * and DIR_ITEM/DIR_INDEX mismatch
4461 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4462 struct btrfs_key *location_key, char *name,
4463 u32 namelen, u8 file_type)
4465 struct btrfs_path path;
4466 struct extent_buffer *node;
4467 struct btrfs_dir_item *di;
4468 struct btrfs_key location;
4469 char namebuf[BTRFS_NAME_LEN] = {0};
4478 /* get the index by traversing all index */
4479 if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4480 ret = find_dir_index(root, key->objectid,
4481 location_key->objectid, &key->offset,
4482 name, namelen, file_type);
4484 ret = DIR_INDEX_MISSING;
4488 btrfs_init_path(&path);
4489 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4491 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4496 /* Check whether inode_id/filetype/name match */
4497 node = path.nodes[0];
4498 slot = path.slots[0];
4499 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4500 total = btrfs_item_size_nr(node, slot);
4501 while (cur < total) {
4502 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4503 DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4505 len = btrfs_dir_name_len(node, di);
4506 data_len = btrfs_dir_data_len(node, di);
4508 btrfs_dir_item_key_to_cpu(node, di, &location);
4509 if (location.objectid != location_key->objectid ||
4510 location.type != location_key->type ||
4511 location.offset != location_key->offset)
4514 filetype = btrfs_dir_type(node, di);
4515 if (file_type != filetype)
4518 if (len > BTRFS_NAME_LEN) {
4519 len = BTRFS_NAME_LEN;
4520 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4522 key->type == BTRFS_DIR_ITEM_KEY ?
4523 "DIR_ITEM" : "DIR_INDEX",
4524 key->objectid, key->offset, len);
4526 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4528 if (len != namelen || strncmp(namebuf, name, len))
4534 len += sizeof(*di) + data_len;
4535 di = (struct btrfs_dir_item *)((char *)di + len);
4540 btrfs_release_path(&path);
4545 * Prints inode ref error message
4547 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4548 u64 index, const char *namebuf, int name_len,
4549 u8 filetype, int err)
4554 /* root dir error */
4555 if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4557 "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4558 root->objectid, key->objectid, key->offset, namebuf);
4563 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4564 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4565 root->objectid, key->offset,
4566 btrfs_name_hash(namebuf, name_len),
4567 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4569 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4570 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4571 root->objectid, key->offset, index,
4572 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4577 * Insert the missing inode item.
4579 * Returns 0 means success.
4580 * Returns <0 means error.
4582 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4585 struct btrfs_key key;
4586 struct btrfs_trans_handle *trans;
4587 struct btrfs_path path;
4591 key.type = BTRFS_INODE_ITEM_KEY;
4594 btrfs_init_path(&path);
4595 trans = btrfs_start_transaction(root, 1);
4596 if (IS_ERR(trans)) {
4601 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4602 if (ret < 0 || !ret)
4605 /* insert inode item */
4606 create_inode_item_lowmem(trans, root, ino, filetype);
4609 btrfs_commit_transaction(trans, root);
4612 error("failed to repair root %llu INODE ITEM[%llu] missing",
4613 root->objectid, ino);
4614 btrfs_release_path(&path);
4619 * The ternary means dir item, dir index and relative inode ref.
4620 * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4621 * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4623 * If two of the three are missing or mismatched, delete the existing one.
4624 * If one of the three is missing or mismatched, add the missing one.
4626 * Returns 0 on success.
4627 * Returns non-zero on error.
4629 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4630 u64 index, char *name, int name_len, u8 filetype,
4633 struct btrfs_trans_handle *trans;
4638 * stage shall be one of the following valid values:
4639 * 0: Fine, nothing to do.
4640 * 1: One of three is wrong, so add missing one.
4641 * 2: Two of three are wrong, so delete the existing one.
4643 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4645 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4647 if (err & (INODE_REF_MISSING))
4650 /* stage must be smaller than 3 */
4653 trans = btrfs_start_transaction(root, 1);
4655 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4660 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4661 filetype, &index, 1, 1);
4665 btrfs_commit_transaction(trans, root);
4668 error("fail to repair inode %llu name %s filetype %u",
4669 ino, name, filetype);
4671 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
4672 stage == 2 ? "Delete" : "Add",
4673 ino, name, filetype);
4679 * Traverse the given INODE_REF and call find_dir_item() to find related
4680 * DIR_ITEM/DIR_INDEX.
4682 * @root: the root of the fs/file tree
4683 * @ref_key: the key of the INODE_REF
4684 * @path: the path that provides the node and slot
4685 * @refs_ret: the count of INODE_REF
4686 * @mode: the st_mode of INODE_ITEM
4687 * @name_ret: returns with the first ref's name
4688 * @namelen_ret: len of the name_ret
4690 * Return 0 if no error occurred.
4692 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4693 struct btrfs_path *path, char *name_ret,
4694 u32 *namelen_ret, u64 *refs_ret, int mode)
4696 struct btrfs_key key;
4697 struct btrfs_key location;
4698 struct btrfs_inode_ref *ref;
4699 struct extent_buffer *node;
4700 char namebuf[BTRFS_NAME_LEN] = {0};
4710 int need_research = 0;
4718 /* since after repair, path and the dir item may be changed */
4719 if (need_research) {
4721 btrfs_release_path(path);
4722 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
4723 /* the item was deleted, let path point to the last checked item */
4725 if (path->slots[0] == 0)
4726 btrfs_prev_leaf(root, path);
4734 location.objectid = ref_key->objectid;
4735 location.type = BTRFS_INODE_ITEM_KEY;
4736 location.offset = 0;
4737 node = path->nodes[0];
4738 slot = path->slots[0];
4740 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4741 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4742 total = btrfs_item_size_nr(node, slot);
4745 /* Update inode ref count */
4748 index = btrfs_inode_ref_index(node, ref);
4749 name_len = btrfs_inode_ref_name_len(node, ref);
4751 if (name_len <= BTRFS_NAME_LEN) {
4754 len = BTRFS_NAME_LEN;
4755 warning("root %llu INODE_REF[%llu %llu] name too long",
4756 root->objectid, ref_key->objectid, ref_key->offset);
4759 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4761 /* copy the first name found to name_ret */
4762 if (refs == 1 && name_ret) {
4763 memcpy(name_ret, namebuf, len);
4767 /* Check root dir ref */
4768 if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4769 if (index != 0 || len != strlen("..") ||
4770 strncmp("..", namebuf, len) ||
4771 ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4772 /* set err bits then repair will delete the ref */
4773 err |= DIR_INDEX_MISSING;
4774 err |= DIR_ITEM_MISSING;
4779 /* Find related DIR_INDEX */
4780 key.objectid = ref_key->offset;
4781 key.type = BTRFS_DIR_INDEX_KEY;
4783 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4784 imode_to_type(mode));
4786 /* Find related dir_item */
4787 key.objectid = ref_key->offset;
4788 key.type = BTRFS_DIR_ITEM_KEY;
4789 key.offset = btrfs_name_hash(namebuf, len);
4790 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4791 imode_to_type(mode));
4793 if (tmp_err && repair) {
4794 ret = repair_ternary_lowmem(root, ref_key->offset,
4795 ref_key->objectid, index, namebuf,
4796 name_len, imode_to_type(mode),
4803 print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4804 imode_to_type(mode), tmp_err);
4806 len = sizeof(*ref) + name_len;
4807 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4818 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4819 * DIR_ITEM/DIR_INDEX.
4821 * @root: the root of the fs/file tree
4822 * @ref_key: the key of the INODE_EXTREF
4823 * @refs: the count of INODE_EXTREF
4824 * @mode: the st_mode of INODE_ITEM
4826 * Return 0 if no error occurred.
4828 static int check_inode_extref(struct btrfs_root *root,
4829 struct btrfs_key *ref_key,
4830 struct extent_buffer *node, int slot, u64 *refs,
4833 struct btrfs_key key;
4834 struct btrfs_key location;
4835 struct btrfs_inode_extref *extref;
4836 char namebuf[BTRFS_NAME_LEN] = {0};
4846 location.objectid = ref_key->objectid;
4847 location.type = BTRFS_INODE_ITEM_KEY;
4848 location.offset = 0;
4850 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4851 total = btrfs_item_size_nr(node, slot);
4854 /* update inode ref count */
4856 name_len = btrfs_inode_extref_name_len(node, extref);
4857 index = btrfs_inode_extref_index(node, extref);
4858 parent = btrfs_inode_extref_parent(node, extref);
4859 if (name_len <= BTRFS_NAME_LEN) {
4862 len = BTRFS_NAME_LEN;
4863 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4864 root->objectid, ref_key->objectid, ref_key->offset);
4866 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4868 /* Check root dir ref name */
4869 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4870 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4871 root->objectid, ref_key->objectid, ref_key->offset,
4873 err |= ROOT_DIR_ERROR;
4876 /* find related dir_index */
4877 key.objectid = parent;
4878 key.type = BTRFS_DIR_INDEX_KEY;
4880 ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4883 /* find related dir_item */
4884 key.objectid = parent;
4885 key.type = BTRFS_DIR_ITEM_KEY;
4886 key.offset = btrfs_name_hash(namebuf, len);
4887 ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4890 len = sizeof(*extref) + name_len;
4891 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4901 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4902 * DIR_ITEM/DIR_INDEX match.
4903 * Return with @index_ret.
4905 * @root: the root of the fs/file tree
4906 * @key: the key of the INODE_REF/INODE_EXTREF
4907 * @name: the name in the INODE_REF/INODE_EXTREF
4908 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4909 * @index_ret: the index in the INODE_REF/INODE_EXTREF,
4910 * value (u64)-1 means do not check index
4911 * @ext_ref: the EXTENDED_IREF feature
4913 * Return 0 if no error occurred.
4914 * Return >0 for error bitmap
4916 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4917 char *name, int namelen, u64 *index_ret,
4918 unsigned int ext_ref)
4920 struct btrfs_path path;
4921 struct btrfs_inode_ref *ref;
4922 struct btrfs_inode_extref *extref;
4923 struct extent_buffer *node;
4924 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4937 btrfs_init_path(&path);
4938 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4940 ret = INODE_REF_MISSING;
4944 node = path.nodes[0];
4945 slot = path.slots[0];
4947 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4948 total = btrfs_item_size_nr(node, slot);
4950 /* Iterate all entry of INODE_REF */
4951 while (cur < total) {
4952 ret = INODE_REF_MISSING;
4954 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4955 ref_index = btrfs_inode_ref_index(node, ref);
4956 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4959 if (cur + sizeof(*ref) + ref_namelen > total ||
4960 ref_namelen > BTRFS_NAME_LEN) {
4961 warning("root %llu INODE %s[%llu %llu] name too long",
4963 key->type == BTRFS_INODE_REF_KEY ?
4965 key->objectid, key->offset);
4967 if (cur + sizeof(*ref) > total)
4969 len = min_t(u32, total - cur - sizeof(*ref),
4975 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4978 if (len != namelen || strncmp(ref_namebuf, name, len))
4981 *index_ret = ref_index;
4985 len = sizeof(*ref) + ref_namelen;
4986 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4991 /* Skip if not support EXTENDED_IREF feature */
4995 btrfs_release_path(&path);
4996 btrfs_init_path(&path);
4998 dir_id = key->offset;
4999 key->type = BTRFS_INODE_EXTREF_KEY;
5000 key->offset = btrfs_extref_hash(dir_id, name, namelen);
5002 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5004 ret = INODE_REF_MISSING;
5008 node = path.nodes[0];
5009 slot = path.slots[0];
5011 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5013 total = btrfs_item_size_nr(node, slot);
5015 /* Iterate all entry of INODE_EXTREF */
5016 while (cur < total) {
5017 ret = INODE_REF_MISSING;
5019 ref_namelen = btrfs_inode_extref_name_len(node, extref);
5020 ref_index = btrfs_inode_extref_index(node, extref);
5021 parent = btrfs_inode_extref_parent(node, extref);
5022 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5025 if (parent != dir_id)
5028 if (ref_namelen <= BTRFS_NAME_LEN) {
5031 len = BTRFS_NAME_LEN;
5032 warning("root %llu INODE %s[%llu %llu] name too long",
5034 key->type == BTRFS_INODE_REF_KEY ?
5036 key->objectid, key->offset);
5038 read_extent_buffer(node, ref_namebuf,
5039 (unsigned long)(extref + 1), len);
5041 if (len != namelen || strncmp(ref_namebuf, name, len))
5044 *index_ret = ref_index;
5049 len = sizeof(*extref) + ref_namelen;
5050 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5055 btrfs_release_path(&path);
5059 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
5060 u64 ino, u64 index, const char *namebuf,
5061 int name_len, u8 filetype, int err)
5063 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
5064 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
5065 root->objectid, key->objectid, key->offset, namebuf,
5067 err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5070 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
5071 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
5072 root->objectid, key->objectid, index, namebuf, filetype,
5073 err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5076 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
5078 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
5079 root->objectid, ino, index, namebuf, filetype,
5080 err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
5083 if (err & INODE_REF_MISSING)
5085 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5086 root->objectid, ino, key->objectid, namebuf, filetype);
5091 * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5093 * Returns error after repair
5095 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5096 u64 index, u8 filetype, char *namebuf, u32 name_len,
5101 if (err & INODE_ITEM_MISSING) {
5102 ret = repair_inode_item_missing(root, ino, filetype);
5104 err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
5107 if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5108 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5109 name_len, filetype, err);
5111 err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5112 err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5113 err &= ~(INODE_REF_MISSING);
5119 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5122 struct btrfs_key key;
5123 struct btrfs_path path;
5125 struct btrfs_dir_item *di;
5135 key.offset = (u64)-1;
5137 btrfs_init_path(&path);
5138 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5143 /* if found, go to special case */
5148 ret = btrfs_previous_item(root, &path, ino, type);
5156 di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5158 total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5160 while (cur < total) {
5161 len = btrfs_dir_name_len(path.nodes[0], di);
5162 if (len > BTRFS_NAME_LEN)
5163 len = BTRFS_NAME_LEN;
5166 len += btrfs_dir_data_len(path.nodes[0], di);
5168 di = (struct btrfs_dir_item *)((char *)di + len);
5174 btrfs_release_path(&path);
5178 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5185 ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5189 ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5193 *size = item_size + index_size;
5197 error("failed to count root %llu INODE[%llu] root size",
5198 root->objectid, ino);
5203 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5204 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5206 * @root: the root of the fs/file tree
5207 * @di_key: the key of the DIR_ITEM/DIR_INDEX
5209 * @size: the st_size of the INODE_ITEM
5210 * @ext_ref: the EXTENDED_IREF feature
5212 * Return 0 if no error occurred.
5213 * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
5215 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5216 struct btrfs_path *path, u64 *size,
5217 unsigned int ext_ref)
5219 struct btrfs_dir_item *di;
5220 struct btrfs_inode_item *ii;
5221 struct btrfs_key key;
5222 struct btrfs_key location;
5223 struct extent_buffer *node;
5225 char namebuf[BTRFS_NAME_LEN] = {0};
5237 int need_research = 0;
5240 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5241 * ignore index check.
5243 if (di_key->type == BTRFS_DIR_INDEX_KEY)
5244 index = di_key->offset;
5251 /* since after repair, path and the dir item may be changed */
5252 if (need_research) {
5254 err |= DIR_COUNT_AGAIN;
5255 btrfs_release_path(path);
5256 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5257 /* the item was deleted, let path point to the last checked item */
5259 if (path->slots[0] == 0)
5260 btrfs_prev_leaf(root, path);
5268 node = path->nodes[0];
5269 slot = path->slots[0];
5271 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5272 total = btrfs_item_size_nr(node, slot);
5273 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5275 while (cur < total) {
5276 data_len = btrfs_dir_data_len(node, di);
5279 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5281 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5282 di_key->objectid, di_key->offset, data_len);
5284 name_len = btrfs_dir_name_len(node, di);
5285 if (name_len <= BTRFS_NAME_LEN) {
5288 len = BTRFS_NAME_LEN;
5289 warning("root %llu %s[%llu %llu] name too long",
5291 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5292 di_key->objectid, di_key->offset);
5294 (*size) += name_len;
5295 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5297 filetype = btrfs_dir_type(node, di);
5299 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5300 di_key->offset != btrfs_name_hash(namebuf, len)) {
5302 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5303 root->objectid, di_key->objectid, di_key->offset,
5304 namebuf, len, filetype, di_key->offset,
5305 btrfs_name_hash(namebuf, len));
5308 btrfs_dir_item_key_to_cpu(node, di, &location);
5309 /* Ignore related ROOT_ITEM check */
5310 if (location.type == BTRFS_ROOT_ITEM_KEY)
5313 btrfs_release_path(path);
5314 /* Check relative INODE_ITEM(existence/filetype) */
5315 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5317 tmp_err |= INODE_ITEM_MISSING;
5321 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5322 struct btrfs_inode_item);
5323 mode = btrfs_inode_mode(path->nodes[0], ii);
5324 if (imode_to_type(mode) != filetype) {
5325 tmp_err |= INODE_ITEM_MISMATCH;
5329 /* Check relative INODE_REF/INODE_EXTREF */
5330 key.objectid = location.objectid;
5331 key.type = BTRFS_INODE_REF_KEY;
5332 key.offset = di_key->objectid;
5333 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5336 /* check relative INDEX/ITEM */
5337 key.objectid = di_key->objectid;
5338 if (key.type == BTRFS_DIR_ITEM_KEY) {
5339 key.type = BTRFS_DIR_INDEX_KEY;
5342 key.type = BTRFS_DIR_ITEM_KEY;
5343 key.offset = btrfs_name_hash(namebuf, name_len);
5346 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5347 name_len, filetype);
5348 /* find_dir_item may find index */
5349 if (key.type == BTRFS_DIR_INDEX_KEY)
5353 if (tmp_err && repair) {
5354 ret = repair_dir_item(root, di_key->objectid,
5355 location.objectid, index,
5356 imode_to_type(mode), namebuf,
5358 if (ret != tmp_err) {
5363 btrfs_release_path(path);
5364 print_dir_item_err(root, di_key, location.objectid, index,
5365 namebuf, name_len, filetype, tmp_err);
5367 len = sizeof(*di) + name_len + data_len;
5368 di = (struct btrfs_dir_item *)((char *)di + len);
5371 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5372 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5373 root->objectid, di_key->objectid,
5380 btrfs_release_path(path);
5381 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5383 err |= ret > 0 ? -ENOENT : ret;
5388 * Wrapper function of btrfs_punch_hole.
5390 * Returns 0 means success.
5391 * Returns not 0 means error.
5393 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5396 struct btrfs_trans_handle *trans;
5399 trans = btrfs_start_transaction(root, 1);
5401 return PTR_ERR(trans);
5403 ret = btrfs_punch_hole(trans, root, ino, start, len);
5405 error("failed to add hole [%llu, %llu] in inode [%llu]",
5408 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
5411 btrfs_commit_transaction(trans, root);
5416 * Check file extent datasum/hole, update the size of the file extents,
5417 * check and update the last offset of the file extent.
5419 * @root: the root of fs/file tree.
5420 * @fkey: the key of the file extent.
5421 * @nodatasum: INODE_NODATASUM feature.
5422 * @size: the sum of all EXTENT_DATA items size for this inode.
5423 * @end: the offset of the last extent.
5425 * Return 0 if no error occurred.
/*
 * Check one EXTENT_DATA item: inline, regular or preallocated file extent.
 *
 * @root:      tree the item belongs to, used for messages and csum lookup
 * @fkey:      key of the file extent item
 * @node:      leaf holding the item
 * @slot:      slot of the item inside @node
 * @nodatasum: non-zero when the owning inode carries the NODATASUM flag
 * @size:      running byte total, increased from extent_num_bytes
 * @end:       running end offset of the extents seen so far, advanced by
 *             extent_num_bytes
 *
 * Problems are collected as bits in err (FILE_EXTENT_ERROR, ODD_CSUM_ITEM,
 * CSUM_ITEM_MISSING).
 * NOTE(review): several short lines (braces, else branches, return) are
 * missing from this listing; presumably err is the return value - confirm.
 */
5427 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5428 struct extent_buffer *node, int slot,
5429 unsigned int nodatasum, u64 *size, u64 *end)
5431 struct btrfs_file_extent_item *fi;
5434 u64 extent_num_bytes;
5436 u64 csum_found; /* In byte size, sectorsize aligned */
5437 u64 search_start; /* Logical range start we search for csum */
5438 u64 search_len; /* Logical range len we search for csum */
5439 unsigned int extent_type;
5440 unsigned int is_hole;
5445 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5447 /* Check inline extent */
5448 extent_type = btrfs_file_extent_type(node, fi);
5449 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5450 struct btrfs_item *e = btrfs_item_nr(slot);
5451 u32 item_inline_len;
5453 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5454 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5455 compressed = btrfs_file_extent_compression(node, fi);
5456 if (extent_num_bytes == 0) {
5458 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5459 root->objectid, fkey->objectid, fkey->offset);
5460 err |= FILE_EXTENT_ERROR;
/* Only an uncompressed inline extent must match the item payload size. */
5462 if (!compressed && extent_num_bytes != item_inline_len) {
5464 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5465 root->objectid, fkey->objectid, fkey->offset,
5466 extent_num_bytes, item_inline_len);
5467 err |= FILE_EXTENT_ERROR;
5469 *end += extent_num_bytes;
5470 *size += extent_num_bytes;
5474 /* Check extent type */
5475 if (extent_type != BTRFS_FILE_EXTENT_REG &&
5476 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5477 err |= FILE_EXTENT_ERROR;
5478 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5479 root->objectid, fkey->objectid, fkey->offset);
5483 /* Check REG_EXTENT/PREALLOC_EXTENT */
5484 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5485 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5486 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5487 extent_offset = btrfs_file_extent_offset(node, fi);
5488 compressed = btrfs_file_extent_compression(node, fi);
/* A hole is an extent with neither a disk bytenr nor disk bytes. */
5489 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5492 * Check EXTENT_DATA csum
5494 * For plain (uncompressed) extent, we should only check the range
5495 * we're referring to, as it's possible that part of prealloc extent
5496 * has been written, and has csum:
5498 * |<--- Original large preallocated extent A ---->|
5499 * |<- Prealloc File Extent ->|<- Regular Extent ->|
5502 * For compressed extent, we should check the whole range.
5505 search_start = disk_bytenr + extent_offset;
5506 search_len = extent_num_bytes;
5508 search_start = disk_bytenr;
5509 search_len = disk_num_bytes;
5511 ret = count_csum_range(root, search_start, search_len, &csum_found);
5512 if (csum_found > 0 && nodatasum) {
5513 err |= ODD_CSUM_ITEM;
5514 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5515 root->objectid, fkey->objectid, fkey->offset);
5516 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5517 !is_hole && (ret < 0 || csum_found < search_len)) {
5518 err |= CSUM_ITEM_MISSING;
5519 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5520 root->objectid, fkey->objectid, fkey->offset,
5521 csum_found, search_len);
5522 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5523 err |= ODD_CSUM_ITEM;
5524 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5525 root->objectid, fkey->objectid, fkey->offset, csum_found);
5528 /* Check EXTENT_DATA hole */
5529 if (!no_holes && *end != fkey->offset) {
5531 ret = punch_extent_hole(root, fkey->objectid,
5532 *end, fkey->offset - *end);
5533 if (!repair || ret) {
5534 err |= FILE_EXTENT_ERROR;
5536 "root %llu EXTENT_DATA[%llu %llu] gap exists, expected: EXTENT_DATA[%llu %llu]",
5537 root->objectid, fkey->objectid, fkey->offset,
5538 fkey->objectid, *end);
5542 *end += extent_num_bytes;
5544 *size += extent_num_bytes;
5550 * Set inode item nbytes to @nbytes
5552 * Returns 0 on success
5553 * Returns != 0 on error
/*
 * Rewrite the nbytes field of an INODE_ITEM inside its own transaction.
 *
 * @root:   tree containing the inode item
 * @path:   caller path; the key it points at is saved first and searched
 *          for again at the end, because the path is released for the
 *          update
 * @ino:    inode number reported in the messages
 * @nbytes: value stored with btrfs_set_inode_nbytes()
 *
 * The outcome is reported with error() on failure or printf() on success.
 * NOTE(review): the lines that fill key.objectid and key.offset, the error
 * branches and the final return are missing from this listing.
 */
5555 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5556 struct btrfs_path *path,
5557 u64 ino, u64 nbytes)
5559 struct btrfs_trans_handle *trans;
5560 struct btrfs_inode_item *ii;
5561 struct btrfs_key key;
5562 struct btrfs_key research_key;
/* Remember where the caller was before the path gets released. */
5566 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5569 key.type = BTRFS_INODE_ITEM_KEY;
5572 trans = btrfs_start_transaction(root, 1);
5573 if (IS_ERR(trans)) {
5574 ret = PTR_ERR(trans);
5579 btrfs_release_path(path);
5580 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5588 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5589 struct btrfs_inode_item);
5590 btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5591 btrfs_mark_buffer_dirty(path->nodes[0]);
5593 btrfs_commit_transaction(trans, root);
5596 error("failed to set nbytes in inode %llu root %llu",
5597 ino, root->root_key.objectid);
5599 printf("Set nbytes in inode item %llu root %llu to %llu\n", ino,
5600 root->root_key.objectid, nbytes);
/* Put the path back on the key the caller was positioned at. */
5603 btrfs_release_path(path);
5604 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5611 * Set directory inode isize to @isize.
5613 * Returns 0 on success.
5614 * Returns != 0 on error.
/*
 * Rewrite the size field of a directory INODE_ITEM inside its own
 * transaction.
 *
 * @root: tree containing the inode item
 * @path: caller path; the key it points at is saved first and searched for
 *        again at the end, because the path is released for the update
 *
 * The body also uses ino and isize; the parameter line declaring them is
 * missing from this listing. isize is stored with btrfs_set_inode_size()
 * and the outcome is reported with error() or printf().
 * NOTE(review): the error branches and the final return are not visible.
 */
5616 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5617 struct btrfs_path *path,
5620 struct btrfs_trans_handle *trans;
5621 struct btrfs_inode_item *ii;
5622 struct btrfs_key key;
5623 struct btrfs_key research_key;
/* Remember where the caller was before the path gets released. */
5627 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5630 key.type = BTRFS_INODE_ITEM_KEY;
5633 trans = btrfs_start_transaction(root, 1);
5634 if (IS_ERR(trans)) {
5635 ret = PTR_ERR(trans);
5640 btrfs_release_path(path);
5641 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5649 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5650 struct btrfs_inode_item);
5651 btrfs_set_inode_size(path->nodes[0], ii, isize);
5652 btrfs_mark_buffer_dirty(path->nodes[0]);
5654 btrfs_commit_transaction(trans, root);
5657 error("failed to set isize in inode %llu root %llu",
5658 ino, root->root_key.objectid);
5660 printf("Set isize in inode %llu root %llu to %llu\n",
5661 ino, root->root_key.objectid, isize);
/* Put the path back on the key the caller was positioned at. */
5663 btrfs_release_path(path);
5664 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5671 * Wrapper function for btrfs_add_orphan_item().
5673 * Returns 0 on success.
5674 * Returns != 0 on error.
/*
 * Add an orphan item for @ino inside its own transaction.
 *
 * @root: tree the orphan item is added to
 * @path: caller path; the key it points at is saved first and searched for
 *        again at the end, because the path is released and handed to
 *        btrfs_add_orphan_item()
 * @ino:  inode number to record as orphan
 *
 * The outcome is reported with error() or printf().
 * NOTE(review): the error branches and the final return are missing from
 * this listing.
 */
5676 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5677 struct btrfs_path *path, u64 ino)
5679 struct btrfs_trans_handle *trans;
5680 struct btrfs_key research_key;
5684 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5686 trans = btrfs_start_transaction(root, 1);
5687 if (IS_ERR(trans)) {
5688 ret = PTR_ERR(trans);
5693 btrfs_release_path(path);
5694 ret = btrfs_add_orphan_item(trans, root, path, ino);
5696 btrfs_commit_transaction(trans, root);
5699 error("failed to add inode %llu as orphan item root %llu",
5700 ino, root->root_key.objectid);
5702 printf("Added inode %llu as orphan item root %llu\n",
5703 ino, root->root_key.objectid);
/* Put the path back on the key the caller was positioned at. */
5705 btrfs_release_path(path);
5706 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5712 /* Set inode_item nlink to @ref_count.
5713 * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
5715 * Returns 0 on success
/*
 * Set the nlink field of the INODE_ITEM of @ino to @ref_count.
 *
 * @root:      tree containing the inode
 * @path:      caller path; the key it points at is saved in old_key and
 *             searched for again at the end
 * @ino:       inode number
 * @name:      file name used when linking into lost+found; when it is
 *             missing or empty the decimal inode number is used instead
 * @namelen:   length of @name, asserted to be at most BTRFS_NAME_LEN
 * @ref_count: number of references found; when it is 0 the inode is first
 *             linked through link_inode_to_lostfound(), which receives a
 *             pointer to ref_count
 * @filetype:  file type passed to link_inode_to_lostfound()
 * @nlink:     out parameter; its use is not visible in this listing
 *
 * The messages print the inode number first and the root objectid second,
 * matching the order named in their format strings.
 * NOTE(review): the error branches and the final return are missing from
 * this listing.
 */
5717 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
5718 struct btrfs_path *path, u64 ino,
5719 const char *name, u32 namelen,
5720 u64 ref_count, u8 filetype, u64 *nlink)
5722 struct btrfs_trans_handle *trans;
5723 struct btrfs_inode_item *ii;
5724 struct btrfs_key key;
5725 struct btrfs_key old_key;
5726 char namebuf[BTRFS_NAME_LEN] = {0};
5732 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
5734 if (name && namelen) {
5735 ASSERT(namelen <= BTRFS_NAME_LEN);
5736 memcpy(namebuf, name, namelen);
5739 sprintf(namebuf, "%llu", ino);
5740 name_len = count_digits(ino);
5741 printf("Can't find file name for inode %llu, use %s instead\n",
5745 trans = btrfs_start_transaction(root, 1);
5746 if (IS_ERR(trans)) {
5747 ret = PTR_ERR(trans);
5751 btrfs_release_path(path);
5752 /* if refs is 0, put it into lostfound */
5753 if (ref_count == 0) {
5754 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
5755 name_len, filetype, &ref_count);
5760 /* reset inode_item's nlink to ref_count */
5762 key.type = BTRFS_INODE_ITEM_KEY;
5765 btrfs_release_path(path);
5766 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5772 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5773 struct btrfs_inode_item);
5774 btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
5775 btrfs_mark_buffer_dirty(path->nodes[0]);
5780 btrfs_commit_transaction(trans, root);
5784 "fail to repair nlink of inode %llu root %llu name %s filetype %u",
5785 ino, root->objectid, namebuf, filetype);
5787 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
5788 ino, root->objectid, namebuf, filetype);
/* Put the path back on the key the caller was positioned at. */
5791 btrfs_release_path(path);
5792 ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
5799 * Check INODE_ITEM and related ITEMs (the same inode number)
5800 * 1. check link count
5801 * 2. check inode ref/extref
5802 * 3. check dir item/index
5804 * @ext_ref: the EXTENDED_IREF feature
5806 * Return 0 if no error occurred.
5807 * Return >0 on error or when the traversal is done (see the error bitmap)
/*
 * Check an INODE_ITEM and the items after it that share its objectid.
 *
 * @root:    tree being checked
 * @path:    positioned at the INODE_ITEM; advanced with btrfs_next_item()
 * @ext_ref: passed on to check_dir_item(); an EXTREF item is warned about
 *           as unsupported when it is zero
 *
 * Each following item is dispatched on its key type to check_inode_ref(),
 * check_inode_extref(), check_dir_item() or check_file_extent(). XATTR
 * items have a case of their own, and any other type is reported as
 * unknown. Afterwards the nlink, isize and nbytes values read from the
 * inode item are compared with what the items add up to. When the global
 * repair flag is set, mismatches are fixed through the repair_*_lowmem()
 * helpers and punch_extent_hole().
 * NOTE(review): many short lines (braces, break, else, goto, return) are
 * missing from this listing, so the exact control flow is not fully shown.
 */
5809 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5810 unsigned int ext_ref)
5812 struct extent_buffer *node;
5813 struct btrfs_inode_item *ii;
5814 struct btrfs_key key;
5815 struct btrfs_key last_key;
5824 u64 extent_size = 0;
5826 unsigned int nodatasum;
5830 char namebuf[BTRFS_NAME_LEN] = {0};
5833 node = path->nodes[0];
5834 slot = path->slots[0];
5836 btrfs_item_key_to_cpu(node, &key, slot);
5837 inode_id = key.objectid;
5839 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5840 ret = btrfs_next_item(root, path);
/* Cache the inode item fields that the later checks compare against. */
5846 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5847 isize = btrfs_inode_size(node, ii);
5848 nbytes = btrfs_inode_nbytes(node, ii);
5849 mode = btrfs_inode_mode(node, ii);
5850 dir = imode_to_type(mode) == BTRFS_FT_DIR;
5851 nlink = btrfs_inode_nlink(node, ii);
5852 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
/* Remember the current key so the path can be restored later on. */
5855 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
5856 ret = btrfs_next_item(root, path);
5858 /* out will fill 'err' using current statistics */
5860 } else if (ret > 0) {
5865 node = path->nodes[0];
5866 slot = path->slots[0];
5867 btrfs_item_key_to_cpu(node, &key, slot);
5868 if (key.objectid != inode_id)
5872 case BTRFS_INODE_REF_KEY:
5873 ret = check_inode_ref(root, &key, path, namebuf,
5874 &name_len, &refs, mode);
5877 case BTRFS_INODE_EXTREF_KEY:
5878 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5879 warning("root %llu EXTREF[%llu %llu] isn't supported",
5880 root->objectid, key.objectid,
5882 ret = check_inode_extref(root, &key, node, slot, &refs,
5886 case BTRFS_DIR_ITEM_KEY:
5887 case BTRFS_DIR_INDEX_KEY:
5889 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5890 root->objectid, inode_id,
5891 imode_to_type(mode), key.objectid,
5894 ret = check_dir_item(root, &key, path, &size, ext_ref);
5897 case BTRFS_EXTENT_DATA_KEY:
5899 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5900 root->objectid, inode_id, key.objectid,
5903 ret = check_file_extent(root, &key, node, slot,
5904 nodatasum, &extent_size,
5908 case BTRFS_XATTR_ITEM_KEY:
5911 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5912 key.objectid, key.type, key.offset);
/* When LAST_ITEM is set, go back to the last key that was recorded. */
5917 if (err & LAST_ITEM) {
5918 btrfs_release_path(path);
5919 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
5924 /* verify INODE_ITEM nlink/isize/nbytes */
5926 if (repair && (err & DIR_COUNT_AGAIN)) {
5927 err &= ~DIR_COUNT_AGAIN;
5928 count_dir_isize(root, inode_id, &size);
5931 if ((nlink != 1 || refs != 1) && repair) {
5932 ret = repair_inode_nlinks_lowmem(root, path, inode_id,
5933 namebuf, name_len, refs, imode_to_type(mode),
5938 err |= LINK_COUNT_ERROR;
5939 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5940 root->objectid, inode_id, nlink);
5944 * Just a warning, as dir inode nbytes is just an
5945 * instructive value.
5947 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5948 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5949 root->objectid, inode_id,
5950 root->fs_info->nodesize);
5953 if (isize != size) {
5955 ret = repair_dir_isize_lowmem(root, path,
5957 if (!repair || ret) {
5960 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5961 root->objectid, inode_id, isize, size);
5965 if (nlink != refs) {
5967 ret = repair_inode_nlinks_lowmem(root, path,
5968 inode_id, namebuf, name_len, refs,
5969 imode_to_type(mode), &nlink);
5970 if (!repair || ret) {
5971 err |= LINK_COUNT_ERROR;
5973 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5974 root->objectid, inode_id, nlink, refs);
5976 } else if (!nlink) {
5978 ret = repair_inode_orphan_item_lowmem(root,
5980 if (!repair || ret) {
5982 error("root %llu INODE[%llu] is orphan item",
5983 root->objectid, inode_id);
5987 if (!nbytes && !no_holes && extent_end < isize) {
5989 ret = punch_extent_hole(root, inode_id,
5990 extent_end, isize - extent_end);
5991 if (!repair || ret) {
5992 err |= NBYTES_ERROR;
5994 "root %llu INODE[%llu] size %llu should have a file extent hole",
5995 root->objectid, inode_id, isize);
5999 if (nbytes != extent_size) {
6001 ret = repair_inode_nbytes_lowmem(root, path,
6002 inode_id, extent_size);
6003 if (!repair || ret) {
6004 err |= NBYTES_ERROR;
6006 "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
6007 root->objectid, inode_id, nbytes,
6013 if (err & LAST_ITEM)
6014 btrfs_next_item(root, path);
6019 * Insert the missing inode item and inode ref.
6021 * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref dir.
6022 * Root dir should be handled specially because root dir is the root of fs.
6024 * returns err (>0 or 0) after repair
/*
 * Repair the first inode (BTRFS_FIRST_FREE_OBJECTID) of a fs tree.
 *
 * @root: tree whose first inode is repaired
 * @err:  error bitmap; INODE_REF_MISSING and INODE_ITEM_MISSING are the
 *        bits acted on here and each is cleared in its repair branch
 *
 * A missing inode ref is re-inserted under the name ".." with the first
 * inode as both objectid and parent. A missing inode item is recreated
 * through repair_inode_item_missing() with file type BTRFS_FT_DIR.
 * NOTE(review): the conditions guarding the clear/error lines and the
 * return statement are missing from this listing.
 */
6026 static int repair_fs_first_inode(struct btrfs_root *root, int err)
6028 struct btrfs_trans_handle *trans;
6029 struct btrfs_key key;
6030 struct btrfs_path path;
6031 int filetype = BTRFS_FT_DIR;
6034 btrfs_init_path(&path);
6036 if (err & INODE_REF_MISSING) {
6037 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6038 key.type = BTRFS_INODE_REF_KEY;
6039 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6041 trans = btrfs_start_transaction(root, 1);
6042 if (IS_ERR(trans)) {
6043 ret = PTR_ERR(trans);
6047 btrfs_release_path(&path);
6048 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
6052 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
6053 BTRFS_FIRST_FREE_OBJECTID,
6054 BTRFS_FIRST_FREE_OBJECTID, 0);
6058 printf("Add INODE_REF[%llu %llu] name %s\n",
6059 BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
6061 err &= ~INODE_REF_MISSING;
6064 error("fail to insert first inode's ref");
6065 btrfs_commit_transaction(trans, root);
6068 if (err & INODE_ITEM_MISSING) {
6069 ret = repair_inode_item_missing(root,
6070 BTRFS_FIRST_FREE_OBJECTID, filetype);
6073 err &= ~INODE_ITEM_MISSING;
6077 error("fail to repair first inode");
6078 btrfs_release_path(&path);
6083 * check first root dir's inode_item and inode_ref
6085 * returns 0 means no error
6086 * returns >0 means error
6087 * returns <0 means fatal error
/*
 * Check the INODE_ITEM and the ".." INODE_REF of the first inode
 * (BTRFS_FIRST_FREE_OBJECTID) of a fs tree.
 *
 * @root:    tree to check
 * @ext_ref: passed through to find_inode_ref()
 *
 * The check is skipped for a root with zero refs whose drop_progress
 * objectid is at or past BTRFS_FIRST_FREE_OBJECTID. The item must exist
 * and describe a directory; otherwise INODE_ITEM_MISSING or
 * INODE_ITEM_MISMATCH is set. Remaining problems are reported after
 * repair_fs_first_inode() has updated err.
 *
 * Returns ret when it is negative, otherwise the err bitmap.
 * NOTE(review): the conditions around the repair call are not visible in
 * this listing.
 */
6089 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
6091 struct btrfs_path path;
6092 struct btrfs_key key;
6093 struct btrfs_inode_item *ii;
6099 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6100 key.type = BTRFS_INODE_ITEM_KEY;
6103 /* For root being dropped, we don't need to check first inode */
6104 if (btrfs_root_refs(&root->root_item) == 0 &&
6105 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
6106 BTRFS_FIRST_FREE_OBJECTID)
6109 btrfs_init_path(&path);
6110 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6115 err |= INODE_ITEM_MISSING;
6117 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6118 struct btrfs_inode_item);
6119 mode = btrfs_inode_mode(path.nodes[0], ii);
6120 if (imode_to_type(mode) != BTRFS_FT_DIR)
6121 err |= INODE_ITEM_MISMATCH;
6124 /* lookup first inode ref */
6125 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6126 key.type = BTRFS_INODE_REF_KEY;
6127 /* special index value */
6130 ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6136 btrfs_release_path(&path);
6139 err = repair_fs_first_inode(root, err);
6141 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6142 error("root dir INODE_ITEM is %s",
6143 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6144 if (err & INODE_REF_MISSING)
6145 error("root dir INODE_REF is missing");
6147 return ret < 0 ? ret : err;
/*
 * Look up a tree backref of @rec in rec->backref_tree.
 *
 * @rec:    extent record whose backref tree is searched
 * @parent: parent bytenr; when used, the match key is marked full_backref
 * @root:   root objectid
 *
 * A stack tree_backref serves as the search key for rb_search() with
 * compare_extent_backref as comparator. back starts out as NULL.
 * NOTE(review): the branch choosing between @parent and @root, the check
 * of the search result and the return statement are missing from this
 * listing.
 */
6150 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6151 u64 parent, u64 root)
6153 struct rb_node *node;
6154 struct tree_backref *back = NULL;
6155 struct tree_backref match = {
6162 match.parent = parent;
6163 match.node.full_backref = 1;
6168 node = rb_search(&rec->backref_tree, &match.node.node,
6169 (rb_compare_keys)compare_extent_backref, NULL);
6171 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Look up a data backref of @rec in rec->backref_tree.
 *
 * @rec:         extent record whose backref tree is searched
 * @parent:      parent bytenr; when used, the match key is marked
 *               full_backref
 * @root:        root objectid
 * @owner:       owner of the data extent
 * @offset:      file offset of the reference
 * @disk_bytenr: disk bytenr stored in the match key
 * @bytes:       extent length
 *
 * The body also uses found_ref; the parameter line declaring it is missing
 * from this listing. A stack data_backref serves as the search key for
 * rb_search() with compare_extent_backref as comparator. back starts out
 * as NULL.
 * NOTE(review): some initializer fields, the parent/root branch and the
 * return statement are not visible.
 */
6176 static struct data_backref *find_data_backref(struct extent_record *rec,
6177 u64 parent, u64 root,
6178 u64 owner, u64 offset,
6180 u64 disk_bytenr, u64 bytes)
6182 struct rb_node *node;
6183 struct data_backref *back = NULL;
6184 struct data_backref match = {
6191 .found_ref = found_ref,
6192 .disk_bytenr = disk_bytenr,
6196 match.parent = parent;
6197 match.node.full_backref = 1;
6202 node = rb_search(&rec->backref_tree, &match.node.node,
6203 (rb_compare_keys)compare_extent_backref, NULL);
6205 back = to_data_backref(rb_node_to_extent_backref(node));
6210 * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
6211 * blocks and integrity of fs tree items.
6213 * @root: the root of the tree to be checked.
6214 * @ext_ref: whether the EXTENDED_IREF feature is enabled.
6215 * @check_all: if not 0, check the tree blocks of the tree (any tree);
6216 * otherwise check the relationship of fs tree items, in which case
6217 * @root MUST be a fs tree root.
6218 * Returns 0 represents OK.
6219 * Returns not 0 represents error.
/*
 * Walk one tree with walk_down_tree_v2() and walk_up_tree_v2().
 *
 * @trans:   transaction handle handed to walk_down_tree_v2(); the caller
 *           check_fs_root_v2() passes NULL
 * @root:    tree to walk
 * @ext_ref: passed to check_fs_first_inode() and walk_down_tree_v2()
 *
 * The body also uses check_all; the parameter line declaring it is missing
 * from this listing. The walk starts at the root node when the root still
 * has refs or its drop_progress objectid is 0. Otherwise it resumes at the
 * drop_progress key on level drop_level.
 * NOTE(review): the loop construct, the error handling and the return
 * statement are not visible.
 */
6221 static int check_btrfs_root(struct btrfs_trans_handle *trans,
6222 struct btrfs_root *root, unsigned int ext_ref,
6226 struct btrfs_path path;
6227 struct node_refs nrefs;
6228 struct btrfs_root_item *root_item = &root->root_item;
6233 memset(&nrefs, 0, sizeof(nrefs));
6236 * We need to manually check the first inode item (256)
6237 * As the following traversal function will only start from
6238 * the first inode item in the leaf, if inode item (256) is
6239 * missing we will skip it forever.
6241 ret = check_fs_first_inode(root, ext_ref);
6247 level = btrfs_header_level(root->node);
6248 btrfs_init_path(&path);
6250 if (btrfs_root_refs(root_item) > 0 ||
6251 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6252 path.nodes[level] = root->node;
6253 path.slots[level] = 0;
6254 extent_buffer_get(root->node);
6256 struct btrfs_key key;
6258 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6259 level = root_item->drop_level;
6260 path.lowest_level = level;
6261 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6268 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6269 ext_ref, check_all);
6273 /* if ret is negative, walk shall stop */
6279 ret = walk_up_tree_v2(root, &path, &level);
6281 /* Normal exit, reset ret to err */
6288 btrfs_release_path(&path);
6293 * Iterate all items in the tree and call check_inode_item() to check.
6295 * @root: the root of the tree to be checked.
6296 * @ext_ref: the EXTENDED_IREF feature
6298 * Return 0 if no error found.
6299 * Return <0 for error.
/*
 * Check one fs tree.
 *
 * @root:    fs tree root to check
 * @ext_ref: passed through to check_btrfs_root()
 *
 * Resets the cached block groups of the filesystem, then returns the
 * result of check_btrfs_root() called with a NULL transaction and 0 as
 * the last argument.
 */
6301 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6303 reset_cached_block_groups(root->fs_info);
6304 return check_btrfs_root(NULL, root, ext_ref, 0);
6308 * Find the relative ref for root_ref and root_backref.
6310 * @root: the root of the root tree.
6311 * @ref_key: the key of the root ref.
6313 * Return 0 if no error occurred.
/*
 * Check that a ROOT_REF or ROOT_BACKREF item has a matching counterpart.
 *
 * @root:    tree that is searched for the counterpart item
 * @ref_key: key of the ref item being checked
 * @node:    leaf holding the ref item
 * @slot:    slot of the ref item inside @node
 *
 * The counterpart key swaps objectid and offset and uses the opposite
 * type. If it cannot be found, ROOT_REF_MISSING is set. If dirid,
 * sequence, name length or name differ, ROOT_REF_MISMATCH is set. Names
 * longer than BTRFS_NAME_LEN are warned about and read truncated to that
 * length.
 * NOTE(review): some short lines, including the return statement, are
 * missing from this listing.
 */
6315 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6316 struct extent_buffer *node, int slot)
6318 struct btrfs_path path;
6319 struct btrfs_key key;
6320 struct btrfs_root_ref *ref;
6321 struct btrfs_root_ref *backref;
6322 char ref_name[BTRFS_NAME_LEN] = {0};
6323 char backref_name[BTRFS_NAME_LEN] = {0};
6329 u32 backref_namelen;
6334 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6335 ref_dirid = btrfs_root_ref_dirid(node, ref);
6336 ref_seq = btrfs_root_ref_sequence(node, ref);
6337 ref_namelen = btrfs_root_ref_name_len(node, ref);
6339 if (ref_namelen <= BTRFS_NAME_LEN) {
6342 len = BTRFS_NAME_LEN;
6343 warning("%s[%llu %llu] ref_name too long",
6344 ref_key->type == BTRFS_ROOT_REF_KEY ?
6345 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
/* The name is read from the bytes right after the root_ref structure. */
6348 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6350 /* Find relative root_ref */
6351 key.objectid = ref_key->offset;
/* Sum of both types minus the own type gives the opposite type. */
6352 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6353 key.offset = ref_key->objectid;
6355 btrfs_init_path(&path);
6356 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6358 err |= ROOT_REF_MISSING;
6359 error("%s[%llu %llu] couldn't find relative ref",
6360 ref_key->type == BTRFS_ROOT_REF_KEY ?
6361 "ROOT_REF" : "ROOT_BACKREF",
6362 ref_key->objectid, ref_key->offset);
6366 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6367 struct btrfs_root_ref);
6368 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6369 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6370 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6372 if (backref_namelen <= BTRFS_NAME_LEN) {
6373 len = backref_namelen;
6375 len = BTRFS_NAME_LEN;
6376 warning("%s[%llu %llu] ref_name too long",
6377 key.type == BTRFS_ROOT_REF_KEY ?
6378 "ROOT_REF" : "ROOT_BACKREF",
6379 key.objectid, key.offset);
6381 read_extent_buffer(path.nodes[0], backref_name,
6382 (unsigned long)(backref + 1), len);
6384 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6385 ref_namelen != backref_namelen ||
6386 strncmp(ref_name, backref_name, len)) {
6387 err |= ROOT_REF_MISMATCH;
6388 error("%s[%llu %llu] mismatch relative ref",
6389 ref_key->type == BTRFS_ROOT_REF_KEY ?
6390 "ROOT_REF" : "ROOT_BACKREF",
6391 ref_key->objectid, ref_key->offset);
6394 btrfs_release_path(&path);
6399 * Check all fs/file tree in low_memory mode.
6401 * 1. for fs tree root item, call check_fs_root_v2()
6402 * 2. for fs tree root ref/backref, call check_root_ref()
6404 * Return 0 if no error occurred.
/*
 * Walk the tree root and check every fs/subvolume tree and root ref.
 *
 * @fs_info: filesystem to check
 *
 * The walk starts at the BTRFS_FS_TREE_OBJECTID root item and advances
 * with btrfs_next_item(); a test on key.objectid against
 * BTRFS_LAST_FREE_OBJECTID is visible. ROOT_ITEM keys accepted by
 * fs_root_objectid() are read and passed to check_fs_root_v2().
 * ROOT_REF and ROOT_BACKREF keys are passed to check_root_ref().
 * Tree reloc roots are read without the cache and freed after the check.
 * NOTE(review): the loop construct, the error accumulation and the return
 * statement are missing from this listing.
 */
6406 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6408 struct btrfs_root *tree_root = fs_info->tree_root;
6409 struct btrfs_root *cur_root = NULL;
6410 struct btrfs_path path;
6411 struct btrfs_key key;
6412 struct extent_buffer *node;
6413 unsigned int ext_ref;
6418 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6420 btrfs_init_path(&path);
6421 key.objectid = BTRFS_FS_TREE_OBJECTID;
6423 key.type = BTRFS_ROOT_ITEM_KEY;
6425 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6429 } else if (ret > 0) {
6435 node = path.nodes[0];
6436 slot = path.slots[0];
6437 btrfs_item_key_to_cpu(node, &key, slot);
6438 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6440 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6441 fs_root_objectid(key.objectid)) {
6442 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6443 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6446 key.offset = (u64)-1;
6447 cur_root = btrfs_read_fs_root(fs_info, &key);
6450 if (IS_ERR(cur_root)) {
6451 error("Fail to read fs/subvol tree: %lld",
6457 ret = check_fs_root_v2(cur_root, ext_ref);
/* Only the uncached reloc root is freed here. */
6460 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6461 btrfs_free_fs_root(cur_root);
6462 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6463 key.type == BTRFS_ROOT_BACKREF_KEY) {
6464 ret = check_root_ref(tree_root, &key, node, slot);
6468 ret = btrfs_next_item(tree_root, &path);
6478 btrfs_release_path(&path);
/*
 * Run the fs roots check for the selected check mode.
 *
 * @fs_info:    filesystem to check
 * @root_cache: cache tree handed to check_fs_roots() only
 *
 * Prints "checking fs roots" to stderr unless progress reporting is
 * enabled. With CHECK_MODE_LOWMEM it calls check_fs_roots_v2(); the other
 * call shown is check_fs_roots().
 * NOTE(review): the else line and the return statement are missing from
 * this listing.
 */
6482 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6483 struct cache_tree *root_cache)
6487 if (!ctx.progress_enabled)
6488 fprintf(stderr, "checking fs roots\n");
6489 if (check_mode == CHECK_MODE_LOWMEM)
6490 ret = check_fs_roots_v2(fs_info);
6492 ret = check_fs_roots(fs_info, root_cache);
/*
 * Verify every backref attached to an extent record.
 *
 * @rec:        extent record whose backref_tree is walked
 * @print_errs: its use is not visible in this listing; presumably it
 *              selects whether the messages below are printed - confirm
 *
 * Conditions checked for each backref:
 *  - it was found in the extent tree
 *  - a tree backref was referenced back
 *  - a data backref has found_ref equal to num_refs
 *  - a data backref has a disk_bytenr equal to rec->start
 *  - a data backref has a byte count equal to rec->nr
 * Data backrefs add their found_ref to a running total that is finally
 * compared with rec->refs. All messages go to stderr.
 * NOTE(review): the lines that set the error flag, the contribution of
 * tree backrefs to the total and the return statement are missing.
 */
6497 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6499 struct extent_backref *back, *tmp;
6500 struct tree_backref *tback;
6501 struct data_backref *dback;
6505 rbtree_postorder_for_each_entry_safe(back, tmp,
6506 &rec->backref_tree, node) {
6507 if (!back->found_extent_tree) {
6511 if (back->is_data) {
6512 dback = to_data_backref(back);
6513 fprintf(stderr, "Data backref %llu %s %llu"
6514 " owner %llu offset %llu num_refs %lu"
6515 " not found in extent tree\n",
6516 (unsigned long long)rec->start,
6517 back->full_backref ?
6519 back->full_backref ?
6520 (unsigned long long)dback->parent:
6521 (unsigned long long)dback->root,
6522 (unsigned long long)dback->owner,
6523 (unsigned long long)dback->offset,
6524 (unsigned long)dback->num_refs);
6526 tback = to_tree_backref(back);
6527 fprintf(stderr, "Tree backref %llu parent %llu"
6528 " root %llu not found in extent tree\n",
6529 (unsigned long long)rec->start,
6530 (unsigned long long)tback->parent,
6531 (unsigned long long)tback->root);
6534 if (!back->is_data && !back->found_ref) {
6538 tback = to_tree_backref(back);
6539 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6540 (unsigned long long)rec->start,
6541 back->full_backref ? "parent" : "root",
6542 back->full_backref ?
6543 (unsigned long long)tback->parent :
6544 (unsigned long long)tback->root, back);
6546 if (back->is_data) {
6547 dback = to_data_backref(back);
6548 if (dback->found_ref != dback->num_refs) {
6552 fprintf(stderr, "Incorrect local backref count"
6553 " on %llu %s %llu owner %llu"
6554 " offset %llu found %u wanted %u back %p\n",
6555 (unsigned long long)rec->start,
6556 back->full_backref ?
6558 back->full_backref ?
6559 (unsigned long long)dback->parent:
6560 (unsigned long long)dback->root,
6561 (unsigned long long)dback->owner,
6562 (unsigned long long)dback->offset,
6563 dback->found_ref, dback->num_refs, back);
6565 if (dback->disk_bytenr != rec->start) {
6569 fprintf(stderr, "Backref disk bytenr does not"
6570 " match extent record, bytenr=%llu, "
6571 "ref bytenr=%llu\n",
6572 (unsigned long long)rec->start,
6573 (unsigned long long)dback->disk_bytenr);
6576 if (dback->bytes != rec->nr) {
6580 fprintf(stderr, "Backref bytes do not match "
6581 "extent backref, bytenr=%llu, ref "
6582 "bytes=%llu, backref bytes=%llu\n",
6583 (unsigned long long)rec->start,
6584 (unsigned long long)rec->nr,
6585 (unsigned long long)dback->bytes);
6588 if (!back->is_data) {
6591 dback = to_data_backref(back);
6592 found += dback->found_ref;
/* The summed reference count must equal the count on the record. */
6595 if (found != rec->refs) {
6599 fprintf(stderr, "Incorrect global backref count "
6600 "on %llu found %llu wanted %llu\n",
6601 (unsigned long long)rec->start,
6602 (unsigned long long)found,
6603 (unsigned long long)rec->refs);
/*
 * Callback handed to rb_free_nodes(): converts @node back to its
 * extent_backref.
 * NOTE(review): the statement releasing the backref is missing from this
 * listing.
 */
6609 static void __free_one_backref(struct rb_node *node)
6611 struct extent_backref *back = rb_node_to_extent_backref(node);
/*
 * Release every backref of @rec by running __free_one_backref over
 * rec->backref_tree through rb_free_nodes().
 */
6616 static void free_all_extent_backrefs(struct extent_record *rec)
6618 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Drain @extent_cache: take the first cache extent, remove it from the
 * cache and free the backrefs of the extent_record that embeds it.
 * NOTE(review): the loop construct, its termination test and the release
 * of the record itself are missing from this listing.
 */
6621 static void free_extent_record_cache(struct cache_tree *extent_cache)
6623 struct cache_extent *cache;
6624 struct extent_record *rec;
6627 cache = first_cache_extent(extent_cache);
6630 rec = container_of(cache, struct extent_record, cache);
6631 remove_cache_extent(extent_cache, cache);
6632 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing is left to verify on it.
 *
 * The record is removed only when all of the following hold:
 *  - content and owner ref have been checked
 *  - extent_item_refs equals refs and refs is positive
 *  - there are no duplicates
 *  - all_backpointers_checked() returns 0
 *  - none of bad_full_backref, crossing_stripes or wrong_chunk_type is set
 * On removal the backrefs are freed and the record is unlinked from its
 * list.
 * NOTE(review): the release of the record and the return values are
 * missing from this listing.
 */
6637 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6638 struct extent_record *rec)
6640 if (rec->content_checked && rec->owner_ref_checked &&
6641 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6642 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6643 !rec->bad_full_backref && !rec->crossing_stripes &&
6644 !rec->wrong_chunk_type) {
6645 remove_cache_extent(extent_cache, &rec->cache);
6646 free_all_extent_backrefs(rec);
6647 list_del_init(&rec->list);
/*
 * Check that the owner recorded in the header of @buf really references
 * the block.
 *
 * @root: any root of the filesystem, used to reach fs_info
 * @rec:  extent record of the block
 * @buf:  the tree block
 *
 * The backrefs of @rec are scanned first and compared against the header
 * owner. The fallback reads the owner tree, searches for the first key of
 * @buf with lowest_level set one above the level of @buf, and compares
 * the block pointer found there with buf->start.
 *
 * Returns 0 when a reference was found, 1 otherwise.
 * NOTE(review): the continue and early-return lines of the scan are
 * missing from this listing.
 */
6653 static int check_owner_ref(struct btrfs_root *root,
6654 struct extent_record *rec,
6655 struct extent_buffer *buf)
6657 struct extent_backref *node, *tmp;
6658 struct tree_backref *back;
6659 struct btrfs_root *ref_root;
6660 struct btrfs_key key;
6661 struct btrfs_path path;
6662 struct extent_buffer *parent;
6667 rbtree_postorder_for_each_entry_safe(node, tmp,
6668 &rec->backref_tree, node) {
6671 if (!node->found_ref)
6673 if (node->full_backref)
6675 back = to_tree_backref(node);
6676 if (btrfs_header_owner(buf) == back->root)
6679 BUG_ON(rec->is_root);
6681 /* try to find the block by search corresponding fs tree */
6682 key.objectid = btrfs_header_owner(buf);
6683 key.type = BTRFS_ROOT_ITEM_KEY;
6684 key.offset = (u64)-1;
6686 ref_root = btrfs_read_fs_root(root->fs_info, &key);
6687 if (IS_ERR(ref_root))
6690 level = btrfs_header_level(buf);
6692 btrfs_item_key_to_cpu(buf, &key, 0);
6694 btrfs_node_key_to_cpu(buf, &key, 0);
6696 btrfs_init_path(&path);
/* Stop the search one level above the block to land on its parent. */
6697 path.lowest_level = level + 1;
6698 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6702 parent = path.nodes[level + 1];
6703 if (parent && buf->start == btrfs_node_blockptr(parent,
6704 path.slots[level + 1]))
6707 btrfs_release_path(&path);
6708 return found ? 0 : 1;
/*
 * Inspect the backrefs of @rec for full backrefs and for tree backrefs
 * whose root is BTRFS_EXTENT_TREE_OBJECTID.
 * NOTE(review): the statements executed for each test and the return
 * value are missing from this listing; record_bad_block_io() treats a
 * zero result as "not an extent tree record".
 */
6711 static int is_extent_tree_record(struct extent_record *rec)
6713 struct extent_backref *node, *tmp;
6714 struct tree_backref *back;
6717 rbtree_postorder_for_each_entry_safe(node, tmp,
6718 &rec->backref_tree, node) {
6721 back = to_tree_backref(node);
6722 if (node->full_backref)
6724 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Record a corrupt extent for an unreadable block.
 *
 * @info:         filesystem info handed to the corrupt-extent recorder
 * @extent_cache: cache searched for the extent record covering the block
 *
 * The body also uses start and len; the parameter line declaring them is
 * missing from this listing. The extent record is looked up by range and
 * tested with is_extent_tree_record(). The result of
 * btrfs_add_corrupt_extent_record(), called with the parent key of the
 * record, is returned.
 * NOTE(review): the early-return lines are not visible.
 */
6731 static int record_bad_block_io(struct btrfs_fs_info *info,
6732 struct cache_tree *extent_cache,
6735 struct extent_record *rec;
6736 struct cache_extent *cache;
6737 struct btrfs_key key;
6739 cache = lookup_cache_extent(extent_cache, start, len);
6743 rec = container_of(cache, struct extent_record, cache);
6744 if (!is_extent_tree_record(rec))
6747 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6748 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot + 1 of a tree block.
 *
 * @root: tree the block belongs to
 * @path: path used for the key fixups; slots[0] is modified in the leaf
 *        branch
 * @buf:  block to modify
 * @slot: index of the first of the two adjacent entries
 *
 * For a node (non-zero header level) the two key pointers are exchanged
 * and btrfs_fixup_low_keys() is called with the first key of the block.
 * For a leaf the item data is copied through two temporary buffers, the
 * offsets and sizes of the two item headers are exchanged and the keys
 * are rewritten with btrfs_set_item_key_unsafe().
 * NOTE(review): both write_extent_buffer() calls in the leaf branch use
 * the size of the other item as length while the source buffers were
 * allocated with their own size; confirm this is intended when the two
 * item sizes differ.
 * NOTE(review): allocation failure handling, the release of the temporary
 * buffers and the return statement are missing from this listing.
 */
6751 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6752 struct extent_buffer *buf, int slot)
6754 if (btrfs_header_level(buf)) {
6755 struct btrfs_key_ptr ptr1, ptr2;
6757 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6758 sizeof(struct btrfs_key_ptr));
6759 read_extent_buffer(buf, &ptr2,
6760 btrfs_node_key_ptr_offset(slot + 1),
6761 sizeof(struct btrfs_key_ptr));
6762 write_extent_buffer(buf, &ptr1,
6763 btrfs_node_key_ptr_offset(slot + 1),
6764 sizeof(struct btrfs_key_ptr));
6765 write_extent_buffer(buf, &ptr2,
6766 btrfs_node_key_ptr_offset(slot),
6767 sizeof(struct btrfs_key_ptr));
6769 struct btrfs_disk_key key;
6770 btrfs_node_key(buf, &key, 0);
6771 btrfs_fixup_low_keys(root, path, &key,
6772 btrfs_header_level(buf) + 1);
6775 struct btrfs_item *item1, *item2;
6776 struct btrfs_key k1, k2;
6777 char *item1_data, *item2_data;
6778 u32 item1_offset, item2_offset, item1_size, item2_size;
6780 item1 = btrfs_item_nr(slot);
6781 item2 = btrfs_item_nr(slot + 1);
6782 btrfs_item_key_to_cpu(buf, &k1, slot);
6783 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6784 item1_offset = btrfs_item_offset(buf, item1);
6785 item2_offset = btrfs_item_offset(buf, item2);
6786 item1_size = btrfs_item_size(buf, item1);
6787 item2_size = btrfs_item_size(buf, item2);
6789 item1_data = malloc(item1_size);
6792 item2_data = malloc(item2_size);
6798 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6799 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
6801 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6802 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6806 btrfs_set_item_offset(buf, item1, item2_offset);
6807 btrfs_set_item_offset(buf, item2, item1_offset);
6808 btrfs_set_item_size(buf, item1, item2_size);
6809 btrfs_set_item_size(buf, item2, item1_size);
6811 path->slots[0] = slot;
6812 btrfs_set_item_key_unsafe(root, path, &k2);
6813 path->slots[0] = slot + 1;
6814 btrfs_set_item_key_unsafe(root, path, &k1);
6819 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6821 struct extent_buffer *buf;
6822 struct btrfs_key k1, k2;
6824 int level = path->lowest_level;
6827 buf = path->nodes[level];
6828 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6830 btrfs_node_key_to_cpu(buf, &k1, i);
6831 btrfs_node_key_to_cpu(buf, &k2, i + 1);
6833 btrfs_item_key_to_cpu(buf, &k1, i);
6834 btrfs_item_key_to_cpu(buf, &k2, i + 1);
6836 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6838 ret = swap_values(root, path, buf, i);
6841 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * @root: tree the leaf belongs to
 * @path: path used for the low key fixup
 * @buf:  leaf to modify
 * @slot: index of the item to drop
 *
 * Only DIR_INDEX, EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF and
 * EXTENT_DATA_REF keys are handled. The item headers after @slot are
 * moved down by one, nritems is decremented and the leaf is marked dirty.
 * A call to btrfs_fixup_low_keys() with the first key of the leaf is also
 * visible.
 * NOTE(review): the result for unsupported key types, the condition
 * guarding the low key fixup and the return statement are missing from
 * this listing.
 */
6847 static int delete_bogus_item(struct btrfs_root *root,
6848 struct btrfs_path *path,
6849 struct extent_buffer *buf, int slot)
6851 struct btrfs_key key;
6852 int nritems = btrfs_header_nritems(buf);
6854 btrfs_item_key_to_cpu(buf, &key, slot);
6856 /* These are all the keys we can deal with missing. */
6857 if (key.type != BTRFS_DIR_INDEX_KEY &&
6858 key.type != BTRFS_EXTENT_ITEM_KEY &&
6859 key.type != BTRFS_METADATA_ITEM_KEY &&
6860 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6861 key.type != BTRFS_EXTENT_DATA_REF_KEY)
6864 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6865 (unsigned long long)key.objectid, key.type,
6866 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array; item data is left in place. */
6867 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6868 btrfs_item_nr_offset(slot + 1),
6869 sizeof(struct btrfs_item) *
6870 (nritems - slot - 1));
6871 btrfs_set_header_nritems(buf, nritems - 1);
6873 struct btrfs_disk_key disk_key;
6875 btrfs_item_key(buf, &disk_key, 0);
6876 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6878 btrfs_mark_buffer_dirty(buf);
/*
 * Repair the data offsets of the items in the leaf at path->nodes[0].
 *
 * Expected layout: the data of item 0 ends at BTRFS_LEAF_DATA_SIZE, and the
 * data of every later item ends exactly where the previous item's data
 * starts.  An item that ends short of that boundary gets its data moved up
 * by the difference; an item that ends beyond it is passed to
 * delete_bogus_item().
 */
6882 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6884 struct extent_buffer *buf;
6888 /* We should only get this for leaves */
6889 BUG_ON(path->lowest_level);
6890 buf = path->nodes[0];
6892 for (i = 0; i < btrfs_header_nritems(buf); i++) {
6893 unsigned int shift = 0, offset;
/* First item: its data must end at the very end of the leaf data area. */
6895 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6896 BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
6897 if (btrfs_item_end_nr(buf, i) >
6898 BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
6899 ret = delete_bogus_item(root, path, buf, i);
6902 fprintf(stderr, "item is off the end of the "
6903 "leaf, can't fix\n");
/* Item ends too early: distance it has to move towards the leaf end. */
6907 shift = BTRFS_LEAF_DATA_SIZE(root->fs_info) -
6908 btrfs_item_end_nr(buf, i);
/* Later items: data must end where the previous item's data begins. */
6909 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6910 btrfs_item_offset_nr(buf, i - 1)) {
6911 if (btrfs_item_end_nr(buf, i) >
6912 btrfs_item_offset_nr(buf, i - 1)) {
6913 ret = delete_bogus_item(root, path, buf, i);
6916 fprintf(stderr, "items overlap, can't fix\n");
/* Gap between this item and the previous one: close it by shifting up. */
6920 shift = btrfs_item_offset_nr(buf, i - 1) -
6921 btrfs_item_end_nr(buf, i);
6926 printf("Shifting item nr %d by %u bytes in block %llu\n",
6927 i, shift, (unsigned long long)buf->start);
6928 offset = btrfs_item_offset_nr(buf, i);
/* Move the item's data bytes up by @shift within the leaf data area. */
6929 memmove_extent_buffer(buf,
6930 btrfs_leaf_data(buf) + offset + shift,
6931 btrfs_leaf_data(buf) + offset,
6932 btrfs_item_size_nr(buf, i));
6933 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6935 btrfs_mark_buffer_dirty(buf);
6939 * We may have moved things, in which case we want to exit so we don't
6940 * write those changes out. Once we have proper abort functionality in
6941 * progs this can be changed to something nicer.
6948 * Attempt to fix basic block failures. If we can't fix it for whatever reason
6949 * then just return -EIO.
/*
 * For each root that references @buf, search down to the block inside a
 * transaction, apply the repair that matches @status (fix_key_order() or
 * fix_item_offset()) and commit.  Only BTRFS_TREE_BLOCK_BAD_KEY_ORDER and
 * BTRFS_TREE_BLOCK_INVALID_OFFSETS are handled.
 */
6951 static int try_to_fix_bad_block(struct btrfs_root *root,
6952 struct extent_buffer *buf,
6953 enum btrfs_tree_block_status status)
6955 struct btrfs_trans_handle *trans;
6956 struct ulist *roots;
6957 struct ulist_node *node;
6958 struct btrfs_root *search_root;
6959 struct btrfs_path path;
6960 struct ulist_iterator iter;
6961 struct btrfs_key root_key, key;
6964 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6965 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
/* Collect into @roots every root that references the block at buf->start. */
6968 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6972 btrfs_init_path(&path);
6973 ULIST_ITER_INIT(&iter);
6974 while ((node = ulist_next(roots, &iter))) {
/* node->val is used as the objectid of the root to open. */
6975 root_key.objectid = node->val;
6976 root_key.type = BTRFS_ROOT_ITEM_KEY;
6977 root_key.offset = (u64)-1;
6979 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6986 trans = btrfs_start_transaction(search_root, 0);
6987 if (IS_ERR(trans)) {
6988 ret = PTR_ERR(trans);
/*
 * Stop the search at the level of the bad block and skip the per-block
 * check (presumably so the search does not reject the damaged block).
 */
6992 path.lowest_level = btrfs_header_level(buf);
6993 path.skip_check_block = 1;
/* Search for the first key stored in the bad block. */
6994 if (path.lowest_level)
6995 btrfs_node_key_to_cpu(buf, &key, 0);
6997 btrfs_item_key_to_cpu(buf, &key, 0);
/* NOTE(review): final argument 1 looks like the cow flag -- confirm. */
6998 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
7001 btrfs_commit_transaction(trans, search_root);
/* Dispatch to the repair routine for this kind of damage. */
7004 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
7005 ret = fix_key_order(search_root, &path);
7006 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7007 ret = fix_item_offset(search_root, &path);
7009 btrfs_commit_transaction(trans, search_root);
7012 btrfs_release_path(&path);
7013 btrfs_commit_transaction(trans, search_root);
7016 btrfs_release_path(&path);
/*
 * Validate the contents of tree block @buf and update its extent record.
 *
 * The extent record covering @buf is looked up in @extent_cache and filled
 * with the block's generation, level and the objectid of its first key.
 * The block is then checked with btrfs_check_leaf()/btrfs_check_node()
 * against rec->parent_key; a block that is not clean may be passed to
 * try_to_fix_bad_block().  Finally the content/owner-ref checked flags are
 * updated and the record is offered to maybe_free_extent_rec().
 */
7020 static int check_block(struct btrfs_root *root,
7021 struct cache_tree *extent_cache,
7022 struct extent_buffer *buf, u64 flags)
7024 struct extent_record *rec;
7025 struct cache_extent *cache;
7026 struct btrfs_key key;
7027 enum btrfs_tree_block_status status;
7031 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
7034 rec = container_of(cache, struct extent_record, cache);
7035 rec->generation = btrfs_header_generation(buf);
7037 level = btrfs_header_level(buf);
/* Remember the objectid of the first key, read per block type. */
7038 if (btrfs_header_nritems(buf) > 0) {
7041 btrfs_item_key_to_cpu(buf, &key, 0);
7043 btrfs_node_key_to_cpu(buf, &key, 0);
7045 rec->info_objectid = key.objectid;
7047 rec->info_level = level;
7049 if (btrfs_is_leaf(buf))
7050 status = btrfs_check_leaf(root, &rec->parent_key, buf);
7052 status = btrfs_check_node(root, &rec->parent_key, buf);
7054 if (status != BTRFS_TREE_BLOCK_CLEAN) {
/*
 * NOTE(review): try_to_fix_bad_block() returns int but the result is
 * stored in the enum status variable -- confirm that a successful
 * return compares equal to BTRFS_TREE_BLOCK_CLEAN.
 */
7056 status = try_to_fix_bad_block(root, buf, status);
7057 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7059 fprintf(stderr, "bad block %llu\n",
7060 (unsigned long long)buf->start);
7063 * Signal to callers we need to start the scan over
7064 * again since we'll have cowed blocks.
7069 rec->content_checked = 1;
/* Full-backref blocks are marked owner-checked without calling check_owner_ref(). */
7070 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7071 rec->owner_ref_checked = 1;
7073 ret = check_owner_ref(root, rec, buf);
7075 rec->owner_ref_checked = 1;
7079 maybe_free_extent_rec(extent_cache, rec);
/*
 * Search the rec->backrefs list of @rec for a tree backref that matches
 * @parent (compared against back->parent) or @root (compared against
 * back->root); node->full_backref selects which entries are eligible for
 * each comparison.
 *
 * NOTE(review): add_tree_backref() in this file inserts new backrefs into
 * rec->backref_tree; confirm that the rec->backrefs list walked here is
 * populated as well.
 */
7084 static struct tree_backref *find_tree_backref(struct extent_record *rec,
7085 u64 parent, u64 root)
7087 struct list_head *cur = rec->backrefs.next;
7088 struct extent_backref *node;
7089 struct tree_backref *back;
7091 while(cur != &rec->backrefs) {
7092 node = to_extent_backref(cur);
7096 back = to_tree_backref(node);
/* Parent comparison: entries without full_backref are tested first. */
7098 if (!node->full_backref)
7100 if (parent == back->parent)
/* Root comparison: entries with full_backref are tested first. */
7103 if (node->full_backref)
7105 if (back->root == root)
/*
 * Allocate a tree_backref for @rec with a zeroed embedded node.  The
 * backref is set up either as a full backref (ref->parent = @parent,
 * full_backref = 1) or as a normal one (full_backref = 0).
 * NOTE(review): the selecting condition is presumably "@parent != 0" --
 * confirm.
 */
7113 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
7114 u64 parent, u64 root)
7116 struct tree_backref *ref = malloc(sizeof(*ref));
/* Only the embedded extent_backref node is zeroed, not the whole struct. */
7120 memset(&ref->node, 0, sizeof(ref->node));
7122 ref->parent = parent;
7123 ref->node.full_backref = 1;
7126 ref->node.full_backref = 0;
/*
 * Search the rec->backrefs list of @rec for a data backref that matches
 * @parent (compared against back->parent) or the (@root, @owner, @offset)
 * triple.  For a triple match there is an extra test: when both @found_ref
 * and node->found_ref are set and the entry's bytes or disk_bytenr differ
 * from @bytes/@disk_bytenr, the entry gets special handling (presumably it
 * is skipped -- confirm).
 *
 * NOTE(review): add_data_backref() in this file inserts new backrefs into
 * rec->backref_tree; confirm that the rec->backrefs list walked here is
 * populated as well.
 */
7133 static struct data_backref *find_data_backref(struct extent_record *rec,
7134 u64 parent, u64 root,
7135 u64 owner, u64 offset,
7137 u64 disk_bytenr, u64 bytes)
7139 struct list_head *cur = rec->backrefs.next;
7140 struct extent_backref *node;
7141 struct data_backref *back;
7143 while(cur != &rec->backrefs) {
7144 node = to_extent_backref(cur);
7148 back = to_data_backref(node);
7150 if (!node->full_backref)
7152 if (parent == back->parent)
7155 if (node->full_backref)
7157 if (back->root == root && back->owner == owner &&
7158 back->offset == offset) {
/* Same file position but a different extent size or disk location. */
7159 if (found_ref && node->found_ref &&
7160 (back->bytes != bytes ||
7161 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data_backref for @rec.  The embedded node is zeroed and
 * flagged as data; the backref is set up either as a full backref
 * (ref->parent = @parent, full_backref = 1) or as a normal one carrying
 * @offset (full_backref = 0).  ref->bytes is set to max_size, and
 * rec->max_size is raised to max_size when that is larger.
 */
7171 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7172 u64 parent, u64 root,
7173 u64 owner, u64 offset,
7176 struct data_backref *ref = malloc(sizeof(*ref));
/* Only the embedded extent_backref node is zeroed, not the whole struct. */
7180 memset(&ref->node, 0, sizeof(ref->node));
7181 ref->node.is_data = 1;
7184 ref->parent = parent;
7187 ref->node.full_backref = 1;
7191 ref->offset = offset;
7192 ref->node.full_backref = 0;
7194 ref->bytes = max_size;
/* Keep the record's max_size as the largest size seen for this extent. */
7197 if (max_size > rec->max_size)
7198 rec->max_size = max_size;
7202 /* Check if the type of extent matches with its chunk */
/*
 * Set rec->wrong_chunk_type when the extent described by @rec lives in a
 * block group whose flags do not fit the extent:
 *  - data extents need BTRFS_BLOCK_GROUP_DATA;
 *  - metadata extents need SYSTEM or METADATA, and when a backref is
 *    available the first backref decides which of the two is required.
 * The block group is looked up through the file-scope global_info.
 */
7203 static void check_extent_type(struct extent_record *rec)
7205 struct btrfs_block_group_cache *bg_cache;
7207 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7211 /* data extent, check chunk directly*/
7212 if (!rec->metadata) {
7213 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7214 rec->wrong_chunk_type = 1;
7218 /* metadata extent, check the obvious case first */
7219 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7220 BTRFS_BLOCK_GROUP_METADATA))) {
7221 rec->wrong_chunk_type = 1;
7226 * Check SYSTEM extent, as it's also marked as metadata, we can only
7227 * make sure it's a SYSTEM extent by its backref
7229 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7230 struct extent_backref *node;
7231 struct tree_backref *tback;
/* Only the first backref in the rb-tree is inspected. */
7234 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7235 if (node->is_data) {
7236 /* tree block shouldn't have data backref */
7237 rec->wrong_chunk_type = 1;
7240 tback = container_of(node, struct tree_backref, node);
/* Blocks owned by the chunk tree must sit in a SYSTEM block group. */
7242 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7243 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7245 bg_type = BTRFS_BLOCK_GROUP_METADATA;
7246 if (!(bg_cache->flags & bg_type))
7247 rec->wrong_chunk_type = 1;
7252 * Allocate a new extent record, fill default values from @tmpl and insert into
7253 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7254 * the cache, otherwise it fails.
/*
 * Build a fresh extent_record from the template @tmpl and insert it into
 * @extent_cache without looking for an existing entry first.
 *
 * Fields are either copied from @tmpl or reset to defaults (see the
 * assignments below).  After insertion the record's size is added to the
 * bytes_used counter, and the stripe-crossing and chunk-type checks are
 * run on the new record.  @tmpl->max_size must be non-zero (BUG_ON).
 */
7256 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7257 struct extent_record *tmpl)
7259 struct extent_record *rec;
7262 BUG_ON(tmpl->max_size == 0);
7263 rec = malloc(sizeof(*rec));
7266 rec->start = tmpl->start;
7267 rec->max_size = tmpl->max_size;
/* rec->nr takes the larger of nr/max_size; the cache entry below uses tmpl->nr. */
7268 rec->nr = max(tmpl->nr, tmpl->max_size);
7269 rec->found_rec = tmpl->found_rec;
7270 rec->content_checked = tmpl->content_checked;
7271 rec->owner_ref_checked = tmpl->owner_ref_checked;
7272 rec->num_duplicates = 0;
7273 rec->metadata = tmpl->metadata;
7274 rec->flag_block_full_backref = FLAG_UNSET;
7275 rec->bad_full_backref = 0;
7276 rec->crossing_stripes = 0;
7277 rec->wrong_chunk_type = 0;
7278 rec->is_root = tmpl->is_root;
7279 rec->refs = tmpl->refs;
7280 rec->extent_item_refs = tmpl->extent_item_refs;
7281 rec->parent_generation = tmpl->parent_generation;
7282 INIT_LIST_HEAD(&rec->backrefs);
7283 INIT_LIST_HEAD(&rec->dups);
7284 INIT_LIST_HEAD(&rec->list);
7285 rec->backref_tree = RB_ROOT;
7286 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7287 rec->cache.start = tmpl->start;
7288 rec->cache.size = tmpl->nr;
7289 ret = insert_cache_extent(extent_cache, &rec->cache);
7294 bytes_used += rec->nr;
/* NOTE(review): presumably only evaluated for metadata records -- confirm. */
7297 rec->crossing_stripes = check_crossing_stripes(global_info,
7298 rec->start, global_info->nodesize);
7299 check_extent_type(rec);
7304 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7306 * - refs - if found, increase refs
7307 * - is_root - if found, set
7308 * - content_checked - if found, set
7309 * - owner_ref_checked - if found, set
7311 * If not found, create a new one, initialize and insert.
/*
 * Merge the template @tmpl into the extent record overlapping
 * [tmpl->start, tmpl->start + tmpl->nr) in @extent_cache, or create a new
 * record through add_extent_rec_nolookup() when there is none.
 *
 * When an existing record is found and @tmpl->found_rec is set, a template
 * with a different start, or a record that already has found_rec set, is
 * treated as a duplicate: the record is queued on the global
 * duplicate_extents list and a small tracking record is appended to
 * rec->dups.
 */
7313 static int add_extent_rec(struct cache_tree *extent_cache,
7314 struct extent_record *tmpl)
7316 struct extent_record *rec;
7317 struct cache_extent *cache;
7321 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7323 rec = container_of(cache, struct extent_record, cache);
7327 rec->nr = max(tmpl->nr, tmpl->max_size);
7330 * We need to make sure to reset nr to whatever the extent
7331 * record says was the real size, this way we can compare it to
7334 if (tmpl->found_rec) {
7335 if (tmpl->start != rec->start || rec->found_rec) {
7336 struct extent_record *tmp;
/* Queue the record on the global duplicate list only once. */
7339 if (list_empty(&rec->list))
7340 list_add_tail(&rec->list,
7341 &duplicate_extents);
7344 * We have to do this song and dance in case we
7345 * find an extent record that falls inside of
7346 * our current extent record but does not have
7347 * the same objectid.
7349 tmp = malloc(sizeof(*tmp));
7352 tmp->start = tmpl->start;
7353 tmp->max_size = tmpl->max_size;
7356 tmp->metadata = tmpl->metadata;
7357 tmp->extent_item_refs = tmpl->extent_item_refs;
7358 INIT_LIST_HEAD(&tmp->list);
7359 list_add_tail(&tmp->list, &rec->dups);
7360 rec->num_duplicates++;
/* Report when the record already carries an extent item ref count. */
7367 if (tmpl->extent_item_refs && !dup) {
7368 if (rec->extent_item_refs) {
7369 fprintf(stderr, "block %llu rec "
7370 "extent_item_refs %llu, passed %llu\n",
7371 (unsigned long long)tmpl->start,
7372 (unsigned long long)
7373 rec->extent_item_refs,
7374 (unsigned long long)tmpl->extent_item_refs);
7376 rec->extent_item_refs = tmpl->extent_item_refs;
/* The checked flags are only ever set here, never cleared. */
7380 if (tmpl->content_checked)
7381 rec->content_checked = 1;
7382 if (tmpl->owner_ref_checked)
7383 rec->owner_ref_checked = 1;
7384 memcpy(&rec->parent_key, &tmpl->parent_key,
7385 sizeof(tmpl->parent_key));
7386 if (tmpl->parent_generation)
7387 rec->parent_generation = tmpl->parent_generation;
7388 if (rec->max_size < tmpl->max_size)
7389 rec->max_size = tmpl->max_size;
7392 * A metadata extent can't cross stripe_len boundary, otherwise
7393 * kernel scrub won't be able to handle it.
7394 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7398 rec->crossing_stripes = check_crossing_stripes(
7399 global_info, rec->start,
7400 global_info->nodesize);
7401 check_extent_type(rec);
7402 maybe_free_extent_rec(extent_cache, rec);
/* No overlapping record was found: create one from the template. */
7406 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref (@parent or @root) for the extent at @bytenr.
 *
 * A placeholder extent record is created when none covers @bytenr yet.
 * The matching backref is looked up, or allocated when missing, and then
 * flagged as seen: node.found_ref or node.found_extent_tree is set
 * (NOTE(review): presumably selected by @found_ref -- confirm).  A flag
 * that was already set is reported on stderr.  New backrefs are inserted
 * into rec->backref_tree.
 */
7411 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7412 u64 parent, u64 root, int found_ref)
7414 struct extent_record *rec;
7415 struct tree_backref *back;
7416 struct cache_extent *cache;
7418 bool insert = false;
7420 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7422 struct extent_record tmpl;
7424 memset(&tmpl, 0, sizeof(tmpl));
7425 tmpl.start = bytenr;
7430 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7434 /* really a bug in the cache_extent implementation now */
7435 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7440 rec = container_of(cache, struct extent_record, cache);
7441 if (rec->start != bytenr) {
7443 * Several cause, from unaligned bytenr to over lapping extents
7448 back = find_tree_backref(rec, parent, root);
7450 back = alloc_tree_backref(rec, parent, root);
7457 if (back->node.found_ref) {
7458 fprintf(stderr, "Extent back ref already exists "
7459 "for %llu parent %llu root %llu \n",
7460 (unsigned long long)bytenr,
7461 (unsigned long long)parent,
7462 (unsigned long long)root);
7464 back->node.found_ref = 1;
7466 if (back->node.found_extent_tree) {
7467 fprintf(stderr, "Extent back ref already exists "
7468 "for %llu parent %llu root %llu \n",
7469 (unsigned long long)bytenr,
7470 (unsigned long long)parent,
7471 (unsigned long long)root);
7473 back->node.found_extent_tree = 1;
/* A non-zero rb_insert() result only triggers a warning. */
7476 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7477 compare_extent_backref));
7478 check_extent_type(rec);
7479 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * A placeholder extent record is created when none covers @bytenr yet, and
 * rec->max_size is raised to @max_size when that is larger.  The matching
 * backref is looked up, or allocated when missing.  Two updates are
 * visible below (NOTE(review): presumably selected by @found_ref --
 * confirm):
 *  - reference side: @num_refs must be 1; found_ref counters are bumped,
 *    bytes/disk_bytenr are refreshed and the record is marked content and
 *    owner-ref checked;
 *  - extent-tree side: num_refs is stored and found_extent_tree is set,
 *    with a message on stderr when it was already set.
 */
7483 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7484 u64 parent, u64 root, u64 owner, u64 offset,
7485 u32 num_refs, int found_ref, u64 max_size)
7487 struct extent_record *rec;
7488 struct data_backref *back;
7489 struct cache_extent *cache;
7491 bool insert = false;
7493 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7495 struct extent_record tmpl;
7497 memset(&tmpl, 0, sizeof(tmpl));
7498 tmpl.start = bytenr;
7500 tmpl.max_size = max_size;
7502 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7506 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7511 rec = container_of(cache, struct extent_record, cache);
7512 if (rec->max_size < max_size)
7513 rec->max_size = max_size;
7516 * If found_ref is set then max_size is the real size and must match the
7517 * existing refs. So if we have already found a ref then we need to
7518 * make sure that this ref matches the existing one, otherwise we need
7519 * to add a new backref so we can notice that the backrefs don't match
7520 * and we need to figure out who is telling the truth. This is to
7521 * account for that awful fsync bug I introduced where we'd end up with
7522 * a btrfs_file_extent_item that would have its length include multiple
7523 * prealloc extents or point inside of a prealloc extent.
7525 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7528 back = alloc_data_backref(rec, parent, root, owner, offset,
7535 BUG_ON(num_refs != 1);
7536 if (back->node.found_ref)
7537 BUG_ON(back->bytes != max_size);
7538 back->node.found_ref = 1;
7539 back->found_ref += 1;
/* bytes/disk_bytenr take part in ordering, hence the erase + reinsert below. */
7540 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7541 back->bytes = max_size;
7542 back->disk_bytenr = bytenr;
7544 /* Need to reinsert if not already in the tree */
7546 rb_erase(&back->node.node, &rec->backref_tree);
7551 rec->content_checked = 1;
7552 rec->owner_ref_checked = 1;
7554 if (back->node.found_extent_tree) {
7555 fprintf(stderr, "Extent back ref already exists "
7556 "for %llu parent %llu root %llu "
7557 "owner %llu offset %llu num_refs %lu\n",
7558 (unsigned long long)bytenr,
7559 (unsigned long long)parent,
7560 (unsigned long long)root,
7561 (unsigned long long)owner,
7562 (unsigned long long)offset,
7563 (unsigned long)num_refs);
7565 back->num_refs = num_refs;
7566 back->node.found_extent_tree = 1;
/* A non-zero rb_insert() result only triggers a warning. */
7569 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7570 compare_extent_backref));
7572 maybe_free_extent_rec(extent_cache, rec);
/*
 * Register the range [@bytenr, @bytenr + @size) in the @seen tree and add
 * it to the @pending tree as well.  The return value of the insertion into
 * @pending is not captured.
 */
7576 static int add_pending(struct cache_tree *pending,
7577 struct cache_tree *seen, u64 bytenr, u32 size)
7580 ret = add_cache_extent(seen, bytenr, size);
7583 add_cache_extent(pending, bytenr, size);
/*
 * Fill @bits (capacity @bits_nr) with the next block ranges to process.
 *
 * Sources, in the order they are consulted below: the first entry of
 * @reada; entries of @nodes starting near @last (at most 32768 bytes
 * before it, falling back to the start of @nodes); entries of @pending.
 * When at least 8 free slots remain after taking @nodes entries, ranges
 * from @pending that lie within 32768 bytes of the running end position
 * are appended too.
 */
7587 static int pick_next_pending(struct cache_tree *pending,
7588 struct cache_tree *reada,
7589 struct cache_tree *nodes,
7590 u64 last, struct block_info *bits, int bits_nr,
/*
 * NOTE(review): @last is u64 but stored in an unsigned long, which
 * truncates where unsigned long is 32 bits wide.
 */
7593 unsigned long node_start = last;
7594 struct cache_extent *cache;
7597 cache = search_cache_extent(reada, 0);
7599 bits[0].start = cache->start;
7600 bits[0].size = cache->size;
/* Start looking slightly before @last. */
7605 if (node_start > 32768)
7606 node_start -= 32768;
7608 cache = search_cache_extent(nodes, node_start);
7610 cache = search_cache_extent(nodes, 0);
7613 cache = search_cache_extent(pending, 0);
7618 bits[ret].start = cache->start;
7619 bits[ret].size = cache->size;
7620 cache = next_cache_extent(cache);
7622 } while (cache && ret < bits_nr);
7628 bits[ret].start = cache->start;
7629 bits[ret].size = cache->size;
7630 cache = next_cache_extent(cache);
7632 } while (cache && ret < bits_nr);
/* Room left: also pick up pending ranges close to what was selected. */
7634 if (bits_nr - ret > 8) {
7635 u64 lookup = bits[0].start + bits[0].size;
7636 struct cache_extent *next;
7637 next = search_cache_extent(pending, lookup);
7639 if (next->start - lookup > 32768)
7641 bits[ret].start = next->start;
7642 bits[ret].size = next->size;
7643 lookup = next->start + next->size;
7647 next = next_cache_extent(next);
/*
 * Per-entry callback for the chunk cache tree: unlink the chunk_record
 * that embeds @cache from its ->list and ->dextents lists.
 */
7655 static void free_chunk_record(struct cache_extent *cache)
7657 struct chunk_record *rec;
7659 rec = container_of(cache, struct chunk_record, cache);
7660 list_del_init(&rec->list);
7661 list_del_init(&rec->dextents);
/* Release every entry of @chunk_cache, using free_chunk_record() per entry. */
7665 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
7667 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node callback for the device rb-tree: resolves the device_record
 * that embeds @node.
 */
7670 static void free_device_record(struct rb_node *node)
7672 struct device_record *rec;
7674 rec = container_of(node, struct device_record, node);
/*
 * NOTE(review): presumably expands to the tree-freeing helper for the
 * device cache, built around free_device_record() -- confirm against the
 * FREE_RB_BASED_TREE macro definition.
 */
7678 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the cache tree of @tree and append it to the
 * tree->block_groups list.
 */
7680 int insert_block_group_record(struct block_group_tree *tree,
7681 struct block_group_record *bg_rec)
7685 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7689 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Per-entry callback for the block group tree: unlink the
 * block_group_record that embeds @cache from its ->list.
 */
7693 static void free_block_group_record(struct cache_extent *cache)
7695 struct block_group_record *rec;
7697 rec = container_of(cache, struct block_group_record, cache);
7698 list_del_init(&rec->list);
/* Release every entry of @tree, using free_block_group_record() per entry. */
7702 void free_block_group_tree(struct block_group_tree *tree)
7704 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the device extent tree and queue it on both orphan
 * lists (no_chunk_orphans and no_device_orphans) of @tree.
 */
7707 int insert_device_extent_record(struct device_extent_tree *tree,
7708 struct device_extent_record *de_rec)
7713 * Device extent is a bit different from the other extents, because
7714 * the extents which belong to the different devices may have the
7715 * same start and size, so we need use the special extent cache
7716 * search/insert functions.
7718 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7722 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7723 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Per-entry callback for the device extent tree: unlink the
 * device_extent_record that embeds @cache from its chunk and device lists
 * when it is still linked on them.
 */
7727 static void free_device_extent_record(struct cache_extent *cache)
7729 struct device_extent_record *rec;
7731 rec = container_of(cache, struct device_extent_record, cache);
7732 if (!list_empty(&rec->chunk_list))
7733 list_del_init(&rec->chunk_list);
7734 if (!list_empty(&rec->device_list))
7735 list_del_init(&rec->device_list);
/* Release every entry of @tree, using free_device_extent_record() per entry. */
7739 void free_device_extent_tree(struct device_extent_tree *tree)
7741 cache_tree_free_extents(&tree->tree, free_device_extent_record);
/*
 * Only built with BTRFS_COMPAT_EXTENT_TREE_V0: turn the v0 extent ref item
 * at @slot of @leaf into a backref.  A ref objectid below
 * BTRFS_FIRST_FREE_OBJECTID leads to add_tree_backref(); add_data_backref()
 * is the other visible call.  In both cases key.objectid is the extent
 * bytenr and key.offset the parent.
 */
7744 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7745 static int process_extent_ref_v0(struct cache_tree *extent_cache,
7746 struct extent_buffer *leaf, int slot)
7748 struct btrfs_extent_ref_v0 *ref0;
7749 struct btrfs_key key;
7752 btrfs_item_key_to_cpu(leaf, &key, slot);
7753 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
7754 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
7755 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
7758 ret = add_data_backref(extent_cache, key.objectid, key.offset,
7759 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Allocate a zeroed chunk_record sized for the chunk's stripe count and
 * fill it from the chunk item at @slot of @leaf: cache range (key offset
 * and chunk length), generation, key fields, chunk attributes and one
 * devid/offset/dev_uuid entry per stripe.  An allocation failure is
 * reported on stderr.
 */
7765 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7766 struct btrfs_key *key,
7769 struct btrfs_chunk *ptr;
7770 struct chunk_record *rec;
7773 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7774 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7776 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7778 fprintf(stderr, "memory allocation failed\n");
7782 INIT_LIST_HEAD(&rec->list);
7783 INIT_LIST_HEAD(&rec->dextents);
/* The cache entry covers the chunk's range: start = key offset, size = length. */
7786 rec->cache.start = key->offset;
7787 rec->cache.size = btrfs_chunk_length(leaf, ptr);
7789 rec->generation = btrfs_header_generation(leaf);
7791 rec->objectid = key->objectid;
7792 rec->type = key->type;
7793 rec->offset = key->offset;
7795 rec->length = rec->cache.size;
7796 rec->owner = btrfs_chunk_owner(leaf, ptr);
7797 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7798 rec->type_flags = btrfs_chunk_type(leaf, ptr);
7799 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7800 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7801 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7802 rec->num_stripes = num_stripes;
7803 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* Copy the per-stripe device id, offset and device uuid. */
7805 for (i = 0; i < rec->num_stripes; ++i) {
7806 rec->stripes[i].devid =
7807 btrfs_stripe_devid_nr(leaf, ptr, i);
7808 rec->stripes[i].offset =
7809 btrfs_stripe_offset_nr(leaf, ptr, i);
7810 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7811 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb with btrfs_check_chunk_valid()
 * and, for a usable chunk, build a chunk_record and insert it into
 * @chunk_cache.  An invalid chunk is reported with error(); a failed
 * insertion is reported on stderr as an already existing chunk.
 */
7818 static int process_chunk_item(struct cache_tree *chunk_cache,
7819 struct btrfs_key *key, struct extent_buffer *eb,
7822 struct chunk_record *rec;
7823 struct btrfs_chunk *chunk;
7826 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7828 * Do extra check for this chunk item,
7830 * It's still possible one can craft a leaf with CHUNK_ITEM, with
7831 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7832 * and owner<->key_type check.
7834 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7837 error("chunk(%llu, %llu) is not valid, ignore it",
7838 key->offset, btrfs_chunk_length(eb, chunk));
7841 rec = btrfs_new_chunk_record(eb, key, slot);
7842 ret = insert_cache_extent(chunk_cache, &rec->cache);
7844 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7845 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it
 * into the @dev_cache rb-tree, ordered by device_record_compare.  A failed
 * insertion is reported on stderr as an already existing device.
 */
7852 static int process_device_item(struct rb_root *dev_cache,
7853 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7855 struct btrfs_dev_item *ptr;
7856 struct device_record *rec;
7859 ptr = btrfs_item_ptr(eb,
7860 slot, struct btrfs_dev_item);
7862 rec = malloc(sizeof(*rec));
7864 fprintf(stderr, "memory allocation failed\n");
/* Provisional devid from the key; overwritten from the item further down. */
7868 rec->devid = key->offset;
7869 rec->generation = btrfs_header_generation(eb);
7871 rec->objectid = key->objectid;
7872 rec->type = key->type;
7873 rec->offset = key->offset;
7875 rec->devid = btrfs_device_id(eb, ptr);
7876 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7877 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7879 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7881 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Allocate a zeroed block_group_record and fill it from the block group
 * item at @slot of @leaf: cache range (key objectid = start, key offset =
 * size), generation, key fields and the on-disk block group flags.  An
 * allocation failure is reported on stderr.
 */
7888 struct block_group_record *
7889 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7892 struct btrfs_block_group_item *ptr;
7893 struct block_group_record *rec;
7895 rec = calloc(1, sizeof(*rec));
7897 fprintf(stderr, "memory allocation failed\n");
7901 rec->cache.start = key->objectid;
7902 rec->cache.size = key->offset;
7904 rec->generation = btrfs_header_generation(leaf);
7906 rec->objectid = key->objectid;
7907 rec->type = key->type;
7908 rec->offset = key->offset;
7910 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7911 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7913 INIT_LIST_HEAD(&rec->list);
/*
 * Build a block_group_record from the item at @slot of @eb and insert it
 * into @block_group_cache.  A failed insertion is reported on stderr as an
 * already existing block group.
 */
7918 static int process_block_group_item(struct block_group_tree *block_group_cache,
7919 struct btrfs_key *key,
7920 struct extent_buffer *eb, int slot)
7922 struct block_group_record *rec;
7925 rec = btrfs_new_block_group_record(eb, key, slot);
7926 ret = insert_block_group_record(block_group_cache, rec);
7928 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7929 rec->objectid, rec->offset);
/*
 * Allocate a zeroed device_extent_record and fill it from the dev extent
 * item at @slot of @leaf.  The cache entry is keyed by key objectid
 * (cache.objectid) and key offset (cache.start), with the extent length as
 * its size.  An allocation failure is reported on stderr.
 */
7936 struct device_extent_record *
7937 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7938 struct btrfs_key *key, int slot)
7940 struct device_extent_record *rec;
7941 struct btrfs_dev_extent *ptr;
7943 rec = calloc(1, sizeof(*rec));
7945 fprintf(stderr, "memory allocation failed\n");
7949 rec->cache.objectid = key->objectid;
7950 rec->cache.start = key->offset;
7952 rec->generation = btrfs_header_generation(leaf);
7954 rec->objectid = key->objectid;
7955 rec->type = key->type;
7956 rec->offset = key->offset;
7958 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7959 rec->chunk_objecteid =
7960 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7962 btrfs_dev_extent_chunk_offset(leaf, ptr);
7963 rec->length = btrfs_dev_extent_length(leaf, ptr);
7964 rec->cache.size = rec->length;
7966 INIT_LIST_HEAD(&rec->chunk_list);
7967 INIT_LIST_HEAD(&rec->device_list);
/*
 * Build a device_extent_record from the item at @slot of @eb and insert it
 * into @dev_extent_cache.  A failed insertion is reported as an already
 * existing device extent.
 */
7973 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7974 struct btrfs_key *key, struct extent_buffer *eb,
7977 struct device_extent_record *rec;
7980 rec = btrfs_new_device_extent_record(eb, key, slot);
7981 ret = insert_device_extent_record(dev_extent_cache, rec);
7984 "Device extent[%llu, %llu, %llu] existed.\n",
7985 rec->objectid, rec->offset, rec->length);
/*
 * Process one EXTENT_ITEM / METADATA_ITEM at @slot of @eb.
 *
 * An extent record is added for the range (length = nodesize for
 * METADATA_ITEM keys, key.offset otherwise), after which every inline
 * reference stored behind the extent item is turned into a tree or data
 * backref.  Extents with an unaligned bytenr, a metadata length other than
 * nodesize, or an unaligned data length are reported with error().  Items
 * smaller than struct btrfs_extent_item are handled as v0 items when
 * BTRFS_COMPAT_EXTENT_TREE_V0 is defined.
 */
7992 static int process_extent_item(struct btrfs_root *root,
7993 struct cache_tree *extent_cache,
7994 struct extent_buffer *eb, int slot)
7996 struct btrfs_extent_item *ei;
7997 struct btrfs_extent_inline_ref *iref;
7998 struct btrfs_extent_data_ref *dref;
7999 struct btrfs_shared_data_ref *sref;
8000 struct btrfs_key key;
8001 struct extent_record tmpl;
8006 u32 item_size = btrfs_item_size_nr(eb, slot);
8012 btrfs_item_key_to_cpu(eb, &key, slot);
/* METADATA_ITEM keys do not carry the length; use the nodesize instead. */
8014 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8016 num_bytes = root->fs_info->nodesize;
8018 num_bytes = key.offset;
8021 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
8022 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
8023 key.objectid, root->fs_info->sectorsize);
/* Item too small for the current format: possibly an old v0 extent item. */
8026 if (item_size < sizeof(*ei)) {
8027 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8028 struct btrfs_extent_item_v0 *ei0;
8029 if (item_size != sizeof(*ei0)) {
8031 "invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d",
8032 key.objectid, key.type, key.offset,
8033 btrfs_header_bytenr(eb), slot);
8036 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
8037 refs = btrfs_extent_refs_v0(eb, ei0);
8041 memset(&tmpl, 0, sizeof(tmpl));
8042 tmpl.start = key.objectid;
8043 tmpl.nr = num_bytes;
8044 tmpl.extent_item_refs = refs;
8045 tmpl.metadata = metadata;
8047 tmpl.max_size = num_bytes;
8049 return add_extent_rec(extent_cache, &tmpl);
8052 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
8053 refs = btrfs_extent_refs(eb, ei);
8054 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
8058 if (metadata && num_bytes != root->fs_info->nodesize) {
8059 error("ignore invalid metadata extent, length %llu does not equal to %u",
8060 num_bytes, root->fs_info->nodesize);
8063 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
8064 error("ignore invalid data extent, length %llu is not aligned to %u",
8065 num_bytes, root->fs_info->sectorsize);
8069 memset(&tmpl, 0, sizeof(tmpl));
8070 tmpl.start = key.objectid;
8071 tmpl.nr = num_bytes;
8072 tmpl.extent_item_refs = refs;
8073 tmpl.metadata = metadata;
8075 tmpl.max_size = num_bytes;
/* NOTE(review): the return value of add_extent_rec() is not captured here. */
8076 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs start right after the extent item; tree blocks keyed as
 * EXTENT_ITEM additionally carry a btrfs_tree_block_info before them.
 */
8078 ptr = (unsigned long)(ei + 1);
8079 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
8080 key.type == BTRFS_EXTENT_ITEM_KEY)
8081 ptr += sizeof(struct btrfs_tree_block_info);
8083 end = (unsigned long)ei + item_size;
8085 iref = (struct btrfs_extent_inline_ref *)ptr;
8086 type = btrfs_extent_inline_ref_type(eb, iref);
8087 offset = btrfs_extent_inline_ref_offset(eb, iref);
8089 case BTRFS_TREE_BLOCK_REF_KEY:
8090 ret = add_tree_backref(extent_cache, key.objectid,
8094 "add_tree_backref failed (extent items tree block): %s",
8097 case BTRFS_SHARED_BLOCK_REF_KEY:
8098 ret = add_tree_backref(extent_cache, key.objectid,
8102 "add_tree_backref failed (extent items shared block): %s",
8105 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref body overlays the inline ref starting at its offset field. */
8106 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
/* NOTE(review): the return value of add_data_backref() is not captured. */
8107 add_data_backref(extent_cache, key.objectid, 0,
8108 btrfs_extent_data_ref_root(eb, dref),
8109 btrfs_extent_data_ref_objectid(eb,
8111 btrfs_extent_data_ref_offset(eb, dref),
8112 btrfs_extent_data_ref_count(eb, dref),
8115 case BTRFS_SHARED_DATA_REF_KEY:
/* The shared data ref count follows directly after the inline ref. */
8116 sref = (struct btrfs_shared_data_ref *)(iref + 1);
/* NOTE(review): the return value of add_data_backref() is not captured. */
8117 add_data_backref(extent_cache, key.objectid, offset,
8119 btrfs_shared_data_ref_count(eb, sref),
8123 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
8124 key.objectid, key.type, num_bytes);
/* Advance by the size of this inline ref type. */
8127 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Verify that [@offset, @offset + @bytes) is described by exactly one free
 * space entry of @cache, and unlink that entry once verified.
 *
 * Before the lookup, each super block mirror is mapped into the block
 * group with btrfs_rmap_block() and the stripes it occupies are cut out of
 * the range.  A stripe in the middle of the range splits it: the left part
 * is checked through a recursive call and the function continues with the
 * right part.
 */
8134 static int check_cache_range(struct btrfs_root *root,
8135 struct btrfs_block_group_cache *cache,
8136 u64 offset, u64 bytes)
8138 struct btrfs_free_space *entry;
8144 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
8145 bytenr = btrfs_sb_offset(i);
8146 ret = btrfs_rmap_block(root->fs_info,
8147 cache->key.objectid, bytenr, 0,
8148 &logical, &nr, &stripe_len);
/* Stripe lies entirely before the range. */
8153 if (logical[nr] + stripe_len <= offset)
/* Stripe lies entirely after the range. */
8155 if (offset + bytes <= logical[nr])
/* Stripe starts exactly at the range start: trim from the front. */
8157 if (logical[nr] == offset) {
8158 if (stripe_len >= bytes) {
8162 bytes -= stripe_len;
8163 offset += stripe_len;
/* Stripe starts before the range and overlaps its beginning. */
8164 } else if (logical[nr] < offset) {
8165 if (logical[nr] + stripe_len >=
8170 bytes = (offset + bytes) -
8171 (logical[nr] + stripe_len);
8172 offset = logical[nr] + stripe_len;
8175 * Could be tricky, the super may land in the
8176 * middle of the area we're checking. First
8177 * check the easiest case, it's at the end.
8179 if (logical[nr] + stripe_len >=
8181 bytes = logical[nr] - offset;
8185 /* Check the left side */
8186 ret = check_cache_range(root, cache,
8188 logical[nr] - offset);
8194 /* Now we continue with the right side */
8195 bytes = (offset + bytes) -
8196 (logical[nr] + stripe_len);
8197 offset = logical[nr] + stripe_len;
/* What is left of the range must match one free space entry exactly. */
8204 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8206 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8207 offset, offset+bytes);
8211 if (entry->offset != offset) {
8212 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8217 if (entry->bytes != bytes) {
8218 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8219 bytes, entry->bytes, offset);
/* Remove the verified entry from the block group's free space ctl. */
8223 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the loaded free space of block group @cache against the
 * extent tree.
 *
 * The extent tree is walked over the block group's range; every gap
 * between consecutive extent/metadata items (tracked in 'last'), plus the
 * tail up to the end of the block group, is passed to check_cache_range().
 * Free space entries still present at the end are reported as an error.
 */
8228 static int verify_space_cache(struct btrfs_root *root,
8229 struct btrfs_block_group_cache *cache)
8231 struct btrfs_path path;
8232 struct extent_buffer *leaf;
8233 struct btrfs_key key;
/* All lookups below run against the extent root. */
8237 root = root->fs_info->extent_root;
/* Start at the block group start, but never below BTRFS_SUPER_INFO_OFFSET. */
8239 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8241 btrfs_init_path(&path);
8242 key.objectid = last;
8244 key.type = BTRFS_EXTENT_ITEM_KEY;
8245 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8250 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8251 ret = btrfs_next_leaf(root, &path);
8259 leaf = path.nodes[0];
8260 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Compared against the end of this block group. */
8261 if (key.objectid >= cache->key.offset + cache->key.objectid)
8263 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8264 key.type != BTRFS_METADATA_ITEM_KEY) {
/* Extent starts right at 'last': no gap, just advance past it. */
8269 if (last == key.objectid) {
8270 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8271 last = key.objectid + key.offset;
8273 last = key.objectid + root->fs_info->nodesize;
/* Gap [last, key.objectid) is checked as free space. */
8278 ret = check_cache_range(root, cache, last,
8279 key.objectid - last);
8282 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8283 last = key.objectid + key.offset;
8285 last = key.objectid + root->fs_info->nodesize;
/* Tail of the block group after the last extent. */
8289 if (last < cache->key.objectid + cache->key.offset)
8290 ret = check_cache_range(root, cache, last,
8291 cache->key.objectid +
8292 cache->key.offset - last);
8295 btrfs_release_path(&path);
8298 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8299 fprintf(stderr, "There are still entries left in the space "
/*
 * Load and verify the free-space information of the block groups, walking
 * them with btrfs_lookup_first_block_group() and advancing `start` to the end
 * of each group found.  Returns -EINVAL when `error` was set, otherwise 0.
 */
8307 static int check_space_cache(struct btrfs_root *root)
8309 struct btrfs_block_group_cache *cache;
/* The walk begins right after the primary superblock. */
8310 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
/*
 * A cache generation that is set (not -1) but differs from the superblock
 * generation only produces the notice below.
 */
8314 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8315 btrfs_super_generation(root->fs_info->super_copy) !=
8316 btrfs_super_cache_generation(root->fs_info->super_copy)) {
8317 printf("cache and super generation don't match, space cache "
8318 "will be invalidated\n");
/* Optional progress reporting for this phase. */
8322 if (ctx.progress_enabled) {
8323 ctx.tp = TASK_FREE_SPACE;
8324 task_start(ctx.info);
8328 cache = btrfs_lookup_first_block_group(root->fs_info, start);
8332 start = cache->key.objectid + cache->key.offset;
/* Create the in-memory free-space control structure if it does not exist. */
8333 if (!cache->free_space_ctl) {
8334 if (btrfs_init_free_space_ctl(cache,
8335 root->fs_info->sectorsize)) {
8340 btrfs_remove_free_space_cache(cache);
/*
 * With the FREE_SPACE_TREE compat_ro bit the free space tree is loaded, with
 * the super stripes excluded around the load.
 */
8343 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8344 ret = exclude_super_stripes(root, cache);
8346 fprintf(stderr, "could not exclude super stripes: %s\n",
8351 ret = load_free_space_tree(root->fs_info, cache);
8352 free_excluded_extents(root, cache);
8354 fprintf(stderr, "could not load free space tree: %s\n",
/* NOTE(review): presumably the non-free-space-tree path; the branch itself is not visible here. */
8361 ret = load_free_space_cache(root->fs_info, cache);
8366 ret = verify_space_cache(root, cache);
8368 fprintf(stderr, "cache appears valid but isn't %Lu\n",
8369 cache->key.objectid);
8374 task_stop(ctx.info);
8376 return error ? -EINVAL : 0;
/*
 * Read the data of [bytenr, bytenr + num_bytes) and compare the checksum of
 * every sectorsize-sized piece with the stored checksum taken from `eb`,
 * starting at `leaf_offset`.
 */
8379 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8380 u64 num_bytes, unsigned long leaf_offset,
8381 struct extent_buffer *eb) {
8383 struct btrfs_fs_info *fs_info = root->fs_info;
8385 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8387 unsigned long csum_offset;
8391 u64 data_checked = 0;
/* The length is tested for being a whole number of sectors. */
8397 if (num_bytes % fs_info->sectorsize)
/* One buffer large enough for the whole range. */
8400 data = malloc(num_bytes);
8404 while (offset < num_bytes) {
8407 read_len = num_bytes - offset;
/* read as much as possible at a time */
8409 ret = read_extent_data(fs_info, data + offset,
8410 bytenr + offset, &read_len, mirror);
/* verify the checksum of every sectorsize-sized piece that was read */
8415 while (data_checked < read_len) {
8417 tmp = offset + data_checked;
8419 csum = btrfs_csum_data((char *)data + tmp,
8420 csum, fs_info->sectorsize);
8421 btrfs_csum_final(csum, (u8 *)&csum);
/* Stored checksums are packed back to back, csum_size bytes per sector. */
8423 csum_offset = leaf_offset +
8424 tmp / fs_info->sectorsize * csum_size;
8425 read_extent_buffer(eb, (char *)&csum_expected,
8426 csum_offset, csum_size);
/* try another mirror */
8428 if (csum != csum_expected) {
8429 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8430 mirror, bytenr + tmp,
8431 csum, csum_expected);
8432 num_copies = btrfs_num_copies(root->fs_info,
8434 if (mirror < num_copies - 1) {
8439 data_checked += fs_info->sectorsize;
/*
 * Check that [bytenr, bytenr + num_bytes) is covered by EXTENT_ITEMs in the
 * extent tree.  The range is shrunk as covering extents are found; whatever
 * is left over is reported as having no extents.
 */
8448 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8451 struct btrfs_path path;
8452 struct extent_buffer *leaf;
8453 struct btrfs_key key;
8456 btrfs_init_path(&path);
/* Search key: this bytenr with the largest possible offset. */
8457 key.objectid = bytenr;
8458 key.type = BTRFS_EXTENT_ITEM_KEY;
8459 key.offset = (u64)-1;
8462 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8465 fprintf(stderr, "Error looking up extent record %d\n", ret);
8466 btrfs_release_path(&path);
8469 if (path.slots[0] > 0) {
8472 ret = btrfs_prev_leaf(root, &path);
8475 } else if (ret > 0) {
8482 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8485 * Block group items come before extent items if they have the same
8486 * bytenr, so walk back one more just in case. Dear future traveller,
8487 * first congrats on mastering time travel. Now if it's not too much
8488 * trouble could you go back to 2006 and tell Chris to make the
8489 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8490 * EXTENT_ITEM_KEY please?
8492 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8493 if (path.slots[0] > 0) {
8496 ret = btrfs_prev_leaf(root, &path);
8499 } else if (ret > 0) {
8504 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
/* Forward scan over the items, crossing leaf boundaries as needed. */
8508 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8509 ret = btrfs_next_leaf(root, &path);
8511 fprintf(stderr, "Error going to next leaf "
8513 btrfs_release_path(&path);
8519 leaf = path.nodes[0];
8520 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8521 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent ends before the range starts. */
8525 if (key.objectid + key.offset < bytenr) {
/* Extent starts beyond the end of the range. */
8529 if (key.objectid > bytenr + num_bytes)
/* Extent starts exactly at bytenr: consume it from the front of the range. */
8532 if (key.objectid == bytenr) {
8533 if (key.offset >= num_bytes) {
8537 num_bytes -= key.offset;
8538 bytenr += key.offset;
/* Extent starts before bytenr: keep only the part of the range past its end. */
8539 } else if (key.objectid < bytenr) {
8540 if (key.objectid + key.offset >= bytenr + num_bytes) {
8544 num_bytes = (bytenr + num_bytes) -
8545 (key.objectid + key.offset);
8546 bytenr = key.objectid + key.offset;
/*
 * Extent ends inside the range: the part to its right is checked by a
 * recursive call, then the range is cut down to the part on its left.
 */
8548 if (key.objectid + key.offset < bytenr + num_bytes) {
8549 u64 new_start = key.objectid + key.offset;
8550 u64 new_bytes = bytenr + num_bytes - new_start;
8553 * Weird case, the extent is in the middle of
8554 * our range, we'll have to search one side
8555 * and then the other. Not sure if this happens
8556 * in real life, but no harm in coding it up
8557 * anyway just in case.
8559 btrfs_release_path(&path);
8560 ret = check_extent_exists(root, new_start,
8563 fprintf(stderr, "Right section didn't "
8567 num_bytes = key.objectid - bytenr;
8570 num_bytes = key.objectid - bytenr;
/* Bytes still unaccounted for, with no earlier error, are reported. */
8577 if (num_bytes && !ret) {
8578 fprintf(stderr, "There are no extents for csum range "
8579 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8583 btrfs_release_path(&path);
/*
 * Walk the EXTENT_CSUM items of the csum tree.  When check_data_csum is set
 * the data is verified through check_extent_csums().  Runs of contiguous
 * checksummed bytes are collected in (offset, num_bytes) and each finished
 * run is checked against the extent tree with check_extent_exists().
 */
8587 static int check_csums(struct btrfs_root *root)
8589 struct btrfs_path path;
8590 struct extent_buffer *leaf;
8591 struct btrfs_key key;
8592 u64 offset = 0, num_bytes = 0;
8593 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8597 unsigned long leaf_offset;
/* Everything below works on the csum tree. */
8599 root = root->fs_info->csum_root;
8600 if (!extent_buffer_uptodate(root->node)) {
8601 fprintf(stderr, "No valid csum tree found\n");
8605 btrfs_init_path(&path);
8606 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8607 key.type = BTRFS_EXTENT_CSUM_KEY;
8609 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8611 fprintf(stderr, "Error searching csum tree %d\n", ret);
8612 btrfs_release_path(&path);
8616 if (ret > 0 && path.slots[0])
8621 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8622 ret = btrfs_next_leaf(root, &path);
8624 fprintf(stderr, "Error going to next leaf "
8631 leaf = path.nodes[0];
8633 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8634 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Data bytes covered by this item: one sector per stored checksum. */
8639 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8640 csum_size) * root->fs_info->sectorsize;
/* Reading and checksumming the data is optional. */
8641 if (!check_data_csum)
8642 goto skip_csum_check;
8643 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8644 ret = check_extent_csums(root, key.offset, data_len,
8650 offset = key.offset;
/*
 * This item does not continue the current run: check the finished run against
 * the extent tree, then restart the run at key.offset.
 */
8651 } else if (key.offset != offset + num_bytes) {
8652 ret = check_extent_exists(root, offset, num_bytes);
8654 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8655 "there is no extent record\n",
8656 offset, offset+num_bytes);
8659 offset = key.offset;
8662 num_bytes += data_len;
8666 btrfs_release_path(&path);
/*
 * Compare `key` with `drop_key` field by field in the order objectid, type,
 * offset; each later field is only examined when the earlier ones are equal.
 * NOTE(review): the return statements are not visible here; presumably the
 * result is non-zero when `key` sorts before `drop_key` -- confirm.
 */
8670 static int is_dropped_key(struct btrfs_key *key,
8671 struct btrfs_key *drop_key) {
8672 if (key->objectid < drop_key->objectid)
8674 else if (key->objectid == drop_key->objectid) {
8675 if (key->type < drop_key->type)
8677 else if (key->type == drop_key->type) {
8678 if (key->offset < drop_key->offset)
8686 * Here are the rules for FULL_BACKREF.
8688 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8689 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8691 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
8692 * if it happened after the relocation occurred since we'll have dropped the
8693 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8694 * have no real way to know for sure.
8696 * We process the blocks one root at a time, and we start from the lowest root
8697 * objectid and go to the highest. So we can just lookup the owner backref for
8698 * the record and if we don't find it then we know it doesn't exist and we have
8701 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
8702 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8703 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether BTRFS_BLOCK_FLAG_FULL_BACKREF applies to `buf` when it is
 * reached through root item `ri`.  The flag is OR-ed into *flags, and
 * rec->bad_full_backref is set when the outcome contradicts a value already
 * stored in rec->flag_block_full_backref.
 */
8705 static int calc_extent_flag(struct cache_tree *extent_cache,
8706 struct extent_buffer *buf,
8707 struct root_item_record *ri,
8710 struct extent_record *rec;
8711 struct cache_extent *cache;
8712 struct tree_backref *tback;
8715 cache = lookup_cache_extent(extent_cache, buf->start, 1);
/* we have added this extent before */
8720 rec = container_of(cache, struct extent_record, cache);
8723 * Except file/reloc tree, we can not have
8726 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* The block is the root node recorded in the root item. */
8731 if (buf->start == ri->bytenr)
/* Blocks carrying the RELOC header flag are tested separately. */
8734 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
/* The header owner is the very root we are walking. */
8737 owner = btrfs_header_owner(buf);
8738 if (owner == ri->objectid)
/* Look for a non-full tree backref (parent 0) from the header owner. */
8741 tback = find_tree_backref(rec, 0, owner);
/* A recorded value other than "unset" or 0 disagrees with this outcome. */
8746 if (rec->flag_block_full_backref != FLAG_UNSET &&
8747 rec->flag_block_full_backref != 0)
8748 rec->bad_full_backref = 1;
/* FULL_BACKREF outcome: a recorded value other than "unset" or 1 disagrees. */
8751 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8752 if (rec->flag_block_full_backref != FLAG_UNSET &&
8753 rec->flag_block_full_backref != 1)
8754 rec->bad_full_backref = 1;
/*
 * Print one line on stderr naming a key type and the root it was found in:
 * "Invalid key type(<type>) found in root(<root>)".
 */
8758 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8760 fprintf(stderr, "Invalid key type(");
8761 print_key_type(stderr, 0, key_type);
8762 fprintf(stderr, ") found in root(");
8763 print_objectid(stderr, rootid, 0);
8764 fprintf(stderr, ")\n");
8768 * Check if the key is valid with its extent buffer.
8770 * This is an early check in case an invalid key exists in an extent buffer
8771 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check that a key type is allowed in the tree identified by `rootid`.
 * A mismatch is reported through report_mismatch_key_root().
 */
8774 static int check_type_with_root(u64 rootid, u8 key_type)
/* Only valid in chunk tree */
8778 case BTRFS_DEV_ITEM_KEY:
8779 case BTRFS_CHUNK_ITEM_KEY:
8780 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
/* valid in csum and log tree */
/*
 * NOTE(review): BTRFS_CSUM_TREE_OBJECTID is a tree objectid, but every
 * sibling case label here is a key type.  BTRFS_EXTENT_CSUM_KEY may have
 * been intended; confirm together with the condition below before changing.
 */
8784 case BTRFS_CSUM_TREE_OBJECTID:
8785 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
/* Extent, metadata and block group items belong to the extent tree. */
8789 case BTRFS_EXTENT_ITEM_KEY:
8790 case BTRFS_METADATA_ITEM_KEY:
8791 case BTRFS_BLOCK_GROUP_ITEM_KEY:
8792 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
/* Root items belong to the root tree. */
8795 case BTRFS_ROOT_ITEM_KEY:
8796 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
/* Device extents belong to the device tree. */
8799 case BTRFS_DEV_EXTENT_KEY:
8800 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8806 report_mismatch_key_root(key_type, rootid);
/*
 * Process the next tree block of the scan.  pick_next_pending() selects a
 * batch, readahead is issued for it, and the first block of the batch is then
 * read, has its FULL_BACKREF state worked out and is passed to check_block().
 * For a leaf every item is recorded in the matching cache; for a node an
 * extent record and tree backref are added for each child, which is queued.
 */
8810 static int run_next_block(struct btrfs_root *root,
8811 struct block_info *bits,
8814 struct cache_tree *pending,
8815 struct cache_tree *seen,
8816 struct cache_tree *reada,
8817 struct cache_tree *nodes,
8818 struct cache_tree *extent_cache,
8819 struct cache_tree *chunk_cache,
8820 struct rb_root *dev_cache,
8821 struct block_group_tree *block_group_cache,
8822 struct device_extent_tree *dev_extent_cache,
8823 struct root_item_record *ri)
8825 struct btrfs_fs_info *fs_info = root->fs_info;
8826 struct extent_buffer *buf;
8827 struct extent_record *rec = NULL;
8838 struct btrfs_key key;
8839 struct cache_extent *cache;
8842 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8843 bits_nr, &reada_bits);
/* Remember the batch in `reada` and start readahead for every block in it. */
8848 for(i = 0; i < nritems; i++) {
8849 ret = add_cache_extent(reada, bits[i].start,
/* fixme, get the parent transid */
8855 readahead_tree_block(fs_info, bits[i].start, 0);
/* Only the first block of the batch is processed by this call. */
8858 *last = bits[0].start;
8859 bytenr = bits[0].start;
8860 size = bits[0].size;
/* Take the block out of the pending, reada and nodes trees. */
8862 cache = lookup_cache_extent(pending, bytenr, size);
8864 remove_cache_extent(pending, cache);
8867 cache = lookup_cache_extent(reada, bytenr, size);
8869 remove_cache_extent(reada, cache);
8872 cache = lookup_cache_extent(nodes, bytenr, size);
8874 remove_cache_extent(nodes, cache);
/* An existing extent record supplies the generation used for the read. */
8877 cache = lookup_cache_extent(extent_cache, bytenr, size);
8879 rec = container_of(cache, struct extent_record, cache);
8880 gen = rec->parent_generation;
/* fixme, get the real parent transid */
8884 buf = read_tree_block(root->fs_info, bytenr, gen);
8885 if (!extent_buffer_uptodate(buf)) {
8886 record_bad_block_io(root->fs_info,
8887 extent_cache, bytenr, size);
8891 nritems = btrfs_header_nritems(buf);
/*
 * Unless the extent tree is being re-initialised, the flags are looked up in
 * the extent tree; calc_extent_flag() is also used, and FULL_BACKREF is
 * assumed when it fails.
 */
8894 if (!init_extent_tree) {
8895 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8896 btrfs_header_level(buf), 1, NULL,
8899 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8901 fprintf(stderr, "Couldn't calc extent flags\n");
8902 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8907 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8909 fprintf(stderr, "Couldn't calc extent flags\n");
8910 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8914 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8916 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8917 ri->objectid == btrfs_header_owner(buf)) {
8919 * Ok we got to this block from it's original owner and
8920 * we have FULL_BACKREF set. Relocation can leave
8921 * converted blocks over so this is altogether possible,
8922 * however it's not possible if the generation > the
8923 * last snapshot, so check for this case.
8925 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8926 btrfs_header_generation(buf) > ri->last_snapshot) {
8927 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8928 rec->bad_full_backref = 1;
8933 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8934 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8935 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8936 rec->bad_full_backref = 1;
/* Store the final decision on the extent record. */
8940 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8941 rec->flag_block_full_backref = 1;
8945 rec->flag_block_full_backref = 0;
8947 owner = btrfs_header_owner(buf);
8950 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: unused space counts as waste, then every item is dispatched by key type. */
8954 if (btrfs_is_leaf(buf)) {
8955 btree_space_waste += btrfs_leaf_free_space(root, buf);
8956 for (i = 0; i < nritems; i++) {
8957 struct btrfs_file_extent_item *fi;
8958 btrfs_item_key_to_cpu(buf, &key, i);
8960 * Check key type against the leaf owner.
8961 * Could filter quite a lot of early error if
8964 if (check_type_with_root(btrfs_header_owner(buf),
8966 fprintf(stderr, "ignoring invalid key\n");
8969 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8970 process_extent_item(root, extent_cache, buf,
8974 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8975 process_extent_item(root, extent_cache, buf,
8979 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8981 btrfs_item_size_nr(buf, i);
8984 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8985 process_chunk_item(chunk_cache, &key, buf, i);
8988 if (key.type == BTRFS_DEV_ITEM_KEY) {
8989 process_device_item(dev_cache, &key, buf, i);
8992 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8993 process_block_group_item(block_group_cache,
8997 if (key.type == BTRFS_DEV_EXTENT_KEY) {
8998 process_device_extent_item(dev_extent_cache,
9003 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
9004 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
9005 process_extent_ref_v0(extent_cache, buf, i);
/* Keyed tree backref: key.offset is passed in the root position, parent is 0. */
9012 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
9013 ret = add_tree_backref(extent_cache,
9014 key.objectid, 0, key.offset, 0);
9017 "add_tree_backref failed (leaf tree block): %s",
/* Shared tree backref: key.offset is passed in the parent position, root is 0. */
9021 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
9022 ret = add_tree_backref(extent_cache,
9023 key.objectid, key.offset, 0, 0);
9026 "add_tree_backref failed (leaf shared block): %s",
9030 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
9031 struct btrfs_extent_data_ref *ref;
9032 ref = btrfs_item_ptr(buf, i,
9033 struct btrfs_extent_data_ref);
9034 add_data_backref(extent_cache,
9036 btrfs_extent_data_ref_root(buf, ref),
9037 btrfs_extent_data_ref_objectid(buf,
9039 btrfs_extent_data_ref_offset(buf, ref),
9040 btrfs_extent_data_ref_count(buf, ref),
9041 0, root->fs_info->sectorsize);
9044 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
9045 struct btrfs_shared_data_ref *ref;
9046 ref = btrfs_item_ptr(buf, i,
9047 struct btrfs_shared_data_ref);
9048 add_data_backref(extent_cache,
9049 key.objectid, key.offset, 0, 0, 0,
9050 btrfs_shared_data_ref_count(buf, ref),
9051 0, root->fs_info->sectorsize);
/* An orphan item is copied into a bad_item and appended to delete_items. */
9054 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
9055 struct bad_item *bad;
9057 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
9061 bad = malloc(sizeof(struct bad_item));
9064 INIT_LIST_HEAD(&bad->list);
9065 memcpy(&bad->key, &key,
9066 sizeof(struct btrfs_key));
9067 bad->root_id = owner;
9068 list_add_tail(&bad->list, &delete_items);
/* What follows deals with EXTENT_DATA items and tests for inline and zero-bytenr extents. */
9071 if (key.type != BTRFS_EXTENT_DATA_KEY)
9073 fi = btrfs_item_ptr(buf, i,
9074 struct btrfs_file_extent_item);
9075 if (btrfs_file_extent_type(buf, fi) ==
9076 BTRFS_FILE_EXTENT_INLINE)
9078 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
/* Global data statistics, then the data backref seen from this file extent. */
9081 data_bytes_allocated +=
9082 btrfs_file_extent_disk_num_bytes(buf, fi);
9083 if (data_bytes_allocated < root->fs_info->sectorsize) {
9086 data_bytes_referenced +=
9087 btrfs_file_extent_num_bytes(buf, fi);
9088 add_data_backref(extent_cache,
9089 btrfs_file_extent_disk_bytenr(buf, fi),
9090 parent, owner, key.objectid, key.offset -
9091 btrfs_file_extent_offset(buf, fi), 1, 1,
9092 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Node: walk the key pointers and register every child block. */
9096 struct btrfs_key first_key;
9098 first_key.objectid = 0;
9101 btrfs_item_key_to_cpu(buf, &first_key, 0);
9102 level = btrfs_header_level(buf);
9103 for (i = 0; i < nritems; i++) {
9104 struct extent_record tmpl;
9106 ptr = btrfs_node_blockptr(buf, i);
9107 size = root->fs_info->nodesize;
9108 btrfs_node_key_to_cpu(buf, &key, i);
/* At the root's drop_level, keys are compared with its drop_key. */
9110 if ((level == ri->drop_level)
9111 && is_dropped_key(&key, &ri->drop_key)) {
/* Extent record template for the child, built from the pointer's key and generation. */
9116 memset(&tmpl, 0, sizeof(tmpl));
9117 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
9118 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
9123 tmpl.max_size = size;
9124 ret = add_extent_rec(extent_cache, &tmpl);
9128 ret = add_tree_backref(extent_cache, ptr, parent,
9132 "add_tree_backref failed (non-leaf block): %s",
/* The child is queued in either `nodes` or `pending`. */
9138 add_pending(nodes, seen, ptr, size);
9140 add_pending(pending, seen, ptr, size);
/* Unused key-pointer slots of the node count as wasted btree space. */
9143 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(fs_info) -
9144 nritems) * sizeof(struct btrfs_key_ptr);
/* Size statistics, split out for fs trees and the extent tree. */
9146 total_btree_bytes += buf->len;
9147 if (fs_root_objectid(btrfs_header_owner(buf)))
9148 total_fs_tree_bytes += buf->len;
9149 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9150 total_extent_tree_bytes += buf->len;
9152 free_extent_buffer(buf);
/*
 * Queue a tree root block for scanning, create an extent record for it and
 * add its tree backref.
 */
9156 static int add_root_to_pending(struct extent_buffer *buf,
9157 struct cache_tree *extent_cache,
9158 struct cache_tree *pending,
9159 struct cache_tree *seen,
9160 struct cache_tree *nodes,
9163 struct extent_record tmpl;
/* Blocks above level 0 go to `nodes`; the other call queues into `pending`. */
9166 if (btrfs_header_level(buf) > 0)
9167 add_pending(nodes, seen, buf->start, buf->len);
9169 add_pending(pending, seen, buf->start, buf->len);
/* Extent record template covering exactly this block. */
9171 memset(&tmpl, 0, sizeof(tmpl));
9172 tmpl.start = buf->start;
9177 tmpl.max_size = buf->len;
9178 add_extent_rec(extent_cache, &tmpl);
/*
 * For the reloc tree, or a header older than the mixed-backref revision, the
 * backref is added with the block's own start as parent; the other call
 * passes parent 0 and `objectid` as the root.
 */
9180 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9181 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9182 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9185 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9190 /* as we fix the tree, we might be deleting blocks that
9191 * we're tracking for repair. This hook makes sure we
9192 * remove any backrefs for blocks as we are fixing them.
/*
 * Hook run when an extent is freed during repair: the cached extent record
 * and the matching backref have their reference counters lowered so that the
 * in-memory state follows what is being removed.
 */
9194 static int free_extent_hook(struct btrfs_trans_handle *trans,
9195 struct btrfs_root *root,
9196 u64 bytenr, u64 num_bytes, u64 parent,
9197 u64 root_objectid, u64 owner, u64 offset,
9200 struct extent_record *rec;
9201 struct cache_extent *cache;
9203 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* Owners at or above BTRFS_FIRST_FREE_OBJECTID are treated as data extents. */
9205 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9206 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9210 rec = container_of(cache, struct extent_record, cache);
/* Data extent: counters drop by refs_to_drop. */
9212 struct data_backref *back;
9213 back = find_data_backref(rec, parent, root_objectid, owner,
9214 offset, 1, bytenr, num_bytes);
9217 if (back->node.found_ref) {
9218 back->found_ref -= refs_to_drop;
9220 rec->refs -= refs_to_drop;
9222 if (back->node.found_extent_tree) {
9223 back->num_refs -= refs_to_drop;
9224 if (rec->extent_item_refs)
9225 rec->extent_item_refs -= refs_to_drop;
/* The flags are cleared once their counters reach zero. */
9227 if (back->found_ref == 0)
9228 back->node.found_ref = 0;
9229 if (back->num_refs == 0)
9230 back->node.found_extent_tree = 0;
/*
 * NOTE(review): the backref is erased only while found_ref is still set;
 * confirm that `!back->node.found_ref` was not intended.
 */
9232 if (!back->node.found_extent_tree && back->node.found_ref) {
9233 rb_erase(&back->node.node, &rec->backref_tree);
/* Tree block: each flag stands for a single reference. */
9237 struct tree_backref *back;
9238 back = find_tree_backref(rec, parent, root_objectid);
9241 if (back->node.found_ref) {
9244 back->node.found_ref = 0;
9246 if (back->node.found_extent_tree) {
9247 if (rec->extent_item_refs)
9248 rec->extent_item_refs--;
9249 back->node.found_extent_tree = 0;
/*
 * NOTE(review): found_ref was cleared above whenever it was set, so this
 * condition looks like it can never be true -- confirm the intended test.
 */
9251 if (!back->node.found_extent_tree && back->node.found_ref) {
9252 rb_erase(&back->node.node, &rec->backref_tree);
9256 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete from the extent tree the items whose objectid is `bytenr` and whose
 * type is an extent item, metadata item or one of the backref key types.
 */
9261 static int delete_extent_records(struct btrfs_trans_handle *trans,
9262 struct btrfs_root *root,
9263 struct btrfs_path *path,
9266 struct btrfs_key key;
9267 struct btrfs_key found_key;
9268 struct extent_buffer *leaf;
/* The search key uses the largest possible offset for this bytenr. */
9273 key.objectid = bytenr;
9275 key.offset = (u64)-1;
9278 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9285 if (path->slots[0] == 0)
9291 leaf = path->nodes[0];
9292 slot = path->slots[0];
9294 btrfs_item_key_to_cpu(leaf, &found_key, slot);
/* The objectid of the item found is compared with bytenr. */
9295 if (found_key.objectid != bytenr)
/*
 * Any other item type at this bytenr is left alone; the search key is lowered
 * to just below the item found.
 */
9298 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9299 found_key.type != BTRFS_METADATA_ITEM_KEY &&
9300 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9301 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9302 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9303 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9304 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9305 btrfs_release_path(path);
9306 if (found_key.type == 0) {
9307 if (found_key.offset == 0)
9309 key.offset = found_key.offset - 1;
9310 key.type = found_key.type;
9312 key.type = found_key.type - 1;
9313 key.offset = (u64)-1;
9317 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9318 found_key.objectid, found_key.type, found_key.offset);
9320 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9323 btrfs_release_path(path);
/*
 * When the item deleted was the extent or metadata item itself, the block
 * group accounting is updated as well.  EXTENT_ITEM keys carry the length in
 * key.offset; METADATA_ITEM is sized as one nodesize.
 */
9325 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9326 found_key.type == BTRFS_METADATA_ITEM_KEY) {
9327 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9328 found_key.offset : root->fs_info->nodesize;
9330 ret = btrfs_update_block_group(root, bytenr,
9337 btrfs_release_path(path);
9342 * for a single backref, this will allocate a new extent
9343 * and add the backref to it.
/*
 * Write an extent record back into the extent tree for one backref.  An
 * EXTENT_ITEM keyed (rec->start, rec->max_size) is inserted and filled in,
 * with a btrfs_tree_block_info appended for tree blocks, and the block group
 * accounting is updated.  The backref itself is then added through
 * btrfs_inc_extent_ref(): once per found reference for data, once for a tree
 * block.
 */
9345 static int record_extent(struct btrfs_trans_handle *trans,
9346 struct btrfs_fs_info *info,
9347 struct btrfs_path *path,
9348 struct extent_record *rec,
9349 struct extent_backref *back,
9350 int allocated, u64 flags)
9353 struct btrfs_root *extent_root = info->extent_root;
9354 struct extent_buffer *leaf;
9355 struct btrfs_key ins_key;
9356 struct btrfs_extent_item *ei;
9357 struct data_backref *dback;
9358 struct btrfs_tree_block_info *bi;
9361 rec->max_size = max_t(u64, rec->max_size,
/* The item starts as a bare extent item; tree blocks add a block info. */
9365 u32 item_size = sizeof(*ei);
9368 item_size += sizeof(*bi);
9370 ins_key.objectid = rec->start;
9371 ins_key.offset = rec->max_size;
9372 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9374 ret = btrfs_insert_empty_item(trans, extent_root, path,
9375 &ins_key, item_size);
9379 leaf = path->nodes[0];
9380 ei = btrfs_item_ptr(leaf, path->slots[0],
9381 struct btrfs_extent_item);
/* The reference count starts at 0. */
9383 btrfs_set_extent_refs(leaf, ei, 0);
9384 btrfs_set_extent_generation(leaf, ei, rec->generation);
9386 if (back->is_data) {
9387 btrfs_set_extent_flags(leaf, ei,
9388 BTRFS_EXTENT_FLAG_DATA);
9390 struct btrfs_disk_key copy_key;
/* The tree block info sits directly behind the extent item. */
9392 bi = (struct btrfs_tree_block_info *)(ei + 1);
9393 memset_extent_buffer(leaf, 0, (unsigned long)bi,
/* Only the objectid of the block's first key is recorded; type and offset are 0. */
9396 btrfs_set_disk_key_objectid(&copy_key,
9397 rec->info_objectid);
9398 btrfs_set_disk_key_type(&copy_key, 0);
9399 btrfs_set_disk_key_offset(&copy_key, 0);
9401 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9402 btrfs_set_tree_block_key(leaf, bi, &copy_key);
9404 btrfs_set_extent_flags(leaf, ei,
9405 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9408 btrfs_mark_buffer_dirty(leaf);
9409 ret = btrfs_update_block_group(extent_root, rec->start,
9410 rec->max_size, 1, 0);
9413 btrfs_release_path(path);
9416 if (back->is_data) {
9420 dback = to_data_backref(back);
9421 if (back->full_backref)
9422 parent = dback->parent;
9426 for (i = 0; i < dback->found_ref; i++) {
9427 /* if parent != 0, we're doing a full backref
9428 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9429 * just makes the backref allocator create a data
9432 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9433 rec->start, rec->max_size,
9437 BTRFS_FIRST_FREE_OBJECTID :
9443 fprintf(stderr, "adding new data backref"
9444 " on %llu %s %llu owner %llu"
9445 " offset %llu found %d\n",
9446 (unsigned long long)rec->start,
9447 back->full_backref ?
9449 back->full_backref ?
9450 (unsigned long long)parent :
9451 (unsigned long long)dback->root,
9452 (unsigned long long)dback->owner,
9453 (unsigned long long)dback->offset,
9457 struct tree_backref *tback;
9459 tback = to_tree_backref(back);
9460 if (back->full_backref)
9461 parent = tback->parent;
9465 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9466 rec->start, rec->max_size,
9467 parent, tback->root, 0, 0);
9468 fprintf(stderr, "adding new tree backref on "
9469 "start %llu len %llu parent %llu root %llu\n",
9470 rec->start, rec->max_size, parent, tback->root);
9473 btrfs_release_path(path);
/*
 * Walk the `entries` list looking for an extent_entry whose bytenr and bytes
 * both equal the values given.
 */
9477 static struct extent_entry *find_entry(struct list_head *entries,
9478 u64 bytenr, u64 bytes)
9480 struct extent_entry *entry = NULL;
9482 list_for_each_entry(entry, entries, list) {
9483 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Walk the entry list and pick the most trustworthy entry by comparing the
 * `count` fields.  `best` is the current candidate and `prev` the previously
 * visited entry; equal counts are treated as inconclusive.
 */
9490 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9492 struct extent_entry *entry, *best = NULL, *prev = NULL;
9494 list_for_each_entry(entry, entries, list) {
9496 * If there are as many broken entries as entries then we know
9497 * not to trust this particular entry.
9499 if (entry->broken == entry->count)
9503 * Special case, when there are only two entries and 'best' is
9513 * If our current entry == best then we can't be sure our best
9514 * is really the best, so we need to keep searching.
9516 if (best && best->count == entry->count) {
/* Prev == entry, not good enough, have to keep searching */
9523 if (!prev->broken && prev->count == entry->count)
/* The higher count wins, first between prev and entry, then against best. */
9527 best = (prev->count > entry->count) ? prev : entry;
9528 else if (best->count < entry->count)
/*
 * Rewrite the file extent item referenced by `dback` so that its disk bytenr,
 * offset and disk size agree with `entry`.  The item is first located with a
 * read-only search, then modified inside a transaction that is committed at
 * the end.
 */
9536 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9537 struct data_backref *dback, struct extent_entry *entry)
9539 struct btrfs_trans_handle *trans;
9540 struct btrfs_root *root;
9541 struct btrfs_file_extent_item *fi;
9542 struct extent_buffer *leaf;
9543 struct btrfs_key key;
/* Read the fs root the backref belongs to. */
9547 key.objectid = dback->root;
9548 key.type = BTRFS_ROOT_ITEM_KEY;
9549 key.offset = (u64)-1;
9550 root = btrfs_read_fs_root(info, &key);
9552 fprintf(stderr, "Couldn't find root for our ref\n");
9557 * The backref points to the original offset of the extent if it was
9558 * split, so we need to search down to the offset we have and then walk
9559 * forward until we find the backref we're looking for.
9561 key.objectid = dback->owner;
9562 key.type = BTRFS_EXTENT_DATA_KEY;
9563 key.offset = dback->offset;
9564 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9566 fprintf(stderr, "Error looking up ref %d\n", ret);
9571 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9572 ret = btrfs_next_leaf(root, path);
9574 fprintf(stderr, "Couldn't find our ref, next\n");
9578 leaf = path->nodes[0];
9579 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
/* Leaving the EXTENT_DATA items of this inode means the ref was not found. */
9580 if (key.objectid != dback->owner ||
9581 key.type != BTRFS_EXTENT_DATA_KEY) {
9582 fprintf(stderr, "Couldn't find our ref, search\n");
9585 fi = btrfs_item_ptr(leaf, path->slots[0],
9586 struct btrfs_file_extent_item);
9587 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9588 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* The wanted item is the one whose disk bytenr and size match the backref. */
9590 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9595 btrfs_release_path(path);
9597 trans = btrfs_start_transaction(root, 1);
9599 return PTR_ERR(trans);
9602 * Ok we have the key of the file extent we want to fix, now we can cow
9603 * down to the thing and fix it.
9605 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9607 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9608 key.objectid, key.type, key.offset, ret);
9612 fprintf(stderr, "Well that's odd, we just found this key "
9613 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9618 leaf = path->nodes[0];
9619 fi = btrfs_item_ptr(leaf, path->slots[0],
9620 struct btrfs_file_extent_item);
/* A compressed extent with a different start is reported as unsupported. */
9622 if (btrfs_file_extent_compression(leaf, fi) &&
9623 dback->disk_bytenr != entry->bytenr) {
9624 fprintf(stderr, "Ref doesn't match the record start and is "
9625 "compressed, please take a btrfs-image of this file "
9626 "system and send it to a btrfs developer so they can "
9627 "complete this functionality for bytenr %Lu\n",
9628 dback->disk_bytenr);
/* Broken backref: only the disk bytenr is replaced. */
9633 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9634 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
/* The ref starts after the entry: it has to end within the entry. */
9635 } else if (dback->disk_bytenr > entry->bytenr) {
9636 u64 off_diff, offset;
9638 off_diff = dback->disk_bytenr - entry->bytenr;
9639 offset = btrfs_file_extent_offset(leaf, fi);
9640 if (dback->disk_bytenr + offset +
9641 btrfs_file_extent_num_bytes(leaf, fi) >
9642 entry->bytenr + entry->bytes) {
9643 fprintf(stderr, "Ref is past the entry end, please "
9644 "take a btrfs-image of this file system and "
9645 "send it to a btrfs developer, ref %Lu\n",
9646 dback->disk_bytenr);
9651 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9652 btrfs_set_file_extent_offset(leaf, fi, offset);
/* The ref starts before the entry: the referenced data must not start before the entry does. */
9653 } else if (dback->disk_bytenr < entry->bytenr) {
9656 offset = btrfs_file_extent_offset(leaf, fi);
9657 if (dback->disk_bytenr + offset < entry->bytenr) {
9658 fprintf(stderr, "Ref is before the entry start, please"
9659 " take a btrfs-image of this file system and "
9660 "send it to a btrfs developer, ref %Lu\n",
9661 dback->disk_bytenr);
/* Re-express the offset relative to the entry's start. */
9666 offset += dback->disk_bytenr;
9667 offset -= entry->bytenr;
9668 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9669 btrfs_set_file_extent_offset(leaf, fi, offset);
9672 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9675 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9676 * only do this if we aren't using compression, otherwise it's a
9679 if (!btrfs_file_extent_compression(leaf, fi))
9680 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9682 printf("ram bytes may be wrong?\n");
9683 btrfs_mark_buffer_dirty(leaf);
9685 err = btrfs_commit_transaction(trans, root);
9686 btrfs_release_path(path);
/* An earlier failure in ret takes precedence over the commit result. */
9687 return ret ? ret : err;
/*
 * verify_backrefs - make the data backrefs of @rec agree on one extent range.
 *
 * @info: filesystem handle, forwarded to repair_ref().
 * @path: scratch path, forwarded to repair_ref().
 * @rec:  extent record whose backref_tree is examined.
 *
 * Steps visible in this listing:
 *  1. Walk rec->backref_tree and, for each data backref that is not a full
 *     backref and has found_ref != 0, look up its disk_bytenr in the local
 *     entries list via find_entry(); a missing entry is allocated, zeroed,
 *     filled with (disk_bytenr, bytes) and appended.
 *  2. If there is at most one entry and no mismatch against rec->start and
 *     rec->nr, the backrefs already agree.
 *  3. Otherwise pick a winner with find_most_right_entry(). The extent record
 *     range (rec->start, rec->nr) is looked up as a tie breaker; when no
 *     entry matches it but broken_entries and rec->found_rec are both set, it
 *     is added as an entry of its own and the winner is picked again.
 *  4. A winner whose bytenr differs from rec->start is reported and not
 *     repaired.
 *  5. Every remaining data backref that disagrees with the winner is passed
 *     to repair_ref(), then the entries list is emptied.
 *
 * NOTE(review): several source lines of this function (some declarations,
 * braces, early exits and the final return) are absent from this listing, so
 * the exact return values cannot be confirmed here.
 */
9690 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9691 struct extent_record *rec)
9693 struct extent_backref *back, *tmp;
9694 struct data_backref *dback;
9695 struct extent_entry *entry, *best = NULL;
9698 int broken_entries = 0;
9703 * Metadata is easy and the backrefs should always agree on bytenr and
9704 * size, if not we've got bigger issues.
9709 rbtree_postorder_for_each_entry_safe(back, tmp,
9710 &rec->backref_tree, node) {
9711 if (back->full_backref || !back->is_data)
9714 dback = to_data_backref(back);
9717 * We only pay attention to backrefs that we found a real
9720 if (dback->found_ref == 0)
9724 * For now we only catch when the bytes don't match, not the
9725 * bytenr. We can easily do this at the same time, but I want
9726 * to have a fs image to test on before we just add repair
9727 * functionality willy-nilly so we know we won't screw up the
9731 entry = find_entry(&entries, dback->disk_bytenr,
9734 entry = malloc(sizeof(struct extent_entry));
9739 memset(entry, 0, sizeof(*entry));
9740 entry->bytenr = dback->disk_bytenr;
9741 entry->bytes = dback->bytes;
9742 list_add_tail(&entry->list, &entries);
9747 * If we only have on entry we may think the entries agree when
9748 * in reality they don't so we have to do some extra checking.
9750 if (dback->disk_bytenr != rec->start ||
9751 dback->bytes != rec->nr || back->broken)
9762 /* Yay all the backrefs agree, carry on good sir */
9763 if (nr_entries <= 1 && !mismatch)
9766 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9767 "%Lu\n", rec->start);
9770 * First we want to see if the backrefs can agree amongst themselves who
9771 * is right, so figure out which one of the entries has the highest
9774 best = find_most_right_entry(&entries);
9777 * Ok so we may have an even split between what the backrefs think, so
9778 * this is where we use the extent ref to see what it thinks.
9781 entry = find_entry(&entries, rec->start, rec->nr);
9782 if (!entry && (!broken_entries || !rec->found_rec)) {
9783 fprintf(stderr, "Backrefs don't agree with each other "
9784 "and extent record doesn't agree with anybody,"
9785 " so we can't fix bytenr %Lu bytes %Lu\n",
9786 rec->start, rec->nr);
9789 } else if (!entry) {
9791 * Ok our backrefs were broken, we'll assume this is the
9792 * correct value and add an entry for this range.
9794 entry = malloc(sizeof(struct extent_entry));
9799 memset(entry, 0, sizeof(*entry));
9800 entry->bytenr = rec->start;
9801 entry->bytes = rec->nr;
9802 list_add_tail(&entry->list, &entries);
9806 best = find_most_right_entry(&entries);
9808 fprintf(stderr, "Backrefs and extent record evenly "
9809 "split on who is right, this is going to "
9810 "require user input to fix bytenr %Lu bytes "
9811 "%Lu\n", rec->start, rec->nr);
9818 * I don't think this can happen currently as we'll abort() if we catch
9819 * this case higher up, but in case somebody removes that we still can't
9820 * deal with it properly here yet, so just bail out of that's the case.
9822 if (best->bytenr != rec->start) {
9823 fprintf(stderr, "Extent start and backref starts don't match, "
9824 "please use btrfs-image on this file system and send "
9825 "it to a btrfs developer so they can make fsck fix "
9826 "this particular case. bytenr is %Lu, bytes is %Lu\n",
9827 rec->start, rec->nr);
9833 * Ok great we all agreed on an extent record, let's go find the real
9834 * references and fix up the ones that don't match.
9836 rbtree_postorder_for_each_entry_safe(back, tmp,
9837 &rec->backref_tree, node) {
9838 if (back->full_backref || !back->is_data)
9841 dback = to_data_backref(back);
9844 * Still ignoring backrefs that don't have a real ref attached
9847 if (dback->found_ref == 0)
9850 if (dback->bytes == best->bytes &&
9851 dback->disk_bytenr == best->bytenr)
9854 ret = repair_ref(info, path, dback, best);
9860 * Ok we messed with the actual refs, which means we need to drop our
9861 * entire cache and go back and rescan. I know this is a huge pain and
9862 * adds a lot of extra work, but it's the only way to be safe. Once all
9863 * the backrefs agree we may not need to do anything to the extent
9868 while (!list_empty(&entries)) {
9869 entry = list_entry(entries.next, struct extent_entry, list);
9870 list_del_init(&entry->list);
/*
 * process_duplicates - replace an extent record that has no extent item
 * behind it with its single recorded duplicate.
 *
 * @extent_cache: cache tree holding the extent records.
 * @rec:          record taken off the duplicate_extents list by the caller.
 *
 * Nothing is rewritten when rec->found_rec is set or rec has more than one
 * duplicate. Otherwise rec is removed from @extent_cache and the first entry
 * of rec->dups becomes the new record (good): its list heads, cache range,
 * check flags and duplicate count are reset, and refs plus the backrefs list
 * are taken over from rec. Records still overlapping good in the cache are
 * then absorbed: one with found_rec or duplicates of its own is chained onto
 * good->dups (and good is queued on duplicate_extents), any other has its
 * refs and backrefs merged into good. Finally good is inserted into the
 * cache.
 *
 * Returns 0 when good ended up with duplicates that still need deleting,
 * 1 otherwise (see the last statement).
 *
 * NOTE(review): only the backrefs list is spliced here, while other
 * functions in this file walk rec->backref_tree; confirm which container
 * actually carries the backrefs.
 * NOTE(review): some lines (loop header, braces, early returns, the check
 * after insert_cache_extent) are absent from this listing.
 */
9876 static int process_duplicates(struct cache_tree *extent_cache,
9877 struct extent_record *rec)
9879 struct extent_record *good, *tmp;
9880 struct cache_extent *cache;
9884 * If we found a extent record for this extent then return, or if we
9885 * have more than one duplicate we are likely going to need to delete
9888 if (rec->found_rec || rec->num_duplicates > 1)
9891 /* Shouldn't happen but just in case */
9892 BUG_ON(!rec->num_duplicates);
9895 * So this happens if we end up with a backref that doesn't match the
9896 * actual extent entry. So either the backref is bad or the extent
9897 * entry is bad. Either way we want to have the extent_record actually
9898 * reflect what we found in the extent_tree, so we need to take the
9899 * duplicate out and use that as the extent_record since the only way we
9900 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9902 remove_cache_extent(extent_cache, &rec->cache);
9904 good = to_extent_record(rec->dups.next);
9905 list_del_init(&good->list);
9906 INIT_LIST_HEAD(&good->backrefs);
9907 INIT_LIST_HEAD(&good->dups);
9908 good->cache.start = good->start;
9909 good->cache.size = good->nr;
9910 good->content_checked = 0;
9911 good->owner_ref_checked = 0;
9912 good->num_duplicates = 0;
9913 good->refs = rec->refs;
9914 list_splice_init(&rec->backrefs, &good->backrefs);
9916 cache = lookup_cache_extent(extent_cache, good->start,
9920 tmp = container_of(cache, struct extent_record, cache);
9923 * If we find another overlapping extent and it's found_rec is
9924 * set then it's a duplicate and we need to try and delete
9927 if (tmp->found_rec || tmp->num_duplicates > 0) {
9928 if (list_empty(&good->list))
9929 list_add_tail(&good->list,
9930 &duplicate_extents);
9931 good->num_duplicates += tmp->num_duplicates + 1;
9932 list_splice_init(&tmp->dups, &good->dups);
9933 list_del_init(&tmp->list);
9934 list_add_tail(&tmp->list, &good->dups);
9935 remove_cache_extent(extent_cache, &tmp->cache);
9940 * Ok we have another non extent item backed extent rec, so lets
9941 * just add it to this extent and carry on like we did above.
9943 good->refs += tmp->refs;
9944 list_splice_init(&tmp->backrefs, &good->backrefs);
9945 remove_cache_extent(extent_cache, &tmp->cache);
9948 ret = insert_cache_extent(extent_cache, &good->cache);
9951 return good->num_duplicates ? 0 : 1;
/*
 * delete_duplicate_records - remove the extent items of the duplicates of
 * @rec from the extent tree.
 *
 * @root: any root of the filesystem; only root->fs_info->extent_root is used.
 * @rec:  extent record whose dups list holds the overlapping records.
 *
 * The loop over rec->dups compares start and nr of each duplicate against the
 * current candidate (good) and reports overlapping extents that do not fully
 * cover each other. rec and its duplicates are then gathered on a local
 * delete_list, a transaction is started on the extent root, and for every
 * listed record with found_rec set the key (start, BTRFS_EXTENT_ITEM_KEY, nr)
 * is looked up with btrfs_search_slot(..., -1, 1) and removed with
 * btrfs_del_item(). Metadata records are reported as unexpected. After the
 * commit both delete_list and rec->dups are emptied.
 *
 * Returns ret when it is non-zero, otherwise nr_del; rec->num_duplicates is
 * cleared only when both are zero.
 *
 * NOTE(review): the overlap message prints start and nr as if nr were an end
 * offset, although nr is added to start as a length in the check just above
 * it; the printed ranges are therefore start/length pairs.
 * NOTE(review): lines are missing from this listing (initialisation of good,
 * how good is kept off delete_list, error handling, frees), so those parts
 * are not described.
 */
9954 static int delete_duplicate_records(struct btrfs_root *root,
9955 struct extent_record *rec)
9957 struct btrfs_trans_handle *trans;
9958 LIST_HEAD(delete_list);
9959 struct btrfs_path path;
9960 struct extent_record *tmp, *good, *n;
9963 struct btrfs_key key;
9965 btrfs_init_path(&path);
9968 /* Find the record that covers all of the duplicates. */
9969 list_for_each_entry(tmp, &rec->dups, list) {
9970 if (good->start < tmp->start)
9972 if (good->nr > tmp->nr)
9975 if (tmp->start + tmp->nr < good->start + good->nr) {
9976 fprintf(stderr, "Ok we have overlapping extents that "
9977 "aren't completely covered by each other, this "
9978 "is going to require more careful thought. "
9979 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9980 tmp->start, tmp->nr, good->start, good->nr);
9987 list_add_tail(&rec->list, &delete_list);
9989 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9992 list_move_tail(&tmp->list, &delete_list);
9995 root = root->fs_info->extent_root;
9996 trans = btrfs_start_transaction(root, 1);
9997 if (IS_ERR(trans)) {
9998 ret = PTR_ERR(trans);
10002 list_for_each_entry(tmp, &delete_list, list) {
10003 if (tmp->found_rec == 0)
10005 key.objectid = tmp->start;
10006 key.type = BTRFS_EXTENT_ITEM_KEY;
10007 key.offset = tmp->nr;
10009 /* Shouldn't happen but just in case */
10010 if (tmp->metadata) {
10011 fprintf(stderr, "Well this shouldn't happen, extent "
10012 "record overlaps but is metadata? "
10013 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
10017 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10023 ret = btrfs_del_item(trans, root, &path);
10026 btrfs_release_path(&path);
10029 err = btrfs_commit_transaction(trans, root);
10033 while (!list_empty(&delete_list)) {
10034 tmp = to_extent_record(delete_list.next);
10035 list_del_init(&tmp->list);
10041 while (!list_empty(&rec->dups)) {
10042 tmp = to_extent_record(rec->dups.next);
10043 list_del_init(&tmp->list);
10047 btrfs_release_path(&path);
10049 if (!ret && !nr_del)
10050 rec->num_duplicates = 0;
10052 return ret ? ret : nr_del;
/*
 * find_possible_backrefs - try to locate file extents for data backrefs of
 * @rec that were never matched to a real reference.
 *
 * For every data backref in rec->backref_tree that is not a full backref and
 * has found_ref == 0:
 *  - read the fs root named by dback->root; -ENOENT means the backref is bad
 *    and it is skipped, other errors are returned as PTR_ERR(root);
 *  - search that root for (dback->owner, BTRFS_EXTENT_DATA_KEY,
 *    dback->offset) without a transaction handle;
 *  - read disk_bytenr and disk_num_bytes from the file extent item;
 *  - look the bytenr up in @extent_cache and test found_rec on the record
 *    found there (per the comment, such a backref is not adopted);
 *  - otherwise bump dback->found_ref and overwrite dback->disk_bytenr and
 *    dback->bytes with the values from the file extent item.
 *
 * NOTE(review): the inner declaration of tmp (struct extent_record *) shadows
 * the iterator variable tmp (struct extent_backref *) of the enclosing loop.
 * NOTE(review): lines are missing from this listing (result checks after
 * btrfs_search_slot, the statement that marks the backref as untrusted,
 * returns), so those parts are not described.
 */
10055 static int find_possible_backrefs(struct btrfs_fs_info *info,
10056 struct btrfs_path *path,
10057 struct cache_tree *extent_cache,
10058 struct extent_record *rec)
10060 struct btrfs_root *root;
10061 struct extent_backref *back, *tmp;
10062 struct data_backref *dback;
10063 struct cache_extent *cache;
10064 struct btrfs_file_extent_item *fi;
10065 struct btrfs_key key;
10069 rbtree_postorder_for_each_entry_safe(back, tmp,
10070 &rec->backref_tree, node) {
10071 /* Don't care about full backrefs (poor unloved backrefs) */
10072 if (back->full_backref || !back->is_data)
10075 dback = to_data_backref(back);
10077 /* We found this one, we don't need to do a lookup */
10078 if (dback->found_ref)
10081 key.objectid = dback->root;
10082 key.type = BTRFS_ROOT_ITEM_KEY;
10083 key.offset = (u64)-1;
10085 root = btrfs_read_fs_root(info, &key);
10087 /* No root, definitely a bad ref, skip */
10088 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
10090 /* Other err, exit */
10092 return PTR_ERR(root);
10094 key.objectid = dback->owner;
10095 key.type = BTRFS_EXTENT_DATA_KEY;
10096 key.offset = dback->offset;
10097 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
10099 btrfs_release_path(path);
10102 /* Didn't find it, we can carry on */
10107 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
10108 struct btrfs_file_extent_item);
10109 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
10110 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
10111 btrfs_release_path(path);
10112 cache = lookup_cache_extent(extent_cache, bytenr, 1);
10114 struct extent_record *tmp;
10115 tmp = container_of(cache, struct extent_record, cache);
10118 * If we found an extent record for the bytenr for this
10119 * particular backref then we can't add it to our
10120 * current extent record. We only want to add backrefs
10121 * that don't have a corresponding extent item in the
10122 * extent tree since they likely belong to this record
10123 * and we need to fix it if it doesn't match bytenrs.
10125 if (tmp->found_rec)
10129 dback->found_ref += 1;
10130 dback->disk_bytenr = bytenr;
10131 dback->bytes = bytes;
10134 * Set this so the verify backref code knows not to trust the
10135 * values in this backref.
10144 * Record orphan data ref into corresponding root.
10146 * Return 0 if the extent item contains data ref and recorded.
10147 * Return 1 if the extent item contains no useful data ref
10148 * On that case, it may contains only shared_dataref or metadata backref
10149 * or the file extent exists(this should be handled by the extent bytenr
10150 * recovery routine)
10151 * Return <0 if something goes wrong.
10153 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10154 struct extent_record *rec)
10156 struct btrfs_key key;
10157 struct btrfs_root *dest_root;
10158 struct extent_backref *back, *tmp;
10159 struct data_backref *dback;
10160 struct orphan_data_extent *orphan;
10161 struct btrfs_path path;
10162 int recorded_data_ref = 0;
10167 btrfs_init_path(&path);
10168 rbtree_postorder_for_each_entry_safe(back, tmp,
10169 &rec->backref_tree, node) {
10170 if (back->full_backref || !back->is_data ||
10171 !back->found_extent_tree)
10173 dback = to_data_backref(back);
10174 if (dback->found_ref)
10176 key.objectid = dback->root;
10177 key.type = BTRFS_ROOT_ITEM_KEY;
10178 key.offset = (u64)-1;
10180 dest_root = btrfs_read_fs_root(fs_info, &key);
10182 /* For non-exist root we just skip it */
10183 if (IS_ERR(dest_root) || !dest_root)
10186 key.objectid = dback->owner;
10187 key.type = BTRFS_EXTENT_DATA_KEY;
10188 key.offset = dback->offset;
10190 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10191 btrfs_release_path(&path);
10193 * For ret < 0, it's OK since the fs-tree may be corrupted,
10194 * we need to record it for inode/file extent rebuild.
10195 * For ret > 0, we record it only for file extent rebuild.
10196 * For ret == 0, the file extent exists but only bytenr
10197 * mismatch, let the original bytenr fix routine to handle,
10203 orphan = malloc(sizeof(*orphan));
10208 INIT_LIST_HEAD(&orphan->list);
10209 orphan->root = dback->root;
10210 orphan->objectid = dback->owner;
10211 orphan->offset = dback->offset;
10212 orphan->disk_bytenr = rec->cache.start;
10213 orphan->disk_len = rec->cache.size;
10214 list_add(&dest_root->orphan_data_extents, &orphan->list);
10215 recorded_data_ref = 1;
10218 btrfs_release_path(&path);
10220 return !recorded_data_ref;
10226 * when an incorrect extent item is found, this will delete
10227 * all of the existing entries for it and recreate them
10228 * based on what the tree scan found.
/*
 * fixup_extent_refs - rebuild the extent tree items of @rec.
 *
 * For data extents whose counted refs differ from extent_item_refs,
 * find_possible_backrefs() is run first. Then:
 *  step one   verify_backrefs() makes the backrefs agree,
 *  step two   delete_extent_records() removes the existing items inside a
 *             transaction on info->extent_root,
 *  step three record_extent() is called for backrefs with found_ref set;
 *             before that the range is looked up in info->corrupt_blocks
 *             (per the comment, no references are added to a corrupt block).
 * BTRFS_BLOCK_FLAG_FULL_BACKREF is passed on when rec->flag_block_full_backref
 * is set, and rec->bad_full_backref is cleared before each record_extent().
 * The transaction is committed and a "Repaired extent references" message is
 * printed to stderr.
 *
 * NOTE(review): the lines between the IS_ERR(trans) check and the
 * btrfs_commit_transaction() call are not all visible here; confirm that an
 * error pointer stored in trans can never reach the commit.
 * NOTE(review): result checks, labels and the return statement are absent
 * from this listing.
 */
10230 static int fixup_extent_refs(struct btrfs_fs_info *info,
10231 struct cache_tree *extent_cache,
10232 struct extent_record *rec)
10234 struct btrfs_trans_handle *trans = NULL;
10236 struct btrfs_path path;
10237 struct cache_extent *cache;
10238 struct extent_backref *back, *tmp;
10242 if (rec->flag_block_full_backref)
10243 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10245 btrfs_init_path(&path);
10246 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10248 * Sometimes the backrefs themselves are so broken they don't
10249 * get attached to any meaningful rec, so first go back and
10250 * check any of our backrefs that we couldn't find and throw
10251 * them into the list if we find the backref so that
10252 * verify_backrefs can figure out what to do.
10254 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10259 /* step one, make sure all of the backrefs agree */
10260 ret = verify_backrefs(info, &path, rec);
10264 trans = btrfs_start_transaction(info->extent_root, 1);
10265 if (IS_ERR(trans)) {
10266 ret = PTR_ERR(trans);
10270 /* step two, delete all the existing records */
10271 ret = delete_extent_records(trans, info->extent_root, &path,
10277 /* was this block corrupt? If so, don't add references to it */
10278 cache = lookup_cache_extent(info->corrupt_blocks,
10279 rec->start, rec->max_size);
10285 /* step three, recreate all the refs we did find */
10286 rbtree_postorder_for_each_entry_safe(back, tmp,
10287 &rec->backref_tree, node) {
10289 * if we didn't find any references, don't create a
10290 * new extent record
10292 if (!back->found_ref)
10295 rec->bad_full_backref = 0;
10296 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10304 int err = btrfs_commit_transaction(trans, info->extent_root);
10310 fprintf(stderr, "Repaired extent references for %llu\n",
10311 (unsigned long long)rec->start);
10313 btrfs_release_path(&path);
/*
 * fixup_extent_flags - rewrite BTRFS_BLOCK_FLAG_FULL_BACKREF in the extent
 * item of @rec so that it matches rec->flag_block_full_backref.
 *
 * The item is looked up as (rec->start, BTRFS_METADATA_ITEM_KEY,
 * rec->info_level) for metadata and (rec->start, BTRFS_EXTENT_ITEM_KEY,
 * rec->max_size) otherwise. On the two early exits after the search the path
 * is released and the transaction is committed with the commit result
 * ignored. On a hit the flag is set or cleared, the leaf is marked dirty, the
 * path is released and the commit result is stored in ret; a "Repaired extent
 * flags" message is printed to stderr.
 *
 * Returns PTR_ERR(trans) on the path right after btrfs_start_transaction().
 * NOTE(review): the conditions guarding the early exits and the remaining
 * return statements are absent from this listing.
 */
10317 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10318 struct extent_record *rec)
10320 struct btrfs_trans_handle *trans;
10321 struct btrfs_root *root = fs_info->extent_root;
10322 struct btrfs_path path;
10323 struct btrfs_extent_item *ei;
10324 struct btrfs_key key;
10328 key.objectid = rec->start;
10329 if (rec->metadata) {
10330 key.type = BTRFS_METADATA_ITEM_KEY;
10331 key.offset = rec->info_level;
10333 key.type = BTRFS_EXTENT_ITEM_KEY;
10334 key.offset = rec->max_size;
10337 trans = btrfs_start_transaction(root, 0);
10339 return PTR_ERR(trans);
10341 btrfs_init_path(&path);
10342 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10344 btrfs_release_path(&path);
10345 btrfs_commit_transaction(trans, root);
10348 fprintf(stderr, "Didn't find extent for %llu\n",
10349 (unsigned long long)rec->start);
10350 btrfs_release_path(&path);
10351 btrfs_commit_transaction(trans, root);
10355 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10356 struct btrfs_extent_item);
10357 flags = btrfs_extent_flags(path.nodes[0], ei);
10358 if (rec->flag_block_full_backref) {
10359 fprintf(stderr, "setting full backref on %llu\n",
10360 (unsigned long long)key.objectid);
10361 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10363 fprintf(stderr, "clearing full backref on %llu\n",
10364 (unsigned long long)key.objectid);
10365 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10367 btrfs_set_extent_flags(path.nodes[0], ei, flags);
10368 btrfs_mark_buffer_dirty(path.nodes[0]);
10369 btrfs_release_path(&path);
10370 ret = btrfs_commit_transaction(trans, root);
10372 fprintf(stderr, "Repaired extent flags for %llu\n",
10373 (unsigned long long)rec->start);
10378 /* right now we only prune from the extent allocation tree */
/*
 * prune_one_block - delete the parent pointer to one corrupt block.
 *
 * The search runs on info->extent_root for corrupt->key with
 * path.lowest_level set to corrupt->level + 1, so it stops at the parent of
 * the corrupt block. The slot returned by the search is tried first; if its
 * block pointer is not corrupt->cache.start, every slot of that node is
 * scanned. The pointer is then removed with btrfs_del_ptr().
 *
 * NOTE(review): @trans is only handed to btrfs_search_slot() in the visible
 * lines. Return values, labels and the handling of the not-found case are
 * absent from this listing.
 */
10379 static int prune_one_block(struct btrfs_trans_handle *trans,
10380 struct btrfs_fs_info *info,
10381 struct btrfs_corrupt_block *corrupt)
10384 struct btrfs_path path;
10385 struct extent_buffer *eb;
10389 int level = corrupt->level + 1;
10391 btrfs_init_path(&path);
10393 /* we want to stop at the parent to our busted block */
10394 path.lowest_level = level;
10396 ret = btrfs_search_slot(trans, info->extent_root,
10397 &corrupt->key, &path, -1, 1);
10402 eb = path.nodes[level];
10409 * hopefully the search gave us the block we want to prune,
10410 * lets try that first
10412 slot = path.slots[level];
10413 found = btrfs_node_blockptr(eb, slot);
10414 if (found == corrupt->cache.start)
10417 nritems = btrfs_header_nritems(eb);
10419 /* the search failed, lets scan this node and hope we find it */
10420 for (slot = 0; slot < nritems; slot++) {
10421 found = btrfs_node_blockptr(eb, slot);
10422 if (found == corrupt->cache.start)
10426 * we couldn't find the bad block. TODO, search all the nodes for pointers
10429 if (eb == info->extent_root->node) {
10434 btrfs_release_path(&path);
10439 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10440 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10443 btrfs_release_path(&path);
10447 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10449 struct btrfs_trans_handle *trans = NULL;
10450 struct cache_extent *cache;
10451 struct btrfs_corrupt_block *corrupt;
10454 cache = search_cache_extent(info->corrupt_blocks, 0);
10458 trans = btrfs_start_transaction(info->extent_root, 1);
10460 return PTR_ERR(trans);
10462 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10463 prune_one_block(trans, info, corrupt);
10464 remove_cache_extent(info->corrupt_blocks, cache);
10467 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * reset_cached_block_groups - clear EXTENT_DIRTY ranges from
 * fs_info->free_space_cache and then step through the block groups.
 *
 * The first part asks find_first_extent_bit() for a dirty range starting at
 * offset 0 and clears the range it reports. The second part looks block
 * groups up with btrfs_lookup_first_block_group() and advances start to the
 * end of the group (key.objectid + key.offset).
 *
 * NOTE(review): loop headers, exit conditions and whatever is done to each
 * block group are absent from this listing.
 */
10471 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10473 struct btrfs_block_group_cache *cache;
10478 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10479 &start, &end, EXTENT_DIRTY);
10482 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10487 cache = btrfs_lookup_first_block_group(fs_info, start);
10492 start = cache->key.objectid + cache->key.offset;
/*
 * check_extent_refs - report, and in repair mode fix, inconsistencies in the
 * extent records collected in @extent_cache.
 *
 * @root:         any root of the filesystem; root->fs_info is what is used,
 *                and root is later re-pointed at fs_info->extent_root.
 * @extent_cache: cache tree of extent records; each record is removed from it
 *                once it has been examined.
 *
 * Phases visible in this listing:
 *  1. Mark the range of every extent record and of every corrupt block dirty
 *     in fs_info->excluded_extents (per the comment, so that a repair does
 *     not allocate from them), then prune the corrupt blocks and reset the
 *     cached block groups.
 *  2. While repairing and duplicate_extents is not empty, take a record off
 *     that list, run process_duplicates() and delete_duplicate_records().
 *  3. For each record in the cache print: multiple extent items, ref count
 *     mismatch (followed by record_orphan_data_extents()), backpointer
 *     mismatch, failed owner ref check, bad full backref, metadata crossing
 *     a stripe boundary, chunk type mismatch. With repair and fix set,
 *     fixup_extent_refs() is called; a bad full backref goes through
 *     fixup_extent_flags(). The record is then dropped from the cache, its
 *     backrefs are freed, and its excluded range is cleared when
 *     !init_extent_tree && repair && (!cur_err || fix).
 *  4. A failure other than -EAGAIN prints an abort message; the visible
 *     alternative starts a transaction on the extent root, runs
 *     btrfs_fix_block_accounting() and commits.
 *
 * NOTE(review): declarations, loop headers, the conditions around several of
 * the steps above and the return statements are absent from this listing.
 */
10496 static int check_extent_refs(struct btrfs_root *root,
10497 struct cache_tree *extent_cache)
10499 struct extent_record *rec;
10500 struct cache_extent *cache;
10507 * if we're doing a repair, we have to make sure
10508 * we don't allocate from the problem extents.
10509 * In the worst case, this will be all the
10510 * extents in the FS
10512 cache = search_cache_extent(extent_cache, 0);
10514 rec = container_of(cache, struct extent_record, cache);
10515 set_extent_dirty(root->fs_info->excluded_extents,
10517 rec->start + rec->max_size - 1);
10518 cache = next_cache_extent(cache);
10521 /* pin down all the corrupted blocks too */
10522 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10524 set_extent_dirty(root->fs_info->excluded_extents,
10526 cache->start + cache->size - 1);
10527 cache = next_cache_extent(cache);
10529 prune_corrupt_blocks(root->fs_info);
10530 reset_cached_block_groups(root->fs_info);
10533 reset_cached_block_groups(root->fs_info);
10536 * We need to delete any duplicate entries we find first otherwise we
10537 * could mess up the extent tree when we have backrefs that actually
10538 * belong to a different extent item and not the weird duplicate one.
10540 while (repair && !list_empty(&duplicate_extents)) {
10541 rec = to_extent_record(duplicate_extents.next);
10542 list_del_init(&rec->list);
10544 /* Sometimes we can find a backref before we find an actual
10545 * extent, so we need to process it a little bit to see if there
10546 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10547 * if this is a backref screwup. If we need to delete stuff
10548 * process_duplicates() will return 0, otherwise it will return
10551 if (process_duplicates(extent_cache, rec))
10553 ret = delete_duplicate_records(root, rec);
10557 * delete_duplicate_records will return the number of entries
10558 * deleted, so if it's greater than 0 then we know we actually
10559 * did something and we need to remove.
10572 cache = search_cache_extent(extent_cache, 0);
10575 rec = container_of(cache, struct extent_record, cache);
10576 if (rec->num_duplicates) {
10577 fprintf(stderr, "extent item %llu has multiple extent "
10578 "items\n", (unsigned long long)rec->start);
10582 if (rec->refs != rec->extent_item_refs) {
10583 fprintf(stderr, "ref mismatch on [%llu %llu] ",
10584 (unsigned long long)rec->start,
10585 (unsigned long long)rec->nr);
10586 fprintf(stderr, "extent item %llu, found %llu\n",
10587 (unsigned long long)rec->extent_item_refs,
10588 (unsigned long long)rec->refs);
10589 ret = record_orphan_data_extents(root->fs_info, rec);
10595 if (all_backpointers_checked(rec, 1)) {
10596 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10597 (unsigned long long)rec->start,
10598 (unsigned long long)rec->nr);
10602 if (!rec->owner_ref_checked) {
10603 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10604 (unsigned long long)rec->start,
10605 (unsigned long long)rec->nr);
10610 if (repair && fix) {
10611 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10617 if (rec->bad_full_backref) {
10618 fprintf(stderr, "bad full backref, on [%llu]\n",
10619 (unsigned long long)rec->start);
10621 ret = fixup_extent_flags(root->fs_info, rec);
10629 * Although it's not a extent ref's problem, we reuse this
10630 * routine for error reporting.
10631 * No repair function yet.
10633 if (rec->crossing_stripes) {
10635 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10636 rec->start, rec->start + rec->max_size);
10640 if (rec->wrong_chunk_type) {
10642 "bad extent [%llu, %llu), type mismatch with chunk\n",
10643 rec->start, rec->start + rec->max_size);
10648 remove_cache_extent(extent_cache, cache);
10649 free_all_extent_backrefs(rec);
10650 if (!init_extent_tree && repair && (!cur_err || fix))
10651 clear_extent_dirty(root->fs_info->excluded_extents,
10653 rec->start + rec->max_size - 1);
10658 if (ret && ret != -EAGAIN) {
10659 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10662 struct btrfs_trans_handle *trans;
10664 root = root->fs_info->extent_root;
10665 trans = btrfs_start_transaction(root, 1);
10666 if (IS_ERR(trans)) {
10667 ret = PTR_ERR(trans);
10671 ret = btrfs_fix_block_accounting(trans, root);
10674 ret = btrfs_commit_transaction(trans, root);
10686 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10690 if (type & BTRFS_BLOCK_GROUP_RAID0) {
10691 stripe_size = length;
10692 stripe_size /= num_stripes;
10693 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10694 stripe_size = length * 2;
10695 stripe_size /= num_stripes;
10696 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10697 stripe_size = length;
10698 stripe_size /= (num_stripes - 1);
10699 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10700 stripe_size = length;
10701 stripe_size /= (num_stripes - 2);
10703 stripe_size = length;
10705 return stripe_size;
10709 * Check the chunk with its block group/dev list ref:
10710 * Return 0 if all refs seems valid.
10711 * Return 1 if part of refs seems valid, need later check for rebuild ref
10712 * like missing block group and needs to search extent tree to rebuild them.
10713 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * check_chunk_refs - cross-check one chunk record against its block group
 * item and its device extents.
 *
 * Block group: looked up by the chunk range in block_group_cache. Length,
 * offset and type flags of the chunk are compared with offset, objectid and
 * flags of the block group record and a mismatch is printed. The block group
 * record is unlinked from its list and stored in chunk_rec->bg_rec (the
 * branch structure around these two statements is not visible here). A
 * missing block group is printed as well.
 *
 * Device extents: the expected per-stripe length comes from
 * calc_stripe_length(). For each stripe the dev extent at (devid, offset) is
 * looked up; objectid, offset, chunk_offset and length must all match,
 * otherwise the mismatch is printed. Dev extents are moved onto
 * chunk_rec->dextents with list_move(). A missing dev extent is printed as
 * well.
 *
 * The meaning of the return value is given in the comment that precedes this
 * function in the file.
 * NOTE(review): the fourth parameter, the conditions around the messages,
 * the use of metadump_v2 and the return statements are absent from this
 * listing.
 */
10715 static int check_chunk_refs(struct chunk_record *chunk_rec,
10716 struct block_group_tree *block_group_cache,
10717 struct device_extent_tree *dev_extent_cache,
10720 struct cache_extent *block_group_item;
10721 struct block_group_record *block_group_rec;
10722 struct cache_extent *dev_extent_item;
10723 struct device_extent_record *dev_extent_rec;
10727 int metadump_v2 = 0;
10731 block_group_item = lookup_cache_extent(&block_group_cache->tree,
10733 chunk_rec->length);
10734 if (block_group_item) {
10735 block_group_rec = container_of(block_group_item,
10736 struct block_group_record,
10738 if (chunk_rec->length != block_group_rec->offset ||
10739 chunk_rec->offset != block_group_rec->objectid ||
10741 chunk_rec->type_flags != block_group_rec->flags)) {
10744 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10745 chunk_rec->objectid,
10750 chunk_rec->type_flags,
10751 block_group_rec->objectid,
10752 block_group_rec->type,
10753 block_group_rec->offset,
10754 block_group_rec->offset,
10755 block_group_rec->objectid,
10756 block_group_rec->flags);
10759 list_del_init(&block_group_rec->list);
10760 chunk_rec->bg_rec = block_group_rec;
10765 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10766 chunk_rec->objectid,
10771 chunk_rec->type_flags);
10778 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10779 chunk_rec->num_stripes);
10780 for (i = 0; i < chunk_rec->num_stripes; ++i) {
10781 devid = chunk_rec->stripes[i].devid;
10782 offset = chunk_rec->stripes[i].offset;
10783 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10784 devid, offset, length);
10785 if (dev_extent_item) {
10786 dev_extent_rec = container_of(dev_extent_item,
10787 struct device_extent_record,
10789 if (dev_extent_rec->objectid != devid ||
10790 dev_extent_rec->offset != offset ||
10791 dev_extent_rec->chunk_offset != chunk_rec->offset ||
10792 dev_extent_rec->length != length) {
10795 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10796 chunk_rec->objectid,
10799 chunk_rec->stripes[i].devid,
10800 chunk_rec->stripes[i].offset,
10801 dev_extent_rec->objectid,
10802 dev_extent_rec->offset,
10803 dev_extent_rec->length);
10806 list_move(&dev_extent_rec->chunk_list,
10807 &chunk_rec->dextents);
10812 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10813 chunk_rec->objectid,
10816 chunk_rec->stripes[i].devid,
10817 chunk_rec->stripes[i].offset);
10824 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * check_chunks - run check_chunk_refs() on every chunk in @chunk_cache and
 * sort the chunk records by result.
 *
 * @good, @bad, @rebuild: optional lists; a chunk is appended to @good when
 *     check_chunk_refs() returned 0, to @rebuild when it returned > 0 and to
 *     @bad when it returned < 0. A NULL list is skipped.
 * @silent: forwarded to check_chunk_refs().
 *
 * Afterwards the block groups still linked on
 * block_group_cache->block_groups and the dev extents still linked on
 * dev_extent_cache->no_chunk_orphans are reported as having no chunk.
 *
 * NOTE(review): the conditions around the messages, most message arguments
 * and the return value are absent from this listing.
 */
10825 int check_chunks(struct cache_tree *chunk_cache,
10826 struct block_group_tree *block_group_cache,
10827 struct device_extent_tree *dev_extent_cache,
10828 struct list_head *good, struct list_head *bad,
10829 struct list_head *rebuild, int silent)
10831 struct cache_extent *chunk_item;
10832 struct chunk_record *chunk_rec;
10833 struct block_group_record *bg_rec;
10834 struct device_extent_record *dext_rec;
10838 chunk_item = first_cache_extent(chunk_cache);
10839 while (chunk_item) {
10840 chunk_rec = container_of(chunk_item, struct chunk_record,
10842 err = check_chunk_refs(chunk_rec, block_group_cache,
10843 dev_extent_cache, silent);
10846 if (err == 0 && good)
10847 list_add_tail(&chunk_rec->list, good);
10848 if (err > 0 && rebuild)
10849 list_add_tail(&chunk_rec->list, rebuild);
10850 if (err < 0 && bad)
10851 list_add_tail(&chunk_rec->list, bad);
10852 chunk_item = next_cache_extent(chunk_item);
10855 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10858 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10866 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10870 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10871 dext_rec->objectid,
/*
 * check_device_used - compare the summed length of the dev extents of one
 * device with the byte_used value of its device record.
 *
 * Starting at the first cached dev extent for dev_rec->devid, extents are
 * visited in cache order; each one is unlinked from its device_list and its
 * length is added to total_byte. The objectid of each extent is compared with
 * dev_rec->devid (the action taken on a different id is not visible here;
 * presumably it ends the walk). A difference between total_byte and
 * dev_rec->byte_used is printed.
 *
 * NOTE(review): the loop header and the return statements are absent from
 * this listing.
 */
10881 static int check_device_used(struct device_record *dev_rec,
10882 struct device_extent_tree *dext_cache)
10884 struct cache_extent *cache;
10885 struct device_extent_record *dev_extent_rec;
10886 u64 total_byte = 0;
10888 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10890 dev_extent_rec = container_of(cache,
10891 struct device_extent_record,
10893 if (dev_extent_rec->objectid != dev_rec->devid)
10896 list_del_init(&dev_extent_rec->device_list);
10897 total_byte += dev_extent_rec->length;
10898 cache = next_cache_extent(cache);
10901 if (total_byte != dev_rec->byte_used) {
10903 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10904 total_byte, dev_rec->byte_used, dev_rec->objectid,
10905 dev_rec->type, dev_rec->offset);
10913 * Extra (optional) check for dev_item size to report possbile problem on a new
10916 static void check_dev_size_alignment(u64 devid, u64 total_bytes, u32 sectorsize)
10918 if (!IS_ALIGNED(total_bytes, sectorsize)) {
10920 "unaligned total_bytes detected for devid %llu, have %llu should be aligned to %u",
10921 devid, total_bytes, sectorsize);
10923 "this is OK for older kernel, but may cause kernel warning for newer kernels");
10924 warning("this can be fixed by 'btrfs rescue fix-device-size'");
10929 * Unlike device size alignment check above, some super total_bytes check
10930 * failure can lead to mount failure for newer kernel.
10932 * So this function will return the error for a fatal super total_bytes problem.
10934 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
10936 struct btrfs_device *dev;
10937 struct list_head *dev_list = &fs_info->fs_devices->devices;
10938 u64 total_bytes = 0;
10939 u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
10941 list_for_each_entry(dev, dev_list, dev_list)
10942 total_bytes += dev->total_bytes;
10944 /* Important check, which can cause unmountable fs */
10945 if (super_bytes < total_bytes) {
10946 error("super total bytes %llu smaller than real device(s) size %llu",
10947 super_bytes, total_bytes);
10948 error("mounting this fs may fail for newer kernels");
10949 error("this can be fixed by 'btrfs rescue fix-device-size'");
10954 * Optional check, just to make everything aligned and match with each
10957 * For a btrfs-image restored fs, we don't need to check it anyway.
10959 if (btrfs_super_flags(fs_info->super_copy) &
10960 (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
10962 if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
10963 !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
10964 super_bytes != total_bytes) {
10965 warning("minor unaligned/mismatch device size detected");
10967 "recommended to use 'btrfs rescue fix-device-size' to fix it");
10972 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * check_devices - check every device record in @dev_cache against the cached
 * device extents.
 *
 * Device records are visited in rb-tree order (rb_first / rb_next). For each
 * one check_device_used() compares the dev extent total with the recorded
 * usage, and check_dev_size_alignment() warns about a total_byte value that
 * is not aligned to global_info->sectorsize. Afterwards the dev extents still
 * linked on dev_extent_cache->no_device_orphans are reported as having no
 * device.
 *
 * NOTE(review): the loop header, the handling of err, the conditions around
 * the message and the return value are absent from this listing.
 */
10973 static int check_devices(struct rb_root *dev_cache,
10974 struct device_extent_tree *dev_extent_cache)
10976 struct rb_node *dev_node;
10977 struct device_record *dev_rec;
10978 struct device_extent_record *dext_rec;
10982 dev_node = rb_first(dev_cache);
10984 dev_rec = container_of(dev_node, struct device_record, node);
10985 err = check_device_used(dev_rec, dev_extent_cache);
10989 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
10990 global_info->sectorsize);
10991 dev_node = rb_next(dev_node);
10993 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10996 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10997 dext_rec->objectid, dext_rec->offset, dext_rec->length);
11004 static int add_root_item_to_list(struct list_head *head,
11005 u64 objectid, u64 bytenr, u64 last_snapshot,
11006 u8 level, u8 drop_level,
11007 struct btrfs_key *drop_key)
11010 struct root_item_record *ri_rec;
11011 ri_rec = malloc(sizeof(*ri_rec));
11014 ri_rec->bytenr = bytenr;
11015 ri_rec->objectid = objectid;
11016 ri_rec->level = level;
11017 ri_rec->drop_level = drop_level;
11018 ri_rec->last_snapshot = last_snapshot;
11020 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
11021 list_add_tail(&ri_rec->list, head);
/*
 * Drain @list: while it is not empty, take the first root_item_record and
 * unlink it with list_del_init().
 *
 * NOTE(review): the statement releasing the record itself is not visible in
 * this excerpt - presumably it is freed right after being unlinked; confirm.
 */
11026 static void free_root_item_list(struct list_head *list)
11028 struct root_item_record *ri_rec;
11030 while (!list_empty(list)) {
11031 ri_rec = list_first_entry(list, struct root_item_record,
11033 list_del_init(&ri_rec->list);
/*
 * Process the queued root_item_record entries of @list one at a time.
 *
 * For the entry at the head of the list the tree block at rec->bytenr is
 * read with read_tree_block(), tested with extent_buffer_uptodate(),
 * registered through add_root_to_pending() under rec->objectid and then
 * handed to run_next_block() together with the record.  The buffer is
 * released and the record unlinked from the list afterwards.  After the list
 * loop, run_next_block() is called again with a NULL record.
 *
 * All cache arguments are passed through unchanged to add_root_to_pending()
 * and run_next_block().
 *
 * NOTE(review): the loop exit conditions and the handling of the values
 * returned by these calls are not visible in this excerpt - confirm.
 */
11038 static int deal_root_from_list(struct list_head *list,
11039 struct btrfs_root *root,
11040 struct block_info *bits,
11042 struct cache_tree *pending,
11043 struct cache_tree *seen,
11044 struct cache_tree *reada,
11045 struct cache_tree *nodes,
11046 struct cache_tree *extent_cache,
11047 struct cache_tree *chunk_cache,
11048 struct rb_root *dev_cache,
11049 struct block_group_tree *block_group_cache,
11050 struct device_extent_tree *dev_extent_cache)
11055 while (!list_empty(list)) {
11056 struct root_item_record *rec;
11057 struct extent_buffer *buf;
/* Always work on the record currently at the head of the list. */
11058 rec = list_entry(list->next,
11059 struct root_item_record, list);
11061 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
11062 if (!extent_buffer_uptodate(buf)) {
11063 free_extent_buffer(buf);
11067 ret = add_root_to_pending(buf, extent_cache, pending,
11068 seen, nodes, rec->objectid);
11072 * To rebuild extent tree, we need deal with snapshot
11073 * one by one, otherwise we deal with node firstly which
11074 * can maximize readahead.
11077 ret = run_next_block(root, bits, bits_nr, &last,
11078 pending, seen, reada, nodes,
11079 extent_cache, chunk_cache,
11080 dev_cache, block_group_cache,
11081 dev_extent_cache, rec);
11085 free_extent_buffer(buf);
11086 list_del(&rec->list);
/* Same call as above, but without an associated root item record. */
11092 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
11093 reada, nodes, extent_cache, chunk_cache,
11094 dev_cache, block_group_cache,
11095 dev_extent_cache, NULL);
/*
 * Driver for the chunk / extent tree check of @fs_info.
 *
 * Steps visible in this function:
 *  - initialise the local caches (chunk, block group, device extent, extent,
 *    seen, pending, nodes, reada, corrupt blocks, excluded extents) and the
 *    normal_trees / dropping_trees lists,
 *  - expose excluded_extents, the extent cache, free_extent_hook and
 *    corrupt_blocks through @fs_info,
 *  - queue the tree root and the chunk root, then every ROOT_ITEM found in
 *    the tree root, on normal_trees or dropping_trees,
 *  - run deal_root_from_list() on normal_trees and then on dropping_trees,
 *  - run check_chunks(), check_extent_refs() and check_devices(),
 *  - stop the progress task, reset the @fs_info pointers and free the caches.
 *
 * The result of each processing step up to check_extent_refs() is compared
 * with -EAGAIN right after the call.
 *
 * NOTE(review): the branch targets taken on -EAGAIN and on other errors are
 * not visible in this excerpt - confirm against the full function.
 */
11105 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11107 struct rb_root dev_cache;
11108 struct cache_tree chunk_cache;
11109 struct block_group_tree block_group_cache;
11110 struct device_extent_tree dev_extent_cache;
11111 struct cache_tree extent_cache;
11112 struct cache_tree seen;
11113 struct cache_tree pending;
11114 struct cache_tree reada;
11115 struct cache_tree nodes;
11116 struct extent_io_tree excluded_extents;
11117 struct cache_tree corrupt_blocks;
11118 struct btrfs_path path;
11119 struct btrfs_key key;
11120 struct btrfs_key found_key;
11122 struct block_info *bits;
11124 struct extent_buffer *leaf;
11126 struct btrfs_root_item ri;
11127 struct list_head dropping_trees;
11128 struct list_head normal_trees;
11129 struct btrfs_root *root1;
11130 struct btrfs_root *root;
/* Set up every cache and work list used by the scan. */
11134 root = fs_info->fs_root;
11135 dev_cache = RB_ROOT;
11136 cache_tree_init(&chunk_cache);
11137 block_group_tree_init(&block_group_cache);
11138 device_extent_tree_init(&dev_extent_cache);
11140 cache_tree_init(&extent_cache);
11141 cache_tree_init(&seen);
11142 cache_tree_init(&pending);
11143 cache_tree_init(&nodes);
11144 cache_tree_init(&reada);
11145 cache_tree_init(&corrupt_blocks);
11146 extent_io_tree_init(&excluded_extents);
11147 INIT_LIST_HEAD(&dropping_trees);
11148 INIT_LIST_HEAD(&normal_trees);
/* Publish the local trees and the free-extent hook through fs_info. */
11151 fs_info->excluded_extents = &excluded_extents;
11152 fs_info->fsck_extent_cache = &extent_cache;
11153 fs_info->free_extent_hook = free_extent_hook;
11154 fs_info->corrupt_blocks = &corrupt_blocks;
11158 bits = malloc(bits_nr * sizeof(struct block_info));
11164 if (ctx.progress_enabled) {
11165 ctx.tp = TASK_EXTENTS;
11166 task_start(ctx.info);
/* Queue the tree root, then the chunk root, on normal_trees. */
11170 root1 = fs_info->tree_root;
11171 level = btrfs_header_level(root1->node);
11172 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11173 root1->node->start, 0, level, 0, NULL);
11176 root1 = fs_info->chunk_root;
11177 level = btrfs_header_level(root1->node);
11178 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11179 root1->node->start, 0, level, 0, NULL);
/* Scan the tree root for ROOT_ITEM keys and queue each root found. */
11182 btrfs_init_path(&path);
11185 key.type = BTRFS_ROOT_ITEM_KEY;
11186 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
11190 leaf = path.nodes[0];
11191 slot = path.slots[0];
11192 if (slot >= btrfs_header_nritems(path.nodes[0])) {
11193 ret = btrfs_next_leaf(root, &path);
11196 leaf = path.nodes[0];
11197 slot = path.slots[0];
11199 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11200 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
11201 unsigned long offset;
11204 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
11205 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
11206 last_snapshot = btrfs_root_last_snapshot(&ri);
/*
 * A root whose drop_progress objectid is zero goes to normal_trees; the
 * other visible call queues on dropping_trees with the drop key attached.
 */
11207 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
11208 level = btrfs_root_level(&ri);
11209 ret = add_root_item_to_list(&normal_trees,
11210 found_key.objectid,
11211 btrfs_root_bytenr(&ri),
11212 last_snapshot, level,
11217 level = btrfs_root_level(&ri);
11218 objectid = found_key.objectid;
11219 btrfs_disk_key_to_cpu(&found_key,
11220 &ri.drop_progress);
11221 ret = add_root_item_to_list(&dropping_trees,
11223 btrfs_root_bytenr(&ri),
11224 last_snapshot, level,
11225 ri.drop_level, &found_key);
11232 btrfs_release_path(&path);
11235 * check_block can return -EAGAIN if it fixes something, please keep
11236 * this in mind when dealing with return values from these functions, if
11237 * we get -EAGAIN we want to fall through and restart the loop.
11239 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11240 &seen, &reada, &nodes, &extent_cache,
11241 &chunk_cache, &dev_cache, &block_group_cache,
11242 &dev_extent_cache);
11244 if (ret == -EAGAIN)
11248 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11249 &pending, &seen, &reada, &nodes,
11250 &extent_cache, &chunk_cache, &dev_cache,
11251 &block_group_cache, &dev_extent_cache);
11253 if (ret == -EAGAIN)
/* Run check_chunks() over the chunk, block group and device extent caches. */
11258 ret = check_chunks(&chunk_cache, &block_group_cache,
11259 &dev_extent_cache, NULL, NULL, NULL, 0);
11261 if (ret == -EAGAIN)
11266 ret = check_extent_refs(root, &extent_cache);
11268 if (ret == -EAGAIN)
11273 ret = check_devices(&dev_cache, &dev_extent_cache);
11278 task_stop(ctx.info);
/* Detach the local trees and the hook from fs_info before freeing them. */
11280 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11281 extent_io_tree_cleanup(&excluded_extents);
11282 fs_info->fsck_extent_cache = NULL;
11283 fs_info->free_extent_hook = NULL;
11284 fs_info->corrupt_blocks = NULL;
11285 fs_info->excluded_extents = NULL;
11288 free_chunk_cache_tree(&chunk_cache);
11289 free_device_cache_tree(&dev_cache);
11290 free_block_group_tree(&block_group_cache);
11291 free_device_extent_tree(&dev_extent_cache);
11292 free_extent_cache_tree(&seen);
11293 free_extent_cache_tree(&pending);
11294 free_extent_cache_tree(&reada);
11295 free_extent_cache_tree(&nodes);
11296 free_root_item_list(&normal_trees);
11297 free_root_item_list(&dropping_trees);
/*
 * Second cleanup sequence; unlike the one above it also calls
 * free_extent_record_cache() and leaves the fs_info pointers untouched.
 * NOTE(review): the control path reaching it is not visible in this
 * excerpt - presumably the -EAGAIN restart path; confirm.
 */
11300 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11301 free_extent_cache_tree(&seen);
11302 free_extent_cache_tree(&pending);
11303 free_extent_cache_tree(&reada);
11304 free_extent_cache_tree(&nodes);
11305 free_chunk_cache_tree(&chunk_cache);
11306 free_block_group_tree(&block_group_cache);
11307 free_device_cache_tree(&dev_cache);
11308 free_device_extent_tree(&dev_extent_cache);
11309 free_extent_record_cache(&extent_cache);
11310 free_root_item_list(&normal_trees);
11311 free_root_item_list(&dropping_trees);
11312 extent_io_tree_cleanup(&excluded_extents);
/*
 * Validate the type of the inline backref @iref stored in @eb.
 *
 * The type is read with btrfs_extent_inline_ref_type().  The four listed
 * types (TREE_BLOCK_REF, EXTENT_DATA_REF, SHARED_BLOCK_REF, SHARED_DATA_REF)
 * share one case group; for any other type an error naming the extent @key
 * and the type is printed and ret is set to UNKNOWN_TYPE.
 *
 * NOTE(review): the value used for the known types is not visible in this
 * excerpt - presumably 0; confirm.
 */
11316 static int check_extent_inline_ref(struct extent_buffer *eb,
11317 struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
11320 u8 type = btrfs_extent_inline_ref_type(eb, iref);
11323 case BTRFS_TREE_BLOCK_REF_KEY:
11324 case BTRFS_EXTENT_DATA_REF_KEY:
11325 case BTRFS_SHARED_BLOCK_REF_KEY:
11326 case BTRFS_SHARED_DATA_REF_KEY:
11330 error("extent[%llu %u %llu] has unknown ref type: %d",
11331 key->objectid, key->type, key->offset, type);
11332 ret = UNKNOWN_TYPE;
11340 * Check backrefs of a tree block given by @bytenr or @eb.
11342 * @root: the root containing the @bytenr or @eb
11343 * @eb: tree block extent buffer, can be NULL
11344 * @bytenr: bytenr of the tree block to search
11345 * @level: tree level of the tree block
11346 * @owner: owner of the tree block
11348 * Return >0 for any error found and output error message
11349 * Return 0 for no error found
/*
 * Implementation notes, based on the statements visible in this function:
 *  - the extent item is looked up with key (bytenr, METADATA_ITEM when the
 *    SKINNY_METADATA incompat flag is set or EXTENT_ITEM otherwise, (u64)-1)
 *    followed by btrfs_previous_extent_item(); BACKREF_MISSING is set next
 *    to both lookups,
 *  - the recorded level is taken from the key offset for METADATA_ITEM keys
 *    and from the btrfs_tree_block_info following the extent item otherwise,
 *  - the TREE_BLOCK flag, the generation, the level and (for owners that are
 *    not fs trees) a reference count of 1 are checked, each failure being
 *    reported and recorded as BACKREF_MISMATCH,
 *  - inline refs are walked first; afterwards a keyed TREE_BLOCK_REF item
 *    for root->objectid and finally SHARED_BLOCK_REF items are searched,
 *  - for an inline SHARED_BLOCK_REF the function calls itself on the parent
 *    block at level + 1, except when the ref offset equals @bytenr.
 *
 * NOTE(review): the conditions guarding several of these steps are not
 * visible in this excerpt - confirm against the full function.
 */
11351 static int check_tree_block_ref(struct btrfs_root *root,
11352 struct extent_buffer *eb, u64 bytenr,
11353 int level, u64 owner, struct node_refs *nrefs)
11355 struct btrfs_key key;
11356 struct btrfs_root *extent_root = root->fs_info->extent_root;
11357 struct btrfs_path path;
11358 struct btrfs_extent_item *ei;
11359 struct btrfs_extent_inline_ref *iref;
11360 struct extent_buffer *leaf;
11365 int root_level = btrfs_header_level(root->node);
11367 u32 nodesize = root->fs_info->nodesize;
/* The key type depends on the SKINNY_METADATA incompat flag. */
11376 btrfs_init_path(&path);
11377 key.objectid = bytenr;
11378 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11379 key.type = BTRFS_METADATA_ITEM_KEY;
11381 key.type = BTRFS_EXTENT_ITEM_KEY;
11382 key.offset = (u64)-1;
11384 /* Search for the backref in extent tree */
11385 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11387 err |= BACKREF_MISSING;
11390 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11392 err |= BACKREF_MISSING;
11396 leaf = path.nodes[0];
11397 slot = path.slots[0];
11398 btrfs_item_key_to_cpu(leaf, &key, slot);
11400 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* Locate the recorded level and the first inline ref for both key types. */
11402 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11403 skinny_level = (int)key.offset;
11404 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11406 struct btrfs_tree_block_info *info;
11408 info = (struct btrfs_tree_block_info *)(ei + 1);
11409 skinny_level = btrfs_tree_block_level(leaf, info);
11410 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11419 * Due to the feature of shared tree blocks, if the upper node
11420 * is a fs root or shared node, the extent of checked node may
11421 * not be updated until the next CoW.
11424 strict = should_check_extent_strictly(root, nrefs,
11426 if (!(btrfs_extent_flags(leaf, ei) &
11427 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11429 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11430 key.objectid, nodesize,
11431 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11432 err = BACKREF_MISMATCH;
11434 header_gen = btrfs_header_generation(eb);
11435 extent_gen = btrfs_extent_generation(leaf, ei);
11436 if (header_gen != extent_gen) {
11438 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11439 key.objectid, nodesize, header_gen,
11441 err = BACKREF_MISMATCH;
11443 if (level != skinny_level) {
11445 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11446 key.objectid, nodesize, level, skinny_level);
11447 err = BACKREF_MISMATCH;
11449 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11451 "extent[%llu %u] is referred by other roots than %llu",
11452 key.objectid, nodesize, root->objectid);
11453 err = BACKREF_MISMATCH;
11458 * Iterate the extent/metadata item to find the exact backref
11460 item_size = btrfs_item_size_nr(leaf, slot);
11461 ptr = (unsigned long)iref;
11462 end = (unsigned long)ei + item_size;
11464 while (ptr < end) {
11465 iref = (struct btrfs_extent_inline_ref *)ptr;
11466 type = btrfs_extent_inline_ref_type(leaf, iref);
11467 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11469 ret = check_extent_inline_ref(leaf, &key, iref);
11474 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11475 if (offset == root->objectid)
11477 if (!strict && owner == offset)
11479 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11481 * Backref of tree reloc root points to itself, no need
11482 * to check backref any more.
11484 * This may be an error of loop backref, but extent tree
11485 * checker should have already handled it.
11486 * Here we only need to avoid infinite iteration.
11488 if (offset == bytenr) {
11492 * Check if the backref points to valid
11495 found_ref = !check_tree_block_ref( root, NULL,
11496 offset, level + 1, owner,
11503 ptr += btrfs_extent_inline_ref_size(type);
11507 * Inlined extent item doesn't have what we need, check
11508 * TREE_BLOCK_REF_KEY
11511 btrfs_release_path(&path);
11512 key.objectid = bytenr;
11513 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11514 key.offset = root->objectid;
11516 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11521 * Finally check SHARED BLOCK REF, any found will be good
11522 * Here we're not doing comprehensive extent backref checking,
11523 * only need to ensure there is some extent referring to this
11527 btrfs_release_path(&path);
11528 key.objectid = bytenr;
11529 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
11530 key.offset = (u64)-1;
11532 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11534 err |= BACKREF_MISSING;
11537 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11539 err |= BACKREF_MISSING;
11545 err |= BACKREF_MISSING;
11547 btrfs_release_path(&path);
11548 if (nrefs && strict &&
11549 level < root_level && nrefs->full_backref[level + 1])
11550 parent = nrefs->bytenr[level + 1];
11551 if (eb && (err & BACKREF_MISSING))
11553 "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11554 bytenr, nodesize, owner, level,
11555 parent ? "parent" : "root",
11556 parent ? parent : root->objectid);
11561 * If @err contains BACKREF_MISSING then add extent of the
11562 * file_extent_data_item.
11564 * Returns error bits after repair.
/*
 * Implementation notes, based on the statements visible in this function:
 *  - the file extent item is read from slot pathp->slots[0] of
 *    pathp->nodes[0]; inline extents and extents with disk bytenr 0 are
 *    tested for first,
 *  - only the BACKREF_MISSING bit of err is acted upon,
 *  - the extent item is searched with key (disk_bytenr, EXTENT_ITEM,
 *    disk num_bytes); the insert branch creates an item with 0 refs, the
 *    generation of the file extent and BTRFS_EXTENT_FLAG_DATA, then calls
 *    btrfs_update_block_group(),
 *  - the backref is added with btrfs_inc_extent_ref(), using a parent when
 *    nrefs->full_backref[0] is set, and BACKREF_MISSING is cleared from err.
 *
 * NOTE(review): on the insert branch eb is reassigned to path.nodes[0] and
 * that path is released before btrfs_header_bytenr(eb) is read for the
 * parent - confirm which buffer is intended there.
 */
11566 static int repair_extent_data_item(struct btrfs_trans_handle *trans,
11567 struct btrfs_root *root,
11568 struct btrfs_path *pathp,
11569 struct node_refs *nrefs,
11572 struct btrfs_file_extent_item *fi;
11573 struct btrfs_key fi_key;
11574 struct btrfs_key key;
11575 struct btrfs_extent_item *ei;
11576 struct btrfs_path path;
11577 struct btrfs_root *extent_root = root->fs_info->extent_root;
11578 struct extent_buffer *eb;
11590 eb = pathp->nodes[0];
11591 slot = pathp->slots[0];
11592 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11593 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11595 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11596 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11599 file_offset = fi_key.offset;
11600 generation = btrfs_file_extent_generation(eb, fi);
11601 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11602 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11603 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Data backref offset: file offset minus the offset into the extent. */
11604 offset = file_offset - extent_offset;
11606 /* now repair only adds backref */
11607 if ((err & BACKREF_MISSING) == 0)
11610 /* search extent item */
11611 key.objectid = disk_bytenr;
11612 key.type = BTRFS_EXTENT_ITEM_KEY;
11613 key.offset = num_bytes;
11615 btrfs_init_path(&path);
11616 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11622 /* insert an extent item */
11624 key.objectid = disk_bytenr;
11625 key.type = BTRFS_EXTENT_ITEM_KEY;
11626 key.offset = num_bytes;
11627 size = sizeof(*ei);
11629 btrfs_release_path(&path);
11630 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
11634 eb = path.nodes[0];
11635 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
/* The new item starts with zero references. */
11637 btrfs_set_extent_refs(eb, ei, 0);
11638 btrfs_set_extent_generation(eb, ei, generation);
11639 btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
11641 btrfs_mark_buffer_dirty(eb);
11642 ret = btrfs_update_block_group(extent_root, disk_bytenr,
11644 btrfs_release_path(&path);
11647 if (nrefs->full_backref[0])
11648 parent = btrfs_header_bytenr(eb);
11652 ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
11654 parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
11658 "failed to increase extent data backref[%llu %llu] root %llu",
11659 disk_bytenr, num_bytes, root->objectid);
11662 printf("Add one extent data backref [%llu %llu]\n",
11663 disk_bytenr, num_bytes);
11666 err &= ~BACKREF_MISSING;
11669 error("can't repair root %llu extent data item[%llu %llu]",
11670 root->objectid, disk_bytenr, num_bytes);
11675 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11677 * Return >0 any error found and output error message
11678 * Return 0 for no error found
/*
 * Implementation notes, based on the statements visible in this function:
 *  - the file extent item is read from slot pathp->slots[0] of
 *    pathp->nodes[0]; inline extents and extents with disk bytenr 0 are
 *    tested for first,
 *  - disk_num_bytes and num_bytes must be aligned to the sector size,
 *    otherwise BYTES_UNALIGNED is recorded; when aligned and @account_bytes
 *    is set they are added to data_bytes_allocated / data_bytes_referenced,
 *  - the extent item (disk_bytenr, EXTENT_ITEM, disk_num_bytes) must carry
 *    BTRFS_EXTENT_FLAG_DATA, otherwise BACKREF_MISMATCH is recorded,
 *  - the data backref is searched in three places: inline refs of the extent
 *    item, a keyed EXTENT_DATA_REF item (offset from hash_extent_data_ref())
 *    and a keyed SHARED_DATA_REF item with offset eb->start,
 *  - BACKREF_MISSING is recorded and reported when none of them is found.
 */
11680 static int check_extent_data_item(struct btrfs_root *root,
11681 struct btrfs_path *pathp,
11682 struct node_refs *nrefs, int account_bytes)
11684 struct btrfs_file_extent_item *fi;
11685 struct extent_buffer *eb = pathp->nodes[0];
11686 struct btrfs_path path;
11687 struct btrfs_root *extent_root = root->fs_info->extent_root;
11688 struct btrfs_key fi_key;
11689 struct btrfs_key dbref_key;
11690 struct extent_buffer *leaf;
11691 struct btrfs_extent_item *ei;
11692 struct btrfs_extent_inline_ref *iref;
11693 struct btrfs_extent_data_ref *dref;
11696 u64 disk_num_bytes;
11697 u64 extent_num_bytes;
11704 int found_dbackref = 0;
11705 int slot = pathp->slots[0];
11710 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11711 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11713 /* Nothing to check for hole and inline data extents */
11714 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11715 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11718 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11719 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11720 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11721 offset = btrfs_file_extent_offset(eb, fi);
11723 /* Check unaligned disk_num_bytes and num_bytes */
11724 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11726 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11727 fi_key.objectid, fi_key.offset, disk_num_bytes,
11728 root->fs_info->sectorsize);
11729 err |= BYTES_UNALIGNED;
11730 } else if (account_bytes) {
11731 data_bytes_allocated += disk_num_bytes;
11733 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11735 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11736 fi_key.objectid, fi_key.offset, extent_num_bytes,
11737 root->fs_info->sectorsize);
11738 err |= BYTES_UNALIGNED;
11739 } else if (account_bytes) {
11740 data_bytes_referenced += extent_num_bytes;
/* Owner recorded in the header of the leaf holding the file extent. */
11742 owner = btrfs_header_owner(eb);
11744 /* Check the extent item of the file extent in extent tree */
11745 btrfs_init_path(&path);
11746 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11747 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11748 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11750 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11754 leaf = path.nodes[0];
11755 slot = path.slots[0];
11756 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11758 extent_flags = btrfs_extent_flags(leaf, ei);
11760 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11762 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11763 disk_bytenr, disk_num_bytes,
11764 BTRFS_EXTENT_FLAG_DATA);
11765 err |= BACKREF_MISMATCH;
11768 /* Check data backref inside that extent item */
11769 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11770 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11771 ptr = (unsigned long)iref;
11772 end = (unsigned long)ei + item_size;
11773 strict = should_check_extent_strictly(root, nrefs, -1);
11775 while (ptr < end) {
11779 bool match = false;
11781 iref = (struct btrfs_extent_inline_ref *)ptr;
11782 type = btrfs_extent_inline_ref_type(leaf, iref);
/* For EXTENT_DATA_REF the data ref starts at the offset field of iref. */
11783 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11785 ret = check_extent_inline_ref(leaf, &dbref_key, iref);
11790 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11791 ref_root = btrfs_extent_data_ref_root(leaf, dref);
11792 ref_objectid = btrfs_extent_data_ref_objectid(leaf, dref);
11793 ref_offset = btrfs_extent_data_ref_offset(leaf, dref);
11795 if (ref_objectid == fi_key.objectid &&
11796 ref_offset == fi_key.offset - offset)
/* Non-strict mode also accepts a ref owned by the leaf owner. */
11798 if (ref_root == root->objectid && match)
11799 found_dbackref = 1;
11800 else if (!strict && owner == ref_root && match)
11801 found_dbackref = 1;
11802 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
11803 found_dbackref = !check_tree_block_ref(root, NULL,
11804 btrfs_extent_inline_ref_offset(leaf, iref),
11808 if (found_dbackref)
11810 ptr += btrfs_extent_inline_ref_size(type);
11813 if (!found_dbackref) {
11814 btrfs_release_path(&path);
11816 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
11817 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11818 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
11819 dbref_key.offset = hash_extent_data_ref(root->objectid,
11820 fi_key.objectid, fi_key.offset - offset);
11822 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11823 &dbref_key, &path, 0, 0);
11825 found_dbackref = 1;
11829 btrfs_release_path(&path);
11832 * Neither inlined nor EXTENT_DATA_REF found, try
11833 * SHARED_DATA_REF as last chance.
11835 dbref_key.objectid = disk_bytenr;
11836 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
11837 dbref_key.offset = eb->start;
11839 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11840 &dbref_key, &path, 0, 0);
11842 found_dbackref = 1;
11848 if (!found_dbackref)
11849 err |= BACKREF_MISSING;
11850 btrfs_release_path(&path);
11851 if (err & BACKREF_MISSING) {
11852 error("data extent[%llu %llu] backref lost",
11853 disk_bytenr, disk_num_bytes);
11859 * Get real tree block level for the case like shared block
11860 * Return >= 0 as tree level
11861 * Return <0 for error
/*
 * Implementation notes, based on the statements visible in this function:
 *  - the extent tree is searched with key (bytenr, METADATA_ITEM, (u64)-1)
 *    followed by btrfs_previous_extent_item(),
 *  - the item found must carry BTRFS_EXTENT_FLAG_TREE_BLOCK,
 *  - the backref level is the key offset for METADATA_ITEM keys and is read
 *    from the btrfs_tree_block_info after the extent item otherwise,
 *  - the tree block itself is then read with the extent generation as
 *    transid and its header level is compared with the backref level;
 *    the header level is the value returned when both agree.
 */
11863 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11865 struct extent_buffer *eb;
11866 struct btrfs_path path;
11867 struct btrfs_key key;
11868 struct btrfs_extent_item *ei;
11875 /* Search extent tree for extent generation and level */
11876 key.objectid = bytenr;
11877 key.type = BTRFS_METADATA_ITEM_KEY;
11878 key.offset = (u64)-1;
11880 btrfs_init_path(&path);
11881 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
11884 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11892 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11893 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11894 struct btrfs_extent_item);
11895 flags = btrfs_extent_flags(path.nodes[0], ei);
11896 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11901 /* Get transid for later read_tree_block() check */
11902 transid = btrfs_extent_generation(path.nodes[0], ei);
11904 /* Get backref level as one source */
11905 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11906 backref_level = key.offset;
11908 struct btrfs_tree_block_info *info;
11910 info = (struct btrfs_tree_block_info *)(ei + 1);
11911 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11913 btrfs_release_path(&path);
11915 /* Get level from tree block as an alternative source */
11916 eb = read_tree_block(fs_info, bytenr, transid);
11917 if (!extent_buffer_uptodate(eb)) {
11918 free_extent_buffer(eb);
11921 header_level = btrfs_header_level(eb);
11922 free_extent_buffer(eb);
/* Both sources have to agree before the level is returned. */
11924 if (header_level != backref_level)
11926 return header_level;
11929 btrfs_release_path(&path);
11934 * Check if a tree block backref is valid (points to a valid tree block)
11935 * if level == -1, level will be resolved
11936 * Return >0 for any error found and print error message
/*
 * Implementation notes, based on the statements visible in this function:
 *  - the level may be resolved through query_tree_block_level(),
 *  - the referencing root is loaded with btrfs_read_fs_root() using key
 *    (root_id, ROOT_ITEM, (u64)-1),
 *  - the tree block at @bytenr is read to obtain its first key (node key or
 *    item key); a block with no items at level 0 is handled separately,
 *  - that key is searched in the root with path.lowest_level set to the
 *    level, and the block found at path.nodes[level] must match @bytenr and
 *    the level, otherwise REFERENCER_MISMATCH is recorded,
 *  - failures of the lookups above record REFERENCER_MISSING, which is
 *    reported at the end.
 */
11938 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11939 u64 bytenr, int level)
11941 struct btrfs_root *root;
11942 struct btrfs_key key;
11943 struct btrfs_path path;
11944 struct extent_buffer *eb;
11945 struct extent_buffer *node;
11946 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11950 /* Query level for level == -1 special case */
11952 level = query_tree_block_level(fs_info, bytenr);
11954 err |= REFERENCER_MISSING;
11958 key.objectid = root_id;
11959 key.type = BTRFS_ROOT_ITEM_KEY;
11960 key.offset = (u64)-1;
11962 root = btrfs_read_fs_root(fs_info, &key);
11963 if (IS_ERR(root)) {
11964 err |= REFERENCER_MISSING;
11968 /* Read out the tree block to get item/node key */
11969 eb = read_tree_block(fs_info, bytenr, 0);
11970 if (!extent_buffer_uptodate(eb)) {
11971 err |= REFERENCER_MISSING;
11972 free_extent_buffer(eb);
11976 /* Empty tree, no need to check key */
11977 if (!btrfs_header_nritems(eb) && !level) {
11978 free_extent_buffer(eb);
11983 btrfs_node_key_to_cpu(eb, &key, 0);
11985 btrfs_item_key_to_cpu(eb, &key, 0);
11987 free_extent_buffer(eb);
11989 btrfs_init_path(&path);
/* Stop the search at the level of the block being verified. */
11990 path.lowest_level = level;
11991 /* Search with the first key, to ensure we can reach it */
11992 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11994 err |= REFERENCER_MISSING;
11998 node = path.nodes[level];
11999 if (btrfs_header_bytenr(node) != bytenr) {
12001 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
12002 bytenr, nodesize, bytenr,
12003 btrfs_header_bytenr(node));
12004 err |= REFERENCER_MISMATCH;
12006 if (btrfs_header_level(node) != level) {
12008 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
12009 bytenr, nodesize, level,
12010 btrfs_header_level(node));
12011 err |= REFERENCER_MISMATCH;
12015 btrfs_release_path(&path);
12017 if (err & REFERENCER_MISSING) {
12019 error("extent [%llu %d] lost referencer (owner: %llu)",
12020 bytenr, nodesize, root_id);
12023 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
12024 bytenr, nodesize, root_id, level);
12031 * Check if tree block @eb is tree reloc root.
12032 * Return 0 if it's not or any problem happens
12033 * Return 1 if it's a tree reloc root
/*
 * Implementation notes, based on the statements visible in this function:
 *  - a root is loaded, bypassing the root cache, with key
 *    (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM, owner of @eb),
 *  - the bytenr of @eb is compared with the bytenr of that root node,
 *  - the loaded root is released with btrfs_free_fs_root().
 */
12035 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
12036 struct extent_buffer *eb)
12038 struct btrfs_root *tree_reloc_root;
12039 struct btrfs_key key;
12040 u64 bytenr = btrfs_header_bytenr(eb);
12041 u64 owner = btrfs_header_owner(eb);
12044 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12045 key.offset = owner;
12046 key.type = BTRFS_ROOT_ITEM_KEY;
12048 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
12049 if (IS_ERR(tree_reloc_root))
12052 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
12054 btrfs_free_fs_root(tree_reloc_root);
12059 * Check referencer for shared block backref
12060 * If level == -1, this function will resolve the level.
/*
 * Implementation notes, based on the statements visible in this function:
 *  - the parent tree block is read with read_tree_block(),
 *  - the level may be resolved through query_tree_block_level(),
 *  - when @parent equals @bytenr the block is tested with
 *    is_tree_reloc_root(),
 *  - otherwise the parent header level must be level + 1 and its block
 *    pointers are scanned for @bytenr,
 *  - REFERENCER_MISSING is returned, after an error message, when no parent
 *    reference was found.
 */
12062 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
12063 u64 parent, u64 bytenr, int level)
12065 struct extent_buffer *eb;
12067 int found_parent = 0;
12070 eb = read_tree_block(fs_info, parent, 0);
12071 if (!extent_buffer_uptodate(eb))
12075 level = query_tree_block_level(fs_info, bytenr);
12079 /* It's possible it's a tree reloc root */
12080 if (parent == bytenr) {
12081 if (is_tree_reloc_root(fs_info, eb))
12086 if (level + 1 != btrfs_header_level(eb))
/* Look for a block pointer of the parent node that refers to bytenr. */
12089 nr = btrfs_header_nritems(eb);
12090 for (i = 0; i < nr; i++) {
12091 if (bytenr == btrfs_node_blockptr(eb, i)) {
12097 free_extent_buffer(eb);
12098 if (!found_parent) {
12100 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
12101 bytenr, fs_info->nodesize, parent, level);
12102 return REFERENCER_MISSING;
12108 * Check referencer for normal (inlined) data ref
12109 * If len == 0, it will be resolved by searching in extent tree
/*
 * Implementation notes, based on the statements visible in this function:
 *  - the extent item is searched with key (bytenr, EXTENT_ITEM, (u64)-1)
 *    followed by btrfs_previous_extent_item(), and the key found must match
 *    @bytenr and EXTENT_ITEM,
 *  - the referencing root is loaded with btrfs_read_fs_root() using key
 *    (root_id, ROOT_ITEM, (u64)-1),
 *  - file extents of inode @objectid are visited starting from
 *    (objectid, EXTENT_DATA, offset), or from offset 0 when bit 63 of
 *    @offset is set,
 *  - an item is tested on disk bytenr, disk num bytes, the value
 *    key.offset - file extent offset and the leaf owner,
 *  - REFERENCER_MISSING is returned, after an error message, when the
 *    number of matches differs from @count.
 */
12111 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
12112 u64 root_id, u64 objectid, u64 offset,
12113 u64 bytenr, u64 len, u32 count)
12115 struct btrfs_root *root;
12116 struct btrfs_root *extent_root = fs_info->extent_root;
12117 struct btrfs_key key;
12118 struct btrfs_path path;
12119 struct extent_buffer *leaf;
12120 struct btrfs_file_extent_item *fi;
12121 u32 found_count = 0;
12126 key.objectid = bytenr;
12127 key.type = BTRFS_EXTENT_ITEM_KEY;
12128 key.offset = (u64)-1;
12130 btrfs_init_path(&path);
12131 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12134 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
12137 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12138 if (key.objectid != bytenr ||
12139 key.type != BTRFS_EXTENT_ITEM_KEY)
12142 btrfs_release_path(&path);
12144 key.objectid = root_id;
12145 key.type = BTRFS_ROOT_ITEM_KEY;
12146 key.offset = (u64)-1;
12147 btrfs_init_path(&path);
12149 root = btrfs_read_fs_root(fs_info, &key);
12153 key.objectid = objectid;
12154 key.type = BTRFS_EXTENT_DATA_KEY;
12156 * It can be nasty as data backref offset is
12157 * file offset - file extent offset, which is smaller or
12158 * equal to original backref offset. The only special case is
12159 * overflow. So we need to special check and do further search.
12161 key.offset = offset & (1ULL << 63) ? 0 : offset;
12163 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12168 * Search afterwards to get correct one
12169 * NOTE: As we must do a comprehensive check on the data backref to
12170 * make sure the dref count also matches, we must iterate all file
12171 * extents for that inode.
12174 leaf = path.nodes[0];
12175 slot = path.slots[0];
12177 if (slot >= btrfs_header_nritems(leaf) ||
12178 btrfs_header_owner(leaf) != root_id)
12180 btrfs_item_key_to_cpu(leaf, &key, slot);
12181 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
12183 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
12185 * Except normal disk bytenr and disk num bytes, we still
12186 * need to do extra check on dbackref offset as
12187 * dbackref offset = file_offset - file_extent_offset
12189 * Also, we must check the leaf owner.
12190 * In case of shared tree blocks (snapshots) we can inherit
12191 * leaves from source snapshot.
12192 * In that case, reference from source snapshot should not
12195 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
12196 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
12197 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
12198 offset && btrfs_header_owner(leaf) == root_id)
12202 ret = btrfs_next_item(root, &path);
12207 btrfs_release_path(&path);
12208 if (found_count != count) {
12210 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
12211 bytenr, len, root_id, objectid, offset, count, found_count);
12212 return REFERENCER_MISSING;
12218 * Check if the referencer of a shared data backref exists
/*
 * Implementation notes, based on the statements visible in this function:
 *  - the parent block is read with read_tree_block(),
 *  - every item of that block is visited; the key type is tested against
 *    EXTENT_DATA and the file extent type against INLINE,
 *  - a file extent whose disk bytenr equals @bytenr is what the scan looks
 *    for,
 *  - REFERENCER_MISSING is returned, after an error message, when no such
 *    item was found.
 */
12220 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
12221 u64 parent, u64 bytenr)
12223 struct extent_buffer *eb;
12224 struct btrfs_key key;
12225 struct btrfs_file_extent_item *fi;
12227 int found_parent = 0;
12230 eb = read_tree_block(fs_info, parent, 0);
12231 if (!extent_buffer_uptodate(eb))
12234 nr = btrfs_header_nritems(eb);
12235 for (i = 0; i < nr; i++) {
12236 btrfs_item_key_to_cpu(eb, &key, i);
12237 if (key.type != BTRFS_EXTENT_DATA_KEY)
12240 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
12241 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
12244 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
12251 free_extent_buffer(eb);
12252 if (!found_parent) {
12253 error("shared extent %llu referencer lost (parent: %llu)",
12255 return REFERENCER_MISSING;
12261 * Only delete backref if REFERENCER_MISSING now
12263 * Returns <0 the extent was deleted
12264 * Returns >0 the backref was deleted but extent still exists, returned value
12265 * means error after repair
12266 * Returns 0 nothing happened
/*
 * Implementation notes, based on the statements visible in this function:
 *  - the key of the current item of @path is saved first,
 *  - when err contains REFERENCER_MISSING or REFERENCER_MISMATCH the backref
 *    is dropped with btrfs_free_extent(), issued against
 *    root->fs_info->fs_root, and the outcome is printed,
 *  - @path is then released and the saved key is searched again in @root,
 *    because btrfs_free_extent() may have removed the extent item.
 *
 * NOTE(review): the condition tests both REFERENCER_MISSING and
 * REFERENCER_MISMATCH, yet only REFERENCER_MISSING is cleared from err after
 * the deletion - confirm that this is intended.
 */
12268 static int repair_extent_item(struct btrfs_trans_handle *trans,
12269 struct btrfs_root *root, struct btrfs_path *path,
12270 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
12271 u64 owner, u64 offset, int err)
12273 struct btrfs_key old_key;
/* Remember the current position so that it can be looked up again later. */
12277 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
12279 if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
12280 /* delete the backref */
12281 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
12282 num_bytes, parent, root_objectid, owner, offset);
12285 err &= ~REFERENCER_MISSING;
12286 printf("Delete backref in extent [%llu %llu]\n",
12287 bytenr, num_bytes);
12289 error("fail to delete backref in extent [%llu %llu]",
12290 bytenr, num_bytes);
12294 /* btrfs_free_extent may delete the extent */
12295 btrfs_release_path(path);
12296 ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
12306 * This function will check a given extent item, including its backref and
12307 * itself (like crossing stripe boundary and type)
12309 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Check the EXTENT_ITEM or METADATA_ITEM at path->slots[0] of path->nodes[0].
 *
 * Steps visible in this function:
 *  - the extent size is key.offset for BTRFS_EXTENT_ITEM_KEY, otherwise
 *    nodesize; the same amount is added to bytes_used
 *  - an item smaller than struct btrfs_extent_item is reported as an
 *    unsupported COMPAT_EXTENT_TREE_V0 item
 *  - metadata that crosses a stripe boundary sets CROSSING_STRIPE_BOUNDARY
 *  - the tree level comes from the btrfs_tree_block_info that follows the
 *    item header (old EXTENT_ITEM metadata) or from key.offset
 *  - each inline ref is dispatched by its type to the matching
 *    check_*_backref() helper; an unknown type yields UNKNOWN_TYPE
 *  - repair_extent_item() is called for the ref when err and repair are set
 */
12311 static int check_extent_item(struct btrfs_trans_handle *trans,
12312 struct btrfs_fs_info *fs_info,
12313 struct btrfs_path *path)
12315 struct btrfs_extent_item *ei;
12316 struct btrfs_extent_inline_ref *iref;
12317 struct btrfs_extent_data_ref *dref;
12318 struct extent_buffer *eb = path->nodes[0];
12321 int slot = path->slots[0];
12323 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12324 u32 item_size = btrfs_item_size_nr(eb, slot);
12334 struct btrfs_key key;
12338 btrfs_item_key_to_cpu(eb, &key, slot);
/* EXTENT_ITEM keys carry the byte length in key.offset */
12339 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12340 bytes_used += key.offset;
12341 num_bytes = key.offset;
12343 bytes_used += nodesize;
12344 num_bytes = nodesize;
12347 if (item_size < sizeof(*ei)) {
12349 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12350 * old thing when on disk format is still un-determined.
12351 * No need to care about it anymore
12353 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12357 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12358 flags = btrfs_extent_flags(eb, ei);
12360 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
12362 if (metadata && check_crossing_stripes(global_info, key.objectid,
12364 error("bad metadata [%llu, %llu) crossing stripe boundary",
12365 key.objectid, key.objectid + nodesize);
12366 err |= CROSSING_STRIPE_BOUNDARY;
/* ptr walks the item payload that follows the fixed extent item header */
12369 ptr = (unsigned long)(ei + 1);
12371 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12372 /* Old EXTENT_ITEM metadata */
12373 struct btrfs_tree_block_info *info;
12375 info = (struct btrfs_tree_block_info *)ptr;
12376 level = btrfs_tree_block_level(eb, info);
12377 ptr += sizeof(struct btrfs_tree_block_info);
12379 /* New METADATA_ITEM */
12380 level = key.offset;
/* end marks the first byte past this item */
12382 end = (unsigned long)ei + item_size;
12385 /* Reached extent item end normally */
12389 /* Beyond extent item end, wrong item size */
12391 err |= ITEM_SIZE_MISMATCH;
12392 error("extent item at bytenr %llu slot %d has wrong size",
12401 /* Now check every backref in this extent item */
12402 iref = (struct btrfs_extent_inline_ref *)ptr;
12403 type = btrfs_extent_inline_ref_type(eb, iref);
12404 offset = btrfs_extent_inline_ref_offset(eb, iref);
12406 case BTRFS_TREE_BLOCK_REF_KEY:
12407 root_objectid = offset;
12409 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12413 case BTRFS_SHARED_BLOCK_REF_KEY:
12415 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12419 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref structure is read starting at the offset field of the inline ref */
12420 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12421 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12422 owner = btrfs_extent_data_ref_objectid(eb, dref);
12423 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12424 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12425 owner_offset, key.objectid, key.offset,
12426 btrfs_extent_data_ref_count(eb, dref));
12429 case BTRFS_SHARED_DATA_REF_KEY:
12431 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12435 error("extent[%llu %d %llu] has unknown ref type: %d",
12436 key.objectid, key.type, key.offset, type);
12437 ret = UNKNOWN_TYPE;
/*
 * NOTE(review): the condition tests the accumulated err while the value
 * handed to repair_extent_item() is the per-ref ret -- confirm whether
 * the test should be on ret.
 */
12442 if (err && repair) {
12443 ret = repair_extent_item(trans, fs_info->extent_root, path,
12444 key.objectid, num_bytes, parent, root_objectid,
12445 owner, owner_offset, ret);
/* Advance to the next inline ref; its size depends on the ref type */
12454 ptr += btrfs_extent_inline_ref_size(type);
12462 * Check if a dev extent item is referred correctly by its chunk
/*
 * Verify that the dev extent at @slot of @eb is referenced by a chunk.
 *
 * A chunk key is built from the chunk objectid and chunk offset stored in the
 * dev extent and searched in the chunk tree. The chunk found is validated
 * with btrfs_check_chunk_valid(), its stripe length is compared with the dev
 * extent length, and its stripes are scanned for one whose devid and offset
 * equal the objectid and offset of the dev extent key.
 *
 * Returns REFERENCER_MISSING when no such chunk stripe was found.
 */
12464 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12465 struct extent_buffer *eb, int slot)
12467 struct btrfs_root *chunk_root = fs_info->chunk_root;
12468 struct btrfs_dev_extent *ptr;
12469 struct btrfs_path path;
12470 struct btrfs_key chunk_key;
12471 struct btrfs_key devext_key;
12472 struct btrfs_chunk *chunk;
12473 struct extent_buffer *l;
12477 int found_chunk = 0;
12480 btrfs_item_key_to_cpu(eb, &devext_key, slot);
12481 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12482 length = btrfs_dev_extent_length(eb, ptr);
/* The dev extent records which chunk it belongs to */
12484 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12485 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12486 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12488 btrfs_init_path(&path);
12489 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12494 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12495 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
12500 if (btrfs_stripe_length(fs_info, l, chunk) != length)
/* Look for the stripe that points back at this dev extent */
12503 num_stripes = btrfs_chunk_num_stripes(l, chunk);
12504 for (i = 0; i < num_stripes; i++) {
12505 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12506 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12508 if (devid == devext_key.objectid &&
12509 offset == devext_key.offset) {
12515 btrfs_release_path(&path);
12516 if (!found_chunk) {
12518 "device extent[%llu, %llu, %llu] did not find the related chunk",
12519 devext_key.objectid, devext_key.offset, length);
12520 return REFERENCER_MISSING;
12526 * Check if the used space is correct with the dev item
/*
 * Compare the bytes_used field of the dev item at @slot of @eb with the sum
 * of the lengths of all dev extents of that device.
 *
 * Dev extents are located by searching the dev tree with objectid = device id
 * and type = BTRFS_DEV_EXTENT_KEY, then iterating with btrfs_next_item() and
 * adding up btrfs_dev_extent_length() of each matching item.
 *
 * Returns REFERENCER_MISSING when the initial search fails and
 * ACCOUNTING_MISMATCH when the summed length differs from bytes_used.
 * check_dev_size_alignment() is called with the device total_bytes after
 * the accounting check.
 */
12528 static int check_dev_item(struct btrfs_fs_info *fs_info,
12529 struct extent_buffer *eb, int slot)
12531 struct btrfs_root *dev_root = fs_info->dev_root;
12532 struct btrfs_dev_item *dev_item;
12533 struct btrfs_path path;
12534 struct btrfs_key key;
12535 struct btrfs_dev_extent *ptr;
12542 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12543 dev_id = btrfs_device_id(eb, dev_item);
12544 used = btrfs_device_bytes_used(eb, dev_item);
12545 total_bytes = btrfs_device_total_bytes(eb, dev_item);
12547 key.objectid = dev_id;
12548 key.type = BTRFS_DEV_EXTENT_KEY;
12551 btrfs_init_path(&path);
12552 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* key is reloaded from the dev item so the message names that item */
12554 btrfs_item_key_to_cpu(eb, &key, slot);
12555 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12556 key.objectid, key.type, key.offset);
12557 btrfs_release_path(&path);
12558 return REFERENCER_MISSING;
12561 /* Iterate dev_extents to calculate the used space of a device */
12563 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12566 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12567 if (key.objectid > dev_id)
12569 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12572 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12573 struct btrfs_dev_extent);
12574 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12576 ret = btrfs_next_item(dev_root, &path);
12580 btrfs_release_path(&path);
12582 if (used != total) {
/*
 * NOTE(review): key is decoded here but the message arguments below are
 * BTRFS_ROOT_TREE_OBJECTID, BTRFS_DEV_EXTENT_KEY and dev_id rather than
 * the fields of key -- confirm which triple is meant to be printed.
 */
12583 btrfs_item_key_to_cpu(eb, &key, slot);
12585 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12586 total, used, BTRFS_ROOT_TREE_OBJECTID,
12587 BTRFS_DEV_EXTENT_KEY, dev_id);
12588 return ACCOUNTING_MISMATCH;
12590 check_dev_size_alignment(dev_id, total_bytes, fs_info->sectorsize);
12596 * Check a block group item with its referencer (chunk) and its used space
12597 * with extent/metadata item
/*
 * Check the block group item at @slot of @eb.
 *
 * Two things are verified:
 *  1. A chunk item keyed (BTRFS_FIRST_CHUNK_TREE_OBJECTID, CHUNK_ITEM,
 *     block group start) is searched in the chunk tree; a failed lookup sets
 *     REFERENCER_MISSING and a chunk length mismatch sets
 *     REFERENCER_MISMATCH.
 *  2. The extent tree is walked from the block group start; EXTENT_ITEM and
 *     METADATA_ITEM keys inside the block group range are summed into total,
 *     and the extent flags are compared with the block group flags
 *     (CHUNK_TYPE_MISMATCH on disagreement). A total different from the used
 *     field of the block group item sets BG_ACCOUNTING_ERROR.
 */
12599 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12600 struct extent_buffer *eb, int slot)
12602 struct btrfs_root *extent_root = fs_info->extent_root;
12603 struct btrfs_root *chunk_root = fs_info->chunk_root;
12604 struct btrfs_block_group_item *bi;
12605 struct btrfs_block_group_item bg_item;
12606 struct btrfs_path path;
12607 struct btrfs_key bg_key;
12608 struct btrfs_key chunk_key;
12609 struct btrfs_key extent_key;
12610 struct btrfs_chunk *chunk;
12611 struct extent_buffer *leaf;
12612 struct btrfs_extent_item *ei;
12613 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12621 btrfs_item_key_to_cpu(eb, &bg_key, slot);
12622 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* Copy the item out of the extent buffer into a local structure */
12623 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12624 used = btrfs_block_group_used(&bg_item);
12625 bg_flags = btrfs_block_group_flags(&bg_item);
12627 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12628 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12629 chunk_key.offset = bg_key.objectid;
12631 btrfs_init_path(&path);
12632 /* Search for the referencer chunk */
12633 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12636 "block group[%llu %llu] did not find the related chunk item",
12637 bg_key.objectid, bg_key.offset);
12638 err |= REFERENCER_MISSING;
12640 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12641 struct btrfs_chunk);
12642 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12645 "block group[%llu %llu] related chunk item length does not match",
12646 bg_key.objectid, bg_key.offset);
12647 err |= REFERENCER_MISMATCH;
12650 btrfs_release_path(&path);
12652 /* Search from the block group bytenr */
12653 extent_key.objectid = bg_key.objectid;
12654 extent_key.type = 0;
12655 extent_key.offset = 0;
12657 btrfs_init_path(&path);
12658 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12662 /* Iterate extent tree to account used space */
12664 leaf = path.nodes[0];
12666 /* Search slot can point to the last item beyond leaf nritems */
12667 if (path.slots[0] >= btrfs_header_nritems(leaf))
12670 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* The block group covers [bg_key.objectid, bg_key.objectid + bg_key.offset) */
12671 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12674 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12675 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12677 if (extent_key.objectid < bg_key.objectid)
12680 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12683 total += extent_key.offset;
12685 ei = btrfs_item_ptr(leaf, path.slots[0],
12686 struct btrfs_extent_item);
12687 flags = btrfs_extent_flags(leaf, ei);
/* Data extents need a DATA block group, tree blocks a SYSTEM or METADATA one */
12688 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12689 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12691 "bad extent[%llu, %llu) type mismatch with chunk",
12692 extent_key.objectid,
12693 extent_key.objectid + extent_key.offset);
12694 err |= CHUNK_TYPE_MISMATCH;
12696 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12697 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12698 BTRFS_BLOCK_GROUP_METADATA))) {
12700 "bad extent[%llu, %llu) type mismatch with chunk",
12701 extent_key.objectid,
12702 extent_key.objectid + nodesize);
12703 err |= CHUNK_TYPE_MISMATCH;
12707 ret = btrfs_next_item(extent_root, &path);
12713 btrfs_release_path(&path);
12715 if (total != used) {
12717 "block group[%llu %llu] used %llu but extent items used %llu",
12718 bg_key.objectid, bg_key.offset, used, total);
12719 err |= BG_ACCOUNTING_ERROR;
12725 * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
12726 * FIXME: We still need to repair error of dev_item.
12728 * Returns error after repair.
/*
 * Repair helper for the chunk item at path->slots[0].
 *
 * Only items whose key type is BTRFS_CHUNK_ITEM_KEY are handled. When @err
 * contains REFERENCER_MISSING, btrfs_make_block_group() is called with the
 * chunk type, the chunk start (chunk_key.offset) and the chunk length; the
 * bit is cleared from @err on the path that prints the success message.
 */
12730 static int repair_chunk_item(struct btrfs_trans_handle *trans,
12731 struct btrfs_root *chunk_root,
12732 struct btrfs_path *path, int err)
12734 struct btrfs_chunk *chunk;
12735 struct btrfs_key chunk_key;
12736 struct extent_buffer *eb = path->nodes[0];
12738 int slot = path->slots[0];
12742 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12743 if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
12745 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
/* path->nodes[0] is the same buffer as eb (see the initializer above) */
12746 type = btrfs_chunk_type(path->nodes[0], chunk);
12747 length = btrfs_chunk_length(eb, chunk);
12749 if (err & REFERENCER_MISSING) {
12750 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
12751 type, chunk_key.offset, length);
12753 error("fail to add block group item[%llu %llu]",
12754 chunk_key.offset, length);
12757 err &= ~REFERENCER_MISSING;
12758 printf("Added block group item[%llu %llu]\n",
12759 chunk_key.offset, length);
12768 * Check a chunk item.
12769 * Including checking all referred dev_extents and block group
/*
 * Check the chunk item at @slot of @eb against its block group item and its
 * dev extents.
 *
 *  - btrfs_check_chunk_valid() failure sets BYTES_UNALIGNED | UNKNOWN_TYPE.
 *  - A block group item keyed (chunk start, BLOCK_GROUP_ITEM, chunk length)
 *    is searched in the extent tree; a failed lookup sets
 *    REFERENCER_MISSING, and so does a flags value different from the chunk
 *    type.
 *  - For every stripe a dev extent keyed (devid, DEV_EXTENT, stripe offset)
 *    is searched in the dev tree and must point back at this chunk with a
 *    length equal to the stripe length; otherwise BACKREF_MISSING is set.
 */
12771 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12772 struct extent_buffer *eb, int slot)
12774 struct btrfs_root *extent_root = fs_info->extent_root;
12775 struct btrfs_root *dev_root = fs_info->dev_root;
12776 struct btrfs_path path;
12777 struct btrfs_key chunk_key;
12778 struct btrfs_key bg_key;
12779 struct btrfs_key devext_key;
12780 struct btrfs_chunk *chunk;
12781 struct extent_buffer *leaf;
12782 struct btrfs_block_group_item *bi;
12783 struct btrfs_block_group_item bg_item;
12784 struct btrfs_dev_extent *ptr;
12796 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12797 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12798 length = btrfs_chunk_length(eb, chunk);
12799 chunk_end = chunk_key.offset + length;
12800 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12803 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12805 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12808 type = btrfs_chunk_type(eb, chunk);
/* The block group item shares start and length with the chunk */
12810 bg_key.objectid = chunk_key.offset;
12811 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12812 bg_key.offset = length;
12814 btrfs_init_path(&path);
12815 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
12818 "chunk[%llu %llu) did not find the related block group item",
12819 chunk_key.offset, chunk_end);
12820 err |= REFERENCER_MISSING;
12822 leaf = path.nodes[0];
12823 bi = btrfs_item_ptr(leaf, path.slots[0],
12824 struct btrfs_block_group_item);
12825 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
12827 if (btrfs_block_group_flags(&bg_item) != type) {
12829 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
12830 chunk_key.offset, chunk_end, type,
12831 btrfs_block_group_flags(&bg_item));
/*
 * NOTE(review): a flags mismatch is recorded as REFERENCER_MISSING, the
 * same bit used when the block group item does not exist at all --
 * confirm that REFERENCER_MISMATCH is not what was meant.
 */
12832 err |= REFERENCER_MISSING;
12836 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
12837 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
12838 for (i = 0; i < num_stripes; i++) {
/* Drop whatever the previous lookup left in the path before reusing it */
12839 btrfs_release_path(&path);
12840 btrfs_init_path(&path);
12841 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
12842 devext_key.type = BTRFS_DEV_EXTENT_KEY;
12843 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
12845 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
12848 goto not_match_dev;
12850 leaf = path.nodes[0];
12851 ptr = btrfs_item_ptr(leaf, path.slots[0],
12852 struct btrfs_dev_extent);
12853 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
12854 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
12855 if (objectid != chunk_key.objectid ||
12856 offset != chunk_key.offset ||
12857 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
12858 goto not_match_dev;
12861 err |= BACKREF_MISSING;
/*
 * NOTE(review): this message prints chunk_key.objectid as the range start
 * while the other chunk messages in this function print chunk_key.offset
 * -- confirm which is intended.
 */
12863 "chunk[%llu %llu) stripe %d did not find the related dev extent",
12864 chunk_key.objectid, chunk_end, i);
12867 btrfs_release_path(&path);
/*
 * Delete the item that @path currently points at from @root.
 *
 * The item key is saved, the path is released and searched again with @trans
 * (ins_len -1, cow 1), and the item is removed with btrfs_del_item(). When
 * the slot is 0 afterwards, btrfs_prev_leaf() is called on the path. A
 * message naming the root and the key is printed for both the failure and
 * the success case.
 */
12872 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
12873 struct btrfs_root *root,
12874 struct btrfs_path *path)
12876 struct btrfs_key key;
12879 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
12880 btrfs_release_path(path);
/* Search again inside the transaction so the leaf can be modified */
12881 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
12887 ret = btrfs_del_item(trans, root, path);
12891 if (path->slots[0] == 0)
12892 btrfs_prev_leaf(root, path);
12897 error("failed to delete root %llu item[%llu, %u, %llu]",
12898 root->objectid, key.objectid, key.type, key.offset);
12900 printf("Deleted root %llu item[%llu, %u, %llu]\n",
12901 root->objectid, key.objectid, key.type, key.offset);
12906 * Main entry function to check known items and update related accounting info
/*
 * Check the item at path->slots[0] of the leaf path->nodes[0] and dispatch
 * on its key type.
 *
 * A slot at or beyond the item count of the leaf is reported as an empty
 * leaf. Otherwise the key type selects one checker:
 *   EXTENT_DATA        check_extent_data_item, then repair_extent_data_item
 *   BLOCK_GROUP_ITEM   check_block_group_item
 *   DEV_ITEM           check_dev_item
 *   CHUNK_ITEM         check_chunk_item, then repair_chunk_item
 *   DEV_EXTENT         check_dev_extent_item
 *   EXTENT_ITEM / METADATA_ITEM   check_extent_item
 *   EXTENT_CSUM        item size is added to total_csum_bytes
 *   keyed backref items (TREE_BLOCK_REF, EXTENT_DATA_REF, SHARED_BLOCK_REF,
 *   SHARED_DATA_REF)   matching check_*_backref helper
 * For block group items with REFERENCER_MISSING and for keyed backrefs with
 * REFERENCER_MISMATCH or REFERENCER_MISSING, delete_extent_tree_item() is
 * called; the first half of each of those conditions is not visible here
 * (presumably a repair check -- confirm).
 */
12908 static int check_leaf_items(struct btrfs_trans_handle *trans,
12909 struct btrfs_root *root, struct btrfs_path *path,
12910 struct node_refs *nrefs, int account_bytes)
12912 struct btrfs_fs_info *fs_info = root->fs_info;
12913 struct btrfs_key key;
12914 struct extent_buffer *eb;
12917 struct btrfs_extent_data_ref *dref;
12922 eb = path->nodes[0];
12923 slot = path->slots[0];
12924 if (slot >= btrfs_header_nritems(eb)) {
12926 error("empty leaf [%llu %u] root %llu", eb->start,
12927 root->fs_info->nodesize, root->objectid);
12933 btrfs_item_key_to_cpu(eb, &key, slot);
12937 case BTRFS_EXTENT_DATA_KEY:
12938 ret = check_extent_data_item(root, path, nrefs, account_bytes);
12940 ret = repair_extent_data_item(trans, root, path, nrefs,
12944 case BTRFS_BLOCK_GROUP_ITEM_KEY:
12945 ret = check_block_group_item(fs_info, eb, slot);
12947 ret & REFERENCER_MISSING)
12948 ret = delete_extent_tree_item(trans, root, path);
12951 case BTRFS_DEV_ITEM_KEY:
12952 ret = check_dev_item(fs_info, eb, slot);
12955 case BTRFS_CHUNK_ITEM_KEY:
12956 ret = check_chunk_item(fs_info, eb, slot);
12958 ret = repair_chunk_item(trans, root, path, ret);
12961 case BTRFS_DEV_EXTENT_KEY:
12962 ret = check_dev_extent_item(fs_info, eb, slot);
12965 case BTRFS_EXTENT_ITEM_KEY:
12966 case BTRFS_METADATA_ITEM_KEY:
12967 ret = check_extent_item(trans, fs_info, path);
12970 case BTRFS_EXTENT_CSUM_KEY:
/* Accounting only: no checker is called for csum items here */
12971 total_csum_bytes += btrfs_item_size_nr(eb, slot);
12974 case BTRFS_TREE_BLOCK_REF_KEY:
12975 ret = check_tree_block_backref(fs_info, key.offset,
12978 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12979 ret = delete_extent_tree_item(trans, root, path);
12982 case BTRFS_EXTENT_DATA_REF_KEY:
12983 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
12984 ret = check_extent_data_backref(fs_info,
12985 btrfs_extent_data_ref_root(eb, dref),
12986 btrfs_extent_data_ref_objectid(eb, dref),
12987 btrfs_extent_data_ref_offset(eb, dref),
12989 btrfs_extent_data_ref_count(eb, dref));
12991 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12992 ret = delete_extent_tree_item(trans, root, path);
12995 case BTRFS_SHARED_BLOCK_REF_KEY:
12996 ret = check_shared_block_backref(fs_info, key.offset,
12999 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13000 ret = delete_extent_tree_item(trans, root, path);
13003 case BTRFS_SHARED_DATA_REF_KEY:
13004 ret = check_shared_data_backref(fs_info, key.offset,
13007 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13008 ret = delete_extent_tree_item(trans, root, path);
13022 * Low memory usage version check_chunks_and_extents.
/*
 * Low memory variant of the chunk and extent check.
 *
 * Visible flow:
 *  1. A transaction is started on the extent root (the guarding condition is
 *     not visible here; presumably repair mode -- confirm).
 *  2. check_btrfs_root() is run on the chunk root and then the tree root.
 *  3. ROOT_ITEM entries of the tree root are walked starting at
 *     BTRFS_EXTENT_TREE_OBJECTID. Each root is read (without the cache for
 *     BTRFS_TREE_RELOC_OBJECTID, and those are freed again afterwards) and
 *     passed to check_btrfs_root(); the path is then released and searched
 *     again with old_key before moving to the next item.
 *  4. btrfs_fix_block_accounting() is called and BG_ACCOUNTING_ERROR is
 *     cleared from err, then the transaction is committed.
 */
13024 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
13026 struct btrfs_trans_handle *trans = NULL;
13027 struct btrfs_path path;
13028 struct btrfs_key old_key;
13029 struct btrfs_key key;
13030 struct btrfs_root *root1;
13031 struct btrfs_root *root;
13032 struct btrfs_root *cur_root;
13036 root = fs_info->fs_root;
13039 trans = btrfs_start_transaction(fs_info->extent_root, 1);
13040 if (IS_ERR(trans)) {
13041 error("failed to start transaction before check");
13042 return PTR_ERR(trans);
13046 root1 = root->fs_info->chunk_root;
13047 ret = check_btrfs_root(trans, root1, 0, 1);
13050 root1 = root->fs_info->tree_root;
13051 ret = check_btrfs_root(trans, root1, 0, 1);
13054 btrfs_init_path(&path);
13055 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
13057 key.type = BTRFS_ROOT_ITEM_KEY;
13059 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
13061 error("cannot find extent tree in tree_root");
13066 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13067 if (key.type != BTRFS_ROOT_ITEM_KEY)
/* The offset is set to the maximum u64 value before the root is read */
13070 key.offset = (u64)-1;
13072 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13073 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
13076 cur_root = btrfs_read_fs_root(root->fs_info, &key);
13077 if (IS_ERR(cur_root) || !cur_root) {
/* NOTE(review): %lld is used for key.objectid; other messages in this file use %llu */
13078 error("failed to read tree: %lld", key.objectid);
13082 ret = check_btrfs_root(trans, cur_root, 0, 1);
/* Reloc roots were read without the cache, so they are freed here */
13085 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13086 btrfs_free_fs_root(cur_root);
13088 btrfs_release_path(&path);
/*
 * NOTE(review): the assignment of old_key is not visible in this view;
 * confirm it is set on every path that reaches this search.
 */
13089 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
13090 &old_key, &path, 0, 0);
13094 ret = btrfs_next_item(root1, &path);
13100 /* if repair, update block accounting */
13102 ret = btrfs_fix_block_accounting(trans, root);
13106 err &= ~BG_ACCOUNTING_ERROR;
13110 btrfs_commit_transaction(trans, root->fs_info->extent_root);
13112 btrfs_release_path(&path);
/*
 * Entry point for the extent check: prints a progress line to stderr unless
 * ctx.progress_enabled is set, then runs check_chunks_and_extents_v2() in
 * CHECK_MODE_LOWMEM or check_chunks_and_extents() otherwise. When repair is
 * set and the check returned 0, btrfs_fix_device_and_super_size() is called.
 */
13117 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
13121 if (!ctx.progress_enabled)
13122 fprintf(stderr, "checking extents\n");
13123 if (check_mode == CHECK_MODE_LOWMEM)
13124 ret = check_chunks_and_extents_v2(fs_info);
13126 ret = check_chunks_and_extents(fs_info);
13128 /* Also repair device size related problems */
13129 if (repair && !ret) {
13130 ret = btrfs_fix_device_and_super_size(fs_info);
/*
 * Re-initialise the root node of @root with an empty tree block.
 *
 * The block c is either an existing buffer that gets an extra reference
 * (presumably the old root node when @overwrite is set -- the assignment is
 * not visible here) or a block newly obtained from
 * btrfs_alloc_free_block(). Its header is zeroed and refilled (level,
 * bytenr, generation of @trans, backref revision, owner, fsid and chunk
 * tree uuid) and the buffer is marked dirty. When the old and new block
 * start at the same bytenr the root item generation and level are updated
 * and written with btrfs_update_root(). Finally the old buffer is released
 * and the root is added to the dirty list.
 */
13137 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
13138 struct btrfs_root *root, int overwrite)
13140 struct extent_buffer *c;
13141 struct extent_buffer *old = root->node;
13144 struct btrfs_disk_key disk_key = {0,0,0};
13150 extent_buffer_get(c);
13153 c = btrfs_alloc_free_block(trans, root,
13154 root->fs_info->nodesize,
13155 root->root_key.objectid,
13156 &disk_key, level, 0, 0);
13159 extent_buffer_get(c);
/* Wipe the header only; the rest of the block is left as is */
13163 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
13164 btrfs_set_header_level(c, level);
13165 btrfs_set_header_bytenr(c, c->start);
13166 btrfs_set_header_generation(c, trans->transid);
13167 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
13168 btrfs_set_header_owner(c, root->root_key.objectid);
13170 write_extent_buffer(c, root->fs_info->fsid,
13171 btrfs_header_fsid(), BTRFS_FSID_SIZE);
13173 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
13174 btrfs_header_chunk_tree_uuid(c),
13177 btrfs_mark_buffer_dirty(c);
13179 * this case can happen in the following case:
13181 * 1.overwrite previous root.
13183 * 2.reinit reloc data root, this is because we skip pin
13184 * down reloc data tree before which means we can allocate
13185 * same block bytenr here.
13187 if (old->start == c->start) {
13188 btrfs_set_root_generation(&root->root_item,
13190 root->root_item.level = btrfs_header_level(root->node);
13191 ret = btrfs_update_root(trans, root->fs_info->tree_root,
13192 &root->root_key, &root->root_item);
13194 free_extent_buffer(c);
13198 free_extent_buffer(old);
13200 add_root_to_dirty_list(root);
/*
 * Recursively pin the tree block @eb and the blocks reachable from it.
 *
 * A block whose range already has EXTENT_DIRTY set in
 * fs_info->pinned_extents is not processed again. Otherwise the block is
 * pinned with btrfs_pin_extent() and its items are walked:
 *  - for ROOT_ITEM keys (other key types are skipped), except the extent
 *    root and the two reloc roots, the root block named by the root item is
 *    read and the function recurses with tree_root = 0;
 *  - for node pointers the child bytenr is taken from
 *    btrfs_node_blockptr(); at level 1 of a non tree root the child is only
 *    pinned (nodesize bytes) without being read, otherwise it is read and
 *    the function recurses with the same @tree_root value.
 * A block that cannot be read is reported on stderr.
 */
13204 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
13205 struct extent_buffer *eb, int tree_root)
13207 struct extent_buffer *tmp;
13208 struct btrfs_root_item *ri;
13209 struct btrfs_key key;
13211 int level = btrfs_header_level(eb);
13217 * If we have pinned this block before, don't pin it again.
13218 * This can not only avoid forever loop with broken filesystem
13219 * but also give us some speedups.
13221 if (test_range_bit(&fs_info->pinned_extents, eb->start,
13222 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
13225 btrfs_pin_extent(fs_info, eb->start, eb->len);
13227 nritems = btrfs_header_nritems(eb);
13228 for (i = 0; i < nritems; i++) {
13230 btrfs_item_key_to_cpu(eb, &key, i);
13231 if (key.type != BTRFS_ROOT_ITEM_KEY)
13233 /* Skip the extent root and reloc roots */
13234 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
13235 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
13236 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
13238 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
13239 bytenr = btrfs_disk_root_bytenr(eb, ri);
13242 * If at any point we start needing the real root we
13243 * will have to build a stump root for the root we are
13244 * in, but for now this doesn't actually use the root so
13245 * just pass in extent_root.
13247 tmp = read_tree_block(fs_info, bytenr, 0);
13248 if (!extent_buffer_uptodate(tmp)) {
13249 fprintf(stderr, "Error reading root block\n");
13252 ret = pin_down_tree_blocks(fs_info, tmp, 0);
13253 free_extent_buffer(tmp);
13257 bytenr = btrfs_node_blockptr(eb, i);
13259 /* If we aren't the tree root don't read the block */
13260 if (level == 1 && !tree_root) {
13261 btrfs_pin_extent(fs_info, bytenr,
13262 fs_info->nodesize);
13266 tmp = read_tree_block(fs_info, bytenr, 0);
13267 if (!extent_buffer_uptodate(tmp)) {
13268 fprintf(stderr, "Error reading tree block\n");
13271 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
13272 free_extent_buffer(tmp);
/*
 * Pin the blocks of the chunk tree (tree_root argument 0) and then of the
 * tree root (tree_root argument 1) via pin_down_tree_blocks().
 */
13281 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13285 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13289 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk tree.
 *
 * The three avail_*_alloc_bits fields are cleared, then every CHUNK_ITEM in
 * the chunk tree is turned into a block group with btrfs_add_block_group()
 * (chunk type, key.offset as start, chunk length) and its range is marked
 * dirty in fs_info->free_space_cache. A second loop walks the block groups
 * with btrfs_lookup_first_block_group(), advancing start past each one; what
 * is done per block group there is not visible in this view.
 */
13292 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13294 struct btrfs_block_group_cache *cache;
13295 struct btrfs_path path;
13296 struct extent_buffer *leaf;
13297 struct btrfs_chunk *chunk;
13298 struct btrfs_key key;
13302 btrfs_init_path(&path);
13304 key.type = BTRFS_CHUNK_ITEM_KEY;
13306 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13308 btrfs_release_path(&path);
13313 * We do this in case the block groups were screwed up and had alloc
13314 * bits that aren't actually set on the chunks. This happens with
13315 * restored images every time and could happen in real life I guess.
13317 fs_info->avail_data_alloc_bits = 0;
13318 fs_info->avail_metadata_alloc_bits = 0;
13319 fs_info->avail_system_alloc_bits = 0;
13321 /* First we need to create the in-memory block groups */
13323 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13324 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13326 btrfs_release_path(&path);
13334 leaf = path.nodes[0];
13335 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13336 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13341 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13342 btrfs_add_block_group(fs_info, 0,
13343 btrfs_chunk_type(leaf, chunk), key.offset,
13344 btrfs_chunk_length(leaf, chunk));
/*
 * NOTE(review): the end passed here is offset + length, while
 * test_range_bit() in pin_down_tree_blocks() is given start + len - 1 --
 * confirm whether the end argument is inclusive.
 */
13345 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13346 key.offset + btrfs_chunk_length(leaf, chunk));
13351 cache = btrfs_lookup_first_block_group(fs_info, start);
13355 start = cache->key.objectid + cache->key.offset;
13358 btrfs_release_path(&path);
/*
 * Remove balance state from the tree root and reinitialise the data reloc
 * tree.
 *
 * Visible flow:
 *  1. The item keyed BTRFS_BALANCE_OBJECTID / BTRFS_BALANCE_ITEM_KEY is
 *     searched and deleted; one path jumps straight to reinit_data_reloc.
 *  2. Items whose objectid is BTRFS_TREE_RELOC_OBJECTID are collected in
 *     runs (del_slot, del_nr) and removed with btrfs_del_items().
 *  3. The BTRFS_DATA_RELOC_TREE_OBJECTID root is read, recorded in the
 *     transaction, reinitialised with btrfs_fsck_reinit_root() and given a
 *     root directory with btrfs_make_root_dir().
 */
13362 static int reset_balance(struct btrfs_trans_handle *trans,
13363 struct btrfs_fs_info *fs_info)
13365 struct btrfs_root *root = fs_info->tree_root;
13366 struct btrfs_path path;
13367 struct extent_buffer *leaf;
13368 struct btrfs_key key;
13369 int del_slot, del_nr = 0;
13373 btrfs_init_path(&path);
13374 key.objectid = BTRFS_BALANCE_OBJECTID;
13375 key.type = BTRFS_BALANCE_ITEM_KEY;
13377 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13382 goto reinit_data_reloc;
13387 ret = btrfs_del_item(trans, root, &path);
13390 btrfs_release_path(&path);
13392 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13393 key.type = BTRFS_ROOT_ITEM_KEY;
13395 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13399 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13404 ret = btrfs_del_items(trans, root, &path,
13411 btrfs_release_path(&path);
13414 ret = btrfs_search_slot(trans, root, &key, &path,
13421 leaf = path.nodes[0];
13422 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13423 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13425 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13430 del_slot = path.slots[0];
/* Delete the run of reloc items gathered so far */
13439 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13443 btrfs_release_path(&path);
13446 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13447 key.type = BTRFS_ROOT_ITEM_KEY;
13448 key.offset = (u64)-1;
/* From here on root refers to the data reloc tree, not the tree root */
13449 root = btrfs_read_fs_root(fs_info, &key);
13450 if (IS_ERR(root)) {
13451 fprintf(stderr, "Error reading data reloc tree\n");
13452 ret = PTR_ERR(root);
13455 record_root_in_trans(trans, root);
13456 ret = btrfs_fsck_reinit_root(trans, root, 0);
13459 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13461 btrfs_release_path(&path);
/*
 * Rebuild the extent tree from scratch inside @trans.
 *
 * Visible flow:
 *  1. Filesystems with the MIXED_GROUPS incompat flag are rejected with a
 *     message on stderr.
 *  2. pin_metadata_blocks() pins the metadata that is in use.
 *  3. The existing block groups are freed and recreated with
 *     reset_block_groups().
 *  4. The extent root is reinitialised with btrfs_fsck_reinit_root().
 *  5. Every in-memory block group is walked with
 *     btrfs_lookup_first_block_group(); its key and item are inserted into
 *     the extent root, followed by btrfs_extent_post_op().
 *  6. reset_balance() clears any pending balance.
 * Each failing step prints a message to stderr.
 */
13465 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13466 struct btrfs_fs_info *fs_info)
13472 * The only reason we don't do this is because right now we're just
13473 * walking the trees we find and pinning down their bytes, we don't look
13474 * at any of the leaves. In order to do mixed groups we'd have to check
13475 * the leaves of any fs roots and pin down the bytes for any file
13476 * extents we find. Not hard but why do it if we don't have to?
13478 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13479 fprintf(stderr, "We don't support re-initing the extent tree "
13480 "for mixed block groups yet, please notify a btrfs "
13481 "developer you want to do this so they can add this "
13482 "functionality.\n");
13487 * first we need to walk all of the trees except the extent tree and pin
13488 * down the bytes that are in use so we don't overwrite any existing
13491 ret = pin_metadata_blocks(fs_info);
13493 fprintf(stderr, "error pinning down used bytes\n");
13498 * Need to drop all the block groups since we're going to recreate all
13501 btrfs_free_block_groups(fs_info);
13502 ret = reset_block_groups(fs_info);
13504 fprintf(stderr, "error resetting the block groups\n");
13508 /* Ok we can allocate now, reinit the extent root */
13509 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13511 fprintf(stderr, "extent root initialization failed\n");
13513 * When the transaction code is updated we should end the
13514 * transaction, but for now progs only knows about commit so
13515 * just return an error.
13521 * Now we have all the in-memory block groups setup so we can make
13522 * allocations properly, and the metadata we care about is safe since we
13523 * pinned all of it above.
13526 struct btrfs_block_group_cache *cache;
13528 cache = btrfs_lookup_first_block_group(fs_info, start);
13531 start = cache->key.objectid + cache->key.offset;
13532 ret = btrfs_insert_item(trans, fs_info->extent_root,
13533 &cache->key, &cache->item,
13534 sizeof(cache->item));
13536 fprintf(stderr, "Error adding block group\n");
13539 btrfs_extent_post_op(trans, fs_info->extent_root);
13542 ret = reset_balance(trans, fs_info);
13544 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force a copy-on-write of the metadata block @eb.
 *
 * The owning root is looked up from the header owner of @eb (the @root
 * parameter is only used to reach fs_info and is then overwritten). A
 * transaction is started, the path lowest_level is set to the level of @eb,
 * and the first key of @eb is searched with cow = 1; the transaction is
 * then committed.
 */
13549 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13551 struct btrfs_path path;
13552 struct btrfs_trans_handle *trans;
13553 struct btrfs_key key;
13556 printf("Recowing metadata block %llu\n", eb->start);
13557 key.objectid = btrfs_header_owner(eb);
13558 key.type = BTRFS_ROOT_ITEM_KEY;
13559 key.offset = (u64)-1;
13561 root = btrfs_read_fs_root(root->fs_info, &key);
13562 if (IS_ERR(root)) {
13563 fprintf(stderr, "Couldn't find owner root %llu\n",
13565 return PTR_ERR(root);
13568 trans = btrfs_start_transaction(root, 1);
13570 return PTR_ERR(trans);
13572 btrfs_init_path(&path);
/* Stop the search at the level of eb so that eb itself is the block reached */
13573 path.lowest_level = btrfs_header_level(eb);
/* A non zero level means eb is a node, so its first key is a node key */
13574 if (path.lowest_level)
13575 btrfs_node_key_to_cpu(eb, &key, 0);
13577 btrfs_item_key_to_cpu(eb, &key, 0);
13579 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13580 btrfs_commit_transaction(trans, root);
13581 btrfs_release_path(&path);
/*
 * Delete the item described by @bad from the root it belongs to.
 *
 * The root is looked up from bad->root_id (the @root parameter is only used
 * to reach fs_info and is then overwritten). Inside a new transaction the
 * key bad->key is searched (ins_len -1, cow 1) and removed with
 * btrfs_del_item(); the transaction is committed afterwards.
 */
13585 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13587 struct btrfs_path path;
13588 struct btrfs_trans_handle *trans;
13589 struct btrfs_key key;
13592 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13593 bad->key.type, bad->key.offset);
13594 key.objectid = bad->root_id;
13595 key.type = BTRFS_ROOT_ITEM_KEY;
13596 key.offset = (u64)-1;
13598 root = btrfs_read_fs_root(root->fs_info, &key);
13599 if (IS_ERR(root)) {
13600 fprintf(stderr, "Couldn't find owner root %llu\n",
13602 return PTR_ERR(root);
13605 trans = btrfs_start_transaction(root, 1);
13607 return PTR_ERR(trans);
13609 btrfs_init_path(&path);
13610 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13616 ret = btrfs_del_item(trans, root, &path);
13618 btrfs_commit_transaction(trans, root);
13619 btrfs_release_path(&path);
/*
 * Clear the log tree pointer: inside a transaction on @root the log_root and
 * log_root_level fields of the in-memory super block copy are set to 0, and
 * the transaction is committed.
 */
13623 static int zero_log_tree(struct btrfs_root *root)
13625 struct btrfs_trans_handle *trans;
13628 trans = btrfs_start_transaction(root, 1);
13629 if (IS_ERR(trans)) {
13630 ret = PTR_ERR(trans);
13633 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13634 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13635 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute checksums for the data range that starts at @start.
 *
 * The range is processed one sector at a time: each sector is read into @buf
 * with read_extent_data() and handed to btrfs_csum_file_block() together
 * with @csum_root. @buf is used as a one sector scratch buffer.
 */
13639 static int populate_csum(struct btrfs_trans_handle *trans,
13640 struct btrfs_root *csum_root, char *buf, u64 start,
13643 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13648 while (offset < len) {
13649 sectorsize = fs_info->sectorsize;
13650 ret = read_extent_data(fs_info, buf, start + offset,
/* The third argument is start + len; presumably the end of the range -- confirm */
13654 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13655 start + offset, buf, sectorsize);
13658 offset += sectorsize;
/*
 * Fill @csum_root with checksums for the regular file extents of @cur_root.
 *
 * A scratch buffer of one sector is allocated, @cur_root is searched and its
 * items are iterated with btrfs_next_item(). Only EXTENT_DATA items whose
 * file extent type is BTRFS_FILE_EXTENT_REG are used: their disk bytenr and
 * disk num bytes are passed to populate_csum(). A return of -EEXIST from
 * populate_csum() is tested separately (the handling is not visible here).
 */
13663 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13664 struct btrfs_root *csum_root,
13665 struct btrfs_root *cur_root)
13667 struct btrfs_path path;
13668 struct btrfs_key key;
13669 struct extent_buffer *node;
13670 struct btrfs_file_extent_item *fi;
/* Scratch buffer sized for a single sector */
13677 buf = malloc(cur_root->fs_info->sectorsize);
13681 btrfs_init_path(&path);
13685 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13688 /* Iterate all regular file extents and fill its csum */
13690 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13692 if (key.type != BTRFS_EXTENT_DATA_KEY)
13694 node = path.nodes[0];
13695 slot = path.slots[0];
13696 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13697 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* Checksums cover the on-disk range of the extent */
13699 start = btrfs_file_extent_disk_bytenr(node, fi);
13700 len = btrfs_file_extent_disk_num_bytes(node, fi);
13702 ret = populate_csum(trans, csum_root, buf, start, len);
13703 if (ret == -EEXIST)
13709 * TODO: if next leaf is corrupted, jump to nearest next valid
13712 ret = btrfs_next_item(cur_root, &path);
13722 btrfs_release_path(&path);
/*
 * Fill @csum_root by visiting fs/subvolume trees found in the tree root.
 *
 * The tree root is searched read-only starting from a key with objectid
 * BTRFS_FS_TREE_OBJECTID and type BTRFS_ROOT_ITEM_KEY, and then walked with
 * btrfs_next_item(). Each candidate root is opened with
 * btrfs_read_fs_root() and passed to fill_csum_tree_from_one_fs_root().
 *
 * NOTE(review): the loop construct and the statements executed by the three
 * filters below are missing from this listing, as are the error paths and
 * the return.
 */
13727 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13728 struct btrfs_root *csum_root)
13730 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13731 struct btrfs_path path;
13732 struct btrfs_root *tree_root = fs_info->tree_root;
13733 struct btrfs_root *cur_root;
13734 struct extent_buffer *node;
13735 struct btrfs_key key;
13739 btrfs_init_path(&path);
13740 key.objectid = BTRFS_FS_TREE_OBJECTID;
13742 key.type = BTRFS_ROOT_ITEM_KEY;
13743 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13752 node = path.nodes[0];
13753 slot = path.slots[0];
13754 btrfs_item_key_to_cpu(node, &key, slot);
/* Filters: objectid range, item type, and is_fstree() on the objectid. */
13755 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13757 if (key.type != BTRFS_ROOT_ITEM_KEY)
13759 if (!is_fstree(key.objectid))
/* The found key is reused for the root lookup with offset forced to -1. */
13761 key.offset = (u64)-1;
13763 cur_root = btrfs_read_fs_root(fs_info, &key);
/* Both an error pointer and NULL are treated as failure. */
13764 if (IS_ERR(cur_root) || !cur_root) {
13765 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13769 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13774 ret = btrfs_next_item(tree_root, &path);
13784 btrfs_release_path(&path);
/*
 * Fill @csum_root by walking the extent tree.
 *
 * The extent root is searched read-only and iterated leaf by leaf
 * (btrfs_next_leaf() is called once the slot index reaches the number of
 * items in the current leaf). For items of type BTRFS_EXTENT_ITEM_KEY whose
 * extent flags include BTRFS_EXTENT_FLAG_DATA, populate_csum() is called
 * with key.objectid as the start of the range.
 *
 * NOTE(review): the loop construct, slot advancement, the length argument
 * passed to populate_csum(), the error paths and the return are on lines
 * missing from this listing.
 */
13788 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13789 struct btrfs_root *csum_root)
13791 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13792 struct btrfs_path path;
13793 struct btrfs_extent_item *ei;
13794 struct extent_buffer *leaf;
13796 struct btrfs_key key;
13799 btrfs_init_path(&path);
13801 key.type = BTRFS_EXTENT_ITEM_KEY;
13803 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
/* Path released on an exit path whose condition is not shown. */
13805 btrfs_release_path(&path);
/* Scratch buffer of one sector, passed to populate_csum() below. */
13809 buf = malloc(csum_root->fs_info->sectorsize);
/* Path released again; presumably the allocation-failure path. */
13811 btrfs_release_path(&path);
13816 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13817 ret = btrfs_next_leaf(extent_root, &path);
13825 leaf = path.nodes[0];
13827 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13828 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
13833 ei = btrfs_item_ptr(leaf, path.slots[0],
13834 struct btrfs_extent_item);
/* Only extents flagged as data pass this test. */
13835 if (!(btrfs_extent_flags(leaf, ei) &
13836 BTRFS_EXTENT_FLAG_DATA)) {
13841 ret = populate_csum(trans, csum_root, buf, key.objectid,
13848 btrfs_release_path(&path);
13854 * Recalculate the csum and put it into the csum tree.
13856 * Extent tree init will wipe out all the extent info, so in that case, we
13857 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
13858 * will use fs/subvol trees to init the csum tree.
/*
 * Dispatcher for csum tree refilling.
 *
 * @trans:          transaction handle forwarded to the selected helper.
 * @csum_root:      csum tree root forwarded to the selected helper.
 * @search_fs_tree: when non-zero, use fill_csum_tree_from_fs(); otherwise
 *                  use fill_csum_tree_from_extent().
 *
 * Returns whatever the selected helper returns.
 */
13860 static int fill_csum_tree(struct btrfs_trans_handle *trans,
13861 struct btrfs_root *csum_root,
13862 int search_fs_tree)
13864 if (search_fs_tree)
13865 return fill_csum_tree_from_fs(trans, csum_root);
13867 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache.
 *
 * Nothing is done when the cache pointer is NULL (the statement under that
 * test is not shown; presumably an early return). Otherwise entries are
 * taken from the front of the tree one at a time, unlinked with
 * remove_cache_extent() and mapped back to their enclosing root_item_info;
 * afterwards the tree object itself is freed and the global pointer reset
 * to NULL so the cache can be rebuilt later.
 *
 * NOTE(review): the line releasing each root_item_info is not part of this
 * listing.
 */
13870 static void free_roots_info_cache(void)
13872 if (!roots_info_cache)
13875 while (!cache_tree_empty(roots_info_cache)) {
13876 struct cache_extent *entry;
13877 struct root_item_info *rii;
13879 entry = first_cache_extent(roots_info_cache);
13882 remove_cache_extent(roots_info_cache, entry);
13883 rii = container_of(entry, struct root_item_info, cache_extent);
13887 free(roots_info_cache);
13888 roots_info_cache = NULL;
/*
 * Populate the global roots_info_cache from the extent tree.
 *
 * The cache is allocated and initialised on first use. The extent root is
 * then scanned read-only; for EXTENT_ITEM/METADATA_ITEM entries that
 * describe tree blocks and whose first inline ref is of type
 * BTRFS_TREE_BLOCK_REF_KEY, the ref's offset is taken as the root id and
 * used as the cache key (start == root id, size == 1). Per root id the
 * cache keeps the highest tree level seen so far together with the bytenr
 * and generation of the block at that level.
 *
 * NOTE(review): the loop construct, slot advancement, the statements under
 * several of the filters, the body of the "same level" branch, the error
 * paths and the return are on lines missing from this listing.
 */
13891 static int build_roots_info_cache(struct btrfs_fs_info *info)
13894 struct btrfs_key key;
13895 struct extent_buffer *leaf;
13896 struct btrfs_path path;
13898 if (!roots_info_cache) {
13899 roots_info_cache = malloc(sizeof(*roots_info_cache));
13900 if (!roots_info_cache)
13902 cache_tree_init(roots_info_cache);
13905 btrfs_init_path(&path);
13907 key.type = BTRFS_EXTENT_ITEM_KEY;
13909 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
13912 leaf = path.nodes[0];
13915 struct btrfs_key found_key;
13916 struct btrfs_extent_item *ei;
13917 struct btrfs_extent_inline_ref *iref;
13918 int slot = path.slots[0];
13923 struct cache_extent *entry;
13924 struct root_item_info *rii;
/* Past the last item of this leaf: move on and refresh leaf/slot. */
13926 if (slot >= btrfs_header_nritems(leaf)) {
13927 ret = btrfs_next_leaf(info->extent_root, &path);
13934 leaf = path.nodes[0];
13935 slot = path.slots[0];
13938 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13940 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
13941 found_key.type != BTRFS_METADATA_ITEM_KEY)
13944 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
13945 flags = btrfs_extent_flags(leaf, ei);
/* An EXTENT_ITEM without the TREE_BLOCK flag is filtered here. */
13947 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
13948 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * METADATA_ITEM: the inline ref directly follows the extent item and the
 * level is taken from the key offset. Otherwise a btrfs_tree_block_info
 * sits between the extent item and the inline ref and supplies the level.
 */
13951 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
13952 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
13953 level = found_key.offset;
13955 struct btrfs_tree_block_info *binfo;
13957 binfo = (struct btrfs_tree_block_info *)(ei + 1);
13958 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
13959 level = btrfs_tree_block_level(leaf, binfo);
13963 * For a root extent, it must be of the following type and the
13964 * first (and only one) iref in the item.
13966 type = btrfs_extent_inline_ref_type(leaf, iref);
13967 if (type != BTRFS_TREE_BLOCK_REF_KEY)
/* The inline ref offset is used as the owning root id / cache key. */
13970 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
13971 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/* New entry path (the "not found" test itself is not shown). */
13973 rii = malloc(sizeof(struct root_item_info));
13978 rii->cache_extent.start = root_id;
13979 rii->cache_extent.size = 1;
/* (u8)-1 marks "no level recorded yet"; tested again below. */
13980 rii->level = (u8)-1;
13981 entry = &rii->cache_extent;
13982 ret = insert_cache_extent(roots_info_cache, entry);
13985 rii = container_of(entry, struct root_item_info,
13989 ASSERT(rii->cache_extent.start == root_id);
13990 ASSERT(rii->cache_extent.size == 1);
/* A higher level (or the first one seen) replaces the recorded block. */
13992 if (level > rii->level || rii->level == (u8)-1) {
13993 rii->level = level;
13994 rii->bytenr = found_key.objectid;
13995 rii->gen = btrfs_extent_generation(leaf, ei);
13996 rii->node_count = 1;
/* Same level as the recorded block; the branch body is not shown. */
13997 } else if (level == rii->level) {
14005 btrfs_release_path(&path);
/*
 * Compare one root item against the roots info cache and optionally fix it.
 *
 * @path:           points at the root item (leaf path->nodes[0], slot
 *                  path->slots[0]).
 * @root_key:       key of that root item; its objectid is the root id used
 *                  for the cache lookup.
 * @read_only_mode: when zero, a mismatching root item is rewritten in the
 *                  leaf with the cached bytenr, level and generation; when
 *                  non-zero the item is only inspected.
 *
 * The root item is copied out of the leaf into a local btrfs_root_item,
 * compared field by field with the cached values, and written back with
 * write_extent_buffer() if a fix is applied.
 *
 * NOTE(review): the return statements and some message calls are on lines
 * missing from this listing, so the return values cannot be documented
 * from here.
 */
14010 static int maybe_repair_root_item(struct btrfs_path *path,
14011 const struct btrfs_key *root_key,
14012 const int read_only_mode)
14014 const u64 root_id = root_key->objectid;
14015 struct cache_extent *entry;
14016 struct root_item_info *rii;
14017 struct btrfs_root_item ri;
14018 unsigned long offset;
14020 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14023 "Error: could not find extent items for root %llu\n",
14024 root_key->objectid);
14028 rii = container_of(entry, struct root_item_info, cache_extent);
14029 ASSERT(rii->cache_extent.start == root_id);
14030 ASSERT(rii->cache_extent.size == 1);
/* Anything other than exactly one recorded node is reported as an error. */
14032 if (rii->node_count != 1) {
14034 "Error: could not find btree root extent for root %llu\n",
/* Work on a local copy of the on-disk root item. */
14039 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
14040 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
14042 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
14043 btrfs_root_level(&ri) != rii->level ||
14044 btrfs_root_generation(&ri) != rii->gen) {
14047 * If we're in repair mode but our caller told us to not update
14048 * the root item, i.e. just check if it needs to be updated, don't
14049 * print this message, since the caller will call us again shortly
14050 * for the same root item without read only mode (the caller will
14051 * open a transaction first).
14053 if (!(read_only_mode && repair))
14055 "%sroot item for root %llu,"
14056 " current bytenr %llu, current gen %llu, current level %u,"
14057 " new bytenr %llu, new gen %llu, new level %u\n",
14058 (read_only_mode ? "" : "fixing "),
14060 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
14061 btrfs_root_level(&ri),
14062 rii->bytenr, rii->gen, rii->level);
/* A root item newer than the found root node is reported separately. */
14064 if (btrfs_root_generation(&ri) > rii->gen) {
14066 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
14067 root_id, btrfs_root_generation(&ri), rii->gen);
/* Apply the cached values to the copy and write it back into the leaf. */
14071 if (!read_only_mode) {
14072 btrfs_set_root_bytenr(&ri, rii->bytenr);
14073 btrfs_set_root_level(&ri, rii->level);
14074 btrfs_set_root_generation(&ri, rii->gen);
14075 write_extent_buffer(path->nodes[0], &ri,
14076 offset, sizeof(ri));
14086 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
14087 * caused read-only snapshots to be corrupted if they were created at a moment
14088 * when the source subvolume/snapshot had orphan items. The issue was that the
14089 * on-disk root items became incorrect, referring to the pre orphan cleanup root
14090 * node instead of the post orphan cleanup root node.
14091 * So this function, and its callees, just detects and fixes those cases. Even
14092 * though the regression was for read-only snapshots, this function applies to
14093 * any snapshot/subvolume root.
14094 * This must be run before any other repair code - not doing so makes other
14095 * repair code delete or modify backrefs in the extent tree for example, which
14096 * will result in an inconsistent fs after repairing the root items.
/*
 * Check, and in repair mode fix, the root items stored in the tree root.
 *
 * After filling the roots info cache with build_roots_info_cache(), the
 * tree root is searched from objectid BTRFS_FIRST_FREE_OBJECTID with type
 * BTRFS_ROOT_ITEM_KEY and each root item found is passed to
 * maybe_repair_root_item(). That helper runs in read-only mode while no
 * transaction is open (trans == NULL) and in fixing mode once one is. The
 * cache is freed and the path released before leaving.
 *
 * cmd_check() prints a positive result as a number of roots and passes
 * -ret to strerror() when reporting a failure.
 *
 * NOTE(review): the loop construct, labels, the statements under the
 * filters, the need_trans handling and the return are on lines missing
 * from this listing.
 */
14098 static int repair_root_items(struct btrfs_fs_info *info)
14100 struct btrfs_path path;
14101 struct btrfs_key key;
14102 struct extent_buffer *leaf;
14103 struct btrfs_trans_handle *trans = NULL;
14106 int need_trans = 0;
14108 btrfs_init_path(&path);
14110 ret = build_roots_info_cache(info);
14114 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
14115 key.type = BTRFS_ROOT_ITEM_KEY;
14120 * Avoid opening and committing transactions if a leaf doesn't have
14121 * any root items that need to be fixed, so that we avoid rotating
14122 * backup roots unnecessarily.
14125 trans = btrfs_start_transaction(info->tree_root, 1);
14126 if (IS_ERR(trans)) {
14127 ret = PTR_ERR(trans);
14132 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
14136 leaf = path.nodes[0];
14139 struct btrfs_key found_key;
/* End of the current leaf: find the next key and drop the path. */
14141 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
14142 int no_more_keys = find_next_key(&path, &key);
14144 btrfs_release_path(&path);
14146 ret = btrfs_commit_transaction(trans,
14158 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
/* Filters: non-root items and the tree reloc objectid. */
14160 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
14162 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* Read-only pass when there is no transaction, fixing pass otherwise. */
14165 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
/* Repair requested but no transaction open yet for this leaf. */
14169 if (!trans && repair) {
14172 btrfs_release_path(&path);
14182 free_roots_info_cache();
14183 btrfs_release_path(&path);
/* NOTE(review): the commit result is not stored on this line. */
14185 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the free space cache of every block group and invalidate the cache
 * generation in the super block.
 *
 * Block groups are visited in address order: each lookup starts at
 * "current", which is advanced to the end of the block group just handled
 * (key.objectid + key.offset). Afterwards a transaction on the tree root
 * sets the super block cache_generation to (u64)-1 and is committed.
 *
 * Returns PTR_ERR(trans) if that transaction cannot be started.
 *
 * NOTE(review): the loop construct, the loop exit test, the error check
 * after btrfs_clear_free_space_cache() and the final return are on lines
 * missing from this listing.
 */
14192 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
14194 struct btrfs_trans_handle *trans;
14195 struct btrfs_block_group_cache *bg_cache;
14199 /* Clear all free space cache inodes and its extent data */
14201 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
14204 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
/* Continue the search right after this block group. */
14207 current = bg_cache->key.objectid + bg_cache->key.offset;
14210 /* Don't forget to set cache_generation to -1 */
14211 trans = btrfs_start_transaction(fs_info->tree_root, 0);
14212 if (IS_ERR(trans)) {
14213 error("failed to update super block cache generation");
14214 return PTR_ERR(trans);
14216 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
/* NOTE(review): the commit result is not stored on this line. */
14217 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Clear the free space cache in the requested format.
 *
 * clear_version == 1: clears the v1 cache through clear_free_space_cache().
 *   If the FREE_SPACE_TREE compat_ro feature is set, a message pointing at
 *   "--clear-space-cache v2" is emitted instead (the statements following
 *   that message are not shown).
 * clear_version == 2: clears the v2 cache through
 *   btrfs_clear_free_space_tree(). If the FREE_SPACE_TREE feature is not
 *   set, "no free space cache v2 to clear" is printed.
 *
 * NOTE(review): the second line of the signature (presumably declaring
 * clear_version), the error tests and the return are on lines missing from
 * this listing.
 */
14222 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
14227 if (clear_version == 1) {
14228 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14230 "free space cache v2 detected, use --clear-space-cache v2");
14234 printf("Clearing free space cache\n");
14235 ret = clear_free_space_cache(fs_info);
14237 error("failed to clear free space cache");
14240 printf("Free space cache cleared\n");
14242 } else if (clear_version == 2) {
14243 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14244 printf("no free space cache v2 to clear\n");
14248 printf("Clear free space cache v2\n");
14249 ret = btrfs_clear_free_space_tree(fs_info);
14251 error("failed to clear free space cache v2: %d", ret);
14254 printf("free space cache v2 cleared\n");
/*
 * Help text for "btrfs check", one string per output line. cmd_check()
 * passes this array to usage() for an unrecognised option and when the
 * number of positional arguments is not exactly one.
 *
 * Layout as visible here: synopsis, one-line summary, long description,
 * then one entry per option. Option entries should be kept in step with
 * the long_options table and the getopt string in cmd_check().
 */
14261 const char * const cmd_check_usage[] = {
14262 "btrfs check [options] <device>",
14263 "Check structural integrity of a filesystem (unmounted).",
14264 "Check structural integrity of an unmounted filesystem. Verify internal",
14265 "trees' consistency and item connectivity. In the repair mode try to",
14266 "fix the problems found. ",
14267 "WARNING: the repair mode is considered dangerous",
14269 "-s|--super <superblock> use this superblock copy",
14270 "-b|--backup use the first valid backup root copy",
14271 "--force skip mount checks, repair is not possible",
14272 "--repair try to repair the filesystem",
14273 "--readonly run in read-only mode (default)",
14274 "--init-csum-tree create a new CRC tree",
14275 "--init-extent-tree create a new extent tree",
14276 "--mode <MODE> allows choice of memory/IO trade-offs",
14277 " where MODE is one of:",
14278 " original - read inodes and extents to memory (requires",
14279 " more memory, does less IO)",
14280 " lowmem - try to use less memory but read blocks again",
14282 "--check-data-csum verify checksums of data blocks",
14283 "-Q|--qgroup-report print a report on qgroup consistency",
14284 "-E|--subvol-extents <subvolid>",
14285 " print subvolume extents and sharing state",
14286 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
14287 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
14288 "-p|--progress indicate progress",
14289 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of "btrfs check".
 *
 * Parses the command line, opens the filesystem named by the single
 * positional argument with open_ctree_fs_info(), and then runs the stages
 * visible below: optional space cache clearing, log tree zeroing in repair
 * mode, qgroup / subvolume extent reports, optional extent and csum tree
 * re-initialisation, root item repair, and the chunk/extent, free space,
 * fs root, csum, root ref and quota group checks, followed by a summary of
 * the global byte counters.
 *
 * NOTE(review): this listing omits many lines of the function (several case
 * labels, most error branches, gotos/labels) and does not show its end, so
 * control flow between the visible statements and the return value cannot
 * be confirmed from here.
 */
14293 int cmd_check(int argc, char **argv)
14295 struct cache_tree root_cache;
14296 struct btrfs_root *root;
14297 struct btrfs_fs_info *info;
14300 u64 tree_root_bytenr = 0;
14301 u64 chunk_root_bytenr = 0;
14302 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14306 int init_csum_tree = 0;
14308 int clear_space_cache = 0;
14309 int qgroup_report = 0;
14310 int qgroups_repaired = 0;
14311 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/*
 * Codes for options that only have a long form; they start at 257 so they
 * cannot collide with the single-character option codes.
 */
14316 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14317 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14318 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14319 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14320 GETOPT_VAL_FORCE };
14321 static const struct option long_options[] = {
14322 { "super", required_argument, NULL, 's' },
14323 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14324 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14325 { "init-csum-tree", no_argument, NULL,
14326 GETOPT_VAL_INIT_CSUM },
14327 { "init-extent-tree", no_argument, NULL,
14328 GETOPT_VAL_INIT_EXTENT },
14329 { "check-data-csum", no_argument, NULL,
14330 GETOPT_VAL_CHECK_CSUM },
14331 { "backup", no_argument, NULL, 'b' },
14332 { "subvol-extents", required_argument, NULL, 'E' },
14333 { "qgroup-report", no_argument, NULL, 'Q' },
14334 { "tree-root", required_argument, NULL, 'r' },
14335 { "chunk-root", required_argument, NULL,
14336 GETOPT_VAL_CHUNK_TREE },
14337 { "progress", no_argument, NULL, 'p' },
14338 { "mode", required_argument, NULL,
14340 { "clear-space-cache", required_argument, NULL,
14341 GETOPT_VAL_CLEAR_SPACE_CACHE},
14342 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14343 { NULL, 0, NULL, 0}
/*
 * NOTE(review): the short-option string lists 'E' without a trailing ':',
 * while the long option "subvol-extents" is declared required_argument and
 * the usage text shows "-E|--subvol-extents <subvolid>". If the subvolid
 * assignment from optarg below belongs to the 'E' case (its case label is
 * not shown), the short form -E would be parsed without an argument and
 * optarg would not be set - confirm and consider "E:".
 */
14346 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
14350 case 'a': /* ignored */ break;
14352 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
/* Superblock copy index; must be below BTRFS_SUPER_MIRROR_MAX. */
14355 num = arg_strtou64(optarg);
14356 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14358 "super mirror should be less than %d",
14359 BTRFS_SUPER_MIRROR_MAX);
14362 bytenr = btrfs_sb_offset(((int)num));
14363 printf("using SB copy %llu, bytenr %llu\n", num,
14364 (unsigned long long)bytenr);
14370 subvolid = arg_strtou64(optarg);
14373 tree_root_bytenr = arg_strtou64(optarg);
14375 case GETOPT_VAL_CHUNK_TREE:
14376 chunk_root_bytenr = arg_strtou64(optarg);
14379 ctx.progress_enabled = true;
14383 usage(cmd_check_usage);
/* Options that modify the filesystem also request a writable open. */
14384 case GETOPT_VAL_REPAIR:
14385 printf("enabling repair mode\n");
14387 ctree_flags |= OPEN_CTREE_WRITES;
14389 case GETOPT_VAL_READONLY:
14392 case GETOPT_VAL_INIT_CSUM:
14393 printf("Creating a new CRC tree\n");
14394 init_csum_tree = 1;
14396 ctree_flags |= OPEN_CTREE_WRITES;
14398 case GETOPT_VAL_INIT_EXTENT:
14399 init_extent_tree = 1;
14400 ctree_flags |= (OPEN_CTREE_WRITES |
14401 OPEN_CTREE_NO_BLOCK_GROUPS);
14404 case GETOPT_VAL_CHECK_CSUM:
14405 check_data_csum = 1;
14407 case GETOPT_VAL_MODE:
14408 check_mode = parse_check_mode(optarg);
14409 if (check_mode == CHECK_MODE_UNKNOWN) {
14410 error("unknown mode: %s", optarg);
/* Only the literal arguments "v1" and "v2" are accepted. */
14414 case GETOPT_VAL_CLEAR_SPACE_CACHE:
14415 if (strcmp(optarg, "v1") == 0) {
14416 clear_space_cache = 1;
14417 } else if (strcmp(optarg, "v2") == 0) {
14418 clear_space_cache = 2;
14419 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14422 "invalid argument to --clear-space-cache, must be v1 or v2");
14425 ctree_flags |= OPEN_CTREE_WRITES;
14427 case GETOPT_VAL_FORCE:
/* Exactly one positional argument (the device) is required. */
14433 if (check_argc_exact(argc - optind, 1))
14434 usage(cmd_check_usage);
14436 if (ctx.progress_enabled) {
14437 ctx.tp = TASK_NOTHING;
14438 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14441 /* This check is the only reason for --readonly to exist */
14442 if (readonly && repair) {
14443 error("repair options are not compatible with --readonly");
14448 * experimental and dangerous
14450 if (repair && check_mode == CHECK_MODE_LOWMEM)
14451 warning("low-memory mode repair support is only partial");
14454 cache_tree_init(&root_cache);
14456 ret = check_mounted(argv[optind]);
14459 error("could not check mount status: %s",
14465 "%s is currently mounted, use --force if you really intend to check the filesystem",
14473 error("repair and --force is not yet supported");
14480 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14484 "filesystem mounted, continuing because of --force");
14486 /* A block device is mounted in exclusive mode by kernel */
14487 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14490 /* only allow partial opening under repair mode */
14492 ctree_flags |= OPEN_CTREE_PARTIAL;
14494 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14495 chunk_root_bytenr, ctree_flags);
14497 error("cannot open file system");
/* Publish the fs_info through the file-level global as well. */
14503 global_info = info;
14504 root = info->fs_root;
14505 uuid_unparse(info->super_copy->fsid, uuidbuf);
14507 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14510 * Check the bare minimum before starting anything else that could rely
14511 * on it, namely the tree roots, any local consistency checks
14513 if (!extent_buffer_uptodate(info->tree_root->node) ||
14514 !extent_buffer_uptodate(info->dev_root->node) ||
14515 !extent_buffer_uptodate(info->chunk_root->node)) {
14516 error("critical roots corrupted, unable to check the filesystem");
14522 if (clear_space_cache) {
14523 ret = do_clear_free_space_cache(info, clear_space_cache);
14529 * repair mode will force us to commit transaction which
14530 * will make us fail to load log tree when mounting.
14532 if (repair && btrfs_super_log_root(info->super_copy)) {
14533 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14539 ret = zero_log_tree(root);
14542 error("failed to zero log tree: %d", ret);
14547 if (qgroup_report) {
14548 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14550 ret = qgroup_verify_all(info);
14557 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14558 subvolid, argv[optind], uuidbuf);
14559 ret = print_extent_state(info, subvolid);
/*
 * Both re-initialisation options share one transaction, started on the
 * extent root here and committed further down.
 */
14564 if (init_extent_tree || init_csum_tree) {
14565 struct btrfs_trans_handle *trans;
14567 trans = btrfs_start_transaction(info->extent_root, 0);
14568 if (IS_ERR(trans)) {
14569 error("error starting transaction");
14570 ret = PTR_ERR(trans);
14575 if (init_extent_tree) {
14576 printf("Creating a new extent tree\n");
14577 ret = reinit_extent_tree(trans, info);
14583 if (init_csum_tree) {
14584 printf("Reinitialize checksum tree\n");
14585 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14587 error("checksum tree initialization failed: %d",
14594 ret = fill_csum_tree(trans, info->csum_root,
14598 error("checksum tree refilling failed: %d", ret);
14603 * Ok now we commit and run the normal fsck, which will add
14604 * extent entries for all of the items it finds.
14606 ret = btrfs_commit_transaction(trans, info->extent_root);
14611 if (!extent_buffer_uptodate(info->extent_root->node)) {
14612 error("critical: extent_root, unable to check the filesystem");
14617 if (!extent_buffer_uptodate(info->csum_root->node)) {
14618 error("critical: csum_root, unable to check the filesystem");
/* Root item repair is skipped when the extent tree was re-initialised. */
14624 if (!init_extent_tree) {
14625 ret = repair_root_items(info);
14628 error("failed to repair root items: %s", strerror(-ret));
14632 fprintf(stderr, "Fixed %d roots.\n", ret);
14634 } else if (ret > 0) {
14636 "Found %d roots with an outdated root item.\n",
14639 "Please run a filesystem check with the option --repair to fix them.\n");
14646 ret = do_check_chunks_and_extents(info);
14650 "errors found in extent allocation tree or chunk allocation");
14652 /* Only re-check super size after we checked and repaired the fs */
14653 err |= !is_super_size_valid(info);
/* Stage banners are written to stderr only when progress is disabled. */
14655 if (!ctx.progress_enabled) {
14656 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14657 fprintf(stderr, "checking free space tree\n");
14659 fprintf(stderr, "checking free space cache\n");
14661 ret = check_space_cache(root);
14664 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14665 error("errors found in free space tree");
14667 error("errors found in free space cache");
14672 * We used to have to have these hole extents in between our real
14673 * extents so if we don't have this flag set we need to make sure there
14674 * are no gaps in the file extents for inodes, otherwise we can just
14675 * ignore it when this happens.
14677 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14678 ret = do_check_fs_roots(info, &root_cache);
14681 error("errors found in fs roots");
14685 fprintf(stderr, "checking csums\n");
14686 ret = check_csums(root);
14689 error("errors found in csum tree");
14693 fprintf(stderr, "checking root refs\n");
14694 /* For low memory mode, check_fs_roots_v2 handles root refs */
14695 if (check_mode != CHECK_MODE_LOWMEM) {
14696 ret = check_root_refs(root, &root_cache);
14699 error("errors found in root refs");
/* In repair mode, re-COW every extent buffer queued on recow_ebs. */
14704 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14705 struct extent_buffer *eb;
14707 eb = list_first_entry(&root->fs_info->recow_ebs,
14708 struct extent_buffer, recow);
14709 list_del_init(&eb->recow);
14710 ret = recow_extent_buffer(root, eb);
14713 error("fails to fix transid errors");
/* Drain the global delete_items list, deleting each recorded bad item. */
14718 while (!list_empty(&delete_items)) {
14719 struct bad_item *bad;
14721 bad = list_first_entry(&delete_items, struct bad_item, list);
14722 list_del_init(&bad->list);
14724 ret = delete_bad_item(root, bad);
14730 if (info->quota_enabled) {
14731 fprintf(stderr, "checking quota groups\n");
14732 ret = qgroup_verify_all(info);
14735 error("failed to check quota groups");
14739 ret = repair_qgroups(info, &qgroups_repaired);
14742 error("failed to repair quota groups");
/* Buffers still queued on recow_ebs at this point are reported. */
14748 if (!list_empty(&root->fs_info->recow_ebs)) {
14749 error("transid errors in file system");
/* Summary: overall verdict followed by the global byte counters. */
14754 printf("found %llu bytes used, ",
14755 (unsigned long long)bytes_used);
14757 printf("error(s) found\n");
14759 printf("no error found\n");
14760 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14761 printf("total tree bytes: %llu\n",
14762 (unsigned long long)total_btree_bytes);
14763 printf("total fs tree bytes: %llu\n",
14764 (unsigned long long)total_fs_tree_bytes);
14765 printf("total extent tree bytes: %llu\n",
14766 (unsigned long long)total_extent_tree_bytes);
14767 printf("btree space waste bytes: %llu\n",
14768 (unsigned long long)btree_space_waste);
14769 printf("file data blocks allocated: %llu\n referenced %llu\n",
14770 (unsigned long long)data_bytes_allocated,
14771 (unsigned long long)data_bytes_referenced);
14773 free_qgroup_counts();
14774 free_root_recs_tree(&root_cache);
/* Stop the progress task that was started right after option parsing. */
14778 if (ctx.progress_enabled)
14779 task_deinit(ctx.info);