2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
46 #include "check/original.h"
47 #include "check/lowmem.h"
48 #include "check/common.h"
/* Last member of the task-position enum; the earlier members are not visible in this view. */
54 TASK_NOTHING, /* have to be the last element */
/*
 * Progress-reporting state: tp is the position being reported and info is
 * the task handle.  Both are read through a struct task_ctx pointer by
 * print_status_check().
 */
59 enum task_position tp;
61 struct task_info *info;
/*
 * File-scope u64 counters, all initialised to zero.
 * NOTE(review): the code that updates them is not visible in this view.
 */
65 u64 total_csum_bytes = 0;
66 u64 total_btree_bytes = 0;
67 u64 total_fs_tree_bytes = 0;
68 u64 total_extent_tree_bytes = 0;
69 u64 btree_space_waste = 0;
70 u64 data_bytes_allocated = 0;
71 u64 data_bytes_referenced = 0;
/* List heads, initially empty; their users are not visible in this view. */
72 LIST_HEAD(duplicate_extents);
73 LIST_HEAD(delete_items);
/* Zero by default; presumably command-line option flags -- TODO confirm. */
75 int init_extent_tree = 0;
76 int check_data_csum = 0;
77 struct btrfs_fs_info *global_info;
/* Presumably the context handed to print_status_check() -- TODO confirm. */
78 struct task_ctx ctx = { 0 };
79 struct cache_tree *roots_info_cache = NULL;
/*
 * Check-mode identifiers.  Members referenced elsewhere in this file are
 * CHECK_MODE_ORIGINAL, CHECK_MODE_LOWMEM and CHECK_MODE_UNKNOWN; their
 * declarations are not visible in this view.
 */
81 enum btrfs_check_mode {
/* The default mode is an alias for the original mode. */
85 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
/* Currently selected mode; starts out as the default. */
88 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
/*
 * Comparator for two data backrefs embedded in rb_nodes.
 *
 * Warns if either backref is not flagged is_data.  The visible comparisons
 * run in this order: parent, then a check of node1's full_backref flag,
 * then owner, offset and, when both sides have found_ref set, disk_bytenr
 * followed by bytes.
 *
 * NOTE(review): the value returned on each branch is not visible in this
 * view; presumably positive / negative / zero in the usual comparator
 * convention -- TODO confirm.  Whether the bytes comparison sits inside the
 * found_ref block also cannot be told from here.
 */
90 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
92 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
93 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
94 struct data_backref *back1 = to_data_backref(ext1);
95 struct data_backref *back2 = to_data_backref(ext2);
97 WARN_ON(!ext1->is_data);
98 WARN_ON(!ext2->is_data);
100 /* parent and root are a union, so this covers both */
101 if (back1->parent > back2->parent)
103 if (back1->parent < back2->parent)
106 /* This is a full backref and the parents match. */
107 if (back1->node.full_backref)
110 if (back1->owner > back2->owner)
112 if (back1->owner < back2->owner)
115 if (back1->offset > back2->offset)
117 if (back1->offset < back2->offset)
120 if (back1->found_ref && back2->found_ref) {
121 if (back1->disk_bytenr > back2->disk_bytenr)
123 if (back1->disk_bytenr < back2->disk_bytenr)
126 if (back1->bytes > back2->bytes)
128 if (back1->bytes < back2->bytes)
/*
 * Comparator for two tree backrefs embedded in rb_nodes.
 *
 * Warns if either backref is flagged is_data, then compares the parent
 * field (which shares storage with root, per the inline comment).
 *
 * NOTE(review): the returned values are not visible in this view.
 */
135 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
137 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
138 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
139 struct tree_backref *back1 = to_tree_backref(ext1);
140 struct tree_backref *back2 = to_tree_backref(ext2);
142 WARN_ON(ext1->is_data);
143 WARN_ON(ext2->is_data);
145 /* parent and root are a union, so this covers both */
146 if (back1->parent > back2->parent)
148 if (back1->parent < back2->parent)
/*
 * Top-level comparator for extent backrefs.
 *
 * Compares is_data first, then full_backref, and finally delegates to
 * compare_data_backref() or compare_tree_backref().
 *
 * NOTE(review): the condition that picks between the two delegates and the
 * values returned by the early branches are not visible in this view;
 * presumably the choice is made on is_data -- TODO confirm.
 */
154 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
156 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
157 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
159 if (ext1->is_data > ext2->is_data)
162 if (ext1->is_data < ext2->is_data)
165 if (ext1->full_backref > ext2->full_backref)
167 if (ext1->full_backref < ext2->full_backref)
171 return compare_data_backref(node1, node2);
173 return compare_tree_backref(node1, node2);
/*
 * Progress-indicator routine.
 *
 * @p: a struct task_ctx pointer (cast from void *).
 *
 * Starts a periodic timer with a value of 1000 (1s per the inline comment),
 * prints the description of the current task position followed by a
 * rotating '.', 'o', 'O', 'o' indicator, and waits for the next period.
 *
 * NOTE(review): the loop construct, the handling of TASK_NOTHING, the
 * declaration of count and the return value are not visible in this view.
 */
177 static void *print_status_check(void *p)
179 struct task_ctx *priv = p;
180 const char work_indicator[] = { '.', 'o', 'O', 'o' };
182 static char *task_position_string[] = {
184 "checking free space cache",
188 task_period_start(priv->info, 1000 /* 1s */);
190 if (priv->tp == TASK_NOTHING)
/* "\r" returns to the start of the line so the next print overwrites this one. */
194 printf("%s [%c]\r", task_position_string[priv->tp],
195 work_indicator[count % 4]);
198 task_period_wait(priv->info);
/*
 * NOTE(review): only the signature is visible in this view.  The name
 * suggests a completion hook paired with print_status_check() -- TODO
 * confirm against the full file.
 */
203 static int print_status_return(void *p)
/*
 * Translate a mode name into an enum btrfs_check_mode value.
 *
 * @str: mode name; compared with strcmp(), so it must be a valid string.
 *
 * Returns CHECK_MODE_LOWMEM for "lowmem", CHECK_MODE_ORIGINAL for "orig" or
 * "original", and CHECK_MODE_UNKNOWN for anything else.
 */
211 static enum btrfs_check_mode parse_check_mode(const char *str)
213 if (strcmp(str, "lowmem") == 0)
214 return CHECK_MODE_LOWMEM;
215 if (strcmp(str, "orig") == 0)
216 return CHECK_MODE_ORIGINAL;
217 if (strcmp(str, "original") == 0)
218 return CHECK_MODE_ORIGINAL;
220 return CHECK_MODE_UNKNOWN;
223 /* Compatibility function to allow reuse of old code */
/*
 * Looks at the first (leftmost) hole of the rb-tree @holes.
 *
 * NOTE(review): neither the value returned for an empty tree nor the value
 * returned for a non-empty one is visible in this view.  Callers compare
 * the result against an inode size, so presumably it is the first hole's
 * start offset -- TODO confirm.
 */
224 static u64 first_extent_gap(struct rb_root *holes)
226 struct file_extent_hole *hole;
228 if (RB_EMPTY_ROOT(holes))
231 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * rb-tree comparator for file_extent_hole nodes.
 *
 * Orders by start; when the starts are equal the lengths decide, with the
 * inline notes naming which hole becomes the "merge center".  The caller in
 * add_file_extent_hole() relies on this never reporting equality.
 *
 * NOTE(review): the returned values are not visible in this view.
 */
235 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
237 struct file_extent_hole *hole1;
238 struct file_extent_hole *hole2;
240 hole1 = rb_entry(node1, struct file_extent_hole, node);
241 hole2 = rb_entry(node2, struct file_extent_hole, node);
243 if (hole1->start > hole2->start)
245 if (hole1->start < hole2->start)
247 /* Now hole1->start == hole2->start */
248 if (hole1->len >= hole2->len)
250 * Hole 1 will be merge center
251 * Same hole will be merged later
254 /* Hole 2 will be merge center */
259 * Add a hole to the record
261 * This also merges holes on behalf of copy_file_extent_holes(),
262 * so the record never keeps adjacent or overlapping holes.
/*
 * @holes: rb-tree of file_extent_hole records to insert into.
 * The remaining parameters are not visible in this view; callers pass a
 * start offset and a length.
 *
 * Allocates a new hole, inserts it with compare_hole(), merges it with the
 * previous hole when that one reaches the new hole's start, and then
 * repeatedly merges following holes that begin at or before its end.
 *
 * NOTE(review): the allocation-failure path, the freeing of erased nodes
 * and the return value are not visible in this view.
 */
264 static int add_file_extent_hole(struct rb_root *holes,
267 struct file_extent_hole *hole;
268 struct file_extent_hole *prev = NULL;
269 struct file_extent_hole *next = NULL;
271 hole = malloc(sizeof(*hole));
276 /* Since compare will not return 0, no -EEXIST will happen */
277 rb_insert(holes, &hole->node, compare_hole);
279 /* simple merge with previous hole */
280 if (rb_prev(&hole->node))
281 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
283 if (prev && prev->start + prev->len >= hole->start) {
/*
 * NOTE(review): the merged length is derived from the new hole's end only;
 * if prev extends past that end the result is shorter than prev was.
 * Confirm callers never add a hole contained in an existing one.
 */
284 hole->len = hole->start + hole->len - prev->start;
285 hole->start = prev->start;
286 rb_erase(&prev->node, holes);
291 /* iterate merge with next holes */
293 if (!rb_next(&hole->node))
295 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
297 if (hole->start + hole->len >= next->start) {
/* Only grow when next reaches further than the current hole does. */
298 if (hole->start + hole->len <= next->start + next->len)
299 hole->len = next->start + next->len -
301 rb_erase(&next->node, holes);
/*
 * rb_search() callback used by del_file_extent_hole().
 *
 * @node: tree node being examined.
 * @data: a struct file_extent_hole describing the wanted range.
 *
 * Tests whether the wanted start lies before the node's hole or inside
 * [hole->start, hole->start + hole->len).
 *
 * NOTE(review): the declaration and assignment of start, and the returned
 * values, are not visible in this view.
 */
310 static int compare_hole_range(struct rb_node *node, void *data)
312 struct file_extent_hole *hole;
315 hole = (struct file_extent_hole *)data;
318 hole = rb_entry(node, struct file_extent_hole, node);
319 if (start < hole->start)
321 if (start >= hole->start && start < hole->start + hole->len)
327 * Delete a hole in the record
329 * This will do the hole split and is much more restrictive than add.
/*
 * @holes: rb-tree of file_extent_hole records.
 * The remaining parameters are not visible in this view; the body uses
 * start and len for the range to remove.
 *
 * Finds the hole covering start via compare_hole_range(), checks that the
 * requested range does not run past that hole, erases the hole and re-adds
 * whatever remains before and/or after the removed range.
 *
 * NOTE(review): the error paths, the conditions guarding the two re-adds
 * and the return value are not visible in this view.
 */
331 static int del_file_extent_hole(struct rb_root *holes,
334 struct file_extent_hole *hole;
335 struct file_extent_hole tmp;
340 struct rb_node *node;
347 node = rb_search(holes, &tmp, compare_hole_range, NULL);
350 hole = rb_entry(node, struct file_extent_hole, node);
351 if (start + len > hole->start + hole->len)
355 * Now there will be no overlap, delete the hole and re-add the
356 * split(s) if they exists.
/* Part of the hole that precedes the removed range. */
358 if (start > hole->start) {
359 prev_start = hole->start;
360 prev_len = start - hole->start;
/* Part of the hole that follows the removed range. */
363 if (hole->start + hole->len > start + len) {
364 next_start = start + len;
365 next_len = hole->start + hole->len - start - len;
368 rb_erase(node, holes);
371 ret = add_file_extent_hole(holes, prev_start, prev_len);
376 ret = add_file_extent_hole(holes, next_start, next_len);
/*
 * Add every hole found in the source tree to @dst by walking the source in
 * order and calling add_file_extent_hole() for each entry, so merging
 * happens on the way in.
 *
 * NOTE(review): the second parameter (used as src), the loop construct, the
 * error handling and the return value are not visible in this view.
 */
383 static int copy_file_extent_holes(struct rb_root *dst,
386 struct file_extent_hole *hole;
387 struct rb_node *node;
390 node = rb_first(src);
392 hole = rb_entry(node, struct file_extent_hole, node);
393 ret = add_file_extent_hole(dst, hole->start, hole->len);
396 node = rb_next(node);
/*
 * Empty the hole tree @holes by repeatedly taking the first node and
 * erasing it.
 *
 * NOTE(review): the loop construct and the release of each hole are not
 * visible in this view; presumably each hole is freed after rb_erase() --
 * TODO confirm.
 */
401 static void free_file_extent_holes(struct rb_root *holes)
403 struct rb_node *node;
404 struct file_extent_hole *hole;
406 node = rb_first(holes);
408 hole = rb_entry(node, struct file_extent_hole, node);
409 rb_erase(node, holes);
/* Restart from the new leftmost node after the erase. */
411 node = rb_first(holes);
/* Forward declaration; the definition is not visible in this view. */
415 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
/*
 * Mark @root as participating in transaction @trans.
 *
 * Does nothing when root->last_trans already equals trans->transid.
 * Otherwise sets track_dirty, records the transaction id, points
 * commit_root at the current root node and takes a reference on that node
 * with extent_buffer_get().
 */
417 static void record_root_in_trans(struct btrfs_trans_handle *trans,
418 struct btrfs_root *root)
420 if (root->last_trans != trans->transid) {
421 root->track_dirty = 1;
422 root->last_trans = trans->transid;
423 root->commit_root = root->node;
424 extent_buffer_get(root->node);
/*
 * rb-tree comparator for device_record nodes, keyed on devid.
 *
 * NOTE(review): the returned values are not visible in this view.
 */
428 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
430 struct device_record *rec1;
431 struct device_record *rec2;
433 rec1 = rb_entry(node1, struct device_record, node);
434 rec2 = rb_entry(node2, struct device_record, node);
435 if (rec1->devid > rec2->devid)
437 else if (rec1->devid < rec2->devid)
/*
 * Deep-copy an inode record.
 *
 * @orig_rec: record to duplicate.
 *
 * Copies the struct itself, then rebuilds the three containers it owns:
 * the backref list (each entry copied together with its trailing name),
 * the orphan-extent list and the hole tree.
 *
 * Returns ERR_PTR(-ENOMEM) when the record itself cannot be allocated.
 *
 * NOTE(review): the other failure branches, the cleanup labels, the
 * adjustment of reference counts and the success return are not visible in
 * this view.
 */
443 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
445 struct inode_record *rec;
446 struct inode_backref *backref;
447 struct inode_backref *orig;
448 struct inode_backref *tmp;
449 struct orphan_data_extent *src_orphan;
450 struct orphan_data_extent *dst_orphan;
455 rec = malloc(sizeof(*rec));
457 return ERR_PTR(-ENOMEM);
458 memcpy(rec, orig_rec, sizeof(*rec));
/* The memcpy duplicated the container heads; reset them before refilling. */
460 INIT_LIST_HEAD(&rec->backrefs);
461 INIT_LIST_HEAD(&rec->orphan_extents);
462 rec->holes = RB_ROOT;
464 list_for_each_entry(orig, &orig_rec->backrefs, list) {
/* Entry size includes the name bytes plus one terminator byte. */
465 size = sizeof(*orig) + orig->namelen + 1;
466 backref = malloc(size);
471 memcpy(backref, orig, size);
472 list_add_tail(&backref->list, &rec->backrefs);
474 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
475 dst_orphan = malloc(sizeof(*dst_orphan));
480 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
481 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
483 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
/* Presumably the error-cleanup path starts here -- TODO confirm. */
490 rb = rb_first(&rec->holes);
492 struct file_extent_hole *hole;
494 hole = rb_entry(rb, struct file_extent_hole, node);
500 if (!list_empty(&rec->backrefs))
501 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
502 list_del(&orig->list);
506 if (!list_empty(&rec->orphan_extents))
/*
 * NOTE(review): this walks the orphan-extent list with struct inode_backref
 * cursors, although the list holds struct orphan_data_extent entries.
 * That only works if both structs place their list member at the same
 * offset -- confirm, or switch to orphan_data_extent cursors.
 */
507 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
508 list_del(&orig->list);
/*
 * Print one stdout line per orphan data extent in @orphan_extents, preceded
 * by a header naming the tree.  An empty list is tested for first.
 *
 * NOTE(review): the second parameter (the tree id printed in the header),
 * the action taken for an empty list and the last printf argument are not
 * visible in this view.
 */
517 static void print_orphan_data_extents(struct list_head *orphan_extents,
520 struct orphan_data_extent *orphan;
522 if (list_empty(orphan_extents))
524 printf("The following data extent is lost in tree %llu:\n",
526 list_for_each_entry(orphan, orphan_extents, list) {
527 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
528 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Report the error bits of an inode record on stderr.
 *
 * @root: tree the inode belongs to.
 * @rec:  inode record whose errors field is decoded.
 *
 * Prints "root <id> inode <ino> errors <hex>", then one ", <text>" fragment
 * per I_ERR_* bit that is set, then a newline.  For reloc trees the line is
 * prefixed with "reloc" and the id printed is root_key.offset.  Orphan
 * extents and file-extent holes are listed afterwards when the matching
 * bits are set.
 *
 * NOTE(review): the early-exit conditions, the loop over the holes and the
 * condition selecting the final "start: 0" line are not visible in this
 * view.
 */
533 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
535 u64 root_objectid = root->root_key.objectid;
536 int errors = rec->errors;
540 /* For reloc roots, print the objectid of the corresponding fs root */
541 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
542 root_objectid = root->root_key.offset;
543 fprintf(stderr, "reloc");
545 fprintf(stderr, "root %llu inode %llu errors %x",
546 (unsigned long long) root_objectid,
547 (unsigned long long) rec->ino, rec->errors);
549 if (errors & I_ERR_NO_INODE_ITEM)
550 fprintf(stderr, ", no inode item");
551 if (errors & I_ERR_NO_ORPHAN_ITEM)
552 fprintf(stderr, ", no orphan item");
553 if (errors & I_ERR_DUP_INODE_ITEM)
554 fprintf(stderr, ", dup inode item");
555 if (errors & I_ERR_DUP_DIR_INDEX)
556 fprintf(stderr, ", dup dir index");
557 if (errors & I_ERR_ODD_DIR_ITEM)
558 fprintf(stderr, ", odd dir item");
559 if (errors & I_ERR_ODD_FILE_EXTENT)
560 fprintf(stderr, ", odd file extent");
561 if (errors & I_ERR_BAD_FILE_EXTENT)
562 fprintf(stderr, ", bad file extent");
563 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
564 fprintf(stderr, ", file extent overlap");
565 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
566 fprintf(stderr, ", file extent discount");
567 if (errors & I_ERR_DIR_ISIZE_WRONG)
568 fprintf(stderr, ", dir isize wrong");
569 if (errors & I_ERR_FILE_NBYTES_WRONG)
570 fprintf(stderr, ", nbytes wrong");
571 if (errors & I_ERR_ODD_CSUM_ITEM)
572 fprintf(stderr, ", odd csum item");
573 if (errors & I_ERR_SOME_CSUM_MISSING)
574 fprintf(stderr, ", some csum missing");
575 if (errors & I_ERR_LINK_COUNT_WRONG)
576 fprintf(stderr, ", link count wrong");
577 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
578 fprintf(stderr, ", orphan file extent");
579 fprintf(stderr, "\n");
580 /* Print the orphan extents if needed */
581 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
/*
 * NOTE(review): this passes root->objectid, not the local root_objectid
 * that was adjusted for reloc roots above -- confirm that is intended.
 */
582 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
584 /* Print the holes if needed */
585 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
586 struct file_extent_hole *hole;
587 struct rb_node *node;
590 node = rb_first(&rec->holes);
591 fprintf(stderr, "Found file extent holes:\n");
594 hole = rb_entry(node, struct file_extent_hole, node);
595 fprintf(stderr, "\tstart: %llu, len: %llu\n",
596 hole->start, hole->len);
597 node = rb_next(node);
600 fprintf(stderr, "\tstart: 0, len: %llu\n",
602 root->fs_info->sectorsize));
/*
 * Decode a backref error mask onto stderr.
 *
 * @errors: bitwise OR of REF_ERR_* flags.
 *
 * Emits one ", <text>" fragment per set flag, in the fixed order below, and
 * always finishes with a newline, so it is meant to complete a line the
 * caller has already started.
 */
606 static void print_ref_error(int errors)
608 if (errors & REF_ERR_NO_DIR_ITEM)
609 fprintf(stderr, ", no dir item");
610 if (errors & REF_ERR_NO_DIR_INDEX)
611 fprintf(stderr, ", no dir index");
612 if (errors & REF_ERR_NO_INODE_REF)
613 fprintf(stderr, ", no inode ref");
614 if (errors & REF_ERR_DUP_DIR_ITEM)
615 fprintf(stderr, ", dup dir item");
616 if (errors & REF_ERR_DUP_DIR_INDEX)
617 fprintf(stderr, ", dup dir index");
618 if (errors & REF_ERR_DUP_INODE_REF)
619 fprintf(stderr, ", dup inode ref");
620 if (errors & REF_ERR_INDEX_UNMATCH)
621 fprintf(stderr, ", index mismatch");
622 if (errors & REF_ERR_FILETYPE_UNMATCH)
623 fprintf(stderr, ", filetype mismatch");
624 if (errors & REF_ERR_NAME_TOO_LONG)
625 fprintf(stderr, ", name too long");
626 if (errors & REF_ERR_NO_ROOT_REF)
627 fprintf(stderr, ", no root ref");
628 if (errors & REF_ERR_NO_ROOT_BACKREF)
629 fprintf(stderr, ", no root backref");
630 if (errors & REF_ERR_DUP_ROOT_REF)
631 fprintf(stderr, ", dup root ref");
632 if (errors & REF_ERR_DUP_ROOT_BACKREF)
633 fprintf(stderr, ", dup root backref");
634 fprintf(stderr, "\n");
/*
 * Look up the inode record stored in @inode_cache, creating it when
 * missing.
 *
 * The remaining parameters are not visible in this view; the body uses ino
 * (the cache key) and mod, and callers pass both.
 *
 * When a cached record is found, mod is set and the record has more than
 * one reference, the cache entry is replaced by a private clone from
 * clone_inode_rec().  When nothing is cached, a zeroed record and a
 * ptr_node covering [ino, ino + 1) are allocated and inserted.
 *
 * Returns ERR_PTR(-ENOMEM) on allocation failure and ERR_PTR(-EEXIST) on
 * the visible insert-failure path.
 *
 * NOTE(review): the branch structure, the reference-count updates, the
 * special handling of BTRFS_FREE_INO_OBJECTID and the success return are
 * not visible in this view.
 */
637 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
640 struct ptr_node *node;
641 struct cache_extent *cache;
642 struct inode_record *rec = NULL;
645 cache = lookup_cache_extent(inode_cache, ino, 1);
647 node = container_of(cache, struct ptr_node, cache);
649 if (mod && rec->refs > 1) {
650 node->data = clone_inode_rec(rec);
651 if (IS_ERR(node->data))
657 rec = calloc(1, sizeof(*rec));
659 return ERR_PTR(-ENOMEM);
/* (u64)-1 marks "no extent seen yet"; merge_inode_recs() tests for it. */
661 rec->extent_start = (u64)-1;
663 INIT_LIST_HEAD(&rec->backrefs);
664 INIT_LIST_HEAD(&rec->orphan_extents);
665 rec->holes = RB_ROOT;
667 node = malloc(sizeof(*node));
670 return ERR_PTR(-ENOMEM);
672 node->cache.start = ino;
673 node->cache.size = 1;
676 if (ino == BTRFS_FREE_INO_OBJECTID)
679 ret = insert_cache_extent(inode_cache, &node->cache);
681 return ERR_PTR(-EEXIST);
/*
 * Drain @orphan_extents: repeatedly take the first entry and unlink it
 * until the list is empty.
 *
 * NOTE(review): the release of each entry is not visible in this view;
 * presumably it is freed after list_del() -- TODO confirm.
 */
686 static void free_orphan_data_extents(struct list_head *orphan_extents)
688 struct orphan_data_extent *orphan;
690 while (!list_empty(orphan_extents)) {
691 orphan = list_entry(orphan_extents->next,
692 struct orphan_data_extent, list);
693 list_del(&orphan->list);
/*
 * Tear down an inode record: unlink every backref, then release the
 * orphan-extent list and the hole tree.
 *
 * NOTE(review): any reference-count check at the top, the freeing of each
 * backref and the freeing of @rec itself are not visible in this view.
 */
698 static void free_inode_rec(struct inode_record *rec)
700 struct inode_backref *backref;
705 while (!list_empty(&rec->backrefs)) {
706 backref = to_inode_backref(rec->backrefs.next);
707 list_del(&backref->list);
710 free_orphan_data_extents(&rec->orphan_extents);
711 free_file_extent_holes(&rec->holes);
/*
 * Test whether @rec is fully resolved: no errors, marked checked, inode
 * item found, nlink equal to the number of links found, and no backrefs
 * outstanding.
 *
 * NOTE(review): the returned values are not visible in this view;
 * presumably non-zero when all conditions hold -- TODO confirm.
 */
715 static int can_free_inode_rec(struct inode_record *rec)
717 if (!rec->errors && rec->checked && rec->found_inode_item &&
718 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Finalise an inode record and drop it from @inode_cache when nothing is
 * left to report.
 *
 * The visible steps are:
 *  1. Tests found_inode_item (the action taken is not visible).
 *  2. For backrefs with both dir item and dir index found: flag a filetype
 *     mismatch, and unlink the backref once it is error-free, has an inode
 *     ref and the link counts agree.
 *  3. Tests checked and merging (the action taken is not visible).
 *  4. Apply per-type checks (directory size and stray file extents; for
 *     regular files and symlinks stray dir items, nbytes, extent coverage
 *     and csum consistency), setting I_ERR_* bits.
 *  5. Assert a single reference and, if can_free_inode_rec() agrees,
 *     remove the record's node from the cache.
 *
 * NOTE(review): the actions at steps 1 and 3, the declaration of no_holes
 * and filetype, and the freeing after removal are not visible in this
 * view.
 */
723 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
724 struct inode_record *rec)
726 struct cache_extent *cache;
727 struct inode_backref *tmp, *backref;
728 struct ptr_node *node;
731 if (!rec->found_inode_item)
734 filetype = imode_to_type(rec->imode);
735 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
736 if (backref->found_dir_item && backref->found_dir_index) {
737 if (backref->filetype != filetype)
738 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
739 if (!backref->errors && backref->found_inode_ref &&
740 rec->nlink == rec->found_link) {
741 list_del(&backref->list);
747 if (!rec->checked || rec->merging)
750 if (S_ISDIR(rec->imode)) {
751 if (rec->found_size != rec->isize)
752 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
753 if (rec->found_file_extent)
754 rec->errors |= I_ERR_ODD_FILE_EXTENT;
755 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
756 if (rec->found_dir_item)
757 rec->errors |= I_ERR_ODD_DIR_ITEM;
758 if (rec->found_size != rec->nbytes)
759 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
/* Extents must reach isize and no hole may start before it. */
760 if (rec->nlink > 0 && !no_holes &&
761 (rec->extent_end < rec->isize ||
762 first_extent_gap(&rec->holes) < rec->isize))
763 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
766 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
767 if (rec->found_csum_item && rec->nodatasum)
768 rec->errors |= I_ERR_ODD_CSUM_ITEM;
769 if (rec->some_csum_missing && !rec->nodatasum)
770 rec->errors |= I_ERR_SOME_CSUM_MISSING;
773 BUG_ON(rec->refs != 1);
774 if (can_free_inode_rec(rec)) {
775 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
776 node = container_of(cache, struct ptr_node, cache);
777 BUG_ON(node->data != rec);
778 remove_cache_extent(inode_cache, &node->cache);
/*
 * Search @root for an orphan item, using a key with objectid
 * BTRFS_ORPHAN_OBJECTID and type BTRFS_ORPHAN_ITEM_KEY.  The path is
 * released as soon as the search returns.
 *
 * NOTE(review): the assignment of key.offset (presumably @ino) and the
 * mapping of the search result to the return value are not visible in this
 * view.
 */
784 static int check_orphan_item(struct btrfs_root *root, u64 ino)
786 struct btrfs_path path;
787 struct btrfs_key key;
790 key.objectid = BTRFS_ORPHAN_OBJECTID;
791 key.type = BTRFS_ORPHAN_ITEM_KEY;
794 btrfs_init_path(&path);
/* Read-only lookup: no transaction, ins_len 0, cow 0. */
795 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
796 btrfs_release_path(&path);
/*
 * Record the contents of an INODE_ITEM in the active inode record.
 *
 * @eb, @slot:   leaf and slot holding the item.
 * @key:         item key; its objectid must match the current record.
 * @active_node: shared node whose current record is updated.
 *
 * A second inode item for the same record sets I_ERR_DUP_INODE_ITEM.
 * Otherwise nlink, isize, nbytes and imode are copied from the item,
 * found_inode_item is set, and the record is handed to
 * maybe_free_inode_rec().
 *
 * NOTE(review): the handling after the duplicate is flagged, the statement
 * guarded by the NODATASUM test, the condition guarding
 * I_ERR_NO_ORPHAN_ITEM and the return value are not visible in this view.
 */
802 static int process_inode_item(struct extent_buffer *eb,
803 int slot, struct btrfs_key *key,
804 struct shared_node *active_node)
806 struct inode_record *rec;
807 struct btrfs_inode_item *item;
809 rec = active_node->current;
810 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
811 if (rec->found_inode_item) {
812 rec->errors |= I_ERR_DUP_INODE_ITEM;
815 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
816 rec->nlink = btrfs_inode_nlink(eb, item);
817 rec->isize = btrfs_inode_size(eb, item);
818 rec->nbytes = btrfs_inode_nbytes(eb, item);
819 rec->imode = btrfs_inode_mode(eb, item);
820 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
822 rec->found_inode_item = 1;
824 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
825 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find the backref of @rec matching (dir, name), or append a new one.
 *
 * @rec:     inode record owning the backref list.
 * @namelen: length of the name in bytes.
 * @dir:     objectid of the referencing directory.
 * The name parameter itself is not visible in this view; the body reads it
 * as name.
 *
 * A new backref is allocated with room for the name plus a terminating NUL,
 * its header is zeroed, the name is copied and terminated, and it is added
 * at the tail of rec->backrefs.
 *
 * NOTE(review): the statements taken on a match, the special case for
 * BTRFS_MULTIPLE_OBJECTIDS, the allocation-failure path, the assignment of
 * the dir field and the return value are not visible in this view.
 */
829 static struct inode_backref *get_inode_backref(struct inode_record *rec,
831 int namelen, u64 dir)
833 struct inode_backref *backref;
835 list_for_each_entry(backref, &rec->backrefs, list) {
836 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
838 if (backref->dir != dir || backref->namelen != namelen)
840 if (memcmp(name, backref->name, namelen))
845 backref = malloc(sizeof(*backref) + namelen + 1);
848 memset(backref, 0, sizeof(*backref));
850 backref->namelen = namelen;
851 memcpy(backref->name, name, namelen);
852 backref->name[namelen] = '\0';
853 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Record one reference to inode @ino in @inode_cache.
 *
 * @dir, @index:    referencing directory and index within it.
 * @name, @namelen: entry name.
 * @filetype:       directory-entry file type (meaningful for dir items).
 * @itemtype:       key type of the item the reference came from.
 * @errors:         REF_ERR_* bits to OR into the backref.
 *
 * Depending on @itemtype (DIR_INDEX, DIR_ITEM, INODE_REF or INODE_EXTREF)
 * the matching found_* flag is set, duplicates raise the corresponding
 * REF_ERR_DUP_* bit, and disagreement with previously recorded index or
 * filetype raises REF_ERR_INDEX_UNMATCH or REF_ERR_FILETYPE_UNMATCH.  The
 * record is finally passed to maybe_free_inode_rec().
 *
 * NOTE(review): error checks on the lookups, updates to link/size
 * counters and the return value are not visible in this view.
 */
857 static int add_inode_backref(struct cache_tree *inode_cache,
858 u64 ino, u64 dir, u64 index,
859 const char *name, int namelen,
860 u8 filetype, u8 itemtype, int errors)
862 struct inode_record *rec;
863 struct inode_backref *backref;
865 rec = get_inode_rec(inode_cache, ino, 1);
867 backref = get_inode_backref(rec, name, namelen, dir);
870 backref->errors |= errors;
871 if (itemtype == BTRFS_DIR_INDEX_KEY) {
872 if (backref->found_dir_index)
873 backref->errors |= REF_ERR_DUP_DIR_INDEX;
874 if (backref->found_inode_ref && backref->index != index)
875 backref->errors |= REF_ERR_INDEX_UNMATCH;
876 if (backref->found_dir_item && backref->filetype != filetype)
877 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
879 backref->index = index;
880 backref->filetype = filetype;
881 backref->found_dir_index = 1;
882 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
884 if (backref->found_dir_item)
885 backref->errors |= REF_ERR_DUP_DIR_ITEM;
886 if (backref->found_dir_index && backref->filetype != filetype)
887 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
889 backref->filetype = filetype;
890 backref->found_dir_item = 1;
891 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
892 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
893 if (backref->found_inode_ref)
894 backref->errors |= REF_ERR_DUP_INODE_REF;
895 if (backref->found_dir_index && backref->index != index)
896 backref->errors |= REF_ERR_INDEX_UNMATCH;
898 backref->index = index;
/* Remember whether this came from an INODE_REF or an INODE_EXTREF. */
900 backref->ref_type = itemtype;
901 backref->found_inode_ref = 1;
906 maybe_free_inode_rec(inode_cache, rec);
/*
 * Fold everything known about @src into @dst, whose record lives in
 * @dst_cache.
 *
 * Every backref of @src is replayed through add_inode_backref() once per
 * found_* flag it carries.  The found_dir_item, found_file_extent,
 * found_csum_item and some_csum_missing flags are propagated, holes are
 * copied when @src has the earlier first gap, link and size counters are
 * summed, extent ranges are combined (flagging overlap or adding a hole for
 * the gap between them), error bits are ORed, and inode-item fields are
 * copied unless @dst already has an inode item, in which case
 * I_ERR_DUP_INODE_ITEM is set.
 *
 * NOTE(review): the declaration and updates of dir_count, the
 * merging-flag handling, the error checks after the hole operations and
 * the return value are not visible in this view.
 */
910 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
911 struct cache_tree *dst_cache)
913 struct inode_backref *backref;
918 list_for_each_entry(backref, &src->backrefs, list) {
919 if (backref->found_dir_index) {
920 add_inode_backref(dst_cache, dst->ino, backref->dir,
921 backref->index, backref->name,
922 backref->namelen, backref->filetype,
923 BTRFS_DIR_INDEX_KEY, backref->errors);
925 if (backref->found_dir_item) {
927 add_inode_backref(dst_cache, dst->ino,
928 backref->dir, 0, backref->name,
929 backref->namelen, backref->filetype,
930 BTRFS_DIR_ITEM_KEY, backref->errors);
932 if (backref->found_inode_ref) {
933 add_inode_backref(dst_cache, dst->ino,
934 backref->dir, backref->index,
935 backref->name, backref->namelen, 0,
936 backref->ref_type, backref->errors);
940 if (src->found_dir_item)
941 dst->found_dir_item = 1;
942 if (src->found_file_extent)
943 dst->found_file_extent = 1;
944 if (src->found_csum_item)
945 dst->found_csum_item = 1;
946 if (src->some_csum_missing)
947 dst->some_csum_missing = 1;
948 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
949 ret = copy_file_extent_holes(&dst->holes, &src->holes);
954 BUG_ON(src->found_link < dir_count);
955 dst->found_link += src->found_link - dir_count;
956 dst->found_size += src->found_size;
/* (u64)-1 in extent_start means the record has seen no extent. */
957 if (src->extent_start != (u64)-1) {
958 if (dst->extent_start == (u64)-1) {
959 dst->extent_start = src->extent_start;
960 dst->extent_end = src->extent_end;
962 if (dst->extent_end > src->extent_start)
963 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
964 else if (dst->extent_end < src->extent_start) {
965 ret = add_file_extent_hole(&dst->holes,
967 src->extent_start - dst->extent_end);
969 if (dst->extent_end < src->extent_end)
970 dst->extent_end = src->extent_end;
974 dst->errors |= src->errors;
975 if (src->found_inode_item) {
976 if (!dst->found_inode_item) {
977 dst->nlink = src->nlink;
978 dst->isize = src->isize;
979 dst->nbytes = src->nbytes;
980 dst->imode = src->imode;
981 dst->nodatasum = src->nodatasum;
982 dst->found_inode_item = 1;
984 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Transfer the inode records of @src_node into @dst_node.
 *
 * Drops one reference on @src_node, then walks its root_cache and
 * afterwards its inode_cache, inserting an entry for each record into the
 * matching cache of @dst_node.  When the destination already holds a record
 * for the same inode (-EEXIST), the two are merged with
 * merge_inode_recs(), the surviving record is marked checked and offered
 * to maybe_free_inode_rec(), and the source record is released.  Finally,
 * if the source's current inode is greater than the destination's (or the
 * destination has none), the destination's current record is closed out
 * and re-pointed at that inode.
 *
 * NOTE(review): the loop structure, the difference between the "last
 * reference" and "still shared" paths, the allocation-failure handling and
 * the return value are not visible in this view.
 */
992 static int splice_shared_node(struct shared_node *src_node,
993 struct shared_node *dst_node)
995 struct cache_extent *cache;
996 struct ptr_node *node, *ins;
997 struct cache_tree *src, *dst;
998 struct inode_record *rec, *conflict;
1003 if (--src_node->refs == 0)
1005 if (src_node->current)
1006 current_ino = src_node->current->ino;
1008 src = &src_node->root_cache;
1009 dst = &dst_node->root_cache;
1011 cache = search_cache_extent(src, 0);
1013 node = container_of(cache, struct ptr_node, cache);
/* Advance before the current entry may be removed from the tree. */
1015 cache = next_cache_extent(cache);
1018 remove_cache_extent(src, &node->cache);
1021 ins = malloc(sizeof(*ins));
1023 ins->cache.start = node->cache.start;
1024 ins->cache.size = node->cache.size;
1028 ret = insert_cache_extent(dst, &ins->cache);
1029 if (ret == -EEXIST) {
1030 conflict = get_inode_rec(dst, rec->ino, 1);
1031 BUG_ON(IS_ERR(conflict));
1032 merge_inode_recs(rec, conflict, dst);
1034 conflict->checked = 1;
1035 if (dst_node->current == conflict)
1036 dst_node->current = NULL;
1038 maybe_free_inode_rec(dst, conflict);
1039 free_inode_rec(rec);
/* After the root cache, repeat the walk for the inode cache. */
1046 if (src == &src_node->root_cache) {
1047 src = &src_node->inode_cache;
1048 dst = &dst_node->inode_cache;
1052 if (current_ino > 0 && (!dst_node->current ||
1053 current_ino > dst_node->current->ino)) {
1054 if (dst_node->current) {
1055 dst_node->current->checked = 1;
1056 maybe_free_inode_rec(dst, dst_node->current);
1058 dst_node->current = get_inode_rec(dst, current_ino, 1);
1059 BUG_ON(IS_ERR(dst_node->current));
/*
 * Destructor callback for a cache entry that wraps an inode record: maps
 * @cache back to its ptr_node and releases the record with
 * free_inode_rec().
 *
 * NOTE(review): the assignment of rec (presumably from node->data) and the
 * freeing of the ptr_node itself are not visible in this view.
 */
1064 static void free_inode_ptr(struct cache_extent *cache)
1066 struct ptr_node *node;
1067 struct inode_record *rec;
1069 node = container_of(cache, struct ptr_node, cache);
1071 free_inode_rec(rec);
/*
 * Macro instantiation using free_inode_ptr() as the per-entry destructor.
 * Presumably this generates free_inode_recs_tree(), which
 * enter_shared_node() calls -- TODO confirm against the macro definition.
 */
1075 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the shared_node stored in @shared for a one-byte range starting
 * at bytenr.
 *
 * NOTE(review): the second parameter, the handling of a failed lookup and
 * the return statements are not visible in this view; presumably the node
 * is returned on a hit and NULL on a miss -- TODO confirm.
 */
1077 static struct shared_node *find_shared_node(struct cache_tree *shared,
1080 struct cache_extent *cache;
1081 struct shared_node *node;
1083 cache = lookup_cache_extent(shared, bytenr, 1);
1085 node = container_of(cache, struct shared_node, cache);
/*
 * Create a zeroed shared_node keyed on @bytenr (size 1), initialise its two
 * record caches and insert it into @shared.
 *
 * @refs: reference count for the new node; the statement storing it is not
 *        visible in this view.
 *
 * NOTE(review): the allocation-failure path and the return value are not
 * visible in this view.
 */
1091 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1094 struct shared_node *node;
1096 node = calloc(1, sizeof(*node));
1099 node->cache.start = bytenr;
1100 node->cache.size = 1;
1101 cache_tree_init(&node->root_cache);
1102 cache_tree_init(&node->inode_cache);
1105 ret = insert_cache_extent(shared, &node->cache);
/*
 * Called when the tree walk reaches a block at @bytenr that has @refs
 * references.
 *
 * On the first visit a shared_node is created and becomes the active node
 * for @level.  On a later visit the node's records are either discarded
 * (when the walk is at the root level of a root with zero refs and this
 * was the last reference) or spliced into the currently active node; in
 * both cases the node is removed from wc->shared once its refs reach zero.
 *
 * NOTE(review): the action when @level already equals wc->active_node, the
 * test distinguishing first from later visits, the freeing of removed
 * nodes and the return values are not visible in this view.
 */
1110 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1111 struct walk_control *wc, int level)
1113 struct shared_node *node;
1114 struct shared_node *dest;
1117 if (level == wc->active_node)
1120 BUG_ON(wc->active_node <= level);
1121 node = find_shared_node(&wc->shared, bytenr);
1123 ret = add_shared_node(&wc->shared, bytenr, refs);
1125 node = find_shared_node(&wc->shared, bytenr);
1126 wc->nodes[level] = node;
1127 wc->active_node = level;
1131 if (wc->root_level == wc->active_node &&
1132 btrfs_root_refs(&root->root_item) == 0) {
1133 if (--node->refs == 0) {
1134 free_inode_recs_tree(&node->root_cache);
1135 free_inode_recs_tree(&node->inode_cache);
1136 remove_cache_extent(&wc->shared, &node->cache);
1142 dest = wc->nodes[wc->active_node];
1143 splice_shared_node(node, dest);
1144 if (node->refs == 0) {
1145 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Called when the tree walk leaves the shared block that is active at
 * @level.
 *
 * Searches upward from @level + 1 for the next active level, clears the
 * slot of the node being left and makes the found level active.  Unless
 * the walk is at the root level of a root with zero refs, the left node
 * must still hold more than one reference and its records are spliced
 * into the new active node.
 *
 * NOTE(review): the action when @level equals wc->root_level, the loop
 * body, the alternative branch after the splice and the return value are
 * not visible in this view.
 */
1151 static int leave_shared_node(struct btrfs_root *root,
1152 struct walk_control *wc, int level)
1154 struct shared_node *node;
1155 struct shared_node *dest;
1158 if (level == wc->root_level)
1161 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1165 BUG_ON(i >= BTRFS_MAX_LEVEL);
1167 node = wc->nodes[wc->active_node];
1168 wc->nodes[wc->active_node] = NULL;
1169 wc->active_node = i;
1171 dest = wc->nodes[wc->active_node];
1172 if (wc->active_node < wc->root_level ||
1173 btrfs_root_refs(&root->root_item) > 0) {
1174 BUG_ON(node->refs <= 1);
1175 splice_shared_node(node, dest);
1177 BUG_ON(node->refs < 2);
1186 * 1 - if the root with id child_root_id is a child of root parent_root_id
1187 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1188 * has other root(s) as parent(s)
1189 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Implementation outline: first search the tree root for the key
 * (parent_root_id, ROOT_REF, child_root_id); then search from
 * (child_root_id, ROOT_BACKREF) and walk forward, stepping to the next
 * leaf when the slot runs off the end, until the key no longer belongs to
 * child_root_id's ROOT_BACKREF items.  A backref whose offset equals
 * parent_root_id ends the walk early.  The final return is 0 when some
 * other parent was seen and 2 when none was.
 *
 * NOTE(review): the trailing parameter (child_root_id), the error paths,
 * the loop construct and the early return values are not visible in this
 * view.
 */
1191 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1194 struct btrfs_path path;
1195 struct btrfs_key key;
1196 struct extent_buffer *leaf;
1200 btrfs_init_path(&path);
1202 key.objectid = parent_root_id;
1203 key.type = BTRFS_ROOT_REF_KEY;
1204 key.offset = child_root_id;
1205 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1209 btrfs_release_path(&path);
1213 key.objectid = child_root_id;
1214 key.type = BTRFS_ROOT_BACKREF_KEY;
1216 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1222 leaf = path.nodes[0];
1223 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1224 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1227 leaf = path.nodes[0];
1230 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1231 if (key.objectid != child_root_id ||
1232 key.type != BTRFS_ROOT_BACKREF_KEY)
1237 if (key.offset == parent_root_id) {
1238 btrfs_release_path(&path);
1245 btrfs_release_path(&path);
1248 return has_parent ? 0 : 2;
/*
 * Walk every entry packed into a DIR_ITEM / DIR_INDEX item and record a
 * backref for each.
 *
 * @eb, @slot:   leaf and slot holding the item.
 * @key:         item key; objectid is the directory and offset the name
 *               hash (DIR_ITEM) or the index (DIR_INDEX).
 * @active_node: shared node supplying the current directory record and the
 *               two caches that receive the backrefs.
 *
 * For each entry the name is read into a local buffer.  A name that is
 * longer than BTRFS_NAME_LEN or crosses the item end sets
 * REF_ERR_NAME_TOO_LONG.  For DIR_ITEM keys the name hash is checked
 * against key->offset.  The backref goes to the inode cache for INODE_ITEM
 * locations, to the root cache for ROOT_ITEM locations, and to the inode
 * cache under BTRFS_MULTIPLE_OBJECTIDS otherwise.  More than one entry in a
 * DIR_INDEX item sets I_ERR_DUP_DIR_INDEX.
 *
 * NOTE(review): the local declarations, the truncation length, the updates
 * of cur and nritems, and the return value are not visible in this view.
 */
1251 static int process_dir_item(struct extent_buffer *eb,
1252 int slot, struct btrfs_key *key,
1253 struct shared_node *active_node)
1263 struct btrfs_dir_item *di;
1264 struct inode_record *rec;
1265 struct cache_tree *root_cache;
1266 struct cache_tree *inode_cache;
1267 struct btrfs_key location;
1268 char namebuf[BTRFS_NAME_LEN];
1270 root_cache = &active_node->root_cache;
1271 inode_cache = &active_node->inode_cache;
1272 rec = active_node->current;
1273 rec->found_dir_item = 1;
1275 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1276 total = btrfs_item_size_nr(eb, slot);
1277 while (cur < total) {
1279 btrfs_dir_item_key_to_cpu(eb, di, &location);
1280 name_len = btrfs_dir_name_len(eb, di);
1281 data_len = btrfs_dir_data_len(eb, di);
1282 filetype = btrfs_dir_type(eb, di);
1284 rec->found_size += name_len;
1285 if (cur + sizeof(*di) + name_len > total ||
1286 name_len > BTRFS_NAME_LEN) {
1287 error = REF_ERR_NAME_TOO_LONG;
1289 if (cur + sizeof(*di) > total)
1291 len = min_t(u32, total - cur - sizeof(*di),
/* The name bytes follow the fixed-size dir item header. */
1298 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1300 if (key->type == BTRFS_DIR_ITEM_KEY &&
1301 key->offset != btrfs_name_hash(namebuf, len)) {
1302 rec->errors |= I_ERR_ODD_DIR_ITEM;
/*
 * NOTE(review): namebuf is BTRFS_NAME_LEN bytes and no terminator is
 * visibly written, yet it is printed with %s here -- confirm it cannot be
 * read past len.
 */
1303 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1304 key->objectid, key->offset, namebuf, len, filetype,
1305 key->offset, btrfs_name_hash(namebuf, len));
1308 if (location.type == BTRFS_INODE_ITEM_KEY) {
1309 add_inode_backref(inode_cache, location.objectid,
1310 key->objectid, key->offset, namebuf,
1311 len, filetype, key->type, error);
1312 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1313 add_inode_backref(root_cache, location.objectid,
1314 key->objectid, key->offset,
1315 namebuf, len, filetype,
1319 "unknown location type %d in DIR_ITEM[%llu %llu]\n",
1320 location.type, key->objectid, key->offset);
1321 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1322 key->objectid, key->offset, namebuf,
1323 len, filetype, key->type, error);
/* Step over header, name and trailing data to the next packed entry. */
1326 len = sizeof(*di) + name_len + data_len;
1327 di = (struct btrfs_dir_item *)((char *)di + len);
1330 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1331 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Walk every reference packed into an INODE_REF item and record a backref
 * for each.
 *
 * @eb, @slot:   leaf and slot holding the item.
 * @key:         item key; objectid is the inode and offset the parent
 *               directory.
 * @active_node: shared node whose inode cache receives the backrefs.
 *
 * A reference whose name is longer than BTRFS_NAME_LEN or crosses the item
 * end sets REF_ERR_NAME_TOO_LONG, with the readable part of the name still
 * recorded.
 *
 * NOTE(review): the local declarations, the action when even the fixed
 * header does not fit, the non-error length assignment, the update of cur
 * and the return value are not visible in this view.
 */
1336 static int process_inode_ref(struct extent_buffer *eb,
1337 int slot, struct btrfs_key *key,
1338 struct shared_node *active_node)
1346 struct cache_tree *inode_cache;
1347 struct btrfs_inode_ref *ref;
1348 char namebuf[BTRFS_NAME_LEN];
1350 inode_cache = &active_node->inode_cache;
1352 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1353 total = btrfs_item_size_nr(eb, slot);
1354 while (cur < total) {
1355 name_len = btrfs_inode_ref_name_len(eb, ref);
1356 index = btrfs_inode_ref_index(eb, ref);
1358 /* inode_ref + namelen should not cross item boundary */
1359 if (cur + sizeof(*ref) + name_len > total ||
1360 name_len > BTRFS_NAME_LEN) {
1361 if (total < cur + sizeof(*ref))
1364 /* Still try to read out the remaining part */
1365 len = min_t(u32, total - cur - sizeof(*ref),
1367 error = REF_ERR_NAME_TOO_LONG;
1373 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1374 add_inode_backref(inode_cache, key->objectid, key->offset,
1375 index, namebuf, len, 0, key->type, error);
/* Advance by header plus the on-disk name length to the next reference. */
1377 len = sizeof(*ref) + name_len;
1378 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Walk every reference packed into an INODE_EXTREF item and record a
 * backref for each.
 *
 * @eb, @slot:   leaf and slot holding the item.
 * @key:         item key; objectid is the inode.  The parent directory is
 *               read from each extref rather than from the key.
 * @active_node: shared node whose inode cache receives the backrefs.
 *
 * Names longer than BTRFS_NAME_LEN are truncated to that length and flagged
 * with REF_ERR_NAME_TOO_LONG.
 *
 * NOTE(review): unlike process_inode_ref(), no check that the name stays
 * inside the item is visible here -- confirm whether one exists in the
 * lines missing from this view.  The local declarations, the update of cur
 * and the return value are also not visible.
 */
1384 static int process_inode_extref(struct extent_buffer *eb,
1385 int slot, struct btrfs_key *key,
1386 struct shared_node *active_node)
1395 struct cache_tree *inode_cache;
1396 struct btrfs_inode_extref *extref;
1397 char namebuf[BTRFS_NAME_LEN];
1399 inode_cache = &active_node->inode_cache;
1401 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1402 total = btrfs_item_size_nr(eb, slot);
1403 while (cur < total) {
1404 name_len = btrfs_inode_extref_name_len(eb, extref);
1405 index = btrfs_inode_extref_index(eb, extref);
1406 parent = btrfs_inode_extref_parent(eb, extref);
1407 if (name_len <= BTRFS_NAME_LEN) {
1411 len = BTRFS_NAME_LEN;
1412 error = REF_ERR_NAME_TOO_LONG;
1414 read_extent_buffer(eb, namebuf,
1415 (unsigned long)(extref + 1), len);
1416 add_inode_backref(inode_cache, key->objectid, parent,
1417 index, namebuf, len, 0, key->type, error);
1419 len = sizeof(*extref) + name_len;
1420 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Scan the csum tree (root->fs_info->csum_root) for EXTENT_CSUM items that
 * overlap the byte range [@start, @start + @len).
 *
 * @found is an output parameter; presumably it accumulates the number of
 * bytes in the range that are covered by csums -- TODO confirm against the
 * accounting statements of the loop.
 */
1427 static int count_csum_range(struct btrfs_root *root, u64 start,
1428 u64 len, u64 *found)
1430 struct btrfs_key key;
1431 struct btrfs_path path;
1432 struct extent_buffer *leaf;
1437 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1439 btrfs_init_path(&path);
1441 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1443 key.type = BTRFS_EXTENT_CSUM_KEY;
1445 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * No exact match: look at the previous slot, whose csum item may start
 * before the searched offset and still cover it.
 */
1449 if (ret > 0 && path.slots[0] > 0) {
1450 leaf = path.nodes[0];
1451 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1452 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1453 key.type == BTRFS_EXTENT_CSUM_KEY)
1458 leaf = path.nodes[0];
/* ran off the end of the current leaf, move on to the next one */
1459 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1460 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1465 leaf = path.nodes[0];
1468 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1469 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1470 key.type != BTRFS_EXTENT_CSUM_KEY)
/* NOTE(review): same slot decoded again; looks redundant with the call above */
1473 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1474 if (key.offset >= start + len)
1477 if (key.offset > start)
/* each csum_size bytes of item payload cover one sector of data */
1480 size = btrfs_item_size_nr(leaf, path.slots[0]);
1481 csum_end = key.offset + (size / csum_size) *
1482 root->fs_info->sectorsize;
1483 if (csum_end > start) {
1484 size = min(csum_end - start, len);
1493 btrfs_release_path(&path);
/*
 * Account one EXTENT_DATA item (@slot of @eb, keyed by @key) against the
 * inode record currently tracked by @active_node.
 *
 * The function
 *  - tracks the covered file range in rec->extent_start / rec->extent_end,
 *    flagging overlaps and recording holes between consecutive extents,
 *  - validates the file extent item according to its type (inline, regular,
 *    prealloc) and sets I_ERR_BAD_FILE_EXTENT on inconsistencies,
 *  - adds the extent length to rec->found_size where applicable,
 *  - for extents with a disk bytenr outside the data reloc tree, looks up
 *    the csums covering the extent with count_csum_range().
 */
1499 static int process_file_extent(struct btrfs_root *root,
1500 struct extent_buffer *eb,
1501 int slot, struct btrfs_key *key,
1502 struct shared_node *active_node)
1504 struct inode_record *rec;
1505 struct btrfs_file_extent_item *fi;
1507 u64 disk_bytenr = 0;
1508 u64 extent_offset = 0;
/* low bits below the sector size; used for alignment checks and round-up */
1509 u64 mask = root->fs_info->sectorsize - 1;
1513 rec = active_node->current;
1514 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1515 rec->found_file_extent = 1;
/* (u64)-1 marks "no extent seen yet" for this inode record */
1517 if (rec->extent_start == (u64)-1) {
1518 rec->extent_start = key->offset;
1519 rec->extent_end = key->offset;
/* this extent starts before the previous one ended */
1522 if (rec->extent_end > key->offset)
1523 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
/* gap between the previous extent end and this extent: remember the hole */
1524 else if (rec->extent_end < key->offset) {
1525 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1526 key->offset - rec->extent_end);
1531 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1532 extent_type = btrfs_file_extent_type(eb, fi);
1534 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1535 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1537 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1538 rec->found_size += num_bytes;
/* round the inline length up to a sector boundary */
1539 num_bytes = (num_bytes + mask) & ~mask;
1540 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1541 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1542 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1543 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1544 extent_offset = btrfs_file_extent_offset(eb, fi);
/* length must be non-zero and sector aligned */
1545 if (num_bytes == 0 || (num_bytes & mask))
1546 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* the referenced slice must fit inside the uncompressed extent */
1547 if (num_bytes + extent_offset >
1548 btrfs_file_extent_ram_bytes(eb, fi))
1549 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* a preallocated extent must not carry any encoding */
1550 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1551 (btrfs_file_extent_compression(eb, fi) ||
1552 btrfs_file_extent_encryption(eb, fi) ||
1553 btrfs_file_extent_other_encoding(eb, fi)))
1554 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* disk_bytenr == 0 is not counted towards found_size */
1555 if (disk_bytenr > 0)
1556 rec->found_size += num_bytes;
1558 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1560 rec->extent_end = key->offset + num_bytes;
1563 * The data reloc tree will copy full extents into its inode and then
1564 * copy the corresponding csums. Because the extent it copied could be
1565 * a preallocated extent that hasn't been written to yet there may be no
1566 * csums to copy, ergo we won't have csums for our file extent. This is
1567 * ok so just don't bother checking csums if the inode belongs to the
1570 if (disk_bytenr > 0 &&
1571 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* for compressed extents the csum lookup uses the on-disk length */
1573 if (btrfs_file_extent_compression(eb, fi))
1574 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1576 disk_bytenr += extent_offset;
1578 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1581 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1583 rec->found_csum_item = 1;
1584 if (found < num_bytes)
1585 rec->some_csum_missing = 1;
1586 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1588 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item of the fs-tree leaf @eb.
 *
 * Items are handled against wc->nodes[wc->active_node]: whenever the
 * objectid moves past the inode record currently being built, that record
 * is marked checked and handed to maybe_free_inode_rec(), and a record for
 * the new objectid is fetched with get_inode_rec().  Each item is then
 * dispatched by key type to the matching process_*() helper.
 */
1594 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1595 struct walk_control *wc)
1597 struct btrfs_key key;
1601 struct cache_tree *inode_cache;
1602 struct shared_node *active_node;
/* special case: the walk is at the root level of a root with zero refs */
1604 if (wc->root_level == wc->active_node &&
1605 btrfs_root_refs(&root->root_item) == 0)
1608 active_node = wc->nodes[wc->active_node];
1609 inode_cache = &active_node->inode_cache;
1610 nritems = btrfs_header_nritems(eb);
1611 for (i = 0; i < nritems; i++) {
1612 btrfs_item_key_to_cpu(eb, &key, i);
/* free space objects and orphan items get their own handling here */
1614 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1616 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* moved on to a higher inode number: finish the previous record */
1619 if (active_node->current == NULL ||
1620 active_node->current->ino < key.objectid) {
1621 if (active_node->current) {
1622 active_node->current->checked = 1;
1623 maybe_free_inode_rec(inode_cache,
1624 active_node->current);
1626 active_node->current = get_inode_rec(inode_cache,
1628 BUG_ON(IS_ERR(active_node->current));
1631 case BTRFS_DIR_ITEM_KEY:
1632 case BTRFS_DIR_INDEX_KEY:
1633 ret = process_dir_item(eb, i, &key, active_node);
1635 case BTRFS_INODE_REF_KEY:
1636 ret = process_inode_ref(eb, i, &key, active_node);
1638 case BTRFS_INODE_EXTREF_KEY:
1639 ret = process_inode_extref(eb, i, &key, active_node);
1641 case BTRFS_INODE_ITEM_KEY:
1642 ret = process_inode_item(eb, i, &key, active_node);
1644 case BTRFS_EXTENT_DATA_KEY:
1645 ret = process_file_extent(root, eb, i, &key,
/*
 * Forward declarations: both functions are called by process_one_leaf_v2()
 * below before their definitions appear.
 */
1655 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1656 struct extent_buffer *eb, struct node_refs *nrefs,
1657 u64 level, int check_all);
1658 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1659 unsigned int ext_ref);
1662 * Returns >0 Found error, not fatal, should continue
1663 * Returns <0 Fatal error, must exit the whole check
1664 * Returns 0 No errors found
/*
 * Check the inodes stored in the leaf at path->nodes[0].
 *
 * The leaf is first scanned for the first INODE_ITEM or the first change of
 * inode number; from there check_inode_item() is run and its result bits
 * are accumulated in @err.  check_inode_item() may move @path to another
 * leaf; when that happens the upper levels of the path are re-validated
 * against @nrefs with update_nodes_refs().
 */
1666 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1667 struct node_refs *nrefs, int *level, int ext_ref)
1669 struct extent_buffer *cur = path->nodes[0];
1670 struct btrfs_key key;
1674 int root_level = btrfs_header_level(root->node);
1676 int ret = 0; /* Final return value */
1677 int err = 0; /* Positive error bitmap */
/* remember which leaf we started on, to detect a leaf switch later */
1679 cur_bytenr = cur->start;
1681 /* skip to first inode item or the first inode number change */
1682 nritems = btrfs_header_nritems(cur);
1683 for (i = 0; i < nritems; i++) {
1684 btrfs_item_key_to_cpu(cur, &key, i);
1686 first_ino = key.objectid;
1687 if (key.type == BTRFS_INODE_ITEM_KEY ||
1688 (first_ino && first_ino != key.objectid))
1692 path->slots[0] = nritems;
1698 err |= check_inode_item(root, path, ext_ref);
1700 /* modify cur since check_inode_item may change path */
1701 cur = path->nodes[0];
1703 if (err & LAST_ITEM)
1706 /* still have inode items in this leaf */
1707 if (cur->start == cur_bytenr)
1711 * we have switched to another leaf, above nodes may
1712 * have changed, here walk down the path, if a node
1713 * or leaf is shared, check whether we can skip this
1716 for (i = root_level; i >= 0; i--) {
1717 if (path->nodes[i]->start == nrefs->bytenr[i])
1720 ret = update_nodes_refs(root, path->nodes[i]->start,
1721 path->nodes[i], nrefs, i, 0);
1725 if (!nrefs->need_check[i]) {
/* drop the extent buffers held in the path below *level */
1731 for (i = 0; i < *level; i++) {
1732 free_extent_buffer(path->nodes[i]);
1733 path->nodes[i] = NULL;
/*
 * Issue readahead for the children of @node, starting at pointer @slot and
 * continuing to the last pointer in the node.
 */
1742 static void reada_walk_down(struct btrfs_root *root,
1743 struct extent_buffer *node, int slot)
1745 struct btrfs_fs_info *fs_info = root->fs_info;
1752 level = btrfs_header_level(node);
1756 nritems = btrfs_header_nritems(node);
1757 for (i = slot; i < nritems; i++) {
1758 bytenr = btrfs_node_blockptr(node, i);
1759 ptr_gen = btrfs_node_ptr_generation(node, i);
1760 readahead_tree_block(fs_info, bytenr, ptr_gen);
1765 * Check the child node/leaf by the following condition:
1766 * 1. the first item key of the node/leaf should be the same with the one
1768 * 2. block in parent node should match the child node/leaf.
1769 * 3. generation of parent node and child's header should be consistent.
1771 * Or the child node/leaf pointed by the key in parent is not valid.
1773 * We hope to check leaf owner too, but since subvol may share leaves,
1774 * which makes leaf owner check not so strong, key check should be
1775 * sufficient enough for that case.
/*
 * Compare what the pointer at @slot of @parent says about its child with
 * what @child itself contains: first key, block address and generation.
 * Every mismatch is reported on stderr as "wanted" (the value recorded in
 * @parent) versus "have" (the value found in @child).
 */
1777 static int check_child_node(struct extent_buffer *parent, int slot,
1778 struct extent_buffer *child)
1780 struct btrfs_key parent_key;
1781 struct btrfs_key child_key;
1784 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* a leaf stores item keys, an internal node stores node keys */
1785 if (btrfs_header_level(child) == 0)
1786 btrfs_item_key_to_cpu(child, &child_key, 0);
1788 btrfs_node_key_to_cpu(child, &child_key, 0);
1790 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1793 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1794 parent_key.objectid, parent_key.type, parent_key.offset,
1795 child_key.objectid, child_key.type, child_key.offset);
1797 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1799 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1800 btrfs_node_blockptr(parent, slot),
1801 btrfs_header_bytenr(child));
1803 if (btrfs_node_ptr_generation(parent, slot) !=
1804 btrfs_header_generation(child)) {
/*
 * "wanted" is the generation recorded in the parent pointer, "have" is the
 * one in the child header, matching the key and block messages above.
 */
1806 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1807 btrfs_node_ptr_generation(parent, slot),
1808 btrfs_header_generation(child));
1814 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
1815 * in every fs or file tree check. Here we find its all root ids, and only check
1816 * it in the fs or file tree which has the smallest root id.
/*
 * Decide whether @root is the tree responsible for checking a (possibly
 * shared) tree block, given @roots, the set of root ids referencing it.
 * The decision is based on the number of entries in @roots and on whether
 * @root's objectid equals the first (rb_first) entry of the ulist.
 */
1818 static int need_check(struct btrfs_root *root, struct ulist *roots)
1820 struct rb_node *node;
1821 struct ulist_node *u;
1824 * @roots can be empty if it belongs to tree reloc tree
1825 * In that case, we should always check the leaf, as we can't use
1826 * the tree owner to ensure some other root will check it.
1828 if (roots->nnodes == 1 || roots->nnodes == 0)
1831 node = rb_first(&roots->root);
1832 u = rb_entry(node, struct ulist_node, rb_node);
1834 * current root id is not smallest, we skip it and let it be checked
1835 * in the fs or file tree who hash the smallest root id.
1837 if (root->objectid != u->val)
/*
 * Work out whether tree block @eb, as seen from @root, should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF.  The result is applied by setting or
 * clearing that bit in the caller-supplied flags word.
 *
 * Several cheap tests are tried first (root objectid below
 * BTRFS_FIRST_FREE_OBJECTID, @eb being the root node, the RELOC header
 * flag, owner == root->objectid).  If none of them decides, the extent item
 * of the block is looked up in the extent tree and its flags and inline
 * refs are examined.
 */
1843 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
1846 struct btrfs_root *extent_root = root->fs_info->extent_root;
1847 struct btrfs_root_item *ri = &root->root_item;
1848 struct btrfs_extent_inline_ref *iref;
1849 struct btrfs_extent_item *ei;
1850 struct btrfs_key key;
1851 struct btrfs_path *path = NULL;
1862 * Except file/reloc tree, we can not have FULL BACKREF MODE
1864 if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
1868 if (eb->start == btrfs_root_bytenr(ri))
1871 if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
1874 owner = btrfs_header_owner(eb);
1875 if (owner == root->objectid)
1878 path = btrfs_alloc_path();
/* search past any item for this bytenr, then step back to its extent item */
1882 key.objectid = btrfs_header_bytenr(eb);
1884 key.offset = (u64)-1;
1886 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
1893 ret = btrfs_previous_extent_item(extent_root, path,
1899 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* from here on @eb refers to the extent-tree leaf, not the checked block */
1901 eb = path->nodes[0];
1902 slot = path->slots[0];
1903 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
1905 flags = btrfs_extent_flags(eb, ei);
1906 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
/* inline refs follow the extent item (and the tree block info, if present) */
1909 ptr = (unsigned long)(ei + 1);
1910 end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
1912 if (key.type == BTRFS_EXTENT_ITEM_KEY)
1913 ptr += sizeof(struct btrfs_tree_block_info);
1916 /* Reached extent item ends normally */
1920 /* Beyond extent item end, wrong item size */
1922 error("extent item at bytenr %llu slot %d has wrong size",
1927 iref = (struct btrfs_extent_inline_ref *)ptr;
1928 offset = btrfs_extent_inline_ref_offset(eb, iref);
1929 type = btrfs_extent_inline_ref_type(eb, iref);
/* a normal tree block ref owned by the header owner */
1931 if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
1933 ptr += btrfs_extent_inline_ref_size(type);
1937 *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
1941 *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
1943 btrfs_free_path(path);
1948 * for a tree node or leaf, we record its reference count, so later if we still
1949 * process this node or leaf, don't need to compute its reference count again.
1951 * @bytenr if @bytenr == (u64)-1, only update nrefs->full_backref[level]
/*
 * Refresh the per-level bookkeeping in @nrefs for the tree block at @bytenr.
 *
 * Nothing is looked up again when nrefs->bytenr[@level] already equals
 * @bytenr.  Otherwise the reference count is fetched with
 * btrfs_lookup_extent_info(), the set of referencing roots with
 * btrfs_find_all_roots(), and need_check[@level] is derived from them.
 * With @check_all set and @eb given, full_backref[@level] is additionally
 * computed through calc_extent_flag_v2().
 */
1953 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1954 struct extent_buffer *eb, struct node_refs *nrefs,
1955 u64 level, int check_all)
1957 struct ulist *roots;
1960 int root_level = btrfs_header_level(root->node);
/* already cached for this block */
1964 if (nrefs->bytenr[level] == bytenr)
1967 if (bytenr != (u64)-1) {
1968 /* the return value of this function seems a mistake */
1969 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1970 level, 1, &refs, &flags);
1972 if (ret < 0 && !check_all)
/* new block at this level: reset everything cached for it */
1975 nrefs->bytenr[level] = bytenr;
1976 nrefs->refs[level] = refs;
1977 nrefs->full_backref[level] = 0;
1978 nrefs->checked[level] = 0;
1981 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
1986 check = need_check(root, roots);
1988 nrefs->need_check[level] = check;
1991 nrefs->need_check[level] = 1;
/* the root node of the tree is always checked */
1993 if (level == root_level) {
1994 nrefs->need_check[level] = 1;
1997 * The node refs may have not been
1998 * updated if upper needs checking (the
1999 * lowest root_objectid) the node can
2002 nrefs->need_check[level] =
2003 nrefs->need_check[level + 1];
2009 if (check_all && eb) {
2010 calc_extent_flag_v2(root, eb, &flags);
2011 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2012 nrefs->full_backref[level] = 1;
2019 * @level if @level == -1 means extent data item
2020 * else normal tree block.
/*
 * Decide, from the reference counts cached in @nrefs, how strictly the
 * extent at @level should be checked.  Levels outside [-1, root_level] and
 * the root level itself are handled up front; otherwise every level above
 * @level is inspected for a reference count greater than one.
 */
2022 static int should_check_extent_strictly(struct btrfs_root *root,
2023 struct node_refs *nrefs, int level)
2025 int root_level = btrfs_header_level(root->node);
2027 if (level > root_level || level < -1)
2029 if (level == root_level)
2032 * if the upper node is marked full backref, it should contain shared
2033 * backref of the parent (except owner == root->objectid).
2035 while (++level <= root_level)
2036 if (nrefs->refs[level] > 1)
/*
 * Descend from path->nodes[*level] towards the leaves (original mode).
 *
 * Reference counts of visited blocks are cached in @nrefs so that
 * btrfs_lookup_extent_info() is only called for blocks not seen at that
 * level before.  Blocks are passed to enter_shared_node() together with
 * their reference count.  Leaves are handed to process_one_leaf(); child
 * blocks are read, verified against their parent pointer with
 * check_child_node() and sanity checked with btrfs_check_leaf() /
 * btrfs_check_node() before the path steps down into them.
 */
2042 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2043 struct walk_control *wc, int *level,
2044 struct node_refs *nrefs)
2046 enum btrfs_tree_block_status status;
2049 struct btrfs_fs_info *fs_info = root->fs_info;
2050 struct extent_buffer *next;
2051 struct extent_buffer *cur;
2055 WARN_ON(*level < 0);
2056 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* reuse the cached ref count when this block was already looked up */
2058 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2059 refs = nrefs->refs[*level];
2062 ret = btrfs_lookup_extent_info(NULL, root,
2063 path->nodes[*level]->start,
2064 *level, 1, &refs, NULL);
2069 nrefs->bytenr[*level] = path->nodes[*level]->start;
2070 nrefs->refs[*level] = refs;
2074 ret = enter_shared_node(root, path->nodes[*level]->start,
2082 while (*level >= 0) {
2083 WARN_ON(*level < 0);
2084 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2085 cur = path->nodes[*level];
/* the header level must agree with the level we believe we are at */
2087 if (btrfs_header_level(cur) != *level)
2090 if (path->slots[*level] >= btrfs_header_nritems(cur))
2093 ret = process_one_leaf(root, cur, wc);
2098 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2099 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
/* same caching scheme as above, for the child block */
2101 if (bytenr == nrefs->bytenr[*level - 1]) {
2102 refs = nrefs->refs[*level - 1];
2104 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2105 *level - 1, 1, &refs, NULL);
2109 nrefs->bytenr[*level - 1] = bytenr;
2110 nrefs->refs[*level - 1] = refs;
2115 ret = enter_shared_node(root, bytenr, refs,
2118 path->slots[*level]++;
/* use the cached block if it is up to date, otherwise read it from disk */
2123 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2124 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2125 free_extent_buffer(next);
2126 reada_walk_down(root, cur, path->slots[*level]);
2127 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
/* unreadable child: remember it as a corrupt extent */
2128 if (!extent_buffer_uptodate(next)) {
2129 struct btrfs_key node_key;
2131 btrfs_node_key_to_cpu(path->nodes[*level],
2133 path->slots[*level]);
2134 btrfs_add_corrupt_extent_record(root->fs_info,
2136 path->nodes[*level]->start,
2137 root->fs_info->nodesize,
2144 ret = check_child_node(cur, path->slots[*level], next);
2146 free_extent_buffer(next);
2151 if (btrfs_is_leaf(next))
2152 status = btrfs_check_leaf(root, NULL, next);
2154 status = btrfs_check_node(root, NULL, next);
2155 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2156 free_extent_buffer(next);
/* step down: the child replaces whatever the path held one level below */
2161 *level = *level - 1;
2162 free_extent_buffer(path->nodes[*level]);
2163 path->nodes[*level] = next;
2164 path->slots[*level] = 0;
/* mark the current block as fully consumed */
2167 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/* Forward declaration; used by account_bytes() below. */
2171 static int fs_root_objectid(u64 objectid);
2174 * Update global fs information.
/*
 * Add the tree block at path->nodes[level] to the global statistics:
 * total_btree_bytes always, total_fs_tree_bytes when @root is an fs root
 * according to fs_root_objectid(), total_extent_tree_bytes when the block
 * is owned by the extent tree, and btree_space_waste for the unused space
 * inside the block.
 */
2176 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2180 struct extent_buffer *eb = path->nodes[level];
2182 total_btree_bytes += eb->len;
2183 if (fs_root_objectid(root->objectid))
2184 total_fs_tree_bytes += eb->len;
2185 if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2186 total_extent_tree_bytes += eb->len;
/* wasted space: free bytes of a leaf ... */
2189 btree_space_waste += btrfs_leaf_free_space(root, eb);
/* ... or the unused key pointer slots of a node */
2191 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root->fs_info) -
2192 btrfs_header_nritems(eb));
2193 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2198 * This function only handles BACKREF_MISSING,
2199 * If corresponding extent item exists, increase the ref, else insert an extent
2202 * Returns error bits after repair.
/*
 * Repair a missing backref (BACKREF_MISSING in @err) of tree block @node.
 *
 * The extent item of the block is searched in the extent tree.  When
 * insert_extent is set, a new extent item (EXTENT_ITEM or, with skinny
 * metadata, METADATA_ITEM) is inserted with a reference count of 0.  A
 * reference is then added with btrfs_inc_extent_ref(), either as a shared
 * ref on the parent block or as a ref owned by @root.  The cached values
 * in @nrefs for @level are kept in sync.
 *
 * BACKREF_MISSING is cleared from @err once the ref has been added.
 */
2204 static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
2205 struct btrfs_root *root,
2206 struct extent_buffer *node,
2207 struct node_refs *nrefs, int level, int err)
2209 struct btrfs_fs_info *fs_info = root->fs_info;
2210 struct btrfs_root *extent_root = fs_info->extent_root;
2211 struct btrfs_path path;
2212 struct btrfs_extent_item *ei;
2213 struct btrfs_tree_block_info *bi;
2214 struct btrfs_key key;
2215 struct extent_buffer *eb;
2216 u32 size = sizeof(*ei);
2217 u32 node_size = root->fs_info->nodesize;
2218 int insert_extent = 0;
2219 int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2220 int root_level = btrfs_header_level(root->node);
2225 u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
/* nothing to do unless a backref is actually missing */
2228 if ((err & BACKREF_MISSING) == 0)
/* NOTE(review): other level checks in this file use >= BTRFS_MAX_LEVEL */
2231 WARN_ON(level > BTRFS_MAX_LEVEL);
2234 btrfs_init_path(&path);
2235 bytenr = btrfs_header_bytenr(node);
2236 owner = btrfs_header_owner(node);
2237 generation = btrfs_header_generation(node);
2239 key.objectid = bytenr;
2241 key.offset = (u64)-1;
2243 /* Search for the extent item */
2244 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2250 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
2254 /* calculate if the extent item flag is full backref or not */
2255 if (nrefs->full_backref[level] != 0)
2256 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2258 /* insert an extent item */
2259 if (insert_extent) {
2260 struct btrfs_disk_key copy_key;
2262 generation = btrfs_header_generation(node);
/* a block below a full-backref parent and not owned by @root is shared */
2264 if (level < root_level && nrefs->full_backref[level + 1] &&
2265 owner != root->objectid) {
2266 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2269 key.objectid = bytenr;
/* non-skinny extent items carry an extra btrfs_tree_block_info */
2270 if (!skinny_metadata) {
2271 key.type = BTRFS_EXTENT_ITEM_KEY;
2272 key.offset = node_size;
2273 size += sizeof(*bi);
2275 key.type = BTRFS_METADATA_ITEM_KEY;
2279 btrfs_release_path(&path);
2280 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
2286 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
/* start at 0 refs; the ref itself is added by btrfs_inc_extent_ref() below */
2288 btrfs_set_extent_refs(eb, ei, 0);
2289 btrfs_set_extent_generation(eb, ei, generation);
2290 btrfs_set_extent_flags(eb, ei, flags);
2292 if (!skinny_metadata) {
2293 bi = (struct btrfs_tree_block_info *)(ei + 1);
2294 memset_extent_buffer(eb, 0, (unsigned long)bi,
/* the setters below take the address of the local disk key */
2296 btrfs_set_disk_key_objectid(&copy_key, root->objectid);
2297 btrfs_set_disk_key_type(&copy_key, 0);
2298 btrfs_set_disk_key_offset(&copy_key, 0);
2300 btrfs_set_tree_block_level(eb, bi, level);
2301 btrfs_set_tree_block_key(eb, bi, &copy_key);
2303 btrfs_mark_buffer_dirty(eb);
2304 printf("Added an extent item [%llu %u]\n", bytenr, node_size);
2305 btrfs_update_block_group(extent_root, bytenr, node_size, 1, 0);
2307 nrefs->refs[level] = 0;
2308 nrefs->full_backref[level] =
2309 flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
2310 btrfs_release_path(&path);
/* shared block: the new ref points at the parent block instead of the root */
2313 if (level < root_level && nrefs->full_backref[level + 1] &&
2314 owner != root->objectid)
2315 parent = nrefs->bytenr[level + 1];
2317 /* increase the ref */
2318 ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
2319 parent, root->objectid, level, 0);
2321 nrefs->refs[level]++;
2323 btrfs_release_path(&path);
2326 "failed to repair tree block ref start %llu root %llu due to %s",
2327 bytenr, root->objectid, strerror(-ret));
2329 printf("Added one tree block ref start %llu %s %llu\n",
2330 bytenr, parent ? "parent" : "root",
2331 parent ? parent : root->objectid);
2332 err &= ~BACKREF_MISSING;
/*
 * Forward declarations.  check_tree_block_ref() and check_leaf_items() are
 * called by walk_down_tree_v2() below; check_inode_item() is declared here
 * a second time with the same prototype as earlier in the file.
 */
2338 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2339 unsigned int ext_ref);
2340 static int check_tree_block_ref(struct btrfs_root *root,
2341 struct extent_buffer *eb, u64 bytenr,
2342 int level, u64 owner, struct node_refs *nrefs);
2343 static int check_leaf_items(struct btrfs_trans_handle *trans,
2344 struct btrfs_root *root, struct btrfs_path *path,
2345 struct node_refs *nrefs, int account_bytes);
2348 * @trans just for lowmem repair mode
2349 * @check all if not 0 then check all tree block backrefs and items
2350 * 0 then just check relationship of items in fs tree(s)
2352 * Returns >0 Found error, should continue
2353 * Returns <0 Fatal error, must exit the whole check
2354 * Returns 0 No errors found
/*
 * Descend from path->nodes[*level] towards the leaves (lowmem mode).
 *
 * For every block visited the function updates the cached reference data in
 * @nrefs, checks (and tries to repair) the tree block backref, optionally
 * accounts the block in the global statistics, validates the block with
 * btrfs_check_leaf() / btrfs_check_node(), and processes leaves through
 * process_one_leaf_v2() and check_leaf_items().  Children are read and
 * verified against their parent pointer before the path steps down.
 */
2356 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2357 struct btrfs_root *root, struct btrfs_path *path,
2358 int *level, struct node_refs *nrefs, int ext_ref,
2362 enum btrfs_tree_block_status status;
2365 struct btrfs_fs_info *fs_info = root->fs_info;
2366 struct extent_buffer *next;
2367 struct extent_buffer *cur;
2371 int account_file_data = 0;
2373 WARN_ON(*level < 0);
2374 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2376 ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2377 path->nodes[*level], nrefs, *level, check_all);
2381 while (*level >= 0) {
2382 WARN_ON(*level < 0);
2383 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2384 cur = path->nodes[*level];
2385 bytenr = btrfs_header_bytenr(cur);
2386 check = nrefs->need_check[*level];
/* the header level must agree with the level we believe we are at */
2388 if (btrfs_header_level(cur) != *level)
2391 * Update bytes accounting and check tree block ref
2392 * NOTE: Doing accounting and check before checking nritems
2393 * is necessary because of empty node/leaf.
2395 if ((check_all && !nrefs->checked[*level]) ||
2396 (!check_all && nrefs->need_check[*level])) {
2397 ret = check_tree_block_ref(root, cur,
2398 btrfs_header_bytenr(cur), btrfs_header_level(cur),
2399 btrfs_header_owner(cur), nrefs);
2402 ret = repair_tree_block_ref(trans, root,
2403 path->nodes[*level], nrefs, *level, ret);
/* account the block only when this root is responsible and it has refs */
2406 if (check_all && nrefs->need_check[*level] &&
2407 nrefs->refs[*level]) {
2408 account_bytes(root, path, *level);
2409 account_file_data = 1;
2411 nrefs->checked[*level] = 1;
2414 if (path->slots[*level] >= btrfs_header_nritems(cur))
2417 /* Don't forget to check leaf/node validation */
2419 /* skip duplicate check */
2420 if (check || !check_all) {
2421 ret = btrfs_check_leaf(root, NULL, cur);
2422 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2430 ret = process_one_leaf_v2(root, path, nrefs,
2433 ret = check_leaf_items(trans, root, path,
2434 nrefs, account_file_data);
2438 if (check || !check_all) {
2439 ret = btrfs_check_node(root, NULL, cur);
2440 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2447 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2448 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2450 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2455 * check all trees in check_chunks_and_extent_v2
2456 * check shared node once in check_fs_roots
2458 if (!check_all && !nrefs->need_check[*level - 1]) {
2459 path->slots[*level]++;
/* use the cached block if it is up to date, otherwise read it from disk */
2463 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2464 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2465 free_extent_buffer(next);
2466 reada_walk_down(root, cur, path->slots[*level]);
2467 next = read_tree_block(fs_info, bytenr, ptr_gen);
/* unreadable child: remember it as a corrupt extent */
2468 if (!extent_buffer_uptodate(next)) {
2469 struct btrfs_key node_key;
2471 btrfs_node_key_to_cpu(path->nodes[*level],
2473 path->slots[*level]);
2474 btrfs_add_corrupt_extent_record(fs_info,
2475 &node_key, path->nodes[*level]->start,
2476 fs_info->nodesize, *level);
2482 ret = check_child_node(cur, path->slots[*level], next);
2487 if (btrfs_is_leaf(next))
2488 status = btrfs_check_leaf(root, NULL, next);
2490 status = btrfs_check_node(root, NULL, next);
2491 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2492 free_extent_buffer(next);
/* step down: the child replaces whatever the path held one level below */
2497 *level = *level - 1;
2498 free_extent_buffer(path->nodes[*level]);
2499 path->nodes[*level] = next;
2500 path->slots[*level] = 0;
2501 account_file_data = 0;
/* bytenr == (u64)-1: only refresh nrefs->full_backref for the new level */
2503 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
/*
 * Climb the path starting at *level, looking for a level that still has an
 * unvisited slot to the right.  Levels that are exhausted have their extent
 * buffer released and removed from the path; leave_shared_node() is called
 * when the level being left is the walk control's active node.
 */
2508 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2509 struct walk_control *wc, int *level)
2512 struct extent_buffer *leaf;
2514 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* despite its name, this may be a node at any level of the path */
2515 leaf = path->nodes[i];
2516 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2521 free_extent_buffer(path->nodes[*level]);
2522 path->nodes[*level] = NULL;
2523 BUG_ON(*level > wc->active_node);
2524 if (*level == wc->active_node)
2525 leave_shared_node(root, wc, *level);
/*
 * Lowmem counterpart of walk_up_tree(): climb the path starting at *level,
 * looking for a level that still has an unvisited slot to the right, and
 * release the extent buffers of exhausted levels.  No walk_control is
 * involved here.
 */
2532 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2536 struct extent_buffer *leaf;
2538 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* despite its name, this may be a node at any level of the path */
2539 leaf = path->nodes[i];
2540 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2545 free_extent_buffer(path->nodes[*level]);
2546 path->nodes[*level] = NULL;
/*
 * Sanity-check the inode record of a root directory.  The checks performed
 * on @rec are: an inode item was found and no errors are recorded; nlink is
 * 1 and no links were found; the backref list is not empty; its first
 * backref was seen as an inode ref with index 0 and the name ".."; and that
 * backref has neither a dir index nor a dir item.
 */
2553 static int check_root_dir(struct inode_record *rec)
2555 struct inode_backref *backref;
2558 if (!rec->found_inode_item || rec->errors)
2560 if (rec->nlink != 1 || rec->found_link != 0)
2562 if (list_empty(&rec->backrefs))
/* only the first backref on the list is examined */
2564 backref = to_inode_backref(rec->backrefs.next);
2565 if (!backref->found_inode_ref)
2567 if (backref->index != 0 || backref->namelen != 2 ||
2568 memcmp(backref->name, "..", 2))
2570 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size stored in the inode item of rec->ino with
 * rec->found_size and clear I_ERR_DIR_ISIZE_WRONG on the record.
 *
 * The inode item is located by searching for (ino, INODE_ITEM, (u64)-1);
 * the key read back from the path is compared against rec->ino before the
 * item is modified.  @path is released before returning.
 */
2577 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2578 struct btrfs_root *root, struct btrfs_path *path,
2579 struct inode_record *rec)
2581 struct btrfs_inode_item *ei;
2582 struct btrfs_key key;
2585 key.objectid = rec->ino;
2586 key.type = BTRFS_INODE_ITEM_KEY;
2587 key.offset = (u64)-1;
2589 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* slot 0 means there is no earlier item in this leaf to step back to */
2593 if (!path->slots[0]) {
2600 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2601 if (key.objectid != rec->ino) {
2606 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2607 struct btrfs_inode_item);
2608 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2609 btrfs_mark_buffer_dirty(path->nodes[0]);
2610 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
/* %llu, as in repair_inode_nbytes(); %Lu is not a standard integer conversion */
2611 printf("reset isize for dir %llu root %llu\n", rec->ino,
2612 root->root_key.objectid);
2614 btrfs_release_path(path);
/*
 * Add an orphan item for rec->ino to @root with btrfs_add_orphan_item(),
 * release @path, and clear I_ERR_NO_ORPHAN_ITEM on the record.
 */
2618 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2619 struct btrfs_root *root,
2620 struct btrfs_path *path,
2621 struct inode_record *rec)
2625 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2626 btrfs_release_path(path);
2628 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field in the inode item of rec->ino with
 * rec->found_size and clear I_ERR_FILE_NBYTES_WRONG on the record.
 * @path is released before returning.
 */
2632 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2633 struct btrfs_root *root,
2634 struct btrfs_path *path,
2635 struct inode_record *rec)
2637 struct btrfs_inode_item *ei;
2638 struct btrfs_key key;
2641 key.objectid = rec->ino;
2642 key.type = BTRFS_INODE_ITEM_KEY;
2645 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2652 /* Since ret == 0, no need to check anything */
2653 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2654 struct btrfs_inode_item);
2655 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2656 btrfs_mark_buffer_dirty(path->nodes[0]);
2657 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2658 printf("reset nbytes for ino %llu root %llu\n",
2659 rec->ino, root->root_key.objectid);
2661 btrfs_release_path(path);
/*
 * Re-create the DIR_INDEX item described by @backref for inode @rec.
 *
 * A new item keyed (backref->dir, DIR_INDEX, backref->index) is inserted in
 * its own transaction and filled with the location key of @rec, the file
 * type derived from rec->imode and the name taken from @backref.  After the
 * commit the directory's inode record in @inode_cache is updated: its
 * found_size grows by the name length and I_ERR_DIR_ISIZE_WRONG is
 * re-evaluated against its isize.
 */
2665 static int add_missing_dir_index(struct btrfs_root *root,
2666 struct cache_tree *inode_cache,
2667 struct inode_record *rec,
2668 struct inode_backref *backref)
2670 struct btrfs_path path;
2671 struct btrfs_trans_handle *trans;
2672 struct btrfs_dir_item *dir_item;
2673 struct extent_buffer *leaf;
2674 struct btrfs_key key;
2675 struct btrfs_disk_key disk_key;
2676 struct inode_record *dir_rec;
2677 unsigned long name_ptr;
/* item payload: the dir item header followed by the name */
2678 u32 data_size = sizeof(*dir_item) + backref->namelen;
2681 trans = btrfs_start_transaction(root, 1);
2683 return PTR_ERR(trans);
2685 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2686 (unsigned long long)rec->ino);
2688 btrfs_init_path(&path);
2689 key.objectid = backref->dir;
2690 key.type = BTRFS_DIR_INDEX_KEY;
2691 key.offset = backref->index;
2692 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2695 leaf = path.nodes[0];
2696 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* location key of the dir item: the inode item of @rec, in disk byte order */
2698 disk_key.objectid = cpu_to_le64(rec->ino);
2699 disk_key.type = BTRFS_INODE_ITEM_KEY;
2700 disk_key.offset = 0;
2702 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2703 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2704 btrfs_set_dir_data_len(leaf, dir_item, 0);
2705 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* the name is stored directly after the dir item header */
2706 name_ptr = (unsigned long)(dir_item + 1);
2707 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2708 btrfs_mark_buffer_dirty(leaf);
2709 btrfs_release_path(&path);
2710 btrfs_commit_transaction(trans, root);
2712 backref->found_dir_index = 1;
2713 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2714 BUG_ON(IS_ERR(dir_rec));
/* the new index entry contributes its name length to the directory size */
2717 dir_rec->found_size += backref->namelen;
2718 if (dir_rec->found_size == dir_rec->isize &&
2719 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2720 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2721 if (dir_rec->found_size != dir_rec->isize)
2722 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by @backref from @root.
 *
 * The entry is looked up by directory, name and index inside its own
 * transaction.  Depending on the lookup result the transaction is either
 * committed without changes, or the entry is removed with btrfs_del_item()
 * or btrfs_delete_one_dir_name() and the transaction is then committed.
 */
2727 static int delete_dir_index(struct btrfs_root *root,
2728 struct inode_backref *backref)
2730 struct btrfs_trans_handle *trans;
2731 struct btrfs_dir_item *di;
2732 struct btrfs_path path;
2735 trans = btrfs_start_transaction(root, 1);
2737 return PTR_ERR(trans);
2739 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2740 (unsigned long long)backref->dir,
2741 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2742 (unsigned long long)root->objectid);
2744 btrfs_init_path(&path);
2745 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2746 backref->name, backref->namelen,
2747 backref->index, -1);
/* bail-out path: release the path and close the transaction */
2750 btrfs_release_path(&path);
2751 btrfs_commit_transaction(trans, root);
2758 ret = btrfs_del_item(trans, root, &path);
2760 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2762 btrfs_release_path(&path);
2763 btrfs_commit_transaction(trans, root);
/*
 * Build a btrfs_inode_item on the stack and insert it for inode @ino.
 *
 * @size, @nbytes, @nlink and @mode are stored as given; the generation is
 * taken from trans->transid. ctime and mtime seconds are set to the current
 * time (time(NULL)), otime is zeroed, and every nanosecond field is zero.
 * NOTE(review): no visible line sets the atime seconds field, only its
 * nanoseconds - confirm whether the item is zeroed beforehand.
 *
 * The item is inserted with btrfs_insert_inode() and a warning tells the
 * user that the recreated inode may be incomplete.
 */
2767 static int __create_inode_item(struct btrfs_trans_handle *trans,
2768 struct btrfs_root *root, u64 ino, u64 size,
2769 u64 nbytes, u64 nlink, u32 mode)
2771 struct btrfs_inode_item ii;
2772 time_t now = time(NULL);
2775 btrfs_set_stack_inode_size(&ii, size);
2776 btrfs_set_stack_inode_nbytes(&ii, nbytes);
2777 btrfs_set_stack_inode_nlink(&ii, nlink);
2778 btrfs_set_stack_inode_mode(&ii, mode);
2779 btrfs_set_stack_inode_generation(&ii, trans->transid);
2780 btrfs_set_stack_timespec_nsec(&ii.atime, 0);
2781 btrfs_set_stack_timespec_sec(&ii.ctime, now);
2782 btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
2783 btrfs_set_stack_timespec_sec(&ii.mtime, now);
2784 btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
2785 btrfs_set_stack_timespec_sec(&ii.otime, 0);
2786 btrfs_set_stack_timespec_nsec(&ii.otime, 0);
2788 ret = btrfs_insert_inode(trans, root, ino, &ii);
2791 warning("root %llu inode %llu recreating inode item, this may "
2792 "be incomplete, please check permissions and content after "
2793 "the fsck completes.\n", (unsigned long long)root->objectid,
2794 (unsigned long long)ino);
/*
 * Recreate an empty inode item for @ino inside the caller's transaction.
 *
 * The mode is S_IFDIR when filetype equals BTRFS_FT_DIR and S_IFREG for any
 * other value, always with permission bits 0755. Size, nbytes and nlink are
 * all passed as 0 to __create_inode_item().
 */
2799 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2800 struct btrfs_root *root, u64 ino,
2803 u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2805 return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
/*
 * Recreate the inode item of @rec in its own transaction.
 *
 * The link count is 1 when @root_dir is set, otherwise rec->found_link.
 * If a dir item was found for the inode it is recreated as a directory
 * (S_IFDIR | 0755, size rec->found_size); a message is printed when file
 * extents were found as well, because the real type is then ambiguous.
 * Otherwise it becomes a regular file (S_IFREG | 0755) whose size is
 * rec->extent_end. The item itself is written by __create_inode_item() and
 * the transaction is committed afterwards.
 */
2808 static int create_inode_item(struct btrfs_root *root,
2809 struct inode_record *rec, int root_dir)
2811 struct btrfs_trans_handle *trans;
2817 trans = btrfs_start_transaction(root, 1);
2818 if (IS_ERR(trans)) {
2819 ret = PTR_ERR(trans);
2823 nlink = root_dir ? 1 : rec->found_link;
2824 if (rec->found_dir_item) {
2825 if (rec->found_file_extent)
2826 fprintf(stderr, "root %llu inode %llu has both a dir "
2827 "item and extents, unsure if it is a dir or a "
2828 "regular file so setting it as a directory\n",
2829 (unsigned long long)root->objectid,
2830 (unsigned long long)rec->ino);
2831 mode = S_IFDIR | 0755;
2832 size = rec->found_size;
/* This condition is the exact negation of the if above, so it always holds. */
2833 } else if (!rec->found_dir_item) {
2834 size = rec->extent_end;
2835 mode = S_IFREG | 0755;
2838 ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
2840 btrfs_commit_transaction(trans, root);
/*
 * Walk every backref of @rec and repair what can be repaired.
 *
 * Cases handled by the code shown here:
 *  - @rec is the root directory and has no inode item: recreate it with
 *    create_inode_item(root, rec, 1);
 *  - a dir index exists without an inode ref, or with an inode ref but a
 *    mismatching index (REF_ERR_INDEX_UNMATCH): remove it with
 *    delete_dir_index() and drop the backref from the list;
 *  - dir item and inode ref exist but the dir index is missing: add it with
 *    add_missing_dir_index();
 *  - only the inode ref exists: after check_dir_conflict(), insert the dir
 *    item/index pair with btrfs_insert_dir_item() in a new transaction;
 *  - dir item, dir index and inode ref all exist without an index mismatch
 *    but the inode item is missing: create_inode_item(root, rec, 0).
 * A backref with dir item, dir index and inode ref present and no errors is
 * removed from the list.
 *
 * The recreate/add cases are guarded by !delete, so they only run when the
 * delete flag is zero.
 *
 * Returns ret when it is non-zero, otherwise the value of repaired.
 */
2844 static int repair_inode_backrefs(struct btrfs_root *root,
2845 struct inode_record *rec,
2846 struct cache_tree *inode_cache,
2849 struct inode_backref *tmp, *backref;
2850 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* The _safe iterator is required: entries are unlinked while walking. */
2854 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2855 if (!delete && rec->ino == root_dirid) {
2856 if (!rec->found_inode_item) {
2857 ret = create_inode_item(root, rec, 1);
2864 /* Index 0 of a root dir is special, do not touch it */
2865 if (rec->ino == root_dirid && backref->index == 0)
2869 ((backref->found_dir_index && !backref->found_inode_ref) ||
2870 (backref->found_dir_index && backref->found_inode_ref &&
2871 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2872 ret = delete_dir_index(root, backref);
2876 list_del(&backref->list);
2881 if (!delete && !backref->found_dir_index &&
2882 backref->found_dir_item && backref->found_inode_ref) {
2883 ret = add_missing_dir_index(root, inode_cache, rec,
2888 if (backref->found_dir_item &&
2889 backref->found_dir_index) {
2890 if (!backref->errors &&
2891 backref->found_inode_ref) {
2892 list_del(&backref->list);
2899 if (!delete && (!backref->found_dir_index &&
2900 !backref->found_dir_item &&
2901 backref->found_inode_ref)) {
2902 struct btrfs_trans_handle *trans;
2903 struct btrfs_key location;
2905 ret = check_dir_conflict(root, backref->name,
2911 * let nlink fixing routine to handle it,
2912 * which can do it better.
/* The new dir item points at the inode item key of this inode. */
2917 location.objectid = rec->ino;
2918 location.type = BTRFS_INODE_ITEM_KEY;
2919 location.offset = 0;
2921 trans = btrfs_start_transaction(root, 1);
2922 if (IS_ERR(trans)) {
2923 ret = PTR_ERR(trans);
2926 fprintf(stderr, "adding missing dir index/item pair "
2928 (unsigned long long)rec->ino);
2929 ret = btrfs_insert_dir_item(trans, root, backref->name,
2931 backref->dir, &location,
2932 imode_to_type(rec->imode),
2935 btrfs_commit_transaction(trans, root);
2939 if (!delete && (backref->found_inode_ref &&
2940 backref->found_dir_index &&
2941 backref->found_dir_item &&
2942 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2943 !rec->found_inode_item)) {
2944 ret = create_inode_item(root, rec, 0);
2951 return ret ? ret : repaired;
2955 * To determine the file type for nlink/inode_item repair
2957 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2958 * Return -ENOENT if file type is not found.
/*
 * Lookup order: an existing inode item wins and its imode is converted with
 * imode_to_type(); otherwise the filetype of the first backref that has a
 * dir index or a dir item is used.
 */
2960 static int find_file_type(struct inode_record *rec, u8 *type)
2962 struct inode_backref *backref;
2964 /* For inode item recovered case */
2965 if (rec->found_inode_item) {
2966 *type = imode_to_type(rec->imode);
2970 list_for_each_entry(backref, &rec->backrefs, list) {
2971 if (backref->found_dir_index || backref->found_dir_item) {
2972 *type = backref->filetype;
2980 * To determine the file name for nlink repair
2982 * Return 0 if file name is found, set name and namelen.
2983 * Return -ENOENT if file name is not found.
/*
 * The name is taken from the first backref that has a dir index, a dir item
 * or an inode ref. Exactly backref->namelen bytes are copied with memcpy();
 * that call does not append a NUL terminator, so @name must provide at least
 * that many bytes and callers should rely on *namelen.
 */
2985 static int find_file_name(struct inode_record *rec,
2986 char *name, int *namelen)
2988 struct inode_backref *backref;
2990 list_for_each_entry(backref, &rec->backrefs, list) {
2991 if (backref->found_dir_index || backref->found_dir_item ||
2992 backref->found_inode_ref) {
2993 memcpy(name, backref->name, backref->namelen);
2994 *namelen = backref->namelen;
3001 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of @rec from scratch, in three phases:
 *  1. btrfs_unlink() every backref of the inode, valid or not; backrefs
 *     that lack a dir index, dir item or inode ref are also dropped from
 *     rec->backrefs so that they are not re-added later;
 *  2. look up the inode item and set its nlink field to 0;
 *  3. re-add each remaining backref with btrfs_add_link().
 *
 * rec->found_link is reset to 0 on entry. @path is released before
 * returning.
 */
3002 static int reset_nlink(struct btrfs_trans_handle *trans,
3003 struct btrfs_root *root,
3004 struct btrfs_path *path,
3005 struct inode_record *rec)
3007 struct inode_backref *backref;
3008 struct inode_backref *tmp;
3009 struct btrfs_key key;
3010 struct btrfs_inode_item *inode_item;
3013 /* We don't believe this either, reset it and iterate backref */
3014 rec->found_link = 0;
3016 /* Remove all backref including the valid ones */
3017 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3018 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
3019 backref->index, backref->name,
3020 backref->namelen, 0);
3024 /* remove invalid backref, so it won't be added back */
3025 if (!(backref->found_dir_index &&
3026 backref->found_dir_item &&
3027 backref->found_inode_ref)) {
3028 list_del(&backref->list);
3035 /* Set nlink to 0 */
3036 key.objectid = rec->ino;
3037 key.type = BTRFS_INODE_ITEM_KEY;
3039 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3046 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3047 struct btrfs_inode_item);
3048 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
3049 btrfs_mark_buffer_dirty(path->nodes[0]);
3050 btrfs_release_path(path);
3053 * Add back valid inode_ref/dir_item/dir_index,
3054 * add_link() will handle the nlink inc, so new nlink must be correct
3056 list_for_each_entry(backref, &rec->backrefs, list) {
3057 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
3058 backref->name, backref->namelen,
3059 backref->filetype, &backref->index, 1, 0);
3064 btrfs_release_path(path);
/*
 * Find the highest inode number in use in @root and store it in
 * *highest_ino.
 *
 * The search key is an INODE_ITEM with objectid BTRFS_LAST_FREE_OBJECTID;
 * the key of the slot just before the returned position (slots[0] - 1) is
 * read and its objectid is taken as the result. The result is then compared
 * against BTRFS_LAST_FREE_OBJECTID. @path is re-initialised on entry and
 * released before returning.
 */
3068 static int get_highest_inode(struct btrfs_trans_handle *trans,
3069 struct btrfs_root *root,
3070 struct btrfs_path *path,
3073 struct btrfs_key key, found_key;
3076 btrfs_init_path(path);
3077 key.objectid = BTRFS_LAST_FREE_OBJECTID;
3079 key.type = BTRFS_INODE_ITEM_KEY;
3080 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3082 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3083 path->slots[0] - 1);
3084 *highest_ino = found_key.objectid;
3087 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3089 btrfs_release_path(path);
3094 * Link inode to dir 'lost+found'. Increase @ref_count.
3096 * Returns 0 means success.
3097 * Returns <0 means failure.
/*
 * Steps: query the highest inode number with get_highest_inode(), create
 * the "lost+found" directory under BTRFS_FIRST_FREE_OBJECTID with
 * btrfs_mkdir(), then link @ino into it with btrfs_add_link().
 *
 * On -EEXIST a suffix is appended to the name and the link is retried until
 * it succeeds or the name would exceed BTRFS_NAME_LEN. The suffix is written
 * directly into @namebuf, so the buffer must be writable and, as the
 * snprintf() bound shows, is treated as BTRFS_NAME_LEN bytes long.
 *
 * @path is released on entry and again before returning.
 */
3099 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
3100 struct btrfs_root *root,
3101 struct btrfs_path *path,
3102 u64 ino, char *namebuf, u32 name_len,
3103 u8 filetype, u64 *ref_count)
3105 char *dir_name = "lost+found";
3110 btrfs_release_path(path);
3111 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3116 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3117 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3120 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3123 ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3124 namebuf, name_len, filetype, NULL, 1, 0);
3126 * Add ".INO" suffix several times to handle case where
3127 * "FILENAME.INO" is already taken by another file.
3129 while (ret == -EEXIST) {
3131 * Conflicting file name, add ".INO" as suffix * +1 for '.'
3133 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
3137 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3139 name_len += count_digits(ino) + 1;
3140 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3141 name_len, filetype, NULL, 1, 0);
3144 error("failed to link the inode %llu to %s dir: %s",
3145 ino, dir_name, strerror(-ret));
3150 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3151 name_len, namebuf, dir_name);
3153 btrfs_release_path(path);
3155 error("failed to move file '%.*s' to '%s' dir", name_len,
/*
 * Repair the link count of @rec.
 *
 * The file name and type are captured first, because reset_nlink() drops
 * invalid backrefs. Fallbacks: the decimal inode number as name and
 * BTRFS_FT_REG_FILE as type. After reset_nlink(), an inode that is left with
 * found_link == 0 is linked into lost+found via link_inode_to_lostfound().
 *
 * I_ERR_LINK_COUNT_WRONG is cleared in every case and @path is released
 * before returning.
 */
3160 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3161 struct btrfs_root *root,
3162 struct btrfs_path *path,
3163 struct inode_record *rec)
3165 char namebuf[BTRFS_NAME_LEN] = {0};
3168 int name_recovered = 0;
3169 int type_recovered = 0;
3173 * Get file name and type first before these invalid inode ref
3174 * are deleted by remove_all_invalid_backref()
/* Both helpers return 0 on success, hence the negation. */
3176 name_recovered = !find_file_name(rec, namebuf, &namelen);
3177 type_recovered = !find_file_type(rec, &type);
3179 if (!name_recovered) {
3180 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3181 rec->ino, rec->ino);
3182 namelen = count_digits(rec->ino);
3183 sprintf(namebuf, "%llu", rec->ino);
3186 if (!type_recovered) {
3187 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3189 type = BTRFS_FT_REG_FILE;
3193 ret = reset_nlink(trans, root, path, rec);
3196 "Failed to reset nlink for inode %llu: %s\n",
3197 rec->ino, strerror(-ret));
3201 if (rec->found_link == 0) {
/* NOTE(review): the cast suggests found_link is not declared as u64 - confirm its width. */
3202 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3203 namebuf, namelen, type,
3204 (u64 *)&rec->found_link);
3208 printf("Fixed the nlink of inode %llu\n", rec->ino);
3211 * Clear the flag anyway, or we will loop forever for the same inode
3212 * as it will not be removed from the bad inode list and the dead loop
3215 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3216 btrfs_release_path(path);
3221 * Check if there is any normal(reg or prealloc) file extent for given
3223 * This is used to determine the file type when neither its dir_index/item or
3224 * inode_item exists.
3226 * This will *NOT* report errors; if any error happens, the inode is simply
3227 * considered to have no normal file extent.
/*
 * Read-only scan (NULL transaction, no COW) starting at an EXTENT_DATA key.
 * The position is advanced to the next leaf when the slot is past the last
 * item; the item found must still belong to @ino and be of type
 * EXTENT_DATA. An extent counts as "normal" when its type is not
 * BTRFS_FILE_EXTENT_INLINE. The local path is released before returning.
 */
3229 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3231 struct btrfs_path path;
3232 struct btrfs_key key;
3233 struct btrfs_key found_key;
3234 struct btrfs_file_extent_item *fi;
3238 btrfs_init_path(&path);
3240 key.type = BTRFS_EXTENT_DATA_KEY;
3243 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3248 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3249 ret = btrfs_next_leaf(root, &path);
3256 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3258 if (found_key.objectid != ino ||
3259 found_key.type != BTRFS_EXTENT_DATA_KEY)
3261 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3262 struct btrfs_file_extent_item);
3263 type = btrfs_file_extent_type(path.nodes[0], fi);
3264 if (type != BTRFS_FILE_EXTENT_INLINE) {
3270 btrfs_release_path(&path);
/*
 * Translate a BTRFS_FT_* directory entry type into the matching S_IF* file
 * type bits by table lookup.
 *
 * NOTE(review): @type indexes the table directly and no bounds check is
 * visible - confirm callers only pass the BTRFS_FT_* values listed below.
 */
3274 static u32 btrfs_type_to_imode(u8 type)
3276 static u32 imode_by_btrfs_type[] = {
3277 [BTRFS_FT_REG_FILE] = S_IFREG,
3278 [BTRFS_FT_DIR] = S_IFDIR,
3279 [BTRFS_FT_CHRDEV] = S_IFCHR,
3280 [BTRFS_FT_BLKDEV] = S_IFBLK,
3281 [BTRFS_FT_FIFO] = S_IFIFO,
3282 [BTRFS_FT_SOCK] = S_IFSOCK,
3283 [BTRFS_FT_SYMLINK] = S_IFLNK,
3286 return imode_by_btrfs_type[(type)];
/*
 * Rebuild a missing inode item for @rec.
 *
 * The file type comes from find_file_type() when possible. Otherwise it is
 * guessed, in this order: a normal file extent means regular file, a dir
 * item means directory, orphan extents mean regular file, and regular file
 * is the final fallback. The inode is created with btrfs_new_inode().
 *
 * On success the record is updated: found_dir_item and imode are set,
 * I_ERR_NO_INODE_ITEM is cleared and I_ERR_LINK_COUNT_WRONG is set so that
 * the nlink repair runs afterwards.
 */
3289 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3290 struct btrfs_root *root,
3291 struct btrfs_path *path,
3292 struct inode_record *rec)
3296 int type_recovered = 0;
3299 printf("Trying to rebuild inode:%llu\n", rec->ino);
3301 type_recovered = !find_file_type(rec, &filetype);
3304 * Try to determine inode type if type not found.
3306 * For found regular file extent, it must be FILE.
3307 * For found dir_item/index, it must be DIR.
3309 * For undetermined one, use FILE as fallback.
3312 * 1. If found backref(inode_index/item is already handled) to it,
3314 * Need new inode-inode ref structure to allow search for that.
3316 if (!type_recovered) {
3317 if (rec->found_file_extent &&
3318 find_normal_file_extent(root, rec->ino)) {
3320 filetype = BTRFS_FT_REG_FILE;
3321 } else if (rec->found_dir_item) {
3323 filetype = BTRFS_FT_DIR;
3324 } else if (!list_empty(&rec->orphan_extents)) {
3326 filetype = BTRFS_FT_REG_FILE;
3328 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3331 filetype = BTRFS_FT_REG_FILE;
3335 ret = btrfs_new_inode(trans, root, rec->ino,
3336 mode | btrfs_type_to_imode(filetype));
3341 * Here inode rebuild is done, we only rebuild the inode item,
3342 * don't repair the nlink(like move to lost+found).
3343 * That is the job of nlink repair.
3345 * We just fill the record and return
3347 rec->found_dir_item = 1;
3348 rec->imode = mode | btrfs_type_to_imode(filetype);
3350 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3351 /* Ensure the inode_nlinks repair function will be called */
3352 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach the orphan data extents recorded in rec->orphan_extents.
 *
 * For each orphan, btrfs_get_extent() probes the target file range. The
 * code shown either frees the extent with btrfs_free_extent() after
 * reporting a conflict, or inserts a file extent item with
 * btrfs_insert_file_extent(), using disk_len for both length arguments.
 * Afterwards found_size grows by disk_len, the matching hole record is
 * removed, and I_ERR_FILE_NBYTES_WRONG / I_ERR_FILE_EXTENT_DISCOUNT are
 * cleared once their conditions no longer hold. Each processed orphan is
 * unlinked from the list, and I_ERR_FILE_EXTENT_ORPHAN is cleared at the
 * end.
 */
3357 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3358 struct btrfs_root *root,
3359 struct btrfs_path *path,
3360 struct inode_record *rec)
3362 struct orphan_data_extent *orphan;
3363 struct orphan_data_extent *tmp;
3366 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3368 * Check for conflicting file extents
3370 * Here we don't know whether the extents is compressed or not,
3371 * so we can only assume it not compressed nor data offset,
3372 * and use its disk_len as extent length.
3374 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3375 orphan->offset, orphan->disk_len, 0);
3376 btrfs_release_path(path);
3381 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3382 orphan->disk_bytenr, orphan->disk_len);
3383 ret = btrfs_free_extent(trans,
3384 root->fs_info->extent_root,
3385 orphan->disk_bytenr, orphan->disk_len,
3386 0, root->objectid, orphan->objectid,
3391 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3392 orphan->offset, orphan->disk_bytenr,
3393 orphan->disk_len, orphan->disk_len);
3397 /* Update file size info */
3398 rec->found_size += orphan->disk_len;
3399 if (rec->found_size == rec->nbytes)
3400 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3402 /* Update the file extent hole info too */
3403 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3407 if (RB_EMPTY_ROOT(&rec->holes))
3408 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3410 list_del(&orphan->list);
3413 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps ("holes") recorded in rec->holes for inode @rec.
 *
 * The tree is drained from its first node: each hole is punched with
 * btrfs_punch_hole() and then removed with del_file_extent_hole(). When the
 * tree becomes empty I_ERR_FILE_EXTENT_DISCOUNT is cleared. A separate
 * special case covers a file that lost all of its extents, punching one
 * hole from 0 to isize rounded up to the sector size.
 */
3418 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3419 struct btrfs_root *root,
3420 struct btrfs_path *path,
3421 struct inode_record *rec)
3423 struct rb_node *node;
3424 struct file_extent_hole *hole;
3428 node = rb_first(&rec->holes);
3432 hole = rb_entry(node, struct file_extent_hole, node);
3433 ret = btrfs_punch_hole(trans, root, rec->ino,
3434 hole->start, hole->len);
3437 ret = del_file_extent_hole(&rec->holes, hole->start,
3441 if (RB_EMPTY_ROOT(&rec->holes))
3442 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* Restart from the first node, the current one was just removed. */
3443 node = rb_first(&rec->holes);
3445 /* special case for a file losing all its file extent */
3447 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3448 round_up(rec->isize,
3449 root->fs_info->sectorsize));
3453 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3454 rec->ino, root->objectid);
/*
 * Run every applicable per-inode repair for @rec inside one transaction.
 *
 * Only the error bits listed in the mask below are repairable here. The
 * repairs run in a fixed order and each one is skipped once an earlier one
 * has failed (ret != 0): missing inode item, orphan extents, discount
 * extents, dir isize, orphan item, nlink, nbytes. The flags are re-read
 * before every step, so a repair may set or clear bits that affect later
 * steps.
 *
 * The transaction is committed regardless of ret, and the return value of
 * the commit is not stored.
 */
3459 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3461 struct btrfs_trans_handle *trans;
3462 struct btrfs_path path;
3465 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3466 I_ERR_NO_ORPHAN_ITEM |
3467 I_ERR_LINK_COUNT_WRONG |
3468 I_ERR_NO_INODE_ITEM |
3469 I_ERR_FILE_EXTENT_ORPHAN |
3470 I_ERR_FILE_EXTENT_DISCOUNT|
3471 I_ERR_FILE_NBYTES_WRONG)))
3475 * For nlink repair, it may create a dir and add link, so
3476 * 2 for parent(256)'s dir_index and dir_item
3477 * 2 for lost+found dir's inode_item and inode_ref
3478 * 1 for the new inode_ref of the file
3479 * 2 for lost+found dir's dir_index and dir_item for the file
3481 trans = btrfs_start_transaction(root, 7);
3483 return PTR_ERR(trans);
3485 btrfs_init_path(&path);
3486 if (rec->errors & I_ERR_NO_INODE_ITEM)
3487 ret = repair_inode_no_item(trans, root, &path, rec);
3488 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3489 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3490 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3491 ret = repair_inode_discount_extent(trans, root, &path, rec);
3492 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3493 ret = repair_inode_isize(trans, root, &path, rec);
3494 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3495 ret = repair_inode_orphan_item(trans, root, &path, rec);
3496 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3497 ret = repair_inode_nlinks(trans, root, &path, rec);
3498 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3499 ret = repair_inode_nbytes(trans, root, &path, rec);
3500 btrfs_commit_transaction(trans, root);
3501 btrfs_release_path(&path);
/*
 * Verify, and in repair mode fix, every inode record collected for @root.
 *
 * Outline of the code shown here:
 *  1. A root with zero refs is not checked in detail; only a warning is
 *     printed when its inode cache is not empty.
 *  2. In repair mode the cache is walked and repair_inode_backrefs() is
 *     called for each record that still has backrefs. The walk is staged
 *     (see the comment below); NOTE(review): the stage counter and its
 *     loop are not fully visible here.
 *  3. The root directory record is checked with check_root_dir(); a missing
 *     root dir can be recreated with btrfs_make_root_dir().
 *  4. The cache is drained. The root dir and the orphan objectid are
 *     skipped, I_ERR_NO_ORPHAN_ITEM is re-validated with
 *     check_orphan_item(), I_ERR_NO_INODE_ITEM and I_ERR_LINK_COUNT_WRONG
 *     are derived from the record, and try_repair_inode() is attempted.
 *     Records that remain bad are printed together with their unresolved
 *     backrefs.
 *
 * Returns -1 if any error was counted, 0 otherwise.
 */
3505 static int check_inode_recs(struct btrfs_root *root,
3506 struct cache_tree *inode_cache)
3508 struct cache_extent *cache;
3509 struct ptr_node *node;
3510 struct inode_record *rec;
3511 struct inode_backref *backref;
3516 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3518 if (btrfs_root_refs(&root->root_item) == 0) {
3519 if (!cache_tree_empty(inode_cache))
3520 fprintf(stderr, "warning line %d\n", __LINE__);
3525 * We need to repair backrefs first because we could change some of the
3526 * errors in the inode recs.
3528 * We also need to go through and delete invalid backrefs first and then
3529 * add the correct ones second. We do this because we may get EEXIST
3530 * when adding back the correct index because we hadn't yet deleted the
3533 * For example, if we were missing a dir index then the directories
3534 * isize would be wrong, so if we fixed the isize to what we thought it
3535 * would be and then fixed the backref we'd still have a invalid fs, so
3536 * we need to add back the dir index and then check to see if the isize
3541 if (stage == 3 && !err)
3544 cache = search_cache_extent(inode_cache, 0);
3545 while (repair && cache) {
3546 node = container_of(cache, struct ptr_node, cache);
3548 cache = next_cache_extent(cache);
3550 /* Need to free everything up and rescan */
3552 remove_cache_extent(inode_cache, &node->cache);
3554 free_inode_rec(rec);
3558 if (list_empty(&rec->backrefs))
3561 ret = repair_inode_backrefs(root, rec, inode_cache,
3575 rec = get_inode_rec(inode_cache, root_dirid, 0);
3576 BUG_ON(IS_ERR(rec));
3578 ret = check_root_dir(rec);
3580 fprintf(stderr, "root %llu root dir %llu error\n",
3581 (unsigned long long)root->root_key.objectid,
3582 (unsigned long long)root_dirid);
3583 print_inode_error(root, rec);
3588 struct btrfs_trans_handle *trans;
3590 trans = btrfs_start_transaction(root, 1);
3591 if (IS_ERR(trans)) {
3592 err = PTR_ERR(trans);
3597 "root %llu missing its root dir, recreating\n",
3598 (unsigned long long)root->objectid);
3600 ret = btrfs_make_root_dir(trans, root, root_dirid);
3603 btrfs_commit_transaction(trans, root);
3607 fprintf(stderr, "root %llu root dir %llu not found\n",
3608 (unsigned long long)root->root_key.objectid,
3609 (unsigned long long)root_dirid);
/* Final pass: every record is removed from the cache as it is handled. */
3613 cache = search_cache_extent(inode_cache, 0);
3616 node = container_of(cache, struct ptr_node, cache);
3618 remove_cache_extent(inode_cache, &node->cache);
3620 if (rec->ino == root_dirid ||
3621 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3622 free_inode_rec(rec);
3626 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3627 ret = check_orphan_item(root, rec->ino);
3629 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3630 if (can_free_inode_rec(rec)) {
3631 free_inode_rec(rec);
3636 if (!rec->found_inode_item)
3637 rec->errors |= I_ERR_NO_INODE_ITEM;
3638 if (rec->found_link != rec->nlink)
3639 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3641 ret = try_repair_inode(root, rec);
3642 if (ret == 0 && can_free_inode_rec(rec)) {
3643 free_inode_rec(rec);
3649 if (!(repair && ret == 0))
3651 print_inode_error(root, rec);
3652 list_for_each_entry(backref, &rec->backrefs, list) {
3653 if (!backref->found_dir_item)
3654 backref->errors |= REF_ERR_NO_DIR_ITEM;
3655 if (!backref->found_dir_index)
3656 backref->errors |= REF_ERR_NO_DIR_INDEX;
3657 if (!backref->found_inode_ref)
3658 backref->errors |= REF_ERR_NO_INODE_REF;
3659 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3660 " namelen %u name %s filetype %d errors %x",
3661 (unsigned long long)backref->dir,
3662 (unsigned long long)backref->index,
3663 backref->namelen, backref->name,
3664 backref->filetype, backref->errors);
3665 print_ref_error(backref->errors);
3667 free_inode_rec(rec);
3669 return (error > 0) ? -1 : 0;
/*
 * Look up the root_record for objectid in @root_cache, creating it on
 * demand.
 *
 * An existing record is returned as is. Otherwise a zeroed record is
 * allocated, initialised (objectid, empty backref list, cache range
 * [objectid, objectid + 1)) and inserted into the cache.
 *
 * Returns ERR_PTR(-ENOMEM) or ERR_PTR(-EEXIST) on the error paths shown.
 * NOTE(review): no free of rec is visible before the -EEXIST return -
 * confirm the record is not leaked on that path.
 */
3672 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3675 struct cache_extent *cache;
3676 struct root_record *rec = NULL;
3679 cache = lookup_cache_extent(root_cache, objectid, 1);
3681 rec = container_of(cache, struct root_record, cache);
3683 rec = calloc(1, sizeof(*rec));
3685 return ERR_PTR(-ENOMEM);
3686 rec->objectid = objectid;
3687 INIT_LIST_HEAD(&rec->backrefs);
3688 rec->cache.start = objectid;
3689 rec->cache.size = 1;
3691 ret = insert_cache_extent(root_cache, &rec->cache);
3693 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec that matches (@ref_root, @dir, @name), or
 * allocate a new one and append it to rec->backrefs.
 *
 * Matching compares ref_root, dir, namelen and the name bytes; @index is
 * not part of the comparison and is only stored in a newly created entry.
 * New entries are zero-allocated with room for the name plus a trailing
 * NUL, which is written explicitly.
 */
3698 static struct root_backref *get_root_backref(struct root_record *rec,
3699 u64 ref_root, u64 dir, u64 index,
3700 const char *name, int namelen)
3702 struct root_backref *backref;
3704 list_for_each_entry(backref, &rec->backrefs, list) {
3705 if (backref->ref_root != ref_root || backref->dir != dir ||
3706 backref->namelen != namelen)
3708 if (memcmp(name, backref->name, namelen))
3713 backref = calloc(1, sizeof(*backref) + namelen + 1);
3716 backref->ref_root = ref_root;
3718 backref->index = index;
3719 backref->namelen = namelen;
3720 memcpy(backref->name, name, namelen);
3721 backref->name[namelen] = '\0';
3722 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Release callback for one root_record: converts @cache back to its
 * containing record and unlinks every backref from rec->backrefs.
 */
3726 static void free_root_record(struct cache_extent *cache)
3728 struct root_record *rec;
3729 struct root_backref *backref;
3731 rec = container_of(cache, struct root_record, cache);
3732 while (!list_empty(&rec->backrefs)) {
3733 backref = to_root_backref(rec->backrefs.next);
3734 list_del(&backref->list);
/*
 * NOTE(review): presumably generates the tree-wide free helper for root
 * records on top of free_root_record() - confirm against the macro.
 */
3741 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that one reference item of kind @item_type was seen for root
 * @root_id, referenced from @ref_root through directory @dir under @name.
 *
 * The matching backref is fetched or created, @errors is OR-ed into it, and
 * the flag for @item_type is set (found_dir_item, found_dir_index,
 * found_forward_ref or found_back_ref). For every item type except
 * DIR_ITEM the index is cross-checked: if another index-carrying item was
 * already seen and the indexes differ, REF_ERR_INDEX_UNMATCH is raised.
 * A second ROOT_REF or ROOT_BACKREF for the same backref raises the
 * corresponding REF_ERR_DUP_* error.
 *
 * A backref becomes reachable once both the forward ref and the dir item
 * have been seen.
 */
3743 static int add_root_backref(struct cache_tree *root_cache,
3744 u64 root_id, u64 ref_root, u64 dir, u64 index,
3745 const char *name, int namelen,
3746 int item_type, int errors)
3748 struct root_record *rec;
3749 struct root_backref *backref;
3751 rec = get_root_rec(root_cache, root_id);
3752 BUG_ON(IS_ERR(rec));
3753 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3756 backref->errors |= errors;
3758 if (item_type != BTRFS_DIR_ITEM_KEY) {
3759 if (backref->found_dir_index || backref->found_back_ref ||
3760 backref->found_forward_ref) {
3761 if (backref->index != index)
3762 backref->errors |= REF_ERR_INDEX_UNMATCH;
3764 backref->index = index;
3768 if (item_type == BTRFS_DIR_ITEM_KEY) {
3769 if (backref->found_forward_ref)
3771 backref->found_dir_item = 1;
3772 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3773 backref->found_dir_index = 1;
3774 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3775 if (backref->found_forward_ref)
3776 backref->errors |= REF_ERR_DUP_ROOT_REF;
3777 else if (backref->found_dir_item)
3779 backref->found_forward_ref = 1;
3780 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3781 if (backref->found_back_ref)
3782 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3783 backref->found_back_ref = 1;
3788 if (backref->found_forward_ref && backref->found_dir_item)
3789 backref->reachable = 1;
/*
 * Drain the inode records in @src_cache and turn them into root backrefs in
 * @dst_cache.
 *
 * Records of the tree-reloc root are simply freed. For every other record
 * is_child_root() is consulted, and the dir item / dir index backrefs of
 * the record are forwarded to add_root_backref() with rec->ino as the
 * referenced root and this root as the referrer. Such records must not
 * carry inode refs (BUG_ON). Each record is freed after processing.
 */
3793 static int merge_root_recs(struct btrfs_root *root,
3794 struct cache_tree *src_cache,
3795 struct cache_tree *dst_cache)
3797 struct cache_extent *cache;
3798 struct ptr_node *node;
3799 struct inode_record *rec;
3800 struct inode_backref *backref;
3803 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3804 free_inode_recs_tree(src_cache);
3809 cache = search_cache_extent(src_cache, 0);
3812 node = container_of(cache, struct ptr_node, cache);
3814 remove_cache_extent(src_cache, &node->cache);
3817 ret = is_child_root(root, root->objectid, rec->ino);
3823 list_for_each_entry(backref, &rec->backrefs, list) {
3824 BUG_ON(backref->found_inode_ref);
3825 if (backref->found_dir_item)
3826 add_root_backref(dst_cache, rec->ino,
3827 root->root_key.objectid, backref->dir,
3828 backref->index, backref->name,
3829 backref->namelen, BTRFS_DIR_ITEM_KEY,
3831 if (backref->found_dir_index)
3832 add_root_backref(dst_cache, rec->ino,
3833 root->root_key.objectid, backref->dir,
3834 backref->index, backref->name,
3835 backref->namelen, BTRFS_DIR_INDEX_KEY,
3839 free_inode_rec(rec);
/*
 * Check that every fs tree recorded in @root_cache is referenced correctly.
 *
 * Pass 1 works on reachability: starting from the record of
 * BTRFS_FS_TREE_OBJECTID, backrefs whose referring root has no reference
 * itself are marked unreachable. As the fixme below states, circular
 * references are not detected.
 *
 * Pass 2 reports problems: unreferenced roots in the free objectid range
 * (after consulting check_orphan_item() on the tree root), roots that are
 * referenced but have no root item, and reachable backrefs that miss a dir
 * item, dir index, root backref or root ref.
 *
 * Returns 1 if any error was counted, 0 otherwise.
 */
3846 static int check_root_refs(struct btrfs_root *root,
3847 struct cache_tree *root_cache)
3849 struct root_record *rec;
3850 struct root_record *ref_root;
3851 struct root_backref *backref;
3852 struct cache_extent *cache;
3858 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3859 BUG_ON(IS_ERR(rec));
3862 /* fixme: this can not detect circular references */
3865 cache = search_cache_extent(root_cache, 0);
3869 rec = container_of(cache, struct root_record, cache);
3870 cache = next_cache_extent(cache);
3872 if (rec->found_ref == 0)
3875 list_for_each_entry(backref, &rec->backrefs, list) {
3876 if (!backref->reachable)
3879 ref_root = get_root_rec(root_cache,
3881 BUG_ON(IS_ERR(ref_root));
3882 if (ref_root->found_ref > 0)
3885 backref->reachable = 0;
3887 if (rec->found_ref == 0)
3893 cache = search_cache_extent(root_cache, 0);
3897 rec = container_of(cache, struct root_record, cache);
3898 cache = next_cache_extent(cache);
3900 if (rec->found_ref == 0 &&
3901 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3902 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3903 ret = check_orphan_item(root->fs_info->tree_root,
3909 * If we don't have a root item then we likely just have
3910 * a dir item in a snapshot for this root but no actual
3911 * ref key or anything so it's meaningless.
3913 if (!rec->found_root_item)
3916 fprintf(stderr, "fs tree %llu not referenced\n",
3917 (unsigned long long)rec->objectid);
3921 if (rec->found_ref > 0 && !rec->found_root_item)
3923 list_for_each_entry(backref, &rec->backrefs, list) {
3924 if (!backref->found_dir_item)
3925 backref->errors |= REF_ERR_NO_DIR_ITEM;
3926 if (!backref->found_dir_index)
3927 backref->errors |= REF_ERR_NO_DIR_INDEX;
3928 if (!backref->found_back_ref)
3929 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3930 if (!backref->found_forward_ref)
3931 backref->errors |= REF_ERR_NO_ROOT_REF;
3932 if (backref->reachable && backref->errors)
3939 fprintf(stderr, "fs tree %llu refs %u %s\n",
3940 (unsigned long long)rec->objectid, rec->found_ref,
3941 rec->found_root_item ? "" : "not found");
3943 list_for_each_entry(backref, &rec->backrefs, list) {
3944 if (!backref->reachable)
3946 if (!backref->errors && rec->found_root_item)
3948 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3949 " index %llu namelen %u name %s errors %x\n",
3950 (unsigned long long)backref->ref_root,
3951 (unsigned long long)backref->dir,
3952 (unsigned long long)backref->index,
3953 backref->namelen, backref->name,
3955 print_ref_error(backref->errors);
3958 return errors > 0 ? 1 : 0;
/*
 * Decode one ROOT_REF or ROOT_BACKREF item from @eb at @slot and record it
 * through add_root_backref().
 *
 * The dirid, sequence (used as index) and name are read from the item. A
 * name longer than BTRFS_NAME_LEN is truncated to BTRFS_NAME_LEN and
 * flagged with REF_ERR_NAME_TOO_LONG.
 *
 * The two key kinds store the pair in opposite order: for a ROOT_REF the
 * referenced root is key->offset and the referrer is key->objectid, for the
 * other kind it is the reverse.
 */
3961 static int process_root_ref(struct extent_buffer *eb, int slot,
3962 struct btrfs_key *key,
3963 struct cache_tree *root_cache)
3969 struct btrfs_root_ref *ref;
3970 char namebuf[BTRFS_NAME_LEN];
3973 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3975 dirid = btrfs_root_ref_dirid(eb, ref);
3976 index = btrfs_root_ref_sequence(eb, ref);
3977 name_len = btrfs_root_ref_name_len(eb, ref);
3979 if (name_len <= BTRFS_NAME_LEN) {
3983 len = BTRFS_NAME_LEN;
3984 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the fixed-size btrfs_root_ref header. */
3986 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3988 if (key->type == BTRFS_ROOT_REF_KEY) {
3989 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3990 index, namebuf, len, key->type, error);
3992 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3993 index, namebuf, len, key->type, error);
/*
 * Release callback for one btrfs_corrupt_block: converts @cache back to its
 * containing structure.
 */
3998 static void free_corrupt_block(struct cache_extent *cache)
4000 struct btrfs_corrupt_block *corrupt;
4002 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/*
 * NOTE(review): presumably generates the tree-wide free helper for corrupt
 * block records on top of free_corrupt_block() - confirm against the macro.
 */
4006 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
4009 * Repair the btree of the given root.
4011 * The fix is to remove the node keys listed in the corrupt_blocks cache_tree
4012 * and rebalance the tree.
4013 * After the fix, the btree should be writeable.
/*
 * Two passes over @corrupt_blocks inside a single transaction:
 *  1. For each recorded block, search down to its level with ins_len 0 (no
 *     balancing), read the block pointer, delete that pointer with
 *     btrfs_del_ptr() and free the extent it referenced.
 *  2. Search for each recorded key again with ins_len -1 so that
 *     btrfs_search_slot() rebalances the tree.
 *
 * Nothing is done when @corrupt_blocks is empty. The transaction is
 * committed and the path released at the end.
 */
4015 static int repair_btree(struct btrfs_root *root,
4016 struct cache_tree *corrupt_blocks)
4018 struct btrfs_trans_handle *trans;
4019 struct btrfs_path path;
4020 struct btrfs_corrupt_block *corrupt;
4021 struct cache_extent *cache;
4022 struct btrfs_key key;
4027 if (cache_tree_empty(corrupt_blocks))
4030 trans = btrfs_start_transaction(root, 1);
4031 if (IS_ERR(trans)) {
4032 ret = PTR_ERR(trans);
4033 fprintf(stderr, "Error starting transaction: %s\n",
4037 btrfs_init_path(&path);
4038 cache = first_cache_extent(corrupt_blocks);
4040 corrupt = container_of(cache, struct btrfs_corrupt_block,
4042 level = corrupt->level;
4043 path.lowest_level = level;
4044 key.objectid = corrupt->key.objectid;
4045 key.type = corrupt->key.type;
4046 key.offset = corrupt->key.offset;
4049 * Here we don't want to do any tree balance, since it may
4050 * cause a balance with corrupted brother leaf/node,
4051 * so ins_len set to 0 here.
4052 * Balance will be done after all corrupt node/leaf is deleted.
4054 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
4057 offset = btrfs_node_blockptr(path.nodes[level],
4060 /* Remove the ptr */
4061 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
4065 * Remove the corresponding extent
4066 * return value is not concerned.
4068 btrfs_release_path(&path);
4069 ret = btrfs_free_extent(trans, root, offset,
4070 root->fs_info->nodesize, 0,
4071 root->root_key.objectid, level - 1, 0);
4072 cache = next_cache_extent(cache);
4075 /* Balance the btree using btrfs_search_slot() */
4076 cache = first_cache_extent(corrupt_blocks);
4078 corrupt = container_of(cache, struct btrfs_corrupt_block,
4080 memcpy(&key, &corrupt->key, sizeof(key));
4081 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
4084 /* ret is always > 0 here since the item will not be found */
4086 btrfs_release_path(&path);
4087 cache = next_cache_extent(cache);
4090 btrfs_commit_transaction(trans, root);
4091 btrfs_release_path(&path);
/*
 * Check a single fs/subvolume tree.
 *
 * Walks the tree with walk_down_tree()/walk_up_tree(), prints and tries to
 * repair the tree blocks recorded in a per-root corrupt_blocks cache, then
 * hands the collected root records to merge_root_recs() and the collected
 * inode records to check_inode_recs().
 */
4095 static int check_fs_root(struct btrfs_root *root,
4096 struct cache_tree *root_cache,
4097 struct walk_control *wc)
4103 struct btrfs_path path;
4104 struct shared_node root_node;
4105 struct root_record *rec;
4106 struct btrfs_root_item *root_item = &root->root_item;
4107 struct cache_tree corrupt_blocks;
4108 struct orphan_data_extent *orphan;
4109 struct orphan_data_extent *tmp;
4110 enum btrfs_tree_block_status status;
4111 struct node_refs nrefs;
4114 * Reuse the corrupt_block cache tree to record corrupted tree block
4116 * Unlike the usage in extent tree check, here we do it in a per
4117 * fs/subvol tree base.
4119 cache_tree_init(&corrupt_blocks);
4120 root->fs_info->corrupt_blocks = &corrupt_blocks;
/*
 * Every tree except the relocation tree gets a root record; it is flagged
 * as having a root item when the item's reference count is non-zero.
 */
4122 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4123 rec = get_root_rec(root_cache, root->root_key.objectid);
4124 BUG_ON(IS_ERR(rec));
4125 if (btrfs_root_refs(root_item) > 0)
4126 rec->found_root_item = 1;
4129 btrfs_init_path(&path);
4130 memset(&root_node, 0, sizeof(root_node));
4131 cache_tree_init(&root_node.root_cache);
4132 cache_tree_init(&root_node.inode_cache);
4133 memset(&nrefs, 0, sizeof(nrefs));
4135 /* Move the orphan extent record to corresponding inode_record */
4136 list_for_each_entry_safe(orphan, tmp,
4137 &root->orphan_data_extents, list) {
4138 struct inode_record *inode;
4140 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4142 BUG_ON(IS_ERR(inode));
4143 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4144 list_move(&orphan->list, &inode->orphan_extents);
/* Anchor the walk bookkeeping at the level of the tree's root node. */
4147 level = btrfs_header_level(root->node);
4148 memset(wc->nodes, 0, sizeof(wc->nodes));
4149 wc->nodes[level] = &root_node;
4150 wc->active_node = level;
4151 wc->root_level = level;
4153 /* We may not have checked the root block, lets do that now */
4154 if (btrfs_is_leaf(root->node))
4155 status = btrfs_check_leaf(root, NULL, root->node);
4157 status = btrfs_check_node(root, NULL, root->node);
4158 if (status != BTRFS_TREE_BLOCK_CLEAN)
/*
 * Start from the root node when the root is still referenced or no drop
 * progress was recorded; otherwise resume from the drop_progress key at
 * drop_level stored in the root item.
 */
4161 if (btrfs_root_refs(root_item) > 0 ||
4162 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4163 path.nodes[level] = root->node;
4164 extent_buffer_get(root->node);
4165 path.slots[level] = 0;
4167 struct btrfs_key key;
4168 struct btrfs_disk_key found_key;
4170 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4171 level = root_item->drop_level;
4172 path.lowest_level = level;
4173 if (level > btrfs_header_level(root->node) ||
4174 level >= BTRFS_MAX_LEVEL) {
4175 error("ignoring invalid drop level: %u", level);
4178 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4181 btrfs_node_key(path.nodes[level], &found_key,
4183 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4184 sizeof(found_key)));
4188 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4194 wret = walk_up_tree(root, &path, wc, &level);
4201 btrfs_release_path(&path);
/* Report every corrupted tree block recorded during the walk, then repair. */
4203 if (!cache_tree_empty(&corrupt_blocks)) {
4204 struct cache_extent *cache;
4205 struct btrfs_corrupt_block *corrupt;
4207 printf("The following tree block(s) is corrupted in tree %llu:\n",
4208 root->root_key.objectid);
4209 cache = first_cache_extent(&corrupt_blocks);
4211 corrupt = container_of(cache,
4212 struct btrfs_corrupt_block,
4214 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4215 cache->start, corrupt->level,
4216 corrupt->key.objectid, corrupt->key.type,
4217 corrupt->key.offset);
4218 cache = next_cache_extent(cache);
4221 printf("Try to repair the btree for root %llu\n",
4222 root->root_key.objectid);
4223 ret = repair_btree(root, &corrupt_blocks);
4225 fprintf(stderr, "Failed to repair btree: %s\n",
4228 printf("Btree for root %llu is fixed\n",
4229 root->root_key.objectid);
4233 err = merge_root_recs(root, &root_node.root_cache, root_cache);
/*
 * Mark the inode record still being tracked as checked and pass it to
 * maybe_free_inode_rec() before all inode records are checked.
 */
4237 if (root_node.current) {
4238 root_node.current->checked = 1;
4239 maybe_free_inode_rec(&root_node.inode_cache,
4243 err = check_inode_recs(root, &root_node.inode_cache);
/* Undo the per-root corrupt block cache installed at the top. */
4247 free_corrupt_blocks_tree(&corrupt_blocks);
4248 root->fs_info->corrupt_blocks = NULL;
4249 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Filter used by check_fs_roots() on ROOT_ITEM objectids: the tree-reloc and
 * data-reloc tree ids are special-cased ahead of the generic is_fstree()
 * test, whose result is returned for every other objectid.
 */
4253 static int fs_root_objectid(u64 objectid)
4255 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4256 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4258 return is_fstree(objectid);
/*
 * Iterate over the items of the tree root: every ROOT_ITEM whose objectid
 * passes fs_root_objectid() is read and checked with check_fs_root(), and
 * every ROOT_REF/ROOT_BACKREF item is passed to process_root_ref().
 */
4261 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4262 struct cache_tree *root_cache)
4264 struct btrfs_path path;
4265 struct btrfs_key key;
4266 struct walk_control wc;
4267 struct extent_buffer *leaf, *tree_node;
4268 struct btrfs_root *tmp_root;
4269 struct btrfs_root *tree_root = fs_info->tree_root;
4273 if (ctx.progress_enabled) {
4274 ctx.tp = TASK_FS_ROOTS;
4275 task_start(ctx.info);
4279 * Just in case we made any changes to the extent tree that weren't
4280 * reflected into the free space cache yet.
4283 reset_cached_block_groups(fs_info);
4284 memset(&wc, 0, sizeof(wc));
4285 cache_tree_init(&wc.shared);
4286 btrfs_init_path(&path);
4291 key.type = BTRFS_ROOT_ITEM_KEY;
4292 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember the tree root node so a later change of it can be detected. */
4297 tree_node = tree_root->node;
/*
 * The tree root node is no longer the one sampled above: drop the root
 * records collected so far and release the path.
 */
4299 if (tree_node != tree_root->node) {
4300 free_root_recs_tree(root_cache);
4301 btrfs_release_path(&path);
4304 leaf = path.nodes[0];
4305 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4306 ret = btrfs_next_leaf(tree_root, &path);
4312 leaf = path.nodes[0];
4314 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4315 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4316 fs_root_objectid(key.objectid)) {
/*
 * Relocation trees are read with the no-cache variant and freed again
 * after the check; other roots go through btrfs_read_fs_root() with
 * key.offset set to (u64)-1.
 */
4317 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4318 tmp_root = btrfs_read_fs_root_no_cache(
4321 key.offset = (u64)-1;
4322 tmp_root = btrfs_read_fs_root(
4325 if (IS_ERR(tmp_root)) {
4329 ret = check_fs_root(tmp_root, root_cache, &wc);
/* -EAGAIN: discard the root records gathered so far and release the path. */
4330 if (ret == -EAGAIN) {
4331 free_root_recs_tree(root_cache);
4332 btrfs_release_path(&path);
4337 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4338 btrfs_free_fs_root(tmp_root);
4339 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4340 key.type == BTRFS_ROOT_BACKREF_KEY) {
4341 process_root_ref(leaf, path.slots[0], &key,
4348 btrfs_release_path(&path);
4350 free_extent_cache_tree(&wc.shared);
/*
 * NOTE(review): this emptiness test runs right after the same tree was
 * passed to free_extent_cache_tree() - confirm the intended order.
 */
4351 if (!cache_tree_empty(&wc.shared))
4352 fprintf(stderr, "warning line %d\n", __LINE__);
4354 task_stop(ctx.info);
4360 * Find the @index according to @ino and name.
4361 * Notice: time efficiency is O(N)
4363 * @root: the root of the fs/file tree
4364 * @index_ret: the index as return value
4365 * @namebuf: the name to match
4366 * @name_len: the length of name to match
4367 * @file_type: the file_type of INODE_ITEM to match
4369 * Returns 0 if found and *@index_ret will be modified with right value
4370 * Returns <0 if not found and *@index_ret will be (u64)-1
4372 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4373 u64 *index_ret, char *namebuf, u32 name_len,
4376 struct btrfs_path path;
4377 struct extent_buffer *node;
4378 struct btrfs_dir_item *di;
4379 struct btrfs_key key;
4380 struct btrfs_key location;
4381 char name[BTRFS_NAME_LEN] = {0};
4393 /* search from the last index */
4394 key.objectid = dirid;
4395 key.offset = (u64)-1;
4396 key.type = BTRFS_DIR_INDEX_KEY;
4398 btrfs_init_path(&path);
4399 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* Step back to the previous DIR_INDEX item that belongs to @dirid. */
4404 ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
/*
 * Not-found sentinel.  It must be the all-ones u64 that callers compare
 * against with "== (u64)-1"; the former "(64)-1" evaluated to 63, which
 * looks like a valid directory index.
 */
4407 *index_ret = (u64)-1;
4410 /* Check whether inode_id/filetype/name match */
4411 node = path.nodes[0];
4412 slot = path.slots[0];
4413 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4414 total = btrfs_item_size_nr(node, slot);
4415 while (cur < total) {
4417 len = btrfs_dir_name_len(node, di);
4418 data_len = btrfs_dir_data_len(node, di);
/* The entry must point at the INODE_ITEM (location_id, INODE_ITEM, 0). */
4420 btrfs_dir_item_key_to_cpu(node, di, &location);
4421 if (location.objectid != location_id ||
4422 location.type != BTRFS_INODE_ITEM_KEY ||
4423 location.offset != 0)
4426 filetype = btrfs_dir_type(node, di);
4427 if (file_type != filetype)
/* Never copy more than the local name buffer can hold. */
4430 if (len > BTRFS_NAME_LEN)
4431 len = BTRFS_NAME_LEN;
4433 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4434 if (len != name_len || strncmp(namebuf, name, len))
/* Full match: the index is the offset of the DIR_INDEX key itself. */
4437 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4438 *index_ret = key.offset;
/* Advance to the next entry packed in the same item. */
4442 len += sizeof(*di) + data_len;
4443 di = (struct btrfs_dir_item *)((char *)di + len);
4449 btrfs_release_path(&path);
4454 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4455 * INODE_REF/INODE_EXTREF match.
4457 * @root: the root of the fs/file tree
4458 * @key: the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4459 * value while find index
4460 * @location_key: location key of the struct btrfs_dir_item to match
4461 * @name: the name to match
4462 * @namelen: the length of name
4463 * @file_type: the type of file to match
4465 * Return 0 if no error occurred.
4466 * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4467 * DIR_ITEM/DIR_INDEX
4468 * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4469 * and DIR_ITEM/DIR_INDEX mismatch
4471 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4472 struct btrfs_key *location_key, char *name,
4473 u32 namelen, u8 file_type)
4475 struct btrfs_path path;
4476 struct extent_buffer *node;
4477 struct btrfs_dir_item *di;
4478 struct btrfs_key location;
4479 char namebuf[BTRFS_NAME_LEN] = {0};
4488 /* get the index by traversing all index */
/*
 * A DIR_INDEX key whose offset is (u64)-1 means the caller does not know the
 * index; find_dir_index() writes the one it finds into key->offset.
 */
4489 if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4490 ret = find_dir_index(root, key->objectid,
4491 location_key->objectid, &key->offset,
4492 name, namelen, file_type);
4494 ret = DIR_INDEX_MISSING;
4498 btrfs_init_path(&path);
4499 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4501 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4506 /* Check whether inode_id/filetype/name match */
4507 node = path.nodes[0];
4508 slot = path.slots[0];
4509 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4510 total = btrfs_item_size_nr(node, slot);
4511 while (cur < total) {
/* Assume a mismatch for this entry until every comparison below passes. */
4512 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4513 DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4515 len = btrfs_dir_name_len(node, di);
4516 data_len = btrfs_dir_data_len(node, di);
4518 btrfs_dir_item_key_to_cpu(node, di, &location);
4519 if (location.objectid != location_key->objectid ||
4520 location.type != location_key->type ||
4521 location.offset != location_key->offset)
4524 filetype = btrfs_dir_type(node, di);
4525 if (file_type != filetype)
/*
 * NOTE(review): len is clamped before the warning prints it, so the message
 * shows BTRFS_NAME_LEN rather than the length stored on disk.
 */
4528 if (len > BTRFS_NAME_LEN) {
4529 len = BTRFS_NAME_LEN;
4530 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4532 key->type == BTRFS_DIR_ITEM_KEY ?
4533 "DIR_ITEM" : "DIR_INDEX",
4534 key->objectid, key->offset, len);
4536 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4538 if (len != namelen || strncmp(namebuf, name, len))
/* Advance to the next entry packed in the same item. */
4544 len += sizeof(*di) + data_len;
4545 di = (struct btrfs_dir_item *)((char *)di + len);
4550 btrfs_release_path(&path);
4555 * Prints inode ref error message
4557 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4558 u64 index, const char *namebuf, int name_len,
4559 u8 filetype, int err)
4564 /* root dir error */
4565 if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4567 "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4568 root->objectid, key->objectid, key->offset, namebuf);
/* The DIR_ITEM is identified by (parent dir id, hash of the name). */
4573 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4574 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4575 root->objectid, key->offset,
4576 btrfs_name_hash(namebuf, name_len),
4577 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
/*
 * The DIR_INDEX is identified by (parent dir id, index).  The wording must
 * be chosen from the DIR_INDEX bit; testing DIR_ITEM_MISMATCH here reported
 * a mismatching index as "missing" (and vice versa).
 */
4579 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4580 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4581 root->objectid, key->offset, index,
4582 err & DIR_INDEX_MISMATCH ? "mismatch" : "missing",
4587 * Insert the missing inode item.
4589 * Returns 0 means success.
4590 * Returns <0 means error.
4592 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4595 struct btrfs_key key;
4596 struct btrfs_trans_handle *trans;
4597 struct btrfs_path path;
4601 key.type = BTRFS_INODE_ITEM_KEY;
4604 btrfs_init_path(&path);
4605 trans = btrfs_start_transaction(root, 1);
4606 if (IS_ERR(trans)) {
/*
 * Look the INODE_ITEM up first.  A search error (ret < 0) and an already
 * existing item (ret == 0) are handled together; presumably only ret > 0
 * (not found) reaches the insertion below - confirm.
 */
4611 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4612 if (ret < 0 || !ret)
4615 /* insert inode item */
/* NOTE(review): the return value of create_inode_item_lowmem() is unused. */
4616 create_inode_item_lowmem(trans, root, ino, filetype);
4619 btrfs_commit_transaction(trans, root);
4622 error("failed to repair root %llu INODE ITEM[%llu] missing",
4623 root->objectid, ino);
4624 btrfs_release_path(&path);
4629 * The ternary means dir item, dir index and relative inode ref.
4630 * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4631 * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4633 * If two of three is missing or mismatched, delete the existing one.
4634 * If one of three is missing or mismatched, add the missing one.
4636 * returns 0 means success.
4637 * returns non-zero on error.
4639 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4640 u64 index, char *name, int name_len, u8 filetype,
4643 struct btrfs_trans_handle *trans;
4648 * stage shall be one of following valild values:
4649 * 0: Fine, nothing to do.
4650 * 1: One of three is wrong, so add missing one.
4651 * 2: Two of three is wrong, so delete existed one.
/* One test per member of the ternary: DIR_INDEX, DIR_ITEM and INODE_REF. */
4653 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4655 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4657 if (err & (INODE_REF_MISSING))
4660 /* stage must be smaller than 3 */
/*
 * NOTE(review): no IS_ERR(trans) check appears before the handle is used
 * below - confirm, and align with the other btrfs_start_transaction()
 * callers in this file.
 */
4663 trans = btrfs_start_transaction(root, 1);
4665 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4670 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4671 filetype, &index, 1, 1);
4675 btrfs_commit_transaction(trans, root);
4678 error("fail to repair inode %llu name %s filetype %u",
4679 ino, name, filetype);
/* The success message names the action: "Delete" for stage 2, else "Add". */
4681 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
4682 stage == 2 ? "Delete" : "Add",
4683 ino, name, filetype);
4689 * Traverse the given INODE_REF and call find_dir_item() to find related
4690 * DIR_ITEM/DIR_INDEX.
4692 * @root: the root of the fs/file tree
4693 * @ref_key: the key of the INODE_REF
4694 * @path the path provides node and slot
4695 * @refs: the count of INODE_REF
4696 * @mode: the st_mode of INODE_ITEM
4697 * @name_ret: returns with the first ref's name
4698 * @name_len_ret: len of the name_ret
4700 * Return 0 if no error occurred.
4702 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4703 struct btrfs_path *path, char *name_ret,
4704 u32 *namelen_ret, u64 *refs_ret, int mode)
4706 struct btrfs_key key;
4707 struct btrfs_key location;
4708 struct btrfs_inode_ref *ref;
4709 struct extent_buffer *node;
4710 char namebuf[BTRFS_NAME_LEN] = {0};
4720 int need_research = 0;
4728 /* since after repair, path and the dir item may be changed */
4729 if (need_research) {
4731 btrfs_release_path(path);
4732 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
4733 /* the item was deleted, let path point to the last checked item */
4735 if (path->slots[0] == 0)
4736 btrfs_prev_leaf(root, path);
/*
 * Every DIR_ITEM/DIR_INDEX looked up below is expected to point at this
 * ref's own inode: (ref_key->objectid, INODE_ITEM, 0).
 */
4744 location.objectid = ref_key->objectid;
4745 location.type = BTRFS_INODE_ITEM_KEY;
4746 location.offset = 0;
4747 node = path->nodes[0];
4748 slot = path->slots[0];
4750 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4751 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4752 total = btrfs_item_size_nr(node, slot);
4755 /* Update inode ref count */
4758 index = btrfs_inode_ref_index(node, ref);
4759 name_len = btrfs_inode_ref_name_len(node, ref);
4761 if (name_len <= BTRFS_NAME_LEN) {
4764 len = BTRFS_NAME_LEN;
4765 warning("root %llu INODE_REF[%llu %llu] name too long",
4766 root->objectid, ref_key->objectid, ref_key->offset);
4769 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4771 /* copy the first name found to name_ret */
4772 if (refs == 1 && name_ret) {
4773 memcpy(name_ret, namebuf, len);
4777 /* Check root dir ref */
4778 if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4779 if (index != 0 || len != strlen("..") ||
4780 strncmp("..", namebuf, len) ||
4781 ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4782 /* set err bits then repair will delete the ref */
4783 err |= DIR_INDEX_MISSING;
4784 err |= DIR_ITEM_MISSING;
/* ref_key->offset is used as the objectid of both directory keys below. */
4789 /* Find related DIR_INDEX */
4790 key.objectid = ref_key->offset;
4791 key.type = BTRFS_DIR_INDEX_KEY;
4793 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4794 imode_to_type(mode));
4796 /* Find related dir_item */
4797 key.objectid = ref_key->offset;
4798 key.type = BTRFS_DIR_ITEM_KEY;
4799 key.offset = btrfs_name_hash(namebuf, len);
4800 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4801 imode_to_type(mode));
/*
 * NOTE(review): name_len is the length stored on disk and can exceed the
 * len bytes actually copied into namebuf when the name was trimmed above -
 * confirm the callees below tolerate that.
 */
4803 if (tmp_err && repair) {
4804 ret = repair_ternary_lowmem(root, ref_key->offset,
4805 ref_key->objectid, index, namebuf,
4806 name_len, imode_to_type(mode),
4813 print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4814 imode_to_type(mode), tmp_err);
/* Advance to the next ref packed in the same item. */
4816 len = sizeof(*ref) + name_len;
4817 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4828 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4829 * DIR_ITEM/DIR_INDEX.
4831 * @root: the root of the fs/file tree
4832 * @ref_key: the key of the INODE_EXTREF
4833 * @refs: the count of INODE_EXTREF
4834 * @mode: the st_mode of INODE_ITEM
4836 * Return 0 if no error occurred.
4838 static int check_inode_extref(struct btrfs_root *root,
4839 struct btrfs_key *ref_key,
4840 struct extent_buffer *node, int slot, u64 *refs,
4843 struct btrfs_key key;
4844 struct btrfs_key location;
4845 struct btrfs_inode_extref *extref;
4846 char namebuf[BTRFS_NAME_LEN] = {0};
/*
 * Every DIR_ITEM/DIR_INDEX looked up below is expected to point at this
 * extref's own inode: (ref_key->objectid, INODE_ITEM, 0).
 */
4856 location.objectid = ref_key->objectid;
4857 location.type = BTRFS_INODE_ITEM_KEY;
4858 location.offset = 0;
4860 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4861 total = btrfs_item_size_nr(node, slot);
4864 /* update inode ref count */
4866 name_len = btrfs_inode_extref_name_len(node, extref);
4867 index = btrfs_inode_extref_index(node, extref);
4868 parent = btrfs_inode_extref_parent(node, extref);
4869 if (name_len <= BTRFS_NAME_LEN) {
4872 len = BTRFS_NAME_LEN;
4873 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4874 root->objectid, ref_key->objectid, ref_key->offset);
4876 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4878 /* Check root dir ref name */
4879 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4880 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4881 root->objectid, ref_key->objectid, ref_key->offset,
4883 err |= ROOT_DIR_ERROR;
/*
 * find_dir_item() compares its u8 file_type argument with the type stored
 * in the dir item, so the st_mode in @mode has to be converted with
 * imode_to_type() first, exactly as check_inode_ref() does.  Passing the raw
 * mode truncated it to 8 bits and made every lookup report a mismatch.
 */
4886 /* find related dir_index */
4887 key.objectid = parent;
4888 key.type = BTRFS_DIR_INDEX_KEY;
4890 ret = find_dir_item(root, &key, &location, namebuf, len, imode_to_type(mode));
4893 /* find related dir_item */
4894 key.objectid = parent;
4895 key.type = BTRFS_DIR_ITEM_KEY;
4896 key.offset = btrfs_name_hash(namebuf, len);
4897 ret = find_dir_item(root, &key, &location, namebuf, len, imode_to_type(mode));
/* Advance to the next extref packed in the same item. */
4900 len = sizeof(*extref) + name_len;
4901 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4911 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4912 * DIR_ITEM/DIR_INDEX match.
4913 * Return with @index_ret.
4915 * @root: the root of the fs/file tree
4916 * @key: the key of the INODE_REF/INODE_EXTREF
4917 * @name: the name in the INODE_REF/INODE_EXTREF
4918 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4919 * @index_ret: the index in the INODE_REF/INODE_EXTREF,
4920 * value (u64)-1 means do not check index
4921 * @ext_ref: the EXTENDED_IREF feature
4923 * Return 0 if no error occurred.
4924 * Return >0 for error bitmap
4926 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4927 char *name, int namelen, u64 *index_ret,
4928 unsigned int ext_ref)
4930 struct btrfs_path path;
4931 struct btrfs_inode_ref *ref;
4932 struct btrfs_inode_extref *extref;
4933 struct extent_buffer *node;
4934 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4947 btrfs_init_path(&path);
4948 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4950 ret = INODE_REF_MISSING;
4954 node = path.nodes[0];
4955 slot = path.slots[0];
4957 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4958 total = btrfs_item_size_nr(node, slot);
4960 /* Iterate all entry of INODE_REF */
4961 while (cur < total) {
4962 ret = INODE_REF_MISSING;
4964 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4965 ref_index = btrfs_inode_ref_index(node, ref);
/* (u64)-1 in *index_ret is a wildcard: indexes are compared only otherwise. */
4966 if (*index_ret != (u64)-1 && *index_ret != ref_index)
/* Guard against a name that overruns the item or the local name buffer. */
4969 if (cur + sizeof(*ref) + ref_namelen > total ||
4970 ref_namelen > BTRFS_NAME_LEN) {
4971 warning("root %llu INODE %s[%llu %llu] name too long",
4973 key->type == BTRFS_INODE_REF_KEY ?
4975 key->objectid, key->offset);
4977 if (cur + sizeof(*ref) > total)
4979 len = min_t(u32, total - cur - sizeof(*ref),
4985 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4988 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Matching entry: report its index back through @index_ret. */
4991 *index_ret = ref_index;
4995 len = sizeof(*ref) + ref_namelen;
4996 ref = (struct btrfs_inode_ref *)((char *)ref + len);
5001 /* Skip if not support EXTENDED_IREF feature */
5005 btrfs_release_path(&path);
5006 btrfs_init_path(&path);
/*
 * Retarget @key at the INODE_EXTREF item for the same (parent dir, name)
 * pair.  This writes through the caller's key, so its type and offset are
 * changed when the function returns.
 */
5008 dir_id = key->offset;
5009 key->type = BTRFS_INODE_EXTREF_KEY;
5010 key->offset = btrfs_extref_hash(dir_id, name, namelen);
5012 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5014 ret = INODE_REF_MISSING;
5018 node = path.nodes[0];
5019 slot = path.slots[0];
5021 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5023 total = btrfs_item_size_nr(node, slot);
5025 /* Iterate all entry of INODE_EXTREF */
5026 while (cur < total) {
5027 ret = INODE_REF_MISSING;
5029 ref_namelen = btrfs_inode_extref_name_len(node, extref);
5030 ref_index = btrfs_inode_extref_index(node, extref);
5031 parent = btrfs_inode_extref_parent(node, extref);
5032 if (*index_ret != (u64)-1 && *index_ret != ref_index)
/* Extrefs of several parent directories can share one item (hash key). */
5035 if (parent != dir_id)
5038 if (ref_namelen <= BTRFS_NAME_LEN) {
5041 len = BTRFS_NAME_LEN;
5042 warning("root %llu INODE %s[%llu %llu] name too long",
5044 key->type == BTRFS_INODE_REF_KEY ?
5046 key->objectid, key->offset);
5048 read_extent_buffer(node, ref_namebuf,
5049 (unsigned long)(extref + 1), len);
5051 if (len != namelen || strncmp(ref_namebuf, name, len))
5054 *index_ret = ref_index;
5059 len = sizeof(*extref) + ref_namelen;
5060 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5065 btrfs_release_path(&path);
/*
 * Print one error line for every problem class set in @err for a directory
 * entry: DIR_ITEM, DIR_INDEX, INODE_ITEM and INODE_REF.  The first three say
 * "mismatch" when their own *_MISMATCH bit is set and "missing" otherwise.
 */
5069 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
5070 u64 ino, u64 index, const char *namebuf,
5071 int name_len, u8 filetype, int err)
5073 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
5074 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
5075 root->objectid, key->objectid, key->offset, namebuf,
5077 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing");
/*
 * The wording must be chosen from the DIR_INDEX bit; testing
 * DIR_ITEM_MISMATCH here reported a mismatching index as "missing".
 */
5080 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
5081 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
5082 root->objectid, key->objectid, index, namebuf, filetype,
5083 err & DIR_INDEX_MISMATCH ? "mismatch" : "missing");
5086 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
5088 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
5089 root->objectid, ino, index, namebuf, filetype,
5090 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
5093 if (err & INODE_REF_MISSING)
5095 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5096 root->objectid, ino, key->objectid, namebuf, filetype);
5101 * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5103 * Returns error after repair
5105 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5106 u64 index, u8 filetype, char *namebuf, u32 name_len,
/* Step 1: a missing INODE_ITEM is handled by repair_inode_item_missing(). */
5111 if (err & INODE_ITEM_MISSING) {
5112 ret = repair_inode_item_missing(root, ino, filetype);
5114 err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
/*
 * Step 2: any bit other than the INODE_ITEM ones is passed to
 * repair_ternary_lowmem(); the DIR_INDEX, DIR_ITEM and INODE_REF bits are
 * the ones cleared from @err afterwards.
 */
5117 if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5118 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5119 name_len, filetype, err);
5121 err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5122 err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5123 err &= ~(INODE_REF_MISSING);
/*
 * Helper for count_dir_isize(): starting from offset (u64)-1, visit the
 * items of key type @type that belong to @ino via btrfs_previous_item() and
 * step through every btrfs_dir_item packed in them.  Name lengths larger
 * than BTRFS_NAME_LEN are clamped.
 */
5129 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5132 struct btrfs_key key;
5133 struct btrfs_path path;
5135 struct btrfs_dir_item *di;
5145 key.offset = (u64)-1;
5147 btrfs_init_path(&path);
5148 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5153 /* if found, go to special case */
5158 ret = btrfs_previous_item(root, &path, ino, type);
5166 di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5168 total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5170 while (cur < total) {
5171 len = btrfs_dir_name_len(path.nodes[0], di);
5172 if (len > BTRFS_NAME_LEN)
5173 len = BTRFS_NAME_LEN;
/* Add the entry's data length before advancing to the next entry. */
5176 len += btrfs_dir_data_len(path.nodes[0], di);
5178 di = (struct btrfs_dir_item *)((char *)di + len);
5184 btrfs_release_path(&path);
/*
 * Compute the size of directory @ino as the sum of the values that
 * __count_dir_isize() reports for its DIR_ITEM and its DIR_INDEX items, and
 * store it in *@size.
 */
5188 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5195 ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5199 ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5203 *size = item_size + index_size;
5207 error("failed to count root %llu INODE[%llu] root size",
5208 root->objectid, ino);
5213 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5214 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5216 * @root: the root of the fs/file tree
5217 * @di_key: the key of the DIR_ITEM/DIR_INDEX
5219 * @size: the st_size of the INODE_ITEM
5220 * @ext_ref: the EXTENDED_IREF feature
5222 * Return 0 if no error occurred.
5223 * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
5225 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5226 struct btrfs_path *path, u64 *size,
5227 unsigned int ext_ref)
5229 struct btrfs_dir_item *di;
5230 struct btrfs_inode_item *ii;
5231 struct btrfs_key key;
5232 struct btrfs_key location;
5233 struct extent_buffer *node;
5235 char namebuf[BTRFS_NAME_LEN] = {0};
5247 int need_research = 0;
5250 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5251 * ignore index check.
5253 if (di_key->type == BTRFS_DIR_INDEX_KEY)
5254 index = di_key->offset;
5261 /* since after repair, path and the dir item may be changed */
5262 if (need_research) {
5264 err |= DIR_COUNT_AGAIN;
5265 btrfs_release_path(path);
5266 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5267 /* the item was deleted, let path point the last checked item */
5269 if (path->slots[0] == 0)
5270 btrfs_prev_leaf(root, path);
5278 node = path->nodes[0];
5279 slot = path->slots[0];
5281 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5282 total = btrfs_item_size_nr(node, slot);
5283 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5285 while (cur < total) {
5286 data_len = btrfs_dir_data_len(node, di);
5289 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5291 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5292 di_key->objectid, di_key->offset, data_len);
5294 name_len = btrfs_dir_name_len(node, di);
5295 if (name_len <= BTRFS_NAME_LEN) {
5298 len = BTRFS_NAME_LEN;
5299 warning("root %llu %s[%llu %llu] name too long",
5301 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5302 di_key->objectid, di_key->offset);
5304 (*size) += name_len;
5305 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5307 filetype = btrfs_dir_type(node, di);
/* A DIR_ITEM key offset must equal the hash of the name it stores. */
5309 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5310 di_key->offset != btrfs_name_hash(namebuf, len)) {
5312 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5313 root->objectid, di_key->objectid, di_key->offset,
5314 namebuf, len, filetype, di_key->offset,
5315 btrfs_name_hash(namebuf, len));
5318 btrfs_dir_item_key_to_cpu(node, di, &location);
5319 /* Ignore related ROOT_ITEM check */
5320 if (location.type == BTRFS_ROOT_ITEM_KEY)
5323 btrfs_release_path(path);
5324 /* Check relative INODE_ITEM(existence/filetype) */
5325 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5327 tmp_err |= INODE_ITEM_MISSING;
5331 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5332 struct btrfs_inode_item);
5333 mode = btrfs_inode_mode(path->nodes[0], ii);
5334 if (imode_to_type(mode) != filetype) {
5335 tmp_err |= INODE_ITEM_MISMATCH;
5339 /* Check relative INODE_REF/INODE_EXTREF */
5340 key.objectid = location.objectid;
5341 key.type = BTRFS_INODE_REF_KEY;
5342 key.offset = di_key->objectid;
5343 tmp_err |= find_inode_ref(root, &key, namebuf, len,
/*
 * Pick the counterpart of the item being checked: a DIR_ITEM is verified
 * against its DIR_INDEX and a DIR_INDEX against its DIR_ITEM.  The decision
 * must be made on di_key->type: at this point key.type holds the
 * INODE_REF/INODE_EXTREF type set for find_inode_ref(), so testing key.type
 * never selected the DIR_INDEX lookup.
 */
5346 /* check relative INDEX/ITEM */
5347 key.objectid = di_key->objectid;
5348 if (di_key->type == BTRFS_DIR_ITEM_KEY) {
5349 key.type = BTRFS_DIR_INDEX_KEY;
5352 key.type = BTRFS_DIR_ITEM_KEY;
5353 key.offset = btrfs_name_hash(namebuf, name_len);
5356 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5357 name_len, filetype);
5358 /* find_dir_item may find index */
5359 if (key.type == BTRFS_DIR_INDEX_KEY)
5363 if (tmp_err && repair) {
5364 ret = repair_dir_item(root, di_key->objectid,
5365 location.objectid, index,
5366 imode_to_type(mode), namebuf,
5368 if (ret != tmp_err) {
5373 btrfs_release_path(path);
5374 print_dir_item_err(root, di_key, location.objectid, index,
5375 namebuf, name_len, filetype, tmp_err);
/* Advance to the next entry packed in the same item. */
5377 len = sizeof(*di) + name_len + data_len;
5378 di = (struct btrfs_dir_item *)((char *)di + len);
5381 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5382 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5383 root->objectid, di_key->objectid,
/* Point @path back at the item identified by @di_key before returning. */
5390 btrfs_release_path(path);
5391 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5393 err |= ret > 0 ? -ENOENT : ret;
5398 * Wrapper function of btrfs_punch_hole.
5400 * Returns 0 means success.
5401 * Returns not 0 means error.
5403 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5406 struct btrfs_trans_handle *trans;
5409 trans = btrfs_start_transaction(root, 1);
/* No transaction: return the error code encoded in the pointer. */
5411 return PTR_ERR(trans);
/*
 * Ask btrfs_punch_hole() for a hole described by @start and @len in inode
 * @ino; the outcome is reported by one of the two messages below.
 */
5413 ret = btrfs_punch_hole(trans, root, ino, start, len);
5415 error("failed to add hole [%llu, %llu] in inode [%llu]",
5418 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
5421 btrfs_commit_transaction(trans, root);
5426 * Check file extent datasum/hole, update the size of the file extents,
5427 * check and update the last offset of the file extent.
5429 * @root: the root of fs/file tree.
5430 * @fkey: the key of the file extent.
5431 * @nodatasum: INODE_NODATASUM feature.
5432 * @size: the sum of all EXTENT_DATA items size for this inode.
5433 * @end: the offset of the last extent.
5435 * Return 0 if no error occurred.
/*
 * check_file_extent(): validate the EXTENT_DATA item at @slot of @node.
 *
 * Inline extents: an empty inline length, or (when not compressed) a
 * length different from the inline item length, sets FILE_EXTENT_ERROR.
 * Any type other than REG or PREALLOC also sets FILE_EXTENT_ERROR.
 * REG and PREALLOC extents: the csum bytes covering the extent are
 * counted with count_csum_range() and compared with @nodatasum and the
 * extent type (ODD_CSUM_ITEM, CSUM_ITEM_MISSING).  Unless no_holes is
 * set, a gap between the running end offset and @fkey->offset is either
 * punched as a hole (repair) or reported as FILE_EXTENT_ERROR.
 *
 * The running totals behind @end and @size are increased by the extent
 * length.  NOTE(review): the lines that select between the two csum
 * search ranges, guard the final @size update and return err are not
 * part of this excerpt - confirm against the full file.
 */
5437 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5438 struct extent_buffer *node, int slot,
5439 unsigned int nodatasum, u64 *size, u64 *end)
5441 struct btrfs_file_extent_item *fi;
5444 u64 extent_num_bytes;
5446 u64 csum_found; /* In byte size, sectorsize aligned */
5447 u64 search_start; /* Logical range start we search for csum */
5448 u64 search_len; /* Logical range len we search for csum */
5449 unsigned int extent_type;
5450 unsigned int is_hole;
5455 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5457 /* Check inline extent */
5458 extent_type = btrfs_file_extent_type(node, fi);
5459 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5460 struct btrfs_item *e = btrfs_item_nr(slot);
5461 u32 item_inline_len;
5463 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5464 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5465 compressed = btrfs_file_extent_compression(node, fi);
5466 if (extent_num_bytes == 0) {
5468 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5469 root->objectid, fkey->objectid, fkey->offset);
5470 err |= FILE_EXTENT_ERROR;
5472 if (!compressed && extent_num_bytes != item_inline_len) {
5474 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5475 root->objectid, fkey->objectid, fkey->offset,
5476 extent_num_bytes, item_inline_len);
5477 err |= FILE_EXTENT_ERROR;
5479 *end += extent_num_bytes;
5480 *size += extent_num_bytes;
5484 /* Check extent type */
5485 if (extent_type != BTRFS_FILE_EXTENT_REG &&
5486 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5487 err |= FILE_EXTENT_ERROR;
5488 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5489 root->objectid, fkey->objectid, fkey->offset);
5493 /* Check REG_EXTENT/PREALLOC_EXTENT */
5494 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5495 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5496 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5497 extent_offset = btrfs_file_extent_offset(node, fi);
5498 compressed = btrfs_file_extent_compression(node, fi);
5499 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5502 * Check EXTENT_DATA csum
5504 * For plain (uncompressed) extent, we should only check the range
5505 * we're referring to, as it's possible that part of prealloc extent
5506 * has been written, and has csum:
5508 * |<--- Original large preallocated extent A ---->|
5509 * |<- Prealloc File Extent ->|<- Regular Extent ->|
5512 * For compressed extent, we should check the whole range.
5515 search_start = disk_bytenr + extent_offset;
5516 search_len = extent_num_bytes;
5518 search_start = disk_bytenr;
5519 search_len = disk_num_bytes;
5521 ret = count_csum_range(root, search_start, search_len, &csum_found);
5522 if (csum_found > 0 && nodatasum) {
5523 err |= ODD_CSUM_ITEM;
5524 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5525 root->objectid, fkey->objectid, fkey->offset);
5526 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5527 !is_hole && (ret < 0 || csum_found < search_len)) {
5528 err |= CSUM_ITEM_MISSING;
5529 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5530 root->objectid, fkey->objectid, fkey->offset,
5531 csum_found, search_len);
5532 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5533 err |= ODD_CSUM_ITEM;
5534 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5535 root->objectid, fkey->objectid, fkey->offset, csum_found);
5538 /* Check EXTENT_DATA hole */
5539 if (!no_holes && *end != fkey->offset) {
5541 ret = punch_extent_hole(root, fkey->objectid,
5542 *end, fkey->offset - *end);
5543 if (!repair || ret) {
5544 err |= FILE_EXTENT_ERROR;
5546 "root %llu EXTENT_DATA[%llu %llu] gap exists, expected: EXTENT_DATA[%llu %llu]",
5547 root->objectid, fkey->objectid, fkey->offset,
5548 fkey->objectid, *end);
5552 *end += extent_num_bytes;
5554 *size += extent_num_bytes;
5560 * Set inode item nbytes to @nbytes
5562 * Returns 0 on success
5563 * Returns != 0 on error
/*
 * repair_inode_nbytes_lowmem(): store @nbytes in the INODE_ITEM of @ino.
 *
 * The key @path points at on entry is remembered in research_key.  A
 * transaction is started, the BTRFS_INODE_ITEM_KEY item is looked up
 * under it, btrfs_set_inode_nbytes() writes the new value, the leaf is
 * marked dirty and the transaction is committed.  The result is reported
 * with error() or printf(); finally @path is released and searched again
 * for research_key so the caller's iteration position is restored.
 *
 * Fix: the success message used to read "...root %llu\n to %llu", which
 * split the message over two lines and left the second one unterminated.
 * The newline now ends the message, like the sibling isize message.
 */
5565 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5566 struct btrfs_path *path,
5567 u64 ino, u64 nbytes)
5569 struct btrfs_trans_handle *trans;
5570 struct btrfs_inode_item *ii;
5571 struct btrfs_key key;
5572 struct btrfs_key research_key;
5576 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5579 key.type = BTRFS_INODE_ITEM_KEY;
5582 trans = btrfs_start_transaction(root, 1);
5583 if (IS_ERR(trans)) {
5584 ret = PTR_ERR(trans);
5589 btrfs_release_path(path);
5590 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5598 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5599 struct btrfs_inode_item);
5600 btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5601 btrfs_mark_buffer_dirty(path->nodes[0]);
5603 btrfs_commit_transaction(trans, root);
5606 error("failed to set nbytes in inode %llu root %llu",
5607 ino, root->root_key.objectid);
5609 printf("Set nbytes in inode item %llu root %llu to %llu\n", ino,
5610 root->root_key.objectid, nbytes);
5613 btrfs_release_path(path);
5614 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5621 * Set directory inode isize to @isize.
5623 * Returns 0 on success.
5624 * Returns != 0 on error.
/*
 * repair_dir_isize_lowmem(): store isize in a directory's INODE_ITEM.
 *
 * Same shape as the nbytes repair helper: remember the key @path points
 * at, start a transaction, look up the BTRFS_INODE_ITEM_KEY item under
 * it, write the size with btrfs_set_inode_size(), mark the leaf dirty
 * and commit.  The outcome is reported with error() or printf(), then
 * @path is released and searched again for the remembered key.
 */
5626 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5627 struct btrfs_path *path,
5630 struct btrfs_trans_handle *trans;
5631 struct btrfs_inode_item *ii;
5632 struct btrfs_key key;
5633 struct btrfs_key research_key;
5637 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5640 key.type = BTRFS_INODE_ITEM_KEY;
5643 trans = btrfs_start_transaction(root, 1);
5644 if (IS_ERR(trans)) {
5645 ret = PTR_ERR(trans);
5650 btrfs_release_path(path);
5651 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5659 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5660 struct btrfs_inode_item);
5661 btrfs_set_inode_size(path->nodes[0], ii, isize);
5662 btrfs_mark_buffer_dirty(path->nodes[0]);
5664 btrfs_commit_transaction(trans, root);
5667 error("failed to set isize in inode %llu root %llu",
5668 ino, root->root_key.objectid);
5670 printf("Set isize in inode %llu root %llu to %llu\n",
5671 ino, root->root_key.objectid, isize);
5673 btrfs_release_path(path);
5674 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5681 * Wrapper function for btrfs_add_orphan_item().
5683 * Returns 0 on success.
5684 * Returns != 0 on error.
/*
 * repair_inode_orphan_item_lowmem(): add an orphan item for inode @ino.
 *
 * Remembers the key @path points at, starts a transaction, releases
 * @path and calls btrfs_add_orphan_item() for @ino, then commits.  The
 * outcome is reported with error() or printf(), after which @path is
 * released and searched again for the remembered key.
 */
5686 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5687 struct btrfs_path *path, u64 ino)
5689 struct btrfs_trans_handle *trans;
5690 struct btrfs_key research_key;
5694 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5696 trans = btrfs_start_transaction(root, 1);
5697 if (IS_ERR(trans)) {
5698 ret = PTR_ERR(trans);
5703 btrfs_release_path(path);
5704 ret = btrfs_add_orphan_item(trans, root, path, ino);
5706 btrfs_commit_transaction(trans, root);
5709 error("failed to add inode %llu as orphan item root %llu",
5710 ino, root->root_key.objectid);
5712 printf("Added inode %llu as orphan item root %llu\n",
5713 ino, root->root_key.objectid);
5715 btrfs_release_path(path);
5716 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5722 /* Set inode_item nlink to @ref_count.
5723 * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
5725 * Returns 0 on success
/*
 * repair_inode_nlinks_lowmem(): set the nlink field of inode @ino to
 * @ref_count, linking the inode into lost+found first when @ref_count
 * is 0.
 *
 * @name/@namelen: name to use for the lost+found entry; when absent the
 *                 decimal inode number is used instead.
 * @filetype:      file type passed on to link_inode_to_lostfound().
 * @nlink:         out parameter of the caller (its update is on lines
 *                 outside this excerpt).
 *
 * The key @path points at on entry is saved in old_key and searched for
 * again at the end so the caller can continue iterating.
 *
 * Fixes:
 *  - Both result messages are worded "inode %llu root %llu" but were
 *    given the root objectid first and the inode number second, so the
 *    two values were printed under the wrong labels.  The arguments now
 *    follow the format string.
 *  - namebuf had room for exactly BTRFS_NAME_LEN bytes, and the ASSERT
 *    allows @namelen == BTRFS_NAME_LEN, so a maximum length name filled
 *    the buffer without a terminating NUL before being printed with %s.
 *    The buffer is one byte larger so the zero initialisation always
 *    leaves a terminator.
 */
5727 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
5728 struct btrfs_path *path, u64 ino,
5729 const char *name, u32 namelen,
5730 u64 ref_count, u8 filetype, u64 *nlink)
5732 struct btrfs_trans_handle *trans;
5733 struct btrfs_inode_item *ii;
5734 struct btrfs_key key;
5735 struct btrfs_key old_key;
5736 char namebuf[BTRFS_NAME_LEN + 1] = {0};
5742 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
5744 if (name && namelen) {
5745 ASSERT(namelen <= BTRFS_NAME_LEN);
5746 memcpy(namebuf, name, namelen);
5749 sprintf(namebuf, "%llu", ino);
5750 name_len = count_digits(ino);
5751 printf("Can't find file name for inode %llu, use %s instead\n",
5755 trans = btrfs_start_transaction(root, 1);
5756 if (IS_ERR(trans)) {
5757 ret = PTR_ERR(trans);
5761 btrfs_release_path(path);
5762 /* if refs is 0, put it into lostfound */
5763 if (ref_count == 0) {
5764 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
5765 name_len, filetype, &ref_count);
5770 /* reset inode_item's nlink to ref_count */
5772 key.type = BTRFS_INODE_ITEM_KEY;
5775 btrfs_release_path(path);
5776 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5782 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5783 struct btrfs_inode_item);
5784 btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
5785 btrfs_mark_buffer_dirty(path->nodes[0]);
5790 btrfs_commit_transaction(trans, root);
5794 "fail to repair nlink of inode %llu root %llu name %s filetype %u",
5795 ino, root->objectid, namebuf, filetype);
5797 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
5798 ino, root->objectid, namebuf, filetype);
5801 btrfs_release_path(path);
5802 ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
5809 * Check INODE_ITEM and related ITEMs (the same inode number)
5810 * 1. check link count
5811 * 2. check inode ref/extref
5812 * 3. check dir item/index
5814 * @ext_ref: the EXTENDED_IREF feature
5816 * Return 0 if no error occurred.
5817 * Return >0 (an error bitmap) if an error was found or the traversal is done.
/*
 * check_inode_item(): check the INODE_ITEM @path points at together with
 * the following items that share its objectid.
 *
 * isize, nbytes, mode, nlink and the NODATASUM flag are read from the
 * inode item.  The function then steps forward with btrfs_next_item()
 * and dispatches on key.type:
 *   INODE_REF    -> check_inode_ref()
 *   INODE_EXTREF -> check_inode_extref() (warns when @ext_ref is unset)
 *   DIR_ITEM and DIR_INDEX -> check_dir_item()
 *   EXTENT_DATA  -> check_file_extent()
 *   XATTR_ITEM   -> accepted
 *   anything else is reported as an unknown type.
 * Afterwards nlink, isize and nbytes are compared with the collected
 * refs, size and extent_size.  When repair is set the matching
 * repair_..._lowmem() helper or punch_extent_hole() is tried first, and
 * the error bit is recorded if repair is off or the helper failed.
 *
 * An item with objectid BTRFS_ORPHAN_OBJECTID only advances @path.
 * The LAST_ITEM bit in err drives the final repositioning of @path.
 */
5819 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5820 unsigned int ext_ref)
5822 struct extent_buffer *node;
5823 struct btrfs_inode_item *ii;
5824 struct btrfs_key key;
5825 struct btrfs_key last_key;
5834 u64 extent_size = 0;
5836 unsigned int nodatasum;
5840 char namebuf[BTRFS_NAME_LEN] = {0};
5843 node = path->nodes[0];
5844 slot = path->slots[0];
5846 btrfs_item_key_to_cpu(node, &key, slot);
5847 inode_id = key.objectid;
5849 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5850 ret = btrfs_next_item(root, path);
5856 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5857 isize = btrfs_inode_size(node, ii);
5858 nbytes = btrfs_inode_nbytes(node, ii);
5859 mode = btrfs_inode_mode(node, ii);
5860 dir = imode_to_type(mode) == BTRFS_FT_DIR;
5861 nlink = btrfs_inode_nlink(node, ii);
5862 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5865 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
5866 ret = btrfs_next_item(root, path);
5868 /* out will fill 'err' using current statistics */
5870 } else if (ret > 0) {
5875 node = path->nodes[0];
5876 slot = path->slots[0];
5877 btrfs_item_key_to_cpu(node, &key, slot);
5878 if (key.objectid != inode_id)
5882 case BTRFS_INODE_REF_KEY:
5883 ret = check_inode_ref(root, &key, path, namebuf,
5884 &name_len, &refs, mode);
5887 case BTRFS_INODE_EXTREF_KEY:
5888 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5889 warning("root %llu EXTREF[%llu %llu] isn't supported",
5890 root->objectid, key.objectid,
5892 ret = check_inode_extref(root, &key, node, slot, &refs,
5896 case BTRFS_DIR_ITEM_KEY:
5897 case BTRFS_DIR_INDEX_KEY:
5899 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5900 root->objectid, inode_id,
5901 imode_to_type(mode), key.objectid,
5904 ret = check_dir_item(root, &key, path, &size, ext_ref);
5907 case BTRFS_EXTENT_DATA_KEY:
5909 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5910 root->objectid, inode_id, key.objectid,
5913 ret = check_file_extent(root, &key, node, slot,
5914 nodatasum, &extent_size,
5918 case BTRFS_XATTR_ITEM_KEY:
5921 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5922 key.objectid, key.type, key.offset);
5927 if (err & LAST_ITEM) {
5928 btrfs_release_path(path);
5929 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
5934 /* verify INODE_ITEM nlink/isize/nbytes */
5936 if (repair && (err & DIR_COUNT_AGAIN)) {
5937 err &= ~DIR_COUNT_AGAIN;
5938 count_dir_isize(root, inode_id, &size);
5941 if ((nlink != 1 || refs != 1) && repair) {
5942 ret = repair_inode_nlinks_lowmem(root, path, inode_id,
5943 namebuf, name_len, refs, imode_to_type(mode),
5948 err |= LINK_COUNT_ERROR;
5949 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5950 root->objectid, inode_id, nlink);
5954 * Just a warning, as dir inode nbytes is just an
5955 * instructive value.
5957 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5958 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5959 root->objectid, inode_id,
5960 root->fs_info->nodesize);
5963 if (isize != size) {
5965 ret = repair_dir_isize_lowmem(root, path,
5967 if (!repair || ret) {
5970 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5971 root->objectid, inode_id, isize, size);
5975 if (nlink != refs) {
5977 ret = repair_inode_nlinks_lowmem(root, path,
5978 inode_id, namebuf, name_len, refs,
5979 imode_to_type(mode), &nlink);
5980 if (!repair || ret) {
5981 err |= LINK_COUNT_ERROR;
5983 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5984 root->objectid, inode_id, nlink, refs);
5986 } else if (!nlink) {
5988 ret = repair_inode_orphan_item_lowmem(root,
5990 if (!repair || ret) {
5992 error("root %llu INODE[%llu] is orphan item",
5993 root->objectid, inode_id);
5997 if (!nbytes && !no_holes && extent_end < isize) {
5999 ret = punch_extent_hole(root, inode_id,
6000 extent_end, isize - extent_end);
6001 if (!repair || ret) {
6002 err |= NBYTES_ERROR;
6004 "root %llu INODE[%llu] size %llu should have a file extent hole",
6005 root->objectid, inode_id, isize);
6009 if (nbytes != extent_size) {
6011 ret = repair_inode_nbytes_lowmem(root, path,
6012 inode_id, extent_size);
6013 if (!repair || ret) {
6014 err |= NBYTES_ERROR;
6016 "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
6017 root->objectid, inode_id, nbytes,
6023 if (err & LAST_ITEM)
6024 btrfs_next_item(root, path);
6029 * Insert the missing inode item and inode ref.
6031 * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
6032 * Root dir should be handled specially because root dir is the root of fs.
6034 * returns err (>0 or 0) after repair
/*
 * repair_fs_first_inode(): try to repair the problems flagged in @err for
 * inode BTRFS_FIRST_FREE_OBJECTID of @root.
 *
 * INODE_REF_MISSING: inside a transaction, search for the INODE_REF key
 * of the inode and insert an INODE_REF named ".." whose parent is the
 * inode itself; the bit is cleared next to the "Add INODE_REF" message.
 * INODE_ITEM_MISSING: call repair_inode_item_missing() with filetype
 * BTRFS_FT_DIR; the bit is cleared on that path as well.
 *
 * The local path is released before returning.
 */
6036 static int repair_fs_first_inode(struct btrfs_root *root, int err)
6038 struct btrfs_trans_handle *trans;
6039 struct btrfs_key key;
6040 struct btrfs_path path;
6041 int filetype = BTRFS_FT_DIR;
6044 btrfs_init_path(&path);
6046 if (err & INODE_REF_MISSING) {
6047 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6048 key.type = BTRFS_INODE_REF_KEY;
6049 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6051 trans = btrfs_start_transaction(root, 1);
6052 if (IS_ERR(trans)) {
6053 ret = PTR_ERR(trans);
6057 btrfs_release_path(&path);
6058 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
6062 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
6063 BTRFS_FIRST_FREE_OBJECTID,
6064 BTRFS_FIRST_FREE_OBJECTID, 0);
6068 printf("Add INODE_REF[%llu %llu] name %s\n",
6069 BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
6071 err &= ~INODE_REF_MISSING;
6074 error("fail to insert first inode's ref");
6075 btrfs_commit_transaction(trans, root);
6078 if (err & INODE_ITEM_MISSING) {
6079 ret = repair_inode_item_missing(root,
6080 BTRFS_FIRST_FREE_OBJECTID, filetype);
6083 err &= ~INODE_ITEM_MISSING;
6087 error("fail to repair first inode");
6088 btrfs_release_path(&path);
6093 * check first root dir's inode_item and inode_ref
6095 * returns 0 means no error
6096 * returns >0 means error
6097 * returns <0 means fatal error
/*
 * check_fs_first_inode(): check that inode BTRFS_FIRST_FREE_OBJECTID of
 * @root has an INODE_ITEM of directory type and an INODE_REF named "..".
 *
 * The check is not needed for a root that is being dropped (root refs
 * == 0 and drop_progress objectid >= BTRFS_FIRST_FREE_OBJECTID).
 * A missing item sets INODE_ITEM_MISSING, a non-directory mode sets
 * INODE_ITEM_MISMATCH; the ref is looked up with find_inode_ref().
 * repair_fs_first_inode() may then be called on the collected bits
 * (NOTE(review): presumably only when repair is set - confirm), and the
 * bits that remain are reported.
 *
 * Returns ret when it is negative, otherwise the error bitmap err.
 */
6099 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
6101 struct btrfs_path path;
6102 struct btrfs_key key;
6103 struct btrfs_inode_item *ii;
6109 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6110 key.type = BTRFS_INODE_ITEM_KEY;
6113 /* For root being dropped, we don't need to check first inode */
6114 if (btrfs_root_refs(&root->root_item) == 0 &&
6115 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
6116 BTRFS_FIRST_FREE_OBJECTID)
6119 btrfs_init_path(&path);
6120 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6125 err |= INODE_ITEM_MISSING;
6127 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6128 struct btrfs_inode_item);
6129 mode = btrfs_inode_mode(path.nodes[0], ii);
6130 if (imode_to_type(mode) != BTRFS_FT_DIR)
6131 err |= INODE_ITEM_MISMATCH;
6134 /* lookup first inode ref */
6135 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6136 key.type = BTRFS_INODE_REF_KEY;
6137 /* special index value */
6140 ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6146 btrfs_release_path(&path);
6149 err = repair_fs_first_inode(root, err);
6151 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6152 error("root dir INODE_ITEM is %s",
6153 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6154 if (err & INODE_REF_MISSING)
6155 error("root dir INODE_REF is missing");
6157 return ret < 0 ? ret : err;
/*
 * find_tree_backref(): look up a tree backref of @rec.
 *
 * A template tree_backref is filled in (the visible branch stores
 * @parent and sets full_backref) and rec->backref_tree is searched with
 * rb_search() using compare_extent_backref.  A hit is converted back
 * with to_tree_backref(); back starts out as NULL.
 */
6160 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6161 u64 parent, u64 root)
6163 struct rb_node *node;
6164 struct tree_backref *back = NULL;
6165 struct tree_backref match = {
6172 match.parent = parent;
6173 match.node.full_backref = 1;
6178 node = rb_search(&rec->backref_tree, &match.node.node,
6179 (rb_compare_keys)compare_extent_backref, NULL);
6181 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * find_data_backref(): look up a data backref of @rec.
 *
 * A template data_backref is built from the arguments (found_ref and
 * disk_bytenr are among the visible initialisers; the visible branch
 * stores @parent and sets full_backref) and rec->backref_tree is
 * searched with rb_search() using compare_extent_backref.  A hit is
 * converted back with to_data_backref(); back starts out as NULL.
 */
6186 static struct data_backref *find_data_backref(struct extent_record *rec,
6187 u64 parent, u64 root,
6188 u64 owner, u64 offset,
6190 u64 disk_bytenr, u64 bytes)
6192 struct rb_node *node;
6193 struct data_backref *back = NULL;
6194 struct data_backref match = {
6201 .found_ref = found_ref,
6202 .disk_bytenr = disk_bytenr,
6206 match.parent = parent;
6207 match.node.full_backref = 1;
6212 node = rb_search(&rec->backref_tree, &match.node.node,
6213 (rb_compare_keys)compare_extent_backref, NULL);
6215 back = to_data_backref(rb_node_to_extent_backref(node));
6220 * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
6221 * blocks and integrity of fs tree items.
6223 * @root: the root of the tree to be checked.
6224 * @ext_ref: whether the EXTENDED_IREF feature is enabled.
6225 * @check_all: if not 0, check all tree blocks of the tree;
6226 * otherwise only check the relationships between fs tree items, and
6227 * @root MUST be a fs tree root.
6228 * Returns 0 represents OK.
6229 * Returns not 0 represents error.
/*
 * check_btrfs_root(): walk one tree and check its blocks and items.
 *
 * check_fs_first_inode() is run first because the traversal below would
 * otherwise never notice a missing first inode item.  The walk starts
 * at root->node when the root still has references or drop_progress has
 * objectid 0; otherwise it resumes from the drop_progress key at
 * drop_level.  walk_down_tree_v2() and walk_up_tree_v2() then perform
 * the traversal, and the path is released at the end.
 */
6231 static int check_btrfs_root(struct btrfs_trans_handle *trans,
6232 struct btrfs_root *root, unsigned int ext_ref,
6236 struct btrfs_path path;
6237 struct node_refs nrefs;
6238 struct btrfs_root_item *root_item = &root->root_item;
6243 memset(&nrefs, 0, sizeof(nrefs));
6246 * We need to manually check the first inode item (256)
6247 * As the following traversal function will only start from
6248 * the first inode item in the leaf, if inode item (256) is
6249 * missing we will skip it forever.
6251 ret = check_fs_first_inode(root, ext_ref);
6257 level = btrfs_header_level(root->node);
6258 btrfs_init_path(&path);
6260 if (btrfs_root_refs(root_item) > 0 ||
6261 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6262 path.nodes[level] = root->node;
6263 path.slots[level] = 0;
6264 extent_buffer_get(root->node);
6266 struct btrfs_key key;
6268 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6269 level = root_item->drop_level;
6270 path.lowest_level = level;
6271 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6278 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6279 ext_ref, check_all);
6283 /* if ret is negative, walk shall stop */
6289 ret = walk_up_tree_v2(root, &path, &level);
6291 /* Normal exit, reset ret to err */
6298 btrfs_release_path(&path);
6303 * Iterate all items in the tree and call check_inode_item() to check.
6305 * @root: the root of the tree to be checked.
6306 * @ext_ref: the EXTENDED_IREF feature
6308 * Return 0 if no error found.
6309 * Return <0 for error.
/*
 * check_fs_root_v2(): reset the cached block groups of the filesystem,
 * then run check_btrfs_root() on @root with no transaction and 0 as the
 * last argument.  The result of check_btrfs_root() is returned as is.
 */
6311 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6313 reset_cached_block_groups(root->fs_info);
6314 return check_btrfs_root(NULL, root, ext_ref, 0);
6318 * Find the relative ref for root_ref and root_backref.
6320 * @root: the root of the root tree.
6321 * @ref_key: the key of the root ref.
6323 * Return 0 if no error occurred.
/*
 * check_root_ref(): check that a ROOT_REF or ROOT_BACKREF item has a
 * matching counterpart.
 *
 * dirid, sequence and name are read from the item at @slot of @node.
 * The counterpart key swaps objectid and offset and flips the key type
 * between ROOT_REF and ROOT_BACKREF.  If it cannot be found,
 * ROOT_REF_MISSING is set; if dirid, sequence, name length or name
 * differ, ROOT_REF_MISMATCH is set.  Names longer than BTRFS_NAME_LEN
 * are read truncated to BTRFS_NAME_LEN and produce a warning.
 */
6325 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6326 struct extent_buffer *node, int slot)
6328 struct btrfs_path path;
6329 struct btrfs_key key;
6330 struct btrfs_root_ref *ref;
6331 struct btrfs_root_ref *backref;
6332 char ref_name[BTRFS_NAME_LEN] = {0};
6333 char backref_name[BTRFS_NAME_LEN] = {0};
6339 u32 backref_namelen;
6344 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6345 ref_dirid = btrfs_root_ref_dirid(node, ref);
6346 ref_seq = btrfs_root_ref_sequence(node, ref);
6347 ref_namelen = btrfs_root_ref_name_len(node, ref);
6349 if (ref_namelen <= BTRFS_NAME_LEN) {
6352 len = BTRFS_NAME_LEN;
6353 warning("%s[%llu %llu] ref_name too long",
6354 ref_key->type == BTRFS_ROOT_REF_KEY ?
6355 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
6358 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6360 /* Find relative root_ref */
6361 key.objectid = ref_key->offset;
6362 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6363 key.offset = ref_key->objectid;
6365 btrfs_init_path(&path);
6366 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6368 err |= ROOT_REF_MISSING;
6369 error("%s[%llu %llu] couldn't find relative ref",
6370 ref_key->type == BTRFS_ROOT_REF_KEY ?
6371 "ROOT_REF" : "ROOT_BACKREF",
6372 ref_key->objectid, ref_key->offset);
6376 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6377 struct btrfs_root_ref);
6378 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6379 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6380 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6382 if (backref_namelen <= BTRFS_NAME_LEN) {
6383 len = backref_namelen;
6385 len = BTRFS_NAME_LEN;
6386 warning("%s[%llu %llu] ref_name too long",
6387 key.type == BTRFS_ROOT_REF_KEY ?
6388 "ROOT_REF" : "ROOT_BACKREF",
6389 key.objectid, key.offset);
6391 read_extent_buffer(path.nodes[0], backref_name,
6392 (unsigned long)(backref + 1), len);
6394 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6395 ref_namelen != backref_namelen ||
6396 strncmp(ref_name, backref_name, len)) {
6397 err |= ROOT_REF_MISMATCH;
6398 error("%s[%llu %llu] mismatch relative ref",
6399 ref_key->type == BTRFS_ROOT_REF_KEY ?
6400 "ROOT_REF" : "ROOT_BACKREF",
6401 ref_key->objectid, ref_key->offset);
6404 btrfs_release_path(&path);
6409 * Check all fs/file tree in low_memory mode.
6411 * 1. for fs tree root item, call check_fs_root_v2()
6412 * 2. for fs tree root ref/backref, call check_root_ref()
6414 * Return 0 if no error occurred.
/*
 * check_fs_roots_v2(): iterate the root tree and check every fs tree.
 *
 * The search starts at the ROOT_ITEM of BTRFS_FS_TREE_OBJECTID and stops
 * once key.objectid exceeds BTRFS_LAST_FREE_OBJECTID.  For a ROOT_ITEM
 * whose objectid satisfies fs_root_objectid() the root is read and
 * passed to check_fs_root_v2(); BTRFS_TREE_RELOC_OBJECTID roots are read
 * without the cache and freed again afterwards.  ROOT_REF and
 * ROOT_BACKREF items go through check_root_ref().
 */
6416 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6418 struct btrfs_root *tree_root = fs_info->tree_root;
6419 struct btrfs_root *cur_root = NULL;
6420 struct btrfs_path path;
6421 struct btrfs_key key;
6422 struct extent_buffer *node;
6423 unsigned int ext_ref;
6428 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6430 btrfs_init_path(&path);
6431 key.objectid = BTRFS_FS_TREE_OBJECTID;
6433 key.type = BTRFS_ROOT_ITEM_KEY;
6435 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6439 } else if (ret > 0) {
6445 node = path.nodes[0];
6446 slot = path.slots[0];
6447 btrfs_item_key_to_cpu(node, &key, slot);
6448 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6450 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6451 fs_root_objectid(key.objectid)) {
6452 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6453 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6456 key.offset = (u64)-1;
6457 cur_root = btrfs_read_fs_root(fs_info, &key);
6460 if (IS_ERR(cur_root)) {
6461 error("Fail to read fs/subvol tree: %lld",
6467 ret = check_fs_root_v2(cur_root, ext_ref);
6470 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6471 btrfs_free_fs_root(cur_root);
6472 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6473 key.type == BTRFS_ROOT_BACKREF_KEY) {
6474 ret = check_root_ref(tree_root, &key, node, slot);
6478 ret = btrfs_next_item(tree_root, &path);
6488 btrfs_release_path(&path);
/*
 * do_check_fs_roots(): run the fs root check for the selected check mode.
 *
 * Prints "checking fs roots" to stderr unless progress reporting is
 * enabled, then calls check_fs_roots_v2() in CHECK_MODE_LOWMEM and
 * check_fs_roots() with @root_cache otherwise.
 */
6492 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6493 struct cache_tree *root_cache)
6497 if (!ctx.progress_enabled)
6498 fprintf(stderr, "checking fs roots\n");
6499 if (check_mode == CHECK_MODE_LOWMEM)
6500 ret = check_fs_roots_v2(fs_info);
6502 ret = check_fs_roots(fs_info, root_cache);
/*
 * all_backpointers_checked(): cross-check every backref of @rec.
 *
 * Each entry of rec->backref_tree is tested for:
 *  - not having been found in the extent tree,
 *  - a tree backref that was never referenced back,
 *  - a data backref whose found_ref differs from num_refs,
 *  - a data backref whose disk_bytenr differs from rec->start,
 *  - a data backref whose bytes differ from rec->nr.
 * The references found are summed up and compared with rec->refs.
 * Each mismatch has a diagnostic that is written to stderr.
 *
 * NOTE(review): @print_errs presumably decides whether the diagnostics
 * are printed or the walk stops at the first mismatch; the lines using
 * it and the return statement are outside this excerpt - confirm.
 * maybe_free_extent_rec() treats a zero result as "all consistent".
 */
6507 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6509 struct extent_backref *back, *tmp;
6510 struct tree_backref *tback;
6511 struct data_backref *dback;
6515 rbtree_postorder_for_each_entry_safe(back, tmp,
6516 &rec->backref_tree, node) {
6517 if (!back->found_extent_tree) {
6521 if (back->is_data) {
6522 dback = to_data_backref(back);
6523 fprintf(stderr, "Data backref %llu %s %llu"
6524 " owner %llu offset %llu num_refs %lu"
6525 " not found in extent tree\n",
6526 (unsigned long long)rec->start,
6527 back->full_backref ?
6529 back->full_backref ?
6530 (unsigned long long)dback->parent:
6531 (unsigned long long)dback->root,
6532 (unsigned long long)dback->owner,
6533 (unsigned long long)dback->offset,
6534 (unsigned long)dback->num_refs);
6536 tback = to_tree_backref(back);
6537 fprintf(stderr, "Tree backref %llu parent %llu"
6538 " root %llu not found in extent tree\n",
6539 (unsigned long long)rec->start,
6540 (unsigned long long)tback->parent,
6541 (unsigned long long)tback->root);
6544 if (!back->is_data && !back->found_ref) {
6548 tback = to_tree_backref(back);
6549 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6550 (unsigned long long)rec->start,
6551 back->full_backref ? "parent" : "root",
6552 back->full_backref ?
6553 (unsigned long long)tback->parent :
6554 (unsigned long long)tback->root, back);
6556 if (back->is_data) {
6557 dback = to_data_backref(back);
6558 if (dback->found_ref != dback->num_refs) {
6562 fprintf(stderr, "Incorrect local backref count"
6563 " on %llu %s %llu owner %llu"
6564 " offset %llu found %u wanted %u back %p\n",
6565 (unsigned long long)rec->start,
6566 back->full_backref ?
6568 back->full_backref ?
6569 (unsigned long long)dback->parent:
6570 (unsigned long long)dback->root,
6571 (unsigned long long)dback->owner,
6572 (unsigned long long)dback->offset,
6573 dback->found_ref, dback->num_refs, back);
6575 if (dback->disk_bytenr != rec->start) {
6579 fprintf(stderr, "Backref disk bytenr does not"
6580 " match extent record, bytenr=%llu, "
6581 "ref bytenr=%llu\n",
6582 (unsigned long long)rec->start,
6583 (unsigned long long)dback->disk_bytenr);
6586 if (dback->bytes != rec->nr) {
6590 fprintf(stderr, "Backref bytes do not match "
6591 "extent backref, bytenr=%llu, ref "
6592 "bytes=%llu, backref bytes=%llu\n",
6593 (unsigned long long)rec->start,
6594 (unsigned long long)rec->nr,
6595 (unsigned long long)dback->bytes);
6598 if (!back->is_data) {
6601 dback = to_data_backref(back);
6602 found += dback->found_ref;
6605 if (found != rec->refs) {
6609 fprintf(stderr, "Incorrect global backref count "
6610 "on %llu found %llu wanted %llu\n",
6611 (unsigned long long)rec->start,
6612 (unsigned long long)found,
6613 (unsigned long long)rec->refs);
/*
 * __free_one_backref(): rb_free_nodes() callback used by
 * free_all_extent_backrefs().  Converts @node back to its containing
 * extent_backref (NOTE(review): presumably to free it; the release call
 * is outside this excerpt).
 */
6619 static void __free_one_backref(struct rb_node *node)
6621 struct extent_backref *back = rb_node_to_extent_backref(node);
/*
 * free_all_extent_backrefs(): release every node of rec->backref_tree by
 * handing the tree to rb_free_nodes() with __free_one_backref() as the
 * per-node callback.
 */
6626 static void free_all_extent_backrefs(struct extent_record *rec)
6628 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * free_extent_record_cache(): drain @extent_cache.
 *
 * Takes the first cache extent, maps it to its extent_record, removes it
 * from the cache and frees the record's backrefs.  NOTE(review): the
 * loop construct and the release of the record itself are outside this
 * excerpt - confirm.
 */
6631 static void free_extent_record_cache(struct cache_tree *extent_cache)
6633 struct cache_extent *cache;
6634 struct extent_record *rec;
6637 cache = first_cache_extent(extent_cache);
6640 rec = container_of(cache, struct extent_record, cache);
6641 remove_cache_extent(extent_cache, cache);
6642 free_all_extent_backrefs(rec);
/*
 * maybe_free_extent_rec(): drop @rec from @extent_cache once nothing is
 * left to verify for it.
 *
 * The record is dropped only when its content and owner ref have been
 * checked, extent_item_refs equals refs and refs is non-zero, there are
 * no duplicates, all_backpointers_checked() returns 0, and none of
 * bad_full_backref, crossing_stripes or wrong_chunk_type is set.  In
 * that case the cache entry is removed, the backrefs are freed and the
 * record is unlinked from its list.
 */
6647 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6648 struct extent_record *rec)
6650 if (rec->content_checked && rec->owner_ref_checked &&
6651 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6652 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6653 !rec->bad_full_backref && !rec->crossing_stripes &&
6654 !rec->wrong_chunk_type) {
6655 remove_cache_extent(extent_cache, &rec->cache);
6656 free_all_extent_backrefs(rec);
6657 list_del_init(&rec->list);
/*
 * check_owner_ref(): check that the owner recorded in the header of @buf
 * really references the block.
 *
 * First the backrefs of @rec are scanned for a found, non-full tree
 * backref whose root equals btrfs_header_owner(buf).  Failing that, the
 * owner's fs tree is read and searched (lowest_level = level + 1) for
 * the first key of @buf, and the parent's block pointer at that slot is
 * compared with buf->start.
 *
 * Returns 0 when the owner reference was found and 1 otherwise.
 */
6663 static int check_owner_ref(struct btrfs_root *root,
6664 struct extent_record *rec,
6665 struct extent_buffer *buf)
6667 struct extent_backref *node, *tmp;
6668 struct tree_backref *back;
6669 struct btrfs_root *ref_root;
6670 struct btrfs_key key;
6671 struct btrfs_path path;
6672 struct extent_buffer *parent;
6677 rbtree_postorder_for_each_entry_safe(node, tmp,
6678 &rec->backref_tree, node) {
6681 if (!node->found_ref)
6683 if (node->full_backref)
6685 back = to_tree_backref(node);
6686 if (btrfs_header_owner(buf) == back->root)
6689 BUG_ON(rec->is_root);
6691 /* try to find the block by search corresponding fs tree */
6692 key.objectid = btrfs_header_owner(buf);
6693 key.type = BTRFS_ROOT_ITEM_KEY;
6694 key.offset = (u64)-1;
6696 ref_root = btrfs_read_fs_root(root->fs_info, &key);
6697 if (IS_ERR(ref_root))
6700 level = btrfs_header_level(buf);
6702 btrfs_item_key_to_cpu(buf, &key, 0);
6704 btrfs_node_key_to_cpu(buf, &key, 0);
6706 btrfs_init_path(&path);
6707 path.lowest_level = level + 1;
6708 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6712 parent = path.nodes[level + 1];
6713 if (parent && buf->start == btrfs_node_blockptr(parent,
6714 path.slots[level + 1]))
6717 btrfs_release_path(&path);
6718 return found ? 0 : 1;
/*
 * is_extent_tree_record(): scan the backrefs of @rec for one that ties
 * the record to the extent tree.
 *
 * Every entry is viewed as a tree backref; the visible tests look at
 * full_backref and at root == BTRFS_EXTENT_TREE_OBJECTID.
 * record_bad_block_io() stops early when this function returns 0.
 * NOTE(review): the statements those tests guard and the return value
 * are outside this excerpt - confirm.
 */
6721 static int is_extent_tree_record(struct extent_record *rec)
6723 struct extent_backref *node, *tmp;
6724 struct tree_backref *back;
6727 rbtree_postorder_for_each_entry_safe(node, tmp,
6728 &rec->backref_tree, node) {
6731 back = to_tree_backref(node);
6732 if (node->full_backref)
6734 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * record_bad_block_io(): remember an unreadable extent tree block.
 *
 * Looks the range up in @extent_cache; when the matching record passes
 * is_extent_tree_record(), its parent key is converted to CPU byte order
 * and the range is registered with btrfs_add_corrupt_extent_record(),
 * whose result is returned.
 */
6741 static int record_bad_block_io(struct btrfs_fs_info *info,
6742 struct cache_tree *extent_cache,
6745 struct extent_record *rec;
6746 struct cache_extent *cache;
6747 struct btrfs_key key;
6749 cache = lookup_cache_extent(extent_cache, start, len);
6753 rec = container_of(cache, struct extent_record, cache);
6754 if (!is_extent_tree_record(rec))
6757 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6758 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * swap_values(): exchange the entries at @slot and @slot + 1 of @buf.
 *
 * Node (header level != 0): the two btrfs_key_ptr structures are read
 * and written back in swapped positions; the first key of the node is
 * then pushed up with btrfs_fixup_low_keys().
 * Leaf: the data of both items is copied into temporary buffers and
 * written back crosswise, the offset and size fields of the two item
 * headers are exchanged, and both keys are rewritten through
 * btrfs_set_item_key_unsafe().
 *
 * NOTE(review): item1_data is allocated with item1_size bytes but is
 * written out with item2_size as the length (and item2_data the other
 * way round).  When the two items differ in size this reads past the end
 * of the smaller buffer.  Confirm the intended layout before relying on
 * this helper for items of unequal size.
 */
6761 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6762 struct extent_buffer *buf, int slot)
6764 if (btrfs_header_level(buf)) {
6765 struct btrfs_key_ptr ptr1, ptr2;
6767 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6768 sizeof(struct btrfs_key_ptr));
6769 read_extent_buffer(buf, &ptr2,
6770 btrfs_node_key_ptr_offset(slot + 1),
6771 sizeof(struct btrfs_key_ptr));
6772 write_extent_buffer(buf, &ptr1,
6773 btrfs_node_key_ptr_offset(slot + 1),
6774 sizeof(struct btrfs_key_ptr));
6775 write_extent_buffer(buf, &ptr2,
6776 btrfs_node_key_ptr_offset(slot),
6777 sizeof(struct btrfs_key_ptr));
6779 struct btrfs_disk_key key;
6780 btrfs_node_key(buf, &key, 0);
6781 btrfs_fixup_low_keys(root, path, &key,
6782 btrfs_header_level(buf) + 1);
6785 struct btrfs_item *item1, *item2;
6786 struct btrfs_key k1, k2;
6787 char *item1_data, *item2_data;
6788 u32 item1_offset, item2_offset, item1_size, item2_size;
6790 item1 = btrfs_item_nr(slot);
6791 item2 = btrfs_item_nr(slot + 1);
6792 btrfs_item_key_to_cpu(buf, &k1, slot);
6793 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6794 item1_offset = btrfs_item_offset(buf, item1);
6795 item2_offset = btrfs_item_offset(buf, item2);
6796 item1_size = btrfs_item_size(buf, item1);
6797 item2_size = btrfs_item_size(buf, item2);
6799 item1_data = malloc(item1_size);
6802 item2_data = malloc(item2_size);
6808 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6809 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
6811 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6812 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6816 btrfs_set_item_offset(buf, item1, item2_offset);
6817 btrfs_set_item_offset(buf, item2, item1_offset);
6818 btrfs_set_item_size(buf, item1, item2_size);
6819 btrfs_set_item_size(buf, item2, item1_size);
6821 path->slots[0] = slot;
6822 btrfs_set_item_key_unsafe(root, path, &k2);
6823 path->slots[0] = slot + 1;
6824 btrfs_set_item_key_unsafe(root, path, &k1);
6829 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6831 struct extent_buffer *buf;
6832 struct btrfs_key k1, k2;
6834 int level = path->lowest_level;
6837 buf = path->nodes[level];
6838 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6840 btrfs_node_key_to_cpu(buf, &k1, i);
6841 btrfs_node_key_to_cpu(buf, &k2, i + 1);
6843 btrfs_item_key_to_cpu(buf, &k1, i);
6844 btrfs_item_key_to_cpu(buf, &k2, i + 1);
6846 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6848 ret = swap_values(root, path, buf, i);
6851 btrfs_mark_buffer_dirty(buf);
/*
 * delete_bogus_item(): remove the item header at @slot from leaf @buf.
 *
 * Only DIR_INDEX, EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF and
 * EXTENT_DATA_REF keys are handled.  For those a message is printed, the
 * item headers after @slot are moved down by one entry with
 * memmove_extent_buffer(), the item count in the header is decremented
 * and the buffer is marked dirty.  The first key of the leaf is pushed
 * up with btrfs_fixup_low_keys() (NOTE(review): presumably only when
 * slot 0 was removed; the guard is outside this excerpt - confirm).
 */
6857 static int delete_bogus_item(struct btrfs_root *root,
6858 struct btrfs_path *path,
6859 struct extent_buffer *buf, int slot)
6861 struct btrfs_key key;
6862 int nritems = btrfs_header_nritems(buf);
6864 btrfs_item_key_to_cpu(buf, &key, slot);
6866 /* These are all the keys we can deal with missing. */
6867 if (key.type != BTRFS_DIR_INDEX_KEY &&
6868 key.type != BTRFS_EXTENT_ITEM_KEY &&
6869 key.type != BTRFS_METADATA_ITEM_KEY &&
6870 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6871 key.type != BTRFS_EXTENT_DATA_REF_KEY)
6874 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6875 (unsigned long long)key.objectid, key.type,
6876 (unsigned long long)key.offset, slot, buf->start);
6877 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6878 btrfs_item_nr_offset(slot + 1),
6879 sizeof(struct btrfs_item) *
6880 (nritems - slot - 1));
6881 btrfs_set_header_nritems(buf, nritems - 1);
6883 struct btrfs_disk_key disk_key;
6885 btrfs_item_key(buf, &disk_key, 0);
6886 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6888 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * Item 0 is expected to end at BTRFS_LEAF_DATA_SIZE(); every later item is
 * expected to end exactly where the previous item's data begins. An item
 * that ends beyond the expected position is handed to delete_bogus_item();
 * an item that ends short of it has its data moved up by the size of the
 * gap ("shift").
 */
6892 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6894 struct extent_buffer *buf;
6898 /* We should only get this for leaves */
6899 BUG_ON(path->lowest_level);
6900 buf = path->nodes[0];
6902 for (i = 0; i < btrfs_header_nritems(buf); i++) {
6903 unsigned int shift = 0, offset;
/* First item: its end is checked against the end of the leaf data area. */
6905 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6906 BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
6907 if (btrfs_item_end_nr(buf, i) >
6908 BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
6909 ret = delete_bogus_item(root, path, buf, i);
6912 fprintf(stderr, "item is off the end of the "
6913 "leaf, can't fix\n");
6917 shift = BTRFS_LEAF_DATA_SIZE(root->fs_info) -
6918 btrfs_item_end_nr(buf, i);
/* Later items: the end is checked against the previous item's offset. */
6919 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6920 btrfs_item_offset_nr(buf, i - 1)) {
6921 if (btrfs_item_end_nr(buf, i) >
6922 btrfs_item_offset_nr(buf, i - 1)) {
6923 ret = delete_bogus_item(root, path, buf, i);
6926 fprintf(stderr, "items overlap, can't fix\n");
6930 shift = btrfs_item_offset_nr(buf, i - 1) -
6931 btrfs_item_end_nr(buf, i);
6936 printf("Shifting item nr %d by %u bytes in block %llu\n",
6937 i, shift, (unsigned long long)buf->start);
6938 offset = btrfs_item_offset_nr(buf, i);
/* Move the item's data up by "shift" bytes within the leaf data area. */
6939 memmove_extent_buffer(buf,
6940 btrfs_leaf_data(buf) + offset + shift,
6941 btrfs_leaf_data(buf) + offset,
6942 btrfs_item_size_nr(buf, i));
6943 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6945 btrfs_mark_buffer_dirty(buf);
6949 * We may have moved things, in which case we want to exit so we don't
6950 * write those changes out. Once we have proper abort functionality in
6951 * progs this can be changed to something nicer.
6958 * Attempt to fix basic block failures. If we can't fix it for whatever reason
6959 * then just return -EIO.
/*
 * @root:   used for its fs_info when looking up and reading roots
 * @buf:    the tree block that failed validation
 * @status: result of the block check; only BTRFS_TREE_BLOCK_BAD_KEY_ORDER
 *          and BTRFS_TREE_BLOCK_INVALID_OFFSETS are tested for below
 *
 * Each root reported by btrfs_find_all_roots() for buf->start is read with
 * btrfs_read_fs_root(), a transaction is started on it, and the block is
 * located by searching for its first key at the block's own level before
 * fix_key_order() or fix_item_offset() is applied.
 */
6961 static int try_to_fix_bad_block(struct btrfs_root *root,
6962 struct extent_buffer *buf,
6963 enum btrfs_tree_block_status status)
6965 struct btrfs_trans_handle *trans;
6966 struct ulist *roots;
6967 struct ulist_node *node;
6968 struct btrfs_root *search_root;
6969 struct btrfs_path path;
6970 struct ulist_iterator iter;
6971 struct btrfs_key root_key, key;
6974 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6975 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6978 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6982 btrfs_init_path(&path);
6983 ULIST_ITER_INIT(&iter);
6984 while ((node = ulist_next(roots, &iter))) {
/* node->val is used as the objectid of the root to read. */
6985 root_key.objectid = node->val;
6986 root_key.type = BTRFS_ROOT_ITEM_KEY;
6987 root_key.offset = (u64)-1;
6989 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6996 trans = btrfs_start_transaction(search_root, 0);
6997 if (IS_ERR(trans)) {
6998 ret = PTR_ERR(trans);
/* Stop the search at the bad block's level and skip block checks. */
7002 path.lowest_level = btrfs_header_level(buf);
7003 path.skip_check_block = 1;
7004 if (path.lowest_level)
7005 btrfs_node_key_to_cpu(buf, &key, 0);
7007 btrfs_item_key_to_cpu(buf, &key, 0);
7008 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
7011 btrfs_commit_transaction(trans, search_root);
/* Dispatch to the repair routine matching the reported status. */
7014 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
7015 ret = fix_key_order(search_root, &path);
7016 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7017 ret = fix_item_offset(search_root, &path);
7019 btrfs_commit_transaction(trans, search_root);
7022 btrfs_release_path(&path);
7023 btrfs_commit_transaction(trans, search_root);
7026 btrfs_release_path(&path);
/*
 * Validate tree block @buf and update its extent_record in @extent_cache.
 *
 * The record is looked up by buf->start/buf->len. The header generation,
 * the first key's objectid (when the block has items) and the level are
 * stored in it. The block is then checked with btrfs_check_leaf() or
 * btrfs_check_node() against rec->parent_key; a status other than
 * BTRFS_TREE_BLOCK_CLEAN may be passed to try_to_fix_bad_block().
 *
 * @flags: when BTRFS_BLOCK_FLAG_FULL_BACKREF is set the owner ref is marked
 *         as checked directly; check_owner_ref() is also consulted below.
 */
7030 static int check_block(struct btrfs_root *root,
7031 struct cache_tree *extent_cache,
7032 struct extent_buffer *buf, u64 flags)
7034 struct extent_record *rec;
7035 struct cache_extent *cache;
7036 struct btrfs_key key;
7037 enum btrfs_tree_block_status status;
7041 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
7044 rec = container_of(cache, struct extent_record, cache);
7045 rec->generation = btrfs_header_generation(buf);
7047 level = btrfs_header_level(buf);
7048 if (btrfs_header_nritems(buf) > 0) {
7051 btrfs_item_key_to_cpu(buf, &key, 0);
7053 btrfs_node_key_to_cpu(buf, &key, 0);
7055 rec->info_objectid = key.objectid;
7057 rec->info_level = level;
7059 if (btrfs_is_leaf(buf))
7060 status = btrfs_check_leaf(root, &rec->parent_key, buf);
7062 status = btrfs_check_node(root, &rec->parent_key, buf);
7064 if (status != BTRFS_TREE_BLOCK_CLEAN) {
/* NOTE(review): an int return value is stored in the enum status here. */
7066 status = try_to_fix_bad_block(root, buf, status);
7067 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7069 fprintf(stderr, "bad block %llu\n",
7070 (unsigned long long)buf->start);
7073 * Signal to callers we need to start the scan over
7074 * again since we'll have cowed blocks.
7079 rec->content_checked = 1;
7080 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7081 rec->owner_ref_checked = 1;
7083 ret = check_owner_ref(root, rec, buf);
7085 rec->owner_ref_checked = 1;
/* The record may be released once all checks on it are complete. */
7089 maybe_free_extent_rec(extent_cache, rec);
/*
 * Walk the rec->backrefs list looking for a tree backref that matches
 * @parent or @root.
 *
 * The loop tests node->full_backref together with either
 * back->parent == @parent or back->root == @root.
 * NOTE(review): presumably full backrefs are matched on parent and the
 * others on root - confirm.
 */
7094 static struct tree_backref *find_tree_backref(struct extent_record *rec,
7095 u64 parent, u64 root)
7097 struct list_head *cur = rec->backrefs.next;
7098 struct extent_backref *node;
7099 struct tree_backref *back;
7101 while(cur != &rec->backrefs) {
7102 node = to_extent_backref(cur);
7106 back = to_tree_backref(node);
7108 if (!node->full_backref)
7110 if (parent == back->parent)
7113 if (node->full_backref)
7115 if (back->root == root)
/*
 * Allocate a tree_backref with malloc() and zero its embedded node.
 *
 * Two set-ups appear below: one stores @parent and sets full_backref to 1,
 * the other sets full_backref to 0.
 * NOTE(review): presumably the choice depends on @parent being non-zero -
 * confirm.
 */
7123 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
7124 u64 parent, u64 root)
7126 struct tree_backref *ref = malloc(sizeof(*ref));
/* Only the embedded node is cleared, not the whole structure. */
7130 memset(&ref->node, 0, sizeof(ref->node));
7132 ref->parent = parent;
7133 ref->node.full_backref = 1;
7136 ref->node.full_backref = 0;
/*
 * Walk the rec->backrefs list looking for a data backref.
 *
 * Candidates are compared on back->parent, or on the
 * (root, owner, offset) triple. For the triple, when @found_ref is set and
 * the candidate already has found_ref, its bytes and disk_bytenr are also
 * compared against @bytes and @disk_bytenr.
 * NOTE(review): presumably a size/bytenr mismatch makes the candidate be
 * passed over so that a separate backref gets created - confirm.
 */
7143 static struct data_backref *find_data_backref(struct extent_record *rec,
7144 u64 parent, u64 root,
7145 u64 owner, u64 offset,
7147 u64 disk_bytenr, u64 bytes)
7149 struct list_head *cur = rec->backrefs.next;
7150 struct extent_backref *node;
7151 struct data_backref *back;
7153 while(cur != &rec->backrefs) {
7154 node = to_extent_backref(cur);
7158 back = to_data_backref(node);
7160 if (!node->full_backref)
7162 if (parent == back->parent)
7165 if (node->full_backref)
7167 if (back->root == root && back->owner == owner &&
7168 back->offset == offset) {
7169 if (found_ref && node->found_ref &&
7170 (back->bytes != bytes ||
7171 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data_backref with malloc(), zero its embedded node and mark
 * it as a data ref (is_data = 1).
 *
 * Two set-ups appear below: one stores @parent with full_backref = 1, the
 * other stores @offset with full_backref = 0. The new ref's byte count is
 * taken from max_size, and rec->max_size is raised to max_size if it is
 * currently smaller.
 */
7181 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7182 u64 parent, u64 root,
7183 u64 owner, u64 offset,
7186 struct data_backref *ref = malloc(sizeof(*ref));
7190 memset(&ref->node, 0, sizeof(ref->node));
7191 ref->node.is_data = 1;
7194 ref->parent = parent;
7197 ref->node.full_backref = 1;
7201 ref->offset = offset;
7202 ref->node.full_backref = 0;
7204 ref->bytes = max_size;
/* Keep the record's max_size as the largest size seen so far. */
7207 if (max_size > rec->max_size)
7208 rec->max_size = max_size;
7212 /* Check that the extent's type matches its block group; a mismatch sets rec->wrong_chunk_type */
7213 static void check_extent_type(struct extent_record *rec)
7215 struct btrfs_block_group_cache *bg_cache;
/* The block group is looked up from the extent's start address. */
7217 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7221 /* data extent, check chunk directly*/
7222 if (!rec->metadata) {
7223 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7224 rec->wrong_chunk_type = 1;
7228 /* metadata extent, check the obvious case first */
7229 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7230 BTRFS_BLOCK_GROUP_METADATA))) {
7231 rec->wrong_chunk_type = 1;
7236 * Check SYSTEM extent, as it's also marked as metadata, we can only
7237 * make sure it's a SYSTEM extent by its backref
7239 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7240 struct extent_backref *node;
7241 struct tree_backref *tback;
/* Only the first backref in the tree is inspected. */
7244 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7245 if (node->is_data) {
7246 /* tree block shouldn't have data backref */
7247 rec->wrong_chunk_type = 1;
7250 tback = container_of(node, struct tree_backref, node);
/* A chunk-tree owner implies SYSTEM; the other assignment is METADATA. */
7252 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7253 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7255 bg_type = BTRFS_BLOCK_GROUP_METADATA;
7256 if (!(bg_cache->flags & bg_type))
7257 rec->wrong_chunk_type = 1;
7262 * Allocate a new extent record, fill default values from @tmpl and insert into
7263 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7264 * the cache, otherwise it fails.
7266 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7267 struct extent_record *tmpl)
7269 struct extent_record *rec;
/* A template without a size is treated as a programming error. */
7272 BUG_ON(tmpl->max_size == 0);
7273 rec = malloc(sizeof(*rec));
7276 rec->start = tmpl->start;
7277 rec->max_size = tmpl->max_size;
/* rec->nr is never smaller than max_size. */
7278 rec->nr = max(tmpl->nr, tmpl->max_size);
7279 rec->found_rec = tmpl->found_rec;
7280 rec->content_checked = tmpl->content_checked;
7281 rec->owner_ref_checked = tmpl->owner_ref_checked;
/* The fields below start from fixed defaults, not from the template. */
7282 rec->num_duplicates = 0;
7283 rec->metadata = tmpl->metadata;
7284 rec->flag_block_full_backref = FLAG_UNSET;
7285 rec->bad_full_backref = 0;
7286 rec->crossing_stripes = 0;
7287 rec->wrong_chunk_type = 0;
7288 rec->is_root = tmpl->is_root;
7289 rec->refs = tmpl->refs;
7290 rec->extent_item_refs = tmpl->extent_item_refs;
7291 rec->parent_generation = tmpl->parent_generation;
7292 INIT_LIST_HEAD(&rec->backrefs);
7293 INIT_LIST_HEAD(&rec->dups);
7294 INIT_LIST_HEAD(&rec->list);
7295 rec->backref_tree = RB_ROOT;
7296 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
/* The cache entry is sized from tmpl->nr, not from the max() in rec->nr. */
7297 rec->cache.start = tmpl->start;
7298 rec->cache.size = tmpl->nr;
7299 ret = insert_cache_extent(extent_cache, &rec->cache);
/* Global accounting of bytes covered by extent records. */
7304 bytes_used += rec->nr;
/* The stripe-crossing check covers nodesize bytes from rec->start. */
7307 rec->crossing_stripes = check_crossing_stripes(global_info,
7308 rec->start, global_info->nodesize);
7309 check_extent_type(rec);
7314 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7316 * - refs - if found, increase refs
7317 * - is_root - if found, set
7318 * - content_checked - if found, set
7319 * - owner_ref_checked - if found, set
7321 * If not found, create a new one, initialize and insert.
7323 static int add_extent_rec(struct cache_tree *extent_cache,
7324 struct extent_record *tmpl)
7326 struct extent_record *rec;
7327 struct cache_extent *cache;
/* Look for an existing record overlapping [tmpl->start, tmpl->nr). */
7331 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7333 rec = container_of(cache, struct extent_record, cache);
7337 rec->nr = max(tmpl->nr, tmpl->max_size);
7340 * We need to make sure to reset nr to whatever the extent
7341 * record says was the real size, this way we can compare it to
7344 if (tmpl->found_rec) {
/*
 * A different start, or a record that was already found, means this is a
 * duplicate: it is queued on duplicate_extents and a stub copy is hung
 * off rec->dups.
 */
7345 if (tmpl->start != rec->start || rec->found_rec) {
7346 struct extent_record *tmp;
7349 if (list_empty(&rec->list))
7350 list_add_tail(&rec->list,
7351 &duplicate_extents);
7354 * We have to do this song and dance in case we
7355 * find an extent record that falls inside of
7356 * our current extent record but does not have
7357 * the same objectid.
7359 tmp = malloc(sizeof(*tmp));
7362 tmp->start = tmpl->start;
7363 tmp->max_size = tmpl->max_size;
7366 tmp->metadata = tmpl->metadata;
7367 tmp->extent_item_refs = tmpl->extent_item_refs;
7368 INIT_LIST_HEAD(&tmp->list);
7369 list_add_tail(&tmp->list, &rec->dups);
7370 rec->num_duplicates++;
/* For non-duplicates, report when extent_item_refs was already set. */
7377 if (tmpl->extent_item_refs && !dup) {
7378 if (rec->extent_item_refs) {
7379 fprintf(stderr, "block %llu rec "
7380 "extent_item_refs %llu, passed %llu\n",
7381 (unsigned long long)tmpl->start,
7382 (unsigned long long)
7383 rec->extent_item_refs,
7384 (unsigned long long)tmpl->extent_item_refs);
7386 rec->extent_item_refs = tmpl->extent_item_refs;
/* The "checked" flags are sticky: they are only ever set here. */
7390 if (tmpl->content_checked)
7391 rec->content_checked = 1;
7392 if (tmpl->owner_ref_checked)
7393 rec->owner_ref_checked = 1;
7394 memcpy(&rec->parent_key, &tmpl->parent_key,
7395 sizeof(tmpl->parent_key));
7396 if (tmpl->parent_generation)
7397 rec->parent_generation = tmpl->parent_generation;
7398 if (rec->max_size < tmpl->max_size)
7399 rec->max_size = tmpl->max_size;
7402 * A metadata extent can't cross stripe_len boundary, otherwise
7403 * kernel scrub won't be able to handle it.
7404 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7408 rec->crossing_stripes = check_crossing_stripes(
7409 global_info, rec->start,
7410 global_info->nodesize);
7411 check_extent_type(rec);
7412 maybe_free_extent_rec(extent_cache, rec);
/* Creation path: delegate to add_extent_rec_nolookup(). */
7416 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref for the extent at @bytenr.
 *
 * The extent record is looked up in @extent_cache; a template record
 * starting at @bytenr can be inserted via add_extent_rec_nolookup() and
 * looked up again. A matching backref is searched with find_tree_backref()
 * and may be allocated with alloc_tree_backref(). Either found_ref or
 * found_extent_tree is then set on the backref, with a message printed when
 * the flag was already set.
 * NOTE(review): presumably @found_ref selects which of the two flags is
 * updated - confirm.
 */
7421 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7422 u64 parent, u64 root, int found_ref)
7424 struct extent_record *rec;
7425 struct tree_backref *back;
7426 struct cache_extent *cache;
7428 bool insert = false;
7430 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7432 struct extent_record tmpl;
7434 memset(&tmpl, 0, sizeof(tmpl));
7435 tmpl.start = bytenr;
7440 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7444 /* really a bug in cache_extent implement now */
7445 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7450 rec = container_of(cache, struct extent_record, cache);
/* The record found must start exactly at @bytenr. */
7451 if (rec->start != bytenr) {
7453 * Several cause, from unaligned bytenr to over lapping extents
7458 back = find_tree_backref(rec, parent, root);
7460 back = alloc_tree_backref(rec, parent, root);
7467 if (back->node.found_ref) {
7468 fprintf(stderr, "Extent back ref already exists "
7469 "for %llu parent %llu root %llu \n",
7470 (unsigned long long)bytenr,
7471 (unsigned long long)parent,
7472 (unsigned long long)root);
7474 back->node.found_ref = 1;
7476 if (back->node.found_extent_tree) {
7477 fprintf(stderr, "Extent back ref already exists "
7478 "for %llu parent %llu root %llu \n",
7479 (unsigned long long)bytenr,
7480 (unsigned long long)parent,
7481 (unsigned long long)root);
7483 back->node.found_extent_tree = 1;
/* A failed insert into the backref tree only triggers a warning. */
7486 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7487 compare_extent_backref));
7488 check_extent_type(rec);
7489 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * The extent record is looked up in @extent_cache; a template record with
 * @max_size can be inserted via add_extent_rec_nolookup() and looked up
 * again. rec->max_size is raised to @max_size if smaller. A backref is
 * searched with find_data_backref() and may be allocated with
 * alloc_data_backref().
 *
 * Two update paths are visible: one requires @num_refs == 1, sets
 * found_ref, bumps back->found_ref and refreshes bytes/disk_bytenr; the
 * other stores @num_refs and sets found_extent_tree, printing a message
 * when that flag was already set.
 * NOTE(review): presumably @found_ref selects between the two - confirm.
 */
7493 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7494 u64 parent, u64 root, u64 owner, u64 offset,
7495 u32 num_refs, int found_ref, u64 max_size)
7497 struct extent_record *rec;
7498 struct data_backref *back;
7499 struct cache_extent *cache;
7501 bool insert = false;
7503 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7505 struct extent_record tmpl;
7507 memset(&tmpl, 0, sizeof(tmpl));
7508 tmpl.start = bytenr;
7510 tmpl.max_size = max_size;
7512 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7516 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7521 rec = container_of(cache, struct extent_record, cache);
7522 if (rec->max_size < max_size)
7523 rec->max_size = max_size;
7526 * If found_ref is set then max_size is the real size and must match the
7527 * existing refs. So if we have already found a ref then we need to
7528 * make sure that this ref matches the existing one, otherwise we need
7529 * to add a new backref so we can notice that the backrefs don't match
7530 * and we need to figure out who is telling the truth. This is to
7531 * account for that awful fsync bug I introduced where we'd end up with
7532 * a btrfs_file_extent_item that would have its length include multiple
7533 * prealloc extents or point inside of a prealloc extent.
7535 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7538 back = alloc_data_backref(rec, parent, root, owner, offset,
/* Mismatched ref counts or sizes on this path abort via BUG_ON(). */
7545 BUG_ON(num_refs != 1);
7546 if (back->node.found_ref)
7547 BUG_ON(back->bytes != max_size);
7548 back->node.found_ref = 1;
7549 back->found_ref += 1;
7550 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7551 back->bytes = max_size;
7552 back->disk_bytenr = bytenr;
7554 /* Need to reinsert if not already in the tree */
7556 rb_erase(&back->node.node, &rec->backref_tree);
7561 rec->content_checked = 1;
7562 rec->owner_ref_checked = 1;
7564 if (back->node.found_extent_tree) {
7565 fprintf(stderr, "Extent back ref already exists "
7566 "for %llu parent %llu root %llu "
7567 "owner %llu offset %llu num_refs %lu\n",
7568 (unsigned long long)bytenr,
7569 (unsigned long long)parent,
7570 (unsigned long long)root,
7571 (unsigned long long)owner,
7572 (unsigned long long)offset,
7573 (unsigned long)num_refs);
7575 back->num_refs = num_refs;
7576 back->node.found_extent_tree = 1;
/* A failed insert into the backref tree only triggers a warning. */
7579 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7580 compare_extent_backref));
7582 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue the range [@bytenr, @size) for processing.
 *
 * The range is added to @seen first and then to @pending.
 * NOTE(review): presumably a failure to add to @seen (range already known)
 * returns before touching @pending - confirm.
 * NOTE(review): the result of the second add_cache_extent() is not stored.
 */
7586 static int add_pending(struct cache_tree *pending,
7587 struct cache_tree *seen, u64 bytenr, u32 size)
7590 ret = add_cache_extent(seen, bytenr, size);
7593 add_cache_extent(pending, bytenr, size);
/*
 * Fill @bits (capacity @bits_nr) with the next ranges to process.
 *
 * Sources are consulted in this order: the first entry of @reada, then
 * @nodes searched from shortly before @last (falling back to offset 0),
 * then @pending from offset 0. Consecutive cache entries are copied into
 * @bits until the array is full or the tree is exhausted. When more than 8
 * slots remain, entries from @pending that start within 32768 bytes of the
 * running "lookup" position are appended as well.
 *
 * NOTE(review): node_start is unsigned long while @last is u64; where
 * unsigned long is 32 bits wide this truncates the address - consider u64.
 */
7597 static int pick_next_pending(struct cache_tree *pending,
7598 struct cache_tree *reada,
7599 struct cache_tree *nodes,
7600 u64 last, struct block_info *bits, int bits_nr,
7603 unsigned long node_start = last;
7604 struct cache_extent *cache;
7607 cache = search_cache_extent(reada, 0);
7609 bits[0].start = cache->start;
7610 bits[0].size = cache->size;
/* Back up 32768 bytes so nodes slightly before @last are also found. */
7615 if (node_start > 32768)
7616 node_start -= 32768;
7618 cache = search_cache_extent(nodes, node_start);
7620 cache = search_cache_extent(nodes, 0);
7623 cache = search_cache_extent(pending, 0);
7628 bits[ret].start = cache->start;
7629 bits[ret].size = cache->size;
7630 cache = next_cache_extent(cache);
7632 } while (cache && ret < bits_nr);
7638 bits[ret].start = cache->start;
7639 bits[ret].size = cache->size;
7640 cache = next_cache_extent(cache);
7642 } while (cache && ret < bits_nr);
/* Top up with nearby pending ranges while there is spare room. */
7644 if (bits_nr - ret > 8) {
7645 u64 lookup = bits[0].start + bits[0].size;
7646 struct cache_extent *next;
7647 next = search_cache_extent(pending, lookup);
7649 if (next->start - lookup > 32768)
7651 bits[ret].start = next->start;
7652 bits[ret].size = next->size;
7653 lookup = next->start + next->size;
7657 next = next_cache_extent(next);
/*
 * Release callback for a chunk cache entry: recovers the chunk_record that
 * embeds @cache and unlinks it from its ->list and ->dextents lists.
 */
7665 static void free_chunk_record(struct cache_extent *cache)
7667 struct chunk_record *rec;
7669 rec = container_of(cache, struct chunk_record, cache);
7670 list_del_init(&rec->list);
7671 list_del_init(&rec->dextents);
/* Free every entry of @chunk_cache, using free_chunk_record() per entry. */
7675 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
7677 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Release callback for a device cache entry: recovers the device_record
 * that embeds the rb-tree @node.
 */
7680 static void free_device_record(struct rb_node *node)
7682 struct device_record *rec;
7684 rec = container_of(node, struct device_record, node);
/*
 * Macro invocation pairing the name device_cache with the
 * free_device_record() callback.
 * NOTE(review): presumably this expands to the tree-freeing function for
 * the device cache - confirm against the macro definition.
 */
7688 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into @tree: first into the cache tree keyed by
 * bg_rec->cache, then at the tail of the tree->block_groups list.
 */
7690 int insert_block_group_record(struct block_group_tree *tree,
7691 struct block_group_record *bg_rec)
7695 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7699 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Release callback for a block group cache entry: recovers the
 * block_group_record that embeds @cache and unlinks it from its list.
 */
7703 static void free_block_group_record(struct cache_extent *cache)
7705 struct block_group_record *rec;
7707 rec = container_of(cache, struct block_group_record, cache);
7708 list_del_init(&rec->list);
/* Free every entry of @tree, using free_block_group_record() per entry. */
7712 void free_block_group_tree(struct block_group_tree *tree)
7714 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into @tree with insert_cache_extent2() and queue it on
 * both orphan lists (no_chunk_orphans and no_device_orphans).
 */
7717 int insert_device_extent_record(struct device_extent_tree *tree,
7718 struct device_extent_record *de_rec)
7723 * Device extent is a bit different from the other extents, because
7724 * the extents which belong to the different devices may have the
7725 * same start and size, so we need use the special extent cache
7726 * search/insert functions.
7728 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7732 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7733 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Release callback for a device extent cache entry: recovers the
 * device_extent_record that embeds @cache and unlinks it from whichever of
 * its chunk_list / device_list it is still on.
 */
7737 static void free_device_extent_record(struct cache_extent *cache)
7739 struct device_extent_record *rec;
7741 rec = container_of(cache, struct device_extent_record, cache);
7742 if (!list_empty(&rec->chunk_list))
7743 list_del_init(&rec->chunk_list);
7744 if (!list_empty(&rec->device_list))
7745 list_del_init(&rec->device_list);
/* Free every entry of @tree, using free_device_extent_record() per entry. */
7749 void free_device_extent_tree(struct device_extent_tree *tree)
7751 cache_tree_free_extents(&tree->tree, free_device_extent_record);
7754 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref.
 *
 * Compiled only with BTRFS_COMPAT_EXTENT_TREE_V0. A ref objectid below
 * BTRFS_FIRST_FREE_OBJECTID is recorded with add_tree_backref(); the
 * add_data_backref() call passes the v0 ref count as num_refs. In both
 * calls key.objectid is the extent bytenr and key.offset the parent.
 */
7755 static int process_extent_ref_v0(struct cache_tree *extent_cache,
7756 struct extent_buffer *leaf, int slot)
7758 struct btrfs_extent_ref_v0 *ref0;
7759 struct btrfs_key key;
7762 btrfs_item_key_to_cpu(leaf, &key, slot);
7763 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
7764 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
7765 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
7768 ret = add_data_backref(extent_cache, key.objectid, key.offset,
7769 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the chunk item at @slot of @leaf.
 *
 * The record is calloc()ed with room for the item's stripes
 * (btrfs_chunk_record_size()). The cache range is [key->offset, chunk
 * length); the key fields, header generation and chunk attributes are
 * copied, followed by the devid, offset and device UUID of each stripe.
 */
7775 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7776 struct btrfs_key *key,
7779 struct btrfs_chunk *ptr;
7780 struct chunk_record *rec;
7783 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7784 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7786 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7788 fprintf(stderr, "memory allocation failed\n");
7792 INIT_LIST_HEAD(&rec->list);
7793 INIT_LIST_HEAD(&rec->dextents);
/* The cache entry is keyed by the chunk's logical start and length. */
7796 rec->cache.start = key->offset;
7797 rec->cache.size = btrfs_chunk_length(leaf, ptr);
7799 rec->generation = btrfs_header_generation(leaf);
7801 rec->objectid = key->objectid;
7802 rec->type = key->type;
7803 rec->offset = key->offset;
7805 rec->length = rec->cache.size;
7806 rec->owner = btrfs_chunk_owner(leaf, ptr);
7807 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7808 rec->type_flags = btrfs_chunk_type(leaf, ptr);
7809 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7810 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7811 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7812 rec->num_stripes = num_stripes;
7813 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* Copy the per-stripe device id, offset and device UUID. */
7815 for (i = 0; i < rec->num_stripes; ++i) {
7816 rec->stripes[i].devid =
7817 btrfs_stripe_devid_nr(leaf, ptr, i);
7818 rec->stripes[i].offset =
7819 btrfs_stripe_offset_nr(leaf, ptr, i);
7820 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7821 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb and add it to @chunk_cache.
 *
 * The item is checked with btrfs_check_chunk_valid(); an invalid chunk is
 * reported with error(). A record is then built with
 * btrfs_new_chunk_record() and inserted; a failed insert is reported as an
 * already existing chunk.
 */
7828 static int process_chunk_item(struct cache_tree *chunk_cache,
7829 struct btrfs_key *key, struct extent_buffer *eb,
7832 struct chunk_record *rec;
7833 struct btrfs_chunk *chunk;
7836 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7838 * Do extra check for this chunk item,
7840 * It's still possible one can craft a leaf with CHUNK_ITEM, with
7841 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7842 * and owner<->key_type check.
7844 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7847 error("chunk(%llu, %llu) is not valid, ignore it",
7848 key->offset, btrfs_chunk_length(eb, chunk));
7851 rec = btrfs_new_chunk_record(eb, key, slot);
7852 ret = insert_cache_extent(chunk_cache, &rec->cache);
7854 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7855 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it
 * into the rb-tree @dev_cache, ordered by device_record_compare().
 *
 * A failed insert is reported as an already existing device.
 *
 * NOTE(review): rec->devid is assigned twice, first from key->offset and
 * then from btrfs_device_id(); the first assignment looks redundant.
 */
7862 static int process_device_item(struct rb_root *dev_cache,
7863 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7865 struct btrfs_dev_item *ptr;
7866 struct device_record *rec;
7869 ptr = btrfs_item_ptr(eb,
7870 slot, struct btrfs_dev_item);
7872 rec = malloc(sizeof(*rec));
7874 fprintf(stderr, "memory allocation failed\n");
7878 rec->devid = key->offset;
7879 rec->generation = btrfs_header_generation(eb);
7881 rec->objectid = key->objectid;
7882 rec->type = key->type;
7883 rec->offset = key->offset;
/* The on-disk device id overrides the value taken from the key above. */
7885 rec->devid = btrfs_device_id(eb, ptr);
7886 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7887 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7889 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7891 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a zero-initialised block_group_record from the block group item at
 * @slot of @leaf.
 *
 * The cache range is [key->objectid, key->offset); the key fields, header
 * generation and the item's flags are copied into the record.
 */
7898 struct block_group_record *
7899 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7902 struct btrfs_block_group_item *ptr;
7903 struct block_group_record *rec;
7905 rec = calloc(1, sizeof(*rec));
7907 fprintf(stderr, "memory allocation failed\n");
7911 rec->cache.start = key->objectid;
7912 rec->cache.size = key->offset;
7914 rec->generation = btrfs_header_generation(leaf);
7916 rec->objectid = key->objectid;
7917 rec->type = key->type;
7918 rec->offset = key->offset;
7920 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7921 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7923 INIT_LIST_HEAD(&rec->list);
/*
 * Build a record for the block group item at @slot of @eb and insert it
 * into @block_group_cache; a failed insert is reported as an already
 * existing block group.
 */
7928 static int process_block_group_item(struct block_group_tree *block_group_cache,
7929 struct btrfs_key *key,
7930 struct extent_buffer *eb, int slot)
7932 struct block_group_record *rec;
7935 rec = btrfs_new_block_group_record(eb, key, slot);
7936 ret = insert_block_group_record(block_group_cache, rec);
7938 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7939 rec->objectid, rec->offset);
/*
 * Build a zero-initialised device_extent_record from the dev extent item
 * at @slot of @leaf.
 *
 * The cache entry uses key->objectid as its objectid, key->offset as its
 * start and the item's length as its size. The key fields, header
 * generation and the owning chunk's objectid/offset are copied as well.
 */
7946 struct device_extent_record *
7947 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7948 struct btrfs_key *key, int slot)
7950 struct device_extent_record *rec;
7951 struct btrfs_dev_extent *ptr;
7953 rec = calloc(1, sizeof(*rec));
7955 fprintf(stderr, "memory allocation failed\n");
7959 rec->cache.objectid = key->objectid;
7960 rec->cache.start = key->offset;
7962 rec->generation = btrfs_header_generation(leaf);
7964 rec->objectid = key->objectid;
7965 rec->type = key->type;
7966 rec->offset = key->offset;
7968 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7969 rec->chunk_objecteid =
7970 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7972 btrfs_dev_extent_chunk_offset(leaf, ptr);
7973 rec->length = btrfs_dev_extent_length(leaf, ptr);
7974 rec->cache.size = rec->length;
7976 INIT_LIST_HEAD(&rec->chunk_list);
7977 INIT_LIST_HEAD(&rec->device_list);
/*
 * Build a record for the dev extent item at @slot of @eb and insert it
 * into @dev_extent_cache; a failed insert is reported as an already
 * existing device extent.
 */
7983 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7984 struct btrfs_key *key, struct extent_buffer *eb,
7987 struct device_extent_record *rec;
7990 rec = btrfs_new_device_extent_record(eb, key, slot);
7991 ret = insert_device_extent_record(dev_extent_cache, rec);
7994 "Device extent[%llu, %llu, %llu] existed.\n",
7995 rec->objectid, rec->offset, rec->length);
/*
 * Parse the extent item at @slot of @eb into @extent_cache.
 *
 * The extent length is the nodesize for BTRFS_METADATA_ITEM_KEY and
 * key.offset otherwise. Unaligned start addresses, metadata extents whose
 * length differs from nodesize and data extents with unaligned length are
 * reported with error(). An extent record is added from a template, then
 * the inline refs following the extent item (and the tree block info, for
 * tree blocks keyed by EXTENT_ITEM) are walked and turned into tree or
 * data backrefs by type.
 *
 * NOTE(review): the return values of the second add_extent_rec() call and
 * of both add_data_backref() calls are not stored.
 */
8002 static int process_extent_item(struct btrfs_root *root,
8003 struct cache_tree *extent_cache,
8004 struct extent_buffer *eb, int slot)
8006 struct btrfs_extent_item *ei;
8007 struct btrfs_extent_inline_ref *iref;
8008 struct btrfs_extent_data_ref *dref;
8009 struct btrfs_shared_data_ref *sref;
8010 struct btrfs_key key;
8011 struct extent_record tmpl;
8016 u32 item_size = btrfs_item_size_nr(eb, slot);
8022 btrfs_item_key_to_cpu(eb, &key, slot);
8024 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8026 num_bytes = root->fs_info->nodesize;
8028 num_bytes = key.offset;
8031 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
8032 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
8033 key.objectid, root->fs_info->sectorsize);
/* Items smaller than btrfs_extent_item are only handled as v0 items. */
8036 if (item_size < sizeof(*ei)) {
8037 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8038 struct btrfs_extent_item_v0 *ei0;
8039 if (item_size != sizeof(*ei0)) {
8041 "invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d",
8042 key.objectid, key.type, key.offset,
8043 btrfs_header_bytenr(eb), slot);
8046 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
8047 refs = btrfs_extent_refs_v0(eb, ei0);
8051 memset(&tmpl, 0, sizeof(tmpl));
8052 tmpl.start = key.objectid;
8053 tmpl.nr = num_bytes;
8054 tmpl.extent_item_refs = refs;
8055 tmpl.metadata = metadata;
8057 tmpl.max_size = num_bytes;
8059 return add_extent_rec(extent_cache, &tmpl);
8062 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
8063 refs = btrfs_extent_refs(eb, ei);
8064 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
8068 if (metadata && num_bytes != root->fs_info->nodesize) {
8069 error("ignore invalid metadata extent, length %llu does not equal to %u",
8070 num_bytes, root->fs_info->nodesize);
8073 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
8074 error("ignore invalid data extent, length %llu is not aligned to %u",
8075 num_bytes, root->fs_info->sectorsize);
8079 memset(&tmpl, 0, sizeof(tmpl));
8080 tmpl.start = key.objectid;
8081 tmpl.nr = num_bytes;
8082 tmpl.extent_item_refs = refs;
8083 tmpl.metadata = metadata;
8085 tmpl.max_size = num_bytes;
8086 add_extent_rec(extent_cache, &tmpl);
/* Inline refs start right after the extent item ... */
8088 ptr = (unsigned long)(ei + 1);
/* ... or after the tree block info for EXTENT_ITEM-keyed tree blocks. */
8089 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
8090 key.type == BTRFS_EXTENT_ITEM_KEY)
8091 ptr += sizeof(struct btrfs_tree_block_info);
8093 end = (unsigned long)ei + item_size;
8095 iref = (struct btrfs_extent_inline_ref *)ptr;
8096 type = btrfs_extent_inline_ref_type(eb, iref);
8097 offset = btrfs_extent_inline_ref_offset(eb, iref);
8099 case BTRFS_TREE_BLOCK_REF_KEY:
8100 ret = add_tree_backref(extent_cache, key.objectid,
8104 "add_tree_backref failed (extent items tree block): %s",
8107 case BTRFS_SHARED_BLOCK_REF_KEY:
8108 ret = add_tree_backref(extent_cache, key.objectid,
8112 "add_tree_backref failed (extent items shared block): %s",
8115 case BTRFS_EXTENT_DATA_REF_KEY:
8116 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8117 add_data_backref(extent_cache, key.objectid, 0,
8118 btrfs_extent_data_ref_root(eb, dref),
8119 btrfs_extent_data_ref_objectid(eb,
8121 btrfs_extent_data_ref_offset(eb, dref),
8122 btrfs_extent_data_ref_count(eb, dref),
8125 case BTRFS_SHARED_DATA_REF_KEY:
8126 sref = (struct btrfs_shared_data_ref *)(iref + 1);
8127 add_data_backref(extent_cache, key.objectid, offset,
8129 btrfs_shared_data_ref_count(eb, sref),
8133 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
8134 key.objectid, key.type, num_bytes);
/* Advance by the size of the inline ref just handled. */
8137 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the range [@offset, @offset + @bytes) of block group @cache
 * is covered by exactly one free space entry, after carving out any
 * superblock mirror stripes that fall inside it.
 *
 * Each superblock mirror is mapped into the block group with
 * btrfs_rmap_block(). A stripe overlapping the range trims it from the
 * front or the back; a stripe in the middle splits it, with the left part
 * checked by a recursive call and the right part continuing here. The
 * remaining range must match a free space entry in both offset and size;
 * the entry is then unlinked from the free space ctl.
 */
8144 static int check_cache_range(struct btrfs_root *root,
8145 struct btrfs_block_group_cache *cache,
8146 u64 offset, u64 bytes)
8148 struct btrfs_free_space *entry;
8154 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
8155 bytenr = btrfs_sb_offset(i);
8156 ret = btrfs_rmap_block(root->fs_info,
8157 cache->key.objectid, bytenr, 0,
8158 &logical, &nr, &stripe_len);
/* Stripe entirely before the range. */
8163 if (logical[nr] + stripe_len <= offset)
/* Stripe entirely after the range. */
8165 if (offset + bytes <= logical[nr])
/* Stripe starts exactly at the range start: trim the front. */
8167 if (logical[nr] == offset) {
8168 if (stripe_len >= bytes) {
8172 bytes -= stripe_len;
8173 offset += stripe_len;
/* Stripe starts before the range and overlaps its front. */
8174 } else if (logical[nr] < offset) {
8175 if (logical[nr] + stripe_len >=
8180 bytes = (offset + bytes) -
8181 (logical[nr] + stripe_len);
8182 offset = logical[nr] + stripe_len;
8185 * Could be tricky, the super may land in the
8186 * middle of the area we're checking. First
8187 * check the easiest case, it's at the end.
8189 if (logical[nr] + stripe_len >=
8191 bytes = logical[nr] - offset;
8195 /* Check the left side */
8196 ret = check_cache_range(root, cache,
8198 logical[nr] - offset);
8204 /* Now we continue with the right side */
8205 bytes = (offset + bytes) -
8206 (logical[nr] + stripe_len);
8207 offset = logical[nr] + stripe_len;
8214 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8216 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8217 offset, offset+bytes);
8221 if (entry->offset != offset) {
8222 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8227 if (entry->bytes != bytes) {
8228 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8229 bytes, entry->bytes, offset);
/* Matched entries are removed so leftovers can be detected afterwards. */
8233 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the free space entries of block group @cache against the
 * extent tree.
 *
 * The extent tree is scanned from max(block group start,
 * BTRFS_SUPER_INFO_OFFSET). "last" tracks the end of the previous extent;
 * every gap between "last" and the next EXTENT_ITEM/METADATA_ITEM is
 * passed to check_cache_range(), as is the tail between the final extent
 * and the end of the block group. Entries still left in the free space
 * ctl's offset tree afterwards are reported.
 */
8238 static int verify_space_cache(struct btrfs_root *root,
8239 struct btrfs_block_group_cache *cache)
8241 struct btrfs_path path;
8242 struct extent_buffer *leaf;
8243 struct btrfs_key key;
/* The scan always runs on the extent root, whatever @root was passed. */
8247 root = root->fs_info->extent_root;
8249 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8251 btrfs_init_path(&path);
8252 key.objectid = last;
8254 key.type = BTRFS_EXTENT_ITEM_KEY;
8255 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8260 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8261 ret = btrfs_next_leaf(root, &path);
8269 leaf = path.nodes[0];
8270 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Keys at or past the end of the block group are outside the scan. */
8271 if (key.objectid >= cache->key.offset + cache->key.objectid)
8273 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8274 key.type != BTRFS_METADATA_ITEM_KEY) {
/* No gap before this extent: just advance "last" past it. */
8279 if (last == key.objectid) {
8280 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8281 last = key.objectid + key.offset;
8283 last = key.objectid + root->fs_info->nodesize;
/* The gap [last, key.objectid) should be covered by free space. */
8288 ret = check_cache_range(root, cache, last,
8289 key.objectid - last);
8292 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8293 last = key.objectid + key.offset;
8295 last = key.objectid + root->fs_info->nodesize;
/* Tail of the block group after the final extent. */
8299 if (last < cache->key.objectid + cache->key.offset)
8300 ret = check_cache_range(root, cache, last,
8301 cache->key.objectid +
8302 cache->key.offset - last);
8305 btrfs_release_path(&path);
8308 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8309 fprintf(stderr, "There are still entries left in the space "
/*
 * Verify the free space information of the block groups of a filesystem.
 *
 * A message is printed when the super block cache generation is set (not
 * -1ULL) and differs from the super block generation. Block groups are looked
 * up starting just past the primary super block; `start` is advanced to the
 * end of each group found. For each group the free space ctl is initialised
 * if missing, previously loaded entries are removed, the free space is loaded
 * (load_free_space_tree() when the FREE_SPACE_TREE compat_ro feature is set,
 * with super stripes excluded around the load; load_free_space_cache() is the
 * other loader called here) and the result is checked by
 * verify_space_cache().
 *
 * Progress reporting is started through the global ctx when enabled.
 *
 * Returns -EINVAL when `error` is non-zero, 0 otherwise.
 */
8317 static int check_space_cache(struct btrfs_root *root)
8319 struct btrfs_block_group_cache *cache;
8320 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8324 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8325 btrfs_super_generation(root->fs_info->super_copy) !=
8326 btrfs_super_cache_generation(root->fs_info->super_copy)) {
8327 printf("cache and super generation don't match, space cache "
8328 "will be invalidated\n");
8332 if (ctx.progress_enabled) {
8333 ctx.tp = TASK_FREE_SPACE;
8334 task_start(ctx.info);
8338 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* The next lookup continues right after the block group just found. */
8342 start = cache->key.objectid + cache->key.offset;
8343 if (!cache->free_space_ctl) {
8344 if (btrfs_init_free_space_ctl(cache,
8345 root->fs_info->sectorsize)) {
8350 btrfs_remove_free_space_cache(cache);
8353 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8354 ret = exclude_super_stripes(root, cache);
8356 fprintf(stderr, "could not exclude super stripes: %s\n",
8361 ret = load_free_space_tree(root->fs_info, cache);
8362 free_excluded_extents(root, cache);
8364 fprintf(stderr, "could not load free space tree: %s\n",
8371 ret = load_free_space_cache(root->fs_info, cache);
8376 ret = verify_space_cache(root, cache);
8378 fprintf(stderr, "cache appears valid but isn't %Lu\n",
8379 cache->key.objectid);
8384 task_stop(ctx.info);
8386 return error ? -EINVAL : 0;
/*
 * Read the data of one csum item range from disk and compare its checksums
 * with the ones stored in the csum leaf.
 *
 * @root:        root used to reach fs_info
 * @bytenr:      logical start of the data covered by the csum item
 * @num_bytes:   length of the data; tested against fs_info->sectorsize
 * @leaf_offset: offset of the csum item payload inside @eb
 * @eb:          leaf holding the expected checksums
 *
 * The data is read into a malloc()ed buffer of @num_bytes and checksummed in
 * steps of fs_info->sectorsize. The expected value for each step is read from
 * @eb at leaf_offset + (sector index * csum_size). A mismatch is printed to
 * stderr together with the mirror number, and the number of copies is looked
 * up to decide whether another mirror can be tried.
 */
8389 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8390 u64 num_bytes, unsigned long leaf_offset,
8391 struct extent_buffer *eb) {
8393 struct btrfs_fs_info *fs_info = root->fs_info;
8395 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8397 unsigned long csum_offset;
8401 u64 data_checked = 0;
8407 if (num_bytes % fs_info->sectorsize)
8410 data = malloc(num_bytes);
8414 while (offset < num_bytes) {
8417 read_len = num_bytes - offset;
8418 /* read as much space once a time */
8419 ret = read_extent_data(fs_info, data + offset,
8420 bytenr + offset, &read_len, mirror);
/* The step below is fs_info->sectorsize, not a hard-coded 4k. */
8424 /* verify every 4k data's checksum */
8425 while (data_checked < read_len) {
8427 tmp = offset + data_checked;
8429 csum = btrfs_csum_data((char *)data + tmp,
8430 csum, fs_info->sectorsize);
8431 btrfs_csum_final(csum, (u8 *)&csum);
/* One checksum of csum_size bytes is stored per sector, in sector order. */
8433 csum_offset = leaf_offset +
8434 tmp / fs_info->sectorsize * csum_size;
8435 read_extent_buffer(eb, (char *)&csum_expected,
8436 csum_offset, csum_size);
8437 /* try another mirror */
8438 if (csum != csum_expected) {
8439 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8440 mirror, bytenr + tmp,
8441 csum, csum_expected);
8442 num_copies = btrfs_num_copies(root->fs_info,
8444 if (mirror < num_copies - 1) {
8449 data_checked += fs_info->sectorsize;
/*
 * Check that a byte range is covered by EXTENT_ITEM records of the extent
 * tree.
 *
 * The search starts at (bytenr, EXTENT_ITEM, -1) and steps back to the
 * closest preceding extent item, then walks forward. Each extent item that
 * overlaps the range trims the covered part off [bytenr, bytenr + num_bytes).
 * When an extent lies in the middle of the range, the right-hand remainder is
 * checked by a recursive call and the loop continues with the left-hand part.
 * If bytes are still uncovered at the end and no error occurred, the range is
 * reported on stderr.
 *
 * NOTE(review): the search runs on fs_info->extent_root while the
 * btrfs_prev_leaf()/btrfs_next_leaf() calls are given @root - confirm the
 * callers make this equivalent.
 */
8458 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8461 struct btrfs_path path;
8462 struct extent_buffer *leaf;
8463 struct btrfs_key key;
8466 btrfs_init_path(&path);
8467 key.objectid = bytenr;
8468 key.type = BTRFS_EXTENT_ITEM_KEY;
8469 key.offset = (u64)-1;
8472 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8475 fprintf(stderr, "Error looking up extent record %d\n", ret);
8476 btrfs_release_path(&path);
8479 if (path.slots[0] > 0) {
8482 ret = btrfs_prev_leaf(root, &path);
8485 } else if (ret > 0) {
8492 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8495 * Block group items come before extent items if they have the same
8496 * bytenr, so walk back one more just in case. Dear future traveller,
8497 * first congrats on mastering time travel. Now if it's not too much
8498 * trouble could you go back to 2006 and tell Chris to make the
8499 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8500 * EXTENT_ITEM_KEY please?
8502 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8503 if (path.slots[0] > 0) {
8506 ret = btrfs_prev_leaf(root, &path);
8509 } else if (ret > 0) {
8514 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8518 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8519 ret = btrfs_next_leaf(root, &path);
8521 fprintf(stderr, "Error going to next leaf "
8523 btrfs_release_path(&path);
8529 leaf = path.nodes[0];
8530 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8531 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8535 if (key.objectid + key.offset < bytenr) {
8539 if (key.objectid > bytenr + num_bytes)
/* Extent starts at the range start: drop the covered head of the range. */
8542 if (key.objectid == bytenr) {
8543 if (key.offset >= num_bytes) {
8547 num_bytes -= key.offset;
8548 bytenr += key.offset;
8549 } else if (key.objectid < bytenr) {
8550 if (key.objectid + key.offset >= bytenr + num_bytes) {
8554 num_bytes = (bytenr + num_bytes) -
8555 (key.objectid + key.offset);
8556 bytenr = key.objectid + key.offset;
8558 if (key.objectid + key.offset < bytenr + num_bytes) {
8559 u64 new_start = key.objectid + key.offset;
8560 u64 new_bytes = bytenr + num_bytes - new_start;
8563 * Weird case, the extent is in the middle of
8564 * our range, we'll have to search one side
8565 * and then the other. Not sure if this happens
8566 * in real life, but no harm in coding it up
8567 * anyway just in case.
8569 btrfs_release_path(&path);
8570 ret = check_extent_exists(root, new_start,
8573 fprintf(stderr, "Right section didn't "
8577 num_bytes = key.objectid - bytenr;
8580 num_bytes = key.objectid - bytenr;
8587 if (num_bytes && !ret) {
8588 fprintf(stderr, "There are no extents for csum range "
8589 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8593 btrfs_release_path(&path);
/*
 * Walk the csum tree and check that every checksummed byte range is backed by
 * an extent record.
 *
 * @root is replaced by fs_info->csum_root; a csum root whose node is not
 * uptodate is reported as "No valid csum tree found". The walk starts at
 * (BTRFS_EXTENT_CSUM_OBJECTID, BTRFS_EXTENT_CSUM_KEY). For each csum item the
 * covered data length is (item size / csum_size) * sectorsize. When the
 * global check_data_csum is set, the data itself is verified through
 * check_extent_csums(). Adjacent csum items are merged into one running range
 * [offset, offset + num_bytes); when an item does not continue that range,
 * the accumulated range is passed to check_extent_exists() and a new range
 * starts at the key offset of the item.
 */
8597 static int check_csums(struct btrfs_root *root)
8599 struct btrfs_path path;
8600 struct extent_buffer *leaf;
8601 struct btrfs_key key;
8602 u64 offset = 0, num_bytes = 0;
8603 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8607 unsigned long leaf_offset;
8609 root = root->fs_info->csum_root;
8610 if (!extent_buffer_uptodate(root->node)) {
8611 fprintf(stderr, "No valid csum tree found\n");
8615 btrfs_init_path(&path);
8616 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8617 key.type = BTRFS_EXTENT_CSUM_KEY;
8619 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8621 fprintf(stderr, "Error searching csum tree %d\n", ret);
8622 btrfs_release_path(&path);
8626 if (ret > 0 && path.slots[0])
8631 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8632 ret = btrfs_next_leaf(root, &path);
8634 fprintf(stderr, "Error going to next leaf "
8641 leaf = path.nodes[0];
8643 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8644 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Each csum_size bytes of item payload account for one sector of data. */
8649 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8650 csum_size) * root->fs_info->sectorsize;
8651 if (!check_data_csum)
8652 goto skip_csum_check;
8653 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8654 ret = check_extent_csums(root, key.offset, data_len,
8660 offset = key.offset;
8661 } else if (key.offset != offset + num_bytes) {
8662 ret = check_extent_exists(root, offset, num_bytes);
8664 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8665 "there is no extent record\n",
8666 offset, offset+num_bytes);
8669 offset = key.offset;
8672 num_bytes += data_len;
8676 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field in (objectid, type, offset)
 * order, testing at each level whether @key is strictly smaller.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably the result is non-zero when @key sorts before @drop_key and
 * zero otherwise - confirm against the full source.
 */
8680 static int is_dropped_key(struct btrfs_key *key,
8681 struct btrfs_key *drop_key) {
8682 if (key->objectid < drop_key->objectid)
8684 else if (key->objectid == drop_key->objectid) {
8685 if (key->type < drop_key->type)
8687 else if (key->type == drop_key->type) {
8688 if (key->offset < drop_key->offset)
8696 * Here are the rules for FULL_BACKREF.
8698 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8699 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8701 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
8702 * if it happened after the relocation occurred since we'll have dropped the
8703 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8704 * have no real way to know for sure.
8706 * We process the blocks one root at a time, and we start from the lowest root
8707 * objectid and go to the highest. So we can just lookup the owner backref for
8708 * the record and if we don't find it then we know it doesn't exist and we have
8711 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
8712 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8713 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether the tree block @buf should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF and record a disagreement on its extent
 * record.
 *
 * @extent_cache: cache tree holding the extent records; @buf->start is looked
 *                up in it
 * @buf:          tree block being examined
 * @ri:           root item record of the root the block was reached from
 *
 * The decision uses, in the order visible here: the root objectid compared
 * with BTRFS_FIRST_FREE_OBJECTID, whether @buf is the root node itself
 * (buf->start == ri->bytenr), the BTRFS_HEADER_FLAG_RELOC header flag, the
 * header owner compared with ri->objectid, and finally a lookup of the owner
 * tree backref. On either outcome rec->bad_full_backref is set when
 * rec->flag_block_full_backref already holds the opposite decision (it is
 * neither FLAG_UNSET nor the value just computed).
 */
8715 static int calc_extent_flag(struct cache_tree *extent_cache,
8716 struct extent_buffer *buf,
8717 struct root_item_record *ri,
8720 struct extent_record *rec;
8721 struct cache_extent *cache;
8722 struct tree_backref *tback;
8725 cache = lookup_cache_extent(extent_cache, buf->start, 1);
8726 /* we have added this extent before */
8730 rec = container_of(cache, struct extent_record, cache);
8733 * Except file/reloc tree, we can not have
8736 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8741 if (buf->start == ri->bytenr)
8744 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8747 owner = btrfs_header_owner(buf);
8748 if (owner == ri->objectid)
8751 tback = find_tree_backref(rec, 0, owner);
/* A previously recorded decision of 1 contradicts the result on this path. */
8756 if (rec->flag_block_full_backref != FLAG_UNSET &&
8757 rec->flag_block_full_backref != 0)
8758 rec->bad_full_backref = 1;
8761 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* A previously recorded decision of 0 contradicts FULL_BACKREF being set. */
8762 if (rec->flag_block_full_backref != FLAG_UNSET &&
8763 rec->flag_block_full_backref != 1)
8764 rec->bad_full_backref = 1;
/*
 * Print a one-line diagnostic to stderr naming a key type that was found in
 * a root where it does not belong. The key type and the root id are rendered
 * by print_key_type() and print_objectid().
 */
8768 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8770 fprintf(stderr, "Invalid key type(");
8771 print_key_type(stderr, 0, key_type);
8772 fprintf(stderr, ") found in root(");
8773 print_objectid(stderr, rootid, 0);
8774 fprintf(stderr, ")\n");
8778 * Check if the key is valid with its extent buffer.
8780 * This is an early check in case an invalid key exists in an extent buffer
8781 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check that a key type may appear in the tree identified by @rootid.
 *
 * Visible rules: DEV_ITEM and CHUNK_ITEM keys are tied to the chunk tree;
 * EXTENT_ITEM, METADATA_ITEM and BLOCK_GROUP_ITEM keys to the extent tree;
 * ROOT_ITEM keys to the root tree; DEV_EXTENT keys to the dev tree. A
 * violation is reported through report_mismatch_key_root().
 *
 * NOTE(review): every other case label is a *_KEY type, but
 * BTRFS_CSUM_TREE_OBJECTID is a tree objectid. It looks as if a csum key type
 * was intended, yet the guarding condition is only partly visible here, so
 * confirm the intended rule before changing the label.
 */
8784 static int check_type_with_root(u64 rootid, u8 key_type)
8787 /* Only valid in chunk tree */
8788 case BTRFS_DEV_ITEM_KEY:
8789 case BTRFS_CHUNK_ITEM_KEY:
8790 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8793 /* valid in csum and log tree */
8794 case BTRFS_CSUM_TREE_OBJECTID:
8795 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8799 case BTRFS_EXTENT_ITEM_KEY:
8800 case BTRFS_METADATA_ITEM_KEY:
8801 case BTRFS_BLOCK_GROUP_ITEM_KEY:
8802 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8805 case BTRFS_ROOT_ITEM_KEY:
8806 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8809 case BTRFS_DEV_EXTENT_KEY:
8810 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8816 report_mismatch_key_root(key_type, rootid);
/*
 * Process the next pending tree block of the root described by @ri.
 *
 * Steps visible in this function:
 *  1. pick_next_pending() selects a batch of blocks into @bits; the picked
 *     blocks are added to @reada and readahead is issued for them.
 *  2. bits[0] becomes the block to process; *last is updated and the block
 *     is removed from the @pending, @reada and @nodes trees.
 *  3. The parent generation is taken from the extent record in
 *     @extent_cache and the block is read; an unreadable block is recorded
 *     through record_bad_block_io().
 *  4. The extent flags are obtained (btrfs_lookup_extent_info() when the
 *     global init_extent_tree is not set, calc_extent_flag() otherwise or as
 *     fallback) and checked against the owner, the reloc header flag and
 *     ri->last_snapshot; disagreements set rec->bad_full_backref.
 *  5. check_block() validates the block.
 *  6. Leaf: every item is dispatched on its key type to the matching
 *     process_*() / add_*_backref() helper; orphan items are queued on the
 *     global delete_items list; file extents update the data byte counters
 *     and add a data backref.
 *     Node: for every child pointer an extent record and a tree backref are
 *     added and the child is queued with add_pending().
 *  7. The global btree statistics are updated and the buffer is released.
 *
 * NOTE(review): rec is initialised to NULL and only assigned when the
 * @extent_cache lookup succeeds, while several statements below write
 * through rec - confirm a guard exists in the lines not visible here.
 */
8820 static int run_next_block(struct btrfs_root *root,
8821 struct block_info *bits,
8824 struct cache_tree *pending,
8825 struct cache_tree *seen,
8826 struct cache_tree *reada,
8827 struct cache_tree *nodes,
8828 struct cache_tree *extent_cache,
8829 struct cache_tree *chunk_cache,
8830 struct rb_root *dev_cache,
8831 struct block_group_tree *block_group_cache,
8832 struct device_extent_tree *dev_extent_cache,
8833 struct root_item_record *ri)
8835 struct btrfs_fs_info *fs_info = root->fs_info;
8836 struct extent_buffer *buf;
8837 struct extent_record *rec = NULL;
8848 struct btrfs_key key;
8849 struct cache_extent *cache;
8852 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8853 bits_nr, &reada_bits);
8858 for(i = 0; i < nritems; i++) {
8859 ret = add_cache_extent(reada, bits[i].start,
8864 /* fixme, get the parent transid */
8865 readahead_tree_block(fs_info, bits[i].start, 0);
/* The first picked block is the one processed by this call. */
8868 *last = bits[0].start;
8869 bytenr = bits[0].start;
8870 size = bits[0].size;
8872 cache = lookup_cache_extent(pending, bytenr, size);
8874 remove_cache_extent(pending, cache);
8877 cache = lookup_cache_extent(reada, bytenr, size);
8879 remove_cache_extent(reada, cache);
8882 cache = lookup_cache_extent(nodes, bytenr, size);
8884 remove_cache_extent(nodes, cache);
8887 cache = lookup_cache_extent(extent_cache, bytenr, size);
8889 rec = container_of(cache, struct extent_record, cache);
8890 gen = rec->parent_generation;
8893 /* fixme, get the real parent transid */
8894 buf = read_tree_block(root->fs_info, bytenr, gen);
8895 if (!extent_buffer_uptodate(buf)) {
8896 record_bad_block_io(root->fs_info,
8897 extent_cache, bytenr, size);
8901 nritems = btrfs_header_nritems(buf);
8904 if (!init_extent_tree) {
8905 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8906 btrfs_header_level(buf), 1, NULL,
8909 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8911 fprintf(stderr, "Couldn't calc extent flags\n");
8912 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8917 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8919 fprintf(stderr, "Couldn't calc extent flags\n");
8920 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8924 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8926 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8927 ri->objectid == btrfs_header_owner(buf)) {
8929 * Ok we got to this block from it's original owner and
8930 * we have FULL_BACKREF set. Relocation can leave
8931 * converted blocks over so this is altogether possible,
8932 * however it's not possible if the generation > the
8933 * last snapshot, so check for this case.
8935 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8936 btrfs_header_generation(buf) > ri->last_snapshot) {
8937 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8938 rec->bad_full_backref = 1;
8943 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8944 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8945 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8946 rec->bad_full_backref = 1;
8950 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8951 rec->flag_block_full_backref = 1;
8955 rec->flag_block_full_backref = 0;
8957 owner = btrfs_header_owner(buf);
8960 ret = check_block(root, extent_cache, buf, flags);
8964 if (btrfs_is_leaf(buf)) {
8965 btree_space_waste += btrfs_leaf_free_space(root, buf);
8966 for (i = 0; i < nritems; i++) {
8967 struct btrfs_file_extent_item *fi;
8968 btrfs_item_key_to_cpu(buf, &key, i);
8970 * Check key type against the leaf owner.
8971 * Could filter quite a lot of early error if
8974 if (check_type_with_root(btrfs_header_owner(buf),
8976 fprintf(stderr, "ignoring invalid key\n");
8979 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8980 process_extent_item(root, extent_cache, buf,
8984 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8985 process_extent_item(root, extent_cache, buf,
8989 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8991 btrfs_item_size_nr(buf, i);
8994 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8995 process_chunk_item(chunk_cache, &key, buf, i);
8998 if (key.type == BTRFS_DEV_ITEM_KEY) {
8999 process_device_item(dev_cache, &key, buf, i);
9002 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
9003 process_block_group_item(block_group_cache,
9007 if (key.type == BTRFS_DEV_EXTENT_KEY) {
9008 process_device_extent_item(dev_extent_cache,
9013 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
9014 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
9015 process_extent_ref_v0(extent_cache, buf, i);
9022 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
9023 ret = add_tree_backref(extent_cache,
9024 key.objectid, 0, key.offset, 0);
9027 "add_tree_backref failed (leaf tree block): %s",
9031 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
9032 ret = add_tree_backref(extent_cache,
9033 key.objectid, key.offset, 0, 0);
9036 "add_tree_backref failed (leaf shared block): %s",
9040 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
9041 struct btrfs_extent_data_ref *ref;
9042 ref = btrfs_item_ptr(buf, i,
9043 struct btrfs_extent_data_ref);
9044 add_data_backref(extent_cache,
9046 btrfs_extent_data_ref_root(buf, ref),
9047 btrfs_extent_data_ref_objectid(buf,
9049 btrfs_extent_data_ref_offset(buf, ref),
9050 btrfs_extent_data_ref_count(buf, ref),
9051 0, root->fs_info->sectorsize);
9054 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
9055 struct btrfs_shared_data_ref *ref;
9056 ref = btrfs_item_ptr(buf, i,
9057 struct btrfs_shared_data_ref);
9058 add_data_backref(extent_cache,
9059 key.objectid, key.offset, 0, 0, 0,
9060 btrfs_shared_data_ref_count(buf, ref),
9061 0, root->fs_info->sectorsize);
9064 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
9065 struct bad_item *bad;
9067 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
/* Remember the orphan item on the global delete_items list. */
9071 bad = malloc(sizeof(struct bad_item));
9074 INIT_LIST_HEAD(&bad->list);
9075 memcpy(&bad->key, &key,
9076 sizeof(struct btrfs_key));
9077 bad->root_id = owner;
9078 list_add_tail(&bad->list, &delete_items);
9081 if (key.type != BTRFS_EXTENT_DATA_KEY)
9083 fi = btrfs_item_ptr(buf, i,
9084 struct btrfs_file_extent_item);
9085 if (btrfs_file_extent_type(buf, fi) ==
9086 BTRFS_FILE_EXTENT_INLINE)
9088 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
/*
 * NOTE(review): the size test below looks at the running global total
 * data_bytes_allocated, not at the size of this extent - confirm intent.
 */
9091 data_bytes_allocated +=
9092 btrfs_file_extent_disk_num_bytes(buf, fi);
9093 if (data_bytes_allocated < root->fs_info->sectorsize) {
9096 data_bytes_referenced +=
9097 btrfs_file_extent_num_bytes(buf, fi);
9098 add_data_backref(extent_cache,
9099 btrfs_file_extent_disk_bytenr(buf, fi),
9100 parent, owner, key.objectid, key.offset -
9101 btrfs_file_extent_offset(buf, fi), 1, 1,
9102 btrfs_file_extent_disk_num_bytes(buf, fi));
9106 struct btrfs_key first_key;
9108 first_key.objectid = 0;
9111 btrfs_item_key_to_cpu(buf, &first_key, 0);
9112 level = btrfs_header_level(buf);
9113 for (i = 0; i < nritems; i++) {
9114 struct extent_record tmpl;
9116 ptr = btrfs_node_blockptr(buf, i);
9117 size = root->fs_info->nodesize;
9118 btrfs_node_key_to_cpu(buf, &key, i);
9120 if ((level == ri->drop_level)
9121 && is_dropped_key(&key, &ri->drop_key)) {
/* Describe the child block in a template extent record. */
9126 memset(&tmpl, 0, sizeof(tmpl));
9127 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
9128 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
9133 tmpl.max_size = size;
9134 ret = add_extent_rec(extent_cache, &tmpl);
9138 ret = add_tree_backref(extent_cache, ptr, parent,
9142 "add_tree_backref failed (non-leaf block): %s",
9148 add_pending(nodes, seen, ptr, size);
9150 add_pending(pending, seen, ptr, size);
/* Unused key pointer slots of a node count as wasted btree space. */
9153 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(fs_info) -
9154 nritems) * sizeof(struct btrfs_key_ptr);
9156 total_btree_bytes += buf->len;
9157 if (fs_root_objectid(btrfs_header_owner(buf)))
9158 total_fs_tree_bytes += buf->len;
9159 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9160 total_extent_tree_bytes += buf->len;
9162 free_extent_buffer(buf);
/*
 * Queue the root node @buf of a tree for processing and register it in the
 * extent cache.
 *
 * A block with header level > 0 is queued on @nodes, a leaf on @pending. An
 * extent record template covering [buf->start, buf->len) is added to
 * @extent_cache. A tree backref is then added: with buf->start as parent
 * when @objectid is BTRFS_TREE_RELOC_OBJECTID or the header backref revision
 * is older than BTRFS_MIXED_BACKREF_REV, otherwise with parent 0 and
 * @objectid as root.
 *
 * NOTE(review): the return value of add_extent_rec() is not used in the
 * visible code.
 */
9166 static int add_root_to_pending(struct extent_buffer *buf,
9167 struct cache_tree *extent_cache,
9168 struct cache_tree *pending,
9169 struct cache_tree *seen,
9170 struct cache_tree *nodes,
9173 struct extent_record tmpl;
9176 if (btrfs_header_level(buf) > 0)
9177 add_pending(nodes, seen, buf->start, buf->len);
9179 add_pending(pending, seen, buf->start, buf->len);
9181 memset(&tmpl, 0, sizeof(tmpl));
9182 tmpl.start = buf->start;
9187 tmpl.max_size = buf->len;
9188 add_extent_rec(extent_cache, &tmpl);
9190 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9191 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9192 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9195 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9200 /* as we fix the tree, we might be deleting blocks that
9201 * we're tracking for repair. This hook makes sure we
9202 * remove any backrefs for blocks as we are fixing them.
/*
 * Keep the in-memory extent records in step with an extent reference that is
 * being dropped.
 *
 * The extent record covering [bytenr, bytenr + num_bytes) is looked up in
 * fs_info->fsck_extent_cache. The reference is treated as a data reference
 * when @owner >= BTRFS_FIRST_FREE_OBJECTID, otherwise as a tree block
 * reference. The matching backref has its found/extent-tree counters reduced
 * (by refs_to_drop for data, by clearing the flags for tree blocks), the
 * counters on the record are reduced accordingly, and finally
 * maybe_free_extent_rec() is called on the record.
 *
 * NOTE(review): both erase conditions read
 * "!found_extent_tree && found_ref". In the tree block branch found_ref is
 * cleared just above, so that condition looks unreachable; "!found_ref" may
 * have been intended - confirm against the lines not visible here.
 */
9204 static int free_extent_hook(struct btrfs_trans_handle *trans,
9205 struct btrfs_root *root,
9206 u64 bytenr, u64 num_bytes, u64 parent,
9207 u64 root_objectid, u64 owner, u64 offset,
9210 struct extent_record *rec;
9211 struct cache_extent *cache;
9213 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* Owners at or above BTRFS_FIRST_FREE_OBJECTID are handled as data. */
9215 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9216 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9220 rec = container_of(cache, struct extent_record, cache);
9222 struct data_backref *back;
9223 back = find_data_backref(rec, parent, root_objectid, owner,
9224 offset, 1, bytenr, num_bytes);
9227 if (back->node.found_ref) {
9228 back->found_ref -= refs_to_drop;
9230 rec->refs -= refs_to_drop;
9232 if (back->node.found_extent_tree) {
9233 back->num_refs -= refs_to_drop;
9234 if (rec->extent_item_refs)
9235 rec->extent_item_refs -= refs_to_drop;
/* Clear the presence flags once the matching counters reach zero. */
9237 if (back->found_ref == 0)
9238 back->node.found_ref = 0;
9239 if (back->num_refs == 0)
9240 back->node.found_extent_tree = 0;
9242 if (!back->node.found_extent_tree && back->node.found_ref) {
9243 rb_erase(&back->node.node, &rec->backref_tree);
9247 struct tree_backref *back;
9248 back = find_tree_backref(rec, parent, root_objectid);
9251 if (back->node.found_ref) {
9254 back->node.found_ref = 0;
9256 if (back->node.found_extent_tree) {
9257 if (rec->extent_item_refs)
9258 rec->extent_item_refs--;
9259 back->node.found_extent_tree = 0;
9261 if (!back->node.found_extent_tree && back->node.found_ref) {
9262 rb_erase(&back->node.node, &rec->backref_tree);
9266 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent tree items that belong to the extent at @bytenr.
 *
 * The extent tree is searched from (bytenr, <type>, -1) and the item found
 * is examined. Items with another objectid end the matching. Items whose
 * type is not one of the extent/backref types listed below are skipped by
 * restarting the search just before them (offset - 1 for type 0, otherwise
 * type - 1 with offset -1). Matching items are announced on stderr and
 * removed with btrfs_del_item(). After an EXTENT_ITEM or METADATA_ITEM is
 * removed, btrfs_update_block_group() is called with the extent size
 * (key offset for extent items, nodesize for metadata items).
 */
9271 static int delete_extent_records(struct btrfs_trans_handle *trans,
9272 struct btrfs_root *root,
9273 struct btrfs_path *path,
9276 struct btrfs_key key;
9277 struct btrfs_key found_key;
9278 struct extent_buffer *leaf;
9283 key.objectid = bytenr;
9285 key.offset = (u64)-1;
9288 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9295 if (path->slots[0] == 0)
9301 leaf = path->nodes[0];
9302 slot = path->slots[0];
9304 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9305 if (found_key.objectid != bytenr)
/* Only extent items and their backref items are candidates for removal. */
9308 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9309 found_key.type != BTRFS_METADATA_ITEM_KEY &&
9310 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9311 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9312 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9313 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9314 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9315 btrfs_release_path(path);
9316 if (found_key.type == 0) {
9317 if (found_key.offset == 0)
9319 key.offset = found_key.offset - 1;
9320 key.type = found_key.type;
9322 key.type = found_key.type - 1;
9323 key.offset = (u64)-1;
9327 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9328 found_key.objectid, found_key.type, found_key.offset);
9330 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9333 btrfs_release_path(path);
9335 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9336 found_key.type == BTRFS_METADATA_ITEM_KEY) {
/* Metadata items do not carry the size in the key; use nodesize. */
9337 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9338 found_key.offset : root->fs_info->nodesize;
9340 ret = btrfs_update_block_group(root, bytenr,
9347 btrfs_release_path(path);
9352 * for a single backref, this will allocate a new extent
9353 * and add the backref to it.
/*
 * Record one backref of @rec in the extent tree, creating the extent item
 * first when needed.
 *
 * @trans:     running transaction
 * @info:      filesystem info; info->extent_root is the tree written to
 * @path:      scratch path, released before returning
 * @rec:       extent record being repaired
 * @back:      the backref to add; back->is_data selects data or tree handling
 * @allocated: NOTE(review): its test is not visible in this excerpt;
 *             presumably non-zero skips the extent item creation - confirm
 * @flags:     extra extent flags OR-ed into the item of a tree block
 *
 * Extent item creation: an EXTENT_ITEM keyed (rec->start, rec->max_size) is
 * inserted with a reference count of 0 and rec->generation. Data extents get
 * BTRFS_EXTENT_FLAG_DATA. Tree blocks get a btrfs_tree_block_info placed
 * right behind the extent item, filled with rec->info_level and a disk key
 * built from rec->info_objectid (type and offset 0), plus
 * BTRFS_EXTENT_FLAG_TREE_BLOCK | @flags. The block group accounting is then
 * updated for the extent.
 *
 * Backref creation: btrfs_inc_extent_ref() is called once per found
 * reference of a data backref, or once for a tree backref, and the addition
 * is reported on stderr.
 */
9355 static int record_extent(struct btrfs_trans_handle *trans,
9356 struct btrfs_fs_info *info,
9357 struct btrfs_path *path,
9358 struct extent_record *rec,
9359 struct extent_backref *back,
9360 int allocated, u64 flags)
9363 struct btrfs_root *extent_root = info->extent_root;
9364 struct extent_buffer *leaf;
9365 struct btrfs_key ins_key;
9366 struct btrfs_extent_item *ei;
9367 struct data_backref *dback;
9368 struct btrfs_tree_block_info *bi;
9371 rec->max_size = max_t(u64, rec->max_size,
9375 u32 item_size = sizeof(*ei);
9378 item_size += sizeof(*bi);
9380 ins_key.objectid = rec->start;
9381 ins_key.offset = rec->max_size;
9382 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9384 ret = btrfs_insert_empty_item(trans, extent_root, path,
9385 &ins_key, item_size);
9389 leaf = path->nodes[0];
9390 ei = btrfs_item_ptr(leaf, path->slots[0],
9391 struct btrfs_extent_item);
/* The item starts with zero references; the backrefs are added below. */
9393 btrfs_set_extent_refs(leaf, ei, 0);
9394 btrfs_set_extent_generation(leaf, ei, rec->generation);
9396 if (back->is_data) {
9397 btrfs_set_extent_flags(leaf, ei,
9398 BTRFS_EXTENT_FLAG_DATA);
9400 struct btrfs_disk_key copy_key;
/* The tree block info lives directly behind the extent item in the leaf. */
9402 bi = (struct btrfs_tree_block_info *)(ei + 1);
9403 memset_extent_buffer(leaf, 0, (unsigned long)bi,
9406 btrfs_set_disk_key_objectid(&copy_key,
9407 rec->info_objectid);
9408 btrfs_set_disk_key_type(&copy_key, 0);
9409 btrfs_set_disk_key_offset(&copy_key, 0);
9411 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9412 btrfs_set_tree_block_key(leaf, bi, &copy_key);
9414 btrfs_set_extent_flags(leaf, ei,
9415 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9418 btrfs_mark_buffer_dirty(leaf);
9419 ret = btrfs_update_block_group(extent_root, rec->start,
9420 rec->max_size, 1, 0);
9423 btrfs_release_path(path);
9426 if (back->is_data) {
9430 dback = to_data_backref(back);
9431 if (back->full_backref)
9432 parent = dback->parent;
/* One btrfs_inc_extent_ref() call per reference that was actually found. */
9436 for (i = 0; i < dback->found_ref; i++) {
9437 /* if parent != 0, we're doing a full backref
9438 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9439 * just makes the backref allocator create a data
9442 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9443 rec->start, rec->max_size,
9447 BTRFS_FIRST_FREE_OBJECTID :
9453 fprintf(stderr, "adding new data backref"
9454 " on %llu %s %llu owner %llu"
9455 " offset %llu found %d\n",
9456 (unsigned long long)rec->start,
9457 back->full_backref ?
9459 back->full_backref ?
9460 (unsigned long long)parent :
9461 (unsigned long long)dback->root,
9462 (unsigned long long)dback->owner,
9463 (unsigned long long)dback->offset,
9467 struct tree_backref *tback;
9469 tback = to_tree_backref(back);
9470 if (back->full_backref)
9471 parent = tback->parent;
9475 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9476 rec->start, rec->max_size,
9477 parent, tback->root, 0, 0);
9478 fprintf(stderr, "adding new tree backref on "
9479 "start %llu len %llu parent %llu root %llu\n",
9480 rec->start, rec->max_size, parent, tback->root);
9483 btrfs_release_path(path);
/*
 * Search the list @entries for an extent_entry whose bytenr and bytes both
 * equal the given values.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably the matching entry is returned, or NULL when none matches.
 */
9487 static struct extent_entry *find_entry(struct list_head *entries,
9488 u64 bytenr, u64 bytes)
9490 struct extent_entry *entry = NULL;
9492 list_for_each_entry(entry, entries, list) {
9493 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Pick the entry of @entries that the largest number of backrefs agree on.
 *
 * Entries whose broken count equals their total count are not trusted. While
 * walking the list the candidate `best` is compared with the current entry
 * by count; a tie between the candidate (or the previous entry) and the
 * current entry means the choice is not yet decided and the search has to
 * continue.
 *
 * NOTE(review): several branch bodies and the return are not visible in this
 * excerpt; presumably NULL is returned when no entry wins - confirm.
 */
9500 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9502 struct extent_entry *entry, *best = NULL, *prev = NULL;
9504 list_for_each_entry(entry, entries, list) {
9506 * If there are as many broken entries as entries then we know
9507 * not to trust this particular entry.
9509 if (entry->broken == entry->count)
9513 * Special case, when there are only two entries and 'best' is
9523 * If our current entry == best then we can't be sure our best
9524 * is really the best, so we need to keep searching.
9526 if (best && best->count == entry->count) {
9532 /* Prev == entry, not good enough, have to keep searching */
9533 if (!prev->broken && prev->count == entry->count)
9537 best = (prev->count > entry->count) ? prev : entry;
9538 else if (best->count < entry->count)
/*
 * Rewrite one file extent item so that it agrees with the extent @entry that
 * was chosen as correct.
 *
 * @info:  filesystem info used to read the owning fs root
 * @path:  scratch path, released before returning
 * @dback: data backref describing the file extent to fix (root, owner inode,
 *         offset, disk_bytenr, bytes)
 * @entry: the extent (bytenr, bytes) the file extent should point to
 *
 * Phase 1 (read only): the root dback->root is read, then the tree is
 * searched from (owner, EXTENT_DATA, offset) and walked forward until a file
 * extent with the disk bytenr and disk size of @dback is found.
 *
 * Phase 2 (in a transaction): the same key is searched again with COW and
 * the item is updated. A compressed extent whose disk bytenr differs from
 * the entry is refused with a message. Otherwise the disk bytenr is set to
 * entry->bytenr, the file extent offset is adjusted when the old start lay
 * after or before the entry start (refused when the reference would fall
 * outside the entry), disk_num_bytes is set to entry->bytes, and ram_bytes
 * is set as well for uncompressed extents. The leaf is marked dirty and the
 * transaction is committed.
 *
 * Returns ret when it is non-zero, otherwise the commit result.
 */
9546 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9547 struct data_backref *dback, struct extent_entry *entry)
9549 struct btrfs_trans_handle *trans;
9550 struct btrfs_root *root;
9551 struct btrfs_file_extent_item *fi;
9552 struct extent_buffer *leaf;
9553 struct btrfs_key key;
9557 key.objectid = dback->root;
9558 key.type = BTRFS_ROOT_ITEM_KEY;
9559 key.offset = (u64)-1;
9560 root = btrfs_read_fs_root(info, &key);
9562 fprintf(stderr, "Couldn't find root for our ref\n");
9567 * The backref points to the original offset of the extent if it was
9568 * split, so we need to search down to the offset we have and then walk
9569 * forward until we find the backref we're looking for.
9571 key.objectid = dback->owner;
9572 key.type = BTRFS_EXTENT_DATA_KEY;
9573 key.offset = dback->offset;
9574 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9576 fprintf(stderr, "Error looking up ref %d\n", ret);
9581 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9582 ret = btrfs_next_leaf(root, path);
9584 fprintf(stderr, "Couldn't find our ref, next\n");
9588 leaf = path->nodes[0];
9589 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9590 if (key.objectid != dback->owner ||
9591 key.type != BTRFS_EXTENT_DATA_KEY) {
9592 fprintf(stderr, "Couldn't find our ref, search\n");
9595 fi = btrfs_item_ptr(leaf, path->slots[0],
9596 struct btrfs_file_extent_item);
9597 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9598 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9600 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9605 btrfs_release_path(path);
9607 trans = btrfs_start_transaction(root, 1);
9609 return PTR_ERR(trans);
9612 * Ok we have the key of the file extent we want to fix, now we can cow
9613 * down to the thing and fix it.
9615 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9617 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9618 key.objectid, key.type, key.offset, ret);
9622 fprintf(stderr, "Well that's odd, we just found this key "
9623 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9628 leaf = path->nodes[0];
9629 fi = btrfs_item_ptr(leaf, path->slots[0],
9630 struct btrfs_file_extent_item);
9632 if (btrfs_file_extent_compression(leaf, fi) &&
9633 dback->disk_bytenr != entry->bytenr) {
9634 fprintf(stderr, "Ref doesn't match the record start and is "
9635 "compressed, please take a btrfs-image of this file "
9636 "system and send it to a btrfs developer so they can "
9637 "complete this functionality for bytenr %Lu\n",
9638 dback->disk_bytenr);
9643 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9644 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9645 } else if (dback->disk_bytenr > entry->bytenr) {
9646 u64 off_diff, offset;
9648 off_diff = dback->disk_bytenr - entry->bytenr;
9649 offset = btrfs_file_extent_offset(leaf, fi);
9650 if (dback->disk_bytenr + offset +
9651 btrfs_file_extent_num_bytes(leaf, fi) >
9652 entry->bytenr + entry->bytes) {
9653 fprintf(stderr, "Ref is past the entry end, please "
9654 "take a btrfs-image of this file system and "
9655 "send it to a btrfs developer, ref %Lu\n",
9656 dback->disk_bytenr);
9661 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9662 btrfs_set_file_extent_offset(leaf, fi, offset);
9663 } else if (dback->disk_bytenr < entry->bytenr) {
9666 offset = btrfs_file_extent_offset(leaf, fi);
9667 if (dback->disk_bytenr + offset < entry->bytenr) {
9668 fprintf(stderr, "Ref is before the entry start, please"
9669 " take a btrfs-image of this file system and "
9670 "send it to a btrfs developer, ref %Lu\n",
9671 dback->disk_bytenr);
/* Re-base the in-extent offset from the old disk start onto entry->bytenr. */
9676 offset += dback->disk_bytenr;
9677 offset -= entry->bytenr;
9678 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9679 btrfs_set_file_extent_offset(leaf, fi, offset);
9682 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9685 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9686 * only do this if we aren't using compression, otherwise it's a
9689 if (!btrfs_file_extent_compression(leaf, fi))
9690 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9692 printf("ram bytes may be wrong?\n");
9693 btrfs_mark_buffer_dirty(leaf);
9695 err = btrfs_commit_transaction(trans, root);
9696 btrfs_release_path(path);
9697 return ret ? ret : err;
/*
 * verify_backrefs() - make the data backrefs of @rec agree on one extent.
 *
 * @info: filesystem handle, forwarded to repair_ref().
 * @path: scratch path, forwarded to repair_ref().
 * @rec:  extent record whose backref_tree is examined.
 *
 * Pass 1 walks rec->backref_tree and, for data backrefs with found_ref set,
 * looks up their (disk_bytenr, bytes) pair with find_entry(); a new
 * extent_entry is allocated and queued for a pair that is not known yet.
 * A winner is then chosen with find_most_right_entry(); if that is not
 * conclusive the extent record's own (start, nr) is looked up and may be
 * added as an extra entry before choosing again.
 * Pass 2 calls repair_ref() for backrefs whose bytes/disk_bytenr differ from
 * the winner.  The local entry list is drained before leaving.
 *
 * NOTE(review): declarations, several branch bodies and the return statement
 * are not visible in this excerpt, so the return contract is not documented
 * here -- confirm against the complete function.
 */
9700 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9701 struct extent_record *rec)
9703 struct extent_backref *back, *tmp;
9704 struct data_backref *dback;
9705 struct extent_entry *entry, *best = NULL;
9708 int broken_entries = 0;
9713 * Metadata is easy and the backrefs should always agree on bytenr and
9714 * size, if not we've got bigger issues.
/* Pass 1: gather the distinct (disk_bytenr, bytes) pairs the backrefs claim. */
9719 rbtree_postorder_for_each_entry_safe(back, tmp,
9720 &rec->backref_tree, node) {
9721 if (back->full_backref || !back->is_data)
9724 dback = to_data_backref(back);
9727 * We only pay attention to backrefs that we found a real
9730 if (dback->found_ref == 0)
9734 * For now we only catch when the bytes don't match, not the
9735 * bytenr. We can easily do this at the same time, but I want
9736 * to have a fs image to test on before we just add repair
9737 * functionality willy-nilly so we know we won't screw up the
9741 entry = find_entry(&entries, dback->disk_bytenr,
9744 entry = malloc(sizeof(struct extent_entry));
9749 memset(entry, 0, sizeof(*entry));
9750 entry->bytenr = dback->disk_bytenr;
9751 entry->bytes = dback->bytes;
9752 list_add_tail(&entry->list, &entries);
9757 * If we only have on entry we may think the entries agree when
9758 * in reality they don't so we have to do some extra checking.
9760 if (dback->disk_bytenr != rec->start ||
9761 dback->bytes != rec->nr || back->broken)
9772 /* Yay all the backrefs agree, carry on good sir */
9773 if (nr_entries <= 1 && !mismatch)
9776 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9777 "%Lu\n", rec->start);
9780 * First we want to see if the backrefs can agree amongst themselves who
9781 * is right, so figure out which one of the entries has the highest
9784 best = find_most_right_entry(&entries);
9787 * Ok so we may have an even split between what the backrefs think, so
9788 * this is where we use the extent ref to see what it thinks.
9791 entry = find_entry(&entries, rec->start, rec->nr);
9792 if (!entry && (!broken_entries || !rec->found_rec)) {
9793 fprintf(stderr, "Backrefs don't agree with each other "
9794 "and extent record doesn't agree with anybody,"
9795 " so we can't fix bytenr %Lu bytes %Lu\n",
9796 rec->start, rec->nr);
9799 } else if (!entry) {
9801 * Ok our backrefs were broken, we'll assume this is the
9802 * correct value and add an entry for this range.
9804 entry = malloc(sizeof(struct extent_entry));
9809 memset(entry, 0, sizeof(*entry));
9810 entry->bytenr = rec->start;
9811 entry->bytes = rec->nr;
9812 list_add_tail(&entry->list, &entries);
9816 best = find_most_right_entry(&entries);
9818 fprintf(stderr, "Backrefs and extent record evenly "
9819 "split on who is right, this is going to "
9820 "require user input to fix bytenr %Lu bytes "
9821 "%Lu\n", rec->start, rec->nr);
9828 * I don't think this can happen currently as we'll abort() if we catch
9829 * this case higher up, but in case somebody removes that we still can't
9830 * deal with it properly here yet, so just bail out of that's the case.
9832 if (best->bytenr != rec->start) {
9833 fprintf(stderr, "Extent start and backref starts don't match, "
9834 "please use btrfs-image on this file system and send "
9835 "it to a btrfs developer so they can make fsck fix "
9836 "this particular case. bytenr is %Lu, bytes is %Lu\n",
9837 rec->start, rec->nr);
9843 * Ok great we all agreed on an extent record, let's go find the real
9844 * references and fix up the ones that don't match.
9846 rbtree_postorder_for_each_entry_safe(back, tmp,
9847 &rec->backref_tree, node) {
9848 if (back->full_backref || !back->is_data)
9851 dback = to_data_backref(back);
9854 * Still ignoring backrefs that don't have a real ref attached
9857 if (dback->found_ref == 0)
9860 if (dback->bytes == best->bytes &&
9861 dback->disk_bytenr == best->bytenr)
9864 ret = repair_ref(info, path, dback, best);
9870 * Ok we messed with the actual refs, which means we need to drop our
9871 * entire cache and go back and rescan. I know this is a huge pain and
9872 * adds a lot of extra work, but it's the only way to be safe. Once all
9873 * the backrefs agree we may not need to do anything to the extent
/* Drain the local list of extent_entry objects built above. */
9878 while (!list_empty(&entries)) {
9879 entry = list_entry(entries.next, struct extent_entry, list);
9880 list_del_init(&entry->list);
/*
 * process_duplicates() - replace @rec in @extent_cache by its first duplicate.
 *
 * @extent_cache: cache tree holding the extent records.
 * @rec:          record taken from the duplicate_extents list.
 *
 * Nothing is rewritten when rec->found_rec is set or when there is more than
 * one duplicate (the value returned on that path is not visible in this
 * excerpt).  Otherwise @rec is removed from the cache, the first entry of
 * rec->dups becomes "good", its bookkeeping fields are reset and it inherits
 * rec->refs and rec->backrefs.  Records overlapping "good" are looked up with
 * lookup_cache_extent(): one that has found_rec set or duplicates of its own
 * is chained onto good->dups (and "good" is queued on duplicate_extents),
 * any other one has its refs and backrefs folded into "good".  "good" is
 * finally inserted into the cache.
 *
 * The last statement returns 0 when "good" still has duplicates and 1
 * otherwise.
 */
9886 static int process_duplicates(struct cache_tree *extent_cache,
9887 struct extent_record *rec)
9889 struct extent_record *good, *tmp;
9890 struct cache_extent *cache;
9894 * If we found a extent record for this extent then return, or if we
9895 * have more than one duplicate we are likely going to need to delete
9898 if (rec->found_rec || rec->num_duplicates > 1)
9901 /* Shouldn't happen but just in case */
9902 BUG_ON(!rec->num_duplicates);
9905 * So this happens if we end up with a backref that doesn't match the
9906 * actual extent entry. So either the backref is bad or the extent
9907 * entry is bad. Either way we want to have the extent_record actually
9908 * reflect what we found in the extent_tree, so we need to take the
9909 * duplicate out and use that as the extent_record since the only way we
9910 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9912 remove_cache_extent(extent_cache, &rec->cache);
/* Promote the first duplicate and reset its per-record state. */
9914 good = to_extent_record(rec->dups.next);
9915 list_del_init(&good->list);
9916 INIT_LIST_HEAD(&good->backrefs);
9917 INIT_LIST_HEAD(&good->dups);
9918 good->cache.start = good->start;
9919 good->cache.size = good->nr;
9920 good->content_checked = 0;
9921 good->owner_ref_checked = 0;
9922 good->num_duplicates = 0;
9923 good->refs = rec->refs;
9924 list_splice_init(&rec->backrefs, &good->backrefs);
9926 cache = lookup_cache_extent(extent_cache, good->start,
9930 tmp = container_of(cache, struct extent_record, cache);
9933 * If we find another overlapping extent and it's found_rec is
9934 * set then it's a duplicate and we need to try and delete
9937 if (tmp->found_rec || tmp->num_duplicates > 0) {
9938 if (list_empty(&good->list))
9939 list_add_tail(&good->list,
9940 &duplicate_extents);
9941 good->num_duplicates += tmp->num_duplicates + 1;
9942 list_splice_init(&tmp->dups, &good->dups);
9943 list_del_init(&tmp->list);
9944 list_add_tail(&tmp->list, &good->dups);
9945 remove_cache_extent(extent_cache, &tmp->cache);
9950 * Ok we have another non extent item backed extent rec, so lets
9951 * just add it to this extent and carry on like we did above.
9953 good->refs += tmp->refs;
9954 list_splice_init(&tmp->backrefs, &good->backrefs);
9955 remove_cache_extent(extent_cache, &tmp->cache);
9958 ret = insert_cache_extent(extent_cache, &good->cache);
9961 return good->num_duplicates ? 0 : 1;
/*
 * delete_duplicate_records() - drop redundant extent items that duplicate
 * @rec from the extent tree.
 *
 * @root: any root of the filesystem; it is replaced by
 *        root->fs_info->extent_root before the transaction is started.
 * @rec:  record whose ->dups list names the duplicates.
 *
 * The entries of rec->dups are compared by start and nr to pick the record
 * to keep; records to be removed are collected on a local delete_list.
 * Inside one transaction, every listed record with found_rec set has its
 * (start, BTRFS_EXTENT_ITEM_KEY, nr) item searched for and deleted; a
 * metadata record on that list is reported as unexpected.  delete_list and
 * rec->dups are both emptied before returning.
 *
 * Returns ret when it is non-zero, otherwise nr_del.
 * rec->num_duplicates is cleared when both ret and nr_del are zero.
 */
9964 static int delete_duplicate_records(struct btrfs_root *root,
9965 struct extent_record *rec)
9967 struct btrfs_trans_handle *trans;
9968 LIST_HEAD(delete_list);
9969 struct btrfs_path path;
9970 struct extent_record *tmp, *good, *n;
9973 struct btrfs_key key;
9975 btrfs_init_path(&path);
9978 /* Find the record that covers all of the duplicates. */
9979 list_for_each_entry(tmp, &rec->dups, list) {
9980 if (good->start < tmp->start)
9982 if (good->nr > tmp->nr)
/*
 * NOTE(review): the message below prints (start, nr) pairs in a
 * "[%Lu-%Lu]" range format although nr is a length (see the
 * start + nr arithmetic in the condition), not an end offset.
 */
9985 if (tmp->start + tmp->nr < good->start + good->nr) {
9986 fprintf(stderr, "Ok we have overlapping extents that "
9987 "aren't completely covered by each other, this "
9988 "is going to require more careful thought. "
9989 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9990 tmp->start, tmp->nr, good->start, good->nr);
9997 list_add_tail(&rec->list, &delete_list);
9999 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
10002 list_move_tail(&tmp->list, &delete_list);
/* All deletions happen in a single transaction on the extent root. */
10005 root = root->fs_info->extent_root;
10006 trans = btrfs_start_transaction(root, 1);
10007 if (IS_ERR(trans)) {
10008 ret = PTR_ERR(trans);
10012 list_for_each_entry(tmp, &delete_list, list) {
10013 if (tmp->found_rec == 0)
10015 key.objectid = tmp->start;
10016 key.type = BTRFS_EXTENT_ITEM_KEY;
10017 key.offset = tmp->nr;
10019 /* Shouldn't happen but just in case */
10020 if (tmp->metadata) {
10021 fprintf(stderr, "Well this shouldn't happen, extent "
10022 "record overlaps but is metadata? "
10023 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
10027 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10033 ret = btrfs_del_item(trans, root, &path);
10036 btrfs_release_path(&path);
10039 err = btrfs_commit_transaction(trans, root);
10043 while (!list_empty(&delete_list)) {
10044 tmp = to_extent_record(delete_list.next);
10045 list_del_init(&tmp->list);
10051 while (!list_empty(&rec->dups)) {
10052 tmp = to_extent_record(rec->dups.next);
10053 list_del_init(&tmp->list);
10057 btrfs_release_path(&path);
10059 if (!ret && !nr_del)
10060 rec->num_duplicates = 0;
10062 return ret ? ret : nr_del;
/*
 * find_possible_backrefs() - try to resolve data backrefs of @rec that have
 * no found_ref yet by reading the file extent item they describe.
 *
 * @info:         filesystem handle used to read the owning root.
 * @path:         scratch path; released after each lookup.
 * @extent_cache: cache of extent records, consulted by disk bytenr.
 * @rec:          record whose backref_tree is walked.
 *
 * For each candidate backref the root dback->root is read (a root failing
 * with -ENOENT is treated as a bad ref, any other error is returned via
 * PTR_ERR), then (owner, BTRFS_EXTENT_DATA_KEY, offset) is searched in that
 * root.  disk_bytenr and disk_num_bytes are read from the file extent item;
 * they are stored in the backref and found_ref is incremented, except when
 * the extent cache already holds a record with found_rec set for that bytenr.
 *
 * NOTE(review): the statements following the visible conditions and the
 * final return are not part of this excerpt.
 */
10065 static int find_possible_backrefs(struct btrfs_fs_info *info,
10066 struct btrfs_path *path,
10067 struct cache_tree *extent_cache,
10068 struct extent_record *rec)
10070 struct btrfs_root *root;
10071 struct extent_backref *back, *tmp;
10072 struct data_backref *dback;
10073 struct cache_extent *cache;
10074 struct btrfs_file_extent_item *fi;
10075 struct btrfs_key key;
10079 rbtree_postorder_for_each_entry_safe(back, tmp,
10080 &rec->backref_tree, node) {
10081 /* Don't care about full backrefs (poor unloved backrefs) */
10082 if (back->full_backref || !back->is_data)
10085 dback = to_data_backref(back);
10087 /* We found this one, we don't need to do a lookup */
10088 if (dback->found_ref)
10091 key.objectid = dback->root;
10092 key.type = BTRFS_ROOT_ITEM_KEY;
10093 key.offset = (u64)-1;
10095 root = btrfs_read_fs_root(info, &key);
10097 /* No root, definitely a bad ref, skip */
10098 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
10100 /* Other err, exit */
10102 return PTR_ERR(root);
10104 key.objectid = dback->owner;
10105 key.type = BTRFS_EXTENT_DATA_KEY;
10106 key.offset = dback->offset;
10107 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
10109 btrfs_release_path(path);
10112 /* Didn't find it, we can carry on */
10117 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
10118 struct btrfs_file_extent_item);
10119 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
10120 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
10121 btrfs_release_path(path);
10122 cache = lookup_cache_extent(extent_cache, bytenr, 1);
/*
 * NOTE(review): this inner "tmp" (an extent_record) shadows the outer
 * "tmp" (an extent_backref) that the safe iterator above relies on.
 */
10124 struct extent_record *tmp;
10125 tmp = container_of(cache, struct extent_record, cache);
10128 * If we found an extent record for the bytenr for this
10129 * particular backref then we can't add it to our
10130 * current extent record. We only want to add backrefs
10131 * that don't have a corresponding extent item in the
10132 * extent tree since they likely belong to this record
10133 * and we need to fix it if it doesn't match bytenrs.
10135 if (tmp->found_rec)
10139 dback->found_ref += 1;
10140 dback->disk_bytenr = bytenr;
10141 dback->bytes = bytes;
10144 * Set this so the verify backref code knows not to trust the
10145 * values in this backref.
10154 * Record orphan data ref into corresponding root.
10156 * Return 0 if the extent item contains data ref and recorded.
10157 * Return 1 if the extent item contains no useful data ref
10158 * On that case, it may contains only shared_dataref or metadata backref
10159 * or the file extent exists(this should be handled by the extent bytenr
10160 * recovery routine)
10161 * Return <0 if something goes wrong.
10163 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10164 struct extent_record *rec)
10166 struct btrfs_key key;
10167 struct btrfs_root *dest_root;
10168 struct extent_backref *back, *tmp;
10169 struct data_backref *dback;
10170 struct orphan_data_extent *orphan;
10171 struct btrfs_path path;
10172 int recorded_data_ref = 0;
10177 btrfs_init_path(&path);
10178 rbtree_postorder_for_each_entry_safe(back, tmp,
10179 &rec->backref_tree, node) {
10180 if (back->full_backref || !back->is_data ||
10181 !back->found_extent_tree)
10183 dback = to_data_backref(back);
10184 if (dback->found_ref)
10186 key.objectid = dback->root;
10187 key.type = BTRFS_ROOT_ITEM_KEY;
10188 key.offset = (u64)-1;
10190 dest_root = btrfs_read_fs_root(fs_info, &key);
10192 /* For non-exist root we just skip it */
10193 if (IS_ERR(dest_root) || !dest_root)
10196 key.objectid = dback->owner;
10197 key.type = BTRFS_EXTENT_DATA_KEY;
10198 key.offset = dback->offset;
10200 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10201 btrfs_release_path(&path);
10203 * For ret < 0, it's OK since the fs-tree may be corrupted,
10204 * we need to record it for inode/file extent rebuild.
10205 * For ret > 0, we record it only for file extent rebuild.
10206 * For ret == 0, the file extent exists but only bytenr
10207 * mismatch, let the original bytenr fix routine to handle,
10213 orphan = malloc(sizeof(*orphan));
10218 INIT_LIST_HEAD(&orphan->list);
10219 orphan->root = dback->root;
10220 orphan->objectid = dback->owner;
10221 orphan->offset = dback->offset;
10222 orphan->disk_bytenr = rec->cache.start;
10223 orphan->disk_len = rec->cache.size;
10224 list_add(&dest_root->orphan_data_extents, &orphan->list);
10225 recorded_data_ref = 1;
10228 btrfs_release_path(&path);
10230 return !recorded_data_ref;
/*
 * fixup_extent_refs() - rebuild the extent tree items of @rec from the
 * backrefs that were found.
 *
 * @info:         filesystem handle.
 * @extent_cache: cache of extent records, forwarded to
 *                find_possible_backrefs().
 * @rec:          the extent record to repair.
 *
 * For a non-metadata record whose refs differ from extent_item_refs,
 * find_possible_backrefs() and then verify_backrefs() are run first.  A
 * transaction on info->extent_root is then started, the existing records are
 * removed with delete_extent_records(), info->corrupt_blocks is consulted
 * for the range, and record_extent() is called for each backref that has
 * found_ref set.  The transaction is committed and a "Repaired extent
 * references" message is printed.
 *
 * NOTE(review): the error checks between these steps and the return
 * statement are not visible in this excerpt.
 */
10236 * when an incorrect extent item is found, this will delete
10237 * all of the existing entries for it and recreate them
10238 * based on what the tree scan found.
10240 static int fixup_extent_refs(struct btrfs_fs_info *info,
10241 struct cache_tree *extent_cache,
10242 struct extent_record *rec)
10244 struct btrfs_trans_handle *trans = NULL;
10246 struct btrfs_path path;
10247 struct cache_extent *cache;
10248 struct extent_backref *back, *tmp;
10252 if (rec->flag_block_full_backref)
10253 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10255 btrfs_init_path(&path);
10256 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10258 * Sometimes the backrefs themselves are so broken they don't
10259 * get attached to any meaningful rec, so first go back and
10260 * check any of our backrefs that we couldn't find and throw
10261 * them into the list if we find the backref so that
10262 * verify_backrefs can figure out what to do.
10264 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10269 /* step one, make sure all of the backrefs agree */
10270 ret = verify_backrefs(info, &path, rec);
10274 trans = btrfs_start_transaction(info->extent_root, 1);
10275 if (IS_ERR(trans)) {
10276 ret = PTR_ERR(trans);
10280 /* step two, delete all the existing records */
10281 ret = delete_extent_records(trans, info->extent_root, &path,
10287 /* was this block corrupt? If so, don't add references to it */
10288 cache = lookup_cache_extent(info->corrupt_blocks,
10289 rec->start, rec->max_size);
10295 /* step three, recreate all the refs we did find */
10296 rbtree_postorder_for_each_entry_safe(back, tmp,
10297 &rec->backref_tree, node) {
10299 * if we didn't find any references, don't create a
10300 * new extent record
10302 if (!back->found_ref)
10305 rec->bad_full_backref = 0;
10306 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10314 int err = btrfs_commit_transaction(trans, info->extent_root);
10320 fprintf(stderr, "Repaired extent references for %llu\n",
10321 (unsigned long long)rec->start);
10323 btrfs_release_path(&path);
/*
 * fixup_extent_flags() - make BTRFS_BLOCK_FLAG_FULL_BACKREF in the on-disk
 * extent item of @rec match rec->flag_block_full_backref.
 *
 * @fs_info: filesystem handle; the item is looked up in fs_info->extent_root.
 * @rec:     extent record describing the item.
 *
 * The search key is (start, BTRFS_METADATA_ITEM_KEY, info_level) for
 * metadata records and (start, BTRFS_EXTENT_ITEM_KEY, max_size) otherwise.
 * When the item is found its flags are read, the full-backref bit is set or
 * cleared, the leaf is marked dirty and the transaction is committed.
 *
 * Returns PTR_ERR(trans) if the transaction cannot be started.
 * NOTE(review): on the two early-exit paths after the search the result of
 * btrfs_commit_transaction() is not assigned on the visible lines; the
 * values returned there are not part of this excerpt.
 */
10327 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10328 struct extent_record *rec)
10330 struct btrfs_trans_handle *trans;
10331 struct btrfs_root *root = fs_info->extent_root;
10332 struct btrfs_path path;
10333 struct btrfs_extent_item *ei;
10334 struct btrfs_key key;
10338 key.objectid = rec->start;
10339 if (rec->metadata) {
10340 key.type = BTRFS_METADATA_ITEM_KEY;
10341 key.offset = rec->info_level;
10343 key.type = BTRFS_EXTENT_ITEM_KEY;
10344 key.offset = rec->max_size;
10347 trans = btrfs_start_transaction(root, 0);
10349 return PTR_ERR(trans);
10351 btrfs_init_path(&path);
10352 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10354 btrfs_release_path(&path);
10355 btrfs_commit_transaction(trans, root);
10358 fprintf(stderr, "Didn't find extent for %llu\n",
10359 (unsigned long long)rec->start);
10360 btrfs_release_path(&path);
10361 btrfs_commit_transaction(trans, root);
/* Item found: read its flags and flip only the full-backref bit. */
10365 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10366 struct btrfs_extent_item);
10367 flags = btrfs_extent_flags(path.nodes[0], ei);
10368 if (rec->flag_block_full_backref) {
10369 fprintf(stderr, "setting full backref on %llu\n",
10370 (unsigned long long)key.objectid);
10371 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10373 fprintf(stderr, "clearing full backref on %llu\n",
10374 (unsigned long long)key.objectid);
10375 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10377 btrfs_set_extent_flags(path.nodes[0], ei, flags);
10378 btrfs_mark_buffer_dirty(path.nodes[0]);
10379 btrfs_release_path(&path);
10380 ret = btrfs_commit_transaction(trans, root);
10382 fprintf(stderr, "Repaired extent flags for %llu\n",
10383 (unsigned long long)rec->start);
/*
 * prune_one_block() - delete the parent's pointer to one corrupt block.
 *
 * @trans:   running transaction.
 * @info:    filesystem handle; only info->extent_root is searched.
 * @corrupt: corrupt block description (key, level, cache.start).
 *
 * The extent root is searched for corrupt->key with path.lowest_level set to
 * corrupt->level + 1, i.e. stopping at the parent of the bad block.  If the
 * slot reached does not point at corrupt->cache.start, every slot of that
 * node is scanned for it.  The matching pointer is removed with
 * btrfs_del_ptr().
 *
 * NOTE(review): the handling of search failures, of the "not found" case at
 * the root node and the return value are not visible in this excerpt.
 */
10388 /* right now we only prune from the extent allocation tree */
10389 static int prune_one_block(struct btrfs_trans_handle *trans,
10390 struct btrfs_fs_info *info,
10391 struct btrfs_corrupt_block *corrupt)
10394 struct btrfs_path path;
10395 struct extent_buffer *eb;
10399 int level = corrupt->level + 1;
10401 btrfs_init_path(&path);
10403 /* we want to stop at the parent to our busted block */
10404 path.lowest_level = level;
10406 ret = btrfs_search_slot(trans, info->extent_root,
10407 &corrupt->key, &path, -1, 1);
10412 eb = path.nodes[level];
10419 * hopefully the search gave us the block we want to prune,
10420 * lets try that first
10422 slot = path.slots[level];
10423 found = btrfs_node_blockptr(eb, slot);
10424 if (found == corrupt->cache.start)
10427 nritems = btrfs_header_nritems(eb);
10429 /* the search failed, lets scan this node and hope we find it */
10430 for (slot = 0; slot < nritems; slot++) {
10431 found = btrfs_node_blockptr(eb, slot);
10432 if (found == corrupt->cache.start)
10436 * we couldn't find the bad block. TODO, search all the nodes for pointers
10439 if (eb == info->extent_root->node) {
10444 btrfs_release_path(&path);
10449 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10450 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10453 btrfs_release_path(&path);
/*
 * prune_corrupt_blocks() - prune the blocks recorded in info->corrupt_blocks.
 *
 * An entry is fetched from info->corrupt_blocks, a transaction on
 * info->extent_root is started (PTR_ERR is returned if that fails),
 * prune_one_block() is called for the entry and the entry is removed from
 * the cache.  The transaction is committed at the end and the commit result
 * is returned.
 *
 * NOTE(review): the return value of prune_one_block() is not checked on the
 * visible line; the looping construct is not part of this excerpt.
 */
10457 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10459 struct btrfs_trans_handle *trans = NULL;
10460 struct cache_extent *cache;
10461 struct btrfs_corrupt_block *corrupt;
10464 cache = search_cache_extent(info->corrupt_blocks, 0);
10468 trans = btrfs_start_transaction(info->extent_root, 1);
10470 return PTR_ERR(trans);
10472 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10473 prune_one_block(trans, info, corrupt);
10474 remove_cache_extent(info->corrupt_blocks, cache);
10477 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * reset_cached_block_groups() - reset cached free-space state.
 *
 * Ranges flagged EXTENT_DIRTY in fs_info->free_space_cache are located with
 * find_first_extent_bit() and cleared.  Block groups are then visited via
 * btrfs_lookup_first_block_group(), advancing "start" past the end of each
 * one (key.objectid + key.offset).
 *
 * NOTE(review): what is done to each block group is not visible in this
 * excerpt.
 */
10481 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10483 struct btrfs_block_group_cache *cache;
10488 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10489 &start, &end, EXTENT_DIRTY);
10492 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10497 cache = btrfs_lookup_first_block_group(fs_info, start);
10502 start = cache->key.objectid + cache->key.offset;
/*
 * check_extent_refs() - report, and when repairing try to fix, the problems
 * recorded in every extent record of @extent_cache.
 *
 * @root:         any root of the filesystem, used to reach root->fs_info.
 * @extent_cache: cache tree of extent records; records are removed from it
 *                as they are processed.
 *
 * Preparation: the ranges of all extent records and of all corrupt blocks
 * are marked dirty in fs_info->excluded_extents, corrupt blocks are pruned
 * and the cached block groups are reset.  Entries on the global
 * duplicate_extents list are then handled with process_duplicates() and
 * delete_duplicate_records().
 *
 * Main pass: for each record, diagnostics are printed for multiple extent
 * items, ref count mismatch, backpointer mismatch, failed owner ref check,
 * bad full backref, metadata crossing a stripe boundary and chunk type
 * mismatch.  fixup_extent_refs() and fixup_extent_flags() are called for
 * repair, the record is removed from the cache and its backrefs are freed.
 *
 * Afterwards btrfs_fix_block_accounting() is run inside a transaction on the
 * extent root.
 *
 * NOTE(review): the conditions guarding most of these steps and the return
 * statements are not visible in this excerpt.
 */
10506 static int check_extent_refs(struct btrfs_root *root,
10507 struct cache_tree *extent_cache)
10509 struct extent_record *rec;
10510 struct cache_extent *cache;
10517 * if we're doing a repair, we have to make sure
10518 * we don't allocate from the problem extents.
10519 * In the worst case, this will be all the
10520 * extents in the FS
10522 cache = search_cache_extent(extent_cache, 0);
10524 rec = container_of(cache, struct extent_record, cache);
10525 set_extent_dirty(root->fs_info->excluded_extents,
10527 rec->start + rec->max_size - 1);
10528 cache = next_cache_extent(cache);
10531 /* pin down all the corrupted blocks too */
10532 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10534 set_extent_dirty(root->fs_info->excluded_extents,
10536 cache->start + cache->size - 1);
10537 cache = next_cache_extent(cache);
10539 prune_corrupt_blocks(root->fs_info);
10540 reset_cached_block_groups(root->fs_info);
10543 reset_cached_block_groups(root->fs_info);
10546 * We need to delete any duplicate entries we find first otherwise we
10547 * could mess up the extent tree when we have backrefs that actually
10548 * belong to a different extent item and not the weird duplicate one.
10550 while (repair && !list_empty(&duplicate_extents)) {
10551 rec = to_extent_record(duplicate_extents.next);
10552 list_del_init(&rec->list);
10554 /* Sometimes we can find a backref before we find an actual
10555 * extent, so we need to process it a little bit to see if there
10556 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10557 * if this is a backref screwup. If we need to delete stuff
10558 * process_duplicates() will return 0, otherwise it will return
10561 if (process_duplicates(extent_cache, rec))
10563 ret = delete_duplicate_records(root, rec);
10567 * delete_duplicate_records will return the number of entries
10568 * deleted, so if it's greater than 0 then we know we actually
10569 * did something and we need to remove.
/* Main pass: look up a record, starting the search at offset 0. */
10582 cache = search_cache_extent(extent_cache, 0);
10585 rec = container_of(cache, struct extent_record, cache);
10586 if (rec->num_duplicates) {
10587 fprintf(stderr, "extent item %llu has multiple extent "
10588 "items\n", (unsigned long long)rec->start);
10592 if (rec->refs != rec->extent_item_refs) {
10593 fprintf(stderr, "ref mismatch on [%llu %llu] ",
10594 (unsigned long long)rec->start,
10595 (unsigned long long)rec->nr);
10596 fprintf(stderr, "extent item %llu, found %llu\n",
10597 (unsigned long long)rec->extent_item_refs,
10598 (unsigned long long)rec->refs);
10599 ret = record_orphan_data_extents(root->fs_info, rec);
10605 if (all_backpointers_checked(rec, 1)) {
10606 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10607 (unsigned long long)rec->start,
10608 (unsigned long long)rec->nr);
10612 if (!rec->owner_ref_checked) {
10613 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10614 (unsigned long long)rec->start,
10615 (unsigned long long)rec->nr);
10620 if (repair && fix) {
10621 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10627 if (rec->bad_full_backref) {
10628 fprintf(stderr, "bad full backref, on [%llu]\n",
10629 (unsigned long long)rec->start);
10631 ret = fixup_extent_flags(root->fs_info, rec);
10639 * Although it's not a extent ref's problem, we reuse this
10640 * routine for error reporting.
10641 * No repair function yet.
10643 if (rec->crossing_stripes) {
10645 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10646 rec->start, rec->start + rec->max_size);
10650 if (rec->wrong_chunk_type) {
10652 "bad extent [%llu, %llu), type mismatch with chunk\n",
10653 rec->start, rec->start + rec->max_size);
/* Done with this record: drop it from the cache and free its backrefs. */
10658 remove_cache_extent(extent_cache, cache);
10659 free_all_extent_backrefs(rec);
10660 if (!init_extent_tree && repair && (!cur_err || fix))
10661 clear_extent_dirty(root->fs_info->excluded_extents,
10663 rec->start + rec->max_size - 1);
10668 if (ret && ret != -EAGAIN) {
10669 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10672 struct btrfs_trans_handle *trans;
10674 root = root->fs_info->extent_root;
10675 trans = btrfs_start_transaction(root, 1);
10676 if (IS_ERR(trans)) {
10677 ret = PTR_ERR(trans);
10681 ret = btrfs_fix_block_accounting(trans, root);
10684 ret = btrfs_commit_transaction(trans, root);
10696 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10700 if (type & BTRFS_BLOCK_GROUP_RAID0) {
10701 stripe_size = length;
10702 stripe_size /= num_stripes;
10703 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10704 stripe_size = length * 2;
10705 stripe_size /= num_stripes;
10706 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10707 stripe_size = length;
10708 stripe_size /= (num_stripes - 1);
10709 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10710 stripe_size = length;
10711 stripe_size /= (num_stripes - 2);
10713 stripe_size = length;
10715 return stripe_size;
/*
 * check_chunk_refs() - cross-check one chunk record against the block group
 * cache and the device extent cache.
 *
 * @chunk_rec:         chunk to verify.
 * @block_group_cache: block group records, looked up through ->tree.
 * @dev_extent_cache:  device extent records, looked up through ->tree.
 *
 * Block group: the record found by lookup_cache_extent() is compared with
 * the chunk (length vs. offset, offset vs. objectid, type_flags vs. flags);
 * a mismatch or a missing block group is reported.  A block group record is
 * unlinked from its list and stored in chunk_rec->bg_rec (apparently only
 * when it matched -- the branch structure is not visible in this excerpt).
 *
 * Device extents: for every stripe, the device extent at (devid, offset)
 * with the length from calc_stripe_length() is looked up and its objectid,
 * offset, chunk_offset and length are compared; dev extents are moved onto
 * chunk_rec->dextents, mismatches and missing dev extents are reported.
 *
 * The return values are described by the original comment that follows.
 */
10719 * Check the chunk with its block group/dev list ref:
10720 * Return 0 if all refs seems valid.
10721 * Return 1 if part of refs seems valid, need later check for rebuild ref
10722 * like missing block group and needs to search extent tree to rebuild them.
10723 * Return -1 if essential refs are missing and unable to rebuild.
10725 static int check_chunk_refs(struct chunk_record *chunk_rec,
10726 struct block_group_tree *block_group_cache,
10727 struct device_extent_tree *dev_extent_cache,
10730 struct cache_extent *block_group_item;
10731 struct block_group_record *block_group_rec;
10732 struct cache_extent *dev_extent_item;
10733 struct device_extent_record *dev_extent_rec;
10737 int metadump_v2 = 0;
10741 block_group_item = lookup_cache_extent(&block_group_cache->tree,
10743 chunk_rec->length);
10744 if (block_group_item) {
10745 block_group_rec = container_of(block_group_item,
10746 struct block_group_record,
10748 if (chunk_rec->length != block_group_rec->offset ||
10749 chunk_rec->offset != block_group_rec->objectid ||
10751 chunk_rec->type_flags != block_group_rec->flags)) {
10754 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10755 chunk_rec->objectid,
10760 chunk_rec->type_flags,
10761 block_group_rec->objectid,
10762 block_group_rec->type,
10763 block_group_rec->offset,
10764 block_group_rec->offset,
10765 block_group_rec->objectid,
10766 block_group_rec->flags);
10769 list_del_init(&block_group_rec->list);
10770 chunk_rec->bg_rec = block_group_rec;
10775 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10776 chunk_rec->objectid,
10781 chunk_rec->type_flags);
/* From here on "length" is the per-stripe length expected of each dev extent. */
10788 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10789 chunk_rec->num_stripes);
10790 for (i = 0; i < chunk_rec->num_stripes; ++i) {
10791 devid = chunk_rec->stripes[i].devid;
10792 offset = chunk_rec->stripes[i].offset;
10793 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10794 devid, offset, length);
10795 if (dev_extent_item) {
10796 dev_extent_rec = container_of(dev_extent_item,
10797 struct device_extent_record,
10799 if (dev_extent_rec->objectid != devid ||
10800 dev_extent_rec->offset != offset ||
10801 dev_extent_rec->chunk_offset != chunk_rec->offset ||
10802 dev_extent_rec->length != length) {
10805 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10806 chunk_rec->objectid,
10809 chunk_rec->stripes[i].devid,
10810 chunk_rec->stripes[i].offset,
10811 dev_extent_rec->objectid,
10812 dev_extent_rec->offset,
10813 dev_extent_rec->length);
10816 list_move(&dev_extent_rec->chunk_list,
10817 &chunk_rec->dextents);
10822 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10823 chunk_rec->objectid,
10826 chunk_rec->stripes[i].devid,
10827 chunk_rec->stripes[i].offset);
/*
 * check_chunks() - verify every chunk in @chunk_cache and sort the chunks by
 * outcome.
 *
 * @chunk_cache:       cache tree of chunk records.
 * @block_group_cache: block group records, forwarded to check_chunk_refs().
 * @dev_extent_cache:  device extent records, forwarded to check_chunk_refs().
 * @good:              optional list receiving chunks with err == 0.
 * @bad:               optional list receiving chunks with err < 0.
 * @rebuild:           optional list receiving chunks with err > 0.
 * @silent:            forwarded to check_chunk_refs().
 *
 * After all chunks are classified, the block group records still linked on
 * block_group_cache->block_groups and the device extents on
 * dev_extent_cache->no_chunk_orphans are walked and reported as having no
 * chunk.
 *
 * NOTE(review): how the overall return value is accumulated is not visible
 * in this excerpt.
 */
10834 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
10835 int check_chunks(struct cache_tree *chunk_cache,
10836 struct block_group_tree *block_group_cache,
10837 struct device_extent_tree *dev_extent_cache,
10838 struct list_head *good, struct list_head *bad,
10839 struct list_head *rebuild, int silent)
10841 struct cache_extent *chunk_item;
10842 struct chunk_record *chunk_rec;
10843 struct block_group_record *bg_rec;
10844 struct device_extent_record *dext_rec;
10848 chunk_item = first_cache_extent(chunk_cache);
10849 while (chunk_item) {
10850 chunk_rec = container_of(chunk_item, struct chunk_record,
10852 err = check_chunk_refs(chunk_rec, block_group_cache,
10853 dev_extent_cache, silent);
/* Each output list is optional; a NULL list skips that category. */
10856 if (err == 0 && good)
10857 list_add_tail(&chunk_rec->list, good);
10858 if (err > 0 && rebuild)
10859 list_add_tail(&chunk_rec->list, rebuild);
10860 if (err < 0 && bad)
10861 list_add_tail(&chunk_rec->list, bad);
10862 chunk_item = next_cache_extent(chunk_item);
10865 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10868 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10876 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10880 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10881 dext_rec->objectid,
/*
 * check_device_used() - compare a device's byte_used with the sum of its
 * device extents.
 *
 * @dev_rec:    device record to verify.
 * @dext_cache: device extent records, searched from (devid, 0).
 *
 * The device extents are walked in cache order; each visited extent is
 * unlinked from its device_list and its length is added to a running total.
 * The objectid of each extent is compared with dev_rec->devid (the action
 * taken on a mismatch is not visible in this excerpt).  A total that differs
 * from dev_rec->byte_used is reported.
 *
 * NOTE(review): the return values are not visible in this excerpt.
 */
10891 static int check_device_used(struct device_record *dev_rec,
10892 struct device_extent_tree *dext_cache)
10894 struct cache_extent *cache;
10895 struct device_extent_record *dev_extent_rec;
10896 u64 total_byte = 0;
10898 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10900 dev_extent_rec = container_of(cache,
10901 struct device_extent_record,
10903 if (dev_extent_rec->objectid != dev_rec->devid)
10906 list_del_init(&dev_extent_rec->device_list);
10907 total_byte += dev_extent_rec->length;
10908 cache = next_cache_extent(cache);
10911 if (total_byte != dev_rec->byte_used) {
10913 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10914 total_byte, dev_rec->byte_used, dev_rec->objectid,
10915 dev_rec->type, dev_rec->offset);
10923 * Extra (optional) check for dev_item size to report possible problem on a new
/*
 * check_dev_size_alignment() - warn when a device's total_bytes is not a
 * multiple of the sector size.
 *
 * @devid:       device id, used only in the message.
 * @total_bytes: device size to test.
 * @sectorsize:  required alignment.
 *
 * Purely informational: nothing is returned and nothing is modified.
 */
10926 static void check_dev_size_alignment(u64 devid, u64 total_bytes, u32 sectorsize)
10928 if (!IS_ALIGNED(total_bytes, sectorsize)) {
10930 "unaligned total_bytes detected for devid %llu, have %llu should be aligned to %u",
10931 devid, total_bytes, sectorsize);
10933 "this is OK for older kernel, but may cause kernel warning for newer kernels");
10934 warning("this can be fixed by 'btrfs rescue fix-device-size'");
/*
 * is_super_size_valid() - validate the super block's total_bytes against the
 * devices.
 *
 * @fs_info: filesystem handle; reads super_copy and fs_devices->devices.
 *
 * dev->total_bytes is summed over all devices.  A super total_bytes smaller
 * than that sum is reported with error().  Otherwise, unless the super block
 * carries one of the METADUMP flags, a warning is printed when either value
 * is not sector aligned or the two values differ.
 *
 * NOTE(review): the boolean returned on each path is not visible in this
 * excerpt.
 */
10939 * Unlike device size alignment check above, some super total_bytes check
10940 * failure can lead to mount failure for newer kernel.
10942 * So this function will return the error for a fatal super total_bytes problem.
10944 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
10946 struct btrfs_device *dev;
10947 struct list_head *dev_list = &fs_info->fs_devices->devices;
10948 u64 total_bytes = 0;
10949 u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
10951 list_for_each_entry(dev, dev_list, dev_list)
10952 total_bytes += dev->total_bytes;
10954 /* Important check, which can cause unmountable fs */
10955 if (super_bytes < total_bytes) {
10956 error("super total bytes %llu smaller than real device(s) size %llu",
10957 super_bytes, total_bytes);
10958 error("mounting this fs may fail for newer kernels");
10959 error("this can be fixed by 'btrfs rescue fix-device-size'");
10964 * Optional check, just to make everything aligned and match with each
10967 * For a btrfs-image restored fs, we don't need to check it anyway.
10969 if (btrfs_super_flags(fs_info->super_copy) &
10970 (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
10972 if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
10973 !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
10974 super_bytes != total_bytes) {
10975 warning("minor unaligned/mismatch device size detected");
10977 "recommended to use 'btrfs rescue fix-device-size' to fix it");
/*
 * check_devices() - verify every device record against its device extents.
 *
 * @dev_cache:        rb-tree of device_record nodes.
 * @dev_extent_cache: device extent records.
 *
 * Each device in the tree is passed to check_device_used() and then to
 * check_dev_size_alignment() with global_info->sectorsize.  Afterwards the
 * device extents on dev_extent_cache->no_device_orphans are walked and
 * reported as belonging to no device.
 *
 * NOTE(review): how the result is accumulated and returned is not visible in
 * this excerpt.
 */
10982 /* check btrfs_dev_item -> btrfs_dev_extent */
10983 static int check_devices(struct rb_root *dev_cache,
10984 struct device_extent_tree *dev_extent_cache)
10986 struct rb_node *dev_node;
10987 struct device_record *dev_rec;
10988 struct device_extent_record *dext_rec;
10992 dev_node = rb_first(dev_cache);
10994 dev_rec = container_of(dev_node, struct device_record, node);
10995 err = check_device_used(dev_rec, dev_extent_cache);
10999 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
11000 global_info->sectorsize);
11001 dev_node = rb_next(dev_node);
11003 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
11006 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
11007 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * add_root_item_to_list() - allocate a root_item_record and append it to
 * @head.
 *
 * @head:          list receiving the new record (appended at the tail).
 * @objectid:      stored in the record.
 * @bytenr:        stored in the record.
 * @last_snapshot: stored in the record.
 * @level:         stored in the record.
 * @drop_level:    stored in the record.
 * @drop_key:      copied into the record's drop_key.
 *
 * NOTE(review): the allocation-failure check, any guard around the memcpy()
 * of @drop_key and the return statements are not visible in this excerpt.
 */
11014 static int add_root_item_to_list(struct list_head *head,
11015 u64 objectid, u64 bytenr, u64 last_snapshot,
11016 u8 level, u8 drop_level,
11017 struct btrfs_key *drop_key)
11020 struct root_item_record *ri_rec;
11021 ri_rec = malloc(sizeof(*ri_rec));
11024 ri_rec->bytenr = bytenr;
11025 ri_rec->objectid = objectid;
11026 ri_rec->level = level;
11027 ri_rec->drop_level = drop_level;
11028 ri_rec->last_snapshot = last_snapshot;
11030 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
11031 list_add_tail(&ri_rec->list, head);
/*
 * Drain @list: while it is non-empty, take the first root_item_record and
 * unlink it with list_del_init().
 *
 * NOTE(review): the statement releasing the record itself is not visible in
 * this listing - presumably a free() follows the unlink; confirm.
 */
11036 static void free_root_item_list(struct list_head *list)
11038 struct root_item_record *ri_rec;
11040 while (!list_empty(list)) {
11041 ri_rec = list_first_entry(list, struct root_item_record,
11043 list_del_init(&ri_rec->list);
/*
 * Process every root_item_record queued on @list.
 *
 * For each record the tree block at rec->bytenr is read, registered through
 * add_root_to_pending() and run_next_block() is invoked with the record;
 * the buffer reference is then dropped and the record unlinked from @list.
 * After the list loop, run_next_block() is called again with a NULL record.
 * All cache/tree arguments are forwarded unchanged to those helpers.
 *
 * NOTE(review): the conditions around the run_next_block() calls, the error
 * handling and the return statement are not visible in this listing.
 */
11048 static int deal_root_from_list(struct list_head *list,
11049 struct btrfs_root *root,
11050 struct block_info *bits,
11052 struct cache_tree *pending,
11053 struct cache_tree *seen,
11054 struct cache_tree *reada,
11055 struct cache_tree *nodes,
11056 struct cache_tree *extent_cache,
11057 struct cache_tree *chunk_cache,
11058 struct rb_root *dev_cache,
11059 struct block_group_tree *block_group_cache,
11060 struct device_extent_tree *dev_extent_cache)
11065 while (!list_empty(list)) {
11066 struct root_item_record *rec;
11067 struct extent_buffer *buf;
/* Always work on the head of the list; it is removed further below */
11068 rec = list_entry(list->next,
11069 struct root_item_record, list);
11071 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
/* A root block that could not be read is released right away */
11072 if (!extent_buffer_uptodate(buf)) {
11073 free_extent_buffer(buf);
11077 ret = add_root_to_pending(buf, extent_cache, pending,
11078 seen, nodes, rec->objectid);
11082 * To rebuild extent tree, we need deal with snapshot
11083 * one by one, otherwise we deal with node firstly which
11084 * can maximize readahead.
11087 ret = run_next_block(root, bits, bits_nr, &last,
11088 pending, seen, reada, nodes,
11089 extent_cache, chunk_cache,
11090 dev_cache, block_group_cache,
11091 dev_extent_cache, rec);
11095 free_extent_buffer(buf);
11096 list_del(&rec->list);
11102 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
11103 reada, nodes, extent_cache, chunk_cache,
11104 dev_cache, block_group_cache,
11105 dev_extent_cache, NULL);
/*
 * Top-level pass over chunks, extents and devices of @fs_info.
 *
 * Visible stages:
 *  1. Initialise the local caches/trees and publish excluded_extents,
 *     fsck_extent_cache, free_extent_hook and corrupt_blocks on @fs_info.
 *  2. Allocate the block_info array and, when progress reporting is enabled,
 *     start the TASK_EXTENTS task.
 *  3. Queue the tree root and the chunk root on normal_trees, then scan the
 *     tree root for ROOT_ITEM keys: items whose drop_progress objectid is 0
 *     go to normal_trees, the others to dropping_trees together with their
 *     drop key and drop level.
 *  4. Run deal_root_from_list() on both lists, then check_chunks(),
 *     check_extent_refs() and check_devices().
 *  5. Stop the task, clear the pointers published on @fs_info and free the
 *     caches.
 *
 * NOTE(review): labels, gotos, loop headers and the return statement are not
 * visible in this listing; the second group of cleanup calls at the end is
 * presumably the restart path taken on -EAGAIN - confirm.
 */
11115 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11117 struct rb_root dev_cache;
11118 struct cache_tree chunk_cache;
11119 struct block_group_tree block_group_cache;
11120 struct device_extent_tree dev_extent_cache;
11121 struct cache_tree extent_cache;
11122 struct cache_tree seen;
11123 struct cache_tree pending;
11124 struct cache_tree reada;
11125 struct cache_tree nodes;
11126 struct extent_io_tree excluded_extents;
11127 struct cache_tree corrupt_blocks;
11128 struct btrfs_path path;
11129 struct btrfs_key key;
11130 struct btrfs_key found_key;
11132 struct block_info *bits;
11134 struct extent_buffer *leaf;
11136 struct btrfs_root_item ri;
11137 struct list_head dropping_trees;
11138 struct list_head normal_trees;
11139 struct btrfs_root *root1;
11140 struct btrfs_root *root;
/* Stage 1: initialise every local cache before anything can fail */
11144 root = fs_info->fs_root;
11145 dev_cache = RB_ROOT;
11146 cache_tree_init(&chunk_cache);
11147 block_group_tree_init(&block_group_cache);
11148 device_extent_tree_init(&dev_extent_cache);
11150 cache_tree_init(&extent_cache);
11151 cache_tree_init(&seen);
11152 cache_tree_init(&pending);
11153 cache_tree_init(&nodes);
11154 cache_tree_init(&reada);
11155 cache_tree_init(&corrupt_blocks);
11156 extent_io_tree_init(&excluded_extents);
11157 INIT_LIST_HEAD(&dropping_trees);
11158 INIT_LIST_HEAD(&normal_trees);
/* Publish the local structures on fs_info; they are reset to NULL in cleanup */
11161 fs_info->excluded_extents = &excluded_extents;
11162 fs_info->fsck_extent_cache = &extent_cache;
11163 fs_info->free_extent_hook = free_extent_hook;
11164 fs_info->corrupt_blocks = &corrupt_blocks;
11168 bits = malloc(bits_nr * sizeof(struct block_info));
11174 if (ctx.progress_enabled) {
11175 ctx.tp = TASK_EXTENTS;
11176 task_start(ctx.info);
/* Stage 3: the tree root and chunk root are always queued as normal trees */
11180 root1 = fs_info->tree_root;
11181 level = btrfs_header_level(root1->node);
11182 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11183 root1->node->start, 0, level, 0, NULL);
11186 root1 = fs_info->chunk_root;
11187 level = btrfs_header_level(root1->node);
11188 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11189 root1->node->start, 0, level, 0, NULL);
/* Scan the tree root for ROOT_ITEM keys */
11192 btrfs_init_path(&path);
11195 key.type = BTRFS_ROOT_ITEM_KEY;
11196 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
11200 leaf = path.nodes[0];
11201 slot = path.slots[0];
/*
 * NOTE(review): the search above ran on fs_info->tree_root while this call
 * passes root (fs_info->fs_root) - confirm this is intended.
 */
11202 if (slot >= btrfs_header_nritems(path.nodes[0])) {
11203 ret = btrfs_next_leaf(root, &path);
11206 leaf = path.nodes[0];
11207 slot = path.slots[0];
11209 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11210 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
11211 unsigned long offset;
11214 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
11215 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
11216 last_snapshot = btrfs_root_last_snapshot(&ri);
/* drop_progress objectid of 0 selects the normal list, anything else the dropping list */
11217 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
11218 level = btrfs_root_level(&ri);
11219 ret = add_root_item_to_list(&normal_trees,
11220 found_key.objectid,
11221 btrfs_root_bytenr(&ri),
11222 last_snapshot, level,
11227 level = btrfs_root_level(&ri);
11228 objectid = found_key.objectid;
/* found_key is reused here to carry the CPU-order drop_progress key */
11229 btrfs_disk_key_to_cpu(&found_key,
11230 &ri.drop_progress);
11231 ret = add_root_item_to_list(&dropping_trees,
11233 btrfs_root_bytenr(&ri),
11234 last_snapshot, level,
11235 ri.drop_level, &found_key);
11242 btrfs_release_path(&path);
11245 * check_block can return -EAGAIN if it fixes something, please keep
11246 * this in mind when dealing with return values from these functions, if
11247 * we get -EAGAIN we want to fall through and restart the loop.
11249 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11250 &seen, &reada, &nodes, &extent_cache,
11251 &chunk_cache, &dev_cache, &block_group_cache,
11252 &dev_extent_cache);
11254 if (ret == -EAGAIN)
11258 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11259 &pending, &seen, &reada, &nodes,
11260 &extent_cache, &chunk_cache, &dev_cache,
11261 &block_group_cache, &dev_extent_cache);
11263 if (ret == -EAGAIN)
11268 ret = check_chunks(&chunk_cache, &block_group_cache,
11269 &dev_extent_cache, NULL, NULL, NULL, 0);
11271 if (ret == -EAGAIN)
11276 ret = check_extent_refs(root, &extent_cache);
11278 if (ret == -EAGAIN)
11283 ret = check_devices(&dev_cache, &dev_extent_cache);
/* Stage 5: stop progress reporting and unpublish the local structures */
11288 task_stop(ctx.info);
11290 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11291 extent_io_tree_cleanup(&excluded_extents);
11292 fs_info->fsck_extent_cache = NULL;
11293 fs_info->free_extent_hook = NULL;
11294 fs_info->corrupt_blocks = NULL;
11295 fs_info->excluded_extents = NULL;
11298 free_chunk_cache_tree(&chunk_cache);
11299 free_device_cache_tree(&dev_cache);
11300 free_block_group_tree(&block_group_cache);
11301 free_device_extent_tree(&dev_extent_cache);
11302 free_extent_cache_tree(&seen);
11303 free_extent_cache_tree(&pending);
11304 free_extent_cache_tree(&reada);
11305 free_extent_cache_tree(&nodes);
11306 free_root_item_list(&normal_trees);
11307 free_root_item_list(&dropping_trees);
/*
 * Second cleanup sequence: unlike the one above it also calls
 * free_extent_record_cache() on extent_cache and leaves the fs_info
 * pointers in place.
 */
11310 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11311 free_extent_cache_tree(&seen);
11312 free_extent_cache_tree(&pending);
11313 free_extent_cache_tree(&reada);
11314 free_extent_cache_tree(&nodes);
11315 free_chunk_cache_tree(&chunk_cache);
11316 free_block_group_tree(&block_group_cache);
11317 free_device_cache_tree(&dev_cache);
11318 free_device_extent_tree(&dev_extent_cache);
11319 free_extent_record_cache(&extent_cache);
11320 free_root_item_list(&normal_trees);
11321 free_root_item_list(&dropping_trees);
11322 extent_io_tree_cleanup(&excluded_extents);
/*
 * Validate the type of the inline ref @iref stored in @eb.
 *
 * The four listed ref key types share one case branch; any other type is
 * reported through error() using @key and the result is set to UNKNOWN_TYPE.
 *
 * NOTE(review): the body of the shared case branch and the return statement
 * are not visible in this listing - presumably 0 for a known type; confirm.
 */
11326 static int check_extent_inline_ref(struct extent_buffer *eb,
11327 struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
11330 u8 type = btrfs_extent_inline_ref_type(eb, iref);
11333 case BTRFS_TREE_BLOCK_REF_KEY:
11334 case BTRFS_EXTENT_DATA_REF_KEY:
11335 case BTRFS_SHARED_BLOCK_REF_KEY:
11336 case BTRFS_SHARED_DATA_REF_KEY:
11340 error("extent[%llu %u %llu] has unknown ref type: %d",
11341 key->objectid, key->type, key->offset, type);
11342 ret = UNKNOWN_TYPE;
11350 * Check backrefs of a tree block given by @bytenr or @eb.
11352 * @root: the root containing the @bytenr or @eb
11353 * @eb: tree block extent buffer, can be NULL
11354 * @bytenr: bytenr of the tree block to search
11355 * @level: tree level of the tree block
11356 * @owner: owner of the tree block
11358 * Return >0 for any error found and output error message
11359 * Return 0 for no error found
/*
 * Flow summary (visible statements only):
 *  - Look up the extent/metadata item for @bytenr in the extent tree
 *    (METADATA_ITEM key when SKINNY_METADATA is set, EXTENT_ITEM otherwise,
 *    offset (u64)-1, then btrfs_previous_extent_item()); failures set
 *    BACKREF_MISSING.
 *  - Take the recorded level from key.offset (METADATA_ITEM) or from the
 *    btrfs_tree_block_info that follows the extent item.
 *  - Compare flags, generation, level and ref count against @eb/@level/@owner
 *    and report mismatches as BACKREF_MISMATCH.
 *  - Walk the inline refs; if nothing matches, fall back to a keyed
 *    TREE_BLOCK_REF lookup and finally to a SHARED_BLOCK_REF lookup.
 *  - Print a "backref lost" message when @eb is set and BACKREF_MISSING
 *    is in the result.
 *
 * NOTE(review): conditions, gotos, labels and the return statement are not
 * visible in this listing. The BACKREF_MISMATCH sites use plain assignment
 * to err while the BACKREF_MISSING sites use |= - confirm that is intended.
 */
11361 static int check_tree_block_ref(struct btrfs_root *root,
11362 struct extent_buffer *eb, u64 bytenr,
11363 int level, u64 owner, struct node_refs *nrefs)
11365 struct btrfs_key key;
11366 struct btrfs_root *extent_root = root->fs_info->extent_root;
11367 struct btrfs_path path;
11368 struct btrfs_extent_item *ei;
11369 struct btrfs_extent_inline_ref *iref;
11370 struct extent_buffer *leaf;
11375 int root_level = btrfs_header_level(root->node);
11377 u32 nodesize = root->fs_info->nodesize;
11386 btrfs_init_path(&path);
11387 key.objectid = bytenr;
11388 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11389 key.type = BTRFS_METADATA_ITEM_KEY;
11391 key.type = BTRFS_EXTENT_ITEM_KEY;
11392 key.offset = (u64)-1;
11394 /* Search for the backref in extent tree */
11395 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11397 err |= BACKREF_MISSING;
11400 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11402 err |= BACKREF_MISSING;
11406 leaf = path.nodes[0];
11407 slot = path.slots[0];
11408 btrfs_item_key_to_cpu(leaf, &key, slot);
11410 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* The recorded level lives in key.offset or in the trailing tree_block_info */
11412 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11413 skinny_level = (int)key.offset;
11414 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11416 struct btrfs_tree_block_info *info;
11418 info = (struct btrfs_tree_block_info *)(ei + 1);
11419 skinny_level = btrfs_tree_block_level(leaf, info);
11420 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11429 * Due to the feature of shared tree blocks, if the upper node
11430 * is a fs root or shared node, the extent of checked node may
11431 * not be updated until the next CoW.
11434 strict = should_check_extent_strictly(root, nrefs,
11436 if (!(btrfs_extent_flags(leaf, ei) &
11437 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11439 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11440 key.objectid, nodesize,
11441 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11442 err = BACKREF_MISMATCH;
11444 header_gen = btrfs_header_generation(eb);
11445 extent_gen = btrfs_extent_generation(leaf, ei);
11446 if (header_gen != extent_gen) {
11448 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11449 key.objectid, nodesize, header_gen,
11451 err = BACKREF_MISMATCH;
11453 if (level != skinny_level) {
11455 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11456 key.objectid, nodesize, level, skinny_level);
11457 err = BACKREF_MISMATCH;
11459 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11461 "extent[%llu %u] is referred by other roots than %llu",
11462 key.objectid, nodesize, root->objectid);
11463 err = BACKREF_MISMATCH;
11468 * Iterate the extent/metadata item to find the exact backref
11470 item_size = btrfs_item_size_nr(leaf, slot);
11471 ptr = (unsigned long)iref;
11472 end = (unsigned long)ei + item_size;
11474 while (ptr < end) {
11475 iref = (struct btrfs_extent_inline_ref *)ptr;
11476 type = btrfs_extent_inline_ref_type(leaf, iref);
11477 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11479 ret = check_extent_inline_ref(leaf, &key, iref);
11484 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11485 if (offset == root->objectid)
11487 if (!strict && owner == offset)
11489 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11491 * Backref of tree reloc root points to itself, no need
11492 * to check backref any more.
11494 * This may be an error of loop backref, but extent tree
11495 * checker should have already handled it.
11496 * Here we only need to avoid infinite iteration.
11498 if (offset == bytenr) {
11502 * Check if the backref points to valid
11505 found_ref = !check_tree_block_ref( root, NULL,
11506 offset, level + 1, owner,
11513 ptr += btrfs_extent_inline_ref_size(type);
11517 * Inlined extent item doesn't have what we need, check
11518 * TREE_BLOCK_REF_KEY
11521 btrfs_release_path(&path);
11522 key.objectid = bytenr;
11523 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11524 key.offset = root->objectid;
11526 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11531 * Finally check SHARED BLOCK REF, any found will be good
11532 * Here we're not doing comprehensive extent backref checking,
11533 * only need to ensure there is some extent referring to this
11537 btrfs_release_path(&path);
11538 key.objectid = bytenr;
11539 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
11540 key.offset = (u64)-1;
11542 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11544 err |= BACKREF_MISSING;
11547 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11549 err |= BACKREF_MISSING;
11555 err |= BACKREF_MISSING;
11557 btrfs_release_path(&path);
11558 if (nrefs && strict &&
11559 level < root_level && nrefs->full_backref[level + 1])
11560 parent = nrefs->bytenr[level + 1];
11561 if (eb && (err & BACKREF_MISSING))
11563 "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11564 bytenr, nodesize, owner, level,
11565 parent ? "parent" : "root",
11566 parent ? parent : root->objectid);
11571 * If @err contains BACKREF_MISSING then add extent of the
11572 * file_extent_data_item.
11574 * Returns error bits after repair.
/*
 * Flow summary (visible statements only):
 *  - Read the file extent item at @pathp; inline extents and extents with a
 *    disk bytenr of 0 are singled out first (the action taken for them is
 *    not visible here).
 *  - Only BACKREF_MISSING is handled ("now repair only adds backref").
 *  - Look up the EXTENT_ITEM (disk_bytenr, num_bytes) in the extent tree;
 *    a new item with 0 refs, the file extent's generation and
 *    BTRFS_EXTENT_FLAG_DATA may be inserted, followed by
 *    btrfs_update_block_group().
 *  - Add the data backref with btrfs_inc_extent_ref(); the parent is the
 *    leaf bytenr when nrefs->full_backref[0] is set.
 *  - On the success path BACKREF_MISSING is cleared from err.
 *
 * NOTE(review): the trailing parameter(s) of the signature, the branch
 * conditions, labels and the return statement are not visible in this
 * listing - confirm the exact control flow.
 */
11576 static int repair_extent_data_item(struct btrfs_trans_handle *trans,
11577 struct btrfs_root *root,
11578 struct btrfs_path *pathp,
11579 struct node_refs *nrefs,
11582 struct btrfs_file_extent_item *fi;
11583 struct btrfs_key fi_key;
11584 struct btrfs_key key;
11585 struct btrfs_extent_item *ei;
11586 struct btrfs_path path;
11587 struct btrfs_root *extent_root = root->fs_info->extent_root;
11588 struct extent_buffer *eb;
11600 eb = pathp->nodes[0];
11601 slot = pathp->slots[0];
11602 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11603 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11605 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11606 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11609 file_offset = fi_key.offset;
11610 generation = btrfs_file_extent_generation(eb, fi);
11611 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11612 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11613 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Backref offset is the file offset minus the offset into the extent */
11614 offset = file_offset - extent_offset;
11616 /* now repair only adds backref */
11617 if ((err & BACKREF_MISSING) == 0)
11620 /* search extent item */
11621 key.objectid = disk_bytenr;
11622 key.type = BTRFS_EXTENT_ITEM_KEY;
11623 key.offset = num_bytes;
11625 btrfs_init_path(&path);
11626 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11632 /* insert an extent item */
11634 key.objectid = disk_bytenr;
11635 key.type = BTRFS_EXTENT_ITEM_KEY;
11636 key.offset = num_bytes;
11637 size = sizeof(*ei);
11639 btrfs_release_path(&path);
11640 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
11644 eb = path.nodes[0];
11645 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
/* The new item starts with 0 refs; the ref is added by btrfs_inc_extent_ref() below */
11647 btrfs_set_extent_refs(eb, ei, 0);
11648 btrfs_set_extent_generation(eb, ei, generation);
11649 btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
11651 btrfs_mark_buffer_dirty(eb);
11652 ret = btrfs_update_block_group(extent_root, disk_bytenr,
11654 btrfs_release_path(&path);
/*
 * NOTE(review): eb is reassigned to path.nodes[0] in the insert branch
 * above; confirm which buffer is meant when taking the parent bytenr here.
 */
11657 if (nrefs->full_backref[0])
11658 parent = btrfs_header_bytenr(eb);
11662 ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
11664 parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
11668 "failed to increase extent data backref[%llu %llu] root %llu",
11669 disk_bytenr, num_bytes, root->objectid);
11672 printf("Add one extent data backref [%llu %llu]\n",
11673 disk_bytenr, num_bytes);
11676 err &= ~BACKREF_MISSING;
11679 error("can't repair root %llu extent data item[%llu %llu]",
11680 root->objectid, disk_bytenr, num_bytes);
11685 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11687 * Return >0 for any error found and output error message
11688 * Return 0 for no error found
/*
 * Flow summary (visible statements only):
 *  - Read the file extent item at @pathp; holes (disk bytenr 0) and inline
 *    extents have nothing to check.
 *  - Flag disk_num_bytes / num_bytes that are not sector aligned with
 *    BYTES_UNALIGNED; aligned values are added to data_bytes_allocated /
 *    data_bytes_referenced when @account_bytes is set.
 *  - Look up the EXTENT_ITEM (disk_bytenr, disk_num_bytes) and require
 *    BTRFS_EXTENT_FLAG_DATA, else BACKREF_MISMATCH.
 *  - Walk the inline refs looking for a matching EXTENT_DATA_REF or a valid
 *    SHARED_DATA_REF; if none is found, try the keyed EXTENT_DATA_REF item
 *    and then the keyed SHARED_DATA_REF item (offset eb->start).
 *  - Without any backref, BACKREF_MISSING is set and reported.
 *
 * NOTE(review): several conditions, jumps and the return statement are not
 * visible in this listing.
 */
11690 static int check_extent_data_item(struct btrfs_root *root,
11691 struct btrfs_path *pathp,
11692 struct node_refs *nrefs, int account_bytes)
11694 struct btrfs_file_extent_item *fi;
11695 struct extent_buffer *eb = pathp->nodes[0];
11696 struct btrfs_path path;
11697 struct btrfs_root *extent_root = root->fs_info->extent_root;
11698 struct btrfs_key fi_key;
11699 struct btrfs_key dbref_key;
11700 struct extent_buffer *leaf;
11701 struct btrfs_extent_item *ei;
11702 struct btrfs_extent_inline_ref *iref;
11703 struct btrfs_extent_data_ref *dref;
11706 u64 disk_num_bytes;
11707 u64 extent_num_bytes;
11714 int found_dbackref = 0;
11715 int slot = pathp->slots[0];
11720 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11721 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11723 /* Nothing to check for hole and inline data extents */
11724 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11725 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11728 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11729 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11730 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11731 offset = btrfs_file_extent_offset(eb, fi);
11733 /* Check unaligned disk_num_bytes and num_bytes */
11734 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11736 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11737 fi_key.objectid, fi_key.offset, disk_num_bytes,
11738 root->fs_info->sectorsize);
11739 err |= BYTES_UNALIGNED;
11740 } else if (account_bytes) {
11741 data_bytes_allocated += disk_num_bytes;
11743 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11745 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11746 fi_key.objectid, fi_key.offset, extent_num_bytes,
11747 root->fs_info->sectorsize);
11748 err |= BYTES_UNALIGNED;
11749 } else if (account_bytes) {
11750 data_bytes_referenced += extent_num_bytes;
/* The owner recorded in the leaf header is used for the non-strict match below */
11752 owner = btrfs_header_owner(eb);
11754 /* Check the extent item of the file extent in extent tree */
11755 btrfs_init_path(&path);
11756 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11757 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11758 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11760 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11764 leaf = path.nodes[0];
11765 slot = path.slots[0];
11766 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11768 extent_flags = btrfs_extent_flags(leaf, ei);
11770 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11772 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11773 disk_bytenr, disk_num_bytes,
11774 BTRFS_EXTENT_FLAG_DATA);
11775 err |= BACKREF_MISMATCH;
11778 /* Check data backref inside that extent item */
11779 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11780 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11781 ptr = (unsigned long)iref;
11782 end = (unsigned long)ei + item_size;
11783 strict = should_check_extent_strictly(root, nrefs, -1);
11785 while (ptr < end) {
11789 bool match = false;
11791 iref = (struct btrfs_extent_inline_ref *)ptr;
11792 type = btrfs_extent_inline_ref_type(leaf, iref);
11793 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11795 ret = check_extent_inline_ref(leaf, &dbref_key, iref);
11800 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11801 ref_root = btrfs_extent_data_ref_root(leaf, dref);
11802 ref_objectid = btrfs_extent_data_ref_objectid(leaf, dref);
11803 ref_offset = btrfs_extent_data_ref_offset(leaf, dref);
/* A data ref matches on inode number and (file offset - extent offset) */
11805 if (ref_objectid == fi_key.objectid &&
11806 ref_offset == fi_key.offset - offset)
11808 if (ref_root == root->objectid && match)
11809 found_dbackref = 1;
11810 else if (!strict && owner == ref_root && match)
11811 found_dbackref = 1;
11812 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
11813 found_dbackref = !check_tree_block_ref(root, NULL,
11814 btrfs_extent_inline_ref_offset(leaf, iref),
11818 if (found_dbackref)
11820 ptr += btrfs_extent_inline_ref_size(type);
11823 if (!found_dbackref) {
11824 btrfs_release_path(&path);
11826 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
11827 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11828 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
11829 dbref_key.offset = hash_extent_data_ref(root->objectid,
11830 fi_key.objectid, fi_key.offset - offset);
11832 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11833 &dbref_key, &path, 0, 0);
11835 found_dbackref = 1;
11839 btrfs_release_path(&path);
11842 * Neither inlined nor EXTENT_DATA_REF found, try
11843 * SHARED_DATA_REF as last chance.
11845 dbref_key.objectid = disk_bytenr;
11846 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
11847 dbref_key.offset = eb->start;
11849 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11850 &dbref_key, &path, 0, 0);
11852 found_dbackref = 1;
11858 if (!found_dbackref)
11859 err |= BACKREF_MISSING;
11860 btrfs_release_path(&path);
11861 if (err & BACKREF_MISSING) {
11862 error("data extent[%llu %llu] backref lost",
11863 disk_bytenr, disk_num_bytes);
11869 * Get real tree block level for the case like shared block
11870 * Return >= 0 as tree level
11871 * Return <0 for error
/*
 * Resolve the level of the tree block at @bytenr from two sources and
 * return it only when they agree.
 *
 * The extent tree is searched for the block's extent/metadata item; the item
 * must carry BTRFS_EXTENT_FLAG_TREE_BLOCK. Its generation is used as the
 * transid for read_tree_block(). The backref level comes from key.offset
 * for a METADATA_ITEM key, otherwise from the trailing
 * btrfs_tree_block_info. The level in the block header is then read and
 * compared with the backref level.
 *
 * NOTE(review): the error paths and their return values are not visible in
 * this listing.
 */
11873 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11875 struct extent_buffer *eb;
11876 struct btrfs_path path;
11877 struct btrfs_key key;
11878 struct btrfs_extent_item *ei;
11885 /* Search extent tree for extent generation and level */
11886 key.objectid = bytenr;
11887 key.type = BTRFS_METADATA_ITEM_KEY;
11888 key.offset = (u64)-1;
11890 btrfs_init_path(&path);
11891 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
11894 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11902 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11903 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11904 struct btrfs_extent_item);
11905 flags = btrfs_extent_flags(path.nodes[0], ei);
11906 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11911 /* Get transid for later read_tree_block() check */
11912 transid = btrfs_extent_generation(path.nodes[0], ei);
11914 /* Get backref level as one source */
11915 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11916 backref_level = key.offset;
11918 struct btrfs_tree_block_info *info;
11920 info = (struct btrfs_tree_block_info *)(ei + 1);
11921 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11923 btrfs_release_path(&path);
11925 /* Get level from tree block as an alternative source */
11926 eb = read_tree_block(fs_info, bytenr, transid);
11927 if (!extent_buffer_uptodate(eb)) {
11928 free_extent_buffer(eb);
11931 header_level = btrfs_header_level(eb);
11932 free_extent_buffer(eb);
/* The two sources must agree before the header level is returned */
11934 if (header_level != backref_level)
11936 return header_level;
11939 btrfs_release_path(&path);
11944 * Check if a tree block backref is valid (points to a valid tree block)
11945 * if level == -1, level will be resolved
11946 * Return >0 for any error found and print error message
/*
 * Verify that root @root_id really references the tree block at @bytenr.
 *
 * Visible steps: resolve @level through query_tree_block_level() for the
 * level == -1 case, read the root with btrfs_read_fs_root(), read the block
 * to obtain its first key (node key or item key), then search the root for
 * that key with path.lowest_level set to @level. The node found at that
 * level must have the same bytenr and level, otherwise REFERENCER_MISMATCH
 * is set. Failures along the way set REFERENCER_MISSING, which is reported
 * at the end.
 *
 * NOTE(review): branch conditions, labels and the return statement are not
 * visible in this listing.
 */
11948 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11949 u64 bytenr, int level)
11951 struct btrfs_root *root;
11952 struct btrfs_key key;
11953 struct btrfs_path path;
11954 struct extent_buffer *eb;
11955 struct extent_buffer *node;
11956 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11960 /* Query level for level == -1 special case */
11962 level = query_tree_block_level(fs_info, bytenr);
11964 err |= REFERENCER_MISSING;
11968 key.objectid = root_id;
11969 key.type = BTRFS_ROOT_ITEM_KEY;
11970 key.offset = (u64)-1;
11972 root = btrfs_read_fs_root(fs_info, &key);
11973 if (IS_ERR(root)) {
11974 err |= REFERENCER_MISSING;
11978 /* Read out the tree block to get item/node key */
11979 eb = read_tree_block(fs_info, bytenr, 0);
11980 if (!extent_buffer_uptodate(eb)) {
11981 err |= REFERENCER_MISSING;
11982 free_extent_buffer(eb);
11986 /* Empty tree, no need to check key */
11987 if (!btrfs_header_nritems(eb) && !level) {
11988 free_extent_buffer(eb);
11993 btrfs_node_key_to_cpu(eb, &key, 0);
11995 btrfs_item_key_to_cpu(eb, &key, 0);
11997 free_extent_buffer(eb);
11999 btrfs_init_path(&path);
/* Stop the search at the level of the block under test */
12000 path.lowest_level = level;
12001 /* Search with the first key, to ensure we can reach it */
12002 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12004 err |= REFERENCER_MISSING;
12008 node = path.nodes[level];
12009 if (btrfs_header_bytenr(node) != bytenr) {
12011 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
12012 bytenr, nodesize, bytenr,
12013 btrfs_header_bytenr(node));
12014 err |= REFERENCER_MISMATCH;
12016 if (btrfs_header_level(node) != level) {
12018 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
12019 bytenr, nodesize, level,
12020 btrfs_header_level(node));
12021 err |= REFERENCER_MISMATCH;
12025 btrfs_release_path(&path);
/* Two message variants exist: one without and one with the level */
12027 if (err & REFERENCER_MISSING) {
12029 error("extent [%llu %d] lost referencer (owner: %llu)",
12030 bytenr, nodesize, root_id);
12033 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
12034 bytenr, nodesize, root_id, level);
12041 * Check if tree block @eb is tree reloc root.
12042 * Return 0 if it's not or any problem happens
12043 * Return 1 if it's a tree reloc root
/*
 * Decide whether @eb is the root node of a tree reloc root.
 *
 * Builds the key (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM, owner of @eb), reads
 * that root without caching it and compares the bytenr of its root node with
 * the bytenr of @eb. The temporary root is released with
 * btrfs_free_fs_root().
 *
 * NOTE(review): the assignment made on a match and the return statements are
 * not visible in this listing.
 */
12045 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
12046 struct extent_buffer *eb)
12048 struct btrfs_root *tree_reloc_root;
12049 struct btrfs_key key;
12050 u64 bytenr = btrfs_header_bytenr(eb);
12051 u64 owner = btrfs_header_owner(eb);
/* The key offset carries the owner taken from the block header */
12054 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12055 key.offset = owner;
12056 key.type = BTRFS_ROOT_ITEM_KEY;
12058 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
12059 if (IS_ERR(tree_reloc_root))
12062 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
12064 btrfs_free_fs_root(tree_reloc_root);
12069 * Check referencer for shared block backref
12070 * If level == -1, this function will resolve the level.
/*
 * Verify that the tree block at @parent really points to @bytenr.
 *
 * Reads @parent, resolves @level through query_tree_block_level() when
 * needed, treats parent == bytenr as a possible tree reloc root, requires
 * the parent's header level to be level + 1 and scans the parent's block
 * pointers for @bytenr. When no parent reference is found an error is
 * printed and REFERENCER_MISSING is returned.
 *
 * NOTE(review): the jumps taken on the early checks and the success return
 * are not visible in this listing.
 */
12072 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
12073 u64 parent, u64 bytenr, int level)
12075 struct extent_buffer *eb;
12077 int found_parent = 0;
12080 eb = read_tree_block(fs_info, parent, 0);
12081 if (!extent_buffer_uptodate(eb))
12085 level = query_tree_block_level(fs_info, bytenr);
12089 /* It's possible it's a tree reloc root */
12090 if (parent == bytenr) {
12091 if (is_tree_reloc_root(fs_info, eb))
/* A real parent sits exactly one level above the child */
12096 if (level + 1 != btrfs_header_level(eb))
12099 nr = btrfs_header_nritems(eb);
12100 for (i = 0; i < nr; i++) {
12101 if (bytenr == btrfs_node_blockptr(eb, i)) {
12107 free_extent_buffer(eb);
12108 if (!found_parent) {
12110 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
12111 bytenr, fs_info->nodesize, parent, level);
12112 return REFERENCER_MISSING;
12118 * Check referencer for normal (inlined) data ref
12119 * If len == 0, it will be resolved by searching in extent tree
/*
 * Verify a keyed/inline EXTENT_DATA_REF: count the file extents of inode
 * @objectid in root @root_id that reference the data extent and compare the
 * count with @count.
 *
 * Visible steps: look up the EXTENT_ITEM for @bytenr in the extent tree,
 * read the root @root_id, search it for the inode's EXTENT_DATA items
 * starting at @offset (or 0 when bit 63 of @offset is set) and iterate with
 * btrfs_next_item(). A file extent qualifies when its disk bytenr equals
 * @bytenr, its disk num bytes equals @len, key.offset minus the file extent
 * offset equals @offset and the leaf owner equals @root_id. A count
 * mismatch is reported and REFERENCER_MISSING returned.
 *
 * NOTE(review): the statement that resolves @len, the counter increment,
 * the loop header and the success return are not visible in this listing.
 */
12121 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
12122 u64 root_id, u64 objectid, u64 offset,
12123 u64 bytenr, u64 len, u32 count)
12125 struct btrfs_root *root;
12126 struct btrfs_root *extent_root = fs_info->extent_root;
12127 struct btrfs_key key;
12128 struct btrfs_path path;
12129 struct extent_buffer *leaf;
12130 struct btrfs_file_extent_item *fi;
12131 u32 found_count = 0;
12136 key.objectid = bytenr;
12137 key.type = BTRFS_EXTENT_ITEM_KEY;
12138 key.offset = (u64)-1;
12140 btrfs_init_path(&path);
12141 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12144 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
12147 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12148 if (key.objectid != bytenr ||
12149 key.type != BTRFS_EXTENT_ITEM_KEY)
12152 btrfs_release_path(&path);
12154 key.objectid = root_id;
12155 key.type = BTRFS_ROOT_ITEM_KEY;
12156 key.offset = (u64)-1;
12157 btrfs_init_path(&path);
12159 root = btrfs_read_fs_root(fs_info, &key);
12163 key.objectid = objectid;
12164 key.type = BTRFS_EXTENT_DATA_KEY;
12166 * It can be nasty as data backref offset is
12167 * file offset - file extent offset, which is smaller or
12168 * equal to original backref offset. The only special case is
12169 * overflow. So we need to special check and do further search.
12171 key.offset = offset & (1ULL << 63) ? 0 : offset;
12173 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12178 * Search afterwards to get correct one
12179 * NOTE: As we must do a comprehensive check on the data backref to
12180 * make sure the dref count also matches, we must iterate all file
12181 * extents for that inode.
12184 leaf = path.nodes[0];
12185 slot = path.slots[0];
12187 if (slot >= btrfs_header_nritems(leaf) ||
12188 btrfs_header_owner(leaf) != root_id)
12190 btrfs_item_key_to_cpu(leaf, &key, slot);
12191 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
12193 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
12195 * Except normal disk bytenr and disk num bytes, we still
12196 * need to do extra check on dbackref offset as
12197 * dbackref offset = file_offset - file_extent_offset
12199 * Also, we must check the leaf owner.
12200 * In case of shared tree blocks (snapshots) we can inherit
12201 * leaves from source snapshot.
12202 * In that case, reference from source snapshot should not
12205 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
12206 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
12207 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
12208 offset && btrfs_header_owner(leaf) == root_id)
12212 ret = btrfs_next_item(root, &path);
12217 btrfs_release_path(&path);
12218 if (found_count != count) {
12220 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
12221 bytenr, len, root_id, objectid, offset, count, found_count);
12222 return REFERENCER_MISSING;
12228 * Check if the referencer of a shared data backref exists
/*
 * Verify that the leaf at @parent contains a file extent referencing the
 * data extent at @bytenr.
 *
 * Reads @parent and scans its items: only EXTENT_DATA keys are considered,
 * inline file extents are singled out, and a file extent whose disk bytenr
 * equals @bytenr counts as the referencer. When none is found an error is
 * printed and REFERENCER_MISSING is returned.
 *
 * NOTE(review): the continue/break statements, the assignment of
 * found_parent and the success return are not visible in this listing.
 */
12230 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
12231 u64 parent, u64 bytenr)
12233 struct extent_buffer *eb;
12234 struct btrfs_key key;
12235 struct btrfs_file_extent_item *fi;
12237 int found_parent = 0;
12240 eb = read_tree_block(fs_info, parent, 0);
12241 if (!extent_buffer_uptodate(eb))
12244 nr = btrfs_header_nritems(eb);
12245 for (i = 0; i < nr; i++) {
12246 btrfs_item_key_to_cpu(eb, &key, i);
12247 if (key.type != BTRFS_EXTENT_DATA_KEY)
12250 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
12251 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
12254 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
12261 free_extent_buffer(eb);
12262 if (!found_parent) {
12263 error("shared extent %llu referencer lost (parent: %llu)",
12265 return REFERENCER_MISSING;
12271 * Only delete backref if REFERENCER_MISSING now
12273 * Returns <0 the extent was deleted
12274 * Returns >0 the backref was deleted but extent still exists, returned value
12275 * means error after repair
12276 * Returns 0 nothing happened
/*
 * Delete a backref whose referencer is missing or mismatched.
 *
 * The key at @path is saved first. When @err contains REFERENCER_MISSING or
 * REFERENCER_MISMATCH the backref is removed with btrfs_free_extent(); on
 * the success path REFERENCER_MISSING is cleared from @err and a message is
 * printed, otherwise an error is reported. Because the free may delete the
 * extent item itself, @path is released and the saved key is searched again
 * in @root.
 *
 * NOTE(review): the condition tests MISSING | MISMATCH but only
 * REFERENCER_MISSING is cleared afterwards - confirm that is intended.
 * The branch conditions on ret and the return statements are not visible
 * in this listing.
 */
12278 static int repair_extent_item(struct btrfs_trans_handle *trans,
12279 struct btrfs_root *root, struct btrfs_path *path,
12280 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
12281 u64 owner, u64 offset, int err)
12283 struct btrfs_key old_key;
/* Remember the current key so the position can be looked up again below */
12287 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
12289 if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
12290 /* delete the backref */
12291 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
12292 num_bytes, parent, root_objectid, owner, offset);
12295 err &= ~REFERENCER_MISSING;
12296 printf("Delete backref in extent [%llu %llu]\n",
12297 bytenr, num_bytes);
12299 error("fail to delete backref in extent [%llu %llu]",
12300 bytenr, num_bytes);
12304 /* btrfs_free_extent may delete the extent */
12305 btrfs_release_path(path);
12306 ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
12316 * This function will check a given extent item, including its backref and
12317 * itself (like crossing stripe boundary and type)
12319 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Check the EXTENT_ITEM or METADATA_ITEM found at path->nodes[0] /
 * path->slots[0], together with every inline backref stored in it.
 *
 * Steps that can be read from the code below:
 *  - num_bytes is key.offset for an EXTENT_ITEM key and nodesize otherwise;
 *    the same amount is added to bytes_used.
 *  - an item shorter than struct btrfs_extent_item is reported as an
 *    unsupported COMPAT_EXTENT_TREE_V0 item.
 *  - a metadata extent for which check_crossing_stripes() triggers gets
 *    CROSSING_STRIPE_BOUNDARY added to err.
 *  - the tree level comes from the btrfs_tree_block_info that follows the
 *    extent item for old style EXTENT_ITEM metadata, or from key.offset for
 *    a METADATA_ITEM.
 *  - inline refs are walked from ptr up to end = ei + item_size; walking
 *    past end is reported as ITEM_SIZE_MISMATCH.
 *  - each inline ref is dispatched on its type to
 *    check_tree_block_backref(), check_shared_block_backref(),
 *    check_extent_data_backref() or check_shared_data_backref(); any other
 *    type is reported and yields UNKNOWN_TYPE.
 *  - ptr then advances by btrfs_extent_inline_ref_size(type).
 *
 * NOTE(review): the repair branch is entered on (err && repair) but hands
 * ret, not err, to repair_extent_item().  The lines that merge ret into err
 * are not visible in this listing, so confirm that this is intended.
 */
12321 static int check_extent_item(struct btrfs_trans_handle *trans,
12322 struct btrfs_fs_info *fs_info,
12323 struct btrfs_path *path)
12325 struct btrfs_extent_item *ei;
12326 struct btrfs_extent_inline_ref *iref;
12327 struct btrfs_extent_data_ref *dref;
12328 struct extent_buffer *eb = path->nodes[0];
12331 int slot = path->slots[0];
12333 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12334 u32 item_size = btrfs_item_size_nr(eb, slot);
12344 struct btrfs_key key;
12348 btrfs_item_key_to_cpu(eb, &key, slot);
/* EXTENT_ITEM keys carry the length in key.offset; otherwise use nodesize. */
12349 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12350 bytes_used += key.offset;
12351 num_bytes = key.offset;
12353 bytes_used += nodesize;
12354 num_bytes = nodesize;
12357 if (item_size < sizeof(*ei)) {
12359 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12360 * old thing when on disk format is still un-determined.
12361 * No need to care about it anymore
12363 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12367 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12368 flags = btrfs_extent_flags(eb, ei);
12370 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
12372 if (metadata && check_crossing_stripes(global_info, key.objectid,
12374 error("bad metadata [%llu, %llu) crossing stripe boundary",
12375 key.objectid, key.objectid + nodesize);
12376 err |= CROSSING_STRIPE_BOUNDARY;
12379 ptr = (unsigned long)(ei + 1);
12381 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12382 /* Old EXTENT_ITEM metadata */
12383 struct btrfs_tree_block_info *info;
12385 info = (struct btrfs_tree_block_info *)ptr;
12386 level = btrfs_tree_block_level(eb, info);
12387 ptr += sizeof(struct btrfs_tree_block_info);
12389 /* New METADATA_ITEM */
12390 level = key.offset;
12392 end = (unsigned long)ei + item_size;
12395 /* Reached extent item end normally */
12399 /* Beyond extent item end, wrong item size */
12401 err |= ITEM_SIZE_MISMATCH;
12402 error("extent item at bytenr %llu slot %d has wrong size",
12411 /* Now check every backref in this extent item */
12412 iref = (struct btrfs_extent_inline_ref *)ptr;
12413 type = btrfs_extent_inline_ref_type(eb, iref);
12414 offset = btrfs_extent_inline_ref_offset(eb, iref);
12416 case BTRFS_TREE_BLOCK_REF_KEY:
12417 root_objectid = offset;
12419 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12423 case BTRFS_SHARED_BLOCK_REF_KEY:
12425 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12429 case BTRFS_EXTENT_DATA_REF_KEY:
12430 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12431 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12432 owner = btrfs_extent_data_ref_objectid(eb, dref);
12433 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12434 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12435 owner_offset, key.objectid, key.offset,
12436 btrfs_extent_data_ref_count(eb, dref));
12439 case BTRFS_SHARED_DATA_REF_KEY:
12441 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12445 error("extent[%llu %d %llu] has unknown ref type: %d",
12446 key.objectid, key.type, key.offset, type);
12447 ret = UNKNOWN_TYPE;
12452 if (err && repair) {
12453 ret = repair_extent_item(trans, fs_info->extent_root, path,
12454 key.objectid, num_bytes, parent, root_objectid,
12455 owner, owner_offset, ret);
12464 ptr += btrfs_extent_inline_ref_size(type);
12472 * Check if a dev extent item is referred correctly by its chunk
/*
 * Check that the DEV_EXTENT item at @eb / @slot is claimed by a stripe of
 * the chunk it names.
 *
 * The chunk key is built from the chunk objectid and chunk offset stored in
 * the dev extent (type BTRFS_CHUNK_ITEM_KEY) and searched in the chunk
 * root.  The chunk found is passed to btrfs_check_chunk_valid(), its
 * btrfs_stripe_length() is compared with the dev extent length, and its
 * stripes are scanned for one whose devid and offset equal the objectid and
 * offset of the dev extent key.
 *
 * Returns REFERENCER_MISSING, after logging an error, when found_chunk is
 * still zero once the path has been released.
 * NOTE(review): the branches taken when the search, the validity check or
 * the length comparison fail are not visible in this listing.
 */
12474 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12475 struct extent_buffer *eb, int slot)
12477 struct btrfs_root *chunk_root = fs_info->chunk_root;
12478 struct btrfs_dev_extent *ptr;
12479 struct btrfs_path path;
12480 struct btrfs_key chunk_key;
12481 struct btrfs_key devext_key;
12482 struct btrfs_chunk *chunk;
12483 struct extent_buffer *l;
12487 int found_chunk = 0;
12490 btrfs_item_key_to_cpu(eb, &devext_key, slot);
12491 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12492 length = btrfs_dev_extent_length(eb, ptr);
/* The dev extent records which chunk owns it; use that as the search key. */
12494 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12495 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12496 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12498 btrfs_init_path(&path);
12499 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12504 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12505 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
12510 if (btrfs_stripe_length(fs_info, l, chunk) != length)
12513 num_stripes = btrfs_chunk_num_stripes(l, chunk);
12514 for (i = 0; i < num_stripes; i++) {
12515 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12516 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12518 if (devid == devext_key.objectid &&
12519 offset == devext_key.offset) {
12525 btrfs_release_path(&path);
12526 if (!found_chunk) {
12528 "device extent[%llu, %llu, %llu] did not find the related chunk",
12529 devext_key.objectid, devext_key.offset, length);
12530 return REFERENCER_MISSING;
12536 * Check if the used space is correct with the dev item
/*
 * Compare the bytes_used field of the DEV_ITEM at @eb / @slot with the sum
 * of the lengths of all dev extents that belong to that device.
 *
 * The device id, bytes used and total bytes are read from the dev item.
 * The dev tree is then searched for DEV_EXTENT keys whose objectid is the
 * device id, and btrfs_dev_extent_length() of every matching item is added
 * to total while stepping with btrfs_next_item().
 *
 * Returns REFERENCER_MISSING when the initial search reports no usable dev
 * extent, and ACCOUNTING_MISMATCH when used differs from total.  When the
 * totals agree, check_dev_size_alignment() is called with the device id,
 * its total bytes and the sector size.
 * NOTE(review): the initial value of key.offset and of total, and the exact
 * condition guarding the first error path, are not visible in this listing.
 */
12538 static int check_dev_item(struct btrfs_fs_info *fs_info,
12539 struct extent_buffer *eb, int slot)
12541 struct btrfs_root *dev_root = fs_info->dev_root;
12542 struct btrfs_dev_item *dev_item;
12543 struct btrfs_path path;
12544 struct btrfs_key key;
12545 struct btrfs_dev_extent *ptr;
12552 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12553 dev_id = btrfs_device_id(eb, dev_item);
12554 used = btrfs_device_bytes_used(eb, dev_item);
12555 total_bytes = btrfs_device_total_bytes(eb, dev_item);
12557 key.objectid = dev_id;
12558 key.type = BTRFS_DEV_EXTENT_KEY;
12561 btrfs_init_path(&path);
12562 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* key is reloaded from the dev item so the message names that item. */
12564 btrfs_item_key_to_cpu(eb, &key, slot);
12565 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12566 key.objectid, key.type, key.offset);
12567 btrfs_release_path(&path);
12568 return REFERENCER_MISSING;
12571 /* Iterate dev_extents to calculate the used space of a device */
12573 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12576 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12577 if (key.objectid > dev_id)
12579 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12582 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12583 struct btrfs_dev_extent);
12584 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12586 ret = btrfs_next_item(dev_root, &path);
12590 btrfs_release_path(&path);
12592 if (used != total) {
/*
 * NOTE(review): key is reloaded from the dev item here, but the message
 * arguments below are BTRFS_ROOT_TREE_OBJECTID, BTRFS_DEV_EXTENT_KEY and
 * dev_id rather than the fields of key.  Confirm which triple is meant.
 */
12593 btrfs_item_key_to_cpu(eb, &key, slot);
12595 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12596 total, used, BTRFS_ROOT_TREE_OBJECTID,
12597 BTRFS_DEV_EXTENT_KEY, dev_id);
12598 return ACCOUNTING_MISMATCH;
12600 check_dev_size_alignment(dev_id, total_bytes, fs_info->sectorsize);
12606 * Check a block group item with its referencer (chunk) and its used space
12607 * with extent/metadata item
/*
 * Check the BLOCK_GROUP_ITEM at @eb / @slot in two ways.
 *
 * 1. Referencer: a CHUNK_ITEM with objectid
 *    BTRFS_FIRST_CHUNK_TREE_OBJECTID and offset equal to the block group
 *    start is searched in the chunk root.  A failed lookup adds
 *    REFERENCER_MISSING to err; a chunk length mismatch adds
 *    REFERENCER_MISMATCH.
 *
 * 2. Accounting: the extent tree is walked from the block group start up
 *    to, but not including, bg_key.objectid + bg_key.offset.  Only
 *    EXTENT_ITEM and METADATA_ITEM keys are looked at.  For EXTENT_ITEM
 *    keys key.offset is added to total (the METADATA_ITEM amount is on a
 *    line not visible here, presumably nodesize).  The extent flags are
 *    compared with the block group flags: a DATA extent needs a DATA block
 *    group and a TREE_BLOCK extent needs a SYSTEM or METADATA block group,
 *    otherwise CHUNK_TYPE_MISMATCH is added.  Finally a difference between
 *    total and the used field of the block group item adds
 *    BG_ACCOUNTING_ERROR.
 *
 * NOTE(review): the return statement is not visible in this listing; the
 * error bits are accumulated in err.
 */
12609 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12610 struct extent_buffer *eb, int slot)
12612 struct btrfs_root *extent_root = fs_info->extent_root;
12613 struct btrfs_root *chunk_root = fs_info->chunk_root;
12614 struct btrfs_block_group_item *bi;
12615 struct btrfs_block_group_item bg_item;
12616 struct btrfs_path path;
12617 struct btrfs_key bg_key;
12618 struct btrfs_key chunk_key;
12619 struct btrfs_key extent_key;
12620 struct btrfs_chunk *chunk;
12621 struct extent_buffer *leaf;
12622 struct btrfs_extent_item *ei;
12623 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12631 btrfs_item_key_to_cpu(eb, &bg_key, slot);
12632 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* Copy the item out of the extent buffer so its fields can be read locally. */
12633 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12634 used = btrfs_block_group_used(&bg_item);
12635 bg_flags = btrfs_block_group_flags(&bg_item);
12637 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12638 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12639 chunk_key.offset = bg_key.objectid;
12641 btrfs_init_path(&path);
12642 /* Search for the referencer chunk */
12643 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12646 "block group[%llu %llu] did not find the related chunk item",
12647 bg_key.objectid, bg_key.offset);
12648 err |= REFERENCER_MISSING;
12650 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12651 struct btrfs_chunk);
12652 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12655 "block group[%llu %llu] related chunk item length does not match",
12656 bg_key.objectid, bg_key.offset);
12657 err |= REFERENCER_MISMATCH;
12660 btrfs_release_path(&path);
12662 /* Search from the block group bytenr */
12663 extent_key.objectid = bg_key.objectid;
12664 extent_key.type = 0;
12665 extent_key.offset = 0;
12667 btrfs_init_path(&path);
12668 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12672 /* Iterate extent tree to account used space */
12674 leaf = path.nodes[0];
12676 /* Search slot can point to the last item beyond leaf nritems */
12677 if (path.slots[0] >= btrfs_header_nritems(leaf))
12680 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
12681 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12684 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12685 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12687 if (extent_key.objectid < bg_key.objectid)
12690 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12693 total += extent_key.offset;
12695 ei = btrfs_item_ptr(leaf, path.slots[0],
12696 struct btrfs_extent_item);
12697 flags = btrfs_extent_flags(leaf, ei);
12698 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12699 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12701 "bad extent[%llu, %llu) type mismatch with chunk",
12702 extent_key.objectid,
12703 extent_key.objectid + extent_key.offset);
12704 err |= CHUNK_TYPE_MISMATCH;
12706 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12707 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12708 BTRFS_BLOCK_GROUP_METADATA))) {
12710 "bad extent[%llu, %llu) type mismatch with chunk",
12711 extent_key.objectid,
12712 extent_key.objectid + nodesize);
12713 err |= CHUNK_TYPE_MISMATCH;
12717 ret = btrfs_next_item(extent_root, &path);
12723 btrfs_release_path(&path);
12725 if (total != used) {
12727 "block group[%llu %llu] used %llu but extent items used %llu",
12728 bg_key.objectid, bg_key.offset, used, total);
12729 err |= BG_ACCOUNTING_ERROR;
12735 * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
12736 * FIXME: We still need to repair error of dev_item.
12738 * Returns error after repair.
/*
 * Recreate the block group of the chunk item at @path when @err says its
 * referencer is missing.
 *
 * The key at path->nodes[0] / path->slots[0] is read and tested against
 * BTRFS_CHUNK_ITEM_KEY.  The chunk type and length are then read from the
 * item, and when @err has REFERENCER_MISSING set, btrfs_make_block_group()
 * is called with the chunk type, chunk_key.offset and the chunk length.
 * On success REFERENCER_MISSING is cleared from err and a message is
 * printed; on failure an error is logged.
 *
 * NOTE(review): the action taken for a key of another type, and the final
 * return statement, are not visible in this listing.
 */
12740 static int repair_chunk_item(struct btrfs_trans_handle *trans,
12741 struct btrfs_root *chunk_root,
12742 struct btrfs_path *path, int err)
12744 struct btrfs_chunk *chunk;
12745 struct btrfs_key chunk_key;
12746 struct extent_buffer *eb = path->nodes[0];
12748 int slot = path->slots[0];
12752 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12753 if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
12755 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12756 type = btrfs_chunk_type(path->nodes[0], chunk);
12757 length = btrfs_chunk_length(eb, chunk);
12759 if (err & REFERENCER_MISSING) {
12760 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
12761 type, chunk_key.offset, length);
12763 error("fail to add block group item[%llu %llu]",
12764 chunk_key.offset, length);
12767 err &= ~REFERENCER_MISSING;
12768 printf("Added block group item[%llu %llu]\n",
12769 chunk_key.offset, length);
12778 * Check a chunk item.
12779 * Including checking all referred dev_extents and block group
/*
 * Check the CHUNK_ITEM at @eb / @slot against the items that must refer
 * back to it.
 *
 *  - btrfs_check_chunk_valid() is run first; a chunk reported as invalid
 *    adds BYTES_UNALIGNED | UNKNOWN_TYPE to err.
 *  - A BLOCK_GROUP_ITEM keyed (chunk offset, BLOCK_GROUP_ITEM, chunk
 *    length) is searched in the extent root.  A failed lookup adds
 *    REFERENCER_MISSING, and so does a block group whose flags differ from
 *    the chunk type.
 *  - For every stripe a DEV_EXTENT keyed (stripe devid, DEV_EXTENT, stripe
 *    offset) is searched in the dev root.  It has to name this chunk by
 *    objectid and offset and its length has to equal
 *    btrfs_stripe_length(); otherwise control jumps to not_match_dev, where
 *    BACKREF_MISSING is added and an error is logged.
 *
 * NOTE(review): the return statement is not visible in this listing; the
 * error bits are accumulated in err.
 */
12781 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12782 struct extent_buffer *eb, int slot)
12784 struct btrfs_root *extent_root = fs_info->extent_root;
12785 struct btrfs_root *dev_root = fs_info->dev_root;
12786 struct btrfs_path path;
12787 struct btrfs_key chunk_key;
12788 struct btrfs_key bg_key;
12789 struct btrfs_key devext_key;
12790 struct btrfs_chunk *chunk;
12791 struct extent_buffer *leaf;
12792 struct btrfs_block_group_item *bi;
12793 struct btrfs_block_group_item bg_item;
12794 struct btrfs_dev_extent *ptr;
12806 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12807 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12808 length = btrfs_chunk_length(eb, chunk);
12809 chunk_end = chunk_key.offset + length;
12810 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12813 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12815 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12818 type = btrfs_chunk_type(eb, chunk);
/* The matching block group is keyed by the chunk start and length. */
12820 bg_key.objectid = chunk_key.offset;
12821 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12822 bg_key.offset = length;
12824 btrfs_init_path(&path);
12825 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
12828 "chunk[%llu %llu) did not find the related block group item",
12829 chunk_key.offset, chunk_end);
12830 err |= REFERENCER_MISSING;
12832 leaf = path.nodes[0];
12833 bi = btrfs_item_ptr(leaf, path.slots[0],
12834 struct btrfs_block_group_item);
12835 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
12837 if (btrfs_block_group_flags(&bg_item) != type) {
12839 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
12840 chunk_key.offset, chunk_end, type,
12841 btrfs_block_group_flags(&bg_item));
12842 err |= REFERENCER_MISSING;
12846 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
12847 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
12848 for (i = 0; i < num_stripes; i++) {
/* Drop whatever the previous lookup left in path before reusing it. */
12849 btrfs_release_path(&path);
12850 btrfs_init_path(&path);
12851 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
12852 devext_key.type = BTRFS_DEV_EXTENT_KEY;
12853 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
12855 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
12858 goto not_match_dev;
12860 leaf = path.nodes[0];
12861 ptr = btrfs_item_ptr(leaf, path.slots[0],
12862 struct btrfs_dev_extent);
12863 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
12864 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
12865 if (objectid != chunk_key.objectid ||
12866 offset != chunk_key.offset ||
12867 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
12868 goto not_match_dev;
/*
 * NOTE(review): the message below prints chunk_key.objectid as the start of
 * the range, while every other chunk[...) message in this function prints
 * chunk_key.offset.  Confirm which value is meant.
 */
12871 err |= BACKREF_MISSING;
12873 "chunk[%llu %llu) stripe %d did not find the related dev extent",
12874 chunk_key.objectid, chunk_end, i);
12877 btrfs_release_path(&path);
/*
 * Delete the item that @path currently points at from @root.
 *
 * The key of the item is saved, the path is released, and the key is
 * searched again with @trans and the arguments -1 and 1 before
 * btrfs_del_item() is called.  When the slot is 0 afterwards,
 * btrfs_prev_leaf() is called on the path.  The outcome is reported with
 * either an error or a "Deleted root ..." message naming root->objectid
 * and the key.
 *
 * NOTE(review): the checks on the return values of the search and of the
 * deletion, and the final return statement, are not visible in this
 * listing.
 */
12882 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
12883 struct btrfs_root *root,
12884 struct btrfs_path *path)
12886 struct btrfs_key key;
12889 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
12890 btrfs_release_path(path);
12891 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
12897 ret = btrfs_del_item(trans, root, path);
12901 if (path->slots[0] == 0)
12902 btrfs_prev_leaf(root, path);
12907 error("failed to delete root %llu item[%llu, %u, %llu]",
12908 root->objectid, key.objectid, key.type, key.offset);
12910 printf("Deleted root %llu item[%llu, %u, %llu]\n",
12911 root->objectid, key.objectid, key.type, key.offset);
12916 * Main entry function to check known items and update related accounting info
/*
 * Dispatch the item at path->nodes[0] / path->slots[0] to the checker that
 * matches its key type.
 *
 * A slot at or beyond the number of items in the leaf is reported as an
 * empty leaf.  Otherwise the key type selects the checker:
 *   EXTENT_DATA              check_extent_data_item, repair_extent_data_item
 *   BLOCK_GROUP_ITEM         check_block_group_item
 *   DEV_ITEM                 check_dev_item
 *   CHUNK_ITEM               check_chunk_item, repair_chunk_item
 *   DEV_EXTENT               check_dev_extent_item
 *   EXTENT_ITEM,
 *   METADATA_ITEM            check_extent_item
 *   EXTENT_CSUM              item size added to total_csum_bytes
 *   TREE_BLOCK_REF           check_tree_block_backref
 *   EXTENT_DATA_REF          check_extent_data_backref
 *   SHARED_BLOCK_REF         check_shared_block_backref
 *   SHARED_DATA_REF          check_shared_data_backref
 *
 * For the block group item and the four keyed backref types, a result with
 * the referencer bits shown in each case leads to
 * delete_extent_tree_item().
 * NOTE(review): the first half of those conditions (presumably a repair
 * test), the way results are merged, and the stepping to the next slot are
 * on lines that are not visible in this listing.
 */
12918 static int check_leaf_items(struct btrfs_trans_handle *trans,
12919 struct btrfs_root *root, struct btrfs_path *path,
12920 struct node_refs *nrefs, int account_bytes)
12922 struct btrfs_fs_info *fs_info = root->fs_info;
12923 struct btrfs_key key;
12924 struct extent_buffer *eb;
12927 struct btrfs_extent_data_ref *dref;
12932 eb = path->nodes[0];
12933 slot = path->slots[0];
12934 if (slot >= btrfs_header_nritems(eb)) {
12936 error("empty leaf [%llu %u] root %llu", eb->start,
12937 root->fs_info->nodesize, root->objectid);
12943 btrfs_item_key_to_cpu(eb, &key, slot);
12947 case BTRFS_EXTENT_DATA_KEY:
12948 ret = check_extent_data_item(root, path, nrefs, account_bytes);
12950 ret = repair_extent_data_item(trans, root, path, nrefs,
12954 case BTRFS_BLOCK_GROUP_ITEM_KEY:
12955 ret = check_block_group_item(fs_info, eb, slot);
12957 ret & REFERENCER_MISSING)
12958 ret = delete_extent_tree_item(trans, root, path);
12961 case BTRFS_DEV_ITEM_KEY:
12962 ret = check_dev_item(fs_info, eb, slot);
12965 case BTRFS_CHUNK_ITEM_KEY:
12966 ret = check_chunk_item(fs_info, eb, slot);
12968 ret = repair_chunk_item(trans, root, path, ret);
12971 case BTRFS_DEV_EXTENT_KEY:
12972 ret = check_dev_extent_item(fs_info, eb, slot);
12975 case BTRFS_EXTENT_ITEM_KEY:
12976 case BTRFS_METADATA_ITEM_KEY:
12977 ret = check_extent_item(trans, fs_info, path);
12980 case BTRFS_EXTENT_CSUM_KEY:
12981 total_csum_bytes += btrfs_item_size_nr(eb, slot);
12984 case BTRFS_TREE_BLOCK_REF_KEY:
12985 ret = check_tree_block_backref(fs_info, key.offset,
12988 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12989 ret = delete_extent_tree_item(trans, root, path);
12992 case BTRFS_EXTENT_DATA_REF_KEY:
12993 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
12994 ret = check_extent_data_backref(fs_info,
12995 btrfs_extent_data_ref_root(eb, dref),
12996 btrfs_extent_data_ref_objectid(eb, dref),
12997 btrfs_extent_data_ref_offset(eb, dref),
12999 btrfs_extent_data_ref_count(eb, dref));
13001 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13002 ret = delete_extent_tree_item(trans, root, path);
13005 case BTRFS_SHARED_BLOCK_REF_KEY:
13006 ret = check_shared_block_backref(fs_info, key.offset,
13009 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13010 ret = delete_extent_tree_item(trans, root, path);
13013 case BTRFS_SHARED_DATA_REF_KEY:
13014 ret = check_shared_data_backref(fs_info, key.offset,
13017 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13018 ret = delete_extent_tree_item(trans, root, path);
13032 * Low memory usage version check_chunks_and_extents.
/*
 * Low memory variant of the chunk and extent check.
 *
 * Order of work as far as it can be read from the code below:
 *  1. A transaction is started on the extent root (failure is reported and
 *     returned through PTR_ERR).
 *  2. check_btrfs_root() is run on the chunk root and on the tree root.
 *  3. The tree root is searched from the ROOT_ITEM of
 *     BTRFS_EXTENT_TREE_OBJECTID onwards.  Every ROOT_ITEM found is read,
 *     with btrfs_read_fs_root_no_cache() for BTRFS_TREE_RELOC_OBJECTID and
 *     btrfs_read_fs_root() otherwise, and checked with check_btrfs_root().
 *     Reloc roots are freed again with btrfs_free_fs_root().
 *  4. After each root the path is released, old_key is searched again in
 *     the tree root, and btrfs_next_item() steps to the next entry.
 *  5. btrfs_fix_block_accounting() is called and BG_ACCOUNTING_ERROR is
 *     cleared from err, then the transaction is committed.
 *
 * NOTE(review): the conditions guarding steps 1 and 5 (presumably the
 * repair flag), the assignment of old_key, and the return statement are on
 * lines that are not visible in this listing.
 */
13034 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
13036 struct btrfs_trans_handle *trans = NULL;
13037 struct btrfs_path path;
13038 struct btrfs_key old_key;
13039 struct btrfs_key key;
13040 struct btrfs_root *root1;
13041 struct btrfs_root *root;
13042 struct btrfs_root *cur_root;
13046 root = fs_info->fs_root;
13049 trans = btrfs_start_transaction(fs_info->extent_root, 1);
13050 if (IS_ERR(trans)) {
13051 error("failed to start transaction before check");
13052 return PTR_ERR(trans);
13056 root1 = root->fs_info->chunk_root;
13057 ret = check_btrfs_root(trans, root1, 0, 1);
13060 root1 = root->fs_info->tree_root;
13061 ret = check_btrfs_root(trans, root1, 0, 1);
13064 btrfs_init_path(&path);
13065 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
13067 key.type = BTRFS_ROOT_ITEM_KEY;
13069 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
13071 error("cannot find extent tree in tree_root");
13076 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13077 if (key.type != BTRFS_ROOT_ITEM_KEY)
13080 key.offset = (u64)-1;
/* Reloc roots are read without the cache and freed below after the check. */
13082 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13083 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
13086 cur_root = btrfs_read_fs_root(root->fs_info, &key);
13087 if (IS_ERR(cur_root) || !cur_root) {
13088 error("failed to read tree: %lld", key.objectid);
13092 ret = check_btrfs_root(trans, cur_root, 0, 1);
13095 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13096 btrfs_free_fs_root(cur_root);
13098 btrfs_release_path(&path);
13099 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
13100 &old_key, &path, 0, 0);
13104 ret = btrfs_next_item(root1, &path);
13110 /* if repair, update block accounting */
13112 ret = btrfs_fix_block_accounting(trans, root);
13116 err &= ~BG_ACCOUNTING_ERROR;
13120 btrfs_commit_transaction(trans, root->fs_info->extent_root);
13122 btrfs_release_path(&path);
/*
 * Run the chunk and extent check in the implementation selected by
 * check_mode: check_chunks_and_extents_v2() for CHECK_MODE_LOWMEM,
 * check_chunks_and_extents() otherwise.
 *
 * A "checking extents" line is written to stderr when progress reporting is
 * disabled.  When repair is set and the check returned 0,
 * btrfs_fix_device_and_super_size() is called as well.
 * NOTE(review): how the result of that call is folded into the return value
 * is on lines that are not visible in this listing.
 */
13127 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
13131 if (!ctx.progress_enabled)
13132 fprintf(stderr, "checking extents\n");
13133 if (check_mode == CHECK_MODE_LOWMEM)
13134 ret = check_chunks_and_extents_v2(fs_info);
13136 ret = check_chunks_and_extents(fs_info);
13138 /* Also repair device size related problems */
13139 if (repair && !ret) {
13140 ret = btrfs_fix_device_and_super_size(fs_info);
/*
 * Give @root a freshly initialised root block.
 *
 * The block c is obtained from btrfs_alloc_free_block() with the node size
 * and the objectid of the root.  NOTE(review): the branch that uses
 * @overwrite is not visible in this listing; presumably it reuses the
 * current root node instead of allocating.
 *
 * The header of c is zeroed and then filled in: level, bytenr, generation
 * of @trans, mixed backref revision, owner, filesystem id and chunk tree
 * uuid.  The buffer is marked dirty.
 *
 * When the old root node and c start at the same bytenr, the generation and
 * level of the root item are refreshed and written with
 * btrfs_update_root().  Finally the old node is released and @root is put
 * on the dirty list.
 */
13147 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
13148 struct btrfs_root *root, int overwrite)
13150 struct extent_buffer *c;
13151 struct extent_buffer *old = root->node;
13154 struct btrfs_disk_key disk_key = {0,0,0};
13160 extent_buffer_get(c);
13163 c = btrfs_alloc_free_block(trans, root,
13164 root->fs_info->nodesize,
13165 root->root_key.objectid,
13166 &disk_key, level, 0, 0);
13169 extent_buffer_get(c);
/* Wipe the header and rebuild every field from scratch. */
13173 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
13174 btrfs_set_header_level(c, level);
13175 btrfs_set_header_bytenr(c, c->start);
13176 btrfs_set_header_generation(c, trans->transid);
13177 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
13178 btrfs_set_header_owner(c, root->root_key.objectid);
13180 write_extent_buffer(c, root->fs_info->fsid,
13181 btrfs_header_fsid(), BTRFS_FSID_SIZE);
13183 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
13184 btrfs_header_chunk_tree_uuid(c),
13187 btrfs_mark_buffer_dirty(c);
13189 * this case can happen in the following case:
13191 * 1.overwrite previous root.
13193 * 2.reinit reloc data root, this is because we skip pin
13194 * down reloc data tree before which means we can allocate
13195 * same block bytenr here.
13197 if (old->start == c->start) {
13198 btrfs_set_root_generation(&root->root_item,
13200 root->root_item.level = btrfs_header_level(root->node);
13201 ret = btrfs_update_root(trans, root->fs_info->tree_root,
13202 &root->root_key, &root->root_item);
13204 free_extent_buffer(c);
13208 free_extent_buffer(old);
13210 add_root_to_dirty_list(root);
/*
 * Pin the tree block @eb and, recursively, the blocks reachable from it.
 *
 * A block whose range already has EXTENT_DIRTY set in
 * fs_info->pinned_extents is not pinned again.  Otherwise
 * btrfs_pin_extent() is called for it and its entries are walked:
 *
 *  - ROOT_ITEM entries, except those of the extent tree, the tree reloc
 *    tree and the data reloc tree, have the root block they name read with
 *    read_tree_block() and walked recursively with tree_root set to 0.
 *  - Node pointers are followed through btrfs_node_blockptr().  When level
 *    is 1 and @tree_root is 0 the child is pinned with the node size
 *    without being read; otherwise the child is read and walked recursively
 *    with the same @tree_root.
 *
 * Every buffer read here is released with free_extent_buffer() after the
 * recursive call.
 * NOTE(review): the test that separates the leaf case from the node case
 * (presumably on level) and all return statements are on lines that are not
 * visible in this listing.
 */
13214 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
13215 struct extent_buffer *eb, int tree_root)
13217 struct extent_buffer *tmp;
13218 struct btrfs_root_item *ri;
13219 struct btrfs_key key;
13221 int level = btrfs_header_level(eb);
13227 * If we have pinned this block before, don't pin it again.
13228 * This can not only avoid forever loop with broken filesystem
13229 * but also give us some speedups.
13231 if (test_range_bit(&fs_info->pinned_extents, eb->start,
13232 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
13235 btrfs_pin_extent(fs_info, eb->start, eb->len);
13237 nritems = btrfs_header_nritems(eb);
13238 for (i = 0; i < nritems; i++) {
13240 btrfs_item_key_to_cpu(eb, &key, i);
13241 if (key.type != BTRFS_ROOT_ITEM_KEY)
13243 /* Skip the extent root and reloc roots */
13244 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
13245 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
13246 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
13248 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
13249 bytenr = btrfs_disk_root_bytenr(eb, ri);
13252 * If at any point we start needing the real root we
13253 * will have to build a stump root for the root we are
13254 * in, but for now this doesn't actually use the root so
13255 * just pass in extent_root.
13257 tmp = read_tree_block(fs_info, bytenr, 0);
13258 if (!extent_buffer_uptodate(tmp)) {
13259 fprintf(stderr, "Error reading root block\n");
13262 ret = pin_down_tree_blocks(fs_info, tmp, 0);
13263 free_extent_buffer(tmp);
13267 bytenr = btrfs_node_blockptr(eb, i);
13269 /* If we aren't the tree root don't read the block */
13270 if (level == 1 && !tree_root) {
13271 btrfs_pin_extent(fs_info, bytenr,
13272 fs_info->nodesize);
13276 tmp = read_tree_block(fs_info, bytenr, 0);
13277 if (!extent_buffer_uptodate(tmp)) {
13278 fprintf(stderr, "Error reading tree block\n");
13281 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
13282 free_extent_buffer(tmp);
/*
 * Pin the blocks of the chunk tree (tree_root argument 0) and then those
 * reachable from the tree root (tree_root argument 1) by calling
 * pin_down_tree_blocks() on each root node.
 *
 * Returns the result of the second walk.
 * NOTE(review): the check on the result of the first walk is on a line that
 * is not visible in this listing.
 */
13291 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13295 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13299 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk tree.
 *
 * The chunk root is searched for CHUNK_ITEM keys and the three avail_*
 * allocation bit masks in @fs_info are reset to 0.  The chunk tree is then
 * walked leaf by leaf with btrfs_next_leaf(); for every CHUNK_ITEM
 * btrfs_add_block_group() is called with the chunk type, start and length,
 * and the chunk range is marked dirty in fs_info->free_space_cache.
 *
 * Afterwards the block groups are visited in order through
 * btrfs_lookup_first_block_group(), advancing start past each one.
 * NOTE(review): what is done to each block group in that last loop, the
 * starting search key fields other than the type, and the return
 * statements are on lines that are not visible in this listing.
 */
13302 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13304 struct btrfs_block_group_cache *cache;
13305 struct btrfs_path path;
13306 struct extent_buffer *leaf;
13307 struct btrfs_chunk *chunk;
13308 struct btrfs_key key;
13312 btrfs_init_path(&path);
13314 key.type = BTRFS_CHUNK_ITEM_KEY;
13316 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13318 btrfs_release_path(&path);
13323 * We do this in case the block groups were screwed up and had alloc
13324 * bits that aren't actually set on the chunks. This happens with
13325 * restored images every time and could happen in real life I guess.
13327 fs_info->avail_data_alloc_bits = 0;
13328 fs_info->avail_metadata_alloc_bits = 0;
13329 fs_info->avail_system_alloc_bits = 0;
13331 /* First we need to create the in-memory block groups */
13333 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13334 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13336 btrfs_release_path(&path);
13344 leaf = path.nodes[0];
13345 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13346 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13351 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13352 btrfs_add_block_group(fs_info, 0,
13353 btrfs_chunk_type(leaf, chunk), key.offset,
13354 btrfs_chunk_length(leaf, chunk));
13355 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13356 key.offset + btrfs_chunk_length(leaf, chunk));
13361 cache = btrfs_lookup_first_block_group(fs_info, start);
13365 start = cache->key.objectid + cache->key.offset;
13368 btrfs_release_path(&path);
/*
 * Remove the traces of a pending balance from the tree root and reset the
 * data reloc tree.
 *
 * Steps that can be read from the code below:
 *  1. The item keyed (BTRFS_BALANCE_OBJECTID, BTRFS_BALANCE_ITEM_KEY) is
 *     searched in the tree root and deleted with btrfs_del_item().  One
 *     outcome of the search jumps straight to reinit_data_reloc.
 *  2. ROOT_ITEMs with objectid BTRFS_TREE_RELOC_OBJECTID are collected as
 *     runs (del_slot, del_nr) and removed with btrfs_del_items().  The
 *     search is repeated when the end of a leaf is reached, and the scan
 *     stops at the first objectid above BTRFS_TREE_RELOC_OBJECTID.
 *  3. The root of BTRFS_DATA_RELOC_TREE_OBJECTID is read, recorded in the
 *     transaction, reinitialised with btrfs_fsck_reinit_root() and given a
 *     root directory with btrfs_make_root_dir().
 *
 * NOTE(review): the conditions around each step, the label definitions and
 * the return statement are on lines that are not visible in this listing.
 */
13372 static int reset_balance(struct btrfs_trans_handle *trans,
13373 struct btrfs_fs_info *fs_info)
13375 struct btrfs_root *root = fs_info->tree_root;
13376 struct btrfs_path path;
13377 struct extent_buffer *leaf;
13378 struct btrfs_key key;
13379 int del_slot, del_nr = 0;
13383 btrfs_init_path(&path);
13384 key.objectid = BTRFS_BALANCE_OBJECTID;
13385 key.type = BTRFS_BALANCE_ITEM_KEY;
13387 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13392 goto reinit_data_reloc;
13397 ret = btrfs_del_item(trans, root, &path);
13400 btrfs_release_path(&path);
13402 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13403 key.type = BTRFS_ROOT_ITEM_KEY;
13405 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13409 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13414 ret = btrfs_del_items(trans, root, &path,
13421 btrfs_release_path(&path);
13424 ret = btrfs_search_slot(trans, root, &key, &path,
13431 leaf = path.nodes[0];
13432 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13433 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13435 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13440 del_slot = path.slots[0];
13449 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13453 btrfs_release_path(&path);
/* From here on root refers to the data reloc tree, not the tree root. */
13456 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13457 key.type = BTRFS_ROOT_ITEM_KEY;
13458 key.offset = (u64)-1;
13459 root = btrfs_read_fs_root(fs_info, &key);
13460 if (IS_ERR(root)) {
13461 fprintf(stderr, "Error reading data reloc tree\n");
13462 ret = PTR_ERR(root);
13465 record_root_in_trans(trans, root);
13466 ret = btrfs_fsck_reinit_root(trans, root, 0);
13469 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13471 btrfs_release_path(&path);
/*
 * Throw away the extent tree and build an empty replacement that only
 * contains the block group items.
 *
 * Steps that can be read from the code below:
 *  1. Filesystems with the MIXED_GROUPS incompat flag are refused with a
 *     message on stderr.
 *  2. pin_metadata_blocks() pins the metadata that is in use so that new
 *     allocations cannot overwrite it.
 *  3. btrfs_free_block_groups() drops the current block groups and
 *     reset_block_groups() recreates them.
 *  4. btrfs_fsck_reinit_root() reinitialises the extent root.
 *  5. Every block group returned by btrfs_lookup_first_block_group() has
 *     its key and item inserted into the extent root with
 *     btrfs_insert_item(), followed by btrfs_extent_post_op().
 *  6. reset_balance() clears a pending balance.
 *
 * Each failing step writes a message to stderr.
 * NOTE(review): the values returned for each failure and on success are on
 * lines that are not visible in this listing.
 */
13475 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13476 struct btrfs_fs_info *fs_info)
13482 * The only reason we don't do this is because right now we're just
13483 * walking the trees we find and pinning down their bytes, we don't look
13484 * at any of the leaves. In order to do mixed groups we'd have to check
13485 * the leaves of any fs roots and pin down the bytes for any file
13486 * extents we find. Not hard but why do it if we don't have to?
13488 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13489 fprintf(stderr, "We don't support re-initing the extent tree "
13490 "for mixed block groups yet, please notify a btrfs "
13491 "developer you want to do this so they can add this "
13492 "functionality.\n");
13497 * first we need to walk all of the trees except the extent tree and pin
13498 * down the bytes that are in use so we don't overwrite any existing
13501 ret = pin_metadata_blocks(fs_info);
13503 fprintf(stderr, "error pinning down used bytes\n");
13508 * Need to drop all the block groups since we're going to recreate all
13511 btrfs_free_block_groups(fs_info);
13512 ret = reset_block_groups(fs_info);
13514 fprintf(stderr, "error resetting the block groups\n");
13518 /* Ok we can allocate now, reinit the extent root */
13519 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13521 fprintf(stderr, "extent root initialization failed\n");
13523 * When the transaction code is updated we should end the
13524 * transaction, but for now progs only knows about commit so
13525 * just return an error.
13531 * Now we have all the in-memory block groups setup so we can make
13532 * allocations properly, and the metadata we care about is safe since we
13533 * pinned all of it above.
13536 struct btrfs_block_group_cache *cache;
13538 cache = btrfs_lookup_first_block_group(fs_info, start);
13541 start = cache->key.objectid + cache->key.offset;
13542 ret = btrfs_insert_item(trans, fs_info->extent_root,
13543 &cache->key, &cache->item,
13544 sizeof(cache->item));
13546 fprintf(stderr, "Error adding block group\n");
13549 btrfs_extent_post_op(trans, fs_info->extent_root);
13552 ret = reset_balance(trans, fs_info);
13554 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Rewrite the metadata block @eb by searching for it inside a transaction.
 *
 * The owner recorded in the header of @eb is used to read the owning root;
 * from that point on @root refers to the owner, not to the root passed in.
 * A transaction is started, path.lowest_level is set to the level of @eb,
 * and the first key of the block (node key for a non-zero level, item key
 * otherwise) is searched with btrfs_search_slot() whose final argument is
 * 1, presumably the flag that requests copy-on-write.  The transaction is
 * then committed and the path released.
 *
 * Returns PTR_ERR() of the root or of the transaction when either cannot be
 * obtained.
 * NOTE(review): the final return statement is not visible in this listing.
 */
13559 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13561 struct btrfs_path path;
13562 struct btrfs_trans_handle *trans;
13563 struct btrfs_key key;
13566 printf("Recowing metadata block %llu\n", eb->start);
13567 key.objectid = btrfs_header_owner(eb);
13568 key.type = BTRFS_ROOT_ITEM_KEY;
13569 key.offset = (u64)-1;
13571 root = btrfs_read_fs_root(root->fs_info, &key);
13572 if (IS_ERR(root)) {
13573 fprintf(stderr, "Couldn't find owner root %llu\n",
13575 return PTR_ERR(root);
13578 trans = btrfs_start_transaction(root, 1);
13580 return PTR_ERR(trans);
13582 btrfs_init_path(&path);
/* Stop the search at the level of @eb so that block itself is reached. */
13583 path.lowest_level = btrfs_header_level(eb);
13584 if (path.lowest_level)
13585 btrfs_node_key_to_cpu(eb, &key, 0);
13587 btrfs_item_key_to_cpu(eb, &key, 0);
13589 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13590 btrfs_commit_transaction(trans, root);
13591 btrfs_release_path(&path);
/*
 * Delete the item described by @bad from the root it belongs to.
 *
 * The root with objectid bad->root_id is read; from that point on @root
 * refers to that root, not to the root passed in.  A transaction is
 * started, bad->key is searched with the arguments -1 and 1, the item is
 * removed with btrfs_del_item(), and the transaction is committed.
 *
 * Returns PTR_ERR() of the root or of the transaction when either cannot be
 * obtained.
 * NOTE(review): the handling of the search result and the final return
 * statement are on lines that are not visible in this listing.
 */
13595 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13597 struct btrfs_path path;
13598 struct btrfs_trans_handle *trans;
13599 struct btrfs_key key;
13602 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13603 bad->key.type, bad->key.offset);
13604 key.objectid = bad->root_id;
13605 key.type = BTRFS_ROOT_ITEM_KEY;
13606 key.offset = (u64)-1;
13608 root = btrfs_read_fs_root(root->fs_info, &key);
13609 if (IS_ERR(root)) {
13610 fprintf(stderr, "Couldn't find owner root %llu\n",
13612 return PTR_ERR(root);
13615 trans = btrfs_start_transaction(root, 1);
13617 return PTR_ERR(trans);
13619 btrfs_init_path(&path);
13620 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13626 ret = btrfs_del_item(trans, root, &path);
13628 btrfs_commit_transaction(trans, root);
13629 btrfs_release_path(&path);
/*
 * Clear the log tree pointer in the super block.
 *
 * Inside a transaction on @root, the log_root and log_root_level fields of
 * the super block copy are set to 0 and the transaction is committed; ret
 * receives the result of the commit.  A failure to start the transaction
 * stores PTR_ERR(trans) in ret.
 * NOTE(review): the return statement is not visible in this listing.
 */
13633 static int zero_log_tree(struct btrfs_root *root)
13635 struct btrfs_trans_handle *trans;
13638 trans = btrfs_start_transaction(root, 1);
13639 if (IS_ERR(trans)) {
13640 ret = PTR_ERR(trans);
13643 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13644 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13645 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert checksums for a data range, one sector at a time.
 *
 * While offset is below len, data is read at @start + offset into @buf with
 * read_extent_data(), and btrfs_csum_file_block() is called with
 * @start + len, @start + offset, @buf and the sector size.  offset then
 * advances by the sector size.
 *
 * @buf is supplied by the caller; the only caller visible in this listing
 * allocates one sector for it.
 * NOTE(review): the trailing parameter of the signature, the initial value
 * of offset, the error checks after each call and the return statement are
 * on lines that are not visible in this listing.
 */
13649 static int populate_csum(struct btrfs_trans_handle *trans,
13650 struct btrfs_root *csum_root, char *buf, u64 start,
13653 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13658 while (offset < len) {
13659 sectorsize = fs_info->sectorsize;
13660 ret = read_extent_data(fs_info, buf, start + offset,
13664 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13665 start + offset, buf, sectorsize);
13668 offset += sectorsize;
/*
 * Fill the csum tree with checksums for the regular file extents referenced
 * by a single fs/subvolume tree.
 *
 * @trans:     transaction handle passed on to populate_csum()
 * @csum_root: csum tree to fill
 * @cur_root:  fs/subvolume tree to walk
 *
 * Items of @cur_root are visited with btrfs_next_item().  Only items of type
 * BTRFS_EXTENT_DATA_KEY whose file extent type is BTRFS_FILE_EXTENT_REG are
 * passed to populate_csum(), using the extent's disk bytenr and disk num
 * bytes as the range.
 */
13673 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13674 struct btrfs_root *csum_root,
13675 struct btrfs_root *cur_root)
13677 struct btrfs_path path;
13678 struct btrfs_key key;
13679 struct extent_buffer *node;
13680 struct btrfs_file_extent_item *fi;
/* Scratch buffer of one sector, handed to populate_csum() */
13687 buf = malloc(cur_root->fs_info->sectorsize);
13691 btrfs_init_path(&path);
13695 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13698 /* Iterate all regular file extents and fill its csum */
13700 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13702 if (key.type != BTRFS_EXTENT_DATA_KEY)
13704 node = path.nodes[0];
13705 slot = path.slots[0];
13706 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13707 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* The checksummed range is the on-disk extent, not the file range */
13709 start = btrfs_file_extent_disk_bytenr(node, fi);
13710 len = btrfs_file_extent_disk_num_bytes(node, fi);
13712 ret = populate_csum(trans, csum_root, buf, start, len);
/*
 * NOTE(review): -EEXIST is special-cased; presumably it is tolerated because
 * the same extent can be referenced more than once - confirm.
 */
13713 if (ret == -EEXIST)
13719 * TODO: if next leaf is corrupted, jump to nearest next valid
13722 ret = btrfs_next_item(cur_root, &path);
13732 btrfs_release_path(&path);
/*
 * Fill the csum tree by walking every fs/subvolume tree.
 *
 * @trans:     transaction handle passed on to the per-root helper
 * @csum_root: csum tree to fill; its fs_info provides the tree root
 *
 * Root items are iterated in the tree root starting at
 * BTRFS_FS_TREE_OBJECTID.  Each BTRFS_ROOT_ITEM_KEY whose objectid passes
 * is_fstree() is read with btrfs_read_fs_root() and processed by
 * fill_csum_tree_from_one_fs_root().
 */
13737 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13738 struct btrfs_root *csum_root)
13740 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13741 struct btrfs_path path;
13742 struct btrfs_root *tree_root = fs_info->tree_root;
13743 struct btrfs_root *cur_root;
13744 struct extent_buffer *node;
13745 struct btrfs_key key;
13749 btrfs_init_path(&path);
13750 key.objectid = BTRFS_FS_TREE_OBJECTID;
13752 key.type = BTRFS_ROOT_ITEM_KEY;
13753 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13762 node = path.nodes[0];
13763 slot = path.slots[0];
13764 btrfs_item_key_to_cpu(node, &key, slot);
/* Objectids above BTRFS_LAST_FREE_OBJECTID are tested separately */
13765 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13767 if (key.type != BTRFS_ROOT_ITEM_KEY)
13769 if (!is_fstree(key.objectid))
/* Same lookup key shape as used elsewhere in this file: offset (u64)-1 */
13771 key.offset = (u64)-1;
13773 cur_root = btrfs_read_fs_root(fs_info, &key);
13774 if (IS_ERR(cur_root) || !cur_root) {
13775 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13779 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13784 ret = btrfs_next_item(tree_root, &path);
13794 btrfs_release_path(&path);
/*
 * Fill the csum tree by walking the extent tree.
 *
 * @trans:     transaction handle passed on to populate_csum()
 * @csum_root: csum tree to fill; its fs_info provides the extent root
 *
 * Leaves of the extent root are walked with btrfs_next_leaf().  Items of
 * type BTRFS_EXTENT_ITEM_KEY that carry BTRFS_EXTENT_FLAG_DATA are passed to
 * populate_csum() with key.objectid as the start of the range.
 */
13798 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13799 struct btrfs_root *csum_root)
13801 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13802 struct btrfs_path path;
13803 struct btrfs_extent_item *ei;
13804 struct extent_buffer *leaf;
13806 struct btrfs_key key;
13809 btrfs_init_path(&path);
13811 key.type = BTRFS_EXTENT_ITEM_KEY;
13813 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
13815 btrfs_release_path(&path);
/* Scratch buffer of one sector, handed to populate_csum() */
13819 buf = malloc(csum_root->fs_info->sectorsize);
13821 btrfs_release_path(&path);
/* Past the last item of this leaf: move on to the next leaf */
13826 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13827 ret = btrfs_next_leaf(extent_root, &path);
13835 leaf = path.nodes[0];
13837 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13838 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
13843 ei = btrfs_item_ptr(leaf, path.slots[0],
13844 struct btrfs_extent_item);
/* Extent items without the DATA flag are tested for here */
13845 if (!(btrfs_extent_flags(leaf, ei) &
13846 BTRFS_EXTENT_FLAG_DATA)) {
13851 ret = populate_csum(trans, csum_root, buf, key.objectid,
13858 btrfs_release_path(&path);
13864 * Recalculate the csum and put it into the csum tree.
13866 * Extent tree init will wipe out all the extent info, so in that case, we
13867 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
13868 * will use fs/subvol trees to init the csum tree.
/*
 * Dispatch to one of the two csum tree fill strategies.
 *
 * @trans:          transaction handle passed through unchanged
 * @csum_root:      csum tree to fill
 * @search_fs_tree: non-zero selects fill_csum_tree_from_fs(), zero selects
 *                  fill_csum_tree_from_extent()
 *
 * Returns whatever the selected helper returns.
 */
13870 static int fill_csum_tree(struct btrfs_trans_handle *trans,
13871 struct btrfs_root *csum_root,
13872 int search_fs_tree)
13874 if (search_fs_tree)
13875 return fill_csum_tree_from_fs(trans, csum_root);
13877 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache.
 *
 * Does nothing special when the cache pointer is NULL.  Otherwise entries
 * are removed from the tree one at a time, starting with the first cache
 * extent, and mapped back to their containing struct root_item_info.  The
 * tree itself is then freed and the global pointer reset to NULL.
 */
13880 static void free_roots_info_cache(void)
13882 if (!roots_info_cache)
13885 while (!cache_tree_empty(roots_info_cache)) {
13886 struct cache_extent *entry;
13887 struct root_item_info *rii;
13889 entry = first_cache_extent(roots_info_cache);
/* Unlink the entry first, then recover the structure that embeds it */
13892 remove_cache_extent(roots_info_cache, entry);
13893 rii = container_of(entry, struct root_item_info, cache_extent);
13897 free(roots_info_cache);
13898 roots_info_cache = NULL;
/*
 * Build roots_info_cache from the extent tree.
 *
 * @info: filesystem whose extent root is scanned
 *
 * The cache is keyed by root id (cache_extent.start = root id, size = 1).
 * For every tree block extent whose first inline ref is of type
 * BTRFS_TREE_BLOCK_REF_KEY, the entry for the referenced root tracks the
 * highest level seen so far together with that block's bytenr and
 * generation.
 */
13901 static int build_roots_info_cache(struct btrfs_fs_info *info)
13904 struct btrfs_key key;
13905 struct extent_buffer *leaf;
13906 struct btrfs_path path;
/* Lazily allocate and initialise the global cache on first use */
13908 if (!roots_info_cache) {
13909 roots_info_cache = malloc(sizeof(*roots_info_cache));
13910 if (!roots_info_cache)
13912 cache_tree_init(roots_info_cache);
13915 btrfs_init_path(&path);
13917 key.type = BTRFS_EXTENT_ITEM_KEY;
13919 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
13922 leaf = path.nodes[0];
13925 struct btrfs_key found_key;
13926 struct btrfs_extent_item *ei;
13927 struct btrfs_extent_inline_ref *iref;
13928 int slot = path.slots[0];
13933 struct cache_extent *entry;
13934 struct root_item_info *rii;
/* Past the last item of this leaf: move on to the next leaf */
13936 if (slot >= btrfs_header_nritems(leaf)) {
13937 ret = btrfs_next_leaf(info->extent_root, &path);
13944 leaf = path.nodes[0];
13945 slot = path.slots[0];
13948 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
/* Only extent items and metadata items are of interest */
13950 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
13951 found_key.type != BTRFS_METADATA_ITEM_KEY)
13954 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
13955 flags = btrfs_extent_flags(leaf, ei);
/* An EXTENT_ITEM only matters here when it describes a tree block */
13957 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
13958 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * METADATA_ITEM: the inline ref follows the extent item directly and the
 * level is taken from the key offset.  Otherwise a btrfs_tree_block_info
 * sits between the extent item and the inline ref and carries the level.
 */
13961 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
13962 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
13963 level = found_key.offset;
13965 struct btrfs_tree_block_info *binfo;
13967 binfo = (struct btrfs_tree_block_info *)(ei + 1);
13968 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
13969 level = btrfs_tree_block_level(leaf, binfo);
13973 * For a root extent, it must be of the following type and the
13974 * first (and only one) iref in the item.
13976 type = btrfs_extent_inline_ref_type(leaf, iref);
13977 if (type != BTRFS_TREE_BLOCK_REF_KEY)
13980 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
13981 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13983 rii = malloc(sizeof(struct root_item_info));
13988 rii->cache_extent.start = root_id;
13989 rii->cache_extent.size = 1;
/* (u8)-1 marks "no level recorded yet"; see the comparison below */
13990 rii->level = (u8)-1;
13991 entry = &rii->cache_extent;
13992 ret = insert_cache_extent(roots_info_cache, entry);
13995 rii = container_of(entry, struct root_item_info,
13999 ASSERT(rii->cache_extent.start == root_id);
14000 ASSERT(rii->cache_extent.size == 1);
/* A higher level (or the first block seen) replaces the current candidate */
14002 if (level > rii->level || rii->level == (u8)-1) {
14003 rii->level = level;
14004 rii->bytenr = found_key.objectid;
14005 rii->gen = btrfs_extent_generation(leaf, ei);
14006 rii->node_count = 1;
/*
 * NOTE(review): presumably a second block at the same level bumps
 * node_count so that the ambiguity can be detected later - confirm.
 */
14007 } else if (level == rii->level) {
14015 btrfs_release_path(&path);
/*
 * Compare one root item against the cached information about its root node
 * and, when allowed, rewrite the root item to match.
 *
 * @path:           path whose nodes[0]/slots[0] point at the root item
 * @root_key:       key of that root item; its objectid is the root id
 * @read_only_mode: non-zero means only check, do not modify the leaf
 *
 * The root item's bytenr, level and generation are compared with the values
 * stored in roots_info_cache for the same root id.  On a mismatch and with
 * @read_only_mode zero, those three fields are overwritten from the cache
 * and the item is written back into the leaf.
 */
14020 static int maybe_repair_root_item(struct btrfs_path *path,
14021 const struct btrfs_key *root_key,
14022 const int read_only_mode)
14024 const u64 root_id = root_key->objectid;
14025 struct cache_extent *entry;
14026 struct root_item_info *rii;
14027 struct btrfs_root_item ri;
14028 unsigned long offset;
14030 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14033 "Error: could not find extent items for root %llu\n",
14034 root_key->objectid);
14038 rii = container_of(entry, struct root_item_info, cache_extent);
14039 ASSERT(rii->cache_extent.start == root_id);
14040 ASSERT(rii->cache_extent.size == 1);
/* Anything other than exactly one candidate root node is an error */
14042 if (rii->node_count != 1) {
14044 "Error: could not find btree root extent for root %llu\n",
/* Copy the on-disk root item out of the leaf into the local ri */
14049 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
14050 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
14052 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
14053 btrfs_root_level(&ri) != rii->level ||
14054 btrfs_root_generation(&ri) != rii->gen) {
14057 * If we're in repair mode but our caller told us to not update
14058 * the root item, i.e. just check if it needs to be updated, don't
14059 * print this message, since the caller will call us again shortly
14060 * for the same root item without read only mode (the caller will
14061 * open a transaction first).
14063 if (!(read_only_mode && repair))
14065 "%sroot item for root %llu,"
14066 " current bytenr %llu, current gen %llu, current level %u,"
14067 " new bytenr %llu, new gen %llu, new level %u\n",
14068 (read_only_mode ? "" : "fixing "),
14070 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
14071 btrfs_root_level(&ri),
14072 rii->bytenr, rii->gen, rii->level);
/* A root item newer than the node that was found is reported separately */
14074 if (btrfs_root_generation(&ri) > rii->gen) {
14076 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
14077 root_id, btrfs_root_generation(&ri), rii->gen);
/* Write mode: patch the three fields and store the item back in the leaf */
14081 if (!read_only_mode) {
14082 btrfs_set_root_bytenr(&ri, rii->bytenr);
14083 btrfs_set_root_level(&ri, rii->level);
14084 btrfs_set_root_generation(&ri, rii->gen);
14085 write_extent_buffer(path->nodes[0], &ri,
14086 offset, sizeof(ri));
14096 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
14097 * caused read-only snapshots to be corrupted if they were created at a moment
14098 * when the source subvolume/snapshot had orphan items. The issue was that the
14099 * on-disk root items became incorrect, referring to the pre orphan cleanup root
14100 * node instead of the post orphan cleanup root node.
14101 * So this function, and its callees, just detects and fixes those cases. Even
14102 * though the regression was for read-only snapshots, this function applies to
14103 * any snapshot/subvolume root.
14104 * This must be run before any other repair code; otherwise other repair code
14105 * may, for example, delete or modify backrefs in the extent tree, which
14106 * will result in an inconsistent fs after repairing the root items.
/*
 * Check every root item in the tree root against roots_info_cache and fix
 * the outdated ones.
 *
 * @info: filesystem to process
 *
 * The cache is built with build_roots_info_cache() and released with
 * free_roots_info_cache() before the function finishes.  Root items are
 * scanned starting at BTRFS_FIRST_FREE_OBJECTID; items that are not of type
 * BTRFS_ROOT_ITEM_KEY, and the item with objectid
 * BTRFS_TREE_RELOC_OBJECTID, are tested for before
 * maybe_repair_root_item() is called.
 */
14108 static int repair_root_items(struct btrfs_fs_info *info)
14110 struct btrfs_path path;
14111 struct btrfs_key key;
14112 struct extent_buffer *leaf;
14113 struct btrfs_trans_handle *trans = NULL;
14116 int need_trans = 0;
14118 btrfs_init_path(&path);
14120 ret = build_roots_info_cache(info);
14124 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
14125 key.type = BTRFS_ROOT_ITEM_KEY;
14130 * Avoid opening and committing transactions if a leaf doesn't have
14131 * any root items that need to be fixed, so that we avoid rotating
14132 * backup roots unnecessarily.
14135 trans = btrfs_start_transaction(info->tree_root, 1);
14136 if (IS_ERR(trans)) {
14137 ret = PTR_ERR(trans);
14142 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
14146 leaf = path.nodes[0];
14149 struct btrfs_key found_key;
/* End of the current leaf: work out the next key before dropping the path */
14151 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
14152 int no_more_keys = find_next_key(&path, &key);
14154 btrfs_release_path(&path);
14156 ret = btrfs_commit_transaction(trans,
14168 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14170 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
14172 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* Without an open transaction the helper runs in read-only (check) mode */
14175 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
/*
 * NOTE(review): presumably this is where a fix was found to be needed but no
 * transaction is open yet, so the path is dropped to restart with one -
 * confirm.
 */
14179 if (!trans && repair) {
14182 btrfs_release_path(&path);
14192 free_roots_info_cache();
14193 btrfs_release_path(&path);
14195 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the free space cache of every block group and invalidate the cache
 * generation in the super block.
 *
 * @fs_info: filesystem to operate on
 *
 * Block groups are visited via btrfs_lookup_first_block_group(), advancing
 * the search position to the end of each block group that was handled.
 * Afterwards the super block copy's cache_generation is set to (u64)-1
 * inside a transaction on the tree root.  A failure to start that
 * transaction is returned as PTR_ERR(trans).
 */
14202 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
14204 struct btrfs_trans_handle *trans;
14205 struct btrfs_block_group_cache *bg_cache;
14209 /* Clear all free space cache inodes and its extent data */
14211 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
14214 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
/* Continue the search right after the block group just processed */
14217 current = bg_cache->key.objectid + bg_cache->key.offset;
14220 /* Don't forget to set cache_generation to -1 */
14221 trans = btrfs_start_transaction(fs_info->tree_root, 0);
14222 if (IS_ERR(trans)) {
14223 error("failed to update super block cache generation");
14224 return PTR_ERR(trans);
14226 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
/* NOTE(review): the result of the commit is not captured on this line */
14227 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Clear the free space cache in the requested format.
 *
 * @fs_info: filesystem to operate on
 *
 * clear_version 1 clears the v1 cache through clear_free_space_cache(); the
 * presence of the FREE_SPACE_TREE compat_ro bit is tested first and the user
 * is pointed at "--clear-space-cache v2".  clear_version 2 clears the free
 * space tree through btrfs_clear_free_space_tree(); here the absence of the
 * FREE_SPACE_TREE bit is tested first and reported as nothing to clear.
 */
14232 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
14237 if (clear_version == 1) {
14238 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14240 "free space cache v2 detected, use --clear-space-cache v2");
14244 printf("Clearing free space cache\n");
14245 ret = clear_free_space_cache(fs_info);
14247 error("failed to clear free space cache");
14250 printf("Free space cache cleared\n");
14252 } else if (clear_version == 2) {
14253 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14254 printf("no free space cache v2 to clear\n");
14258 printf("Clear free space cache v2\n");
14259 ret = btrfs_clear_free_space_tree(fs_info);
14261 error("failed to clear free space cache v2: %d", ret);
14264 printf("free space cache v2 cleared\n");
/*
 * Usage text of "btrfs check", passed to usage() by cmd_check().  The first
 * entries are the synopsis and description; the remaining entries describe
 * the individual options.
 */
14271 const char * const cmd_check_usage[] = {
14272 "btrfs check [options] <device>",
14273 "Check structural integrity of a filesystem (unmounted).",
14274 "Check structural integrity of an unmounted filesystem. Verify internal",
14275 "trees' consistency and item connectivity. In the repair mode try to",
14276 "fix the problems found. ",
14277 "WARNING: the repair mode is considered dangerous",
14279 "-s|--super <superblock> use this superblock copy",
14280 "-b|--backup use the first valid backup root copy",
14281 "--force skip mount checks, repair is not possible",
14282 "--repair try to repair the filesystem",
14283 "--readonly run in read-only mode (default)",
14284 "--init-csum-tree create a new CRC tree",
14285 "--init-extent-tree create a new extent tree",
14286 "--mode <MODE> allows choice of memory/IO trade-offs",
14287 " where MODE is one of:",
14288 " original - read inodes and extents to memory (requires",
14289 " more memory, does less IO)",
14290 " lowmem - try to use less memory but read blocks again",
14292 "--check-data-csum verify checksums of data blocks",
14293 "-Q|--qgroup-report print a report on qgroup consistency",
14294 "-E|--subvol-extents <subvolid>",
14295 " print subvolume extents and sharing state",
14296 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
14297 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
14298 "-p|--progress indicate progress",
14299 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of "btrfs check".
 *
 * @argc/@argv: command line of the subcommand; exactly one positional
 *              argument (the device) is expected after the options
 *
 * Parses the options, opens the filesystem and optionally performs the
 * one-shot operations requested on the command line (clear space cache,
 * qgroup report, subvolume extent report, extent/csum tree
 * re-initialisation).  It then runs the checks in this order: root items,
 * chunks and extents, super block size, free space cache/tree, fs roots,
 * csums, root refs and quota groups.  Finally the accumulated statistics
 * are printed.
 */
14303 int cmd_check(int argc, char **argv)
14305 struct cache_tree root_cache;
14306 struct btrfs_root *root;
14307 struct btrfs_fs_info *info;
14310 u64 tree_root_bytenr = 0;
14311 u64 chunk_root_bytenr = 0;
14312 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14316 int init_csum_tree = 0;
14318 int clear_space_cache = 0;
14319 int qgroup_report = 0;
14320 int qgroups_repaired = 0;
14321 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
14326 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14327 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14328 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14329 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14330 GETOPT_VAL_FORCE };
14331 static const struct option long_options[] = {
14332 { "super", required_argument, NULL, 's' },
14333 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14334 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14335 { "init-csum-tree", no_argument, NULL,
14336 GETOPT_VAL_INIT_CSUM },
14337 { "init-extent-tree", no_argument, NULL,
14338 GETOPT_VAL_INIT_EXTENT },
14339 { "check-data-csum", no_argument, NULL,
14340 GETOPT_VAL_CHECK_CSUM },
14341 { "backup", no_argument, NULL, 'b' },
14342 { "subvol-extents", required_argument, NULL, 'E' },
14343 { "qgroup-report", no_argument, NULL, 'Q' },
14344 { "tree-root", required_argument, NULL, 'r' },
14345 { "chunk-root", required_argument, NULL,
14346 GETOPT_VAL_CHUNK_TREE },
14347 { "progress", no_argument, NULL, 'p' },
14348 { "mode", required_argument, NULL,
14350 { "clear-space-cache", required_argument, NULL,
14351 GETOPT_VAL_CLEAR_SPACE_CACHE},
14352 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14353 { NULL, 0, NULL, 0}
/*
 * 'E' is followed by ':' because -E takes a subvolume id, matching the
 * required_argument of --subvol-extents above.  Without the colon optarg is
 * NULL for the short form and the arg_strtou64(optarg) call below would be
 * handed a NULL pointer.
 */
14356 c = getopt_long(argc, argv, "as:br:pE:Q", long_options, NULL);
14360 case 'a': /* ignored */ break;
14362 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
14365 num = arg_strtou64(optarg);
14366 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14368 "super mirror should be less than %d",
14369 BTRFS_SUPER_MIRROR_MAX);
14372 bytenr = btrfs_sb_offset(((int)num));
14373 printf("using SB copy %llu, bytenr %llu\n", num,
14374 (unsigned long long)bytenr);
14380 subvolid = arg_strtou64(optarg);
14383 tree_root_bytenr = arg_strtou64(optarg);
14385 case GETOPT_VAL_CHUNK_TREE:
14386 chunk_root_bytenr = arg_strtou64(optarg);
14389 ctx.progress_enabled = true;
14393 usage(cmd_check_usage);
14394 case GETOPT_VAL_REPAIR:
14395 printf("enabling repair mode\n");
14397 ctree_flags |= OPEN_CTREE_WRITES;
14399 case GETOPT_VAL_READONLY:
14402 case GETOPT_VAL_INIT_CSUM:
14403 printf("Creating a new CRC tree\n");
14404 init_csum_tree = 1;
14406 ctree_flags |= OPEN_CTREE_WRITES;
14408 case GETOPT_VAL_INIT_EXTENT:
14409 init_extent_tree = 1;
14410 ctree_flags |= (OPEN_CTREE_WRITES |
14411 OPEN_CTREE_NO_BLOCK_GROUPS);
14414 case GETOPT_VAL_CHECK_CSUM:
14415 check_data_csum = 1;
14417 case GETOPT_VAL_MODE:
14418 check_mode = parse_check_mode(optarg);
14419 if (check_mode == CHECK_MODE_UNKNOWN) {
14420 error("unknown mode: %s", optarg);
14424 case GETOPT_VAL_CLEAR_SPACE_CACHE:
14425 if (strcmp(optarg, "v1") == 0) {
14426 clear_space_cache = 1;
14427 } else if (strcmp(optarg, "v2") == 0) {
14428 clear_space_cache = 2;
14429 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14432 "invalid argument to --clear-space-cache, must be v1 or v2");
14435 ctree_flags |= OPEN_CTREE_WRITES;
14437 case GETOPT_VAL_FORCE:
/* Exactly one positional argument, the device, must remain */
14443 if (check_argc_exact(argc - optind, 1))
14444 usage(cmd_check_usage);
14446 if (ctx.progress_enabled) {
14447 ctx.tp = TASK_NOTHING;
14448 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14451 /* This check is the only reason for --readonly to exist */
14452 if (readonly && repair) {
14453 error("repair options are not compatible with --readonly");
14458 * experimental and dangerous
14460 if (repair && check_mode == CHECK_MODE_LOWMEM)
14461 warning("low-memory mode repair support is only partial");
14464 cache_tree_init(&root_cache);
14466 ret = check_mounted(argv[optind]);
14469 error("could not check mount status: %s",
14475 "%s is currently mounted, use --force if you really intend to check the filesystem",
14483 error("repair and --force is not yet supported");
14490 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14494 "filesystem mounted, continuing because of --force");
14496 /* A block device is mounted in exclusive mode by kernel */
14497 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14500 /* only allow partial opening under repair mode */
14502 ctree_flags |= OPEN_CTREE_PARTIAL;
14504 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14505 chunk_root_bytenr, ctree_flags);
14507 error("cannot open file system");
14513 global_info = info;
14514 root = info->fs_root;
14515 uuid_unparse(info->super_copy->fsid, uuidbuf);
14517 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14520 * Check the bare minimum before starting anything else that could rely
14521 * on it, namely the tree roots, any local consistency checks
14523 if (!extent_buffer_uptodate(info->tree_root->node) ||
14524 !extent_buffer_uptodate(info->dev_root->node) ||
14525 !extent_buffer_uptodate(info->chunk_root->node)) {
14526 error("critical roots corrupted, unable to check the filesystem");
14532 if (clear_space_cache) {
14533 ret = do_clear_free_space_cache(info, clear_space_cache);
14539 * repair mode will force us to commit transaction which
14540 * will make us fail to load log tree when mounting.
14542 if (repair && btrfs_super_log_root(info->super_copy)) {
14543 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14549 ret = zero_log_tree(root);
14552 error("failed to zero log tree: %d", ret);
14557 if (qgroup_report) {
14558 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14560 ret = qgroup_verify_all(info);
14567 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14568 subvolid, argv[optind], uuidbuf);
14569 ret = print_extent_state(info, subvolid);
/* Both tree re-initialisations share a single transaction */
14574 if (init_extent_tree || init_csum_tree) {
14575 struct btrfs_trans_handle *trans;
14577 trans = btrfs_start_transaction(info->extent_root, 0);
14578 if (IS_ERR(trans)) {
14579 error("error starting transaction");
14580 ret = PTR_ERR(trans);
14585 if (init_extent_tree) {
14586 printf("Creating a new extent tree\n");
14587 ret = reinit_extent_tree(trans, info);
14593 if (init_csum_tree) {
14594 printf("Reinitialize checksum tree\n");
14595 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14597 error("checksum tree initialization failed: %d",
14604 ret = fill_csum_tree(trans, info->csum_root,
14608 error("checksum tree refilling failed: %d", ret);
14613 * Ok now we commit and run the normal fsck, which will add
14614 * extent entries for all of the items it finds.
14616 ret = btrfs_commit_transaction(trans, info->extent_root);
14621 if (!extent_buffer_uptodate(info->extent_root->node)) {
14622 error("critical: extent_root, unable to check the filesystem");
14627 if (!extent_buffer_uptodate(info->csum_root->node)) {
14628 error("critical: csum_root, unable to check the filesystem");
14634 if (!init_extent_tree) {
14635 ret = repair_root_items(info);
14638 error("failed to repair root items: %s", strerror(-ret));
14642 fprintf(stderr, "Fixed %d roots.\n", ret);
14644 } else if (ret > 0) {
14646 "Found %d roots with an outdated root item.\n",
14649 "Please run a filesystem check with the option --repair to fix them.\n");
14656 ret = do_check_chunks_and_extents(info);
14660 "errors found in extent allocation tree or chunk allocation");
14662 /* Only re-check super size after we checked and repaired the fs */
14663 err |= !is_super_size_valid(info);
14665 if (!ctx.progress_enabled) {
14666 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14667 fprintf(stderr, "checking free space tree\n");
14669 fprintf(stderr, "checking free space cache\n");
14671 ret = check_space_cache(root);
14674 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14675 error("errors found in free space tree");
14677 error("errors found in free space cache");
14682 * We used to have to have these hole extents in between our real
14683 * extents so if we don't have this flag set we need to make sure there
14684 * are no gaps in the file extents for inodes, otherwise we can just
14685 * ignore it when this happens.
14687 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14688 ret = do_check_fs_roots(info, &root_cache);
14691 error("errors found in fs roots");
14695 fprintf(stderr, "checking csums\n");
14696 ret = check_csums(root);
14699 error("errors found in csum tree");
14703 fprintf(stderr, "checking root refs\n");
14704 /* For low memory mode, check_fs_roots_v2 handles root refs */
14705 if (check_mode != CHECK_MODE_LOWMEM) {
14706 ret = check_root_refs(root, &root_cache);
14709 error("errors found in root refs");
/* In repair mode, re-COW every extent buffer queued on recow_ebs */
14714 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14715 struct extent_buffer *eb;
14717 eb = list_first_entry(&root->fs_info->recow_ebs,
14718 struct extent_buffer, recow);
14719 list_del_init(&eb->recow);
14720 ret = recow_extent_buffer(root, eb);
14723 error("fails to fix transid errors");
/* Drain the global delete_items list, deleting each queued bad item */
14728 while (!list_empty(&delete_items)) {
14729 struct bad_item *bad;
14731 bad = list_first_entry(&delete_items, struct bad_item, list);
14732 list_del_init(&bad->list);
14734 ret = delete_bad_item(root, bad);
14740 if (info->quota_enabled) {
14741 fprintf(stderr, "checking quota groups\n");
14742 ret = qgroup_verify_all(info);
14745 error("failed to check quota groups");
14749 ret = repair_qgroups(info, &qgroups_repaired);
14752 error("failed to repair quota groups");
/* Anything still queued on recow_ebs here is reported as a transid error */
14758 if (!list_empty(&root->fs_info->recow_ebs)) {
14759 error("transid errors in file system");
14764 printf("found %llu bytes used, ",
14765 (unsigned long long)bytes_used);
14767 printf("error(s) found\n");
14769 printf("no error found\n");
14770 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14771 printf("total tree bytes: %llu\n",
14772 (unsigned long long)total_btree_bytes);
14773 printf("total fs tree bytes: %llu\n",
14774 (unsigned long long)total_fs_tree_bytes);
14775 printf("total extent tree bytes: %llu\n",
14776 (unsigned long long)total_extent_tree_bytes);
14777 printf("btree space waste bytes: %llu\n",
14778 (unsigned long long)btree_space_waste);
14779 printf("file data blocks allocated: %llu\n referenced %llu\n",
14780 (unsigned long long)data_bytes_allocated,
14781 (unsigned long long)data_bytes_referenced);
14783 free_qgroup_counts();
14784 free_root_recs_tree(&root_cache);
14788 if (ctx.progress_enabled)
14789 task_deinit(ctx.info);