/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
49 TASK_NOTHING, /* have to be the last element */
54 enum task_position tp;
56 struct task_info *info;
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
77 enum btrfs_check_mode {
81 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
84 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86 struct extent_backref {
87 struct list_head list;
88 unsigned int is_data:1;
89 unsigned int found_extent_tree:1;
90 unsigned int full_backref:1;
91 unsigned int found_ref:1;
92 unsigned int broken:1;
95 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
97 return list_entry(entry, struct extent_backref, list);
100 struct data_backref {
101 struct extent_backref node;
115 static inline struct data_backref* to_data_backref(struct extent_backref *back)
117 return container_of(back, struct data_backref, node);
121 * Much like data_backref, just removed the undetermined members
122 * and change it to use list_head.
123 * During extent scan, it is stored in root->orphan_data_extent.
124 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
126 struct orphan_data_extent {
127 struct list_head list;
135 struct tree_backref {
136 struct extent_backref node;
143 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
145 return container_of(back, struct tree_backref, node);
148 /* Explicit initialization for extent_record::flag_block_full_backref */
149 enum { FLAG_UNSET = 2 };
151 struct extent_record {
152 struct list_head backrefs;
153 struct list_head dups;
154 struct list_head list;
155 struct cache_extent cache;
156 struct btrfs_disk_key parent_key;
161 u64 extent_item_refs;
163 u64 parent_generation;
167 unsigned int flag_block_full_backref:2;
168 unsigned int found_rec:1;
169 unsigned int content_checked:1;
170 unsigned int owner_ref_checked:1;
171 unsigned int is_root:1;
172 unsigned int metadata:1;
173 unsigned int bad_full_backref:1;
174 unsigned int crossing_stripes:1;
175 unsigned int wrong_chunk_type:1;
178 static inline struct extent_record* to_extent_record(struct list_head *entry)
180 return container_of(entry, struct extent_record, list);
183 struct inode_backref {
184 struct list_head list;
185 unsigned int found_dir_item:1;
186 unsigned int found_dir_index:1;
187 unsigned int found_inode_ref:1;
188 unsigned int filetype:8;
190 unsigned int ref_type;
197 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
199 return list_entry(entry, struct inode_backref, list);
202 struct root_item_record {
203 struct list_head list;
210 struct btrfs_key drop_key;
213 #define REF_ERR_NO_DIR_ITEM (1 << 0)
214 #define REF_ERR_NO_DIR_INDEX (1 << 1)
215 #define REF_ERR_NO_INODE_REF (1 << 2)
216 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
217 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
218 #define REF_ERR_DUP_INODE_REF (1 << 5)
219 #define REF_ERR_INDEX_UNMATCH (1 << 6)
220 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
221 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
222 #define REF_ERR_NO_ROOT_REF (1 << 9)
223 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
224 #define REF_ERR_DUP_ROOT_REF (1 << 11)
225 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
227 struct file_extent_hole {
233 struct inode_record {
234 struct list_head backrefs;
235 unsigned int checked:1;
236 unsigned int merging:1;
237 unsigned int found_inode_item:1;
238 unsigned int found_dir_item:1;
239 unsigned int found_file_extent:1;
240 unsigned int found_csum_item:1;
241 unsigned int some_csum_missing:1;
242 unsigned int nodatasum:1;
255 struct rb_root holes;
256 struct list_head orphan_extents;
261 #define I_ERR_NO_INODE_ITEM (1 << 0)
262 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
263 #define I_ERR_DUP_INODE_ITEM (1 << 2)
264 #define I_ERR_DUP_DIR_INDEX (1 << 3)
265 #define I_ERR_ODD_DIR_ITEM (1 << 4)
266 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
267 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
268 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
269 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
270 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
271 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
272 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
273 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
274 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
275 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
277 struct root_backref {
278 struct list_head list;
279 unsigned int found_dir_item:1;
280 unsigned int found_dir_index:1;
281 unsigned int found_back_ref:1;
282 unsigned int found_forward_ref:1;
283 unsigned int reachable:1;
292 static inline struct root_backref* to_root_backref(struct list_head *entry)
294 return list_entry(entry, struct root_backref, list);
298 struct list_head backrefs;
299 struct cache_extent cache;
300 unsigned int found_root_item:1;
306 struct cache_extent cache;
311 struct cache_extent cache;
312 struct cache_tree root_cache;
313 struct cache_tree inode_cache;
314 struct inode_record *current;
323 struct walk_control {
324 struct cache_tree shared;
325 struct shared_node *nodes[BTRFS_MAX_LEVEL];
331 struct btrfs_key key;
333 struct list_head list;
336 struct extent_entry {
341 struct list_head list;
344 struct root_item_info {
345 /* level of the root */
347 /* number of nodes at this level, must be 1 for a root */
351 struct cache_extent cache_extent;
355 * Error bit for low memory mode check.
357 * Currently no caller cares about it yet. Just internal use for error
360 #define BACKREF_MISSING (1 << 0) /* Backref missing in extent tree */
361 #define BACKREF_MISMATCH (1 << 1) /* Backref exists but does not match */
362 #define BYTES_UNALIGNED (1 << 2) /* Some bytes are not aligned */
363 #define REFERENCER_MISSING (1 << 3) /* Referencer not found */
364 #define REFERENCER_MISMATCH (1 << 4) /* Referenceer found but does not match */
365 #define CROSSING_STRIPE_BOUNDARY (1 << 4) /* For kernel scrub workaround */
366 #define ITEM_SIZE_MISMATCH (1 << 5) /* Bad item size */
367 #define UNKNOWN_TYPE (1 << 6) /* Unknown type */
368 #define ACCOUNTING_MISMATCH (1 << 7) /* Used space accounting error */
369 #define CHUNK_TYPE_MISMATCH (1 << 8)
371 static void *print_status_check(void *p)
373 struct task_ctx *priv = p;
374 const char work_indicator[] = { '.', 'o', 'O', 'o' };
376 static char *task_position_string[] = {
378 "checking free space cache",
382 task_period_start(priv->info, 1000 /* 1s */);
384 if (priv->tp == TASK_NOTHING)
388 printf("%s [%c]\r", task_position_string[priv->tp],
389 work_indicator[count % 4]);
392 task_period_wait(priv->info);
397 static int print_status_return(void *p)
405 static enum btrfs_check_mode parse_check_mode(const char *str)
407 if (strcmp(str, "lowmem") == 0)
408 return CHECK_MODE_LOWMEM;
409 if (strcmp(str, "orig") == 0)
410 return CHECK_MODE_ORIGINAL;
411 if (strcmp(str, "original") == 0)
412 return CHECK_MODE_ORIGINAL;
414 return CHECK_MODE_UNKNOWN;
417 /* Compatible function to allow reuse of old codes */
418 static u64 first_extent_gap(struct rb_root *holes)
420 struct file_extent_hole *hole;
422 if (RB_EMPTY_ROOT(holes))
425 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
429 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
431 struct file_extent_hole *hole1;
432 struct file_extent_hole *hole2;
434 hole1 = rb_entry(node1, struct file_extent_hole, node);
435 hole2 = rb_entry(node2, struct file_extent_hole, node);
437 if (hole1->start > hole2->start)
439 if (hole1->start < hole2->start)
441 /* Now hole1->start == hole2->start */
442 if (hole1->len >= hole2->len)
444 * Hole 1 will be merge center
445 * Same hole will be merged later
448 /* Hole 2 will be merge center */
453 * Add a hole to the record
455 * This will do hole merge for copy_file_extent_holes(),
456 * which will ensure there won't be continuous holes.
458 static int add_file_extent_hole(struct rb_root *holes,
461 struct file_extent_hole *hole;
462 struct file_extent_hole *prev = NULL;
463 struct file_extent_hole *next = NULL;
465 hole = malloc(sizeof(*hole));
470 /* Since compare will not return 0, no -EEXIST will happen */
471 rb_insert(holes, &hole->node, compare_hole);
473 /* simple merge with previous hole */
474 if (rb_prev(&hole->node))
475 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
477 if (prev && prev->start + prev->len >= hole->start) {
478 hole->len = hole->start + hole->len - prev->start;
479 hole->start = prev->start;
480 rb_erase(&prev->node, holes);
485 /* iterate merge with next holes */
487 if (!rb_next(&hole->node))
489 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
491 if (hole->start + hole->len >= next->start) {
492 if (hole->start + hole->len <= next->start + next->len)
493 hole->len = next->start + next->len -
495 rb_erase(&next->node, holes);
504 static int compare_hole_range(struct rb_node *node, void *data)
506 struct file_extent_hole *hole;
509 hole = (struct file_extent_hole *)data;
512 hole = rb_entry(node, struct file_extent_hole, node);
513 if (start < hole->start)
515 if (start >= hole->start && start < hole->start + hole->len)
521 * Delete a hole in the record
523 * This will do the hole split and is much restrict than add.
525 static int del_file_extent_hole(struct rb_root *holes,
528 struct file_extent_hole *hole;
529 struct file_extent_hole tmp;
534 struct rb_node *node;
541 node = rb_search(holes, &tmp, compare_hole_range, NULL);
544 hole = rb_entry(node, struct file_extent_hole, node);
545 if (start + len > hole->start + hole->len)
549 * Now there will be no overlap, delete the hole and re-add the
550 * split(s) if they exists.
552 if (start > hole->start) {
553 prev_start = hole->start;
554 prev_len = start - hole->start;
557 if (hole->start + hole->len > start + len) {
558 next_start = start + len;
559 next_len = hole->start + hole->len - start - len;
562 rb_erase(node, holes);
565 ret = add_file_extent_hole(holes, prev_start, prev_len);
570 ret = add_file_extent_hole(holes, next_start, next_len);
577 static int copy_file_extent_holes(struct rb_root *dst,
580 struct file_extent_hole *hole;
581 struct rb_node *node;
584 node = rb_first(src);
586 hole = rb_entry(node, struct file_extent_hole, node);
587 ret = add_file_extent_hole(dst, hole->start, hole->len);
590 node = rb_next(node);
595 static void free_file_extent_holes(struct rb_root *holes)
597 struct rb_node *node;
598 struct file_extent_hole *hole;
600 node = rb_first(holes);
602 hole = rb_entry(node, struct file_extent_hole, node);
603 rb_erase(node, holes);
605 node = rb_first(holes);
609 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
611 static void record_root_in_trans(struct btrfs_trans_handle *trans,
612 struct btrfs_root *root)
614 if (root->last_trans != trans->transid) {
615 root->track_dirty = 1;
616 root->last_trans = trans->transid;
617 root->commit_root = root->node;
618 extent_buffer_get(root->node);
622 static u8 imode_to_type(u32 imode)
625 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
626 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
627 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
628 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
629 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
630 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
631 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
632 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
635 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
639 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
641 struct device_record *rec1;
642 struct device_record *rec2;
644 rec1 = rb_entry(node1, struct device_record, node);
645 rec2 = rb_entry(node2, struct device_record, node);
646 if (rec1->devid > rec2->devid)
648 else if (rec1->devid < rec2->devid)
654 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
656 struct inode_record *rec;
657 struct inode_backref *backref;
658 struct inode_backref *orig;
659 struct inode_backref *tmp;
660 struct orphan_data_extent *src_orphan;
661 struct orphan_data_extent *dst_orphan;
666 rec = malloc(sizeof(*rec));
668 return ERR_PTR(-ENOMEM);
669 memcpy(rec, orig_rec, sizeof(*rec));
671 INIT_LIST_HEAD(&rec->backrefs);
672 INIT_LIST_HEAD(&rec->orphan_extents);
673 rec->holes = RB_ROOT;
675 list_for_each_entry(orig, &orig_rec->backrefs, list) {
676 size = sizeof(*orig) + orig->namelen + 1;
677 backref = malloc(size);
682 memcpy(backref, orig, size);
683 list_add_tail(&backref->list, &rec->backrefs);
685 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
686 dst_orphan = malloc(sizeof(*dst_orphan));
691 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
692 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
694 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
701 rb = rb_first(&rec->holes);
703 struct file_extent_hole *hole;
705 hole = rb_entry(rb, struct file_extent_hole, node);
711 if (!list_empty(&rec->backrefs))
712 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
713 list_del(&orig->list);
717 if (!list_empty(&rec->orphan_extents))
718 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
719 list_del(&orig->list);
728 static void print_orphan_data_extents(struct list_head *orphan_extents,
731 struct orphan_data_extent *orphan;
733 if (list_empty(orphan_extents))
735 printf("The following data extent is lost in tree %llu:\n",
737 list_for_each_entry(orphan, orphan_extents, list) {
738 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
739 orphan->objectid, orphan->offset, orphan->disk_bytenr,
744 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
746 u64 root_objectid = root->root_key.objectid;
747 int errors = rec->errors;
751 /* reloc root errors, we print its corresponding fs root objectid*/
752 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
753 root_objectid = root->root_key.offset;
754 fprintf(stderr, "reloc");
756 fprintf(stderr, "root %llu inode %llu errors %x",
757 (unsigned long long) root_objectid,
758 (unsigned long long) rec->ino, rec->errors);
760 if (errors & I_ERR_NO_INODE_ITEM)
761 fprintf(stderr, ", no inode item");
762 if (errors & I_ERR_NO_ORPHAN_ITEM)
763 fprintf(stderr, ", no orphan item");
764 if (errors & I_ERR_DUP_INODE_ITEM)
765 fprintf(stderr, ", dup inode item");
766 if (errors & I_ERR_DUP_DIR_INDEX)
767 fprintf(stderr, ", dup dir index");
768 if (errors & I_ERR_ODD_DIR_ITEM)
769 fprintf(stderr, ", odd dir item");
770 if (errors & I_ERR_ODD_FILE_EXTENT)
771 fprintf(stderr, ", odd file extent");
772 if (errors & I_ERR_BAD_FILE_EXTENT)
773 fprintf(stderr, ", bad file extent");
774 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
775 fprintf(stderr, ", file extent overlap");
776 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
777 fprintf(stderr, ", file extent discount");
778 if (errors & I_ERR_DIR_ISIZE_WRONG)
779 fprintf(stderr, ", dir isize wrong");
780 if (errors & I_ERR_FILE_NBYTES_WRONG)
781 fprintf(stderr, ", nbytes wrong");
782 if (errors & I_ERR_ODD_CSUM_ITEM)
783 fprintf(stderr, ", odd csum item");
784 if (errors & I_ERR_SOME_CSUM_MISSING)
785 fprintf(stderr, ", some csum missing");
786 if (errors & I_ERR_LINK_COUNT_WRONG)
787 fprintf(stderr, ", link count wrong");
788 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
789 fprintf(stderr, ", orphan file extent");
790 fprintf(stderr, "\n");
791 /* Print the orphan extents if needed */
792 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
793 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
795 /* Print the holes if needed */
796 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
797 struct file_extent_hole *hole;
798 struct rb_node *node;
801 node = rb_first(&rec->holes);
802 fprintf(stderr, "Found file extent holes:\n");
805 hole = rb_entry(node, struct file_extent_hole, node);
806 fprintf(stderr, "\tstart: %llu, len: %llu\n",
807 hole->start, hole->len);
808 node = rb_next(node);
811 fprintf(stderr, "\tstart: 0, len: %llu\n",
812 round_up(rec->isize, root->sectorsize));
816 static void print_ref_error(int errors)
818 if (errors & REF_ERR_NO_DIR_ITEM)
819 fprintf(stderr, ", no dir item");
820 if (errors & REF_ERR_NO_DIR_INDEX)
821 fprintf(stderr, ", no dir index");
822 if (errors & REF_ERR_NO_INODE_REF)
823 fprintf(stderr, ", no inode ref");
824 if (errors & REF_ERR_DUP_DIR_ITEM)
825 fprintf(stderr, ", dup dir item");
826 if (errors & REF_ERR_DUP_DIR_INDEX)
827 fprintf(stderr, ", dup dir index");
828 if (errors & REF_ERR_DUP_INODE_REF)
829 fprintf(stderr, ", dup inode ref");
830 if (errors & REF_ERR_INDEX_UNMATCH)
831 fprintf(stderr, ", index mismatch");
832 if (errors & REF_ERR_FILETYPE_UNMATCH)
833 fprintf(stderr, ", filetype mismatch");
834 if (errors & REF_ERR_NAME_TOO_LONG)
835 fprintf(stderr, ", name too long");
836 if (errors & REF_ERR_NO_ROOT_REF)
837 fprintf(stderr, ", no root ref");
838 if (errors & REF_ERR_NO_ROOT_BACKREF)
839 fprintf(stderr, ", no root backref");
840 if (errors & REF_ERR_DUP_ROOT_REF)
841 fprintf(stderr, ", dup root ref");
842 if (errors & REF_ERR_DUP_ROOT_BACKREF)
843 fprintf(stderr, ", dup root backref");
844 fprintf(stderr, "\n");
847 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
850 struct ptr_node *node;
851 struct cache_extent *cache;
852 struct inode_record *rec = NULL;
855 cache = lookup_cache_extent(inode_cache, ino, 1);
857 node = container_of(cache, struct ptr_node, cache);
859 if (mod && rec->refs > 1) {
860 node->data = clone_inode_rec(rec);
861 if (IS_ERR(node->data))
867 rec = calloc(1, sizeof(*rec));
869 return ERR_PTR(-ENOMEM);
871 rec->extent_start = (u64)-1;
873 INIT_LIST_HEAD(&rec->backrefs);
874 INIT_LIST_HEAD(&rec->orphan_extents);
875 rec->holes = RB_ROOT;
877 node = malloc(sizeof(*node));
880 return ERR_PTR(-ENOMEM);
882 node->cache.start = ino;
883 node->cache.size = 1;
886 if (ino == BTRFS_FREE_INO_OBJECTID)
889 ret = insert_cache_extent(inode_cache, &node->cache);
891 return ERR_PTR(-EEXIST);
896 static void free_orphan_data_extents(struct list_head *orphan_extents)
898 struct orphan_data_extent *orphan;
900 while (!list_empty(orphan_extents)) {
901 orphan = list_entry(orphan_extents->next,
902 struct orphan_data_extent, list);
903 list_del(&orphan->list);
908 static void free_inode_rec(struct inode_record *rec)
910 struct inode_backref *backref;
915 while (!list_empty(&rec->backrefs)) {
916 backref = to_inode_backref(rec->backrefs.next);
917 list_del(&backref->list);
920 free_orphan_data_extents(&rec->orphan_extents);
921 free_file_extent_holes(&rec->holes);
925 static int can_free_inode_rec(struct inode_record *rec)
927 if (!rec->errors && rec->checked && rec->found_inode_item &&
928 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
933 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
934 struct inode_record *rec)
936 struct cache_extent *cache;
937 struct inode_backref *tmp, *backref;
938 struct ptr_node *node;
939 unsigned char filetype;
941 if (!rec->found_inode_item)
944 filetype = imode_to_type(rec->imode);
945 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
946 if (backref->found_dir_item && backref->found_dir_index) {
947 if (backref->filetype != filetype)
948 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
949 if (!backref->errors && backref->found_inode_ref &&
950 rec->nlink == rec->found_link) {
951 list_del(&backref->list);
957 if (!rec->checked || rec->merging)
960 if (S_ISDIR(rec->imode)) {
961 if (rec->found_size != rec->isize)
962 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
963 if (rec->found_file_extent)
964 rec->errors |= I_ERR_ODD_FILE_EXTENT;
965 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
966 if (rec->found_dir_item)
967 rec->errors |= I_ERR_ODD_DIR_ITEM;
968 if (rec->found_size != rec->nbytes)
969 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
970 if (rec->nlink > 0 && !no_holes &&
971 (rec->extent_end < rec->isize ||
972 first_extent_gap(&rec->holes) < rec->isize))
973 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
976 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
977 if (rec->found_csum_item && rec->nodatasum)
978 rec->errors |= I_ERR_ODD_CSUM_ITEM;
979 if (rec->some_csum_missing && !rec->nodatasum)
980 rec->errors |= I_ERR_SOME_CSUM_MISSING;
983 BUG_ON(rec->refs != 1);
984 if (can_free_inode_rec(rec)) {
985 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
986 node = container_of(cache, struct ptr_node, cache);
987 BUG_ON(node->data != rec);
988 remove_cache_extent(inode_cache, &node->cache);
994 static int check_orphan_item(struct btrfs_root *root, u64 ino)
996 struct btrfs_path path;
997 struct btrfs_key key;
1000 key.objectid = BTRFS_ORPHAN_OBJECTID;
1001 key.type = BTRFS_ORPHAN_ITEM_KEY;
1004 btrfs_init_path(&path);
1005 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1006 btrfs_release_path(&path);
1012 static int process_inode_item(struct extent_buffer *eb,
1013 int slot, struct btrfs_key *key,
1014 struct shared_node *active_node)
1016 struct inode_record *rec;
1017 struct btrfs_inode_item *item;
1019 rec = active_node->current;
1020 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1021 if (rec->found_inode_item) {
1022 rec->errors |= I_ERR_DUP_INODE_ITEM;
1025 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1026 rec->nlink = btrfs_inode_nlink(eb, item);
1027 rec->isize = btrfs_inode_size(eb, item);
1028 rec->nbytes = btrfs_inode_nbytes(eb, item);
1029 rec->imode = btrfs_inode_mode(eb, item);
1030 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1032 rec->found_inode_item = 1;
1033 if (rec->nlink == 0)
1034 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1035 maybe_free_inode_rec(&active_node->inode_cache, rec);
1039 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1041 int namelen, u64 dir)
1043 struct inode_backref *backref;
1045 list_for_each_entry(backref, &rec->backrefs, list) {
1046 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1048 if (backref->dir != dir || backref->namelen != namelen)
1050 if (memcmp(name, backref->name, namelen))
1055 backref = malloc(sizeof(*backref) + namelen + 1);
1058 memset(backref, 0, sizeof(*backref));
1060 backref->namelen = namelen;
1061 memcpy(backref->name, name, namelen);
1062 backref->name[namelen] = '\0';
1063 list_add_tail(&backref->list, &rec->backrefs);
1067 static int add_inode_backref(struct cache_tree *inode_cache,
1068 u64 ino, u64 dir, u64 index,
1069 const char *name, int namelen,
1070 int filetype, int itemtype, int errors)
1072 struct inode_record *rec;
1073 struct inode_backref *backref;
1075 rec = get_inode_rec(inode_cache, ino, 1);
1076 BUG_ON(IS_ERR(rec));
1077 backref = get_inode_backref(rec, name, namelen, dir);
1080 backref->errors |= errors;
1081 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1082 if (backref->found_dir_index)
1083 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1084 if (backref->found_inode_ref && backref->index != index)
1085 backref->errors |= REF_ERR_INDEX_UNMATCH;
1086 if (backref->found_dir_item && backref->filetype != filetype)
1087 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1089 backref->index = index;
1090 backref->filetype = filetype;
1091 backref->found_dir_index = 1;
1092 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1094 if (backref->found_dir_item)
1095 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1096 if (backref->found_dir_index && backref->filetype != filetype)
1097 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1099 backref->filetype = filetype;
1100 backref->found_dir_item = 1;
1101 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1102 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1103 if (backref->found_inode_ref)
1104 backref->errors |= REF_ERR_DUP_INODE_REF;
1105 if (backref->found_dir_index && backref->index != index)
1106 backref->errors |= REF_ERR_INDEX_UNMATCH;
1108 backref->index = index;
1110 backref->ref_type = itemtype;
1111 backref->found_inode_ref = 1;
1116 maybe_free_inode_rec(inode_cache, rec);
1120 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1121 struct cache_tree *dst_cache)
1123 struct inode_backref *backref;
1128 list_for_each_entry(backref, &src->backrefs, list) {
1129 if (backref->found_dir_index) {
1130 add_inode_backref(dst_cache, dst->ino, backref->dir,
1131 backref->index, backref->name,
1132 backref->namelen, backref->filetype,
1133 BTRFS_DIR_INDEX_KEY, backref->errors);
1135 if (backref->found_dir_item) {
1137 add_inode_backref(dst_cache, dst->ino,
1138 backref->dir, 0, backref->name,
1139 backref->namelen, backref->filetype,
1140 BTRFS_DIR_ITEM_KEY, backref->errors);
1142 if (backref->found_inode_ref) {
1143 add_inode_backref(dst_cache, dst->ino,
1144 backref->dir, backref->index,
1145 backref->name, backref->namelen, 0,
1146 backref->ref_type, backref->errors);
1150 if (src->found_dir_item)
1151 dst->found_dir_item = 1;
1152 if (src->found_file_extent)
1153 dst->found_file_extent = 1;
1154 if (src->found_csum_item)
1155 dst->found_csum_item = 1;
1156 if (src->some_csum_missing)
1157 dst->some_csum_missing = 1;
1158 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1159 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1164 BUG_ON(src->found_link < dir_count);
1165 dst->found_link += src->found_link - dir_count;
1166 dst->found_size += src->found_size;
1167 if (src->extent_start != (u64)-1) {
1168 if (dst->extent_start == (u64)-1) {
1169 dst->extent_start = src->extent_start;
1170 dst->extent_end = src->extent_end;
1172 if (dst->extent_end > src->extent_start)
1173 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1174 else if (dst->extent_end < src->extent_start) {
1175 ret = add_file_extent_hole(&dst->holes,
1177 src->extent_start - dst->extent_end);
1179 if (dst->extent_end < src->extent_end)
1180 dst->extent_end = src->extent_end;
1184 dst->errors |= src->errors;
1185 if (src->found_inode_item) {
1186 if (!dst->found_inode_item) {
1187 dst->nlink = src->nlink;
1188 dst->isize = src->isize;
1189 dst->nbytes = src->nbytes;
1190 dst->imode = src->imode;
1191 dst->nodatasum = src->nodatasum;
1192 dst->found_inode_item = 1;
1194 dst->errors |= I_ERR_DUP_INODE_ITEM;
1202 static int splice_shared_node(struct shared_node *src_node,
1203 struct shared_node *dst_node)
1205 struct cache_extent *cache;
1206 struct ptr_node *node, *ins;
1207 struct cache_tree *src, *dst;
1208 struct inode_record *rec, *conflict;
1209 u64 current_ino = 0;
1213 if (--src_node->refs == 0)
1215 if (src_node->current)
1216 current_ino = src_node->current->ino;
1218 src = &src_node->root_cache;
1219 dst = &dst_node->root_cache;
1221 cache = search_cache_extent(src, 0);
1223 node = container_of(cache, struct ptr_node, cache);
1225 cache = next_cache_extent(cache);
1228 remove_cache_extent(src, &node->cache);
1231 ins = malloc(sizeof(*ins));
1233 ins->cache.start = node->cache.start;
1234 ins->cache.size = node->cache.size;
1238 ret = insert_cache_extent(dst, &ins->cache);
1239 if (ret == -EEXIST) {
1240 conflict = get_inode_rec(dst, rec->ino, 1);
1241 BUG_ON(IS_ERR(conflict));
1242 merge_inode_recs(rec, conflict, dst);
1244 conflict->checked = 1;
1245 if (dst_node->current == conflict)
1246 dst_node->current = NULL;
1248 maybe_free_inode_rec(dst, conflict);
1249 free_inode_rec(rec);
1256 if (src == &src_node->root_cache) {
1257 src = &src_node->inode_cache;
1258 dst = &dst_node->inode_cache;
1262 if (current_ino > 0 && (!dst_node->current ||
1263 current_ino > dst_node->current->ino)) {
1264 if (dst_node->current) {
1265 dst_node->current->checked = 1;
1266 maybe_free_inode_rec(dst, dst_node->current);
1268 dst_node->current = get_inode_rec(dst, current_ino, 1);
1269 BUG_ON(IS_ERR(dst_node->current));
1274 static void free_inode_ptr(struct cache_extent *cache)
1276 struct ptr_node *node;
1277 struct inode_record *rec;
1279 node = container_of(cache, struct ptr_node, cache);
1281 free_inode_rec(rec);
1285 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1287 static struct shared_node *find_shared_node(struct cache_tree *shared,
1290 struct cache_extent *cache;
1291 struct shared_node *node;
1293 cache = lookup_cache_extent(shared, bytenr, 1);
1295 node = container_of(cache, struct shared_node, cache);
1301 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1304 struct shared_node *node;
1306 node = calloc(1, sizeof(*node));
1309 node->cache.start = bytenr;
1310 node->cache.size = 1;
1311 cache_tree_init(&node->root_cache);
1312 cache_tree_init(&node->inode_cache);
1315 ret = insert_cache_extent(shared, &node->cache);
1320 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1321 struct walk_control *wc, int level)
1323 struct shared_node *node;
1324 struct shared_node *dest;
1327 if (level == wc->active_node)
1330 BUG_ON(wc->active_node <= level);
1331 node = find_shared_node(&wc->shared, bytenr);
1333 ret = add_shared_node(&wc->shared, bytenr, refs);
1335 node = find_shared_node(&wc->shared, bytenr);
1336 wc->nodes[level] = node;
1337 wc->active_node = level;
1341 if (wc->root_level == wc->active_node &&
1342 btrfs_root_refs(&root->root_item) == 0) {
1343 if (--node->refs == 0) {
1344 free_inode_recs_tree(&node->root_cache);
1345 free_inode_recs_tree(&node->inode_cache);
1346 remove_cache_extent(&wc->shared, &node->cache);
1352 dest = wc->nodes[wc->active_node];
1353 splice_shared_node(node, dest);
1354 if (node->refs == 0) {
1355 remove_cache_extent(&wc->shared, &node->cache);
1361 static int leave_shared_node(struct btrfs_root *root,
1362 struct walk_control *wc, int level)
1364 struct shared_node *node;
1365 struct shared_node *dest;
1368 if (level == wc->root_level)
1371 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1375 BUG_ON(i >= BTRFS_MAX_LEVEL);
1377 node = wc->nodes[wc->active_node];
1378 wc->nodes[wc->active_node] = NULL;
1379 wc->active_node = i;
1381 dest = wc->nodes[wc->active_node];
1382 if (wc->active_node < wc->root_level ||
1383 btrfs_root_refs(&root->root_item) > 0) {
1384 BUG_ON(node->refs <= 1);
1385 splice_shared_node(node, dest);
1387 BUG_ON(node->refs < 2);
/*
 * Returns:
 */
1396 * 1 - if the root with id child_root_id is a child of root parent_root_id
1397 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1398 * has other root(s) as parent(s)
1399 * 2 - if the root child_root_id doesn't have any parent roots
1401 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1404 struct btrfs_path path;
1405 struct btrfs_key key;
1406 struct extent_buffer *leaf;
1410 btrfs_init_path(&path);
/* Fast path: look for a direct ROOT_REF from parent to child. */
1412 key.objectid = parent_root_id;
1413 key.type = BTRFS_ROOT_REF_KEY;
1414 key.offset = child_root_id;
1415 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1419 btrfs_release_path(&path);
/* Otherwise scan all ROOT_BACKREF items of the child to find its parents. */
1423 key.objectid = child_root_id;
1424 key.type = BTRFS_ROOT_BACKREF_KEY;
1426 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1432 leaf = path.nodes[0];
1433 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1434 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1437 leaf = path.nodes[0];
1440 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Past the child's backref items: stop scanning. */
1441 if (key.objectid != child_root_id ||
1442 key.type != BTRFS_ROOT_BACKREF_KEY)
1447 if (key.offset == parent_root_id) {
1448 btrfs_release_path(&path);
1455 btrfs_release_path(&path);
1458 return has_parent ? 0 : 2;
/*
 * Record all dir items/indexes in one DIR_ITEM/DIR_INDEX leaf item into the
 * active shared node: each name becomes an inode backref (or a root backref
 * for subvolume entries).  Several dir entries can be packed into one item,
 * hence the cur/total walk.
 */
1461 static int process_dir_item(struct btrfs_root *root,
1462 struct extent_buffer *eb,
1463 int slot, struct btrfs_key *key,
1464 struct shared_node *active_node)
1474 struct btrfs_dir_item *di;
1475 struct inode_record *rec;
1476 struct cache_tree *root_cache;
1477 struct cache_tree *inode_cache;
1478 struct btrfs_key location;
1479 char namebuf[BTRFS_NAME_LEN];
1481 root_cache = &active_node->root_cache;
1482 inode_cache = &active_node->inode_cache;
1483 rec = active_node->current;
1484 rec->found_dir_item = 1;
1486 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1487 total = btrfs_item_size_nr(eb, slot);
1488 while (cur < total) {
1490 btrfs_dir_item_key_to_cpu(eb, di, &location);
1491 name_len = btrfs_dir_name_len(eb, di);
1492 data_len = btrfs_dir_data_len(eb, di);
1493 filetype = btrfs_dir_type(eb, di);
/* Directory isize is the sum of entry name lengths. */
1495 rec->found_size += name_len;
1496 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: clamp the copy and flag the backref. */
1500 len = BTRFS_NAME_LEN;
1501 error = REF_ERR_NAME_TOO_LONG;
1503 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1505 if (location.type == BTRFS_INODE_ITEM_KEY) {
1506 add_inode_backref(inode_cache, location.objectid,
1507 key->objectid, key->offset, namebuf,
1508 len, filetype, key->type, error);
1509 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
/* Entry points at a subvolume root rather than a plain inode. */
1510 add_inode_backref(root_cache, location.objectid,
1511 key->objectid, key->offset,
1512 namebuf, len, filetype,
1515 fprintf(stderr, "invalid location in dir item %u\n",
1517 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1518 key->objectid, key->offset, namebuf,
1519 len, filetype, key->type, error);
/* Advance to the next packed dir entry inside this item. */
1522 len = sizeof(*di) + name_len + data_len;
1523 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item must contain exactly one entry. */
1526 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1527 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record every INODE_REF packed into one leaf item as an inode backref.
 * key->objectid is the inode, key->offset its parent directory.
 */
1532 static int process_inode_ref(struct extent_buffer *eb,
1533 int slot, struct btrfs_key *key,
1534 struct shared_node *active_node)
1542 struct cache_tree *inode_cache;
1543 struct btrfs_inode_ref *ref;
1544 char namebuf[BTRFS_NAME_LEN];
1546 inode_cache = &active_node->inode_cache;
1548 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1549 total = btrfs_item_size_nr(eb, slot);
1550 while (cur < total) {
1551 name_len = btrfs_inode_ref_name_len(eb, ref);
1552 index = btrfs_inode_ref_index(eb, ref);
1553 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: clamp the copy and flag the backref. */
1557 len = BTRFS_NAME_LEN;
1558 error = REF_ERR_NAME_TOO_LONG;
1560 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1561 add_inode_backref(inode_cache, key->objectid, key->offset,
1562 index, namebuf, len, 0, key->type, error);
/* Step to the next packed ref within this item. */
1564 len = sizeof(*ref) + name_len;
1565 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Like process_inode_ref() but for INODE_EXTREF items, which carry the
 * parent directory objectid inline (used when the normal ref item is full).
 */
1571 static int process_inode_extref(struct extent_buffer *eb,
1572 int slot, struct btrfs_key *key,
1573 struct shared_node *active_node)
1582 struct cache_tree *inode_cache;
1583 struct btrfs_inode_extref *extref;
1584 char namebuf[BTRFS_NAME_LEN];
1586 inode_cache = &active_node->inode_cache;
1588 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1589 total = btrfs_item_size_nr(eb, slot);
1590 while (cur < total) {
1591 name_len = btrfs_inode_extref_name_len(eb, extref);
1592 index = btrfs_inode_extref_index(eb, extref);
/* Parent dir comes from the extref itself, not from key->offset. */
1593 parent = btrfs_inode_extref_parent(eb, extref);
1594 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: clamp the copy and flag the backref. */
1598 len = BTRFS_NAME_LEN;
1599 error = REF_ERR_NAME_TOO_LONG;
1601 read_extent_buffer(eb, namebuf,
1602 (unsigned long)(extref + 1), len);
1603 add_inode_backref(inode_cache, key->objectid, parent,
1604 index, namebuf, len, 0, key->type, error);
1606 len = sizeof(*extref) + name_len;
1607 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Count how many bytes of the range [start, start+len) are covered by
 * checksum items in the csum tree; result returned via *found.
 */
1614 static int count_csum_range(struct btrfs_root *root, u64 start,
1615 u64 len, u64 *found)
1617 struct btrfs_key key;
1618 struct btrfs_path path;
1619 struct extent_buffer *leaf;
1624 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1626 btrfs_init_path(&path);
1628 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1630 key.type = BTRFS_EXTENT_CSUM_KEY;
1632 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* The previous item may still cover 'start'; step back one slot. */
1636 if (ret > 0 && path.slots[0] > 0) {
1637 leaf = path.nodes[0];
1638 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1639 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1640 key.type == BTRFS_EXTENT_CSUM_KEY)
1645 leaf = path.nodes[0];
1646 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1647 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1652 leaf = path.nodes[0];
1655 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1656 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1657 key.type != BTRFS_EXTENT_CSUM_KEY)
/* NOTE(review): key was already read just above; this re-read is redundant. */
1660 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1661 if (key.offset >= start + len)
1664 if (key.offset > start)
/* Each csum_size bytes of item data covers one sector of data. */
1667 size = btrfs_item_size_nr(leaf, path.slots[0]);
1668 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1669 if (csum_end > start) {
1670 size = min(csum_end - start, len);
1679 btrfs_release_path(&path);
/*
 * Validate one EXTENT_DATA item of the current inode: check for overlaps and
 * holes against the previous extent, sanity-check the extent fields, account
 * found_size, and verify csum coverage for regular/prealloc extents.
 */
1685 static int process_file_extent(struct btrfs_root *root,
1686 struct extent_buffer *eb,
1687 int slot, struct btrfs_key *key,
1688 struct shared_node *active_node)
1690 struct inode_record *rec;
1691 struct btrfs_file_extent_item *fi;
1693 u64 disk_bytenr = 0;
1694 u64 extent_offset = 0;
/* sectorsize is a power of two, so this mask rounds to sector boundaries. */
1695 u64 mask = root->sectorsize - 1;
1699 rec = active_node->current;
1700 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1701 rec->found_file_extent = 1;
1703 if (rec->extent_start == (u64)-1) {
/* First extent seen for this inode. */
1704 rec->extent_start = key->offset;
1705 rec->extent_end = key->offset;
/* Compare against where the previous extent ended: overlap or hole. */
1708 if (rec->extent_end > key->offset)
1709 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1710 else if (rec->extent_end < key->offset) {
1711 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1712 key->offset - rec->extent_end);
1717 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1718 extent_type = btrfs_file_extent_type(eb, fi);
1720 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1721 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1723 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1724 rec->found_size += num_bytes;
/* Inline extents logically occupy a whole sector. */
1725 num_bytes = (num_bytes + mask) & ~mask;
1726 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1727 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1728 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1729 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1730 extent_offset = btrfs_file_extent_offset(eb, fi);
/* num_bytes must be non-zero and sector aligned. */
1731 if (num_bytes == 0 || (num_bytes & mask))
1732 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1733 if (num_bytes + extent_offset >
1734 btrfs_file_extent_ram_bytes(eb, fi))
1735 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not be compressed/encrypted/encoded. */
1736 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1737 (btrfs_file_extent_compression(eb, fi) ||
1738 btrfs_file_extent_encryption(eb, fi) ||
1739 btrfs_file_extent_other_encoding(eb, fi)))
1740 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* disk_bytenr == 0 means a hole extent; holes don't add to size. */
1741 if (disk_bytenr > 0)
1742 rec->found_size += num_bytes;
1744 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1746 rec->extent_end = key->offset + num_bytes;
1749 * The data reloc tree will copy full extents into its inode and then
1750 * copy the corresponding csums. Because the extent it copied could be
1751 * a preallocated extent that hasn't been written to yet there may be no
1752 * csums to copy, ergo we won't have csums for our file extent. This is
1753 * ok so just don't bother checking csums if the inode belongs to the
1756 if (disk_bytenr > 0 &&
1757 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID {
/* Compressed extents are checksummed over the on-disk bytes. */
1759 if (btrfs_file_extent_compression(eb, fi))
1760 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1762 disk_bytenr += extent_offset;
1764 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1767 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1769 rec->found_csum_item = 1;
1770 if (found < num_bytes)
1771 rec->some_csum_missing = 1;
/* Prealloc extents must have no csums at all. */
1772 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1774 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Dispatch every item in a fs-tree leaf to the matching process_* helper,
 * maintaining active_node->current as the inode record being assembled.
 */
1780 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1781 struct walk_control *wc)
1783 struct btrfs_key key;
1787 struct cache_tree *inode_cache;
1788 struct shared_node *active_node;
/* Deleted root (refs == 0) reached its root level: nothing to collect. */
1790 if (wc->root_level == wc->active_node &&
1791 btrfs_root_refs(&root->root_item) == 0)
1794 active_node = wc->nodes[wc->active_node];
1795 inode_cache = &active_node->inode_cache;
1796 nritems = btrfs_header_nritems(eb);
1797 for (i = 0; i < nritems; i++) {
1798 btrfs_item_key_to_cpu(eb, &key, i);
1800 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1802 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Items are sorted by objectid; a new objectid finishes the old record. */
1805 if (active_node->current == NULL ||
1806 active_node->current->ino < key.objectid) {
1807 if (active_node->current) {
1808 active_node->current->checked = 1;
1809 maybe_free_inode_rec(inode_cache,
1810 active_node->current);
1812 active_node->current = get_inode_rec(inode_cache,
1814 BUG_ON(IS_ERR(active_node->current));
1817 case BTRFS_DIR_ITEM_KEY:
1818 case BTRFS_DIR_INDEX_KEY:
1819 ret = process_dir_item(root, eb, i, &key, active_node);
1821 case BTRFS_INODE_REF_KEY:
1822 ret = process_inode_ref(eb, i, &key, active_node);
1824 case BTRFS_INODE_EXTREF_KEY:
1825 ret = process_inode_extref(eb, i, &key, active_node);
1827 case BTRFS_INODE_ITEM_KEY:
1828 ret = process_inode_item(eb, i, &key, active_node);
1830 case BTRFS_EXTENT_DATA_KEY:
1831 ret = process_file_extent(root, eb, i, &key,
/*
 * Issue readahead for all child blocks of @node from @slot onward, so the
 * subsequent synchronous reads in walk_down_tree() hit warm cache.
 */
1841 static void reada_walk_down(struct btrfs_root *root,
1842 struct extent_buffer *node, int slot)
1851 level = btrfs_header_level(node);
1855 nritems = btrfs_header_nritems(node);
1856 blocksize = root->nodesize;
1857 for (i = slot; i < nritems; i++) {
1858 bytenr = btrfs_node_blockptr(node, i);
1859 ptr_gen = btrfs_node_ptr_generation(node, i);
/* Fire-and-forget: failures are simply detected at real read time. */
1860 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1865 * Check the child node/leaf by the following condition:
1866 * 1. the first item key of the node/leaf should be the same with the one
1868 * 2. block in parent node should match the child node/leaf.
1869 * 3. generation of parent node and child's header should be consistent.
1871 * Or the child node/leaf pointed by the key in parent is not valid.
1873 * We hope to check leaf owner too, but since subvol may share leaves,
1874 * which makes leaf owner check not so strong, key check should be
 * sufficient for that case.
1877 static int check_child_node(struct btrfs_root *root,
1878 struct extent_buffer *parent, int slot,
1879 struct extent_buffer *child)
1881 struct btrfs_key parent_key;
1882 struct btrfs_key child_key;
1885 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* Leaves store item keys; interior nodes store node keys. */
1886 if (btrfs_header_level(child) == 0)
1887 btrfs_item_key_to_cpu(child, &child_key, 0)
1889 btrfs_node_key_to_cpu(child, &child_key, 0);
/* Condition 1: parent's pointer key must equal child's first key. */
1891 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1894 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1895 parent_key.objectid, parent_key.type, parent_key.offset,
1896 child_key.objectid, child_key.type, child_key.offset);
/* Condition 2: the block we read must live where the parent points. */
1898 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1900 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1901 btrfs_node_blockptr(parent, slot),
1902 btrfs_header_bytenr(child));
/* Condition 3: generations must agree or the child is stale. */
1904 if (btrfs_node_ptr_generation(parent, slot) !=
1905 btrfs_header_generation(child)) {
1907 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1908 btrfs_header_generation(child),
1909 btrfs_node_ptr_generation(parent, slot));
1915 u64 bytenr[BTRFS_MAX_LEVEL];
1916 u64 refs[BTRFS_MAX_LEVEL];
/*
 * Descend one step of the tree walk: process the current leaf (or push the
 * next child node onto the path), verifying child blocks and tracking shared
 * subtrees via the extent refcounts cached in @nrefs.
 */
1919 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1920 struct walk_control *wc, int *level,
1921 struct node_refs *nrefs)
1923 enum btrfs_tree_block_status status;
1926 struct extent_buffer *next;
1927 struct extent_buffer *cur;
1932 WARN_ON(*level < 0);
1933 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached refcount if this block was looked up before. */
1935 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1936 refs = nrefs->refs[*level];
1939 ret = btrfs_lookup_extent_info(NULL, root,
1940 path->nodes[*level]->start,
1941 *level, 1, &refs, NULL);
1946 nrefs->bytenr[*level] = path->nodes[*level]->start;
1947 nrefs->refs[*level] = refs;
/* refs > 1 means the block is shared between trees: track it. */
1951 ret = enter_shared_node(root, path->nodes[*level]->start,
1959 while (*level >= 0) {
1960 WARN_ON(*level < 0);
1961 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1962 cur = path->nodes[*level];
1964 if (btrfs_header_level(cur) != *level)
1967 if (path->slots[*level] >= btrfs_header_nritems(cur))
1970 ret = process_one_leaf(root, cur, wc);
1975 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1976 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1977 blocksize = root->nodesize;
/* Same cached-refcount trick for the child we are about to enter. */
1979 if (bytenr == nrefs->bytenr[*level - 1]) {
1980 refs = nrefs->refs[*level - 1];
1982 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1983 *level - 1, 1, &refs, NULL);
1987 nrefs->bytenr[*level - 1] = bytenr;
1988 nrefs->refs[*level - 1] = refs;
1993 ret = enter_shared_node(root, bytenr, refs,
/* Shared child already processed: skip over it. */
1996 path->slots[*level]++;
2001 next = btrfs_find_tree_block(root, bytenr, blocksize);
2002 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2003 free_extent_buffer(next);
/* Prefetch siblings before the blocking read below. */
2004 reada_walk_down(root, cur, path->slots[*level]);
2005 next = read_tree_block(root, bytenr, blocksize,
2007 if (!extent_buffer_uptodate(next)) {
2008 struct btrfs_key node_key;
2010 btrfs_node_key_to_cpu(path->nodes[*level],
2012 path->slots[*level]);
/* Unreadable child: record the parent block as corrupt. */
2013 btrfs_add_corrupt_extent_record(root->fs_info,
2015 path->nodes[*level]->start,
2016 root->nodesize, *level);
2022 ret = check_child_node(root, cur, path->slots[*level], next);
2028 if (btrfs_is_leaf(next))
2029 status = btrfs_check_leaf(root, NULL, next);
2031 status = btrfs_check_node(root, NULL, next);
2032 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2033 free_extent_buffer(next);
/* Descend into the verified child. */
2038 *level = *level - 1;
2039 free_extent_buffer(path->nodes[*level]);
2040 path->nodes[*level] = next;
2041 path->slots[*level] = 0;
2044 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Ascend the path after a subtree is exhausted: free finished levels and,
 * when leaving the level where a shared node was entered, hand its records
 * back via leave_shared_node().
 */
2048 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2049 struct walk_control *wc, int *level)
2052 struct extent_buffer *leaf;
2054 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2055 leaf = path->nodes[i];
/* More siblings at this level: resume the walk here. */
2056 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2061 free_extent_buffer(path->nodes[*level]);
2062 path->nodes[*level] = NULL;
2063 BUG_ON(*level > wc->active_node);
2064 if (*level == wc->active_node)
2065 leave_shared_node(root, wc, *level);
/*
 * Validate the inode record of a tree's root directory: it must have exactly
 * one link, no errors, and a single ".." inode-ref backref with index 0 and
 * no dir item/index entries.
 */
2072 static int check_root_dir(struct inode_record *rec)
2074 struct inode_backref *backref;
2077 if (!rec->found_inode_item || rec->errors)
2079 if (rec->nlink != 1 || rec->found_link != 0)
2081 if (list_empty(&rec->backrefs))
2083 backref = to_inode_backref(rec->backrefs.next);
2084 if (!backref->found_inode_ref)
/* The only legal backref is "..", index 0. */
2086 if (backref->index != 0 || backref->namelen != 2 ||
2087 memcmp(backref->name, "..", 2))
2089 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite a directory inode's isize to the size computed from its entries
 * (rec->found_size) and clear I_ERR_DIR_ISIZE_WRONG.
 */
2096 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2097 struct btrfs_root *root, struct btrfs_path *path,
2098 struct inode_record *rec)
2100 struct btrfs_inode_item *ei;
2101 struct btrfs_key key;
/* offset (u64)-1 + search: lands just after the inode's items. */
2104 key.objectid = rec->ino;
2105 key.type = BTRFS_INODE_ITEM_KEY;
2106 key.offset = (u64)-1;
2108 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2112 if (!path->slots[0]) {
/* The INODE_ITEM must be the previous slot; verify it's ours. */
2119 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2120 if (key.objectid != rec->ino) {
2125 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2126 struct btrfs_inode_item);
2127 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2128 btrfs_mark_buffer_dirty(path->nodes[0]);
2129 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
/* NOTE(review): %Lu is a glibc extension; %llu would be portable. */
2130 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2131 root->root_key.objectid);
2133 btrfs_release_path(path);
/*
 * Insert the missing ORPHAN_ITEM for an unlinked inode and clear the
 * corresponding error flag on success.
 */
2137 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2138 struct btrfs_root *root,
2139 struct btrfs_path *path,
2140 struct inode_record *rec)
2144 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2145 btrfs_release_path(path);
2147 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite an inode's nbytes to the byte count accumulated from its file
 * extents (rec->found_size) and clear I_ERR_FILE_NBYTES_WRONG.
 */
2151 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2152 struct btrfs_root *root,
2153 struct btrfs_path *path,
2154 struct inode_record *rec)
2156 struct btrfs_inode_item *ei;
2157 struct btrfs_key key;
2160 key.objectid = rec->ino;
2161 key.type = BTRFS_INODE_ITEM_KEY;
2164 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2171 /* Since ret == 0, no need to check anything */
2172 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2173 struct btrfs_inode_item);
2174 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2175 btrfs_mark_buffer_dirty(path->nodes[0]);
2176 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2177 printf("reset nbytes for ino %llu root %llu\n",
2178 rec->ino, root->root_key.objectid);
2180 btrfs_release_path(path);
/*
 * Recreate a missing DIR_INDEX item for @backref in its own transaction,
 * then update the in-memory records: mark the backref found and re-account
 * the parent directory's isize.
 */
2184 static int add_missing_dir_index(struct btrfs_root *root,
2185 struct cache_tree *inode_cache,
2186 struct inode_record *rec,
2187 struct inode_backref *backref)
2189 struct btrfs_path *path;
2190 struct btrfs_trans_handle *trans;
2191 struct btrfs_dir_item *dir_item;
2192 struct extent_buffer *leaf;
2193 struct btrfs_key key;
2194 struct btrfs_disk_key disk_key;
2195 struct inode_record *dir_rec;
2196 unsigned long name_ptr;
/* Dir item header plus the name bytes appended right after it. */
2197 u32 data_size = sizeof(*dir_item) + backref->namelen;
2200 path = btrfs_alloc_path();
2204 trans = btrfs_start_transaction(root, 1);
2205 if (IS_ERR(trans)) {
2206 btrfs_free_path(path);
2207 return PTR_ERR(trans);
2210 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2211 (unsigned long long)rec->ino);
2212 key.objectid = backref->dir;
2213 key.type = BTRFS_DIR_INDEX_KEY;
2214 key.offset = backref->index;
2216 ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2219 leaf = path->nodes[0];
2220 dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
/* The entry points at the inode being repaired. */
2222 disk_key.objectid = cpu_to_le64(rec->ino);
2223 disk_key.type = BTRFS_INODE_ITEM_KEY;
2224 disk_key.offset = 0;
2226 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2227 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2228 btrfs_set_dir_data_len(leaf, dir_item, 0);
2229 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2230 name_ptr = (unsigned long)(dir_item + 1);
2231 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2232 btrfs_mark_buffer_dirty(leaf);
2233 btrfs_free_path(path);
2234 btrfs_commit_transaction(trans, root);
2236 backref->found_dir_index = 1;
2237 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2238 BUG_ON(IS_ERR(dir_rec));
/* The new entry name now counts toward the parent dir's isize. */
2241 dir_rec->found_size += backref->namelen;
2242 if (dir_rec->found_size == dir_rec->isize &&
2243 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2244 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2245 if (dir_rec->found_size != dir_rec->isize)
2246 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Remove a bad DIR_INDEX entry described by @backref in its own transaction.
 * Deletes the whole item or just this name depending on what the lookup
 * finds.
 */
2251 static int delete_dir_index(struct btrfs_root *root,
2252 struct cache_tree *inode_cache,
2253 struct inode_record *rec,
2254 struct inode_backref *backref)
2256 struct btrfs_trans_handle *trans;
2257 struct btrfs_dir_item *di;
2258 struct btrfs_path *path;
2261 path = btrfs_alloc_path();
2265 trans = btrfs_start_transaction(root, 1);
2266 if (IS_ERR(trans)) {
2267 btrfs_free_path(path);
2268 return PTR_ERR(trans);
2272 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2273 (unsigned long long)backref->dir,
2274 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2275 (unsigned long long)root->objectid);
/* mod = -1: look up the entry with intent to delete. */
2277 di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2278 backref->name, backref->namelen,
2279 backref->index, -1);
2282 btrfs_free_path(path);
2283 btrfs_commit_transaction(trans, root);
/* Item holds only this entry: drop it whole; otherwise drop one name. */
2290 ret = btrfs_del_item(trans, root, path);
2292 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2294 btrfs_free_path(path);
2295 btrfs_commit_transaction(trans, root);
/*
 * Recreate a lost INODE_ITEM from what the backrefs/extents revealed:
 * directory if dir items were seen, regular file otherwise.  Times are set
 * to "now"; mode defaults to 0755 since the original mode is unrecoverable.
 */
2299 static int create_inode_item(struct btrfs_root *root,
2300 struct inode_record *rec,
2301 struct inode_backref *backref, int root_dir)
2303 struct btrfs_trans_handle *trans;
2304 struct btrfs_inode_item inode_item;
2305 time_t now = time(NULL);
2308 trans = btrfs_start_transaction(root, 1);
2309 if (IS_ERR(trans)) {
2310 ret = PTR_ERR(trans);
2314 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2315 "be incomplete, please check permissions and content after "
2316 "the fsck completes.\n", (unsigned long long)root->objectid,
2317 (unsigned long long)rec->ino);
2319 memset(&inode_item, 0, sizeof(inode_item));
2320 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2322 btrfs_set_stack_inode_nlink(&inode_item, 1);
2324 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2325 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2326 if (rec->found_dir_item) {
2327 if (rec->found_file_extent)
2328 fprintf(stderr, "root %llu inode %llu has both a dir "
2329 "item and extents, unsure if it is a dir or a "
2330 "regular file so setting it as a directory\n",
2331 (unsigned long long)root->objectid,
2332 (unsigned long long)rec->ino);
2333 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2334 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/* NOTE(review): condition is redundant — a plain 'else' would suffice. */
2335 } else if (!rec->found_dir_item) {
2336 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2337 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2339 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2340 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2341 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2342 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2343 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2344 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
/* otime (creation time) is unknowable — leave as epoch. */
2345 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2346 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2348 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2350 btrfs_commit_transaction(trans, root);
/*
 * Walk the inode's backref list and repair inconsistencies between
 * dir_item / dir_index / inode_ref triples: delete bad entries, add missing
 * ones, recreate the inode item when only refs survive.  Returns a positive
 * count of repairs, or a negative errno.
 */
2354 static int repair_inode_backrefs(struct btrfs_root *root,
2355 struct inode_record *rec,
2356 struct cache_tree *inode_cache,
2359 struct inode_backref *tmp, *backref;
2360 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2364 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2365 if (!delete && rec->ino == root_dirid) {
/* Root dir missing its inode item: recreate it first. */
2366 if (!rec->found_inode_item) {
2367 ret = create_inode_item(root, rec, backref, 1);
2374 /* Index 0 for root dir's are special, don't mess with it */
2375 if (rec->ino == root_dirid && backref->index == 0)
/* Dir index without (or disagreeing with) an inode ref: remove it. */
2379 ((backref->found_dir_index && !backref->found_inode_ref) ||
2380 (backref->found_dir_index && backref->found_inode_ref &&
2381 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2382 ret = delete_dir_index(root, inode_cache, rec, backref);
2386 list_del(&backref->list);
2390 if (!delete && !backref->found_dir_index &&
2391 backref->found_dir_item && backref->found_inode_ref) {
2392 ret = add_missing_dir_index(root, inode_cache, rec,
2397 if (backref->found_dir_item &&
2398 backref->found_dir_index &&
/* NOTE(review): duplicated found_dir_index test — presumably one of
 * these was meant to be found_inode_ref; confirm against upstream. */
2399 backref->found_dir_index) {
2400 if (!backref->errors &&
2401 backref->found_inode_ref) {
/* Backref fully consistent: drop it from the repair list. */
2402 list_del(&backref->list);
/* Only an inode ref exists: re-insert the dir item/index pair. */
2408 if (!delete && (!backref->found_dir_index &&
2409 !backref->found_dir_item &&
2410 backref->found_inode_ref)) {
2411 struct btrfs_trans_handle *trans;
2412 struct btrfs_key location;
2414 ret = check_dir_conflict(root, backref->name,
2420 * let nlink fixing routine to handle it,
2421 * which can do it better.
2426 location.objectid = rec->ino;
2427 location.type = BTRFS_INODE_ITEM_KEY;
2428 location.offset = 0;
2430 trans = btrfs_start_transaction(root, 1);
2431 if (IS_ERR(trans)) {
2432 ret = PTR_ERR(trans);
2435 fprintf(stderr, "adding missing dir index/item pair "
2437 (unsigned long long)rec->ino);
2438 ret = btrfs_insert_dir_item(trans, root, backref->name,
2440 backref->dir, &location,
2441 imode_to_type(rec->imode),
2444 btrfs_commit_transaction(trans, root);
/* All three name structures exist but the inode item is gone. */
2448 if (!delete && (backref->found_inode_ref &&
2449 backref->found_dir_index &&
2450 backref->found_dir_item &&
2451 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2452 !rec->found_inode_item)) {
2453 ret = create_inode_item(root, rec, backref, 0);
2460 return ret ? ret : repaired;
2464 * To determine the file type for nlink/inode_item repair
2466 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2467 * Return -ENOENT if file type is not found.
2469 static int find_file_type(struct inode_record *rec, u8 *type)
2471 struct inode_backref *backref;
2473 /* For inode item recovered case */
2474 if (rec->found_inode_item) {
2475 *type = imode_to_type(rec->imode);
/* Otherwise fall back to the type recorded in any dir entry. */
2479 list_for_each_entry(backref, &rec->backrefs, list) {
2480 if (backref->found_dir_index || backref->found_dir_item) {
2481 *type = backref->filetype;
2489 * To determine the file name for nlink repair
2491 * Return 0 if file name is found, set name and namelen.
2492 * Return -ENOENT if file name is not found.
2494 static int find_file_name(struct inode_record *rec,
2495 char *name, int *namelen)
2497 struct inode_backref *backref;
/* Any backref that carries a name will do; take the first one. */
2499 list_for_each_entry(backref, &rec->backrefs, list) {
2500 if (backref->found_dir_index || backref->found_dir_item ||
2501 backref->found_inode_ref) {
2502 memcpy(name, backref->name, backref->namelen);
2503 *namelen = backref->namelen;
2510 /* Reset the nlink of the inode to the correct one */
2511 static int reset_nlink(struct btrfs_trans_handle *trans,
2512 struct btrfs_root *root,
2513 struct btrfs_path *path,
2514 struct inode_record *rec)
2516 struct inode_backref *backref;
2517 struct inode_backref *tmp;
2518 struct btrfs_key key;
2519 struct btrfs_inode_item *inode_item;
2522 /* We don't believe this either, reset it and iterate backref */
2523 rec->found_link = 0;
2525 /* Remove all backref including the valid ones */
2526 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2527 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2528 backref->index, backref->name,
2529 backref->namelen, 0);
2533 /* remove invalid backref, so it won't be added back */
2534 if (!(backref->found_dir_index &&
2535 backref->found_dir_item &&
2536 backref->found_inode_ref)) {
2537 list_del(&backref->list);
2544 /* Set nlink to 0 */
2545 key.objectid = rec->ino;
2546 key.type = BTRFS_INODE_ITEM_KEY;
2548 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2555 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2556 struct btrfs_inode_item);
2557 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2558 btrfs_mark_buffer_dirty(path->nodes[0]);
2559 btrfs_release_path(path);
2562 * Add back valid inode_ref/dir_item/dir_index,
2563 * add_link() will handle the nlink inc, so new nlink must be correct
2565 list_for_each_entry(backref, &rec->backrefs, list) {
2566 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2567 backref->name, backref->namelen,
2568 backref->filetype, &backref->index, 1);
2573 btrfs_release_path(path);
/*
 * Fix a wrong nlink: rebuild the link count from valid backrefs via
 * reset_nlink(); if no link survives, move the inode into lost+found,
 * de-duplicating the name with ".INO" suffixes as needed.
 */
2577 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2578 struct btrfs_root *root,
2579 struct btrfs_path *path,
2580 struct inode_record *rec)
2582 char *dir_name = "lost+found";
2583 char namebuf[BTRFS_NAME_LEN] = {0};
2588 int name_recovered = 0;
2589 int type_recovered = 0;
2593 * Get file name and type first before these invalid inode ref
2594 * are deleted by remove_all_invalid_backref()
2596 name_recovered = !find_file_name(rec, namebuf, &namelen);
2597 type_recovered = !find_file_type(rec, &type);
2599 if (!name_recovered) {
2600 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2601 rec->ino, rec->ino);
2602 namelen = count_digits(rec->ino);
2603 sprintf(namebuf, "%llu", rec->ino);
2606 if (!type_recovered) {
2607 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2609 type = BTRFS_FT_REG_FILE;
2613 ret = reset_nlink(trans, root, path, rec);
2616 "Failed to reset nlink for inode %llu: %s\n",
2617 rec->ino, strerror(-ret));
/* No valid links remain: relink the inode under lost+found. */
2621 if (rec->found_link == 0) {
2622 lost_found_ino = root->highest_inode;
2623 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2628 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2629 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2632 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2633 dir_name, strerror(-ret));
2636 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2637 namebuf, namelen, type, NULL, 1);
2639 * Add ".INO" suffix several times to handle case where
2640 * "FILENAME.INO" is already taken by another file.
2642 while (ret == -EEXIST) {
2644 * Conflicting file name: append ".INO" (+1 for the '.')
2646 if (namelen + count_digits(rec->ino) + 1 >
2651 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2653 namelen += count_digits(rec->ino) + 1;
2654 ret = btrfs_add_link(trans, root, rec->ino,
2655 lost_found_ino, namebuf,
2656 namelen, type, NULL, 1);
2660 "Failed to link the inode %llu to %s dir: %s\n",
2661 rec->ino, dir_name, strerror(-ret));
2665 * Just increase the found_link, don't actually add the
2666 * backref. This will make things easier and this inode
2667 * record will be freed after the repair is done.
2668 * So fsck will not report problem about this inode.
2671 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2672 namelen, namebuf, dir_name);
2674 printf("Fixed the nlink of inode %llu\n", rec->ino);
2677 * Clear the flag anyway, or we will loop forever for the same inode
2678 * as it will not be removed from the bad inode list and the dead loop
2681 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2682 btrfs_release_path(path);
2687 * Check if there is any normal(reg or prealloc) file extent for given
2689 * This is used to determine the file type when neither its dir_index/item or
2690 * inode_item exists.
2692 * This will *NOT* report error, if any error happens, just consider it does
2693 * not have any normal file extent.
2695 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2697 struct btrfs_path *path;
2698 struct btrfs_key key;
2699 struct btrfs_key found_key;
2700 struct btrfs_file_extent_item *fi;
2704 path = btrfs_alloc_path();
2708 key.type = BTRFS_EXTENT_DATA_KEY;
2711 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
/* Search may land past the last item of the leaf; advance if so. */
2716 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2717 ret = btrfs_next_leaf(root, path);
2724 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2726 if (found_key.objectid != ino ||
2727 found_key.type != BTRFS_EXTENT_DATA_KEY)
2729 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2730 struct btrfs_file_extent_item);
/* Any non-inline extent counts as "normal" (reg or prealloc). */
2731 type = btrfs_file_extent_type(path->nodes[0], fi);
2732 if (type != BTRFS_FILE_EXTENT_INLINE) {
2738 btrfs_free_path(path);
/*
 * Map a BTRFS_FT_* dir-entry type to the corresponding S_IF* mode bits.
 * NOTE(review): the table is indexed without a range check — callers must
 * pass a valid BTRFS_FT_* value.
 */
2742 static u32 btrfs_type_to_imode(u8 type)
2744 static u32 imode_by_btrfs_type[] = {
2745 [BTRFS_FT_REG_FILE] = S_IFREG,
2746 [BTRFS_FT_DIR] = S_IFDIR,
2747 [BTRFS_FT_CHRDEV] = S_IFCHR,
2748 [BTRFS_FT_BLKDEV] = S_IFBLK,
2749 [BTRFS_FT_FIFO] = S_IFIFO,
2750 [BTRFS_FT_SOCK] = S_IFSOCK,
2751 [BTRFS_FT_SYMLINK] = S_IFLNK,
2754 return imode_by_btrfs_type[(type)];
/*
 * Rebuild a completely missing inode item, inferring the file type from
 * backrefs, file extents, or orphan extents (FILE as the last resort), then
 * flag the record so the nlink repair pass runs on it next.
 */
2757 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2758 struct btrfs_root *root,
2759 struct btrfs_path *path,
2760 struct inode_record *rec)
2764 int type_recovered = 0;
2767 printf("Trying to rebuild inode:%llu\n", rec->ino);
2769 type_recovered = !find_file_type(rec, &filetype);
2772 * Try to determine inode type if type not found.
2774 * For found regular file extent, it must be FILE.
2775 * For found dir_item/index, it must be DIR.
2777 * For undetermined one, use FILE as fallback.
2780 * 1. If found backref(inode_index/item is already handled) to it,
2782 * Need new inode-inode ref structure to allow search for that.
2784 if (!type_recovered) {
2785 if (rec->found_file_extent &&
2786 find_normal_file_extent(root, rec->ino)) {
2788 filetype = BTRFS_FT_REG_FILE;
2789 } else if (rec->found_dir_item) {
2791 filetype = BTRFS_FT_DIR;
2792 } else if (!list_empty(&rec->orphan_extents)) {
2794 filetype = BTRFS_FT_REG_FILE;
2796 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2799 filetype = BTRFS_FT_REG_FILE;
2803 ret = btrfs_new_inode(trans, root, rec->ino,
2804 mode | btrfs_type_to_imode(filetype));
2809 * Here inode rebuild is done, we only rebuild the inode item,
2810 * don't repair the nlink(like move to lost+found).
2811 * That is the job of nlink repair.
2813 * We just fill the record and return
2815 rec->found_dir_item = 1;
2816 rec->imode = mode | btrfs_type_to_imode(filetype);
2818 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2819 /* Ensure the inode_nlinks repair function will be called */
2820 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach orphan data extents to inode @rec: for each orphan extent,
 * either free it (if it conflicts with an existing file extent) or insert
 * a file extent item for it, updating the record's size/hole bookkeeping.
 * NOTE(review): several original lines are elided in this excerpt.
 */
2825 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2826 struct btrfs_root *root,
2827 struct btrfs_path *path,
2828 struct inode_record *rec)
2830 struct orphan_data_extent *orphan;
2831 struct orphan_data_extent *tmp;
/* _safe iteration: entries are list_del'd while walking. */
2834 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2836 * Check for conflicting file extents
2838 * Here we don't know whether the extents is compressed or not,
2839 * so we can only assume it not compressed nor data offset,
2840 * and use its disk_len as extent length.
2842 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2843 orphan->offset, orphan->disk_len, 0);
2844 btrfs_release_path(path);
2849 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2850 orphan->disk_bytenr, orphan->disk_len);
/* Conflicting orphan: drop its extent ref instead of re-linking it. */
2851 ret = btrfs_free_extent(trans,
2852 root->fs_info->extent_root,
2853 orphan->disk_bytenr, orphan->disk_len,
2854 0, root->objectid, orphan->objectid,
2859 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2860 orphan->offset, orphan->disk_bytenr,
2861 orphan->disk_len, orphan->disk_len);
2865 /* Update file size info */
2866 rec->found_size += orphan->disk_len;
2867 if (rec->found_size == rec->nbytes)
2868 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2870 /* Update the file extent hole info too */
2871 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2875 if (RB_EMPTY_ROOT(&rec->holes))
2876 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2878 list_del(&orphan->list);
/* All orphan extents handled: clear the corresponding error bit. */
2881 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill every recorded file-extent hole of @rec by punching real holes
 * (btrfs_punch_hole), so the inode's extent layout becomes contiguous.
 * Handles the special case of a file that lost all its extents.
 * NOTE(review): several original lines are elided in this excerpt.
 */
2886 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2887 struct btrfs_root *root,
2888 struct btrfs_path *path,
2889 struct inode_record *rec)
2891 struct rb_node *node;
2892 struct file_extent_hole *hole;
2896 node = rb_first(&rec->holes);
2900 hole = rb_entry(node, struct file_extent_hole, node);
2901 ret = btrfs_punch_hole(trans, root, rec->ino,
2902 hole->start, hole->len);
2905 ret = del_file_extent_hole(&rec->holes, hole->start,
2909 if (RB_EMPTY_ROOT(&rec->holes))
2910 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* Re-read the first node: del_file_extent_hole changed the tree. */
2911 node = rb_first(&rec->holes);
2913 /* special case for a file losing all its file extent */
2915 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2916 round_up(rec->isize, root->sectorsize));
2920 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2921 rec->ino, root->objectid);
/*
 * Dispatcher for per-inode repairs: if @rec has any repairable error bit,
 * start one transaction and run each applicable repair helper in order,
 * stopping the chain at the first failure (each step is gated on !ret).
 * The order matters: the inode item must exist before extents/isize/nlink
 * repairs can operate on it.
 * NOTE(review): several original lines are elided in this excerpt.
 */
2926 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2928 struct btrfs_trans_handle *trans;
2929 struct btrfs_path *path;
2932 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2933 I_ERR_NO_ORPHAN_ITEM |
2934 I_ERR_LINK_COUNT_WRONG |
2935 I_ERR_NO_INODE_ITEM |
2936 I_ERR_FILE_EXTENT_ORPHAN |
2937 I_ERR_FILE_EXTENT_DISCOUNT|
2938 I_ERR_FILE_NBYTES_WRONG)))
2941 path = btrfs_alloc_path();
2946 * For nlink repair, it may create a dir and add link, so
2947 * 2 for parent(256)'s dir_index and dir_item
2948 * 2 for lost+found dir's inode_item and inode_ref
2949 * 1 for the new inode_ref of the file
2950 * 2 for lost+found dir's dir_index and dir_item for the file
2952 trans = btrfs_start_transaction(root, 7);
2953 if (IS_ERR(trans)) {
2954 btrfs_free_path(path);
2955 return PTR_ERR(trans);
2958 if (rec->errors & I_ERR_NO_INODE_ITEM)
2959 ret = repair_inode_no_item(trans, root, path, rec);
2960 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2961 ret = repair_inode_orphan_extent(trans, root, path, rec);
2962 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2963 ret = repair_inode_discount_extent(trans, root, path, rec);
2964 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2965 ret = repair_inode_isize(trans, root, path, rec);
2966 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2967 ret = repair_inode_orphan_item(trans, root, path, rec);
2968 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2969 ret = repair_inode_nlinks(trans, root, path, rec);
2970 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2971 ret = repair_inode_nbytes(trans, root, path, rec);
/* Commit unconditionally; partial repairs are still persisted. */
2972 btrfs_commit_transaction(trans, root);
2973 btrfs_free_path(path);
/*
 * Validate (and optionally repair) every inode record collected for @root's
 * inode cache: fix backrefs first, verify/recreate the root directory,
 * then walk all records checking orphan items, inode items and link counts,
 * attempting try_repair_inode() in repair mode and printing errors otherwise.
 * Returns -1 if any unrepaired error remains, 0 on success.
 * NOTE(review): many original lines are elided in this excerpt, so some
 * branches (e.g. the repair staging loop) are only partially visible.
 */
2977 static int check_inode_recs(struct btrfs_root *root,
2978 struct cache_tree *inode_cache)
2980 struct cache_extent *cache;
2981 struct ptr_node *node;
2982 struct inode_record *rec;
2983 struct inode_backref *backref;
2988 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* A root with zero refs is being deleted; its cache should be empty. */
2990 if (btrfs_root_refs(&root->root_item) == 0) {
2991 if (!cache_tree_empty(inode_cache))
2992 fprintf(stderr, "warning line %d\n", __LINE__);
2997 * We need to record the highest inode number for later 'lost+found'
2999 * We must select an ino not used/referred by any existing inode, or
3000 * 'lost+found' ino may be a missing ino in a corrupted leaf,
3001 * this may cause 'lost+found' dir has wrong nlinks.
3003 cache = last_cache_extent(inode_cache);
3005 node = container_of(cache, struct ptr_node, cache);
3007 if (rec->ino > root->highest_inode)
3008 root->highest_inode = rec->ino;
3012 * We need to repair backrefs first because we could change some of the
3013 * errors in the inode recs.
3015 * We also need to go through and delete invalid backrefs first and then
3016 * add the correct ones second. We do this because we may get EEXIST
3017 * when adding back the correct index because we hadn't yet deleted the
3020 * For example, if we were missing a dir index then the directories
3021 * isize would be wrong, so if we fixed the isize to what we thought it
3022 * would be and then fixed the backref we'd still have a invalid fs, so
3023 * we need to add back the dir index and then check to see if the isize
3028 if (stage == 3 && !err)
3031 cache = search_cache_extent(inode_cache, 0);
3032 while (repair && cache) {
3033 node = container_of(cache, struct ptr_node, cache);
3035 cache = next_cache_extent(cache);
3037 /* Need to free everything up and rescan */
3039 remove_cache_extent(inode_cache, &node->cache);
3041 free_inode_rec(rec);
3045 if (list_empty(&rec->backrefs))
3048 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Verify the root directory (objectid root_dirid) exists and is sane. */
3062 rec = get_inode_rec(inode_cache, root_dirid, 0);
3063 BUG_ON(IS_ERR(rec));
3065 ret = check_root_dir(rec);
3067 fprintf(stderr, "root %llu root dir %llu error\n",
3068 (unsigned long long)root->root_key.objectid,
3069 (unsigned long long)root_dirid);
3070 print_inode_error(root, rec);
3075 struct btrfs_trans_handle *trans;
3077 trans = btrfs_start_transaction(root, 1);
3078 if (IS_ERR(trans)) {
3079 err = PTR_ERR(trans);
3084 "root %llu missing its root dir, recreating\n",
3085 (unsigned long long)root->objectid);
3087 ret = btrfs_make_root_dir(trans, root, root_dirid);
3090 btrfs_commit_transaction(trans, root);
3094 fprintf(stderr, "root %llu root dir %llu not found\n",
3095 (unsigned long long)root->root_key.objectid,
3096 (unsigned long long)root_dirid);
/* Main pass: drain the cache, checking each remaining inode record. */
3100 cache = search_cache_extent(inode_cache, 0);
3103 node = container_of(cache, struct ptr_node, cache);
3105 remove_cache_extent(inode_cache, &node->cache);
3107 if (rec->ino == root_dirid ||
3108 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3109 free_inode_rec(rec);
3113 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3114 ret = check_orphan_item(root, rec->ino);
3116 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3117 if (can_free_inode_rec(rec)) {
3118 free_inode_rec(rec);
3123 if (!rec->found_inode_item)
3124 rec->errors |= I_ERR_NO_INODE_ITEM;
3125 if (rec->found_link != rec->nlink)
3126 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3128 ret = try_repair_inode(root, rec);
3129 if (ret == 0 && can_free_inode_rec(rec)) {
3130 free_inode_rec(rec);
3136 if (!(repair && ret == 0))
3138 print_inode_error(root, rec);
3139 list_for_each_entry(backref, &rec->backrefs, list) {
3140 if (!backref->found_dir_item)
3141 backref->errors |= REF_ERR_NO_DIR_ITEM;
3142 if (!backref->found_dir_index)
3143 backref->errors |= REF_ERR_NO_DIR_INDEX;
3144 if (!backref->found_inode_ref)
3145 backref->errors |= REF_ERR_NO_INODE_REF;
3146 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3147 " namelen %u name %s filetype %d errors %x",
3148 (unsigned long long)backref->dir,
3149 (unsigned long long)backref->index,
3150 backref->namelen, backref->name,
3151 backref->filetype, backref->errors);
3152 print_ref_error(backref->errors);
3154 free_inode_rec(rec);
3156 return (error > 0) ? -1 : 0;
/*
 * Look up the root_record for @objectid in @root_cache, allocating and
 * inserting a fresh zeroed record on first use.
 * Returns ERR_PTR(-ENOMEM) on allocation failure, ERR_PTR(-EEXIST) if a
 * racing insert beat us (insert_cache_extent failed).
 * NOTE(review): some original lines are elided in this excerpt.
 */
3159 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3162 struct cache_extent *cache;
3163 struct root_record *rec = NULL;
3166 cache = lookup_cache_extent(root_cache, objectid, 1);
3168 rec = container_of(cache, struct root_record, cache);
3170 rec = calloc(1, sizeof(*rec));
3172 return ERR_PTR(-ENOMEM);
3173 rec->objectid = objectid;
3174 INIT_LIST_HEAD(&rec->backrefs);
/* The cache key is the root objectid itself, width 1. */
3175 rec->cache.start = objectid;
3176 rec->cache.size = 1;
3178 ret = insert_cache_extent(root_cache, &rec->cache);
3180 return ERR_PTR(-EEXIST);
/*
 * Find the backref on @rec matching (ref_root, dir, name); allocate a new
 * one (with the name stored inline after the struct, NUL-terminated) if
 * none exists and append it to rec->backrefs.
 * NOTE(review): some original lines are elided in this excerpt.
 */
3185 static struct root_backref *get_root_backref(struct root_record *rec,
3186 u64 ref_root, u64 dir, u64 index,
3187 const char *name, int namelen)
3189 struct root_backref *backref;
3191 list_for_each_entry(backref, &rec->backrefs, list) {
3192 if (backref->ref_root != ref_root || backref->dir != dir ||
3193 backref->namelen != namelen)
3195 if (memcmp(name, backref->name, namelen))
/* Not found: allocate struct + name buffer in one calloc. */
3200 backref = calloc(1, sizeof(*backref) + namelen + 1);
3203 backref->ref_root = ref_root;
3205 backref->index = index;
3206 backref->namelen = namelen;
3207 memcpy(backref->name, name, namelen);
3208 backref->name[namelen] = '\0';
3209 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Destructor for a root_record cache entry: free all attached backrefs,
 * then (in elided lines) the record itself. Used by the
 * FREE_EXTENT_CACHE_BASED_TREE helper below to tear down root_recs trees.
 */
3213 static void free_root_record(struct cache_extent *cache)
3215 struct root_record *rec;
3216 struct root_backref *backref;
3218 rec = container_of(cache, struct root_record, cache);
3219 while (!list_empty(&rec->backrefs)) {
3220 backref = to_root_backref(rec->backrefs.next);
3221 list_del(&backref->list);
/* Generates free_root_recs_tree() releasing every record via the above. */
3228 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one reference to root @root_id coming from @ref_root's directory
 * @dir under @name. @item_type distinguishes the four sources (DIR_ITEM,
 * DIR_INDEX, ROOT_REF, ROOT_BACKREF); duplicate or mismatching sources set
 * REF_ERR_* bits. A root becomes "reachable" once both a forward root ref
 * and a dir item were seen.
 * NOTE(review): some original lines are elided in this excerpt.
 */
3230 static int add_root_backref(struct cache_tree *root_cache,
3231 u64 root_id, u64 ref_root, u64 dir, u64 index,
3232 const char *name, int namelen,
3233 int item_type, int errors)
3235 struct root_record *rec;
3236 struct root_backref *backref;
3238 rec = get_root_rec(root_cache, root_id);
3239 BUG_ON(IS_ERR(rec));
3240 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3243 backref->errors |= errors;
/* DIR_ITEM keys carry no index; all other sources must agree on it. */
3245 if (item_type != BTRFS_DIR_ITEM_KEY) {
3246 if (backref->found_dir_index || backref->found_back_ref ||
3247 backref->found_forward_ref) {
3248 if (backref->index != index)
3249 backref->errors |= REF_ERR_INDEX_UNMATCH;
3251 backref->index = index;
3255 if (item_type == BTRFS_DIR_ITEM_KEY) {
3256 if (backref->found_forward_ref)
3258 backref->found_dir_item = 1;
3259 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3260 backref->found_dir_index = 1;
3261 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3262 if (backref->found_forward_ref)
3263 backref->errors |= REF_ERR_DUP_ROOT_REF;
3264 else if (backref->found_dir_item)
3266 backref->found_forward_ref = 1;
3267 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3268 if (backref->found_back_ref)
3269 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3270 backref->found_back_ref = 1;
3275 if (backref->found_forward_ref && backref->found_dir_item)
3276 backref->reachable = 1;
/*
 * Fold per-subvolume root-cache records (@src_cache) into the global root
 * cache (@dst_cache): subvolume dir entries become root backrefs via
 * add_root_backref(). Reloc-tree records are simply discarded.
 * NOTE(review): some original lines are elided in this excerpt.
 */
3280 static int merge_root_recs(struct btrfs_root *root,
3281 struct cache_tree *src_cache,
3282 struct cache_tree *dst_cache)
3284 struct cache_extent *cache;
3285 struct ptr_node *node;
3286 struct inode_record *rec;
3287 struct inode_backref *backref;
/* Reloc trees carry no subvolume references worth merging. */
3290 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3291 free_inode_recs_tree(src_cache);
3296 cache = search_cache_extent(src_cache, 0);
3299 node = container_of(cache, struct ptr_node, cache);
3301 remove_cache_extent(src_cache, &node->cache);
3304 ret = is_child_root(root, root->objectid, rec->ino);
3310 list_for_each_entry(backref, &rec->backrefs, list) {
3311 BUG_ON(backref->found_inode_ref);
3312 if (backref->found_dir_item)
3313 add_root_backref(dst_cache, rec->ino,
3314 root->root_key.objectid, backref->dir,
3315 backref->index, backref->name,
3316 backref->namelen, BTRFS_DIR_ITEM_KEY,
3318 if (backref->found_dir_index)
3319 add_root_backref(dst_cache, rec->ino,
3320 root->root_key.objectid, backref->dir,
3321 backref->index, backref->name,
3322 backref->namelen, BTRFS_DIR_INDEX_KEY,
3326 free_inode_rec(rec);
/*
 * Verify root (subvolume) reachability: starting from FS_TREE, iteratively
 * drop "reachable" marks on backrefs whose referencing root itself became
 * unreferenced, then report unreferenced fs trees and unresolved backrefs.
 * Returns 1 if any error was found, 0 otherwise.
 * NOTE(review): many original lines are elided in this excerpt; the
 * fixed-point loop structure is only partially visible.
 */
3333 static int check_root_refs(struct btrfs_root *root,
3334 struct cache_tree *root_cache)
3336 struct root_record *rec;
3337 struct root_record *ref_root;
3338 struct root_backref *backref;
3339 struct cache_extent *cache;
/* FS_TREE is the root of reachability; it is referenced by definition. */
3345 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3346 BUG_ON(IS_ERR(rec));
3349 /* fixme: this can not detect circular references */
3352 cache = search_cache_extent(root_cache, 0);
3356 rec = container_of(cache, struct root_record, cache);
3357 cache = next_cache_extent(cache);
3359 if (rec->found_ref == 0)
3362 list_for_each_entry(backref, &rec->backrefs, list) {
3363 if (!backref->reachable)
3366 ref_root = get_root_rec(root_cache,
3368 BUG_ON(IS_ERR(ref_root));
3369 if (ref_root->found_ref > 0)
/* Referencing root lost all refs: this backref is dead too. */
3372 backref->reachable = 0;
3374 if (rec->found_ref == 0)
/* Second pass: report remaining problems per root record. */
3380 cache = search_cache_extent(root_cache, 0);
3384 rec = container_of(cache, struct root_record, cache);
3385 cache = next_cache_extent(cache);
3387 if (rec->found_ref == 0 &&
3388 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3389 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3390 ret = check_orphan_item(root->fs_info->tree_root,
3396 * If we don't have a root item then we likely just have
3397 * a dir item in a snapshot for this root but no actual
3398 * ref key or anything so it's meaningless.
3400 if (!rec->found_root_item)
3403 fprintf(stderr, "fs tree %llu not referenced\n",
3404 (unsigned long long)rec->objectid);
3408 if (rec->found_ref > 0 && !rec->found_root_item)
3410 list_for_each_entry(backref, &rec->backrefs, list) {
3411 if (!backref->found_dir_item)
3412 backref->errors |= REF_ERR_NO_DIR_ITEM;
3413 if (!backref->found_dir_index)
3414 backref->errors |= REF_ERR_NO_DIR_INDEX;
3415 if (!backref->found_back_ref)
3416 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3417 if (!backref->found_forward_ref)
3418 backref->errors |= REF_ERR_NO_ROOT_REF;
3419 if (backref->reachable && backref->errors)
3426 fprintf(stderr, "fs tree %llu refs %u %s\n",
3427 (unsigned long long)rec->objectid, rec->found_ref,
3428 rec->found_root_item ? "" : "not found");
3430 list_for_each_entry(backref, &rec->backrefs, list) {
3431 if (!backref->reachable)
3433 if (!backref->errors && rec->found_root_item)
3435 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3436 " index %llu namelen %u name %s errors %x\n",
3437 (unsigned long long)backref->ref_root,
3438 (unsigned long long)backref->dir,
3439 (unsigned long long)backref->index,
3440 backref->namelen, backref->name,
3442 print_ref_error(backref->errors);
3445 return errors > 0 ? 1 : 0;
/*
 * Parse one ROOT_REF/ROOT_BACKREF item at @slot of @eb and feed it into
 * the root cache via add_root_backref(). For ROOT_REF the key is
 * (parent, REF, child); for ROOT_BACKREF it is (child, BACKREF, parent) —
 * hence the swapped objectid/offset in the two calls. Over-long names are
 * clamped to BTRFS_NAME_LEN and flagged REF_ERR_NAME_TOO_LONG.
 * NOTE(review): some original lines are elided in this excerpt.
 */
3448 static int process_root_ref(struct extent_buffer *eb, int slot,
3449 struct btrfs_key *key,
3450 struct cache_tree *root_cache)
3456 struct btrfs_root_ref *ref;
3457 char namebuf[BTRFS_NAME_LEN];
3460 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3462 dirid = btrfs_root_ref_dirid(eb, ref);
3463 index = btrfs_root_ref_sequence(eb, ref);
3464 name_len = btrfs_root_ref_name_len(eb, ref);
3466 if (name_len <= BTRFS_NAME_LEN) {
3470 len = BTRFS_NAME_LEN;
3471 error = REF_ERR_NAME_TOO_LONG;
/* The name is stored immediately after the btrfs_root_ref struct. */
3473 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3475 if (key->type == BTRFS_ROOT_REF_KEY) {
3476 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3477 index, namebuf, len, key->type, error);
3479 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3480 index, namebuf, len, key->type, error);
/*
 * Destructor for a btrfs_corrupt_block cache entry; paired with the
 * FREE_EXTENT_CACHE_BASED_TREE macro below, which generates
 * free_corrupt_blocks_tree() to drain a whole corrupt-block cache tree.
 */
3485 static void free_corrupt_block(struct cache_extent *cache)
3487 struct btrfs_corrupt_block *corrupt;
3489 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3493 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3496 * Repair the btree of the given root.
3498 * The fix is to remove the node key in corrupt_blocks cache_tree.
3499 * and rebalance the tree.
3500 * After the fix, the btree should be writeable.
/*
 * NOTE(review): several original lines are elided in this excerpt
 * (error-handling paths between the numbered lines are not visible).
 */
3502 static int repair_btree(struct btrfs_root *root,
3503 struct cache_tree *corrupt_blocks)
3505 struct btrfs_trans_handle *trans;
3506 struct btrfs_path *path;
3507 struct btrfs_corrupt_block *corrupt;
3508 struct cache_extent *cache;
3509 struct btrfs_key key;
3514 if (cache_tree_empty(corrupt_blocks))
3517 path = btrfs_alloc_path();
3521 trans = btrfs_start_transaction(root, 1);
3522 if (IS_ERR(trans)) {
3523 ret = PTR_ERR(trans);
3524 fprintf(stderr, "Error starting transaction: %s\n",
/* Pass 1: delete the pointer to each corrupt block and free its extent. */
3528 cache = first_cache_extent(corrupt_blocks);
3530 corrupt = container_of(cache, struct btrfs_corrupt_block,
3532 level = corrupt->level;
3533 path->lowest_level = level;
3534 key.objectid = corrupt->key.objectid;
3535 key.type = corrupt->key.type;
3536 key.offset = corrupt->key.offset;
3539 * Here we don't want to do any tree balance, since it may
3540 * cause a balance with corrupted brother leaf/node,
3541 * so ins_len set to 0 here.
3542 * Balance will be done after all corrupt node/leaf is deleted.
3544 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3547 offset = btrfs_node_blockptr(path->nodes[level],
3548 path->slots[level]);
3550 /* Remove the ptr */
3551 ret = btrfs_del_ptr(trans, root, path, level,
3552 path->slots[level]);
3556 * Remove the corresponding extent
3557 * return value is not concerned.
3559 btrfs_release_path(path);
3560 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3561 0, root->root_key.objectid,
3563 cache = next_cache_extent(cache);
3566 /* Balance the btree using btrfs_search_slot() */
3567 cache = first_cache_extent(corrupt_blocks);
3569 corrupt = container_of(cache, struct btrfs_corrupt_block,
3571 memcpy(&key, &corrupt->key, sizeof(key));
/* ins_len=-1 triggers deletion-side balancing during the search. */
3572 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3575 /* return will always >0 since it won't find the item */
3577 btrfs_release_path(path);
3578 cache = next_cache_extent(cache);
3581 btrfs_commit_transaction(trans, root);
3583 btrfs_free_path(path);
/*
 * Check a single fs/subvolume tree: record root items, move orphan data
 * extents onto their inode records, sanity-check the root node, walk the
 * whole tree via walk_down_tree()/walk_up_tree(), attempt btree repair for
 * any corrupt blocks found, then merge root records and validate inode
 * records. A per-root corrupt_blocks cache tree is installed into fs_info
 * for the duration of the walk and cleared at the end.
 * NOTE(review): many original lines are elided in this excerpt.
 */
3587 static int check_fs_root(struct btrfs_root *root,
3588 struct cache_tree *root_cache,
3589 struct walk_control *wc)
3595 struct btrfs_path path;
3596 struct shared_node root_node;
3597 struct root_record *rec;
3598 struct btrfs_root_item *root_item = &root->root_item;
3599 struct cache_tree corrupt_blocks;
3600 struct orphan_data_extent *orphan;
3601 struct orphan_data_extent *tmp;
3602 enum btrfs_tree_block_status status;
3603 struct node_refs nrefs;
3606 * Reuse the corrupt_block cache tree to record corrupted tree block
3608 * Unlike the usage in extent tree check, here we do it in a per
3609 * fs/subvol tree base.
3611 cache_tree_init(&corrupt_blocks);
3612 root->fs_info->corrupt_blocks = &corrupt_blocks;
3614 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3615 rec = get_root_rec(root_cache, root->root_key.objectid);
3616 BUG_ON(IS_ERR(rec));
3617 if (btrfs_root_refs(root_item) > 0)
3618 rec->found_root_item = 1;
3621 btrfs_init_path(&path);
3622 memset(&root_node, 0, sizeof(root_node));
3623 cache_tree_init(&root_node.root_cache);
3624 cache_tree_init(&root_node.inode_cache);
3625 memset(&nrefs, 0, sizeof(nrefs));
3627 /* Move the orphan extent record to corresponding inode_record */
3628 list_for_each_entry_safe(orphan, tmp,
3629 &root->orphan_data_extents, list) {
3630 struct inode_record *inode;
3632 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3634 BUG_ON(IS_ERR(inode));
3635 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3636 list_move(&orphan->list, &inode->orphan_extents);
3639 level = btrfs_header_level(root->node);
3640 memset(wc->nodes, 0, sizeof(wc->nodes));
3641 wc->nodes[level] = &root_node;
3642 wc->active_node = level;
3643 wc->root_level = level;
3645 /* We may not have checked the root block, lets do that now */
3646 if (btrfs_is_leaf(root->node))
3647 status = btrfs_check_leaf(root, NULL, root->node);
3649 status = btrfs_check_node(root, NULL, root->node);
3650 if (status != BTRFS_TREE_BLOCK_CLEAN)
/* A root mid-deletion resumes walking from drop_progress instead of 0. */
3653 if (btrfs_root_refs(root_item) > 0 ||
3654 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3655 path.nodes[level] = root->node;
3656 extent_buffer_get(root->node);
3657 path.slots[level] = 0;
3659 struct btrfs_key key;
3660 struct btrfs_disk_key found_key;
3662 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3663 level = root_item->drop_level;
3664 path.lowest_level = level;
3665 if (level > btrfs_header_level(root->node) ||
3666 level >= BTRFS_MAX_LEVEL) {
3667 error("ignoring invalid drop level: %u", level);
3670 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3673 btrfs_node_key(path.nodes[level], &found_key,
3675 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3676 sizeof(found_key)));
/* Iterate the whole tree: down to leaves, then back up, until done. */
3680 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3686 wret = walk_up_tree(root, &path, wc, &level);
3693 btrfs_release_path(&path);
3695 if (!cache_tree_empty(&corrupt_blocks)) {
3696 struct cache_extent *cache;
3697 struct btrfs_corrupt_block *corrupt;
3699 printf("The following tree block(s) is corrupted in tree %llu:\n",
3700 root->root_key.objectid);
3701 cache = first_cache_extent(&corrupt_blocks);
3703 corrupt = container_of(cache,
3704 struct btrfs_corrupt_block,
3706 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3707 cache->start, corrupt->level,
3708 corrupt->key.objectid, corrupt->key.type,
3709 corrupt->key.offset);
3710 cache = next_cache_extent(cache);
3713 printf("Try to repair the btree for root %llu\n",
3714 root->root_key.objectid);
3715 ret = repair_btree(root, &corrupt_blocks);
3717 fprintf(stderr, "Failed to repair btree: %s\n",
3720 printf("Btree for root %llu is fixed\n",
3721 root->root_key.objectid);
3725 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3729 if (root_node.current) {
3730 root_node.current->checked = 1;
3731 maybe_free_inode_rec(&root_node.inode_cache,
3735 err = check_inode_recs(root, &root_node.inode_cache);
/* Teardown: detach the per-root corrupt cache and drop leftovers. */
3739 free_corrupt_blocks_tree(&corrupt_blocks);
3740 root->fs_info->corrupt_blocks = NULL;
3741 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Return non-zero if @objectid names a tree that should be checked as an
 * fs tree: any fstree-range objectid, plus the two relocation trees.
 */
3745 static int fs_root_objectid(u64 objectid)
3747 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3748 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3750 return is_fstree(objectid);
/*
 * Iterate every ROOT_ITEM in the tree root and run check_fs_root() on each
 * fs tree, restarting from scratch if the tree root node changes underneath
 * us or a sub-check returns -EAGAIN. ROOT_REF/ROOT_BACKREF items are fed
 * into the root cache via process_root_ref(). Drives the progress task UI
 * when enabled.
 * NOTE(review): many original lines are elided in this excerpt (loop
 * boundaries and some error paths are not visible).
 */
3753 static int check_fs_roots(struct btrfs_root *root,
3754 struct cache_tree *root_cache)
3756 struct btrfs_path path;
3757 struct btrfs_key key;
3758 struct walk_control wc;
3759 struct extent_buffer *leaf, *tree_node;
3760 struct btrfs_root *tmp_root;
3761 struct btrfs_root *tree_root = root->fs_info->tree_root;
3765 if (ctx.progress_enabled) {
3766 ctx.tp = TASK_FS_ROOTS;
3767 task_start(ctx.info);
3771 * Just in case we made any changes to the extent tree that weren't
3772 * reflected into the free space cache yet.
3775 reset_cached_block_groups(root->fs_info);
3776 memset(&wc, 0, sizeof(wc));
3777 cache_tree_init(&wc.shared);
3778 btrfs_init_path(&path);
3783 key.type = BTRFS_ROOT_ITEM_KEY;
3784 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Snapshot the tree root node; if it changes, restart the whole scan. */
3789 tree_node = tree_root->node;
3791 if (tree_node != tree_root->node) {
3792 free_root_recs_tree(root_cache);
3793 btrfs_release_path(&path);
3796 leaf = path.nodes[0];
3797 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3798 ret = btrfs_next_leaf(tree_root, &path);
3804 leaf = path.nodes[0];
3806 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3807 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3808 fs_root_objectid(key.objectid)) {
/* Reloc trees bypass the fs_info root cache; others go through it. */
3809 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3810 tmp_root = btrfs_read_fs_root_no_cache(
3811 root->fs_info, &key);
3813 key.offset = (u64)-1;
3814 tmp_root = btrfs_read_fs_root(
3815 root->fs_info, &key);
3817 if (IS_ERR(tmp_root)) {
3821 ret = check_fs_root(tmp_root, root_cache, &wc);
3822 if (ret == -EAGAIN) {
3823 free_root_recs_tree(root_cache);
3824 btrfs_release_path(&path);
3829 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3830 btrfs_free_fs_root(tmp_root);
3831 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3832 key.type == BTRFS_ROOT_BACKREF_KEY) {
3833 process_root_ref(leaf, path.slots[0], &key,
3840 btrfs_release_path(&path);
3842 free_extent_cache_tree(&wc.shared);
3843 if (!cache_tree_empty(&wc.shared))
3844 fprintf(stderr, "warning line %d\n", __LINE__);
3846 task_stop(ctx.info);
/*
 * Cross-check every backref attached to extent record @rec against what
 * was found in the extent tree: missing extent-tree entries, unreferenced
 * tree backrefs, data backref count/bytenr/size mismatches, and finally
 * whether the summed per-backref counts match the extent's global refcount.
 * Error messages are printed only when @print_errs is set.
 * NOTE(review): some original lines are elided in this excerpt; the exact
 * success/failure return values are not visible here.
 */
3851 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3853 struct list_head *cur = rec->backrefs.next;
3854 struct extent_backref *back;
3855 struct tree_backref *tback;
3856 struct data_backref *dback;
3860 while(cur != &rec->backrefs) {
3861 back = to_extent_backref(cur);
3863 if (!back->found_extent_tree) {
3867 if (back->is_data) {
3868 dback = to_data_backref(back);
3869 fprintf(stderr, "Backref %llu %s %llu"
3870 " owner %llu offset %llu num_refs %lu"
3871 " not found in extent tree\n",
3872 (unsigned long long)rec->start,
3873 back->full_backref ?
3875 back->full_backref ?
3876 (unsigned long long)dback->parent:
3877 (unsigned long long)dback->root,
3878 (unsigned long long)dback->owner,
3879 (unsigned long long)dback->offset,
3880 (unsigned long)dback->num_refs);
3882 tback = to_tree_backref(back);
3883 fprintf(stderr, "Backref %llu parent %llu"
3884 " root %llu not found in extent tree\n",
3885 (unsigned long long)rec->start,
3886 (unsigned long long)tback->parent,
3887 (unsigned long long)tback->root);
3890 if (!back->is_data && !back->found_ref) {
3894 tback = to_tree_backref(back);
3895 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3896 (unsigned long long)rec->start,
3897 back->full_backref ? "parent" : "root",
3898 back->full_backref ?
3899 (unsigned long long)tback->parent :
3900 (unsigned long long)tback->root, back);
3902 if (back->is_data) {
3903 dback = to_data_backref(back);
3904 if (dback->found_ref != dback->num_refs) {
3908 fprintf(stderr, "Incorrect local backref count"
3909 " on %llu %s %llu owner %llu"
3910 " offset %llu found %u wanted %u back %p\n",
3911 (unsigned long long)rec->start,
3912 back->full_backref ?
3914 back->full_backref ?
3915 (unsigned long long)dback->parent:
3916 (unsigned long long)dback->root,
3917 (unsigned long long)dback->owner,
3918 (unsigned long long)dback->offset,
3919 dback->found_ref, dback->num_refs, back);
3921 if (dback->disk_bytenr != rec->start) {
3925 fprintf(stderr, "Backref disk bytenr does not"
3926 " match extent record, bytenr=%llu, "
3927 "ref bytenr=%llu\n",
3928 (unsigned long long)rec->start,
3929 (unsigned long long)dback->disk_bytenr);
3932 if (dback->bytes != rec->nr) {
3936 fprintf(stderr, "Backref bytes do not match "
3937 "extent backref, bytenr=%llu, ref "
3938 "bytes=%llu, backref bytes=%llu\n",
3939 (unsigned long long)rec->start,
3940 (unsigned long long)rec->nr,
3941 (unsigned long long)dback->bytes);
/* Sum the effective refcount contributed by each backref. */
3944 if (!back->is_data) {
3947 dback = to_data_backref(back);
3948 found += dback->found_ref;
3951 if (found != rec->refs) {
3955 fprintf(stderr, "Incorrect global backref count "
3956 "on %llu found %llu wanted %llu\n",
3957 (unsigned long long)rec->start,
3958 (unsigned long long)found,
3959 (unsigned long long)rec->refs);
/*
 * Detach and free every backref hanging off @rec's backrefs list.
 * NOTE(review): the list_del/free statements are elided in this excerpt.
 */
3965 static int free_all_extent_backrefs(struct extent_record *rec)
3967 struct extent_backref *back;
3968 struct list_head *cur;
3969 while (!list_empty(&rec->backrefs)) {
3970 cur = rec->backrefs.next;
3971 back = to_extent_backref(cur);
/*
 * Drain @extent_cache: remove each extent record, free its backrefs
 * (and, in elided lines, the record itself).
 */
3978 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3979 struct cache_tree *extent_cache)
3981 struct cache_extent *cache;
3982 struct extent_record *rec;
3985 cache = first_cache_extent(extent_cache);
3988 rec = container_of(cache, struct extent_record, cache);
3989 remove_cache_extent(extent_cache, cache);
3990 free_all_extent_backrefs(rec);
/*
 * Free @rec from @extent_cache iff it is fully verified: contents and
 * owner ref checked, refcounts consistent, no duplicates, all backrefs
 * accounted for, and none of the bad-backref/stripe/chunk-type flags set.
 */
3995 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3996 struct extent_record *rec)
3998 if (rec->content_checked && rec->owner_ref_checked &&
3999 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4000 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4001 !rec->bad_full_backref && !rec->crossing_stripes &&
4002 !rec->wrong_chunk_type) {
4003 remove_cache_extent(extent_cache, &rec->cache);
4004 free_all_extent_backrefs(rec);
4005 list_del_init(&rec->list);
/*
 * Verify that tree block @buf is really owned by the root recorded in its
 * header: first look for a matching non-full tree backref on @rec, then
 * fall back to searching the owning fs tree for a parent node that points
 * at buf->start. Returns 0 if ownership checks out, 1 otherwise.
 * NOTE(review): some original lines are elided in this excerpt.
 */
4011 static int check_owner_ref(struct btrfs_root *root,
4012 struct extent_record *rec,
4013 struct extent_buffer *buf)
4015 struct extent_backref *node;
4016 struct tree_backref *back;
4017 struct btrfs_root *ref_root;
4018 struct btrfs_key key;
4019 struct btrfs_path path;
4020 struct extent_buffer *parent;
4025 list_for_each_entry(node, &rec->backrefs, list) {
4028 if (!node->found_ref)
4030 if (node->full_backref)
4032 back = to_tree_backref(node);
4033 if (btrfs_header_owner(buf) == back->root)
4036 BUG_ON(rec->is_root);
4038 /* try to find the block by search corresponding fs tree */
4039 key.objectid = btrfs_header_owner(buf);
4040 key.type = BTRFS_ROOT_ITEM_KEY;
4041 key.offset = (u64)-1;
4043 ref_root = btrfs_read_fs_root(root->fs_info, &key);
4044 if (IS_ERR(ref_root))
/* Use buf's first key to locate where its parent should live. */
4047 level = btrfs_header_level(buf);
4049 btrfs_item_key_to_cpu(buf, &key, 0);
4051 btrfs_node_key_to_cpu(buf, &key, 0);
4053 btrfs_init_path(&path);
4054 path.lowest_level = level + 1;
4055 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4059 parent = path.nodes[level + 1];
4060 if (parent && buf->start == btrfs_node_blockptr(parent,
4061 path.slots[level + 1]))
4064 btrfs_release_path(&path);
4065 return found ? 0 : 1;
/*
 * Return whether @rec has a (non-full) tree backref rooted at the extent
 * tree itself, i.e. the block belongs to the extent tree.
 * NOTE(review): loop-advance and return lines are elided in this excerpt.
 */
4068 static int is_extent_tree_record(struct extent_record *rec)
4070 struct list_head *cur = rec->backrefs.next;
4071 struct extent_backref *node;
4072 struct tree_backref *back;
4075 while(cur != &rec->backrefs) {
4076 node = to_extent_backref(cur);
4080 back = to_tree_backref(node);
4081 if (node->full_backref)
4083 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Record an I/O failure on tree block [start, start+len): look up its
 * extent record and, if it belongs to the extent tree, add a corrupt
 * extent record keyed by the block's parent key so later passes know.
 * NOTE(review): some original lines are elided in this excerpt.
 */
4090 static int record_bad_block_io(struct btrfs_fs_info *info,
4091 struct cache_tree *extent_cache,
4094 struct extent_record *rec;
4095 struct cache_extent *cache;
4096 struct btrfs_key key;
4098 cache = lookup_cache_extent(extent_cache, start, len);
4102 rec = container_of(cache, struct extent_record, cache);
4103 if (!is_extent_tree_record(rec))
4106 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4107 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot+1 in @buf to fix out-of-order keys.
 * For internal nodes this swaps the two key_ptrs (and fixes up the low
 * keys in parents when slot 0 changed); for leaves it swaps item data,
 * offsets, sizes and keys, using btrfs_set_item_key_unsafe() since the
 * keys are intentionally out of order mid-swap.
 * NOTE(review): some original lines are elided in this excerpt
 * (malloc-failure handling and returns are not visible).
 */
4110 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4111 struct extent_buffer *buf, int slot)
4113 if (btrfs_header_level(buf)) {
4114 struct btrfs_key_ptr ptr1, ptr2;
4116 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4117 sizeof(struct btrfs_key_ptr));
4118 read_extent_buffer(buf, &ptr2,
4119 btrfs_node_key_ptr_offset(slot + 1),
4120 sizeof(struct btrfs_key_ptr));
4121 write_extent_buffer(buf, &ptr1,
4122 btrfs_node_key_ptr_offset(slot + 1),
4123 sizeof(struct btrfs_key_ptr));
4124 write_extent_buffer(buf, &ptr2,
4125 btrfs_node_key_ptr_offset(slot),
4126 sizeof(struct btrfs_key_ptr));
/* If slot 0 changed, parents' low keys must be updated to match. */
4128 struct btrfs_disk_key key;
4129 btrfs_node_key(buf, &key, 0);
4130 btrfs_fixup_low_keys(root, path, &key,
4131 btrfs_header_level(buf) + 1);
4134 struct btrfs_item *item1, *item2;
4135 struct btrfs_key k1, k2;
4136 char *item1_data, *item2_data;
4137 u32 item1_offset, item2_offset, item1_size, item2_size;
4139 item1 = btrfs_item_nr(slot);
4140 item2 = btrfs_item_nr(slot + 1);
4141 btrfs_item_key_to_cpu(buf, &k1, slot);
4142 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4143 item1_offset = btrfs_item_offset(buf, item1);
4144 item2_offset = btrfs_item_offset(buf, item2);
4145 item1_size = btrfs_item_size(buf, item1);
4146 item2_size = btrfs_item_size(buf, item2);
4148 item1_data = malloc(item1_size);
4151 item2_data = malloc(item2_size);
4157 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4158 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/* Cross-write: item1's bytes land at item2's offset and vice versa. */
4160 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4161 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4165 btrfs_set_item_offset(buf, item1, item2_offset);
4166 btrfs_set_item_offset(buf, item2, item1_offset);
4167 btrfs_set_item_size(buf, item1, item2_size);
4168 btrfs_set_item_size(buf, item2, item1_size);
4170 path->slots[0] = slot;
4171 btrfs_set_item_key_unsafe(root, path, &k2);
4172 path->slots[0] = slot + 1;
4173 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Walk the block at path->lowest_level and bubble mis-ordered adjacent
 * keys back into order with swap_values(), then mark the buffer dirty.
 */
4178 static int fix_key_order(struct btrfs_trans_handle *trans,
4179 struct btrfs_root *root,
4180 struct btrfs_path *path)
4182 struct extent_buffer *buf;
4183 struct btrfs_key k1, k2;
4185 int level = path->lowest_level;
4188 buf = path->nodes[level];
4189 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
/* Nodes use node keys, leaves use item keys. */
4191 btrfs_node_key_to_cpu(buf, &k1, i);
4192 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4194 btrfs_item_key_to_cpu(buf, &k1, i);
4195 btrfs_item_key_to_cpu(buf, &k2, i + 1);
/* Already sorted: nothing to do for this pair. */
4197 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4199 ret = swap_values(root, path, buf, i);
4202 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item at @slot from leaf @buf by shifting the following
 * item headers down and decrementing nritems.  Only key types the
 * checker knows how to live without are deleted; anything else is
 * left alone so we don't destroy data we can't rebuild.
 */
4208 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4209 struct btrfs_root *root,
4210 struct btrfs_path *path,
4211 struct extent_buffer *buf, int slot)
4213 struct btrfs_key key;
4214 int nritems = btrfs_header_nritems(buf);
4216 btrfs_item_key_to_cpu(buf, &key, slot);
4218 /* These are all the keys we can deal with missing. */
4219 if (key.type != BTRFS_DIR_INDEX_KEY &&
4220 key.type != BTRFS_EXTENT_ITEM_KEY &&
4221 key.type != BTRFS_METADATA_ITEM_KEY &&
4222 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4223 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4226 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4227 (unsigned long long)key.objectid, key.type,
4228 (unsigned long long)key.offset, slot, buf->start);
/* Shift the item headers after @slot one position towards the front. */
4229 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4230 btrfs_item_nr_offset(slot + 1),
4231 sizeof(struct btrfs_item) *
4232 (nritems - slot - 1));
4233 btrfs_set_header_nritems(buf, nritems - 1);
4235 struct btrfs_disk_key disk_key;
/* If slot 0 was removed the leaf's first key changed: fix parents. */
4237 btrfs_item_key(buf, &disk_key, 0);
4238 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4240 btrfs_mark_buffer_dirty(buf);
/*
 * Repair bad item data offsets in a leaf: each item's data must end
 * where the previous item's data starts (item 0 ends at the end of the
 * leaf data area).  Overhanging items are deleted via
 * delete_bogus_item(); gaps are closed by shifting the item data up
 * and rewriting the offset.
 */
4244 static int fix_item_offset(struct btrfs_trans_handle *trans,
4245 struct btrfs_root *root,
4246 struct btrfs_path *path)
4248 struct extent_buffer *buf;
4252 /* We should only get this for leaves */
4253 BUG_ON(path->lowest_level);
4254 buf = path->nodes[0];
4256 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4257 unsigned int shift = 0, offset;
/* Item 0 must end exactly at BTRFS_LEAF_DATA_SIZE. */
4259 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4260 BTRFS_LEAF_DATA_SIZE(root)) {
4261 if (btrfs_item_end_nr(buf, i) >
4262 BTRFS_LEAF_DATA_SIZE(root)) {
4263 ret = delete_bogus_item(trans, root, path,
4267 fprintf(stderr, "item is off the end of the "
4268 "leaf, can't fix\n");
4272 shift = BTRFS_LEAF_DATA_SIZE(root) -
4273 btrfs_item_end_nr(buf, i);
/* Subsequent items must end where the previous item's data starts. */
4274 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4275 btrfs_item_offset_nr(buf, i - 1)) {
4276 if (btrfs_item_end_nr(buf, i) >
4277 btrfs_item_offset_nr(buf, i - 1)) {
4278 ret = delete_bogus_item(trans, root, path,
4282 fprintf(stderr, "items overlap, can't fix\n");
4286 shift = btrfs_item_offset_nr(buf, i - 1) -
4287 btrfs_item_end_nr(buf, i);
4292 printf("Shifting item nr %d by %u bytes in block %llu\n",
4293 i, shift, (unsigned long long)buf->start);
4294 offset = btrfs_item_offset_nr(buf, i);
/* Move the item data by @shift and record the new offset. */
4295 memmove_extent_buffer(buf,
4296 btrfs_leaf_data(buf) + offset + shift,
4297 btrfs_leaf_data(buf) + offset,
4298 btrfs_item_size_nr(buf, i));
4299 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4301 btrfs_mark_buffer_dirty(buf);
4305 * We may have moved things, in which case we want to exit so we don't
4306 * write those changes out. Once we have proper abort functionality in
4307 * progs this can be changed to something nicer.
4314 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4315 * then just return -EIO.
/*
 * For each root that references @buf (found via btrfs_find_all_roots),
 * start a transaction, COW down to the block with skip_check_block set,
 * and run the appropriate repair (fix_key_order / fix_item_offset)
 * depending on @status.  Only bad-key-order and invalid-offset failures
 * are considered fixable.
 */
4317 static int try_to_fix_bad_block(struct btrfs_root *root,
4318 struct extent_buffer *buf,
4319 enum btrfs_tree_block_status status)
4321 struct btrfs_trans_handle *trans;
4322 struct ulist *roots;
4323 struct ulist_node *node;
4324 struct btrfs_root *search_root;
4325 struct btrfs_path *path;
4326 struct ulist_iterator iter;
4327 struct btrfs_key root_key, key;
/* Anything other than these two failure modes we can't repair here. */
4330 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4331 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4334 path = btrfs_alloc_path();
4338 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4341 btrfs_free_path(path);
4345 ULIST_ITER_INIT(&iter);
4346 while ((node = ulist_next(roots, &iter))) {
4347 root_key.objectid = node->val;
4348 root_key.type = BTRFS_ROOT_ITEM_KEY;
4349 root_key.offset = (u64)-1;
4351 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4358 trans = btrfs_start_transaction(search_root, 0);
4359 if (IS_ERR(trans)) {
4360 ret = PTR_ERR(trans);
/* Search with write=1 so the path down to @buf gets COWed. */
4364 path->lowest_level = btrfs_header_level(buf);
4365 path->skip_check_block = 1;
4366 if (path->lowest_level)
4367 btrfs_node_key_to_cpu(buf, &key, 0);
4369 btrfs_item_key_to_cpu(buf, &key, 0);
4370 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4373 btrfs_commit_transaction(trans, search_root);
4376 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4377 ret = fix_key_order(trans, search_root, path);
4378 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4379 ret = fix_item_offset(trans, search_root, path);
4381 btrfs_commit_transaction(trans, search_root);
4384 btrfs_release_path(path);
4385 btrfs_commit_transaction(trans, search_root);
4388 btrfs_free_path(path);
/*
 * Validate tree block @buf against its extent record: run
 * btrfs_check_leaf()/btrfs_check_node(), attempt repair on failure,
 * and mark the record's content/owner-ref checked state.  Extents with
 * FULL_BACKREF flag don't need an owner-ref walk; others go through
 * check_owner_ref().
 */
4392 static int check_block(struct btrfs_root *root,
4393 struct cache_tree *extent_cache,
4394 struct extent_buffer *buf, u64 flags)
4396 struct extent_record *rec;
4397 struct cache_extent *cache;
4398 struct btrfs_key key;
4399 enum btrfs_tree_block_status status;
4403 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4406 rec = container_of(cache, struct extent_record, cache);
4407 rec->generation = btrfs_header_generation(buf);
4409 level = btrfs_header_level(buf);
4410 if (btrfs_header_nritems(buf) > 0) {
/* Remember the block's first key for later error reporting. */
4413 btrfs_item_key_to_cpu(buf, &key, 0);
4415 btrfs_node_key_to_cpu(buf, &key, 0);
4417 rec->info_objectid = key.objectid;
4419 rec->info_level = level;
4421 if (btrfs_is_leaf(buf))
4422 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4424 status = btrfs_check_node(root, &rec->parent_key, buf);
4426 if (status != BTRFS_TREE_BLOCK_CLEAN) {
/* Try in-place repair; if it still fails, report a bad block. */
4428 status = try_to_fix_bad_block(root, buf, status);
4429 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4431 fprintf(stderr, "bad block %llu\n",
4432 (unsigned long long)buf->start);
4435 * Signal to callers we need to start the scan over
4436 * again since we'll have cowed blocks.
4441 rec->content_checked = 1;
4442 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4443 rec->owner_ref_checked = 1;
4445 ret = check_owner_ref(root, rec, buf);
4447 rec->owner_ref_checked = 1;
/* Drop the record now if everything about it has been verified. */
4451 maybe_free_extent_rec(extent_cache, rec);
/*
 * Look up an existing tree backref on @rec: full backrefs match on
 * @parent, normal backrefs match on @root.  Returns NULL when no
 * matching backref is on the list (return paths partially elided in
 * this view).
 */
4455 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4456 u64 parent, u64 root)
4458 struct list_head *cur = rec->backrefs.next;
4459 struct extent_backref *node;
4460 struct tree_backref *back;
4462 while(cur != &rec->backrefs) {
4463 node = to_extent_backref(cur);
4467 back = to_tree_backref(node);
/* A caller-supplied parent selects the full-backref flavor. */
4469 if (!node->full_backref)
4471 if (parent == back->parent)
4474 if (node->full_backref)
4476 if (back->root == root)
/*
 * Allocate a fresh tree backref for @rec and queue it on the record's
 * backref list.  A non-zero @parent makes it a full backref; otherwise
 * the @root id is recorded (root-assignment line elided in this view).
 */
4483 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4484 u64 parent, u64 root)
4486 struct tree_backref *ref = malloc(sizeof(*ref));
4490 memset(&ref->node, 0, sizeof(ref->node));
4492 ref->parent = parent;
4493 ref->node.full_backref = 1;
4496 ref->node.full_backref = 0;
4498 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Find a data backref on @rec matching either @parent (full backref)
 * or the (@root, @owner, @offset) triple.  When @found_ref is set and
 * the candidate was already found on-disk, its disk_bytenr/bytes must
 * also match — otherwise keep scanning so mismatched duplicates get
 * their own backref entry.
 */
4503 static struct data_backref *find_data_backref(struct extent_record *rec,
4504 u64 parent, u64 root,
4505 u64 owner, u64 offset,
4507 u64 disk_bytenr, u64 bytes)
4509 struct list_head *cur = rec->backrefs.next;
4510 struct extent_backref *node;
4511 struct data_backref *back;
4513 while(cur != &rec->backrefs) {
4514 node = to_extent_backref(cur);
4518 back = to_data_backref(node);
4520 if (!node->full_backref)
4522 if (parent == back->parent)
4525 if (node->full_backref)
4527 if (back->root == root && back->owner == owner &&
4528 back->offset == offset) {
/* Size/bytenr disagreement: not the backref we're after. */
4529 if (found_ref && node->found_ref &&
4530 (back->bytes != bytes ||
4531 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data backref for @rec, fill in parent/root/owner/offset,
 * seed bytes with @max_size and bump the record's max_size if this
 * backref is larger.  Appended to the record's backref list.
 */
4540 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4541 u64 parent, u64 root,
4542 u64 owner, u64 offset,
4545 struct data_backref *ref = malloc(sizeof(*ref));
4549 memset(&ref->node, 0, sizeof(ref->node));
4550 ref->node.is_data = 1;
/* Non-zero parent => shared (full) data backref. */
4553 ref->parent = parent;
4556 ref->node.full_backref = 1;
4560 ref->offset = offset;
4561 ref->node.full_backref = 0;
4563 ref->bytes = max_size;
4566 list_add_tail(&ref->node.list, &rec->backrefs);
4567 if (max_size > rec->max_size)
4568 rec->max_size = max_size;
4572 /* Check if the type of extent matches with its chunk */
/*
 * Flag rec->wrong_chunk_type when the extent's data/metadata kind does
 * not match the flags of the block group containing it.  SYSTEM vs
 * METADATA can only be told apart by the first tree backref's root id.
 */
4573 static void check_extent_type(struct extent_record *rec)
4575 struct btrfs_block_group_cache *bg_cache;
4577 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4581 /* data extent, check chunk directly*/
4582 if (!rec->metadata) {
4583 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4584 rec->wrong_chunk_type = 1;
4588 /* metadata extent, check the obvious case first */
4589 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4590 BTRFS_BLOCK_GROUP_METADATA))) {
4591 rec->wrong_chunk_type = 1;
4596 * Check SYSTEM extent, as it's also marked as metadata, we can only
4597 * make sure it's a SYSTEM extent by its backref
4599 if (!list_empty(&rec->backrefs)) {
4600 struct extent_backref *node;
4601 struct tree_backref *tback;
4604 node = to_extent_backref(rec->backrefs.next);
4605 if (node->is_data) {
4606 /* tree block shouldn't have data backref */
4607 rec->wrong_chunk_type = 1;
4610 tback = container_of(node, struct tree_backref, node);
/* Chunk-tree-owned blocks live in SYSTEM block groups. */
4612 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4613 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4615 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4616 if (!(bg_cache->flags & bg_type))
4617 rec->wrong_chunk_type = 1;
4622 * Allocate a new extent record, fill default values from @tmpl and insert int
4623 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4624 * the cache, otherwise it fails.
/*
 * Allocate a new extent_record populated from @tmpl and insert it into
 * @extent_cache.  The caller guarantees [start, nr) is not yet cached;
 * insert_cache_extent() would fail otherwise.
 */
4626 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4627 struct extent_record *tmpl)
4629 struct extent_record *rec;
4632 rec = malloc(sizeof(*rec));
4635 rec->start = tmpl->start;
4636 rec->max_size = tmpl->max_size;
/* nr may legitimately be smaller than max_size; keep the larger. */
4637 rec->nr = max(tmpl->nr, tmpl->max_size);
4638 rec->found_rec = tmpl->found_rec;
4639 rec->content_checked = tmpl->content_checked;
4640 rec->owner_ref_checked = tmpl->owner_ref_checked;
4641 rec->num_duplicates = 0;
4642 rec->metadata = tmpl->metadata;
4643 rec->flag_block_full_backref = FLAG_UNSET;
4644 rec->bad_full_backref = 0;
4645 rec->crossing_stripes = 0;
4646 rec->wrong_chunk_type = 0;
4647 rec->is_root = tmpl->is_root;
4648 rec->refs = tmpl->refs;
4649 rec->extent_item_refs = tmpl->extent_item_refs;
4650 rec->parent_generation = tmpl->parent_generation;
4651 INIT_LIST_HEAD(&rec->backrefs);
4652 INIT_LIST_HEAD(&rec->dups);
4653 INIT_LIST_HEAD(&rec->list);
4654 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4655 rec->cache.start = tmpl->start;
4656 rec->cache.size = tmpl->nr;
4657 ret = insert_cache_extent(extent_cache, &rec->cache);
/* Running total of space accounted for by extent records. */
4662 bytes_used += rec->nr;
/* Metadata must not straddle a stripe boundary (scrub limitation). */
4665 rec->crossing_stripes = check_crossing_stripes(rec->start,
4666 global_info->tree_root->nodesize);
4667 check_extent_type(rec);
4672 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4674 * - refs - if found, increase refs
4675 * - is_root - if found, set
4676 * - content_checked - if found, set
4677 * - owner_ref_checked - if found, set
4679 * If not found, create a new one, initialize and insert.
/*
 * Merge @tmpl into an existing cached extent record, or create one via
 * add_extent_rec_nolookup() when none overlaps [start, nr).  A second
 * on-disk record for the same range is tracked as a duplicate on
 * rec->dups and queued on the global duplicate_extents list.
 */
4681 static int add_extent_rec(struct cache_tree *extent_cache,
4682 struct extent_record *tmpl)
4684 struct extent_record *rec;
4685 struct cache_extent *cache;
4689 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4691 rec = container_of(cache, struct extent_record, cache);
4695 rec->nr = max(tmpl->nr, tmpl->max_size);
4698 * We need to make sure to reset nr to whatever the extent
4699 * record says was the real size, this way we can compare it to
4702 if (tmpl->found_rec) {
/* Second EXTENT_ITEM for this range (or mismatched start). */
4703 if (tmpl->start != rec->start || rec->found_rec) {
4704 struct extent_record *tmp;
4707 if (list_empty(&rec->list))
4708 list_add_tail(&rec->list,
4709 &duplicate_extents);
4712 * We have to do this song and dance in case we
4713 * find an extent record that falls inside of
4714 * our current extent record but does not have
4715 * the same objectid.
4717 tmp = malloc(sizeof(*tmp));
4720 tmp->start = tmpl->start;
4721 tmp->max_size = tmpl->max_size;
4724 tmp->metadata = tmpl->metadata;
4725 tmp->extent_item_refs = tmpl->extent_item_refs;
4726 INIT_LIST_HEAD(&tmp->list);
4727 list_add_tail(&tmp->list, &rec->dups);
4728 rec->num_duplicates++;
4735 if (tmpl->extent_item_refs && !dup) {
/* Two extent items claiming different refcounts: report it. */
4736 if (rec->extent_item_refs) {
4737 fprintf(stderr, "block %llu rec "
4738 "extent_item_refs %llu, passed %llu\n",
4739 (unsigned long long)tmpl->start,
4740 (unsigned long long)
4741 rec->extent_item_refs,
4742 (unsigned long long)tmpl->extent_item_refs);
4744 rec->extent_item_refs = tmpl->extent_item_refs;
/* Sticky flags: once set by any caller they stay set. */
4748 if (tmpl->content_checked)
4749 rec->content_checked = 1;
4750 if (tmpl->owner_ref_checked)
4751 rec->owner_ref_checked = 1;
4752 memcpy(&rec->parent_key, &tmpl->parent_key,
4753 sizeof(tmpl->parent_key));
4754 if (tmpl->parent_generation)
4755 rec->parent_generation = tmpl->parent_generation;
4756 if (rec->max_size < tmpl->max_size)
4757 rec->max_size = tmpl->max_size;
4760 * A metadata extent can't cross stripe_len boundary, otherwise
4761 * kernel scrub won't be able to handle it.
4762 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4766 rec->crossing_stripes = check_crossing_stripes(
4767 rec->start, global_info->tree_root->nodesize);
4768 check_extent_type(rec);
4769 maybe_free_extent_rec(extent_cache, rec);
/* No overlapping record cached: create a brand-new one. */
4773 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref for @bytenr, creating a placeholder extent
 * record first if none exists.  @found_ref distinguishes a reference
 * seen while walking the tree from one read out of the extent tree;
 * seeing either twice is reported as a duplicate.
 */
4778 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4779 u64 parent, u64 root, int found_ref)
4781 struct extent_record *rec;
4782 struct tree_backref *back;
4783 struct cache_extent *cache;
4786 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4788 struct extent_record tmpl;
/* No record yet: insert a minimal one so the backref has a home. */
4790 memset(&tmpl, 0, sizeof(tmpl));
4791 tmpl.start = bytenr;
4795 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4799 /* really a bug in cache_extent implement now */
4800 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4805 rec = container_of(cache, struct extent_record, cache);
4806 if (rec->start != bytenr) {
4808 * Several cause, from unaligned bytenr to over lapping extents
4813 back = find_tree_backref(rec, parent, root);
4815 back = alloc_tree_backref(rec, parent, root);
4821 if (back->node.found_ref) {
4822 fprintf(stderr, "Extent back ref already exists "
4823 "for %llu parent %llu root %llu \n",
4824 (unsigned long long)bytenr,
4825 (unsigned long long)parent,
4826 (unsigned long long)root);
4828 back->node.found_ref = 1;
4830 if (back->node.found_extent_tree) {
4831 fprintf(stderr, "Extent back ref already exists "
4832 "for %llu parent %llu root %llu \n",
4833 (unsigned long long)bytenr,
4834 (unsigned long long)parent,
4835 (unsigned long long)root);
4837 back->node.found_extent_tree = 1;
4839 check_extent_type(rec);
4840 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for @bytenr, creating a placeholder extent
 * record if needed.  When @found_ref is set, @max_size is the real
 * extent size and must agree with any previously-found ref; a mismatch
 * allocates a separate backref so the inconsistency is visible later.
 */
4844 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4845 u64 parent, u64 root, u64 owner, u64 offset,
4846 u32 num_refs, int found_ref, u64 max_size)
4848 struct extent_record *rec;
4849 struct data_backref *back;
4850 struct cache_extent *cache;
4853 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4855 struct extent_record tmpl;
4857 memset(&tmpl, 0, sizeof(tmpl));
4858 tmpl.start = bytenr;
4860 tmpl.max_size = max_size;
4862 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4866 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4871 rec = container_of(cache, struct extent_record, cache);
4872 if (rec->max_size < max_size)
4873 rec->max_size = max_size;
4876 * If found_ref is set then max_size is the real size and must match the
4877 * existing refs. So if we have already found a ref then we need to
4878 * make sure that this ref matches the existing one, otherwise we need
4879 * to add a new backref so we can notice that the backrefs don't match
4880 * and we need to figure out who is telling the truth. This is to
4881 * account for that awful fsync bug I introduced where we'd end up with
4882 * a btrfs_file_extent_item that would have its length include multiple
4883 * prealloc extents or point inside of a prealloc extent.
4885 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4888 back = alloc_data_backref(rec, parent, root, owner, offset,
/* A ref found by tree walk always contributes exactly one count. */
4894 BUG_ON(num_refs != 1);
4895 if (back->node.found_ref)
4896 BUG_ON(back->bytes != max_size);
4897 back->node.found_ref = 1;
4898 back->found_ref += 1;
4899 back->bytes = max_size;
4900 back->disk_bytenr = bytenr;
/* Data extents have no tree-block content/owner checks to run. */
4902 rec->content_checked = 1;
4903 rec->owner_ref_checked = 1;
4905 if (back->node.found_extent_tree) {
4906 fprintf(stderr, "Extent back ref already exists "
4907 "for %llu parent %llu root %llu "
4908 "owner %llu offset %llu num_refs %lu\n",
4909 (unsigned long long)bytenr,
4910 (unsigned long long)parent,
4911 (unsigned long long)root,
4912 (unsigned long long)owner,
4913 (unsigned long long)offset,
4914 (unsigned long)num_refs);
4916 back->num_refs = num_refs;
4917 back->node.found_extent_tree = 1;
4919 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue [bytenr, bytenr+size) for scanning: add it to @seen first (so
 * it is never queued twice), then to @pending.
 */
4923 static int add_pending(struct cache_tree *pending,
4924 struct cache_tree *seen, u64 bytenr, u32 size)
4927 ret = add_cache_extent(seen, bytenr, size);
4930 add_cache_extent(pending, bytenr, size);
/*
 * Fill @bits with up to @bits_nr block ranges to process next,
 * preferring readahead entries, then tree nodes near @last (biased
 * 32K back to catch neighbors), then plain pending extents.  Extra
 * nearby pending blocks are appended opportunistically for better
 * read locality.
 */
4934 static int pick_next_pending(struct cache_tree *pending,
4935 struct cache_tree *reada,
4936 struct cache_tree *nodes,
4937 u64 last, struct block_info *bits, int bits_nr,
4940 unsigned long node_start = last;
4941 struct cache_extent *cache;
/* Readahead entries take priority: hand back one at a time. */
4944 cache = search_cache_extent(reada, 0);
4946 bits[0].start = cache->start;
4947 bits[0].size = cache->size;
/* Start the node search a little before @last to pick up neighbors. */
4952 if (node_start > 32768)
4953 node_start -= 32768;
4955 cache = search_cache_extent(nodes, node_start);
4957 cache = search_cache_extent(nodes, 0);
4960 cache = search_cache_extent(pending, 0);
4965 bits[ret].start = cache->start;
4966 bits[ret].size = cache->size;
4967 cache = next_cache_extent(cache);
4969 } while (cache && ret < bits_nr);
4975 bits[ret].start = cache->start;
4976 bits[ret].size = cache->size;
4977 cache = next_cache_extent(cache);
4979 } while (cache && ret < bits_nr);
/* Room to spare: pull in pending extents that sit close by. */
4981 if (bits_nr - ret > 8) {
4982 u64 lookup = bits[0].start + bits[0].size;
4983 struct cache_extent *next;
4984 next = search_cache_extent(pending, lookup);
4986 if (next->start - lookup > 32768)
4988 bits[ret].start = next->start;
4989 bits[ret].size = next->size;
4990 lookup = next->start + next->size;
4994 next = next_cache_extent(next);
/*
 * cache_tree_free_extents() callback: unlink a chunk_record from its
 * lists before it is released.
 */
5002 static void free_chunk_record(struct cache_extent *cache)
5004 struct chunk_record *rec;
5006 rec = container_of(cache, struct chunk_record, cache);
5007 list_del_init(&rec->list);
5008 list_del_init(&rec->dextents);
/* Release every chunk record held in @chunk_cache. */
5012 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5014 cache_tree_free_extents(chunk_cache, free_chunk_record);
/* FREE_RB_BASED_TREE callback: recover the device_record from its rb_node. */
5017 static void free_device_record(struct rb_node *node)
5019 struct device_record *rec;
5021 rec = container_of(node, struct device_record, node);
5025 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the block-group tree and, on success, link it on
 * the tree's block_groups list.
 */
5027 int insert_block_group_record(struct block_group_tree *tree,
5028 struct block_group_record *bg_rec)
5032 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5036 list_add_tail(&bg_rec->list, &tree->block_groups);
/* cache_tree_free_extents() callback: unlink a block_group_record. */
5040 static void free_block_group_record(struct cache_extent *cache)
5042 struct block_group_record *rec;
5044 rec = container_of(cache, struct block_group_record, cache);
5045 list_del_init(&rec->list);
/* Release every block-group record held in @tree. */
5049 void free_block_group_tree(struct block_group_tree *tree)
5051 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the device-extent tree and queue it on both
 * orphan lists until a matching chunk/device is found.
 */
5054 int insert_device_extent_record(struct device_extent_tree *tree,
5055 struct device_extent_record *de_rec)
5060 * Device extent is a bit different from the other extents, because
5061 * the extents which belong to the different devices may have the
5062 * same start and size, so we need use the special extent cache
5063 * search/insert functions.
5065 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5069 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5070 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * cache_tree_free_extents() callback: unhook a device_extent_record
 * from whichever orphan lists it is still on.
 */
5074 static void free_device_extent_record(struct cache_extent *cache)
5076 struct device_extent_record *rec;
5078 rec = container_of(cache, struct device_extent_record, cache);
5079 if (!list_empty(&rec->chunk_list))
5080 list_del_init(&rec->chunk_list);
5081 if (!list_empty(&rec->device_list))
5082 list_del_init(&rec->device_list);
/* Release every device-extent record held in @tree. */
5086 void free_device_extent_tree(struct device_extent_tree *tree)
5088 cache_tree_free_extents(&tree->tree, free_device_extent_record);
5091 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 (pre-2.6.31 format) extent ref item: owners below
 * FIRST_FREE_OBJECTID are internal trees => tree backref, everything
 * else is file data => data backref.
 */
5092 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5093 struct extent_buffer *leaf, int slot)
5095 struct btrfs_extent_ref_v0 *ref0;
5096 struct btrfs_key key;
5099 btrfs_item_key_to_cpu(leaf, &key, slot);
5100 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5101 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5102 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
5105 ret = add_data_backref(extent_cache, key.objectid, key.offset,
5106 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the CHUNK_ITEM at @slot.  The record is
 * variable-sized: btrfs_chunk_record_size() accounts for the per-stripe
 * array copied at the end.
 */
5112 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5113 struct btrfs_key *key,
5116 struct btrfs_chunk *ptr;
5117 struct chunk_record *rec;
5120 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5121 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5123 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5125 fprintf(stderr, "memory allocation failed\n");
5129 INIT_LIST_HEAD(&rec->list);
5130 INIT_LIST_HEAD(&rec->dextents);
/* Chunks are keyed by logical start (key->offset). */
5133 rec->cache.start = key->offset;
5134 rec->cache.size = btrfs_chunk_length(leaf, ptr);
5136 rec->generation = btrfs_header_generation(leaf);
5138 rec->objectid = key->objectid;
5139 rec->type = key->type;
5140 rec->offset = key->offset;
5142 rec->length = rec->cache.size;
5143 rec->owner = btrfs_chunk_owner(leaf, ptr);
5144 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5145 rec->type_flags = btrfs_chunk_type(leaf, ptr);
5146 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5147 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5148 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5149 rec->num_stripes = num_stripes;
5150 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* Copy the embedded stripe array (devid, offset, device uuid). */
5152 for (i = 0; i < rec->num_stripes; ++i) {
5153 rec->stripes[i].devid =
5154 btrfs_stripe_devid_nr(leaf, ptr, i);
5155 rec->stripes[i].offset =
5156 btrfs_stripe_offset_nr(leaf, ptr, i);
5157 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5158 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the CHUNK_ITEM at @slot and cache it as a chunk_record.
 * Invalid chunks are reported and skipped; a duplicate insertion
 * (same logical range already cached) is reported as well.
 */
5165 static int process_chunk_item(struct cache_tree *chunk_cache,
5166 struct btrfs_key *key, struct extent_buffer *eb,
5169 struct chunk_record *rec;
5170 struct btrfs_chunk *chunk;
5173 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5175 * Do extra check for this chunk item,
5177 * It's still possible one can craft a leaf with CHUNK_ITEM, with
5178 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5179 * and owner<->key_type check.
5181 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5184 error("chunk(%llu, %llu) is not valid, ignore it",
5185 key->offset, btrfs_chunk_length(eb, chunk));
5188 rec = btrfs_new_chunk_record(eb, key, slot);
5189 ret = insert_cache_extent(chunk_cache, &rec->cache);
5191 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5192 rec->offset, rec->length);
/*
 * Build a device_record from the DEV_ITEM at @slot and insert it into
 * the devid-keyed rb-tree @dev_cache; duplicates are reported.
 */
5199 static int process_device_item(struct rb_root *dev_cache,
5200 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5202 struct btrfs_dev_item *ptr;
5203 struct device_record *rec;
5206 ptr = btrfs_item_ptr(eb,
5207 slot, struct btrfs_dev_item);
5209 rec = malloc(sizeof(*rec));
5211 fprintf(stderr, "memory allocation failed\n");
5215 rec->devid = key->offset;
5216 rec->generation = btrfs_header_generation(eb);
5218 rec->objectid = key->objectid;
5219 rec->type = key->type;
5220 rec->offset = key->offset;
/* Prefer the devid stored in the item body over the key offset. */
5222 rec->devid = btrfs_device_id(eb, ptr);
5223 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5224 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5226 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5228 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a block_group_record from the BLOCK_GROUP_ITEM at @slot.
 * The cache range is [objectid, objectid + offset) per the on-disk
 * block-group key convention.
 */
5235 struct block_group_record *
5236 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5239 struct btrfs_block_group_item *ptr;
5240 struct block_group_record *rec;
5242 rec = calloc(1, sizeof(*rec));
5244 fprintf(stderr, "memory allocation failed\n");
5248 rec->cache.start = key->objectid;
5249 rec->cache.size = key->offset;
5251 rec->generation = btrfs_header_generation(leaf);
5253 rec->objectid = key->objectid;
5254 rec->type = key->type;
5255 rec->offset = key->offset;
5257 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5258 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5260 INIT_LIST_HEAD(&rec->list);
/*
 * Cache the BLOCK_GROUP_ITEM at @slot; a duplicate insertion (range
 * already present) is reported.
 */
5265 static int process_block_group_item(struct block_group_tree *block_group_cache,
5266 struct btrfs_key *key,
5267 struct extent_buffer *eb, int slot)
5269 struct block_group_record *rec;
5272 rec = btrfs_new_block_group_record(eb, key, slot);
5273 ret = insert_block_group_record(block_group_cache, rec);
5275 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5276 rec->objectid, rec->offset);
/*
 * Build a device_extent_record from the DEV_EXTENT item at @slot.
 * The cache is keyed by (devid, physical offset) — hence both
 * cache.objectid and cache.start are filled in.
 */
5283 struct device_extent_record *
5284 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5285 struct btrfs_key *key, int slot)
5287 struct device_extent_record *rec;
5288 struct btrfs_dev_extent *ptr;
5290 rec = calloc(1, sizeof(*rec));
5292 fprintf(stderr, "memory allocation failed\n");
5296 rec->cache.objectid = key->objectid;
5297 rec->cache.start = key->offset;
5299 rec->generation = btrfs_header_generation(leaf);
5301 rec->objectid = key->objectid;
5302 rec->type = key->type;
5303 rec->offset = key->offset;
5305 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5306 rec->chunk_objecteid =
5307 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5309 btrfs_dev_extent_chunk_offset(leaf, ptr);
5310 rec->length = btrfs_dev_extent_length(leaf, ptr);
5311 rec->cache.size = rec->length;
5313 INIT_LIST_HEAD(&rec->chunk_list);
5314 INIT_LIST_HEAD(&rec->device_list);
/*
 * Cache the DEV_EXTENT item at @slot; a duplicate insertion is
 * reported with the full (devid, offset, length) triple.
 */
5320 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5321 struct btrfs_key *key, struct extent_buffer *eb,
5324 struct device_extent_record *rec;
5327 rec = btrfs_new_device_extent_record(eb, key, slot);
5328 ret = insert_device_extent_record(dev_extent_cache, rec);
5331 "Device extent[%llu, %llu, %llu] existed.\n",
5332 rec->objectid, rec->offset, rec->length);
/*
 * Parse an EXTENT_ITEM / METADATA_ITEM at @slot: create/update the
 * extent record with its refcount, then walk the inline refs and
 * register each as a tree or data backref.  Misaligned or wrongly
 * sized extents are reported and ignored; v0 items (smaller than
 * struct btrfs_extent_item) take the compat path.
 */
5339 static int process_extent_item(struct btrfs_root *root,
5340 struct cache_tree *extent_cache,
5341 struct extent_buffer *eb, int slot)
5343 struct btrfs_extent_item *ei;
5344 struct btrfs_extent_inline_ref *iref;
5345 struct btrfs_extent_data_ref *dref;
5346 struct btrfs_shared_data_ref *sref;
5347 struct btrfs_key key;
5348 struct extent_record tmpl;
5353 u32 item_size = btrfs_item_size_nr(eb, slot);
5359 btrfs_item_key_to_cpu(eb, &key, slot);
/* METADATA_ITEM encodes the level in offset; size is one node. */
5361 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5363 num_bytes = root->nodesize;
5365 num_bytes = key.offset;
5368 if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5369 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5370 key.objectid, root->sectorsize);
/* Old v0 extent item: only a refcount, no flags or inline refs. */
5373 if (item_size < sizeof(*ei)) {
5374 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5375 struct btrfs_extent_item_v0 *ei0;
5376 BUG_ON(item_size != sizeof(*ei0));
5377 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5378 refs = btrfs_extent_refs_v0(eb, ei0);
5382 memset(&tmpl, 0, sizeof(tmpl));
5383 tmpl.start = key.objectid;
5384 tmpl.nr = num_bytes;
5385 tmpl.extent_item_refs = refs;
5386 tmpl.metadata = metadata;
5388 tmpl.max_size = num_bytes;
5390 return add_extent_rec(extent_cache, &tmpl);
5393 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5394 refs = btrfs_extent_refs(eb, ei);
5395 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5399 if (metadata && num_bytes != root->nodesize) {
5400 error("ignore invalid metadata extent, length %llu does not equal to %u",
5401 num_bytes, root->nodesize);
5404 if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5405 error("ignore invalid data extent, length %llu is not aligned to %u",
5406 num_bytes, root->sectorsize);
5410 memset(&tmpl, 0, sizeof(tmpl));
5411 tmpl.start = key.objectid;
5412 tmpl.nr = num_bytes;
5413 tmpl.extent_item_refs = refs;
5414 tmpl.metadata = metadata;
5416 tmpl.max_size = num_bytes;
5417 add_extent_rec(extent_cache, &tmpl);
5419 ptr = (unsigned long)(ei + 1);
/* EXTENT_ITEM tree blocks carry a tree_block_info before the refs. */
5420 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5421 key.type == BTRFS_EXTENT_ITEM_KEY)
5422 ptr += sizeof(struct btrfs_tree_block_info);
5424 end = (unsigned long)ei + item_size;
/* Walk the inline refs packed after the extent item header. */
5426 iref = (struct btrfs_extent_inline_ref *)ptr;
5427 type = btrfs_extent_inline_ref_type(eb, iref);
5428 offset = btrfs_extent_inline_ref_offset(eb, iref);
5430 case BTRFS_TREE_BLOCK_REF_KEY:
5431 ret = add_tree_backref(extent_cache, key.objectid,
5434 error("add_tree_backref failed: %s",
5437 case BTRFS_SHARED_BLOCK_REF_KEY:
5438 ret = add_tree_backref(extent_cache, key.objectid,
5441 error("add_tree_backref failed: %s",
5444 case BTRFS_EXTENT_DATA_REF_KEY:
5445 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5446 add_data_backref(extent_cache, key.objectid, 0,
5447 btrfs_extent_data_ref_root(eb, dref),
5448 btrfs_extent_data_ref_objectid(eb,
5450 btrfs_extent_data_ref_offset(eb, dref),
5451 btrfs_extent_data_ref_count(eb, dref),
5454 case BTRFS_SHARED_DATA_REF_KEY:
5455 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5456 add_data_backref(extent_cache, key.objectid, offset,
5458 btrfs_shared_data_ref_count(eb, sref),
5462 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5463 key.objectid, key.type, num_bytes);
5466 ptr += btrfs_extent_inline_ref_size(type);
/*
 * check_cache_range: verify that the free-space cache of @cache contains
 * an entry exactly covering [offset, offset+bytes), after carving out any
 * superblock mirror stripes that overlap the range (super copies are never
 * free space).  Recurses when a super stripe lands mid-range.
 * (Partial extract: intermediate lines of the original are elided.)
 */
5473 static int check_cache_range(struct btrfs_root *root,
5474 struct btrfs_block_group_cache *cache,
5475 u64 offset, u64 bytes)
5477 struct btrfs_free_space *entry;
/* Trim every superblock mirror copy that maps into this block group. */
5483 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5484 bytenr = btrfs_sb_offset(i);
5485 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5486 cache->key.objectid, bytenr, 0,
5487 &logical, &nr, &stripe_len);
/* Stripe entirely outside the range being checked: nothing to trim. */
5492 if (logical[nr] + stripe_len <= offset)
5494 if (offset + bytes <= logical[nr])
/* Stripe starts exactly at our offset: trim it off the front. */
5496 if (logical[nr] == offset) {
5497 if (stripe_len >= bytes) {
5501 bytes -= stripe_len;
5502 offset += stripe_len;
/* Stripe starts before our offset: advance past its end. */
5503 } else if (logical[nr] < offset) {
5504 if (logical[nr] + stripe_len >=
5509 bytes = (offset + bytes) -
5510 (logical[nr] + stripe_len);
5511 offset = logical[nr] + stripe_len;
5514 * Could be tricky, the super may land in the
5515 * middle of the area we're checking. First
5516 * check the easiest case, it's at the end.
5518 if (logical[nr] + stripe_len >=
5520 bytes = logical[nr] - offset;
5524 /* Check the left side */
5525 ret = check_cache_range(root, cache,
5527 logical[nr] - offset);
5533 /* Now we continue with the right side */
5534 bytes = (offset + bytes) -
5535 (logical[nr] + stripe_len);
5536 offset = logical[nr] + stripe_len;
/* The cache must hold one entry matching the trimmed range exactly. */
5543 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5545 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5546 offset, offset+bytes);
5550 if (entry->offset != offset) {
5551 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5556 if (entry->bytes != bytes) {
5557 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5558 bytes, entry->bytes, offset);
/* Consume the verified entry so leftovers can be detected by the caller. */
5562 unlink_free_space(cache->free_space_ctl, entry);
/*
 * verify_space_cache: walk the extent tree across @cache's byte range and
 * check that every gap between allocated extents is present in the loaded
 * free-space entries (via check_cache_range).  Any entry left over at the
 * end indicates free space that should not exist.
 * (Partial extract: intermediate lines of the original are elided.)
 */
5567 static int verify_space_cache(struct btrfs_root *root,
5568 struct btrfs_block_group_cache *cache)
5570 struct btrfs_path *path;
5571 struct extent_buffer *leaf;
5572 struct btrfs_key key;
5576 path = btrfs_alloc_path();
5580 root = root->fs_info->extent_root;
/* Never scan below the first superblock copy. */
5582 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5584 key.objectid = last;
5586 key.type = BTRFS_EXTENT_ITEM_KEY;
5588 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5593 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5594 ret = btrfs_next_leaf(root, path);
5602 leaf = path->nodes[0];
5603 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
/* Stop once the key leaves this block group. */
5604 if (key.objectid >= cache->key.offset + cache->key.objectid)
5606 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5607 key.type != BTRFS_METADATA_ITEM_KEY) {
5612 if (last == key.objectid) {
/* METADATA_ITEM offset holds the level; its length is nodesize. */
5613 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5614 last = key.objectid + key.offset;
5616 last = key.objectid + root->nodesize;
/* Gap between previous extent end and this extent must be free space. */
5621 ret = check_cache_range(root, cache, last,
5622 key.objectid - last);
5625 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5626 last = key.objectid + key.offset;
5628 last = key.objectid + root->nodesize;
/* The tail of the block group after the last extent must be free too. */
5632 if (last < cache->key.objectid + cache->key.offset)
5633 ret = check_cache_range(root, cache, last,
5634 cache->key.objectid +
5635 cache->key.offset - last);
5638 btrfs_free_path(path);
/* Entries not consumed by check_cache_range are bogus free space. */
5641 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5642 fprintf(stderr, "There are still entries left in the space "
/*
 * check_space_cache: validate the cached free space of every block group
 * against the extent tree.  Uses the free space tree when the compat_ro
 * feature bit is set, otherwise the v1 space cache file.  Returns -EINVAL
 * if any block group fails validation.
 * (Partial extract: intermediate lines of the original are elided.)
 */
5650 static int check_space_cache(struct btrfs_root *root)
5652 struct btrfs_block_group_cache *cache;
/* Start past the primary superblock. */
5653 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
/* Stale cache generation: the kernel would rebuild the cache anyway. */
5657 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5658 btrfs_super_generation(root->fs_info->super_copy) !=
5659 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5660 printf("cache and super generation don't match, space cache "
5661 "will be invalidated\n");
5665 if (ctx.progress_enabled) {
5666 ctx.tp = TASK_FREE_SPACE;
5667 task_start(ctx.info);
/* Iterate block groups in address order. */
5671 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5675 start = cache->key.objectid + cache->key.offset;
5676 if (!cache->free_space_ctl) {
5677 if (btrfs_init_free_space_ctl(cache,
5678 root->sectorsize)) {
5683 btrfs_remove_free_space_cache(cache);
/* v2: load from the free space tree, excluding super stripes first. */
5686 if (btrfs_fs_compat_ro(root->fs_info,
5687 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5688 ret = exclude_super_stripes(root, cache);
5690 fprintf(stderr, "could not exclude super stripes: %s\n",
5695 ret = load_free_space_tree(root->fs_info, cache);
5696 free_excluded_extents(root, cache);
5698 fprintf(stderr, "could not load free space tree: %s\n",
/* v1: load the space cache inode contents. */
5705 ret = load_free_space_cache(root->fs_info, cache);
5710 ret = verify_space_cache(root, cache);
5712 fprintf(stderr, "cache appears valid but isn't %Lu\n",
5713 cache->key.objectid);
5718 task_stop(ctx.info);
5720 return error ? -EINVAL : 0;
/*
 * check_extent_csums: read the data extent [bytenr, bytenr+num_bytes) and
 * compare the checksum of each sector against the csum bytes stored at
 * @leaf_offset in @eb, retrying other mirrors when a mismatch is found.
 * (Partial extract: intermediate lines of the original are elided.)
 */
5723 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5724 u64 num_bytes, unsigned long leaf_offset,
5725 struct extent_buffer *eb) {
5728 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5730 unsigned long csum_offset;
5734 u64 data_checked = 0;
/* Data extents must be sector aligned. */
5740 if (num_bytes % root->sectorsize)
5743 data = malloc(num_bytes);
5747 while (offset < num_bytes) {
5750 read_len = num_bytes - offset;
5751 /* read as much data as possible in one pass */
5752 ret = read_extent_data(root, data + offset,
5753 bytenr + offset, &read_len, mirror);
5757 /* verify the checksum of each sector that was read */
5758 while (data_checked < read_len) {
5760 tmp = offset + data_checked;
5762 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5763 csum, root->sectorsize);
5764 btrfs_csum_final(csum, (u8 *)&csum);
/* Locate the expected csum for this sector inside the csum item. */
5766 csum_offset = leaf_offset +
5767 tmp / root->sectorsize * csum_size;
5768 read_extent_buffer(eb, (char *)&csum_expected,
5769 csum_offset, csum_size);
5770 /* on mismatch, report it and try the next mirror */
5771 if (csum != csum_expected) {
5772 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5773 mirror, bytenr + tmp,
5774 csum, csum_expected);
5775 num_copies = btrfs_num_copies(
5776 &root->fs_info->mapping_tree,
5778 if (mirror < num_copies - 1) {
5783 data_checked += root->sectorsize;
/*
 * check_extent_exists: confirm that every byte of [bytenr, bytenr +
 * num_bytes) is covered by EXTENT_ITEMs in the extent tree.  The range is
 * shrunk as coverage is found; if an extent splits the range in two, the
 * right half is handled by recursion.  Reports any uncovered csum range.
 * (Partial extract: intermediate lines of the original are elided.)
 */
5792 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5795 struct btrfs_path *path;
5796 struct extent_buffer *leaf;
5797 struct btrfs_key key;
5800 path = btrfs_alloc_path();
5802 fprintf(stderr, "Error allocating path\n");
5806 key.objectid = bytenr;
5807 key.type = BTRFS_EXTENT_ITEM_KEY;
5808 key.offset = (u64)-1;
5811 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5814 fprintf(stderr, "Error looking up extent record %d\n", ret);
5815 btrfs_free_path(path);
/* Step back one slot: we searched for offset (u64)-1 and landed after. */
5818 if (path->slots[0] > 0) {
5821 ret = btrfs_prev_leaf(root, path);
5824 } else if (ret > 0) {
5831 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5834 * Block group items come before extent items if they have the same
5835 * bytenr, so walk back one more just in case. Dear future traveller,
5836 * first congrats on mastering time travel. Now if it's not too much
5837 * trouble could you go back to 2006 and tell Chris to make the
5838 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5839 * EXTENT_ITEM_KEY please?
5841 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5842 if (path->slots[0] > 0) {
5845 ret = btrfs_prev_leaf(root, path);
5848 } else if (ret > 0) {
5853 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5857 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5858 ret = btrfs_next_leaf(root, path);
5860 fprintf(stderr, "Error going to next leaf "
5862 btrfs_free_path(path);
5868 leaf = path->nodes[0];
5869 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5870 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent ends before our range: keep scanning forward. */
5874 if (key.objectid + key.offset < bytenr) {
5878 if (key.objectid > bytenr + num_bytes)
/* Extent starts exactly at the range: trim coverage off the front. */
5881 if (key.objectid == bytenr) {
5882 if (key.offset >= num_bytes) {
5886 num_bytes -= key.offset;
5887 bytenr += key.offset;
/* Extent starts before the range: trim the overlapped prefix. */
5888 } else if (key.objectid < bytenr) {
5889 if (key.objectid + key.offset >= bytenr + num_bytes) {
5893 num_bytes = (bytenr + num_bytes) -
5894 (key.objectid + key.offset);
5895 bytenr = key.objectid + key.offset;
5897 if (key.objectid + key.offset < bytenr + num_bytes) {
5898 u64 new_start = key.objectid + key.offset;
5899 u64 new_bytes = bytenr + num_bytes - new_start;
5902 * Weird case, the extent is in the middle of
5903 * our range, we'll have to search one side
5904 * and then the other. Not sure if this happens
5905 * in real life, but no harm in coding it up
5906 * anyway just in case.
5908 btrfs_release_path(path);
5909 ret = check_extent_exists(root, new_start,
5912 fprintf(stderr, "Right section didn't "
5916 num_bytes = key.objectid - bytenr;
5919 num_bytes = key.objectid - bytenr;
/* Anything left uncovered means csums exist without a backing extent. */
5926 if (num_bytes && !ret) {
5927 fprintf(stderr, "There are no extents for csum range "
5928 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5932 btrfs_free_path(path);
/*
 * check_csums: walk the whole csum tree.  When check_data_csum is set the
 * actual data checksums are verified (check_extent_csums); additionally,
 * at every discontinuity between csum items the accumulated csummed range
 * is checked to be backed by real data extents (check_extent_exists).
 * (Partial extract: intermediate lines of the original are elided.)
 */
5936 static int check_csums(struct btrfs_root *root)
5938 struct btrfs_path *path;
5939 struct extent_buffer *leaf;
5940 struct btrfs_key key;
5941 u64 offset = 0, num_bytes = 0;
5942 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5946 unsigned long leaf_offset;
5948 root = root->fs_info->csum_root;
5949 if (!extent_buffer_uptodate(root->node)) {
5950 fprintf(stderr, "No valid csum tree found\n");
5954 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5955 key.type = BTRFS_EXTENT_CSUM_KEY;
5958 path = btrfs_alloc_path();
5962 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5964 fprintf(stderr, "Error searching csum tree %d\n", ret);
5965 btrfs_free_path(path);
5969 if (ret > 0 && path->slots[0])
5974 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5975 ret = btrfs_next_leaf(root, path);
5977 fprintf(stderr, "Error going to next leaf "
5984 leaf = path->nodes[0];
5986 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5987 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* One csum item covers (item_size / csum_size) sectors of data. */
5992 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5993 csum_size) * root->sectorsize;
5994 if (!check_data_csum)
5995 goto skip_csum_check;
5996 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5997 ret = check_extent_csums(root, key.offset, data_len,
6003 offset = key.offset;
/* Discontinuity: verify the accumulated range had extents behind it. */
6004 } else if (key.offset != offset + num_bytes) {
6005 ret = check_extent_exists(root, offset, num_bytes);
6007 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6008 "there is no extent record\n",
6009 offset, offset+num_bytes);
6012 offset = key.offset;
6015 num_bytes += data_len;
6019 btrfs_free_path(path);
/*
 * is_dropped_key: nonzero when @key sorts strictly before @drop_key in
 * btrfs key order (objectid, then type, then offset), i.e. the key was
 * already processed by an interrupted subtree drop.
 * (Partial extract: the "return" lines of the original are elided.)
 */
6023 static int is_dropped_key(struct btrfs_key *key,
6024 struct btrfs_key *drop_key) {
6025 if (key->objectid < drop_key->objectid)
6027 else if (key->objectid == drop_key->objectid) {
6028 if (key->type < drop_key->type)
6030 else if (key->type == drop_key->type) {
6031 if (key->offset < drop_key->offset)
6039 * Here are the rules for FULL_BACKREF.
6041 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6042 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6044 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
6045 * if it happened after the relocation occurred since we'll have dropped the
6046 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6047 * have no real way to know for sure.
6049 * We process the blocks one root at a time, and we start from the lowest root
6050 * objectid and go to the highest. So we can just lookup the owner backref for
6051 * the record and if we don't find it then we know it doesn't exist and we have
6054 * FIXME: if we ever start reclaiming root objectids then we need to fix this
6055 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6056 * be set or not and then we can check later once we've gathered all the refs.
/*
 * calc_extent_flag: decide whether tree block @buf should have
 * BTRFS_BLOCK_FLAG_FULL_BACKREF set in *@flags, based on the block's
 * owner, header flags and the backrefs recorded so far, and mark the
 * extent record bad when the recorded full-backref state contradicts
 * the decision.
 * (Partial extract: intermediate lines of the original are elided.)
 */
6058 static int calc_extent_flag(struct btrfs_root *root,
6059 struct cache_tree *extent_cache,
6060 struct extent_buffer *buf,
6061 struct root_item_record *ri,
6064 struct extent_record *rec;
6065 struct cache_extent *cache;
6066 struct tree_backref *tback;
6069 cache = lookup_cache_extent(extent_cache, buf->start, 1);
6070 /* we have added this extent before */
6074 rec = container_of(cache, struct extent_record, cache);
6077 * Except file/reloc tree, we can not have
/* Non-fs trees (objectid < FIRST_FREE_OBJECTID) never use full backref. */
6080 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* The root's own node cannot be a shared (full-backref) block. */
6085 if (buf->start == ri->bytenr)
/* A RELOC-flagged header always implies FULL_BACKREF. */
6088 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6091 owner = btrfs_header_owner(buf);
6092 if (owner == ri->objectid)
/* presumably: a missing owner backref means full backref is in use --
 * the deciding lines are elided here, confirm against the full source */
6095 tback = find_tree_backref(rec, 0, owner);
6100 if (rec->flag_block_full_backref != FLAG_UNSET &&
6101 rec->flag_block_full_backref != 0)
6102 rec->bad_full_backref = 1;
6105 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6106 if (rec->flag_block_full_backref != FLAG_UNSET &&
6107 rec->flag_block_full_backref != 1)
6108 rec->bad_full_backref = 1;
/*
 * report_mismatch_key_root: print a diagnostic naming a key type that was
 * found in a tree (root id) where that type is not valid.
 */
6112 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6114 fprintf(stderr, "Invalid key type(");
6115 print_key_type(stderr, 0, key_type);
6116 fprintf(stderr, ") found in root(");
6117 print_objectid(stderr, rootid, 0);
6118 fprintf(stderr, ")\n");
6122 * Check if the key is valid with its extent buffer.
6124 * This is an early check in case an invalid key exists in an extent buffer
6125 * This is not comprehensive yet, but should prevent wrong key/item passed
6128 static int check_type_with_root(u64 rootid, u8 key_type)
6131 /* Only valid in chunk tree */
6132 case BTRFS_DEV_ITEM_KEY:
6133 case BTRFS_CHUNK_ITEM_KEY:
6134 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6137 /* valid in csum and log tree */
6138 case BTRFS_CSUM_TREE_OBJECTID:
6139 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6143 case BTRFS_EXTENT_ITEM_KEY:
6144 case BTRFS_METADATA_ITEM_KEY:
6145 case BTRFS_BLOCK_GROUP_ITEM_KEY:
6146 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6149 case BTRFS_ROOT_ITEM_KEY:
6150 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6153 case BTRFS_DEV_EXTENT_KEY:
6154 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6160 report_mismatch_key_root(key_type, rootid);
/*
 * run_next_block: one step of the scan over all tree blocks.  Pops the
 * next queued block, reads it, works out its FULL_BACKREF state, sanity
 * checks it, then either processes every item (leaf) into the various
 * caches or queues every child pointer (interior node), while updating
 * the global accounting counters.
 * (Partial extract: intermediate lines of the original are elided.)
 */
6164 static int run_next_block(struct btrfs_root *root,
6165 struct block_info *bits,
6168 struct cache_tree *pending,
6169 struct cache_tree *seen,
6170 struct cache_tree *reada,
6171 struct cache_tree *nodes,
6172 struct cache_tree *extent_cache,
6173 struct cache_tree *chunk_cache,
6174 struct rb_root *dev_cache,
6175 struct block_group_tree *block_group_cache,
6176 struct device_extent_tree *dev_extent_cache,
6177 struct root_item_record *ri)
6179 struct extent_buffer *buf;
6180 struct extent_record *rec = NULL;
6191 struct btrfs_key key;
6192 struct cache_extent *cache;
6195 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6196 bits_nr, &reada_bits);
/* Issue readahead for the batch of blocks we are about to process. */
6201 for(i = 0; i < nritems; i++) {
6202 ret = add_cache_extent(reada, bits[i].start,
6207 /* fixme, get the parent transid */
6208 readahead_tree_block(root, bits[i].start,
6212 *last = bits[0].start;
6213 bytenr = bits[0].start;
6214 size = bits[0].size;
/* Drop the chosen block from all the pending queues. */
6216 cache = lookup_cache_extent(pending, bytenr, size);
6218 remove_cache_extent(pending, cache);
6221 cache = lookup_cache_extent(reada, bytenr, size);
6223 remove_cache_extent(reada, cache);
6226 cache = lookup_cache_extent(nodes, bytenr, size);
6228 remove_cache_extent(nodes, cache);
6231 cache = lookup_cache_extent(extent_cache, bytenr, size);
6233 rec = container_of(cache, struct extent_record, cache);
6234 gen = rec->parent_generation;
6237 /* fixme, get the real parent transid */
6238 buf = read_tree_block(root, bytenr, size, gen);
6239 if (!extent_buffer_uptodate(buf)) {
6240 record_bad_block_io(root->fs_info,
6241 extent_cache, bytenr, size);
6245 nritems = btrfs_header_nritems(buf);
/* Prefer the on-disk extent flags unless rebuilding the extent tree. */
6248 if (!init_extent_tree) {
6249 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6250 btrfs_header_level(buf), 1, NULL,
6253 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6255 fprintf(stderr, "Couldn't calc extent flags\n");
6256 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6261 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6263 fprintf(stderr, "Couldn't calc extent flags\n");
6264 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6268 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6270 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6271 ri->objectid == btrfs_header_owner(buf)) {
6273 * Ok we got to this block from it's original owner and
6274 * we have FULL_BACKREF set. Relocation can leave
6275 * converted blocks over so this is altogether possible,
6276 * however it's not possible if the generation > the
6277 * last snapshot, so check for this case.
6279 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6280 btrfs_header_generation(buf) > ri->last_snapshot) {
6281 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6282 rec->bad_full_backref = 1;
6287 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6288 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6289 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6290 rec->bad_full_backref = 1;
/* Remember the resolved full-backref state on the extent record. */
6294 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6295 rec->flag_block_full_backref = 1;
6299 rec->flag_block_full_backref = 0;
6301 owner = btrfs_header_owner(buf);
6304 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: account wasted space and process every item by key type. */
6308 if (btrfs_is_leaf(buf)) {
6309 btree_space_waste += btrfs_leaf_free_space(root, buf);
6310 for (i = 0; i < nritems; i++) {
6311 struct btrfs_file_extent_item *fi;
6312 btrfs_item_key_to_cpu(buf, &key, i);
6314 * Check key type against the leaf owner.
6315 * Could filter quite a lot of early error if
6318 if (check_type_with_root(btrfs_header_owner(buf),
6320 fprintf(stderr, "ignoring invalid key\n");
6323 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6324 process_extent_item(root, extent_cache, buf,
6328 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6329 process_extent_item(root, extent_cache, buf,
6333 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6335 btrfs_item_size_nr(buf, i);
6338 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6339 process_chunk_item(chunk_cache, &key, buf, i);
6342 if (key.type == BTRFS_DEV_ITEM_KEY) {
6343 process_device_item(dev_cache, &key, buf, i);
6346 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6347 process_block_group_item(block_group_cache,
6351 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6352 process_device_extent_item(dev_extent_cache,
6357 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6358 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6359 process_extent_ref_v0(extent_cache, buf, i);
6366 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6367 ret = add_tree_backref(extent_cache,
6368 key.objectid, 0, key.offset, 0);
6370 error("add_tree_backref failed: %s",
6374 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6375 ret = add_tree_backref(extent_cache,
6376 key.objectid, key.offset, 0, 0);
6378 error("add_tree_backref failed: %s",
6382 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6383 struct btrfs_extent_data_ref *ref;
6384 ref = btrfs_item_ptr(buf, i,
6385 struct btrfs_extent_data_ref);
6386 add_data_backref(extent_cache,
6388 btrfs_extent_data_ref_root(buf, ref),
6389 btrfs_extent_data_ref_objectid(buf,
6391 btrfs_extent_data_ref_offset(buf, ref),
6392 btrfs_extent_data_ref_count(buf, ref),
6393 0, root->sectorsize);
6396 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6397 struct btrfs_shared_data_ref *ref;
6398 ref = btrfs_item_ptr(buf, i,
6399 struct btrfs_shared_data_ref);
6400 add_data_backref(extent_cache,
6401 key.objectid, key.offset, 0, 0, 0,
6402 btrfs_shared_data_ref_count(buf, ref),
6403 0, root->sectorsize);
/* Remember orphan items so they can be deleted during repair. */
6406 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6407 struct bad_item *bad;
6409 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6413 bad = malloc(sizeof(struct bad_item));
6416 INIT_LIST_HEAD(&bad->list);
6417 memcpy(&bad->key, &key,
6418 sizeof(struct btrfs_key));
6419 bad->root_id = owner;
6420 list_add_tail(&bad->list, &delete_items);
/* Everything below handles regular file extent items. */
6423 if (key.type != BTRFS_EXTENT_DATA_KEY)
6425 fi = btrfs_item_ptr(buf, i,
6426 struct btrfs_file_extent_item);
6427 if (btrfs_file_extent_type(buf, fi) ==
6428 BTRFS_FILE_EXTENT_INLINE)
6430 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6433 data_bytes_allocated +=
6434 btrfs_file_extent_disk_num_bytes(buf, fi);
6435 if (data_bytes_allocated < root->sectorsize) {
6438 data_bytes_referenced +=
6439 btrfs_file_extent_num_bytes(buf, fi);
6440 add_data_backref(extent_cache,
6441 btrfs_file_extent_disk_bytenr(buf, fi),
6442 parent, owner, key.objectid, key.offset -
6443 btrfs_file_extent_offset(buf, fi), 1, 1,
6444 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Interior node: record and queue every child block pointer. */
6448 struct btrfs_key first_key;
6450 first_key.objectid = 0;
6453 btrfs_item_key_to_cpu(buf, &first_key, 0);
6454 level = btrfs_header_level(buf);
6455 for (i = 0; i < nritems; i++) {
6456 struct extent_record tmpl;
6458 ptr = btrfs_node_blockptr(buf, i);
6459 size = root->nodesize;
6460 btrfs_node_key_to_cpu(buf, &key, i);
/* Skip keys already dropped by an interrupted snapshot removal. */
6462 if ((level == ri->drop_level)
6463 && is_dropped_key(&key, &ri->drop_key)) {
6468 memset(&tmpl, 0, sizeof(tmpl));
6469 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6470 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6475 tmpl.max_size = size;
6476 ret = add_extent_rec(extent_cache, &tmpl);
6480 ret = add_tree_backref(extent_cache, ptr, parent,
6483 error("add_tree_backref failed: %s",
6489 add_pending(nodes, seen, ptr, size);
6491 add_pending(pending, seen, ptr, size);
/* Account slack in this node's key pointer array. */
6494 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6495 nritems) * sizeof(struct btrfs_key_ptr);
/* Global accounting for the block just processed. */
6497 total_btree_bytes += buf->len;
6498 if (fs_root_objectid(btrfs_header_owner(buf)))
6499 total_fs_tree_bytes += buf->len;
6500 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6501 total_extent_tree_bytes += buf->len;
6502 if (!found_old_backref &&
6503 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6504 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6505 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6506 found_old_backref = 1;
6508 free_extent_buffer(buf);
/*
 * add_root_to_pending: queue a tree root block for the scan, record its
 * extent, and add the appropriate tree backref -- a shared (parent)
 * backref for reloc roots and pre-mixed-backref blocks, a keyed root
 * backref otherwise.
 * (Partial extract: intermediate lines of the original are elided.)
 */
6512 static int add_root_to_pending(struct extent_buffer *buf,
6513 struct cache_tree *extent_cache,
6514 struct cache_tree *pending,
6515 struct cache_tree *seen,
6516 struct cache_tree *nodes,
6519 struct extent_record tmpl;
/* Interior nodes go on the nodes queue, leaves on pending. */
6522 if (btrfs_header_level(buf) > 0)
6523 add_pending(nodes, seen, buf->start, buf->len);
6525 add_pending(pending, seen, buf->start, buf->len);
6527 memset(&tmpl, 0, sizeof(tmpl));
6528 tmpl.start = buf->start;
6533 tmpl.max_size = buf->len;
6534 add_extent_rec(extent_cache, &tmpl);
6536 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6537 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6538 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6541 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6546 /* as we fix the tree, we might be deleting blocks that
6547 * we're tracking for repair. This hook makes sure we
6548 * remove any backrefs for blocks as we are fixing them.
/*
 * Called when an extent is freed during repair: decrement or clear the
 * matching data/tree backref counts on the in-memory extent record so
 * the checker's bookkeeping follows the on-disk changes.
 * (Partial extract: intermediate lines of the original are elided.)
 */
6550 static int free_extent_hook(struct btrfs_trans_handle *trans,
6551 struct btrfs_root *root,
6552 u64 bytenr, u64 num_bytes, u64 parent,
6553 u64 root_objectid, u64 owner, u64 offset,
6556 struct extent_record *rec;
6557 struct cache_extent *cache;
6559 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* Data extents have owner >= FIRST_FREE_OBJECTID (inode numbers). */
6561 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6562 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6566 rec = container_of(cache, struct extent_record, cache);
6568 struct data_backref *back;
6569 back = find_data_backref(rec, parent, root_objectid, owner,
6570 offset, 1, bytenr, num_bytes);
6573 if (back->node.found_ref) {
6574 back->found_ref -= refs_to_drop;
6576 rec->refs -= refs_to_drop;
6578 if (back->node.found_extent_tree) {
6579 back->num_refs -= refs_to_drop;
6580 if (rec->extent_item_refs)
6581 rec->extent_item_refs -= refs_to_drop;
6583 if (back->found_ref == 0)
6584 back->node.found_ref = 0;
6585 if (back->num_refs == 0)
6586 back->node.found_extent_tree = 0;
/*
 * NOTE(review): this frees the backref when the extent-tree ref is gone
 * while a fs-tree ref is still set -- confirm against the full source
 * that this is intended rather than requiring both flags to be clear.
 */
6588 if (!back->node.found_extent_tree && back->node.found_ref) {
6589 list_del(&back->node.list);
6593 struct tree_backref *back;
6594 back = find_tree_backref(rec, parent, root_objectid);
6597 if (back->node.found_ref) {
6600 back->node.found_ref = 0;
6602 if (back->node.found_extent_tree) {
6603 if (rec->extent_item_refs)
6604 rec->extent_item_refs--;
6605 back->node.found_extent_tree = 0;
6607 if (!back->node.found_extent_tree && back->node.found_ref) {
6608 list_del(&back->node.list);
6612 maybe_free_extent_rec(extent_cache, rec);
/*
 * delete_extent_records: during repair, delete every extent tree item for
 * @bytenr (extent/metadata items and all backref item types), walking the
 * search key from high to low, and update block group accounting for the
 * extent items that were removed.
 * (Partial extract: intermediate lines of the original are elided.)
 */
6617 static int delete_extent_records(struct btrfs_trans_handle *trans,
6618 struct btrfs_root *root,
6619 struct btrfs_path *path,
6620 u64 bytenr, u64 new_len)
6622 struct btrfs_key key;
6623 struct btrfs_key found_key;
6624 struct extent_buffer *leaf;
6629 key.objectid = bytenr;
6631 key.offset = (u64)-1;
6634 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6641 if (path->slots[0] == 0)
6647 leaf = path->nodes[0];
6648 slot = path->slots[0];
6650 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6651 if (found_key.objectid != bytenr)
/* Only extent items and backref items are deleted. */
6654 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6655 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6656 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6657 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6658 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6659 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6660 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6661 btrfs_release_path(path);
/* Step the search key backwards to visit any remaining items. */
6662 if (found_key.type == 0) {
6663 if (found_key.offset == 0)
6665 key.offset = found_key.offset - 1;
6666 key.type = found_key.type;
6668 key.type = found_key.type - 1;
6669 key.offset = (u64)-1;
6673 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6674 found_key.objectid, found_key.type, found_key.offset);
6676 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6679 btrfs_release_path(path);
/* Give back the deleted extent's bytes to the block group counters. */
6681 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6682 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6683 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6684 found_key.offset : root->nodesize;
6686 ret = btrfs_update_block_group(trans, root, bytenr,
6693 btrfs_release_path(path);
6698 * for a single backref, this will allocate a new extent
6699 * and add the backref to it.
/*
 * record_extent: during repair, insert a new extent item of rec->max_size
 * for @rec (when not already @allocated) and then attach @back to it via
 * btrfs_inc_extent_ref, updating block group accounting.
 *
 * Fixes applied vs. the original lines: "copy_key;;" had a stray second
 * semicolon, and four occurrences of "&copy_key" had been mangled into
 * the copyright sign (an "&copy"-entity rendering artifact).
 * (Partial extract: intermediate lines of the original are elided.)
 */
6701 static int record_extent(struct btrfs_trans_handle *trans,
6702 struct btrfs_fs_info *info,
6703 struct btrfs_path *path,
6704 struct extent_record *rec,
6705 struct extent_backref *back,
6706 int allocated, u64 flags)
6709 struct btrfs_root *extent_root = info->extent_root;
6710 struct extent_buffer *leaf;
6711 struct btrfs_key ins_key;
6712 struct btrfs_extent_item *ei;
6713 struct tree_backref *tback;
6714 struct data_backref *dback;
6715 struct btrfs_tree_block_info *bi;
/* Metadata extents are at least one node in size. */
6718 rec->max_size = max_t(u64, rec->max_size,
6719 info->extent_root->nodesize);
6722 u32 item_size = sizeof(*ei);
/* Tree block extents carry an extra btrfs_tree_block_info. */
6725 item_size += sizeof(*bi);
6727 ins_key.objectid = rec->start;
6728 ins_key.offset = rec->max_size;
6729 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6731 ret = btrfs_insert_empty_item(trans, extent_root, path,
6732 &ins_key, item_size);
6736 leaf = path->nodes[0];
6737 ei = btrfs_item_ptr(leaf, path->slots[0],
6738 struct btrfs_extent_item);
6740 btrfs_set_extent_refs(leaf, ei, 0);
6741 btrfs_set_extent_generation(leaf, ei, rec->generation);
6743 if (back->is_data) {
6744 btrfs_set_extent_flags(leaf, ei,
6745 BTRFS_EXTENT_FLAG_DATA);
6747 struct btrfs_disk_key copy_key;
6749 tback = to_tree_backref(back);
6750 bi = (struct btrfs_tree_block_info *)(ei + 1);
6751 memset_extent_buffer(leaf, 0, (unsigned long)bi,
/* Synthesize a zeroed key for the tree block info. */
6754 btrfs_set_disk_key_objectid(&copy_key,
6755 rec->info_objectid);
6756 btrfs_set_disk_key_type(&copy_key, 0);
6757 btrfs_set_disk_key_offset(&copy_key, 0);
6759 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6760 btrfs_set_tree_block_key(leaf, bi, &copy_key);
6762 btrfs_set_extent_flags(leaf, ei,
6763 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6766 btrfs_mark_buffer_dirty(leaf);
6767 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6768 rec->max_size, 1, 0);
6771 btrfs_release_path(path);
6774 if (back->is_data) {
6778 dback = to_data_backref(back);
6779 if (back->full_backref)
6780 parent = dback->parent;
/* Add one extent-tree ref per reference found in the fs trees. */
6784 for (i = 0; i < dback->found_ref; i++) {
6785 /* if parent != 0, we're doing a full backref
6786 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6787 * just makes the backref allocator create a data
6790 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6791 rec->start, rec->max_size,
6795 BTRFS_FIRST_FREE_OBJECTID :
6801 fprintf(stderr, "adding new data backref"
6802 " on %llu %s %llu owner %llu"
6803 " offset %llu found %d\n",
6804 (unsigned long long)rec->start,
6805 back->full_backref ?
6807 back->full_backref ?
6808 (unsigned long long)parent :
6809 (unsigned long long)dback->root,
6810 (unsigned long long)dback->owner,
6811 (unsigned long long)dback->offset,
6816 tback = to_tree_backref(back);
6817 if (back->full_backref)
6818 parent = tback->parent;
6822 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6823 rec->start, rec->max_size,
6824 parent, tback->root, 0, 0);
6825 fprintf(stderr, "adding new tree backref on "
6826 "start %llu len %llu parent %llu root %llu\n",
6827 rec->start, rec->max_size, parent, tback->root);
6830 btrfs_release_path(path);
/*
 * find_entry: linear search of @entries for the extent_entry matching
 * both @bytenr and @bytes; the elided tail returns NULL when no entry
 * matches.
 */
6834 static struct extent_entry *find_entry(struct list_head *entries,
6835 u64 bytenr, u64 bytes)
6837 struct extent_entry *entry = NULL;
6839 list_for_each_entry(entry, entries, list) {
6840 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * find_most_right_entry: pick the candidate entry with the highest vote
 * count, distrusting entries whose references were all broken; ties that
 * cannot be broken leave the result undecided (elided lines return NULL).
 * (Partial extract: intermediate lines of the original are elided.)
 */
6847 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6849 struct extent_entry *entry, *best = NULL, *prev = NULL;
6851 list_for_each_entry(entry, entries, list) {
6858 * If there are as many broken entries as entries then we know
6859 * not to trust this particular entry.
6861 if (entry->broken == entry->count)
6865 * If our current entry == best then we can't be sure our best
6866 * is really the best, so we need to keep searching.
6868 if (best && best->count == entry->count) {
6874 /* Prev == entry, not good enough, have to keep searching */
6875 if (!prev->broken && prev->count == entry->count)
6879 best = (prev->count > entry->count) ? prev : entry;
6880 else if (best->count < entry->count)
6888 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6889 struct data_backref *dback, struct extent_entry *entry)
6891 struct btrfs_trans_handle *trans;
6892 struct btrfs_root *root;
6893 struct btrfs_file_extent_item *fi;
6894 struct extent_buffer *leaf;
6895 struct btrfs_key key;
6899 key.objectid = dback->root;
6900 key.type = BTRFS_ROOT_ITEM_KEY;
6901 key.offset = (u64)-1;
6902 root = btrfs_read_fs_root(info, &key);
6904 fprintf(stderr, "Couldn't find root for our ref\n");
6909 * The backref points to the original offset of the extent if it was
6910 * split, so we need to search down to the offset we have and then walk
6911 * forward until we find the backref we're looking for.
6913 key.objectid = dback->owner;
6914 key.type = BTRFS_EXTENT_DATA_KEY;
6915 key.offset = dback->offset;
6916 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6918 fprintf(stderr, "Error looking up ref %d\n", ret);
6923 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6924 ret = btrfs_next_leaf(root, path);
6926 fprintf(stderr, "Couldn't find our ref, next\n");
6930 leaf = path->nodes[0];
6931 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6932 if (key.objectid != dback->owner ||
6933 key.type != BTRFS_EXTENT_DATA_KEY) {
6934 fprintf(stderr, "Couldn't find our ref, search\n");
6937 fi = btrfs_item_ptr(leaf, path->slots[0],
6938 struct btrfs_file_extent_item);
6939 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6940 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6942 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6947 btrfs_release_path(path);
6949 trans = btrfs_start_transaction(root, 1);
6951 return PTR_ERR(trans);
6954 * Ok we have the key of the file extent we want to fix, now we can cow
6955 * down to the thing and fix it.
6957 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6959 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6960 key.objectid, key.type, key.offset, ret);
6964 fprintf(stderr, "Well that's odd, we just found this key "
6965 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6970 leaf = path->nodes[0];
6971 fi = btrfs_item_ptr(leaf, path->slots[0],
6972 struct btrfs_file_extent_item);
6974 if (btrfs_file_extent_compression(leaf, fi) &&
6975 dback->disk_bytenr != entry->bytenr) {
6976 fprintf(stderr, "Ref doesn't match the record start and is "
6977 "compressed, please take a btrfs-image of this file "
6978 "system and send it to a btrfs developer so they can "
6979 "complete this functionality for bytenr %Lu\n",
6980 dback->disk_bytenr);
6985 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6986 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6987 } else if (dback->disk_bytenr > entry->bytenr) {
6988 u64 off_diff, offset;
6990 off_diff = dback->disk_bytenr - entry->bytenr;
6991 offset = btrfs_file_extent_offset(leaf, fi);
6992 if (dback->disk_bytenr + offset +
6993 btrfs_file_extent_num_bytes(leaf, fi) >
6994 entry->bytenr + entry->bytes) {
6995 fprintf(stderr, "Ref is past the entry end, please "
6996 "take a btrfs-image of this file system and "
6997 "send it to a btrfs developer, ref %Lu\n",
6998 dback->disk_bytenr);
7003 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7004 btrfs_set_file_extent_offset(leaf, fi, offset);
7005 } else if (dback->disk_bytenr < entry->bytenr) {
7008 offset = btrfs_file_extent_offset(leaf, fi);
7009 if (dback->disk_bytenr + offset < entry->bytenr) {
7010 fprintf(stderr, "Ref is before the entry start, please"
7011 " take a btrfs-image of this file system and "
7012 "send it to a btrfs developer, ref %Lu\n",
7013 dback->disk_bytenr);
7018 offset += dback->disk_bytenr;
7019 offset -= entry->bytenr;
7020 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7021 btrfs_set_file_extent_offset(leaf, fi, offset);
7024 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7027 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7028 * only do this if we aren't using compression, otherwise it's a
7031 if (!btrfs_file_extent_compression(leaf, fi))
7032 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7034 printf("ram bytes may be wrong?\n");
7035 btrfs_mark_buffer_dirty(leaf);
7037 err = btrfs_commit_transaction(trans, root);
7038 btrfs_release_path(path);
7039 return ret ? ret : err;
/*
 * Make all data backrefs of @rec agree on one (bytenr, bytes) extent and
 * repair the ones that do not.
 *
 * Builds a list of distinct (disk_bytenr, bytes) entries seen across the
 * data backrefs, picks a winner via find_most_right_entry() (falling back
 * to the extent record's own start/nr on an even split), then calls
 * repair_ref() on every backref that disagrees with the winner.
 *
 * Returns 0 when nothing needed fixing or the fixes succeeded, <0 on error.
 * NOTE(review): several lines are elided in this excerpt (declarations,
 * error paths, cleanup); confirm against the full source before changing.
 */
7042 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7043 struct extent_record *rec)
7045 struct extent_backref *back;
7046 struct data_backref *dback;
7047 struct extent_entry *entry, *best = NULL;
7050 int broken_entries = 0;
7055 * Metadata is easy and the backrefs should always agree on bytenr and
7056 * size, if not we've got bigger issues.
/* Pass 1: tally the (bytenr, bytes) combinations the data backrefs claim. */
7061 list_for_each_entry(back, &rec->backrefs, list) {
7062 if (back->full_backref || !back->is_data)
7065 dback = to_data_backref(back);
7068 * We only pay attention to backrefs that we found a real
7071 if (dback->found_ref == 0)
7075 * For now we only catch when the bytes don't match, not the
7076 * bytenr. We can easily do this at the same time, but I want
7077 * to have a fs image to test on before we just add repair
7078 * functionality willy-nilly so we know we won't screw up the
/* Reuse an existing entry for this (bytenr, bytes) pair, else allocate one. */
7082 entry = find_entry(&entries, dback->disk_bytenr,
7085 entry = malloc(sizeof(struct extent_entry));
7090 memset(entry, 0, sizeof(*entry));
7091 entry->bytenr = dback->disk_bytenr;
7092 entry->bytes = dback->bytes;
7093 list_add_tail(&entry->list, &entries);
7098 * If we only have on entry we may think the entries agree when
7099 * in reality they don't so we have to do some extra checking.
7101 if (dback->disk_bytenr != rec->start ||
7102 dback->bytes != rec->nr || back->broken)
7113 /* Yay all the backrefs agree, carry on good sir */
7114 if (nr_entries <= 1 && !mismatch)
7117 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7118 "%Lu\n", rec->start);
7121 * First we want to see if the backrefs can agree amongst themselves who
7122 * is right, so figure out which one of the entries has the highest
7125 best = find_most_right_entry(&entries);
7128 * Ok so we may have an even split between what the backrefs think, so
7129 * this is where we use the extent ref to see what it thinks.
7132 entry = find_entry(&entries, rec->start, rec->nr);
7133 if (!entry && (!broken_entries || !rec->found_rec)) {
7134 fprintf(stderr, "Backrefs don't agree with each other "
7135 "and extent record doesn't agree with anybody,"
7136 " so we can't fix bytenr %Lu bytes %Lu\n",
7137 rec->start, rec->nr);
7140 } else if (!entry) {
7142 * Ok our backrefs were broken, we'll assume this is the
7143 * correct value and add an entry for this range.
7145 entry = malloc(sizeof(struct extent_entry));
7150 memset(entry, 0, sizeof(*entry));
7151 entry->bytenr = rec->start;
7152 entry->bytes = rec->nr;
7153 list_add_tail(&entry->list, &entries);
/* Re-vote now that the extent record's own range has a ballot. */
7157 best = find_most_right_entry(&entries);
7159 fprintf(stderr, "Backrefs and extent record evenly "
7160 "split on who is right, this is going to "
7161 "require user input to fix bytenr %Lu bytes "
7162 "%Lu\n", rec->start, rec->nr);
7169 * I don't think this can happen currently as we'll abort() if we catch
7170 * this case higher up, but in case somebody removes that we still can't
7171 * deal with it properly here yet, so just bail out of that's the case.
7173 if (best->bytenr != rec->start) {
7174 fprintf(stderr, "Extent start and backref starts don't match, "
7175 "please use btrfs-image on this file system and send "
7176 "it to a btrfs developer so they can make fsck fix "
7177 "this particular case. bytenr is %Lu, bytes is %Lu\n",
7178 rec->start, rec->nr);
7184 * Ok great we all agreed on an extent record, let's go find the real
7185 * references and fix up the ones that don't match.
/* Pass 2: repair every real data backref that disagrees with the winner. */
7187 list_for_each_entry(back, &rec->backrefs, list) {
7188 if (back->full_backref || !back->is_data)
7191 dback = to_data_backref(back);
7194 * Still ignoring backrefs that don't have a real ref attached
7197 if (dback->found_ref == 0)
7200 if (dback->bytes == best->bytes &&
7201 dback->disk_bytenr == best->bytenr)
7204 ret = repair_ref(info, path, dback, best);
7210 * Ok we messed with the actual refs, which means we need to drop our
7211 * entire cache and go back and rescan. I know this is a huge pain and
7212 * adds a lot of extra work, but it's the only way to be safe. Once all
7213 * the backrefs agree we may not need to do anything to the extent
/* Free the temporary entry list regardless of outcome. */
7218 while (!list_empty(&entries)) {
7219 entry = list_entry(entries.next, struct extent_entry, list);
7220 list_del_init(&entry->list);
/*
 * Collapse a duplicate extent record chain into the record that was backed
 * by a real BTRFS_EXTENT_ITEM_KEY.
 *
 * Called for records on the global duplicate_extents list.  If @rec itself
 * was found in the extent tree, or there is more than one duplicate, the
 * caller must delete items instead, so this returns early.  Otherwise the
 * single duplicate ("good") replaces @rec in @extent_cache, absorbing
 * @rec's backrefs/refs and any overlapping cached records.
 *
 * Returns 0 when deletion work remains (duplicates still present),
 * 1 when the cache has been fixed up and no deletion is needed.
 * NOTE(review): lines are elided in this excerpt; error paths and some
 * braces are not visible.
 */
7226 static int process_duplicates(struct btrfs_root *root,
7227 struct cache_tree *extent_cache,
7228 struct extent_record *rec)
7230 struct extent_record *good, *tmp;
7231 struct cache_extent *cache;
7235 * If we found a extent record for this extent then return, or if we
7236 * have more than one duplicate we are likely going to need to delete
7239 if (rec->found_rec || rec->num_duplicates > 1)
7242 /* Shouldn't happen but just in case */
7243 BUG_ON(!rec->num_duplicates);
7246 * So this happens if we end up with a backref that doesn't match the
7247 * actual extent entry. So either the backref is bad or the extent
7248 * entry is bad. Either way we want to have the extent_record actually
7249 * reflect what we found in the extent_tree, so we need to take the
7250 * duplicate out and use that as the extent_record since the only way we
7251 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7253 remove_cache_extent(extent_cache, &rec->cache);
/* Promote the sole duplicate to be the canonical record. */
7255 good = to_extent_record(rec->dups.next);
7256 list_del_init(&good->list);
7257 INIT_LIST_HEAD(&good->backrefs);
7258 INIT_LIST_HEAD(&good->dups);
7259 good->cache.start = good->start;
7260 good->cache.size = good->nr;
7261 good->content_checked = 0;
7262 good->owner_ref_checked = 0;
7263 good->num_duplicates = 0;
7264 good->refs = rec->refs;
7265 list_splice_init(&rec->backrefs, &good->backrefs);
/* Sweep up any records in the cache that overlap the promoted range. */
7267 cache = lookup_cache_extent(extent_cache, good->start,
7271 tmp = container_of(cache, struct extent_record, cache);
7274 * If we find another overlapping extent and it's found_rec is
7275 * set then it's a duplicate and we need to try and delete
7278 if (tmp->found_rec || tmp->num_duplicates > 0) {
7279 if (list_empty(&good->list))
7280 list_add_tail(&good->list,
7281 &duplicate_extents);
7282 good->num_duplicates += tmp->num_duplicates + 1;
7283 list_splice_init(&tmp->dups, &good->dups);
7284 list_del_init(&tmp->list);
7285 list_add_tail(&tmp->list, &good->dups);
7286 remove_cache_extent(extent_cache, &tmp->cache);
7291 * Ok we have another non extent item backed extent rec, so lets
7292 * just add it to this extent and carry on like we did above.
7294 good->refs += tmp->refs;
7295 list_splice_init(&tmp->backrefs, &good->backrefs);
7296 remove_cache_extent(extent_cache, &tmp->cache);
7299 ret = insert_cache_extent(extent_cache, &good->cache);
/* 0 => duplicates remain and must be deleted; 1 => nothing left to delete. */
7302 return good->num_duplicates ? 0 : 1;
/*
 * Delete the extent items of duplicate records that are fully covered by
 * one "good" record.
 *
 * Picks the widest/earliest record among @rec and its dups list as the
 * keeper, refuses (with a message) if any duplicate only partially
 * overlaps it, then deletes the EXTENT_ITEM of every other record inside
 * a transaction on the extent root.
 *
 * Returns <0 on error, otherwise the number of records deleted (nr_del);
 * a positive return tells the caller the extent cache must be rebuilt.
 * NOTE(review): lines are elided in this excerpt (e.g. 'good'
 * initialization and several error branches are not visible).
 */
7305 static int delete_duplicate_records(struct btrfs_root *root,
7306 struct extent_record *rec)
7308 struct btrfs_trans_handle *trans;
7309 LIST_HEAD(delete_list);
7310 struct btrfs_path *path;
7311 struct extent_record *tmp, *good, *n;
7314 struct btrfs_key key;
7316 path = btrfs_alloc_path();
7323 /* Find the record that covers all of the duplicates. */
7324 list_for_each_entry(tmp, &rec->dups, list) {
7325 if (good->start < tmp->start)
7327 if (good->nr > tmp->nr)
7330 if (tmp->start + tmp->nr < good->start + good->nr) {
7331 fprintf(stderr, "Ok we have overlapping extents that "
7332 "aren't completely covered by each other, this "
7333 "is going to require more careful thought. "
7334 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7335 tmp->start, tmp->nr, good->start, good->nr);
/* Queue everything except the keeper for deletion. */
7342 list_add_tail(&rec->list, &delete_list);
7344 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7347 list_move_tail(&tmp->list, &delete_list);
7350 root = root->fs_info->extent_root;
7351 trans = btrfs_start_transaction(root, 1);
7352 if (IS_ERR(trans)) {
7353 ret = PTR_ERR(trans);
7357 list_for_each_entry(tmp, &delete_list, list) {
7358 if (tmp->found_rec == 0)
7360 key.objectid = tmp->start;
7361 key.type = BTRFS_EXTENT_ITEM_KEY;
7362 key.offset = tmp->nr;
7364 /* Shouldn't happen but just in case */
7365 if (tmp->metadata) {
7366 fprintf(stderr, "Well this shouldn't happen, extent "
7367 "record overlaps but is metadata? "
7368 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7372 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7378 ret = btrfs_del_item(trans, root, path);
7381 btrfs_release_path(path);
7384 err = btrfs_commit_transaction(trans, root);
/* Free the records we queued; the keeper stays alive in the cache. */
7388 while (!list_empty(&delete_list)) {
7389 tmp = to_extent_record(delete_list.next);
7390 list_del_init(&tmp->list);
7396 while (!list_empty(&rec->dups)) {
7397 tmp = to_extent_record(rec->dups.next);
7398 list_del_init(&tmp->list);
7402 btrfs_free_path(path);
7404 if (!ret && !nr_del)
7405 rec->num_duplicates = 0;
7407 return ret ? ret : nr_del;
/*
 * For backrefs of @rec that were never matched to an on-disk file extent
 * (found_ref == 0), look the file extent up directly in the owning fs
 * tree and, if its disk bytenr has no extent record of its own in
 * @extent_cache, adopt its (disk_bytenr, disk_num_bytes) into the backref
 * so verify_backrefs() has data to vote with.
 *
 * Returns 0 on success, <0 on lookup error.
 * NOTE(review): lines are elided in this excerpt; 'continue' statements
 * and some closing braces are not visible.
 */
7410 static int find_possible_backrefs(struct btrfs_fs_info *info,
7411 struct btrfs_path *path,
7412 struct cache_tree *extent_cache,
7413 struct extent_record *rec)
7415 struct btrfs_root *root;
7416 struct extent_backref *back;
7417 struct data_backref *dback;
7418 struct cache_extent *cache;
7419 struct btrfs_file_extent_item *fi;
7420 struct btrfs_key key;
7424 list_for_each_entry(back, &rec->backrefs, list) {
7425 /* Don't care about full backrefs (poor unloved backrefs) */
7426 if (back->full_backref || !back->is_data)
7429 dback = to_data_backref(back);
7431 /* We found this one, we don't need to do a lookup */
7432 if (dback->found_ref)
/* offset = (u64)-1 means "latest root item" for this root objectid. */
7435 key.objectid = dback->root;
7436 key.type = BTRFS_ROOT_ITEM_KEY;
7437 key.offset = (u64)-1;
7439 root = btrfs_read_fs_root(info, &key);
7441 /* No root, definitely a bad ref, skip */
7442 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7444 /* Other err, exit */
7446 return PTR_ERR(root);
/* Look up the exact file extent the backref claims to reference. */
7448 key.objectid = dback->owner;
7449 key.type = BTRFS_EXTENT_DATA_KEY;
7450 key.offset = dback->offset;
7451 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7453 btrfs_release_path(path);
7456 /* Didn't find it, we can carry on */
7461 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7462 struct btrfs_file_extent_item);
7463 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7464 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7465 btrfs_release_path(path);
7466 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7468 struct extent_record *tmp;
7469 tmp = container_of(cache, struct extent_record, cache);
7472 * If we found an extent record for the bytenr for this
7473 * particular backref then we can't add it to our
7474 * current extent record. We only want to add backrefs
7475 * that don't have a corresponding extent item in the
7476 * extent tree since they likely belong to this record
7477 * and we need to fix it if it doesn't match bytenrs.
/* Adopt the on-disk values into the previously-unmatched backref. */
7483 dback->found_ref += 1;
7484 dback->disk_bytenr = bytenr;
7485 dback->bytes = bytes;
7488 * Set this so the verify backref code knows not to trust the
7489 * values in this backref.
/*
 * Record orphan data refs of @rec into their owning fs roots' lists so
 * the inode/file-extent rebuild code can recreate the missing file
 * extents later (each ref becomes an orphan_data_extent on
 * dest_root->orphan_data_extents).
 */
7498 * Record orphan data ref into corresponding root.
7500 * Return 0 if the extent item contains data ref and recorded.
7501 * Return 1 if the extent item contains no useful data ref
7502 * On that case, it may contains only shared_dataref or metadata backref
7503 * or the file extent exists(this should be handled by the extent bytenr
7505 * Return <0 if something goes wrong.
7507 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7508 struct extent_record *rec)
7510 struct btrfs_key key;
7511 struct btrfs_root *dest_root;
7512 struct extent_backref *back;
7513 struct data_backref *dback;
7514 struct orphan_data_extent *orphan;
7515 struct btrfs_path *path;
7516 int recorded_data_ref = 0;
7521 path = btrfs_alloc_path();
/* Only data backrefs that exist in the extent tree but have no matching
 * file extent (found_ref == 0) are candidates. */
7524 list_for_each_entry(back, &rec->backrefs, list) {
7525 if (back->full_backref || !back->is_data ||
7526 !back->found_extent_tree)
7528 dback = to_data_backref(back);
7529 if (dback->found_ref)
7531 key.objectid = dback->root;
7532 key.type = BTRFS_ROOT_ITEM_KEY;
7533 key.offset = (u64)-1;
7535 dest_root = btrfs_read_fs_root(fs_info, &key);
7537 /* For non-exist root we just skip it */
7538 if (IS_ERR(dest_root) || !dest_root)
7541 key.objectid = dback->owner;
7542 key.type = BTRFS_EXTENT_DATA_KEY;
7543 key.offset = dback->offset;
7545 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7547 * For ret < 0, it's OK since the fs-tree may be corrupted,
7548 * we need to record it for inode/file extent rebuild.
7549 * For ret > 0, we record it only for file extent rebuild.
7550 * For ret == 0, the file extent exists but only bytenr
7551 * mismatch, let the original bytenr fix routine to handle,
7557 orphan = malloc(sizeof(*orphan));
7562 INIT_LIST_HEAD(&orphan->list);
7563 orphan->root = dback->root;
7564 orphan->objectid = dback->owner;
7565 orphan->offset = dback->offset;
7566 orphan->disk_bytenr = rec->cache.start;
7567 orphan->disk_len = rec->cache.size;
7568 list_add(&dest_root->orphan_data_extents, &orphan->list);
7569 recorded_data_ref = 1;
7572 btrfs_free_path(path);
/* Invert: 0 = at least one ref recorded, 1 = nothing useful found. */
7574 return !recorded_data_ref;
/*
 * Top-level repair for a broken extent record: verify/repair its backrefs,
 * delete the existing extent items for the range, and re-insert references
 * for every backref the tree scan actually found.  Skips re-insertion for
 * blocks marked corrupt.  Returns 0 on success, <0 on error; the commit
 * error (if any) is folded into the return near the end.
 */
7580 * when an incorrect extent item is found, this will delete
7581 * all of the existing entries for it and recreate them
7582 * based on what the tree scan found.
7584 static int fixup_extent_refs(struct btrfs_fs_info *info,
7585 struct cache_tree *extent_cache,
7586 struct extent_record *rec)
7588 struct btrfs_trans_handle *trans = NULL;
7590 struct btrfs_path *path;
7591 struct list_head *cur = rec->backrefs.next;
7592 struct cache_extent *cache;
7593 struct extent_backref *back;
7597 if (rec->flag_block_full_backref)
7598 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7600 path = btrfs_alloc_path();
7604 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7606 * Sometimes the backrefs themselves are so broken they don't
7607 * get attached to any meaningful rec, so first go back and
7608 * check any of our backrefs that we couldn't find and throw
7609 * them into the list if we find the backref so that
7610 * verify_backrefs can figure out what to do.
7612 ret = find_possible_backrefs(info, path, extent_cache, rec);
7617 /* step one, make sure all of the backrefs agree */
7618 ret = verify_backrefs(info, path, rec);
7622 trans = btrfs_start_transaction(info->extent_root, 1);
7623 if (IS_ERR(trans)) {
7624 ret = PTR_ERR(trans);
7628 /* step two, delete all the existing records */
7629 ret = delete_extent_records(trans, info->extent_root, path,
7630 rec->start, rec->max_size);
7635 /* was this block corrupt? If so, don't add references to it */
7636 cache = lookup_cache_extent(info->corrupt_blocks,
7637 rec->start, rec->max_size);
7643 /* step three, recreate all the refs we did find */
7644 while(cur != &rec->backrefs) {
7645 back = to_extent_backref(cur);
7649 * if we didn't find any references, don't create a
7652 if (!back->found_ref)
7655 rec->bad_full_backref = 0;
7656 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7664 int err = btrfs_commit_transaction(trans, info->extent_root);
7669 btrfs_free_path(path);
/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the extent item for @rec,
 * according to rec->flag_block_full_backref.  Looks the item up with a
 * METADATA_ITEM_KEY (skinny) or EXTENT_ITEM_KEY depending on the record,
 * rewrites the flags in place and commits.  Returns 0 on success, <0 on
 * error (transaction is committed even on the not-found path).
 */
7673 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7674 struct extent_record *rec)
7676 struct btrfs_trans_handle *trans;
7677 struct btrfs_root *root = fs_info->extent_root;
7678 struct btrfs_path *path;
7679 struct btrfs_extent_item *ei;
7680 struct btrfs_key key;
7684 key.objectid = rec->start;
7685 if (rec->metadata) {
/* Skinny metadata item: key.offset carries the tree level, not a size. */
7686 key.type = BTRFS_METADATA_ITEM_KEY;
7687 key.offset = rec->info_level;
7689 key.type = BTRFS_EXTENT_ITEM_KEY;
7690 key.offset = rec->max_size;
7693 path = btrfs_alloc_path();
7697 trans = btrfs_start_transaction(root, 0);
7698 if (IS_ERR(trans)) {
7699 btrfs_free_path(path);
7700 return PTR_ERR(trans);
7703 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7705 btrfs_free_path(path);
7706 btrfs_commit_transaction(trans, root);
7709 fprintf(stderr, "Didn't find extent for %llu\n",
7710 (unsigned long long)rec->start);
7711 btrfs_free_path(path);
7712 btrfs_commit_transaction(trans, root);
7716 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7717 struct btrfs_extent_item);
7718 flags = btrfs_extent_flags(path->nodes[0], ei);
7719 if (rec->flag_block_full_backref) {
7720 fprintf(stderr, "setting full backref on %llu\n",
7721 (unsigned long long)key.objectid);
7722 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7724 fprintf(stderr, "clearing full backref on %llu\n",
7725 (unsigned long long)key.objectid);
7726 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7728 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7729 btrfs_mark_buffer_dirty(path->nodes[0]);
7730 btrfs_free_path(path);
7731 return btrfs_commit_transaction(trans, root);
/*
 * Remove the node pointer to one corrupt block from the extent tree.
 * Searches down to the corrupt block's parent (lowest_level = level+1),
 * finds the slot whose blockptr matches corrupt->cache.start (first at
 * the search slot, then by scanning the node), and deletes that pointer
 * with btrfs_del_ptr().  Returns <0 if the pointer cannot be found.
 */
7734 /* right now we only prune from the extent allocation tree */
7735 static int prune_one_block(struct btrfs_trans_handle *trans,
7736 struct btrfs_fs_info *info,
7737 struct btrfs_corrupt_block *corrupt)
7740 struct btrfs_path path;
7741 struct extent_buffer *eb;
7745 int level = corrupt->level + 1;
7747 btrfs_init_path(&path);
7749 /* we want to stop at the parent to our busted block */
7750 path.lowest_level = level;
7752 ret = btrfs_search_slot(trans, info->extent_root,
7753 &corrupt->key, &path, -1, 1);
7758 eb = path.nodes[level];
7765 * hopefully the search gave us the block we want to prune,
7766 * lets try that first
7768 slot = path.slots[level];
7769 found = btrfs_node_blockptr(eb, slot);
7770 if (found == corrupt->cache.start)
7773 nritems = btrfs_header_nritems(eb);
7775 /* the search failed, lets scan this node and hope we find it */
7776 for (slot = 0; slot < nritems; slot++) {
7777 found = btrfs_node_blockptr(eb, slot);
7778 if (found == corrupt->cache.start)
7782 * we couldn't find the bad block. TODO, search all the nodes for pointers
7785 if (eb == info->extent_root->node) {
7790 btrfs_release_path(&path);
7795 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7796 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7799 btrfs_release_path(&path);
/*
 * Walk info->corrupt_blocks and prune each corrupt block's pointer from
 * the extent tree via prune_one_block(), removing each entry from the
 * cache as it is handled.  The transaction is started lazily (presumably
 * on the first entry — the branch structure is elided here) and committed
 * at the end.  Returns the commit result.
 */
7803 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7805 struct btrfs_trans_handle *trans = NULL;
7806 struct cache_extent *cache;
7807 struct btrfs_corrupt_block *corrupt;
7810 cache = search_cache_extent(info->corrupt_blocks, 0);
7814 trans = btrfs_start_transaction(info->extent_root, 1);
7816 return PTR_ERR(trans);
7818 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/* NOTE(review): prune_one_block()'s return value is not checked here. */
7819 prune_one_block(trans, info, corrupt);
7820 remove_cache_extent(info->corrupt_blocks, cache);
7823 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Drop the in-memory free space cache: clear every EXTENT_DIRTY range in
 * fs_info->free_space_cache and (in the elided portion) walk the block
 * groups from each range start, advancing by key.objectid + key.offset,
 * presumably to mark them un-cached — confirm against the full source.
 */
7827 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7829 struct btrfs_block_group_cache *cache;
7834 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7835 &start, &end, EXTENT_DIRTY);
7838 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7844 cache = btrfs_lookup_first_block_group(fs_info, start);
7849 start = cache->key.objectid + cache->key.offset;
/*
 * Final pass over the extent cache: report (and in repair mode fix) every
 * inconsistency found during the tree scan — duplicate extent items,
 * refcount mismatches, backpointer mismatches, unchecked owner refs, bad
 * full-backref flags, stripe-crossing metadata and chunk-type mismatches.
 *
 * In repair mode it first pins all problem extents (and corrupt blocks)
 * as excluded so nothing is allocated from them, processes the global
 * duplicate_extents list, then iterates the cache fixing each record.
 * Ends by rebuilding block accounting in a transaction when repairs were
 * made.  Returns 0 on success, -EAGAIN (presumably, from elided paths) to
 * request a rescan, or another negative errno on failure.
 */
7853 static int check_extent_refs(struct btrfs_root *root,
7854 struct cache_tree *extent_cache)
7856 struct extent_record *rec;
7857 struct cache_extent *cache;
7866 * if we're doing a repair, we have to make sure
7867 * we don't allocate from the problem extents.
7868 * In the worst case, this will be all the
7871 cache = search_cache_extent(extent_cache, 0);
7873 rec = container_of(cache, struct extent_record, cache);
7874 set_extent_dirty(root->fs_info->excluded_extents,
7876 rec->start + rec->max_size - 1,
7878 cache = next_cache_extent(cache);
7881 /* pin down all the corrupted blocks too */
7882 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7884 set_extent_dirty(root->fs_info->excluded_extents,
7886 cache->start + cache->size - 1,
7888 cache = next_cache_extent(cache);
7890 prune_corrupt_blocks(root->fs_info);
7891 reset_cached_block_groups(root->fs_info);
7894 reset_cached_block_groups(root->fs_info);
7897 * We need to delete any duplicate entries we find first otherwise we
7898 * could mess up the extent tree when we have backrefs that actually
7899 * belong to a different extent item and not the weird duplicate one.
7901 while (repair && !list_empty(&duplicate_extents)) {
7902 rec = to_extent_record(duplicate_extents.next);
7903 list_del_init(&rec->list);
7905 /* Sometimes we can find a backref before we find an actual
7906 * extent, so we need to process it a little bit to see if there
7907 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7908 * if this is a backref screwup. If we need to delete stuff
7909 * process_duplicates() will return 0, otherwise it will return
7912 if (process_duplicates(root, extent_cache, rec))
7914 ret = delete_duplicate_records(root, rec);
7918 * delete_duplicate_records will return the number of entries
7919 * deleted, so if it's greater than 0 then we know we actually
7920 * did something and we need to remove.
/* Main per-record check/repair loop over the whole extent cache. */
7934 cache = search_cache_extent(extent_cache, 0);
7937 rec = container_of(cache, struct extent_record, cache);
7938 if (rec->num_duplicates) {
7939 fprintf(stderr, "extent item %llu has multiple extent "
7940 "items\n", (unsigned long long)rec->start);
7945 if (rec->refs != rec->extent_item_refs) {
7946 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7947 (unsigned long long)rec->start,
7948 (unsigned long long)rec->nr);
7949 fprintf(stderr, "extent item %llu, found %llu\n",
7950 (unsigned long long)rec->extent_item_refs,
7951 (unsigned long long)rec->refs);
7952 ret = record_orphan_data_extents(root->fs_info, rec);
7959 * we can't use the extent to repair file
7960 * extent, let the fallback method handle it.
7962 if (!fixed && repair) {
7963 ret = fixup_extent_refs(
7974 if (all_backpointers_checked(rec, 1)) {
7975 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7976 (unsigned long long)rec->start,
7977 (unsigned long long)rec->nr);
7979 if (!fixed && !recorded && repair) {
7980 ret = fixup_extent_refs(root->fs_info,
7989 if (!rec->owner_ref_checked) {
7990 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7991 (unsigned long long)rec->start,
7992 (unsigned long long)rec->nr);
7993 if (!fixed && !recorded && repair) {
7994 ret = fixup_extent_refs(root->fs_info,
8003 if (rec->bad_full_backref) {
8004 fprintf(stderr, "bad full backref, on [%llu]\n",
8005 (unsigned long long)rec->start);
8007 ret = fixup_extent_flags(root->fs_info, rec);
8016 * Although it's not a extent ref's problem, we reuse this
8017 * routine for error reporting.
8018 * No repair function yet.
8020 if (rec->crossing_stripes) {
8022 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8023 rec->start, rec->start + rec->max_size);
8028 if (rec->wrong_chunk_type) {
8030 "bad extent [%llu, %llu), type mismatch with chunk\n",
8031 rec->start, rec->start + rec->max_size);
/* Done with this record: drop it and unpin its range if it's now clean. */
8036 remove_cache_extent(extent_cache, cache);
8037 free_all_extent_backrefs(rec);
8038 if (!init_extent_tree && repair && (!cur_err || fixed))
8039 clear_extent_dirty(root->fs_info->excluded_extents,
8041 rec->start + rec->max_size - 1,
8047 if (ret && ret != -EAGAIN) {
8048 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8051 struct btrfs_trans_handle *trans;
8053 root = root->fs_info->extent_root;
8054 trans = btrfs_start_transaction(root, 1);
8055 if (IS_ERR(trans)) {
8056 ret = PTR_ERR(trans);
8060 btrfs_fix_block_accounting(trans, root);
8061 ret = btrfs_commit_transaction(trans, root);
8066 fprintf(stderr, "repaired damaged extent references\n");
/*
 * Compute the per-device stripe length for a chunk of @length bytes with
 * @num_stripes stripes, based on the RAID profile bits in @type:
 *   RAID0  - length / num_stripes
 *   RAID10 - length * 2 / num_stripes (two copies)
 *   RAID5  - length / (num_stripes - 1) (one parity stripe)
 *   RAID6  - length / (num_stripes - 2) (two parity stripes)
 *   otherwise (single/DUP/RAID1) - length unchanged
 * NOTE(review): assumes num_stripes > 1 (RAID5) / > 2 (RAID6); no guard
 * against division by zero is visible here.
 */
8072 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8076 if (type & BTRFS_BLOCK_GROUP_RAID0) {
8077 stripe_size = length;
8078 stripe_size /= num_stripes;
8079 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8080 stripe_size = length * 2;
8081 stripe_size /= num_stripes;
8082 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8083 stripe_size = length;
8084 stripe_size /= (num_stripes - 1);
8085 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8086 stripe_size = length;
8087 stripe_size /= (num_stripes - 2);
8089 stripe_size = length;
/*
 * Verify one chunk against its block group item and its per-stripe device
 * extents, claiming matching records out of the caches as it goes
 * (bg_rec / dextents links).  Mismatches are printed unless @silent.
 */
8095 * Check the chunk with its block group/dev list ref:
8096 * Return 0 if all refs seems valid.
8097 * Return 1 if part of refs seems valid, need later check for rebuild ref
8098 * like missing block group and needs to search extent tree to rebuild them.
8099 * Return -1 if essential refs are missing and unable to rebuild.
8101 static int check_chunk_refs(struct chunk_record *chunk_rec,
8102 struct block_group_tree *block_group_cache,
8103 struct device_extent_tree *dev_extent_cache,
8106 struct cache_extent *block_group_item;
8107 struct block_group_record *block_group_rec;
8108 struct cache_extent *dev_extent_item;
8109 struct device_extent_record *dev_extent_rec;
8113 int metadump_v2 = 0;
/* Step 1: the chunk must have a matching block group item. */
8117 block_group_item = lookup_cache_extent(&block_group_cache->tree,
8120 if (block_group_item) {
8121 block_group_rec = container_of(block_group_item,
8122 struct block_group_record,
8124 if (chunk_rec->length != block_group_rec->offset ||
8125 chunk_rec->offset != block_group_rec->objectid ||
8127 chunk_rec->type_flags != block_group_rec->flags)) {
8130 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8131 chunk_rec->objectid,
8136 chunk_rec->type_flags,
8137 block_group_rec->objectid,
8138 block_group_rec->type,
8139 block_group_rec->offset,
8140 block_group_rec->offset,
8141 block_group_rec->objectid,
8142 block_group_rec->flags);
/* Claim the block group record for this chunk. */
8145 list_del_init(&block_group_rec->list);
8146 chunk_rec->bg_rec = block_group_rec;
8151 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8152 chunk_rec->objectid,
8157 chunk_rec->type_flags);
/* Step 2: every stripe must have a matching device extent of the
 * profile-adjusted stripe length. */
8164 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8165 chunk_rec->num_stripes);
8166 for (i = 0; i < chunk_rec->num_stripes; ++i) {
8167 devid = chunk_rec->stripes[i].devid;
8168 offset = chunk_rec->stripes[i].offset;
8169 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8170 devid, offset, length);
8171 if (dev_extent_item) {
8172 dev_extent_rec = container_of(dev_extent_item,
8173 struct device_extent_record,
8175 if (dev_extent_rec->objectid != devid ||
8176 dev_extent_rec->offset != offset ||
8177 dev_extent_rec->chunk_offset != chunk_rec->offset ||
8178 dev_extent_rec->length != length) {
8181 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8182 chunk_rec->objectid,
8185 chunk_rec->stripes[i].devid,
8186 chunk_rec->stripes[i].offset,
8187 dev_extent_rec->objectid,
8188 dev_extent_rec->offset,
8189 dev_extent_rec->length);
8192 list_move(&dev_extent_rec->chunk_list,
8193 &chunk_rec->dextents);
8198 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8199 chunk_rec->objectid,
8202 chunk_rec->stripes[i].devid,
8203 chunk_rec->stripes[i].offset);
/*
 * Cross-check every chunk against block groups and device extents via
 * check_chunk_refs(), sorting chunks into the optional @good, @rebuild
 * and @bad lists by result (0 / >0 / <0).  Any block groups or device
 * extents left unclaimed afterwards are reported as orphans (unless
 * @silent).  The final return value is computed in elided lines —
 * presumably nonzero if anything was bad or orphaned.
 */
8210 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8211 int check_chunks(struct cache_tree *chunk_cache,
8212 struct block_group_tree *block_group_cache,
8213 struct device_extent_tree *dev_extent_cache,
8214 struct list_head *good, struct list_head *bad,
8215 struct list_head *rebuild, int silent)
8217 struct cache_extent *chunk_item;
8218 struct chunk_record *chunk_rec;
8219 struct block_group_record *bg_rec;
8220 struct device_extent_record *dext_rec;
8224 chunk_item = first_cache_extent(chunk_cache);
8225 while (chunk_item) {
8226 chunk_rec = container_of(chunk_item, struct chunk_record,
8228 err = check_chunk_refs(chunk_rec, block_group_cache,
8229 dev_extent_cache, silent);
8232 if (err == 0 && good)
8233 list_add_tail(&chunk_rec->list, good);
8234 if (err > 0 && rebuild)
8235 list_add_tail(&chunk_rec->list, rebuild);
8237 list_add_tail(&chunk_rec->list, bad);
8238 chunk_item = next_cache_extent(chunk_item);
/* Anything still on these lists was never matched to a chunk. */
8241 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8244 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8252 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8256 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Sum the lengths of all device extents belonging to @dev_rec's devid and
 * compare the total against the device item's bytes_used.  Matched device
 * extents are removed from the orphan device_list as they are counted.
 * Reports a mismatch to stderr; return value logic is partly elided.
 */
8267 static int check_device_used(struct device_record *dev_rec,
8268 struct device_extent_tree *dext_cache)
8270 struct cache_extent *cache;
8271 struct device_extent_record *dev_extent_rec;
8274 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8276 dev_extent_rec = container_of(cache,
8277 struct device_extent_record,
/* search_cache_extent2 may return the next devid; stop at the boundary. */
8279 if (dev_extent_rec->objectid != dev_rec->devid)
8282 list_del_init(&dev_extent_rec->device_list);
8283 total_byte += dev_extent_rec->length;
8284 cache = next_cache_extent(cache);
8287 if (total_byte != dev_rec->byte_used) {
8289 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8290 total_byte, dev_rec->byte_used, dev_rec->objectid,
8291 dev_rec->type, dev_rec->offset);
/*
 * Validate every device item against its device extents: run
 * check_device_used() for each device in @dev_cache, then report any
 * device extents whose device item was never found.  The aggregate
 * return value is computed in elided lines.
 */
8298 /* check btrfs_dev_item -> btrfs_dev_extent */
8299 static int check_devices(struct rb_root *dev_cache,
8300 struct device_extent_tree *dev_extent_cache)
8302 struct rb_node *dev_node;
8303 struct device_record *dev_rec;
8304 struct device_extent_record *dext_rec;
8308 dev_node = rb_first(dev_cache);
8310 dev_rec = container_of(dev_node, struct device_record, node);
8311 err = check_device_used(dev_rec, dev_extent_cache);
8315 dev_node = rb_next(dev_node);
8317 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8320 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8321 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record describing one tree root (its bytenr,
 * level, node size and, for roots mid-deletion, the drop key/level) and
 * append it to @head for later processing by deal_root_from_list().
 * @drop_key may be NULL (the copy is conditional — condition elided).
 * Returns 0 on success; the malloc-failure return is elided.
 */
8328 static int add_root_item_to_list(struct list_head *head,
8329 u64 objectid, u64 bytenr, u64 last_snapshot,
8330 u8 level, u8 drop_level,
8331 int level_size, struct btrfs_key *drop_key)
8334 struct root_item_record *ri_rec;
8335 ri_rec = malloc(sizeof(*ri_rec));
8338 ri_rec->bytenr = bytenr;
8339 ri_rec->objectid = objectid;
8340 ri_rec->level = level;
8341 ri_rec->level_size = level_size;
8342 ri_rec->drop_level = drop_level;
8343 ri_rec->last_snapshot = last_snapshot;
8345 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8346 list_add_tail(&ri_rec->list, head);
/*
 * Drain @list, unlinking and freeing every root_item_record on it.
 * Used to clean up the normal_trees/dropping_trees work lists.
 */
8351 static void free_root_item_list(struct list_head *list)
8353 struct root_item_record *ri_rec;
8355 while (!list_empty(list)) {
8356 ri_rec = list_first_entry(list, struct root_item_record,
8358 list_del_init(&ri_rec->list);
/*
 * Process every queued tree root on @list: read the root's tree block,
 * seed the pending/seen/nodes caches via add_root_to_pending(), then
 * drive run_next_block() to walk blocks and populate the extent/chunk/
 * device/block-group caches.  After the list is drained, a final
 * run_next_block() loop (rec == NULL) handles whatever remains queued.
 *
 * NOTE(review): loop-exit conditions and error handling between the
 * visible lines are elided in this extract; presumably ret is checked
 * after each call — confirm against the full source.
 */
8363 static int deal_root_from_list(struct list_head *list,
8364 struct btrfs_root *root,
8365 struct block_info *bits,
8367 struct cache_tree *pending,
8368 struct cache_tree *seen,
8369 struct cache_tree *reada,
8370 struct cache_tree *nodes,
8371 struct cache_tree *extent_cache,
8372 struct cache_tree *chunk_cache,
8373 struct rb_root *dev_cache,
8374 struct block_group_tree *block_group_cache,
8375 struct device_extent_tree *dev_extent_cache)
8380 while (!list_empty(list)) {
8381 struct root_item_record *rec;
8382 struct extent_buffer *buf;
8383 rec = list_entry(list->next,
8384 struct root_item_record, list);
/* Read the root node of this tree from disk. */
8386 buf = read_tree_block(root->fs_info->tree_root,
8387 rec->bytenr, rec->level_size, 0);
8388 if (!extent_buffer_uptodate(buf)) {
8389 free_extent_buffer(buf);
8393 ret = add_root_to_pending(buf, extent_cache, pending,
8394 seen, nodes, rec->objectid);
8398 * To rebuild extent tree, we need deal with snapshot
8399 * one by one, otherwise we deal with node firstly which
8400 * can maximize readahead.
8403 ret = run_next_block(root, bits, bits_nr, &last,
8404 pending, seen, reada, nodes,
8405 extent_cache, chunk_cache,
8406 dev_cache, block_group_cache,
8407 dev_extent_cache, rec);
8411 free_extent_buffer(buf);
8412 list_del(&rec->list);
/* Final pass: no specific root record, drain remaining blocks. */
8418 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8419 reada, nodes, extent_cache, chunk_cache,
8420 dev_cache, block_group_cache,
8421 dev_extent_cache, NULL);
/*
 * Top-level driver for the original-mode chunk/extent check.
 *
 * Builds in-memory caches for chunks, block groups, devices, device
 * extents and extents; queues the tree root, chunk root and every
 * ROOT_ITEM in the root tree (split into "normal" and "dropping"
 * snapshot lists); walks all tree blocks via deal_root_from_list();
 * then cross-checks chunks (check_chunks), extent refs
 * (check_extent_refs) and devices (check_devices).
 *
 * On exit, the fsck hooks installed on fs_info are cleared and all
 * caches are freed.  The two blocks of free_* calls near the end are
 * the normal-exit and error-exit cleanup paths respectively (the
 * labels/gotos separating them are among the elided lines).
 */
8431 static int check_chunks_and_extents(struct btrfs_root *root)
8433 struct rb_root dev_cache;
8434 struct cache_tree chunk_cache;
8435 struct block_group_tree block_group_cache;
8436 struct device_extent_tree dev_extent_cache;
8437 struct cache_tree extent_cache;
8438 struct cache_tree seen;
8439 struct cache_tree pending;
8440 struct cache_tree reada;
8441 struct cache_tree nodes;
8442 struct extent_io_tree excluded_extents;
8443 struct cache_tree corrupt_blocks;
8444 struct btrfs_path path;
8445 struct btrfs_key key;
8446 struct btrfs_key found_key;
8448 struct block_info *bits;
8450 struct extent_buffer *leaf;
8452 struct btrfs_root_item ri;
8453 struct list_head dropping_trees;
8454 struct list_head normal_trees;
8455 struct btrfs_root *root1;
/* Initialize every cache used during the tree walk. */
8460 dev_cache = RB_ROOT;
8461 cache_tree_init(&chunk_cache);
8462 block_group_tree_init(&block_group_cache);
8463 device_extent_tree_init(&dev_extent_cache);
8465 cache_tree_init(&extent_cache);
8466 cache_tree_init(&seen);
8467 cache_tree_init(&pending);
8468 cache_tree_init(&nodes);
8469 cache_tree_init(&reada);
8470 cache_tree_init(&corrupt_blocks);
8471 extent_io_tree_init(&excluded_extents);
8472 INIT_LIST_HEAD(&dropping_trees);
8473 INIT_LIST_HEAD(&normal_trees);
/* Hook the fsck caches into fs_info so lower layers can record into them. */
8476 root->fs_info->excluded_extents = &excluded_extents;
8477 root->fs_info->fsck_extent_cache = &extent_cache;
8478 root->fs_info->free_extent_hook = free_extent_hook;
8479 root->fs_info->corrupt_blocks = &corrupt_blocks;
8483 bits = malloc(bits_nr * sizeof(struct block_info));
8489 if (ctx.progress_enabled) {
8490 ctx.tp = TASK_EXTENTS;
8491 task_start(ctx.info);
/* Queue the tree root and chunk root as "normal" trees. */
8495 root1 = root->fs_info->tree_root;
8496 level = btrfs_header_level(root1->node);
8497 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8498 root1->node->start, 0, level, 0,
8499 root1->nodesize, NULL);
8502 root1 = root->fs_info->chunk_root;
8503 level = btrfs_header_level(root1->node);
8504 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8505 root1->node->start, 0, level, 0,
8506 root1->nodesize, NULL);
/* Scan the root tree for every ROOT_ITEM and queue each subvolume. */
8509 btrfs_init_path(&path);
8512 key.type = BTRFS_ROOT_ITEM_KEY;
8513 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8518 leaf = path.nodes[0];
8519 slot = path.slots[0];
8520 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8521 ret = btrfs_next_leaf(root, &path);
8524 leaf = path.nodes[0];
8525 slot = path.slots[0];
8527 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8528 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
8529 unsigned long offset;
8532 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8533 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8534 last_snapshot = btrfs_root_last_snapshot(&ri);
/* drop_progress == 0 means the root is not mid-deletion: normal tree. */
8535 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8536 level = btrfs_root_level(&ri);
8537 level_size = root->nodesize;
8538 ret = add_root_item_to_list(&normal_trees,
8540 btrfs_root_bytenr(&ri),
8541 last_snapshot, level,
8542 0, level_size, NULL);
/* Otherwise the snapshot is partially dropped; remember drop key/level. */
8546 level = btrfs_root_level(&ri);
8547 level_size = root->nodesize;
8548 objectid = found_key.objectid;
8549 btrfs_disk_key_to_cpu(&found_key,
8551 ret = add_root_item_to_list(&dropping_trees,
8553 btrfs_root_bytenr(&ri),
8554 last_snapshot, level,
8556 level_size, &found_key);
8563 btrfs_release_path(&path);
8566 * check_block can return -EAGAIN if it fixes something, please keep
8567 * this in mind when dealing with return values from these functions, if
8568 * we get -EAGAIN we want to fall through and restart the loop.
8570 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8571 &seen, &reada, &nodes, &extent_cache,
8572 &chunk_cache, &dev_cache, &block_group_cache,
8579 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8580 &pending, &seen, &reada, &nodes,
8581 &extent_cache, &chunk_cache, &dev_cache,
8582 &block_group_cache, &dev_extent_cache);
/* Cross-check the populated caches against each other. */
8589 ret = check_chunks(&chunk_cache, &block_group_cache,
8590 &dev_extent_cache, NULL, NULL, NULL, 0);
8597 ret = check_extent_refs(root, &extent_cache);
8604 ret = check_devices(&dev_cache, &dev_extent_cache);
8609 task_stop(ctx.info);
/* Normal-exit cleanup: detach fsck hooks and free all caches. */
8611 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8612 extent_io_tree_cleanup(&excluded_extents);
8613 root->fs_info->fsck_extent_cache = NULL;
8614 root->fs_info->free_extent_hook = NULL;
8615 root->fs_info->corrupt_blocks = NULL;
8616 root->fs_info->excluded_extents = NULL;
8619 free_chunk_cache_tree(&chunk_cache);
8620 free_device_cache_tree(&dev_cache);
8621 free_block_group_tree(&block_group_cache);
8622 free_device_extent_tree(&dev_extent_cache);
8623 free_extent_cache_tree(&seen);
8624 free_extent_cache_tree(&pending);
8625 free_extent_cache_tree(&reada);
8626 free_extent_cache_tree(&nodes);
/* Error-exit cleanup path (reached via gotos elided from this extract). */
8629 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8630 free_extent_cache_tree(&seen);
8631 free_extent_cache_tree(&pending);
8632 free_extent_cache_tree(&reada);
8633 free_extent_cache_tree(&nodes);
8634 free_chunk_cache_tree(&chunk_cache);
8635 free_block_group_tree(&block_group_cache);
8636 free_device_cache_tree(&dev_cache);
8637 free_device_extent_tree(&dev_extent_cache);
8638 free_extent_record_cache(root->fs_info, &extent_cache);
8639 free_root_item_list(&normal_trees);
8640 free_root_item_list(&dropping_trees);
8641 extent_io_tree_cleanup(&excluded_extents);
8646 * Check backrefs of a tree block given by @bytenr or @eb.
8648 * @root: the root containing the @bytenr or @eb
8649 * @eb: tree block extent buffer, can be NULL
8650 * @bytenr: bytenr of the tree block to search
8651 * @level: tree level of the tree block
8652 * @owner: owner of the tree block
8654 * Return >0 for any error found and output error message
8655 * Return 0 for no error found
8657 static int check_tree_block_ref(struct btrfs_root *root,
8658 struct extent_buffer *eb, u64 bytenr,
8659 int level, u64 owner)
8661 struct btrfs_key key;
8662 struct btrfs_root *extent_root = root->fs_info->extent_root;
8663 struct btrfs_path path;
8664 struct btrfs_extent_item *ei;
8665 struct btrfs_extent_inline_ref *iref;
8666 struct extent_buffer *leaf;
8672 u32 nodesize = root->nodesize;
/*
 * With SKINNY_METADATA the extent tree stores METADATA_ITEMs
 * (offset = level); otherwise classic EXTENT_ITEMs (offset = size).
 */
8679 btrfs_init_path(&path);
8680 key.objectid = bytenr;
8681 if (btrfs_fs_incompat(root->fs_info,
8682 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8683 key.type = BTRFS_METADATA_ITEM_KEY;
8685 key.type = BTRFS_EXTENT_ITEM_KEY;
8686 key.offset = (u64)-1;
8688 /* Search for the backref in extent tree */
8689 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8691 err |= BACKREF_MISSING;
/* offset = -1 lands past the item; step back to the extent item itself. */
8694 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8696 err |= BACKREF_MISSING;
8700 leaf = path.nodes[0];
8701 slot = path.slots[0];
8702 btrfs_item_key_to_cpu(leaf, &key, slot);
8704 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* Locate the first inline ref; layout differs for skinny metadata. */
8706 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8707 skinny_level = (int)key.offset;
8708 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8710 struct btrfs_tree_block_info *info;
8712 info = (struct btrfs_tree_block_info *)(ei + 1);
8713 skinny_level = btrfs_tree_block_level(leaf, info);
8714 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* The extent item must be flagged as a tree block. */
8721 if (!(btrfs_extent_flags(leaf, ei) &
8722 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8724 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8725 key.objectid, nodesize,
8726 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8727 err = BACKREF_MISMATCH;
/* If we have the block in memory, its header must agree with the item. */
8729 header_gen = btrfs_header_generation(eb);
8730 extent_gen = btrfs_extent_generation(leaf, ei);
8731 if (header_gen != extent_gen) {
8733 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8734 key.objectid, nodesize, header_gen,
8736 err = BACKREF_MISMATCH;
8738 if (level != skinny_level) {
8740 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8741 key.objectid, nodesize, level, skinny_level);
8742 err = BACKREF_MISMATCH;
/* Non-fs trees must never be shared: exactly one ref expected. */
8744 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8746 "extent[%llu %u] is referred by other roots than %llu",
8747 key.objectid, nodesize, root->objectid);
8748 err = BACKREF_MISMATCH;
8753 * Iterate the extent/metadata item to find the exact backref
8755 item_size = btrfs_item_size_nr(leaf, slot);
8756 ptr = (unsigned long)iref;
8757 end = (unsigned long)ei + item_size;
8759 iref = (struct btrfs_extent_inline_ref *)ptr;
8760 type = btrfs_extent_inline_ref_type(leaf, iref);
8761 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8763 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8764 (offset == root->objectid || offset == owner)) {
8766 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8767 /* Check if the backref points to valid referencer */
/* Recurse on the parent block; eb == NULL since we only have bytenr. */
8768 found_ref = !check_tree_block_ref(root, NULL, offset,
8774 ptr += btrfs_extent_inline_ref_size(type);
8778 * Inlined extent item doesn't have what we need, check
8779 * TREE_BLOCK_REF_KEY
8782 btrfs_release_path(&path);
8783 key.objectid = bytenr;
8784 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8785 key.offset = root->objectid;
8787 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8792 err |= BACKREF_MISSING;
8794 btrfs_release_path(&path);
/* Only report a lost backref when the caller provided the block itself. */
8795 if (eb && (err & BACKREF_MISSING))
8796 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8797 bytenr, nodesize, owner, level);
8802 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8804 * Return >0 any error found and output error message
8805 * Return 0 for no error found
/*
 * @root: root owning the leaf @eb
 * @eb:   leaf containing the EXTENT_DATA item
 * @slot: slot of the EXTENT_DATA item inside @eb
 *
 * Verifies alignment of the file extent, the matching EXTENT_ITEM in
 * the extent tree (flags, generation), and that a data backref
 * (inline EXTENT_DATA_REF/SHARED_DATA_REF or keyed EXTENT_DATA_REF)
 * pointing back at this file extent exists.
 */
8807 static int check_extent_data_item(struct btrfs_root *root,
8808 struct extent_buffer *eb, int slot)
8810 struct btrfs_file_extent_item *fi;
8811 struct btrfs_path path;
8812 struct btrfs_root *extent_root = root->fs_info->extent_root;
8813 struct btrfs_key fi_key;
8814 struct btrfs_key dbref_key;
8815 struct extent_buffer *leaf;
8816 struct btrfs_extent_item *ei;
8817 struct btrfs_extent_inline_ref *iref;
8818 struct btrfs_extent_data_ref *dref;
8820 u64 file_extent_gen;
8823 u64 extent_num_bytes;
8831 int found_dbackref = 0;
8835 btrfs_item_key_to_cpu(eb, &fi_key, slot);
8836 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8837 file_extent_gen = btrfs_file_extent_generation(eb, fi);
8839 /* Nothing to check for hole and inline data extents */
8840 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8841 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8844 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8845 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8846 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8848 /* Check unaligned disk_num_bytes and num_bytes */
8849 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8851 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8852 fi_key.objectid, fi_key.offset, disk_num_bytes,
8854 err |= BYTES_UNALIGNED;
/* Global accounting: bytes allocated on disk for data. */
8856 data_bytes_allocated += disk_num_bytes;
8858 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8860 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8861 fi_key.objectid, fi_key.offset, extent_num_bytes,
8863 err |= BYTES_UNALIGNED;
/* Global accounting: bytes referenced by file extents. */
8865 data_bytes_referenced += extent_num_bytes;
8867 owner = btrfs_header_owner(eb);
8869 /* Check the extent item of the file extent in extent tree */
8870 btrfs_init_path(&path);
8871 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8872 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8873 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8875 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8877 err |= BACKREF_MISSING;
8881 leaf = path.nodes[0];
8882 slot = path.slots[0];
8883 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8885 extent_flags = btrfs_extent_flags(leaf, ei);
8886 extent_gen = btrfs_extent_generation(leaf, ei);
/* The extent item for file data must carry the DATA flag. */
8888 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8890 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8891 disk_bytenr, disk_num_bytes,
8892 BTRFS_EXTENT_FLAG_DATA);
8893 err |= BACKREF_MISMATCH;
/* A file extent can only be as old as (or newer than) its extent item. */
8896 if (file_extent_gen < extent_gen) {
8898 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8899 disk_bytenr, disk_num_bytes, file_extent_gen,
8901 err |= BACKREF_MISMATCH;
8904 /* Check data backref inside that extent item */
8905 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8906 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8907 ptr = (unsigned long)iref;
8908 end = (unsigned long)ei + item_size;
8910 iref = (struct btrfs_extent_inline_ref *)ptr;
8911 type = btrfs_extent_inline_ref_type(leaf, iref);
8912 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8914 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8915 ref_root = btrfs_extent_data_ref_root(leaf, dref);
8916 if (ref_root == owner || ref_root == root->objectid)
8918 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/* Shared ref: its offset is the parent tree block bytenr. */
8919 found_dbackref = !check_tree_block_ref(root, NULL,
8920 btrfs_extent_inline_ref_offset(leaf, iref),
8926 ptr += btrfs_extent_inline_ref_size(type);
8929 /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8930 if (!found_dbackref) {
8931 btrfs_release_path(&path);
8933 btrfs_init_path(&path);
8934 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8935 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
/* Keyed data refs are addressed by a hash of (root, inode, offset). */
8936 dbref_key.offset = hash_extent_data_ref(root->objectid,
8937 fi_key.objectid, fi_key.offset);
8939 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8940 &dbref_key, &path, 0, 0);
8945 if (!found_dbackref)
8946 err |= BACKREF_MISSING;
8948 btrfs_release_path(&path);
8949 if (err & BACKREF_MISSING) {
8950 error("data extent[%llu %llu] backref lost",
8951 disk_bytenr, disk_num_bytes);
8957 * Get real tree block level for the case like shared block
8958 * Return >= 0 as tree level
8959 * Return <0 for error
/*
 * Resolve the level of the tree block at @bytenr from two independent
 * sources — the extent tree backref item and the block header itself —
 * and only trust the result when both agree.
 */
8961 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8963 struct extent_buffer *eb;
8964 struct btrfs_path path;
8965 struct btrfs_key key;
8966 struct btrfs_extent_item *ei;
8969 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8974 /* Search extent tree for extent generation and level */
8975 key.objectid = bytenr;
8976 key.type = BTRFS_METADATA_ITEM_KEY;
8977 key.offset = (u64)-1;
8979 btrfs_init_path(&path);
8980 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
/* Step back from the (bytenr, -1) position onto the extent item. */
8983 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8991 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8992 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8993 struct btrfs_extent_item);
8994 flags = btrfs_extent_flags(path.nodes[0], ei);
/* Only tree blocks have a meaningful level. */
8995 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9000 /* Get transid for later read_tree_block() check */
9001 transid = btrfs_extent_generation(path.nodes[0], ei);
9003 /* Get backref level as one source */
9004 if (key.type == BTRFS_METADATA_ITEM_KEY) {
/* Skinny metadata encodes the level directly in the key offset. */
9005 backref_level = key.offset;
9007 struct btrfs_tree_block_info *info;
9009 info = (struct btrfs_tree_block_info *)(ei + 1);
9010 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9012 btrfs_release_path(&path);
9014 /* Get level from tree block as an alternative source */
9015 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9016 if (!extent_buffer_uptodate(eb)) {
9017 free_extent_buffer(eb);
9020 header_level = btrfs_header_level(eb);
9021 free_extent_buffer(eb);
/* Disagreement between header and backref means the block is suspect. */
9023 if (header_level != backref_level)
9025 return header_level;
/* Error path: common cleanup (label elided from this extract). */
9028 btrfs_release_path(&path);
9033 * Check if a tree block backref is valid (points to a valid tree block)
9034 * if level == -1, level will be resolved
9035 * Return >0 for any error found and print error message
/*
 * @fs_info: filesystem info
 * @root_id: objectid of the root the backref claims as owner
 * @bytenr:  bytenr of the tree block being referenced
 * @level:   expected level, or -1 to resolve it here
 *
 * Validates the referencer by re-searching the claimed owner root with
 * the block's first key at path.lowest_level == level, then checking
 * the block reached matches @bytenr and @level.
 */
9037 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9038 u64 bytenr, int level)
9040 struct btrfs_root *root;
9041 struct btrfs_key key;
9042 struct btrfs_path path;
9043 struct extent_buffer *eb;
9044 struct extent_buffer *node;
9045 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9049 /* Query level for level == -1 special case */
9051 level = query_tree_block_level(fs_info, bytenr);
9053 err |= REFERENCER_MISSING;
/* Look up the owner root claimed by the backref. */
9057 key.objectid = root_id;
9058 key.type = BTRFS_ROOT_ITEM_KEY;
9059 key.offset = (u64)-1;
9061 root = btrfs_read_fs_root(fs_info, &key);
9063 err |= REFERENCER_MISSING;
9067 /* Read out the tree block to get item/node key */
9068 eb = read_tree_block(root, bytenr, root->nodesize, 0);
9069 if (!extent_buffer_uptodate(eb)) {
9070 err |= REFERENCER_MISSING;
9071 free_extent_buffer(eb);
9075 /* Empty tree, no need to check key */
9076 if (!btrfs_header_nritems(eb) && !level) {
9077 free_extent_buffer(eb);
/* First key: node key for internal nodes, item key for leaves. */
9082 btrfs_node_key_to_cpu(eb, &key, 0);
9084 btrfs_item_key_to_cpu(eb, &key, 0);
9086 free_extent_buffer(eb);
9088 btrfs_init_path(&path);
/* Stop the search at the block's own level so path.nodes[level] is it. */
9089 path.lowest_level = level;
9090 /* Search with the first key, to ensure we can reach it */
9091 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9093 err |= REFERENCER_MISSING;
9097 node = path.nodes[level];
9098 if (btrfs_header_bytenr(node) != bytenr) {
9100 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9101 bytenr, nodesize, bytenr,
9102 btrfs_header_bytenr(node));
9103 err |= REFERENCER_MISMATCH;
9105 if (btrfs_header_level(node) != level) {
9107 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9108 bytenr, nodesize, level,
9109 btrfs_header_level(node));
9110 err |= REFERENCER_MISMATCH;
9114 btrfs_release_path(&path);
9116 if (err & REFERENCER_MISSING) {
/* Two message forms: level unresolved vs. level known. */
9118 error("extent [%llu %d] lost referencer (owner: %llu)",
9119 bytenr, nodesize, root_id);
9122 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9123 bytenr, nodesize, root_id, level);
9130 * Check referencer for shared block backref
9131 * If level == -1, this function will resolve the level.
/*
 * @parent: bytenr of the parent node the shared backref points at
 * @bytenr: bytenr of the shared child block
 *
 * Reads the parent node and verifies that (a) the parent sits exactly
 * one level above the child and (b) one of the parent's block pointers
 * actually references @bytenr.
 */
9133 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9134 u64 parent, u64 bytenr, int level)
9136 struct extent_buffer *eb;
9137 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9139 int found_parent = 0;
9142 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9143 if (!extent_buffer_uptodate(eb))
/* Resolve the child's level when the caller passed -1. */
9147 level = query_tree_block_level(fs_info, bytenr);
/* The parent must be exactly one level above the referenced block. */
9151 if (level + 1 != btrfs_header_level(eb))
9154 nr = btrfs_header_nritems(eb);
9155 for (i = 0; i < nr; i++) {
9156 if (bytenr == btrfs_node_blockptr(eb, i)) {
9162 free_extent_buffer(eb);
9163 if (!found_parent) {
9165 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9166 bytenr, nodesize, parent, level);
9167 return REFERENCER_MISSING;
9173 * Check referencer for normal (inlined) data ref
9174 * If len == 0, it will be resolved by searching in extent tree
/*
 * @root_id:  root that the data backref claims references the extent
 * @objectid: inode number in that root
 * @offset:   backref offset (file offset minus file-extent offset)
 * @bytenr:   data extent bytenr
 * @len:      data extent length (0 = resolve from the extent tree)
 * @count:    reference count the backref claims
 *
 * Walks all EXTENT_DATA items of (root_id, objectid) and counts those
 * that actually reference (bytenr, len) with a matching backref
 * offset; the count must equal @count.
 */
9176 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9177 u64 root_id, u64 objectid, u64 offset,
9178 u64 bytenr, u64 len, u32 count)
9180 struct btrfs_root *root;
9181 struct btrfs_root *extent_root = fs_info->extent_root;
9182 struct btrfs_key key;
9183 struct btrfs_path path;
9184 struct extent_buffer *leaf;
9185 struct btrfs_file_extent_item *fi;
9186 u32 found_count = 0;
/* len == 0: look up the real extent length in the extent tree first. */
9191 key.objectid = bytenr;
9192 key.type = BTRFS_EXTENT_ITEM_KEY;
9193 key.offset = (u64)-1;
9195 btrfs_init_path(&path);
9196 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9199 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9202 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9203 if (key.objectid != bytenr ||
9204 key.type != BTRFS_EXTENT_ITEM_KEY)
9207 btrfs_release_path(&path);
/* Open the subvolume root the backref claims as referencer. */
9209 key.objectid = root_id;
9210 key.type = BTRFS_ROOT_ITEM_KEY;
9211 key.offset = (u64)-1;
9212 btrfs_init_path(&path);
9214 root = btrfs_read_fs_root(fs_info, &key);
9218 key.objectid = objectid;
9219 key.type = BTRFS_EXTENT_DATA_KEY;
9221 * It can be nasty as data backref offset is
9222 * file offset - file extent offset, which is smaller or
9223 * equal to original backref offset. The only special case is
9224 * overflow. So we need to special check and do further search.
9226 key.offset = offset & (1ULL << 63) ? 0 : offset;
9228 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9233 * Search afterwards to get correct one
9234 * NOTE: As we must do a comprehensive check on the data backref to
9235 * make sure the dref count also matches, we must iterate all file
9236 * extents for that inode.
9239 leaf = path.nodes[0];
9240 slot = path.slots[0];
9242 btrfs_item_key_to_cpu(leaf, &key, slot);
/* Ran past this inode's EXTENT_DATA items: stop counting. */
9243 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9245 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9247 * Except normal disk bytenr and disk num bytes, we still
9248 * need to do extra check on dbackref offset as
9249 * dbackref offset = file_offset - file_extent_offset
9251 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9252 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9253 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9257 ret = btrfs_next_item(root, &path);
9262 btrfs_release_path(&path);
9263 if (found_count != count) {
9265 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9266 bytenr, len, root_id, objectid, offset, count, found_count);
9267 return REFERENCER_MISSING;
9273 * Check if the referencer of a shared data backref exists
/*
 * @parent: leaf bytenr recorded in the SHARED_DATA_REF
 * @bytenr: data extent bytenr being referenced
 *
 * Reads the parent leaf and scans its EXTENT_DATA items for one whose
 * disk bytenr equals @bytenr; returns REFERENCER_MISSING otherwise.
 */
9275 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9276 u64 parent, u64 bytenr)
9278 struct extent_buffer *eb;
9279 struct btrfs_key key;
9280 struct btrfs_file_extent_item *fi;
9281 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9283 int found_parent = 0;
9286 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9287 if (!extent_buffer_uptodate(eb))
9290 nr = btrfs_header_nritems(eb);
9291 for (i = 0; i < nr; i++) {
9292 btrfs_item_key_to_cpu(eb, &key, i);
9293 if (key.type != BTRFS_EXTENT_DATA_KEY)
9296 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
/* Inline extents have no disk bytenr; skip them. */
9297 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9300 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9307 free_extent_buffer(eb);
9308 if (!found_parent) {
9309 error("shared extent %llu referencer lost (parent: %llu)",
9311 return REFERENCER_MISSING;
9317 * This function will check a given extent item, including its backref and
9318 * itself (like crossing stripe boundary and type)
9320 * Since we don't use extent_record anymore, introduce new error bit
/*
 * @eb:   extent-tree leaf holding the EXTENT_ITEM/METADATA_ITEM
 * @slot: slot of the item inside @eb
 *
 * Accumulates global bytes_used, rejects legacy V0 items, checks
 * metadata blocks against stripe boundaries, then dispatches every
 * inline backref to the matching referencer check
 * (tree block / shared block / data ref / shared data ref).
 */
9322 static int check_extent_item(struct btrfs_fs_info *fs_info,
9323 struct extent_buffer *eb, int slot)
9325 struct btrfs_extent_item *ei;
9326 struct btrfs_extent_inline_ref *iref;
9327 struct btrfs_extent_data_ref *dref;
9331 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9332 u32 item_size = btrfs_item_size_nr(eb, slot);
9337 struct btrfs_key key;
/* EXTENT_ITEM key.offset is the byte length; METADATA_ITEM uses nodesize. */
9341 btrfs_item_key_to_cpu(eb, &key, slot);
9342 if (key.type == BTRFS_EXTENT_ITEM_KEY)
9343 bytes_used += key.offset;
9345 bytes_used += nodesize;
9347 if (item_size < sizeof(*ei)) {
9349 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9350 * old thing when on disk format is still un-determined.
9351 * No need to care about it anymore
9353 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9357 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9358 flags = btrfs_extent_flags(eb, ei);
9360 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
/* Metadata blocks must not straddle a 64K stripe boundary. */
9362 if (metadata && check_crossing_stripes(key.objectid, eb->len)) {
9363 error("bad metadata [%llu, %llu) crossing stripe boundary",
9364 key.objectid, key.objectid + nodesize);
9365 err |= CROSSING_STRIPE_BOUNDARY;
9368 ptr = (unsigned long)(ei + 1);
9370 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9371 /* Old EXTENT_ITEM metadata */
9372 struct btrfs_tree_block_info *info;
9374 info = (struct btrfs_tree_block_info *)ptr;
9375 level = btrfs_tree_block_level(eb, info);
9376 ptr += sizeof(struct btrfs_tree_block_info);
9378 /* New METADATA_ITEM */
9381 end = (unsigned long)ei + item_size;
9384 err |= ITEM_SIZE_MISMATCH;
9388 /* Now check every backref in this extent item */
9390 iref = (struct btrfs_extent_inline_ref *)ptr;
9391 type = btrfs_extent_inline_ref_type(eb, iref);
9392 offset = btrfs_extent_inline_ref_offset(eb, iref);
9394 case BTRFS_TREE_BLOCK_REF_KEY:
9395 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9399 case BTRFS_SHARED_BLOCK_REF_KEY:
9400 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9404 case BTRFS_EXTENT_DATA_REF_KEY:
9405 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9406 ret = check_extent_data_backref(fs_info,
9407 btrfs_extent_data_ref_root(eb, dref),
9408 btrfs_extent_data_ref_objectid(eb, dref),
9409 btrfs_extent_data_ref_offset(eb, dref),
9410 key.objectid, key.offset,
9411 btrfs_extent_data_ref_count(eb, dref));
9414 case BTRFS_SHARED_DATA_REF_KEY:
9415 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9419 error("extent[%llu %d %llu] has unknown ref type: %d",
9420 key.objectid, key.type, key.offset, type);
9421 err |= UNKNOWN_TYPE;
9425 ptr += btrfs_extent_inline_ref_size(type);
9434 * Check if a dev extent item is referred correctly by its chunk
/*
 * @eb:   device-tree leaf holding the DEV_EXTENT item
 * @slot: slot of the item inside @eb
 *
 * Looks up the chunk the dev extent claims to belong to, checks the
 * lengths match, and verifies one of the chunk's stripes points back
 * at this (devid, offset).
 */
9436 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9437 struct extent_buffer *eb, int slot)
9439 struct btrfs_root *chunk_root = fs_info->chunk_root;
9440 struct btrfs_dev_extent *ptr;
9441 struct btrfs_path path;
9442 struct btrfs_key chunk_key;
9443 struct btrfs_key devext_key;
9444 struct btrfs_chunk *chunk;
9445 struct extent_buffer *l;
9449 int found_chunk = 0;
9452 btrfs_item_key_to_cpu(eb, &devext_key, slot);
9453 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9454 length = btrfs_dev_extent_length(eb, ptr);
/* The dev extent records which chunk it belongs to; look it up. */
9456 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9457 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9458 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9460 btrfs_init_path(&path);
9461 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9466 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9467 if (btrfs_chunk_length(l, chunk) != length)
/* One of the chunk's stripes must map back to this dev extent. */
9470 num_stripes = btrfs_chunk_num_stripes(l, chunk);
9471 for (i = 0; i < num_stripes; i++) {
9472 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9473 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9475 if (devid == devext_key.objectid &&
9476 offset == devext_key.offset) {
9482 btrfs_release_path(&path);
9485 "device extent[%llu, %llu, %llu] did not find the related chunk",
9486 devext_key.objectid, devext_key.offset, length);
9487 return REFERENCER_MISSING;
9493 * Check if the used space is correct with the dev item
/*
 * @eb:   chunk-tree leaf holding the DEV_ITEM
 * @slot: slot of the DEV_ITEM inside @eb
 *
 * Sums the lengths of all DEV_EXTENT items for this devid in the
 * device tree and compares against bytes_used from the DEV_ITEM.
 * Returns REFERENCER_MISSING / ACCOUNTING_MISMATCH on failure.
 */
9495 static int check_dev_item(struct btrfs_fs_info *fs_info,
9496 struct extent_buffer *eb, int slot)
9498 struct btrfs_root *dev_root = fs_info->dev_root;
9499 struct btrfs_dev_item *dev_item;
9500 struct btrfs_path path;
9501 struct btrfs_key key;
9502 struct btrfs_dev_extent *ptr;
9508 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9509 dev_id = btrfs_device_id(eb, dev_item);
9510 used = btrfs_device_bytes_used(eb, dev_item);
9512 key.objectid = dev_id;
9513 key.type = BTRFS_DEV_EXTENT_KEY;
9516 btrfs_init_path(&path);
9517 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* A device with no dev extents at all is a lost referencer. */
9519 btrfs_item_key_to_cpu(eb, &key, slot);
9520 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9521 key.objectid, key.type, key.offset);
9522 btrfs_release_path(&path);
9523 return REFERENCER_MISSING;
9526 /* Iterate dev_extents to calculate the used space of a device */
9528 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9530 if (key.objectid > dev_id)
9532 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9535 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9536 struct btrfs_dev_extent);
9537 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9539 ret = btrfs_next_item(dev_root, &path);
9543 btrfs_release_path(&path);
9545 if (used != total) {
9546 btrfs_item_key_to_cpu(eb, &key, slot);
9548 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9549 total, used, BTRFS_ROOT_TREE_OBJECTID,
9550 BTRFS_DEV_EXTENT_KEY, dev_id);
9551 return ACCOUNTING_MISMATCH;
9557 * Check a block group item with its referener (chunk) and its used space
9558 * with extent/metadata item
/*
 * @eb:   extent-tree leaf holding the BLOCK_GROUP_ITEM
 * @slot: slot of the item inside @eb
 *
 * Two checks: (1) the chunk covering this block group exists and has
 * the same length; (2) the sum of extent/metadata items inside the
 * block group range equals the "used" field, and each extent's
 * DATA/TREE_BLOCK flag agrees with the block group type.
 */
9560 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9561 struct extent_buffer *eb, int slot)
9563 struct btrfs_root *extent_root = fs_info->extent_root;
9564 struct btrfs_root *chunk_root = fs_info->chunk_root;
9565 struct btrfs_block_group_item *bi;
9566 struct btrfs_block_group_item bg_item;
9567 struct btrfs_path path;
9568 struct btrfs_key bg_key;
9569 struct btrfs_key chunk_key;
9570 struct btrfs_key extent_key;
9571 struct btrfs_chunk *chunk;
9572 struct extent_buffer *leaf;
9573 struct btrfs_extent_item *ei;
9574 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9582 btrfs_item_key_to_cpu(eb, &bg_key, slot);
9583 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* Copy the item out of the leaf so accessor helpers can be used. */
9584 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9585 used = btrfs_block_group_used(&bg_item);
9586 bg_flags = btrfs_block_group_flags(&bg_item);
/* Chunk items are keyed (FIRST_CHUNK_TREE, CHUNK_ITEM, logical start). */
9588 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9589 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9590 chunk_key.offset = bg_key.objectid;
9592 btrfs_init_path(&path);
9593 /* Search for the referencer chunk */
9594 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9597 "block group[%llu %llu] did not find the related chunk item",
9598 bg_key.objectid, bg_key.offset);
9599 err |= REFERENCER_MISSING;
9601 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9602 struct btrfs_chunk);
9603 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9606 "block group[%llu %llu] related chunk item length does not match",
9607 bg_key.objectid, bg_key.offset);
9608 err |= REFERENCER_MISMATCH;
9611 btrfs_release_path(&path);
9613 /* Search from the block group bytenr */
9614 extent_key.objectid = bg_key.objectid;
9615 extent_key.type = 0;
9616 extent_key.offset = 0;
9618 btrfs_init_path(&path);
9619 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9623 /* Iterate extent tree to account used space */
9625 leaf = path.nodes[0];
9626 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* Past the end of the block group: stop accounting. */
9627 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9630 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9631 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9633 if (extent_key.objectid < bg_key.objectid)
/* METADATA_ITEM contributes nodesize; EXTENT_ITEM contributes offset. */
9636 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9639 total += extent_key.offset;
9641 ei = btrfs_item_ptr(leaf, path.slots[0],
9642 struct btrfs_extent_item);
9643 flags = btrfs_extent_flags(leaf, ei);
/* Extent type must match the block group type (data vs metadata). */
9644 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9645 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9647 "bad extent[%llu, %llu) type mismatch with chunk",
9648 extent_key.objectid,
9649 extent_key.objectid + extent_key.offset);
9650 err |= CHUNK_TYPE_MISMATCH;
9652 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9653 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9654 BTRFS_BLOCK_GROUP_METADATA))) {
9656 "bad extent[%llu, %llu) type mismatch with chunk",
9657 extent_key.objectid,
9658 extent_key.objectid + nodesize);
9659 err |= CHUNK_TYPE_MISMATCH;
9663 ret = btrfs_next_item(extent_root, &path);
9669 btrfs_release_path(&path);
9671 if (total != used) {
9673 "block group[%llu %llu] used %llu but extent items used %llu",
9674 bg_key.objectid, bg_key.offset, used, total);
9675 err |= ACCOUNTING_MISMATCH;
9681 * Check a chunk item.
9682 * Including checking all referred dev_extents and block group
/*
 * Verify one chunk item against its referencers: the matching block group
 * item in the extent tree and one dev extent per stripe in the dev tree.
 * Accumulates error bits (BYTES_UNALIGNED, UNKNOWN_TYPE, REFERENCER_MISSING,
 * BACKREF_MISSING) into err rather than bailing on the first problem.
 * NOTE(review): this extract elides some original lines, so braces/returns
 * are only partially visible.
 */
9684 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9685 struct extent_buffer *eb, int slot)
9687 struct btrfs_root *extent_root = fs_info->extent_root;
9688 struct btrfs_root *dev_root = fs_info->dev_root;
9689 struct btrfs_path path;
9690 struct btrfs_key chunk_key;
9691 struct btrfs_key bg_key;
9692 struct btrfs_key devext_key;
9693 struct btrfs_chunk *chunk;
9694 struct extent_buffer *leaf;
9695 struct btrfs_block_group_item *bi;
9696 struct btrfs_block_group_item bg_item;
9697 struct btrfs_dev_extent *ptr;
9698 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
/* chunk_key.offset is the chunk's logical start; length must be sector aligned */
9710 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9711 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9712 length = btrfs_chunk_length(eb, chunk);
9713 chunk_end = chunk_key.offset + length;
9714 if (!IS_ALIGNED(length, sectorsize)) {
9715 error("chunk[%llu %llu) not aligned to %u",
9716 chunk_key.offset, chunk_end, sectorsize);
9717 err |= BYTES_UNALIGNED;
/* A chunk must carry at least one of DATA/METADATA/SYSTEM ... */
9721 type = btrfs_chunk_type(eb, chunk);
9722 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9723 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9724 error("chunk[%llu %llu) has no chunk type",
9725 chunk_key.offset, chunk_end);
9726 err |= UNKNOWN_TYPE;
/* ... and at most one RAID/DUP profile bit (power-of-two test) */
9728 if (profile && (profile & (profile - 1))) {
9729 error("chunk[%llu %llu) multiple profiles detected: %llx",
9730 chunk_key.offset, chunk_end, profile);
9731 err |= UNKNOWN_TYPE;
/* Block group item key mirrors the chunk: (logical start, BG_ITEM, length) */
9734 bg_key.objectid = chunk_key.offset;
9735 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9736 bg_key.offset = length;
9738 btrfs_init_path(&path);
9739 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9742 "chunk[%llu %llu) did not find the related block group item",
9743 chunk_key.offset, chunk_end);
9744 err |= REFERENCER_MISSING;
9746 leaf = path.nodes[0];
9747 bi = btrfs_item_ptr(leaf, path.slots[0],
9748 struct btrfs_block_group_item);
9749 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
/* Block group flags must match the chunk type exactly */
9751 if (btrfs_block_group_flags(&bg_item) != type) {
9753 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9754 chunk_key.offset, chunk_end, type,
9755 btrfs_block_group_flags(&bg_item));
9756 err |= REFERENCER_MISSING;
/*
 * Every stripe must be backed by a dev extent whose chunk objectid/offset
 * point back at this chunk and whose length equals the chunk length.
 */
9760 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9761 for (i = 0; i < num_stripes; i++) {
9762 btrfs_release_path(&path);
9763 btrfs_init_path(&path);
9764 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9765 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9766 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9768 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9773 leaf = path.nodes[0];
9774 ptr = btrfs_item_ptr(leaf, path.slots[0],
9775 struct btrfs_dev_extent);
9776 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9777 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9778 if (objectid != chunk_key.objectid ||
9779 offset != chunk_key.offset ||
9780 btrfs_dev_extent_length(leaf, ptr) != length)
9784 err |= BACKREF_MISSING;
/*
 * NOTE(review): message prints chunk_key.objectid here, whereas the other
 * messages above print chunk_key.offset (the chunk start) — looks
 * inconsistent, verify against upstream before changing.
 */
9786 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9787 chunk_key.objectid, chunk_end, i);
9790 btrfs_release_path(&path);
9796 * Main entry function to check known items and update related accounting info
/*
 * Dispatch table for lowmem check: walk every item of a leaf and run the
 * item-type specific checker, also accounting csum bytes. Unknown item
 * types fall through untouched. Error bits from the callees are collected
 * (the accumulation lines are elided in this extract).
 */
9798 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9800 struct btrfs_fs_info *fs_info = root->fs_info;
9801 struct btrfs_key key;
9804 struct btrfs_extent_data_ref *dref;
9809 btrfs_item_key_to_cpu(eb, &key, slot);
9813 case BTRFS_EXTENT_DATA_KEY:
9814 ret = check_extent_data_item(root, eb, slot);
9817 case BTRFS_BLOCK_GROUP_ITEM_KEY:
9818 ret = check_block_group_item(fs_info, eb, slot);
9821 case BTRFS_DEV_ITEM_KEY:
9822 ret = check_dev_item(fs_info, eb, slot);
9825 case BTRFS_CHUNK_ITEM_KEY:
9826 ret = check_chunk_item(fs_info, eb, slot);
9829 case BTRFS_DEV_EXTENT_KEY:
9830 ret = check_dev_extent_item(fs_info, eb, slot);
9833 case BTRFS_EXTENT_ITEM_KEY:
9834 case BTRFS_METADATA_ITEM_KEY:
9835 ret = check_extent_item(fs_info, eb, slot);
/* csum items are only accounted here; their content is checked elsewhere */
9838 case BTRFS_EXTENT_CSUM_KEY:
9839 total_csum_bytes += btrfs_item_size_nr(eb, slot);
9841 case BTRFS_TREE_BLOCK_REF_KEY:
9842 ret = check_tree_block_backref(fs_info, key.offset,
9846 case BTRFS_EXTENT_DATA_REF_KEY:
9847 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9848 ret = check_extent_data_backref(fs_info,
9849 btrfs_extent_data_ref_root(eb, dref),
9850 btrfs_extent_data_ref_objectid(eb, dref),
9851 btrfs_extent_data_ref_offset(eb, dref),
9853 btrfs_extent_data_ref_count(eb, dref));
9856 case BTRFS_SHARED_BLOCK_REF_KEY:
9857 ret = check_shared_block_backref(fs_info, key.offset,
9861 case BTRFS_SHARED_DATA_REF_KEY:
9862 ret = check_shared_data_backref(fs_info, key.offset,
/* advance to next item in the same leaf, loop back if any remain */
9870 if (++slot < btrfs_header_nritems(eb))
9877 * Helper function for later fs/subvol tree check. To determine if a tree
9878 * block should be checked.
9879 * This function will ensure only the direct referencer with lowest rootid to
9880 * check a fs/subvolume tree block.
9882 * Backref check at extent tree would detect errors like missing subvolume
9883 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * Decide whether @root should be the one to check shared tree block @eb.
 * Scans the inline backrefs of the block's extent item: if any
 * TREE_BLOCK_REF has a lower root id than ours, some other root will
 * check it and we skip. On any lookup failure we err on the side of
 * checking the block ourselves.
 */
9885 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
9887 struct btrfs_root *extent_root = root->fs_info->extent_root;
9888 struct btrfs_key key;
9889 struct btrfs_path path;
9890 struct extent_buffer *leaf;
9892 struct btrfs_extent_item *ei;
9898 struct btrfs_extent_inline_ref *iref;
/* Search (bytenr, METADATA_ITEM, -1) then step back to the extent item */
9901 btrfs_init_path(&path);
9902 key.objectid = btrfs_header_bytenr(eb);
9903 key.type = BTRFS_METADATA_ITEM_KEY;
9904 key.offset = (u64)-1;
9907 * Any failure in backref resolving means we can't determine
9908 * whom the tree block belongs to.
9909 * So in that case, we need to check that tree block
9911 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9915 ret = btrfs_previous_extent_item(extent_root, &path,
9916 btrfs_header_bytenr(eb));
9920 leaf = path.nodes[0];
9921 slot = path.slots[0];
9922 btrfs_item_key_to_cpu(leaf, &key, slot);
9923 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * METADATA_ITEM: inline refs start right after the extent item.
 * EXTENT_ITEM: a tree_block_info sits in between first.
 */
9925 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9926 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9928 struct btrfs_tree_block_info *info;
9930 info = (struct btrfs_tree_block_info *)(ei + 1);
9931 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* Walk all inline refs of the item */
9934 item_size = btrfs_item_size_nr(leaf, slot);
9935 ptr = (unsigned long)iref;
9936 end = (unsigned long)ei + item_size;
9938 iref = (struct btrfs_extent_inline_ref *)ptr;
9939 type = btrfs_extent_inline_ref_type(leaf, iref);
9940 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9943 * We only check the tree block if current root is
9944 * the lowest referencer of it.
9946 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9947 offset < root->objectid) {
9948 btrfs_release_path(&path);
9952 ptr += btrfs_extent_inline_ref_size(type);
9955 * Normally we should also check keyed tree block ref, but that may be
9956 * very time consuming. Inlined ref should already make us skip a lot
9957 * of refs now. So skip search keyed tree block ref.
9961 btrfs_release_path(&path);
9966 * Traversal function for tree block. We will do:
9967 * 1) Skip shared fs/subvolume tree blocks
9968 * 2) Update related bytes accounting
9969 * 3) Pre-order traversal
/*
 * Recursively check one tree block (pre-order): skip shared fs/subvol
 * blocks owned by a lower root, update global byte accounting, check the
 * block's own refs/items, then descend into children that are not below
 * the root's drop_progress cursor. Recursion depth is bounded by the
 * btree level (max 8), so plain recursion is safe.
 */
9971 static int traverse_tree_block(struct btrfs_root *root,
9972 struct extent_buffer *node)
9974 struct extent_buffer *eb;
9975 struct btrfs_key key;
9976 struct btrfs_key drop_key;
9984 * Skip shared fs/subvolume tree block, in that case they will
9985 * be checked by referencer with lowest rootid
9987 if (is_fstree(root->objectid) && !should_check(root, node))
9990 /* Update bytes accounting */
9991 total_btree_bytes += node->len;
9992 if (fs_root_objectid(btrfs_header_owner(node)))
9993 total_fs_tree_bytes += node->len;
9994 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
9995 total_extent_tree_bytes += node->len;
/* Detect pre-mixed-backref (old format) relocation leftovers once */
9996 if (!found_old_backref &&
9997 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
9998 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
9999 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
10000 found_old_backref = 1;
10002 /* pre-order tranversal, check itself first */
10003 level = btrfs_header_level(node);
10004 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
10005 btrfs_header_level(node),
10006 btrfs_header_owner(node));
/* Errors are reported but traversal continues to find more problems */
10010 "check %s failed root %llu bytenr %llu level %d, force continue check",
10011 level ? "node":"leaf", root->objectid,
10012 btrfs_header_bytenr(node), btrfs_header_level(node));
/* Leaf: check items and account the unused leaf space as waste */
10015 btree_space_waste += btrfs_leaf_free_space(root, node);
10016 ret = check_leaf_items(root, node);
/* Node: account unused key-pointer slots, then recurse into children */
10021 nr = btrfs_header_nritems(node);
10022 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
10023 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10024 sizeof(struct btrfs_key_ptr);
10026 /* Then check all its children */
10027 for (i = 0; i < nr; i++) {
10028 u64 blocknr = btrfs_node_blockptr(node, i);
10030 btrfs_node_key_to_cpu(node, &key, i);
/* Skip subtrees already dropped by an interrupted snapshot delete */
10031 if (level == root->root_item.drop_level &&
10032 is_dropped_key(&key, &drop_key))
10036 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10037 * to call the function itself.
10039 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10040 if (extent_buffer_uptodate(eb)) {
10041 ret = traverse_tree_block(root, eb);
10044 free_extent_buffer(eb);
10051 * Low memory usage version check_chunks_and_extents.
10053 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10055 struct btrfs_path path;
10056 struct btrfs_key key;
10057 struct btrfs_root *root1;
10058 struct btrfs_root *cur_root;
10062 root1 = root->fs_info->chunk_root;
10063 ret = traverse_tree_block(root1, root1->node);
10066 root1 = root->fs_info->tree_root;
10067 ret = traverse_tree_block(root1, root1->node);
10070 btrfs_init_path(&path);
10071 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10073 key.type = BTRFS_ROOT_ITEM_KEY;
10075 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10077 error("cannot find extent treet in tree_root");
10082 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10083 if (key.type != BTRFS_ROOT_ITEM_KEY)
10085 key.offset = (u64)-1;
10087 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10088 if (IS_ERR(cur_root) || !cur_root) {
10089 error("failed to read tree: %lld", key.objectid);
10093 ret = traverse_tree_block(cur_root, cur_root->node);
10097 ret = btrfs_next_item(root1, &path);
10103 btrfs_release_path(&path);
/*
 * Replace @root's node with a freshly allocated (or, with @overwrite,
 * reused) empty block and hook it into the dirty list, as part of
 * re-initializing a tree during repair.
 */
10107 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10108 struct btrfs_root *root, int overwrite)
10110 struct extent_buffer *c;
10111 struct extent_buffer *old = root->node;
10114 struct btrfs_disk_key disk_key = {0,0,0};
10120 extent_buffer_get(c);
10123 c = btrfs_alloc_free_block(trans, root,
10125 root->root_key.objectid,
10126 &disk_key, level, 0, 0);
10129 extent_buffer_get(c);
/* Initialize an empty header for the new root block */
10133 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10134 btrfs_set_header_level(c, level);
10135 btrfs_set_header_bytenr(c, c->start);
10136 btrfs_set_header_generation(c, trans->transid);
10137 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10138 btrfs_set_header_owner(c, root->root_key.objectid);
10140 write_extent_buffer(c, root->fs_info->fsid,
10141 btrfs_header_fsid(), BTRFS_FSID_SIZE);
10143 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10144 btrfs_header_chunk_tree_uuid(c),
10147 btrfs_mark_buffer_dirty(c);
10149 * this case can happen in the following case:
10151 * 1.overwrite previous root.
10153 * 2.reinit reloc data root, this is because we skip pin
10154 * down reloc data tree before which means we can allocate
10155 * same block bytenr here.
10157 if (old->start == c->start) {
10158 btrfs_set_root_generation(&root->root_item,
10160 root->root_item.level = btrfs_header_level(root->node);
10161 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10162 &root->root_key, &root->root_item);
10164 free_extent_buffer(c);
/* Drop the reference to the old root node we replaced */
10168 free_extent_buffer(old);
10170 add_root_to_dirty_list(root);
/*
 * Recursively mark every reachable metadata block as pinned so the repair
 * allocator won't hand out space still occupied by metadata. When walking
 * the tree root (@tree_root set), also descend into the trees referenced
 * by root items, except the extent tree and reloc roots which get rebuilt.
 */
10174 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10175 struct extent_buffer *eb, int tree_root)
10177 struct extent_buffer *tmp;
10178 struct btrfs_root_item *ri;
10179 struct btrfs_key key;
10182 int level = btrfs_header_level(eb);
10188 * If we have pinned this block before, don't pin it again.
10189 * This can not only avoid forever loop with broken filesystem
10190 * but also give us some speedups.
10192 if (test_range_bit(&fs_info->pinned_extents, eb->start,
10193 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10196 btrfs_pin_extent(fs_info, eb->start, eb->len);
10198 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10199 nritems = btrfs_header_nritems(eb);
10200 for (i = 0; i < nritems; i++) {
/* Leaf of the tree root: follow ROOT_ITEMs into subordinate trees */
10202 btrfs_item_key_to_cpu(eb, &key, i);
10203 if (key.type != BTRFS_ROOT_ITEM_KEY)
10205 /* Skip the extent root and reloc roots */
10206 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10207 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10208 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10210 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10211 bytenr = btrfs_disk_root_bytenr(eb, ri);
10214 * If at any point we start needing the real root we
10215 * will have to build a stump root for the root we are
10216 * in, but for now this doesn't actually use the root so
10217 * just pass in extent_root.
10219 tmp = read_tree_block(fs_info->extent_root, bytenr,
10221 if (!extent_buffer_uptodate(tmp)) {
10222 fprintf(stderr, "Error reading root block\n");
10225 ret = pin_down_tree_blocks(fs_info, tmp, 0);
10226 free_extent_buffer(tmp);
/* Internal node: pin or recurse through each child pointer */
10230 bytenr = btrfs_node_blockptr(eb, i);
10232 /* If we aren't the tree root don't read the block */
10233 if (level == 1 && !tree_root) {
10234 btrfs_pin_extent(fs_info, bytenr, nodesize);
10238 tmp = read_tree_block(fs_info->extent_root, bytenr,
10240 if (!extent_buffer_uptodate(tmp)) {
10241 fprintf(stderr, "Error reading tree block\n");
10244 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10245 free_extent_buffer(tmp);
/*
 * Pin all metadata reachable from the chunk root and the tree root
 * (the latter with tree_root=1 so root items are followed too).
 */
10254 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10258 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10262 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block group state from the chunk tree after the
 * old block groups were dropped: re-derive alloc bits from chunk types,
 * create a block group cache entry per chunk and mark its range free.
 */
10265 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10267 struct btrfs_block_group_cache *cache;
10268 struct btrfs_path *path;
10269 struct extent_buffer *leaf;
10270 struct btrfs_chunk *chunk;
10271 struct btrfs_key key;
10275 path = btrfs_alloc_path();
10280 key.type = BTRFS_CHUNK_ITEM_KEY;
10283 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
10285 btrfs_free_path(path);
10290 * We do this in case the block groups were screwed up and had alloc
10291 * bits that aren't actually set on the chunks. This happens with
10292 * restored images every time and could happen in real life I guess.
10294 fs_info->avail_data_alloc_bits = 0;
10295 fs_info->avail_metadata_alloc_bits = 0;
10296 fs_info->avail_system_alloc_bits = 0;
10298 /* First we need to create the in-memory block groups */
10300 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10301 ret = btrfs_next_leaf(fs_info->chunk_root, path);
10303 btrfs_free_path(path);
10311 leaf = path->nodes[0];
10312 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10313 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10318 chunk = btrfs_item_ptr(leaf, path->slots[0],
10319 struct btrfs_chunk);
/* key.offset is the chunk's logical start for CHUNK_ITEMs */
10320 btrfs_add_block_group(fs_info, 0,
10321 btrfs_chunk_type(leaf, chunk),
10322 key.objectid, key.offset,
10323 btrfs_chunk_length(leaf, chunk));
10324 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10325 key.offset + btrfs_chunk_length(leaf, chunk),
/* Walk the new block groups in address order (loop body mostly elided) */
10331 cache = btrfs_lookup_first_block_group(fs_info, start);
10335 start = cache->key.objectid + cache->key.offset;
10338 btrfs_free_path(path);
/*
 * Remove any pending-balance state: delete the balance item, delete all
 * TREE_RELOC root items (batched with del_slot/del_nr), then reinit the
 * data reloc tree and recreate its root dir.
 */
10342 static int reset_balance(struct btrfs_trans_handle *trans,
10343 struct btrfs_fs_info *fs_info)
10345 struct btrfs_root *root = fs_info->tree_root;
10346 struct btrfs_path *path;
10347 struct extent_buffer *leaf;
10348 struct btrfs_key key;
10349 int del_slot, del_nr = 0;
10353 path = btrfs_alloc_path();
10357 key.objectid = BTRFS_BALANCE_OBJECTID;
10358 key.type = BTRFS_BALANCE_ITEM_KEY;
10361 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
/* No balance item found: skip straight to the data reloc tree reinit */
10366 goto reinit_data_reloc;
10371 ret = btrfs_del_item(trans, root, path);
10374 btrfs_release_path(path);
/* Now delete every TREE_RELOC root item */
10376 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10377 key.type = BTRFS_ROOT_ITEM_KEY;
10380 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10384 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
/* Leaf exhausted: flush the pending batched deletion first */
10389 ret = btrfs_del_items(trans, root, path,
10396 btrfs_release_path(path);
10399 ret = btrfs_search_slot(trans, root, &key, path,
10406 leaf = path->nodes[0];
10407 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10408 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10410 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
/* Start a new contiguous run of items to delete in one call */
10415 del_slot = path->slots[0];
10424 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
10428 btrfs_release_path(path);
/* Reinit the data reloc tree to a fresh empty root */
10431 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10432 key.type = BTRFS_ROOT_ITEM_KEY;
10433 key.offset = (u64)-1;
10434 root = btrfs_read_fs_root(fs_info, &key);
10435 if (IS_ERR(root)) {
10436 fprintf(stderr, "Error reading data reloc tree\n");
10437 ret = PTR_ERR(root);
10440 record_root_in_trans(trans, root);
10441 ret = btrfs_fsck_reinit_root(trans, root, 0);
10444 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10446 btrfs_free_path(path);
/*
 * Rebuild the extent tree from scratch: pin all live metadata, drop and
 * recreate the in-memory block groups, reinit the extent root, re-insert
 * one block group item per group, and clear pending balance state.
 * Refuses to run on mixed block group filesystems (data extents would
 * also need pinning, which this code does not do).
 */
10450 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10451 struct btrfs_fs_info *fs_info)
10457 * The only reason we don't do this is because right now we're just
10458 * walking the trees we find and pinning down their bytes, we don't look
10459 * at any of the leaves. In order to do mixed groups we'd have to check
10460 * the leaves of any fs roots and pin down the bytes for any file
10461 * extents we find. Not hard but why do it if we don't have to?
10463 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10464 fprintf(stderr, "We don't support re-initing the extent tree "
10465 "for mixed block groups yet, please notify a btrfs "
10466 "developer you want to do this so they can add this "
10467 "functionality.\n");
10472 * first we need to walk all of the trees except the extent tree and pin
10473 * down the bytes that are in use so we don't overwrite any existing
10476 ret = pin_metadata_blocks(fs_info);
10478 fprintf(stderr, "error pinning down used bytes\n");
10483 * Need to drop all the block groups since we're going to recreate all
10486 btrfs_free_block_groups(fs_info);
10487 ret = reset_block_groups(fs_info);
10489 fprintf(stderr, "error resetting the block groups\n");
10493 /* Ok we can allocate now, reinit the extent root */
10494 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10496 fprintf(stderr, "extent root initialization failed\n");
10498 * When the transaction code is updated we should end the
10499 * transaction, but for now progs only knows about commit so
10500 * just return an error.
10506 * Now we have all the in-memory block groups setup so we can make
10507 * allocations properly, and the metadata we care about is safe since we
10508 * pinned all of it above.
10511 struct btrfs_block_group_cache *cache;
10513 cache = btrfs_lookup_first_block_group(fs_info, start);
10516 start = cache->key.objectid + cache->key.offset;
/* Re-insert the block group item for each in-memory group */
10517 ret = btrfs_insert_item(trans, fs_info->extent_root,
10518 &cache->key, &cache->item,
10519 sizeof(cache->item));
10521 fprintf(stderr, "Error adding block group\n");
/* Process delayed extent ops so the extent tree stays consistent */
10524 btrfs_extent_post_op(trans, fs_info->extent_root);
10527 ret = reset_balance(trans, fs_info);
10529 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force a COW of metadata block @eb by looking up its owner root and
 * doing a write-mode search (cow=1) down to the block's level with its
 * first key. Used by repair to rewrite a damaged/outdated block.
 */
10534 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10536 struct btrfs_path *path;
10537 struct btrfs_trans_handle *trans;
10538 struct btrfs_key key;
10541 printf("Recowing metadata block %llu\n", eb->start);
10542 key.objectid = btrfs_header_owner(eb);
10543 key.type = BTRFS_ROOT_ITEM_KEY;
10544 key.offset = (u64)-1;
/* Note: @root is rebound to the owner root; the caller's root is only
 * used to reach fs_info. */
10546 root = btrfs_read_fs_root(root->fs_info, &key);
10547 if (IS_ERR(root)) {
10548 fprintf(stderr, "Couldn't find owner root %llu\n",
10550 return PTR_ERR(root);
10553 path = btrfs_alloc_path();
10557 trans = btrfs_start_transaction(root, 1);
10558 if (IS_ERR(trans)) {
10559 btrfs_free_path(path);
10560 return PTR_ERR(trans);
/* Search at the block's own level so the COW happens at @eb itself */
10563 path->lowest_level = btrfs_header_level(eb);
10564 if (path->lowest_level)
10565 btrfs_node_key_to_cpu(eb, &key, 0);
10567 btrfs_item_key_to_cpu(eb, &key, 0);
10569 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10570 btrfs_commit_transaction(trans, root);
10571 btrfs_free_path(path);
/*
 * Delete a previously recorded bad item: look up its owner root by
 * @bad->root_id, search for @bad->key in write mode and remove the item
 * within a one-off transaction.
 */
10575 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10577 struct btrfs_path *path;
10578 struct btrfs_trans_handle *trans;
10579 struct btrfs_key key;
10582 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10583 bad->key.type, bad->key.offset);
10584 key.objectid = bad->root_id;
10585 key.type = BTRFS_ROOT_ITEM_KEY;
10586 key.offset = (u64)-1;
/* Rebind @root to the tree that actually owns the bad item */
10588 root = btrfs_read_fs_root(root->fs_info, &key);
10589 if (IS_ERR(root)) {
10590 fprintf(stderr, "Couldn't find owner root %llu\n",
10592 return PTR_ERR(root);
10595 path = btrfs_alloc_path();
10599 trans = btrfs_start_transaction(root, 1);
10600 if (IS_ERR(trans)) {
10601 btrfs_free_path(path);
10602 return PTR_ERR(trans);
/* -1/1: delete-mode, COW-enabled search for the exact key */
10605 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10611 ret = btrfs_del_item(trans, root, path);
10613 btrfs_commit_transaction(trans, root);
10614 btrfs_free_path(path);
/*
 * Discard the log tree by zeroing the log root pointer and level in the
 * superblock and committing, so the (possibly corrupt) log is never
 * replayed.
 */
10618 static int zero_log_tree(struct btrfs_root *root)
10620 struct btrfs_trans_handle *trans;
10623 trans = btrfs_start_transaction(root, 1);
10624 if (IS_ERR(trans)) {
10625 ret = PTR_ERR(trans);
10628 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10629 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10630 ret = btrfs_commit_transaction(trans, root);
/*
 * Read back the data extent [@start, @start+@len) sector by sector and
 * insert a checksum item for each sector into the csum tree.
 * The third argument to btrfs_csum_file_block (start + len) is the
 * search limit / logical end, the fourth is the sector being csummed.
 */
10634 static int populate_csum(struct btrfs_trans_handle *trans,
10635 struct btrfs_root *csum_root, char *buf, u64 start,
10642 while (offset < len) {
10643 sectorsize = csum_root->sectorsize;
10644 ret = read_extent_data(csum_root, buf, start + offset,
10648 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10649 start + offset, buf, sectorsize);
10652 offset += sectorsize;
/*
 * Walk one fs/subvol tree and (re)generate csums for every regular
 * (non-inline, non-prealloc) file extent it references. -EEXIST from
 * populate_csum is tolerated — the csum may already be present.
 */
10657 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10658 struct btrfs_root *csum_root,
10659 struct btrfs_root *cur_root)
10661 struct btrfs_path *path;
10662 struct btrfs_key key;
10663 struct extent_buffer *node;
10664 struct btrfs_file_extent_item *fi;
10671 path = btrfs_alloc_path();
/* One sector-sized bounce buffer reused for all extents */
10674 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10684 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10687 /* Iterate all regular file extents and fill its csum */
10689 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10691 if (key.type != BTRFS_EXTENT_DATA_KEY)
10693 node = path->nodes[0];
10694 slot = path->slots[0];
10695 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10696 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10698 start = btrfs_file_extent_disk_bytenr(node, fi);
10699 len = btrfs_file_extent_disk_num_bytes(node, fi);
10701 ret = populate_csum(trans, csum_root, buf, start, len);
10702 if (ret == -EEXIST)
10708 * TODO: if next leaf is corrupted, jump to nearest next valid
10711 ret = btrfs_next_item(cur_root, path);
10721 btrfs_free_path(path);
/*
 * Rebuild the csum tree by iterating every fs/subvol root item in the
 * tree root (starting at FS_TREE_OBJECTID) and delegating to
 * fill_csum_tree_from_one_fs_root() for each. Used when the extent tree
 * cannot be trusted (e.g. after --init-extent-tree).
 */
10726 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10727 struct btrfs_root *csum_root)
10729 struct btrfs_fs_info *fs_info = csum_root->fs_info;
10730 struct btrfs_path *path;
10731 struct btrfs_root *tree_root = fs_info->tree_root;
10732 struct btrfs_root *cur_root;
10733 struct extent_buffer *node;
10734 struct btrfs_key key;
10738 path = btrfs_alloc_path();
10742 key.objectid = BTRFS_FS_TREE_OBJECTID;
10744 key.type = BTRFS_ROOT_ITEM_KEY;
10746 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10755 node = path->nodes[0];
10756 slot = path->slots[0];
10757 btrfs_item_key_to_cpu(node, &key, slot);
/* Past the last possible subvolume id: done */
10758 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10760 if (key.type != BTRFS_ROOT_ITEM_KEY)
10762 if (!is_fstree(key.objectid))
10764 key.offset = (u64)-1;
10766 cur_root = btrfs_read_fs_root(fs_info, &key);
10767 if (IS_ERR(cur_root) || !cur_root) {
10768 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10772 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10777 ret = btrfs_next_item(tree_root, path);
10787 btrfs_free_path(path);
/*
 * Rebuild the csum tree by scanning the extent tree for EXTENT_ITEMs
 * flagged as DATA and csumming each such extent. Faster than the fs-tree
 * walk, but only valid when the extent tree itself is trustworthy.
 */
10791 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10792 struct btrfs_root *csum_root)
10794 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10795 struct btrfs_path *path;
10796 struct btrfs_extent_item *ei;
10797 struct extent_buffer *leaf;
10799 struct btrfs_key key;
10802 path = btrfs_alloc_path();
10807 key.type = BTRFS_EXTENT_ITEM_KEY;
10810 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10812 btrfs_free_path(path);
/* One sector-sized bounce buffer for populate_csum */
10816 buf = malloc(csum_root->sectorsize);
10818 btrfs_free_path(path);
10823 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10824 ret = btrfs_next_leaf(extent_root, path);
10832 leaf = path->nodes[0];
10834 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10835 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10840 ei = btrfs_item_ptr(leaf, path->slots[0],
10841 struct btrfs_extent_item);
/* Only data extents carry csums; skip tree blocks */
10842 if (!(btrfs_extent_flags(leaf, ei) &
10843 BTRFS_EXTENT_FLAG_DATA)) {
/* key.objectid = extent start, key.offset = extent length */
10848 ret = populate_csum(trans, csum_root, buf, key.objectid,
10855 btrfs_free_path(path);
10861 * Recalculate the csum and put it into the csum tree.
10863 * Extent tree init will wipe out all the extent info, so in that case, we
10864 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
10865 * will use fs/subvol trees to init the csum tree.
/*
 * Entry point for csum tree regeneration: choose the fs-tree walk when
 * the extent tree can't be trusted (@search_fs_tree), the extent-tree
 * scan otherwise.
 */
10867 static int fill_csum_tree(struct btrfs_trans_handle *trans,
10868 struct btrfs_root *csum_root,
10869 int search_fs_tree)
10871 if (search_fs_tree)
10872 return fill_csum_tree_from_fs(trans, csum_root);
10874 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache: pop every cache_extent, free its
 * containing root_item_info, then free the tree itself and reset the
 * global pointer so a later build starts from scratch.
 */
10877 static void free_roots_info_cache(void)
10879 if (!roots_info_cache)
10882 while (!cache_tree_empty(roots_info_cache)) {
10883 struct cache_extent *entry;
10884 struct root_item_info *rii;
10886 entry = first_cache_extent(roots_info_cache);
10889 remove_cache_extent(roots_info_cache, entry);
10890 rii = container_of(entry, struct root_item_info, cache_extent);
10894 free(roots_info_cache);
10895 roots_info_cache = NULL;
/*
 * Scan the extent tree and record, per subvolume root id, the highest-level
 * tree block that has an inline TREE_BLOCK_REF to that root — i.e. the
 * likely real root node (bytenr/gen/level) — into roots_info_cache.
 * Consumed later by maybe_repair_root_item() to detect stale root items.
 */
10898 static int build_roots_info_cache(struct btrfs_fs_info *info)
10901 struct btrfs_key key;
10902 struct extent_buffer *leaf;
10903 struct btrfs_path *path;
/* Lazily allocate the cache; it persists across calls until freed */
10905 if (!roots_info_cache) {
10906 roots_info_cache = malloc(sizeof(*roots_info_cache));
10907 if (!roots_info_cache)
10909 cache_tree_init(roots_info_cache);
10912 path = btrfs_alloc_path();
10917 key.type = BTRFS_EXTENT_ITEM_KEY;
10920 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10923 leaf = path->nodes[0];
10926 struct btrfs_key found_key;
10927 struct btrfs_extent_item *ei;
10928 struct btrfs_extent_inline_ref *iref;
10929 int slot = path->slots[0];
10934 struct cache_extent *entry;
10935 struct root_item_info *rii;
10937 if (slot >= btrfs_header_nritems(leaf)) {
10938 ret = btrfs_next_leaf(info->extent_root, path);
10945 leaf = path->nodes[0];
10946 slot = path->slots[0];
10949 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
/* Only metadata extents are of interest */
10951 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10952 found_key.type != BTRFS_METADATA_ITEM_KEY)
10955 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10956 flags = btrfs_extent_flags(leaf, ei);
10958 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10959 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/* Skinny metadata item: level is the key offset; otherwise read the
 * tree_block_info that precedes the inline refs */
10962 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10963 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10964 level = found_key.offset;
10966 struct btrfs_tree_block_info *binfo;
10968 binfo = (struct btrfs_tree_block_info *)(ei + 1);
10969 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10970 level = btrfs_tree_block_level(leaf, binfo);
10974 * For a root extent, it must be of the following type and the
10975 * first (and only one) iref in the item.
10977 type = btrfs_extent_inline_ref_type(leaf, iref);
10978 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10981 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
10982 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/* First sighting of this root id: create a fresh record */
10984 rii = malloc(sizeof(struct root_item_info));
10989 rii->cache_extent.start = root_id;
10990 rii->cache_extent.size = 1;
10991 rii->level = (u8)-1;
10992 entry = &rii->cache_extent;
10993 ret = insert_cache_extent(roots_info_cache, entry);
10996 rii = container_of(entry, struct root_item_info,
11000 ASSERT(rii->cache_extent.start == root_id);
11001 ASSERT(rii->cache_extent.size == 1);
/* Keep only the highest-level candidate; count ties at that level
 * (node_count != 1 later means the root block is ambiguous) */
11003 if (level > rii->level || rii->level == (u8)-1) {
11004 rii->level = level;
11005 rii->bytenr = found_key.objectid;
11006 rii->gen = btrfs_extent_generation(leaf, ei);
11007 rii->node_count = 1;
11008 } else if (level == rii->level) {
11016 btrfs_free_path(path);
/*
 * Compare the on-disk root item at @path against the bytenr/gen/level
 * recorded in roots_info_cache for @root_key->objectid, and rewrite the
 * root item in place when they disagree (unless @read_only_mode).
 * Fixes the 3.17 read-only-snapshot regression documented below.
 */
11021 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11022 struct btrfs_path *path,
11023 const struct btrfs_key *root_key,
11024 const int read_only_mode)
11026 const u64 root_id = root_key->objectid;
11027 struct cache_extent *entry;
11028 struct root_item_info *rii;
11029 struct btrfs_root_item ri;
11030 unsigned long offset;
11032 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11035 "Error: could not find extent items for root %llu\n",
11036 root_key->objectid);
11040 rii = container_of(entry, struct root_item_info, cache_extent);
11041 ASSERT(rii->cache_extent.start == root_id);
11042 ASSERT(rii->cache_extent.size == 1);
/* More than one candidate block at the top level: can't decide safely */
11044 if (rii->node_count != 1) {
11046 "Error: could not find btree root extent for root %llu\n",
11051 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11052 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11054 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11055 btrfs_root_level(&ri) != rii->level ||
11056 btrfs_root_generation(&ri) != rii->gen) {
11059 * If we're in repair mode but our caller told us to not update
11060 * the root item, i.e. just check if it needs to be updated, don't
11061 * print this message, since the caller will call us again shortly
11062 * for the same root item without read only mode (the caller will
11063 * open a transaction first).
11065 if (!(read_only_mode && repair))
11067 "%sroot item for root %llu,"
11068 " current bytenr %llu, current gen %llu, current level %u,"
11069 " new bytenr %llu, new gen %llu, new level %u\n",
11070 (read_only_mode ? "" : "fixing "),
11072 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11073 btrfs_root_level(&ri),
11074 rii->bytenr, rii->gen, rii->level);
/* A newer gen in the root item than in the found root node means the
 * cache candidate is stale — refuse rather than regress the root */
11076 if (btrfs_root_generation(&ri) > rii->gen) {
11078 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11079 root_id, btrfs_root_generation(&ri), rii->gen);
11083 if (!read_only_mode) {
11084 btrfs_set_root_bytenr(&ri, rii->bytenr);
11085 btrfs_set_root_level(&ri, rii->level);
11086 btrfs_set_root_generation(&ri, rii->gen);
11087 write_extent_buffer(path->nodes[0], &ri,
11088 offset, sizeof(ri));
11098 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11099 * caused read-only snapshots to be corrupted if they were created at a moment
11100 * when the source subvolume/snapshot had orphan items. The issue was that the
11101 * on-disk root items became incorrect, referring to the pre orphan cleanup root
11102 * node instead of the post orphan cleanup root node.
11103 * So this function, and its callees, just detects and fixes those cases. Even
11104 * though the regression was for read-only snapshots, this function applies to
11105 * any snapshot/subvolume root.
11106 * This must be run before any other repair code - not doing it so, makes other
11107 * repair code delete or modify backrefs in the extent tree for example, which
11108 * will result in an inconsistent fs after repairing the root items.
11110 static int repair_root_items(struct btrfs_fs_info *info)
11112 struct btrfs_path *path = NULL;
11113 struct btrfs_key key;
11114 struct extent_buffer *leaf;
11115 struct btrfs_trans_handle *trans = NULL;
11118 int need_trans = 0;
/* First pass: record, for every root, the bytenr/level/generation of its
 * real root node as found in the extent tree. */
11120 ret = build_roots_info_cache(info);
11124 path = btrfs_alloc_path();
/* Walk all ROOT_ITEM keys starting at the first subvolume objectid. */
11130 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11131 key.type = BTRFS_ROOT_ITEM_KEY;
11136 * Avoid opening and committing transactions if a leaf doesn't have
11137 * any root items that need to be fixed, so that we avoid rotating
11138 * backup roots unnecessarily.
11141 trans = btrfs_start_transaction(info->tree_root, 1);
11142 if (IS_ERR(trans)) {
11143 ret = PTR_ERR(trans);
11148 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11152 leaf = path->nodes[0];
11155 struct btrfs_key found_key;
/* End of the current leaf: find the next key, then release the path
 * (and commit the open transaction — elided here) before moving on. */
11157 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11158 int no_more_keys = find_next_key(path, &key);
11160 btrfs_release_path(path);
11162 ret = btrfs_commit_transaction(trans,
11174 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
/* Only plain root items; tree relocation roots are skipped. */
11176 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11178 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* NOTE(review): presumably the elided last argument passes read-only
 * mode when no transaction is open, and the !trans && repair branch
 * below restarts this leaf with a transaction — confirm, the lines
 * between 11181 and 11189 are elided in this excerpt. */
11181 ret = maybe_repair_root_item(info, path, &found_key,
11186 if (!trans && repair) {
11189 btrfs_release_path(path);
/* Common exit: drop the cache and path, commit any open transaction. */
11199 free_roots_info_cache();
11200 btrfs_free_path(path);
11202 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Usage/help text for "btrfs check", consumed by usage(); first line is the
 * synopsis, following lines are the option descriptions.
 */
11209 const char * const cmd_check_usage[] = {
11210 "btrfs check [options] <device>",
11211 "Check structural integrity of a filesystem (unmounted).",
11212 "Check structural integrity of an unmounted filesystem. Verify internal",
11213 "trees' consistency and item connectivity. In the repair mode try to",
11214 "fix the problems found. ",
11215 "WARNING: the repair mode is considered dangerous",
11217 "-s|--super <superblock> use this superblock copy",
11218 "-b|--backup use the first valid backup root copy",
11219 "--repair try to repair the filesystem",
11220 "--readonly run in read-only mode (default)",
11221 "--init-csum-tree create a new CRC tree",
11222 "--init-extent-tree create a new extent tree",
11223 "--mode <MODE> select mode, allows to make some memory/IO",
11224 " trade-offs, where MODE is one of:",
11225 " original - read inodes and extents to memory (requires",
11226 " more memory, does less IO)",
11227 " lowmem - try to use less memory but read blocks again",
11229 "--check-data-csum verify checksums of data blocks",
11230 "-Q|--qgroup-report print a report on qgroup consistency",
11231 "-E|--subvol-extents <subvolid>",
11232 " print subvolume extents and sharing state",
11233 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
11234 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
11235 "-p|--progress indicate progress",
11239 int cmd_check(int argc, char **argv)
11241 struct cache_tree root_cache;
11242 struct btrfs_root *root;
11243 struct btrfs_fs_info *info;
11246 u64 tree_root_bytenr = 0;
11247 u64 chunk_root_bytenr = 0;
11248 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11251 int init_csum_tree = 0;
11253 int qgroup_report = 0;
11254 int qgroups_repaired = 0;
11255 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
11259 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11260 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11261 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11263 static const struct option long_options[] = {
11264 { "super", required_argument, NULL, 's' },
11265 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11266 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11267 { "init-csum-tree", no_argument, NULL,
11268 GETOPT_VAL_INIT_CSUM },
11269 { "init-extent-tree", no_argument, NULL,
11270 GETOPT_VAL_INIT_EXTENT },
11271 { "check-data-csum", no_argument, NULL,
11272 GETOPT_VAL_CHECK_CSUM },
11273 { "backup", no_argument, NULL, 'b' },
11274 { "subvol-extents", required_argument, NULL, 'E' },
11275 { "qgroup-report", no_argument, NULL, 'Q' },
11276 { "tree-root", required_argument, NULL, 'r' },
11277 { "chunk-root", required_argument, NULL,
11278 GETOPT_VAL_CHUNK_TREE },
11279 { "progress", no_argument, NULL, 'p' },
11280 { "mode", required_argument, NULL,
11282 { NULL, 0, NULL, 0}
11285 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11289 case 'a': /* ignored */ break;
11291 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11294 num = arg_strtou64(optarg);
11295 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11297 "super mirror should be less than %d",
11298 BTRFS_SUPER_MIRROR_MAX);
11301 bytenr = btrfs_sb_offset(((int)num));
11302 printf("using SB copy %llu, bytenr %llu\n", num,
11303 (unsigned long long)bytenr);
11309 subvolid = arg_strtou64(optarg);
11312 tree_root_bytenr = arg_strtou64(optarg);
11314 case GETOPT_VAL_CHUNK_TREE:
11315 chunk_root_bytenr = arg_strtou64(optarg);
11318 ctx.progress_enabled = true;
11322 usage(cmd_check_usage);
11323 case GETOPT_VAL_REPAIR:
11324 printf("enabling repair mode\n");
11326 ctree_flags |= OPEN_CTREE_WRITES;
11328 case GETOPT_VAL_READONLY:
11331 case GETOPT_VAL_INIT_CSUM:
11332 printf("Creating a new CRC tree\n");
11333 init_csum_tree = 1;
11335 ctree_flags |= OPEN_CTREE_WRITES;
11337 case GETOPT_VAL_INIT_EXTENT:
11338 init_extent_tree = 1;
11339 ctree_flags |= (OPEN_CTREE_WRITES |
11340 OPEN_CTREE_NO_BLOCK_GROUPS);
11343 case GETOPT_VAL_CHECK_CSUM:
11344 check_data_csum = 1;
11346 case GETOPT_VAL_MODE:
11347 check_mode = parse_check_mode(optarg);
11348 if (check_mode == CHECK_MODE_UNKNOWN) {
11349 error("unknown mode: %s", optarg);
11356 if (check_argc_exact(argc - optind, 1))
11357 usage(cmd_check_usage);
11359 if (ctx.progress_enabled) {
11360 ctx.tp = TASK_NOTHING;
11361 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11364 /* This check is the only reason for --readonly to exist */
11365 if (readonly && repair) {
11366 error("repair options are not compatible with --readonly");
11371 * Not supported yet
11373 if (repair && check_mode == CHECK_MODE_LOWMEM) {
11374 error("low memory mode doesn't support repair yet");
11379 cache_tree_init(&root_cache);
11381 if((ret = check_mounted(argv[optind])) < 0) {
11382 error("could not check mount status: %s", strerror(-ret));
11385 error("%s is currently mounted, aborting", argv[optind]);
11390 /* only allow partial opening under repair mode */
11392 ctree_flags |= OPEN_CTREE_PARTIAL;
11394 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11395 chunk_root_bytenr, ctree_flags);
11397 error("cannot open file system");
11402 global_info = info;
11403 root = info->fs_root;
11406 * repair mode will force us to commit transaction which
11407 * will make us fail to load log tree when mounting.
11409 if (repair && btrfs_super_log_root(info->super_copy)) {
11410 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
11415 ret = zero_log_tree(root);
11417 error("failed to zero log tree: %d", ret);
11422 uuid_unparse(info->super_copy->fsid, uuidbuf);
11423 if (qgroup_report) {
11424 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11426 ret = qgroup_verify_all(info);
11432 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11433 subvolid, argv[optind], uuidbuf);
11434 ret = print_extent_state(info, subvolid);
11437 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11439 if (!extent_buffer_uptodate(info->tree_root->node) ||
11440 !extent_buffer_uptodate(info->dev_root->node) ||
11441 !extent_buffer_uptodate(info->chunk_root->node)) {
11442 error("critical roots corrupted, unable to check the filesystem");
11447 if (init_extent_tree || init_csum_tree) {
11448 struct btrfs_trans_handle *trans;
11450 trans = btrfs_start_transaction(info->extent_root, 0);
11451 if (IS_ERR(trans)) {
11452 error("error starting transaction");
11453 ret = PTR_ERR(trans);
11457 if (init_extent_tree) {
11458 printf("Creating a new extent tree\n");
11459 ret = reinit_extent_tree(trans, info);
11464 if (init_csum_tree) {
11465 printf("Reinitialize checksum tree\n");
11466 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11468 error("checksum tree initialization failed: %d",
11474 ret = fill_csum_tree(trans, info->csum_root,
11477 error("checksum tree refilling failed: %d", ret);
11482 * Ok now we commit and run the normal fsck, which will add
11483 * extent entries for all of the items it finds.
11485 ret = btrfs_commit_transaction(trans, info->extent_root);
11489 if (!extent_buffer_uptodate(info->extent_root->node)) {
11490 error("critical: extent_root, unable to check the filesystem");
11494 if (!extent_buffer_uptodate(info->csum_root->node)) {
11495 error("critical: csum_root, unable to check the filesystem");
11500 if (!ctx.progress_enabled)
11501 printf("checking extents");
11502 if (check_mode == CHECK_MODE_LOWMEM)
11503 ret = check_chunks_and_extents_v2(root);
11505 ret = check_chunks_and_extents(root);
11507 printf("Errors found in extent allocation tree or chunk allocation");
11509 ret = repair_root_items(info);
11513 fprintf(stderr, "Fixed %d roots.\n", ret);
11515 } else if (ret > 0) {
11517 "Found %d roots with an outdated root item.\n",
11520 "Please run a filesystem check with the option --repair to fix them.\n");
11525 if (!ctx.progress_enabled) {
11526 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11527 fprintf(stderr, "checking free space tree\n");
11529 fprintf(stderr, "checking free space cache\n");
11531 ret = check_space_cache(root);
11536 * We used to have to have these hole extents in between our real
11537 * extents so if we don't have this flag set we need to make sure there
11538 * are no gaps in the file extents for inodes, otherwise we can just
11539 * ignore it when this happens.
11541 no_holes = btrfs_fs_incompat(root->fs_info,
11542 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11543 if (!ctx.progress_enabled)
11544 fprintf(stderr, "checking fs roots\n");
11545 ret = check_fs_roots(root, &root_cache);
11549 fprintf(stderr, "checking csums\n");
11550 ret = check_csums(root);
11554 fprintf(stderr, "checking root refs\n");
11555 ret = check_root_refs(root, &root_cache);
11559 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11560 struct extent_buffer *eb;
11562 eb = list_first_entry(&root->fs_info->recow_ebs,
11563 struct extent_buffer, recow);
11564 list_del_init(&eb->recow);
11565 ret = recow_extent_buffer(root, eb);
11570 while (!list_empty(&delete_items)) {
11571 struct bad_item *bad;
11573 bad = list_first_entry(&delete_items, struct bad_item, list);
11574 list_del_init(&bad->list);
11576 ret = delete_bad_item(root, bad);
11580 if (info->quota_enabled) {
11582 fprintf(stderr, "checking quota groups\n");
11583 err = qgroup_verify_all(info);
11587 err = repair_qgroups(info, &qgroups_repaired);
11592 if (!list_empty(&root->fs_info->recow_ebs)) {
11593 error("transid errors in file system");
11597 /* Don't override original ret */
11598 if (!ret && qgroups_repaired)
11599 ret = qgroups_repaired;
11601 if (found_old_backref) { /*
11602 * there was a disk format change when mixed
11603 * backref was in testing tree. The old format
11604 * existed about one week.
11606 printf("\n * Found old mixed backref format. "
11607 "The old format is not supported! *"
11608 "\n * Please mount the FS in readonly mode, "
11609 "backup data and re-format the FS. *\n\n");
11612 printf("found %llu bytes used err is %d\n",
11613 (unsigned long long)bytes_used, ret);
11614 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11615 printf("total tree bytes: %llu\n",
11616 (unsigned long long)total_btree_bytes);
11617 printf("total fs tree bytes: %llu\n",
11618 (unsigned long long)total_fs_tree_bytes);
11619 printf("total extent tree bytes: %llu\n",
11620 (unsigned long long)total_extent_tree_bytes);
11621 printf("btree space waste bytes: %llu\n",
11622 (unsigned long long)btree_space_waste);
11623 printf("file data blocks allocated: %llu\n referenced %llu\n",
11624 (unsigned long long)data_bytes_allocated,
11625 (unsigned long long)data_bytes_referenced);
11627 free_qgroup_counts();
11628 free_root_recs_tree(&root_cache);
11632 if (ctx.progress_enabled)
11633 task_deinit(ctx.info);