2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
49 TASK_NOTHING, /* have to be the last element */
54 enum task_position tp;
56 struct task_info *info;
/*
 * File-scope accounting totals and option flags shared across the whole
 * check run.
 */
static u64 bytes_used = 0;		/* total bytes accounted as used */
static u64 total_csum_bytes = 0;	/* bytes covered by csum items */
static u64 total_btree_bytes = 0;	/* bytes occupied by tree blocks */
static u64 total_fs_tree_bytes = 0;	/* subset of the above in fs trees */
static u64 total_extent_tree_bytes = 0;	/* subset in the extent tree */
static u64 btree_space_waste = 0;	/* slack space inside tree blocks */
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static int found_old_backref = 0;
static LIST_HEAD(duplicate_extents);	/* extent records seen more than once */
static LIST_HEAD(delete_items);
static int no_holes = 0;		/* when set, hole-extent checks are skipped */
static int init_extent_tree = 0;	/* presumably --init-extent-tree; set elsewhere */
static int check_data_csum = 0;		/* also verify data checksums */
static struct btrfs_fs_info *global_info;	/* filesystem being checked */
static struct task_ctx ctx = { 0 };		/* progress-indicator state */
static struct cache_tree *roots_info_cache = NULL;
/* How the check is performed; the original mode is the default. */
enum btrfs_check_mode {
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
/*
 * Generic backref of an extent item; embedded as 'node' in both
 * tree_backref and data_backref.
 */
struct extent_backref {
	struct list_head list;
	unsigned int is_data:1;
	unsigned int found_extent_tree:1;
	unsigned int full_backref:1;
	unsigned int found_ref:1;
	unsigned int broken:1;
/* Map an embedded list_head back to its owning extent_backref. */
static inline struct extent_backref* to_extent_backref(struct list_head *entry)
	return list_entry(entry, struct extent_backref, list);
/* Backref of a data extent, extending the generic extent_backref. */
struct data_backref {
	struct extent_backref node;
/* Map the generic 'node' member back to its data_backref container. */
static inline struct data_backref* to_data_backref(struct extent_backref *back)
	return container_of(back, struct data_backref, node);
 * Much like data_backref, just with the undetermined members removed
 * and changed to use a list_head.
 * During extent scan, it is stored in root->orphan_data_extent.
 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
struct orphan_data_extent {
	struct list_head list;
/* Backref of a tree block, extending the generic extent_backref. */
struct tree_backref {
	struct extent_backref node;
/* Map the generic 'node' member back to its tree_backref container. */
static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
	return container_of(back, struct tree_backref, node);
148 /* Explicit initialization for extent_record::flag_block_full_backref */
149 enum { FLAG_UNSET = 2 };
/*
 * In-memory record of one extent and everything discovered about it while
 * scanning: backrefs, duplicates and consistency flags.
 */
struct extent_record {
	struct list_head backrefs;
	struct list_head dups;		/* duplicate records of this extent */
	struct list_head list;
	struct cache_extent cache;
	struct btrfs_disk_key parent_key;
	u64 extent_item_refs;
	u64 parent_generation;
	/* 2-bit field so it can hold FLAG_UNSET (see enum above) */
	unsigned int flag_block_full_backref:2;
	unsigned int found_rec:1;
	unsigned int content_checked:1;
	unsigned int owner_ref_checked:1;
	unsigned int is_root:1;
	unsigned int metadata:1;
	unsigned int bad_full_backref:1;
	unsigned int crossing_stripes:1;
	unsigned int wrong_chunk_type:1;
/* Map the 'list' member back to its extent_record. */
static inline struct extent_record* to_extent_record(struct list_head *entry)
	return container_of(entry, struct extent_record, list);
/* Evidence collected for one directory link to an inode. */
struct inode_backref {
	struct list_head list;
	unsigned int found_dir_item:1;
	unsigned int found_dir_index:1;
	unsigned int found_inode_ref:1;
/* Map the 'list' member back to its inode_backref. */
static inline struct inode_backref* to_inode_backref(struct list_head *entry)
	return list_entry(entry, struct inode_backref, list);
202 struct root_item_record {
203 struct list_head list;
210 struct btrfs_key drop_key;
/* Error bits accumulated on inode/root backrefs (see print_ref_error()). */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
227 struct file_extent_hole {
/* Everything learned about a single inode while scanning a fs tree. */
struct inode_record {
	struct list_head backrefs;
	unsigned int checked:1;
	unsigned int merging:1;
	unsigned int found_inode_item:1;
	unsigned int found_dir_item:1;
	unsigned int found_file_extent:1;
	unsigned int found_csum_item:1;
	unsigned int some_csum_missing:1;
	unsigned int nodatasum:1;
	/* file extent holes, kept merged; see add_file_extent_hole() */
	struct rb_root holes;
	struct list_head orphan_extents;
/* Error bits recorded on an inode_record (see print_inode_error()). */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
/* Evidence collected for one root <-> directory link. */
struct root_backref {
	struct list_head list;
	unsigned int found_dir_item:1;
	unsigned int found_dir_index:1;
	unsigned int found_back_ref:1;
	unsigned int found_forward_ref:1;
	unsigned int reachable:1;
/* Map the 'list' member back to its root_backref. */
static inline struct root_backref* to_root_backref(struct list_head *entry)
	return list_entry(entry, struct root_backref, list);
298 struct list_head backrefs;
299 struct cache_extent cache;
300 unsigned int found_root_item:1;
306 struct cache_extent cache;
311 struct cache_extent cache;
312 struct cache_tree root_cache;
313 struct cache_tree inode_cache;
314 struct inode_record *current;
323 struct walk_control {
324 struct cache_tree shared;
325 struct shared_node *nodes[BTRFS_MAX_LEVEL];
331 struct btrfs_key key;
333 struct list_head list;
336 struct extent_entry {
341 struct list_head list;
344 struct root_item_info {
345 /* level of the root */
347 /* number of nodes at this level, must be 1 for a root */
351 struct cache_extent cache_extent;
355 * Error bit for low memory mode check.
357 * Currently no caller cares about it yet. Just internal use for error
/*
 * Each condition gets a distinct bit: REFERENCER_MISMATCH and
 * CROSSING_STRIPE_BOUNDARY previously both used (1 << 4), which made the
 * two errors indistinguishable once OR-ed into the same error mask, so the
 * later flags are renumbered to keep every bit unique.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define CROSSING_STRIPE_BOUNDARY (1 << 5) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH	(1 << 6) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 7) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 8) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 9)
/*
 * Progress-indicator thread: periodically prints the current check phase
 * with a spinning work indicator.
 */
static void *print_status_check(void *p)
	struct task_ctx *priv = p;
	const char work_indicator[] = { '.', 'o', 'O', 'o' };
	/* Indexed by enum task_position (priv->tp) */
	static char *task_position_string[] = {
		"checking free space cache",
	task_period_start(priv->info, 1000 /* 1s */);
	/* Nothing active: no status line to print */
	if (priv->tp == TASK_NOTHING)
	printf("%s [%c]\r", task_position_string[priv->tp],
	       work_indicator[count % 4]);
	task_period_wait(priv->info);
397 static int print_status_return(void *p)
/*
 * Parse the check mode command line string ("lowmem", "orig" or
 * "original"); anything else maps to CHECK_MODE_UNKNOWN.
 */
static enum btrfs_check_mode parse_check_mode(const char *str)
	if (strcmp(str, "lowmem") == 0)
		return CHECK_MODE_LOWMEM;
	if (strcmp(str, "orig") == 0)
		return CHECK_MODE_ORIGINAL;
	/* "original" is the long spelling of "orig" */
	if (strcmp(str, "original") == 0)
		return CHECK_MODE_ORIGINAL;
	return CHECK_MODE_UNKNOWN;
/* Compatibility helper to allow reuse of old code */
/*
 * The hole tree is ordered by start offset, so rb_first() yields the
 * lowest recorded hole.
 */
static u64 first_extent_gap(struct rb_root *holes)
	struct file_extent_hole *hole;

	/* No holes recorded at all */
	if (RB_EMPTY_ROOT(holes))
	hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * rb-tree comparator for file_extent_hole: ordered by start offset; for
 * equal starts the lengths decide which hole becomes the merge center.
 */
static int compare_hole(struct rb_node *node1, struct rb_node *node2)
	struct file_extent_hole *hole1;
	struct file_extent_hole *hole2;

	hole1 = rb_entry(node1, struct file_extent_hole, node);
	hole2 = rb_entry(node2, struct file_extent_hole, node);

	if (hole1->start > hole2->start)
	if (hole1->start < hole2->start)
	/* Now hole1->start == hole2->start */
	if (hole1->len >= hole2->len)
		/*
		 * Hole 1 will be merge center
		 * Same hole will be merged later
		 */
	/* Hole 2 will be merge center */
453 * Add a hole to the record
455 * This will do hole merge for copy_file_extent_holes(),
456 * which will ensure there won't be continuous holes.
static int add_file_extent_hole(struct rb_root *holes,
	struct file_extent_hole *hole;
	struct file_extent_hole *prev = NULL;
	struct file_extent_hole *next = NULL;

	hole = malloc(sizeof(*hole));
	/* Since compare will not return 0, no -EEXIST will happen */
	rb_insert(holes, &hole->node, compare_hole);

	/* simple merge with previous hole */
	if (rb_prev(&hole->node))
		prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
	/* Overlapping or adjacent: absorb the previous hole */
	if (prev && prev->start + prev->len >= hole->start) {
		hole->len = hole->start + hole->len - prev->start;
		hole->start = prev->start;
		rb_erase(&prev->node, holes);

	/* iterate merge with next holes */
	if (!rb_next(&hole->node))
	next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
	if (hole->start + hole->len >= next->start) {
		/* next hole only partly covered: extend ours over it */
		if (hole->start + hole->len <= next->start + next->len)
			hole->len = next->start + next->len -
		rb_erase(&next->node, holes);
/*
 * rb_search comparator: 'data' carries a file_extent_hole whose start is
 * the offset to look up; matches the tree hole containing that offset.
 */
static int compare_hole_range(struct rb_node *node, void *data)
	struct file_extent_hole *hole;

	/* Only the 'start' of the passed-in key hole is used */
	hole = (struct file_extent_hole *)data;
	hole = rb_entry(node, struct file_extent_hole, node);
	if (start < hole->start)
	if (start >= hole->start && start < hole->start + hole->len)
 * Delete a hole in the record
 *
 * This does hole splitting and is much stricter than add.
static int del_file_extent_hole(struct rb_root *holes,
	struct file_extent_hole *hole;
	struct file_extent_hole tmp;
	struct rb_node *node;

	node = rb_search(holes, &tmp, compare_hole_range, NULL);
	hole = rb_entry(node, struct file_extent_hole, node);
	/* The deleted range must not extend past the found hole */
	if (start + len > hole->start + hole->len)
	/*
	 * Now there will be no overlap, delete the hole and re-add the
	 * split(s) if they exist.
	 */
	if (start > hole->start) {
		/* Left remainder before the deleted range */
		prev_start = hole->start;
		prev_len = start - hole->start;
	if (hole->start + hole->len > start + len) {
		/* Right remainder after the deleted range */
		next_start = start + len;
		next_len = hole->start + hole->len - start - len;
	rb_erase(node, holes);
	ret = add_file_extent_hole(holes, prev_start, prev_len);
	ret = add_file_extent_hole(holes, next_start, next_len);
/* Copy every hole from 'src' into 'dst'; insertion merges adjacent ones. */
static int copy_file_extent_holes(struct rb_root *dst,
	struct file_extent_hole *hole;
	struct rb_node *node;

	node = rb_first(src);
	hole = rb_entry(node, struct file_extent_hole, node);
	ret = add_file_extent_hole(dst, hole->start, hole->len);
	node = rb_next(node);
/* Release every hole record in the tree. */
static void free_file_extent_holes(struct rb_root *holes)
	struct rb_node *node;
	struct file_extent_hole *hole;

	node = rb_first(holes);
	hole = rb_entry(node, struct file_extent_hole, node);
	rb_erase(node, holes);
	node = rb_first(holes);
609 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
/*
 * Make sure 'root' is tracked by the current transaction: mark it dirty
 * and pin its current node as the commit root, taking an extra extent
 * buffer reference for the commit_root pointer.
 */
static void record_root_in_trans(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root)
	if (root->last_trans != trans->transid) {
		root->track_dirty = 1;
		root->last_trans = trans->transid;
		root->commit_root = root->node;
		extent_buffer_get(root->node);
/* Convert a POSIX inode mode to the matching BTRFS_FT_* directory type. */
static u8 imode_to_type(u32 imode)
	/* Lookup table indexed by the S_IFMT bits shifted down */
	static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
		[S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
		[S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
		[S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
		[S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
		[S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
		[S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
		[S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
	return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
/* rb-tree comparator for device_record entries, ordered by devid. */
static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
	struct device_record *rec1;
	struct device_record *rec2;

	rec1 = rb_entry(node1, struct device_record, node);
	rec2 = rb_entry(node2, struct device_record, node);
	if (rec1->devid > rec2->devid)
	else if (rec1->devid < rec2->devid)
/*
 * Deep-copy an inode_record including its backref list, orphan data extent
 * list and hole tree.  Returns ERR_PTR(-ENOMEM) on allocation failure;
 * partially cloned state is torn down again on the error path below.
 */
static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
	struct inode_record *rec;
	struct inode_backref *backref;
	struct inode_backref *orig;
	struct inode_backref *tmp;
	struct orphan_data_extent *src_orphan;
	struct orphan_data_extent *dst_orphan;

	rec = malloc(sizeof(*rec));
		return ERR_PTR(-ENOMEM);
	memcpy(rec, orig_rec, sizeof(*rec));
	/* The memcpy'd list heads/tree would alias the original: re-init */
	INIT_LIST_HEAD(&rec->backrefs);
	INIT_LIST_HEAD(&rec->orphan_extents);
	rec->holes = RB_ROOT;

	list_for_each_entry(orig, &orig_rec->backrefs, list) {
		/* backref carries its name inline, copy it as one blob */
		size = sizeof(*orig) + orig->namelen + 1;
		backref = malloc(size);
		memcpy(backref, orig, size);
		list_add_tail(&backref->list, &rec->backrefs);
	list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
		dst_orphan = malloc(sizeof(*dst_orphan));
		memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
		list_add_tail(&dst_orphan->list, &rec->orphan_extents);
	ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);

	/* Error path: free everything cloned so far */
	rb = rb_first(&rec->holes);
		struct file_extent_hole *hole;

		hole = rb_entry(rb, struct file_extent_hole, node);
	if (!list_empty(&rec->backrefs))
		list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
			list_del(&orig->list);
	if (!list_empty(&rec->orphan_extents))
		list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
			list_del(&orig->list);
/* Report every orphan data extent attached to an inode record. */
static void print_orphan_data_extents(struct list_head *orphan_extents,
	struct orphan_data_extent *orphan;

	if (list_empty(orphan_extents))
	printf("The following data extent is lost in tree %llu:\n",
	list_for_each_entry(orphan, orphan_extents, list) {
		printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
		       orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Print a human-readable description of every I_ERR_* bit set on 'rec',
 * followed by the orphan extents and file extent holes when relevant.
 */
static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
	u64 root_objectid = root->root_key.objectid;
	int errors = rec->errors;

	/* reloc root errors, we print its corresponding fs root objectid */
	if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
		root_objectid = root->root_key.offset;
		fprintf(stderr, "reloc");
	fprintf(stderr, "root %llu inode %llu errors %x",
		(unsigned long long) root_objectid,
		(unsigned long long) rec->ino, rec->errors);

	if (errors & I_ERR_NO_INODE_ITEM)
		fprintf(stderr, ", no inode item");
	if (errors & I_ERR_NO_ORPHAN_ITEM)
		fprintf(stderr, ", no orphan item");
	if (errors & I_ERR_DUP_INODE_ITEM)
		fprintf(stderr, ", dup inode item");
	if (errors & I_ERR_DUP_DIR_INDEX)
		fprintf(stderr, ", dup dir index");
	if (errors & I_ERR_ODD_DIR_ITEM)
		fprintf(stderr, ", odd dir item");
	if (errors & I_ERR_ODD_FILE_EXTENT)
		fprintf(stderr, ", odd file extent");
	if (errors & I_ERR_BAD_FILE_EXTENT)
		fprintf(stderr, ", bad file extent");
	if (errors & I_ERR_FILE_EXTENT_OVERLAP)
		fprintf(stderr, ", file extent overlap");
	if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
		fprintf(stderr, ", file extent discount");
	if (errors & I_ERR_DIR_ISIZE_WRONG)
		fprintf(stderr, ", dir isize wrong");
	if (errors & I_ERR_FILE_NBYTES_WRONG)
		fprintf(stderr, ", nbytes wrong");
	if (errors & I_ERR_ODD_CSUM_ITEM)
		fprintf(stderr, ", odd csum item");
	if (errors & I_ERR_SOME_CSUM_MISSING)
		fprintf(stderr, ", some csum missing");
	if (errors & I_ERR_LINK_COUNT_WRONG)
		fprintf(stderr, ", link count wrong");
	if (errors & I_ERR_FILE_EXTENT_ORPHAN)
		fprintf(stderr, ", orphan file extent");
	fprintf(stderr, "\n");
	/* Print the orphan extents if needed */
	if (errors & I_ERR_FILE_EXTENT_ORPHAN)
		print_orphan_data_extents(&rec->orphan_extents, root->objectid);

	/* Print the holes if needed */
	if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
		struct file_extent_hole *hole;
		struct rb_node *node;

		node = rb_first(&rec->holes);
		fprintf(stderr, "Found file extent holes:\n");
		hole = rb_entry(node, struct file_extent_hole, node);
		fprintf(stderr, "\tstart: %llu, len: %llu\n",
			hole->start, hole->len);
		node = rb_next(node);
		/* NOTE(review): fallback report, presumably when no hole recorded */
		fprintf(stderr, "\tstart: 0, len: %llu\n",
			round_up(rec->isize, root->sectorsize));
/* Print a readable description for every REF_ERR_* bit set in 'errors'. */
static void print_ref_error(int errors)
	if (errors & REF_ERR_NO_DIR_ITEM)
		fprintf(stderr, ", no dir item");
	if (errors & REF_ERR_NO_DIR_INDEX)
		fprintf(stderr, ", no dir index");
	if (errors & REF_ERR_NO_INODE_REF)
		fprintf(stderr, ", no inode ref");
	if (errors & REF_ERR_DUP_DIR_ITEM)
		fprintf(stderr, ", dup dir item");
	if (errors & REF_ERR_DUP_DIR_INDEX)
		fprintf(stderr, ", dup dir index");
	if (errors & REF_ERR_DUP_INODE_REF)
		fprintf(stderr, ", dup inode ref");
	if (errors & REF_ERR_INDEX_UNMATCH)
		fprintf(stderr, ", index mismatch");
	if (errors & REF_ERR_FILETYPE_UNMATCH)
		fprintf(stderr, ", filetype mismatch");
	if (errors & REF_ERR_NAME_TOO_LONG)
		fprintf(stderr, ", name too long");
	if (errors & REF_ERR_NO_ROOT_REF)
		fprintf(stderr, ", no root ref");
	if (errors & REF_ERR_NO_ROOT_BACKREF)
		fprintf(stderr, ", no root backref");
	if (errors & REF_ERR_DUP_ROOT_REF)
		fprintf(stderr, ", dup root ref");
	if (errors & REF_ERR_DUP_ROOT_BACKREF)
		fprintf(stderr, ", dup root backref");
	fprintf(stderr, "\n");
/*
 * Look up (or create) the inode_record for 'ino' in the cache.  With 'mod'
 * set the caller intends to modify the record, so a record shared by
 * several trees (refs > 1) is cloned first (copy-on-write).  Returns an
 * ERR_PTR() on allocation or insertion failure.
 */
static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
	struct ptr_node *node;
	struct cache_extent *cache;
	struct inode_record *rec = NULL;

	cache = lookup_cache_extent(inode_cache, ino, 1);
		node = container_of(cache, struct ptr_node, cache);
		/* Shared record about to be modified: clone it first */
		if (mod && rec->refs > 1) {
			node->data = clone_inode_rec(rec);
			if (IS_ERR(node->data))
	rec = calloc(1, sizeof(*rec));
		return ERR_PTR(-ENOMEM);
	/* (u64)-1 marks "no extent seen yet" */
	rec->extent_start = (u64)-1;

	INIT_LIST_HEAD(&rec->backrefs);
	INIT_LIST_HEAD(&rec->orphan_extents);
	rec->holes = RB_ROOT;

	node = malloc(sizeof(*node));
		return ERR_PTR(-ENOMEM);
	node->cache.start = ino;
	node->cache.size = 1;

	/* NOTE(review): the free ino cache object is special-cased here */
	if (ino == BTRFS_FREE_INO_OBJECTID)
	ret = insert_cache_extent(inode_cache, &node->cache);
		return ERR_PTR(-EEXIST);
/* Unlink and free every orphan_data_extent on the list. */
static void free_orphan_data_extents(struct list_head *orphan_extents)
	struct orphan_data_extent *orphan;

	while (!list_empty(orphan_extents)) {
		orphan = list_entry(orphan_extents->next,
				    struct orphan_data_extent, list);
		list_del(&orphan->list);
/* Free an inode record and everything hanging off it. */
static void free_inode_rec(struct inode_record *rec)
	struct inode_backref *backref;

	while (!list_empty(&rec->backrefs)) {
		backref = to_inode_backref(rec->backrefs.next);
		list_del(&backref->list);
	free_orphan_data_extents(&rec->orphan_extents);
	free_file_extent_holes(&rec->holes);
/*
 * An inode record is disposable once it is fully checked, error free, has
 * its inode item, all links accounted and no backrefs left to resolve.
 */
static int can_free_inode_rec(struct inode_record *rec)
	if (!rec->errors && rec->checked && rec->found_inode_item &&
	    rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Re-validate 'rec' after new items were processed: retire fully matched
 * backrefs, derive mode-dependent error bits, and remove the record from
 * the cache once nothing more can be learned about it.
 */
static void maybe_free_inode_rec(struct cache_tree *inode_cache,
				 struct inode_record *rec)
	struct cache_extent *cache;
	struct inode_backref *tmp, *backref;
	struct ptr_node *node;

	if (!rec->found_inode_item)
	filetype = imode_to_type(rec->imode);
	list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
		if (backref->found_dir_item && backref->found_dir_index) {
			if (backref->filetype != filetype)
				backref->errors |= REF_ERR_FILETYPE_UNMATCH;
			/* Fully matched, error-free link: drop the backref */
			if (!backref->errors && backref->found_inode_ref &&
			    rec->nlink == rec->found_link) {
				list_del(&backref->list);
	if (!rec->checked || rec->merging)

	/* Mode-dependent consistency checks */
	if (S_ISDIR(rec->imode)) {
		if (rec->found_size != rec->isize)
			rec->errors |= I_ERR_DIR_ISIZE_WRONG;
		if (rec->found_file_extent)
			rec->errors |= I_ERR_ODD_FILE_EXTENT;
	} else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
		if (rec->found_dir_item)
			rec->errors |= I_ERR_ODD_DIR_ITEM;
		if (rec->found_size != rec->nbytes)
			rec->errors |= I_ERR_FILE_NBYTES_WRONG;
		/* Any hole below isize means missing file extents */
		if (rec->nlink > 0 && !no_holes &&
		    (rec->extent_end < rec->isize ||
		     first_extent_gap(&rec->holes) < rec->isize))
			rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;

	if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
		if (rec->found_csum_item && rec->nodatasum)
			rec->errors |= I_ERR_ODD_CSUM_ITEM;
		if (rec->some_csum_missing && !rec->nodatasum)
			rec->errors |= I_ERR_SOME_CSUM_MISSING;

	BUG_ON(rec->refs != 1);
	if (can_free_inode_rec(rec)) {
		cache = lookup_cache_extent(inode_cache, rec->ino, 1);
		node = container_of(cache, struct ptr_node, cache);
		BUG_ON(node->data != rec);
		remove_cache_extent(inode_cache, &node->cache);
/*
 * Check whether an orphan item exists for inode 'ino'; returns the
 * btrfs_search_slot() result (0 when the item is found).
 */
static int check_orphan_item(struct btrfs_root *root, u64 ino)
	struct btrfs_path path;
	struct btrfs_key key;

	key.objectid = BTRFS_ORPHAN_OBJECTID;
	key.type = BTRFS_ORPHAN_ITEM_KEY;

	btrfs_init_path(&path);
	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
	btrfs_release_path(&path);
/*
 * Record the contents of an INODE_ITEM into the currently tracked
 * inode_record of the active shared node.
 */
static int process_inode_item(struct extent_buffer *eb,
			      int slot, struct btrfs_key *key,
			      struct shared_node *active_node)
	struct inode_record *rec;
	struct btrfs_inode_item *item;

	rec = active_node->current;
	BUG_ON(rec->ino != key->objectid || rec->refs > 1);
	/* A second inode item for the same objectid is an error */
	if (rec->found_inode_item) {
		rec->errors |= I_ERR_DUP_INODE_ITEM;
	item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
	rec->nlink = btrfs_inode_nlink(eb, item);
	rec->isize = btrfs_inode_size(eb, item);
	rec->nbytes = btrfs_inode_nbytes(eb, item);
	rec->imode = btrfs_inode_mode(eb, item);
	if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
	rec->found_inode_item = 1;
	/* nlink == 0 requires an orphan item; cleared if one is found */
	if (rec->nlink == 0)
		rec->errors |= I_ERR_NO_ORPHAN_ITEM;
	maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find the backref for (dir, name) on 'rec', or allocate and link a fresh
 * zeroed one with the name copied inline when none exists.
 */
static struct inode_backref *get_inode_backref(struct inode_record *rec,
					       int namelen, u64 dir)
	struct inode_backref *backref;

	list_for_each_entry(backref, &rec->backrefs, list) {
		/* NOTE(review): multi-objectid pseudo record short-circuits */
		if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
		if (backref->dir != dir || backref->namelen != namelen)
		if (memcmp(name, backref->name, namelen))
	/* Not found: allocate with room for the trailing name + NUL */
	backref = malloc(sizeof(*backref) + namelen + 1);
	memset(backref, 0, sizeof(*backref));
	backref->namelen = namelen;
	memcpy(backref->name, name, namelen);
	backref->name[namelen] = '\0';
	list_add_tail(&backref->list, &rec->backrefs);
/*
 * Record one piece of evidence (dir item, dir index, or inode ref/extref)
 * for the link (dir, name) -> ino, cross-checking it against evidence
 * gathered earlier and accumulating REF_ERR_* bits on any mismatch.
 */
static int add_inode_backref(struct cache_tree *inode_cache,
			     u64 ino, u64 dir, u64 index,
			     const char *name, int namelen,
			     u8 filetype, u8 itemtype, int errors)
	struct inode_record *rec;
	struct inode_backref *backref;

	rec = get_inode_rec(inode_cache, ino, 1);
	BUG_ON(IS_ERR(rec));
	backref = get_inode_backref(rec, name, namelen, dir);
	backref->errors |= errors;
	if (itemtype == BTRFS_DIR_INDEX_KEY) {
		if (backref->found_dir_index)
			backref->errors |= REF_ERR_DUP_DIR_INDEX;
		if (backref->found_inode_ref && backref->index != index)
			backref->errors |= REF_ERR_INDEX_UNMATCH;
		if (backref->found_dir_item && backref->filetype != filetype)
			backref->errors |= REF_ERR_FILETYPE_UNMATCH;
		backref->index = index;
		backref->filetype = filetype;
		backref->found_dir_index = 1;
	} else if (itemtype == BTRFS_DIR_ITEM_KEY) {
		if (backref->found_dir_item)
			backref->errors |= REF_ERR_DUP_DIR_ITEM;
		if (backref->found_dir_index && backref->filetype != filetype)
			backref->errors |= REF_ERR_FILETYPE_UNMATCH;
		backref->filetype = filetype;
		backref->found_dir_item = 1;
	} else if ((itemtype == BTRFS_INODE_REF_KEY) ||
		   (itemtype == BTRFS_INODE_EXTREF_KEY)) {
		if (backref->found_inode_ref)
			backref->errors |= REF_ERR_DUP_INODE_REF;
		if (backref->found_dir_index && backref->index != index)
			backref->errors |= REF_ERR_INDEX_UNMATCH;
		backref->index = index;
		backref->ref_type = itemtype;
		backref->found_inode_ref = 1;
	maybe_free_inode_rec(inode_cache, rec);
/*
 * Merge the knowledge gathered in 'src' into 'dst' (the same inode seen
 * from different shared subtrees): replays the backrefs into dst_cache and
 * combines flags, link counts, sizes, extent ranges and hole trees.
 */
static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
			    struct cache_tree *dst_cache)
	struct inode_backref *backref;

	list_for_each_entry(backref, &src->backrefs, list) {
		if (backref->found_dir_index) {
			add_inode_backref(dst_cache, dst->ino, backref->dir,
					  backref->index, backref->name,
					  backref->namelen, backref->filetype,
					  BTRFS_DIR_INDEX_KEY, backref->errors);
		if (backref->found_dir_item) {
			add_inode_backref(dst_cache, dst->ino,
					  backref->dir, 0, backref->name,
					  backref->namelen, backref->filetype,
					  BTRFS_DIR_ITEM_KEY, backref->errors);
		if (backref->found_inode_ref) {
			add_inode_backref(dst_cache, dst->ino,
					  backref->dir, backref->index,
					  backref->name, backref->namelen, 0,
					  backref->ref_type, backref->errors);
	if (src->found_dir_item)
		dst->found_dir_item = 1;
	if (src->found_file_extent)
		dst->found_file_extent = 1;
	if (src->found_csum_item)
		dst->found_csum_item = 1;
	if (src->some_csum_missing)
		dst->some_csum_missing = 1;
	/* Keep the hole tree that reports the lower first gap */
	if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
		ret = copy_file_extent_holes(&dst->holes, &src->holes);
	BUG_ON(src->found_link < dir_count);
	dst->found_link += src->found_link - dir_count;
	dst->found_size += src->found_size;
	if (src->extent_start != (u64)-1) {
		if (dst->extent_start == (u64)-1) {
			dst->extent_start = src->extent_start;
			dst->extent_end = src->extent_end;
		if (dst->extent_end > src->extent_start)
			dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
		else if (dst->extent_end < src->extent_start) {
			/* Gap between the two ranges: record it as a hole */
			ret = add_file_extent_hole(&dst->holes,
					src->extent_start - dst->extent_end);
		if (dst->extent_end < src->extent_end)
			dst->extent_end = src->extent_end;
	dst->errors |= src->errors;
	if (src->found_inode_item) {
		if (!dst->found_inode_item) {
			dst->nlink = src->nlink;
			dst->isize = src->isize;
			dst->nbytes = src->nbytes;
			dst->imode = src->imode;
			dst->nodatasum = src->nodatasum;
			dst->found_inode_item = 1;
		dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Move every root/inode record cached on 'src_node' into 'dst_node',
 * merging with any pre-existing record on conflict.
 */
static int splice_shared_node(struct shared_node *src_node,
			      struct shared_node *dst_node)
	struct cache_extent *cache;
	struct ptr_node *node, *ins;
	struct cache_tree *src, *dst;
	struct inode_record *rec, *conflict;
	u64 current_ino = 0;

	if (--src_node->refs == 0)
	if (src_node->current)
		current_ino = src_node->current->ino;

	/* First pass over the root cache; the inode cache follows below */
	src = &src_node->root_cache;
	dst = &dst_node->root_cache;
	cache = search_cache_extent(src, 0);
	node = container_of(cache, struct ptr_node, cache);
	cache = next_cache_extent(cache);
	remove_cache_extent(src, &node->cache);
	ins = malloc(sizeof(*ins));
	ins->cache.start = node->cache.start;
	ins->cache.size = node->cache.size;

	ret = insert_cache_extent(dst, &ins->cache);
	if (ret == -EEXIST) {
		/* Already known in dst: merge and drop the source record */
		conflict = get_inode_rec(dst, rec->ino, 1);
		BUG_ON(IS_ERR(conflict));
		merge_inode_recs(rec, conflict, dst);
		conflict->checked = 1;
		if (dst_node->current == conflict)
			dst_node->current = NULL;
		maybe_free_inode_rec(dst, conflict);
		free_inode_rec(rec);
	/* Switch from the root cache to the inode cache and repeat */
	if (src == &src_node->root_cache) {
		src = &src_node->inode_cache;
		dst = &dst_node->inode_cache;
	/* Advance dst's "current" inode tracking if src was further along */
	if (current_ino > 0 && (!dst_node->current ||
	    current_ino > dst_node->current->ino)) {
		if (dst_node->current) {
			dst_node->current->checked = 1;
			maybe_free_inode_rec(dst, dst_node->current);
		dst_node->current = get_inode_rec(dst, current_ino, 1);
		BUG_ON(IS_ERR(dst_node->current));
/* cache_tree destructor callback: free the inode record of a ptr_node. */
static void free_inode_ptr(struct cache_extent *cache)
	struct ptr_node *node;
	struct inode_record *rec;

	node = container_of(cache, struct ptr_node, cache);
	free_inode_rec(rec);

/* Generates free_inode_recs_tree(), used by enter_shared_node() below. */
FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/* Look up the shared_node covering 'bytenr' in the cache, if any. */
static struct shared_node *find_shared_node(struct cache_tree *shared,
	struct cache_extent *cache;
	struct shared_node *node;

	cache = lookup_cache_extent(shared, bytenr, 1);
		node = container_of(cache, struct shared_node, cache);
/* Allocate and insert a shared_node for 'bytenr' with the given ref count. */
static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
	struct shared_node *node;

	node = calloc(1, sizeof(*node));
	node->cache.start = bytenr;
	node->cache.size = 1;
	cache_tree_init(&node->root_cache);
	cache_tree_init(&node->inode_cache);

	ret = insert_cache_extent(shared, &node->cache);
/*
 * Called when the tree walk reaches a block referenced more than once:
 * tracks it as a shared node, or — when revisited — splices the records
 * collected earlier into the currently active shared node.
 */
static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
			     struct walk_control *wc, int level)
	struct shared_node *node;
	struct shared_node *dest;

	if (level == wc->active_node)
	BUG_ON(wc->active_node <= level);
	node = find_shared_node(&wc->shared, bytenr);
		/* First visit: start tracking this shared block */
		ret = add_shared_node(&wc->shared, bytenr, refs);
		node = find_shared_node(&wc->shared, bytenr);
		wc->nodes[level] = node;
		wc->active_node = level;

	if (wc->root_level == wc->active_node &&
	    btrfs_root_refs(&root->root_item) == 0) {
		/* Root being deleted: just drop the cached records */
		if (--node->refs == 0) {
			free_inode_recs_tree(&node->root_cache);
			free_inode_recs_tree(&node->inode_cache);
			remove_cache_extent(&wc->shared, &node->cache);
	dest = wc->nodes[wc->active_node];
	splice_shared_node(node, dest);
	if (node->refs == 0) {
		remove_cache_extent(&wc->shared, &node->cache);
/*
 * Pop the shared node tracked at 'level' and splice its records into the
 * next tracked shared node further up the tree.
 */
static int leave_shared_node(struct btrfs_root *root,
			     struct walk_control *wc, int level)
	struct shared_node *node;
	struct shared_node *dest;

	if (level == wc->root_level)
	/* Find the next tracked level above this one */
	for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
	BUG_ON(i >= BTRFS_MAX_LEVEL);

	node = wc->nodes[wc->active_node];
	wc->nodes[wc->active_node] = NULL;
	wc->active_node = i;

	dest = wc->nodes[wc->active_node];
	if (wc->active_node < wc->root_level ||
	    btrfs_root_refs(&root->root_item) > 0) {
		BUG_ON(node->refs <= 1);
		splice_shared_node(node, dest);
		BUG_ON(node->refs < 2);
 * 1 - if the root with id child_root_id is a child of root parent_root_id
 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
 *     has other root(s) as parent(s)
 * 2 - if the root child_root_id doesn't have any parent roots
static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
	struct btrfs_path path;
	struct btrfs_key key;
	struct extent_buffer *leaf;

	btrfs_init_path(&path);
	/* Fast path: a direct ROOT_REF from parent to child */
	key.objectid = parent_root_id;
	key.type = BTRFS_ROOT_REF_KEY;
	key.offset = child_root_id;
	ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
	btrfs_release_path(&path);

	/* Slow path: scan all ROOT_BACKREF items of the child */
	key.objectid = child_root_id;
	key.type = BTRFS_ROOT_BACKREF_KEY;
	ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
	leaf = path.nodes[0];
	if (path.slots[0] >= btrfs_header_nritems(leaf)) {
		ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
		leaf = path.nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
	/* Past the child's backref items: stop scanning */
	if (key.objectid != child_root_id ||
	    key.type != BTRFS_ROOT_BACKREF_KEY)
	if (key.offset == parent_root_id) {
		btrfs_release_path(&path);
	btrfs_release_path(&path);
	return has_parent ? 0 : 2;
/*
 * Record every directory entry packed into the DIR_ITEM/DIR_INDEX item at
 * @slot as an inode backref on the active shared node's caches.  Entries
 * pointing at INODE_ITEM keys go into the inode cache; entries pointing at
 * ROOT_ITEM keys (subvolume links) go into the root cache.
 * NOTE(review): interior lines are elided in this view.
 */
1461 static int process_dir_item(struct btrfs_root *root,
1462 struct extent_buffer *eb,
1463 int slot, struct btrfs_key *key,
1464 struct shared_node *active_node)
1474 struct btrfs_dir_item *di;
1475 struct inode_record *rec;
1476 struct cache_tree *root_cache;
1477 struct cache_tree *inode_cache;
1478 struct btrfs_key location;
1479 char namebuf[BTRFS_NAME_LEN];
1481 root_cache = &active_node->root_cache;
1482 inode_cache = &active_node->inode_cache;
1483 rec = active_node->current;
1484 rec->found_dir_item = 1;
/* An item may contain several dir entries back to back; walk them all. */
1486 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1487 total = btrfs_item_size_nr(eb, slot);
1488 while (cur < total) {
1490 btrfs_dir_item_key_to_cpu(eb, di, &location);
1491 name_len = btrfs_dir_name_len(eb, di);
1492 data_len = btrfs_dir_data_len(eb, di);
1493 filetype = btrfs_dir_type(eb, di);
/* Directory isize is the sum of its entry name lengths. */
1495 rec->found_size += name_len;
1496 if (name_len <= BTRFS_NAME_LEN) {
/* Name overruns the buffer: clamp and flag the backref. */
1500 len = BTRFS_NAME_LEN;
1501 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes immediately follow the dir_item header. */
1503 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1505 if (location.type == BTRFS_INODE_ITEM_KEY) {
1506 add_inode_backref(inode_cache, location.objectid,
1507 key->objectid, key->offset, namebuf,
1508 len, filetype, key->type, error);
1509 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
/* Entry references a subvolume root, not a plain inode. */
1510 add_inode_backref(root_cache, location.objectid,
1511 key->objectid, key->offset,
1512 namebuf, len, filetype,
/* Unknown location type: record it under the multi-objectid sentinel. */
1515 fprintf(stderr, "invalid location in dir item %u\n",
1517 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1518 key->objectid, key->offset, namebuf,
1519 len, filetype, key->type, error);
/* Advance to the next packed entry: header + name + data. */
1522 len = sizeof(*di) + name_len + data_len;
1523 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item must hold exactly one entry. */
1526 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1527 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record every INODE_REF packed into the item at @slot as a backref in the
 * active node's inode cache.  key->objectid is the inode, key->offset the
 * parent directory.
 * NOTE(review): interior lines are elided in this view.
 */
1532 static int process_inode_ref(struct extent_buffer *eb,
1533 int slot, struct btrfs_key *key,
1534 struct shared_node *active_node)
1542 struct cache_tree *inode_cache;
1543 struct btrfs_inode_ref *ref;
1544 char namebuf[BTRFS_NAME_LEN];
1546 inode_cache = &active_node->inode_cache;
/* Multiple refs may be packed back to back in one item. */
1548 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1549 total = btrfs_item_size_nr(eb, slot);
1550 while (cur < total) {
1551 name_len = btrfs_inode_ref_name_len(eb, ref);
1552 index = btrfs_inode_ref_index(eb, ref);
1553 if (name_len <= BTRFS_NAME_LEN) {
/* Clamp over-long names and flag the backref. */
1557 len = BTRFS_NAME_LEN;
1558 error = REF_ERR_NAME_TOO_LONG;
/* Name bytes follow the inode_ref header. */
1560 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1561 add_inode_backref(inode_cache, key->objectid, key->offset,
1562 index, namebuf, len, 0, key->type, error);
/* Step to the next packed ref. */
1564 len = sizeof(*ref) + name_len;
1565 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Like process_inode_ref() but for INODE_EXTREF items, where the parent
 * directory id is stored inside the extref itself rather than in the key
 * offset (used when the hashed ref item overflowed).
 * NOTE(review): interior lines are elided in this view.
 */
1571 static int process_inode_extref(struct extent_buffer *eb,
1572 int slot, struct btrfs_key *key,
1573 struct shared_node *active_node)
1582 struct cache_tree *inode_cache;
1583 struct btrfs_inode_extref *extref;
1584 char namebuf[BTRFS_NAME_LEN];
1586 inode_cache = &active_node->inode_cache;
/* Multiple extrefs may be packed back to back in one item. */
1588 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1589 total = btrfs_item_size_nr(eb, slot);
1590 while (cur < total) {
1591 name_len = btrfs_inode_extref_name_len(eb, extref);
1592 index = btrfs_inode_extref_index(eb, extref);
/* Parent dir comes from the extref body, not the item key. */
1593 parent = btrfs_inode_extref_parent(eb, extref);
1594 if (name_len <= BTRFS_NAME_LEN) {
/* Clamp over-long names and flag the backref. */
1598 len = BTRFS_NAME_LEN;
1599 error = REF_ERR_NAME_TOO_LONG;
1601 read_extent_buffer(eb, namebuf,
1602 (unsigned long)(extref + 1), len);
1603 add_inode_backref(inode_cache, key->objectid, parent,
1604 index, namebuf, len, 0, key->type, error);
/* Step to the next packed extref. */
1606 len = sizeof(*extref) + name_len;
1607 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Count how many bytes of the logical range [start, start + len) are
 * covered by checksum items in the csum tree; the covered byte count is
 * returned through *found.
 * NOTE(review): interior lines are elided in this view.
 */
1614 static int count_csum_range(struct btrfs_root *root, u64 start,
1615 u64 len, u64 *found)
1617 struct btrfs_key key;
1618 struct btrfs_path path;
1619 struct extent_buffer *leaf;
/* Bytes per checksum entry, from the superblock. */
1624 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1626 btrfs_init_path(&path);
1628 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1630 key.type = BTRFS_EXTENT_CSUM_KEY;
1632 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * An exact hit is unlikely; if the previous item is a csum item it may
 * still cover the start of our range, so step back one slot.
 */
1636 if (ret > 0 && path.slots[0] > 0) {
1637 leaf = path.nodes[0];
1638 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1639 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1640 key.type == BTRFS_EXTENT_CSUM_KEY)
1645 leaf = path.nodes[0];
1646 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1647 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1652 leaf = path.nodes[0];
1655 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Ran past the csum items entirely. */
1656 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1657 key.type != BTRFS_EXTENT_CSUM_KEY)
1660 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Item starts beyond our range: done. */
1661 if (key.offset >= start + len)
1664 if (key.offset > start)
/* Each csum_size bytes of item data covers one sector. */
1667 size = btrfs_item_size_nr(leaf, path.slots[0]);
1668 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1669 if (csum_end > start) {
/* Accumulate only the overlap with [start, start + len). */
1670 size = min(csum_end - start, len);
1679 btrfs_release_path(&path);
/*
 * Validate one EXTENT_DATA item of the current inode record: track the
 * contiguous extent range (recording holes and overlaps), sanity-check the
 * extent fields, and cross-check data checksums for regular extents.
 * NOTE(review): interior lines are elided in this view.
 */
1685 static int process_file_extent(struct btrfs_root *root,
1686 struct extent_buffer *eb,
1687 int slot, struct btrfs_key *key,
1688 struct shared_node *active_node)
1690 struct inode_record *rec;
1691 struct btrfs_file_extent_item *fi;
1693 u64 disk_bytenr = 0;
1694 u64 extent_offset = 0;
/* Sector-alignment mask for rounding/validation below. */
1695 u64 mask = root->sectorsize - 1;
1699 rec = active_node->current;
1700 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1701 rec->found_file_extent = 1;
/* First extent seen for this inode: start tracking the range. */
1703 if (rec->extent_start == (u64)-1) {
1704 rec->extent_start = key->offset;
1705 rec->extent_end = key->offset;
/* Overlap with the previous extent is an error; a gap is a hole. */
1708 if (rec->extent_end > key->offset)
1709 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1710 else if (rec->extent_end < key->offset) {
1711 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1712 key->offset - rec->extent_end);
1717 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1718 extent_type = btrfs_file_extent_type(eb, fi);
1720 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1721 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1723 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1724 rec->found_size += num_bytes;
/* Round inline length up to a full sector for range tracking. */
1725 num_bytes = (num_bytes + mask) & ~mask;
1726 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1727 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1728 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1729 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1730 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Extent length must be non-zero and sector aligned. */
1731 if (num_bytes == 0 || (num_bytes & mask))
1732 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Referenced slice must fit inside the uncompressed extent. */
1733 if (num_bytes + extent_offset >
1734 btrfs_file_extent_ram_bytes(eb, fi))
1735 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not be compressed/encrypted/encoded. */
1736 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1737 (btrfs_file_extent_compression(eb, fi) ||
1738 btrfs_file_extent_encryption(eb, fi) ||
1739 btrfs_file_extent_other_encoding(eb, fi)))
1740 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* disk_bytenr == 0 marks a hole extent; only real data counts. */
1741 if (disk_bytenr > 0)
1742 rec->found_size += num_bytes;
1744 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1746 rec->extent_end = key->offset + num_bytes;
1749 * The data reloc tree will copy full extents into its inode and then
1750 * copy the corresponding csums. Because the extent it copied could be
1751 * a preallocated extent that hasn't been written to yet there may be no
1752 * csums to copy, ergo we won't have csums for our file extent. This is
1753 * ok so just don't bother checking csums if the inode belongs to the
1756 if (disk_bytenr > 0 &&
1757 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* Compressed extents are checksummed over their on-disk bytes. */
1759 if (btrfs_file_extent_compression(eb, fi))
1760 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1762 disk_bytenr += extent_offset;
1764 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1767 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1769 rec->found_csum_item = 1;
/* Regular extents must be fully checksummed. */
1770 if (found < num_bytes)
1771 rec->some_csum_missing = 1;
1772 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
/* Prealloc extents must have no csums at all. */
1774 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item in leaf @eb, dispatching on key type to the
 * per-item handlers above and maintaining active_node->current as the
 * inode record being assembled.
 * NOTE(review): interior lines are elided in this view.
 */
1780 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1781 struct walk_control *wc)
1783 struct btrfs_key key;
1787 struct cache_tree *inode_cache;
1788 struct shared_node *active_node;
/* Dead root (refs == 0) at the root level: nothing to collect. */
1790 if (wc->root_level == wc->active_node &&
1791 btrfs_root_refs(&root->root_item) == 0)
1794 active_node = wc->nodes[wc->active_node];
1795 inode_cache = &active_node->inode_cache;
1796 nritems = btrfs_header_nritems(eb);
1797 for (i = 0; i < nritems; i++) {
1798 btrfs_item_key_to_cpu(eb, &key, i);
/* Skip free-space cache and orphan bookkeeping items. */
1800 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1802 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/*
 * Keys are sorted by objectid, so a larger objectid means the
 * previous inode's items are complete: mark it checked and
 * switch current to the new inode's record.
 */
1805 if (active_node->current == NULL ||
1806 active_node->current->ino < key.objectid) {
1807 if (active_node->current) {
1808 active_node->current->checked = 1;
1809 maybe_free_inode_rec(inode_cache,
1810 active_node->current);
1812 active_node->current = get_inode_rec(inode_cache,
1814 BUG_ON(IS_ERR(active_node->current));
1817 case BTRFS_DIR_ITEM_KEY:
1818 case BTRFS_DIR_INDEX_KEY:
1819 ret = process_dir_item(root, eb, i, &key, active_node);
1821 case BTRFS_INODE_REF_KEY:
1822 ret = process_inode_ref(eb, i, &key, active_node);
1824 case BTRFS_INODE_EXTREF_KEY:
1825 ret = process_inode_extref(eb, i, &key, active_node);
1827 case BTRFS_INODE_ITEM_KEY:
1828 ret = process_inode_item(eb, i, &key, active_node);
1830 case BTRFS_EXTENT_DATA_KEY:
1831 ret = process_file_extent(root, eb, i, &key,
/*
 * Issue readahead for the child blocks of @node starting at @slot, so the
 * subsequent synchronous reads in walk_down_tree() hit warm caches.
 * NOTE(review): interior lines are elided in this view.
 */
1841 static void reada_walk_down(struct btrfs_root *root,
1842 struct extent_buffer *node, int slot)
1851 level = btrfs_header_level(node);
1855 nritems = btrfs_header_nritems(node);
1856 blocksize = root->nodesize;
/* Prefetch every remaining child pointer in this node. */
1857 for (i = slot; i < nritems; i++) {
1858 bytenr = btrfs_node_blockptr(node, i);
1859 ptr_gen = btrfs_node_ptr_generation(node, i);
1860 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1865 * Check the child node/leaf by the following condition:
1866 * 1. the first item key of the node/leaf should be the same with the one
1868 * 2. block in parent node should match the child node/leaf.
1869 * 3. generation of parent node and child's header should be consistent.
1871 * Otherwise, the child node/leaf pointed to by the key in the parent is not valid.
1873 * We hope to check leaf owner too, but since subvol may share leaves,
1874 * which makes leaf owner check not so strong, key check should be
1875 * sufficient for that case.
/*
 * Verify that @child really is the block @parent points to at @slot: the
 * first key, the block number, and the generation must all match the
 * parent's pointer (see the comment block above for the rationale).
 * NOTE(review): interior lines are elided in this view.
 */
1877 static int check_child_node(struct btrfs_root *root,
1878 struct extent_buffer *parent, int slot,
1879 struct extent_buffer *child)
1881 struct btrfs_key parent_key;
1882 struct btrfs_key child_key;
1885 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* Leaf stores item keys; internal node stores node keys. */
1886 if (btrfs_header_level(child) == 0)
1887 btrfs_item_key_to_cpu(child, &child_key, 0);
1889 btrfs_node_key_to_cpu(child, &child_key, 0);
/* Check 1: first key of the child must equal the parent's key. */
1891 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1894 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1895 parent_key.objectid, parent_key.type, parent_key.offset,
1896 child_key.objectid, child_key.type, child_key.offset);
/* Check 2: child's bytenr must match the parent's block pointer. */
1898 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1900 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1901 btrfs_node_blockptr(parent, slot),
1902 btrfs_header_bytenr(child));
/* Check 3: generations of pointer and child header must agree. */
1904 if (btrfs_node_ptr_generation(parent, slot) !=
1905 btrfs_header_generation(child)) {
1907 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1908 btrfs_header_generation(child),
1909 btrfs_node_ptr_generation(parent, slot));
1915 u64 bytenr[BTRFS_MAX_LEVEL];
1916 u64 refs[BTRFS_MAX_LEVEL];
/*
 * Descend the tree from path->nodes[*level], processing each leaf via
 * process_one_leaf().  @nrefs caches the last looked-up extent refcount
 * per level to avoid redundant extent-tree lookups; shared blocks
 * (refs > 1) are entered/skipped via the shared-node machinery.
 * NOTE(review): interior lines are elided in this view.
 */
1919 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1920 struct walk_control *wc, int *level,
1921 struct node_refs *nrefs)
1923 enum btrfs_tree_block_status status;
1926 struct extent_buffer *next;
1927 struct extent_buffer *cur;
1932 WARN_ON(*level < 0);
1933 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached refcount when the cached bytenr still matches. */
1935 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1936 refs = nrefs->refs[*level];
1939 ret = btrfs_lookup_extent_info(NULL, root,
1940 path->nodes[*level]->start,
1941 *level, 1, &refs, NULL);
1946 nrefs->bytenr[*level] = path->nodes[*level]->start;
1947 nrefs->refs[*level] = refs;
1951 ret = enter_shared_node(root, path->nodes[*level]->start,
1959 while (*level >= 0) {
1960 WARN_ON(*level < 0);
1961 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1962 cur = path->nodes[*level];
1964 if (btrfs_header_level(cur) != *level)
/* Exhausted this node's slots: let walk_up_tree take over. */
1967 if (path->slots[*level] >= btrfs_header_nritems(cur))
/* Reached a leaf: process its items. */
1970 ret = process_one_leaf(root, cur, wc);
1975 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1976 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1977 blocksize = root->nodesize;
/* Same refcount caching for the child we are about to visit. */
1979 if (bytenr == nrefs->bytenr[*level - 1]) {
1980 refs = nrefs->refs[*level - 1];
1982 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1983 *level - 1, 1, &refs, NULL);
1987 nrefs->bytenr[*level - 1] = bytenr;
1988 nrefs->refs[*level - 1] = refs;
/* Shared child already visited via another path: skip it. */
1993 ret = enter_shared_node(root, bytenr, refs,
1996 path->slots[*level]++;
2001 next = btrfs_find_tree_block(root, bytenr, blocksize);
2002 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2003 free_extent_buffer(next);
/* Prefetch siblings before the blocking read. */
2004 reada_walk_down(root, cur, path->slots[*level]);
2005 next = read_tree_block(root, bytenr, blocksize,
/* Unreadable child: record the parent as a corrupt extent. */
2007 if (!extent_buffer_uptodate(next)) {
2008 struct btrfs_key node_key;
2010 btrfs_node_key_to_cpu(path->nodes[*level],
2012 path->slots[*level]);
2013 btrfs_add_corrupt_extent_record(root->fs_info,
2015 path->nodes[*level]->start,
2016 root->nodesize, *level);
2022 ret = check_child_node(root, cur, path->slots[*level], next);
/* Structural sanity check before descending into the child. */
2028 if (btrfs_is_leaf(next))
2029 status = btrfs_check_leaf(root, NULL, next);
2031 status = btrfs_check_node(root, NULL, next);
2032 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2033 free_extent_buffer(next);
/* Step down one level into the freshly read child. */
2038 *level = *level - 1;
2039 free_extent_buffer(path->nodes[*level]);
2040 path->nodes[*level] = next;
2041 path->slots[*level] = 0;
2044 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Ascend from *level until a node with an unvisited sibling slot is
 * found, dropping references to fully-processed blocks and leaving the
 * shared node when we climb past the active one.
 * NOTE(review): interior lines are elided in this view.
 */
2048 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2049 struct walk_control *wc, int *level)
2052 struct extent_buffer *leaf;
2054 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2055 leaf = path->nodes[i];
/* This node still has a next slot: resume walking down from here. */
2056 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* Fully consumed: release the block and keep climbing. */
2061 free_extent_buffer(path->nodes[*level]);
2062 path->nodes[*level] = NULL;
2063 BUG_ON(*level > wc->active_node);
2064 if (*level == wc->active_node)
2065 leave_shared_node(root, wc, *level);
/*
 * Validate the root directory inode record: it must have an inode item
 * with no errors, nlink 1 and no links found, and exactly a single ".."
 * inode-ref backref at index 0 with no dir item/index entries.
 * NOTE(review): interior lines are elided in this view.
 */
2072 static int check_root_dir(struct inode_record *rec)
2074 struct inode_backref *backref;
2077 if (!rec->found_inode_item || rec->errors)
2079 if (rec->nlink != 1 || rec->found_link != 0)
2081 if (list_empty(&rec->backrefs))
/* Only examine the first (and expected only) backref. */
2083 backref = to_inode_backref(rec->backrefs.next);
2084 if (!backref->found_inode_ref)
/* The root dir's sole ref must be ".." at index 0. */
2086 if (backref->index != 0 || backref->namelen != 2 ||
2087 memcmp(backref->name, "..", 2))
2089 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Repair I_ERR_DIR_ISIZE_WRONG: rewrite the directory's inode item size
 * to the value accumulated in rec->found_size while scanning its entries.
 * NOTE(review): interior lines are elided in this view.
 */
2096 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2097 struct btrfs_root *root, struct btrfs_path *path,
2098 struct inode_record *rec)
2100 struct btrfs_inode_item *ei;
2101 struct btrfs_key key;
/* offset (u64)-1 positions the search just past the inode item. */
2104 key.objectid = rec->ino;
2105 key.type = BTRFS_INODE_ITEM_KEY;
2106 key.offset = (u64)-1;
2108 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2112 if (!path->slots[0]) {
/* Step back one slot and verify it belongs to this inode. */
2119 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2120 if (key.objectid != rec->ino) {
2125 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2126 struct btrfs_inode_item);
2127 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2128 btrfs_mark_buffer_dirty(path->nodes[0]);
/* Error is fixed; clear the flag so it is not reported again. */
2129 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2130 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2131 root->root_key.objectid);
2133 btrfs_release_path(path);
/*
 * Repair I_ERR_NO_ORPHAN_ITEM: insert the missing orphan item for an
 * unlinked-but-present inode so a later mount can finish the cleanup.
 * NOTE(review): interior lines are elided in this view.
 */
2137 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2138 struct btrfs_root *root,
2139 struct btrfs_path *path,
2140 struct inode_record *rec)
2144 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2145 btrfs_release_path(path);
/* Clear the flag once the orphan item exists. */
2147 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Repair I_ERR_FILE_NBYTES_WRONG: rewrite the inode item's nbytes field
 * to the byte total accumulated in rec->found_size from its file extents.
 * NOTE(review): interior lines are elided in this view.
 */
2151 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2152 struct btrfs_root *root,
2153 struct btrfs_path *path,
2154 struct inode_record *rec)
2156 struct btrfs_inode_item *ei;
2157 struct btrfs_key key;
2160 key.objectid = rec->ino;
2161 key.type = BTRFS_INODE_ITEM_KEY;
2164 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2171 /* Since ret == 0, no need to check anything */
2172 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2173 struct btrfs_inode_item);
2174 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2175 btrfs_mark_buffer_dirty(path->nodes[0]);
/* Error is fixed; clear the flag so it is not reported again. */
2176 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2177 printf("reset nbytes for ino %llu root %llu\n",
2178 rec->ino, root->root_key.objectid);
2180 btrfs_release_path(path);
/*
 * Rebuild a missing DIR_INDEX item for @backref (a dir_item/inode_ref
 * exists but the index entry is gone), then update the parent directory's
 * inode record bookkeeping to reflect the added name.
 * NOTE(review): interior lines are elided in this view.
 */
2184 static int add_missing_dir_index(struct btrfs_root *root,
2185 struct cache_tree *inode_cache,
2186 struct inode_record *rec,
2187 struct inode_backref *backref)
2189 struct btrfs_path *path;
2190 struct btrfs_trans_handle *trans;
2191 struct btrfs_dir_item *dir_item;
2192 struct extent_buffer *leaf;
2193 struct btrfs_key key;
2194 struct btrfs_disk_key disk_key;
2195 struct inode_record *dir_rec;
2196 unsigned long name_ptr;
/* Item payload: dir_item header plus the name bytes. */
2197 u32 data_size = sizeof(*dir_item) + backref->namelen;
2200 path = btrfs_alloc_path();
2204 trans = btrfs_start_transaction(root, 1);
2205 if (IS_ERR(trans)) {
2206 btrfs_free_path(path);
2207 return PTR_ERR(trans);
2210 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2211 (unsigned long long)rec->ino);
/* DIR_INDEX key: (parent dir, DIR_INDEX, index number). */
2212 key.objectid = backref->dir;
2213 key.type = BTRFS_DIR_INDEX_KEY;
2214 key.offset = backref->index;
2216 ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2219 leaf = path->nodes[0];
2220 dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
/* The entry's location key points at the child inode's item. */
2222 disk_key.objectid = cpu_to_le64(rec->ino);
2223 disk_key.type = BTRFS_INODE_ITEM_KEY;
2224 disk_key.offset = 0;
2226 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2227 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2228 btrfs_set_dir_data_len(leaf, dir_item, 0);
2229 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2230 name_ptr = (unsigned long)(dir_item + 1);
2231 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2232 btrfs_mark_buffer_dirty(leaf);
2233 btrfs_free_path(path);
2234 btrfs_commit_transaction(trans, root);
2236 backref->found_dir_index = 1;
/* Update the parent directory's record with the new name length. */
2237 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2238 BUG_ON(IS_ERR(dir_rec));
2241 dir_rec->found_size += backref->namelen;
/* Re-evaluate the parent's isize error state after the addition. */
2242 if (dir_rec->found_size == dir_rec->isize &&
2243 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2244 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2245 if (dir_rec->found_size != dir_rec->isize)
2246 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete a bad DIR_INDEX entry described by @backref: look it up by
 * (dir, name, index) and remove either the whole item or just this name
 * from a multi-entry item.
 * NOTE(review): interior lines are elided in this view.
 */
2251 static int delete_dir_index(struct btrfs_root *root,
2252 struct cache_tree *inode_cache,
2253 struct inode_record *rec,
2254 struct inode_backref *backref)
2256 struct btrfs_trans_handle *trans;
2257 struct btrfs_dir_item *di;
2258 struct btrfs_path *path;
2261 path = btrfs_alloc_path();
2265 trans = btrfs_start_transaction(root, 1);
2266 if (IS_ERR(trans)) {
2267 btrfs_free_path(path);
2268 return PTR_ERR(trans);
2272 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2273 (unsigned long long)backref->dir,
2274 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2275 (unsigned long long)root->objectid);
/* mod = -1: look up the entry for deletion. */
2277 di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2278 backref->name, backref->namelen,
2279 backref->index, -1);
2282 btrfs_free_path(path);
2283 btrfs_commit_transaction(trans, root);
/* Whole item vs. one name inside a packed item. */
2290 ret = btrfs_del_item(trans, root, path);
2292 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2294 btrfs_free_path(path);
2295 btrfs_commit_transaction(trans, root);
/*
 * Recreate a missing inode item from what the checker learned about the
 * inode (found links, sizes, whether dir entries exist).  The mode is
 * guessed (dir 0755 if dir items were seen, else regular file 0755), so
 * the user is warned to verify permissions and content afterwards.
 * NOTE(review): interior lines are elided in this view.
 */
2299 static int create_inode_item(struct btrfs_root *root,
2300 struct inode_record *rec,
2301 struct inode_backref *backref, int root_dir)
2303 struct btrfs_trans_handle *trans;
2304 struct btrfs_inode_item inode_item;
/* All timestamps on the rebuilt inode are set to "now". */
2305 time_t now = time(NULL);
2308 trans = btrfs_start_transaction(root, 1);
2309 if (IS_ERR(trans)) {
2310 ret = PTR_ERR(trans);
2314 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2315 "be incomplete, please check permissions and content after "
2316 "the fsck completes.\n", (unsigned long long)root->objectid,
2317 (unsigned long long)rec->ino);
2319 memset(&inode_item, 0, sizeof(inode_item));
2320 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
/* nlink: 1 in the root-dir case, else the link count we counted. */
2322 btrfs_set_stack_inode_nlink(&inode_item, 1);
2324 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2325 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2326 if (rec->found_dir_item) {
/* Conflicting evidence: has both dir entries and file extents. */
2327 if (rec->found_file_extent)
2328 fprintf(stderr, "root %llu inode %llu has both a dir "
2329 "item and extents, unsure if it is a dir or a "
2330 "regular file so setting it as a directory\n",
2331 (unsigned long long)root->objectid,
2332 (unsigned long long)rec->ino);
2333 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2334 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2335 } else if (!rec->found_dir_item) {
/* Regular file: size is the end of the last extent seen. */
2336 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2337 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2339 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2340 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2341 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2342 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2343 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2344 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2345 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2346 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2348 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2350 btrfs_commit_transaction(trans, root);
/*
 * Walk all backrefs of @rec and repair each inconsistency in turn:
 * recreate the root-dir inode item, delete mismatched dir indexes,
 * rebuild missing dir indexes or dir item/index pairs, and recreate the
 * inode item when only complete backrefs exist.  Returns the number of
 * repairs, or a negative error.
 * NOTE(review): interior lines are elided in this view.
 */
2354 static int repair_inode_backrefs(struct btrfs_root *root,
2355 struct inode_record *rec,
2356 struct cache_tree *inode_cache,
2359 struct inode_backref *tmp, *backref;
2360 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2364 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* Root dir with no inode item: recreate it specially. */
2365 if (!delete && rec->ino == root_dirid) {
2366 if (!rec->found_inode_item) {
2367 ret = create_inode_item(root, rec, backref, 1);
2374 /* Index 0 for root dir's are special, don't mess with it */
2375 if (rec->ino == root_dirid && backref->index == 0)
/*
 * Dir index present but with no inode ref, or with one whose
 * index disagrees: the index entry is bad, delete it.
 */
2379 ((backref->found_dir_index && !backref->found_inode_ref) ||
2380 (backref->found_dir_index && backref->found_inode_ref &&
2381 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2382 ret = delete_dir_index(root, inode_cache, rec, backref);
2386 list_del(&backref->list);
/* Dir item + inode ref but no index: rebuild the index. */
2390 if (!delete && !backref->found_dir_index &&
2391 backref->found_dir_item && backref->found_inode_ref) {
2392 ret = add_missing_dir_index(root, inode_cache, rec,
/* Fully consistent backref: nothing left to fix, drop it. */
2397 if (backref->found_dir_item &&
2398 backref->found_dir_index &&
2399 backref->found_dir_index) {
2400 if (!backref->errors &&
2401 backref->found_inode_ref) {
2402 list_del(&backref->list);
/* Only an inode ref exists: recreate the dir item/index pair. */
2408 if (!delete && (!backref->found_dir_index &&
2409 !backref->found_dir_item &&
2410 backref->found_inode_ref)) {
2411 struct btrfs_trans_handle *trans;
2412 struct btrfs_key location;
/* Bail to the nlink-fixing path on a name conflict. */
2414 ret = check_dir_conflict(root, backref->name,
2420 * let nlink fixing routine to handle it,
2421 * which can do it better.
2426 location.objectid = rec->ino;
2427 location.type = BTRFS_INODE_ITEM_KEY;
2428 location.offset = 0;
2430 trans = btrfs_start_transaction(root, 1);
2431 if (IS_ERR(trans)) {
2432 ret = PTR_ERR(trans);
2435 fprintf(stderr, "adding missing dir index/item pair "
2437 (unsigned long long)rec->ino);
2438 ret = btrfs_insert_dir_item(trans, root, backref->name,
2440 backref->dir, &location,
2441 imode_to_type(rec->imode),
2444 btrfs_commit_transaction(trans, root);
/* Complete, error-free backrefs but no inode item: recreate it. */
2448 if (!delete && (backref->found_inode_ref &&
2449 backref->found_dir_index &&
2450 backref->found_dir_item &&
2451 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2452 !rec->found_inode_item)) {
2453 ret = create_inode_item(root, rec, backref, 0);
/* Negative ret wins; otherwise report how many repairs we made. */
2460 return ret ? ret : repaired;
2464 * To determine the file type for nlink/inode_item repair
2466 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2467 * Return -ENOENT if file type is not found.
/*
 * Best-effort file type recovery (see the comment above): prefer the mode
 * from a recovered inode item, otherwise take the filetype recorded on
 * any dir item/index backref.  0 on success, -ENOENT if undeterminable.
 * NOTE(review): interior lines are elided in this view.
 */
2469 static int find_file_type(struct inode_record *rec, u8 *type)
2471 struct inode_backref *backref;
2473 /* For inode item recovered case */
2474 if (rec->found_inode_item) {
2475 *type = imode_to_type(rec->imode);
/* Fall back to the filetype stored in any dir entry backref. */
2479 list_for_each_entry(backref, &rec->backrefs, list) {
2480 if (backref->found_dir_index || backref->found_dir_item) {
2481 *type = backref->filetype;
2489 * To determine the file name for nlink repair
2491 * Return 0 if file name is found, set name and namelen.
2492 * Return -ENOENT if file name is not found.
/*
 * Best-effort file name recovery (see the comment above): take the name
 * from the first backref that has any dir entry or inode ref.  0 on
 * success (name/namelen set), -ENOENT if no usable backref exists.
 * NOTE(review): interior lines are elided in this view.
 */
2494 static int find_file_name(struct inode_record *rec,
2495 char *name, int *namelen)
2497 struct inode_backref *backref;
2499 list_for_each_entry(backref, &rec->backrefs, list) {
2500 if (backref->found_dir_index || backref->found_dir_item ||
2501 backref->found_inode_ref) {
2502 memcpy(name, backref->name, backref->namelen);
2503 *namelen = backref->namelen;
2510 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the inode's link count from scratch: unlink every backref,
 * discard the incomplete ones, zero nlink in the inode item, then re-add
 * only the fully consistent links (btrfs_add_link() re-increments nlink).
 * NOTE(review): interior lines are elided in this view.
 */
2511 static int reset_nlink(struct btrfs_trans_handle *trans,
2512 struct btrfs_root *root,
2513 struct btrfs_path *path,
2514 struct inode_record *rec)
2516 struct inode_backref *backref;
2517 struct inode_backref *tmp;
2518 struct btrfs_key key;
2519 struct btrfs_inode_item *inode_item;
2522 /* We don't believe this either, reset it and iterate backref */
2523 rec->found_link = 0;
2525 /* Remove all backref including the valid ones */
2526 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2527 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2528 backref->index, backref->name,
2529 backref->namelen, 0);
2533 /* remove invalid backref, so it won't be added back */
2534 if (!(backref->found_dir_index &&
2535 backref->found_dir_item &&
2536 backref->found_inode_ref)) {
2537 list_del(&backref->list);
2544 /* Set nlink to 0 */
2545 key.objectid = rec->ino;
2546 key.type = BTRFS_INODE_ITEM_KEY;
2548 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2555 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2556 struct btrfs_inode_item);
2557 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2558 btrfs_mark_buffer_dirty(path->nodes[0]);
/* Release before re-adding links; btrfs_add_link searches again. */
2559 btrfs_release_path(path);
2562 * Add back valid inode_ref/dir_item/dir_index,
2563 * add_link() will handle the nlink inc, so new nlink must be correct
2565 list_for_each_entry(backref, &rec->backrefs, list) {
2566 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2567 backref->name, backref->namelen,
2568 backref->filetype, &backref->index, 1);
2573 btrfs_release_path(path);
/*
 * Repair I_ERR_LINK_COUNT_WRONG: recover name/type, rebuild nlink via
 * reset_nlink(), and if no valid link remains, relink the inode into a
 * "lost+found" directory (appending ".<ino>" on name collisions).
 * NOTE(review): interior lines are elided in this view.
 */
2577 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2578 struct btrfs_root *root,
2579 struct btrfs_path *path,
2580 struct inode_record *rec)
2582 char *dir_name = "lost+found";
2583 char namebuf[BTRFS_NAME_LEN] = {0};
2588 int name_recovered = 0;
2589 int type_recovered = 0;
2593 * Get file name and type first before these invalid inode ref
2594 * are deleted by remove_all_invalid_backref()
2596 name_recovered = !find_file_name(rec, namebuf, &namelen);
2597 type_recovered = !find_file_type(rec, &type);
2599 if (!name_recovered) {
/* No name anywhere: use the inode number itself as the name. */
2600 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2601 rec->ino, rec->ino);
2602 namelen = count_digits(rec->ino);
2603 sprintf(namebuf, "%llu", rec->ino);
2606 if (!type_recovered) {
2607 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2609 type = BTRFS_FT_REG_FILE;
2613 ret = reset_nlink(trans, root, path, rec);
2616 "Failed to reset nlink for inode %llu: %s\n",
2617 rec->ino, strerror(-ret));
/* No valid link survived: move the inode into lost+found. */
2621 if (rec->found_link == 0) {
2622 lost_found_ino = root->highest_inode;
2623 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
/* Create (or find) the lost+found dir under the fs root. */
2628 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2629 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2632 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2633 dir_name, strerror(-ret));
2636 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2637 namebuf, namelen, type, NULL, 1);
2639 * Add ".INO" suffix several times to handle case where
2640 * "FILENAME.INO" is already taken by another file.
2642 while (ret == -EEXIST) {
2644 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2646 if (namelen + count_digits(rec->ino) + 1 >
2651 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2653 namelen += count_digits(rec->ino) + 1;
2654 ret = btrfs_add_link(trans, root, rec->ino,
2655 lost_found_ino, namebuf,
2656 namelen, type, NULL, 1);
2660 "Failed to link the inode %llu to %s dir: %s\n",
2661 rec->ino, dir_name, strerror(-ret));
2665 * Just increase the found_link, don't actually add the
2666 * backref. This will make things easier and this inode
2667 * record will be freed after the repair is done.
2668 * So fsck will not report problem about this inode.
2671 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2672 namelen, namebuf, dir_name);
2674 printf("Fixed the nlink of inode %llu\n", rec->ino);
2677 * Clear the flag anyway, or we will loop forever for the same inode
2678 * as it will not be removed from the bad inode list and the dead loop
2681 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2682 btrfs_release_path(path);
2687 * Check if there is any normal(reg or prealloc) file extent for given
2689 * This is used to determine the file type when neither its dir_index/item or
2690 * inode_item exists.
2692 * This will *NOT* report error, if any error happens, just consider it does
2693 * not have any normal file extent.
/*
 * Return non-zero if inode @ino has at least one non-inline (REG or
 * PREALLOC) file extent; errors are swallowed and treated as "not found"
 * (see the comment block above).
 * NOTE(review): interior lines are elided in this view.
 */
2695 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2697 struct btrfs_path *path;
2698 struct btrfs_key key;
2699 struct btrfs_key found_key;
2700 struct btrfs_file_extent_item *fi;
2704 path = btrfs_alloc_path();
2708 key.type = BTRFS_EXTENT_DATA_KEY;
2711 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
/* Search may land at the end of a leaf; advance to the next one. */
2716 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2717 ret = btrfs_next_leaf(root, path);
2724 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
/* Stop once we leave this inode's EXTENT_DATA items. */
2726 if (found_key.objectid != ino ||
2727 found_key.type != BTRFS_EXTENT_DATA_KEY)
2729 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2730 struct btrfs_file_extent_item);
/* Any non-inline extent means a "normal" file extent exists. */
2731 type = btrfs_file_extent_type(path->nodes[0], fi);
2732 if (type != BTRFS_FILE_EXTENT_INLINE) {
2738 btrfs_free_path(path);
/*
 * Map a BTRFS_FT_* directory-entry file type to the corresponding
 * S_IF* bits of an inode mode via a static lookup table.
 * NOTE(review): @type is used as an array index without a range check —
 * callers are expected to pass a valid BTRFS_FT_* value.
 */
2742 static u32 btrfs_type_to_imode(u8 type)
2744 static u32 imode_by_btrfs_type[] = {
2745 [BTRFS_FT_REG_FILE] = S_IFREG,
2746 [BTRFS_FT_DIR] = S_IFDIR,
2747 [BTRFS_FT_CHRDEV] = S_IFCHR,
2748 [BTRFS_FT_BLKDEV] = S_IFBLK,
2749 [BTRFS_FT_FIFO] = S_IFIFO,
2750 [BTRFS_FT_SOCK] = S_IFSOCK,
2751 [BTRFS_FT_SYMLINK] = S_IFLNK,
2754 return imode_by_btrfs_type[(type)];
/*
 * Repair I_ERR_NO_INODE_ITEM: infer the file type (inode item > extent
 * evidence > dir items > orphan extents > FILE fallback), create a fresh
 * inode item, and mark the record so the nlink repair runs afterwards.
 * NOTE(review): interior lines are elided in this view.
 */
2757 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2758 struct btrfs_root *root,
2759 struct btrfs_path *path,
2760 struct inode_record *rec)
2764 int type_recovered = 0;
2767 printf("Trying to rebuild inode:%llu\n", rec->ino);
2769 type_recovered = !find_file_type(rec, &filetype);
2772 * Try to determine inode type if type not found.
2774 * For found regular file extent, it must be FILE.
2775 * For found dir_item/index, it must be DIR.
2777 * For undetermined one, use FILE as fallback.
2780 * 1. If found backref(inode_index/item is already handled) to it,
2782 * Need new inode-inode ref structure to allow search for that.
2784 if (!type_recovered) {
/* Non-inline extents exist: must be a regular file. */
2785 if (rec->found_file_extent &&
2786 find_normal_file_extent(root, rec->ino)) {
2788 filetype = BTRFS_FT_REG_FILE;
2789 } else if (rec->found_dir_item) {
2791 filetype = BTRFS_FT_DIR;
2792 } else if (!list_empty(&rec->orphan_extents)) {
/* Orphan data extents also imply a regular file. */
2794 filetype = BTRFS_FT_REG_FILE;
2796 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2799 filetype = BTRFS_FT_REG_FILE;
2803 ret = btrfs_new_inode(trans, root, rec->ino,
2804 mode | btrfs_type_to_imode(filetype));
2809 * Here inode rebuild is done, we only rebuild the inode item,
2810 * don't repair the nlink(like move to lost+found).
2811 * That is the job of nlink repair.
2813 * We just fill the record and return
2815 rec->found_dir_item = 1;
2816 rec->imode = mode | btrfs_type_to_imode(filetype);
2818 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2819 /* Ensure the inode_nlinks repair function will be called */
2820 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-link the orphan data extents queued on @rec->orphan_extents back into
 * the file by inserting the matching file extent items.
 * Extents that conflict with an existing file extent are dropped and their
 * on-disk extents freed instead.
 */
static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      struct btrfs_path *path,
				      struct inode_record *rec)
	struct orphan_data_extent *orphan;
	struct orphan_data_extent *tmp;

	list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
		/*
		 * Check for conflicting file extents.
		 *
		 * Here we don't know whether the extent is compressed or not,
		 * so we can only assume it is not compressed and has no data
		 * offset, and use its disk_len as the extent length.
		 */
		ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
				       orphan->offset, orphan->disk_len, 0);
		btrfs_release_path(path);
		/* Conflict found: delete the orphan and free its extent */
		"orphan extent (%llu, %llu) conflicts, delete the orphan\n",
			orphan->disk_bytenr, orphan->disk_len);
		ret = btrfs_free_extent(trans,
				root->fs_info->extent_root,
				orphan->disk_bytenr, orphan->disk_len,
				0, root->objectid, orphan->objectid,
		ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
					       orphan->offset, orphan->disk_bytenr,
					       orphan->disk_len, orphan->disk_len);
		/* Update file size info */
		rec->found_size += orphan->disk_len;
		if (rec->found_size == rec->nbytes)
			rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;

		/* Update the file extent hole info too */
		ret = del_file_extent_hole(&rec->holes, orphan->offset,
		if (RB_EMPTY_ROOT(&rec->holes))
			rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
		list_del(&orphan->list);
	rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Punch a hole for every gap recorded in @rec->holes so the inode's file
 * extent layout becomes contiguous again, then clear the
 * I_ERR_FILE_EXTENT_DISCOUNT error once the hole tree is empty.
 */
static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
					struct btrfs_root *root,
					struct btrfs_path *path,
					struct inode_record *rec)
	struct rb_node *node;
	struct file_extent_hole *hole;

	node = rb_first(&rec->holes);
	hole = rb_entry(node, struct file_extent_hole, node);
	ret = btrfs_punch_hole(trans, root, rec->ino,
			       hole->start, hole->len);
	ret = del_file_extent_hole(&rec->holes, hole->start,
	if (RB_EMPTY_ROOT(&rec->holes))
		rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
	node = rb_first(&rec->holes);
	/* special case for a file losing all its file extents */
	ret = btrfs_punch_hole(trans, root, rec->ino, 0,
			       round_up(rec->isize, root->sectorsize));
	printf("Fixed discount file extents for inode: %llu in root: %llu\n",
	       rec->ino, root->objectid);
/*
 * Dispatch the individual inode repair helpers for every repairable error
 * bit set in @rec->errors, all inside a single transaction.
 * Returns 0 on success or the error of the first failing step.
 */
static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
	struct btrfs_trans_handle *trans;
	struct btrfs_path *path;

	/* Bail out early if none of the repairable error bits are set */
	if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
			     I_ERR_NO_ORPHAN_ITEM |
			     I_ERR_LINK_COUNT_WRONG |
			     I_ERR_NO_INODE_ITEM |
			     I_ERR_FILE_EXTENT_ORPHAN |
			     I_ERR_FILE_EXTENT_DISCOUNT|
			     I_ERR_FILE_NBYTES_WRONG)))

	path = btrfs_alloc_path();

	/*
	 * Reserve 7 units: nlink repair may create a dir and add a link, so
	 * 2 for parent(256)'s dir_index and dir_item
	 * 2 for lost+found dir's inode_item and inode_ref
	 * 1 for the new inode_ref of the file
	 * 2 for lost+found dir's dir_index and dir_item for the file
	 */
	trans = btrfs_start_transaction(root, 7);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);

	/* NO_INODE_ITEM must be repaired first; later steps need the item */
	if (rec->errors & I_ERR_NO_INODE_ITEM)
		ret = repair_inode_no_item(trans, root, path, rec);
	if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
		ret = repair_inode_orphan_extent(trans, root, path, rec);
	if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
		ret = repair_inode_discount_extent(trans, root, path, rec);
	if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
		ret = repair_inode_isize(trans, root, path, rec);
	if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
		ret = repair_inode_orphan_item(trans, root, path, rec);
	if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
		ret = repair_inode_nlinks(trans, root, path, rec);
	if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
		ret = repair_inode_nbytes(trans, root, path, rec);
	btrfs_commit_transaction(trans, root);
	btrfs_free_path(path);
/*
 * Validate (and in repair mode try to fix) every inode record collected
 * for @root's tree: the root dir, orphan items, backrefs and link counts.
 * Remaining problems are printed to stderr; returns -1 if any error is
 * left, 0 otherwise.
 */
static int check_inode_recs(struct btrfs_root *root,
			    struct cache_tree *inode_cache)
	struct cache_extent *cache;
	struct ptr_node *node;
	struct inode_record *rec;
	struct inode_backref *backref;
	u64 root_dirid = btrfs_root_dirid(&root->root_item);

	/* A dead root should not have collected any inode records */
	if (btrfs_root_refs(&root->root_item) == 0) {
		if (!cache_tree_empty(inode_cache))
			fprintf(stderr, "warning line %d\n", __LINE__);

	/*
	 * We need to record the highest inode number for later 'lost+found'
	 * dir creation.
	 * We must select an ino not used/referred by any existing inode, or
	 * 'lost+found' ino may be a missing ino in a corrupted leaf, and
	 * this may cause the 'lost+found' dir to have wrong nlinks.
	 */
	cache = last_cache_extent(inode_cache);
	node = container_of(cache, struct ptr_node, cache);
	if (rec->ino > root->highest_inode)
		root->highest_inode = rec->ino;

	/*
	 * We need to repair backrefs first because we could change some of the
	 * errors in the inode recs.
	 *
	 * We also need to go through and delete invalid backrefs first and then
	 * add the correct ones second. We do this because we may get EEXIST
	 * when adding back the correct index because we hadn't yet deleted the
	 * invalid one.
	 *
	 * For example, if we were missing a dir index then the directories
	 * isize would be wrong, so if we fixed the isize to what we thought it
	 * would be and then fixed the backref we'd still have a invalid fs, so
	 * we need to add back the dir index and then check to see if the isize
	 * is still wrong.
	 */
	if (stage == 3 && !err)
	cache = search_cache_extent(inode_cache, 0);
	while (repair && cache) {
		node = container_of(cache, struct ptr_node, cache);
		cache = next_cache_extent(cache);
		/* Need to free everything up and rescan */
		remove_cache_extent(inode_cache, &node->cache);
		free_inode_rec(rec);
		/* No backrefs: nothing to repair for this record */
		if (list_empty(&rec->backrefs))
		ret = repair_inode_backrefs(root, rec, inode_cache,

	/* The root directory of this tree is special-cased */
	rec = get_inode_rec(inode_cache, root_dirid, 0);
	BUG_ON(IS_ERR(rec));
	ret = check_root_dir(rec);
	fprintf(stderr, "root %llu root dir %llu error\n",
		(unsigned long long)root->root_key.objectid,
		(unsigned long long)root_dirid);
	print_inode_error(root, rec);
	/* Repair mode: recreate a missing root dir in a fresh transaction */
	struct btrfs_trans_handle *trans;

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		"root %llu missing its root dir, recreating\n",
		(unsigned long long)root->objectid);
	ret = btrfs_make_root_dir(trans, root, root_dirid);
	btrfs_commit_transaction(trans, root);
	fprintf(stderr, "root %llu root dir %llu not found\n",
		(unsigned long long)root->root_key.objectid,
		(unsigned long long)root_dirid);

	/* Walk every remaining record, repairing/reporting as configured */
	cache = search_cache_extent(inode_cache, 0);
	node = container_of(cache, struct ptr_node, cache);
	remove_cache_extent(inode_cache, &node->cache);
	/* Root dir and the orphan objectid were handled above */
	if (rec->ino == root_dirid ||
	    rec->ino == BTRFS_ORPHAN_OBJECTID) {
		free_inode_rec(rec);

	/* A present orphan item may legitimately explain the state */
	if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
		ret = check_orphan_item(root, rec->ino);
		rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
		if (can_free_inode_rec(rec)) {
			free_inode_rec(rec);

	if (!rec->found_inode_item)
		rec->errors |= I_ERR_NO_INODE_ITEM;
	if (rec->found_link != rec->nlink)
		rec->errors |= I_ERR_LINK_COUNT_WRONG;
	ret = try_repair_inode(root, rec);
	if (ret == 0 && can_free_inode_rec(rec)) {
		free_inode_rec(rec);
	if (!(repair && ret == 0))
	print_inode_error(root, rec);
	list_for_each_entry(backref, &rec->backrefs, list) {
		if (!backref->found_dir_item)
			backref->errors |= REF_ERR_NO_DIR_ITEM;
		if (!backref->found_dir_index)
			backref->errors |= REF_ERR_NO_DIR_INDEX;
		if (!backref->found_inode_ref)
			backref->errors |= REF_ERR_NO_INODE_REF;
		fprintf(stderr, "\tunresolved ref dir %llu index %llu"
			" namelen %u name %s filetype %d errors %x",
			(unsigned long long)backref->dir,
			(unsigned long long)backref->index,
			backref->namelen, backref->name,
			backref->filetype, backref->errors);
		print_ref_error(backref->errors);
	free_inode_rec(rec);
	return (error > 0) ? -1 : 0;
/*
 * Look up the root_record for @objectid in @root_cache, allocating and
 * inserting a fresh zeroed record when none exists yet.
 * Returns the record, or an ERR_PTR on allocation/insert failure.
 */
static struct root_record *get_root_rec(struct cache_tree *root_cache,
	struct cache_extent *cache;
	struct root_record *rec = NULL;

	cache = lookup_cache_extent(root_cache, objectid, 1);
	rec = container_of(cache, struct root_record, cache);
	/* Cache miss: create a new record keyed on the objectid */
	rec = calloc(1, sizeof(*rec));
	return ERR_PTR(-ENOMEM);
	rec->objectid = objectid;
	INIT_LIST_HEAD(&rec->backrefs);
	rec->cache.start = objectid;
	rec->cache.size = 1;

	ret = insert_cache_extent(root_cache, &rec->cache);
	return ERR_PTR(-EEXIST);
/*
 * Find an existing root_backref on @rec matching (ref_root, dir, name),
 * or allocate a new one and link it onto @rec->backrefs.
 */
static struct root_backref *get_root_backref(struct root_record *rec,
					     u64 ref_root, u64 dir, u64 index,
					     const char *name, int namelen)
	struct root_backref *backref;

	list_for_each_entry(backref, &rec->backrefs, list) {
		if (backref->ref_root != ref_root || backref->dir != dir ||
		    backref->namelen != namelen)
		if (memcmp(name, backref->name, namelen))

	/* Not found: allocate the struct plus room for name + NUL */
	backref = calloc(1, sizeof(*backref) + namelen + 1);
	backref->ref_root = ref_root;
	backref->index = index;
	backref->namelen = namelen;
	memcpy(backref->name, name, namelen);
	backref->name[namelen] = '\0';
	list_add_tail(&backref->list, &rec->backrefs);
/* cache_tree callback: free a root_record and every backref queued on it */
static void free_root_record(struct cache_extent *cache)
	struct root_record *rec;
	struct root_backref *backref;

	rec = container_of(cache, struct root_record, cache);
	while (!list_empty(&rec->backrefs)) {
		backref = to_root_backref(rec->backrefs.next);
		list_del(&backref->list);

/* Generates free_root_recs_tree() to drop a whole cache_tree of records */
FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one piece of evidence (dir item/index or root ref/backref key)
 * about the link from @ref_root's directory @dir to subvolume @root_id,
 * merging it into the matching root_backref and flagging inconsistencies
 * (index mismatches, duplicate refs).
 */
static int add_root_backref(struct cache_tree *root_cache,
			    u64 root_id, u64 ref_root, u64 dir, u64 index,
			    const char *name, int namelen,
			    int item_type, int errors)
	struct root_record *rec;
	struct root_backref *backref;

	rec = get_root_rec(root_cache, root_id);
	BUG_ON(IS_ERR(rec));
	backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
	backref->errors |= errors;

	/* Every key type except DIR_ITEM carries an index; cross-check it */
	if (item_type != BTRFS_DIR_ITEM_KEY) {
		if (backref->found_dir_index || backref->found_back_ref ||
		    backref->found_forward_ref) {
			if (backref->index != index)
				backref->errors |= REF_ERR_INDEX_UNMATCH;
			backref->index = index;

	if (item_type == BTRFS_DIR_ITEM_KEY) {
		if (backref->found_forward_ref)
		backref->found_dir_item = 1;
	} else if (item_type == BTRFS_DIR_INDEX_KEY) {
		backref->found_dir_index = 1;
	} else if (item_type == BTRFS_ROOT_REF_KEY) {
		if (backref->found_forward_ref)
			backref->errors |= REF_ERR_DUP_ROOT_REF;
		else if (backref->found_dir_item)
		backref->found_forward_ref = 1;
	} else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
		if (backref->found_back_ref)
			backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
		backref->found_back_ref = 1;

	/* Reachable only once both the forward ref and the dir item exist */
	if (backref->found_forward_ref && backref->found_dir_item)
		backref->reachable = 1;
/*
 * Move per-subvolume "inode" records collected in @src_cache (these are
 * actually child-root references seen inside @root) into the global
 * @dst_cache as root backrefs. Records of the reloc tree are discarded.
 */
static int merge_root_recs(struct btrfs_root *root,
			   struct cache_tree *src_cache,
			   struct cache_tree *dst_cache)
	struct cache_extent *cache;
	struct ptr_node *node;
	struct inode_record *rec;
	struct inode_backref *backref;

	/* The reloc tree carries no useful root refs: just free and leave */
	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
		free_inode_recs_tree(src_cache);

	cache = search_cache_extent(src_cache, 0);
	node = container_of(cache, struct ptr_node, cache);
	remove_cache_extent(src_cache, &node->cache);

	ret = is_child_root(root, root->objectid, rec->ino);

	/* Convert each dir item/index backref into a root backref */
	list_for_each_entry(backref, &rec->backrefs, list) {
		BUG_ON(backref->found_inode_ref);
		if (backref->found_dir_item)
			add_root_backref(dst_cache, rec->ino,
				root->root_key.objectid, backref->dir,
				backref->index, backref->name,
				backref->namelen, BTRFS_DIR_ITEM_KEY,
		if (backref->found_dir_index)
			add_root_backref(dst_cache, rec->ino,
				root->root_key.objectid, backref->dir,
				backref->index, backref->name,
				backref->namelen, BTRFS_DIR_INDEX_KEY,
	free_inode_rec(rec);
/*
 * Verify that every fs/subvolume tree is reachable from the top-level fs
 * tree and that each reachable backref is fully consistent; problems are
 * printed to stderr. Returns 1 if errors remain, 0 otherwise.
 */
static int check_root_refs(struct btrfs_root *root,
			   struct cache_tree *root_cache)
	struct root_record *rec;
	struct root_record *ref_root;
	struct root_backref *backref;
	struct cache_extent *cache;

	/* The top-level fs tree is always considered referenced */
	rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
	BUG_ON(IS_ERR(rec));

	/* fixme: this can not detect circular references */
	/* Iterate until the reachability marking reaches a fixed point */
	cache = search_cache_extent(root_cache, 0);
	rec = container_of(cache, struct root_record, cache);
	cache = next_cache_extent(cache);
	if (rec->found_ref == 0)
	list_for_each_entry(backref, &rec->backrefs, list) {
		if (!backref->reachable)
		ref_root = get_root_rec(root_cache,
		BUG_ON(IS_ERR(ref_root));
		if (ref_root->found_ref > 0)
		/* The referring root itself became unreachable */
		backref->reachable = 0;
		if (rec->found_ref == 0)

	/* Second pass: report every root that is still unreferenced */
	cache = search_cache_extent(root_cache, 0);
	rec = container_of(cache, struct root_record, cache);
	cache = next_cache_extent(cache);
	if (rec->found_ref == 0 &&
	    rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
	    rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
		/* An orphan item can legitimately explain a dangling root */
		ret = check_orphan_item(root->fs_info->tree_root,
		/*
		 * If we don't have a root item then we likely just have
		 * a dir item in a snapshot for this root but no actual
		 * ref key or anything so it's meaningless.
		 */
		if (!rec->found_root_item)
		fprintf(stderr, "fs tree %llu not referenced\n",
			(unsigned long long)rec->objectid);

	if (rec->found_ref > 0 && !rec->found_root_item)
	list_for_each_entry(backref, &rec->backrefs, list) {
		if (!backref->found_dir_item)
			backref->errors |= REF_ERR_NO_DIR_ITEM;
		if (!backref->found_dir_index)
			backref->errors |= REF_ERR_NO_DIR_INDEX;
		if (!backref->found_back_ref)
			backref->errors |= REF_ERR_NO_ROOT_BACKREF;
		if (!backref->found_forward_ref)
			backref->errors |= REF_ERR_NO_ROOT_REF;
		if (backref->reachable && backref->errors)
	fprintf(stderr, "fs tree %llu refs %u %s\n",
		(unsigned long long)rec->objectid, rec->found_ref,
		rec->found_root_item ? "" : "not found");

	list_for_each_entry(backref, &rec->backrefs, list) {
		if (!backref->reachable)
		if (!backref->errors && rec->found_root_item)
		fprintf(stderr, "\tunresolved ref root %llu dir %llu"
			" index %llu namelen %u name %s errors %x\n",
			(unsigned long long)backref->ref_root,
			(unsigned long long)backref->dir,
			(unsigned long long)backref->index,
			backref->namelen, backref->name,
		print_ref_error(backref->errors);
	return errors > 0 ? 1 : 0;
/*
 * Decode one ROOT_REF/ROOT_BACKREF item from leaf @eb slot @slot and feed
 * it into the root cache as a forward or backward subvolume reference.
 * Over-long names are truncated to BTRFS_NAME_LEN and flagged.
 */
static int process_root_ref(struct extent_buffer *eb, int slot,
			    struct btrfs_key *key,
			    struct cache_tree *root_cache)
	struct btrfs_root_ref *ref;
	char namebuf[BTRFS_NAME_LEN];

	ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
	dirid = btrfs_root_ref_dirid(eb, ref);
	index = btrfs_root_ref_sequence(eb, ref);
	name_len = btrfs_root_ref_name_len(eb, ref);

	if (name_len <= BTRFS_NAME_LEN) {
	/* Name too long for the buffer: clamp and record the error */
	len = BTRFS_NAME_LEN;
	error = REF_ERR_NAME_TOO_LONG;
	/* The name follows the fixed-size struct in the item */
	read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);

	if (key->type == BTRFS_ROOT_REF_KEY) {
		/* (parent, ROOT_REF, child): forward reference to the child */
		add_root_backref(root_cache, key->offset, key->objectid, dirid,
				 index, namebuf, len, key->type, error);
		/* (child, ROOT_BACKREF, parent): backward reference */
		add_root_backref(root_cache, key->objectid, key->offset, dirid,
				 index, namebuf, len, key->type, error);
/* cache_tree callback: release one recorded corrupt tree block */
static void free_corrupt_block(struct cache_extent *cache)
	struct btrfs_corrupt_block *corrupt;

	corrupt = container_of(cache, struct btrfs_corrupt_block, cache);

/* Generates free_corrupt_blocks_tree() to drop the whole record tree */
FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
 * Repair the btree of the given root.
 *
 * The fix is to remove the node keys listed in the corrupt_blocks
 * cache_tree and rebalance the tree.
 * After the fix, the btree should be writeable.
static int repair_btree(struct btrfs_root *root,
			struct cache_tree *corrupt_blocks)
	struct btrfs_trans_handle *trans;
	struct btrfs_path *path;
	struct btrfs_corrupt_block *corrupt;
	struct cache_extent *cache;
	struct btrfs_key key;

	/* Nothing recorded as corrupt: nothing to do */
	if (cache_tree_empty(corrupt_blocks))

	path = btrfs_alloc_path();
	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		fprintf(stderr, "Error starting transaction: %s\n",

	/* Pass 1: delete the pointer (and extent) of every corrupt block */
	cache = first_cache_extent(corrupt_blocks);
	corrupt = container_of(cache, struct btrfs_corrupt_block,
	level = corrupt->level;
	path->lowest_level = level;
	key.objectid = corrupt->key.objectid;
	key.type = corrupt->key.type;
	key.offset = corrupt->key.offset;

	/*
	 * Here we don't want to do any tree balance, since it may
	 * cause a balance with corrupted brother leaf/node,
	 * so ins_len set to 0 here.
	 * Balance will be done after all corrupt node/leaf is deleted.
	 */
	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	offset = btrfs_node_blockptr(path->nodes[level],
				     path->slots[level]);

	/* Remove the ptr */
	ret = btrfs_del_ptr(trans, root, path, level,
			    path->slots[level]);

	/*
	 * Remove the corresponding extent;
	 * the return value is not of concern.
	 */
	btrfs_release_path(path);
	ret = btrfs_free_extent(trans, root, offset, root->nodesize,
				0, root->root_key.objectid,
	cache = next_cache_extent(cache);

	/* Pass 2: balance the btree using btrfs_search_slot() */
	cache = first_cache_extent(corrupt_blocks);
	corrupt = container_of(cache, struct btrfs_corrupt_block,
	memcpy(&key, &corrupt->key, sizeof(key));
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	/* return will always be >0 since it won't find the item */
	btrfs_release_path(path);
	cache = next_cache_extent(cache);
	btrfs_commit_transaction(trans, root);
	btrfs_free_path(path);
/*
 * Fully check one fs/subvolume tree: walk every block, collect inode and
 * root-backref records, validate them, and (in repair mode) try to fix
 * corrupted tree blocks found along the way.
 * May return -EAGAIN when the caller must restart the whole scan.
 */
static int check_fs_root(struct btrfs_root *root,
			 struct cache_tree *root_cache,
			 struct walk_control *wc)
	struct btrfs_path path;
	struct shared_node root_node;
	struct root_record *rec;
	struct btrfs_root_item *root_item = &root->root_item;
	struct cache_tree corrupt_blocks;
	struct orphan_data_extent *orphan;
	struct orphan_data_extent *tmp;
	enum btrfs_tree_block_status status;
	struct node_refs nrefs;

	/*
	 * Reuse the corrupt_block cache tree to record corrupted tree blocks.
	 * Unlike the usage in extent tree check, here we do it on a per
	 * fs/subvol tree base.
	 */
	cache_tree_init(&corrupt_blocks);
	root->fs_info->corrupt_blocks = &corrupt_blocks;

	if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
		rec = get_root_rec(root_cache, root->root_key.objectid);
		BUG_ON(IS_ERR(rec));
		if (btrfs_root_refs(root_item) > 0)
			rec->found_root_item = 1;

	btrfs_init_path(&path);
	memset(&root_node, 0, sizeof(root_node));
	cache_tree_init(&root_node.root_cache);
	cache_tree_init(&root_node.inode_cache);
	memset(&nrefs, 0, sizeof(nrefs));

	/* Move the orphan extent records to the corresponding inode_record */
	list_for_each_entry_safe(orphan, tmp,
				 &root->orphan_data_extents, list) {
		struct inode_record *inode;

		inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
		BUG_ON(IS_ERR(inode));
		inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
		list_move(&orphan->list, &inode->orphan_extents);

	level = btrfs_header_level(root->node);
	memset(wc->nodes, 0, sizeof(wc->nodes));
	wc->nodes[level] = &root_node;
	wc->active_node = level;
	wc->root_level = level;

	/* We may not have checked the root block, lets do that now */
	if (btrfs_is_leaf(root->node))
		status = btrfs_check_leaf(root, NULL, root->node);
		status = btrfs_check_node(root, NULL, root->node);
	if (status != BTRFS_TREE_BLOCK_CLEAN)

	if (btrfs_root_refs(root_item) > 0 ||
	    btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
		path.nodes[level] = root->node;
		extent_buffer_get(root->node);
		path.slots[level] = 0;
		/* Partially dropped snapshot: resume from drop_progress */
		struct btrfs_key key;
		struct btrfs_disk_key found_key;

		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
		level = root_item->drop_level;
		path.lowest_level = level;
		if (level > btrfs_header_level(root->node) ||
		    level >= BTRFS_MAX_LEVEL) {
			error("ignoring invalid drop level: %u", level);
		wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
		btrfs_node_key(path.nodes[level], &found_key,
		WARN_ON(memcmp(&found_key, &root_item->drop_progress,
			       sizeof(found_key)));

	/* Walk down collecting records, then back up to finish each node */
	wret = walk_down_tree(root, &path, wc, &level, &nrefs);
	wret = walk_up_tree(root, &path, wc, &level);
	btrfs_release_path(&path);

	if (!cache_tree_empty(&corrupt_blocks)) {
		struct cache_extent *cache;
		struct btrfs_corrupt_block *corrupt;

		printf("The following tree block(s) is corrupted in tree %llu:\n",
		       root->root_key.objectid);
		cache = first_cache_extent(&corrupt_blocks);
		corrupt = container_of(cache,
				       struct btrfs_corrupt_block,
		printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
		       cache->start, corrupt->level,
		       corrupt->key.objectid, corrupt->key.type,
		       corrupt->key.offset);
		cache = next_cache_extent(cache);
		/* Repair mode: try to drop the corrupt blocks and rebalance */
		printf("Try to repair the btree for root %llu\n",
		       root->root_key.objectid);
		ret = repair_btree(root, &corrupt_blocks);
		fprintf(stderr, "Failed to repair btree: %s\n",
		printf("Btree for root %llu is fixed\n",
		       root->root_key.objectid);

	err = merge_root_recs(root, &root_node.root_cache, root_cache);

	if (root_node.current) {
		root_node.current->checked = 1;
		maybe_free_inode_rec(&root_node.inode_cache,

	err = check_inode_recs(root, &root_node.inode_cache);

	free_corrupt_blocks_tree(&corrupt_blocks);
	root->fs_info->corrupt_blocks = NULL;
	free_orphan_data_extents(&root->orphan_data_extents);
3745 static int fs_root_objectid(u64 objectid)
3747 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3748 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3750 return is_fstree(objectid);
/*
 * Iterate over every ROOT_ITEM in the tree of tree roots, checking each
 * fs/subvolume tree via check_fs_root() and collecting ROOT_REF/BACKREF
 * items. The whole scan restarts from scratch when the tree root changes
 * underneath us (or check_fs_root() returns -EAGAIN).
 */
static int check_fs_roots(struct btrfs_root *root,
			  struct cache_tree *root_cache)
	struct btrfs_path path;
	struct btrfs_key key;
	struct walk_control wc;
	struct extent_buffer *leaf, *tree_node;
	struct btrfs_root *tmp_root;
	struct btrfs_root *tree_root = root->fs_info->tree_root;

	if (ctx.progress_enabled) {
		ctx.tp = TASK_FS_ROOTS;
		task_start(ctx.info);

	/*
	 * Just in case we made any changes to the extent tree that weren't
	 * reflected into the free space cache yet.
	 */
	reset_cached_block_groups(root->fs_info);
	memset(&wc, 0, sizeof(wc));
	cache_tree_init(&wc.shared);
	btrfs_init_path(&path);

	key.type = BTRFS_ROOT_ITEM_KEY;
	ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
	/* Remember the root node so we can detect concurrent COW below */
	tree_node = tree_root->node;
	/* Tree root changed underneath us: drop everything and rescan */
	if (tree_node != tree_root->node) {
		free_root_recs_tree(root_cache);
		btrfs_release_path(&path);
	leaf = path.nodes[0];
	if (path.slots[0] >= btrfs_header_nritems(leaf)) {
		ret = btrfs_next_leaf(tree_root, &path);
		leaf = path.nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
	if (key.type == BTRFS_ROOT_ITEM_KEY &&
	    fs_root_objectid(key.objectid)) {
		if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
			/* Reloc roots are not cached; read and free manually */
			tmp_root = btrfs_read_fs_root_no_cache(
					root->fs_info, &key);
			key.offset = (u64)-1;
			tmp_root = btrfs_read_fs_root(
					root->fs_info, &key);
		if (IS_ERR(tmp_root)) {
		ret = check_fs_root(tmp_root, root_cache, &wc);
		if (ret == -EAGAIN) {
			free_root_recs_tree(root_cache);
			btrfs_release_path(&path);
		if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
			btrfs_free_fs_root(tmp_root);
	} else if (key.type == BTRFS_ROOT_REF_KEY ||
		   key.type == BTRFS_ROOT_BACKREF_KEY) {
		process_root_ref(leaf, path.slots[0], &key,
	btrfs_release_path(&path);
	free_extent_cache_tree(&wc.shared);
	if (!cache_tree_empty(&wc.shared))
		fprintf(stderr, "warning line %d\n", __LINE__);
	task_stop(ctx.info);
/*
 * Verify that every backref recorded on @rec was seen both in the extent
 * tree and from the referencing tree, and that the summed reference count
 * matches rec->refs. Returns non-zero when anything is inconsistent;
 * with @print_errs set, each mismatch is reported on stderr.
 */
static int all_backpointers_checked(struct extent_record *rec, int print_errs)
	struct list_head *cur = rec->backrefs.next;
	struct extent_backref *back;
	struct tree_backref *tback;
	struct data_backref *dback;

	while(cur != &rec->backrefs) {
		back = to_extent_backref(cur);
		/* Backref never seen in the extent tree */
		if (!back->found_extent_tree) {
			if (back->is_data) {
				dback = to_data_backref(back);
				fprintf(stderr, "Backref %llu %s %llu"
					" owner %llu offset %llu num_refs %lu"
					" not found in extent tree\n",
					(unsigned long long)rec->start,
					back->full_backref ?
					back->full_backref ?
					(unsigned long long)dback->parent:
					(unsigned long long)dback->root,
					(unsigned long long)dback->owner,
					(unsigned long long)dback->offset,
					(unsigned long)dback->num_refs);
				tback = to_tree_backref(back);
				fprintf(stderr, "Backref %llu parent %llu"
					" root %llu not found in extent tree\n",
					(unsigned long long)rec->start,
					(unsigned long long)tback->parent,
					(unsigned long long)tback->root);
		/* Tree backref present in the extent tree but never referenced */
		if (!back->is_data && !back->found_ref) {
			tback = to_tree_backref(back);
			fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
				(unsigned long long)rec->start,
				back->full_backref ? "parent" : "root",
				back->full_backref ?
				(unsigned long long)tback->parent :
				(unsigned long long)tback->root, back);
		if (back->is_data) {
			dback = to_data_backref(back);
			/* Per-backref count must match what the walk found */
			if (dback->found_ref != dback->num_refs) {
				fprintf(stderr, "Incorrect local backref count"
					" on %llu %s %llu owner %llu"
					" offset %llu found %u wanted %u back %p\n",
					(unsigned long long)rec->start,
					back->full_backref ?
					back->full_backref ?
					(unsigned long long)dback->parent:
					(unsigned long long)dback->root,
					(unsigned long long)dback->owner,
					(unsigned long long)dback->offset,
					dback->found_ref, dback->num_refs, back);
			if (dback->disk_bytenr != rec->start) {
				fprintf(stderr, "Backref disk bytenr does not"
					" match extent record, bytenr=%llu, "
					"ref bytenr=%llu\n",
					(unsigned long long)rec->start,
					(unsigned long long)dback->disk_bytenr);

			if (dback->bytes != rec->nr) {
				fprintf(stderr, "Backref bytes do not match "
					"extent backref, bytenr=%llu, ref "
					"bytes=%llu, backref bytes=%llu\n",
					(unsigned long long)rec->start,
					(unsigned long long)rec->nr,
					(unsigned long long)dback->bytes);
		if (!back->is_data) {
			/* Data backrefs contribute their found_ref count */
			dback = to_data_backref(back);
			found += dback->found_ref;
	/* The summed count must equal the extent item's refs */
	if (found != rec->refs) {
		fprintf(stderr, "Incorrect global backref count "
			"on %llu found %llu wanted %llu\n",
			(unsigned long long)rec->start,
			(unsigned long long)found,
			(unsigned long long)rec->refs);
/* Detach and free every backref hanging off @rec */
static int free_all_extent_backrefs(struct extent_record *rec)
	struct extent_backref *back;
	struct list_head *cur;
	while (!list_empty(&rec->backrefs)) {
		cur = rec->backrefs.next;
		back = to_extent_backref(cur);
/* Drop every extent_record in @extent_cache, including its backrefs */
static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
				     struct cache_tree *extent_cache)
	struct cache_extent *cache;
	struct extent_record *rec;

	cache = first_cache_extent(extent_cache);
	rec = container_of(cache, struct extent_record, cache);
	remove_cache_extent(extent_cache, cache);
	free_all_extent_backrefs(rec);
/*
 * Free @rec (and remove it from @extent_cache) once it has been fully
 * verified and carries no outstanding errors; otherwise leave it alone so
 * later passes can still inspect it.
 */
static int maybe_free_extent_rec(struct cache_tree *extent_cache,
				 struct extent_record *rec)
	if (rec->content_checked && rec->owner_ref_checked &&
	    rec->extent_item_refs == rec->refs && rec->refs > 0 &&
	    rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
	    !rec->bad_full_backref && !rec->crossing_stripes &&
	    !rec->wrong_chunk_type) {
		remove_cache_extent(extent_cache, &rec->cache);
		free_all_extent_backrefs(rec);
		list_del_init(&rec->list);
/*
 * Check whether the owner recorded in @buf's header really references the
 * block: first against the collected tree backrefs of @rec, then by
 * searching the owner's fs tree for a node that points at @buf.
 * Returns 0 when the owner ref checks out, 1 otherwise.
 */
static int check_owner_ref(struct btrfs_root *root,
			   struct extent_record *rec,
			   struct extent_buffer *buf)
	struct extent_backref *node;
	struct tree_backref *back;
	struct btrfs_root *ref_root;
	struct btrfs_key key;
	struct btrfs_path path;
	struct extent_buffer *parent;

	/* Fast path: a collected non-full backref already names the owner */
	list_for_each_entry(node, &rec->backrefs, list) {
		if (!node->found_ref)
		if (node->full_backref)
		back = to_tree_backref(node);
		if (btrfs_header_owner(buf) == back->root)
	BUG_ON(rec->is_root);

	/* try to find the block by searching the corresponding fs tree */
	key.objectid = btrfs_header_owner(buf);
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;

	ref_root = btrfs_read_fs_root(root->fs_info, &key);
	if (IS_ERR(ref_root))

	level = btrfs_header_level(buf);
	/* Use the block's first key to locate it in the owner's tree */
	btrfs_item_key_to_cpu(buf, &key, 0);
	btrfs_node_key_to_cpu(buf, &key, 0);

	btrfs_init_path(&path);
	/* Stop one level above @buf so the parent's blockptr is visible */
	path.lowest_level = level + 1;
	ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);

	parent = path.nodes[level + 1];
	if (parent && buf->start == btrfs_node_blockptr(parent,
				    path.slots[level + 1]))

	btrfs_release_path(&path);
	return found ? 0 : 1;
/*
 * Return non-zero when @rec carries a (non-full) tree backref owned by
 * the extent tree itself.
 */
static int is_extent_tree_record(struct extent_record *rec)
	struct list_head *cur = rec->backrefs.next;
	struct extent_backref *node;
	struct tree_backref *back;

	while(cur != &rec->backrefs) {
		node = to_extent_backref(cur);
		back = to_tree_backref(node);
		/* Full backrefs carry a parent, not a root id: skip them */
		if (node->full_backref)
		if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * An I/O error hit the range [start, start + len). If the range belongs
 * to the extent tree, remember it as a corrupt extent record so the
 * repair code can deal with it later.
 */
static int record_bad_block_io(struct btrfs_fs_info *info,
			       struct cache_tree *extent_cache,
	struct extent_record *rec;
	struct cache_extent *cache;
	struct btrfs_key key;

	cache = lookup_cache_extent(extent_cache, start, len);

	rec = container_of(cache, struct extent_record, cache);
	/* Only extent-tree blocks are worth recording here */
	if (!is_extent_tree_record(rec))

	btrfs_disk_key_to_cpu(&key, &rec->parent_key);
	return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot + 1 of @buf: for internal nodes the
 * two key pointers, for leaves the item keys plus their data/offset/size
 * bookkeeping. Parent keys are fixed up when slot 0 changes.
 * Used by fix_key_order() to repair out-of-order keys.
 */
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
		       struct extent_buffer *buf, int slot)
	if (btrfs_header_level(buf)) {
		/* Internal node: swap the two key pointers wholesale */
		struct btrfs_key_ptr ptr1, ptr2;

		read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
				   sizeof(struct btrfs_key_ptr));
		read_extent_buffer(buf, &ptr2,
				   btrfs_node_key_ptr_offset(slot + 1),
				   sizeof(struct btrfs_key_ptr));
		write_extent_buffer(buf, &ptr1,
				    btrfs_node_key_ptr_offset(slot + 1),
				    sizeof(struct btrfs_key_ptr));
		write_extent_buffer(buf, &ptr2,
				    btrfs_node_key_ptr_offset(slot),
				    sizeof(struct btrfs_key_ptr));
		/* Slot 0 changed: propagate the new low key to the parents */
		struct btrfs_disk_key key;
		btrfs_node_key(buf, &key, 0);
		btrfs_fixup_low_keys(root, path, &key,
				     btrfs_header_level(buf) + 1);
		/* Leaf: swap item data, then the offset/size/key metadata */
		struct btrfs_item *item1, *item2;
		struct btrfs_key k1, k2;
		char *item1_data, *item2_data;
		u32 item1_offset, item2_offset, item1_size, item2_size;

		item1 = btrfs_item_nr(slot);
		item2 = btrfs_item_nr(slot + 1);
		btrfs_item_key_to_cpu(buf, &k1, slot);
		btrfs_item_key_to_cpu(buf, &k2, slot + 1);
		item1_offset = btrfs_item_offset(buf, item1);
		item2_offset = btrfs_item_offset(buf, item2);
		item1_size = btrfs_item_size(buf, item1);
		item2_size = btrfs_item_size(buf, item2);

		item1_data = malloc(item1_size);
		item2_data = malloc(item2_size);

		read_extent_buffer(buf, item1_data, item1_offset, item1_size);
		read_extent_buffer(buf, item2_data, item2_offset, item2_size);

		/*
		 * NOTE(review): item1_data holds item1_size bytes but
		 * item2_size bytes are written from it (and vice versa) —
		 * an out-of-bounds read if the two sizes differ. Confirm
		 * the sizes are guaranteed equal here, or this needs fixing.
		 */
		write_extent_buffer(buf, item1_data, item2_offset, item2_size);
		write_extent_buffer(buf, item2_data, item1_offset, item1_size);

		btrfs_set_item_offset(buf, item1, item2_offset);
		btrfs_set_item_offset(buf, item2, item1_offset);
		btrfs_set_item_size(buf, item1, item2_size);
		btrfs_set_item_size(buf, item2, item1_size);

		/* Finally swap the item keys via the unsafe setter */
		path->slots[0] = slot;
		btrfs_set_item_key_unsafe(root, path, &k2);
		path->slots[0] = slot + 1;
		btrfs_set_item_key_unsafe(root, path, &k1);
4178 static int fix_key_order(struct btrfs_trans_handle *trans,
4179 struct btrfs_root *root,
4180 struct btrfs_path *path)
4182 struct extent_buffer *buf;
4183 struct btrfs_key k1, k2;
4185 int level = path->lowest_level;
4188 buf = path->nodes[level];
4189 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4191 btrfs_node_key_to_cpu(buf, &k1, i);
4192 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4194 btrfs_item_key_to_cpu(buf, &k1, i);
4195 btrfs_item_key_to_cpu(buf, &k2, i + 1);
4197 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4199 ret = swap_values(root, path, buf, i);
4202 btrfs_mark_buffer_dirty(buf);
4208 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4209 struct btrfs_root *root,
4210 struct btrfs_path *path,
4211 struct extent_buffer *buf, int slot)
4213 struct btrfs_key key;
4214 int nritems = btrfs_header_nritems(buf);
4216 btrfs_item_key_to_cpu(buf, &key, slot);
4218 /* These are all the keys we can deal with missing. */
4219 if (key.type != BTRFS_DIR_INDEX_KEY &&
4220 key.type != BTRFS_EXTENT_ITEM_KEY &&
4221 key.type != BTRFS_METADATA_ITEM_KEY &&
4222 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4223 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4226 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4227 (unsigned long long)key.objectid, key.type,
4228 (unsigned long long)key.offset, slot, buf->start);
4229 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4230 btrfs_item_nr_offset(slot + 1),
4231 sizeof(struct btrfs_item) *
4232 (nritems - slot - 1));
4233 btrfs_set_header_nritems(buf, nritems - 1);
4235 struct btrfs_disk_key disk_key;
4237 btrfs_item_key(buf, &disk_key, 0);
4238 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4240 btrfs_mark_buffer_dirty(buf);
4244 static int fix_item_offset(struct btrfs_trans_handle *trans,
4245 struct btrfs_root *root,
4246 struct btrfs_path *path)
4248 struct extent_buffer *buf;
4252 /* We should only get this for leaves */
4253 BUG_ON(path->lowest_level);
4254 buf = path->nodes[0];
4256 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4257 unsigned int shift = 0, offset;
4259 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4260 BTRFS_LEAF_DATA_SIZE(root)) {
4261 if (btrfs_item_end_nr(buf, i) >
4262 BTRFS_LEAF_DATA_SIZE(root)) {
4263 ret = delete_bogus_item(trans, root, path,
4267 fprintf(stderr, "item is off the end of the "
4268 "leaf, can't fix\n");
4272 shift = BTRFS_LEAF_DATA_SIZE(root) -
4273 btrfs_item_end_nr(buf, i);
4274 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4275 btrfs_item_offset_nr(buf, i - 1)) {
4276 if (btrfs_item_end_nr(buf, i) >
4277 btrfs_item_offset_nr(buf, i - 1)) {
4278 ret = delete_bogus_item(trans, root, path,
4282 fprintf(stderr, "items overlap, can't fix\n");
4286 shift = btrfs_item_offset_nr(buf, i - 1) -
4287 btrfs_item_end_nr(buf, i);
4292 printf("Shifting item nr %d by %u bytes in block %llu\n",
4293 i, shift, (unsigned long long)buf->start);
4294 offset = btrfs_item_offset_nr(buf, i);
4295 memmove_extent_buffer(buf,
4296 btrfs_leaf_data(buf) + offset + shift,
4297 btrfs_leaf_data(buf) + offset,
4298 btrfs_item_size_nr(buf, i));
4299 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4301 btrfs_mark_buffer_dirty(buf);
4305 * We may have moved things, in which case we want to exit so we don't
4306 * write those changes out. Once we have proper abort functionality in
4307 * progs this can be changed to something nicer.
4314 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4315 * then just return -EIO.
4317 static int try_to_fix_bad_block(struct btrfs_root *root,
4318 struct extent_buffer *buf,
4319 enum btrfs_tree_block_status status)
4321 struct btrfs_trans_handle *trans;
4322 struct ulist *roots;
4323 struct ulist_node *node;
4324 struct btrfs_root *search_root;
4325 struct btrfs_path *path;
4326 struct ulist_iterator iter;
4327 struct btrfs_key root_key, key;
4330 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4331 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4334 path = btrfs_alloc_path();
4338 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4341 btrfs_free_path(path);
4345 ULIST_ITER_INIT(&iter);
4346 while ((node = ulist_next(roots, &iter))) {
4347 root_key.objectid = node->val;
4348 root_key.type = BTRFS_ROOT_ITEM_KEY;
4349 root_key.offset = (u64)-1;
4351 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4358 trans = btrfs_start_transaction(search_root, 0);
4359 if (IS_ERR(trans)) {
4360 ret = PTR_ERR(trans);
4364 path->lowest_level = btrfs_header_level(buf);
4365 path->skip_check_block = 1;
4366 if (path->lowest_level)
4367 btrfs_node_key_to_cpu(buf, &key, 0);
4369 btrfs_item_key_to_cpu(buf, &key, 0);
4370 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4373 btrfs_commit_transaction(trans, search_root);
4376 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4377 ret = fix_key_order(trans, search_root, path);
4378 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4379 ret = fix_item_offset(trans, search_root, path);
4381 btrfs_commit_transaction(trans, search_root);
4384 btrfs_release_path(path);
4385 btrfs_commit_transaction(trans, search_root);
4388 btrfs_free_path(path);
4392 static int check_block(struct btrfs_root *root,
4393 struct cache_tree *extent_cache,
4394 struct extent_buffer *buf, u64 flags)
4396 struct extent_record *rec;
4397 struct cache_extent *cache;
4398 struct btrfs_key key;
4399 enum btrfs_tree_block_status status;
4403 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4406 rec = container_of(cache, struct extent_record, cache);
4407 rec->generation = btrfs_header_generation(buf);
4409 level = btrfs_header_level(buf);
4410 if (btrfs_header_nritems(buf) > 0) {
4413 btrfs_item_key_to_cpu(buf, &key, 0);
4415 btrfs_node_key_to_cpu(buf, &key, 0);
4417 rec->info_objectid = key.objectid;
4419 rec->info_level = level;
4421 if (btrfs_is_leaf(buf))
4422 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4424 status = btrfs_check_node(root, &rec->parent_key, buf);
4426 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4428 status = try_to_fix_bad_block(root, buf, status);
4429 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4431 fprintf(stderr, "bad block %llu\n",
4432 (unsigned long long)buf->start);
4435 * Signal to callers we need to start the scan over
4436 * again since we'll have cowed blocks.
4441 rec->content_checked = 1;
4442 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4443 rec->owner_ref_checked = 1;
4445 ret = check_owner_ref(root, rec, buf);
4447 rec->owner_ref_checked = 1;
4451 maybe_free_extent_rec(extent_cache, rec);
4455 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4456 u64 parent, u64 root)
4458 struct list_head *cur = rec->backrefs.next;
4459 struct extent_backref *node;
4460 struct tree_backref *back;
4462 while(cur != &rec->backrefs) {
4463 node = to_extent_backref(cur);
4467 back = to_tree_backref(node);
4469 if (!node->full_backref)
4471 if (parent == back->parent)
4474 if (node->full_backref)
4476 if (back->root == root)
4483 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4484 u64 parent, u64 root)
4486 struct tree_backref *ref = malloc(sizeof(*ref));
4490 memset(&ref->node, 0, sizeof(ref->node));
4492 ref->parent = parent;
4493 ref->node.full_backref = 1;
4496 ref->node.full_backref = 0;
4498 list_add_tail(&ref->node.list, &rec->backrefs);
4503 static struct data_backref *find_data_backref(struct extent_record *rec,
4504 u64 parent, u64 root,
4505 u64 owner, u64 offset,
4507 u64 disk_bytenr, u64 bytes)
4509 struct list_head *cur = rec->backrefs.next;
4510 struct extent_backref *node;
4511 struct data_backref *back;
4513 while(cur != &rec->backrefs) {
4514 node = to_extent_backref(cur);
4518 back = to_data_backref(node);
4520 if (!node->full_backref)
4522 if (parent == back->parent)
4525 if (node->full_backref)
4527 if (back->root == root && back->owner == owner &&
4528 back->offset == offset) {
4529 if (found_ref && node->found_ref &&
4530 (back->bytes != bytes ||
4531 back->disk_bytenr != disk_bytenr))
4540 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4541 u64 parent, u64 root,
4542 u64 owner, u64 offset,
4545 struct data_backref *ref = malloc(sizeof(*ref));
4549 memset(&ref->node, 0, sizeof(ref->node));
4550 ref->node.is_data = 1;
4553 ref->parent = parent;
4556 ref->node.full_backref = 1;
4560 ref->offset = offset;
4561 ref->node.full_backref = 0;
4563 ref->bytes = max_size;
4566 list_add_tail(&ref->node.list, &rec->backrefs);
4567 if (max_size > rec->max_size)
4568 rec->max_size = max_size;
4572 /* Check if the type of extent matches with its chunk */
4573 static void check_extent_type(struct extent_record *rec)
4575 struct btrfs_block_group_cache *bg_cache;
4577 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4581 /* data extent, check chunk directly*/
4582 if (!rec->metadata) {
4583 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4584 rec->wrong_chunk_type = 1;
4588 /* metadata extent, check the obvious case first */
4589 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4590 BTRFS_BLOCK_GROUP_METADATA))) {
4591 rec->wrong_chunk_type = 1;
4596 * Check SYSTEM extent, as it's also marked as metadata, we can only
4597 * make sure it's a SYSTEM extent by its backref
4599 if (!list_empty(&rec->backrefs)) {
4600 struct extent_backref *node;
4601 struct tree_backref *tback;
4604 node = to_extent_backref(rec->backrefs.next);
4605 if (node->is_data) {
4606 /* tree block shouldn't have data backref */
4607 rec->wrong_chunk_type = 1;
4610 tback = container_of(node, struct tree_backref, node);
4612 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4613 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4615 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4616 if (!(bg_cache->flags & bg_type))
4617 rec->wrong_chunk_type = 1;
4622 * Allocate a new extent record, fill default values from @tmpl and insert int
4623 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4624 * the cache, otherwise it fails.
4626 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4627 struct extent_record *tmpl)
4629 struct extent_record *rec;
4632 rec = malloc(sizeof(*rec));
4635 rec->start = tmpl->start;
4636 rec->max_size = tmpl->max_size;
4637 rec->nr = max(tmpl->nr, tmpl->max_size);
4638 rec->found_rec = tmpl->found_rec;
4639 rec->content_checked = tmpl->content_checked;
4640 rec->owner_ref_checked = tmpl->owner_ref_checked;
4641 rec->num_duplicates = 0;
4642 rec->metadata = tmpl->metadata;
4643 rec->flag_block_full_backref = FLAG_UNSET;
4644 rec->bad_full_backref = 0;
4645 rec->crossing_stripes = 0;
4646 rec->wrong_chunk_type = 0;
4647 rec->is_root = tmpl->is_root;
4648 rec->refs = tmpl->refs;
4649 rec->extent_item_refs = tmpl->extent_item_refs;
4650 rec->parent_generation = tmpl->parent_generation;
4651 INIT_LIST_HEAD(&rec->backrefs);
4652 INIT_LIST_HEAD(&rec->dups);
4653 INIT_LIST_HEAD(&rec->list);
4654 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4655 rec->cache.start = tmpl->start;
4656 rec->cache.size = tmpl->nr;
4657 ret = insert_cache_extent(extent_cache, &rec->cache);
4662 bytes_used += rec->nr;
4665 rec->crossing_stripes = check_crossing_stripes(global_info,
4666 rec->start, global_info->tree_root->nodesize);
4667 check_extent_type(rec);
4672 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4674 * - refs - if found, increase refs
4675 * - is_root - if found, set
4676 * - content_checked - if found, set
4677 * - owner_ref_checked - if found, set
4679 * If not found, create a new one, initialize and insert.
4681 static int add_extent_rec(struct cache_tree *extent_cache,
4682 struct extent_record *tmpl)
4684 struct extent_record *rec;
4685 struct cache_extent *cache;
4689 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4691 rec = container_of(cache, struct extent_record, cache);
4695 rec->nr = max(tmpl->nr, tmpl->max_size);
4698 * We need to make sure to reset nr to whatever the extent
4699 * record says was the real size, this way we can compare it to
4702 if (tmpl->found_rec) {
4703 if (tmpl->start != rec->start || rec->found_rec) {
4704 struct extent_record *tmp;
4707 if (list_empty(&rec->list))
4708 list_add_tail(&rec->list,
4709 &duplicate_extents);
4712 * We have to do this song and dance in case we
4713 * find an extent record that falls inside of
4714 * our current extent record but does not have
4715 * the same objectid.
4717 tmp = malloc(sizeof(*tmp));
4720 tmp->start = tmpl->start;
4721 tmp->max_size = tmpl->max_size;
4724 tmp->metadata = tmpl->metadata;
4725 tmp->extent_item_refs = tmpl->extent_item_refs;
4726 INIT_LIST_HEAD(&tmp->list);
4727 list_add_tail(&tmp->list, &rec->dups);
4728 rec->num_duplicates++;
4735 if (tmpl->extent_item_refs && !dup) {
4736 if (rec->extent_item_refs) {
4737 fprintf(stderr, "block %llu rec "
4738 "extent_item_refs %llu, passed %llu\n",
4739 (unsigned long long)tmpl->start,
4740 (unsigned long long)
4741 rec->extent_item_refs,
4742 (unsigned long long)tmpl->extent_item_refs);
4744 rec->extent_item_refs = tmpl->extent_item_refs;
4748 if (tmpl->content_checked)
4749 rec->content_checked = 1;
4750 if (tmpl->owner_ref_checked)
4751 rec->owner_ref_checked = 1;
4752 memcpy(&rec->parent_key, &tmpl->parent_key,
4753 sizeof(tmpl->parent_key));
4754 if (tmpl->parent_generation)
4755 rec->parent_generation = tmpl->parent_generation;
4756 if (rec->max_size < tmpl->max_size)
4757 rec->max_size = tmpl->max_size;
4760 * A metadata extent can't cross stripe_len boundary, otherwise
4761 * kernel scrub won't be able to handle it.
4762 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4766 rec->crossing_stripes = check_crossing_stripes(
4767 global_info, rec->start,
4768 global_info->tree_root->nodesize);
4769 check_extent_type(rec);
4770 maybe_free_extent_rec(extent_cache, rec);
4774 ret = add_extent_rec_nolookup(extent_cache, tmpl);
4779 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4780 u64 parent, u64 root, int found_ref)
4782 struct extent_record *rec;
4783 struct tree_backref *back;
4784 struct cache_extent *cache;
4787 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4789 struct extent_record tmpl;
4791 memset(&tmpl, 0, sizeof(tmpl));
4792 tmpl.start = bytenr;
4796 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4800 /* really a bug in cache_extent implement now */
4801 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4806 rec = container_of(cache, struct extent_record, cache);
4807 if (rec->start != bytenr) {
4809 * Several cause, from unaligned bytenr to over lapping extents
4814 back = find_tree_backref(rec, parent, root);
4816 back = alloc_tree_backref(rec, parent, root);
4822 if (back->node.found_ref) {
4823 fprintf(stderr, "Extent back ref already exists "
4824 "for %llu parent %llu root %llu \n",
4825 (unsigned long long)bytenr,
4826 (unsigned long long)parent,
4827 (unsigned long long)root);
4829 back->node.found_ref = 1;
4831 if (back->node.found_extent_tree) {
4832 fprintf(stderr, "Extent back ref already exists "
4833 "for %llu parent %llu root %llu \n",
4834 (unsigned long long)bytenr,
4835 (unsigned long long)parent,
4836 (unsigned long long)root);
4838 back->node.found_extent_tree = 1;
4840 check_extent_type(rec);
4841 maybe_free_extent_rec(extent_cache, rec);
4845 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4846 u64 parent, u64 root, u64 owner, u64 offset,
4847 u32 num_refs, int found_ref, u64 max_size)
4849 struct extent_record *rec;
4850 struct data_backref *back;
4851 struct cache_extent *cache;
4854 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4856 struct extent_record tmpl;
4858 memset(&tmpl, 0, sizeof(tmpl));
4859 tmpl.start = bytenr;
4861 tmpl.max_size = max_size;
4863 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4867 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4872 rec = container_of(cache, struct extent_record, cache);
4873 if (rec->max_size < max_size)
4874 rec->max_size = max_size;
4877 * If found_ref is set then max_size is the real size and must match the
4878 * existing refs. So if we have already found a ref then we need to
4879 * make sure that this ref matches the existing one, otherwise we need
4880 * to add a new backref so we can notice that the backrefs don't match
4881 * and we need to figure out who is telling the truth. This is to
4882 * account for that awful fsync bug I introduced where we'd end up with
4883 * a btrfs_file_extent_item that would have its length include multiple
4884 * prealloc extents or point inside of a prealloc extent.
4886 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4889 back = alloc_data_backref(rec, parent, root, owner, offset,
4895 BUG_ON(num_refs != 1);
4896 if (back->node.found_ref)
4897 BUG_ON(back->bytes != max_size);
4898 back->node.found_ref = 1;
4899 back->found_ref += 1;
4900 back->bytes = max_size;
4901 back->disk_bytenr = bytenr;
4903 rec->content_checked = 1;
4904 rec->owner_ref_checked = 1;
4906 if (back->node.found_extent_tree) {
4907 fprintf(stderr, "Extent back ref already exists "
4908 "for %llu parent %llu root %llu "
4909 "owner %llu offset %llu num_refs %lu\n",
4910 (unsigned long long)bytenr,
4911 (unsigned long long)parent,
4912 (unsigned long long)root,
4913 (unsigned long long)owner,
4914 (unsigned long long)offset,
4915 (unsigned long)num_refs);
4917 back->num_refs = num_refs;
4918 back->node.found_extent_tree = 1;
4920 maybe_free_extent_rec(extent_cache, rec);
4924 static int add_pending(struct cache_tree *pending,
4925 struct cache_tree *seen, u64 bytenr, u32 size)
4928 ret = add_cache_extent(seen, bytenr, size);
4931 add_cache_extent(pending, bytenr, size);
4935 static int pick_next_pending(struct cache_tree *pending,
4936 struct cache_tree *reada,
4937 struct cache_tree *nodes,
4938 u64 last, struct block_info *bits, int bits_nr,
4941 unsigned long node_start = last;
4942 struct cache_extent *cache;
4945 cache = search_cache_extent(reada, 0);
4947 bits[0].start = cache->start;
4948 bits[0].size = cache->size;
4953 if (node_start > 32768)
4954 node_start -= 32768;
4956 cache = search_cache_extent(nodes, node_start);
4958 cache = search_cache_extent(nodes, 0);
4961 cache = search_cache_extent(pending, 0);
4966 bits[ret].start = cache->start;
4967 bits[ret].size = cache->size;
4968 cache = next_cache_extent(cache);
4970 } while (cache && ret < bits_nr);
4976 bits[ret].start = cache->start;
4977 bits[ret].size = cache->size;
4978 cache = next_cache_extent(cache);
4980 } while (cache && ret < bits_nr);
4982 if (bits_nr - ret > 8) {
4983 u64 lookup = bits[0].start + bits[0].size;
4984 struct cache_extent *next;
4985 next = search_cache_extent(pending, lookup);
4987 if (next->start - lookup > 32768)
4989 bits[ret].start = next->start;
4990 bits[ret].size = next->size;
4991 lookup = next->start + next->size;
4995 next = next_cache_extent(next);
5003 static void free_chunk_record(struct cache_extent *cache)
5005 struct chunk_record *rec;
5007 rec = container_of(cache, struct chunk_record, cache);
5008 list_del_init(&rec->list);
5009 list_del_init(&rec->dextents);
5013 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5015 cache_tree_free_extents(chunk_cache, free_chunk_record);
5018 static void free_device_record(struct rb_node *node)
5020 struct device_record *rec;
5022 rec = container_of(node, struct device_record, node);
5026 FREE_RB_BASED_TREE(device_cache, free_device_record);
5028 int insert_block_group_record(struct block_group_tree *tree,
5029 struct block_group_record *bg_rec)
5033 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5037 list_add_tail(&bg_rec->list, &tree->block_groups);
5041 static void free_block_group_record(struct cache_extent *cache)
5043 struct block_group_record *rec;
5045 rec = container_of(cache, struct block_group_record, cache);
5046 list_del_init(&rec->list);
5050 void free_block_group_tree(struct block_group_tree *tree)
5052 cache_tree_free_extents(&tree->tree, free_block_group_record);
5055 int insert_device_extent_record(struct device_extent_tree *tree,
5056 struct device_extent_record *de_rec)
5061 * Device extent is a bit different from the other extents, because
5062 * the extents which belong to the different devices may have the
5063 * same start and size, so we need use the special extent cache
5064 * search/insert functions.
5066 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5070 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5071 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5075 static void free_device_extent_record(struct cache_extent *cache)
5077 struct device_extent_record *rec;
5079 rec = container_of(cache, struct device_extent_record, cache);
5080 if (!list_empty(&rec->chunk_list))
5081 list_del_init(&rec->chunk_list);
5082 if (!list_empty(&rec->device_list))
5083 list_del_init(&rec->device_list);
5087 void free_device_extent_tree(struct device_extent_tree *tree)
5089 cache_tree_free_extents(&tree->tree, free_device_extent_record);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Translate a v0 extent ref item into a tree or data backref: owners below
 * BTRFS_FIRST_FREE_OBJECTID are tree roots, everything else is file data.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;
	int ret;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
		ret = add_tree_backref(extent_cache, key.objectid, key.offset,
				       0, 0);
	} else {
		ret = add_data_backref(extent_cache, key.objectid, key.offset,
				       0, 0, 0, btrfs_ref_count_v0(leaf, ref0),
				       0, 0);
	}
	return ret;
}
#endif
5113 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5114 struct btrfs_key *key,
5117 struct btrfs_chunk *ptr;
5118 struct chunk_record *rec;
5121 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5122 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5124 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5126 fprintf(stderr, "memory allocation failed\n");
5130 INIT_LIST_HEAD(&rec->list);
5131 INIT_LIST_HEAD(&rec->dextents);
5134 rec->cache.start = key->offset;
5135 rec->cache.size = btrfs_chunk_length(leaf, ptr);
5137 rec->generation = btrfs_header_generation(leaf);
5139 rec->objectid = key->objectid;
5140 rec->type = key->type;
5141 rec->offset = key->offset;
5143 rec->length = rec->cache.size;
5144 rec->owner = btrfs_chunk_owner(leaf, ptr);
5145 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5146 rec->type_flags = btrfs_chunk_type(leaf, ptr);
5147 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5148 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5149 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5150 rec->num_stripes = num_stripes;
5151 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5153 for (i = 0; i < rec->num_stripes; ++i) {
5154 rec->stripes[i].devid =
5155 btrfs_stripe_devid_nr(leaf, ptr, i);
5156 rec->stripes[i].offset =
5157 btrfs_stripe_offset_nr(leaf, ptr, i);
5158 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5159 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5166 static int process_chunk_item(struct cache_tree *chunk_cache,
5167 struct btrfs_key *key, struct extent_buffer *eb,
5170 struct chunk_record *rec;
5171 struct btrfs_chunk *chunk;
5174 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5176 * Do extra check for this chunk item,
5178 * It's still possible one can craft a leaf with CHUNK_ITEM, with
5179 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5180 * and owner<->key_type check.
5182 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5185 error("chunk(%llu, %llu) is not valid, ignore it",
5186 key->offset, btrfs_chunk_length(eb, chunk));
5189 rec = btrfs_new_chunk_record(eb, key, slot);
5190 ret = insert_cache_extent(chunk_cache, &rec->cache);
5192 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5193 rec->offset, rec->length);
5200 static int process_device_item(struct rb_root *dev_cache,
5201 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5203 struct btrfs_dev_item *ptr;
5204 struct device_record *rec;
5207 ptr = btrfs_item_ptr(eb,
5208 slot, struct btrfs_dev_item);
5210 rec = malloc(sizeof(*rec));
5212 fprintf(stderr, "memory allocation failed\n");
5216 rec->devid = key->offset;
5217 rec->generation = btrfs_header_generation(eb);
5219 rec->objectid = key->objectid;
5220 rec->type = key->type;
5221 rec->offset = key->offset;
5223 rec->devid = btrfs_device_id(eb, ptr);
5224 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5225 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5227 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5229 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5236 struct block_group_record *
5237 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5240 struct btrfs_block_group_item *ptr;
5241 struct block_group_record *rec;
5243 rec = calloc(1, sizeof(*rec));
5245 fprintf(stderr, "memory allocation failed\n");
5249 rec->cache.start = key->objectid;
5250 rec->cache.size = key->offset;
5252 rec->generation = btrfs_header_generation(leaf);
5254 rec->objectid = key->objectid;
5255 rec->type = key->type;
5256 rec->offset = key->offset;
5258 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5259 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5261 INIT_LIST_HEAD(&rec->list);
5266 static int process_block_group_item(struct block_group_tree *block_group_cache,
5267 struct btrfs_key *key,
5268 struct extent_buffer *eb, int slot)
5270 struct block_group_record *rec;
5273 rec = btrfs_new_block_group_record(eb, key, slot);
5274 ret = insert_block_group_record(block_group_cache, rec);
5276 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5277 rec->objectid, rec->offset);
5284 struct device_extent_record *
5285 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5286 struct btrfs_key *key, int slot)
5288 struct device_extent_record *rec;
5289 struct btrfs_dev_extent *ptr;
5291 rec = calloc(1, sizeof(*rec));
5293 fprintf(stderr, "memory allocation failed\n");
5297 rec->cache.objectid = key->objectid;
5298 rec->cache.start = key->offset;
5300 rec->generation = btrfs_header_generation(leaf);
5302 rec->objectid = key->objectid;
5303 rec->type = key->type;
5304 rec->offset = key->offset;
5306 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5307 rec->chunk_objecteid =
5308 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5310 btrfs_dev_extent_chunk_offset(leaf, ptr);
5311 rec->length = btrfs_dev_extent_length(leaf, ptr);
5312 rec->cache.size = rec->length;
5314 INIT_LIST_HEAD(&rec->chunk_list);
5315 INIT_LIST_HEAD(&rec->device_list);
5321 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5322 struct btrfs_key *key, struct extent_buffer *eb,
5325 struct device_extent_record *rec;
5328 rec = btrfs_new_device_extent_record(eb, key, slot);
5329 ret = insert_device_extent_record(dev_extent_cache, rec);
5332 "Device extent[%llu, %llu, %llu] existed.\n",
5333 rec->objectid, rec->offset, rec->length);
5340 static int process_extent_item(struct btrfs_root *root,
5341 struct cache_tree *extent_cache,
5342 struct extent_buffer *eb, int slot)
5344 struct btrfs_extent_item *ei;
5345 struct btrfs_extent_inline_ref *iref;
5346 struct btrfs_extent_data_ref *dref;
5347 struct btrfs_shared_data_ref *sref;
5348 struct btrfs_key key;
5349 struct extent_record tmpl;
5354 u32 item_size = btrfs_item_size_nr(eb, slot);
5360 btrfs_item_key_to_cpu(eb, &key, slot);
5362 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5364 num_bytes = root->nodesize;
5366 num_bytes = key.offset;
5369 if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5370 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5371 key.objectid, root->sectorsize);
5374 if (item_size < sizeof(*ei)) {
5375 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5376 struct btrfs_extent_item_v0 *ei0;
5377 BUG_ON(item_size != sizeof(*ei0));
5378 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5379 refs = btrfs_extent_refs_v0(eb, ei0);
5383 memset(&tmpl, 0, sizeof(tmpl));
5384 tmpl.start = key.objectid;
5385 tmpl.nr = num_bytes;
5386 tmpl.extent_item_refs = refs;
5387 tmpl.metadata = metadata;
5389 tmpl.max_size = num_bytes;
5391 return add_extent_rec(extent_cache, &tmpl);
5394 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5395 refs = btrfs_extent_refs(eb, ei);
5396 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5400 if (metadata && num_bytes != root->nodesize) {
5401 error("ignore invalid metadata extent, length %llu does not equal to %u",
5402 num_bytes, root->nodesize);
5405 if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5406 error("ignore invalid data extent, length %llu is not aligned to %u",
5407 num_bytes, root->sectorsize);
5411 memset(&tmpl, 0, sizeof(tmpl));
5412 tmpl.start = key.objectid;
5413 tmpl.nr = num_bytes;
5414 tmpl.extent_item_refs = refs;
5415 tmpl.metadata = metadata;
5417 tmpl.max_size = num_bytes;
5418 add_extent_rec(extent_cache, &tmpl);
5420 ptr = (unsigned long)(ei + 1);
5421 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5422 key.type == BTRFS_EXTENT_ITEM_KEY)
5423 ptr += sizeof(struct btrfs_tree_block_info);
5425 end = (unsigned long)ei + item_size;
5427 iref = (struct btrfs_extent_inline_ref *)ptr;
5428 type = btrfs_extent_inline_ref_type(eb, iref);
5429 offset = btrfs_extent_inline_ref_offset(eb, iref);
5431 case BTRFS_TREE_BLOCK_REF_KEY:
5432 ret = add_tree_backref(extent_cache, key.objectid,
5435 error("add_tree_backref failed: %s",
5438 case BTRFS_SHARED_BLOCK_REF_KEY:
5439 ret = add_tree_backref(extent_cache, key.objectid,
5442 error("add_tree_backref failed: %s",
5445 case BTRFS_EXTENT_DATA_REF_KEY:
5446 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5447 add_data_backref(extent_cache, key.objectid, 0,
5448 btrfs_extent_data_ref_root(eb, dref),
5449 btrfs_extent_data_ref_objectid(eb,
5451 btrfs_extent_data_ref_offset(eb, dref),
5452 btrfs_extent_data_ref_count(eb, dref),
5455 case BTRFS_SHARED_DATA_REF_KEY:
5456 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5457 add_data_backref(extent_cache, key.objectid, offset,
5459 btrfs_shared_data_ref_count(eb, sref),
5463 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5464 key.objectid, key.type, num_bytes);
5467 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Verify that the range [offset, offset+bytes) inside a block group is
 * covered by exactly one free-space entry, then consume (unlink) that entry
 * so that any leftover entries can be detected by the caller.
 *
 * NOTE(review): this listing is elided (the embedded line numbers jump), so
 * several declarations, returns and closing braces are missing from view.
 */
5474 static int check_cache_range(struct btrfs_root *root,
5475 struct btrfs_block_group_cache *cache,
5476 u64 offset, u64 bytes)
5478 struct btrfs_free_space *entry;
/*
 * Superblock mirror copies are never free space; carve every super stripe
 * that intersects the range out of [offset, offset+bytes) before matching
 * against the free-space cache.
 */
5484 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5485 bytenr = btrfs_sb_offset(i);
5486 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5487 cache->key.objectid, bytenr, 0,
5488 &logical, &nr, &stripe_len);
/* Stripe entirely before or after our range: nothing to trim. */
5493 if (logical[nr] + stripe_len <= offset)
5495 if (offset + bytes <= logical[nr])
/* Stripe starts exactly at offset: trim it off the front. */
5497 if (logical[nr] == offset) {
5498 if (stripe_len >= bytes) {
5502 bytes -= stripe_len;
5503 offset += stripe_len;
/* Stripe starts before offset: trim whatever overlaps the front. */
5504 } else if (logical[nr] < offset) {
5505 if (logical[nr] + stripe_len >=
5510 bytes = (offset + bytes) -
5511 (logical[nr] + stripe_len);
5512 offset = logical[nr] + stripe_len;
5515 * Could be tricky, the super may land in the
5516 * middle of the area we're checking. First
5517 * check the easiest case, it's at the end.
5519 if (logical[nr] + stripe_len >=
5521 bytes = logical[nr] - offset;
5525 /* Check the left side */
5526 ret = check_cache_range(root, cache,
5528 logical[nr] - offset);
5534 /* Now we continue with the right side */
5535 bytes = (offset + bytes) -
5536 (logical[nr] + stripe_len);
5537 offset = logical[nr] + stripe_len;
/*
 * What survives the super-stripe trimming must correspond to exactly one
 * free-space entry with matching offset and length.
 */
5544 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5546 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5547 offset, offset+bytes);
5551 if (entry->offset != offset) {
5552 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5557 if (entry->bytes != bytes) {
5558 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5559 bytes, entry->bytes, offset);
/* Matched: remove the entry so verify_space_cache() can detect leftovers. */
5563 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check one block group's free-space cache against the extent tree:
 * walk all extent/metadata items inside the block group and require every
 * gap between allocated extents to appear in the free-space cache (via
 * check_cache_range(), which also consumes matched entries). Any entry
 * still left in the cache afterwards is reported as bogus.
 *
 * NOTE(review): elided listing — some declarations/returns are not visible.
 */
5568 static int verify_space_cache(struct btrfs_root *root,
5569 struct btrfs_block_group_cache *cache)
5571 struct btrfs_path *path;
5572 struct extent_buffer *leaf;
5573 struct btrfs_key key;
5577 path = btrfs_alloc_path();
5581 root = root->fs_info->extent_root;
/* Never start below the first superblock copy. */
5583 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5585 key.objectid = last;
5587 key.type = BTRFS_EXTENT_ITEM_KEY;
5589 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5594 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5595 ret = btrfs_next_leaf(root, path);
5603 leaf = path->nodes[0];
5604 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
/* Stop once we walk past the end of this block group. */
5605 if (key.objectid >= cache->key.offset + cache->key.objectid)
5607 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5608 key.type != BTRFS_METADATA_ITEM_KEY) {
/*
 * Contiguous with the previous extent: just advance 'last'. For
 * EXTENT_ITEMs key.offset is the byte length; for METADATA_ITEMs it is
 * the level, so a tree block spans nodesize bytes instead.
 */
5613 if (last == key.objectid) {
5614 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5615 last = key.objectid + key.offset;
5617 last = key.objectid + root->nodesize;
/* Gap between 'last' and this extent: it must be in the free-space cache. */
5622 ret = check_cache_range(root, cache, last,
5623 key.objectid - last);
5626 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5627 last = key.objectid + key.offset;
5629 last = key.objectid + root->nodesize;
/* Trailing gap at the end of the block group. */
5633 if (last < cache->key.objectid + cache->key.offset)
5634 ret = check_cache_range(root, cache, last,
5635 cache->key.objectid +
5636 cache->key.offset - last);
5639 btrfs_free_path(path);
/* Entries check_cache_range() did not consume are bogus free space. */
5642 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5643 fprintf(stderr, "There are still entries left in the space "
/*
 * Validate free space accounting for every block group. Loads either the
 * free-space tree (when the compat_ro feature is set) or the v1 free-space
 * cache, then verifies it against the extent tree via verify_space_cache().
 * Returns -EINVAL if any block group's cache proved invalid, 0 otherwise.
 *
 * NOTE(review): elided listing — loop structure and some error paths are
 * not visible here.
 */
5651 static int check_space_cache(struct btrfs_root *root)
5653 struct btrfs_block_group_cache *cache;
5654 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
/*
 * cache_generation == -1ULL means no v1 space cache; otherwise it must
 * match the current super generation or the kernel will discard it anyway.
 */
5658 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5659 btrfs_super_generation(root->fs_info->super_copy) !=
5660 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5661 printf("cache and super generation don't match, space cache "
5662 "will be invalidated\n");
5666 if (ctx.progress_enabled) {
5667 ctx.tp = TASK_FREE_SPACE;
5668 task_start(ctx.info);
/* Iterate block groups in address order starting past the first super. */
5672 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5676 start = cache->key.objectid + cache->key.offset;
5677 if (!cache->free_space_ctl) {
5678 if (btrfs_init_free_space_ctl(cache,
5679 root->sectorsize)) {
/* Reuse of an existing ctl: drop stale contents before reloading. */
5684 btrfs_remove_free_space_cache(cache);
5687 if (btrfs_fs_compat_ro(root->fs_info,
5688 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
/* Free-space-tree path: super stripes must be excluded before loading. */
5689 ret = exclude_super_stripes(root, cache);
5691 fprintf(stderr, "could not exclude super stripes: %s\n",
5696 ret = load_free_space_tree(root->fs_info, cache);
5697 free_excluded_extents(root, cache);
5699 fprintf(stderr, "could not load free space tree: %s\n",
/* v1 space cache path. */
5706 ret = load_free_space_cache(root->fs_info, cache);
5711 ret = verify_space_cache(root, cache);
5713 fprintf(stderr, "cache appears valid but isn't %Lu\n",
5714 cache->key.objectid);
5719 task_stop(ctx.info);
5721 return error ? -EINVAL : 0;
/*
 * Read the data extent at [bytenr, bytenr+num_bytes) and verify each
 * sectorsize chunk against the checksums stored in the csum item at
 * 'leaf_offset' within 'eb'. On a mismatch, retries the remaining mirrors
 * before giving up on that sector.
 *
 * NOTE(review): elided listing — the malloc() result check and several
 * loop-control lines (mirror retry, breaks) are not visible here.
 */
5724 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5725 u64 num_bytes, unsigned long leaf_offset,
5726 struct extent_buffer *eb) {
5729 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5731 unsigned long csum_offset;
5735 u64 data_checked = 0;
/* Csum items always cover whole sectors; anything else is malformed. */
5741 if (num_bytes % root->sectorsize)
5744 data = malloc(num_bytes);
5748 while (offset < num_bytes) {
5751 read_len = num_bytes - offset;
5752 /* read as much space once a time */
5753 ret = read_extent_data(root, data + offset,
5754 bytenr + offset, &read_len, mirror);
5758 /* verify every 4k data's checksum */
5759 while (data_checked < read_len) {
5761 tmp = offset + data_checked;
5763 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5764 csum, root->sectorsize);
5765 btrfs_csum_final(csum, (u8 *)&csum);
/* Stored csums are packed csum_size bytes per sector within the item. */
5767 csum_offset = leaf_offset +
5768 tmp / root->sectorsize * csum_size;
5769 read_extent_buffer(eb, (char *)&csum_expected,
5770 csum_offset, csum_size);
5771 /* try another mirror */
5772 if (csum != csum_expected) {
5773 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5774 mirror, bytenr + tmp,
5775 csum, csum_expected);
5776 num_copies = btrfs_num_copies(
5777 &root->fs_info->mapping_tree,
5779 if (mirror < num_copies - 1) {
5784 data_checked += root->sectorsize;
/*
 * Verify that the whole range [bytenr, bytenr+num_bytes) is covered by
 * extent items in the extent tree. Walks the extent tree, shrinking the
 * queried range as covering extents are found; recurses when an extent
 * covers only the middle of the range. Reports any uncovered remainder.
 *
 * NOTE(review): elided listing — some returns, 'goto again' style loop
 * control and the closing brace are not visible here.
 */
5793 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5796 struct btrfs_path *path;
5797 struct extent_buffer *leaf;
5798 struct btrfs_key key;
5801 path = btrfs_alloc_path();
5803 fprintf(stderr, "Error allocating path\n");
/* Search for the last possible item at this bytenr. */
5807 key.objectid = bytenr;
5808 key.type = BTRFS_EXTENT_ITEM_KEY;
5809 key.offset = (u64)-1;
5812 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5815 fprintf(stderr, "Error looking up extent record %d\n", ret);
5816 btrfs_free_path(path);
/* Step back one item; the search landed just past our key. */
5819 if (path->slots[0] > 0) {
5822 ret = btrfs_prev_leaf(root, path);
5825 } else if (ret > 0) {
5832 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5835 * Block group items come before extent items if they have the same
5836 * bytenr, so walk back one more just in case. Dear future traveller,
5837 * first congrats on mastering time travel. Now if it's not too much
5838 * trouble could you go back to 2006 and tell Chris to make the
5839 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5840 * EXTENT_ITEM_KEY please?
5842 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5843 if (path->slots[0] > 0) {
5846 ret = btrfs_prev_leaf(root, path);
5849 } else if (ret > 0) {
5854 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5858 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5859 ret = btrfs_next_leaf(root, path);
5861 fprintf(stderr, "Error going to next leaf "
5863 btrfs_free_path(path);
5869 leaf = path->nodes[0];
5870 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5871 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent ends before our range starts: keep walking forward. */
5875 if (key.objectid + key.offset < bytenr) {
/* Extent starts past our range: nothing further can cover it. */
5879 if (key.objectid > bytenr + num_bytes)
/* Extent begins exactly at bytenr: consume the covered prefix. */
5882 if (key.objectid == bytenr) {
5883 if (key.offset >= num_bytes) {
5887 num_bytes -= key.offset;
5888 bytenr += key.offset;
/* Extent begins before bytenr: consume whatever overlap it provides. */
5889 } else if (key.objectid < bytenr) {
5890 if (key.objectid + key.offset >= bytenr + num_bytes) {
5894 num_bytes = (bytenr + num_bytes) -
5895 (key.objectid + key.offset);
5896 bytenr = key.objectid + key.offset;
5898 if (key.objectid + key.offset < bytenr + num_bytes) {
5899 u64 new_start = key.objectid + key.offset;
5900 u64 new_bytes = bytenr + num_bytes - new_start;
5903 * Weird case, the extent is in the middle of
5904 * our range, we'll have to search one side
5905 * and then the other. Not sure if this happens
5906 * in real life, but no harm in coding it up
5907 * anyway just in case.
5909 btrfs_release_path(path);
5910 ret = check_extent_exists(root, new_start,
5913 fprintf(stderr, "Right section didn't "
5917 num_bytes = key.objectid - bytenr;
5920 num_bytes = key.objectid - bytenr;
/* Anything left uncovered means csums exist for unallocated space. */
5927 if (num_bytes && !ret) {
5928 fprintf(stderr, "There are no extents for csum range "
5929 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5933 btrfs_free_path(path);
/*
 * Walk the entire csum tree. For every csum item: optionally verify the
 * actual data checksums (when check_data_csum is set), and coalesce
 * adjacent csum ranges so each contiguous run can be checked against the
 * extent tree via check_extent_exists() — a csum without a matching extent
 * record is an error.
 *
 * NOTE(review): elided listing — loop boilerplate, the skip_csum_check
 * label placement and final return are not visible here.
 */
5937 static int check_csums(struct btrfs_root *root)
5939 struct btrfs_path *path;
5940 struct extent_buffer *leaf;
5941 struct btrfs_key key;
5942 u64 offset = 0, num_bytes = 0;
5943 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5947 unsigned long leaf_offset;
5949 root = root->fs_info->csum_root;
5950 if (!extent_buffer_uptodate(root->node)) {
5951 fprintf(stderr, "No valid csum tree found\n");
/* All csum items share one objectid; iterate from the beginning. */
5955 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5956 key.type = BTRFS_EXTENT_CSUM_KEY;
5959 path = btrfs_alloc_path();
5963 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5965 fprintf(stderr, "Error searching csum tree %d\n", ret);
5966 btrfs_free_path(path);
5970 if (ret > 0 && path->slots[0])
5975 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5976 ret = btrfs_next_leaf(root, path);
5978 fprintf(stderr, "Error going to next leaf "
5985 leaf = path->nodes[0];
5987 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5988 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Each csum_size bytes in the item covers one sector of data. */
5993 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5994 csum_size) * root->sectorsize;
5995 if (!check_data_csum)
5996 goto skip_csum_check;
5997 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5998 ret = check_extent_csums(root, key.offset, data_len,
6004 offset = key.offset;
/* Non-contiguous with the accumulated run: verify the finished run. */
6005 } else if (key.offset != offset + num_bytes) {
6006 ret = check_extent_exists(root, offset, num_bytes);
6008 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6009 "there is no extent record\n",
6010 offset, offset+num_bytes);
6013 offset = key.offset;
6016 num_bytes += data_len;
6020 btrfs_free_path(path);
/*
 * Return whether 'key' sorts strictly before 'drop_key' in btrfs key order
 * (objectid, then type, then offset) — i.e. the key falls in the portion of
 * a partially-dropped snapshot that has already been deleted.
 * NOTE(review): the 'return 1'/'return 0' lines are elided from this
 * listing; only the comparison cascade is visible.
 */
6024 static int is_dropped_key(struct btrfs_key *key,
6025 struct btrfs_key *drop_key) {
6026 if (key->objectid < drop_key->objectid)
6028 else if (key->objectid == drop_key->objectid) {
6029 if (key->type < drop_key->type)
6031 else if (key->type == drop_key->type) {
6032 if (key->offset < drop_key->offset)
6040 * Here are the rules for FULL_BACKREF.
6042 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6043 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6045 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
6046 * if it happened after the relocation occurred since we'll have dropped the
6047 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6048 * have no real way to know for sure.
6050 * We process the blocks one root at a time, and we start from the lowest root
6051 * objectid and go to the highest. So we can just lookup the owner backref for
6052 * the record and if we don't find it then we know it doesn't exist and we have
6055 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6056 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6057 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Decide whether the tree block 'buf' should carry FULL_BACKREF (see the
 * rules documented above this function) and record the verdict in *flags.
 * Also marks the extent record bad_full_backref when the verdict conflicts
 * with what earlier passes stored in flag_block_full_backref.
 *
 * NOTE(review): elided listing — 'goto' targets / 'full_backref' label and
 * early returns are not visible here.
 */
6059 static int calc_extent_flag(struct btrfs_root *root,
6060 struct cache_tree *extent_cache,
6061 struct extent_buffer *buf,
6062 struct root_item_record *ri,
6065 struct extent_record *rec;
6066 struct cache_extent *cache;
6067 struct tree_backref *tback;
6070 cache = lookup_cache_extent(extent_cache, buf->start, 1);
6071 /* we have added this extent before */
6075 rec = container_of(cache, struct extent_record, cache);
6078 * Except file/reloc tree, we can not have
6081 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* A root's own node can't be shared from above. */
6086 if (buf->start == ri->bytenr)
/* Rule 1: the RELOC header flag forces FULL_BACKREF. */
6089 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
/* Rule 2: owner still matches the root we're walking — normal backref. */
6092 owner = btrfs_header_owner(buf);
6093 if (owner == ri->objectid)
6096 tback = find_tree_backref(rec, 0, owner);
/* Verdict: normal backref; flag disagreement with earlier passes. */
6101 if (rec->flag_block_full_backref != FLAG_UNSET &&
6102 rec->flag_block_full_backref != 0)
6103 rec->bad_full_backref = 1;
/* Verdict: FULL_BACKREF; flag disagreement with earlier passes. */
6106 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6107 if (rec->flag_block_full_backref != FLAG_UNSET &&
6108 rec->flag_block_full_backref != 1)
6109 rec->bad_full_backref = 1;
/*
 * Print a human-readable "Invalid key type(X) found in root(Y)" diagnostic
 * for a key whose type is not legal in the tree identified by 'rootid'.
 */
6113 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6115 fprintf(stderr, "Invalid key type(");
6116 print_key_type(stderr, 0, key_type);
6117 fprintf(stderr, ") found in root(");
6118 print_objectid(stderr, rootid, 0);
6119 fprintf(stderr, ")\n");
6123 * Check if the key is valid with its extent buffer.
6125 * This is an early check in case an invalid key exists in an extent buffer
6126 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Sanity-check that a key of 'key_type' is legal in the tree 'rootid';
 * report a mismatch (via report_mismatch_key_root) otherwise.
 *
 * NOTE(review): the case label BTRFS_CSUM_TREE_OBJECTID below is a tree
 * objectid, yet the switch dispatches on a key type — upstream btrfs-progs
 * later corrected this label to BTRFS_EXTENT_CSUM_KEY. Confirm against
 * upstream before relying on the csum branch ever matching.
 *
 * NOTE(review): elided listing — the switch statement, 'goto err'/'break'
 * lines and the err label are not visible here.
 */
6129 static int check_type_with_root(u64 rootid, u8 key_type)
6132 /* Only valid in chunk tree */
6133 case BTRFS_DEV_ITEM_KEY:
6134 case BTRFS_CHUNK_ITEM_KEY:
6135 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6138 /* valid in csum and log tree */
6139 case BTRFS_CSUM_TREE_OBJECTID:
6140 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6144 case BTRFS_EXTENT_ITEM_KEY:
6145 case BTRFS_METADATA_ITEM_KEY:
6146 case BTRFS_BLOCK_GROUP_ITEM_KEY:
6147 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6150 case BTRFS_ROOT_ITEM_KEY:
6151 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6154 case BTRFS_DEV_EXTENT_KEY:
6155 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6161 report_mismatch_key_root(key_type, rootid);
/*
 * Process the next queued tree block in the extent-tree verification walk:
 * pick it from the pending/node caches (with readahead), read it, work out
 * its FULL_BACKREF status, validate it with check_block(), then scan its
 * items — recording chunks, devices, block groups, dev extents, backrefs
 * and csum bytes for leaves, or queueing child pointers for nodes.
 *
 * NOTE(review): elided listing — many declarations, braces and error
 * returns are not visible here.
 */
6165 static int run_next_block(struct btrfs_root *root,
6166 struct block_info *bits,
6169 struct cache_tree *pending,
6170 struct cache_tree *seen,
6171 struct cache_tree *reada,
6172 struct cache_tree *nodes,
6173 struct cache_tree *extent_cache,
6174 struct cache_tree *chunk_cache,
6175 struct rb_root *dev_cache,
6176 struct block_group_tree *block_group_cache,
6177 struct device_extent_tree *dev_extent_cache,
6178 struct root_item_record *ri)
6180 struct extent_buffer *buf;
6181 struct extent_record *rec = NULL;
6192 struct btrfs_key key;
6193 struct cache_extent *cache;
/* Choose the next batch of blocks to visit and possibly read ahead. */
6196 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6197 bits_nr, &reada_bits);
6202 for(i = 0; i < nritems; i++) {
6203 ret = add_cache_extent(reada, bits[i].start,
6208 /* fixme, get the parent transid */
6209 readahead_tree_block(root, bits[i].start,
6213 *last = bits[0].start;
6214 bytenr = bits[0].start;
6215 size = bits[0].size;
/* This block is being visited now: drop it from all pending queues. */
6217 cache = lookup_cache_extent(pending, bytenr, size);
6219 remove_cache_extent(pending, cache);
6222 cache = lookup_cache_extent(reada, bytenr, size);
6224 remove_cache_extent(reada, cache);
6227 cache = lookup_cache_extent(nodes, bytenr, size);
6229 remove_cache_extent(nodes, cache);
6232 cache = lookup_cache_extent(extent_cache, bytenr, size);
6234 rec = container_of(cache, struct extent_record, cache);
6235 gen = rec->parent_generation;
6238 /* fixme, get the real parent transid */
6239 buf = read_tree_block(root, bytenr, size, gen);
6240 if (!extent_buffer_uptodate(buf)) {
6241 record_bad_block_io(root->fs_info,
6242 extent_cache, bytenr, size);
6246 nritems = btrfs_header_nritems(buf);
/*
 * Get the block's extent flags: from the on-disk extent tree when we
 * trust it, otherwise computed from the backrefs seen so far.
 */
6249 if (!init_extent_tree) {
6250 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6251 btrfs_header_level(buf), 1, NULL,
6254 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6256 fprintf(stderr, "Couldn't calc extent flags\n");
6257 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6262 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6264 fprintf(stderr, "Couldn't calc extent flags\n");
6265 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6269 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6271 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6272 ri->objectid == btrfs_header_owner(buf)) {
6274 * Ok we got to this block from it's original owner and
6275 * we have FULL_BACKREF set. Relocation can leave
6276 * converted blocks over so this is altogether possible,
6277 * however it's not possible if the generation > the
6278 * last snapshot, so check for this case.
6280 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6281 btrfs_header_generation(buf) > ri->last_snapshot) {
6282 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6283 rec->bad_full_backref = 1;
6288 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6289 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6290 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6291 rec->bad_full_backref = 1;
/* Remember the final verdict on the extent record. */
6295 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6296 rec->flag_block_full_backref = 1;
6300 rec->flag_block_full_backref = 0;
6302 owner = btrfs_header_owner(buf);
6305 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: account wasted space and record every item we understand. */
6309 if (btrfs_is_leaf(buf)) {
6310 btree_space_waste += btrfs_leaf_free_space(root, buf);
6311 for (i = 0; i < nritems; i++) {
6312 struct btrfs_file_extent_item *fi;
6313 btrfs_item_key_to_cpu(buf, &key, i);
6315 * Check key type against the leaf owner.
6316 * Could filter quite a lot of early error if
6319 if (check_type_with_root(btrfs_header_owner(buf),
6321 fprintf(stderr, "ignoring invalid key\n");
6324 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6325 process_extent_item(root, extent_cache, buf,
6329 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6330 process_extent_item(root, extent_cache, buf,
6334 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6336 btrfs_item_size_nr(buf, i);
6339 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6340 process_chunk_item(chunk_cache, &key, buf, i);
6343 if (key.type == BTRFS_DEV_ITEM_KEY) {
6344 process_device_item(dev_cache, &key, buf, i);
6347 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6348 process_block_group_item(block_group_cache,
6352 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6353 process_device_extent_item(dev_extent_cache,
6358 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6359 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6360 process_extent_ref_v0(extent_cache, buf, i);
/* Standalone (non-inline) backref items feed the backref caches. */
6367 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6368 ret = add_tree_backref(extent_cache,
6369 key.objectid, 0, key.offset, 0);
6371 error("add_tree_backref failed: %s",
6375 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6376 ret = add_tree_backref(extent_cache,
6377 key.objectid, key.offset, 0, 0);
6379 error("add_tree_backref failed: %s",
6383 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6384 struct btrfs_extent_data_ref *ref;
6385 ref = btrfs_item_ptr(buf, i,
6386 struct btrfs_extent_data_ref);
6387 add_data_backref(extent_cache,
6389 btrfs_extent_data_ref_root(buf, ref),
6390 btrfs_extent_data_ref_objectid(buf,
6392 btrfs_extent_data_ref_offset(buf, ref),
6393 btrfs_extent_data_ref_count(buf, ref),
6394 0, root->sectorsize);
6397 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6398 struct btrfs_shared_data_ref *ref;
6399 ref = btrfs_item_ptr(buf, i,
6400 struct btrfs_shared_data_ref);
6401 add_data_backref(extent_cache,
6402 key.objectid, key.offset, 0, 0, 0,
6403 btrfs_shared_data_ref_count(buf, ref),
6404 0, root->sectorsize);
/* Orphan items (except the shared ORPHAN objectid) get queued for delete. */
6407 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6408 struct bad_item *bad;
6410 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6414 bad = malloc(sizeof(struct bad_item));
6417 INIT_LIST_HEAD(&bad->list);
6418 memcpy(&bad->key, &key,
6419 sizeof(struct btrfs_key));
6420 bad->root_id = owner;
6421 list_add_tail(&bad->list, &delete_items);
/* Regular file extents: account bytes and record their data backref. */
6424 if (key.type != BTRFS_EXTENT_DATA_KEY)
6426 fi = btrfs_item_ptr(buf, i,
6427 struct btrfs_file_extent_item);
6428 if (btrfs_file_extent_type(buf, fi) ==
6429 BTRFS_FILE_EXTENT_INLINE)
6431 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6434 data_bytes_allocated +=
6435 btrfs_file_extent_disk_num_bytes(buf, fi);
6436 if (data_bytes_allocated < root->sectorsize) {
6439 data_bytes_referenced +=
6440 btrfs_file_extent_num_bytes(buf, fi);
6441 add_data_backref(extent_cache,
6442 btrfs_file_extent_disk_bytenr(buf, fi),
6443 parent, owner, key.objectid, key.offset -
6444 btrfs_file_extent_offset(buf, fi), 1, 1,
6445 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Node: queue each child pointer, skipping already-dropped subtrees. */
6449 struct btrfs_key first_key;
6451 first_key.objectid = 0;
6454 btrfs_item_key_to_cpu(buf, &first_key, 0);
6455 level = btrfs_header_level(buf);
6456 for (i = 0; i < nritems; i++) {
6457 struct extent_record tmpl;
6459 ptr = btrfs_node_blockptr(buf, i);
6460 size = root->nodesize;
6461 btrfs_node_key_to_cpu(buf, &key, i);
6463 if ((level == ri->drop_level)
6464 && is_dropped_key(&key, &ri->drop_key)) {
6469 memset(&tmpl, 0, sizeof(tmpl));
6470 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6471 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6476 tmpl.max_size = size;
6477 ret = add_extent_rec(extent_cache, &tmpl);
6481 ret = add_tree_backref(extent_cache, ptr, parent,
6484 error("add_tree_backref failed: %s",
6490 add_pending(nodes, seen, ptr, size);
6492 add_pending(pending, seen, ptr, size);
6495 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6496 nritems) * sizeof(struct btrfs_key_ptr);
/* Global accounting by tree owner. */
6498 total_btree_bytes += buf->len;
6499 if (fs_root_objectid(btrfs_header_owner(buf)))
6500 total_fs_tree_bytes += buf->len;
6501 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6502 total_extent_tree_bytes += buf->len;
6503 if (!found_old_backref &&
6504 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6505 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6506 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6507 found_old_backref = 1;
6509 free_extent_buffer(buf);
/*
 * Seed the block walk with a tree root: queue 'buf' on the nodes or
 * pending cache (by level), create its extent record, and add the initial
 * tree backref — a shared (by-parent) backref for reloc/v0 roots,
 * otherwise a normal by-objectid backref.
 *
 * NOTE(review): elided listing — parameter tail, some tmpl fields and the
 * return are not visible here.
 */
6513 static int add_root_to_pending(struct extent_buffer *buf,
6514 struct cache_tree *extent_cache,
6515 struct cache_tree *pending,
6516 struct cache_tree *seen,
6517 struct cache_tree *nodes,
6520 struct extent_record tmpl;
6523 if (btrfs_header_level(buf) > 0)
6524 add_pending(nodes, seen, buf->start, buf->len);
6526 add_pending(pending, seen, buf->start, buf->len);
6528 memset(&tmpl, 0, sizeof(tmpl));
6529 tmpl.start = buf->start;
6534 tmpl.max_size = buf->len;
6535 add_extent_rec(extent_cache, &tmpl);
6537 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6538 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6539 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6542 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6547 /* as we fix the tree, we might be deleting blocks that
6548 * we're tracking for repair. This hook makes sure we
6549 * remove any backrefs for blocks as we are fixing them.
/*
 * Repair-time hook called when the fixer frees an extent: mirror the
 * deletion into fsck's in-memory extent/backref caches so later passes
 * don't re-report refs we just removed. Owner >= FIRST_FREE_OBJECTID
 * distinguishes data backrefs from tree backrefs.
 *
 * NOTE(review): elided listing — some braces, free() calls and the return
 * are not visible here.
 */
6551 static int free_extent_hook(struct btrfs_trans_handle *trans,
6552 struct btrfs_root *root,
6553 u64 bytenr, u64 num_bytes, u64 parent,
6554 u64 root_objectid, u64 owner, u64 offset,
6557 struct extent_record *rec;
6558 struct cache_extent *cache;
6560 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6562 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6563 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6567 rec = container_of(cache, struct extent_record, cache);
6569 struct data_backref *back;
6570 back = find_data_backref(rec, parent, root_objectid, owner,
6571 offset, 1, bytenr, num_bytes);
/* Drop the given number of refs from both the fs-tree and extent-tree views. */
6574 if (back->node.found_ref) {
6575 back->found_ref -= refs_to_drop;
6577 rec->refs -= refs_to_drop;
6579 if (back->node.found_extent_tree) {
6580 back->num_refs -= refs_to_drop;
6581 if (rec->extent_item_refs)
6582 rec->extent_item_refs -= refs_to_drop;
6584 if (back->found_ref == 0)
6585 back->node.found_ref = 0;
6586 if (back->num_refs == 0)
6587 back->node.found_extent_tree = 0;
/*
 * NOTE(review): condition looks inverted — deleting the backref while
 * found_ref is still set would drop a live reference; upstream tests
 * !found_extent_tree && !found_ref here. Confirm against upstream.
 */
6589 if (!back->node.found_extent_tree && back->node.found_ref) {
6590 list_del(&back->node.list);
6594 struct tree_backref *back;
6595 back = find_tree_backref(rec, parent, root_objectid);
6598 if (back->node.found_ref) {
6601 back->node.found_ref = 0;
6603 if (back->node.found_extent_tree) {
6604 if (rec->extent_item_refs)
6605 rec->extent_item_refs--;
6606 back->node.found_extent_tree = 0;
/* NOTE(review): same suspicious condition as the data-backref path above. */
6608 if (!back->node.found_extent_tree && back->node.found_ref) {
6609 list_del(&back->node.list);
/* Free the whole record if nothing references this extent anymore. */
6613 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete every extent-tree item keyed at 'bytenr' (extent items, metadata
 * items and all backref item types), stepping backwards through the key
 * space. When an EXTENT/METADATA item is removed, the block group's used
 * counter is adjusted via btrfs_update_block_group().
 *
 * NOTE(review): elided listing — loop labels, search error handling and
 * the return are not visible here.
 */
6618 static int delete_extent_records(struct btrfs_trans_handle *trans,
6619 struct btrfs_root *root,
6620 struct btrfs_path *path,
6621 u64 bytenr, u64 new_len)
6623 struct btrfs_key key;
6624 struct btrfs_key found_key;
6625 struct extent_buffer *leaf;
/* Start from the highest possible key at this bytenr and walk down. */
6630 key.objectid = bytenr;
6632 key.offset = (u64)-1;
6635 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6642 if (path->slots[0] == 0)
6648 leaf = path->nodes[0];
6649 slot = path->slots[0];
6651 btrfs_item_key_to_cpu(leaf, &found_key, slot);
/* Finished once we've left this bytenr's key space. */
6652 if (found_key.objectid != bytenr)
6655 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6656 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6657 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6658 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6659 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6660 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6661 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
/* Not a deletable type: step the search key backwards and retry. */
6662 btrfs_release_path(path);
6663 if (found_key.type == 0) {
6664 if (found_key.offset == 0)
6666 key.offset = found_key.offset - 1;
6667 key.type = found_key.type;
6669 key.type = found_key.type - 1;
6670 key.offset = (u64)-1;
6674 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6675 found_key.objectid, found_key.type, found_key.offset);
6677 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6680 btrfs_release_path(path);
/*
 * For an EXTENT_ITEM the key offset is the length; for a METADATA_ITEM
 * the block spans nodesize bytes.
 */
6682 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6683 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6684 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6685 found_key.offset : root->nodesize;
6687 ret = btrfs_update_block_group(trans, root, bytenr,
6694 btrfs_release_path(path);
6699 * for a single backref, this will allocate a new extent
6700 * and add the backref to it.
/*
 * For a single backref, (re)create the on-disk extent item when not yet
 * allocated, then insert the backref itself via btrfs_inc_extent_ref().
 * Handles both data backrefs (per found_ref count) and tree backrefs,
 * in normal or FULL_BACKREF (parent-based) form.
 *
 * Fixed in review: the four 'btrfs_set_disk_key_*' / 'btrfs_set_tree_block_key'
 * calls read "©_key" in the original listing — mojibake for "&copy_key"
 * (the "&copy" prefix was corrupted through an HTML-entity round-trip);
 * restored to take the address of the local copy_key. Also dropped the
 * stray double semicolon on the copy_key declaration.
 *
 * NOTE(review): elided listing — some braces, 'fail:' label and returns
 * are not visible here.
 */
6702 static int record_extent(struct btrfs_trans_handle *trans,
6703 struct btrfs_fs_info *info,
6704 struct btrfs_path *path,
6705 struct extent_record *rec,
6706 struct extent_backref *back,
6707 int allocated, u64 flags)
6710 struct btrfs_root *extent_root = info->extent_root;
6711 struct extent_buffer *leaf;
6712 struct btrfs_key ins_key;
6713 struct btrfs_extent_item *ei;
6714 struct tree_backref *tback;
6715 struct data_backref *dback;
6716 struct btrfs_tree_block_info *bi;
/* Metadata extents are always at least one node in size. */
6719 rec->max_size = max_t(u64, rec->max_size,
6720 info->extent_root->nodesize);
/* Tree blocks carry an extra btrfs_tree_block_info after the extent item. */
6723 u32 item_size = sizeof(*ei);
6726 item_size += sizeof(*bi);
6728 ins_key.objectid = rec->start;
6729 ins_key.offset = rec->max_size;
6730 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6732 ret = btrfs_insert_empty_item(trans, extent_root, path,
6733 &ins_key, item_size);
6737 leaf = path->nodes[0];
6738 ei = btrfs_item_ptr(leaf, path->slots[0],
6739 struct btrfs_extent_item);
/* Refs start at 0; each btrfs_inc_extent_ref() below bumps them. */
6741 btrfs_set_extent_refs(leaf, ei, 0);
6742 btrfs_set_extent_generation(leaf, ei, rec->generation);
6744 if (back->is_data) {
6745 btrfs_set_extent_flags(leaf, ei,
6746 BTRFS_EXTENT_FLAG_DATA);
6748 struct btrfs_disk_key copy_key;
6750 tback = to_tree_backref(back);
6751 bi = (struct btrfs_tree_block_info *)(ei + 1);
6752 memset_extent_buffer(leaf, 0, (unsigned long)bi,
/* Rebuild the tree_block_info key from the record's cached first key. */
6755 btrfs_set_disk_key_objectid(&copy_key,
6756 rec->info_objectid);
6757 btrfs_set_disk_key_type(&copy_key, 0);
6758 btrfs_set_disk_key_offset(&copy_key, 0);
6760 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6761 btrfs_set_tree_block_key(leaf, bi, &copy_key);
6763 btrfs_set_extent_flags(leaf, ei,
6764 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6767 btrfs_mark_buffer_dirty(leaf);
6768 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6769 rec->max_size, 1, 0);
6772 btrfs_release_path(path);
6775 if (back->is_data) {
6779 dback = to_data_backref(back);
6780 if (back->full_backref)
6781 parent = dback->parent;
/* Insert one ref per reference actually found in the fs trees. */
6785 for (i = 0; i < dback->found_ref; i++) {
6786 /* if parent != 0, we're doing a full backref
6787 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6788 * just makes the backref allocator create a data
6791 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6792 rec->start, rec->max_size,
6796 BTRFS_FIRST_FREE_OBJECTID :
6802 fprintf(stderr, "adding new data backref"
6803 " on %llu %s %llu owner %llu"
6804 " offset %llu found %d\n",
6805 (unsigned long long)rec->start,
6806 back->full_backref ?
6808 back->full_backref ?
6809 (unsigned long long)parent :
6810 (unsigned long long)dback->root,
6811 (unsigned long long)dback->owner,
6812 (unsigned long long)dback->offset,
6817 tback = to_tree_backref(back);
6818 if (back->full_backref)
6819 parent = tback->parent;
6823 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6824 rec->start, rec->max_size,
6825 parent, tback->root, 0, 0);
6826 fprintf(stderr, "adding new tree backref on "
6827 "start %llu len %llu parent %llu root %llu\n",
6828 rec->start, rec->max_size, parent, tback->root);
6831 btrfs_release_path(path);
/*
 * Linear-scan 'entries' for the extent_entry whose bytenr and bytes both
 * match. NOTE(review): the return lines are elided from this listing.
 */
6835 static struct extent_entry *find_entry(struct list_head *entries,
6836 u64 bytenr, u64 bytes)
6838 struct extent_entry *entry = NULL;
6840 list_for_each_entry(entry, entries, list) {
6841 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Pick the most trustworthy extent_entry from a list of conflicting
 * candidates: skip fully-broken entries, prefer higher reference counts,
 * and refuse to choose (keep searching / bail) on ties it cannot break.
 * NOTE(review): elided listing — tie-breaking branches and the final
 * return are not fully visible here.
 */
6848 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6850 struct extent_entry *entry, *best = NULL, *prev = NULL;
6852 list_for_each_entry(entry, entries, list) {
6859 * If there are as many broken entries as entries then we know
6860 * not to trust this particular entry.
6862 if (entry->broken == entry->count)
6866 * If our current entry == best then we can't be sure our best
6867 * is really the best, so we need to keep searching.
6869 if (best && best->count == entry->count) {
6875 /* Prev == entry, not good enough, have to keep searching */
6876 if (!prev->broken && prev->count == entry->count)
6880 best = (prev->count > entry->count) ? prev : entry;
6881 else if (best->count < entry->count)
6889 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6890 struct data_backref *dback, struct extent_entry *entry)
6892 struct btrfs_trans_handle *trans;
6893 struct btrfs_root *root;
6894 struct btrfs_file_extent_item *fi;
6895 struct extent_buffer *leaf;
6896 struct btrfs_key key;
6900 key.objectid = dback->root;
6901 key.type = BTRFS_ROOT_ITEM_KEY;
6902 key.offset = (u64)-1;
6903 root = btrfs_read_fs_root(info, &key);
6905 fprintf(stderr, "Couldn't find root for our ref\n");
6910 * The backref points to the original offset of the extent if it was
6911 * split, so we need to search down to the offset we have and then walk
6912 * forward until we find the backref we're looking for.
6914 key.objectid = dback->owner;
6915 key.type = BTRFS_EXTENT_DATA_KEY;
6916 key.offset = dback->offset;
6917 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6919 fprintf(stderr, "Error looking up ref %d\n", ret);
6924 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6925 ret = btrfs_next_leaf(root, path);
6927 fprintf(stderr, "Couldn't find our ref, next\n");
6931 leaf = path->nodes[0];
6932 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6933 if (key.objectid != dback->owner ||
6934 key.type != BTRFS_EXTENT_DATA_KEY) {
6935 fprintf(stderr, "Couldn't find our ref, search\n");
6938 fi = btrfs_item_ptr(leaf, path->slots[0],
6939 struct btrfs_file_extent_item);
6940 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6941 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6943 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6948 btrfs_release_path(path);
6950 trans = btrfs_start_transaction(root, 1);
6952 return PTR_ERR(trans);
6955 * Ok we have the key of the file extent we want to fix, now we can cow
6956 * down to the thing and fix it.
6958 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6960 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6961 key.objectid, key.type, key.offset, ret);
6965 fprintf(stderr, "Well that's odd, we just found this key "
6966 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6971 leaf = path->nodes[0];
6972 fi = btrfs_item_ptr(leaf, path->slots[0],
6973 struct btrfs_file_extent_item);
6975 if (btrfs_file_extent_compression(leaf, fi) &&
6976 dback->disk_bytenr != entry->bytenr) {
6977 fprintf(stderr, "Ref doesn't match the record start and is "
6978 "compressed, please take a btrfs-image of this file "
6979 "system and send it to a btrfs developer so they can "
6980 "complete this functionality for bytenr %Lu\n",
6981 dback->disk_bytenr);
6986 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6987 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6988 } else if (dback->disk_bytenr > entry->bytenr) {
6989 u64 off_diff, offset;
6991 off_diff = dback->disk_bytenr - entry->bytenr;
6992 offset = btrfs_file_extent_offset(leaf, fi);
6993 if (dback->disk_bytenr + offset +
6994 btrfs_file_extent_num_bytes(leaf, fi) >
6995 entry->bytenr + entry->bytes) {
6996 fprintf(stderr, "Ref is past the entry end, please "
6997 "take a btrfs-image of this file system and "
6998 "send it to a btrfs developer, ref %Lu\n",
6999 dback->disk_bytenr);
7004 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7005 btrfs_set_file_extent_offset(leaf, fi, offset);
7006 } else if (dback->disk_bytenr < entry->bytenr) {
7009 offset = btrfs_file_extent_offset(leaf, fi);
7010 if (dback->disk_bytenr + offset < entry->bytenr) {
7011 fprintf(stderr, "Ref is before the entry start, please"
7012 " take a btrfs-image of this file system and "
7013 "send it to a btrfs developer, ref %Lu\n",
7014 dback->disk_bytenr);
7019 offset += dback->disk_bytenr;
7020 offset -= entry->bytenr;
7021 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7022 btrfs_set_file_extent_offset(leaf, fi, offset);
7025 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7028 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7029 * only do this if we aren't using compression, otherwise it's a
7032 if (!btrfs_file_extent_compression(leaf, fi))
7033 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7035 printf("ram bytes may be wrong?\n");
7036 btrfs_mark_buffer_dirty(leaf);
7038 err = btrfs_commit_transaction(trans, root);
7039 btrfs_release_path(path);
7040 return ret ? ret : err;
/*
 * verify_backrefs - make an extent record's data backrefs agree on
 * (disk_bytenr, bytes), repairing file extent items that disagree.
 *
 * Collects the distinct (bytenr, bytes) pairs seen among the record's
 * data backrefs into a local list, lets the backrefs "vote" via
 * find_most_right_entry(), falls back to the extent record itself on a
 * tie, and then calls repair_ref() on every backref that does not match
 * the winning entry.
 *
 * NOTE(review): this excerpt elides several original lines (braces,
 * continue/goto statements, error paths); comments below describe only
 * the visible logic.
 */
7043 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7044 struct extent_record *rec)
7046 struct extent_backref *back;
7047 struct data_backref *dback;
7048 struct extent_entry *entry, *best = NULL;
7051 int broken_entries = 0;
7056 * Metadata is easy and the backrefs should always agree on bytenr and
7057 * size, if not we've got bigger issues.
/* Pass 1: tally the (bytenr, bytes) pairs claimed by the data backrefs. */
7062 list_for_each_entry(back, &rec->backrefs, list) {
7063 if (back->full_backref || !back->is_data)
7066 dback = to_data_backref(back);
7069 * We only pay attention to backrefs that we found a real
7072 if (dback->found_ref == 0)
7076 * For now we only catch when the bytes don't match, not the
7077 * bytenr. We can easily do this at the same time, but I want
7078 * to have a fs image to test on before we just add repair
7079 * functionality willy-nilly so we know we won't screw up the
7083 entry = find_entry(&entries, dback->disk_bytenr,
/* No entry for this pair yet: allocate and append a fresh one. */
7086 entry = malloc(sizeof(struct extent_entry));
7091 memset(entry, 0, sizeof(*entry));
7092 entry->bytenr = dback->disk_bytenr;
7093 entry->bytes = dback->bytes;
7094 list_add_tail(&entry->list, &entries);
7099 * If we only have on entry we may think the entries agree when
7100 * in reality they don't so we have to do some extra checking.
7102 if (dback->disk_bytenr != rec->start ||
7103 dback->bytes != rec->nr || back->broken)
7114 /* Yay all the backrefs agree, carry on good sir */
7115 if (nr_entries <= 1 && !mismatch)
7118 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7119 "%Lu\n", rec->start);
7122 * First we want to see if the backrefs can agree amongst themselves who
7123 * is right, so figure out which one of the entries has the highest
7126 best = find_most_right_entry(&entries);
7129 * Ok so we may have an even split between what the backrefs think, so
7130 * this is where we use the extent ref to see what it thinks.
7133 entry = find_entry(&entries, rec->start, rec->nr);
7134 if (!entry && (!broken_entries || !rec->found_rec)) {
7135 fprintf(stderr, "Backrefs don't agree with each other "
7136 "and extent record doesn't agree with anybody,"
7137 " so we can't fix bytenr %Lu bytes %Lu\n",
7138 rec->start, rec->nr);
7141 } else if (!entry) {
7143 * Ok our backrefs were broken, we'll assume this is the
7144 * correct value and add an entry for this range.
7146 entry = malloc(sizeof(struct extent_entry));
7151 memset(entry, 0, sizeof(*entry));
7152 entry->bytenr = rec->start;
7153 entry->bytes = rec->nr;
7154 list_add_tail(&entry->list, &entries);
/* Re-run the vote now that the extent record's own pair is included. */
7158 best = find_most_right_entry(&entries);
7160 fprintf(stderr, "Backrefs and extent record evenly "
7161 "split on who is right, this is going to "
7162 "require user input to fix bytenr %Lu bytes "
7163 "%Lu\n", rec->start, rec->nr);
7170 * I don't think this can happen currently as we'll abort() if we catch
7171 * this case higher up, but in case somebody removes that we still can't
7172 * deal with it properly here yet, so just bail out of that's the case.
7174 if (best->bytenr != rec->start) {
7175 fprintf(stderr, "Extent start and backref starts don't match, "
7176 "please use btrfs-image on this file system and send "
7177 "it to a btrfs developer so they can make fsck fix "
7178 "this particular case. bytenr is %Lu, bytes is %Lu\n",
7179 rec->start, rec->nr);
7185 * Ok great we all agreed on an extent record, let's go find the real
7186 * references and fix up the ones that don't match.
/* Pass 2: rewrite every real data backref that disagrees with "best". */
7188 list_for_each_entry(back, &rec->backrefs, list) {
7189 if (back->full_backref || !back->is_data)
7192 dback = to_data_backref(back);
7195 * Still ignoring backrefs that don't have a real ref attached
7198 if (dback->found_ref == 0)
7201 if (dback->bytes == best->bytes &&
7202 dback->disk_bytenr == best->bytenr)
7205 ret = repair_ref(info, path, dback, best);
7211 * Ok we messed with the actual refs, which means we need to drop our
7212 * entire cache and go back and rescan. I know this is a huge pain and
7213 * adds a lot of extra work, but it's the only way to be safe. Once all
7214 * the backrefs agree we may not need to do anything to the extent
/* Cleanup: free every vote entry accumulated above. */
7219 while (!list_empty(&entries)) {
7220 entry = list_entry(entries.next, struct extent_entry, list);
7221 list_del_init(&entry->list);
/*
 * process_duplicates - resolve an extent record that only exists as a
 * backref-created placeholder shadowing a single real duplicate.
 *
 * When @rec itself was never found as a real EXTENT_ITEM but exactly one
 * duplicate exists, promote that duplicate ("good") to be the canonical
 * cached record: move rec's backrefs onto it, fold any further
 * overlapping cached records into it, and reinsert it into the cache.
 *
 * Returns 0 when duplicates remain to be deleted by the caller,
 * 1 otherwise (see the final visible return).
 *
 * NOTE(review): the excerpt elides some original lines (early returns,
 * loop braces); comments cover visible code only.
 */
7227 static int process_duplicates(struct btrfs_root *root,
7228 struct cache_tree *extent_cache,
7229 struct extent_record *rec)
7231 struct extent_record *good, *tmp;
7232 struct cache_extent *cache;
7236 * If we found a extent record for this extent then return, or if we
7237 * have more than one duplicate we are likely going to need to delete
7240 if (rec->found_rec || rec->num_duplicates > 1)
7243 /* Shouldn't happen but just in case */
7244 BUG_ON(!rec->num_duplicates);
7247 * So this happens if we end up with a backref that doesn't match the
7248 * actual extent entry. So either the backref is bad or the extent
7249 * entry is bad. Either way we want to have the extent_record actually
7250 * reflect what we found in the extent_tree, so we need to take the
7251 * duplicate out and use that as the extent_record since the only way we
7252 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7254 remove_cache_extent(extent_cache, &rec->cache);
/* Promote the (single) duplicate and reset its bookkeeping state. */
7256 good = to_extent_record(rec->dups.next);
7257 list_del_init(&good->list);
7258 INIT_LIST_HEAD(&good->backrefs);
7259 INIT_LIST_HEAD(&good->dups);
7260 good->cache.start = good->start;
7261 good->cache.size = good->nr;
7262 good->content_checked = 0;
7263 good->owner_ref_checked = 0;
7264 good->num_duplicates = 0;
7265 good->refs = rec->refs;
7266 list_splice_init(&rec->backrefs, &good->backrefs);
/* Sweep the cache for records overlapping the promoted range. */
7268 cache = lookup_cache_extent(extent_cache, good->start,
7272 tmp = container_of(cache, struct extent_record, cache);
7275 * If we find another overlapping extent and it's found_rec is
7276 * set then it's a duplicate and we need to try and delete
7279 if (tmp->found_rec || tmp->num_duplicates > 0) {
7280 if (list_empty(&good->list))
7281 list_add_tail(&good->list,
7282 &duplicate_extents);
7283 good->num_duplicates += tmp->num_duplicates + 1;
7284 list_splice_init(&tmp->dups, &good->dups);
7285 list_del_init(&tmp->list);
7286 list_add_tail(&tmp->list, &good->dups);
7287 remove_cache_extent(extent_cache, &tmp->cache);
7292 * Ok we have another non extent item backed extent rec, so lets
7293 * just add it to this extent and carry on like we did above.
7295 good->refs += tmp->refs;
7296 list_splice_init(&tmp->backrefs, &good->backrefs);
7297 remove_cache_extent(extent_cache, &tmp->cache);
7300 ret = insert_cache_extent(extent_cache, &good->cache);
7303 return good->num_duplicates ? 0 : 1;
/*
 * delete_duplicate_records - drop redundant EXTENT_ITEMs for a range
 * covered by several overlapping extent records.
 *
 * Chooses the record that fully covers all the duplicates ("good"),
 * moves the rest onto a local delete_list, deletes each one's
 * BTRFS_EXTENT_ITEM_KEY from the extent tree inside a transaction, then
 * frees both lists.  Returns a negative error, or the number of items
 * deleted (nr_del) on success; rec->num_duplicates is cleared only when
 * nothing was deleted and no error occurred.
 *
 * NOTE(review): the excerpt elides some original lines (goto labels,
 * braces, the assignment initializing "good"); comments cover visible
 * code only.
 */
7306 static int delete_duplicate_records(struct btrfs_root *root,
7307 struct extent_record *rec)
7309 struct btrfs_trans_handle *trans;
7310 LIST_HEAD(delete_list);
7311 struct btrfs_path *path;
7312 struct extent_record *tmp, *good, *n;
7315 struct btrfs_key key;
7317 path = btrfs_alloc_path();
7324 /* Find the record that covers all of the duplicates. */
7325 list_for_each_entry(tmp, &rec->dups, list) {
7326 if (good->start < tmp->start)
7328 if (good->nr > tmp->nr)
/* Bail if the duplicates only partially overlap the chosen cover. */
7331 if (tmp->start + tmp->nr < good->start + good->nr) {
7332 fprintf(stderr, "Ok we have overlapping extents that "
7333 "aren't completely covered by each other, this "
7334 "is going to require more careful thought. "
7335 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7336 tmp->start, tmp->nr, good->start, good->nr);
7343 list_add_tail(&rec->list, &delete_list);
/* Move every duplicate except the covering record onto delete_list. */
7345 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7348 list_move_tail(&tmp->list, &delete_list);
7351 root = root->fs_info->extent_root;
7352 trans = btrfs_start_transaction(root, 1);
7353 if (IS_ERR(trans)) {
7354 ret = PTR_ERR(trans);
/* Delete each real (found_rec) duplicate's item from the extent tree. */
7358 list_for_each_entry(tmp, &delete_list, list) {
7359 if (tmp->found_rec == 0)
7361 key.objectid = tmp->start;
7362 key.type = BTRFS_EXTENT_ITEM_KEY;
7363 key.offset = tmp->nr;
7365 /* Shouldn't happen but just in case */
7366 if (tmp->metadata) {
7367 fprintf(stderr, "Well this shouldn't happen, extent "
7368 "record overlaps but is metadata? "
7369 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7373 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7379 ret = btrfs_del_item(trans, root, path);
7382 btrfs_release_path(path);
7385 err = btrfs_commit_transaction(trans, root);
/* Free every record we queued for deletion. */
7389 while (!list_empty(&delete_list)) {
7390 tmp = to_extent_record(delete_list.next);
7391 list_del_init(&tmp->list);
/* Free whatever duplicates are still hanging off rec. */
7397 while (!list_empty(&rec->dups)) {
7398 tmp = to_extent_record(rec->dups.next);
7399 list_del_init(&tmp->list);
7403 btrfs_free_path(path);
7405 if (!ret && !nr_del)
7406 rec->num_duplicates = 0;
7408 return ret ? ret : nr_del;
/*
 * find_possible_backrefs - for backrefs that were never matched to a
 * real file extent (found_ref == 0), look up the owning fs root and
 * read the actual file extent item to recover its on-disk
 * (disk_bytenr, bytes) pair.
 *
 * A recovered pair is adopted into the backref only when no extent
 * record already exists for that bytenr in @extent_cache — otherwise
 * the backref belongs to a different record and is left alone.
 *
 * NOTE(review): the excerpt elides some original lines (continue
 * statements, braces, the flag set after the final comment); comments
 * cover visible code only.
 */
7411 static int find_possible_backrefs(struct btrfs_fs_info *info,
7412 struct btrfs_path *path,
7413 struct cache_tree *extent_cache,
7414 struct extent_record *rec)
7416 struct btrfs_root *root;
7417 struct extent_backref *back;
7418 struct data_backref *dback;
7419 struct cache_extent *cache;
7420 struct btrfs_file_extent_item *fi;
7421 struct btrfs_key key;
7425 list_for_each_entry(back, &rec->backrefs, list) {
7426 /* Don't care about full backrefs (poor unloved backrefs) */
7427 if (back->full_backref || !back->is_data)
7430 dback = to_data_backref(back);
7432 /* We found this one, we don't need to do a lookup */
7433 if (dback->found_ref)
/* Resolve the subvolume root that owns this backref. */
7436 key.objectid = dback->root;
7437 key.type = BTRFS_ROOT_ITEM_KEY;
7438 key.offset = (u64)-1;
7440 root = btrfs_read_fs_root(info, &key);
7442 /* No root, definitely a bad ref, skip */
7443 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7445 /* Other err, exit */
7447 return PTR_ERR(root);
/* Look up the file extent item the backref claims to describe. */
7449 key.objectid = dback->owner;
7450 key.type = BTRFS_EXTENT_DATA_KEY;
7451 key.offset = dback->offset;
7452 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7454 btrfs_release_path(path);
7457 /* Didn't find it, we can carry on */
7462 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7463 struct btrfs_file_extent_item);
7464 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7465 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7466 btrfs_release_path(path);
7467 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7469 struct extent_record *tmp;
7470 tmp = container_of(cache, struct extent_record, cache);
7473 * If we found an extent record for the bytenr for this
7474 * particular backref then we can't add it to our
7475 * current extent record. We only want to add backrefs
7476 * that don't have a corresponding extent item in the
7477 * extent tree since they likely belong to this record
7478 * and we need to fix it if it doesn't match bytenrs.
/* Adopt the on-disk values into the previously-unmatched backref. */
7484 dback->found_ref += 1;
7485 dback->disk_bytenr = bytenr;
7486 dback->bytes = bytes;
7489 * Set this so the verify backref code knows not to trust the
7490 * values in this backref.
7499 * Record orphan data ref into corresponding root.
7501 * Return 0 if the extent item contains data ref and recorded.
7502 * Return 1 if the extent item contains no useful data ref
7503 * On that case, it may contains only shared_dataref or metadata backref
7504 * or the file extent exists(this should be handled by the extent bytenr
7506 * Return <0 if something goes wrong.
/*
 * NOTE(review): the excerpt elides some original lines (continue
 * statements, braces, error-path gotos); comments below cover visible
 * code only.
 */
7508 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7509 struct extent_record *rec)
7511 struct btrfs_key key;
7512 struct btrfs_root *dest_root;
7513 struct extent_backref *back;
7514 struct data_backref *dback;
7515 struct orphan_data_extent *orphan;
7516 struct btrfs_path *path;
7517 int recorded_data_ref = 0;
7522 path = btrfs_alloc_path();
/* Only data backrefs seen in the extent tree but never matched to a
 * file extent (found_ref == 0) are candidates for orphan recording. */
7525 list_for_each_entry(back, &rec->backrefs, list) {
7526 if (back->full_backref || !back->is_data ||
7527 !back->found_extent_tree)
7529 dback = to_data_backref(back);
7530 if (dback->found_ref)
7532 key.objectid = dback->root;
7533 key.type = BTRFS_ROOT_ITEM_KEY;
7534 key.offset = (u64)-1;
7536 dest_root = btrfs_read_fs_root(fs_info, &key);
7538 /* For non-exist root we just skip it */
7539 if (IS_ERR(dest_root) || !dest_root)
7542 key.objectid = dback->owner;
7543 key.type = BTRFS_EXTENT_DATA_KEY;
7544 key.offset = dback->offset;
7546 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7547 btrfs_release_path(path);
7549 * For ret < 0, it's OK since the fs-tree may be corrupted,
7550 * we need to record it for inode/file extent rebuild.
7551 * For ret > 0, we record it only for file extent rebuild.
7552 * For ret == 0, the file extent exists but only bytenr
7553 * mismatch, let the original bytenr fix routine to handle,
/* Build the orphan entry and attach it to the destination root. */
7559 orphan = malloc(sizeof(*orphan));
7564 INIT_LIST_HEAD(&orphan->list);
7565 orphan->root = dback->root;
7566 orphan->objectid = dback->owner;
7567 orphan->offset = dback->offset;
7568 orphan->disk_bytenr = rec->cache.start;
7569 orphan->disk_len = rec->cache.size;
7570 list_add(&dest_root->orphan_data_extents, &orphan->list);
7571 recorded_data_ref = 1;
7574 btrfs_free_path(path);
/* 0 when at least one orphan ref was recorded, 1 otherwise. */
7576 return !recorded_data_ref;
7582 * when an incorrect extent item is found, this will delete
7583 * all of the existing entries for it and recreate them
7584 * based on what the tree scan found.
/*
 * fixup_extent_refs - rebuild the extent tree entries for one record:
 * (1) reconcile/repair the data backrefs, (2) delete all existing
 * extent records for the range, (3) re-insert a ref for every backref
 * that was actually found, skipping blocks known to be corrupt.
 *
 * NOTE(review): the excerpt elides some original lines (goto labels,
 * braces, error checks); comments cover visible code only.
 */
7586 static int fixup_extent_refs(struct btrfs_fs_info *info,
7587 struct cache_tree *extent_cache,
7588 struct extent_record *rec)
7590 struct btrfs_trans_handle *trans = NULL;
7592 struct btrfs_path *path;
7593 struct list_head *cur = rec->backrefs.next;
7594 struct cache_extent *cache;
7595 struct extent_backref *back;
7599 if (rec->flag_block_full_backref)
7600 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7602 path = btrfs_alloc_path();
7606 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7608 * Sometimes the backrefs themselves are so broken they don't
7609 * get attached to any meaningful rec, so first go back and
7610 * check any of our backrefs that we couldn't find and throw
7611 * them into the list if we find the backref so that
7612 * verify_backrefs can figure out what to do.
7614 ret = find_possible_backrefs(info, path, extent_cache, rec);
7619 /* step one, make sure all of the backrefs agree */
7620 ret = verify_backrefs(info, path, rec);
7624 trans = btrfs_start_transaction(info->extent_root, 1);
7625 if (IS_ERR(trans)) {
7626 ret = PTR_ERR(trans);
7630 /* step two, delete all the existing records */
7631 ret = delete_extent_records(trans, info->extent_root, path,
7632 rec->start, rec->max_size);
7637 /* was this block corrupt? If so, don't add references to it */
7638 cache = lookup_cache_extent(info->corrupt_blocks,
7639 rec->start, rec->max_size);
7645 /* step three, recreate all the refs we did find */
7646 while(cur != &rec->backrefs) {
7647 back = to_extent_backref(cur);
7651 * if we didn't find any references, don't create a
7654 if (!back->found_ref)
/* The rebuilt refs are authoritative, so the full-backref flag
 * mismatch (if any) is considered fixed from here on. */
7657 rec->bad_full_backref = 0;
7658 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7666 int err = btrfs_commit_transaction(trans, info->extent_root);
7671 btrfs_free_path(path);
/*
 * fixup_extent_flags - set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on a
 * single extent item to match what the scan recorded in @rec.
 *
 * Looks up the item by METADATA_ITEM_KEY (metadata, offset = level) or
 * EXTENT_ITEM_KEY (offset = max_size), rewrites its flags field in
 * place, and commits.  Returns 0 or a negative error.
 *
 * NOTE(review): the excerpt elides a few original lines (else keywords,
 * some returns/braces); comments cover visible code only.
 */
7675 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7676 struct extent_record *rec)
7678 struct btrfs_trans_handle *trans;
7679 struct btrfs_root *root = fs_info->extent_root;
7680 struct btrfs_path *path;
7681 struct btrfs_extent_item *ei;
7682 struct btrfs_key key;
7686 key.objectid = rec->start;
7687 if (rec->metadata) {
7688 key.type = BTRFS_METADATA_ITEM_KEY;
7689 key.offset = rec->info_level;
7691 key.type = BTRFS_EXTENT_ITEM_KEY;
7692 key.offset = rec->max_size;
7695 path = btrfs_alloc_path();
7699 trans = btrfs_start_transaction(root, 0);
7700 if (IS_ERR(trans)) {
7701 btrfs_free_path(path);
7702 return PTR_ERR(trans);
7705 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Error path: release resources before propagating. */
7707 btrfs_free_path(path);
7708 btrfs_commit_transaction(trans, root);
/* Not-found path: the extent item is missing entirely. */
7711 fprintf(stderr, "Didn't find extent for %llu\n",
7712 (unsigned long long)rec->start);
7713 btrfs_free_path(path);
7714 btrfs_commit_transaction(trans, root);
7718 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7719 struct btrfs_extent_item);
7720 flags = btrfs_extent_flags(path->nodes[0], ei);
7721 if (rec->flag_block_full_backref) {
7722 fprintf(stderr, "setting full backref on %llu\n",
7723 (unsigned long long)key.objectid);
7724 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7726 fprintf(stderr, "clearing full backref on %llu\n",
7727 (unsigned long long)key.objectid);
7728 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7730 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7731 btrfs_mark_buffer_dirty(path->nodes[0]);
7732 btrfs_free_path(path);
7733 return btrfs_commit_transaction(trans, root);
7736 /* right now we only prune from the extent allocation tree */
/*
 * prune_one_block - remove the parent node pointer that references one
 * corrupt block, effectively detaching the bad block from the extent
 * tree.  Searches down to corrupt->level + 1 (the parent), first tries
 * the slot the search landed on, then linearly scans the node for a
 * blockptr matching corrupt->cache.start before calling btrfs_del_ptr().
 *
 * NOTE(review): the excerpt elides some original lines (goto labels,
 * declarations of slot/found/nritems, braces); comments cover visible
 * code only.
 */
7737 static int prune_one_block(struct btrfs_trans_handle *trans,
7738 struct btrfs_fs_info *info,
7739 struct btrfs_corrupt_block *corrupt)
7742 struct btrfs_path path;
7743 struct extent_buffer *eb;
7747 int level = corrupt->level + 1;
7749 btrfs_init_path(&path);
7751 /* we want to stop at the parent to our busted block */
7752 path.lowest_level = level;
7754 ret = btrfs_search_slot(trans, info->extent_root,
7755 &corrupt->key, &path, -1, 1);
7760 eb = path.nodes[level];
7767 * hopefully the search gave us the block we want to prune,
7768 * lets try that first
7770 slot = path.slots[level];
7771 found = btrfs_node_blockptr(eb, slot);
7772 if (found == corrupt->cache.start)
7775 nritems = btrfs_header_nritems(eb);
7777 /* the search failed, lets scan this node and hope we find it */
7778 for (slot = 0; slot < nritems; slot++) {
7779 found = btrfs_node_blockptr(eb, slot);
7780 if (found == corrupt->cache.start)
7784 * we couldn't find the bad block. TODO, search all the nodes for pointers
7787 if (eb == info->extent_root->node) {
7792 btrfs_release_path(&path);
/* Found it: delete the parent's pointer to the corrupt block. */
7797 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7798 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7801 btrfs_release_path(&path);
/*
 * prune_corrupt_blocks - walk the fs_info->corrupt_blocks cache and
 * prune each recorded corrupt block via prune_one_block(), starting a
 * transaction lazily on the first entry, then commit.
 *
 * NOTE(review): the excerpt elides some original lines (loop braces,
 * IS_ERR check around the transaction, the empty-cache return);
 * comments cover visible code only.
 */
7805 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7807 struct btrfs_trans_handle *trans = NULL;
7808 struct cache_extent *cache;
7809 struct btrfs_corrupt_block *corrupt;
7812 cache = search_cache_extent(info->corrupt_blocks, 0);
7816 trans = btrfs_start_transaction(info->extent_root, 1);
7818 return PTR_ERR(trans);
7820 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7821 prune_one_block(trans, info, corrupt);
/* Each pruned block is dropped from the corrupt-block cache. */
7822 remove_cache_extent(info->corrupt_blocks, cache);
7825 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * reset_cached_block_groups - clear the EXTENT_DIRTY ranges cached in
 * fs_info->free_space_cache and walk the block groups starting at each
 * cleared range (advancing by key.objectid + key.offset).
 *
 * NOTE(review): the excerpt elides most of this function's control flow
 * (loops, flag resets); only the visible calls are documented here —
 * presumably the per-block-group cached state is also reset, but that
 * cannot be confirmed from this excerpt.
 */
7829 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7831 struct btrfs_block_group_cache *cache;
7836 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7837 &start, &end, EXTENT_DIRTY);
7840 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7846 cache = btrfs_lookup_first_block_group(fs_info, start);
7851 start = cache->key.objectid + cache->key.offset;
/*
 * check_extent_refs - top-level pass over the cached extent records:
 * report (and, in repair mode, fix) duplicate extent items, ref-count
 * mismatches, backpointer mismatches, owner-ref failures, bad
 * full-backref flags, stripe-crossing metadata and wrong chunk types.
 *
 * In repair mode it first pins every problematic extent and corrupt
 * block as "excluded" so repairs never allocate from them, prunes
 * corrupt blocks, and finally rebuilds block accounting in a
 * transaction.
 *
 * NOTE(review): the excerpt elides many original lines (loop braces,
 * goto labels, declarations of fixed/recorded/cur_err etc.); comments
 * cover visible code only.
 */
7855 static int check_extent_refs(struct btrfs_root *root,
7856 struct cache_tree *extent_cache)
7858 struct extent_record *rec;
7859 struct cache_extent *cache;
7868 * if we're doing a repair, we have to make sure
7869 * we don't allocate from the problem extents.
7870 * In the worst case, this will be all the
/* Pin all known-problem extents so repairs can't allocate from them. */
7873 cache = search_cache_extent(extent_cache, 0);
7875 rec = container_of(cache, struct extent_record, cache);
7876 set_extent_dirty(root->fs_info->excluded_extents,
7878 rec->start + rec->max_size - 1,
7880 cache = next_cache_extent(cache);
7883 /* pin down all the corrupted blocks too */
7884 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7886 set_extent_dirty(root->fs_info->excluded_extents,
7888 cache->start + cache->size - 1,
7890 cache = next_cache_extent(cache);
7892 prune_corrupt_blocks(root->fs_info);
7893 reset_cached_block_groups(root->fs_info);
7896 reset_cached_block_groups(root->fs_info);
7899 * We need to delete any duplicate entries we find first otherwise we
7900 * could mess up the extent tree when we have backrefs that actually
7901 * belong to a different extent item and not the weird duplicate one.
7903 while (repair && !list_empty(&duplicate_extents)) {
7904 rec = to_extent_record(duplicate_extents.next);
7905 list_del_init(&rec->list);
7907 /* Sometimes we can find a backref before we find an actual
7908 * extent, so we need to process it a little bit to see if there
7909 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7910 * if this is a backref screwup. If we need to delete stuff
7911 * process_duplicates() will return 0, otherwise it will return
7914 if (process_duplicates(root, extent_cache, rec))
7916 ret = delete_duplicate_records(root, rec);
7920 * delete_duplicate_records will return the number of entries
7921 * deleted, so if it's greater than 0 then we know we actually
7922 * did something and we need to remove.
/* Main loop: examine (and possibly repair) every cached record. */
7936 cache = search_cache_extent(extent_cache, 0);
7939 rec = container_of(cache, struct extent_record, cache);
7940 if (rec->num_duplicates) {
7941 fprintf(stderr, "extent item %llu has multiple extent "
7942 "items\n", (unsigned long long)rec->start);
7947 if (rec->refs != rec->extent_item_refs) {
7948 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7949 (unsigned long long)rec->start,
7950 (unsigned long long)rec->nr);
7951 fprintf(stderr, "extent item %llu, found %llu\n",
7952 (unsigned long long)rec->extent_item_refs,
7953 (unsigned long long)rec->refs);
7954 ret = record_orphan_data_extents(root->fs_info, rec);
7961 * we can't use the extent to repair file
7962 * extent, let the fallback method handle it.
7964 if (!fixed && repair) {
7965 ret = fixup_extent_refs(
7976 if (all_backpointers_checked(rec, 1)) {
7977 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7978 (unsigned long long)rec->start,
7979 (unsigned long long)rec->nr);
7981 if (!fixed && !recorded && repair) {
7982 ret = fixup_extent_refs(root->fs_info,
7991 if (!rec->owner_ref_checked) {
7992 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7993 (unsigned long long)rec->start,
7994 (unsigned long long)rec->nr);
7995 if (!fixed && !recorded && repair) {
7996 ret = fixup_extent_refs(root->fs_info,
8005 if (rec->bad_full_backref) {
8006 fprintf(stderr, "bad full backref, on [%llu]\n",
8007 (unsigned long long)rec->start);
8009 ret = fixup_extent_flags(root->fs_info, rec);
8018 * Although it's not a extent ref's problem, we reuse this
8019 * routine for error reporting.
8020 * No repair function yet.
8022 if (rec->crossing_stripes) {
8024 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8025 rec->start, rec->start + rec->max_size);
8030 if (rec->wrong_chunk_type) {
8032 "bad extent [%llu, %llu), type mismatch with chunk\n",
8033 rec->start, rec->start + rec->max_size);
/* Done with this record: drop it and, if clean/fixed, unpin it. */
8038 remove_cache_extent(extent_cache, cache);
8039 free_all_extent_backrefs(rec);
8040 if (!init_extent_tree && repair && (!cur_err || fixed))
8041 clear_extent_dirty(root->fs_info->excluded_extents,
8043 rec->start + rec->max_size - 1,
8049 if (ret && ret != -EAGAIN) {
8050 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8053 struct btrfs_trans_handle *trans;
8055 root = root->fs_info->extent_root;
8056 trans = btrfs_start_transaction(root, 1);
8057 if (IS_ERR(trans)) {
8058 ret = PTR_ERR(trans);
/* After repairs, recompute block accounting and commit. */
8062 btrfs_fix_block_accounting(trans, root);
8063 ret = btrfs_commit_transaction(trans, root);
8068 fprintf(stderr, "repaired damaged extent references\n");
/*
 * calc_stripe_length - compute the per-device stripe size of a chunk
 * from its RAID profile, total length and stripe count.
 *
 * Visible profile handling: RAID0 divides by num_stripes; RAID10 uses
 * twice the length divided by num_stripes (two copies); RAID5/RAID6
 * divide by num_stripes minus 1/2 parity stripes; all other profiles
 * fall through to the full length.
 *
 * NOTE(review): the excerpt elides the opening brace, the declaration
 * of stripe_size, some else keywords and the final return.
 */
8074 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8078 if (type & BTRFS_BLOCK_GROUP_RAID0) {
8079 stripe_size = length;
8080 stripe_size /= num_stripes;
8081 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8082 stripe_size = length * 2;
8083 stripe_size /= num_stripes;
8084 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8085 stripe_size = length;
8086 stripe_size /= (num_stripes - 1);
8087 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8088 stripe_size = length;
8089 stripe_size /= (num_stripes - 2);
8091 stripe_size = length;
8097 * Check the chunk with its block group/dev list ref:
8098 * Return 0 if all refs seems valid.
8099 * Return 1 if part of refs seems valid, need later check for rebuild ref
8100 * like missing block group and needs to search extent tree to rebuild them.
8101 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * NOTE(review): the excerpt elides some original lines (declarations of
 * ret/err/devid/offset/length, braces, ret assignments); comments below
 * cover visible code only.
 */
8103 static int check_chunk_refs(struct chunk_record *chunk_rec,
8104 struct block_group_tree *block_group_cache,
8105 struct device_extent_tree *dev_extent_cache,
8108 struct cache_extent *block_group_item;
8109 struct block_group_record *block_group_rec;
8110 struct cache_extent *dev_extent_item;
8111 struct device_extent_record *dev_extent_rec;
8115 int metadump_v2 = 0;
/* Step 1: cross-check the chunk against its block group item. */
8119 block_group_item = lookup_cache_extent(&block_group_cache->tree,
8122 if (block_group_item) {
8123 block_group_rec = container_of(block_group_item,
8124 struct block_group_record,
8126 if (chunk_rec->length != block_group_rec->offset ||
8127 chunk_rec->offset != block_group_rec->objectid ||
8129 chunk_rec->type_flags != block_group_rec->flags)) {
8132 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8133 chunk_rec->objectid,
8138 chunk_rec->type_flags,
8139 block_group_rec->objectid,
8140 block_group_rec->type,
8141 block_group_rec->offset,
8142 block_group_rec->offset,
8143 block_group_rec->objectid,
8144 block_group_rec->flags);
/* Matching block group found: claim it for this chunk. */
8147 list_del_init(&block_group_rec->list);
8148 chunk_rec->bg_rec = block_group_rec;
8153 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8154 chunk_rec->objectid,
8159 chunk_rec->type_flags);
/* Step 2: verify every stripe has a matching device extent. */
8166 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8167 chunk_rec->num_stripes);
8168 for (i = 0; i < chunk_rec->num_stripes; ++i) {
8169 devid = chunk_rec->stripes[i].devid;
8170 offset = chunk_rec->stripes[i].offset;
8171 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8172 devid, offset, length);
8173 if (dev_extent_item) {
8174 dev_extent_rec = container_of(dev_extent_item,
8175 struct device_extent_record,
8177 if (dev_extent_rec->objectid != devid ||
8178 dev_extent_rec->offset != offset ||
8179 dev_extent_rec->chunk_offset != chunk_rec->offset ||
8180 dev_extent_rec->length != length) {
8183 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8184 chunk_rec->objectid,
8187 chunk_rec->stripes[i].devid,
8188 chunk_rec->stripes[i].offset,
8189 dev_extent_rec->objectid,
8190 dev_extent_rec->offset,
8191 dev_extent_rec->length);
/* Matching dev extent: move it onto the chunk's list. */
8194 list_move(&dev_extent_rec->chunk_list,
8195 &chunk_rec->dextents);
8200 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8201 chunk_rec->objectid,
8204 chunk_rec->stripes[i].devid,
8205 chunk_rec->stripes[i].offset);
8212 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * check_chunks - run check_chunk_refs() on every cached chunk and sort
 * the chunks into @good, @rebuild or @bad lists by its result (0, >0,
 * <0 respectively).  Afterwards, any block group or device extent left
 * unclaimed by a chunk is reported as orphaned.
 *
 * NOTE(review): the excerpt elides some original lines (loop braces,
 * the silent checks around the orphan messages, ret handling); comments
 * cover visible code only.
 */
8213 int check_chunks(struct cache_tree *chunk_cache,
8214 struct block_group_tree *block_group_cache,
8215 struct device_extent_tree *dev_extent_cache,
8216 struct list_head *good, struct list_head *bad,
8217 struct list_head *rebuild, int silent)
8219 struct cache_extent *chunk_item;
8220 struct chunk_record *chunk_rec;
8221 struct block_group_record *bg_rec;
8222 struct device_extent_record *dext_rec;
8226 chunk_item = first_cache_extent(chunk_cache);
8227 while (chunk_item) {
8228 chunk_rec = container_of(chunk_item, struct chunk_record,
8230 err = check_chunk_refs(chunk_rec, block_group_cache,
8231 dev_extent_cache, silent);
8234 if (err == 0 && good)
8235 list_add_tail(&chunk_rec->list, good);
8236 if (err > 0 && rebuild)
8237 list_add_tail(&chunk_rec->list, rebuild);
8239 list_add_tail(&chunk_rec->list, bad);
8240 chunk_item = next_cache_extent(chunk_item);
/* Anything still on these lists was never matched to a chunk. */
8243 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8246 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8254 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8258 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * check_device_used - sum the lengths of all device extents belonging
 * to @dev_rec and compare the total against the device item's
 * byte_used field, reporting a mismatch.
 *
 * NOTE(review): the excerpt elides some original lines (loop braces,
 * declaration/returns); comments cover visible code only.
 */
8269 static int check_device_used(struct device_record *dev_rec,
8270 struct device_extent_tree *dext_cache)
8272 struct cache_extent *cache;
8273 struct device_extent_record *dev_extent_rec;
8276 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8278 dev_extent_rec = container_of(cache,
8279 struct device_extent_record,
/* Stop once the iteration crosses into another device's extents. */
8281 if (dev_extent_rec->objectid != dev_rec->devid)
8284 list_del_init(&dev_extent_rec->device_list);
8285 total_byte += dev_extent_rec->length;
8286 cache = next_cache_extent(cache);
8289 if (total_byte != dev_rec->byte_used) {
8291 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8292 total_byte, dev_rec->byte_used, dev_rec->objectid,
8293 dev_rec->type, dev_rec->offset);
8300 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * check_devices - run check_device_used() for every device record in
 * @dev_cache, then report any device extents whose device item was
 * never found (the no_device_orphans list).
 *
 * NOTE(review): the excerpt elides some original lines (loop braces,
 * err accumulation, return); comments cover visible code only.
 */
8301 static int check_devices(struct rb_root *dev_cache,
8302 struct device_extent_tree *dev_extent_cache)
8304 struct rb_node *dev_node;
8305 struct device_record *dev_rec;
8306 struct device_extent_record *dext_rec;
8310 dev_node = rb_first(dev_cache);
8312 dev_rec = container_of(dev_node, struct device_record, node);
8313 err = check_device_used(dev_rec, dev_extent_cache);
8317 dev_node = rb_next(dev_node);
8319 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8322 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8323 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * add_root_item_to_list - allocate a root_item_record describing one
 * root (tree bytenr, level, drop key, etc.) and append it to @head for
 * later processing by deal_root_from_list().
 *
 * NOTE(review): the excerpt elides the malloc-failure check and the
 * conditional around the drop_key copy; comments cover visible code
 * only.
 */
8330 static int add_root_item_to_list(struct list_head *head,
8331 u64 objectid, u64 bytenr, u64 last_snapshot,
8332 u8 level, u8 drop_level,
8333 int level_size, struct btrfs_key *drop_key)
8336 struct root_item_record *ri_rec;
8337 ri_rec = malloc(sizeof(*ri_rec));
8340 ri_rec->bytenr = bytenr;
8341 ri_rec->objectid = objectid;
8342 ri_rec->level = level;
8343 ri_rec->level_size = level_size;
8344 ri_rec->drop_level = drop_level;
8345 ri_rec->last_snapshot = last_snapshot;
8347 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8348 list_add_tail(&ri_rec->list, head);
/*
 * Drain @list, unlinking every queued root_item_record.
 * NOTE(review): the free() call itself is elided from this listing.
 */
8353 static void free_root_item_list(struct list_head *list)
8355 struct root_item_record *ri_rec;
8357 while (!list_empty(list)) {
8358 ri_rec = list_first_entry(list, struct root_item_record,
8360 list_del_init(&ri_rec->list);
/*
 * Process every queued root in @list: read each root node, add it to the
 * pending set, then repeatedly call run_next_block() to walk tree blocks
 * and populate the various caches (extents, chunks, devices, block
 * groups, device extents).  After the list is drained, keep calling
 * run_next_block() with a NULL record to flush whatever remains pending.
 *
 * Returns 0 on success; NOTE(review): error-propagation lines are
 * partially elided in this listing — confirm against the full file.
 */
8365 static int deal_root_from_list(struct list_head *list,
8366 struct btrfs_root *root,
8367 struct block_info *bits,
8369 struct cache_tree *pending,
8370 struct cache_tree *seen,
8371 struct cache_tree *reada,
8372 struct cache_tree *nodes,
8373 struct cache_tree *extent_cache,
8374 struct cache_tree *chunk_cache,
8375 struct rb_root *dev_cache,
8376 struct block_group_tree *block_group_cache,
8377 struct device_extent_tree *dev_extent_cache)
8382 while (!list_empty(list)) {
8383 struct root_item_record *rec;
8384 struct extent_buffer *buf;
8385 rec = list_entry(list->next,
8386 struct root_item_record, list);
8388 buf = read_tree_block(root->fs_info->tree_root,
8389 rec->bytenr, rec->level_size, 0);
/* Unreadable/corrupt root node: drop the buffer and bail on this root */
8390 if (!extent_buffer_uptodate(buf)) {
8391 free_extent_buffer(buf);
8395 ret = add_root_to_pending(buf, extent_cache, pending,
8396 seen, nodes, rec->objectid);
8400 * To rebuild extent tree, we need deal with snapshot
8401 * one by one, otherwise we deal with node firstly which
8402 * can maximize readahead.
8405 ret = run_next_block(root, bits, bits_nr, &last,
8406 pending, seen, reada, nodes,
8407 extent_cache, chunk_cache,
8408 dev_cache, block_group_cache,
8409 dev_extent_cache, rec);
8413 free_extent_buffer(buf);
8414 list_del(&rec->list);
/* List drained: flush remaining pending blocks (NULL root record) */
8420 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8421 reada, nodes, extent_cache, chunk_cache,
8422 dev_cache, block_group_cache,
8423 dev_extent_cache, NULL);
/*
 * Top-level pass of the original fsck mode: walk every tree (tree root,
 * chunk root, all fs roots found via ROOT_ITEMs — including partially
 * dropped snapshots), populating caches of extents, chunks, devices,
 * block groups and device extents, then cross-check them against each
 * other (check_chunks / check_extent_refs / check_devices).
 *
 * Returns 0 if everything is consistent.  NOTE(review): many error
 * checks, labels and the loop structure of the ROOT_ITEM scan are
 * elided from this listing; the cleanup ordering below reflects two
 * distinct exit paths (normal vs. error) — confirm against full file.
 */
8433 static int check_chunks_and_extents(struct btrfs_root *root)
8435 struct rb_root dev_cache;
8436 struct cache_tree chunk_cache;
8437 struct block_group_tree block_group_cache;
8438 struct device_extent_tree dev_extent_cache;
8439 struct cache_tree extent_cache;
8440 struct cache_tree seen;
8441 struct cache_tree pending;
8442 struct cache_tree reada;
8443 struct cache_tree nodes;
8444 struct extent_io_tree excluded_extents;
8445 struct cache_tree corrupt_blocks;
8446 struct btrfs_path path;
8447 struct btrfs_key key;
8448 struct btrfs_key found_key;
8450 struct block_info *bits;
8452 struct extent_buffer *leaf;
8454 struct btrfs_root_item ri;
8455 struct list_head dropping_trees;
8456 struct list_head normal_trees;
8457 struct btrfs_root *root1;
/* Initialize every per-run cache/tree before any tree walking starts */
8462 dev_cache = RB_ROOT;
8463 cache_tree_init(&chunk_cache);
8464 block_group_tree_init(&block_group_cache);
8465 device_extent_tree_init(&dev_extent_cache);
8467 cache_tree_init(&extent_cache);
8468 cache_tree_init(&seen);
8469 cache_tree_init(&pending);
8470 cache_tree_init(&nodes);
8471 cache_tree_init(&reada);
8472 cache_tree_init(&corrupt_blocks);
8473 extent_io_tree_init(&excluded_extents);
8474 INIT_LIST_HEAD(&dropping_trees);
8475 INIT_LIST_HEAD(&normal_trees);
/* Hook fsck state into fs_info so allocator/free paths can see it */
8478 root->fs_info->excluded_extents = &excluded_extents;
8479 root->fs_info->fsck_extent_cache = &extent_cache;
8480 root->fs_info->free_extent_hook = free_extent_hook;
8481 root->fs_info->corrupt_blocks = &corrupt_blocks;
8485 bits = malloc(bits_nr * sizeof(struct block_info));
8491 if (ctx.progress_enabled) {
8492 ctx.tp = TASK_EXTENTS;
8493 task_start(ctx.info);
/* Queue the tree root and chunk root for traversal */
8497 root1 = root->fs_info->tree_root;
8498 level = btrfs_header_level(root1->node);
8499 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8500 root1->node->start, 0, level, 0,
8501 root1->nodesize, NULL);
8504 root1 = root->fs_info->chunk_root;
8505 level = btrfs_header_level(root1->node);
8506 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8507 root1->node->start, 0, level, 0,
8508 root1->nodesize, NULL);
/* Scan the root tree for every ROOT_ITEM (fs trees and snapshots) */
8511 btrfs_init_path(&path);
8514 key.type = BTRFS_ROOT_ITEM_KEY;
8515 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8520 leaf = path.nodes[0];
8521 slot = path.slots[0];
8522 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8523 ret = btrfs_next_leaf(root, &path);
8526 leaf = path.nodes[0];
8527 slot = path.slots[0];
8529 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8530 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
8531 unsigned long offset;
8534 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8535 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8536 last_snapshot = btrfs_root_last_snapshot(&ri);
/* drop_progress objectid == 0: a normal (not-being-dropped) tree */
8537 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8538 level = btrfs_root_level(&ri);
8539 level_size = root->nodesize;
8540 ret = add_root_item_to_list(&normal_trees,
8542 btrfs_root_bytenr(&ri),
8543 last_snapshot, level,
8544 0, level_size, NULL);
/* Otherwise: snapshot mid-deletion, queue with its drop_progress key */
8548 level = btrfs_root_level(&ri);
8549 level_size = root->nodesize;
8550 objectid = found_key.objectid;
8551 btrfs_disk_key_to_cpu(&found_key,
8553 ret = add_root_item_to_list(&dropping_trees,
8555 btrfs_root_bytenr(&ri),
8556 last_snapshot, level,
8558 level_size, &found_key);
8565 btrfs_release_path(&path);
8568 * check_block can return -EAGAIN if it fixes something, please keep
8569 * this in mind when dealing with return values from these functions, if
8570 * we get -EAGAIN we want to fall through and restart the loop.
8572 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8573 &seen, &reada, &nodes, &extent_cache,
8574 &chunk_cache, &dev_cache, &block_group_cache,
8581 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8582 &pending, &seen, &reada, &nodes,
8583 &extent_cache, &chunk_cache, &dev_cache,
8584 &block_group_cache, &dev_extent_cache);
/* All trees walked: now cross-check the populated caches */
8591 ret = check_chunks(&chunk_cache, &block_group_cache,
8592 &dev_extent_cache, NULL, NULL, NULL, 0);
8599 ret = check_extent_refs(root, &extent_cache);
8606 ret = check_devices(&dev_cache, &dev_extent_cache);
8611 task_stop(ctx.info);
/* Normal-exit teardown: detach fsck hooks from fs_info, free caches */
8613 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8614 extent_io_tree_cleanup(&excluded_extents);
8615 root->fs_info->fsck_extent_cache = NULL;
8616 root->fs_info->free_extent_hook = NULL;
8617 root->fs_info->corrupt_blocks = NULL;
8618 root->fs_info->excluded_extents = NULL;
8621 free_chunk_cache_tree(&chunk_cache);
8622 free_device_cache_tree(&dev_cache);
8623 free_block_group_tree(&block_group_cache);
8624 free_device_extent_tree(&dev_extent_cache);
8625 free_extent_cache_tree(&seen);
8626 free_extent_cache_tree(&pending);
8627 free_extent_cache_tree(&reada);
8628 free_extent_cache_tree(&nodes);
/* Error-path teardown (separate label in full file): free everything,
 * including the extent record cache and any unprocessed root lists */
8631 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8632 free_extent_cache_tree(&seen);
8633 free_extent_cache_tree(&pending);
8634 free_extent_cache_tree(&reada);
8635 free_extent_cache_tree(&nodes);
8636 free_chunk_cache_tree(&chunk_cache);
8637 free_block_group_tree(&block_group_cache);
8638 free_device_cache_tree(&dev_cache);
8639 free_device_extent_tree(&dev_extent_cache);
8640 free_extent_record_cache(root->fs_info, &extent_cache);
8641 free_root_item_list(&normal_trees);
8642 free_root_item_list(&dropping_trees);
8643 extent_io_tree_cleanup(&excluded_extents);
8648 * Check backrefs of a tree block given by @bytenr or @eb.
8650 * @root: the root containing the @bytenr or @eb
8651 * @eb: tree block extent buffer, can be NULL
8652 * @bytenr: bytenr of the tree block to search
8653 * @level: tree level of the tree block
8654 * @owner: owner of the tree block
8656 * Return >0 for any error found and output error message
8657 * Return 0 for no error found
8659 static int check_tree_block_ref(struct btrfs_root *root,
8660 struct extent_buffer *eb, u64 bytenr,
8661 int level, u64 owner)
8663 struct btrfs_key key;
8664 struct btrfs_root *extent_root = root->fs_info->extent_root;
8665 struct btrfs_path path;
8666 struct btrfs_extent_item *ei;
8667 struct btrfs_extent_inline_ref *iref;
8668 struct extent_buffer *leaf;
8674 u32 nodesize = root->nodesize;
/* With SKINNY_METADATA the extent item key is METADATA_ITEM (offset =
 * level); otherwise it is a classic EXTENT_ITEM */
8681 btrfs_init_path(&path);
8682 key.objectid = bytenr;
8683 if (btrfs_fs_incompat(root->fs_info,
8684 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8685 key.type = BTRFS_METADATA_ITEM_KEY;
8687 key.type = BTRFS_EXTENT_ITEM_KEY;
8688 key.offset = (u64)-1;
8690 /* Search for the backref in extent tree */
8691 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8693 err |= BACKREF_MISSING;
/* Search landed past the item; step back to the extent item itself */
8696 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8698 err |= BACKREF_MISSING;
8702 leaf = path.nodes[0];
8703 slot = path.slots[0];
8704 btrfs_item_key_to_cpu(leaf, &key, slot);
8706 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* Skinny metadata stores the level in key.offset; old format keeps a
 * btrfs_tree_block_info before the inline refs */
8708 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8709 skinny_level = (int)key.offset;
8710 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8712 struct btrfs_tree_block_info *info;
8714 info = (struct btrfs_tree_block_info *)(ei + 1);
8715 skinny_level = btrfs_tree_block_level(leaf, info);
8716 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* Sanity checks comparing the extent item against the tree block */
8723 if (!(btrfs_extent_flags(leaf, ei) &
8724 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8726 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8727 key.objectid, nodesize,
8728 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8729 err = BACKREF_MISMATCH;
8731 header_gen = btrfs_header_generation(eb);
8732 extent_gen = btrfs_extent_generation(leaf, ei);
8733 if (header_gen != extent_gen) {
8735 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8736 key.objectid, nodesize, header_gen,
8738 err = BACKREF_MISMATCH;
8740 if (level != skinny_level) {
8742 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8743 key.objectid, nodesize, level, skinny_level);
8744 err = BACKREF_MISMATCH;
8746 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8748 "extent[%llu %u] is referred by other roots than %llu",
8749 key.objectid, nodesize, root->objectid);
8750 err = BACKREF_MISMATCH;
8755 * Iterate the extent/metadata item to find the exact backref
8757 item_size = btrfs_item_size_nr(leaf, slot);
8758 ptr = (unsigned long)iref;
8759 end = (unsigned long)ei + item_size;
8761 iref = (struct btrfs_extent_inline_ref *)ptr;
8762 type = btrfs_extent_inline_ref_type(leaf, iref);
8763 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8765 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8766 (offset == root->objectid || offset == owner)) {
8768 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8769 /* Check if the backref points to valid referencer */
/* Recurse on the parent block; 0 from the recursion means valid */
8770 found_ref = !check_tree_block_ref(root, NULL, offset,
8776 ptr += btrfs_extent_inline_ref_size(type);
8780 * Inlined extent item doesn't have what we need, check
8781 * TREE_BLOCK_REF_KEY
8784 btrfs_release_path(&path);
8785 key.objectid = bytenr;
8786 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8787 key.offset = root->objectid;
8789 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8794 err |= BACKREF_MISSING;
8796 btrfs_release_path(&path);
8797 if (eb && (err & BACKREF_MISSING))
8798 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8799 bytenr, nodesize, owner, level);
8804 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8806 * Return >0 any error found and output error message
8807 * Return 0 for no error found
8809 static int check_extent_data_item(struct btrfs_root *root,
8810 struct extent_buffer *eb, int slot)
8812 struct btrfs_file_extent_item *fi;
8813 struct btrfs_path path;
8814 struct btrfs_root *extent_root = root->fs_info->extent_root;
8815 struct btrfs_key fi_key;
8816 struct btrfs_key dbref_key;
8817 struct extent_buffer *leaf;
8818 struct btrfs_extent_item *ei;
8819 struct btrfs_extent_inline_ref *iref;
8820 struct btrfs_extent_data_ref *dref;
8822 u64 file_extent_gen;
8825 u64 extent_num_bytes;
8833 int found_dbackref = 0;
8837 btrfs_item_key_to_cpu(eb, &fi_key, slot);
8838 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8839 file_extent_gen = btrfs_file_extent_generation(eb, fi);
8841 /* Nothing to check for hole and inline data extents */
8842 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8843 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8846 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8847 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8848 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8850 /* Check unaligned disk_num_bytes and num_bytes */
8851 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8853 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8854 fi_key.objectid, fi_key.offset, disk_num_bytes,
8856 err |= BYTES_UNALIGNED;
/* Account global data usage (allocated vs. referenced) as we go */
8858 data_bytes_allocated += disk_num_bytes;
8860 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8862 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8863 fi_key.objectid, fi_key.offset, extent_num_bytes,
8865 err |= BYTES_UNALIGNED;
8867 data_bytes_referenced += extent_num_bytes;
8869 owner = btrfs_header_owner(eb);
8871 /* Check the extent item of the file extent in extent tree */
8872 btrfs_init_path(&path);
8873 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8874 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8875 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8877 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8879 err |= BACKREF_MISSING;
8883 leaf = path.nodes[0];
8884 slot = path.slots[0];
8885 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8887 extent_flags = btrfs_extent_flags(leaf, ei);
8888 extent_gen = btrfs_extent_generation(leaf, ei);
/* A data extent must carry the DATA flag in its extent item */
8890 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8892 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8893 disk_bytenr, disk_num_bytes,
8894 BTRFS_EXTENT_FLAG_DATA);
8895 err |= BACKREF_MISMATCH;
/* The file extent cannot be older than the extent it points to */
8898 if (file_extent_gen < extent_gen) {
8900 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8901 disk_bytenr, disk_num_bytes, file_extent_gen,
8903 err |= BACKREF_MISMATCH;
8906 /* Check data backref inside that extent item */
8907 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8908 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8909 ptr = (unsigned long)iref;
8910 end = (unsigned long)ei + item_size;
8912 iref = (struct btrfs_extent_inline_ref *)ptr;
8913 type = btrfs_extent_inline_ref_type(leaf, iref);
8914 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8916 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8917 ref_root = btrfs_extent_data_ref_root(leaf, dref);
8918 if (ref_root == owner || ref_root == root->objectid)
8920 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/* Shared data ref: validate the parent tree block instead */
8921 found_dbackref = !check_tree_block_ref(root, NULL,
8922 btrfs_extent_inline_ref_offset(leaf, iref),
8928 ptr += btrfs_extent_inline_ref_size(type);
8931 /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8932 if (!found_dbackref) {
8933 btrfs_release_path(&path);
8935 btrfs_init_path(&path);
8936 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8937 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
/* Keyed data refs are addressed by a hash of (root, objectid, offset) */
8938 dbref_key.offset = hash_extent_data_ref(root->objectid,
8939 fi_key.objectid, fi_key.offset);
8941 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8942 &dbref_key, &path, 0, 0);
8947 if (!found_dbackref)
8948 err |= BACKREF_MISSING;
8950 btrfs_release_path(&path);
8951 if (err & BACKREF_MISSING) {
8952 error("data extent[%llu %llu] backref lost",
8953 disk_bytenr, disk_num_bytes);
8959 * Get real tree block level for the case like shared block
8960 * Return >= 0 as tree level
8961 * Return <0 for error
8963 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8965 struct extent_buffer *eb;
8966 struct btrfs_path path;
8967 struct btrfs_key key;
8968 struct btrfs_extent_item *ei;
8971 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8976 /* Search extent tree for extent generation and level */
8977 key.objectid = bytenr;
8978 key.type = BTRFS_METADATA_ITEM_KEY;
8979 key.offset = (u64)-1;
8981 btrfs_init_path(&path);
8982 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8985 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8993 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8994 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8995 struct btrfs_extent_item);
8996 flags = btrfs_extent_flags(path.nodes[0], ei);
/* Only tree blocks have a level to report */
8997 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9002 /* Get transid for later read_tree_block() check */
9003 transid = btrfs_extent_generation(path.nodes[0], ei);
9005 /* Get backref level as one source */
9006 if (key.type == BTRFS_METADATA_ITEM_KEY) {
/* Skinny metadata: key.offset IS the level */
9007 backref_level = key.offset;
9009 struct btrfs_tree_block_info *info;
9011 info = (struct btrfs_tree_block_info *)(ei + 1);
9012 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9014 btrfs_release_path(&path);
9016 /* Get level from tree block as an alternative source */
9017 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9018 if (!extent_buffer_uptodate(eb)) {
9019 free_extent_buffer(eb);
9022 header_level = btrfs_header_level(eb);
9023 free_extent_buffer(eb);
/* The two level sources must agree, else the metadata is inconsistent */
9025 if (header_level != backref_level)
9027 return header_level;
/* Error path: make sure the path is released */
9030 btrfs_release_path(&path);
9035 * Check if a tree block backref is valid (points to a valid tree block)
9036 * if level == -1, level will be resolved
9037 * Return >0 for any error found and print error message
9039 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9040 u64 bytenr, int level)
9042 struct btrfs_root *root;
9043 struct btrfs_key key;
9044 struct btrfs_path path;
9045 struct extent_buffer *eb;
9046 struct extent_buffer *node;
9047 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9051 /* Query level for level == -1 special case */
9053 level = query_tree_block_level(fs_info, bytenr);
9055 err |= REFERENCER_MISSING;
/* Look up the root that is supposed to reference this block */
9059 key.objectid = root_id;
9060 key.type = BTRFS_ROOT_ITEM_KEY;
9061 key.offset = (u64)-1;
9063 root = btrfs_read_fs_root(fs_info, &key);
9065 err |= REFERENCER_MISSING;
9069 /* Read out the tree block to get item/node key */
9070 eb = read_tree_block(root, bytenr, root->nodesize, 0);
9071 if (!extent_buffer_uptodate(eb)) {
9072 err |= REFERENCER_MISSING;
9073 free_extent_buffer(eb);
9077 /* Empty tree, no need to check key */
9078 if (!btrfs_header_nritems(eb) && !level) {
9079 free_extent_buffer(eb);
/* Take the first key of the block so we can search down to it */
9084 btrfs_node_key_to_cpu(eb, &key, 0);
9086 btrfs_item_key_to_cpu(eb, &key, 0);
9088 free_extent_buffer(eb);
9090 btrfs_init_path(&path);
/* Stop the search at the level of the block we are validating */
9091 path.lowest_level = level;
9092 /* Search with the first key, to ensure we can reach it */
9093 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9095 err |= REFERENCER_MISSING;
/* The block the search reached must be exactly the one claimed */
9099 node = path.nodes[level];
9100 if (btrfs_header_bytenr(node) != bytenr) {
9102 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9103 bytenr, nodesize, bytenr,
9104 btrfs_header_bytenr(node));
9105 err |= REFERENCER_MISMATCH;
9107 if (btrfs_header_level(node) != level) {
9109 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9110 bytenr, nodesize, level,
9111 btrfs_header_level(node));
9112 err |= REFERENCER_MISMATCH;
9116 btrfs_release_path(&path);
9118 if (err & REFERENCER_MISSING) {
9120 error("extent [%llu %d] lost referencer (owner: %llu)",
9121 bytenr, nodesize, root_id);
9124 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9125 bytenr, nodesize, root_id, level);
9132 * Check referencer for shared block backref
9133 * If level == -1, this function will resolve the level.
9135 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9136 u64 parent, u64 bytenr, int level)
9138 struct extent_buffer *eb;
9139 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9141 int found_parent = 0;
9144 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9145 if (!extent_buffer_uptodate(eb))
9149 level = query_tree_block_level(fs_info, bytenr);
/* Parent must sit exactly one level above the referenced block */
9153 if (level + 1 != btrfs_header_level(eb))
/* Scan the parent's node pointers for one pointing at @bytenr */
9156 nr = btrfs_header_nritems(eb);
9157 for (i = 0; i < nr; i++) {
9158 if (bytenr == btrfs_node_blockptr(eb, i)) {
9164 free_extent_buffer(eb);
9165 if (!found_parent) {
9167 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9168 bytenr, nodesize, parent, level);
9169 return REFERENCER_MISSING;
9175 * Check referencer for normal (inlined) data ref
9176 * If len == 0, it will be resolved by searching in extent tree
9178 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9179 u64 root_id, u64 objectid, u64 offset,
9180 u64 bytenr, u64 len, u32 count)
9182 struct btrfs_root *root;
9183 struct btrfs_root *extent_root = fs_info->extent_root;
9184 struct btrfs_key key;
9185 struct btrfs_path path;
9186 struct extent_buffer *leaf;
9187 struct btrfs_file_extent_item *fi;
9188 u32 found_count = 0;
/* First resolve the extent's length from the extent tree (len == 0) */
9193 key.objectid = bytenr;
9194 key.type = BTRFS_EXTENT_ITEM_KEY;
9195 key.offset = (u64)-1;
9197 btrfs_init_path(&path);
9198 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9201 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9204 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9205 if (key.objectid != bytenr ||
9206 key.type != BTRFS_EXTENT_ITEM_KEY)
9209 btrfs_release_path(&path);
/* Then open the fs root claimed by the backref */
9211 key.objectid = root_id;
9212 key.type = BTRFS_ROOT_ITEM_KEY;
9213 key.offset = (u64)-1;
9214 btrfs_init_path(&path);
9216 root = btrfs_read_fs_root(fs_info, &key);
9220 key.objectid = objectid;
9221 key.type = BTRFS_EXTENT_DATA_KEY;
9223 * It can be nasty as data backref offset is
9224 * file offset - file extent offset, which is smaller or
9225 * equal to original backref offset. The only special case is
9226 * overflow. So we need to special check and do further search.
9228 key.offset = offset & (1ULL << 63) ? 0 : offset;
9230 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9235 * Search afterwards to get correct one
9236 * NOTE: As we must do a comprehensive check on the data backref to
9237 * make sure the dref count also matches, we must iterate all file
9238 * extents for that inode.
9241 leaf = path.nodes[0];
9242 slot = path.slots[0];
9244 btrfs_item_key_to_cpu(leaf, &key, slot);
/* Stop once we leave this inode's EXTENT_DATA items */
9245 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9247 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9249 * Except normal disk bytenr and disk num bytes, we still
9250 * need to do extra check on dbackref offset as
9251 * dbackref offset = file_offset - file_extent_offset
9253 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9254 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9255 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9259 ret = btrfs_next_item(root, &path);
9264 btrfs_release_path(&path);
/* The number of matching file extents must equal the ref count */
9265 if (found_count != count) {
9267 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9268 bytenr, len, root_id, objectid, offset, count, found_count);
9269 return REFERENCER_MISSING;
9275 * Check if the referencer of a shared data backref exists
9277 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9278 u64 parent, u64 bytenr)
9280 struct extent_buffer *eb;
9281 struct btrfs_key key;
9282 struct btrfs_file_extent_item *fi;
9283 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9285 int found_parent = 0;
/* Read the claimed parent leaf and scan its file extent items */
9288 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9289 if (!extent_buffer_uptodate(eb))
9292 nr = btrfs_header_nritems(eb);
9293 for (i = 0; i < nr; i++) {
9294 btrfs_item_key_to_cpu(eb, &key, i);
9295 if (key.type != BTRFS_EXTENT_DATA_KEY)
9298 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
/* Inline extents have no disk bytenr to match */
9299 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9302 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9309 free_extent_buffer(eb);
9310 if (!found_parent) {
9311 error("shared extent %llu referencer lost (parent: %llu)",
9313 return REFERENCER_MISSING;
9319 * This function will check a given extent item, including its backref and
9320 * itself (like crossing stripe boundary and type)
9322 * Since we don't use extent_record anymore, introduce new error bit
9324 static int check_extent_item(struct btrfs_fs_info *fs_info,
9325 struct extent_buffer *eb, int slot)
9327 struct btrfs_extent_item *ei;
9328 struct btrfs_extent_inline_ref *iref;
9329 struct btrfs_extent_data_ref *dref;
9333 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9334 u32 item_size = btrfs_item_size_nr(eb, slot);
9339 struct btrfs_key key;
/* Account used bytes: data uses key.offset (length), metadata uses
 * nodesize (METADATA_ITEM keys carry the level, not a length) */
9343 btrfs_item_key_to_cpu(eb, &key, slot);
9344 if (key.type == BTRFS_EXTENT_ITEM_KEY)
9345 bytes_used += key.offset;
9347 bytes_used += nodesize;
9349 if (item_size < sizeof(*ei)) {
9351 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9352 * old thing when on disk format is still un-determined.
9353 * No need to care about it anymore
9355 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9359 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9360 flags = btrfs_extent_flags(eb, ei);
9362 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9364 if (metadata && check_crossing_stripes(global_info, key.objectid,
9366 error("bad metadata [%llu, %llu) crossing stripe boundary",
9367 key.objectid, key.objectid + nodesize);
9368 err |= CROSSING_STRIPE_BOUNDARY;
9371 ptr = (unsigned long)(ei + 1);
9373 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9374 /* Old EXTENT_ITEM metadata */
9375 struct btrfs_tree_block_info *info;
9377 info = (struct btrfs_tree_block_info *)ptr;
9378 level = btrfs_tree_block_level(eb, info);
9379 ptr += sizeof(struct btrfs_tree_block_info);
9381 /* New METADATA_ITEM */
9384 end = (unsigned long)ei + item_size;
9387 err |= ITEM_SIZE_MISMATCH;
9391 /* Now check every backref in this extent item */
9393 iref = (struct btrfs_extent_inline_ref *)ptr;
9394 type = btrfs_extent_inline_ref_type(eb, iref);
9395 offset = btrfs_extent_inline_ref_offset(eb, iref);
/* Dispatch each inline ref to its dedicated referencer check */
9397 case BTRFS_TREE_BLOCK_REF_KEY:
9398 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9402 case BTRFS_SHARED_BLOCK_REF_KEY:
9403 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9407 case BTRFS_EXTENT_DATA_REF_KEY:
9408 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9409 ret = check_extent_data_backref(fs_info,
9410 btrfs_extent_data_ref_root(eb, dref),
9411 btrfs_extent_data_ref_objectid(eb, dref),
9412 btrfs_extent_data_ref_offset(eb, dref),
9413 key.objectid, key.offset,
9414 btrfs_extent_data_ref_count(eb, dref));
9417 case BTRFS_SHARED_DATA_REF_KEY:
9418 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9422 error("extent[%llu %d %llu] has unknown ref type: %d",
9423 key.objectid, key.type, key.offset, type);
9424 err |= UNKNOWN_TYPE;
9428 ptr += btrfs_extent_inline_ref_size(type);
9437 * Check if a dev extent item is referred correctly by its chunk
9439 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9440 struct extent_buffer *eb, int slot)
9442 struct btrfs_root *chunk_root = fs_info->chunk_root;
9443 struct btrfs_dev_extent *ptr;
9444 struct btrfs_path path;
9445 struct btrfs_key chunk_key;
9446 struct btrfs_key devext_key;
9447 struct btrfs_chunk *chunk;
9448 struct extent_buffer *l;
9452 int found_chunk = 0;
9455 btrfs_item_key_to_cpu(eb, &devext_key, slot);
9456 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9457 length = btrfs_dev_extent_length(eb, ptr);
/* The dev extent stores the key of its owning chunk; look it up */
9459 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9460 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9461 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9463 btrfs_init_path(&path);
9464 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9469 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
/* Chunk length must match the dev extent length */
9470 if (btrfs_chunk_length(l, chunk) != length)
/* One of the chunk's stripes must point back at this (devid, offset) */
9473 num_stripes = btrfs_chunk_num_stripes(l, chunk);
9474 for (i = 0; i < num_stripes; i++) {
9475 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9476 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9478 if (devid == devext_key.objectid &&
9479 offset == devext_key.offset) {
9485 btrfs_release_path(&path);
9488 "device extent[%llu, %llu, %llu] did not find the related chunk",
9489 devext_key.objectid, devext_key.offset, length);
9490 return REFERENCER_MISSING;
9496 * Check if the used space is correct with the dev item
9498 static int check_dev_item(struct btrfs_fs_info *fs_info,
9499 struct extent_buffer *eb, int slot)
9501 struct btrfs_root *dev_root = fs_info->dev_root;
9502 struct btrfs_dev_item *dev_item;
9503 struct btrfs_path path;
9504 struct btrfs_key key;
9505 struct btrfs_dev_extent *ptr;
9511 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9512 dev_id = btrfs_device_id(eb, dev_item);
9513 used = btrfs_device_bytes_used(eb, dev_item);
/* Find the first dev extent belonging to this device */
9515 key.objectid = dev_id;
9516 key.type = BTRFS_DEV_EXTENT_KEY;
9519 btrfs_init_path(&path);
9520 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9522 btrfs_item_key_to_cpu(eb, &key, slot);
9523 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9524 key.objectid, key.type, key.offset);
9525 btrfs_release_path(&path);
9526 return REFERENCER_MISSING;
9529 /* Iterate dev_extents to calculate the used space of a device */
9531 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9533 if (key.objectid > dev_id)
9535 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9538 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9539 struct btrfs_dev_extent);
9540 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9542 ret = btrfs_next_item(dev_root, &path);
9546 btrfs_release_path(&path);
/* The summed extent lengths must equal the dev item's used bytes */
9548 if (used != total) {
9549 btrfs_item_key_to_cpu(eb, &key, slot);
9551 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9552 total, used, BTRFS_ROOT_TREE_OBJECTID,
9553 BTRFS_DEV_EXTENT_KEY, dev_id);
9554 return ACCOUNTING_MISMATCH;
9560 * Check a block group item with its referener (chunk) and its used space
9561 * with extent/metadata item
9563 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9564 struct extent_buffer *eb, int slot)
9566 struct btrfs_root *extent_root = fs_info->extent_root;
9567 struct btrfs_root *chunk_root = fs_info->chunk_root;
9568 struct btrfs_block_group_item *bi;
9569 struct btrfs_block_group_item bg_item;
9570 struct btrfs_path path;
9571 struct btrfs_key bg_key;
9572 struct btrfs_key chunk_key;
9573 struct btrfs_key extent_key;
9574 struct btrfs_chunk *chunk;
9575 struct extent_buffer *leaf;
9576 struct btrfs_extent_item *ei;
9577 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
/* Copy the block group item out of the leaf to read used/flags */
9585 btrfs_item_key_to_cpu(eb, &bg_key, slot);
9586 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9587 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9588 used = btrfs_block_group_used(&bg_item);
9589 bg_flags = btrfs_block_group_flags(&bg_item);
9591 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9592 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9593 chunk_key.offset = bg_key.objectid;
9595 btrfs_init_path(&path);
9596 /* Search for the referencer chunk */
9597 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9600 "block group[%llu %llu] did not find the related chunk item",
9601 bg_key.objectid, bg_key.offset);
9602 err |= REFERENCER_MISSING;
9604 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9605 struct btrfs_chunk);
/* Chunk length must match the block group length (bg_key.offset) */
9606 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9609 "block group[%llu %llu] related chunk item length does not match",
9610 bg_key.objectid, bg_key.offset);
9611 err |= REFERENCER_MISMATCH;
9614 btrfs_release_path(&path);
9616 /* Search from the block group bytenr */
9617 extent_key.objectid = bg_key.objectid;
9618 extent_key.type = 0;
9619 extent_key.offset = 0;
9621 btrfs_init_path(&path);
9622 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9626 /* Iterate extent tree to account used space */
9628 leaf = path.nodes[0];
9629 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* Stop once past the end of this block group's range */
9630 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9633 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9634 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9636 if (extent_key.objectid < bg_key.objectid)
/* METADATA_ITEM implies one nodesize; EXTENT_ITEM carries its length */
9639 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9642 total += extent_key.offset;
/* Verify the extent type matches this block group's type flags */
9644 ei = btrfs_item_ptr(leaf, path.slots[0],
9645 struct btrfs_extent_item);
9646 flags = btrfs_extent_flags(leaf, ei);
9647 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9648 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9650 "bad extent[%llu, %llu) type mismatch with chunk",
9651 extent_key.objectid,
9652 extent_key.objectid + extent_key.offset);
9653 err |= CHUNK_TYPE_MISMATCH;
9655 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9656 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9657 BTRFS_BLOCK_GROUP_METADATA))) {
9659 "bad extent[%llu, %llu) type mismatch with chunk",
9660 extent_key.objectid,
9661 extent_key.objectid + nodesize);
9662 err |= CHUNK_TYPE_MISMATCH;
9666 ret = btrfs_next_item(extent_root, &path);
9672 btrfs_release_path(&path);
/* The accumulated extent bytes must equal the block group's used */
9674 if (total != used) {
9676 "block group[%llu %llu] used %llu but extent items used %llu",
9677 bg_key.objectid, bg_key.offset, used, total);
9678 err |= ACCOUNTING_MISMATCH;
9684 * Check a chunk item.
9685 * Including checking all referred dev_extents and block group
/*
 * Verify one chunk item: length alignment, chunk type/profile sanity,
 * the matching BLOCK_GROUP_ITEM in the extent tree, and one DEV_EXTENT
 * per stripe in the dev tree.  Returns a bitmask of error flags
 * (REFERENCER_MISSING, BYTES_UNALIGNED, ...), 0 when clean.
 * NOTE(review): for a chunk item the key objectid is the fixed
 * BTRFS_FIRST_CHUNK_TREE_OBJECTID; the chunk start is key.offset.
 */
9687 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9688 struct extent_buffer *eb, int slot)
9690 struct btrfs_root *extent_root = fs_info->extent_root;
9691 struct btrfs_root *dev_root = fs_info->dev_root;
9692 struct btrfs_path path;
9693 struct btrfs_key chunk_key;
9694 struct btrfs_key bg_key;
9695 struct btrfs_key devext_key;
9696 struct btrfs_chunk *chunk;
9697 struct extent_buffer *leaf;
9698 struct btrfs_block_group_item *bi;
9699 struct btrfs_block_group_item bg_item;
9700 struct btrfs_dev_extent *ptr;
9701 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9713 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9714 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9715 length = btrfs_chunk_length(eb, chunk);
9716 chunk_end = chunk_key.offset + length;
/* Chunk length must be sector aligned. */
9717 if (!IS_ALIGNED(length, sectorsize)) {
9718 error("chunk[%llu %llu) not aligned to %u",
9719 chunk_key.offset, chunk_end, sectorsize);
9720 err |= BYTES_UNALIGNED;
9724 type = btrfs_chunk_type(eb, chunk);
9725 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
/* A chunk must carry at least one of DATA/METADATA/SYSTEM. */
9726 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9727 error("chunk[%llu %llu) has no chunk type",
9728 chunk_key.offset, chunk_end);
9729 err |= UNKNOWN_TYPE;
/* profile & (profile - 1): more than one RAID profile bit set. */
9731 if (profile && (profile & (profile - 1))) {
9732 error("chunk[%llu %llu) multiple profiles detected: %llx",
9733 chunk_key.offset, chunk_end, profile);
9734 err |= UNKNOWN_TYPE;
/* Each chunk must be paired with a block group item of same range. */
9737 bg_key.objectid = chunk_key.offset;
9738 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9739 bg_key.offset = length;
9741 btrfs_init_path(&path);
9742 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9745 "chunk[%llu %llu) did not find the related block group item",
9746 chunk_key.offset, chunk_end);
9747 err |= REFERENCER_MISSING;
9749 leaf = path.nodes[0];
9750 bi = btrfs_item_ptr(leaf, path.slots[0],
9751 struct btrfs_block_group_item);
9752 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9754 if (btrfs_block_group_flags(&bg_item) != type) {
9756 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9757 chunk_key.offset, chunk_end, type,
9758 btrfs_block_group_flags(&bg_item));
9759 err |= REFERENCER_MISSING;
/* Each stripe must be backed by a dev extent on its device. */
9763 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9764 for (i = 0; i < num_stripes; i++) {
9765 btrfs_release_path(&path);
9766 btrfs_init_path(&path);
9767 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9768 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9769 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9771 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9776 leaf = path.nodes[0];
9777 ptr = btrfs_item_ptr(leaf, path.slots[0],
9778 struct btrfs_dev_extent);
9779 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9780 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9781 if (objectid != chunk_key.objectid ||
9782 offset != chunk_key.offset ||
9783 btrfs_dev_extent_length(leaf, ptr) != length)
9787 err |= BACKREF_MISSING;
9789 "chunk[%llu %llu) stripe %d did not find the related dev extent",
/*
 * Fix: print the chunk start (key.offset) like every other message in
 * this function; key.objectid is always BTRFS_FIRST_CHUNK_TREE_OBJECTID.
 */
9790 chunk_key.offset, chunk_end, i);
9793 btrfs_release_path(&path);
9799 * Main entry function to check known items and update related accounting info
/*
 * Dispatch per-item-type checks for every item in @eb and update global
 * accounting (total_csum_bytes).  Returns accumulated error flags from
 * the individual checkers.
 */
9801 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9803 struct btrfs_fs_info *fs_info = root->fs_info;
9804 struct btrfs_key key;
9807 struct btrfs_extent_data_ref *dref;
9812 btrfs_item_key_to_cpu(eb, &key, slot);
9816 case BTRFS_EXTENT_DATA_KEY:
9817 ret = check_extent_data_item(root, eb, slot);
9820 case BTRFS_BLOCK_GROUP_ITEM_KEY:
9821 ret = check_block_group_item(fs_info, eb, slot);
9824 case BTRFS_DEV_ITEM_KEY:
9825 ret = check_dev_item(fs_info, eb, slot);
9828 case BTRFS_CHUNK_ITEM_KEY:
9829 ret = check_chunk_item(fs_info, eb, slot);
9832 case BTRFS_DEV_EXTENT_KEY:
9833 ret = check_dev_extent_item(fs_info, eb, slot);
9836 case BTRFS_EXTENT_ITEM_KEY:
9837 case BTRFS_METADATA_ITEM_KEY:
9838 ret = check_extent_item(fs_info, eb, slot);
9841 case BTRFS_EXTENT_CSUM_KEY:
/* Csum items are only accounted, not structurally checked here. */
9842 total_csum_bytes += btrfs_item_size_nr(eb, slot);
9844 case BTRFS_TREE_BLOCK_REF_KEY:
9845 ret = check_tree_block_backref(fs_info, key.offset,
9849 case BTRFS_EXTENT_DATA_REF_KEY:
9850 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9851 ret = check_extent_data_backref(fs_info,
9852 btrfs_extent_data_ref_root(eb, dref),
9853 btrfs_extent_data_ref_objectid(eb, dref),
9854 btrfs_extent_data_ref_offset(eb, dref),
9856 btrfs_extent_data_ref_count(eb, dref));
9859 case BTRFS_SHARED_BLOCK_REF_KEY:
9860 ret = check_shared_block_backref(fs_info, key.offset,
9864 case BTRFS_SHARED_DATA_REF_KEY:
9865 ret = check_shared_data_backref(fs_info, key.offset,
/* Advance to the next item until the leaf is exhausted. */
9873 if (++slot < btrfs_header_nritems(eb))
9880 * Helper function for later fs/subvol tree check. To determine if a tree
9881 * block should be checked.
9882 * This function will ensure only the direct referencer with lowest rootid to
9883 * check a fs/subvolume tree block.
9885 * Backref check at extent tree would detect errors like missing subvolume
9886 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * Decide whether @root should be the one to check shared tree block @eb.
 * Walks the inline backrefs of the block's extent item; if any
 * TREE_BLOCK_REF has a lower root id than @root, someone else owns the
 * check.  Keyed (non-inline) tree block refs are intentionally not
 * searched (see comment below), so this is a heuristic to cut duplicate
 * work, not an exact answer.
 */
9888 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
9890 struct btrfs_root *extent_root = root->fs_info->extent_root;
9891 struct btrfs_key key;
9892 struct btrfs_path path;
9893 struct extent_buffer *leaf;
9895 struct btrfs_extent_item *ei;
9901 struct btrfs_extent_inline_ref *iref;
9904 btrfs_init_path(&path);
/* offset = -1 so the search lands after the extent item, then step back. */
9905 key.objectid = btrfs_header_bytenr(eb);
9906 key.type = BTRFS_METADATA_ITEM_KEY;
9907 key.offset = (u64)-1;
9910 * Any failure in backref resolving means we can't determine
9911 * whom the tree block belongs to.
9912 * So in that case, we need to check that tree block
9914 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9918 ret = btrfs_previous_extent_item(extent_root, &path,
9919 btrfs_header_bytenr(eb));
9923 leaf = path.nodes[0];
9924 slot = path.slots[0];
9925 btrfs_item_key_to_cpu(leaf, &key, slot);
9926 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* Skinny metadata items have no tree_block_info before the inline refs. */
9928 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9929 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9931 struct btrfs_tree_block_info *info;
9933 info = (struct btrfs_tree_block_info *)(ei + 1);
9934 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* Walk all inline refs in [iref, end of item). */
9937 item_size = btrfs_item_size_nr(leaf, slot);
9938 ptr = (unsigned long)iref;
9939 end = (unsigned long)ei + item_size;
9941 iref = (struct btrfs_extent_inline_ref *)ptr;
9942 type = btrfs_extent_inline_ref_type(leaf, iref);
9943 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9946 * We only check the tree block if current root is
9947 * the lowest referencer of it.
9949 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9950 offset < root->objectid) {
9951 btrfs_release_path(&path);
9955 ptr += btrfs_extent_inline_ref_size(type);
9958 * Normally we should also check keyed tree block ref, but that may be
9959 * very time consuming. Inlined ref should already make us skip a lot
9960 * of refs now. So skip search keyed tree block ref.
9964 btrfs_release_path(&path);
9969 * Traversal function for tree block. We will do:
9970 * 1) Skip shared fs/subvolume tree blocks
9971 * 2) Update related bytes accounting
9972 * 3) Pre-order traversal
/*
 * Recursive pre-order walk of @node for the low-memory check:
 * skips shared fs/subvol blocks owned by a lower root (should_check),
 * updates the global byte/waste accounting, checks the block itself
 * (backref + leaf items), then descends into children.
 * Errors are reported but the walk continues.
 */
9974 static int traverse_tree_block(struct btrfs_root *root,
9975 struct extent_buffer *node)
9977 struct extent_buffer *eb;
9978 struct btrfs_key key;
9979 struct btrfs_key drop_key;
9987 * Skip shared fs/subvolume tree block, in that case they will
9988 * be checked by referencer with lowest rootid
9990 if (is_fstree(root->objectid) && !should_check(root, node))
9993 /* Update bytes accounting */
9994 total_btree_bytes += node->len;
9995 if (fs_root_objectid(btrfs_header_owner(node)))
9996 total_fs_tree_bytes += node->len;
9997 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
9998 total_extent_tree_bytes += node->len;
/* Detect pre-3.x style reloc tree blocks (old backref format). */
9999 if (!found_old_backref &&
10000 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
10001 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
10002 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
10003 found_old_backref = 1;
10005 /* pre-order traversal, check itself first */
10006 level = btrfs_header_level(node);
10007 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
10008 btrfs_header_level(node),
10009 btrfs_header_owner(node));
10013 "check %s failed root %llu bytenr %llu level %d, force continue check",
10014 level ? "node":"leaf", root->objectid,
10015 btrfs_header_bytenr(node), btrfs_header_level(node));
/* Leaves: account slack space and check every item. */
10018 btree_space_waste += btrfs_leaf_free_space(root, node);
10019 ret = check_leaf_items(root, node);
/* Nodes: account unused key-pointer slots as waste. */
10024 nr = btrfs_header_nritems(node);
10025 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
10026 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10027 sizeof(struct btrfs_key_ptr);
10029 /* Then check all its children */
10030 for (i = 0; i < nr; i++) {
10031 u64 blocknr = btrfs_node_blockptr(node, i);
/* Skip children already dropped by an interrupted snapshot delete. */
10033 btrfs_node_key_to_cpu(node, &key, i);
10034 if (level == root->root_item.drop_level &&
10035 is_dropped_key(&key, &drop_key))
10039 * As a btrfs tree has at most 8 levels (0..7), it's quite safe
10040 * to call the function itself.
10042 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10043 if (extent_buffer_uptodate(eb)) {
10044 ret = traverse_tree_block(root, eb);
10047 free_extent_buffer(eb);
10054 * Low memory usage version of check_chunks_and_extents.
10056 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10058 struct btrfs_path path;
10059 struct btrfs_key key;
10060 struct btrfs_root *root1;
10061 struct btrfs_root *cur_root;
10065 root1 = root->fs_info->chunk_root;
10066 ret = traverse_tree_block(root1, root1->node);
10069 root1 = root->fs_info->tree_root;
10070 ret = traverse_tree_block(root1, root1->node);
10073 btrfs_init_path(&path);
10074 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10076 key.type = BTRFS_ROOT_ITEM_KEY;
10078 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10080 error("cannot find extent treet in tree_root");
10085 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10086 if (key.type != BTRFS_ROOT_ITEM_KEY)
10088 key.offset = (u64)-1;
10090 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10091 if (IS_ERR(cur_root) || !cur_root) {
10092 error("failed to read tree: %lld", key.objectid);
10096 ret = traverse_tree_block(cur_root, cur_root->node);
10100 ret = btrfs_next_item(root1, &path);
10106 btrfs_release_path(&path);
/*
 * Replace @root's node with a fresh empty one (or reuse the old block
 * when @overwrite), initialize its header, and update the root item in
 * the tree of tree roots if the bytenr did not change.
 * Used when re-initializing corrupted trees (extent tree, reloc roots).
 */
10110 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10111 struct btrfs_root *root, int overwrite)
10113 struct extent_buffer *c;
10114 struct extent_buffer *old = root->node;
10117 struct btrfs_disk_key disk_key = {0,0,0};
10123 extent_buffer_get(c);
10126 c = btrfs_alloc_free_block(trans, root,
10128 root->root_key.objectid,
10129 &disk_key, level, 0, 0);
10132 extent_buffer_get(c);
/* Build a brand-new empty header for the replacement block. */
10136 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10137 btrfs_set_header_level(c, level);
10138 btrfs_set_header_bytenr(c, c->start);
10139 btrfs_set_header_generation(c, trans->transid);
10140 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10141 btrfs_set_header_owner(c, root->root_key.objectid);
10143 write_extent_buffer(c, root->fs_info->fsid,
10144 btrfs_header_fsid(), BTRFS_FSID_SIZE);
10146 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10147 btrfs_header_chunk_tree_uuid(c),
10150 btrfs_mark_buffer_dirty(c);
10152 * this case can happen in the following case:
10154 * 1.overwrite previous root.
10156 * 2.reinit reloc data root, this is because we skip pin
10157 * down reloc data tree before which means we can allocate
10158 * same block bytenr here.
10160 if (old->start == c->start) {
10161 btrfs_set_root_generation(&root->root_item,
10163 root->root_item.level = btrfs_header_level(root->node);
10164 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10165 &root->root_key, &root->root_item);
10167 free_extent_buffer(c);
10171 free_extent_buffer(old);
/* Queue the root so the commit path writes the new root item. */
10173 add_root_to_dirty_list(root);
/*
 * Recursively pin every metadata block reachable from @eb so that a
 * later extent tree rebuild cannot allocate over live metadata.
 * @tree_root selects the tree-of-tree-roots mode: leaves are scanned
 * for ROOT_ITEMs and each referenced root is pinned in turn.
 */
10177 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10178 struct extent_buffer *eb, int tree_root)
10180 struct extent_buffer *tmp;
10181 struct btrfs_root_item *ri;
10182 struct btrfs_key key;
10185 int level = btrfs_header_level(eb);
10191 * If we have pinned this block before, don't pin it again.
10192 * This can not only avoid forever loop with broken filesystem
10193 * but also give us some speedups.
10195 if (test_range_bit(&fs_info->pinned_extents, eb->start,
10196 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10199 btrfs_pin_extent(fs_info, eb->start, eb->len);
10201 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10202 nritems = btrfs_header_nritems(eb);
10203 for (i = 0; i < nritems; i++) {
/* Leaf of the tree root: follow each root item to its tree. */
10205 btrfs_item_key_to_cpu(eb, &key, i);
10206 if (key.type != BTRFS_ROOT_ITEM_KEY)
10208 /* Skip the extent root and reloc roots */
10209 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10210 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10211 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10213 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10214 bytenr = btrfs_disk_root_bytenr(eb, ri);
10217 * If at any point we start needing the real root we
10218 * will have to build a stump root for the root we are
10219 * in, but for now this doesn't actually use the root so
10220 * just pass in extent_root.
10222 tmp = read_tree_block(fs_info->extent_root, bytenr,
10224 if (!extent_buffer_uptodate(tmp)) {
10225 fprintf(stderr, "Error reading root block\n");
10228 ret = pin_down_tree_blocks(fs_info, tmp, 0);
10229 free_extent_buffer(tmp);
/* Interior node: recurse into (or just pin) each child. */
10233 bytenr = btrfs_node_blockptr(eb, i);
10235 /* If we aren't the tree root don't read the block */
10236 if (level == 1 && !tree_root) {
10237 btrfs_pin_extent(fs_info, bytenr, nodesize);
10241 tmp = read_tree_block(fs_info->extent_root, bytenr,
10243 if (!extent_buffer_uptodate(tmp)) {
10244 fprintf(stderr, "Error reading tree block\n");
10247 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10248 free_extent_buffer(tmp);
/*
 * Pin all metadata: first everything under the chunk root, then the
 * whole tree of tree roots (tree_root=1 follows the root items too).
 */
10257 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10261 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10265 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block group cache purely from the chunk tree
 * (used after dropping all block groups for an extent tree re-init).
 * Marks each chunk's range dirty in the free space cache and refreshes
 * the avail_*_alloc_bits from the actual chunk types.
 */
10268 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10270 struct btrfs_block_group_cache *cache;
10271 struct btrfs_path *path;
10272 struct extent_buffer *leaf;
10273 struct btrfs_chunk *chunk;
10274 struct btrfs_key key;
10278 path = btrfs_alloc_path();
10283 key.type = BTRFS_CHUNK_ITEM_KEY;
10286 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
10288 btrfs_free_path(path);
10293 * We do this in case the block groups were screwed up and had alloc
10294 * bits that aren't actually set on the chunks. This happens with
10295 * restored images every time and could happen in real life I guess.
10297 fs_info->avail_data_alloc_bits = 0;
10298 fs_info->avail_metadata_alloc_bits = 0;
10299 fs_info->avail_system_alloc_bits = 0;
10301 /* First we need to create the in-memory block groups */
10303 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10304 ret = btrfs_next_leaf(fs_info->chunk_root, path);
10306 btrfs_free_path(path);
10314 leaf = path->nodes[0];
10315 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10316 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10321 chunk = btrfs_item_ptr(leaf, path->slots[0],
10322 struct btrfs_chunk);
/* One in-memory block group per chunk; key.offset is the chunk start. */
10323 btrfs_add_block_group(fs_info, 0,
10324 btrfs_chunk_type(leaf, chunk),
10325 key.objectid, key.offset,
10326 btrfs_chunk_length(leaf, chunk));
10327 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10328 key.offset + btrfs_chunk_length(leaf, chunk),
/* Walk the new block groups to (re)derive allocation bits. */
10334 cache = btrfs_lookup_first_block_group(fs_info, start);
10338 start = cache->key.objectid + cache->key.offset;
10341 btrfs_free_path(path);
/*
 * Remove any pending balance state: delete the balance item, delete all
 * TREE_RELOC root items from the tree of tree roots, then re-initialize
 * the data reloc tree (fresh empty root + root dir).
 */
10345 static int reset_balance(struct btrfs_trans_handle *trans,
10346 struct btrfs_fs_info *fs_info)
10348 struct btrfs_root *root = fs_info->tree_root;
10349 struct btrfs_path *path;
10350 struct extent_buffer *leaf;
10351 struct btrfs_key key;
10352 int del_slot, del_nr = 0;
10356 path = btrfs_alloc_path();
10360 key.objectid = BTRFS_BALANCE_OBJECTID;
10361 key.type = BTRFS_BALANCE_ITEM_KEY;
/* No balance item found: still reinit the data reloc tree below. */
10364 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10369 goto reinit_data_reloc;
10374 ret = btrfs_del_item(trans, root, path);
10377 btrfs_release_path(path);
/* Delete every reloc root item (objectid == TREE_RELOC). */
10379 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10380 key.type = BTRFS_ROOT_ITEM_KEY;
10383 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10387 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
/* Flush the batched range before moving to the next leaf. */
10392 ret = btrfs_del_items(trans, root, path,
10399 btrfs_release_path(path);
10402 ret = btrfs_search_slot(trans, root, &key, path,
10409 leaf = path->nodes[0];
10410 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10411 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10413 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
/* Start (or extend) the contiguous run of slots to delete. */
10418 del_slot = path->slots[0];
10427 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
10431 btrfs_release_path(path);
/* Re-create the data reloc tree from scratch. */
10434 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10435 key.type = BTRFS_ROOT_ITEM_KEY;
10436 key.offset = (u64)-1;
10437 root = btrfs_read_fs_root(fs_info, &key);
10438 if (IS_ERR(root)) {
10439 fprintf(stderr, "Error reading data reloc tree\n");
10440 ret = PTR_ERR(root);
10443 record_root_in_trans(trans, root);
10444 ret = btrfs_fsck_reinit_root(trans, root, 0);
10447 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10449 btrfs_free_path(path);
/*
 * Re-initialize the extent tree from scratch: pin all live metadata,
 * rebuild in-memory block groups from the chunk tree, reset the extent
 * root to an empty node, re-insert the block group items, and clear any
 * pending balance.  Refuses mixed block group filesystems (see below).
 */
10453 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10454 struct btrfs_fs_info *fs_info)
10460 * The only reason we don't do this is because right now we're just
10461 * walking the trees we find and pinning down their bytes, we don't look
10462 * at any of the leaves. In order to do mixed groups we'd have to check
10463 * the leaves of any fs roots and pin down the bytes for any file
10464 * extents we find. Not hard but why do it if we don't have to?
10466 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10467 fprintf(stderr, "We don't support re-initing the extent tree "
10468 "for mixed block groups yet, please notify a btrfs "
10469 "developer you want to do this so they can add this "
10470 "functionality.\n");
10475 * first we need to walk all of the trees except the extent tree and pin
10476 * down the bytes that are in use so we don't overwrite any existing
10479 ret = pin_metadata_blocks(fs_info);
10481 fprintf(stderr, "error pinning down used bytes\n");
10486 * Need to drop all the block groups since we're going to recreate all
10489 btrfs_free_block_groups(fs_info);
10490 ret = reset_block_groups(fs_info);
10492 fprintf(stderr, "error resetting the block groups\n");
10496 /* Ok we can allocate now, reinit the extent root */
10497 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10499 fprintf(stderr, "extent root initialization failed\n");
10501 * When the transaction code is updated we should end the
10502 * transaction, but for now progs only knows about commit so
10503 * just return an error.
10509 * Now we have all the in-memory block groups setup so we can make
10510 * allocations properly, and the metadata we care about is safe since we
10511 * pinned all of it above.
10514 struct btrfs_block_group_cache *cache;
10516 cache = btrfs_lookup_first_block_group(fs_info, start);
10519 start = cache->key.objectid + cache->key.offset;
/* Re-insert one BLOCK_GROUP_ITEM per in-memory block group. */
10520 ret = btrfs_insert_item(trans, fs_info->extent_root,
10521 &cache->key, &cache->item,
10522 sizeof(cache->item));
10524 fprintf(stderr, "Error adding block group\n");
10527 btrfs_extent_post_op(trans, fs_info->extent_root);
10530 ret = reset_balance(trans, fs_info);
10532 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force a COW of @eb by searching down to it with a transaction held:
 * locate the owning root from the header, set lowest_level to the
 * block's level and search (cow=1), which rewrites the block.
 */
10537 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10539 struct btrfs_path *path;
10540 struct btrfs_trans_handle *trans;
10541 struct btrfs_key key;
10544 printf("Recowing metadata block %llu\n", eb->start);
10545 key.objectid = btrfs_header_owner(eb);
10546 key.type = BTRFS_ROOT_ITEM_KEY;
10547 key.offset = (u64)-1;
10549 root = btrfs_read_fs_root(root->fs_info, &key);
10550 if (IS_ERR(root)) {
10551 fprintf(stderr, "Couldn't find owner root %llu\n",
10553 return PTR_ERR(root);
10556 path = btrfs_alloc_path();
10560 trans = btrfs_start_transaction(root, 1);
10561 if (IS_ERR(trans)) {
10562 btrfs_free_path(path);
10563 return PTR_ERR(trans);
/* Use the block's first key so the search walks through @eb itself. */
10566 path->lowest_level = btrfs_header_level(eb);
10567 if (path->lowest_level)
10568 btrfs_node_key_to_cpu(eb, &key, 0);
10570 btrfs_item_key_to_cpu(eb, &key, 0);
/* cow=1: reaching the block within a transaction rewrites it. */
10572 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10573 btrfs_commit_transaction(trans, root);
10574 btrfs_free_path(path);
/*
 * Delete a previously-recorded bad item (@bad->key in root @bad->root_id)
 * inside a fresh transaction.  Part of --repair handling.
 */
10578 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10580 struct btrfs_path *path;
10581 struct btrfs_trans_handle *trans;
10582 struct btrfs_key key;
10585 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10586 bad->key.type, bad->key.offset);
10587 key.objectid = bad->root_id;
10588 key.type = BTRFS_ROOT_ITEM_KEY;
10589 key.offset = (u64)-1;
10591 root = btrfs_read_fs_root(root->fs_info, &key);
10592 if (IS_ERR(root)) {
10593 fprintf(stderr, "Couldn't find owner root %llu\n",
10595 return PTR_ERR(root);
10598 path = btrfs_alloc_path();
10602 trans = btrfs_start_transaction(root, 1);
10603 if (IS_ERR(trans)) {
10604 btrfs_free_path(path);
10605 return PTR_ERR(trans);
/* ins_len=-1, cow=1: position on the item for deletion. */
10608 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10614 ret = btrfs_del_item(trans, root, path);
10616 btrfs_commit_transaction(trans, root);
10617 btrfs_free_path(path);
/*
 * Discard the log tree by zeroing the log root pointer and level in the
 * superblock, committed through a short transaction.
 */
10621 static int zero_log_tree(struct btrfs_root *root)
10623 struct btrfs_trans_handle *trans;
10626 trans = btrfs_start_transaction(root, 1);
10627 if (IS_ERR(trans)) {
10628 ret = PTR_ERR(trans);
10631 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10632 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10633 ret = btrfs_commit_transaction(trans, root);
/*
 * Read the data extent [start, start+len) one sector at a time through
 * @buf and insert the checksum for each sector into the csum tree.
 * NOTE(review): the third argument to btrfs_csum_file_block() is the
 * logical end of the run (start + len) — presumably used as the csum
 * item boundary; confirm against the csum API.
 */
10637 static int populate_csum(struct btrfs_trans_handle *trans,
10638 struct btrfs_root *csum_root, char *buf, u64 start,
10645 while (offset < len) {
10646 sectorsize = csum_root->sectorsize;
10647 ret = read_extent_data(csum_root, buf, start + offset,
10651 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10652 start + offset, buf, sectorsize);
10655 offset += sectorsize;
/*
 * Walk one fs/subvol tree and (re)generate csums for every regular
 * (non-inline, non-prealloc handling elided here) file extent found.
 * -EEXIST from populate_csum is tolerated (csum already present).
 */
10660 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10661 struct btrfs_root *csum_root,
10662 struct btrfs_root *cur_root)
10664 struct btrfs_path *path;
10665 struct btrfs_key key;
10666 struct extent_buffer *node;
10667 struct btrfs_file_extent_item *fi;
10674 path = btrfs_alloc_path();
/* One sector worth of scratch space for populate_csum(). */
10677 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10687 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10690 /* Iterate all regular file extents and fill its csum */
10692 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10694 if (key.type != BTRFS_EXTENT_DATA_KEY)
10696 node = path->nodes[0];
10697 slot = path->slots[0];
10698 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10699 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10701 start = btrfs_file_extent_disk_bytenr(node, fi);
10702 len = btrfs_file_extent_disk_num_bytes(node, fi);
10704 ret = populate_csum(trans, csum_root, buf, start, len);
10705 if (ret == -EEXIST)
10711 * TODO: if next leaf is corrupted, jump to nearest next valid
10714 ret = btrfs_next_item(cur_root, path);
10724 btrfs_free_path(path);
/*
 * Rebuild the csum tree by scanning every fs/subvol root item in the
 * tree of tree roots and delegating to fill_csum_tree_from_one_fs_root.
 * Used when the extent tree cannot be trusted (after --init-extent-tree).
 */
10729 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10730 struct btrfs_root *csum_root)
10732 struct btrfs_fs_info *fs_info = csum_root->fs_info;
10733 struct btrfs_path *path;
10734 struct btrfs_root *tree_root = fs_info->tree_root;
10735 struct btrfs_root *cur_root;
10736 struct extent_buffer *node;
10737 struct btrfs_key key;
10741 path = btrfs_alloc_path();
10745 key.objectid = BTRFS_FS_TREE_OBJECTID;
10747 key.type = BTRFS_ROOT_ITEM_KEY;
10749 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10758 node = path->nodes[0];
10759 slot = path->slots[0];
10760 btrfs_item_key_to_cpu(node, &key, slot);
/* Stop after the last possible subvolume objectid. */
10761 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10763 if (key.type != BTRFS_ROOT_ITEM_KEY)
10765 if (!is_fstree(key.objectid))
10767 key.offset = (u64)-1;
10769 cur_root = btrfs_read_fs_root(fs_info, &key);
10770 if (IS_ERR(cur_root) || !cur_root) {
10771 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10775 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10780 ret = btrfs_next_item(tree_root, path);
10790 btrfs_free_path(path);
/*
 * Rebuild the csum tree by scanning the extent tree: every EXTENT_ITEM
 * carrying the DATA flag gets its range checksummed via populate_csum.
 * Faster than the fs-tree scan but requires a trustworthy extent tree.
 */
10794 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10795 struct btrfs_root *csum_root)
10797 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10798 struct btrfs_path *path;
10799 struct btrfs_extent_item *ei;
10800 struct extent_buffer *leaf;
10802 struct btrfs_key key;
10805 path = btrfs_alloc_path();
10810 key.type = BTRFS_EXTENT_ITEM_KEY;
10813 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10815 btrfs_free_path(path);
/* One sector worth of scratch space for populate_csum(). */
10819 buf = malloc(csum_root->sectorsize);
10821 btrfs_free_path(path);
10826 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10827 ret = btrfs_next_leaf(extent_root, path);
10835 leaf = path->nodes[0];
10837 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10838 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10843 ei = btrfs_item_ptr(leaf, path->slots[0],
10844 struct btrfs_extent_item);
/* Skip tree blocks; only data extents carry csums. */
10845 if (!(btrfs_extent_flags(leaf, ei) &
10846 BTRFS_EXTENT_FLAG_DATA)) {
10851 ret = populate_csum(trans, csum_root, buf, key.objectid,
10858 btrfs_free_path(path);
10864 * Recalculate the csum and put it into the csum tree.
10866 * Extent tree init will wipe out all the extent info, so in that case, we
10867 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
10868 * will use fs/subvol trees to init the csum tree.
/*
 * Rebuild the csum tree.  @search_fs_tree selects the source of truth:
 * fs/subvol trees (after an extent tree re-init) or the extent tree.
 */
10870 static int fill_csum_tree(struct btrfs_trans_handle *trans,
10871 struct btrfs_root *csum_root,
10872 int search_fs_tree)
10874 if (search_fs_tree)
10875 return fill_csum_tree_from_fs(trans, csum_root);
10877 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache built by build_roots_info_cache:
 * free every root_item_info entry, then the cache tree itself.
 */
10880 static void free_roots_info_cache(void)
10882 if (!roots_info_cache)
10885 while (!cache_tree_empty(roots_info_cache)) {
10886 struct cache_extent *entry;
10887 struct root_item_info *rii;
10889 entry = first_cache_extent(roots_info_cache);
10892 remove_cache_extent(roots_info_cache, entry);
10893 rii = container_of(entry, struct root_item_info, cache_extent);
10897 free(roots_info_cache);
10898 roots_info_cache = NULL;
/*
 * Scan the extent tree and record, for every root id referenced by a
 * TREE_BLOCK_REF, the highest-level tree block (bytenr, generation,
 * level) plus how many blocks share that level.  The resulting
 * roots_info_cache is consumed by maybe_repair_root_item() to detect
 * stale root items (see repair_root_items()).
 */
10901 static int build_roots_info_cache(struct btrfs_fs_info *info)
10904 struct btrfs_key key;
10905 struct extent_buffer *leaf;
10906 struct btrfs_path *path;
10908 if (!roots_info_cache) {
10909 roots_info_cache = malloc(sizeof(*roots_info_cache));
10910 if (!roots_info_cache)
10912 cache_tree_init(roots_info_cache);
10915 path = btrfs_alloc_path();
10920 key.type = BTRFS_EXTENT_ITEM_KEY;
10923 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10926 leaf = path->nodes[0];
10929 struct btrfs_key found_key;
10930 struct btrfs_extent_item *ei;
10931 struct btrfs_extent_inline_ref *iref;
10932 int slot = path->slots[0];
10937 struct cache_extent *entry;
10938 struct root_item_info *rii;
10940 if (slot >= btrfs_header_nritems(leaf)) {
10941 ret = btrfs_next_leaf(info->extent_root, path);
10948 leaf = path->nodes[0];
10949 slot = path->slots[0];
10952 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
/* Only tree block extents are interesting here. */
10954 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10955 found_key.type != BTRFS_METADATA_ITEM_KEY)
10958 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10959 flags = btrfs_extent_flags(leaf, ei);
10961 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10962 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/* Skinny metadata: level is the key offset, no tree_block_info. */
10965 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10966 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10967 level = found_key.offset;
10969 struct btrfs_tree_block_info *binfo;
10971 binfo = (struct btrfs_tree_block_info *)(ei + 1);
10972 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10973 level = btrfs_tree_block_level(leaf, binfo);
10977 * For a root extent, it must be of the following type and the
10978 * first (and only one) iref in the item.
10980 type = btrfs_extent_inline_ref_type(leaf, iref);
10981 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10984 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
/* Insert a fresh entry for unseen roots; level -1 = uninitialized. */
10985 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10987 rii = malloc(sizeof(struct root_item_info));
10992 rii->cache_extent.start = root_id;
10993 rii->cache_extent.size = 1;
10994 rii->level = (u8)-1;
10995 entry = &rii->cache_extent;
10996 ret = insert_cache_extent(roots_info_cache, entry);
10999 rii = container_of(entry, struct root_item_info,
11003 ASSERT(rii->cache_extent.start == root_id);
11004 ASSERT(rii->cache_extent.size == 1);
/* Track the highest level seen; count blocks at that level. */
11006 if (level > rii->level || rii->level == (u8)-1) {
11007 rii->level = level;
11008 rii->bytenr = found_key.objectid;
11009 rii->gen = btrfs_extent_generation(leaf, ei);
11010 rii->node_count = 1;
11011 } else if (level == rii->level) {
11019 btrfs_free_path(path);
/*
 * Compare the on-disk root item at @path against the info gathered by
 * build_roots_info_cache and, unless @read_only_mode, rewrite the root
 * item's bytenr/level/generation to point at the real root node.
 * Refuses to act when the cached info is ambiguous (node_count != 1)
 * or when the root item's generation is newer than the found node's.
 */
11024 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11025 struct btrfs_path *path,
11026 const struct btrfs_key *root_key,
11027 const int read_only_mode)
11029 const u64 root_id = root_key->objectid;
11030 struct cache_extent *entry;
11031 struct root_item_info *rii;
11032 struct btrfs_root_item ri;
11033 unsigned long offset;
11035 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11038 "Error: could not find extent items for root %llu\n",
11039 root_key->objectid);
11043 rii = container_of(entry, struct root_item_info, cache_extent);
11044 ASSERT(rii->cache_extent.start == root_id);
11045 ASSERT(rii->cache_extent.size == 1);
/* More than one block at the top level: the true root is ambiguous. */
11047 if (rii->node_count != 1) {
11049 "Error: could not find btree root extent for root %llu\n",
11054 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11055 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11057 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11058 btrfs_root_level(&ri) != rii->level ||
11059 btrfs_root_generation(&ri) != rii->gen) {
11062 * If we're in repair mode but our caller told us to not update
11063 * the root item, i.e. just check if it needs to be updated, don't
11064 * print this message, since the caller will call us again shortly
11065 * for the same root item without read only mode (the caller will
11066 * open a transaction first).
11068 if (!(read_only_mode && repair))
11070 "%sroot item for root %llu,"
11071 " current bytenr %llu, current gen %llu, current level %u,"
11072 " new bytenr %llu, new gen %llu, new level %u\n",
11073 (read_only_mode ? "" : "fixing "),
11075 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11076 btrfs_root_level(&ri),
11077 rii->bytenr, rii->gen, rii->level);
/* Never "repair" backwards: the item is newer than what we found. */
11079 if (btrfs_root_generation(&ri) > rii->gen) {
11081 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11082 root_id, btrfs_root_generation(&ri), rii->gen);
11086 if (!read_only_mode) {
11087 btrfs_set_root_bytenr(&ri, rii->bytenr);
11088 btrfs_set_root_level(&ri, rii->level);
11089 btrfs_set_root_generation(&ri, rii->gen);
11090 write_extent_buffer(path->nodes[0], &ri,
11091 offset, sizeof(ri));
11101 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11102 * caused read-only snapshots to be corrupted if they were created at a moment
11103 * when the source subvolume/snapshot had orphan items. The issue was that the
11104 * on-disk root items became incorrect, referring to the pre orphan cleanup root
11105 * node instead of the post orphan cleanup root node.
11106 * So this function, and its callees, just detects and fixes those cases. Even
11107 * though the regression was for read-only snapshots, this function applies to
11108 * any snapshot/subvolume root.
11109 * This must be run before any other repair code - not doing so makes other
11110 * repair code delete or modify backrefs in the extent tree for example, which
11111 * will result in an inconsistent fs after repairing the root items.
11113 static int repair_root_items(struct btrfs_fs_info *info)
11115 struct btrfs_path *path = NULL;
11116 struct btrfs_key key;
11117 struct extent_buffer *leaf;
11118 struct btrfs_trans_handle *trans = NULL;
11121 int need_trans = 0;
/* Scan the extent tree once and cache the best root node per root id. */
11123 ret = build_roots_info_cache(info);
11127 path = btrfs_alloc_path();
/* Iterate over all subvolume/snapshot ROOT_ITEMs in the tree of tree roots. */
11133 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11134 key.type = BTRFS_ROOT_ITEM_KEY;
11139 * Avoid opening and committing transactions if a leaf doesn't have
11140 * any root items that need to be fixed, so that we avoid rotating
11141 * backup roots unnecessarily.
11144 trans = btrfs_start_transaction(info->tree_root, 1);
11145 if (IS_ERR(trans)) {
11146 ret = PTR_ERR(trans);
11151 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11155 leaf = path->nodes[0];
11158 struct btrfs_key found_key;
/* Past the last slot of this leaf: look for the next key / next leaf. */
11160 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11161 int no_more_keys = find_next_key(path, &key);
11163 btrfs_release_path(path);
11165 ret = btrfs_commit_transaction(trans,
11177 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
11179 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
/* Relocation tree items are transient; skip them. */
11181 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11184 ret = maybe_repair_root_item(info, path, &found_key,
/* A stale item was found in read-only pass: redo it inside a transaction. */
11189 if (!trans && repair) {
11192 btrfs_release_path(path);
/* NOTE(review): loop-exit and error labels are elided from this excerpt. */
11202 free_roots_info_cache();
11203 btrfs_free_path(path);
11205 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the v1 free space cache: walk every block group and drop its cache
 * inode/extent data, then persist cache_generation = -1 in the super block
 * so the (now removed) cache is treated as invalid.
 *
 * NOTE(review): the declaration of 'current' and the loop construct around
 * the block group walk are on lines elided from this excerpt.
 */
11212 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
11214 struct btrfs_trans_handle *trans;
11215 struct btrfs_block_group_cache *bg_cache;
11219 /* Clear all free space cache inodes and its extent data */
11221 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
11224 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
/* Advance past this block group for the next lookup. */
11227 current = bg_cache->key.objectid + bg_cache->key.offset;
11230 /* Don't forget to set cache_generation to -1 */
11231 trans = btrfs_start_transaction(fs_info->tree_root, 0);
11232 if (IS_ERR(trans)) {
11233 error("failed to update super block cache generation");
11234 return PTR_ERR(trans);
11236 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
11237 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Usage/help text for 'btrfs check', printed by usage() on bad options.
 * One string per output line; the NULL terminator is on a line elided
 * from this excerpt.
 */
11242 const char * const cmd_check_usage[] = {
11243 "btrfs check [options] <device>",
11244 "Check structural integrity of a filesystem (unmounted).",
11245 "Check structural integrity of an unmounted filesystem. Verify internal",
11246 "trees' consistency and item connectivity. In the repair mode try to",
11247 "fix the problems found. ",
11248 "WARNING: the repair mode is considered dangerous",
11250 "-s|--super <superblock> use this superblock copy",
11251 "-b|--backup use the first valid backup root copy",
11252 "--repair try to repair the filesystem",
11253 "--readonly run in read-only mode (default)",
11254 "--init-csum-tree create a new CRC tree",
11255 "--init-extent-tree create a new extent tree",
11256 "--mode <MODE> allows choice of memory/IO trade-offs",
11257 " where MODE is one of:",
11258 " original - read inodes and extents to memory (requires",
11259 " more memory, does less IO)",
11260 " lowmem - try to use less memory but read blocks again",
11262 "--check-data-csum verify checksums of data blocks",
11263 "-Q|--qgroup-report print a report on qgroup consistency",
11264 "-E|--subvol-extents <subvolid>",
11265 " print subvolume extents and sharing state",
11266 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
11267 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
11268 "-p|--progress indicate progress",
11269 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
11270 " NOTE: v1 support implemented",
/*
 * Entry point for 'btrfs check': parse command line options, refuse to run
 * on a mounted filesystem, open the fs image, then run the check passes
 * (chunks/extents, root items, free space, fs roots, csums, root refs,
 * quota groups), optionally repairing, and print summary statistics.
 *
 * NOTE(review): many lines are elided from this excerpt (gaps in the
 * embedded numbering), including several declarations (ret, err, num,
 * subvolid, readonly, ...), break statements, and error/cleanup labels.
 */
11274 int cmd_check(int argc, char **argv)
11276 struct cache_tree root_cache;
11277 struct btrfs_root *root;
11278 struct btrfs_fs_info *info;
11281 u64 tree_root_bytenr = 0;
11282 u64 chunk_root_bytenr = 0;
11283 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11286 int init_csum_tree = 0;
11288 int clear_space_cache = 0;
11289 int qgroup_report = 0;
11290 int qgroups_repaired = 0;
11291 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* Long-only options get values starting at 257, past any char value. */
11295 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11296 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11297 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11298 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
11299 static const struct option long_options[] = {
11300 { "super", required_argument, NULL, 's' },
11301 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11302 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11303 { "init-csum-tree", no_argument, NULL,
11304 GETOPT_VAL_INIT_CSUM },
11305 { "init-extent-tree", no_argument, NULL,
11306 GETOPT_VAL_INIT_EXTENT },
11307 { "check-data-csum", no_argument, NULL,
11308 GETOPT_VAL_CHECK_CSUM },
11309 { "backup", no_argument, NULL, 'b' },
11310 { "subvol-extents", required_argument, NULL, 'E' },
11311 { "qgroup-report", no_argument, NULL, 'Q' },
11312 { "tree-root", required_argument, NULL, 'r' },
11313 { "chunk-root", required_argument, NULL,
11314 GETOPT_VAL_CHUNK_TREE },
11315 { "progress", no_argument, NULL, 'p' },
11316 { "mode", required_argument, NULL,
11318 { "clear-space-cache", required_argument, NULL,
11319 GETOPT_VAL_CLEAR_SPACE_CACHE},
11320 { NULL, 0, NULL, 0}
/* Option parsing; each case sets flags consumed by the passes below. */
11323 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11327 case 'a': /* ignored */ break;
11329 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
/* -s: use an alternative superblock copy, bounded by the mirror count. */
11332 num = arg_strtou64(optarg);
11333 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11335 "super mirror should be less than %d",
11336 BTRFS_SUPER_MIRROR_MAX);
11339 bytenr = btrfs_sb_offset(((int)num));
11340 printf("using SB copy %llu, bytenr %llu\n", num,
11341 (unsigned long long)bytenr);
11347 subvolid = arg_strtou64(optarg);
11350 tree_root_bytenr = arg_strtou64(optarg);
11352 case GETOPT_VAL_CHUNK_TREE:
11353 chunk_root_bytenr = arg_strtou64(optarg);
11356 ctx.progress_enabled = true;
11360 usage(cmd_check_usage);
11361 case GETOPT_VAL_REPAIR:
11362 printf("enabling repair mode\n");
11364 ctree_flags |= OPEN_CTREE_WRITES;
11366 case GETOPT_VAL_READONLY:
11369 case GETOPT_VAL_INIT_CSUM:
11370 printf("Creating a new CRC tree\n");
11371 init_csum_tree = 1;
11373 ctree_flags |= OPEN_CTREE_WRITES;
11375 case GETOPT_VAL_INIT_EXTENT:
11376 init_extent_tree = 1;
/* Reinit-extent-tree must not rely on existing (bogus) block groups. */
11377 ctree_flags |= (OPEN_CTREE_WRITES |
11378 OPEN_CTREE_NO_BLOCK_GROUPS);
11381 case GETOPT_VAL_CHECK_CSUM:
11382 check_data_csum = 1;
11384 case GETOPT_VAL_MODE:
11385 check_mode = parse_check_mode(optarg);
11386 if (check_mode == CHECK_MODE_UNKNOWN) {
11387 error("unknown mode: %s", optarg);
11391 case GETOPT_VAL_CLEAR_SPACE_CACHE:
11392 if (strcmp(optarg, "v1") != 0) {
11394 "only v1 support implmented, unrecognized value %s",
11398 clear_space_cache = 1;
11399 ctree_flags |= OPEN_CTREE_WRITES;
/* Exactly one positional argument is required: the device to check. */
11404 if (check_argc_exact(argc - optind, 1))
11405 usage(cmd_check_usage);
11407 if (ctx.progress_enabled) {
11408 ctx.tp = TASK_NOTHING;
11409 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11412 /* This check is the only reason for --readonly to exist */
11413 if (readonly && repair) {
11414 error("repair options are not compatible with --readonly");
11419 * Not supported yet
11421 if (repair && check_mode == CHECK_MODE_LOWMEM) {
11422 error("low memory mode doesn't support repair yet");
11427 cache_tree_init(&root_cache);
/* Refuse to operate on a mounted filesystem. */
11429 if((ret = check_mounted(argv[optind])) < 0) {
11430 error("could not check mount status: %s", strerror(-ret));
11433 error("%s is currently mounted, aborting", argv[optind]);
11438 /* only allow partial opening under repair mode */
11440 ctree_flags |= OPEN_CTREE_PARTIAL;
11442 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11443 chunk_root_bytenr, ctree_flags);
11445 error("cannot open file system");
11450 global_info = info;
11451 root = info->fs_root;
/* --clear-space-cache: only the v1 cache can be cleared so far. */
11452 if (clear_space_cache) {
11453 if (btrfs_fs_compat_ro(info,
11454 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
11456 "free space cache v2 detected, clearing not implemented");
11460 printf("Clearing free space cache\n");
11461 ret = clear_free_space_cache(info);
11463 error("failed to clear free space cache");
11466 printf("Free space cache cleared\n");
11472 * repair mode will force us to commit transaction which
11473 * will make us fail to load log tree when mounting.
11475 if (repair && btrfs_super_log_root(info->super_copy)) {
11476 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
11481 ret = zero_log_tree(root);
11483 error("failed to zero log tree: %d", ret);
11488 uuid_unparse(info->super_copy->fsid, uuidbuf);
/* -Q: report-only mode, print qgroup consistency and exit path. */
11489 if (qgroup_report) {
11490 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11492 ret = qgroup_verify_all(info);
/* -E: report-only mode, print per-subvolume extent sharing state. */
11498 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11499 subvolid, argv[optind], uuidbuf);
11500 ret = print_extent_state(info, subvolid);
11503 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
/* Without the tree/dev/chunk roots there is nothing we can check. */
11505 if (!extent_buffer_uptodate(info->tree_root->node) ||
11506 !extent_buffer_uptodate(info->dev_root->node) ||
11507 !extent_buffer_uptodate(info->chunk_root->node)) {
11508 error("critical roots corrupted, unable to check the filesystem");
/* Rebuild the extent and/or csum trees inside a transaction if requested. */
11513 if (init_extent_tree || init_csum_tree) {
11514 struct btrfs_trans_handle *trans;
11516 trans = btrfs_start_transaction(info->extent_root, 0);
11517 if (IS_ERR(trans)) {
11518 error("error starting transaction");
11519 ret = PTR_ERR(trans);
11523 if (init_extent_tree) {
11524 printf("Creating a new extent tree\n");
11525 ret = reinit_extent_tree(trans, info);
11530 if (init_csum_tree) {
11531 printf("Reinitialize checksum tree\n");
11532 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11534 error("checksum tree initialization failed: %d",
11540 ret = fill_csum_tree(trans, info->csum_root,
11543 error("checksum tree refilling failed: %d", ret);
11548 * Ok now we commit and run the normal fsck, which will add
11549 * extent entries for all of the items it finds.
11551 ret = btrfs_commit_transaction(trans, info->extent_root);
11555 if (!extent_buffer_uptodate(info->extent_root->node)) {
11556 error("critical: extent_root, unable to check the filesystem");
11560 if (!extent_buffer_uptodate(info->csum_root->node)) {
11561 error("critical: csum_root, unable to check the filesystem");
/* Main check passes start here; mode selects the in-memory strategy. */
11566 if (!ctx.progress_enabled)
11567 printf("checking extents");
11568 if (check_mode == CHECK_MODE_LOWMEM)
11569 ret = check_chunks_and_extents_v2(root);
11571 ret = check_chunks_and_extents(root);
11573 printf("Errors found in extent allocation tree or chunk allocation");
/* Must run before other repair passes; see repair_root_items(). */
11575 ret = repair_root_items(info);
11579 fprintf(stderr, "Fixed %d roots.\n", ret);
11581 } else if (ret > 0) {
11583 "Found %d roots with an outdated root item.\n",
11586 "Please run a filesystem check with the option --repair to fix them.\n");
11591 if (!ctx.progress_enabled) {
11592 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11593 fprintf(stderr, "checking free space tree\n");
11595 fprintf(stderr, "checking free space cache\n");
11597 ret = check_space_cache(root);
11602 * We used to have to have these hole extents in between our real
11603 * extents so if we don't have this flag set we need to make sure there
11604 * are no gaps in the file extents for inodes, otherwise we can just
11605 * ignore it when this happens.
11607 no_holes = btrfs_fs_incompat(root->fs_info,
11608 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11609 if (!ctx.progress_enabled)
11610 fprintf(stderr, "checking fs roots\n");
11611 ret = check_fs_roots(root, &root_cache);
11615 fprintf(stderr, "checking csums\n");
11616 ret = check_csums(root);
11620 fprintf(stderr, "checking root refs\n");
11621 ret = check_root_refs(root, &root_cache);
/* Re-COW any extent buffers queued for rewrite during repair. */
11625 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11626 struct extent_buffer *eb;
11628 eb = list_first_entry(&root->fs_info->recow_ebs,
11629 struct extent_buffer, recow);
11630 list_del_init(&eb->recow);
11631 ret = recow_extent_buffer(root, eb);
/* Drop items that were flagged as bad during the check passes. */
11636 while (!list_empty(&delete_items)) {
11637 struct bad_item *bad;
11639 bad = list_first_entry(&delete_items, struct bad_item, list);
11640 list_del_init(&bad->list);
11642 ret = delete_bad_item(root, bad);
11646 if (info->quota_enabled) {
11648 fprintf(stderr, "checking quota groups\n");
11649 err = qgroup_verify_all(info);
11653 err = repair_qgroups(info, &qgroups_repaired);
/* Anything still queued for re-COW means unrepaired transid errors. */
11658 if (!list_empty(&root->fs_info->recow_ebs)) {
11659 error("transid errors in file system");
11663 /* Don't override original ret */
11664 if (!ret && qgroups_repaired)
11665 ret = qgroups_repaired;
11667 if (found_old_backref) { /*
11668 * there was a disk format change when mixed
11669 * backref was in testing tree. The old format
11670 * existed about one week.
11672 printf("\n * Found old mixed backref format. "
11673 "The old format is not supported! *"
11675 "backup data and re-format the FS. *\n\n");
/* Final summary statistics accumulated by the check passes. */
11678 printf("found %llu bytes used err is %d\n",
11679 (unsigned long long)bytes_used, ret);
11680 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11681 printf("total tree bytes: %llu\n",
11682 (unsigned long long)total_btree_bytes);
11683 printf("total fs tree bytes: %llu\n",
11684 (unsigned long long)total_fs_tree_bytes);
11685 printf("total extent tree bytes: %llu\n",
11686 (unsigned long long)total_extent_tree_bytes);
11687 printf("btree space waste bytes: %llu\n",
11688 (unsigned long long)btree_space_waste);
11689 printf("file data blocks allocated: %llu\n referenced %llu\n",
11690 (unsigned long long)data_bytes_allocated,
11691 (unsigned long long)data_bytes_referenced);
11693 free_qgroup_counts();
11694 free_root_recs_tree(&root_cache);
11698 if (ctx.progress_enabled)
11699 task_deinit(ctx.info);