2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
49 TASK_NOTHING, /* have to be the last element */
54 enum task_position tp;
56 struct task_info *info;
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
77 enum btrfs_check_mode {
81 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
84 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86 struct extent_backref {
87 struct list_head list;
88 unsigned int is_data:1;
89 unsigned int found_extent_tree:1;
90 unsigned int full_backref:1;
91 unsigned int found_ref:1;
92 unsigned int broken:1;
95 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
97 return list_entry(entry, struct extent_backref, list);
100 struct data_backref {
101 struct extent_backref node;
115 static inline struct data_backref* to_data_backref(struct extent_backref *back)
117 return container_of(back, struct data_backref, node);
121 * Much like data_backref, just removed the undetermined members
122 * and change it to use list_head.
123 * During extent scan, it is stored in root->orphan_data_extent.
124 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
126 struct orphan_data_extent {
127 struct list_head list;
135 struct tree_backref {
136 struct extent_backref node;
143 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
145 return container_of(back, struct tree_backref, node);
148 /* Explicit initialization for extent_record::flag_block_full_backref */
149 enum { FLAG_UNSET = 2 };
151 struct extent_record {
152 struct list_head backrefs;
153 struct list_head dups;
154 struct list_head list;
155 struct cache_extent cache;
156 struct btrfs_disk_key parent_key;
161 u64 extent_item_refs;
163 u64 parent_generation;
167 unsigned int flag_block_full_backref:2;
168 unsigned int found_rec:1;
169 unsigned int content_checked:1;
170 unsigned int owner_ref_checked:1;
171 unsigned int is_root:1;
172 unsigned int metadata:1;
173 unsigned int bad_full_backref:1;
174 unsigned int crossing_stripes:1;
175 unsigned int wrong_chunk_type:1;
178 static inline struct extent_record* to_extent_record(struct list_head *entry)
180 return container_of(entry, struct extent_record, list);
183 struct inode_backref {
184 struct list_head list;
185 unsigned int found_dir_item:1;
186 unsigned int found_dir_index:1;
187 unsigned int found_inode_ref:1;
197 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
199 return list_entry(entry, struct inode_backref, list);
202 struct root_item_record {
203 struct list_head list;
210 struct btrfs_key drop_key;
213 #define REF_ERR_NO_DIR_ITEM (1 << 0)
214 #define REF_ERR_NO_DIR_INDEX (1 << 1)
215 #define REF_ERR_NO_INODE_REF (1 << 2)
216 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
217 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
218 #define REF_ERR_DUP_INODE_REF (1 << 5)
219 #define REF_ERR_INDEX_UNMATCH (1 << 6)
220 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
221 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
222 #define REF_ERR_NO_ROOT_REF (1 << 9)
223 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
224 #define REF_ERR_DUP_ROOT_REF (1 << 11)
225 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
227 struct file_extent_hole {
233 struct inode_record {
234 struct list_head backrefs;
235 unsigned int checked:1;
236 unsigned int merging:1;
237 unsigned int found_inode_item:1;
238 unsigned int found_dir_item:1;
239 unsigned int found_file_extent:1;
240 unsigned int found_csum_item:1;
241 unsigned int some_csum_missing:1;
242 unsigned int nodatasum:1;
255 struct rb_root holes;
256 struct list_head orphan_extents;
261 #define I_ERR_NO_INODE_ITEM (1 << 0)
262 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
263 #define I_ERR_DUP_INODE_ITEM (1 << 2)
264 #define I_ERR_DUP_DIR_INDEX (1 << 3)
265 #define I_ERR_ODD_DIR_ITEM (1 << 4)
266 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
267 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
268 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
269 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
270 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
271 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
272 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
273 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
274 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
275 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
277 struct root_backref {
278 struct list_head list;
279 unsigned int found_dir_item:1;
280 unsigned int found_dir_index:1;
281 unsigned int found_back_ref:1;
282 unsigned int found_forward_ref:1;
283 unsigned int reachable:1;
292 static inline struct root_backref* to_root_backref(struct list_head *entry)
294 return list_entry(entry, struct root_backref, list);
298 struct list_head backrefs;
299 struct cache_extent cache;
300 unsigned int found_root_item:1;
306 struct cache_extent cache;
311 struct cache_extent cache;
312 struct cache_tree root_cache;
313 struct cache_tree inode_cache;
314 struct inode_record *current;
323 struct walk_control {
324 struct cache_tree shared;
325 struct shared_node *nodes[BTRFS_MAX_LEVEL];
331 struct btrfs_key key;
333 struct list_head list;
336 struct extent_entry {
341 struct list_head list;
344 struct root_item_info {
345 /* level of the root */
347 /* number of nodes at this level, must be 1 for a root */
351 struct cache_extent cache_extent;
355 * Error bit for low memory mode check.
357 * Currently no caller cares about it yet. Just internal use for error
360 #define BACKREF_MISSING (1 << 0) /* Backref missing in extent tree */
361 #define BACKREF_MISMATCH (1 << 1) /* Backref exists but does not match */
362 #define BYTES_UNALIGNED (1 << 2) /* Some bytes are not aligned */
363 #define REFERENCER_MISSING (1 << 3) /* Referencer not found */
364 #define REFERENCER_MISMATCH (1 << 4) /* Referenceer found but does not match */
365 #define CROSSING_STRIPE_BOUNDARY (1 << 4) /* For kernel scrub workaround */
366 #define ITEM_SIZE_MISMATCH (1 << 5) /* Bad item size */
367 #define UNKNOWN_TYPE (1 << 6) /* Unknown type */
368 #define ACCOUNTING_MISMATCH (1 << 7) /* Used space accounting error */
369 #define CHUNK_TYPE_MISMATCH (1 << 8)
/*
 * Progress-reporting thread body: prints the current task position string
 * with a cycling spinner character on a ~1s period.
 * NOTE(review): this extract elides lines (braces/loop structure not
 * visible); comments describe only what the visible lines show.
 */
371 static void *print_status_check(void *p)
373 struct task_ctx *priv = p;
374 const char work_indicator[] = { '.', 'o', 'O', 'o' };
376 static char *task_position_string[] = {
378 "checking free space cache",
/* 1000 ms period between status refreshes */
382 task_period_start(priv->info, 1000 /* 1s */);
/* TASK_NOTHING is the sentinel "no task" position — nothing to print */
384 if (priv->tp == TASK_NOTHING)
/* '\r' keeps the status on one line; spinner cycles through 4 chars */
388 printf("%s [%c]\r", task_position_string[priv->tp],
389 work_indicator[count % 4]);
392 task_period_wait(priv->info);
397 static int print_status_return(void *p)
/*
 * Map a --mode command-line string to the corresponding check mode.
 * "lowmem" -> CHECK_MODE_LOWMEM; "orig"/"original" -> CHECK_MODE_ORIGINAL;
 * anything else -> CHECK_MODE_UNKNOWN.
 */
405 static enum btrfs_check_mode parse_check_mode(const char *str)
407 if (strcmp(str, "lowmem") == 0)
408 return CHECK_MODE_LOWMEM;
409 if (strcmp(str, "orig") == 0)
410 return CHECK_MODE_ORIGINAL;
411 if (strcmp(str, "original") == 0)
412 return CHECK_MODE_ORIGINAL;
414 return CHECK_MODE_UNKNOWN;
417 /* Compatible function to allow reuse of old codes */
/*
 * Return the start offset of the lowest hole in the tree (holes are keyed
 * by start, so rb_first() gives the earliest gap).
 * NOTE(review): return statements are elided in this extract; presumably
 * the empty-tree case returns a sentinel ((u64)-1) — confirm upstream.
 */
418 static u64 first_extent_gap(struct rb_root *holes)
420 struct file_extent_hole *hole;
422 if (RB_EMPTY_ROOT(holes))
425 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * rb-tree comparator for file_extent_hole nodes, ordered by ->start.
 * Deliberately never reports equality so rb_insert() cannot fail with
 * -EEXIST; equal-start holes are ordered by ->len and merged later.
 * NOTE(review): the actual return values are on lines elided from this
 * extract.
 */
429 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
431 struct file_extent_hole *hole1;
432 struct file_extent_hole *hole2;
434 hole1 = rb_entry(node1, struct file_extent_hole, node);
435 hole2 = rb_entry(node2, struct file_extent_hole, node);
437 if (hole1->start > hole2->start)
439 if (hole1->start < hole2->start)
441 /* Now hole1->start == hole2->start */
442 if (hole1->len >= hole2->len)
444 * Hole 1 will be merge center
445 * Same hole will be merged later
448 /* Hole 2 will be merge center */
453 * Add a hole to the record
455 * This will do hole merge for copy_file_extent_holes(),
456 * which will ensure there won't be continuous holes.
/*
 * Insert a [start, start+len) hole into the rb-tree, then coalesce it with
 * any adjacent/overlapping previous hole and with following holes, so the
 * tree never contains contiguous holes.
 * NOTE(review): lines elided in this extract (parameter list tail, malloc
 * failure handling, loop structure); comments reflect visible lines only.
 */
458 static int add_file_extent_hole(struct rb_root *holes,
461 struct file_extent_hole *hole;
462 struct file_extent_hole *prev = NULL;
463 struct file_extent_hole *next = NULL;
465 hole = malloc(sizeof(*hole));
470 /* Since compare will not return 0, no -EEXIST will happen */
471 rb_insert(holes, &hole->node, compare_hole);
473 /* simple merge with previous hole */
474 if (rb_prev(&hole->node))
475 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
/* prev touches or overlaps us: absorb prev into this hole */
477 if (prev && prev->start + prev->len >= hole->start) {
478 hole->len = hole->start + hole->len - prev->start;
479 hole->start = prev->start;
480 rb_erase(&prev->node, holes);
485 /* iterate merge with next holes */
487 if (!rb_next(&hole->node))
489 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
491 if (hole->start + hole->len >= next->start) {
/* next extends past us: grow to cover it before erasing */
492 if (hole->start + hole->len <= next->start + next->len)
493 hole->len = next->start + next->len -
495 rb_erase(&next->node, holes);
/*
 * rb_search() comparator: @data is a key file_extent_hole (only ->start
 * used); reports whether the key start falls before, inside, or after the
 * hole at @node.
 * NOTE(review): return statements are on lines elided from this extract.
 */
504 static int compare_hole_range(struct rb_node *node, void *data)
506 struct file_extent_hole *hole;
/* first extract the search key's start from @data ... */
509 hole = (struct file_extent_hole *)data;
/* ... then reuse @hole for the tree node being tested */
512 hole = rb_entry(node, struct file_extent_hole, node);
513 if (start < hole->start)
515 if (start >= hole->start && start < hole->start + hole->len)
521 * Delete a hole in the record
523 * This will do the hole split and is much restrict than add.
/*
 * Remove the range [start, start+len) from the hole covering it: the
 * covering hole is erased and up to two remainder holes (before and after
 * the deleted range) are re-added. The range must lie entirely inside one
 * existing hole.
 * NOTE(review): lines elided in this extract (parameter tail, error
 * returns, the have_prev/have_next flags); comments reflect visible lines.
 */
525 static int del_file_extent_hole(struct rb_root *holes,
528 struct file_extent_hole *hole;
529 struct file_extent_hole tmp;
534 struct rb_node *node;
/* find the hole containing @start */
541 node = rb_search(holes, &tmp, compare_hole_range, NULL);
544 hole = rb_entry(node, struct file_extent_hole, node);
/* the range must not extend past the found hole */
545 if (start + len > hole->start + hole->len)
549 * Now there will be no overlap, delete the hole and re-add the
550 * split(s) if they exists.
/* leading remainder: [hole->start, start) */
552 if (start > hole->start) {
553 prev_start = hole->start;
554 prev_len = start - hole->start;
/* trailing remainder: [start+len, hole end) */
557 if (hole->start + hole->len > start + len) {
558 next_start = start + len;
559 next_len = hole->start + hole->len - start - len;
562 rb_erase(node, holes);
565 ret = add_file_extent_hole(holes, prev_start, prev_len);
570 ret = add_file_extent_hole(holes, next_start, next_len);
/*
 * Copy every hole from @src into @dst via add_file_extent_hole(), which
 * also merges adjacent holes in @dst.
 * NOTE(review): loop braces and the final return are elided in this
 * extract.
 */
577 static int copy_file_extent_holes(struct rb_root *dst,
580 struct file_extent_hole *hole;
581 struct rb_node *node;
584 node = rb_first(src);
586 hole = rb_entry(node, struct file_extent_hole, node);
587 ret = add_file_extent_hole(dst, hole->start, hole->len);
590 node = rb_next(node);
/*
 * Drain the hole tree: repeatedly erase (and presumably free — the free()
 * call is on an elided line; confirm upstream) the first node until empty.
 */
595 static void free_file_extent_holes(struct rb_root *holes)
597 struct rb_node *node;
598 struct file_extent_hole *hole;
600 node = rb_first(holes);
602 hole = rb_entry(node, struct file_extent_hole, node);
603 rb_erase(node, holes);
/* re-fetch the new first node after erasing */
605 node = rb_first(holes);
609 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
/*
 * Mark @root dirty in @trans if not already recorded for this transaction:
 * snapshot the current node as commit_root and take an extra reference on
 * it via extent_buffer_get().
 */
611 static void record_root_in_trans(struct btrfs_trans_handle *trans,
612 struct btrfs_root *root)
614 if (root->last_trans != trans->transid) {
615 root->track_dirty = 1;
616 root->last_trans = trans->transid;
617 root->commit_root = root->node;
618 extent_buffer_get(root->node);
/*
 * Translate a POSIX inode mode (S_IF* file-type bits) into the matching
 * BTRFS_FT_* dir-entry type via a static lookup table indexed by
 * (mode & S_IFMT) >> S_SHIFT.
 */
622 static u8 imode_to_type(u32 imode)
625 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
626 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
627 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
628 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
629 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
630 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
631 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
632 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
635 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
/*
 * rb-tree comparator for device_record nodes, ordered by ->devid.
 * NOTE(review): the return values themselves are on lines elided from this
 * extract.
 */
639 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
641 struct device_record *rec1;
642 struct device_record *rec2;
644 rec1 = rb_entry(node1, struct device_record, node);
645 rec2 = rb_entry(node2, struct device_record, node);
646 if (rec1->devid > rec2->devid)
648 else if (rec1->devid < rec2->devid)
/*
 * Deep-copy an inode_record: the struct itself plus its backref list,
 * orphan-extent list and hole tree. Returns ERR_PTR(-ENOMEM) on allocation
 * failure; the visible cleanup tail frees everything copied so far.
 * NOTE(review): lines are elided in this extract (error-path gotos, list
 * re-init of the copy, frees inside the cleanup loops); comments reflect
 * only the visible lines.
 */
654 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
656 struct inode_record *rec;
657 struct inode_backref *backref;
658 struct inode_backref *orig;
659 struct inode_backref *tmp;
660 struct orphan_data_extent *src_orphan;
661 struct orphan_data_extent *dst_orphan;
666 rec = malloc(sizeof(*rec));
668 return ERR_PTR(-ENOMEM);
/* shallow copy first, then rebuild the embedded containers */
669 memcpy(rec, orig_rec, sizeof(*rec));
671 INIT_LIST_HEAD(&rec->backrefs);
672 INIT_LIST_HEAD(&rec->orphan_extents);
673 rec->holes = RB_ROOT;
/* clone each backref; namelen + 1 covers the trailing NUL */
675 list_for_each_entry(orig, &orig_rec->backrefs, list) {
676 size = sizeof(*orig) + orig->namelen + 1;
677 backref = malloc(size);
682 memcpy(backref, orig, size);
683 list_add_tail(&backref->list, &rec->backrefs);
/* clone each orphan data extent */
685 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
686 dst_orphan = malloc(sizeof(*dst_orphan));
691 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
692 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
694 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
/* cleanup path: tear down partially built copy */
701 rb = rb_first(&rec->holes);
703 struct file_extent_hole *hole;
705 hole = rb_entry(rb, struct file_extent_hole, node);
711 if (!list_empty(&rec->backrefs))
712 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
713 list_del(&orig->list);
717 if (!list_empty(&rec->orphan_extents))
718 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
719 list_del(&orig->list);
/*
 * Print each orphan data extent (inode, offset, disk bytenr/len) for the
 * given tree; silently returns when the list is empty.
 */
728 static void print_orphan_data_extents(struct list_head *orphan_extents,
731 struct orphan_data_extent *orphan;
733 if (list_empty(orphan_extents))
735 printf("The following data extent is lost in tree %llu:\n",
737 list_for_each_entry(orphan, orphan_extents, list) {
738 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
739 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Emit a one-line human-readable summary of every I_ERR_* bit set on an
 * inode record, then (when applicable) list orphan data extents and file
 * extent holes.
 * NOTE(review): this extract elides some lines (loop braces, the
 * no-holes fallback condition before line 811); comments reflect only the
 * visible lines.
 */
744 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
746 u64 root_objectid = root->root_key.objectid;
747 int errors = rec->errors;
751 /* reloc root errors, we print its corresponding fs root objectid*/
752 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
753 root_objectid = root->root_key.offset;
754 fprintf(stderr, "reloc");
756 fprintf(stderr, "root %llu inode %llu errors %x",
757 (unsigned long long) root_objectid,
758 (unsigned long long) rec->ino, rec->errors);
/* one clause per I_ERR_* bit, appended to the summary line */
760 if (errors & I_ERR_NO_INODE_ITEM)
761 fprintf(stderr, ", no inode item");
762 if (errors & I_ERR_NO_ORPHAN_ITEM)
763 fprintf(stderr, ", no orphan item");
764 if (errors & I_ERR_DUP_INODE_ITEM)
765 fprintf(stderr, ", dup inode item");
766 if (errors & I_ERR_DUP_DIR_INDEX)
767 fprintf(stderr, ", dup dir index");
768 if (errors & I_ERR_ODD_DIR_ITEM)
769 fprintf(stderr, ", odd dir item");
770 if (errors & I_ERR_ODD_FILE_EXTENT)
771 fprintf(stderr, ", odd file extent");
772 if (errors & I_ERR_BAD_FILE_EXTENT)
773 fprintf(stderr, ", bad file extent");
774 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
775 fprintf(stderr, ", file extent overlap");
776 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
777 fprintf(stderr, ", file extent discount");
778 if (errors & I_ERR_DIR_ISIZE_WRONG)
779 fprintf(stderr, ", dir isize wrong");
780 if (errors & I_ERR_FILE_NBYTES_WRONG)
781 fprintf(stderr, ", nbytes wrong");
782 if (errors & I_ERR_ODD_CSUM_ITEM)
783 fprintf(stderr, ", odd csum item");
784 if (errors & I_ERR_SOME_CSUM_MISSING)
785 fprintf(stderr, ", some csum missing");
786 if (errors & I_ERR_LINK_COUNT_WRONG)
787 fprintf(stderr, ", link count wrong");
788 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
789 fprintf(stderr, ", orphan file extent");
790 fprintf(stderr, "\n");
791 /* Print the orphan extents if needed */
792 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
793 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
795 /* Print the holes if needed */
796 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
797 struct file_extent_hole *hole;
798 struct rb_node *node;
801 node = rb_first(&rec->holes);
802 fprintf(stderr, "Found file extent holes:\n");
805 hole = rb_entry(node, struct file_extent_hole, node);
806 fprintf(stderr, "\tstart: %llu, len: %llu\n",
807 hole->start, hole->len);
808 node = rb_next(node);
/* fallback when the tree records no explicit hole: whole file */
811 fprintf(stderr, "\tstart: 0, len: %llu\n",
812 round_up(rec->isize, root->sectorsize));
/*
 * Emit a human-readable clause for every REF_ERR_* bit set in @errors,
 * followed by a newline. Companion to print_inode_error().
 */
816 static void print_ref_error(int errors)
818 if (errors & REF_ERR_NO_DIR_ITEM)
819 fprintf(stderr, ", no dir item");
820 if (errors & REF_ERR_NO_DIR_INDEX)
821 fprintf(stderr, ", no dir index");
822 if (errors & REF_ERR_NO_INODE_REF)
823 fprintf(stderr, ", no inode ref");
824 if (errors & REF_ERR_DUP_DIR_ITEM)
825 fprintf(stderr, ", dup dir item");
826 if (errors & REF_ERR_DUP_DIR_INDEX)
827 fprintf(stderr, ", dup dir index");
828 if (errors & REF_ERR_DUP_INODE_REF)
829 fprintf(stderr, ", dup inode ref");
830 if (errors & REF_ERR_INDEX_UNMATCH)
831 fprintf(stderr, ", index mismatch");
832 if (errors & REF_ERR_FILETYPE_UNMATCH)
833 fprintf(stderr, ", filetype mismatch");
834 if (errors & REF_ERR_NAME_TOO_LONG)
835 fprintf(stderr, ", name too long");
836 if (errors & REF_ERR_NO_ROOT_REF)
837 fprintf(stderr, ", no root ref");
838 if (errors & REF_ERR_NO_ROOT_BACKREF)
839 fprintf(stderr, ", no root backref");
840 if (errors & REF_ERR_DUP_ROOT_REF)
841 fprintf(stderr, ", dup root ref");
842 if (errors & REF_ERR_DUP_ROOT_BACKREF)
843 fprintf(stderr, ", dup root backref");
844 fprintf(stderr, "\n");
/*
 * Look up (or create) the inode_record for @ino in @inode_cache.
 * On a cache hit with @mod set and refs > 1, the record is cloned
 * (copy-on-write) before being handed out. On a miss a fresh zeroed
 * record and its ptr_node wrapper are inserted into the cache.
 * Returns ERR_PTR on allocation/insert failure.
 * NOTE(review): lines are elided in this extract (refcount updates,
 * free-on-error paths); comments reflect only the visible lines.
 */
847 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
850 struct ptr_node *node;
851 struct cache_extent *cache;
852 struct inode_record *rec = NULL;
855 cache = lookup_cache_extent(inode_cache, ino, 1);
857 node = container_of(cache, struct ptr_node, cache);
/* copy-on-write: don't mutate a record still shared by others */
859 if (mod && rec->refs > 1) {
860 node->data = clone_inode_rec(rec);
861 if (IS_ERR(node->data))
/* cache miss: build a fresh record */
867 rec = calloc(1, sizeof(*rec));
869 return ERR_PTR(-ENOMEM);
/* (u64)-1 marks "no extent seen yet" */
871 rec->extent_start = (u64)-1;
873 INIT_LIST_HEAD(&rec->backrefs);
874 INIT_LIST_HEAD(&rec->orphan_extents);
875 rec->holes = RB_ROOT;
877 node = malloc(sizeof(*node));
880 return ERR_PTR(-ENOMEM);
882 node->cache.start = ino;
883 node->cache.size = 1;
/* the free-space cache inode is special-cased (elided branch) */
886 if (ino == BTRFS_FREE_INO_OBJECTID)
889 ret = insert_cache_extent(inode_cache, &node->cache);
891 return ERR_PTR(-EEXIST);
/*
 * Unlink every orphan_data_extent from the list (the free() call is on an
 * elided line in this extract — confirm upstream).
 */
896 static void free_orphan_data_extents(struct list_head *orphan_extents)
898 struct orphan_data_extent *orphan;
900 while (!list_empty(orphan_extents)) {
901 orphan = list_entry(orphan_extents->next,
902 struct orphan_data_extent, list);
903 list_del(&orphan->list);
/*
 * Release an inode_record: drain its backref list, then free its orphan
 * extents and hole tree. NOTE(review): refcount check and the frees of the
 * backref/record themselves are on lines elided from this extract.
 */
908 static void free_inode_rec(struct inode_record *rec)
910 struct inode_backref *backref;
915 while (!list_empty(&rec->backrefs)) {
916 backref = to_inode_backref(rec->backrefs.next);
917 list_del(&backref->list);
920 free_orphan_data_extents(&rec->orphan_extents);
921 free_file_extent_holes(&rec->holes);
/*
 * An inode record may be dropped once it is fully checked, error-free,
 * has its inode item, its link count matches the refs found, and no
 * unresolved backrefs remain.
 * NOTE(review): return statements are on lines elided from this extract.
 */
925 static int can_free_inode_rec(struct inode_record *rec)
927 if (!rec->errors && rec->checked && rec->found_inode_item &&
928 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Final per-inode validation: settle fully-matched backrefs, then derive
 * error bits (dir size, file nbytes, extent coverage, csum presence) from
 * the collected counters, and free the record from the cache if it ends up
 * completely clean (can_free_inode_rec()).
 * NOTE(review): this extract elides lines (early returns, the backref
 * free, the record free); comments reflect only the visible lines.
 */
933 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
934 struct inode_record *rec)
936 struct cache_extent *cache;
937 struct inode_backref *tmp, *backref;
938 struct ptr_node *node;
/* can't judge anything without the inode item itself */
941 if (!rec->found_inode_item)
944 filetype = imode_to_type(rec->imode);
/* drop backrefs that are fully matched and error-free */
945 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
946 if (backref->found_dir_item && backref->found_dir_index) {
947 if (backref->filetype != filetype)
948 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
949 if (!backref->errors && backref->found_inode_ref &&
950 rec->nlink == rec->found_link) {
951 list_del(&backref->list);
/* defer judgement while still scanning or merging */
957 if (!rec->checked || rec->merging)
/* directory sanity: isize must equal summed name lengths, no extents */
960 if (S_ISDIR(rec->imode)) {
961 if (rec->found_size != rec->isize)
962 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
963 if (rec->found_file_extent)
964 rec->errors |= I_ERR_ODD_FILE_EXTENT;
965 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
966 if (rec->found_dir_item)
967 rec->errors |= I_ERR_ODD_DIR_ITEM;
968 if (rec->found_size != rec->nbytes)
969 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
/* with NO_HOLES disabled, extents+holes must cover up to isize */
970 if (rec->nlink > 0 && !no_holes &&
971 (rec->extent_end < rec->isize ||
972 first_extent_gap(&rec->holes) < rec->isize))
973 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
/* csum expectations depend on the NODATASUM flag */
976 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
977 if (rec->found_csum_item && rec->nodatasum)
978 rec->errors |= I_ERR_ODD_CSUM_ITEM;
979 if (rec->some_csum_missing && !rec->nodatasum)
980 rec->errors |= I_ERR_SOME_CSUM_MISSING;
983 BUG_ON(rec->refs != 1);
984 if (can_free_inode_rec(rec)) {
985 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
986 node = container_of(cache, struct ptr_node, cache);
987 BUG_ON(node->data != rec);
988 remove_cache_extent(inode_cache, &node->cache);
/*
 * Probe the tree for an orphan item (BTRFS_ORPHAN_OBJECTID /
 * BTRFS_ORPHAN_ITEM_KEY) covering inode @ino via a read-only search.
 * NOTE(review): key.offset assignment and the return are on elided lines;
 * presumably key.offset = ino and the search result is returned.
 */
994 static int check_orphan_item(struct btrfs_root *root, u64 ino)
996 struct btrfs_path path;
997 struct btrfs_key key;
1000 key.objectid = BTRFS_ORPHAN_OBJECTID;
1001 key.type = BTRFS_ORPHAN_ITEM_KEY;
1004 btrfs_init_path(&path);
1005 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1006 btrfs_release_path(&path);
/*
 * Record the on-disk inode item into the active node's current
 * inode_record: copies nlink/isize/nbytes/imode, notes NODATASUM, flags a
 * duplicate item, and treats nlink==0 as "orphan item required".
 */
1012 static int process_inode_item(struct extent_buffer *eb,
1013 int slot, struct btrfs_key *key,
1014 struct shared_node *active_node)
1016 struct inode_record *rec;
1017 struct btrfs_inode_item *item;
1019 rec = active_node->current;
1020 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
/* a second inode item for the same ino is an error */
1021 if (rec->found_inode_item) {
1022 rec->errors |= I_ERR_DUP_INODE_ITEM;
1025 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1026 rec->nlink = btrfs_inode_nlink(eb, item);
1027 rec->isize = btrfs_inode_size(eb, item);
1028 rec->nbytes = btrfs_inode_nbytes(eb, item);
1029 rec->imode = btrfs_inode_mode(eb, item);
1030 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1032 rec->found_inode_item = 1;
/* zero-link inodes must carry an orphan item; cleared if one is found */
1033 if (rec->nlink == 0)
1034 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1035 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find the backref (dir, name) on @rec, or allocate and append a new
 * zeroed one with the name copied in (NUL-terminated). The
 * BTRFS_MULTIPLE_OBJECTIDS pseudo-inode skips matching and always reuses
 * the first entry. NOTE(review): malloc-failure return and the dir/name
 * field assignments on the new backref are on elided lines.
 */
1039 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1041 int namelen, u64 dir)
1043 struct inode_backref *backref;
1045 list_for_each_entry(backref, &rec->backrefs, list) {
1046 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1048 if (backref->dir != dir || backref->namelen != namelen)
1050 if (memcmp(name, backref->name, namelen))
/* not found: allocate struct + name + NUL in one chunk */
1055 backref = malloc(sizeof(*backref) + namelen + 1);
1058 memset(backref, 0, sizeof(*backref));
1060 backref->namelen = namelen;
1061 memcpy(backref->name, name, namelen);
1062 backref->name[namelen] = '\0';
1063 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Record one directory-entry reference (DIR_INDEX, DIR_ITEM, or
 * INODE_REF/EXTREF item) against inode @ino, cross-checking index and
 * filetype consistency between the item kinds and accumulating REF_ERR_*
 * bits on the backref.
 * NOTE(review): found_link accounting and the return are on lines elided
 * from this extract; comments reflect only the visible lines.
 */
1067 static int add_inode_backref(struct cache_tree *inode_cache,
1068 u64 ino, u64 dir, u64 index,
1069 const char *name, int namelen,
1070 u8 filetype, u8 itemtype, int errors)
1072 struct inode_record *rec;
1073 struct inode_backref *backref;
1075 rec = get_inode_rec(inode_cache, ino, 1);
1076 BUG_ON(IS_ERR(rec));
1077 backref = get_inode_backref(rec, name, namelen, dir);
1080 backref->errors |= errors;
/* DIR_INDEX: check against any previously seen inode ref / dir item */
1081 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1082 if (backref->found_dir_index)
1083 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1084 if (backref->found_inode_ref && backref->index != index)
1085 backref->errors |= REF_ERR_INDEX_UNMATCH;
1086 if (backref->found_dir_item && backref->filetype != filetype)
1087 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1089 backref->index = index;
1090 backref->filetype = filetype;
1091 backref->found_dir_index = 1;
/* DIR_ITEM: filetype must agree with the dir index */
1092 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1094 if (backref->found_dir_item)
1095 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1096 if (backref->found_dir_index && backref->filetype != filetype)
1097 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1099 backref->filetype = filetype;
1100 backref->found_dir_item = 1;
/* INODE_REF / INODE_EXTREF: index must agree with the dir index */
1101 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1102 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1103 if (backref->found_inode_ref)
1104 backref->errors |= REF_ERR_DUP_INODE_REF;
1105 if (backref->found_dir_index && backref->index != index)
1106 backref->errors |= REF_ERR_INDEX_UNMATCH;
1108 backref->index = index;
1110 backref->ref_type = itemtype;
1111 backref->found_inode_ref = 1;
1116 maybe_free_inode_rec(inode_cache, rec);
/*
 * Fold the findings of @src into @dst (same inode seen via two paths):
 * replays src's backrefs through add_inode_backref(), ORs the found_*
 * flags and error bits, merges hole trees, link/size counters and the
 * extent coverage range, and copies the inode-item fields if dst lacks
 * them (flagging a duplicate otherwise).
 * NOTE(review): dir_count accounting and several braces/returns are on
 * lines elided from this extract.
 */
1120 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1121 struct cache_tree *dst_cache)
1123 struct inode_backref *backref;
/* replay every src backref against dst's cache */
1128 list_for_each_entry(backref, &src->backrefs, list) {
1129 if (backref->found_dir_index) {
1130 add_inode_backref(dst_cache, dst->ino, backref->dir,
1131 backref->index, backref->name,
1132 backref->namelen, backref->filetype,
1133 BTRFS_DIR_INDEX_KEY, backref->errors);
1135 if (backref->found_dir_item) {
1137 add_inode_backref(dst_cache, dst->ino,
1138 backref->dir, 0, backref->name,
1139 backref->namelen, backref->filetype,
1140 BTRFS_DIR_ITEM_KEY, backref->errors);
1142 if (backref->found_inode_ref) {
1143 add_inode_backref(dst_cache, dst->ino,
1144 backref->dir, backref->index,
1145 backref->name, backref->namelen, 0,
1146 backref->ref_type, backref->errors);
1150 if (src->found_dir_item)
1151 dst->found_dir_item = 1;
1152 if (src->found_file_extent)
1153 dst->found_file_extent = 1;
1154 if (src->found_csum_item)
1155 dst->found_csum_item = 1;
1156 if (src->some_csum_missing)
1157 dst->some_csum_missing = 1;
/* keep the hole set describing the earliest gap */
1158 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1159 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1164 BUG_ON(src->found_link < dir_count);
1165 dst->found_link += src->found_link - dir_count;
1166 dst->found_size += src->found_size;
/* merge extent coverage; overlap is an error, a gap becomes a hole */
1167 if (src->extent_start != (u64)-1) {
1168 if (dst->extent_start == (u64)-1) {
1169 dst->extent_start = src->extent_start;
1170 dst->extent_end = src->extent_end;
1172 if (dst->extent_end > src->extent_start)
1173 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1174 else if (dst->extent_end < src->extent_start) {
1175 ret = add_file_extent_hole(&dst->holes,
1177 src->extent_start - dst->extent_end);
1179 if (dst->extent_end < src->extent_end)
1180 dst->extent_end = src->extent_end;
1184 dst->errors |= src->errors;
1185 if (src->found_inode_item) {
1186 if (!dst->found_inode_item) {
1187 dst->nlink = src->nlink;
1188 dst->isize = src->isize;
1189 dst->nbytes = src->nbytes;
1190 dst->imode = src->imode;
1191 dst->nodatasum = src->nodatasum;
1192 dst->found_inode_item = 1;
1194 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Move all inode records from @src_node into @dst_node, first the
 * root_cache then the inode_cache. Records that collide in the
 * destination are merged via merge_inode_recs(). Finally the destination's
 * "current" record is advanced to match the source's in-progress inode.
 * NOTE(review): this extract elides lines (loop braces, the non-conflict
 * insert path, ptr_node frees); comments reflect only the visible lines.
 */
1202 static int splice_shared_node(struct shared_node *src_node,
1203 struct shared_node *dst_node)
1205 struct cache_extent *cache;
1206 struct ptr_node *node, *ins;
1207 struct cache_tree *src, *dst;
1208 struct inode_record *rec, *conflict;
1209 u64 current_ino = 0;
/* remember the inode the source was in the middle of scanning */
1213 if (--src_node->refs == 0)
1215 if (src_node->current)
1216 current_ino = src_node->current->ino;
/* pass 1: root cache; pass 2 (below): inode cache */
1218 src = &src_node->root_cache;
1219 dst = &dst_node->root_cache;
1221 cache = search_cache_extent(src, 0);
1223 node = container_of(cache, struct ptr_node, cache);
1225 cache = next_cache_extent(cache);
1228 remove_cache_extent(src, &node->cache);
/* wrap the record for insertion into dst */
1231 ins = malloc(sizeof(*ins));
1233 ins->cache.start = node->cache.start;
1234 ins->cache.size = node->cache.size;
1238 ret = insert_cache_extent(dst, &ins->cache);
/* same ino already in dst: merge, then maybe retire it */
1239 if (ret == -EEXIST) {
1240 conflict = get_inode_rec(dst, rec->ino, 1);
1241 BUG_ON(IS_ERR(conflict));
1242 merge_inode_recs(rec, conflict, dst);
1244 conflict->checked = 1;
1245 if (dst_node->current == conflict)
1246 dst_node->current = NULL;
1248 maybe_free_inode_rec(dst, conflict);
1249 free_inode_rec(rec);
/* after draining the root cache, switch to the inode cache */
1256 if (src == &src_node->root_cache) {
1257 src = &src_node->inode_cache;
1258 dst = &dst_node->inode_cache;
/* advance dst's current record if the source had progressed further */
1262 if (current_ino > 0 && (!dst_node->current ||
1263 current_ino > dst_node->current->ino)) {
1264 if (dst_node->current) {
1265 dst_node->current->checked = 1;
1266 maybe_free_inode_rec(dst, dst_node->current);
1268 dst_node->current = get_inode_rec(dst, current_ino, 1);
1269 BUG_ON(IS_ERR(dst_node->current));
/*
 * Cache-tree destructor callback: free the inode_record held by a
 * ptr_node (the ptr_node free itself is on an elided line). Used by the
 * FREE_EXTENT_CACHE_BASED_TREE macro below to build
 * free_inode_recs_tree().
 */
1274 static void free_inode_ptr(struct cache_extent *cache)
1276 struct ptr_node *node;
1277 struct inode_record *rec;
1279 node = container_of(cache, struct ptr_node, cache);
1281 free_inode_rec(rec);
1285 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the shared_node covering @bytenr in the shared cache; the
 * found/not-found returns are on lines elided from this extract
 * (presumably node on hit, NULL on miss).
 */
1287 static struct shared_node *find_shared_node(struct cache_tree *shared,
1290 struct cache_extent *cache;
1291 struct shared_node *node;
1293 cache = lookup_cache_extent(shared, bytenr, 1);
1295 node = container_of(cache, struct shared_node, cache);
/*
 * Allocate a zeroed shared_node for tree block @bytenr, initialize its
 * root/inode caches, and insert it into the shared cache keyed by bytenr.
 * NOTE(review): the refs assignment and return are on elided lines.
 */
1301 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1304 struct shared_node *node;
1306 node = calloc(1, sizeof(*node));
1309 node->cache.start = bytenr;
1310 node->cache.size = 1;
1311 cache_tree_init(&node->root_cache);
1312 cache_tree_init(&node->inode_cache);
1315 ret = insert_cache_extent(shared, &node->cache);
/*
 * Entering a tree block shared between trees: register (or find) its
 * shared_node and make it the walk's active node. If the node was fully
 * processed already, either drop it (dead root, refs hit 0) or splice its
 * accumulated records into the currently active node.
 * NOTE(review): several returns/braces are on lines elided from this
 * extract; comments reflect only the visible lines.
 */
1320 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1321 struct walk_control *wc, int level)
1323 struct shared_node *node;
1324 struct shared_node *dest;
1327 if (level == wc->active_node)
1330 BUG_ON(wc->active_node <= level);
1331 node = find_shared_node(&wc->shared, bytenr);
/* first visit: create the shared node and descend into it */
1333 ret = add_shared_node(&wc->shared, bytenr, refs);
1335 node = find_shared_node(&wc->shared, bytenr);
1336 wc->nodes[level] = node;
1337 wc->active_node = level;
/* dead root (refs == 0): discard the node's caches when drained */
1341 if (wc->root_level == wc->active_node &&
1342 btrfs_root_refs(&root->root_item) == 0) {
1343 if (--node->refs == 0) {
1344 free_inode_recs_tree(&node->root_cache);
1345 free_inode_recs_tree(&node->inode_cache);
1346 remove_cache_extent(&wc->shared, &node->cache);
/* live root: merge the node's records into the active node */
1352 dest = wc->nodes[wc->active_node];
1353 splice_shared_node(node, dest);
1354 if (node->refs == 0) {
1355 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Leaving a shared tree block at @level: pop it from wc->nodes, promote
 * the next populated ancestor to active, and splice the finished node's
 * records upward (or drop them if the root is dead).
 * NOTE(review): the ancestor-scan loop body and the final cleanup are on
 * lines elided from this extract.
 */
1361 static int leave_shared_node(struct btrfs_root *root,
1362 struct walk_control *wc, int level)
1364 struct shared_node *node;
1365 struct shared_node *dest;
1368 if (level == wc->root_level)
/* find the nearest populated ancestor slot */
1371 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1375 BUG_ON(i >= BTRFS_MAX_LEVEL);
1377 node = wc->nodes[wc->active_node];
1378 wc->nodes[wc->active_node] = NULL;
1379 wc->active_node = i;
1381 dest = wc->nodes[wc->active_node];
1382 if (wc->active_node < wc->root_level ||
1383 btrfs_root_refs(&root->root_item) > 0) {
1384 BUG_ON(node->refs <= 1);
1385 splice_shared_node(node, dest);
/* dead-root path: node must still hold multiple refs */
1387 BUG_ON(node->refs < 2);
/*
 * Determine the parent relationship of @child_root_id, returning:
1396 * 1 - if the root with id child_root_id is a child of root parent_root_id
1397 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1398 * has other root(s) as parent(s)
1399 * 2 - if the root child_root_id doesn't have any parent roots
 *
 * First tries a direct ROOT_REF lookup (parent -> child); if absent,
 * scans the child's ROOT_BACKREF items for a match. NOTE(review): some
 * error-path lines and loop braces are elided in this extract.
 */
1401 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1404 struct btrfs_path path;
1405 struct btrfs_key key;
1406 struct extent_buffer *leaf;
1410 btrfs_init_path(&path);
/* direct lookup: ROOT_REF (parent, child) in the tree root */
1412 key.objectid = parent_root_id;
1413 key.type = BTRFS_ROOT_REF_KEY;
1414 key.offset = child_root_id;
1415 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1419 btrfs_release_path(&path);
/* fall back: walk the child's ROOT_BACKREF items */
1423 key.objectid = child_root_id;
1424 key.type = BTRFS_ROOT_BACKREF_KEY;
1426 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1432 leaf = path.nodes[0];
1433 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1434 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1437 leaf = path.nodes[0];
1440 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* past the child's backref run: stop scanning */
1441 if (key.objectid != child_root_id ||
1442 key.type != BTRFS_ROOT_BACKREF_KEY)
1447 if (key.offset == parent_root_id) {
1448 btrfs_release_path(&path);
1455 btrfs_release_path(&path);
/* saw backrefs but none matched -> 0; none at all -> 2 */
1458 return has_parent ? 0 : 2;
/*
 * Record every name packed into one DIR_ITEM/DIR_INDEX leaf item.
 * Each embedded btrfs_dir_item is decoded and turned into an inode
 * backref in the active node's inode cache (or root cache when the
 * name points at a subvolume ROOT_ITEM).  The directory record's
 * found_size accumulates the name lengths for later isize checking.
 * NOTE(review): this listing elides some lines (e.g. loop-variable
 * declarations); comments cover only the statements shown.
 */
1461 static int process_dir_item(struct btrfs_root *root,
1462 struct extent_buffer *eb,
1463 int slot, struct btrfs_key *key,
1464 struct shared_node *active_node)
1474 struct btrfs_dir_item *di;
1475 struct inode_record *rec;
1476 struct cache_tree *root_cache;
1477 struct cache_tree *inode_cache;
1478 struct btrfs_key location;
1479 char namebuf[BTRFS_NAME_LEN];
1481 root_cache = &active_node->root_cache;
1482 inode_cache = &active_node->inode_cache;
1483 rec = active_node->current;
1484 rec->found_dir_item = 1;
1486 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1487 total = btrfs_item_size_nr(eb, slot);
/* one leaf item can hold several dir entries back to back */
1488 while (cur < total) {
1490 btrfs_dir_item_key_to_cpu(eb, di, &location);
1491 name_len = btrfs_dir_name_len(eb, di);
1492 data_len = btrfs_dir_data_len(eb, di);
1493 filetype = btrfs_dir_type(eb, di);
1495 rec->found_size += name_len;
1496 if (name_len <= BTRFS_NAME_LEN) {
/* clamp over-long names and flag the backref */
1500 len = BTRFS_NAME_LEN;
1501 error = REF_ERR_NAME_TOO_LONG;
1503 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1505 if (location.type == BTRFS_INODE_ITEM_KEY) {
1506 add_inode_backref(inode_cache, location.objectid,
1507 key->objectid, key->offset, namebuf,
1508 len, filetype, key->type, error);
1509 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1510 add_inode_backref(root_cache, location.objectid,
1511 key->objectid, key->offset,
1512 namebuf, len, filetype,
/* unknown location type: complain but still record the backref */
1515 fprintf(stderr, "invalid location in dir item %u\n",
1517 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1518 key->objectid, key->offset, namebuf,
1519 len, filetype, key->type, error);
/* advance to the next packed entry */
1522 len = sizeof(*di) + name_len + data_len;
1523 di = (struct btrfs_dir_item *)((char *)di + len);
/* a DIR_INDEX item must hold exactly one entry */
1526 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1527 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record all INODE_REF entries packed into one leaf item: each name
 * becomes an inode backref keyed by (inode = key->objectid,
 * dir = key->offset, index).  Over-long names are clamped and flagged.
 * NOTE(review): some lines are elided from this listing.
 */
1532 static int process_inode_ref(struct extent_buffer *eb,
1533 int slot, struct btrfs_key *key,
1534 struct shared_node *active_node)
1542 struct cache_tree *inode_cache;
1543 struct btrfs_inode_ref *ref;
1544 char namebuf[BTRFS_NAME_LEN];
1546 inode_cache = &active_node->inode_cache;
1548 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1549 total = btrfs_item_size_nr(eb, slot);
/* multiple inode_refs can share one item; walk them back to back */
1550 while (cur < total) {
1551 name_len = btrfs_inode_ref_name_len(eb, ref);
1552 index = btrfs_inode_ref_index(eb, ref);
1553 if (name_len <= BTRFS_NAME_LEN) {
1557 len = BTRFS_NAME_LEN;
1558 error = REF_ERR_NAME_TOO_LONG;
1560 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1561 add_inode_backref(inode_cache, key->objectid, key->offset,
1562 index, namebuf, len, 0, key->type, error);
1564 len = sizeof(*ref) + name_len;
1565 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Same as process_inode_ref() but for INODE_EXTREF items, where the
 * parent directory id is stored inside each entry instead of in the
 * item key offset.
 * NOTE(review): some lines are elided from this listing.
 */
1571 static int process_inode_extref(struct extent_buffer *eb,
1572 int slot, struct btrfs_key *key,
1573 struct shared_node *active_node)
1582 struct cache_tree *inode_cache;
1583 struct btrfs_inode_extref *extref;
1584 char namebuf[BTRFS_NAME_LEN];
1586 inode_cache = &active_node->inode_cache;
1588 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1589 total = btrfs_item_size_nr(eb, slot);
1590 while (cur < total) {
1591 name_len = btrfs_inode_extref_name_len(eb, extref);
1592 index = btrfs_inode_extref_index(eb, extref);
/* extref carries the parent dir inside the entry itself */
1593 parent = btrfs_inode_extref_parent(eb, extref);
1594 if (name_len <= BTRFS_NAME_LEN) {
1598 len = BTRFS_NAME_LEN;
1599 error = REF_ERR_NAME_TOO_LONG;
1601 read_extent_buffer(eb, namebuf,
1602 (unsigned long)(extref + 1), len);
1603 add_inode_backref(inode_cache, key->objectid, parent,
1604 index, namebuf, len, 0, key->type, error);
1606 len = sizeof(*extref) + name_len;
1607 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Count how many bytes of [start, start+len) are covered by checksum
 * items in the csum tree and store the total in *found.  The search
 * may land one item past the range, so the previous item is inspected
 * first; each item's coverage is item_size / csum_size sectors.
 * NOTE(review): some lines are elided from this listing.
 */
1614 static int count_csum_range(struct btrfs_root *root, u64 start,
1615 u64 len, u64 *found)
1617 struct btrfs_key key;
1618 struct btrfs_path path;
1619 struct extent_buffer *leaf;
1624 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1626 btrfs_init_path(&path);
1628 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1630 key.type = BTRFS_EXTENT_CSUM_KEY;
1632 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* the previous item may still overlap the requested range */
1636 if (ret > 0 && path.slots[0] > 0) {
1637 leaf = path.nodes[0];
1638 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1639 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1640 key.type == BTRFS_EXTENT_CSUM_KEY)
1645 leaf = path.nodes[0];
1646 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1647 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1652 leaf = path.nodes[0];
1655 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1656 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1657 key.type != BTRFS_EXTENT_CSUM_KEY)
1660 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1661 if (key.offset >= start + len)
1664 if (key.offset > start)
/* item covers (size / csum_size) sectors starting at key.offset */
1667 size = btrfs_item_size_nr(leaf, path.slots[0]);
1668 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1669 if (csum_end > start) {
1670 size = min(csum_end - start, len);
1679 btrfs_release_path(&path);
/*
 * Validate one EXTENT_DATA item for the current inode record: track
 * the contiguous extent range (flagging overlaps and recording holes),
 * sanity-check inline/regular/prealloc extent fields, accumulate
 * found_size, and verify csum coverage via count_csum_range() for
 * non-reloc-tree extents.
 * NOTE(review): some lines are elided from this listing.
 */
1685 static int process_file_extent(struct btrfs_root *root,
1686 struct extent_buffer *eb,
1687 int slot, struct btrfs_key *key,
1688 struct shared_node *active_node)
1690 struct inode_record *rec;
1691 struct btrfs_file_extent_item *fi;
1693 u64 disk_bytenr = 0;
1694 u64 extent_offset = 0;
1695 u64 mask = root->sectorsize - 1;
1699 rec = active_node->current;
1700 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1701 rec->found_file_extent = 1;
/* first extent for this inode: start tracking the covered range */
1703 if (rec->extent_start == (u64)-1) {
1704 rec->extent_start = key->offset;
1705 rec->extent_end = key->offset;
1708 if (rec->extent_end > key->offset)
1709 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1710 else if (rec->extent_end < key->offset) {
/* gap between extents: remember it as a hole */
1711 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1712 key->offset - rec->extent_end);
1717 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1718 extent_type = btrfs_file_extent_type(eb, fi);
1720 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1721 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1723 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1724 rec->found_size += num_bytes;
/* round inline length up to a full sector for range tracking */
1725 num_bytes = (num_bytes + mask) & ~mask;
1726 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1727 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1728 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1729 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1730 extent_offset = btrfs_file_extent_offset(eb, fi);
/* extent length must be non-zero and sector aligned */
1731 if (num_bytes == 0 || (num_bytes & mask))
1732 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1733 if (num_bytes + extent_offset >
1734 btrfs_file_extent_ram_bytes(eb, fi))
1735 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* prealloc extents cannot be compressed/encrypted/encoded */
1736 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1737 (btrfs_file_extent_compression(eb, fi) ||
1738 btrfs_file_extent_encryption(eb, fi) ||
1739 btrfs_file_extent_other_encoding(eb, fi)))
1740 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* disk_bytenr == 0 means a hole; only count real extents */
1741 if (disk_bytenr > 0)
1742 rec->found_size += num_bytes;
1744 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1746 rec->extent_end = key->offset + num_bytes;
1749 * The data reloc tree will copy full extents into its inode and then
1750 * copy the corresponding csums. Because the extent it copied could be
1751 * a preallocated extent that hasn't been written to yet there may be no
1752 * csums to copy, ergo we won't have csums for our file extent. This is
1753 * ok so just don't bother checking csums if the inode belongs to the
1756 if (disk_bytenr > 0 &&
1757 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* csums cover the on-disk (compressed) bytes, not ram bytes */
1759 if (btrfs_file_extent_compression(eb, fi))
1760 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1762 disk_bytenr += extent_offset;
1764 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1767 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1769 rec->found_csum_item = 1;
1770 if (found < num_bytes)
1771 rec->some_csum_missing = 1;
1772 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
/* prealloc extents must not have csums at all */
1774 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Scan every item of a filesystem-tree leaf and dispatch it to the
 * per-item processors above.  Items are grouped by inode: when the
 * objectid advances, the previous inode record is marked checked and
 * possibly freed, and a record for the new inode is fetched.
 * Skips work entirely for a dead root at its root level.
 * NOTE(review): some lines (switch header, breaks) are elided from
 * this listing.
 */
1780 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1781 struct walk_control *wc)
1783 struct btrfs_key key;
1787 struct cache_tree *inode_cache;
1788 struct shared_node *active_node;
/* dead root (refs == 0) at root level: nothing to collect */
1790 if (wc->root_level == wc->active_node &&
1791 btrfs_root_refs(&root->root_item) == 0)
1794 active_node = wc->nodes[wc->active_node];
1795 inode_cache = &active_node->inode_cache;
1796 nritems = btrfs_header_nritems(eb);
1797 for (i = 0; i < nritems; i++) {
1798 btrfs_item_key_to_cpu(eb, &key, i);
1800 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1802 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* moved on to a new inode: finish the previous record */
1805 if (active_node->current == NULL ||
1806 active_node->current->ino < key.objectid) {
1807 if (active_node->current) {
1808 active_node->current->checked = 1;
1809 maybe_free_inode_rec(inode_cache,
1810 active_node->current);
1812 active_node->current = get_inode_rec(inode_cache,
1814 BUG_ON(IS_ERR(active_node->current));
1817 case BTRFS_DIR_ITEM_KEY:
1818 case BTRFS_DIR_INDEX_KEY:
1819 ret = process_dir_item(root, eb, i, &key, active_node);
1821 case BTRFS_INODE_REF_KEY:
1822 ret = process_inode_ref(eb, i, &key, active_node);
1824 case BTRFS_INODE_EXTREF_KEY:
1825 ret = process_inode_extref(eb, i, &key, active_node);
1827 case BTRFS_INODE_ITEM_KEY:
1828 ret = process_inode_item(eb, i, &key, active_node);
1830 case BTRFS_EXTENT_DATA_KEY:
1831 ret = process_file_extent(root, eb, i, &key,
/*
 * Issue readahead for all child blocks of @node from @slot onwards,
 * so that the subsequent synchronous reads during walk_down_tree()
 * hit already-fetched data.
 * NOTE(review): some lines are elided from this listing.
 */
1841 static void reada_walk_down(struct btrfs_root *root,
1842 struct extent_buffer *node, int slot)
1851 level = btrfs_header_level(node);
1855 nritems = btrfs_header_nritems(node);
1856 blocksize = root->nodesize;
1857 for (i = slot; i < nritems; i++) {
1858 bytenr = btrfs_node_blockptr(node, i);
1859 ptr_gen = btrfs_node_ptr_generation(node, i);
1860 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1865 * Check the child node/leaf by the following condition:
1866 * 1. the first item key of the node/leaf should be the same with the one
1868 * 2. block in parent node should match the child node/leaf.
1869 * 3. generation of parent node and child's header should be consistent.
1871 * Or the child node/leaf pointed by the key in parent is not valid.
1873 * We hope to check leaf owner too, but since subvol may share leaves,
1874 * which makes leaf owner check not so strong, key check should be
1875 sufficient for that case.
/*
 * Cross-check a child block against its parent's pointer slot, per the
 * conditions described in the comment above: first key, block number
 * and generation must all agree.  Mismatches are reported to stderr.
 * NOTE(review): some lines are elided from this listing.
 */
1877 static int check_child_node(struct btrfs_root *root,
1878 struct extent_buffer *parent, int slot,
1879 struct extent_buffer *child)
1881 struct btrfs_key parent_key;
1882 struct btrfs_key child_key;
1885 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* leaf uses item keys, internal node uses node keys */
1886 if (btrfs_header_level(child) == 0)
1887 btrfs_item_key_to_cpu(child, &child_key, 0);
1889 btrfs_node_key_to_cpu(child, &child_key, 0);
1891 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1894 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1895 parent_key.objectid, parent_key.type, parent_key.offset,
1896 child_key.objectid, child_key.type, child_key.offset);
1898 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1900 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1901 btrfs_node_blockptr(parent, slot),
1902 btrfs_header_bytenr(child));
1904 if (btrfs_node_ptr_generation(parent, slot) !=
1905 btrfs_header_generation(child)) {
1907 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1908 btrfs_header_generation(child),
1909 btrfs_node_ptr_generation(parent, slot));
/*
 * Per-level cache of the last looked-up block address and its extent
 * refcount; walk_down_tree() consults it to skip a repeated
 * btrfs_lookup_extent_info() for the same block.
 * NOTE(review): the struct's opening line is elided from this listing.
 */
1915 u64 bytenr[BTRFS_MAX_LEVEL];
1916 u64 refs[BTRFS_MAX_LEVEL];
/*
 * Descend the tree from path->nodes[*level], processing each leaf via
 * process_one_leaf().  Extent refcounts are cached in @nrefs to avoid
 * repeated lookups; blocks with refs > 1 go through the shared-node
 * machinery (enter_shared_node) so shared subtrees are handled once.
 * Child blocks are read (with readahead), validated against the
 * parent (check_child_node) and structurally checked before descent.
 * NOTE(review): some lines (error handling, gotos) are elided from
 * this listing.
 */
1919 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1920 struct walk_control *wc, int *level,
1921 struct node_refs *nrefs)
1923 enum btrfs_tree_block_status status;
1926 struct extent_buffer *next;
1927 struct extent_buffer *cur;
1932 WARN_ON(*level < 0);
1933 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* reuse the cached refcount when the block hasn't changed */
1935 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1936 refs = nrefs->refs[*level];
1939 ret = btrfs_lookup_extent_info(NULL, root,
1940 path->nodes[*level]->start,
1941 *level, 1, &refs, NULL);
1946 nrefs->bytenr[*level] = path->nodes[*level]->start;
1947 nrefs->refs[*level] = refs;
1951 ret = enter_shared_node(root, path->nodes[*level]->start,
1959 while (*level >= 0) {
1960 WARN_ON(*level < 0);
1961 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1962 cur = path->nodes[*level];
1964 if (btrfs_header_level(cur) != *level)
1967 if (path->slots[*level] >= btrfs_header_nritems(cur))
1970 ret = process_one_leaf(root, cur, wc);
1975 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1976 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1977 blocksize = root->nodesize;
/* same caching trick for the child level */
1979 if (bytenr == nrefs->bytenr[*level - 1]) {
1980 refs = nrefs->refs[*level - 1];
1982 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1983 *level - 1, 1, &refs, NULL);
1987 nrefs->bytenr[*level - 1] = bytenr;
1988 nrefs->refs[*level - 1] = refs;
/* shared child already visited: skip to the next slot */
1993 ret = enter_shared_node(root, bytenr, refs,
1996 path->slots[*level]++;
2001 next = btrfs_find_tree_block(root, bytenr, blocksize);
2002 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2003 free_extent_buffer(next);
2004 reada_walk_down(root, cur, path->slots[*level]);
2005 next = read_tree_block(root, bytenr, blocksize,
/* unreadable child: record the parent block as corrupt */
2007 if (!extent_buffer_uptodate(next)) {
2008 struct btrfs_key node_key;
2010 btrfs_node_key_to_cpu(path->nodes[*level],
2012 path->slots[*level]);
2013 btrfs_add_corrupt_extent_record(root->fs_info,
2015 path->nodes[*level]->start,
2016 root->nodesize, *level);
2022 ret = check_child_node(root, cur, path->slots[*level], next);
2028 if (btrfs_is_leaf(next))
2029 status = btrfs_check_leaf(root, NULL, next);
2031 status = btrfs_check_node(root, NULL, next);
2032 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2033 free_extent_buffer(next);
/* descend into the validated child */
2038 *level = *level - 1;
2039 free_extent_buffer(path->nodes[*level]);
2040 path->nodes[*level] = next;
2041 path->slots[*level] = 0;
2044 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Climb back up from *level, releasing each exhausted node and
 * leaving its shared-node context, until a level with a next slot to
 * visit is found (or the path runs out).
 * NOTE(review): some lines are elided from this listing.
 */
2048 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2049 struct walk_control *wc, int *level)
2052 struct extent_buffer *leaf;
2054 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2055 leaf = path->nodes[i];
/* more slots remain at this level: resume the walk here */
2056 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2061 free_extent_buffer(path->nodes[*level]);
2062 path->nodes[*level] = NULL;
2063 BUG_ON(*level > wc->active_node);
2064 if (*level == wc->active_node)
2065 leave_shared_node(root, wc, *level);
/*
 * Verify that @rec looks like a valid subvolume root directory:
 * a clean inode item, nlink of exactly 1 with no discovered links,
 * and a single inode-ref backref named ".." at index 0 that has no
 * matching dir item/index.
 * NOTE(review): some lines (return statements) are elided from this
 * listing.
 */
2072 static int check_root_dir(struct inode_record *rec)
2074 struct inode_backref *backref;
2077 if (!rec->found_inode_item || rec->errors)
2079 if (rec->nlink != 1 || rec->found_link != 0)
2081 if (list_empty(&rec->backrefs))
2083 backref = to_inode_backref(rec->backrefs.next);
2084 if (!backref->found_inode_ref)
2086 if (backref->index != 0 || backref->namelen != 2 ||
2087 memcmp(backref->name, "..", 2))
2089 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Repair a wrong directory isize: find the inode item of rec->ino
 * (search with offset (u64)-1 then step back one slot), rewrite its
 * size to the accumulated found_size, and clear the error flag.
 * NOTE(review): some lines are elided from this listing.
 */
2096 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2097 struct btrfs_root *root, struct btrfs_path *path,
2098 struct inode_record *rec)
2100 struct btrfs_inode_item *ei;
2101 struct btrfs_key key;
2104 key.objectid = rec->ino;
2105 key.type = BTRFS_INODE_ITEM_KEY;
/* offset -1 positions us just past the inode item */
2106 key.offset = (u64)-1;
2108 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2112 if (!path->slots[0]) {
2119 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2120 if (key.objectid != rec->ino) {
2125 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2126 struct btrfs_inode_item);
2127 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2128 btrfs_mark_buffer_dirty(path->nodes[0]);
2129 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2130 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2131 root->root_key.objectid);
2133 btrfs_release_path(path);
/*
 * Add the missing orphan item for rec->ino and clear the
 * corresponding error flag on success.
 * NOTE(review): some lines are elided from this listing.
 */
2137 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2138 struct btrfs_root *root,
2139 struct btrfs_path *path,
2140 struct inode_record *rec)
2144 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2145 btrfs_release_path(path);
2147 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Repair a wrong nbytes field: locate the inode item exactly and
 * rewrite nbytes to the accumulated found_size, clearing the error
 * flag.
 * NOTE(review): some lines are elided from this listing.
 */
2151 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2152 struct btrfs_root *root,
2153 struct btrfs_path *path,
2154 struct inode_record *rec)
2156 struct btrfs_inode_item *ei;
2157 struct btrfs_key key;
2160 key.objectid = rec->ino;
2161 key.type = BTRFS_INODE_ITEM_KEY;
2164 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2171 /* Since ret == 0, no need to check anything */
2172 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2173 struct btrfs_inode_item);
2174 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2175 btrfs_mark_buffer_dirty(path->nodes[0]);
2176 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2177 printf("reset nbytes for ino %llu root %llu\n",
2178 rec->ino, root->root_key.objectid);
2180 btrfs_release_path(path);
2184 static int add_missing_dir_index(struct btrfs_root *root,
2185 struct cache_tree *inode_cache,
2186 struct inode_record *rec,
2187 struct inode_backref *backref)
2189 struct btrfs_path path;
2190 struct btrfs_trans_handle *trans;
2191 struct btrfs_dir_item *dir_item;
2192 struct extent_buffer *leaf;
2193 struct btrfs_key key;
2194 struct btrfs_disk_key disk_key;
2195 struct inode_record *dir_rec;
2196 unsigned long name_ptr;
2197 u32 data_size = sizeof(*dir_item) + backref->namelen;
2200 trans = btrfs_start_transaction(root, 1);
2202 return PTR_ERR(trans);
2204 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2205 (unsigned long long)rec->ino);
2207 btrfs_init_path(&path);
2208 key.objectid = backref->dir;
2209 key.type = BTRFS_DIR_INDEX_KEY;
2210 key.offset = backref->index;
2211 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2214 leaf = path.nodes[0];
2215 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2217 disk_key.objectid = cpu_to_le64(rec->ino);
2218 disk_key.type = BTRFS_INODE_ITEM_KEY;
2219 disk_key.offset = 0;
2221 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2222 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2223 btrfs_set_dir_data_len(leaf, dir_item, 0);
2224 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2225 name_ptr = (unsigned long)(dir_item + 1);
2226 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2227 btrfs_mark_buffer_dirty(leaf);
2228 btrfs_release_path(&path);
2229 btrfs_commit_transaction(trans, root);
2231 backref->found_dir_index = 1;
2232 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2233 BUG_ON(IS_ERR(dir_rec));
2236 dir_rec->found_size += backref->namelen;
2237 if (dir_rec->found_size == dir_rec->isize &&
2238 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2239 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2240 if (dir_rec->found_size != dir_rec->isize)
2241 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Remove a bad DIR_INDEX entry in its own transaction.  Depending on
 * how many names share the item, either the whole item is deleted or
 * only the one dir name; the transaction is committed either way.
 * NOTE(review): some lines are elided from this listing.
 */
2246 static int delete_dir_index(struct btrfs_root *root,
2247 struct cache_tree *inode_cache,
2248 struct inode_record *rec,
2249 struct inode_backref *backref)
2251 struct btrfs_trans_handle *trans;
2252 struct btrfs_dir_item *di;
2253 struct btrfs_path path;
2256 trans = btrfs_start_transaction(root, 1);
2258 return PTR_ERR(trans);
2260 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2261 (unsigned long long)backref->dir,
2262 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2263 (unsigned long long)root->objectid);
2265 btrfs_init_path(&path);
/* mod -1 asks the lookup to prepare the item for deletion */
2266 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2267 backref->name, backref->namelen,
2268 backref->index, -1);
2271 btrfs_release_path(&path);
2272 btrfs_commit_transaction(trans, root);
/* delete the whole item, or just this name within it */
2279 ret = btrfs_del_item(trans, root, &path);
2281 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2283 btrfs_release_path(&path);
2284 btrfs_commit_transaction(trans, root);
/*
 * Recreate a lost inode item from whatever the scan discovered:
 * nlink from found_link, nbytes from found_size, mode/size guessed
 * from whether directory items were seen (dir 0755 with found_size,
 * otherwise regular file 0755 with extent_end).  All timestamps are
 * set to "now" and the item is inserted in its own transaction.
 * The user is warned that the result may be incomplete.
 * NOTE(review): some lines are elided from this listing.
 */
2288 static int create_inode_item(struct btrfs_root *root,
2289 struct inode_record *rec,
2290 struct inode_backref *backref, int root_dir)
2292 struct btrfs_trans_handle *trans;
2293 struct btrfs_inode_item inode_item;
2294 time_t now = time(NULL);
2297 trans = btrfs_start_transaction(root, 1);
2298 if (IS_ERR(trans)) {
2299 ret = PTR_ERR(trans);
2303 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2304 "be incomplete, please check permissions and content after "
2305 "the fsck completes.\n", (unsigned long long)root->objectid,
2306 (unsigned long long)rec->ino);
2308 memset(&inode_item, 0, sizeof(inode_item));
2309 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2311 btrfs_set_stack_inode_nlink(&inode_item, 1);
2313 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2314 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2315 if (rec->found_dir_item) {
/* both dir items and extents seen: ambiguous, prefer dir */
2316 if (rec->found_file_extent)
2317 fprintf(stderr, "root %llu inode %llu has both a dir "
2318 "item and extents, unsure if it is a dir or a "
2319 "regular file so setting it as a directory\n",
2320 (unsigned long long)root->objectid,
2321 (unsigned long long)rec->ino);
2322 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2323 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2324 } else if (!rec->found_dir_item) {
2325 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2326 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2328 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2329 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2330 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2331 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2332 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2333 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2334 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2335 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2337 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2339 btrfs_commit_transaction(trans, root);
/*
 * Walk all backrefs of @rec and repair the inconsistent ones:
 * delete bad dir indexes, recreate missing dir index items, insert a
 * missing dir index/item pair for a lone inode ref, and recreate the
 * inode item itself when every backref agrees it should exist.
 * Fully-consistent backrefs are dropped from the list.  Returns a
 * repair count (or a negative error).
 * NOTE(review): some lines are elided from this listing.
 */
2343 static int repair_inode_backrefs(struct btrfs_root *root,
2344 struct inode_record *rec,
2345 struct cache_tree *inode_cache,
2348 struct inode_backref *tmp, *backref;
2349 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2353 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* root dir missing its inode item: recreate it first */
2354 if (!delete && rec->ino == root_dirid) {
2355 if (!rec->found_inode_item) {
2356 ret = create_inode_item(root, rec, backref, 1);
2363 /* Index 0 for root dir's are special, don't mess with it */
2364 if (rec->ino == root_dirid && backref->index == 0)
/* dir index without inode ref, or with mismatched index: drop it */
2368 ((backref->found_dir_index && !backref->found_inode_ref) ||
2369 (backref->found_dir_index && backref->found_inode_ref &&
2370 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2371 ret = delete_dir_index(root, inode_cache, rec, backref);
2375 list_del(&backref->list);
/* dir item + inode ref but no dir index: recreate the index */
2379 if (!delete && !backref->found_dir_index &&
2380 backref->found_dir_item && backref->found_inode_ref) {
2381 ret = add_missing_dir_index(root, inode_cache, rec,
/*
 * NOTE(review): found_dir_index is tested twice below; upstream
 * intent was presumably dir_item && dir_index — confirm.
 */
2386 if (backref->found_dir_item &&
2387 backref->found_dir_index &&
2388 backref->found_dir_index) {
2389 if (!backref->errors &&
2390 backref->found_inode_ref) {
/* fully consistent backref: nothing left to track */
2391 list_del(&backref->list);
/* only an inode ref exists: insert the dir index/item pair */
2397 if (!delete && (!backref->found_dir_index &&
2398 !backref->found_dir_item &&
2399 backref->found_inode_ref)) {
2400 struct btrfs_trans_handle *trans;
2401 struct btrfs_key location;
2403 ret = check_dir_conflict(root, backref->name,
2409 * let nlink fixing routine to handle it,
2410 * which can do it better.
2415 location.objectid = rec->ino;
2416 location.type = BTRFS_INODE_ITEM_KEY;
2417 location.offset = 0;
2419 trans = btrfs_start_transaction(root, 1);
2420 if (IS_ERR(trans)) {
2421 ret = PTR_ERR(trans);
2424 fprintf(stderr, "adding missing dir index/item pair "
2426 (unsigned long long)rec->ino);
2427 ret = btrfs_insert_dir_item(trans, root, backref->name,
2429 backref->dir, &location,
2430 imode_to_type(rec->imode),
2433 btrfs_commit_transaction(trans, root);
/* all three refs present and clean, but no inode item: rebuild it */
2437 if (!delete && (backref->found_inode_ref &&
2438 backref->found_dir_index &&
2439 backref->found_dir_item &&
2440 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2441 !rec->found_inode_item)) {
2442 ret = create_inode_item(root, rec, backref, 0);
2449 return ret ? ret : repaired;
2453 * To determine the file type for nlink/inode_item repair
2455 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2456 * Return -ENOENT if file type is not found.
/*
 * Recover the BTRFS_FT_* file type for @rec, preferring the inode
 * item's mode, else the filetype stored in any dir-item/index
 * backref.  Contract is documented in the comment above.
 * NOTE(review): some lines (returns) are elided from this listing.
 */
2458 static int find_file_type(struct inode_record *rec, u8 *type)
2460 struct inode_backref *backref;
2462 /* For inode item recovered case */
2463 if (rec->found_inode_item) {
2464 *type = imode_to_type(rec->imode);
2468 list_for_each_entry(backref, &rec->backrefs, list) {
2469 if (backref->found_dir_index || backref->found_dir_item) {
2470 *type = backref->filetype;
2478 * To determine the file name for nlink repair
2480 * Return 0 if file name is found, set name and namelen.
2481 * Return -ENOENT if file name is not found.
/*
 * Recover a usable file name for @rec from the first backref that
 * carries one (dir index, dir item or inode ref).  Contract is
 * documented in the comment above.
 * NOTE(review): some lines (returns) are elided from this listing.
 */
2483 static int find_file_name(struct inode_record *rec,
2484 char *name, int *namelen)
2486 struct inode_backref *backref;
2488 list_for_each_entry(backref, &rec->backrefs, list) {
2489 if (backref->found_dir_index || backref->found_dir_item ||
2490 backref->found_inode_ref) {
2491 memcpy(name, backref->name, backref->namelen);
2492 *namelen = backref->namelen;
2499 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count from scratch: unlink every backref (pruning
 * the inconsistent ones from the list), force the on-disk nlink to 0,
 * then re-add only the fully-consistent backrefs via btrfs_add_link(),
 * which increments nlink as a side effect.
 * NOTE(review): some lines are elided from this listing.
 */
2500 static int reset_nlink(struct btrfs_trans_handle *trans,
2501 struct btrfs_root *root,
2502 struct btrfs_path *path,
2503 struct inode_record *rec)
2505 struct inode_backref *backref;
2506 struct inode_backref *tmp;
2507 struct btrfs_key key;
2508 struct btrfs_inode_item *inode_item;
2511 /* We don't believe this either, reset it and iterate backref */
2512 rec->found_link = 0;
2514 /* Remove all backref including the valid ones */
2515 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2516 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2517 backref->index, backref->name,
2518 backref->namelen, 0);
2522 /* remove invalid backref, so it won't be added back */
2523 if (!(backref->found_dir_index &&
2524 backref->found_dir_item &&
2525 backref->found_inode_ref)) {
2526 list_del(&backref->list);
2533 /* Set nlink to 0 */
2534 key.objectid = rec->ino;
2535 key.type = BTRFS_INODE_ITEM_KEY;
2537 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2544 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2545 struct btrfs_inode_item);
2546 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2547 btrfs_mark_buffer_dirty(path->nodes[0]);
2548 btrfs_release_path(path);
2551 * Add back valid inode_ref/dir_item/dir_index,
2552 * add_link() will handle the nlink inc, so new nlink must be correct
2554 list_for_each_entry(backref, &rec->backrefs, list) {
2555 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2556 backref->name, backref->namelen,
2557 backref->filetype, &backref->index, 1);
2562 btrfs_release_path(path);
/*
 * Fix a wrong link count: recover the file's name and type (with
 * numeric-ino / regular-file fallbacks), reset nlink via reset_nlink(),
 * and if no valid link remains, move the inode into a "lost+found"
 * directory, retrying with ".INO" suffixes on name collisions.
 * Always clears I_ERR_LINK_COUNT_WRONG to avoid re-processing loops.
 * NOTE(review): some lines are elided from this listing.
 */
2566 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2567 struct btrfs_root *root,
2568 struct btrfs_path *path,
2569 struct inode_record *rec)
2571 char *dir_name = "lost+found";
2572 char namebuf[BTRFS_NAME_LEN] = {0};
2577 int name_recovered = 0;
2578 int type_recovered = 0;
2582 * Get file name and type first before these invalid inode ref
2583 * are deleted by remove_all_invalid_backref()
2585 name_recovered = !find_file_name(rec, namebuf, &namelen);
2586 type_recovered = !find_file_type(rec, &type);
2588 if (!name_recovered) {
2589 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2590 rec->ino, rec->ino);
2591 namelen = count_digits(rec->ino);
2592 sprintf(namebuf, "%llu", rec->ino);
2595 if (!type_recovered) {
2596 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2598 type = BTRFS_FT_REG_FILE;
2602 ret = reset_nlink(trans, root, path, rec);
2605 "Failed to reset nlink for inode %llu: %s\n",
2606 rec->ino, strerror(-ret));
/* no surviving valid link: attach the inode under lost+found */
2610 if (rec->found_link == 0) {
2611 lost_found_ino = root->highest_inode;
2612 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2617 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2618 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2621 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2622 dir_name, strerror(-ret));
2625 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2626 namebuf, namelen, type, NULL, 1);
2628 * Add ".INO" suffix several times to handle case where
2629 * "FILENAME.INO" is already taken by another file.
2631 while (ret == -EEXIST) {
2633 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2635 if (namelen + count_digits(rec->ino) + 1 >
2640 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2642 namelen += count_digits(rec->ino) + 1;
2643 ret = btrfs_add_link(trans, root, rec->ino,
2644 lost_found_ino, namebuf,
2645 namelen, type, NULL, 1);
2649 "Failed to link the inode %llu to %s dir: %s\n",
2650 rec->ino, dir_name, strerror(-ret));
2654 * Just increase the found_link, don't actually add the
2655 * backref. This will make things easier and this inode
2656 * record will be freed after the repair is done.
2657 * So fsck will not report problem about this inode.
2660 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2661 namelen, namebuf, dir_name);
2663 printf("Fixed the nlink of inode %llu\n", rec->ino);
2666 * Clear the flag anyway, or we will loop forever for the same inode
2667 * as it will not be removed from the bad inode list and the dead loop
2670 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2671 btrfs_release_path(path);
2676 * Check if there is any normal(reg or prealloc) file extent for given
2678 * This is used to determine the file type when neither its dir_index/item or
2679 * inode_item exists.
2681 * This will *NOT* report error, if any error happens, just consider it does
2682 * not have any normal file extent.
/*
 * Check whether inode @ino has at least one non-inline EXTENT_DATA
 * item, as described in the comment above.  Errors are deliberately
 * swallowed: any failure is treated as "no normal extent found".
 * NOTE(review): some lines are elided from this listing.
 */
2684 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2686 struct btrfs_path *path;
2687 struct btrfs_key key;
2688 struct btrfs_key found_key;
2689 struct btrfs_file_extent_item *fi;
2693 path = btrfs_alloc_path();
2697 key.type = BTRFS_EXTENT_DATA_KEY;
2700 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2705 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2706 ret = btrfs_next_leaf(root, path);
2713 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2715 if (found_key.objectid != ino ||
2716 found_key.type != BTRFS_EXTENT_DATA_KEY)
2718 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2719 struct btrfs_file_extent_item);
2720 type = btrfs_file_extent_type(path->nodes[0], fi);
/* anything but inline (reg/prealloc) counts as a normal extent */
2721 if (type != BTRFS_FILE_EXTENT_INLINE) {
2727 btrfs_free_path(path);
/*
 * Map a BTRFS_FT_* directory-entry type to the matching S_IF* mode
 * bits via a designated-initializer lookup table.
 * NOTE(review): @type is not range-checked here in the visible code;
 * callers appear to pass only known BTRFS_FT_* values — confirm.
 */
2731 static u32 btrfs_type_to_imode(u8 type)
2733 static u32 imode_by_btrfs_type[] = {
2734 [BTRFS_FT_REG_FILE] = S_IFREG,
2735 [BTRFS_FT_DIR] = S_IFDIR,
2736 [BTRFS_FT_CHRDEV] = S_IFCHR,
2737 [BTRFS_FT_BLKDEV] = S_IFBLK,
2738 [BTRFS_FT_FIFO] = S_IFIFO,
2739 [BTRFS_FT_SOCK] = S_IFSOCK,
2740 [BTRFS_FT_SYMLINK] = S_IFLNK,
2743 return imode_by_btrfs_type[(type)];
/*
 * Rebuild a completely missing inode item.  The file type is guessed
 * in priority order: a found normal file extent => regular file,
 * found dir items => directory, orphan extents => regular file,
 * otherwise fall back to regular file with a warning.  Only the inode
 * item is recreated here; nlink repair is triggered afterwards by
 * setting I_ERR_LINK_COUNT_WRONG.
 * NOTE(review): some lines are elided from this listing.
 */
2746 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2747 struct btrfs_root *root,
2748 struct btrfs_path *path,
2749 struct inode_record *rec)
2753 int type_recovered = 0;
2756 printf("Trying to rebuild inode:%llu\n", rec->ino);
2758 type_recovered = !find_file_type(rec, &filetype);
2761 * Try to determine inode type if type not found.
2763 * For found regular file extent, it must be FILE.
2764 * For found dir_item/index, it must be DIR.
2766 * For undetermined one, use FILE as fallback.
2769 * 1. If found backref(inode_index/item is already handled) to it,
2771 * Need new inode-inode ref structure to allow search for that.
2773 if (!type_recovered) {
2774 if (rec->found_file_extent &&
2775 find_normal_file_extent(root, rec->ino)) {
2777 filetype = BTRFS_FT_REG_FILE;
2778 } else if (rec->found_dir_item) {
2780 filetype = BTRFS_FT_DIR;
2781 } else if (!list_empty(&rec->orphan_extents)) {
2783 filetype = BTRFS_FT_REG_FILE;
2785 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2788 filetype = BTRFS_FT_REG_FILE;
2792 ret = btrfs_new_inode(trans, root, rec->ino,
2793 mode | btrfs_type_to_imode(filetype));
2798 * Here inode rebuild is done, we only rebuild the inode item,
2799 * don't repair the nlink(like move to lost+found).
2800 * That is the job of nlink repair.
2802 * We just fill the record and return
2804 rec->found_dir_item = 1;
2805 rec->imode = mode | btrfs_type_to_imode(filetype);
2807 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2808 /* Ensure the inode_nlinks repair function will be called */
2809 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach orphan data extents recorded on 'rec' as real file
 * extents.  For each orphan: check for a conflicting existing extent
 * (assuming uncompressed, zero data offset, disk_len as length); on
 * conflict, free the orphan extent instead of inserting it.  After a
 * successful insert, update found_size/nbytes accounting and shrink
 * the file-extent hole tree, clearing the matching error bits once
 * they are resolved.
 * NOTE(review): error-handling lines between steps are elided here.
 */
2814 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2815 struct btrfs_root *root,
2816 struct btrfs_path *path,
2817 struct inode_record *rec)
2819 struct orphan_data_extent *orphan;
2820 struct orphan_data_extent *tmp;
2823 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2825 * Check for conflicting file extents
2827 * Here we don't know whether the extents is compressed or not,
2828 * so we can only assume it not compressed nor data offset,
2829 * and use its disk_len as extent length.
2831 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2832 orphan->offset, orphan->disk_len, 0);
2833 btrfs_release_path(path);
2838 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2839 orphan->disk_bytenr, orphan->disk_len);
/* Conflicting range already covered: drop the orphan's extent refs. */
2840 ret = btrfs_free_extent(trans,
2841 root->fs_info->extent_root,
2842 orphan->disk_bytenr, orphan->disk_len,
2843 0, root->objectid, orphan->objectid,
2848 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2849 orphan->offset, orphan->disk_bytenr,
2850 orphan->disk_len, orphan->disk_len);
2854 /* Update file size info */
2855 rec->found_size += orphan->disk_len;
2856 if (rec->found_size == rec->nbytes)
2857 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2859 /* Update the file extent hole info too */
2860 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2864 if (RB_EMPTY_ROOT(&rec->holes))
2865 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2867 list_del(&orphan->list);
/* All orphans processed: the orphan-extent error is resolved. */
2870 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill in missing ("discount") file extents by punching holes over
 * every gap recorded in rec->holes, clearing the error bit once the
 * hole tree is empty.  The special case near the end punches one hole
 * spanning the whole (sector-rounded) isize for a file that lost all
 * of its file extents.
 * NOTE(review): loop framing and error returns are elided here.
 */
2875 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2876 struct btrfs_root *root,
2877 struct btrfs_path *path,
2878 struct inode_record *rec)
2880 struct rb_node *node;
2881 struct file_extent_hole *hole;
2885 node = rb_first(&rec->holes);
2889 hole = rb_entry(node, struct file_extent_hole, node);
2890 ret = btrfs_punch_hole(trans, root, rec->ino,
2891 hole->start, hole->len);
2894 ret = del_file_extent_hole(&rec->holes, hole->start,
2898 if (RB_EMPTY_ROOT(&rec->holes))
2899 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2900 node = rb_first(&rec->holes);
2902 /* special case for a file losing all its file extent */
2904 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2905 round_up(rec->isize, root->sectorsize));
2909 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2910 rec->ino, root->objectid);
/*
 * Dispatch all repairable inode errors on 'rec' to their specific
 * repair helpers inside a single transaction.  Bails out early when
 * none of the repairable error bits are set.  The repairs are ordered
 * so that a missing inode item is rebuilt first, and any failure
 * short-circuits the remaining repairs (each is gated on !ret).
 */
2915 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2917 struct btrfs_trans_handle *trans;
2918 struct btrfs_path *path;
2921 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2922 I_ERR_NO_ORPHAN_ITEM |
2923 I_ERR_LINK_COUNT_WRONG |
2924 I_ERR_NO_INODE_ITEM |
2925 I_ERR_FILE_EXTENT_ORPHAN |
2926 I_ERR_FILE_EXTENT_DISCOUNT|
2927 I_ERR_FILE_NBYTES_WRONG)))
2930 path = btrfs_alloc_path();
2935 * For nlink repair, it may create a dir and add link, so
2936 * 2 for parent(256)'s dir_index and dir_item
2937 * 2 for lost+found dir's inode_item and inode_ref
2938 * 1 for the new inode_ref of the file
2939 * 2 for lost+found dir's dir_index and dir_item for the file
/* 2+2+1+2 = 7 items reserved, matching the breakdown above. */
2941 trans = btrfs_start_transaction(root, 7);
2942 if (IS_ERR(trans)) {
2943 btrfs_free_path(path);
2944 return PTR_ERR(trans);
2947 if (rec->errors & I_ERR_NO_INODE_ITEM)
2948 ret = repair_inode_no_item(trans, root, path, rec);
2949 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2950 ret = repair_inode_orphan_extent(trans, root, path, rec);
2951 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2952 ret = repair_inode_discount_extent(trans, root, path, rec);
2953 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2954 ret = repair_inode_isize(trans, root, path, rec);
2955 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2956 ret = repair_inode_orphan_item(trans, root, path, rec);
2957 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2958 ret = repair_inode_nlinks(trans, root, path, rec);
2959 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2960 ret = repair_inode_nbytes(trans, root, path, rec);
/* Commit is unconditional; 'ret' (set or not) is returned below. */
2961 btrfs_commit_transaction(trans, root);
2962 btrfs_free_path(path);
/*
 * Verify (and, in repair mode, fix) every inode record collected for
 * one fs root.  Steps visible here:
 *   1) empty-tree sanity check for a root with zero refs;
 *   2) track the highest inode number for later lost+found creation;
 *   3) staged backref repair (delete invalid refs before re-adding);
 *   4) validate/recreate the root directory inode;
 *   5) walk all remaining records: derive error bits, try repair,
 *      and print any unresolved errors and unresolved backrefs.
 * Returns -1 when errors remain, 0 otherwise.
 * NOTE(review): the stage loop framing and several locals (stage, err,
 * error, repair) are elided from this view.
 */
2966 static int check_inode_recs(struct btrfs_root *root,
2967 struct cache_tree *inode_cache)
2969 struct cache_extent *cache;
2970 struct ptr_node *node;
2971 struct inode_record *rec;
2972 struct inode_backref *backref;
2977 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* A dead root (refcount 0) should have no inode records at all. */
2979 if (btrfs_root_refs(&root->root_item) == 0) {
2980 if (!cache_tree_empty(inode_cache))
2981 fprintf(stderr, "warning line %d\n", __LINE__);
2986 * We need to record the highest inode number for later 'lost+found'
2988 * We must select an ino not used/referred by any existing inode, or
2989 * 'lost+found' ino may be a missing ino in a corrupted leaf,
2990 * this may cause 'lost+found' dir has wrong nlinks.
2992 cache = last_cache_extent(inode_cache);
2994 node = container_of(cache, struct ptr_node, cache);
2996 if (rec->ino > root->highest_inode)
2997 root->highest_inode = rec->ino;
3001 * We need to repair backrefs first because we could change some of the
3002 * errors in the inode recs.
3004 * We also need to go through and delete invalid backrefs first and then
3005 * add the correct ones second. We do this because we may get EEXIST
3006 * when adding back the correct index because we hadn't yet deleted the
3009 * For example, if we were missing a dir index then the directories
3010 * isize would be wrong, so if we fixed the isize to what we thought it
3011 * would be and then fixed the backref we'd still have a invalid fs, so
3012 * we need to add back the dir index and then check to see if the isize
3017 if (stage == 3 && !err)
3020 cache = search_cache_extent(inode_cache, 0);
3021 while (repair && cache) {
3022 node = container_of(cache, struct ptr_node, cache);
3024 cache = next_cache_extent(cache);
3026 /* Need to free everything up and rescan */
3028 remove_cache_extent(inode_cache, &node->cache);
3030 free_inode_rec(rec);
3034 if (list_empty(&rec->backrefs))
3037 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Validate the root directory inode; recreate it when repairing. */
3051 rec = get_inode_rec(inode_cache, root_dirid, 0);
3052 BUG_ON(IS_ERR(rec));
3054 ret = check_root_dir(rec);
3056 fprintf(stderr, "root %llu root dir %llu error\n",
3057 (unsigned long long)root->root_key.objectid,
3058 (unsigned long long)root_dirid);
3059 print_inode_error(root, rec);
3064 struct btrfs_trans_handle *trans;
3066 trans = btrfs_start_transaction(root, 1);
3067 if (IS_ERR(trans)) {
3068 err = PTR_ERR(trans);
3073 "root %llu missing its root dir, recreating\n",
3074 (unsigned long long)root->objectid);
3076 ret = btrfs_make_root_dir(trans, root, root_dirid);
3079 btrfs_commit_transaction(trans, root);
3083 fprintf(stderr, "root %llu root dir %llu not found\n",
3084 (unsigned long long)root->root_key.objectid,
3085 (unsigned long long)root_dirid);
/* Main pass over every remaining inode record. */
3089 cache = search_cache_extent(inode_cache, 0);
3092 node = container_of(cache, struct ptr_node, cache);
3094 remove_cache_extent(inode_cache, &node->cache);
/* Root dir and orphan objectid were handled above; skip them. */
3096 if (rec->ino == root_dirid ||
3097 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3098 free_inode_rec(rec);
3102 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3103 ret = check_orphan_item(root, rec->ino);
3105 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3106 if (can_free_inode_rec(rec)) {
3107 free_inode_rec(rec);
3112 if (!rec->found_inode_item)
3113 rec->errors |= I_ERR_NO_INODE_ITEM;
3114 if (rec->found_link != rec->nlink)
3115 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3117 ret = try_repair_inode(root, rec);
3118 if (ret == 0 && can_free_inode_rec(rec)) {
3119 free_inode_rec(rec);
3125 if (!(repair && ret == 0))
3127 print_inode_error(root, rec);
/* Report every backref that is still unresolved for this inode. */
3128 list_for_each_entry(backref, &rec->backrefs, list) {
3129 if (!backref->found_dir_item)
3130 backref->errors |= REF_ERR_NO_DIR_ITEM;
3131 if (!backref->found_dir_index)
3132 backref->errors |= REF_ERR_NO_DIR_INDEX;
3133 if (!backref->found_inode_ref)
3134 backref->errors |= REF_ERR_NO_INODE_REF;
3135 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3136 " namelen %u name %s filetype %d errors %x",
3137 (unsigned long long)backref->dir,
3138 (unsigned long long)backref->index,
3139 backref->namelen, backref->name,
3140 backref->filetype, backref->errors);
3141 print_ref_error(backref->errors);
3143 free_inode_rec(rec);
3145 return (error > 0) ? -1 : 0;
/*
 * Look up the root_record for 'objectid' in 'root_cache', allocating
 * and inserting a fresh zeroed record (size-1 cache extent keyed by
 * objectid) when none exists.  Returns ERR_PTR(-ENOMEM) on allocation
 * failure and ERR_PTR(-EEXIST) on a racing/duplicate insert.
 */
3148 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3151 struct cache_extent *cache;
3152 struct root_record *rec = NULL;
3155 cache = lookup_cache_extent(root_cache, objectid, 1);
3157 rec = container_of(cache, struct root_record, cache);
3159 rec = calloc(1, sizeof(*rec));
3161 return ERR_PTR(-ENOMEM);
3162 rec->objectid = objectid;
3163 INIT_LIST_HEAD(&rec->backrefs);
3164 rec->cache.start = objectid;
3165 rec->cache.size = 1;
3167 ret = insert_cache_extent(root_cache, &rec->cache);
3169 return ERR_PTR(-EEXIST);
/*
 * Find the backref on 'rec' matching (ref_root, dir, name); allocate
 * a new NUL-terminated one (name stored inline after the struct) and
 * append it to rec->backrefs when no match exists.  Note 'index' is
 * only used for a newly created backref, not for matching.
 * NOTE(review): the allocation-failure branch is elided from view.
 */
3174 static struct root_backref *get_root_backref(struct root_record *rec,
3175 u64 ref_root, u64 dir, u64 index,
3176 const char *name, int namelen)
3178 struct root_backref *backref;
3180 list_for_each_entry(backref, &rec->backrefs, list) {
3181 if (backref->ref_root != ref_root || backref->dir != dir ||
3182 backref->namelen != namelen)
3184 if (memcmp(name, backref->name, namelen))
/* No existing entry: allocate struct + name + trailing NUL in one go. */
3189 backref = calloc(1, sizeof(*backref) + namelen + 1);
3192 backref->ref_root = ref_root;
3194 backref->index = index;
3195 backref->namelen = namelen;
3196 memcpy(backref->name, name, namelen);
3197 backref->name[namelen] = '\0';
3198 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Destructor for one root_record stored in a cache_tree: unlink and
 * free every backref on its list, then (per the macro below) the
 * record itself.  FREE_EXTENT_CACHE_BASED_TREE generates
 * free_root_recs_tree() applying this to a whole tree.
 */
3202 static void free_root_record(struct cache_extent *cache)
3204 struct root_record *rec;
3205 struct root_backref *backref;
3207 rec = container_of(cache, struct root_record, cache);
3208 while (!list_empty(&rec->backrefs)) {
3209 backref = to_root_backref(rec->backrefs.next);
3210 list_del(&backref->list);
3217 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one reference to root 'root_id' coming from 'ref_root' as a
 * backref of the given item_type (DIR_ITEM / DIR_INDEX / ROOT_REF /
 * ROOT_BACKREF), merging error flags and checking index consistency
 * against previously seen items.  A root becomes 'reachable' once
 * both a forward root ref and a dir item have been seen.
 */
3219 static int add_root_backref(struct cache_tree *root_cache,
3220 u64 root_id, u64 ref_root, u64 dir, u64 index,
3221 const char *name, int namelen,
3222 int item_type, int errors)
3224 struct root_record *rec;
3225 struct root_backref *backref;
3227 rec = get_root_rec(root_cache, root_id);
3228 BUG_ON(IS_ERR(rec));
3229 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3232 backref->errors |= errors;
/* DIR_ITEM carries no index, so only cross-check for other types. */
3234 if (item_type != BTRFS_DIR_ITEM_KEY) {
3235 if (backref->found_dir_index || backref->found_back_ref ||
3236 backref->found_forward_ref) {
3237 if (backref->index != index)
3238 backref->errors |= REF_ERR_INDEX_UNMATCH;
3240 backref->index = index;
3244 if (item_type == BTRFS_DIR_ITEM_KEY) {
3245 if (backref->found_forward_ref)
3247 backref->found_dir_item = 1;
3248 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3249 backref->found_dir_index = 1;
3250 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3251 if (backref->found_forward_ref)
3252 backref->errors |= REF_ERR_DUP_ROOT_REF;
3253 else if (backref->found_dir_item)
3255 backref->found_forward_ref = 1;
3256 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3257 if (backref->found_back_ref)
3258 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3259 backref->found_back_ref = 1;
3264 if (backref->found_forward_ref && backref->found_dir_item)
3265 backref->reachable = 1;
/*
 * Move per-subvolume "inode" records that actually describe child
 * roots from 'src_cache' into root backrefs in 'dst_cache'.  Reloc
 * tree records are simply discarded.  For each record confirmed as a
 * child root (is_child_root), its dir_item/dir_index backrefs are
 * re-registered as root backrefs, then the record is freed.
 * NOTE(review): handling of is_child_root()'s negative/zero results
 * is elided from this view.
 */
3269 static int merge_root_recs(struct btrfs_root *root,
3270 struct cache_tree *src_cache,
3271 struct cache_tree *dst_cache)
3273 struct cache_extent *cache;
3274 struct ptr_node *node;
3275 struct inode_record *rec;
3276 struct inode_backref *backref;
3279 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3280 free_inode_recs_tree(src_cache);
3285 cache = search_cache_extent(src_cache, 0);
3288 node = container_of(cache, struct ptr_node, cache);
3290 remove_cache_extent(src_cache, &node->cache);
3293 ret = is_child_root(root, root->objectid, rec->ino);
3299 list_for_each_entry(backref, &rec->backrefs, list) {
3300 BUG_ON(backref->found_inode_ref);
3301 if (backref->found_dir_item)
3302 add_root_backref(dst_cache, rec->ino,
3303 root->root_key.objectid, backref->dir,
3304 backref->index, backref->name,
3305 backref->namelen, BTRFS_DIR_ITEM_KEY,
3307 if (backref->found_dir_index)
3308 add_root_backref(dst_cache, rec->ino,
3309 root->root_key.objectid, backref->dir,
3310 backref->index, backref->name,
3311 backref->namelen, BTRFS_DIR_INDEX_KEY,
3315 free_inode_rec(rec);
/*
 * Verify reachability of every fs root.  First pass: iteratively
 * clear 'reachable' on backrefs whose referencing root itself has no
 * refs (cannot catch circular references, as noted below).  Second
 * pass: report unreferenced fs trees (skipping roots that only exist
 * as dangling dir items, and orphaned roots), and print all
 * unresolved root backrefs.  Returns 1 when errors were found.
 * NOTE(review): loop framing and the 'errors'/'loop' locals are
 * elided from this view.
 */
3322 static int check_root_refs(struct btrfs_root *root,
3323 struct cache_tree *root_cache)
3325 struct root_record *rec;
3326 struct root_record *ref_root;
3327 struct root_backref *backref;
3328 struct cache_extent *cache;
/* The top-level fs tree is always considered referenced. */
3334 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3335 BUG_ON(IS_ERR(rec));
3338 /* fixme: this can not detect circular references */
3341 cache = search_cache_extent(root_cache, 0);
3345 rec = container_of(cache, struct root_record, cache);
3346 cache = next_cache_extent(cache);
3348 if (rec->found_ref == 0)
3351 list_for_each_entry(backref, &rec->backrefs, list) {
3352 if (!backref->reachable)
3355 ref_root = get_root_rec(root_cache,
3357 BUG_ON(IS_ERR(ref_root));
3358 if (ref_root->found_ref > 0)
/* Referencing root is itself unreferenced: propagate unreachability. */
3361 backref->reachable = 0;
3363 if (rec->found_ref == 0)
/* Reporting pass. */
3369 cache = search_cache_extent(root_cache, 0);
3373 rec = container_of(cache, struct root_record, cache);
3374 cache = next_cache_extent(cache);
3376 if (rec->found_ref == 0 &&
3377 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3378 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3379 ret = check_orphan_item(root->fs_info->tree_root,
3385 * If we don't have a root item then we likely just have
3386 * a dir item in a snapshot for this root but no actual
3387 * ref key or anything so it's meaningless.
3389 if (!rec->found_root_item)
3392 fprintf(stderr, "fs tree %llu not referenced\n",
3393 (unsigned long long)rec->objectid);
3397 if (rec->found_ref > 0 && !rec->found_root_item)
3399 list_for_each_entry(backref, &rec->backrefs, list) {
3400 if (!backref->found_dir_item)
3401 backref->errors |= REF_ERR_NO_DIR_ITEM;
3402 if (!backref->found_dir_index)
3403 backref->errors |= REF_ERR_NO_DIR_INDEX;
3404 if (!backref->found_back_ref)
3405 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3406 if (!backref->found_forward_ref)
3407 backref->errors |= REF_ERR_NO_ROOT_REF;
3408 if (backref->reachable && backref->errors)
3415 fprintf(stderr, "fs tree %llu refs %u %s\n",
3416 (unsigned long long)rec->objectid, rec->found_ref,
3417 rec->found_root_item ? "" : "not found");
3419 list_for_each_entry(backref, &rec->backrefs, list) {
3420 if (!backref->reachable)
3422 if (!backref->errors && rec->found_root_item)
3424 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3425 " index %llu namelen %u name %s errors %x\n",
3426 (unsigned long long)backref->ref_root,
3427 (unsigned long long)backref->dir,
3428 (unsigned long long)backref->index,
3429 backref->namelen, backref->name,
3431 print_ref_error(backref->errors);
3434 return errors > 0 ? 1 : 0;
/*
 * Decode one ROOT_REF/ROOT_BACKREF item at 'slot' of 'eb' and feed it
 * into the root cache.  For a ROOT_REF the key is (parent, REF, child)
 * so the child root is key->offset; for a ROOT_BACKREF it is
 * (child, BACKREF, parent) so the child is key->objectid.  Over-long
 * names are truncated to BTRFS_NAME_LEN and flagged.
 */
3437 static int process_root_ref(struct extent_buffer *eb, int slot,
3438 struct btrfs_key *key,
3439 struct cache_tree *root_cache)
3445 struct btrfs_root_ref *ref;
3446 char namebuf[BTRFS_NAME_LEN];
3449 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3451 dirid = btrfs_root_ref_dirid(eb, ref);
3452 index = btrfs_root_ref_sequence(eb, ref);
3453 name_len = btrfs_root_ref_name_len(eb, ref);
3455 if (name_len <= BTRFS_NAME_LEN) {
3459 len = BTRFS_NAME_LEN;
3460 error = REF_ERR_NAME_TOO_LONG;
/* The name is stored inline immediately after the ref structure. */
3462 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3464 if (key->type == BTRFS_ROOT_REF_KEY) {
3465 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3466 index, namebuf, len, key->type, error);
3468 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3469 index, namebuf, len, key->type, error);
/*
 * Destructor for one btrfs_corrupt_block cache entry; the macro below
 * generates free_corrupt_blocks_tree() to drain a whole tree of them.
 */
3474 static void free_corrupt_block(struct cache_extent *cache)
3476 struct btrfs_corrupt_block *corrupt;
3478 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3482 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3485 * Repair the btree of the given root.
3487 * The fix is to remove the node key in corrupt_blocks cache_tree.
3488 * and rebalance the tree.
3489 * After the fix, the btree should be writeable.
/*
 * Repair a root's btree by deleting every corrupted node/leaf listed
 * in 'corrupt_blocks' (see the comment above): first remove each bad
 * pointer with ins_len==0 so no rebalance touches corrupted siblings,
 * and free the corresponding extent; then run btrfs_search_slot() with
 * ins_len==-1 over the same keys purely to trigger tree balancing.
 * NOTE(review): error-path labels/returns are elided from this view.
 */
3491 static int repair_btree(struct btrfs_root *root,
3492 struct cache_tree *corrupt_blocks)
3494 struct btrfs_trans_handle *trans;
3495 struct btrfs_path *path;
3496 struct btrfs_corrupt_block *corrupt;
3497 struct cache_extent *cache;
3498 struct btrfs_key key;
3503 if (cache_tree_empty(corrupt_blocks))
3506 path = btrfs_alloc_path();
3510 trans = btrfs_start_transaction(root, 1);
3511 if (IS_ERR(trans)) {
3512 ret = PTR_ERR(trans);
3513 fprintf(stderr, "Error starting transaction: %s\n",
3517 cache = first_cache_extent(corrupt_blocks);
3519 corrupt = container_of(cache, struct btrfs_corrupt_block,
3521 level = corrupt->level;
3522 path->lowest_level = level;
3523 key.objectid = corrupt->key.objectid;
3524 key.type = corrupt->key.type;
3525 key.offset = corrupt->key.offset;
3528 * Here we don't want to do any tree balance, since it may
3529 * cause a balance with corrupted brother leaf/node,
3530 * so ins_len set to 0 here.
3531 * Balance will be done after all corrupt node/leaf is deleted.
3533 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3536 offset = btrfs_node_blockptr(path->nodes[level],
3537 path->slots[level]);
3539 /* Remove the ptr */
3540 ret = btrfs_del_ptr(trans, root, path, level,
3541 path->slots[level]);
3545 * Remove the corresponding extent
3546 * return value is not concerned.
3548 btrfs_release_path(path);
3549 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3550 0, root->root_key.objectid,
3552 cache = next_cache_extent(cache);
3555 /* Balance the btree using btrfs_search_slot() */
3556 cache = first_cache_extent(corrupt_blocks);
3558 corrupt = container_of(cache, struct btrfs_corrupt_block,
3560 memcpy(&key, &corrupt->key, sizeof(key));
3561 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3564 /* return will always >0 since it won't find the item */
3566 btrfs_release_path(path);
3567 cache = next_cache_extent(cache);
3570 btrfs_commit_transaction(trans, root);
3572 btrfs_free_path(path);
/*
 * Check one fs/subvolume root end-to-end: register its root record,
 * move pending orphan data extents onto per-inode records, validate
 * the root block itself, walk the whole tree (honoring drop_progress
 * for partially-deleted roots), optionally repair corrupted tree
 * blocks via repair_btree(), then merge child-root records and check
 * all collected inode records.  Uses a per-root corrupt_blocks cache
 * (see comment below), distinct from the extent-tree check usage.
 * NOTE(review): several error checks between steps are elided here.
 */
3576 static int check_fs_root(struct btrfs_root *root,
3577 struct cache_tree *root_cache,
3578 struct walk_control *wc)
3584 struct btrfs_path path;
3585 struct shared_node root_node;
3586 struct root_record *rec;
3587 struct btrfs_root_item *root_item = &root->root_item;
3588 struct cache_tree corrupt_blocks;
3589 struct orphan_data_extent *orphan;
3590 struct orphan_data_extent *tmp;
3591 enum btrfs_tree_block_status status;
3592 struct node_refs nrefs;
3595 * Reuse the corrupt_block cache tree to record corrupted tree block
3597 * Unlike the usage in extent tree check, here we do it in a per
3598 * fs/subvol tree base.
3600 cache_tree_init(&corrupt_blocks);
3601 root->fs_info->corrupt_blocks = &corrupt_blocks;
/* Reloc trees have no root record of their own. */
3603 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3604 rec = get_root_rec(root_cache, root->root_key.objectid);
3605 BUG_ON(IS_ERR(rec));
3606 if (btrfs_root_refs(root_item) > 0)
3607 rec->found_root_item = 1;
3610 btrfs_init_path(&path);
3611 memset(&root_node, 0, sizeof(root_node));
3612 cache_tree_init(&root_node.root_cache);
3613 cache_tree_init(&root_node.inode_cache);
3614 memset(&nrefs, 0, sizeof(nrefs));
3616 /* Move the orphan extent record to corresponding inode_record */
3617 list_for_each_entry_safe(orphan, tmp,
3618 &root->orphan_data_extents, list) {
3619 struct inode_record *inode;
3621 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3623 BUG_ON(IS_ERR(inode));
3624 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3625 list_move(&orphan->list, &inode->orphan_extents);
3628 level = btrfs_header_level(root->node);
3629 memset(wc->nodes, 0, sizeof(wc->nodes));
3630 wc->nodes[level] = &root_node;
3631 wc->active_node = level;
3632 wc->root_level = level;
3634 /* We may not have checked the root block, lets do that now */
3635 if (btrfs_is_leaf(root->node))
3636 status = btrfs_check_leaf(root, NULL, root->node);
3638 status = btrfs_check_node(root, NULL, root->node);
3639 if (status != BTRFS_TREE_BLOCK_CLEAN)
/* Either start from the root, or resume from drop_progress for a
 * root that was being deleted. */
3642 if (btrfs_root_refs(root_item) > 0 ||
3643 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3644 path.nodes[level] = root->node;
3645 extent_buffer_get(root->node);
3646 path.slots[level] = 0;
3648 struct btrfs_key key;
3649 struct btrfs_disk_key found_key;
3651 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3652 level = root_item->drop_level;
3653 path.lowest_level = level;
3654 if (level > btrfs_header_level(root->node) ||
3655 level >= BTRFS_MAX_LEVEL) {
3656 error("ignoring invalid drop level: %u", level);
3659 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3662 btrfs_node_key(path.nodes[level], &found_key,
3664 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3665 sizeof(found_key)));
3669 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3675 wret = walk_up_tree(root, &path, wc, &level);
3682 btrfs_release_path(&path);
3684 if (!cache_tree_empty(&corrupt_blocks)) {
3685 struct cache_extent *cache;
3686 struct btrfs_corrupt_block *corrupt;
3688 printf("The following tree block(s) is corrupted in tree %llu:\n",
3689 root->root_key.objectid);
3690 cache = first_cache_extent(&corrupt_blocks);
3692 corrupt = container_of(cache,
3693 struct btrfs_corrupt_block,
3695 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3696 cache->start, corrupt->level,
3697 corrupt->key.objectid, corrupt->key.type,
3698 corrupt->key.offset);
3699 cache = next_cache_extent(cache);
3702 printf("Try to repair the btree for root %llu\n",
3703 root->root_key.objectid);
3704 ret = repair_btree(root, &corrupt_blocks);
3706 fprintf(stderr, "Failed to repair btree: %s\n",
3709 printf("Btree for root %llu is fixed\n",
3710 root->root_key.objectid);
3714 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3718 if (root_node.current) {
3719 root_node.current->checked = 1;
3720 maybe_free_inode_rec(&root_node.inode_cache,
3724 err = check_inode_recs(root, &root_node.inode_cache);
/* Detach the per-root corrupt-block cache before returning. */
3728 free_corrupt_blocks_tree(&corrupt_blocks);
3729 root->fs_info->corrupt_blocks = NULL;
3730 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Return non-zero when 'objectid' names a tree whose contents look
 * like a filesystem tree: any objectid in the fs-tree range plus the
 * two relocation trees (which mirror fs-tree items).
 */
3734 static int fs_root_objectid(u64 objectid)
3736 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3737 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3739 return is_fstree(objectid);
/*
 * Iterate every ROOT_ITEM in the tree of tree roots and run
 * check_fs_root() on each fs/subvolume root; ROOT_REF/ROOT_BACKREF
 * items are fed to process_root_ref().  If the tree-root node changes
 * underneath us (a repair committed a transaction) or a root check
 * returns -EAGAIN, all collected root records are dropped and the
 * scan restarts from the top.  Reports progress via the global ctx.
 * NOTE(review): the restart label and loop framing are elided here.
 */
3742 static int check_fs_roots(struct btrfs_root *root,
3743 struct cache_tree *root_cache)
3745 struct btrfs_path path;
3746 struct btrfs_key key;
3747 struct walk_control wc;
3748 struct extent_buffer *leaf, *tree_node;
3749 struct btrfs_root *tmp_root;
3750 struct btrfs_root *tree_root = root->fs_info->tree_root;
3754 if (ctx.progress_enabled) {
3755 ctx.tp = TASK_FS_ROOTS;
3756 task_start(ctx.info);
3760 * Just in case we made any changes to the extent tree that weren't
3761 * reflected into the free space cache yet.
3764 reset_cached_block_groups(root->fs_info);
3765 memset(&wc, 0, sizeof(wc));
3766 cache_tree_init(&wc.shared);
3767 btrfs_init_path(&path);
3772 key.type = BTRFS_ROOT_ITEM_KEY;
3773 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3778 tree_node = tree_root->node;
/* Tree root changed (repair committed): restart the whole scan. */
3780 if (tree_node != tree_root->node) {
3781 free_root_recs_tree(root_cache);
3782 btrfs_release_path(&path);
3785 leaf = path.nodes[0];
3786 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3787 ret = btrfs_next_leaf(tree_root, &path);
3793 leaf = path.nodes[0];
3795 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3796 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3797 fs_root_objectid(key.objectid)) {
/* Reloc roots are read uncached and freed after the check. */
3798 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3799 tmp_root = btrfs_read_fs_root_no_cache(
3800 root->fs_info, &key);
3802 key.offset = (u64)-1;
3803 tmp_root = btrfs_read_fs_root(
3804 root->fs_info, &key);
3806 if (IS_ERR(tmp_root)) {
3810 ret = check_fs_root(tmp_root, root_cache, &wc);
3811 if (ret == -EAGAIN) {
3812 free_root_recs_tree(root_cache);
3813 btrfs_release_path(&path);
3818 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3819 btrfs_free_fs_root(tmp_root);
3820 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3821 key.type == BTRFS_ROOT_BACKREF_KEY) {
3822 process_root_ref(leaf, path.slots[0], &key,
3829 btrfs_release_path(&path);
3831 free_extent_cache_tree(&wc.shared);
3832 if (!cache_tree_empty(&wc.shared))
3833 fprintf(stderr, "warning line %d\n", __LINE__);
3835 task_stop(ctx.info);
/*
 * Cross-check every backref on extent record 'rec' against what the
 * extent tree claims.  Detects: backrefs missing from the extent
 * tree, tree backrefs never referenced back, data backrefs whose
 * local found_ref/num_refs disagree, mismatched disk bytenr or byte
 * length, and a global refcount that does not sum up.  Diagnostics
 * are printed only when 'print_errs' is set.  Returns non-zero when
 * any inconsistency was found (err accumulation is elided from view).
 */
3840 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3842 struct list_head *cur = rec->backrefs.next;
3843 struct extent_backref *back;
3844 struct tree_backref *tback;
3845 struct data_backref *dback;
3849 while(cur != &rec->backrefs) {
3850 back = to_extent_backref(cur);
3852 if (!back->found_extent_tree) {
3856 if (back->is_data) {
3857 dback = to_data_backref(back);
3858 fprintf(stderr, "Backref %llu %s %llu"
3859 " owner %llu offset %llu num_refs %lu"
3860 " not found in extent tree\n",
3861 (unsigned long long)rec->start,
3862 back->full_backref ?
3864 back->full_backref ?
3865 (unsigned long long)dback->parent:
3866 (unsigned long long)dback->root,
3867 (unsigned long long)dback->owner,
3868 (unsigned long long)dback->offset,
3869 (unsigned long)dback->num_refs);
3871 tback = to_tree_backref(back);
3872 fprintf(stderr, "Backref %llu parent %llu"
3873 " root %llu not found in extent tree\n",
3874 (unsigned long long)rec->start,
3875 (unsigned long long)tback->parent,
3876 (unsigned long long)tback->root);
3879 if (!back->is_data && !back->found_ref) {
3883 tback = to_tree_backref(back);
3884 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3885 (unsigned long long)rec->start,
3886 back->full_backref ? "parent" : "root",
3887 back->full_backref ?
3888 (unsigned long long)tback->parent :
3889 (unsigned long long)tback->root, back);
3891 if (back->is_data) {
3892 dback = to_data_backref(back);
/* Local count: refs actually seen must equal refs claimed. */
3893 if (dback->found_ref != dback->num_refs) {
3897 fprintf(stderr, "Incorrect local backref count"
3898 " on %llu %s %llu owner %llu"
3899 " offset %llu found %u wanted %u back %p\n",
3900 (unsigned long long)rec->start,
3901 back->full_backref ?
3903 back->full_backref ?
3904 (unsigned long long)dback->parent:
3905 (unsigned long long)dback->root,
3906 (unsigned long long)dback->owner,
3907 (unsigned long long)dback->offset,
3908 dback->found_ref, dback->num_refs, back);
3910 if (dback->disk_bytenr != rec->start) {
3914 fprintf(stderr, "Backref disk bytenr does not"
3915 " match extent record, bytenr=%llu, "
3916 "ref bytenr=%llu\n",
3917 (unsigned long long)rec->start,
3918 (unsigned long long)dback->disk_bytenr);
3921 if (dback->bytes != rec->nr) {
3925 fprintf(stderr, "Backref bytes do not match "
3926 "extent backref, bytenr=%llu, ref "
3927 "bytes=%llu, backref bytes=%llu\n",
3928 (unsigned long long)rec->start,
3929 (unsigned long long)rec->nr,
3930 (unsigned long long)dback->bytes);
/* Global count: tree backrefs count 1 each, data backrefs count
 * their found_ref; the sum must match the extent item's refs. */
3933 if (!back->is_data) {
3936 dback = to_data_backref(back);
3937 found += dback->found_ref;
3940 if (found != rec->refs) {
3944 fprintf(stderr, "Incorrect global backref count "
3945 "on %llu found %llu wanted %llu\n",
3946 (unsigned long long)rec->start,
3947 (unsigned long long)found,
3948 (unsigned long long)rec->refs);
/*
 * Unlink and free every backref hanging off 'rec' (the list_del/free
 * lines are elided from this view).
 */
3954 static int free_all_extent_backrefs(struct extent_record *rec)
3956 struct extent_backref *back;
3957 struct list_head *cur;
3958 while (!list_empty(&rec->backrefs)) {
3959 cur = rec->backrefs.next;
3960 back = to_extent_backref(cur);
/*
 * Drain 'extent_cache': remove every extent_record and free its
 * backrefs (and, per the elided lines, the record itself).
 */
3967 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3968 struct cache_tree *extent_cache)
3970 struct cache_extent *cache;
3971 struct extent_record *rec;
3974 cache = first_cache_extent(extent_cache);
3977 rec = container_of(cache, struct extent_record, cache);
3978 remove_cache_extent(extent_cache, cache);
3979 free_all_extent_backrefs(rec);
/*
 * Release 'rec' from 'extent_cache' once it is fully verified: content
 * and owner checked, refcounts consistent, no duplicates, all
 * backpointers accounted for, and none of the full-backref /
 * crossing-stripes / wrong-chunk-type flags set.  Otherwise the
 * record is kept for later error reporting.
 */
3984 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3985 struct extent_record *rec)
3987 if (rec->content_checked && rec->owner_ref_checked &&
3988 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3989 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3990 !rec->bad_full_backref && !rec->crossing_stripes &&
3991 !rec->wrong_chunk_type) {
3992 remove_cache_extent(extent_cache, &rec->cache);
3993 free_all_extent_backrefs(rec);
3994 list_del_init(&rec->list);
/*
 * Verify that the owner recorded in tree block 'buf' really does
 * reference it.  First look for a matching non-full tree backref on
 * 'rec'; failing that, search the owner's fs tree at level+1 for the
 * block's first key and confirm the parent node points at buf->start.
 * Returns 0 when the owner reference is confirmed, 1 when it is not.
 */
4000 static int check_owner_ref(struct btrfs_root *root,
4001 struct extent_record *rec,
4002 struct extent_buffer *buf)
4004 struct extent_backref *node;
4005 struct tree_backref *back;
4006 struct btrfs_root *ref_root;
4007 struct btrfs_key key;
4008 struct btrfs_path path;
4009 struct extent_buffer *parent;
4014 list_for_each_entry(node, &rec->backrefs, list) {
4017 if (!node->found_ref)
4019 if (node->full_backref)
4021 back = to_tree_backref(node);
4022 if (btrfs_header_owner(buf) == back->root)
4025 BUG_ON(rec->is_root);
4027 /* try to find the block by search corresponding fs tree */
4028 key.objectid = btrfs_header_owner(buf);
4029 key.type = BTRFS_ROOT_ITEM_KEY;
4030 key.offset = (u64)-1;
4032 ref_root = btrfs_read_fs_root(root->fs_info, &key);
4033 if (IS_ERR(ref_root))
4036 level = btrfs_header_level(buf);
/* Use the block's first key as the search target. */
4038 btrfs_item_key_to_cpu(buf, &key, 0);
4040 btrfs_node_key_to_cpu(buf, &key, 0);
4042 btrfs_init_path(&path);
4043 path.lowest_level = level + 1;
4044 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4048 parent = path.nodes[level + 1];
4049 if (parent && buf->start == btrfs_node_blockptr(parent,
4050 path.slots[level + 1]))
4053 btrfs_release_path(&path);
4054 return found ? 0 : 1;
/*
 * Return whether 'rec' carries a non-full tree backref rooted in the
 * extent tree, i.e. the block belongs to the extent tree itself.
 */
4057 static int is_extent_tree_record(struct extent_record *rec)
4059 struct list_head *cur = rec->backrefs.next;
4060 struct extent_backref *node;
4061 struct tree_backref *back;
4064 while(cur != &rec->backrefs) {
4065 node = to_extent_backref(cur);
4069 back = to_tree_backref(node);
4070 if (node->full_backref)
4072 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Handle an unreadable tree block at [start, start+len): only blocks
 * belonging to the extent tree are recorded, using the record's
 * parent key, via btrfs_add_corrupt_extent_record().
 */
4079 static int record_bad_block_io(struct btrfs_fs_info *info,
4080 struct cache_tree *extent_cache,
4083 struct extent_record *rec;
4084 struct cache_extent *cache;
4085 struct btrfs_key key;
4087 cache = lookup_cache_extent(extent_cache, start, len);
4091 rec = container_of(cache, struct extent_record, cache);
4092 if (!is_extent_tree_record(rec))
4095 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4096 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at 'slot' and 'slot+1' of 'buf' to fix a key-order
 * violation.  For internal nodes, the two key_ptrs are exchanged and,
 * when slot 0 was touched, the low keys up the path are fixed up.
 * For leaves, both item bodies are copied out, written back into each
 * other's data area, the item offset/size headers are swapped, and
 * the keys are rewritten via btrfs_set_item_key_unsafe().
 * NOTE(review): malloc-failure cleanup and free() of the temporary
 * buffers are elided from this view.
 */
4099 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4100 struct extent_buffer *buf, int slot)
4102 if (btrfs_header_level(buf)) {
4103 struct btrfs_key_ptr ptr1, ptr2;
4105 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4106 sizeof(struct btrfs_key_ptr));
4107 read_extent_buffer(buf, &ptr2,
4108 btrfs_node_key_ptr_offset(slot + 1),
4109 sizeof(struct btrfs_key_ptr));
4110 write_extent_buffer(buf, &ptr1,
4111 btrfs_node_key_ptr_offset(slot + 1),
4112 sizeof(struct btrfs_key_ptr));
4113 write_extent_buffer(buf, &ptr2,
4114 btrfs_node_key_ptr_offset(slot),
4115 sizeof(struct btrfs_key_ptr));
/* Slot 0 changed: propagate the new first key to the parents. */
4117 struct btrfs_disk_key key;
4118 btrfs_node_key(buf, &key, 0);
4119 btrfs_fixup_low_keys(root, path, &key,
4120 btrfs_header_level(buf) + 1);
4123 struct btrfs_item *item1, *item2;
4124 struct btrfs_key k1, k2;
4125 char *item1_data, *item2_data;
4126 u32 item1_offset, item2_offset, item1_size, item2_size;
4128 item1 = btrfs_item_nr(slot);
4129 item2 = btrfs_item_nr(slot + 1);
4130 btrfs_item_key_to_cpu(buf, &k1, slot);
4131 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4132 item1_offset = btrfs_item_offset(buf, item1);
4133 item2_offset = btrfs_item_offset(buf, item2);
4134 item1_size = btrfs_item_size(buf, item1);
4135 item2_size = btrfs_item_size(buf, item2);
4137 item1_data = malloc(item1_size);
4140 item2_data = malloc(item2_size);
4146 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4147 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/* Cross-write the bodies, then swap the headers to match. */
4149 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4150 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4154 btrfs_set_item_offset(buf, item1, item2_offset);
4155 btrfs_set_item_offset(buf, item2, item1_offset);
4156 btrfs_set_item_size(buf, item1, item2_size);
4157 btrfs_set_item_size(buf, item2, item1_size);
4159 path->slots[0] = slot;
4160 btrfs_set_item_key_unsafe(root, path, &k2);
4161 path->slots[0] = slot + 1;
4162 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Scan the block at path->lowest_level and swap any adjacent pair of
 * keys that compares out of order; the buffer is marked dirty after.
 */
4167 static int fix_key_order(struct btrfs_trans_handle *trans,
4168 struct btrfs_root *root,
4169 struct btrfs_path *path)
4171 struct extent_buffer *buf;
4172 struct btrfs_key k1, k2;
4174 int level = path->lowest_level;
4177 buf = path->nodes[level];
4178 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
/* nodes carry key pointers, leaves carry item keys */
4180 btrfs_node_key_to_cpu(buf, &k1, i);
4181 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4183 btrfs_item_key_to_cpu(buf, &k1, i);
4184 btrfs_item_key_to_cpu(buf, &k2, i + 1);
/* pair already strictly ordered: nothing to fix */
4186 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4188 ret = swap_values(root, path, buf, i);
4191 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item at @slot from leaf @buf, but only if its key type is
 * one fsck knows how to cope with losing (it can be rebuilt/detected
 * later); otherwise refuse to touch it.
 */
4197 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4198 struct btrfs_root *root,
4199 struct btrfs_path *path,
4200 struct extent_buffer *buf, int slot)
4202 struct btrfs_key key;
4203 int nritems = btrfs_header_nritems(buf);
4205 btrfs_item_key_to_cpu(buf, &key, slot);
4207 /* These are all the keys we can deal with missing. */
4208 if (key.type != BTRFS_DIR_INDEX_KEY &&
4209 key.type != BTRFS_EXTENT_ITEM_KEY &&
4210 key.type != BTRFS_METADATA_ITEM_KEY &&
4211 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4212 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4215 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4216 (unsigned long long)key.objectid, key.type,
4217 (unsigned long long)key.offset, slot, buf->start);
/* close the gap: shift the following item headers down one slot */
4218 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4219 btrfs_item_nr_offset(slot + 1),
4220 sizeof(struct btrfs_item) *
4221 (nritems - slot - 1));
4222 btrfs_set_header_nritems(buf, nritems - 1);
/* first key of the leaf may have changed: fix the parents */
4224 struct btrfs_disk_key disk_key;
4226 btrfs_item_key(buf, &disk_key, 0);
4227 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4229 btrfs_mark_buffer_dirty(buf);
/*
 * Re-pack leaf item data so every item ends exactly where the previous
 * item's data starts (item 0 ends at the end of the leaf data area).
 * Items extending past their allowed end are deleted when their key
 * type permits it; otherwise the leaf is reported unfixable.
 */
4233 static int fix_item_offset(struct btrfs_trans_handle *trans,
4234 struct btrfs_root *root,
4235 struct btrfs_path *path)
4237 struct extent_buffer *buf;
4241 /* We should only get this for leaves */
4242 BUG_ON(path->lowest_level);
4243 buf = path->nodes[0];
4245 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4246 unsigned int shift = 0, offset;
/* item 0 must end exactly at BTRFS_LEAF_DATA_SIZE */
4248 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4249 BTRFS_LEAF_DATA_SIZE(root)) {
4250 if (btrfs_item_end_nr(buf, i) >
4251 BTRFS_LEAF_DATA_SIZE(root)) {
4252 ret = delete_bogus_item(trans, root, path,
4256 fprintf(stderr, "item is off the end of the "
4257 "leaf, can't fix\n");
4261 shift = BTRFS_LEAF_DATA_SIZE(root) -
4262 btrfs_item_end_nr(buf, i);
/* later items must end where the previous item's data begins */
4263 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4264 btrfs_item_offset_nr(buf, i - 1)) {
4265 if (btrfs_item_end_nr(buf, i) >
4266 btrfs_item_offset_nr(buf, i - 1)) {
4267 ret = delete_bogus_item(trans, root, path,
4271 fprintf(stderr, "items overlap, can't fix\n");
4275 shift = btrfs_item_offset_nr(buf, i - 1) -
4276 btrfs_item_end_nr(buf, i);
4281 printf("Shifting item nr %d by %u bytes in block %llu\n",
4282 i, shift, (unsigned long long)buf->start);
4283 offset = btrfs_item_offset_nr(buf, i);
/* move the payload up by 'shift' and record the new offset */
4284 memmove_extent_buffer(buf,
4285 btrfs_leaf_data(buf) + offset + shift,
4286 btrfs_leaf_data(buf) + offset,
4287 btrfs_item_size_nr(buf, i));
4288 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4290 btrfs_mark_buffer_dirty(buf);
4294 * We may have moved things, in which case we want to exit so we don't
4295 * write those changes out. Once we have proper abort functionality in
4296 * progs this can be changed to something nicer.
4303 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4304 * then just return -EIO.
4306 static int try_to_fix_bad_block(struct btrfs_root *root,
4307 struct extent_buffer *buf,
4308 enum btrfs_tree_block_status status)
4310 struct btrfs_trans_handle *trans;
4311 struct ulist *roots;
4312 struct ulist_node *node;
4313 struct btrfs_root *search_root;
4314 struct btrfs_path *path;
4315 struct ulist_iterator iter;
4316 struct btrfs_key root_key, key;
/* only bad key order and invalid offsets are repairable here */
4319 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4320 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4323 path = btrfs_alloc_path();
/* collect every root that references this block */
4327 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4330 btrfs_free_path(path);
4334 ULIST_ITER_INIT(&iter);
4335 while ((node = ulist_next(roots, &iter))) {
4336 root_key.objectid = node->val;
4337 root_key.type = BTRFS_ROOT_ITEM_KEY;
4338 root_key.offset = (u64)-1;
4340 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4347 trans = btrfs_start_transaction(search_root, 0);
4348 if (IS_ERR(trans)) {
4349 ret = PTR_ERR(trans);
/* search with skip_check_block so the bad block itself can be COWed */
4353 path->lowest_level = btrfs_header_level(buf);
4354 path->skip_check_block = 1;
4355 if (path->lowest_level)
4356 btrfs_node_key_to_cpu(buf, &key, 0);
4358 btrfs_item_key_to_cpu(buf, &key, 0);
4359 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4362 btrfs_commit_transaction(trans, search_root);
/* dispatch to the matching repair helper */
4365 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4366 ret = fix_key_order(trans, search_root, path);
4367 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4368 ret = fix_item_offset(trans, search_root, path);
4370 btrfs_commit_transaction(trans, search_root);
4373 btrfs_release_path(path);
4374 btrfs_commit_transaction(trans, search_root);
4377 btrfs_free_path(path);
/*
 * Validate a tree block against its cached extent record: record its
 * generation and first key, run the leaf/node checkers, try in-place
 * repair on failure, and mark the record's content/owner-ref checks.
 */
4381 static int check_block(struct btrfs_root *root,
4382 struct cache_tree *extent_cache,
4383 struct extent_buffer *buf, u64 flags)
4385 struct extent_record *rec;
4386 struct cache_extent *cache;
4387 struct btrfs_key key;
4388 enum btrfs_tree_block_status status;
4392 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4395 rec = container_of(cache, struct extent_record, cache);
4396 rec->generation = btrfs_header_generation(buf);
4398 level = btrfs_header_level(buf);
/* remember the first key so mismatches can be reported later */
4399 if (btrfs_header_nritems(buf) > 0) {
4402 btrfs_item_key_to_cpu(buf, &key, 0);
4404 btrfs_node_key_to_cpu(buf, &key, 0);
4406 rec->info_objectid = key.objectid;
4408 rec->info_level = level;
4410 if (btrfs_is_leaf(buf))
4411 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4413 status = btrfs_check_node(root, &rec->parent_key, buf);
4415 if (status != BTRFS_TREE_BLOCK_CLEAN) {
/* attempt in-place repair before declaring the block bad */
4417 status = try_to_fix_bad_block(root, buf, status);
4418 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4420 fprintf(stderr, "bad block %llu\n",
4421 (unsigned long long)buf->start);
4424 * Signal to callers we need to start the scan over
4425 * again since we'll have cowed blocks.
4430 rec->content_checked = 1;
/* full-backref blocks need no owner reference verification */
4431 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4432 rec->owner_ref_checked = 1;
4434 ret = check_owner_ref(root, rec, buf);
4436 rec->owner_ref_checked = 1;
4440 maybe_free_extent_rec(extent_cache, rec);
/*
 * Linear scan of @rec's backref list for an existing tree backref:
 * full backrefs are matched by @parent, keyed ones by @root.
 */
4444 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4445 u64 parent, u64 root)
4447 struct list_head *cur = rec->backrefs.next;
4448 struct extent_backref *node;
4449 struct tree_backref *back;
4451 while(cur != &rec->backrefs) {
4452 node = to_extent_backref(cur);
4456 back = to_tree_backref(node);
/* parent-keyed (full) vs root-keyed lookup, per caller's arguments */
4458 if (!node->full_backref)
4460 if (parent == back->parent)
4463 if (node->full_backref)
4465 if (back->root == root)
/*
 * Allocate a new tree backref and append it to @rec's backref list:
 * full (parent-keyed) when @parent is set, otherwise keyed by @root.
 */
4472 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4473 u64 parent, u64 root)
4475 struct tree_backref *ref = malloc(sizeof(*ref));
4479 memset(&ref->node, 0, sizeof(ref->node));
4481 ref->parent = parent;
4482 ref->node.full_backref = 1;
4485 ref->node.full_backref = 0;
4487 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Linear scan of @rec's backrefs for a data backref matching @parent
 * (full backrefs) or the (root, owner, offset) triple.  When
 * @found_ref is set, an already-found ref must also agree on
 * @disk_bytenr/@bytes; mismatching ones are skipped so duplicate,
 * conflicting refs become visible to the caller.
 */
4492 static struct data_backref *find_data_backref(struct extent_record *rec,
4493 u64 parent, u64 root,
4494 u64 owner, u64 offset,
4496 u64 disk_bytenr, u64 bytes)
4498 struct list_head *cur = rec->backrefs.next;
4499 struct extent_backref *node;
4500 struct data_backref *back;
4502 while(cur != &rec->backrefs) {
4503 node = to_extent_backref(cur);
4507 back = to_data_backref(node);
4509 if (!node->full_backref)
4511 if (parent == back->parent)
4514 if (node->full_backref)
4516 if (back->root == root && back->owner == owner &&
4517 back->offset == offset) {
/* existing ref with a different size/bytenr is not a match */
4518 if (found_ref && node->found_ref &&
4519 (back->bytes != bytes ||
4520 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a new data backref, link it onto @rec's backref list and
 * raise the record's max_size when @max_size exceeds it.
 */
4529 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4530 u64 parent, u64 root,
4531 u64 owner, u64 offset,
4534 struct data_backref *ref = malloc(sizeof(*ref));
4538 memset(&ref->node, 0, sizeof(ref->node));
4539 ref->node.is_data = 1;
/* parent set: full backref; otherwise keyed by root/owner/offset */
4542 ref->parent = parent;
4545 ref->node.full_backref = 1;
4549 ref->offset = offset;
4550 ref->node.full_backref = 0;
4552 ref->bytes = max_size;
4555 list_add_tail(&ref->node.list, &rec->backrefs);
4556 if (max_size > rec->max_size)
4557 rec->max_size = max_size;
4561 /* Check if the type of extent matches with its chunk */
4562 static void check_extent_type(struct extent_record *rec)
4564 struct btrfs_block_group_cache *bg_cache;
4566 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4570 /* data extent, check chunk directly*/
4571 if (!rec->metadata) {
4572 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4573 rec->wrong_chunk_type = 1;
4577 /* metadata extent, check the obvious case first */
4578 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4579 BTRFS_BLOCK_GROUP_METADATA))) {
4580 rec->wrong_chunk_type = 1;
4585 * Check SYSTEM extent, as it's also marked as metadata, we can only
4586 * make sure it's a SYSTEM extent by its backref
4588 if (!list_empty(&rec->backrefs)) {
4589 struct extent_backref *node;
4590 struct tree_backref *tback;
/* only the first backref is examined to classify the block */
4593 node = to_extent_backref(rec->backrefs.next);
4594 if (node->is_data) {
4595 /* tree block shouldn't have data backref */
4596 rec->wrong_chunk_type = 1;
4599 tback = container_of(node, struct tree_backref, node);
/* chunk-tree-owned blocks must live in a SYSTEM block group */
4601 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4602 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4604 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4605 if (!(bg_cache->flags & bg_type))
4606 rec->wrong_chunk_type = 1;
4611 * Allocate a new extent record, fill default values from @tmpl and insert int
4612 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4613 * the cache, otherwise it fails.
4615 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4616 struct extent_record *tmpl)
4618 struct extent_record *rec;
4621 rec = malloc(sizeof(*rec));
/* copy caller-provided values; reset all per-record check state */
4624 rec->start = tmpl->start;
4625 rec->max_size = tmpl->max_size;
4626 rec->nr = max(tmpl->nr, tmpl->max_size);
4627 rec->found_rec = tmpl->found_rec;
4628 rec->content_checked = tmpl->content_checked;
4629 rec->owner_ref_checked = tmpl->owner_ref_checked;
4630 rec->num_duplicates = 0;
4631 rec->metadata = tmpl->metadata;
4632 rec->flag_block_full_backref = FLAG_UNSET;
4633 rec->bad_full_backref = 0;
4634 rec->crossing_stripes = 0;
4635 rec->wrong_chunk_type = 0;
4636 rec->is_root = tmpl->is_root;
4637 rec->refs = tmpl->refs;
4638 rec->extent_item_refs = tmpl->extent_item_refs;
4639 rec->parent_generation = tmpl->parent_generation;
4640 INIT_LIST_HEAD(&rec->backrefs);
4641 INIT_LIST_HEAD(&rec->dups);
4642 INIT_LIST_HEAD(&rec->list);
4643 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4644 rec->cache.start = tmpl->start;
4645 rec->cache.size = tmpl->nr;
4646 ret = insert_cache_extent(extent_cache, &rec->cache);
4651 bytes_used += rec->nr;
/* metadata must not cross a stripe boundary or kernel scrub breaks */
4654 rec->crossing_stripes = check_crossing_stripes(global_info,
4655 rec->start, global_info->tree_root->nodesize);
4656 check_extent_type(rec);
4661 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4663 * - refs - if found, increase refs
4664 * - is_root - if found, set
4665 * - content_checked - if found, set
4666 * - owner_ref_checked - if found, set
4668 * If not found, create a new one, initialize and insert.
4670 static int add_extent_rec(struct cache_tree *extent_cache,
4671 struct extent_record *tmpl)
4673 struct extent_record *rec;
4674 struct cache_extent *cache;
4678 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4680 rec = container_of(cache, struct extent_record, cache);
4684 rec->nr = max(tmpl->nr, tmpl->max_size);
4687 * We need to make sure to reset nr to whatever the extent
4688 * record says was the real size, this way we can compare it to
4691 if (tmpl->found_rec) {
/* different start or a second real record: track as duplicate */
4692 if (tmpl->start != rec->start || rec->found_rec) {
4693 struct extent_record *tmp;
4696 if (list_empty(&rec->list))
4697 list_add_tail(&rec->list,
4698 &duplicate_extents);
4701 * We have to do this song and dance in case we
4702 * find an extent record that falls inside of
4703 * our current extent record but does not have
4704 * the same objectid.
4706 tmp = malloc(sizeof(*tmp));
4709 tmp->start = tmpl->start;
4710 tmp->max_size = tmpl->max_size;
4713 tmp->metadata = tmpl->metadata;
4714 tmp->extent_item_refs = tmpl->extent_item_refs;
4715 INIT_LIST_HEAD(&tmp->list);
4716 list_add_tail(&tmp->list, &rec->dups);
4717 rec->num_duplicates++;
/* conflicting ref counts from the extent tree get reported */
4724 if (tmpl->extent_item_refs && !dup) {
4725 if (rec->extent_item_refs) {
4726 fprintf(stderr, "block %llu rec "
4727 "extent_item_refs %llu, passed %llu\n",
4728 (unsigned long long)tmpl->start,
4729 (unsigned long long)
4730 rec->extent_item_refs,
4731 (unsigned long long)tmpl->extent_item_refs);
4733 rec->extent_item_refs = tmpl->extent_item_refs;
4737 if (tmpl->content_checked)
4738 rec->content_checked = 1;
4739 if (tmpl->owner_ref_checked)
4740 rec->owner_ref_checked = 1;
4741 memcpy(&rec->parent_key, &tmpl->parent_key,
4742 sizeof(tmpl->parent_key));
4743 if (tmpl->parent_generation)
4744 rec->parent_generation = tmpl->parent_generation;
4745 if (rec->max_size < tmpl->max_size)
4746 rec->max_size = tmpl->max_size;
4749 * A metadata extent can't cross stripe_len boundary, otherwise
4750 * kernel scrub won't be able to handle it.
4751 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4755 rec->crossing_stripes = check_crossing_stripes(
4756 global_info, rec->start,
4757 global_info->tree_root->nodesize);
4758 check_extent_type(rec);
4759 maybe_free_extent_rec(extent_cache, rec);
/* not found in the cache: insert a fresh record */
4763 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Ensure @bytenr has an extent record and attach/update a tree backref
 * for (@parent, @root).  @found_ref distinguishes a reference seen
 * while walking the trees from one found in the extent tree itself;
 * seeing either twice indicates corruption and is reported.
 */
4768 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4769 u64 parent, u64 root, int found_ref)
4771 struct extent_record *rec;
4772 struct tree_backref *back;
4773 struct cache_extent *cache;
4776 cache = lookup_cache_extent(extent_cache, bytenr, 1);
/* no record yet: insert a minimal placeholder for this bytenr */
4778 struct extent_record tmpl;
4780 memset(&tmpl, 0, sizeof(tmpl));
4781 tmpl.start = bytenr;
4785 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4789 /* really a bug in cache_extent implement now */
4790 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4795 rec = container_of(cache, struct extent_record, cache);
4796 if (rec->start != bytenr) {
4798 * Several cause, from unaligned bytenr to over lapping extents
4803 back = find_tree_backref(rec, parent, root);
4805 back = alloc_tree_backref(rec, parent, root);
/* duplicate refs indicate corruption; warn but keep going */
4811 if (back->node.found_ref) {
4812 fprintf(stderr, "Extent back ref already exists "
4813 "for %llu parent %llu root %llu \n",
4814 (unsigned long long)bytenr,
4815 (unsigned long long)parent,
4816 (unsigned long long)root);
4818 back->node.found_ref = 1;
4820 if (back->node.found_extent_tree) {
4821 fprintf(stderr, "Extent back ref already exists "
4822 "for %llu parent %llu root %llu \n",
4823 (unsigned long long)bytenr,
4824 (unsigned long long)parent,
4825 (unsigned long long)root);
4827 back->node.found_extent_tree = 1;
4829 check_extent_type(rec);
4830 maybe_free_extent_rec(extent_cache, rec);
/*
 * Ensure @bytenr has an extent record and attach/update a data backref.
 * When @found_ref is set the ref was seen while walking a file tree
 * and @max_size is the real extent size; otherwise the ref came from
 * the extent tree and carries @num_refs.
 */
4834 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4835 u64 parent, u64 root, u64 owner, u64 offset,
4836 u32 num_refs, int found_ref, u64 max_size)
4838 struct extent_record *rec;
4839 struct data_backref *back;
4840 struct cache_extent *cache;
4843 cache = lookup_cache_extent(extent_cache, bytenr, 1);
/* no record yet: insert a placeholder sized by max_size */
4845 struct extent_record tmpl;
4847 memset(&tmpl, 0, sizeof(tmpl));
4848 tmpl.start = bytenr;
4850 tmpl.max_size = max_size;
4852 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4856 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4861 rec = container_of(cache, struct extent_record, cache);
4862 if (rec->max_size < max_size)
4863 rec->max_size = max_size;
4866 * If found_ref is set then max_size is the real size and must match the
4867 * existing refs. So if we have already found a ref then we need to
4868 * make sure that this ref matches the existing one, otherwise we need
4869 * to add a new backref so we can notice that the backrefs don't match
4870 * and we need to figure out who is telling the truth. This is to
4871 * account for that awful fsync bug I introduced where we'd end up with
4872 * a btrfs_file_extent_item that would have its length include multiple
4873 * prealloc extents or point inside of a prealloc extent.
4875 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4878 back = alloc_data_backref(rec, parent, root, owner, offset,
4884 BUG_ON(num_refs != 1);
4885 if (back->node.found_ref)
4886 BUG_ON(back->bytes != max_size);
4887 back->node.found_ref = 1;
4888 back->found_ref += 1;
4889 back->bytes = max_size;
4890 back->disk_bytenr = bytenr;
/* a ref found in the fs tree marks content and owner as verified */
4892 rec->content_checked = 1;
4893 rec->owner_ref_checked = 1;
4895 if (back->node.found_extent_tree) {
4896 fprintf(stderr, "Extent back ref already exists "
4897 "for %llu parent %llu root %llu "
4898 "owner %llu offset %llu num_refs %lu\n",
4899 (unsigned long long)bytenr,
4900 (unsigned long long)parent,
4901 (unsigned long long)root,
4902 (unsigned long long)owner,
4903 (unsigned long long)offset,
4904 (unsigned long)num_refs);
4906 back->num_refs = num_refs;
4907 back->node.found_extent_tree = 1;
4909 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue [bytenr, bytenr+size) for later processing unless already
 * seen: @seen is the dedup set, @pending the work queue.
 */
4913 static int add_pending(struct cache_tree *pending,
4914 struct cache_tree *seen, u64 bytenr, u32 size)
4917 ret = add_cache_extent(seen, bytenr, size);
4920 add_cache_extent(pending, bytenr, size);
/*
 * Fill @bits with up to @bits_nr extents to process next: readahead
 * entries take priority, then tree nodes near @last (biased 32K back
 * for read locality), then the generic pending queue.
 */
4924 static int pick_next_pending(struct cache_tree *pending,
4925 struct cache_tree *reada,
4926 struct cache_tree *nodes,
4927 u64 last, struct block_info *bits, int bits_nr,
4930 unsigned long node_start = last;
4931 struct cache_extent *cache;
/* readahead entries are handed out first, one at a time */
4934 cache = search_cache_extent(reada, 0);
4936 bits[0].start = cache->start;
4937 bits[0].size = cache->size;
/* look slightly behind 'last' to keep node reads clustered */
4942 if (node_start > 32768)
4943 node_start -= 32768;
4945 cache = search_cache_extent(nodes, node_start);
4947 cache = search_cache_extent(nodes, 0);
4950 cache = search_cache_extent(pending, 0);
/* batch consecutive cache entries into bits[] */
4955 bits[ret].start = cache->start;
4956 bits[ret].size = cache->size;
4957 cache = next_cache_extent(cache);
4959 } while (cache && ret < bits_nr);
4965 bits[ret].start = cache->start;
4966 bits[ret].size = cache->size;
4967 cache = next_cache_extent(cache);
4969 } while (cache && ret < bits_nr);
/* room to spare: pull nearby pending extents into the batch too */
4971 if (bits_nr - ret > 8) {
4972 u64 lookup = bits[0].start + bits[0].size;
4973 struct cache_extent *next;
4974 next = search_cache_extent(pending, lookup);
/* stop extending once the next extent is too far away */
4976 if (next->start - lookup > 32768)
4978 bits[ret].start = next->start;
4979 bits[ret].size = next->size;
4980 lookup = next->start + next->size;
4984 next = next_cache_extent(next);
/* Destructor callback: detach a chunk record from its lists. */
4992 static void free_chunk_record(struct cache_extent *cache)
4994 struct chunk_record *rec;
4996 rec = container_of(cache, struct chunk_record, cache);
4997 list_del_init(&rec->list);
4998 list_del_init(&rec->dextents);
/* Free every chunk record held in @chunk_cache. */
5002 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5004 cache_tree_free_extents(chunk_cache, free_chunk_record);
/* rb-tree destructor callback for device records. */
5007 static void free_device_record(struct rb_node *node)
5009 struct device_record *rec;
5011 rec = container_of(node, struct device_record, node);
5015 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the cache tree and, on success, append it to the
 * tree's block_groups list.
 */
5017 int insert_block_group_record(struct block_group_tree *tree,
5018 struct block_group_record *bg_rec)
5022 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5026 list_add_tail(&bg_rec->list, &tree->block_groups);
/* Destructor callback: unlink a block group record from its list. */
5030 static void free_block_group_record(struct cache_extent *cache)
5032 struct block_group_record *rec;
5034 rec = container_of(cache, struct block_group_record, cache);
5035 list_del_init(&rec->list);
/* Free every block group record held in @tree. */
5039 void free_block_group_tree(struct block_group_tree *tree)
5041 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec and start it on both orphan lists until its owning
 * chunk and device are matched up by later passes.
 */
5044 int insert_device_extent_record(struct device_extent_tree *tree,
5045 struct device_extent_record *de_rec)
5050 * Device extent is a bit different from the other extents, because
5051 * the extents which belong to the different devices may have the
5052 * same start and size, so we need use the special extent cache
5053 * search/insert functions.
5055 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5059 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5060 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/* Destructor callback: unlink a device extent from any orphan lists. */
5064 static void free_device_extent_record(struct cache_extent *cache)
5066 struct device_extent_record *rec;
5068 rec = container_of(cache, struct device_extent_record, cache);
5069 if (!list_empty(&rec->chunk_list))
5070 list_del_init(&rec->chunk_list);
5071 if (!list_empty(&rec->device_list))
5072 list_del_init(&rec->device_list);
/* Free every device extent record held in @tree. */
5076 void free_device_extent_tree(struct device_extent_tree *tree)
5078 cache_tree_free_extents(&tree->tree, free_device_extent_record);
5081 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Translate a v0 extent ref item into a backref: objectids below
 * BTRFS_FIRST_FREE_OBJECTID denote tree roots (tree backref),
 * everything else is treated as a data backref.
 */
5082 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5083 struct extent_buffer *leaf, int slot)
5085 struct btrfs_extent_ref_v0 *ref0;
5086 struct btrfs_key key;
5089 btrfs_item_key_to_cpu(leaf, &key, slot);
5090 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5091 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5092 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
5095 ret = add_data_backref(extent_cache, key.objectid, key.offset,
5096 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk record from the CHUNK_ITEM at @slot,
 * copying chunk geometry and the per-stripe devid/offset/uuid data.
 */
5102 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5103 struct btrfs_key *key,
5106 struct btrfs_chunk *ptr;
5107 struct chunk_record *rec;
5110 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5111 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
/* allocation size depends on the stripe count (trailing array) */
5113 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5115 fprintf(stderr, "memory allocation failed\n");
5119 INIT_LIST_HEAD(&rec->list);
5120 INIT_LIST_HEAD(&rec->dextents);
/* cache is keyed by the chunk's logical start (key->offset) */
5123 rec->cache.start = key->offset;
5124 rec->cache.size = btrfs_chunk_length(leaf, ptr);
5126 rec->generation = btrfs_header_generation(leaf);
5128 rec->objectid = key->objectid;
5129 rec->type = key->type;
5130 rec->offset = key->offset;
5132 rec->length = rec->cache.size;
5133 rec->owner = btrfs_chunk_owner(leaf, ptr);
5134 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5135 rec->type_flags = btrfs_chunk_type(leaf, ptr);
5136 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5137 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5138 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5139 rec->num_stripes = num_stripes;
5140 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* copy each stripe's device id, physical offset and device uuid */
5142 for (i = 0; i < rec->num_stripes; ++i) {
5143 rec->stripes[i].devid =
5144 btrfs_stripe_devid_nr(leaf, ptr, i);
5145 rec->stripes[i].offset =
5146 btrfs_stripe_offset_nr(leaf, ptr, i);
5147 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5148 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate a CHUNK_ITEM and cache it; invalid or duplicate chunks are
 * reported and not inserted.
 */
5155 static int process_chunk_item(struct cache_tree *chunk_cache,
5156 struct btrfs_key *key, struct extent_buffer *eb,
5159 struct chunk_record *rec;
5160 struct btrfs_chunk *chunk;
5163 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5165 * Do extra check for this chunk item,
5167 * It's still possible one can craft a leaf with CHUNK_ITEM, with
5168 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5169 * and owner<->key_type check.
5171 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5174 error("chunk(%llu, %llu) is not valid, ignore it",
5175 key->offset, btrfs_chunk_length(eb, chunk));
5178 rec = btrfs_new_chunk_record(eb, key, slot);
5179 ret = insert_cache_extent(chunk_cache, &rec->cache);
/* insertion failure means an overlapping/duplicate chunk */
5181 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5182 rec->offset, rec->length);
/*
 * Build a device record from a DEV_ITEM and insert it into the
 * devid-keyed rb-tree; duplicate devices are reported.
 */
5189 static int process_device_item(struct rb_root *dev_cache,
5190 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5192 struct btrfs_dev_item *ptr;
5193 struct device_record *rec;
5196 ptr = btrfs_item_ptr(eb,
5197 slot, struct btrfs_dev_item);
5199 rec = malloc(sizeof(*rec));
5201 fprintf(stderr, "memory allocation failed\n");
5205 rec->devid = key->offset;
5206 rec->generation = btrfs_header_generation(eb);
5208 rec->objectid = key->objectid;
5209 rec->type = key->type;
5210 rec->offset = key->offset;
/* devid from the item body overrides the key-derived value above */
5212 rec->devid = btrfs_device_id(eb, ptr);
5213 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5214 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5216 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5218 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Allocate a block group record from the BLOCK_GROUP_ITEM at @slot;
 * key->objectid/offset give the group's logical start and length.
 */
5225 struct block_group_record *
5226 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5229 struct btrfs_block_group_item *ptr;
5230 struct block_group_record *rec;
5232 rec = calloc(1, sizeof(*rec));
5234 fprintf(stderr, "memory allocation failed\n");
5238 rec->cache.start = key->objectid;
5239 rec->cache.size = key->offset;
5241 rec->generation = btrfs_header_generation(leaf);
5243 rec->objectid = key->objectid;
5244 rec->type = key->type;
5245 rec->offset = key->offset;
5247 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5248 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5250 INIT_LIST_HEAD(&rec->list);
/* Cache a BLOCK_GROUP_ITEM; duplicate block groups are reported. */
5255 static int process_block_group_item(struct block_group_tree *block_group_cache,
5256 struct btrfs_key *key,
5257 struct extent_buffer *eb, int slot)
5259 struct block_group_record *rec;
5262 rec = btrfs_new_block_group_record(eb, key, slot);
5263 ret = insert_block_group_record(block_group_cache, rec);
5265 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5266 rec->objectid, rec->offset);
/*
 * Allocate a device extent record from the DEV_EXTENT item at @slot.
 * The cache entry is keyed by (devid, physical offset) since extents
 * on different devices may share the same offset and size.
 */
5273 struct device_extent_record *
5274 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5275 struct btrfs_key *key, int slot)
5277 struct device_extent_record *rec;
5278 struct btrfs_dev_extent *ptr;
5280 rec = calloc(1, sizeof(*rec));
5282 fprintf(stderr, "memory allocation failed\n");
5286 rec->cache.objectid = key->objectid;
5287 rec->cache.start = key->offset;
5289 rec->generation = btrfs_header_generation(leaf);
5291 rec->objectid = key->objectid;
5292 rec->type = key->type;
5293 rec->offset = key->offset;
5295 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5296 rec->chunk_objecteid =
5297 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5299 btrfs_dev_extent_chunk_offset(leaf, ptr);
5300 rec->length = btrfs_dev_extent_length(leaf, ptr);
5301 rec->cache.size = rec->length;
5303 INIT_LIST_HEAD(&rec->chunk_list);
5304 INIT_LIST_HEAD(&rec->device_list);
/* Cache a DEV_EXTENT item; duplicate device extents are reported. */
5310 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5311 struct btrfs_key *key, struct extent_buffer *eb,
5314 struct device_extent_record *rec;
5317 rec = btrfs_new_device_extent_record(eb, key, slot);
5318 ret = insert_device_extent_record(dev_extent_cache, rec);
5321 "Device extent[%llu, %llu, %llu] existed.\n",
5322 rec->objectid, rec->offset, rec->length);
/*
 * Parse an EXTENT_ITEM/METADATA_ITEM: create or update its extent
 * record, then walk the inline references and register each as a tree
 * or data backref.  Misaligned or wrongly sized extents are rejected
 * with an error message before any record is created.
 */
5329 static int process_extent_item(struct btrfs_root *root,
5330 struct cache_tree *extent_cache,
5331 struct extent_buffer *eb, int slot)
5333 struct btrfs_extent_item *ei;
5334 struct btrfs_extent_inline_ref *iref;
5335 struct btrfs_extent_data_ref *dref;
5336 struct btrfs_shared_data_ref *sref;
5337 struct btrfs_key key;
5338 struct extent_record tmpl;
5343 u32 item_size = btrfs_item_size_nr(eb, slot);
5349 btrfs_item_key_to_cpu(eb, &key, slot);
/* METADATA_ITEM's offset is the level, so length is the nodesize */
5351 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5353 num_bytes = root->nodesize;
5355 num_bytes = key.offset;
5358 if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5359 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5360 key.objectid, root->sectorsize);
/* undersized item: must be a v0 (pre-2.6.29) extent item */
5363 if (item_size < sizeof(*ei)) {
5364 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5365 struct btrfs_extent_item_v0 *ei0;
5366 BUG_ON(item_size != sizeof(*ei0));
5367 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5368 refs = btrfs_extent_refs_v0(eb, ei0);
5372 memset(&tmpl, 0, sizeof(tmpl));
5373 tmpl.start = key.objectid;
5374 tmpl.nr = num_bytes;
5375 tmpl.extent_item_refs = refs;
5376 tmpl.metadata = metadata;
5378 tmpl.max_size = num_bytes;
5380 return add_extent_rec(extent_cache, &tmpl);
5383 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5384 refs = btrfs_extent_refs(eb, ei);
5385 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5389 if (metadata && num_bytes != root->nodesize) {
5390 error("ignore invalid metadata extent, length %llu does not equal to %u",
5391 num_bytes, root->nodesize);
5394 if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5395 error("ignore invalid data extent, length %llu is not aligned to %u",
5396 num_bytes, root->sectorsize);
5400 memset(&tmpl, 0, sizeof(tmpl));
5401 tmpl.start = key.objectid;
5402 tmpl.nr = num_bytes;
5403 tmpl.extent_item_refs = refs;
5404 tmpl.metadata = metadata;
5406 tmpl.max_size = num_bytes;
5407 add_extent_rec(extent_cache, &tmpl);
/* inline refs start right after the item body (plus tree block info) */
5409 ptr = (unsigned long)(ei + 1);
5410 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5411 key.type == BTRFS_EXTENT_ITEM_KEY)
5412 ptr += sizeof(struct btrfs_tree_block_info);
5414 end = (unsigned long)ei + item_size;
5416 iref = (struct btrfs_extent_inline_ref *)ptr;
5417 type = btrfs_extent_inline_ref_type(eb, iref);
5418 offset = btrfs_extent_inline_ref_offset(eb, iref);
/* register each inline ref as the matching backref kind */
5420 case BTRFS_TREE_BLOCK_REF_KEY:
5421 ret = add_tree_backref(extent_cache, key.objectid,
5424 error("add_tree_backref failed: %s",
5427 case BTRFS_SHARED_BLOCK_REF_KEY:
5428 ret = add_tree_backref(extent_cache, key.objectid,
5431 error("add_tree_backref failed: %s",
5434 case BTRFS_EXTENT_DATA_REF_KEY:
5435 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5436 add_data_backref(extent_cache, key.objectid, 0,
5437 btrfs_extent_data_ref_root(eb, dref),
5438 btrfs_extent_data_ref_objectid(eb,
5440 btrfs_extent_data_ref_offset(eb, dref),
5441 btrfs_extent_data_ref_count(eb, dref),
5444 case BTRFS_SHARED_DATA_REF_KEY:
5445 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5446 add_data_backref(extent_cache, key.objectid, offset,
5448 btrfs_shared_data_ref_count(eb, sref),
5452 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5453 key.objectid, key.type, num_bytes);
5456 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Verify that the free-space accounting of @cache exactly covers the byte
 * range [@offset, @offset + @bytes): carve out any superblock mirrors that
 * fall inside the range, then require one free-space entry that matches the
 * remaining range exactly.  The matched entry is unlinked so the caller can
 * later detect leftover (bogus) entries.
 */
5463 static int check_cache_range(struct btrfs_root *root,
5464 struct btrfs_block_group_cache *cache,
5465 u64 offset, u64 bytes)
5467 struct btrfs_free_space *entry;
/* Map every possible superblock mirror into this block group's chunk. */
5473 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5474 bytenr = btrfs_sb_offset(i);
5475 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5476 cache->key.objectid, bytenr, 0,
5477 &logical, &nr, &stripe_len);
/* Mirror stripe entirely outside the checked range: ignore it. */
5482 if (logical[nr] + stripe_len <= offset)
5484 if (offset + bytes <= logical[nr])
/* Stripe starts exactly at our offset: advance past the overlap. */
5486 if (logical[nr] == offset) {
5487 if (stripe_len >= bytes) {
5491 bytes -= stripe_len;
5492 offset += stripe_len;
/* Stripe begins before our range: clip the overlapping front part. */
5493 } else if (logical[nr] < offset) {
5494 if (logical[nr] + stripe_len >=
5499 bytes = (offset + bytes) -
5500 (logical[nr] + stripe_len);
5501 offset = logical[nr] + stripe_len;
5504 * Could be tricky, the super may land in the
5505 * middle of the area we're checking. First
5506 * check the easiest case, it's at the end.
5508 if (logical[nr] + stripe_len >=
5510 bytes = logical[nr] - offset;
5514 /* Check the left side */
5515 ret = check_cache_range(root, cache,
5517 logical[nr] - offset);
5523 /* Now we continue with the right side */
5524 bytes = (offset + bytes) -
5525 (logical[nr] + stripe_len);
5526 offset = logical[nr] + stripe_len;
/*
 * After excluding superblock stripes, the remaining range must be covered
 * by exactly one free-space entry with matching offset and length.
 */
5533 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5535 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5536 offset, offset+bytes);
5540 if (entry->offset != offset) {
5541 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5546 if (entry->bytes != bytes) {
5547 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5548 bytes, entry->bytes, offset);
/* Consume the entry; leftovers are flagged by verify_space_cache(). */
5552 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check a block group's loaded free-space cache against the extent
 * tree: walk all EXTENT/METADATA items inside the block group and verify
 * that every gap between allocated extents has a matching free-space entry
 * (via check_cache_range()).  Any free-space entries left over afterwards
 * indicate a corrupt cache.
 */
5557 static int verify_space_cache(struct btrfs_root *root,
5558 struct btrfs_block_group_cache *cache)
5560 struct btrfs_path *path;
5561 struct extent_buffer *leaf;
5562 struct btrfs_key key;
5566 path = btrfs_alloc_path();
/* All extent items live in the extent tree, regardless of @root. */
5570 root = root->fs_info->extent_root;
/* Never look below the primary superblock offset. */
5572 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5574 key.objectid = last;
5576 key.type = BTRFS_EXTENT_ITEM_KEY;
5578 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5583 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5584 ret = btrfs_next_leaf(root, path);
5592 leaf = path->nodes[0];
5593 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
/* Stop once we walk past the end of this block group. */
5594 if (key.objectid >= cache->key.offset + cache->key.objectid)
5596 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5597 key.type != BTRFS_METADATA_ITEM_KEY) {
/*
 * Contiguous with the previous extent: just advance @last.  For
 * METADATA items the length is implicit (nodesize), for EXTENT
 * items it is key.offset.
 */
5602 if (last == key.objectid) {
5603 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5604 last = key.objectid + key.offset;
5606 last = key.objectid + root->nodesize;
/* Gap between @last and this extent must be free space. */
5611 ret = check_cache_range(root, cache, last,
5612 key.objectid - last);
5615 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5616 last = key.objectid + key.offset;
5618 last = key.objectid + root->nodesize;
/* Trailing gap up to the end of the block group. */
5622 if (last < cache->key.objectid + cache->key.offset)
5623 ret = check_cache_range(root, cache, last,
5624 cache->key.objectid +
5625 cache->key.offset - last);
5628 btrfs_free_path(path);
/* check_cache_range() unlinked matched entries; any leftovers are bad. */
5631 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5632 fprintf(stderr, "There are still entries left in the space "
/*
 * Validate the free-space cache (v1 file-based cache or the free-space
 * tree, depending on the compat_ro feature) for every block group in the
 * filesystem.  Returns 0 on success, -EINVAL if any cache was invalid.
 */
5640 static int check_space_cache(struct btrfs_root *root)
5642 struct btrfs_block_group_cache *cache;
5643 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
/*
 * A cache generation of -1 means "no cache"; otherwise it must match the
 * super generation or the kernel would discard the cache anyway.
 */
5647 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5648 btrfs_super_generation(root->fs_info->super_copy) !=
5649 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5650 printf("cache and super generation don't match, space cache "
5651 "will be invalidated\n");
/* Optional progress-reporting task (see struct task_ctx). */
5655 if (ctx.progress_enabled) {
5656 ctx.tp = TASK_FREE_SPACE;
5657 task_start(ctx.info);
/* Iterate block groups in address order starting past the first super. */
5661 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5665 start = cache->key.objectid + cache->key.offset;
5666 if (!cache->free_space_ctl) {
5667 if (btrfs_init_free_space_ctl(cache,
5668 root->sectorsize)) {
/* Re-validating: drop any previously loaded entries first. */
5673 btrfs_remove_free_space_cache(cache);
/* Free-space-tree filesystems load from the tree, not the cache file. */
5676 if (btrfs_fs_compat_ro(root->fs_info,
5677 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5678 ret = exclude_super_stripes(root, cache);
5680 fprintf(stderr, "could not exclude super stripes: %s\n",
5685 ret = load_free_space_tree(root->fs_info, cache);
5686 free_excluded_extents(root, cache);
5688 fprintf(stderr, "could not load free space tree: %s\n",
5695 ret = load_free_space_cache(root->fs_info, cache);
/* Compare the loaded entries against the extent tree. */
5700 ret = verify_space_cache(root, cache);
5702 fprintf(stderr, "cache appears valid but isn't %Lu\n",
5703 cache->key.objectid);
5708 task_stop(ctx.info);
5710 return error ? -EINVAL : 0;
/*
 * Read the data extent [@bytenr, @bytenr + @num_bytes) and verify each
 * sector's checksum against the csum items stored in leaf @eb starting at
 * @leaf_offset.  On a mismatch, retries the remaining mirrors before
 * giving up on that sector.
 */
5713 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5714 u64 num_bytes, unsigned long leaf_offset,
5715 struct extent_buffer *eb) {
5718 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5720 unsigned long csum_offset;
5724 u64 data_checked = 0;
/* Csums cover whole sectors; a partial sector cannot be checked. */
5730 if (num_bytes % root->sectorsize)
5733 data = malloc(num_bytes);
5737 while (offset < num_bytes) {
5740 read_len = num_bytes - offset;
5741 /* read as much space once a time */
5742 ret = read_extent_data(root, data + offset,
5743 bytenr + offset, &read_len, mirror);
5747 /* verify every 4k data's checksum */
5748 while (data_checked < read_len) {
5750 tmp = offset + data_checked;
5752 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5753 csum, root->sectorsize);
5754 btrfs_csum_final(csum, (u8 *)&csum);
/* Locate the on-disk csum for this sector inside the csum item. */
5756 csum_offset = leaf_offset +
5757 tmp / root->sectorsize * csum_size;
5758 read_extent_buffer(eb, (char *)&csum_expected,
5759 csum_offset, csum_size);
5760 /* try another mirror */
5761 if (csum != csum_expected) {
5762 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5763 mirror, bytenr + tmp,
5764 csum, csum_expected);
5765 num_copies = btrfs_num_copies(
5766 &root->fs_info->mapping_tree,
5768 if (mirror < num_copies - 1) {
5773 data_checked += root->sectorsize;
/*
 * Verify that the byte range [@bytenr, @bytenr + @num_bytes) is fully
 * covered by EXTENT_ITEMs in the extent tree.  Called for ranges that have
 * csum items, which must never refer to unallocated space.  Recurses when
 * an extent splits the range in two.
 */
5782 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5785 struct btrfs_path *path;
5786 struct extent_buffer *leaf;
5787 struct btrfs_key key;
5790 path = btrfs_alloc_path();
5792 fprintf(stderr, "Error allocating path\n");
/* Search for the last possible item at @bytenr, then walk backwards. */
5796 key.objectid = bytenr;
5797 key.type = BTRFS_EXTENT_ITEM_KEY;
5798 key.offset = (u64)-1;
5801 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5804 fprintf(stderr, "Error looking up extent record %d\n", ret);
5805 btrfs_free_path(path);
5808 if (path->slots[0] > 0) {
5811 ret = btrfs_prev_leaf(root, path);
5814 } else if (ret > 0) {
5821 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5824 * Block group items come before extent items if they have the same
5825 * bytenr, so walk back one more just in case. Dear future traveller,
5826 * first congrats on mastering time travel. Now if it's not too much
5827 * trouble could you go back to 2006 and tell Chris to make the
5828 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5829 * EXTENT_ITEM_KEY please?
5831 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5832 if (path->slots[0] > 0) {
5835 ret = btrfs_prev_leaf(root, path);
5838 } else if (ret > 0) {
5843 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* Forward scan: shrink the range as each overlapping extent is found. */
5847 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5848 ret = btrfs_next_leaf(root, path);
5850 fprintf(stderr, "Error going to next leaf "
5852 btrfs_free_path(path);
5858 leaf = path->nodes[0];
5859 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5860 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent ends before our range: keep scanning forward. */
5864 if (key.objectid + key.offset < bytenr) {
/* Extent starts past our range: no more overlaps possible. */
5868 if (key.objectid > bytenr + num_bytes)
5871 if (key.objectid == bytenr) {
5872 if (key.offset >= num_bytes) {
5876 num_bytes -= key.offset;
5877 bytenr += key.offset;
5878 } else if (key.objectid < bytenr) {
5879 if (key.objectid + key.offset >= bytenr + num_bytes) {
5883 num_bytes = (bytenr + num_bytes) -
5884 (key.objectid + key.offset);
5885 bytenr = key.objectid + key.offset;
5887 if (key.objectid + key.offset < bytenr + num_bytes) {
5888 u64 new_start = key.objectid + key.offset;
5889 u64 new_bytes = bytenr + num_bytes - new_start;
5892 * Weird case, the extent is in the middle of
5893 * our range, we'll have to search one side
5894 * and then the other. Not sure if this happens
5895 * in real life, but no harm in coding it up
5896 * anyway just in case.
5898 btrfs_release_path(path);
5899 ret = check_extent_exists(root, new_start,
5902 fprintf(stderr, "Right section didn't "
5906 num_bytes = key.objectid - bytenr;
5909 num_bytes = key.objectid - bytenr;
/* Anything left uncovered means csums exist for unallocated space. */
5916 if (num_bytes && !ret) {
5917 fprintf(stderr, "There are no extents for csum range "
5918 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5922 btrfs_free_path(path);
/*
 * Walk the whole csum tree.  For every csum item: (1) optionally verify the
 * actual data checksums (when check_data_csum is set), and (2) accumulate
 * contiguous csum ranges and confirm each range is backed by extent items
 * via check_extent_exists().
 */
5926 static int check_csums(struct btrfs_root *root)
5928 struct btrfs_path *path;
5929 struct extent_buffer *leaf;
5930 struct btrfs_key key;
5931 u64 offset = 0, num_bytes = 0;
5932 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5936 unsigned long leaf_offset;
5938 root = root->fs_info->csum_root;
5939 if (!extent_buffer_uptodate(root->node)) {
5940 fprintf(stderr, "No valid csum tree found\n");
/* All csum items share this objectid; offset is the data bytenr. */
5944 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5945 key.type = BTRFS_EXTENT_CSUM_KEY;
5948 path = btrfs_alloc_path();
5952 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5954 fprintf(stderr, "Error searching csum tree %d\n", ret);
5955 btrfs_free_path(path);
5959 if (ret > 0 && path->slots[0])
5964 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5965 ret = btrfs_next_leaf(root, path);
5967 fprintf(stderr, "Error going to next leaf "
5974 leaf = path->nodes[0];
5976 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5977 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Bytes of data covered by this csum item: one csum per sector. */
5982 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5983 csum_size) * root->sectorsize;
5984 if (!check_data_csum)
5985 goto skip_csum_check;
5986 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5987 ret = check_extent_csums(root, key.offset, data_len,
/* Non-contiguous item: close out and verify the accumulated range. */
5993 offset = key.offset;
5994 } else if (key.offset != offset + num_bytes) {
5995 ret = check_extent_exists(root, offset, num_bytes);
5997 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5998 "there is no extent record\n",
5999 offset, offset+num_bytes);
6002 offset = key.offset;
6005 num_bytes += data_len;
6009 btrfs_free_path(path);
6013 static int is_dropped_key(struct btrfs_key *key,
6014 struct btrfs_key *drop_key) {
6015 if (key->objectid < drop_key->objectid)
6017 else if (key->objectid == drop_key->objectid) {
6018 if (key->type < drop_key->type)
6020 else if (key->type == drop_key->type) {
6021 if (key->offset < drop_key->offset)
6029 * Here are the rules for FULL_BACKREF.
6031 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6032 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6034 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
6035 * if it happened after the relocation occurred since we'll have dropped the
6036 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6037 * have no real way to know for sure.
6039 * We process the blocks one root at a time, and we start from the lowest root
6040 * objectid and go to the highest. So we can just lookup the owner backref for
6041 * the record and if we don't find it then we know it doesn't exist and we have
6044 * FIXME: if we ever start reclaiming root objectids then we need to fix this
6045 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6046 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Decide whether the tree block @buf should carry FULL_BACKREF in @flags,
 * using the heuristics documented in the comment above ("rules for
 * FULL_BACKREF").  Also marks the cached extent record bad_full_backref
 * when the recorded state disagrees with the computed one.
 */
6048 static int calc_extent_flag(struct btrfs_root *root,
6049 struct cache_tree *extent_cache,
6050 struct extent_buffer *buf,
6051 struct root_item_record *ri,
6054 struct extent_record *rec;
6055 struct cache_extent *cache;
6056 struct tree_backref *tback;
6059 cache = lookup_cache_extent(extent_cache, buf->start, 1);
6060 /* we have added this extent before */
6064 rec = container_of(cache, struct extent_record, cache);
6067 * Except file/reloc tree, we can not have
6070 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* The root node itself can never be FULL_BACKREF. */
6075 if (buf->start == ri->bytenr)
/* Rule 1: HEADER_FLAG_RELOC implies FULL_BACKREF. */
6078 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
/* Rule 2: still owned by the root we're walking -> normal backref. */
6081 owner = btrfs_header_owner(buf);
6082 if (owner == ri->objectid)
6085 tback = find_tree_backref(rec, 0, owner);
/* Recorded state says FULL_BACKREF but we computed normal: flag it. */
6090 if (rec->flag_block_full_backref != FLAG_UNSET &&
6091 rec->flag_block_full_backref != 0)
6092 rec->bad_full_backref = 1;
6095 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* Recorded state says normal but we computed FULL_BACKREF: flag it. */
6096 if (rec->flag_block_full_backref != FLAG_UNSET &&
6097 rec->flag_block_full_backref != 1)
6098 rec->bad_full_backref = 1;
6102 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6104 fprintf(stderr, "Invalid key type(");
6105 print_key_type(stderr, 0, key_type);
6106 fprintf(stderr, ") found in root(");
6107 print_objectid(stderr, rootid, 0);
6108 fprintf(stderr, ")\n");
6112 * Check if the key is valid with its extent buffer.
6114 * This is a early check in case invalid key exists in a extent buffer
6115 * This is not comprehensive yet, but should prevent wrong key/item passed
6118 static int check_type_with_root(u64 rootid, u8 key_type)
6121 /* Only valid in chunk tree */
6122 case BTRFS_DEV_ITEM_KEY:
6123 case BTRFS_CHUNK_ITEM_KEY:
6124 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6127 /* valid in csum and log tree */
6128 case BTRFS_CSUM_TREE_OBJECTID:
6129 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6133 case BTRFS_EXTENT_ITEM_KEY:
6134 case BTRFS_METADATA_ITEM_KEY:
6135 case BTRFS_BLOCK_GROUP_ITEM_KEY:
6136 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6139 case BTRFS_ROOT_ITEM_KEY:
6140 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6143 case BTRFS_DEV_EXTENT_KEY:
6144 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6150 report_mismatch_key_root(key_type, rootid);
/*
 * Process the next pending tree block in the breadth-ish walk of all trees:
 * read it, compute/validate its backref flags, then scan its items.  Leaf
 * items are dispatched to the per-type process_* helpers and backref
 * recorders; node pointers are recorded as extent records and queued for a
 * later iteration.  Also accumulates the global space-usage statistics.
 */
6154 static int run_next_block(struct btrfs_root *root,
6155 struct block_info *bits,
6158 struct cache_tree *pending,
6159 struct cache_tree *seen,
6160 struct cache_tree *reada,
6161 struct cache_tree *nodes,
6162 struct cache_tree *extent_cache,
6163 struct cache_tree *chunk_cache,
6164 struct rb_root *dev_cache,
6165 struct block_group_tree *block_group_cache,
6166 struct device_extent_tree *dev_extent_cache,
6167 struct root_item_record *ri)
6169 struct extent_buffer *buf;
6170 struct extent_record *rec = NULL;
6181 struct btrfs_key key;
6182 struct cache_extent *cache;
/* Pick the next batch of block addresses, preferring readahead order. */
6185 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6186 bits_nr, &reada_bits);
6191 for(i = 0; i < nritems; i++) {
6192 ret = add_cache_extent(reada, bits[i].start,
6197 /* fixme, get the parent transid */
6198 readahead_tree_block(root, bits[i].start,
6202 *last = bits[0].start;
6203 bytenr = bits[0].start;
6204 size = bits[0].size;
/* This block is being processed now; drop it from all queues. */
6206 cache = lookup_cache_extent(pending, bytenr, size);
6208 remove_cache_extent(pending, cache);
6211 cache = lookup_cache_extent(reada, bytenr, size);
6213 remove_cache_extent(reada, cache);
6216 cache = lookup_cache_extent(nodes, bytenr, size);
6218 remove_cache_extent(nodes, cache);
6221 cache = lookup_cache_extent(extent_cache, bytenr, size);
6223 rec = container_of(cache, struct extent_record, cache);
6224 gen = rec->parent_generation;
6227 /* fixme, get the real parent transid */
6228 buf = read_tree_block(root, bytenr, size, gen);
6229 if (!extent_buffer_uptodate(buf)) {
6230 record_bad_block_io(root->fs_info,
6231 extent_cache, bytenr, size);
6235 nritems = btrfs_header_nritems(buf);
/*
 * Determine the block's backref flags: ask the extent tree when it is
 * trusted, otherwise (or on failure) fall back to calc_extent_flag().
 */
6238 if (!init_extent_tree) {
6239 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6240 btrfs_header_level(buf), 1, NULL,
6243 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6245 fprintf(stderr, "Couldn't calc extent flags\n");
6246 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6251 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6253 fprintf(stderr, "Couldn't calc extent flags\n");
6254 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6258 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6260 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6261 ri->objectid == btrfs_header_owner(buf)) {
6263 * Ok we got to this block from its original owner and
6264 * we have FULL_BACKREF set. Relocation can leave
6265 * converted blocks over so this is altogether possible,
6266 * however it's not possible if the generation > the
6267 * last snapshot, so check for this case.
6269 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6270 btrfs_header_generation(buf) > ri->last_snapshot) {
6271 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6272 rec->bad_full_backref = 1;
6277 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6278 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6279 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6280 rec->bad_full_backref = 1;
/* Remember the final verdict on the extent record. */
6284 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6285 rec->flag_block_full_backref = 1;
6289 rec->flag_block_full_backref = 0;
6291 owner = btrfs_header_owner(buf);
6294 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: dispatch every item by key type. */
6298 if (btrfs_is_leaf(buf)) {
6299 btree_space_waste += btrfs_leaf_free_space(root, buf);
6300 for (i = 0; i < nritems; i++) {
6301 struct btrfs_file_extent_item *fi;
6302 btrfs_item_key_to_cpu(buf, &key, i);
6304 * Check key type against the leaf owner.
6305 * Could filter quite a lot of early error if
6308 if (check_type_with_root(btrfs_header_owner(buf),
6310 fprintf(stderr, "ignoring invalid key\n");
6313 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6314 process_extent_item(root, extent_cache, buf,
6318 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6319 process_extent_item(root, extent_cache, buf,
6323 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6325 btrfs_item_size_nr(buf, i);
6328 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6329 process_chunk_item(chunk_cache, &key, buf, i);
6332 if (key.type == BTRFS_DEV_ITEM_KEY) {
6333 process_device_item(dev_cache, &key, buf, i);
6336 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6337 process_block_group_item(block_group_cache,
6341 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6342 process_device_extent_item(dev_extent_cache,
/* Legacy v0 ref items only when compiled with compat support. */
6347 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6348 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6349 process_extent_ref_v0(extent_cache, buf, i);
6356 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6357 ret = add_tree_backref(extent_cache,
6358 key.objectid, 0, key.offset, 0);
6360 error("add_tree_backref failed: %s",
6364 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6365 ret = add_tree_backref(extent_cache,
6366 key.objectid, key.offset, 0, 0);
6368 error("add_tree_backref failed: %s",
6372 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6373 struct btrfs_extent_data_ref *ref;
6374 ref = btrfs_item_ptr(buf, i,
6375 struct btrfs_extent_data_ref);
6376 add_data_backref(extent_cache,
6378 btrfs_extent_data_ref_root(buf, ref),
6379 btrfs_extent_data_ref_objectid(buf,
6381 btrfs_extent_data_ref_offset(buf, ref),
6382 btrfs_extent_data_ref_count(buf, ref),
6383 0, root->sectorsize);
6386 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6387 struct btrfs_shared_data_ref *ref;
6388 ref = btrfs_item_ptr(buf, i,
6389 struct btrfs_shared_data_ref);
6390 add_data_backref(extent_cache,
6391 key.objectid, key.offset, 0, 0, 0,
6392 btrfs_shared_data_ref_count(buf, ref),
6393 0, root->sectorsize);
/* Orphans are queued on delete_items for later removal. */
6396 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6397 struct bad_item *bad;
6399 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6403 bad = malloc(sizeof(struct bad_item));
6406 INIT_LIST_HEAD(&bad->list);
6407 memcpy(&bad->key, &key,
6408 sizeof(struct btrfs_key));
6409 bad->root_id = owner;
6410 list_add_tail(&bad->list, &delete_items);
/* Remaining handling is for regular (non-inline) file extents. */
6413 if (key.type != BTRFS_EXTENT_DATA_KEY)
6415 fi = btrfs_item_ptr(buf, i,
6416 struct btrfs_file_extent_item);
6417 if (btrfs_file_extent_type(buf, fi) ==
6418 BTRFS_FILE_EXTENT_INLINE)
/* A zero disk bytenr is a hole; nothing to reference. */
6420 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6423 data_bytes_allocated +=
6424 btrfs_file_extent_disk_num_bytes(buf, fi);
6425 if (data_bytes_allocated < root->sectorsize) {
6428 data_bytes_referenced +=
6429 btrfs_file_extent_num_bytes(buf, fi);
6430 add_data_backref(extent_cache,
6431 btrfs_file_extent_disk_bytenr(buf, fi),
6432 parent, owner, key.objectid, key.offset -
6433 btrfs_file_extent_offset(buf, fi), 1, 1,
6434 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Node: record each child pointer and queue it for processing. */
6438 struct btrfs_key first_key;
6440 first_key.objectid = 0;
6443 btrfs_item_key_to_cpu(buf, &first_key, 0);
6444 level = btrfs_header_level(buf);
6445 for (i = 0; i < nritems; i++) {
6446 struct extent_record tmpl;
6448 ptr = btrfs_node_blockptr(buf, i);
6449 size = root->nodesize;
6450 btrfs_node_key_to_cpu(buf, &key, i);
/* Skip subtrees already dropped by snapshot deletion. */
6452 if ((level == ri->drop_level)
6453 && is_dropped_key(&key, &ri->drop_key)) {
6458 memset(&tmpl, 0, sizeof(tmpl));
6459 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6460 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6465 tmpl.max_size = size;
6466 ret = add_extent_rec(extent_cache, &tmpl);
6470 ret = add_tree_backref(extent_cache, ptr, parent,
6473 error("add_tree_backref failed: %s",
6479 add_pending(nodes, seen, ptr, size);
6481 add_pending(pending, seen, ptr, size);
/* Unused pointer slots count as wasted btree space. */
6484 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6485 nritems) * sizeof(struct btrfs_key_ptr);
/* Global statistics reported at the end of the check. */
6487 total_btree_bytes += buf->len;
6488 if (fs_root_objectid(btrfs_header_owner(buf)))
6489 total_fs_tree_bytes += buf->len;
6490 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6491 total_extent_tree_bytes += buf->len;
6492 if (!found_old_backref &&
6493 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6494 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6495 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6496 found_old_backref = 1;
6498 free_extent_buffer(buf);
/*
 * Seed the block walk with a tree root @buf: queue it on the nodes or
 * pending cache (depending on level), create its extent record, and add a
 * tree backref for it (shared-style for reloc/old-format roots, keyed by
 * @objectid otherwise).
 */
6502 static int add_root_to_pending(struct extent_buffer *buf,
6503 struct cache_tree *extent_cache,
6504 struct cache_tree *pending,
6505 struct cache_tree *seen,
6506 struct cache_tree *nodes,
6509 struct extent_record tmpl;
/* Interior nodes go on the nodes queue, leaves on pending. */
6512 if (btrfs_header_level(buf) > 0)
6513 add_pending(nodes, seen, buf->start, buf->len);
6515 add_pending(pending, seen, buf->start, buf->len);
6517 memset(&tmpl, 0, sizeof(tmpl));
6518 tmpl.start = buf->start;
6523 tmpl.max_size = buf->len;
6524 add_extent_rec(extent_cache, &tmpl);
/* Reloc trees and pre-mixed-backref blocks use parent-based backrefs. */
6526 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6527 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6528 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6531 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6536 /* as we fix the tree, we might be deleting blocks that
6537 * we're tracking for repair. This hook makes sure we
6538 * remove any backrefs for blocks as we are fixing them.
6540 static int free_extent_hook(struct btrfs_trans_handle *trans,
6541 struct btrfs_root *root,
6542 u64 bytenr, u64 num_bytes, u64 parent,
6543 u64 root_objectid, u64 owner, u64 offset,
6546 struct extent_record *rec;
6547 struct cache_extent *cache;
6549 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* Owners >= FIRST_FREE_OBJECTID are inodes, i.e. data extents. */
6551 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6552 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6556 rec = container_of(cache, struct extent_record, cache);
/* Data extent: decrement the matching data backref's counters. */
6558 struct data_backref *back;
6559 back = find_data_backref(rec, parent, root_objectid, owner,
6560 offset, 1, bytenr, num_bytes);
6563 if (back->node.found_ref) {
6564 back->found_ref -= refs_to_drop;
6566 rec->refs -= refs_to_drop;
6568 if (back->node.found_extent_tree) {
6569 back->num_refs -= refs_to_drop;
6570 if (rec->extent_item_refs)
6571 rec->extent_item_refs -= refs_to_drop;
6573 if (back->found_ref == 0)
6574 back->node.found_ref = 0;
6575 if (back->num_refs == 0)
6576 back->node.found_extent_tree = 0;
/* Backref fully gone on both sides: drop it from the record. */
6578 if (!back->node.found_extent_tree && back->node.found_ref) {
6579 list_del(&back->node.list);
/* Tree block: same idea, but tree backrefs are single-count. */
6583 struct tree_backref *back;
6584 back = find_tree_backref(rec, parent, root_objectid);
6587 if (back->node.found_ref) {
6590 back->node.found_ref = 0;
6592 if (back->node.found_extent_tree) {
6593 if (rec->extent_item_refs)
6594 rec->extent_item_refs--;
6595 back->node.found_extent_tree = 0;
6597 if (!back->node.found_extent_tree && back->node.found_ref) {
6598 list_del(&back->node.list);
/* Free the record entirely if nothing references it any more. */
6602 maybe_free_extent_rec(extent_cache, rec);
/*
 * Repair helper: delete every extent-tree item (extent/metadata items and
 * all backref item types) recorded for @bytenr, walking keys from highest
 * to lowest.  Block group accounting is updated when an allocation item is
 * removed.
 */
6607 static int delete_extent_records(struct btrfs_trans_handle *trans,
6608 struct btrfs_root *root,
6609 struct btrfs_path *path,
6610 u64 bytenr, u64 new_len)
6612 struct btrfs_key key;
6613 struct btrfs_key found_key;
6614 struct extent_buffer *leaf;
/* Start from the largest possible key at @bytenr and walk backwards. */
6619 key.objectid = bytenr;
6621 key.offset = (u64)-1;
6624 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6631 if (path->slots[0] == 0)
6637 leaf = path->nodes[0];
6638 slot = path->slots[0];
6640 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6641 if (found_key.objectid != bytenr)
/* Not an extent/backref item: step to the next-lower key and retry. */
6644 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6645 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6646 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6647 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6648 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6649 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6650 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6651 btrfs_release_path(path);
6652 if (found_key.type == 0) {
6653 if (found_key.offset == 0)
6655 key.offset = found_key.offset - 1;
6656 key.type = found_key.type;
6658 key.type = found_key.type - 1;
6659 key.offset = (u64)-1;
6663 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6664 found_key.objectid, found_key.type, found_key.offset);
6666 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6669 btrfs_release_path(path);
/* Removing the allocation item itself: give the space back to the BG. */
6671 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6672 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6673 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6674 found_key.offset : root->nodesize;
6676 ret = btrfs_update_block_group(trans, root, bytenr,
6683 btrfs_release_path(path);
6688 * for a single backref, this will allocate a new extent
6689 * and add the backref to it.
6691 static int record_extent(struct btrfs_trans_handle *trans,
6692 struct btrfs_fs_info *info,
6693 struct btrfs_path *path,
6694 struct extent_record *rec,
6695 struct extent_backref *back,
6696 int allocated, u64 flags)
6699 struct btrfs_root *extent_root = info->extent_root;
6700 struct extent_buffer *leaf;
6701 struct btrfs_key ins_key;
6702 struct btrfs_extent_item *ei;
6703 struct tree_backref *tback;
6704 struct data_backref *dback;
6705 struct btrfs_tree_block_info *bi;
/* A tree block extent must be at least one node in size. */
6708 rec->max_size = max_t(u64, rec->max_size,
6709 info->extent_root->nodesize);
/* Tree blocks carry an extra btrfs_tree_block_info after the item. */
6712 u32 item_size = sizeof(*ei);
6715 item_size += sizeof(*bi);
6717 ins_key.objectid = rec->start;
6718 ins_key.offset = rec->max_size;
6719 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6721 ret = btrfs_insert_empty_item(trans, extent_root, path,
6722 &ins_key, item_size);
6726 leaf = path->nodes[0];
6727 ei = btrfs_item_ptr(leaf, path->slots[0],
6728 struct btrfs_extent_item);
/* Refs start at 0; btrfs_inc_extent_ref() below bumps them. */
6730 btrfs_set_extent_refs(leaf, ei, 0);
6731 btrfs_set_extent_generation(leaf, ei, rec->generation);
6733 if (back->is_data) {
6734 btrfs_set_extent_flags(leaf, ei,
6735 BTRFS_EXTENT_FLAG_DATA);
/* NOTE(review): stray double semicolon below — harmless but should go. */
6737 struct btrfs_disk_key copy_key;;
6739 tback = to_tree_backref(back);
6740 bi = (struct btrfs_tree_block_info *)(ei + 1);
6741 memset_extent_buffer(leaf, 0, (unsigned long)bi,
/*
 * NOTE(review): "©_key" on the following lines is mojibake for
 * "&copy_key" (an "&copy" HTML-entity substitution) — fix the encoding.
 */
6744 btrfs_set_disk_key_objectid(©_key,
6745 rec->info_objectid);
6746 btrfs_set_disk_key_type(©_key, 0);
6747 btrfs_set_disk_key_offset(©_key, 0);
6749 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6750 btrfs_set_tree_block_key(leaf, bi, ©_key);
6752 btrfs_set_extent_flags(leaf, ei,
6753 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6756 btrfs_mark_buffer_dirty(leaf);
/* Account the newly recorded allocation in its block group. */
6757 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6758 rec->max_size, 1, 0);
6761 btrfs_release_path(path);
/* Now attach the backref itself, once per found reference. */
6764 if (back->is_data) {
6768 dback = to_data_backref(back);
6769 if (back->full_backref)
6770 parent = dback->parent;
6774 for (i = 0; i < dback->found_ref; i++) {
6775 /* if parent != 0, we're doing a full backref
6776 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6777 * just makes the backref allocator create a data
6780 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6781 rec->start, rec->max_size,
6785 BTRFS_FIRST_FREE_OBJECTID :
6791 fprintf(stderr, "adding new data backref"
6792 " on %llu %s %llu owner %llu"
6793 " offset %llu found %d\n",
6794 (unsigned long long)rec->start,
6795 back->full_backref ?
6797 back->full_backref ?
6798 (unsigned long long)parent :
6799 (unsigned long long)dback->root,
6800 (unsigned long long)dback->owner,
6801 (unsigned long long)dback->offset,
6806 tback = to_tree_backref(back);
6807 if (back->full_backref)
6808 parent = tback->parent;
6812 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6813 rec->start, rec->max_size,
6814 parent, tback->root, 0, 0);
6815 fprintf(stderr, "adding new tree backref on "
6816 "start %llu len %llu parent %llu root %llu\n",
6817 rec->start, rec->max_size, parent, tback->root);
/*
 * Linear search of @entries for the extent_entry matching (@bytenr,
 * @bytes) exactly.  NULL-equivalent behavior on miss is handled by the
 * caller.
 */
6824 static struct extent_entry *find_entry(struct list_head *entries,
6825 u64 bytenr, u64 bytes)
6827 struct extent_entry *entry = NULL;
6829 list_for_each_entry(entry, entries, list) {
6830 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Pick the most trustworthy extent_entry from @entries: the one with the
 * highest reference count, ignoring entries whose refs are all broken, and
 * refusing to choose when two candidates tie (ambiguous evidence).
 */
6837 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6839 struct extent_entry *entry, *best = NULL, *prev = NULL;
6841 list_for_each_entry(entry, entries, list) {
6848 * If there are as many broken entries as entries then we know
6849 * not to trust this particular entry.
6851 if (entry->broken == entry->count)
6855 * If our current entry == best then we can't be sure our best
6856 * is really the best, so we need to keep searching.
6858 if (best && best->count == entry->count) {
6864 /* Prev == entry, not good enough, have to keep searching */
6865 if (!prev->broken && prev->count == entry->count)
/* Keep whichever candidate has the higher count. */
6869 best = (prev->count > entry->count) ? prev : entry;
6870 else if (best->count < entry->count)
6879 struct data_backref *dback, struct extent_entry *entry)
6881 struct btrfs_trans_handle *trans;
6882 struct btrfs_root *root;
6883 struct btrfs_file_extent_item *fi;
6884 struct extent_buffer *leaf;
6885 struct btrfs_key key;
6889 key.objectid = dback->root;
6890 key.type = BTRFS_ROOT_ITEM_KEY;
6891 key.offset = (u64)-1;
6892 root = btrfs_read_fs_root(info, &key);
6894 fprintf(stderr, "Couldn't find root for our ref\n");
6899 * The backref points to the original offset of the extent if it was
6900 * split, so we need to search down to the offset we have and then walk
6901 * forward until we find the backref we're looking for.
6903 key.objectid = dback->owner;
6904 key.type = BTRFS_EXTENT_DATA_KEY;
6905 key.offset = dback->offset;
6906 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6908 fprintf(stderr, "Error looking up ref %d\n", ret);
6913 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6914 ret = btrfs_next_leaf(root, path);
6916 fprintf(stderr, "Couldn't find our ref, next\n");
6920 leaf = path->nodes[0];
6921 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6922 if (key.objectid != dback->owner ||
6923 key.type != BTRFS_EXTENT_DATA_KEY) {
6924 fprintf(stderr, "Couldn't find our ref, search\n");
6927 fi = btrfs_item_ptr(leaf, path->slots[0],
6928 struct btrfs_file_extent_item);
6929 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6930 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6932 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6937 btrfs_release_path(path);
6939 trans = btrfs_start_transaction(root, 1);
6941 return PTR_ERR(trans);
6944 * Ok we have the key of the file extent we want to fix, now we can cow
6945 * down to the thing and fix it.
6947 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6949 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6950 key.objectid, key.type, key.offset, ret);
6954 fprintf(stderr, "Well that's odd, we just found this key "
6955 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6960 leaf = path->nodes[0];
6961 fi = btrfs_item_ptr(leaf, path->slots[0],
6962 struct btrfs_file_extent_item);
6964 if (btrfs_file_extent_compression(leaf, fi) &&
6965 dback->disk_bytenr != entry->bytenr) {
6966 fprintf(stderr, "Ref doesn't match the record start and is "
6967 "compressed, please take a btrfs-image of this file "
6968 "system and send it to a btrfs developer so they can "
6969 "complete this functionality for bytenr %Lu\n",
6970 dback->disk_bytenr);
6975 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6976 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6977 } else if (dback->disk_bytenr > entry->bytenr) {
6978 u64 off_diff, offset;
6980 off_diff = dback->disk_bytenr - entry->bytenr;
6981 offset = btrfs_file_extent_offset(leaf, fi);
6982 if (dback->disk_bytenr + offset +
6983 btrfs_file_extent_num_bytes(leaf, fi) >
6984 entry->bytenr + entry->bytes) {
6985 fprintf(stderr, "Ref is past the entry end, please "
6986 "take a btrfs-image of this file system and "
6987 "send it to a btrfs developer, ref %Lu\n",
6988 dback->disk_bytenr);
6993 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6994 btrfs_set_file_extent_offset(leaf, fi, offset);
6995 } else if (dback->disk_bytenr < entry->bytenr) {
6998 offset = btrfs_file_extent_offset(leaf, fi);
6999 if (dback->disk_bytenr + offset < entry->bytenr) {
7000 fprintf(stderr, "Ref is before the entry start, please"
7001 " take a btrfs-image of this file system and "
7002 "send it to a btrfs developer, ref %Lu\n",
7003 dback->disk_bytenr);
7008 offset += dback->disk_bytenr;
7009 offset -= entry->bytenr;
7010 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7011 btrfs_set_file_extent_offset(leaf, fi, offset);
7014 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7017 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7018 * only do this if we aren't using compression, otherwise it's a
7021 if (!btrfs_file_extent_compression(leaf, fi))
7022 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7024 printf("ram bytes may be wrong?\n");
7025 btrfs_mark_buffer_dirty(leaf);
7027 err = btrfs_commit_transaction(trans, root);
7028 btrfs_release_path(path);
7029 return ret ? ret : err;
/*
 * Cross-check all data backrefs of @rec against each other and against the
 * extent record itself.  Each distinct (disk_bytenr, bytes) pair seen in the
 * backrefs becomes an extent_entry on a local list; the entry agreed on by
 * the most backrefs "wins" and repair_ref() is invoked on every backref that
 * disagrees with it.
 *
 * NOTE(review): this listing elides some lines (declarations of entries/
 * nr_entries/mismatch, error paths); comments below describe only what is
 * visible here.
 */
7032 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7033 struct extent_record *rec)
7035 struct extent_backref *back;
7036 struct data_backref *dback;
7037 struct extent_entry *entry, *best = NULL;
7040 int broken_entries = 0;
7045 * Metadata is easy and the backrefs should always agree on bytenr and
7046 * size, if not we've got bigger issues.
/* Pass 1: tally how many backrefs vote for each (bytenr, bytes) pair. */
7051 list_for_each_entry(back, &rec->backrefs, list) {
7052 if (back->full_backref || !back->is_data)
7055 dback = to_data_backref(back);
7058 * We only pay attention to backrefs that we found a real
7061 if (dback->found_ref == 0)
7065 * For now we only catch when the bytes don't match, not the
7066 * bytenr. We can easily do this at the same time, but I want
7067 * to have a fs image to test on before we just add repair
7068 * functionality willy-nilly so we know we won't screw up the
7072 entry = find_entry(&entries, dback->disk_bytenr,
/* No entry for this pair yet: allocate and queue a fresh one. */
7075 entry = malloc(sizeof(struct extent_entry));
7080 memset(entry, 0, sizeof(*entry));
7081 entry->bytenr = dback->disk_bytenr;
7082 entry->bytes = dback->bytes;
7083 list_add_tail(&entry->list, &entries);
7088 * If we only have one entry we may think the entries agree when
7089 * in reality they don't so we have to do some extra checking.
7091 if (dback->disk_bytenr != rec->start ||
7092 dback->bytes != rec->nr || back->broken)
7103 /* Yay all the backrefs agree, carry on good sir */
7104 if (nr_entries <= 1 && !mismatch)
7107 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7108 "%Lu\n", rec->start);
7111 * First we want to see if the backrefs can agree amongst themselves who
7112 * is right, so figure out which one of the entries has the highest
7115 best = find_most_right_entry(&entries);
7118 * Ok so we may have an even split between what the backrefs think, so
7119 * this is where we use the extent ref to see what it thinks.
7122 entry = find_entry(&entries, rec->start, rec->nr);
7123 if (!entry && (!broken_entries || !rec->found_rec)) {
7124 fprintf(stderr, "Backrefs don't agree with each other "
7125 "and extent record doesn't agree with anybody,"
7126 " so we can't fix bytenr %Lu bytes %Lu\n",
7127 rec->start, rec->nr);
7130 } else if (!entry) {
7132 * Ok our backrefs were broken, we'll assume this is the
7133 * correct value and add an entry for this range.
7135 entry = malloc(sizeof(struct extent_entry));
7140 memset(entry, 0, sizeof(*entry));
7141 entry->bytenr = rec->start;
7142 entry->bytes = rec->nr;
7143 list_add_tail(&entry->list, &entries);
/* Re-run the vote now that the extent record itself has an entry. */
7147 best = find_most_right_entry(&entries);
7149 fprintf(stderr, "Backrefs and extent record evenly "
7150 "split on who is right, this is going to "
7151 "require user input to fix bytenr %Lu bytes "
7152 "%Lu\n", rec->start, rec->nr);
7159 * I don't think this can happen currently as we'll abort() if we catch
7160 * this case higher up, but in case somebody removes that we still can't
7161 * deal with it properly here yet, so just bail out if that's the case.
7163 if (best->bytenr != rec->start) {
7164 fprintf(stderr, "Extent start and backref starts don't match, "
7165 "please use btrfs-image on this file system and send "
7166 "it to a btrfs developer so they can make fsck fix "
7167 "this particular case. bytenr is %Lu, bytes is %Lu\n",
7168 rec->start, rec->nr);
7174 * Ok great we all agreed on an extent record, let's go find the real
7175 * references and fix up the ones that don't match.
/* Pass 2: repair every found data backref that disagrees with 'best'. */
7177 list_for_each_entry(back, &rec->backrefs, list) {
7178 if (back->full_backref || !back->is_data)
7181 dback = to_data_backref(back);
7184 * Still ignoring backrefs that don't have a real ref attached
7187 if (dback->found_ref == 0)
7190 if (dback->bytes == best->bytes &&
7191 dback->disk_bytenr == best->bytenr)
7194 ret = repair_ref(info, path, dback, best);
7200 * Ok we messed with the actual refs, which means we need to drop our
7201 * entire cache and go back and rescan. I know this is a huge pain and
7202 * adds a lot of extra work, but it's the only way to be safe. Once all
7203 * the backrefs agree we may not need to do anything to the extent
/* Free the temporary candidate list before returning. */
7208 while (!list_empty(&entries)) {
7209 entry = list_entry(entries.next, struct extent_entry, list);
7210 list_del_init(&entry->list);
/*
 * Collapse duplicate extent records covering the same range into one.
 * The first entry on rec->dups becomes the canonical record ("good"),
 * inherits rec's refs/backrefs, absorbs any overlapping cache entries,
 * and is re-inserted into @extent_cache.
 *
 * Returns 0 when duplicates remain (caller must delete them via
 * delete_duplicate_records()), 1 when the merged record is clean.
 */
7216 static int process_duplicates(struct btrfs_root *root,
7217 struct cache_tree *extent_cache,
7218 struct extent_record *rec)
7220 struct extent_record *good, *tmp;
7221 struct cache_extent *cache;
7225 * If we found a extent record for this extent then return, or if we
7226 * have more than one duplicate we are likely going to need to delete
7229 if (rec->found_rec || rec->num_duplicates > 1)
7232 /* Shouldn't happen but just in case */
7233 BUG_ON(!rec->num_duplicates);
7236 * So this happens if we end up with a backref that doesn't match the
7237 * actual extent entry. So either the backref is bad or the extent
7238 * entry is bad. Either way we want to have the extent_record actually
7239 * reflect what we found in the extent_tree, so we need to take the
7240 * duplicate out and use that as the extent_record since the only way we
7241 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7243 remove_cache_extent(extent_cache, &rec->cache);
/* Promote the first duplicate to be the canonical record. */
7245 good = to_extent_record(rec->dups.next);
7246 list_del_init(&good->list);
7247 INIT_LIST_HEAD(&good->backrefs);
7248 INIT_LIST_HEAD(&good->dups);
7249 good->cache.start = good->start;
7250 good->cache.size = good->nr;
7251 good->content_checked = 0;
7252 good->owner_ref_checked = 0;
7253 good->num_duplicates = 0;
7254 good->refs = rec->refs;
7255 list_splice_init(&rec->backrefs, &good->backrefs);
/* Absorb any cached records that overlap the promoted range. */
7257 cache = lookup_cache_extent(extent_cache, good->start,
7261 tmp = container_of(cache, struct extent_record, cache);
7264 * If we find another overlapping extent and it's found_rec is
7265 * set then it's a duplicate and we need to try and delete
7268 if (tmp->found_rec || tmp->num_duplicates > 0) {
7269 if (list_empty(&good->list))
7270 list_add_tail(&good->list,
7271 &duplicate_extents);
7272 good->num_duplicates += tmp->num_duplicates + 1;
7273 list_splice_init(&tmp->dups, &good->dups);
7274 list_del_init(&tmp->list);
7275 list_add_tail(&tmp->list, &good->dups);
7276 remove_cache_extent(extent_cache, &tmp->cache);
7281 * Ok we have another non extent item backed extent rec, so lets
7282 * just add it to this extent and carry on like we did above.
7284 good->refs += tmp->refs;
7285 list_splice_init(&tmp->backrefs, &good->backrefs);
7286 remove_cache_extent(extent_cache, &tmp->cache);
7289 ret = insert_cache_extent(extent_cache, &good->cache);
/* 0 => still has duplicates to delete; 1 => fully merged. */
7292 return good->num_duplicates ? 0 : 1;
/*
 * Delete redundant EXTENT_ITEMs for @rec's duplicate list.  First the record
 * that fully covers all duplicates is identified ("good"); overlapping-but-
 * not-contained duplicates are a hard error.  Every other record's extent
 * item is then removed from the extent tree inside one transaction.
 *
 * Returns the number of deleted items (>0), 0 if nothing needed deleting,
 * or a negative errno on failure (visible at the final return).
 */
7295 static int delete_duplicate_records(struct btrfs_root *root,
7296 struct extent_record *rec)
7298 struct btrfs_trans_handle *trans;
7299 LIST_HEAD(delete_list);
7300 struct btrfs_path *path;
7301 struct extent_record *tmp, *good, *n;
7304 struct btrfs_key key;
7306 path = btrfs_alloc_path();
7313 /* Find the record that covers all of the duplicates. */
7314 list_for_each_entry(tmp, &rec->dups, list) {
7315 if (good->start < tmp->start)
7317 if (good->nr > tmp->nr)
7320 if (tmp->start + tmp->nr < good->start + good->nr) {
7321 fprintf(stderr, "Ok we have overlapping extents that "
7322 "aren't completely covered by each other, this "
7323 "is going to require more careful thought. "
7324 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7325 tmp->start, tmp->nr, good->start, good->nr);
/* Queue every non-covering record (rec itself included) for deletion. */
7332 list_add_tail(&rec->list, &delete_list);
7334 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7337 list_move_tail(&tmp->list, &delete_list);
/* Deletions happen in the extent tree, under a single transaction. */
7340 root = root->fs_info->extent_root;
7341 trans = btrfs_start_transaction(root, 1);
7342 if (IS_ERR(trans)) {
7343 ret = PTR_ERR(trans);
7347 list_for_each_entry(tmp, &delete_list, list) {
7348 if (tmp->found_rec == 0)
7350 key.objectid = tmp->start;
7351 key.type = BTRFS_EXTENT_ITEM_KEY;
7352 key.offset = tmp->nr;
7354 /* Shouldn't happen but just in case */
7355 if (tmp->metadata) {
7356 fprintf(stderr, "Well this shouldn't happen, extent "
7357 "record overlaps but is metadata? "
7358 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7362 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7368 ret = btrfs_del_item(trans, root, path);
7371 btrfs_release_path(path);
7374 err = btrfs_commit_transaction(trans, root);
/* Free the records we queued, then any leftovers still on rec->dups. */
7378 while (!list_empty(&delete_list)) {
7379 tmp = to_extent_record(delete_list.next);
7380 list_del_init(&tmp->list);
7386 while (!list_empty(&rec->dups)) {
7387 tmp = to_extent_record(rec->dups.next);
7388 list_del_init(&tmp->list);
7392 btrfs_free_path(path);
7394 if (!ret && !nr_del)
7395 rec->num_duplicates = 0;
7397 return ret ? ret : nr_del;
/*
 * For each data backref of @rec that was never matched to an on-disk file
 * extent (found_ref == 0), look the file extent up directly in its owning
 * fs tree.  If it exists and its disk bytenr has no extent record of its
 * own in @extent_cache, adopt the on-disk values into the backref so that
 * verify_backrefs() can include it in its vote.
 */
7400 static int find_possible_backrefs(struct btrfs_fs_info *info,
7401 struct btrfs_path *path,
7402 struct cache_tree *extent_cache,
7403 struct extent_record *rec)
7405 struct btrfs_root *root;
7406 struct extent_backref *back;
7407 struct data_backref *dback;
7408 struct cache_extent *cache;
7409 struct btrfs_file_extent_item *fi;
7410 struct btrfs_key key;
7414 list_for_each_entry(back, &rec->backrefs, list) {
7415 /* Don't care about full backrefs (poor unloved backrefs) */
7416 if (back->full_backref || !back->is_data)
7419 dback = to_data_backref(back);
7421 /* We found this one, we don't need to do a lookup */
7422 if (dback->found_ref)
/* Resolve the subvolume root that owns this backref. */
7425 key.objectid = dback->root;
7426 key.type = BTRFS_ROOT_ITEM_KEY;
7427 key.offset = (u64)-1;
7429 root = btrfs_read_fs_root(info, &key);
7431 /* No root, definitely a bad ref, skip */
7432 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7434 /* Other err, exit */
7436 return PTR_ERR(root);
/* Look for the file extent item the backref claims to describe. */
7438 key.objectid = dback->owner;
7439 key.type = BTRFS_EXTENT_DATA_KEY;
7440 key.offset = dback->offset;
7441 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7443 btrfs_release_path(path);
7446 /* Didn't find it, we can carry on */
7451 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7452 struct btrfs_file_extent_item);
7453 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7454 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7455 btrfs_release_path(path);
7456 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7458 struct extent_record *tmp;
7459 tmp = container_of(cache, struct extent_record, cache);
7462 * If we found an extent record for the bytenr for this
7463 * particular backref then we can't add it to our
7464 * current extent record. We only want to add backrefs
7465 * that don't have a corresponding extent item in the
7466 * extent tree since they likely belong to this record
7467 * and we need to fix it if it doesn't match bytenrs.
/* Adopt the on-disk values so verify_backrefs() can use them. */
7473 dback->found_ref += 1;
7474 dback->disk_bytenr = bytenr;
7475 dback->bytes = bytes;
7478 * Set this so the verify backref code knows not to trust the
7479 * values in this backref.
7488 * Record orphan data ref into corresponding root.
7490 * Return 0 if the extent item contains data ref and recorded.
7491 * Return 1 if the extent item contains no useful data ref
7492 * On that case, it may contains only shared_dataref or metadata backref
7493 * or the file extent exists(this should be handled by the extent bytenr
7495 * Return <0 if something goes wrong.
7497 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7498 struct extent_record *rec)
7500 struct btrfs_key key;
7501 struct btrfs_root *dest_root;
7502 struct extent_backref *back;
7503 struct data_backref *dback;
7504 struct orphan_data_extent *orphan;
7505 struct btrfs_path *path;
7506 int recorded_data_ref = 0;
7511 path = btrfs_alloc_path();
/* Only data backrefs found in the extent tree but lacking a file extent
 * (found_ref == 0) are candidates for orphan recording. */
7514 list_for_each_entry(back, &rec->backrefs, list) {
7515 if (back->full_backref || !back->is_data ||
7516 !back->found_extent_tree)
7518 dback = to_data_backref(back);
7519 if (dback->found_ref)
7521 key.objectid = dback->root;
7522 key.type = BTRFS_ROOT_ITEM_KEY;
7523 key.offset = (u64)-1;
7525 dest_root = btrfs_read_fs_root(fs_info, &key);
7527 /* For non-exist root we just skip it */
7528 if (IS_ERR(dest_root) || !dest_root)
7531 key.objectid = dback->owner;
7532 key.type = BTRFS_EXTENT_DATA_KEY;
7533 key.offset = dback->offset;
7535 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7536 btrfs_release_path(path);
7538 * For ret < 0, it's OK since the fs-tree may be corrupted,
7539 * we need to record it for inode/file extent rebuild.
7540 * For ret > 0, we record it only for file extent rebuild.
7541 * For ret == 0, the file extent exists but only bytenr
7542 * mismatch, let the original bytenr fix routine to handle,
/* Attach the orphan record to the destination root's list so the
 * inode/file-extent rebuild code can recreate the file extent later. */
7548 orphan = malloc(sizeof(*orphan));
7553 INIT_LIST_HEAD(&orphan->list);
7554 orphan->root = dback->root;
7555 orphan->objectid = dback->owner;
7556 orphan->offset = dback->offset;
7557 orphan->disk_bytenr = rec->cache.start;
7558 orphan->disk_len = rec->cache.size;
7559 list_add(&dest_root->orphan_data_extents, &orphan->list);
7560 recorded_data_ref = 1;
7563 btrfs_free_path(path);
/* 0 when at least one orphan was recorded, 1 otherwise (see header). */
7565 return !recorded_data_ref;
7571 * when an incorrect extent item is found, this will delete
7572 * all of the existing entries for it and recreate them
7573 * based on what the tree scan found.
7575 static int fixup_extent_refs(struct btrfs_fs_info *info,
7576 struct cache_tree *extent_cache,
7577 struct extent_record *rec)
7579 struct btrfs_trans_handle *trans = NULL;
7581 struct btrfs_path *path;
7582 struct list_head *cur = rec->backrefs.next;
7583 struct cache_extent *cache;
7584 struct extent_backref *back;
7588 if (rec->flag_block_full_backref)
7589 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7591 path = btrfs_alloc_path();
7595 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7597 * Sometimes the backrefs themselves are so broken they don't
7598 * get attached to any meaningful rec, so first go back and
7599 * check any of our backrefs that we couldn't find and throw
7600 * them into the list if we find the backref so that
7601 * verify_backrefs can figure out what to do.
7603 ret = find_possible_backrefs(info, path, extent_cache, rec);
7608 /* step one, make sure all of the backrefs agree */
7609 ret = verify_backrefs(info, path, rec);
7613 trans = btrfs_start_transaction(info->extent_root, 1);
7614 if (IS_ERR(trans)) {
7615 ret = PTR_ERR(trans);
7619 /* step two, delete all the existing records */
7620 ret = delete_extent_records(trans, info->extent_root, path,
7621 rec->start, rec->max_size);
7626 /* was this block corrupt? If so, don't add references to it */
7627 cache = lookup_cache_extent(info->corrupt_blocks,
7628 rec->start, rec->max_size);
7634 /* step three, recreate all the refs we did find */
7635 while(cur != &rec->backrefs) {
7636 back = to_extent_backref(cur);
7640 * if we didn't find any references, don't create a
7643 if (!back->found_ref)
7646 rec->bad_full_backref = 0;
7647 ret = record_extent(trans, info, path, rec, back, allocated, flags);
/* Commit whatever we rebuilt; err preserves the commit failure. */
7655 int err = btrfs_commit_transaction(trans, info->extent_root);
7660 btrfs_free_path(path);
/*
 * Rewrite the flags field of @rec's extent item so the FULL_BACKREF bit
 * matches what the tree scan decided (rec->flag_block_full_backref).
 * Runs inside its own transaction; returns the commit result or an error
 * from the search.
 */
7664 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7665 struct extent_record *rec)
7667 struct btrfs_trans_handle *trans;
7668 struct btrfs_root *root = fs_info->extent_root;
7669 struct btrfs_path *path;
7670 struct btrfs_extent_item *ei;
7671 struct btrfs_key key;
7675 key.objectid = rec->start;
7676 if (rec->metadata) {
/* Skinny metadata items key on level, regular extents on size. */
7677 key.type = BTRFS_METADATA_ITEM_KEY;
7678 key.offset = rec->info_level;
7680 key.type = BTRFS_EXTENT_ITEM_KEY;
7681 key.offset = rec->max_size;
7684 path = btrfs_alloc_path();
7688 trans = btrfs_start_transaction(root, 0);
7689 if (IS_ERR(trans)) {
7690 btrfs_free_path(path);
7691 return PTR_ERR(trans);
7694 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7696 btrfs_free_path(path);
7697 btrfs_commit_transaction(trans, root);
7700 fprintf(stderr, "Didn't find extent for %llu\n",
7701 (unsigned long long)rec->start);
7702 btrfs_free_path(path);
7703 btrfs_commit_transaction(trans, root);
7707 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7708 struct btrfs_extent_item);
7709 flags = btrfs_extent_flags(path->nodes[0], ei);
7710 if (rec->flag_block_full_backref) {
7711 fprintf(stderr, "setting full backref on %llu\n",
7712 (unsigned long long)key.objectid);
7713 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7715 fprintf(stderr, "clearing full backref on %llu\n",
7716 (unsigned long long)key.objectid);
7717 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7719 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7720 btrfs_mark_buffer_dirty(path->nodes[0]);
7721 btrfs_free_path(path);
7722 return btrfs_commit_transaction(trans, root);
7725 /* right now we only prune from the extent allocation tree */
/*
 * Remove the node pointer that references one corrupt block: search down to
 * the corrupt block's parent level, locate the slot pointing at
 * corrupt->cache.start (falling back to a linear scan of the node), and
 * delete that pointer with btrfs_del_ptr().
 */
7726 static int prune_one_block(struct btrfs_trans_handle *trans,
7727 struct btrfs_fs_info *info,
7728 struct btrfs_corrupt_block *corrupt)
7731 struct btrfs_path path;
7732 struct extent_buffer *eb;
7736 int level = corrupt->level + 1;
7738 btrfs_init_path(&path);
7740 /* we want to stop at the parent to our busted block */
7741 path.lowest_level = level;
7743 ret = btrfs_search_slot(trans, info->extent_root,
7744 &corrupt->key, &path, -1, 1);
7749 eb = path.nodes[level];
7756 * hopefully the search gave us the block we want to prune,
7757 * lets try that first
7759 slot = path.slots[level];
7760 found = btrfs_node_blockptr(eb, slot);
7761 if (found == corrupt->cache.start)
7764 nritems = btrfs_header_nritems(eb);
7766 /* the search failed, lets scan this node and hope we find it */
7767 for (slot = 0; slot < nritems; slot++) {
7768 found = btrfs_node_blockptr(eb, slot);
7769 if (found == corrupt->cache.start)
7773 * we couldn't find the bad block. TODO, search all the nodes for pointers
7776 if (eb == info->extent_root->node) {
7781 btrfs_release_path(&path);
7786 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7787 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7790 btrfs_release_path(&path);
/*
 * Drain info->corrupt_blocks: start one transaction lazily, call
 * prune_one_block() for each cached corrupt block, and commit at the end.
 * NOTE(review): prune_one_block()'s return value is ignored here —
 * pruning is best-effort.
 */
7794 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7796 struct btrfs_trans_handle *trans = NULL;
7797 struct cache_extent *cache;
7798 struct btrfs_corrupt_block *corrupt;
7801 cache = search_cache_extent(info->corrupt_blocks, 0);
7805 trans = btrfs_start_transaction(info->extent_root, 1);
7807 return PTR_ERR(trans);
7809 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7810 prune_one_block(trans, info, corrupt);
7811 remove_cache_extent(info->corrupt_blocks, cache);
7814 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Clear the DIRTY ranges in the free space cache and reset the matching
 * block groups so they will be re-cached from scratch on next use.
 */
7818 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7820 struct btrfs_block_group_cache *cache;
7825 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7826 &start, &end, EXTENT_DIRTY);
7829 clear_extent_dirty(&fs_info->free_space_cache, start, end,
/* Walk block groups from 'start' onward; advance by each group's range. */
7835 cache = btrfs_lookup_first_block_group(fs_info, start);
7840 start = cache->key.objectid + cache->key.offset;
/*
 * Final pass over every extent_record in @extent_cache: report (and, with
 * repair enabled, fix) duplicate items, ref-count mismatches, backpointer
 * mismatches, unchecked owner refs, bad full-backref flags, stripe-crossing
 * metadata and chunk-type mismatches.  With repair enabled all problem
 * extents are first pinned as excluded so fixes never allocate from them.
 */
7844 static int check_extent_refs(struct btrfs_root *root,
7845 struct cache_tree *extent_cache)
7847 struct extent_record *rec;
7848 struct cache_extent *cache;
7857 * if we're doing a repair, we have to make sure
7858 * we don't allocate from the problem extents.
7859 * In the worst case, this will be all the
7862 cache = search_cache_extent(extent_cache, 0);
7864 rec = container_of(cache, struct extent_record, cache);
7865 set_extent_dirty(root->fs_info->excluded_extents,
7867 rec->start + rec->max_size - 1,
7869 cache = next_cache_extent(cache);
7872 /* pin down all the corrupted blocks too */
7873 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7875 set_extent_dirty(root->fs_info->excluded_extents,
7877 cache->start + cache->size - 1,
7879 cache = next_cache_extent(cache);
7881 prune_corrupt_blocks(root->fs_info);
7882 reset_cached_block_groups(root->fs_info);
7885 reset_cached_block_groups(root->fs_info);
7888 * We need to delete any duplicate entries we find first otherwise we
7889 * could mess up the extent tree when we have backrefs that actually
7890 * belong to a different extent item and not the weird duplicate one.
7892 while (repair && !list_empty(&duplicate_extents)) {
7893 rec = to_extent_record(duplicate_extents.next);
7894 list_del_init(&rec->list);
7896 /* Sometimes we can find a backref before we find an actual
7897 * extent, so we need to process it a little bit to see if there
7898 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7899 * if this is a backref screwup. If we need to delete stuff
7900 * process_duplicates() will return 0, otherwise it will return
7903 if (process_duplicates(root, extent_cache, rec))
7905 ret = delete_duplicate_records(root, rec);
7909 * delete_duplicate_records will return the number of entries
7910 * deleted, so if it's greater than 0 then we know we actually
7911 * did something and we need to remove.
/* Main verification loop: pop each record, report/fix its problems. */
7925 cache = search_cache_extent(extent_cache, 0);
7928 rec = container_of(cache, struct extent_record, cache);
7929 if (rec->num_duplicates) {
7930 fprintf(stderr, "extent item %llu has multiple extent "
7931 "items\n", (unsigned long long)rec->start);
7936 if (rec->refs != rec->extent_item_refs) {
7937 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7938 (unsigned long long)rec->start,
7939 (unsigned long long)rec->nr);
7940 fprintf(stderr, "extent item %llu, found %llu\n",
7941 (unsigned long long)rec->extent_item_refs,
7942 (unsigned long long)rec->refs);
7943 ret = record_orphan_data_extents(root->fs_info, rec);
7950 * we can't use the extent to repair file
7951 * extent, let the fallback method handle it.
7953 if (!fixed && repair) {
7954 ret = fixup_extent_refs(
7965 if (all_backpointers_checked(rec, 1)) {
7966 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7967 (unsigned long long)rec->start,
7968 (unsigned long long)rec->nr);
7970 if (!fixed && !recorded && repair) {
7971 ret = fixup_extent_refs(root->fs_info,
7980 if (!rec->owner_ref_checked) {
7981 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7982 (unsigned long long)rec->start,
7983 (unsigned long long)rec->nr);
7984 if (!fixed && !recorded && repair) {
7985 ret = fixup_extent_refs(root->fs_info,
7994 if (rec->bad_full_backref) {
7995 fprintf(stderr, "bad full backref, on [%llu]\n",
7996 (unsigned long long)rec->start);
7998 ret = fixup_extent_flags(root->fs_info, rec);
8007 * Although it's not a extent ref's problem, we reuse this
8008 * routine for error reporting.
8009 * No repair function yet.
8011 if (rec->crossing_stripes) {
8013 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8014 rec->start, rec->start + rec->max_size);
8019 if (rec->wrong_chunk_type) {
8021 "bad extent [%llu, %llu), type mismatch with chunk\n",
8022 rec->start, rec->start + rec->max_size);
/* Done with this record: drop it and, if fixed/clean, unpin its range. */
8027 remove_cache_extent(extent_cache, cache);
8028 free_all_extent_backrefs(rec);
8029 if (!init_extent_tree && repair && (!cur_err || fixed))
8030 clear_extent_dirty(root->fs_info->excluded_extents,
8032 rec->start + rec->max_size - 1,
8038 if (ret && ret != -EAGAIN) {
8039 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
/* After successful repairs, rebuild block accounting and commit. */
8042 struct btrfs_trans_handle *trans;
8044 root = root->fs_info->extent_root;
8045 trans = btrfs_start_transaction(root, 1);
8046 if (IS_ERR(trans)) {
8047 ret = PTR_ERR(trans);
8051 btrfs_fix_block_accounting(trans, root);
8052 ret = btrfs_commit_transaction(trans, root);
8057 fprintf(stderr, "repaired damaged extent references\n");
/*
 * Compute the per-device stripe length for a chunk of @length bytes laid
 * out over @num_stripes stripes, according to the RAID profile in @type:
 * RAID0 divides evenly, RAID10 stores each byte twice, RAID5/6 reserve one
 * or two parity stripes, and anything else (single/DUP/RAID1) uses the full
 * length per stripe.
 */
8063 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8067 if (type & BTRFS_BLOCK_GROUP_RAID0) {
8068 stripe_size = length;
8069 stripe_size /= num_stripes;
8070 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
/* RAID10 mirrors each stripe, so twice the data spreads over the set. */
8071 stripe_size = length * 2;
8072 stripe_size /= num_stripes;
8073 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
/* One stripe holds parity. */
8074 stripe_size = length;
8075 stripe_size /= (num_stripes - 1);
8076 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
/* Two stripes hold parity. */
8077 stripe_size = length;
8078 stripe_size /= (num_stripes - 2);
8080 stripe_size = length;
8086 * Check the chunk with its block group/dev list ref:
8087 * Return 0 if all refs seems valid.
8088 * Return 1 if part of refs seems valid, need later check for rebuild ref
8089 * like missing block group and needs to search extent tree to rebuild them.
8090 * Return -1 if essential refs are missing and unable to rebuild.
8092 static int check_chunk_refs(struct chunk_record *chunk_rec,
8093 struct block_group_tree *block_group_cache,
8094 struct device_extent_tree *dev_extent_cache,
8097 struct cache_extent *block_group_item;
8098 struct block_group_record *block_group_rec;
8099 struct cache_extent *dev_extent_item;
8100 struct device_extent_record *dev_extent_rec;
8104 int metadump_v2 = 0;
/* First leg: the chunk must have a matching block group item. */
8108 block_group_item = lookup_cache_extent(&block_group_cache->tree,
8111 if (block_group_item) {
8112 block_group_rec = container_of(block_group_item,
8113 struct block_group_record,
8115 if (chunk_rec->length != block_group_rec->offset ||
8116 chunk_rec->offset != block_group_rec->objectid ||
8118 chunk_rec->type_flags != block_group_rec->flags)) {
8121 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8122 chunk_rec->objectid,
8127 chunk_rec->type_flags,
8128 block_group_rec->objectid,
8129 block_group_rec->type,
8130 block_group_rec->offset,
8131 block_group_rec->offset,
8132 block_group_rec->objectid,
8133 block_group_rec->flags);
/* Claim the block group record for this chunk. */
8136 list_del_init(&block_group_rec->list);
8137 chunk_rec->bg_rec = block_group_rec;
8142 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8143 chunk_rec->objectid,
8148 chunk_rec->type_flags);
/* Second leg: each stripe must have a matching dev extent. */
8155 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8156 chunk_rec->num_stripes);
8157 for (i = 0; i < chunk_rec->num_stripes; ++i) {
8158 devid = chunk_rec->stripes[i].devid;
8159 offset = chunk_rec->stripes[i].offset;
8160 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8161 devid, offset, length);
8162 if (dev_extent_item) {
8163 dev_extent_rec = container_of(dev_extent_item,
8164 struct device_extent_record,
8166 if (dev_extent_rec->objectid != devid ||
8167 dev_extent_rec->offset != offset ||
8168 dev_extent_rec->chunk_offset != chunk_rec->offset ||
8169 dev_extent_rec->length != length) {
8172 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8173 chunk_rec->objectid,
8176 chunk_rec->stripes[i].devid,
8177 chunk_rec->stripes[i].offset,
8178 dev_extent_rec->objectid,
8179 dev_extent_rec->offset,
8180 dev_extent_rec->length);
8183 list_move(&dev_extent_rec->chunk_list,
8184 &chunk_rec->dextents);
8189 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8190 chunk_rec->objectid,
8193 chunk_rec->stripes[i].devid,
8194 chunk_rec->stripes[i].offset);
8201 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Validate every chunk against its block group and dev extents via
 * check_chunk_refs(), sorting chunks onto the caller's @good, @rebuild
 * or @bad lists, then report any block groups / dev extents left over
 * with no owning chunk.  @silent suppresses the per-item messages.
 */
8202 int check_chunks(struct cache_tree *chunk_cache,
8203 struct block_group_tree *block_group_cache,
8204 struct device_extent_tree *dev_extent_cache,
8205 struct list_head *good, struct list_head *bad,
8206 struct list_head *rebuild, int silent)
8208 struct cache_extent *chunk_item;
8209 struct chunk_record *chunk_rec;
8210 struct block_group_record *bg_rec;
8211 struct device_extent_record *dext_rec;
8215 chunk_item = first_cache_extent(chunk_cache);
8216 while (chunk_item) {
8217 chunk_rec = container_of(chunk_item, struct chunk_record,
8219 err = check_chunk_refs(chunk_rec, block_group_cache,
8220 dev_extent_cache, silent);
/* 0 => fully referenced, >0 => partially (rebuildable), <0 => bad. */
8223 if (err == 0 && good)
8224 list_add_tail(&chunk_rec->list, good);
8225 if (err > 0 && rebuild)
8226 list_add_tail(&chunk_rec->list, rebuild);
8228 list_add_tail(&chunk_rec->list, bad);
8229 chunk_item = next_cache_extent(chunk_item);
/* Anything still on these lists was never claimed by a chunk. */
8232 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8235 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8243 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8247 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Sum the lengths of all dev extents belonging to @dev_rec's devid and
 * compare the total against the device item's byte_used field; report a
 * mismatch if they differ.
 */
8258 static int check_device_used(struct device_record *dev_rec,
8259 struct device_extent_tree *dext_cache)
8261 struct cache_extent *cache;
8262 struct device_extent_record *dev_extent_rec;
8265 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8267 dev_extent_rec = container_of(cache,
8268 struct device_extent_record,
/* Extents are keyed per device; stop once we leave this devid. */
8270 if (dev_extent_rec->objectid != dev_rec->devid)
8273 list_del_init(&dev_extent_rec->device_list);
8274 total_byte += dev_extent_rec->length;
8275 cache = next_cache_extent(cache);
8278 if (total_byte != dev_rec->byte_used) {
8280 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8281 total_byte, dev_rec->byte_used, dev_rec->objectid,
8282 dev_rec->type, dev_rec->offset);
8289 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Verify every device item's used-byte accounting against its dev extents
 * (via check_device_used), then report dev extents whose device item was
 * never found.
 */
8290 static int check_devices(struct rb_root *dev_cache,
8291 struct device_extent_tree *dev_extent_cache)
8293 struct rb_node *dev_node;
8294 struct device_record *dev_rec;
8295 struct device_extent_record *dext_rec;
8299 dev_node = rb_first(dev_cache);
8301 dev_rec = container_of(dev_node, struct device_record, node);
8302 err = check_device_used(dev_rec, dev_extent_cache);
8306 dev_node = rb_next(dev_node);
8308 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8311 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8312 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record describing one tree root (bytenr, level,
 * drop snapshot state, ...) and append it to @head for later processing
 * by deal_root_from_list().  drop_key may be NULL; it is copied only when
 * provided (the guarding condition is elided in this listing).
 */
8319 static int add_root_item_to_list(struct list_head *head,
8320 u64 objectid, u64 bytenr, u64 last_snapshot,
8321 u8 level, u8 drop_level,
8322 int level_size, struct btrfs_key *drop_key)
8325 struct root_item_record *ri_rec;
8326 ri_rec = malloc(sizeof(*ri_rec));
8329 ri_rec->bytenr = bytenr;
8330 ri_rec->objectid = objectid;
8331 ri_rec->level = level;
8332 ri_rec->level_size = level_size;
8333 ri_rec->drop_level = drop_level;
8334 ri_rec->last_snapshot = last_snapshot;
8336 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8337 list_add_tail(&ri_rec->list, head);
/*
 * Drain @list, unlinking every queued root_item_record.  The matching
 * free() of each record appears on a line elided from this view.
 */
8342 static void free_root_item_list(struct list_head *list)
8344 struct root_item_record *ri_rec;
8346 while (!list_empty(list)) {
8347 ri_rec = list_first_entry(list, struct root_item_record,
8349 list_del_init(&ri_rec->list);
/*
 * Process every queued tree root on @list: read its root node, add it
 * to the pending set, then walk blocks via run_next_block() to fill
 * the extent/chunk/device caches used by later consistency checks.
 * After the list is drained, run_next_block() is called once more with
 * a NULL record to flush any remaining pending blocks.
 *
 * NOTE(review): numbering gaps indicate elided lines (loop conditions,
 * error handling, frees) in this view; comments cover visible logic.
 */
8354 static int deal_root_from_list(struct list_head *list,
8355 struct btrfs_root *root,
8356 struct block_info *bits,
8358 struct cache_tree *pending,
8359 struct cache_tree *seen,
8360 struct cache_tree *reada,
8361 struct cache_tree *nodes,
8362 struct cache_tree *extent_cache,
8363 struct cache_tree *chunk_cache,
8364 struct rb_root *dev_cache,
8365 struct block_group_tree *block_group_cache,
8366 struct device_extent_tree *dev_extent_cache)
8371 while (!list_empty(list)) {
8372 struct root_item_record *rec;
8373 struct extent_buffer *buf;
8374 rec = list_entry(list->next,
8375 struct root_item_record, list);
/* Read this tree's root node from disk. */
8377 buf = read_tree_block(root->fs_info->tree_root,
8378 rec->bytenr, rec->level_size, 0);
8379 if (!extent_buffer_uptodate(buf)) {
8380 free_extent_buffer(buf);
8384 ret = add_root_to_pending(buf, extent_cache, pending,
8385 seen, nodes, rec->objectid);
8389 * To rebuild extent tree, we need deal with snapshot
8390 * one by one, otherwise we deal with node firstly which
8391 * can maximize readahead.
8394 ret = run_next_block(root, bits, bits_nr, &last,
8395 pending, seen, reada, nodes,
8396 extent_cache, chunk_cache,
8397 dev_cache, block_group_cache,
8398 dev_extent_cache, rec);
8402 free_extent_buffer(buf);
8403 list_del(&rec->list);
/* Drain remaining pending blocks not tied to a specific root. */
8409 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8410 reada, nodes, extent_cache, chunk_cache,
8411 dev_cache, block_group_cache,
8412 dev_extent_cache, NULL);
/*
 * Top-level extent/chunk check pass: walk the whole metadata tree set,
 * building caches of extents, chunks, block groups and device extents,
 * then cross-check chunks vs block groups vs device extents, verify
 * extent references, and verify per-device accounting.
 *
 * NOTE(review): numbering gaps indicate many elided lines in this view
 * (error handling, goto labels, the "again:" style restart logic the
 * -EAGAIN comment refers to); comments cover only visible logic.
 */
8422 static int check_chunks_and_extents(struct btrfs_root *root)
8424 struct rb_root dev_cache;
8425 struct cache_tree chunk_cache;
8426 struct block_group_tree block_group_cache;
8427 struct device_extent_tree dev_extent_cache;
8428 struct cache_tree extent_cache;
8429 struct cache_tree seen;
8430 struct cache_tree pending;
8431 struct cache_tree reada;
8432 struct cache_tree nodes;
8433 struct extent_io_tree excluded_extents;
8434 struct cache_tree corrupt_blocks;
8435 struct btrfs_path path;
8436 struct btrfs_key key;
8437 struct btrfs_key found_key;
8439 struct block_info *bits;
8441 struct extent_buffer *leaf;
8443 struct btrfs_root_item ri;
8444 struct list_head dropping_trees;
8445 struct list_head normal_trees;
8446 struct btrfs_root *root1;
/* Initialize every per-run cache and list. */
8451 dev_cache = RB_ROOT;
8452 cache_tree_init(&chunk_cache);
8453 block_group_tree_init(&block_group_cache);
8454 device_extent_tree_init(&dev_extent_cache);
8456 cache_tree_init(&extent_cache);
8457 cache_tree_init(&seen);
8458 cache_tree_init(&pending);
8459 cache_tree_init(&nodes);
8460 cache_tree_init(&reada);
8461 cache_tree_init(&corrupt_blocks);
8462 extent_io_tree_init(&excluded_extents);
8463 INIT_LIST_HEAD(&dropping_trees);
8464 INIT_LIST_HEAD(&normal_trees);
/* Hook fsck-only caches into fs_info so the block walkers can use them. */
8467 root->fs_info->excluded_extents = &excluded_extents;
8468 root->fs_info->fsck_extent_cache = &extent_cache;
8469 root->fs_info->free_extent_hook = free_extent_hook;
8470 root->fs_info->corrupt_blocks = &corrupt_blocks;
8474 bits = malloc(bits_nr * sizeof(struct block_info));
/* Progress reporting for the extent phase, when enabled. */
8480 if (ctx.progress_enabled) {
8481 ctx.tp = TASK_EXTENTS;
8482 task_start(ctx.info);
/* Queue the tree root and chunk root for the block walk. */
8486 root1 = root->fs_info->tree_root;
8487 level = btrfs_header_level(root1->node);
8488 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8489 root1->node->start, 0, level, 0,
8490 root1->nodesize, NULL);
8493 root1 = root->fs_info->chunk_root;
8494 level = btrfs_header_level(root1->node);
8495 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8496 root1->node->start, 0, level, 0,
8497 root1->nodesize, NULL);
/* Scan the root tree for ROOT_ITEMs and queue every subvolume tree. */
8500 btrfs_init_path(&path);
8503 key.type = BTRFS_ROOT_ITEM_KEY;
8504 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8509 leaf = path.nodes[0];
8510 slot = path.slots[0];
8511 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8512 ret = btrfs_next_leaf(root, &path);
8515 leaf = path.nodes[0];
8516 slot = path.slots[0];
8518 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8519 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
8520 unsigned long offset;
8523 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8524 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8525 last_snapshot = btrfs_root_last_snapshot(&ri);
/* drop_progress objectid 0 => the tree is not mid-deletion. */
8526 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8527 level = btrfs_root_level(&ri);
8528 level_size = root->nodesize;
8529 ret = add_root_item_to_list(&normal_trees,
8531 btrfs_root_bytenr(&ri),
8532 last_snapshot, level,
8533 0, level_size, NULL);
/* Otherwise queue it as a partially dropped tree with its drop key. */
8537 level = btrfs_root_level(&ri);
8538 level_size = root->nodesize;
8539 objectid = found_key.objectid;
8540 btrfs_disk_key_to_cpu(&found_key,
8542 ret = add_root_item_to_list(&dropping_trees,
8544 btrfs_root_bytenr(&ri),
8545 last_snapshot, level,
8547 level_size, &found_key);
8554 btrfs_release_path(&path);
8557 * check_block can return -EAGAIN if it fixes something, please keep
8558 * this in mind when dealing with return values from these functions, if
8559 * we get -EAGAIN we want to fall through and restart the loop.
8561 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8562 &seen, &reada, &nodes, &extent_cache,
8563 &chunk_cache, &dev_cache, &block_group_cache,
8570 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8571 &pending, &seen, &reada, &nodes,
8572 &extent_cache, &chunk_cache, &dev_cache,
8573 &block_group_cache, &dev_extent_cache);
/* Cross-check the caches built by the tree walk above. */
8580 ret = check_chunks(&chunk_cache, &block_group_cache,
8581 &dev_extent_cache, NULL, NULL, NULL, 0);
8588 ret = check_extent_refs(root, &extent_cache);
8595 ret = check_devices(&dev_cache, &dev_extent_cache);
/* Teardown: unhook the fsck caches from fs_info and free them. */
8600 task_stop(ctx.info);
8602 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8603 extent_io_tree_cleanup(&excluded_extents);
8604 root->fs_info->fsck_extent_cache = NULL;
8605 root->fs_info->free_extent_hook = NULL;
8606 root->fs_info->corrupt_blocks = NULL;
8607 root->fs_info->excluded_extents = NULL;
8610 free_chunk_cache_tree(&chunk_cache);
8611 free_device_cache_tree(&dev_cache);
8612 free_block_group_tree(&block_group_cache);
8613 free_device_extent_tree(&dev_extent_cache);
8614 free_extent_cache_tree(&seen);
8615 free_extent_cache_tree(&pending);
8616 free_extent_cache_tree(&reada);
8617 free_extent_cache_tree(&nodes);
/* Second teardown sequence — presumably an error-path label elided
 * from this view frees the same structures plus the extent records
 * and queued root lists. */
8620 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8621 free_extent_cache_tree(&seen);
8622 free_extent_cache_tree(&pending);
8623 free_extent_cache_tree(&reada);
8624 free_extent_cache_tree(&nodes);
8625 free_chunk_cache_tree(&chunk_cache);
8626 free_block_group_tree(&block_group_cache);
8627 free_device_cache_tree(&dev_cache);
8628 free_device_extent_tree(&dev_extent_cache);
8629 free_extent_record_cache(root->fs_info, &extent_cache);
8630 free_root_item_list(&normal_trees);
8631 free_root_item_list(&dropping_trees);
8632 extent_io_tree_cleanup(&excluded_extents);
8637 * Check backrefs of a tree block given by @bytenr or @eb.
8639 * @root:	the root containing the @bytenr or @eb
8640 * @eb:		tree block extent buffer, can be NULL
8641 * @bytenr:	bytenr of the tree block to search
8642 * @level:	tree level of the tree block
8643 * @owner:	owner of the tree block
8645 * Return >0 for any error found and output error message
8646 * Return 0 for no error found
/*
 * NOTE(review): numbering gaps indicate elided lines (declarations,
 * loop headers, goto labels) in this view; comments cover only the
 * visible logic.
 */
8648 static int check_tree_block_ref(struct btrfs_root *root,
8649 struct extent_buffer *eb, u64 bytenr,
8650 int level, u64 owner)
8652 struct btrfs_key key;
8653 struct btrfs_root *extent_root = root->fs_info->extent_root;
8654 struct btrfs_path path;
8655 struct btrfs_extent_item *ei;
8656 struct btrfs_extent_inline_ref *iref;
8657 struct extent_buffer *leaf;
8663 u32 nodesize = root->nodesize;
8670 btrfs_init_path(&path);
8671 key.objectid = bytenr;
/* Skinny metadata stores the level in key.offset instead of a
 * btrfs_tree_block_info. */
8672 if (btrfs_fs_incompat(root->fs_info,
8673 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8674 key.type = BTRFS_METADATA_ITEM_KEY;
8676 key.type = BTRFS_EXTENT_ITEM_KEY;
8677 key.offset = (u64)-1;
8679 /* Search for the backref in extent tree */
8680 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8682 err |= BACKREF_MISSING;
8685 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8687 err |= BACKREF_MISSING;
8691 leaf = path.nodes[0];
8692 slot = path.slots[0];
8693 btrfs_item_key_to_cpu(leaf, &key, slot);
8695 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* Locate the first inline ref and the on-disk level depending on
 * item flavor (skinny METADATA_ITEM vs old EXTENT_ITEM). */
8697 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8698 skinny_level = (int)key.offset;
8699 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8701 struct btrfs_tree_block_info *info;
8703 info = (struct btrfs_tree_block_info *)(ei + 1);
8704 skinny_level = btrfs_tree_block_level(leaf, info);
8705 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* When @eb is supplied, cross-check flags, generation, level and
 * reference count against the live block header. */
8712 if (!(btrfs_extent_flags(leaf, ei) &
8713 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8715 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8716 key.objectid, nodesize,
8717 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8718 err = BACKREF_MISMATCH;
8720 header_gen = btrfs_header_generation(eb);
8721 extent_gen = btrfs_extent_generation(leaf, ei);
8722 if (header_gen != extent_gen) {
8724 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8725 key.objectid, nodesize, header_gen,
8727 err = BACKREF_MISMATCH;
8729 if (level != skinny_level) {
8731 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8732 key.objectid, nodesize, level, skinny_level);
8733 err = BACKREF_MISMATCH;
8735 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8737 "extent[%llu %u] is referred by other roots than %llu",
8738 key.objectid, nodesize, root->objectid);
8739 err = BACKREF_MISMATCH;
8744 * Iterate the extent/metadata item to find the exact backref
8746 item_size = btrfs_item_size_nr(leaf, slot);
8747 ptr = (unsigned long)iref;
8748 end = (unsigned long)ei + item_size;
8750 iref = (struct btrfs_extent_inline_ref *)ptr;
8751 type = btrfs_extent_inline_ref_type(leaf, iref);
8752 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8754 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8755 (offset == root->objectid || offset == owner)) {
8757 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8758 /* Check if the backref points to valid referencer */
/* Recurse on the parent block to validate the shared backref. */
8759 found_ref = !check_tree_block_ref(root, NULL, offset,
8765 ptr += btrfs_extent_inline_ref_size(type);
8769 * Inlined extent item doesn't have what we need, check
8770 * TREE_BLOCK_REF_KEY
8773 btrfs_release_path(&path);
8774 key.objectid = bytenr;
8775 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8776 key.offset = root->objectid;
8778 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8783 err |= BACKREF_MISSING;
8785 btrfs_release_path(&path);
8786 if (eb && (err & BACKREF_MISSING))
8787 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8788 bytenr, nodesize, owner, level);
8793 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8795 * Return >0 any error found and output error message
8796 * Return 0 for no error found
/*
 * NOTE(review): numbering gaps indicate elided lines (declarations,
 * loop headers, labels) in this view; comments cover visible logic.
 */
8798 static int check_extent_data_item(struct btrfs_root *root,
8799 struct extent_buffer *eb, int slot)
8801 struct btrfs_file_extent_item *fi;
8802 struct btrfs_path path;
8803 struct btrfs_root *extent_root = root->fs_info->extent_root;
8804 struct btrfs_key fi_key;
8805 struct btrfs_key dbref_key;
8806 struct extent_buffer *leaf;
8807 struct btrfs_extent_item *ei;
8808 struct btrfs_extent_inline_ref *iref;
8809 struct btrfs_extent_data_ref *dref;
8811 u64 file_extent_gen;
8814 u64 extent_num_bytes;
8822 int found_dbackref = 0;
8826 btrfs_item_key_to_cpu(eb, &fi_key, slot);
8827 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8828 file_extent_gen = btrfs_file_extent_generation(eb, fi);
8830 /* Nothing to check for hole and inline data extents */
8831 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8832 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8835 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8836 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8837 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8839 /* Check unaligned disk_num_bytes and num_bytes */
8840 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8842 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8843 fi_key.objectid, fi_key.offset, disk_num_bytes,
8845 err |= BYTES_UNALIGNED;
/* Track global accounting of allocated vs referenced data bytes. */
8847 data_bytes_allocated += disk_num_bytes;
8849 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8851 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8852 fi_key.objectid, fi_key.offset, extent_num_bytes,
8854 err |= BYTES_UNALIGNED;
8856 data_bytes_referenced += extent_num_bytes;
8858 owner = btrfs_header_owner(eb);
8860 /* Check the extent item of the file extent in extent tree */
8861 btrfs_init_path(&path);
8862 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8863 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8864 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8866 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8868 err |= BACKREF_MISSING;
8872 leaf = path.nodes[0];
8873 slot = path.slots[0];
8874 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8876 extent_flags = btrfs_extent_flags(leaf, ei);
8877 extent_gen = btrfs_extent_generation(leaf, ei);
/* The extent item must be flagged as DATA. */
8879 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8881 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8882 disk_bytenr, disk_num_bytes,
8883 BTRFS_EXTENT_FLAG_DATA);
8884 err |= BACKREF_MISMATCH;
/* The file extent cannot be older than the data extent it refers to. */
8887 if (file_extent_gen < extent_gen) {
8889 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8890 disk_bytenr, disk_num_bytes, file_extent_gen,
8892 err |= BACKREF_MISMATCH;
8895 /* Check data backref inside that extent item */
8896 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8897 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8898 ptr = (unsigned long)iref;
8899 end = (unsigned long)ei + item_size;
8901 iref = (struct btrfs_extent_inline_ref *)ptr;
8902 type = btrfs_extent_inline_ref_type(leaf, iref);
8903 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8905 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8906 ref_root = btrfs_extent_data_ref_root(leaf, dref);
8907 if (ref_root == owner || ref_root == root->objectid)
8909 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/* Shared data ref: validate the referencing tree block instead. */
8910 found_dbackref = !check_tree_block_ref(root, NULL,
8911 btrfs_extent_inline_ref_offset(leaf, iref),
8917 ptr += btrfs_extent_inline_ref_size(type);
8920 /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8921 if (!found_dbackref) {
8922 btrfs_release_path(&path);
8924 btrfs_init_path(&path);
8925 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8926 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
8927 dbref_key.offset = hash_extent_data_ref(root->objectid,
8928 fi_key.objectid, fi_key.offset);
8930 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8931 &dbref_key, &path, 0, 0);
8936 if (!found_dbackref)
8937 err |= BACKREF_MISSING;
8939 btrfs_release_path(&path);
8940 if (err & BACKREF_MISSING) {
8941 error("data extent[%llu %llu] backref lost",
8942 disk_bytenr, disk_num_bytes);
8948 * Get real tree block level for the case like shared block
8949 * Return >= 0 as tree level
8950 * Return <0 for error
/*
 * Resolves the level from two sources — the extent-tree backref and
 * the block header itself — and (per the visible mismatch check at the
 * end) fails when they disagree.
 *
 * NOTE(review): numbering gaps indicate elided lines (declarations,
 * error returns) in this view.
 */
8952 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8954 struct extent_buffer *eb;
8955 struct btrfs_path path;
8956 struct btrfs_key key;
8957 struct btrfs_extent_item *ei;
8960 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8965 /* Search extent tree for extent generation and level */
8966 key.objectid = bytenr;
8967 key.type = BTRFS_METADATA_ITEM_KEY;
8968 key.offset = (u64)-1;
8970 btrfs_init_path(&path);
8971 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8974 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8982 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8983 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8984 struct btrfs_extent_item);
/* Only tree blocks are meaningful here. */
8985 flags = btrfs_extent_flags(path.nodes[0], ei);
8986 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8991 /* Get transid for later read_tree_block() check */
8992 transid = btrfs_extent_generation(path.nodes[0], ei);
8994 /* Get backref level as one source */
8995 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8996 backref_level = key.offset;
8998 struct btrfs_tree_block_info *info;
9000 info = (struct btrfs_tree_block_info *)(ei + 1);
9001 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9003 btrfs_release_path(&path);
9005 /* Get level from tree block as an alternative source */
9006 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9007 if (!extent_buffer_uptodate(eb)) {
9008 free_extent_buffer(eb);
9011 header_level = btrfs_header_level(eb);
9012 free_extent_buffer(eb);
/* Both sources must agree; the disagreement branch body is elided. */
9014 if (header_level != backref_level)
9016 return header_level;
/* Error path: presumably reached via a label elided from this view. */
9019 btrfs_release_path(&path);
9024 * Check if a tree block backref is valid (points to a valid tree block)
9025 * if level == -1, level will be resolved
9026 * Return >0 for any error found and print error message
/*
 * NOTE(review): numbering gaps indicate elided lines (declarations,
 * error checks, returns) in this view; comments cover visible logic.
 */
9028 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9029 u64 bytenr, int level)
9031 struct btrfs_root *root;
9032 struct btrfs_key key;
9033 struct btrfs_path path;
9034 struct extent_buffer *eb;
9035 struct extent_buffer *node;
9036 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9040 /* Query level for level == -1 special case */
9042 level = query_tree_block_level(fs_info, bytenr);
9044 err |= REFERENCER_MISSING;
/* Read the root that is supposed to reference this block. */
9048 key.objectid = root_id;
9049 key.type = BTRFS_ROOT_ITEM_KEY;
9050 key.offset = (u64)-1;
9052 root = btrfs_read_fs_root(fs_info, &key);
9054 err |= REFERENCER_MISSING;
9058 /* Read out the tree block to get item/node key */
9059 eb = read_tree_block(root, bytenr, root->nodesize, 0);
9060 if (!extent_buffer_uptodate(eb)) {
9061 err |= REFERENCER_MISSING;
9062 free_extent_buffer(eb);
9066 /* Empty tree, no need to check key */
9067 if (!btrfs_header_nritems(eb) && !level) {
9068 free_extent_buffer(eb);
/* Grab the first key (node vs leaf variant) to search for it below. */
9073 btrfs_node_key_to_cpu(eb, &key, 0);
9075 btrfs_item_key_to_cpu(eb, &key, 0);
9077 free_extent_buffer(eb);
9079 btrfs_init_path(&path);
9080 path.lowest_level = level;
9081 /* Search with the first key, to ensure we can reach it */
9082 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9084 err |= REFERENCER_MISSING;
/* The block reached at @level must be the one we were asked about. */
9088 node = path.nodes[level];
9089 if (btrfs_header_bytenr(node) != bytenr) {
9091 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9092 bytenr, nodesize, bytenr,
9093 btrfs_header_bytenr(node));
9094 err |= REFERENCER_MISMATCH;
9096 if (btrfs_header_level(node) != level) {
9098 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9099 bytenr, nodesize, level,
9100 btrfs_header_level(node));
9101 err |= REFERENCER_MISMATCH;
9105 btrfs_release_path(&path);
/* Report with or without level depending on what was resolved. */
9107 if (err & REFERENCER_MISSING) {
9109 error("extent [%llu %d] lost referencer (owner: %llu)",
9110 bytenr, nodesize, root_id);
9113 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9114 bytenr, nodesize, root_id, level);
9121 * Check referencer for shared block backref
9122 * If level == -1, this function will resolve the level.
/*
 * Reads the claimed parent node and verifies that one of its child
 * pointers actually points at @bytenr.
 *
 * NOTE(review): numbering gaps indicate elided lines (error returns,
 * loop break, final return 0) in this view.
 */
9124 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9125 u64 parent, u64 bytenr, int level)
9127 struct extent_buffer *eb;
9128 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9130 int found_parent = 0;
9133 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9134 if (!extent_buffer_uptodate(eb))
/* Resolve the child level if the caller passed -1. */
9138 level = query_tree_block_level(fs_info, bytenr);
/* The parent must sit exactly one level above the child. */
9142 if (level + 1 != btrfs_header_level(eb))
/* Scan the parent's block pointers for @bytenr. */
9145 nr = btrfs_header_nritems(eb);
9146 for (i = 0; i < nr; i++) {
9147 if (bytenr == btrfs_node_blockptr(eb, i)) {
9153 free_extent_buffer(eb);
9154 if (!found_parent) {
9156 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9157 bytenr, nodesize, parent, level);
9158 return REFERENCER_MISSING;
9164 * Check referencer for normal (inlined) data ref
9165 * If len == 0, it will be resolved by searching in extent tree
/*
 * Walks all EXTENT_DATA items of (@root_id, @objectid) and counts how
 * many reference (@bytenr, @len) at the backref-consistent offset; the
 * count must equal the backref's @count.
 *
 * NOTE(review): numbering gaps indicate elided lines (len resolution,
 * error handling) in this view; comments cover visible logic.
 */
9167 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9168 u64 root_id, u64 objectid, u64 offset,
9169 u64 bytenr, u64 len, u32 count)
9171 struct btrfs_root *root;
9172 struct btrfs_root *extent_root = fs_info->extent_root;
9173 struct btrfs_key key;
9174 struct btrfs_path path;
9175 struct extent_buffer *leaf;
9176 struct btrfs_file_extent_item *fi;
9177 u32 found_count = 0;
/* First confirm the extent item itself exists (resolves len == 0). */
9182 key.objectid = bytenr;
9183 key.type = BTRFS_EXTENT_ITEM_KEY;
9184 key.offset = (u64)-1;
9186 btrfs_init_path(&path);
9187 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9190 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9193 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9194 if (key.objectid != bytenr ||
9195 key.type != BTRFS_EXTENT_ITEM_KEY)
9198 btrfs_release_path(&path);
/* Read the fs root the backref claims as referencer. */
9200 key.objectid = root_id;
9201 key.type = BTRFS_ROOT_ITEM_KEY;
9202 key.offset = (u64)-1;
9203 btrfs_init_path(&path);
9205 root = btrfs_read_fs_root(fs_info, &key);
9209 key.objectid = objectid;
9210 key.type = BTRFS_EXTENT_DATA_KEY;
9212 * It can be nasty as data backref offset is
9213 * file offset - file extent offset, which is smaller or
9214 * equal to original backref offset. The only special case is
9215 * overflow. So we need to special check and do further search.
9217 key.offset = offset & (1ULL << 63) ? 0 : offset;
9219 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9224 * Search afterwards to get correct one
9225 * NOTE: As we must do a comprehensive check on the data backref to
9226 * make sure the dref count also matches, we must iterate all file
9227 * extents for that inode.
9230 leaf = path.nodes[0];
9231 slot = path.slots[0];
9233 btrfs_item_key_to_cpu(leaf, &key, slot);
9234 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9236 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9238 * Except normal disk bytenr and disk num bytes, we still
9239 * need to do extra check on dbackref offset as
9240 * dbackref offset = file_offset - file_extent_offset
9242 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9243 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9244 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9248 ret = btrfs_next_item(root, &path);
9253 btrfs_release_path(&path);
/* Count mismatch means dangling or under-counted data backref. */
9254 if (found_count != count) {
9256 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9257 bytenr, len, root_id, objectid, offset, count, found_count);
9258 return REFERENCER_MISSING;
9264 * Check if the referencer of a shared data backref exists
/*
 * Reads the claimed parent leaf and scans its EXTENT_DATA items for
 * one whose disk bytenr equals @bytenr.
 *
 * NOTE(review): numbering gaps indicate elided lines (error returns,
 * loop continues, final return 0) in this view.
 */
9266 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9267 u64 parent, u64 bytenr)
9269 struct extent_buffer *eb;
9270 struct btrfs_key key;
9271 struct btrfs_file_extent_item *fi;
9272 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9274 int found_parent = 0;
9277 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9278 if (!extent_buffer_uptodate(eb))
/* Scan every EXTENT_DATA item in the parent leaf. */
9281 nr = btrfs_header_nritems(eb);
9282 for (i = 0; i < nr; i++) {
9283 btrfs_item_key_to_cpu(eb, &key, i);
9284 if (key.type != BTRFS_EXTENT_DATA_KEY)
9287 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
/* Inline extents have no disk bytenr to compare against. */
9288 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9291 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9298 free_extent_buffer(eb);
9299 if (!found_parent) {
9300 error("shared extent %llu referencer lost (parent: %llu)",
9302 return REFERENCER_MISSING;
9308 * This function will check a given extent item, including its backref and
9309 * itself (like crossing stripe boundary and type)
9311 * Since we don't use extent_record anymore, introduce new error bit
/*
 * NOTE(review): numbering gaps indicate elided lines (declarations,
 * loop/switch headers, error checks) in this view; comments cover
 * visible logic only.
 */
9313 static int check_extent_item(struct btrfs_fs_info *fs_info,
9314 struct extent_buffer *eb, int slot)
9316 struct btrfs_extent_item *ei;
9317 struct btrfs_extent_inline_ref *iref;
9318 struct btrfs_extent_data_ref *dref;
9322 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9323 u32 item_size = btrfs_item_size_nr(eb, slot);
9328 struct btrfs_key key;
/* Global accounting: data extents use key.offset as length, metadata
 * items are one node. */
9332 btrfs_item_key_to_cpu(eb, &key, slot);
9333 if (key.type == BTRFS_EXTENT_ITEM_KEY)
9334 bytes_used += key.offset;
9336 bytes_used += nodesize;
9338 if (item_size < sizeof(*ei)) {
9340 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9341 * old thing when on disk format is still un-determined.
9342 * No need to care about it anymore
9344 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9348 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9349 flags = btrfs_extent_flags(eb, ei);
9351 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
/* Metadata must not straddle a stripe boundary. */
9353 if (metadata && check_crossing_stripes(global_info, key.objectid,
9355 error("bad metadata [%llu, %llu) crossing stripe boundary",
9356 key.objectid, key.objectid + nodesize);
9357 err |= CROSSING_STRIPE_BOUNDARY;
9360 ptr = (unsigned long)(ei + 1);
9362 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9363 /* Old EXTENT_ITEM metadata */
9364 struct btrfs_tree_block_info *info;
9366 info = (struct btrfs_tree_block_info *)ptr;
9367 level = btrfs_tree_block_level(eb, info);
9368 ptr += sizeof(struct btrfs_tree_block_info);
9370 /* New METADATA_ITEM */
9373 end = (unsigned long)ei + item_size;
9376 err |= ITEM_SIZE_MISMATCH;
9380 /* Now check every backref in this extent item */
9382 iref = (struct btrfs_extent_inline_ref *)ptr;
9383 type = btrfs_extent_inline_ref_type(eb, iref);
9384 offset = btrfs_extent_inline_ref_offset(eb, iref);
/* Dispatch each inline backref to its dedicated checker. */
9386 case BTRFS_TREE_BLOCK_REF_KEY:
9387 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9391 case BTRFS_SHARED_BLOCK_REF_KEY:
9392 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9396 case BTRFS_EXTENT_DATA_REF_KEY:
9397 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9398 ret = check_extent_data_backref(fs_info,
9399 btrfs_extent_data_ref_root(eb, dref),
9400 btrfs_extent_data_ref_objectid(eb, dref),
9401 btrfs_extent_data_ref_offset(eb, dref),
9402 key.objectid, key.offset,
9403 btrfs_extent_data_ref_count(eb, dref));
9406 case BTRFS_SHARED_DATA_REF_KEY:
9407 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9411 error("extent[%llu %d %llu] has unknown ref type: %d",
9412 key.objectid, key.type, key.offset, type);
9413 err |= UNKNOWN_TYPE;
9417 ptr += btrfs_extent_inline_ref_size(type);
9426 * Check if a dev extent item is referred correctly by its chunk
/*
 * Looks up the chunk named by the dev extent and verifies both the
 * chunk length and that one of the chunk's stripes points back at
 * this (devid, offset).
 *
 * NOTE(review): numbering gaps indicate elided lines (error returns,
 * leaf assignment, final return 0) in this view.
 */
9428 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9429 struct extent_buffer *eb, int slot)
9431 struct btrfs_root *chunk_root = fs_info->chunk_root;
9432 struct btrfs_dev_extent *ptr;
9433 struct btrfs_path path;
9434 struct btrfs_key chunk_key;
9435 struct btrfs_key devext_key;
9436 struct btrfs_chunk *chunk;
9437 struct extent_buffer *l;
9441 int found_chunk = 0;
9444 btrfs_item_key_to_cpu(eb, &devext_key, slot);
9445 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9446 length = btrfs_dev_extent_length(eb, ptr);
/* Build the key of the chunk this dev extent claims to back. */
9448 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9449 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9450 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9452 btrfs_init_path(&path);
9453 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
/* The chunk's length must match the dev extent's length. */
9458 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9459 if (btrfs_chunk_length(l, chunk) != length)
/* One of the chunk's stripes must point back at this dev extent. */
9462 num_stripes = btrfs_chunk_num_stripes(l, chunk);
9463 for (i = 0; i < num_stripes; i++) {
9464 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9465 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9467 if (devid == devext_key.objectid &&
9468 offset == devext_key.offset) {
9474 btrfs_release_path(&path);
9477 "device extent[%llu, %llu, %llu] did not find the related chunk",
9478 devext_key.objectid, devext_key.offset, length);
9479 return REFERENCER_MISSING;
9485 * Check if the used space is correct with the dev item
/*
 * Sums the lengths of all DEV_EXTENT items belonging to the device and
 * compares the total with the bytes-used stored in the dev item.
 *
 * NOTE(review): numbering gaps indicate elided lines (declarations,
 * error checks, final return 0) in this view.
 */
9487 static int check_dev_item(struct btrfs_fs_info *fs_info,
9488 struct extent_buffer *eb, int slot)
9490 struct btrfs_root *dev_root = fs_info->dev_root;
9491 struct btrfs_dev_item *dev_item;
9492 struct btrfs_path path;
9493 struct btrfs_key key;
9494 struct btrfs_dev_extent *ptr;
9500 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9501 dev_id = btrfs_device_id(eb, dev_item);
9502 used = btrfs_device_bytes_used(eb, dev_item);
9504 key.objectid = dev_id;
9505 key.type = BTRFS_DEV_EXTENT_KEY;
9508 btrfs_init_path(&path);
9509 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9511 btrfs_item_key_to_cpu(eb, &key, slot);
9512 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9513 key.objectid, key.type, key.offset);
9514 btrfs_release_path(&path);
9515 return REFERENCER_MISSING;
9518 /* Iterate dev_extents to calculate the used space of a device */
9520 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9522 if (key.objectid > dev_id)
9524 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9527 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9528 struct btrfs_dev_extent);
9529 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9531 ret = btrfs_next_item(dev_root, &path);
9535 btrfs_release_path(&path);
/* Summed extent lengths must equal the dev item's bytes-used. */
9537 if (used != total) {
9538 btrfs_item_key_to_cpu(eb, &key, slot);
9540 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9541 total, used, BTRFS_ROOT_TREE_OBJECTID,
9542 BTRFS_DEV_EXTENT_KEY, dev_id);
9543 return ACCOUNTING_MISMATCH;
9549 * Check a block group item with its referener (chunk) and its used space
9550 * with extent/metadata item
/*
 * Two checks: (1) the chunk that the block group corresponds to exists
 * and has a matching length; (2) the used-bytes stored in the block
 * group item equals the sum of extent/metadata items inside its range,
 * with each extent's type matching the block group's flags.
 *
 * NOTE(review): numbering gaps indicate elided lines (declarations,
 * error handling, final return) in this view; comments cover visible
 * logic only.
 */
9552 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9553 struct extent_buffer *eb, int slot)
9555 struct btrfs_root *extent_root = fs_info->extent_root;
9556 struct btrfs_root *chunk_root = fs_info->chunk_root;
9557 struct btrfs_block_group_item *bi;
9558 struct btrfs_block_group_item bg_item;
9559 struct btrfs_path path;
9560 struct btrfs_key bg_key;
9561 struct btrfs_key chunk_key;
9562 struct btrfs_key extent_key;
9563 struct btrfs_chunk *chunk;
9564 struct extent_buffer *leaf;
9565 struct btrfs_extent_item *ei;
9566 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9574 btrfs_item_key_to_cpu(eb, &bg_key, slot);
9575 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9576 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9577 used = btrfs_block_group_used(&bg_item);
9578 bg_flags = btrfs_block_group_flags(&bg_item);
9580 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9581 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9582 chunk_key.offset = bg_key.objectid;
9584 btrfs_init_path(&path);
9585 /* Search for the referencer chunk */
9586 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9589 "block group[%llu %llu] did not find the related chunk item",
9590 bg_key.objectid, bg_key.offset);
9591 err |= REFERENCER_MISSING;
9593 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9594 struct btrfs_chunk);
/* Chunk and block group must describe the same byte range length. */
9595 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9598 "block group[%llu %llu] related chunk item length does not match",
9599 bg_key.objectid, bg_key.offset);
9600 err |= REFERENCER_MISMATCH;
9603 btrfs_release_path(&path);
9605 /* Search from the block group bytenr */
9606 extent_key.objectid = bg_key.objectid;
9607 extent_key.type = 0;
9608 extent_key.offset = 0;
9610 btrfs_init_path(&path);
9611 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9615 /* Iterate extent tree to account used space */
9617 leaf = path.nodes[0];
9618 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* Past the end of this block group's range: stop accounting. */
9619 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9622 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9623 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9625 if (extent_key.objectid < bg_key.objectid)
/* METADATA_ITEM accounts one node; EXTENT_ITEM carries its length
 * in key.offset. */
9628 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9631 total += extent_key.offset;
9633 ei = btrfs_item_ptr(leaf, path.slots[0],
9634 struct btrfs_extent_item);
9635 flags = btrfs_extent_flags(leaf, ei);
/* An extent's DATA/TREE_BLOCK flag must agree with the block
 * group's type flags. */
9636 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9637 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9639 "bad extent[%llu, %llu) type mismatch with chunk",
9640 extent_key.objectid,
9641 extent_key.objectid + extent_key.offset);
9642 err |= CHUNK_TYPE_MISMATCH;
9644 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9645 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9646 BTRFS_BLOCK_GROUP_METADATA))) {
9648 "bad extent[%llu, %llu) type mismatch with chunk",
9649 extent_key.objectid,
9650 extent_key.objectid + nodesize);
9651 err |= CHUNK_TYPE_MISMATCH;
9655 ret = btrfs_next_item(extent_root, &path);
9661 btrfs_release_path(&path);
9663 if (total != used) {
9665 "block group[%llu %llu] used %llu but extent items used %llu",
9666 bg_key.objectid, bg_key.offset, used, total);
9667 err |= ACCOUNTING_MISMATCH;
9673 * Check a chunk item.
9674 * Including checking all referred dev_extents and block group
/*
 * NOTE(review): this listing is elided (embedded original line numbers jump);
 * code lines are kept verbatim, only comments are added.
 *
 * Cross-checks one chunk item against its referencers:
 *  1) alignment and type/profile sanity of the chunk itself
 *  2) the matching block group item in the extent tree
 *  3) one dev extent per stripe in the device tree
 * Returns a bitmask of the error flags accumulated in 'err'.
 */
9676 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9677 struct extent_buffer *eb, int slot)
9679 struct btrfs_root *extent_root = fs_info->extent_root;
9680 struct btrfs_root *dev_root = fs_info->dev_root;
9681 struct btrfs_path path;
9682 struct btrfs_key chunk_key;
9683 struct btrfs_key bg_key;
9684 struct btrfs_key devext_key;
9685 struct btrfs_chunk *chunk;
9686 struct extent_buffer *leaf;
9687 struct btrfs_block_group_item *bi;
9688 struct btrfs_block_group_item bg_item;
9689 struct btrfs_dev_extent *ptr;
9690 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
/* Read the chunk being verified; its key offset is the logical start. */
9702 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9703 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9704 length = btrfs_chunk_length(eb, chunk);
9705 chunk_end = chunk_key.offset + length;
9706 if (!IS_ALIGNED(length, sectorsize)) {
9707 error("chunk[%llu %llu) not aligned to %u",
9708 chunk_key.offset, chunk_end, sectorsize);
9709 err |= BYTES_UNALIGNED;
/* A chunk must carry at least one of DATA/METADATA/SYSTEM ... */
9713 type = btrfs_chunk_type(eb, chunk);
9714 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9715 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9716 error("chunk[%llu %llu) has no chunk type",
9717 chunk_key.offset, chunk_end);
9718 err |= UNKNOWN_TYPE;
/* ... and at most one RAID/DUP profile bit (power-of-two check). */
9720 if (profile && (profile & (profile - 1))) {
9721 error("chunk[%llu %llu) multiple profiles detected: %llx",
9722 chunk_key.offset, chunk_end, profile);
9723 err |= UNKNOWN_TYPE;
/* Look up the block group item that must mirror this chunk 1:1. */
9726 bg_key.objectid = chunk_key.offset;
9727 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9728 bg_key.offset = length;
9730 btrfs_init_path(&path);
9731 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9734 "chunk[%llu %llu) did not find the related block group item",
9735 chunk_key.offset, chunk_end);
9736 err |= REFERENCER_MISSING;
9738 leaf = path.nodes[0];
9739 bi = btrfs_item_ptr(leaf, path.slots[0],
9740 struct btrfs_block_group_item);
9741 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9743 if (btrfs_block_group_flags(&bg_item) != type) {
9745 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9746 chunk_key.offset, chunk_end, type,
9747 btrfs_block_group_flags(&bg_item));
/*
 * NOTE(review): the block group item exists but its flags disagree;
 * REFERENCER_MISSING looks like it should be a mismatch code — confirm
 * against the error-flag definitions earlier in this file.
 */
9748 err |= REFERENCER_MISSING;
/* Verify each stripe has a dev extent pointing back at this chunk. */
9752 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9753 for (i = 0; i < num_stripes; i++) {
9754 btrfs_release_path(&path);
9755 btrfs_init_path(&path);
9756 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9757 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9758 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9760 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9765 leaf = path.nodes[0];
9766 ptr = btrfs_item_ptr(leaf, path.slots[0],
9767 struct btrfs_dev_extent);
9768 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9769 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
/* The dev extent must reference exactly this chunk and length. */
9770 if (objectid != chunk_key.objectid ||
9771 offset != chunk_key.offset ||
9772 btrfs_dev_extent_length(leaf, ptr) != length)
9776 err |= BACKREF_MISSING;
9778 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9779 chunk_key.objectid, chunk_end, i);
9782 btrfs_release_path(&path);
9788 * Main entry function to check known items and update related accounting info
/*
 * Dispatches every item in the leaf 'eb' to its type-specific checker and
 * accumulates global accounting (e.g. total_csum_bytes). Iterates slots via
 * the ++slot test at the bottom. (Listing is elided; code kept verbatim.)
 */
9790 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9792 struct btrfs_fs_info *fs_info = root->fs_info;
9793 struct btrfs_key key;
9796 struct btrfs_extent_data_ref *dref;
9801 btrfs_item_key_to_cpu(eb, &key, slot);
/* File extents: csum/backref consistency for data. */
9805 case BTRFS_EXTENT_DATA_KEY:
9806 ret = check_extent_data_item(root, eb, slot);
9809 case BTRFS_BLOCK_GROUP_ITEM_KEY:
9810 ret = check_block_group_item(fs_info, eb, slot);
9813 case BTRFS_DEV_ITEM_KEY:
9814 ret = check_dev_item(fs_info, eb, slot);
9817 case BTRFS_CHUNK_ITEM_KEY:
9818 ret = check_chunk_item(fs_info, eb, slot);
9821 case BTRFS_DEV_EXTENT_KEY:
9822 ret = check_dev_extent_item(fs_info, eb, slot);
9825 case BTRFS_EXTENT_ITEM_KEY:
9826 case BTRFS_METADATA_ITEM_KEY:
9827 ret = check_extent_item(fs_info, eb, slot);
/* Csum items are only accounted, not verified here. */
9830 case BTRFS_EXTENT_CSUM_KEY:
9831 total_csum_bytes += btrfs_item_size_nr(eb, slot);
9833 case BTRFS_TREE_BLOCK_REF_KEY:
9834 ret = check_tree_block_backref(fs_info, key.offset,
9838 case BTRFS_EXTENT_DATA_REF_KEY:
9839 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9840 ret = check_extent_data_backref(fs_info,
9841 btrfs_extent_data_ref_root(eb, dref),
9842 btrfs_extent_data_ref_objectid(eb, dref),
9843 btrfs_extent_data_ref_offset(eb, dref),
9845 btrfs_extent_data_ref_count(eb, dref));
9848 case BTRFS_SHARED_BLOCK_REF_KEY:
9849 ret = check_shared_block_backref(fs_info, key.offset,
9853 case BTRFS_SHARED_DATA_REF_KEY:
9854 ret = check_shared_data_backref(fs_info, key.offset,
/* Advance to next slot; loop back while items remain in this leaf. */
9862 if (++slot < btrfs_header_nritems(eb))
9869 * Helper function for later fs/subvol tree check. To determine if a tree
9870 * block should be checked.
9871 * This function will ensure only the direct referencer with lowest rootid to
9872 * check a fs/subvolume tree block.
9874 * Backref check at extent tree would detect errors like missing subvolume
9875 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * In other words: a shared tree block is checked exactly once, by the
 * referencing root with the lowest objectid. Returns nonzero ("check it")
 * when no inline TREE_BLOCK_REF with a lower rootid is found, or when the
 * backref lookup fails and we must check conservatively.
 */
9877 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
9879 struct btrfs_root *extent_root = root->fs_info->extent_root;
9880 struct btrfs_key key;
9881 struct btrfs_path path;
9882 struct extent_buffer *leaf;
9884 struct btrfs_extent_item *ei;
9890 struct btrfs_extent_inline_ref *iref;
9893 btrfs_init_path(&path);
/* Search for the (METADATA_ITEM or EXTENT_ITEM) of this tree block. */
9894 key.objectid = btrfs_header_bytenr(eb);
9895 key.type = BTRFS_METADATA_ITEM_KEY;
9896 key.offset = (u64)-1;
9899 * Any failure in backref resolving means we can't determine
9900 * whom the tree block belongs to.
9901 * So in that case, we need to check that tree block
9903 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9907 ret = btrfs_previous_extent_item(extent_root, &path,
9908 btrfs_header_bytenr(eb));
9912 leaf = path.nodes[0];
9913 slot = path.slots[0];
9914 btrfs_item_key_to_cpu(leaf, &key, slot);
9915 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * Skinny metadata items have the inline refs right after the extent
 * item; the older format carries a tree_block_info first.
 */
9917 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9918 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9920 struct btrfs_tree_block_info *info;
9922 info = (struct btrfs_tree_block_info *)(ei + 1);
9923 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* Walk every inline backref of the extent item. */
9926 item_size = btrfs_item_size_nr(leaf, slot);
9927 ptr = (unsigned long)iref;
9928 end = (unsigned long)ei + item_size;
9930 iref = (struct btrfs_extent_inline_ref *)ptr;
9931 type = btrfs_extent_inline_ref_type(leaf, iref);
9932 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9935 * We only check the tree block if current root is
9936 * the lowest referencer of it.
9938 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9939 offset < root->objectid) {
9940 btrfs_release_path(&path);
9944 ptr += btrfs_extent_inline_ref_size(type);
9947 * Normally we should also check keyed tree block ref, but that may be
9948 * very time consuming. Inlined ref should already make us skip a lot
9949 * of refs now. So skip search keyed tree block ref.
9953 btrfs_release_path(&path);
9958 * Traversal function for tree block. We will do:
9959 * 1) Skip shared fs/subvolume tree blocks
9960 * 2) Update related bytes accounting
9961 * 3) Pre-order traversal
/*
 * Recursive pre-order walk used by the low-memory check: verifies the node
 * itself (backref + leaf items), then descends into children, skipping keys
 * already dropped by a pending snapshot deletion. (Listing is elided.)
 */
9963 static int traverse_tree_block(struct btrfs_root *root,
9964 struct extent_buffer *node)
9966 struct extent_buffer *eb;
9967 struct btrfs_key key;
9968 struct btrfs_key drop_key;
9976 * Skip shared fs/subvolume tree block, in that case they will
9977 * be checked by referencer with lowest rootid
9979 if (is_fstree(root->objectid) && !should_check(root, node))
9982 /* Update bytes accounting */
9983 total_btree_bytes += node->len;
9984 if (fs_root_objectid(btrfs_header_owner(node)))
9985 total_fs_tree_bytes += node->len;
9986 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
9987 total_extent_tree_bytes += node->len;
/* Detect pre-3.x "old" backref format left by relocation. */
9988 if (!found_old_backref &&
9989 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
9990 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
9991 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
9992 found_old_backref = 1;
9994 /* pre-order traversal, check itself first */
9995 level = btrfs_header_level(node);
9996 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
9997 btrfs_header_level(node),
9998 btrfs_header_owner(node));
10002 "check %s failed root %llu bytenr %llu level %d, force continue check",
10003 level ? "node":"leaf", root->objectid,
10004 btrfs_header_bytenr(node), btrfs_header_level(node));
/* Leaf path: account wasted space and check the items themselves. */
10007 btree_space_waste += btrfs_leaf_free_space(root, node);
10008 ret = check_leaf_items(root, node);
/* Node path: account unused key-pointer slots as waste. */
10013 nr = btrfs_header_nritems(node);
10014 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
10015 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10016 sizeof(struct btrfs_key_ptr);
10018 /* Then check all its children */
10019 for (i = 0; i < nr; i++) {
10020 u64 blocknr = btrfs_node_blockptr(node, i);
10022 btrfs_node_key_to_cpu(node, &key, i);
/* Skip subtrees already freed by an in-progress snapshot drop. */
10023 if (level == root->root_item.drop_level &&
10024 is_dropped_key(&key, &drop_key))
10028 * As a btrfs tree has at most 8 levels (0..7), so it's quite safe
10029 * to call the function itself.
10031 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10032 if (extent_buffer_uptodate(eb)) {
10033 ret = traverse_tree_block(root, eb);
10036 free_extent_buffer(eb);
10043 * Low memory usage version check_chunks_and_extents.
10045 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10047 struct btrfs_path path;
10048 struct btrfs_key key;
10049 struct btrfs_root *root1;
10050 struct btrfs_root *cur_root;
10054 root1 = root->fs_info->chunk_root;
10055 ret = traverse_tree_block(root1, root1->node);
10058 root1 = root->fs_info->tree_root;
10059 ret = traverse_tree_block(root1, root1->node);
10062 btrfs_init_path(&path);
10063 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10065 key.type = BTRFS_ROOT_ITEM_KEY;
10067 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10069 error("cannot find extent treet in tree_root");
10074 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10075 if (key.type != BTRFS_ROOT_ITEM_KEY)
10077 key.offset = (u64)-1;
10079 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10080 if (IS_ERR(cur_root) || !cur_root) {
10081 error("failed to read tree: %lld", key.objectid);
10085 ret = traverse_tree_block(cur_root, cur_root->node);
10089 ret = btrfs_next_item(root1, &path);
10095 btrfs_release_path(&path);
/*
 * Replace 'root''s node with a freshly allocated (or, with 'overwrite',
 * reused) empty block, update the root item if the bytenr happened to stay
 * the same, and queue the root on the dirty list. Used when re-initializing
 * whole trees during --init-extent-tree style repair. (Listing is elided;
 * code kept verbatim.)
 */
10099 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10100 struct btrfs_root *root, int overwrite)
10102 struct extent_buffer *c;
10103 struct extent_buffer *old = root->node;
10106 struct btrfs_disk_key disk_key = {0,0,0};
10112 extent_buffer_get(c);
10115 c = btrfs_alloc_free_block(trans, root,
10117 root->root_key.objectid,
10118 &disk_key, level, 0, 0);
10121 extent_buffer_get(c);
/* Build a valid empty header for the new root block. */
10125 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10126 btrfs_set_header_level(c, level);
10127 btrfs_set_header_bytenr(c, c->start);
10128 btrfs_set_header_generation(c, trans->transid);
10129 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10130 btrfs_set_header_owner(c, root->root_key.objectid);
10132 write_extent_buffer(c, root->fs_info->fsid,
10133 btrfs_header_fsid(), BTRFS_FSID_SIZE);
10135 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10136 btrfs_header_chunk_tree_uuid(c),
10139 btrfs_mark_buffer_dirty(c);
10141 * this case can happen in the following case:
10143 * 1.overwrite previous root.
10145 * 2.reinit reloc data root, this is because we skip pin
10146 * down reloc data tree before which means we can allocate
10147 * same block bytenr here.
10149 if (old->start == c->start) {
10150 btrfs_set_root_generation(&root->root_item,
10152 root->root_item.level = btrfs_header_level(root->node);
10153 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10154 &root->root_key, &root->root_item);
10156 free_extent_buffer(c);
/* Drop the old node's reference now that the root points at 'c'. */
10160 free_extent_buffer(old);
10162 add_root_to_dirty_list(root);
/*
 * Recursively mark every metadata block reachable from 'eb' as pinned in
 * fs_info->pinned_extents so the allocator will not hand them out while the
 * extent tree is being rebuilt. When walking the tree root ('tree_root'
 * set), also descend into the root items' referenced trees — except the
 * extent tree and reloc trees, which will be recreated. (Listing is elided;
 * code kept verbatim.)
 */
10166 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10167 struct extent_buffer *eb, int tree_root)
10169 struct extent_buffer *tmp;
10170 struct btrfs_root_item *ri;
10171 struct btrfs_key key;
10174 int level = btrfs_header_level(eb);
10180 * If we have pinned this block before, don't pin it again.
10181 * This can not only avoid forever loop with broken filesystem
10182 * but also give us some speedups.
10184 if (test_range_bit(&fs_info->pinned_extents, eb->start,
10185 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10188 btrfs_pin_extent(fs_info, eb->start, eb->len);
10190 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10191 nritems = btrfs_header_nritems(eb);
10192 for (i = 0; i < nritems; i++) {
/* Leaf of the tree root: follow ROOT_ITEMs into their trees. */
10194 btrfs_item_key_to_cpu(eb, &key, i);
10195 if (key.type != BTRFS_ROOT_ITEM_KEY)
10197 /* Skip the extent root and reloc roots */
10198 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10199 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10200 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10202 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10203 bytenr = btrfs_disk_root_bytenr(eb, ri);
10206 * If at any point we start needing the real root we
10207 * will have to build a stump root for the root we are
10208 * in, but for now this doesn't actually use the root so
10209 * just pass in extent_root.
10211 tmp = read_tree_block(fs_info->extent_root, bytenr,
10213 if (!extent_buffer_uptodate(tmp)) {
10214 fprintf(stderr, "Error reading root block\n");
10217 ret = pin_down_tree_blocks(fs_info, tmp, 0);
10218 free_extent_buffer(tmp);
/* Interior node: recurse into each child pointer. */
10222 bytenr = btrfs_node_blockptr(eb, i);
10224 /* If we aren't the tree root don't read the block */
10225 if (level == 1 && !tree_root) {
10226 btrfs_pin_extent(fs_info, bytenr, nodesize);
10230 tmp = read_tree_block(fs_info->extent_root, bytenr,
10232 if (!extent_buffer_uptodate(tmp)) {
10233 fprintf(stderr, "Error reading tree block\n");
10236 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10237 free_extent_buffer(tmp);
/*
 * Pin all in-use metadata: first everything under the chunk root, then
 * everything reachable from the tree root (tree_root=1 makes the walk
 * follow root items into subtrees). (Listing is elided; code kept verbatim.)
 */
10246 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10250 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10254 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block group cache directly from the chunk tree and
 * mark each chunk's range dirty in the free space cache, so allocation works
 * again after btrfs_free_block_groups() dropped everything. (Listing is
 * elided; code kept verbatim.)
 */
10257 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10259 struct btrfs_block_group_cache *cache;
10260 struct btrfs_path *path;
10261 struct extent_buffer *leaf;
10262 struct btrfs_chunk *chunk;
10263 struct btrfs_key key;
10267 path = btrfs_alloc_path();
10272 key.type = BTRFS_CHUNK_ITEM_KEY;
10275 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
10277 btrfs_free_path(path);
10282 * We do this in case the block groups were screwed up and had alloc
10283 * bits that aren't actually set on the chunks. This happens with
10284 * restored images every time and could happen in real life I guess.
10286 fs_info->avail_data_alloc_bits = 0;
10287 fs_info->avail_metadata_alloc_bits = 0;
10288 fs_info->avail_system_alloc_bits = 0;
10290 /* First we need to create the in-memory block groups */
10292 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10293 ret = btrfs_next_leaf(fs_info->chunk_root, path);
10295 btrfs_free_path(path);
10303 leaf = path->nodes[0];
10304 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10305 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
/* One in-memory block group per chunk item, flags from the chunk. */
10310 chunk = btrfs_item_ptr(leaf, path->slots[0],
10311 struct btrfs_chunk);
10312 btrfs_add_block_group(fs_info, 0,
10313 btrfs_chunk_type(leaf, chunk),
10314 key.objectid, key.offset,
10315 btrfs_chunk_length(leaf, chunk));
10316 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10317 key.offset + btrfs_chunk_length(leaf, chunk),
/* Second pass: walk the freshly created block groups in order. */
10323 cache = btrfs_lookup_first_block_group(fs_info, start);
10327 start = cache->key.objectid + cache->key.offset;
10330 btrfs_free_path(path);
/*
 * Remove any pending balance state: delete the balance item, delete every
 * TREE_RELOC root item, then re-initialize the data reloc tree so a later
 * mount does not resume a balance against the rebuilt extent tree.
 * (Listing is elided; code kept verbatim.)
 */
10334 static int reset_balance(struct btrfs_trans_handle *trans,
10335 struct btrfs_fs_info *fs_info)
10337 struct btrfs_root *root = fs_info->tree_root;
10338 struct btrfs_path *path;
10339 struct extent_buffer *leaf;
10340 struct btrfs_key key;
10341 int del_slot, del_nr = 0;
10345 path = btrfs_alloc_path();
/* Step 1: drop the balance item itself, if present. */
10349 key.objectid = BTRFS_BALANCE_OBJECTID;
10350 key.type = BTRFS_BALANCE_ITEM_KEY;
10353 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10358 goto reinit_data_reloc;
10363 ret = btrfs_del_item(trans, root, path);
10366 btrfs_release_path(path);
/* Step 2: delete all tree-reloc root items, batching contiguous runs. */
10368 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10369 key.type = BTRFS_ROOT_ITEM_KEY;
10372 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10376 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10381 ret = btrfs_del_items(trans, root, path,
10388 btrfs_release_path(path);
10391 ret = btrfs_search_slot(trans, root, &key, path,
10398 leaf = path->nodes[0];
10399 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10400 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10402 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10407 del_slot = path->slots[0];
10416 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
10420 btrfs_release_path(path);
/* Step 3: reinit the data reloc tree and give it a fresh root dir. */
10423 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10424 key.type = BTRFS_ROOT_ITEM_KEY;
10425 key.offset = (u64)-1;
10426 root = btrfs_read_fs_root(fs_info, &key);
10427 if (IS_ERR(root)) {
10428 fprintf(stderr, "Error reading data reloc tree\n");
10429 ret = PTR_ERR(root);
10432 record_root_in_trans(trans, root);
10433 ret = btrfs_fsck_reinit_root(trans, root, 0);
10436 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10438 btrfs_free_path(path);
/*
 * Wipe and recreate the extent tree (--init-extent-tree): pin all live
 * metadata, rebuild the in-memory block groups from the chunk tree, reinit
 * the extent root, re-insert a block group item per group, and clear any
 * pending balance. Refuses mixed block groups, which would require pinning
 * file extents too. (Listing is elided; code kept verbatim.)
 */
10442 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10443 struct btrfs_fs_info *fs_info)
10449 * The only reason we don't do this is because right now we're just
10450 * walking the trees we find and pinning down their bytes, we don't look
10451 * at any of the leaves. In order to do mixed groups we'd have to check
10452 * the leaves of any fs roots and pin down the bytes for any file
10453 * extents we find. Not hard but why do it if we don't have to?
10455 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10456 fprintf(stderr, "We don't support re-initing the extent tree "
10457 "for mixed block groups yet, please notify a btrfs "
10458 "developer you want to do this so they can add this "
10459 "functionality.\n");
10464 * first we need to walk all of the trees except the extent tree and pin
10465 * down the bytes that are in use so we don't overwrite any existing
10468 ret = pin_metadata_blocks(fs_info);
10470 fprintf(stderr, "error pinning down used bytes\n");
10475 * Need to drop all the block groups since we're going to recreate all
10478 btrfs_free_block_groups(fs_info);
10479 ret = reset_block_groups(fs_info);
10481 fprintf(stderr, "error resetting the block groups\n");
10485 /* Ok we can allocate now, reinit the extent root */
10486 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10488 fprintf(stderr, "extent root initialization failed\n");
10490 * When the transaction code is updated we should end the
10491 * transaction, but for now progs only knows about commit so
10492 * just return an error.
10498 * Now we have all the in-memory block groups setup so we can make
10499 * allocations properly, and the metadata we care about is safe since we
10500 * pinned all of it above.
10503 struct btrfs_block_group_cache *cache;
10505 cache = btrfs_lookup_first_block_group(fs_info, start);
10508 start = cache->key.objectid + cache->key.offset;
/* Re-insert the on-disk block group item for this cached group. */
10509 ret = btrfs_insert_item(trans, fs_info->extent_root,
10510 &cache->key, &cache->item,
10511 sizeof(cache->item));
10513 fprintf(stderr, "Error adding block group\n");
10516 btrfs_extent_post_op(trans, fs_info->extent_root);
10519 ret = reset_balance(trans, fs_info);
10521 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force a COW of the tree block 'eb' by looking up its owner root and
 * searching down to it with cow=1 inside a one-shot transaction. Used to
 * rewrite blocks flagged as bad. (Listing is elided; code kept verbatim.)
 */
10526 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10528 struct btrfs_path *path;
10529 struct btrfs_trans_handle *trans;
10530 struct btrfs_key key;
10533 printf("Recowing metadata block %llu\n", eb->start);
10534 key.objectid = btrfs_header_owner(eb);
10535 key.type = BTRFS_ROOT_ITEM_KEY;
10536 key.offset = (u64)-1;
10538 root = btrfs_read_fs_root(root->fs_info, &key);
10539 if (IS_ERR(root)) {
10540 fprintf(stderr, "Couldn't find owner root %llu\n",
10542 return PTR_ERR(root);
10545 path = btrfs_alloc_path();
10549 trans = btrfs_start_transaction(root, 1);
10550 if (IS_ERR(trans)) {
10551 btrfs_free_path(path);
10552 return PTR_ERR(trans);
/* Descend only to eb's level; first key of eb seeds the search. */
10555 path->lowest_level = btrfs_header_level(eb);
10556 if (path->lowest_level)
10557 btrfs_node_key_to_cpu(eb, &key, 0);
10559 btrfs_item_key_to_cpu(eb, &key, 0);
/* cow=1 makes the search rewrite every block on the path, eb included. */
10561 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10562 btrfs_commit_transaction(trans, root);
10563 btrfs_free_path(path);
/*
 * Delete one recorded bad item: resolve the owning root from bad->root_id,
 * then remove bad->key inside a short transaction. (Listing is elided; code
 * kept verbatim.)
 */
10567 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10569 struct btrfs_path *path;
10570 struct btrfs_trans_handle *trans;
10571 struct btrfs_key key;
10574 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10575 bad->key.type, bad->key.offset);
10576 key.objectid = bad->root_id;
10577 key.type = BTRFS_ROOT_ITEM_KEY;
10578 key.offset = (u64)-1;
10580 root = btrfs_read_fs_root(root->fs_info, &key);
10581 if (IS_ERR(root)) {
10582 fprintf(stderr, "Couldn't find owner root %llu\n",
10584 return PTR_ERR(root);
10587 path = btrfs_alloc_path();
10591 trans = btrfs_start_transaction(root, 1);
10592 if (IS_ERR(trans)) {
10593 btrfs_free_path(path);
10594 return PTR_ERR(trans);
/* mod=-1/cow=1: position on the exact key for deletion. */
10597 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10603 ret = btrfs_del_item(trans, root, path);
10605 btrfs_commit_transaction(trans, root);
10606 btrfs_free_path(path);
/*
 * Discard the log tree by zeroing the log root pointer and level in the
 * superblock, committed through a one-shot transaction. (Listing is elided;
 * code kept verbatim.)
 */
10610 static int zero_log_tree(struct btrfs_root *root)
10612 struct btrfs_trans_handle *trans;
10615 trans = btrfs_start_transaction(root, 1);
10616 if (IS_ERR(trans)) {
10617 ret = PTR_ERR(trans);
10620 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10621 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10622 ret = btrfs_commit_transaction(trans, root);
/*
 * Recompute checksums for the data extent [start, start+len): read it back
 * one sector at a time into 'buf' and insert a csum item for each sector.
 * NOTE(review): the third argument to btrfs_csum_file_block is start + len
 * (the extent end) — presumably the "end of range" parameter of that API;
 * confirm against its declaration. (Listing is elided; code kept verbatim.)
 */
10626 static int populate_csum(struct btrfs_trans_handle *trans,
10627 struct btrfs_root *csum_root, char *buf, u64 start,
10634 while (offset < len) {
10635 sectorsize = csum_root->sectorsize;
10636 ret = read_extent_data(csum_root, buf, start + offset,
10640 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10641 start + offset, buf, sectorsize);
10644 offset += sectorsize;
/*
 * Rebuild csums for every regular (non-inline, non-prealloc-hole) file
 * extent of one fs/subvolume tree: iterate its EXTENT_DATA items and feed
 * each disk extent to populate_csum(). -EEXIST from populate_csum is
 * tolerated (csum already present). (Listing is elided; code kept verbatim.)
 */
10649 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10650 struct btrfs_root *csum_root,
10651 struct btrfs_root *cur_root)
10653 struct btrfs_path *path;
10654 struct btrfs_key key;
10655 struct extent_buffer *node;
10656 struct btrfs_file_extent_item *fi;
10663 path = btrfs_alloc_path();
/* One sector worth of scratch space for populate_csum(). */
10666 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10676 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10679 /* Iterate all regular file extents and fill its csum */
10681 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10683 if (key.type != BTRFS_EXTENT_DATA_KEY)
10685 node = path->nodes[0];
10686 slot = path->slots[0];
10687 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10688 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10690 start = btrfs_file_extent_disk_bytenr(node, fi);
10691 len = btrfs_file_extent_disk_num_bytes(node, fi);
10693 ret = populate_csum(trans, csum_root, buf, start, len);
10694 if (ret == -EEXIST)
10700 * TODO: if next leaf is corrupted, jump to nearest next valid
10703 ret = btrfs_next_item(cur_root, path);
10713 btrfs_free_path(path);
/*
 * Rebuild the csum tree by scanning every fs/subvolume root item in the
 * tree root (from FS_TREE_OBJECTID up to the last valid subvolume id) and
 * delegating each tree to fill_csum_tree_from_one_fs_root(). Used when the
 * extent tree cannot be trusted. (Listing is elided; code kept verbatim.)
 */
10718 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10719 struct btrfs_root *csum_root)
10721 struct btrfs_fs_info *fs_info = csum_root->fs_info;
10722 struct btrfs_path *path;
10723 struct btrfs_root *tree_root = fs_info->tree_root;
10724 struct btrfs_root *cur_root;
10725 struct extent_buffer *node;
10726 struct btrfs_key key;
10730 path = btrfs_alloc_path();
10734 key.objectid = BTRFS_FS_TREE_OBJECTID;
10736 key.type = BTRFS_ROOT_ITEM_KEY;
10738 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10747 node = path->nodes[0];
10748 slot = path->slots[0];
10749 btrfs_item_key_to_cpu(node, &key, slot);
/* Stop past the last possible subvolume id; skip non-fs roots. */
10750 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10752 if (key.type != BTRFS_ROOT_ITEM_KEY)
10754 if (!is_fstree(key.objectid))
10756 key.offset = (u64)-1;
10758 cur_root = btrfs_read_fs_root(fs_info, &key);
10759 if (IS_ERR(cur_root) || !cur_root) {
10760 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10764 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10769 ret = btrfs_next_item(tree_root, path);
10779 btrfs_free_path(path);
/*
 * Rebuild the csum tree by scanning the extent tree: every EXTENT_ITEM
 * carrying BTRFS_EXTENT_FLAG_DATA gets its range re-checksummed through
 * populate_csum(). Faster than the fs-tree scan, but requires a trustworthy
 * extent tree. (Listing is elided; code kept verbatim.)
 */
10783 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10784 struct btrfs_root *csum_root)
10786 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10787 struct btrfs_path *path;
10788 struct btrfs_extent_item *ei;
10789 struct extent_buffer *leaf;
10791 struct btrfs_key key;
10794 path = btrfs_alloc_path();
10799 key.type = BTRFS_EXTENT_ITEM_KEY;
10802 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10804 btrfs_free_path(path);
/* Scratch buffer for one sector of file data. */
10808 buf = malloc(csum_root->sectorsize);
10810 btrfs_free_path(path);
10815 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10816 ret = btrfs_next_leaf(extent_root, path);
10824 leaf = path->nodes[0];
10826 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10827 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Only data extents carry csums; skip tree blocks. */
10832 ei = btrfs_item_ptr(leaf, path->slots[0],
10833 struct btrfs_extent_item);
10834 if (!(btrfs_extent_flags(leaf, ei) &
10835 BTRFS_EXTENT_FLAG_DATA)) {
10840 ret = populate_csum(trans, csum_root, buf, key.objectid,
10847 btrfs_free_path(path);
10853 * Recalculate the csum and put it into the csum tree.
10855 * Extent tree init will wipe out all the extent info, so in that case, we
10856 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
10857 * will use fs/subvol trees to init the csum tree.
/* Thin dispatcher between the two csum-rebuild strategies above. */
10859 static int fill_csum_tree(struct btrfs_trans_handle *trans,
10860 struct btrfs_root *csum_root,
10861 int search_fs_tree)
10863 if (search_fs_tree)
10864 return fill_csum_tree_from_fs(trans, csum_root);
10866 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache: pop every cached root_item_info,
 * free it, then free the cache tree itself and reset the pointer so a later
 * build starts fresh. (Listing is elided; code kept verbatim.)
 */
10869 static void free_roots_info_cache(void)
10871 if (!roots_info_cache)
10874 while (!cache_tree_empty(roots_info_cache)) {
10875 struct cache_extent *entry;
10876 struct root_item_info *rii;
10878 entry = first_cache_extent(roots_info_cache);
10881 remove_cache_extent(roots_info_cache, entry);
10882 rii = container_of(entry, struct root_item_info, cache_extent);
10886 free(roots_info_cache);
10887 roots_info_cache = NULL;
/*
 * Populate roots_info_cache with, per subvolume root id, the highest-level
 * tree block found in the extent tree (its bytenr, generation and level).
 * Only metadata extents whose first inline ref is a TREE_BLOCK_REF are
 * considered root-extent candidates. This data later lets
 * maybe_repair_root_item() detect stale root items. (Listing is elided;
 * code kept verbatim.)
 */
10890 static int build_roots_info_cache(struct btrfs_fs_info *info)
10893 struct btrfs_key key;
10894 struct extent_buffer *leaf;
10895 struct btrfs_path *path;
10897 if (!roots_info_cache) {
10898 roots_info_cache = malloc(sizeof(*roots_info_cache));
10899 if (!roots_info_cache)
10901 cache_tree_init(roots_info_cache);
10904 path = btrfs_alloc_path();
10909 key.type = BTRFS_EXTENT_ITEM_KEY;
10912 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10915 leaf = path->nodes[0];
10918 struct btrfs_key found_key;
10919 struct btrfs_extent_item *ei;
10920 struct btrfs_extent_inline_ref *iref;
10921 int slot = path->slots[0];
10926 struct cache_extent *entry;
10927 struct root_item_info *rii;
10929 if (slot >= btrfs_header_nritems(leaf)) {
10930 ret = btrfs_next_leaf(info->extent_root, path);
10937 leaf = path->nodes[0];
10938 slot = path->slots[0];
10941 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
/* Only tree-block extents are interesting here. */
10943 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10944 found_key.type != BTRFS_METADATA_ITEM_KEY)
10947 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10948 flags = btrfs_extent_flags(leaf, ei);
10950 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10951 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/* Skinny items encode the level in the key offset; classic items
   carry a tree_block_info before the inline refs. */
10954 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10955 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10956 level = found_key.offset;
10958 struct btrfs_tree_block_info *binfo;
10960 binfo = (struct btrfs_tree_block_info *)(ei + 1);
10961 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10962 level = btrfs_tree_block_level(leaf, binfo);
10966 * For a root extent, it must be of the following type and the
10967 * first (and only one) iref in the item.
10969 type = btrfs_extent_inline_ref_type(leaf, iref);
10970 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10973 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
/* First sighting of this root id: create a cache entry. */
10974 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10976 rii = malloc(sizeof(struct root_item_info));
10981 rii->cache_extent.start = root_id;
10982 rii->cache_extent.size = 1;
10983 rii->level = (u8)-1;
10984 entry = &rii->cache_extent;
10985 ret = insert_cache_extent(roots_info_cache, entry);
10988 rii = container_of(entry, struct root_item_info,
10992 ASSERT(rii->cache_extent.start == root_id);
10993 ASSERT(rii->cache_extent.size == 1);
/* Keep only the highest-level candidate per root. */
10995 if (level > rii->level || rii->level == (u8)-1) {
10996 rii->level = level;
10997 rii->bytenr = found_key.objectid;
10998 rii->gen = btrfs_extent_generation(leaf, ei);
10999 rii->node_count = 1;
11000 } else if (level == rii->level) {
11008 btrfs_free_path(path);
/*
 * Compare one on-disk root item (already positioned at path) against the
 * bytenr/level/generation recorded in roots_info_cache. In read-only mode
 * just report a mismatch; otherwise rewrite the root item in place.
 * A root item with a *newer* generation than the found node is refused —
 * updating it would roll the root backwards. (Listing is elided; code kept
 * verbatim.)
 */
11013 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11014 struct btrfs_path *path,
11015 const struct btrfs_key *root_key,
11016 const int read_only_mode)
11018 const u64 root_id = root_key->objectid;
11019 struct cache_extent *entry;
11020 struct root_item_info *rii;
11021 struct btrfs_root_item ri;
11022 unsigned long offset;
11024 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11027 "Error: could not find extent items for root %llu\n",
11028 root_key->objectid);
11032 rii = container_of(entry, struct root_item_info, cache_extent);
11033 ASSERT(rii->cache_extent.start == root_id);
11034 ASSERT(rii->cache_extent.size == 1);
/* Multiple same-level candidates means the root block is ambiguous. */
11036 if (rii->node_count != 1) {
11038 "Error: could not find btree root extent for root %llu\n",
11043 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11044 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11046 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11047 btrfs_root_level(&ri) != rii->level ||
11048 btrfs_root_generation(&ri) != rii->gen) {
11051 * If we're in repair mode but our caller told us to not update
11052 * the root item, i.e. just check if it needs to be updated, don't
11053 * print this message, since the caller will call us again shortly
11054 * for the same root item without read only mode (the caller will
11055 * open a transaction first).
11057 if (!(read_only_mode && repair))
11059 "%sroot item for root %llu,"
11060 " current bytenr %llu, current gen %llu, current level %u,"
11061 " new bytenr %llu, new gen %llu, new level %u\n",
11062 (read_only_mode ? "" : "fixing "),
11064 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11065 btrfs_root_level(&ri),
11066 rii->bytenr, rii->gen, rii->level);
/* Never replace a root item that is newer than the found node. */
11068 if (btrfs_root_generation(&ri) > rii->gen) {
11070 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11071 root_id, btrfs_root_generation(&ri), rii->gen);
11075 if (!read_only_mode) {
11076 btrfs_set_root_bytenr(&ri, rii->bytenr);
11077 btrfs_set_root_level(&ri, rii->level);
11078 btrfs_set_root_generation(&ri, rii->gen);
11079 write_extent_buffer(path->nodes[0], &ri,
11080 offset, sizeof(ri));
11090 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11091 * caused read-only snapshots to be corrupted if they were created at a moment
11092 * when the source subvolume/snapshot had orphan items. The issue was that the
11093 * on-disk root items became incorrect, referring to the pre orphan cleanup root
11094 * node instead of the post orphan cleanup root node.
11095 * So this function, and its callees, just detects and fixes those cases. Even
11096 * though the regression was for read-only snapshots, this function applies to
11097 * any snapshot/subvolume root.
11098 * This must be run before any other repair code - otherwise, other repair
11099 * code could delete or modify backrefs in the extent tree, for example, which
11100 * would result in an inconsistent fs after repairing the root items.
/*
 * Walk all ROOT_ITEM keys in the tree root and run maybe_repair_root_item()
 * on each, committing a transaction per processed leaf (see the comment
 * above about avoiding unnecessary backup-root rotation).  The count of
 * repaired/outdated roots is reported by the caller via the return value.
 *
 * NOTE(review): many interior lines (loop framing, gotos, error paths)
 * are elided in this excerpt; comments describe only the visible code.
 */
11102 static int repair_root_items(struct btrfs_fs_info *info)
11104 struct btrfs_path *path = NULL;
11105 struct btrfs_key key;
11106 struct extent_buffer *leaf;
11107 struct btrfs_trans_handle *trans = NULL;
11110 int need_trans = 0;
/* Gather bytenr/level/gen of each root's real root node first. */
11112 ret = build_roots_info_cache(info);
11116 path = btrfs_alloc_path();
/* Start scanning at the first possible subvolume/snapshot root item. */
11122 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11123 key.type = BTRFS_ROOT_ITEM_KEY;
11128 * Avoid opening and committing transactions if a leaf doesn't have
11129 * any root items that need to be fixed, so that we avoid rotating
11130 * backup roots unnecessarily.
11133 trans = btrfs_start_transaction(info->tree_root, 1);
11134 if (IS_ERR(trans)) {
11135 ret = PTR_ERR(trans);
11140 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11144 leaf = path->nodes[0];
11147 struct btrfs_key found_key;
/* End of this leaf: remember the next key, then release and commit. */
11149 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11150 int no_more_keys = find_next_key(path, &key);
11152 btrfs_release_path(path);
11154 ret = btrfs_commit_transaction(trans,
11166 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
/* Only subvolume/snapshot root items; skip relocation trees. */
11168 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11170 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11173 ret = maybe_repair_root_item(info, path, &found_key,
11178 if (!trans && repair) {
11181 btrfs_release_path(path);
/* Cleanup: free the cache and path; commit any open transaction. */
11191 free_roots_info_cache();
11192 btrfs_free_path(path);
11194 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the (v1) free space cache of every block group, then commit a
 * transaction that sets the superblock's cache_generation to -1 so the
 * cache is treated as invalid from now on.
 *
 * NOTE(review): the loop framing and per-iteration error handling are
 * elided in this excerpt; comments describe only the visible statements.
 */
11201 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
11203 struct btrfs_trans_handle *trans;
11204 struct btrfs_block_group_cache *bg_cache;
11208 /* Clear all free space cache inodes and its extent data */
/* Iterate block groups by start offset, advancing past each one. */
11210 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
11213 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
11216 current = bg_cache->key.objectid + bg_cache->key.offset;
11219 /* Don't forget to set cache_generation to -1 */
11220 trans = btrfs_start_transaction(fs_info->tree_root, 0);
11221 if (IS_ERR(trans)) {
11222 error("failed to update super block cache generation");
11223 return PTR_ERR(trans);
11225 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
11226 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Usage/help text for "btrfs check", printed by usage().  First entry is
 * the synopsis line, followed by the description and per-option help.
 */
11231 const char * const cmd_check_usage[] = {
11232 "btrfs check [options] <device>",
11233 "Check structural integrity of a filesystem (unmounted).",
11234 "Check structural integrity of an unmounted filesystem. Verify internal",
11235 "trees' consistency and item connectivity. In the repair mode try to",
11236 "fix the problems found. ",
11237 "WARNING: the repair mode is considered dangerous",
11239 "-s|--super <superblock> use this superblock copy",
11240 "-b|--backup use the first valid backup root copy",
11241 "--repair try to repair the filesystem",
11242 "--readonly run in read-only mode (default)",
11243 "--init-csum-tree create a new CRC tree",
11244 "--init-extent-tree create a new extent tree",
11245 "--mode <MODE> allows choice of memory/IO trade-offs",
11246 " where MODE is one of:",
11247 " original - read inodes and extents to memory (requires",
11248 " more memory, does less IO)",
11249 " lowmem - try to use less memory but read blocks again",
11251 "--check-data-csum verify checksums of data blocks",
11252 "-Q|--qgroup-report print a report on qgroup consistency",
11253 "-E|--subvol-extents <subvolid>",
11254 " print subvolume extents and sharing state",
11255 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
11256 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
11257 "-p|--progress indicate progress",
11258 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
11259 " NOTE: v1 support implemented",
/*
 * cmd_check - entry point of "btrfs check".
 *
 * Parses options, opens the (unmounted) filesystem, then runs the
 * requested passes: optional cache clearing, optional csum/extent tree
 * re-initialization, extent/chunk checks, root-item repair, free space
 * cache/tree check, fs roots, csums, root refs and (when enabled) quota
 * group verification.  Prints summary statistics before returning; a
 * non-zero return indicates errors found or a failed step.
 *
 * Fixes in this revision: "implmented" typo in the --clear-space-cache
 * error message; cast u64 'num' to unsigned long long for the %llu
 * conversion (matching the cast already applied to 'bytenr'); added the
 * missing trailing newlines on the "checking extents" and extent-tree
 * error diagnostics for consistency with the sibling messages.
 *
 * NOTE(review): many interior lines (gotos, braces, error paths) are
 * elided in this excerpt; only the visible code is documented/changed.
 */
11263 int cmd_check(int argc, char **argv)
11265 struct cache_tree root_cache;
11266 struct btrfs_root *root;
11267 struct btrfs_fs_info *info;
11270 u64 tree_root_bytenr = 0;
11271 u64 chunk_root_bytenr = 0;
11272 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11275 int init_csum_tree = 0;
11277 int clear_space_cache = 0;
11278 int qgroup_report = 0;
11279 int qgroups_repaired = 0;
11280 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* Long-option values with no short-option equivalent start at 257. */
11284 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11285 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11286 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11287 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
11288 static const struct option long_options[] = {
11289 { "super", required_argument, NULL, 's' },
11290 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11291 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11292 { "init-csum-tree", no_argument, NULL,
11293 GETOPT_VAL_INIT_CSUM },
11294 { "init-extent-tree", no_argument, NULL,
11295 GETOPT_VAL_INIT_EXTENT },
11296 { "check-data-csum", no_argument, NULL,
11297 GETOPT_VAL_CHECK_CSUM },
11298 { "backup", no_argument, NULL, 'b' },
11299 { "subvol-extents", required_argument, NULL, 'E' },
11300 { "qgroup-report", no_argument, NULL, 'Q' },
11301 { "tree-root", required_argument, NULL, 'r' },
11302 { "chunk-root", required_argument, NULL,
11303 GETOPT_VAL_CHUNK_TREE },
11304 { "progress", no_argument, NULL, 'p' },
11305 { "mode", required_argument, NULL,
11307 { "clear-space-cache", required_argument, NULL,
11308 GETOPT_VAL_CLEAR_SPACE_CACHE},
11309 { NULL, 0, NULL, 0}
11312 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11316 case 'a': /* ignored */ break;
11318 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11321 num = arg_strtou64(optarg);
11322 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11324 "super mirror should be less than %d",
11325 BTRFS_SUPER_MIRROR_MAX);
11328 bytenr = btrfs_sb_offset(((int)num));
/* Cast both u64 args so they always match %llu (portability). */
11329 printf("using SB copy %llu, bytenr %llu\n", (unsigned long long)num,
11330 (unsigned long long)bytenr);
11336 subvolid = arg_strtou64(optarg);
11339 tree_root_bytenr = arg_strtou64(optarg);
11341 case GETOPT_VAL_CHUNK_TREE:
11342 chunk_root_bytenr = arg_strtou64(optarg);
11345 ctx.progress_enabled = true;
11349 usage(cmd_check_usage);
11350 case GETOPT_VAL_REPAIR:
11351 printf("enabling repair mode\n");
11353 ctree_flags |= OPEN_CTREE_WRITES;
11355 case GETOPT_VAL_READONLY:
11358 case GETOPT_VAL_INIT_CSUM:
11359 printf("Creating a new CRC tree\n");
11360 init_csum_tree = 1;
11362 ctree_flags |= OPEN_CTREE_WRITES;
11364 case GETOPT_VAL_INIT_EXTENT:
11365 init_extent_tree = 1;
11366 ctree_flags |= (OPEN_CTREE_WRITES |
11367 OPEN_CTREE_NO_BLOCK_GROUPS);
11370 case GETOPT_VAL_CHECK_CSUM:
11371 check_data_csum = 1;
11373 case GETOPT_VAL_MODE:
11374 check_mode = parse_check_mode(optarg);
11375 if (check_mode == CHECK_MODE_UNKNOWN) {
11376 error("unknown mode: %s", optarg);
11380 case GETOPT_VAL_CLEAR_SPACE_CACHE:
11381 if (strcmp(optarg, "v1") != 0) {
11383 "only v1 support implemented, unrecognized value %s",
11387 clear_space_cache = 1;
11388 ctree_flags |= OPEN_CTREE_WRITES;
/* Exactly one positional argument: the device to check. */
11393 if (check_argc_exact(argc - optind, 1))
11394 usage(cmd_check_usage);
11396 if (ctx.progress_enabled) {
11397 ctx.tp = TASK_NOTHING;
11398 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11401 /* This check is the only reason for --readonly to exist */
11402 if (readonly && repair) {
11403 error("repair options are not compatible with --readonly");
11408 * Not supported yet
11410 if (repair && check_mode == CHECK_MODE_LOWMEM) {
11411 error("low memory mode doesn't support repair yet");
11416 cache_tree_init(&root_cache);
/* Refuse to operate on a mounted filesystem. */
11418 if((ret = check_mounted(argv[optind])) < 0) {
11419 error("could not check mount status: %s", strerror(-ret));
11422 error("%s is currently mounted, aborting", argv[optind]);
11427 /* only allow partial opening under repair mode */
11429 ctree_flags |= OPEN_CTREE_PARTIAL;
11431 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11432 chunk_root_bytenr, ctree_flags);
11434 error("cannot open file system");
11439 global_info = info;
11440 root = info->fs_root;
/* --clear-space-cache: only the v1 cache can be cleared for now. */
11441 if (clear_space_cache) {
11442 if (btrfs_fs_compat_ro(info,
11443 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
11445 "free space cache v2 detected, clearing not implemented");
11449 printf("Clearing free space cache\n");
11450 ret = clear_free_space_cache(info);
11452 error("failed to clear free space cache");
11455 printf("Free space cache cleared\n");
11461 * repair mode will force us to commit transaction which
11462 * will make us fail to load log tree when mounting.
11464 if (repair && btrfs_super_log_root(info->super_copy)) {
11465 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
11470 ret = zero_log_tree(root);
11472 error("failed to zero log tree: %d", ret);
11477 uuid_unparse(info->super_copy->fsid, uuidbuf);
11478 if (qgroup_report) {
11479 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11481 ret = qgroup_verify_all(info);
11487 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11488 subvolid, argv[optind], uuidbuf);
11489 ret = print_extent_state(info, subvolid);
11492 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
/* Bail out early if any of the critical trees failed to read. */
11494 if (!extent_buffer_uptodate(info->tree_root->node) ||
11495 !extent_buffer_uptodate(info->dev_root->node) ||
11496 !extent_buffer_uptodate(info->chunk_root->node)) {
11497 error("critical roots corrupted, unable to check the filesystem");
/* --init-extent-tree / --init-csum-tree: rebuild trees in one txn. */
11502 if (init_extent_tree || init_csum_tree) {
11503 struct btrfs_trans_handle *trans;
11505 trans = btrfs_start_transaction(info->extent_root, 0);
11506 if (IS_ERR(trans)) {
11507 error("error starting transaction");
11508 ret = PTR_ERR(trans);
11512 if (init_extent_tree) {
11513 printf("Creating a new extent tree\n");
11514 ret = reinit_extent_tree(trans, info);
11519 if (init_csum_tree) {
11520 printf("Reinitialize checksum tree\n");
11521 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11523 error("checksum tree initialization failed: %d",
11529 ret = fill_csum_tree(trans, info->csum_root,
11532 error("checksum tree refilling failed: %d", ret);
11537 * Ok now we commit and run the normal fsck, which will add
11538 * extent entries for all of the items it finds.
11540 ret = btrfs_commit_transaction(trans, info->extent_root);
11544 if (!extent_buffer_uptodate(info->extent_root->node)) {
11545 error("critical: extent_root, unable to check the filesystem");
11549 if (!extent_buffer_uptodate(info->csum_root->node)) {
11550 error("critical: csum_root, unable to check the filesystem");
/* Pass 1: extents and chunks (original or lowmem implementation). */
11555 if (!ctx.progress_enabled)
11556 printf("checking extents\n");
11557 if (check_mode == CHECK_MODE_LOWMEM)
11558 ret = check_chunks_and_extents_v2(root);
11560 ret = check_chunks_and_extents(root);
11562 printf("Errors found in extent allocation tree or chunk allocation\n");
/* Pass 2: root items (see repair_root_items for why this runs early). */
11564 ret = repair_root_items(info);
11568 fprintf(stderr, "Fixed %d roots.\n", ret);
11570 } else if (ret > 0) {
11572 "Found %d roots with an outdated root item.\n",
11575 "Please run a filesystem check with the option --repair to fix them.\n");
/* Pass 3: free space tree (v2) or free space cache (v1). */
11580 if (!ctx.progress_enabled) {
11581 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11582 fprintf(stderr, "checking free space tree\n");
11584 fprintf(stderr, "checking free space cache\n");
11586 ret = check_space_cache(root);
11591 * We used to have to have these hole extents in between our real
11592 * extents so if we don't have this flag set we need to make sure there
11593 * are no gaps in the file extents for inodes, otherwise we can just
11594 * ignore it when this happens.
11596 no_holes = btrfs_fs_incompat(root->fs_info,
11597 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
/* Pass 4: fs roots, csums, root refs. */
11598 if (!ctx.progress_enabled)
11599 fprintf(stderr, "checking fs roots\n");
11600 ret = check_fs_roots(root, &root_cache);
11604 fprintf(stderr, "checking csums\n");
11605 ret = check_csums(root);
11609 fprintf(stderr, "checking root refs\n");
11610 ret = check_root_refs(root, &root_cache);
/* In repair mode, re-COW any extent buffers queued for rewriting. */
11614 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11615 struct extent_buffer *eb;
11617 eb = list_first_entry(&root->fs_info->recow_ebs,
11618 struct extent_buffer, recow);
11619 list_del_init(&eb->recow);
11620 ret = recow_extent_buffer(root, eb);
/* Delete items that earlier passes marked as bad. */
11625 while (!list_empty(&delete_items)) {
11626 struct bad_item *bad;
11628 bad = list_first_entry(&delete_items, struct bad_item, list);
11629 list_del_init(&bad->list);
11631 ret = delete_bad_item(root, bad);
11635 if (info->quota_enabled) {
11637 fprintf(stderr, "checking quota groups\n");
11638 err = qgroup_verify_all(info);
11642 err = repair_qgroups(info, &qgroups_repaired);
/* Leftover recow entries mean transid problems we could not fix. */
11647 if (!list_empty(&root->fs_info->recow_ebs)) {
11648 error("transid errors in file system");
11652 /* Don't override original ret */
11653 if (!ret && qgroups_repaired)
11654 ret = qgroups_repaired;
11656 if (found_old_backref) { /*
11657 * there was a disk format change when mixed
11658 * backref was in testing tree. The old format
11659 * existed about one week.
11661 printf("\n * Found old mixed backref format. "
11662 "The old format is not supported! *"
11663 "\n * Please mount the FS in readonly mode, "
11664 "backup data and re-format the FS. *\n\n");
/* Summary statistics accumulated by the check passes. */
11667 printf("found %llu bytes used err is %d\n",
11668 (unsigned long long)bytes_used, ret);
11669 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11670 printf("total tree bytes: %llu\n",
11671 (unsigned long long)total_btree_bytes);
11672 printf("total fs tree bytes: %llu\n",
11673 (unsigned long long)total_fs_tree_bytes);
11674 printf("total extent tree bytes: %llu\n",
11675 (unsigned long long)total_extent_tree_bytes);
11676 printf("btree space waste bytes: %llu\n",
11677 (unsigned long long)btree_space_waste);
11678 printf("file data blocks allocated: %llu\n referenced %llu\n",
11679 (unsigned long long)data_bytes_allocated,
11680 (unsigned long long)data_bytes_referenced);
11682 free_qgroup_counts();
11683 free_root_recs_tree(&root_cache);
11687 if (ctx.progress_enabled)
11688 task_deinit(ctx.info);