2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
/*
 * Positions of the progress indicator; identifies which phase of the check
 * is currently running.  NOTE(review): reconstructed from a truncated
 * listing — verify member order against the original file.
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING, /* have to be the last element */
};

/* Context handed to the background progress-reporting task. */
struct task_ctx {
	int progress_enabled;
	enum task_position tp;
	struct task_info *info;
};
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
/*
 * Selectable check implementations: the original (memory hungry) mode and
 * the low-memory mode.  NOTE(review): enumerator list reconstructed from a
 * truncated listing — verify against the original file.
 */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86 struct extent_backref {
87 struct list_head list;
88 unsigned int is_data:1;
89 unsigned int found_extent_tree:1;
90 unsigned int full_backref:1;
91 unsigned int found_ref:1;
92 unsigned int broken:1;
95 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
97 return list_entry(entry, struct extent_backref, list);
100 struct data_backref {
101 struct extent_backref node;
115 static inline struct data_backref* to_data_backref(struct extent_backref *back)
117 return container_of(back, struct data_backref, node);
121 * Much like data_backref, just removed the undetermined members
122 * and change it to use list_head.
123 * During extent scan, it is stored in root->orphan_data_extent.
124 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
126 struct orphan_data_extent {
127 struct list_head list;
135 struct tree_backref {
136 struct extent_backref node;
143 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
145 return container_of(back, struct tree_backref, node);
148 /* Explicit initialization for extent_record::flag_block_full_backref */
149 enum { FLAG_UNSET = 2 };
151 struct extent_record {
152 struct list_head backrefs;
153 struct list_head dups;
154 struct list_head list;
155 struct cache_extent cache;
156 struct btrfs_disk_key parent_key;
161 u64 extent_item_refs;
163 u64 parent_generation;
167 unsigned int flag_block_full_backref:2;
168 unsigned int found_rec:1;
169 unsigned int content_checked:1;
170 unsigned int owner_ref_checked:1;
171 unsigned int is_root:1;
172 unsigned int metadata:1;
173 unsigned int bad_full_backref:1;
174 unsigned int crossing_stripes:1;
175 unsigned int wrong_chunk_type:1;
178 static inline struct extent_record* to_extent_record(struct list_head *entry)
180 return container_of(entry, struct extent_record, list);
183 struct inode_backref {
184 struct list_head list;
185 unsigned int found_dir_item:1;
186 unsigned int found_dir_index:1;
187 unsigned int found_inode_ref:1;
197 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
199 return list_entry(entry, struct inode_backref, list);
202 struct root_item_record {
203 struct list_head list;
210 struct btrfs_key drop_key;
213 #define REF_ERR_NO_DIR_ITEM (1 << 0)
214 #define REF_ERR_NO_DIR_INDEX (1 << 1)
215 #define REF_ERR_NO_INODE_REF (1 << 2)
216 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
217 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
218 #define REF_ERR_DUP_INODE_REF (1 << 5)
219 #define REF_ERR_INDEX_UNMATCH (1 << 6)
220 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
221 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
222 #define REF_ERR_NO_ROOT_REF (1 << 9)
223 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
224 #define REF_ERR_DUP_ROOT_REF (1 << 11)
225 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
227 struct file_extent_hole {
233 struct inode_record {
234 struct list_head backrefs;
235 unsigned int checked:1;
236 unsigned int merging:1;
237 unsigned int found_inode_item:1;
238 unsigned int found_dir_item:1;
239 unsigned int found_file_extent:1;
240 unsigned int found_csum_item:1;
241 unsigned int some_csum_missing:1;
242 unsigned int nodatasum:1;
255 struct rb_root holes;
256 struct list_head orphan_extents;
261 #define I_ERR_NO_INODE_ITEM (1 << 0)
262 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
263 #define I_ERR_DUP_INODE_ITEM (1 << 2)
264 #define I_ERR_DUP_DIR_INDEX (1 << 3)
265 #define I_ERR_ODD_DIR_ITEM (1 << 4)
266 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
267 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
268 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
269 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
270 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
271 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
272 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
273 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
274 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
275 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
277 struct root_backref {
278 struct list_head list;
279 unsigned int found_dir_item:1;
280 unsigned int found_dir_index:1;
281 unsigned int found_back_ref:1;
282 unsigned int found_forward_ref:1;
283 unsigned int reachable:1;
292 static inline struct root_backref* to_root_backref(struct list_head *entry)
294 return list_entry(entry, struct root_backref, list);
298 struct list_head backrefs;
299 struct cache_extent cache;
300 unsigned int found_root_item:1;
306 struct cache_extent cache;
311 struct cache_extent cache;
312 struct cache_tree root_cache;
313 struct cache_tree inode_cache;
314 struct inode_record *current;
323 struct walk_control {
324 struct cache_tree shared;
325 struct shared_node *nodes[BTRFS_MAX_LEVEL];
331 struct btrfs_key key;
333 struct list_head list;
336 struct extent_entry {
341 struct list_head list;
344 struct root_item_info {
345 /* level of the root */
347 /* number of nodes at this level, must be 1 for a root */
351 struct cache_extent cache_extent;
/*
 * Error bits for low memory mode check.
 *
 * Currently no caller cares about them yet.  Just internal use for error
 * classification.
 *
 * NOTE(review): the original listing defined REFERENCER_MISMATCH and
 * CROSSING_STRIPE_BOUNDARY with the same value (1 << 4), which made the two
 * error conditions indistinguishable.  The bits below are renumbered so
 * every error has a unique bit.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define CROSSING_STRIPE_BOUNDARY (1 << 5) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH	(1 << 6) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 7) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 8) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 9)
371 static void *print_status_check(void *p)
373 struct task_ctx *priv = p;
374 const char work_indicator[] = { '.', 'o', 'O', 'o' };
376 static char *task_position_string[] = {
378 "checking free space cache",
382 task_period_start(priv->info, 1000 /* 1s */);
384 if (priv->tp == TASK_NOTHING)
388 printf("%s [%c]\r", task_position_string[priv->tp],
389 work_indicator[count % 4]);
392 task_period_wait(priv->info);
397 static int print_status_return(void *p)
405 static enum btrfs_check_mode parse_check_mode(const char *str)
407 if (strcmp(str, "lowmem") == 0)
408 return CHECK_MODE_LOWMEM;
409 if (strcmp(str, "orig") == 0)
410 return CHECK_MODE_ORIGINAL;
411 if (strcmp(str, "original") == 0)
412 return CHECK_MODE_ORIGINAL;
414 return CHECK_MODE_UNKNOWN;
417 /* Compatible function to allow reuse of old codes */
418 static u64 first_extent_gap(struct rb_root *holes)
420 struct file_extent_hole *hole;
422 if (RB_EMPTY_ROOT(holes))
425 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
429 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
431 struct file_extent_hole *hole1;
432 struct file_extent_hole *hole2;
434 hole1 = rb_entry(node1, struct file_extent_hole, node);
435 hole2 = rb_entry(node2, struct file_extent_hole, node);
437 if (hole1->start > hole2->start)
439 if (hole1->start < hole2->start)
441 /* Now hole1->start == hole2->start */
442 if (hole1->len >= hole2->len)
444 * Hole 1 will be merge center
445 * Same hole will be merged later
448 /* Hole 2 will be merge center */
453 * Add a hole to the record
455 * This will do hole merge for copy_file_extent_holes(),
456 * which will ensure there won't be continuous holes.
458 static int add_file_extent_hole(struct rb_root *holes,
461 struct file_extent_hole *hole;
462 struct file_extent_hole *prev = NULL;
463 struct file_extent_hole *next = NULL;
465 hole = malloc(sizeof(*hole));
470 /* Since compare will not return 0, no -EEXIST will happen */
471 rb_insert(holes, &hole->node, compare_hole);
473 /* simple merge with previous hole */
474 if (rb_prev(&hole->node))
475 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
477 if (prev && prev->start + prev->len >= hole->start) {
478 hole->len = hole->start + hole->len - prev->start;
479 hole->start = prev->start;
480 rb_erase(&prev->node, holes);
485 /* iterate merge with next holes */
487 if (!rb_next(&hole->node))
489 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
491 if (hole->start + hole->len >= next->start) {
492 if (hole->start + hole->len <= next->start + next->len)
493 hole->len = next->start + next->len -
495 rb_erase(&next->node, holes);
504 static int compare_hole_range(struct rb_node *node, void *data)
506 struct file_extent_hole *hole;
509 hole = (struct file_extent_hole *)data;
512 hole = rb_entry(node, struct file_extent_hole, node);
513 if (start < hole->start)
515 if (start >= hole->start && start < hole->start + hole->len)
521 * Delete a hole in the record
523 * This will do the hole split and is much restrict than add.
525 static int del_file_extent_hole(struct rb_root *holes,
528 struct file_extent_hole *hole;
529 struct file_extent_hole tmp;
534 struct rb_node *node;
541 node = rb_search(holes, &tmp, compare_hole_range, NULL);
544 hole = rb_entry(node, struct file_extent_hole, node);
545 if (start + len > hole->start + hole->len)
549 * Now there will be no overlap, delete the hole and re-add the
550 * split(s) if they exists.
552 if (start > hole->start) {
553 prev_start = hole->start;
554 prev_len = start - hole->start;
557 if (hole->start + hole->len > start + len) {
558 next_start = start + len;
559 next_len = hole->start + hole->len - start - len;
562 rb_erase(node, holes);
565 ret = add_file_extent_hole(holes, prev_start, prev_len);
570 ret = add_file_extent_hole(holes, next_start, next_len);
577 static int copy_file_extent_holes(struct rb_root *dst,
580 struct file_extent_hole *hole;
581 struct rb_node *node;
584 node = rb_first(src);
586 hole = rb_entry(node, struct file_extent_hole, node);
587 ret = add_file_extent_hole(dst, hole->start, hole->len);
590 node = rb_next(node);
595 static void free_file_extent_holes(struct rb_root *holes)
597 struct rb_node *node;
598 struct file_extent_hole *hole;
600 node = rb_first(holes);
602 hole = rb_entry(node, struct file_extent_hole, node);
603 rb_erase(node, holes);
605 node = rb_first(holes);
609 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
611 static void record_root_in_trans(struct btrfs_trans_handle *trans,
612 struct btrfs_root *root)
614 if (root->last_trans != trans->transid) {
615 root->track_dirty = 1;
616 root->last_trans = trans->transid;
617 root->commit_root = root->node;
618 extent_buffer_get(root->node);
622 static u8 imode_to_type(u32 imode)
625 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
626 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
627 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
628 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
629 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
630 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
631 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
632 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
635 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
639 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
641 struct device_record *rec1;
642 struct device_record *rec2;
644 rec1 = rb_entry(node1, struct device_record, node);
645 rec2 = rb_entry(node2, struct device_record, node);
646 if (rec1->devid > rec2->devid)
648 else if (rec1->devid < rec2->devid)
654 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
656 struct inode_record *rec;
657 struct inode_backref *backref;
658 struct inode_backref *orig;
659 struct inode_backref *tmp;
660 struct orphan_data_extent *src_orphan;
661 struct orphan_data_extent *dst_orphan;
666 rec = malloc(sizeof(*rec));
668 return ERR_PTR(-ENOMEM);
669 memcpy(rec, orig_rec, sizeof(*rec));
671 INIT_LIST_HEAD(&rec->backrefs);
672 INIT_LIST_HEAD(&rec->orphan_extents);
673 rec->holes = RB_ROOT;
675 list_for_each_entry(orig, &orig_rec->backrefs, list) {
676 size = sizeof(*orig) + orig->namelen + 1;
677 backref = malloc(size);
682 memcpy(backref, orig, size);
683 list_add_tail(&backref->list, &rec->backrefs);
685 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
686 dst_orphan = malloc(sizeof(*dst_orphan));
691 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
692 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
694 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
701 rb = rb_first(&rec->holes);
703 struct file_extent_hole *hole;
705 hole = rb_entry(rb, struct file_extent_hole, node);
711 if (!list_empty(&rec->backrefs))
712 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
713 list_del(&orig->list);
717 if (!list_empty(&rec->orphan_extents))
718 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
719 list_del(&orig->list);
728 static void print_orphan_data_extents(struct list_head *orphan_extents,
731 struct orphan_data_extent *orphan;
733 if (list_empty(orphan_extents))
735 printf("The following data extent is lost in tree %llu:\n",
737 list_for_each_entry(orphan, orphan_extents, list) {
738 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
739 orphan->objectid, orphan->offset, orphan->disk_bytenr,
744 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
746 u64 root_objectid = root->root_key.objectid;
747 int errors = rec->errors;
751 /* reloc root errors, we print its corresponding fs root objectid*/
752 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
753 root_objectid = root->root_key.offset;
754 fprintf(stderr, "reloc");
756 fprintf(stderr, "root %llu inode %llu errors %x",
757 (unsigned long long) root_objectid,
758 (unsigned long long) rec->ino, rec->errors);
760 if (errors & I_ERR_NO_INODE_ITEM)
761 fprintf(stderr, ", no inode item");
762 if (errors & I_ERR_NO_ORPHAN_ITEM)
763 fprintf(stderr, ", no orphan item");
764 if (errors & I_ERR_DUP_INODE_ITEM)
765 fprintf(stderr, ", dup inode item");
766 if (errors & I_ERR_DUP_DIR_INDEX)
767 fprintf(stderr, ", dup dir index");
768 if (errors & I_ERR_ODD_DIR_ITEM)
769 fprintf(stderr, ", odd dir item");
770 if (errors & I_ERR_ODD_FILE_EXTENT)
771 fprintf(stderr, ", odd file extent");
772 if (errors & I_ERR_BAD_FILE_EXTENT)
773 fprintf(stderr, ", bad file extent");
774 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
775 fprintf(stderr, ", file extent overlap");
776 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
777 fprintf(stderr, ", file extent discount");
778 if (errors & I_ERR_DIR_ISIZE_WRONG)
779 fprintf(stderr, ", dir isize wrong");
780 if (errors & I_ERR_FILE_NBYTES_WRONG)
781 fprintf(stderr, ", nbytes wrong");
782 if (errors & I_ERR_ODD_CSUM_ITEM)
783 fprintf(stderr, ", odd csum item");
784 if (errors & I_ERR_SOME_CSUM_MISSING)
785 fprintf(stderr, ", some csum missing");
786 if (errors & I_ERR_LINK_COUNT_WRONG)
787 fprintf(stderr, ", link count wrong");
788 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
789 fprintf(stderr, ", orphan file extent");
790 fprintf(stderr, "\n");
791 /* Print the orphan extents if needed */
792 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
793 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
795 /* Print the holes if needed */
796 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
797 struct file_extent_hole *hole;
798 struct rb_node *node;
801 node = rb_first(&rec->holes);
802 fprintf(stderr, "Found file extent holes:\n");
805 hole = rb_entry(node, struct file_extent_hole, node);
806 fprintf(stderr, "\tstart: %llu, len: %llu\n",
807 hole->start, hole->len);
808 node = rb_next(node);
811 fprintf(stderr, "\tstart: 0, len: %llu\n",
812 round_up(rec->isize, root->sectorsize));
816 static void print_ref_error(int errors)
818 if (errors & REF_ERR_NO_DIR_ITEM)
819 fprintf(stderr, ", no dir item");
820 if (errors & REF_ERR_NO_DIR_INDEX)
821 fprintf(stderr, ", no dir index");
822 if (errors & REF_ERR_NO_INODE_REF)
823 fprintf(stderr, ", no inode ref");
824 if (errors & REF_ERR_DUP_DIR_ITEM)
825 fprintf(stderr, ", dup dir item");
826 if (errors & REF_ERR_DUP_DIR_INDEX)
827 fprintf(stderr, ", dup dir index");
828 if (errors & REF_ERR_DUP_INODE_REF)
829 fprintf(stderr, ", dup inode ref");
830 if (errors & REF_ERR_INDEX_UNMATCH)
831 fprintf(stderr, ", index mismatch");
832 if (errors & REF_ERR_FILETYPE_UNMATCH)
833 fprintf(stderr, ", filetype mismatch");
834 if (errors & REF_ERR_NAME_TOO_LONG)
835 fprintf(stderr, ", name too long");
836 if (errors & REF_ERR_NO_ROOT_REF)
837 fprintf(stderr, ", no root ref");
838 if (errors & REF_ERR_NO_ROOT_BACKREF)
839 fprintf(stderr, ", no root backref");
840 if (errors & REF_ERR_DUP_ROOT_REF)
841 fprintf(stderr, ", dup root ref");
842 if (errors & REF_ERR_DUP_ROOT_BACKREF)
843 fprintf(stderr, ", dup root backref");
844 fprintf(stderr, "\n");
847 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
850 struct ptr_node *node;
851 struct cache_extent *cache;
852 struct inode_record *rec = NULL;
855 cache = lookup_cache_extent(inode_cache, ino, 1);
857 node = container_of(cache, struct ptr_node, cache);
859 if (mod && rec->refs > 1) {
860 node->data = clone_inode_rec(rec);
861 if (IS_ERR(node->data))
867 rec = calloc(1, sizeof(*rec));
869 return ERR_PTR(-ENOMEM);
871 rec->extent_start = (u64)-1;
873 INIT_LIST_HEAD(&rec->backrefs);
874 INIT_LIST_HEAD(&rec->orphan_extents);
875 rec->holes = RB_ROOT;
877 node = malloc(sizeof(*node));
880 return ERR_PTR(-ENOMEM);
882 node->cache.start = ino;
883 node->cache.size = 1;
886 if (ino == BTRFS_FREE_INO_OBJECTID)
889 ret = insert_cache_extent(inode_cache, &node->cache);
891 return ERR_PTR(-EEXIST);
896 static void free_orphan_data_extents(struct list_head *orphan_extents)
898 struct orphan_data_extent *orphan;
900 while (!list_empty(orphan_extents)) {
901 orphan = list_entry(orphan_extents->next,
902 struct orphan_data_extent, list);
903 list_del(&orphan->list);
908 static void free_inode_rec(struct inode_record *rec)
910 struct inode_backref *backref;
915 while (!list_empty(&rec->backrefs)) {
916 backref = to_inode_backref(rec->backrefs.next);
917 list_del(&backref->list);
920 free_orphan_data_extents(&rec->orphan_extents);
921 free_file_extent_holes(&rec->holes);
925 static int can_free_inode_rec(struct inode_record *rec)
927 if (!rec->errors && rec->checked && rec->found_inode_item &&
928 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
933 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
934 struct inode_record *rec)
936 struct cache_extent *cache;
937 struct inode_backref *tmp, *backref;
938 struct ptr_node *node;
941 if (!rec->found_inode_item)
944 filetype = imode_to_type(rec->imode);
945 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
946 if (backref->found_dir_item && backref->found_dir_index) {
947 if (backref->filetype != filetype)
948 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
949 if (!backref->errors && backref->found_inode_ref &&
950 rec->nlink == rec->found_link) {
951 list_del(&backref->list);
957 if (!rec->checked || rec->merging)
960 if (S_ISDIR(rec->imode)) {
961 if (rec->found_size != rec->isize)
962 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
963 if (rec->found_file_extent)
964 rec->errors |= I_ERR_ODD_FILE_EXTENT;
965 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
966 if (rec->found_dir_item)
967 rec->errors |= I_ERR_ODD_DIR_ITEM;
968 if (rec->found_size != rec->nbytes)
969 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
970 if (rec->nlink > 0 && !no_holes &&
971 (rec->extent_end < rec->isize ||
972 first_extent_gap(&rec->holes) < rec->isize))
973 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
976 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
977 if (rec->found_csum_item && rec->nodatasum)
978 rec->errors |= I_ERR_ODD_CSUM_ITEM;
979 if (rec->some_csum_missing && !rec->nodatasum)
980 rec->errors |= I_ERR_SOME_CSUM_MISSING;
983 BUG_ON(rec->refs != 1);
984 if (can_free_inode_rec(rec)) {
985 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
986 node = container_of(cache, struct ptr_node, cache);
987 BUG_ON(node->data != rec);
988 remove_cache_extent(inode_cache, &node->cache);
994 static int check_orphan_item(struct btrfs_root *root, u64 ino)
996 struct btrfs_path path;
997 struct btrfs_key key;
1000 key.objectid = BTRFS_ORPHAN_OBJECTID;
1001 key.type = BTRFS_ORPHAN_ITEM_KEY;
1004 btrfs_init_path(&path);
1005 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1006 btrfs_release_path(&path);
1012 static int process_inode_item(struct extent_buffer *eb,
1013 int slot, struct btrfs_key *key,
1014 struct shared_node *active_node)
1016 struct inode_record *rec;
1017 struct btrfs_inode_item *item;
1019 rec = active_node->current;
1020 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1021 if (rec->found_inode_item) {
1022 rec->errors |= I_ERR_DUP_INODE_ITEM;
1025 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1026 rec->nlink = btrfs_inode_nlink(eb, item);
1027 rec->isize = btrfs_inode_size(eb, item);
1028 rec->nbytes = btrfs_inode_nbytes(eb, item);
1029 rec->imode = btrfs_inode_mode(eb, item);
1030 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1032 rec->found_inode_item = 1;
1033 if (rec->nlink == 0)
1034 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1035 maybe_free_inode_rec(&active_node->inode_cache, rec);
1039 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1041 int namelen, u64 dir)
1043 struct inode_backref *backref;
1045 list_for_each_entry(backref, &rec->backrefs, list) {
1046 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1048 if (backref->dir != dir || backref->namelen != namelen)
1050 if (memcmp(name, backref->name, namelen))
1055 backref = malloc(sizeof(*backref) + namelen + 1);
1058 memset(backref, 0, sizeof(*backref));
1060 backref->namelen = namelen;
1061 memcpy(backref->name, name, namelen);
1062 backref->name[namelen] = '\0';
1063 list_add_tail(&backref->list, &rec->backrefs);
1067 static int add_inode_backref(struct cache_tree *inode_cache,
1068 u64 ino, u64 dir, u64 index,
1069 const char *name, int namelen,
1070 u8 filetype, u8 itemtype, int errors)
1072 struct inode_record *rec;
1073 struct inode_backref *backref;
1075 rec = get_inode_rec(inode_cache, ino, 1);
1076 BUG_ON(IS_ERR(rec));
1077 backref = get_inode_backref(rec, name, namelen, dir);
1080 backref->errors |= errors;
1081 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1082 if (backref->found_dir_index)
1083 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1084 if (backref->found_inode_ref && backref->index != index)
1085 backref->errors |= REF_ERR_INDEX_UNMATCH;
1086 if (backref->found_dir_item && backref->filetype != filetype)
1087 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1089 backref->index = index;
1090 backref->filetype = filetype;
1091 backref->found_dir_index = 1;
1092 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1094 if (backref->found_dir_item)
1095 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1096 if (backref->found_dir_index && backref->filetype != filetype)
1097 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1099 backref->filetype = filetype;
1100 backref->found_dir_item = 1;
1101 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1102 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1103 if (backref->found_inode_ref)
1104 backref->errors |= REF_ERR_DUP_INODE_REF;
1105 if (backref->found_dir_index && backref->index != index)
1106 backref->errors |= REF_ERR_INDEX_UNMATCH;
1108 backref->index = index;
1110 backref->ref_type = itemtype;
1111 backref->found_inode_ref = 1;
1116 maybe_free_inode_rec(inode_cache, rec);
1120 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1121 struct cache_tree *dst_cache)
1123 struct inode_backref *backref;
1128 list_for_each_entry(backref, &src->backrefs, list) {
1129 if (backref->found_dir_index) {
1130 add_inode_backref(dst_cache, dst->ino, backref->dir,
1131 backref->index, backref->name,
1132 backref->namelen, backref->filetype,
1133 BTRFS_DIR_INDEX_KEY, backref->errors);
1135 if (backref->found_dir_item) {
1137 add_inode_backref(dst_cache, dst->ino,
1138 backref->dir, 0, backref->name,
1139 backref->namelen, backref->filetype,
1140 BTRFS_DIR_ITEM_KEY, backref->errors);
1142 if (backref->found_inode_ref) {
1143 add_inode_backref(dst_cache, dst->ino,
1144 backref->dir, backref->index,
1145 backref->name, backref->namelen, 0,
1146 backref->ref_type, backref->errors);
1150 if (src->found_dir_item)
1151 dst->found_dir_item = 1;
1152 if (src->found_file_extent)
1153 dst->found_file_extent = 1;
1154 if (src->found_csum_item)
1155 dst->found_csum_item = 1;
1156 if (src->some_csum_missing)
1157 dst->some_csum_missing = 1;
1158 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1159 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1164 BUG_ON(src->found_link < dir_count);
1165 dst->found_link += src->found_link - dir_count;
1166 dst->found_size += src->found_size;
1167 if (src->extent_start != (u64)-1) {
1168 if (dst->extent_start == (u64)-1) {
1169 dst->extent_start = src->extent_start;
1170 dst->extent_end = src->extent_end;
1172 if (dst->extent_end > src->extent_start)
1173 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1174 else if (dst->extent_end < src->extent_start) {
1175 ret = add_file_extent_hole(&dst->holes,
1177 src->extent_start - dst->extent_end);
1179 if (dst->extent_end < src->extent_end)
1180 dst->extent_end = src->extent_end;
1184 dst->errors |= src->errors;
1185 if (src->found_inode_item) {
1186 if (!dst->found_inode_item) {
1187 dst->nlink = src->nlink;
1188 dst->isize = src->isize;
1189 dst->nbytes = src->nbytes;
1190 dst->imode = src->imode;
1191 dst->nodatasum = src->nodatasum;
1192 dst->found_inode_item = 1;
1194 dst->errors |= I_ERR_DUP_INODE_ITEM;
1202 static int splice_shared_node(struct shared_node *src_node,
1203 struct shared_node *dst_node)
1205 struct cache_extent *cache;
1206 struct ptr_node *node, *ins;
1207 struct cache_tree *src, *dst;
1208 struct inode_record *rec, *conflict;
1209 u64 current_ino = 0;
1213 if (--src_node->refs == 0)
1215 if (src_node->current)
1216 current_ino = src_node->current->ino;
1218 src = &src_node->root_cache;
1219 dst = &dst_node->root_cache;
1221 cache = search_cache_extent(src, 0);
1223 node = container_of(cache, struct ptr_node, cache);
1225 cache = next_cache_extent(cache);
1228 remove_cache_extent(src, &node->cache);
1231 ins = malloc(sizeof(*ins));
1233 ins->cache.start = node->cache.start;
1234 ins->cache.size = node->cache.size;
1238 ret = insert_cache_extent(dst, &ins->cache);
1239 if (ret == -EEXIST) {
1240 conflict = get_inode_rec(dst, rec->ino, 1);
1241 BUG_ON(IS_ERR(conflict));
1242 merge_inode_recs(rec, conflict, dst);
1244 conflict->checked = 1;
1245 if (dst_node->current == conflict)
1246 dst_node->current = NULL;
1248 maybe_free_inode_rec(dst, conflict);
1249 free_inode_rec(rec);
1256 if (src == &src_node->root_cache) {
1257 src = &src_node->inode_cache;
1258 dst = &dst_node->inode_cache;
1262 if (current_ino > 0 && (!dst_node->current ||
1263 current_ino > dst_node->current->ino)) {
1264 if (dst_node->current) {
1265 dst_node->current->checked = 1;
1266 maybe_free_inode_rec(dst, dst_node->current);
1268 dst_node->current = get_inode_rec(dst, current_ino, 1);
1269 BUG_ON(IS_ERR(dst_node->current));
1274 static void free_inode_ptr(struct cache_extent *cache)
1276 struct ptr_node *node;
1277 struct inode_record *rec;
1279 node = container_of(cache, struct ptr_node, cache);
1281 free_inode_rec(rec);
1285 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1287 static struct shared_node *find_shared_node(struct cache_tree *shared,
1290 struct cache_extent *cache;
1291 struct shared_node *node;
1293 cache = lookup_cache_extent(shared, bytenr, 1);
1295 node = container_of(cache, struct shared_node, cache);
1301 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1304 struct shared_node *node;
1306 node = calloc(1, sizeof(*node));
1309 node->cache.start = bytenr;
1310 node->cache.size = 1;
1311 cache_tree_init(&node->root_cache);
1312 cache_tree_init(&node->inode_cache);
1315 ret = insert_cache_extent(shared, &node->cache);
1320 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1321 struct walk_control *wc, int level)
1323 struct shared_node *node;
1324 struct shared_node *dest;
1327 if (level == wc->active_node)
1330 BUG_ON(wc->active_node <= level);
1331 node = find_shared_node(&wc->shared, bytenr);
1333 ret = add_shared_node(&wc->shared, bytenr, refs);
1335 node = find_shared_node(&wc->shared, bytenr);
1336 wc->nodes[level] = node;
1337 wc->active_node = level;
1341 if (wc->root_level == wc->active_node &&
1342 btrfs_root_refs(&root->root_item) == 0) {
1343 if (--node->refs == 0) {
1344 free_inode_recs_tree(&node->root_cache);
1345 free_inode_recs_tree(&node->inode_cache);
1346 remove_cache_extent(&wc->shared, &node->cache);
1352 dest = wc->nodes[wc->active_node];
1353 splice_shared_node(node, dest);
1354 if (node->refs == 0) {
1355 remove_cache_extent(&wc->shared, &node->cache);
1361 static int leave_shared_node(struct btrfs_root *root,
1362 struct walk_control *wc, int level)
1364 struct shared_node *node;
1365 struct shared_node *dest;
1368 if (level == wc->root_level)
1371 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1375 BUG_ON(i >= BTRFS_MAX_LEVEL);
1377 node = wc->nodes[wc->active_node];
1378 wc->nodes[wc->active_node] = NULL;
1379 wc->active_node = i;
1381 dest = wc->nodes[wc->active_node];
1382 if (wc->active_node < wc->root_level ||
1383 btrfs_root_refs(&root->root_item) > 0) {
1384 BUG_ON(node->refs <= 1);
1385 splice_shared_node(node, dest);
1387 BUG_ON(node->refs < 2);
1396 * 1 - if the root with id child_root_id is a child of root parent_root_id
1397 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1398 * has other root(s) as parent(s)
1399 * 2 - if the root child_root_id doesn't have any parent roots
1401 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1404 struct btrfs_path path;
1405 struct btrfs_key key;
1406 struct extent_buffer *leaf;
1410 btrfs_init_path(&path);
1412 key.objectid = parent_root_id;
1413 key.type = BTRFS_ROOT_REF_KEY;
1414 key.offset = child_root_id;
1415 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1419 btrfs_release_path(&path);
1423 key.objectid = child_root_id;
1424 key.type = BTRFS_ROOT_BACKREF_KEY;
1426 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1432 leaf = path.nodes[0];
1433 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1434 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1437 leaf = path.nodes[0];
1440 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1441 if (key.objectid != child_root_id ||
1442 key.type != BTRFS_ROOT_BACKREF_KEY)
1447 if (key.offset == parent_root_id) {
1448 btrfs_release_path(&path);
1455 btrfs_release_path(&path);
1458 return has_parent ? 0 : 2;
1461 static int process_dir_item(struct btrfs_root *root,
1462 struct extent_buffer *eb,
1463 int slot, struct btrfs_key *key,
1464 struct shared_node *active_node)
1474 struct btrfs_dir_item *di;
1475 struct inode_record *rec;
1476 struct cache_tree *root_cache;
1477 struct cache_tree *inode_cache;
1478 struct btrfs_key location;
1479 char namebuf[BTRFS_NAME_LEN];
1481 root_cache = &active_node->root_cache;
1482 inode_cache = &active_node->inode_cache;
1483 rec = active_node->current;
1484 rec->found_dir_item = 1;
1486 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1487 total = btrfs_item_size_nr(eb, slot);
1488 while (cur < total) {
1490 btrfs_dir_item_key_to_cpu(eb, di, &location);
1491 name_len = btrfs_dir_name_len(eb, di);
1492 data_len = btrfs_dir_data_len(eb, di);
1493 filetype = btrfs_dir_type(eb, di);
1495 rec->found_size += name_len;
1496 if (name_len <= BTRFS_NAME_LEN) {
1500 len = BTRFS_NAME_LEN;
1501 error = REF_ERR_NAME_TOO_LONG;
1503 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1505 if (location.type == BTRFS_INODE_ITEM_KEY) {
1506 add_inode_backref(inode_cache, location.objectid,
1507 key->objectid, key->offset, namebuf,
1508 len, filetype, key->type, error);
1509 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1510 add_inode_backref(root_cache, location.objectid,
1511 key->objectid, key->offset,
1512 namebuf, len, filetype,
1515 fprintf(stderr, "invalid location in dir item %u\n",
1517 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1518 key->objectid, key->offset, namebuf,
1519 len, filetype, key->type, error);
1522 len = sizeof(*di) + name_len + data_len;
1523 di = (struct btrfs_dir_item *)((char *)di + len);
1526 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1527 rec->errors |= I_ERR_DUP_DIR_INDEX;
1532 static int process_inode_ref(struct extent_buffer *eb,
1533 int slot, struct btrfs_key *key,
1534 struct shared_node *active_node)
1542 struct cache_tree *inode_cache;
1543 struct btrfs_inode_ref *ref;
1544 char namebuf[BTRFS_NAME_LEN];
1546 inode_cache = &active_node->inode_cache;
1548 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1549 total = btrfs_item_size_nr(eb, slot);
1550 while (cur < total) {
1551 name_len = btrfs_inode_ref_name_len(eb, ref);
1552 index = btrfs_inode_ref_index(eb, ref);
1553 if (name_len <= BTRFS_NAME_LEN) {
1557 len = BTRFS_NAME_LEN;
1558 error = REF_ERR_NAME_TOO_LONG;
1560 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1561 add_inode_backref(inode_cache, key->objectid, key->offset,
1562 index, namebuf, len, 0, key->type, error);
1564 len = sizeof(*ref) + name_len;
1565 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1571 static int process_inode_extref(struct extent_buffer *eb,
1572 int slot, struct btrfs_key *key,
1573 struct shared_node *active_node)
1582 struct cache_tree *inode_cache;
1583 struct btrfs_inode_extref *extref;
1584 char namebuf[BTRFS_NAME_LEN];
1586 inode_cache = &active_node->inode_cache;
1588 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1589 total = btrfs_item_size_nr(eb, slot);
1590 while (cur < total) {
1591 name_len = btrfs_inode_extref_name_len(eb, extref);
1592 index = btrfs_inode_extref_index(eb, extref);
1593 parent = btrfs_inode_extref_parent(eb, extref);
1594 if (name_len <= BTRFS_NAME_LEN) {
1598 len = BTRFS_NAME_LEN;
1599 error = REF_ERR_NAME_TOO_LONG;
1601 read_extent_buffer(eb, namebuf,
1602 (unsigned long)(extref + 1), len);
1603 add_inode_backref(inode_cache, key->objectid, parent,
1604 index, namebuf, len, 0, key->type, error);
1606 len = sizeof(*extref) + name_len;
1607 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1614 static int count_csum_range(struct btrfs_root *root, u64 start,
1615 u64 len, u64 *found)
1617 struct btrfs_key key;
1618 struct btrfs_path path;
1619 struct extent_buffer *leaf;
1624 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1626 btrfs_init_path(&path);
1628 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1630 key.type = BTRFS_EXTENT_CSUM_KEY;
1632 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1636 if (ret > 0 && path.slots[0] > 0) {
1637 leaf = path.nodes[0];
1638 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1639 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1640 key.type == BTRFS_EXTENT_CSUM_KEY)
1645 leaf = path.nodes[0];
1646 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1647 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1652 leaf = path.nodes[0];
1655 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1656 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1657 key.type != BTRFS_EXTENT_CSUM_KEY)
1660 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1661 if (key.offset >= start + len)
1664 if (key.offset > start)
1667 size = btrfs_item_size_nr(leaf, path.slots[0]);
1668 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1669 if (csum_end > start) {
1670 size = min(csum_end - start, len);
1679 btrfs_release_path(&path);
1685 static int process_file_extent(struct btrfs_root *root,
1686 struct extent_buffer *eb,
1687 int slot, struct btrfs_key *key,
1688 struct shared_node *active_node)
1690 struct inode_record *rec;
1691 struct btrfs_file_extent_item *fi;
1693 u64 disk_bytenr = 0;
1694 u64 extent_offset = 0;
1695 u64 mask = root->sectorsize - 1;
1699 rec = active_node->current;
1700 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1701 rec->found_file_extent = 1;
1703 if (rec->extent_start == (u64)-1) {
1704 rec->extent_start = key->offset;
1705 rec->extent_end = key->offset;
1708 if (rec->extent_end > key->offset)
1709 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1710 else if (rec->extent_end < key->offset) {
1711 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1712 key->offset - rec->extent_end);
1717 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1718 extent_type = btrfs_file_extent_type(eb, fi);
1720 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1721 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1723 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1724 rec->found_size += num_bytes;
1725 num_bytes = (num_bytes + mask) & ~mask;
1726 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1727 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1728 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1729 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1730 extent_offset = btrfs_file_extent_offset(eb, fi);
1731 if (num_bytes == 0 || (num_bytes & mask))
1732 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1733 if (num_bytes + extent_offset >
1734 btrfs_file_extent_ram_bytes(eb, fi))
1735 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1736 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1737 (btrfs_file_extent_compression(eb, fi) ||
1738 btrfs_file_extent_encryption(eb, fi) ||
1739 btrfs_file_extent_other_encoding(eb, fi)))
1740 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1741 if (disk_bytenr > 0)
1742 rec->found_size += num_bytes;
1744 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1746 rec->extent_end = key->offset + num_bytes;
1749 * The data reloc tree will copy full extents into its inode and then
1750 * copy the corresponding csums. Because the extent it copied could be
1751 * a preallocated extent that hasn't been written to yet there may be no
1752 * csums to copy, ergo we won't have csums for our file extent. This is
1753 * ok so just don't bother checking csums if the inode belongs to the
1756 if (disk_bytenr > 0 &&
1757 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1759 if (btrfs_file_extent_compression(eb, fi))
1760 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1762 disk_bytenr += extent_offset;
1764 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1767 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1769 rec->found_csum_item = 1;
1770 if (found < num_bytes)
1771 rec->some_csum_missing = 1;
1772 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1774 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1780 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1781 struct walk_control *wc)
1783 struct btrfs_key key;
1787 struct cache_tree *inode_cache;
1788 struct shared_node *active_node;
1790 if (wc->root_level == wc->active_node &&
1791 btrfs_root_refs(&root->root_item) == 0)
1794 active_node = wc->nodes[wc->active_node];
1795 inode_cache = &active_node->inode_cache;
1796 nritems = btrfs_header_nritems(eb);
1797 for (i = 0; i < nritems; i++) {
1798 btrfs_item_key_to_cpu(eb, &key, i);
1800 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1802 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1805 if (active_node->current == NULL ||
1806 active_node->current->ino < key.objectid) {
1807 if (active_node->current) {
1808 active_node->current->checked = 1;
1809 maybe_free_inode_rec(inode_cache,
1810 active_node->current);
1812 active_node->current = get_inode_rec(inode_cache,
1814 BUG_ON(IS_ERR(active_node->current));
1817 case BTRFS_DIR_ITEM_KEY:
1818 case BTRFS_DIR_INDEX_KEY:
1819 ret = process_dir_item(root, eb, i, &key, active_node);
1821 case BTRFS_INODE_REF_KEY:
1822 ret = process_inode_ref(eb, i, &key, active_node);
1824 case BTRFS_INODE_EXTREF_KEY:
1825 ret = process_inode_extref(eb, i, &key, active_node);
1827 case BTRFS_INODE_ITEM_KEY:
1828 ret = process_inode_item(eb, i, &key, active_node);
1830 case BTRFS_EXTENT_DATA_KEY:
1831 ret = process_file_extent(root, eb, i, &key,
1841 static void reada_walk_down(struct btrfs_root *root,
1842 struct extent_buffer *node, int slot)
1851 level = btrfs_header_level(node);
1855 nritems = btrfs_header_nritems(node);
1856 blocksize = root->nodesize;
1857 for (i = slot; i < nritems; i++) {
1858 bytenr = btrfs_node_blockptr(node, i);
1859 ptr_gen = btrfs_node_ptr_generation(node, i);
1860 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1865 * Check the child node/leaf by the following condition:
1866 * 1. the first item key of the node/leaf should be the same with the one
1868 * 2. block in parent node should match the child node/leaf.
1869 * 3. generation of parent node and child's header should be consistent.
1871 * Or the child node/leaf pointed by the key in parent is not valid.
1873 * We hope to check leaf owner too, but since subvol may share leaves,
1874 * which makes leaf owner check not so strong, key check should be
1875 * sufficient enough for that case.
1877 static int check_child_node(struct btrfs_root *root,
1878 struct extent_buffer *parent, int slot,
1879 struct extent_buffer *child)
1881 struct btrfs_key parent_key;
1882 struct btrfs_key child_key;
1885 btrfs_node_key_to_cpu(parent, &parent_key, slot);
1886 if (btrfs_header_level(child) == 0)
1887 btrfs_item_key_to_cpu(child, &child_key, 0);
1889 btrfs_node_key_to_cpu(child, &child_key, 0);
1891 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1894 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1895 parent_key.objectid, parent_key.type, parent_key.offset,
1896 child_key.objectid, child_key.type, child_key.offset);
1898 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1900 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1901 btrfs_node_blockptr(parent, slot),
1902 btrfs_header_bytenr(child));
1904 if (btrfs_node_ptr_generation(parent, slot) !=
1905 btrfs_header_generation(child)) {
1907 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1908 btrfs_header_generation(child),
1909 btrfs_node_ptr_generation(parent, slot));
1915 u64 bytenr[BTRFS_MAX_LEVEL];
1916 u64 refs[BTRFS_MAX_LEVEL];
1919 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1920 struct walk_control *wc, int *level,
1921 struct node_refs *nrefs)
1923 enum btrfs_tree_block_status status;
1926 struct extent_buffer *next;
1927 struct extent_buffer *cur;
1932 WARN_ON(*level < 0);
1933 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1935 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1936 refs = nrefs->refs[*level];
1939 ret = btrfs_lookup_extent_info(NULL, root,
1940 path->nodes[*level]->start,
1941 *level, 1, &refs, NULL);
1946 nrefs->bytenr[*level] = path->nodes[*level]->start;
1947 nrefs->refs[*level] = refs;
1951 ret = enter_shared_node(root, path->nodes[*level]->start,
1959 while (*level >= 0) {
1960 WARN_ON(*level < 0);
1961 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1962 cur = path->nodes[*level];
1964 if (btrfs_header_level(cur) != *level)
1967 if (path->slots[*level] >= btrfs_header_nritems(cur))
1970 ret = process_one_leaf(root, cur, wc);
1975 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1976 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1977 blocksize = root->nodesize;
1979 if (bytenr == nrefs->bytenr[*level - 1]) {
1980 refs = nrefs->refs[*level - 1];
1982 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1983 *level - 1, 1, &refs, NULL);
1987 nrefs->bytenr[*level - 1] = bytenr;
1988 nrefs->refs[*level - 1] = refs;
1993 ret = enter_shared_node(root, bytenr, refs,
1996 path->slots[*level]++;
2001 next = btrfs_find_tree_block(root, bytenr, blocksize);
2002 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2003 free_extent_buffer(next);
2004 reada_walk_down(root, cur, path->slots[*level]);
2005 next = read_tree_block(root, bytenr, blocksize,
2007 if (!extent_buffer_uptodate(next)) {
2008 struct btrfs_key node_key;
2010 btrfs_node_key_to_cpu(path->nodes[*level],
2012 path->slots[*level]);
2013 btrfs_add_corrupt_extent_record(root->fs_info,
2015 path->nodes[*level]->start,
2016 root->nodesize, *level);
2022 ret = check_child_node(root, cur, path->slots[*level], next);
2028 if (btrfs_is_leaf(next))
2029 status = btrfs_check_leaf(root, NULL, next);
2031 status = btrfs_check_node(root, NULL, next);
2032 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2033 free_extent_buffer(next);
2038 *level = *level - 1;
2039 free_extent_buffer(path->nodes[*level]);
2040 path->nodes[*level] = next;
2041 path->slots[*level] = 0;
2044 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2048 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2049 struct walk_control *wc, int *level)
2052 struct extent_buffer *leaf;
2054 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2055 leaf = path->nodes[i];
2056 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2061 free_extent_buffer(path->nodes[*level]);
2062 path->nodes[*level] = NULL;
2063 BUG_ON(*level > wc->active_node);
2064 if (*level == wc->active_node)
2065 leave_shared_node(root, wc, *level);
2072 static int check_root_dir(struct inode_record *rec)
2074 struct inode_backref *backref;
2077 if (!rec->found_inode_item || rec->errors)
2079 if (rec->nlink != 1 || rec->found_link != 0)
2081 if (list_empty(&rec->backrefs))
2083 backref = to_inode_backref(rec->backrefs.next);
2084 if (!backref->found_inode_ref)
2086 if (backref->index != 0 || backref->namelen != 2 ||
2087 memcmp(backref->name, "..", 2))
2089 if (backref->found_dir_index || backref->found_dir_item)
2096 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2097 struct btrfs_root *root, struct btrfs_path *path,
2098 struct inode_record *rec)
2100 struct btrfs_inode_item *ei;
2101 struct btrfs_key key;
2104 key.objectid = rec->ino;
2105 key.type = BTRFS_INODE_ITEM_KEY;
2106 key.offset = (u64)-1;
2108 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2112 if (!path->slots[0]) {
2119 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2120 if (key.objectid != rec->ino) {
2125 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2126 struct btrfs_inode_item);
2127 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2128 btrfs_mark_buffer_dirty(path->nodes[0]);
2129 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2130 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2131 root->root_key.objectid);
2133 btrfs_release_path(path);
2137 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2138 struct btrfs_root *root,
2139 struct btrfs_path *path,
2140 struct inode_record *rec)
2144 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2145 btrfs_release_path(path);
2147 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2151 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2152 struct btrfs_root *root,
2153 struct btrfs_path *path,
2154 struct inode_record *rec)
2156 struct btrfs_inode_item *ei;
2157 struct btrfs_key key;
2160 key.objectid = rec->ino;
2161 key.type = BTRFS_INODE_ITEM_KEY;
2164 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2171 /* Since ret == 0, no need to check anything */
2172 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2173 struct btrfs_inode_item);
2174 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2175 btrfs_mark_buffer_dirty(path->nodes[0]);
2176 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2177 printf("reset nbytes for ino %llu root %llu\n",
2178 rec->ino, root->root_key.objectid);
2180 btrfs_release_path(path);
2184 static int add_missing_dir_index(struct btrfs_root *root,
2185 struct cache_tree *inode_cache,
2186 struct inode_record *rec,
2187 struct inode_backref *backref)
2189 struct btrfs_path path;
2190 struct btrfs_trans_handle *trans;
2191 struct btrfs_dir_item *dir_item;
2192 struct extent_buffer *leaf;
2193 struct btrfs_key key;
2194 struct btrfs_disk_key disk_key;
2195 struct inode_record *dir_rec;
2196 unsigned long name_ptr;
2197 u32 data_size = sizeof(*dir_item) + backref->namelen;
2200 trans = btrfs_start_transaction(root, 1);
2202 return PTR_ERR(trans);
2204 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2205 (unsigned long long)rec->ino);
2207 btrfs_init_path(&path);
2208 key.objectid = backref->dir;
2209 key.type = BTRFS_DIR_INDEX_KEY;
2210 key.offset = backref->index;
2211 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2214 leaf = path.nodes[0];
2215 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2217 disk_key.objectid = cpu_to_le64(rec->ino);
2218 disk_key.type = BTRFS_INODE_ITEM_KEY;
2219 disk_key.offset = 0;
2221 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2222 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2223 btrfs_set_dir_data_len(leaf, dir_item, 0);
2224 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2225 name_ptr = (unsigned long)(dir_item + 1);
2226 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2227 btrfs_mark_buffer_dirty(leaf);
2228 btrfs_release_path(&path);
2229 btrfs_commit_transaction(trans, root);
2231 backref->found_dir_index = 1;
2232 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2233 BUG_ON(IS_ERR(dir_rec));
2236 dir_rec->found_size += backref->namelen;
2237 if (dir_rec->found_size == dir_rec->isize &&
2238 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2239 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2240 if (dir_rec->found_size != dir_rec->isize)
2241 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2246 static int delete_dir_index(struct btrfs_root *root,
2247 struct cache_tree *inode_cache,
2248 struct inode_record *rec,
2249 struct inode_backref *backref)
2251 struct btrfs_trans_handle *trans;
2252 struct btrfs_dir_item *di;
2253 struct btrfs_path path;
2256 trans = btrfs_start_transaction(root, 1);
2258 return PTR_ERR(trans);
2260 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2261 (unsigned long long)backref->dir,
2262 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2263 (unsigned long long)root->objectid);
2265 btrfs_init_path(&path);
2266 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2267 backref->name, backref->namelen,
2268 backref->index, -1);
2271 btrfs_release_path(&path);
2272 btrfs_commit_transaction(trans, root);
2279 ret = btrfs_del_item(trans, root, &path);
2281 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2283 btrfs_release_path(&path);
2284 btrfs_commit_transaction(trans, root);
2288 static int create_inode_item(struct btrfs_root *root,
2289 struct inode_record *rec,
2290 struct inode_backref *backref, int root_dir)
2292 struct btrfs_trans_handle *trans;
2293 struct btrfs_inode_item inode_item;
2294 time_t now = time(NULL);
2297 trans = btrfs_start_transaction(root, 1);
2298 if (IS_ERR(trans)) {
2299 ret = PTR_ERR(trans);
2303 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2304 "be incomplete, please check permissions and content after "
2305 "the fsck completes.\n", (unsigned long long)root->objectid,
2306 (unsigned long long)rec->ino);
2308 memset(&inode_item, 0, sizeof(inode_item));
2309 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2311 btrfs_set_stack_inode_nlink(&inode_item, 1);
2313 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2314 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2315 if (rec->found_dir_item) {
2316 if (rec->found_file_extent)
2317 fprintf(stderr, "root %llu inode %llu has both a dir "
2318 "item and extents, unsure if it is a dir or a "
2319 "regular file so setting it as a directory\n",
2320 (unsigned long long)root->objectid,
2321 (unsigned long long)rec->ino);
2322 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2323 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2324 } else if (!rec->found_dir_item) {
2325 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2326 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2328 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2329 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2330 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2331 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2332 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2333 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2334 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2335 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2337 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2339 btrfs_commit_transaction(trans, root);
2343 static int repair_inode_backrefs(struct btrfs_root *root,
2344 struct inode_record *rec,
2345 struct cache_tree *inode_cache,
2348 struct inode_backref *tmp, *backref;
2349 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2353 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2354 if (!delete && rec->ino == root_dirid) {
2355 if (!rec->found_inode_item) {
2356 ret = create_inode_item(root, rec, backref, 1);
2363 /* Index 0 for root dir's are special, don't mess with it */
2364 if (rec->ino == root_dirid && backref->index == 0)
2368 ((backref->found_dir_index && !backref->found_inode_ref) ||
2369 (backref->found_dir_index && backref->found_inode_ref &&
2370 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2371 ret = delete_dir_index(root, inode_cache, rec, backref);
2375 list_del(&backref->list);
2379 if (!delete && !backref->found_dir_index &&
2380 backref->found_dir_item && backref->found_inode_ref) {
2381 ret = add_missing_dir_index(root, inode_cache, rec,
2386 if (backref->found_dir_item &&
2387 backref->found_dir_index &&
2388 backref->found_dir_index) {
2389 if (!backref->errors &&
2390 backref->found_inode_ref) {
2391 list_del(&backref->list);
2397 if (!delete && (!backref->found_dir_index &&
2398 !backref->found_dir_item &&
2399 backref->found_inode_ref)) {
2400 struct btrfs_trans_handle *trans;
2401 struct btrfs_key location;
2403 ret = check_dir_conflict(root, backref->name,
2409 * let nlink fixing routine to handle it,
2410 * which can do it better.
2415 location.objectid = rec->ino;
2416 location.type = BTRFS_INODE_ITEM_KEY;
2417 location.offset = 0;
2419 trans = btrfs_start_transaction(root, 1);
2420 if (IS_ERR(trans)) {
2421 ret = PTR_ERR(trans);
2424 fprintf(stderr, "adding missing dir index/item pair "
2426 (unsigned long long)rec->ino);
2427 ret = btrfs_insert_dir_item(trans, root, backref->name,
2429 backref->dir, &location,
2430 imode_to_type(rec->imode),
2433 btrfs_commit_transaction(trans, root);
2437 if (!delete && (backref->found_inode_ref &&
2438 backref->found_dir_index &&
2439 backref->found_dir_item &&
2440 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2441 !rec->found_inode_item)) {
2442 ret = create_inode_item(root, rec, backref, 0);
2449 return ret ? ret : repaired;
2453 * To determine the file type for nlink/inode_item repair
2455 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2456 * Return -ENOENT if file type is not found.
2458 static int find_file_type(struct inode_record *rec, u8 *type)
2460 struct inode_backref *backref;
2462 /* For inode item recovered case */
2463 if (rec->found_inode_item) {
2464 *type = imode_to_type(rec->imode);
2468 list_for_each_entry(backref, &rec->backrefs, list) {
2469 if (backref->found_dir_index || backref->found_dir_item) {
2470 *type = backref->filetype;
2478 * To determine the file name for nlink repair
2480 * Return 0 if file name is found, set name and namelen.
2481 * Return -ENOENT if file name is not found.
2483 static int find_file_name(struct inode_record *rec,
2484 char *name, int *namelen)
2486 struct inode_backref *backref;
2488 list_for_each_entry(backref, &rec->backrefs, list) {
2489 if (backref->found_dir_index || backref->found_dir_item ||
2490 backref->found_inode_ref) {
2491 memcpy(name, backref->name, backref->namelen);
2492 *namelen = backref->namelen;
2499 /* Reset the nlink of the inode to the correct one */
2500 static int reset_nlink(struct btrfs_trans_handle *trans,
2501 struct btrfs_root *root,
2502 struct btrfs_path *path,
2503 struct inode_record *rec)
2505 struct inode_backref *backref;
2506 struct inode_backref *tmp;
2507 struct btrfs_key key;
2508 struct btrfs_inode_item *inode_item;
2511 /* We don't believe this either, reset it and iterate backref */
2512 rec->found_link = 0;
2514 /* Remove all backref including the valid ones */
2515 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2516 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2517 backref->index, backref->name,
2518 backref->namelen, 0);
2522 /* remove invalid backref, so it won't be added back */
2523 if (!(backref->found_dir_index &&
2524 backref->found_dir_item &&
2525 backref->found_inode_ref)) {
2526 list_del(&backref->list);
2533 /* Set nlink to 0 */
2534 key.objectid = rec->ino;
2535 key.type = BTRFS_INODE_ITEM_KEY;
2537 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2544 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2545 struct btrfs_inode_item);
2546 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2547 btrfs_mark_buffer_dirty(path->nodes[0]);
2548 btrfs_release_path(path);
2551 * Add back valid inode_ref/dir_item/dir_index,
2552 * add_link() will handle the nlink inc, so new nlink must be correct
2554 list_for_each_entry(backref, &rec->backrefs, list) {
2555 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2556 backref->name, backref->namelen,
2557 backref->filetype, &backref->index, 1);
2562 btrfs_release_path(path);
2566 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2567 struct btrfs_root *root,
2568 struct btrfs_path *path,
2569 struct inode_record *rec)
2571 char *dir_name = "lost+found";
2572 char namebuf[BTRFS_NAME_LEN] = {0};
2577 int name_recovered = 0;
2578 int type_recovered = 0;
2582 * Get file name and type first before these invalid inode ref
2583 * are deleted by remove_all_invalid_backref()
2585 name_recovered = !find_file_name(rec, namebuf, &namelen);
2586 type_recovered = !find_file_type(rec, &type);
2588 if (!name_recovered) {
2589 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2590 rec->ino, rec->ino);
2591 namelen = count_digits(rec->ino);
2592 sprintf(namebuf, "%llu", rec->ino);
2595 if (!type_recovered) {
2596 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2598 type = BTRFS_FT_REG_FILE;
2602 ret = reset_nlink(trans, root, path, rec);
2605 "Failed to reset nlink for inode %llu: %s\n",
2606 rec->ino, strerror(-ret));
2610 if (rec->found_link == 0) {
2611 lost_found_ino = root->highest_inode;
2612 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2617 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2618 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2621 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2622 dir_name, strerror(-ret));
2625 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2626 namebuf, namelen, type, NULL, 1);
2628 * Add ".INO" suffix several times to handle case where
2629 * "FILENAME.INO" is already taken by another file.
2631 while (ret == -EEXIST) {
2633 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2635 if (namelen + count_digits(rec->ino) + 1 >
2640 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2642 namelen += count_digits(rec->ino) + 1;
2643 ret = btrfs_add_link(trans, root, rec->ino,
2644 lost_found_ino, namebuf,
2645 namelen, type, NULL, 1);
2649 "Failed to link the inode %llu to %s dir: %s\n",
2650 rec->ino, dir_name, strerror(-ret));
2654 * Just increase the found_link, don't actually add the
2655 * backref. This will make things easier and this inode
2656 * record will be freed after the repair is done.
2657 * So fsck will not report problem about this inode.
2660 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2661 namelen, namebuf, dir_name);
2663 printf("Fixed the nlink of inode %llu\n", rec->ino);
2666 * Clear the flag anyway, or we will loop forever for the same inode
2667 * as it will not be removed from the bad inode list and the dead loop
2670 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2671 btrfs_release_path(path);
2676 * Check if there is any normal(reg or prealloc) file extent for given
2678 * This is used to determine the file type when neither its dir_index/item or
2679 * inode_item exists.
2681 * This will *NOT* report error, if any error happens, just consider it does
2682 * not have any normal file extent.
2684 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2686 struct btrfs_path path;
2687 struct btrfs_key key;
2688 struct btrfs_key found_key;
2689 struct btrfs_file_extent_item *fi;
2693 btrfs_init_path(&path);
2695 key.type = BTRFS_EXTENT_DATA_KEY;
2698 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2703 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2704 ret = btrfs_next_leaf(root, &path);
2711 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
2713 if (found_key.objectid != ino ||
2714 found_key.type != BTRFS_EXTENT_DATA_KEY)
2716 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2717 struct btrfs_file_extent_item);
2718 type = btrfs_file_extent_type(path.nodes[0], fi);
2719 if (type != BTRFS_FILE_EXTENT_INLINE) {
2725 btrfs_release_path(&path);
2729 static u32 btrfs_type_to_imode(u8 type)
2731 static u32 imode_by_btrfs_type[] = {
2732 [BTRFS_FT_REG_FILE] = S_IFREG,
2733 [BTRFS_FT_DIR] = S_IFDIR,
2734 [BTRFS_FT_CHRDEV] = S_IFCHR,
2735 [BTRFS_FT_BLKDEV] = S_IFBLK,
2736 [BTRFS_FT_FIFO] = S_IFIFO,
2737 [BTRFS_FT_SOCK] = S_IFSOCK,
2738 [BTRFS_FT_SYMLINK] = S_IFLNK,
2741 return imode_by_btrfs_type[(type)];
/*
 * Rebuild a missing inode item for @rec.
 *
 * The file type is recovered in this order: dir_index/dir_item info already
 * gathered in @rec, then presence of a normal file extent, then orphan
 * extents; otherwise REG_FILE is assumed.  Only the inode item itself is
 * recreated here — nlink repair (lost+found handling) is left to the
 * I_ERR_LINK_COUNT_WRONG pass, which is deliberately re-armed below.
 */
2744 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2745 struct btrfs_root *root,
2746 struct btrfs_path *path,
2747 struct inode_record *rec)
2751 int type_recovered = 0;
2754 printf("Trying to rebuild inode:%llu\n", rec->ino);
2756 type_recovered = !find_file_type(rec, &filetype);
2759 * Try to determine inode type if type not found.
2761 * For found regular file extent, it must be FILE.
2762 * For found dir_item/index, it must be DIR.
2764 * For undetermined one, use FILE as fallback.
2767 * 1. If found backref(inode_index/item is already handled) to it,
2769 * Need new inode-inode ref structure to allow search for that.
2771 if (!type_recovered) {
2772 if (rec->found_file_extent &&
2773 find_normal_file_extent(root, rec->ino)) {
2775 filetype = BTRFS_FT_REG_FILE;
2776 } else if (rec->found_dir_item) {
2778 filetype = BTRFS_FT_DIR;
2779 } else if (!list_empty(&rec->orphan_extents)) {
2781 filetype = BTRFS_FT_REG_FILE;
2783 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2786 filetype = BTRFS_FT_REG_FILE;
/* NOTE(review): 'mode' is declared/initialized on a line elided from this listing. */
2790 ret = btrfs_new_inode(trans, root, rec->ino,
2791 mode | btrfs_type_to_imode(filetype));
2796 * Here inode rebuild is done, we only rebuild the inode item,
2797 * don't repair the nlink(like move to lost+found).
2798 * That is the job of nlink repair.
2800 * We just fill the record and return
2802 rec->found_dir_item = 1;
2803 rec->imode = mode | btrfs_type_to_imode(filetype);
2805 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2806 /* Ensure the inode_nlinks repair function will be called */
2807 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-insert the orphan data extents recorded for @rec as real file extents.
 *
 * For each orphan extent: if a conflicting extent already exists at that
 * file offset, the orphan's disk extent is freed instead; otherwise a file
 * extent item is inserted and the record's size/hole bookkeeping is updated.
 * Clears I_ERR_FILE_EXTENT_ORPHAN when the list has been drained.
 */
2812 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2813 struct btrfs_root *root,
2814 struct btrfs_path *path,
2815 struct inode_record *rec)
2817 struct orphan_data_extent *orphan;
2818 struct orphan_data_extent *tmp;
2821 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2823 * Check for conflicting file extents
2825 * Here we don't know whether the extents is compressed or not,
2826 * so we can only assume it not compressed nor data offset,
2827 * and use its disk_len as extent length.
2829 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2830 orphan->offset, orphan->disk_len, 0);
2831 btrfs_release_path(path);
/* Conflict: drop the orphan's disk extent rather than the existing one. */
2836 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2837 orphan->disk_bytenr, orphan->disk_len);
2838 ret = btrfs_free_extent(trans,
2839 root->fs_info->extent_root,
2840 orphan->disk_bytenr, orphan->disk_len,
2841 0, root->objectid, orphan->objectid,
/* No conflict: materialize the orphan as a real file extent item. */
2846 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2847 orphan->offset, orphan->disk_bytenr,
2848 orphan->disk_len, orphan->disk_len);
2852 /* Update file size info */
2853 rec->found_size += orphan->disk_len;
2854 if (rec->found_size == rec->nbytes)
2855 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2857 /* Update the file extent hole info too */
2858 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2862 if (RB_EMPTY_ROOT(&rec->holes))
2863 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2865 list_del(&orphan->list);
2868 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill every recorded file-extent hole of @rec with an explicit hole extent
 * (btrfs_punch_hole), clearing I_ERR_FILE_EXTENT_DISCOUNT once the hole
 * rb-tree is empty.  A file that lost all of its extents gets one hole
 * spanning its whole (sector-aligned) isize.
 */
2873 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2874 struct btrfs_root *root,
2875 struct btrfs_path *path,
2876 struct inode_record *rec)
2878 struct rb_node *node;
2879 struct file_extent_hole *hole;
2883 node = rb_first(&rec->holes);
2887 hole = rb_entry(node, struct file_extent_hole, node);
2888 ret = btrfs_punch_hole(trans, root, rec->ino,
2889 hole->start, hole->len);
/* Drop the hole from the record now that it exists on disk. */
2892 ret = del_file_extent_hole(&rec->holes, hole->start,
2896 if (RB_EMPTY_ROOT(&rec->holes))
2897 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2898 node = rb_first(&rec->holes);
2900 /* special case for a file losing all its file extent */
2902 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2903 round_up(rec->isize, root->sectorsize));
2907 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2908 rec->ino, root->objectid);
/*
 * Dispatch the individual inode repair routines for every repairable error
 * bit set in @rec->errors, inside a single transaction.
 *
 * Order matters: the inode item must exist (repair_inode_no_item) before
 * extent/isize/orphan/nlink fixes can be applied, and each later step only
 * runs if the previous ones succeeded (ret == 0).
 */
2913 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2915 struct btrfs_trans_handle *trans;
2916 struct btrfs_path path;
/* Bail out early when none of the error bits we know how to fix are set. */
2919 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2920 I_ERR_NO_ORPHAN_ITEM |
2921 I_ERR_LINK_COUNT_WRONG |
2922 I_ERR_NO_INODE_ITEM |
2923 I_ERR_FILE_EXTENT_ORPHAN |
2924 I_ERR_FILE_EXTENT_DISCOUNT|
2925 I_ERR_FILE_NBYTES_WRONG)))
2929 * For nlink repair, it may create a dir and add link, so
2930 * 2 for parent(256)'s dir_index and dir_item
2931 * 2 for lost+found dir's inode_item and inode_ref
2932 * 1 for the new inode_ref of the file
2933 * 2 for lost+found dir's dir_index and dir_item for the file
2935 trans = btrfs_start_transaction(root, 7);
2937 return PTR_ERR(trans);
2939 btrfs_init_path(&path);
2940 if (rec->errors & I_ERR_NO_INODE_ITEM)
2941 ret = repair_inode_no_item(trans, root, &path, rec);
2942 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2943 ret = repair_inode_orphan_extent(trans, root, &path, rec);
2944 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2945 ret = repair_inode_discount_extent(trans, root, &path, rec);
2946 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2947 ret = repair_inode_isize(trans, root, &path, rec);
2948 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2949 ret = repair_inode_orphan_item(trans, root, &path, rec);
2950 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2951 ret = repair_inode_nlinks(trans, root, &path, rec);
2952 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2953 ret = repair_inode_nbytes(trans, root, &path, rec);
/* Commit whatever succeeded; partial repairs are still an improvement. */
2954 btrfs_commit_transaction(trans, root);
2955 btrfs_release_path(&path);
/*
 * Validate (and, in repair mode, fix) every inode record collected for
 * @root's tree in @inode_cache.
 *
 * Phases: record the highest in-use ino (for later lost+found allocation),
 * repair backrefs first (they can change other error bits), verify or
 * recreate the root directory, then walk all records checking orphan items,
 * inode presence and link counts, attempting try_repair_inode() where
 * enabled and printing unresolved errors otherwise.
 */
2959 static int check_inode_recs(struct btrfs_root *root,
2960 struct cache_tree *inode_cache)
2962 struct cache_extent *cache;
2963 struct ptr_node *node;
2964 struct inode_record *rec;
2965 struct inode_backref *backref;
2970 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* A dead root (refs == 0) should not have produced any inode records. */
2972 if (btrfs_root_refs(&root->root_item) == 0) {
2973 if (!cache_tree_empty(inode_cache))
2974 fprintf(stderr, "warning line %d\n", __LINE__);
2979 * We need to record the highest inode number for later 'lost+found'
2981 * We must select an ino not used/referred by any existing inode, or
2982 * 'lost+found' ino may be a missing ino in a corrupted leaf,
2983 * this may cause 'lost+found' dir has wrong nlinks.
2985 cache = last_cache_extent(inode_cache);
2987 node = container_of(cache, struct ptr_node, cache);
2989 if (rec->ino > root->highest_inode)
2990 root->highest_inode = rec->ino;
2994 * We need to repair backrefs first because we could change some of the
2995 * errors in the inode recs.
2997 * We also need to go through and delete invalid backrefs first and then
2998 * add the correct ones second. We do this because we may get EEXIST
2999 * when adding back the correct index because we hadn't yet deleted the
3002 * For example, if we were missing a dir index then the directories
3003 * isize would be wrong, so if we fixed the isize to what we thought it
3004 * would be and then fixed the backref we'd still have a invalid fs, so
3005 * we need to add back the dir index and then check to see if the isize
3010 if (stage == 3 && !err)
3013 cache = search_cache_extent(inode_cache, 0);
3014 while (repair && cache) {
3015 node = container_of(cache, struct ptr_node, cache);
3017 cache = next_cache_extent(cache);
3019 /* Need to free everything up and rescan */
3021 remove_cache_extent(inode_cache, &node->cache);
3023 free_inode_rec(rec);
3027 if (list_empty(&rec->backrefs))
3030 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Verify the root directory inode; recreate it in repair mode if missing. */
3044 rec = get_inode_rec(inode_cache, root_dirid, 0);
3045 BUG_ON(IS_ERR(rec));
3047 ret = check_root_dir(rec);
3049 fprintf(stderr, "root %llu root dir %llu error\n",
3050 (unsigned long long)root->root_key.objectid,
3051 (unsigned long long)root_dirid);
3052 print_inode_error(root, rec);
3057 struct btrfs_trans_handle *trans;
3059 trans = btrfs_start_transaction(root, 1);
3060 if (IS_ERR(trans)) {
3061 err = PTR_ERR(trans);
3066 "root %llu missing its root dir, recreating\n",
3067 (unsigned long long)root->objectid);
3069 ret = btrfs_make_root_dir(trans, root, root_dirid);
3072 btrfs_commit_transaction(trans, root);
3076 fprintf(stderr, "root %llu root dir %llu not found\n",
3077 (unsigned long long)root->root_key.objectid,
3078 (unsigned long long)root_dirid);
/* Main pass: drain the cache, checking/repairing each remaining record. */
3082 cache = search_cache_extent(inode_cache, 0);
3085 node = container_of(cache, struct ptr_node, cache);
3087 remove_cache_extent(inode_cache, &node->cache);
3089 if (rec->ino == root_dirid ||
3090 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3091 free_inode_rec(rec);
3095 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3096 ret = check_orphan_item(root, rec->ino);
3098 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3099 if (can_free_inode_rec(rec)) {
3100 free_inode_rec(rec);
3105 if (!rec->found_inode_item)
3106 rec->errors |= I_ERR_NO_INODE_ITEM;
3107 if (rec->found_link != rec->nlink)
3108 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3110 ret = try_repair_inode(root, rec);
3111 if (ret == 0 && can_free_inode_rec(rec)) {
3112 free_inode_rec(rec);
/* Repair failed or disabled: report the record and its bad backrefs. */
3118 if (!(repair && ret == 0))
3120 print_inode_error(root, rec);
3121 list_for_each_entry(backref, &rec->backrefs, list) {
3122 if (!backref->found_dir_item)
3123 backref->errors |= REF_ERR_NO_DIR_ITEM;
3124 if (!backref->found_dir_index)
3125 backref->errors |= REF_ERR_NO_DIR_INDEX;
3126 if (!backref->found_inode_ref)
3127 backref->errors |= REF_ERR_NO_INODE_REF;
3128 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3129 " namelen %u name %s filetype %d errors %x",
3130 (unsigned long long)backref->dir,
3131 (unsigned long long)backref->index,
3132 backref->namelen, backref->name,
3133 backref->filetype, backref->errors);
3134 print_ref_error(backref->errors);
3136 free_inode_rec(rec);
3138 return (error > 0) ? -1 : 0;
/*
 * Look up the root_record for @objectid in @root_cache, allocating and
 * inserting a fresh zeroed record on first use.  Returns ERR_PTR on
 * allocation failure or cache-insert collision.
 */
3141 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3144 struct cache_extent *cache;
3145 struct root_record *rec = NULL;
3148 cache = lookup_cache_extent(root_cache, objectid, 1);
3150 rec = container_of(cache, struct root_record, cache);
/* Cache miss: build a new record keyed by the root objectid. */
3152 rec = calloc(1, sizeof(*rec));
3154 return ERR_PTR(-ENOMEM);
3155 rec->objectid = objectid;
3156 INIT_LIST_HEAD(&rec->backrefs);
3157 rec->cache.start = objectid;
3158 rec->cache.size = 1;
3160 ret = insert_cache_extent(root_cache, &rec->cache);
/* NOTE(review): 'rec' is not freed on this path in the visible code — possible leak; confirm against elided lines. */
3162 return ERR_PTR(-EEXIST);
/*
 * Find the backref on @rec matching (ref_root, dir, name); allocate and
 * append a new one (with a NUL-terminated copy of @name) when absent.
 * @index is stored only for newly created backrefs.
 */
3167 static struct root_backref *get_root_backref(struct root_record *rec,
3168 u64 ref_root, u64 dir, u64 index,
3169 const char *name, int namelen)
3171 struct root_backref *backref;
3173 list_for_each_entry(backref, &rec->backrefs, list) {
3174 if (backref->ref_root != ref_root || backref->dir != dir ||
3175 backref->namelen != namelen)
3177 if (memcmp(name, backref->name, namelen))
/* Not found: allocate backref plus room for the name and a trailing NUL. */
3182 backref = calloc(1, sizeof(*backref) + namelen + 1);
3185 backref->ref_root = ref_root;
3187 backref->index = index;
3188 backref->namelen = namelen;
3189 memcpy(backref->name, name, namelen);
3190 backref->name[namelen] = '\0';
3191 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Destructor for a root_record cache entry: frees every queued backref,
 * then (via the elided tail) the record itself.  Used by the
 * FREE_EXTENT_CACHE_BASED_TREE helper below to tear down whole trees.
 */
3195 static void free_root_record(struct cache_extent *cache)
3197 struct root_record *rec;
3198 struct root_backref *backref;
3200 rec = container_of(cache, struct root_record, cache);
3201 while (!list_empty(&rec->backrefs)) {
3202 backref = to_root_backref(rec->backrefs.next);
3203 list_del(&backref->list);
/* Generates free_root_recs_tree() for cache trees of root_records. */
3210 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one piece of evidence (@item_type: DIR_ITEM, DIR_INDEX, ROOT_REF
 * or ROOT_BACKREF key) about the link from @ref_root/@dir to subvolume
 * @root_id, merging it into the matching root_backref and flagging
 * duplicates/index mismatches.  A backref becomes 'reachable' once both a
 * forward root ref and a dir item have been seen.
 */
3212 static int add_root_backref(struct cache_tree *root_cache,
3213 u64 root_id, u64 ref_root, u64 dir, u64 index,
3214 const char *name, int namelen,
3215 int item_type, int errors)
3217 struct root_record *rec;
3218 struct root_backref *backref;
3220 rec = get_root_rec(root_cache, root_id);
3221 BUG_ON(IS_ERR(rec));
3222 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3225 backref->errors |= errors;
/* DIR_ITEM keys carry no index; for all others, cross-check it. */
3227 if (item_type != BTRFS_DIR_ITEM_KEY) {
3228 if (backref->found_dir_index || backref->found_back_ref ||
3229 backref->found_forward_ref) {
3230 if (backref->index != index)
3231 backref->errors |= REF_ERR_INDEX_UNMATCH;
3233 backref->index = index;
3237 if (item_type == BTRFS_DIR_ITEM_KEY) {
3238 if (backref->found_forward_ref)
3240 backref->found_dir_item = 1;
3241 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3242 backref->found_dir_index = 1;
3243 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3244 if (backref->found_forward_ref)
3245 backref->errors |= REF_ERR_DUP_ROOT_REF;
3246 else if (backref->found_dir_item)
3248 backref->found_forward_ref = 1;
3249 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3250 if (backref->found_back_ref)
3251 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3252 backref->found_back_ref = 1;
3257 if (backref->found_forward_ref && backref->found_dir_item)
3258 backref->reachable = 1;
/*
 * Move per-tree root references collected in @src_cache (inode_records of
 * subvolume entries) into the global @dst_cache as root backrefs.
 * Reloc-tree records are simply discarded.  Each record is consumed and
 * freed as it is processed.
 */
3262 static int merge_root_recs(struct btrfs_root *root,
3263 struct cache_tree *src_cache,
3264 struct cache_tree *dst_cache)
3266 struct cache_extent *cache;
3267 struct ptr_node *node;
3268 struct inode_record *rec;
3269 struct inode_backref *backref;
/* Reloc trees carry no subvolume references worth keeping. */
3272 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3273 free_inode_recs_tree(src_cache);
3278 cache = search_cache_extent(src_cache, 0);
3281 node = container_of(cache, struct ptr_node, cache);
3283 remove_cache_extent(src_cache, &node->cache);
3286 ret = is_child_root(root, root->objectid, rec->ino);
/* Translate each dir item/index backref into a root backref entry. */
3292 list_for_each_entry(backref, &rec->backrefs, list) {
3293 BUG_ON(backref->found_inode_ref);
3294 if (backref->found_dir_item)
3295 add_root_backref(dst_cache, rec->ino,
3296 root->root_key.objectid, backref->dir,
3297 backref->index, backref->name,
3298 backref->namelen, BTRFS_DIR_ITEM_KEY,
3300 if (backref->found_dir_index)
3301 add_root_backref(dst_cache, rec->ino,
3302 root->root_key.objectid, backref->dir,
3303 backref->index, backref->name,
3304 backref->namelen, BTRFS_DIR_INDEX_KEY,
3308 free_inode_rec(rec);
/*
 * Verify reachability and cross-reference consistency of every subvolume
 * root in @root_cache.
 *
 * First iteratively clears 'reachable' on backrefs whose referencing root
 * is itself unreferenced (cannot detect reference cycles, as noted below),
 * then reports roots with no remaining references and backrefs missing any
 * of their four expected items.  Returns 1 if errors remain, 0 otherwise.
 */
3315 static int check_root_refs(struct btrfs_root *root,
3316 struct cache_tree *root_cache)
3318 struct root_record *rec;
3319 struct root_record *ref_root;
3320 struct root_backref *backref;
3321 struct cache_extent *cache;
/* The top-level FS tree is always considered referenced. */
3327 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3328 BUG_ON(IS_ERR(rec));
3331 /* fixme: this can not detect circular references */
3334 cache = search_cache_extent(root_cache, 0);
3338 rec = container_of(cache, struct root_record, cache);
3339 cache = next_cache_extent(cache);
3341 if (rec->found_ref == 0)
3344 list_for_each_entry(backref, &rec->backrefs, list) {
3345 if (!backref->reachable)
/* A backref only counts if its referencing root is itself referenced. */
3348 ref_root = get_root_rec(root_cache,
3350 BUG_ON(IS_ERR(ref_root));
3351 if (ref_root->found_ref > 0)
3354 backref->reachable = 0;
3356 if (rec->found_ref == 0)
/* Reporting pass over the (now settled) reachability information. */
3362 cache = search_cache_extent(root_cache, 0);
3366 rec = container_of(cache, struct root_record, cache);
3367 cache = next_cache_extent(cache);
3369 if (rec->found_ref == 0 &&
3370 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3371 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3372 ret = check_orphan_item(root->fs_info->tree_root,
3378 * If we don't have a root item then we likely just have
3379 * a dir item in a snapshot for this root but no actual
3380 * ref key or anything so it's meaningless.
3382 if (!rec->found_root_item)
3385 fprintf(stderr, "fs tree %llu not referenced\n",
3386 (unsigned long long)rec->objectid);
3390 if (rec->found_ref > 0 && !rec->found_root_item)
3392 list_for_each_entry(backref, &rec->backrefs, list) {
3393 if (!backref->found_dir_item)
3394 backref->errors |= REF_ERR_NO_DIR_ITEM;
3395 if (!backref->found_dir_index)
3396 backref->errors |= REF_ERR_NO_DIR_INDEX;
3397 if (!backref->found_back_ref)
3398 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3399 if (!backref->found_forward_ref)
3400 backref->errors |= REF_ERR_NO_ROOT_REF;
3401 if (backref->reachable && backref->errors)
3408 fprintf(stderr, "fs tree %llu refs %u %s\n",
3409 (unsigned long long)rec->objectid, rec->found_ref,
3410 rec->found_root_item ? "" : "not found");
3412 list_for_each_entry(backref, &rec->backrefs, list) {
3413 if (!backref->reachable)
3415 if (!backref->errors && rec->found_root_item)
3417 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3418 " index %llu namelen %u name %s errors %x\n",
3419 (unsigned long long)backref->ref_root,
3420 (unsigned long long)backref->dir,
3421 (unsigned long long)backref->index,
3422 backref->namelen, backref->name,
3424 print_ref_error(backref->errors);
3427 return errors > 0 ? 1 : 0;
/*
 * Decode a ROOT_REF or ROOT_BACKREF item at @slot of @eb and feed it into
 * the root backref cache.  For ROOT_REF the key is (parent, REF, child);
 * for ROOT_BACKREF it is (child, BACKREF, parent) — hence the swapped
 * objectid/offset arguments below.  Over-long names are truncated to
 * BTRFS_NAME_LEN and flagged with REF_ERR_NAME_TOO_LONG.
 */
3430 static int process_root_ref(struct extent_buffer *eb, int slot,
3431 struct btrfs_key *key,
3432 struct cache_tree *root_cache)
3438 struct btrfs_root_ref *ref;
3439 char namebuf[BTRFS_NAME_LEN];
3442 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3444 dirid = btrfs_root_ref_dirid(eb, ref);
3445 index = btrfs_root_ref_sequence(eb, ref);
3446 name_len = btrfs_root_ref_name_len(eb, ref);
3448 if (name_len <= BTRFS_NAME_LEN) {
3452 len = BTRFS_NAME_LEN;
3453 error = REF_ERR_NAME_TOO_LONG;
/* The name is stored inline, immediately after the root_ref struct. */
3455 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3457 if (key->type == BTRFS_ROOT_REF_KEY) {
3458 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3459 index, namebuf, len, key->type, error);
3461 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3462 index, namebuf, len, key->type, error);
/* Destructor for btrfs_corrupt_block cache entries; used by the helper
 * macro below to generate free_corrupt_blocks_tree(). */
3467 static void free_corrupt_block(struct cache_extent *cache)
3469 struct btrfs_corrupt_block *corrupt;
3471 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3475 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3478 * Repair the btree of the given root.
3480 * The fix is to remove the node key in the corrupt_blocks cache_tree,
3481 * then rebalance the tree.
3482 * After the fix, the btree should be writeable.
3484 static int repair_btree(struct btrfs_root *root,
3485 struct cache_tree *corrupt_blocks)
3487 struct btrfs_trans_handle *trans;
3488 struct btrfs_path *path;
3489 struct btrfs_corrupt_block *corrupt;
3490 struct cache_extent *cache;
3491 struct btrfs_key key;
3496 if (cache_tree_empty(corrupt_blocks))
3499 path = btrfs_alloc_path();
3503 trans = btrfs_start_transaction(root, 1);
3504 if (IS_ERR(trans)) {
3505 ret = PTR_ERR(trans);
3506 fprintf(stderr, "Error starting transaction: %s\n",
/* Pass 1: drop the pointer to each corrupted block from its parent. */
3510 cache = first_cache_extent(corrupt_blocks);
3512 corrupt = container_of(cache, struct btrfs_corrupt_block,
3514 level = corrupt->level;
3515 path->lowest_level = level;
3516 key.objectid = corrupt->key.objectid;
3517 key.type = corrupt->key.type;
3518 key.offset = corrupt->key.offset;
3521 * Here we don't want to do any tree balance, since it may
3522 * cause a balance with corrupted brother leaf/node,
3523 * so ins_len set to 0 here.
3524 * Balance will be done after all corrupt node/leaf is deleted.
3526 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3529 offset = btrfs_node_blockptr(path->nodes[level],
3530 path->slots[level]);
3532 /* Remove the ptr */
3533 ret = btrfs_del_ptr(trans, root, path, level,
3534 path->slots[level]);
3538 * Remove the corresponding extent
3539 * return value is not concerned.
3541 btrfs_release_path(path);
3542 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3543 0, root->root_key.objectid,
3545 cache = next_cache_extent(cache);
3548 /* Balance the btree using btrfs_search_slot() */
/* Pass 2: re-search each deleted key with ins_len = -1 to trigger rebalancing. */
3549 cache = first_cache_extent(corrupt_blocks);
3551 corrupt = container_of(cache, struct btrfs_corrupt_block,
3553 memcpy(&key, &corrupt->key, sizeof(key));
3554 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3557 /* return will always >0 since it won't find the item */
3559 btrfs_release_path(path);
3560 cache = next_cache_extent(cache);
3563 btrfs_commit_transaction(trans, root);
3565 btrfs_free_path(path);
/*
 * Check one fs/subvolume tree: walk every block from the root node down,
 * collecting inode/root records, then verify the collected inode records
 * and merge subvolume references into @root_cache.
 *
 * Corrupted tree blocks found during the walk are accumulated in a local
 * corrupt_blocks cache and, in repair mode, handed to repair_btree().
 */
3569 static int check_fs_root(struct btrfs_root *root,
3570 struct cache_tree *root_cache,
3571 struct walk_control *wc)
3577 struct btrfs_path path;
3578 struct shared_node root_node;
3579 struct root_record *rec;
3580 struct btrfs_root_item *root_item = &root->root_item;
3581 struct cache_tree corrupt_blocks;
3582 struct orphan_data_extent *orphan;
3583 struct orphan_data_extent *tmp;
3584 enum btrfs_tree_block_status status;
3585 struct node_refs nrefs;
3588 * Reuse the corrupt_block cache tree to record corrupted tree block
3590 * Unlike the usage in extent tree check, here we do it in a per
3591 * fs/subvol tree base.
3593 cache_tree_init(&corrupt_blocks);
3594 root->fs_info->corrupt_blocks = &corrupt_blocks;
3596 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3597 rec = get_root_rec(root_cache, root->root_key.objectid);
3598 BUG_ON(IS_ERR(rec));
3599 if (btrfs_root_refs(root_item) > 0)
3600 rec->found_root_item = 1;
3603 btrfs_init_path(&path);
3604 memset(&root_node, 0, sizeof(root_node));
3605 cache_tree_init(&root_node.root_cache);
3606 cache_tree_init(&root_node.inode_cache);
3607 memset(&nrefs, 0, sizeof(nrefs));
3609 /* Move the orphan extent record to corresponding inode_record */
3610 list_for_each_entry_safe(orphan, tmp,
3611 &root->orphan_data_extents, list) {
3612 struct inode_record *inode;
3614 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3616 BUG_ON(IS_ERR(inode));
3617 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3618 list_move(&orphan->list, &inode->orphan_extents);
3621 level = btrfs_header_level(root->node);
3622 memset(wc->nodes, 0, sizeof(wc->nodes));
3623 wc->nodes[level] = &root_node;
3624 wc->active_node = level;
3625 wc->root_level = level;
3627 /* We may not have checked the root block, lets do that now */
3628 if (btrfs_is_leaf(root->node))
3629 status = btrfs_check_leaf(root, NULL, root->node);
3631 status = btrfs_check_node(root, NULL, root->node);
3632 if (status != BTRFS_TREE_BLOCK_CLEAN)
/* Either start from the root node, or resume from drop_progress of a
 * partially-deleted snapshot. */
3635 if (btrfs_root_refs(root_item) > 0 ||
3636 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3637 path.nodes[level] = root->node;
3638 extent_buffer_get(root->node);
3639 path.slots[level] = 0;
3641 struct btrfs_key key;
3642 struct btrfs_disk_key found_key;
3644 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3645 level = root_item->drop_level;
3646 path.lowest_level = level;
3647 if (level > btrfs_header_level(root->node) ||
3648 level >= BTRFS_MAX_LEVEL) {
3649 error("ignoring invalid drop level: %u", level);
3652 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3655 btrfs_node_key(path.nodes[level], &found_key,
3657 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3658 sizeof(found_key)));
/* Iterative down/up walk over the whole tree. */
3662 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3668 wret = walk_up_tree(root, &path, wc, &level);
3675 btrfs_release_path(&path);
3677 if (!cache_tree_empty(&corrupt_blocks)) {
3678 struct cache_extent *cache;
3679 struct btrfs_corrupt_block *corrupt;
3681 printf("The following tree block(s) is corrupted in tree %llu:\n",
3682 root->root_key.objectid);
3683 cache = first_cache_extent(&corrupt_blocks);
3685 corrupt = container_of(cache,
3686 struct btrfs_corrupt_block,
3688 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3689 cache->start, corrupt->level,
3690 corrupt->key.objectid, corrupt->key.type,
3691 corrupt->key.offset);
3692 cache = next_cache_extent(cache);
3695 printf("Try to repair the btree for root %llu\n",
3696 root->root_key.objectid);
3697 ret = repair_btree(root, &corrupt_blocks);
3699 fprintf(stderr, "Failed to repair btree: %s\n",
3702 printf("Btree for root %llu is fixed\n",
3703 root->root_key.objectid);
3707 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3711 if (root_node.current) {
3712 root_node.current->checked = 1;
3713 maybe_free_inode_rec(&root_node.inode_cache,
3717 err = check_inode_recs(root, &root_node.inode_cache);
/* Tear down per-root state before returning. */
3721 free_corrupt_blocks_tree(&corrupt_blocks);
3722 root->fs_info->corrupt_blocks = NULL;
3723 free_orphan_data_extents(&root->orphan_data_extents);
/* Return nonzero if @objectid names a tree that should be checked as an
 * fs tree: the two reloc trees plus anything is_fstree() accepts. */
3727 static int fs_root_objectid(u64 objectid)
3729 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3730 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3732 return is_fstree(objectid);
/*
 * Iterate over every ROOT_ITEM in the tree of tree roots and run
 * check_fs_root() on each fs/subvolume tree; ROOT_REF/ROOT_BACKREF items
 * are fed into the root backref cache along the way.
 *
 * If the tree root node changes underneath us (a repair committed a
 * transaction), or a per-root check returns -EAGAIN, all collected records
 * are discarded and the scan restarts from the beginning.
 */
3735 static int check_fs_roots(struct btrfs_root *root,
3736 struct cache_tree *root_cache)
3738 struct btrfs_path path;
3739 struct btrfs_key key;
3740 struct walk_control wc;
3741 struct extent_buffer *leaf, *tree_node;
3742 struct btrfs_root *tmp_root;
3743 struct btrfs_root *tree_root = root->fs_info->tree_root;
3747 if (ctx.progress_enabled) {
3748 ctx.tp = TASK_FS_ROOTS;
3749 task_start(ctx.info);
3753 * Just in case we made any changes to the extent tree that weren't
3754 * reflected into the free space cache yet.
3757 reset_cached_block_groups(root->fs_info);
3758 memset(&wc, 0, sizeof(wc));
3759 cache_tree_init(&wc.shared);
3760 btrfs_init_path(&path);
3765 key.type = BTRFS_ROOT_ITEM_KEY;
3766 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3771 tree_node = tree_root->node;
/* The tree root was COWed by a repair — restart the whole scan. */
3773 if (tree_node != tree_root->node) {
3774 free_root_recs_tree(root_cache);
3775 btrfs_release_path(&path);
3778 leaf = path.nodes[0];
3779 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3780 ret = btrfs_next_leaf(tree_root, &path);
3786 leaf = path.nodes[0];
3788 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3789 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3790 fs_root_objectid(key.objectid)) {
/* Reloc roots are read uncached (and freed below); others go through
 * the fs_info root cache. */
3791 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3792 tmp_root = btrfs_read_fs_root_no_cache(
3793 root->fs_info, &key);
3795 key.offset = (u64)-1;
3796 tmp_root = btrfs_read_fs_root(
3797 root->fs_info, &key);
3799 if (IS_ERR(tmp_root)) {
3803 ret = check_fs_root(tmp_root, root_cache, &wc);
3804 if (ret == -EAGAIN) {
3805 free_root_recs_tree(root_cache);
3806 btrfs_release_path(&path);
3811 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3812 btrfs_free_fs_root(tmp_root);
3813 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3814 key.type == BTRFS_ROOT_BACKREF_KEY) {
3815 process_root_ref(leaf, path.slots[0], &key,
3822 btrfs_release_path(&path);
3824 free_extent_cache_tree(&wc.shared);
3825 if (!cache_tree_empty(&wc.shared))
3826 fprintf(stderr, "warning line %d\n", __LINE__);
3828 task_stop(ctx.info);
/*
 * Cross-check every backref attached to extent record @rec against what
 * was found in the extent tree and in the referencing trees: presence in
 * the extent tree, reference counts, disk bytenr/length agreement, and the
 * global refcount total.  When @print_errs is set each mismatch is printed;
 * the elided early-out path returns without printing otherwise.
 */
3833 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3835 struct list_head *cur = rec->backrefs.next;
3836 struct extent_backref *back;
3837 struct tree_backref *tback;
3838 struct data_backref *dback;
3842 while(cur != &rec->backrefs) {
3843 back = to_extent_backref(cur);
/* Backref seen in a referencing tree but missing from the extent tree. */
3845 if (!back->found_extent_tree) {
3849 if (back->is_data) {
3850 dback = to_data_backref(back);
3851 fprintf(stderr, "Backref %llu %s %llu"
3852 " owner %llu offset %llu num_refs %lu"
3853 " not found in extent tree\n",
3854 (unsigned long long)rec->start,
3855 back->full_backref ?
3857 back->full_backref ?
3858 (unsigned long long)dback->parent:
3859 (unsigned long long)dback->root,
3860 (unsigned long long)dback->owner,
3861 (unsigned long long)dback->offset,
3862 (unsigned long)dback->num_refs);
3864 tback = to_tree_backref(back);
3865 fprintf(stderr, "Backref %llu parent %llu"
3866 " root %llu not found in extent tree\n",
3867 (unsigned long long)rec->start,
3868 (unsigned long long)tback->parent,
3869 (unsigned long long)tback->root);
/* Tree backref in the extent tree with no matching tree block pointer. */
3872 if (!back->is_data && !back->found_ref) {
3876 tback = to_tree_backref(back);
3877 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3878 (unsigned long long)rec->start,
3879 back->full_backref ? "parent" : "root",
3880 back->full_backref ?
3881 (unsigned long long)tback->parent :
3882 (unsigned long long)tback->root, back);
3884 if (back->is_data) {
3885 dback = to_data_backref(back);
/* Local (per-backref) refcount must match the extent-tree num_refs. */
3886 if (dback->found_ref != dback->num_refs) {
3890 fprintf(stderr, "Incorrect local backref count"
3891 " on %llu %s %llu owner %llu"
3892 " offset %llu found %u wanted %u back %p\n",
3893 (unsigned long long)rec->start,
3894 back->full_backref ?
3896 back->full_backref ?
3897 (unsigned long long)dback->parent:
3898 (unsigned long long)dback->root,
3899 (unsigned long long)dback->owner,
3900 (unsigned long long)dback->offset,
3901 dback->found_ref, dback->num_refs, back);
3903 if (dback->disk_bytenr != rec->start) {
3907 fprintf(stderr, "Backref disk bytenr does not"
3908 " match extent record, bytenr=%llu, "
3909 "ref bytenr=%llu\n",
3910 (unsigned long long)rec->start,
3911 (unsigned long long)dback->disk_bytenr);
3914 if (dback->bytes != rec->nr) {
3918 fprintf(stderr, "Backref bytes do not match "
3919 "extent backref, bytenr=%llu, ref "
3920 "bytes=%llu, backref bytes=%llu\n",
3921 (unsigned long long)rec->start,
3922 (unsigned long long)rec->nr,
3923 (unsigned long long)dback->bytes);
/* Accumulate per-backref counts to compare with the global refcount. */
3926 if (!back->is_data) {
3929 dback = to_data_backref(back);
3930 found += dback->found_ref;
3933 if (found != rec->refs) {
3937 fprintf(stderr, "Incorrect global backref count "
3938 "on %llu found %llu wanted %llu\n",
3939 (unsigned long long)rec->start,
3940 (unsigned long long)found,
3941 (unsigned long long)rec->refs);
/* Detach and free every backref queued on extent record @rec. */
3947 static int free_all_extent_backrefs(struct extent_record *rec)
3949 struct extent_backref *back;
3950 struct list_head *cur;
3951 while (!list_empty(&rec->backrefs)) {
3952 cur = rec->backrefs.next;
3953 back = to_extent_backref(cur);
/* Drain @extent_cache, freeing each extent record and its backrefs. */
3960 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3961 struct cache_tree *extent_cache)
3963 struct cache_extent *cache;
3964 struct extent_record *rec;
3967 cache = first_cache_extent(extent_cache);
3970 rec = container_of(cache, struct extent_record, cache);
3971 remove_cache_extent(extent_cache, cache);
3972 free_all_extent_backrefs(rec);
/*
 * Free @rec (and remove it from @extent_cache) once every check has passed
 * and no error flags remain — i.e. keeping it would serve no purpose.
 */
3977 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3978 struct extent_record *rec)
3980 if (rec->content_checked && rec->owner_ref_checked &&
3981 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3982 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3983 !rec->bad_full_backref && !rec->crossing_stripes &&
3984 !rec->wrong_chunk_type) {
3985 remove_cache_extent(extent_cache, &rec->cache);
3986 free_all_extent_backrefs(rec);
3987 list_del_init(&rec->list);
/*
 * Verify that tree block @buf is really owned by the root recorded in its
 * header: first look for a matching non-full tree backref on @rec, then
 * fall back to searching the owner's fs tree for a parent node that points
 * at @buf->start.  Returns 0 when ownership is confirmed, 1 otherwise.
 */
3993 static int check_owner_ref(struct btrfs_root *root,
3994 struct extent_record *rec,
3995 struct extent_buffer *buf)
3997 struct extent_backref *node;
3998 struct tree_backref *back;
3999 struct btrfs_root *ref_root;
4000 struct btrfs_key key;
4001 struct btrfs_path path;
4002 struct extent_buffer *parent;
4007 list_for_each_entry(node, &rec->backrefs, list) {
4010 if (!node->found_ref)
4012 if (node->full_backref)
4014 back = to_tree_backref(node);
4015 if (btrfs_header_owner(buf) == back->root)
4018 BUG_ON(rec->is_root);
4020 /* try to find the block by search corresponding fs tree */
4021 key.objectid = btrfs_header_owner(buf);
4022 key.type = BTRFS_ROOT_ITEM_KEY;
4023 key.offset = (u64)-1;
4025 ref_root = btrfs_read_fs_root(root->fs_info, &key);
4026 if (IS_ERR(ref_root))
4029 level = btrfs_header_level(buf);
/* Use the block's first key to locate its slot in the owner tree. */
4031 btrfs_item_key_to_cpu(buf, &key, 0);
4033 btrfs_node_key_to_cpu(buf, &key, 0);
4035 btrfs_init_path(&path);
4036 path.lowest_level = level + 1;
4037 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4041 parent = path.nodes[level + 1];
4042 if (parent && buf->start == btrfs_node_blockptr(parent,
4043 path.slots[level + 1]))
4046 btrfs_release_path(&path);
4047 return found ? 0 : 1;
/* Return nonzero if @rec has a non-full tree backref rooted in the extent
 * tree, i.e. the record describes an extent-tree block itself. */
4050 static int is_extent_tree_record(struct extent_record *rec)
4052 struct list_head *cur = rec->backrefs.next;
4053 struct extent_backref *node;
4054 struct tree_backref *back;
4057 while(cur != &rec->backrefs) {
4058 node = to_extent_backref(cur);
4062 back = to_tree_backref(node);
4063 if (node->full_backref)
4065 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Note an I/O failure on the range [start, start+len): if it maps to an
 * extent-tree block record, register it as a corrupt extent record keyed
 * by the block's parent key so later repair passes can handle it.
 */
4072 static int record_bad_block_io(struct btrfs_fs_info *info,
4073 struct cache_tree *extent_cache,
4076 struct extent_record *rec;
4077 struct cache_extent *cache;
4078 struct btrfs_key key;
4080 cache = lookup_cache_extent(extent_cache, start, len);
4084 rec = container_of(cache, struct extent_record, cache);
4085 if (!is_extent_tree_record(rec))
4088 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4089 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot+1 of @buf to fix out-of-order keys.
 *
 * For internal nodes the two key_ptrs are exchanged directly (fixing up the
 * parent's low key if slot 0 changed).  For leaves, both item payloads and
 * item headers are swapped in place and btrfs_set_item_key_unsafe() is used
 * to rewrite the keys without triggering ordering checks.
 */
4092 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4093 struct extent_buffer *buf, int slot)
4095 if (btrfs_header_level(buf)) {
4096 struct btrfs_key_ptr ptr1, ptr2;
4098 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4099 sizeof(struct btrfs_key_ptr));
4100 read_extent_buffer(buf, &ptr2,
4101 btrfs_node_key_ptr_offset(slot + 1),
4102 sizeof(struct btrfs_key_ptr));
4103 write_extent_buffer(buf, &ptr1,
4104 btrfs_node_key_ptr_offset(slot + 1),
4105 sizeof(struct btrfs_key_ptr));
4106 write_extent_buffer(buf, &ptr2,
4107 btrfs_node_key_ptr_offset(slot),
4108 sizeof(struct btrfs_key_ptr));
/* Slot 0 changed: propagate the new low key up to the parent node. */
4110 struct btrfs_disk_key key;
4111 btrfs_node_key(buf, &key, 0);
4112 btrfs_fixup_low_keys(root, path, &key,
4113 btrfs_header_level(buf) + 1);
4116 struct btrfs_item *item1, *item2;
4117 struct btrfs_key k1, k2;
4118 char *item1_data, *item2_data;
4119 u32 item1_offset, item2_offset, item1_size, item2_size;
4121 item1 = btrfs_item_nr(slot);
4122 item2 = btrfs_item_nr(slot + 1);
4123 btrfs_item_key_to_cpu(buf, &k1, slot);
4124 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4125 item1_offset = btrfs_item_offset(buf, item1);
4126 item2_offset = btrfs_item_offset(buf, item2);
4127 item1_size = btrfs_item_size(buf, item1);
4128 item2_size = btrfs_item_size(buf, item2);
/* Copy both payloads out, then write each into the other's position. */
4130 item1_data = malloc(item1_size);
4133 item2_data = malloc(item2_size);
/* NOTE(review): whether item1_data is freed when the second malloc fails is on an elided line — confirm no leak. */
4139 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4140 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4142 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4143 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
/* Swap the item headers' offset/size so they match the moved payloads. */
4147 btrfs_set_item_offset(buf, item1, item2_offset);
4148 btrfs_set_item_offset(buf, item2, item1_offset);
4149 btrfs_set_item_size(buf, item1, item2_size);
4150 btrfs_set_item_size(buf, item2, item1_size);
4152 path->slots[0] = slot;
4153 btrfs_set_item_key_unsafe(root, path, &k2);
4154 path->slots[0] = slot + 1;
4155 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Walk the block at path->lowest_level and swap any adjacent pair of
 * keys that compares out of order, then mark the buffer dirty.
 * NOTE(review): the loop's node/leaf branch selectors, error handling of
 * swap_values() and the return are elided in this decimated view.
 */
4160 static int fix_key_order(struct btrfs_trans_handle *trans,
4161 			 struct btrfs_root *root,
4162 			 struct btrfs_path *path)
4164 	struct extent_buffer *buf;
4165 	struct btrfs_key k1, k2;
4167 	int level = path->lowest_level;
4170 	buf = path->nodes[level];
/* Compare every adjacent key pair (i, i + 1). */
4171 	for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4173 			btrfs_node_key_to_cpu(buf, &k1, i);
4174 			btrfs_node_key_to_cpu(buf, &k2, i + 1);
4176 			btrfs_item_key_to_cpu(buf, &k1, i);
4177 			btrfs_item_key_to_cpu(buf, &k2, i + 1);
/* Already ordered -- nothing to do for this pair. */
4179 		if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4181 		ret = swap_values(root, path, buf, i);
4184 		btrfs_mark_buffer_dirty(buf);
/*
 * Drop the item at @slot from leaf @buf by shifting the following item
 * headers down and decrementing nritems.  Only item types that fsck can
 * later reconstruct (dir index, extent item/metadata item, tree block
 * ref, extent data ref) are allowed to be deleted; anything else is
 * refused.  If slot 0 was removed the parent's low key is fixed up.
 * NOTE(review): the refusal return path and the item-data compaction are
 * elided in this view.
 */
4190 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4191 			     struct btrfs_root *root,
4192 			     struct btrfs_path *path,
4193 			     struct extent_buffer *buf, int slot)
4195 	struct btrfs_key key;
4196 	int nritems = btrfs_header_nritems(buf);
4198 	btrfs_item_key_to_cpu(buf, &key, slot);
4200 	/* These are all the keys we can deal with missing. */
4201 	if (key.type != BTRFS_DIR_INDEX_KEY &&
4202 	    key.type != BTRFS_EXTENT_ITEM_KEY &&
4203 	    key.type != BTRFS_METADATA_ITEM_KEY &&
4204 	    key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4205 	    key.type != BTRFS_EXTENT_DATA_REF_KEY)
4208 	printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4209 	       (unsigned long long)key.objectid, key.type,
4210 	       (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array left by the deleted slot. */
4211 	memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4212 			      btrfs_item_nr_offset(slot + 1),
4213 			      sizeof(struct btrfs_item) *
4214 			      (nritems - slot - 1));
4215 	btrfs_set_header_nritems(buf, nritems - 1);
4217 		struct btrfs_disk_key disk_key;
/* Slot 0 deleted: the leaf's first key changed, update the parent. */
4219 		btrfs_item_key(buf, &disk_key, 0);
4220 		btrfs_fixup_low_keys(root, path, &disk_key, 1);
4222 	btrfs_mark_buffer_dirty(buf);
/*
 * Repair bad item data offsets in the leaf at path->nodes[0].  Item 0's
 * data must end at BTRFS_LEAF_DATA_SIZE and every later item must end
 * exactly where its predecessor's data starts.  Items that run off the
 * end of the leaf or overlap a neighbour are deleted (when possible);
 * items that merely leave a gap are shifted up by the gap size.
 */
4226 static int fix_item_offset(struct btrfs_trans_handle *trans,
4227 			   struct btrfs_root *root,
4228 			   struct btrfs_path *path)
4230 	struct extent_buffer *buf;
4234 	/* We should only get this for leaves */
4235 	BUG_ON(path->lowest_level);
4236 	buf = path->nodes[0];
4238 	for (i = 0; i < btrfs_header_nritems(buf); i++) {
4239 		unsigned int shift = 0, offset;
/* First item: its data must end flush against the end of the leaf. */
4241 		if (i == 0 && btrfs_item_end_nr(buf, i) !=
4242 		    BTRFS_LEAF_DATA_SIZE(root)) {
4243 			if (btrfs_item_end_nr(buf, i) >
4244 			    BTRFS_LEAF_DATA_SIZE(root)) {
/* Data runs past the leaf: the item itself is bogus, try deleting it. */
4245 				ret = delete_bogus_item(trans, root, path,
4249 				fprintf(stderr, "item is off the end of the "
4250 					"leaf, can't fix\n");
/* Otherwise there is a gap before the leaf end: shift data up by it. */
4254 			shift = BTRFS_LEAF_DATA_SIZE(root) -
4255 				btrfs_item_end_nr(buf, i);
/* Later items: data must abut the previous item's data start. */
4256 		} else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4257 			   btrfs_item_offset_nr(buf, i - 1)) {
4258 			if (btrfs_item_end_nr(buf, i) >
4259 			    btrfs_item_offset_nr(buf, i - 1)) {
/* Items overlap: try to delete the offending item. */
4260 				ret = delete_bogus_item(trans, root, path,
4264 				fprintf(stderr, "items overlap, can't fix\n");
4268 			shift = btrfs_item_offset_nr(buf, i - 1) -
4269 				btrfs_item_end_nr(buf, i);
4274 		printf("Shifting item nr %d by %u bytes in block %llu\n",
4275 		       i, shift, (unsigned long long)buf->start);
4276 		offset = btrfs_item_offset_nr(buf, i);
/* Move the item's data up by @shift and record the new offset. */
4277 		memmove_extent_buffer(buf,
4278 				      btrfs_leaf_data(buf) + offset + shift,
4279 				      btrfs_leaf_data(buf) + offset,
4280 				      btrfs_item_size_nr(buf, i));
4281 		btrfs_set_item_offset(buf, btrfs_item_nr(i),
4283 		btrfs_mark_buffer_dirty(buf);
4287 	 * We may have moved things, in which case we want to exit so we don't
4288 	 * write those changes out. Once we have proper abort functionality in
4289 	 * progs this can be changed to something nicer.
4296  * Attempt to fix basic block failures. If we can't fix it for whatever reason
4297  * then just return -EIO.
/*
 * Only BAD_KEY_ORDER and INVALID_OFFSETS are repairable here.  For every
 * root that references @buf (via btrfs_find_all_roots) we start a
 * transaction, COW-search down to the block (skipping block checks so the
 * bad block can actually be reached), apply fix_key_order() or
 * fix_item_offset(), and commit.
 * NOTE(review): several error/cleanup paths and the final return are
 * elided in this decimated view.
 */
4299 static int try_to_fix_bad_block(struct btrfs_root *root,
4300 				struct extent_buffer *buf,
4301 				enum btrfs_tree_block_status status)
4303 	struct btrfs_trans_handle *trans;
4304 	struct ulist *roots;
4305 	struct ulist_node *node;
4306 	struct btrfs_root *search_root;
4307 	struct btrfs_path *path;
4308 	struct ulist_iterator iter;
4309 	struct btrfs_key root_key, key;
/* Anything else is not fixable by this routine. */
4312 	if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4313 	    status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4316 	path = btrfs_alloc_path();
4320 	ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4323 		btrfs_free_path(path);
/* Repair the block once per referencing root. */
4327 	ULIST_ITER_INIT(&iter);
4328 	while ((node = ulist_next(roots, &iter))) {
4329 		root_key.objectid = node->val;
4330 		root_key.type = BTRFS_ROOT_ITEM_KEY;
4331 		root_key.offset = (u64)-1;
4333 		search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4340 		trans = btrfs_start_transaction(search_root, 0);
4341 		if (IS_ERR(trans)) {
4342 			ret = PTR_ERR(trans);
/* Descend with COW to the bad block's level; key taken from its slot 0. */
4346 		path->lowest_level = btrfs_header_level(buf);
/* The block is known-bad: don't let the search reject it. */
4347 		path->skip_check_block = 1;
4348 		if (path->lowest_level)
4349 			btrfs_node_key_to_cpu(buf, &key, 0);
4351 			btrfs_item_key_to_cpu(buf, &key, 0);
4352 		ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4355 			btrfs_commit_transaction(trans, search_root);
4358 		if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4359 			ret = fix_key_order(trans, search_root, path);
4360 		else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4361 			ret = fix_item_offset(trans, search_root, path);
4363 			btrfs_commit_transaction(trans, search_root);
4366 		btrfs_release_path(path);
4367 		btrfs_commit_transaction(trans, search_root);
4370 	btrfs_free_path(path);
/*
 * Validate the tree block @buf against its extent record: record its
 * generation, first key and level, then run btrfs_check_leaf()/
 * btrfs_check_node().  If the block is bad, optionally attempt
 * try_to_fix_bad_block() (in repair mode, presumably -- the guard line is
 * elided here).  Finally mark content/owner-ref as checked; owner refs of
 * non-FULL_BACKREF blocks are verified via check_owner_ref().
 */
4374 static int check_block(struct btrfs_root *root,
4375 		       struct cache_tree *extent_cache,
4376 		       struct extent_buffer *buf, u64 flags)
4378 	struct extent_record *rec;
4379 	struct cache_extent *cache;
4380 	struct btrfs_key key;
4381 	enum btrfs_tree_block_status status;
4385 	cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4388 	rec = container_of(cache, struct extent_record, cache);
4389 	rec->generation = btrfs_header_generation(buf);
4391 	level = btrfs_header_level(buf);
4392 	if (btrfs_header_nritems(buf) > 0) {
4395 			btrfs_item_key_to_cpu(buf, &key, 0);
4397 			btrfs_node_key_to_cpu(buf, &key, 0);
/* Remember the block's first objectid and level for later reporting. */
4399 		rec->info_objectid = key.objectid;
4401 	rec->info_level = level;
4403 	if (btrfs_is_leaf(buf))
4404 		status = btrfs_check_leaf(root, &rec->parent_key, buf);
4406 		status = btrfs_check_node(root, &rec->parent_key, buf);
4408 	if (status != BTRFS_TREE_BLOCK_CLEAN) {
4410 			status = try_to_fix_bad_block(root, buf, status);
4411 		if (status != BTRFS_TREE_BLOCK_CLEAN) {
4413 			fprintf(stderr, "bad block %llu\n",
4414 				(unsigned long long)buf->start);
4417 			 * Signal to callers we need to start the scan over
4418 			 * again since we'll have cowed blocks.
4423 	rec->content_checked = 1;
/* FULL_BACKREF blocks need no owner check; otherwise verify the owner. */
4424 	if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4425 		rec->owner_ref_checked = 1;
4427 		ret = check_owner_ref(root, rec, buf);
4429 			rec->owner_ref_checked = 1;
/* Record may now be fully satisfied -- drop it if so. */
4433 	maybe_free_extent_rec(extent_cache, rec);
/*
 * Find an existing tree backref on @rec matching @parent (for full
 * backrefs) or @root (for non-full backrefs); NULL-equivalent result if
 * none (the return statements are elided in this view).
 */
4437 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4438 					      u64 parent, u64 root)
4440 	struct list_head *cur = rec->backrefs.next;
4441 	struct extent_backref *node;
4442 	struct tree_backref *back;
4444 	while(cur != &rec->backrefs) {
4445 		node = to_extent_backref(cur);
4449 		back = to_tree_backref(node);
/* parent != 0: caller wants a full backref keyed by parent bytenr. */
4451 			if (!node->full_backref)
4453 			if (parent == back->parent)
/* otherwise: caller wants a root-keyed (non-full) backref. */
4456 			if (node->full_backref)
4458 			if (back->root == root)
/*
 * Allocate and attach a new tree backref to @rec.  With @parent set it
 * becomes a full backref (keyed by parent bytenr), otherwise a root-keyed
 * backref.  NOTE(review): malloc-failure handling and the assignments of
 * back->root / return value are elided in this view.
 */
4465 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4466 						u64 parent, u64 root)
4468 	struct tree_backref *ref = malloc(sizeof(*ref));
4472 	memset(&ref->node, 0, sizeof(ref->node));
4474 		ref->parent = parent;
4475 		ref->node.full_backref = 1;
4478 		ref->node.full_backref = 0;
4480 	list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Find an existing data backref on @rec.  Full backrefs match on
 * @parent; non-full backrefs match on (@root, @owner, @offset).  When
 * @found_ref is set and the candidate was already found on disk, its
 * bytes/disk_bytenr must also agree with @bytes/@disk_bytenr -- a
 * mismatch makes the candidate unusable (handling of that case is
 * elided in this view).
 */
4485 static struct data_backref *find_data_backref(struct extent_record *rec,
4486 						u64 parent, u64 root,
4487 						u64 owner, u64 offset,
4489 						u64 disk_bytenr, u64 bytes)
4491 	struct list_head *cur = rec->backrefs.next;
4492 	struct extent_backref *node;
4493 	struct data_backref *back;
4495 	while(cur != &rec->backrefs) {
4496 		node = to_extent_backref(cur);
4500 		back = to_data_backref(node);
/* parent-keyed (full) backref lookup. */
4502 			if (!node->full_backref)
4504 			if (parent == back->parent)
/* (root, owner, offset)-keyed lookup. */
4507 			if (node->full_backref)
4509 			if (back->root == root && back->owner == owner &&
4510 			    back->offset == offset) {
4511 				if (found_ref && node->found_ref &&
4512 				    (back->bytes != bytes ||
4513 				    back->disk_bytenr != disk_bytenr))
/*
 * Allocate and attach a new data backref to @rec, keyed either by
 * @parent (full backref) or by (@root, @owner, @offset).  The extent
 * record's max_size is raised to @max_size if needed.
 * NOTE(review): malloc-failure handling, the root/owner assignments and
 * the return are elided in this view.
 */
4522 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4523 						u64 parent, u64 root,
4524 						u64 owner, u64 offset,
4527 	struct data_backref *ref = malloc(sizeof(*ref));
4531 	memset(&ref->node, 0, sizeof(ref->node));
4532 	ref->node.is_data = 1;
4535 		ref->parent = parent;
4538 		ref->node.full_backref = 1;
4542 		ref->offset = offset;
4543 		ref->node.full_backref = 0;
4545 	ref->bytes = max_size;
4548 	list_add_tail(&ref->node.list, &rec->backrefs);
4549 	if (max_size > rec->max_size)
4550 		rec->max_size = max_size;
4554 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the extent's kind (data vs metadata vs
 * system) disagrees with the flags of the block group it lives in.
 * SYSTEM blocks are also flagged as metadata, so the first backref's root
 * (chunk tree vs anything else) is used to tell the two apart.
 */
4555 static void check_extent_type(struct extent_record *rec)
4557 	struct btrfs_block_group_cache *bg_cache;
4559 	bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4563 	/* data extent, check chunk directly*/
4564 	if (!rec->metadata) {
4565 		if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4566 			rec->wrong_chunk_type = 1;
4570 	/* metadata extent, check the obvious case first */
4571 	if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4572 				 BTRFS_BLOCK_GROUP_METADATA))) {
4573 		rec->wrong_chunk_type = 1;
4578 	 * Check SYSTEM extent, as it's also marked as metadata, we can only
4579 	 * make sure it's a SYSTEM extent by its backref
4581 	if (!list_empty(&rec->backrefs)) {
4582 		struct extent_backref *node;
4583 		struct tree_backref *tback;
4586 		node = to_extent_backref(rec->backrefs.next);
4587 		if (node->is_data) {
4588 			/* tree block shouldn't have data backref */
4589 			rec->wrong_chunk_type = 1;
4592 		tback = container_of(node, struct tree_backref, node);
/* Chunk-tree owned blocks must live in a SYSTEM block group. */
4594 		if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4595 			bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4597 			bg_type = BTRFS_BLOCK_GROUP_METADATA;
4598 		if (!(bg_cache->flags & bg_type))
4599 			rec->wrong_chunk_type = 1;
4604  * Allocate a new extent record, fill default values from @tmpl and insert int
4605  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4606  * the cache, otherwise it fails.
/*
 * Returns the insert_cache_extent() result (0 on success; the failure
 * cleanup path is elided in this view).  Also accounts rec->nr into the
 * global bytes_used counter and, for metadata, records whether the
 * extent crosses a stripe boundary.
 */
4608 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4609 				   struct extent_record *tmpl)
4611 	struct extent_record *rec;
4614 	rec = malloc(sizeof(*rec));
/* Copy template fields; nr is normalized to at least max_size. */
4617 	rec->start = tmpl->start;
4618 	rec->max_size = tmpl->max_size;
4619 	rec->nr = max(tmpl->nr, tmpl->max_size);
4620 	rec->found_rec = tmpl->found_rec;
4621 	rec->content_checked = tmpl->content_checked;
4622 	rec->owner_ref_checked = tmpl->owner_ref_checked;
4623 	rec->num_duplicates = 0;
4624 	rec->metadata = tmpl->metadata;
4625 	rec->flag_block_full_backref = FLAG_UNSET;
4626 	rec->bad_full_backref = 0;
4627 	rec->crossing_stripes = 0;
4628 	rec->wrong_chunk_type = 0;
4629 	rec->is_root = tmpl->is_root;
4630 	rec->refs = tmpl->refs;
4631 	rec->extent_item_refs = tmpl->extent_item_refs;
4632 	rec->parent_generation = tmpl->parent_generation;
4633 	INIT_LIST_HEAD(&rec->backrefs);
4634 	INIT_LIST_HEAD(&rec->dups);
4635 	INIT_LIST_HEAD(&rec->list);
4636 	memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4637 	rec->cache.start = tmpl->start;
4638 	rec->cache.size = tmpl->nr;
4639 	ret = insert_cache_extent(extent_cache, &rec->cache);
4644 	bytes_used += rec->nr;
/* Metadata extents must not straddle a BTRFS_STRIPE_LEN boundary. */
4647 		rec->crossing_stripes = check_crossing_stripes(global_info,
4648 				rec->start, global_info->tree_root->nodesize);
4649 	check_extent_type(rec);
4654  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4656  * - refs              - if found, increase refs
4657  * - is_root           - if found, set
4658  * - content_checked   - if found, set
4659  * - owner_ref_checked - if found, set
4661  * If not found, create a new one, initialize and insert.
/*
 * Overlapping-but-different extent items (different start, or a second
 * EXTENT_ITEM for the same record) are stashed on rec->dups and the
 * record is queued on the global duplicate_extents list for later repair.
 */
4663 static int add_extent_rec(struct cache_tree *extent_cache,
4664 			  struct extent_record *tmpl)
4666 	struct extent_record *rec;
4667 	struct cache_extent *cache;
4671 	cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4673 		rec = container_of(cache, struct extent_record, cache);
4677 			rec->nr = max(tmpl->nr, tmpl->max_size);
4680 		 * We need to make sure to reset nr to whatever the extent
4681 		 * record says was the real size, this way we can compare it to
4684 		if (tmpl->found_rec) {
/* Same range but different start, or already found: a duplicate. */
4685 			if (tmpl->start != rec->start || rec->found_rec) {
4686 				struct extent_record *tmp;
4689 				if (list_empty(&rec->list))
4690 					list_add_tail(&rec->list,
4691 						      &duplicate_extents);
4694 				 * We have to do this song and dance in case we
4695 				 * find an extent record that falls inside of
4696 				 * our current extent record but does not have
4697 				 * the same objectid.
4699 				tmp = malloc(sizeof(*tmp));
4702 				tmp->start = tmpl->start;
4703 				tmp->max_size = tmpl->max_size;
4706 				tmp->metadata = tmpl->metadata;
4707 				tmp->extent_item_refs = tmpl->extent_item_refs;
4708 				INIT_LIST_HEAD(&tmp->list);
4709 				list_add_tail(&tmp->list, &rec->dups);
4710 				rec->num_duplicates++;
/* A conflicting refcount (non-dup) is reported, then overwritten. */
4717 		if (tmpl->extent_item_refs && !dup) {
4718 			if (rec->extent_item_refs) {
4719 				fprintf(stderr, "block %llu rec "
4720 					"extent_item_refs %llu, passed %llu\n",
4721 					(unsigned long long)tmpl->start,
4722 					(unsigned long long)
4723 							rec->extent_item_refs,
4724 					(unsigned long long)tmpl->extent_item_refs);
4726 			rec->extent_item_refs = tmpl->extent_item_refs;
4730 		if (tmpl->content_checked)
4731 			rec->content_checked = 1;
4732 		if (tmpl->owner_ref_checked)
4733 			rec->owner_ref_checked = 1;
4734 		memcpy(&rec->parent_key, &tmpl->parent_key,
4735 				sizeof(tmpl->parent_key));
4736 		if (tmpl->parent_generation)
4737 			rec->parent_generation = tmpl->parent_generation;
4738 		if (rec->max_size < tmpl->max_size)
4739 			rec->max_size = tmpl->max_size;
4742 		 * A metadata extent can't cross stripe_len boundary, otherwise
4743 		 * kernel scrub won't be able to handle it.
4744 		 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4748 			rec->crossing_stripes = check_crossing_stripes(
4749 					global_info, rec->start,
4750 					global_info->tree_root->nodesize);
4751 		check_extent_type(rec);
4752 		maybe_free_extent_rec(extent_cache, rec);
/* Not in the cache yet: fall through to a fresh insertion. */
4756 	ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree-block backref for @bytenr, keyed by @parent (full
 * backref) or @root.  A placeholder extent record is created first when
 * @bytenr is unknown.  @found_ref distinguishes a ref found while
 * walking the tree from one found in the extent tree; finding either
 * twice is reported as a duplicate.
 */
4761 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4762 			    u64 parent, u64 root, int found_ref)
4764 	struct extent_record *rec;
4765 	struct tree_backref *back;
4766 	struct cache_extent *cache;
4769 	cache = lookup_cache_extent(extent_cache, bytenr, 1);
4771 		struct extent_record tmpl;
/* No record yet: insert a minimal placeholder so the backref can hang. */
4773 		memset(&tmpl, 0, sizeof(tmpl));
4774 		tmpl.start = bytenr;
4778 		ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4782 		/* really a bug in cache_extent implement now */
4783 		cache = lookup_cache_extent(extent_cache, bytenr, 1);
4788 	rec = container_of(cache, struct extent_record, cache);
4789 	if (rec->start != bytenr) {
4791 		 * Several cause, from unaligned bytenr to over lapping extents
4796 	back = find_tree_backref(rec, parent, root);
4798 		back = alloc_tree_backref(rec, parent, root);
4804 		if (back->node.found_ref) {
4805 			fprintf(stderr, "Extent back ref already exists "
4806 				"for %llu parent %llu root %llu \n",
4807 				(unsigned long long)bytenr,
4808 				(unsigned long long)parent,
4809 				(unsigned long long)root);
4811 		back->node.found_ref = 1;
4813 		if (back->node.found_extent_tree) {
4814 			fprintf(stderr, "Extent back ref already exists "
4815 				"for %llu parent %llu root %llu \n",
4816 				(unsigned long long)bytenr,
4817 				(unsigned long long)parent,
4818 				(unsigned long long)root);
4820 		back->node.found_extent_tree = 1;
4822 	check_extent_type(rec);
4823 	maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for @bytenr, keyed by @parent (shared/full) or
 * (@root, @owner, @offset).  A placeholder extent record sized by
 * @max_size is created when @bytenr is unknown.  With @found_ref the
 * backref comes from a file extent item (size must match any earlier
 * sighting); otherwise it comes from the extent tree and carries
 * @num_refs.
 */
4827 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4828 			    u64 parent, u64 root, u64 owner, u64 offset,
4829 			    u32 num_refs, int found_ref, u64 max_size)
4831 	struct extent_record *rec;
4832 	struct data_backref *back;
4833 	struct cache_extent *cache;
4836 	cache = lookup_cache_extent(extent_cache, bytenr, 1);
4838 		struct extent_record tmpl;
4840 		memset(&tmpl, 0, sizeof(tmpl));
4841 		tmpl.start = bytenr;
4843 		tmpl.max_size = max_size;
4845 		ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4849 		cache = lookup_cache_extent(extent_cache, bytenr, 1);
4854 	rec = container_of(cache, struct extent_record, cache);
4855 	if (rec->max_size < max_size)
4856 		rec->max_size = max_size;
4859 	 * If found_ref is set then max_size is the real size and must match the
4860 	 * existing refs. So if we have already found a ref then we need to
4861 	 * make sure that this ref matches the existing one, otherwise we need
4862 	 * to add a new backref so we can notice that the backrefs don't match
4863 	 * and we need to figure out who is telling the truth. This is to
4864 	 * account for that awful fsync bug I introduced where we'd end up with
4865 	 * a btrfs_file_extent_item that would have its length include multiple
4866 	 * prealloc extents or point inside of a prealloc extent.
4868 	back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4871 		back = alloc_data_backref(rec, parent, root, owner, offset,
/* A file-extent sighting: exactly one ref per item, sizes must agree. */
4877 		BUG_ON(num_refs != 1);
4878 		if (back->node.found_ref)
4879 			BUG_ON(back->bytes != max_size);
4880 		back->node.found_ref = 1;
4881 		back->found_ref += 1;
4882 		back->bytes = max_size;
4883 		back->disk_bytenr = bytenr;
/* Data extents don't get content/owner verification elsewhere. */
4885 		rec->content_checked = 1;
4886 		rec->owner_ref_checked = 1;
4888 		if (back->node.found_extent_tree) {
4889 			fprintf(stderr, "Extent back ref already exists "
4890 				"for %llu parent %llu root %llu "
4891 				"owner %llu offset %llu num_refs %lu\n",
4892 				(unsigned long long)bytenr,
4893 				(unsigned long long)parent,
4894 				(unsigned long long)root,
4895 				(unsigned long long)owner,
4896 				(unsigned long long)offset,
4897 				(unsigned long)num_refs);
4899 		back->num_refs = num_refs;
4900 		back->node.found_extent_tree = 1;
4902 	maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue [bytenr, bytenr + size) for scanning: insert into @seen first
 * (deduplication -- if it was already seen, the pending insert is
 * presumably skipped; the branch line is elided in this view), then into
 * @pending.
 */
4906 static int add_pending(struct cache_tree *pending,
4907 		       struct cache_tree *seen, u64 bytenr, u32 size)
4910 	ret = add_cache_extent(seen, bytenr, size);
4913 	add_cache_extent(pending, bytenr, size);
/*
 * Choose up to @bits_nr extents to process next, preferring (1) anything
 * already queued for readahead, then (2) tree nodes near @last (starting
 * the search 32K earlier to batch nearby blocks), then (3) the plain
 * pending queue.  When slack remains (> 8 free slots) additional pending
 * extents within 32K of the batch's end are appended.  Returns the number
 * of @bits entries filled (return statements elided in this view).
 */
4917 static int pick_next_pending(struct cache_tree *pending,
4918 			struct cache_tree *reada,
4919 			struct cache_tree *nodes,
4920 			u64 last, struct block_info *bits, int bits_nr,
4923 	unsigned long node_start = last;
4924 	struct cache_extent *cache;
/* Readahead queue has absolute priority: hand back its first entry. */
4927 	cache = search_cache_extent(reada, 0);
4929 		bits[0].start = cache->start;
4930 		bits[0].size = cache->size;
/* Back up a little so blocks just before @last are batched too. */
4935 	if (node_start > 32768)
4936 		node_start -= 32768;
4938 	cache = search_cache_extent(nodes, node_start);
4940 		cache = search_cache_extent(nodes, 0);
4943 		cache = search_cache_extent(pending, 0);
4948 			bits[ret].start = cache->start;
4949 			bits[ret].size = cache->size;
4950 			cache = next_cache_extent(cache);
4952 		} while (cache && ret < bits_nr);
4958 		bits[ret].start = cache->start;
4959 		bits[ret].size = cache->size;
4960 		cache = next_cache_extent(cache);
4962 	} while (cache && ret < bits_nr);
/* Spare room: pull in pending extents adjacent to the batch. */
4964 	if (bits_nr - ret > 8) {
4965 		u64 lookup = bits[0].start + bits[0].size;
4966 		struct cache_extent *next;
4967 		next = search_cache_extent(pending, lookup);
/* Stop once the next pending extent is more than 32K away. */
4969 			if (next->start - lookup > 32768)
4971 			bits[ret].start = next->start;
4972 			bits[ret].size = next->size;
4973 			lookup = next->start + next->size;
4977 			next = next_cache_extent(next);
/*
 * cache_tree_free_extents() callback: detach a chunk_record from its
 * lists before it is freed (the free() call is elided in this view).
 */
4985 static void free_chunk_record(struct cache_extent *cache)
4987 	struct chunk_record *rec;
4989 	rec = container_of(cache, struct chunk_record, cache);
4990 	list_del_init(&rec->list);
4991 	list_del_init(&rec->dextents);
/* Release every chunk_record held in @chunk_cache. */
4995 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4997 	cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * FREE_RB_BASED_TREE callback: free one device_record from the rb-tree
 * (the free() call is elided in this view).
 */
5000 static void free_device_record(struct rb_node *node)
5002 	struct device_record *rec;
5004 	rec = container_of(node, struct device_record, node);
/*
 * NOTE(review): the line below belongs to a following function whose
 * definition line (presumably free_device_cache()) is elided in this
 * view; it drains the whole device rb-tree via the callback above.
 */
5008 	FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the tree's extent cache and, on success, append it
 * to the tree's block_groups list.  Returns the insert_cache_extent()
 * result (the error early-return is elided in this view).
 */
5010 int insert_block_group_record(struct block_group_tree *tree,
5011 			      struct block_group_record *bg_rec)
5015 	ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5019 	list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * cache_tree_free_extents() callback: detach a block_group_record from
 * its list before it is freed (the free() call is elided in this view).
 */
5023 static void free_block_group_record(struct cache_extent *cache)
5025 	struct block_group_record *rec;
5027 	rec = container_of(cache, struct block_group_record, cache);
5028 	list_del_init(&rec->list);
/* Release every block_group_record held in @tree. */
5032 void free_block_group_tree(struct block_group_tree *tree)
5034 	cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the device-extent tree and mark it as an orphan of
 * both its chunk and its device until those are matched up later.
 * Returns the insert_cache_extent2() result (the error early-return is
 * elided in this view).
 */
5037 int insert_device_extent_record(struct device_extent_tree *tree,
5038 				struct device_extent_record *de_rec)
5043 	 * Device extent is a bit different from the other extents, because
5044 	 * the extents which belong to the different devices may have the
5045 	 * same start and size, so we need use the special extent cache
5046 	 * search/insert functions.
5048 	ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5052 	list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5053 	list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * cache_tree_free_extents() callback: unlink a device_extent_record from
 * whichever lists it is still on before it is freed (the free() call is
 * elided in this view).
 */
5057 static void free_device_extent_record(struct cache_extent *cache)
5059 	struct device_extent_record *rec;
5061 	rec = container_of(cache, struct device_extent_record, cache);
5062 	if (!list_empty(&rec->chunk_list))
5063 		list_del_init(&rec->chunk_list);
5064 	if (!list_empty(&rec->device_list))
5065 		list_del_init(&rec->device_list);
/* Release every device_extent_record held in @tree. */
5069 void free_device_extent_tree(struct device_extent_tree *tree)
5071 	cache_tree_free_extents(&tree->tree, free_device_extent_record);
5074 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 extent ref item: v0 refs below BTRFS_FIRST_FREE_OBJECTID
 * belong to tree blocks, everything else is a data ref with the v0
 * refcount.  key.objectid is the extent bytenr, key.offset the parent.
 */
5075 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5076 				 struct extent_buffer *leaf, int slot)
5078 	struct btrfs_extent_ref_v0 *ref0;
5079 	struct btrfs_key key;
5082 	btrfs_item_key_to_cpu(leaf, &key, slot);
5083 	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5084 	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5085 		ret = add_tree_backref(extent_cache, key.objectid, key.offset,
5088 		ret = add_data_backref(extent_cache, key.objectid, key.offset,
5089 				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk_record from the CHUNK_ITEM at @slot of @leaf.
 * The record is sized for the chunk's stripe count and keyed in the
 * cache by the chunk's logical start (key->offset) and length.
 * On allocation failure a message is printed (the exit/return lines are
 * elided in this view).
 */
5095 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5096 					    struct btrfs_key *key,
5099 	struct btrfs_chunk *ptr;
5100 	struct chunk_record *rec;
5103 	ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5104 	num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
/* Variable-size allocation: header plus num_stripes stripe entries. */
5106 	rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5108 		fprintf(stderr, "memory allocation failed\n");
5112 	INIT_LIST_HEAD(&rec->list);
5113 	INIT_LIST_HEAD(&rec->dextents);
5116 	rec->cache.start = key->offset;
5117 	rec->cache.size = btrfs_chunk_length(leaf, ptr);
5119 	rec->generation = btrfs_header_generation(leaf);
5121 	rec->objectid = key->objectid;
5122 	rec->type = key->type;
5123 	rec->offset = key->offset;
5125 	rec->length = rec->cache.size;
5126 	rec->owner = btrfs_chunk_owner(leaf, ptr);
5127 	rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5128 	rec->type_flags = btrfs_chunk_type(leaf, ptr);
5129 	rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5130 	rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5131 	rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5132 	rec->num_stripes = num_stripes;
5133 	rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* Copy per-stripe devid, offset and device UUID out of the item. */
5135 	for (i = 0; i < rec->num_stripes; ++i) {
5136 		rec->stripes[i].devid =
5137 			btrfs_stripe_devid_nr(leaf, ptr, i);
5138 		rec->stripes[i].offset =
5139 			btrfs_stripe_offset_nr(leaf, ptr, i);
5140 		read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5141 				(unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the CHUNK_ITEM at @slot with btrfs_check_chunk_valid(), build
 * a chunk_record for it and insert it into @chunk_cache.  Invalid chunks
 * are reported and ignored; a clash in the cache means the chunk range
 * already existed and is reported too (free of the duplicate record is
 * elided in this view).
 */
5148 static int process_chunk_item(struct cache_tree *chunk_cache,
5149 			      struct btrfs_key *key, struct extent_buffer *eb,
5152 	struct chunk_record *rec;
5153 	struct btrfs_chunk *chunk;
5156 	chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5158 	 * Do extra check for this chunk item,
5160 	 * It's still possible one can craft a leaf with CHUNK_ITEM, with
5161 	 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5162 	 * and owner<->key_type check.
5164 	ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5167 		error("chunk(%llu, %llu) is not valid, ignore it",
5168 		      key->offset, btrfs_chunk_length(eb, chunk));
5171 	rec = btrfs_new_chunk_record(eb, key, slot);
5172 	ret = insert_cache_extent(chunk_cache, &rec->cache);
5174 		fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5175 			rec->offset, rec->length);
/*
 * Build a device_record from the DEV_ITEM at @slot and insert it into the
 * devid-keyed rb-tree @dev_cache.  A duplicate devid is reported (free of
 * the duplicate record is elided in this view).
 */
5182 static int process_device_item(struct rb_root *dev_cache,
5183 		struct btrfs_key *key, struct extent_buffer *eb, int slot)
5185 	struct btrfs_dev_item *ptr;
5186 	struct device_record *rec;
5189 	ptr = btrfs_item_ptr(eb,
5190 		slot, struct btrfs_dev_item);
5192 	rec = malloc(sizeof(*rec));
5194 		fprintf(stderr, "memory allocation failed\n");
/* devid is first taken from the key, then overwritten from the item. */
5198 	rec->devid = key->offset;
5199 	rec->generation = btrfs_header_generation(eb);
5201 	rec->objectid = key->objectid;
5202 	rec->type = key->type;
5203 	rec->offset = key->offset;
5205 	rec->devid = btrfs_device_id(eb, ptr);
5206 	rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5207 	rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5209 	ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5211 		fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build an in-memory block_group_record from the BLOCK_GROUP_ITEM at
 * @slot: cached by [key->objectid, key->offset) with the group's flags
 * copied out of the item.  On allocation failure a message is printed
 * (the exit/return lines are elided in this view).
 */
5218 struct block_group_record *
5219 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5222 	struct btrfs_block_group_item *ptr;
5223 	struct block_group_record *rec;
5225 	rec = calloc(1, sizeof(*rec));
5227 		fprintf(stderr, "memory allocation failed\n");
5231 	rec->cache.start = key->objectid;
5232 	rec->cache.size = key->offset;
5234 	rec->generation = btrfs_header_generation(leaf);
5236 	rec->objectid = key->objectid;
5237 	rec->type = key->type;
5238 	rec->offset = key->offset;
5240 	ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5241 	rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5243 	INIT_LIST_HEAD(&rec->list);
/*
 * Build a block_group_record for the item at @slot and insert it into
 * @block_group_cache; duplicates are reported (free of the duplicate
 * record is elided in this view).
 */
5248 static int process_block_group_item(struct block_group_tree *block_group_cache,
5249 				    struct btrfs_key *key,
5250 				    struct extent_buffer *eb, int slot)
5252 	struct block_group_record *rec;
5255 	rec = btrfs_new_block_group_record(eb, key, slot);
5256 	ret = insert_block_group_record(block_group_cache, rec);
5258 		fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5259 			rec->objectid, rec->offset);
/*
 * Build an in-memory device_extent_record from the DEV_EXTENT item at
 * @slot: cached by (devid = key->objectid, start = key->offset, size =
 * extent length) with the owning chunk's objectid/offset copied from the
 * item.  On allocation failure a message is printed (the exit/return
 * lines are elided in this view).
 */
5266 struct device_extent_record *
5267 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5268 			       struct btrfs_key *key, int slot)
5270 	struct device_extent_record *rec;
5271 	struct btrfs_dev_extent *ptr;
5273 	rec = calloc(1, sizeof(*rec));
5275 		fprintf(stderr, "memory allocation failed\n");
5279 	rec->cache.objectid = key->objectid;
5280 	rec->cache.start = key->offset;
5282 	rec->generation = btrfs_header_generation(leaf);
5284 	rec->objectid = key->objectid;
5285 	rec->type = key->type;
5286 	rec->offset = key->offset;
5288 	ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5289 	rec->chunk_objecteid =
5290 		btrfs_dev_extent_chunk_objectid(leaf, ptr);
5292 		btrfs_dev_extent_chunk_offset(leaf, ptr);
5293 	rec->length = btrfs_dev_extent_length(leaf, ptr);
5294 	rec->cache.size = rec->length;
5296 	INIT_LIST_HEAD(&rec->chunk_list);
5297 	INIT_LIST_HEAD(&rec->device_list);
/*
 * Build a device_extent_record for the item at @slot and insert it into
 * @dev_extent_cache; duplicates are reported with their (devid, offset,
 * length) triple (free of the duplicate record is elided in this view).
 * NOTE(review): the return-type line of this function precedes the
 * visible span.
 */
5303 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5304 			   struct btrfs_key *key, struct extent_buffer *eb,
5307 	struct device_extent_record *rec;
5310 	rec = btrfs_new_device_extent_record(eb, key, slot);
5311 	ret = insert_device_extent_record(dev_extent_cache, rec);
5314 			"Device extent[%llu, %llu, %llu] existed.\n",
5315 			rec->objectid, rec->offset, rec->length);
/*
 * Parse one EXTENT_ITEM / METADATA_ITEM at @slot: sanity-check alignment
 * and length, create/update the extent record with its refcount, then
 * walk the inline refs and register each as a tree or data backref.
 * v0 items (smaller than btrfs_extent_item) are handled via the compat
 * path when built with BTRFS_COMPAT_EXTENT_TREE_V0.
 */
5322 static int process_extent_item(struct btrfs_root *root,
5323 			       struct cache_tree *extent_cache,
5324 			       struct extent_buffer *eb, int slot)
5326 	struct btrfs_extent_item *ei;
5327 	struct btrfs_extent_inline_ref *iref;
5328 	struct btrfs_extent_data_ref *dref;
5329 	struct btrfs_shared_data_ref *sref;
5330 	struct btrfs_key key;
5331 	struct extent_record tmpl;
5336 	u32 item_size = btrfs_item_size_nr(eb, slot);
5342 	btrfs_item_key_to_cpu(eb, &key, slot);
/* METADATA_ITEM: offset is the level, length is implicitly nodesize. */
5344 	if (key.type == BTRFS_METADATA_ITEM_KEY) {
5346 		num_bytes = root->nodesize;
5348 		num_bytes = key.offset;
5351 	if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5352 		error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5353 		      key.objectid, root->sectorsize);
5356 	if (item_size < sizeof(*ei)) {
5357 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5358 		struct btrfs_extent_item_v0 *ei0;
5359 		BUG_ON(item_size != sizeof(*ei0));
5360 		ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5361 		refs = btrfs_extent_refs_v0(eb, ei0);
/* v0 item carries only a refcount; record it and return. */
5365 		memset(&tmpl, 0, sizeof(tmpl));
5366 		tmpl.start = key.objectid;
5367 		tmpl.nr = num_bytes;
5368 		tmpl.extent_item_refs = refs;
5369 		tmpl.metadata = metadata;
5371 		tmpl.max_size = num_bytes;
5373 		return add_extent_rec(extent_cache, &tmpl);
5376 	ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5377 	refs = btrfs_extent_refs(eb, ei);
5378 	if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
/* Metadata extents must be exactly one node; data must be sector-aligned. */
5382 	if (metadata && num_bytes != root->nodesize) {
5383 		error("ignore invalid metadata extent, length %llu does not equal to %u",
5384 		      num_bytes, root->nodesize);
5387 	if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5388 		error("ignore invalid data extent, length %llu is not aligned to %u",
5389 		      num_bytes, root->sectorsize);
5393 	memset(&tmpl, 0, sizeof(tmpl));
5394 	tmpl.start = key.objectid;
5395 	tmpl.nr = num_bytes;
5396 	tmpl.extent_item_refs = refs;
5397 	tmpl.metadata = metadata;
5399 	tmpl.max_size = num_bytes;
5400 	add_extent_rec(extent_cache, &tmpl);
/* Inline refs start right after the extent item (plus tree block info
 * for an EXTENT_ITEM-keyed tree block). */
5402 	ptr = (unsigned long)(ei + 1);
5403 	if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5404 	    key.type == BTRFS_EXTENT_ITEM_KEY)
5405 		ptr += sizeof(struct btrfs_tree_block_info);
5407 	end = (unsigned long)ei + item_size;
5409 		iref = (struct btrfs_extent_inline_ref *)ptr;
5410 		type = btrfs_extent_inline_ref_type(eb, iref);
5411 		offset = btrfs_extent_inline_ref_offset(eb, iref);
5413 		case BTRFS_TREE_BLOCK_REF_KEY:
5414 			ret = add_tree_backref(extent_cache, key.objectid,
5417 				error("add_tree_backref failed: %s",
5420 		case BTRFS_SHARED_BLOCK_REF_KEY:
5421 			ret = add_tree_backref(extent_cache, key.objectid,
5424 				error("add_tree_backref failed: %s",
5427 		case BTRFS_EXTENT_DATA_REF_KEY:
5428 			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5429 			add_data_backref(extent_cache, key.objectid, 0,
5430 					btrfs_extent_data_ref_root(eb, dref),
5431 					btrfs_extent_data_ref_objectid(eb,
5433 					btrfs_extent_data_ref_offset(eb, dref),
5434 					btrfs_extent_data_ref_count(eb, dref),
5437 		case BTRFS_SHARED_DATA_REF_KEY:
5438 			sref = (struct btrfs_shared_data_ref *)(iref + 1);
5439 			add_data_backref(extent_cache, key.objectid, offset,
5441 					btrfs_shared_data_ref_count(eb, sref),
5445 			fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5446 				key.objectid, key.type, num_bytes);
/* Advance past this inline ref (size depends on its type). */
5449 		ptr += btrfs_extent_inline_ref_size(type);
/*
 * Verify the free-space cache covers exactly [offset, offset + bytes)
 * inside @cache.  Superblock mirror locations are carved out first (they
 * are never free space): the range is trimmed or split around each
 * super stripe, recursing for the left part of a split.  Whatever range
 * remains must match a free-space entry exactly, which is then consumed
 * via unlink_free_space().
 */
5456 static int check_cache_range(struct btrfs_root *root,
5457 			     struct btrfs_block_group_cache *cache,
5458 			     u64 offset, u64 bytes)
5460 	struct btrfs_free_space *entry;
/* Map every superblock mirror into this block group's logical space. */
5466 	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5467 		bytenr = btrfs_sb_offset(i);
5468 		ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5469 				       cache->key.objectid, bytenr, 0,
5470 				       &logical, &nr, &stripe_len);
/* Skip super stripes that don't intersect [offset, offset + bytes). */
5475 			if (logical[nr] + stripe_len <= offset)
5477 			if (offset + bytes <= logical[nr])
5479 			if (logical[nr] == offset) {
5480 				if (stripe_len >= bytes) {
/* Super stripe at the front: shrink the range past it. */
5484 				bytes -= stripe_len;
5485 				offset += stripe_len;
5486 			} else if (logical[nr] < offset) {
5487 				if (logical[nr] + stripe_len >=
/* Super stripe overlaps the front from below: clip to its end. */
5492 				bytes = (offset + bytes) -
5493 					(logical[nr] + stripe_len);
5494 				offset = logical[nr] + stripe_len;
5497 				 * Could be tricky, the super may land in the
5498 				 * middle of the area we're checking.  First
5499 				 * check the easiest case, it's at the end.
5501 				if (logical[nr] + stripe_len >=
5503 					bytes = logical[nr] - offset;
5507 				/* Check the left side */
5508 				ret = check_cache_range(root, cache,
5510 							logical[nr] - offset);
5516 				/* Now we continue with the right side */
5517 				bytes = (offset + bytes) -
5518 					(logical[nr] + stripe_len);
5519 				offset = logical[nr] + stripe_len;
/* The surviving range must be one exact free-space entry. */
5526 	entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5528 		fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5529 			offset, offset+bytes);
5533 	if (entry->offset != offset) {
5534 		fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5539 	if (entry->bytes != bytes) {
5540 		fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5541 			bytes, entry->bytes, offset);
/* Consume the entry so a later full-coverage check can verify nothing is left. */
5545 	unlink_free_space(cache->free_space_ctl, entry);
/*
 * verify_space_cache - cross-check a block group's loaded free-space cache
 * against the extent tree.
 *
 * Walks all EXTENT_ITEM/METADATA_ITEM records inside the block group; every
 * gap between consecutive allocated extents must be present in the cache
 * (verified via check_cache_range(), which also unlinks matched entries).
 * Any entry still left in the free-space ctl afterwards is an error.
 *
 * NOTE(review): this listing elides source lines (numbering gaps); comments
 * describe only the visible logic.
 */
5550 static int verify_space_cache(struct btrfs_root *root,
5551 struct btrfs_block_group_cache *cache)
5553 struct btrfs_path *path;
5554 struct extent_buffer *leaf;
5555 struct btrfs_key key;
5559 path = btrfs_alloc_path();
5563 root = root->fs_info->extent_root;
/* Never look below the primary superblock offset. */
5565 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5567 key.objectid = last;
5569 key.type = BTRFS_EXTENT_ITEM_KEY;
5571 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5576 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5577 ret = btrfs_next_leaf(root, path);
5585 leaf = path->nodes[0];
5586 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
/* Stop once we leave this block group. */
5587 if (key.objectid >= cache->key.offset + cache->key.objectid)
5589 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5590 key.type != BTRFS_METADATA_ITEM_KEY) {
5595 if (last == key.objectid) {
/* METADATA_ITEM offset is the level, so use nodesize for its length. */
5596 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5597 last = key.objectid + key.offset;
5599 last = key.objectid + root->nodesize;
/* Hole between previous extent end and this extent start must be free. */
5604 ret = check_cache_range(root, cache, last,
5605 key.objectid - last);
5608 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5609 last = key.objectid + key.offset;
5611 last = key.objectid + root->nodesize;
/* Tail of the block group past the last extent must also be free. */
5615 if (last < cache->key.objectid + cache->key.offset)
5616 ret = check_cache_range(root, cache, last,
5617 cache->key.objectid +
5618 cache->key.offset - last);
5621 btrfs_free_path(path);
/* check_cache_range() unlinked every matched entry; any remainder is bogus. */
5624 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5625 fprintf(stderr, "There are still entries left in the space "
/*
 * check_space_cache - validate the free-space cache (v1) or free-space tree
 * (v2) for every block group in the filesystem.
 *
 * Skips validation when the cache generation does not match the super
 * generation (the cache would be invalidated on mount anyway). For each
 * block group it loads the free-space data and runs verify_space_cache().
 * Returns -EINVAL if any block group failed verification, 0 otherwise.
 *
 * NOTE(review): this listing elides source lines (numbering gaps); comments
 * describe only the visible logic.
 */
5633 static int check_space_cache(struct btrfs_root *root)
5635 struct btrfs_block_group_cache *cache;
5636 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5640 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5641 btrfs_super_generation(root->fs_info->super_copy) !=
5642 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5643 printf("cache and super generation don't match, space cache "
5644 "will be invalidated\n");
5648 if (ctx.progress_enabled) {
5649 ctx.tp = TASK_FREE_SPACE;
5650 task_start(ctx.info);
/* Iterate block groups in address order. */
5654 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5658 start = cache->key.objectid + cache->key.offset;
5659 if (!cache->free_space_ctl) {
5660 if (btrfs_init_free_space_ctl(cache,
5661 root->sectorsize)) {
5666 btrfs_remove_free_space_cache(cache);
/* v2: load from the free-space tree, excluding super stripes first. */
5669 if (btrfs_fs_compat_ro(root->fs_info,
5670 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5671 ret = exclude_super_stripes(root, cache);
5673 fprintf(stderr, "could not exclude super stripes: %s\n",
5678 ret = load_free_space_tree(root->fs_info, cache);
5679 free_excluded_extents(root, cache);
5681 fprintf(stderr, "could not load free space tree: %s\n",
/* v1: load the classic space cache inode. */
5688 ret = load_free_space_cache(root->fs_info, cache);
5693 ret = verify_space_cache(root, cache);
5695 fprintf(stderr, "cache appears valid but isn't %Lu\n",
5696 cache->key.objectid);
5701 task_stop(ctx.info);
5703 return error ? -EINVAL : 0;
/*
 * check_extent_csums - read an extent's data and verify each sector against
 * the checksums stored in the csum leaf at @leaf_offset in @eb.
 *
 * Data is read in large chunks, then checked one sectorsize block at a time.
 * On mismatch the next mirror is tried (visible in the num_copies logic)
 * before giving up.
 *
 * NOTE(review): this listing elides source lines (numbering gaps), including
 * the error paths and the mirror-retry jump; comments describe only the
 * visible logic.
 */
5706 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5707 u64 num_bytes, unsigned long leaf_offset,
5708 struct extent_buffer *eb) {
5711 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5713 unsigned long csum_offset;
5717 u64 data_checked = 0;
/* Extent length must be sector-aligned or the csum layout is meaningless. */
5723 if (num_bytes % root->sectorsize)
5726 data = malloc(num_bytes);
5730 while (offset < num_bytes) {
5733 read_len = num_bytes - offset;
5734 /* read as much space once a time */
5735 ret = read_extent_data(root, data + offset,
5736 bytenr + offset, &read_len, mirror);
5740 /* verify every 4k data's checksum */
5741 while (data_checked < read_len) {
5743 tmp = offset + data_checked;
5745 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5746 csum, root->sectorsize);
5747 btrfs_csum_final(csum, (u8 *)&csum);
/* Locate the on-disk csum for this sector inside the csum item. */
5749 csum_offset = leaf_offset +
5750 tmp / root->sectorsize * csum_size;
5751 read_extent_buffer(eb, (char *)&csum_expected,
5752 csum_offset, csum_size);
5753 /* try another mirror */
5754 if (csum != csum_expected) {
5755 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5756 mirror, bytenr + tmp,
5757 csum, csum_expected);
5758 num_copies = btrfs_num_copies(
5759 &root->fs_info->mapping_tree,
5761 if (mirror < num_copies - 1) {
5766 data_checked += root->sectorsize;
/*
 * check_extent_exists - verify the extent tree fully covers the range
 * [bytenr, bytenr + num_bytes).
 *
 * Searches backwards to the extent item at or before @bytenr, then walks
 * forward subtracting every overlapping EXTENT_ITEM from the range. If an
 * extent ends in the middle of the range, the right-hand remainder is
 * checked recursively. Any bytes left uncovered are reported as csum ranges
 * with no extent record.
 *
 * NOTE(review): this listing elides source lines (numbering gaps); comments
 * describe only the visible logic.
 */
5775 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5778 struct btrfs_path *path;
5779 struct extent_buffer *leaf;
5780 struct btrfs_key key;
5783 path = btrfs_alloc_path();
5785 fprintf(stderr, "Error allocating path\n");
/* offset = -1 so the search lands at/after the last item for this bytenr. */
5789 key.objectid = bytenr;
5790 key.type = BTRFS_EXTENT_ITEM_KEY;
5791 key.offset = (u64)-1;
5794 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5797 fprintf(stderr, "Error looking up extent record %d\n", ret);
5798 btrfs_free_path(path);
5801 if (path->slots[0] > 0) {
5804 ret = btrfs_prev_leaf(root, path);
5807 } else if (ret > 0) {
5814 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5817 * Block group items come before extent items if they have the same
5818 * bytenr, so walk back one more just in case. Dear future traveller,
5819 * first congrats on mastering time travel. Now if it's not too much
5820 * trouble could you go back to 2006 and tell Chris to make the
5821 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5822 * EXTENT_ITEM_KEY please?
5824 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5825 if (path->slots[0] > 0) {
5828 ret = btrfs_prev_leaf(root, path);
5831 } else if (ret > 0) {
5836 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* Forward scan: subtract each overlapping extent from the range. */
5840 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5841 ret = btrfs_next_leaf(root, path);
5843 fprintf(stderr, "Error going to next leaf "
5845 btrfs_free_path(path);
5851 leaf = path->nodes[0];
5852 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5853 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent entirely before the range: keep scanning. */
5857 if (key.objectid + key.offset < bytenr) {
/* Extent entirely after the range: done scanning. */
5861 if (key.objectid > bytenr + num_bytes)
5864 if (key.objectid == bytenr) {
5865 if (key.offset >= num_bytes) {
5869 num_bytes -= key.offset;
5870 bytenr += key.offset;
5871 } else if (key.objectid < bytenr) {
5872 if (key.objectid + key.offset >= bytenr + num_bytes) {
5876 num_bytes = (bytenr + num_bytes) -
5877 (key.objectid + key.offset);
5878 bytenr = key.objectid + key.offset;
5880 if (key.objectid + key.offset < bytenr + num_bytes) {
5881 u64 new_start = key.objectid + key.offset;
5882 u64 new_bytes = bytenr + num_bytes - new_start;
5885 * Weird case, the extent is in the middle of
5886 * our range, we'll have to search one side
5887 * and then the other. Not sure if this happens
5888 * in real life, but no harm in coding it up
5889 * anyway just in case.
5891 btrfs_release_path(path);
5892 ret = check_extent_exists(root, new_start,
5895 fprintf(stderr, "Right section didn't "
5899 num_bytes = key.objectid - bytenr;
5902 num_bytes = key.objectid - bytenr;
/* Anything left uncovered means csums exist for unallocated space. */
5909 if (num_bytes && !ret) {
5910 fprintf(stderr, "There are no extents for csum range "
5911 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5915 btrfs_free_path(path);
/*
 * check_csums - walk the whole csum tree and validate its contents.
 *
 * For every EXTENT_CSUM item: optionally (when check_data_csum is set)
 * re-read the data and verify checksums via check_extent_csums(); also
 * coalesce contiguous csum ranges and, at each discontinuity, confirm via
 * check_extent_exists() that an extent record backs the accumulated range.
 *
 * NOTE(review): this listing elides source lines (numbering gaps); comments
 * describe only the visible logic.
 */
5919 static int check_csums(struct btrfs_root *root)
5921 struct btrfs_path *path;
5922 struct extent_buffer *leaf;
5923 struct btrfs_key key;
5924 u64 offset = 0, num_bytes = 0;
5925 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5929 unsigned long leaf_offset;
5931 root = root->fs_info->csum_root;
5932 if (!extent_buffer_uptodate(root->node)) {
5933 fprintf(stderr, "No valid csum tree found\n");
5937 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5938 key.type = BTRFS_EXTENT_CSUM_KEY;
5941 path = btrfs_alloc_path();
5945 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5947 fprintf(stderr, "Error searching csum tree %d\n", ret);
5948 btrfs_free_path(path);
5952 if (ret > 0 && path->slots[0])
5957 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5958 ret = btrfs_next_leaf(root, path);
5960 fprintf(stderr, "Error going to next leaf "
5967 leaf = path->nodes[0];
5969 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5970 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Bytes of data covered by this csum item: one csum per sector. */
5975 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5976 csum_size) * root->sectorsize;
5977 if (!check_data_csum)
5978 goto skip_csum_check;
5979 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5980 ret = check_extent_csums(root, key.offset, data_len,
/* Start a new run, or extend/flush the accumulated contiguous range. */
5986 offset = key.offset;
5987 } else if (key.offset != offset + num_bytes) {
5988 ret = check_extent_exists(root, offset, num_bytes);
5990 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5991 "there is no extent record\n",
5992 offset, offset+num_bytes);
5995 offset = key.offset;
5998 num_bytes += data_len;
6002 btrfs_free_path(path);
/*
 * is_dropped_key - lexicographic (objectid, type, offset) comparison used to
 * decide whether @key falls before @drop_key, i.e. in the portion of a
 * partially-dropped snapshot that has already been deleted and so should be
 * skipped during the scan.
 *
 * NOTE(review): the return statements are in lines elided from this listing;
 * the visible structure is the standard three-field key comparison.
 */
6006 static int is_dropped_key(struct btrfs_key *key,
6007 struct btrfs_key *drop_key) {
6008 if (key->objectid < drop_key->objectid)
6010 else if (key->objectid == drop_key->objectid) {
6011 if (key->type < drop_key->type)
6013 else if (key->type == drop_key->type) {
6014 if (key->offset < drop_key->offset)
6022 * Here are the rules for FULL_BACKREF.
6024 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6025 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6027 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
6028 * if it happened after the relocation occurred since we'll have dropped the
6029 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6030 * have no real way to know for sure.
6032 * We process the blocks one root at a time, and we start from the lowest root
6033 * objectid and go to the highest. So we can just lookup the owner backref for
6034 * the record and if we don't find it then we know it doesn't exist and we have
6037 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6038 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6039 * be set or not and then we can check later once we've gathered all the refs.
/*
 * calc_extent_flag - decide, from cached extent records and the tree block
 * itself, whether @buf's extent should carry BTRFS_BLOCK_FLAG_FULL_BACKREF,
 * applying the rules in the comment above. Also marks the record's
 * bad_full_backref when the recorded flag contradicts the computed one.
 *
 * NOTE(review): this listing elides source lines (numbering gaps); comments
 * describe only the visible logic.
 */
6041 static int calc_extent_flag(struct btrfs_root *root,
6042 struct cache_tree *extent_cache,
6043 struct extent_buffer *buf,
6044 struct root_item_record *ri,
6047 struct extent_record *rec;
6048 struct cache_extent *cache;
6049 struct tree_backref *tback;
6052 cache = lookup_cache_extent(extent_cache, buf->start, 1);
6053 /* we have added this extent before */
6057 rec = container_of(cache, struct extent_record, cache);
6060 * Except file/reloc tree, we can not have
6063 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* The root's own node cannot be shared via FULL_BACKREF. */
6068 if (buf->start == ri->bytenr)
6071 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6074 owner = btrfs_header_owner(buf);
6075 if (owner == ri->objectid)
/* No owner backref recorded: header owner is stale -> FULL_BACKREF. */
6078 tback = find_tree_backref(rec, 0, owner);
6083 if (rec->flag_block_full_backref != FLAG_UNSET &&
6084 rec->flag_block_full_backref != 0)
6085 rec->bad_full_backref = 1;
6088 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6089 if (rec->flag_block_full_backref != FLAG_UNSET &&
6090 rec->flag_block_full_backref != 1)
6091 rec->bad_full_backref = 1;
/*
 * report_mismatch_key_root - print a human-readable diagnostic stating that
 * an item of @key_type was found in a tree (@rootid) where that key type is
 * not valid. Pure reporting helper for check_type_with_root().
 */
6095 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6097 fprintf(stderr, "Invalid key type(");
6098 print_key_type(stderr, 0, key_type);
6099 fprintf(stderr, ") found in root(");
6100 print_objectid(stderr, rootid, 0);
6101 fprintf(stderr, ")\n");
6105 * Check if the key is valid with its extent buffer.
6107 * This is a early check in case invalid key exists in a extent buffer
6108 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * check_type_with_root - sanity-check that @key_type may legally appear in
 * the tree identified by @rootid (e.g. CHUNK_ITEMs only in the chunk tree,
 * EXTENT_ITEMs only in the extent tree). On mismatch it calls
 * report_mismatch_key_root().
 *
 * NOTE(review): the case label at 6121 compares against
 * BTRFS_CSUM_TREE_OBJECTID where the surrounding cases use key types; the
 * elided lines make it impossible to confirm intent here — verify against
 * the full source (upstream uses BTRFS_EXTENT_CSUM_KEY).
 */
6111 static int check_type_with_root(u64 rootid, u8 key_type)
6114 /* Only valid in chunk tree */
6115 case BTRFS_DEV_ITEM_KEY:
6116 case BTRFS_CHUNK_ITEM_KEY:
6117 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6120 /* valid in csum and log tree */
6121 case BTRFS_CSUM_TREE_OBJECTID:
6122 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6126 case BTRFS_EXTENT_ITEM_KEY:
6127 case BTRFS_METADATA_ITEM_KEY:
6128 case BTRFS_BLOCK_GROUP_ITEM_KEY:
6129 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6132 case BTRFS_ROOT_ITEM_KEY:
6133 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6136 case BTRFS_DEV_EXTENT_KEY:
6137 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6143 report_mismatch_key_root(key_type, rootid);
/*
 * run_next_block - process one tree block from the pending/readahead queues
 * during the extent-tree scan.
 *
 * Picks the next block (issuing readahead for upcoming ones), reads it,
 * resolves its FULL_BACKREF flag (from the extent tree or, with
 * init_extent_tree, via calc_extent_flag()), validates it with
 * check_block(), then:
 *   - for leaves: dispatches each item by key type to the matching
 *     process_*/add_*_backref helper and accumulates data byte statistics;
 *   - for nodes: records each child pointer as an extent record + tree
 *     backref and queues it on the nodes/pending trees, skipping keys
 *     already dropped per ri->drop_key/drop_level.
 * Finally updates the global btree byte counters and the old-backref
 * detection flag.
 *
 * NOTE(review): this listing elides source lines (numbering gaps) — many
 * braces, error paths and continue/break statements are not visible, so the
 * comments below describe only the visible logic.
 */
6147 static int run_next_block(struct btrfs_root *root,
6148 struct block_info *bits,
6151 struct cache_tree *pending,
6152 struct cache_tree *seen,
6153 struct cache_tree *reada,
6154 struct cache_tree *nodes,
6155 struct cache_tree *extent_cache,
6156 struct cache_tree *chunk_cache,
6157 struct rb_root *dev_cache,
6158 struct block_group_tree *block_group_cache,
6159 struct device_extent_tree *dev_extent_cache,
6160 struct root_item_record *ri)
6162 struct extent_buffer *buf;
6163 struct extent_record *rec = NULL;
6174 struct btrfs_key key;
6175 struct cache_extent *cache;
6178 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6179 bits_nr, &reada_bits);
/* Kick off readahead for the batch of upcoming blocks. */
6184 for(i = 0; i < nritems; i++) {
6185 ret = add_cache_extent(reada, bits[i].start,
6190 /* fixme, get the parent transid */
6191 readahead_tree_block(root, bits[i].start,
6195 *last = bits[0].start;
6196 bytenr = bits[0].start;
6197 size = bits[0].size;
/* Drop this block from all the tracking trees before processing it. */
6199 cache = lookup_cache_extent(pending, bytenr, size);
6201 remove_cache_extent(pending, cache);
6204 cache = lookup_cache_extent(reada, bytenr, size);
6206 remove_cache_extent(reada, cache);
6209 cache = lookup_cache_extent(nodes, bytenr, size);
6211 remove_cache_extent(nodes, cache);
6214 cache = lookup_cache_extent(extent_cache, bytenr, size);
6216 rec = container_of(cache, struct extent_record, cache);
6217 gen = rec->parent_generation;
6220 /* fixme, get the real parent transid */
6221 buf = read_tree_block(root, bytenr, size, gen);
6222 if (!extent_buffer_uptodate(buf)) {
6223 record_bad_block_io(root->fs_info,
6224 extent_cache, bytenr, size);
6228 nritems = btrfs_header_nritems(buf);
/* Determine the block's backref flags, falling back to our own calc. */
6231 if (!init_extent_tree) {
6232 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6233 btrfs_header_level(buf), 1, NULL,
6236 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6238 fprintf(stderr, "Couldn't calc extent flags\n");
6239 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6244 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6246 fprintf(stderr, "Couldn't calc extent flags\n");
6247 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6251 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6253 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6254 ri->objectid == btrfs_header_owner(buf)) {
6256 * Ok we got to this block from it's original owner and
6257 * we have FULL_BACKREF set. Relocation can leave
6258 * converted blocks over so this is altogether possible,
6259 * however it's not possible if the generation > the
6260 * last snapshot, so check for this case.
6262 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6263 btrfs_header_generation(buf) > ri->last_snapshot) {
6264 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6265 rec->bad_full_backref = 1;
6270 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6271 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6272 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6273 rec->bad_full_backref = 1;
6277 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6278 rec->flag_block_full_backref = 1;
6282 rec->flag_block_full_backref = 0;
6284 owner = btrfs_header_owner(buf);
6287 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: dispatch every item by key type. */
6291 if (btrfs_is_leaf(buf)) {
6292 btree_space_waste += btrfs_leaf_free_space(root, buf);
6293 for (i = 0; i < nritems; i++) {
6294 struct btrfs_file_extent_item *fi;
6295 btrfs_item_key_to_cpu(buf, &key, i);
6297 * Check key type against the leaf owner.
6298 * Could filter quite a lot of early error if
6301 if (check_type_with_root(btrfs_header_owner(buf),
6303 fprintf(stderr, "ignoring invalid key\n");
6306 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6307 process_extent_item(root, extent_cache, buf,
6311 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6312 process_extent_item(root, extent_cache, buf,
6316 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6318 btrfs_item_size_nr(buf, i);
6321 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6322 process_chunk_item(chunk_cache, &key, buf, i);
6325 if (key.type == BTRFS_DEV_ITEM_KEY) {
6326 process_device_item(dev_cache, &key, buf, i);
6329 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6330 process_block_group_item(block_group_cache,
6334 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6335 process_device_extent_item(dev_extent_cache,
6340 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6341 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6342 process_extent_ref_v0(extent_cache, buf, i);
6349 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6350 ret = add_tree_backref(extent_cache,
6351 key.objectid, 0, key.offset, 0);
6353 error("add_tree_backref failed: %s",
6357 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6358 ret = add_tree_backref(extent_cache,
6359 key.objectid, key.offset, 0, 0);
6361 error("add_tree_backref failed: %s",
6365 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6366 struct btrfs_extent_data_ref *ref;
6367 ref = btrfs_item_ptr(buf, i,
6368 struct btrfs_extent_data_ref);
6369 add_data_backref(extent_cache,
6371 btrfs_extent_data_ref_root(buf, ref),
6372 btrfs_extent_data_ref_objectid(buf,
6374 btrfs_extent_data_ref_offset(buf, ref),
6375 btrfs_extent_data_ref_count(buf, ref),
6376 0, root->sectorsize);
6379 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6380 struct btrfs_shared_data_ref *ref;
6381 ref = btrfs_item_ptr(buf, i,
6382 struct btrfs_shared_data_ref);
6383 add_data_backref(extent_cache,
6384 key.objectid, key.offset, 0, 0, 0,
6385 btrfs_shared_data_ref_count(buf, ref),
6386 0, root->sectorsize);
6389 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6390 struct bad_item *bad;
6392 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
/* Remember orphan items so they can be deleted in repair mode. */
6396 bad = malloc(sizeof(struct bad_item));
6399 INIT_LIST_HEAD(&bad->list);
6400 memcpy(&bad->key, &key,
6401 sizeof(struct btrfs_key));
6402 bad->root_id = owner;
6403 list_add_tail(&bad->list, &delete_items);
6406 if (key.type != BTRFS_EXTENT_DATA_KEY)
6408 fi = btrfs_item_ptr(buf, i,
6409 struct btrfs_file_extent_item);
6410 if (btrfs_file_extent_type(buf, fi) ==
6411 BTRFS_FILE_EXTENT_INLINE)
6413 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6416 data_bytes_allocated +=
6417 btrfs_file_extent_disk_num_bytes(buf, fi);
6418 if (data_bytes_allocated < root->sectorsize) {
6421 data_bytes_referenced +=
6422 btrfs_file_extent_num_bytes(buf, fi);
6423 add_data_backref(extent_cache,
6424 btrfs_file_extent_disk_bytenr(buf, fi),
6425 parent, owner, key.objectid, key.offset -
6426 btrfs_file_extent_offset(buf, fi), 1, 1,
6427 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Node: record and queue every child block pointer. */
6431 struct btrfs_key first_key;
6433 first_key.objectid = 0;
6436 btrfs_item_key_to_cpu(buf, &first_key, 0);
6437 level = btrfs_header_level(buf);
6438 for (i = 0; i < nritems; i++) {
6439 struct extent_record tmpl;
6441 ptr = btrfs_node_blockptr(buf, i);
6442 size = root->nodesize;
6443 btrfs_node_key_to_cpu(buf, &key, i);
/* Skip subtrees already deleted by an interrupted snapshot drop. */
6445 if ((level == ri->drop_level)
6446 && is_dropped_key(&key, &ri->drop_key)) {
6451 memset(&tmpl, 0, sizeof(tmpl));
6452 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6453 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6458 tmpl.max_size = size;
6459 ret = add_extent_rec(extent_cache, &tmpl);
6463 ret = add_tree_backref(extent_cache, ptr, parent,
6466 error("add_tree_backref failed: %s",
6472 add_pending(nodes, seen, ptr, size);
6474 add_pending(pending, seen, ptr, size);
6477 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6478 nritems) * sizeof(struct btrfs_key_ptr);
/* Global accounting and old-backref-format detection. */
6480 total_btree_bytes += buf->len;
6481 if (fs_root_objectid(btrfs_header_owner(buf)))
6482 total_fs_tree_bytes += buf->len;
6483 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6484 total_extent_tree_bytes += buf->len;
6485 if (!found_old_backref &&
6486 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6487 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6488 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6489 found_old_backref = 1;
6491 free_extent_buffer(buf);
/*
 * add_root_to_pending - seed the block scan with a tree root.
 *
 * Queues @buf on the nodes (level > 0) or pending (leaf) tree, adds an
 * extent record for it, and records a tree backref: a full (parent) backref
 * for reloc trees or pre-mixed-backref-rev blocks, otherwise a normal
 * root-objectid backref.
 *
 * NOTE(review): this listing elides source lines (numbering gaps); comments
 * describe only the visible logic.
 */
6495 static int add_root_to_pending(struct extent_buffer *buf,
6496 struct cache_tree *extent_cache,
6497 struct cache_tree *pending,
6498 struct cache_tree *seen,
6499 struct cache_tree *nodes,
6502 struct extent_record tmpl;
6505 if (btrfs_header_level(buf) > 0)
6506 add_pending(nodes, seen, buf->start, buf->len);
6508 add_pending(pending, seen, buf->start, buf->len);
6510 memset(&tmpl, 0, sizeof(tmpl));
6511 tmpl.start = buf->start;
6516 tmpl.max_size = buf->len;
6517 add_extent_rec(extent_cache, &tmpl);
/* Old backref format / reloc trees use full (parent == bytenr) backrefs. */
6519 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6520 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6521 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6524 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6529 /* as we fix the tree, we might be deleting blocks that
6530 * we're tracking for repair. This hook makes sure we
6531 * remove any backrefs for blocks as we are fixing them.
/*
 * free_extent_hook - called from the extent-free path during repair to keep
 * the fsck's in-memory extent cache consistent: decrements the matching data
 * or tree backref counts for the freed extent and drops the record if it is
 * no longer referenced.
 *
 * NOTE(review): this listing elides source lines (numbering gaps); comments
 * describe only the visible logic.
 */
6533 static int free_extent_hook(struct btrfs_trans_handle *trans,
6534 struct btrfs_root *root,
6535 u64 bytenr, u64 num_bytes, u64 parent,
6536 u64 root_objectid, u64 owner, u64 offset,
6539 struct extent_record *rec;
6540 struct cache_extent *cache;
6542 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* Owners >= FIRST_FREE_OBJECTID are file (data) extents. */
6544 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6545 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6549 rec = container_of(cache, struct extent_record, cache);
6551 struct data_backref *back;
6552 back = find_data_backref(rec, parent, root_objectid, owner,
6553 offset, 1, bytenr, num_bytes);
6556 if (back->node.found_ref) {
6557 back->found_ref -= refs_to_drop;
6559 rec->refs -= refs_to_drop;
6561 if (back->node.found_extent_tree) {
6562 back->num_refs -= refs_to_drop;
6563 if (rec->extent_item_refs)
6564 rec->extent_item_refs -= refs_to_drop;
6566 if (back->found_ref == 0)
6567 back->node.found_ref = 0;
6568 if (back->num_refs == 0)
6569 back->node.found_extent_tree = 0;
6571 if (!back->node.found_extent_tree && back->node.found_ref) {
6572 list_del(&back->node.list);
6576 struct tree_backref *back;
6577 back = find_tree_backref(rec, parent, root_objectid);
6580 if (back->node.found_ref) {
6583 back->node.found_ref = 0;
6585 if (back->node.found_extent_tree) {
6586 if (rec->extent_item_refs)
6587 rec->extent_item_refs--;
6588 back->node.found_extent_tree = 0;
6590 if (!back->node.found_extent_tree && back->node.found_ref) {
6591 list_del(&back->node.list);
/* Free the record entirely if nothing references it any more. */
6595 maybe_free_extent_rec(extent_cache, rec);
/*
 * delete_extent_records - repair helper that removes every extent-tree item
 * (EXTENT/METADATA items and all backref item kinds) keyed at @bytenr.
 *
 * Searches downward from (bytenr, max type, -1) and deletes matching items
 * one at a time, stepping the search key backwards over non-matching types.
 * When an EXTENT_ITEM or METADATA_ITEM is removed, the block group usage is
 * corrected via btrfs_update_block_group().
 *
 * NOTE(review): this listing elides source lines (numbering gaps); comments
 * describe only the visible logic.
 */
6600 static int delete_extent_records(struct btrfs_trans_handle *trans,
6601 struct btrfs_root *root,
6602 struct btrfs_path *path,
6603 u64 bytenr, u64 new_len)
6605 struct btrfs_key key;
6606 struct btrfs_key found_key;
6607 struct extent_buffer *leaf;
6612 key.objectid = bytenr;
6614 key.offset = (u64)-1;
6617 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6624 if (path->slots[0] == 0)
6630 leaf = path->nodes[0];
6631 slot = path->slots[0];
6633 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6634 if (found_key.objectid != bytenr)
/* Only extent/backref item types are deleted; others are stepped over. */
6637 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6638 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6639 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6640 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6641 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6642 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6643 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6644 btrfs_release_path(path);
6645 if (found_key.type == 0) {
6646 if (found_key.offset == 0)
6648 key.offset = found_key.offset - 1;
6649 key.type = found_key.type;
6651 key.type = found_key.type - 1;
6652 key.offset = (u64)-1;
6656 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6657 found_key.objectid, found_key.type, found_key.offset);
6659 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6662 btrfs_release_path(path);
6664 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6665 found_key.type == BTRFS_METADATA_ITEM_KEY) {
/* METADATA_ITEM offset encodes level, so its length is nodesize. */
6666 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6667 found_key.offset : root->nodesize;
6669 ret = btrfs_update_block_group(trans, root, bytenr,
6676 btrfs_release_path(path);
6681 * for a single backref, this will allocate a new extent
6682 * and add the backref to it.
/*
 * record_extent - repair helper: (re)insert an extent item for @rec (when
 * not yet @allocated) and then add the backref described by @back via
 * btrfs_inc_extent_ref(), handling data vs. tree and full vs. normal
 * backrefs.
 *
 * NOTE(review): this listing elides source lines (numbering gaps); the
 * visible `copy_key;;` double semicolon at 6730 is a harmless upstream nit
 * left untouched here to keep the code byte-identical.
 */
6684 static int record_extent(struct btrfs_trans_handle *trans,
6685 struct btrfs_fs_info *info,
6686 struct btrfs_path *path,
6687 struct extent_record *rec,
6688 struct extent_backref *back,
6689 int allocated, u64 flags)
6692 struct btrfs_root *extent_root = info->extent_root;
6693 struct extent_buffer *leaf;
6694 struct btrfs_key ins_key;
6695 struct btrfs_extent_item *ei;
6696 struct tree_backref *tback;
6697 struct data_backref *dback;
6698 struct btrfs_tree_block_info *bi;
/* Metadata extents are at least one node in size. */
6701 rec->max_size = max_t(u64, rec->max_size,
6702 info->extent_root->nodesize);
6705 u32 item_size = sizeof(*ei);
6708 item_size += sizeof(*bi);
6710 ins_key.objectid = rec->start;
6711 ins_key.offset = rec->max_size;
6712 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6714 ret = btrfs_insert_empty_item(trans, extent_root, path,
6715 &ins_key, item_size);
6719 leaf = path->nodes[0];
6720 ei = btrfs_item_ptr(leaf, path->slots[0],
6721 struct btrfs_extent_item);
6723 btrfs_set_extent_refs(leaf, ei, 0);
6724 btrfs_set_extent_generation(leaf, ei, rec->generation);
6726 if (back->is_data) {
6727 btrfs_set_extent_flags(leaf, ei,
6728 BTRFS_EXTENT_FLAG_DATA);
6730 struct btrfs_disk_key copy_key;;
/* Tree block: also fill in the btrfs_tree_block_info that follows ei. */
6732 tback = to_tree_backref(back);
6733 bi = (struct btrfs_tree_block_info *)(ei + 1);
6734 memset_extent_buffer(leaf, 0, (unsigned long)bi,
6737 btrfs_set_disk_key_objectid(&copy_key,
6738 rec->info_objectid);
6739 btrfs_set_disk_key_type(&copy_key, 0);
6740 btrfs_set_disk_key_offset(&copy_key, 0);
6742 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6743 btrfs_set_tree_block_key(leaf, bi, &copy_key);
6745 btrfs_set_extent_flags(leaf, ei,
6746 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6749 btrfs_mark_buffer_dirty(leaf);
6750 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6751 rec->max_size, 1, 0);
6754 btrfs_release_path(path);
6757 if (back->is_data) {
6761 dback = to_data_backref(back);
6762 if (back->full_backref)
6763 parent = dback->parent;
/* Re-add one ref per reference actually found in the fs trees. */
6767 for (i = 0; i < dback->found_ref; i++) {
6768 /* if parent != 0, we're doing a full backref
6769 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6770 * just makes the backref allocator create a data
6773 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6774 rec->start, rec->max_size,
6778 BTRFS_FIRST_FREE_OBJECTID :
6784 fprintf(stderr, "adding new data backref"
6785 " on %llu %s %llu owner %llu"
6786 " offset %llu found %d\n",
6787 (unsigned long long)rec->start,
6788 back->full_backref ?
6790 back->full_backref ?
6791 (unsigned long long)parent :
6792 (unsigned long long)dback->root,
6793 (unsigned long long)dback->owner,
6794 (unsigned long long)dback->offset,
6799 tback = to_tree_backref(back);
6800 if (back->full_backref)
6801 parent = tback->parent;
6805 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6806 rec->start, rec->max_size,
6807 parent, tback->root, 0, 0);
6808 fprintf(stderr, "adding new tree backref on "
6809 "start %llu len %llu parent %llu root %llu\n",
6810 rec->start, rec->max_size, parent, tback->root);
6813 btrfs_release_path(path);
/*
 * find_entry - linear scan of @entries for the extent_entry whose bytenr and
 * bytes both match; the (elided) return yields the match or NULL.
 */
6817 static struct extent_entry *find_entry(struct list_head *entries,
6818 u64 bytenr, u64 bytes)
6820 struct extent_entry *entry = NULL;
6822 list_for_each_entry(entry, entries, list) {
6823 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * find_most_right_entry - among conflicting extent entries, pick the one
 * whose vote count makes it most likely to be correct.
 *
 * Entries whose broken count equals their total count are distrusted
 * entirely. A tie between the current best and another candidate means no
 * clear winner, so the search keeps going (and may return NULL).
 *
 * NOTE(review): this listing elides source lines (numbering gaps); comments
 * describe only the visible logic.
 */
6830 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6832 struct extent_entry *entry, *best = NULL, *prev = NULL;
6834 list_for_each_entry(entry, entries, list) {
6841 * If there are as many broken entries as entries then we know
6842 * not to trust this particular entry.
6844 if (entry->broken == entry->count)
6848 * If our current entry == best then we can't be sure our best
6849 * is really the best, so we need to keep searching.
6851 if (best && best->count == entry->count) {
6857 /* Prev == entry, not good enough, have to keep searching */
6858 if (!prev->broken && prev->count == entry->count)
6862 best = (prev->count > entry->count) ? prev : entry;
6863 else if (best->count < entry->count)
6871 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6872 struct data_backref *dback, struct extent_entry *entry)
6874 struct btrfs_trans_handle *trans;
6875 struct btrfs_root *root;
6876 struct btrfs_file_extent_item *fi;
6877 struct extent_buffer *leaf;
6878 struct btrfs_key key;
6882 key.objectid = dback->root;
6883 key.type = BTRFS_ROOT_ITEM_KEY;
6884 key.offset = (u64)-1;
6885 root = btrfs_read_fs_root(info, &key);
6887 fprintf(stderr, "Couldn't find root for our ref\n");
6892 * The backref points to the original offset of the extent if it was
6893 * split, so we need to search down to the offset we have and then walk
6894 * forward until we find the backref we're looking for.
6896 key.objectid = dback->owner;
6897 key.type = BTRFS_EXTENT_DATA_KEY;
6898 key.offset = dback->offset;
6899 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6901 fprintf(stderr, "Error looking up ref %d\n", ret);
6906 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6907 ret = btrfs_next_leaf(root, path);
6909 fprintf(stderr, "Couldn't find our ref, next\n");
6913 leaf = path->nodes[0];
6914 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6915 if (key.objectid != dback->owner ||
6916 key.type != BTRFS_EXTENT_DATA_KEY) {
6917 fprintf(stderr, "Couldn't find our ref, search\n");
6920 fi = btrfs_item_ptr(leaf, path->slots[0],
6921 struct btrfs_file_extent_item);
6922 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6923 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6925 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6930 btrfs_release_path(path);
6932 trans = btrfs_start_transaction(root, 1);
6934 return PTR_ERR(trans);
6937 * Ok we have the key of the file extent we want to fix, now we can cow
6938 * down to the thing and fix it.
6940 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6942 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6943 key.objectid, key.type, key.offset, ret);
6947 fprintf(stderr, "Well that's odd, we just found this key "
6948 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6953 leaf = path->nodes[0];
6954 fi = btrfs_item_ptr(leaf, path->slots[0],
6955 struct btrfs_file_extent_item);
6957 if (btrfs_file_extent_compression(leaf, fi) &&
6958 dback->disk_bytenr != entry->bytenr) {
6959 fprintf(stderr, "Ref doesn't match the record start and is "
6960 "compressed, please take a btrfs-image of this file "
6961 "system and send it to a btrfs developer so they can "
6962 "complete this functionality for bytenr %Lu\n",
6963 dback->disk_bytenr);
6968 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6969 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6970 } else if (dback->disk_bytenr > entry->bytenr) {
6971 u64 off_diff, offset;
6973 off_diff = dback->disk_bytenr - entry->bytenr;
6974 offset = btrfs_file_extent_offset(leaf, fi);
6975 if (dback->disk_bytenr + offset +
6976 btrfs_file_extent_num_bytes(leaf, fi) >
6977 entry->bytenr + entry->bytes) {
6978 fprintf(stderr, "Ref is past the entry end, please "
6979 "take a btrfs-image of this file system and "
6980 "send it to a btrfs developer, ref %Lu\n",
6981 dback->disk_bytenr);
6986 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6987 btrfs_set_file_extent_offset(leaf, fi, offset);
6988 } else if (dback->disk_bytenr < entry->bytenr) {
6991 offset = btrfs_file_extent_offset(leaf, fi);
6992 if (dback->disk_bytenr + offset < entry->bytenr) {
6993 fprintf(stderr, "Ref is before the entry start, please"
6994 " take a btrfs-image of this file system and "
6995 "send it to a btrfs developer, ref %Lu\n",
6996 dback->disk_bytenr);
7001 offset += dback->disk_bytenr;
7002 offset -= entry->bytenr;
7003 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7004 btrfs_set_file_extent_offset(leaf, fi, offset);
7007 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7010 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7011 * only do this if we aren't using compression, otherwise it's a
7014 if (!btrfs_file_extent_compression(leaf, fi))
7015 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7017 printf("ram bytes may be wrong?\n");
7018 btrfs_mark_buffer_dirty(leaf);
7020 err = btrfs_commit_transaction(trans, root);
7021 btrfs_release_path(path);
7022 return ret ? ret : err;
/*
 * NOTE(review): this region is a partial extract -- many original lines
 * (braces, error checks, labels) are missing from this view.  Comments
 * describe only the visible logic.
 *
 * Cross-check all data backrefs of @rec: collect the (disk_bytenr, bytes)
 * pairs the refs claim into a local entry list, pick the value most refs
 * agree on (consulting the extent record itself on a tie), then repair
 * every ref that disagrees with the winner via repair_ref().
 */
7025 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7026 struct extent_record *rec)
7028 struct extent_backref *back;
7029 struct data_backref *dback;
7030 struct extent_entry *entry, *best = NULL;
7033 int broken_entries = 0;
7038 * Metadata is easy and the backrefs should always agree on bytenr and
7039 * size, if not we've got bigger issues.
/* Pass 1: bucket the backrefs by the (bytenr, bytes) values they claim. */
7044 list_for_each_entry(back, &rec->backrefs, list) {
7045 if (back->full_backref || !back->is_data)
7048 dback = to_data_backref(back);
7051 * We only pay attention to backrefs that we found a real
7054 if (dback->found_ref == 0)
7058 * For now we only catch when the bytes don't match, not the
7059 * bytenr. We can easily do this at the same time, but I want
7060 * to have a fs image to test on before we just add repair
7061 * functionality willy-nilly so we know we won't screw up the
7065 entry = find_entry(&entries, dback->disk_bytenr,
7068 entry = malloc(sizeof(struct extent_entry));
7073 memset(entry, 0, sizeof(*entry));
7074 entry->bytenr = dback->disk_bytenr;
7075 entry->bytes = dback->bytes;
7076 list_add_tail(&entry->list, &entries);
7081 * If we only have on entry we may think the entries agree when
7082 * in reality they don't so we have to do some extra checking.
7084 if (dback->disk_bytenr != rec->start ||
7085 dback->bytes != rec->nr || back->broken)
7096 /* Yay all the backrefs agree, carry on good sir */
7097 if (nr_entries <= 1 && !mismatch)
7100 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7101 "%Lu\n", rec->start);
7104 * First we want to see if the backrefs can agree amongst themselves who
7105 * is right, so figure out which one of the entries has the highest
7108 best = find_most_right_entry(&entries);
7111 * Ok so we may have an even split between what the backrefs think, so
7112 * this is where we use the extent ref to see what it thinks.
7115 entry = find_entry(&entries, rec->start, rec->nr);
7116 if (!entry && (!broken_entries || !rec->found_rec)) {
7117 fprintf(stderr, "Backrefs don't agree with each other "
7118 "and extent record doesn't agree with anybody,"
7119 " so we can't fix bytenr %Lu bytes %Lu\n",
7120 rec->start, rec->nr);
7123 } else if (!entry) {
7125 * Ok our backrefs were broken, we'll assume this is the
7126 * correct value and add an entry for this range.
7128 entry = malloc(sizeof(struct extent_entry));
7133 memset(entry, 0, sizeof(*entry));
7134 entry->bytenr = rec->start;
7135 entry->bytes = rec->nr;
7136 list_add_tail(&entry->list, &entries);
/* Re-run the vote now that the extent record's own values are a candidate. */
7140 best = find_most_right_entry(&entries);
7142 fprintf(stderr, "Backrefs and extent record evenly "
7143 "split on who is right, this is going to "
7144 "require user input to fix bytenr %Lu bytes "
7145 "%Lu\n", rec->start, rec->nr);
7152 * I don't think this can happen currently as we'll abort() if we catch
7153 * this case higher up, but in case somebody removes that we still can't
7154 * deal with it properly here yet, so just bail out of that's the case.
7156 if (best->bytenr != rec->start) {
7157 fprintf(stderr, "Extent start and backref starts don't match, "
7158 "please use btrfs-image on this file system and send "
7159 "it to a btrfs developer so they can make fsck fix "
7160 "this particular case. bytenr is %Lu, bytes is %Lu\n",
7161 rec->start, rec->nr);
7167 * Ok great we all agreed on an extent record, let's go find the real
7168 * references and fix up the ones that don't match.
/* Pass 2: rewrite every real ref whose values differ from the winner. */
7170 list_for_each_entry(back, &rec->backrefs, list) {
7171 if (back->full_backref || !back->is_data)
7174 dback = to_data_backref(back);
7177 * Still ignoring backrefs that don't have a real ref attached
7180 if (dback->found_ref == 0)
7183 if (dback->bytes == best->bytes &&
7184 dback->disk_bytenr == best->bytenr)
7187 ret = repair_ref(info, path, dback, best);
7193 * Ok we messed with the actual refs, which means we need to drop our
7194 * entire cache and go back and rescan. I know this is a huge pain and
7195 * adds a lot of extra work, but it's the only way to be safe. Once all
7196 * the backrefs agree we may not need to do anything to the extent
/* Tear down the temporary entry list before returning. */
7201 while (!list_empty(&entries)) {
7202 entry = list_entry(entries.next, struct extent_entry, list);
7203 list_del_init(&entry->list);
/*
 * NOTE(review): partial extract -- missing lines are not shown; comments
 * cover only the visible code.
 *
 * Collapse duplicate extent_records for the same range: take the first
 * record on rec->dups as the canonical one ("good"), fold any overlapping
 * records' refs/backrefs into it, and re-insert it into @extent_cache.
 * Returns 1 when nothing is left to delete, 0 when duplicates remain
 * (per the visible final return).
 */
7209 static int process_duplicates(struct btrfs_root *root,
7210 struct cache_tree *extent_cache,
7211 struct extent_record *rec)
7213 struct extent_record *good, *tmp;
7214 struct cache_extent *cache;
7218 * If we found a extent record for this extent then return, or if we
7219 * have more than one duplicate we are likely going to need to delete
7222 if (rec->found_rec || rec->num_duplicates > 1)
7225 /* Shouldn't happen but just in case */
7226 BUG_ON(!rec->num_duplicates);
7229 * So this happens if we end up with a backref that doesn't match the
7230 * actual extent entry. So either the backref is bad or the extent
7231 * entry is bad. Either way we want to have the extent_record actually
7232 * reflect what we found in the extent_tree, so we need to take the
7233 * duplicate out and use that as the extent_record since the only way we
7234 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7236 remove_cache_extent(extent_cache, &rec->cache);
/* Promote the first duplicate to be the canonical record. */
7238 good = to_extent_record(rec->dups.next);
7239 list_del_init(&good->list);
7240 INIT_LIST_HEAD(&good->backrefs);
7241 INIT_LIST_HEAD(&good->dups);
7242 good->cache.start = good->start;
7243 good->cache.size = good->nr;
7244 good->content_checked = 0;
7245 good->owner_ref_checked = 0;
7246 good->num_duplicates = 0;
7247 good->refs = rec->refs;
7248 list_splice_init(&rec->backrefs, &good->backrefs);
/* Merge any other records overlapping the promoted range. */
7250 cache = lookup_cache_extent(extent_cache, good->start,
7254 tmp = container_of(cache, struct extent_record, cache);
7257 * If we find another overlapping extent and it's found_rec is
7258 * set then it's a duplicate and we need to try and delete
7261 if (tmp->found_rec || tmp->num_duplicates > 0) {
7262 if (list_empty(&good->list))
7263 list_add_tail(&good->list,
7264 &duplicate_extents);
7265 good->num_duplicates += tmp->num_duplicates + 1;
7266 list_splice_init(&tmp->dups, &good->dups);
7267 list_del_init(&tmp->list);
7268 list_add_tail(&tmp->list, &good->dups);
7269 remove_cache_extent(extent_cache, &tmp->cache);
7274 * Ok we have another non extent item backed extent rec, so lets
7275 * just add it to this extent and carry on like we did above.
7277 good->refs += tmp->refs;
7278 list_splice_init(&tmp->backrefs, &good->backrefs);
7279 remove_cache_extent(extent_cache, &tmp->cache);
7282 ret = insert_cache_extent(extent_cache, &good->cache);
7285 return good->num_duplicates ? 0 : 1;
/*
 * NOTE(review): partial extract -- missing lines not shown.
 *
 * Delete duplicate EXTENT_ITEMs for @rec from the extent tree.  The
 * record that fully covers all duplicates is kept as "good"; the rest
 * are moved to a local delete list and their items removed inside one
 * transaction.  Returns <0 on error, else the number of deleted items
 * (nr_del), so callers can tell whether anything changed.
 */
7288 static int delete_duplicate_records(struct btrfs_root *root,
7289 struct extent_record *rec)
7291 struct btrfs_trans_handle *trans;
7292 LIST_HEAD(delete_list);
7293 struct btrfs_path *path;
7294 struct extent_record *tmp, *good, *n;
7297 struct btrfs_key key;
7299 path = btrfs_alloc_path();
7306 /* Find the record that covers all of the duplicates. */
7307 list_for_each_entry(tmp, &rec->dups, list) {
7308 if (good->start < tmp->start)
7310 if (good->nr > tmp->nr)
/* Partially-overlapping (not nested) extents are beyond this repair path. */
7313 if (tmp->start + tmp->nr < good->start + good->nr) {
7314 fprintf(stderr, "Ok we have overlapping extents that "
7315 "aren't completely covered by each other, this "
7316 "is going to require more careful thought. "
7317 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7318 tmp->start, tmp->nr, good->start, good->nr);
7325 list_add_tail(&rec->list, &delete_list);
7327 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7330 list_move_tail(&tmp->list, &delete_list);
/* All deletions happen in the extent root. */
7333 root = root->fs_info->extent_root;
7334 trans = btrfs_start_transaction(root, 1);
7335 if (IS_ERR(trans)) {
7336 ret = PTR_ERR(trans);
7340 list_for_each_entry(tmp, &delete_list, list) {
7341 if (tmp->found_rec == 0)
7343 key.objectid = tmp->start;
7344 key.type = BTRFS_EXTENT_ITEM_KEY;
7345 key.offset = tmp->nr;
7347 /* Shouldn't happen but just in case */
7348 if (tmp->metadata) {
7349 fprintf(stderr, "Well this shouldn't happen, extent "
7350 "record overlaps but is metadata? "
7351 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7355 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7361 ret = btrfs_del_item(trans, root, path);
7364 btrfs_release_path(path);
7367 err = btrfs_commit_transaction(trans, root);
/* Free the records we queued for deletion ... */
7371 while (!list_empty(&delete_list)) {
7372 tmp = to_extent_record(delete_list.next);
7373 list_del_init(&tmp->list);
/* ... and any leftovers still hanging off rec->dups. */
7379 while (!list_empty(&rec->dups)) {
7380 tmp = to_extent_record(rec->dups.next);
7381 list_del_init(&tmp->list);
7385 btrfs_free_path(path);
7387 if (!ret && !nr_del)
7388 rec->num_duplicates = 0;
7390 return ret ? ret : nr_del;
/*
 * NOTE(review): partial extract -- missing lines not shown.
 *
 * For each data backref of @rec that was never matched to an on-disk
 * file extent (found_ref == 0), look the file extent up in its owning
 * fs tree.  If the bytenr it points at has no extent record of its own
 * in @extent_cache, adopt the on-disk (bytenr, bytes) into the backref
 * so verify_backrefs() can include it in the vote.
 */
7393 static int find_possible_backrefs(struct btrfs_fs_info *info,
7394 struct btrfs_path *path,
7395 struct cache_tree *extent_cache,
7396 struct extent_record *rec)
7398 struct btrfs_root *root;
7399 struct extent_backref *back;
7400 struct data_backref *dback;
7401 struct cache_extent *cache;
7402 struct btrfs_file_extent_item *fi;
7403 struct btrfs_key key;
7407 list_for_each_entry(back, &rec->backrefs, list) {
7408 /* Don't care about full backrefs (poor unloved backrefs) */
7409 if (back->full_backref || !back->is_data)
7412 dback = to_data_backref(back);
7414 /* We found this one, we don't need to do a lookup */
7415 if (dback->found_ref)
/* Resolve the subvolume root that owns this ref. */
7418 key.objectid = dback->root;
7419 key.type = BTRFS_ROOT_ITEM_KEY;
7420 key.offset = (u64)-1;
7422 root = btrfs_read_fs_root(info, &key);
7424 /* No root, definitely a bad ref, skip */
7425 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7427 /* Other err, exit */
7429 return PTR_ERR(root);
/* Look for the exact file extent the backref describes. */
7431 key.objectid = dback->owner;
7432 key.type = BTRFS_EXTENT_DATA_KEY;
7433 key.offset = dback->offset;
7434 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7436 btrfs_release_path(path);
7439 /* Didn't find it, we can carry on */
7444 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7445 struct btrfs_file_extent_item);
7446 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7447 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7448 btrfs_release_path(path);
7449 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7451 struct extent_record *tmp;
7452 tmp = container_of(cache, struct extent_record, cache);
7455 * If we found an extent record for the bytenr for this
7456 * particular backref then we can't add it to our
7457 * current extent record. We only want to add backrefs
7458 * that don't have a corresponding extent item in the
7459 * extent tree since they likely belong to this record
7460 * and we need to fix it if it doesn't match bytenrs.
/* Adopt the on-disk values into the backref. */
7466 dback->found_ref += 1;
7467 dback->disk_bytenr = bytenr;
7468 dback->bytes = bytes;
7471 * Set this so the verify backref code knows not to trust the
7472 * values in this backref.
/* NOTE(review): partial extract -- missing lines not shown. */
7481 * Record orphan data ref into corresponding root.
7483 * Return 0 if the extent item contains data ref and recorded.
7484 * Return 1 if the extent item contains no useful data ref
7485 * On that case, it may contains only shared_dataref or metadata backref
7486 * or the file extent exists(this should be handled by the extent bytenr
7488 * Return <0 if something goes wrong.
7490 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7491 struct extent_record *rec)
7493 struct btrfs_key key;
7494 struct btrfs_root *dest_root;
7495 struct extent_backref *back;
7496 struct data_backref *dback;
7497 struct orphan_data_extent *orphan;
7498 struct btrfs_path *path;
7499 int recorded_data_ref = 0;
7504 path = btrfs_alloc_path();
/* Only consider data backrefs that exist in the extent tree. */
7507 list_for_each_entry(back, &rec->backrefs, list) {
7508 if (back->full_backref || !back->is_data ||
7509 !back->found_extent_tree)
7511 dback = to_data_backref(back);
7512 if (dback->found_ref)
7514 key.objectid = dback->root;
7515 key.type = BTRFS_ROOT_ITEM_KEY;
7516 key.offset = (u64)-1;
7518 dest_root = btrfs_read_fs_root(fs_info, &key);
7520 /* For non-exist root we just skip it */
7521 if (IS_ERR(dest_root) || !dest_root)
7524 key.objectid = dback->owner;
7525 key.type = BTRFS_EXTENT_DATA_KEY;
7526 key.offset = dback->offset;
7528 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7529 btrfs_release_path(path);
7531 * For ret < 0, it's OK since the fs-tree may be corrupted,
7532 * we need to record it for inode/file extent rebuild.
7533 * For ret > 0, we record it only for file extent rebuild.
7534 * For ret == 0, the file extent exists but only bytenr
7535 * mismatch, let the original bytenr fix routine to handle,
/* Queue the orphan on the destination root for later rebuild. */
7541 orphan = malloc(sizeof(*orphan));
7546 INIT_LIST_HEAD(&orphan->list);
7547 orphan->root = dback->root;
7548 orphan->objectid = dback->owner;
7549 orphan->offset = dback->offset;
7550 orphan->disk_bytenr = rec->cache.start;
7551 orphan->disk_len = rec->cache.size;
7552 list_add(&dest_root->orphan_data_extents, &orphan->list);
7553 recorded_data_ref = 1;
7556 btrfs_free_path(path);
7558 return !recorded_data_ref;
/* NOTE(review): partial extract -- missing lines not shown. */
7564 * when an incorrect extent item is found, this will delete
7565 * all of the existing entries for it and recreate them
7566 * based on what the tree scan found.
7568 static int fixup_extent_refs(struct btrfs_fs_info *info,
7569 struct cache_tree *extent_cache,
7570 struct extent_record *rec)
7572 struct btrfs_trans_handle *trans = NULL;
7574 struct btrfs_path *path;
7575 struct list_head *cur = rec->backrefs.next;
7576 struct cache_extent *cache;
7577 struct extent_backref *back;
7581 if (rec->flag_block_full_backref)
7582 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7584 path = btrfs_alloc_path();
7588 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7590 * Sometimes the backrefs themselves are so broken they don't
7591 * get attached to any meaningful rec, so first go back and
7592 * check any of our backrefs that we couldn't find and throw
7593 * them into the list if we find the backref so that
7594 * verify_backrefs can figure out what to do.
7596 ret = find_possible_backrefs(info, path, extent_cache, rec);
7601 /* step one, make sure all of the backrefs agree */
7602 ret = verify_backrefs(info, path, rec);
7606 trans = btrfs_start_transaction(info->extent_root, 1);
7607 if (IS_ERR(trans)) {
7608 ret = PTR_ERR(trans);
7612 /* step two, delete all the existing records */
7613 ret = delete_extent_records(trans, info->extent_root, path,
7614 rec->start, rec->max_size);
7619 /* was this block corrupt? If so, don't add references to it */
7620 cache = lookup_cache_extent(info->corrupt_blocks,
7621 rec->start, rec->max_size);
7627 /* step three, recreate all the refs we did find */
7628 while(cur != &rec->backrefs) {
7629 back = to_extent_backref(cur);
7633 * if we didn't find any references, don't create a
7636 if (!back->found_ref)
7639 rec->bad_full_backref = 0;
7640 ret = record_extent(trans, info, path, rec, back, allocated, flags);
/* Commit whatever we managed to rewrite, preserving the first error. */
7648 int err = btrfs_commit_transaction(trans, info->extent_root);
7653 btrfs_free_path(path);
/*
 * NOTE(review): partial extract -- missing lines not shown.
 *
 * Rewrite the FULL_BACKREF flag on @rec's extent item (or metadata
 * item) in the extent tree so it matches rec->flag_block_full_backref,
 * then commit the transaction.
 */
7657 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7658 struct extent_record *rec)
7660 struct btrfs_trans_handle *trans;
7661 struct btrfs_root *root = fs_info->extent_root;
7662 struct btrfs_path *path;
7663 struct btrfs_extent_item *ei;
7664 struct btrfs_key key;
7668 key.objectid = rec->start;
7669 if (rec->metadata) {
/* Skinny metadata items key on level rather than size. */
7670 key.type = BTRFS_METADATA_ITEM_KEY;
7671 key.offset = rec->info_level;
7673 key.type = BTRFS_EXTENT_ITEM_KEY;
7674 key.offset = rec->max_size;
7677 path = btrfs_alloc_path();
7681 trans = btrfs_start_transaction(root, 0);
7682 if (IS_ERR(trans)) {
7683 btrfs_free_path(path);
7684 return PTR_ERR(trans);
7687 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7689 btrfs_free_path(path);
7690 btrfs_commit_transaction(trans, root);
7693 fprintf(stderr, "Didn't find extent for %llu\n",
7694 (unsigned long long)rec->start);
7695 btrfs_free_path(path);
7696 btrfs_commit_transaction(trans, root);
7700 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7701 struct btrfs_extent_item);
7702 flags = btrfs_extent_flags(path->nodes[0], ei);
7703 if (rec->flag_block_full_backref) {
7704 fprintf(stderr, "setting full backref on %llu\n",
7705 (unsigned long long)key.objectid);
7706 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7708 fprintf(stderr, "clearing full backref on %llu\n",
7709 (unsigned long long)key.objectid);
7710 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7712 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7713 btrfs_mark_buffer_dirty(path->nodes[0]);
7714 btrfs_free_path(path);
7715 return btrfs_commit_transaction(trans, root);
/* NOTE(review): partial extract -- missing lines not shown. */
7718 /* right now we only prune from the extent allocation tree */
7719 static int prune_one_block(struct btrfs_trans_handle *trans,
7720 struct btrfs_fs_info *info,
7721 struct btrfs_corrupt_block *corrupt)
7724 struct btrfs_path path;
7725 struct extent_buffer *eb;
7729 int level = corrupt->level + 1;
7731 btrfs_init_path(&path);
7733 /* we want to stop at the parent to our busted block */
7734 path.lowest_level = level;
7736 ret = btrfs_search_slot(trans, info->extent_root,
7737 &corrupt->key, &path, -1, 1);
7742 eb = path.nodes[level];
7749 * hopefully the search gave us the block we want to prune,
7750 * lets try that first
7752 slot = path.slots[level];
7753 found = btrfs_node_blockptr(eb, slot);
7754 if (found == corrupt->cache.start)
7757 nritems = btrfs_header_nritems(eb);
7759 /* the search failed, lets scan this node and hope we find it */
7760 for (slot = 0; slot < nritems; slot++) {
7761 found = btrfs_node_blockptr(eb, slot);
7762 if (found == corrupt->cache.start)
7766 * we couldn't find the bad block. TODO, search all the nodes for pointers
/* Never prune the root node itself. */
7769 if (eb == info->extent_root->node) {
7774 btrfs_release_path(&path);
/* Found the pointer to the corrupt block: drop it from the parent. */
7779 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7780 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7783 btrfs_release_path(&path);
/*
 * NOTE(review): partial extract -- missing lines not shown.
 *
 * Walk info->corrupt_blocks, deleting the parent pointer to each corrupt
 * block via prune_one_block(), then commit the transaction.
 */
7787 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7789 struct btrfs_trans_handle *trans = NULL;
7790 struct cache_extent *cache;
7791 struct btrfs_corrupt_block *corrupt;
7794 cache = search_cache_extent(info->corrupt_blocks, 0);
/* Lazily start the transaction on first use. */
7798 trans = btrfs_start_transaction(info->extent_root, 1);
7800 return PTR_ERR(trans);
7802 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7803 prune_one_block(trans, info, corrupt);
7804 remove_cache_extent(info->corrupt_blocks, cache);
7807 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * NOTE(review): partial extract -- missing lines not shown.
 *
 * Clear all EXTENT_DIRTY ranges from the free-space cache and walk the
 * block groups so their cached free space gets rebuilt.
 */
7811 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7813 struct btrfs_block_group_cache *cache;
7818 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7819 &start, &end, EXTENT_DIRTY);
7822 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7828 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Advance past this block group. */
7833 start = cache->key.objectid + cache->key.offset;
/*
 * NOTE(review): partial extract -- many original lines are missing from
 * this view; comments cover only the visible logic.
 *
 * Final pass over @extent_cache: report (and with repair enabled, fix)
 * duplicate extent items, ref-count mismatches, backpointer mismatches,
 * owner-ref failures, bad full-backref flags, stripe-crossing metadata
 * and chunk-type mismatches, then rebuild block accounting.
 */
7837 static int check_extent_refs(struct btrfs_root *root,
7838 struct cache_tree *extent_cache)
7840 struct extent_record *rec;
7841 struct cache_extent *cache;
7850 * if we're doing a repair, we have to make sure
7851 * we don't allocate from the problem extents.
7852 * In the worst case, this will be all the
7855 cache = search_cache_extent(extent_cache, 0);
7857 rec = container_of(cache, struct extent_record, cache);
7858 set_extent_dirty(root->fs_info->excluded_extents,
7860 rec->start + rec->max_size - 1,
7862 cache = next_cache_extent(cache);
7865 /* pin down all the corrupted blocks too */
7866 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7868 set_extent_dirty(root->fs_info->excluded_extents,
7870 cache->start + cache->size - 1,
7872 cache = next_cache_extent(cache);
7874 prune_corrupt_blocks(root->fs_info);
7875 reset_cached_block_groups(root->fs_info);
7878 reset_cached_block_groups(root->fs_info);
7881 * We need to delete any duplicate entries we find first otherwise we
7882 * could mess up the extent tree when we have backrefs that actually
7883 * belong to a different extent item and not the weird duplicate one.
7885 while (repair && !list_empty(&duplicate_extents)) {
7886 rec = to_extent_record(duplicate_extents.next);
7887 list_del_init(&rec->list);
7889 /* Sometimes we can find a backref before we find an actual
7890 * extent, so we need to process it a little bit to see if there
7891 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7892 * if this is a backref screwup. If we need to delete stuff
7893 * process_duplicates() will return 0, otherwise it will return
7896 if (process_duplicates(root, extent_cache, rec))
7898 ret = delete_duplicate_records(root, rec);
7902 * delete_duplicate_records will return the number of entries
7903 * deleted, so if it's greater than 0 then we know we actually
7904 * did something and we need to remove.
/* Main per-record check loop. */
7918 cache = search_cache_extent(extent_cache, 0);
7921 rec = container_of(cache, struct extent_record, cache);
7922 if (rec->num_duplicates) {
7923 fprintf(stderr, "extent item %llu has multiple extent "
7924 "items\n", (unsigned long long)rec->start);
7929 if (rec->refs != rec->extent_item_refs) {
7930 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7931 (unsigned long long)rec->start,
7932 (unsigned long long)rec->nr);
7933 fprintf(stderr, "extent item %llu, found %llu\n",
7934 (unsigned long long)rec->extent_item_refs,
7935 (unsigned long long)rec->refs);
7936 ret = record_orphan_data_extents(root->fs_info, rec);
7943 * we can't use the extent to repair file
7944 * extent, let the fallback method handle it.
7946 if (!fixed && repair) {
7947 ret = fixup_extent_refs(
7958 if (all_backpointers_checked(rec, 1)) {
7959 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7960 (unsigned long long)rec->start,
7961 (unsigned long long)rec->nr);
7963 if (!fixed && !recorded && repair) {
7964 ret = fixup_extent_refs(root->fs_info,
7973 if (!rec->owner_ref_checked) {
7974 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7975 (unsigned long long)rec->start,
7976 (unsigned long long)rec->nr);
7977 if (!fixed && !recorded && repair) {
7978 ret = fixup_extent_refs(root->fs_info,
7987 if (rec->bad_full_backref) {
7988 fprintf(stderr, "bad full backref, on [%llu]\n",
7989 (unsigned long long)rec->start);
7991 ret = fixup_extent_flags(root->fs_info, rec);
8000 * Although it's not a extent ref's problem, we reuse this
8001 * routine for error reporting.
8002 * No repair function yet.
8004 if (rec->crossing_stripes) {
8006 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8007 rec->start, rec->start + rec->max_size);
8012 if (rec->wrong_chunk_type) {
8014 "bad extent [%llu, %llu), type mismatch with chunk\n",
8015 rec->start, rec->start + rec->max_size);
8020 remove_cache_extent(extent_cache, cache);
8021 free_all_extent_backrefs(rec);
8022 if (!init_extent_tree && repair && (!cur_err || fixed))
8023 clear_extent_dirty(root->fs_info->excluded_extents,
8025 rec->start + rec->max_size - 1,
8031 if (ret && ret != -EAGAIN) {
8032 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8035 struct btrfs_trans_handle *trans;
/* After repairs, recompute block accounting in the extent root. */
8037 root = root->fs_info->extent_root;
8038 trans = btrfs_start_transaction(root, 1);
8039 if (IS_ERR(trans)) {
8040 ret = PTR_ERR(trans);
8044 btrfs_fix_block_accounting(trans, root);
8045 ret = btrfs_commit_transaction(trans, root);
8050 fprintf(stderr, "repaired damaged extent references\n");
/*
 * NOTE(review): partial extract -- the return statement is outside this
 * view.
 *
 * Convert a chunk's logical @length into the per-device stripe length
 * for the given RAID profile @type: RAID0 divides across all stripes,
 * RAID10 across stripe pairs, RAID5/6 across data stripes (excluding
 * one/two parity stripes); all other profiles use the full length.
 */
8056 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8060 if (type & BTRFS_BLOCK_GROUP_RAID0) {
8061 stripe_size = length;
8062 stripe_size /= num_stripes;
8063 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8064 stripe_size = length * 2;
8065 stripe_size /= num_stripes;
8066 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8067 stripe_size = length;
8068 stripe_size /= (num_stripes - 1);
8069 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8070 stripe_size = length;
8071 stripe_size /= (num_stripes - 2);
8073 stripe_size = length;
/* NOTE(review): partial extract -- missing lines not shown. */
8079 * Check the chunk with its block group/dev list ref:
8080 * Return 0 if all refs seems valid.
8081 * Return 1 if part of refs seems valid, need later check for rebuild ref
8082 * like missing block group and needs to search extent tree to rebuild them.
8083 * Return -1 if essential refs are missing and unable to rebuild.
8085 static int check_chunk_refs(struct chunk_record *chunk_rec,
8086 struct block_group_tree *block_group_cache,
8087 struct device_extent_tree *dev_extent_cache,
8090 struct cache_extent *block_group_item;
8091 struct block_group_record *block_group_rec;
8092 struct cache_extent *dev_extent_item;
8093 struct device_extent_record *dev_extent_rec;
8097 int metadump_v2 = 0;
/* Step 1: the chunk must have a matching block group item. */
8101 block_group_item = lookup_cache_extent(&block_group_cache->tree,
8104 if (block_group_item) {
8105 block_group_rec = container_of(block_group_item,
8106 struct block_group_record,
8108 if (chunk_rec->length != block_group_rec->offset ||
8109 chunk_rec->offset != block_group_rec->objectid ||
8111 chunk_rec->type_flags != block_group_rec->flags)) {
8114 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8115 chunk_rec->objectid,
8120 chunk_rec->type_flags,
8121 block_group_rec->objectid,
8122 block_group_rec->type,
8123 block_group_rec->offset,
8124 block_group_rec->offset,
8125 block_group_rec->objectid,
8126 block_group_rec->flags);
8129 list_del_init(&block_group_rec->list);
8130 chunk_rec->bg_rec = block_group_rec;
8135 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8136 chunk_rec->objectid,
8141 chunk_rec->type_flags);
/* Step 2: every stripe must have a matching dev extent. */
8148 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8149 chunk_rec->num_stripes);
8150 for (i = 0; i < chunk_rec->num_stripes; ++i) {
8151 devid = chunk_rec->stripes[i].devid;
8152 offset = chunk_rec->stripes[i].offset;
8153 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8154 devid, offset, length);
8155 if (dev_extent_item) {
8156 dev_extent_rec = container_of(dev_extent_item,
8157 struct device_extent_record,
8159 if (dev_extent_rec->objectid != devid ||
8160 dev_extent_rec->offset != offset ||
8161 dev_extent_rec->chunk_offset != chunk_rec->offset ||
8162 dev_extent_rec->length != length) {
8165 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8166 chunk_rec->objectid,
8169 chunk_rec->stripes[i].devid,
8170 chunk_rec->stripes[i].offset,
8171 dev_extent_rec->objectid,
8172 dev_extent_rec->offset,
8173 dev_extent_rec->length);
8176 list_move(&dev_extent_rec->chunk_list,
8177 &chunk_rec->dextents);
8182 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8183 chunk_rec->objectid,
8186 chunk_rec->stripes[i].devid,
8187 chunk_rec->stripes[i].offset);
/* NOTE(review): partial extract -- missing lines not shown. */
8194 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Verify every chunk against its block group and dev extents via
 * check_chunk_refs(), sorting chunks onto the @good / @rebuild / @bad
 * lists per its verdict, then report any block groups or dev extents
 * left unclaimed by any chunk.
 */
8195 int check_chunks(struct cache_tree *chunk_cache,
8196 struct block_group_tree *block_group_cache,
8197 struct device_extent_tree *dev_extent_cache,
8198 struct list_head *good, struct list_head *bad,
8199 struct list_head *rebuild, int silent)
8201 struct cache_extent *chunk_item;
8202 struct chunk_record *chunk_rec;
8203 struct block_group_record *bg_rec;
8204 struct device_extent_record *dext_rec;
8208 chunk_item = first_cache_extent(chunk_cache);
8209 while (chunk_item) {
8210 chunk_rec = container_of(chunk_item, struct chunk_record,
8212 err = check_chunk_refs(chunk_rec, block_group_cache,
8213 dev_extent_cache, silent);
8216 if (err == 0 && good)
8217 list_add_tail(&chunk_rec->list, good);
8218 if (err > 0 && rebuild)
8219 list_add_tail(&chunk_rec->list, rebuild);
8221 list_add_tail(&chunk_rec->list, bad);
8222 chunk_item = next_cache_extent(chunk_item);
/* Anything still on these lists has no owning chunk. */
8225 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8228 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8236 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8240 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * NOTE(review): partial extract -- missing lines not shown.
 *
 * Sum the lengths of all dev extents belonging to @dev_rec and compare
 * the total against the device item's byte_used, reporting a mismatch.
 */
8251 static int check_device_used(struct device_record *dev_rec,
8252 struct device_extent_tree *dext_cache)
8254 struct cache_extent *cache;
8255 struct device_extent_record *dev_extent_rec;
8258 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8260 dev_extent_rec = container_of(cache,
8261 struct device_extent_record,
/* Stop once the iteration crosses into another device's extents. */
8263 if (dev_extent_rec->objectid != dev_rec->devid)
8266 list_del_init(&dev_extent_rec->device_list);
8267 total_byte += dev_extent_rec->length;
8268 cache = next_cache_extent(cache);
8271 if (total_byte != dev_rec->byte_used) {
8273 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8274 total_byte, dev_rec->byte_used, dev_rec->objectid,
8275 dev_rec->type, dev_rec->offset);
/* NOTE(review): partial extract -- missing lines not shown. */
8282 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Validate every device item's used-bytes against its dev extents, then
 * report dev extents that have no owning device at all.
 */
8283 static int check_devices(struct rb_root *dev_cache,
8284 struct device_extent_tree *dev_extent_cache)
8286 struct rb_node *dev_node;
8287 struct device_record *dev_rec;
8288 struct device_extent_record *dext_rec;
8292 dev_node = rb_first(dev_cache);
8294 dev_rec = container_of(dev_node, struct device_record, node);
8295 err = check_device_used(dev_rec, dev_extent_cache);
8299 dev_node = rb_next(dev_node);
8301 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8304 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8305 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * NOTE(review): partial extract -- missing lines not shown.
 *
 * Allocate and queue a root_item_record describing a tree root to scan
 * (bytenr, level, drop state, ...) onto @head for later processing by
 * deal_root_from_list().  The visible memcpy suggests @drop_key is
 * copied in; whether a NULL drop_key is guarded against is not visible
 * in this extract -- confirm against the full source.
 */
8312 static int add_root_item_to_list(struct list_head *head,
8313 u64 objectid, u64 bytenr, u64 last_snapshot,
8314 u8 level, u8 drop_level,
8315 int level_size, struct btrfs_key *drop_key)
8318 struct root_item_record *ri_rec;
8319 ri_rec = malloc(sizeof(*ri_rec));
8322 ri_rec->bytenr = bytenr;
8323 ri_rec->objectid = objectid;
8324 ri_rec->level = level;
8325 ri_rec->level_size = level_size;
8326 ri_rec->drop_level = drop_level;
8327 ri_rec->last_snapshot = last_snapshot;
8329 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8330 list_add_tail(&ri_rec->list, head);
/*
 * Drain @list, unlinking every root_item_record in turn (the free of
 * each record is in a line elided from this view).
 */
8335 static void free_root_item_list(struct list_head *list)
8337 struct root_item_record *ri_rec;
8339 while (!list_empty(list)) {
8340 ri_rec = list_first_entry(list, struct root_item_record,
8342 list_del_init(&ri_rec->list);
/*
 * Process every root queued on @list: read each root's tree block, seed
 * the pending/seen/nodes caches with it, then drive run_next_block()
 * until the traversal drains (first per-root, then a final global pass
 * at the bottom).
 *
 * The many cache-tree parameters are shared accounting state built up by
 * the traversal (extents, chunks, devices, block groups, dev extents).
 */
8347 static int deal_root_from_list(struct list_head *list,
8348 struct btrfs_root *root,
8349 struct block_info *bits,
8351 struct cache_tree *pending,
8352 struct cache_tree *seen,
8353 struct cache_tree *reada,
8354 struct cache_tree *nodes,
8355 struct cache_tree *extent_cache,
8356 struct cache_tree *chunk_cache,
8357 struct rb_root *dev_cache,
8358 struct block_group_tree *block_group_cache,
8359 struct device_extent_tree *dev_extent_cache)
8364 while (!list_empty(list)) {
8365 struct root_item_record *rec;
8366 struct extent_buffer *buf;
8367 rec = list_entry(list->next,
8368 struct root_item_record, list);
/* Read the root's tree block; a bad read aborts this root. */
8370 buf = read_tree_block(root->fs_info->tree_root,
8371 rec->bytenr, rec->level_size, 0);
8372 if (!extent_buffer_uptodate(buf)) {
8373 free_extent_buffer(buf);
8377 ret = add_root_to_pending(buf, extent_cache, pending,
8378 seen, nodes, rec->objectid);
8382 * To rebuild extent tree, we need deal with snapshot
8383 * one by one, otherwise we deal with node firstly which
8384 * can maximize readahead.
8387 ret = run_next_block(root, bits, bits_nr, &last,
8388 pending, seen, reada, nodes,
8389 extent_cache, chunk_cache,
8390 dev_cache, block_group_cache,
8391 dev_extent_cache, rec);
8395 free_extent_buffer(buf);
8396 list_del(&rec->list);
/* Final pass with no specific root record: drain whatever remains. */
8402 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8403 reada, nodes, extent_cache, chunk_cache,
8404 dev_cache, block_group_cache,
8405 dev_extent_cache, NULL);
/*
 * Top-level pass of the original check mode: walk every tree root
 * (normal and half-dropped snapshots), accumulate extent/chunk/device/
 * block-group records, then cross-check them against each other
 * (check_chunks, check_extent_refs, check_devices).
 *
 * Note the two teardown sequences near the bottom: the first is the
 * normal-exit cleanup, the second (after what is presumably an error
 * label in elided code) additionally frees the extent record cache and
 * the root item lists.
 */
8415 static int check_chunks_and_extents(struct btrfs_root *root)
8417 struct rb_root dev_cache;
8418 struct cache_tree chunk_cache;
8419 struct block_group_tree block_group_cache;
8420 struct device_extent_tree dev_extent_cache;
8421 struct cache_tree extent_cache;
8422 struct cache_tree seen;
8423 struct cache_tree pending;
8424 struct cache_tree reada;
8425 struct cache_tree nodes;
8426 struct extent_io_tree excluded_extents;
8427 struct cache_tree corrupt_blocks;
8428 struct btrfs_path path;
8429 struct btrfs_key key;
8430 struct btrfs_key found_key;
8432 struct block_info *bits;
8434 struct extent_buffer *leaf;
8436 struct btrfs_root_item ri;
8437 struct list_head dropping_trees;
8438 struct list_head normal_trees;
8439 struct btrfs_root *root1;
/* Initialize all the per-run caches and lists. */
8444 dev_cache = RB_ROOT;
8445 cache_tree_init(&chunk_cache);
8446 block_group_tree_init(&block_group_cache);
8447 device_extent_tree_init(&dev_extent_cache);
8449 cache_tree_init(&extent_cache);
8450 cache_tree_init(&seen);
8451 cache_tree_init(&pending);
8452 cache_tree_init(&nodes);
8453 cache_tree_init(&reada);
8454 cache_tree_init(&corrupt_blocks);
8455 extent_io_tree_init(&excluded_extents);
8456 INIT_LIST_HEAD(&dropping_trees);
8457 INIT_LIST_HEAD(&normal_trees);
/* Hook fsck state into fs_info so lower layers can record findings. */
8460 root->fs_info->excluded_extents = &excluded_extents;
8461 root->fs_info->fsck_extent_cache = &extent_cache;
8462 root->fs_info->free_extent_hook = free_extent_hook;
8463 root->fs_info->corrupt_blocks = &corrupt_blocks;
8467 bits = malloc(bits_nr * sizeof(struct block_info));
8473 if (ctx.progress_enabled) {
8474 ctx.tp = TASK_EXTENTS;
8475 task_start(ctx.info);
/* The tree root and chunk root are always queued first. */
8479 root1 = root->fs_info->tree_root;
8480 level = btrfs_header_level(root1->node);
8481 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8482 root1->node->start, 0, level, 0,
8483 root1->nodesize, NULL);
8486 root1 = root->fs_info->chunk_root;
8487 level = btrfs_header_level(root1->node);
8488 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8489 root1->node->start, 0, level, 0,
8490 root1->nodesize, NULL);
/* Scan the root tree for every ROOT_ITEM and queue each subvolume. */
8493 btrfs_init_path(&path);
8496 key.type = BTRFS_ROOT_ITEM_KEY;
8497 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8502 leaf = path.nodes[0];
8503 slot = path.slots[0];
8504 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8505 ret = btrfs_next_leaf(root, &path);
8508 leaf = path.nodes[0];
8509 slot = path.slots[0];
8511 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8512 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
8513 unsigned long offset;
8516 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8517 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8518 last_snapshot = btrfs_root_last_snapshot(&ri);
/*
 * drop_progress objectid == 0 means the root is not being
 * deleted: queue it as a normal tree. Otherwise it is a
 * half-dropped snapshot and goes on dropping_trees with its
 * drop key so traversal can resume where deletion stopped.
 */
8519 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8520 level = btrfs_root_level(&ri);
8521 level_size = root->nodesize;
8522 ret = add_root_item_to_list(&normal_trees,
8524 btrfs_root_bytenr(&ri),
8525 last_snapshot, level,
8526 0, level_size, NULL);
8530 level = btrfs_root_level(&ri);
8531 level_size = root->nodesize;
8532 objectid = found_key.objectid;
8533 btrfs_disk_key_to_cpu(&found_key,
8535 ret = add_root_item_to_list(&dropping_trees,
8537 btrfs_root_bytenr(&ri),
8538 last_snapshot, level,
8540 level_size, &found_key);
8547 btrfs_release_path(&path);
8550 * check_block can return -EAGAIN if it fixes something, please keep
8551 * this in mind when dealing with return values from these functions, if
8552 * we get -EAGAIN we want to fall through and restart the loop.
8554 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8555 &seen, &reada, &nodes, &extent_cache,
8556 &chunk_cache, &dev_cache, &block_group_cache,
8563 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8564 &pending, &seen, &reada, &nodes,
8565 &extent_cache, &chunk_cache, &dev_cache,
8566 &block_group_cache, &dev_extent_cache);
/* Cross-check the caches built during traversal. */
8573 ret = check_chunks(&chunk_cache, &block_group_cache,
8574 &dev_extent_cache, NULL, NULL, NULL, 0);
8581 ret = check_extent_refs(root, &extent_cache);
8588 ret = check_devices(&dev_cache, &dev_extent_cache);
8593 task_stop(ctx.info);
/* Normal-exit cleanup: detach fsck hooks and free every cache. */
8595 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8596 extent_io_tree_cleanup(&excluded_extents);
8597 root->fs_info->fsck_extent_cache = NULL;
8598 root->fs_info->free_extent_hook = NULL;
8599 root->fs_info->corrupt_blocks = NULL;
8600 root->fs_info->excluded_extents = NULL;
8603 free_chunk_cache_tree(&chunk_cache);
8604 free_device_cache_tree(&dev_cache);
8605 free_block_group_tree(&block_group_cache);
8606 free_device_extent_tree(&dev_extent_cache);
8607 free_extent_cache_tree(&seen);
8608 free_extent_cache_tree(&pending);
8609 free_extent_cache_tree(&reada);
8610 free_extent_cache_tree(&nodes);
/* Error-path cleanup (label elided): also drops extent records/lists. */
8613 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8614 free_extent_cache_tree(&seen);
8615 free_extent_cache_tree(&pending);
8616 free_extent_cache_tree(&reada);
8617 free_extent_cache_tree(&nodes);
8618 free_chunk_cache_tree(&chunk_cache);
8619 free_block_group_tree(&block_group_cache);
8620 free_device_cache_tree(&dev_cache);
8621 free_device_extent_tree(&dev_extent_cache);
8622 free_extent_record_cache(root->fs_info, &extent_cache);
8623 free_root_item_list(&normal_trees);
8624 free_root_item_list(&dropping_trees);
8625 extent_io_tree_cleanup(&excluded_extents);
8630 * Check backrefs of a tree block given by @bytenr or @eb.
8632 * @root: the root containing the @bytenr or @eb
8633 * @eb: tree block extent buffer, can be NULL
8634 * @bytenr: bytenr of the tree block to search
8635 * @level: tree level of the tree block
8636 * @owner: owner of the tree block
8638 * Return >0 for any error found and output error message
8639 * Return 0 for no error found
8641 static int check_tree_block_ref(struct btrfs_root *root,
8642 struct extent_buffer *eb, u64 bytenr,
8643 int level, u64 owner)
8645 struct btrfs_key key;
8646 struct btrfs_root *extent_root = root->fs_info->extent_root;
8647 struct btrfs_path path;
8648 struct btrfs_extent_item *ei;
8649 struct btrfs_extent_inline_ref *iref;
8650 struct extent_buffer *leaf;
8656 u32 nodesize = root->nodesize;
/*
 * Look the extent up in the extent tree; the key type depends on
 * whether the fs uses skinny metadata items.
 */
8663 btrfs_init_path(&path);
8664 key.objectid = bytenr;
8665 if (btrfs_fs_incompat(root->fs_info,
8666 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8667 key.type = BTRFS_METADATA_ITEM_KEY;
8669 key.type = BTRFS_EXTENT_ITEM_KEY;
8670 key.offset = (u64)-1;
8672 /* Search for the backref in extent tree */
8673 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8675 err |= BACKREF_MISSING;
8678 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8680 err |= BACKREF_MISSING;
8684 leaf = path.nodes[0];
8685 slot = path.slots[0];
8686 btrfs_item_key_to_cpu(leaf, &key, slot);
8688 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * For METADATA_ITEM the level is stored in the key offset; old
 * EXTENT_ITEM metadata carries a tree_block_info before the inline refs.
 */
8690 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8691 skinny_level = (int)key.offset;
8692 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8694 struct btrfs_tree_block_info *info;
8696 info = (struct btrfs_tree_block_info *)(ei + 1);
8697 skinny_level = btrfs_tree_block_level(leaf, info);
8698 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* With the live @eb in hand, validate flags, generation and level. */
8705 if (!(btrfs_extent_flags(leaf, ei) &
8706 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8708 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8709 key.objectid, nodesize,
8710 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8711 err = BACKREF_MISMATCH;
8713 header_gen = btrfs_header_generation(eb);
8714 extent_gen = btrfs_extent_generation(leaf, ei);
8715 if (header_gen != extent_gen) {
8717 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8718 key.objectid, nodesize, header_gen,
8720 err = BACKREF_MISMATCH;
8722 if (level != skinny_level) {
8724 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8725 key.objectid, nodesize, level, skinny_level);
8726 err = BACKREF_MISMATCH;
8728 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8730 "extent[%llu %u] is referred by other roots than %llu",
8731 key.objectid, nodesize, root->objectid);
8732 err = BACKREF_MISMATCH;
8737 * Iterate the extent/metadata item to find the exact backref
8739 item_size = btrfs_item_size_nr(leaf, slot);
8740 ptr = (unsigned long)iref;
8741 end = (unsigned long)ei + item_size;
8743 iref = (struct btrfs_extent_inline_ref *)ptr;
8744 type = btrfs_extent_inline_ref_type(leaf, iref);
8745 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8747 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8748 (offset == root->objectid || offset == owner)) {
8750 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8751 /* Check if the backref points to valid referencer */
/* Recursive call: a zero return means the shared ref resolved. */
8752 found_ref = !check_tree_block_ref(root, NULL, offset,
8758 ptr += btrfs_extent_inline_ref_size(type);
8762 * Inlined extent item doesn't have what we need, check
8763 * TREE_BLOCK_REF_KEY
8766 btrfs_release_path(&path);
8767 key.objectid = bytenr;
8768 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8769 key.offset = root->objectid;
8771 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8776 err |= BACKREF_MISSING;
8778 btrfs_release_path(&path);
8779 if (eb && (err & BACKREF_MISSING))
8780 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8781 bytenr, nodesize, owner, level);
8786 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8788 * Return >0 any error found and output error message
8789 * Return 0 for no error found
8791 static int check_extent_data_item(struct btrfs_root *root,
8792 struct extent_buffer *eb, int slot)
8794 struct btrfs_file_extent_item *fi;
8795 struct btrfs_path path;
8796 struct btrfs_root *extent_root = root->fs_info->extent_root;
8797 struct btrfs_key fi_key;
8798 struct btrfs_key dbref_key;
8799 struct extent_buffer *leaf;
8800 struct btrfs_extent_item *ei;
8801 struct btrfs_extent_inline_ref *iref;
8802 struct btrfs_extent_data_ref *dref;
8804 u64 file_extent_gen;
8807 u64 extent_num_bytes;
8815 int found_dbackref = 0;
8819 btrfs_item_key_to_cpu(eb, &fi_key, slot);
8820 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8821 file_extent_gen = btrfs_file_extent_generation(eb, fi);
8823 /* Nothing to check for hole and inline data extents */
8824 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8825 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8828 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8829 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8830 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8832 /* Check unaligned disk_num_bytes and num_bytes */
8833 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8835 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8836 fi_key.objectid, fi_key.offset, disk_num_bytes,
8838 err |= BYTES_UNALIGNED;
8840 data_bytes_allocated += disk_num_bytes;
8842 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8844 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8845 fi_key.objectid, fi_key.offset, extent_num_bytes,
8847 err |= BYTES_UNALIGNED;
8849 data_bytes_referenced += extent_num_bytes;
8851 owner = btrfs_header_owner(eb);
8853 /* Check the extent item of the file extent in extent tree */
8854 btrfs_init_path(&path);
8855 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8856 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8857 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8859 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8861 err |= BACKREF_MISSING;
8865 leaf = path.nodes[0];
8866 slot = path.slots[0];
8867 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8869 extent_flags = btrfs_extent_flags(leaf, ei);
8870 extent_gen = btrfs_extent_generation(leaf, ei);
/* A data extent's item must carry the DATA flag. */
8872 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8874 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8875 disk_bytenr, disk_num_bytes,
8876 BTRFS_EXTENT_FLAG_DATA);
8877 err |= BACKREF_MISMATCH;
/* The file extent cannot pre-date the extent item's generation. */
8880 if (file_extent_gen < extent_gen) {
8882 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8883 disk_bytenr, disk_num_bytes, file_extent_gen,
8885 err |= BACKREF_MISMATCH;
8888 /* Check data backref inside that extent item */
8889 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8890 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8891 ptr = (unsigned long)iref;
8892 end = (unsigned long)ei + item_size;
8894 iref = (struct btrfs_extent_inline_ref *)ptr;
8895 type = btrfs_extent_inline_ref_type(leaf, iref);
8896 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8898 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8899 ref_root = btrfs_extent_data_ref_root(leaf, dref);
8900 if (ref_root == owner || ref_root == root->objectid)
8902 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/* Shared ref: verify the parent tree block exists. */
8903 found_dbackref = !check_tree_block_ref(root, NULL,
8904 btrfs_extent_inline_ref_offset(leaf, iref),
8910 ptr += btrfs_extent_inline_ref_size(type);
8913 /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8914 if (!found_dbackref) {
8915 btrfs_release_path(&path);
8917 btrfs_init_path(&path);
8918 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8919 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
8920 dbref_key.offset = hash_extent_data_ref(root->objectid,
8921 fi_key.objectid, fi_key.offset);
8923 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8924 &dbref_key, &path, 0, 0);
8929 if (!found_dbackref)
8930 err |= BACKREF_MISSING;
8932 btrfs_release_path(&path);
8933 if (err & BACKREF_MISSING) {
8934 error("data extent[%llu %llu] backref lost",
8935 disk_bytenr, disk_num_bytes);
8941 * Get real tree block level for the case like shared block
8942 * Return >= 0 as tree level
8943 * Return <0 for error
8945 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8947 struct extent_buffer *eb;
8948 struct btrfs_path path;
8949 struct btrfs_key key;
8950 struct btrfs_extent_item *ei;
8953 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8958 /* Search extent tree for extent generation and level */
8959 key.objectid = bytenr;
8960 key.type = BTRFS_METADATA_ITEM_KEY;
8961 key.offset = (u64)-1;
8963 btrfs_init_path(&path);
8964 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8967 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8975 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8976 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8977 struct btrfs_extent_item);
/* Extent must be metadata; data extents have no level. */
8978 flags = btrfs_extent_flags(path.nodes[0], ei);
8979 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8984 /* Get transid for later read_tree_block() check */
8985 transid = btrfs_extent_generation(path.nodes[0], ei);
8987 /* Get backref level as one source */
8988 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8989 backref_level = key.offset;
8991 struct btrfs_tree_block_info *info;
8993 info = (struct btrfs_tree_block_info *)(ei + 1);
8994 backref_level = btrfs_tree_block_level(path.nodes[0], info);
8996 btrfs_release_path(&path);
8998 /* Get level from tree block as an alternative source */
8999 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9000 if (!extent_buffer_uptodate(eb)) {
9001 free_extent_buffer(eb);
9004 header_level = btrfs_header_level(eb);
9005 free_extent_buffer(eb);
/* The two sources must agree; the header's level wins when they do. */
9007 if (header_level != backref_level)
9009 return header_level;
9012 btrfs_release_path(&path);
9017 * Check if a tree block backref is valid (points to a valid tree block)
9018 * if level == -1, level will be resolved
9019 * Return >0 for any error found and print error message
9021 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9022 u64 bytenr, int level)
9024 struct btrfs_root *root;
9025 struct btrfs_key key;
9026 struct btrfs_path path;
9027 struct extent_buffer *eb;
9028 struct extent_buffer *node;
9029 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9033 /* Query level for level == -1 special case */
9035 level = query_tree_block_level(fs_info, bytenr);
9037 err |= REFERENCER_MISSING;
/* Look up the root the backref claims as owner. */
9041 key.objectid = root_id;
9042 key.type = BTRFS_ROOT_ITEM_KEY;
9043 key.offset = (u64)-1;
9045 root = btrfs_read_fs_root(fs_info, &key);
9047 err |= REFERENCER_MISSING;
9051 /* Read out the tree block to get item/node key */
9052 eb = read_tree_block(root, bytenr, root->nodesize, 0);
9053 if (!extent_buffer_uptodate(eb)) {
9054 err |= REFERENCER_MISSING;
9055 free_extent_buffer(eb);
9059 /* Empty tree, no need to check key */
9060 if (!btrfs_header_nritems(eb) && !level) {
9061 free_extent_buffer(eb);
/* First key of the block: node key above level 0, item key at leaf. */
9066 btrfs_node_key_to_cpu(eb, &key, 0);
9068 btrfs_item_key_to_cpu(eb, &key, 0);
9070 free_extent_buffer(eb);
/*
 * Re-search from the root down to @level with that first key; the
 * block we land on must be the very block the backref points at.
 */
9072 btrfs_init_path(&path);
9073 path.lowest_level = level;
9074 /* Search with the first key, to ensure we can reach it */
9075 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9077 err |= REFERENCER_MISSING;
9081 node = path.nodes[level];
9082 if (btrfs_header_bytenr(node) != bytenr) {
9084 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9085 bytenr, nodesize, bytenr,
9086 btrfs_header_bytenr(node));
9087 err |= REFERENCER_MISMATCH;
9089 if (btrfs_header_level(node) != level) {
9091 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9092 bytenr, nodesize, level,
9093 btrfs_header_level(node));
9094 err |= REFERENCER_MISMATCH;
9098 btrfs_release_path(&path);
9100 if (err & REFERENCER_MISSING) {
9102 error("extent [%llu %d] lost referencer (owner: %llu)",
9103 bytenr, nodesize, root_id);
9106 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9107 bytenr, nodesize, root_id, level);
9114 * Check referencer for shared block backref
9115 * If level == -1, this function will resolve the level.
9117 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9118 u64 parent, u64 bytenr, int level)
9120 struct extent_buffer *eb;
9121 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9123 int found_parent = 0;
9126 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9127 if (!extent_buffer_uptodate(eb))
9131 level = query_tree_block_level(fs_info, bytenr);
/* The parent must sit exactly one level above the referenced block. */
9135 if (level + 1 != btrfs_header_level(eb))
/* Scan every child pointer of the parent looking for @bytenr. */
9138 nr = btrfs_header_nritems(eb);
9139 for (i = 0; i < nr; i++) {
9140 if (bytenr == btrfs_node_blockptr(eb, i)) {
9146 free_extent_buffer(eb);
9147 if (!found_parent) {
9149 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9150 bytenr, nodesize, parent, level);
9151 return REFERENCER_MISSING;
9157 * Check referencer for normal (inlined) data ref
9158 * If len == 0, it will be resolved by searching in extent tree
9160 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9161 u64 root_id, u64 objectid, u64 offset,
9162 u64 bytenr, u64 len, u32 count)
9164 struct btrfs_root *root;
9165 struct btrfs_root *extent_root = fs_info->extent_root;
9166 struct btrfs_key key;
9167 struct btrfs_path path;
9168 struct extent_buffer *leaf;
9169 struct btrfs_file_extent_item *fi;
9170 u32 found_count = 0;
/* Resolve @len (when 0) from the extent item itself. */
9175 key.objectid = bytenr;
9176 key.type = BTRFS_EXTENT_ITEM_KEY;
9177 key.offset = (u64)-1;
9179 btrfs_init_path(&path);
9180 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9183 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9186 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9187 if (key.objectid != bytenr ||
9188 key.type != BTRFS_EXTENT_ITEM_KEY)
9191 btrfs_release_path(&path);
/* Open the fs root named by the backref. */
9193 key.objectid = root_id;
9194 key.type = BTRFS_ROOT_ITEM_KEY;
9195 key.offset = (u64)-1;
9196 btrfs_init_path(&path);
9198 root = btrfs_read_fs_root(fs_info, &key);
9202 key.objectid = objectid;
9203 key.type = BTRFS_EXTENT_DATA_KEY;
9205 * It can be nasty as data backref offset is
9206 * file offset - file extent offset, which is smaller or
9207 * equal to original backref offset. The only special case is
9208 * overflow. So we need to special check and do further search.
9210 key.offset = offset & (1ULL << 63) ? 0 : offset;
9212 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9217 * Search afterwards to get correct one
9218 * NOTE: As we must do a comprehensive check on the data backref to
9219 * make sure the dref count also matches, we must iterate all file
9220 * extents for that inode.
9223 leaf = path.nodes[0];
9224 slot = path.slots[0];
9226 btrfs_item_key_to_cpu(leaf, &key, slot);
9227 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9229 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9231 * Except normal disk bytenr and disk num bytes, we still
9232 * need to do extra check on dbackref offset as
9233 * dbackref offset = file_offset - file_extent_offset
9235 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9236 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9237 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9241 ret = btrfs_next_item(root, &path);
9246 btrfs_release_path(&path);
/* Count of matching file extents must equal the backref's count. */
9247 if (found_count != count) {
9249 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9250 bytenr, len, root_id, objectid, offset, count, found_count);
9251 return REFERENCER_MISSING;
9257 * Check if the referencer of a shared data backref exists
9259 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9260 u64 parent, u64 bytenr)
9262 struct extent_buffer *eb;
9263 struct btrfs_key key;
9264 struct btrfs_file_extent_item *fi;
9265 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9267 int found_parent = 0;
9270 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9271 if (!extent_buffer_uptodate(eb))
/*
 * The parent of a shared data backref is a leaf: scan its file
 * extent items for one whose disk bytenr matches @bytenr.
 */
9274 nr = btrfs_header_nritems(eb);
9275 for (i = 0; i < nr; i++) {
9276 btrfs_item_key_to_cpu(eb, &key, i);
9277 if (key.type != BTRFS_EXTENT_DATA_KEY)
/* Inline extents store data in the leaf; they have no disk bytenr. */
9280 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9281 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9284 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9291 free_extent_buffer(eb);
9292 if (!found_parent) {
9293 error("shared extent %llu referencer lost (parent: %llu)",
9295 return REFERENCER_MISSING;
9301 * This function will check a given extent item, including its backref and
9302 * itself (like crossing stripe boundary and type)
9304 * Since we don't use extent_record anymore, introduce new error bit
9306 static int check_extent_item(struct btrfs_fs_info *fs_info,
9307 struct extent_buffer *eb, int slot)
9309 struct btrfs_extent_item *ei;
9310 struct btrfs_extent_inline_ref *iref;
9311 struct btrfs_extent_data_ref *dref;
9315 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9316 u32 item_size = btrfs_item_size_nr(eb, slot);
9321 struct btrfs_key key;
/* Account used bytes: key.offset for EXTENT_ITEM, nodesize otherwise. */
9325 btrfs_item_key_to_cpu(eb, &key, slot);
9326 if (key.type == BTRFS_EXTENT_ITEM_KEY)
9327 bytes_used += key.offset;
9329 bytes_used += nodesize;
9331 if (item_size < sizeof(*ei)) {
9333 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9334 * old thing when on disk format is still un-determined.
9335 * No need to care about it anymore
9337 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9341 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9342 flags = btrfs_extent_flags(eb, ei);
9344 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9346 if (metadata && check_crossing_stripes(global_info, key.objectid,
9348 error("bad metadata [%llu, %llu) crossing stripe boundary",
9349 key.objectid, key.objectid + nodesize);
9350 err |= CROSSING_STRIPE_BOUNDARY;
9353 ptr = (unsigned long)(ei + 1);
9355 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9356 /* Old EXTENT_ITEM metadata */
9357 struct btrfs_tree_block_info *info;
9359 info = (struct btrfs_tree_block_info *)ptr;
9360 level = btrfs_tree_block_level(eb, info);
9361 ptr += sizeof(struct btrfs_tree_block_info);
9363 /* New METADATA_ITEM */
9366 end = (unsigned long)ei + item_size;
9369 err |= ITEM_SIZE_MISMATCH;
9373 /* Now check every backref in this extent item */
/* Dispatch each inline ref to the matching referencer check. */
9375 iref = (struct btrfs_extent_inline_ref *)ptr;
9376 type = btrfs_extent_inline_ref_type(eb, iref);
9377 offset = btrfs_extent_inline_ref_offset(eb, iref);
9379 case BTRFS_TREE_BLOCK_REF_KEY:
9380 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9384 case BTRFS_SHARED_BLOCK_REF_KEY:
9385 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9389 case BTRFS_EXTENT_DATA_REF_KEY:
9390 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9391 ret = check_extent_data_backref(fs_info,
9392 btrfs_extent_data_ref_root(eb, dref),
9393 btrfs_extent_data_ref_objectid(eb, dref),
9394 btrfs_extent_data_ref_offset(eb, dref),
9395 key.objectid, key.offset,
9396 btrfs_extent_data_ref_count(eb, dref));
9399 case BTRFS_SHARED_DATA_REF_KEY:
9400 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9404 error("extent[%llu %d %llu] has unknown ref type: %d",
9405 key.objectid, key.type, key.offset, type);
9406 err |= UNKNOWN_TYPE;
9410 ptr += btrfs_extent_inline_ref_size(type);
9419 * Check if a dev extent item is referred correctly by its chunk
9421 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9422 struct extent_buffer *eb, int slot)
9424 struct btrfs_root *chunk_root = fs_info->chunk_root;
9425 struct btrfs_dev_extent *ptr;
9426 struct btrfs_path path;
9427 struct btrfs_key chunk_key;
9428 struct btrfs_key devext_key;
9429 struct btrfs_chunk *chunk;
9430 struct extent_buffer *l;
9434 int found_chunk = 0;
9437 btrfs_item_key_to_cpu(eb, &devext_key, slot);
9438 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9439 length = btrfs_dev_extent_length(eb, ptr);
/* Look up the chunk this dev extent claims to belong to. */
9441 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9442 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9443 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9445 btrfs_init_path(&path);
9446 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9451 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9452 if (btrfs_chunk_length(l, chunk) != length)
/* One of the chunk's stripes must point back at this devid/offset. */
9455 num_stripes = btrfs_chunk_num_stripes(l, chunk);
9456 for (i = 0; i < num_stripes; i++) {
9457 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9458 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9460 if (devid == devext_key.objectid &&
9461 offset == devext_key.offset) {
9467 btrfs_release_path(&path);
9470 "device extent[%llu, %llu, %llu] did not find the related chunk",
9471 devext_key.objectid, devext_key.offset, length);
9472 return REFERENCER_MISSING;
9478 * Check if the used space is correct with the dev item
9480 static int check_dev_item(struct btrfs_fs_info *fs_info,
9481 struct extent_buffer *eb, int slot)
9483 struct btrfs_root *dev_root = fs_info->dev_root;
9484 struct btrfs_dev_item *dev_item;
9485 struct btrfs_path path;
9486 struct btrfs_key key;
9487 struct btrfs_dev_extent *ptr;
9493 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9494 dev_id = btrfs_device_id(eb, dev_item);
9495 used = btrfs_device_bytes_used(eb, dev_item);
9497 key.objectid = dev_id;
9498 key.type = BTRFS_DEV_EXTENT_KEY;
9501 btrfs_init_path(&path);
9502 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9504 btrfs_item_key_to_cpu(eb, &key, slot);
9505 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9506 key.objectid, key.type, key.offset);
9507 btrfs_release_path(&path);
9508 return REFERENCER_MISSING;
9511 /* Iterate dev_extents to calculate the used space of a device */
9513 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9515 if (key.objectid > dev_id)
9517 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9520 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9521 struct btrfs_dev_extent);
9522 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9524 ret = btrfs_next_item(dev_root, &path);
9528 btrfs_release_path(&path);
9530 if (used != total) {
9531 btrfs_item_key_to_cpu(eb, &key, slot);
/*
 * NOTE(review): the message prints BTRFS_ROOT_TREE_OBJECTID and
 * BTRFS_DEV_EXTENT_KEY as the "dev[objectid, type, ...]" fields,
 * yet a dev item lives under DEV_ITEMS/DEV_ITEM keys -- looks
 * wrong; confirm against the on-disk format before changing.
 */
9533 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9534 total, used, BTRFS_ROOT_TREE_OBJECTID,
9535 BTRFS_DEV_EXTENT_KEY, dev_id);
9536 return ACCOUNTING_MISMATCH;
9542 * Check a block group item with its referener (chunk) and its used space
9543 * with extent/metadata item
9545 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9546 struct extent_buffer *eb, int slot)
9548 struct btrfs_root *extent_root = fs_info->extent_root;
9549 struct btrfs_root *chunk_root = fs_info->chunk_root;
9550 struct btrfs_block_group_item *bi;
9551 struct btrfs_block_group_item bg_item;
9552 struct btrfs_path path;
9553 struct btrfs_key bg_key;
9554 struct btrfs_key chunk_key;
9555 struct btrfs_key extent_key;
9556 struct btrfs_chunk *chunk;
9557 struct extent_buffer *leaf;
9558 struct btrfs_extent_item *ei;
9559 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9567 btrfs_item_key_to_cpu(eb, &bg_key, slot);
9568 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9569 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9570 used = btrfs_block_group_used(&bg_item);
9571 bg_flags = btrfs_block_group_flags(&bg_item);
/* The block group's bytenr keys the chunk item's offset. */
9573 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9574 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9575 chunk_key.offset = bg_key.objectid;
9577 btrfs_init_path(&path);
9578 /* Search for the referencer chunk */
9579 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9582 "block group[%llu %llu] did not find the related chunk item",
9583 bg_key.objectid, bg_key.offset);
9584 err |= REFERENCER_MISSING;
9586 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9587 struct btrfs_chunk);
9588 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9591 "block group[%llu %llu] related chunk item length does not match",
9592 bg_key.objectid, bg_key.offset);
9593 err |= REFERENCER_MISMATCH;
9596 btrfs_release_path(&path);
9598 /* Search from the block group bytenr */
9599 extent_key.objectid = bg_key.objectid;
9600 extent_key.type = 0;
9601 extent_key.offset = 0;
9603 btrfs_init_path(&path);
9604 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9608 /* Iterate extent tree to account used space */
9610 leaf = path.nodes[0];
9611 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* Stop once extents pass the end of this block group's range. */
9612 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9615 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9616 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9618 if (extent_key.objectid < bg_key.objectid)
/* METADATA_ITEM's offset is the level, not a byte count. */
9621 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9624 total += extent_key.offset;
9626 ei = btrfs_item_ptr(leaf, path.slots[0],
9627 struct btrfs_extent_item);
/* The extent's data/metadata type must match the block group flags. */
9628 flags = btrfs_extent_flags(leaf, ei);
9629 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9630 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9632 "bad extent[%llu, %llu) type mismatch with chunk",
9633 extent_key.objectid,
9634 extent_key.objectid + extent_key.offset);
9635 err |= CHUNK_TYPE_MISMATCH;
9637 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9638 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9639 BTRFS_BLOCK_GROUP_METADATA))) {
9641 "bad extent[%llu, %llu) type mismatch with chunk",
9642 extent_key.objectid,
9643 extent_key.objectid + nodesize);
9644 err |= CHUNK_TYPE_MISMATCH;
9648 ret = btrfs_next_item(extent_root, &path);
9654 btrfs_release_path(&path);
9656 if (total != used) {
9658 "block group[%llu %llu] used %llu but extent items used %llu",
9659 bg_key.objectid, bg_key.offset, used, total);
9660 err |= ACCOUNTING_MISMATCH;
/*
 * Check a chunk item.
 * Including checking all referred dev_extents and block group
 *
 * Verifies alignment of the chunk length, sanity of the chunk type and
 * RAID profile bits, existence of the matching block group item in the
 * extent tree, and a matching dev extent item for every stripe.
 *
 * Returns 0 if the chunk is fine, otherwise a bitmask of error flags
 * (BYTES_UNALIGNED / UNKNOWN_TYPE / REFERENCER_MISSING / BACKREF_MISSING).
 */
static int check_chunk_item(struct btrfs_fs_info *fs_info,
			    struct extent_buffer *eb, int slot)
	struct btrfs_root *extent_root = fs_info->extent_root;
	struct btrfs_root *dev_root = fs_info->dev_root;
	struct btrfs_path path;
	struct btrfs_key chunk_key;
	struct btrfs_key bg_key;
	struct btrfs_key devext_key;
	struct btrfs_chunk *chunk;
	struct extent_buffer *leaf;
	struct btrfs_block_group_item *bi;
	struct btrfs_block_group_item bg_item;
	struct btrfs_dev_extent *ptr;
	u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);

	/* For a chunk item, key.offset is the logical start of the chunk */
	btrfs_item_key_to_cpu(eb, &chunk_key, slot);
	chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
	length = btrfs_chunk_length(eb, chunk);
	chunk_end = chunk_key.offset + length;
	/* Chunk length must be sector size aligned */
	if (!IS_ALIGNED(length, sectorsize)) {
		error("chunk[%llu %llu) not aligned to %u",
			chunk_key.offset, chunk_end, sectorsize);
		err |= BYTES_UNALIGNED;

	/* Type must contain at least one of DATA/METADATA/SYSTEM */
	type = btrfs_chunk_type(eb, chunk);
	profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
	if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
		error("chunk[%llu %llu) has no chunk type",
			chunk_key.offset, chunk_end);
		err |= UNKNOWN_TYPE;
	/* profile & (profile - 1) is non-zero iff multiple profile bits set */
	if (profile && (profile & (profile - 1))) {
		error("chunk[%llu %llu) multiple profiles detected: %llx",
			chunk_key.offset, chunk_end, profile);
		err |= UNKNOWN_TYPE;

	/* The chunk must be referenced by a block group item of same range */
	bg_key.objectid = chunk_key.offset;
	bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
	bg_key.offset = length;

	btrfs_init_path(&path);
	ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
		"chunk[%llu %llu) did not find the related block group item",
			chunk_key.offset, chunk_end);
		err |= REFERENCER_MISSING;
	leaf = path.nodes[0];
	bi = btrfs_item_ptr(leaf, path.slots[0],
			    struct btrfs_block_group_item);
	read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
	/* Block group flags must match the chunk type exactly */
	if (btrfs_block_group_flags(&bg_item) != type) {
		"chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
		chunk_key.offset, chunk_end, type,
		btrfs_block_group_flags(&bg_item));
		err |= REFERENCER_MISSING;

	/* Each stripe must be backed by a dev extent on the stripe's device */
	num_stripes = btrfs_chunk_num_stripes(eb, chunk);
	for (i = 0; i < num_stripes; i++) {
		btrfs_release_path(&path);
		btrfs_init_path(&path);
		devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
		devext_key.type = BTRFS_DEV_EXTENT_KEY;
		devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);

		ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
		leaf = path.nodes[0];
		ptr = btrfs_item_ptr(leaf, path.slots[0],
				     struct btrfs_dev_extent);
		/* The found dev extent must point back at this very chunk */
		objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
		offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
		if (objectid != chunk_key.objectid ||
		    offset != chunk_key.offset ||
		    btrfs_dev_extent_length(leaf, ptr) != length)
		err |= BACKREF_MISSING;
		/*
		 * NOTE(review): other messages in this function print
		 * chunk_key.offset (the chunk start) as the first value;
		 * chunk_key.objectid below is always the chunk tree objectid,
		 * so this message likely meant chunk_key.offset — confirm
		 * against upstream before changing.
		 */
		"chunk[%llu %llu) stripe %d did not find the related dev extent",
			chunk_key.objectid, chunk_end, i);
	btrfs_release_path(&path);
/*
 * Main entry function to check known items and update related accounting info
 *
 * Walks every item of leaf @eb and dispatches to the per-item-type checker;
 * item types with no known checker are skipped.  Accumulates and returns a
 * bitmask of the error flags produced by the individual checks.
 */
static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_key key;
	struct btrfs_extent_data_ref *dref;

	btrfs_item_key_to_cpu(eb, &key, slot);
	case BTRFS_EXTENT_DATA_KEY:
		ret = check_extent_data_item(root, eb, slot);
	case BTRFS_BLOCK_GROUP_ITEM_KEY:
		ret = check_block_group_item(fs_info, eb, slot);
	case BTRFS_DEV_ITEM_KEY:
		ret = check_dev_item(fs_info, eb, slot);
	case BTRFS_CHUNK_ITEM_KEY:
		ret = check_chunk_item(fs_info, eb, slot);
	case BTRFS_DEV_EXTENT_KEY:
		ret = check_dev_extent_item(fs_info, eb, slot);
	case BTRFS_EXTENT_ITEM_KEY:
	case BTRFS_METADATA_ITEM_KEY:
		ret = check_extent_item(fs_info, eb, slot);
	case BTRFS_EXTENT_CSUM_KEY:
		/* Csum items are only accounted here, not deeply verified */
		total_csum_bytes += btrfs_item_size_nr(eb, slot);
	case BTRFS_TREE_BLOCK_REF_KEY:
		/* key.offset is the objectid of the root owning the block */
		ret = check_tree_block_backref(fs_info, key.offset,
	case BTRFS_EXTENT_DATA_REF_KEY:
		dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
		ret = check_extent_data_backref(fs_info,
				btrfs_extent_data_ref_root(eb, dref),
				btrfs_extent_data_ref_objectid(eb, dref),
				btrfs_extent_data_ref_offset(eb, dref),
				btrfs_extent_data_ref_count(eb, dref));
	case BTRFS_SHARED_BLOCK_REF_KEY:
		/* key.offset is the bytenr of the parent tree block */
		ret = check_shared_block_backref(fs_info, key.offset,
	case BTRFS_SHARED_DATA_REF_KEY:
		ret = check_shared_data_backref(fs_info, key.offset,

	/* Advance to the next slot of the same leaf */
	if (++slot < btrfs_header_nritems(eb))
/*
 * Helper function for later fs/subvol tree check.  To determine if a tree
 * block should be checked.
 * This function will ensure only the direct referencer with lowest rootid to
 * check a fs/subvolume tree block.
 *
 * Backref check at extent tree would detect errors like missing subvolume
 * tree, so we can do aggressive check to reduce duplicated checks.
 *
 * Returns non-zero when @root should check @eb, 0 when some other root with
 * a lower objectid will check it instead.
 */
static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	struct btrfs_key key;
	struct btrfs_path path;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;

	/* Look up the extent/metadata item covering this tree block */
	btrfs_init_path(&path);
	key.objectid = btrfs_header_bytenr(eb);
	key.type = BTRFS_METADATA_ITEM_KEY;
	key.offset = (u64)-1;

	/*
	 * Any failure in backref resolving means we can't determine
	 * whom the tree block belongs to.
	 * So in that case, we need to check that tree block
	 */
	ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
	ret = btrfs_previous_extent_item(extent_root, &path,
			btrfs_header_bytenr(eb));
	leaf = path.nodes[0];
	slot = path.slots[0];
	btrfs_item_key_to_cpu(leaf, &key, slot);
	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);

	/*
	 * The first inline ref starts right after the extent item for a
	 * METADATA_ITEM, and after the tree block info for an EXTENT_ITEM.
	 */
	if (key.type == BTRFS_METADATA_ITEM_KEY) {
		iref = (struct btrfs_extent_inline_ref *)(ei + 1);
		struct btrfs_tree_block_info *info;

		info = (struct btrfs_tree_block_info *)(ei + 1);
		iref = (struct btrfs_extent_inline_ref *)(info + 1);

	/* Walk every inline ref inside the item */
	item_size = btrfs_item_size_nr(leaf, slot);
	ptr = (unsigned long)iref;
	end = (unsigned long)ei + item_size;
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_extent_inline_ref_type(leaf, iref);
		offset = btrfs_extent_inline_ref_offset(leaf, iref);

		/*
		 * We only check the tree block if current root is
		 * the lowest referencer of it.
		 */
		if (type == BTRFS_TREE_BLOCK_REF_KEY &&
		    offset < root->objectid) {
			btrfs_release_path(&path);
		ptr += btrfs_extent_inline_ref_size(type);

	/*
	 * Normally we should also check keyed tree block ref, but that may be
	 * very time consuming.  Inlined ref should already make us skip a lot
	 * of refs now.  So skip search keyed tree block ref.
	 */
	btrfs_release_path(&path);
/*
 * Traversal function for tree block. We will do:
 * 1) Skip shared fs/subvolume tree blocks
 * 2) Update related bytes accounting
 * 3) Pre-order traversal
 *
 * Recurses into children; accumulated error flags are returned to the
 * caller so the whole tree can be checked in one pass.
 */
static int traverse_tree_block(struct btrfs_root *root,
			       struct extent_buffer *node)
	struct extent_buffer *eb;
	struct btrfs_key key;
	struct btrfs_key drop_key;

	/*
	 * Skip shared fs/subvolume tree block, in that case they will
	 * be checked by referencer with lowest rootid
	 */
	if (is_fstree(root->objectid) && !should_check(root, node))

	/* Update bytes accounting */
	total_btree_bytes += node->len;
	if (fs_root_objectid(btrfs_header_owner(node)))
		total_fs_tree_bytes += node->len;
	if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
		total_extent_tree_bytes += node->len;
	/* Remember if we saw an old-style (pre mixed-backref) reloc block */
	if (!found_old_backref &&
	    btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
	    btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
	    !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
		found_old_backref = 1;

	/* pre-order tranversal, check itself first */
	level = btrfs_header_level(node);
	ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
			btrfs_header_level(node),
			btrfs_header_owner(node));
		"check %s failed root %llu bytenr %llu level %d, force continue check",
			level ? "node":"leaf", root->objectid,
			btrfs_header_bytenr(node), btrfs_header_level(node));
	/* Leaf: account unused leaf space and check each item */
	btree_space_waste += btrfs_leaf_free_space(root, node);
	ret = check_leaf_items(root, node);

	/* Node: account unused key-pointer space, then recurse */
	nr = btrfs_header_nritems(node);
	btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
	btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
			     sizeof(struct btrfs_key_ptr);

	/* Then check all its children */
	for (i = 0; i < nr; i++) {
		u64 blocknr = btrfs_node_blockptr(node, i);

		/* Skip children already removed by an in-progress snapshot drop */
		btrfs_node_key_to_cpu(node, &key, i);
		if (level == root->root_item.drop_level &&
		    is_dropped_key(&key, &drop_key))

		/*
		 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
		 * to call the function itself.
		 */
		eb = read_tree_block(root, blocknr, root->nodesize, 0);
		if (extent_buffer_uptodate(eb)) {
			ret = traverse_tree_block(root, eb);
		free_extent_buffer(eb);
10036 * Low memory usage version check_chunks_and_extents.
10038 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10040 struct btrfs_path path;
10041 struct btrfs_key key;
10042 struct btrfs_root *root1;
10043 struct btrfs_root *cur_root;
10047 root1 = root->fs_info->chunk_root;
10048 ret = traverse_tree_block(root1, root1->node);
10051 root1 = root->fs_info->tree_root;
10052 ret = traverse_tree_block(root1, root1->node);
10055 btrfs_init_path(&path);
10056 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10058 key.type = BTRFS_ROOT_ITEM_KEY;
10060 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10062 error("cannot find extent treet in tree_root");
10067 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10068 if (key.type != BTRFS_ROOT_ITEM_KEY)
10070 key.offset = (u64)-1;
10072 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10073 if (IS_ERR(cur_root) || !cur_root) {
10074 error("failed to read tree: %lld", key.objectid);
10078 ret = traverse_tree_block(cur_root, cur_root->node);
10082 ret = btrfs_next_item(root1, &path);
10088 btrfs_release_path(&path);
/*
 * Re-initialize @root with a fresh empty root node and update the
 * corresponding root item in the tree root.
 *
 * @overwrite: presumably selects reusing the old node instead of allocating
 *             a new one — the branch using it is not fully visible here;
 *             confirm before relying on this. (NOTE(review))
 */
static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root, int overwrite)
	struct extent_buffer *c;
	struct extent_buffer *old = root->node;
	struct btrfs_disk_key disk_key = {0,0,0};

	extent_buffer_get(c);
	c = btrfs_alloc_free_block(trans, root,
				   root->root_key.objectid,
				   &disk_key, level, 0, 0);
	extent_buffer_get(c);

	/* Build a fresh, empty header for the new root node */
	memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
	btrfs_set_header_level(c, level);
	btrfs_set_header_bytenr(c, c->start);
	btrfs_set_header_generation(c, trans->transid);
	btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
	btrfs_set_header_owner(c, root->root_key.objectid);

	write_extent_buffer(c, root->fs_info->fsid,
			    btrfs_header_fsid(), BTRFS_FSID_SIZE);

	write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
			    btrfs_header_chunk_tree_uuid(c),

	btrfs_mark_buffer_dirty(c);
	/*
	 * this case can happen in the following case:
	 *
	 * 1.overwrite previous root.
	 *
	 * 2.reinit reloc data root, this is because we skip pin
	 * down reloc data tree before which means we can allocate
	 * same block bytenr here.
	 */
	if (old->start == c->start) {
		btrfs_set_root_generation(&root->root_item,
		root->root_item.level = btrfs_header_level(root->node);
		ret = btrfs_update_root(trans, root->fs_info->tree_root,
					&root->root_key, &root->root_item);
			free_extent_buffer(c);
	free_extent_buffer(old);

	add_root_to_dirty_list(root);
/*
 * Recursively pin every tree block reachable from @eb so the extent tree
 * re-init cannot allocate over metadata that is still in use.
 *
 * @tree_root: non-zero when walking the tree root itself, in which case the
 *             root items inside it are followed into their subtrees too.
 */
static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
				struct extent_buffer *eb, int tree_root)
	struct extent_buffer *tmp;
	struct btrfs_root_item *ri;
	struct btrfs_key key;
	int level = btrfs_header_level(eb);

	/*
	 * If we have pinned this block before, don't pin it again.
	 * This can not only avoid forever loop with broken filesystem
	 * but also give us some speedups.
	 */
	if (test_range_bit(&fs_info->pinned_extents, eb->start,
			   eb->start + eb->len - 1, EXTENT_DIRTY, 0))

	btrfs_pin_extent(fs_info, eb->start, eb->len);

	nodesize = btrfs_super_nodesize(fs_info->super_copy);
	nritems = btrfs_header_nritems(eb);
	for (i = 0; i < nritems; i++) {
		/* Leaf of the tree root: descend through root items */
		btrfs_item_key_to_cpu(eb, &key, i);
		if (key.type != BTRFS_ROOT_ITEM_KEY)
		/* Skip the extent root and reloc roots */
		if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
		    key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
		    key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
		ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
		bytenr = btrfs_disk_root_bytenr(eb, ri);

		/*
		 * If at any point we start needing the real root we
		 * will have to build a stump root for the root we are
		 * in, but for now this doesn't actually use the root so
		 * just pass in extent_root.
		 */
		tmp = read_tree_block(fs_info->extent_root, bytenr,
		if (!extent_buffer_uptodate(tmp)) {
			fprintf(stderr, "Error reading root block\n");
		ret = pin_down_tree_blocks(fs_info, tmp, 0);
		free_extent_buffer(tmp);
		bytenr = btrfs_node_blockptr(eb, i);

		/* If we aren't the tree root don't read the block */
		if (level == 1 && !tree_root) {
			btrfs_pin_extent(fs_info, bytenr, nodesize);

		tmp = read_tree_block(fs_info->extent_root, bytenr,
		if (!extent_buffer_uptodate(tmp)) {
			fprintf(stderr, "Error reading tree block\n");
		ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
		free_extent_buffer(tmp);
10239 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10243 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10247 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block group state from the chunk tree, and mark
 * the covered ranges as usable free space.  Used before re-initializing
 * the extent tree.
 */
static int reset_block_groups(struct btrfs_fs_info *fs_info)
	struct btrfs_block_group_cache *cache;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_chunk *chunk;
	struct btrfs_key key;

	path = btrfs_alloc_path();
	key.type = BTRFS_CHUNK_ITEM_KEY;
	ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
		btrfs_free_path(path);

	/*
	 * We do this in case the block groups were screwed up and had alloc
	 * bits that aren't actually set on the chunks.  This happens with
	 * restored images every time and could happen in real life I guess.
	 */
	fs_info->avail_data_alloc_bits = 0;
	fs_info->avail_metadata_alloc_bits = 0;
	fs_info->avail_system_alloc_bits = 0;

	/* First we need to create the in-memory block groups */
	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
		ret = btrfs_next_leaf(fs_info->chunk_root, path);
			btrfs_free_path(path);
	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	if (key.type != BTRFS_CHUNK_ITEM_KEY) {

	/* One in-memory block group per chunk; mark its range dirty (free) */
	chunk = btrfs_item_ptr(leaf, path->slots[0],
			       struct btrfs_chunk);
	btrfs_add_block_group(fs_info, 0,
			      btrfs_chunk_type(leaf, chunk),
			      key.objectid, key.offset,
			      btrfs_chunk_length(leaf, chunk));
	set_extent_dirty(&fs_info->free_space_cache, key.offset,
			 key.offset + btrfs_chunk_length(leaf, chunk),

	cache = btrfs_lookup_first_block_group(fs_info, start);
	start = cache->key.objectid + cache->key.offset;

	btrfs_free_path(path);
/*
 * Remove any pending balance state: delete the balance item, delete all
 * tree-reloc root items, then re-initialize the data reloc tree.
 */
static int reset_balance(struct btrfs_trans_handle *trans,
			 struct btrfs_fs_info *fs_info)
	struct btrfs_root *root = fs_info->tree_root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	int del_slot, del_nr = 0;

	path = btrfs_alloc_path();
	/* Delete the balance item, if any */
	key.objectid = BTRFS_BALANCE_OBJECTID;
	key.type = BTRFS_BALANCE_ITEM_KEY;
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		goto reinit_data_reloc;
	ret = btrfs_del_item(trans, root, path);
	btrfs_release_path(path);

	/* Delete all root items of the tree reloc trees */
	key.objectid = BTRFS_TREE_RELOC_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
	/* Flush the batched deletions collected so far */
	ret = btrfs_del_items(trans, root, path,
	btrfs_release_path(path);
	ret = btrfs_search_slot(trans, root, &key, path,
	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
	if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
	/* Start a new contiguous run of items to delete */
	del_slot = path->slots[0];
	ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
	btrfs_release_path(path);

	/* Re-initialize the data reloc tree with an empty root */
	key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;
	root = btrfs_read_fs_root(fs_info, &key);
	if (IS_ERR(root)) {
		fprintf(stderr, "Error reading data reloc tree\n");
		ret = PTR_ERR(root);
	record_root_in_trans(trans, root);
	ret = btrfs_fsck_reinit_root(trans, root, 0);
	ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
	btrfs_free_path(path);
/*
 * Throw away and rebuild the extent tree: pin all metadata in use, reset
 * the in-memory block groups, re-create an empty extent root, re-insert
 * the block group items, and clear any pending balance.
 *
 * Refuses to run on mixed block group filesystems (see comment below).
 */
static int reinit_extent_tree(struct btrfs_trans_handle *trans,
			      struct btrfs_fs_info *fs_info)
	/*
	 * The only reason we don't do this is because right now we're just
	 * walking the trees we find and pinning down their bytes, we don't look
	 * at any of the leaves. In order to do mixed groups we'd have to check
	 * the leaves of any fs roots and pin down the bytes for any file
	 * extents we find. Not hard but why do it if we don't have to?
	 */
	if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
		fprintf(stderr, "We don't support re-initing the extent tree "
			"for mixed block groups yet, please notify a btrfs "
			"developer you want to do this so they can add this "
			"functionality.\n");

	/*
	 * first we need to walk all of the trees except the extent tree and pin
	 * down the bytes that are in use so we don't overwrite any existing
	 * metadata.
	 */
	ret = pin_metadata_blocks(fs_info);
		fprintf(stderr, "error pinning down used bytes\n");

	/*
	 * Need to drop all the block groups since we're going to recreate all
	 * of them from the chunk tree.
	 */
	btrfs_free_block_groups(fs_info);
	ret = reset_block_groups(fs_info);
		fprintf(stderr, "error resetting the block groups\n");

	/* Ok we can allocate now, reinit the extent root */
	ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
		fprintf(stderr, "extent root initialization failed\n");
		/*
		 * When the transaction code is updated we should end the
		 * transaction, but for now progs only knows about commit so
		 * just return an error.
		 */

	/*
	 * Now we have all the in-memory block groups setup so we can make
	 * allocations properly, and the metadata we care about is safe since we
	 * pinned all of it above.
	 */
		struct btrfs_block_group_cache *cache;

		cache = btrfs_lookup_first_block_group(fs_info, start);
		start = cache->key.objectid + cache->key.offset;
		/* Re-insert this block group's item into the new extent tree */
		ret = btrfs_insert_item(trans, fs_info->extent_root,
					&cache->key, &cache->item,
					sizeof(cache->item));
			fprintf(stderr, "Error adding block group\n");
		btrfs_extent_post_op(trans, fs_info->extent_root);

	ret = reset_balance(trans, fs_info);
		fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force a COW of tree block @eb by searching down to it with a transaction
 * and cow=1, so a fresh copy gets written out.
 */
static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
	struct btrfs_path *path;
	struct btrfs_trans_handle *trans;
	struct btrfs_key key;

	printf("Recowing metadata block %llu\n", eb->start);
	/* Resolve the root that owns this block */
	key.objectid = btrfs_header_owner(eb);
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;

	root = btrfs_read_fs_root(root->fs_info, &key);
	if (IS_ERR(root)) {
		fprintf(stderr, "Couldn't find owner root %llu\n",
		return PTR_ERR(root);

	path = btrfs_alloc_path();
	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);

	/* Search down to @eb's level so the COW touches this very block */
	path->lowest_level = btrfs_header_level(eb);
	if (path->lowest_level)
		btrfs_node_key_to_cpu(eb, &key, 0);
		btrfs_item_key_to_cpu(eb, &key, 0);

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	btrfs_commit_transaction(trans, root);
	btrfs_free_path(path);
/*
 * Delete a previously recorded corrupted item (@bad) from the root it
 * belongs to, inside its own small transaction.
 */
static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
	struct btrfs_path *path;
	struct btrfs_trans_handle *trans;
	struct btrfs_key key;

	printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
	       bad->key.type, bad->key.offset);
	/* Look up the root that contains the bad item */
	key.objectid = bad->root_id;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;

	root = btrfs_read_fs_root(root->fs_info, &key);
	if (IS_ERR(root)) {
		fprintf(stderr, "Couldn't find owner root %llu\n",
		return PTR_ERR(root);

	path = btrfs_alloc_path();
	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);

	/* cow=1, ins_len=-1: search for deletion */
	ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
	ret = btrfs_del_item(trans, root, path);
	btrfs_commit_transaction(trans, root);
	btrfs_free_path(path);
/*
 * Discard the log tree by zeroing the log root pointer and level in the
 * superblock, committed through a small transaction.
 */
static int zero_log_tree(struct btrfs_root *root)
	struct btrfs_trans_handle *trans;

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
	btrfs_set_super_log_root(root->fs_info->super_copy, 0);
	btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
	ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert csum items for the data extent [@start, @start + len).
 *
 * @buf is a caller-provided scratch buffer of at least one sector.
 * Reads the data sector by sector and inserts each checksum into the
 * csum tree; stops and returns the first non-zero error.
 */
static int populate_csum(struct btrfs_trans_handle *trans,
			 struct btrfs_root *csum_root, char *buf, u64 start,
	while (offset < len) {
		sectorsize = csum_root->sectorsize;
		/* read_extent_data() may shrink sectorsize to what was read */
		ret = read_extent_data(csum_root, buf, start + offset,
		/* start + len acts as the logical end passed to the csum API */
		ret = btrfs_csum_file_block(trans, csum_root, start + len,
					    start + offset, buf, sectorsize);
		offset += sectorsize;
/*
 * Walk one fs/subvolume tree (@cur_root) and generate csums for every
 * regular (non-inline, non-prealloc) file extent it references.
 */
static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
					   struct btrfs_root *csum_root,
					   struct btrfs_root *cur_root)
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *node;
	struct btrfs_file_extent_item *fi;

	path = btrfs_alloc_path();
	/* Scratch buffer of one sector for populate_csum() */
	buf = malloc(cur_root->fs_info->csum_root->sectorsize);

	ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);

	/* Iterate all regular file extents and fill its csum */
	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
	if (key.type != BTRFS_EXTENT_DATA_KEY)
	node = path->nodes[0];
	slot = path->slots[0];
	fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
	if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
	start = btrfs_file_extent_disk_bytenr(node, fi);
	len = btrfs_file_extent_disk_num_bytes(node, fi);

	ret = populate_csum(trans, csum_root, buf, start, len);
	/* A shared extent may already have its csums — that is fine */
	if (ret == -EEXIST)

	/*
	 * TODO: if next leaf is corrupted, jump to nearest next valid
	 * leaf.
	 */
	ret = btrfs_next_item(cur_root, path);

	btrfs_free_path(path);
/*
 * Rebuild the csum tree by walking every fs/subvolume tree found in the
 * tree root and csumming their file extents.  Used when the extent tree
 * has been wiped and cannot be trusted as a source.
 */
static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
				  struct btrfs_root *csum_root)
	struct btrfs_fs_info *fs_info = csum_root->fs_info;
	struct btrfs_path *path;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_root *cur_root;
	struct extent_buffer *node;
	struct btrfs_key key;

	path = btrfs_alloc_path();
	/* Start from the first fs tree and walk all root items after it */
	key.objectid = BTRFS_FS_TREE_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;
	ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);

	node = path->nodes[0];
	slot = path->slots[0];
	btrfs_item_key_to_cpu(node, &key, slot);
	if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
	if (key.type != BTRFS_ROOT_ITEM_KEY)
	/* Only fs/subvolume trees have file extents */
	if (!is_fstree(key.objectid))
	key.offset = (u64)-1;

	cur_root = btrfs_read_fs_root(fs_info, &key);
	if (IS_ERR(cur_root) || !cur_root) {
		fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
	ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
	ret = btrfs_next_item(tree_root, path);

	btrfs_free_path(path);
/*
 * Rebuild the csum tree by walking the extent tree and csumming every
 * DATA extent item.  Requires a trustworthy extent tree.
 */
static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
				      struct btrfs_root *csum_root)
	struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	struct btrfs_key key;

	path = btrfs_alloc_path();
	key.type = BTRFS_EXTENT_ITEM_KEY;
	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
		btrfs_free_path(path);

	/* Scratch buffer of one sector for populate_csum() */
	buf = malloc(csum_root->sectorsize);
		btrfs_free_path(path);

	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
		ret = btrfs_next_leaf(extent_root, path);
	leaf = path->nodes[0];

	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	if (key.type != BTRFS_EXTENT_ITEM_KEY) {

	/* Only DATA extents carry checksums; skip tree blocks */
	ei = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_extent_item);
	if (!(btrfs_extent_flags(leaf, ei) &
	      BTRFS_EXTENT_FLAG_DATA)) {

	/* key.objectid/key.offset are the extent's start and length */
	ret = populate_csum(trans, csum_root, buf, key.objectid,

	btrfs_free_path(path);
/*
 * Recalculate the csum and put it into the csum tree.
 *
 * Extent tree init will wipe out all the extent info, so in that case, we
 * can't depend on extent tree, but use fs tree.  If search_fs_tree is set, we
 * will use fs/subvol trees to init the csum tree.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	/* Dispatch to the matching csum source */
	return search_fs_tree ?
		fill_csum_tree_from_fs(trans, csum_root) :
		fill_csum_tree_from_extent(trans, csum_root);
}
/*
 * Free every root_item_info entry in the global roots_info_cache and
 * the cache itself, leaving the pointer NULL.  Safe to call when the
 * cache was never built.
 */
static void free_roots_info_cache(void)
	if (!roots_info_cache)

	while (!cache_tree_empty(roots_info_cache)) {
		struct cache_extent *entry;
		struct root_item_info *rii;

		entry = first_cache_extent(roots_info_cache);
		remove_cache_extent(roots_info_cache, entry);
		/* Each cache_extent is embedded in a root_item_info */
		rii = container_of(entry, struct root_item_info, cache_extent);

	free(roots_info_cache);
	roots_info_cache = NULL;
/*
 * Scan the extent tree and record, for every subvolume root id, the
 * highest-level tree block owned by it (bytenr, generation, level) in the
 * global roots_info_cache.  maybe_repair_root_item() later compares this
 * against the on-disk root items.
 */
static int build_roots_info_cache(struct btrfs_fs_info *info)
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_path *path;

	/* Lazily allocate the cache on first use */
	if (!roots_info_cache) {
		roots_info_cache = malloc(sizeof(*roots_info_cache));
		if (!roots_info_cache)
		cache_tree_init(roots_info_cache);

	path = btrfs_alloc_path();
	key.type = BTRFS_EXTENT_ITEM_KEY;
	ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
	leaf = path->nodes[0];
		struct btrfs_key found_key;
		struct btrfs_extent_item *ei;
		struct btrfs_extent_inline_ref *iref;
		int slot = path->slots[0];
		struct cache_extent *entry;
		struct root_item_info *rii;

		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(info->extent_root, path);
			leaf = path->nodes[0];
			slot = path->slots[0];

		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

		if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
		    found_key.type != BTRFS_METADATA_ITEM_KEY)

		/* Only tree blocks matter here, skip data extents */
		ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
		flags = btrfs_extent_flags(leaf, ei);

		if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
		    !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))

		/*
		 * For METADATA_ITEM the level is in key.offset; for
		 * EXTENT_ITEM it is stored in the tree block info.
		 */
		if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
			iref = (struct btrfs_extent_inline_ref *)(ei + 1);
			level = found_key.offset;
			struct btrfs_tree_block_info *binfo;

			binfo = (struct btrfs_tree_block_info *)(ei + 1);
			iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
			level = btrfs_tree_block_level(leaf, binfo);

		/*
		 * For a root extent, it must be of the following type and the
		 * first (and only one) iref in the item.
		 */
		type = btrfs_extent_inline_ref_type(leaf, iref);
		if (type != BTRFS_TREE_BLOCK_REF_KEY)

		root_id = btrfs_extent_inline_ref_offset(leaf, iref);
		entry = lookup_cache_extent(roots_info_cache, root_id, 1);
			/* First block seen for this root: create an entry */
			rii = malloc(sizeof(struct root_item_info));
			rii->cache_extent.start = root_id;
			rii->cache_extent.size = 1;
			rii->level = (u8)-1;
			entry = &rii->cache_extent;
			ret = insert_cache_extent(roots_info_cache, entry);
			rii = container_of(entry, struct root_item_info,

		ASSERT(rii->cache_extent.start == root_id);
		ASSERT(rii->cache_extent.size == 1);

		/* Keep only the highest-level block; it must be unique */
		if (level > rii->level || rii->level == (u8)-1) {
			rii->level = level;
			rii->bytenr = found_key.objectid;
			rii->gen = btrfs_extent_generation(leaf, ei);
			rii->node_count = 1;
		} else if (level == rii->level) {

	btrfs_free_path(path);
/*
 * Compare the on-disk root item at @path against the values recorded in
 * roots_info_cache for @root_key->objectid and, unless @read_only_mode,
 * rewrite the root item in place when bytenr/level/generation disagree.
 *
 * Refuses to "fix" a root item whose generation is newer than the cached
 * one, since that would roll the root backwards.
 */
static int maybe_repair_root_item(struct btrfs_fs_info *info,
				  struct btrfs_path *path,
				  const struct btrfs_key *root_key,
				  const int read_only_mode)
	const u64 root_id = root_key->objectid;
	struct cache_extent *entry;
	struct root_item_info *rii;
	struct btrfs_root_item ri;
	unsigned long offset;

	entry = lookup_cache_extent(roots_info_cache, root_id, 1);
		"Error: could not find extent items for root %llu\n",
		root_key->objectid);

	rii = container_of(entry, struct root_item_info, cache_extent);
	ASSERT(rii->cache_extent.start == root_id);
	ASSERT(rii->cache_extent.size == 1);

	/* Exactly one highest-level block means we found the tree root */
	if (rii->node_count != 1) {
		"Error: could not find btree root extent for root %llu\n",

	offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
	read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));

	if (btrfs_root_bytenr(&ri) != rii->bytenr ||
	    btrfs_root_level(&ri) != rii->level ||
	    btrfs_root_generation(&ri) != rii->gen) {

		/*
		 * If we're in repair mode but our caller told us to not update
		 * the root item, i.e. just check if it needs to be updated, don't
		 * print this message, since the caller will call us again shortly
		 * for the same root item without read only mode (the caller will
		 * open a transaction first).
		 */
		if (!(read_only_mode && repair))
			"%sroot item for root %llu,"
			" current bytenr %llu, current gen %llu, current level %u,"
			" new bytenr %llu, new gen %llu, new level %u\n",
			(read_only_mode ? "" : "fixing "),
			btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
			btrfs_root_level(&ri),
			rii->bytenr, rii->gen, rii->level);

		/* Never roll a root item back to an older generation */
		if (btrfs_root_generation(&ri) > rii->gen) {
		"root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
				root_id, btrfs_root_generation(&ri), rii->gen);

		if (!read_only_mode) {
			btrfs_set_root_bytenr(&ri, rii->bytenr);
			btrfs_set_root_level(&ri, rii->level);
			btrfs_set_root_generation(&ri, rii->gen);
			write_extent_buffer(path->nodes[0], &ri,
					    offset, sizeof(ri));
11083 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11084 * caused read-only snapshots to be corrupted if they were created at a moment
11085 * when the source subvolume/snapshot had orphan items. The issue was that the
11086 * on-disk root items became incorrect, referring to the pre orphan cleanup root
11087 * node instead of the post orphan cleanup root node.
11088 * So this function, and its callees, just detects and fixes those cases. Even
11089 * though the regression was for read-only snapshots, this function applies to
11090 * any snapshot/subvolume root.
11091 * This must be run before any other repair code - otherwise other repair
11092 * code could, for example, delete or modify backrefs in the extent tree,
11093 * which would result in an inconsistent fs after repairing the root items.
/*
 * NOTE(review): per the caller in cmd_check(), a positive return is the
 * number of roots that were outdated (or fixed, in repair mode) -- some
 * return paths are elided in this excerpt, confirm against full source.
 */
11095 static int repair_root_items(struct btrfs_fs_info *info)
11097 struct btrfs_path *path = NULL;
11098 struct btrfs_key key;
11099 struct extent_buffer *leaf;
11100 struct btrfs_trans_handle *trans = NULL;
11103 int need_trans = 0;
/* Record, per root, the newest root node seen in the extent tree. */
11105 ret = build_roots_info_cache(info);
11109 path = btrfs_alloc_path();
/* Walk every ROOT_ITEM starting at the first possible subvolume id. */
11115 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11116 key.type = BTRFS_ROOT_ITEM_KEY;
11121 * Avoid opening and committing transactions if a leaf doesn't have
11122 * any root items that need to be fixed, so that we avoid rotating
11123 * backup roots unnecessarily.
11126 trans = btrfs_start_transaction(info->tree_root, 1);
11127 if (IS_ERR(trans)) {
11128 ret = PTR_ERR(trans);
11133 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11137 leaf = path->nodes[0];
11140 struct btrfs_key found_key;
/* Current leaf exhausted: advance to the next leaf (or stop). */
11142 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11143 int no_more_keys = find_next_key(path, &key);
11145 btrfs_release_path(path);
11147 ret = btrfs_commit_transaction(trans,
11159 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
/* Only subvolume/snapshot root items; relocation trees are skipped. */
11161 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11163 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11166 ret = maybe_repair_root_item(info, path, &found_key,
/* First pass was read-only; restart with a transaction to fix items. */
11171 if (!trans && repair) {
11174 btrfs_release_path(path);
11184 free_roots_info_cache();
11185 btrfs_free_path(path);
11187 btrfs_commit_transaction(trans, info->tree_root);
/*
 * clear_free_space_cache: drop every v1 free space cache inode (and its
 * extent data) and invalidate the cache in the superblock, so the kernel
 * rebuilds it on the next mount.
 *
 * NOTE(review): the loop construct and the declaration of 'current' (a
 * block-group offset cursor starting presumably at 0) are elided in this
 * excerpt -- confirm against full source.
 */
11194 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
11196 struct btrfs_trans_handle *trans;
11197 struct btrfs_block_group_cache *bg_cache;
11201 /* Clear all free space cache inodes and their extent data */
11203 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
11206 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
/* Advance the cursor past the block group just processed. */
11209 current = bg_cache->key.objectid + bg_cache->key.offset;
11212 /* Don't forget to set cache_generation to -1 */
11213 trans = btrfs_start_transaction(fs_info->tree_root, 0);
11214 if (IS_ERR(trans)) {
11215 error("failed to update super block cache generation");
11216 return PTR_ERR(trans);
/* (u64)-1 marks the v1 cache as invalid in the superblock. */
11218 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
11219 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Help text for "btrfs check", printed via usage() on -h or argument
 * errors in cmd_check().  (The array's NULL terminator is not visible in
 * this excerpt.)
 */
11224 const char * const cmd_check_usage[] = {
11225 "btrfs check [options] <device>",
11226 "Check structural integrity of a filesystem (unmounted).",
11227 "Check structural integrity of an unmounted filesystem. Verify internal",
11228 "trees' consistency and item connectivity. In the repair mode try to",
11229 "fix the problems found. ",
11230 "WARNING: the repair mode is considered dangerous",
11232 "-s|--super <superblock> use this superblock copy",
11233 "-b|--backup use the first valid backup root copy",
11234 "--repair try to repair the filesystem",
11235 "--readonly run in read-only mode (default)",
11236 "--init-csum-tree create a new CRC tree",
11237 "--init-extent-tree create a new extent tree",
11238 "--mode <MODE> allows choice of memory/IO trade-offs",
11239 " where MODE is one of:",
11240 " original - read inodes and extents to memory (requires",
11241 " more memory, does less IO)",
11242 " lowmem - try to use less memory but read blocks again",
11244 "--check-data-csum verify checksums of data blocks",
11245 "-Q|--qgroup-report print a report on qgroup consistency",
11246 "-E|--subvol-extents <subvolid>",
11247 " print subvolume extents and sharing state",
11248 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
11249 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
11250 "-p|--progress indicate progress",
11251 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
11252 " NOTE: v1 support implemented",
/*
 * cmd_check: entry point for "btrfs check".  Parses options, opens the
 * filesystem (optionally writable for repair), then runs the check passes:
 * extents/chunks, root items, free space, fs roots, csums, root refs and
 * quota groups, printing a space-accounting summary at the end.
 *
 * NOTE(review): many lines of this long function are elided in this
 * excerpt (declarations of ret/err/num/bytenr/subvolid/readonly, most
 * break statements, goto labels and error paths); the comments below
 * describe only what is visible.
 */
11256 int cmd_check(int argc, char **argv)
11258 struct cache_tree root_cache;
11259 struct btrfs_root *root;
11260 struct btrfs_fs_info *info;
11263 u64 tree_root_bytenr = 0;
11264 u64 chunk_root_bytenr = 0;
11265 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11268 int init_csum_tree = 0;
11270 int clear_space_cache = 0;
11271 int qgroup_report = 0;
11272 int qgroups_repaired = 0;
11273 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* Option values for long options that have no short equivalent. */
11277 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11278 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11279 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11280 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
11281 static const struct option long_options[] = {
11282 { "super", required_argument, NULL, 's' },
11283 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11284 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11285 { "init-csum-tree", no_argument, NULL,
11286 GETOPT_VAL_INIT_CSUM },
11287 { "init-extent-tree", no_argument, NULL,
11288 GETOPT_VAL_INIT_EXTENT },
11289 { "check-data-csum", no_argument, NULL,
11290 GETOPT_VAL_CHECK_CSUM },
11291 { "backup", no_argument, NULL, 'b' },
11292 { "subvol-extents", required_argument, NULL, 'E' },
11293 { "qgroup-report", no_argument, NULL, 'Q' },
11294 { "tree-root", required_argument, NULL, 'r' },
11295 { "chunk-root", required_argument, NULL,
11296 GETOPT_VAL_CHUNK_TREE },
11297 { "progress", no_argument, NULL, 'p' },
11298 { "mode", required_argument, NULL,
11300 { "clear-space-cache", required_argument, NULL,
11301 GETOPT_VAL_CLEAR_SPACE_CACHE},
11302 { NULL, 0, NULL, 0}
11305 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
/* 'a' is accepted for backward compatibility but does nothing. */
11309 case 'a': /* ignored */ break;
11311 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
/* -s: read the superblock from copy #num instead of the primary. */
11314 num = arg_strtou64(optarg);
11315 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11317 "super mirror should be less than %d",
11318 BTRFS_SUPER_MIRROR_MAX);
11321 bytenr = btrfs_sb_offset(((int)num));
11322 printf("using SB copy %llu, bytenr %llu\n", num,
11323 (unsigned long long)bytenr);
11329 subvolid = arg_strtou64(optarg);
11332 tree_root_bytenr = arg_strtou64(optarg);
11334 case GETOPT_VAL_CHUNK_TREE:
11335 chunk_root_bytenr = arg_strtou64(optarg);
11338 ctx.progress_enabled = true;
11342 usage(cmd_check_usage);
11343 case GETOPT_VAL_REPAIR:
11344 printf("enabling repair mode\n");
11346 ctree_flags |= OPEN_CTREE_WRITES;
11348 case GETOPT_VAL_READONLY:
11351 case GETOPT_VAL_INIT_CSUM:
11352 printf("Creating a new CRC tree\n")
11353 init_csum_tree = 1;
11355 ctree_flags |= OPEN_CTREE_WRITES;
11357 case GETOPT_VAL_INIT_EXTENT:
11358 init_extent_tree = 1;
/* Rebuilding the extent tree implies ignoring block group items. */
11359 ctree_flags |= (OPEN_CTREE_WRITES |
11360 OPEN_CTREE_NO_BLOCK_GROUPS);
11363 case GETOPT_VAL_CHECK_CSUM:
11364 check_data_csum = 1;
11366 case GETOPT_VAL_MODE:
11367 check_mode = parse_check_mode(optarg);
11368 if (check_mode == CHECK_MODE_UNKNOWN) {
11369 error("unknown mode: %s", optarg);
11373 case GETOPT_VAL_CLEAR_SPACE_CACHE:
11374 if (strcmp(optarg, "v1") != 0) {
/* NOTE(review): "implmented" is a typo in this user-visible message. */
11376 "only v1 support implmented, unrecognized value %s",
11380 clear_space_cache = 1;
11381 ctree_flags |= OPEN_CTREE_WRITES;
/* Exactly one positional argument: the device to check. */
11386 if (check_argc_exact(argc - optind, 1))
11387 usage(cmd_check_usage);
11389 if (ctx.progress_enabled) {
11390 ctx.tp = TASK_NOTHING;
11391 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11394 /* This check is the only reason for --readonly to exist */
11395 if (readonly && repair) {
11396 error("repair options are not compatible with --readonly");
11401 * Not supported yet
11403 if (repair && check_mode == CHECK_MODE_LOWMEM) {
11404 error("low memory mode doesn't support repair yet");
11409 cache_tree_init(&root_cache);
/* Refuse to touch a mounted filesystem. */
11411 if((ret = check_mounted(argv[optind])) < 0) {
11412 error("could not check mount status: %s", strerror(-ret));
11415 error("%s is currently mounted, aborting", argv[optind]);
11420 /* only allow partial opening under repair mode */
11422 ctree_flags |= OPEN_CTREE_PARTIAL;
11424 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11425 chunk_root_bytenr, ctree_flags);
11427 error("cannot open file system");
11432 global_info = info;
11433 root = info->fs_root;
/* --clear-space-cache: clear the v1 cache and exit early. */
11434 if (clear_space_cache) {
11435 if (btrfs_fs_compat_ro(info,
11436 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
11438 "free space cache v2 detected, clearing not implemented");
11442 printf("Clearing free space cache\n");
11443 ret = clear_free_space_cache(info);
11445 error("failed to clear free space cache");
11448 printf("Free space cache cleared\n");
11454 * repair mode will force us to commit transaction which
11455 * will make us fail to load log tree when mounting.
11457 if (repair && btrfs_super_log_root(info->super_copy)) {
11458 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
11463 ret = zero_log_tree(root);
11465 error("failed to zero log tree: %d", ret);
11470 uuid_unparse(info->super_copy->fsid, uuidbuf);
/* -Q: only report qgroup consistency, skip the full check. */
11471 if (qgroup_report) {
11472 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11474 ret = qgroup_verify_all(info);
/* -E: only dump extent sharing state for one subvolume. */
11480 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11481 subvolid, argv[optind], uuidbuf);
11482 ret = print_extent_state(info, subvolid);
11485 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
/* Checking is pointless if the critical trees could not be read. */
11487 if (!extent_buffer_uptodate(info->tree_root->node) ||
11488 !extent_buffer_uptodate(info->dev_root->node) ||
11489 !extent_buffer_uptodate(info->chunk_root->node)) {
11490 error("critical roots corrupted, unable to check the filesystem");
/* --init-extent-tree / --init-csum-tree: rebuild those trees up front. */
11495 if (init_extent_tree || init_csum_tree) {
11496 struct btrfs_trans_handle *trans;
11498 trans = btrfs_start_transaction(info->extent_root, 0);
11499 if (IS_ERR(trans)) {
11500 error("error starting transaction");
11501 ret = PTR_ERR(trans);
11505 if (init_extent_tree) {
11506 printf("Creating a new extent tree\n");
11507 ret = reinit_extent_tree(trans, info);
11512 if (init_csum_tree) {
11513 printf("Reinitialize checksum tree\n");
11514 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11516 error("checksum tree initialization failed: %d",
11522 ret = fill_csum_tree(trans, info->csum_root,
11525 error("checksum tree refilling failed: %d", ret);
11530 * Ok now we commit and run the normal fsck, which will add
11531 * extent entries for all of the items it finds.
11533 ret = btrfs_commit_transaction(trans, info->extent_root);
11537 if (!extent_buffer_uptodate(info->extent_root->node)) {
11538 error("critical: extent_root, unable to check the filesystem");
11542 if (!extent_buffer_uptodate(info->csum_root->node)) {
11543 error("critical: csum_root, unable to check the filesystem");
/* Pass 1: extents and chunks (lowmem mode uses the v2 implementation). */
11548 if (!ctx.progress_enabled)
11549 printf("checking extents");
11550 if (check_mode == CHECK_MODE_LOWMEM)
11551 ret = check_chunks_and_extents_v2(root);
11553 ret = check_chunks_and_extents(root);
11555 printf("Errors found in extent allocation tree or chunk allocation");
/* Fix stale root items before any other repair touches backrefs. */
11557 ret = repair_root_items(info);
11561 fprintf(stderr, "Fixed %d roots.\n", ret);
11563 } else if (ret > 0) {
11565 "Found %d roots with an outdated root item.\n",
11568 "Please run a filesystem check with the option --repair to fix them.\n");
/* Pass 2: free space cache or free space tree, depending on features. */
11573 if (!ctx.progress_enabled) {
11574 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11575 fprintf(stderr, "checking free space tree\n");
11577 fprintf(stderr, "checking free space cache\n");
11579 ret = check_space_cache(root);
11584 * We used to have to have these hole extents in between our real
11585 * extents so if we don't have this flag set we need to make sure there
11586 * are no gaps in the file extents for inodes, otherwise we can just
11587 * ignore it when this happens.
11589 no_holes = btrfs_fs_incompat(root->fs_info,
11590 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
/* Pass 3: per-subvolume fs trees. */
11591 if (!ctx.progress_enabled)
11592 fprintf(stderr, "checking fs roots\n");
11593 ret = check_fs_roots(root, &root_cache);
/* Pass 4: data checksums. */
11597 fprintf(stderr, "checking csums\n");
11598 ret = check_csums(root);
/* Pass 5: root reference items. */
11602 fprintf(stderr, "checking root refs\n");
11603 ret = check_root_refs(root, &root_cache);
/* In repair mode, re-COW blocks queued for transid problems. */
11607 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11608 struct extent_buffer *eb;
11610 eb = list_first_entry(&root->fs_info->recow_ebs,
11611 struct extent_buffer, recow);
11612 list_del_init(&eb->recow);
11613 ret = recow_extent_buffer(root, eb);
/* Delete items queued for removal during the earlier passes. */
11618 while (!list_empty(&delete_items)) {
11619 struct bad_item *bad;
11621 bad = list_first_entry(&delete_items, struct bad_item, list);
11622 list_del_init(&bad->list);
11624 ret = delete_bad_item(root, bad);
/* Pass 6: quota groups, with optional repair. */
11628 if (info->quota_enabled) {
11630 fprintf(stderr, "checking quota groups\n");
11631 err = qgroup_verify_all(info);
11635 err = repair_qgroups(info, &qgroups_repaired);
/* Anything left on recow_ebs means unrepaired transid errors. */
11640 if (!list_empty(&root->fs_info->recow_ebs)) {
11641 error("transid errors in file system");
11645 /* Don't override original ret */
11646 if (!ret && qgroups_repaired)
11647 ret = qgroups_repaired;
11649 if (found_old_backref) { /*
11650 * there was a disk format change when mixed
11651 * backref was in testing tree. The old format
11652 * existed about one week.
11654 printf("\n * Found old mixed backref format. "
11655 "The old format is not supported! *"
11656 "\n * Please mount the FS in readonly mode, "
11657 "backup data and re-format the FS. *\n\n");
/* Final summary of space accounting gathered during the check. */
11660 printf("found %llu bytes used err is %d\n",
11661 (unsigned long long)bytes_used, ret);
11662 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11663 printf("total tree bytes: %llu\n",
11664 (unsigned long long)total_btree_bytes);
11665 printf("total fs tree bytes: %llu\n",
11666 (unsigned long long)total_fs_tree_bytes);
11667 printf("total extent tree bytes: %llu\n",
11668 (unsigned long long)total_extent_tree_bytes);
11669 printf("btree space waste bytes: %llu\n",
11670 (unsigned long long)btree_space_waste);
11671 printf("file data blocks allocated: %llu\n referenced %llu\n",
11672 (unsigned long long)data_bytes_allocated,
11673 (unsigned long long)data_bytes_referenced);
11675 free_qgroup_counts();
11676 free_root_recs_tree(&root_cache);
11680 if (ctx.progress_enabled)
11681 task_deinit(ctx.info);