2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
49 TASK_NOTHING, /* have to be the last element */
54 enum task_position tp;
56 struct task_info *info;
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
77 enum btrfs_check_mode {
81 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
84 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86 struct extent_backref {
87 struct list_head list;
88 unsigned int is_data:1;
89 unsigned int found_extent_tree:1;
90 unsigned int full_backref:1;
91 unsigned int found_ref:1;
92 unsigned int broken:1;
95 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
97 return list_entry(entry, struct extent_backref, list);
100 struct data_backref {
101 struct extent_backref node;
115 static inline struct data_backref* to_data_backref(struct extent_backref *back)
117 return container_of(back, struct data_backref, node);
121 * Much like data_backref, just removed the undetermined members
122 * and change it to use list_head.
123 * During extent scan, it is stored in root->orphan_data_extent.
124 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
126 struct orphan_data_extent {
127 struct list_head list;
135 struct tree_backref {
136 struct extent_backref node;
143 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
145 return container_of(back, struct tree_backref, node);
148 /* Explicit initialization for extent_record::flag_block_full_backref */
149 enum { FLAG_UNSET = 2 };
151 struct extent_record {
152 struct list_head backrefs;
153 struct list_head dups;
154 struct list_head list;
155 struct cache_extent cache;
156 struct btrfs_disk_key parent_key;
161 u64 extent_item_refs;
163 u64 parent_generation;
167 unsigned int flag_block_full_backref:2;
168 unsigned int found_rec:1;
169 unsigned int content_checked:1;
170 unsigned int owner_ref_checked:1;
171 unsigned int is_root:1;
172 unsigned int metadata:1;
173 unsigned int bad_full_backref:1;
174 unsigned int crossing_stripes:1;
175 unsigned int wrong_chunk_type:1;
178 static inline struct extent_record* to_extent_record(struct list_head *entry)
180 return container_of(entry, struct extent_record, list);
183 struct inode_backref {
184 struct list_head list;
185 unsigned int found_dir_item:1;
186 unsigned int found_dir_index:1;
187 unsigned int found_inode_ref:1;
197 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
199 return list_entry(entry, struct inode_backref, list);
202 struct root_item_record {
203 struct list_head list;
210 struct btrfs_key drop_key;
213 #define REF_ERR_NO_DIR_ITEM (1 << 0)
214 #define REF_ERR_NO_DIR_INDEX (1 << 1)
215 #define REF_ERR_NO_INODE_REF (1 << 2)
216 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
217 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
218 #define REF_ERR_DUP_INODE_REF (1 << 5)
219 #define REF_ERR_INDEX_UNMATCH (1 << 6)
220 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
221 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
222 #define REF_ERR_NO_ROOT_REF (1 << 9)
223 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
224 #define REF_ERR_DUP_ROOT_REF (1 << 11)
225 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
227 struct file_extent_hole {
233 struct inode_record {
234 struct list_head backrefs;
235 unsigned int checked:1;
236 unsigned int merging:1;
237 unsigned int found_inode_item:1;
238 unsigned int found_dir_item:1;
239 unsigned int found_file_extent:1;
240 unsigned int found_csum_item:1;
241 unsigned int some_csum_missing:1;
242 unsigned int nodatasum:1;
255 struct rb_root holes;
256 struct list_head orphan_extents;
261 #define I_ERR_NO_INODE_ITEM (1 << 0)
262 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
263 #define I_ERR_DUP_INODE_ITEM (1 << 2)
264 #define I_ERR_DUP_DIR_INDEX (1 << 3)
265 #define I_ERR_ODD_DIR_ITEM (1 << 4)
266 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
267 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
268 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
269 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
270 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
271 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
272 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
273 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
274 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
275 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
277 struct root_backref {
278 struct list_head list;
279 unsigned int found_dir_item:1;
280 unsigned int found_dir_index:1;
281 unsigned int found_back_ref:1;
282 unsigned int found_forward_ref:1;
283 unsigned int reachable:1;
292 static inline struct root_backref* to_root_backref(struct list_head *entry)
294 return list_entry(entry, struct root_backref, list);
298 struct list_head backrefs;
299 struct cache_extent cache;
300 unsigned int found_root_item:1;
306 struct cache_extent cache;
311 struct cache_extent cache;
312 struct cache_tree root_cache;
313 struct cache_tree inode_cache;
314 struct inode_record *current;
323 struct walk_control {
324 struct cache_tree shared;
325 struct shared_node *nodes[BTRFS_MAX_LEVEL];
331 struct btrfs_key key;
333 struct list_head list;
336 struct extent_entry {
341 struct list_head list;
344 struct root_item_info {
345 /* level of the root */
347 /* number of nodes at this level, must be 1 for a root */
351 struct cache_extent cache_extent;
355 * Error bit for low memory mode check.
357 * Currently no caller cares about it yet. Just internal use for error
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
/*
 * CROSSING_STRIPE_BOUNDARY used to be (1 << 4), colliding with
 * REFERENCER_MISMATCH so the two conditions could not be told apart in
 * the returned error mask; it and all following bits get unique values.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 5) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH	(1 << 6) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 7) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 8) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 9)
371 static void *print_status_check(void *p)
373 struct task_ctx *priv = p;
374 const char work_indicator[] = { '.', 'o', 'O', 'o' };
376 static char *task_position_string[] = {
378 "checking free space cache",
382 task_period_start(priv->info, 1000 /* 1s */);
384 if (priv->tp == TASK_NOTHING)
388 printf("%s [%c]\r", task_position_string[priv->tp],
389 work_indicator[count % 4]);
392 task_period_wait(priv->info);
397 static int print_status_return(void *p)
405 static enum btrfs_check_mode parse_check_mode(const char *str)
407 if (strcmp(str, "lowmem") == 0)
408 return CHECK_MODE_LOWMEM;
409 if (strcmp(str, "orig") == 0)
410 return CHECK_MODE_ORIGINAL;
411 if (strcmp(str, "original") == 0)
412 return CHECK_MODE_ORIGINAL;
414 return CHECK_MODE_UNKNOWN;
417 /* Compatible function to allow reuse of old codes */
418 static u64 first_extent_gap(struct rb_root *holes)
420 struct file_extent_hole *hole;
422 if (RB_EMPTY_ROOT(holes))
425 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
429 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
431 struct file_extent_hole *hole1;
432 struct file_extent_hole *hole2;
434 hole1 = rb_entry(node1, struct file_extent_hole, node);
435 hole2 = rb_entry(node2, struct file_extent_hole, node);
437 if (hole1->start > hole2->start)
439 if (hole1->start < hole2->start)
441 /* Now hole1->start == hole2->start */
442 if (hole1->len >= hole2->len)
444 * Hole 1 will be merge center
445 * Same hole will be merged later
448 /* Hole 2 will be merge center */
453 * Add a hole to the record
455 * This will do hole merge for copy_file_extent_holes(),
456 * which will ensure there won't be continuous holes.
458 static int add_file_extent_hole(struct rb_root *holes,
461 struct file_extent_hole *hole;
462 struct file_extent_hole *prev = NULL;
463 struct file_extent_hole *next = NULL;
465 hole = malloc(sizeof(*hole));
470 /* Since compare will not return 0, no -EEXIST will happen */
471 rb_insert(holes, &hole->node, compare_hole);
473 /* simple merge with previous hole */
474 if (rb_prev(&hole->node))
475 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
477 if (prev && prev->start + prev->len >= hole->start) {
478 hole->len = hole->start + hole->len - prev->start;
479 hole->start = prev->start;
480 rb_erase(&prev->node, holes);
485 /* iterate merge with next holes */
487 if (!rb_next(&hole->node))
489 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
491 if (hole->start + hole->len >= next->start) {
492 if (hole->start + hole->len <= next->start + next->len)
493 hole->len = next->start + next->len -
495 rb_erase(&next->node, holes);
504 static int compare_hole_range(struct rb_node *node, void *data)
506 struct file_extent_hole *hole;
509 hole = (struct file_extent_hole *)data;
512 hole = rb_entry(node, struct file_extent_hole, node);
513 if (start < hole->start)
515 if (start >= hole->start && start < hole->start + hole->len)
521 * Delete a hole in the record
523 * This will do the hole split and is much restrict than add.
525 static int del_file_extent_hole(struct rb_root *holes,
528 struct file_extent_hole *hole;
529 struct file_extent_hole tmp;
534 struct rb_node *node;
541 node = rb_search(holes, &tmp, compare_hole_range, NULL);
544 hole = rb_entry(node, struct file_extent_hole, node);
545 if (start + len > hole->start + hole->len)
549 * Now there will be no overlap, delete the hole and re-add the
550 * split(s) if they exists.
552 if (start > hole->start) {
553 prev_start = hole->start;
554 prev_len = start - hole->start;
557 if (hole->start + hole->len > start + len) {
558 next_start = start + len;
559 next_len = hole->start + hole->len - start - len;
562 rb_erase(node, holes);
565 ret = add_file_extent_hole(holes, prev_start, prev_len);
570 ret = add_file_extent_hole(holes, next_start, next_len);
577 static int copy_file_extent_holes(struct rb_root *dst,
580 struct file_extent_hole *hole;
581 struct rb_node *node;
584 node = rb_first(src);
586 hole = rb_entry(node, struct file_extent_hole, node);
587 ret = add_file_extent_hole(dst, hole->start, hole->len);
590 node = rb_next(node);
595 static void free_file_extent_holes(struct rb_root *holes)
597 struct rb_node *node;
598 struct file_extent_hole *hole;
600 node = rb_first(holes);
602 hole = rb_entry(node, struct file_extent_hole, node);
603 rb_erase(node, holes);
605 node = rb_first(holes);
609 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
611 static void record_root_in_trans(struct btrfs_trans_handle *trans,
612 struct btrfs_root *root)
614 if (root->last_trans != trans->transid) {
615 root->track_dirty = 1;
616 root->last_trans = trans->transid;
617 root->commit_root = root->node;
618 extent_buffer_get(root->node);
622 static u8 imode_to_type(u32 imode)
625 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
626 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
627 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
628 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
629 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
630 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
631 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
632 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
635 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
639 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
641 struct device_record *rec1;
642 struct device_record *rec2;
644 rec1 = rb_entry(node1, struct device_record, node);
645 rec2 = rb_entry(node2, struct device_record, node);
646 if (rec1->devid > rec2->devid)
648 else if (rec1->devid < rec2->devid)
654 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
656 struct inode_record *rec;
657 struct inode_backref *backref;
658 struct inode_backref *orig;
659 struct inode_backref *tmp;
660 struct orphan_data_extent *src_orphan;
661 struct orphan_data_extent *dst_orphan;
666 rec = malloc(sizeof(*rec));
668 return ERR_PTR(-ENOMEM);
669 memcpy(rec, orig_rec, sizeof(*rec));
671 INIT_LIST_HEAD(&rec->backrefs);
672 INIT_LIST_HEAD(&rec->orphan_extents);
673 rec->holes = RB_ROOT;
675 list_for_each_entry(orig, &orig_rec->backrefs, list) {
676 size = sizeof(*orig) + orig->namelen + 1;
677 backref = malloc(size);
682 memcpy(backref, orig, size);
683 list_add_tail(&backref->list, &rec->backrefs);
685 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
686 dst_orphan = malloc(sizeof(*dst_orphan));
691 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
692 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
694 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
701 rb = rb_first(&rec->holes);
703 struct file_extent_hole *hole;
705 hole = rb_entry(rb, struct file_extent_hole, node);
711 if (!list_empty(&rec->backrefs))
712 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
713 list_del(&orig->list);
717 if (!list_empty(&rec->orphan_extents))
718 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
719 list_del(&orig->list);
728 static void print_orphan_data_extents(struct list_head *orphan_extents,
731 struct orphan_data_extent *orphan;
733 if (list_empty(orphan_extents))
735 printf("The following data extent is lost in tree %llu:\n",
737 list_for_each_entry(orphan, orphan_extents, list) {
738 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
739 orphan->objectid, orphan->offset, orphan->disk_bytenr,
744 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
746 u64 root_objectid = root->root_key.objectid;
747 int errors = rec->errors;
751 /* reloc root errors, we print its corresponding fs root objectid*/
752 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
753 root_objectid = root->root_key.offset;
754 fprintf(stderr, "reloc");
756 fprintf(stderr, "root %llu inode %llu errors %x",
757 (unsigned long long) root_objectid,
758 (unsigned long long) rec->ino, rec->errors);
760 if (errors & I_ERR_NO_INODE_ITEM)
761 fprintf(stderr, ", no inode item");
762 if (errors & I_ERR_NO_ORPHAN_ITEM)
763 fprintf(stderr, ", no orphan item");
764 if (errors & I_ERR_DUP_INODE_ITEM)
765 fprintf(stderr, ", dup inode item");
766 if (errors & I_ERR_DUP_DIR_INDEX)
767 fprintf(stderr, ", dup dir index");
768 if (errors & I_ERR_ODD_DIR_ITEM)
769 fprintf(stderr, ", odd dir item");
770 if (errors & I_ERR_ODD_FILE_EXTENT)
771 fprintf(stderr, ", odd file extent");
772 if (errors & I_ERR_BAD_FILE_EXTENT)
773 fprintf(stderr, ", bad file extent");
774 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
775 fprintf(stderr, ", file extent overlap");
776 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
777 fprintf(stderr, ", file extent discount");
778 if (errors & I_ERR_DIR_ISIZE_WRONG)
779 fprintf(stderr, ", dir isize wrong");
780 if (errors & I_ERR_FILE_NBYTES_WRONG)
781 fprintf(stderr, ", nbytes wrong");
782 if (errors & I_ERR_ODD_CSUM_ITEM)
783 fprintf(stderr, ", odd csum item");
784 if (errors & I_ERR_SOME_CSUM_MISSING)
785 fprintf(stderr, ", some csum missing");
786 if (errors & I_ERR_LINK_COUNT_WRONG)
787 fprintf(stderr, ", link count wrong");
788 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
789 fprintf(stderr, ", orphan file extent");
790 fprintf(stderr, "\n");
791 /* Print the orphan extents if needed */
792 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
793 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
795 /* Print the holes if needed */
796 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
797 struct file_extent_hole *hole;
798 struct rb_node *node;
801 node = rb_first(&rec->holes);
802 fprintf(stderr, "Found file extent holes:\n");
805 hole = rb_entry(node, struct file_extent_hole, node);
806 fprintf(stderr, "\tstart: %llu, len: %llu\n",
807 hole->start, hole->len);
808 node = rb_next(node);
811 fprintf(stderr, "\tstart: 0, len: %llu\n",
812 round_up(rec->isize, root->sectorsize));
816 static void print_ref_error(int errors)
818 if (errors & REF_ERR_NO_DIR_ITEM)
819 fprintf(stderr, ", no dir item");
820 if (errors & REF_ERR_NO_DIR_INDEX)
821 fprintf(stderr, ", no dir index");
822 if (errors & REF_ERR_NO_INODE_REF)
823 fprintf(stderr, ", no inode ref");
824 if (errors & REF_ERR_DUP_DIR_ITEM)
825 fprintf(stderr, ", dup dir item");
826 if (errors & REF_ERR_DUP_DIR_INDEX)
827 fprintf(stderr, ", dup dir index");
828 if (errors & REF_ERR_DUP_INODE_REF)
829 fprintf(stderr, ", dup inode ref");
830 if (errors & REF_ERR_INDEX_UNMATCH)
831 fprintf(stderr, ", index mismatch");
832 if (errors & REF_ERR_FILETYPE_UNMATCH)
833 fprintf(stderr, ", filetype mismatch");
834 if (errors & REF_ERR_NAME_TOO_LONG)
835 fprintf(stderr, ", name too long");
836 if (errors & REF_ERR_NO_ROOT_REF)
837 fprintf(stderr, ", no root ref");
838 if (errors & REF_ERR_NO_ROOT_BACKREF)
839 fprintf(stderr, ", no root backref");
840 if (errors & REF_ERR_DUP_ROOT_REF)
841 fprintf(stderr, ", dup root ref");
842 if (errors & REF_ERR_DUP_ROOT_BACKREF)
843 fprintf(stderr, ", dup root backref");
844 fprintf(stderr, "\n");
847 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
850 struct ptr_node *node;
851 struct cache_extent *cache;
852 struct inode_record *rec = NULL;
855 cache = lookup_cache_extent(inode_cache, ino, 1);
857 node = container_of(cache, struct ptr_node, cache);
859 if (mod && rec->refs > 1) {
860 node->data = clone_inode_rec(rec);
861 if (IS_ERR(node->data))
867 rec = calloc(1, sizeof(*rec));
869 return ERR_PTR(-ENOMEM);
871 rec->extent_start = (u64)-1;
873 INIT_LIST_HEAD(&rec->backrefs);
874 INIT_LIST_HEAD(&rec->orphan_extents);
875 rec->holes = RB_ROOT;
877 node = malloc(sizeof(*node));
880 return ERR_PTR(-ENOMEM);
882 node->cache.start = ino;
883 node->cache.size = 1;
886 if (ino == BTRFS_FREE_INO_OBJECTID)
889 ret = insert_cache_extent(inode_cache, &node->cache);
891 return ERR_PTR(-EEXIST);
896 static void free_orphan_data_extents(struct list_head *orphan_extents)
898 struct orphan_data_extent *orphan;
900 while (!list_empty(orphan_extents)) {
901 orphan = list_entry(orphan_extents->next,
902 struct orphan_data_extent, list);
903 list_del(&orphan->list);
908 static void free_inode_rec(struct inode_record *rec)
910 struct inode_backref *backref;
915 while (!list_empty(&rec->backrefs)) {
916 backref = to_inode_backref(rec->backrefs.next);
917 list_del(&backref->list);
920 free_orphan_data_extents(&rec->orphan_extents);
921 free_file_extent_holes(&rec->holes);
925 static int can_free_inode_rec(struct inode_record *rec)
927 if (!rec->errors && rec->checked && rec->found_inode_item &&
928 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
933 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
934 struct inode_record *rec)
936 struct cache_extent *cache;
937 struct inode_backref *tmp, *backref;
938 struct ptr_node *node;
941 if (!rec->found_inode_item)
944 filetype = imode_to_type(rec->imode);
945 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
946 if (backref->found_dir_item && backref->found_dir_index) {
947 if (backref->filetype != filetype)
948 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
949 if (!backref->errors && backref->found_inode_ref &&
950 rec->nlink == rec->found_link) {
951 list_del(&backref->list);
957 if (!rec->checked || rec->merging)
960 if (S_ISDIR(rec->imode)) {
961 if (rec->found_size != rec->isize)
962 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
963 if (rec->found_file_extent)
964 rec->errors |= I_ERR_ODD_FILE_EXTENT;
965 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
966 if (rec->found_dir_item)
967 rec->errors |= I_ERR_ODD_DIR_ITEM;
968 if (rec->found_size != rec->nbytes)
969 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
970 if (rec->nlink > 0 && !no_holes &&
971 (rec->extent_end < rec->isize ||
972 first_extent_gap(&rec->holes) < rec->isize))
973 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
976 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
977 if (rec->found_csum_item && rec->nodatasum)
978 rec->errors |= I_ERR_ODD_CSUM_ITEM;
979 if (rec->some_csum_missing && !rec->nodatasum)
980 rec->errors |= I_ERR_SOME_CSUM_MISSING;
983 BUG_ON(rec->refs != 1);
984 if (can_free_inode_rec(rec)) {
985 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
986 node = container_of(cache, struct ptr_node, cache);
987 BUG_ON(node->data != rec);
988 remove_cache_extent(inode_cache, &node->cache);
994 static int check_orphan_item(struct btrfs_root *root, u64 ino)
996 struct btrfs_path path;
997 struct btrfs_key key;
1000 key.objectid = BTRFS_ORPHAN_OBJECTID;
1001 key.type = BTRFS_ORPHAN_ITEM_KEY;
1004 btrfs_init_path(&path);
1005 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1006 btrfs_release_path(&path);
1012 static int process_inode_item(struct extent_buffer *eb,
1013 int slot, struct btrfs_key *key,
1014 struct shared_node *active_node)
1016 struct inode_record *rec;
1017 struct btrfs_inode_item *item;
1019 rec = active_node->current;
1020 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1021 if (rec->found_inode_item) {
1022 rec->errors |= I_ERR_DUP_INODE_ITEM;
1025 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1026 rec->nlink = btrfs_inode_nlink(eb, item);
1027 rec->isize = btrfs_inode_size(eb, item);
1028 rec->nbytes = btrfs_inode_nbytes(eb, item);
1029 rec->imode = btrfs_inode_mode(eb, item);
1030 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1032 rec->found_inode_item = 1;
1033 if (rec->nlink == 0)
1034 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1035 maybe_free_inode_rec(&active_node->inode_cache, rec);
1039 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1041 int namelen, u64 dir)
1043 struct inode_backref *backref;
1045 list_for_each_entry(backref, &rec->backrefs, list) {
1046 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1048 if (backref->dir != dir || backref->namelen != namelen)
1050 if (memcmp(name, backref->name, namelen))
1055 backref = malloc(sizeof(*backref) + namelen + 1);
1058 memset(backref, 0, sizeof(*backref));
1060 backref->namelen = namelen;
1061 memcpy(backref->name, name, namelen);
1062 backref->name[namelen] = '\0';
1063 list_add_tail(&backref->list, &rec->backrefs);
1067 static int add_inode_backref(struct cache_tree *inode_cache,
1068 u64 ino, u64 dir, u64 index,
1069 const char *name, int namelen,
1070 u8 filetype, u8 itemtype, int errors)
1072 struct inode_record *rec;
1073 struct inode_backref *backref;
1075 rec = get_inode_rec(inode_cache, ino, 1);
1076 BUG_ON(IS_ERR(rec));
1077 backref = get_inode_backref(rec, name, namelen, dir);
1080 backref->errors |= errors;
1081 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1082 if (backref->found_dir_index)
1083 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1084 if (backref->found_inode_ref && backref->index != index)
1085 backref->errors |= REF_ERR_INDEX_UNMATCH;
1086 if (backref->found_dir_item && backref->filetype != filetype)
1087 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1089 backref->index = index;
1090 backref->filetype = filetype;
1091 backref->found_dir_index = 1;
1092 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1094 if (backref->found_dir_item)
1095 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1096 if (backref->found_dir_index && backref->filetype != filetype)
1097 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1099 backref->filetype = filetype;
1100 backref->found_dir_item = 1;
1101 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1102 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1103 if (backref->found_inode_ref)
1104 backref->errors |= REF_ERR_DUP_INODE_REF;
1105 if (backref->found_dir_index && backref->index != index)
1106 backref->errors |= REF_ERR_INDEX_UNMATCH;
1108 backref->index = index;
1110 backref->ref_type = itemtype;
1111 backref->found_inode_ref = 1;
1116 maybe_free_inode_rec(inode_cache, rec);
1120 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1121 struct cache_tree *dst_cache)
1123 struct inode_backref *backref;
1128 list_for_each_entry(backref, &src->backrefs, list) {
1129 if (backref->found_dir_index) {
1130 add_inode_backref(dst_cache, dst->ino, backref->dir,
1131 backref->index, backref->name,
1132 backref->namelen, backref->filetype,
1133 BTRFS_DIR_INDEX_KEY, backref->errors);
1135 if (backref->found_dir_item) {
1137 add_inode_backref(dst_cache, dst->ino,
1138 backref->dir, 0, backref->name,
1139 backref->namelen, backref->filetype,
1140 BTRFS_DIR_ITEM_KEY, backref->errors);
1142 if (backref->found_inode_ref) {
1143 add_inode_backref(dst_cache, dst->ino,
1144 backref->dir, backref->index,
1145 backref->name, backref->namelen, 0,
1146 backref->ref_type, backref->errors);
1150 if (src->found_dir_item)
1151 dst->found_dir_item = 1;
1152 if (src->found_file_extent)
1153 dst->found_file_extent = 1;
1154 if (src->found_csum_item)
1155 dst->found_csum_item = 1;
1156 if (src->some_csum_missing)
1157 dst->some_csum_missing = 1;
1158 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1159 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1164 BUG_ON(src->found_link < dir_count);
1165 dst->found_link += src->found_link - dir_count;
1166 dst->found_size += src->found_size;
1167 if (src->extent_start != (u64)-1) {
1168 if (dst->extent_start == (u64)-1) {
1169 dst->extent_start = src->extent_start;
1170 dst->extent_end = src->extent_end;
1172 if (dst->extent_end > src->extent_start)
1173 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1174 else if (dst->extent_end < src->extent_start) {
1175 ret = add_file_extent_hole(&dst->holes,
1177 src->extent_start - dst->extent_end);
1179 if (dst->extent_end < src->extent_end)
1180 dst->extent_end = src->extent_end;
1184 dst->errors |= src->errors;
1185 if (src->found_inode_item) {
1186 if (!dst->found_inode_item) {
1187 dst->nlink = src->nlink;
1188 dst->isize = src->isize;
1189 dst->nbytes = src->nbytes;
1190 dst->imode = src->imode;
1191 dst->nodatasum = src->nodatasum;
1192 dst->found_inode_item = 1;
1194 dst->errors |= I_ERR_DUP_INODE_ITEM;
1202 static int splice_shared_node(struct shared_node *src_node,
1203 struct shared_node *dst_node)
1205 struct cache_extent *cache;
1206 struct ptr_node *node, *ins;
1207 struct cache_tree *src, *dst;
1208 struct inode_record *rec, *conflict;
1209 u64 current_ino = 0;
1213 if (--src_node->refs == 0)
1215 if (src_node->current)
1216 current_ino = src_node->current->ino;
1218 src = &src_node->root_cache;
1219 dst = &dst_node->root_cache;
1221 cache = search_cache_extent(src, 0);
1223 node = container_of(cache, struct ptr_node, cache);
1225 cache = next_cache_extent(cache);
1228 remove_cache_extent(src, &node->cache);
1231 ins = malloc(sizeof(*ins));
1233 ins->cache.start = node->cache.start;
1234 ins->cache.size = node->cache.size;
1238 ret = insert_cache_extent(dst, &ins->cache);
1239 if (ret == -EEXIST) {
1240 conflict = get_inode_rec(dst, rec->ino, 1);
1241 BUG_ON(IS_ERR(conflict));
1242 merge_inode_recs(rec, conflict, dst);
1244 conflict->checked = 1;
1245 if (dst_node->current == conflict)
1246 dst_node->current = NULL;
1248 maybe_free_inode_rec(dst, conflict);
1249 free_inode_rec(rec);
1256 if (src == &src_node->root_cache) {
1257 src = &src_node->inode_cache;
1258 dst = &dst_node->inode_cache;
1262 if (current_ino > 0 && (!dst_node->current ||
1263 current_ino > dst_node->current->ino)) {
1264 if (dst_node->current) {
1265 dst_node->current->checked = 1;
1266 maybe_free_inode_rec(dst, dst_node->current);
1268 dst_node->current = get_inode_rec(dst, current_ino, 1);
1269 BUG_ON(IS_ERR(dst_node->current));
1274 static void free_inode_ptr(struct cache_extent *cache)
1276 struct ptr_node *node;
1277 struct inode_record *rec;
1279 node = container_of(cache, struct ptr_node, cache);
1281 free_inode_rec(rec);
1285 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1287 static struct shared_node *find_shared_node(struct cache_tree *shared,
1290 struct cache_extent *cache;
1291 struct shared_node *node;
1293 cache = lookup_cache_extent(shared, bytenr, 1);
1295 node = container_of(cache, struct shared_node, cache);
1301 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1304 struct shared_node *node;
1306 node = calloc(1, sizeof(*node));
1309 node->cache.start = bytenr;
1310 node->cache.size = 1;
1311 cache_tree_init(&node->root_cache);
1312 cache_tree_init(&node->inode_cache);
1315 ret = insert_cache_extent(shared, &node->cache);
1320 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1321 struct walk_control *wc, int level)
1323 struct shared_node *node;
1324 struct shared_node *dest;
1327 if (level == wc->active_node)
1330 BUG_ON(wc->active_node <= level);
1331 node = find_shared_node(&wc->shared, bytenr);
1333 ret = add_shared_node(&wc->shared, bytenr, refs);
1335 node = find_shared_node(&wc->shared, bytenr);
1336 wc->nodes[level] = node;
1337 wc->active_node = level;
1341 if (wc->root_level == wc->active_node &&
1342 btrfs_root_refs(&root->root_item) == 0) {
1343 if (--node->refs == 0) {
1344 free_inode_recs_tree(&node->root_cache);
1345 free_inode_recs_tree(&node->inode_cache);
1346 remove_cache_extent(&wc->shared, &node->cache);
1352 dest = wc->nodes[wc->active_node];
1353 splice_shared_node(node, dest);
1354 if (node->refs == 0) {
1355 remove_cache_extent(&wc->shared, &node->cache);
1361 static int leave_shared_node(struct btrfs_root *root,
1362 struct walk_control *wc, int level)
1364 struct shared_node *node;
1365 struct shared_node *dest;
1368 if (level == wc->root_level)
1371 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1375 BUG_ON(i >= BTRFS_MAX_LEVEL);
1377 node = wc->nodes[wc->active_node];
1378 wc->nodes[wc->active_node] = NULL;
1379 wc->active_node = i;
1381 dest = wc->nodes[wc->active_node];
1382 if (wc->active_node < wc->root_level ||
1383 btrfs_root_refs(&root->root_item) > 0) {
1384 BUG_ON(node->refs <= 1);
1385 splice_shared_node(node, dest);
1387 BUG_ON(node->refs < 2);
1396 * 1 - if the root with id child_root_id is a child of root parent_root_id
1397 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1398 * has other root(s) as parent(s)
1399 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Decide the parent relationship of @child_root_id relative to
 * @parent_root_id by searching ROOT_REF / ROOT_BACKREF items in the
 * tree root (see the return-value description above this function).
 * NOTE(review): interior lines of this listing are elided.
 */
1401 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1404 struct btrfs_path path;
1405 struct btrfs_key key;
1406 struct extent_buffer *leaf;
1410 btrfs_init_path(&path);
/* First try the forward ref: (parent, ROOT_REF, child). */
1412 key.objectid = parent_root_id;
1413 key.type = BTRFS_ROOT_REF_KEY;
1414 key.offset = child_root_id;
1415 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1419 btrfs_release_path(&path);
/* Otherwise iterate the child's backrefs looking for any parent. */
1423 key.objectid = child_root_id;
1424 key.type = BTRFS_ROOT_BACKREF_KEY;
1426 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1432 leaf = path.nodes[0];
1433 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1434 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1437 leaf = path.nodes[0];
1440 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Stop once past this child's backref items. */
1441 if (key.objectid != child_root_id ||
1442 key.type != BTRFS_ROOT_BACKREF_KEY)
1447 if (key.offset == parent_root_id) {
1448 btrfs_release_path(&path);
1455 btrfs_release_path(&path);
/* Saw backrefs but none matching: 0; no parent roots at all: 2. */
1458 return has_parent ? 0 : 2;
/*
 * Record every dir_item/dir_index entry in the item at @slot as an inode
 * backref in the active shared node's caches.
 * NOTE(review): interior lines of this listing are elided; loop setup of
 * cur/total/nritems is not fully visible.
 */
1461 static int process_dir_item(struct btrfs_root *root,
1462 struct extent_buffer *eb,
1463 int slot, struct btrfs_key *key,
1464 struct shared_node *active_node)
1474 struct btrfs_dir_item *di;
1475 struct inode_record *rec;
1476 struct cache_tree *root_cache;
1477 struct cache_tree *inode_cache;
1478 struct btrfs_key location;
1479 char namebuf[BTRFS_NAME_LEN];
1481 root_cache = &active_node->root_cache;
1482 inode_cache = &active_node->inode_cache;
1483 rec = active_node->current;
1484 rec->found_dir_item = 1;
/* A single item may pack several dir entries back to back. */
1486 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1487 total = btrfs_item_size_nr(eb, slot);
1488 while (cur < total) {
1490 btrfs_dir_item_key_to_cpu(eb, di, &location);
1491 name_len = btrfs_dir_name_len(eb, di);
1492 data_len = btrfs_dir_data_len(eb, di);
1493 filetype = btrfs_dir_type(eb, di);
/* Directory i_size is the sum of entry name lengths. */
1495 rec->found_size += name_len;
1496 if (name_len <= BTRFS_NAME_LEN) {
/* Clamp oversized names and flag the backref as damaged. */
1500 len = BTRFS_NAME_LEN;
1501 error = REF_ERR_NAME_TOO_LONG;
1503 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
/* The entry points either at an inode or at a subvolume root. */
1505 if (location.type == BTRFS_INODE_ITEM_KEY) {
1506 add_inode_backref(inode_cache, location.objectid,
1507 key->objectid, key->offset, namebuf,
1508 len, filetype, key->type, error);
1509 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1510 add_inode_backref(root_cache, location.objectid,
1511 key->objectid, key->offset,
1512 namebuf, len, filetype,
/* Unknown location type: record under the multiple-objectids bucket. */
1515 fprintf(stderr, "invalid location in dir item %u\n",
1517 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1518 key->objectid, key->offset, namebuf,
1519 len, filetype, key->type, error);
/* Advance to the next packed entry. */
1522 len = sizeof(*di) + name_len + data_len;
1523 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item must contain exactly one entry. */
1526 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1527 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Walk all packed inode_ref entries in the item at @slot and record each
 * as an inode backref (key->objectid = inode, key->offset = parent dir).
 * NOTE(review): interior lines of this listing are elided.
 */
1532 static int process_inode_ref(struct extent_buffer *eb,
1533 int slot, struct btrfs_key *key,
1534 struct shared_node *active_node)
1542 struct cache_tree *inode_cache;
1543 struct btrfs_inode_ref *ref;
1544 char namebuf[BTRFS_NAME_LEN];
1546 inode_cache = &active_node->inode_cache;
1548 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1549 total = btrfs_item_size_nr(eb, slot);
1550 while (cur < total) {
1551 name_len = btrfs_inode_ref_name_len(eb, ref);
1552 index = btrfs_inode_ref_index(eb, ref);
1553 if (name_len <= BTRFS_NAME_LEN) {
/* Clamp oversized names and flag the backref as damaged. */
1557 len = BTRFS_NAME_LEN;
1558 error = REF_ERR_NAME_TOO_LONG;
1560 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1561 add_inode_backref(inode_cache, key->objectid, key->offset,
1562 index, namebuf, len, 0, key->type, error);
/* Advance past the ref header plus its inline name. */
1564 len = sizeof(*ref) + name_len;
1565 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Same as process_inode_ref() but for extended inode refs, where the
 * parent directory inode is stored inside the extref itself rather than
 * in the item key offset.
 * NOTE(review): interior lines of this listing are elided.
 */
1571 static int process_inode_extref(struct extent_buffer *eb,
1572 int slot, struct btrfs_key *key,
1573 struct shared_node *active_node)
1582 struct cache_tree *inode_cache;
1583 struct btrfs_inode_extref *extref;
1584 char namebuf[BTRFS_NAME_LEN];
1586 inode_cache = &active_node->inode_cache;
1588 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1589 total = btrfs_item_size_nr(eb, slot);
1590 while (cur < total) {
1591 name_len = btrfs_inode_extref_name_len(eb, extref);
1592 index = btrfs_inode_extref_index(eb, extref);
/* Parent dir inode comes from the extref payload, not the key. */
1593 parent = btrfs_inode_extref_parent(eb, extref);
1594 if (name_len <= BTRFS_NAME_LEN) {
/* Clamp oversized names and flag the backref as damaged. */
1598 len = BTRFS_NAME_LEN;
1599 error = REF_ERR_NAME_TOO_LONG;
1601 read_extent_buffer(eb, namebuf,
1602 (unsigned long)(extref + 1), len);
1603 add_inode_backref(inode_cache, key->objectid, parent,
1604 index, namebuf, len, 0, key->type, error);
/* Advance past the extref header plus its inline name. */
1606 len = sizeof(*extref) + name_len;
1607 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Count how many bytes of the range [start, start+len) are covered by
 * checksum items in the csum tree, returning the total via @found.
 * NOTE(review): interior lines of this listing are elided; the loop
 * structure and accumulation into *found are not fully visible.
 */
1614 static int count_csum_range(struct btrfs_root *root, u64 start,
1615 u64 len, u64 *found)
1617 struct btrfs_key key;
1618 struct btrfs_path path;
1619 struct extent_buffer *leaf;
1624 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1626 btrfs_init_path(&path);
1628 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1630 key.type = BTRFS_EXTENT_CSUM_KEY;
1632 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* The previous item may still cover @start; step back to check. */
1636 if (ret > 0 && path.slots[0] > 0) {
1637 leaf = path.nodes[0];
1638 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1639 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1640 key.type == BTRFS_EXTENT_CSUM_KEY)
1645 leaf = path.nodes[0];
1646 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1647 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1652 leaf = path.nodes[0];
1655 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Stop once we leave the csum items. */
1656 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1657 key.type != BTRFS_EXTENT_CSUM_KEY)
1660 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1661 if (key.offset >= start + len)
1664 if (key.offset > start)
/* Each csum_size bytes of item data covers one sector. */
1667 size = btrfs_item_size_nr(leaf, path.slots[0]);
1668 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1669 if (csum_end > start) {
1670 size = min(csum_end - start, len);
1679 btrfs_release_path(&path);
/*
 * Validate one EXTENT_DATA item of the current inode record: track the
 * covered file range (detecting overlaps and holes), sanity-check the
 * extent fields, and verify checksum coverage for regular extents.
 * NOTE(review): interior lines of this listing are elided.
 */
1685 static int process_file_extent(struct btrfs_root *root,
1686 struct extent_buffer *eb,
1687 int slot, struct btrfs_key *key,
1688 struct shared_node *active_node)
1690 struct inode_record *rec;
1691 struct btrfs_file_extent_item *fi;
1693 u64 disk_bytenr = 0;
1694 u64 extent_offset = 0;
/* Mask for rounding file sizes up to sector granularity. */
1695 u64 mask = root->sectorsize - 1;
1699 rec = active_node->current;
1700 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1701 rec->found_file_extent = 1;
/* First extent seen for this inode: start tracking coverage. */
1703 if (rec->extent_start == (u64)-1) {
1704 rec->extent_start = key->offset;
1705 rec->extent_end = key->offset;
/* Otherwise check for overlap with, or a hole after, the last extent. */
1708 if (rec->extent_end > key->offset)
1709 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1710 else if (rec->extent_end < key->offset) {
1711 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1712 key->offset - rec->extent_end)
1717 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1718 extent_type = btrfs_file_extent_type(eb, fi);
1720 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1721 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1723 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1724 rec->found_size += num_bytes;
/* Round inline length up to a full sector for coverage tracking. */
1725 num_bytes = (num_bytes + mask) & ~mask;
1726 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1727 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1728 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1729 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1730 extent_offset = btrfs_file_extent_offset(eb, fi);
/* num_bytes must be non-zero and sector aligned. */
1731 if (num_bytes == 0 || (num_bytes & mask))
1732 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced slice must fit inside the extent's ram size. */
1733 if (num_bytes + extent_offset >
1734 btrfs_file_extent_ram_bytes(eb, fi))
1735 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents can't be compressed/encrypted/encoded. */
1736 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1737 (btrfs_file_extent_compression(eb, fi) ||
1738 btrfs_file_extent_encryption(eb, fi) ||
1739 btrfs_file_extent_other_encoding(eb, fi)))
1740 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* disk_bytenr == 0 denotes a hole, which contributes no size. */
1741 if (disk_bytenr > 0)
1742 rec->found_size += num_bytes;
1744 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1746 rec->extent_end = key->offset + num_bytes;
1749 * The data reloc tree will copy full extents into its inode and then
1750 * copy the corresponding csums. Because the extent it copied could be
1751 * a preallocated extent that hasn't been written to yet there may be no
1752 * csums to copy, ergo we won't have csums for our file extent. This is
1753 * ok so just don't bother checking csums if the inode belongs to the
1756 if (disk_bytenr > 0 &&
1757 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* Compressed extents are checksummed over the on-disk bytes. */
1759 if (btrfs_file_extent_compression(eb, fi))
1760 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1762 disk_bytenr += extent_offset;
1764 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
/* REG extents should be fully checksummed; PREALLOC should have none. */
1767 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1769 rec->found_csum_item = 1;
1770 if (found < num_bytes)
1771 rec->some_csum_missing = 1;
1772 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1774 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item of a leaf, dispatching to the per-item handlers and
 * maintaining the active shared node's "current" inode record as the
 * item objectid advances.
 * NOTE(review): interior lines of this listing are elided; the visible
 * switch is missing its break statements in this extraction.
 */
1780 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1781 struct walk_control *wc)
1783 struct btrfs_key key;
1787 struct cache_tree *inode_cache;
1788 struct shared_node *active_node;
/* Dead root (refs == 0) at the root level: nothing to collect. */
1790 if (wc->root_level == wc->active_node &&
1791 btrfs_root_refs(&root->root_item) == 0)
1794 active_node = wc->nodes[wc->active_node];
1795 inode_cache = &active_node->inode_cache;
1796 nritems = btrfs_header_nritems(eb);
1797 for (i = 0; i < nritems; i++) {
1798 btrfs_item_key_to_cpu(eb, &key, i);
/* Skip free-space cache inodes and orphan markers. */
1800 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1802 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Items are sorted by objectid; moving past the current inode
 * finalizes its record before starting the next one. */
1805 if (active_node->current == NULL ||
1806 active_node->current->ino < key.objectid) {
1807 if (active_node->current) {
1808 active_node->current->checked = 1;
1809 maybe_free_inode_rec(inode_cache,
1810 active_node->current);
1812 active_node->current = get_inode_rec(inode_cache,
1814 BUG_ON(IS_ERR(active_node->current));
1817 case BTRFS_DIR_ITEM_KEY:
1818 case BTRFS_DIR_INDEX_KEY:
1819 ret = process_dir_item(root, eb, i, &key, active_node);
1821 case BTRFS_INODE_REF_KEY:
1822 ret = process_inode_ref(eb, i, &key, active_node);
1824 case BTRFS_INODE_EXTREF_KEY:
1825 ret = process_inode_extref(eb, i, &key, active_node);
1827 case BTRFS_INODE_ITEM_KEY:
1828 ret = process_inode_item(eb, i, &key, active_node);
1830 case BTRFS_EXTENT_DATA_KEY:
1831 ret = process_file_extent(root, eb, i, &key,
/*
 * Issue readahead for all children of @node from @slot onward, so the
 * subsequent synchronous reads in walk_down_tree() hit warm caches.
 * NOTE(review): interior lines (local declarations, the level == 0
 * early-out presumably guarding leaves) are elided in this listing.
 */
1841 static void reada_walk_down(struct btrfs_root *root,
1842 struct extent_buffer *node, int slot)
1851 level = btrfs_header_level(node);
1855 nritems = btrfs_header_nritems(node);
1856 blocksize = root->nodesize;
1857 for (i = slot; i < nritems; i++) {
1858 bytenr = btrfs_node_blockptr(node, i);
1859 ptr_gen = btrfs_node_ptr_generation(node, i);
1860 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1865 * Check the child node/leaf by the following condition:
1866 * 1. the first item key of the node/leaf should be the same with the one
1868 * 2. block in parent node should match the child node/leaf.
1869 * 3. generation of parent node and child's header should be consistent.
1871 * Or the child node/leaf pointed by the key in parent is not valid.
1873 * We hope to check the leaf owner too, but since subvolumes may share
1874 * leaves, the leaf owner check is not strong; the key check should be
1875 * sufficient for that case.
/*
 * Cross-check a child block against its parent slot: first key, block
 * number, and generation must all agree (see the comment above).
 * NOTE(review): interior lines of this listing are elided; the ret
 * accumulation between the checks is not visible.
 */
1877 static int check_child_node(struct btrfs_root *root,
1878 struct extent_buffer *parent, int slot,
1879 struct extent_buffer *child)
1881 struct btrfs_key parent_key;
1882 struct btrfs_key child_key;
1885 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* Leaves store item keys, interior nodes store node keys. */
1886 if (btrfs_header_level(child) == 0)
1887 btrfs_item_key_to_cpu(child, &child_key, 0);
1889 btrfs_node_key_to_cpu(child, &child_key, 0);
/* 1. The child's first key must equal the parent's pointer key. */
1891 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1894 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1895 parent_key.objectid, parent_key.type, parent_key.offset,
1896 child_key.objectid, child_key.type, child_key.offset);
/* 2. The child's bytenr must match the parent's block pointer. */
1898 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1900 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1901 btrfs_node_blockptr(parent, slot),
1902 btrfs_header_bytenr(child));
/* 3. Generations must be consistent. */
1904 if (btrfs_node_ptr_generation(parent, slot) !=
1905 btrfs_header_generation(child)) {
1907 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1908 btrfs_header_generation(child),
1909 btrfs_node_ptr_generation(parent, slot));
1915 u64 bytenr[BTRFS_MAX_LEVEL];
1916 u64 refs[BTRFS_MAX_LEVEL];
/*
 * Descend the tree from the current path position, validating each child
 * block (check_child_node, btrfs_check_leaf/node) and handing leaves to
 * process_one_leaf(). Extent refcounts are cached in @nrefs to avoid
 * repeated extent-tree lookups; blocks with refs > 1 go through the
 * shared-node machinery so subvolume-shared subtrees are scanned once.
 * NOTE(review): interior lines of this listing are elided; error paths
 * and several closing braces are not visible.
 */
1919 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1920 struct walk_control *wc, int *level,
1921 struct node_refs *nrefs)
1923 enum btrfs_tree_block_status status;
1926 struct extent_buffer *next;
1927 struct extent_buffer *cur;
1932 WARN_ON(*level < 0);
1933 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached refcount when the cached bytenr still matches. */
1935 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1936 refs = nrefs->refs[*level];
1939 ret = btrfs_lookup_extent_info(NULL, root,
1940 path->nodes[*level]->start,
1941 *level, 1, &refs, NULL);
1946 nrefs->bytenr[*level] = path->nodes[*level]->start;
1947 nrefs->refs[*level] = refs;
/* Shared block: register it so other roots can skip the subtree. */
1951 ret = enter_shared_node(root, path->nodes[*level]->start,
1959 while (*level >= 0) {
1960 WARN_ON(*level < 0);
1961 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1962 cur = path->nodes[*level];
1964 if (btrfs_header_level(cur) != *level)
/* Slot exhausted: let walk_up_tree() advance the parent. */
1967 if (path->slots[*level] >= btrfs_header_nritems(cur))
1970 ret = process_one_leaf(root, cur, wc);
1975 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1976 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1977 blocksize = root->nodesize;
/* Same refcount caching for the child we are about to enter. */
1979 if (bytenr == nrefs->bytenr[*level - 1]) {
1980 refs = nrefs->refs[*level - 1];
1982 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1983 *level - 1, 1, &refs, NULL);
1987 nrefs->bytenr[*level - 1] = bytenr;
1988 nrefs->refs[*level - 1] = refs;
1993 ret = enter_shared_node(root, bytenr, refs,
/* Subtree already covered by a shared node: skip it. */
1996 path->slots[*level]++;
2001 next = btrfs_find_tree_block(root, bytenr, blocksize);
2002 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2003 free_extent_buffer(next);
/* Prefetch siblings before the blocking read. */
2004 reada_walk_down(root, cur, path->slots[*level]);
2005 next = read_tree_block(root, bytenr, blocksize,
/* Unreadable child: record the parent block as corrupt. */
2007 if (!extent_buffer_uptodate(next)) {
2008 struct btrfs_key node_key;
2010 btrfs_node_key_to_cpu(path->nodes[*level],
2012 path->slots[*level]);
2013 btrfs_add_corrupt_extent_record(root->fs_info,
2015 path->nodes[*level]->start,
2016 root->nodesize, *level);
2022 ret = check_child_node(root, cur, path->slots[*level], next);
2028 if (btrfs_is_leaf(next))
2029 status = btrfs_check_leaf(root, NULL, next);
2031 status = btrfs_check_node(root, NULL, next);
2032 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2033 free_extent_buffer(next);
/* Descend into the validated child. */
2038 *level = *level - 1;
2039 free_extent_buffer(path->nodes[*level]);
2040 path->nodes[*level] = next;
2041 path->slots[*level] = 0;
2044 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Walk back up the path until a node with an unvisited sibling slot is
 * found, releasing the buffers below and leaving any shared node we were
 * accumulating into on the way.
 * NOTE(review): interior lines of this listing are elided.
 */
2048 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2049 struct walk_control *wc, int *level)
2052 struct extent_buffer *leaf;
2054 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2055 leaf = path->nodes[i];
/* More siblings at this level: resume the descent from here. */
2056 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* Done with this block; drop it and pop out of its shared node. */
2061 free_extent_buffer(path->nodes[*level]);
2062 path->nodes[*level] = NULL;
2063 BUG_ON(*level > wc->active_node);
2064 if (*level == wc->active_node)
2065 leave_shared_node(root, wc, *level);
/*
 * Verify an inode record looks like a valid subvolume root directory:
 * a clean inode item, nlink 1 with no counted links, and exactly the
 * ".." inode ref backref with index 0 and no dir item/index entries.
 * NOTE(review): interior lines (the goto targets/return values) are
 * elided in this listing.
 */
2072 static int check_root_dir(struct inode_record *rec)
2074 struct inode_backref *backref;
2077 if (!rec->found_inode_item || rec->errors)
2079 if (rec->nlink != 1 || rec->found_link != 0)
2081 if (list_empty(&rec->backrefs))
/* Root dirs carry exactly one backref: the ".." inode ref. */
2083 backref = to_inode_backref(rec->backrefs.next);
2084 if (!backref->found_inode_ref)
2086 if (backref->index != 0 || backref->namelen != 2 ||
2087 memcmp(backref->name, "..", 2))
2089 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite a directory inode's i_size to the byte count actually found
 * while scanning its entries (rec->found_size) and clear the
 * I_ERR_DIR_ISIZE_WRONG flag.
 * NOTE(review): interior lines of this listing are elided.
 */
2096 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2097 struct btrfs_root *root, struct btrfs_path *path,
2098 struct inode_record *rec)
2100 struct btrfs_inode_item *ei;
2101 struct btrfs_key key;
/* offset (u64)-1 positions the search just past the inode item. */
2104 key.objectid = rec->ino;
2105 key.type = BTRFS_INODE_ITEM_KEY;
2106 key.offset = (u64)-1;
2108 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Step back one slot to land on the inode item itself. */
2112 if (!path->slots[0]) {
2119 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2120 if (key.objectid != rec->ino) {
2125 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2126 struct btrfs_inode_item);
2127 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2128 btrfs_mark_buffer_dirty(path->nodes[0]);
2129 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2130 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2131 root->root_key.objectid);
2133 btrfs_release_path(path);
/*
 * Insert the missing orphan item for an unlinked inode and clear the
 * I_ERR_NO_ORPHAN_ITEM flag on success.
 * NOTE(review): the error check between the call and the flag clear is
 * elided in this listing.
 */
2137 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2138 struct btrfs_root *root,
2139 struct btrfs_path *path,
2140 struct inode_record *rec)
2144 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2145 btrfs_release_path(path);
2147 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite an inode's nbytes field to the total extent bytes actually
 * found (rec->found_size) and clear I_ERR_FILE_NBYTES_WRONG.
 * NOTE(review): interior lines (key.offset, error handling) are elided
 * in this listing.
 */
2151 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2152 struct btrfs_root *root,
2153 struct btrfs_path *path,
2154 struct inode_record *rec)
2156 struct btrfs_inode_item *ei;
2157 struct btrfs_key key;
2160 key.objectid = rec->ino;
2161 key.type = BTRFS_INODE_ITEM_KEY;
2164 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2171 /* Since ret == 0, no need to check anything */
2172 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2173 struct btrfs_inode_item);
2174 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2175 btrfs_mark_buffer_dirty(path->nodes[0]);
2176 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2177 printf("reset nbytes for ino %llu root %llu\n",
2178 rec->ino, root->root_key.objectid);
2180 btrfs_release_path(path);
/*
 * Insert a DIR_INDEX item reconstructed from @backref into @backref->dir,
 * then update the parent directory's inode record (found_size and the
 * isize error flag) to reflect the added entry.
 * NOTE(review): interior lines of this listing are elided.
 */
2184 static int add_missing_dir_index(struct btrfs_root *root,
2185 struct cache_tree *inode_cache,
2186 struct inode_record *rec,
2187 struct inode_backref *backref)
2189 struct btrfs_path path;
2190 struct btrfs_trans_handle *trans;
2191 struct btrfs_dir_item *dir_item;
2192 struct extent_buffer *leaf;
2193 struct btrfs_key key;
2194 struct btrfs_disk_key disk_key;
2195 struct inode_record *dir_rec;
2196 unsigned long name_ptr;
/* dir_item header plus the inline name it carries. */
2197 u32 data_size = sizeof(*dir_item) + backref->namelen;
2200 trans = btrfs_start_transaction(root, 1);
2202 return PTR_ERR(trans);
2204 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2205 (unsigned long long)rec->ino);
2207 btrfs_init_path(&path);
2208 key.objectid = backref->dir;
2209 key.type = BTRFS_DIR_INDEX_KEY;
2210 key.offset = backref->index;
2211 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2214 leaf = path.nodes[0];
2215 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* The entry's location key points at the inode item being linked. */
2217 disk_key.objectid = cpu_to_le64(rec->ino);
2218 disk_key.type = BTRFS_INODE_ITEM_KEY;
2219 disk_key.offset = 0;
2221 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2222 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2223 btrfs_set_dir_data_len(leaf, dir_item, 0);
2224 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2225 name_ptr = (unsigned long)(dir_item + 1);
2226 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2227 btrfs_mark_buffer_dirty(leaf);
2228 btrfs_release_path(&path);
2229 btrfs_commit_transaction(trans, root);
2231 backref->found_dir_index = 1;
2232 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2233 BUG_ON(IS_ERR(dir_rec));
/* Account the new name toward the parent dir's computed size and
 * re-evaluate its isize error flag. */
2236 dir_rec->found_size += backref->namelen;
2237 if (dir_rec->found_size == dir_rec->isize &&
2238 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2239 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2240 if (dir_rec->found_size != dir_rec->isize)
2241 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Remove a bad DIR_INDEX entry described by @backref: delete the whole
 * item, or just this name within it, depending on what the lookup found.
 * NOTE(review): interior lines (the branch selecting between del_item
 * and delete_one_dir_name) are elided in this listing.
 */
2246 static int delete_dir_index(struct btrfs_root *root,
2247 struct cache_tree *inode_cache,
2248 struct inode_record *rec,
2249 struct inode_backref *backref)
2251 struct btrfs_trans_handle *trans;
2252 struct btrfs_dir_item *di;
2253 struct btrfs_path path;
2256 trans = btrfs_start_transaction(root, 1);
2258 return PTR_ERR(trans);
2260 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2261 (unsigned long long)backref->dir,
2262 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2263 (unsigned long long)root->objectid);
2265 btrfs_init_path(&path);
2266 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2267 backref->name, backref->namelen,
2268 backref->index, -1);
/* Lookup failure path: release and commit before returning. */
2271 btrfs_release_path(&path);
2272 btrfs_commit_transaction(trans, root);
2279 ret = btrfs_del_item(trans, root, &path);
2281 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2283 btrfs_release_path(&path);
2284 btrfs_commit_transaction(trans, root);
/*
 * Recreate a missing inode item from what the scan discovered: links,
 * size, and a guessed mode (directory if dir entries were found,
 * regular file otherwise). Timestamps are set to "now"; permissions are
 * a best-effort 0755, hence the warning printed to the user.
 * NOTE(review): interior lines of this listing are elided.
 */
2288 static int create_inode_item(struct btrfs_root *root,
2289 struct inode_record *rec,
2290 struct inode_backref *backref, int root_dir)
2292 struct btrfs_trans_handle *trans;
2293 struct btrfs_inode_item inode_item;
2294 time_t now = time(NULL);
2297 trans = btrfs_start_transaction(root, 1);
2298 if (IS_ERR(trans)) {
2299 ret = PTR_ERR(trans);
2303 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2304 "be incomplete, please check permissions and content after "
2305 "the fsck completes.\n", (unsigned long long)root->objectid,
2306 (unsigned long long)rec->ino);
2308 memset(&inode_item, 0, sizeof(inode_item));
2309 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
/* nlink: 1 for a root dir, otherwise the count of links we found. */
2311 btrfs_set_stack_inode_nlink(&inode_item, 1);
2313 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2314 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2315 if (rec->found_dir_item) {
2316 if (rec->found_file_extent)
2317 fprintf(stderr, "root %llu inode %llu has both a dir "
2318 "item and extents, unsure if it is a dir or a "
2319 "regular file so setting it as a directory\n",
2320 (unsigned long long)root->objectid,
2321 (unsigned long long)rec->ino);
2322 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2323 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2324 } else if (!rec->found_dir_item) {
/* Regular file: size is the end of the last extent seen. */
2325 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2326 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2328 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2329 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2330 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2331 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2332 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2333 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2334 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2335 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2337 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2339 btrfs_commit_transaction(trans, root);
/*
 * Walk an inode record's backref list and repair each inconsistency:
 * recreate a missing root-dir inode item, delete stray/unmatched dir
 * indexes, add missing dir indexes, insert missing dir_item/index pairs
 * for dangling inode refs, and recreate the inode item when only
 * complete backrefs remain. Returns a negative error, or the number of
 * repairs made (via the repaired counter) when ret is 0.
 * NOTE(review): interior lines of this listing are elided.
 */
2343 static int repair_inode_backrefs(struct btrfs_root *root,
2344 struct inode_record *rec,
2345 struct cache_tree *inode_cache,
2348 struct inode_backref *tmp, *backref;
2349 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2353 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* The subvolume's top-level dir gets its inode item rebuilt. */
2354 if (!delete && rec->ino == root_dirid) {
2355 if (!rec->found_inode_item) {
2356 ret = create_inode_item(root, rec, backref, 1);
2363 /* Index 0 for root dir's are special, don't mess with it */
2364 if (rec->ino == root_dirid && backref->index == 0)
/* Dir index present but with no inode ref, or mismatched with
 * its inode ref: the index entry itself is bad, remove it. */
2368 ((backref->found_dir_index && !backref->found_inode_ref) ||
2369 (backref->found_dir_index && backref->found_inode_ref &&
2370 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2371 ret = delete_dir_index(root, inode_cache, rec, backref);
2375 list_del(&backref->list);
/* Dir item + inode ref exist but the index is missing: add it. */
2379 if (!delete && !backref->found_dir_index &&
2380 backref->found_dir_item && backref->found_inode_ref) {
2381 ret = add_missing_dir_index(root, inode_cache, rec,
/* Fully consistent backref: nothing left to fix, drop it. */
2386 if (backref->found_dir_item &&
2387 backref->found_dir_index &&
2388 backref->found_dir_index) {
2389 if (!backref->errors &&
2390 backref->found_inode_ref) {
2391 list_del(&backref->list);
/* Inode ref with neither dir item nor index: insert the pair,
 * unless a name conflict makes the nlink pass a better fit. */
2397 if (!delete && (!backref->found_dir_index &&
2398 !backref->found_dir_item &&
2399 backref->found_inode_ref)) {
2400 struct btrfs_trans_handle *trans;
2401 struct btrfs_key location;
2403 ret = check_dir_conflict(root, backref->name,
2409 * let nlink fixing routine to handle it,
2410 * which can do it better.
2415 location.objectid = rec->ino;
2416 location.type = BTRFS_INODE_ITEM_KEY;
2417 location.offset = 0;
2419 trans = btrfs_start_transaction(root, 1);
2420 if (IS_ERR(trans)) {
2421 ret = PTR_ERR(trans);
2424 fprintf(stderr, "adding missing dir index/item pair "
2426 (unsigned long long)rec->ino);
2427 ret = btrfs_insert_dir_item(trans, root, backref->name,
2429 backref->dir, &location,
2430 imode_to_type(rec->imode),
2433 btrfs_commit_transaction(trans, root);
/* All backref pieces present but the inode item is missing. */
2437 if (!delete && (backref->found_inode_ref &&
2438 backref->found_dir_index &&
2439 backref->found_dir_item &&
2440 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2441 !rec->found_inode_item)) {
2442 ret = create_inode_item(root, rec, backref, 0);
2449 return ret ? ret : repaired;
2453 * To determine the file type for nlink/inode_item repair
2455 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2456 * Return -ENOENT if file type is not found.
/*
 * Recover the BTRFS_FT_* file type for @rec: prefer the inode item's
 * mode, otherwise take the filetype from any dir_item/dir_index backref
 * (see the return-value description above this function).
 * NOTE(review): the return statements are elided in this listing.
 */
2458 static int find_file_type(struct inode_record *rec, u8 *type)
2460 struct inode_backref *backref;
2462 /* For inode item recovered case */
2463 if (rec->found_inode_item) {
2464 *type = imode_to_type(rec->imode);
/* Fall back to the type stored in a directory entry. */
2468 list_for_each_entry(backref, &rec->backrefs, list) {
2469 if (backref->found_dir_index || backref->found_dir_item) {
2470 *type = backref->filetype;
2478 * To determine the file name for nlink repair
2480 * Return 0 if file name is found, set name and namelen.
2481 * Return -ENOENT if file name is not found.
/*
 * Recover a file name for @rec from the first backref that carried one
 * (dir index, dir item, or inode ref), copying it into @name and setting
 * *namelen (see the return-value description above this function).
 * NOTE(review): the return statements are elided in this listing.
 */
2483 static int find_file_name(struct inode_record *rec,
2484 char *name, int *namelen)
2486 struct inode_backref *backref;
2488 list_for_each_entry(backref, &rec->backrefs, list) {
2489 if (backref->found_dir_index || backref->found_dir_item ||
2490 backref->found_inode_ref) {
2491 memcpy(name, backref->name, backref->namelen);
2492 *namelen = backref->namelen;
/*
 * Reset the nlink of the inode to the correct one: unlink every backref
 * (dropping invalid ones permanently), zero the on-disk nlink, then
 * re-add only the fully valid backrefs so btrfs_add_link() rebuilds the
 * count from scratch.
 * NOTE(review): interior lines of this listing are elided.
 */
2500 static int reset_nlink(struct btrfs_trans_handle *trans,
2501 struct btrfs_root *root,
2502 struct btrfs_path *path,
2503 struct inode_record *rec)
2505 struct inode_backref *backref;
2506 struct inode_backref *tmp;
2507 struct btrfs_key key;
2508 struct btrfs_inode_item *inode_item;
2511 /* We don't believe this either, reset it and iterate backref */
2512 rec->found_link = 0;
2514 /* Remove all backref including the valid ones */
2515 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2516 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2517 backref->index, backref->name,
2518 backref->namelen, 0);
2522 /* remove invalid backref, so it won't be added back */
2523 if (!(backref->found_dir_index &&
2524 backref->found_dir_item &&
2525 backref->found_inode_ref)) {
2526 list_del(&backref->list);
2533 /* Set nlink to 0 */
2534 key.objectid = rec->ino;
2535 key.type = BTRFS_INODE_ITEM_KEY;
2537 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2544 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2545 struct btrfs_inode_item);
2546 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2547 btrfs_mark_buffer_dirty(path->nodes[0]);
2548 btrfs_release_path(path);
2551 * Add back valid inode_ref/dir_item/dir_index,
2552 * add_link() will handle the nlink inc, so new nlink must be correct
2554 list_for_each_entry(backref, &rec->backrefs, list) {
2555 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2556 backref->name, backref->namelen,
2557 backref->filetype, &backref->index, 1);
2562 btrfs_release_path(path);
/*
 * Repair a wrong link count: recover name/type, reset nlink via
 * reset_nlink(), and if the inode ended up with no valid links at all,
 * link it into a lost+found directory (created on demand), retrying
 * with ".INO" suffixes on name collisions.
 * NOTE(review): interior lines of this listing are elided.
 */
2566 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2567 struct btrfs_root *root,
2568 struct btrfs_path *path,
2569 struct inode_record *rec)
2571 char *dir_name = "lost+found";
2572 char namebuf[BTRFS_NAME_LEN] = {0};
2577 int name_recovered = 0;
2578 int type_recovered = 0;
2582 * Get file name and type first before these invalid inode ref
2583 * are deleted by remove_all_invalid_backref()
2585 name_recovered = !find_file_name(rec, namebuf, &namelen);
2586 type_recovered = !find_file_type(rec, &type);
2588 if (!name_recovered) {
/* No name found anywhere: fall back to the decimal inode number. */
2589 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2590 rec->ino, rec->ino);
2591 namelen = count_digits(rec->ino);
2592 sprintf(namebuf, "%llu", rec->ino);
2595 if (!type_recovered) {
2596 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2598 type = BTRFS_FT_REG_FILE;
2602 ret = reset_nlink(trans, root, path, rec);
2605 "Failed to reset nlink for inode %llu: %s\n",
2606 rec->ino, strerror(-ret));
/* No valid backref survived: park the inode in lost+found. */
2610 if (rec->found_link == 0) {
2611 lost_found_ino = root->highest_inode;
2612 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2617 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2618 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2621 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2622 dir_name, strerror(-ret));
2625 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2626 namebuf, namelen, type, NULL, 1);
2628 * Add ".INO" suffix several times to handle case where
2629 * "FILENAME.INO" is already taken by another file.
2631 while (ret == -EEXIST) {
2633 * Conflicting file name, add ".INO" as suffix (+1 for '.')
2635 if (namelen + count_digits(rec->ino) + 1 >
2640 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2642 namelen += count_digits(rec->ino) + 1;
2643 ret = btrfs_add_link(trans, root, rec->ino,
2644 lost_found_ino, namebuf,
2645 namelen, type, NULL, 1);
2649 "Failed to link the inode %llu to %s dir: %s\n",
2650 rec->ino, dir_name, strerror(-ret));
2654 * Just increase the found_link, don't actually add the
2655 * backref. This will make things easier and this inode
2656 * record will be freed after the repair is done.
2657 * So fsck will not report problem about this inode.
2660 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2661 namelen, namebuf, dir_name);
2663 printf("Fixed the nlink of inode %llu\n", rec->ino);
2666 * Clear the flag anyway, or we will loop forever for the same inode
2667 * as it will not be removed from the bad inode list and the dead loop
2670 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2671 btrfs_release_path(path);
2676 * Check if there is any normal(reg or prealloc) file extent for given
2678 * This is used to determine the file type when neither its dir_index/item or
2679 * inode_item exists.
2681 * This will *NOT* report error, if any error happens, just consider it does
2682 * not have any normal file extent.
/*
 * Return non-zero if inode @ino has at least one non-inline (regular or
 * preallocated) file extent; best-effort, errors are treated as "none"
 * (see the comment above this function).
 * NOTE(review): interior lines (key.objectid/offset setup, the loop
 * advance, and the ret assignments) are elided in this listing.
 */
2684 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2686 struct btrfs_path path;
2687 struct btrfs_key key;
2688 struct btrfs_key found_key;
2689 struct btrfs_file_extent_item *fi;
2693 btrfs_init_path(&path);
2695 key.type = BTRFS_EXTENT_DATA_KEY;
2698 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2703 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2704 ret = btrfs_next_leaf(root, &path);
2711 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
/* Stop once past this inode's EXTENT_DATA items. */
2713 if (found_key.objectid != ino ||
2714 found_key.type != BTRFS_EXTENT_DATA_KEY)
2716 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2717 struct btrfs_file_extent_item);
/* Any non-inline extent type (REG/PREALLOC) counts as "normal". */
2718 type = btrfs_file_extent_type(path.nodes[0], fi);
2719 if (type != BTRFS_FILE_EXTENT_INLINE) {
2725 btrfs_release_path(&path);
2729 static u32 btrfs_type_to_imode(u8 type)
2731 static u32 imode_by_btrfs_type[] = {
2732 [BTRFS_FT_REG_FILE] = S_IFREG,
2733 [BTRFS_FT_DIR] = S_IFDIR,
2734 [BTRFS_FT_CHRDEV] = S_IFCHR,
2735 [BTRFS_FT_BLKDEV] = S_IFBLK,
2736 [BTRFS_FT_FIFO] = S_IFIFO,
2737 [BTRFS_FT_SOCK] = S_IFSOCK,
2738 [BTRFS_FT_SYMLINK] = S_IFLNK,
2741 return imode_by_btrfs_type[(type)];
/*
 * Rebuild a missing inode item (I_ERR_NO_INODE_ITEM) for @rec.
 *
 * The file type is recovered heuristically: from existing dir items, from a
 * normal file extent, or from orphan extents; otherwise REG_FILE is assumed.
 * Only the inode item itself is recreated here — nlink/backref repair is
 * deliberately left to the nlink repair path (forced via
 * I_ERR_LINK_COUNT_WRONG below).
 */
2744 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2745 struct btrfs_root *root,
2746 struct btrfs_path *path,
2747 struct inode_record *rec)
2751 int type_recovered = 0;
2754 printf("Trying to rebuild inode:%llu\n", rec->ino);
/* First try the filetype recorded in the dir item/index backrefs. */
2756 type_recovered = !find_file_type(rec, &filetype);
2759 * Try to determine inode type if type not found.
2761 * For found regular file extent, it must be FILE.
2762 * For found dir_item/index, it must be DIR.
2764 * For undetermined one, use FILE as fallback.
2767 * 1. If found backref(inode_index/item is already handled) to it,
2769 * Need new inode-inode ref structure to allow search for that.
2771 if (!type_recovered) {
/* A real (non-inline) file extent implies a regular file. */
2772 if (rec->found_file_extent &&
2773 find_normal_file_extent(root, rec->ino)) {
2775 filetype = BTRFS_FT_REG_FILE;
2776 } else if (rec->found_dir_item) {
2778 filetype = BTRFS_FT_DIR;
/* Orphan data extents can only belong to a regular file. */
2779 } else if (!list_empty(&rec->orphan_extents)) {
2781 filetype = BTRFS_FT_REG_FILE;
2783 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2786 filetype = BTRFS_FT_REG_FILE;
/* Recreate the inode item with the recovered (or assumed) type bits. */
2790 ret = btrfs_new_inode(trans, root, rec->ino,
2791 mode | btrfs_type_to_imode(filetype));
2796 * Here inode rebuild is done, we only rebuild the inode item,
2797 * don't repair the nlink(like move to lost+found).
2798 * That is the job of nlink repair.
2800 * We just fill the record and return
2802 rec->found_dir_item = 1;
2803 rec->imode = mode | btrfs_type_to_imode(filetype);
2805 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2806 /* Ensure the inode_nlinks repair function will be called */
2807 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach orphan data extents (I_ERR_FILE_EXTENT_ORPHAN) to the file.
 *
 * For each orphan extent: if it conflicts with an existing file extent it is
 * freed instead; otherwise a new file extent item is inserted and the
 * record's size/nbytes and hole bookkeeping are updated.  Compression and
 * data offset cannot be known here, so disk_len is used as the extent length.
 */
2812 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2813 struct btrfs_root *root,
2814 struct btrfs_path *path,
2815 struct inode_record *rec)
2817 struct orphan_data_extent *orphan;
2818 struct orphan_data_extent *tmp;
/* _safe iteration: entries are deleted from the list as they are handled. */
2821 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2823 * Check for conflicting file extents
2825 * Here we don't know whether the extents is compressed or not,
2826 * so we can only assume it not compressed nor data offset,
2827 * and use its disk_len as extent length.
2829 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2830 orphan->offset, orphan->disk_len, 0);
2831 btrfs_release_path(path);
2836 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2837 orphan->disk_bytenr, orphan->disk_len);
/* Conflict: drop the orphan extent's backing allocation instead. */
2838 ret = btrfs_free_extent(trans,
2839 root->fs_info->extent_root,
2840 orphan->disk_bytenr, orphan->disk_len,
2841 0, root->objectid, orphan->objectid,
/* No conflict: link the extent back into the file. */
2846 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2847 orphan->offset, orphan->disk_bytenr,
2848 orphan->disk_len, orphan->disk_len);
2852 /* Update file size info */
2853 rec->found_size += orphan->disk_len;
2854 if (rec->found_size == rec->nbytes)
2855 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2857 /* Update the file extent hole info too */
2858 ret = del_file_extent_hole(&rec->holes, orphan->offset,
/* All holes filled -> the "discount extent" error is cleared too. */
2862 if (RB_EMPTY_ROOT(&rec->holes))
2863 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2865 list_del(&orphan->list);
/* Every orphan processed: clear the orphan-extent error on the inode. */
2868 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Repair I_ERR_FILE_EXTENT_DISCOUNT (missing file extents) by punching a
 * hole extent over every recorded gap in rec->holes, so the file's extent
 * map becomes contiguous again.
 */
2873 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2874 struct btrfs_root *root,
2875 struct btrfs_path *path,
2876 struct inode_record *rec)
2878 struct rb_node *node;
2879 struct file_extent_hole *hole;
2883 node = rb_first(&rec->holes);
2887 hole = rb_entry(node, struct file_extent_hole, node);
/* Insert a hole extent covering this gap. */
2888 ret = btrfs_punch_hole(trans, root, rec->ino,
2889 hole->start, hole->len);
/* Remove the hole from the record once it is materialized on disk. */
2892 ret = del_file_extent_hole(&rec->holes, hole->start,
2896 if (RB_EMPTY_ROOT(&rec->holes))
2897 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* rb_first again: del_file_extent_hole may have restructured the tree. */
2898 node = rb_first(&rec->holes);
2900 /* special case for a file losing all its file extent */
/* Cover the whole isize (rounded up to sectorsize) with one hole. */
2902 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2903 round_up(rec->isize, root->sectorsize));
2907 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2908 rec->ino, root->objectid);
/*
 * Attempt to repair a single inode record inside one transaction.
 *
 * Only runs when rec->errors contains at least one repairable bit; each
 * error class is dispatched to its dedicated repair helper, in dependency
 * order (the inode item must exist before extents/isize/nlink repairs).
 * A failure in one step short-circuits the following ones via the
 * "!ret &&" chaining.
 */
2913 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2915 struct btrfs_trans_handle *trans;
2916 struct btrfs_path *path;
/* Bail out early if none of the errors we know how to fix is present. */
2919 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2920 I_ERR_NO_ORPHAN_ITEM |
2921 I_ERR_LINK_COUNT_WRONG |
2922 I_ERR_NO_INODE_ITEM |
2923 I_ERR_FILE_EXTENT_ORPHAN |
2924 I_ERR_FILE_EXTENT_DISCOUNT|
2925 I_ERR_FILE_NBYTES_WRONG)))
2928 path = btrfs_alloc_path();
2933 * For nlink repair, it may create a dir and add link, so
2934 * 2 for parent(256)'s dir_index and dir_item
2935 * 2 for lost+found dir's inode_item and inode_ref
2936 * 1 for the new inode_ref of the file
2937 * 2 for lost+found dir's dir_index and dir_item for the file
/* 7 reserved items per the worst case enumerated above. */
2939 trans = btrfs_start_transaction(root, 7);
2940 if (IS_ERR(trans)) {
2941 btrfs_free_path(path);
2942 return PTR_ERR(trans);
/* Repair steps, each gated on the previous one succeeding. */
2945 if (rec->errors & I_ERR_NO_INODE_ITEM)
2946 ret = repair_inode_no_item(trans, root, path, rec);
2947 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2948 ret = repair_inode_orphan_extent(trans, root, path, rec);
2949 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2950 ret = repair_inode_discount_extent(trans, root, path, rec);
2951 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2952 ret = repair_inode_isize(trans, root, path, rec);
2953 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2954 ret = repair_inode_orphan_item(trans, root, path, rec);
2955 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2956 ret = repair_inode_nlinks(trans, root, path, rec);
2957 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2958 ret = repair_inode_nbytes(trans, root, path, rec);
/* Commit even on partial failure; earlier successful fixes are kept. */
2959 btrfs_commit_transaction(trans, root);
2960 btrfs_free_path(path);
/*
 * Validate (and in --repair mode, fix) every inode record collected for
 * @root in @inode_cache.
 *
 * Responsibilities visible here:
 *  - track the highest used ino (for later lost+found allocation),
 *  - repair inode backrefs first (staged: delete bad refs, re-add, re-check),
 *  - verify / recreate the root directory item,
 *  - walk every remaining record, derive error bits, attempt repair,
 *    and print unresolved errors and their backrefs.
 *
 * Returns 0 when clean, -1 when any error remains.
 */
2964 static int check_inode_recs(struct btrfs_root *root,
2965 struct cache_tree *inode_cache)
2967 struct cache_extent *cache;
2968 struct ptr_node *node;
2969 struct inode_record *rec;
2970 struct inode_backref *backref;
2975 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* A dead root (refs == 0) should have no inode records left. */
2977 if (btrfs_root_refs(&root->root_item) == 0) {
2978 if (!cache_tree_empty(inode_cache))
2979 fprintf(stderr, "warning line %d\n", __LINE__);
2984 * We need to record the highest inode number for later 'lost+found'
2986 * We must select an ino not used/referred by any existing inode, or
2987 * 'lost+found' ino may be a missing ino in a corrupted leaf,
2988 * this may cause 'lost+found' dir has wrong nlinks.
/* The cache is keyed by ino, so the last extent holds the highest ino. */
2990 cache = last_cache_extent(inode_cache);
2992 node = container_of(cache, struct ptr_node, cache);
2994 if (rec->ino > root->highest_inode)
2995 root->highest_inode = rec->ino;
2999 * We need to repair backrefs first because we could change some of the
3000 * errors in the inode recs.
3002 * We also need to go through and delete invalid backrefs first and then
3003 * add the correct ones second. We do this because we may get EEXIST
3004 * when adding back the correct index because we hadn't yet deleted the
3007 * For example, if we were missing a dir index then the directories
3008 * isize would be wrong, so if we fixed the isize to what we thought it
3009 * would be and then fixed the backref we'd still have a invalid fs, so
3010 * we need to add back the dir index and then check to see if the isize
3015 if (stage == 3 && !err)
3018 cache = search_cache_extent(inode_cache, 0);
3019 while (repair && cache) {
3020 node = container_of(cache, struct ptr_node, cache);
3022 cache = next_cache_extent(cache);
3024 /* Need to free everything up and rescan */
3026 remove_cache_extent(inode_cache, &node->cache);
3028 free_inode_rec(rec);
/* Records with no backrefs have nothing to repair in this pass. */
3032 if (list_empty(&rec->backrefs))
3035 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Root directory gets special validation (it has no parent backref). */
3049 rec = get_inode_rec(inode_cache, root_dirid, 0);
3050 BUG_ON(IS_ERR(rec));
3052 ret = check_root_dir(rec);
3054 fprintf(stderr, "root %llu root dir %llu error\n",
3055 (unsigned long long)root->root_key.objectid,
3056 (unsigned long long)root_dirid);
3057 print_inode_error(root, rec);
/* Root dir missing entirely: recreate it in a small transaction. */
3062 struct btrfs_trans_handle *trans;
3064 trans = btrfs_start_transaction(root, 1);
3065 if (IS_ERR(trans)) {
3066 err = PTR_ERR(trans);
3071 "root %llu missing its root dir, recreating\n",
3072 (unsigned long long)root->objectid);
3074 ret = btrfs_make_root_dir(trans, root, root_dirid);
3077 btrfs_commit_transaction(trans, root);
3081 fprintf(stderr, "root %llu root dir %llu not found\n",
3082 (unsigned long long)root->root_key.objectid,
3083 (unsigned long long)root_dirid);
/* Main pass: drain the cache, checking/repairing each record. */
3087 cache = search_cache_extent(inode_cache, 0);
3090 node = container_of(cache, struct ptr_node, cache);
3092 remove_cache_extent(inode_cache, &node->cache);
/* Root dir and orphan objectid were handled above / are synthetic. */
3094 if (rec->ino == root_dirid ||
3095 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3096 free_inode_rec(rec);
/* An orphan item on disk excuses a zero-link inode. */
3100 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3101 ret = check_orphan_item(root, rec->ino);
3103 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3104 if (can_free_inode_rec(rec)) {
3105 free_inode_rec(rec);
/* Derive the remaining error bits from what was (not) found. */
3110 if (!rec->found_inode_item)
3111 rec->errors |= I_ERR_NO_INODE_ITEM;
3112 if (rec->found_link != rec->nlink)
3113 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3115 ret = try_repair_inode(root, rec);
3116 if (ret == 0 && can_free_inode_rec(rec)) {
3117 free_inode_rec(rec);
3123 if (!(repair && ret == 0))
3125 print_inode_error(root, rec);
/* Report each unresolved backref with its accumulated error bits. */
3126 list_for_each_entry(backref, &rec->backrefs, list) {
3127 if (!backref->found_dir_item)
3128 backref->errors |= REF_ERR_NO_DIR_ITEM;
3129 if (!backref->found_dir_index)
3130 backref->errors |= REF_ERR_NO_DIR_INDEX;
3131 if (!backref->found_inode_ref)
3132 backref->errors |= REF_ERR_NO_INODE_REF;
3133 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3134 " namelen %u name %s filetype %d errors %x",
3135 (unsigned long long)backref->dir,
3136 (unsigned long long)backref->index,
3137 backref->namelen, backref->name,
3138 backref->filetype, backref->errors);
3139 print_ref_error(backref->errors);
3141 free_inode_rec(rec);
3143 return (error > 0) ? -1 : 0;
/*
 * Look up the root_record for @objectid in @root_cache, allocating and
 * inserting a new zeroed record if none exists yet.
 *
 * Returns the record, or ERR_PTR(-ENOMEM) / ERR_PTR(-EEXIST) on failure
 * (the caller owns no freeing duty; records live in the cache tree).
 */
3146 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3149 struct cache_extent *cache;
3150 struct root_record *rec = NULL;
/* Exact lookup: the cache entry spans [objectid, objectid + 1). */
3153 cache = lookup_cache_extent(root_cache, objectid, 1);
3155 rec = container_of(cache, struct root_record, cache);
/* Miss: create a fresh record keyed by the objectid. */
3157 rec = calloc(1, sizeof(*rec));
3159 return ERR_PTR(-ENOMEM);
3160 rec->objectid = objectid;
3161 INIT_LIST_HEAD(&rec->backrefs);
3162 rec->cache.start = objectid;
3163 rec->cache.size = 1;
3165 ret = insert_cache_extent(root_cache, &rec->cache);
3167 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec matching (@ref_root, @dir, name) — or allocate
 * a new one (with the name stored inline after the struct) and append it
 * to rec->backrefs.
 *
 * NOTE(review): @index is not part of the visible match condition, only of
 * the newly created record — presumably index mismatches are flagged later
 * (see add_root_backref's REF_ERR_INDEX_UNMATCH).
 */
3172 static struct root_backref *get_root_backref(struct root_record *rec,
3173 u64 ref_root, u64 dir, u64 index,
3174 const char *name, int namelen)
3176 struct root_backref *backref;
/* Match on ref_root + dir + name (length first, then bytes). */
3178 list_for_each_entry(backref, &rec->backrefs, list) {
3179 if (backref->ref_root != ref_root || backref->dir != dir ||
3180 backref->namelen != namelen)
3182 if (memcmp(name, backref->name, namelen))
/* Not found: allocate struct + name + NUL in one chunk. */
3187 backref = calloc(1, sizeof(*backref) + namelen + 1);
3190 backref->ref_root = ref_root;
3192 backref->index = index;
3193 backref->namelen = namelen;
3194 memcpy(backref->name, name, namelen);
3195 backref->name[namelen] = '\0';
3196 list_add_tail(&backref->list, &rec->backrefs);
/*
 * cache_tree destructor callback: free a root_record together with all of
 * its queued backrefs.
 */
3200 static void free_root_record(struct cache_extent *cache)
3202 struct root_record *rec;
3203 struct root_backref *backref;
3205 rec = container_of(cache, struct root_record, cache);
/* Drain the backref list; each entry was a single calloc'd chunk. */
3206 while (!list_empty(&rec->backrefs)) {
3207 backref = to_root_backref(rec->backrefs.next);
3208 list_del(&backref->list);
/* Generates free_root_recs_tree() using the destructor above. */
3215 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one piece of evidence about a root reference, coming from a
 * DIR_ITEM, DIR_INDEX, ROOT_REF or ROOT_BACKREF item.
 *
 * Sets the corresponding found_* flag on the (ref_root, dir, name) backref
 * of @root_id, accumulates @errors, flags index mismatches and duplicate
 * refs, and marks the backref reachable once both a forward root ref and a
 * dir item have been seen.
 */
3217 static int add_root_backref(struct cache_tree *root_cache,
3218 u64 root_id, u64 ref_root, u64 dir, u64 index,
3219 const char *name, int namelen,
3220 int item_type, int errors)
3222 struct root_record *rec;
3223 struct root_backref *backref;
3225 rec = get_root_rec(root_cache, root_id);
3226 BUG_ON(IS_ERR(rec));
3227 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3230 backref->errors |= errors;
/* DIR_ITEM carries no index; for every other item type reconcile it. */
3232 if (item_type != BTRFS_DIR_ITEM_KEY) {
3233 if (backref->found_dir_index || backref->found_back_ref ||
3234 backref->found_forward_ref) {
3235 if (backref->index != index)
3236 backref->errors |= REF_ERR_INDEX_UNMATCH;
3238 backref->index = index;
3242 if (item_type == BTRFS_DIR_ITEM_KEY) {
3243 if (backref->found_forward_ref)
3245 backref->found_dir_item = 1;
3246 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3247 backref->found_dir_index = 1;
3248 } else if (item_type == BTRFS_ROOT_REF_KEY) {
/* Seeing a second forward ref for the same name is corruption. */
3249 if (backref->found_forward_ref)
3250 backref->errors |= REF_ERR_DUP_ROOT_REF;
3251 else if (backref->found_dir_item)
3253 backref->found_forward_ref = 1;
3254 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3255 if (backref->found_back_ref)
3256 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3257 backref->found_back_ref = 1;
/* Forward ref + dir item together make the subvolume reachable. */
3262 if (backref->found_forward_ref && backref->found_dir_item)
3263 backref->reachable = 1;
/*
 * Fold the per-tree root cache (@src_cache) of @root into the global
 * @dst_cache: inode records that actually represent child subvolume
 * directories are converted into root backrefs.
 *
 * Reloc trees are skipped entirely — their records are discarded.
 */
3267 static int merge_root_recs(struct btrfs_root *root,
3268 struct cache_tree *src_cache,
3269 struct cache_tree *dst_cache)
3271 struct cache_extent *cache;
3272 struct ptr_node *node;
3273 struct inode_record *rec;
3274 struct inode_backref *backref;
/* Reloc tree entries are transient; drop them instead of merging. */
3277 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3278 free_inode_recs_tree(src_cache);
/* Drain src_cache one record at a time. */
3283 cache = search_cache_extent(src_cache, 0);
3286 node = container_of(cache, struct ptr_node, cache);
3288 remove_cache_extent(src_cache, &node->cache);
/* Only records whose ino is a child root of this tree are kept. */
3291 ret = is_child_root(root, root->objectid, rec->ino);
/* Child-root records must not have inode refs of their own. */
3297 list_for_each_entry(backref, &rec->backrefs, list) {
3298 BUG_ON(backref->found_inode_ref);
3299 if (backref->found_dir_item)
3300 add_root_backref(dst_cache, rec->ino,
3301 root->root_key.objectid, backref->dir,
3302 backref->index, backref->name,
3303 backref->namelen, BTRFS_DIR_ITEM_KEY,
3305 if (backref->found_dir_index)
3306 add_root_backref(dst_cache, rec->ino,
3307 root->root_key.objectid, backref->dir,
3308 backref->index, backref->name,
3309 backref->namelen, BTRFS_DIR_INDEX_KEY,
3313 free_inode_rec(rec);
/*
 * Verify the subvolume reference graph collected in @root_cache.
 *
 * Phase 1: iteratively clear the "reachable" flag on backrefs whose
 * referencing root is itself unreferenced, until the graph converges
 * (note: cannot detect reference cycles).
 * Phase 2: report unreferenced fs trees (unless excused by an orphan
 * item or by having no root item at all) and all backref errors.
 *
 * Returns 1 if any error was found, 0 otherwise.
 */
3320 static int check_root_refs(struct btrfs_root *root,
3321 struct cache_tree *root_cache)
3323 struct root_record *rec;
3324 struct root_record *ref_root;
3325 struct root_backref *backref;
3326 struct cache_extent *cache;
/* The top-level fs tree is always considered referenced. */
3332 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3333 BUG_ON(IS_ERR(rec));
3336 /* fixme: this can not detect circular references */
/* Fixed-point loop: drop reachability through unreferenced roots. */
3339 cache = search_cache_extent(root_cache, 0);
3343 rec = container_of(cache, struct root_record, cache);
3344 cache = next_cache_extent(cache);
3346 if (rec->found_ref == 0)
3349 list_for_each_entry(backref, &rec->backrefs, list) {
3350 if (!backref->reachable)
3353 ref_root = get_root_rec(root_cache,
3355 BUG_ON(IS_ERR(ref_root));
3356 if (ref_root->found_ref > 0)
/* Referrer itself unreachable: invalidate this backref too. */
3359 backref->reachable = 0;
3361 if (rec->found_ref == 0)
/* Reporting pass over the converged graph. */
3367 cache = search_cache_extent(root_cache, 0);
3371 rec = container_of(cache, struct root_record, cache);
3372 cache = next_cache_extent(cache);
/* Only genuine fs-tree objectids can be "not referenced" errors. */
3374 if (rec->found_ref == 0 &&
3375 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3376 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
/* An orphan item in the tree root excuses the missing reference. */
3377 ret = check_orphan_item(root->fs_info->tree_root,
3383 * If we don't have a root item then we likely just have
3384 * a dir item in a snapshot for this root but no actual
3385 * ref key or anything so it's meaningless.
3387 if (!rec->found_root_item)
3390 fprintf(stderr, "fs tree %llu not referenced\n",
3391 (unsigned long long)rec->objectid);
/* Flag refs pointing at a root that has no root item. */
3395 if (rec->found_ref > 0 && !rec->found_root_item)
3397 list_for_each_entry(backref, &rec->backrefs, list) {
3398 if (!backref->found_dir_item)
3399 backref->errors |= REF_ERR_NO_DIR_ITEM;
3400 if (!backref->found_dir_index)
3401 backref->errors |= REF_ERR_NO_DIR_INDEX;
3402 if (!backref->found_back_ref)
3403 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3404 if (!backref->found_forward_ref)
3405 backref->errors |= REF_ERR_NO_ROOT_REF;
3406 if (backref->reachable && backref->errors)
3413 fprintf(stderr, "fs tree %llu refs %u %s\n",
3414 (unsigned long long)rec->objectid, rec->found_ref,
3415 rec->found_root_item ? "" : "not found");
3417 list_for_each_entry(backref, &rec->backrefs, list) {
3418 if (!backref->reachable)
3420 if (!backref->errors && rec->found_root_item)
3422 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3423 " index %llu namelen %u name %s errors %x\n",
3424 (unsigned long long)backref->ref_root,
3425 (unsigned long long)backref->dir,
3426 (unsigned long long)backref->index,
3427 backref->namelen, backref->name,
3429 print_ref_error(backref->errors);
3432 return errors > 0 ? 1 : 0;
/*
 * Decode one ROOT_REF / ROOT_BACKREF item at @slot of @eb and feed it into
 * the root cache as a backref.
 *
 * For ROOT_REF the key is (parent, REF, child); for ROOT_BACKREF it is
 * (child, BACKREF, parent) — hence the swapped objectid/offset arguments.
 * Overlong names are truncated to BTRFS_NAME_LEN and tagged with
 * REF_ERR_NAME_TOO_LONG.
 */
3435 static int process_root_ref(struct extent_buffer *eb, int slot,
3436 struct btrfs_key *key,
3437 struct cache_tree *root_cache)
3443 struct btrfs_root_ref *ref;
3444 char namebuf[BTRFS_NAME_LEN];
3447 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3449 dirid = btrfs_root_ref_dirid(eb, ref);
3450 index = btrfs_root_ref_sequence(eb, ref);
3451 name_len = btrfs_root_ref_name_len(eb, ref);
/* Clamp the name to what namebuf can hold; remember the violation. */
3453 if (name_len <= BTRFS_NAME_LEN) {
3457 len = BTRFS_NAME_LEN;
3458 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes immediately follow the btrfs_root_ref struct. */
3460 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3462 if (key->type == BTRFS_ROOT_REF_KEY) {
3463 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3464 index, namebuf, len, key->type, error);
3466 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3467 index, namebuf, len, key->type, error);
/*
 * cache_tree destructor callback for corrupt-block records.
 */
3472 static void free_corrupt_block(struct cache_extent *cache)
3474 struct btrfs_corrupt_block *corrupt;
3476 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/* Generates free_corrupt_blocks_tree() using the destructor above. */
3480 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3483 * Repair the btree of the given root.
3485 * The fix is to remove the node key in corrupt_blocks cache_tree.
3486 * and rebalance the tree.
3487 * After the fix, the btree should be writeable.
/*
 * Repair @root's btree given the corrupted blocks recorded in
 * @corrupt_blocks: first delete every pointer to a corrupt node (and free
 * its extent), then re-run btrfs_search_slot over the same keys with
 * ins_len = -1 to let the normal balancing code fix up the tree shape.
 */
3489 static int repair_btree(struct btrfs_root *root,
3490 struct cache_tree *corrupt_blocks)
3492 struct btrfs_trans_handle *trans;
3493 struct btrfs_path *path;
3494 struct btrfs_corrupt_block *corrupt;
3495 struct cache_extent *cache;
3496 struct btrfs_key key;
/* Nothing recorded -> nothing to repair. */
3501 if (cache_tree_empty(corrupt_blocks))
3504 path = btrfs_alloc_path();
3508 trans = btrfs_start_transaction(root, 1);
3509 if (IS_ERR(trans)) {
3510 ret = PTR_ERR(trans);
3511 fprintf(stderr, "Error starting transaction: %s\n",
/* Pass 1: unlink each corrupt block from its parent node. */
3515 cache = first_cache_extent(corrupt_blocks);
3517 corrupt = container_of(cache, struct btrfs_corrupt_block,
3519 level = corrupt->level;
/* Stop the search at the corrupt block's parent level. */
3520 path->lowest_level = level;
3521 key.objectid = corrupt->key.objectid;
3522 key.type = corrupt->key.type;
3523 key.offset = corrupt->key.offset;
3526 * Here we don't want to do any tree balance, since it may
3527 * cause a balance with corrupted brother leaf/node,
3528 * so ins_len set to 0 here.
3529 * Balance will be done after all corrupt node/leaf is deleted.
3531 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3534 offset = btrfs_node_blockptr(path->nodes[level],
3535 path->slots[level]);
3537 /* Remove the ptr */
3538 ret = btrfs_del_ptr(trans, root, path, level,
3539 path->slots[level]);
3543 * Remove the corresponding extent
3544 * return value is not concerned.
3546 btrfs_release_path(path);
3547 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3548 0, root->root_key.objectid,
3550 cache = next_cache_extent(cache);
3553 /* Balance the btree using btrfs_search_slot() */
/* Pass 2: ins_len = -1 (deletion mode) triggers rebalancing. */
3554 cache = first_cache_extent(corrupt_blocks);
3556 corrupt = container_of(cache, struct btrfs_corrupt_block,
3558 memcpy(&key, &corrupt->key, sizeof(key));
3559 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3562 /* return will always >0 since it won't find the item */
3564 btrfs_release_path(path);
3565 cache = next_cache_extent(cache);
3568 btrfs_commit_transaction(trans, root);
3570 btrfs_free_path(path);
/*
 * Fully check one fs/subvolume tree @root:
 *  - collect corrupt tree blocks into a per-root cache (and try
 *    repair_btree() on them in repair mode),
 *  - move pending orphan data extents onto their inode records,
 *  - walk the whole tree (honoring a partially-dropped snapshot's
 *    drop_progress) via walk_down_tree/walk_up_tree,
 *  - then merge root refs and verify all inode records.
 */
3574 static int check_fs_root(struct btrfs_root *root,
3575 struct cache_tree *root_cache,
3576 struct walk_control *wc)
3582 struct btrfs_path path;
3583 struct shared_node root_node;
3584 struct root_record *rec;
3585 struct btrfs_root_item *root_item = &root->root_item;
3586 struct cache_tree corrupt_blocks;
3587 struct orphan_data_extent *orphan;
3588 struct orphan_data_extent *tmp;
3589 enum btrfs_tree_block_status status;
3590 struct node_refs nrefs;
3593 * Reuse the corrupt_block cache tree to record corrupted tree block
3595 * Unlike the usage in extent tree check, here we do it in a per
3596 * fs/subvol tree base.
3598 cache_tree_init(&corrupt_blocks);
/* Publish via fs_info so the walk callbacks can record corruption. */
3599 root->fs_info->corrupt_blocks = &corrupt_blocks;
/* Reloc trees don't get root records of their own. */
3601 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3602 rec = get_root_rec(root_cache, root->root_key.objectid);
3603 BUG_ON(IS_ERR(rec));
3604 if (btrfs_root_refs(root_item) > 0)
3605 rec->found_root_item = 1;
3608 btrfs_init_path(&path);
3609 memset(&root_node, 0, sizeof(root_node));
3610 cache_tree_init(&root_node.root_cache);
3611 cache_tree_init(&root_node.inode_cache);
3612 memset(&nrefs, 0, sizeof(nrefs));
3614 /* Move the orphan extent record to corresponding inode_record */
3615 list_for_each_entry_safe(orphan, tmp,
3616 &root->orphan_data_extents, list) {
3617 struct inode_record *inode;
3619 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3621 BUG_ON(IS_ERR(inode));
/* The inode now owns the orphan extent and must repair it. */
3622 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3623 list_move(&orphan->list, &inode->orphan_extents);
/* Seed the walk control with this tree's root node. */
3626 level = btrfs_header_level(root->node);
3627 memset(wc->nodes, 0, sizeof(wc->nodes));
3628 wc->nodes[level] = &root_node;
3629 wc->active_node = level;
3630 wc->root_level = level;
3632 /* We may not have checked the root block, lets do that now */
3633 if (btrfs_is_leaf(root->node))
3634 status = btrfs_check_leaf(root, NULL, root->node);
3636 status = btrfs_check_node(root, NULL, root->node);
3637 if (status != BTRFS_TREE_BLOCK_CLEAN)
/* Live root (or drop not yet started): walk from the very top. */
3640 if (btrfs_root_refs(root_item) > 0 ||
3641 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3642 path.nodes[level] = root->node;
3643 extent_buffer_get(root->node);
3644 path.slots[level] = 0;
3646 struct btrfs_key key;
3647 struct btrfs_disk_key found_key;
/* Partially dropped snapshot: resume the walk at drop_progress. */
3649 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3650 level = root_item->drop_level;
3651 path.lowest_level = level;
3652 if (level > btrfs_header_level(root->node) ||
3653 level >= BTRFS_MAX_LEVEL) {
3654 error("ignoring invalid drop level: %u", level);
3657 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3660 btrfs_node_key(path.nodes[level], &found_key,
3662 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3663 sizeof(found_key)));
/* Main down/up walk loop over the whole tree. */
3667 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3673 wret = walk_up_tree(root, &path, wc, &level);
3680 btrfs_release_path(&path);
/* Report and, in repair mode, try to fix recorded corrupt blocks. */
3682 if (!cache_tree_empty(&corrupt_blocks)) {
3683 struct cache_extent *cache;
3684 struct btrfs_corrupt_block *corrupt;
3686 printf("The following tree block(s) is corrupted in tree %llu:\n",
3687 root->root_key.objectid);
3688 cache = first_cache_extent(&corrupt_blocks);
3690 corrupt = container_of(cache,
3691 struct btrfs_corrupt_block,
3693 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3694 cache->start, corrupt->level,
3695 corrupt->key.objectid, corrupt->key.type,
3696 corrupt->key.offset);
3697 cache = next_cache_extent(cache);
3700 printf("Try to repair the btree for root %llu\n",
3701 root->root_key.objectid);
3702 ret = repair_btree(root, &corrupt_blocks);
3704 fprintf(stderr, "Failed to repair btree: %s\n",
3707 printf("Btree for root %llu is fixed\n",
3708 root->root_key.objectid);
3712 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3716 if (root_node.current) {
3717 root_node.current->checked = 1;
3718 maybe_free_inode_rec(&root_node.inode_cache,
3722 err = check_inode_recs(root, &root_node.inode_cache);
/* Tear down the per-root state before returning. */
3726 free_corrupt_blocks_tree(&corrupt_blocks);
3727 root->fs_info->corrupt_blocks = NULL;
3728 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Return nonzero when @objectid names a tree whose contents look like an
 * fs tree: the two reloc trees are accepted explicitly, everything else is
 * delegated to is_fstree().
 */
3732 static int fs_root_objectid(u64 objectid)
3734 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3735 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3737 return is_fstree(objectid);
/*
 * Iterate over every ROOT_ITEM in the tree of tree roots and run
 * check_fs_root() on each fs tree; ROOT_REF/ROOT_BACKREF items are decoded
 * into the root cache along the way.
 *
 * If the tree root node changes under us (a repair committed a
 * transaction), or a sub-check returns -EAGAIN, all collected records are
 * discarded and the scan restarts from the beginning.
 */
3740 static int check_fs_roots(struct btrfs_root *root,
3741 struct cache_tree *root_cache)
3743 struct btrfs_path path;
3744 struct btrfs_key key;
3745 struct walk_control wc;
3746 struct extent_buffer *leaf, *tree_node;
3747 struct btrfs_root *tmp_root;
3748 struct btrfs_root *tree_root = root->fs_info->tree_root;
3752 if (ctx.progress_enabled) {
3753 ctx.tp = TASK_FS_ROOTS;
3754 task_start(ctx.info);
3758 * Just in case we made any changes to the extent tree that weren't
3759 * reflected into the free space cache yet.
3762 reset_cached_block_groups(root->fs_info);
3763 memset(&wc, 0, sizeof(wc));
3764 cache_tree_init(&wc.shared);
3765 btrfs_init_path(&path);
3770 key.type = BTRFS_ROOT_ITEM_KEY;
3771 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Snapshot the root node pointer to detect COW by repairs below. */
3776 tree_node = tree_root->node;
3778 if (tree_node != tree_root->node) {
/* Tree root changed: throw everything away and rescan. */
3779 free_root_recs_tree(root_cache);
3780 btrfs_release_path(&path);
3783 leaf = path.nodes[0];
3784 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3785 ret = btrfs_next_leaf(tree_root, &path);
3791 leaf = path.nodes[0];
3793 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3794 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3795 fs_root_objectid(key.objectid)) {
/* Reloc trees are read uncached (and freed again below). */
3796 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3797 tmp_root = btrfs_read_fs_root_no_cache(
3798 root->fs_info, &key);
3800 key.offset = (u64)-1;
3801 tmp_root = btrfs_read_fs_root(
3802 root->fs_info, &key);
3804 if (IS_ERR(tmp_root)) {
3808 ret = check_fs_root(tmp_root, root_cache, &wc);
3809 if (ret == -EAGAIN) {
/* Sub-check asked for a restart: reset and rescan. */
3810 free_root_recs_tree(root_cache);
3811 btrfs_release_path(&path);
3816 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3817 btrfs_free_fs_root(tmp_root);
3818 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3819 key.type == BTRFS_ROOT_BACKREF_KEY) {
3820 process_root_ref(leaf, path.slots[0], &key,
3827 btrfs_release_path(&path);
3829 free_extent_cache_tree(&wc.shared);
/* Leftover shared nodes indicate an internal accounting problem. */
3830 if (!cache_tree_empty(&wc.shared))
3831 fprintf(stderr, "warning line %d\n", __LINE__);
3833 task_stop(ctx.info);
/*
 * Cross-check every backref of extent record @rec:
 *  - each backref was seen in the extent tree,
 *  - tree backrefs were referenced back,
 *  - data backref counts, disk_bytenr and byte length match the record,
 *  - the summed found_ref count equals rec->refs.
 *
 * When @print_errs is set, each inconsistency is reported on stderr.
 * NOTE(review): the accumulated error/return value lines are elided here;
 * judging by callers (used as a boolean in maybe_free_extent_rec), nonzero
 * presumably means "problems found" — confirm in the full source.
 */
3838 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3840 struct list_head *cur = rec->backrefs.next;
3841 struct extent_backref *back;
3842 struct tree_backref *tback;
3843 struct data_backref *dback;
3847 while(cur != &rec->backrefs) {
3848 back = to_extent_backref(cur);
/* Backref never confirmed by an extent tree item. */
3850 if (!back->found_extent_tree) {
3854 if (back->is_data) {
3855 dback = to_data_backref(back);
3856 fprintf(stderr, "Backref %llu %s %llu"
3857 " owner %llu offset %llu num_refs %lu"
3858 " not found in extent tree\n",
3859 (unsigned long long)rec->start,
3860 back->full_backref ?
3862 back->full_backref ?
3863 (unsigned long long)dback->parent:
3864 (unsigned long long)dback->root,
3865 (unsigned long long)dback->owner,
3866 (unsigned long long)dback->offset,
3867 (unsigned long)dback->num_refs);
3869 tback = to_tree_backref(back);
3870 fprintf(stderr, "Backref %llu parent %llu"
3871 " root %llu not found in extent tree\n",
3872 (unsigned long long)rec->start,
3873 (unsigned long long)tback->parent,
3874 (unsigned long long)tback->root);
/* Tree backref present in extent tree but never referenced back. */
3877 if (!back->is_data && !back->found_ref) {
3881 tback = to_tree_backref(back);
3882 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3883 (unsigned long long)rec->start,
3884 back->full_backref ? "parent" : "root",
3885 back->full_backref ?
3886 (unsigned long long)tback->parent :
3887 (unsigned long long)tback->root, back);
3889 if (back->is_data) {
3890 dback = to_data_backref(back);
/* Per-backref count mismatch (local, not the global sum). */
3891 if (dback->found_ref != dback->num_refs) {
3895 fprintf(stderr, "Incorrect local backref count"
3896 " on %llu %s %llu owner %llu"
3897 " offset %llu found %u wanted %u back %p\n",
3898 (unsigned long long)rec->start,
3899 back->full_backref ?
3901 back->full_backref ?
3902 (unsigned long long)dback->parent:
3903 (unsigned long long)dback->root,
3904 (unsigned long long)dback->owner,
3905 (unsigned long long)dback->offset,
3906 dback->found_ref, dback->num_refs, back);
3908 if (dback->disk_bytenr != rec->start) {
3912 fprintf(stderr, "Backref disk bytenr does not"
3913 " match extent record, bytenr=%llu, "
3914 "ref bytenr=%llu\n",
3915 (unsigned long long)rec->start,
3916 (unsigned long long)dback->disk_bytenr);
3919 if (dback->bytes != rec->nr) {
3923 fprintf(stderr, "Backref bytes do not match "
3924 "extent backref, bytenr=%llu, ref "
3925 "bytes=%llu, backref bytes=%llu\n",
3926 (unsigned long long)rec->start,
3927 (unsigned long long)rec->nr,
3928 (unsigned long long)dback->bytes);
/* Sum data-backref found_ref counts (tree backrefs counted elsewhere). */
3931 if (!back->is_data) {
3934 dback = to_data_backref(back);
3935 found += dback->found_ref;
/* Global consistency: total found refs must equal the item's refs. */
3938 if (found != rec->refs) {
3942 fprintf(stderr, "Incorrect global backref count "
3943 "on %llu found %llu wanted %llu\n",
3944 (unsigned long long)rec->start,
3945 (unsigned long long)found,
3946 (unsigned long long)rec->refs);
/*
 * Release every backref queued on extent record @rec.
 */
3952 static int free_all_extent_backrefs(struct extent_record *rec)
3954 struct extent_backref *back;
3955 struct list_head *cur;
3956 while (!list_empty(&rec->backrefs)) {
3957 cur = rec->backrefs.next;
3958 back = to_extent_backref(cur);
/*
 * Drain @extent_cache, freeing each extent record and all of its backrefs.
 * (@fs_info is unused in the visible body.)
 */
3965 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3966 struct cache_tree *extent_cache)
3968 struct cache_extent *cache;
3969 struct extent_record *rec;
3972 cache = first_cache_extent(extent_cache);
3975 rec = container_of(cache, struct extent_record, cache);
3976 remove_cache_extent(extent_cache, cache);
3977 free_all_extent_backrefs(rec);
/*
 * Free @rec (and drop it from @extent_cache) once it is fully verified:
 * contents and owner checked, refcounts consistent, no duplicates, all
 * backrefs accounted for, and none of the structural error flags set.
 * Records still carrying problems are kept for later reporting/repair.
 */
3982 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3983 struct extent_record *rec)
3985 if (rec->content_checked && rec->owner_ref_checked &&
3986 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3987 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3988 !rec->bad_full_backref && !rec->crossing_stripes &&
3989 !rec->wrong_chunk_type) {
3990 remove_cache_extent(extent_cache, &rec->cache);
3991 free_all_extent_backrefs(rec);
/* Unlink from whatever pending list the record sits on. */
3992 list_del_init(&rec->list);
/*
 * Verify that the owner recorded in tree block @buf really references it.
 *
 * Fast path: a non-full tree backref whose root matches the header owner.
 * Slow path: read the owner's fs tree and search for @buf's first key one
 * level up; the block is owned if the parent's slot points at buf->start.
 *
 * Returns 0 when the owner ref checks out, 1 otherwise.
 */
3998 static int check_owner_ref(struct btrfs_root *root,
3999 struct extent_record *rec,
4000 struct extent_buffer *buf)
4002 struct extent_backref *node;
4003 struct tree_backref *back;
4004 struct btrfs_root *ref_root;
4005 struct btrfs_key key;
4006 struct btrfs_path path;
4007 struct extent_buffer *parent;
4012 list_for_each_entry(node, &rec->backrefs, list) {
4015 if (!node->found_ref)
/* Full (parent-keyed) backrefs carry no root id to compare. */
4017 if (node->full_backref)
4019 back = to_tree_backref(node);
4020 if (btrfs_header_owner(buf) == back->root)
/* Root blocks are referenced by root items, never reached here. */
4023 BUG_ON(rec->is_root);
4025 /* try to find the block by search corresponding fs tree */
4026 key.objectid = btrfs_header_owner(buf);
4027 key.type = BTRFS_ROOT_ITEM_KEY;
4028 key.offset = (u64)-1;
4030 ref_root = btrfs_read_fs_root(root->fs_info, &key);
4031 if (IS_ERR(ref_root))
/* Use buf's first key as the search anchor. */
4034 level = btrfs_header_level(buf);
4036 btrfs_item_key_to_cpu(buf, &key, 0);
4038 btrfs_node_key_to_cpu(buf, &key, 0);
4040 btrfs_init_path(&path);
/* Stop one level above buf so we can inspect its would-be parent. */
4041 path.lowest_level = level + 1;
4042 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4046 parent = path.nodes[level + 1];
4047 if (parent && buf->start == btrfs_node_blockptr(parent,
4048 path.slots[level + 1]))
4051 btrfs_release_path(&path);
4052 return found ? 0 : 1;
/*
 * Return whether @rec has a (non-full) tree backref owned by the extent
 * tree, i.e. the block belongs to the extent tree itself.
 */
4055 static int is_extent_tree_record(struct extent_record *rec)
4057 struct list_head *cur = rec->backrefs.next;
4058 struct extent_backref *node;
4059 struct tree_backref *back;
4062 while(cur != &rec->backrefs) {
4063 node = to_extent_backref(cur);
4067 back = to_tree_backref(node);
/* Full backrefs are parent-keyed; no root id to test. */
4068 if (node->full_backref)
4070 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Record an I/O error on tree block [@start, @start+len): if the block is
 * part of the extent tree, register it as a corrupt extent record (keyed by
 * its parent key) so later repair passes can act on it.
 */
4077 static int record_bad_block_io(struct btrfs_fs_info *info,
4078 struct cache_tree *extent_cache,
4081 struct extent_record *rec;
4082 struct cache_extent *cache;
4083 struct btrfs_key key;
4085 cache = lookup_cache_extent(extent_cache, start, len);
4089 rec = container_of(cache, struct extent_record, cache);
/* Only extent-tree blocks are tracked this way. */
4090 if (!is_extent_tree_record(rec))
4093 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4094 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot+1 of @buf, used to fix out-of-order
 * keys.
 *
 * Node case: swap the two btrfs_key_ptr structs wholesale; if slot 0 was
 * involved, propagate the new first key to the parent via
 * btrfs_fixup_low_keys.
 * Leaf case: swap the item data (via temporary heap copies), swap the item
 * offsets/sizes, then swap the keys with btrfs_set_item_key_unsafe.
 */
4097 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4098 struct extent_buffer *buf, int slot)
4100 if (btrfs_header_level(buf)) {
4101 struct btrfs_key_ptr ptr1, ptr2;
4103 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4104 sizeof(struct btrfs_key_ptr));
4105 read_extent_buffer(buf, &ptr2,
4106 btrfs_node_key_ptr_offset(slot + 1),
4107 sizeof(struct btrfs_key_ptr));
/* Write each ptr back into the other's slot. */
4108 write_extent_buffer(buf, &ptr1,
4109 btrfs_node_key_ptr_offset(slot + 1),
4110 sizeof(struct btrfs_key_ptr));
4111 write_extent_buffer(buf, &ptr2,
4112 btrfs_node_key_ptr_offset(slot),
4113 sizeof(struct btrfs_key_ptr));
4115 struct btrfs_disk_key key;
/* First key changed: fix the ancestors' low keys. */
4116 btrfs_node_key(buf, &key, 0);
4117 btrfs_fixup_low_keys(root, path, &key,
4118 btrfs_header_level(buf) + 1);
4121 struct btrfs_item *item1, *item2;
4122 struct btrfs_key k1, k2;
4123 char *item1_data, *item2_data;
4124 u32 item1_offset, item2_offset, item1_size, item2_size;
4126 item1 = btrfs_item_nr(slot);
4127 item2 = btrfs_item_nr(slot + 1);
4128 btrfs_item_key_to_cpu(buf, &k1, slot);
4129 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4130 item1_offset = btrfs_item_offset(buf, item1);
4131 item2_offset = btrfs_item_offset(buf, item2);
4132 item1_size = btrfs_item_size(buf, item1);
4133 item2_size = btrfs_item_size(buf, item2);
/* Items may differ in size, so stage both payloads on the heap. */
4135 item1_data = malloc(item1_size);
4138 item2_data = malloc(item2_size);
4144 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4145 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/* Cross-write the payloads into the opposite offsets. */
4147 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4148 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
/* Swap the offset/size bookkeeping to match the moved payloads. */
4152 btrfs_set_item_offset(buf, item1, item2_offset);
4153 btrfs_set_item_offset(buf, item2, item1_offset);
4154 btrfs_set_item_size(buf, item1, item2_size);
4155 btrfs_set_item_size(buf, item2, item1_size);
/* "unsafe" key update: skips ordering checks, exactly what we need. */
4157 path->slots[0] = slot;
4158 btrfs_set_item_key_unsafe(root, path, &k2);
4159 path->slots[0] = slot + 1;
4160 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair BTRFS_TREE_BLOCK_BAD_KEY_ORDER: walk adjacent key pairs in
 * the block at path->lowest_level and swap any pair that compares out
 * of order, then mark the buffer dirty.
 * NOTE(review): lines are missing from this extraction (the level
 * branch, error handling, return); comments cover visible code only.
 */
4165 static int fix_key_order(struct btrfs_trans_handle *trans,
4166 struct btrfs_root *root,
4167 struct btrfs_path *path)
4169 struct extent_buffer *buf;
4170 struct btrfs_key k1, k2;
4172 int level = path->lowest_level;
4175 buf = path->nodes[level];
/* compare each neighbouring pair of keys */
4176 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4178 btrfs_node_key_to_cpu(buf, &k1, i);
4179 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4181 btrfs_item_key_to_cpu(buf, &k1, i);
4182 btrfs_item_key_to_cpu(buf, &k2, i + 1);
/* already ordered — nothing to do for this pair */
4184 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4186 ret = swap_values(root, path, buf, i);
4189 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item at @slot from leaf @buf, but only if its key type is
 * one the checker can safely regenerate later (dir index, extent item,
 * metadata item, tree block ref, extent data ref).  The item headers
 * after @slot are shifted down and nritems decremented; if slot 0 was
 * removed, the parent's low key is fixed up.
 * NOTE(review): lines are missing from this extraction (returns, the
 * slot == 0 condition); comments cover visible code only.
 */
4195 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4196 struct btrfs_root *root,
4197 struct btrfs_path *path,
4198 struct extent_buffer *buf, int slot)
4200 struct btrfs_key key;
4201 int nritems = btrfs_header_nritems(buf);
4203 btrfs_item_key_to_cpu(buf, &key, slot);
4205 /* These are all the keys we can deal with missing. */
4206 if (key.type != BTRFS_DIR_INDEX_KEY &&
4207 key.type != BTRFS_EXTENT_ITEM_KEY &&
4208 key.type != BTRFS_METADATA_ITEM_KEY &&
4209 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4210 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4213 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4214 (unsigned long long)key.objectid, key.type,
4215 (unsigned long long)key.offset, slot, buf->start);
/* close the gap in the item header array */
4216 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4217 btrfs_item_nr_offset(slot + 1),
4218 sizeof(struct btrfs_item) *
4219 (nritems - slot - 1));
4220 btrfs_set_header_nritems(buf, nritems - 1);
/* presumably guarded by slot == 0: refresh the parent's low key */
4222 struct btrfs_disk_key disk_key;
4224 btrfs_item_key(buf, &disk_key, 0);
4225 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4227 btrfs_mark_buffer_dirty(buf);
/*
 * Repair BTRFS_TREE_BLOCK_INVALID_OFFSETS on a leaf: make every item's
 * data area butt up against its predecessor (or the end of the leaf
 * data area for item 0).  Items that extend past their allowed region
 * are deleted via delete_bogus_item() when possible; otherwise the
 * leaf is unfixable.  Fixable items have their data memmoved and their
 * offset header adjusted.
 * NOTE(review): lines are missing from this extraction (loop restarts
 * after deletion, returns); comments cover visible code only.
 */
4231 static int fix_item_offset(struct btrfs_trans_handle *trans,
4232 struct btrfs_root *root,
4233 struct btrfs_path *path)
4235 struct extent_buffer *buf;
4239 /* We should only get this for leaves */
4240 BUG_ON(path->lowest_level);
4241 buf = path->nodes[0];
4243 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4244 unsigned int shift = 0, offset;
/* item 0 must end exactly at the end of the leaf data area */
4246 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4247 BTRFS_LEAF_DATA_SIZE(root)) {
4248 if (btrfs_item_end_nr(buf, i) >
4249 BTRFS_LEAF_DATA_SIZE(root)) {
4250 ret = delete_bogus_item(trans, root, path,
4254 fprintf(stderr, "item is off the end of the "
4255 "leaf, can't fix\n");
4259 shift = BTRFS_LEAF_DATA_SIZE(root) -
4260 btrfs_item_end_nr(buf, i);
/* later items must end where the previous item's data begins */
4261 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4262 btrfs_item_offset_nr(buf, i - 1)) {
4263 if (btrfs_item_end_nr(buf, i) >
4264 btrfs_item_offset_nr(buf, i - 1)) {
4265 ret = delete_bogus_item(trans, root, path,
4269 fprintf(stderr, "items overlap, can't fix\n");
4273 shift = btrfs_item_offset_nr(buf, i - 1) -
4274 btrfs_item_end_nr(buf, i);
4279 printf("Shifting item nr %d by %u bytes in block %llu\n",
4280 i, shift, (unsigned long long)buf->start);
4281 offset = btrfs_item_offset_nr(buf, i);
/* slide the item data up by @shift and record the new offset */
4282 memmove_extent_buffer(buf,
4283 btrfs_leaf_data(buf) + offset + shift,
4284 btrfs_leaf_data(buf) + offset,
4285 btrfs_item_size_nr(buf, i));
4286 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4288 btrfs_mark_buffer_dirty(buf);
4292 * We may have moved things, in which case we want to exit so we don't
4293 * write those changes out. Once we have proper abort functionality in
4294 * progs this can be changed to something nicer.
4301 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4302 * then just return -EIO.
/*
 * Only bad key order and invalid item offsets are repairable here.
 * The block may be shared, so every root that references it (found via
 * btrfs_find_all_roots) is visited: for each, a transaction is started,
 * the block is re-searched with checks skipped, and the matching fix
 * routine is applied, committing the transaction afterwards.
 * NOTE(review): error-handling and return lines are missing from this
 * extraction; comments describe visible statements only.
 */
4304 static int try_to_fix_bad_block(struct btrfs_root *root,
4305 struct extent_buffer *buf,
4306 enum btrfs_tree_block_status status)
4308 struct btrfs_trans_handle *trans;
4309 struct ulist *roots;
4310 struct ulist_node *node;
4311 struct btrfs_root *search_root;
4312 struct btrfs_path *path;
4313 struct ulist_iterator iter;
4314 struct btrfs_key root_key, key;
/* anything other than these two statuses is not fixable here */
4317 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4318 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4321 path = btrfs_alloc_path();
4325 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4328 btrfs_free_path(path);
/* repair the block once per referencing root */
4332 ULIST_ITER_INIT(&iter);
4333 while ((node = ulist_next(roots, &iter))) {
4334 root_key.objectid = node->val;
4335 root_key.type = BTRFS_ROOT_ITEM_KEY;
4336 root_key.offset = (u64)-1;
4338 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4345 trans = btrfs_start_transaction(search_root, 0);
4346 if (IS_ERR(trans)) {
4347 ret = PTR_ERR(trans);
/* search down to the bad block itself, bypassing sanity checks */
4351 path->lowest_level = btrfs_header_level(buf);
4352 path->skip_check_block = 1;
4353 if (path->lowest_level)
4354 btrfs_node_key_to_cpu(buf, &key, 0);
4356 btrfs_item_key_to_cpu(buf, &key, 0);
4357 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4360 btrfs_commit_transaction(trans, search_root);
4363 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4364 ret = fix_key_order(trans, search_root, path);
4365 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4366 ret = fix_item_offset(trans, search_root, path);
4368 btrfs_commit_transaction(trans, search_root);
4371 btrfs_release_path(path);
4372 btrfs_commit_transaction(trans, search_root);
4375 btrfs_free_path(path);
/*
 * Validate one tree block against its cached extent record: record its
 * generation and first key, run btrfs_check_leaf()/btrfs_check_node(),
 * attempt repair via try_to_fix_bad_block() on failure, and mark the
 * record's content/owner-ref as checked.  Full-backref blocks skip the
 * owner check; otherwise check_owner_ref() decides.
 * NOTE(review): lines are missing from this extraction (early returns,
 * the repair/no-repair condition); comments cover visible code only.
 */
4379 static int check_block(struct btrfs_root *root,
4380 struct cache_tree *extent_cache,
4381 struct extent_buffer *buf, u64 flags)
4383 struct extent_record *rec;
4384 struct cache_extent *cache;
4385 struct btrfs_key key;
4386 enum btrfs_tree_block_status status;
4390 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4393 rec = container_of(cache, struct extent_record, cache);
4394 rec->generation = btrfs_header_generation(buf);
4396 level = btrfs_header_level(buf);
/* stash the block's first key and level in the record */
4397 if (btrfs_header_nritems(buf) > 0) {
4400 btrfs_item_key_to_cpu(buf, &key, 0);
4402 btrfs_node_key_to_cpu(buf, &key, 0);
4404 rec->info_objectid = key.objectid;
4406 rec->info_level = level;
4408 if (btrfs_is_leaf(buf))
4409 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4411 status = btrfs_check_node(root, &rec->parent_key, buf);
4413 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4415 status = try_to_fix_bad_block(root, buf, status);
4416 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4418 fprintf(stderr, "bad block %llu\n",
4419 (unsigned long long)buf->start);
4422 * Signal to callers we need to start the scan over
4423 * again since we'll have cowed blocks.
4428 rec->content_checked = 1;
/* full-backref blocks don't need an owner-ref check */
4429 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4430 rec->owner_ref_checked = 1;
4432 ret = check_owner_ref(root, rec, buf);
4434 rec->owner_ref_checked = 1;
4438 maybe_free_extent_rec(extent_cache, rec);
/*
 * find_tree_backref: linear scan of @rec's backref list for a tree
 * backref matching either @parent (full backref) or @root (plain ref).
 * NOTE(review): lines are missing from this extraction (returns, the
 * is_data skip); comments cover visible code only.
 */
4442 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4443 u64 parent, u64 root)
4445 struct list_head *cur = rec->backrefs.next;
4446 struct extent_backref *node;
4447 struct tree_backref *back;
4449 while(cur != &rec->backrefs) {
4450 node = to_extent_backref(cur);
4454 back = to_tree_backref(node);
/* parent != 0 implies a full backref matched by parent bytenr */
4456 if (!node->full_backref)
4458 if (parent == back->parent)
/* otherwise match a plain backref by root objectid */
4461 if (node->full_backref)
4463 if (back->root == root)
/*
 * alloc_tree_backref: allocate and zero a new tree backref, set either
 * parent (full backref) or root (plain), and append it to @rec's list.
 */
4470 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4471 u64 parent, u64 root)
4473 struct tree_backref *ref = malloc(sizeof(*ref));
4477 memset(&ref->node, 0, sizeof(ref->node));
4479 ref->parent = parent;
4480 ref->node.full_backref = 1;
4483 ref->node.full_backref = 0;
4485 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * find_data_backref: linear scan of @rec's backref list for a data
 * backref matching @parent (full backref) or the (root, owner, offset)
 * triple (plain ref).  When @found_ref is set and the candidate has
 * already been seen on disk, mismatching bytes/disk_bytenr disqualify
 * it so a conflicting duplicate can be allocated instead.
 * NOTE(review): lines are missing from this extraction (returns, the
 * is_data filter); comments cover visible code only.
 */
4490 static struct data_backref *find_data_backref(struct extent_record *rec,
4491 u64 parent, u64 root,
4492 u64 owner, u64 offset,
4494 u64 disk_bytenr, u64 bytes)
4496 struct list_head *cur = rec->backrefs.next;
4497 struct extent_backref *node;
4498 struct data_backref *back;
4500 while(cur != &rec->backrefs) {
4501 node = to_extent_backref(cur);
4505 back = to_data_backref(node);
4507 if (!node->full_backref)
4509 if (parent == back->parent)
4512 if (node->full_backref)
4514 if (back->root == root && back->owner == owner &&
4515 back->offset == offset) {
/* an already-found ref must also agree on size and disk bytenr */
4516 if (found_ref && node->found_ref &&
4517 (back->bytes != bytes ||
4518 back->disk_bytenr != disk_bytenr))
/*
 * alloc_data_backref: allocate and zero a new data backref, fill in
 * parent (full) or root/owner/offset (plain), seed bytes from
 * @max_size, append it to @rec's list, and widen rec->max_size.
 */
4527 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4528 u64 parent, u64 root,
4529 u64 owner, u64 offset,
4532 struct data_backref *ref = malloc(sizeof(*ref));
4536 memset(&ref->node, 0, sizeof(ref->node));
4537 ref->node.is_data = 1;
4540 ref->parent = parent;
4543 ref->node.full_backref = 1;
4547 ref->offset = offset;
4548 ref->node.full_backref = 0;
4550 ref->bytes = max_size;
4553 list_add_tail(&ref->node.list, &rec->backrefs);
4554 if (max_size > rec->max_size)
4555 rec->max_size = max_size;
4559 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the extent sits in a block group of
 * the wrong type: data extents must be in DATA groups; metadata
 * extents in SYSTEM or METADATA groups, with the chunk-tree backref
 * used to tell SYSTEM apart from ordinary METADATA.
 * NOTE(review): returns and braces are missing from this extraction.
 */
4560 static void check_extent_type(struct extent_record *rec)
4562 struct btrfs_block_group_cache *bg_cache;
4564 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4568 /* data extent, check chunk directly*/
4569 if (!rec->metadata) {
4570 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4571 rec->wrong_chunk_type = 1;
4575 /* metadata extent, check the obvious case first */
4576 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4577 BTRFS_BLOCK_GROUP_METADATA))) {
4578 rec->wrong_chunk_type = 1;
4583 * Check SYSTEM extent, as it's also marked as metadata, we can only
4584 * make sure it's a SYSTEM extent by its backref
4586 if (!list_empty(&rec->backrefs)) {
4587 struct extent_backref *node;
4588 struct tree_backref *tback;
4591 node = to_extent_backref(rec->backrefs.next);
4592 if (node->is_data) {
4593 /* tree block shouldn't have data backref */
4594 rec->wrong_chunk_type = 1;
4597 tback = container_of(node, struct tree_backref, node);
/* chunk-tree root means the block must live in a SYSTEM group */
4599 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4600 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4602 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4603 if (!(bg_cache->flags & bg_type))
4604 rec->wrong_chunk_type = 1;
4609 * Allocate a new extent record, fill default values from @tmpl and insert int
4610 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4611 * the cache, otherwise it fails.
/*
 * NOTE(review): malloc-failure handling, returns, and the metadata-only
 * guard around the crossing-stripes check are missing from this
 * extraction; comments describe visible statements only.
 */
4613 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4614 struct extent_record *tmpl)
4616 struct extent_record *rec;
4619 rec = malloc(sizeof(*rec));
/* copy template fields; flags not in the template get safe defaults */
4622 rec->start = tmpl->start;
4623 rec->max_size = tmpl->max_size;
4624 rec->nr = max(tmpl->nr, tmpl->max_size);
4625 rec->found_rec = tmpl->found_rec;
4626 rec->content_checked = tmpl->content_checked;
4627 rec->owner_ref_checked = tmpl->owner_ref_checked;
4628 rec->num_duplicates = 0;
4629 rec->metadata = tmpl->metadata;
4630 rec->flag_block_full_backref = FLAG_UNSET;
4631 rec->bad_full_backref = 0;
4632 rec->crossing_stripes = 0;
4633 rec->wrong_chunk_type = 0;
4634 rec->is_root = tmpl->is_root;
4635 rec->refs = tmpl->refs;
4636 rec->extent_item_refs = tmpl->extent_item_refs;
4637 rec->parent_generation = tmpl->parent_generation;
4638 INIT_LIST_HEAD(&rec->backrefs);
4639 INIT_LIST_HEAD(&rec->dups);
4640 INIT_LIST_HEAD(&rec->list);
4641 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4642 rec->cache.start = tmpl->start;
4643 rec->cache.size = tmpl->nr;
4644 ret = insert_cache_extent(extent_cache, &rec->cache);
4649 bytes_used += rec->nr;
/* metadata blocks must not straddle a stripe boundary (scrub limit) */
4652 rec->crossing_stripes = check_crossing_stripes(global_info,
4653 rec->start, global_info->tree_root->nodesize);
4654 check_extent_type(rec);
4659 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4661 * - refs - if found, increase refs
4662 * - is_root - if found, set
4663 * - content_checked - if found, set
4664 * - owner_ref_checked - if found, set
4666 * If not found, create a new one, initialize and insert.
/*
 * NOTE(review): this extraction is missing interior lines (the dup
 * flag, several returns/else branches); comments cover visible code.
 */
4668 static int add_extent_rec(struct cache_tree *extent_cache,
4669 struct extent_record *tmpl)
4671 struct extent_record *rec;
4672 struct cache_extent *cache;
4676 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4678 rec = container_of(cache, struct extent_record, cache);
4682 rec->nr = max(tmpl->nr, tmpl->max_size);
4685 * We need to make sure to reset nr to whatever the extent
4686 * record says was the real size, this way we can compare it to
4689 if (tmpl->found_rec) {
/* a second on-disk record for the same range is a duplicate */
4690 if (tmpl->start != rec->start || rec->found_rec) {
4691 struct extent_record *tmp;
4694 if (list_empty(&rec->list))
4695 list_add_tail(&rec->list,
4696 &duplicate_extents);
4699 * We have to do this song and dance in case we
4700 * find an extent record that falls inside of
4701 * our current extent record but does not have
4702 * the same objectid.
4704 tmp = malloc(sizeof(*tmp));
4707 tmp->start = tmpl->start;
4708 tmp->max_size = tmpl->max_size;
4711 tmp->metadata = tmpl->metadata;
4712 tmp->extent_item_refs = tmpl->extent_item_refs;
4713 INIT_LIST_HEAD(&tmp->list);
4714 list_add_tail(&tmp->list, &rec->dups);
4715 rec->num_duplicates++;
/* conflicting ref counts from two EXTENT_ITEMs get reported */
4722 if (tmpl->extent_item_refs && !dup) {
4723 if (rec->extent_item_refs) {
4724 fprintf(stderr, "block %llu rec "
4725 "extent_item_refs %llu, passed %llu\n",
4726 (unsigned long long)tmpl->start,
4727 (unsigned long long)
4728 rec->extent_item_refs,
4729 (unsigned long long)tmpl->extent_item_refs);
4731 rec->extent_item_refs = tmpl->extent_item_refs;
4735 if (tmpl->content_checked)
4736 rec->content_checked = 1;
4737 if (tmpl->owner_ref_checked)
4738 rec->owner_ref_checked = 1;
4739 memcpy(&rec->parent_key, &tmpl->parent_key,
4740 sizeof(tmpl->parent_key));
4741 if (tmpl->parent_generation)
4742 rec->parent_generation = tmpl->parent_generation;
4743 if (rec->max_size < tmpl->max_size)
4744 rec->max_size = tmpl->max_size;
4747 * A metadata extent can't cross stripe_len boundary, otherwise
4748 * kernel scrub won't be able to handle it.
4749 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4753 rec->crossing_stripes = check_crossing_stripes(
4754 global_info, rec->start,
4755 global_info->tree_root->nodesize);
4756 check_extent_type(rec);
4757 maybe_free_extent_rec(extent_cache, rec);
/* not found: fall back to fresh insertion */
4761 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref for the block at @bytenr.  If no extent record
 * exists yet, a minimal placeholder record is inserted first.  Then
 * the matching tree backref is found or allocated, and its found_ref /
 * found_extent_tree bit set according to @found_ref; duplicates are
 * reported to stderr.
 * NOTE(review): error returns and some template setup lines are
 * missing from this extraction; comments cover visible code only.
 */
4766 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4767 u64 parent, u64 root, int found_ref)
4769 struct extent_record *rec;
4770 struct tree_backref *back;
4771 struct cache_extent *cache;
4774 cache = lookup_cache_extent(extent_cache, bytenr, 1);
/* no record yet — insert a stub so the backref has somewhere to live */
4776 struct extent_record tmpl;
4778 memset(&tmpl, 0, sizeof(tmpl));
4779 tmpl.start = bytenr;
4783 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4787 /* really a bug in cache_extent implement now */
4788 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4793 rec = container_of(cache, struct extent_record, cache);
4794 if (rec->start != bytenr) {
4796 * Several cause, from unaligned bytenr to over lapping extents
4801 back = find_tree_backref(rec, parent, root);
4803 back = alloc_tree_backref(rec, parent, root);
/* found_ref set: the actual reference was seen while walking trees */
4809 if (back->node.found_ref) {
4810 fprintf(stderr, "Extent back ref already exists "
4811 "for %llu parent %llu root %llu \n",
4812 (unsigned long long)bytenr,
4813 (unsigned long long)parent,
4814 (unsigned long long)root);
4816 back->node.found_ref = 1;
/* otherwise the ref item was seen in the extent tree */
4818 if (back->node.found_extent_tree) {
4819 fprintf(stderr, "Extent back ref already exists "
4820 "for %llu parent %llu root %llu \n",
4821 (unsigned long long)bytenr,
4822 (unsigned long long)parent,
4823 (unsigned long long)root);
4825 back->node.found_extent_tree = 1;
4827 check_extent_type(rec);
4828 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr, creating a stub
 * extent record when none exists.  @found_ref distinguishes a ref seen
 * from the owning fs tree (bytes/disk_bytenr are authoritative) from a
 * ref item seen in the extent tree (num_refs is authoritative).
 * NOTE(review): error returns and some branch boundaries are missing
 * from this extraction; comments cover visible code only.
 */
4832 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4833 u64 parent, u64 root, u64 owner, u64 offset,
4834 u32 num_refs, int found_ref, u64 max_size)
4836 struct extent_record *rec;
4837 struct data_backref *back;
4838 struct cache_extent *cache;
4841 cache = lookup_cache_extent(extent_cache, bytenr, 1);
/* no record yet — insert a stub carrying the known max_size */
4843 struct extent_record tmpl;
4845 memset(&tmpl, 0, sizeof(tmpl));
4846 tmpl.start = bytenr;
4848 tmpl.max_size = max_size;
4850 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4854 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4859 rec = container_of(cache, struct extent_record, cache);
4860 if (rec->max_size < max_size)
4861 rec->max_size = max_size;
4864 * If found_ref is set then max_size is the real size and must match the
4865 * existing refs. So if we have already found a ref then we need to
4866 * make sure that this ref matches the existing one, otherwise we need
4867 * to add a new backref so we can notice that the backrefs don't match
4868 * and we need to figure out who is telling the truth. This is to
4869 * account for that awful fsync bug I introduced where we'd end up with
4870 * a btrfs_file_extent_item that would have its length include multiple
4871 * prealloc extents or point inside of a prealloc extent.
4873 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4876 back = alloc_data_backref(rec, parent, root, owner, offset,
/* found_ref path: a single real reference with a definite size */
4882 BUG_ON(num_refs != 1);
4883 if (back->node.found_ref)
4884 BUG_ON(back->bytes != max_size);
4885 back->node.found_ref = 1;
4886 back->found_ref += 1;
4887 back->bytes = max_size;
4888 back->disk_bytenr = bytenr;
4890 rec->content_checked = 1;
4891 rec->owner_ref_checked = 1;
/* extent-tree path: record the on-disk ref count */
4893 if (back->node.found_extent_tree) {
4894 fprintf(stderr, "Extent back ref already exists "
4895 "for %llu parent %llu root %llu "
4896 "owner %llu offset %llu num_refs %lu\n",
4897 (unsigned long long)bytenr,
4898 (unsigned long long)parent,
4899 (unsigned long long)root,
4900 (unsigned long long)owner,
4901 (unsigned long long)offset,
4902 (unsigned long)num_refs);
4904 back->num_refs = num_refs;
4905 back->node.found_extent_tree = 1;
4907 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue [bytenr, bytenr+size) for later processing: add it to @seen
 * first (which rejects ranges already seen) and, only when that
 * succeeds, also to @pending.
 * NOTE(review): the early-return between the two calls is missing from
 * this extraction.
 */
4911 static int add_pending(struct cache_tree *pending,
4912 struct cache_tree *seen, u64 bytenr, u32 size)
4915 ret = add_cache_extent(seen, bytenr, size);
4918 add_cache_extent(pending, bytenr, size);
/*
 * Choose up to @bits_nr extents to process next, preferring (in order)
 * readahead entries, node entries near @last (biased 32K back so we
 * pick up neighbours), then plain pending entries.  Chosen extents are
 * copied into @bits; when room remains, nearby pending extents (within
 * 32K of the last choice) are appended to batch the I/O.
 * NOTE(review): several returns and loop headers are missing from this
 * extraction; comments are best-effort against the visible lines.
 */
4922 static int pick_next_pending(struct cache_tree *pending,
4923 struct cache_tree *reada,
4924 struct cache_tree *nodes,
4925 u64 last, struct block_info *bits, int bits_nr,
4928 unsigned long node_start = last;
4929 struct cache_extent *cache;
/* readahead entries take priority: return the first one alone */
4932 cache = search_cache_extent(reada, 0);
4934 bits[0].start = cache->start;
4935 bits[0].size = cache->size;
/* back off 32K so adjacent nodes before @last are also considered */
4940 if (node_start > 32768)
4941 node_start -= 32768;
4943 cache = search_cache_extent(nodes, node_start);
4945 cache = search_cache_extent(nodes, 0);
4948 cache = search_cache_extent(pending, 0);
4953 bits[ret].start = cache->start;
4954 bits[ret].size = cache->size;
4955 cache = next_cache_extent(cache);
4957 } while (cache && ret < bits_nr);
4963 bits[ret].start = cache->start;
4964 bits[ret].size = cache->size;
4965 cache = next_cache_extent(cache);
4967 } while (cache && ret < bits_nr);
/* with spare slots, batch in pending extents close to the last pick */
4969 if (bits_nr - ret > 8) {
4970 u64 lookup = bits[0].start + bits[0].size;
4971 struct cache_extent *next;
4972 next = search_cache_extent(pending, lookup);
4974 if (next->start - lookup > 32768)
4976 bits[ret].start = next->start;
4977 bits[ret].size = next->size;
4978 lookup = next->start + next->size;
4982 next = next_cache_extent(next);
/*
 * Destructors and insert helpers for the checker's per-type caches:
 * chunk records, device records, block-group records, and device
 * extent records.  The free_* callbacks unlink the record from any
 * lists before releasing it; the insert_* helpers add the cache entry
 * and then link the record into the tree's bookkeeping lists.
 * NOTE(review): free() calls, returns, and some braces are missing
 * from this extraction.
 */
4990 static void free_chunk_record(struct cache_extent *cache)
4992 struct chunk_record *rec;
4994 rec = container_of(cache, struct chunk_record, cache);
4995 list_del_init(&rec->list);
4996 list_del_init(&rec->dextents);
5000 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5002 cache_tree_free_extents(chunk_cache, free_chunk_record);
5005 static void free_device_record(struct rb_node *node)
5007 struct device_record *rec;
5009 rec = container_of(node, struct device_record, node);
5013 FREE_RB_BASED_TREE(device_cache, free_device_record);
5015 int insert_block_group_record(struct block_group_tree *tree,
5016 struct block_group_record *bg_rec)
5020 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
/* only link into the list once the cache insert succeeded */
5024 list_add_tail(&bg_rec->list, &tree->block_groups);
5028 static void free_block_group_record(struct cache_extent *cache)
5030 struct block_group_record *rec;
5032 rec = container_of(cache, struct block_group_record, cache);
5033 list_del_init(&rec->list);
5037 void free_block_group_tree(struct block_group_tree *tree)
5039 cache_tree_free_extents(&tree->tree, free_block_group_record);
5042 int insert_device_extent_record(struct device_extent_tree *tree,
5043 struct device_extent_record *de_rec)
5048 * Device extent is a bit different from the other extents, because
5049 * the extents which belong to the different devices may have the
5050 * same start and size, so we need use the special extent cache
5051 * search/insert functions.
5053 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
/* a fresh device extent starts on both orphan lists */
5057 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5058 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5062 static void free_device_extent_record(struct cache_extent *cache)
5064 struct device_extent_record *rec;
5066 rec = container_of(cache, struct device_extent_record, cache);
5067 if (!list_empty(&rec->chunk_list))
5068 list_del_init(&rec->chunk_list);
5069 if (!list_empty(&rec->device_list))
5070 list_del_init(&rec->device_list);
5074 void free_device_extent_tree(struct device_extent_tree *tree)
5076 cache_tree_free_extents(&tree->tree, free_device_extent_record);
5079 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a legacy (v0) extent ref item: objectids below
 * BTRFS_FIRST_FREE_OBJECTID denote tree roots (tree backref),
 * everything else is a data backref with the v0 ref count.
 */
5080 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5081 struct extent_buffer *leaf, int slot)
5083 struct btrfs_extent_ref_v0 *ref0;
5084 struct btrfs_key key;
5087 btrfs_item_key_to_cpu(leaf, &key, slot);
5088 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5089 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5090 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
5093 ret = add_data_backref(extent_cache, key.objectid, key.offset,
5094 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk_record from the CHUNK_ITEM at @slot of
 * @leaf: the record is sized for the chunk's stripe count and filled
 * with the item key, chunk geometry, and per-stripe devid/offset/uuid.
 * NOTE(review): allocation-failure exit and return lines are missing
 * from this extraction.
 */
5100 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5101 struct btrfs_key *key,
5104 struct btrfs_chunk *ptr;
5105 struct chunk_record *rec;
5108 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5109 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
/* record size depends on the number of trailing stripe entries */
5111 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5113 fprintf(stderr, "memory allocation failed\n");
5117 INIT_LIST_HEAD(&rec->list);
5118 INIT_LIST_HEAD(&rec->dextents);
/* the chunk's logical start/length key the cache entry */
5121 rec->cache.start = key->offset;
5122 rec->cache.size = btrfs_chunk_length(leaf, ptr);
5124 rec->generation = btrfs_header_generation(leaf);
5126 rec->objectid = key->objectid;
5127 rec->type = key->type;
5128 rec->offset = key->offset;
5130 rec->length = rec->cache.size;
5131 rec->owner = btrfs_chunk_owner(leaf, ptr);
5132 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5133 rec->type_flags = btrfs_chunk_type(leaf, ptr);
5134 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5135 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5136 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5137 rec->num_stripes = num_stripes;
5138 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* copy devid/offset/uuid for every stripe */
5140 for (i = 0; i < rec->num_stripes; ++i) {
5141 rec->stripes[i].devid =
5142 btrfs_stripe_devid_nr(leaf, ptr, i);
5143 rec->stripes[i].offset =
5144 btrfs_stripe_offset_nr(leaf, ptr, i);
5145 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5146 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate a CHUNK_ITEM with btrfs_check_chunk_valid() (to reject
 * crafted items outside the chunk tree), then build a chunk_record
 * and insert it into @chunk_cache, reporting duplicates.
 * NOTE(review): return lines are missing from this extraction.
 */
5153 static int process_chunk_item(struct cache_tree *chunk_cache,
5154 struct btrfs_key *key, struct extent_buffer *eb,
5157 struct chunk_record *rec;
5158 struct btrfs_chunk *chunk;
5161 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5163 * Do extra check for this chunk item,
5165 * It's still possible one can craft a leaf with CHUNK_ITEM, with
5166 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5167 * and owner<->key_type check.
5169 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5172 error("chunk(%llu, %llu) is not valid, ignore it",
5173 key->offset, btrfs_chunk_length(eb, chunk));
5176 rec = btrfs_new_chunk_record(eb, key, slot);
5177 ret = insert_cache_extent(chunk_cache, &rec->cache);
5179 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5180 rec->offset, rec->length);
/*
 * Build a device_record from the DEV_ITEM at @slot and insert it into
 * the red-black @dev_cache keyed by devid; duplicates are reported.
 * NOTE(review): error exit and return lines are missing from this
 * extraction.
 */
5187 static int process_device_item(struct rb_root *dev_cache,
5188 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5190 struct btrfs_dev_item *ptr;
5191 struct device_record *rec;
5194 ptr = btrfs_item_ptr(eb,
5195 slot, struct btrfs_dev_item);
5197 rec = malloc(sizeof(*rec));
5199 fprintf(stderr, "memory allocation failed\n");
5203 rec->devid = key->offset;
5204 rec->generation = btrfs_header_generation(eb);
5206 rec->objectid = key->objectid;
5207 rec->type = key->type;
5208 rec->offset = key->offset;
/* the item body's devid overrides the key-derived one above */
5210 rec->devid = btrfs_device_id(eb, ptr);
5211 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5212 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5214 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5216 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * btrfs_new_block_group_record: build a block_group_record from the
 * BLOCK_GROUP_ITEM at @slot, keyed by [objectid, objectid+offset).
 * process_block_group_item: wrap that plus insertion into the block
 * group tree, reporting duplicates.
 * NOTE(review): allocation-failure exits and returns are missing from
 * this extraction.
 */
5223 struct block_group_record *
5224 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5227 struct btrfs_block_group_item *ptr;
5228 struct block_group_record *rec;
5230 rec = calloc(1, sizeof(*rec));
5232 fprintf(stderr, "memory allocation failed\n");
/* block group start/length come straight from the key */
5236 rec->cache.start = key->objectid;
5237 rec->cache.size = key->offset;
5239 rec->generation = btrfs_header_generation(leaf);
5241 rec->objectid = key->objectid;
5242 rec->type = key->type;
5243 rec->offset = key->offset;
5245 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5246 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5248 INIT_LIST_HEAD(&rec->list);
5253 static int process_block_group_item(struct block_group_tree *block_group_cache,
5254 struct btrfs_key *key,
5255 struct extent_buffer *eb, int slot)
5257 struct block_group_record *rec;
5260 rec = btrfs_new_block_group_record(eb, key, slot);
5261 ret = insert_block_group_record(block_group_cache, rec);
5263 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5264 rec->objectid, rec->offset);
/*
 * btrfs_new_device_extent_record: build a device_extent_record from
 * the DEV_EXTENT item at @slot; the cache entry is keyed by
 * (devid, physical offset, length) so extents on different devices
 * with equal offsets stay distinct.
 * process_device_extent_item: wrap that plus insertion into
 * @dev_extent_cache, reporting duplicates.
 * NOTE(review): allocation-failure exits and returns are missing from
 * this extraction.
 */
5271 struct device_extent_record *
5272 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5273 struct btrfs_key *key, int slot)
5275 struct device_extent_record *rec;
5276 struct btrfs_dev_extent *ptr;
5278 rec = calloc(1, sizeof(*rec));
5280 fprintf(stderr, "memory allocation failed\n");
5284 rec->cache.objectid = key->objectid;
5285 rec->cache.start = key->offset;
5287 rec->generation = btrfs_header_generation(leaf);
5289 rec->objectid = key->objectid;
5290 rec->type = key->type;
5291 rec->offset = key->offset;
5293 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5294 rec->chunk_objecteid =
5295 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5297 btrfs_dev_extent_chunk_offset(leaf, ptr);
5298 rec->length = btrfs_dev_extent_length(leaf, ptr);
5299 rec->cache.size = rec->length;
/* starts orphaned; matched to chunk/device later */
5301 INIT_LIST_HEAD(&rec->chunk_list);
5302 INIT_LIST_HEAD(&rec->device_list);
5308 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5309 struct btrfs_key *key, struct extent_buffer *eb,
5312 struct device_extent_record *rec;
5315 rec = btrfs_new_device_extent_record(eb, key, slot);
5316 ret = insert_device_extent_record(dev_extent_cache, rec);
5319 "Device extent[%llu, %llu, %llu] existed.\n",
5320 rec->objectid, rec->offset, rec->length);
/*
 * Parse an EXTENT_ITEM / METADATA_ITEM: validate alignment and size,
 * register the extent record with its ref count, then walk the inline
 * refs and register each as a tree or data backref.  Legacy v0 items
 * (smaller than struct btrfs_extent_item) only yield the ref count.
 * NOTE(review): many interior lines (returns, the inline-ref while
 * loop header, break statements) are missing from this extraction;
 * comments cover visible code only.
 */
5327 static int process_extent_item(struct btrfs_root *root,
5328 struct cache_tree *extent_cache,
5329 struct extent_buffer *eb, int slot)
5331 struct btrfs_extent_item *ei;
5332 struct btrfs_extent_inline_ref *iref;
5333 struct btrfs_extent_data_ref *dref;
5334 struct btrfs_shared_data_ref *sref;
5335 struct btrfs_key key;
5336 struct extent_record tmpl;
5341 u32 item_size = btrfs_item_size_nr(eb, slot);
5347 btrfs_item_key_to_cpu(eb, &key, slot);
/* METADATA_ITEM: offset is the tree level, length is one node */
5349 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5351 num_bytes = root->nodesize;
5353 num_bytes = key.offset;
5356 if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5357 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5358 key.objectid, root->sectorsize);
/* pre-4.x v0 extent items carry only a ref count */
5361 if (item_size < sizeof(*ei)) {
5362 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5363 struct btrfs_extent_item_v0 *ei0;
5364 BUG_ON(item_size != sizeof(*ei0));
5365 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5366 refs = btrfs_extent_refs_v0(eb, ei0);
5370 memset(&tmpl, 0, sizeof(tmpl));
5371 tmpl.start = key.objectid;
5372 tmpl.nr = num_bytes;
5373 tmpl.extent_item_refs = refs;
5374 tmpl.metadata = metadata;
5376 tmpl.max_size = num_bytes;
5378 return add_extent_rec(extent_cache, &tmpl);
5381 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5382 refs = btrfs_extent_refs(eb, ei);
5383 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
/* metadata extents are exactly one node; data is sector-aligned */
5387 if (metadata && num_bytes != root->nodesize) {
5388 error("ignore invalid metadata extent, length %llu does not equal to %u",
5389 num_bytes, root->nodesize);
5392 if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5393 error("ignore invalid data extent, length %llu is not aligned to %u",
5394 num_bytes, root->sectorsize);
5398 memset(&tmpl, 0, sizeof(tmpl));
5399 tmpl.start = key.objectid;
5400 tmpl.nr = num_bytes;
5401 tmpl.extent_item_refs = refs;
5402 tmpl.metadata = metadata;
5404 tmpl.max_size = num_bytes;
5405 add_extent_rec(extent_cache, &tmpl);
/* EXTENT_ITEM tree blocks have a tree_block_info before the refs */
5407 ptr = (unsigned long)(ei + 1);
5408 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5409 key.type == BTRFS_EXTENT_ITEM_KEY)
5410 ptr += sizeof(struct btrfs_tree_block_info);
5412 end = (unsigned long)ei + item_size;
/* iterate the inline refs packed after the item header */
5414 iref = (struct btrfs_extent_inline_ref *)ptr;
5415 type = btrfs_extent_inline_ref_type(eb, iref);
5416 offset = btrfs_extent_inline_ref_offset(eb, iref);
5418 case BTRFS_TREE_BLOCK_REF_KEY:
5419 ret = add_tree_backref(extent_cache, key.objectid,
5422 error("add_tree_backref failed: %s",
5425 case BTRFS_SHARED_BLOCK_REF_KEY:
5426 ret = add_tree_backref(extent_cache, key.objectid,
5429 error("add_tree_backref failed: %s",
5432 case BTRFS_EXTENT_DATA_REF_KEY:
5433 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5434 add_data_backref(extent_cache, key.objectid, 0,
5435 btrfs_extent_data_ref_root(eb, dref),
5436 btrfs_extent_data_ref_objectid(eb,
5438 btrfs_extent_data_ref_offset(eb, dref),
5439 btrfs_extent_data_ref_count(eb, dref),
5442 case BTRFS_SHARED_DATA_REF_KEY:
5443 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5444 add_data_backref(extent_cache, key.objectid, offset,
5446 btrfs_shared_data_ref_count(eb, sref),
5450 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5451 key.objectid, key.type, num_bytes);
5454 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Verify that the free space cache of @cache contains an entry that
 * exactly covers [offset, offset + bytes).
 *
 * Superblock mirrors are never free space, so the range is first
 * clipped against every super copy that rmaps into this block group;
 * when a super lands in the middle of the range the left side is
 * verified recursively and the loop continues with the right side.
 * The matched entry is unlinked from the free space ctl so that the
 * caller (verify_space_cache()) can detect leftover entries at the end.
 *
 * NOTE(review): this listing is elided; error checks and return paths
 * between the visible lines are not shown here.
 */
5461 static int check_cache_range(struct btrfs_root *root,
5462 struct btrfs_block_group_cache *cache,
5463 u64 offset, u64 bytes)
5465 struct btrfs_free_space *entry;
/* Clip the checked range against every superblock mirror. */
5471 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5472 bytenr = btrfs_sb_offset(i);
5473 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5474 cache->key.objectid, bytenr, 0,
5475 &logical, &nr, &stripe_len);
/* Super stripe entirely outside the range: no clipping needed. */
5480 if (logical[nr] + stripe_len <= offset)
5482 if (offset + bytes <= logical[nr])
/* Super starts exactly at the range start: trim the front. */
5484 if (logical[nr] == offset) {
5485 if (stripe_len >= bytes) {
5489 bytes -= stripe_len;
5490 offset += stripe_len;
/* Super begins before the range: advance past the stripe end. */
5491 } else if (logical[nr] < offset) {
5492 if (logical[nr] + stripe_len >=
5497 bytes = (offset + bytes) -
5498 (logical[nr] + stripe_len);
5499 offset = logical[nr] + stripe_len;
5502 * Could be tricky, the super may land in the
5503 * middle of the area we're checking. First
5504 * check the easiest case, it's at the end.
5506 if (logical[nr] + stripe_len >=
5508 bytes = logical[nr] - offset;
5512 /* Check the left side */
5513 ret = check_cache_range(root, cache,
5515 logical[nr] - offset);
5521 /* Now we continue with the right side */
5522 bytes = (offset + bytes) -
5523 (logical[nr] + stripe_len);
5524 offset = logical[nr] + stripe_len;
/* The cache must hold one entry matching the clipped range exactly. */
5531 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5533 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5534 offset, offset+bytes);
5538 if (entry->offset != offset) {
5539 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5544 if (entry->bytes != bytes) {
5545 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5546 bytes, entry->bytes, offset);
/* Consume the entry; leftovers are reported by verify_space_cache(). */
5550 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Walk all extent/metadata items falling inside @cache's block group
 * and verify that every gap between allocated extents appears in the
 * free space cache (via check_cache_range(), which also unlinks each
 * matched entry).  Any entry still present in the free space ctl when
 * the walk finishes is reported as an error.
 */
5555 static int verify_space_cache(struct btrfs_root *root,
5556 struct btrfs_block_group_cache *cache)
5558 struct btrfs_path *path;
5559 struct extent_buffer *leaf;
5560 struct btrfs_key key;
5564 path = btrfs_alloc_path();
/* The walk is over the extent tree, not the root we were handed. */
5568 root = root->fs_info->extent_root;
/* Never start below the first superblock; it is not tracked as free. */
5570 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5572 key.objectid = last;
5574 key.type = BTRFS_EXTENT_ITEM_KEY;
5576 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5581 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5582 ret = btrfs_next_leaf(root, path);
5590 leaf = path->nodes[0];
5591 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
/* Stop once we walk past the end of this block group. */
5592 if (key.objectid >= cache->key.offset + cache->key.objectid)
5594 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5595 key.type != BTRFS_METADATA_ITEM_KEY) {
5600 if (last == key.objectid) {
/* EXTENT_ITEM offset is a byte length; METADATA_ITEM spans one node. */
5601 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5602 last = key.objectid + key.offset;
5604 last = key.objectid + root->nodesize;
/* The gap between the previous extent and this one must be free. */
5609 ret = check_cache_range(root, cache, last,
5610 key.objectid - last);
5613 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5614 last = key.objectid + key.offset;
5616 last = key.objectid + root->nodesize;
/* Trailing free space up to the end of the block group. */
5620 if (last < cache->key.objectid + cache->key.offset)
5621 ret = check_cache_range(root, cache, last,
5622 cache->key.objectid +
5623 cache->key.offset - last);
5626 btrfs_free_path(path);
/* check_cache_range() unlinked every match; none should remain. */
5629 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5630 fprintf(stderr, "There are still entries left in the space "
/*
 * Validate the free space cache — or the free space tree when the
 * FREE_SPACE_TREE compat_ro feature is set — for every block group.
 *
 * If the cache generation recorded in the superblock does not match
 * the filesystem generation, the cache would be invalidated on mount
 * anyway, so it is only reported, not checked.
 *
 * Returns -EINVAL if any block group failed verification, 0 otherwise.
 */
5638 static int check_space_cache(struct btrfs_root *root)
5640 struct btrfs_block_group_cache *cache;
5641 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5645 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5646 btrfs_super_generation(root->fs_info->super_copy) !=
5647 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5648 printf("cache and super generation don't match, space cache "
5649 "will be invalidated\n");
5653 if (ctx.progress_enabled) {
5654 ctx.tp = TASK_FREE_SPACE;
5655 task_start(ctx.info);
/* Iterate block groups in address order. */
5659 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5663 start = cache->key.objectid + cache->key.offset;
5664 if (!cache->free_space_ctl) {
5665 if (btrfs_init_free_space_ctl(cache,
5666 root->sectorsize)) {
5671 btrfs_remove_free_space_cache(cache);
/* Free-space-tree filesystems load from the tree, not the cache file. */
5674 if (btrfs_fs_compat_ro(root->fs_info,
5675 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5676 ret = exclude_super_stripes(root, cache);
5678 fprintf(stderr, "could not exclude super stripes: %s\n",
5683 ret = load_free_space_tree(root->fs_info, cache);
5684 free_excluded_extents(root, cache);
5686 fprintf(stderr, "could not load free space tree: %s\n",
5693 ret = load_free_space_cache(root->fs_info, cache);
5698 ret = verify_space_cache(root, cache);
5700 fprintf(stderr, "cache appears valid but isn't %Lu\n",
5701 cache->key.objectid);
5706 task_stop(ctx.info);
5708 return error ? -EINVAL : 0;
/*
 * Read the data extent [bytenr, bytenr + num_bytes) and verify each
 * sector against the checksums stored in the csum item at @leaf_offset
 * within @eb.  On a mismatch, the next mirror (if any) is tried.
 */
5711 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5712 u64 num_bytes, unsigned long leaf_offset,
5713 struct extent_buffer *eb) {
5716 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5718 unsigned long csum_offset;
5722 u64 data_checked = 0;
/* Checksums cover whole sectors only. */
5728 if (num_bytes % root->sectorsize)
5731 data = malloc(num_bytes);
5735 while (offset < num_bytes) {
5738 read_len = num_bytes - offset;
5739 /* read as much data as possible in one go */
5740 ret = read_extent_data(root, data + offset,
5741 bytenr + offset, &read_len, mirror);
5745 /* verify each sector's checksum */
5746 while (data_checked < read_len) {
5748 tmp = offset + data_checked;
5750 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5751 csum, root->sectorsize);
5752 btrfs_csum_final(csum, (u8 *)&csum);
/* Locate the on-disk checksum for this sector inside the csum item. */
5754 csum_offset = leaf_offset +
5755 tmp / root->sectorsize * csum_size;
5756 read_extent_buffer(eb, (char *)&csum_expected,
5757 csum_offset, csum_size);
5758 /* try another mirror */
5759 if (csum != csum_expected) {
5760 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5761 mirror, bytenr + tmp,
5762 csum, csum_expected);
5763 num_copies = btrfs_num_copies(
5764 &root->fs_info->mapping_tree,
5766 if (mirror < num_copies - 1) {
5771 data_checked += root->sectorsize;
/*
 * Verify that the range [bytenr, bytenr + num_bytes) is fully covered
 * by EXTENT_ITEMs in the extent tree.  The range is shrunk as covering
 * extents are found; when an extent sits in the middle of the range
 * the right-hand part is checked recursively.  Reports any part of
 * the range that has no extent record.
 */
5780 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5783 struct btrfs_path *path;
5784 struct extent_buffer *leaf;
5785 struct btrfs_key key;
5788 path = btrfs_alloc_path();
5790 fprintf(stderr, "Error allocating path\n");
/* Search with offset -1 to land at or after the last item at bytenr. */
5794 key.objectid = bytenr;
5795 key.type = BTRFS_EXTENT_ITEM_KEY;
5796 key.offset = (u64)-1;
5799 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5802 fprintf(stderr, "Error looking up extent record %d\n", ret);
5803 btrfs_free_path(path);
5806 if (path->slots[0] > 0) {
5809 ret = btrfs_prev_leaf(root, path);
5812 } else if (ret > 0) {
5819 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5822 * Block group items come before extent items if they have the same
5823 * bytenr, so walk back one more just in case. Dear future traveller,
5824 * first congrats on mastering time travel. Now if it's not too much
5825 * trouble could you go back to 2006 and tell Chris to make the
5826 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5827 * EXTENT_ITEM_KEY please?
5829 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5830 if (path->slots[0] > 0) {
5833 ret = btrfs_prev_leaf(root, path);
5836 } else if (ret > 0) {
5841 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5845 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5846 ret = btrfs_next_leaf(root, path);
5848 fprintf(stderr, "Error going to next leaf "
5850 btrfs_free_path(path);
5856 leaf = path->nodes[0];
5857 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5858 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent ends before our range: keep scanning forward. */
5862 if (key.objectid + key.offset < bytenr) {
5866 if (key.objectid > bytenr + num_bytes)
/* Extent begins exactly at our range start: trim the front. */
5869 if (key.objectid == bytenr) {
5870 if (key.offset >= num_bytes) {
5874 num_bytes -= key.offset;
5875 bytenr += key.offset;
/* Extent begins before our range: trim up to the extent end. */
5876 } else if (key.objectid < bytenr) {
5877 if (key.objectid + key.offset >= bytenr + num_bytes) {
5881 num_bytes = (bytenr + num_bytes) -
5882 (key.objectid + key.offset);
5883 bytenr = key.objectid + key.offset;
5885 if (key.objectid + key.offset < bytenr + num_bytes) {
5886 u64 new_start = key.objectid + key.offset;
5887 u64 new_bytes = bytenr + num_bytes - new_start;
5890 * Weird case, the extent is in the middle of
5891 * our range, we'll have to search one side
5892 * and then the other. Not sure if this happens
5893 * in real life, but no harm in coding it up
5894 * anyway just in case.
5896 btrfs_release_path(path);
5897 ret = check_extent_exists(root, new_start,
5900 fprintf(stderr, "Right section didn't "
5904 num_bytes = key.objectid - bytenr;
5907 num_bytes = key.objectid - bytenr;
/* Anything left over had no extent record covering it. */
5914 if (num_bytes && !ret) {
5915 fprintf(stderr, "There are no extents for csum range "
5916 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5920 btrfs_free_path(path);
/*
 * Walk the csum tree and, for each contiguous run of checksummed
 * bytes, verify that a matching extent record exists in the extent
 * tree (check_extent_exists()).  When the check_data_csum flag is set,
 * also read the data and verify it against the stored checksums
 * (check_extent_csums()).
 */
5924 static int check_csums(struct btrfs_root *root)
5926 struct btrfs_path *path;
5927 struct extent_buffer *leaf;
5928 struct btrfs_key key;
5929 u64 offset = 0, num_bytes = 0;
5930 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5934 unsigned long leaf_offset;
5936 root = root->fs_info->csum_root;
5937 if (!extent_buffer_uptodate(root->node)) {
5938 fprintf(stderr, "No valid csum tree found\n");
5942 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5943 key.type = BTRFS_EXTENT_CSUM_KEY;
5946 path = btrfs_alloc_path();
5950 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5952 fprintf(stderr, "Error searching csum tree %d\n", ret);
5953 btrfs_free_path(path);
5957 if (ret > 0 && path->slots[0])
5962 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5963 ret = btrfs_next_leaf(root, path);
5965 fprintf(stderr, "Error going to next leaf "
5972 leaf = path->nodes[0];
5974 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5975 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Each csum item stores csum_size bytes per sector of data. */
5980 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5981 csum_size) * root->sectorsize;
5982 if (!check_data_csum)
5983 goto skip_csum_check;
5984 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5985 ret = check_extent_csums(root, key.offset, data_len,
5991 offset = key.offset;
/* A gap in csum coverage ends the current run: verify it has extents. */
5992 } else if (key.offset != offset + num_bytes) {
5993 ret = check_extent_exists(root, offset, num_bytes);
5995 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5996 "there is no extent record\n",
5997 offset, offset+num_bytes);
6000 offset = key.offset;
6003 num_bytes += data_len;
6007 btrfs_free_path(path);
/*
 * Lexicographic comparison of @key against a root's drop progress key
 * (objectid, then type, then offset).  Used by run_next_block() to
 * skip tree blocks that lie below a partially-dropped snapshot's drop
 * cursor.
 */
6011 static int is_dropped_key(struct btrfs_key *key,
6012 struct btrfs_key *drop_key) {
6013 if (key->objectid < drop_key->objectid)
6015 else if (key->objectid == drop_key->objectid) {
6016 if (key->type < drop_key->type)
6018 else if (key->type == drop_key->type) {
6019 if (key->offset < drop_key->offset)
6027 * Here are the rules for FULL_BACKREF.
6029 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6030 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6032 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
6033 * if it happened after the relocation occurred since we'll have dropped the
6034 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6035 * have no real way to know for sure.
6037 * We process the blocks one root at a time, and we start from the lowest root
6038 * objectid and go to the highest. So we can just lookup the owner backref for
6039 * the record and if we don't find it then we know it doesn't exist and we have
6042 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6043 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6044 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Decide whether FULL_BACKREF should be set in *@flags for the tree
 * block @buf, following the rules documented in the comment above:
 * non-fs/reloc roots never get it; a root node, a RELOC-flagged
 * header, or a header owner matching the current root decide the
 * easy cases; otherwise the owner backref lookup decides.  Records
 * a bad_full_backref on the extent record when the cached
 * flag_block_full_backref disagrees with the computed value.
 */
6046 static int calc_extent_flag(struct btrfs_root *root,
6047 struct cache_tree *extent_cache,
6048 struct extent_buffer *buf,
6049 struct root_item_record *ri,
6052 struct extent_record *rec;
6053 struct cache_extent *cache;
6054 struct tree_backref *tback;
6057 cache = lookup_cache_extent(extent_cache, buf->start, 1);
6058 /* we have added this extent before */
6062 rec = container_of(cache, struct extent_record, cache);
6065 * Except file/reloc tree, we can not have
6068 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* The root's own root node cannot be shared via FULL_BACKREF. */
6073 if (buf->start == ri->bytenr)
6076 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6079 owner = btrfs_header_owner(buf);
6080 if (owner == ri->objectid)
6083 tback = find_tree_backref(rec, 0, owner);
/* Computed "not full backref": flag disagreement with cached state. */
6088 if (rec->flag_block_full_backref != FLAG_UNSET &&
6089 rec->flag_block_full_backref != 0)
6090 rec->bad_full_backref = 1;
6093 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* Computed "full backref": flag disagreement with cached state. */
6094 if (rec->flag_block_full_backref != FLAG_UNSET &&
6095 rec->flag_block_full_backref != 1)
6096 rec->bad_full_backref = 1;
/*
 * Print "Invalid key type(<type>) found in root(<root>)" to stderr
 * for a key type that is not legal in the given tree.
 */
6100 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6102 fprintf(stderr, "Invalid key type(");
6103 print_key_type(stderr, 0, key_type);
6104 fprintf(stderr, ") found in root(");
6105 print_objectid(stderr, rootid, 0);
6106 fprintf(stderr, ")\n");
6110 * Check if the key is valid with its extent buffer.
6112 * This is an early check in case an invalid key exists in an extent buffer
6113 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Return whether @key_type may legally appear in the tree @rootid;
 * report the mismatch via report_mismatch_key_root() otherwise.
 */
6116 static int check_type_with_root(u64 rootid, u8 key_type)
6119 /* Only valid in chunk tree */
6120 case BTRFS_DEV_ITEM_KEY:
6121 case BTRFS_CHUNK_ITEM_KEY:
6122 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6125 /* valid in csum and log tree */
/*
 * NOTE(review): this case label is a tree objectid used in a switch
 * over key types — it looks like it should be BTRFS_EXTENT_CSUM_KEY;
 * verify against upstream btrfs-progs before changing.
 */
6126 case BTRFS_CSUM_TREE_OBJECTID:
6127 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6131 case BTRFS_EXTENT_ITEM_KEY:
6132 case BTRFS_METADATA_ITEM_KEY:
6133 case BTRFS_BLOCK_GROUP_ITEM_KEY:
6134 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6137 case BTRFS_ROOT_ITEM_KEY:
6138 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6141 case BTRFS_DEV_EXTENT_KEY:
6142 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6148 report_mismatch_key_root(key_type, rootid);
/*
 * Process the next pending tree block for the extent-tree scan:
 * pick a block (possibly issuing readahead for a batch), read it,
 * work out its FULL_BACKREF state, sanity-check it, then walk its
 * items.  Leaf items feed the various caches (extent, chunk, device,
 * block group, device extent) and record data backrefs; node pointers
 * are recorded as tree backrefs and queued as new pending blocks.
 * Global accounting (btree/fs-tree/extent-tree byte totals, wasted
 * space, old-backref detection) is updated at the end.
 */
6152 static int run_next_block(struct btrfs_root *root,
6153 struct block_info *bits,
6156 struct cache_tree *pending,
6157 struct cache_tree *seen,
6158 struct cache_tree *reada,
6159 struct cache_tree *nodes,
6160 struct cache_tree *extent_cache,
6161 struct cache_tree *chunk_cache,
6162 struct rb_root *dev_cache,
6163 struct block_group_tree *block_group_cache,
6164 struct device_extent_tree *dev_extent_cache,
6165 struct root_item_record *ri)
6167 struct extent_buffer *buf;
6168 struct extent_record *rec = NULL;
6179 struct btrfs_key key;
6180 struct cache_extent *cache;
/* Choose the next batch of blocks; reada_bits set means prefetch only. */
6183 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6184 bits_nr, &reada_bits);
6189 for(i = 0; i < nritems; i++) {
6190 ret = add_cache_extent(reada, bits[i].start,
6195 /* fixme, get the parent transid */
6196 readahead_tree_block(root, bits[i].start,
6200 *last = bits[0].start;
6201 bytenr = bits[0].start;
6202 size = bits[0].size;
/* This block is being processed now: drop it from the queues. */
6204 cache = lookup_cache_extent(pending, bytenr, size);
6206 remove_cache_extent(pending, cache);
6209 cache = lookup_cache_extent(reada, bytenr, size);
6211 remove_cache_extent(reada, cache);
6214 cache = lookup_cache_extent(nodes, bytenr, size);
6216 remove_cache_extent(nodes, cache);
6219 cache = lookup_cache_extent(extent_cache, bytenr, size);
6221 rec = container_of(cache, struct extent_record, cache);
6222 gen = rec->parent_generation;
6225 /* fixme, get the real parent transid */
6226 buf = read_tree_block(root, bytenr, size, gen);
6227 if (!extent_buffer_uptodate(buf)) {
6228 record_bad_block_io(root->fs_info,
6229 extent_cache, bytenr, size);
6233 nritems = btrfs_header_nritems(buf);
/* Prefer the extent tree's own flags; fall back to computing them. */
6236 if (!init_extent_tree) {
6237 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6238 btrfs_header_level(buf), 1, NULL,
6241 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6243 fprintf(stderr, "Couldn't calc extent flags\n");
6244 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6249 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6251 fprintf(stderr, "Couldn't calc extent flags\n");
6252 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6256 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6258 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6259 ri->objectid == btrfs_header_owner(buf)) {
6261 * Ok we got to this block from it's original owner and
6262 * we have FULL_BACKREF set. Relocation can leave
6263 * converted blocks over so this is altogether possible,
6264 * however it's not possible if the generation > the
6265 * last snapshot, so check for this case.
6267 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6268 btrfs_header_generation(buf) > ri->last_snapshot) {
6269 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6270 rec->bad_full_backref = 1;
6275 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6276 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6277 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6278 rec->bad_full_backref = 1;
6282 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6283 rec->flag_block_full_backref = 1;
6287 rec->flag_block_full_backref = 0;
6289 owner = btrfs_header_owner(buf);
6292 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: dispatch each item by key type to the matching cache. */
6296 if (btrfs_is_leaf(buf)) {
6297 btree_space_waste += btrfs_leaf_free_space(root, buf);
6298 for (i = 0; i < nritems; i++) {
6299 struct btrfs_file_extent_item *fi;
6300 btrfs_item_key_to_cpu(buf, &key, i);
6302 * Check key type against the leaf owner.
6303 * Could filter quite a lot of early error if
6306 if (check_type_with_root(btrfs_header_owner(buf),
6308 fprintf(stderr, "ignoring invalid key\n");
6311 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6312 process_extent_item(root, extent_cache, buf,
6316 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6317 process_extent_item(root, extent_cache, buf,
6321 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6323 btrfs_item_size_nr(buf, i);
6326 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6327 process_chunk_item(chunk_cache, &key, buf, i);
6330 if (key.type == BTRFS_DEV_ITEM_KEY) {
6331 process_device_item(dev_cache, &key, buf, i);
6334 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6335 process_block_group_item(block_group_cache,
6339 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6340 process_device_extent_item(dev_extent_cache,
6345 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6346 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6347 process_extent_ref_v0(extent_cache, buf, i);
6354 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6355 ret = add_tree_backref(extent_cache,
6356 key.objectid, 0, key.offset, 0);
6358 error("add_tree_backref failed: %s",
6362 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6363 ret = add_tree_backref(extent_cache,
6364 key.objectid, key.offset, 0, 0);
6366 error("add_tree_backref failed: %s",
6370 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6371 struct btrfs_extent_data_ref *ref;
6372 ref = btrfs_item_ptr(buf, i,
6373 struct btrfs_extent_data_ref);
6374 add_data_backref(extent_cache,
6376 btrfs_extent_data_ref_root(buf, ref),
6377 btrfs_extent_data_ref_objectid(buf,
6379 btrfs_extent_data_ref_offset(buf, ref),
6380 btrfs_extent_data_ref_count(buf, ref),
6381 0, root->sectorsize);
6384 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6385 struct btrfs_shared_data_ref *ref;
6386 ref = btrfs_item_ptr(buf, i,
6387 struct btrfs_shared_data_ref);
6388 add_data_backref(extent_cache,
6389 key.objectid, key.offset, 0, 0, 0,
6390 btrfs_shared_data_ref_count(buf, ref),
6391 0, root->sectorsize);
/* Orphan items are queued for deletion during repair. */
6394 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6395 struct bad_item *bad;
6397 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6401 bad = malloc(sizeof(struct bad_item));
6404 INIT_LIST_HEAD(&bad->list);
6405 memcpy(&bad->key, &key,
6406 sizeof(struct btrfs_key));
6407 bad->root_id = owner;
6408 list_add_tail(&bad->list, &delete_items);
6411 if (key.type != BTRFS_EXTENT_DATA_KEY)
6413 fi = btrfs_item_ptr(buf, i,
6414 struct btrfs_file_extent_item);
/* Inline extents and holes (disk_bytenr == 0) carry no backref. */
6415 if (btrfs_file_extent_type(buf, fi) ==
6416 BTRFS_FILE_EXTENT_INLINE)
6418 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6421 data_bytes_allocated +=
6422 btrfs_file_extent_disk_num_bytes(buf, fi);
6423 if (data_bytes_allocated < root->sectorsize) {
6426 data_bytes_referenced +=
6427 btrfs_file_extent_num_bytes(buf, fi);
6428 add_data_backref(extent_cache,
6429 btrfs_file_extent_disk_bytenr(buf, fi),
6430 parent, owner, key.objectid, key.offset -
6431 btrfs_file_extent_offset(buf, fi), 1, 1,
6432 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Node: record each child pointer and queue it for the walk. */
6436 struct btrfs_key first_key;
6438 first_key.objectid = 0;
6441 btrfs_item_key_to_cpu(buf, &first_key, 0);
6442 level = btrfs_header_level(buf);
6443 for (i = 0; i < nritems; i++) {
6444 struct extent_record tmpl;
6446 ptr = btrfs_node_blockptr(buf, i);
6447 size = root->nodesize;
6448 btrfs_node_key_to_cpu(buf, &key, i);
/* Skip children below this root's drop cursor. */
6450 if ((level == ri->drop_level)
6451 && is_dropped_key(&key, &ri->drop_key)) {
6456 memset(&tmpl, 0, sizeof(tmpl));
6457 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6458 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6463 tmpl.max_size = size;
6464 ret = add_extent_rec(extent_cache, &tmpl);
6468 ret = add_tree_backref(extent_cache, ptr, parent,
6471 error("add_tree_backref failed: %s",
6477 add_pending(nodes, seen, ptr, size);
6479 add_pending(pending, seen, ptr, size);
6482 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6483 nritems) * sizeof(struct btrfs_key_ptr);
/* Global accounting for the final report. */
6485 total_btree_bytes += buf->len;
6486 if (fs_root_objectid(btrfs_header_owner(buf)))
6487 total_fs_tree_bytes += buf->len;
6488 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6489 total_extent_tree_bytes += buf->len;
6490 if (!found_old_backref &&
6491 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6492 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6493 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6494 found_old_backref = 1;
6496 free_extent_buffer(buf);
/*
 * Seed the block walk with a tree root: queue @buf on the nodes or
 * pending cache depending on its level, create its extent record, and
 * add its tree backref.  Reloc roots and pre-mixed-backref-rev blocks
 * get a parent-style (full) backref; everything else a root-objectid
 * backref.
 */
6500 static int add_root_to_pending(struct extent_buffer *buf,
6501 struct cache_tree *extent_cache,
6502 struct cache_tree *pending,
6503 struct cache_tree *seen,
6504 struct cache_tree *nodes,
6507 struct extent_record tmpl;
6510 if (btrfs_header_level(buf) > 0)
6511 add_pending(nodes, seen, buf->start, buf->len);
6513 add_pending(pending, seen, buf->start, buf->len);
6515 memset(&tmpl, 0, sizeof(tmpl));
6516 tmpl.start = buf->start;
6521 tmpl.max_size = buf->len;
6522 add_extent_rec(extent_cache, &tmpl);
6524 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6525 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6526 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6529 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6534 /* as we fix the tree, we might be deleting blocks that
6535 * we're tracking for repair. This hook makes sure we
6536 * remove any backrefs for blocks as we are fixing them.
/*
 * Hook called when repair code frees an extent: keep the fsck extent
 * cache consistent by dropping the corresponding data or tree backref
 * counts from the cached extent record, unlinking fully-dropped
 * backrefs, and freeing the record when nothing references it anymore.
 * Data vs. tree is decided by the owner (>= FIRST_FREE_OBJECTID means
 * a file tree, i.e. data).
 */
6538 static int free_extent_hook(struct btrfs_trans_handle *trans,
6539 struct btrfs_root *root,
6540 u64 bytenr, u64 num_bytes, u64 parent,
6541 u64 root_objectid, u64 owner, u64 offset,
6544 struct extent_record *rec;
6545 struct cache_extent *cache;
6547 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6549 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6550 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6554 rec = container_of(cache, struct extent_record, cache);
6556 struct data_backref *back;
6557 back = find_data_backref(rec, parent, root_objectid, owner,
6558 offset, 1, bytenr, num_bytes);
6561 if (back->node.found_ref) {
6562 back->found_ref -= refs_to_drop;
6564 rec->refs -= refs_to_drop;
6566 if (back->node.found_extent_tree) {
6567 back->num_refs -= refs_to_drop;
6568 if (rec->extent_item_refs)
6569 rec->extent_item_refs -= refs_to_drop;
6571 if (back->found_ref == 0)
6572 back->node.found_ref = 0;
6573 if (back->num_refs == 0)
6574 back->node.found_extent_tree = 0;
/* Backref no longer referenced from either side: unlink it. */
6576 if (!back->node.found_extent_tree && back->node.found_ref) {
6577 list_del(&back->node.list);
6581 struct tree_backref *back;
6582 back = find_tree_backref(rec, parent, root_objectid);
6585 if (back->node.found_ref) {
6588 back->node.found_ref = 0;
6590 if (back->node.found_extent_tree) {
6591 if (rec->extent_item_refs)
6592 rec->extent_item_refs--;
6593 back->node.found_extent_tree = 0;
6595 if (!back->node.found_extent_tree && back->node.found_ref) {
6596 list_del(&back->node.list);
/* Drop the record entirely when nothing references it anymore. */
6600 maybe_free_extent_rec(extent_cache, rec);
/*
 * Repair helper: delete every extent-tree item (extent items, metadata
 * items, and all backref item types) recorded at @bytenr, walking the
 * extent tree backwards from (bytenr, -1).  Block group accounting is
 * updated for each EXTENT_ITEM/METADATA_ITEM removed.
 */
6605 static int delete_extent_records(struct btrfs_trans_handle *trans,
6606 struct btrfs_root *root,
6607 struct btrfs_path *path,
6608 u64 bytenr, u64 new_len)
6610 struct btrfs_key key;
6611 struct btrfs_key found_key;
6612 struct extent_buffer *leaf;
6617 key.objectid = bytenr;
6619 key.offset = (u64)-1;
6622 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6629 if (path->slots[0] == 0)
6635 leaf = path->nodes[0];
6636 slot = path->slots[0];
6638 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6639 if (found_key.objectid != bytenr)
/* Only extent items and backref items are deleted here. */
6642 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6643 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6644 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6645 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6646 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6647 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6648 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
/* Step the search key backwards past the non-matching item. */
6649 btrfs_release_path(path);
6650 if (found_key.type == 0) {
6651 if (found_key.offset == 0)
6653 key.offset = found_key.offset - 1;
6654 key.type = found_key.type;
6656 key.type = found_key.type - 1;
6657 key.offset = (u64)-1;
6661 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6662 found_key.objectid, found_key.type, found_key.offset);
6664 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6667 btrfs_release_path(path);
/* Update block group usage for the removed extent/metadata item. */
6669 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6670 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6671 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6672 found_key.offset : root->nodesize;
6674 ret = btrfs_update_block_group(trans, root, bytenr,
6681 btrfs_release_path(path);
6686 * for a single backref, this will allocate a new extent
6687 * and add the backref to it.
/*
 * Repair helper: for a single backref, (re)create the extent item in
 * the extent tree if needed (@allocated controls that), then add the
 * backref itself via btrfs_inc_extent_ref().  Tree blocks additionally
 * get a btrfs_tree_block_info with the recorded level and key.
 *
 * Fixes applied in review:
 *  - "&copy_key" had been corrupted into the HTML entity form
 *    "(c)_key" in four calls (would not compile) — restored.
 *  - stray double semicolon after the copy_key declaration removed.
 */
6689 static int record_extent(struct btrfs_trans_handle *trans,
6690 struct btrfs_fs_info *info,
6691 struct btrfs_path *path,
6692 struct extent_record *rec,
6693 struct extent_backref *back,
6694 int allocated, u64 flags)
6697 struct btrfs_root *extent_root = info->extent_root;
6698 struct extent_buffer *leaf;
6699 struct btrfs_key ins_key;
6700 struct btrfs_extent_item *ei;
6701 struct tree_backref *tback;
6702 struct data_backref *dback;
6703 struct btrfs_tree_block_info *bi;
/* Metadata extents must cover at least one node. */
6706 rec->max_size = max_t(u64, rec->max_size,
6707 info->extent_root->nodesize);
6710 u32 item_size = sizeof(*ei);
6713 item_size += sizeof(*bi);
6715 ins_key.objectid = rec->start;
6716 ins_key.offset = rec->max_size;
6717 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6719 ret = btrfs_insert_empty_item(trans, extent_root, path,
6720 &ins_key, item_size);
6724 leaf = path->nodes[0];
6725 ei = btrfs_item_ptr(leaf, path->slots[0],
6726 struct btrfs_extent_item);
6728 btrfs_set_extent_refs(leaf, ei, 0);
6729 btrfs_set_extent_generation(leaf, ei, rec->generation);
6731 if (back->is_data) {
6732 btrfs_set_extent_flags(leaf, ei,
6733 BTRFS_EXTENT_FLAG_DATA);
6735 struct btrfs_disk_key copy_key;
/* Tree block: fill in the btrfs_tree_block_info after the item. */
6737 tback = to_tree_backref(back);
6738 bi = (struct btrfs_tree_block_info *)(ei + 1);
6739 memset_extent_buffer(leaf, 0, (unsigned long)bi,
6742 btrfs_set_disk_key_objectid(&copy_key,
6743 rec->info_objectid);
6744 btrfs_set_disk_key_type(&copy_key, 0);
6745 btrfs_set_disk_key_offset(&copy_key, 0);
6747 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6748 btrfs_set_tree_block_key(leaf, bi, &copy_key);
6750 btrfs_set_extent_flags(leaf, ei,
6751 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6754 btrfs_mark_buffer_dirty(leaf);
6755 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6756 rec->max_size, 1, 0);
6759 btrfs_release_path(path);
6762 if (back->is_data) {
6766 dback = to_data_backref(back);
6767 if (back->full_backref)
6768 parent = dback->parent;
/* Add one ref per reference found in the file trees. */
6772 for (i = 0; i < dback->found_ref; i++) {
6773 /* if parent != 0, we're doing a full backref
6774 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6775 * just makes the backref allocator create a data
6778 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6779 rec->start, rec->max_size,
6783 BTRFS_FIRST_FREE_OBJECTID :
6789 fprintf(stderr, "adding new data backref"
6790 " on %llu %s %llu owner %llu"
6791 " offset %llu found %d\n",
6792 (unsigned long long)rec->start,
6793 back->full_backref ?
6795 back->full_backref ?
6796 (unsigned long long)parent :
6797 (unsigned long long)dback->root,
6798 (unsigned long long)dback->owner,
6799 (unsigned long long)dback->offset,
6804 tback = to_tree_backref(back);
6805 if (back->full_backref)
6806 parent = tback->parent;
6810 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6811 rec->start, rec->max_size,
6812 parent, tback->root, 0, 0);
6813 fprintf(stderr, "adding new tree backref on "
6814 "start %llu len %llu parent %llu root %llu\n",
6815 rec->start, rec->max_size, parent, tback->root);
6818 btrfs_release_path(path);
/*
 * Linear search of @entries for the extent_entry with exactly this
 * bytenr and byte length.
 */
6822 static struct extent_entry *find_entry(struct list_head *entries,
6823 u64 bytenr, u64 bytes)
6825 struct extent_entry *entry = NULL;
6827 list_for_each_entry(entry, entries, list) {
6828 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Pick the most trustworthy extent_entry from @entries: the one with
 * the highest reference count, skipping entries whose references are
 * all broken, and refusing to choose when the top count is tied
 * (a tie means we cannot tell which entry is really correct).
 */
6835 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6837 struct extent_entry *entry, *best = NULL, *prev = NULL;
6839 list_for_each_entry(entry, entries, list) {
6846 * If there are as many broken entries as entries then we know
6847 * not to trust this particular entry.
6849 if (entry->broken == entry->count)
6853 * If our current entry == best then we can't be sure our best
6854 * is really the best, so we need to keep searching.
6856 if (best && best->count == entry->count) {
6862 /* Prev == entry, not good enough, have to keep searching */
6863 if (!prev->broken && prev->count == entry->count)
6867 best = (prev->count > entry->count) ? prev : entry;
6868 else if (best->count < entry->count)
6876 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6877 struct data_backref *dback, struct extent_entry *entry)
6879 struct btrfs_trans_handle *trans;
6880 struct btrfs_root *root;
6881 struct btrfs_file_extent_item *fi;
6882 struct extent_buffer *leaf;
6883 struct btrfs_key key;
6887 key.objectid = dback->root;
6888 key.type = BTRFS_ROOT_ITEM_KEY;
6889 key.offset = (u64)-1;
6890 root = btrfs_read_fs_root(info, &key);
6892 fprintf(stderr, "Couldn't find root for our ref\n");
6897 * The backref points to the original offset of the extent if it was
6898 * split, so we need to search down to the offset we have and then walk
6899 * forward until we find the backref we're looking for.
6901 key.objectid = dback->owner;
6902 key.type = BTRFS_EXTENT_DATA_KEY;
6903 key.offset = dback->offset;
6904 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6906 fprintf(stderr, "Error looking up ref %d\n", ret);
6911 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6912 ret = btrfs_next_leaf(root, path);
6914 fprintf(stderr, "Couldn't find our ref, next\n");
6918 leaf = path->nodes[0];
6919 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6920 if (key.objectid != dback->owner ||
6921 key.type != BTRFS_EXTENT_DATA_KEY) {
6922 fprintf(stderr, "Couldn't find our ref, search\n");
6925 fi = btrfs_item_ptr(leaf, path->slots[0],
6926 struct btrfs_file_extent_item);
6927 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6928 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6930 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6935 btrfs_release_path(path);
6937 trans = btrfs_start_transaction(root, 1);
6939 return PTR_ERR(trans);
6942 * Ok we have the key of the file extent we want to fix, now we can cow
6943 * down to the thing and fix it.
6945 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6947 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6948 key.objectid, key.type, key.offset, ret);
6952 fprintf(stderr, "Well that's odd, we just found this key "
6953 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6958 leaf = path->nodes[0];
6959 fi = btrfs_item_ptr(leaf, path->slots[0],
6960 struct btrfs_file_extent_item);
6962 if (btrfs_file_extent_compression(leaf, fi) &&
6963 dback->disk_bytenr != entry->bytenr) {
6964 fprintf(stderr, "Ref doesn't match the record start and is "
6965 "compressed, please take a btrfs-image of this file "
6966 "system and send it to a btrfs developer so they can "
6967 "complete this functionality for bytenr %Lu\n",
6968 dback->disk_bytenr);
6973 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6974 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6975 } else if (dback->disk_bytenr > entry->bytenr) {
6976 u64 off_diff, offset;
6978 off_diff = dback->disk_bytenr - entry->bytenr;
6979 offset = btrfs_file_extent_offset(leaf, fi);
6980 if (dback->disk_bytenr + offset +
6981 btrfs_file_extent_num_bytes(leaf, fi) >
6982 entry->bytenr + entry->bytes) {
6983 fprintf(stderr, "Ref is past the entry end, please "
6984 "take a btrfs-image of this file system and "
6985 "send it to a btrfs developer, ref %Lu\n",
6986 dback->disk_bytenr);
6991 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6992 btrfs_set_file_extent_offset(leaf, fi, offset);
6993 } else if (dback->disk_bytenr < entry->bytenr) {
6996 offset = btrfs_file_extent_offset(leaf, fi);
6997 if (dback->disk_bytenr + offset < entry->bytenr) {
6998 fprintf(stderr, "Ref is before the entry start, please"
6999 " take a btrfs-image of this file system and "
7000 "send it to a btrfs developer, ref %Lu\n",
7001 dback->disk_bytenr);
7006 offset += dback->disk_bytenr;
7007 offset -= entry->bytenr;
7008 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7009 btrfs_set_file_extent_offset(leaf, fi, offset);
7012 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7015 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7016 * only do this if we aren't using compression, otherwise it's a
7019 if (!btrfs_file_extent_compression(leaf, fi))
7020 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7022 printf("ram bytes may be wrong?\n");
7023 btrfs_mark_buffer_dirty(leaf);
7025 err = btrfs_commit_transaction(trans, root);
7026 btrfs_release_path(path);
7027 return ret ? ret : err;
/*
 * Cross-check all data backrefs of @rec against each other and against the
 * extent record itself.  Disagreeing bytenr/bytes pairs are collected into
 * a local list of extent_entry candidates; the candidate with the most
 * agreeing refs wins and minority refs are rewritten via repair_ref().
 * Returns 0 when everything agrees (or was repaired); nonzero otherwise.
 * NOTE(review): this listing is elided — continue/goto/error-path statements
 * between the visible lines are not shown.
 */
7030 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7031 struct extent_record *rec)
7033 struct extent_backref *back;
7034 struct data_backref *dback;
7035 struct extent_entry *entry, *best = NULL;
7038 int broken_entries = 0;
/*
 * Metadata backrefs are not processed here; only data backrefs can
 * legitimately disagree on bytenr/size.
 */
7043 * Metadata is easy and the backrefs should always agree on bytenr and
7044 * size, if not we've got bigger issues.
/* Pass 1: tally each distinct (disk_bytenr, bytes) pair seen in a real ref. */
7049 list_for_each_entry(back, &rec->backrefs, list) {
7050 if (back->full_backref || !back->is_data)
7053 dback = to_data_backref(back);
7056 * We only pay attention to backrefs that we found a real
7059 if (dback->found_ref == 0)
7063 * For now we only catch when the bytes don't match, not the
7064 * bytenr. We can easily do this at the same time, but I want
7065 * to have a fs image to test on before we just add repair
7066 * functionality willy-nilly so we know we won't screw up the
7070 entry = find_entry(&entries, dback->disk_bytenr,
7073 entry = malloc(sizeof(struct extent_entry));
7078 memset(entry, 0, sizeof(*entry));
7079 entry->bytenr = dback->disk_bytenr;
7080 entry->bytes = dback->bytes;
7081 list_add_tail(&entry->list, &entries);
7086 * If we only have on entry we may think the entries agree when
7087 * in reality they don't so we have to do some extra checking.
7089 if (dback->disk_bytenr != rec->start ||
7090 dback->bytes != rec->nr || back->broken)
7101 /* Yay all the backrefs agree, carry on good sir */
7102 if (nr_entries <= 1 && !mismatch)
7105 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7106 "%Lu\n", rec->start);
/* Pass 2: let the majority of backrefs decide which pair is correct. */
7109 * First we want to see if the backrefs can agree amongst themselves who
7110 * is right, so figure out which one of the entries has the highest
7113 best = find_most_right_entry(&entries);
7116 * Ok so we may have an even split between what the backrefs think, so
7117 * this is where we use the extent ref to see what it thinks.
7120 entry = find_entry(&entries, rec->start, rec->nr);
7121 if (!entry && (!broken_entries || !rec->found_rec)) {
7122 fprintf(stderr, "Backrefs don't agree with each other "
7123 "and extent record doesn't agree with anybody,"
7124 " so we can't fix bytenr %Lu bytes %Lu\n",
7125 rec->start, rec->nr);
7128 } else if (!entry) {
7130 * Ok our backrefs were broken, we'll assume this is the
7131 * correct value and add an entry for this range.
7133 entry = malloc(sizeof(struct extent_entry));
7138 memset(entry, 0, sizeof(*entry));
7139 entry->bytenr = rec->start;
7140 entry->bytes = rec->nr;
7141 list_add_tail(&entry->list, &entries);
7145 best = find_most_right_entry(&entries);
7147 fprintf(stderr, "Backrefs and extent record evenly "
7148 "split on who is right, this is going to "
7149 "require user input to fix bytenr %Lu bytes "
7150 "%Lu\n", rec->start, rec->nr);
7157 * I don't think this can happen currently as we'll abort() if we catch
7158 * this case higher up, but in case somebody removes that we still can't
7159 * deal with it properly here yet, so just bail out of that's the case.
7161 if (best->bytenr != rec->start) {
7162 fprintf(stderr, "Extent start and backref starts don't match, "
7163 "please use btrfs-image on this file system and send "
7164 "it to a btrfs developer so they can make fsck fix "
7165 "this particular case. bytenr is %Lu, bytes is %Lu\n",
7166 rec->start, rec->nr);
/* Pass 3: rewrite every ref that does not match the winning entry. */
7172 * Ok great we all agreed on an extent record, let's go find the real
7173 * references and fix up the ones that don't match.
7175 list_for_each_entry(back, &rec->backrefs, list) {
7176 if (back->full_backref || !back->is_data)
7179 dback = to_data_backref(back);
7182 * Still ignoring backrefs that don't have a real ref attached
7185 if (dback->found_ref == 0)
7188 if (dback->bytes == best->bytes &&
7189 dback->disk_bytenr == best->bytenr)
7192 ret = repair_ref(info, path, dback, best);
7198 * Ok we messed with the actual refs, which means we need to drop our
7199 * entire cache and go back and rescan. I know this is a huge pain and
7200 * adds a lot of extra work, but it's the only way to be safe. Once all
7201 * the backrefs agree we may not need to do anything to the extent
/* Free the candidate list before returning. */
7206 while (!list_empty(&entries)) {
7207 entry = list_entry(entries.next, struct extent_entry, list);
7208 list_del_init(&entry->list);
/*
 * Handle an extent_record that has duplicates: promote the single duplicate
 * (which corresponds to a real EXTENT_ITEM found on disk) to be the canonical
 * record, absorbing overlapping records from the cache into it.
 * Returns 0 when deletion of duplicates is still needed, 1 otherwise
 * (per the final return below).
 * NOTE(review): elided listing — some statements between visible lines are
 * missing.
 */
7214 static int process_duplicates(struct btrfs_root *root,
7215 struct cache_tree *extent_cache,
7216 struct extent_record *rec)
7218 struct extent_record *good, *tmp;
7219 struct cache_extent *cache;
7223 * If we found a extent record for this extent then return, or if we
7224 * have more than one duplicate we are likely going to need to delete
7227 if (rec->found_rec || rec->num_duplicates > 1)
7230 /* Shouldn't happen but just in case */
7231 BUG_ON(!rec->num_duplicates);
7234 * So this happens if we end up with a backref that doesn't match the
7235 * actual extent entry. So either the backref is bad or the extent
7236 * entry is bad. Either way we want to have the extent_record actually
7237 * reflect what we found in the extent_tree, so we need to take the
7238 * duplicate out and use that as the extent_record since the only way we
7239 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7241 remove_cache_extent(extent_cache, &rec->cache);
/* Detach the duplicate and reinitialize it as the canonical record. */
7243 good = to_extent_record(rec->dups.next);
7244 list_del_init(&good->list);
7245 INIT_LIST_HEAD(&good->backrefs);
7246 INIT_LIST_HEAD(&good->dups);
7247 good->cache.start = good->start;
7248 good->cache.size = good->nr;
7249 good->content_checked = 0;
7250 good->owner_ref_checked = 0;
7251 good->num_duplicates = 0;
7252 good->refs = rec->refs;
7253 list_splice_init(&rec->backrefs, &good->backrefs);
/* Merge any records in the cache that overlap the promoted range. */
7255 cache = lookup_cache_extent(extent_cache, good->start,
7259 tmp = container_of(cache, struct extent_record, cache);
7262 * If we find another overlapping extent and it's found_rec is
7263 * set then it's a duplicate and we need to try and delete
7266 if (tmp->found_rec || tmp->num_duplicates > 0) {
7267 if (list_empty(&good->list))
7268 list_add_tail(&good->list,
7269 &duplicate_extents);
7270 good->num_duplicates += tmp->num_duplicates + 1;
7271 list_splice_init(&tmp->dups, &good->dups);
7272 list_del_init(&tmp->list);
7273 list_add_tail(&tmp->list, &good->dups);
7274 remove_cache_extent(extent_cache, &tmp->cache);
7279 * Ok we have another non extent item backed extent rec, so lets
7280 * just add it to this extent and carry on like we did above.
7282 good->refs += tmp->refs;
7283 list_splice_init(&tmp->backrefs, &good->backrefs);
7284 remove_cache_extent(extent_cache, &tmp->cache);
7287 ret = insert_cache_extent(extent_cache, &good->cache);
7290 return good->num_duplicates ? 0 : 1;
/*
 * Delete the on-disk EXTENT_ITEMs for every duplicate of @rec that is fully
 * covered by the widest record ("good"), leaving a single canonical extent
 * item.  Returns the number of deleted entries on success, negative errno on
 * failure (see final return).
 * NOTE(review): elided listing — error-handling/goto statements between the
 * visible lines are not shown.
 */
7293 static int delete_duplicate_records(struct btrfs_root *root,
7294 struct extent_record *rec)
7296 struct btrfs_trans_handle *trans;
7297 LIST_HEAD(delete_list);
7298 struct btrfs_path *path;
7299 struct extent_record *tmp, *good, *n;
7302 struct btrfs_key key;
7304 path = btrfs_alloc_path();
7311 /* Find the record that covers all of the duplicates. */
7312 list_for_each_entry(tmp, &rec->dups, list) {
7313 if (good->start < tmp->start)
7315 if (good->nr > tmp->nr)
7318 if (tmp->start + tmp->nr < good->start + good->nr) {
7319 fprintf(stderr, "Ok we have overlapping extents that "
7320 "aren't completely covered by each other, this "
7321 "is going to require more careful thought. "
7322 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7323 tmp->start, tmp->nr, good->start, good->nr);
/* Everything except "good" goes on the delete list. */
7330 list_add_tail(&rec->list, &delete_list);
7332 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7335 list_move_tail(&tmp->list, &delete_list);
/* Deletions happen in the extent tree. */
7338 root = root->fs_info->extent_root;
7339 trans = btrfs_start_transaction(root, 1);
7340 if (IS_ERR(trans)) {
7341 ret = PTR_ERR(trans);
7345 list_for_each_entry(tmp, &delete_list, list) {
7346 if (tmp->found_rec == 0)
7348 key.objectid = tmp->start;
7349 key.type = BTRFS_EXTENT_ITEM_KEY;
7350 key.offset = tmp->nr;
7352 /* Shouldn't happen but just in case */
7353 if (tmp->metadata) {
7354 fprintf(stderr, "Well this shouldn't happen, extent "
7355 "record overlaps but is metadata? "
7356 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7360 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7366 ret = btrfs_del_item(trans, root, path);
7369 btrfs_release_path(path);
7372 err = btrfs_commit_transaction(trans, root);
/* Free the in-memory records we queued for deletion. */
7376 while (!list_empty(&delete_list)) {
7377 tmp = to_extent_record(delete_list.next);
7378 list_del_init(&tmp->list);
7384 while (!list_empty(&rec->dups)) {
7385 tmp = to_extent_record(rec->dups.next);
7386 list_del_init(&tmp->list);
7390 btrfs_free_path(path);
7392 if (!ret && !nr_del)
7393 rec->num_duplicates = 0;
7395 return ret ? ret : nr_del;
/*
 * For every data backref of @rec that we never matched to a real file extent
 * (found_ref == 0), look the file extent up in its owning fs tree and, when
 * the extent it points at has no record of its own in @extent_cache, adopt
 * its disk_bytenr/bytes into the backref so verify_backrefs() can consider
 * it.  NOTE(review): elided listing — continue/error statements between the
 * visible lines are not shown.
 */
7398 static int find_possible_backrefs(struct btrfs_fs_info *info,
7399 struct btrfs_path *path,
7400 struct cache_tree *extent_cache,
7401 struct extent_record *rec)
7403 struct btrfs_root *root;
7404 struct extent_backref *back;
7405 struct data_backref *dback;
7406 struct cache_extent *cache;
7407 struct btrfs_file_extent_item *fi;
7408 struct btrfs_key key;
7412 list_for_each_entry(back, &rec->backrefs, list) {
7413 /* Don't care about full backrefs (poor unloved backrefs) */
7414 if (back->full_backref || !back->is_data)
7417 dback = to_data_backref(back);
7419 /* We found this one, we don't need to do a lookup */
7420 if (dback->found_ref)
/* Resolve the subvolume root the backref claims to belong to. */
7423 key.objectid = dback->root;
7424 key.type = BTRFS_ROOT_ITEM_KEY;
7425 key.offset = (u64)-1;
7427 root = btrfs_read_fs_root(info, &key);
7429 /* No root, definitely a bad ref, skip */
7430 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7432 /* Other err, exit */
7434 return PTR_ERR(root);
/* Look for the file extent item the backref describes. */
7436 key.objectid = dback->owner;
7437 key.type = BTRFS_EXTENT_DATA_KEY;
7438 key.offset = dback->offset;
7439 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7441 btrfs_release_path(path);
7444 /* Didn't find it, we can carry on */
7449 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7450 struct btrfs_file_extent_item);
7451 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7452 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7453 btrfs_release_path(path);
7454 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7456 struct extent_record *tmp;
7457 tmp = container_of(cache, struct extent_record, cache);
7460 * If we found an extent record for the bytenr for this
7461 * particular backref then we can't add it to our
7462 * current extent record. We only want to add backrefs
7463 * that don't have a corresponding extent item in the
7464 * extent tree since they likely belong to this record
7465 * and we need to fix it if it doesn't match bytenrs.
7471 dback->found_ref += 1;
7472 dback->disk_bytenr = bytenr;
7473 dback->bytes = bytes;
7476 * Set this so the verify backref code knows not to trust the
7477 * values in this backref.
7486 * Record orphan data ref into corresponding root.
7488 * Return 0 if the extent item contains data ref and recorded.
7489 * Return 1 if the extent item contains no useful data ref
7490 * On that case, it may contains only shared_dataref or metadata backref
7491 * or the file extent exists(this should be handled by the extent bytenr
7493 * Return <0 if something goes wrong.
/*
 * NOTE(review): elided listing — continue/error statements between visible
 * lines are not shown.
 */
7495 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7496 struct extent_record *rec)
7498 struct btrfs_key key;
7499 struct btrfs_root *dest_root;
7500 struct extent_backref *back;
7501 struct data_backref *dback;
7502 struct orphan_data_extent *orphan;
7503 struct btrfs_path *path;
7504 int recorded_data_ref = 0;
7509 path = btrfs_alloc_path();
/* Only consider data backrefs that were found in the extent tree. */
7512 list_for_each_entry(back, &rec->backrefs, list) {
7513 if (back->full_backref || !back->is_data ||
7514 !back->found_extent_tree)
7516 dback = to_data_backref(back);
7517 if (dback->found_ref)
7519 key.objectid = dback->root;
7520 key.type = BTRFS_ROOT_ITEM_KEY;
7521 key.offset = (u64)-1;
7523 dest_root = btrfs_read_fs_root(fs_info, &key);
7525 /* For non-exist root we just skip it */
7526 if (IS_ERR(dest_root) || !dest_root)
7529 key.objectid = dback->owner;
7530 key.type = BTRFS_EXTENT_DATA_KEY;
7531 key.offset = dback->offset;
7533 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7534 btrfs_release_path(path);
7536 * For ret < 0, it's OK since the fs-tree may be corrupted,
7537 * we need to record it for inode/file extent rebuild.
7538 * For ret > 0, we record it only for file extent rebuild.
7539 * For ret == 0, the file extent exists but only bytenr
7540 * mismatch, let the original bytenr fix routine to handle,
/* Queue an orphan record on the destination root for later rebuild. */
7546 orphan = malloc(sizeof(*orphan));
7551 INIT_LIST_HEAD(&orphan->list);
7552 orphan->root = dback->root;
7553 orphan->objectid = dback->owner;
7554 orphan->offset = dback->offset;
7555 orphan->disk_bytenr = rec->cache.start;
7556 orphan->disk_len = rec->cache.size;
7557 list_add(&dest_root->orphan_data_extents, &orphan->list);
7558 recorded_data_ref = 1;
7561 btrfs_free_path(path);
7563 return !recorded_data_ref;
7569 * when an incorrect extent item is found, this will delete
7570 * all of the existing entries for it and recreate them
7571 * based on what the tree scan found.
/*
 * NOTE(review): elided listing — goto/error statements between visible lines
 * are not shown.
 */
7573 static int fixup_extent_refs(struct btrfs_fs_info *info,
7574 struct cache_tree *extent_cache,
7575 struct extent_record *rec)
7577 struct btrfs_trans_handle *trans = NULL;
7579 struct btrfs_path *path;
7580 struct list_head *cur = rec->backrefs.next;
7581 struct cache_extent *cache;
7582 struct extent_backref *back;
7586 if (rec->flag_block_full_backref)
7587 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7589 path = btrfs_alloc_path();
7593 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7595 * Sometimes the backrefs themselves are so broken they don't
7596 * get attached to any meaningful rec, so first go back and
7597 * check any of our backrefs that we couldn't find and throw
7598 * them into the list if we find the backref so that
7599 * verify_backrefs can figure out what to do.
7601 ret = find_possible_backrefs(info, path, extent_cache, rec);
7606 /* step one, make sure all of the backrefs agree */
7607 ret = verify_backrefs(info, path, rec);
7611 trans = btrfs_start_transaction(info->extent_root, 1);
7612 if (IS_ERR(trans)) {
7613 ret = PTR_ERR(trans);
7617 /* step two, delete all the existing records */
7618 ret = delete_extent_records(trans, info->extent_root, path,
7619 rec->start, rec->max_size);
7624 /* was this block corrupt? If so, don't add references to it */
7625 cache = lookup_cache_extent(info->corrupt_blocks,
7626 rec->start, rec->max_size);
7632 /* step three, recreate all the refs we did find */
7633 while(cur != &rec->backrefs) {
7634 back = to_extent_backref(cur);
7638 * if we didn't find any references, don't create a
7641 if (!back->found_ref)
7644 rec->bad_full_backref = 0;
7645 ret = record_extent(trans, info, path, rec, back, allocated, flags);
/* Commit whatever we rewrote before returning. */
7653 int err = btrfs_commit_transaction(trans, info->extent_root);
7658 btrfs_free_path(path);
/*
 * Rewrite the flags of a single extent item so its FULL_BACKREF bit matches
 * what the scan recorded in @rec (set when rec->flag_block_full_backref,
 * cleared otherwise).  Returns the transaction commit result, or an error.
 * NOTE(review): elided listing — a few return/brace lines between visible
 * lines are not shown.
 */
7662 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7663 struct extent_record *rec)
7665 struct btrfs_trans_handle *trans;
7666 struct btrfs_root *root = fs_info->extent_root;
7667 struct btrfs_path *path;
7668 struct btrfs_extent_item *ei;
7669 struct btrfs_key key;
/* Metadata extents are keyed by level, data extents by size. */
7673 key.objectid = rec->start;
7674 if (rec->metadata) {
7675 key.type = BTRFS_METADATA_ITEM_KEY;
7676 key.offset = rec->info_level;
7678 key.type = BTRFS_EXTENT_ITEM_KEY;
7679 key.offset = rec->max_size;
7682 path = btrfs_alloc_path();
7686 trans = btrfs_start_transaction(root, 0);
7687 if (IS_ERR(trans)) {
7688 btrfs_free_path(path);
7689 return PTR_ERR(trans);
7692 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7694 btrfs_free_path(path);
7695 btrfs_commit_transaction(trans, root);
7698 fprintf(stderr, "Didn't find extent for %llu\n",
7699 (unsigned long long)rec->start);
7700 btrfs_free_path(path);
7701 btrfs_commit_transaction(trans, root);
/* Flip only the FULL_BACKREF bit; all other flags are preserved. */
7705 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7706 struct btrfs_extent_item);
7707 flags = btrfs_extent_flags(path->nodes[0], ei);
7708 if (rec->flag_block_full_backref) {
7709 fprintf(stderr, "setting full backref on %llu\n",
7710 (unsigned long long)key.objectid);
7711 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7713 fprintf(stderr, "clearing full backref on %llu\n",
7714 (unsigned long long)key.objectid);
7715 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7717 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7718 btrfs_mark_buffer_dirty(path->nodes[0]);
7719 btrfs_free_path(path);
7720 return btrfs_commit_transaction(trans, root);
7723 /* right now we only prune from the extent allocation tree */
/*
 * Remove the node pointer that references one corrupt block: search down to
 * the corrupt block's parent, locate the slot whose blockptr matches the
 * corrupt block's start, and delete that pointer with btrfs_del_ptr().
 * NOTE(review): elided listing — error/goto statements between visible lines
 * are not shown.
 */
7724 static int prune_one_block(struct btrfs_trans_handle *trans,
7725 struct btrfs_fs_info *info,
7726 struct btrfs_corrupt_block *corrupt)
7729 struct btrfs_path path;
7730 struct extent_buffer *eb;
7734 int level = corrupt->level + 1;
7736 btrfs_init_path(&path);
7738 /* we want to stop at the parent to our busted block */
7739 path.lowest_level = level;
7741 ret = btrfs_search_slot(trans, info->extent_root,
7742 &corrupt->key, &path, -1, 1);
7747 eb = path.nodes[level];
7754 * hopefully the search gave us the block we want to prune,
7755 * lets try that first
7757 slot = path.slots[level];
7758 found = btrfs_node_blockptr(eb, slot);
7759 if (found == corrupt->cache.start)
7762 nritems = btrfs_header_nritems(eb);
7764 /* the search failed, lets scan this node and hope we find it */
7765 for (slot = 0; slot < nritems; slot++) {
7766 found = btrfs_node_blockptr(eb, slot);
7767 if (found == corrupt->cache.start)
7771 * we couldn't find the bad block. TODO, search all the nodes for pointers
7774 if (eb == info->extent_root->node) {
7779 btrfs_release_path(&path);
7784 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7785 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7788 btrfs_release_path(&path);
/*
 * Walk fs_info->corrupt_blocks and prune each corrupt block's parent pointer
 * via prune_one_block(), starting a transaction lazily on the first entry.
 * Returns the final transaction commit result.
 * NOTE(review): elided listing — loop/brace lines between visible lines are
 * not shown.
 */
7792 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7794 struct btrfs_trans_handle *trans = NULL;
7795 struct cache_extent *cache;
7796 struct btrfs_corrupt_block *corrupt;
7799 cache = search_cache_extent(info->corrupt_blocks, 0);
7803 trans = btrfs_start_transaction(info->extent_root, 1);
7805 return PTR_ERR(trans);
7807 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7808 prune_one_block(trans, info, corrupt);
7809 remove_cache_extent(info->corrupt_blocks, cache);
7812 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Drop all cached free-space state: clear every EXTENT_DIRTY range in
 * fs_info->free_space_cache and walk the block groups (presumably resetting
 * their cached flag — the body between visible lines is elided; confirm
 * against the full source).
 */
7816 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7818 struct btrfs_block_group_cache *cache;
7823 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7824 &start, &end, EXTENT_DIRTY);
7827 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7833 cache = btrfs_lookup_first_block_group(fs_info, start);
7838 start = cache->key.objectid + cache->key.offset;
/*
 * Top-level extent/backref verification pass.  When repairing: first pin all
 * suspect extents and corrupt blocks as excluded so allocations avoid them,
 * delete duplicate extent records, then walk every cached extent_record and
 * report/repair ref-count mismatches, backpointer mismatches, owner-ref
 * failures, bad full-backref flags, stripe-crossing metadata and
 * chunk-type mismatches.  Finally fixes block accounting.
 * NOTE(review): elided listing — many control-flow lines between the visible
 * lines are not shown.
 */
7842 static int check_extent_refs(struct btrfs_root *root,
7843 struct cache_tree *extent_cache)
7845 struct extent_record *rec;
7846 struct cache_extent *cache;
7855 * if we're doing a repair, we have to make sure
7856 * we don't allocate from the problem extents.
7857 * In the worst case, this will be all the
7860 cache = search_cache_extent(extent_cache, 0);
7862 rec = container_of(cache, struct extent_record, cache);
7863 set_extent_dirty(root->fs_info->excluded_extents,
7865 rec->start + rec->max_size - 1,
7867 cache = next_cache_extent(cache);
7870 /* pin down all the corrupted blocks too */
7871 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7873 set_extent_dirty(root->fs_info->excluded_extents,
7875 cache->start + cache->size - 1,
7877 cache = next_cache_extent(cache);
7879 prune_corrupt_blocks(root->fs_info);
7880 reset_cached_block_groups(root->fs_info);
7883 reset_cached_block_groups(root->fs_info);
7886 * We need to delete any duplicate entries we find first otherwise we
7887 * could mess up the extent tree when we have backrefs that actually
7888 * belong to a different extent item and not the weird duplicate one.
7890 while (repair && !list_empty(&duplicate_extents)) {
7891 rec = to_extent_record(duplicate_extents.next);
7892 list_del_init(&rec->list);
7894 /* Sometimes we can find a backref before we find an actual
7895 * extent, so we need to process it a little bit to see if there
7896 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7897 * if this is a backref screwup. If we need to delete stuff
7898 * process_duplicates() will return 0, otherwise it will return
7901 if (process_duplicates(root, extent_cache, rec))
7903 ret = delete_duplicate_records(root, rec);
7907 * delete_duplicate_records will return the number of entries
7908 * deleted, so if it's greater than 0 then we know we actually
7909 * did something and we need to remove.
/* Main verification loop over every cached extent record. */
7923 cache = search_cache_extent(extent_cache, 0);
7926 rec = container_of(cache, struct extent_record, cache);
7927 if (rec->num_duplicates) {
7928 fprintf(stderr, "extent item %llu has multiple extent "
7929 "items\n", (unsigned long long)rec->start);
7934 if (rec->refs != rec->extent_item_refs) {
7935 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7936 (unsigned long long)rec->start,
7937 (unsigned long long)rec->nr);
7938 fprintf(stderr, "extent item %llu, found %llu\n",
7939 (unsigned long long)rec->extent_item_refs,
7940 (unsigned long long)rec->refs);
7941 ret = record_orphan_data_extents(root->fs_info, rec);
7948 * we can't use the extent to repair file
7949 * extent, let the fallback method handle it.
7951 if (!fixed && repair) {
7952 ret = fixup_extent_refs(
7963 if (all_backpointers_checked(rec, 1)) {
7964 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7965 (unsigned long long)rec->start,
7966 (unsigned long long)rec->nr);
7968 if (!fixed && !recorded && repair) {
7969 ret = fixup_extent_refs(root->fs_info,
7978 if (!rec->owner_ref_checked) {
7979 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7980 (unsigned long long)rec->start,
7981 (unsigned long long)rec->nr);
7982 if (!fixed && !recorded && repair) {
7983 ret = fixup_extent_refs(root->fs_info,
7992 if (rec->bad_full_backref) {
7993 fprintf(stderr, "bad full backref, on [%llu]\n",
7994 (unsigned long long)rec->start);
7996 ret = fixup_extent_flags(root->fs_info, rec);
8005 * Although it's not a extent ref's problem, we reuse this
8006 * routine for error reporting.
8007 * No repair function yet.
8009 if (rec->crossing_stripes) {
8011 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8012 rec->start, rec->start + rec->max_size);
8017 if (rec->wrong_chunk_type) {
8019 "bad extent [%llu, %llu), type mismatch with chunk\n",
8020 rec->start, rec->start + rec->max_size);
/* Done with this record: drop it and un-exclude it if it checked out. */
8025 remove_cache_extent(extent_cache, cache);
8026 free_all_extent_backrefs(rec);
8027 if (!init_extent_tree && repair && (!cur_err || fixed))
8028 clear_extent_dirty(root->fs_info->excluded_extents,
8030 rec->start + rec->max_size - 1,
8036 if (ret && ret != -EAGAIN) {
8037 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8040 struct btrfs_trans_handle *trans;
8042 root = root->fs_info->extent_root;
8043 trans = btrfs_start_transaction(root, 1);
8044 if (IS_ERR(trans)) {
8045 ret = PTR_ERR(trans);
8049 btrfs_fix_block_accounting(trans, root);
8050 ret = btrfs_commit_transaction(trans, root);
8055 fprintf(stderr, "repaired damaged extent references\n");
/*
 * Compute the per-device stripe length for a chunk of @length bytes spread
 * over @num_stripes stripes, according to the RAID profile bits in @type:
 * RAID0 divides by all stripes, RAID10 by stripes/2 (length*2/num_stripes),
 * RAID5 by (num_stripes - 1) and RAID6 by (num_stripes - 2); any other
 * profile uses the full length.
 */
8061 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8065 if (type & BTRFS_BLOCK_GROUP_RAID0) {
8066 stripe_size = length;
8067 stripe_size /= num_stripes;
8068 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8069 stripe_size = length * 2;
8070 stripe_size /= num_stripes;
8071 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8072 stripe_size = length;
8073 stripe_size /= (num_stripes - 1);
8074 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8075 stripe_size = length;
8076 stripe_size /= (num_stripes - 2);
8078 stripe_size = length;
8084 * Check the chunk with its block group/dev list ref:
8085 * Return 0 if all refs seems valid.
8086 * Return 1 if part of refs seems valid, need later check for rebuild ref
8087 * like missing block group and needs to search extent tree to rebuild them.
8088 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * NOTE(review): elided listing — some assignments and control flow between
 * visible lines are not shown.
 */
8090 static int check_chunk_refs(struct chunk_record *chunk_rec,
8091 struct block_group_tree *block_group_cache,
8092 struct device_extent_tree *dev_extent_cache,
8095 struct cache_extent *block_group_item;
8096 struct block_group_record *block_group_rec;
8097 struct cache_extent *dev_extent_item;
8098 struct device_extent_record *dev_extent_rec;
8102 int metadump_v2 = 0;
/* 1) chunk -> block group item cross-check. */
8106 block_group_item = lookup_cache_extent(&block_group_cache->tree,
8109 if (block_group_item) {
8110 block_group_rec = container_of(block_group_item,
8111 struct block_group_record,
8113 if (chunk_rec->length != block_group_rec->offset ||
8114 chunk_rec->offset != block_group_rec->objectid ||
8116 chunk_rec->type_flags != block_group_rec->flags)) {
8119 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8120 chunk_rec->objectid,
8125 chunk_rec->type_flags,
8126 block_group_rec->objectid,
8127 block_group_rec->type,
8128 block_group_rec->offset,
8129 block_group_rec->offset,
8130 block_group_rec->objectid,
8131 block_group_rec->flags);
8134 list_del_init(&block_group_rec->list);
8135 chunk_rec->bg_rec = block_group_rec;
8140 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8141 chunk_rec->objectid,
8146 chunk_rec->type_flags);
/* 2) chunk stripes -> dev extent cross-check, one per stripe. */
8153 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8154 chunk_rec->num_stripes);
8155 for (i = 0; i < chunk_rec->num_stripes; ++i) {
8156 devid = chunk_rec->stripes[i].devid;
8157 offset = chunk_rec->stripes[i].offset;
8158 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8159 devid, offset, length);
8160 if (dev_extent_item) {
8161 dev_extent_rec = container_of(dev_extent_item,
8162 struct device_extent_record,
8164 if (dev_extent_rec->objectid != devid ||
8165 dev_extent_rec->offset != offset ||
8166 dev_extent_rec->chunk_offset != chunk_rec->offset ||
8167 dev_extent_rec->length != length) {
8170 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8171 chunk_rec->objectid,
8174 chunk_rec->stripes[i].devid,
8175 chunk_rec->stripes[i].offset,
8176 dev_extent_rec->objectid,
8177 dev_extent_rec->offset,
8178 dev_extent_rec->length);
8181 list_move(&dev_extent_rec->chunk_list,
8182 &chunk_rec->dextents);
8187 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8188 chunk_rec->objectid,
8191 chunk_rec->stripes[i].devid,
8192 chunk_rec->stripes[i].offset);
8199 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Validate every cached chunk against its block group and dev extents via
 * check_chunk_refs(), distributing chunks onto the caller-provided @good,
 * @rebuild and @bad lists by result (0 / >0 / <0), then report any block
 * groups and dev extents left unmatched.  @silent suppresses messages
 * inside check_chunk_refs().
 * NOTE(review): elided listing — some lines between visible lines missing.
 */
8200 int check_chunks(struct cache_tree *chunk_cache,
8201 struct block_group_tree *block_group_cache,
8202 struct device_extent_tree *dev_extent_cache,
8203 struct list_head *good, struct list_head *bad,
8204 struct list_head *rebuild, int silent)
8206 struct cache_extent *chunk_item;
8207 struct chunk_record *chunk_rec;
8208 struct block_group_record *bg_rec;
8209 struct device_extent_record *dext_rec;
8213 chunk_item = first_cache_extent(chunk_cache);
8214 while (chunk_item) {
8215 chunk_rec = container_of(chunk_item, struct chunk_record,
8217 err = check_chunk_refs(chunk_rec, block_group_cache,
8218 dev_extent_cache, silent);
8221 if (err == 0 && good)
8222 list_add_tail(&chunk_rec->list, good);
8223 if (err > 0 && rebuild)
8224 list_add_tail(&chunk_rec->list, rebuild);
8226 list_add_tail(&chunk_rec->list, bad);
8227 chunk_item = next_cache_extent(chunk_item);
/* Anything still on the orphan lists has no owning chunk. */
8230 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8233 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8241 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8245 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Sum the lengths of all dev extents belonging to @dev_rec's devid and
 * compare the total against the device item's bytes_used; report a mismatch
 * when they differ.
 * NOTE(review): elided listing — loop braces/returns between visible lines
 * are not shown.
 */
8256 static int check_device_used(struct device_record *dev_rec,
8257 struct device_extent_tree *dext_cache)
8259 struct cache_extent *cache;
8260 struct device_extent_record *dev_extent_rec;
8263 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8265 dev_extent_rec = container_of(cache,
8266 struct device_extent_record,
8268 if (dev_extent_rec->objectid != dev_rec->devid)
8271 list_del_init(&dev_extent_rec->device_list);
8272 total_byte += dev_extent_rec->length;
8273 cache = next_cache_extent(cache);
8276 if (total_byte != dev_rec->byte_used) {
8278 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8279 total_byte, dev_rec->byte_used, dev_rec->objectid,
8280 dev_rec->type, dev_rec->offset);
8287 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Walk the device record rb-tree, validating each device's used-bytes total
 * with check_device_used(), then report any dev extents whose device item
 * was never found.
 * NOTE(review): elided listing — some lines between visible lines missing.
 */
8288 static int check_devices(struct rb_root *dev_cache,
8289 struct device_extent_tree *dev_extent_cache)
8291 struct rb_node *dev_node;
8292 struct device_record *dev_rec;
8293 struct device_extent_record *dext_rec;
8297 dev_node = rb_first(dev_cache);
8299 dev_rec = container_of(dev_node, struct device_record, node);
8300 err = check_device_used(dev_rec, dev_extent_cache);
8304 dev_node = rb_next(dev_node);
8306 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8309 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8310 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record describing one tree root (objectid, bytenr,
 * last_snapshot, levels, node size and optional drop key) and append it to
 * @head for later processing by deal_root_from_list().
 * NOTE(review): elided listing — the malloc-failure check and the drop_key
 * NULL guard between visible lines are not shown.
 */
8317 static int add_root_item_to_list(struct list_head *head,
8318 u64 objectid, u64 bytenr, u64 last_snapshot,
8319 u8 level, u8 drop_level,
8320 int level_size, struct btrfs_key *drop_key)
8323 struct root_item_record *ri_rec;
8324 ri_rec = malloc(sizeof(*ri_rec));
8327 ri_rec->bytenr = bytenr;
8328 ri_rec->objectid = objectid;
8329 ri_rec->level = level;
8330 ri_rec->level_size = level_size;
8331 ri_rec->drop_level = drop_level;
8332 ri_rec->last_snapshot = last_snapshot;
8334 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8335 list_add_tail(&ri_rec->list, head);
/*
 * free_root_item_list() - drain @list, unlinking (and presumably freeing —
 * the free() line falls in a numbering gap of this excerpt) every
 * root_item_record queued by add_root_item_to_list().
 */
8340 static void free_root_item_list(struct list_head *list)
8342 struct root_item_record *ri_rec;
8344 while (!list_empty(list)) {
8345 ri_rec = list_first_entry(list, struct root_item_record,
8347 list_del_init(&ri_rec->list);
/*
 * deal_root_from_list() - process each queued root: read its root node,
 * add it to the pending set, then drive run_next_block() over the various
 * caches to scan the tree.  Per the in-code comment, snapshot roots are
 * handled one by one when rebuilding the extent tree.
 * NOTE(review): excerpt has gaps in the embedded numbering (error returns,
 * loop conditions around the run_next_block() calls are missing); comments
 * only, code unchanged.
 */
8352 static int deal_root_from_list(struct list_head *list,
8353 struct btrfs_root *root,
8354 struct block_info *bits,
8356 struct cache_tree *pending,
8357 struct cache_tree *seen,
8358 struct cache_tree *reada,
8359 struct cache_tree *nodes,
8360 struct cache_tree *extent_cache,
8361 struct cache_tree *chunk_cache,
8362 struct rb_root *dev_cache,
8363 struct block_group_tree *block_group_cache,
8364 struct device_extent_tree *dev_extent_cache)
8369 while (!list_empty(list)) {
8370 struct root_item_record *rec;
8371 struct extent_buffer *buf;
8372 rec = list_entry(list->next,
8373 struct root_item_record, list);
8375 buf = read_tree_block(root->fs_info->tree_root,
8376 rec->bytenr, rec->level_size, 0);
/* A root node that fails to read is released; handling of the error
 * itself falls in a numbering gap here. */
8377 if (!extent_buffer_uptodate(buf)) {
8378 free_extent_buffer(buf);
8382 ret = add_root_to_pending(buf, extent_cache, pending,
8383 seen, nodes, rec->objectid);
8387 * To rebuild extent tree, we need deal with snapshot
8388 * one by one, otherwise we deal with node firstly which
8389 * can maximize readahead.
8392 ret = run_next_block(root, bits, bits_nr, &last,
8393 pending, seen, reada, nodes,
8394 extent_cache, chunk_cache,
8395 dev_cache, block_group_cache,
8396 dev_extent_cache, rec);
8400 free_extent_buffer(buf);
8401 list_del(&rec->list);
/* Final drain with a NULL record once the root list is empty. */
8407 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8408 reada, nodes, extent_cache, chunk_cache,
8409 dev_cache, block_group_cache,
8410 dev_extent_cache, NULL);
/*
 * check_chunks_and_extents() - top-level driver of the original check mode:
 *  1. init all per-run caches (chunk, block-group, device, extent, …),
 *  2. hook them into fs_info so lower layers can record findings,
 *  3. queue the tree root, chunk root and every ROOT_ITEM found in the root
 *     tree (split into "normal" and "dropping" — i.e. partially-deleted —
 *     trees, keyed on ri.drop_progress),
 *  4. scan them via deal_root_from_list(),
 *  5. verify chunks, extent refs and devices,
 *  6. tear everything down (two teardown sequences are visible; which one
 *     runs on the error path vs. normal path falls in numbering gaps).
 * NOTE(review): this excerpt has gaps in the embedded line numbering —
 * error checks, labels and braces between the visible statements are
 * missing; code left byte-identical, comments only.
 */
8420 static int check_chunks_and_extents(struct btrfs_root *root)
8422 struct rb_root dev_cache;
8423 struct cache_tree chunk_cache;
8424 struct block_group_tree block_group_cache;
8425 struct device_extent_tree dev_extent_cache;
8426 struct cache_tree extent_cache;
8427 struct cache_tree seen;
8428 struct cache_tree pending;
8429 struct cache_tree reada;
8430 struct cache_tree nodes;
8431 struct extent_io_tree excluded_extents;
8432 struct cache_tree corrupt_blocks;
8433 struct btrfs_path path;
8434 struct btrfs_key key;
8435 struct btrfs_key found_key;
8437 struct block_info *bits;
8439 struct extent_buffer *leaf;
8441 struct btrfs_root_item ri;
8442 struct list_head dropping_trees;
8443 struct list_head normal_trees;
8444 struct btrfs_root *root1;
/* --- cache initialization --- */
8449 dev_cache = RB_ROOT;
8450 cache_tree_init(&chunk_cache);
8451 block_group_tree_init(&block_group_cache);
8452 device_extent_tree_init(&dev_extent_cache);
8454 cache_tree_init(&extent_cache);
8455 cache_tree_init(&seen);
8456 cache_tree_init(&pending);
8457 cache_tree_init(&nodes);
8458 cache_tree_init(&reada);
8459 cache_tree_init(&corrupt_blocks);
8460 extent_io_tree_init(&excluded_extents);
8461 INIT_LIST_HEAD(&dropping_trees);
8462 INIT_LIST_HEAD(&normal_trees);
/* Expose the caches/hooks to the rest of the checker via fs_info. */
8465 root->fs_info->excluded_extents = &excluded_extents;
8466 root->fs_info->fsck_extent_cache = &extent_cache;
8467 root->fs_info->free_extent_hook = free_extent_hook;
8468 root->fs_info->corrupt_blocks = &corrupt_blocks;
8472 bits = malloc(bits_nr * sizeof(struct block_info));
8478 if (ctx.progress_enabled) {
8479 ctx.tp = TASK_EXTENTS;
8480 task_start(ctx.info);
/* Queue the tree root and chunk root as "normal" trees. */
8484 root1 = root->fs_info->tree_root;
8485 level = btrfs_header_level(root1->node);
8486 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8487 root1->node->start, 0, level, 0,
8488 root1->nodesize, NULL);
8491 root1 = root->fs_info->chunk_root;
8492 level = btrfs_header_level(root1->node);
8493 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8494 root1->node->start, 0, level, 0,
8495 root1->nodesize, NULL);
/* Walk every ROOT_ITEM in the root tree and queue each subvolume tree. */
8498 btrfs_init_path(&path);
8501 key.type = BTRFS_ROOT_ITEM_KEY;
8502 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8507 leaf = path.nodes[0];
8508 slot = path.slots[0];
8509 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8510 ret = btrfs_next_leaf(root, &path);
8513 leaf = path.nodes[0];
8514 slot = path.slots[0];
8516 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8517 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
8518 unsigned long offset;
8521 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8522 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8523 last_snapshot = btrfs_root_last_snapshot(&ri);
/* drop_progress objectid == 0 means no deletion in progress: normal tree. */
8524 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8525 level = btrfs_root_level(&ri);
8526 level_size = root->nodesize;
8527 ret = add_root_item_to_list(&normal_trees,
8529 btrfs_root_bytenr(&ri),
8530 last_snapshot, level,
8531 0, level_size, NULL);
/* Otherwise the tree is mid-deletion: queue it with its drop key. */
8535 level = btrfs_root_level(&ri);
8536 level_size = root->nodesize;
8537 objectid = found_key.objectid;
8538 btrfs_disk_key_to_cpu(&found_key,
8540 ret = add_root_item_to_list(&dropping_trees,
8542 btrfs_root_bytenr(&ri),
8543 last_snapshot, level,
8545 level_size, &found_key);
8552 btrfs_release_path(&path);
8555 * check_block can return -EAGAIN if it fixes something, please keep
8556 * this in mind when dealing with return values from these functions, if
8557 * we get -EAGAIN we want to fall through and restart the loop.
8559 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8560 &seen, &reada, &nodes, &extent_cache,
8561 &chunk_cache, &dev_cache, &block_group_cache,
8568 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8569 &pending, &seen, &reada, &nodes,
8570 &extent_cache, &chunk_cache, &dev_cache,
8571 &block_group_cache, &dev_extent_cache);
/* --- verification passes over the populated caches --- */
8578 ret = check_chunks(&chunk_cache, &block_group_cache,
8579 &dev_extent_cache, NULL, NULL, NULL, 0);
8586 ret = check_extent_refs(root, &extent_cache);
8593 ret = check_devices(&dev_cache, &dev_extent_cache);
8598 task_stop(ctx.info);
/* --- teardown: unhook from fs_info and free every cache --- */
8600 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8601 extent_io_tree_cleanup(&excluded_extents);
8602 root->fs_info->fsck_extent_cache = NULL;
8603 root->fs_info->free_extent_hook = NULL;
8604 root->fs_info->corrupt_blocks = NULL;
8605 root->fs_info->excluded_extents = NULL;
8608 free_chunk_cache_tree(&chunk_cache);
8609 free_device_cache_tree(&dev_cache);
8610 free_block_group_tree(&block_group_cache);
8611 free_device_extent_tree(&dev_extent_cache);
8612 free_extent_cache_tree(&seen);
8613 free_extent_cache_tree(&pending);
8614 free_extent_cache_tree(&reada);
8615 free_extent_cache_tree(&nodes);
/* Second cleanup sequence — presumably the out/error label path; the
 * label itself falls in a numbering gap.  TODO confirm against full file. */
8618 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8619 free_extent_cache_tree(&seen);
8620 free_extent_cache_tree(&pending);
8621 free_extent_cache_tree(&reada);
8622 free_extent_cache_tree(&nodes);
8623 free_chunk_cache_tree(&chunk_cache);
8624 free_block_group_tree(&block_group_cache);
8625 free_device_cache_tree(&dev_cache);
8626 free_device_extent_tree(&dev_extent_cache);
8627 free_extent_record_cache(root->fs_info, &extent_cache);
8628 free_root_item_list(&normal_trees);
8629 free_root_item_list(&dropping_trees);
8630 extent_io_tree_cleanup(&excluded_extents);
8635 * Check backrefs of a tree block given by @bytenr or @eb.
8637 * @root: the root containing the @bytenr or @eb
8638 * @eb: tree block extent buffer, can be NULL
8639 * @bytenr: bytenr of the tree block to search
8640 * @level: tree level of the tree block
8641 * @owner: owner of the tree block
8643 * Return >0 for any error found and output error message
8644 * Return 0 for no error found
/*
 * Looks up the block's (METADATA_ITEM or EXTENT_ITEM) entry in the extent
 * tree, validates flags/generation/level/refcount, then scans the inline
 * refs (and, failing that, a keyed TREE_BLOCK_REF item) for a backref
 * matching this root/owner.
 * NOTE(review): excerpt has gaps in the embedded numbering (loop bounds,
 * some error() calls, final return are missing); comments only.
 */
8646 static int check_tree_block_ref(struct btrfs_root *root,
8647 struct extent_buffer *eb, u64 bytenr,
8648 int level, u64 owner)
8650 struct btrfs_key key;
8651 struct btrfs_root *extent_root = root->fs_info->extent_root;
8652 struct btrfs_path path;
8653 struct btrfs_extent_item *ei;
8654 struct btrfs_extent_inline_ref *iref;
8655 struct extent_buffer *leaf;
8661 u32 nodesize = root->nodesize;
8668 btrfs_init_path(&path);
8669 key.objectid = bytenr;
/* Skinny metadata filesystems index tree blocks by METADATA_ITEM_KEY. */
8670 if (btrfs_fs_incompat(root->fs_info,
8671 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8672 key.type = BTRFS_METADATA_ITEM_KEY;
8674 key.type = BTRFS_EXTENT_ITEM_KEY;
8675 key.offset = (u64)-1;
8677 /* Search for the backref in extent tree */
8678 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8680 err |= BACKREF_MISSING;
8683 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8685 err |= BACKREF_MISSING;
8689 leaf = path.nodes[0];
8690 slot = path.slots[0];
8691 btrfs_item_key_to_cpu(leaf, &key, slot);
8693 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* Skinny item: level is the key offset, inline refs follow the item. */
8695 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8696 skinny_level = (int)key.offset;
8697 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
/* Classic item: level lives in the tree_block_info header. */
8699 struct btrfs_tree_block_info *info;
8701 info = (struct btrfs_tree_block_info *)(ei + 1);
8702 skinny_level = btrfs_tree_block_level(leaf, info);
8703 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8710 if (!(btrfs_extent_flags(leaf, ei) &
8711 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8713 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8714 key.objectid, nodesize,
8715 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8716 err = BACKREF_MISMATCH;
8718 header_gen = btrfs_header_generation(eb);
8719 extent_gen = btrfs_extent_generation(leaf, ei);
8720 if (header_gen != extent_gen) {
8722 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8723 key.objectid, nodesize, header_gen,
8725 err = BACKREF_MISMATCH;
8727 if (level != skinny_level) {
8729 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8730 key.objectid, nodesize, level, skinny_level);
8731 err = BACKREF_MISMATCH;
/* Non-fs trees must never be shared: refcount has to be exactly 1. */
8733 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8735 "extent[%llu %u] is referred by other roots than %llu",
8736 key.objectid, nodesize, root->objectid);
8737 err = BACKREF_MISMATCH;
8742 * Iterate the extent/metadata item to find the exact backref
8744 item_size = btrfs_item_size_nr(leaf, slot);
8745 ptr = (unsigned long)iref;
8746 end = (unsigned long)ei + item_size;
8748 iref = (struct btrfs_extent_inline_ref *)ptr;
8749 type = btrfs_extent_inline_ref_type(leaf, iref);
8750 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8752 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8753 (offset == root->objectid || offset == owner)) {
8755 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8756 /* Check if the backref points to valid referencer */
/* Recursive call with eb == NULL and level == -1 (level resolved there). */
8757 found_ref = !check_tree_block_ref(root, NULL, offset,
8763 ptr += btrfs_extent_inline_ref_size(type);
8767 * Inlined extent item doesn't have what we need, check
8768 * TREE_BLOCK_REF_KEY
8771 btrfs_release_path(&path);
8772 key.objectid = bytenr;
8773 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8774 key.offset = root->objectid;
8776 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8781 err |= BACKREF_MISSING;
8783 btrfs_release_path(&path);
8784 if (eb && (err & BACKREF_MISSING))
8785 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8786 bytenr, nodesize, owner, level);
8791 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8793 * Return >0 any error found and output error message
8794 * Return 0 for no error found
/*
 * For a regular (non-inline, non-hole) file extent: verify alignment of
 * disk/extent byte counts, find the matching EXTENT_ITEM in the extent
 * tree, check its DATA flag and generation, then look for a data backref
 * (inline EXTENT_DATA_REF / SHARED_DATA_REF, or a keyed EXTENT_DATA_REF).
 * Also accumulates the global data_bytes_allocated/referenced counters.
 * NOTE(review): excerpt has gaps in the embedded numbering (loop headers,
 * early returns, goto labels missing); comments only, code unchanged.
 */
8796 static int check_extent_data_item(struct btrfs_root *root,
8797 struct extent_buffer *eb, int slot)
8799 struct btrfs_file_extent_item *fi;
8800 struct btrfs_path path;
8801 struct btrfs_root *extent_root = root->fs_info->extent_root;
8802 struct btrfs_key fi_key;
8803 struct btrfs_key dbref_key;
8804 struct extent_buffer *leaf;
8805 struct btrfs_extent_item *ei;
8806 struct btrfs_extent_inline_ref *iref;
8807 struct btrfs_extent_data_ref *dref;
8809 u64 file_extent_gen;
8812 u64 extent_num_bytes;
8820 int found_dbackref = 0;
8824 btrfs_item_key_to_cpu(eb, &fi_key, slot);
8825 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8826 file_extent_gen = btrfs_file_extent_generation(eb, fi);
8828 /* Nothing to check for hole and inline data extents */
8829 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8830 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8833 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8834 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8835 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8837 /* Check unaligned disk_num_bytes and num_bytes */
8838 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8840 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8841 fi_key.objectid, fi_key.offset, disk_num_bytes,
8843 err |= BYTES_UNALIGNED;
8845 data_bytes_allocated += disk_num_bytes;
8847 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8849 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8850 fi_key.objectid, fi_key.offset, extent_num_bytes,
8852 err |= BYTES_UNALIGNED;
8854 data_bytes_referenced += extent_num_bytes;
8856 owner = btrfs_header_owner(eb);
8858 /* Check the extent item of the file extent in extent tree */
8859 btrfs_init_path(&path);
8860 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8861 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8862 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8864 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8866 err |= BACKREF_MISSING;
8870 leaf = path.nodes[0];
8871 slot = path.slots[0];
8872 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8874 extent_flags = btrfs_extent_flags(leaf, ei);
8875 extent_gen = btrfs_extent_generation(leaf, ei);
8877 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8879 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8880 disk_bytenr, disk_num_bytes,
8881 BTRFS_EXTENT_FLAG_DATA);
8882 err |= BACKREF_MISMATCH;
/* File extent generation can't pre-date the extent item's generation. */
8885 if (file_extent_gen < extent_gen) {
8887 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8888 disk_bytenr, disk_num_bytes, file_extent_gen,
8890 err |= BACKREF_MISMATCH;
8893 /* Check data backref inside that extent item */
8894 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8895 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8896 ptr = (unsigned long)iref;
8897 end = (unsigned long)ei + item_size;
8899 iref = (struct btrfs_extent_inline_ref *)ptr;
8900 type = btrfs_extent_inline_ref_type(leaf, iref);
8901 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8903 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8904 ref_root = btrfs_extent_data_ref_root(leaf, dref);
8905 if (ref_root == owner || ref_root == root->objectid)
8907 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/* Shared ref: validate the parent tree block instead. */
8908 found_dbackref = !check_tree_block_ref(root, NULL,
8909 btrfs_extent_inline_ref_offset(leaf, iref),
8915 ptr += btrfs_extent_inline_ref_size(type);
8918 /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8919 if (!found_dbackref) {
8920 btrfs_release_path(&path);
8922 btrfs_init_path(&path);
8923 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8924 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
/* Keyed data refs are located by the hash of (root, objectid, offset). */
8925 dbref_key.offset = hash_extent_data_ref(root->objectid,
8926 fi_key.objectid, fi_key.offset);
8928 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8929 &dbref_key, &path, 0, 0);
8934 if (!found_dbackref)
8935 err |= BACKREF_MISSING;
8937 btrfs_release_path(&path);
8938 if (err & BACKREF_MISSING) {
8939 error("data extent[%llu %llu] backref lost",
8940 disk_bytenr, disk_num_bytes);
8946 * Get real tree block level for the case like shared block
8947 * Return >= 0 as tree level
8948 * Return <0 for error
/*
 * Resolves a tree block's level from two independent sources — the extent
 * tree backref (METADATA_ITEM key offset or tree_block_info) and the block
 * header itself — and, per the visible comparison, treats disagreement as
 * an error.  Generation from the extent item is used to validate the read.
 * NOTE(review): excerpt has gaps in the embedded numbering (error returns
 * between the visible statements are missing); comments only.
 */
8950 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8952 struct extent_buffer *eb;
8953 struct btrfs_path path;
8954 struct btrfs_key key;
8955 struct btrfs_extent_item *ei;
8958 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8963 /* Search extent tree for extent generation and level */
8964 key.objectid = bytenr;
8965 key.type = BTRFS_METADATA_ITEM_KEY;
8966 key.offset = (u64)-1;
8968 btrfs_init_path(&path);
8969 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8972 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8980 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8981 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8982 struct btrfs_extent_item);
8983 flags = btrfs_extent_flags(path.nodes[0], ei);
8984 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8989 /* Get transid for later read_tree_block() check */
8990 transid = btrfs_extent_generation(path.nodes[0], ei);
8992 /* Get backref level as one source */
8993 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8994 backref_level = key.offset;
8996 struct btrfs_tree_block_info *info;
8998 info = (struct btrfs_tree_block_info *)(ei + 1);
8999 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9001 btrfs_release_path(&path);
9003 /* Get level from tree block as an alternative source */
9004 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9005 if (!extent_buffer_uptodate(eb)) {
9006 free_extent_buffer(eb);
9009 header_level = btrfs_header_level(eb);
9010 free_extent_buffer(eb);
/* Disagreement between header and backref level — error path (the value
 * returned here falls in a numbering gap). */
9012 if (header_level != backref_level)
9014 return header_level;
9017 btrfs_release_path(&path);
9022 * Check if a tree block backref is valid (points to a valid tree block)
9023 * if level == -1, level will be resolved
9024 * Return >0 for any error found and print error message
/*
 * Verifies a TREE_BLOCK_REF: reads the referencing root, reads the tree
 * block at @bytenr, then re-walks the root with the block's first key at
 * lowest_level == level and checks the reached node has the expected
 * bytenr and level.
 * NOTE(review): excerpt has gaps in the embedded numbering; comments only.
 */
9026 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9027 u64 bytenr, int level)
9029 struct btrfs_root *root;
9030 struct btrfs_key key;
9031 struct btrfs_path path;
9032 struct extent_buffer *eb;
9033 struct extent_buffer *node;
9034 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9038 /* Query level for level == -1 special case */
9040 level = query_tree_block_level(fs_info, bytenr);
9042 err |= REFERENCER_MISSING;
9046 key.objectid = root_id;
9047 key.type = BTRFS_ROOT_ITEM_KEY;
9048 key.offset = (u64)-1;
9050 root = btrfs_read_fs_root(fs_info, &key);
9052 err |= REFERENCER_MISSING;
9056 /* Read out the tree block to get item/node key */
9057 eb = read_tree_block(root, bytenr, root->nodesize, 0);
9058 if (!extent_buffer_uptodate(eb)) {
9059 err |= REFERENCER_MISSING;
9060 free_extent_buffer(eb);
9064 /* Empty tree, no need to check key */
9065 if (!btrfs_header_nritems(eb) && !level) {
9066 free_extent_buffer(eb);
/* Node vs leaf determines which key accessor is valid. */
9071 btrfs_node_key_to_cpu(eb, &key, 0);
9073 btrfs_item_key_to_cpu(eb, &key, 0);
9075 free_extent_buffer(eb);
9077 btrfs_init_path(&path);
9078 path.lowest_level = level;
9079 /* Search with the first key, to ensure we can reach it */
9080 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9082 err |= REFERENCER_MISSING;
9086 node = path.nodes[level];
9087 if (btrfs_header_bytenr(node) != bytenr) {
9089 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9090 bytenr, nodesize, bytenr,
9091 btrfs_header_bytenr(node));
9092 err |= REFERENCER_MISMATCH;
9094 if (btrfs_header_level(node) != level) {
9096 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9097 bytenr, nodesize, level,
9098 btrfs_header_level(node));
9099 err |= REFERENCER_MISMATCH;
9103 btrfs_release_path(&path);
/* Two message variants; the branch selecting between them falls in a
 * numbering gap (presumably depends on whether level was resolved). */
9105 if (err & REFERENCER_MISSING) {
9107 error("extent [%llu %d] lost referencer (owner: %llu)",
9108 bytenr, nodesize, root_id);
9111 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9112 bytenr, nodesize, root_id, level);
9119 * Check referencer for shared block backref
9120 * If level == -1, this function will resolve the level.
/*
 * Reads the parent block at @parent and verifies it actually contains a
 * child pointer to @bytenr one level below it.
 * NOTE(review): excerpt has gaps in the embedded numbering (return values
 * on the early-exit branches are missing); comments only.
 */
9122 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9123 u64 parent, u64 bytenr, int level)
9125 struct extent_buffer *eb;
9126 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9128 int found_parent = 0;
9131 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9132 if (!extent_buffer_uptodate(eb))
9136 level = query_tree_block_level(fs_info, bytenr);
/* The parent must sit exactly one level above the child. */
9140 if (level + 1 != btrfs_header_level(eb))
9143 nr = btrfs_header_nritems(eb);
9144 for (i = 0; i < nr; i++) {
9145 if (bytenr == btrfs_node_blockptr(eb, i)) {
9151 free_extent_buffer(eb);
9152 if (!found_parent) {
9154 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9155 bytenr, nodesize, parent, level);
9156 return REFERENCER_MISSING;
9162 * Check referencer for normal (inlined) data ref
9163 * If len == 0, it will be resolved by searching in extent tree
/*
 * Verifies an EXTENT_DATA_REF by counting, in the referencing root, the
 * file extents of @objectid that point at (@bytenr, @len) with a matching
 * dbackref offset (file offset - file extent offset), and comparing that
 * count with the ref's @count.
 * NOTE(review): excerpt has gaps in the embedded numbering (len resolution
 * when len == 0, loop exits, final return are missing); comments only.
 */
9165 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9166 u64 root_id, u64 objectid, u64 offset,
9167 u64 bytenr, u64 len, u32 count)
9169 struct btrfs_root *root;
9170 struct btrfs_root *extent_root = fs_info->extent_root;
9171 struct btrfs_key key;
9172 struct btrfs_path path;
9173 struct extent_buffer *leaf;
9174 struct btrfs_file_extent_item *fi;
9175 u32 found_count = 0;
/* First locate the EXTENT_ITEM in the extent tree for this bytenr. */
9180 key.objectid = bytenr;
9181 key.type = BTRFS_EXTENT_ITEM_KEY;
9182 key.offset = (u64)-1;
9184 btrfs_init_path(&path);
9185 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9188 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9191 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9192 if (key.objectid != bytenr ||
9193 key.type != BTRFS_EXTENT_ITEM_KEY)
9196 btrfs_release_path(&path);
/* Then open the referencing root. */
9198 key.objectid = root_id;
9199 key.type = BTRFS_ROOT_ITEM_KEY;
9200 key.offset = (u64)-1;
9201 btrfs_init_path(&path);
9203 root = btrfs_read_fs_root(fs_info, &key);
9207 key.objectid = objectid;
9208 key.type = BTRFS_EXTENT_DATA_KEY;
9210 * It can be nasty as data backref offset is
9211 * file offset - file extent offset, which is smaller or
9212 * equal to original backref offset. The only special case is
9213 * overflow. So we need to special check and do further search.
9215 key.offset = offset & (1ULL << 63) ? 0 : offset;
9217 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9222 * Search afterwards to get correct one
9223 * NOTE: As we must do a comprehensive check on the data backref to
9224 * make sure the dref count also matches, we must iterate all file
9225 * extents for that inode.
9228 leaf = path.nodes[0];
9229 slot = path.slots[0];
9231 btrfs_item_key_to_cpu(leaf, &key, slot);
9232 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9234 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9236 * Except normal disk bytenr and disk num bytes, we still
9237 * need to do extra check on dbackref offset as
9238 * dbackref offset = file_offset - file_extent_offset
9240 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9241 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9242 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9246 ret = btrfs_next_item(root, &path);
9251 btrfs_release_path(&path);
9252 if (found_count != count) {
9254 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9255 bytenr, len, root_id, objectid, offset, count, found_count);
9256 return REFERENCER_MISSING;
9262 * Check if the referencer of a shared data backref exists
/*
 * Reads the parent leaf at @parent and scans its EXTENT_DATA items for a
 * non-inline file extent whose disk bytenr equals @bytenr.
 * NOTE(review): excerpt has gaps in the embedded numbering (early-exit
 * return values missing); comments only, code unchanged.
 */
9264 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9265 u64 parent, u64 bytenr)
9267 struct extent_buffer *eb;
9268 struct btrfs_key key;
9269 struct btrfs_file_extent_item *fi;
9270 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9272 int found_parent = 0;
9275 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9276 if (!extent_buffer_uptodate(eb))
9279 nr = btrfs_header_nritems(eb);
9280 for (i = 0; i < nr; i++) {
9281 btrfs_item_key_to_cpu(eb, &key, i);
9282 if (key.type != BTRFS_EXTENT_DATA_KEY)
/* Inline extents have no disk bytenr — skip them. */
9285 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9286 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9289 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9296 free_extent_buffer(eb);
9297 if (!found_parent) {
9298 error("shared extent %llu referencer lost (parent: %llu)",
9300 return REFERENCER_MISSING;
9306 * This function will check a given extent item, including its backref and
9307 * itself (like crossing stripe boundary and type)
9309 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Dispatches every inline backref of an EXTENT_ITEM/METADATA_ITEM to the
 * matching referencer check (tree block / shared block / data / shared
 * data), validates stripe-boundary crossing for metadata, and accumulates
 * the global bytes_used counter.
 * NOTE(review): excerpt has gaps in the embedded numbering (switch/while
 * keywords, `end` bound check, final return are missing); comments only.
 */
9311 static int check_extent_item(struct btrfs_fs_info *fs_info,
9312 struct extent_buffer *eb, int slot)
9314 struct btrfs_extent_item *ei;
9315 struct btrfs_extent_inline_ref *iref;
9316 struct btrfs_extent_data_ref *dref;
9320 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9321 u32 item_size = btrfs_item_size_nr(eb, slot);
9326 struct btrfs_key key;
/* EXTENT_ITEM offset is the byte length; METADATA_ITEM implies nodesize. */
9330 btrfs_item_key_to_cpu(eb, &key, slot);
9331 if (key.type == BTRFS_EXTENT_ITEM_KEY)
9332 bytes_used += key.offset;
9334 bytes_used += nodesize;
9336 if (item_size < sizeof(*ei)) {
9338 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9339 * old thing when on disk format is still un-determined.
9340 * No need to care about it anymore
9342 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9346 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9347 flags = btrfs_extent_flags(eb, ei);
9349 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9351 if (metadata && check_crossing_stripes(global_info, key.objectid,
9353 error("bad metadata [%llu, %llu) crossing stripe boundary",
9354 key.objectid, key.objectid + nodesize);
9355 err |= CROSSING_STRIPE_BOUNDARY;
9358 ptr = (unsigned long)(ei + 1);
9360 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9361 /* Old EXTENT_ITEM metadata */
9362 struct btrfs_tree_block_info *info;
9364 info = (struct btrfs_tree_block_info *)ptr;
9365 level = btrfs_tree_block_level(eb, info);
9366 ptr += sizeof(struct btrfs_tree_block_info);
9368 /* New METADATA_ITEM */
9371 end = (unsigned long)ei + item_size;
9374 err |= ITEM_SIZE_MISMATCH;
9378 /* Now check every backref in this extent item */
9380 iref = (struct btrfs_extent_inline_ref *)ptr;
9381 type = btrfs_extent_inline_ref_type(eb, iref);
9382 offset = btrfs_extent_inline_ref_offset(eb, iref);
9384 case BTRFS_TREE_BLOCK_REF_KEY:
9385 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9389 case BTRFS_SHARED_BLOCK_REF_KEY:
9390 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9394 case BTRFS_EXTENT_DATA_REF_KEY:
9395 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9396 ret = check_extent_data_backref(fs_info,
9397 btrfs_extent_data_ref_root(eb, dref),
9398 btrfs_extent_data_ref_objectid(eb, dref),
9399 btrfs_extent_data_ref_offset(eb, dref),
9400 key.objectid, key.offset,
9401 btrfs_extent_data_ref_count(eb, dref));
9404 case BTRFS_SHARED_DATA_REF_KEY:
9405 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9409 error("extent[%llu %d %llu] has unknown ref type: %d",
9410 key.objectid, key.type, key.offset, type);
9411 err |= UNKNOWN_TYPE;
9415 ptr += btrfs_extent_inline_ref_size(type);
9424 * Check if a dev extent item is referred correctly by its chunk
/*
 * Looks up the chunk named by the dev extent's (chunk_objectid,
 * chunk_offset), compares lengths, then scans the chunk's stripes for one
 * whose (devid, offset) matches this dev extent's key.
 * NOTE(review): excerpt has gaps in the embedded numbering (search-failure
 * branch, `l = path.nodes[0]` assignment, success return are missing);
 * comments only, code unchanged.
 */
9426 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9427 struct extent_buffer *eb, int slot)
9429 struct btrfs_root *chunk_root = fs_info->chunk_root;
9430 struct btrfs_dev_extent *ptr;
9431 struct btrfs_path path;
9432 struct btrfs_key chunk_key;
9433 struct btrfs_key devext_key;
9434 struct btrfs_chunk *chunk;
9435 struct extent_buffer *l;
9439 int found_chunk = 0;
9442 btrfs_item_key_to_cpu(eb, &devext_key, slot);
9443 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9444 length = btrfs_dev_extent_length(eb, ptr);
9446 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9447 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9448 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9450 btrfs_init_path(&path);
9451 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9456 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9457 if (btrfs_chunk_length(l, chunk) != length)
9460 num_stripes = btrfs_chunk_num_stripes(l, chunk);
9461 for (i = 0; i < num_stripes; i++) {
9462 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9463 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9465 if (devid == devext_key.objectid &&
9466 offset == devext_key.offset) {
9472 btrfs_release_path(&path);
9475 "device extent[%llu, %llu, %llu] did not find the related chunk",
9476 devext_key.objectid, devext_key.offset, length);
9477 return REFERENCER_MISSING;
9483 * Check if the used space is correct with the dev item
/*
 * Sums the lengths of every DEV_EXTENT belonging to this device id in the
 * device tree and compares the total against the dev item's bytes_used.
 * NOTE(review): excerpt has gaps in the embedded numbering (key.offset
 * init, loop keyword, success return missing); comments only.
 */
9485 static int check_dev_item(struct btrfs_fs_info *fs_info,
9486 struct extent_buffer *eb, int slot)
9488 struct btrfs_root *dev_root = fs_info->dev_root;
9489 struct btrfs_dev_item *dev_item;
9490 struct btrfs_path path;
9491 struct btrfs_key key;
9492 struct btrfs_dev_extent *ptr;
9498 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9499 dev_id = btrfs_device_id(eb, dev_item);
9500 used = btrfs_device_bytes_used(eb, dev_item);
9502 key.objectid = dev_id;
9503 key.type = BTRFS_DEV_EXTENT_KEY;
9506 btrfs_init_path(&path);
9507 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9509 btrfs_item_key_to_cpu(eb, &key, slot);
9510 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9511 key.objectid, key.type, key.offset);
9512 btrfs_release_path(&path);
9513 return REFERENCER_MISSING;
9516 /* Iterate dev_extents to calculate the used space of a device */
9518 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9520 if (key.objectid > dev_id)
9522 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9525 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9526 struct btrfs_dev_extent);
9527 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9529 ret = btrfs_next_item(dev_root, &path);
9533 btrfs_release_path(&path);
9535 if (used != total) {
9536 btrfs_item_key_to_cpu(eb, &key, slot);
/* NOTE(review): message prints ROOT_TREE_OBJECTID/DEV_EXTENT_KEY constants
 * rather than the dev item's own key fields — looks suspicious; verify
 * against upstream before relying on this output. */
9538 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9539 total, used, BTRFS_ROOT_TREE_OBJECTID,
9540 BTRFS_DEV_EXTENT_KEY, dev_id);
9541 return ACCOUNTING_MISMATCH;
9547 * Check a block group item with its referener (chunk) and its used space
9548 * with extent/metadata item
/*
 * Two-part validation of a BLOCK_GROUP_ITEM:
 *  1. its chunk (keyed FIRST_CHUNK_TREE/CHUNK_ITEM/bg start) exists and
 *     has the same length;
 *  2. the sum of EXTENT_ITEM/METADATA_ITEM sizes inside the block group's
 *     range equals the block group's `used` field, with each extent's
 *     DATA/TREE_BLOCK flag matching the block group type.
 * NOTE(review): excerpt has gaps in the embedded numbering (loop keyword,
 * some continue/break lines, final return are missing); comments only.
 */
9550 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9551 struct extent_buffer *eb, int slot)
9553 struct btrfs_root *extent_root = fs_info->extent_root;
9554 struct btrfs_root *chunk_root = fs_info->chunk_root;
9555 struct btrfs_block_group_item *bi;
9556 struct btrfs_block_group_item bg_item;
9557 struct btrfs_path path;
9558 struct btrfs_key bg_key;
9559 struct btrfs_key chunk_key;
9560 struct btrfs_key extent_key;
9561 struct btrfs_chunk *chunk;
9562 struct extent_buffer *leaf;
9563 struct btrfs_extent_item *ei;
9564 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9572 btrfs_item_key_to_cpu(eb, &bg_key, slot);
9573 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9574 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9575 used = btrfs_block_group_used(&bg_item);
9576 bg_flags = btrfs_block_group_flags(&bg_item);
9578 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9579 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9580 chunk_key.offset = bg_key.objectid;
9582 btrfs_init_path(&path);
9583 /* Search for the referencer chunk */
9584 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9587 "block group[%llu %llu] did not find the related chunk item",
9588 bg_key.objectid, bg_key.offset);
9589 err |= REFERENCER_MISSING;
9591 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9592 struct btrfs_chunk);
9593 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9596 "block group[%llu %llu] related chunk item length does not match",
9597 bg_key.objectid, bg_key.offset);
9598 err |= REFERENCER_MISMATCH;
9601 btrfs_release_path(&path);
9603 /* Search from the block group bytenr */
9604 extent_key.objectid = bg_key.objectid;
9605 extent_key.type = 0;
9606 extent_key.offset = 0;
9608 btrfs_init_path(&path);
9609 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9613 /* Iterate extent tree to account used space */
9615 leaf = path.nodes[0];
9616 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
9617 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9620 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9621 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9623 if (extent_key.objectid < bg_key.objectid)
/* METADATA_ITEM size is implicit (nodesize); EXTENT_ITEM carries it in
 * the key offset. */
9626 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9629 total += extent_key.offset;
9631 ei = btrfs_item_ptr(leaf, path.slots[0],
9632 struct btrfs_extent_item);
9633 flags = btrfs_extent_flags(leaf, ei);
9634 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9635 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9637 "bad extent[%llu, %llu) type mismatch with chunk",
9638 extent_key.objectid,
9639 extent_key.objectid + extent_key.offset);
9640 err |= CHUNK_TYPE_MISMATCH;
9642 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9643 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9644 BTRFS_BLOCK_GROUP_METADATA))) {
9646 "bad extent[%llu, %llu) type mismatch with chunk",
9647 extent_key.objectid,
9648 extent_key.objectid + nodesize);
9649 err |= CHUNK_TYPE_MISMATCH;
9653 ret = btrfs_next_item(extent_root, &path);
9659 btrfs_release_path(&path);
9661 if (total != used) {
9663 "block group[%llu %llu] used %llu but extent items used %llu",
9664 bg_key.objectid, bg_key.offset, used, total);
9665 err |= ACCOUNTING_MISMATCH;
9671 * Check a chunk item.
9672 * Including checking all referred dev_extents and block group
9674 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9675 struct extent_buffer *eb, int slot)
9677 struct btrfs_root *extent_root = fs_info->extent_root;
9678 struct btrfs_root *dev_root = fs_info->dev_root;
9679 struct btrfs_path path;
9680 struct btrfs_key chunk_key;
9681 struct btrfs_key bg_key;
9682 struct btrfs_key devext_key;
9683 struct btrfs_chunk *chunk;
9684 struct extent_buffer *leaf;
9685 struct btrfs_block_group_item *bi;
9686 struct btrfs_block_group_item bg_item;
9687 struct btrfs_dev_extent *ptr;
9688 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9700 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9701 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9702 length = btrfs_chunk_length(eb, chunk);
9703 chunk_end = chunk_key.offset + length;
9704 if (!IS_ALIGNED(length, sectorsize)) {
9705 error("chunk[%llu %llu) not aligned to %u",
9706 chunk_key.offset, chunk_end, sectorsize);
9707 err |= BYTES_UNALIGNED;
9711 type = btrfs_chunk_type(eb, chunk);
9712 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9713 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9714 error("chunk[%llu %llu) has no chunk type",
9715 chunk_key.offset, chunk_end);
9716 err |= UNKNOWN_TYPE;
9718 if (profile && (profile & (profile - 1))) {
9719 error("chunk[%llu %llu) multiple profiles detected: %llx",
9720 chunk_key.offset, chunk_end, profile);
9721 err |= UNKNOWN_TYPE;
9724 bg_key.objectid = chunk_key.offset;
9725 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9726 bg_key.offset = length;
9728 btrfs_init_path(&path);
9729 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9732 "chunk[%llu %llu) did not find the related block group item",
9733 chunk_key.offset, chunk_end);
9734 err |= REFERENCER_MISSING;
9736 leaf = path.nodes[0];
9737 bi = btrfs_item_ptr(leaf, path.slots[0],
9738 struct btrfs_block_group_item);
9739 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9741 if (btrfs_block_group_flags(&bg_item) != type) {
9743 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9744 chunk_key.offset, chunk_end, type,
9745 btrfs_block_group_flags(&bg_item));
9746 err |= REFERENCER_MISSING;
9750 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9751 for (i = 0; i < num_stripes; i++) {
9752 btrfs_release_path(&path);
9753 btrfs_init_path(&path);
9754 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9755 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9756 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9758 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9763 leaf = path.nodes[0];
9764 ptr = btrfs_item_ptr(leaf, path.slots[0],
9765 struct btrfs_dev_extent);
9766 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9767 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9768 if (objectid != chunk_key.objectid ||
9769 offset != chunk_key.offset ||
9770 btrfs_dev_extent_length(leaf, ptr) != length)
9774 err |= BACKREF_MISSING;
9776 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9777 chunk_key.objectid, chunk_end, i);
9780 btrfs_release_path(&path);
/*
 * Main entry function to check known items and update related accounting info
 *
 * Dispatches each item in the leaf @eb to the dedicated checker for its key
 * type; unknown key types are not deeply checked here.
 */
static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_key key;
	struct btrfs_extent_data_ref *dref;

	btrfs_item_key_to_cpu(eb, &key, slot);

	/* dispatch on the item type */
	case BTRFS_EXTENT_DATA_KEY:
		ret = check_extent_data_item(root, eb, slot);
	case BTRFS_BLOCK_GROUP_ITEM_KEY:
		ret = check_block_group_item(fs_info, eb, slot);
	case BTRFS_DEV_ITEM_KEY:
		ret = check_dev_item(fs_info, eb, slot);
	case BTRFS_CHUNK_ITEM_KEY:
		ret = check_chunk_item(fs_info, eb, slot);
	case BTRFS_DEV_EXTENT_KEY:
		ret = check_dev_extent_item(fs_info, eb, slot);
	case BTRFS_EXTENT_ITEM_KEY:
	case BTRFS_METADATA_ITEM_KEY:
		ret = check_extent_item(fs_info, eb, slot);
	case BTRFS_EXTENT_CSUM_KEY:
		/* csum items only feed the global accounting, no deep check */
		total_csum_bytes += btrfs_item_size_nr(eb, slot);
	case BTRFS_TREE_BLOCK_REF_KEY:
		/* keyed backrefs: key.offset is the referencing root/parent */
		ret = check_tree_block_backref(fs_info, key.offset,
	case BTRFS_EXTENT_DATA_REF_KEY:
		dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
		ret = check_extent_data_backref(fs_info,
				btrfs_extent_data_ref_root(eb, dref),
				btrfs_extent_data_ref_objectid(eb, dref),
				btrfs_extent_data_ref_offset(eb, dref),
				btrfs_extent_data_ref_count(eb, dref));
	case BTRFS_SHARED_BLOCK_REF_KEY:
		ret = check_shared_block_backref(fs_info, key.offset,
	case BTRFS_SHARED_DATA_REF_KEY:
		ret = check_shared_data_backref(fs_info, key.offset,

	/* advance to the next item in this leaf */
	if (++slot < btrfs_header_nritems(eb))
/*
 * Helper function for later fs/subvol tree check.  To determine if a tree
 * block should be checked.
 * This function will ensure only the direct referencer with lowest rootid to
 * check a fs/subvolume tree block.
 *
 * Backref check at extent tree would detect errors like missing subvolume
 * tree, so we can do aggressive check to reduce duplicated checks.
 */
static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	struct btrfs_key key;
	struct btrfs_path path;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;

	/* locate the extent item of this tree block in the extent tree */
	btrfs_init_path(&path);
	key.objectid = btrfs_header_bytenr(eb);
	key.type = BTRFS_METADATA_ITEM_KEY;
	key.offset = (u64)-1;

	/*
	 * Any failure in backref resolving means we can't determine
	 * whom the tree block belongs to.
	 * So in that case, we need to check that tree block
	 */
	ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);

	ret = btrfs_previous_extent_item(extent_root, &path,
					 btrfs_header_bytenr(eb));

	leaf = path.nodes[0];
	slot = path.slots[0];
	btrfs_item_key_to_cpu(leaf, &key, slot);
	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);

	/* skinny metadata item: inline refs follow the extent item directly */
	if (key.type == BTRFS_METADATA_ITEM_KEY) {
		iref = (struct btrfs_extent_inline_ref *)(ei + 1);
		struct btrfs_tree_block_info *info;

		/* classic metadata: a tree_block_info sits before the refs */
		info = (struct btrfs_tree_block_info *)(ei + 1);
		iref = (struct btrfs_extent_inline_ref *)(info + 1);

	/* walk all inline refs of the extent item */
	item_size = btrfs_item_size_nr(leaf, slot);
	ptr = (unsigned long)iref;
	end = (unsigned long)ei + item_size;
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_extent_inline_ref_type(leaf, iref);
		offset = btrfs_extent_inline_ref_offset(leaf, iref);

		/*
		 * We only check the tree block if current root is
		 * the lowest referencer of it.
		 */
		if (type == BTRFS_TREE_BLOCK_REF_KEY &&
		    offset < root->objectid) {
			btrfs_release_path(&path);

		ptr += btrfs_extent_inline_ref_size(type);

	/*
	 * Normally we should also check keyed tree block ref, but that may be
	 * very time consuming.  Inlined ref should already make us skip a lot
	 * of refs now.  So skip search keyed tree block ref.
	 */
	btrfs_release_path(&path);
/*
 * Traversal function for tree block. We will do:
 * 1) Skip shared fs/subvolume tree blocks
 * 2) Update related bytes accounting
 * 3) Pre-order traversal
 */
static int traverse_tree_block(struct btrfs_root *root,
				struct extent_buffer *node)
	struct extent_buffer *eb;
	struct btrfs_key key;
	struct btrfs_key drop_key;

	/*
	 * Skip shared fs/subvolume tree block, in that case they will
	 * be checked by referencer with lowest rootid
	 */
	if (is_fstree(root->objectid) && !should_check(root, node))

	/* Update bytes accounting */
	total_btree_bytes += node->len;
	if (fs_root_objectid(btrfs_header_owner(node)))
		total_fs_tree_bytes += node->len;
	if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
		total_extent_tree_bytes += node->len;
	/* remember whether any old-format (pre mixed backref) block is seen */
	if (!found_old_backref &&
	    btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
	    btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
	    !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
		found_old_backref = 1;

	/* pre-order traversal, check itself first */
	level = btrfs_header_level(node);
	ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
				   btrfs_header_level(node),
				   btrfs_header_owner(node));
		"check %s failed root %llu bytenr %llu level %d, force continue check",
			level ? "node":"leaf", root->objectid,
			btrfs_header_bytenr(node), btrfs_header_level(node));

		/* leaf: account free space as waste and check its items */
		btree_space_waste += btrfs_leaf_free_space(root, node);
		ret = check_leaf_items(root, node);

	nr = btrfs_header_nritems(node);
	btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
	/* unused key-pointer slots of this node count as wasted space */
	btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
			     sizeof(struct btrfs_key_ptr);

	/* Then check all its children */
	for (i = 0; i < nr; i++) {
		u64 blocknr = btrfs_node_blockptr(node, i);

		btrfs_node_key_to_cpu(node, &key, i);
		/* skip children already removed by snapshot deletion */
		if (level == root->root_item.drop_level &&
		    is_dropped_key(&key, &drop_key))

		/*
		 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
		 * to call the function itself.
		 */
		eb = read_tree_block(root, blocknr, root->nodesize, 0);
		if (extent_buffer_uptodate(eb)) {
			ret = traverse_tree_block(root, eb);
		free_extent_buffer(eb);
10041 * Low memory usage version check_chunks_and_extents.
10043 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10045 struct btrfs_path path;
10046 struct btrfs_key key;
10047 struct btrfs_root *root1;
10048 struct btrfs_root *cur_root;
10052 root1 = root->fs_info->chunk_root;
10053 ret = traverse_tree_block(root1, root1->node);
10056 root1 = root->fs_info->tree_root;
10057 ret = traverse_tree_block(root1, root1->node);
10060 btrfs_init_path(&path);
10061 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10063 key.type = BTRFS_ROOT_ITEM_KEY;
10065 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10067 error("cannot find extent treet in tree_root");
10072 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10073 if (key.type != BTRFS_ROOT_ITEM_KEY)
10075 key.offset = (u64)-1;
10077 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10078 if (IS_ERR(cur_root) || !cur_root) {
10079 error("failed to read tree: %lld", key.objectid);
10083 ret = traverse_tree_block(cur_root, cur_root->node);
10087 ret = btrfs_next_item(root1, &path);
10093 btrfs_release_path(&path);
/*
 * Replace @root's node with a fresh empty block (or reuse the current one
 * when @overwrite is set) and update the root item to match.
 */
static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root, int overwrite)
	struct extent_buffer *c;
	struct extent_buffer *old = root->node;
	struct btrfs_disk_key disk_key = {0,0,0};

	/* overwrite case: keep using the existing node */
	extent_buffer_get(c);

	c = btrfs_alloc_free_block(trans, root,
				   root->root_key.objectid,
				   &disk_key, level, 0, 0);
	extent_buffer_get(c);

	/* initialize the new node as an empty block of the given level */
	memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
	btrfs_set_header_level(c, level);
	btrfs_set_header_bytenr(c, c->start);
	btrfs_set_header_generation(c, trans->transid);
	btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
	btrfs_set_header_owner(c, root->root_key.objectid);

	write_extent_buffer(c, root->fs_info->fsid,
			    btrfs_header_fsid(), BTRFS_FSID_SIZE);

	write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
			    btrfs_header_chunk_tree_uuid(c),

	btrfs_mark_buffer_dirty(c);
	/*
	 * this case can happen in the following case:
	 *
	 * 1.overwrite previous root.
	 *
	 * 2.reinit reloc data root, this is because we skip pin
	 * down reloc data tree before which means we can allocate
	 * same block bytenr here.
	 */
	if (old->start == c->start) {
		btrfs_set_root_generation(&root->root_item,
		root->root_item.level = btrfs_header_level(root->node);
		ret = btrfs_update_root(trans, root->fs_info->tree_root,
					&root->root_key, &root->root_item);
			free_extent_buffer(c);
	free_extent_buffer(old);

	add_root_to_dirty_list(root);
/*
 * Recursively pin a tree block and everything reachable below it so the
 * extents are not reallocated while the extent tree is rebuilt.
 * @tree_root: non-zero while walking the tree root; root items found in its
 * leaves are then followed into their subtrees as well.
 */
static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
				struct extent_buffer *eb, int tree_root)
	struct extent_buffer *tmp;
	struct btrfs_root_item *ri;
	struct btrfs_key key;
	int level = btrfs_header_level(eb);

	/*
	 * If we have pinned this block before, don't pin it again.
	 * This can not only avoid forever loop with broken filesystem
	 * but also give us some speedups.
	 */
	if (test_range_bit(&fs_info->pinned_extents, eb->start,
			   eb->start + eb->len - 1, EXTENT_DIRTY, 0))

	btrfs_pin_extent(fs_info, eb->start, eb->len);

	nodesize = btrfs_super_nodesize(fs_info->super_copy);
	nritems = btrfs_header_nritems(eb);
	for (i = 0; i < nritems; i++) {
		/* leaf of the tree root: follow the root items */
		btrfs_item_key_to_cpu(eb, &key, i);
		if (key.type != BTRFS_ROOT_ITEM_KEY)
		/* Skip the extent root and reloc roots */
		if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
		    key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
		    key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
		ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
		bytenr = btrfs_disk_root_bytenr(eb, ri);

		/*
		 * If at any point we start needing the real root we
		 * will have to build a stump root for the root we are
		 * in, but for now this doesn't actually use the root so
		 * just pass in extent_root.
		 */
		tmp = read_tree_block(fs_info->extent_root, bytenr,
		if (!extent_buffer_uptodate(tmp)) {
			fprintf(stderr, "Error reading root block\n");
		ret = pin_down_tree_blocks(fs_info, tmp, 0);
		free_extent_buffer(tmp);

		/* internal node: recurse into each child pointer */
		bytenr = btrfs_node_blockptr(eb, i);

		/* If we aren't the tree root don't read the block */
		if (level == 1 && !tree_root) {
			btrfs_pin_extent(fs_info, bytenr, nodesize);

		tmp = read_tree_block(fs_info->extent_root, bytenr,
		if (!extent_buffer_uptodate(tmp)) {
			fprintf(stderr, "Error reading tree block\n");
		ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
		free_extent_buffer(tmp);
/*
 * Pin all metadata blocks reachable from the chunk root and the tree root.
 * tree_root=1 on the second call makes pin_down_tree_blocks() also descend
 * into the trees referenced by root items.
 */
static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
	ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);

	return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block group cache from the chunk tree; used while
 * the on-disk extent tree (including its block group items) is recreated.
 */
static int reset_block_groups(struct btrfs_fs_info *fs_info)
	struct btrfs_block_group_cache *cache;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_chunk *chunk;
	struct btrfs_key key;

	path = btrfs_alloc_path();

	key.type = BTRFS_CHUNK_ITEM_KEY;

	ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
		btrfs_free_path(path);

	/*
	 * We do this in case the block groups were screwed up and had alloc
	 * bits that aren't actually set on the chunks.  This happens with
	 * restored images every time and could happen in real life I guess.
	 */
	fs_info->avail_data_alloc_bits = 0;
	fs_info->avail_metadata_alloc_bits = 0;
	fs_info->avail_system_alloc_bits = 0;

	/* First we need to create the in-memory block groups */
	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
		ret = btrfs_next_leaf(fs_info->chunk_root, path);
			btrfs_free_path(path);
	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	if (key.type != BTRFS_CHUNK_ITEM_KEY) {

	chunk = btrfs_item_ptr(leaf, path->slots[0],
			       struct btrfs_chunk);
	/* one in-memory block group per chunk; usage starts at 0 */
	btrfs_add_block_group(fs_info, 0,
			      btrfs_chunk_type(leaf, chunk),
			      key.objectid, key.offset,
			      btrfs_chunk_length(leaf, chunk));
	/* mark the whole chunk range as free for now */
	set_extent_dirty(&fs_info->free_space_cache, key.offset,
			 key.offset + btrfs_chunk_length(leaf, chunk),

	cache = btrfs_lookup_first_block_group(fs_info, start);

	start = cache->key.objectid + cache->key.offset;

	btrfs_free_path(path);
/*
 * Remove a pending balance item and all tree-reloc root items, then
 * reinitialize the data relocation tree, so an interrupted balance cannot
 * resume against the rebuilt extent tree.
 */
static int reset_balance(struct btrfs_trans_handle *trans,
			 struct btrfs_fs_info *fs_info)
	struct btrfs_root *root = fs_info->tree_root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	int del_slot, del_nr = 0;

	path = btrfs_alloc_path();

	key.objectid = BTRFS_BALANCE_OBJECTID;
	key.type = BTRFS_BALANCE_ITEM_KEY;

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		/* presumably taken when no balance item exists — skip ahead */
		goto reinit_data_reloc;

	ret = btrfs_del_item(trans, root, path);

	btrfs_release_path(path);

	/* delete all TREE_RELOC root items */
	key.objectid = BTRFS_TREE_RELOC_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);

	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
	ret = btrfs_del_items(trans, root, path,

	btrfs_release_path(path);

	ret = btrfs_search_slot(trans, root, &key, path,

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
	if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
	del_slot = path->slots[0];
	ret = btrfs_del_items(trans, root, path, del_slot, del_nr);

	btrfs_release_path(path);

	/* re-create the data relocation tree from scratch */
	key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;
	root = btrfs_read_fs_root(fs_info, &key);
	if (IS_ERR(root)) {
		fprintf(stderr, "Error reading data reloc tree\n");
		ret = PTR_ERR(root);
	record_root_in_trans(trans, root);
	ret = btrfs_fsck_reinit_root(trans, root, 0);
	ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);

	btrfs_free_path(path);
/*
 * Wipe and rebuild the extent tree: pin all in-use metadata, reset the
 * in-memory block groups, reinit the extent root, re-insert block group
 * items and clear pending balance state.  Refused on mixed block groups.
 */
static int reinit_extent_tree(struct btrfs_trans_handle *trans,
			      struct btrfs_fs_info *fs_info)
	/*
	 * The only reason we don't do this is because right now we're just
	 * walking the trees we find and pinning down their bytes, we don't look
	 * at any of the leaves.  In order to do mixed groups we'd have to check
	 * the leaves of any fs roots and pin down the bytes for any file
	 * extents we find.  Not hard but why do it if we don't have to?
	 */
	if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
		fprintf(stderr, "We don't support re-initing the extent tree "
			"for mixed block groups yet, please notify a btrfs "
			"developer you want to do this so they can add this "
			"functionality.\n");

	/*
	 * first we need to walk all of the trees except the extent tree and pin
	 * down the bytes that are in use so we don't overwrite any existing
	 */
	ret = pin_metadata_blocks(fs_info);
		fprintf(stderr, "error pinning down used bytes\n");

	/*
	 * Need to drop all the block groups since we're going to recreate all
	 */
	btrfs_free_block_groups(fs_info);
	ret = reset_block_groups(fs_info);
		fprintf(stderr, "error resetting the block groups\n");

	/* Ok we can allocate now, reinit the extent root */
	ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
		fprintf(stderr, "extent root initialization failed\n");
		/*
		 * When the transaction code is updated we should end the
		 * transaction, but for now progs only knows about commit so
		 * just return an error.
		 */

	/*
	 * Now we have all the in-memory block groups setup so we can make
	 * allocations properly, and the metadata we care about is safe since we
	 * pinned all of it above.
	 */
		struct btrfs_block_group_cache *cache;

		cache = btrfs_lookup_first_block_group(fs_info, start);
		start = cache->key.objectid + cache->key.offset;
		/* write one block group item back into the new extent root */
		ret = btrfs_insert_item(trans, fs_info->extent_root,
					&cache->key, &cache->item,
					sizeof(cache->item));
			fprintf(stderr, "Error adding block group\n");
		btrfs_extent_post_op(trans, fs_info->extent_root);

	ret = reset_balance(trans, fs_info);
		fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force a single metadata block to be COWed again by searching down to it
 * inside a transaction with cow=1 (rewrites blocks with stale backrefs).
 */
static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
	struct btrfs_path *path;
	struct btrfs_trans_handle *trans;
	struct btrfs_key key;

	printf("Recowing metadata block %llu\n", eb->start);
	/* resolve the owning root of the block from its header */
	key.objectid = btrfs_header_owner(eb);
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;

	root = btrfs_read_fs_root(root->fs_info, &key);
	if (IS_ERR(root)) {
		fprintf(stderr, "Couldn't find owner root %llu\n",
		return PTR_ERR(root);

	path = btrfs_alloc_path();

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);

	/* stop the search at eb's level so eb itself gets COWed */
	path->lowest_level = btrfs_header_level(eb);
	if (path->lowest_level)
		btrfs_node_key_to_cpu(eb, &key, 0);
		btrfs_item_key_to_cpu(eb, &key, 0);

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	btrfs_commit_transaction(trans, root);
	btrfs_free_path(path);
/*
 * Delete one previously recorded bad item from its owning root inside a
 * one-shot transaction.
 */
static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
	struct btrfs_path *path;
	struct btrfs_trans_handle *trans;
	struct btrfs_key key;

	printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
	       bad->key.type, bad->key.offset);
	/* resolve the root the bad item lives in */
	key.objectid = bad->root_id;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;

	root = btrfs_read_fs_root(root->fs_info, &key);
	if (IS_ERR(root)) {
		fprintf(stderr, "Couldn't find owner root %llu\n",
		return PTR_ERR(root);

	path = btrfs_alloc_path();

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);

	/* ins_len=-1, cow=1: position on the item for deletion */
	ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
	ret = btrfs_del_item(trans, root, path);
	btrfs_commit_transaction(trans, root);
	btrfs_free_path(path);
/*
 * Discard the log tree by clearing its pointer and level in the superblock
 * copy, then committing a transaction to write the superblock out.
 */
static int zero_log_tree(struct btrfs_root *root)
	struct btrfs_trans_handle *trans;

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
	btrfs_set_super_log_root(root->fs_info->super_copy, 0);
	btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
	ret = btrfs_commit_transaction(trans, root);
/*
 * Read the data extent at [start, start + len) one sector at a time into
 * @buf and insert a checksum item for each sector into the csum tree.
 */
static int populate_csum(struct btrfs_trans_handle *trans,
			 struct btrfs_root *csum_root, char *buf, u64 start,
	while (offset < len) {
		sectorsize = csum_root->sectorsize;
		ret = read_extent_data(csum_root, buf, start + offset,
		/*
		 * NOTE(review): third argument is start + len — presumably
		 * the extent end/alloc hint expected by
		 * btrfs_csum_file_block(); confirm against its prototype.
		 */
		ret = btrfs_csum_file_block(trans, csum_root, start + len,
					    start + offset, buf, sectorsize);
		offset += sectorsize;
/*
 * Walk one fs/subvolume tree and populate csums for the on-disk range of
 * every regular (REG) file extent item found in it.
 */
static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
					   struct btrfs_root *csum_root,
					   struct btrfs_root *cur_root)
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *node;
	struct btrfs_file_extent_item *fi;

	path = btrfs_alloc_path();
	/* one sector worth of scratch space for read + checksum */
	buf = malloc(cur_root->fs_info->csum_root->sectorsize);

	ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);

	/* Iterate all regular file extents and fill its csum */
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

		if (key.type != BTRFS_EXTENT_DATA_KEY)
		node = path->nodes[0];
		slot = path->slots[0];
		fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
		if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
		start = btrfs_file_extent_disk_bytenr(node, fi);
		len = btrfs_file_extent_disk_num_bytes(node, fi);

		ret = populate_csum(trans, csum_root, buf, start, len);
		/*
		 * -EEXIST handled specially — presumably the csum was already
		 * written for a shared extent; verify against populate_csum()
		 */
		if (ret == -EEXIST)

		/*
		 * TODO: if next leaf is corrupted, jump to nearest next valid
		 */
		ret = btrfs_next_item(cur_root, path);

	btrfs_free_path(path);
/*
 * Populate the csum tree from every fs/subvolume tree listed in the tree
 * root; used when the extent tree cannot be trusted (e.g. after reinit).
 */
static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
				  struct btrfs_root *csum_root)
	struct btrfs_fs_info *fs_info = csum_root->fs_info;
	struct btrfs_path *path;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_root *cur_root;
	struct extent_buffer *node;
	struct btrfs_key key;

	path = btrfs_alloc_path();

	/* start from the first fs tree root item */
	key.objectid = BTRFS_FS_TREE_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;

	ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);

		node = path->nodes[0];
		slot = path->slots[0];
		btrfs_item_key_to_cpu(node, &key, slot);
		if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
		if (key.type != BTRFS_ROOT_ITEM_KEY)
		/* only fs/subvolume trees carry file extents */
		if (!is_fstree(key.objectid))
		key.offset = (u64)-1;

		cur_root = btrfs_read_fs_root(fs_info, &key);
		if (IS_ERR(cur_root) || !cur_root) {
			fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",

		ret = fill_csum_tree_from_one_fs_root(trans, csum_root,

		ret = btrfs_next_item(tree_root, path);

	btrfs_free_path(path);
/*
 * Populate the csum tree by walking all DATA extent items in the extent
 * tree — the direct path used when the extent tree is trusted.
 */
static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
				      struct btrfs_root *csum_root)
	struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	struct btrfs_key key;

	path = btrfs_alloc_path();

	key.type = BTRFS_EXTENT_ITEM_KEY;

	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
		btrfs_free_path(path);

	/* one sector worth of scratch space for populate_csum() */
	buf = malloc(csum_root->sectorsize);
		btrfs_free_path(path);

		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
			ret = btrfs_next_leaf(extent_root, path);
		leaf = path->nodes[0];

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.type != BTRFS_EXTENT_ITEM_KEY) {

		ei = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_extent_item);
		/* only DATA extents carry checksums; skip tree blocks */
		if (!(btrfs_extent_flags(leaf, ei) &
		      BTRFS_EXTENT_FLAG_DATA)) {

		ret = populate_csum(trans, csum_root, buf, key.objectid,

	btrfs_free_path(path);
/*
 * Recalculate the csum and put it into the csum tree.
 *
 * Extent tree init will wipe out all the extent info, so in that case, we
 * can't depend on extent tree, but use fs tree.  If search_fs_tree is set, we
 * will use fs/subvol trees to init the csum tree.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
	if (search_fs_tree)
		return fill_csum_tree_from_fs(trans, csum_root);

	return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache built by build_roots_info_cache(),
 * freeing every cached root_item_info and the cache tree itself.
 */
static void free_roots_info_cache(void)
	if (!roots_info_cache)

	while (!cache_tree_empty(roots_info_cache)) {
		struct cache_extent *entry;
		struct root_item_info *rii;

		entry = first_cache_extent(roots_info_cache);
		remove_cache_extent(roots_info_cache, entry);
		rii = container_of(entry, struct root_item_info, cache_extent);

	free(roots_info_cache);
	roots_info_cache = NULL;	/* allow a later rebuild */
/*
 * Scan the extent tree for tree-block extents carrying an inline
 * TREE_BLOCK_REF and record, per referencing root id, the highest-level
 * block seen (the root-node candidate).  The cache is later consumed by
 * maybe_repair_root_item() to detect root items pointing at stale nodes.
 */
static int build_roots_info_cache(struct btrfs_fs_info *info)
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_path *path;

	/* lazily allocate the global cache on first use */
	if (!roots_info_cache) {
		roots_info_cache = malloc(sizeof(*roots_info_cache));
		if (!roots_info_cache)
		cache_tree_init(roots_info_cache);

	path = btrfs_alloc_path();

	key.type = BTRFS_EXTENT_ITEM_KEY;

	ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
	leaf = path->nodes[0];

		struct btrfs_key found_key;
		struct btrfs_extent_item *ei;
		struct btrfs_extent_inline_ref *iref;
		int slot = path->slots[0];
		struct cache_extent *entry;
		struct root_item_info *rii;

		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(info->extent_root, path);
			leaf = path->nodes[0];
			slot = path->slots[0];

		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

		if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
		    found_key.type != BTRFS_METADATA_ITEM_KEY)

		ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
		flags = btrfs_extent_flags(leaf, ei);

		/* data extents can't be root nodes, skip them */
		if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
		    !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))

		/* skinny metadata item: the level is stored in key.offset */
		if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
			iref = (struct btrfs_extent_inline_ref *)(ei + 1);
			level = found_key.offset;
			struct btrfs_tree_block_info *binfo;

			/* classic item: tree_block_info precedes the refs */
			binfo = (struct btrfs_tree_block_info *)(ei + 1);
			iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
			level = btrfs_tree_block_level(leaf, binfo);

		/*
		 * For a root extent, it must be of the following type and the
		 * first (and only one) iref in the item.
		 */
		type = btrfs_extent_inline_ref_type(leaf, iref);
		if (type != BTRFS_TREE_BLOCK_REF_KEY)

		root_id = btrfs_extent_inline_ref_offset(leaf, iref);
		entry = lookup_cache_extent(roots_info_cache, root_id, 1);
			/* first sighting of this root id: create an entry */
			rii = malloc(sizeof(struct root_item_info));
			rii->cache_extent.start = root_id;
			rii->cache_extent.size = 1;
			rii->level = (u8)-1;	/* "no level recorded yet" */
			entry = &rii->cache_extent;
			ret = insert_cache_extent(roots_info_cache, entry);
			rii = container_of(entry, struct root_item_info,

		ASSERT(rii->cache_extent.start == root_id);
		ASSERT(rii->cache_extent.size == 1);

		/* keep the highest-level block seen for this root id */
		if (level > rii->level || rii->level == (u8)-1) {
			rii->level = level;
			rii->bytenr = found_key.objectid;
			rii->gen = btrfs_extent_generation(leaf, ei);
			rii->node_count = 1;
		} else if (level == rii->level) {

	btrfs_free_path(path);
/*
 * Compare one root item against the candidate root node recorded by
 * build_roots_info_cache() and, unless @read_only_mode, rewrite the item's
 * bytenr/level/generation in place to point at the correct node.
 * @path is expected to be positioned on the root item for @root_key.
 */
static int maybe_repair_root_item(struct btrfs_fs_info *info,
				  struct btrfs_path *path,
				  const struct btrfs_key *root_key,
				  const int read_only_mode)
	const u64 root_id = root_key->objectid;
	struct cache_extent *entry;
	struct root_item_info *rii;
	struct btrfs_root_item ri;
	unsigned long offset;

	entry = lookup_cache_extent(roots_info_cache, root_id, 1);
		"Error: could not find extent items for root %llu\n",
			root_key->objectid);

	rii = container_of(entry, struct root_item_info, cache_extent);
	ASSERT(rii->cache_extent.start == root_id);
	ASSERT(rii->cache_extent.size == 1);

	/* ambiguous: more than one candidate block at the top level */
	if (rii->node_count != 1) {
		"Error: could not find btree root extent for root %llu\n",

	/* read the on-disk root item into a local copy for comparison */
	offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
	read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));

	if (btrfs_root_bytenr(&ri) != rii->bytenr ||
	    btrfs_root_level(&ri) != rii->level ||
	    btrfs_root_generation(&ri) != rii->gen) {

		/*
		 * If we're in repair mode but our caller told us to not update
		 * the root item, i.e. just check if it needs to be updated, don't
		 * print this message, since the caller will call us again shortly
		 * for the same root item without read only mode (the caller will
		 * open a transaction first).
		 */
		if (!(read_only_mode && repair))
			"%sroot item for root %llu,"
			" current bytenr %llu, current gen %llu, current level %u,"
			" new bytenr %llu, new gen %llu, new level %u\n",
				(read_only_mode ? "" : "fixing "),
				btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
				btrfs_root_level(&ri),
				rii->bytenr, rii->gen, rii->level);

		/* item newer than the best found node: refuse to "repair" */
		if (btrfs_root_generation(&ri) > rii->gen) {
			"root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
				root_id, btrfs_root_generation(&ri), rii->gen);

		if (!read_only_mode) {
			btrfs_set_root_bytenr(&ri, rii->bytenr);
			btrfs_set_root_level(&ri, rii->level);
			btrfs_set_root_generation(&ri, rii->gen);
			/* write the corrected item back into the leaf */
			write_extent_buffer(path->nodes[0], &ri,
					    offset, sizeof(ri));
11088 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11089 * caused read-only snapshots to be corrupted if they were created at a moment
11090 * when the source subvolume/snapshot had orphan items. The issue was that the
11091 * on-disk root items became incorrect, referring to the pre orphan cleanup root
11092 * node instead of the post orphan cleanup root node.
11093 * So this function, and its callees, just detects and fixes those cases. Even
11094 * though the regression was for read-only snapshots, this function applies to
11095 * any snapshot/subvolume root.
11096 * This must be run before any other repair code - not doing so makes other
11097 * repair code delete or modify backrefs in the extent tree for example, which
11098 * will result in an inconsistent fs after repairing the root items.
/*
 * Iterate over every ROOT_ITEM in the tree of roots (starting at
 * BTRFS_FIRST_FREE_OBJECTID) and run maybe_repair_root_item() on each,
 * skipping relocation-tree items.  See the comment block above for why this
 * must run before all other repair code.
 *
 * NOTE(review): lines are elided in this listing (line-number gaps); the
 * error paths, the loop construct and the return statement are not visible.
 */
11100 static int repair_root_items(struct btrfs_fs_info *info)
11102 struct btrfs_path *path = NULL;
11103 struct btrfs_key key;
11104 struct extent_buffer *leaf;
11105 struct btrfs_trans_handle *trans = NULL;
11108 int need_trans = 0;
/* First pass over the extent tree: collect root node info per root id. */
11110 ret = build_roots_info_cache(info);
11114 path = btrfs_alloc_path();
11120 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11121 key.type = BTRFS_ROOT_ITEM_KEY;
11126 * Avoid opening and committing transactions if a leaf doesn't have
11127 * any root items that need to be fixed, so that we avoid rotating
11128 * backup roots unnecessarily.
11131 trans = btrfs_start_transaction(info->tree_root, 1);
11132 if (IS_ERR(trans)) {
11133 ret = PTR_ERR(trans);
11138 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11142 leaf = path->nodes[0];
11145 struct btrfs_key found_key;
/* Step into the next leaf when the current one is exhausted. */
11147 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11148 int no_more_keys = find_next_key(path, &key);
11150 btrfs_release_path(path);
11152 ret = btrfs_commit_transaction(trans,
11164 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
11166 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
/* Relocation trees are transient; never touch their root items. */
11168 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11171 ret = maybe_repair_root_item(info, path, &found_key,
/* A fix is needed but no transaction is open yet: restart with one. */
11176 if (!trans && repair) {
11179 btrfs_release_path(path);
11189 free_roots_info_cache();
11190 btrfs_free_path(path);
11192 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the v1 free space cache: walk all block groups, clear each group's
 * cache inode and extent data, then set the superblock's cache_generation
 * to (u64)-1 so the kernel knows the cache is invalid.
 *
 * NOTE(review): elided lines here hide the loop construct and the
 * declaration of 'current' (presumably a u64 block group search cursor -
 * confirm against the full source).
 */
11199 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
11201 struct btrfs_trans_handle *trans;
11202 struct btrfs_block_group_cache *bg_cache;
11206 /* Clear all free space cache inodes and its extent data */
11208 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
11211 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
/* Advance the cursor past this block group for the next lookup. */
11214 current = bg_cache->key.objectid + bg_cache->key.offset;
11217 /* Don't forget to set cache_generation to -1 */
11218 trans = btrfs_start_transaction(fs_info->tree_root, 0);
11219 if (IS_ERR(trans)) {
11220 error("failed to update super block cache generation");
11221 return PTR_ERR(trans);
11223 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
11224 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Usage/help text for "btrfs check", printed by usage(); one string per
 * line of output.  These are user-visible runtime strings - keep wording
 * in sync with the option table in cmd_check().
 */
11229 const char * const cmd_check_usage[] = {
11230 "btrfs check [options] <device>",
11231 "Check structural integrity of a filesystem (unmounted).",
11232 "Check structural integrity of an unmounted filesystem. Verify internal",
11233 "trees' consistency and item connectivity. In the repair mode try to",
11234 "fix the problems found. ",
11235 "WARNING: the repair mode is considered dangerous",
11237 "-s|--super <superblock> use this superblock copy",
11238 "-b|--backup use the first valid backup root copy",
11239 "--repair try to repair the filesystem",
11240 "--readonly run in read-only mode (default)",
11241 "--init-csum-tree create a new CRC tree",
11242 "--init-extent-tree create a new extent tree",
11243 "--mode <MODE> allows choice of memory/IO trade-offs",
11244 " where MODE is one of:",
11245 " original - read inodes and extents to memory (requires",
11246 " more memory, does less IO)",
11247 " lowmem - try to use less memory but read blocks again",
11249 "--check-data-csum verify checksums of data blocks",
11250 "-Q|--qgroup-report print a report on qgroup consistency",
11251 "-E|--subvol-extents <subvolid>",
11252 " print subvolume extents and sharing state",
11253 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
11254 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
11255 "-p|--progress indicate progress",
11256 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
11257 " NOTE: v1 support implemented",
/*
 * Entry point for "btrfs check": parse options, open the filesystem, then
 * run the check phases in order (extents/chunks, root items, free space,
 * fs roots, csums, root refs, qgroups) and print summary statistics.
 *
 * NOTE(review): this listing has many elided lines (gaps in the embedded
 * line numbers) and the function continues past the end of this chunk;
 * error-handling branches, gotos and the final return are not visible here.
 */
11261 int cmd_check(int argc, char **argv)
11263 struct cache_tree root_cache;
11264 struct btrfs_root *root;
11265 struct btrfs_fs_info *info;
11268 u64 tree_root_bytenr = 0;
11269 u64 chunk_root_bytenr = 0;
11270 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11273 int init_csum_tree = 0;
11275 int clear_space_cache = 0;
11276 int qgroup_report = 0;
11277 int qgroups_repaired = 0;
11278 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* Values >= 257 so long-only options don't collide with short option chars. */
11282 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11283 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11284 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11285 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
11286 static const struct option long_options[] = {
11287 { "super", required_argument, NULL, 's' },
11288 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11289 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11290 { "init-csum-tree", no_argument, NULL,
11291 GETOPT_VAL_INIT_CSUM },
11292 { "init-extent-tree", no_argument, NULL,
11293 GETOPT_VAL_INIT_EXTENT },
11294 { "check-data-csum", no_argument, NULL,
11295 GETOPT_VAL_CHECK_CSUM },
11296 { "backup", no_argument, NULL, 'b' },
11297 { "subvol-extents", required_argument, NULL, 'E' },
11298 { "qgroup-report", no_argument, NULL, 'Q' },
11299 { "tree-root", required_argument, NULL, 'r' },
11300 { "chunk-root", required_argument, NULL,
11301 GETOPT_VAL_CHUNK_TREE },
11302 { "progress", no_argument, NULL, 'p' },
11303 { "mode", required_argument, NULL,
11305 { "clear-space-cache", required_argument, NULL,
11306 GETOPT_VAL_CLEAR_SPACE_CACHE},
11307 { NULL, 0, NULL, 0}
/* Option-parsing loop (surrounding while/switch partially elided). */
11310 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11314 case 'a': /* ignored */ break;
11316 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
/* -s: select which superblock mirror copy to use. */
11319 num = arg_strtou64(optarg);
11320 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11322 "super mirror should be less than %d",
11323 BTRFS_SUPER_MIRROR_MAX);
11326 bytenr = btrfs_sb_offset(((int)num));
11327 printf("using SB copy %llu, bytenr %llu\n", num,
11328 (unsigned long long)bytenr);
11334 subvolid = arg_strtou64(optarg);
11337 tree_root_bytenr = arg_strtou64(optarg);
11339 case GETOPT_VAL_CHUNK_TREE:
11340 chunk_root_bytenr = arg_strtou64(optarg);
11343 ctx.progress_enabled = true;
11347 usage(cmd_check_usage);
11348 case GETOPT_VAL_REPAIR:
11349 printf("enabling repair mode\n");
11351 ctree_flags |= OPEN_CTREE_WRITES;
11353 case GETOPT_VAL_READONLY:
11356 case GETOPT_VAL_INIT_CSUM:
11357 printf("Creating a new CRC tree\n");
11358 init_csum_tree = 1;
11360 ctree_flags |= OPEN_CTREE_WRITES;
11362 case GETOPT_VAL_INIT_EXTENT:
11363 init_extent_tree = 1;
/* Block groups are rebuilt from scratch, so skip loading them. */
11364 ctree_flags |= (OPEN_CTREE_WRITES |
11365 OPEN_CTREE_NO_BLOCK_GROUPS);
11368 case GETOPT_VAL_CHECK_CSUM:
11369 check_data_csum = 1;
11371 case GETOPT_VAL_MODE:
11372 check_mode = parse_check_mode(optarg);
11373 if (check_mode == CHECK_MODE_UNKNOWN) {
11374 error("unknown mode: %s", optarg);
11378 case GETOPT_VAL_CLEAR_SPACE_CACHE:
11379 if (strcmp(optarg, "v1") != 0) {
/*
 * NOTE(review): "implmented" is a typo in this user-visible message;
 * should read "implemented" (kept byte-identical here).
 */
11381 "only v1 support implmented, unrecognized value %s",
11385 clear_space_cache = 1;
11386 ctree_flags |= OPEN_CTREE_WRITES;
/* Exactly one positional argument: the device to check. */
11391 if (check_argc_exact(argc - optind, 1))
11392 usage(cmd_check_usage);
11394 if (ctx.progress_enabled) {
11395 ctx.tp = TASK_NOTHING;
11396 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11399 /* This check is the only reason for --readonly to exist */
11400 if (readonly && repair) {
11401 error("repair options are not compatible with --readonly");
11406 * Not supported yet
11408 if (repair && check_mode == CHECK_MODE_LOWMEM) {
11409 error("low memory mode doesn't support repair yet");
11414 cache_tree_init(&root_cache);
/* Refuse to run on a mounted filesystem. */
11416 if((ret = check_mounted(argv[optind])) < 0) {
11417 error("could not check mount status: %s", strerror(-ret));
11420 error("%s is currently mounted, aborting", argv[optind]);
11425 /* only allow partial opening under repair mode */
11427 ctree_flags |= OPEN_CTREE_PARTIAL;
11429 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11430 chunk_root_bytenr, ctree_flags);
11432 error("cannot open file system");
11437 global_info = info;
11438 root = info->fs_root;
/* --clear-space-cache: do only that and exit (v2 not supported). */
11439 if (clear_space_cache) {
11440 if (btrfs_fs_compat_ro(info,
11441 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
11443 "free space cache v2 detected, clearing not implemented");
11447 printf("Clearing free space cache\n");
11448 ret = clear_free_space_cache(info);
11450 error("failed to clear free space cache");
11453 printf("Free space cache cleared\n");
11459 * repair mode will force us to commit transaction which
11460 * will make us fail to load log tree when mounting.
11462 if (repair && btrfs_super_log_root(info->super_copy)) {
11463 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
11468 ret = zero_log_tree(root);
11470 error("failed to zero log tree: %d", ret);
11475 uuid_unparse(info->super_copy->fsid, uuidbuf);
/* -Q: report-only mode, no repairs. */
11476 if (qgroup_report) {
11477 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11479 ret = qgroup_verify_all(info);
/* -E: print extent/sharing state for one subvolume, then exit. */
11485 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11486 subvolid, argv[optind], uuidbuf);
11487 ret = print_extent_state(info, subvolid);
11490 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
/* Sanity: the three critical trees must be readable before any check. */
11492 if (!extent_buffer_uptodate(info->tree_root->node) ||
11493 !extent_buffer_uptodate(info->dev_root->node) ||
11494 !extent_buffer_uptodate(info->chunk_root->node)) {
11495 error("critical roots corrupted, unable to check the filesystem");
/* Optional destructive reinitialization of extent and/or csum trees. */
11500 if (init_extent_tree || init_csum_tree) {
11501 struct btrfs_trans_handle *trans;
11503 trans = btrfs_start_transaction(info->extent_root, 0);
11504 if (IS_ERR(trans)) {
11505 error("error starting transaction");
11506 ret = PTR_ERR(trans);
11510 if (init_extent_tree) {
11511 printf("Creating a new extent tree\n");
11512 ret = reinit_extent_tree(trans, info);
11517 if (init_csum_tree) {
11518 printf("Reinitialize checksum tree\n");
11519 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11521 error("checksum tree initialization failed: %d",
11527 ret = fill_csum_tree(trans, info->csum_root,
11530 error("checksum tree refilling failed: %d", ret);
11535 * Ok now we commit and run the normal fsck, which will add
11536 * extent entries for all of the items it finds.
11538 ret = btrfs_commit_transaction(trans, info->extent_root);
11542 if (!extent_buffer_uptodate(info->extent_root->node)) {
11543 error("critical: extent_root, unable to check the filesystem");
11547 if (!extent_buffer_uptodate(info->csum_root->node)) {
11548 error("critical: csum_root, unable to check the filesystem");
/* Phase 1: extent and chunk allocation trees (mode selects implementation). */
11553 if (!ctx.progress_enabled)
11554 printf("checking extents");
11555 if (check_mode == CHECK_MODE_LOWMEM)
11556 ret = check_chunks_and_extents_v2(root);
11558 ret = check_chunks_and_extents(root);
11560 printf("Errors found in extent allocation tree or chunk allocation");
/* Phase 2: root items (positive return = number of outdated/fixed roots). */
11562 ret = repair_root_items(info);
11566 fprintf(stderr, "Fixed %d roots.\n", ret);
11568 } else if (ret > 0) {
11570 "Found %d roots with an outdated root item.\n",
11573 "Please run a filesystem check with the option --repair to fix them.\n");
/* Phase 3: free space cache or free space tree, depending on the feature. */
11578 if (!ctx.progress_enabled) {
11579 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11580 fprintf(stderr, "checking free space tree\n");
11582 fprintf(stderr, "checking free space cache\n");
11584 ret = check_space_cache(root);
11589 * We used to have to have these hole extents in between our real
11590 * extents so if we don't have this flag set we need to make sure there
11591 * are no gaps in the file extents for inodes, otherwise we can just
11592 * ignore it when this happens.
11594 no_holes = btrfs_fs_incompat(root->fs_info,
11595 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
/* Phase 4: fs roots, then csums, then root refs. */
11596 if (!ctx.progress_enabled)
11597 fprintf(stderr, "checking fs roots\n");
11598 ret = check_fs_roots(root, &root_cache);
11602 fprintf(stderr, "checking csums\n");
11603 ret = check_csums(root);
11607 fprintf(stderr, "checking root refs\n");
11608 ret = check_root_refs(root, &root_cache);
/* In repair mode, rewrite (COW) any blocks queued with transid problems. */
11612 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11613 struct extent_buffer *eb;
11615 eb = list_first_entry(&root->fs_info->recow_ebs,
11616 struct extent_buffer, recow);
11617 list_del_init(&eb->recow);
11618 ret = recow_extent_buffer(root, eb);
/* Delete items that earlier phases queued as unrecoverably bad. */
11623 while (!list_empty(&delete_items)) {
11624 struct bad_item *bad;
11626 bad = list_first_entry(&delete_items, struct bad_item, list);
11627 list_del_init(&bad->list);
11629 ret = delete_bad_item(root, bad);
/* Phase 5: qgroup consistency, only if quotas are enabled. */
11633 if (info->quota_enabled) {
11635 fprintf(stderr, "checking quota groups\n");
11636 err = qgroup_verify_all(info);
11640 err = repair_qgroups(info, &qgroups_repaired);
/* Anything left on recow_ebs means transid errors we could not fix. */
11645 if (!list_empty(&root->fs_info->recow_ebs)) {
11646 error("transid errors in file system");
11650 /* Don't override original ret */
11651 if (!ret && qgroups_repaired)
11652 ret = qgroups_repaired;
11654 if (found_old_backref) { /*
11655 * there was a disk format change when mixed
11656 * backref was in testing tree. The old format
11657 * existed about one week.
11659 printf("\n * Found old mixed backref format. "
11660 "The old format is not supported! *"
11661 "\n * Please mount the FS in readonly mode, "
11662 "backup data and re-format the FS. *\n\n");
/* Final summary: totals accumulated in the file-scope counters above. */
11665 printf("found %llu bytes used err is %d\n",
11666 (unsigned long long)bytes_used, ret);
11667 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11668 printf("total tree bytes: %llu\n",
11669 (unsigned long long)total_btree_bytes);
11670 printf("total fs tree bytes: %llu\n",
11671 (unsigned long long)total_fs_tree_bytes);
11672 printf("total extent tree bytes: %llu\n",
11673 (unsigned long long)total_extent_tree_bytes);
11674 printf("btree space waste bytes: %llu\n",
11675 (unsigned long long)btree_space_waste);
11676 printf("file data blocks allocated: %llu\n referenced %llu\n",
11677 (unsigned long long)data_bytes_allocated,
11678 (unsigned long long)data_bytes_referenced);
11680 free_qgroup_counts();
11681 free_root_recs_tree(&root_cache);
11685 if (ctx.progress_enabled)
11686 task_deinit(ctx.info);