2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
49 TASK_NOTHING, /* have to be the last element */
54 enum task_position tp;
56 struct task_info *info;
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
77 enum btrfs_check_mode {
81 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
84 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86 struct extent_backref {
87 struct list_head list;
88 unsigned int is_data:1;
89 unsigned int found_extent_tree:1;
90 unsigned int full_backref:1;
91 unsigned int found_ref:1;
92 unsigned int broken:1;
95 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
97 return list_entry(entry, struct extent_backref, list);
100 struct data_backref {
101 struct extent_backref node;
115 static inline struct data_backref* to_data_backref(struct extent_backref *back)
117 return container_of(back, struct data_backref, node);
121 * Much like data_backref, just removed the undetermined members
122 * and change it to use list_head.
123 * During extent scan, it is stored in root->orphan_data_extent.
124 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
126 struct orphan_data_extent {
127 struct list_head list;
135 struct tree_backref {
136 struct extent_backref node;
143 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
145 return container_of(back, struct tree_backref, node);
148 /* Explicit initialization for extent_record::flag_block_full_backref */
149 enum { FLAG_UNSET = 2 };
151 struct extent_record {
152 struct list_head backrefs;
153 struct list_head dups;
154 struct list_head list;
155 struct cache_extent cache;
156 struct btrfs_disk_key parent_key;
161 u64 extent_item_refs;
163 u64 parent_generation;
167 unsigned int flag_block_full_backref:2;
168 unsigned int found_rec:1;
169 unsigned int content_checked:1;
170 unsigned int owner_ref_checked:1;
171 unsigned int is_root:1;
172 unsigned int metadata:1;
173 unsigned int bad_full_backref:1;
174 unsigned int crossing_stripes:1;
175 unsigned int wrong_chunk_type:1;
178 static inline struct extent_record* to_extent_record(struct list_head *entry)
180 return container_of(entry, struct extent_record, list);
183 struct inode_backref {
184 struct list_head list;
185 unsigned int found_dir_item:1;
186 unsigned int found_dir_index:1;
187 unsigned int found_inode_ref:1;
188 unsigned int filetype:8;
190 unsigned int ref_type;
197 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
199 return list_entry(entry, struct inode_backref, list);
202 struct root_item_record {
203 struct list_head list;
210 struct btrfs_key drop_key;
213 #define REF_ERR_NO_DIR_ITEM (1 << 0)
214 #define REF_ERR_NO_DIR_INDEX (1 << 1)
215 #define REF_ERR_NO_INODE_REF (1 << 2)
216 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
217 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
218 #define REF_ERR_DUP_INODE_REF (1 << 5)
219 #define REF_ERR_INDEX_UNMATCH (1 << 6)
220 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
221 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
222 #define REF_ERR_NO_ROOT_REF (1 << 9)
223 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
224 #define REF_ERR_DUP_ROOT_REF (1 << 11)
225 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
227 struct file_extent_hole {
233 struct inode_record {
234 struct list_head backrefs;
235 unsigned int checked:1;
236 unsigned int merging:1;
237 unsigned int found_inode_item:1;
238 unsigned int found_dir_item:1;
239 unsigned int found_file_extent:1;
240 unsigned int found_csum_item:1;
241 unsigned int some_csum_missing:1;
242 unsigned int nodatasum:1;
255 struct rb_root holes;
256 struct list_head orphan_extents;
261 #define I_ERR_NO_INODE_ITEM (1 << 0)
262 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
263 #define I_ERR_DUP_INODE_ITEM (1 << 2)
264 #define I_ERR_DUP_DIR_INDEX (1 << 3)
265 #define I_ERR_ODD_DIR_ITEM (1 << 4)
266 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
267 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
268 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
269 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
270 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
271 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
272 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
273 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
274 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
275 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
277 struct root_backref {
278 struct list_head list;
279 unsigned int found_dir_item:1;
280 unsigned int found_dir_index:1;
281 unsigned int found_back_ref:1;
282 unsigned int found_forward_ref:1;
283 unsigned int reachable:1;
292 static inline struct root_backref* to_root_backref(struct list_head *entry)
294 return list_entry(entry, struct root_backref, list);
298 struct list_head backrefs;
299 struct cache_extent cache;
300 unsigned int found_root_item:1;
306 struct cache_extent cache;
311 struct cache_extent cache;
312 struct cache_tree root_cache;
313 struct cache_tree inode_cache;
314 struct inode_record *current;
323 struct walk_control {
324 struct cache_tree shared;
325 struct shared_node *nodes[BTRFS_MAX_LEVEL];
331 struct btrfs_key key;
333 struct list_head list;
336 struct extent_entry {
341 struct list_head list;
344 struct root_item_info {
345 /* level of the root */
347 /* number of nodes at this level, must be 1 for a root */
351 struct cache_extent cache_extent;
355 * Error bit for low memory mode check.
357 * Currently no caller cares about it yet. Just internal use for error
360 #define BACKREF_MISSING (1 << 0) /* Backref missing in extent tree */
361 #define BACKREF_MISMATCH (1 << 1) /* Backref exists but does not match */
362 #define BYTES_UNALIGNED (1 << 2) /* Some bytes are not aligned */
363 #define REFERENCER_MISSING (1 << 3) /* Referencer not found */
364 #define REFERENCER_MISMATCH (1 << 4) /* Referenceer found but does not match */
365 #define CROSSING_STRIPE_BOUNDARY (1 << 4) /* For kernel scrub workaround */
366 #define ITEM_SIZE_MISMATCH (1 << 5) /* Bad item size */
367 #define UNKNOWN_TYPE (1 << 6) /* Unknown type */
368 #define ACCOUNTING_MISMATCH (1 << 7) /* Used space accounting error */
369 #define CHUNK_TYPE_MISMATCH (1 << 8)
371 static void *print_status_check(void *p)
373 struct task_ctx *priv = p;
374 const char work_indicator[] = { '.', 'o', 'O', 'o' };
376 static char *task_position_string[] = {
378 "checking free space cache",
382 task_period_start(priv->info, 1000 /* 1s */);
384 if (priv->tp == TASK_NOTHING)
388 printf("%s [%c]\r", task_position_string[priv->tp],
389 work_indicator[count % 4]);
392 task_period_wait(priv->info);
397 static int print_status_return(void *p)
405 static enum btrfs_check_mode parse_check_mode(const char *str)
407 if (strcmp(str, "lowmem") == 0)
408 return CHECK_MODE_LOWMEM;
409 if (strcmp(str, "orig") == 0)
410 return CHECK_MODE_ORIGINAL;
411 if (strcmp(str, "original") == 0)
412 return CHECK_MODE_ORIGINAL;
414 return CHECK_MODE_UNKNOWN;
417 /* Compatible function to allow reuse of old codes */
418 static u64 first_extent_gap(struct rb_root *holes)
420 struct file_extent_hole *hole;
422 if (RB_EMPTY_ROOT(holes))
425 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
429 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
431 struct file_extent_hole *hole1;
432 struct file_extent_hole *hole2;
434 hole1 = rb_entry(node1, struct file_extent_hole, node);
435 hole2 = rb_entry(node2, struct file_extent_hole, node);
437 if (hole1->start > hole2->start)
439 if (hole1->start < hole2->start)
441 /* Now hole1->start == hole2->start */
442 if (hole1->len >= hole2->len)
444 * Hole 1 will be merge center
445 * Same hole will be merged later
448 /* Hole 2 will be merge center */
453 * Add a hole to the record
455 * This will do hole merge for copy_file_extent_holes(),
456 * which will ensure there won't be continuous holes.
458 static int add_file_extent_hole(struct rb_root *holes,
461 struct file_extent_hole *hole;
462 struct file_extent_hole *prev = NULL;
463 struct file_extent_hole *next = NULL;
465 hole = malloc(sizeof(*hole));
470 /* Since compare will not return 0, no -EEXIST will happen */
471 rb_insert(holes, &hole->node, compare_hole);
473 /* simple merge with previous hole */
474 if (rb_prev(&hole->node))
475 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
477 if (prev && prev->start + prev->len >= hole->start) {
478 hole->len = hole->start + hole->len - prev->start;
479 hole->start = prev->start;
480 rb_erase(&prev->node, holes);
485 /* iterate merge with next holes */
487 if (!rb_next(&hole->node))
489 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
491 if (hole->start + hole->len >= next->start) {
492 if (hole->start + hole->len <= next->start + next->len)
493 hole->len = next->start + next->len -
495 rb_erase(&next->node, holes);
504 static int compare_hole_range(struct rb_node *node, void *data)
506 struct file_extent_hole *hole;
509 hole = (struct file_extent_hole *)data;
512 hole = rb_entry(node, struct file_extent_hole, node);
513 if (start < hole->start)
515 if (start >= hole->start && start < hole->start + hole->len)
521 * Delete a hole in the record
523 * This will do the hole split and is much restrict than add.
525 static int del_file_extent_hole(struct rb_root *holes,
528 struct file_extent_hole *hole;
529 struct file_extent_hole tmp;
534 struct rb_node *node;
541 node = rb_search(holes, &tmp, compare_hole_range, NULL);
544 hole = rb_entry(node, struct file_extent_hole, node);
545 if (start + len > hole->start + hole->len)
549 * Now there will be no overlap, delete the hole and re-add the
550 * split(s) if they exists.
552 if (start > hole->start) {
553 prev_start = hole->start;
554 prev_len = start - hole->start;
557 if (hole->start + hole->len > start + len) {
558 next_start = start + len;
559 next_len = hole->start + hole->len - start - len;
562 rb_erase(node, holes);
565 ret = add_file_extent_hole(holes, prev_start, prev_len);
570 ret = add_file_extent_hole(holes, next_start, next_len);
577 static int copy_file_extent_holes(struct rb_root *dst,
580 struct file_extent_hole *hole;
581 struct rb_node *node;
584 node = rb_first(src);
586 hole = rb_entry(node, struct file_extent_hole, node);
587 ret = add_file_extent_hole(dst, hole->start, hole->len);
590 node = rb_next(node);
595 static void free_file_extent_holes(struct rb_root *holes)
597 struct rb_node *node;
598 struct file_extent_hole *hole;
600 node = rb_first(holes);
602 hole = rb_entry(node, struct file_extent_hole, node);
603 rb_erase(node, holes);
605 node = rb_first(holes);
609 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
611 static void record_root_in_trans(struct btrfs_trans_handle *trans,
612 struct btrfs_root *root)
614 if (root->last_trans != trans->transid) {
615 root->track_dirty = 1;
616 root->last_trans = trans->transid;
617 root->commit_root = root->node;
618 extent_buffer_get(root->node);
622 static u8 imode_to_type(u32 imode)
625 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
626 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
627 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
628 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
629 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
630 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
631 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
632 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
635 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
639 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
641 struct device_record *rec1;
642 struct device_record *rec2;
644 rec1 = rb_entry(node1, struct device_record, node);
645 rec2 = rb_entry(node2, struct device_record, node);
646 if (rec1->devid > rec2->devid)
648 else if (rec1->devid < rec2->devid)
654 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
656 struct inode_record *rec;
657 struct inode_backref *backref;
658 struct inode_backref *orig;
659 struct inode_backref *tmp;
660 struct orphan_data_extent *src_orphan;
661 struct orphan_data_extent *dst_orphan;
665 rec = malloc(sizeof(*rec));
667 return ERR_PTR(-ENOMEM);
668 memcpy(rec, orig_rec, sizeof(*rec));
670 INIT_LIST_HEAD(&rec->backrefs);
671 INIT_LIST_HEAD(&rec->orphan_extents);
672 rec->holes = RB_ROOT;
674 list_for_each_entry(orig, &orig_rec->backrefs, list) {
675 size = sizeof(*orig) + orig->namelen + 1;
676 backref = malloc(size);
681 memcpy(backref, orig, size);
682 list_add_tail(&backref->list, &rec->backrefs);
684 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
685 dst_orphan = malloc(sizeof(*dst_orphan));
690 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
691 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
693 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
699 if (!list_empty(&rec->backrefs))
700 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
701 list_del(&orig->list);
705 if (!list_empty(&rec->orphan_extents))
706 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
707 list_del(&orig->list);
716 static void print_orphan_data_extents(struct list_head *orphan_extents,
719 struct orphan_data_extent *orphan;
721 if (list_empty(orphan_extents))
723 printf("The following data extent is lost in tree %llu:\n",
725 list_for_each_entry(orphan, orphan_extents, list) {
726 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
727 orphan->objectid, orphan->offset, orphan->disk_bytenr,
732 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
734 u64 root_objectid = root->root_key.objectid;
735 int errors = rec->errors;
739 /* reloc root errors, we print its corresponding fs root objectid*/
740 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
741 root_objectid = root->root_key.offset;
742 fprintf(stderr, "reloc");
744 fprintf(stderr, "root %llu inode %llu errors %x",
745 (unsigned long long) root_objectid,
746 (unsigned long long) rec->ino, rec->errors);
748 if (errors & I_ERR_NO_INODE_ITEM)
749 fprintf(stderr, ", no inode item");
750 if (errors & I_ERR_NO_ORPHAN_ITEM)
751 fprintf(stderr, ", no orphan item");
752 if (errors & I_ERR_DUP_INODE_ITEM)
753 fprintf(stderr, ", dup inode item");
754 if (errors & I_ERR_DUP_DIR_INDEX)
755 fprintf(stderr, ", dup dir index");
756 if (errors & I_ERR_ODD_DIR_ITEM)
757 fprintf(stderr, ", odd dir item");
758 if (errors & I_ERR_ODD_FILE_EXTENT)
759 fprintf(stderr, ", odd file extent");
760 if (errors & I_ERR_BAD_FILE_EXTENT)
761 fprintf(stderr, ", bad file extent");
762 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
763 fprintf(stderr, ", file extent overlap");
764 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
765 fprintf(stderr, ", file extent discount");
766 if (errors & I_ERR_DIR_ISIZE_WRONG)
767 fprintf(stderr, ", dir isize wrong");
768 if (errors & I_ERR_FILE_NBYTES_WRONG)
769 fprintf(stderr, ", nbytes wrong");
770 if (errors & I_ERR_ODD_CSUM_ITEM)
771 fprintf(stderr, ", odd csum item");
772 if (errors & I_ERR_SOME_CSUM_MISSING)
773 fprintf(stderr, ", some csum missing");
774 if (errors & I_ERR_LINK_COUNT_WRONG)
775 fprintf(stderr, ", link count wrong");
776 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
777 fprintf(stderr, ", orphan file extent");
778 fprintf(stderr, "\n");
779 /* Print the orphan extents if needed */
780 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
781 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
783 /* Print the holes if needed */
784 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
785 struct file_extent_hole *hole;
786 struct rb_node *node;
789 node = rb_first(&rec->holes);
790 fprintf(stderr, "Found file extent holes:\n");
793 hole = rb_entry(node, struct file_extent_hole, node);
794 fprintf(stderr, "\tstart: %llu, len: %llu\n",
795 hole->start, hole->len);
796 node = rb_next(node);
799 fprintf(stderr, "\tstart: 0, len: %llu\n",
800 round_up(rec->isize, root->sectorsize));
804 static void print_ref_error(int errors)
806 if (errors & REF_ERR_NO_DIR_ITEM)
807 fprintf(stderr, ", no dir item");
808 if (errors & REF_ERR_NO_DIR_INDEX)
809 fprintf(stderr, ", no dir index");
810 if (errors & REF_ERR_NO_INODE_REF)
811 fprintf(stderr, ", no inode ref");
812 if (errors & REF_ERR_DUP_DIR_ITEM)
813 fprintf(stderr, ", dup dir item");
814 if (errors & REF_ERR_DUP_DIR_INDEX)
815 fprintf(stderr, ", dup dir index");
816 if (errors & REF_ERR_DUP_INODE_REF)
817 fprintf(stderr, ", dup inode ref");
818 if (errors & REF_ERR_INDEX_UNMATCH)
819 fprintf(stderr, ", index mismatch");
820 if (errors & REF_ERR_FILETYPE_UNMATCH)
821 fprintf(stderr, ", filetype mismatch");
822 if (errors & REF_ERR_NAME_TOO_LONG)
823 fprintf(stderr, ", name too long");
824 if (errors & REF_ERR_NO_ROOT_REF)
825 fprintf(stderr, ", no root ref");
826 if (errors & REF_ERR_NO_ROOT_BACKREF)
827 fprintf(stderr, ", no root backref");
828 if (errors & REF_ERR_DUP_ROOT_REF)
829 fprintf(stderr, ", dup root ref");
830 if (errors & REF_ERR_DUP_ROOT_BACKREF)
831 fprintf(stderr, ", dup root backref");
832 fprintf(stderr, "\n");
835 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
838 struct ptr_node *node;
839 struct cache_extent *cache;
840 struct inode_record *rec = NULL;
843 cache = lookup_cache_extent(inode_cache, ino, 1);
845 node = container_of(cache, struct ptr_node, cache);
847 if (mod && rec->refs > 1) {
848 node->data = clone_inode_rec(rec);
849 if (IS_ERR(node->data))
855 rec = calloc(1, sizeof(*rec));
857 return ERR_PTR(-ENOMEM);
859 rec->extent_start = (u64)-1;
861 INIT_LIST_HEAD(&rec->backrefs);
862 INIT_LIST_HEAD(&rec->orphan_extents);
863 rec->holes = RB_ROOT;
865 node = malloc(sizeof(*node));
868 return ERR_PTR(-ENOMEM);
870 node->cache.start = ino;
871 node->cache.size = 1;
874 if (ino == BTRFS_FREE_INO_OBJECTID)
877 ret = insert_cache_extent(inode_cache, &node->cache);
879 return ERR_PTR(-EEXIST);
884 static void free_orphan_data_extents(struct list_head *orphan_extents)
886 struct orphan_data_extent *orphan;
888 while (!list_empty(orphan_extents)) {
889 orphan = list_entry(orphan_extents->next,
890 struct orphan_data_extent, list);
891 list_del(&orphan->list);
896 static void free_inode_rec(struct inode_record *rec)
898 struct inode_backref *backref;
903 while (!list_empty(&rec->backrefs)) {
904 backref = to_inode_backref(rec->backrefs.next);
905 list_del(&backref->list);
908 free_orphan_data_extents(&rec->orphan_extents);
909 free_file_extent_holes(&rec->holes);
913 static int can_free_inode_rec(struct inode_record *rec)
915 if (!rec->errors && rec->checked && rec->found_inode_item &&
916 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
921 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
922 struct inode_record *rec)
924 struct cache_extent *cache;
925 struct inode_backref *tmp, *backref;
926 struct ptr_node *node;
927 unsigned char filetype;
929 if (!rec->found_inode_item)
932 filetype = imode_to_type(rec->imode);
933 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
934 if (backref->found_dir_item && backref->found_dir_index) {
935 if (backref->filetype != filetype)
936 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
937 if (!backref->errors && backref->found_inode_ref &&
938 rec->nlink == rec->found_link) {
939 list_del(&backref->list);
945 if (!rec->checked || rec->merging)
948 if (S_ISDIR(rec->imode)) {
949 if (rec->found_size != rec->isize)
950 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
951 if (rec->found_file_extent)
952 rec->errors |= I_ERR_ODD_FILE_EXTENT;
953 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
954 if (rec->found_dir_item)
955 rec->errors |= I_ERR_ODD_DIR_ITEM;
956 if (rec->found_size != rec->nbytes)
957 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
958 if (rec->nlink > 0 && !no_holes &&
959 (rec->extent_end < rec->isize ||
960 first_extent_gap(&rec->holes) < rec->isize))
961 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
964 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
965 if (rec->found_csum_item && rec->nodatasum)
966 rec->errors |= I_ERR_ODD_CSUM_ITEM;
967 if (rec->some_csum_missing && !rec->nodatasum)
968 rec->errors |= I_ERR_SOME_CSUM_MISSING;
971 BUG_ON(rec->refs != 1);
972 if (can_free_inode_rec(rec)) {
973 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
974 node = container_of(cache, struct ptr_node, cache);
975 BUG_ON(node->data != rec);
976 remove_cache_extent(inode_cache, &node->cache);
982 static int check_orphan_item(struct btrfs_root *root, u64 ino)
984 struct btrfs_path path;
985 struct btrfs_key key;
988 key.objectid = BTRFS_ORPHAN_OBJECTID;
989 key.type = BTRFS_ORPHAN_ITEM_KEY;
992 btrfs_init_path(&path);
993 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
994 btrfs_release_path(&path);
1000 static int process_inode_item(struct extent_buffer *eb,
1001 int slot, struct btrfs_key *key,
1002 struct shared_node *active_node)
1004 struct inode_record *rec;
1005 struct btrfs_inode_item *item;
1007 rec = active_node->current;
1008 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1009 if (rec->found_inode_item) {
1010 rec->errors |= I_ERR_DUP_INODE_ITEM;
1013 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1014 rec->nlink = btrfs_inode_nlink(eb, item);
1015 rec->isize = btrfs_inode_size(eb, item);
1016 rec->nbytes = btrfs_inode_nbytes(eb, item);
1017 rec->imode = btrfs_inode_mode(eb, item);
1018 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1020 rec->found_inode_item = 1;
1021 if (rec->nlink == 0)
1022 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1023 maybe_free_inode_rec(&active_node->inode_cache, rec);
1027 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1029 int namelen, u64 dir)
1031 struct inode_backref *backref;
1033 list_for_each_entry(backref, &rec->backrefs, list) {
1034 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1036 if (backref->dir != dir || backref->namelen != namelen)
1038 if (memcmp(name, backref->name, namelen))
1043 backref = malloc(sizeof(*backref) + namelen + 1);
1046 memset(backref, 0, sizeof(*backref));
1048 backref->namelen = namelen;
1049 memcpy(backref->name, name, namelen);
1050 backref->name[namelen] = '\0';
1051 list_add_tail(&backref->list, &rec->backrefs);
1055 static int add_inode_backref(struct cache_tree *inode_cache,
1056 u64 ino, u64 dir, u64 index,
1057 const char *name, int namelen,
1058 int filetype, int itemtype, int errors)
1060 struct inode_record *rec;
1061 struct inode_backref *backref;
1063 rec = get_inode_rec(inode_cache, ino, 1);
1064 BUG_ON(IS_ERR(rec));
1065 backref = get_inode_backref(rec, name, namelen, dir);
1068 backref->errors |= errors;
1069 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1070 if (backref->found_dir_index)
1071 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1072 if (backref->found_inode_ref && backref->index != index)
1073 backref->errors |= REF_ERR_INDEX_UNMATCH;
1074 if (backref->found_dir_item && backref->filetype != filetype)
1075 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1077 backref->index = index;
1078 backref->filetype = filetype;
1079 backref->found_dir_index = 1;
1080 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1082 if (backref->found_dir_item)
1083 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1084 if (backref->found_dir_index && backref->filetype != filetype)
1085 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1087 backref->filetype = filetype;
1088 backref->found_dir_item = 1;
1089 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1090 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1091 if (backref->found_inode_ref)
1092 backref->errors |= REF_ERR_DUP_INODE_REF;
1093 if (backref->found_dir_index && backref->index != index)
1094 backref->errors |= REF_ERR_INDEX_UNMATCH;
1096 backref->index = index;
1098 backref->ref_type = itemtype;
1099 backref->found_inode_ref = 1;
1104 maybe_free_inode_rec(inode_cache, rec);
1108 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1109 struct cache_tree *dst_cache)
1111 struct inode_backref *backref;
1116 list_for_each_entry(backref, &src->backrefs, list) {
1117 if (backref->found_dir_index) {
1118 add_inode_backref(dst_cache, dst->ino, backref->dir,
1119 backref->index, backref->name,
1120 backref->namelen, backref->filetype,
1121 BTRFS_DIR_INDEX_KEY, backref->errors);
1123 if (backref->found_dir_item) {
1125 add_inode_backref(dst_cache, dst->ino,
1126 backref->dir, 0, backref->name,
1127 backref->namelen, backref->filetype,
1128 BTRFS_DIR_ITEM_KEY, backref->errors);
1130 if (backref->found_inode_ref) {
1131 add_inode_backref(dst_cache, dst->ino,
1132 backref->dir, backref->index,
1133 backref->name, backref->namelen, 0,
1134 backref->ref_type, backref->errors);
1138 if (src->found_dir_item)
1139 dst->found_dir_item = 1;
1140 if (src->found_file_extent)
1141 dst->found_file_extent = 1;
1142 if (src->found_csum_item)
1143 dst->found_csum_item = 1;
1144 if (src->some_csum_missing)
1145 dst->some_csum_missing = 1;
1146 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1147 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1152 BUG_ON(src->found_link < dir_count);
1153 dst->found_link += src->found_link - dir_count;
1154 dst->found_size += src->found_size;
1155 if (src->extent_start != (u64)-1) {
1156 if (dst->extent_start == (u64)-1) {
1157 dst->extent_start = src->extent_start;
1158 dst->extent_end = src->extent_end;
1160 if (dst->extent_end > src->extent_start)
1161 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1162 else if (dst->extent_end < src->extent_start) {
1163 ret = add_file_extent_hole(&dst->holes,
1165 src->extent_start - dst->extent_end);
1167 if (dst->extent_end < src->extent_end)
1168 dst->extent_end = src->extent_end;
1172 dst->errors |= src->errors;
1173 if (src->found_inode_item) {
1174 if (!dst->found_inode_item) {
1175 dst->nlink = src->nlink;
1176 dst->isize = src->isize;
1177 dst->nbytes = src->nbytes;
1178 dst->imode = src->imode;
1179 dst->nodatasum = src->nodatasum;
1180 dst->found_inode_item = 1;
1182 dst->errors |= I_ERR_DUP_INODE_ITEM;
1190 static int splice_shared_node(struct shared_node *src_node,
1191 struct shared_node *dst_node)
1193 struct cache_extent *cache;
1194 struct ptr_node *node, *ins;
1195 struct cache_tree *src, *dst;
1196 struct inode_record *rec, *conflict;
1197 u64 current_ino = 0;
1201 if (--src_node->refs == 0)
1203 if (src_node->current)
1204 current_ino = src_node->current->ino;
1206 src = &src_node->root_cache;
1207 dst = &dst_node->root_cache;
1209 cache = search_cache_extent(src, 0);
1211 node = container_of(cache, struct ptr_node, cache);
1213 cache = next_cache_extent(cache);
1216 remove_cache_extent(src, &node->cache);
1219 ins = malloc(sizeof(*ins));
1221 ins->cache.start = node->cache.start;
1222 ins->cache.size = node->cache.size;
1226 ret = insert_cache_extent(dst, &ins->cache);
1227 if (ret == -EEXIST) {
1228 conflict = get_inode_rec(dst, rec->ino, 1);
1229 BUG_ON(IS_ERR(conflict));
1230 merge_inode_recs(rec, conflict, dst);
1232 conflict->checked = 1;
1233 if (dst_node->current == conflict)
1234 dst_node->current = NULL;
1236 maybe_free_inode_rec(dst, conflict);
1237 free_inode_rec(rec);
1244 if (src == &src_node->root_cache) {
1245 src = &src_node->inode_cache;
1246 dst = &dst_node->inode_cache;
1250 if (current_ino > 0 && (!dst_node->current ||
1251 current_ino > dst_node->current->ino)) {
1252 if (dst_node->current) {
1253 dst_node->current->checked = 1;
1254 maybe_free_inode_rec(dst, dst_node->current);
1256 dst_node->current = get_inode_rec(dst, current_ino, 1);
1257 BUG_ON(IS_ERR(dst_node->current));
1262 static void free_inode_ptr(struct cache_extent *cache)
1264 struct ptr_node *node;
1265 struct inode_record *rec;
1267 node = container_of(cache, struct ptr_node, cache);
1269 free_inode_rec(rec);
1273 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1275 static struct shared_node *find_shared_node(struct cache_tree *shared,
1278 struct cache_extent *cache;
1279 struct shared_node *node;
1281 cache = lookup_cache_extent(shared, bytenr, 1);
1283 node = container_of(cache, struct shared_node, cache);
1289 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1292 struct shared_node *node;
1294 node = calloc(1, sizeof(*node));
1297 node->cache.start = bytenr;
1298 node->cache.size = 1;
1299 cache_tree_init(&node->root_cache);
1300 cache_tree_init(&node->inode_cache);
1303 ret = insert_cache_extent(shared, &node->cache);
1308 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1309 struct walk_control *wc, int level)
1311 struct shared_node *node;
1312 struct shared_node *dest;
1315 if (level == wc->active_node)
1318 BUG_ON(wc->active_node <= level);
1319 node = find_shared_node(&wc->shared, bytenr);
1321 ret = add_shared_node(&wc->shared, bytenr, refs);
1323 node = find_shared_node(&wc->shared, bytenr);
1324 wc->nodes[level] = node;
1325 wc->active_node = level;
1329 if (wc->root_level == wc->active_node &&
1330 btrfs_root_refs(&root->root_item) == 0) {
1331 if (--node->refs == 0) {
1332 free_inode_recs_tree(&node->root_cache);
1333 free_inode_recs_tree(&node->inode_cache);
1334 remove_cache_extent(&wc->shared, &node->cache);
1340 dest = wc->nodes[wc->active_node];
1341 splice_shared_node(node, dest);
1342 if (node->refs == 0) {
1343 remove_cache_extent(&wc->shared, &node->cache);
1349 static int leave_shared_node(struct btrfs_root *root,
1350 struct walk_control *wc, int level)
1352 struct shared_node *node;
1353 struct shared_node *dest;
1356 if (level == wc->root_level)
1359 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1363 BUG_ON(i >= BTRFS_MAX_LEVEL);
1365 node = wc->nodes[wc->active_node];
1366 wc->nodes[wc->active_node] = NULL;
1367 wc->active_node = i;
1369 dest = wc->nodes[wc->active_node];
1370 if (wc->active_node < wc->root_level ||
1371 btrfs_root_refs(&root->root_item) > 0) {
1372 BUG_ON(node->refs <= 1);
1373 splice_shared_node(node, dest);
1375 BUG_ON(node->refs < 2);
/*
 * Decide the parent/child relationship between two subvolume roots.
 *
 * Returns:
 */
1384 * 1 - if the root with id child_root_id is a child of root parent_root_id
1385 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1386 * has other root(s) as parent(s)
1387 * 2 - if the root child_root_id doesn't have any parent roots
1389 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1392 struct btrfs_path path;
1393 struct btrfs_key key;
1394 struct extent_buffer *leaf;
1398 btrfs_init_path(&path);
/* Fast path: look for an exact ROOT_REF (parent -> child) item */
1400 key.objectid = parent_root_id;
1401 key.type = BTRFS_ROOT_REF_KEY;
1402 key.offset = child_root_id;
1403 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1407 btrfs_release_path(&path);
/* Slow path: scan all ROOT_BACKREF items of the child root */
1411 key.objectid = child_root_id;
1412 key.type = BTRFS_ROOT_BACKREF_KEY;
1414 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1420 leaf = path.nodes[0];
1421 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1422 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1425 leaf = path.nodes[0];
1428 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Past the child's backrefs: stop scanning */
1429 if (key.objectid != child_root_id ||
1430 key.type != BTRFS_ROOT_BACKREF_KEY)
/* Backref pointing at the queried parent: it is a child */
1435 if (key.offset == parent_root_id) {
1436 btrfs_release_path(&path);
1443 btrfs_release_path(&path);
1446 return has_parent ? 0 : 2;
/*
 * Record every btrfs_dir_item packed inside one DIR_ITEM/DIR_INDEX leaf
 * item for the current inode record.  Each entry contributes a backref to
 * the inode cache (normal entries) or to the root cache (subvolume
 * entries), and its name length is added to the directory's found_size
 * so isize can be cross-checked later.
 */
1449 static int process_dir_item(struct btrfs_root *root,
1450 struct extent_buffer *eb,
1451 int slot, struct btrfs_key *key,
1452 struct shared_node *active_node)
1462 struct btrfs_dir_item *di;
1463 struct inode_record *rec;
1464 struct cache_tree *root_cache;
1465 struct cache_tree *inode_cache;
1466 struct btrfs_key location;
1467 char namebuf[BTRFS_NAME_LEN];
1469 root_cache = &active_node->root_cache;
1470 inode_cache = &active_node->inode_cache;
1471 rec = active_node->current;
1472 rec->found_dir_item = 1;
/* Walk the variable-length dir items packed into this one leaf item */
1474 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1475 total = btrfs_item_size_nr(eb, slot);
1476 while (cur < total) {
1478 btrfs_dir_item_key_to_cpu(eb, di, &location);
1479 name_len = btrfs_dir_name_len(eb, di);
1480 data_len = btrfs_dir_data_len(eb, di);
1481 filetype = btrfs_dir_type(eb, di);
1483 rec->found_size += name_len;
/* Clamp over-long names and flag them instead of overflowing namebuf */
1484 if (name_len <= BTRFS_NAME_LEN) {
1488 len = BTRFS_NAME_LEN;
1489 error = REF_ERR_NAME_TOO_LONG;
1491 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
/* Dispatch by what the entry points at: inode, subvolume root, or junk */
1493 if (location.type == BTRFS_INODE_ITEM_KEY) {
1494 add_inode_backref(inode_cache, location.objectid,
1495 key->objectid, key->offset, namebuf,
1496 len, filetype, key->type, error);
1497 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1498 add_inode_backref(root_cache, location.objectid,
1499 key->objectid, key->offset,
1500 namebuf, len, filetype,
/* Unknown location type: record under the multiple-objectids bucket */
1503 fprintf(stderr, "invalid location in dir item %u\n",
1505 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1506 key->objectid, key->offset, namebuf,
1507 len, filetype, key->type, error);
/* Advance to the next packed entry: header + name + data */
1510 len = sizeof(*di) + name_len + data_len;
1511 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item must contain exactly one entry */
1514 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1515 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record all btrfs_inode_ref entries packed inside one INODE_REF leaf
 * item: each gives a (parent dir, index, name) backref for the inode
 * identified by key->objectid (key->offset is the parent dir's objectid).
 */
1520 static int process_inode_ref(struct extent_buffer *eb,
1521 int slot, struct btrfs_key *key,
1522 struct shared_node *active_node)
1530 struct cache_tree *inode_cache;
1531 struct btrfs_inode_ref *ref;
1532 char namebuf[BTRFS_NAME_LEN];
1534 inode_cache = &active_node->inode_cache;
1536 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1537 total = btrfs_item_size_nr(eb, slot);
1538 while (cur < total) {
1539 name_len = btrfs_inode_ref_name_len(eb, ref);
1540 index = btrfs_inode_ref_index(eb, ref);
/* Clamp over-long names and flag them instead of overflowing namebuf */
1541 if (name_len <= BTRFS_NAME_LEN) {
1545 len = BTRFS_NAME_LEN;
1546 error = REF_ERR_NAME_TOO_LONG;
1548 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1549 add_inode_backref(inode_cache, key->objectid, key->offset,
1550 index, namebuf, len, 0, key->type, error);
/* Next packed ref: header + name */
1552 len = sizeof(*ref) + name_len;
1553 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Same as process_inode_ref() but for INODE_EXTREF items, where the
 * parent directory objectid is stored inside each extref entry rather
 * than in the item key (used when too many hard links overflow INODE_REF).
 */
1559 static int process_inode_extref(struct extent_buffer *eb,
1560 int slot, struct btrfs_key *key,
1561 struct shared_node *active_node)
1570 struct cache_tree *inode_cache;
1571 struct btrfs_inode_extref *extref;
1572 char namebuf[BTRFS_NAME_LEN];
1574 inode_cache = &active_node->inode_cache;
1576 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1577 total = btrfs_item_size_nr(eb, slot);
1578 while (cur < total) {
1579 name_len = btrfs_inode_extref_name_len(eb, extref);
1580 index = btrfs_inode_extref_index(eb, extref);
/* Parent dir comes from the extref payload, not the key offset */
1581 parent = btrfs_inode_extref_parent(eb, extref);
1582 if (name_len <= BTRFS_NAME_LEN) {
1586 len = BTRFS_NAME_LEN;
1587 error = REF_ERR_NAME_TOO_LONG;
1589 read_extent_buffer(eb, namebuf,
1590 (unsigned long)(extref + 1), len);
1591 add_inode_backref(inode_cache, key->objectid, parent,
1592 index, namebuf, len, 0, key->type, error);
/* Next packed extref: header + name */
1594 len = sizeof(*extref) + name_len;
1595 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Count how many bytes of the logical range [start, start + len) are
 * covered by checksum items in the csum tree, accumulating the covered
 * byte count into *found.  Used to decide whether a file extent has all,
 * some, or none of its checksums.
 */
1602 static int count_csum_range(struct btrfs_root *root, u64 start,
1603 u64 len, u64 *found)
1605 struct btrfs_key key;
1606 struct btrfs_path path;
1607 struct extent_buffer *leaf;
1612 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1614 btrfs_init_path(&path);
1616 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1618 key.type = BTRFS_EXTENT_CSUM_KEY;
1620 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * If the search landed past the target, the previous csum item may
 * still overlap the start of the range; step back one slot.
 */
1624 if (ret > 0 && path.slots[0] > 0) {
1625 leaf = path.nodes[0];
1626 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1627 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1628 key.type == BTRFS_EXTENT_CSUM_KEY)
1633 leaf = path.nodes[0];
1634 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1635 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1640 leaf = path.nodes[0];
1643 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1644 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1645 key.type != BTRFS_EXTENT_CSUM_KEY)
1648 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Item starts beyond the queried range: done */
1649 if (key.offset >= start + len)
1652 if (key.offset > start)
/* Each csum entry of csum_size bytes covers one sector of data */
1655 size = btrfs_item_size_nr(leaf, path.slots[0]);
1656 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1657 if (csum_end > start) {
1658 size = min(csum_end - start, len);
1667 btrfs_release_path(&path);
/*
 * Validate one EXTENT_DATA item of the current inode record: track the
 * contiguous extent coverage (detecting overlaps and holes), sanity-check
 * inline/regular/prealloc extent fields, accumulate found_size, and
 * verify checksum coverage for on-disk extents.
 */
1673 static int process_file_extent(struct btrfs_root *root,
1674 struct extent_buffer *eb,
1675 int slot, struct btrfs_key *key,
1676 struct shared_node *active_node)
1678 struct inode_record *rec;
1679 struct btrfs_file_extent_item *fi;
1681 u64 disk_bytenr = 0;
1682 u64 extent_offset = 0;
/* Sector-alignment mask; sectorsize is a power of two */
1683 u64 mask = root->sectorsize - 1;
1687 rec = active_node->current;
1688 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1689 rec->found_file_extent = 1;
/* First extent seen for this inode: start coverage tracking at it */
1691 if (rec->extent_start == (u64)-1) {
1692 rec->extent_start = key->offset;
1693 rec->extent_end = key->offset;
/* Overlap with previous extent is an error; a gap is a (possible) hole */
1696 if (rec->extent_end > key->offset)
1697 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1698 else if (rec->extent_end < key->offset) {
1699 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1700 key->offset - rec->extent_end);
1705 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1706 extent_type = btrfs_file_extent_type(eb, fi);
1708 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1709 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1711 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1712 rec->found_size += num_bytes;
/* Round inline length up to a sector for coverage accounting */
1713 num_bytes = (num_bytes + mask) & ~mask;
1714 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1715 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1716 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1717 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1718 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Regular extents must be non-empty and sector aligned */
1719 if (num_bytes == 0 || (num_bytes & mask))
1720 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1721 if (num_bytes + extent_offset >
1722 btrfs_file_extent_ram_bytes(eb, fi))
1723 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents cannot be compressed/encrypted/encoded */
1724 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1725 (btrfs_file_extent_compression(eb, fi) ||
1726 btrfs_file_extent_encryption(eb, fi) ||
1727 btrfs_file_extent_other_encoding(eb, fi)))
1728 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* disk_bytenr == 0 means a hole extent: contributes no size */
1729 if (disk_bytenr > 0)
1730 rec->found_size += num_bytes;
1732 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1734 rec->extent_end = key->offset + num_bytes;
1737 * The data reloc tree will copy full extents into its inode and then
1738 * copy the corresponding csums. Because the extent it copied could be
1739 * a preallocated extent that hasn't been written to yet there may be no
1740 * csums to copy, ergo we won't have csums for our file extent. This is
1741 * ok so just don't bother checking csums if the inode belongs to the
1744 if (disk_bytenr > 0 &&
1745 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* Compressed extents store csums over the on-disk (compressed) bytes */
1747 if (btrfs_file_extent_compression(eb, fi))
1748 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1750 disk_bytenr += extent_offset;
1752 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
/* REG extents should be fully csummed; PREALLOC must have none */
1755 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1757 rec->found_csum_item = 1;
1758 if (found < num_bytes)
1759 rec->some_csum_missing = 1;
1760 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1762 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Scan one fs-tree leaf: for each item, switch the active inode record
 * when the objectid changes (finalizing the previous record) and dispatch
 * the item to the matching process_* handler by key type.
 */
1768 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1769 struct walk_control *wc)
1771 struct btrfs_key key;
1775 struct cache_tree *inode_cache;
1776 struct shared_node *active_node;
/* Skip leaves of orphan roots being walked at the root level */
1778 if (wc->root_level == wc->active_node &&
1779 btrfs_root_refs(&root->root_item) == 0)
1782 active_node = wc->nodes[wc->active_node];
1783 inode_cache = &active_node->inode_cache;
1784 nritems = btrfs_header_nritems(eb);
1785 for (i = 0; i < nritems; i++) {
1786 btrfs_item_key_to_cpu(eb, &key, i);
/* Free-space cache and orphan items are not inode metadata */
1788 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1790 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* New objectid: close out the previous inode record, open a new one */
1793 if (active_node->current == NULL ||
1794 active_node->current->ino < key.objectid) {
1795 if (active_node->current) {
1796 active_node->current->checked = 1;
1797 maybe_free_inode_rec(inode_cache,
1798 active_node->current);
1800 active_node->current = get_inode_rec(inode_cache,
1802 BUG_ON(IS_ERR(active_node->current));
1805 case BTRFS_DIR_ITEM_KEY:
1806 case BTRFS_DIR_INDEX_KEY:
1807 ret = process_dir_item(root, eb, i, &key, active_node);
1809 case BTRFS_INODE_REF_KEY:
1810 ret = process_inode_ref(eb, i, &key, active_node);
1812 case BTRFS_INODE_EXTREF_KEY:
1813 ret = process_inode_extref(eb, i, &key, active_node);
1815 case BTRFS_INODE_ITEM_KEY:
1816 ret = process_inode_item(eb, i, &key, active_node);
1818 case BTRFS_EXTENT_DATA_KEY:
1819 ret = process_file_extent(root, eb, i, &key,
/*
 * Issue readahead for all child blocks of 'node' from 'slot' onward, so
 * the subsequent synchronous read_tree_block() calls in walk_down_tree()
 * hit warm caches.  Best-effort only; errors from readahead are ignored.
 */
1829 static void reada_walk_down(struct btrfs_root *root,
1830 struct extent_buffer *node, int slot)
1839 level = btrfs_header_level(node);
1843 nritems = btrfs_header_nritems(node);
1844 blocksize = root->nodesize;
1845 for (i = slot; i < nritems; i++) {
1846 bytenr = btrfs_node_blockptr(node, i);
1847 ptr_gen = btrfs_node_ptr_generation(node, i);
1848 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1853 * Check the child node/leaf by the following condition:
1854 * 1. the first item key of the node/leaf should be the same with the one
1856 * 2. block in parent node should match the child node/leaf.
1857 * 3. generation of parent node and child's header should be consistent.
1859 * Or the child node/leaf pointed by the key in parent is not valid.
1861 * We hope to check leaf owner too, but since subvol may share leaves,
1862 * which makes leaf owner check not so strong, key check should be
1863 * sufficient enough for that case.
1865 static int check_child_node(struct btrfs_root *root,
1866 struct extent_buffer *parent, int slot,
1867 struct extent_buffer *child)
1869 struct btrfs_key parent_key;
1870 struct btrfs_key child_key;
1873 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* A level-0 child is a leaf: its first key is an item key */
1874 if (btrfs_header_level(child) == 0)
1875 btrfs_item_key_to_cpu(child, &child_key, 0);
1877 btrfs_node_key_to_cpu(child, &child_key, 0);
/* Condition 1: parent's key at slot must equal child's first key */
1879 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1882 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1883 parent_key.objectid, parent_key.type, parent_key.offset,
1884 child_key.objectid, child_key.type, child_key.offset);
/* Condition 2: child's bytenr must match the parent's block pointer */
1886 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1888 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1889 btrfs_node_blockptr(parent, slot),
1890 btrfs_header_bytenr(child));
/* Condition 3: generations recorded in parent and child must agree */
1892 if (btrfs_node_ptr_generation(parent, slot) !=
1893 btrfs_header_generation(child)) {
1895 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1896 btrfs_header_generation(child),
1897 btrfs_node_ptr_generation(parent, slot));
/*
 * Per-level cache members of struct node_refs (the struct's opening line
 * is elided in this chunk; the type name is visible in walk_down_tree's
 * signature).  bytenr[l]/refs[l] memoize the last extent-tree ref-count
 * lookup done for the block at walk level l, avoiding repeated searches.
 */
1903 u64 bytenr[BTRFS_MAX_LEVEL];
1904 u64 refs[BTRFS_MAX_LEVEL];
/*
 * Descend the tree from the current path position: process leaves via
 * process_one_leaf(), read and validate child blocks on the way down,
 * track shared subtrees via enter_shared_node(), and cache extent
 * ref-count lookups in 'nrefs'.  On exit *level points at the leaf (or
 * the level where descent stopped).
 */
1907 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1908 struct walk_control *wc, int *level,
1909 struct node_refs *nrefs)
1911 enum btrfs_tree_block_status status;
1914 struct extent_buffer *next;
1915 struct extent_buffer *cur;
1920 WARN_ON(*level < 0);
1921 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse cached refcount for the starting block if it matches */
1923 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1924 refs = nrefs->refs[*level];
1927 ret = btrfs_lookup_extent_info(NULL, root,
1928 path->nodes[*level]->start,
1929 *level, 1, &refs, NULL);
1934 nrefs->bytenr[*level] = path->nodes[*level]->start;
1935 nrefs->refs[*level] = refs;
/* refs > 1 means the block is shared between trees: track it */
1939 ret = enter_shared_node(root, path->nodes[*level]->start,
1947 while (*level >= 0) {
1948 WARN_ON(*level < 0);
1949 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1950 cur = path->nodes[*level];
/* Header level must match the walk level, else the tree is corrupt */
1952 if (btrfs_header_level(cur) != *level)
/* Slot exhausted: let walk_up_tree() advance the parent */
1955 if (path->slots[*level] >= btrfs_header_nritems(cur))
1958 ret = process_one_leaf(root, cur, wc);
1963 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1964 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1965 blocksize = root->nodesize;
/* Cached refcount lookup for the child block */
1967 if (bytenr == nrefs->bytenr[*level - 1]) {
1968 refs = nrefs->refs[*level - 1];
1970 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1971 *level - 1, 1, &refs, NULL);
1975 nrefs->bytenr[*level - 1] = bytenr;
1976 nrefs->refs[*level - 1] = refs;
/* Shared child already visited: skip the whole subtree */
1981 ret = enter_shared_node(root, bytenr, refs,
1984 path->slots[*level]++;
1989 next = btrfs_find_tree_block(root, bytenr, blocksize);
1990 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1991 free_extent_buffer(next);
/* Prefetch siblings before the blocking read below */
1992 reada_walk_down(root, cur, path->slots[*level]);
1993 next = read_tree_block(root, bytenr, blocksize,
/* Unreadable child: record the parent block as corrupt */
1995 if (!extent_buffer_uptodate(next)) {
1996 struct btrfs_key node_key;
1998 btrfs_node_key_to_cpu(path->nodes[*level],
2000 path->slots[*level]);
2001 btrfs_add_corrupt_extent_record(root->fs_info,
2003 path->nodes[*level]->start,
2004 root->nodesize, *level);
2010 ret = check_child_node(root, cur, path->slots[*level], next);
/* Structural sanity check of the freshly read block */
2016 if (btrfs_is_leaf(next))
2017 status = btrfs_check_leaf(root, NULL, next);
2019 status = btrfs_check_node(root, NULL, next);
2020 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2021 free_extent_buffer(next);
/* Step down one level into the child */
2026 *level = *level - 1;
2027 free_extent_buffer(path->nodes[*level]);
2028 path->nodes[*level] = next;
2029 path->slots[*level] = 0;
2032 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Climb back up the tree after walk_down_tree() exhausted a subtree:
 * find the lowest ancestor with an unvisited slot, releasing the blocks
 * below it and leaving shared-node tracking state where needed.
 */
2036 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2037 struct walk_control *wc, int *level)
2040 struct extent_buffer *leaf;
2042 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2043 leaf = path->nodes[i];
/* This ancestor still has more slots to visit: resume here */
2044 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2049 free_extent_buffer(path->nodes[*level]);
2050 path->nodes[*level] = NULL;
2051 BUG_ON(*level > wc->active_node);
2052 if (*level == wc->active_node)
2053 leave_shared_node(root, wc, *level);
/*
 * Verify that 'rec' looks like a valid root directory inode: an error-free
 * inode item with nlink 1 and no counted links, whose single backref is
 * the self-referencing ".." inode ref with index 0 and no dir item/index.
 * Returns 0 when all conditions hold (failure paths are elided here).
 */
2060 static int check_root_dir(struct inode_record *rec)
2062 struct inode_backref *backref;
2065 if (!rec->found_inode_item || rec->errors)
2067 if (rec->nlink != 1 || rec->found_link != 0)
2069 if (list_empty(&rec->backrefs))
2071 backref = to_inode_backref(rec->backrefs.next);
2072 if (!backref->found_inode_ref)
2074 if (backref->index != 0 || backref->namelen != 2 ||
2075 memcmp(backref->name, "..", 2))
2077 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Repair helper: rewrite the on-disk inode item's size to the directory
 * size accumulated during the scan (rec->found_size), then clear the
 * I_ERR_DIR_ISIZE_WRONG flag.
 */
2084 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2085 struct btrfs_root *root, struct btrfs_path *path,
2086 struct inode_record *rec)
2088 struct btrfs_inode_item *ei;
2089 struct btrfs_key key;
/* offset (u64)-1 + stepping back one slot finds the inode item */
2092 key.objectid = rec->ino;
2093 key.type = BTRFS_INODE_ITEM_KEY;
2094 key.offset = (u64)-1;
2096 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2100 if (!path->slots[0]) {
2107 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2108 if (key.objectid != rec->ino) {
2113 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2114 struct btrfs_inode_item);
2115 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2116 btrfs_mark_buffer_dirty(path->nodes[0]);
2117 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2118 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2119 root->root_key.objectid);
2121 btrfs_release_path(path);
/*
 * Repair helper: insert the missing orphan item for rec->ino and clear
 * I_ERR_NO_ORPHAN_ITEM on success.
 */
2125 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2126 struct btrfs_root *root,
2127 struct btrfs_path *path,
2128 struct inode_record *rec)
2132 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2133 btrfs_release_path(path);
2135 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Repair helper: rewrite the inode item's nbytes field to the byte count
 * accumulated from the inode's file extents (rec->found_size), then clear
 * I_ERR_FILE_NBYTES_WRONG.
 */
2139 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2140 struct btrfs_root *root,
2141 struct btrfs_path *path,
2142 struct inode_record *rec)
2144 struct btrfs_inode_item *ei;
2145 struct btrfs_key key;
2148 key.objectid = rec->ino;
2149 key.type = BTRFS_INODE_ITEM_KEY;
2152 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2159 /* Since ret == 0, no need to check anything */
2160 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2161 struct btrfs_inode_item);
2162 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2163 btrfs_mark_buffer_dirty(path->nodes[0]);
2164 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2165 printf("reset nbytes for ino %llu root %llu\n",
2166 rec->ino, root->root_key.objectid);
2168 btrfs_release_path(path);
/*
 * Repair helper: insert a DIR_INDEX item for a backref that has a dir
 * item and inode ref but no dir index.  Also bumps the parent directory's
 * found_size by the name length and re-evaluates its isize error flag.
 */
2172 static int add_missing_dir_index(struct btrfs_root *root,
2173 struct cache_tree *inode_cache,
2174 struct inode_record *rec,
2175 struct inode_backref *backref)
2177 struct btrfs_path *path;
2178 struct btrfs_trans_handle *trans;
2179 struct btrfs_dir_item *dir_item;
2180 struct extent_buffer *leaf;
2181 struct btrfs_key key;
2182 struct btrfs_disk_key disk_key;
2183 struct inode_record *dir_rec;
2184 unsigned long name_ptr;
/* Item payload: dir_item header immediately followed by the name */
2185 u32 data_size = sizeof(*dir_item) + backref->namelen;
2188 path = btrfs_alloc_path();
2192 trans = btrfs_start_transaction(root, 1);
2193 if (IS_ERR(trans)) {
2194 btrfs_free_path(path);
2195 return PTR_ERR(trans);
2198 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2199 (unsigned long long)rec->ino);
2200 key.objectid = backref->dir;
2201 key.type = BTRFS_DIR_INDEX_KEY;
2202 key.offset = backref->index;
2204 ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2207 leaf = path->nodes[0];
2208 dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
/* The embedded location key points at the child inode item */
2210 disk_key.objectid = cpu_to_le64(rec->ino);
2211 disk_key.type = BTRFS_INODE_ITEM_KEY;
2212 disk_key.offset = 0;
2214 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2215 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2216 btrfs_set_dir_data_len(leaf, dir_item, 0);
2217 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2218 name_ptr = (unsigned long)(dir_item + 1);
2219 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2220 btrfs_mark_buffer_dirty(leaf);
2221 btrfs_free_path(path);
2222 btrfs_commit_transaction(trans, root);
2224 backref->found_dir_index = 1;
/* Update the parent directory's record to match the new index entry */
2225 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2226 BUG_ON(IS_ERR(dir_rec));
2229 dir_rec->found_size += backref->namelen;
2230 if (dir_rec->found_size == dir_rec->isize &&
2231 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2232 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2233 if (dir_rec->found_size != dir_rec->isize)
2234 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Repair helper: remove a bad DIR_INDEX entry named by 'backref'.  If
 * the dir item holds only this one name the whole item is deleted,
 * otherwise just this name is removed from the packed item.
 */
2239 static int delete_dir_index(struct btrfs_root *root,
2240 struct cache_tree *inode_cache,
2241 struct inode_record *rec,
2242 struct inode_backref *backref)
2244 struct btrfs_trans_handle *trans;
2245 struct btrfs_dir_item *di;
2246 struct btrfs_path *path;
2249 path = btrfs_alloc_path();
2253 trans = btrfs_start_transaction(root, 1);
2254 if (IS_ERR(trans)) {
2255 btrfs_free_path(path);
2256 return PTR_ERR(trans);
2260 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2261 (unsigned long long)backref->dir,
2262 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2263 (unsigned long long)root->objectid);
/* mod -1 asks the lookup to prepare the path for deletion */
2265 di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2266 backref->name, backref->namelen,
2267 backref->index, -1);
2270 btrfs_free_path(path);
2271 btrfs_commit_transaction(trans, root);
/* Whole-item vs single-name deletion (condition elided in this chunk) */
2278 ret = btrfs_del_item(trans, root, path);
2280 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2282 btrfs_free_path(path);
2283 btrfs_commit_transaction(trans, root);
/*
 * Repair helper: synthesize a fresh inode item for an inode that has
 * backrefs/extents but no inode item.  Mode is guessed from the evidence
 * (dir if dir items were found, otherwise a regular file); timestamps are
 * set to "now" and permissions default to 0755 — hence the warning that
 * the result may be incomplete.
 */
2287 static int create_inode_item(struct btrfs_root *root,
2288 struct inode_record *rec,
2289 struct inode_backref *backref, int root_dir)
2291 struct btrfs_trans_handle *trans;
2292 struct btrfs_inode_item inode_item;
2293 time_t now = time(NULL);
2296 trans = btrfs_start_transaction(root, 1);
2297 if (IS_ERR(trans)) {
2298 ret = PTR_ERR(trans);
2302 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2303 "be incomplete, please check permissions and content after "
2304 "the fsck completes.\n", (unsigned long long)root->objectid,
2305 (unsigned long long)rec->ino);
2307 memset(&inode_item, 0, sizeof(inode_item));
2308 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
/* nlink: 1 for root dir, else the link count found during the scan */
2310 btrfs_set_stack_inode_nlink(&inode_item, 1);
2312 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2313 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2314 if (rec->found_dir_item) {
/* Ambiguous evidence: prefer directory and tell the user */
2315 if (rec->found_file_extent)
2316 fprintf(stderr, "root %llu inode %llu has both a dir "
2317 "item and extents, unsure if it is a dir or a "
2318 "regular file so setting it as a directory\n",
2319 (unsigned long long)root->objectid,
2320 (unsigned long long)rec->ino);
2321 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2322 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2323 } else if (!rec->found_dir_item) {
2324 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2325 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
/* Timestamps: a/c/mtime = now, otime = 0 (original unknown) */
2327 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2328 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2329 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2330 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2331 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2332 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2333 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2334 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2336 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2338 btrfs_commit_transaction(trans, root);
/*
 * Walk all backrefs of an inode record and repair each inconsistency:
 * recreate a missing root-dir inode item, delete bogus dir indexes,
 * insert missing dir indexes, add missing dir item/index pairs for bare
 * inode refs, and recreate the inode item for fully-referenced inodes.
 * Returns a negative error, or (via the tail) the number of repairs made.
 */
2342 static int repair_inode_backrefs(struct btrfs_root *root,
2343 struct inode_record *rec,
2344 struct cache_tree *inode_cache,
2347 struct inode_backref *tmp, *backref;
2348 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2352 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* The root directory gets its inode item recreated, nothing else */
2353 if (!delete && rec->ino == root_dirid) {
2354 if (!rec->found_inode_item) {
2355 ret = create_inode_item(root, rec, backref, 1);
2362 /* Index 0 for root dir's are special, don't mess with it */
2363 if (rec->ino == root_dirid && backref->index == 0)
/* Dir index without inode ref, or with a mismatched index: delete it */
2367 ((backref->found_dir_index && !backref->found_inode_ref) ||
2368 (backref->found_dir_index && backref->found_inode_ref &&
2369 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2370 ret = delete_dir_index(root, inode_cache, rec, backref);
2374 list_del(&backref->list);
/* Dir item + inode ref but no dir index: insert the missing index */
2378 if (!delete && !backref->found_dir_index &&
2379 backref->found_dir_item && backref->found_inode_ref) {
2380 ret = add_missing_dir_index(root, inode_cache, rec,
/* Fully consistent backref: drop it from the to-repair list */
2385 if (backref->found_dir_item &&
2386 backref->found_dir_index &&
2387 backref->found_dir_index) {
2388 if (!backref->errors &&
2389 backref->found_inode_ref) {
2390 list_del(&backref->list);
/* Bare inode ref: recreate the dir item/index pair pointing at it */
2396 if (!delete && (!backref->found_dir_index &&
2397 !backref->found_dir_item &&
2398 backref->found_inode_ref)) {
2399 struct btrfs_trans_handle *trans;
2400 struct btrfs_key location;
/* Don't insert if the name would collide in the parent dir */
2402 ret = check_dir_conflict(root, backref->name,
2408 * let nlink fixing routine to handle it,
2409 * which can do it better.
2414 location.objectid = rec->ino;
2415 location.type = BTRFS_INODE_ITEM_KEY;
2416 location.offset = 0;
2418 trans = btrfs_start_transaction(root, 1);
2419 if (IS_ERR(trans)) {
2420 ret = PTR_ERR(trans);
2423 fprintf(stderr, "adding missing dir index/item pair "
2425 (unsigned long long)rec->ino);
2426 ret = btrfs_insert_dir_item(trans, root, backref->name,
2428 backref->dir, &location,
2429 imode_to_type(rec->imode),
2432 btrfs_commit_transaction(trans, root);
/* All references present but no inode item: recreate it */
2436 if (!delete && (backref->found_inode_ref &&
2437 backref->found_dir_index &&
2438 backref->found_dir_item &&
2439 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2440 !rec->found_inode_item)) {
2441 ret = create_inode_item(root, rec, backref, 0);
2448 return ret ? ret : repaired;
2452 * To determine the file type for nlink/inode_item repair
2454 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2455 * Return -ENOENT if file type is not found.
2457 static int find_file_type(struct inode_record *rec, u8 *type)
2459 struct inode_backref *backref;
2461 /* For inode item recovered case */
2462 if (rec->found_inode_item) {
2463 *type = imode_to_type(rec->imode);
/* Otherwise use the filetype recorded in any dir item/index backref */
2467 list_for_each_entry(backref, &rec->backrefs, list) {
2468 if (backref->found_dir_index || backref->found_dir_item) {
2469 *type = backref->filetype;
2477 * To determine the file name for nlink repair
2479 * Return 0 if file name is found, set name and namelen.
2480 * Return -ENOENT if file name is not found.
2482 static int find_file_name(struct inode_record *rec,
2483 char *name, int *namelen)
2485 struct inode_backref *backref;
/* Any backref source (dir item/index or inode ref) supplies the name */
2487 list_for_each_entry(backref, &rec->backrefs, list) {
2488 if (backref->found_dir_index || backref->found_dir_item ||
2489 backref->found_inode_ref) {
2490 memcpy(name, backref->name, backref->namelen);
2491 *namelen = backref->namelen;
2498 /* Reset the nlink of the inode to the correct one */
/*
 * Strategy: unlink every recorded backref (discarding ones that are not
 * fully consistent), zero the on-disk nlink, then re-add the remaining
 * valid links via btrfs_add_link(), which increments nlink as it goes.
 */
2499 static int reset_nlink(struct btrfs_trans_handle *trans,
2500 struct btrfs_root *root,
2501 struct btrfs_path *path,
2502 struct inode_record *rec)
2504 struct inode_backref *backref;
2505 struct inode_backref *tmp;
2506 struct btrfs_key key;
2507 struct btrfs_inode_item *inode_item;
2510 /* We don't believe this either, reset it and iterate backref */
2511 rec->found_link = 0;
2513 /* Remove all backref including the valid ones */
2514 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2515 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2516 backref->index, backref->name,
2517 backref->namelen, 0);
2521 /* remove invalid backref, so it won't be added back */
2522 if (!(backref->found_dir_index &&
2523 backref->found_dir_item &&
2524 backref->found_inode_ref)) {
2525 list_del(&backref->list);
2532 /* Set nlink to 0 */
2533 key.objectid = rec->ino;
2534 key.type = BTRFS_INODE_ITEM_KEY;
2536 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2543 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2544 struct btrfs_inode_item);
2545 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2546 btrfs_mark_buffer_dirty(path->nodes[0]);
2547 btrfs_release_path(path);
2550 * Add back valid inode_ref/dir_item/dir_index,
2551 * add_link() will handle the nlink inc, so new nlink must be correct
2553 list_for_each_entry(backref, &rec->backrefs, list) {
2554 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2555 backref->name, backref->namelen,
2556 backref->filetype, &backref->index, 1);
2561 btrfs_release_path(path);
/*
 * Repair an inode with a wrong link count: recover its name and type,
 * reset nlink from the valid backrefs, and if no link survives, attach
 * the inode under "lost+found" (creating that dir and de-duplicating the
 * name with ".INO" suffixes as needed).  Always clears
 * I_ERR_LINK_COUNT_WRONG so repair cannot loop on the same inode.
 */
2565 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2566 struct btrfs_root *root,
2567 struct btrfs_path *path,
2568 struct inode_record *rec)
2570 char *dir_name = "lost+found";
2571 char namebuf[BTRFS_NAME_LEN] = {0};
2576 int name_recovered = 0;
2577 int type_recovered = 0;
2581 * Get file name and type first before these invalid inode ref
2582 * are deleted by remove_all_invalid_backref()
2584 name_recovered = !find_file_name(rec, namebuf, &namelen);
2585 type_recovered = !find_file_type(rec, &type);
/* Fallback name: the inode number rendered as decimal digits */
2587 if (!name_recovered) {
2588 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2589 rec->ino, rec->ino);
2590 namelen = count_digits(rec->ino);
2591 sprintf(namebuf, "%llu", rec->ino);
2594 if (!type_recovered) {
2595 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2597 type = BTRFS_FT_REG_FILE;
2601 ret = reset_nlink(trans, root, path, rec);
2604 "Failed to reset nlink for inode %llu: %s\n",
2605 rec->ino, strerror(-ret));
/* No valid link left: move the inode into lost+found */
2609 if (rec->found_link == 0) {
2610 lost_found_ino = root->highest_inode;
2611 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2616 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2617 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2620 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2621 dir_name, strerror(-ret));
2624 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2625 namebuf, namelen, type, NULL, 1);
2627 * Add ".INO" suffix several times to handle case where
2628 * "FILENAME.INO" is already taken by another file.
2630 while (ret == -EEXIST) {
2632 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2634 if (namelen + count_digits(rec->ino) + 1 >
2639 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2641 namelen += count_digits(rec->ino) + 1;
2642 ret = btrfs_add_link(trans, root, rec->ino,
2643 lost_found_ino, namebuf,
2644 namelen, type, NULL, 1);
2648 "Failed to link the inode %llu to %s dir: %s\n",
2649 rec->ino, dir_name, strerror(-ret));
2653 * Just increase the found_link, don't actually add the
2654 * backref. This will make things easier and this inode
2655 * record will be freed after the repair is done.
2656 * So fsck will not report problem about this inode.
2659 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2660 namelen, namebuf, dir_name);
2662 printf("Fixed the nlink of inode %llu\n", rec->ino);
2665 * Clear the flag anyway, or we will loop forever for the same inode
2666 * as it will not be removed from the bad inode list and the dead loop
2669 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2670 btrfs_release_path(path);
2675 * Check if there is any normal(reg or prealloc) file extent for given
2677 * This is used to determine the file type when neither its dir_index/item or
2678 * inode_item exists.
2680 * This will *NOT* report error, if any error happens, just consider it does
2681 * not have any normal file extent.
2683 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2685 struct btrfs_path *path;
2686 struct btrfs_key key;
2687 struct btrfs_key found_key;
2688 struct btrfs_file_extent_item *fi;
2692 path = btrfs_alloc_path();
2696 key.type = BTRFS_EXTENT_DATA_KEY;
2699 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
/* Inexact match may land at leaf end: advance to the next leaf */
2704 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2705 ret = btrfs_next_leaf(root, path);
2712 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2714 if (found_key.objectid != ino ||
2715 found_key.type != BTRFS_EXTENT_DATA_KEY)
2717 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2718 struct btrfs_file_extent_item);
/* Any non-inline extent (REG or PREALLOC) counts as "normal" */
2719 type = btrfs_file_extent_type(path->nodes[0], fi);
2720 if (type != BTRFS_FILE_EXTENT_INLINE) {
2726 btrfs_free_path(path);
2730 static u32 btrfs_type_to_imode(u8 type)
2732 static u32 imode_by_btrfs_type[] = {
2733 [BTRFS_FT_REG_FILE] = S_IFREG,
2734 [BTRFS_FT_DIR] = S_IFDIR,
2735 [BTRFS_FT_CHRDEV] = S_IFCHR,
2736 [BTRFS_FT_BLKDEV] = S_IFBLK,
2737 [BTRFS_FT_FIFO] = S_IFIFO,
2738 [BTRFS_FT_SOCK] = S_IFSOCK,
2739 [BTRFS_FT_SYMLINK] = S_IFLNK,
2742 return imode_by_btrfs_type[(type)];
/*
 * Repair an inode that has no inode item: infer the file type from its
 * backrefs, file extents, or orphan extents (defaulting to regular file),
 * insert a fresh inode item, and flag I_ERR_LINK_COUNT_WRONG so the
 * nlink repair pass runs afterwards to wire up the links.
 */
2745 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2746 struct btrfs_root *root,
2747 struct btrfs_path *path,
2748 struct inode_record *rec)
2752 int type_recovered = 0;
2755 printf("Trying to rebuild inode:%llu\n", rec->ino);
2757 type_recovered = !find_file_type(rec, &filetype);
2760 * Try to determine inode type if type not found.
2762 * For found regular file extent, it must be FILE.
2763 * For found dir_item/index, it must be DIR.
2765 * For undetermined one, use FILE as fallback.
2768 * 1. If found backref(inode_index/item is already handled) to it,
2770 * Need new inode-inode ref structure to allow search for that.
2772 if (!type_recovered) {
2773 if (rec->found_file_extent &&
2774 find_normal_file_extent(root, rec->ino)) {
2776 filetype = BTRFS_FT_REG_FILE;
2777 } else if (rec->found_dir_item) {
2779 filetype = BTRFS_FT_DIR;
/* Orphan data extents imply file data existed: treat as a file */
2780 } else if (!list_empty(&rec->orphan_extents)) {
2782 filetype = BTRFS_FT_REG_FILE;
2784 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2787 filetype = BTRFS_FT_REG_FILE;
2791 ret = btrfs_new_inode(trans, root, rec->ino,
2792 mode | btrfs_type_to_imode(filetype));
2797 * Here inode rebuild is done, we only rebuild the inode item,
2798 * don't repair the nlink(like move to lost+found).
2799 * That is the job of nlink repair.
2801 * We just fill the record and return
2803 rec->found_dir_item = 1;
2804 rec->imode = mode | btrfs_type_to_imode(filetype);
2806 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2807 /* Ensure the inode_nlinks repair function will be called */
2808 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach each orphan data extent recorded on @rec as a real file extent
 * of the inode, or free the extent if it conflicts with an existing one.
 * As each orphan is resolved it is removed from rec->orphan_extents, and
 * the size/hole bookkeeping on @rec is updated so the matching error bits
 * can be cleared.
 */
2813 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2814 struct btrfs_root *root,
2815 struct btrfs_path *path,
2816 struct inode_record *rec)
2818 struct orphan_data_extent *orphan;
2819 struct orphan_data_extent *tmp;
/* _safe iteration: entries are deleted from the list as we go. */
2822 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2824 * Check for conflicting file extents
2826 * Here we don't know whether the extents is compressed or not,
2827 * so we can only assume it not compressed nor data offset,
2828 * and use its disk_len as extent length.
2830 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2831 orphan->offset, orphan->disk_len, 0);
2832 btrfs_release_path(path);
2837 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2838 orphan->disk_bytenr, orphan->disk_len);
/* Conflict: drop the orphan's extent allocation instead of linking it. */
2839 ret = btrfs_free_extent(trans,
2840 root->fs_info->extent_root,
2841 orphan->disk_bytenr, orphan->disk_len,
2842 0, root->objectid, orphan->objectid,
/* No conflict: insert it as an uncompressed file extent (len == disk_len). */
2847 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2848 orphan->offset, orphan->disk_bytenr,
2849 orphan->disk_len, orphan->disk_len);
2853 /* Update file size info */
2854 rec->found_size += orphan->disk_len;
2855 if (rec->found_size == rec->nbytes)
2856 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2858 /* Update the file extent hole info too */
2859 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2863 if (RB_EMPTY_ROOT(&rec->holes))
2864 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2866 list_del(&orphan->list);
/* All orphans handled; the orphan-extent error no longer applies. */
2869 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fix discontinuous ("discount") file extents: punch a hole for every gap
 * recorded in rec->holes so the file's extent map becomes contiguous, then
 * clear I_ERR_FILE_EXTENT_DISCOUNT once the hole tree is empty.
 */
2874 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2875 struct btrfs_root *root,
2876 struct btrfs_path *path,
2877 struct inode_record *rec)
2879 struct rb_node *node;
2880 struct file_extent_hole *hole;
2884 node = rb_first(&rec->holes);
2888 hole = rb_entry(node, struct file_extent_hole, node);
/* Punch each recorded hole, then drop it from the rb-tree. */
2889 ret = btrfs_punch_hole(trans, root, rec->ino,
2890 hole->start, hole->len);
2893 ret = del_file_extent_hole(&rec->holes, hole->start,
2897 if (RB_EMPTY_ROOT(&rec->holes))
2898 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2899 node = rb_first(&rec->holes);
2901 /* special case for a file losing all its file extent */
/* Cover the whole (sector-aligned) isize with a single hole extent. */
2903 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2904 round_up(rec->isize, root->sectorsize));
2908 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2909 rec->ino, root->objectid);
/*
 * Dispatcher for per-inode repairs: bail out early unless @rec carries one
 * of the repairable error bits, then run the individual repair routines in
 * dependency order (inode item first, nlinks/nbytes last), each gated on
 * the previous ones having succeeded (ret == 0).
 */
2914 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2916 struct btrfs_trans_handle *trans;
2917 struct btrfs_path *path;
/* Nothing we know how to repair -> leave the record untouched. */
2920 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2921 I_ERR_NO_ORPHAN_ITEM |
2922 I_ERR_LINK_COUNT_WRONG |
2923 I_ERR_NO_INODE_ITEM |
2924 I_ERR_FILE_EXTENT_ORPHAN |
2925 I_ERR_FILE_EXTENT_DISCOUNT|
2926 I_ERR_FILE_NBYTES_WRONG)))
2929 path = btrfs_alloc_path();
2934 * For nlink repair, it may create a dir and add link, so
2935 * 2 for parent(256)'s dir_index and dir_item
2936 * 2 for lost+found dir's inode_item and inode_ref
2937 * 1 for the new inode_ref of the file
2938 * 2 for lost+found dir's dir_index and dir_item for the file
/* 7 reserved items, per the accounting in the comment above. */
2940 trans = btrfs_start_transaction(root, 7);
2941 if (IS_ERR(trans)) {
2942 btrfs_free_path(path);
2943 return PTR_ERR(trans);
/* Each step runs only if all previous steps succeeded. */
2946 if (rec->errors & I_ERR_NO_INODE_ITEM)
2947 ret = repair_inode_no_item(trans, root, path, rec);
2948 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2949 ret = repair_inode_orphan_extent(trans, root, path, rec);
2950 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2951 ret = repair_inode_discount_extent(trans, root, path, rec);
2952 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2953 ret = repair_inode_isize(trans, root, path, rec);
2954 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2955 ret = repair_inode_orphan_item(trans, root, path, rec);
2956 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2957 ret = repair_inode_nlinks(trans, root, path, rec);
2958 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2959 ret = repair_inode_nbytes(trans, root, path, rec);
2960 btrfs_commit_transaction(trans, root);
2961 btrfs_free_path(path);
/*
 * Validate (and optionally repair) every inode record gathered for @root.
 * Outline of the visible logic:
 *   1. Dead root (refs == 0): only warn if records unexpectedly exist.
 *   2. Track the highest inode number for later lost+found allocation.
 *   3. When repairing, fix backrefs first (possibly forcing a rescan).
 *   4. Check/recreate the root directory inode.
 *   5. Walk all records: clear resolvable errors, try repairs, and print
 *      whatever remains broken.
 * Returns -1 if any unrepaired error was found, 0 otherwise.
 */
2965 static int check_inode_recs(struct btrfs_root *root,
2966 struct cache_tree *inode_cache)
2968 struct cache_extent *cache;
2969 struct ptr_node *node;
2970 struct inode_record *rec;
2971 struct inode_backref *backref;
2976 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* A root with zero refs is being deleted; it should have no records. */
2978 if (btrfs_root_refs(&root->root_item) == 0) {
2979 if (!cache_tree_empty(inode_cache))
2980 fprintf(stderr, "warning line %d\n", __LINE__);
2985 * We need to record the highest inode number for later 'lost+found'
2987 * We must select an ino not used/referred by any existing inode, or
2988 * 'lost+found' ino may be a missing ino in a corrupted leaf,
2989 * this may cause 'lost+found' dir has wrong nlinks.
/* The cache is keyed by ino, so the last extent holds the highest ino. */
2991 cache = last_cache_extent(inode_cache);
2993 node = container_of(cache, struct ptr_node, cache);
2995 if (rec->ino > root->highest_inode)
2996 root->highest_inode = rec->ino;
3000 * We need to repair backrefs first because we could change some of the
3001 * errors in the inode recs.
3003 * We also need to go through and delete invalid backrefs first and then
3004 * add the correct ones second. We do this because we may get EEXIST
3005 * when adding back the correct index because we hadn't yet deleted the
3008 * For example, if we were missing a dir index then the directories
3009 * isize would be wrong, so if we fixed the isize to what we thought it
3010 * would be and then fixed the backref we'd still have a invalid fs, so
3011 * we need to add back the dir index and then check to see if the isize
/* 'stage' drives the delete-then-add ordering described above. */
3016 if (stage == 3 && !err)
3019 cache = search_cache_extent(inode_cache, 0);
3020 while (repair && cache) {
3021 node = container_of(cache, struct ptr_node, cache);
3023 cache = next_cache_extent(cache);
3025 /* Need to free everything up and rescan */
3027 remove_cache_extent(inode_cache, &node->cache);
3029 free_inode_rec(rec);
3033 if (list_empty(&rec->backrefs))
3036 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Special handling for the root directory inode of this tree. */
3050 rec = get_inode_rec(inode_cache, root_dirid, 0);
3051 BUG_ON(IS_ERR(rec));
3053 ret = check_root_dir(rec);
3055 fprintf(stderr, "root %llu root dir %llu error\n",
3056 (unsigned long long)root->root_key.objectid,
3057 (unsigned long long)root_dirid);
3058 print_inode_error(root, rec);
/* Root dir missing entirely: recreate it in its own transaction. */
3063 struct btrfs_trans_handle *trans;
3065 trans = btrfs_start_transaction(root, 1);
3066 if (IS_ERR(trans)) {
3067 err = PTR_ERR(trans);
3072 "root %llu missing its root dir, recreating\n",
3073 (unsigned long long)root->objectid);
3075 ret = btrfs_make_root_dir(trans, root, root_dirid);
3078 btrfs_commit_transaction(trans, root);
3082 fprintf(stderr, "root %llu root dir %llu not found\n",
3083 (unsigned long long)root->root_key.objectid,
3084 (unsigned long long)root_dirid);
/* Main sweep over all remaining inode records. */
3088 cache = search_cache_extent(inode_cache, 0);
3091 node = container_of(cache, struct ptr_node, cache);
3093 remove_cache_extent(inode_cache, &node->cache);
/* Root dir and orphan objectid were handled above / are expected. */
3095 if (rec->ino == root_dirid ||
3096 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3097 free_inode_rec(rec);
/* A present orphan item resolves the NO_ORPHAN_ITEM error. */
3101 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3102 ret = check_orphan_item(root, rec->ino);
3104 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3105 if (can_free_inode_rec(rec)) {
3106 free_inode_rec(rec);
3111 if (!rec->found_inode_item)
3112 rec->errors |= I_ERR_NO_INODE_ITEM;
3113 if (rec->found_link != rec->nlink)
3114 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3116 ret = try_repair_inode(root, rec);
3117 if (ret == 0 && can_free_inode_rec(rec)) {
3118 free_inode_rec(rec);
/* Repair failed or disabled: report everything still wrong. */
3124 if (!(repair && ret == 0))
3126 print_inode_error(root, rec);
3127 list_for_each_entry(backref, &rec->backrefs, list) {
3128 if (!backref->found_dir_item)
3129 backref->errors |= REF_ERR_NO_DIR_ITEM;
3130 if (!backref->found_dir_index)
3131 backref->errors |= REF_ERR_NO_DIR_INDEX;
3132 if (!backref->found_inode_ref)
3133 backref->errors |= REF_ERR_NO_INODE_REF;
3134 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3135 " namelen %u name %s filetype %d errors %x",
3136 (unsigned long long)backref->dir,
3137 (unsigned long long)backref->index,
3138 backref->namelen, backref->name,
3139 backref->filetype, backref->errors);
3140 print_ref_error(backref->errors);
3142 free_inode_rec(rec);
3144 return (error > 0) ? -1 : 0;
/*
 * Look up the root_record for @objectid in @root_cache, creating and
 * inserting a fresh zeroed one if it does not exist yet.  Returns an
 * ERR_PTR on allocation or insertion failure.
 */
3147 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3150 struct cache_extent *cache;
3151 struct root_record *rec = NULL;
3154 cache = lookup_cache_extent(root_cache, objectid, 1);
3156 rec = container_of(cache, struct root_record, cache);
/* Cache miss: allocate and initialize a new record keyed by objectid. */
3158 rec = calloc(1, sizeof(*rec));
3160 return ERR_PTR(-ENOMEM);
3161 rec->objectid = objectid;
3162 INIT_LIST_HEAD(&rec->backrefs);
3163 rec->cache.start = objectid;
3164 rec->cache.size = 1;
3166 ret = insert_cache_extent(root_cache, &rec->cache);
3168 return ERR_PTR(-EEXIST);
/*
 * Find an existing backref on @rec matching (ref_root, dir, name), or
 * allocate a new one with the name stored inline after the struct.
 * NOTE(review): the visible match condition compares ref_root, dir and
 * namelen but not index — index handling appears elided/deferred to the
 * caller (add_root_backref); confirm against the full source.
 */
3173 static struct root_backref *get_root_backref(struct root_record *rec,
3174 u64 ref_root, u64 dir, u64 index,
3175 const char *name, int namelen)
3177 struct root_backref *backref;
3179 list_for_each_entry(backref, &rec->backrefs, list) {
3180 if (backref->ref_root != ref_root || backref->dir != dir ||
3181 backref->namelen != namelen)
3183 if (memcmp(name, backref->name, namelen))
/* Not found: allocate struct + inline name buffer (+1 for NUL). */
3188 backref = calloc(1, sizeof(*backref) + namelen + 1);
3191 backref->ref_root = ref_root;
3193 backref->index = index;
3194 backref->namelen = namelen;
3195 memcpy(backref->name, name, namelen);
3196 backref->name[namelen] = '\0';
3197 list_add_tail(&backref->list, &rec->backrefs);
/*
 * cache_tree destructor callback: free a root_record and all backrefs
 * hanging off it.  Used via FREE_EXTENT_CACHE_BASED_TREE below to build
 * free_root_recs_tree().
 */
3201 static void free_root_record(struct cache_extent *cache)
3203 struct root_record *rec;
3204 struct root_backref *backref;
3206 rec = container_of(cache, struct root_record, cache);
/* Drain the backref list, freeing each entry. */
3207 while (!list_empty(&rec->backrefs)) {
3208 backref = to_root_backref(rec->backrefs.next);
3209 list_del(&backref->list);
3216 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one reference to subvolume @root_id coming from @ref_root, as
 * seen from a DIR_ITEM, DIR_INDEX, ROOT_REF or ROOT_BACKREF item
 * (@item_type).  Duplicate or mismatching items set the appropriate
 * REF_ERR_* bits; a root is marked reachable only once both a forward
 * root ref and a dir item have been seen.
 */
3218 static int add_root_backref(struct cache_tree *root_cache,
3219 u64 root_id, u64 ref_root, u64 dir, u64 index,
3220 const char *name, int namelen,
3221 int item_type, int errors)
3223 struct root_record *rec;
3224 struct root_backref *backref;
3226 rec = get_root_rec(root_cache, root_id);
3227 BUG_ON(IS_ERR(rec));
3228 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3231 backref->errors |= errors;
/* DIR_ITEM carries no index; everything else must agree on it. */
3233 if (item_type != BTRFS_DIR_ITEM_KEY) {
3234 if (backref->found_dir_index || backref->found_back_ref ||
3235 backref->found_forward_ref) {
3236 if (backref->index != index)
3237 backref->errors |= REF_ERR_INDEX_UNMATCH;
3239 backref->index = index;
3243 if (item_type == BTRFS_DIR_ITEM_KEY) {
3244 if (backref->found_forward_ref)
3246 backref->found_dir_item = 1;
3247 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3248 backref->found_dir_index = 1;
3249 } else if (item_type == BTRFS_ROOT_REF_KEY) {
/* Seeing a second forward ref for the same backref is an error. */
3250 if (backref->found_forward_ref)
3251 backref->errors |= REF_ERR_DUP_ROOT_REF;
3252 else if (backref->found_dir_item)
3254 backref->found_forward_ref = 1;
3255 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3256 if (backref->found_back_ref)
3257 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3258 backref->found_back_ref = 1;
/* Reachable == referenced by both a root ref and a directory entry. */
3263 if (backref->found_forward_ref && backref->found_dir_item)
3264 backref->reachable = 1;
/*
 * Move per-subvolume inode records from @src_cache into the global root
 * cache: records that represent child subvolume directories are converted
 * into root backrefs in @dst_cache.  Reloc-tree records are simply thrown
 * away since the reloc tree mirrors another root.
 */
3268 static int merge_root_recs(struct btrfs_root *root,
3269 struct cache_tree *src_cache,
3270 struct cache_tree *dst_cache)
3272 struct cache_extent *cache;
3273 struct ptr_node *node;
3274 struct inode_record *rec;
3275 struct inode_backref *backref;
3278 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3279 free_inode_recs_tree(src_cache);
3284 cache = search_cache_extent(src_cache, 0);
3287 node = container_of(cache, struct ptr_node, cache);
3289 remove_cache_extent(src_cache, &node->cache);
/* Only records that are actually child roots get merged. */
3292 ret = is_child_root(root, root->objectid, rec->ino);
3298 list_for_each_entry(backref, &rec->backrefs, list) {
3299 BUG_ON(backref->found_inode_ref);
/* Translate each dir item/index backref into a root backref. */
3300 if (backref->found_dir_item)
3301 add_root_backref(dst_cache, rec->ino,
3302 root->root_key.objectid, backref->dir,
3303 backref->index, backref->name,
3304 backref->namelen, BTRFS_DIR_ITEM_KEY,
3306 if (backref->found_dir_index)
3307 add_root_backref(dst_cache, rec->ino,
3308 root->root_key.objectid, backref->dir,
3309 backref->index, backref->name,
3310 backref->namelen, BTRFS_DIR_INDEX_KEY,
3314 free_inode_rec(rec);
/*
 * Verify reachability and reference consistency of all subvolume roots.
 * First pass: iteratively clear 'reachable' on backrefs whose referring
 * root is itself unreferenced (cannot detect cycles, per the fixme).
 * Second pass: report unreferenced fs trees and any backref whose
 * DIR_ITEM / DIR_INDEX / ROOT_REF / ROOT_BACKREF set is incomplete.
 * Returns 1 if any error was found, 0 otherwise.
 */
3321 static int check_root_refs(struct btrfs_root *root,
3322 struct cache_tree *root_cache)
3324 struct root_record *rec;
3325 struct root_record *ref_root;
3326 struct root_backref *backref;
3327 struct cache_extent *cache;
/* The top-level fs tree is always considered referenced. */
3333 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3334 BUG_ON(IS_ERR(rec));
3337 /* fixme: this can not detect circular references */
3340 cache = search_cache_extent(root_cache, 0);
3344 rec = container_of(cache, struct root_record, cache);
3345 cache = next_cache_extent(cache);
3347 if (rec->found_ref == 0)
3350 list_for_each_entry(backref, &rec->backrefs, list) {
3351 if (!backref->reachable)
3354 ref_root = get_root_rec(root_cache,
3356 BUG_ON(IS_ERR(ref_root));
3357 if (ref_root->found_ref > 0)
/* Referring root is dead: this backref no longer counts. */
3360 backref->reachable = 0;
3362 if (rec->found_ref == 0)
/* Reporting pass. */
3368 cache = search_cache_extent(root_cache, 0);
3372 rec = container_of(cache, struct root_record, cache);
3373 cache = next_cache_extent(cache);
3375 if (rec->found_ref == 0 &&
3376 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3377 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
/* An orphan item legitimises an unreferenced root (being deleted). */
3378 ret = check_orphan_item(root->fs_info->tree_root,
3384 * If we don't have a root item then we likely just have
3385 * a dir item in a snapshot for this root but no actual
3386 * ref key or anything so it's meaningless.
3388 if (!rec->found_root_item)
3391 fprintf(stderr, "fs tree %llu not referenced\n",
3392 (unsigned long long)rec->objectid);
3396 if (rec->found_ref > 0 && !rec->found_root_item)
3398 list_for_each_entry(backref, &rec->backrefs, list) {
3399 if (!backref->found_dir_item)
3400 backref->errors |= REF_ERR_NO_DIR_ITEM;
3401 if (!backref->found_dir_index)
3402 backref->errors |= REF_ERR_NO_DIR_INDEX;
3403 if (!backref->found_back_ref)
3404 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3405 if (!backref->found_forward_ref)
3406 backref->errors |= REF_ERR_NO_ROOT_REF;
3407 if (backref->reachable && backref->errors)
3414 fprintf(stderr, "fs tree %llu refs %u %s\n",
3415 (unsigned long long)rec->objectid, rec->found_ref,
3416 rec->found_root_item ? "" : "not found");
3418 list_for_each_entry(backref, &rec->backrefs, list) {
3419 if (!backref->reachable)
3421 if (!backref->errors && rec->found_root_item)
3423 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3424 " index %llu namelen %u name %s errors %x\n",
3425 (unsigned long long)backref->ref_root,
3426 (unsigned long long)backref->dir,
3427 (unsigned long long)backref->index,
3428 backref->namelen, backref->name,
3430 print_ref_error(backref->errors);
3433 return errors > 0 ? 1 : 0;
/*
 * Decode a ROOT_REF or ROOT_BACKREF item at @slot of @eb and feed it into
 * the root cache.  For ROOT_REF the key is (parent, REF, child); for
 * ROOT_BACKREF it is (child, BACKREF, parent) — hence the swapped
 * objectid/offset in the two add_root_backref() calls.
 */
3436 static int process_root_ref(struct extent_buffer *eb, int slot,
3437 struct btrfs_key *key,
3438 struct cache_tree *root_cache)
3444 struct btrfs_root_ref *ref;
3445 char namebuf[BTRFS_NAME_LEN];
3448 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3450 dirid = btrfs_root_ref_dirid(eb, ref);
3451 index = btrfs_root_ref_sequence(eb, ref);
3452 name_len = btrfs_root_ref_name_len(eb, ref);
/* Clamp corrupted name lengths to the buffer size and flag the ref. */
3454 if (name_len <= BTRFS_NAME_LEN) {
3458 len = BTRFS_NAME_LEN;
3459 error = REF_ERR_NAME_TOO_LONG;
/* The name is stored inline immediately after the root_ref struct. */
3461 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3463 if (key->type == BTRFS_ROOT_REF_KEY) {
3464 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3465 index, namebuf, len, key->type, error);
3467 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3468 index, namebuf, len, key->type, error);
/*
 * cache_tree destructor callback for btrfs_corrupt_block entries; used via
 * FREE_EXTENT_CACHE_BASED_TREE below to build free_corrupt_blocks_tree().
 */
3473 static void free_corrupt_block(struct cache_extent *cache)
3475 struct btrfs_corrupt_block *corrupt;
3477 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3481 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3484 * Repair the btree of the given root.
3486 * The fix is to remove the node key in corrupt_blocks cache_tree.
3487 * and rebalance the tree.
3488 * After the fix, the btree should be writeable.
/*
 * Repair @root's btree by deleting every node pointer recorded in
 * @corrupt_blocks (phase 1, no balancing so we never merge with corrupted
 * siblings), freeing the corresponding tree-block extents, and then
 * re-searching each key with ins_len=-1 to let btrfs_search_slot()
 * rebalance the tree (phase 2).
 */
3490 static int repair_btree(struct btrfs_root *root,
3491 struct cache_tree *corrupt_blocks)
3493 struct btrfs_trans_handle *trans;
3494 struct btrfs_path *path;
3495 struct btrfs_corrupt_block *corrupt;
3496 struct cache_extent *cache;
3497 struct btrfs_key key;
3502 if (cache_tree_empty(corrupt_blocks))
3505 path = btrfs_alloc_path();
3509 trans = btrfs_start_transaction(root, 1);
3510 if (IS_ERR(trans)) {
3511 ret = PTR_ERR(trans);
3512 fprintf(stderr, "Error starting transaction: %s\n",
/* Phase 1: delete each corrupt node pointer and free its extent. */
3516 cache = first_cache_extent(corrupt_blocks);
3518 corrupt = container_of(cache, struct btrfs_corrupt_block,
3520 level = corrupt->level;
3521 path->lowest_level = level;
3522 key.objectid = corrupt->key.objectid;
3523 key.type = corrupt->key.type;
3524 key.offset = corrupt->key.offset;
3527 * Here we don't want to do any tree balance, since it may
3528 * cause a balance with corrupted brother leaf/node,
3529 * so ins_len set to 0 here.
3530 * Balance will be done after all corrupt node/leaf is deleted.
3532 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Remember the child block this slot pointed to before deleting it. */
3535 offset = btrfs_node_blockptr(path->nodes[level],
3536 path->slots[level]);
3538 /* Remove the ptr */
3539 ret = btrfs_del_ptr(trans, root, path, level,
3540 path->slots[level]);
3544 * Remove the corresponding extent
3545 * return value is not concerned.
3547 btrfs_release_path(path);
3548 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3549 0, root->root_key.objectid,
3551 cache = next_cache_extent(cache);
3554 /* Balance the btree using btrfs_search_slot() */
3555 cache = first_cache_extent(corrupt_blocks);
3557 corrupt = container_of(cache, struct btrfs_corrupt_block,
3559 memcpy(&key, &corrupt->key, sizeof(key));
/* ins_len=-1 triggers the deferred rebalancing for this key range. */
3560 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3563 /* return will always >0 since it won't find the item */
3565 btrfs_release_path(path);
3566 cache = next_cache_extent(cache);
3569 btrfs_commit_transaction(trans, root);
3571 btrfs_free_path(path);
/*
 * Check one fs/subvolume tree: walk the whole btree collecting inode and
 * root records, validate the root node itself, resume correctly from
 * drop_progress for half-deleted snapshots, optionally repair corrupted
 * tree blocks, and finally merge/check the collected records.
 */
3575 static int check_fs_root(struct btrfs_root *root,
3576 struct cache_tree *root_cache,
3577 struct walk_control *wc)
3583 struct btrfs_path path;
3584 struct shared_node root_node;
3585 struct root_record *rec;
3586 struct btrfs_root_item *root_item = &root->root_item;
3587 struct cache_tree corrupt_blocks;
3588 struct orphan_data_extent *orphan;
3589 struct orphan_data_extent *tmp;
3590 enum btrfs_tree_block_status status;
3591 struct node_refs nrefs;
3594 * Reuse the corrupt_block cache tree to record corrupted tree block
3596 * Unlike the usage in extent tree check, here we do it in a per
3597 * fs/subvol tree base.
3599 cache_tree_init(&corrupt_blocks);
3600 root->fs_info->corrupt_blocks = &corrupt_blocks;
/* The reloc tree is not a real subvolume; don't record it. */
3602 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3603 rec = get_root_rec(root_cache, root->root_key.objectid);
3604 BUG_ON(IS_ERR(rec));
3605 if (btrfs_root_refs(root_item) > 0)
3606 rec->found_root_item = 1;
3609 btrfs_init_path(&path);
3610 memset(&root_node, 0, sizeof(root_node));
3611 cache_tree_init(&root_node.root_cache);
3612 cache_tree_init(&root_node.inode_cache);
3613 memset(&nrefs, 0, sizeof(nrefs));
3615 /* Move the orphan extent record to corresponding inode_record */
3616 list_for_each_entry_safe(orphan, tmp,
3617 &root->orphan_data_extents, list) {
3618 struct inode_record *inode;
3620 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3622 BUG_ON(IS_ERR(inode));
3623 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3624 list_move(&orphan->list, &inode->orphan_extents);
/* Seed the walk state with the tree's root node. */
3627 level = btrfs_header_level(root->node);
3628 memset(wc->nodes, 0, sizeof(wc->nodes));
3629 wc->nodes[level] = &root_node;
3630 wc->active_node = level;
3631 wc->root_level = level;
3633 /* We may not have checked the root block, lets do that now */
3634 if (btrfs_is_leaf(root->node))
3635 status = btrfs_check_leaf(root, NULL, root->node);
3637 status = btrfs_check_node(root, NULL, root->node);
3638 if (status != BTRFS_TREE_BLOCK_CLEAN)
/* Live root: start at the top.  Dying root: resume at drop_progress. */
3641 if (btrfs_root_refs(root_item) > 0 ||
3642 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3643 path.nodes[level] = root->node;
3644 extent_buffer_get(root->node);
3645 path.slots[level] = 0;
3647 struct btrfs_key key;
3648 struct btrfs_disk_key found_key;
3650 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3651 level = root_item->drop_level;
3652 path.lowest_level = level;
3653 if (level > btrfs_header_level(root->node) ||
3654 level >= BTRFS_MAX_LEVEL) {
3655 error("ignoring invalid drop level: %u", level);
3658 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* Sanity: the key found must match the recorded drop_progress. */
3661 btrfs_node_key(path.nodes[level], &found_key,
3663 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3664 sizeof(found_key)));
/* Depth-first walk of the whole tree. */
3668 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3674 wret = walk_up_tree(root, &path, wc, &level);
3681 btrfs_release_path(&path);
3683 if (!cache_tree_empty(&corrupt_blocks)) {
3684 struct cache_extent *cache;
3685 struct btrfs_corrupt_block *corrupt;
3687 printf("The following tree block(s) is corrupted in tree %llu:\n",
3688 root->root_key.objectid);
3689 cache = first_cache_extent(&corrupt_blocks);
3691 corrupt = container_of(cache,
3692 struct btrfs_corrupt_block,
3694 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3695 cache->start, corrupt->level,
3696 corrupt->key.objectid, corrupt->key.type,
3697 corrupt->key.offset);
3698 cache = next_cache_extent(cache);
3701 printf("Try to repair the btree for root %llu\n",
3702 root->root_key.objectid);
3703 ret = repair_btree(root, &corrupt_blocks);
3705 fprintf(stderr, "Failed to repair btree: %s\n",
3708 printf("Btree for root %llu is fixed\n",
3709 root->root_key.objectid);
/* Merge per-subvolume records into the global caches and check them. */
3713 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3717 if (root_node.current) {
3718 root_node.current->checked = 1;
3719 maybe_free_inode_rec(&root_node.inode_cache,
3723 err = check_inode_recs(root, &root_node.inode_cache);
3727 free_corrupt_blocks_tree(&corrupt_blocks);
3728 root->fs_info->corrupt_blocks = NULL;
3729 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Return nonzero if @objectid names a tree that should be checked as an
 * fs tree: the two reloc trees are explicitly included on top of the
 * normal is_fstree() range.
 */
3733 static int fs_root_objectid(u64 objectid)
3735 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3736 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3738 return is_fstree(objectid);
/*
 * Iterate every ROOT_ITEM in the tree of tree roots and run
 * check_fs_root() on each fs/subvolume tree; ROOT_REF/ROOT_BACKREF items
 * encountered along the way are fed into the root cache.  If the tree
 * root node changes under us (repairs committed a transaction) or a
 * sub-check returns -EAGAIN, all collected records are dropped and the
 * scan restarts from the beginning.
 */
3741 static int check_fs_roots(struct btrfs_root *root,
3742 struct cache_tree *root_cache)
3744 struct btrfs_path path;
3745 struct btrfs_key key;
3746 struct walk_control wc;
3747 struct extent_buffer *leaf, *tree_node;
3748 struct btrfs_root *tmp_root;
3749 struct btrfs_root *tree_root = root->fs_info->tree_root;
3753 if (ctx.progress_enabled) {
3754 ctx.tp = TASK_FS_ROOTS;
3755 task_start(ctx.info);
3759 * Just in case we made any changes to the extent tree that weren't
3760 * reflected into the free space cache yet.
3763 reset_cached_block_groups(root->fs_info);
3764 memset(&wc, 0, sizeof(wc));
3765 cache_tree_init(&wc.shared);
3766 btrfs_init_path(&path);
3771 key.type = BTRFS_ROOT_ITEM_KEY;
3772 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Snapshot the root node pointer to detect COW during the scan. */
3777 tree_node = tree_root->node;
3779 if (tree_node != tree_root->node) {
3780 free_root_recs_tree(root_cache);
3781 btrfs_release_path(&path);
3784 leaf = path.nodes[0];
3785 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3786 ret = btrfs_next_leaf(tree_root, &path);
3792 leaf = path.nodes[0];
3794 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3795 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3796 fs_root_objectid(key.objectid)) {
/* Reloc trees are read uncached; real subvolumes via the cache. */
3797 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3798 tmp_root = btrfs_read_fs_root_no_cache(
3799 root->fs_info, &key);
3801 key.offset = (u64)-1;
3802 tmp_root = btrfs_read_fs_root(
3803 root->fs_info, &key);
3805 if (IS_ERR(tmp_root)) {
3809 ret = check_fs_root(tmp_root, root_cache, &wc);
3810 if (ret == -EAGAIN) {
/* Restart from scratch; collected records are now stale. */
3811 free_root_recs_tree(root_cache);
3812 btrfs_release_path(&path);
3817 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3818 btrfs_free_fs_root(tmp_root);
3819 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3820 key.type == BTRFS_ROOT_BACKREF_KEY) {
3821 process_root_ref(leaf, path.slots[0], &key,
3828 btrfs_release_path(&path);
3830 free_extent_cache_tree(&wc.shared);
3831 if (!cache_tree_empty(&wc.shared))
3832 fprintf(stderr, "warning line %d\n", __LINE__);
3834 task_stop(ctx.info);
/*
 * Cross-check every backref on @rec against the extent tree: each backref
 * must have been seen in the extent tree, tree backrefs must have been
 * referenced back, data backrefs must match in refcount, disk bytenr and
 * length, and the per-backref counts must sum to rec->refs.  When
 * @print_errs is unset the function returns at the first mismatch
 * without printing (used as a cheap predicate by maybe_free_extent_rec).
 */
3839 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3841 struct list_head *cur = rec->backrefs.next;
3842 struct extent_backref *back;
3843 struct tree_backref *tback;
3844 struct data_backref *dback;
3848 while(cur != &rec->backrefs) {
3849 back = to_extent_backref(cur);
3851 if (!back->found_extent_tree) {
3855 if (back->is_data) {
3856 dback = to_data_backref(back);
3857 fprintf(stderr, "Backref %llu %s %llu"
3858 " owner %llu offset %llu num_refs %lu"
3859 " not found in extent tree\n",
3860 (unsigned long long)rec->start,
3861 back->full_backref ?
3863 back->full_backref ?
3864 (unsigned long long)dback->parent:
3865 (unsigned long long)dback->root,
3866 (unsigned long long)dback->owner,
3867 (unsigned long long)dback->offset,
3868 (unsigned long)dback->num_refs);
3870 tback = to_tree_backref(back);
3871 fprintf(stderr, "Backref %llu parent %llu"
3872 " root %llu not found in extent tree\n",
3873 (unsigned long long)rec->start,
3874 (unsigned long long)tback->parent,
3875 (unsigned long long)tback->root);
/* Tree backref present in the extent tree but never seen in a tree. */
3878 if (!back->is_data && !back->found_ref) {
3882 tback = to_tree_backref(back);
3883 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3884 (unsigned long long)rec->start,
3885 back->full_backref ? "parent" : "root",
3886 back->full_backref ?
3887 (unsigned long long)tback->parent :
3888 (unsigned long long)tback->root, back);
3890 if (back->is_data) {
3891 dback = to_data_backref(back);
/* Local count check: refs seen in fs trees vs refs claimed on disk. */
3892 if (dback->found_ref != dback->num_refs) {
3896 fprintf(stderr, "Incorrect local backref count"
3897 " on %llu %s %llu owner %llu"
3898 " offset %llu found %u wanted %u back %p\n",
3899 (unsigned long long)rec->start,
3900 back->full_backref ?
3902 back->full_backref ?
3903 (unsigned long long)dback->parent:
3904 (unsigned long long)dback->root,
3905 (unsigned long long)dback->owner,
3906 (unsigned long long)dback->offset,
3907 dback->found_ref, dback->num_refs, back);
3909 if (dback->disk_bytenr != rec->start) {
3913 fprintf(stderr, "Backref disk bytenr does not"
3914 " match extent record, bytenr=%llu, "
3915 "ref bytenr=%llu\n",
3916 (unsigned long long)rec->start,
3917 (unsigned long long)dback->disk_bytenr);
3920 if (dback->bytes != rec->nr) {
3924 fprintf(stderr, "Backref bytes do not match "
3925 "extent backref, bytenr=%llu, ref "
3926 "bytes=%llu, backref bytes=%llu\n",
3927 (unsigned long long)rec->start,
3928 (unsigned long long)rec->nr,
3929 (unsigned long long)dback->bytes);
/* Accumulate total found refs: 1 per tree backref, found_ref per data. */
3932 if (!back->is_data) {
3935 dback = to_data_backref(back);
3936 found += dback->found_ref;
3939 if (found != rec->refs) {
3943 fprintf(stderr, "Incorrect global backref count "
3944 "on %llu found %llu wanted %llu\n",
3945 (unsigned long long)rec->start,
3946 (unsigned long long)found,
3947 (unsigned long long)rec->refs);
/* Free every backref queued on @rec's backrefs list. */
3953 static int free_all_extent_backrefs(struct extent_record *rec)
3955 struct extent_backref *back;
3956 struct list_head *cur;
3957 while (!list_empty(&rec->backrefs)) {
3958 cur = rec->backrefs.next;
3959 back = to_extent_backref(cur);
/*
 * Tear down an entire extent-record cache tree, releasing each record's
 * backrefs along the way.
 */
3966 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3967 struct cache_tree *extent_cache)
3969 struct cache_extent *cache;
3970 struct extent_record *rec;
3973 cache = first_cache_extent(extent_cache);
3976 rec = container_of(cache, struct extent_record, cache);
3977 remove_cache_extent(extent_cache, cache);
3978 free_all_extent_backrefs(rec);
/*
 * Release @rec from @extent_cache once it is fully validated: contents
 * and owner checked, refcounts consistent, no duplicates, all
 * backpointers verified, and none of the structural error flags set.
 * A record that still has problems is kept for later reporting/repair.
 */
3983 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3984 struct extent_record *rec)
3986 if (rec->content_checked && rec->owner_ref_checked &&
3987 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3988 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3989 rec->bad_full_backref && !rec->crossing_stripes &&
3990 !rec->wrong_chunk_type) {
3991 remove_cache_extent(extent_cache, &rec->cache);
3992 free_all_extent_backrefs(rec);
3993 list_del_init(&rec->list);
/*
 * Verify that the owner recorded in tree block @buf actually references
 * it: first look for a matching non-full tree backref on @rec, and
 * failing that, search the owner's fs tree for the first key of @buf and
 * confirm the parent node points at buf->start.  Returns 0 when the
 * owner ref checks out, 1 otherwise.
 */
3999 static int check_owner_ref(struct btrfs_root *root,
4000 struct extent_record *rec,
4001 struct extent_buffer *buf)
4003 struct extent_backref *node;
4004 struct tree_backref *back;
4005 struct btrfs_root *ref_root;
4006 struct btrfs_key key;
4007 struct btrfs_path path;
4008 struct extent_buffer *parent;
/* Fast path: an explicit backref from the header owner already exists. */
4013 list_for_each_entry(node, &rec->backrefs, list) {
4016 if (!node->found_ref)
4018 if (node->full_backref)
4020 back = to_tree_backref(node);
4021 if (btrfs_header_owner(buf) == back->root)
4024 BUG_ON(rec->is_root);
4026 /* try to find the block by search corresponding fs tree */
4027 key.objectid = btrfs_header_owner(buf);
4028 key.type = BTRFS_ROOT_ITEM_KEY;
4029 key.offset = (u64)-1;
4031 ref_root = btrfs_read_fs_root(root->fs_info, &key);
4032 if (IS_ERR(ref_root))
4035 level = btrfs_header_level(buf);
/* Use buf's first key to locate where it should sit in the owner tree. */
4037 btrfs_item_key_to_cpu(buf, &key, 0);
4039 btrfs_node_key_to_cpu(buf, &key, 0);
4041 btrfs_init_path(&path);
4042 path.lowest_level = level + 1;
4043 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
/* The parent at level+1 must point back at this exact block. */
4047 parent = path.nodes[level + 1];
4048 if (parent && buf->start == btrfs_node_blockptr(parent,
4049 path.slots[level + 1]))
4052 btrfs_release_path(&path);
4053 return found ? 0 : 1;
/*
 * Return whether @rec has a (non-full) tree backref owned by the extent
 * tree itself, i.e. the recorded block belongs to the extent tree.
 */
4056 static int is_extent_tree_record(struct extent_record *rec)
4058 struct list_head *cur = rec->backrefs.next;
4059 struct extent_backref *node;
4060 struct tree_backref *back;
4063 while(cur != &rec->backrefs) {
4064 node = to_extent_backref(cur);
4068 back = to_tree_backref(node);
/* Full backrefs identify a parent block, not an owning tree. */
4069 if (node->full_backref)
4071 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Handle an unreadable tree block at [start, start+len): if the block
 * belongs to the extent tree, register it as a corrupt extent record so
 * later repair can act on it.  Blocks of other trees are ignored here.
 */
4078 static int record_bad_block_io(struct btrfs_fs_info *info,
4079 struct cache_tree *extent_cache,
4082 struct extent_record *rec;
4083 struct cache_extent *cache;
4084 struct btrfs_key key;
4086 cache = lookup_cache_extent(extent_cache, start, len);
4090 rec = container_of(cache, struct extent_record, cache);
4091 if (!is_extent_tree_record(rec))
/* parent_key is the key the parent node stored for this child block. */
4094 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4095 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot+1 of @buf to restore key order.
 * For internal nodes the whole key_ptr pair is exchanged; for leaves the
 * item headers, keys and item data are swapped, with the low keys along
 * @path fixed up when slot 0 is involved.  Helper for fix_key_order().
 */
4098 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4099 struct extent_buffer *buf, int slot)
4101 if (btrfs_header_level(buf)) {
4102 struct btrfs_key_ptr ptr1, ptr2;
/* Internal node: swap the two key_ptr entries wholesale. */
4104 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4105 sizeof(struct btrfs_key_ptr));
4106 read_extent_buffer(buf, &ptr2,
4107 btrfs_node_key_ptr_offset(slot + 1),
4108 sizeof(struct btrfs_key_ptr));
4109 write_extent_buffer(buf, &ptr1,
4110 btrfs_node_key_ptr_offset(slot + 1),
4111 sizeof(struct btrfs_key_ptr));
4112 write_extent_buffer(buf, &ptr2,
4113 btrfs_node_key_ptr_offset(slot),
4114 sizeof(struct btrfs_key_ptr));
4116 struct btrfs_disk_key key;
/* Slot 0 changed: propagate the new first key up the path. */
4117 btrfs_node_key(buf, &key, 0);
4118 btrfs_fixup_low_keys(root, path, &key,
4119 btrfs_header_level(buf) + 1);
4122 struct btrfs_item *item1, *item2;
4123 struct btrfs_key k1, k2;
4124 char *item1_data, *item2_data;
4125 u32 item1_offset, item2_offset, item1_size, item2_size;
4127 item1 = btrfs_item_nr(slot);
4128 item2 = btrfs_item_nr(slot + 1);
4129 btrfs_item_key_to_cpu(buf, &k1, slot);
4130 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4131 item1_offset = btrfs_item_offset(buf, item1);
4132 item2_offset = btrfs_item_offset(buf, item2);
4133 item1_size = btrfs_item_size(buf, item1);
4134 item2_size = btrfs_item_size(buf, item2);
4136 item1_data = malloc(item1_size);
4139 item2_data = malloc(item2_size);
/* Copy both payloads out, then write each into the other's slot. */
4145 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4146 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4148 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4149 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
/* The headers keep their offsets/sizes pointing at the swapped data. */
4153 btrfs_set_item_offset(buf, item1, item2_offset);
4154 btrfs_set_item_offset(buf, item2, item1_offset);
4155 btrfs_set_item_size(buf, item1, item2_size);
4156 btrfs_set_item_size(buf, item2, item1_size);
/* Finally swap the item keys via the path-aware helper. */
4158 path->slots[0] = slot;
4159 btrfs_set_item_key_unsafe(root, path, &k2);
4160 path->slots[0] = slot + 1;
4161 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair BTRFS_TREE_BLOCK_BAD_KEY_ORDER: scan adjacent key pairs in the
 * block at path->lowest_level and swap any pair that is out of order
 * (pairs already in strictly ascending order — comp < 0 — are skipped).
 * NOTE(review): listing gaps — the error check after swap_values (orig
 * 4188-4189) and the final return are not visible here.
 */
4166 static int fix_key_order(struct btrfs_trans_handle *trans,
4167 struct btrfs_root *root,
4168 struct btrfs_path *path)
4170 struct extent_buffer *buf;
4171 struct btrfs_key k1, k2;
4173 int level = path->lowest_level;
4176 buf = path->nodes[level];
4177 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
/* Node vs leaf use different key accessors. */
4179 btrfs_node_key_to_cpu(buf, &k1, i);
4180 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4182 btrfs_item_key_to_cpu(buf, &k1, i);
4183 btrfs_item_key_to_cpu(buf, &k2, i + 1);
4185 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4187 ret = swap_values(root, path, buf, i);
4190 btrfs_mark_buffer_dirty(buf);
/*
 * Delete the (bogus) item at @slot from leaf @buf, but only for key types
 * that are safe to lose (dir index, extent item/metadata item, tree block
 * ref, extent data ref) — those can be rebuilt by later fsck passes.
 * Removal shifts the item headers down and decrements nritems; if slot 0
 * was removed, the parent's low key is fixed up.
 * NOTE(review): listing gaps — the refusal return for other key types
 * (after orig 4211) and the condition guarding the low-key fixup (around
 * orig 4222-4224) are not visible here.
 */
4196 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4197 struct btrfs_root *root,
4198 struct btrfs_path *path,
4199 struct extent_buffer *buf, int slot)
4201 struct btrfs_key key;
4202 int nritems = btrfs_header_nritems(buf);
4204 btrfs_item_key_to_cpu(buf, &key, slot);
4206 /* These are all the keys we can deal with missing. */
4207 if (key.type != BTRFS_DIR_INDEX_KEY &&
4208 key.type != BTRFS_EXTENT_ITEM_KEY &&
4209 key.type != BTRFS_METADATA_ITEM_KEY &&
4210 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4211 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4214 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4215 (unsigned long long)key.objectid, key.type,
4216 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item-header array; item data is left in place. */
4217 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4218 btrfs_item_nr_offset(slot + 1),
4219 sizeof(struct btrfs_item) *
4220 (nritems - slot - 1));
4221 btrfs_set_header_nritems(buf, nritems - 1);
4223 struct btrfs_disk_key disk_key;
4225 btrfs_item_key(buf, &disk_key, 0);
4226 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4228 btrfs_mark_buffer_dirty(buf);
/*
 * Repair BTRFS_TREE_BLOCK_INVALID_OFFSETS on a leaf: walk the items and
 * make each item's data region abut its neighbour (item 0 must end at
 * BTRFS_LEAF_DATA_SIZE; item i must end where item i-1 starts).  Items
 * that run off the end of the leaf or overlap a neighbour are handed to
 * delete_bogus_item(); recoverable gaps are closed by shifting the item
 * data up and updating the header offset.
 * NOTE(review): listing gaps — loop-restart after a deletion, the "shift
 * == 0" skip, and the offset written by btrfs_set_item_offset (orig 4288)
 * are not visible here.
 */
4232 static int fix_item_offset(struct btrfs_trans_handle *trans,
4233 struct btrfs_root *root,
4234 struct btrfs_path *path)
4236 struct extent_buffer *buf;
4240 /* We should only get this for leaves */
4241 BUG_ON(path->lowest_level);
4242 buf = path->nodes[0];
4244 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4245 unsigned int shift = 0, offset;
/* First item: its data must end exactly at the end of the leaf. */
4247 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4248 BTRFS_LEAF_DATA_SIZE(root)) {
4249 if (btrfs_item_end_nr(buf, i) >
4250 BTRFS_LEAF_DATA_SIZE(root)) {
4251 ret = delete_bogus_item(trans, root, path,
4255 fprintf(stderr, "item is off the end of the "
4256 "leaf, can't fix\n");
4260 shift = BTRFS_LEAF_DATA_SIZE(root) -
4261 btrfs_item_end_nr(buf, i);
/* Later items: data must end where the previous item's data begins. */
4262 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4263 btrfs_item_offset_nr(buf, i - 1)) {
4264 if (btrfs_item_end_nr(buf, i) >
4265 btrfs_item_offset_nr(buf, i - 1)) {
4266 ret = delete_bogus_item(trans, root, path,
4270 fprintf(stderr, "items overlap, can't fix\n");
4274 shift = btrfs_item_offset_nr(buf, i - 1) -
4275 btrfs_item_end_nr(buf, i);
4280 printf("Shifting item nr %d by %u bytes in block %llu\n",
4281 i, shift, (unsigned long long)buf->start);
4282 offset = btrfs_item_offset_nr(buf, i);
/* Move the item's payload up by @shift to close the gap. */
4283 memmove_extent_buffer(buf,
4284 btrfs_leaf_data(buf) + offset + shift,
4285 btrfs_leaf_data(buf) + offset,
4286 btrfs_item_size_nr(buf, i));
4287 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4289 btrfs_mark_buffer_dirty(buf);
4293 * We may have moved things, in which case we want to exit so we don't
4294 * write those changes out. Once we have proper abort functionality in
4295 * progs this can be changed to something nicer.
4302 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4303 * then just return -EIO.
/*
 * Only BAD_KEY_ORDER and INVALID_OFFSETS are repairable here.  For each
 * root that references the block (btrfs_find_all_roots), a transaction is
 * started and btrfs_search_slot() is used (with skip_check_block) to COW
 * down to the bad block before fix_key_order()/fix_item_offset() run, and
 * the transaction is committed.
 * NOTE(review): listing gaps — several error-path gotos/returns (e.g.
 * after orig 4322/4326/4339/4358) and the loop/exit labels are missing
 * from this view.
 */
4305 static int try_to_fix_bad_block(struct btrfs_root *root,
4306 struct extent_buffer *buf,
4307 enum btrfs_tree_block_status status)
4309 struct btrfs_trans_handle *trans;
4310 struct ulist *roots;
4311 struct ulist_node *node;
4312 struct btrfs_root *search_root;
4313 struct btrfs_path *path;
4314 struct ulist_iterator iter;
4315 struct btrfs_key root_key, key;
4318 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4319 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4322 path = btrfs_alloc_path();
4326 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4329 btrfs_free_path(path);
/* Try the repair under every root that references this block. */
4333 ULIST_ITER_INIT(&iter);
4334 while ((node = ulist_next(roots, &iter))) {
4335 root_key.objectid = node->val;
4336 root_key.type = BTRFS_ROOT_ITEM_KEY;
4337 root_key.offset = (u64)-1;
4339 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4346 trans = btrfs_start_transaction(search_root, 0);
4347 if (IS_ERR(trans)) {
4348 ret = PTR_ERR(trans);
/* Descend to the bad block itself (COWs the path); don't validate it. */
4352 path->lowest_level = btrfs_header_level(buf);
4353 path->skip_check_block = 1;
4354 if (path->lowest_level)
4355 btrfs_node_key_to_cpu(buf, &key, 0);
4357 btrfs_item_key_to_cpu(buf, &key, 0);
4358 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4361 btrfs_commit_transaction(trans, search_root);
4364 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4365 ret = fix_key_order(trans, search_root, path);
4366 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4367 ret = fix_item_offset(trans, search_root, path);
4369 btrfs_commit_transaction(trans, search_root);
4372 btrfs_release_path(path);
4373 btrfs_commit_transaction(trans, search_root);
4376 btrfs_free_path(path);
/*
 * Validate one tree block: record its generation and first-key info in the
 * extent cache, run btrfs_check_leaf()/btrfs_check_node(), and on failure
 * attempt try_to_fix_bad_block().  On success mark the extent record's
 * content as checked; owner refs are either trusted (FULL_BACKREF flag)
 * or verified via check_owner_ref().
 * NOTE(review): listing gaps — the lookup-miss return (after orig 4391),
 * the "signal caller to rescan" return value (around orig 4425) and the
 * final return are not visible in this view.
 */
4380 static int check_block(struct btrfs_root *root,
4381 struct cache_tree *extent_cache,
4382 struct extent_buffer *buf, u64 flags)
4384 struct extent_record *rec;
4385 struct cache_extent *cache;
4386 struct btrfs_key key;
4387 enum btrfs_tree_block_status status;
4391 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4394 rec = container_of(cache, struct extent_record, cache);
4395 rec->generation = btrfs_header_generation(buf);
4397 level = btrfs_header_level(buf);
4398 if (btrfs_header_nritems(buf) > 0) {
/* Remember the block's first key for later cross-checks. */
4401 btrfs_item_key_to_cpu(buf, &key, 0);
4403 btrfs_node_key_to_cpu(buf, &key, 0);
4405 rec->info_objectid = key.objectid;
4407 rec->info_level = level;
4409 if (btrfs_is_leaf(buf))
4410 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4412 status = btrfs_check_node(root, &rec->parent_key, buf);
4414 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4416 status = try_to_fix_bad_block(root, buf, status);
4417 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4419 fprintf(stderr, "bad block %llu\n",
4420 (unsigned long long)buf->start);
4423 * Signal to callers we need to start the scan over
4424 * again since we'll have cowed blocks.
4429 rec->content_checked = 1;
4430 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4431 rec->owner_ref_checked = 1;
4433 ret = check_owner_ref(root, rec, buf);
4435 rec->owner_ref_checked = 1;
4439 maybe_free_extent_rec(extent_cache, rec);
/*
 * Find an existing tree backref on @rec matching either @parent (full
 * backref) or @root (normal backref).  Returns NULL when none matches
 * (return paths fall in the listing gaps).
 * NOTE(review): lines 4449, 4452-4454, 4456, 4458, 4460-4461, 4463,
 * 4465+ of the original are missing here.
 */
4443 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4444 u64 parent, u64 root)
4446 struct list_head *cur = rec->backrefs.next;
4447 struct extent_backref *node;
4448 struct tree_backref *back;
4450 while(cur != &rec->backrefs) {
4451 node = to_extent_backref(cur);
4455 back = to_tree_backref(node);
/* Caller passes either parent!=0 (full backref) or a root id. */
4457 if (!node->full_backref)
4459 if (parent == back->parent)
4462 if (node->full_backref)
4464 if (back->root == root)
/*
 * Allocate a tree backref on @rec: a full backref keyed by @parent when
 * parent is set, otherwise a normal backref keyed by @root, and link it
 * onto rec->backrefs.
 * NOTE(review): listing gaps — malloc-failure return, the parent/root
 * branch condition and the assignment of ref->root are not visible here.
 */
4471 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4472 u64 parent, u64 root)
4474 struct tree_backref *ref = malloc(sizeof(*ref));
4478 memset(&ref->node, 0, sizeof(ref->node));
4480 ref->parent = parent;
4481 ref->node.full_backref = 1;
4484 ref->node.full_backref = 0;
4486 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Find an existing data backref on @rec.  Full backrefs match on @parent;
 * normal backrefs match on (root, owner, offset).  When @found_ref is set
 * and the existing ref was already found on-disk, the bytes/disk_bytenr
 * must also match, otherwise the candidate is skipped so a conflicting
 * duplicate ref can be noticed later (see comment in add_data_backref).
 * NOTE(review): listing gaps — list advance, skip/continue paths and the
 * final NULL return are not visible in this view.
 */
4491 static struct data_backref *find_data_backref(struct extent_record *rec,
4492 u64 parent, u64 root,
4493 u64 owner, u64 offset,
4495 u64 disk_bytenr, u64 bytes)
4497 struct list_head *cur = rec->backrefs.next;
4498 struct extent_backref *node;
4499 struct data_backref *back;
4501 while(cur != &rec->backrefs) {
4502 node = to_extent_backref(cur);
4506 back = to_data_backref(node);
4508 if (!node->full_backref)
4510 if (parent == back->parent)
4513 if (node->full_backref)
4515 if (back->root == root && back->owner == owner &&
4516 back->offset == offset) {
4517 if (found_ref && node->found_ref &&
4518 (back->bytes != bytes ||
4519 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data backref on @rec — full backref (parent set) or normal
 * (root/owner/offset) — link it onto rec->backrefs, and grow the record's
 * max_size if @max_size exceeds it.
 * NOTE(review): listing gaps — malloc-failure return, the parent/root
 * branch, root/owner assignments and counter initialisation are not
 * visible here.
 */
4528 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4529 u64 parent, u64 root,
4530 u64 owner, u64 offset,
4533 struct data_backref *ref = malloc(sizeof(*ref));
4537 memset(&ref->node, 0, sizeof(ref->node));
4538 ref->node.is_data = 1;
4541 ref->parent = parent;
4544 ref->node.full_backref = 1;
4548 ref->offset = offset;
4549 ref->node.full_backref = 0;
4551 ref->bytes = max_size;
4554 list_add_tail(&ref->node.list, &rec->backrefs);
4555 if (max_size > rec->max_size)
4556 rec->max_size = max_size;
4560 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the extent lives in a block group whose
 * flags contradict the extent's kind: data extents must sit in DATA block
 * groups; metadata extents in SYSTEM or METADATA groups, with the
 * SYSTEM-vs-METADATA distinction decided by the first backref's root
 * (chunk tree root => SYSTEM).
 */
4561 static void check_extent_type(struct extent_record *rec)
4563 struct btrfs_block_group_cache *bg_cache;
4565 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4569 /* data extent, check chunk directly*/
4570 if (!rec->metadata) {
4571 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4572 rec->wrong_chunk_type = 1;
4576 /* metadata extent, check the obvious case first */
4577 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4578 BTRFS_BLOCK_GROUP_METADATA))) {
4579 rec->wrong_chunk_type = 1;
4584 * Check SYSTEM extent, as it's also marked as metadata, we can only
4585 * make sure it's a SYSTEM extent by its backref
4587 if (!list_empty(&rec->backrefs)) {
4588 struct extent_backref *node;
4589 struct tree_backref *tback;
4592 node = to_extent_backref(rec->backrefs.next);
4593 if (node->is_data) {
4594 /* tree block shouldn't have data backref */
4595 rec->wrong_chunk_type = 1;
4598 tback = container_of(node, struct tree_backref, node);
4600 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4601 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4603 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4604 if (!(bg_cache->flags & bg_type))
4605 rec->wrong_chunk_type = 1;
4610 * Allocate a new extent record, fill default values from @tmpl and insert int
4611 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4612 * the cache, otherwise it fails.
/*
 * NOTE(review): listing gaps — malloc-failure handling (after orig 4620),
 * insert-failure handling (after orig 4645), the metadata guard around
 * the crossing-stripes check and the final return are not visible here.
 */
4614 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4615 struct extent_record *tmpl)
4617 struct extent_record *rec;
4620 rec = malloc(sizeof(*rec));
4623 rec->start = tmpl->start;
4624 rec->max_size = tmpl->max_size;
/* nr can never be smaller than the known max extent size. */
4625 rec->nr = max(tmpl->nr, tmpl->max_size);
4626 rec->found_rec = tmpl->found_rec;
4627 rec->content_checked = tmpl->content_checked;
4628 rec->owner_ref_checked = tmpl->owner_ref_checked;
4629 rec->num_duplicates = 0;
4630 rec->metadata = tmpl->metadata;
4631 rec->flag_block_full_backref = FLAG_UNSET;
4632 rec->bad_full_backref = 0;
4633 rec->crossing_stripes = 0;
4634 rec->wrong_chunk_type = 0;
4635 rec->is_root = tmpl->is_root;
4636 rec->refs = tmpl->refs;
4637 rec->extent_item_refs = tmpl->extent_item_refs;
4638 rec->parent_generation = tmpl->parent_generation;
4639 INIT_LIST_HEAD(&rec->backrefs);
4640 INIT_LIST_HEAD(&rec->dups);
4641 INIT_LIST_HEAD(&rec->list);
4642 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4643 rec->cache.start = tmpl->start;
4644 rec->cache.size = tmpl->nr;
4645 ret = insert_cache_extent(extent_cache, &rec->cache);
4647 bytes_used += rec->nr;
/* Metadata must not straddle a stripe boundary (kernel scrub limit). */
4650 rec->crossing_stripes = check_crossing_stripes(rec->start,
4651 global_info->tree_root->nodesize);
4652 check_extent_type(rec);
4657 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4659 * - refs - if found, increase refs
4660 * - is_root - if found, set
4661 * - content_checked - if found, set
4662 * - owner_ref_checked - if found, set
4664 * If not found, create a new one, initialize and insert.
/*
 * NOTE(review): numbered listing with gaps — several conditions (e.g. the
 * guard around "rec->nr = max(...)", the malloc-failure path for the dup
 * record, dup bookkeeping around orig 4707-4708 and 4714-4719) and the
 * final returns are not visible in this view.
 */
4666 static int add_extent_rec(struct cache_tree *extent_cache,
4667 struct extent_record *tmpl)
4669 struct extent_record *rec;
4670 struct cache_extent *cache;
4674 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4676 rec = container_of(cache, struct extent_record, cache);
4680 rec->nr = max(tmpl->nr, tmpl->max_size);
4683 * We need to make sure to reset nr to whatever the extent
4684 * record says was the real size, this way we can compare it to
4687 if (tmpl->found_rec) {
/* Same range but different start, or a second on-disk record: dup. */
4688 if (tmpl->start != rec->start || rec->found_rec) {
4689 struct extent_record *tmp;
4692 if (list_empty(&rec->list))
4693 list_add_tail(&rec->list,
4694 &duplicate_extents);
4697 * We have to do this song and dance in case we
4698 * find an extent record that falls inside of
4699 * our current extent record but does not have
4700 * the same objectid.
4702 tmp = malloc(sizeof(*tmp));
4705 tmp->start = tmpl->start;
4706 tmp->max_size = tmpl->max_size;
4709 tmp->metadata = tmpl->metadata;
4710 tmp->extent_item_refs = tmpl->extent_item_refs;
4711 INIT_LIST_HEAD(&tmp->list);
4712 list_add_tail(&tmp->list, &rec->dups);
4713 rec->num_duplicates++;
4720 if (tmpl->extent_item_refs && !dup) {
/* Two EXTENT_ITEMs claiming different refcounts — report it. */
4721 if (rec->extent_item_refs) {
4722 fprintf(stderr, "block %llu rec "
4723 "extent_item_refs %llu, passed %llu\n",
4724 (unsigned long long)tmpl->start,
4725 (unsigned long long)
4726 rec->extent_item_refs,
4727 (unsigned long long)tmpl->extent_item_refs);
4729 rec->extent_item_refs = tmpl->extent_item_refs;
4733 if (tmpl->content_checked)
4734 rec->content_checked = 1;
4735 if (tmpl->owner_ref_checked)
4736 rec->owner_ref_checked = 1;
4737 memcpy(&rec->parent_key, &tmpl->parent_key,
4738 sizeof(tmpl->parent_key));
4739 if (tmpl->parent_generation)
4740 rec->parent_generation = tmpl->parent_generation;
4741 if (rec->max_size < tmpl->max_size)
4742 rec->max_size = tmpl->max_size;
4745 * A metadata extent can't cross stripe_len boundary, otherwise
4746 * kernel scrub won't be able to handle it.
4747 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4751 rec->crossing_stripes = check_crossing_stripes(
4752 rec->start, global_info->tree_root->nodesize);
4753 check_extent_type(rec);
4754 maybe_free_extent_rec(extent_cache, rec);
4758 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Add (or merge) a tree backref for the block at @bytenr.  If no extent
 * record exists yet a minimal one is created via add_extent_rec_nolookup,
 * then a tree backref (full: @parent, or keyed by @root) is found or
 * allocated.  @found_ref distinguishes a ref seen while walking the tree
 * from one read out of the extent tree; duplicates in either category are
 * reported.
 * NOTE(review): listing gaps — tmpl field setup (orig 4777-4779), error
 * returns, and the branch selecting found_ref vs found_extent_tree are
 * not visible here.
 */
4763 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4764 u64 parent, u64 root, int found_ref)
4766 struct extent_record *rec;
4767 struct tree_backref *back;
4768 struct cache_extent *cache;
4771 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4773 struct extent_record tmpl;
4775 memset(&tmpl, 0, sizeof(tmpl));
4776 tmpl.start = bytenr;
4780 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4784 /* really a bug in cache_extent implement now */
4785 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4790 rec = container_of(cache, struct extent_record, cache);
4791 if (rec->start != bytenr) {
4793 * Several cause, from unaligned bytenr to over lapping extents
4798 back = find_tree_backref(rec, parent, root);
4800 back = alloc_tree_backref(rec, parent, root);
4806 if (back->node.found_ref) {
4807 fprintf(stderr, "Extent back ref already exists "
4808 "for %llu parent %llu root %llu \n",
4809 (unsigned long long)bytenr,
4810 (unsigned long long)parent,
4811 (unsigned long long)root);
4813 back->node.found_ref = 1;
4815 if (back->node.found_extent_tree) {
4816 fprintf(stderr, "Extent back ref already exists "
4817 "for %llu parent %llu root %llu \n",
4818 (unsigned long long)bytenr,
4819 (unsigned long long)parent,
4820 (unsigned long long)root);
4822 back->node.found_extent_tree = 1;
4824 check_extent_type(rec);
4825 maybe_free_extent_rec(extent_cache, rec);
/*
 * Add (or merge) a data backref for the extent at @bytenr, creating a
 * minimal extent record first if none exists.  @found_ref set means the
 * ref was seen from a file extent item (max_size is then the real extent
 * size and must agree with any previously-found ref); otherwise the ref
 * comes from the extent tree and num_refs is recorded.
 * NOTE(review): listing gaps — tmpl field setup (orig 4844/4846), error
 * returns, the found_ref branch header, and the final return are not
 * visible in this view.
 */
4829 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4830 u64 parent, u64 root, u64 owner, u64 offset,
4831 u32 num_refs, int found_ref, u64 max_size)
4833 struct extent_record *rec;
4834 struct data_backref *back;
4835 struct cache_extent *cache;
4838 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4840 struct extent_record tmpl;
4842 memset(&tmpl, 0, sizeof(tmpl));
4843 tmpl.start = bytenr;
4845 tmpl.max_size = max_size;
4847 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4851 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4856 rec = container_of(cache, struct extent_record, cache);
4857 if (rec->max_size < max_size)
4858 rec->max_size = max_size;
4861 * If found_ref is set then max_size is the real size and must match the
4862 * existing refs. So if we have already found a ref then we need to
4863 * make sure that this ref matches the existing one, otherwise we need
4864 * to add a new backref so we can notice that the backrefs don't match
4865 * and we need to figure out who is telling the truth. This is to
4866 * account for that awful fsync bug I introduced where we'd end up with
4867 * a btrfs_file_extent_item that would have its length include multiple
4868 * prealloc extents or point inside of a prealloc extent.
4870 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4873 back = alloc_data_backref(rec, parent, root, owner, offset,
4879 BUG_ON(num_refs != 1);
4880 if (back->node.found_ref)
4881 BUG_ON(back->bytes != max_size);
4882 back->node.found_ref = 1;
4883 back->found_ref += 1;
4884 back->bytes = max_size;
4885 back->disk_bytenr = bytenr;
/* A ref seen in a fs tree proves the data was reachable and checked. */
4887 rec->content_checked = 1;
4888 rec->owner_ref_checked = 1;
4890 if (back->node.found_extent_tree) {
4891 fprintf(stderr, "Extent back ref already exists "
4892 "for %llu parent %llu root %llu "
4893 "owner %llu offset %llu num_refs %lu\n",
4894 (unsigned long long)bytenr,
4895 (unsigned long long)parent,
4896 (unsigned long long)root,
4897 (unsigned long long)owner,
4898 (unsigned long long)offset,
4899 (unsigned long)num_refs);
4901 back->num_refs = num_refs;
4902 back->node.found_extent_tree = 1;
4904 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue [bytenr, bytenr+size) for later processing: insert into @seen
 * first (acts as a dedup set — presumably the missing lines return early
 * when already present; TODO confirm against full source), then into
 * @pending.
 */
4908 static int add_pending(struct cache_tree *pending,
4909 struct cache_tree *seen, u64 bytenr, u32 size)
4912 ret = add_cache_extent(seen, bytenr, size);
4915 add_cache_extent(pending, bytenr, size);
/*
 * Pick the next batch of block ranges to read, filling @bits (capacity
 * @bits_nr) and returning the count.  Preference order visible here:
 * 1) an entry from @reada, 2) nodes near @last (search starts 32K back to
 * catch neighbours), 3) @pending.  When space remains (> 8 free slots),
 * nearby pending extents within 32K of the batch end are appended to
 * batch up I/O.
 * NOTE(review): heavy listing gaps — branch structure between the two
 * do/while loops and several returns are missing; treat flow as partial.
 */
4919 static int pick_next_pending(struct cache_tree *pending,
4920 struct cache_tree *reada,
4921 struct cache_tree *nodes,
4922 u64 last, struct block_info *bits, int bits_nr,
4925 unsigned long node_start = last;
4926 struct cache_extent *cache;
4929 cache = search_cache_extent(reada, 0);
4931 bits[0].start = cache->start;
4932 bits[0].size = cache->size;
/* Back up 32K so neighbouring nodes just before @last are included. */
4937 if (node_start > 32768)
4938 node_start -= 32768;
4940 cache = search_cache_extent(nodes, node_start);
4942 cache = search_cache_extent(nodes, 0);
4945 cache = search_cache_extent(pending, 0);
4950 bits[ret].start = cache->start;
4951 bits[ret].size = cache->size;
4952 cache = next_cache_extent(cache);
4954 } while (cache && ret < bits_nr);
4960 bits[ret].start = cache->start;
4961 bits[ret].size = cache->size;
4962 cache = next_cache_extent(cache);
4964 } while (cache && ret < bits_nr);
4966 if (bits_nr - ret > 8) {
4967 u64 lookup = bits[0].start + bits[0].size;
4968 struct cache_extent *next;
4969 next = search_cache_extent(pending, lookup);
/* Stop batching once the gap to the next pending extent exceeds 32K. */
4971 if (next->start - lookup > 32768)
4973 bits[ret].start = next->start;
4974 bits[ret].size = next->size;
4975 lookup = next->start + next->size;
4979 next = next_cache_extent(next);
/*
 * cache_tree_free_extents() callback: unlink a chunk_record from its
 * lists before it is freed (the free itself falls in a listing gap).
 */
4987 static void free_chunk_record(struct cache_extent *cache)
4989 struct chunk_record *rec;
4991 rec = container_of(cache, struct chunk_record, cache);
4992 list_del_init(&rec->list);
4993 list_del_init(&rec->dextents);
/* Free every chunk_record in @chunk_cache via free_chunk_record(). */
4997 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4999 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * rb-tree destruction callback: recover the device_record from its rb
 * node (the free call falls in a listing gap).
 */
5002 static void free_device_record(struct rb_node *node)
5004 struct device_record *rec;
5006 rec = container_of(node, struct device_record, node);
/*
 * NOTE(review): the line below belongs to a separate function (orig
 * ~5008, presumably free_device_cache) whose signature is missing from
 * this listing; it tears down the device cache tree with the callback
 * above.
 */
5010 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the block-group tree's extent cache and, on
 * success, append it to the tree's block_groups list (the error return
 * between the two steps falls in a listing gap).
 */
5012 int insert_block_group_record(struct block_group_tree *tree,
5013 struct block_group_record *bg_rec)
5017 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5021 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * cache_tree_free_extents() callback: unlink a block_group_record before
 * it is freed (the free itself falls in a listing gap).
 */
5025 static void free_block_group_record(struct cache_extent *cache)
5027 struct block_group_record *rec;
5029 rec = container_of(cache, struct block_group_record, cache);
5030 list_del_init(&rec->list);
/* Free every block_group_record in @tree via free_block_group_record(). */
5034 void free_block_group_tree(struct block_group_tree *tree)
5036 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the device-extent tree.  Newly inserted records are
 * provisionally "orphans" on both lists until matched with their chunk
 * and device (the error return after insertion falls in a listing gap).
 */
5039 int insert_device_extent_record(struct device_extent_tree *tree,
5040 struct device_extent_record *de_rec)
5045 * Device extent is a bit different from the other extents, because
5046 * the extents which belong to the different devices may have the
5047 * same start and size, so we need use the special extent cache
5048 * search/insert functions.
5050 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5054 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5055 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * cache_tree_free_extents() callback: unlink a device_extent_record from
 * whichever orphan lists it is still on (the free itself falls in a
 * listing gap).
 */
5059 static void free_device_extent_record(struct cache_extent *cache)
5061 struct device_extent_record *rec;
5063 rec = container_of(cache, struct device_extent_record, cache);
5064 if (!list_empty(&rec->chunk_list))
5065 list_del_init(&rec->chunk_list);
5066 if (!list_empty(&rec->device_list))
5067 list_del_init(&rec->device_list);
/* Free every device_extent_record in @tree. */
5071 void free_device_extent_tree(struct device_extent_tree *tree)
5073 cache_tree_free_extents(&tree->tree, free_device_extent_record);
5076 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 (pre-2.6.29 format) extent ref item: objectids below
 * BTRFS_FIRST_FREE_OBJECTID are tree roots => tree backref, anything
 * else => data backref.  key.offset carries the parent bytenr.
 * NOTE(review): the argument tails of both add_*_backref calls (orig
 * 5088-5089) and the return fall in listing gaps.
 */
5077 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5078 struct extent_buffer *leaf, int slot)
5080 struct btrfs_extent_ref_v0 *ref0;
5081 struct btrfs_key key;
5084 btrfs_item_key_to_cpu(leaf, &key, slot);
5085 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5086 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5087 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
5090 ret = add_data_backref(extent_cache, key.objectid, key.offset,
5091 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk_record from the CHUNK_ITEM at @slot, sized for
 * its stripe count (flexible array via btrfs_chunk_record_size), copying
 * the item key, chunk geometry fields and per-stripe devid/offset/uuid.
 * Calls exit() on allocation failure (visible here); returns the record.
 */
5097 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5098 struct btrfs_key *key,
5101 struct btrfs_chunk *ptr;
5102 struct chunk_record *rec;
5105 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5106 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5108 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5110 fprintf(stderr, "memory allocation failed\n");
5114 INIT_LIST_HEAD(&rec->list);
5115 INIT_LIST_HEAD(&rec->dextents);
/* Chunks are cached by logical start (key->offset) and length. */
5118 rec->cache.start = key->offset;
5119 rec->cache.size = btrfs_chunk_length(leaf, ptr);
5121 rec->generation = btrfs_header_generation(leaf);
5123 rec->objectid = key->objectid;
5124 rec->type = key->type;
5125 rec->offset = key->offset;
5127 rec->length = rec->cache.size;
5128 rec->owner = btrfs_chunk_owner(leaf, ptr);
5129 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5130 rec->type_flags = btrfs_chunk_type(leaf, ptr);
5131 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5132 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5133 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5134 rec->num_stripes = num_stripes;
5135 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5137 for (i = 0; i < rec->num_stripes; ++i) {
5138 rec->stripes[i].devid =
5139 btrfs_stripe_devid_nr(leaf, ptr, i);
5140 rec->stripes[i].offset =
5141 btrfs_stripe_offset_nr(leaf, ptr, i);
5142 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5143 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Process a CHUNK_ITEM: validate it with btrfs_check_chunk_valid (a
 * crafted leaf could carry a chunk outside the chunk tree), build a
 * chunk_record and insert it into @chunk_cache; a collision means a
 * duplicate chunk and is reported.
 * NOTE(review): the trailing args of the validity call (orig 5167-5168)
 * and the free/return on insert failure fall in listing gaps.
 */
5150 static int process_chunk_item(struct cache_tree *chunk_cache,
5151 struct btrfs_key *key, struct extent_buffer *eb,
5154 struct chunk_record *rec;
5155 struct btrfs_chunk *chunk;
5158 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5160 * Do extra check for this chunk item,
5162 * It's still possible one can craft a leaf with CHUNK_ITEM, with
5163 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5164 * and owner<->key_type check.
5166 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5169 error("chunk(%llu, %llu) is not valid, ignore it",
5170 key->offset, btrfs_chunk_length(eb, chunk));
5173 rec = btrfs_new_chunk_record(eb, key, slot);
5174 ret = insert_cache_extent(chunk_cache, &rec->cache);
5176 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5177 rec->offset, rec->length);
/*
 * Process a DEV_ITEM: build a device_record (keyed by devid) holding the
 * item key, device id and total/used byte counts, and insert it into the
 * rb-tree @dev_cache; a collision means a duplicate device and is
 * reported.  exit() on allocation failure (visible here); the free/return
 * on insert failure falls in a listing gap.
 */
5184 static int process_device_item(struct rb_root *dev_cache,
5185 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5187 struct btrfs_dev_item *ptr;
5188 struct device_record *rec;
5191 ptr = btrfs_item_ptr(eb,
5192 slot, struct btrfs_dev_item);
5194 rec = malloc(sizeof(*rec));
5196 fprintf(stderr, "memory allocation failed\n");
5200 rec->devid = key->offset;
5201 rec->generation = btrfs_header_generation(eb);
5203 rec->objectid = key->objectid;
5204 rec->type = key->type;
5205 rec->offset = key->offset;
/* Prefer the devid stored in the item over the key offset. */
5207 rec->devid = btrfs_device_id(eb, ptr);
5208 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5209 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5211 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5213 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a block_group_record from the BLOCK_GROUP_ITEM at @slot: cached
 * by [objectid, objectid+offset), carrying the leaf generation, the item
 * key and the block group flags.  exit() on allocation failure (visible
 * here); the return falls in a listing gap.
 */
5220 struct block_group_record *
5221 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5224 struct btrfs_block_group_item *ptr;
5225 struct block_group_record *rec;
5227 rec = calloc(1, sizeof(*rec));
5229 fprintf(stderr, "memory allocation failed\n");
5233 rec->cache.start = key->objectid;
5234 rec->cache.size = key->offset;
5236 rec->generation = btrfs_header_generation(leaf);
5238 rec->objectid = key->objectid;
5239 rec->type = key->type;
5240 rec->offset = key->offset;
5242 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5243 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5245 INIT_LIST_HEAD(&rec->list);
/*
 * Process a BLOCK_GROUP_ITEM: build a record and insert it into
 * @block_group_cache; a collision means a duplicate block group and is
 * reported (free/return on failure fall in listing gaps).
 */
5250 static int process_block_group_item(struct block_group_tree *block_group_cache,
5251 struct btrfs_key *key,
5252 struct extent_buffer *eb, int slot)
5254 struct block_group_record *rec;
5257 rec = btrfs_new_block_group_record(eb, key, slot);
5258 ret = insert_block_group_record(block_group_cache, rec);
5260 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5261 rec->objectid, rec->offset);
/*
 * Build a device_extent_record from the DEV_EXTENT item at @slot: cached
 * by (devid=objectid, start=key->offset, size=length), carrying the
 * owning chunk's objectid/offset and the extent length.  exit() on
 * allocation failure (visible here); the return falls in a listing gap.
 */
5268 struct device_extent_record *
5269 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5270 struct btrfs_key *key, int slot)
5272 struct device_extent_record *rec;
5273 struct btrfs_dev_extent *ptr;
5275 rec = calloc(1, sizeof(*rec));
5277 fprintf(stderr, "memory allocation failed\n");
5281 rec->cache.objectid = key->objectid;
5282 rec->cache.start = key->offset;
5284 rec->generation = btrfs_header_generation(leaf);
5286 rec->objectid = key->objectid;
5287 rec->type = key->type;
5288 rec->offset = key->offset;
5290 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5291 rec->chunk_objecteid =
5292 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5294 btrfs_dev_extent_chunk_offset(leaf, ptr);
5295 rec->length = btrfs_dev_extent_length(leaf, ptr);
5296 rec->cache.size = rec->length;
5298 INIT_LIST_HEAD(&rec->chunk_list);
5299 INIT_LIST_HEAD(&rec->device_list);
/*
 * Process a DEV_EXTENT item: build a record and insert it into
 * @dev_extent_cache; a collision means a duplicate device extent and is
 * reported (static return-type line and failure cleanup fall in listing
 * gaps).
 */
5305 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5306 struct btrfs_key *key, struct extent_buffer *eb,
5309 struct device_extent_record *rec;
5312 rec = btrfs_new_device_extent_record(eb, key, slot);
5313 ret = insert_device_extent_record(dev_extent_cache, rec);
5316 "Device extent[%llu, %llu, %llu] existed.\n",
5317 rec->objectid, rec->offset, rec->length);
/*
 * Process an EXTENT_ITEM / METADATA_ITEM: sanity-check alignment and
 * size, create/merge the extent record, then walk the inline refs and
 * register each as a tree or data backref.  METADATA_ITEM length is
 * implicitly nodesize; EXTENT_ITEM length is key.offset.  Handles the v0
 * extent item format under BTRFS_COMPAT_EXTENT_TREE_V0.
 * NOTE(review): numbered listing with gaps — tmpl.found_rec/metadata
 * setup lines, the while-loop header over inline refs, per-case breaks
 * and the final return are not visible in this view.
 */
5324 static int process_extent_item(struct btrfs_root *root,
5325 struct cache_tree *extent_cache,
5326 struct extent_buffer *eb, int slot)
5328 struct btrfs_extent_item *ei;
5329 struct btrfs_extent_inline_ref *iref;
5330 struct btrfs_extent_data_ref *dref;
5331 struct btrfs_shared_data_ref *sref;
5332 struct btrfs_key key;
5333 struct extent_record tmpl;
5338 u32 item_size = btrfs_item_size_nr(eb, slot);
5344 btrfs_item_key_to_cpu(eb, &key, slot);
5346 if (key.type == BTRFS_METADATA_ITEM_KEY) {
/* METADATA_ITEM offset is the tree level, not a byte length. */
5348 num_bytes = root->nodesize;
5350 num_bytes = key.offset;
5353 if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5354 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5355 key.objectid, root->sectorsize);
/* Pre-2.6.29 v0 extent item: only a refcount, no flags/inline refs. */
5358 if (item_size < sizeof(*ei)) {
5359 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5360 struct btrfs_extent_item_v0 *ei0;
5361 BUG_ON(item_size != sizeof(*ei0));
5362 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5363 refs = btrfs_extent_refs_v0(eb, ei0);
5367 memset(&tmpl, 0, sizeof(tmpl));
5368 tmpl.start = key.objectid;
5369 tmpl.nr = num_bytes;
5370 tmpl.extent_item_refs = refs;
5371 tmpl.metadata = metadata;
5373 tmpl.max_size = num_bytes;
5375 return add_extent_rec(extent_cache, &tmpl);
5378 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5379 refs = btrfs_extent_refs(eb, ei);
5380 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5384 if (metadata && num_bytes != root->nodesize) {
5385 error("ignore invalid metadata extent, length %llu does not equal to %u",
5386 num_bytes, root->nodesize);
5389 if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5390 error("ignore invalid data extent, length %llu is not aligned to %u",
5391 num_bytes, root->sectorsize);
5395 memset(&tmpl, 0, sizeof(tmpl));
5396 tmpl.start = key.objectid;
5397 tmpl.nr = num_bytes;
5398 tmpl.extent_item_refs = refs;
5399 tmpl.metadata = metadata;
5401 tmpl.max_size = num_bytes;
5402 add_extent_rec(extent_cache, &tmpl);
/* Inline refs start right after the item; EXTENT_ITEM tree blocks also
 * carry a btrfs_tree_block_info header before them. */
5404 ptr = (unsigned long)(ei + 1);
5405 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5406 key.type == BTRFS_EXTENT_ITEM_KEY)
5407 ptr += sizeof(struct btrfs_tree_block_info);
5409 end = (unsigned long)ei + item_size;
5411 iref = (struct btrfs_extent_inline_ref *)ptr;
5412 type = btrfs_extent_inline_ref_type(eb, iref);
5413 offset = btrfs_extent_inline_ref_offset(eb, iref);
5415 case BTRFS_TREE_BLOCK_REF_KEY:
5416 ret = add_tree_backref(extent_cache, key.objectid,
5419 error("add_tree_backref failed: %s",
5422 case BTRFS_SHARED_BLOCK_REF_KEY:
5423 ret = add_tree_backref(extent_cache, key.objectid,
5426 error("add_tree_backref failed: %s",
5429 case BTRFS_EXTENT_DATA_REF_KEY:
5430 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5431 add_data_backref(extent_cache, key.objectid, 0,
5432 btrfs_extent_data_ref_root(eb, dref),
5433 btrfs_extent_data_ref_objectid(eb,
5435 btrfs_extent_data_ref_offset(eb, dref),
5436 btrfs_extent_data_ref_count(eb, dref),
5439 case BTRFS_SHARED_DATA_REF_KEY:
5440 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5441 add_data_backref(extent_cache, key.objectid, offset,
5443 btrfs_shared_data_ref_count(eb, sref),
5447 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5448 key.objectid, key.type, num_bytes);
5451 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Verify the free-space cache covers exactly [offset, offset+bytes).
 * Superblock mirror copies inside the range are carved out first (they
 * are never free space): depending on where the super lands, the range is
 * trimmed at the front, the back, or split with the left half checked
 * recursively.  The remaining range must match one free-space entry
 * exactly (same offset, same length), which is then unlinked.
 * NOTE(review): listing gaps — the rmap error check, the per-stripe loop
 * header over @nr, several break/continue/free(logical) lines and the
 * final return are not visible in this view.
 */
5458 static int check_cache_range(struct btrfs_root *root,
5459 struct btrfs_block_group_cache *cache,
5460 u64 offset, u64 bytes)
5462 struct btrfs_free_space *entry;
5468 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5469 bytenr = btrfs_sb_offset(i);
5470 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5471 cache->key.objectid, bytenr, 0,
5472 &logical, &nr, &stripe_len);
/* Skip stripes that don't intersect [offset, offset+bytes). */
5477 if (logical[nr] + stripe_len <= offset)
5479 if (offset + bytes <= logical[nr])
5481 if (logical[nr] == offset) {
5482 if (stripe_len >= bytes) {
/* Super at range start: trim the front. */
5486 bytes -= stripe_len;
5487 offset += stripe_len;
5488 } else if (logical[nr] < offset) {
5489 if (logical[nr] + stripe_len >=
5494 bytes = (offset + bytes) -
5495 (logical[nr] + stripe_len);
5496 offset = logical[nr] + stripe_len;
5499 * Could be tricky, the super may land in the
5500 * middle of the area we're checking. First
5501 * check the easiest case, it's at the end.
5503 if (logical[nr] + stripe_len >=
5505 bytes = logical[nr] - offset;
5509 /* Check the left side */
5510 ret = check_cache_range(root, cache,
5512 logical[nr] - offset);
5518 /* Now we continue with the right side */
5519 bytes = (offset + bytes) -
5520 (logical[nr] + stripe_len);
5521 offset = logical[nr] + stripe_len;
/* The whole remaining range must be one exact free-space entry. */
5528 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5530 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5531 offset, offset+bytes);
5535 if (entry->offset != offset) {
5536 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5541 if (entry->bytes != bytes) {
5542 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5543 bytes, entry->bytes, offset);
5547 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check one block group's loaded free space cache against the
 * extent tree.
 *
 * Walks all EXTENT_ITEM / METADATA_ITEM keys inside the block group and,
 * for every gap between allocated extents, calls check_cache_range() to
 * verify (and consume) the corresponding free-space entry.  After the
 * walk, any entry still present in the free-space rbtree means the cache
 * claims space is free that the extent tree says is allocated (or was
 * simply never matched) — reported as an error.
 *
 * NOTE(review): interior lines (returns, error paths, loop braces) are
 * not visible in this excerpt.
 */
5552 static int verify_space_cache(struct btrfs_root *root,
5553 			      struct btrfs_block_group_cache *cache)
5555 	struct btrfs_path *path;
5556 	struct extent_buffer *leaf;
5557 	struct btrfs_key key;
5561 	path = btrfs_alloc_path();
/* All extent records live in the extent tree, regardless of caller root. */
5565 	root = root->fs_info->extent_root;
/* Never look below the first superblock; that area is always reserved. */
5567 	last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5569 	key.objectid = last;
5571 	key.type = BTRFS_EXTENT_ITEM_KEY;
5573 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5578 		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5579 			ret = btrfs_next_leaf(root, path);
5587 		leaf = path->nodes[0];
5588 		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
/* Stop once we pass the end of this block group. */
5589 		if (key.objectid >= cache->key.offset + cache->key.objectid)
5591 		if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5592 		    key.type != BTRFS_METADATA_ITEM_KEY) {
5597 		if (last == key.objectid) {
/* EXTENT_ITEM offset is a length; METADATA_ITEM offset is a level. */
5598 			if (key.type == BTRFS_EXTENT_ITEM_KEY)
5599 				last = key.objectid + key.offset;
5601 				last = key.objectid + root->nodesize;
/* Gap between previous extent end and this extent: must be free space. */
5606 		ret = check_cache_range(root, cache, last,
5607 					key.objectid - last);
5610 		if (key.type == BTRFS_EXTENT_ITEM_KEY)
5611 			last = key.objectid + key.offset;
5613 			last = key.objectid + root->nodesize;
/* Tail of the block group past the last extent must also be free. */
5617 	if (last < cache->key.objectid + cache->key.offset)
5618 		ret = check_cache_range(root, cache, last,
5619 					cache->key.objectid +
5620 					cache->key.offset - last);
5623 	btrfs_free_path(path);
/* Any surviving entries were never matched against the extent tree. */
5626 	    !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5627 		fprintf(stderr, "There are still entries left in the space "
/*
 * Validate the free space cache (v1 file-based cache or the free space
 * tree) for every block group in the filesystem.
 *
 * If the super's cache generation does not match the filesystem
 * generation the cache would be invalidated on mount, so only a notice
 * is printed.  Otherwise each block group's cache is loaded — via
 * load_free_space_tree() when the FREE_SPACE_TREE compat_ro feature is
 * set, via load_free_space_cache() for the v1 cache — and then checked
 * against the extent tree by verify_space_cache().
 *
 * Returns 0 on success, -EINVAL if any block group failed verification.
 * NOTE(review): loop structure and 'error' accounting lines are partly
 * elided in this excerpt.
 */
5635 static int check_space_cache(struct btrfs_root *root)
5637 	struct btrfs_block_group_cache *cache;
5638 	u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
/* -1ULL means the v1 cache is being rebuilt; skip the generation check. */
5642 	if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5643 	    btrfs_super_generation(root->fs_info->super_copy) !=
5644 	    btrfs_super_cache_generation(root->fs_info->super_copy)) {
5645 		printf("cache and super generation don't match, space cache "
5646 		       "will be invalidated\n");
5650 	if (ctx.progress_enabled) {
5651 		ctx.tp = TASK_FREE_SPACE;
5652 		task_start(ctx.info);
/* Iterate block groups in address order. */
5656 		cache = btrfs_lookup_first_block_group(root->fs_info, start);
5660 		start = cache->key.objectid + cache->key.offset;
5661 		if (!cache->free_space_ctl) {
5662 			if (btrfs_init_free_space_ctl(cache,
5663 						      root->sectorsize)) {
/* Start from a clean in-memory state before (re)loading the cache. */
5668 			btrfs_remove_free_space_cache(cache);
5671 		if (btrfs_fs_compat_ro(root->fs_info,
5672 				       BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5673 			ret = exclude_super_stripes(root, cache);
5675 				fprintf(stderr, "could not exclude super stripes: %s\n",
5680 			ret = load_free_space_tree(root->fs_info, cache);
5681 			free_excluded_extents(root, cache);
5683 				fprintf(stderr, "could not load free space tree: %s\n",
5690 			ret = load_free_space_cache(root->fs_info, cache);
5695 		ret = verify_space_cache(root, cache);
5697 			fprintf(stderr, "cache appears valid but isn't %Lu\n",
5698 				cache->key.objectid);
5703 		task_stop(ctx.info);
5705 	return error ? -EINVAL : 0;
/*
 * Read the data extent at [bytenr, bytenr + num_bytes) and verify every
 * sector against the on-disk checksums stored in the csum leaf at
 * leaf_offset within @eb.
 *
 * On a checksum mismatch the next mirror (if any) is tried; mismatches
 * are reported to stderr with the mirror, bytenr and both csum values.
 *
 * NOTE(review): the mirror-retry control flow, return value handling and
 * the free of the data buffer are partly elided in this excerpt.
 */
5708 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5709 			      u64 num_bytes, unsigned long leaf_offset,
5710 			      struct extent_buffer *eb) {
5713 	u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5715 	unsigned long csum_offset;
5719 	u64 data_checked = 0;
/* Data extents must be sector aligned; csums are per sector. */
5725 	if (num_bytes % root->sectorsize)
5728 	data = malloc(num_bytes);
5732 	while (offset < num_bytes) {
5735 		read_len = num_bytes - offset;
5736 		/* read as much space once a time */
5737 		ret = read_extent_data(root, data + offset,
5738 				       bytenr + offset, &read_len, mirror);
5742 		/* verify every 4k data's checksum */
5743 		while (data_checked < read_len) {
5745 			tmp = offset + data_checked;
5747 			csum = btrfs_csum_data(NULL, (char *)data + tmp,
5748 					       csum, root->sectorsize);
5749 			btrfs_csum_final(csum, (char *)&csum);
/* Expected csum for this sector lives at a fixed stride in the leaf. */
5751 			csum_offset = leaf_offset +
5752 				 tmp / root->sectorsize * csum_size;
5753 			read_extent_buffer(eb, (char *)&csum_expected,
5754 					   csum_offset, csum_size);
5755 			/* try another mirror */
5756 			if (csum != csum_expected) {
5757 				fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5758 					mirror, bytenr + tmp,
5759 					csum, csum_expected);
5760 				num_copies = btrfs_num_copies(
5761 						&root->fs_info->mapping_tree,
5763 				if (mirror < num_copies - 1) {
5768 			data_checked += root->sectorsize;
/*
 * Verify that the extent tree has EXTENT_ITEMs covering the whole range
 * [bytenr, bytenr + num_bytes).  Used by check_csums() to make sure every
 * checksummed range is backed by at least one extent record.
 *
 * The range is shrunk as covering extents are found; if an extent sits in
 * the middle of the range the right-hand piece is checked recursively.
 * Any residue left uncovered at the end is reported as an error.
 *
 * NOTE(review): several return/continue/break lines are elided in this
 * excerpt; the control flow shown is partial.
 */
5777 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5780 	struct btrfs_path *path;
5781 	struct extent_buffer *leaf;
5782 	struct btrfs_key key;
5785 	path = btrfs_alloc_path();
5787 		fprintf(stderr, "Error allocating path\n");
/* Search with offset (u64)-1 so we land just after the last item at bytenr. */
5791 	key.objectid = bytenr;
5792 	key.type = BTRFS_EXTENT_ITEM_KEY;
5793 	key.offset = (u64)-1;
5796 	ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5799 		fprintf(stderr, "Error looking up extent record %d\n", ret);
5800 		btrfs_free_path(path);
5803 	if (path->slots[0] > 0) {
5806 		ret = btrfs_prev_leaf(root, path);
5809 		} else if (ret > 0) {
5816 	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5819 	 * Block group items come before extent items if they have the same
5820 	 * bytenr, so walk back one more just in case.  Dear future traveller,
5821 	 * first congrats on mastering time travel. Now if it's not too much
5822 	 * trouble could you go back to 2006 and tell Chris to make the
5823 	 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5824 	 * EXTENT_ITEM_KEY please?
5826 	while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5827 		if (path->slots[0] > 0) {
5830 			ret = btrfs_prev_leaf(root, path);
5833 			} else if (ret > 0) {
5838 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* Forward scan: shrink [bytenr, bytenr+num_bytes) as extents cover it. */
5842 		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5843 			ret = btrfs_next_leaf(root, path);
5845 				fprintf(stderr, "Error going to next leaf "
5847 				btrfs_free_path(path);
5853 		leaf = path->nodes[0];
5854 		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5855 		if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent ends before our range: not relevant. */
5859 		if (key.objectid + key.offset < bytenr) {
/* Extent starts past our range: no more candidates. */
5863 		if (key.objectid > bytenr + num_bytes)
5866 		if (key.objectid == bytenr) {
5867 			if (key.offset >= num_bytes) {
5871 			num_bytes -= key.offset;
5872 			bytenr += key.offset;
5873 		} else if (key.objectid < bytenr) {
5874 			if (key.objectid + key.offset >= bytenr + num_bytes) {
5878 			num_bytes = (bytenr + num_bytes) -
5879 				(key.objectid + key.offset);
5880 			bytenr = key.objectid + key.offset;
5882 			if (key.objectid + key.offset < bytenr + num_bytes) {
5883 				u64 new_start = key.objectid + key.offset;
5884 				u64 new_bytes = bytenr + num_bytes - new_start;
5887 				 * Weird case, the extent is in the middle of
5888 				 * our range, we'll have to search one side
5889 				 * and then the other.  Not sure if this happens
5890 				 * in real life, but no harm in coding it up
5891 				 * anyway just in case.
5893 				btrfs_release_path(path);
5894 				ret = check_extent_exists(root, new_start,
5897 					fprintf(stderr, "Right section didn't "
5901 				num_bytes = key.objectid - bytenr;
5904 			num_bytes = key.objectid - bytenr;
/* Anything left of the range is uncovered by any extent record. */
5911 	if (num_bytes && !ret) {
5912 		fprintf(stderr, "There are no extents for csum range "
5913 			"%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5917 	btrfs_free_path(path);
/*
 * Walk the entire csum tree.
 *
 * For each EXTENT_CSUM item: optionally (when --check-data-csum is set)
 * verify the actual data against the stored checksums via
 * check_extent_csums(); and, whenever a run of contiguous csum items
 * ends, confirm via check_extent_exists() that the checksummed range is
 * backed by extent records.
 *
 * NOTE(review): loop framing, 'errors' accumulation and the final return
 * are partly elided in this excerpt.
 */
5921 static int check_csums(struct btrfs_root *root)
5923 	struct btrfs_path *path;
5924 	struct extent_buffer *leaf;
5925 	struct btrfs_key key;
5926 	u64 offset = 0, num_bytes = 0;
5927 	u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5931 	unsigned long leaf_offset;
5933 	root = root->fs_info->csum_root;
5934 	if (!extent_buffer_uptodate(root->node)) {
5935 		fprintf(stderr, "No valid csum tree found\n");
/* All csum items share this objectid; offset is the data bytenr. */
5939 	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5940 	key.type = BTRFS_EXTENT_CSUM_KEY;
5943 	path = btrfs_alloc_path();
5947 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5949 		fprintf(stderr, "Error searching csum tree %d\n", ret);
5950 		btrfs_free_path(path);
5954 	if (ret > 0 && path->slots[0])
5959 		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5960 			ret = btrfs_next_leaf(root, path);
5962 				fprintf(stderr, "Error going to next leaf "
5969 		leaf = path->nodes[0];
5971 		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5972 		if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Item size / csum_size = number of checksummed sectors in this item. */
5977 		data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5978 			      csum_size) * root->sectorsize;
5979 		if (!check_data_csum)
5980 			goto skip_csum_check;
5981 		leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5982 		ret = check_extent_csums(root, key.offset, data_len,
5988 			offset = key.offset;
/* Discontinuity: the previous run [offset, offset+num_bytes) is closed. */
5989 		} else if (key.offset != offset + num_bytes) {
5990 			ret = check_extent_exists(root, offset, num_bytes);
5992 				fprintf(stderr, "Csum exists for %Lu-%Lu but "
5993 					"there is no extent record\n",
5994 					offset, offset+num_bytes);
5997 			offset = key.offset;
6000 		num_bytes += data_len;
6004 	btrfs_free_path(path);
/*
 * Return whether @key sorts strictly before @drop_key in btrfs key order
 * (objectid, then type, then offset).  Keys below the drop key of a
 * partially-dropped snapshot have already been deleted and must be
 * skipped while walking the tree.
 *
 * NOTE(review): the explicit return statements are elided in this
 * excerpt; the visible comparisons imply the standard 3-level compare.
 */
6008 static int is_dropped_key(struct btrfs_key *key,
6009 			  struct btrfs_key *drop_key) {
6010 	if (key->objectid < drop_key->objectid)
6012 	else if (key->objectid == drop_key->objectid) {
6013 		if (key->type < drop_key->type)
6015 		else if (key->type == drop_key->type) {
6016 			if (key->offset < drop_key->offset)
6024 * Here are the rules for FULL_BACKREF.
6026 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6027 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6029 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
6030 * if it happened after the relocation occurred since we'll have dropped the
6031 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6032 * have no real way to know for sure.
6034 * We process the blocks one root at a time, and we start from the lowest root
6035 * objectid and go to the highest. So we can just lookup the owner backref for
6036 * the record and if we don't find it then we know it doesn't exist and we have
6039 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6040 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6041 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Decide whether the tree block @buf should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF, per the rules in the comment above this
 * function, and record a mismatch with the previously-seen flag state in
 * the extent record (bad_full_backref).
 *
 * @extent_cache: cache of extent records built so far
 * @ri:           the root currently being walked
 * @flags:        out: FULL_BACKREF is OR'ed in when required
 *
 * NOTE(review): early returns and some branch bodies are elided in this
 * excerpt.
 */
6043 static int calc_extent_flag(struct btrfs_root *root,
6044 			   struct cache_tree *extent_cache,
6045 			   struct extent_buffer *buf,
6046 			   struct root_item_record *ri,
6049 	struct extent_record *rec;
6050 	struct cache_extent *cache;
6051 	struct tree_backref *tback;
6054 	cache = lookup_cache_extent(extent_cache, buf->start, 1);
6055 	/* we have added this extent before */
6059 	rec = container_of(cache, struct extent_record, cache);
6062 	 * Except file/reloc tree, we can not have
6065 	if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* Root node of the tree being walked never needs FULL_BACKREF here. */
6070 	if (buf->start == ri->bytenr)
/* Rule 1: RELOC header flag implies FULL_BACKREF. */
6073 	if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6076 	owner = btrfs_header_owner(buf);
/* Rule 2: owner still points at this root -> normal backref. */
6077 	if (owner == ri->objectid)
6080 	tback = find_tree_backref(rec, 0, owner);
/* Flag state disagrees with what earlier refs recorded: flag as bad. */
6085 	if (rec->flag_block_full_backref != FLAG_UNSET &&
6086 	    rec->flag_block_full_backref != 0)
6087 		rec->bad_full_backref = 1;
6090 	*flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6091 	if (rec->flag_block_full_backref != FLAG_UNSET &&
6092 	    rec->flag_block_full_backref != 1)
6093 		rec->bad_full_backref = 1;
/*
 * Print a diagnostic for a key type that is not valid in the tree with
 * the given root objectid, e.g. "Invalid key type(DEV_ITEM) found in
 * root(ROOT_TREE)".
 */
6097 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6099 	fprintf(stderr, "Invalid key type(");
6100 	print_key_type(stderr, 0, key_type);
6101 	fprintf(stderr, ") found in root(");
6102 	print_objectid(stderr, rootid, 0);
6103 	fprintf(stderr, ")\n");
6107  * Check if the key is valid with its extent buffer.
6109  * This is an early check in case an invalid key exists in an extent buffer
6110  * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Sanity check that @key_type may legally appear in the tree whose root
 * objectid is @rootid (e.g. CHUNK_ITEM only in the chunk tree,
 * EXTENT_ITEM only in the extent tree).  On mismatch the offending
 * combination is reported via report_mismatch_key_root().
 *
 * NOTE(review): the switch framing, break statements and return values
 * are elided in this excerpt.  The case label at 6123 compares a root
 * objectid constant in a key-type switch — presumably a csum-key case
 * whose label line is elided; verify against the full source.
 */
6113 static int check_type_with_root(u64 rootid, u8 key_type)
6116 	/* Only valid in chunk tree */
6117 	case BTRFS_DEV_ITEM_KEY:
6118 	case BTRFS_CHUNK_ITEM_KEY:
6119 		if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6122 	/* valid in csum and log tree */
6123 	case BTRFS_CSUM_TREE_OBJECTID:
6124 		if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6128 	case BTRFS_EXTENT_ITEM_KEY:
6129 	case BTRFS_METADATA_ITEM_KEY:
6130 	case BTRFS_BLOCK_GROUP_ITEM_KEY:
6131 		if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6134 	case BTRFS_ROOT_ITEM_KEY:
6135 		if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6138 	case BTRFS_DEV_EXTENT_KEY:
6139 		if (rootid != BTRFS_DEV_TREE_OBJECTID)
6145 	report_mismatch_key_root(key_type, rootid);
/*
 * Core of the metadata walk: pick the next pending tree block, read it,
 * and process its contents.
 *
 * For a leaf: every item is inspected — extent/metadata items feed
 * process_extent_item(), chunk/dev/block-group/dev-extent items feed
 * their respective caches, backref items add backrefs to the extent
 * cache, orphan items are queued on delete_items, and file extent items
 * accumulate data byte statistics and data backrefs.
 *
 * For a node: each child pointer gets an extent record and a tree
 * backref, and is queued on 'nodes' or 'pending' for a later iteration
 * (skipping keys already dropped per ri->drop_key).
 *
 * Along the way the block's FULL_BACKREF flag is determined (from the
 * extent tree when available, else calc_extent_flag()) and validated,
 * and global byte counters are updated.
 *
 * NOTE(review): many interior lines (declarations, continues, breaks,
 * error paths) are elided in this excerpt.
 */
6149 static int run_next_block(struct btrfs_root *root,
6150 			  struct block_info *bits,
6153 			  struct cache_tree *pending,
6154 			  struct cache_tree *seen,
6155 			  struct cache_tree *reada,
6156 			  struct cache_tree *nodes,
6157 			  struct cache_tree *extent_cache,
6158 			  struct cache_tree *chunk_cache,
6159 			  struct rb_root *dev_cache,
6160 			  struct block_group_tree *block_group_cache,
6161 			  struct device_extent_tree *dev_extent_cache,
6162 			  struct root_item_record *ri)
6164 	struct extent_buffer *buf;
6165 	struct extent_record *rec = NULL;
6176 	struct btrfs_key key;
6177 	struct cache_extent *cache;
6180 	nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6181 				    bits_nr, &reada_bits);
/* Issue readahead for the batch before processing the first block. */
6186 		for(i = 0; i < nritems; i++) {
6187 			ret = add_cache_extent(reada, bits[i].start,
6192 			/* fixme, get the parent transid */
6193 			readahead_tree_block(root, bits[i].start,
6197 	*last = bits[0].start;
6198 	bytenr = bits[0].start;
6199 	size = bits[0].size;
/* This block is now being processed: drop it from the work queues. */
6201 	cache = lookup_cache_extent(pending, bytenr, size);
6203 		remove_cache_extent(pending, cache);
6206 	cache = lookup_cache_extent(reada, bytenr, size);
6208 		remove_cache_extent(reada, cache);
6211 	cache = lookup_cache_extent(nodes, bytenr, size);
6213 		remove_cache_extent(nodes, cache);
6216 	cache = lookup_cache_extent(extent_cache, bytenr, size);
6218 		rec = container_of(cache, struct extent_record, cache);
6219 		gen = rec->parent_generation;
6222 	/* fixme, get the real parent transid */
6223 	buf = read_tree_block(root, bytenr, size, gen);
6224 	if (!extent_buffer_uptodate(buf)) {
6225 		record_bad_block_io(root->fs_info,
6226 				    extent_cache, bytenr, size);
6230 	nritems = btrfs_header_nritems(buf);
/* Prefer the extent tree's recorded flags; fall back to computing them. */
6233 	if (!init_extent_tree) {
6234 		ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6235 				       btrfs_header_level(buf), 1, NULL,
6238 			ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6240 				fprintf(stderr, "Couldn't calc extent flags\n");
6241 				flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6246 		ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6248 			fprintf(stderr, "Couldn't calc extent flags\n");
6249 			flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6253 	if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6255 		    ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6256 		    ri->objectid == btrfs_header_owner(buf)) {
6258 			 * Ok we got to this block from it's original owner and
6259 			 * we have FULL_BACKREF set.  Relocation can leave
6260 			 * converted blocks over so this is altogether possible,
6261 			 * however it's not possible if the generation > the
6262 			 * last snapshot, so check for this case.
6264 			if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6265 			    btrfs_header_generation(buf) > ri->last_snapshot) {
6266 				flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6267 				rec->bad_full_backref = 1;
6272 		    (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6273 		     btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6274 			flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6275 			rec->bad_full_backref = 1;
/* Persist the resolved flag into the extent record for later fixups. */
6279 	if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6280 		rec->flag_block_full_backref = 1;
6284 		rec->flag_block_full_backref = 0;
6286 	owner = btrfs_header_owner(buf);
6289 	ret = check_block(root, extent_cache, buf, flags);
6293 	if (btrfs_is_leaf(buf)) {
6294 		btree_space_waste += btrfs_leaf_free_space(root, buf);
6295 		for (i = 0; i < nritems; i++) {
6296 			struct btrfs_file_extent_item *fi;
6297 			btrfs_item_key_to_cpu(buf, &key, i);
6299 			 * Check key type against the leaf owner.
6300 			 * Could filter quite a lot of early error if
6303 			if (check_type_with_root(btrfs_header_owner(buf),
6305 				fprintf(stderr, "ignoring invalid key\n");
6308 			if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6309 				process_extent_item(root, extent_cache, buf,
6313 			if (key.type == BTRFS_METADATA_ITEM_KEY) {
6314 				process_extent_item(root, extent_cache, buf,
6318 			if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6320 					btrfs_item_size_nr(buf, i);
6323 			if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6324 				process_chunk_item(chunk_cache, &key, buf, i);
6327 			if (key.type == BTRFS_DEV_ITEM_KEY) {
6328 				process_device_item(dev_cache, &key, buf, i);
6331 			if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6332 				process_block_group_item(block_group_cache,
6336 			if (key.type == BTRFS_DEV_EXTENT_KEY) {
6337 				process_device_extent_item(dev_extent_cache,
6342 			if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6343 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6344 				process_extent_ref_v0(extent_cache, buf, i);
6351 			if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6352 				ret = add_tree_backref(extent_cache,
6353 						key.objectid, 0, key.offset, 0);
6355 					error("add_tree_backref failed: %s",
6359 			if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6360 				ret = add_tree_backref(extent_cache,
6361 						key.objectid, key.offset, 0, 0);
6363 					error("add_tree_backref failed: %s",
6367 			if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6368 				struct btrfs_extent_data_ref *ref;
6369 				ref = btrfs_item_ptr(buf, i,
6370 						struct btrfs_extent_data_ref);
6371 				add_data_backref(extent_cache,
6373 					btrfs_extent_data_ref_root(buf, ref),
6374 					btrfs_extent_data_ref_objectid(buf,
6376 					btrfs_extent_data_ref_offset(buf, ref),
6377 					btrfs_extent_data_ref_count(buf, ref),
6378 					0, root->sectorsize);
6381 			if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6382 				struct btrfs_shared_data_ref *ref;
6383 				ref = btrfs_item_ptr(buf, i,
6384 						struct btrfs_shared_data_ref);
6385 				add_data_backref(extent_cache,
6386 					key.objectid, key.offset, 0, 0, 0,
6387 					btrfs_shared_data_ref_count(buf, ref),
6388 					0, root->sectorsize);
/* Orphans are queued so the repair pass can delete stale items later. */
6391 			if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6392 				struct bad_item *bad;
6394 				if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6398 				bad = malloc(sizeof(struct bad_item));
6401 				INIT_LIST_HEAD(&bad->list);
6402 				memcpy(&bad->key, &key,
6403 				       sizeof(struct btrfs_key));
6404 				bad->root_id = owner;
6405 				list_add_tail(&bad->list, &delete_items);
6408 			if (key.type != BTRFS_EXTENT_DATA_KEY)
6410 			fi = btrfs_item_ptr(buf, i,
6411 					    struct btrfs_file_extent_item);
6412 			if (btrfs_file_extent_type(buf, fi) ==
6413 			    BTRFS_FILE_EXTENT_INLINE)
/* bytenr 0 means a hole extent; nothing is allocated for it. */
6415 			if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6418 			data_bytes_allocated +=
6419 				btrfs_file_extent_disk_num_bytes(buf, fi);
6420 			if (data_bytes_allocated < root->sectorsize) {
6423 			data_bytes_referenced +=
6424 				btrfs_file_extent_num_bytes(buf, fi);
6425 			add_data_backref(extent_cache,
6426 				btrfs_file_extent_disk_bytenr(buf, fi),
6427 				parent, owner, key.objectid, key.offset -
6428 				btrfs_file_extent_offset(buf, fi), 1, 1,
6429 				btrfs_file_extent_disk_num_bytes(buf, fi));
6433 		struct btrfs_key first_key;
6435 		first_key.objectid = 0;
6438 			btrfs_item_key_to_cpu(buf, &first_key, 0);
6439 		level = btrfs_header_level(buf);
/* Interior node: record and queue every child block pointer. */
6440 		for (i = 0; i < nritems; i++) {
6441 			struct extent_record tmpl;
6443 			ptr = btrfs_node_blockptr(buf, i);
6444 			size = root->nodesize;
6445 			btrfs_node_key_to_cpu(buf, &key, i);
/* Skip children below the drop key of a partially-deleted snapshot. */
6447 				if ((level == ri->drop_level)
6448 				    && is_dropped_key(&key, &ri->drop_key)) {
6453 			memset(&tmpl, 0, sizeof(tmpl));
6454 			btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6455 			tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6460 			tmpl.max_size = size;
6461 			ret = add_extent_rec(extent_cache, &tmpl);
6465 			ret = add_tree_backref(extent_cache, ptr, parent,
6468 				error("add_tree_backref failed: %s",
6474 				add_pending(nodes, seen, ptr, size);
6476 				add_pending(pending, seen, ptr, size);
6479 		btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6480 				      nritems) * sizeof(struct btrfs_key_ptr);
6482 	total_btree_bytes += buf->len;
6483 	if (fs_root_objectid(btrfs_header_owner(buf)))
6484 		total_fs_tree_bytes += buf->len;
6485 	if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6486 		total_extent_tree_bytes += buf->len;
/* Detect leftover pre-mixed-backref metadata from old relocation. */
6487 	if (!found_old_backref &&
6488 	    btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6489 	    btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6490 	    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6491 		found_old_backref = 1;
6493 	free_extent_buffer(buf);
/*
 * Seed the metadata walk with a tree root block: queue it on 'nodes'
 * (interior) or 'pending' (leaf), create its extent record, and add the
 * appropriate tree backref — a shared (parent-based) backref for the
 * reloc tree or pre-mixed-backref metadata, otherwise a normal
 * owner-based backref.
 *
 * NOTE(review): some record-field assignments and the return are elided
 * in this excerpt.
 */
6497 static int add_root_to_pending(struct extent_buffer *buf,
6498 			       struct cache_tree *extent_cache,
6499 			       struct cache_tree *pending,
6500 			       struct cache_tree *seen,
6501 			       struct cache_tree *nodes,
6504 	struct extent_record tmpl;
6507 	if (btrfs_header_level(buf) > 0)
6508 		add_pending(nodes, seen, buf->start, buf->len);
6510 		add_pending(pending, seen, buf->start, buf->len);
6512 	memset(&tmpl, 0, sizeof(tmpl));
6513 	tmpl.start = buf->start;
6518 	tmpl.max_size = buf->len;
6519 	add_extent_rec(extent_cache, &tmpl);
/* Reloc tree / old metadata use parent-based (shared) backrefs. */
6521 	if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6522 	    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6523 		ret = add_tree_backref(extent_cache, buf->start, buf->start,
6526 		ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6531 /* as we fix the tree, we might be deleting blocks that
6532 * we're tracking for repair. This hook makes sure we
6533 * remove any backrefs for blocks as we are fixing them.
/*
 * Hook invoked when repair code frees an extent: keep the in-memory
 * extent cache consistent by dropping the corresponding backref counts
 * from the matching extent record, so we don't later "repair" backrefs
 * for blocks we deleted ourselves.
 *
 * Data vs tree backrefs are distinguished by owner >= FIRST_FREE_OBJECTID.
 * When a backref's ref counts reach zero its found_* bits are cleared,
 * and a backref with neither found_extent_tree nor found_ref left is
 * unlinked (and presumably freed — the free call is elided here).
 *
 * NOTE(review): early-out returns and some decrement lines are elided in
 * this excerpt.
 */
6535 static int free_extent_hook(struct btrfs_trans_handle *trans,
6536 			    struct btrfs_root *root,
6537 			    u64 bytenr, u64 num_bytes, u64 parent,
6538 			    u64 root_objectid, u64 owner, u64 offset,
6541 	struct extent_record *rec;
6542 	struct cache_extent *cache;
6544 	struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6546 	is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6547 	cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6551 	rec = container_of(cache, struct extent_record, cache);
6553 		struct data_backref *back;
6554 		back = find_data_backref(rec, parent, root_objectid, owner,
6555 					offset, 1, bytenr, num_bytes);
6558 		if (back->node.found_ref) {
6559 			back->found_ref -= refs_to_drop;
6561 				rec->refs -= refs_to_drop;
6563 		if (back->node.found_extent_tree) {
6564 			back->num_refs -= refs_to_drop;
6565 			if (rec->extent_item_refs)
6566 				rec->extent_item_refs -= refs_to_drop;
6568 		if (back->found_ref == 0)
6569 			back->node.found_ref = 0;
6570 		if (back->num_refs == 0)
6571 			back->node.found_extent_tree = 0;
/*
 * NOTE(review): this condition reads found_ref (not !found_ref) before
 * unlinking; elided lines may explain it — confirm against full source.
 */
6573 		if (!back->node.found_extent_tree && back->node.found_ref) {
6574 			list_del(&back->node.list);
6578 		struct tree_backref *back;
6579 		back = find_tree_backref(rec, parent, root_objectid);
6582 		if (back->node.found_ref) {
6585 			back->node.found_ref = 0;
6587 		if (back->node.found_extent_tree) {
6588 			if (rec->extent_item_refs)
6589 				rec->extent_item_refs--;
6590 			back->node.found_extent_tree = 0;
6592 		if (!back->node.found_extent_tree && back->node.found_ref) {
6593 			list_del(&back->node.list);
/* Record may now have no refs left; free it if so. */
6597 	maybe_free_extent_rec(extent_cache, rec);
/*
 * Repair helper: delete every extent-tree item (EXTENT_ITEM,
 * METADATA_ITEM and all backref item types) recorded at @bytenr.
 *
 * Repeatedly searches from (bytenr, max) downward, deleting matching
 * items and stepping the search key below the just-deleted item.  When
 * an EXTENT_ITEM/METADATA_ITEM is removed, the block group accounting is
 * updated by the freed byte count.
 *
 * NOTE(review): loop framing, error returns and 'new_len' usage are
 * elided in this excerpt.
 */
6602 static int delete_extent_records(struct btrfs_trans_handle *trans,
6603 				 struct btrfs_root *root,
6604 				 struct btrfs_path *path,
6605 				 u64 bytenr, u64 new_len)
6607 	struct btrfs_key key;
6608 	struct btrfs_key found_key;
6609 	struct extent_buffer *leaf;
6614 	key.objectid = bytenr;
6616 	key.offset = (u64)-1;
6619 		ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6626 			if (path->slots[0] == 0)
6632 		leaf = path->nodes[0];
6633 		slot = path->slots[0];
6635 		btrfs_item_key_to_cpu(leaf, &found_key, slot);
6636 		if (found_key.objectid != bytenr)
6639 		if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6640 		    found_key.type != BTRFS_METADATA_ITEM_KEY &&
6641 		    found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6642 		    found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6643 		    found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6644 		    found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6645 		    found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
/* Not a deletable type: step the key just below it and search again. */
6646 			btrfs_release_path(path);
6647 			if (found_key.type == 0) {
6648 				if (found_key.offset == 0)
6650 				key.offset = found_key.offset - 1;
6651 				key.type = found_key.type;
6653 			key.type = found_key.type - 1;
6654 			key.offset = (u64)-1;
6658 		fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6659 			found_key.objectid, found_key.type, found_key.offset);
6661 		ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6664 		btrfs_release_path(path);
6666 		if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6667 		    found_key.type == BTRFS_METADATA_ITEM_KEY) {
/* METADATA_ITEM offset is a level, not a length; use nodesize. */
6668 			u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6669 				found_key.offset : root->nodesize;
6671 			ret = btrfs_update_block_group(trans, root, bytenr,
6678 	btrfs_release_path(path);
6683 * for a single backref, this will allocate a new extent
6684 * and add the backref to it.
/*
 * Repair helper: (re)create the extent-tree presence of @rec for a
 * single backref @back.
 *
 * If @allocated is false (metadata only, per the visible paths), a new
 * EXTENT_ITEM is inserted — including the btrfs_tree_block_info for
 * non-skinny metadata — and block group accounting is updated.  The
 * backref itself is then added with btrfs_inc_extent_ref(): once per
 * found_ref for data backrefs, once for tree backrefs, with FULL_BACKREF
 * expressed via the parent bytenr.
 *
 * NOTE(review): error handling, the skinny-metadata branch and several
 * goto labels are elided in this excerpt.
 */
6686 static int record_extent(struct btrfs_trans_handle *trans,
6687 			 struct btrfs_fs_info *info,
6688 			 struct btrfs_path *path,
6689 			 struct extent_record *rec,
6690 			 struct extent_backref *back,
6691 			 int allocated, u64 flags)
6694 	struct btrfs_root *extent_root = info->extent_root;
6695 	struct extent_buffer *leaf;
6696 	struct btrfs_key ins_key;
6697 	struct btrfs_extent_item *ei;
6698 	struct tree_backref *tback;
6699 	struct data_backref *dback;
6700 	struct btrfs_tree_block_info *bi;
/* Metadata extents are always exactly one node in size. */
6703 		rec->max_size = max_t(u64, rec->max_size,
6704 				    info->extent_root->nodesize);
6707 		u32 item_size = sizeof(*ei);
/* Non-skinny metadata carries a tree_block_info after the extent item. */
6710 			item_size += sizeof(*bi);
6712 		ins_key.objectid = rec->start;
6713 		ins_key.offset = rec->max_size;
6714 		ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6716 		ret = btrfs_insert_empty_item(trans, extent_root, path,
6717 					      &ins_key, item_size);
6721 		leaf = path->nodes[0];
6722 		ei = btrfs_item_ptr(leaf, path->slots[0],
6723 				    struct btrfs_extent_item);
/* refs start at 0; btrfs_inc_extent_ref() below bumps them per backref. */
6725 		btrfs_set_extent_refs(leaf, ei, 0);
6726 		btrfs_set_extent_generation(leaf, ei, rec->generation);
6728 		if (back->is_data) {
6729 			btrfs_set_extent_flags(leaf, ei,
6730 					       BTRFS_EXTENT_FLAG_DATA);
6732 			struct btrfs_disk_key copy_key;;
6734 			tback = to_tree_backref(back);
6735 			bi = (struct btrfs_tree_block_info *)(ei + 1);
6736 			memset_extent_buffer(leaf, 0, (unsigned long)bi,
6739 			btrfs_set_disk_key_objectid(&copy_key,
6740 						    rec->info_objectid);
6741 			btrfs_set_disk_key_type(&copy_key, 0);
6742 			btrfs_set_disk_key_offset(&copy_key, 0);
6744 			btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6745 			btrfs_set_tree_block_key(leaf, bi, &copy_key);
6747 			btrfs_set_extent_flags(leaf, ei,
6748 					BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6751 		btrfs_mark_buffer_dirty(leaf);
6752 		ret = btrfs_update_block_group(trans, extent_root, rec->start,
6753 					       rec->max_size, 1, 0);
6756 		btrfs_release_path(path);
6759 	if (back->is_data) {
6763 		dback = to_data_backref(back);
6764 		if (back->full_backref)
6765 			parent = dback->parent;
/* One inc_extent_ref per reference we actually found in the fs trees. */
6769 		for (i = 0; i < dback->found_ref; i++) {
6770 			/* if parent != 0, we're doing a full backref
6771 			 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6772 			 * just makes the backref allocator create a data
6775 			ret = btrfs_inc_extent_ref(trans, info->extent_root,
6776 						   rec->start, rec->max_size,
6780 						   BTRFS_FIRST_FREE_OBJECTID :
6786 			fprintf(stderr, "adding new data backref"
6787 					" on %llu %s %llu owner %llu"
6788 					" offset %llu found %d\n",
6789 					(unsigned long long)rec->start,
6790 					back->full_backref ?
6792 					back->full_backref ?
6793 					(unsigned long long)parent :
6794 					(unsigned long long)dback->root,
6795 					(unsigned long long)dback->owner,
6796 					(unsigned long long)dback->offset,
6801 		tback = to_tree_backref(back);
6802 		if (back->full_backref)
6803 			parent = tback->parent;
6807 		ret = btrfs_inc_extent_ref(trans, info->extent_root,
6808 					   rec->start, rec->max_size,
6809 					   parent, tback->root, 0, 0);
6810 		fprintf(stderr, "adding new tree backref on "
6811 			"start %llu len %llu parent %llu root %llu\n",
6812 			rec->start, rec->max_size, parent, tback->root);
6815 	btrfs_release_path(path);
/*
 * Linear search of @entries for the extent_entry matching both bytenr
 * and bytes; the visible loop breaks on match (returns elided — result
 * is presumably the matched entry or NULL).
 */
6819 static struct extent_entry *find_entry(struct list_head *entries,
6820 				       u64 bytenr, u64 bytes)
6822 	struct extent_entry *entry = NULL;
6824 	list_for_each_entry(entry, entries, list) {
6825 		if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Pick the most trustworthy extent_entry from a list of conflicting
 * candidates: the one with the highest reference count, ignoring entries
 * whose refs are all broken.  A tie between the current best and another
 * entry means neither can be trusted yet, so the search continues
 * (using @prev to break the tie when possible).  Returns NULL when no
 * clear winner exists.
 *
 * NOTE(review): the tie-breaking branch bodies are partly elided in this
 * excerpt.
 */
6832 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6834 	struct extent_entry *entry, *best = NULL, *prev = NULL;
6836 	list_for_each_entry(entry, entries, list) {
6843 		 * If there are as many broken entries as entries then we know
6844 		 * not to trust this particular entry.
6846 		if (entry->broken == entry->count)
6850 		 * If our current entry == best then we can't be sure our best
6851 		 * is really the best, so we need to keep searching.
6853 		if (best && best->count == entry->count) {
6859 			/* Prev == entry, not good enough, have to keep searching */
6860 			if (!prev->broken && prev->count == entry->count)
6864 			best = (prev->count > entry->count) ? prev : entry;
6865 		else if (best->count < entry->count)
6873 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6874 struct data_backref *dback, struct extent_entry *entry)
6876 struct btrfs_trans_handle *trans;
6877 struct btrfs_root *root;
6878 struct btrfs_file_extent_item *fi;
6879 struct extent_buffer *leaf;
6880 struct btrfs_key key;
6884 key.objectid = dback->root;
6885 key.type = BTRFS_ROOT_ITEM_KEY;
6886 key.offset = (u64)-1;
6887 root = btrfs_read_fs_root(info, &key);
6889 fprintf(stderr, "Couldn't find root for our ref\n");
6894 * The backref points to the original offset of the extent if it was
6895 * split, so we need to search down to the offset we have and then walk
6896 * forward until we find the backref we're looking for.
6898 key.objectid = dback->owner;
6899 key.type = BTRFS_EXTENT_DATA_KEY;
6900 key.offset = dback->offset;
6901 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6903 fprintf(stderr, "Error looking up ref %d\n", ret);
6908 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6909 ret = btrfs_next_leaf(root, path);
6911 fprintf(stderr, "Couldn't find our ref, next\n");
6915 leaf = path->nodes[0];
6916 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6917 if (key.objectid != dback->owner ||
6918 key.type != BTRFS_EXTENT_DATA_KEY) {
6919 fprintf(stderr, "Couldn't find our ref, search\n");
6922 fi = btrfs_item_ptr(leaf, path->slots[0],
6923 struct btrfs_file_extent_item);
6924 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6925 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6927 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6932 btrfs_release_path(path);
6934 trans = btrfs_start_transaction(root, 1);
6936 return PTR_ERR(trans);
6939 * Ok we have the key of the file extent we want to fix, now we can cow
6940 * down to the thing and fix it.
6942 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6944 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6945 key.objectid, key.type, key.offset, ret);
6949 fprintf(stderr, "Well that's odd, we just found this key "
6950 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6955 leaf = path->nodes[0];
6956 fi = btrfs_item_ptr(leaf, path->slots[0],
6957 struct btrfs_file_extent_item);
6959 if (btrfs_file_extent_compression(leaf, fi) &&
6960 dback->disk_bytenr != entry->bytenr) {
6961 fprintf(stderr, "Ref doesn't match the record start and is "
6962 "compressed, please take a btrfs-image of this file "
6963 "system and send it to a btrfs developer so they can "
6964 "complete this functionality for bytenr %Lu\n",
6965 dback->disk_bytenr);
6970 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6971 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6972 } else if (dback->disk_bytenr > entry->bytenr) {
6973 u64 off_diff, offset;
6975 off_diff = dback->disk_bytenr - entry->bytenr;
6976 offset = btrfs_file_extent_offset(leaf, fi);
6977 if (dback->disk_bytenr + offset +
6978 btrfs_file_extent_num_bytes(leaf, fi) >
6979 entry->bytenr + entry->bytes) {
6980 fprintf(stderr, "Ref is past the entry end, please "
6981 "take a btrfs-image of this file system and "
6982 "send it to a btrfs developer, ref %Lu\n",
6983 dback->disk_bytenr);
6988 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6989 btrfs_set_file_extent_offset(leaf, fi, offset);
6990 } else if (dback->disk_bytenr < entry->bytenr) {
6993 offset = btrfs_file_extent_offset(leaf, fi);
6994 if (dback->disk_bytenr + offset < entry->bytenr) {
6995 fprintf(stderr, "Ref is before the entry start, please"
6996 " take a btrfs-image of this file system and "
6997 "send it to a btrfs developer, ref %Lu\n",
6998 dback->disk_bytenr);
7003 offset += dback->disk_bytenr;
7004 offset -= entry->bytenr;
7005 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7006 btrfs_set_file_extent_offset(leaf, fi, offset);
7009 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7012 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7013 * only do this if we aren't using compression, otherwise it's a
7016 if (!btrfs_file_extent_compression(leaf, fi))
7017 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7019 printf("ram bytes may be wrong?\n");
7020 btrfs_mark_buffer_dirty(leaf);
7022 err = btrfs_commit_transaction(trans, root);
7023 btrfs_release_path(path);
7024 return ret ? ret : err;
/*
 * Make the data backrefs of @rec agree on the extent's (disk_bytenr, bytes).
 *
 * Each found data backref casts a "vote" for a (bytenr, bytes) pair in a
 * local entries list; the most popular entry wins (the extent record itself
 * gets a say when the backrefs are split or broken), and repair_ref() is run
 * on every ref that disagrees with the winner.
 *
 * NOTE(review): this listing is elided (the embedded line numbers jump), so
 * some declarations/returns between the visible statements are not shown and
 * are not documented here.
 */
7027 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7028 struct extent_record *rec)
7030 struct extent_backref *back;
7031 struct data_backref *dback;
7032 struct extent_entry *entry, *best = NULL;
7035 int broken_entries = 0;
7040 * Metadata is easy and the backrefs should always agree on bytenr and
7041 * size, if not we've got bigger issues.
/* Pass 1: tally one entries-list vote per found data backref. */
7046 list_for_each_entry(back, &rec->backrefs, list) {
7047 if (back->full_backref || !back->is_data)
7050 dback = to_data_backref(back);
7053 * We only pay attention to backrefs that we found a real
7056 if (dback->found_ref == 0)
7060 * For now we only catch when the bytes don't match, not the
7061 * bytenr. We can easily do this at the same time, but I want
7062 * to have a fs image to test on before we just add repair
7063 * functionality willy-nilly so we know we won't screw up the
7067 entry = find_entry(&entries, dback->disk_bytenr,
/* First sighting of this (bytenr, bytes) pair: allocate a fresh entry. */
7070 entry = malloc(sizeof(struct extent_entry));
7075 memset(entry, 0, sizeof(*entry));
7076 entry->bytenr = dback->disk_bytenr;
7077 entry->bytes = dback->bytes;
7078 list_add_tail(&entry->list, &entries);
7083 * If we only have one entry we may think the entries agree when
7084 * in reality they don't so we have to do some extra checking.
7086 if (dback->disk_bytenr != rec->start ||
7087 dback->bytes != rec->nr || back->broken)
7098 /* Yay all the backrefs agree, carry on good sir */
7099 if (nr_entries <= 1 && !mismatch)
7102 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7103 "%Lu\n", rec->start);
7106 * First we want to see if the backrefs can agree amongst themselves who
7107 * is right, so figure out which one of the entries has the highest
7110 best = find_most_right_entry(&entries);
7113 * Ok so we may have an even split between what the backrefs think, so
7114 * this is where we use the extent ref to see what it thinks.
7117 entry = find_entry(&entries, rec->start, rec->nr);
7118 if (!entry && (!broken_entries || !rec->found_rec)) {
7119 fprintf(stderr, "Backrefs don't agree with each other "
7120 "and extent record doesn't agree with anybody,"
7121 " so we can't fix bytenr %Lu bytes %Lu\n",
7122 rec->start, rec->nr);
7125 } else if (!entry) {
7127 * Ok our backrefs were broken, we'll assume this is the
7128 * correct value and add an entry for this range.
7130 entry = malloc(sizeof(struct extent_entry));
7135 memset(entry, 0, sizeof(*entry));
7136 entry->bytenr = rec->start;
7137 entry->bytes = rec->nr;
7138 list_add_tail(&entry->list, &entries);
/* Re-run the vote now that the extent record's own entry is in the list. */
7142 best = find_most_right_entry(&entries);
7144 fprintf(stderr, "Backrefs and extent record evenly "
7145 "split on who is right, this is going to "
7146 "require user input to fix bytenr %Lu bytes "
7147 "%Lu\n", rec->start, rec->nr);
7154 * I don't think this can happen currently as we'll abort() if we catch
7155 * this case higher up, but in case somebody removes that we still can't
7156 * deal with it properly here yet, so just bail out if that's the case.
7158 if (best->bytenr != rec->start) {
7159 fprintf(stderr, "Extent start and backref starts don't match, "
7160 "please use btrfs-image on this file system and send "
7161 "it to a btrfs developer so they can make fsck fix "
7162 "this particular case. bytenr is %Lu, bytes is %Lu\n",
7163 rec->start, rec->nr);
7169 * Ok great we all agreed on an extent record, let's go find the real
7170 * references and fix up the ones that don't match.
/* Pass 2: repair every found ref that disagrees with the winning entry. */
7172 list_for_each_entry(back, &rec->backrefs, list) {
7173 if (back->full_backref || !back->is_data)
7176 dback = to_data_backref(back);
7179 * Still ignoring backrefs that don't have a real ref attached
7182 if (dback->found_ref == 0)
7185 if (dback->bytes == best->bytes &&
7186 dback->disk_bytenr == best->bytenr)
7189 ret = repair_ref(info, path, dback, best);
7195 * Ok we messed with the actual refs, which means we need to drop our
7196 * entire cache and go back and rescan. I know this is a huge pain and
7197 * adds a lot of extra work, but it's the only way to be safe. Once all
7198 * the backrefs agree we may not need to do anything to the extent
/* Drain and free the vote list before returning. */
7203 while (!list_empty(&entries)) {
7204 entry = list_entry(entries.next, struct extent_entry, list);
7205 list_del_init(&entry->list);
/*
 * Resolve an extent record flagged as a duplicate before the delete pass.
 *
 * When @rec itself never matched a real EXTENT_ITEM but carries exactly one
 * duplicate that did, promote that duplicate ("good") to be the authoritative
 * record: move the backrefs over, merge any overlapping records found in the
 * cache, and re-insert "good" into @extent_cache.
 *
 * Returns 0 when the merged record still has duplicates (caller must run
 * delete_duplicate_records()), 1 otherwise.  NOTE(review): listing is elided;
 * the early-return path's value is not visible here.
 */
7211 static int process_duplicates(struct btrfs_root *root,
7212 struct cache_tree *extent_cache,
7213 struct extent_record *rec)
7215 struct extent_record *good, *tmp;
7216 struct cache_extent *cache;
7220 * If we found an extent record for this extent then return, or if we
7221 * have more than one duplicate we are likely going to need to delete
7224 if (rec->found_rec || rec->num_duplicates > 1)
7227 /* Shouldn't happen but just in case */
7228 BUG_ON(!rec->num_duplicates);
7231 * So this happens if we end up with a backref that doesn't match the
7232 * actual extent entry. So either the backref is bad or the extent
7233 * entry is bad. Either way we want to have the extent_record actually
7234 * reflect what we found in the extent_tree, so we need to take the
7235 * duplicate out and use that as the extent_record since the only way we
7236 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7238 remove_cache_extent(extent_cache, &rec->cache);
/* Promote the (single) duplicate and give it rec's backrefs/refcount. */
7240 good = to_extent_record(rec->dups.next);
7241 list_del_init(&good->list);
7242 INIT_LIST_HEAD(&good->backrefs);
7243 INIT_LIST_HEAD(&good->dups);
7244 good->cache.start = good->start;
7245 good->cache.size = good->nr;
7246 good->content_checked = 0;
7247 good->owner_ref_checked = 0;
7248 good->num_duplicates = 0;
7249 good->refs = rec->refs;
7250 list_splice_init(&rec->backrefs, &good->backrefs);
/* Merge every record still overlapping the promoted range. */
7252 cache = lookup_cache_extent(extent_cache, good->start,
7256 tmp = container_of(cache, struct extent_record, cache);
7259 * If we find another overlapping extent and its found_rec is
7260 * set then it's a duplicate and we need to try and delete
7263 if (tmp->found_rec || tmp->num_duplicates > 0) {
7264 if (list_empty(&good->list))
7265 list_add_tail(&good->list,
7266 &duplicate_extents);
7267 good->num_duplicates += tmp->num_duplicates + 1;
7268 list_splice_init(&tmp->dups, &good->dups);
7269 list_del_init(&tmp->list);
7270 list_add_tail(&tmp->list, &good->dups);
7271 remove_cache_extent(extent_cache, &tmp->cache);
7276 * Ok we have another non extent item backed extent rec, so let's
7277 * just add it to this extent and carry on like we did above.
7279 good->refs += tmp->refs;
7280 list_splice_init(&tmp->backrefs, &good->backrefs);
7281 remove_cache_extent(extent_cache, &tmp->cache);
7284 ret = insert_cache_extent(extent_cache, &good->cache);
/* 0 => duplicates remain and must be deleted; 1 => nothing left to do. */
7287 return good->num_duplicates ? 0 : 1;
/*
 * Delete the redundant EXTENT_ITEMs behind a duplicated extent record.
 *
 * Finds the duplicate that fully covers all the others ("good"), moves the
 * rest (and @rec) onto a local delete_list, and removes each one's
 * BTRFS_EXTENT_ITEM_KEY from the extent tree in a transaction.
 *
 * Returns the number of records deleted on success, or a negative errno.
 * NOTE(review): listing is elided; frees of the drained records and some
 * error paths are not visible here.
 */
7290 static int delete_duplicate_records(struct btrfs_root *root,
7291 struct extent_record *rec)
7293 struct btrfs_trans_handle *trans;
7294 LIST_HEAD(delete_list);
7295 struct btrfs_path *path;
7296 struct extent_record *tmp, *good, *n;
7299 struct btrfs_key key;
7301 path = btrfs_alloc_path();
7308 /* Find the record that covers all of the duplicates. */
7309 list_for_each_entry(tmp, &rec->dups, list) {
7310 if (good->start < tmp->start)
7312 if (good->nr > tmp->nr)
/* Partial overlap (neither fully contains the other): refuse to repair. */
7315 if (tmp->start + tmp->nr < good->start + good->nr) {
7316 fprintf(stderr, "Ok we have overlapping extents that "
7317 "aren't completely covered by each other, this "
7318 "is going to require more careful thought. "
7319 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7320 tmp->start, tmp->nr, good->start, good->nr);
7327 list_add_tail(&rec->list, &delete_list);
/* Everything except "good" is queued for deletion. */
7329 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7332 list_move_tail(&tmp->list, &delete_list);
7335 root = root->fs_info->extent_root;
7336 trans = btrfs_start_transaction(root, 1);
7337 if (IS_ERR(trans)) {
7338 ret = PTR_ERR(trans);
7342 list_for_each_entry(tmp, &delete_list, list) {
7343 if (tmp->found_rec == 0)
7345 key.objectid = tmp->start;
7346 key.type = BTRFS_EXTENT_ITEM_KEY;
7347 key.offset = tmp->nr;
7349 /* Shouldn't happen but just in case */
7350 if (tmp->metadata) {
7351 fprintf(stderr, "Well this shouldn't happen, extent "
7352 "record overlaps but is metadata? "
7353 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7357 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7363 ret = btrfs_del_item(trans, root, path);
7366 btrfs_release_path(path);
7369 err = btrfs_commit_transaction(trans, root);
/* Drain both lists; their records are no longer referenced by the cache. */
7373 while (!list_empty(&delete_list)) {
7374 tmp = to_extent_record(delete_list.next);
7375 list_del_init(&tmp->list);
7381 while (!list_empty(&rec->dups)) {
7382 tmp = to_extent_record(rec->dups.next);
7383 list_del_init(&tmp->list);
7387 btrfs_free_path(path);
7389 if (!ret && !nr_del)
7390 rec->num_duplicates = 0;
7392 return ret ? ret : nr_del;
/*
 * For backrefs of @rec that never matched a real file extent (found_ref == 0),
 * look up the referenced file extent in its owning fs root and fill in the
 * on-disk (disk_bytenr, bytes) so verify_backrefs() has data to vote with.
 *
 * A backref is only adopted when its bytenr has no extent record of its own
 * in @extent_cache — otherwise it belongs to that other record.
 * NOTE(review): listing is elided; loop continues and the "broken" marking
 * referenced by the trailing comment are not visible here.
 */
7395 static int find_possible_backrefs(struct btrfs_fs_info *info,
7396 struct btrfs_path *path,
7397 struct cache_tree *extent_cache,
7398 struct extent_record *rec)
7400 struct btrfs_root *root;
7401 struct extent_backref *back;
7402 struct data_backref *dback;
7403 struct cache_extent *cache;
7404 struct btrfs_file_extent_item *fi;
7405 struct btrfs_key key;
7409 list_for_each_entry(back, &rec->backrefs, list) {
7410 /* Don't care about full backrefs (poor unloved backrefs) */
7411 if (back->full_backref || !back->is_data)
7414 dback = to_data_backref(back);
7416 /* We found this one, we don't need to do a lookup */
7417 if (dback->found_ref)
/* Resolve the owning subvolume root from the backref's root id. */
7420 key.objectid = dback->root;
7421 key.type = BTRFS_ROOT_ITEM_KEY;
7422 key.offset = (u64)-1;
7424 root = btrfs_read_fs_root(info, &key);
7426 /* No root, definitely a bad ref, skip */
7427 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7429 /* Other err, exit */
7431 return PTR_ERR(root);
/* Look for the exact file extent item this backref claims to own. */
7433 key.objectid = dback->owner;
7434 key.type = BTRFS_EXTENT_DATA_KEY;
7435 key.offset = dback->offset;
7436 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7438 btrfs_release_path(path);
7441 /* Didn't find it, we can carry on */
7446 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7447 struct btrfs_file_extent_item);
7448 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7449 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7450 btrfs_release_path(path);
7451 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7453 struct extent_record *tmp;
7454 tmp = container_of(cache, struct extent_record, cache);
7457 * If we found an extent record for the bytenr for this
7458 * particular backref then we can't add it to our
7459 * current extent record. We only want to add backrefs
7460 * that don't have a corresponding extent item in the
7461 * extent tree since they likely belong to this record
7462 * and we need to fix it if it doesn't match bytenrs.
/* Adopt the on-disk values for this previously-unmatched backref. */
7468 dback->found_ref += 1;
7469 dback->disk_bytenr = bytenr;
7470 dback->bytes = bytes;
7473 * Set this so the verify backref code knows not to trust the
7474 * values in this backref.
7483 * Record an orphan data ref into its corresponding root.
7485 * Return 0 if the extent item contains a data ref and it was recorded.
7486 * Return 1 if the extent item contains no useful data ref.
7487 * In that case it may contain only shared data refs or metadata backrefs,
7488 * or the file extent may already exist (this should be handled by the extent bytenr
7490 * Return <0 if something goes wrong.
/*
 * For each data backref of @rec that was seen in the extent tree but never
 * matched a live file extent, record an orphan_data_extent on the owning
 * root's orphan_data_extents list so inode/file-extent rebuild can use it.
 *
 * Returns !recorded_data_ref: 0 if at least one orphan was recorded,
 * 1 if none were (see the block comment above for the full contract).
 */
7492 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7493 struct extent_record *rec)
7495 struct btrfs_key key;
7496 struct btrfs_root *dest_root;
7497 struct extent_backref *back;
7498 struct data_backref *dback;
7499 struct orphan_data_extent *orphan;
7500 struct btrfs_path *path;
7501 int recorded_data_ref = 0;
7506 path = btrfs_alloc_path();
7509 list_for_each_entry(back, &rec->backrefs, list) {
7510 if (back->full_backref || !back->is_data ||
7511 !back->found_extent_tree)
7513 dback = to_data_backref(back);
7514 if (dback->found_ref)
/* Resolve the destination subvolume root for this backref. */
7516 key.objectid = dback->root;
7517 key.type = BTRFS_ROOT_ITEM_KEY;
7518 key.offset = (u64)-1;
7520 dest_root = btrfs_read_fs_root(fs_info, &key);
7522 /* Non-existent root: just skip it */
7523 if (IS_ERR(dest_root) || !dest_root)
7526 key.objectid = dback->owner;
7527 key.type = BTRFS_EXTENT_DATA_KEY;
7528 key.offset = dback->offset;
7530 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7532 * For ret < 0, it's OK since the fs-tree may be corrupted,
7533 * we need to record it for inode/file extent rebuild.
7534 * For ret > 0, we record it only for file extent rebuild.
7535 * For ret == 0, the file extent exists but only bytenr
7536 * mismatch, let the original bytenr fix routine to handle,
7542 orphan = malloc(sizeof(*orphan));
/* Snapshot the backref + record geometry into the orphan entry. */
7547 INIT_LIST_HEAD(&orphan->list);
7548 orphan->root = dback->root;
7549 orphan->objectid = dback->owner;
7550 orphan->offset = dback->offset;
7551 orphan->disk_bytenr = rec->cache.start;
7552 orphan->disk_len = rec->cache.size;
7553 list_add(&dest_root->orphan_data_extents, &orphan->list);
7554 recorded_data_ref = 1;
7557 btrfs_free_path(path);
7559 return !recorded_data_ref;
7565 * when an incorrect extent item is found, this will delete
7566 * all of the existing entries for it and recreate them
7567 * based on what the tree scan found.
/*
 * Repair a broken extent item: reconcile its backrefs, delete every existing
 * extent-tree record for the range, and recreate refs from what the tree
 * scan actually found (see the comment above this function).
 *
 * NOTE(review): listing is elided; 'flags'/'allocated' declarations, goto
 * labels and several error checks are not visible here.
 */
7569 static int fixup_extent_refs(struct btrfs_fs_info *info,
7570 struct cache_tree *extent_cache,
7571 struct extent_record *rec)
7573 struct btrfs_trans_handle *trans = NULL;
7575 struct btrfs_path *path;
7576 struct list_head *cur = rec->backrefs.next;
7577 struct cache_extent *cache;
7578 struct extent_backref *back;
/* Preserve the full-backref flag when recreating the extent item. */
7582 if (rec->flag_block_full_backref)
7583 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7585 path = btrfs_alloc_path();
7589 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7591 * Sometimes the backrefs themselves are so broken they don't
7592 * get attached to any meaningful rec, so first go back and
7593 * check any of our backrefs that we couldn't find and throw
7594 * them into the list if we find the backref so that
7595 * verify_backrefs can figure out what to do.
7597 ret = find_possible_backrefs(info, path, extent_cache, rec);
7602 /* step one, make sure all of the backrefs agree */
7603 ret = verify_backrefs(info, path, rec);
7607 trans = btrfs_start_transaction(info->extent_root, 1);
7608 if (IS_ERR(trans)) {
7609 ret = PTR_ERR(trans);
7613 /* step two, delete all the existing records */
7614 ret = delete_extent_records(trans, info->extent_root, path,
7615 rec->start, rec->max_size);
7620 /* was this block corrupt? If so, don't add references to it */
7621 cache = lookup_cache_extent(info->corrupt_blocks,
7622 rec->start, rec->max_size);
7628 /* step three, recreate all the refs we did find */
7629 while(cur != &rec->backrefs) {
7630 back = to_extent_backref(cur);
7634 * if we didn't find any references, don't create a
7637 if (!back->found_ref)
7640 rec->bad_full_backref = 0;
7641 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7649 int err = btrfs_commit_transaction(trans, info->extent_root);
7654 btrfs_free_path(path);
/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on @rec's extent item so the
 * on-disk flags match what the scan decided (rec->flag_block_full_backref).
 *
 * Returns the commit result, or a negative errno from the lookup/transaction.
 */
7658 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7659 struct extent_record *rec)
7661 struct btrfs_trans_handle *trans;
7662 struct btrfs_root *root = fs_info->extent_root;
7663 struct btrfs_path *path;
7664 struct btrfs_extent_item *ei;
7665 struct btrfs_key key;
/* Metadata extents are keyed by level; data extents by byte size. */
7669 key.objectid = rec->start;
7670 if (rec->metadata) {
7671 key.type = BTRFS_METADATA_ITEM_KEY;
7672 key.offset = rec->info_level;
7674 key.type = BTRFS_EXTENT_ITEM_KEY;
7675 key.offset = rec->max_size;
7678 path = btrfs_alloc_path();
7682 trans = btrfs_start_transaction(root, 0);
7683 if (IS_ERR(trans)) {
7684 btrfs_free_path(path);
7685 return PTR_ERR(trans);
7688 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Error/missing item paths still commit to release the transaction. */
7690 btrfs_free_path(path);
7691 btrfs_commit_transaction(trans, root);
7694 fprintf(stderr, "Didn't find extent for %llu\n",
7695 (unsigned long long)rec->start);
7696 btrfs_free_path(path);
7697 btrfs_commit_transaction(trans, root);
7701 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7702 struct btrfs_extent_item);
7703 flags = btrfs_extent_flags(path->nodes[0], ei);
7704 if (rec->flag_block_full_backref) {
7705 fprintf(stderr, "setting full backref on %llu\n",
7706 (unsigned long long)key.objectid);
7707 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7709 fprintf(stderr, "clearing full backref on %llu\n",
7710 (unsigned long long)key.objectid);
7711 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7713 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7714 btrfs_mark_buffer_dirty(path->nodes[0]);
7715 btrfs_free_path(path);
7716 return btrfs_commit_transaction(trans, root);
7719 /* right now we only prune from the extent allocation tree */
/*
 * Remove the parent node's pointer to one corrupt block in the extent tree.
 * Searches down to the parent level (corrupt->level + 1), locates the slot
 * whose blockptr matches the corrupt block, and deletes that pointer.
 */
7720 static int prune_one_block(struct btrfs_trans_handle *trans,
7721 struct btrfs_fs_info *info,
7722 struct btrfs_corrupt_block *corrupt)
7725 struct btrfs_path path;
7726 struct extent_buffer *eb;
7730 int level = corrupt->level + 1;
7732 btrfs_init_path(&path);
7734 /* we want to stop at the parent to our busted block */
7735 path.lowest_level = level;
7737 ret = btrfs_search_slot(trans, info->extent_root,
7738 &corrupt->key, &path, -1, 1);
7743 eb = path.nodes[level];
7750 * hopefully the search gave us the block we want to prune,
7751 * let's try that first
7753 slot = path.slots[level];
7754 found = btrfs_node_blockptr(eb, slot);
7755 if (found == corrupt->cache.start)
7758 nritems = btrfs_header_nritems(eb);
7760 /* the search failed, lets scan this node and hope we find it */
7761 for (slot = 0; slot < nritems; slot++) {
7762 found = btrfs_node_blockptr(eb, slot);
7763 if (found == corrupt->cache.start)
7767 * we couldn't find the bad block. TODO, search all the nodes for pointers
7770 if (eb == info->extent_root->node) {
7775 btrfs_release_path(&path);
7780 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7781 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7784 btrfs_release_path(&path);
/*
 * Walk the corrupt_blocks cache and prune each recorded block's parent
 * pointer via prune_one_block(), all within a single transaction started
 * lazily on the first corrupt block found.
 */
7788 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7790 struct btrfs_trans_handle *trans = NULL;
7791 struct cache_extent *cache;
7792 struct btrfs_corrupt_block *corrupt;
7795 cache = search_cache_extent(info->corrupt_blocks, 0);
/* Start the transaction only once we know there is work to do. */
7799 trans = btrfs_start_transaction(info->extent_root, 1);
7801 return PTR_ERR(trans);
7803 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7804 prune_one_block(trans, info, corrupt);
7805 remove_cache_extent(info->corrupt_blocks, cache);
7808 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Drop the in-memory free space cache: clear every EXTENT_DIRTY range in
 * fs_info->free_space_cache and walk the block groups so they will be
 * re-cached on next use.  NOTE(review): listing is elided; the loop bodies
 * between the visible statements are not fully shown.
 */
7812 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7814 struct btrfs_block_group_cache *cache;
7819 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7820 &start, &end, EXTENT_DIRTY);
7823 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7829 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Advance past this block group to find the next one. */
7834 start = cache->key.objectid + cache->key.offset;
/*
 * Main extent verification/repair loop.  Cross-checks every extent record
 * against its backrefs and, in repair mode: excludes problem extents from
 * allocation, resolves duplicates, fixes ref mismatches and flags, then
 * fixes block accounting in a final transaction.
 *
 * NOTE(review): listing is elided (error checks, 'fixed'/'recorded'/'cur_err'
 * bookkeeping and several braces are not visible), so only the visible
 * control flow is annotated.
 */
7838 static int check_extent_refs(struct btrfs_root *root,
7839 struct cache_tree *extent_cache)
7841 struct extent_record *rec;
7842 struct cache_extent *cache;
7851 * if we're doing a repair, we have to make sure
7852 * we don't allocate from the problem extents.
7853 * In the worst case, this will be all the
7856 cache = search_cache_extent(extent_cache, 0);
7858 rec = container_of(cache, struct extent_record, cache);
7859 set_extent_dirty(root->fs_info->excluded_extents,
7861 rec->start + rec->max_size - 1,
7863 cache = next_cache_extent(cache);
7866 /* pin down all the corrupted blocks too */
7867 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7869 set_extent_dirty(root->fs_info->excluded_extents,
7871 cache->start + cache->size - 1,
7873 cache = next_cache_extent(cache);
7875 prune_corrupt_blocks(root->fs_info);
7876 reset_cached_block_groups(root->fs_info);
7879 reset_cached_block_groups(root->fs_info);
7882 * We need to delete any duplicate entries we find first otherwise we
7883 * could mess up the extent tree when we have backrefs that actually
7884 * belong to a different extent item and not the weird duplicate one.
7886 while (repair && !list_empty(&duplicate_extents)) {
7887 rec = to_extent_record(duplicate_extents.next);
7888 list_del_init(&rec->list);
7890 /* Sometimes we can find a backref before we find an actual
7891 * extent, so we need to process it a little bit to see if there
7892 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7893 * if this is a backref screwup. If we need to delete stuff
7894 * process_duplicates() will return 0, otherwise it will return
7897 if (process_duplicates(root, extent_cache, rec))
7899 ret = delete_duplicate_records(root, rec);
7903 * delete_duplicate_records will return the number of entries
7904 * deleted, so if it's greater than 0 then we know we actually
7905 * did something and we need to remove.
/* Per-record verification pass over the whole extent cache. */
7919 cache = search_cache_extent(extent_cache, 0);
7922 rec = container_of(cache, struct extent_record, cache);
7923 if (rec->num_duplicates) {
7924 fprintf(stderr, "extent item %llu has multiple extent "
7925 "items\n", (unsigned long long)rec->start);
/* Refcount seen while scanning trees vs. refcount in the extent item. */
7930 if (rec->refs != rec->extent_item_refs) {
7931 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7932 (unsigned long long)rec->start,
7933 (unsigned long long)rec->nr);
7934 fprintf(stderr, "extent item %llu, found %llu\n",
7935 (unsigned long long)rec->extent_item_refs,
7936 (unsigned long long)rec->refs);
7937 ret = record_orphan_data_extents(root->fs_info, rec);
7944 * we can't use the extent to repair file
7945 * extent, let the fallback method handle it.
7947 if (!fixed && repair) {
7948 ret = fixup_extent_refs(
7959 if (all_backpointers_checked(rec, 1)) {
7960 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7961 (unsigned long long)rec->start,
7962 (unsigned long long)rec->nr);
7964 if (!fixed && !recorded && repair) {
7965 ret = fixup_extent_refs(root->fs_info,
7974 if (!rec->owner_ref_checked) {
7975 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7976 (unsigned long long)rec->start,
7977 (unsigned long long)rec->nr);
7978 if (!fixed && !recorded && repair) {
7979 ret = fixup_extent_refs(root->fs_info,
7988 if (rec->bad_full_backref) {
7989 fprintf(stderr, "bad full backref, on [%llu]\n",
7990 (unsigned long long)rec->start);
7992 ret = fixup_extent_flags(root->fs_info, rec);
8001 * Although it's not a extent ref's problem, we reuse this
8002 * routine for error reporting.
8003 * No repair function yet.
8005 if (rec->crossing_stripes) {
8007 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8008 rec->start, rec->start + rec->max_size);
8013 if (rec->wrong_chunk_type) {
8015 "bad extent [%llu, %llu), type mismatch with chunk\n",
8016 rec->start, rec->start + rec->max_size);
/* Done with this record: drop it and un-exclude the range if clean. */
8021 remove_cache_extent(extent_cache, cache);
8022 free_all_extent_backrefs(rec);
8023 if (!init_extent_tree && repair && (!cur_err || fixed))
8024 clear_extent_dirty(root->fs_info->excluded_extents,
8026 rec->start + rec->max_size - 1,
8032 if (ret && ret != -EAGAIN) {
8033 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8036 struct btrfs_trans_handle *trans;
8038 root = root->fs_info->extent_root;
8039 trans = btrfs_start_transaction(root, 1);
8040 if (IS_ERR(trans)) {
8041 ret = PTR_ERR(trans);
8045 btrfs_fix_block_accounting(trans, root);
8046 ret = btrfs_commit_transaction(trans, root);
8051 fprintf(stderr, "repaired damaged extent references\n");
/*
 * Compute the per-device stripe length for a chunk of @length bytes with
 * @num_stripes stripes, by RAID profile: RAID0 divides evenly, RAID10 stores
 * two copies, RAID5/6 reserve one/two parity stripes, and everything else
 * (single/DUP/RAID1) uses the full length per device.
 * NOTE(review): listing is elided; the stripe_size declaration and return
 * are not visible here.
 */
8057 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8061 if (type & BTRFS_BLOCK_GROUP_RAID0) {
8062 stripe_size = length;
8063 stripe_size /= num_stripes;
8064 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8065 stripe_size = length * 2;
8066 stripe_size /= num_stripes;
8067 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8068 stripe_size = length;
8069 stripe_size /= (num_stripes - 1);
8070 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8071 stripe_size = length;
8072 stripe_size /= (num_stripes - 2);
8074 stripe_size = length;
8080 * Check the chunk with its block group/dev list ref:
8081 * Return 0 if all refs seems valid.
8082 * Return 1 if part of refs seems valid, need later check for rebuild ref
8083 * like missing block group and needs to search extent tree to rebuild them.
8084 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * Cross-check one chunk against its block group item and the dev extent of
 * every stripe (see the return-value contract in the comment above).
 * Mismatches are reported on stderr unless @silent; matching records are
 * detached from their orphan lists and linked to the chunk record.
 */
8086 static int check_chunk_refs(struct chunk_record *chunk_rec,
8087 struct block_group_tree *block_group_cache,
8088 struct device_extent_tree *dev_extent_cache,
8091 struct cache_extent *block_group_item;
8092 struct block_group_record *block_group_rec;
8093 struct cache_extent *dev_extent_item;
8094 struct device_extent_record *dev_extent_rec;
8098 int metadump_v2 = 0;
/* 1) The chunk must have a matching block group item. */
8102 block_group_item = lookup_cache_extent(&block_group_cache->tree,
8105 if (block_group_item) {
8106 block_group_rec = container_of(block_group_item,
8107 struct block_group_record,
8109 if (chunk_rec->length != block_group_rec->offset ||
8110 chunk_rec->offset != block_group_rec->objectid ||
8112 chunk_rec->type_flags != block_group_rec->flags)) {
8115 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8116 chunk_rec->objectid,
8121 chunk_rec->type_flags,
8122 block_group_rec->objectid,
8123 block_group_rec->type,
8124 block_group_rec->offset,
8125 block_group_rec->offset,
8126 block_group_rec->objectid,
8127 block_group_rec->flags);
/* Claim the block group record for this chunk. */
8130 list_del_init(&block_group_rec->list);
8131 chunk_rec->bg_rec = block_group_rec;
8136 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8137 chunk_rec->objectid,
8142 chunk_rec->type_flags);
/* 2) Every stripe must have a matching dev extent of the right length. */
8149 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8150 chunk_rec->num_stripes);
8151 for (i = 0; i < chunk_rec->num_stripes; ++i) {
8152 devid = chunk_rec->stripes[i].devid;
8153 offset = chunk_rec->stripes[i].offset;
8154 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8155 devid, offset, length);
8156 if (dev_extent_item) {
8157 dev_extent_rec = container_of(dev_extent_item,
8158 struct device_extent_record,
8160 if (dev_extent_rec->objectid != devid ||
8161 dev_extent_rec->offset != offset ||
8162 dev_extent_rec->chunk_offset != chunk_rec->offset ||
8163 dev_extent_rec->length != length) {
8166 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8167 chunk_rec->objectid,
8170 chunk_rec->stripes[i].devid,
8171 chunk_rec->stripes[i].offset,
8172 dev_extent_rec->objectid,
8173 dev_extent_rec->offset,
8174 dev_extent_rec->length);
8177 list_move(&dev_extent_rec->chunk_list,
8178 &chunk_rec->dextents);
8183 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8184 chunk_rec->objectid,
8187 chunk_rec->stripes[i].devid,
8188 chunk_rec->stripes[i].offset);
8195 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Validate every chunk in @chunk_cache via check_chunk_refs() and classify
 * each onto the @good, @rebuild or @bad list (when the caller supplied one).
 * Leftover block groups and dev extents with no owning chunk are reported
 * afterwards.  @silent suppresses the diagnostics.
 */
8196 int check_chunks(struct cache_tree *chunk_cache,
8197 struct block_group_tree *block_group_cache,
8198 struct device_extent_tree *dev_extent_cache,
8199 struct list_head *good, struct list_head *bad,
8200 struct list_head *rebuild, int silent)
8202 struct cache_extent *chunk_item;
8203 struct chunk_record *chunk_rec;
8204 struct block_group_record *bg_rec;
8205 struct device_extent_record *dext_rec;
8209 chunk_item = first_cache_extent(chunk_cache);
8210 while (chunk_item) {
8211 chunk_rec = container_of(chunk_item, struct chunk_record,
8213 err = check_chunk_refs(chunk_rec, block_group_cache,
8214 dev_extent_cache, silent);
/* err == 0: fully referenced; err > 0: partially, rebuildable; else bad. */
8217 if (err == 0 && good)
8218 list_add_tail(&chunk_rec->list, good);
8219 if (err > 0 && rebuild)
8220 list_add_tail(&chunk_rec->list, rebuild);
8222 list_add_tail(&chunk_rec->list, bad);
8223 chunk_item = next_cache_extent(chunk_item);
/* Anything still on the orphan lists has no chunk backing it. */
8226 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8229 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8237 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8241 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Sum the lengths of all dev extents belonging to @dev_rec's device and
 * compare the total against the device item's byte_used field; report a
 * mismatch on stderr.  Matched dev extents are detached from the orphan
 * device_list as they are counted.
 */
8252 static int check_device_used(struct device_record *dev_rec,
8253 struct device_extent_tree *dext_cache)
8255 struct cache_extent *cache;
8256 struct device_extent_record *dev_extent_rec;
8259 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8261 dev_extent_rec = container_of(cache,
8262 struct device_extent_record,
/* Stop once the iteration crosses into another device's extents. */
8264 if (dev_extent_rec->objectid != dev_rec->devid)
8267 list_del_init(&dev_extent_rec->device_list);
8268 total_byte += dev_extent_rec->length;
8269 cache = next_cache_extent(cache);
8272 if (total_byte != dev_rec->byte_used) {
8274 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8275 total_byte, dev_rec->byte_used, dev_rec->objectid,
8276 dev_rec->type, dev_rec->offset);
8283 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Run check_device_used() on every device in the @dev_cache rb-tree, then
 * report any dev extents whose device item was never found.
 */
8284 static int check_devices(struct rb_root *dev_cache,
8285 struct device_extent_tree *dev_extent_cache)
8287 struct rb_node *dev_node;
8288 struct device_record *dev_rec;
8289 struct device_extent_record *dext_rec;
8293 dev_node = rb_first(dev_cache);
8295 dev_rec = container_of(dev_node, struct device_record, node);
8296 err = check_device_used(dev_rec, dev_extent_cache);
8300 dev_node = rb_next(dev_node);
/* Extents left on this list belong to no known device item. */
8302 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8305 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8306 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record describing one tree root (objectid, bytenr,
 * level, node size, last snapshot, and optional drop key/level for a
 * partially-dropped snapshot) and append it to @head.
 * NOTE(review): listing is elided; the allocation-failure return is not
 * visible here.
 */
8313 static int add_root_item_to_list(struct list_head *head,
8314 u64 objectid, u64 bytenr, u64 last_snapshot,
8315 u8 level, u8 drop_level,
8316 int level_size, struct btrfs_key *drop_key)
8319 struct root_item_record *ri_rec;
8320 ri_rec = malloc(sizeof(*ri_rec));
8323 ri_rec->bytenr = bytenr;
8324 ri_rec->objectid = objectid;
8325 ri_rec->level = level;
8326 ri_rec->level_size = level_size;
8327 ri_rec->drop_level = drop_level;
8328 ri_rec->last_snapshot = last_snapshot;
/* drop_key is optional; copy it only when the caller provided one. */
8330 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8331 list_add_tail(&ri_rec->list, head);
/*
 * Drain @list, unlinking each root_item_record.  The free() call for
 * each record sits in a numbering gap and is not visible here.
 */
8336 static void free_root_item_list(struct list_head *list)
8338 struct root_item_record *ri_rec;
8340 while (!list_empty(list)) {
8341 ri_rec = list_first_entry(list, struct root_item_record,
8343 list_del_init(&ri_rec->list);
/*
 * Process every queued root in @list: read its root node, seed the
 * pending/seen/nodes caches, then drive run_next_block() until that
 * root's blocks are consumed; finally drain any remaining pending
 * blocks with a NULL root record.
 *
 * NOTE(review): embedded numbering gaps hide the error checks between
 * the visible calls and the final return.
 */
8348 static int deal_root_from_list(struct list_head *list,
8349 struct btrfs_root *root,
8350 struct block_info *bits,
8352 struct cache_tree *pending,
8353 struct cache_tree *seen,
8354 struct cache_tree *reada,
8355 struct cache_tree *nodes,
8356 struct cache_tree *extent_cache,
8357 struct cache_tree *chunk_cache,
8358 struct rb_root *dev_cache,
8359 struct block_group_tree *block_group_cache,
8360 struct device_extent_tree *dev_extent_cache)
8365 while (!list_empty(list)) {
8366 struct root_item_record *rec;
8367 struct extent_buffer *buf;
8368 rec = list_entry(list->next,
8369 struct root_item_record, list);
/* Read this root's tree block from disk; bail on corruption. */
8371 buf = read_tree_block(root->fs_info->tree_root,
8372 rec->bytenr, rec->level_size, 0);
8373 if (!extent_buffer_uptodate(buf)) {
8374 free_extent_buffer(buf);
8378 ret = add_root_to_pending(buf, extent_cache, pending,
8379 seen, nodes, rec->objectid);
8383 * To rebuild extent tree, we need deal with snapshot
8384 * one by one, otherwise we deal with node firstly which
8385 * can maximize readahead.
/* Per-root pass: walk blocks belonging to this root record. */
8388 ret = run_next_block(root, bits, bits_nr, &last,
8389 pending, seen, reada, nodes,
8390 extent_cache, chunk_cache,
8391 dev_cache, block_group_cache,
8392 dev_extent_cache, rec);
8396 free_extent_buffer(buf);
8397 list_del(&rec->list);
/* Final pass with NULL record: flush anything still pending. */
8403 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8404 reada, nodes, extent_cache, chunk_cache,
8405 dev_cache, block_group_cache,
8406 dev_extent_cache, NULL);
/*
 * Top-level driver of the original-mode extent/chunk check:
 *  1. init all per-run caches and hook them into fs_info,
 *  2. queue tree root + chunk root, then every ROOT_ITEM from the
 *     root tree (split into normal trees and half-dropped trees),
 *  3. walk them via deal_root_from_list(),
 *  4. cross-check chunks, extent refs and devices,
 *  5. tear everything down.
 *
 * NOTE(review): embedded numbering gaps hide the error-handling
 * branches (goto loop/out paths), local declarations, and the return
 * statements, so only the main flow is visible here.
 */
8416 static int check_chunks_and_extents(struct btrfs_root *root)
8418 struct rb_root dev_cache;
8419 struct cache_tree chunk_cache;
8420 struct block_group_tree block_group_cache;
8421 struct device_extent_tree dev_extent_cache;
8422 struct cache_tree extent_cache;
8423 struct cache_tree seen;
8424 struct cache_tree pending;
8425 struct cache_tree reada;
8426 struct cache_tree nodes;
8427 struct extent_io_tree excluded_extents;
8428 struct cache_tree corrupt_blocks;
8429 struct btrfs_path path;
8430 struct btrfs_key key;
8431 struct btrfs_key found_key;
8433 struct block_info *bits;
8435 struct extent_buffer *leaf;
8437 struct btrfs_root_item ri;
8438 struct list_head dropping_trees;
8439 struct list_head normal_trees;
8440 struct btrfs_root *root1;
/* ---- cache initialization ---- */
8445 dev_cache = RB_ROOT;
8446 cache_tree_init(&chunk_cache);
8447 block_group_tree_init(&block_group_cache);
8448 device_extent_tree_init(&dev_extent_cache);
8450 cache_tree_init(&extent_cache);
8451 cache_tree_init(&seen);
8452 cache_tree_init(&pending);
8453 cache_tree_init(&nodes);
8454 cache_tree_init(&reada);
8455 cache_tree_init(&corrupt_blocks);
8456 extent_io_tree_init(&excluded_extents);
8457 INIT_LIST_HEAD(&dropping_trees);
8458 INIT_LIST_HEAD(&normal_trees);
/* Expose the fsck caches to the rest of the code via fs_info hooks;
 * these are cleared again before returning (lines 8598-8601). */
8461 root->fs_info->excluded_extents = &excluded_extents;
8462 root->fs_info->fsck_extent_cache = &extent_cache;
8463 root->fs_info->free_extent_hook = free_extent_hook;
8464 root->fs_info->corrupt_blocks = &corrupt_blocks;
8468 bits = malloc(bits_nr * sizeof(struct block_info));
/* Progress reporting task, if enabled on the command line. */
8474 if (ctx.progress_enabled) {
8475 ctx.tp = TASK_EXTENTS;
8476 task_start(ctx.info);
/* Queue the tree root and chunk root first. */
8480 root1 = root->fs_info->tree_root;
8481 level = btrfs_header_level(root1->node);
8482 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8483 root1->node->start, 0, level, 0,
8484 root1->nodesize, NULL);
8487 root1 = root->fs_info->chunk_root;
8488 level = btrfs_header_level(root1->node);
8489 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8490 root1->node->start, 0, level, 0,
8491 root1->nodesize, NULL);
/* Scan the root tree for every ROOT_ITEM and queue each subvolume. */
8494 btrfs_init_path(&path);
8497 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8498 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8503 leaf = path.nodes[0];
8504 slot = path.slots[0];
8505 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8506 ret = btrfs_next_leaf(root, &path);
8509 leaf = path.nodes[0];
8510 slot = path.slots[0];
8512 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8513 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8514 unsigned long offset;
8517 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8518 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8519 last_snapshot = btrfs_root_last_snapshot(&ri);
/* drop_progress objectid == 0 => tree is not being dropped. */
8520 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8521 level = btrfs_root_level(&ri);
8522 level_size = root->nodesize;
8523 ret = add_root_item_to_list(&normal_trees,
8525 btrfs_root_bytenr(&ri),
8526 last_snapshot, level,
8527 0, level_size, NULL);
/* Otherwise queue it as a half-deleted (dropping) tree with its
 * drop key so the walk can resume where deletion stopped. */
8531 level = btrfs_root_level(&ri);
8532 level_size = root->nodesize;
8533 objectid = found_key.objectid;
8534 btrfs_disk_key_to_cpu(&found_key,
8536 ret = add_root_item_to_list(&dropping_trees,
8538 btrfs_root_bytenr(&ri),
8539 last_snapshot, level,
8541 level_size, &found_key);
8548 btrfs_release_path(&path);
8551 * check_block can return -EAGAIN if it fixes something, please keep
8552 * this in mind when dealing with return values from these functions, if
8553 * we get -EAGAIN we want to fall through and restart the loop.
8555 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8556 &seen, &reada, &nodes, &extent_cache,
8557 &chunk_cache, &dev_cache, &block_group_cache,
8564 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8565 &pending, &seen, &reada, &nodes,
8566 &extent_cache, &chunk_cache, &dev_cache,
8567 &block_group_cache, &dev_extent_cache);
/* ---- cross checks over the populated caches ---- */
8574 ret = check_chunks(&chunk_cache, &block_group_cache,
8575 &dev_extent_cache, NULL, NULL, NULL, 0);
8582 ret = check_extent_refs(root, &extent_cache);
8589 ret = check_devices(&dev_cache, &dev_extent_cache);
8594 task_stop(ctx.info);
/* Normal-path teardown: unhook fsck state from fs_info and free. */
8596 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8597 extent_io_tree_cleanup(&excluded_extents);
8598 root->fs_info->fsck_extent_cache = NULL;
8599 root->fs_info->free_extent_hook = NULL;
8600 root->fs_info->corrupt_blocks = NULL;
8601 root->fs_info->excluded_extents = NULL;
8604 free_chunk_cache_tree(&chunk_cache);
8605 free_device_cache_tree(&dev_cache);
8606 free_block_group_tree(&block_group_cache);
8607 free_device_extent_tree(&dev_extent_cache);
8608 free_extent_cache_tree(&seen);
8609 free_extent_cache_tree(&pending);
8610 free_extent_cache_tree(&reada);
8611 free_extent_cache_tree(&nodes);
/* Error-path teardown (presumably behind a label in a missing line —
 * the duplicate frees below mirror the normal path plus extras). */
8614 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8615 free_extent_cache_tree(&seen);
8616 free_extent_cache_tree(&pending);
8617 free_extent_cache_tree(&reada);
8618 free_extent_cache_tree(&nodes);
8619 free_chunk_cache_tree(&chunk_cache);
8620 free_block_group_tree(&block_group_cache);
8621 free_device_cache_tree(&dev_cache);
8622 free_device_extent_tree(&dev_extent_cache);
8623 free_extent_record_cache(root->fs_info, &extent_cache);
8624 free_root_item_list(&normal_trees);
8625 free_root_item_list(&dropping_trees);
8626 extent_io_tree_cleanup(&excluded_extents);
8631 * Check backrefs of a tree block given by @bytenr or @eb.
8633 * @root: the root containing the @bytenr or @eb
8634 * @eb: tree block extent buffer, can be NULL
8635 * @bytenr: bytenr of the tree block to search
8636 * @level: tree level of the tree block
8637 * @owner: owner of the tree block
8639 * Return >0 for any error found and output error message
8640 * Return 0 for no error found
/*
 * Lowmem-mode check of the extent-tree backref for one tree block:
 * locate its EXTENT_ITEM/METADATA_ITEM, validate flags, generation,
 * level and ref count, then look for a matching inline or keyed
 * TREE_BLOCK_REF / SHARED_BLOCK_REF.
 *
 * Returns a bitmask of BACKREF_* error flags (0 = clean), per the
 * doc comment preceding this function.
 *
 * NOTE(review): numbering gaps hide several locals, 'out' labels and
 * loop boundaries; read alongside the full source before editing.
 */
8642 static int check_tree_block_ref(struct btrfs_root *root,
8643 struct extent_buffer *eb, u64 bytenr,
8644 int level, u64 owner)
8646 struct btrfs_key key;
8647 struct btrfs_root *extent_root = root->fs_info->extent_root;
8648 struct btrfs_path path;
8649 struct btrfs_extent_item *ei;
8650 struct btrfs_extent_inline_ref *iref;
8651 struct extent_buffer *leaf;
8657 u32 nodesize = root->nodesize;
8664 btrfs_init_path(&path);
8665 key.objectid = bytenr;
/* Skinny metadata fs uses METADATA_ITEM (offset = level) instead of
 * EXTENT_ITEM (offset = length). */
8666 if (btrfs_fs_incompat(root->fs_info,
8667 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8668 key.type = BTRFS_METADATA_ITEM_KEY;
8670 key.type = BTRFS_EXTENT_ITEM_KEY;
8671 key.offset = (u64)-1;
8673 /* Search for the backref in extent tree */
8674 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8676 err |= BACKREF_MISSING;
/* offset=(u64)-1 lands past the item; step back to it. */
8679 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8681 err |= BACKREF_MISSING;
8685 leaf = path.nodes[0];
8686 slot = path.slots[0];
8687 btrfs_item_key_to_cpu(leaf, &key, slot);
8689 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* Derive the level and first inline ref for both item layouts. */
8691 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8692 skinny_level = (int)key.offset;
8693 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8695 struct btrfs_tree_block_info *info;
8697 info = (struct btrfs_tree_block_info *)(ei + 1);
8698 skinny_level = btrfs_tree_block_level(leaf, info);
8699 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* Sanity checks on the extent item itself. */
8706 if (!(btrfs_extent_flags(leaf, ei) &
8707 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8709 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8710 key.objectid, nodesize,
8711 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8712 err = BACKREF_MISMATCH;
8714 header_gen = btrfs_header_generation(eb);
8715 extent_gen = btrfs_extent_generation(leaf, ei);
8716 if (header_gen != extent_gen) {
8718 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8719 key.objectid, nodesize, header_gen,
8721 err = BACKREF_MISMATCH;
8723 if (level != skinny_level) {
8725 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8726 key.objectid, nodesize, level, skinny_level);
8727 err = BACKREF_MISMATCH;
/* Non-fs trees must have exactly one ref (no snapshot sharing). */
8729 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8731 "extent[%llu %u] is referred by other roots than %llu",
8732 key.objectid, nodesize, root->objectid);
8733 err = BACKREF_MISMATCH;
8738 * Iterate the extent/metadata item to find the exact backref
8740 item_size = btrfs_item_size_nr(leaf, slot);
8741 ptr = (unsigned long)iref;
8742 end = (unsigned long)ei + item_size;
8744 iref = (struct btrfs_extent_inline_ref *)ptr;
8745 type = btrfs_extent_inline_ref_type(leaf, iref);
8746 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8748 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8749 (offset == root->objectid || offset == owner)) {
8751 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8752 /* Check if the backref points to valid referencer */
/* Recursive call: a shared ref counts only if its parent block
 * itself passes this check. */
8753 found_ref = !check_tree_block_ref(root, NULL, offset,
8759 ptr += btrfs_extent_inline_ref_size(type);
8763 * Inlined extent item doesn't have what we need, check
8764 * TREE_BLOCK_REF_KEY
8767 btrfs_release_path(&path);
8768 key.objectid = bytenr;
8769 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8770 key.offset = root->objectid;
8772 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8777 err |= BACKREF_MISSING;
8779 btrfs_release_path(&path);
8780 if (eb && (err & BACKREF_MISSING))
8781 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8782 bytenr, nodesize, owner, level);
8787 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8789 * Return >0 any error found and output error message
8790 * Return 0 for no error found
/*
 * Lowmem-mode check of one EXTENT_DATA item: validate alignment of
 * disk/extent byte counts, then verify its data backref exists in the
 * extent tree (inline EXTENT_DATA_REF / SHARED_DATA_REF, or a keyed
 * EXTENT_DATA_REF item as fallback).
 *
 * Returns a bitmask of error flags (0 = clean), per the doc comment
 * preceding this function.
 *
 * NOTE(review): numbering gaps hide locals, 'out' labels, and some
 * loop/branch boundaries in this listing.
 */
8792 static int check_extent_data_item(struct btrfs_root *root,
8793 struct extent_buffer *eb, int slot)
8795 struct btrfs_file_extent_item *fi;
8796 struct btrfs_path path;
8797 struct btrfs_root *extent_root = root->fs_info->extent_root;
8798 struct btrfs_key fi_key;
8799 struct btrfs_key dbref_key;
8800 struct extent_buffer *leaf;
8801 struct btrfs_extent_item *ei;
8802 struct btrfs_extent_inline_ref *iref;
8803 struct btrfs_extent_data_ref *dref;
8805 u64 file_extent_gen;
8808 u64 extent_num_bytes;
8816 int found_dbackref = 0;
8820 btrfs_item_key_to_cpu(eb, &fi_key, slot);
8821 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8822 file_extent_gen = btrfs_file_extent_generation(eb, fi);
8824 /* Nothing to check for hole and inline data extents */
8825 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8826 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8829 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8830 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8831 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8833 /* Check unaligned disk_num_bytes and num_bytes */
8834 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8836 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8837 fi_key.objectid, fi_key.offset, disk_num_bytes,
8839 err |= BYTES_UNALIGNED;
/* Global accounting used for the fsck summary. */
8841 data_bytes_allocated += disk_num_bytes;
8843 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8845 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8846 fi_key.objectid, fi_key.offset, extent_num_bytes,
8848 err |= BYTES_UNALIGNED;
8850 data_bytes_referenced += extent_num_bytes;
8852 owner = btrfs_header_owner(eb);
8854 /* Check the extent item of the file extent in extent tree */
8855 btrfs_init_path(&path);
8856 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8857 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8858 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8860 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8862 err |= BACKREF_MISSING;
8866 leaf = path.nodes[0];
8867 slot = path.slots[0];
8868 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8870 extent_flags = btrfs_extent_flags(leaf, ei);
8871 extent_gen = btrfs_extent_generation(leaf, ei);
/* A data extent must carry the DATA flag ... */
8873 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8875 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8876 disk_bytenr, disk_num_bytes,
8877 BTRFS_EXTENT_FLAG_DATA);
8878 err |= BACKREF_MISMATCH;
/* ... and the file extent cannot be older than the extent item. */
8881 if (file_extent_gen < extent_gen) {
8883 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8884 disk_bytenr, disk_num_bytes, file_extent_gen,
8886 err |= BACKREF_MISMATCH;
8889 /* Check data backref inside that extent item */
8890 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8891 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8892 ptr = (unsigned long)iref;
8893 end = (unsigned long)ei + item_size;
8895 iref = (struct btrfs_extent_inline_ref *)ptr;
8896 type = btrfs_extent_inline_ref_type(leaf, iref);
8897 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8899 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8900 ref_root = btrfs_extent_data_ref_root(leaf, dref);
8901 if (ref_root == owner || ref_root == root->objectid)
8903 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/* Shared ref: the parent tree block must itself check out. */
8904 found_dbackref = !check_tree_block_ref(root, NULL,
8905 btrfs_extent_inline_ref_offset(leaf, iref),
8911 ptr += btrfs_extent_inline_ref_size(type);
8914 /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8915 if (!found_dbackref) {
8916 btrfs_release_path(&path);
8918 btrfs_init_path(&path);
8919 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8920 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
/* Keyed data refs are addressed by a hash of (root, ino, offset). */
8921 dbref_key.offset = hash_extent_data_ref(root->objectid,
8922 fi_key.objectid, fi_key.offset);
8924 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8925 &dbref_key, &path, 0, 0);
8930 if (!found_dbackref)
8931 err |= BACKREF_MISSING;
8933 btrfs_release_path(&path);
8934 if (err & BACKREF_MISSING) {
8935 error("data extent[%llu %llu] backref lost",
8936 disk_bytenr, disk_num_bytes);
8942 * Get real tree block level for the case like shared block
8943 * Return >= 0 as tree level
8944 * Return <0 for error
/*
 * Resolve the level of the tree block at @bytenr by two independent
 * sources — the extent-tree backref and the block header — and only
 * trust the result when they agree.
 *
 * Per the preceding doc comment: returns the level (>= 0) on success,
 * < 0 on error or disagreement (the error returns sit in numbering
 * gaps of this listing).
 */
8946 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8948 struct extent_buffer *eb;
8949 struct btrfs_path path;
8950 struct btrfs_key key;
8951 struct btrfs_extent_item *ei;
8954 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8959 /* Search extent tree for extent generation and level */
8960 key.objectid = bytenr;
8961 key.type = BTRFS_METADATA_ITEM_KEY;
8962 key.offset = (u64)-1;
8964 btrfs_init_path(&path);
8965 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8968 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8976 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8977 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8978 struct btrfs_extent_item);
/* Must be a tree block, not a data extent. */
8979 flags = btrfs_extent_flags(path.nodes[0], ei);
8980 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8985 /* Get transid for later read_tree_block() check */
8986 transid = btrfs_extent_generation(path.nodes[0], ei);
8988 /* Get backref level as one source */
/* Skinny METADATA_ITEM encodes the level in key.offset; classic
 * EXTENT_ITEM stores it in the tree_block_info that follows. */
8989 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8990 backref_level = key.offset;
8992 struct btrfs_tree_block_info *info;
8994 info = (struct btrfs_tree_block_info *)(ei + 1);
8995 backref_level = btrfs_tree_block_level(path.nodes[0], info);
8997 btrfs_release_path(&path);
8999 /* Get level from tree block as an alternative source */
9000 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9001 if (!extent_buffer_uptodate(eb)) {
9002 free_extent_buffer(eb);
9005 header_level = btrfs_header_level(eb);
9006 free_extent_buffer(eb);
/* Sources disagree: fall through to the error path (in a gap). */
9008 if (header_level != backref_level)
9010 return header_level;
9013 btrfs_release_path(&path);
9018 * Check if a tree block backref is valid (points to a valid tree block)
9019 * if level == -1, level will be resolved
9020 * Return >0 for any error found and print error message
/*
 * Verify that a TREE_BLOCK_REF backref points to a real tree block:
 * read the fs root @root_id, read the block at @bytenr, then re-search
 * its first key with lowest_level = @level and confirm the search
 * lands on the same block at the same level.
 *
 * @level == -1 means the level is unknown and is resolved first
 * (see the preceding doc comment).  Returns REFERENCER_* error bits.
 */
9022 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9023 u64 bytenr, int level)
9025 struct btrfs_root *root;
9026 struct btrfs_key key;
9027 struct btrfs_path path;
9028 struct extent_buffer *eb;
9029 struct extent_buffer *node;
9030 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9034 /* Query level for level == -1 special case */
9036 level = query_tree_block_level(fs_info, bytenr);
9038 err |= REFERENCER_MISSING;
9042 key.objectid = root_id;
9043 key.type = BTRFS_ROOT_ITEM_KEY;
9044 key.offset = (u64)-1;
9046 root = btrfs_read_fs_root(fs_info, &key);
9048 err |= REFERENCER_MISSING;
9052 /* Read out the tree block to get item/node key */
9053 eb = read_tree_block(root, bytenr, root->nodesize, 0);
9054 if (!extent_buffer_uptodate(eb)) {
9055 err |= REFERENCER_MISSING;
9056 free_extent_buffer(eb);
9060 /* Empty tree, no need to check key */
9061 if (!btrfs_header_nritems(eb) && !level) {
9062 free_extent_buffer(eb);
/* Grab the first key: node key for internal nodes, item key for
 * leaves (the level test between these lines is in a gap). */
9067 btrfs_node_key_to_cpu(eb, &key, 0);
9069 btrfs_item_key_to_cpu(eb, &key, 0);
9071 free_extent_buffer(eb);
9073 btrfs_init_path(&path);
9074 path.lowest_level = level;
9075 /* Search with the first key, to ensure we can reach it */
9076 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9078 err |= REFERENCER_MISSING;
/* The block the search reached must be the one we started from. */
9082 node = path.nodes[level];
9083 if (btrfs_header_bytenr(node) != bytenr) {
9085 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9086 bytenr, nodesize, bytenr,
9087 btrfs_header_bytenr(node));
9088 err |= REFERENCER_MISMATCH;
9090 if (btrfs_header_level(node) != level) {
9092 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9093 bytenr, nodesize, level,
9094 btrfs_header_level(node));
9095 err |= REFERENCER_MISMATCH;
9099 btrfs_release_path(&path);
9101 if (err & REFERENCER_MISSING) {
9103 error("extent [%llu %d] lost referencer (owner: %llu)",
9104 bytenr, nodesize, root_id);
9107 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9108 bytenr, nodesize, root_id, level);
9115 * Check referencer for shared block backref
9116 * If level == -1, this function will resolve the level.
/*
 * Verify a SHARED_BLOCK_REF: read the claimed parent node and make
 * sure one of its block pointers actually references @bytenr, and
 * that the parent sits exactly one level above.
 *
 * @level == -1 is resolved via query_tree_block_level() (see the
 * preceding doc comment).  Returns REFERENCER_MISSING on failure.
 */
9118 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9119 u64 parent, u64 bytenr, int level)
9121 struct extent_buffer *eb;
9122 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9124 int found_parent = 0;
9127 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9128 if (!extent_buffer_uptodate(eb))
9132 level = query_tree_block_level(fs_info, bytenr);
/* Parent must be exactly one level above the child. */
9136 if (level + 1 != btrfs_header_level(eb))
/* Scan every slot of the parent for a pointer to @bytenr. */
9139 nr = btrfs_header_nritems(eb);
9140 for (i = 0; i < nr; i++) {
9141 if (bytenr == btrfs_node_blockptr(eb, i)) {
9147 free_extent_buffer(eb);
9148 if (!found_parent) {
9150 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9151 bytenr, nodesize, parent, level);
9152 return REFERENCER_MISSING;
9158 * Check referencer for normal (inlined) data ref
9159 * If len == 0, it will be resolved by searching in extent tree
/*
 * Verify an inlined EXTENT_DATA_REF: iterate every file extent of
 * inode @objectid in root @root_id and count those whose disk bytenr,
 * disk length and backref offset match; the count found must equal
 * the ref @count recorded in the extent item.
 *
 * @len == 0 is resolved by looking up the extent item first (see the
 * preceding doc comment).  Returns REFERENCER_MISSING on mismatch.
 */
9161 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9162 u64 root_id, u64 objectid, u64 offset,
9163 u64 bytenr, u64 len, u32 count)
9165 struct btrfs_root *root;
9166 struct btrfs_root *extent_root = fs_info->extent_root;
9167 struct btrfs_key key;
9168 struct btrfs_path path;
9169 struct extent_buffer *leaf;
9170 struct btrfs_file_extent_item *fi;
9171 u32 found_count = 0;
/* Resolve @len from the extent tree (the len==0 guard sits in a
 * numbering gap of this listing — TODO confirm). */
9176 key.objectid = bytenr;
9177 key.type = BTRFS_EXTENT_ITEM_KEY;
9178 key.offset = (u64)-1;
9180 btrfs_init_path(&path);
9181 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9184 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9187 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9188 if (key.objectid != bytenr ||
9189 key.type != BTRFS_EXTENT_ITEM_KEY)
9192 btrfs_release_path(&path);
/* Read the subvolume root that owns the referencing inode. */
9194 key.objectid = root_id;
9195 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
9196 key.offset = (u64)-1;
9197 btrfs_init_path(&path);
9199 root = btrfs_read_fs_root(fs_info, &key);
9203 key.objectid = objectid;
9204 key.type = BTRFS_EXTENT_DATA_KEY;
9206 * It can be nasty as data backref offset is
9207 * file offset - file extent offset, which is smaller or
9208 * equal to original backref offset. The only special case is
9209 * overflow. So we need to special check and do further search.
9211 key.offset = offset & (1ULL << 63) ? 0 : offset;
9213 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9218 * Search afterwards to get correct one
9219 * NOTE: As we must do a comprehensive check on the data backref to
9220 * make sure the dref count also matches, we must iterate all file
9221 * extents for that inode.
9224 leaf = path.nodes[0];
9225 slot = path.slots[0];
9227 btrfs_item_key_to_cpu(leaf, &key, slot);
9228 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9230 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9232 * Except normal disk bytenr and disk num bytes, we still
9233 * need to do extra check on dbackref offset as
9234 * dbackref offset = file_offset - file_extent_offset
9236 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9237 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9238 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9242 ret = btrfs_next_item(root, &path);
9247 btrfs_release_path(&path);
/* Matched file extents must agree with the recorded ref count. */
9248 if (found_count != count) {
9250 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9251 bytenr, len, root_id, objectid, offset, count, found_count);
9252 return REFERENCER_MISSING;
9258 * Check if the referencer of a shared data backref exists
/*
 * Verify a SHARED_DATA_REF: read the claimed parent leaf and scan its
 * EXTENT_DATA items for a regular (non-inline) file extent whose disk
 * bytenr equals @bytenr.  Returns REFERENCER_MISSING if none found.
 */
9260 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9261 u64 parent, u64 bytenr)
9263 struct extent_buffer *eb;
9264 struct btrfs_key key;
9265 struct btrfs_file_extent_item *fi;
9266 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9268 int found_parent = 0;
9271 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9272 if (!extent_buffer_uptodate(eb))
9275 nr = btrfs_header_nritems(eb);
9276 for (i = 0; i < nr; i++) {
9277 btrfs_item_key_to_cpu(eb, &key, i);
9278 if (key.type != BTRFS_EXTENT_DATA_KEY)
/* Inline extents store no disk bytenr; skip them. */
9281 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9282 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9285 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9292 free_extent_buffer(eb);
9293 if (!found_parent) {
9294 error("shared extent %llu referencer lost (parent: %llu)",
9296 return REFERENCER_MISSING;
9302 * This function will check a given extent item, including its backref and
9303 * itself (like crossing stripe boundary and type)
9305 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Check one extent item: account its size, reject EXTENT_TREE_V0,
 * detect stripe-boundary crossing for metadata, then dispatch every
 * inline backref to the matching checker
 * (tree-block / shared-block / data / shared-data).
 *
 * NOTE(review): numbering gaps hide the switch statement braces, the
 * err|=ret accumulation and the return, so only the dispatch skeleton
 * is visible here.
 */
9307 static int check_extent_item(struct btrfs_fs_info *fs_info,
9308 struct extent_buffer *eb, int slot)
9310 struct btrfs_extent_item *ei;
9311 struct btrfs_extent_inline_ref *iref;
9312 struct btrfs_extent_data_ref *dref;
9316 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9317 u32 item_size = btrfs_item_size_nr(eb, slot);
9322 struct btrfs_key key;
/* Global accounting: data extents contribute key.offset (length),
 * metadata items contribute one nodesize. */
9326 btrfs_item_key_to_cpu(eb, &key, slot);
9327 if (key.type == BTRFS_EXTENT_ITEM_KEY)
9328 bytes_used += key.offset;
9330 bytes_used += nodesize;
9332 if (item_size < sizeof(*ei)) {
9334 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9335 * old thing when on disk format is still un-determined.
9336 * No need to care about it anymore
9338 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9342 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9343 flags = btrfs_extent_flags(eb, ei);
9345 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9347 if (metadata && check_crossing_stripes(key.objectid, eb->len)) {
9348 error("bad metadata [%llu, %llu) crossing stripe boundary",
9349 key.objectid, key.objectid + nodesize);
9350 err |= CROSSING_STRIPE_BOUNDARY;
9353 ptr = (unsigned long)(ei + 1);
9355 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9356 /* Old EXTENT_ITEM metadata */
9357 struct btrfs_tree_block_info *info;
9359 info = (struct btrfs_tree_block_info *)ptr;
9360 level = btrfs_tree_block_level(eb, info);
9361 ptr += sizeof(struct btrfs_tree_block_info);
9363 /* New METADATA_ITEM */
9366 end = (unsigned long)ei + item_size;
9369 err |= ITEM_SIZE_MISMATCH;
9373 /* Now check every backref in this extent item */
9375 iref = (struct btrfs_extent_inline_ref *)ptr;
9376 type = btrfs_extent_inline_ref_type(eb, iref);
9377 offset = btrfs_extent_inline_ref_offset(eb, iref);
9379 case BTRFS_TREE_BLOCK_REF_KEY:
9380 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9384 case BTRFS_SHARED_BLOCK_REF_KEY:
9385 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9389 case BTRFS_EXTENT_DATA_REF_KEY:
9390 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9391 ret = check_extent_data_backref(fs_info,
9392 btrfs_extent_data_ref_root(eb, dref),
9393 btrfs_extent_data_ref_objectid(eb, dref),
9394 btrfs_extent_data_ref_offset(eb, dref),
9395 key.objectid, key.offset,
9396 btrfs_extent_data_ref_count(eb, dref));
9399 case BTRFS_SHARED_DATA_REF_KEY:
9400 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9404 error("extent[%llu %d %llu] has unknown ref type: %d",
9405 key.objectid, key.type, key.offset, type);
9406 err |= UNKNOWN_TYPE;
9410 ptr += btrfs_extent_inline_ref_size(type);
9419 * Check if a dev extent item is referred correctly by its chunk
/*
 * Verify a DEV_EXTENT item is referenced by its chunk: look up the
 * chunk it names, compare lengths, and make sure one of the chunk's
 * stripes points back at this (devid, offset).
 *
 * Returns REFERENCER_MISSING when no matching stripe exists.
 */
9421 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9422 struct extent_buffer *eb, int slot)
9424 struct btrfs_root *chunk_root = fs_info->chunk_root;
9425 struct btrfs_dev_extent *ptr;
9426 struct btrfs_path path;
9427 struct btrfs_key chunk_key;
9428 struct btrfs_key devext_key;
9429 struct btrfs_chunk *chunk;
9430 struct extent_buffer *l;
9434 int found_chunk = 0;
9437 btrfs_item_key_to_cpu(eb, &devext_key, slot);
9438 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9439 length = btrfs_dev_extent_length(eb, ptr);
/* The dev extent records which chunk owns it; look that chunk up. */
9441 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9442 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9443 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9445 btrfs_init_path(&path);
9446 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9451 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
/* Chunk length must match the dev extent length. */
9452 if (btrfs_chunk_length(l, chunk) != length)
/* One of the chunk's stripes must point back to this dev extent. */
9455 num_stripes = btrfs_chunk_num_stripes(l, chunk);
9456 for (i = 0; i < num_stripes; i++) {
9457 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9458 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9460 if (devid == devext_key.objectid &&
9461 offset == devext_key.offset) {
9467 btrfs_release_path(&path);
9470 "device extent[%llu, %llu, %llu] did not find the related chunk",
9471 devext_key.objectid, devext_key.offset, length);
9472 return REFERENCER_MISSING;
9478 * Check if the used space is correct with the dev item
/*
 * Verify a DEV_ITEM's bytes_used by summing the lengths of all
 * DEV_EXTENT items for that devid in the device tree.
 *
 * Returns REFERENCER_MISSING if the device has no dev extents at all,
 * ACCOUNTING_MISMATCH if the totals disagree.
 */
9480 static int check_dev_item(struct btrfs_fs_info *fs_info,
9481 struct extent_buffer *eb, int slot)
9483 struct btrfs_root *dev_root = fs_info->dev_root;
9484 struct btrfs_dev_item *dev_item;
9485 struct btrfs_path path;
9486 struct btrfs_key key;
9487 struct btrfs_dev_extent *ptr;
9493 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9494 dev_id = btrfs_device_id(eb, dev_item);
9495 used = btrfs_device_bytes_used(eb, dev_item);
9497 key.objectid = dev_id;
9498 key.type = BTRFS_DEV_EXTENT_KEY;
9501 btrfs_init_path(&path);
9502 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9504 btrfs_item_key_to_cpu(eb, &key, slot);
9505 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9506 key.objectid, key.type, key.offset);
9507 btrfs_release_path(&path);
9508 return REFERENCER_MISSING;
9511 /* Iterate dev_extents to calculate the used space of a device */
9513 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9515 if (key.objectid > dev_id)
9517 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9520 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9521 struct btrfs_dev_extent);
9522 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9524 ret = btrfs_next_item(dev_root, &path);
9528 btrfs_release_path(&path);
9530 if (used != total) {
9531 btrfs_item_key_to_cpu(eb, &key, slot);
9533 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9534 total, used, BTRFS_ROOT_TREE_OBJECTID,
9535 BTRFS_DEV_EXTENT_KEY, dev_id);
9536 return ACCOUNTING_MISMATCH;
9542 * Check a block group item with its referener (chunk) and its used space
9543 * with extent/metadata item
/*
 * Check one BLOCK_GROUP_ITEM:
 *  1. its referencer chunk must exist and have the same length,
 *  2. the 'used' field must equal the sum of extent/metadata items
 *     inside the block group's range,
 *  3. each extent's type (data vs tree block) must match the block
 *     group's flags.
 *
 * Returns a bitmask of REFERENCER_* / CHUNK_TYPE_MISMATCH /
 * ACCOUNTING_MISMATCH error bits.
 */
9545 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9546 struct extent_buffer *eb, int slot)
9548 struct btrfs_root *extent_root = fs_info->extent_root;
9549 struct btrfs_root *chunk_root = fs_info->chunk_root;
9550 struct btrfs_block_group_item *bi;
9551 struct btrfs_block_group_item bg_item;
9552 struct btrfs_path path;
9553 struct btrfs_key bg_key;
9554 struct btrfs_key chunk_key;
9555 struct btrfs_key extent_key;
9556 struct btrfs_chunk *chunk;
9557 struct extent_buffer *leaf;
9558 struct btrfs_extent_item *ei;
9559 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9567 btrfs_item_key_to_cpu(eb, &bg_key, slot);
9568 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9569 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9570 used = btrfs_block_group_used(&bg_item);
9571 bg_flags = btrfs_block_group_flags(&bg_item);
/* Chunk items live under FIRST_CHUNK_TREE with offset = bg bytenr. */
9573 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9574 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9575 chunk_key.offset = bg_key.objectid;
9577 btrfs_init_path(&path);
9578 /* Search for the referencer chunk */
9579 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9582 "block group[%llu %llu] did not find the related chunk item",
9583 bg_key.objectid, bg_key.offset);
9584 err |= REFERENCER_MISSING;
9586 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9587 struct btrfs_chunk);
9588 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9591 "block group[%llu %llu] related chunk item length does not match",
9592 bg_key.objectid, bg_key.offset);
9593 err |= REFERENCER_MISMATCH;
9596 btrfs_release_path(&path);
9598 /* Search from the block group bytenr */
9599 extent_key.objectid = bg_key.objectid;
9600 extent_key.type = 0;
9601 extent_key.offset = 0;
9603 btrfs_init_path(&path);
9604 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9608 /* Iterate extent tree to account used space */
9610 leaf = path.nodes[0];
9611 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* Past the end of the block group: stop accounting. */
9612 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9615 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9616 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9618 if (extent_key.objectid < bg_key.objectid)
/* METADATA_ITEM offset is the level, not a length; the nodesize
 * accumulation for that case falls in a numbering gap. */
9621 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9624 total += extent_key.offset;
9626 ei = btrfs_item_ptr(leaf, path.slots[0],
9627 struct btrfs_extent_item);
9628 flags = btrfs_extent_flags(leaf, ei);
/* Extent type must agree with the block group's type flags. */
9629 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9630 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9632 "bad extent[%llu, %llu) type mismatch with chunk",
9633 extent_key.objectid,
9634 extent_key.objectid + extent_key.offset);
9635 err |= CHUNK_TYPE_MISMATCH;
9637 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9638 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9639 BTRFS_BLOCK_GROUP_METADATA))) {
9641 "bad extent[%llu, %llu) type mismatch with chunk",
9642 extent_key.objectid,
9643 extent_key.objectid + nodesize);
9644 err |= CHUNK_TYPE_MISMATCH;
9648 ret = btrfs_next_item(extent_root, &path);
9654 btrfs_release_path(&path);
9656 if (total != used) {
9658 "block group[%llu %llu] used %llu but extent items used %llu",
9659 bg_key.objectid, bg_key.offset, used, total);
9660 err |= ACCOUNTING_MISMATCH;
9666 * Check a chunk item.
9667 * Including checking all referred dev_extents and block group
9669 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9670 			    struct extent_buffer *eb, int slot)
	/*
	 * Verify one chunk item: length alignment, chunk type sanity, the
	 * matching block group item in the extent tree, and one dev extent
	 * per stripe.  Returns a bitmask of error flags (0 when clean).
	 */
9672 	struct btrfs_root *extent_root = fs_info->extent_root;
9673 	struct btrfs_root *dev_root = fs_info->dev_root;
9674 	struct btrfs_path path;
9675 	struct btrfs_key chunk_key;
9676 	struct btrfs_key bg_key;
9677 	struct btrfs_key devext_key;
9678 	struct btrfs_chunk *chunk;
9679 	struct extent_buffer *leaf;
9680 	struct btrfs_block_group_item *bi;
9681 	struct btrfs_block_group_item bg_item;
9682 	struct btrfs_dev_extent *ptr;
9683 	u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
	/* The chunk's logical start is chunk_key.offset (see chunk_end below). */
9695 	btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9696 	chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9697 	length = btrfs_chunk_length(eb, chunk);
9698 	chunk_end = chunk_key.offset + length;
9699 	if (!IS_ALIGNED(length, sectorsize)) {
9700 		error("chunk[%llu %llu) not aligned to %u",
9701 		      chunk_key.offset, chunk_end, sectorsize);
9702 		err |= BYTES_UNALIGNED;
9706 	type = btrfs_chunk_type(eb, chunk);
9707 	profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9708 	if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9709 		error("chunk[%llu %llu) has no chunk type",
9710 		      chunk_key.offset, chunk_end);
9711 		err |= UNKNOWN_TYPE;
	/* profile & (profile - 1) is non-zero iff more than one profile bit is set */
9713 	if (profile && (profile & (profile - 1))) {
9714 		error("chunk[%llu %llu) multiple profiles detected: %llx",
9715 		      chunk_key.offset, chunk_end, profile);
9716 		err |= UNKNOWN_TYPE;
	/* Every chunk must be referenced by exactly one block group item
	 * whose key is (chunk start, BLOCK_GROUP_ITEM, chunk length). */
9719 	bg_key.objectid = chunk_key.offset;
9720 	bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9721 	bg_key.offset = length;
9723 	btrfs_init_path(&path);
9724 	ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9727 		"chunk[%llu %llu) did not find the related block group item",
9728 			chunk_key.offset, chunk_end);
9729 		err |= REFERENCER_MISSING;
9731 		leaf = path.nodes[0];
9732 		bi = btrfs_item_ptr(leaf, path.slots[0],
9733 				    struct btrfs_block_group_item);
9734 		read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
	/* the block group's flags must agree with the chunk type */
9736 		if (btrfs_block_group_flags(&bg_item) != type) {
9738 	"chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9739 				chunk_key.offset, chunk_end, type,
9740 				btrfs_block_group_flags(&bg_item));
9741 			err |= REFERENCER_MISSING;
	/* Each stripe must be backed by a dev extent of the same length. */
9745 	num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9746 	for (i = 0; i < num_stripes; i++) {
9747 		btrfs_release_path(&path);
9748 		btrfs_init_path(&path);
9749 		devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9750 		devext_key.type = BTRFS_DEV_EXTENT_KEY;
9751 		devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9753 		ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9758 		leaf = path.nodes[0];
9759 		ptr = btrfs_item_ptr(leaf, path.slots[0],
9760 				     struct btrfs_dev_extent);
	/* the dev extent must point back at exactly this chunk */
9761 		objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9762 		offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9763 		if (objectid != chunk_key.objectid ||
9764 		    offset != chunk_key.offset ||
9765 		    btrfs_dev_extent_length(leaf, ptr) != length)
9769 		err |= BACKREF_MISSING;
	/* NOTE(review): the other messages in this function print
	 * chunk_key.offset (the chunk start); printing chunk_key.objectid
	 * here looks inconsistent — confirm against upstream. */
9771 	"chunk[%llu %llu) stripe %d did not find the related dev extent",
9772 			chunk_key.objectid, chunk_end, i);
9775 	btrfs_release_path(&path);
9781 * Main entry function to check known items and update related accounting info
9783 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
	/*
	 * Dispatch every item in leaf @eb to its type-specific checker and
	 * update related accounting (e.g. total_csum_bytes).
	 */
9785 	struct btrfs_fs_info *fs_info = root->fs_info;
9786 	struct btrfs_key key;
9789 	struct btrfs_extent_data_ref *dref;
9794 	btrfs_item_key_to_cpu(eb, &key, slot);
9795 	type = btrfs_key_type(&key);
9798 	case BTRFS_EXTENT_DATA_KEY:
9799 		ret = check_extent_data_item(root, eb, slot);
9802 	case BTRFS_BLOCK_GROUP_ITEM_KEY:
9803 		ret = check_block_group_item(fs_info, eb, slot);
9806 	case BTRFS_DEV_ITEM_KEY:
9807 		ret = check_dev_item(fs_info, eb, slot);
9810 	case BTRFS_CHUNK_ITEM_KEY:
9811 		ret = check_chunk_item(fs_info, eb, slot);
9814 	case BTRFS_DEV_EXTENT_KEY:
9815 		ret = check_dev_extent_item(fs_info, eb, slot);
9818 	case BTRFS_EXTENT_ITEM_KEY:
9819 	case BTRFS_METADATA_ITEM_KEY:
9820 		ret = check_extent_item(fs_info, eb, slot);
	/* csum items are only accounted here, not verified */
9823 	case BTRFS_EXTENT_CSUM_KEY:
9824 		total_csum_bytes += btrfs_item_size_nr(eb, slot);
9826 	case BTRFS_TREE_BLOCK_REF_KEY:
9827 		ret = check_tree_block_backref(fs_info, key.offset,
9831 	case BTRFS_EXTENT_DATA_REF_KEY:
9832 		dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9833 		ret = check_extent_data_backref(fs_info,
9834 				btrfs_extent_data_ref_root(eb, dref),
9835 				btrfs_extent_data_ref_objectid(eb, dref),
9836 				btrfs_extent_data_ref_offset(eb, dref),
9838 				btrfs_extent_data_ref_count(eb, dref));
9841 	case BTRFS_SHARED_BLOCK_REF_KEY:
9842 		ret = check_shared_block_backref(fs_info, key.offset,
9846 	case BTRFS_SHARED_DATA_REF_KEY:
9847 		ret = check_shared_data_backref(fs_info, key.offset,
	/* advance to the next slot of the same leaf */
9855 	if (++slot < btrfs_header_nritems(eb))
9862  * Helper for the later fs/subvol tree check, to determine whether a tree
9863  * block should be checked.
9864  * It ensures that only the direct referencer with the lowest rootid
9865  * checks a shared fs/subvolume tree block.
9867  * The backref check in the extent tree would detect errors such as a missing
9868  * subvolume tree, so we can check aggressively to reduce duplicated checks.
9870 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
	/*
	 * Decide whether @root should check tree block @eb: walk the inline
	 * backrefs of the block's extent item and decline when a
	 * TREE_BLOCK_REF with a lower rootid exists (that root checks it).
	 */
9872 	struct btrfs_root *extent_root = root->fs_info->extent_root;
9873 	struct btrfs_key key;
9874 	struct btrfs_path path;
9875 	struct extent_buffer *leaf;
9877 	struct btrfs_extent_item *ei;
9883 	struct btrfs_extent_inline_ref *iref;
	/* find the extent item covering this block's bytenr */
9886 	btrfs_init_path(&path);
9887 	key.objectid = btrfs_header_bytenr(eb);
9888 	key.type = BTRFS_METADATA_ITEM_KEY;
9889 	key.offset = (u64)-1;
9892 	 * Any failure in backref resolving means we can't determine
9893 	 * whom the tree block belongs to.
9894 	 * So in that case, we need to check that tree block
9896 	ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9900 	ret = btrfs_previous_extent_item(extent_root, &path,
9901 					 btrfs_header_bytenr(eb));
9905 	leaf = path.nodes[0];
9906 	slot = path.slots[0];
9907 	btrfs_item_key_to_cpu(leaf, &key, slot);
9908 	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
	/* METADATA_ITEM: inline refs follow the extent item directly;
	 * EXTENT_ITEM: a tree_block_info sits in between */
9910 	if (key.type == BTRFS_METADATA_ITEM_KEY) {
9911 		iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9913 		struct btrfs_tree_block_info *info;
9915 		info = (struct btrfs_tree_block_info *)(ei + 1);
9916 		iref = (struct btrfs_extent_inline_ref *)(info + 1);
9919 	item_size = btrfs_item_size_nr(leaf, slot);
9920 	ptr = (unsigned long)iref;
9921 	end = (unsigned long)ei + item_size;
	/* iterate the inline refs packed at the end of the item */
9923 	iref = (struct btrfs_extent_inline_ref *)ptr;
9924 	type = btrfs_extent_inline_ref_type(leaf, iref);
9925 	offset = btrfs_extent_inline_ref_offset(leaf, iref);
9928 	 * We only check the tree block if current root is
9929 	 * the lowest referencer of it.
9931 	if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9932 	    offset < root->objectid) {
9933 		btrfs_release_path(&path);
9937 	ptr += btrfs_extent_inline_ref_size(type);
9940 	 * Normally we should also check keyed tree block ref, but that may be
9941 	 * very time consuming. Inlined ref should already make us skip a lot
9942 	 * of refs now. So skip search keyed tree block ref.
9946 	btrfs_release_path(&path);
9951 * Traversal function for tree block. We will do:
9952 * 1) Skip shared fs/subvolume tree blocks
9953 * 2) Update related bytes accounting
9954 * 3) Pre-order traversal
9956 static int traverse_tree_block(struct btrfs_root *root,
9957 			       struct extent_buffer *node)
	/*
	 * Pre-order recursive check of @node and its children: skip shared
	 * fs/subvol blocks, update the global byte accounting, check the
	 * block itself, then recurse into each child pointer.
	 */
9959 	struct extent_buffer *eb;
9960 	struct btrfs_key key;
9961 	struct btrfs_key drop_key;
9969 	 * Skip shared fs/subvolume tree block, in that case they will
9970 	 * be checked by referencer with lowest rootid
9972 	if (is_fstree(root->objectid) && !should_check(root, node))
9975 	/* Update bytes accounting */
9976 	total_btree_bytes += node->len;
9977 	if (fs_root_objectid(btrfs_header_owner(node)))
9978 		total_fs_tree_bytes += node->len;
9979 	if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
9980 		total_extent_tree_bytes += node->len;
	/* remember if a reloc tree block with the old backref format is seen */
9981 	if (!found_old_backref &&
9982 	    btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
9983 	    btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
9984 	    !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
9985 		found_old_backref = 1;
9987 	/* pre-order traversal, check itself first */
9988 	level = btrfs_header_level(node);
9989 	ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
9990 				   btrfs_header_level(node),
9991 				   btrfs_header_owner(node));
9995 	"check %s failed root %llu bytenr %llu level %d, force continue check",
9996 		level ? "node":"leaf", root->objectid,
9997 		btrfs_header_bytenr(node), btrfs_header_level(node));
10000 		btree_space_waste += btrfs_leaf_free_space(root, node);
10001 		ret = check_leaf_items(root, node);
10006 	nr = btrfs_header_nritems(node);
10007 	btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
	/* unused key-pointer slots in an internal node count as wasted space */
10008 	btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10009 			     sizeof(struct btrfs_key_ptr);
10011 	/* Then check all its children */
10012 	for (i = 0; i < nr; i++) {
10013 		u64 blocknr = btrfs_node_blockptr(node, i);
10015 		btrfs_node_key_to_cpu(node, &key, i);
	/* skip children already released by an interrupted snapshot drop */
10016 		if (level == root->root_item.drop_level &&
10017 		    is_dropped_key(&key, &drop_key))
10021 		 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10022 		 * to call the function itself.
10024 		eb = read_tree_block(root, blocknr, root->nodesize, 0);
10025 		if (extent_buffer_uptodate(eb)) {
10026 			ret = traverse_tree_block(root, eb);
10029 		free_extent_buffer(eb);
10036 * Low memory usage version check_chunks_and_extents.
10038 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10040 struct btrfs_path path;
10041 struct btrfs_key key;
10042 struct btrfs_root *root1;
10043 struct btrfs_root *cur_root;
10047 root1 = root->fs_info->chunk_root;
10048 ret = traverse_tree_block(root1, root1->node);
10051 root1 = root->fs_info->tree_root;
10052 ret = traverse_tree_block(root1, root1->node);
10055 btrfs_init_path(&path);
10056 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10058 key.type = BTRFS_ROOT_ITEM_KEY;
10060 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10062 error("cannot find extent treet in tree_root");
10067 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10068 if (key.type != BTRFS_ROOT_ITEM_KEY)
10070 key.offset = (u64)-1;
10072 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10073 if (IS_ERR(cur_root) || !cur_root) {
10074 error("failed to read tree: %lld", key.objectid);
10078 ret = traverse_tree_block(cur_root, cur_root->node);
10082 ret = btrfs_next_item(root1, &path);
10088 btrfs_release_path(&path);
10092 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10093 				  struct btrfs_root *root, int overwrite)
	/*
	 * Replace @root's node with a fresh empty block (reusing the old one
	 * when @overwrite) and update the root item to point at it.
	 */
10095 	struct extent_buffer *c;
10096 	struct extent_buffer *old = root->node;
10099 	struct btrfs_disk_key disk_key = {0,0,0};
10105 		extent_buffer_get(c);
10108 	c = btrfs_alloc_free_block(trans, root,
10110 				   root->root_key.objectid,
10111 				   &disk_key, level, 0, 0);
10114 		extent_buffer_get(c);
	/* initialize the header of the brand-new empty root block */
10118 	memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10119 	btrfs_set_header_level(c, level);
10120 	btrfs_set_header_bytenr(c, c->start);
10121 	btrfs_set_header_generation(c, trans->transid);
10122 	btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10123 	btrfs_set_header_owner(c, root->root_key.objectid);
10125 	write_extent_buffer(c, root->fs_info->fsid,
10126 			    btrfs_header_fsid(), BTRFS_FSID_SIZE);
10128 	write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10129 			    btrfs_header_chunk_tree_uuid(c),
10132 	btrfs_mark_buffer_dirty(c);
10134 	 * this case can happen in the following case:
10136 	 * 1.overwrite previous root.
10138 	 * 2.reinit reloc data root, this is because we skip pin
10139 	 * down reloc data tree before which means we can allocate
10140 	 * same block bytenr here.
10142 	if (old->start == c->start) {
10143 		btrfs_set_root_generation(&root->root_item,
10145 		root->root_item.level = btrfs_header_level(root->node);
10146 		ret = btrfs_update_root(trans, root->fs_info->tree_root,
10147 					&root->root_key, &root->root_item);
10149 			free_extent_buffer(c);
	/* drop the reference to the replaced root node */
10153 	free_extent_buffer(old);
10155 	add_root_to_dirty_list(root);
10159 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10160 				struct extent_buffer *eb, int tree_root)
	/*
	 * Recursively mark @eb and every tree block reachable from it as
	 * pinned, so the allocator will not reuse those bytes while the
	 * extent tree is being rebuilt.  @tree_root indicates that @eb
	 * belongs to the tree root, whose ROOT_ITEMs are followed.
	 */
10162 	struct extent_buffer *tmp;
10163 	struct btrfs_root_item *ri;
10164 	struct btrfs_key key;
10167 	int level = btrfs_header_level(eb);
10173 	 * If we have pinned this block before, don't pin it again.
10174 	 * This can not only avoid forever loop with broken filesystem
10175 	 * but also give us some speedups.
10177 	if (test_range_bit(&fs_info->pinned_extents, eb->start,
10178 			   eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10181 	btrfs_pin_extent(fs_info, eb->start, eb->len);
10183 	nodesize = btrfs_super_nodesize(fs_info->super_copy);
10184 	nritems = btrfs_header_nritems(eb);
10185 	for (i = 0; i < nritems; i++) {
10187 		btrfs_item_key_to_cpu(eb, &key, i);
10188 		if (key.type != BTRFS_ROOT_ITEM_KEY)
10190 		/* Skip the extent root and reloc roots */
10191 		if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10192 		    key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10193 		    key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10195 		ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10196 		bytenr = btrfs_disk_root_bytenr(eb, ri);
10199 		 * If at any point we start needing the real root we
10200 		 * will have to build a stump root for the root we are
10201 		 * in, but for now this doesn't actually use the root so
10202 		 * just pass in extent_root.
10204 		tmp = read_tree_block(fs_info->extent_root, bytenr,
10206 		if (!extent_buffer_uptodate(tmp)) {
10207 			fprintf(stderr, "Error reading root block\n");
10210 		ret = pin_down_tree_blocks(fs_info, tmp, 0);
10211 		free_extent_buffer(tmp);
10215 		bytenr = btrfs_node_blockptr(eb, i);
10217 		/* If we aren't the tree root don't read the block */
10218 		if (level == 1 && !tree_root) {
	/* level-1 children are leaves: pin them without reading */
10219 			btrfs_pin_extent(fs_info, bytenr, nodesize);
10223 		tmp = read_tree_block(fs_info->extent_root, bytenr,
10225 		if (!extent_buffer_uptodate(tmp)) {
10226 			fprintf(stderr, "Error reading tree block\n");
10229 		ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10230 		free_extent_buffer(tmp);
10239 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
	/*
	 * Pin every metadata block reachable from the chunk root and the
	 * tree root, so an extent-tree reinit cannot allocate over live
	 * metadata.
	 */
10243 	ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10247 	return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
10250 static int reset_block_groups(struct btrfs_fs_info *fs_info)
	/*
	 * Walk the chunk tree and re-create the in-memory block groups from
	 * chunk items, marking their ranges free so allocation works before
	 * the extent tree is rebuilt.
	 */
10252 	struct btrfs_block_group_cache *cache;
10253 	struct btrfs_path *path;
10254 	struct extent_buffer *leaf;
10255 	struct btrfs_chunk *chunk;
10256 	struct btrfs_key key;
10260 	path = btrfs_alloc_path();
10265 	key.type = BTRFS_CHUNK_ITEM_KEY;
10268 	ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
10270 		btrfs_free_path(path);
10275 	 * We do this in case the block groups were screwed up and had alloc
10276 	 * bits that aren't actually set on the chunks.  This happens with
10277 	 * restored images every time and could happen in real life I guess.
10279 	fs_info->avail_data_alloc_bits = 0;
10280 	fs_info->avail_metadata_alloc_bits = 0;
10281 	fs_info->avail_system_alloc_bits = 0;
10283 	/* First we need to create the in-memory block groups */
10285 	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10286 		ret = btrfs_next_leaf(fs_info->chunk_root, path);
10288 		btrfs_free_path(path);
10296 	leaf = path->nodes[0];
10297 	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10298 	if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10303 	chunk = btrfs_item_ptr(leaf, path->slots[0],
10304 			       struct btrfs_chunk);
	/* key.offset is the chunk's logical start address */
10305 	btrfs_add_block_group(fs_info, 0,
10306 			      btrfs_chunk_type(leaf, chunk),
10307 			      key.objectid, key.offset,
10308 			      btrfs_chunk_length(leaf, chunk));
	/* mark the whole chunk range as free space */
10309 	set_extent_dirty(&fs_info->free_space_cache, key.offset,
10310 			 key.offset + btrfs_chunk_length(leaf, chunk),
10316 	cache = btrfs_lookup_first_block_group(fs_info, start);
10320 	start = cache->key.objectid + cache->key.offset;
10323 	btrfs_free_path(path);
10327 static int reset_balance(struct btrfs_trans_handle *trans,
10328 			 struct btrfs_fs_info *fs_info)
	/*
	 * Delete any pending balance item and all tree-reloc root items,
	 * then reinit the data reloc tree, so a stale balance cannot resume
	 * against the freshly rebuilt extent tree.
	 */
10330 	struct btrfs_root *root = fs_info->tree_root;
10331 	struct btrfs_path *path;
10332 	struct extent_buffer *leaf;
10333 	struct btrfs_key key;
10334 	int del_slot, del_nr = 0;
10338 	path = btrfs_alloc_path();
	/* look for an in-progress balance item */
10342 	key.objectid = BTRFS_BALANCE_OBJECTID;
10343 	key.type = BTRFS_BALANCE_ITEM_KEY;
10346 	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10351 		goto reinit_data_reloc;
10356 	ret = btrfs_del_item(trans, root, path);
10359 	btrfs_release_path(path);
	/* remove every root item of the tree reloc trees */
10361 	key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10362 	key.type = BTRFS_ROOT_ITEM_KEY;
10365 	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10369 	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10374 		ret = btrfs_del_items(trans, root, path,
10381 		btrfs_release_path(path);
10384 		ret = btrfs_search_slot(trans, root, &key, path,
10391 		leaf = path->nodes[0];
10392 		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10393 		if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10395 		if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
	/* batch up consecutive reloc root items for deletion */
10400 		del_slot = path->slots[0];
10409 	ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
10413 	btrfs_release_path(path);
	/* finally re-create an empty data reloc tree */
10416 	key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10417 	key.type = BTRFS_ROOT_ITEM_KEY;
10418 	key.offset = (u64)-1;
10419 	root = btrfs_read_fs_root(fs_info, &key);
10420 	if (IS_ERR(root)) {
10421 		fprintf(stderr, "Error reading data reloc tree\n");
10422 		ret = PTR_ERR(root);
10425 	record_root_in_trans(trans, root);
10426 	ret = btrfs_fsck_reinit_root(trans, root, 0);
10429 	ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10431 	btrfs_free_path(path);
10435 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10436 			      struct btrfs_fs_info *fs_info)
	/*
	 * Wipe and rebuild the extent tree: pin all live metadata, reset the
	 * in-memory block groups, reinit the extent root, re-insert the
	 * block group items and clear any pending balance.
	 */
10442 	 * The only reason we don't do this is because right now we're just
10443 	 * walking the trees we find and pinning down their bytes, we don't look
10444 	 * at any of the leaves.  In order to do mixed groups we'd have to check
10445 	 * the leaves of any fs roots and pin down the bytes for any file
10446 	 * extents we find.  Not hard but why do it if we don't have to?
10448 	if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10449 		fprintf(stderr, "We don't support re-initing the extent tree "
10450 			"for mixed block groups yet, please notify a btrfs "
10451 			"developer you want to do this so they can add this "
10452 			"functionality.\n");
10457 	 * first we need to walk all of the trees except the extent tree and pin
10458 	 * down the bytes that are in use so we don't overwrite any existing
10461 	ret = pin_metadata_blocks(fs_info);
10463 		fprintf(stderr, "error pinning down used bytes\n");
10468 	 * Need to drop all the block groups since we're going to recreate all
10471 	btrfs_free_block_groups(fs_info);
10472 	ret = reset_block_groups(fs_info);
10474 		fprintf(stderr, "error resetting the block groups\n");
10478 	/* Ok we can allocate now, reinit the extent root */
10479 	ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10481 		fprintf(stderr, "extent root initialization failed\n");
10483 		 * When the transaction code is updated we should end the
10484 		 * transaction, but for now progs only knows about commit so
10485 		 * just return an error.
10491 	 * Now we have all the in-memory block groups setup so we can make
10492 	 * allocations properly, and the metadata we care about is safe since we
10493 	 * pinned all of it above.
10496 		struct btrfs_block_group_cache *cache;
10498 		cache = btrfs_lookup_first_block_group(fs_info, start);
10501 		start = cache->key.objectid + cache->key.offset;
	/* re-insert a block group item for each in-memory block group */
10502 		ret = btrfs_insert_item(trans, fs_info->extent_root,
10503 					&cache->key, &cache->item,
10504 					sizeof(cache->item));
10506 			fprintf(stderr, "Error adding block group\n");
10509 		btrfs_extent_post_op(trans, fs_info->extent_root);
10512 	ret = reset_balance(trans, fs_info);
10514 		fprintf(stderr, "error resetting the pending balance\n");
10519 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
	/*
	 * Force a COW of @eb by searching down to it inside a transaction
	 * with lowest_level set to its level; used to rewrite a damaged
	 * block in its owning root.
	 */
10521 	struct btrfs_path *path;
10522 	struct btrfs_trans_handle *trans;
10523 	struct btrfs_key key;
10526 	printf("Recowing metadata block %llu\n", eb->start);
10527 	key.objectid = btrfs_header_owner(eb);
10528 	key.type = BTRFS_ROOT_ITEM_KEY;
10529 	key.offset = (u64)-1;
	/* re-resolve the owning root; incoming @root only supplies fs_info */
10531 	root = btrfs_read_fs_root(root->fs_info, &key);
10532 	if (IS_ERR(root)) {
10533 		fprintf(stderr, "Couldn't find owner root %llu\n",
10535 		return PTR_ERR(root);
10538 	path = btrfs_alloc_path();
10542 	trans = btrfs_start_transaction(root, 1);
10543 	if (IS_ERR(trans)) {
10544 		btrfs_free_path(path);
10545 		return PTR_ERR(trans);
	/* take the first key of @eb so the search descends to this block */
10548 	path->lowest_level = btrfs_header_level(eb);
10549 	if (path->lowest_level)
10550 		btrfs_node_key_to_cpu(eb, &key, 0);
10552 		btrfs_item_key_to_cpu(eb, &key, 0);
	/* the COW happens as a side effect of searching with cow=1 */
10554 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10555 	btrfs_commit_transaction(trans, root);
10556 	btrfs_free_path(path);
10560 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
	/*
	 * Delete the recorded bad key from its owning root inside a fresh
	 * transaction.
	 */
10562 	struct btrfs_path *path;
10563 	struct btrfs_trans_handle *trans;
10564 	struct btrfs_key key;
10567 	printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10568 	       bad->key.type, bad->key.offset);
	/* look up the root recorded alongside the bad key */
10569 	key.objectid = bad->root_id;
10570 	key.type = BTRFS_ROOT_ITEM_KEY;
10571 	key.offset = (u64)-1;
10573 	root = btrfs_read_fs_root(root->fs_info, &key);
10574 	if (IS_ERR(root)) {
10575 		fprintf(stderr, "Couldn't find owner root %llu\n",
10577 		return PTR_ERR(root);
10580 	path = btrfs_alloc_path();
10584 	trans = btrfs_start_transaction(root, 1);
10585 	if (IS_ERR(trans)) {
10586 		btrfs_free_path(path);
10587 		return PTR_ERR(trans);
	/* -1/1: search for deletion with COW enabled */
10590 	ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10596 	ret = btrfs_del_item(trans, root, path);
10598 	btrfs_commit_transaction(trans, root);
10599 	btrfs_free_path(path);
10603 static int zero_log_tree(struct btrfs_root *root)
	/*
	 * Clear the log root pointer and level in the superblock and commit,
	 * discarding any unreplayed log tree.
	 */
10605 	struct btrfs_trans_handle *trans;
10608 	trans = btrfs_start_transaction(root, 1);
10609 	if (IS_ERR(trans)) {
10610 		ret = PTR_ERR(trans);
10613 	btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10614 	btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10615 	ret = btrfs_commit_transaction(trans, root);
10619 static int populate_csum(struct btrfs_trans_handle *trans,
10620 			 struct btrfs_root *csum_root, char *buf, u64 start,
	/*
	 * Read the data extent at [start, start + len) one sector at a time
	 * and insert a csum item for each sector into the csum tree.
	 * @buf must hold at least one sector.
	 */
10627 	while (offset < len) {
10628 		sectorsize = csum_root->sectorsize;
10629 		ret = read_extent_data(csum_root, buf, start + offset,
	/* start + len is the exclusive end of the extent being csummed */
10633 		ret = btrfs_csum_file_block(trans, csum_root, start + len,
10634 					    start + offset, buf, sectorsize);
10637 		offset += sectorsize;
10642 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10643 					   struct btrfs_root *csum_root,
10644 					   struct btrfs_root *cur_root)
	/*
	 * Walk every regular (FILE_EXTENT_REG) file extent in @cur_root and
	 * regenerate its checksums into the csum tree.
	 */
10646 	struct btrfs_path *path;
10647 	struct btrfs_key key;
10648 	struct extent_buffer *node;
10649 	struct btrfs_file_extent_item *fi;
10656 	path = btrfs_alloc_path();
	/* scratch buffer holding one sector of file data */
10659 	buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10669 	ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10672 	/* Iterate all regular file extents and fill its csum */
10674 	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10676 	if (key.type != BTRFS_EXTENT_DATA_KEY)
10678 	node = path->nodes[0];
10679 	slot = path->slots[0];
10680 	fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
	/* only REG extents have on-disk data with csums; skip the rest */
10681 	if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10683 	start = btrfs_file_extent_disk_bytenr(node, fi);
10684 	len = btrfs_file_extent_disk_num_bytes(node, fi);
10686 	ret = populate_csum(trans, csum_root, buf, start, len);
	/* shared extents may already have csums inserted; not an error */
10687 	if (ret == -EEXIST)
10693 	 * TODO: if next leaf is corrupted, jump to nearest next valid
10696 	ret = btrfs_next_item(cur_root, path);
10706 	btrfs_free_path(path);
10711 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10712 				  struct btrfs_root *csum_root)
	/*
	 * Iterate all fs/subvolume root items in the tree root and rebuild
	 * the csum tree from each tree's file extents.
	 */
10714 	struct btrfs_fs_info *fs_info = csum_root->fs_info;
10715 	struct btrfs_path *path;
10716 	struct btrfs_root *tree_root = fs_info->tree_root;
10717 	struct btrfs_root *cur_root;
10718 	struct extent_buffer *node;
10719 	struct btrfs_key key;
10723 	path = btrfs_alloc_path();
	/* start from the first possible fs tree objectid */
10727 	key.objectid = BTRFS_FS_TREE_OBJECTID;
10729 	key.type = BTRFS_ROOT_ITEM_KEY;
10731 	ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10740 	node = path->nodes[0];
10741 	slot = path->slots[0];
10742 	btrfs_item_key_to_cpu(node, &key, slot);
10743 	if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10745 	if (key.type != BTRFS_ROOT_ITEM_KEY)
10747 	if (!is_fstree(key.objectid))
	/* offset (u64)-1 reads the latest version of the root */
10749 	key.offset = (u64)-1;
10751 	cur_root = btrfs_read_fs_root(fs_info, &key);
10752 	if (IS_ERR(cur_root) || !cur_root) {
10753 		fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10757 	ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10762 	ret = btrfs_next_item(tree_root, path);
10772 	btrfs_free_path(path);
10776 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10777 				      struct btrfs_root *csum_root)
	/*
	 * Rebuild the csum tree by scanning the extent tree for DATA extents
	 * and recomputing the checksums of each one.
	 */
10779 	struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10780 	struct btrfs_path *path;
10781 	struct btrfs_extent_item *ei;
10782 	struct extent_buffer *leaf;
10784 	struct btrfs_key key;
10787 	path = btrfs_alloc_path();
10792 	key.type = BTRFS_EXTENT_ITEM_KEY;
10795 	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10797 		btrfs_free_path(path);
	/* scratch buffer holding one sector of data */
10801 	buf = malloc(csum_root->sectorsize);
10803 		btrfs_free_path(path);
10808 	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10809 		ret = btrfs_next_leaf(extent_root, path);
10817 	leaf = path->nodes[0];
10819 	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10820 	if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10825 	ei = btrfs_item_ptr(leaf, path->slots[0],
10826 			    struct btrfs_extent_item);
	/* only data extents carry csums; skip tree blocks */
10827 	if (!(btrfs_extent_flags(leaf, ei) &
10828 	      BTRFS_EXTENT_FLAG_DATA)) {
10833 	ret = populate_csum(trans, csum_root, buf, key.objectid,
10840 	btrfs_free_path(path);
10846 * Recalculate the csum and put it into the csum tree.
10848 * Extent tree init will wipe out all the extent info, so in that case, we
10849 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
10850 * will use fs/subvol trees to init the csum tree.
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	/*
	 * Rebuild checksums either from the fs/subvol trees (when the extent
	 * tree cannot be trusted, e.g. after an extent tree init) or from
	 * the extent tree itself.
	 */
	if (!search_fs_tree)
		return fill_csum_tree_from_extent(trans, csum_root);

	return fill_csum_tree_from_fs(trans, csum_root);
}
10862 static void free_roots_info_cache(void)
	/*
	 * Tear down the global roots_info_cache built by
	 * build_roots_info_cache(), freeing every entry and the tree itself.
	 */
10864 	if (!roots_info_cache)
10867 	while (!cache_tree_empty(roots_info_cache)) {
10868 		struct cache_extent *entry;
10869 		struct root_item_info *rii;
10871 		entry = first_cache_extent(roots_info_cache);
10874 		remove_cache_extent(roots_info_cache, entry);
10875 		rii = container_of(entry, struct root_item_info, cache_extent);
10879 	free(roots_info_cache);
10880 	roots_info_cache = NULL;
10883 static int build_roots_info_cache(struct btrfs_fs_info *info)
	/*
	 * Scan the extent tree for tree-block extents whose first inline ref
	 * is a TREE_BLOCK_REF and record, per root id, the highest-level
	 * (i.e. root) node's bytenr, generation and level in
	 * roots_info_cache.  Used later to detect stale root items.
	 */
10886 	struct btrfs_key key;
10887 	struct extent_buffer *leaf;
10888 	struct btrfs_path *path;
10890 	if (!roots_info_cache) {
10891 		roots_info_cache = malloc(sizeof(*roots_info_cache));
10892 		if (!roots_info_cache)
10894 		cache_tree_init(roots_info_cache);
10897 	path = btrfs_alloc_path();
10902 	key.type = BTRFS_EXTENT_ITEM_KEY;
10905 	ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10908 	leaf = path->nodes[0];
10911 		struct btrfs_key found_key;
10912 		struct btrfs_extent_item *ei;
10913 		struct btrfs_extent_inline_ref *iref;
10914 		int slot = path->slots[0];
10919 		struct cache_extent *entry;
10920 		struct root_item_info *rii;
10922 		if (slot >= btrfs_header_nritems(leaf)) {
10923 			ret = btrfs_next_leaf(info->extent_root, path);
10930 			leaf = path->nodes[0];
10931 			slot = path->slots[0];
10934 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10936 		if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10937 		    found_key.type != BTRFS_METADATA_ITEM_KEY)
10940 		ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10941 		flags = btrfs_extent_flags(leaf, ei);
	/* an EXTENT_ITEM without the TREE_BLOCK flag is a data extent */
10943 		if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10944 		    !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
	/* METADATA_ITEM stores the level in key.offset; EXTENT_ITEM in the
	 * tree_block_info that follows the extent item */
10947 		if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10948 			iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10949 			level = found_key.offset;
10951 			struct btrfs_tree_block_info *binfo;
10953 			binfo = (struct btrfs_tree_block_info *)(ei + 1);
10954 			iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10955 			level = btrfs_tree_block_level(leaf, binfo);
10959 		 * For a root extent, it must be of the following type and the
10960 		 * first (and only one) iref in the item.
10962 		type = btrfs_extent_inline_ref_type(leaf, iref);
10963 		if (type != BTRFS_TREE_BLOCK_REF_KEY)
10966 		root_id = btrfs_extent_inline_ref_offset(leaf, iref);
10967 		entry = lookup_cache_extent(roots_info_cache, root_id, 1);
	/* first time seeing this root: insert a fresh cache entry */
10969 			rii = malloc(sizeof(struct root_item_info));
10974 			rii->cache_extent.start = root_id;
10975 			rii->cache_extent.size = 1;
10976 			rii->level = (u8)-1;
10977 			entry = &rii->cache_extent;
10978 			ret = insert_cache_extent(roots_info_cache, entry);
10981 			rii = container_of(entry, struct root_item_info,
10985 		ASSERT(rii->cache_extent.start == root_id);
10986 		ASSERT(rii->cache_extent.size == 1);
	/* keep only the highest level seen — that is the tree's root node */
10988 		if (level > rii->level || rii->level == (u8)-1) {
10989 			rii->level = level;
10990 			rii->bytenr = found_key.objectid;
10991 			rii->gen = btrfs_extent_generation(leaf, ei);
10992 			rii->node_count = 1;
10993 		} else if (level == rii->level) {
11001 	btrfs_free_path(path);
11006 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11007 				  struct btrfs_path *path,
11008 				  const struct btrfs_key *root_key,
11009 				  const int read_only_mode)
	/*
	 * Compare the root item at @path against the cached real root node
	 * info from roots_info_cache and, unless @read_only_mode, rewrite
	 * bytenr/level/generation in place when they disagree.
	 */
11011 	const u64 root_id = root_key->objectid;
11012 	struct cache_extent *entry;
11013 	struct root_item_info *rii;
11014 	struct btrfs_root_item ri;
11015 	unsigned long offset;
11017 	entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11020 		"Error: could not find extent items for root %llu\n",
11021 			root_key->objectid);
11025 	rii = container_of(entry, struct root_item_info, cache_extent);
11026 	ASSERT(rii->cache_extent.start == root_id);
11027 	ASSERT(rii->cache_extent.size == 1);
	/* more than one candidate root node means the cache is ambiguous */
11029 	if (rii->node_count != 1) {
11031 		"Error: could not find btree root extent for root %llu\n",
11036 	offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11037 	read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11039 	if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11040 	    btrfs_root_level(&ri) != rii->level ||
11041 	    btrfs_root_generation(&ri) != rii->gen) {
11044 		 * If we're in repair mode but our caller told us to not update
11045 		 * the root item, i.e. just check if it needs to be updated, don't
11046 		 * print this message, since the caller will call us again shortly
11047 		 * for the same root item without read only mode (the caller will
11048 		 * open a transaction first).
11050 		if (!(read_only_mode && repair))
11052 	"%sroot item for root %llu,"
11053 	" current bytenr %llu, current gen %llu, current level %u,"
11054 	" new bytenr %llu, new gen %llu, new level %u\n",
11055 				(read_only_mode ? "" : "fixing "),
11057 				btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11058 				btrfs_root_level(&ri),
11059 				rii->bytenr, rii->gen, rii->level);
	/* a root item newer than the found node indicates deeper corruption */
11061 		if (btrfs_root_generation(&ri) > rii->gen) {
11063 	"root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11064 				root_id, btrfs_root_generation(&ri), rii->gen);
11068 		if (!read_only_mode) {
11069 			btrfs_set_root_bytenr(&ri, rii->bytenr);
11070 			btrfs_set_root_level(&ri, rii->level);
11071 			btrfs_set_root_generation(&ri, rii->gen);
11072 			write_extent_buffer(path->nodes[0], &ri,
11073 					    offset, sizeof(ri));
11083 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11084 * caused read-only snapshots to be corrupted if they were created at a moment
11085 * when the source subvolume/snapshot had orphan items. The issue was that the
11086 * on-disk root items became incorrect, referring to the pre orphan cleanup root
11087 * node instead of the post orphan cleanup root node.
11088 * So this function, and its callees, just detects and fixes those cases. Even
11089 * though the regression was for read-only snapshots, this function applies to
11090 * any snapshot/subvolume root.
11091 * This must be run before any other repair code - not doing so makes other
11092 * repair code delete or modify backrefs in the extent tree for example, which
11093 * will result in an inconsistent fs after repairing the root items.
11095 static int repair_root_items(struct btrfs_fs_info *info)
11097 struct btrfs_path *path = NULL;
11098 struct btrfs_key key;
11099 struct extent_buffer *leaf;
11100 struct btrfs_trans_handle *trans = NULL;
11103 int need_trans = 0;
/* First pass: scan the extent tree and record each root's real root node. */
11105 ret = build_roots_info_cache(info);
11109 path = btrfs_alloc_path();
/* Walk every ROOT_ITEM starting from the first possible subvolume id. */
11115 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11116 key.type = BTRFS_ROOT_ITEM_KEY;
11121 * Avoid opening and committing transactions if a leaf doesn't have
11122 * any root items that need to be fixed, so that we avoid rotating
11123 * backup roots unnecessarily.
11126 trans = btrfs_start_transaction(info->tree_root, 1);
11127 if (IS_ERR(trans)) {
11128 ret = PTR_ERR(trans);
11133 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11137 leaf = path->nodes[0];
11140 struct btrfs_key found_key;
/* Reached the end of this leaf: save the next key and restart there. */
11142 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11143 int no_more_keys = find_next_key(path, &key);
11145 btrfs_release_path(path);
11147 ret = btrfs_commit_transaction(trans,
11159 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
/* Only plain subvolume/snapshot root items; skip relocation trees. */
11161 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11163 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/*
 * First call is read-only to detect whether the item needs fixing;
 * NOTE(review): lines are missing from this extract between the call
 * and the retry below — confirm the flow against the full source.
 */
11166 ret = maybe_repair_root_item(info, path, &found_key,
/* Item needs fixing and repair is enabled: redo it inside a transaction. */
11171 if (!trans && repair) {
11174 btrfs_release_path(path);
/* Cleanup path: drop the cache and path, commit any open transaction. */
11184 free_roots_info_cache();
11185 btrfs_free_path(path);
11187 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Usage/help text for "btrfs check", printed by usage().  One string per
 * line of output; the terminating NULL entry lies outside this extract.
 */
11194 const char * const cmd_check_usage[] = {
11195 "btrfs check [options] <device>",
11196 "Check structural integrity of a filesystem (unmounted).",
11197 "Check structural integrity of an unmounted filesystem. Verify internal",
11198 "trees' consistency and item connectivity. In the repair mode try to",
11199 "fix the problems found. ",
11200 "WARNING: the repair mode is considered dangerous",
11202 "-s|--super <superblock> use this superblock copy",
11203 "-b|--backup use the first valid backup root copy",
11204 "--repair try to repair the filesystem",
11205 "--readonly run in read-only mode (default)",
11206 "--init-csum-tree create a new CRC tree",
11207 "--init-extent-tree create a new extent tree",
11208 "--mode <MODE> select mode, allows to make some memory/IO",
11209 " trade-offs, where MODE is one of:",
11210 " original - read inodes and extents to memory (requires",
11211 " more memory, does less IO)",
11212 " lowmem - try to use less memory but read blocks again",
11214 "--check-data-csum verify checksums of data blocks",
11215 "-Q|--qgroup-report print a report on qgroup consistency",
11216 "-E|--subvol-extents <subvolid>",
11217 " print subvolume extents and sharing state",
11218 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
11219 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
11220 "-p|--progress indicate progress",
11224 int cmd_check(int argc, char **argv)
11226 struct cache_tree root_cache;
11227 struct btrfs_root *root;
11228 struct btrfs_fs_info *info;
11231 u64 tree_root_bytenr = 0;
11232 u64 chunk_root_bytenr = 0;
11233 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11236 int init_csum_tree = 0;
11238 int qgroup_report = 0;
11239 int qgroups_repaired = 0;
11240 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
11244 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11245 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11246 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11248 static const struct option long_options[] = {
11249 { "super", required_argument, NULL, 's' },
11250 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11251 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11252 { "init-csum-tree", no_argument, NULL,
11253 GETOPT_VAL_INIT_CSUM },
11254 { "init-extent-tree", no_argument, NULL,
11255 GETOPT_VAL_INIT_EXTENT },
11256 { "check-data-csum", no_argument, NULL,
11257 GETOPT_VAL_CHECK_CSUM },
11258 { "backup", no_argument, NULL, 'b' },
11259 { "subvol-extents", required_argument, NULL, 'E' },
11260 { "qgroup-report", no_argument, NULL, 'Q' },
11261 { "tree-root", required_argument, NULL, 'r' },
11262 { "chunk-root", required_argument, NULL,
11263 GETOPT_VAL_CHUNK_TREE },
11264 { "progress", no_argument, NULL, 'p' },
11265 { "mode", required_argument, NULL,
11267 { NULL, 0, NULL, 0}
11270 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11274 case 'a': /* ignored */ break;
11276 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11279 num = arg_strtou64(optarg);
11280 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11282 "ERROR: super mirror should be less than: %d\n",
11283 BTRFS_SUPER_MIRROR_MAX);
11286 bytenr = btrfs_sb_offset(((int)num));
11287 printf("using SB copy %llu, bytenr %llu\n", num,
11288 (unsigned long long)bytenr);
11294 subvolid = arg_strtou64(optarg);
11297 tree_root_bytenr = arg_strtou64(optarg);
11299 case GETOPT_VAL_CHUNK_TREE:
11300 chunk_root_bytenr = arg_strtou64(optarg);
11303 ctx.progress_enabled = true;
11307 usage(cmd_check_usage);
11308 case GETOPT_VAL_REPAIR:
11309 printf("enabling repair mode\n");
11311 ctree_flags |= OPEN_CTREE_WRITES;
11313 case GETOPT_VAL_READONLY:
11316 case GETOPT_VAL_INIT_CSUM:
11317 printf("Creating a new CRC tree\n");
11318 init_csum_tree = 1;
11320 ctree_flags |= OPEN_CTREE_WRITES;
11322 case GETOPT_VAL_INIT_EXTENT:
11323 init_extent_tree = 1;
11324 ctree_flags |= (OPEN_CTREE_WRITES |
11325 OPEN_CTREE_NO_BLOCK_GROUPS);
11328 case GETOPT_VAL_CHECK_CSUM:
11329 check_data_csum = 1;
11331 case GETOPT_VAL_MODE:
11332 check_mode = parse_check_mode(optarg);
11333 if (check_mode == CHECK_MODE_UNKNOWN) {
11334 error("unknown mode: %s", optarg);
11341 if (check_argc_exact(argc - optind, 1))
11342 usage(cmd_check_usage);
11344 if (ctx.progress_enabled) {
11345 ctx.tp = TASK_NOTHING;
11346 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11349 /* This check is the only reason for --readonly to exist */
11350 if (readonly && repair) {
11351 fprintf(stderr, "Repair options are not compatible with --readonly\n");
11356 * Not supported yet
11358 if (repair && check_mode == CHECK_MODE_LOWMEM) {
11359 error("Low memory mode doesn't support repair yet");
11364 cache_tree_init(&root_cache);
11366 if((ret = check_mounted(argv[optind])) < 0) {
11367 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
11370 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
11375 /* only allow partial opening under repair mode */
11377 ctree_flags |= OPEN_CTREE_PARTIAL;
11379 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11380 chunk_root_bytenr, ctree_flags);
11382 fprintf(stderr, "Couldn't open file system\n");
11387 global_info = info;
11388 root = info->fs_root;
11391 * repair mode will force us to commit transaction which
11392 * will make us fail to load log tree when mounting.
11394 if (repair && btrfs_super_log_root(info->super_copy)) {
11395 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
11400 ret = zero_log_tree(root);
11402 fprintf(stderr, "fail to zero log tree\n");
11407 uuid_unparse(info->super_copy->fsid, uuidbuf);
11408 if (qgroup_report) {
11409 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11411 ret = qgroup_verify_all(info);
11417 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11418 subvolid, argv[optind], uuidbuf);
11419 ret = print_extent_state(info, subvolid);
11422 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11424 if (!extent_buffer_uptodate(info->tree_root->node) ||
11425 !extent_buffer_uptodate(info->dev_root->node) ||
11426 !extent_buffer_uptodate(info->chunk_root->node)) {
11427 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11432 if (init_extent_tree || init_csum_tree) {
11433 struct btrfs_trans_handle *trans;
11435 trans = btrfs_start_transaction(info->extent_root, 0);
11436 if (IS_ERR(trans)) {
11437 fprintf(stderr, "Error starting transaction\n");
11438 ret = PTR_ERR(trans);
11442 if (init_extent_tree) {
11443 printf("Creating a new extent tree\n");
11444 ret = reinit_extent_tree(trans, info);
11449 if (init_csum_tree) {
11450 fprintf(stderr, "Reinit crc root\n");
11451 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11453 fprintf(stderr, "crc root initialization failed\n");
11458 ret = fill_csum_tree(trans, info->csum_root,
11461 fprintf(stderr, "crc refilling failed\n");
11466 * Ok now we commit and run the normal fsck, which will add
11467 * extent entries for all of the items it finds.
11469 ret = btrfs_commit_transaction(trans, info->extent_root);
11473 if (!extent_buffer_uptodate(info->extent_root->node)) {
11474 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11478 if (!extent_buffer_uptodate(info->csum_root->node)) {
11479 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
11484 if (!ctx.progress_enabled)
11485 fprintf(stderr, "checking extents\n");
11486 if (check_mode == CHECK_MODE_LOWMEM)
11487 ret = check_chunks_and_extents_v2(root);
11489 ret = check_chunks_and_extents(root);
11491 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
11493 ret = repair_root_items(info);
11497 fprintf(stderr, "Fixed %d roots.\n", ret);
11499 } else if (ret > 0) {
11501 "Found %d roots with an outdated root item.\n",
11504 "Please run a filesystem check with the option --repair to fix them.\n");
11509 if (!ctx.progress_enabled) {
11510 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11511 fprintf(stderr, "checking free space tree\n");
11513 fprintf(stderr, "checking free space cache\n");
11515 ret = check_space_cache(root);
11520 * We used to have to have these hole extents in between our real
11521 * extents so if we don't have this flag set we need to make sure there
11522 * are no gaps in the file extents for inodes, otherwise we can just
11523 * ignore it when this happens.
11525 no_holes = btrfs_fs_incompat(root->fs_info,
11526 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11527 if (!ctx.progress_enabled)
11528 fprintf(stderr, "checking fs roots\n");
11529 ret = check_fs_roots(root, &root_cache);
11533 fprintf(stderr, "checking csums\n");
11534 ret = check_csums(root);
11538 fprintf(stderr, "checking root refs\n");
11539 ret = check_root_refs(root, &root_cache);
11543 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11544 struct extent_buffer *eb;
11546 eb = list_first_entry(&root->fs_info->recow_ebs,
11547 struct extent_buffer, recow);
11548 list_del_init(&eb->recow);
11549 ret = recow_extent_buffer(root, eb);
11554 while (!list_empty(&delete_items)) {
11555 struct bad_item *bad;
11557 bad = list_first_entry(&delete_items, struct bad_item, list);
11558 list_del_init(&bad->list);
11560 ret = delete_bad_item(root, bad);
11564 if (info->quota_enabled) {
11566 fprintf(stderr, "checking quota groups\n");
11567 err = qgroup_verify_all(info);
11571 err = repair_qgroups(info, &qgroups_repaired);
11576 if (!list_empty(&root->fs_info->recow_ebs)) {
11577 fprintf(stderr, "Transid errors in file system\n");
11581 /* Don't override original ret */
11582 if (!ret && qgroups_repaired)
11583 ret = qgroups_repaired;
11585 if (found_old_backref) { /*
11586 * there was a disk format change when mixed
11587 * backref was in testing tree. The old format
11588 * existed about one week.
11590 printf("\n * Found old mixed backref format. "
11591 "The old format is not supported! *"
11592 "\n * Please mount the FS in readonly mode, "
11593 "backup data and re-format the FS. *\n\n");
11596 printf("found %llu bytes used err is %d\n",
11597 (unsigned long long)bytes_used, ret);
11598 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11599 printf("total tree bytes: %llu\n",
11600 (unsigned long long)total_btree_bytes);
11601 printf("total fs tree bytes: %llu\n",
11602 (unsigned long long)total_fs_tree_bytes);
11603 printf("total extent tree bytes: %llu\n",
11604 (unsigned long long)total_extent_tree_bytes);
11605 printf("btree space waste bytes: %llu\n",
11606 (unsigned long long)btree_space_waste);
11607 printf("file data blocks allocated: %llu\n referenced %llu\n",
11608 (unsigned long long)data_bytes_allocated,
11609 (unsigned long long)data_bytes_referenced);
11611 free_qgroup_counts();
11612 free_root_recs_tree(&root_cache);
11616 if (ctx.progress_enabled)
11617 task_deinit(ctx.info);