2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
49 TASK_NOTHING, /* have to be the last element */
54 enum task_position tp;
56 struct task_info *info;
/* Global accounting accumulated while walking the trees (printed in summary). */
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
/* Set when an old-format backref is encountered — TODO confirm consumer, set site elided. */
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
/* Command-line driven behavior switches. */
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
/* Shared state for the progress-indicator task thread. */
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
/* Which check implementation to run; members elided here, default is the
 * original (non-lowmem) mode. */
77 enum btrfs_check_mode {
81 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
84 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
/*
 * Common header embedded in both data_backref and tree_backref; linked into
 * the owning extent_record's backref list.  The flag bits track what was
 * found for this backref during the scan.
 */
86 struct extent_backref {
87 struct list_head list;
88 unsigned int is_data:1;
89 unsigned int found_extent_tree:1;
90 unsigned int full_backref:1;
91 unsigned int found_ref:1;
92 unsigned int broken:1;
/* Recover the extent_backref from its list linkage. */
95 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
97 return list_entry(entry, struct extent_backref, list);
/* Backref for a data extent; remaining members elided in this view. */
100 struct data_backref {
101 struct extent_backref node;
/* Downcast from the embedded common header (caller must know is_data is set). */
115 static inline struct data_backref* to_data_backref(struct extent_backref *back)
117 return container_of(back, struct data_backref, node);
121 * Much like data_backref, just removed the undetermined members
122 * and change it to use list_head.
123 * During extent scan, it is stored in root->orphan_data_extent.
124 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
126 struct orphan_data_extent {
127 struct list_head list;
/* Backref for a metadata (tree block) extent; remaining members elided. */
135 struct tree_backref {
136 struct extent_backref node;
143 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
145 return container_of(back, struct tree_backref, node);
148 /* Explicit initialization for extent_record::flag_block_full_backref */
149 enum { FLAG_UNSET = 2 };
/*
 * Per-extent state collected while scanning the extent tree, keyed in a
 * cache_tree via @cache.  flag_block_full_backref is 2 bits wide so it can
 * hold FLAG_UNSET (2) in addition to 0/1.
 */
151 struct extent_record {
152 struct list_head backrefs;
153 struct list_head dups;
154 struct list_head list;
155 struct cache_extent cache;
156 struct btrfs_disk_key parent_key;
161 u64 extent_item_refs;
163 u64 parent_generation;
167 unsigned int flag_block_full_backref:2;
168 unsigned int found_rec:1;
169 unsigned int content_checked:1;
170 unsigned int owner_ref_checked:1;
171 unsigned int is_root:1;
172 unsigned int metadata:1;
173 unsigned int bad_full_backref:1;
174 unsigned int crossing_stripes:1;
175 unsigned int wrong_chunk_type:1;
178 static inline struct extent_record* to_extent_record(struct list_head *entry)
180 return container_of(entry, struct extent_record, list);
/* One name (dir item / dir index / inode ref) pointing at an inode. */
183 struct inode_backref {
184 struct list_head list;
185 unsigned int found_dir_item:1;
186 unsigned int found_dir_index:1;
187 unsigned int found_inode_ref:1;
197 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
199 return list_entry(entry, struct inode_backref, list);
/* Work-queue entry describing one root to be walked; most members elided. */
202 struct root_item_record {
203 struct list_head list;
210 struct btrfs_key drop_key;
/* Error bits for a single inode/root backref (inode_backref::errors). */
213 #define REF_ERR_NO_DIR_ITEM (1 << 0)
214 #define REF_ERR_NO_DIR_INDEX (1 << 1)
215 #define REF_ERR_NO_INODE_REF (1 << 2)
216 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
217 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
218 #define REF_ERR_DUP_INODE_REF (1 << 5)
219 #define REF_ERR_INDEX_UNMATCH (1 << 6)
220 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
221 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
222 #define REF_ERR_NO_ROOT_REF (1 << 9)
223 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
224 #define REF_ERR_DUP_ROOT_REF (1 << 11)
225 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
/* A hole ([start, start+len)) in a file's extent coverage; kept in an rb-tree. */
227 struct file_extent_hole {
/*
 * Everything known about one inode while its fs tree is scanned; freed as
 * soon as all cross-checks pass (see maybe_free_inode_rec()).
 */
233 struct inode_record {
234 struct list_head backrefs;
235 unsigned int checked:1;
236 unsigned int merging:1;
237 unsigned int found_inode_item:1;
238 unsigned int found_dir_item:1;
239 unsigned int found_file_extent:1;
240 unsigned int found_csum_item:1;
241 unsigned int some_csum_missing:1;
242 unsigned int nodatasum:1;
255 struct rb_root holes;
256 struct list_head orphan_extents;
/* Error bits for a whole inode (inode_record::errors). */
261 #define I_ERR_NO_INODE_ITEM (1 << 0)
262 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
263 #define I_ERR_DUP_INODE_ITEM (1 << 2)
264 #define I_ERR_DUP_DIR_INDEX (1 << 3)
265 #define I_ERR_ODD_DIR_ITEM (1 << 4)
266 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
267 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
268 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
269 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
270 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
271 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
272 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
273 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
274 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
275 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
/* Link between a subvolume root and its parent directory entry. */
277 struct root_backref {
278 struct list_head list;
279 unsigned int found_dir_item:1;
280 unsigned int found_dir_index:1;
281 unsigned int found_back_ref:1;
282 unsigned int found_forward_ref:1;
283 unsigned int reachable:1;
292 static inline struct root_backref* to_root_backref(struct list_head *entry)
294 return list_entry(entry, struct root_backref, list);
/* Struct fragments below: the owning struct names are elided in this view. */
298 struct list_head backrefs;
299 struct cache_extent cache;
300 unsigned int found_root_item:1;
306 struct cache_extent cache;
/* shared_node (presumably): per-tree-block caches used while walking shared
 * subtrees — TODO confirm, struct name elided. */
311 struct cache_extent cache;
312 struct cache_tree root_cache;
313 struct cache_tree inode_cache;
314 struct inode_record *current;
/* State for one fs-tree walk, including the stack of shared nodes per level. */
323 struct walk_control {
324 struct cache_tree shared;
325 struct shared_node *nodes[BTRFS_MAX_LEVEL];
331 struct btrfs_key key;
333 struct list_head list;
336 struct extent_entry {
341 struct list_head list;
344 struct root_item_info {
345 /* level of the root */
347 /* number of nodes at this level, must be 1 for a root */
351 struct cache_extent cache_extent;
355 * Error bit for low memory mode check.
357 * Currently no caller cares about it yet. Just internal use for error
/*
 * Error bits for the low-memory mode check.  Each condition must own a
 * distinct bit so callers can OR them together and test them individually.
 * (CROSSING_STRIPE_BOUNDARY previously aliased REFERENCER_MISMATCH at
 * (1 << 4); the tail of the list is renumbered to keep every bit unique.)
 */
#define BACKREF_MISSING (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH (1 << 4) /* Referencer found but does not match */
#define CROSSING_STRIPE_BOUNDARY (1 << 5) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH (1 << 6) /* Bad item size */
#define UNKNOWN_TYPE (1 << 7) /* Unknown type */
#define ACCOUNTING_MISMATCH (1 << 8) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH (1 << 9)
/*
 * Progress-indicator thread body: periodically reprints the current task
 * phase with a spinner character on the same line (trailing '\r').
 * @p is the shared struct task_ctx.  Loop/exit structure is elided here.
 */
371 static void *print_status_check(void *p)
373 struct task_ctx *priv = p;
374 const char work_indicator[] = { '.', 'o', 'O', 'o' };
376 static char *task_position_string[] = {
378 "checking free space cache",
382 task_period_start(priv->info, 1000 /* 1s */);
/* TASK_NOTHING is the terminator phase — nothing to report. */
384 if (priv->tp == TASK_NOTHING)
388 printf("%s [%c]\r", task_position_string[priv->tp],
389 work_indicator[count % 4]);
392 task_period_wait(priv->info);
/* Companion callback for the task framework; body elided in this view. */
397 static int print_status_return(void *p)
405 static enum btrfs_check_mode parse_check_mode(const char *str)
407 if (strcmp(str, "lowmem") == 0)
408 return CHECK_MODE_LOWMEM;
409 if (strcmp(str, "orig") == 0)
410 return CHECK_MODE_ORIGINAL;
411 if (strcmp(str, "original") == 0)
412 return CHECK_MODE_ORIGINAL;
414 return CHECK_MODE_UNKNOWN;
417 /* Compatible function to allow reuse of old codes */
/* Return the start of the lowest hole in @holes; return value for the empty
 * tree is elided in this view (presumably (u64)-1 — TODO confirm). */
418 static u64 first_extent_gap(struct rb_root *holes)
420 struct file_extent_hole *hole;
422 if (RB_EMPTY_ROOT(holes))
425 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/* rb-tree comparator ordering holes by start (ties broken by len so the
 * larger/equal hole becomes the merge center); never reports equality. */
429 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
431 struct file_extent_hole *hole1;
432 struct file_extent_hole *hole2;
434 hole1 = rb_entry(node1, struct file_extent_hole, node);
435 hole2 = rb_entry(node2, struct file_extent_hole, node);
437 if (hole1->start > hole2->start)
439 if (hole1->start < hole2->start)
441 /* Now hole1->start == hole2->start */
442 if (hole1->len >= hole2->len)
444 * Hole 1 will be merge center
445 * Same hole will be merged later
448 /* Hole 2 will be merge center */
453 * Add a hole to the record
455 * This will do hole merge for copy_file_extent_holes(),
456 * which will ensure there won't be continuous holes.
458 static int add_file_extent_hole(struct rb_root *holes,
461 struct file_extent_hole *hole;
462 struct file_extent_hole *prev = NULL;
463 struct file_extent_hole *next = NULL;
465 hole = malloc(sizeof(*hole));
470 /* Since compare will not return 0, no -EEXIST will happen */
471 rb_insert(holes, &hole->node, compare_hole);
473 /* simple merge with previous hole */
474 if (rb_prev(&hole->node))
475 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
/* Overlapping/adjacent predecessor: absorb it into the new hole. */
477 if (prev && prev->start + prev->len >= hole->start) {
478 hole->len = hole->start + hole->len - prev->start;
479 hole->start = prev->start;
480 rb_erase(&prev->node, holes);
485 /* iterate merge with next holes */
487 if (!rb_next(&hole->node))
489 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
491 if (hole->start + hole->len >= next->start) {
492 if (hole->start + hole->len <= next->start + next->len)
493 hole->len = next->start + next->len -
495 rb_erase(&next->node, holes);
/* rb_search callback: locate the hole containing offset *(u64 *)data. */
504 static int compare_hole_range(struct rb_node *node, void *data)
506 struct file_extent_hole *hole;
509 hole = (struct file_extent_hole *)data;
512 hole = rb_entry(node, struct file_extent_hole, node);
513 if (start < hole->start)
515 if (start >= hole->start && start < hole->start + hole->len)
521 * Delete a hole in the record
523 * This will do the hole split and is much restrict than add.
525 static int del_file_extent_hole(struct rb_root *holes,
528 struct file_extent_hole *hole;
529 struct file_extent_hole tmp;
534 struct rb_node *node;
541 node = rb_search(holes, &tmp, compare_hole_range, NULL);
544 hole = rb_entry(node, struct file_extent_hole, node);
/* The deleted range must be fully inside one existing hole. */
545 if (start + len > hole->start + hole->len)
549 * Now there will be no overlap, delete the hole and re-add the
550 * split(s) if they exists.
552 if (start > hole->start) {
553 prev_start = hole->start;
554 prev_len = start - hole->start;
557 if (hole->start + hole->len > start + len) {
558 next_start = start + len;
559 next_len = hole->start + hole->len - start - len;
562 rb_erase(node, holes);
565 ret = add_file_extent_hole(holes, prev_start, prev_len);
570 ret = add_file_extent_hole(holes, next_start, next_len);
/* Copy every hole of @src into @dst (merging as add_file_extent_hole does). */
577 static int copy_file_extent_holes(struct rb_root *dst,
580 struct file_extent_hole *hole;
581 struct rb_node *node;
584 node = rb_first(src);
586 hole = rb_entry(node, struct file_extent_hole, node);
587 ret = add_file_extent_hole(dst, hole->start, hole->len);
590 node = rb_next(node);
/* Erase and free every hole in @holes. */
595 static void free_file_extent_holes(struct rb_root *holes)
597 struct rb_node *node;
598 struct file_extent_hole *hole;
600 node = rb_first(holes);
602 hole = rb_entry(node, struct file_extent_hole, node);
603 rb_erase(node, holes);
605 node = rb_first(holes);
609 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
/* Mark @root dirty in @trans once per transaction, pinning the current node
 * as the commit root (extra ref taken via extent_buffer_get). */
611 static void record_root_in_trans(struct btrfs_trans_handle *trans,
612 struct btrfs_root *root)
614 if (root->last_trans != trans->transid) {
615 root->track_dirty = 1;
616 root->last_trans = trans->transid;
617 root->commit_root = root->node;
618 extent_buffer_get(root->node);
/* Convert a POSIX inode mode to the corresponding BTRFS_FT_* dir-entry type. */
622 static u8 imode_to_type(u32 imode)
625 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
626 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
627 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
628 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
629 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
630 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
631 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
632 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
635 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
/* rb-tree comparator for device_record nodes, ordered by devid. */
639 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
641 struct device_record *rec1;
642 struct device_record *rec2;
644 rec1 = rb_entry(node1, struct device_record, node);
645 rec2 = rb_entry(node2, struct device_record, node);
646 if (rec1->devid > rec2->devid)
648 else if (rec1->devid < rec2->devid)
/*
 * Deep-copy an inode_record: the record itself, all its inode backrefs
 * (variable-sized, name appended), its orphan data extents and its hole
 * tree.  Returns ERR_PTR(-ENOMEM) on allocation failure; the error-path
 * cleanup labels are partially elided in this view.
 */
654 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
656 struct inode_record *rec;
657 struct inode_backref *backref;
658 struct inode_backref *orig;
659 struct inode_backref *tmp;
660 struct orphan_data_extent *src_orphan;
661 struct orphan_data_extent *dst_orphan;
666 rec = malloc(sizeof(*rec));
668 return ERR_PTR(-ENOMEM);
669 memcpy(rec, orig_rec, sizeof(*rec));
/* Re-init list heads/tree so the clone does not alias the original's. */
671 INIT_LIST_HEAD(&rec->backrefs);
672 INIT_LIST_HEAD(&rec->orphan_extents);
673 rec->holes = RB_ROOT;
675 list_for_each_entry(orig, &orig_rec->backrefs, list) {
676 size = sizeof(*orig) + orig->namelen + 1;
677 backref = malloc(size);
682 memcpy(backref, orig, size);
683 list_add_tail(&backref->list, &rec->backrefs);
685 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
686 dst_orphan = malloc(sizeof(*dst_orphan));
691 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
692 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
694 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
/* Error path: tear down whatever was cloned so far. */
701 rb = rb_first(&rec->holes);
703 struct file_extent_hole *hole;
705 hole = rb_entry(rb, struct file_extent_hole, node);
711 if (!list_empty(&rec->backrefs))
712 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
713 list_del(&orig->list);
717 if (!list_empty(&rec->orphan_extents))
718 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
719 list_del(&orig->list);
/* Report each orphan data extent of tree @root_id on stdout. */
728 static void print_orphan_data_extents(struct list_head *orphan_extents,
731 struct orphan_data_extent *orphan;
733 if (list_empty(orphan_extents))
735 printf("The following data extent is lost in tree %llu:\n",
737 list_for_each_entry(orphan, orphan_extents, list) {
738 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
739 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Print a one-line summary of every I_ERR_* bit set for @rec on stderr,
 * followed by the orphan extents and/or hole list when the corresponding
 * error bits are set.  For reloc roots the underlying fs root objectid
 * (root_key.offset) is reported instead.
 */
744 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
746 u64 root_objectid = root->root_key.objectid;
747 int errors = rec->errors;
751 /* reloc root errors, we print its corresponding fs root objectid*/
752 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
753 root_objectid = root->root_key.offset;
754 fprintf(stderr, "reloc");
756 fprintf(stderr, "root %llu inode %llu errors %x",
757 (unsigned long long) root_objectid,
758 (unsigned long long) rec->ino, rec->errors);
760 if (errors & I_ERR_NO_INODE_ITEM)
761 fprintf(stderr, ", no inode item");
762 if (errors & I_ERR_NO_ORPHAN_ITEM)
763 fprintf(stderr, ", no orphan item");
764 if (errors & I_ERR_DUP_INODE_ITEM)
765 fprintf(stderr, ", dup inode item");
766 if (errors & I_ERR_DUP_DIR_INDEX)
767 fprintf(stderr, ", dup dir index");
768 if (errors & I_ERR_ODD_DIR_ITEM)
769 fprintf(stderr, ", odd dir item");
770 if (errors & I_ERR_ODD_FILE_EXTENT)
771 fprintf(stderr, ", odd file extent");
772 if (errors & I_ERR_BAD_FILE_EXTENT)
773 fprintf(stderr, ", bad file extent");
774 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
775 fprintf(stderr, ", file extent overlap");
776 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
777 fprintf(stderr, ", file extent discount");
778 if (errors & I_ERR_DIR_ISIZE_WRONG)
779 fprintf(stderr, ", dir isize wrong");
780 if (errors & I_ERR_FILE_NBYTES_WRONG)
781 fprintf(stderr, ", nbytes wrong");
782 if (errors & I_ERR_ODD_CSUM_ITEM)
783 fprintf(stderr, ", odd csum item");
784 if (errors & I_ERR_SOME_CSUM_MISSING)
785 fprintf(stderr, ", some csum missing");
786 if (errors & I_ERR_LINK_COUNT_WRONG)
787 fprintf(stderr, ", link count wrong");
788 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
789 fprintf(stderr, ", orphan file extent");
790 fprintf(stderr, "\n");
791 /* Print the orphan extents if needed */
792 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
793 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
795 /* Print the holes if needed */
796 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
797 struct file_extent_hole *hole;
798 struct rb_node *node;
801 node = rb_first(&rec->holes);
802 fprintf(stderr, "Found file extent holes:\n");
805 hole = rb_entry(node, struct file_extent_hole, node);
806 fprintf(stderr, "\tstart: %llu, len: %llu\n",
807 hole->start, hole->len);
808 node = rb_next(node);
/* Empty hole tree but discount error: the whole file is one hole. */
811 fprintf(stderr, "\tstart: 0, len: %llu\n",
812 round_up(rec->isize, root->sectorsize));
816 static void print_ref_error(int errors)
818 if (errors & REF_ERR_NO_DIR_ITEM)
819 fprintf(stderr, ", no dir item");
820 if (errors & REF_ERR_NO_DIR_INDEX)
821 fprintf(stderr, ", no dir index");
822 if (errors & REF_ERR_NO_INODE_REF)
823 fprintf(stderr, ", no inode ref");
824 if (errors & REF_ERR_DUP_DIR_ITEM)
825 fprintf(stderr, ", dup dir item");
826 if (errors & REF_ERR_DUP_DIR_INDEX)
827 fprintf(stderr, ", dup dir index");
828 if (errors & REF_ERR_DUP_INODE_REF)
829 fprintf(stderr, ", dup inode ref");
830 if (errors & REF_ERR_INDEX_UNMATCH)
831 fprintf(stderr, ", index mismatch");
832 if (errors & REF_ERR_FILETYPE_UNMATCH)
833 fprintf(stderr, ", filetype mismatch");
834 if (errors & REF_ERR_NAME_TOO_LONG)
835 fprintf(stderr, ", name too long");
836 if (errors & REF_ERR_NO_ROOT_REF)
837 fprintf(stderr, ", no root ref");
838 if (errors & REF_ERR_NO_ROOT_BACKREF)
839 fprintf(stderr, ", no root backref");
840 if (errors & REF_ERR_DUP_ROOT_REF)
841 fprintf(stderr, ", dup root ref");
842 if (errors & REF_ERR_DUP_ROOT_BACKREF)
843 fprintf(stderr, ", dup root backref");
844 fprintf(stderr, "\n");
/*
 * Look up (or lazily create) the inode_record for @ino in @inode_cache.
 * With @mod set, a record shared by multiple referencers (refs > 1) is
 * cloned first so the caller gets a private copy.  Returns ERR_PTR on
 * allocation/insertion failure; some branches are elided in this view.
 */
847 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
850 struct ptr_node *node;
851 struct cache_extent *cache;
852 struct inode_record *rec = NULL;
855 cache = lookup_cache_extent(inode_cache, ino, 1);
857 node = container_of(cache, struct ptr_node, cache);
859 if (mod && rec->refs > 1) {
860 node->data = clone_inode_rec(rec);
861 if (IS_ERR(node->data))
/* Cache miss: create a fresh record plus its cache node. */
867 rec = calloc(1, sizeof(*rec));
869 return ERR_PTR(-ENOMEM);
871 rec->extent_start = (u64)-1;
873 INIT_LIST_HEAD(&rec->backrefs);
874 INIT_LIST_HEAD(&rec->orphan_extents);
875 rec->holes = RB_ROOT;
877 node = malloc(sizeof(*node));
880 return ERR_PTR(-ENOMEM);
882 node->cache.start = ino;
883 node->cache.size = 1;
/* The free-ino objectid is special-cased; handling elided here. */
886 if (ino == BTRFS_FREE_INO_OBJECTID)
889 ret = insert_cache_extent(inode_cache, &node->cache);
891 return ERR_PTR(-EEXIST);
/* Unlink and free every orphan_data_extent on @orphan_extents. */
896 static void free_orphan_data_extents(struct list_head *orphan_extents)
898 struct orphan_data_extent *orphan;
900 while (!list_empty(orphan_extents)) {
901 orphan = list_entry(orphan_extents->next,
902 struct orphan_data_extent, list);
903 list_del(&orphan->list);
/* Free an inode_record and everything it owns (backrefs, orphans, holes). */
908 static void free_inode_rec(struct inode_record *rec)
910 struct inode_backref *backref;
915 while (!list_empty(&rec->backrefs)) {
916 backref = to_inode_backref(rec->backrefs.next);
917 list_del(&backref->list);
920 free_orphan_data_extents(&rec->orphan_extents);
921 free_file_extent_holes(&rec->holes);
/* A record is releasable once fully checked, error free, its inode item was
 * seen, link counts agree and no backrefs remain. */
925 static int can_free_inode_rec(struct inode_record *rec)
927 if (!rec->errors && rec->checked && rec->found_inode_item &&
928 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Finalize @rec: drop fully-verified backrefs, derive the remaining I_ERR_*
 * bits from the accumulated counters (sizes, nbytes, holes, csums), and if
 * the record ends up clean, remove it from @inode_cache and free it.
 */
933 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
934 struct inode_record *rec)
936 struct cache_extent *cache;
937 struct inode_backref *tmp, *backref;
938 struct ptr_node *node;
/* Without the inode item we cannot trust imode; defer finalization. */
941 if (!rec->found_inode_item)
944 filetype = imode_to_type(rec->imode);
945 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
946 if (backref->found_dir_item && backref->found_dir_index) {
947 if (backref->filetype != filetype)
948 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
/* A backref with all three pieces present and no errors is done. */
949 if (!backref->errors && backref->found_inode_ref &&
950 rec->nlink == rec->found_link) {
951 list_del(&backref->list);
957 if (!rec->checked || rec->merging)
960 if (S_ISDIR(rec->imode)) {
961 if (rec->found_size != rec->isize)
962 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
963 if (rec->found_file_extent)
964 rec->errors |= I_ERR_ODD_FILE_EXTENT;
965 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
966 if (rec->found_dir_item)
967 rec->errors |= I_ERR_ODD_DIR_ITEM;
968 if (rec->found_size != rec->nbytes)
969 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
/* With NO_HOLES disabled, extents must cover the file up to isize. */
970 if (rec->nlink > 0 && !no_holes &&
971 (rec->extent_end < rec->isize ||
972 first_extent_gap(&rec->holes) < rec->isize))
973 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
976 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
977 if (rec->found_csum_item && rec->nodatasum)
978 rec->errors |= I_ERR_ODD_CSUM_ITEM;
979 if (rec->some_csum_missing && !rec->nodatasum)
980 rec->errors |= I_ERR_SOME_CSUM_MISSING;
983 BUG_ON(rec->refs != 1);
984 if (can_free_inode_rec(rec)) {
985 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
986 node = container_of(cache, struct ptr_node, cache);
987 BUG_ON(node->data != rec);
988 remove_cache_extent(inode_cache, &node->cache);
/* Search @root for an orphan item keyed (BTRFS_ORPHAN_OBJECTID, ORPHAN_ITEM,
 * ino); return semantics elided in this view. */
994 static int check_orphan_item(struct btrfs_root *root, u64 ino)
996 struct btrfs_path path;
997 struct btrfs_key key;
1000 key.objectid = BTRFS_ORPHAN_OBJECTID;
1001 key.type = BTRFS_ORPHAN_ITEM_KEY;
1004 btrfs_init_path(&path);
1005 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1006 btrfs_release_path(&path);
/*
 * Record the contents of one INODE_ITEM into the active shared node's
 * current inode_record; flags a duplicate item, and nlink == 0 implies an
 * orphan item must exist (I_ERR_NO_ORPHAN_ITEM until proven otherwise).
 */
1012 static int process_inode_item(struct extent_buffer *eb,
1013 int slot, struct btrfs_key *key,
1014 struct shared_node *active_node)
1016 struct inode_record *rec;
1017 struct btrfs_inode_item *item;
1019 rec = active_node->current;
1020 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1021 if (rec->found_inode_item) {
1022 rec->errors |= I_ERR_DUP_INODE_ITEM;
1025 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1026 rec->nlink = btrfs_inode_nlink(eb, item);
1027 rec->isize = btrfs_inode_size(eb, item);
1028 rec->nbytes = btrfs_inode_nbytes(eb, item);
1029 rec->imode = btrfs_inode_mode(eb, item);
1030 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1032 rec->found_inode_item = 1;
1033 if (rec->nlink == 0)
1034 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1035 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find the backref of @rec matching (@name, @namelen, @dir), or allocate a
 * zeroed one (name copied inline after the struct) and append it.
 */
1039 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1041 int namelen, u64 dir)
1043 struct inode_backref *backref;
1045 list_for_each_entry(backref, &rec->backrefs, list) {
/* Multiple-objectids placeholder matches any existing backref. */
1046 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1048 if (backref->dir != dir || backref->namelen != namelen)
1050 if (memcmp(name, backref->name, namelen))
1055 backref = malloc(sizeof(*backref) + namelen + 1);
1058 memset(backref, 0, sizeof(*backref));
1060 backref->namelen = namelen;
1061 memcpy(backref->name, name, namelen);
1062 backref->name[namelen] = '\0';
1063 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Merge one directory-entry sighting (@itemtype is DIR_INDEX, DIR_ITEM or
 * INODE_REF/EXTREF) into the inode's backref, raising REF_ERR_* bits on
 * duplicates and on index/filetype disagreements between item kinds.
 */
1067 static int add_inode_backref(struct cache_tree *inode_cache,
1068 u64 ino, u64 dir, u64 index,
1069 const char *name, int namelen,
1070 u8 filetype, u8 itemtype, int errors)
1072 struct inode_record *rec;
1073 struct inode_backref *backref;
1075 rec = get_inode_rec(inode_cache, ino, 1);
1076 BUG_ON(IS_ERR(rec));
1077 backref = get_inode_backref(rec, name, namelen, dir);
1080 backref->errors |= errors;
1081 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1082 if (backref->found_dir_index)
1083 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1084 if (backref->found_inode_ref && backref->index != index)
1085 backref->errors |= REF_ERR_INDEX_UNMATCH;
1086 if (backref->found_dir_item && backref->filetype != filetype)
1087 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1089 backref->index = index;
1090 backref->filetype = filetype;
1091 backref->found_dir_index = 1;
1092 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1094 if (backref->found_dir_item)
1095 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1096 if (backref->found_dir_index && backref->filetype != filetype)
1097 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1099 backref->filetype = filetype;
1100 backref->found_dir_item = 1;
1101 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1102 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1103 if (backref->found_inode_ref)
1104 backref->errors |= REF_ERR_DUP_INODE_REF;
1105 if (backref->found_dir_index && backref->index != index)
1106 backref->errors |= REF_ERR_INDEX_UNMATCH;
1108 backref->index = index;
1110 backref->ref_type = itemtype;
1111 backref->found_inode_ref = 1;
1116 maybe_free_inode_rec(inode_cache, rec);
/*
 * Fold everything recorded in @src into @dst (same inode seen via two
 * shared-tree paths): replays src's backrefs into @dst_cache, merges the
 * found_* flags, hole trees, link/size counters, extent ranges (detecting
 * overlaps/gaps) and the inode item fields themselves.
 */
1120 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1121 struct cache_tree *dst_cache)
1123 struct inode_backref *backref;
1128 list_for_each_entry(backref, &src->backrefs, list) {
1129 if (backref->found_dir_index) {
1130 add_inode_backref(dst_cache, dst->ino, backref->dir,
1131 backref->index, backref->name,
1132 backref->namelen, backref->filetype,
1133 BTRFS_DIR_INDEX_KEY, backref->errors);
1135 if (backref->found_dir_item) {
1137 add_inode_backref(dst_cache, dst->ino,
1138 backref->dir, 0, backref->name,
1139 backref->namelen, backref->filetype,
1140 BTRFS_DIR_ITEM_KEY, backref->errors);
1142 if (backref->found_inode_ref) {
1143 add_inode_backref(dst_cache, dst->ino,
1144 backref->dir, backref->index,
1145 backref->name, backref->namelen, 0,
1146 backref->ref_type, backref->errors);
1150 if (src->found_dir_item)
1151 dst->found_dir_item = 1;
1152 if (src->found_file_extent)
1153 dst->found_file_extent = 1;
1154 if (src->found_csum_item)
1155 dst->found_csum_item = 1;
1156 if (src->some_csum_missing)
1157 dst->some_csum_missing = 1;
/* Keep the smaller first gap: copy src's holes in when it knows of an
 * earlier hole than dst does. */
1158 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1159 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1164 BUG_ON(src->found_link < dir_count);
1165 dst->found_link += src->found_link - dir_count;
1166 dst->found_size += src->found_size;
1167 if (src->extent_start != (u64)-1) {
1168 if (dst->extent_start == (u64)-1) {
1169 dst->extent_start = src->extent_start;
1170 dst->extent_end = src->extent_end;
1172 if (dst->extent_end > src->extent_start)
1173 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1174 else if (dst->extent_end < src->extent_start) {
/* Gap between the two ranges becomes a recorded hole. */
1175 ret = add_file_extent_hole(&dst->holes,
1177 src->extent_start - dst->extent_end);
1179 if (dst->extent_end < src->extent_end)
1180 dst->extent_end = src->extent_end;
1184 dst->errors |= src->errors;
1185 if (src->found_inode_item) {
1186 if (!dst->found_inode_item) {
1187 dst->nlink = src->nlink;
1188 dst->isize = src->isize;
1189 dst->nbytes = src->nbytes;
1190 dst->imode = src->imode;
1191 dst->nodatasum = src->nodatasum;
1192 dst->found_inode_item = 1;
1194 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Move every cached record from @src_node into @dst_node: first the
 * root_cache, then the inode_cache.  Records already present in the
 * destination are merged via merge_inode_recs(); the destination's
 * "current" inode pointer is advanced to match the spliced-in progress.
 */
1202 static int splice_shared_node(struct shared_node *src_node,
1203 struct shared_node *dst_node)
1205 struct cache_extent *cache;
1206 struct ptr_node *node, *ins;
1207 struct cache_tree *src, *dst;
1208 struct inode_record *rec, *conflict;
1209 u64 current_ino = 0;
1213 if (--src_node->refs == 0)
1215 if (src_node->current)
1216 current_ino = src_node->current->ino;
1218 src = &src_node->root_cache;
1219 dst = &dst_node->root_cache;
1221 cache = search_cache_extent(src, 0);
1223 node = container_of(cache, struct ptr_node, cache);
1225 cache = next_cache_extent(cache);
1228 remove_cache_extent(src, &node->cache);
/* Re-wrap the record in a fresh ptr_node for the destination tree. */
1231 ins = malloc(sizeof(*ins));
1233 ins->cache.start = node->cache.start;
1234 ins->cache.size = node->cache.size;
1238 ret = insert_cache_extent(dst, &ins->cache);
1239 if (ret == -EEXIST) {
1240 conflict = get_inode_rec(dst, rec->ino, 1);
1241 BUG_ON(IS_ERR(conflict));
1242 merge_inode_recs(rec, conflict, dst);
1244 conflict->checked = 1;
1245 if (dst_node->current == conflict)
1246 dst_node->current = NULL;
1248 maybe_free_inode_rec(dst, conflict);
1249 free_inode_rec(rec);
/* First pass handled root_cache; repeat for inode_cache. */
1256 if (src == &src_node->root_cache) {
1257 src = &src_node->inode_cache;
1258 dst = &dst_node->inode_cache;
1262 if (current_ino > 0 && (!dst_node->current ||
1263 current_ino > dst_node->current->ino)) {
1264 if (dst_node->current) {
1265 dst_node->current->checked = 1;
1266 maybe_free_inode_rec(dst, dst_node->current);
1268 dst_node->current = get_inode_rec(dst, current_ino, 1);
1269 BUG_ON(IS_ERR(dst_node->current));
/* cache_tree destructor callback: free a ptr_node and its inode_record. */
1274 static void free_inode_ptr(struct cache_extent *cache)
1276 struct ptr_node *node;
1277 struct inode_record *rec;
1279 node = container_of(cache, struct ptr_node, cache);
1281 free_inode_rec(rec);
1285 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/* Look up the shared_node covering @bytenr in @shared, or NULL-equivalent
 * (miss handling elided in this view). */
1287 static struct shared_node *find_shared_node(struct cache_tree *shared,
1290 struct cache_extent *cache;
1291 struct shared_node *node;
1293 cache = lookup_cache_extent(shared, bytenr, 1);
1295 node = container_of(cache, struct shared_node, cache);
/* Allocate and insert a new shared_node for @bytenr with @refs references. */
1301 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1304 struct shared_node *node;
1306 node = calloc(1, sizeof(*node));
1309 node->cache.start = bytenr;
1310 node->cache.size = 1;
1311 cache_tree_init(&node->root_cache);
1312 cache_tree_init(&node->inode_cache);
1315 ret = insert_cache_extent(shared, &node->cache);
/*
 * Descend into a shared tree block at @bytenr/@level during the walk.
 * First visit registers a shared_node and makes it the active node; later
 * visits either drop the node (dead root) or splice its accumulated state
 * into the currently active node.
 */
1320 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1321 struct walk_control *wc, int level)
1323 struct shared_node *node;
1324 struct shared_node *dest;
1327 if (level == wc->active_node)
1330 BUG_ON(wc->active_node <= level);
1331 node = find_shared_node(&wc->shared, bytenr);
1333 ret = add_shared_node(&wc->shared, bytenr, refs);
1335 node = find_shared_node(&wc->shared, bytenr);
1336 wc->nodes[level] = node;
1337 wc->active_node = level;
/* Root being deleted: nothing to splice, just drop our reference. */
1341 if (wc->root_level == wc->active_node &&
1342 btrfs_root_refs(&root->root_item) == 0) {
1343 if (--node->refs == 0) {
1344 free_inode_recs_tree(&node->root_cache);
1345 free_inode_recs_tree(&node->inode_cache);
1346 remove_cache_extent(&wc->shared, &node->cache);
1352 dest = wc->nodes[wc->active_node];
1353 splice_shared_node(node, dest);
1354 if (node->refs == 0) {
1355 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Ascend out of a shared block at @level: reactivate the next shared node
 * above it and splice this node's state upward (unless the root is dead).
 */
1361 static int leave_shared_node(struct btrfs_root *root,
1362 struct walk_control *wc, int level)
1364 struct shared_node *node;
1365 struct shared_node *dest;
1368 if (level == wc->root_level)
/* Find the nearest populated level above us to become active. */
1371 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1375 BUG_ON(i >= BTRFS_MAX_LEVEL);
1377 node = wc->nodes[wc->active_node];
1378 wc->nodes[wc->active_node] = NULL;
1379 wc->active_node = i;
1381 dest = wc->nodes[wc->active_node];
1382 if (wc->active_node < wc->root_level ||
1383 btrfs_root_refs(&root->root_item) > 0) {
1384 BUG_ON(node->refs <= 1);
1385 splice_shared_node(node, dest);
1387 BUG_ON(node->refs < 2);
1396 * 1 - if the root with id child_root_id is a child of root parent_root_id
1397 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1398 * has other root(s) as parent(s)
1399 * 2 - if the root child_root_id doesn't have any parent roots
1401 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1404 struct btrfs_path path;
1405 struct btrfs_key key;
1406 struct extent_buffer *leaf;
1410 btrfs_init_path(&path);
/* Fast path: a direct ROOT_REF from parent to child. */
1412 key.objectid = parent_root_id;
1413 key.type = BTRFS_ROOT_REF_KEY;
1414 key.offset = child_root_id;
1415 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1419 btrfs_release_path(&path);
/* Slow path: scan the child's ROOT_BACKREF items for any parent. */
1423 key.objectid = child_root_id;
1424 key.type = BTRFS_ROOT_BACKREF_KEY;
1426 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1432 leaf = path.nodes[0];
1433 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1434 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1437 leaf = path.nodes[0];
1440 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1441 if (key.objectid != child_root_id ||
1442 key.type != BTRFS_ROOT_BACKREF_KEY)
1447 if (key.offset == parent_root_id) {
1448 btrfs_release_path(&path);
1455 btrfs_release_path(&path);
1458 return has_parent ? 0 : 2;
/*
 * Record all directory entries packed into one DIR_ITEM/DIR_INDEX leaf item
 * on the currently tracked inode: for each sub-entry add an inode backref
 * into the inode cache (or root cache for subvolume links).
 * NOTE(review): listing has gaps — several declarations, the truncation
 * branch and the final return are missing from view.
 */
1461 static int process_dir_item(struct btrfs_root *root,
1462 struct extent_buffer *eb,
1463 int slot, struct btrfs_key *key,
1464 struct shared_node *active_node)
1474 struct btrfs_dir_item *di;
1475 struct inode_record *rec;
1476 struct cache_tree *root_cache;
1477 struct cache_tree *inode_cache;
1478 struct btrfs_key location;
1479 char namebuf[BTRFS_NAME_LEN];
1481 root_cache = &active_node->root_cache;
1482 inode_cache = &active_node->inode_cache;
1483 rec = active_node->current;
1484 rec->found_dir_item = 1;
/* A single item may hold several packed dir entries; walk them all. */
1486 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1487 total = btrfs_item_size_nr(eb, slot);
1488 while (cur < total) {
1490 btrfs_dir_item_key_to_cpu(eb, di, &location);
1491 name_len = btrfs_dir_name_len(eb, di);
1492 data_len = btrfs_dir_data_len(eb, di);
1493 filetype = btrfs_dir_type(eb, di);
/* Dir isize accounting: sum of entry name lengths. */
1495 rec->found_size += name_len;
/* Clamp over-long names so the read below stays in namebuf. */
1496 if (name_len <= BTRFS_NAME_LEN) {
1500 len = BTRFS_NAME_LEN;
1501 error = REF_ERR_NAME_TOO_LONG;
1503 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
/* Regular entry -> inode cache; subvolume link -> root cache. */
1505 if (location.type == BTRFS_INODE_ITEM_KEY) {
1506 add_inode_backref(inode_cache, location.objectid,
1507 key->objectid, key->offset, namebuf,
1508 len, filetype, key->type, error);
1509 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1510 add_inode_backref(root_cache, location.objectid,
1511 key->objectid, key->offset,
1512 namebuf, len, filetype,
/* Unknown location type: record it against the "multiple" objectid. */
1515 fprintf(stderr, "invalid location in dir item %u\n",
1517 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1518 key->objectid, key->offset, namebuf,
1519 len, filetype, key->type, error);
/* Advance to the next packed entry. */
1522 len = sizeof(*di) + name_len + data_len;
1523 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item must contain exactly one entry. */
1526 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1527 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record all INODE_REF names packed in one leaf item as backrefs of the
 * referenced inode (key->objectid), parented at key->offset.
 * NOTE(review): listing has gaps — declarations, the in-range name branch
 * and the return are missing from view.
 */
1532 static int process_inode_ref(struct extent_buffer *eb,
1533 int slot, struct btrfs_key *key,
1534 struct shared_node *active_node)
1542 struct cache_tree *inode_cache;
1543 struct btrfs_inode_ref *ref;
1544 char namebuf[BTRFS_NAME_LEN];
1546 inode_cache = &active_node->inode_cache;
/* One item can carry several packed refs; iterate by byte offset. */
1548 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1549 total = btrfs_item_size_nr(eb, slot);
1550 while (cur < total) {
1551 name_len = btrfs_inode_ref_name_len(eb, ref);
1552 index = btrfs_inode_ref_index(eb, ref);
/* Clamp over-long names so the copy stays inside namebuf. */
1553 if (name_len <= BTRFS_NAME_LEN) {
1557 len = BTRFS_NAME_LEN;
1558 error = REF_ERR_NAME_TOO_LONG;
1560 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1561 add_inode_backref(inode_cache, key->objectid, key->offset,
1562 index, namebuf, len, 0, key->type, error);
/* Step over this ref (header + name) to the next one. */
1564 len = sizeof(*ref) + name_len;
1565 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Like process_inode_ref(), but for extended refs (INODE_EXTREF), which
 * carry the parent dir objectid inline instead of in the key offset.
 * NOTE(review): listing has gaps — declarations, the in-range name branch
 * and the return are missing from view.
 */
1571 static int process_inode_extref(struct extent_buffer *eb,
1572 int slot, struct btrfs_key *key,
1573 struct shared_node *active_node)
1582 struct cache_tree *inode_cache;
1583 struct btrfs_inode_extref *extref;
1584 char namebuf[BTRFS_NAME_LEN];
1586 inode_cache = &active_node->inode_cache;
1588 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1589 total = btrfs_item_size_nr(eb, slot);
1590 while (cur < total) {
1591 name_len = btrfs_inode_extref_name_len(eb, extref);
1592 index = btrfs_inode_extref_index(eb, extref);
/* Parent dir comes from the extref payload, not the item key. */
1593 parent = btrfs_inode_extref_parent(eb, extref);
/* Clamp over-long names so the copy stays inside namebuf. */
1594 if (name_len <= BTRFS_NAME_LEN) {
1598 len = BTRFS_NAME_LEN;
1599 error = REF_ERR_NAME_TOO_LONG;
1601 read_extent_buffer(eb, namebuf,
1602 (unsigned long)(extref + 1), len);
1603 add_inode_backref(inode_cache, key->objectid, parent,
1604 index, namebuf, len, 0, key->type, error);
/* Step over this extref (header + name) to the next one. */
1606 len = sizeof(*extref) + name_len;
1607 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Count how many bytes of the range [start, start + len) are covered by
 * checksum items in the csum tree; the covered byte count is returned via
 * *found.
 * NOTE(review): listing has gaps — key.offset initialization, error
 * handling, the loop advance and the return are missing from view.
 */
1614 static int count_csum_range(struct btrfs_root *root, u64 start,
1615 u64 len, u64 *found)
1617 struct btrfs_key key;
1618 struct btrfs_path path;
1619 struct extent_buffer *leaf;
1624 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1626 btrfs_init_path(&path);
1628 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1630 key.type = BTRFS_EXTENT_CSUM_KEY;
1632 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * If we landed past the wanted key, the previous csum item may still
 * overlap our range — step back one slot in that case.
 */
1636 if (ret > 0 && path.slots[0] > 0) {
1637 leaf = path.nodes[0];
1638 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1639 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1640 key.type == BTRFS_EXTENT_CSUM_KEY)
1645 leaf = path.nodes[0];
1646 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1647 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1652 leaf = path.nodes[0];
/* Stop once we leave the csum items. */
1655 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1656 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1657 key.type != BTRFS_EXTENT_CSUM_KEY)
1660 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1661 if (key.offset >= start + len)
1664 if (key.offset > start)
/* Each csum_size bytes of item payload covers one sector of data. */
1667 size = btrfs_item_size_nr(leaf, path.slots[0]);
1668 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1669 if (csum_end > start) {
1670 size = min(csum_end - start, len);
1679 btrfs_release_path(&path);
/*
 * Validate one EXTENT_DATA item of the current inode record: check for
 * overlaps/holes against the previous extent, sanity-check the extent
 * fields, account found_size, and verify csum coverage for regular
 * extents.
 * NOTE(review): listing has gaps — several declarations, returns and
 * closing braces are missing from view.
 */
1685 static int process_file_extent(struct btrfs_root *root,
1686 struct extent_buffer *eb,
1687 int slot, struct btrfs_key *key,
1688 struct shared_node *active_node)
1690 struct inode_record *rec;
1691 struct btrfs_file_extent_item *fi;
1693 u64 disk_bytenr = 0;
1694 u64 extent_offset = 0;
1695 u64 mask = root->sectorsize - 1;
1699 rec = active_node->current;
1700 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1701 rec->found_file_extent = 1;
/* First extent seen for this inode: initialize the tracked range. */
1703 if (rec->extent_start == (u64)-1) {
1704 rec->extent_start = key->offset;
1705 rec->extent_end = key->offset;
/* Overlap with the previous extent is an error; a gap is a hole. */
1708 if (rec->extent_end > key->offset)
1709 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1710 else if (rec->extent_end < key->offset) {
1711 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1712 key->offset - rec->extent_end);
1717 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1718 extent_type = btrfs_file_extent_type(eb, fi);
1720 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1721 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1723 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1724 rec->found_size += num_bytes;
/* Round inline length up to a sector for extent_end tracking. */
1725 num_bytes = (num_bytes + mask) & ~mask;
1726 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1727 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1728 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1729 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1730 extent_offset = btrfs_file_extent_offset(eb, fi);
/* num_bytes must be non-zero and sector aligned. */
1731 if (num_bytes == 0 || (num_bytes & mask))
1732 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced slice must fit inside the extent's ram size. */
1733 if (num_bytes + extent_offset >
1734 btrfs_file_extent_ram_bytes(eb, fi))
1735 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Prealloc extents can be neither compressed nor encoded. */
1736 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1737 (btrfs_file_extent_compression(eb, fi) ||
1738 btrfs_file_extent_encryption(eb, fi) ||
1739 btrfs_file_extent_other_encoding(eb, fi)))
1740 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Holes (disk_bytenr == 0) do not contribute to nbytes. */
1741 if (disk_bytenr > 0)
1742 rec->found_size += num_bytes;
1744 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1746 rec->extent_end = key->offset + num_bytes;
1749 * The data reloc tree will copy full extents into its inode and then
1750 * copy the corresponding csums. Because the extent it copied could be
1751 * a preallocated extent that hasn't been written to yet there may be no
1752 * csums to copy, ergo we won't have csums for our file extent. This is
1753 * ok so just don't bother checking csums if the inode belongs to the
1756 if (disk_bytenr > 0 &&
1757 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* Compressed extents are csummed over the on-disk bytes. */
1759 if (btrfs_file_extent_compression(eb, fi))
1760 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1762 disk_bytenr += extent_offset;
1764 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
/* REG extents should be fully csummed; PREALLOC must not be. */
1767 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1769 rec->found_csum_item = 1;
1770 if (found < num_bytes)
1771 rec->some_csum_missing = 1;
1772 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1774 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Scan one fs-tree leaf and dispatch every item to the matching
 * process_* handler, maintaining active_node->current as the inode
 * record the items belong to.
 * NOTE(review): listing has gaps — break statements, the default case
 * and the final return are missing from view.
 */
1780 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1781 struct walk_control *wc)
1783 struct btrfs_key key;
1787 struct cache_tree *inode_cache;
1788 struct shared_node *active_node;
/* Dead root at its own level: nothing to collect. */
1790 if (wc->root_level == wc->active_node &&
1791 btrfs_root_refs(&root->root_item) == 0)
1794 active_node = wc->nodes[wc->active_node];
1795 inode_cache = &active_node->inode_cache;
1796 nritems = btrfs_header_nritems(eb);
1797 for (i = 0; i < nritems; i++) {
1798 btrfs_item_key_to_cpu(eb, &key, i);
/* Skip free-space cache and orphan bookkeeping items. */
1800 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1802 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/*
 * Moving on to a higher objectid: finish the previous inode
 * record and start (or fetch) the one for this objectid.
 */
1805 if (active_node->current == NULL ||
1806 active_node->current->ino < key.objectid) {
1807 if (active_node->current) {
1808 active_node->current->checked = 1;
1809 maybe_free_inode_rec(inode_cache,
1810 active_node->current);
1812 active_node->current = get_inode_rec(inode_cache,
1814 BUG_ON(IS_ERR(active_node->current));
1817 case BTRFS_DIR_ITEM_KEY:
1818 case BTRFS_DIR_INDEX_KEY:
1819 ret = process_dir_item(root, eb, i, &key, active_node);
1821 case BTRFS_INODE_REF_KEY:
1822 ret = process_inode_ref(eb, i, &key, active_node);
1824 case BTRFS_INODE_EXTREF_KEY:
1825 ret = process_inode_extref(eb, i, &key, active_node);
1827 case BTRFS_INODE_ITEM_KEY:
1828 ret = process_inode_item(eb, i, &key, active_node);
1830 case BTRFS_EXTENT_DATA_KEY:
1831 ret = process_file_extent(root, eb, i, &key,
/*
 * Issue readahead for all children of @node starting at @slot, so the
 * subsequent walk reads them without per-block stalls. Best effort; the
 * readahead results are not checked.
 * NOTE(review): listing has gaps — declarations and a level check are
 * missing from view.
 */
1841 static void reada_walk_down(struct btrfs_root *root,
1842 struct extent_buffer *node, int slot)
1851 level = btrfs_header_level(node);
1855 nritems = btrfs_header_nritems(node);
1856 blocksize = root->nodesize;
1857 for (i = slot; i < nritems; i++) {
1858 bytenr = btrfs_node_blockptr(node, i);
1859 ptr_gen = btrfs_node_ptr_generation(node, i);
1860 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1865 * Check the child node/leaf by the following condition:
1866 * 1. the first item key of the node/leaf should be the same with the one
1868 * 2. block in parent node should match the child node/leaf.
1869 * 3. generation of parent node and child's header should be consistent.
1871 * Or the child node/leaf pointed by the key in parent is not valid.
1873 * We would like to check the leaf owner too, but since subvolumes may
1874 * share leaves the owner check is not reliable; the key check should be
1875 * sufficient for that case.
/*
 * Verify that @child matches what @parent's pointer at @slot promises:
 * same first key, same bytenr and same generation (see comment above).
 * NOTE(review): listing has gaps — the ret variable handling and the
 * return statement are missing from view.
 */
1877 static int check_child_node(struct btrfs_root *root,
1878 struct extent_buffer *parent, int slot,
1879 struct extent_buffer *child)
1881 struct btrfs_key parent_key;
1882 struct btrfs_key child_key;
1885 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* First key of the child: item key for leaves, node key otherwise. */
1886 if (btrfs_header_level(child) == 0)
1887 btrfs_item_key_to_cpu(child, &child_key, 0);
1889 btrfs_node_key_to_cpu(child, &child_key, 0);
1891 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1894 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1895 parent_key.objectid, parent_key.type, parent_key.offset,
1896 child_key.objectid, child_key.type, child_key.offset);
/* Child header must record the bytenr the parent points at. */
1898 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1900 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1901 btrfs_node_blockptr(parent, slot),
1902 btrfs_header_bytenr(child));
/* Generation stored in the parent pointer must match the child. */
1904 if (btrfs_node_ptr_generation(parent, slot) !=
1905 btrfs_header_generation(child)) {
1907 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1908 btrfs_header_generation(child),
1909 btrfs_node_ptr_generation(parent, slot));
/*
 * NOTE(review): these two fields belong to a struct whose opening line is
 * missing from this listing (walk_down_tree() accesses them through a
 * "struct node_refs *nrefs" parameter). They cache, per tree level, the
 * last looked-up block bytenr and its reference count to avoid repeated
 * extent tree lookups.
 */
1915 u64 bytenr[BTRFS_MAX_LEVEL];
1916 u64 refs[BTRFS_MAX_LEVEL];
/*
 * Descend the tree from path->nodes[*level]: process leaves via
 * process_one_leaf(), read and validate child blocks on the way down,
 * track shared subtrees via enter_shared_node(), and cache per-level
 * refcounts in @nrefs to skip duplicate extent-tree lookups.
 * NOTE(review): listing has many gaps (error paths, braces, gotos are
 * missing); comments describe only visible statements — do not assume
 * control flow across the gaps.
 */
1919 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1920 struct walk_control *wc, int *level,
1921 struct node_refs *nrefs)
1923 enum btrfs_tree_block_status status;
1926 struct extent_buffer *next;
1927 struct extent_buffer *cur;
1932 WARN_ON(*level < 0);
1933 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Use the cached refcount for the current block if it is still valid. */
1935 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1936 refs = nrefs->refs[*level];
1939 ret = btrfs_lookup_extent_info(NULL, root,
1940 path->nodes[*level]->start,
1941 *level, 1, &refs, NULL);
1946 nrefs->bytenr[*level] = path->nodes[*level]->start;
1947 nrefs->refs[*level] = refs;
/* Blocks referenced more than once are tracked as shared nodes. */
1951 ret = enter_shared_node(root, path->nodes[*level]->start,
1959 while (*level >= 0) {
1960 WARN_ON(*level < 0);
1961 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1962 cur = path->nodes[*level];
/* Stored level must agree with where we are in the path. */
1964 if (btrfs_header_level(cur) != *level)
1967 if (path->slots[*level] >= btrfs_header_nritems(cur))
/* Reached a leaf: collect its inode data and stop descending here. */
1970 ret = process_one_leaf(root, cur, wc);
1975 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1976 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1977 blocksize = root->nodesize;
/* Same refcount caching for the child we are about to visit. */
1979 if (bytenr == nrefs->bytenr[*level - 1]) {
1980 refs = nrefs->refs[*level - 1];
1982 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1983 *level - 1, 1, &refs, NULL);
1987 nrefs->bytenr[*level - 1] = bytenr;
1988 nrefs->refs[*level - 1] = refs;
/* Shared child already fully recorded: skip descending into it. */
1993 ret = enter_shared_node(root, bytenr, refs,
1996 path->slots[*level]++;
/* Read the child, with readahead for its siblings on a cache miss. */
2001 next = btrfs_find_tree_block(root, bytenr, blocksize);
2002 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2003 free_extent_buffer(next);
2004 reada_walk_down(root, cur, path->slots[*level]);
2005 next = read_tree_block(root, bytenr, blocksize,
/* Unreadable child: record a corrupt extent for later repair. */
2007 if (!extent_buffer_uptodate(next)) {
2008 struct btrfs_key node_key;
2010 btrfs_node_key_to_cpu(path->nodes[*level],
2012 path->slots[*level]);
2013 btrfs_add_corrupt_extent_record(root->fs_info,
2015 path->nodes[*level]->start,
2016 root->nodesize, *level);
2022 ret = check_child_node(root, cur, path->slots[*level], next);
/* Run the structural leaf/node checks before descending. */
2028 if (btrfs_is_leaf(next))
2029 status = btrfs_check_leaf(root, NULL, next);
2031 status = btrfs_check_node(root, NULL, next);
2032 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2033 free_extent_buffer(next);
/* Descend one level: install the child in the path. */
2038 *level = *level - 1;
2039 free_extent_buffer(path->nodes[*level]);
2040 path->nodes[*level] = next;
2041 path->slots[*level] = 0;
2044 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Ascend from *level, dropping fully-visited path nodes and leaving any
 * shared node recorded at the levels we pass; stops at the first level
 * that still has unvisited slots.
 * NOTE(review): listing has gaps — the early-advance body and return
 * statements are missing from view.
 */
2048 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2049 struct walk_control *wc, int *level)
2052 struct extent_buffer *leaf;
2054 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2055 leaf = path->nodes[i];
/* More siblings to visit at this level: resume the walk here. */
2056 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* Level exhausted: release it and leave its shared node, if any. */
2061 free_extent_buffer(path->nodes[*level]);
2062 path->nodes[*level] = NULL;
2063 BUG_ON(*level > wc->active_node);
2064 if (*level == wc->active_node)
2065 leave_shared_node(root, wc, *level);
/*
 * Validate the root directory inode record: it must have a clean inode
 * item, nlink of 1, no links counted, and exactly a self ".." inode ref
 * backref with no dir item/index.
 * NOTE(review): listing has gaps — the goto labels/returns are missing
 * from view.
 */
2072 static int check_root_dir(struct inode_record *rec)
2074 struct inode_backref *backref;
2077 if (!rec->found_inode_item || rec->errors)
2079 if (rec->nlink != 1 || rec->found_link != 0)
2081 if (list_empty(&rec->backrefs))
2083 backref = to_inode_backref(rec->backrefs.next);
2084 if (!backref->found_inode_ref)
/* Root dir self-reference is index 0 with the name "..". */
2086 if (backref->index != 0 || backref->namelen != 2 ||
2087 memcmp(backref->name, "..", 2))
2089 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Repair helper: rewrite a directory inode's isize to the found_size
 * computed while scanning its entries, then clear the isize error flag.
 * NOTE(review): listing has gaps — error handling after the search and
 * the return are missing from view.
 */
2096 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2097 struct btrfs_root *root, struct btrfs_path *path,
2098 struct inode_record *rec)
2100 struct btrfs_inode_item *ei;
2101 struct btrfs_key key;
/* Search with offset (u64)-1 lands just past the inode item. */
2104 key.objectid = rec->ino;
2105 key.type = BTRFS_INODE_ITEM_KEY;
2106 key.offset = (u64)-1;
2108 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2112 if (!path->slots[0]) {
/* Step back one slot and confirm it is our inode's item. */
2119 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2120 if (key.objectid != rec->ino) {
2125 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2126 struct btrfs_inode_item);
2127 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2128 btrfs_mark_buffer_dirty(path->nodes[0]);
2129 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2130 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2131 root->root_key.objectid);
2133 btrfs_release_path(path);
/*
 * Repair helper: insert an orphan item for this inode so a later mount
 * can finish deleting it, then clear the missing-orphan error flag.
 * NOTE(review): listing has gaps — the ret check and return are missing
 * from view.
 */
2137 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2138 struct btrfs_root *root,
2139 struct btrfs_path *path,
2140 struct inode_record *rec)
2144 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2145 btrfs_release_path(path);
2147 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Repair helper: rewrite the inode's nbytes to the found_size accumulated
 * from its file extents, then clear the nbytes error flag.
 * NOTE(review): listing has gaps — key.offset initialization and error
 * handling are missing from view.
 */
2151 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2152 struct btrfs_root *root,
2153 struct btrfs_path *path,
2154 struct inode_record *rec)
2156 struct btrfs_inode_item *ei;
2157 struct btrfs_key key;
2160 key.objectid = rec->ino;
2161 key.type = BTRFS_INODE_ITEM_KEY;
2164 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2171 /* Since ret == 0, no need to check anything */
2172 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2173 struct btrfs_inode_item);
2174 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2175 btrfs_mark_buffer_dirty(path->nodes[0]);
2176 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2177 printf("reset nbytes for ino %llu root %llu\n",
2178 rec->ino, root->root_key.objectid);
2180 btrfs_release_path(path);
/*
 * Repair helper: a backref has a dir item + inode ref but no DIR_INDEX
 * item — insert the missing index entry in its own transaction and update
 * the parent directory's size accounting.
 * NOTE(review): listing has gaps — IS_ERR(trans) check, insert error
 * handling and the return are missing from view.
 */
2184 static int add_missing_dir_index(struct btrfs_root *root,
2185 struct cache_tree *inode_cache,
2186 struct inode_record *rec,
2187 struct inode_backref *backref)
2189 struct btrfs_path path;
2190 struct btrfs_trans_handle *trans;
2191 struct btrfs_dir_item *dir_item;
2192 struct extent_buffer *leaf;
2193 struct btrfs_key key;
2194 struct btrfs_disk_key disk_key;
2195 struct inode_record *dir_rec;
2196 unsigned long name_ptr;
/* Item payload: dir_item header followed by the entry name. */
2197 u32 data_size = sizeof(*dir_item) + backref->namelen;
2200 trans = btrfs_start_transaction(root, 1);
2202 return PTR_ERR(trans);
2204 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2205 (unsigned long long)rec->ino);
2207 btrfs_init_path(&path);
2208 key.objectid = backref->dir;
2209 key.type = BTRFS_DIR_INDEX_KEY;
2210 key.offset = backref->index;
2211 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
/* Fill in the freshly inserted dir index item in place. */
2214 leaf = path.nodes[0];
2215 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2217 disk_key.objectid = cpu_to_le64(rec->ino);
2218 disk_key.type = BTRFS_INODE_ITEM_KEY;
2219 disk_key.offset = 0;
2221 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2222 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2223 btrfs_set_dir_data_len(leaf, dir_item, 0);
2224 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2225 name_ptr = (unsigned long)(dir_item + 1);
2226 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2227 btrfs_mark_buffer_dirty(leaf);
2228 btrfs_release_path(&path);
2229 btrfs_commit_transaction(trans, root);
/* Reflect the new entry in the cached records. */
2231 backref->found_dir_index = 1;
2232 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2233 BUG_ON(IS_ERR(dir_rec));
/* The parent dir's isize accounting grows by the name length. */
2236 dir_rec->found_size += backref->namelen;
2237 if (dir_rec->found_size == dir_rec->isize &&
2238 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2239 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2240 if (dir_rec->found_size != dir_rec->isize)
2241 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Repair helper: remove a bad DIR_INDEX entry. If the item holds only this
 * name the whole item is deleted, otherwise just this packed name.
 * NOTE(review): listing has gaps — IS_ERR checks, the di==NULL/IS_ERR
 * branch and the return are missing from view; @inode_cache appears unused
 * in the visible lines.
 */
2246 static int delete_dir_index(struct btrfs_root *root,
2247 struct cache_tree *inode_cache,
2248 struct inode_record *rec,
2249 struct inode_backref *backref)
2251 struct btrfs_trans_handle *trans;
2252 struct btrfs_dir_item *di;
2253 struct btrfs_path path;
2256 trans = btrfs_start_transaction(root, 1);
2258 return PTR_ERR(trans);
2260 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2261 (unsigned long long)backref->dir,
2262 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2263 (unsigned long long)root->objectid);
2265 btrfs_init_path(&path);
/* mod -1 asks the lookup to prepare the path for deletion. */
2266 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2267 backref->name, backref->namelen,
2268 backref->index, -1);
2271 btrfs_release_path(&path);
2272 btrfs_commit_transaction(trans, root);
/* Whole-item delete vs. removing one name from a packed item. */
2279 ret = btrfs_del_item(trans, root, &path);
2281 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2283 btrfs_release_path(&path);
2284 btrfs_commit_transaction(trans, root);
/*
 * Repair helper: recreate a missing inode item from whatever the scan
 * recorded (links, sizes, dir items vs. extents). Mode bits are guessed
 * (0755 dir or regular file), so the user is warned to verify afterwards.
 * NOTE(review): listing has gaps — the "out" label and return are missing
 * from view. @backref and @root_dir are not used in the visible lines.
 */
2288 static int create_inode_item(struct btrfs_root *root,
2289 struct inode_record *rec,
2290 struct inode_backref *backref, int root_dir)
2292 struct btrfs_trans_handle *trans;
2293 struct btrfs_inode_item inode_item;
2294 time_t now = time(NULL);
2297 trans = btrfs_start_transaction(root, 1);
2298 if (IS_ERR(trans)) {
2299 ret = PTR_ERR(trans);
2303 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2304 "be incomplete, please check permissions and content after "
2305 "the fsck completes.\n", (unsigned long long)root->objectid,
2306 (unsigned long long)rec->ino);
2308 memset(&inode_item, 0, sizeof(inode_item));
2309 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
/* nlink falls back to 1 when no links were found during the scan. */
2311 btrfs_set_stack_inode_nlink(&inode_item, 1);
2313 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2314 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
/* Dir items found -> treat as a directory, even if extents also exist. */
2315 if (rec->found_dir_item) {
2316 if (rec->found_file_extent)
2317 fprintf(stderr, "root %llu inode %llu has both a dir "
2318 "item and extents, unsure if it is a dir or a "
2319 "regular file so setting it as a directory\n",
2320 (unsigned long long)root->objectid,
2321 (unsigned long long)rec->ino);
2322 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2323 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/* NOTE(review): "else if (!found_dir_item)" is redundant — plain else. */
2324 } else if (!rec->found_dir_item) {
2325 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2326 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
/* Timestamps: a/c/mtime set to "now", otime left at zero. */
2328 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2329 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2330 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2331 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2332 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2333 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2334 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2335 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2337 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2339 btrfs_commit_transaction(trans, root);
/*
 * Walk every backref of @rec and repair inconsistent name-link state:
 * delete bad dir indexes, add missing ones, insert missing dir item/index
 * pairs for dangling inode refs, and recreate the inode item when only
 * complete backrefs remain. Returns the number of repairs (or an error).
 * NOTE(review): listing has many gaps — error handling, continues and the
 * repaired++ bookkeeping are mostly missing from view.
 */
2343 static int repair_inode_backrefs(struct btrfs_root *root,
2344 struct inode_record *rec,
2345 struct cache_tree *inode_cache,
2348 struct inode_backref *tmp, *backref;
2349 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2353 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* The root dir gets its inode item recreated rather than links fixed. */
2354 if (!delete && rec->ino == root_dirid) {
2355 if (!rec->found_inode_item) {
2356 ret = create_inode_item(root, rec, backref, 1);
2363 /* Index 0 for root dir's are special, don't mess with it */
2364 if (rec->ino == root_dirid && backref->index == 0)
/* Dir index present but unmatched/orphaned: remove it. */
2368 ((backref->found_dir_index && !backref->found_inode_ref) ||
2369 (backref->found_dir_index && backref->found_inode_ref &&
2370 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2371 ret = delete_dir_index(root, inode_cache, rec, backref);
2375 list_del(&backref->list);
/* Dir item + inode ref but no index: insert the missing index. */
2379 if (!delete && !backref->found_dir_index &&
2380 backref->found_dir_item && backref->found_inode_ref) {
2381 ret = add_missing_dir_index(root, inode_cache, rec,
/* NOTE(review): found_dir_index is tested twice here — duplicated term. */
2386 if (backref->found_dir_item &&
2387 backref->found_dir_index &&
2388 backref->found_dir_index) {
/* Fully consistent backref: drop it from the to-repair list. */
2389 if (!backref->errors &&
2390 backref->found_inode_ref) {
2391 list_del(&backref->list);
/* Only an inode ref exists: insert the dir item/index pair. */
2397 if (!delete && (!backref->found_dir_index &&
2398 !backref->found_dir_item &&
2399 backref->found_inode_ref)) {
2400 struct btrfs_trans_handle *trans;
2401 struct btrfs_key location;
/* Bail to the nlink-repair path on a name conflict. */
2403 ret = check_dir_conflict(root, backref->name,
2409 * let nlink fixing routine to handle it,
2410 * which can do it better.
2415 location.objectid = rec->ino;
2416 location.type = BTRFS_INODE_ITEM_KEY;
2417 location.offset = 0;
2419 trans = btrfs_start_transaction(root, 1);
2420 if (IS_ERR(trans)) {
2421 ret = PTR_ERR(trans);
2424 fprintf(stderr, "adding missing dir index/item pair "
2426 (unsigned long long)rec->ino);
2427 ret = btrfs_insert_dir_item(trans, root, backref->name,
2429 backref->dir, &location,
2430 imode_to_type(rec->imode),
2433 btrfs_commit_transaction(trans, root);
/* Complete backref but no inode item: recreate the inode item. */
2437 if (!delete && (backref->found_inode_ref &&
2438 backref->found_dir_index &&
2439 backref->found_dir_item &&
2440 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2441 !rec->found_inode_item)) {
2442 ret = create_inode_item(root, rec, backref, 0);
2449 return ret ? ret : repaired;
2453 * To determine the file type for nlink/inode_item repair
2455 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2456 * Return -ENOENT if file type is not found.
2458 static int find_file_type(struct inode_record *rec, u8 *type)
2460 struct inode_backref *backref;
2462 /* For inode item recovered case */
2463 if (rec->found_inode_item) {
2464 *type = imode_to_type(rec->imode);
2468 list_for_each_entry(backref, &rec->backrefs, list) {
2469 if (backref->found_dir_index || backref->found_dir_item) {
2470 *type = backref->filetype;
2478 * To determine the file name for nlink repair
2480 * Return 0 if file name is found, set name and namelen.
2481 * Return -ENOENT if file name is not found.
2483 static int find_file_name(struct inode_record *rec,
2484 char *name, int *namelen)
2486 struct inode_backref *backref;
2488 list_for_each_entry(backref, &rec->backrefs, list) {
2489 if (backref->found_dir_index || backref->found_dir_item ||
2490 backref->found_inode_ref) {
2491 memcpy(name, backref->name, backref->namelen);
2492 *namelen = backref->namelen;
2499 /* Reset the nlink of the inode to the correct one */
2500 static int reset_nlink(struct btrfs_trans_handle *trans,
2501 struct btrfs_root *root,
2502 struct btrfs_path *path,
2503 struct inode_record *rec)
2505 struct inode_backref *backref;
2506 struct inode_backref *tmp;
2507 struct btrfs_key key;
2508 struct btrfs_inode_item *inode_item;
2511 /* We don't believe this either, reset it and iterate backref */
2512 rec->found_link = 0;
2514 /* Remove all backref including the valid ones */
2515 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2516 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2517 backref->index, backref->name,
2518 backref->namelen, 0);
2522 /* remove invalid backref, so it won't be added back */
2523 if (!(backref->found_dir_index &&
2524 backref->found_dir_item &&
2525 backref->found_inode_ref)) {
2526 list_del(&backref->list);
2533 /* Set nlink to 0 */
2534 key.objectid = rec->ino;
2535 key.type = BTRFS_INODE_ITEM_KEY;
2537 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2544 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2545 struct btrfs_inode_item);
2546 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2547 btrfs_mark_buffer_dirty(path->nodes[0]);
2548 btrfs_release_path(path);
2551 * Add back valid inode_ref/dir_item/dir_index,
2552 * add_link() will handle the nlink inc, so new nlink must be correct
2554 list_for_each_entry(backref, &rec->backrefs, list) {
2555 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2556 backref->name, backref->namelen,
2557 backref->filetype, &backref->index, 1);
2562 btrfs_release_path(path);
/*
 * Repair a wrong link count: recover name/type, reset nlink via
 * reset_nlink(), and if no valid link remains, relink the inode into a
 * "lost+found" directory (appending ".INO" suffixes on name collisions).
 * Clears I_ERR_LINK_COUNT_WRONG unconditionally to avoid looping on the
 * same inode.
 * NOTE(review): listing has gaps — error gotos, the "out" label and the
 * return are missing from view.
 */
2566 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2567 struct btrfs_root *root,
2568 struct btrfs_path *path,
2569 struct inode_record *rec)
2571 char *dir_name = "lost+found";
2572 char namebuf[BTRFS_NAME_LEN] = {0};
2577 int name_recovered = 0;
2578 int type_recovered = 0;
2582 * Get file name and type first before these invalid inode ref
2583 * are deleted by remove_all_invalid_backref()
2585 name_recovered = !find_file_name(rec, namebuf, &namelen);
2586 type_recovered = !find_file_type(rec, &type);
/* Fall back to the inode number as the file name. */
2588 if (!name_recovered) {
2589 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2590 rec->ino, rec->ino);
2591 namelen = count_digits(rec->ino);
2592 sprintf(namebuf, "%llu", rec->ino);
/* Fall back to a regular file when the type is unknown. */
2595 if (!type_recovered) {
2596 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2598 type = BTRFS_FT_REG_FILE;
2602 ret = reset_nlink(trans, root, path, rec);
2605 "Failed to reset nlink for inode %llu: %s\n",
2606 rec->ino, strerror(-ret));
/* No valid link left: move the inode into lost+found. */
2610 if (rec->found_link == 0) {
2611 lost_found_ino = root->highest_inode;
2612 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2617 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2618 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2621 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2622 dir_name, strerror(-ret));
2625 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2626 namebuf, namelen, type, NULL, 1);
2628 * Add ".INO" suffix several times to handle case where
2629 * "FILENAME.INO" is already taken by another file.
2631 while (ret == -EEXIST) {
2633 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2635 if (namelen + count_digits(rec->ino) + 1 >
2640 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2642 namelen += count_digits(rec->ino) + 1;
2643 ret = btrfs_add_link(trans, root, rec->ino,
2644 lost_found_ino, namebuf,
2645 namelen, type, NULL, 1);
2649 "Failed to link the inode %llu to %s dir: %s\n",
2650 rec->ino, dir_name, strerror(-ret));
2654 * Just increase the found_link, don't actually add the
2655 * backref. This will make things easier and this inode
2656 * record will be freed after the repair is done.
2657 * So fsck will not report problem about this inode.
2660 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2661 namelen, namebuf, dir_name);
2663 printf("Fixed the nlink of inode %llu\n", rec->ino);
2666 * Clear the flag anyway, or we will loop forever for the same inode
2667 * as it will not be removed from the bad inode list and the dead loop
2670 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2671 btrfs_release_path(path);
2676 * Check if there is any normal(reg or prealloc) file extent for given
2678 * This is used to determine the file type when neither its dir_index/item or
2679 * inode_item exists.
2681 * This will *NOT* report error, if any error happens, just consider it does
2682 * not have any normal file extent.
2684 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2686 struct btrfs_path path;
2687 struct btrfs_key key;
2688 struct btrfs_key found_key;
2689 struct btrfs_file_extent_item *fi;
2693 btrfs_init_path(&path);
2695 key.type = BTRFS_EXTENT_DATA_KEY;
2698 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2703 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2704 ret = btrfs_next_leaf(root, &path);
2711 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
2713 if (found_key.objectid != ino ||
2714 found_key.type != BTRFS_EXTENT_DATA_KEY)
2716 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2717 struct btrfs_file_extent_item);
2718 type = btrfs_file_extent_type(path.nodes[0], fi);
2719 if (type != BTRFS_FILE_EXTENT_INLINE) {
2725 btrfs_release_path(&path);
/*
 * Map a BTRFS_FT_* directory-entry type to the corresponding S_IF* mode
 * bits via a static lookup table.
 * NOTE(review): @type is used as an index without a range check — callers
 * must pass a valid BTRFS_FT_* value.
 */
2729 static u32 btrfs_type_to_imode(u8 type)
2731 static u32 imode_by_btrfs_type[] = {
2732 [BTRFS_FT_REG_FILE] = S_IFREG,
2733 [BTRFS_FT_DIR] = S_IFDIR,
2734 [BTRFS_FT_CHRDEV] = S_IFCHR,
2735 [BTRFS_FT_BLKDEV] = S_IFBLK,
2736 [BTRFS_FT_FIFO] = S_IFIFO,
2737 [BTRFS_FT_SOCK] = S_IFSOCK,
2738 [BTRFS_FT_SYMLINK] = S_IFLNK,
2741 return imode_by_btrfs_type[(type)];
/*
 * Repair helper: rebuild a missing inode item for @rec. The file type is
 * recovered from backrefs if possible, otherwise guessed from the
 * presence of normal file extents, dir items or orphan extents (regular
 * file as the last resort). Afterwards I_ERR_LINK_COUNT_WRONG is set so
 * the nlink repair pass runs on this inode.
 * NOTE(review): listing has gaps — mode initialization, error handling
 * and the return are missing from view.
 */
2744 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2745 struct btrfs_root *root,
2746 struct btrfs_path *path,
2747 struct inode_record *rec)
2751 int type_recovered = 0;
2754 printf("Trying to rebuild inode:%llu\n", rec->ino);
2756 type_recovered = !find_file_type(rec, &filetype);
2759 * Try to determine inode type if type not found.
2761 * For found regular file extent, it must be FILE.
2762 * For found dir_item/index, it must be DIR.
2764 * For undetermined one, use FILE as fallback.
2767 * 1. If found backref(inode_index/item is already handled) to it,
2769 * Need new inode-inode ref structure to allow search for that.
2771 if (!type_recovered) {
2772 if (rec->found_file_extent &&
2773 find_normal_file_extent(root, rec->ino)) {
2775 filetype = BTRFS_FT_REG_FILE;
2776 } else if (rec->found_dir_item) {
2778 filetype = BTRFS_FT_DIR;
/* Orphan data extents imply regular file data existed. */
2779 } else if (!list_empty(&rec->orphan_extents)) {
2781 filetype = BTRFS_FT_REG_FILE;
2783 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2786 filetype = BTRFS_FT_REG_FILE;
2790 ret = btrfs_new_inode(trans, root, rec->ino,
2791 mode | btrfs_type_to_imode(filetype));
2796 * Here inode rebuild is done, we only rebuild the inode item,
2797 * don't repair the nlink(like move to lost+found).
2798 * That is the job of nlink repair.
2800 * We just fill the record and return
2802 rec->found_dir_item = 1;
2803 rec->imode = mode | btrfs_type_to_imode(filetype);
2805 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2806 /* Ensure the inode_nlinks repair function will be called */
2807 rec->errors |= I_ERR_LINK_COUNT_WRONG;
2812 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2813 struct btrfs_root *root,
2814 struct btrfs_path *path,
2815 struct inode_record *rec)
2817 struct orphan_data_extent *orphan;
2818 struct orphan_data_extent *tmp;
2821 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2823 * Check for conflicting file extents
2825 * Here we don't know whether the extents is compressed or not,
2826 * so we can only assume it not compressed nor data offset,
2827 * and use its disk_len as extent length.
2829 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2830 orphan->offset, orphan->disk_len, 0);
2831 btrfs_release_path(path);
2836 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2837 orphan->disk_bytenr, orphan->disk_len);
2838 ret = btrfs_free_extent(trans,
2839 root->fs_info->extent_root,
2840 orphan->disk_bytenr, orphan->disk_len,
2841 0, root->objectid, orphan->objectid,
2846 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2847 orphan->offset, orphan->disk_bytenr,
2848 orphan->disk_len, orphan->disk_len);
2852 /* Update file size info */
2853 rec->found_size += orphan->disk_len;
2854 if (rec->found_size == rec->nbytes)
2855 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2857 /* Update the file extent hole info too */
2858 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2862 if (RB_EMPTY_ROOT(&rec->holes))
2863 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2865 list_del(&orphan->list);
2868 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2873 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2874 struct btrfs_root *root,
2875 struct btrfs_path *path,
2876 struct inode_record *rec)
2878 struct rb_node *node;
2879 struct file_extent_hole *hole;
2883 node = rb_first(&rec->holes);
2887 hole = rb_entry(node, struct file_extent_hole, node);
2888 ret = btrfs_punch_hole(trans, root, rec->ino,
2889 hole->start, hole->len);
2892 ret = del_file_extent_hole(&rec->holes, hole->start,
2896 if (RB_EMPTY_ROOT(&rec->holes))
2897 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2898 node = rb_first(&rec->holes);
2900 /* special case for a file losing all its file extent */
2902 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2903 round_up(rec->isize, root->sectorsize));
2907 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2908 rec->ino, root->objectid);
2913 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2915 struct btrfs_trans_handle *trans;
2916 struct btrfs_path path;
2919 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2920 I_ERR_NO_ORPHAN_ITEM |
2921 I_ERR_LINK_COUNT_WRONG |
2922 I_ERR_NO_INODE_ITEM |
2923 I_ERR_FILE_EXTENT_ORPHAN |
2924 I_ERR_FILE_EXTENT_DISCOUNT|
2925 I_ERR_FILE_NBYTES_WRONG)))
2929 * For nlink repair, it may create a dir and add link, so
2930 * 2 for parent(256)'s dir_index and dir_item
2931 * 2 for lost+found dir's inode_item and inode_ref
2932 * 1 for the new inode_ref of the file
2933 * 2 for lost+found dir's dir_index and dir_item for the file
2935 trans = btrfs_start_transaction(root, 7);
2937 return PTR_ERR(trans);
2939 btrfs_init_path(&path);
2940 if (rec->errors & I_ERR_NO_INODE_ITEM)
2941 ret = repair_inode_no_item(trans, root, &path, rec);
2942 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2943 ret = repair_inode_orphan_extent(trans, root, &path, rec);
2944 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2945 ret = repair_inode_discount_extent(trans, root, &path, rec);
2946 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2947 ret = repair_inode_isize(trans, root, &path, rec);
2948 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2949 ret = repair_inode_orphan_item(trans, root, &path, rec);
2950 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2951 ret = repair_inode_nlinks(trans, root, &path, rec);
2952 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2953 ret = repair_inode_nbytes(trans, root, &path, rec);
2954 btrfs_commit_transaction(trans, root);
2955 btrfs_release_path(&path);
2959 static int check_inode_recs(struct btrfs_root *root,
2960 struct cache_tree *inode_cache)
2962 struct cache_extent *cache;
2963 struct ptr_node *node;
2964 struct inode_record *rec;
2965 struct inode_backref *backref;
2970 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2972 if (btrfs_root_refs(&root->root_item) == 0) {
2973 if (!cache_tree_empty(inode_cache))
2974 fprintf(stderr, "warning line %d\n", __LINE__);
2979 * We need to record the highest inode number for later 'lost+found'
2981 * We must select an ino not used/referred by any existing inode, or
2982 * 'lost+found' ino may be a missing ino in a corrupted leaf,
2983 * this may cause 'lost+found' dir has wrong nlinks.
2985 cache = last_cache_extent(inode_cache);
2987 node = container_of(cache, struct ptr_node, cache);
2989 if (rec->ino > root->highest_inode)
2990 root->highest_inode = rec->ino;
2994 * We need to repair backrefs first because we could change some of the
2995 * errors in the inode recs.
2997 * We also need to go through and delete invalid backrefs first and then
2998 * add the correct ones second. We do this because we may get EEXIST
2999 * when adding back the correct index because we hadn't yet deleted the
3002 * For example, if we were missing a dir index then the directories
3003 * isize would be wrong, so if we fixed the isize to what we thought it
3004 * would be and then fixed the backref we'd still have a invalid fs, so
3005 * we need to add back the dir index and then check to see if the isize
3010 if (stage == 3 && !err)
3013 cache = search_cache_extent(inode_cache, 0);
3014 while (repair && cache) {
3015 node = container_of(cache, struct ptr_node, cache);
3017 cache = next_cache_extent(cache);
3019 /* Need to free everything up and rescan */
3021 remove_cache_extent(inode_cache, &node->cache);
3023 free_inode_rec(rec);
3027 if (list_empty(&rec->backrefs))
3030 ret = repair_inode_backrefs(root, rec, inode_cache,
3044 rec = get_inode_rec(inode_cache, root_dirid, 0);
3045 BUG_ON(IS_ERR(rec));
3047 ret = check_root_dir(rec);
3049 fprintf(stderr, "root %llu root dir %llu error\n",
3050 (unsigned long long)root->root_key.objectid,
3051 (unsigned long long)root_dirid);
3052 print_inode_error(root, rec);
3057 struct btrfs_trans_handle *trans;
3059 trans = btrfs_start_transaction(root, 1);
3060 if (IS_ERR(trans)) {
3061 err = PTR_ERR(trans);
3066 "root %llu missing its root dir, recreating\n",
3067 (unsigned long long)root->objectid);
3069 ret = btrfs_make_root_dir(trans, root, root_dirid);
3072 btrfs_commit_transaction(trans, root);
3076 fprintf(stderr, "root %llu root dir %llu not found\n",
3077 (unsigned long long)root->root_key.objectid,
3078 (unsigned long long)root_dirid);
3082 cache = search_cache_extent(inode_cache, 0);
3085 node = container_of(cache, struct ptr_node, cache);
3087 remove_cache_extent(inode_cache, &node->cache);
3089 if (rec->ino == root_dirid ||
3090 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3091 free_inode_rec(rec);
3095 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3096 ret = check_orphan_item(root, rec->ino);
3098 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3099 if (can_free_inode_rec(rec)) {
3100 free_inode_rec(rec);
3105 if (!rec->found_inode_item)
3106 rec->errors |= I_ERR_NO_INODE_ITEM;
3107 if (rec->found_link != rec->nlink)
3108 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3110 ret = try_repair_inode(root, rec);
3111 if (ret == 0 && can_free_inode_rec(rec)) {
3112 free_inode_rec(rec);
3118 if (!(repair && ret == 0))
3120 print_inode_error(root, rec);
3121 list_for_each_entry(backref, &rec->backrefs, list) {
3122 if (!backref->found_dir_item)
3123 backref->errors |= REF_ERR_NO_DIR_ITEM;
3124 if (!backref->found_dir_index)
3125 backref->errors |= REF_ERR_NO_DIR_INDEX;
3126 if (!backref->found_inode_ref)
3127 backref->errors |= REF_ERR_NO_INODE_REF;
3128 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3129 " namelen %u name %s filetype %d errors %x",
3130 (unsigned long long)backref->dir,
3131 (unsigned long long)backref->index,
3132 backref->namelen, backref->name,
3133 backref->filetype, backref->errors);
3134 print_ref_error(backref->errors);
3136 free_inode_rec(rec);
3138 return (error > 0) ? -1 : 0;
3141 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3144 struct cache_extent *cache;
3145 struct root_record *rec = NULL;
3148 cache = lookup_cache_extent(root_cache, objectid, 1);
3150 rec = container_of(cache, struct root_record, cache);
3152 rec = calloc(1, sizeof(*rec));
3154 return ERR_PTR(-ENOMEM);
3155 rec->objectid = objectid;
3156 INIT_LIST_HEAD(&rec->backrefs);
3157 rec->cache.start = objectid;
3158 rec->cache.size = 1;
3160 ret = insert_cache_extent(root_cache, &rec->cache);
3162 return ERR_PTR(-EEXIST);
3167 static struct root_backref *get_root_backref(struct root_record *rec,
3168 u64 ref_root, u64 dir, u64 index,
3169 const char *name, int namelen)
3171 struct root_backref *backref;
3173 list_for_each_entry(backref, &rec->backrefs, list) {
3174 if (backref->ref_root != ref_root || backref->dir != dir ||
3175 backref->namelen != namelen)
3177 if (memcmp(name, backref->name, namelen))
3182 backref = calloc(1, sizeof(*backref) + namelen + 1);
3185 backref->ref_root = ref_root;
3187 backref->index = index;
3188 backref->namelen = namelen;
3189 memcpy(backref->name, name, namelen);
3190 backref->name[namelen] = '\0';
3191 list_add_tail(&backref->list, &rec->backrefs);
3195 static void free_root_record(struct cache_extent *cache)
3197 struct root_record *rec;
3198 struct root_backref *backref;
3200 rec = container_of(cache, struct root_record, cache);
3201 while (!list_empty(&rec->backrefs)) {
3202 backref = to_root_backref(rec->backrefs.next);
3203 list_del(&backref->list);
3210 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3212 static int add_root_backref(struct cache_tree *root_cache,
3213 u64 root_id, u64 ref_root, u64 dir, u64 index,
3214 const char *name, int namelen,
3215 int item_type, int errors)
3217 struct root_record *rec;
3218 struct root_backref *backref;
3220 rec = get_root_rec(root_cache, root_id);
3221 BUG_ON(IS_ERR(rec));
3222 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3225 backref->errors |= errors;
3227 if (item_type != BTRFS_DIR_ITEM_KEY) {
3228 if (backref->found_dir_index || backref->found_back_ref ||
3229 backref->found_forward_ref) {
3230 if (backref->index != index)
3231 backref->errors |= REF_ERR_INDEX_UNMATCH;
3233 backref->index = index;
3237 if (item_type == BTRFS_DIR_ITEM_KEY) {
3238 if (backref->found_forward_ref)
3240 backref->found_dir_item = 1;
3241 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3242 backref->found_dir_index = 1;
3243 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3244 if (backref->found_forward_ref)
3245 backref->errors |= REF_ERR_DUP_ROOT_REF;
3246 else if (backref->found_dir_item)
3248 backref->found_forward_ref = 1;
3249 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3250 if (backref->found_back_ref)
3251 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3252 backref->found_back_ref = 1;
3257 if (backref->found_forward_ref && backref->found_dir_item)
3258 backref->reachable = 1;
3262 static int merge_root_recs(struct btrfs_root *root,
3263 struct cache_tree *src_cache,
3264 struct cache_tree *dst_cache)
3266 struct cache_extent *cache;
3267 struct ptr_node *node;
3268 struct inode_record *rec;
3269 struct inode_backref *backref;
3272 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3273 free_inode_recs_tree(src_cache);
3278 cache = search_cache_extent(src_cache, 0);
3281 node = container_of(cache, struct ptr_node, cache);
3283 remove_cache_extent(src_cache, &node->cache);
3286 ret = is_child_root(root, root->objectid, rec->ino);
3292 list_for_each_entry(backref, &rec->backrefs, list) {
3293 BUG_ON(backref->found_inode_ref);
3294 if (backref->found_dir_item)
3295 add_root_backref(dst_cache, rec->ino,
3296 root->root_key.objectid, backref->dir,
3297 backref->index, backref->name,
3298 backref->namelen, BTRFS_DIR_ITEM_KEY,
3300 if (backref->found_dir_index)
3301 add_root_backref(dst_cache, rec->ino,
3302 root->root_key.objectid, backref->dir,
3303 backref->index, backref->name,
3304 backref->namelen, BTRFS_DIR_INDEX_KEY,
3308 free_inode_rec(rec);
3315 static int check_root_refs(struct btrfs_root *root,
3316 struct cache_tree *root_cache)
3318 struct root_record *rec;
3319 struct root_record *ref_root;
3320 struct root_backref *backref;
3321 struct cache_extent *cache;
3327 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3328 BUG_ON(IS_ERR(rec));
3331 /* fixme: this can not detect circular references */
3334 cache = search_cache_extent(root_cache, 0);
3338 rec = container_of(cache, struct root_record, cache);
3339 cache = next_cache_extent(cache);
3341 if (rec->found_ref == 0)
3344 list_for_each_entry(backref, &rec->backrefs, list) {
3345 if (!backref->reachable)
3348 ref_root = get_root_rec(root_cache,
3350 BUG_ON(IS_ERR(ref_root));
3351 if (ref_root->found_ref > 0)
3354 backref->reachable = 0;
3356 if (rec->found_ref == 0)
3362 cache = search_cache_extent(root_cache, 0);
3366 rec = container_of(cache, struct root_record, cache);
3367 cache = next_cache_extent(cache);
3369 if (rec->found_ref == 0 &&
3370 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3371 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3372 ret = check_orphan_item(root->fs_info->tree_root,
3378 * If we don't have a root item then we likely just have
3379 * a dir item in a snapshot for this root but no actual
3380 * ref key or anything so it's meaningless.
3382 if (!rec->found_root_item)
3385 fprintf(stderr, "fs tree %llu not referenced\n",
3386 (unsigned long long)rec->objectid);
3390 if (rec->found_ref > 0 && !rec->found_root_item)
3392 list_for_each_entry(backref, &rec->backrefs, list) {
3393 if (!backref->found_dir_item)
3394 backref->errors |= REF_ERR_NO_DIR_ITEM;
3395 if (!backref->found_dir_index)
3396 backref->errors |= REF_ERR_NO_DIR_INDEX;
3397 if (!backref->found_back_ref)
3398 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3399 if (!backref->found_forward_ref)
3400 backref->errors |= REF_ERR_NO_ROOT_REF;
3401 if (backref->reachable && backref->errors)
3408 fprintf(stderr, "fs tree %llu refs %u %s\n",
3409 (unsigned long long)rec->objectid, rec->found_ref,
3410 rec->found_root_item ? "" : "not found");
3412 list_for_each_entry(backref, &rec->backrefs, list) {
3413 if (!backref->reachable)
3415 if (!backref->errors && rec->found_root_item)
3417 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3418 " index %llu namelen %u name %s errors %x\n",
3419 (unsigned long long)backref->ref_root,
3420 (unsigned long long)backref->dir,
3421 (unsigned long long)backref->index,
3422 backref->namelen, backref->name,
3424 print_ref_error(backref->errors);
3427 return errors > 0 ? 1 : 0;
3430 static int process_root_ref(struct extent_buffer *eb, int slot,
3431 struct btrfs_key *key,
3432 struct cache_tree *root_cache)
3438 struct btrfs_root_ref *ref;
3439 char namebuf[BTRFS_NAME_LEN];
3442 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3444 dirid = btrfs_root_ref_dirid(eb, ref);
3445 index = btrfs_root_ref_sequence(eb, ref);
3446 name_len = btrfs_root_ref_name_len(eb, ref);
3448 if (name_len <= BTRFS_NAME_LEN) {
3452 len = BTRFS_NAME_LEN;
3453 error = REF_ERR_NAME_TOO_LONG;
3455 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3457 if (key->type == BTRFS_ROOT_REF_KEY) {
3458 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3459 index, namebuf, len, key->type, error);
3461 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3462 index, namebuf, len, key->type, error);
3467 static void free_corrupt_block(struct cache_extent *cache)
3469 struct btrfs_corrupt_block *corrupt;
3471 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3475 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3478 * Repair the btree of the given root.
3480 * The fix is to remove the node key in corrupt_blocks cache_tree.
3481 * and rebalance the tree.
3482 * After the fix, the btree should be writeable.
3484 static int repair_btree(struct btrfs_root *root,
3485 struct cache_tree *corrupt_blocks)
3487 struct btrfs_trans_handle *trans;
3488 struct btrfs_path path;
3489 struct btrfs_corrupt_block *corrupt;
3490 struct cache_extent *cache;
3491 struct btrfs_key key;
3496 if (cache_tree_empty(corrupt_blocks))
3499 trans = btrfs_start_transaction(root, 1);
3500 if (IS_ERR(trans)) {
3501 ret = PTR_ERR(trans);
3502 fprintf(stderr, "Error starting transaction: %s\n",
3506 btrfs_init_path(&path);
3507 cache = first_cache_extent(corrupt_blocks);
3509 corrupt = container_of(cache, struct btrfs_corrupt_block,
3511 level = corrupt->level;
3512 path.lowest_level = level;
3513 key.objectid = corrupt->key.objectid;
3514 key.type = corrupt->key.type;
3515 key.offset = corrupt->key.offset;
3518 * Here we don't want to do any tree balance, since it may
3519 * cause a balance with corrupted brother leaf/node,
3520 * so ins_len set to 0 here.
3521 * Balance will be done after all corrupt node/leaf is deleted.
3523 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3526 offset = btrfs_node_blockptr(path.nodes[level],
3529 /* Remove the ptr */
3530 ret = btrfs_del_ptr(trans, root, &path, level,
3535 * Remove the corresponding extent
3536 * return value is not concerned.
3538 btrfs_release_path(&path);
3539 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3540 0, root->root_key.objectid,
3542 cache = next_cache_extent(cache);
3545 /* Balance the btree using btrfs_search_slot() */
3546 cache = first_cache_extent(corrupt_blocks);
3548 corrupt = container_of(cache, struct btrfs_corrupt_block,
3550 memcpy(&key, &corrupt->key, sizeof(key));
3551 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3554 /* return will always >0 since it won't find the item */
3556 btrfs_release_path(&path);
3557 cache = next_cache_extent(cache);
3560 btrfs_commit_transaction(trans, root);
3561 btrfs_release_path(&path);
3565 static int check_fs_root(struct btrfs_root *root,
3566 struct cache_tree *root_cache,
3567 struct walk_control *wc)
3573 struct btrfs_path path;
3574 struct shared_node root_node;
3575 struct root_record *rec;
3576 struct btrfs_root_item *root_item = &root->root_item;
3577 struct cache_tree corrupt_blocks;
3578 struct orphan_data_extent *orphan;
3579 struct orphan_data_extent *tmp;
3580 enum btrfs_tree_block_status status;
3581 struct node_refs nrefs;
3584 * Reuse the corrupt_block cache tree to record corrupted tree block
3586 * Unlike the usage in extent tree check, here we do it in a per
3587 * fs/subvol tree base.
3589 cache_tree_init(&corrupt_blocks);
3590 root->fs_info->corrupt_blocks = &corrupt_blocks;
3592 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3593 rec = get_root_rec(root_cache, root->root_key.objectid);
3594 BUG_ON(IS_ERR(rec));
3595 if (btrfs_root_refs(root_item) > 0)
3596 rec->found_root_item = 1;
3599 btrfs_init_path(&path);
3600 memset(&root_node, 0, sizeof(root_node));
3601 cache_tree_init(&root_node.root_cache);
3602 cache_tree_init(&root_node.inode_cache);
3603 memset(&nrefs, 0, sizeof(nrefs));
3605 /* Move the orphan extent record to corresponding inode_record */
3606 list_for_each_entry_safe(orphan, tmp,
3607 &root->orphan_data_extents, list) {
3608 struct inode_record *inode;
3610 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3612 BUG_ON(IS_ERR(inode));
3613 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3614 list_move(&orphan->list, &inode->orphan_extents);
3617 level = btrfs_header_level(root->node);
3618 memset(wc->nodes, 0, sizeof(wc->nodes));
3619 wc->nodes[level] = &root_node;
3620 wc->active_node = level;
3621 wc->root_level = level;
3623 /* We may not have checked the root block, lets do that now */
3624 if (btrfs_is_leaf(root->node))
3625 status = btrfs_check_leaf(root, NULL, root->node);
3627 status = btrfs_check_node(root, NULL, root->node);
3628 if (status != BTRFS_TREE_BLOCK_CLEAN)
3631 if (btrfs_root_refs(root_item) > 0 ||
3632 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3633 path.nodes[level] = root->node;
3634 extent_buffer_get(root->node);
3635 path.slots[level] = 0;
3637 struct btrfs_key key;
3638 struct btrfs_disk_key found_key;
3640 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3641 level = root_item->drop_level;
3642 path.lowest_level = level;
3643 if (level > btrfs_header_level(root->node) ||
3644 level >= BTRFS_MAX_LEVEL) {
3645 error("ignoring invalid drop level: %u", level);
3648 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3651 btrfs_node_key(path.nodes[level], &found_key,
3653 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3654 sizeof(found_key)));
3658 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3664 wret = walk_up_tree(root, &path, wc, &level);
3671 btrfs_release_path(&path);
3673 if (!cache_tree_empty(&corrupt_blocks)) {
3674 struct cache_extent *cache;
3675 struct btrfs_corrupt_block *corrupt;
3677 printf("The following tree block(s) is corrupted in tree %llu:\n",
3678 root->root_key.objectid);
3679 cache = first_cache_extent(&corrupt_blocks);
3681 corrupt = container_of(cache,
3682 struct btrfs_corrupt_block,
3684 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3685 cache->start, corrupt->level,
3686 corrupt->key.objectid, corrupt->key.type,
3687 corrupt->key.offset);
3688 cache = next_cache_extent(cache);
3691 printf("Try to repair the btree for root %llu\n",
3692 root->root_key.objectid);
3693 ret = repair_btree(root, &corrupt_blocks);
3695 fprintf(stderr, "Failed to repair btree: %s\n",
3698 printf("Btree for root %llu is fixed\n",
3699 root->root_key.objectid);
3703 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3707 if (root_node.current) {
3708 root_node.current->checked = 1;
3709 maybe_free_inode_rec(&root_node.inode_cache,
3713 err = check_inode_recs(root, &root_node.inode_cache);
3717 free_corrupt_blocks_tree(&corrupt_blocks);
3718 root->fs_info->corrupt_blocks = NULL;
3719 free_orphan_data_extents(&root->orphan_data_extents);
3723 static int fs_root_objectid(u64 objectid)
3725 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3726 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3728 return is_fstree(objectid);
3731 static int check_fs_roots(struct btrfs_root *root,
3732 struct cache_tree *root_cache)
3734 struct btrfs_path path;
3735 struct btrfs_key key;
3736 struct walk_control wc;
3737 struct extent_buffer *leaf, *tree_node;
3738 struct btrfs_root *tmp_root;
3739 struct btrfs_root *tree_root = root->fs_info->tree_root;
3743 if (ctx.progress_enabled) {
3744 ctx.tp = TASK_FS_ROOTS;
3745 task_start(ctx.info);
3749 * Just in case we made any changes to the extent tree that weren't
3750 * reflected into the free space cache yet.
3753 reset_cached_block_groups(root->fs_info);
3754 memset(&wc, 0, sizeof(wc));
3755 cache_tree_init(&wc.shared);
3756 btrfs_init_path(&path);
3761 key.type = BTRFS_ROOT_ITEM_KEY;
3762 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3767 tree_node = tree_root->node;
3769 if (tree_node != tree_root->node) {
3770 free_root_recs_tree(root_cache);
3771 btrfs_release_path(&path);
3774 leaf = path.nodes[0];
3775 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3776 ret = btrfs_next_leaf(tree_root, &path);
3782 leaf = path.nodes[0];
3784 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3785 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3786 fs_root_objectid(key.objectid)) {
3787 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3788 tmp_root = btrfs_read_fs_root_no_cache(
3789 root->fs_info, &key);
3791 key.offset = (u64)-1;
3792 tmp_root = btrfs_read_fs_root(
3793 root->fs_info, &key);
3795 if (IS_ERR(tmp_root)) {
3799 ret = check_fs_root(tmp_root, root_cache, &wc);
3800 if (ret == -EAGAIN) {
3801 free_root_recs_tree(root_cache);
3802 btrfs_release_path(&path);
3807 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3808 btrfs_free_fs_root(tmp_root);
3809 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3810 key.type == BTRFS_ROOT_BACKREF_KEY) {
3811 process_root_ref(leaf, path.slots[0], &key,
3818 btrfs_release_path(&path);
3820 free_extent_cache_tree(&wc.shared);
3821 if (!cache_tree_empty(&wc.shared))
3822 fprintf(stderr, "warning line %d\n", __LINE__);
3824 task_stop(ctx.info);
3829 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3831 struct list_head *cur = rec->backrefs.next;
3832 struct extent_backref *back;
3833 struct tree_backref *tback;
3834 struct data_backref *dback;
3838 while(cur != &rec->backrefs) {
3839 back = to_extent_backref(cur);
3841 if (!back->found_extent_tree) {
3845 if (back->is_data) {
3846 dback = to_data_backref(back);
3847 fprintf(stderr, "Backref %llu %s %llu"
3848 " owner %llu offset %llu num_refs %lu"
3849 " not found in extent tree\n",
3850 (unsigned long long)rec->start,
3851 back->full_backref ?
3853 back->full_backref ?
3854 (unsigned long long)dback->parent:
3855 (unsigned long long)dback->root,
3856 (unsigned long long)dback->owner,
3857 (unsigned long long)dback->offset,
3858 (unsigned long)dback->num_refs);
3860 tback = to_tree_backref(back);
3861 fprintf(stderr, "Backref %llu parent %llu"
3862 " root %llu not found in extent tree\n",
3863 (unsigned long long)rec->start,
3864 (unsigned long long)tback->parent,
3865 (unsigned long long)tback->root);
3868 if (!back->is_data && !back->found_ref) {
3872 tback = to_tree_backref(back);
3873 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3874 (unsigned long long)rec->start,
3875 back->full_backref ? "parent" : "root",
3876 back->full_backref ?
3877 (unsigned long long)tback->parent :
3878 (unsigned long long)tback->root, back);
3880 if (back->is_data) {
3881 dback = to_data_backref(back);
3882 if (dback->found_ref != dback->num_refs) {
3886 fprintf(stderr, "Incorrect local backref count"
3887 " on %llu %s %llu owner %llu"
3888 " offset %llu found %u wanted %u back %p\n",
3889 (unsigned long long)rec->start,
3890 back->full_backref ?
3892 back->full_backref ?
3893 (unsigned long long)dback->parent:
3894 (unsigned long long)dback->root,
3895 (unsigned long long)dback->owner,
3896 (unsigned long long)dback->offset,
3897 dback->found_ref, dback->num_refs, back);
3899 if (dback->disk_bytenr != rec->start) {
3903 fprintf(stderr, "Backref disk bytenr does not"
3904 " match extent record, bytenr=%llu, "
3905 "ref bytenr=%llu\n",
3906 (unsigned long long)rec->start,
3907 (unsigned long long)dback->disk_bytenr);
3910 if (dback->bytes != rec->nr) {
3914 fprintf(stderr, "Backref bytes do not match "
3915 "extent backref, bytenr=%llu, ref "
3916 "bytes=%llu, backref bytes=%llu\n",
3917 (unsigned long long)rec->start,
3918 (unsigned long long)rec->nr,
3919 (unsigned long long)dback->bytes);
3922 if (!back->is_data) {
3925 dback = to_data_backref(back);
3926 found += dback->found_ref;
3929 if (found != rec->refs) {
3933 fprintf(stderr, "Incorrect global backref count "
3934 "on %llu found %llu wanted %llu\n",
3935 (unsigned long long)rec->start,
3936 (unsigned long long)found,
3937 (unsigned long long)rec->refs);
3943 static int free_all_extent_backrefs(struct extent_record *rec)
3945 struct extent_backref *back;
3946 struct list_head *cur;
3947 while (!list_empty(&rec->backrefs)) {
3948 cur = rec->backrefs.next;
3949 back = to_extent_backref(cur);
3956 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3957 struct cache_tree *extent_cache)
3959 struct cache_extent *cache;
3960 struct extent_record *rec;
3963 cache = first_cache_extent(extent_cache);
3966 rec = container_of(cache, struct extent_record, cache);
3967 remove_cache_extent(extent_cache, cache);
3968 free_all_extent_backrefs(rec);
3973 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3974 struct extent_record *rec)
3976 if (rec->content_checked && rec->owner_ref_checked &&
3977 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3978 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3979 !rec->bad_full_backref && !rec->crossing_stripes &&
3980 !rec->wrong_chunk_type) {
3981 remove_cache_extent(extent_cache, &rec->cache);
3982 free_all_extent_backrefs(rec);
3983 list_del_init(&rec->list);
3989 static int check_owner_ref(struct btrfs_root *root,
3990 struct extent_record *rec,
3991 struct extent_buffer *buf)
3993 struct extent_backref *node;
3994 struct tree_backref *back;
3995 struct btrfs_root *ref_root;
3996 struct btrfs_key key;
3997 struct btrfs_path path;
3998 struct extent_buffer *parent;
4003 list_for_each_entry(node, &rec->backrefs, list) {
4006 if (!node->found_ref)
4008 if (node->full_backref)
4010 back = to_tree_backref(node);
4011 if (btrfs_header_owner(buf) == back->root)
4014 BUG_ON(rec->is_root);
4016 /* try to find the block by search corresponding fs tree */
4017 key.objectid = btrfs_header_owner(buf);
4018 key.type = BTRFS_ROOT_ITEM_KEY;
4019 key.offset = (u64)-1;
4021 ref_root = btrfs_read_fs_root(root->fs_info, &key);
4022 if (IS_ERR(ref_root))
4025 level = btrfs_header_level(buf);
4027 btrfs_item_key_to_cpu(buf, &key, 0);
4029 btrfs_node_key_to_cpu(buf, &key, 0);
4031 btrfs_init_path(&path);
4032 path.lowest_level = level + 1;
4033 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4037 parent = path.nodes[level + 1];
4038 if (parent && buf->start == btrfs_node_blockptr(parent,
4039 path.slots[level + 1]))
4042 btrfs_release_path(&path);
4043 return found ? 0 : 1;
4046 static int is_extent_tree_record(struct extent_record *rec)
4048 struct list_head *cur = rec->backrefs.next;
4049 struct extent_backref *node;
4050 struct tree_backref *back;
4053 while(cur != &rec->backrefs) {
4054 node = to_extent_backref(cur);
4058 back = to_tree_backref(node);
4059 if (node->full_backref)
4061 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4068 static int record_bad_block_io(struct btrfs_fs_info *info,
4069 struct cache_tree *extent_cache,
4072 struct extent_record *rec;
4073 struct cache_extent *cache;
4074 struct btrfs_key key;
4076 cache = lookup_cache_extent(extent_cache, start, len);
4080 rec = container_of(cache, struct extent_record, cache);
4081 if (!is_extent_tree_record(rec))
4084 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4085 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot + 1 in @buf to restore key order.
 * For internal nodes the two key pointers are exchanged; for leaves the
 * item headers, item data and keys are all swapped.  When slot 0 is
 * touched, the low keys in the parents are fixed up as well.
 */
4088 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4089 struct extent_buffer *buf, int slot)
4091 if (btrfs_header_level(buf)) {
4092 struct btrfs_key_ptr ptr1, ptr2;
/* Internal node: swap the two btrfs_key_ptr entries in place. */
4094 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4095 sizeof(struct btrfs_key_ptr));
4096 read_extent_buffer(buf, &ptr2,
4097 btrfs_node_key_ptr_offset(slot + 1),
4098 sizeof(struct btrfs_key_ptr));
4099 write_extent_buffer(buf, &ptr1,
4100 btrfs_node_key_ptr_offset(slot + 1),
4101 sizeof(struct btrfs_key_ptr));
4102 write_extent_buffer(buf, &ptr2,
4103 btrfs_node_key_ptr_offset(slot),
4104 sizeof(struct btrfs_key_ptr));
4106 struct btrfs_disk_key key;
/* First key changed: propagate it up through the parents. */
4107 btrfs_node_key(buf, &key, 0);
4108 btrfs_fixup_low_keys(root, path, &key,
4109 btrfs_header_level(buf) + 1);
4112 struct btrfs_item *item1, *item2;
4113 struct btrfs_key k1, k2;
4114 char *item1_data, *item2_data;
4115 u32 item1_offset, item2_offset, item1_size, item2_size;
4117 item1 = btrfs_item_nr(slot);
4118 item2 = btrfs_item_nr(slot + 1);
4119 btrfs_item_key_to_cpu(buf, &k1, slot);
4120 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4121 item1_offset = btrfs_item_offset(buf, item1);
4122 item2_offset = btrfs_item_offset(buf, item2);
4123 item1_size = btrfs_item_size(buf, item1);
4124 item2_size = btrfs_item_size(buf, item2);
/* Allocation failure handling is elided from this view. */
4126 item1_data = malloc(item1_size);
4129 item2_data = malloc(item2_size);
/* Copy both payloads out, then write them back into each other's slot. */
4135 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4136 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4138 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4139 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
/* Swap the item headers' offset/size so they match the moved data. */
4143 btrfs_set_item_offset(buf, item1, item2_offset);
4144 btrfs_set_item_offset(buf, item2, item1_offset);
4145 btrfs_set_item_size(buf, item1, item2_size);
4146 btrfs_set_item_size(buf, item2, item1_size);
/* Finally exchange the keys; btrfs_set_item_key_unsafe also fixes parents. */
4148 path->slots[0] = slot;
4149 btrfs_set_item_key_unsafe(root, path, &k2);
4150 path->slots[0] = slot + 1;
4151 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair BTRFS_TREE_BLOCK_BAD_KEY_ORDER: walk adjacent key pairs in the
 * block at path->lowest_level and swap any pair found out of order, then
 * mark the buffer dirty.  Keys are read via the node or item accessor
 * depending on the block's level.
 */
4156 static int fix_key_order(struct btrfs_trans_handle *trans,
4157 struct btrfs_root *root,
4158 struct btrfs_path *path)
4160 struct extent_buffer *buf;
4161 struct btrfs_key k1, k2;
4163 int level = path->lowest_level;
4166 buf = path->nodes[level];
4167 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4169 btrfs_node_key_to_cpu(buf, &k1, i);
4170 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4172 btrfs_item_key_to_cpu(buf, &k1, i);
4173 btrfs_item_key_to_cpu(buf, &k2, i + 1);
/* Pair already ordered (k1 < k2): nothing to fix for this slot. */
4175 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4177 ret = swap_values(root, path, buf, i);
4180 btrfs_mark_buffer_dirty(buf);
/*
 * Remove a corrupt item at @slot from leaf @buf, but only for key types
 * whose loss fsck knows how to recover from (dir index, extent item,
 * metadata item, tree block ref, extent data ref).  The item header array
 * is compacted with memmove and nritems decremented; if slot 0 was
 * removed the parents' low keys are fixed up.
 */
4186 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4187 struct btrfs_root *root,
4188 struct btrfs_path *path,
4189 struct extent_buffer *buf, int slot)
4191 struct btrfs_key key;
4192 int nritems = btrfs_header_nritems(buf);
4194 btrfs_item_key_to_cpu(buf, &key, slot);
4196 /* These are all the keys we can deal with missing. */
4197 if (key.type != BTRFS_DIR_INDEX_KEY &&
4198 key.type != BTRFS_EXTENT_ITEM_KEY &&
4199 key.type != BTRFS_METADATA_ITEM_KEY &&
4200 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4201 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4204 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4205 (unsigned long long)key.objectid, key.type,
4206 (unsigned long long)key.offset, slot, buf->start);
/* Shift the following item headers down over the deleted one. */
4207 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4208 btrfs_item_nr_offset(slot + 1),
4209 sizeof(struct btrfs_item) *
4210 (nritems - slot - 1));
4211 btrfs_set_header_nritems(buf, nritems - 1);
4213 struct btrfs_disk_key disk_key;
4215 btrfs_item_key(buf, &disk_key, 0);
4216 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4218 btrfs_mark_buffer_dirty(buf);
/*
 * Repair BTRFS_TREE_BLOCK_INVALID_OFFSETS on a leaf: for each item,
 * verify its data end lines up with the leaf data size (item 0) or the
 * previous item's offset (others).  Items that extend past the valid
 * region are deleted via delete_bogus_item(); items that leave a gap are
 * shifted up by the gap size.  Only leaves may be passed in
 * (BUG_ON(path->lowest_level)).
 */
4222 static int fix_item_offset(struct btrfs_trans_handle *trans,
4223 struct btrfs_root *root,
4224 struct btrfs_path *path)
4226 struct extent_buffer *buf;
4230 /* We should only get this for leaves */
4231 BUG_ON(path->lowest_level);
4232 buf = path->nodes[0];
4234 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4235 unsigned int shift = 0, offset;
4237 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4238 BTRFS_LEAF_DATA_SIZE(root)) {
/* Item 0 running past the leaf's data area cannot be shifted. */
4239 if (btrfs_item_end_nr(buf, i) >
4240 BTRFS_LEAF_DATA_SIZE(root)) {
4241 ret = delete_bogus_item(trans, root, path,
4245 fprintf(stderr, "item is off the end of the "
4246 "leaf, can't fix\n");
4250 shift = BTRFS_LEAF_DATA_SIZE(root) -
4251 btrfs_item_end_nr(buf, i);
4252 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4253 btrfs_item_offset_nr(buf, i - 1)) {
/* Overlapping items can only be handled by deleting one. */
4254 if (btrfs_item_end_nr(buf, i) >
4255 btrfs_item_offset_nr(buf, i - 1)) {
4256 ret = delete_bogus_item(trans, root, path,
4260 fprintf(stderr, "items overlap, can't fix\n");
4264 shift = btrfs_item_offset_nr(buf, i - 1) -
4265 btrfs_item_end_nr(buf, i);
4270 printf("Shifting item nr %d by %u bytes in block %llu\n",
4271 i, shift, (unsigned long long)buf->start);
4272 offset = btrfs_item_offset_nr(buf, i);
4273 memmove_extent_buffer(buf,
4274 btrfs_leaf_data(buf) + offset + shift,
4275 btrfs_leaf_data(buf) + offset,
4276 btrfs_item_size_nr(buf, i));
4277 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4279 btrfs_mark_buffer_dirty(buf);
4283 * We may have moved things, in which case we want to exit so we don't
4284 * write those changes out. Once we have proper abort functionality in
4285 * progs this can be changed to something nicer.
4292 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4293 * then just return -EIO.
/*
 * Only BAD_KEY_ORDER and INVALID_OFFSETS are fixable here.  For each root
 * that references @buf (found via btrfs_find_all_roots), a transaction is
 * started, the block is re-searched with skip_check_block set, and the
 * matching fixer (fix_key_order / fix_item_offset) is applied and
 * committed.
 */
4295 static int try_to_fix_bad_block(struct btrfs_root *root,
4296 struct extent_buffer *buf,
4297 enum btrfs_tree_block_status status)
4299 struct btrfs_trans_handle *trans;
4300 struct ulist *roots;
4301 struct ulist_node *node;
4302 struct btrfs_root *search_root;
4303 struct btrfs_path *path;
4304 struct ulist_iterator iter;
4305 struct btrfs_key root_key, key;
4308 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4309 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4312 path = btrfs_alloc_path();
4316 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4319 btrfs_free_path(path);
4323 ULIST_ITER_INIT(&iter);
4324 while ((node = ulist_next(roots, &iter))) {
/* node->val is the root objectid holding a ref to this block. */
4325 root_key.objectid = node->val;
4326 root_key.type = BTRFS_ROOT_ITEM_KEY;
4327 root_key.offset = (u64)-1;
4329 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4336 trans = btrfs_start_transaction(search_root, 0);
4337 if (IS_ERR(trans)) {
4338 ret = PTR_ERR(trans);
/* Re-walk down to the bad block; skip validation so we can reach it. */
4342 path->lowest_level = btrfs_header_level(buf);
4343 path->skip_check_block = 1;
4344 if (path->lowest_level)
4345 btrfs_node_key_to_cpu(buf, &key, 0);
4347 btrfs_item_key_to_cpu(buf, &key, 0);
4348 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4351 btrfs_commit_transaction(trans, search_root);
4354 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4355 ret = fix_key_order(trans, search_root, path);
4356 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4357 ret = fix_item_offset(trans, search_root, path);
4359 btrfs_commit_transaction(trans, search_root);
4362 btrfs_release_path(path);
4363 btrfs_commit_transaction(trans, search_root);
4366 btrfs_free_path(path);
/*
 * Validate a tree block against its extent record: run
 * btrfs_check_leaf()/btrfs_check_node() and, if repair is possible,
 * try_to_fix_bad_block().  On success the record is marked
 * content_checked (and owner_ref_checked, either because the block has
 * the FULL_BACKREF flag or because check_owner_ref() cleared it).
 */
4370 static int check_block(struct btrfs_root *root,
4371 struct cache_tree *extent_cache,
4372 struct extent_buffer *buf, u64 flags)
4374 struct extent_record *rec;
4375 struct cache_extent *cache;
4376 struct btrfs_key key;
4377 enum btrfs_tree_block_status status;
4381 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4384 rec = container_of(cache, struct extent_record, cache);
4385 rec->generation = btrfs_header_generation(buf);
4387 level = btrfs_header_level(buf);
4388 if (btrfs_header_nritems(buf) > 0) {
4391 btrfs_item_key_to_cpu(buf, &key, 0);
4393 btrfs_node_key_to_cpu(buf, &key, 0);
4395 rec->info_objectid = key.objectid;
4397 rec->info_level = level;
4399 if (btrfs_is_leaf(buf))
4400 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4402 status = btrfs_check_node(root, &rec->parent_key, buf);
4404 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4406 status = try_to_fix_bad_block(root, buf, status);
4407 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4409 fprintf(stderr, "bad block %llu\n",
4410 (unsigned long long)buf->start);
4413 * Signal to callers we need to start the scan over
4414 * again since we'll have cowed blocks.
4419 rec->content_checked = 1;
4420 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4421 rec->owner_ref_checked = 1;
4423 ret = check_owner_ref(root, rec, buf);
4425 rec->owner_ref_checked = 1;
/* Drop the record if everything about it has now been verified. */
4429 maybe_free_extent_rec(extent_cache, rec);
/*
 * Find an existing tree backref on @rec matching either @parent (full
 * backref) or @root (normal backref); returns NULL if none matches.
 * NOTE(review): skip/return bodies are elided from this view.
 */
4433 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4434 u64 parent, u64 root)
4436 struct list_head *cur = rec->backrefs.next;
4437 struct extent_backref *node;
4438 struct tree_backref *back;
4440 while(cur != &rec->backrefs) {
4441 node = to_extent_backref(cur);
4445 back = to_tree_backref(node);
/* A parent bytenr selects full backrefs; a root id selects normal ones. */
4447 if (!node->full_backref)
4449 if (parent == back->parent)
4452 if (node->full_backref)
4454 if (back->root == root)
/*
 * Allocate a new tree backref for @rec and append it to rec->backrefs.
 * A non-zero @parent produces a full backref; otherwise @root is used.
 */
4461 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4462 u64 parent, u64 root)
4464 struct tree_backref *ref = malloc(sizeof(*ref));
4468 memset(&ref->node, 0, sizeof(ref->node));
4470 ref->parent = parent;
4471 ref->node.full_backref = 1;
4474 ref->node.full_backref = 0;
4476 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Find an existing data backref on @rec.  Full backrefs match on @parent;
 * normal ones on (root, owner, offset).  When @found_ref is set and the
 * candidate was already found with a different disk_bytenr/bytes, it is
 * not treated as a match, so a conflicting duplicate can be recorded.
 */
4481 static struct data_backref *find_data_backref(struct extent_record *rec,
4482 u64 parent, u64 root,
4483 u64 owner, u64 offset,
4485 u64 disk_bytenr, u64 bytes)
4487 struct list_head *cur = rec->backrefs.next;
4488 struct extent_backref *node;
4489 struct data_backref *back;
4491 while(cur != &rec->backrefs) {
4492 node = to_extent_backref(cur);
4496 back = to_data_backref(node);
4498 if (!node->full_backref)
4500 if (parent == back->parent)
4503 if (node->full_backref)
4505 if (back->root == root && back->owner == owner &&
4506 back->offset == offset) {
4507 if (found_ref && node->found_ref &&
4508 (back->bytes != bytes ||
4509 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data backref for @rec and append it to rec->backrefs,
 * raising rec->max_size if @max_size exceeds it.  A non-zero @parent
 * produces a full backref; otherwise (root, owner, offset) identify it.
 */
4518 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4519 u64 parent, u64 root,
4520 u64 owner, u64 offset,
4523 struct data_backref *ref = malloc(sizeof(*ref));
4527 memset(&ref->node, 0, sizeof(ref->node));
4528 ref->node.is_data = 1;
4531 ref->parent = parent;
4534 ref->node.full_backref = 1;
4538 ref->offset = offset;
4539 ref->node.full_backref = 0;
4541 ref->bytes = max_size;
4544 list_add_tail(&ref->node.list, &rec->backrefs);
4545 if (max_size > rec->max_size)
4546 rec->max_size = max_size;
4550 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the extent's kind (data/metadata/system)
 * disagrees with the flags of the block group it lives in.  SYSTEM
 * extents are indistinguishable from metadata except by backref root, so
 * the first backref is inspected for BTRFS_CHUNK_TREE_OBJECTID.
 */
4551 static void check_extent_type(struct extent_record *rec)
4553 struct btrfs_block_group_cache *bg_cache;
4555 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4559 /* data extent, check chunk directly*/
4560 if (!rec->metadata) {
4561 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4562 rec->wrong_chunk_type = 1;
4566 /* metadata extent, check the obvious case first */
4567 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4568 BTRFS_BLOCK_GROUP_METADATA))) {
4569 rec->wrong_chunk_type = 1;
4574 * Check SYSTEM extent, as it's also marked as metadata, we can only
4575 * make sure it's a SYSTEM extent by its backref
4577 if (!list_empty(&rec->backrefs)) {
4578 struct extent_backref *node;
4579 struct tree_backref *tback;
4582 node = to_extent_backref(rec->backrefs.next);
4583 if (node->is_data) {
4584 /* tree block shouldn't have data backref */
4585 rec->wrong_chunk_type = 1;
4588 tback = container_of(node, struct tree_backref, node);
4590 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4591 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4593 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4594 if (!(bg_cache->flags & bg_type))
4595 rec->wrong_chunk_type = 1;
4600 * Allocate a new extent record, fill default values from @tmpl and insert int
4601 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4602 * the cache, otherwise it fails.
4604 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4605 struct extent_record *tmpl)
4607 struct extent_record *rec;
4610 rec = malloc(sizeof(*rec));
/* Copy the template fields; everything else starts at its default. */
4613 rec->start = tmpl->start;
4614 rec->max_size = tmpl->max_size;
4615 rec->nr = max(tmpl->nr, tmpl->max_size);
4616 rec->found_rec = tmpl->found_rec;
4617 rec->content_checked = tmpl->content_checked;
4618 rec->owner_ref_checked = tmpl->owner_ref_checked;
4619 rec->num_duplicates = 0;
4620 rec->metadata = tmpl->metadata;
4621 rec->flag_block_full_backref = FLAG_UNSET;
4622 rec->bad_full_backref = 0;
4623 rec->crossing_stripes = 0;
4624 rec->wrong_chunk_type = 0;
4625 rec->is_root = tmpl->is_root;
4626 rec->refs = tmpl->refs;
4627 rec->extent_item_refs = tmpl->extent_item_refs;
4628 rec->parent_generation = tmpl->parent_generation;
4629 INIT_LIST_HEAD(&rec->backrefs);
4630 INIT_LIST_HEAD(&rec->dups);
4631 INIT_LIST_HEAD(&rec->list);
4632 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4633 rec->cache.start = tmpl->start;
4634 rec->cache.size = tmpl->nr;
4635 ret = insert_cache_extent(extent_cache, &rec->cache);
4640 bytes_used += rec->nr;
/* Metadata extents must not straddle a stripe boundary (scrub limit). */
4643 rec->crossing_stripes = check_crossing_stripes(global_info,
4644 rec->start, global_info->tree_root->nodesize);
4645 check_extent_type(rec);
4650 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4652 * - refs - if found, increase refs
4653 * - is_root - if found, set
4654 * - content_checked - if found, set
4655 * - owner_ref_checked - if found, set
4657 * If not found, create a new one, initialize and insert.
4659 static int add_extent_rec(struct cache_tree *extent_cache,
4660 struct extent_record *tmpl)
4662 struct extent_record *rec;
4663 struct cache_extent *cache;
4667 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4669 rec = container_of(cache, struct extent_record, cache);
4673 rec->nr = max(tmpl->nr, tmpl->max_size);
4676 * We need to make sure to reset nr to whatever the extent
4677 * record says was the real size, this way we can compare it to
4680 if (tmpl->found_rec) {
/* Same range seen twice, or overlapping with a different start:
 * record it as a duplicate for later fixing. */
4681 if (tmpl->start != rec->start || rec->found_rec) {
4682 struct extent_record *tmp;
4685 if (list_empty(&rec->list))
4686 list_add_tail(&rec->list,
4687 &duplicate_extents);
4690 * We have to do this song and dance in case we
4691 * find an extent record that falls inside of
4692 * our current extent record but does not have
4693 * the same objectid.
4695 tmp = malloc(sizeof(*tmp));
4698 tmp->start = tmpl->start;
4699 tmp->max_size = tmpl->max_size;
4702 tmp->metadata = tmpl->metadata;
4703 tmp->extent_item_refs = tmpl->extent_item_refs;
4704 INIT_LIST_HEAD(&tmp->list);
4705 list_add_tail(&tmp->list, &rec->dups);
4706 rec->num_duplicates++;
4713 if (tmpl->extent_item_refs && !dup) {
/* Conflicting ref counts from two EXTENT_ITEMs: warn, keep latest. */
4714 if (rec->extent_item_refs) {
4715 fprintf(stderr, "block %llu rec "
4716 "extent_item_refs %llu, passed %llu\n",
4717 (unsigned long long)tmpl->start,
4718 (unsigned long long)
4719 rec->extent_item_refs,
4720 (unsigned long long)tmpl->extent_item_refs);
4722 rec->extent_item_refs = tmpl->extent_item_refs;
4726 if (tmpl->content_checked)
4727 rec->content_checked = 1;
4728 if (tmpl->owner_ref_checked)
4729 rec->owner_ref_checked = 1;
4730 memcpy(&rec->parent_key, &tmpl->parent_key,
4731 sizeof(tmpl->parent_key));
4732 if (tmpl->parent_generation)
4733 rec->parent_generation = tmpl->parent_generation;
4734 if (rec->max_size < tmpl->max_size)
4735 rec->max_size = tmpl->max_size;
4738 * A metadata extent can't cross stripe_len boundary, otherwise
4739 * kernel scrub won't be able to handle it.
4740 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4744 rec->crossing_stripes = check_crossing_stripes(
4745 global_info, rec->start,
4746 global_info->tree_root->nodesize);
4747 check_extent_type(rec);
4748 maybe_free_extent_rec(extent_cache, rec);
/* Not in the cache yet: create and insert a fresh record. */
4752 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree-block backref for @bytenr.  If no extent record exists yet
 * a placeholder of size 1 is created first.  @found_ref distinguishes a
 * reference found while walking the tree from one found in the extent
 * tree; seeing either twice prints a warning.
 */
4757 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4758 u64 parent, u64 root, int found_ref)
4760 struct extent_record *rec;
4761 struct tree_backref *back;
4762 struct cache_extent *cache;
4765 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4767 struct extent_record tmpl;
4769 memset(&tmpl, 0, sizeof(tmpl));
4770 tmpl.start = bytenr;
4774 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4778 /* really a bug in cache_extent implement now */
4779 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4784 rec = container_of(cache, struct extent_record, cache);
4785 if (rec->start != bytenr) {
4787 * Several cause, from unaligned bytenr to over lapping extents
4792 back = find_tree_backref(rec, parent, root);
4794 back = alloc_tree_backref(rec, parent, root);
4800 if (back->node.found_ref) {
4801 fprintf(stderr, "Extent back ref already exists "
4802 "for %llu parent %llu root %llu \n",
4803 (unsigned long long)bytenr,
4804 (unsigned long long)parent,
4805 (unsigned long long)root);
4807 back->node.found_ref = 1;
4809 if (back->node.found_extent_tree) {
4810 fprintf(stderr, "Extent back ref already exists "
4811 "for %llu parent %llu root %llu \n",
4812 (unsigned long long)bytenr,
4813 (unsigned long long)parent,
4814 (unsigned long long)root);
4816 back->node.found_extent_tree = 1;
4818 check_extent_type(rec);
4819 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data-extent backref for @bytenr, creating a placeholder extent
 * record if needed.  When @found_ref is set, max_size is the real extent
 * length and is cross-checked against any previously found ref; mismatches
 * cause a new backref so the disagreement is visible later.
 */
4823 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4824 u64 parent, u64 root, u64 owner, u64 offset,
4825 u32 num_refs, int found_ref, u64 max_size)
4827 struct extent_record *rec;
4828 struct data_backref *back;
4829 struct cache_extent *cache;
4832 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4834 struct extent_record tmpl;
4836 memset(&tmpl, 0, sizeof(tmpl));
4837 tmpl.start = bytenr;
4839 tmpl.max_size = max_size;
4841 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4845 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4850 rec = container_of(cache, struct extent_record, cache);
4851 if (rec->max_size < max_size)
4852 rec->max_size = max_size;
4855 * If found_ref is set then max_size is the real size and must match the
4856 * existing refs. So if we have already found a ref then we need to
4857 * make sure that this ref matches the existing one, otherwise we need
4858 * to add a new backref so we can notice that the backrefs don't match
4859 * and we need to figure out who is telling the truth. This is to
4860 * account for that awful fsync bug I introduced where we'd end up with
4861 * a btrfs_file_extent_item that would have its length include multiple
4862 * prealloc extents or point inside of a prealloc extent.
4864 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4867 back = alloc_data_backref(rec, parent, root, owner, offset,
4873 BUG_ON(num_refs != 1);
4874 if (back->node.found_ref)
4875 BUG_ON(back->bytes != max_size);
4876 back->node.found_ref = 1;
4877 back->found_ref += 1;
4878 back->bytes = max_size;
4879 back->disk_bytenr = bytenr;
/* A ref seen from the fs tree proves content and ownership. */
4881 rec->content_checked = 1;
4882 rec->owner_ref_checked = 1;
4884 if (back->node.found_extent_tree) {
4885 fprintf(stderr, "Extent back ref already exists "
4886 "for %llu parent %llu root %llu "
4887 "owner %llu offset %llu num_refs %lu\n",
4888 (unsigned long long)bytenr,
4889 (unsigned long long)parent,
4890 (unsigned long long)root,
4891 (unsigned long long)owner,
4892 (unsigned long long)offset,
4893 (unsigned long)num_refs);
4895 back->num_refs = num_refs;
4896 back->node.found_extent_tree = 1;
4898 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue [bytenr, bytenr+size) for later scanning: insert it into @seen
 * first (failure presumably means it was already queued — confirm against
 * full source), then into @pending.
 */
4902 static int add_pending(struct cache_tree *pending,
4903 struct cache_tree *seen, u64 bytenr, u32 size)
4906 ret = add_cache_extent(seen, bytenr, size);
4909 add_cache_extent(pending, bytenr, size);
/*
 * Choose up to @bits_nr block ranges to process next, preferring readahead
 * entries, then nodes near @last (starting up to 32K before it to keep the
 * walk roughly sequential), then the generic pending tree.  Returns the
 * number of entries written to @bits.
 */
4913 static int pick_next_pending(struct cache_tree *pending,
4914 struct cache_tree *reada,
4915 struct cache_tree *nodes,
4916 u64 last, struct block_info *bits, int bits_nr,
4919 unsigned long node_start = last;
4920 struct cache_extent *cache;
/* Readahead entries take priority, one at a time. */
4923 cache = search_cache_extent(reada, 0);
4925 bits[0].start = cache->start;
4926 bits[0].size = cache->size;
/* Back up a little so nearby earlier nodes are picked up too. */
4931 if (node_start > 32768)
4932 node_start -= 32768;
4934 cache = search_cache_extent(nodes, node_start);
4936 cache = search_cache_extent(nodes, 0);
4939 cache = search_cache_extent(pending, 0);
4944 bits[ret].start = cache->start;
4945 bits[ret].size = cache->size;
4946 cache = next_cache_extent(cache);
4948 } while (cache && ret < bits_nr);
4954 bits[ret].start = cache->start;
4955 bits[ret].size = cache->size;
4956 cache = next_cache_extent(cache);
4958 } while (cache && ret < bits_nr);
/* With spare slots, opportunistically pull in nearby pending ranges. */
4960 if (bits_nr - ret > 8) {
4961 u64 lookup = bits[0].start + bits[0].size;
4962 struct cache_extent *next;
4963 next = search_cache_extent(pending, lookup);
4965 if (next->start - lookup > 32768)
4967 bits[ret].start = next->start;
4968 bits[ret].size = next->size;
4969 lookup = next->start + next->size;
4973 next = next_cache_extent(next);
/* Detach a chunk_record from its lists and release it (free is elided). */
4981 static void free_chunk_record(struct cache_extent *cache)
4983 struct chunk_record *rec;
4985 rec = container_of(cache, struct chunk_record, cache);
4986 list_del_init(&rec->list);
4987 list_del_init(&rec->dextents);
/* Free every chunk_record held in @chunk_cache. */
4991 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4993 cache_tree_free_extents(chunk_cache, free_chunk_record);
/* rb-tree destructor callback: free one device_record. */
4996 static void free_device_record(struct rb_node *node)
4998 struct device_record *rec;
5000 rec = container_of(node, struct device_record, node);
/* NOTE(review): the line below appears to belong to a separate
 * free_device_cache() helper whose signature is elided from this view. */
5004 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the block-group cache tree and, on success, append
 * it to the tree's block_groups list.
 */
5006 int insert_block_group_record(struct block_group_tree *tree,
5007 struct block_group_record *bg_rec)
5011 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5015 list_add_tail(&bg_rec->list, &tree->block_groups);
/* Detach a block_group_record from its list and release it. */
5019 static void free_block_group_record(struct cache_extent *cache)
5021 struct block_group_record *rec;
5023 rec = container_of(cache, struct block_group_record, cache);
5024 list_del_init(&rec->list);
/* Free every block_group_record held in @tree. */
5028 void free_block_group_tree(struct block_group_tree *tree)
5030 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the device-extent tree.  On success the record is
 * put on both orphan lists; it is removed from them once the matching
 * chunk/device is found.
 */
5033 int insert_device_extent_record(struct device_extent_tree *tree,
5034 struct device_extent_record *de_rec)
5039 * Device extent is a bit different from the other extents, because
5040 * the extents which belong to the different devices may have the
5041 * same start and size, so we need use the special extent cache
5042 * search/insert functions.
5044 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5048 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5049 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/* Unlink a device_extent_record from any orphan lists and release it. */
5053 static void free_device_extent_record(struct cache_extent *cache)
5055 struct device_extent_record *rec;
5057 rec = container_of(cache, struct device_extent_record, cache);
5058 if (!list_empty(&rec->chunk_list))
5059 list_del_init(&rec->chunk_list);
5060 if (!list_empty(&rec->device_list))
5061 list_del_init(&rec->device_list);
/* Free every device_extent_record held in @tree. */
5065 void free_device_extent_tree(struct device_extent_tree *tree)
5067 cache_tree_free_extents(&tree->tree, free_device_extent_record);
5070 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Legacy (v0) extent-ref handling: a ref whose objectid is below
 * BTRFS_FIRST_FREE_OBJECTID belongs to a tree root, otherwise it is a
 * data ref.  key.offset carries the parent bytenr in both cases.
 */
5071 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5072 struct extent_buffer *leaf, int slot)
5074 struct btrfs_extent_ref_v0 *ref0;
5075 struct btrfs_key key;
5078 btrfs_item_key_to_cpu(leaf, &key, slot);
5079 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5080 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5081 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
5084 ret = add_data_backref(extent_cache, key.objectid, key.offset,
5085 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk_record from the CHUNK_ITEM at @slot of @leaf.
 * The record is sized for its stripe count (flexible array) and caches
 * every on-disk field plus each stripe's devid/offset/uuid.  Returns the
 * new record; allocation failure prints an error (recovery path elided).
 */
5091 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5092 struct btrfs_key *key,
5095 struct btrfs_chunk *ptr;
5096 struct chunk_record *rec;
5099 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5100 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5102 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5104 fprintf(stderr, "memory allocation failed\n");
5108 INIT_LIST_HEAD(&rec->list);
5109 INIT_LIST_HEAD(&rec->dextents);
/* Chunks are indexed by logical start (key->offset) and length. */
5112 rec->cache.start = key->offset;
5113 rec->cache.size = btrfs_chunk_length(leaf, ptr);
5115 rec->generation = btrfs_header_generation(leaf);
5117 rec->objectid = key->objectid;
5118 rec->type = key->type;
5119 rec->offset = key->offset;
5121 rec->length = rec->cache.size;
5122 rec->owner = btrfs_chunk_owner(leaf, ptr);
5123 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5124 rec->type_flags = btrfs_chunk_type(leaf, ptr);
5125 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5126 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5127 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5128 rec->num_stripes = num_stripes;
5129 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5131 for (i = 0; i < rec->num_stripes; ++i) {
5132 rec->stripes[i].devid =
5133 btrfs_stripe_devid_nr(leaf, ptr, i);
5134 rec->stripes[i].offset =
5135 btrfs_stripe_offset_nr(leaf, ptr, i);
5136 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5137 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate a CHUNK_ITEM with btrfs_check_chunk_valid() (guards against a
 * crafted leaf carrying a chunk item outside the chunk tree), turn it
 * into a chunk_record and insert it into @chunk_cache; duplicates are
 * reported.
 */
5144 static int process_chunk_item(struct cache_tree *chunk_cache,
5145 struct btrfs_key *key, struct extent_buffer *eb,
5148 struct chunk_record *rec;
5149 struct btrfs_chunk *chunk;
5152 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5154 * Do extra check for this chunk item,
5156 * It's still possible one can craft a leaf with CHUNK_ITEM, with
5157 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5158 * and owner<->key_type check.
5160 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5163 error("chunk(%llu, %llu) is not valid, ignore it",
5164 key->offset, btrfs_chunk_length(eb, chunk));
5167 rec = btrfs_new_chunk_record(eb, key, slot);
5168 ret = insert_cache_extent(chunk_cache, &rec->cache);
5170 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5171 rec->offset, rec->length);
/*
 * Build a device_record from the DEV_ITEM at @slot and insert it into the
 * devid-keyed rb-tree @dev_cache; a duplicate devid is reported.
 */
5178 static int process_device_item(struct rb_root *dev_cache,
5179 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5181 struct btrfs_dev_item *ptr;
5182 struct device_record *rec;
5185 ptr = btrfs_item_ptr(eb,
5186 slot, struct btrfs_dev_item);
5188 rec = malloc(sizeof(*rec));
5190 fprintf(stderr, "memory allocation failed\n");
5194 rec->devid = key->offset;
5195 rec->generation = btrfs_header_generation(eb);
5197 rec->objectid = key->objectid;
5198 rec->type = key->type;
5199 rec->offset = key->offset;
/* devid from the item body overrides the key-derived value above. */
5201 rec->devid = btrfs_device_id(eb, ptr);
5202 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5203 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5205 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5207 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build an in-memory block_group_record from the BLOCK_GROUP_ITEM at
 * @slot of @leaf.  Keyed by (objectid = start, offset = length).
 */
5214 struct block_group_record *
5215 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5218 struct btrfs_block_group_item *ptr;
5219 struct block_group_record *rec;
5221 rec = calloc(1, sizeof(*rec));
5223 fprintf(stderr, "memory allocation failed\n");
5227 rec->cache.start = key->objectid;
5228 rec->cache.size = key->offset;
5230 rec->generation = btrfs_header_generation(leaf);
5232 rec->objectid = key->objectid;
5233 rec->type = key->type;
5234 rec->offset = key->offset;
5236 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5237 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5239 INIT_LIST_HEAD(&rec->list);
/*
 * Turn the BLOCK_GROUP_ITEM at @slot into a record and insert it into
 * @block_group_cache; a duplicate range is reported.
 */
5244 static int process_block_group_item(struct block_group_tree *block_group_cache,
5245 struct btrfs_key *key,
5246 struct extent_buffer *eb, int slot)
5248 struct block_group_record *rec;
5251 rec = btrfs_new_block_group_record(eb, key, slot);
5252 ret = insert_block_group_record(block_group_cache, rec);
5254 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5255 rec->objectid, rec->offset);
/*
 * Build an in-memory device_extent_record from the DEV_EXTENT item at
 * @slot of @leaf.  Keyed by (objectid = devid, start = physical offset,
 * size = length); the owning chunk's objectid/offset are cached too.
 */
5262 struct device_extent_record *
5263 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5264 struct btrfs_key *key, int slot)
5266 struct device_extent_record *rec;
5267 struct btrfs_dev_extent *ptr;
5269 rec = calloc(1, sizeof(*rec));
5271 fprintf(stderr, "memory allocation failed\n");
5275 rec->cache.objectid = key->objectid;
5276 rec->cache.start = key->offset;
5278 rec->generation = btrfs_header_generation(leaf);
5280 rec->objectid = key->objectid;
5281 rec->type = key->type;
5282 rec->offset = key->offset;
5284 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5285 rec->chunk_objecteid =
5286 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5288 btrfs_dev_extent_chunk_offset(leaf, ptr);
5289 rec->length = btrfs_dev_extent_length(leaf, ptr);
5290 rec->cache.size = rec->length;
5292 INIT_LIST_HEAD(&rec->chunk_list);
5293 INIT_LIST_HEAD(&rec->device_list);
/*
 * Turn the DEV_EXTENT item at @slot into a record and insert it into
 * @dev_extent_cache; a duplicate (devid, offset, length) is reported.
 */
5299 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5300 struct btrfs_key *key, struct extent_buffer *eb,
5303 struct device_extent_record *rec;
5306 rec = btrfs_new_device_extent_record(eb, key, slot);
5307 ret = insert_device_extent_record(dev_extent_cache, rec);
5310 "Device extent[%llu, %llu, %llu] existed.\n",
5311 rec->objectid, rec->offset, rec->length);
/*
 * Parse one EXTENT_ITEM / METADATA_ITEM: sanity-check alignment and size
 * against sectorsize/nodesize, record the extent itself via
 * add_extent_rec(), then walk its inline refs and register each one as a
 * tree or data backref.  Handles v0 extent items when compiled with
 * BTRFS_COMPAT_EXTENT_TREE_V0.
 */
5318 static int process_extent_item(struct btrfs_root *root,
5319 struct cache_tree *extent_cache,
5320 struct extent_buffer *eb, int slot)
5322 struct btrfs_extent_item *ei;
5323 struct btrfs_extent_inline_ref *iref;
5324 struct btrfs_extent_data_ref *dref;
5325 struct btrfs_shared_data_ref *sref;
5326 struct btrfs_key key;
5327 struct extent_record tmpl;
5332 u32 item_size = btrfs_item_size_nr(eb, slot);
5338 btrfs_item_key_to_cpu(eb, &key, slot);
/* METADATA_ITEM: offset is the tree level, length is one node. */
5340 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5342 num_bytes = root->nodesize;
5344 num_bytes = key.offset;
5347 if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5348 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5349 key.objectid, root->sectorsize);
5352 if (item_size < sizeof(*ei)) {
5353 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5354 struct btrfs_extent_item_v0 *ei0;
5355 BUG_ON(item_size != sizeof(*ei0));
5356 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5357 refs = btrfs_extent_refs_v0(eb, ei0);
5361 memset(&tmpl, 0, sizeof(tmpl));
5362 tmpl.start = key.objectid;
5363 tmpl.nr = num_bytes;
5364 tmpl.extent_item_refs = refs;
5365 tmpl.metadata = metadata;
5367 tmpl.max_size = num_bytes;
5369 return add_extent_rec(extent_cache, &tmpl);
5372 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5373 refs = btrfs_extent_refs(eb, ei);
5374 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5378 if (metadata && num_bytes != root->nodesize) {
5379 error("ignore invalid metadata extent, length %llu does not equal to %u",
5380 num_bytes, root->nodesize);
5383 if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5384 error("ignore invalid data extent, length %llu is not aligned to %u",
5385 num_bytes, root->sectorsize);
5389 memset(&tmpl, 0, sizeof(tmpl));
5390 tmpl.start = key.objectid;
5391 tmpl.nr = num_bytes;
5392 tmpl.extent_item_refs = refs;
5393 tmpl.metadata = metadata;
5395 tmpl.max_size = num_bytes;
5396 add_extent_rec(extent_cache, &tmpl);
/* Walk the inline refs following the extent item body. */
5398 ptr = (unsigned long)(ei + 1);
5399 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5400 key.type == BTRFS_EXTENT_ITEM_KEY)
5401 ptr += sizeof(struct btrfs_tree_block_info);
5403 end = (unsigned long)ei + item_size;
5405 iref = (struct btrfs_extent_inline_ref *)ptr;
5406 type = btrfs_extent_inline_ref_type(eb, iref);
5407 offset = btrfs_extent_inline_ref_offset(eb, iref);
5409 case BTRFS_TREE_BLOCK_REF_KEY:
5410 ret = add_tree_backref(extent_cache, key.objectid,
5413 error("add_tree_backref failed: %s",
5416 case BTRFS_SHARED_BLOCK_REF_KEY:
5417 ret = add_tree_backref(extent_cache, key.objectid,
5420 error("add_tree_backref failed: %s",
5423 case BTRFS_EXTENT_DATA_REF_KEY:
5424 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5425 add_data_backref(extent_cache, key.objectid, 0,
5426 btrfs_extent_data_ref_root(eb, dref),
5427 btrfs_extent_data_ref_objectid(eb,
5429 btrfs_extent_data_ref_offset(eb, dref),
5430 btrfs_extent_data_ref_count(eb, dref),
5433 case BTRFS_SHARED_DATA_REF_KEY:
5434 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5435 add_data_backref(extent_cache, key.objectid, offset,
5437 btrfs_shared_data_ref_count(eb, sref),
5441 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5442 key.objectid, key.type, num_bytes);
5445 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Verify the free-space cache contains exactly one entry covering
 * [offset, offset+bytes).  Superblock mirror copies inside the range are
 * carved out first (splitting the check recursively when a mirror lands
 * in the middle); the matching entry is then unlinked so leftovers can be
 * detected by the caller.
 */
5452 static int check_cache_range(struct btrfs_root *root,
5453 struct btrfs_block_group_cache *cache,
5454 u64 offset, u64 bytes)
5456 struct btrfs_free_space *entry;
5462 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5463 bytenr = btrfs_sb_offset(i);
5464 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5465 cache->key.objectid, bytenr, 0,
5466 &logical, &nr, &stripe_len);
/* Superblock stripe entirely outside the checked range: skip. */
5471 if (logical[nr] + stripe_len <= offset)
5473 if (offset + bytes <= logical[nr])
5475 if (logical[nr] == offset) {
5476 if (stripe_len >= bytes) {
5480 bytes -= stripe_len;
5481 offset += stripe_len;
5482 } else if (logical[nr] < offset) {
5483 if (logical[nr] + stripe_len >=
5488 bytes = (offset + bytes) -
5489 (logical[nr] + stripe_len);
5490 offset = logical[nr] + stripe_len;
5493 * Could be tricky, the super may land in the
5494 * middle of the area we're checking. First
5495 * check the easiest case, it's at the end.
5497 if (logical[nr] + stripe_len >=
5499 bytes = logical[nr] - offset;
5503 /* Check the left side */
5504 ret = check_cache_range(root, cache,
5506 logical[nr] - offset);
5512 /* Now we continue with the right side */
5513 bytes = (offset + bytes) -
5514 (logical[nr] + stripe_len);
5515 offset = logical[nr] + stripe_len;
/* The remaining range must match one free-space entry exactly. */
5522 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5524 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5525 offset, offset+bytes);
5529 if (entry->offset != offset) {
5530 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5535 if (entry->bytes != bytes) {
5536 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5537 bytes, entry->bytes, offset);
5541 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check a block group's loaded free-space cache against the extent
 * tree: walk all EXTENT_ITEM/METADATA_ITEM records inside the block
 * group and verify (via check_cache_range()) that every gap between
 * allocated extents is present in the cache.  Any free-space entries
 * still left after the walk indicate a corrupt cache.
 *
 * NOTE(review): some original lines are elided in this excerpt; comments
 * describe only the visible code.
 */
5546 static int verify_space_cache(struct btrfs_root *root,
5547 struct btrfs_block_group_cache *cache)
5549 struct btrfs_path *path;
5550 struct extent_buffer *leaf;
5551 struct btrfs_key key;
5555 path = btrfs_alloc_path();
/* All extent records live in the extent tree, not the passed-in root. */
5559 root = root->fs_info->extent_root;
/* Never scan below the first superblock. */
5561 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5563 key.objectid = last;
5565 key.type = BTRFS_EXTENT_ITEM_KEY;
5567 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5572 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5573 ret = btrfs_next_leaf(root, path);
5581 leaf = path->nodes[0];
5582 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
/* Stop once we pass the end of this block group. */
5583 if (key.objectid >= cache->key.offset + cache->key.objectid)
5585 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5586 key.type != BTRFS_METADATA_ITEM_KEY) {
/*
 * Contiguous with the previous extent: just advance 'last'.
 * METADATA_ITEM offsets hold the tree level, so metadata extents
 * span nodesize bytes rather than key.offset.
 */
5591 if (last == key.objectid) {
5592 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5593 last = key.objectid + key.offset;
5595 last = key.objectid + root->nodesize;
/* Gap between 'last' and this extent must be free space. */
5600 ret = check_cache_range(root, cache, last,
5601 key.objectid - last);
5604 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5605 last = key.objectid + key.offset;
5607 last = key.objectid + root->nodesize;
/* Tail of the block group past the final extent is free space too. */
5611 if (last < cache->key.objectid + cache->key.offset)
5612 ret = check_cache_range(root, cache, last,
5613 cache->key.objectid +
5614 cache->key.offset - last);
5617 btrfs_free_path(path);
/* Entries that survived the walk were never matched -> corruption. */
5620 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5621 fprintf(stderr, "There are still entries left in the space "
/*
 * Top-level free-space verification: iterate every block group, load its
 * free-space data (from the free-space tree if that compat_ro feature is
 * set, otherwise from the v1 space cache) and verify it against the
 * extent tree with verify_space_cache().
 *
 * Returns 0 if everything checked out, -EINVAL if any block group's
 * cache was invalid (tracked in the local 'error' counter).
 *
 * NOTE(review): some original lines are elided in this excerpt.
 */
5629 static int check_space_cache(struct btrfs_root *root)
5631 struct btrfs_block_group_cache *cache;
/* Start scanning right after the primary superblock. */
5632 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
/* A stale cache generation means the kernel will rebuild the cache. */
5636 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5637 btrfs_super_generation(root->fs_info->super_copy) !=
5638 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5639 printf("cache and super generation don't match, space cache "
5640 "will be invalidated\n");
5644 if (ctx.progress_enabled) {
5645 ctx.tp = TASK_FREE_SPACE;
5646 task_start(ctx.info);
/* Walk the block groups in address order. */
5650 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5654 start = cache->key.objectid + cache->key.offset;
5655 if (!cache->free_space_ctl) {
5656 if (btrfs_init_free_space_ctl(cache,
5657 root->sectorsize)) {
5662 btrfs_remove_free_space_cache(cache);
/* Free-space-tree filesystems: load from the tree instead of the v1 cache. */
5665 if (btrfs_fs_compat_ro(root->fs_info,
5666 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5667 ret = exclude_super_stripes(root, cache);
5669 fprintf(stderr, "could not exclude super stripes: %s\n",
5674 ret = load_free_space_tree(root->fs_info, cache);
5675 free_excluded_extents(root, cache);
5677 fprintf(stderr, "could not load free space tree: %s\n",
5684 ret = load_free_space_cache(root->fs_info, cache);
5689 ret = verify_space_cache(root, cache);
5691 fprintf(stderr, "cache appears valid but isn't %Lu\n",
5692 cache->key.objectid);
5697 task_stop(ctx.info);
5699 return error ? -EINVAL : 0;
/*
 * Read the data extent at [bytenr, bytenr + num_bytes) and verify each
 * sectorsize-sized chunk against the on-disk checksums stored in the
 * csum leaf 'eb' starting at 'leaf_offset'.  On a mismatch the next
 * mirror is tried (visible retry logic is partially elided here).
 *
 * @bytenr:      logical start of the data extent
 * @num_bytes:   extent length; must be sectorsize-aligned
 * @leaf_offset: byte offset of the csum item data inside 'eb'
 * @eb:          csum-tree leaf holding the expected checksums
 *
 * NOTE(review): some original lines are elided in this excerpt.
 */
5702 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5703 u64 num_bytes, unsigned long leaf_offset,
5704 struct extent_buffer *eb) {
5707 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5709 unsigned long csum_offset;
5713 u64 data_checked = 0;
/* Checksums cover whole sectors; reject unaligned lengths. */
5719 if (num_bytes % root->sectorsize)
5722 data = malloc(num_bytes);
5726 while (offset < num_bytes) {
5729 read_len = num_bytes - offset;
5730 /* read as much space once a time */
5731 ret = read_extent_data(root, data + offset,
5732 bytenr + offset, &read_len, mirror);
5736 /* verify every 4k data's checksum */
5737 while (data_checked < read_len) {
5739 tmp = offset + data_checked;
/* Compute the checksum of one sector of the data just read. */
5741 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5742 csum, root->sectorsize);
5743 btrfs_csum_final(csum, (u8 *)&csum);
/* Locate the matching stored checksum inside the csum item. */
5745 csum_offset = leaf_offset +
5746 tmp / root->sectorsize * csum_size;
5747 read_extent_buffer(eb, (char *)&csum_expected,
5748 csum_offset, csum_size);
5749 /* try another mirror */
5750 if (csum != csum_expected) {
5751 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5752 mirror, bytenr + tmp,
5753 csum, csum_expected);
5754 num_copies = btrfs_num_copies(
5755 &root->fs_info->mapping_tree,
5757 if (mirror < num_copies - 1) {
5762 data_checked += root->sectorsize;
/*
 * Verify that the whole range [bytenr, bytenr + num_bytes) is covered by
 * EXTENT_ITEMs in the extent tree.  Walks forward from the record at or
 * before 'bytenr', shrinking the unaccounted range as overlapping
 * extents are found; recurses when an extent splits the range in two.
 * Reports (and fails) if any part of the range has no extent record.
 *
 * NOTE(review): some original lines are elided in this excerpt.
 */
5771 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5774 struct btrfs_path *path;
5775 struct extent_buffer *leaf;
5776 struct btrfs_key key;
5779 path = btrfs_alloc_path();
5781 fprintf(stderr, "Error allocating path\n");
/* offset = -1 positions us at the last record with this objectid. */
5785 key.objectid = bytenr;
5786 key.type = BTRFS_EXTENT_ITEM_KEY;
5787 key.offset = (u64)-1;
5790 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5793 fprintf(stderr, "Error looking up extent record %d\n", ret);
5794 btrfs_free_path(path);
/* Step back to the record at or before our target bytenr. */
5797 if (path->slots[0] > 0) {
5800 ret = btrfs_prev_leaf(root, path);
5803 } else if (ret > 0) {
5810 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5813 * Block group items come before extent items if they have the same
5814 * bytenr, so walk back one more just in case. Dear future traveller,
5815 * first congrats on mastering time travel. Now if it's not too much
5816 * trouble could you go back to 2006 and tell Chris to make the
5817 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5818 * EXTENT_ITEM_KEY please?
5820 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5821 if (path->slots[0] > 0) {
5824 ret = btrfs_prev_leaf(root, path);
5827 } else if (ret > 0) {
5832 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* Main scan loop: advance leaf-by-leaf through the extent records. */
5836 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5837 ret = btrfs_next_leaf(root, path);
5839 fprintf(stderr, "Error going to next leaf "
5841 btrfs_free_path(path);
5847 leaf = path->nodes[0];
5848 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5849 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent entirely before the range: skip ahead. */
5853 if (key.objectid + key.offset < bytenr) {
/* Extent entirely after the range: nothing more can cover it. */
5857 if (key.objectid > bytenr + num_bytes)
/* Extent starts exactly at the range: trim the covered prefix. */
5860 if (key.objectid == bytenr) {
5861 if (key.offset >= num_bytes) {
5865 num_bytes -= key.offset;
5866 bytenr += key.offset;
/* Extent starts before the range: trim the overlap. */
5867 } else if (key.objectid < bytenr) {
5868 if (key.objectid + key.offset >= bytenr + num_bytes) {
5872 num_bytes = (bytenr + num_bytes) -
5873 (key.objectid + key.offset);
5874 bytenr = key.objectid + key.offset;
/* Extent starts inside the range. */
5876 if (key.objectid + key.offset < bytenr + num_bytes) {
5877 u64 new_start = key.objectid + key.offset;
5878 u64 new_bytes = bytenr + num_bytes - new_start;
5881 * Weird case, the extent is in the middle of
5882 * our range, we'll have to search one side
5883 * and then the other. Not sure if this happens
5884 * in real life, but no harm in coding it up
5885 * anyway just in case.
5887 btrfs_release_path(path);
5888 ret = check_extent_exists(root, new_start,
5891 fprintf(stderr, "Right section didn't "
5895 num_bytes = key.objectid - bytenr;
5898 num_bytes = key.objectid - bytenr;
/* Anything left uncovered at this point has no extent record. */
5905 if (num_bytes && !ret) {
5906 fprintf(stderr, "There are no extents for csum range "
5907 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5911 btrfs_free_path(path);
/*
 * Walk the checksum tree and verify two things for every csum item:
 *  1) (optional, only with -c/check_data_csum) the stored checksums
 *     match the data actually on disk, via check_extent_csums();
 *  2) every contiguous csum run corresponds to real extent records,
 *     via check_extent_exists() at each discontinuity.
 *
 * NOTE(review): some original lines are elided in this excerpt.
 */
5915 static int check_csums(struct btrfs_root *root)
5917 struct btrfs_path *path;
5918 struct extent_buffer *leaf;
5919 struct btrfs_key key;
5920 u64 offset = 0, num_bytes = 0;
5921 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5925 unsigned long leaf_offset;
/* All csum items live in the dedicated csum tree. */
5927 root = root->fs_info->csum_root;
5928 if (!extent_buffer_uptodate(root->node)) {
5929 fprintf(stderr, "No valid csum tree found\n");
5933 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5934 key.type = BTRFS_EXTENT_CSUM_KEY;
5937 path = btrfs_alloc_path();
5941 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5943 fprintf(stderr, "Error searching csum tree %d\n", ret);
5944 btrfs_free_path(path);
5948 if (ret > 0 && path->slots[0])
5953 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5954 ret = btrfs_next_leaf(root, path);
5956 fprintf(stderr, "Error going to next leaf "
5963 leaf = path->nodes[0];
5965 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5966 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Each csum_size bytes of item data covers one sector of data. */
5971 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5972 csum_size) * root->sectorsize;
5973 if (!check_data_csum)
5974 goto skip_csum_check;
5975 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5976 ret = check_extent_csums(root, key.offset, data_len,
/* Track contiguous runs: a gap means the previous run ended, so
 * verify that run was backed by extent records. */
5982 offset = key.offset;
5983 } else if (key.offset != offset + num_bytes) {
5984 ret = check_extent_exists(root, offset, num_bytes);
5986 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5987 "there is no extent record\n",
5988 offset, offset+num_bytes);
5991 offset = key.offset;
5994 num_bytes += data_len;
5998 btrfs_free_path(path);
/*
 * Return whether 'key' sorts strictly before 'drop_key' in standard
 * btrfs key order (objectid, then type, then offset).  Used to skip
 * tree blocks that fall below a root's drop progress point.
 *
 * NOTE(review): the 'return' lines are elided in this excerpt; each
 * visible comparison presumably returns 1 when it holds.
 */
6002 static int is_dropped_key(struct btrfs_key *key,
6003 struct btrfs_key *drop_key) {
6004 if (key->objectid < drop_key->objectid)
6006 else if (key->objectid == drop_key->objectid) {
6007 if (key->type < drop_key->type)
6009 else if (key->type == drop_key->type) {
6010 if (key->offset < drop_key->offset)
6018 * Here are the rules for FULL_BACKREF.
6020 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6021 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6023 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
6024 * if it happened after the relocation occurred since we'll have dropped the
6025 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6026 * have no real way to know for sure.
6028 * We process the blocks one root at a time, and we start from the lowest root
6029 * objectid and go to the highest. So we can just lookup the owner backref for
6030 * the record and if we don't find it then we know it doesn't exist and we have
6033 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6034 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6035 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Decide whether the extent record for tree block 'buf' should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF, applying the rules above, and flag the
 * record as bad_full_backref when the recorded flag disagrees.
 *
 * NOTE(review): some original lines are elided in this excerpt.
 */
6037 static int calc_extent_flag(struct btrfs_root *root,
6038 struct cache_tree *extent_cache,
6039 struct extent_buffer *buf,
6040 struct root_item_record *ri,
6043 struct extent_record *rec;
6044 struct cache_extent *cache;
6045 struct tree_backref *tback;
6048 cache = lookup_cache_extent(extent_cache, buf->start, 1);
6049 /* we have added this extent before */
6053 rec = container_of(cache, struct extent_record, cache);
6056 * Except file/reloc tree, we can not have
/* Non-file, non-reloc trees never use FULL_BACKREF. */
6059 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* Root nodes are referenced by the root item, never full backref. */
6064 if (buf->start == ri->bytenr)
/* Rule 1: RELOC header flag forces FULL_BACKREF. */
6067 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
/* Rule 2: owner mismatch with no owner backref implies FULL_BACKREF. */
6070 owner = btrfs_header_owner(buf);
6071 if (owner == ri->objectid)
6074 tback = find_tree_backref(rec, 0, owner);
/* Recorded flag says full backref but we decided "normal": mark bad. */
6079 if (rec->flag_block_full_backref != FLAG_UNSET &&
6080 rec->flag_block_full_backref != 0)
6081 rec->bad_full_backref = 1;
6084 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* Recorded flag says "normal" but we decided full backref: mark bad. */
6085 if (rec->flag_block_full_backref != FLAG_UNSET &&
6086 rec->flag_block_full_backref != 1)
6087 rec->bad_full_backref = 1;
/*
 * Print a diagnostic for a key type found in a tree that should not
 * contain it, e.g. "Invalid key type(DEV_ITEM) found in root(5)".
 * Helper for check_type_with_root(); output goes to stderr.
 */
6091 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6093 fprintf(stderr, "Invalid key type(");
6094 print_key_type(stderr, 0, key_type);
6095 fprintf(stderr, ") found in root(");
6096 print_objectid(stderr, rootid, 0);
6097 fprintf(stderr, ")\n");
6101 * Check if the key is valid with its extent buffer.
6103 * This is a early check in case invalid key exists in a extent buffer
6104 * This is not comprehensive yet, but should prevent wrong key/item passed
6107 static int check_type_with_root(u64 rootid, u8 key_type)
6110 /* Only valid in chunk tree */
6111 case BTRFS_DEV_ITEM_KEY:
6112 case BTRFS_CHUNK_ITEM_KEY:
6113 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6116 /* valid in csum and log tree */
6117 case BTRFS_CSUM_TREE_OBJECTID:
6118 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6122 case BTRFS_EXTENT_ITEM_KEY:
6123 case BTRFS_METADATA_ITEM_KEY:
6124 case BTRFS_BLOCK_GROUP_ITEM_KEY:
6125 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6128 case BTRFS_ROOT_ITEM_KEY:
6129 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6132 case BTRFS_DEV_EXTENT_KEY:
6133 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6139 report_mismatch_key_root(key_type, rootid);
/*
 * Core of the extent-tree scan: pick the next pending tree block, read
 * it, work out its backref flags, sanity-check it, and then either
 * (leaf) dispatch every item to the appropriate process_* /
 * add_*_backref handler, or (node) queue every child pointer as a new
 * pending block.  Also accumulates the global byte/waste statistics.
 *
 * NOTE(review): this excerpt elides many original lines (see the
 * line-number gaps); comments describe only the visible code.
 */
6143 static int run_next_block(struct btrfs_root *root,
6144 struct block_info *bits,
6147 struct cache_tree *pending,
6148 struct cache_tree *seen,
6149 struct cache_tree *reada,
6150 struct cache_tree *nodes,
6151 struct cache_tree *extent_cache,
6152 struct cache_tree *chunk_cache,
6153 struct rb_root *dev_cache,
6154 struct block_group_tree *block_group_cache,
6155 struct device_extent_tree *dev_extent_cache,
6156 struct root_item_record *ri)
6158 struct extent_buffer *buf;
6159 struct extent_record *rec = NULL;
6170 struct btrfs_key key;
6171 struct cache_extent *cache;
/* Choose the next batch of blocks; may return readahead candidates. */
6174 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6175 bits_nr, &reada_bits);
6180 for(i = 0; i < nritems; i++) {
6181 ret = add_cache_extent(reada, bits[i].start,
6186 /* fixme, get the parent transid */
6187 readahead_tree_block(root, bits[i].start,
/* Process bits[0]; drop it from every tracking tree first. */
6191 *last = bits[0].start;
6192 bytenr = bits[0].start;
6193 size = bits[0].size;
6195 cache = lookup_cache_extent(pending, bytenr, size);
6197 remove_cache_extent(pending, cache);
6200 cache = lookup_cache_extent(reada, bytenr, size);
6202 remove_cache_extent(reada, cache);
6205 cache = lookup_cache_extent(nodes, bytenr, size);
6207 remove_cache_extent(nodes, cache);
6210 cache = lookup_cache_extent(extent_cache, bytenr, size);
6212 rec = container_of(cache, struct extent_record, cache);
6213 gen = rec->parent_generation;
6216 /* fixme, get the real parent transid */
6217 buf = read_tree_block(root, bytenr, size, gen);
6218 if (!extent_buffer_uptodate(buf)) {
6219 record_bad_block_io(root->fs_info,
6220 extent_cache, bytenr, size);
6224 nritems = btrfs_header_nritems(buf);
/* Determine the block's backref flags: prefer the extent tree's own
 * record unless we're rebuilding it, falling back to calculation. */
6227 if (!init_extent_tree) {
6228 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6229 btrfs_header_level(buf), 1, NULL,
6232 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6234 fprintf(stderr, "Couldn't calc extent flags\n");
6235 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6240 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6242 fprintf(stderr, "Couldn't calc extent flags\n");
6243 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* Cross-check FULL_BACKREF against how we reached this block. */
6247 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6249 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6250 ri->objectid == btrfs_header_owner(buf)) {
6252 * Ok we got to this block from it's original owner and
6253 * we have FULL_BACKREF set. Relocation can leave
6254 * converted blocks over so this is altogether possible,
6255 * however it's not possible if the generation > the
6256 * last snapshot, so check for this case.
6258 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6259 btrfs_header_generation(buf) > ri->last_snapshot) {
6260 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6261 rec->bad_full_backref = 1;
6266 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6267 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6268 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6269 rec->bad_full_backref = 1;
6273 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6274 rec->flag_block_full_backref = 1;
6278 rec->flag_block_full_backref = 0;
6280 owner = btrfs_header_owner(buf);
6283 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: dispatch each item by key type. */
6287 if (btrfs_is_leaf(buf)) {
6288 btree_space_waste += btrfs_leaf_free_space(root, buf);
6289 for (i = 0; i < nritems; i++) {
6290 struct btrfs_file_extent_item *fi;
6291 btrfs_item_key_to_cpu(buf, &key, i);
6293 * Check key type against the leaf owner.
6294 * Could filter quite a lot of early error if
6297 if (check_type_with_root(btrfs_header_owner(buf),
6299 fprintf(stderr, "ignoring invalid key\n");
6302 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6303 process_extent_item(root, extent_cache, buf,
6307 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6308 process_extent_item(root, extent_cache, buf,
6312 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6314 btrfs_item_size_nr(buf, i);
6317 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6318 process_chunk_item(chunk_cache, &key, buf, i);
6321 if (key.type == BTRFS_DEV_ITEM_KEY) {
6322 process_device_item(dev_cache, &key, buf, i);
6325 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6326 process_block_group_item(block_group_cache,
6330 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6331 process_device_extent_item(dev_extent_cache,
6336 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6337 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6338 process_extent_ref_v0(extent_cache, buf, i);
/* Standalone (non-inline) backref items feed the backref caches. */
6345 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6346 ret = add_tree_backref(extent_cache,
6347 key.objectid, 0, key.offset, 0);
6349 error("add_tree_backref failed: %s",
6353 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6354 ret = add_tree_backref(extent_cache,
6355 key.objectid, key.offset, 0, 0);
6357 error("add_tree_backref failed: %s",
6361 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6362 struct btrfs_extent_data_ref *ref;
6363 ref = btrfs_item_ptr(buf, i,
6364 struct btrfs_extent_data_ref);
6365 add_data_backref(extent_cache,
6367 btrfs_extent_data_ref_root(buf, ref),
6368 btrfs_extent_data_ref_objectid(buf,
6370 btrfs_extent_data_ref_offset(buf, ref),
6371 btrfs_extent_data_ref_count(buf, ref),
6372 0, root->sectorsize);
6375 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6376 struct btrfs_shared_data_ref *ref;
6377 ref = btrfs_item_ptr(buf, i,
6378 struct btrfs_shared_data_ref);
6379 add_data_backref(extent_cache,
6380 key.objectid, key.offset, 0, 0, 0,
6381 btrfs_shared_data_ref_count(buf, ref),
6382 0, root->sectorsize);
/* Orphan items are remembered for later deletion during repair. */
6385 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6386 struct bad_item *bad;
6388 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6392 bad = malloc(sizeof(struct bad_item));
6395 INIT_LIST_HEAD(&bad->list);
6396 memcpy(&bad->key, &key,
6397 sizeof(struct btrfs_key));
6398 bad->root_id = owner;
6399 list_add_tail(&bad->list, &delete_items);
/* Remaining interesting items are regular file extents. */
6402 if (key.type != BTRFS_EXTENT_DATA_KEY)
6404 fi = btrfs_item_ptr(buf, i,
6405 struct btrfs_file_extent_item);
6406 if (btrfs_file_extent_type(buf, fi) ==
6407 BTRFS_FILE_EXTENT_INLINE)
/* A zero disk bytenr marks a hole; no backref to record. */
6409 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6412 data_bytes_allocated +=
6413 btrfs_file_extent_disk_num_bytes(buf, fi);
6414 if (data_bytes_allocated < root->sectorsize) {
6417 data_bytes_referenced +=
6418 btrfs_file_extent_num_bytes(buf, fi);
6419 add_data_backref(extent_cache,
6420 btrfs_file_extent_disk_bytenr(buf, fi),
6421 parent, owner, key.objectid, key.offset -
6422 btrfs_file_extent_offset(buf, fi), 1, 1,
6423 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Node: record each child block and queue it for scanning. */
6427 struct btrfs_key first_key;
6429 first_key.objectid = 0;
6432 btrfs_item_key_to_cpu(buf, &first_key, 0);
6433 level = btrfs_header_level(buf);
6434 for (i = 0; i < nritems; i++) {
6435 struct extent_record tmpl;
6437 ptr = btrfs_node_blockptr(buf, i);
6438 size = root->nodesize;
6439 btrfs_node_key_to_cpu(buf, &key, i);
/* Skip children already dropped below the root's drop key. */
6441 if ((level == ri->drop_level)
6442 && is_dropped_key(&key, &ri->drop_key)) {
6447 memset(&tmpl, 0, sizeof(tmpl));
6448 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6449 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6454 tmpl.max_size = size;
6455 ret = add_extent_rec(extent_cache, &tmpl);
6459 ret = add_tree_backref(extent_cache, ptr, parent,
6462 error("add_tree_backref failed: %s",
/* Interior children go to 'nodes', level-1 children to 'pending'. */
6468 add_pending(nodes, seen, ptr, size);
6470 add_pending(pending, seen, ptr, size);
6473 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6474 nritems) * sizeof(struct btrfs_key_ptr);
/* Global accounting for the final summary. */
6476 total_btree_bytes += buf->len;
6477 if (fs_root_objectid(btrfs_header_owner(buf)))
6478 total_fs_tree_bytes += buf->len;
6479 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6480 total_extent_tree_bytes += buf->len;
6481 if (!found_old_backref &&
6482 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6483 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6484 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6485 found_old_backref = 1;
6487 free_extent_buffer(buf);
/*
 * Seed the scan with a tree root block: queue it in 'nodes' (if it is
 * an interior node) or 'pending' (if a leaf), create its extent record,
 * and add the appropriate tree backref (shared backref keyed by its own
 * bytenr for reloc/old-format trees, owner backref otherwise).
 *
 * NOTE(review): some original lines are elided in this excerpt.
 */
6491 static int add_root_to_pending(struct extent_buffer *buf,
6492 struct cache_tree *extent_cache,
6493 struct cache_tree *pending,
6494 struct cache_tree *seen,
6495 struct cache_tree *nodes,
6498 struct extent_record tmpl;
6501 if (btrfs_header_level(buf) > 0)
6502 add_pending(nodes, seen, buf->start, buf->len);
6504 add_pending(pending, seen, buf->start, buf->len);
6506 memset(&tmpl, 0, sizeof(tmpl));
6507 tmpl.start = buf->start;
6512 tmpl.max_size = buf->len;
6513 add_extent_rec(extent_cache, &tmpl);
/* Reloc trees and pre-mixed-backref blocks use shared backrefs. */
6515 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6516 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6517 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6520 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6525 /* as we fix the tree, we might be deleting blocks that
6526 * we're tracking for repair. This hook makes sure we
6527 * remove any backrefs for blocks as we are fixing them.
/*
 * Transaction hook invoked when an extent ref is freed during repair:
 * decrement the matching data or tree backref counters in the fsck
 * extent cache so the in-memory view stays in sync with the on-disk
 * fixes, then let maybe_free_extent_rec() drop fully-resolved records.
 *
 * NOTE(review): some original lines are elided in this excerpt.
 */
6529 static int free_extent_hook(struct btrfs_trans_handle *trans,
6530 struct btrfs_root *root,
6531 u64 bytenr, u64 num_bytes, u64 parent,
6532 u64 root_objectid, u64 owner, u64 offset,
6535 struct extent_record *rec;
6536 struct cache_extent *cache;
6538 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* Owners at or above FIRST_FREE_OBJECTID are file (data) extents. */
6540 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6541 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6545 rec = container_of(cache, struct extent_record, cache);
6547 struct data_backref *back;
6548 back = find_data_backref(rec, parent, root_objectid, owner,
6549 offset, 1, bytenr, num_bytes);
/* Drop the found-ref side of the accounting. */
6552 if (back->node.found_ref) {
6553 back->found_ref -= refs_to_drop;
6555 rec->refs -= refs_to_drop;
/* And the extent-tree side. */
6557 if (back->node.found_extent_tree) {
6558 back->num_refs -= refs_to_drop;
6559 if (rec->extent_item_refs)
6560 rec->extent_item_refs -= refs_to_drop;
6562 if (back->found_ref == 0)
6563 back->node.found_ref = 0;
6564 if (back->num_refs == 0)
6565 back->node.found_extent_tree = 0;
6567 if (!back->node.found_extent_tree && back->node.found_ref) {
6568 list_del(&back->node.list);
/* Tree (metadata) backref: same bookkeeping, single-ref semantics. */
6572 struct tree_backref *back;
6573 back = find_tree_backref(rec, parent, root_objectid);
6576 if (back->node.found_ref) {
6579 back->node.found_ref = 0;
6581 if (back->node.found_extent_tree) {
6582 if (rec->extent_item_refs)
6583 rec->extent_item_refs--;
6584 back->node.found_extent_tree = 0;
6586 if (!back->node.found_extent_tree && back->node.found_ref) {
6587 list_del(&back->node.list);
6591 maybe_free_extent_rec(extent_cache, rec);
/*
 * Repair helper: delete every extent-tree item (extent/metadata items
 * and all flavors of backref items) whose objectid equals 'bytenr',
 * scanning the keyspace backwards from offset (u64)-1.  When an
 * EXTENT_ITEM or METADATA_ITEM is removed, the block group usage is
 * adjusted via btrfs_update_block_group().
 *
 * NOTE(review): some original lines are elided in this excerpt;
 * 'new_len' handling is not visible here.
 */
6596 static int delete_extent_records(struct btrfs_trans_handle *trans,
6597 struct btrfs_root *root,
6598 struct btrfs_path *path,
6599 u64 bytenr, u64 new_len)
6601 struct btrfs_key key;
6602 struct btrfs_key found_key;
6603 struct extent_buffer *leaf;
6608 key.objectid = bytenr;
6610 key.offset = (u64)-1;
6613 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6620 if (path->slots[0] == 0)
6626 leaf = path->nodes[0];
6627 slot = path->slots[0];
6629 btrfs_item_key_to_cpu(leaf, &found_key, slot);
/* Past all items for this bytenr: done. */
6630 if (found_key.objectid != bytenr)
/* Only extent items and backref items are deleted here. */
6633 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6634 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6635 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6636 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6637 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6638 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6639 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6640 btrfs_release_path(path);
/* Step the search key to the predecessor of the found key. */
6641 if (found_key.type == 0) {
6642 if (found_key.offset == 0)
6644 key.offset = found_key.offset - 1;
6645 key.type = found_key.type;
6647 key.type = found_key.type - 1;
6648 key.offset = (u64)-1;
6652 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6653 found_key.objectid, found_key.type, found_key.offset);
6655 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6658 btrfs_release_path(path);
/* Deleting the extent item itself frees space in its block group.
 * METADATA_ITEM offsets encode the level, so use nodesize for size. */
6660 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6661 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6662 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6663 found_key.offset : root->nodesize;
6665 ret = btrfs_update_block_group(trans, root, bytenr,
6672 btrfs_release_path(path);
6677 * for a single backref, this will allocate a new extent
6678 * and add the backref to it.
/*
 * Repair helper: (re)create the extent item for 'rec' in the extent
 * tree (when 'allocated' indicates it is missing -- allocation branch
 * partially elided here) and then insert the given backref via
 * btrfs_inc_extent_ref(), handling both data and tree backrefs and
 * both shared (full_backref) and normal reference styles.
 *
 * NOTE(review): this excerpt elides many original lines.  Also note
 * two visible source-corruption artifacts a doc-only change cannot
 * touch: the "copy_key;;" declaration carries a stray extra semicolon,
 * and several calls below show mojibake "©_key" where the original
 * source reads "&copy_key" ("&copy" was mis-decoded as the © entity).
 */
6680 static int record_extent(struct btrfs_trans_handle *trans,
6681 struct btrfs_fs_info *info,
6682 struct btrfs_path *path,
6683 struct extent_record *rec,
6684 struct extent_backref *back,
6685 int allocated, u64 flags)
6688 struct btrfs_root *extent_root = info->extent_root;
6689 struct extent_buffer *leaf;
6690 struct btrfs_key ins_key;
6691 struct btrfs_extent_item *ei;
6692 struct tree_backref *tback;
6693 struct data_backref *dback;
6694 struct btrfs_tree_block_info *bi;
/* Metadata extents are at least one node in size. */
6697 rec->max_size = max_t(u64, rec->max_size,
6698 info->extent_root->nodesize);
/* Tree blocks need room for the trailing btrfs_tree_block_info. */
6701 u32 item_size = sizeof(*ei);
6704 item_size += sizeof(*bi);
6706 ins_key.objectid = rec->start;
6707 ins_key.offset = rec->max_size;
6708 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6710 ret = btrfs_insert_empty_item(trans, extent_root, path,
6711 &ins_key, item_size);
6715 leaf = path->nodes[0];
6716 ei = btrfs_item_ptr(leaf, path->slots[0],
6717 struct btrfs_extent_item);
/* Refs start at 0; btrfs_inc_extent_ref() below bumps them. */
6719 btrfs_set_extent_refs(leaf, ei, 0);
6720 btrfs_set_extent_generation(leaf, ei, rec->generation);
6722 if (back->is_data) {
6723 btrfs_set_extent_flags(leaf, ei,
6724 BTRFS_EXTENT_FLAG_DATA);
6726 struct btrfs_disk_key copy_key;;
6728 tback = to_tree_backref(back);
6729 bi = (struct btrfs_tree_block_info *)(ei + 1);
6730 memset_extent_buffer(leaf, 0, (unsigned long)bi,
/* (mojibake: the next three calls take &copy_key in the original) */
6733 btrfs_set_disk_key_objectid(©_key,
6734 rec->info_objectid);
6735 btrfs_set_disk_key_type(©_key, 0);
6736 btrfs_set_disk_key_offset(©_key, 0);
6738 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6739 btrfs_set_tree_block_key(leaf, bi, ©_key);
6741 btrfs_set_extent_flags(leaf, ei,
6742 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6745 btrfs_mark_buffer_dirty(leaf);
6746 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6747 rec->max_size, 1, 0);
6750 btrfs_release_path(path);
/* Now add the actual backref(s). */
6753 if (back->is_data) {
6757 dback = to_data_backref(back);
6758 if (back->full_backref)
6759 parent = dback->parent;
/* One ref insertion per reference we actually found in the fs trees. */
6763 for (i = 0; i < dback->found_ref; i++) {
6764 /* if parent != 0, we're doing a full backref
6765 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6766 * just makes the backref allocator create a data
6769 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6770 rec->start, rec->max_size,
6774 BTRFS_FIRST_FREE_OBJECTID :
6780 fprintf(stderr, "adding new data backref"
6781 " on %llu %s %llu owner %llu"
6782 " offset %llu found %d\n",
6783 (unsigned long long)rec->start,
6784 back->full_backref ?
6786 back->full_backref ?
6787 (unsigned long long)parent :
6788 (unsigned long long)dback->root,
6789 (unsigned long long)dback->owner,
6790 (unsigned long long)dback->offset,
/* Tree backref: a single ref, shared or keyed by owning root. */
6795 tback = to_tree_backref(back);
6796 if (back->full_backref)
6797 parent = tback->parent;
6801 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6802 rec->start, rec->max_size,
6803 parent, tback->root, 0, 0);
6804 fprintf(stderr, "adding new tree backref on "
6805 "start %llu len %llu parent %llu root %llu\n",
6806 rec->start, rec->max_size, parent, tback->root);
6809 btrfs_release_path(path);
/*
 * Linear search of an extent_entry list for the entry matching both
 * 'bytenr' and 'bytes' exactly; the elided lines presumably return the
 * match, or NULL when the list holds no such entry.
 */
6813 static struct extent_entry *find_entry(struct list_head *entries,
6814 u64 bytenr, u64 bytes)
6816 struct extent_entry *entry = NULL;
6818 list_for_each_entry(entry, entries, list) {
6819 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Pick the most trustworthy extent_entry from a list of conflicting
 * candidates: skip entries whose refs are all broken, and prefer the
 * entry with the highest (non-ambiguous) reference count.  Returns
 * NULL when no single entry clearly wins (ties between equally-counted
 * entries -- tie-break details partially elided in this excerpt).
 */
6826 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6828 struct extent_entry *entry, *best = NULL, *prev = NULL;
6830 list_for_each_entry(entry, entries, list) {
6837 * If there are as many broken entries as entries then we know
6838 * not to trust this particular entry.
6840 if (entry->broken == entry->count)
6844 * If our current entry == best then we can't be sure our best
6845 * is really the best, so we need to keep searching.
6847 if (best && best->count == entry->count) {
6853 /* Prev == entry, not good enough, have to keep searching */
6854 if (!prev->broken && prev->count == entry->count)
6858 best = (prev->count > entry->count) ? prev : entry;
6859 else if (best->count < entry->count)
6867 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6868 struct data_backref *dback, struct extent_entry *entry)
6870 struct btrfs_trans_handle *trans;
6871 struct btrfs_root *root;
6872 struct btrfs_file_extent_item *fi;
6873 struct extent_buffer *leaf;
6874 struct btrfs_key key;
6878 key.objectid = dback->root;
6879 key.type = BTRFS_ROOT_ITEM_KEY;
6880 key.offset = (u64)-1;
6881 root = btrfs_read_fs_root(info, &key);
6883 fprintf(stderr, "Couldn't find root for our ref\n");
6888 * The backref points to the original offset of the extent if it was
6889 * split, so we need to search down to the offset we have and then walk
6890 * forward until we find the backref we're looking for.
6892 key.objectid = dback->owner;
6893 key.type = BTRFS_EXTENT_DATA_KEY;
6894 key.offset = dback->offset;
6895 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6897 fprintf(stderr, "Error looking up ref %d\n", ret);
6902 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6903 ret = btrfs_next_leaf(root, path);
6905 fprintf(stderr, "Couldn't find our ref, next\n");
6909 leaf = path->nodes[0];
6910 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6911 if (key.objectid != dback->owner ||
6912 key.type != BTRFS_EXTENT_DATA_KEY) {
6913 fprintf(stderr, "Couldn't find our ref, search\n");
6916 fi = btrfs_item_ptr(leaf, path->slots[0],
6917 struct btrfs_file_extent_item);
6918 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6919 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6921 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6926 btrfs_release_path(path);
6928 trans = btrfs_start_transaction(root, 1);
6930 return PTR_ERR(trans);
6933 * Ok we have the key of the file extent we want to fix, now we can cow
6934 * down to the thing and fix it.
6936 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6938 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6939 key.objectid, key.type, key.offset, ret);
6943 fprintf(stderr, "Well that's odd, we just found this key "
6944 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6949 leaf = path->nodes[0];
6950 fi = btrfs_item_ptr(leaf, path->slots[0],
6951 struct btrfs_file_extent_item);
6953 if (btrfs_file_extent_compression(leaf, fi) &&
6954 dback->disk_bytenr != entry->bytenr) {
6955 fprintf(stderr, "Ref doesn't match the record start and is "
6956 "compressed, please take a btrfs-image of this file "
6957 "system and send it to a btrfs developer so they can "
6958 "complete this functionality for bytenr %Lu\n",
6959 dback->disk_bytenr);
6964 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6965 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6966 } else if (dback->disk_bytenr > entry->bytenr) {
6967 u64 off_diff, offset;
6969 off_diff = dback->disk_bytenr - entry->bytenr;
6970 offset = btrfs_file_extent_offset(leaf, fi);
6971 if (dback->disk_bytenr + offset +
6972 btrfs_file_extent_num_bytes(leaf, fi) >
6973 entry->bytenr + entry->bytes) {
6974 fprintf(stderr, "Ref is past the entry end, please "
6975 "take a btrfs-image of this file system and "
6976 "send it to a btrfs developer, ref %Lu\n",
6977 dback->disk_bytenr);
6982 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6983 btrfs_set_file_extent_offset(leaf, fi, offset);
6984 } else if (dback->disk_bytenr < entry->bytenr) {
6987 offset = btrfs_file_extent_offset(leaf, fi);
6988 if (dback->disk_bytenr + offset < entry->bytenr) {
6989 fprintf(stderr, "Ref is before the entry start, please"
6990 " take a btrfs-image of this file system and "
6991 "send it to a btrfs developer, ref %Lu\n",
6992 dback->disk_bytenr);
6997 offset += dback->disk_bytenr;
6998 offset -= entry->bytenr;
6999 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7000 btrfs_set_file_extent_offset(leaf, fi, offset);
7003 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7006 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7007 * only do this if we aren't using compression, otherwise it's a
7010 if (!btrfs_file_extent_compression(leaf, fi))
7011 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7013 printf("ram bytes may be wrong?\n");
7014 btrfs_mark_buffer_dirty(leaf);
7016 err = btrfs_commit_transaction(trans, root);
7017 btrfs_release_path(path);
7018 return ret ? ret : err;
/*
 * Reconcile all data backrefs of @rec onto one agreed (bytenr, bytes) pair,
 * then rewrite each ref that disagrees via repair_ref().
 *
 * NOTE(review): this span appears to be a lossy extraction — interior lines
 * (braces, gotos, some declarations) are missing; confirm against the full
 * source before relying on exact control flow.
 */
7021 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7022 struct extent_record *rec)
7024 struct extent_backref *back;
7025 struct data_backref *dback;
7026 struct extent_entry *entry, *best = NULL;
7029 int broken_entries = 0;
7034 * Metadata is easy and the backrefs should always agree on bytenr and
7035 * size, if not we've got bigger issues.
/* Pass 1: bucket every found data backref into (bytenr, bytes) entries. */
7040 list_for_each_entry(back, &rec->backrefs, list) {
7041 if (back->full_backref || !back->is_data)
7044 dback = to_data_backref(back);
7047 * We only pay attention to backrefs that we found a real
7050 if (dback->found_ref == 0)
7054 * For now we only catch when the bytes don't match, not the
7055 * bytenr. We can easily do this at the same time, but I want
7056 * to have a fs image to test on before we just add repair
7057 * functionality willy-nilly so we know we won't screw up the
7061 entry = find_entry(&entries, dback->disk_bytenr,
7064 entry = malloc(sizeof(struct extent_entry));
7069 memset(entry, 0, sizeof(*entry));
7070 entry->bytenr = dback->disk_bytenr;
7071 entry->bytes = dback->bytes;
7072 list_add_tail(&entry->list, &entries);
7077 * If we only have one entry we may think the entries agree when
7078 * in reality they don't so we have to do some extra checking.
7080 if (dback->disk_bytenr != rec->start ||
7081 dback->bytes != rec->nr || back->broken)
7092 /* Yay all the backrefs agree, carry on good sir */
7093 if (nr_entries <= 1 && !mismatch)
7096 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7097 "%Lu\n", rec->start);
7100 * First we want to see if the backrefs can agree amongst themselves who
7101 * is right, so figure out which one of the entries has the highest
7104 best = find_most_right_entry(&entries);
7107 * Ok so we may have an even split between what the backrefs think, so
7108 * this is where we use the extent ref to see what it thinks.
7111 entry = find_entry(&entries, rec->start, rec->nr);
7112 if (!entry && (!broken_entries || !rec->found_rec)) {
7113 fprintf(stderr, "Backrefs don't agree with each other "
7114 "and extent record doesn't agree with anybody,"
7115 " so we can't fix bytenr %Lu bytes %Lu\n",
7116 rec->start, rec->nr);
7119 } else if (!entry) {
7121 * Ok our backrefs were broken, we'll assume this is the
7122 * correct value and add an entry for this range.
7124 entry = malloc(sizeof(struct extent_entry));
7129 memset(entry, 0, sizeof(*entry));
7130 entry->bytenr = rec->start;
7131 entry->bytes = rec->nr;
7132 list_add_tail(&entry->list, &entries);
7136 best = find_most_right_entry(&entries);
7138 fprintf(stderr, "Backrefs and extent record evenly "
7139 "split on who is right, this is going to "
7140 "require user input to fix bytenr %Lu bytes "
7141 "%Lu\n", rec->start, rec->nr);
7148 * I don't think this can happen currently as we'll abort() if we catch
7149 * this case higher up, but in case somebody removes that we still can't
7150 * deal with it properly here yet, so just bail out if that's the case.
7152 if (best->bytenr != rec->start) {
7153 fprintf(stderr, "Extent start and backref starts don't match, "
7154 "please use btrfs-image on this file system and send "
7155 "it to a btrfs developer so they can make fsck fix "
7156 "this particular case. bytenr is %Lu, bytes is %Lu\n",
7157 rec->start, rec->nr);
/* Pass 2: rewrite every found data ref that disagrees with the winner. */
7163 * Ok great we all agreed on an extent record, let's go find the real
7164 * references and fix up the ones that don't match.
7166 list_for_each_entry(back, &rec->backrefs, list) {
7167 if (back->full_backref || !back->is_data)
7170 dback = to_data_backref(back);
7173 * Still ignoring backrefs that don't have a real ref attached
7176 if (dback->found_ref == 0)
7179 if (dback->bytes == best->bytes &&
7180 dback->disk_bytenr == best->bytenr)
7183 ret = repair_ref(info, path, dback, best);
7189 * Ok we messed with the actual refs, which means we need to drop our
7190 * entire cache and go back and rescan. I know this is a huge pain and
7191 * adds a lot of extra work, but it's the only way to be safe. Once all
7192 * the backrefs agree we may not need to do anything to the extent
/* Free the temporary (bytenr, bytes) entry list before returning. */
7197 while (!list_empty(&entries)) {
7198 entry = list_entry(entries.next, struct extent_entry, list);
7199 list_del_init(&entry->list);
/*
 * Promote the single duplicate on @rec->dups to be the canonical
 * extent_record in @extent_cache, merging any overlapping records it
 * swallows along the way.  Returns 0 when duplicates remain to be
 * deleted, 1 otherwise.
 */
7205 static int process_duplicates(struct btrfs_root *root,
7206 struct cache_tree *extent_cache,
7207 struct extent_record *rec)
7209 struct extent_record *good, *tmp;
7210 struct cache_extent *cache;
7214 * If we found an extent record for this extent then return, or if we
7215 * have more than one duplicate we are likely going to need to delete
7218 if (rec->found_rec || rec->num_duplicates > 1)
7221 /* Shouldn't happen but just in case */
7222 BUG_ON(!rec->num_duplicates);
7225 * So this happens if we end up with a backref that doesn't match the
7226 * actual extent entry. So either the backref is bad or the extent
7227 * entry is bad. Either way we want to have the extent_record actually
7228 * reflect what we found in the extent_tree, so we need to take the
7229 * duplicate out and use that as the extent_record since the only way we
7230 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7232 remove_cache_extent(extent_cache, &rec->cache);
/* The duplicate becomes the "good" record; reset its bookkeeping. */
7234 good = to_extent_record(rec->dups.next);
7235 list_del_init(&good->list);
7236 INIT_LIST_HEAD(&good->backrefs);
7237 INIT_LIST_HEAD(&good->dups);
7238 good->cache.start = good->start;
7239 good->cache.size = good->nr;
7240 good->content_checked = 0;
7241 good->owner_ref_checked = 0;
7242 good->num_duplicates = 0;
7243 good->refs = rec->refs;
7244 list_splice_init(&rec->backrefs, &good->backrefs);
/* Merge any records in the cache that overlap the promoted range. */
7246 cache = lookup_cache_extent(extent_cache, good->start,
7250 tmp = container_of(cache, struct extent_record, cache);
7253 * If we find another overlapping extent and its found_rec is
7254 * set then it's a duplicate and we need to try and delete
7257 if (tmp->found_rec || tmp->num_duplicates > 0) {
7258 if (list_empty(&good->list))
7259 list_add_tail(&good->list,
7260 &duplicate_extents);
7261 good->num_duplicates += tmp->num_duplicates + 1;
7262 list_splice_init(&tmp->dups, &good->dups);
7263 list_del_init(&tmp->list);
7264 list_add_tail(&tmp->list, &good->dups);
7265 remove_cache_extent(extent_cache, &tmp->cache);
7270 * Ok we have another non extent item backed extent rec, so lets
7271 * just add it to this extent and carry on like we did above.
7273 good->refs += tmp->refs;
7274 list_splice_init(&tmp->backrefs, &good->backrefs);
7275 remove_cache_extent(extent_cache, &tmp->cache);
7278 ret = insert_cache_extent(extent_cache, &good->cache);
7281 return good->num_duplicates ? 0 : 1;
/*
 * Delete duplicate EXTENT_ITEMs for @rec from the extent tree, keeping the
 * record that fully covers all the others.  Returns <0 on error, otherwise
 * the number of items deleted (0 clears rec->num_duplicates).
 */
7284 static int delete_duplicate_records(struct btrfs_root *root,
7285 struct extent_record *rec)
7287 struct btrfs_trans_handle *trans;
7288 LIST_HEAD(delete_list);
7289 struct btrfs_path *path;
7290 struct extent_record *tmp, *good, *n;
7293 struct btrfs_key key;
7295 path = btrfs_alloc_path();
7302 /* Find the record that covers all of the duplicates. */
7303 list_for_each_entry(tmp, &rec->dups, list) {
7304 if (good->start < tmp->start)
7306 if (good->nr > tmp->nr)
/* Partial overlap that is not full containment cannot be auto-repaired. */
7309 if (tmp->start + tmp->nr < good->start + good->nr) {
7310 fprintf(stderr, "Ok we have overlapping extents that "
7311 "aren't completely covered by each other, this "
7312 "is going to require more careful thought. "
7313 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7314 tmp->start, tmp->nr, good->start, good->nr);
/* Everything except "good" goes on the delete list. */
7321 list_add_tail(&rec->list, &delete_list);
7323 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7326 list_move_tail(&tmp->list, &delete_list);
7329 root = root->fs_info->extent_root;
7330 trans = btrfs_start_transaction(root, 1);
7331 if (IS_ERR(trans)) {
7332 ret = PTR_ERR(trans);
7336 list_for_each_entry(tmp, &delete_list, list) {
7337 if (tmp->found_rec == 0)
7339 key.objectid = tmp->start;
7340 key.type = BTRFS_EXTENT_ITEM_KEY;
7341 key.offset = tmp->nr;
7343 /* Shouldn't happen but just in case */
7344 if (tmp->metadata) {
7345 fprintf(stderr, "Well this shouldn't happen, extent "
7346 "record overlaps but is metadata? "
7347 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7351 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7357 ret = btrfs_del_item(trans, root, path);
7360 btrfs_release_path(path);
7363 err = btrfs_commit_transaction(trans, root);
/* Free the records we consumed, whether or not deletion succeeded. */
7367 while (!list_empty(&delete_list)) {
7368 tmp = to_extent_record(delete_list.next);
7369 list_del_init(&tmp->list);
7375 while (!list_empty(&rec->dups)) {
7376 tmp = to_extent_record(rec->dups.next);
7377 list_del_init(&tmp->list);
7381 btrfs_free_path(path);
7383 if (!ret && !nr_del)
7384 rec->num_duplicates = 0;
7386 return ret ? ret : nr_del;
/*
 * For each data backref of @rec that was never matched to a real file
 * extent (found_ref == 0), look up the owning fs root and file extent item
 * to recover its actual disk_bytenr/disk_num_bytes, so verify_backrefs()
 * has real values to reconcile.
 */
7389 static int find_possible_backrefs(struct btrfs_fs_info *info,
7390 struct btrfs_path *path,
7391 struct cache_tree *extent_cache,
7392 struct extent_record *rec)
7394 struct btrfs_root *root;
7395 struct extent_backref *back;
7396 struct data_backref *dback;
7397 struct cache_extent *cache;
7398 struct btrfs_file_extent_item *fi;
7399 struct btrfs_key key;
7403 list_for_each_entry(back, &rec->backrefs, list) {
7404 /* Don't care about full backrefs (poor unloved backrefs) */
7405 if (back->full_backref || !back->is_data)
7408 dback = to_data_backref(back);
7410 /* We found this one, we don't need to do a lookup */
7411 if (dback->found_ref)
/* Resolve the subvolume root that owns this backref. */
7414 key.objectid = dback->root;
7415 key.type = BTRFS_ROOT_ITEM_KEY;
7416 key.offset = (u64)-1;
7418 root = btrfs_read_fs_root(info, &key);
7420 /* No root, definitely a bad ref, skip */
7421 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7423 /* Other err, exit */
7425 return PTR_ERR(root);
/* Look up the file extent item the backref claims to describe. */
7427 key.objectid = dback->owner;
7428 key.type = BTRFS_EXTENT_DATA_KEY;
7429 key.offset = dback->offset;
7430 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7432 btrfs_release_path(path);
7435 /* Didn't find it, we can carry on */
7440 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7441 struct btrfs_file_extent_item);
7442 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7443 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7444 btrfs_release_path(path);
7445 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7447 struct extent_record *tmp;
7448 tmp = container_of(cache, struct extent_record, cache);
7451 * If we found an extent record for the bytenr for this
7452 * particular backref then we can't add it to our
7453 * current extent record. We only want to add backrefs
7454 * that don't have a corresponding extent item in the
7455 * extent tree since they likely belong to this record
7456 * and we need to fix it if it doesn't match bytenrs.
/* Record the on-disk values we actually found for this ref. */
7462 dback->found_ref += 1;
7463 dback->disk_bytenr = bytenr;
7464 dback->bytes = bytes;
7467 * Set this so the verify backref code knows not to trust the
7468 * values in this backref.
7477 * Record orphan data ref into corresponding root.
7479 * Return 0 if the extent item contains data ref and recorded.
7480 * Return 1 if the extent item contains no useful data ref
7481 * In that case, it may contain only a shared_dataref or metadata backref,
7482 * or the file extent exists (this should be handled by the extent bytenr
7484 * Return <0 if something goes wrong.
7486 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7487 struct extent_record *rec)
7489 struct btrfs_key key;
7490 struct btrfs_root *dest_root;
7491 struct extent_backref *back;
7492 struct data_backref *dback;
7493 struct orphan_data_extent *orphan;
7494 struct btrfs_path *path;
7495 int recorded_data_ref = 0;
7500 path = btrfs_alloc_path();
/* Only consider data backrefs present in the extent tree but whose
 * file extent was never found in the fs tree. */
7503 list_for_each_entry(back, &rec->backrefs, list) {
7504 if (back->full_backref || !back->is_data ||
7505 !back->found_extent_tree)
7507 dback = to_data_backref(back);
7508 if (dback->found_ref)
7510 key.objectid = dback->root;
7511 key.type = BTRFS_ROOT_ITEM_KEY;
7512 key.offset = (u64)-1;
7514 dest_root = btrfs_read_fs_root(fs_info, &key);
7516 /* For non-exist root we just skip it */
7517 if (IS_ERR(dest_root) || !dest_root)
7520 key.objectid = dback->owner;
7521 key.type = BTRFS_EXTENT_DATA_KEY;
7522 key.offset = dback->offset;
7524 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7525 btrfs_release_path(path);
7527 * For ret < 0, it's OK since the fs-tree may be corrupted,
7528 * we need to record it for inode/file extent rebuild.
7529 * For ret > 0, we record it only for file extent rebuild.
7530 * For ret == 0, the file extent exists but only bytenr
7531 * mismatch, let the original bytenr fix routine to handle,
/* Queue the orphan extent on the destination root for later rebuild. */
7537 orphan = malloc(sizeof(*orphan));
7542 INIT_LIST_HEAD(&orphan->list);
7543 orphan->root = dback->root;
7544 orphan->objectid = dback->owner;
7545 orphan->offset = dback->offset;
7546 orphan->disk_bytenr = rec->cache.start;
7547 orphan->disk_len = rec->cache.size;
7548 list_add(&dest_root->orphan_data_extents, &orphan->list);
7549 recorded_data_ref = 1;
7552 btrfs_free_path(path);
7554 return !recorded_data_ref;
7560 * when an incorrect extent item is found, this will delete
7561 * all of the existing entries for it and recreate them
7562 * based on what the tree scan found.
7564 static int fixup_extent_refs(struct btrfs_fs_info *info,
7565 struct cache_tree *extent_cache,
7566 struct extent_record *rec)
7568 struct btrfs_trans_handle *trans = NULL;
7570 struct btrfs_path *path;
7571 struct list_head *cur = rec->backrefs.next;
7572 struct cache_extent *cache;
7573 struct extent_backref *back;
7577 if (rec->flag_block_full_backref)
7578 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7580 path = btrfs_alloc_path();
7584 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7586 * Sometimes the backrefs themselves are so broken they don't
7587 * get attached to any meaningful rec, so first go back and
7588 * check any of our backrefs that we couldn't find and throw
7589 * them into the list if we find the backref so that
7590 * verify_backrefs can figure out what to do.
7592 ret = find_possible_backrefs(info, path, extent_cache, rec);
7597 /* step one, make sure all of the backrefs agree */
7598 ret = verify_backrefs(info, path, rec);
7602 trans = btrfs_start_transaction(info->extent_root, 1);
7603 if (IS_ERR(trans)) {
7604 ret = PTR_ERR(trans);
7608 /* step two, delete all the existing records */
7609 ret = delete_extent_records(trans, info->extent_root, path,
7610 rec->start, rec->max_size);
7615 /* was this block corrupt? If so, don't add references to it */
7616 cache = lookup_cache_extent(info->corrupt_blocks,
7617 rec->start, rec->max_size);
7623 /* step three, recreate all the refs we did find */
7624 while(cur != &rec->backrefs) {
7625 back = to_extent_backref(cur);
7629 * if we didn't find any references, don't create a
7632 if (!back->found_ref)
7635 rec->bad_full_backref = 0;
7636 ret = record_extent(trans, info, path, rec, back, allocated, flags);
/* Commit whatever we managed to rewrite; preserve the first error. */
7644 int err = btrfs_commit_transaction(trans, info->extent_root);
7649 btrfs_free_path(path);
/*
 * Rewrite the flags of @rec's extent item in the extent tree: set or clear
 * BTRFS_BLOCK_FLAG_FULL_BACKREF according to rec->flag_block_full_backref.
 */
7653 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7654 struct extent_record *rec)
7656 struct btrfs_trans_handle *trans;
7657 struct btrfs_root *root = fs_info->extent_root;
7658 struct btrfs_path *path;
7659 struct btrfs_extent_item *ei;
7660 struct btrfs_key key;
/* Metadata may be keyed as METADATA_ITEM (offset = level) instead of
 * EXTENT_ITEM (offset = size). */
7664 key.objectid = rec->start;
7665 if (rec->metadata) {
7666 key.type = BTRFS_METADATA_ITEM_KEY;
7667 key.offset = rec->info_level;
7669 key.type = BTRFS_EXTENT_ITEM_KEY;
7670 key.offset = rec->max_size;
7673 path = btrfs_alloc_path();
7677 trans = btrfs_start_transaction(root, 0);
7678 if (IS_ERR(trans)) {
7679 btrfs_free_path(path);
7680 return PTR_ERR(trans);
7683 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7685 btrfs_free_path(path);
7686 btrfs_commit_transaction(trans, root);
7689 fprintf(stderr, "Didn't find extent for %llu\n",
7690 (unsigned long long)rec->start);
7691 btrfs_free_path(path);
7692 btrfs_commit_transaction(trans, root);
7696 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7697 struct btrfs_extent_item);
7698 flags = btrfs_extent_flags(path->nodes[0], ei);
7699 if (rec->flag_block_full_backref) {
7700 fprintf(stderr, "setting full backref on %llu\n",
7701 (unsigned long long)key.objectid);
7702 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7704 fprintf(stderr, "clearing full backref on %llu\n",
7705 (unsigned long long)key.objectid);
7706 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7708 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7709 btrfs_mark_buffer_dirty(path->nodes[0]);
7710 btrfs_free_path(path);
7711 return btrfs_commit_transaction(trans, root);
7714 /* right now we only prune from the extent allocation tree */
7715 static int prune_one_block(struct btrfs_trans_handle *trans,
7716 struct btrfs_fs_info *info,
7717 struct btrfs_corrupt_block *corrupt)
7720 struct btrfs_path path;
7721 struct extent_buffer *eb;
7725 int level = corrupt->level + 1;
7727 btrfs_init_path(&path);
7729 /* we want to stop at the parent to our busted block */
7730 path.lowest_level = level;
7732 ret = btrfs_search_slot(trans, info->extent_root,
7733 &corrupt->key, &path, -1, 1);
7738 eb = path.nodes[level];
7745 * hopefully the search gave us the block we want to prune,
7746 * lets try that first
7748 slot = path.slots[level];
7749 found = btrfs_node_blockptr(eb, slot);
7750 if (found == corrupt->cache.start)
7753 nritems = btrfs_header_nritems(eb);
7755 /* the search failed, lets scan this node and hope we find it */
7756 for (slot = 0; slot < nritems; slot++) {
7757 found = btrfs_node_blockptr(eb, slot);
7758 if (found == corrupt->cache.start)
7762 * we couldn't find the bad block. TODO, search all the nodes for pointers
7765 if (eb == info->extent_root->node) {
7770 btrfs_release_path(&path);
/* Found the pointer to the corrupt block: remove it from the parent. */
7775 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7776 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7779 btrfs_release_path(&path);
/*
 * Walk the fs_info->corrupt_blocks cache and prune each recorded corrupt
 * block out of the extent tree, committing one transaction at the end.
 */
7783 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7785 struct btrfs_trans_handle *trans = NULL;
7786 struct cache_extent *cache;
7787 struct btrfs_corrupt_block *corrupt;
7790 cache = search_cache_extent(info->corrupt_blocks, 0);
/* The transaction is started lazily, only once a corrupt block exists. */
7794 trans = btrfs_start_transaction(info->extent_root, 1);
7796 return PTR_ERR(trans);
7798 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7799 prune_one_block(trans, info, corrupt);
7800 remove_cache_extent(info->corrupt_blocks, cache);
7803 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Clear the EXTENT_DIRTY ranges from the free-space cache and walk the
 * block groups they cover so their cached state can be reset.
 */
7807 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7809 struct btrfs_block_group_cache *cache;
7814 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7815 &start, &end, EXTENT_DIRTY);
7818 clear_extent_dirty(&fs_info->free_space_cache, start, end,
/* Advance through every block group from the start of the space. */
7824 cache = btrfs_lookup_first_block_group(fs_info, start);
7829 start = cache->key.objectid + cache->key.offset;
/*
 * Main verification pass over the accumulated extent records: report (and,
 * in repair mode, fix) duplicate extent items, ref-count mismatches,
 * backpointer/owner-ref failures, bad full-backref flags, stripe-crossing
 * metadata and chunk-type mismatches.
 */
7833 static int check_extent_refs(struct btrfs_root *root,
7834 struct cache_tree *extent_cache)
7836 struct extent_record *rec;
7837 struct cache_extent *cache;
7846 * if we're doing a repair, we have to make sure
7847 * we don't allocate from the problem extents.
7848 * In the worst case, this will be all the
7851 cache = search_cache_extent(extent_cache, 0);
7853 rec = container_of(cache, struct extent_record, cache);
7854 set_extent_dirty(root->fs_info->excluded_extents,
7856 rec->start + rec->max_size - 1,
7858 cache = next_cache_extent(cache);
7861 /* pin down all the corrupted blocks too */
7862 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7864 set_extent_dirty(root->fs_info->excluded_extents,
7866 cache->start + cache->size - 1,
7868 cache = next_cache_extent(cache);
7870 prune_corrupt_blocks(root->fs_info);
7871 reset_cached_block_groups(root->fs_info);
7874 reset_cached_block_groups(root->fs_info);
7877 * We need to delete any duplicate entries we find first otherwise we
7878 * could mess up the extent tree when we have backrefs that actually
7879 * belong to a different extent item and not the weird duplicate one.
7881 while (repair && !list_empty(&duplicate_extents)) {
7882 rec = to_extent_record(duplicate_extents.next);
7883 list_del_init(&rec->list);
7885 /* Sometimes we can find a backref before we find an actual
7886 * extent, so we need to process it a little bit to see if there
7887 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7888 * if this is a backref screwup. If we need to delete stuff
7889 * process_duplicates() will return 0, otherwise it will return
7892 if (process_duplicates(root, extent_cache, rec))
7894 ret = delete_duplicate_records(root, rec);
7898 * delete_duplicate_records will return the number of entries
7899 * deleted, so if it's greater than 0 then we know we actually
7900 * did something and we need to remove.
/* Main loop: examine and drain every extent record in the cache. */
7914 cache = search_cache_extent(extent_cache, 0);
7917 rec = container_of(cache, struct extent_record, cache);
7918 if (rec->num_duplicates) {
7919 fprintf(stderr, "extent item %llu has multiple extent "
7920 "items\n", (unsigned long long)rec->start);
7925 if (rec->refs != rec->extent_item_refs) {
7926 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7927 (unsigned long long)rec->start,
7928 (unsigned long long)rec->nr);
7929 fprintf(stderr, "extent item %llu, found %llu\n",
7930 (unsigned long long)rec->extent_item_refs,
7931 (unsigned long long)rec->refs);
7932 ret = record_orphan_data_extents(root->fs_info, rec);
7939 * we can't use the extent to repair file
7940 * extent, let the fallback method handle it.
7942 if (!fixed && repair) {
7943 ret = fixup_extent_refs(
7954 if (all_backpointers_checked(rec, 1)) {
7955 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7956 (unsigned long long)rec->start,
7957 (unsigned long long)rec->nr);
7959 if (!fixed && !recorded && repair) {
7960 ret = fixup_extent_refs(root->fs_info,
7969 if (!rec->owner_ref_checked) {
7970 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7971 (unsigned long long)rec->start,
7972 (unsigned long long)rec->nr);
7973 if (!fixed && !recorded && repair) {
7974 ret = fixup_extent_refs(root->fs_info,
7983 if (rec->bad_full_backref) {
7984 fprintf(stderr, "bad full backref, on [%llu]\n",
7985 (unsigned long long)rec->start);
7987 ret = fixup_extent_flags(root->fs_info, rec);
7996 * Although it's not an extent ref's problem, we reuse this
7997 * routine for error reporting.
7998 * No repair function yet.
8000 if (rec->crossing_stripes) {
8002 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8003 rec->start, rec->start + rec->max_size);
8008 if (rec->wrong_chunk_type) {
8010 "bad extent [%llu, %llu), type mismatch with chunk\n",
8011 rec->start, rec->start + rec->max_size);
/* Drop the record, and unpin its range if it ended up clean/fixed. */
8016 remove_cache_extent(extent_cache, cache);
8017 free_all_extent_backrefs(rec);
8018 if (!init_extent_tree && repair && (!cur_err || fixed))
8019 clear_extent_dirty(root->fs_info->excluded_extents,
8021 rec->start + rec->max_size - 1,
8027 if (ret && ret != -EAGAIN) {
8028 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
/* After repairs, re-sync block accounting in its own transaction. */
8031 struct btrfs_trans_handle *trans;
8033 root = root->fs_info->extent_root;
8034 trans = btrfs_start_transaction(root, 1);
8035 if (IS_ERR(trans)) {
8036 ret = PTR_ERR(trans);
8040 btrfs_fix_block_accounting(trans, root);
8041 ret = btrfs_commit_transaction(trans, root);
8046 fprintf(stderr, "repaired damaged extent references\n");
/*
 * Compute the per-device stripe length for a chunk of @length bytes with
 * @num_stripes stripes, based on the RAID profile bits in @type:
 * RAID0/RAID10 divide the (mirrored) length across all stripes, RAID5/6
 * exclude one/two parity stripes, everything else uses the full length.
 */
8052 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8056 if (type & BTRFS_BLOCK_GROUP_RAID0) {
8057 stripe_size = length;
8058 stripe_size /= num_stripes;
8059 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
/* RAID10 stores two copies, so twice the length is spread over stripes. */
8060 stripe_size = length * 2;
8061 stripe_size /= num_stripes;
8062 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8063 stripe_size = length;
8064 stripe_size /= (num_stripes - 1);
8065 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8066 stripe_size = length;
8067 stripe_size /= (num_stripes - 2);
8069 stripe_size = length;
8075 * Check the chunk with its block group/dev list ref:
8076 * Return 0 if all refs seems valid.
8077 * Return 1 if part of refs seems valid, need later check for rebuild ref
8078 * like missing block group and needs to search extent tree to rebuild them.
8079 * Return -1 if essential refs are missing and unable to rebuild.
8081 static int check_chunk_refs(struct chunk_record *chunk_rec,
8082 struct block_group_tree *block_group_cache,
8083 struct device_extent_tree *dev_extent_cache,
8086 struct cache_extent *block_group_item;
8087 struct block_group_record *block_group_rec;
8088 struct cache_extent *dev_extent_item;
8089 struct device_extent_record *dev_extent_rec;
8093 int metadump_v2 = 0;
/* Step 1: the chunk must have a matching block group item. */
8097 block_group_item = lookup_cache_extent(&block_group_cache->tree,
8100 if (block_group_item) {
8101 block_group_rec = container_of(block_group_item,
8102 struct block_group_record,
8104 if (chunk_rec->length != block_group_rec->offset ||
8105 chunk_rec->offset != block_group_rec->objectid ||
8107 chunk_rec->type_flags != block_group_rec->flags)) {
8110 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8111 chunk_rec->objectid,
8116 chunk_rec->type_flags,
8117 block_group_rec->objectid,
8118 block_group_rec->type,
8119 block_group_rec->offset,
8120 block_group_rec->offset,
8121 block_group_rec->objectid,
8122 block_group_rec->flags);
8125 list_del_init(&block_group_rec->list);
8126 chunk_rec->bg_rec = block_group_rec;
8131 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8132 chunk_rec->objectid,
8137 chunk_rec->type_flags);
/* Step 2: every stripe must have a matching dev extent of the right
 * device/offset/length. */
8144 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8145 chunk_rec->num_stripes);
8146 for (i = 0; i < chunk_rec->num_stripes; ++i) {
8147 devid = chunk_rec->stripes[i].devid;
8148 offset = chunk_rec->stripes[i].offset;
8149 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8150 devid, offset, length);
8151 if (dev_extent_item) {
8152 dev_extent_rec = container_of(dev_extent_item,
8153 struct device_extent_record,
8155 if (dev_extent_rec->objectid != devid ||
8156 dev_extent_rec->offset != offset ||
8157 dev_extent_rec->chunk_offset != chunk_rec->offset ||
8158 dev_extent_rec->length != length) {
8161 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8162 chunk_rec->objectid,
8165 chunk_rec->stripes[i].devid,
8166 chunk_rec->stripes[i].offset,
8167 dev_extent_rec->objectid,
8168 dev_extent_rec->offset,
8169 dev_extent_rec->length);
8172 list_move(&dev_extent_rec->chunk_list,
8173 &chunk_rec->dextents);
8178 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8179 chunk_rec->objectid,
8182 chunk_rec->stripes[i].devid,
8183 chunk_rec->stripes[i].offset);
8190 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Verify every cached chunk against block groups and dev extents, sorting
 * chunks onto the @good / @rebuild / @bad lists per check_chunk_refs()'s
 * 0 / >0 / <0 result, then report any block groups and dev extents left
 * without a matching chunk.
 */
8191 int check_chunks(struct cache_tree *chunk_cache,
8192 struct block_group_tree *block_group_cache,
8193 struct device_extent_tree *dev_extent_cache,
8194 struct list_head *good, struct list_head *bad,
8195 struct list_head *rebuild, int silent)
8197 struct cache_extent *chunk_item;
8198 struct chunk_record *chunk_rec;
8199 struct block_group_record *bg_rec;
8200 struct device_extent_record *dext_rec;
8204 chunk_item = first_cache_extent(chunk_cache);
8205 while (chunk_item) {
8206 chunk_rec = container_of(chunk_item, struct chunk_record,
8208 err = check_chunk_refs(chunk_rec, block_group_cache,
8209 dev_extent_cache, silent);
8212 if (err == 0 && good)
8213 list_add_tail(&chunk_rec->list, good);
8214 if (err > 0 && rebuild)
8215 list_add_tail(&chunk_rec->list, rebuild);
8217 list_add_tail(&chunk_rec->list, bad);
8218 chunk_item = next_cache_extent(chunk_item);
/* Anything still on these lists was never claimed by a chunk. */
8221 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8224 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8232 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8236 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Sum the lengths of all dev extents belonging to @dev_rec and compare the
 * total against the device item's byte_used; report a mismatch.
 */
8247 static int check_device_used(struct device_record *dev_rec,
8248 struct device_extent_tree *dext_cache)
8250 struct cache_extent *cache;
8251 struct device_extent_record *dev_extent_rec;
8254 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8256 dev_extent_rec = container_of(cache,
8257 struct device_extent_record,
/* Stop once iteration crosses into the next device's extents. */
8259 if (dev_extent_rec->objectid != dev_rec->devid)
8262 list_del_init(&dev_extent_rec->device_list);
8263 total_byte += dev_extent_rec->length;
8264 cache = next_cache_extent(cache);
8267 if (total_byte != dev_rec->byte_used) {
8269 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8270 total_byte, dev_rec->byte_used, dev_rec->objectid,
8271 dev_rec->type, dev_rec->offset);
8278 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Walk the device rb-tree, verify each device's used-byte accounting via
 * check_device_used(), then report dev extents whose device is missing.
 */
8279 static int check_devices(struct rb_root *dev_cache,
8280 struct device_extent_tree *dev_extent_cache)
8282 struct rb_node *dev_node;
8283 struct device_record *dev_rec;
8284 struct device_extent_record *dext_rec;
8288 dev_node = rb_first(dev_cache);
8290 dev_rec = container_of(dev_node, struct device_record, node);
8291 err = check_device_used(dev_rec, dev_extent_cache);
8295 dev_node = rb_next(dev_node);
8297 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8300 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8301 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record describing one tree root (bytenr, level,
 * drop state) and append it to @head for later processing by
 * deal_root_from_list().  @drop_key may be NULL when the root has no
 * in-progress drop.
 */
8308 static int add_root_item_to_list(struct list_head *head,
8309 u64 objectid, u64 bytenr, u64 last_snapshot,
8310 u8 level, u8 drop_level,
8311 int level_size, struct btrfs_key *drop_key)
8314 struct root_item_record *ri_rec;
8315 ri_rec = malloc(sizeof(*ri_rec));
8318 ri_rec->bytenr = bytenr;
8319 ri_rec->objectid = objectid;
8320 ri_rec->level = level;
8321 ri_rec->level_size = level_size;
8322 ri_rec->drop_level = drop_level;
8323 ri_rec->last_snapshot = last_snapshot;
8325 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8326 list_add_tail(&ri_rec->list, head);
/* Free every root_item_record queued on @list. */
8331 static void free_root_item_list(struct list_head *list)
8333 struct root_item_record *ri_rec;
8335 while (!list_empty(list)) {
8336 ri_rec = list_first_entry(list, struct root_item_record,
8338 list_del_init(&ri_rec->list);
/*
 * Traverse every tree root queued on @list.
 *
 * Each root's top block is read and added to the pending set, then
 * run_next_block() is driven to walk the tree, filling the extent,
 * chunk, device, block-group and dev-extent caches along the way.
 */
8343 static int deal_root_from_list(struct list_head *list,
8344 struct btrfs_root *root,
8345 struct block_info *bits,
8347 struct cache_tree *pending,
8348 struct cache_tree *seen,
8349 struct cache_tree *reada,
8350 struct cache_tree *nodes,
8351 struct cache_tree *extent_cache,
8352 struct cache_tree *chunk_cache,
8353 struct rb_root *dev_cache,
8354 struct block_group_tree *block_group_cache,
8355 struct device_extent_tree *dev_extent_cache)
8360 while (!list_empty(list)) {
8361 struct root_item_record *rec;
8362 struct extent_buffer *buf;
8363 rec = list_entry(list->next,
8364 struct root_item_record, list);
/* Read this root's top tree block; bail out of the record on failure. */
8366 buf = read_tree_block(root->fs_info->tree_root,
8367 rec->bytenr, rec->level_size, 0);
8368 if (!extent_buffer_uptodate(buf)) {
8369 free_extent_buffer(buf);
8373 ret = add_root_to_pending(buf, extent_cache, pending,
8374 seen, nodes, rec->objectid);
8378 * To rebuild extent tree, we need deal with snapshot
8379 * one by one, otherwise we deal with node firstly which
8380 * can maximize readahead.
8383 ret = run_next_block(root, bits, bits_nr, &last,
8384 pending, seen, reada, nodes,
8385 extent_cache, chunk_cache,
8386 dev_cache, block_group_cache,
8387 dev_extent_cache, rec);
8391 free_extent_buffer(buf);
8392 list_del(&rec->list);
/* Drain any blocks still queued once all root records are consumed. */
8398 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8399 reada, nodes, extent_cache, chunk_cache,
8400 dev_cache, block_group_cache,
8401 dev_extent_cache, NULL);
/*
 * Top-level pass of the original check mode.
 *
 * Walks every tree reachable from the tree root and chunk root,
 * accumulating caches of extents, chunks, devices, block groups and
 * dev extents, then cross-checks them against each other via
 * check_chunks(), check_extent_refs() and check_devices().
 */
8411 static int check_chunks_and_extents(struct btrfs_root *root)
8413 struct rb_root dev_cache;
8414 struct cache_tree chunk_cache;
8415 struct block_group_tree block_group_cache;
8416 struct device_extent_tree dev_extent_cache;
8417 struct cache_tree extent_cache;
8418 struct cache_tree seen;
8419 struct cache_tree pending;
8420 struct cache_tree reada;
8421 struct cache_tree nodes;
8422 struct extent_io_tree excluded_extents;
8423 struct cache_tree corrupt_blocks;
8424 struct btrfs_path path;
8425 struct btrfs_key key;
8426 struct btrfs_key found_key;
8428 struct block_info *bits;
8430 struct extent_buffer *leaf;
8432 struct btrfs_root_item ri;
8433 struct list_head dropping_trees;
8434 struct list_head normal_trees;
8435 struct btrfs_root *root1;
/* Initialize all per-run caches, trees and work lists. */
8440 dev_cache = RB_ROOT;
8441 cache_tree_init(&chunk_cache);
8442 block_group_tree_init(&block_group_cache);
8443 device_extent_tree_init(&dev_extent_cache);
8445 cache_tree_init(&extent_cache);
8446 cache_tree_init(&seen);
8447 cache_tree_init(&pending);
8448 cache_tree_init(&nodes);
8449 cache_tree_init(&reada);
8450 cache_tree_init(&corrupt_blocks);
8451 extent_io_tree_init(&excluded_extents);
8452 INIT_LIST_HEAD(&dropping_trees);
8453 INIT_LIST_HEAD(&normal_trees);
/* Publish the fsck caches/hooks through fs_info for the traversal code. */
8456 root->fs_info->excluded_extents = &excluded_extents;
8457 root->fs_info->fsck_extent_cache = &extent_cache;
8458 root->fs_info->free_extent_hook = free_extent_hook;
8459 root->fs_info->corrupt_blocks = &corrupt_blocks;
8463 bits = malloc(bits_nr * sizeof(struct block_info));
8469 if (ctx.progress_enabled) {
8470 ctx.tp = TASK_EXTENTS;
8471 task_start(ctx.info);
/* Queue the tree root and chunk root for traversal first. */
8475 root1 = root->fs_info->tree_root;
8476 level = btrfs_header_level(root1->node);
8477 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8478 root1->node->start, 0, level, 0,
8479 root1->nodesize, NULL);
8482 root1 = root->fs_info->chunk_root;
8483 level = btrfs_header_level(root1->node);
8484 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8485 root1->node->start, 0, level, 0,
8486 root1->nodesize, NULL);
/*
 * Scan the root tree for every ROOT_ITEM; roots with drop progress
 * recorded (partially deleted snapshots) are queued separately on
 * dropping_trees.
 */
8489 btrfs_init_path(&path);
8492 key.type = BTRFS_ROOT_ITEM_KEY;
8493 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8498 leaf = path.nodes[0];
8499 slot = path.slots[0];
8500 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8501 ret = btrfs_next_leaf(root, &path);
8504 leaf = path.nodes[0];
8505 slot = path.slots[0];
8507 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8508 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
8509 unsigned long offset;
8512 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8513 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8514 last_snapshot = btrfs_root_last_snapshot(&ri);
/* drop_progress objectid == 0 means the root is not being dropped. */
8515 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8516 level = btrfs_root_level(&ri);
8517 level_size = root->nodesize;
8518 ret = add_root_item_to_list(&normal_trees,
8520 btrfs_root_bytenr(&ri),
8521 last_snapshot, level,
8522 0, level_size, NULL);
8526 level = btrfs_root_level(&ri);
8527 level_size = root->nodesize;
8528 objectid = found_key.objectid;
8529 btrfs_disk_key_to_cpu(&found_key,
8531 ret = add_root_item_to_list(&dropping_trees,
8533 btrfs_root_bytenr(&ri),
8534 last_snapshot, level,
8536 level_size, &found_key);
8543 btrfs_release_path(&path);
8546 * check_block can return -EAGAIN if it fixes something, please keep
8547 * this in mind when dealing with return values from these functions, if
8548 * we get -EAGAIN we want to fall through and restart the loop.
8550 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8551 &seen, &reada, &nodes, &extent_cache,
8552 &chunk_cache, &dev_cache, &block_group_cache,
8559 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8560 &pending, &seen, &reada, &nodes,
8561 &extent_cache, &chunk_cache, &dev_cache,
8562 &block_group_cache, &dev_extent_cache);
/* Cross-check the accumulated caches against each other. */
8569 ret = check_chunks(&chunk_cache, &block_group_cache,
8570 &dev_extent_cache, NULL, NULL, NULL, 0);
8577 ret = check_extent_refs(root, &extent_cache);
8584 ret = check_devices(&dev_cache, &dev_extent_cache);
8589 task_stop(ctx.info);
/* Normal teardown: detach fsck hooks from fs_info and free caches. */
8591 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8592 extent_io_tree_cleanup(&excluded_extents);
8593 root->fs_info->fsck_extent_cache = NULL;
8594 root->fs_info->free_extent_hook = NULL;
8595 root->fs_info->corrupt_blocks = NULL;
8596 root->fs_info->excluded_extents = NULL;
8599 free_chunk_cache_tree(&chunk_cache);
8600 free_device_cache_tree(&dev_cache);
8601 free_block_group_tree(&block_group_cache);
8602 free_device_extent_tree(&dev_extent_cache);
8603 free_extent_cache_tree(&seen);
8604 free_extent_cache_tree(&pending);
8605 free_extent_cache_tree(&reada);
8606 free_extent_cache_tree(&nodes);
/* Failure teardown: also drop the extent record cache and root lists. */
8609 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8610 free_extent_cache_tree(&seen);
8611 free_extent_cache_tree(&pending);
8612 free_extent_cache_tree(&reada);
8613 free_extent_cache_tree(&nodes);
8614 free_chunk_cache_tree(&chunk_cache);
8615 free_block_group_tree(&block_group_cache);
8616 free_device_cache_tree(&dev_cache);
8617 free_device_extent_tree(&dev_extent_cache);
8618 free_extent_record_cache(root->fs_info, &extent_cache);
8619 free_root_item_list(&normal_trees);
8620 free_root_item_list(&dropping_trees);
8621 extent_io_tree_cleanup(&excluded_extents);
8626 * Check backrefs of a tree block given by @bytenr or @eb.
8628 * @root: the root containing the @bytenr or @eb
8629 * @eb: tree block extent buffer, can be NULL
8630 * @bytenr: bytenr of the tree block to search
8631 * @level: tree level of the tree block
8632 * @owner: owner of the tree block
8634 * Return >0 for any error found and output error message
8635 * Return 0 for no error found
8637 static int check_tree_block_ref(struct btrfs_root *root,
8638 struct extent_buffer *eb, u64 bytenr,
8639 int level, u64 owner)
8641 struct btrfs_key key;
8642 struct btrfs_root *extent_root = root->fs_info->extent_root;
8643 struct btrfs_path path;
8644 struct btrfs_extent_item *ei;
8645 struct btrfs_extent_inline_ref *iref;
8646 struct extent_buffer *leaf;
8652 u32 nodesize = root->nodesize;
/*
 * With SKINNY_METADATA the extent item uses METADATA_ITEM_KEY and
 * stores the level in key.offset; otherwise a tree_block_info
 * structure follows the extent item.
 */
8659 btrfs_init_path(&path);
8660 key.objectid = bytenr;
8661 if (btrfs_fs_incompat(root->fs_info,
8662 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8663 key.type = BTRFS_METADATA_ITEM_KEY;
8665 key.type = BTRFS_EXTENT_ITEM_KEY;
8666 key.offset = (u64)-1;
8668 /* Search for the backref in extent tree */
8669 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8671 err |= BACKREF_MISSING;
/* Step back to the extent/metadata item covering @bytenr, if any. */
8674 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8676 err |= BACKREF_MISSING;
8680 leaf = path.nodes[0];
8681 slot = path.slots[0];
8682 btrfs_item_key_to_cpu(leaf, &key, slot);
8684 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8686 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8687 skinny_level = (int)key.offset;
8688 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8690 struct btrfs_tree_block_info *info;
8692 info = (struct btrfs_tree_block_info *)(ei + 1);
8693 skinny_level = btrfs_tree_block_level(leaf, info);
8694 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* The extent item must be flagged as a tree block. */
8701 if (!(btrfs_extent_flags(leaf, ei) &
8702 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8704 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8705 key.objectid, nodesize,
8706 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8707 err = BACKREF_MISMATCH;
/* Generation recorded in the extent item must match the block header. */
8709 header_gen = btrfs_header_generation(eb);
8710 extent_gen = btrfs_extent_generation(leaf, ei);
8711 if (header_gen != extent_gen) {
8713 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8714 key.objectid, nodesize, header_gen,
8716 err = BACKREF_MISMATCH;
8718 if (level != skinny_level) {
8720 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8721 key.objectid, nodesize, level, skinny_level);
8722 err = BACKREF_MISMATCH;
/* Non-fs trees are never shared, so the refcount must be exactly 1. */
8724 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8726 "extent[%llu %u] is referred by other roots than %llu",
8727 key.objectid, nodesize, root->objectid);
8728 err = BACKREF_MISMATCH;
8733 * Iterate the extent/metadata item to find the exact backref
8735 item_size = btrfs_item_size_nr(leaf, slot);
8736 ptr = (unsigned long)iref;
8737 end = (unsigned long)ei + item_size;
8739 iref = (struct btrfs_extent_inline_ref *)ptr;
8740 type = btrfs_extent_inline_ref_type(leaf, iref);
8741 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8743 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8744 (offset == root->objectid || offset == owner)) {
8746 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8747 /* Check if the backref points to valid referencer */
/* Recurse on the parent block: a clean check means the ref is valid. */
8748 found_ref = !check_tree_block_ref(root, NULL, offset,
8754 ptr += btrfs_extent_inline_ref_size(type);
8758 * Inlined extent item doesn't have what we need, check
8759 * TREE_BLOCK_REF_KEY
8762 btrfs_release_path(&path);
8763 key.objectid = bytenr;
8764 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8765 key.offset = root->objectid;
8767 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8772 err |= BACKREF_MISSING;
8774 btrfs_release_path(&path);
8775 if (eb && (err & BACKREF_MISSING))
8776 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8777 bytenr, nodesize, owner, level);
8782 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8784 * Return >0 any error found and output error message
8785 * Return 0 for no error found
8787 static int check_extent_data_item(struct btrfs_root *root,
8788 struct extent_buffer *eb, int slot)
8790 struct btrfs_file_extent_item *fi;
8791 struct btrfs_path path;
8792 struct btrfs_root *extent_root = root->fs_info->extent_root;
8793 struct btrfs_key fi_key;
8794 struct btrfs_key dbref_key;
8795 struct extent_buffer *leaf;
8796 struct btrfs_extent_item *ei;
8797 struct btrfs_extent_inline_ref *iref;
8798 struct btrfs_extent_data_ref *dref;
8800 u64 file_extent_gen;
8803 u64 extent_num_bytes;
8811 int found_dbackref = 0;
8815 btrfs_item_key_to_cpu(eb, &fi_key, slot);
8816 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8817 file_extent_gen = btrfs_file_extent_generation(eb, fi);
8819 /* Nothing to check for hole and inline data extents */
8820 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8821 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8824 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8825 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8826 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8828 /* Check unaligned disk_num_bytes and num_bytes */
8829 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8831 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8832 fi_key.objectid, fi_key.offset, disk_num_bytes,
8834 err |= BYTES_UNALIGNED;
/* Accounting side effect: track allocated vs referenced data bytes. */
8836 data_bytes_allocated += disk_num_bytes;
8838 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8840 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8841 fi_key.objectid, fi_key.offset, extent_num_bytes,
8843 err |= BYTES_UNALIGNED;
8845 data_bytes_referenced += extent_num_bytes;
8847 owner = btrfs_header_owner(eb);
8849 /* Check the extent item of the file extent in extent tree */
8850 btrfs_init_path(&path);
8851 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8852 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8853 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8855 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8857 err |= BACKREF_MISSING;
8861 leaf = path.nodes[0];
8862 slot = path.slots[0];
8863 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8865 extent_flags = btrfs_extent_flags(leaf, ei);
8866 extent_gen = btrfs_extent_generation(leaf, ei);
/* The extent item must be flagged as data. */
8868 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8870 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8871 disk_bytenr, disk_num_bytes,
8872 BTRFS_EXTENT_FLAG_DATA);
8873 err |= BACKREF_MISMATCH;
/* A file extent can never predate the extent it points into. */
8876 if (file_extent_gen < extent_gen) {
8878 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8879 disk_bytenr, disk_num_bytes, file_extent_gen,
8881 err |= BACKREF_MISMATCH;
8884 /* Check data backref inside that extent item */
8885 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8886 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8887 ptr = (unsigned long)iref;
8888 end = (unsigned long)ei + item_size;
/* Walk all inline refs looking for one that matches this file extent. */
8890 iref = (struct btrfs_extent_inline_ref *)ptr;
8891 type = btrfs_extent_inline_ref_type(leaf, iref);
8892 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8894 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8895 ref_root = btrfs_extent_data_ref_root(leaf, dref);
8896 if (ref_root == owner || ref_root == root->objectid)
8898 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/* Shared ref: validate that the referencing tree block exists. */
8899 found_dbackref = !check_tree_block_ref(root, NULL,
8900 btrfs_extent_inline_ref_offset(leaf, iref),
8906 ptr += btrfs_extent_inline_ref_size(type);
8909 /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8910 if (!found_dbackref) {
8911 btrfs_release_path(&path);
8913 btrfs_init_path(&path);
8914 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8915 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
/* Keyed data refs are located by the hash of (root, inode, offset). */
8916 dbref_key.offset = hash_extent_data_ref(root->objectid,
8917 fi_key.objectid, fi_key.offset);
8919 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8920 &dbref_key, &path, 0, 0);
8925 if (!found_dbackref)
8926 err |= BACKREF_MISSING;
8928 btrfs_release_path(&path);
8929 if (err & BACKREF_MISSING) {
8930 error("data extent[%llu %llu] backref lost",
8931 disk_bytenr, disk_num_bytes);
8937 * Get real tree block level for the case like shared block
8938 * Return >= 0 as tree level
8939 * Return <0 for error
8941 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8943 struct extent_buffer *eb;
8944 struct btrfs_path path;
8945 struct btrfs_key key;
8946 struct btrfs_extent_item *ei;
8949 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8954 /* Search extent tree for extent generation and level */
8955 key.objectid = bytenr;
8956 key.type = BTRFS_METADATA_ITEM_KEY;
8957 key.offset = (u64)-1;
8959 btrfs_init_path(&path);
8960 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8963 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8971 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8972 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8973 struct btrfs_extent_item);
/* Only tree blocks have a meaningful level; data extents are an error here. */
8974 flags = btrfs_extent_flags(path.nodes[0], ei);
8975 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8980 /* Get transid for later read_tree_block() check */
8981 transid = btrfs_extent_generation(path.nodes[0], ei);
8983 /* Get backref level as one source */
8984 if (key.type == BTRFS_METADATA_ITEM_KEY) {
/* Skinny metadata keys carry the level directly in key.offset. */
8985 backref_level = key.offset;
8987 struct btrfs_tree_block_info *info;
8989 info = (struct btrfs_tree_block_info *)(ei + 1);
8990 backref_level = btrfs_tree_block_level(path.nodes[0], info);
8992 btrfs_release_path(&path);
8994 /* Get level from tree block as an alternative source */
8995 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
8996 if (!extent_buffer_uptodate(eb)) {
8997 free_extent_buffer(eb);
/* Both sources must agree; the header level is the value returned. */
9000 header_level = btrfs_header_level(eb);
9001 free_extent_buffer(eb);
9003 if (header_level != backref_level)
9005 return header_level;
9008 btrfs_release_path(&path);
9013 * Check if a tree block backref is valid (points to a valid tree block)
9014 * if level == -1, level will be resolved
9015 * Return >0 for any error found and print error message
9017 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9018 u64 bytenr, int level)
9020 struct btrfs_root *root;
9021 struct btrfs_key key;
9022 struct btrfs_path path;
9023 struct extent_buffer *eb;
9024 struct extent_buffer *node;
9025 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9029 /* Query level for level == -1 special case */
9031 level = query_tree_block_level(fs_info, bytenr);
9033 err |= REFERENCER_MISSING;
/* Look up the root the backref claims as owner. */
9037 key.objectid = root_id;
9038 key.type = BTRFS_ROOT_ITEM_KEY;
9039 key.offset = (u64)-1;
9041 root = btrfs_read_fs_root(fs_info, &key);
9043 err |= REFERENCER_MISSING;
9047 /* Read out the tree block to get item/node key */
9048 eb = read_tree_block(root, bytenr, root->nodesize, 0);
9049 if (!extent_buffer_uptodate(eb)) {
9050 err |= REFERENCER_MISSING;
9051 free_extent_buffer(eb);
9055 /* Empty tree, no need to check key */
9056 if (!btrfs_header_nritems(eb) && !level) {
9057 free_extent_buffer(eb);
/* Take the first key so a search can reach the same block again. */
9062 btrfs_node_key_to_cpu(eb, &key, 0);
9064 btrfs_item_key_to_cpu(eb, &key, 0);
9066 free_extent_buffer(eb);
9068 btrfs_init_path(&path);
/* Stop the search at the block's own level, not at the leaves. */
9069 path.lowest_level = level;
9070 /* Search with the first key, to ensure we can reach it */
9071 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9073 err |= REFERENCER_MISSING;
/* The block the search landed on must be exactly the one referenced. */
9077 node = path.nodes[level];
9078 if (btrfs_header_bytenr(node) != bytenr) {
9080 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9081 bytenr, nodesize, bytenr,
9082 btrfs_header_bytenr(node));
9083 err |= REFERENCER_MISMATCH;
9085 if (btrfs_header_level(node) != level) {
9087 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9088 bytenr, nodesize, level,
9089 btrfs_header_level(node));
9090 err |= REFERENCER_MISMATCH;
9094 btrfs_release_path(&path);
9096 if (err & REFERENCER_MISSING) {
9098 error("extent [%llu %d] lost referencer (owner: %llu)",
9099 bytenr, nodesize, root_id);
9102 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9103 bytenr, nodesize, root_id, level);
9110 * Check referencer for shared block backref
9111 * If level == -1, this function will resolve the level.
9113 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9114 u64 parent, u64 bytenr, int level)
9116 struct extent_buffer *eb;
9117 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9119 int found_parent = 0;
/* Read the claimed parent node directly by bytenr. */
9122 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9123 if (!extent_buffer_uptodate(eb))
9127 level = query_tree_block_level(fs_info, bytenr);
/* The parent must sit exactly one level above the child block. */
9131 if (level + 1 != btrfs_header_level(eb))
/* Scan the parent's child pointers for one pointing at @bytenr. */
9134 nr = btrfs_header_nritems(eb);
9135 for (i = 0; i < nr; i++) {
9136 if (bytenr == btrfs_node_blockptr(eb, i)) {
9142 free_extent_buffer(eb);
9143 if (!found_parent) {
9145 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9146 bytenr, nodesize, parent, level);
9147 return REFERENCER_MISSING;
9153 * Check referencer for normal (inlined) data ref
9154 * If len == 0, it will be resolved by searching in extent tree
9156 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9157 u64 root_id, u64 objectid, u64 offset,
9158 u64 bytenr, u64 len, u32 count)
9160 struct btrfs_root *root;
9161 struct btrfs_root *extent_root = fs_info->extent_root;
9162 struct btrfs_key key;
9163 struct btrfs_path path;
9164 struct extent_buffer *leaf;
9165 struct btrfs_file_extent_item *fi;
9166 u32 found_count = 0;
/* Resolve @len (when 0) from the extent item covering @bytenr. */
9171 key.objectid = bytenr;
9172 key.type = BTRFS_EXTENT_ITEM_KEY;
9173 key.offset = (u64)-1;
9175 btrfs_init_path(&path);
9176 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9179 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9182 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9183 if (key.objectid != bytenr ||
9184 key.type != BTRFS_EXTENT_ITEM_KEY)
9187 btrfs_release_path(&path);
/* Open the fs root the backref names as owner. */
9189 key.objectid = root_id;
9190 key.type = BTRFS_ROOT_ITEM_KEY;
9191 key.offset = (u64)-1;
9192 btrfs_init_path(&path);
9194 root = btrfs_read_fs_root(fs_info, &key);
9198 key.objectid = objectid;
9199 key.type = BTRFS_EXTENT_DATA_KEY;
9201 * It can be nasty as data backref offset is
9202 * file offset - file extent offset, which is smaller or
9203 * equal to original backref offset. The only special case is
9204 * overflow. So we need to special check and do further search.
9206 key.offset = offset & (1ULL << 63) ? 0 : offset;
9208 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9213 * Search afterwards to get correct one
9214 * NOTE: As we must do a comprehensive check on the data backref to
9215 * make sure the dref count also matches, we must iterate all file
9216 * extents for that inode.
9219 leaf = path.nodes[0];
9220 slot = path.slots[0];
9222 btrfs_item_key_to_cpu(leaf, &key, slot);
/* Stop once we leave this inode's EXTENT_DATA items. */
9223 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9225 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9227 * Except normal disk bytenr and disk num bytes, we still
9228 * need to do extra check on dbackref offset as
9229 * dbackref offset = file_offset - file_extent_offset
9231 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9232 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9233 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9237 ret = btrfs_next_item(root, &path);
9242 btrfs_release_path(&path);
/* The number of matching file extents must equal the backref count. */
9243 if (found_count != count) {
9245 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9246 bytenr, len, root_id, objectid, offset, count, found_count);
9247 return REFERENCER_MISSING;
9253 * Check if the referencer of a shared data backref exists
9255 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9256 u64 parent, u64 bytenr)
9258 struct extent_buffer *eb;
9259 struct btrfs_key key;
9260 struct btrfs_file_extent_item *fi;
9261 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9263 int found_parent = 0;
/* Read the claimed parent leaf directly by bytenr. */
9266 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9267 if (!extent_buffer_uptodate(eb))
/* Scan the leaf for a file extent whose disk bytenr matches @bytenr. */
9270 nr = btrfs_header_nritems(eb);
9271 for (i = 0; i < nr; i++) {
9272 btrfs_item_key_to_cpu(eb, &key, i);
9273 if (key.type != BTRFS_EXTENT_DATA_KEY)
/* Inline extents have no disk bytenr, skip them. */
9276 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9277 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9280 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9287 free_extent_buffer(eb);
9288 if (!found_parent) {
9289 error("shared extent %llu referencer lost (parent: %llu)",
9291 return REFERENCER_MISSING;
9297 * This function will check a given extent item, including its backref and
9298 * itself (like crossing stripe boundary and type)
9300 * Since we don't use extent_record anymore, introduce new error bit
9302 static int check_extent_item(struct btrfs_fs_info *fs_info,
9303 struct extent_buffer *eb, int slot)
9305 struct btrfs_extent_item *ei;
9306 struct btrfs_extent_inline_ref *iref;
9307 struct btrfs_extent_data_ref *dref;
9311 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9312 u32 item_size = btrfs_item_size_nr(eb, slot);
9317 struct btrfs_key key;
/*
 * Accounting side effect: EXTENT_ITEM keys carry the byte length in
 * key.offset, METADATA_ITEM extents are always one node.
 */
9321 btrfs_item_key_to_cpu(eb, &key, slot);
9322 if (key.type == BTRFS_EXTENT_ITEM_KEY)
9323 bytes_used += key.offset;
9325 bytes_used += nodesize;
9327 if (item_size < sizeof(*ei)) {
9329 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9330 * old thing when on disk format is still un-determined.
9331 * No need to care about it anymore
9333 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9337 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9338 flags = btrfs_extent_flags(eb, ei);
9340 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
/* Metadata must not straddle a stripe boundary. */
9342 if (metadata && check_crossing_stripes(global_info, key.objectid,
9344 error("bad metadata [%llu, %llu) crossing stripe boundary",
9345 key.objectid, key.objectid + nodesize);
9346 err |= CROSSING_STRIPE_BOUNDARY;
9349 ptr = (unsigned long)(ei + 1);
9351 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9352 /* Old EXTENT_ITEM metadata */
9353 struct btrfs_tree_block_info *info;
9355 info = (struct btrfs_tree_block_info *)ptr;
9356 level = btrfs_tree_block_level(eb, info);
9357 ptr += sizeof(struct btrfs_tree_block_info);
9359 /* New METADATA_ITEM */
9362 end = (unsigned long)ei + item_size;
9365 err |= ITEM_SIZE_MISMATCH;
9369 /* Now check every backref in this extent item */
/* Dispatch each inline ref to the matching referencer check. */
9371 iref = (struct btrfs_extent_inline_ref *)ptr;
9372 type = btrfs_extent_inline_ref_type(eb, iref);
9373 offset = btrfs_extent_inline_ref_offset(eb, iref);
9375 case BTRFS_TREE_BLOCK_REF_KEY:
9376 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9380 case BTRFS_SHARED_BLOCK_REF_KEY:
9381 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9385 case BTRFS_EXTENT_DATA_REF_KEY:
9386 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9387 ret = check_extent_data_backref(fs_info,
9388 btrfs_extent_data_ref_root(eb, dref),
9389 btrfs_extent_data_ref_objectid(eb, dref),
9390 btrfs_extent_data_ref_offset(eb, dref),
9391 key.objectid, key.offset,
9392 btrfs_extent_data_ref_count(eb, dref));
9395 case BTRFS_SHARED_DATA_REF_KEY:
9396 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9400 error("extent[%llu %d %llu] has unknown ref type: %d",
9401 key.objectid, key.type, key.offset, type);
9402 err |= UNKNOWN_TYPE;
9406 ptr += btrfs_extent_inline_ref_size(type);
9415 * Check if a dev extent item is referred correctly by its chunk
9417 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9418 struct extent_buffer *eb, int slot)
9420 struct btrfs_root *chunk_root = fs_info->chunk_root;
9421 struct btrfs_dev_extent *ptr;
9422 struct btrfs_path path;
9423 struct btrfs_key chunk_key;
9424 struct btrfs_key devext_key;
9425 struct btrfs_chunk *chunk;
9426 struct extent_buffer *l;
9430 int found_chunk = 0;
9433 btrfs_item_key_to_cpu(eb, &devext_key, slot);
9434 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9435 length = btrfs_dev_extent_length(eb, ptr);
/* The dev extent records which chunk it belongs to; look it up. */
9437 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9438 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9439 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9441 btrfs_init_path(&path);
9442 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
/* Chunk and dev extent must cover the same number of bytes. */
9447 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9448 if (btrfs_chunk_length(l, chunk) != length)
/* One of the chunk's stripes must point back at this (devid, offset). */
9451 num_stripes = btrfs_chunk_num_stripes(l, chunk);
9452 for (i = 0; i < num_stripes; i++) {
9453 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9454 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9456 if (devid == devext_key.objectid &&
9457 offset == devext_key.offset) {
9463 btrfs_release_path(&path);
9466 "device extent[%llu, %llu, %llu] did not find the related chunk",
9467 devext_key.objectid, devext_key.offset, length);
9468 return REFERENCER_MISSING;
9474 * Check if the used space is correct with the dev item
9476 static int check_dev_item(struct btrfs_fs_info *fs_info,
9477 struct extent_buffer *eb, int slot)
9479 struct btrfs_root *dev_root = fs_info->dev_root;
9480 struct btrfs_dev_item *dev_item;
9481 struct btrfs_path path;
9482 struct btrfs_key key;
9483 struct btrfs_dev_extent *ptr;
9489 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9490 dev_id = btrfs_device_id(eb, dev_item);
9491 used = btrfs_device_bytes_used(eb, dev_item);
/* Position at the first dev extent belonging to this device. */
9493 key.objectid = dev_id;
9494 key.type = BTRFS_DEV_EXTENT_KEY;
9497 btrfs_init_path(&path);
9498 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9500 btrfs_item_key_to_cpu(eb, &key, slot);
9501 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9502 key.objectid, key.type, key.offset);
9503 btrfs_release_path(&path);
9504 return REFERENCER_MISSING;
9507 /* Iterate dev_extents to calculate the used space of a device */
9509 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9511 if (key.objectid > dev_id)
9513 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9516 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9517 struct btrfs_dev_extent);
9518 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9520 ret = btrfs_next_item(dev_root, &path);
9524 btrfs_release_path(&path);
/* The device's recorded bytes_used must equal the summed dev extents. */
9526 if (used != total) {
9527 btrfs_item_key_to_cpu(eb, &key, slot);
9529 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9530 total, used, BTRFS_ROOT_TREE_OBJECTID,
9531 BTRFS_DEV_EXTENT_KEY, dev_id);
9532 return ACCOUNTING_MISMATCH;
9538 * Check a block group item with its referener (chunk) and its used space
9539 * with extent/metadata item
9541 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9542 struct extent_buffer *eb, int slot)
9544 struct btrfs_root *extent_root = fs_info->extent_root;
9545 struct btrfs_root *chunk_root = fs_info->chunk_root;
9546 struct btrfs_block_group_item *bi;
9547 struct btrfs_block_group_item bg_item;
9548 struct btrfs_path path;
9549 struct btrfs_key bg_key;
9550 struct btrfs_key chunk_key;
9551 struct btrfs_key extent_key;
9552 struct btrfs_chunk *chunk;
9553 struct extent_buffer *leaf;
9554 struct btrfs_extent_item *ei;
9555 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9563 btrfs_item_key_to_cpu(eb, &bg_key, slot);
9564 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9565 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9566 used = btrfs_block_group_used(&bg_item);
9567 bg_flags = btrfs_block_group_flags(&bg_item);
/* A block group's logical start must match a chunk of the same length. */
9569 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9570 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9571 chunk_key.offset = bg_key.objectid;
9573 btrfs_init_path(&path);
9574 /* Search for the referencer chunk */
9575 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9578 "block group[%llu %llu] did not find the related chunk item",
9579 bg_key.objectid, bg_key.offset);
9580 err |= REFERENCER_MISSING;
9582 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9583 struct btrfs_chunk);
9584 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9587 "block group[%llu %llu] related chunk item length does not match",
9588 bg_key.objectid, bg_key.offset);
9589 err |= REFERENCER_MISMATCH;
9592 btrfs_release_path(&path);
9594 /* Search from the block group bytenr */
9595 extent_key.objectid = bg_key.objectid;
9596 extent_key.type = 0;
9597 extent_key.offset = 0;
9599 btrfs_init_path(&path);
9600 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9604 /* Iterate extent tree to account used space */
9606 leaf = path.nodes[0];
9607 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* Stop once we pass the end of this block group. */
9608 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9611 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9612 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9614 if (extent_key.objectid < bg_key.objectid)
/* METADATA_ITEMs are always one node; EXTENT_ITEMs carry their length. */
9617 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9620 total += extent_key.offset;
/* Each extent's type flag must agree with the block group's flags. */
9622 ei = btrfs_item_ptr(leaf, path.slots[0],
9623 struct btrfs_extent_item);
9624 flags = btrfs_extent_flags(leaf, ei);
9625 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9626 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9628 "bad extent[%llu, %llu) type mismatch with chunk",
9629 extent_key.objectid,
9630 extent_key.objectid + extent_key.offset);
9631 err |= CHUNK_TYPE_MISMATCH;
9633 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9634 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9635 BTRFS_BLOCK_GROUP_METADATA))) {
9637 "bad extent[%llu, %llu) type mismatch with chunk",
9638 extent_key.objectid,
9639 extent_key.objectid + nodesize);
9640 err |= CHUNK_TYPE_MISMATCH;
9644 ret = btrfs_next_item(extent_root, &path);
9650 btrfs_release_path(&path);
/* The summed extent bytes must match the block group's used field. */
9652 if (total != used) {
9654 "block group[%llu %llu] used %llu but extent items used %llu",
9655 bg_key.objectid, bg_key.offset, used, total);
9656 err |= ACCOUNTING_MISMATCH;
9662 * Check a chunk item.
9663 * Including checking all referred dev_extents and block group
9665 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9666 struct extent_buffer *eb, int slot)
9668 struct btrfs_root *extent_root = fs_info->extent_root;
9669 struct btrfs_root *dev_root = fs_info->dev_root;
9670 struct btrfs_path path;
9671 struct btrfs_key chunk_key;
9672 struct btrfs_key bg_key;
9673 struct btrfs_key devext_key;
9674 struct btrfs_chunk *chunk;
9675 struct extent_buffer *leaf;
9676 struct btrfs_block_group_item *bi;
9677 struct btrfs_block_group_item bg_item;
9678 struct btrfs_dev_extent *ptr;
9679 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9691 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9692 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9693 length = btrfs_chunk_length(eb, chunk);
9694 chunk_end = chunk_key.offset + length;
9695 if (!IS_ALIGNED(length, sectorsize)) {
9696 error("chunk[%llu %llu) not aligned to %u",
9697 chunk_key.offset, chunk_end, sectorsize);
9698 err |= BYTES_UNALIGNED;
9702 type = btrfs_chunk_type(eb, chunk);
9703 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9704 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9705 error("chunk[%llu %llu) has no chunk type",
9706 chunk_key.offset, chunk_end);
9707 err |= UNKNOWN_TYPE;
9709 if (profile && (profile & (profile - 1))) {
9710 error("chunk[%llu %llu) multiple profiles detected: %llx",
9711 chunk_key.offset, chunk_end, profile);
9712 err |= UNKNOWN_TYPE;
9715 bg_key.objectid = chunk_key.offset;
9716 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9717 bg_key.offset = length;
9719 btrfs_init_path(&path);
9720 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9723 "chunk[%llu %llu) did not find the related block group item",
9724 chunk_key.offset, chunk_end);
9725 err |= REFERENCER_MISSING;
9727 leaf = path.nodes[0];
9728 bi = btrfs_item_ptr(leaf, path.slots[0],
9729 struct btrfs_block_group_item);
9730 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9732 if (btrfs_block_group_flags(&bg_item) != type) {
9734 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9735 chunk_key.offset, chunk_end, type,
9736 btrfs_block_group_flags(&bg_item));
9737 err |= REFERENCER_MISSING;
9741 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9742 for (i = 0; i < num_stripes; i++) {
9743 btrfs_release_path(&path);
9744 btrfs_init_path(&path);
9745 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9746 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9747 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9749 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9754 leaf = path.nodes[0];
9755 ptr = btrfs_item_ptr(leaf, path.slots[0],
9756 struct btrfs_dev_extent);
9757 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9758 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9759 if (objectid != chunk_key.objectid ||
9760 offset != chunk_key.offset ||
9761 btrfs_dev_extent_length(leaf, ptr) != length)
9765 err |= BACKREF_MISSING;
9767 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9768 chunk_key.objectid, chunk_end, i);
9771 btrfs_release_path(&path);
9777 * Main entry function to check known items and update related accounting info
9779 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9781 struct btrfs_fs_info *fs_info = root->fs_info;
9782 struct btrfs_key key;
9785 struct btrfs_extent_data_ref *dref;
9790 btrfs_item_key_to_cpu(eb, &key, slot);
9794 case BTRFS_EXTENT_DATA_KEY:
9795 ret = check_extent_data_item(root, eb, slot);
9798 case BTRFS_BLOCK_GROUP_ITEM_KEY:
9799 ret = check_block_group_item(fs_info, eb, slot);
9802 case BTRFS_DEV_ITEM_KEY:
9803 ret = check_dev_item(fs_info, eb, slot);
9806 case BTRFS_CHUNK_ITEM_KEY:
9807 ret = check_chunk_item(fs_info, eb, slot);
9810 case BTRFS_DEV_EXTENT_KEY:
9811 ret = check_dev_extent_item(fs_info, eb, slot);
9814 case BTRFS_EXTENT_ITEM_KEY:
9815 case BTRFS_METADATA_ITEM_KEY:
9816 ret = check_extent_item(fs_info, eb, slot);
9819 case BTRFS_EXTENT_CSUM_KEY:
9820 total_csum_bytes += btrfs_item_size_nr(eb, slot);
9822 case BTRFS_TREE_BLOCK_REF_KEY:
9823 ret = check_tree_block_backref(fs_info, key.offset,
9827 case BTRFS_EXTENT_DATA_REF_KEY:
9828 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9829 ret = check_extent_data_backref(fs_info,
9830 btrfs_extent_data_ref_root(eb, dref),
9831 btrfs_extent_data_ref_objectid(eb, dref),
9832 btrfs_extent_data_ref_offset(eb, dref),
9834 btrfs_extent_data_ref_count(eb, dref));
9837 case BTRFS_SHARED_BLOCK_REF_KEY:
9838 ret = check_shared_block_backref(fs_info, key.offset,
9842 case BTRFS_SHARED_DATA_REF_KEY:
9843 ret = check_shared_data_backref(fs_info, key.offset,
9851 if (++slot < btrfs_header_nritems(eb))
9858 * Helper function for later fs/subvol tree check. To determine if a tree
9859 * block should be checked.
9860 * This function will ensure only the direct referencer with lowest rootid to
9861 * check a fs/subvolume tree block.
9863 * Backref check at extent tree would detect errors like missing subvolume
9864 * tree, so we can do aggressive check to reduce duplicated checks.
9866 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
9868 struct btrfs_root *extent_root = root->fs_info->extent_root;
9869 struct btrfs_key key;
9870 struct btrfs_path path;
9871 struct extent_buffer *leaf;
9873 struct btrfs_extent_item *ei;
9879 struct btrfs_extent_inline_ref *iref;
9882 btrfs_init_path(&path);
9883 key.objectid = btrfs_header_bytenr(eb);
9884 key.type = BTRFS_METADATA_ITEM_KEY;
9885 key.offset = (u64)-1;
9888 * Any failure in backref resolving means we can't determine
9889 * whom the tree block belongs to.
9890 * So in that case, we need to check that tree block
9892 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9896 ret = btrfs_previous_extent_item(extent_root, &path,
9897 btrfs_header_bytenr(eb));
9901 leaf = path.nodes[0];
9902 slot = path.slots[0];
9903 btrfs_item_key_to_cpu(leaf, &key, slot);
9904 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9906 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9907 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9909 struct btrfs_tree_block_info *info;
9911 info = (struct btrfs_tree_block_info *)(ei + 1);
9912 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9915 item_size = btrfs_item_size_nr(leaf, slot);
9916 ptr = (unsigned long)iref;
9917 end = (unsigned long)ei + item_size;
9919 iref = (struct btrfs_extent_inline_ref *)ptr;
9920 type = btrfs_extent_inline_ref_type(leaf, iref);
9921 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9924 * We only check the tree block if current root is
9925 * the lowest referencer of it.
9927 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9928 offset < root->objectid) {
9929 btrfs_release_path(&path);
9933 ptr += btrfs_extent_inline_ref_size(type);
9936 * Normally we should also check keyed tree block ref, but that may be
9937 * very time consuming. Inlined ref should already make us skip a lot
9938 * of refs now. So skip search keyed tree block ref.
9942 btrfs_release_path(&path);
9947 * Traversal function for tree block. We will do:
9948 * 1) Skip shared fs/subvolume tree blocks
9949 * 2) Update related bytes accounting
9950 * 3) Pre-order traversal
9952 static int traverse_tree_block(struct btrfs_root *root,
9953 struct extent_buffer *node)
9955 struct extent_buffer *eb;
9956 struct btrfs_key key;
9957 struct btrfs_key drop_key;
9965 * Skip shared fs/subvolume tree block, in that case they will
9966 * be checked by referencer with lowest rootid
9968 if (is_fstree(root->objectid) && !should_check(root, node))
9971 /* Update bytes accounting */
9972 total_btree_bytes += node->len;
9973 if (fs_root_objectid(btrfs_header_owner(node)))
9974 total_fs_tree_bytes += node->len;
9975 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
9976 total_extent_tree_bytes += node->len;
9977 if (!found_old_backref &&
9978 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
9979 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
9980 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
9981 found_old_backref = 1;
9983 /* pre-order tranversal, check itself first */
9984 level = btrfs_header_level(node);
9985 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
9986 btrfs_header_level(node),
9987 btrfs_header_owner(node));
9991 "check %s failed root %llu bytenr %llu level %d, force continue check",
9992 level ? "node":"leaf", root->objectid,
9993 btrfs_header_bytenr(node), btrfs_header_level(node));
9996 btree_space_waste += btrfs_leaf_free_space(root, node);
9997 ret = check_leaf_items(root, node);
10002 nr = btrfs_header_nritems(node);
10003 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
10004 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10005 sizeof(struct btrfs_key_ptr);
10007 /* Then check all its children */
10008 for (i = 0; i < nr; i++) {
10009 u64 blocknr = btrfs_node_blockptr(node, i);
10011 btrfs_node_key_to_cpu(node, &key, i);
10012 if (level == root->root_item.drop_level &&
10013 is_dropped_key(&key, &drop_key))
10017 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10018 * to call the function itself.
10020 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10021 if (extent_buffer_uptodate(eb)) {
10022 ret = traverse_tree_block(root, eb);
10025 free_extent_buffer(eb);
10032 * Low memory usage version check_chunks_and_extents.
10034 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10036 struct btrfs_path path;
10037 struct btrfs_key key;
10038 struct btrfs_root *root1;
10039 struct btrfs_root *cur_root;
10043 root1 = root->fs_info->chunk_root;
10044 ret = traverse_tree_block(root1, root1->node);
10047 root1 = root->fs_info->tree_root;
10048 ret = traverse_tree_block(root1, root1->node);
10051 btrfs_init_path(&path);
10052 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10054 key.type = BTRFS_ROOT_ITEM_KEY;
10056 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10058 error("cannot find extent treet in tree_root");
10063 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10064 if (key.type != BTRFS_ROOT_ITEM_KEY)
10066 key.offset = (u64)-1;
10068 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10069 if (IS_ERR(cur_root) || !cur_root) {
10070 error("failed to read tree: %lld", key.objectid);
10074 ret = traverse_tree_block(cur_root, cur_root->node);
10078 ret = btrfs_next_item(root1, &path);
10084 btrfs_release_path(&path);
10088 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10089 struct btrfs_root *root, int overwrite)
10091 struct extent_buffer *c;
10092 struct extent_buffer *old = root->node;
10095 struct btrfs_disk_key disk_key = {0,0,0};
10101 extent_buffer_get(c);
10104 c = btrfs_alloc_free_block(trans, root,
10106 root->root_key.objectid,
10107 &disk_key, level, 0, 0);
10110 extent_buffer_get(c);
10114 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10115 btrfs_set_header_level(c, level);
10116 btrfs_set_header_bytenr(c, c->start);
10117 btrfs_set_header_generation(c, trans->transid);
10118 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10119 btrfs_set_header_owner(c, root->root_key.objectid);
10121 write_extent_buffer(c, root->fs_info->fsid,
10122 btrfs_header_fsid(), BTRFS_FSID_SIZE);
10124 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10125 btrfs_header_chunk_tree_uuid(c),
10128 btrfs_mark_buffer_dirty(c);
10130 * this case can happen in the following case:
10132 * 1.overwrite previous root.
10134 * 2.reinit reloc data root, this is because we skip pin
10135 * down reloc data tree before which means we can allocate
10136 * same block bytenr here.
10138 if (old->start == c->start) {
10139 btrfs_set_root_generation(&root->root_item,
10141 root->root_item.level = btrfs_header_level(root->node);
10142 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10143 &root->root_key, &root->root_item);
10145 free_extent_buffer(c);
10149 free_extent_buffer(old);
10151 add_root_to_dirty_list(root);
10155 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10156 struct extent_buffer *eb, int tree_root)
10158 struct extent_buffer *tmp;
10159 struct btrfs_root_item *ri;
10160 struct btrfs_key key;
10163 int level = btrfs_header_level(eb);
10169 * If we have pinned this block before, don't pin it again.
10170 * This can not only avoid forever loop with broken filesystem
10171 * but also give us some speedups.
10173 if (test_range_bit(&fs_info->pinned_extents, eb->start,
10174 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10177 btrfs_pin_extent(fs_info, eb->start, eb->len);
10179 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10180 nritems = btrfs_header_nritems(eb);
10181 for (i = 0; i < nritems; i++) {
10183 btrfs_item_key_to_cpu(eb, &key, i);
10184 if (key.type != BTRFS_ROOT_ITEM_KEY)
10186 /* Skip the extent root and reloc roots */
10187 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10188 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10189 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10191 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10192 bytenr = btrfs_disk_root_bytenr(eb, ri);
10195 * If at any point we start needing the real root we
10196 * will have to build a stump root for the root we are
10197 * in, but for now this doesn't actually use the root so
10198 * just pass in extent_root.
10200 tmp = read_tree_block(fs_info->extent_root, bytenr,
10202 if (!extent_buffer_uptodate(tmp)) {
10203 fprintf(stderr, "Error reading root block\n");
10206 ret = pin_down_tree_blocks(fs_info, tmp, 0);
10207 free_extent_buffer(tmp);
10211 bytenr = btrfs_node_blockptr(eb, i);
10213 /* If we aren't the tree root don't read the block */
10214 if (level == 1 && !tree_root) {
10215 btrfs_pin_extent(fs_info, bytenr, nodesize);
10219 tmp = read_tree_block(fs_info->extent_root, bytenr,
10221 if (!extent_buffer_uptodate(tmp)) {
10222 fprintf(stderr, "Error reading tree block\n");
10225 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10226 free_extent_buffer(tmp);
10235 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10239 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10243 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
10246 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10248 struct btrfs_block_group_cache *cache;
10249 struct btrfs_path *path;
10250 struct extent_buffer *leaf;
10251 struct btrfs_chunk *chunk;
10252 struct btrfs_key key;
10256 path = btrfs_alloc_path();
10261 key.type = BTRFS_CHUNK_ITEM_KEY;
10264 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
10266 btrfs_free_path(path);
10271 * We do this in case the block groups were screwed up and had alloc
10272 * bits that aren't actually set on the chunks. This happens with
10273 * restored images every time and could happen in real life I guess.
10275 fs_info->avail_data_alloc_bits = 0;
10276 fs_info->avail_metadata_alloc_bits = 0;
10277 fs_info->avail_system_alloc_bits = 0;
10279 /* First we need to create the in-memory block groups */
10281 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10282 ret = btrfs_next_leaf(fs_info->chunk_root, path);
10284 btrfs_free_path(path);
10292 leaf = path->nodes[0];
10293 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10294 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10299 chunk = btrfs_item_ptr(leaf, path->slots[0],
10300 struct btrfs_chunk);
10301 btrfs_add_block_group(fs_info, 0,
10302 btrfs_chunk_type(leaf, chunk),
10303 key.objectid, key.offset,
10304 btrfs_chunk_length(leaf, chunk));
10305 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10306 key.offset + btrfs_chunk_length(leaf, chunk),
10312 cache = btrfs_lookup_first_block_group(fs_info, start);
10316 start = cache->key.objectid + cache->key.offset;
10319 btrfs_free_path(path);
10323 static int reset_balance(struct btrfs_trans_handle *trans,
10324 struct btrfs_fs_info *fs_info)
10326 struct btrfs_root *root = fs_info->tree_root;
10327 struct btrfs_path *path;
10328 struct extent_buffer *leaf;
10329 struct btrfs_key key;
10330 int del_slot, del_nr = 0;
10334 path = btrfs_alloc_path();
10338 key.objectid = BTRFS_BALANCE_OBJECTID;
10339 key.type = BTRFS_BALANCE_ITEM_KEY;
10342 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10347 goto reinit_data_reloc;
10352 ret = btrfs_del_item(trans, root, path);
10355 btrfs_release_path(path);
10357 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10358 key.type = BTRFS_ROOT_ITEM_KEY;
10361 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10365 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10370 ret = btrfs_del_items(trans, root, path,
10377 btrfs_release_path(path);
10380 ret = btrfs_search_slot(trans, root, &key, path,
10387 leaf = path->nodes[0];
10388 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10389 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10391 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10396 del_slot = path->slots[0];
10405 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
10409 btrfs_release_path(path);
10412 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10413 key.type = BTRFS_ROOT_ITEM_KEY;
10414 key.offset = (u64)-1;
10415 root = btrfs_read_fs_root(fs_info, &key);
10416 if (IS_ERR(root)) {
10417 fprintf(stderr, "Error reading data reloc tree\n");
10418 ret = PTR_ERR(root);
10421 record_root_in_trans(trans, root);
10422 ret = btrfs_fsck_reinit_root(trans, root, 0);
10425 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10427 btrfs_free_path(path);
10431 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10432 struct btrfs_fs_info *fs_info)
10438 * The only reason we don't do this is because right now we're just
10439 * walking the trees we find and pinning down their bytes, we don't look
10440 * at any of the leaves. In order to do mixed groups we'd have to check
10441 * the leaves of any fs roots and pin down the bytes for any file
10442 * extents we find. Not hard but why do it if we don't have to?
10444 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10445 fprintf(stderr, "We don't support re-initing the extent tree "
10446 "for mixed block groups yet, please notify a btrfs "
10447 "developer you want to do this so they can add this "
10448 "functionality.\n");
10453 * first we need to walk all of the trees except the extent tree and pin
10454 * down the bytes that are in use so we don't overwrite any existing
10457 ret = pin_metadata_blocks(fs_info);
10459 fprintf(stderr, "error pinning down used bytes\n");
10464 * Need to drop all the block groups since we're going to recreate all
10467 btrfs_free_block_groups(fs_info);
10468 ret = reset_block_groups(fs_info);
10470 fprintf(stderr, "error resetting the block groups\n");
10474 /* Ok we can allocate now, reinit the extent root */
10475 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10477 fprintf(stderr, "extent root initialization failed\n");
10479 * When the transaction code is updated we should end the
10480 * transaction, but for now progs only knows about commit so
10481 * just return an error.
10487 * Now we have all the in-memory block groups setup so we can make
10488 * allocations properly, and the metadata we care about is safe since we
10489 * pinned all of it above.
10492 struct btrfs_block_group_cache *cache;
10494 cache = btrfs_lookup_first_block_group(fs_info, start);
10497 start = cache->key.objectid + cache->key.offset;
10498 ret = btrfs_insert_item(trans, fs_info->extent_root,
10499 &cache->key, &cache->item,
10500 sizeof(cache->item));
10502 fprintf(stderr, "Error adding block group\n");
10505 btrfs_extent_post_op(trans, fs_info->extent_root);
10508 ret = reset_balance(trans, fs_info);
10510 fprintf(stderr, "error resetting the pending balance\n");
10515 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10517 struct btrfs_path *path;
10518 struct btrfs_trans_handle *trans;
10519 struct btrfs_key key;
10522 printf("Recowing metadata block %llu\n", eb->start);
10523 key.objectid = btrfs_header_owner(eb);
10524 key.type = BTRFS_ROOT_ITEM_KEY;
10525 key.offset = (u64)-1;
10527 root = btrfs_read_fs_root(root->fs_info, &key);
10528 if (IS_ERR(root)) {
10529 fprintf(stderr, "Couldn't find owner root %llu\n",
10531 return PTR_ERR(root);
10534 path = btrfs_alloc_path();
10538 trans = btrfs_start_transaction(root, 1);
10539 if (IS_ERR(trans)) {
10540 btrfs_free_path(path);
10541 return PTR_ERR(trans);
10544 path->lowest_level = btrfs_header_level(eb);
10545 if (path->lowest_level)
10546 btrfs_node_key_to_cpu(eb, &key, 0);
10548 btrfs_item_key_to_cpu(eb, &key, 0);
10550 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10551 btrfs_commit_transaction(trans, root);
10552 btrfs_free_path(path);
10556 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10558 struct btrfs_path *path;
10559 struct btrfs_trans_handle *trans;
10560 struct btrfs_key key;
10563 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10564 bad->key.type, bad->key.offset);
10565 key.objectid = bad->root_id;
10566 key.type = BTRFS_ROOT_ITEM_KEY;
10567 key.offset = (u64)-1;
10569 root = btrfs_read_fs_root(root->fs_info, &key);
10570 if (IS_ERR(root)) {
10571 fprintf(stderr, "Couldn't find owner root %llu\n",
10573 return PTR_ERR(root);
10576 path = btrfs_alloc_path();
10580 trans = btrfs_start_transaction(root, 1);
10581 if (IS_ERR(trans)) {
10582 btrfs_free_path(path);
10583 return PTR_ERR(trans);
10586 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10592 ret = btrfs_del_item(trans, root, path);
10594 btrfs_commit_transaction(trans, root);
10595 btrfs_free_path(path);
10599 static int zero_log_tree(struct btrfs_root *root)
10601 struct btrfs_trans_handle *trans;
10604 trans = btrfs_start_transaction(root, 1);
10605 if (IS_ERR(trans)) {
10606 ret = PTR_ERR(trans);
10609 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10610 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10611 ret = btrfs_commit_transaction(trans, root);
10615 static int populate_csum(struct btrfs_trans_handle *trans,
10616 struct btrfs_root *csum_root, char *buf, u64 start,
10623 while (offset < len) {
10624 sectorsize = csum_root->sectorsize;
10625 ret = read_extent_data(csum_root, buf, start + offset,
10629 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10630 start + offset, buf, sectorsize);
10633 offset += sectorsize;
10638 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10639 struct btrfs_root *csum_root,
10640 struct btrfs_root *cur_root)
10642 struct btrfs_path *path;
10643 struct btrfs_key key;
10644 struct extent_buffer *node;
10645 struct btrfs_file_extent_item *fi;
10652 path = btrfs_alloc_path();
10655 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10665 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10668 /* Iterate all regular file extents and fill its csum */
10670 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10672 if (key.type != BTRFS_EXTENT_DATA_KEY)
10674 node = path->nodes[0];
10675 slot = path->slots[0];
10676 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10677 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10679 start = btrfs_file_extent_disk_bytenr(node, fi);
10680 len = btrfs_file_extent_disk_num_bytes(node, fi);
10682 ret = populate_csum(trans, csum_root, buf, start, len);
10683 if (ret == -EEXIST)
10689 * TODO: if next leaf is corrupted, jump to nearest next valid
10692 ret = btrfs_next_item(cur_root, path);
10702 btrfs_free_path(path);
10707 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10708 struct btrfs_root *csum_root)
10710 struct btrfs_fs_info *fs_info = csum_root->fs_info;
10711 struct btrfs_path *path;
10712 struct btrfs_root *tree_root = fs_info->tree_root;
10713 struct btrfs_root *cur_root;
10714 struct extent_buffer *node;
10715 struct btrfs_key key;
10719 path = btrfs_alloc_path();
10723 key.objectid = BTRFS_FS_TREE_OBJECTID;
10725 key.type = BTRFS_ROOT_ITEM_KEY;
10727 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10736 node = path->nodes[0];
10737 slot = path->slots[0];
10738 btrfs_item_key_to_cpu(node, &key, slot);
10739 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10741 if (key.type != BTRFS_ROOT_ITEM_KEY)
10743 if (!is_fstree(key.objectid))
10745 key.offset = (u64)-1;
10747 cur_root = btrfs_read_fs_root(fs_info, &key);
10748 if (IS_ERR(cur_root) || !cur_root) {
10749 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10753 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10758 ret = btrfs_next_item(tree_root, path);
10768 btrfs_free_path(path);
10772 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10773 struct btrfs_root *csum_root)
10775 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10776 struct btrfs_path *path;
10777 struct btrfs_extent_item *ei;
10778 struct extent_buffer *leaf;
10780 struct btrfs_key key;
10783 path = btrfs_alloc_path();
10788 key.type = BTRFS_EXTENT_ITEM_KEY;
10791 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10793 btrfs_free_path(path);
10797 buf = malloc(csum_root->sectorsize);
10799 btrfs_free_path(path);
10804 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10805 ret = btrfs_next_leaf(extent_root, path);
10813 leaf = path->nodes[0];
10815 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10816 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10821 ei = btrfs_item_ptr(leaf, path->slots[0],
10822 struct btrfs_extent_item);
10823 if (!(btrfs_extent_flags(leaf, ei) &
10824 BTRFS_EXTENT_FLAG_DATA)) {
10829 ret = populate_csum(trans, csum_root, buf, key.objectid,
10836 btrfs_free_path(path);
10842 * Recalculate the csum and put it into the csum tree.
10844 * Extent tree init will wipe out all the extent info, so in that case, we
10845 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
10846 * will use fs/subvol trees to init the csum tree.
10848 static int fill_csum_tree(struct btrfs_trans_handle *trans,
10849 struct btrfs_root *csum_root,
10850 int search_fs_tree)
10852 if (search_fs_tree)
10853 return fill_csum_tree_from_fs(trans, csum_root);
10855 return fill_csum_tree_from_extent(trans, csum_root);
10858 static void free_roots_info_cache(void)
10860 if (!roots_info_cache)
10863 while (!cache_tree_empty(roots_info_cache)) {
10864 struct cache_extent *entry;
10865 struct root_item_info *rii;
10867 entry = first_cache_extent(roots_info_cache);
10870 remove_cache_extent(roots_info_cache, entry);
10871 rii = container_of(entry, struct root_item_info, cache_extent);
10875 free(roots_info_cache);
10876 roots_info_cache = NULL;
10879 static int build_roots_info_cache(struct btrfs_fs_info *info)
10882 struct btrfs_key key;
10883 struct extent_buffer *leaf;
10884 struct btrfs_path *path;
10886 if (!roots_info_cache) {
10887 roots_info_cache = malloc(sizeof(*roots_info_cache));
10888 if (!roots_info_cache)
10890 cache_tree_init(roots_info_cache);
10893 path = btrfs_alloc_path();
10898 key.type = BTRFS_EXTENT_ITEM_KEY;
10901 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10904 leaf = path->nodes[0];
10907 struct btrfs_key found_key;
10908 struct btrfs_extent_item *ei;
10909 struct btrfs_extent_inline_ref *iref;
10910 int slot = path->slots[0];
10915 struct cache_extent *entry;
10916 struct root_item_info *rii;
10918 if (slot >= btrfs_header_nritems(leaf)) {
10919 ret = btrfs_next_leaf(info->extent_root, path);
10926 leaf = path->nodes[0];
10927 slot = path->slots[0];
10930 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10932 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10933 found_key.type != BTRFS_METADATA_ITEM_KEY)
10936 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10937 flags = btrfs_extent_flags(leaf, ei);
10939 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10940 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
10943 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10944 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10945 level = found_key.offset;
10947 struct btrfs_tree_block_info *binfo;
10949 binfo = (struct btrfs_tree_block_info *)(ei + 1);
10950 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10951 level = btrfs_tree_block_level(leaf, binfo);
10955 * For a root extent, it must be of the following type and the
10956 * first (and only one) iref in the item.
10958 type = btrfs_extent_inline_ref_type(leaf, iref);
10959 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10962 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
10963 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10965 rii = malloc(sizeof(struct root_item_info));
10970 rii->cache_extent.start = root_id;
10971 rii->cache_extent.size = 1;
10972 rii->level = (u8)-1;
10973 entry = &rii->cache_extent;
10974 ret = insert_cache_extent(roots_info_cache, entry);
10977 rii = container_of(entry, struct root_item_info,
10981 ASSERT(rii->cache_extent.start == root_id);
10982 ASSERT(rii->cache_extent.size == 1);
10984 if (level > rii->level || rii->level == (u8)-1) {
10985 rii->level = level;
10986 rii->bytenr = found_key.objectid;
10987 rii->gen = btrfs_extent_generation(leaf, ei);
10988 rii->node_count = 1;
10989 } else if (level == rii->level) {
10997 btrfs_free_path(path);
/*
 * Validate one subvolume/snapshot root item against the information
 * previously collected in roots_info_cache, and repair it if stale.
 *
 * @info:           filesystem info (unused in the visible lines)
 * @path:           positioned at the ROOT_ITEM to examine
 * @root_key:       key of that root item (objectid is the root id)
 * @read_only_mode: when set, only detect the mismatch; do not write
 *
 * If the item's bytenr/level/generation disagree with the root node
 * found in the extent tree, the item is rewritten in place (unless
 * read_only_mode).  NOTE(review): several lines are elided in this
 * excerpt, including the error-path returns between visible statements.
 */
11002 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11003 struct btrfs_path *path,
11004 const struct btrfs_key *root_key,
11005 const int read_only_mode)
11007 const u64 root_id = root_key->objectid;
11008 struct cache_extent *entry;
11009 struct root_item_info *rii;
11010 struct btrfs_root_item ri;
11011 unsigned long offset;
/* The cache entry for this root must exist; built by an earlier pass. */
11013 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11016 "Error: could not find extent items for root %llu\n",
11017 root_key->objectid);
11021 rii = container_of(entry, struct root_item_info, cache_extent);
11022 ASSERT(rii->cache_extent.start == root_id);
11023 ASSERT(rii->cache_extent.size == 1);
/* Exactly one candidate root node extent is expected per root. */
11025 if (rii->node_count != 1) {
11027 "Error: could not find btree root extent for root %llu\n",
/* Copy the on-disk root item into a local struct for comparison. */
11032 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11033 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
/* Mismatch in any of bytenr/level/generation means the item is stale. */
11035 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11036 btrfs_root_level(&ri) != rii->level ||
11037 btrfs_root_generation(&ri) != rii->gen) {
11040 * If we're in repair mode but our caller told us to not update
11041 * the root item, i.e. just check if it needs to be updated, don't
11042 * print this message, since the caller will call us again shortly
11043 * for the same root item without read only mode (the caller will
11044 * open a transaction first).
11046 if (!(read_only_mode && repair))
11048 "%sroot item for root %llu,"
11049 " current bytenr %llu, current gen %llu, current level %u,"
11050 " new bytenr %llu, new gen %llu, new level %u\n",
11051 (read_only_mode ? "" : "fixing "),
11053 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11054 btrfs_root_level(&ri),
11055 rii->bytenr, rii->gen, rii->level);
/* A newer generation in the item than in the found node is suspicious. */
11057 if (btrfs_root_generation(&ri) > rii->gen) {
11059 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11060 root_id, btrfs_root_generation(&ri), rii->gen);
/* Apply the fix: point the root item at the discovered root node. */
11064 if (!read_only_mode) {
11065 btrfs_set_root_bytenr(&ri, rii->bytenr);
11066 btrfs_set_root_level(&ri, rii->level);
11067 btrfs_set_root_generation(&ri, rii->gen);
11068 write_extent_buffer(path->nodes[0], &ri,
11069 offset, sizeof(ri));
11079 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11080 * caused read-only snapshots to be corrupted if they were created at a moment
11081 * when the source subvolume/snapshot had orphan items. The issue was that the
11082 * on-disk root items became incorrect, referring to the pre orphan cleanup root
11083 * node instead of the post orphan cleanup root node.
11084 * So this function, and its callees, just detects and fixes those cases. Even
11085 * though the regression was for read-only snapshots, this function applies to
11086 * any snapshot/subvolume root.
11087 * This must be run before any other repair code - not doing so makes other
11088 * repair code delete or modify backrefs in the extent tree for example, which
11089 * will result in an inconsistent fs after repairing the root items.
/*
 * Walk every subvolume ROOT_ITEM in the tree root and run
 * maybe_repair_root_item() on it, fixing stale items when in repair
 * mode.  On success the return value appears to be the number of roots
 * that needed fixing (see the caller's "Fixed %d roots" message);
 * NOTE(review): the accumulation of that count is in elided lines -
 * confirm against the full source.
 */
11091 static int repair_root_items(struct btrfs_fs_info *info)
11093 struct btrfs_path *path = NULL;
11094 struct btrfs_key key;
11095 struct extent_buffer *leaf;
11096 struct btrfs_trans_handle *trans = NULL;
11099 int need_trans = 0;
/* Pre-scan the extent tree so each root's true root node is known. */
11101 ret = build_roots_info_cache(info);
11105 path = btrfs_alloc_path();
/* Start the search at the first possible subvolume objectid. */
11111 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11112 key.type = BTRFS_ROOT_ITEM_KEY;
11117 * Avoid opening and committing transactions if a leaf doesn't have
11118 * any root items that need to be fixed, so that we avoid rotating
11119 * backup roots unnecessarily.
11122 trans = btrfs_start_transaction(info->tree_root, 1);
11123 if (IS_ERR(trans)) {
11124 ret = PTR_ERR(trans);
11129 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11133 leaf = path->nodes[0];
11136 struct btrfs_key found_key;
/* Current leaf exhausted: remember the next key and drop the path. */
11138 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11139 int no_more_keys = find_next_key(path, &key);
11141 btrfs_release_path(path);
11143 ret = btrfs_commit_transaction(trans,
11155 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
/* Only subvolume root items matter; skip relocation tree items. */
11157 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11159 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11162 ret = maybe_repair_root_item(info, path, &found_key,
/* A fix is needed and we are in repair mode but have no transaction yet. */
11167 if (!trans && repair) {
11170 btrfs_release_path(path);
/* Common exit: free the cache and path, commit any open transaction. */
11180 free_roots_info_cache();
11181 btrfs_free_path(path);
11183 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Drop the (v1) free space cache: clear every block group's cache inode
 * and extent data, then invalidate the cache by setting the superblock's
 * cache_generation to -1 inside a small transaction.
 * Returns 0 on success or a negative error (error paths partly elided
 * in this excerpt).
 */
11190 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
11192 struct btrfs_trans_handle *trans;
11193 struct btrfs_block_group_cache *bg_cache;
/* Clear all free space cache inodes and their extent data */
11199 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
11202 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
/* Advance the cursor past the block group just processed. */
11205 current = bg_cache->key.objectid + bg_cache->key.offset;
/* Don't forget to set cache_generation to -1 */
11209 trans = btrfs_start_transaction(fs_info->tree_root, 0);
11210 if (IS_ERR(trans)) {
11211 error("failed to update super block cache generation");
11212 return PTR_ERR(trans);
11214 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
11215 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Usage/help text for "btrfs check"; passed to usage() when the command
 * is invoked with bad arguments (see cmd_check below).
 */
11220 const char * const cmd_check_usage[] = {
11221 "btrfs check [options] <device>",
11222 "Check structural integrity of a filesystem (unmounted).",
11223 "Check structural integrity of an unmounted filesystem. Verify internal",
11224 "trees' consistency and item connectivity. In the repair mode try to",
11225 "fix the problems found. ",
11226 "WARNING: the repair mode is considered dangerous",
11228 "-s|--super <superblock> use this superblock copy",
11229 "-b|--backup use the first valid backup root copy",
11230 "--repair try to repair the filesystem",
11231 "--readonly run in read-only mode (default)",
11232 "--init-csum-tree create a new CRC tree",
11233 "--init-extent-tree create a new extent tree",
11234 "--mode <MODE> allows choice of memory/IO trade-offs",
11235 " where MODE is one of:",
11236 " original - read inodes and extents to memory (requires",
11237 " more memory, does less IO)",
11238 " lowmem - try to use less memory but read blocks again",
11240 "--check-data-csum verify checksums of data blocks",
11241 "-Q|--qgroup-report print a report on qgroup consistency",
11242 "-E|--subvol-extents <subvolid>",
11243 " print subvolume extents and sharing state",
11244 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
11245 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
11246 "-p|--progress indicate progress",
11247 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
11248 " NOTE: v1 support implemented",
/*
 * Entry point for "btrfs check": parse options, open the filesystem,
 * then run the requested verification/repair passes - extents/chunks,
 * root items, free space cache/tree, fs roots, csums, root refs and
 * qgroups - and finally print usage statistics.
 * NOTE(review): this excerpt elides many lines (error paths, gotos and
 * the function's tail); comments below describe only the visible code.
 */
11252 int cmd_check(int argc, char **argv)
11254 struct cache_tree root_cache;
11255 struct btrfs_root *root;
11256 struct btrfs_fs_info *info;
11259 u64 tree_root_bytenr = 0;
11260 u64 chunk_root_bytenr = 0;
11261 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11264 int init_csum_tree = 0;
11266 int clear_space_cache = 0;
11267 int qgroup_report = 0;
11268 int qgroups_repaired = 0;
11269 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* Long-option identifiers for options without a short equivalent. */
11273 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11274 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11275 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11276 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
11277 static const struct option long_options[] = {
11278 { "super", required_argument, NULL, 's' },
11279 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11280 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11281 { "init-csum-tree", no_argument, NULL,
11282 GETOPT_VAL_INIT_CSUM },
11283 { "init-extent-tree", no_argument, NULL,
11284 GETOPT_VAL_INIT_EXTENT },
11285 { "check-data-csum", no_argument, NULL,
11286 GETOPT_VAL_CHECK_CSUM },
11287 { "backup", no_argument, NULL, 'b' },
11288 { "subvol-extents", required_argument, NULL, 'E' },
11289 { "qgroup-report", no_argument, NULL, 'Q' },
11290 { "tree-root", required_argument, NULL, 'r' },
11291 { "chunk-root", required_argument, NULL,
11292 GETOPT_VAL_CHUNK_TREE },
11293 { "progress", no_argument, NULL, 'p' },
11294 { "mode", required_argument, NULL,
11296 { "clear-space-cache", required_argument, NULL,
11297 GETOPT_VAL_CLEAR_SPACE_CACHE},
11298 { NULL, 0, NULL, 0}
/* Option parsing loop. */
11301 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11305 case 'a': /* ignored */ break;
11307 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
/* -s: select an alternate superblock mirror copy. */
11310 num = arg_strtou64(optarg);
11311 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11313 "super mirror should be less than %d",
11314 BTRFS_SUPER_MIRROR_MAX);
11317 bytenr = btrfs_sb_offset(((int)num));
11318 printf("using SB copy %llu, bytenr %llu\n", num,
11319 (unsigned long long)bytenr);
11325 subvolid = arg_strtou64(optarg);
11328 tree_root_bytenr = arg_strtou64(optarg);
11330 case GETOPT_VAL_CHUNK_TREE:
11331 chunk_root_bytenr = arg_strtou64(optarg);
11334 ctx.progress_enabled = true;
11338 usage(cmd_check_usage);
11339 case GETOPT_VAL_REPAIR:
11340 printf("enabling repair mode\n");
11342 ctree_flags |= OPEN_CTREE_WRITES;
11344 case GETOPT_VAL_READONLY:
11347 case GETOPT_VAL_INIT_CSUM:
11348 printf("Creating a new CRC tree\n")
11349 init_csum_tree = 1;
11351 ctree_flags |= OPEN_CTREE_WRITES;
11353 case GETOPT_VAL_INIT_EXTENT:
11354 init_extent_tree = 1;
11355 ctree_flags |= (OPEN_CTREE_WRITES |
11356 OPEN_CTREE_NO_BLOCK_GROUPS);
11359 case GETOPT_VAL_CHECK_CSUM:
11360 check_data_csum = 1;
11362 case GETOPT_VAL_MODE:
11363 check_mode = parse_check_mode(optarg);
11364 if (check_mode == CHECK_MODE_UNKNOWN) {
11365 error("unknown mode: %s", optarg);
11369 case GETOPT_VAL_CLEAR_SPACE_CACHE:
11370 if (strcmp(optarg, "v1") != 0) {
/* NOTE(review): "implmented" typo in this user-visible message. */
11372 "only v1 support implmented, unrecognized value %s",
11376 clear_space_cache = 1;
11377 ctree_flags |= OPEN_CTREE_WRITES;
/* Exactly one positional argument (the device) is required. */
11382 if (check_argc_exact(argc - optind, 1))
11383 usage(cmd_check_usage);
11385 if (ctx.progress_enabled) {
11386 ctx.tp = TASK_NOTHING;
11387 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11390 /* This check is the only reason for --readonly to exist */
11391 if (readonly && repair) {
11392 error("repair options are not compatible with --readonly");
11397 * Not supported yet
11399 if (repair && check_mode == CHECK_MODE_LOWMEM) {
11400 error("low memory mode doesn't support repair yet");
11405 cache_tree_init(&root_cache);
/* Refuse to operate on a mounted filesystem. */
11407 if((ret = check_mounted(argv[optind])) < 0) {
11408 error("could not check mount status: %s", strerror(-ret));
11411 error("%s is currently mounted, aborting", argv[optind]);
11416 /* only allow partial opening under repair mode */
11418 ctree_flags |= OPEN_CTREE_PARTIAL;
11420 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11421 chunk_root_bytenr, ctree_flags);
11423 error("cannot open file system");
11428 global_info = info;
11429 root = info->fs_root;
/* --clear-space-cache: only the v1 cache can be cleared here. */
11430 if (clear_space_cache) {
11431 if (btrfs_fs_compat_ro(info,
11432 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
11434 "free space cache v2 detected, clearing not implemented");
11438 printf("Clearing free space cache\n");
11439 ret = clear_free_space_cache(info);
11441 error("failed to clear free space cache");
11444 printf("Free space cache cleared\n");
11450 * repair mode will force us to commit transaction which
11451 * will make us fail to load log tree when mounting.
11453 if (repair && btrfs_super_log_root(info->super_copy)) {
11454 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
11459 ret = zero_log_tree(root);
11461 error("failed to zero log tree: %d", ret);
11466 uuid_unparse(info->super_copy->fsid, uuidbuf);
/* -Q: report qgroup consistency only, instead of a full check. */
11467 if (qgroup_report) {
11468 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11470 ret = qgroup_verify_all(info);
/* -E: dump extent state for a single subvolume. */
11476 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11477 subvolid, argv[optind], uuidbuf);
11478 ret = print_extent_state(info, subvolid);
11481 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
/* Bail out early if any of the critical tree roots is unreadable. */
11483 if (!extent_buffer_uptodate(info->tree_root->node) ||
11484 !extent_buffer_uptodate(info->dev_root->node) ||
11485 !extent_buffer_uptodate(info->chunk_root->node)) {
11486 error("critical roots corrupted, unable to check the filesystem");
/* --init-extent-tree / --init-csum-tree: rebuild those trees first. */
11491 if (init_extent_tree || init_csum_tree) {
11492 struct btrfs_trans_handle *trans;
11494 trans = btrfs_start_transaction(info->extent_root, 0);
11495 if (IS_ERR(trans)) {
11496 error("error starting transaction");
11497 ret = PTR_ERR(trans);
11501 if (init_extent_tree) {
11502 printf("Creating a new extent tree\n");
11503 ret = reinit_extent_tree(trans, info);
11508 if (init_csum_tree) {
11509 printf("Reinitialize checksum tree\n");
11510 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11512 error("checksum tree initialization failed: %d",
11518 ret = fill_csum_tree(trans, info->csum_root,
11521 error("checksum tree refilling failed: %d", ret);
11526 * Ok now we commit and run the normal fsck, which will add
11527 * extent entries for all of the items it finds.
11529 ret = btrfs_commit_transaction(trans, info->extent_root);
11533 if (!extent_buffer_uptodate(info->extent_root->node)) {
11534 error("critical: extent_root, unable to check the filesystem");
11538 if (!extent_buffer_uptodate(info->csum_root->node)) {
11539 error("critical: csum_root, unable to check the filesystem");
/* Pass 1: extents and chunks; lowmem mode uses the v2 checker. */
11544 if (!ctx.progress_enabled)
11545 printf("checking extents");
11546 if (check_mode == CHECK_MODE_LOWMEM)
11547 ret = check_chunks_and_extents_v2(root);
11549 ret = check_chunks_and_extents(root);
11551 printf("Errors found in extent allocation tree or chunk allocation");
/* Pass 2: root items (must run before other repairs, see above). */
11553 ret = repair_root_items(info);
11557 fprintf(stderr, "Fixed %d roots.\n", ret);
11559 } else if (ret > 0) {
11561 "Found %d roots with an outdated root item.\n",
11564 "Please run a filesystem check with the option --repair to fix them.\n");
/* Pass 3: free space cache (v1) or free space tree (v2). */
11569 if (!ctx.progress_enabled) {
11570 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11571 fprintf(stderr, "checking free space tree\n");
11573 fprintf(stderr, "checking free space cache\n");
11575 ret = check_space_cache(root);
11580 * We used to have to have these hole extents in between our real
11581 * extents so if we don't have this flag set we need to make sure there
11582 * are no gaps in the file extents for inodes, otherwise we can just
11583 * ignore it when this happens.
11585 no_holes = btrfs_fs_incompat(root->fs_info,
11586 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
/* Pass 4: fs roots, then csums, then root refs. */
11587 if (!ctx.progress_enabled)
11588 fprintf(stderr, "checking fs roots\n");
11589 ret = check_fs_roots(root, &root_cache);
11593 fprintf(stderr, "checking csums\n");
11594 ret = check_csums(root);
11598 fprintf(stderr, "checking root refs\n");
11599 ret = check_root_refs(root, &root_cache);
/* In repair mode, re-COW extent buffers queued during the checks. */
11603 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11604 struct extent_buffer *eb;
11606 eb = list_first_entry(&root->fs_info->recow_ebs,
11607 struct extent_buffer, recow);
11608 list_del_init(&eb->recow);
11609 ret = recow_extent_buffer(root, eb);
/* Delete items that were flagged as bad during the checks. */
11614 while (!list_empty(&delete_items)) {
11615 struct bad_item *bad;
11617 bad = list_first_entry(&delete_items, struct bad_item, list);
11618 list_del_init(&bad->list);
11620 ret = delete_bad_item(root, bad);
/* Pass 5: qgroups, with optional repair. */
11624 if (info->quota_enabled) {
11626 fprintf(stderr, "checking quota groups\n");
11627 err = qgroup_verify_all(info);
11631 err = repair_qgroups(info, &qgroups_repaired);
/* Leftover recow entries mean unrepaired transid errors. */
11636 if (!list_empty(&root->fs_info->recow_ebs)) {
11637 error("transid errors in file system");
11641 /* Don't override original ret */
11642 if (!ret && qgroups_repaired)
11643 ret = qgroups_repaired;
11645 if (found_old_backref) { /*
11646 * there was a disk format change when mixed
11647 * backref was in testing tree. The old format
11648 * existed about one week.
11650 printf("\n * Found old mixed backref format. "
11651 "The old format is not supported! *"
11652 "\n * Please mount the FS in readonly mode, "
11653 "backup data and re-format the FS. *\n\n");
/* Final statistics summary. */
11656 printf("found %llu bytes used err is %d\n",
11657 (unsigned long long)bytes_used, ret);
11658 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11659 printf("total tree bytes: %llu\n",
11660 (unsigned long long)total_btree_bytes);
11661 printf("total fs tree bytes: %llu\n",
11662 (unsigned long long)total_fs_tree_bytes);
11663 printf("total extent tree bytes: %llu\n",
11664 (unsigned long long)total_extent_tree_bytes);
11665 printf("btree space waste bytes: %llu\n",
11666 (unsigned long long)btree_space_waste);
11667 printf("file data blocks allocated: %llu\n referenced %llu\n",
11668 (unsigned long long)data_bytes_allocated,
11669 (unsigned long long)data_bytes_referenced);
11671 free_qgroup_counts();
11672 free_root_recs_tree(&root_cache);
11676 if (ctx.progress_enabled)
11677 task_deinit(ctx.info);