2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
49 TASK_NOTHING, /* have to be the last element */
54 enum task_position tp;
56 struct task_info *info;
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
77 enum btrfs_check_mode {
81 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
84 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86 struct extent_backref {
87 struct list_head list;
88 unsigned int is_data:1;
89 unsigned int found_extent_tree:1;
90 unsigned int full_backref:1;
91 unsigned int found_ref:1;
92 unsigned int broken:1;
/*
 * Map a list node back to the extent_backref that embeds it as its
 * 'list' member.
 */
95 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
97 return list_entry(entry, struct extent_backref, list);
100 struct data_backref {
101 struct extent_backref node;
/*
 * Map a generic extent_backref back to the data_backref that embeds it
 * as its 'node' member.
 */
115 static inline struct data_backref* to_data_backref(struct extent_backref *back)
117 return container_of(back, struct data_backref, node);
121 * Much like data_backref, but with the undetermined members removed
122 * and changed to use list_head.
123 * During extent scan, it is stored in root->orphan_data_extent.
124 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
126 struct orphan_data_extent {
127 struct list_head list;
135 struct tree_backref {
136 struct extent_backref node;
/*
 * Map a generic extent_backref back to the tree_backref that embeds it
 * as its 'node' member.
 */
143 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
145 return container_of(back, struct tree_backref, node);
148 /* Explicit initialization for extent_record::flag_block_full_backref */
149 enum { FLAG_UNSET = 2 };
151 struct extent_record {
152 struct list_head backrefs;
153 struct list_head dups;
154 struct list_head list;
155 struct cache_extent cache;
156 struct btrfs_disk_key parent_key;
161 u64 extent_item_refs;
163 u64 parent_generation;
167 unsigned int flag_block_full_backref:2;
168 unsigned int found_rec:1;
169 unsigned int content_checked:1;
170 unsigned int owner_ref_checked:1;
171 unsigned int is_root:1;
172 unsigned int metadata:1;
173 unsigned int bad_full_backref:1;
174 unsigned int crossing_stripes:1;
175 unsigned int wrong_chunk_type:1;
/*
 * Map a list node back to its extent_record.
 *
 * extent_record has several list_head members (backrefs, dups, list);
 * 'entry' must be the address of the 'list' member.
 */
178 static inline struct extent_record* to_extent_record(struct list_head *entry)
180 return container_of(entry, struct extent_record, list);
183 struct inode_backref {
184 struct list_head list;
185 unsigned int found_dir_item:1;
186 unsigned int found_dir_index:1;
187 unsigned int found_inode_ref:1;
/*
 * Map a list node back to the inode_backref that embeds it as its
 * 'list' member.
 */
197 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
199 return list_entry(entry, struct inode_backref, list);
202 struct root_item_record {
203 struct list_head list;
210 struct btrfs_key drop_key;
/*
 * Error bits stored in a backref's 'errors' field.  Each flag is a
 * distinct bit so several can be OR-ed together; print_ref_error()
 * turns a mask of them into text.
 */
213 #define REF_ERR_NO_DIR_ITEM (1 << 0)
214 #define REF_ERR_NO_DIR_INDEX (1 << 1)
215 #define REF_ERR_NO_INODE_REF (1 << 2)
216 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
217 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
218 #define REF_ERR_DUP_INODE_REF (1 << 5)
219 #define REF_ERR_INDEX_UNMATCH (1 << 6)
220 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
221 #define REF_ERR_NAME_TOO_LONG (1 << 8) /* 0x100 */
222 #define REF_ERR_NO_ROOT_REF (1 << 9)
223 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
224 #define REF_ERR_DUP_ROOT_REF (1 << 11)
225 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
227 struct file_extent_hole {
233 struct inode_record {
234 struct list_head backrefs;
235 unsigned int checked:1;
236 unsigned int merging:1;
237 unsigned int found_inode_item:1;
238 unsigned int found_dir_item:1;
239 unsigned int found_file_extent:1;
240 unsigned int found_csum_item:1;
241 unsigned int some_csum_missing:1;
242 unsigned int nodatasum:1;
255 struct rb_root holes;
256 struct list_head orphan_extents;
/*
 * Error bits OR-ed into inode_record 'errors'.  Each flag is a distinct
 * bit; print_inode_error() turns a mask of them into text.
 */
261 #define I_ERR_NO_INODE_ITEM (1 << 0)
262 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
263 #define I_ERR_DUP_INODE_ITEM (1 << 2)
264 #define I_ERR_DUP_DIR_INDEX (1 << 3)
265 #define I_ERR_ODD_DIR_ITEM (1 << 4)
266 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
267 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
268 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
269 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) /* 0x100 */
270 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
271 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) /* 0x400 */
272 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
273 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
274 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
275 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
277 struct root_backref {
278 struct list_head list;
279 unsigned int found_dir_item:1;
280 unsigned int found_dir_index:1;
281 unsigned int found_back_ref:1;
282 unsigned int found_forward_ref:1;
283 unsigned int reachable:1;
/*
 * Map a list node back to the root_backref that embeds it as its
 * 'list' member.
 */
292 static inline struct root_backref* to_root_backref(struct list_head *entry)
294 return list_entry(entry, struct root_backref, list);
298 struct list_head backrefs;
299 struct cache_extent cache;
300 unsigned int found_root_item:1;
306 struct cache_extent cache;
311 struct cache_extent cache;
312 struct cache_tree root_cache;
313 struct cache_tree inode_cache;
314 struct inode_record *current;
323 struct walk_control {
324 struct cache_tree shared;
325 struct shared_node *nodes[BTRFS_MAX_LEVEL];
331 struct btrfs_key key;
333 struct list_head list;
336 struct extent_entry {
341 struct list_head list;
344 struct root_item_info {
345 /* level of the root */
347 /* number of nodes at this level, must be 1 for a root */
351 struct cache_extent cache_extent;
355 * Error bit for low memory mode check.
357 * Currently no caller cares about it yet. Just internal use for error
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
/*
 * For kernel scrub workaround.
 *
 * This used to be (1 << 4), the same bit as REFERENCER_MISMATCH, so the
 * two errors could not be distinguished once OR-ed into a result.  It
 * now has a bit of its own.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9)
371 static void *print_status_check(void *p)
373 struct task_ctx *priv = p;
374 const char work_indicator[] = { '.', 'o', 'O', 'o' };
376 static char *task_position_string[] = {
378 "checking free space cache",
382 task_period_start(priv->info, 1000 /* 1s */);
384 if (priv->tp == TASK_NOTHING)
388 printf("%s [%c]\r", task_position_string[priv->tp],
389 work_indicator[count % 4]);
392 task_period_wait(priv->info);
397 static int print_status_return(void *p)
/*
 * Translate a check mode name into its enum btrfs_check_mode value.
 *
 * @str: mode name; compared with strcmp(), so the match is exact and
 *       case-sensitive.  No NULL check is visible here, so callers
 *       presumably pass a valid string - TODO confirm.
 *
 * Returns CHECK_MODE_LOWMEM for "lowmem", CHECK_MODE_ORIGINAL for
 * "orig" or "original", and CHECK_MODE_UNKNOWN for anything else.
 */
405 static enum btrfs_check_mode parse_check_mode(const char *str)
407 if (strcmp(str, "lowmem") == 0)
408 return CHECK_MODE_LOWMEM;
409 if (strcmp(str, "orig") == 0)
410 return CHECK_MODE_ORIGINAL;
411 if (strcmp(str, "original") == 0)
412 return CHECK_MODE_ORIGINAL;
414 return CHECK_MODE_UNKNOWN;
417 /* Compatibility helper so that older code can be reused */
418 static u64 first_extent_gap(struct rb_root *holes)
420 struct file_extent_hole *hole;
422 if (RB_EMPTY_ROOT(holes))
425 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
429 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
431 struct file_extent_hole *hole1;
432 struct file_extent_hole *hole2;
434 hole1 = rb_entry(node1, struct file_extent_hole, node);
435 hole2 = rb_entry(node2, struct file_extent_hole, node);
437 if (hole1->start > hole2->start)
439 if (hole1->start < hole2->start)
441 /* Now hole1->start == hole2->start */
442 if (hole1->len >= hole2->len)
444 * Hole 1 will be merge center
445 * Same hole will be merged later
448 /* Hole 2 will be merge center */
453 * Add a hole to the record
455 * This also merges holes on behalf of copy_file_extent_holes(),
456 * which ensures there are no contiguous (adjacent or overlapping) holes.
458 static int add_file_extent_hole(struct rb_root *holes,
461 struct file_extent_hole *hole;
462 struct file_extent_hole *prev = NULL;
463 struct file_extent_hole *next = NULL;
465 hole = malloc(sizeof(*hole));
470 /* Since compare will not return 0, no -EEXIST will happen */
471 rb_insert(holes, &hole->node, compare_hole);
473 /* simple merge with previous hole */
474 if (rb_prev(&hole->node))
475 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
477 if (prev && prev->start + prev->len >= hole->start) {
478 hole->len = hole->start + hole->len - prev->start;
479 hole->start = prev->start;
480 rb_erase(&prev->node, holes);
485 /* iterate merge with next holes */
487 if (!rb_next(&hole->node))
489 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
491 if (hole->start + hole->len >= next->start) {
492 if (hole->start + hole->len <= next->start + next->len)
493 hole->len = next->start + next->len -
495 rb_erase(&next->node, holes);
504 static int compare_hole_range(struct rb_node *node, void *data)
506 struct file_extent_hole *hole;
509 hole = (struct file_extent_hole *)data;
512 hole = rb_entry(node, struct file_extent_hole, node);
513 if (start < hole->start)
515 if (start >= hole->start && start < hole->start + hole->len)
521 * Delete a hole in the record
523 * This will split the hole and is much more restrictive than add.
525 static int del_file_extent_hole(struct rb_root *holes,
528 struct file_extent_hole *hole;
529 struct file_extent_hole tmp;
534 struct rb_node *node;
541 node = rb_search(holes, &tmp, compare_hole_range, NULL);
544 hole = rb_entry(node, struct file_extent_hole, node);
545 if (start + len > hole->start + hole->len)
549 * Now there will be no overlap, delete the hole and re-add the
550 * split(s) if they exist.
552 if (start > hole->start) {
553 prev_start = hole->start;
554 prev_len = start - hole->start;
557 if (hole->start + hole->len > start + len) {
558 next_start = start + len;
559 next_len = hole->start + hole->len - start - len;
562 rb_erase(node, holes);
565 ret = add_file_extent_hole(holes, prev_start, prev_len);
570 ret = add_file_extent_hole(holes, next_start, next_len);
577 static int copy_file_extent_holes(struct rb_root *dst,
580 struct file_extent_hole *hole;
581 struct rb_node *node;
584 node = rb_first(src);
586 hole = rb_entry(node, struct file_extent_hole, node);
587 ret = add_file_extent_hole(dst, hole->start, hole->len);
590 node = rb_next(node);
595 static void free_file_extent_holes(struct rb_root *holes)
597 struct rb_node *node;
598 struct file_extent_hole *hole;
600 node = rb_first(holes);
602 hole = rb_entry(node, struct file_extent_hole, node);
603 rb_erase(node, holes);
605 node = rb_first(holes);
609 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
/*
 * Mark @root as touched by transaction @trans.
 *
 * Acts only the first time a root is seen in a given transaction, i.e.
 * when root->last_trans differs from trans->transid.  In that case the
 * root is flagged track_dirty, last_trans is updated, and the current
 * root->node is remembered as commit_root.  extent_buffer_get() is then
 * called on root->node (presumably taking a reference for the
 * commit_root pointer - TODO confirm).
 */
611 static void record_root_in_trans(struct btrfs_trans_handle *trans,
612 struct btrfs_root *root)
614 if (root->last_trans != trans->transid) {
615 root->track_dirty = 1;
616 root->last_trans = trans->transid;
617 root->commit_root = root->node;
618 extent_buffer_get(root->node);
622 static u8 imode_to_type(u32 imode)
625 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
626 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
627 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
628 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
629 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
630 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
631 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
632 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
635 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
639 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
641 struct device_record *rec1;
642 struct device_record *rec2;
644 rec1 = rb_entry(node1, struct device_record, node);
645 rec2 = rb_entry(node2, struct device_record, node);
646 if (rec1->devid > rec2->devid)
648 else if (rec1->devid < rec2->devid)
654 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
656 struct inode_record *rec;
657 struct inode_backref *backref;
658 struct inode_backref *orig;
659 struct inode_backref *tmp;
660 struct orphan_data_extent *src_orphan;
661 struct orphan_data_extent *dst_orphan;
666 rec = malloc(sizeof(*rec));
668 return ERR_PTR(-ENOMEM);
669 memcpy(rec, orig_rec, sizeof(*rec));
671 INIT_LIST_HEAD(&rec->backrefs);
672 INIT_LIST_HEAD(&rec->orphan_extents);
673 rec->holes = RB_ROOT;
675 list_for_each_entry(orig, &orig_rec->backrefs, list) {
676 size = sizeof(*orig) + orig->namelen + 1;
677 backref = malloc(size);
682 memcpy(backref, orig, size);
683 list_add_tail(&backref->list, &rec->backrefs);
685 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
686 dst_orphan = malloc(sizeof(*dst_orphan));
691 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
692 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
694 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
701 rb = rb_first(&rec->holes);
703 struct file_extent_hole *hole;
705 hole = rb_entry(rb, struct file_extent_hole, node);
711 if (!list_empty(&rec->backrefs))
712 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
713 list_del(&orig->list);
717 if (!list_empty(&rec->orphan_extents))
718 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
719 list_del(&orig->list);
728 static void print_orphan_data_extents(struct list_head *orphan_extents,
731 struct orphan_data_extent *orphan;
733 if (list_empty(orphan_extents))
735 printf("The following data extent is lost in tree %llu:\n",
737 list_for_each_entry(orphan, orphan_extents, list) {
738 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
739 orphan->objectid, orphan->offset, orphan->disk_bytenr,
744 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
746 u64 root_objectid = root->root_key.objectid;
747 int errors = rec->errors;
751 /* reloc root errors, we print its corresponding fs root objectid*/
752 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
753 root_objectid = root->root_key.offset;
754 fprintf(stderr, "reloc");
756 fprintf(stderr, "root %llu inode %llu errors %x",
757 (unsigned long long) root_objectid,
758 (unsigned long long) rec->ino, rec->errors);
760 if (errors & I_ERR_NO_INODE_ITEM)
761 fprintf(stderr, ", no inode item");
762 if (errors & I_ERR_NO_ORPHAN_ITEM)
763 fprintf(stderr, ", no orphan item");
764 if (errors & I_ERR_DUP_INODE_ITEM)
765 fprintf(stderr, ", dup inode item");
766 if (errors & I_ERR_DUP_DIR_INDEX)
767 fprintf(stderr, ", dup dir index");
768 if (errors & I_ERR_ODD_DIR_ITEM)
769 fprintf(stderr, ", odd dir item");
770 if (errors & I_ERR_ODD_FILE_EXTENT)
771 fprintf(stderr, ", odd file extent");
772 if (errors & I_ERR_BAD_FILE_EXTENT)
773 fprintf(stderr, ", bad file extent");
774 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
775 fprintf(stderr, ", file extent overlap");
776 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
777 fprintf(stderr, ", file extent discount");
778 if (errors & I_ERR_DIR_ISIZE_WRONG)
779 fprintf(stderr, ", dir isize wrong");
780 if (errors & I_ERR_FILE_NBYTES_WRONG)
781 fprintf(stderr, ", nbytes wrong");
782 if (errors & I_ERR_ODD_CSUM_ITEM)
783 fprintf(stderr, ", odd csum item");
784 if (errors & I_ERR_SOME_CSUM_MISSING)
785 fprintf(stderr, ", some csum missing");
786 if (errors & I_ERR_LINK_COUNT_WRONG)
787 fprintf(stderr, ", link count wrong");
788 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
789 fprintf(stderr, ", orphan file extent");
790 fprintf(stderr, "\n");
791 /* Print the orphan extents if needed */
792 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
793 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
795 /* Print the holes if needed */
796 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
797 struct file_extent_hole *hole;
798 struct rb_node *node;
801 node = rb_first(&rec->holes);
802 fprintf(stderr, "Found file extent holes:\n");
805 hole = rb_entry(node, struct file_extent_hole, node);
806 fprintf(stderr, "\tstart: %llu, len: %llu\n",
807 hole->start, hole->len);
808 node = rb_next(node);
811 fprintf(stderr, "\tstart: 0, len: %llu\n",
812 round_up(rec->isize, root->sectorsize));
/*
 * Describe a backref error mask on stderr.
 *
 * @errors: bitmask of REF_ERR_* flags.
 *
 * For every flag set, a ", <description>" fragment is written, in bit
 * order, followed by a single newline.  Each fragment starts with a
 * comma, so callers presumably print the beginning of the line first -
 * TODO confirm against callers.
 */
816 static void print_ref_error(int errors)
818 if (errors & REF_ERR_NO_DIR_ITEM)
819 fprintf(stderr, ", no dir item");
820 if (errors & REF_ERR_NO_DIR_INDEX)
821 fprintf(stderr, ", no dir index");
822 if (errors & REF_ERR_NO_INODE_REF)
823 fprintf(stderr, ", no inode ref");
824 if (errors & REF_ERR_DUP_DIR_ITEM)
825 fprintf(stderr, ", dup dir item");
826 if (errors & REF_ERR_DUP_DIR_INDEX)
827 fprintf(stderr, ", dup dir index");
828 if (errors & REF_ERR_DUP_INODE_REF)
829 fprintf(stderr, ", dup inode ref");
830 if (errors & REF_ERR_INDEX_UNMATCH)
831 fprintf(stderr, ", index mismatch");
832 if (errors & REF_ERR_FILETYPE_UNMATCH)
833 fprintf(stderr, ", filetype mismatch");
834 if (errors & REF_ERR_NAME_TOO_LONG)
835 fprintf(stderr, ", name too long");
836 if (errors & REF_ERR_NO_ROOT_REF)
837 fprintf(stderr, ", no root ref");
838 if (errors & REF_ERR_NO_ROOT_BACKREF)
839 fprintf(stderr, ", no root backref");
840 if (errors & REF_ERR_DUP_ROOT_REF)
841 fprintf(stderr, ", dup root ref");
842 if (errors & REF_ERR_DUP_ROOT_BACKREF)
843 fprintf(stderr, ", dup root backref");
844 fprintf(stderr, "\n");
847 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
850 struct ptr_node *node;
851 struct cache_extent *cache;
852 struct inode_record *rec = NULL;
855 cache = lookup_cache_extent(inode_cache, ino, 1);
857 node = container_of(cache, struct ptr_node, cache);
859 if (mod && rec->refs > 1) {
860 node->data = clone_inode_rec(rec);
861 if (IS_ERR(node->data))
867 rec = calloc(1, sizeof(*rec));
869 return ERR_PTR(-ENOMEM);
871 rec->extent_start = (u64)-1;
873 INIT_LIST_HEAD(&rec->backrefs);
874 INIT_LIST_HEAD(&rec->orphan_extents);
875 rec->holes = RB_ROOT;
877 node = malloc(sizeof(*node));
880 return ERR_PTR(-ENOMEM);
882 node->cache.start = ino;
883 node->cache.size = 1;
886 if (ino == BTRFS_FREE_INO_OBJECTID)
889 ret = insert_cache_extent(inode_cache, &node->cache);
891 return ERR_PTR(-EEXIST);
896 static void free_orphan_data_extents(struct list_head *orphan_extents)
898 struct orphan_data_extent *orphan;
900 while (!list_empty(orphan_extents)) {
901 orphan = list_entry(orphan_extents->next,
902 struct orphan_data_extent, list);
903 list_del(&orphan->list);
908 static void free_inode_rec(struct inode_record *rec)
910 struct inode_backref *backref;
915 while (!list_empty(&rec->backrefs)) {
916 backref = to_inode_backref(rec->backrefs.next);
917 list_del(&backref->list);
920 free_orphan_data_extents(&rec->orphan_extents);
921 free_file_extent_holes(&rec->holes);
925 static int can_free_inode_rec(struct inode_record *rec)
927 if (!rec->errors && rec->checked && rec->found_inode_item &&
928 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
933 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
934 struct inode_record *rec)
936 struct cache_extent *cache;
937 struct inode_backref *tmp, *backref;
938 struct ptr_node *node;
941 if (!rec->found_inode_item)
944 filetype = imode_to_type(rec->imode);
945 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
946 if (backref->found_dir_item && backref->found_dir_index) {
947 if (backref->filetype != filetype)
948 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
949 if (!backref->errors && backref->found_inode_ref &&
950 rec->nlink == rec->found_link) {
951 list_del(&backref->list);
957 if (!rec->checked || rec->merging)
960 if (S_ISDIR(rec->imode)) {
961 if (rec->found_size != rec->isize)
962 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
963 if (rec->found_file_extent)
964 rec->errors |= I_ERR_ODD_FILE_EXTENT;
965 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
966 if (rec->found_dir_item)
967 rec->errors |= I_ERR_ODD_DIR_ITEM;
968 if (rec->found_size != rec->nbytes)
969 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
970 if (rec->nlink > 0 && !no_holes &&
971 (rec->extent_end < rec->isize ||
972 first_extent_gap(&rec->holes) < rec->isize))
973 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
976 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
977 if (rec->found_csum_item && rec->nodatasum)
978 rec->errors |= I_ERR_ODD_CSUM_ITEM;
979 if (rec->some_csum_missing && !rec->nodatasum)
980 rec->errors |= I_ERR_SOME_CSUM_MISSING;
983 BUG_ON(rec->refs != 1);
984 if (can_free_inode_rec(rec)) {
985 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
986 node = container_of(cache, struct ptr_node, cache);
987 BUG_ON(node->data != rec);
988 remove_cache_extent(inode_cache, &node->cache);
994 static int check_orphan_item(struct btrfs_root *root, u64 ino)
996 struct btrfs_path path;
997 struct btrfs_key key;
1000 key.objectid = BTRFS_ORPHAN_OBJECTID;
1001 key.type = BTRFS_ORPHAN_ITEM_KEY;
1004 btrfs_init_path(&path);
1005 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1006 btrfs_release_path(&path);
1012 static int process_inode_item(struct extent_buffer *eb,
1013 int slot, struct btrfs_key *key,
1014 struct shared_node *active_node)
1016 struct inode_record *rec;
1017 struct btrfs_inode_item *item;
1019 rec = active_node->current;
1020 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1021 if (rec->found_inode_item) {
1022 rec->errors |= I_ERR_DUP_INODE_ITEM;
1025 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1026 rec->nlink = btrfs_inode_nlink(eb, item);
1027 rec->isize = btrfs_inode_size(eb, item);
1028 rec->nbytes = btrfs_inode_nbytes(eb, item);
1029 rec->imode = btrfs_inode_mode(eb, item);
1030 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1032 rec->found_inode_item = 1;
1033 if (rec->nlink == 0)
1034 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1035 maybe_free_inode_rec(&active_node->inode_cache, rec);
1039 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1041 int namelen, u64 dir)
1043 struct inode_backref *backref;
1045 list_for_each_entry(backref, &rec->backrefs, list) {
1046 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1048 if (backref->dir != dir || backref->namelen != namelen)
1050 if (memcmp(name, backref->name, namelen))
1055 backref = malloc(sizeof(*backref) + namelen + 1);
1058 memset(backref, 0, sizeof(*backref));
1060 backref->namelen = namelen;
1061 memcpy(backref->name, name, namelen);
1062 backref->name[namelen] = '\0';
1063 list_add_tail(&backref->list, &rec->backrefs);
1067 static int add_inode_backref(struct cache_tree *inode_cache,
1068 u64 ino, u64 dir, u64 index,
1069 const char *name, int namelen,
1070 u8 filetype, u8 itemtype, int errors)
1072 struct inode_record *rec;
1073 struct inode_backref *backref;
1075 rec = get_inode_rec(inode_cache, ino, 1);
1076 BUG_ON(IS_ERR(rec));
1077 backref = get_inode_backref(rec, name, namelen, dir);
1080 backref->errors |= errors;
1081 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1082 if (backref->found_dir_index)
1083 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1084 if (backref->found_inode_ref && backref->index != index)
1085 backref->errors |= REF_ERR_INDEX_UNMATCH;
1086 if (backref->found_dir_item && backref->filetype != filetype)
1087 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1089 backref->index = index;
1090 backref->filetype = filetype;
1091 backref->found_dir_index = 1;
1092 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1094 if (backref->found_dir_item)
1095 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1096 if (backref->found_dir_index && backref->filetype != filetype)
1097 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1099 backref->filetype = filetype;
1100 backref->found_dir_item = 1;
1101 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1102 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1103 if (backref->found_inode_ref)
1104 backref->errors |= REF_ERR_DUP_INODE_REF;
1105 if (backref->found_dir_index && backref->index != index)
1106 backref->errors |= REF_ERR_INDEX_UNMATCH;
1108 backref->index = index;
1110 backref->ref_type = itemtype;
1111 backref->found_inode_ref = 1;
1116 maybe_free_inode_rec(inode_cache, rec);
1120 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1121 struct cache_tree *dst_cache)
1123 struct inode_backref *backref;
1128 list_for_each_entry(backref, &src->backrefs, list) {
1129 if (backref->found_dir_index) {
1130 add_inode_backref(dst_cache, dst->ino, backref->dir,
1131 backref->index, backref->name,
1132 backref->namelen, backref->filetype,
1133 BTRFS_DIR_INDEX_KEY, backref->errors);
1135 if (backref->found_dir_item) {
1137 add_inode_backref(dst_cache, dst->ino,
1138 backref->dir, 0, backref->name,
1139 backref->namelen, backref->filetype,
1140 BTRFS_DIR_ITEM_KEY, backref->errors);
1142 if (backref->found_inode_ref) {
1143 add_inode_backref(dst_cache, dst->ino,
1144 backref->dir, backref->index,
1145 backref->name, backref->namelen, 0,
1146 backref->ref_type, backref->errors);
1150 if (src->found_dir_item)
1151 dst->found_dir_item = 1;
1152 if (src->found_file_extent)
1153 dst->found_file_extent = 1;
1154 if (src->found_csum_item)
1155 dst->found_csum_item = 1;
1156 if (src->some_csum_missing)
1157 dst->some_csum_missing = 1;
1158 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1159 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1164 BUG_ON(src->found_link < dir_count);
1165 dst->found_link += src->found_link - dir_count;
1166 dst->found_size += src->found_size;
1167 if (src->extent_start != (u64)-1) {
1168 if (dst->extent_start == (u64)-1) {
1169 dst->extent_start = src->extent_start;
1170 dst->extent_end = src->extent_end;
1172 if (dst->extent_end > src->extent_start)
1173 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1174 else if (dst->extent_end < src->extent_start) {
1175 ret = add_file_extent_hole(&dst->holes,
1177 src->extent_start - dst->extent_end);
1179 if (dst->extent_end < src->extent_end)
1180 dst->extent_end = src->extent_end;
1184 dst->errors |= src->errors;
1185 if (src->found_inode_item) {
1186 if (!dst->found_inode_item) {
1187 dst->nlink = src->nlink;
1188 dst->isize = src->isize;
1189 dst->nbytes = src->nbytes;
1190 dst->imode = src->imode;
1191 dst->nodatasum = src->nodatasum;
1192 dst->found_inode_item = 1;
1194 dst->errors |= I_ERR_DUP_INODE_ITEM;
1202 static int splice_shared_node(struct shared_node *src_node,
1203 struct shared_node *dst_node)
1205 struct cache_extent *cache;
1206 struct ptr_node *node, *ins;
1207 struct cache_tree *src, *dst;
1208 struct inode_record *rec, *conflict;
1209 u64 current_ino = 0;
1213 if (--src_node->refs == 0)
1215 if (src_node->current)
1216 current_ino = src_node->current->ino;
1218 src = &src_node->root_cache;
1219 dst = &dst_node->root_cache;
1221 cache = search_cache_extent(src, 0);
1223 node = container_of(cache, struct ptr_node, cache);
1225 cache = next_cache_extent(cache);
1228 remove_cache_extent(src, &node->cache);
1231 ins = malloc(sizeof(*ins));
1233 ins->cache.start = node->cache.start;
1234 ins->cache.size = node->cache.size;
1238 ret = insert_cache_extent(dst, &ins->cache);
1239 if (ret == -EEXIST) {
1240 conflict = get_inode_rec(dst, rec->ino, 1);
1241 BUG_ON(IS_ERR(conflict));
1242 merge_inode_recs(rec, conflict, dst);
1244 conflict->checked = 1;
1245 if (dst_node->current == conflict)
1246 dst_node->current = NULL;
1248 maybe_free_inode_rec(dst, conflict);
1249 free_inode_rec(rec);
1256 if (src == &src_node->root_cache) {
1257 src = &src_node->inode_cache;
1258 dst = &dst_node->inode_cache;
1262 if (current_ino > 0 && (!dst_node->current ||
1263 current_ino > dst_node->current->ino)) {
1264 if (dst_node->current) {
1265 dst_node->current->checked = 1;
1266 maybe_free_inode_rec(dst, dst_node->current);
1268 dst_node->current = get_inode_rec(dst, current_ino, 1);
1269 BUG_ON(IS_ERR(dst_node->current));
1274 static void free_inode_ptr(struct cache_extent *cache)
1276 struct ptr_node *node;
1277 struct inode_record *rec;
1279 node = container_of(cache, struct ptr_node, cache);
1281 free_inode_rec(rec);
1285 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1287 static struct shared_node *find_shared_node(struct cache_tree *shared,
1290 struct cache_extent *cache;
1291 struct shared_node *node;
1293 cache = lookup_cache_extent(shared, bytenr, 1);
1295 node = container_of(cache, struct shared_node, cache);
1301 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1304 struct shared_node *node;
1306 node = calloc(1, sizeof(*node));
1309 node->cache.start = bytenr;
1310 node->cache.size = 1;
1311 cache_tree_init(&node->root_cache);
1312 cache_tree_init(&node->inode_cache);
1315 ret = insert_cache_extent(shared, &node->cache);
1320 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1321 struct walk_control *wc, int level)
1323 struct shared_node *node;
1324 struct shared_node *dest;
1327 if (level == wc->active_node)
1330 BUG_ON(wc->active_node <= level);
1331 node = find_shared_node(&wc->shared, bytenr);
1333 ret = add_shared_node(&wc->shared, bytenr, refs);
1335 node = find_shared_node(&wc->shared, bytenr);
1336 wc->nodes[level] = node;
1337 wc->active_node = level;
1341 if (wc->root_level == wc->active_node &&
1342 btrfs_root_refs(&root->root_item) == 0) {
1343 if (--node->refs == 0) {
1344 free_inode_recs_tree(&node->root_cache);
1345 free_inode_recs_tree(&node->inode_cache);
1346 remove_cache_extent(&wc->shared, &node->cache);
1352 dest = wc->nodes[wc->active_node];
1353 splice_shared_node(node, dest);
1354 if (node->refs == 0) {
1355 remove_cache_extent(&wc->shared, &node->cache);
1361 static int leave_shared_node(struct btrfs_root *root,
1362 struct walk_control *wc, int level)
1364 struct shared_node *node;
1365 struct shared_node *dest;
1368 if (level == wc->root_level)
1371 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1375 BUG_ON(i >= BTRFS_MAX_LEVEL);
1377 node = wc->nodes[wc->active_node];
1378 wc->nodes[wc->active_node] = NULL;
1379 wc->active_node = i;
1381 dest = wc->nodes[wc->active_node];
1382 if (wc->active_node < wc->root_level ||
1383 btrfs_root_refs(&root->root_item) > 0) {
1384 BUG_ON(node->refs <= 1);
1385 splice_shared_node(node, dest);
1387 BUG_ON(node->refs < 2);
1396 * 1 - if the root with id child_root_id is a child of root parent_root_id
1397 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1398 * has other root(s) as parent(s)
1399 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Work out how root child_root_id relates to root @parent_root_id by looking
 * at the ROOT_REF and ROOT_BACKREF items stored in the tree root.
 *
 * @root:           any root of the filesystem; only root->fs_info is used
 * @parent_root_id: objectid of the candidate parent root
 */
1401 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1404 struct btrfs_path path;
1405 struct btrfs_key key;
1406 struct extent_buffer *leaf;
1410 btrfs_init_path(&path);
/* First look up the forward reference (parent, ROOT_REF, child) */
1412 key.objectid = parent_root_id;
1413 key.type = BTRFS_ROOT_REF_KEY;
1414 key.offset = child_root_id;
1415 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1419 btrfs_release_path(&path);
/* Then walk the ROOT_BACKREF items that belong to the child root */
1423 key.objectid = child_root_id;
1424 key.type = BTRFS_ROOT_BACKREF_KEY;
1426 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1432 leaf = path.nodes[0];
/* Slot is past the last item of this leaf, continue in the next leaf */
1433 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1434 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1437 leaf = path.nodes[0];
/* Only items that are backrefs of the child root are of interest */
1440 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1441 if (key.objectid != child_root_id ||
1442 key.type != BTRFS_ROOT_BACKREF_KEY)
/* The offset of a backref key names the parent root */
1447 if (key.offset == parent_root_id) {
1448 btrfs_release_path(&path);
1455 btrfs_release_path(&path);
/* No backref named the parent: 0 if another parent was seen, 2 if none */
1458 return has_parent ? 0 : 2;
/*
 * Process one DIR_ITEM or DIR_INDEX item of a leaf and record a backref for
 * every directory entry packed into it.
 *
 * @root:        presumably the tree the leaf was read from (TODO confirm)
 * @eb:          leaf that holds the item
 * @slot:        slot of the item inside @eb
 * @key:         key of the item; objectid, offset and type go into the backref
 * @active_node: shared node supplying the caches and the current inode record
 */
1461 static int process_dir_item(struct btrfs_root *root,
1462 struct extent_buffer *eb,
1463 int slot, struct btrfs_key *key,
1464 struct shared_node *active_node)
1474 struct btrfs_dir_item *di;
1475 struct inode_record *rec;
1476 struct cache_tree *root_cache;
1477 struct cache_tree *inode_cache;
1478 struct btrfs_key location;
1479 char namebuf[BTRFS_NAME_LEN];
1481 root_cache = &active_node->root_cache;
1482 inode_cache = &active_node->inode_cache;
/* The item is accounted to the inode record that is currently active */
1483 rec = active_node->current;
1484 rec->found_dir_item = 1;
/* Walk the entries stored back to back inside the item */
1486 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1487 total = btrfs_item_size_nr(eb, slot);
1488 while (cur < total) {
1490 btrfs_dir_item_key_to_cpu(eb, di, &location);
1491 name_len = btrfs_dir_name_len(eb, di);
1492 data_len = btrfs_dir_data_len(eb, di);
1493 filetype = btrfs_dir_type(eb, di);
/* Every name length is added to the size found for this record */
1495 rec->found_size += name_len;
/* Names longer than BTRFS_NAME_LEN are cut to fit namebuf and flagged */
1496 if (name_len <= BTRFS_NAME_LEN) {
1500 len = BTRFS_NAME_LEN;
1501 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow directly behind the btrfs_dir_item header */
1503 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
/* INODE_ITEM locations go to the inode cache, ROOT_ITEM ones to the root cache */
1505 if (location.type == BTRFS_INODE_ITEM_KEY) {
1506 add_inode_backref(inode_cache, location.objectid,
1507 key->objectid, key->offset, namebuf,
1508 len, filetype, key->type, error);
1509 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1510 add_inode_backref(root_cache, location.objectid,
1511 key->objectid, key->offset,
1512 namebuf, len, filetype,
/* Any other location type is reported and filed under BTRFS_MULTIPLE_OBJECTIDS */
1515 fprintf(stderr, "invalid location in dir item %u\n",
1517 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1518 key->objectid, key->offset, namebuf,
1519 len, filetype, key->type, error);
/* Step over header, name and data to reach the next entry */
1522 len = sizeof(*di) + name_len + data_len;
1523 di = (struct btrfs_dir_item *)((char *)di + len);
/* More than one entry in a DIR_INDEX item is flagged as a duplicate index */
1526 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1527 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Process one INODE_REF item and record a backref for every name it holds.
 *
 * @eb:          leaf that holds the item
 * @slot:        slot of the item inside @eb
 * @key:         key of the item; objectid is passed as the inode and offset
 *               as the directory of each backref
 * @active_node: shared node whose inode cache receives the backrefs
 */
1532 static int process_inode_ref(struct extent_buffer *eb,
1533 int slot, struct btrfs_key *key,
1534 struct shared_node *active_node)
1542 struct cache_tree *inode_cache;
1543 struct btrfs_inode_ref *ref;
1544 char namebuf[BTRFS_NAME_LEN];
1546 inode_cache = &active_node->inode_cache;
/* Walk the refs stored back to back inside the item */
1548 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1549 total = btrfs_item_size_nr(eb, slot);
1550 while (cur < total) {
1551 name_len = btrfs_inode_ref_name_len(eb, ref);
1552 index = btrfs_inode_ref_index(eb, ref);
/* Names longer than BTRFS_NAME_LEN are cut to fit namebuf and flagged */
1553 if (name_len <= BTRFS_NAME_LEN) {
1557 len = BTRFS_NAME_LEN;
1558 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow directly behind the btrfs_inode_ref header */
1560 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1561 add_inode_backref(inode_cache, key->objectid, key->offset,
1562 index, namebuf, len, 0, key->type, error);
/* Step over header and name to reach the next ref */
1564 len = sizeof(*ref) + name_len;
1565 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Process one INODE_EXTREF item and record a backref for every name it
 * holds. Unlike a plain inode ref, the parent directory is read from each
 * entry instead of from the key offset.
 *
 * @eb:          leaf that holds the item
 * @slot:        slot of the item inside @eb
 * @key:         key of the item; objectid is passed as the inode
 * @active_node: shared node whose inode cache receives the backrefs
 */
1571 static int process_inode_extref(struct extent_buffer *eb,
1572 int slot, struct btrfs_key *key,
1573 struct shared_node *active_node)
1582 struct cache_tree *inode_cache;
1583 struct btrfs_inode_extref *extref;
1584 char namebuf[BTRFS_NAME_LEN];
1586 inode_cache = &active_node->inode_cache;
/* Walk the extrefs stored back to back inside the item */
1588 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1589 total = btrfs_item_size_nr(eb, slot);
1590 while (cur < total) {
1591 name_len = btrfs_inode_extref_name_len(eb, extref);
1592 index = btrfs_inode_extref_index(eb, extref);
1593 parent = btrfs_inode_extref_parent(eb, extref);
/* Names longer than BTRFS_NAME_LEN are cut to fit namebuf and flagged */
1594 if (name_len <= BTRFS_NAME_LEN) {
1598 len = BTRFS_NAME_LEN;
1599 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow directly behind the btrfs_inode_extref header */
1601 read_extent_buffer(eb, namebuf,
1602 (unsigned long)(extref + 1), len);
1603 add_inode_backref(inode_cache, key->objectid, parent,
1604 index, namebuf, len, 0, key->type, error);
/* Step over header and name to reach the next extref */
1606 len = sizeof(*extref) + name_len;
1607 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Walk the checksum items of the csum tree that overlap the byte range
 * starting at @start with length @len.
 *
 * @root:  gives access to fs_info (csum root, super block) and sectorsize
 * @start: first byte of the range
 * @len:   length of the range in bytes
 * @found: output; process_file_extent() compares it against the number of
 *         bytes it asked about, so presumably the covered byte count
 */
1614 static int count_csum_range(struct btrfs_root *root, u64 start,
1615 u64 len, u64 *found)
1617 struct btrfs_key key;
1618 struct btrfs_path path;
1619 struct extent_buffer *leaf;
/* Size of one checksum as recorded in the super block */
1624 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1626 btrfs_init_path(&path);
1628 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1630 key.type = BTRFS_EXTENT_CSUM_KEY;
1632 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* No exact match: check whether the item before the slot is a csum item */
1636 if (ret > 0 && path.slots[0] > 0) {
1637 leaf = path.nodes[0];
1638 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1639 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1640 key.type == BTRFS_EXTENT_CSUM_KEY)
1645 leaf = path.nodes[0];
/* Slot is past the last item of this leaf, continue in the next leaf */
1646 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1647 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1652 leaf = path.nodes[0];
/* Only checksum items are of interest */
1655 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1656 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1657 key.type != BTRFS_EXTENT_CSUM_KEY)
1660 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* An item that starts at or behind the end of the range is out of scope */
1661 if (key.offset >= start + len)
1664 if (key.offset > start)
/*
 * One checksum of csum_size bytes stands for one sector, so the item covers
 * the bytes from key.offset up to csum_end.
 */
1667 size = btrfs_item_size_nr(leaf, path.slots[0]);
1668 csum_end = key.offset + (size / csum_size) * root->sectorsize;
/* Overlap with what is left of the range, capped at the remaining length */
1669 if (csum_end > start) {
1670 size = min(csum_end - start, len);
1679 btrfs_release_path(&path);
/*
 * Process one EXTENT_DATA item of the inode record that is currently active.
 *
 * The function tracks the extent range seen so far (overlaps are flagged,
 * gaps are recorded as holes), validates the fields of inline, regular and
 * preallocated extents, and updates found_size and extent_end. For extents
 * with a disk bytenr outside the data reloc tree it then asks
 * count_csum_range() how much of the extent has checksums: a regular extent
 * gets found_csum_item / some_csum_missing set accordingly, while a
 * preallocated extent can be flagged with I_ERR_ODD_CSUM_ITEM.
 *
 * @root:        supplies sectorsize and is passed on to count_csum_range()
 * @eb:          leaf that holds the item
 * @slot:        slot of the item inside @eb
 * @key:         key of the item; offset is the file offset of the extent
 * @active_node: shared node whose current inode record is updated
 */
1685 static int process_file_extent(struct btrfs_root *root,
1686 struct extent_buffer *eb,
1687 int slot, struct btrfs_key *key,
1688 struct shared_node *active_node)
1690 struct inode_record *rec;
1691 struct btrfs_file_extent_item *fi;
1693 u64 disk_bytenr = 0;
1694 u64 extent_offset = 0;
/* Low bits that must be clear in a sector aligned length */
1695 u64 mask = root->sectorsize - 1;
/* The record must match the key and must not be shared */
1699 rec = active_node->current;
1700 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1701 rec->found_file_extent = 1;
/* (u64)-1 marks a record that has not seen any extent yet */
1703 if (rec->extent_start == (u64)-1) {
1704 rec->extent_start = key->offset;
1705 rec->extent_end = key->offset;
/* Extent starts before the previous one ended: overlap. After it: hole. */
1708 if (rec->extent_end > key->offset)
1709 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1710 else if (rec->extent_end < key->offset) {
1711 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1712 key->offset - rec->extent_end);
1717 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1718 extent_type = btrfs_file_extent_type(eb, fi);
/* Inline extent: count the exact length, then round up to a full sector */
1720 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1721 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1723 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1724 rec->found_size += num_bytes;
1725 num_bytes = (num_bytes + mask) & ~mask;
1726 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1727 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1728 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1729 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1730 extent_offset = btrfs_file_extent_offset(eb, fi);
/* The length must be non-zero and sector aligned */
1731 if (num_bytes == 0 || (num_bytes & mask))
1732 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced part must fit inside the uncompressed extent */
1733 if (num_bytes + extent_offset >
1734 btrfs_file_extent_ram_bytes(eb, fi))
1735 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not carry any encoding */
1736 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1737 (btrfs_file_extent_compression(eb, fi) ||
1738 btrfs_file_extent_encryption(eb, fi) ||
1739 btrfs_file_extent_other_encoding(eb, fi)))
1740 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents with a disk bytenr add to the size found */
1741 if (disk_bytenr > 0)
1742 rec->found_size += num_bytes;
1744 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1746 rec->extent_end = key->offset + num_bytes;
1749 * The data reloc tree will copy full extents into its inode and then
1750 * copy the corresponding csums. Because the extent it copied could be
1751 * a preallocated extent that hasn't been written to yet there may be no
1752 * csums to copy, ergo we won't have csums for our file extent. This is
1753 * ok so just don't bother checking csums if the inode belongs to the
1756 if (disk_bytenr > 0 &&
1757 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1759 if (btrfs_file_extent_compression(eb, fi))
1760 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1762 disk_bytenr += extent_offset;
1764 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1767 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1769 rec->found_csum_item = 1;
1770 if (found < num_bytes)
1771 rec->some_csum_missing = 1;
1772 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1774 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item of leaf @eb, keeping the active shared node's current
 * inode record in step with the objectid of the item being handled and
 * dispatching each item to the handler for its key type.
 *
 * @root: tree being checked
 * @eb:   leaf to process
 * @wc:   walk state that supplies the active shared node
 */
1780 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1781 struct walk_control *wc)
1783 struct btrfs_key key;
1787 struct cache_tree *inode_cache;
1788 struct shared_node *active_node;
/* Special case: active node at the root level of a root with zero refs */
1790 if (wc->root_level == wc->active_node &&
1791 btrfs_root_refs(&root->root_item) == 0)
1794 active_node = wc->nodes[wc->active_node];
1795 inode_cache = &active_node->inode_cache;
1796 nritems = btrfs_header_nritems(eb);
1797 for (i = 0; i < nritems; i++) {
1798 btrfs_item_key_to_cpu(eb, &key, i);
/* Free space objects and orphan items get separate treatment */
1800 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1802 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/*
 * Moved on to a higher inode number: the previous record is marked checked
 * and offered for freeing before the record for the new inode is fetched.
 */
1805 if (active_node->current == NULL ||
1806 active_node->current->ino < key.objectid) {
1807 if (active_node->current) {
1808 active_node->current->checked = 1;
1809 maybe_free_inode_rec(inode_cache,
1810 active_node->current);
1812 active_node->current = get_inode_rec(inode_cache,
1814 BUG_ON(IS_ERR(active_node->current));
/* Dispatch on the item type */
1817 case BTRFS_DIR_ITEM_KEY:
1818 case BTRFS_DIR_INDEX_KEY:
1819 ret = process_dir_item(root, eb, i, &key, active_node);
1821 case BTRFS_INODE_REF_KEY:
1822 ret = process_inode_ref(eb, i, &key, active_node);
1824 case BTRFS_INODE_EXTREF_KEY:
1825 ret = process_inode_extref(eb, i, &key, active_node);
1827 case BTRFS_INODE_ITEM_KEY:
1828 ret = process_inode_item(eb, i, &key, active_node);
1830 case BTRFS_EXTENT_DATA_KEY:
1831 ret = process_file_extent(root, eb, i, &key,
/*
 * Issue readahead for the children of @node, starting at @slot.
 *
 * @root: supplies the node size and is passed to readahead_tree_block()
 * @node: tree node whose child pointers are read ahead
 * @slot: first child slot to read ahead
 */
1841 static void reada_walk_down(struct btrfs_root *root,
1842 struct extent_buffer *node, int slot)
1851 level = btrfs_header_level(node);
1855 nritems = btrfs_header_nritems(node);
1856 blocksize = root->nodesize;
/* One readahead request per child pointer from @slot to the last item */
1857 for (i = slot; i < nritems; i++) {
1858 bytenr = btrfs_node_blockptr(node, i);
1859 ptr_gen = btrfs_node_ptr_generation(node, i);
1860 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1865 * Check the child node/leaf by the following condition:
1866 * 1. the first item key of the node/leaf should be the same with the one
1868 * 2. block in parent node should match the child node/leaf.
1869 * 3. generation of parent node and child's header should be consistent.
1871 * Or the child node/leaf pointed by the key in parent is not valid.
1873 * We hope to check leaf owner too, but since subvol may share leaves,
1874 * which makes leaf owner check not so strong, key check should be
1875 * sufficient enough for that case.
1877 static int check_child_node(struct btrfs_root *root,
1878 struct extent_buffer *parent, int slot,
1879 struct extent_buffer *child)
1881 struct btrfs_key parent_key;
1882 struct btrfs_key child_key;
1885 btrfs_node_key_to_cpu(parent, &parent_key, slot);
1886 if (btrfs_header_level(child) == 0)
1887 btrfs_item_key_to_cpu(child, &child_key, 0);
1889 btrfs_node_key_to_cpu(child, &child_key, 0);
1891 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1894 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1895 parent_key.objectid, parent_key.type, parent_key.offset,
1896 child_key.objectid, child_key.type, child_key.offset);
1898 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1900 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1901 btrfs_node_blockptr(parent, slot),
1902 btrfs_header_bytenr(child));
1904 if (btrfs_node_ptr_generation(parent, slot) !=
1905 btrfs_header_generation(child)) {
1907 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1908 btrfs_header_generation(child),
1909 btrfs_node_ptr_generation(parent, slot));
/* Per level: bytenr of the block whose ref count was looked up last */
1915 u64 bytenr[BTRFS_MAX_LEVEL];
/* Per level: ref count that belongs to bytenr[] at the same index */
1916 u64 refs[BTRFS_MAX_LEVEL];
/*
 * Walk down the tree from path->nodes[*level]: leaves are handed to
 * process_one_leaf(), child blocks are read, validated and pushed onto
 * @path while *level is decremented.
 *
 * Extent ref counts are looked up with btrfs_lookup_extent_info() and cached
 * per level in @nrefs, so a block whose bytenr matches the cached one does
 * not need a second lookup.
 *
 * @root:  tree being walked
 * @path:  current position; nodes[] and slots[] are updated while descending
 * @wc:    walk state for shared node tracking
 * @level: in/out, level of the node the walk is positioned at
 * @nrefs: per-level cache of bytenr and ref count
 */
1919 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1920 struct walk_control *wc, int *level,
1921 struct node_refs *nrefs)
1923 enum btrfs_tree_block_status status;
1926 struct extent_buffer *next;
1927 struct extent_buffer *cur;
1932 WARN_ON(*level < 0);
1933 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Ref count of the starting block: reuse the cached value or look it up */
1935 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1936 refs = nrefs->refs[*level];
1939 ret = btrfs_lookup_extent_info(NULL, root,
1940 path->nodes[*level]->start,
1941 *level, 1, &refs, NULL);
1946 nrefs->bytenr[*level] = path->nodes[*level]->start;
1947 nrefs->refs[*level] = refs;
1951 ret = enter_shared_node(root, path->nodes[*level]->start,
1959 while (*level >= 0) {
1960 WARN_ON(*level < 0);
1961 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1962 cur = path->nodes[*level];
/* The level in the block header must agree with the position in the path */
1964 if (btrfs_header_level(cur) != *level)
/* All slots of this block have been visited */
1967 if (path->slots[*level] >= btrfs_header_nritems(cur))
1970 ret = process_one_leaf(root, cur, wc);
/* Child pointer the current slot refers to */
1975 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1976 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1977 blocksize = root->nodesize;
/* Ref count of the child: reuse the cached value or look it up */
1979 if (bytenr == nrefs->bytenr[*level - 1]) {
1980 refs = nrefs->refs[*level - 1];
1982 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1983 *level - 1, 1, &refs, NULL);
1987 nrefs->bytenr[*level - 1] = bytenr;
1988 nrefs->refs[*level - 1] = refs;
1993 ret = enter_shared_node(root, bytenr, refs,
1996 path->slots[*level]++;
/* Use the cached block when it is up to date, otherwise read it from disk */
2001 next = btrfs_find_tree_block(root, bytenr, blocksize);
2002 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2003 free_extent_buffer(next);
2004 reada_walk_down(root, cur, path->slots[*level]);
2005 next = read_tree_block(root, bytenr, blocksize,
/* Unreadable child: record it as a corrupt extent */
2007 if (!extent_buffer_uptodate(next)) {
2008 struct btrfs_key node_key;
2010 btrfs_node_key_to_cpu(path->nodes[*level],
2012 path->slots[*level]);
2013 btrfs_add_corrupt_extent_record(root->fs_info,
2015 path->nodes[*level]->start,
2016 root->nodesize, *level);
/* The child must agree with the pointer stored in its parent */
2022 ret = check_child_node(root, cur, path->slots[*level], next);
/* Structural check of the child block itself */
2028 if (btrfs_is_leaf(next))
2029 status = btrfs_check_leaf(root, NULL, next);
2031 status = btrfs_check_node(root, NULL, next);
2032 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2033 free_extent_buffer(next);
/* Descend: the child replaces whatever the path held one level down */
2038 *level = *level - 1;
2039 free_extent_buffer(path->nodes[*level]);
2040 path->nodes[*level] = next;
2041 path->slots[*level] = 0;
/* Mark the block at *level as fully consumed */
2044 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Walk up from *level looking for a node that still has an unvisited slot.
 * Blocks that are left behind are released from @path, and the shared node
 * is left when the level being dropped is the active one.
 *
 * @root:  tree being walked
 * @path:  current position in the tree
 * @wc:    walk state for shared node tracking
 * @level: in/out, current level of the walk
 */
2048 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2049 struct walk_control *wc, int *level)
2052 struct extent_buffer *leaf;
2054 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2055 leaf = path->nodes[i];
/* This block still has a slot behind the current one */
2056 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* Done with this block: drop it from the path */
2061 free_extent_buffer(path->nodes[*level]);
2062 path->nodes[*level] = NULL;
/* The level being dropped can never be above the active shared level */
2063 BUG_ON(*level > wc->active_node);
2064 if (*level == wc->active_node)
2065 leave_shared_node(root, wc, *level);
/*
 * Check whether @rec looks like a well formed root directory inode.
 *
 * The visible checks require: an inode item without errors, nlink of 1 with
 * no links found, at least one backref, and a first backref that is an
 * inode ref named ".." at index 0 with neither dir index nor dir item.
 */
2072 static int check_root_dir(struct inode_record *rec)
2074 struct inode_backref *backref;
2077 if (!rec->found_inode_item || rec->errors)
2079 if (rec->nlink != 1 || rec->found_link != 0)
2081 if (list_empty(&rec->backrefs))
/* Only the first backref on the list is examined */
2083 backref = to_inode_backref(rec->backrefs.next);
2084 if (!backref->found_inode_ref)
2086 if (backref->index != 0 || backref->namelen != 2 ||
2087 memcmp(backref->name, "..", 2))
2089 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size in the inode item of @rec with rec->found_size and clear
 * I_ERR_DIR_ISIZE_WRONG on the record.
 *
 * @trans: running transaction
 * @root:  tree that holds the inode
 * @path:  scratch path, released before returning
 * @rec:   inode record to repair
 */
2096 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2097 struct btrfs_root *root, struct btrfs_path *path,
2098 struct inode_record *rec)
2100 struct btrfs_inode_item *ei;
2101 struct btrfs_key key;
/* Search with the largest offset; presumably the slot before is then used */
2104 key.objectid = rec->ino;
2105 key.type = BTRFS_INODE_ITEM_KEY;
2106 key.offset = (u64)-1;
2108 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Slot 0 leaves no earlier item in this leaf to fall back to */
2112 if (!path->slots[0]) {
/* The item at the slot must belong to the inode being repaired */
2119 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2120 if (key.objectid != rec->ino) {
2125 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2126 struct btrfs_inode_item);
2127 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2128 btrfs_mark_buffer_dirty(path->nodes[0]);
2129 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2130 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2131 root->root_key.objectid);
2133 btrfs_release_path(path);
/*
 * Add the orphan item for inode @rec via btrfs_add_orphan_item() and clear
 * I_ERR_NO_ORPHAN_ITEM on the record.
 *
 * @trans: running transaction
 * @root:  tree that holds the inode
 * @path:  scratch path, released after the insertion
 * @rec:   inode record to repair
 */
2137 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2138 struct btrfs_root *root,
2139 struct btrfs_path *path,
2140 struct inode_record *rec)
2144 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2145 btrfs_release_path(path);
2147 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite nbytes in the inode item of @rec with rec->found_size and clear
 * I_ERR_FILE_NBYTES_WRONG on the record.
 *
 * @trans: running transaction
 * @root:  tree that holds the inode
 * @path:  scratch path, released before returning
 * @rec:   inode record to repair
 */
2151 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2152 struct btrfs_root *root,
2153 struct btrfs_path *path,
2154 struct inode_record *rec)
2156 struct btrfs_inode_item *ei;
2157 struct btrfs_key key;
/* Look up the inode item of the record for writing */
2160 key.objectid = rec->ino;
2161 key.type = BTRFS_INODE_ITEM_KEY;
2164 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2171 /* Since ret == 0, no need to check anything */
2172 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2173 struct btrfs_inode_item);
2174 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2175 btrfs_mark_buffer_dirty(path->nodes[0]);
2176 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2177 printf("reset nbytes for ino %llu root %llu\n",
2178 rec->ino, root->root_key.objectid);
2180 btrfs_release_path(path);
/*
 * Insert the DIR_INDEX item that is missing for @backref, inside its own
 * transaction, and update the in-memory records to match.
 *
 * @root:        tree that holds the directory
 * @inode_cache: cache used to find the record of the parent directory
 * @rec:         inode the directory entry points at
 * @backref:     backref that has a dir item and inode ref but no dir index
 *
 * NOTE(review): the result of btrfs_commit_transaction() is not checked in
 * the visible code.
 */
2184 static int add_missing_dir_index(struct btrfs_root *root,
2185 struct cache_tree *inode_cache,
2186 struct inode_record *rec,
2187 struct inode_backref *backref)
2189 struct btrfs_path path;
2190 struct btrfs_trans_handle *trans;
2191 struct btrfs_dir_item *dir_item;
2192 struct extent_buffer *leaf;
2193 struct btrfs_key key;
2194 struct btrfs_disk_key disk_key;
2195 struct inode_record *dir_rec;
2196 unsigned long name_ptr;
/* Item payload: dir item header followed by the name */
2197 u32 data_size = sizeof(*dir_item) + backref->namelen;
2200 trans = btrfs_start_transaction(root, 1);
2202 return PTR_ERR(trans);
2204 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2205 (unsigned long long)rec->ino);
/* The new item is keyed (directory, DIR_INDEX, index) */
2207 btrfs_init_path(&path);
2208 key.objectid = backref->dir;
2209 key.type = BTRFS_DIR_INDEX_KEY;
2210 key.offset = backref->index;
2211 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2214 leaf = path.nodes[0];
2215 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* The entry points at the inode item of @rec */
2217 disk_key.objectid = cpu_to_le64(rec->ino);
2218 disk_key.type = BTRFS_INODE_ITEM_KEY;
2219 disk_key.offset = 0;
2221 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2222 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2223 btrfs_set_dir_data_len(leaf, dir_item, 0);
2224 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is written right behind the dir item header */
2225 name_ptr = (unsigned long)(dir_item + 1);
2226 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2227 btrfs_mark_buffer_dirty(leaf);
2228 btrfs_release_path(&path);
2229 btrfs_commit_transaction(trans, root);
/* Mirror the change in memory: the backref now has its dir index */
2231 backref->found_dir_index = 1;
2232 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2233 BUG_ON(IS_ERR(dir_rec));
/* The new entry grows the directory; re-evaluate the isize error flag */
2236 dir_rec->found_size += backref->namelen;
2237 if (dir_rec->found_size == dir_rec->isize &&
2238 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2239 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2240 if (dir_rec->found_size != dir_rec->isize)
2241 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by @backref inside its own
 * transaction.
 *
 * @root:        tree that holds the directory
 * @inode_cache: inode cache of the tree (not referenced in the visible code)
 * @rec:         inode the entry refers to (not referenced in the visible code)
 * @backref:     backref naming directory, index and name of the entry
 */
2246 static int delete_dir_index(struct btrfs_root *root,
2247 struct cache_tree *inode_cache,
2248 struct inode_record *rec,
2249 struct inode_backref *backref)
2251 struct btrfs_trans_handle *trans;
2252 struct btrfs_dir_item *di;
2253 struct btrfs_path *path;
2256 path = btrfs_alloc_path();
/* The path must not leak when the transaction cannot be started */
2260 trans = btrfs_start_transaction(root, 1);
2261 if (IS_ERR(trans)) {
2262 btrfs_free_path(path);
2263 return PTR_ERR(trans);
2267 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2268 (unsigned long long)backref->dir,
2269 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2270 (unsigned long long)root->objectid);
/* Locate the entry by directory, name and index */
2272 di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2273 backref->name, backref->namelen,
2274 backref->index, -1);
/* Early exit path: release the path and close the transaction */
2277 btrfs_free_path(path);
2278 btrfs_commit_transaction(trans, root);
/* Either the whole item is removed or only the one name inside it */
2285 ret = btrfs_del_item(trans, root, path);
2287 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2289 btrfs_free_path(path);
2290 btrfs_commit_transaction(trans, root);
/*
 * Recreate a missing inode item for @rec from what the check has found and
 * insert it inside its own transaction.
 *
 * The new item is a directory (S_IFDIR | 0755, size = found_size) when a dir
 * item was seen for the inode, otherwise a regular file (S_IFREG | 0755,
 * size = extent_end). atime, ctime and mtime are set to the current time,
 * otime to zero.
 *
 * @root:     tree that should hold the inode
 * @rec:      inode record to rebuild the item from
 * @backref:  backref that triggered the repair (not referenced in the
 *            visible code)
 * @root_dir: selects between the two nlink assignments below; presumably
 *            non-zero means nlink 1 for a root directory - TODO confirm
 *
 * NOTE(review): the "else if (!rec->found_dir_item)" below repeats the
 * negation of the preceding condition; a plain else would be equivalent.
 */
2294 static int create_inode_item(struct btrfs_root *root,
2295 struct inode_record *rec,
2296 struct inode_backref *backref, int root_dir)
2298 struct btrfs_trans_handle *trans;
2299 struct btrfs_inode_item inode_item;
2300 time_t now = time(NULL);
2303 trans = btrfs_start_transaction(root, 1);
2304 if (IS_ERR(trans)) {
2305 ret = PTR_ERR(trans);
2309 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2310 "be incomplete, please check permissions and content after "
2311 "the fsck completes.\n", (unsigned long long)root->objectid,
2312 (unsigned long long)rec->ino);
/* Start from an all-zero item and fill in what is known */
2314 memset(&inode_item, 0, sizeof(inode_item));
2315 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2317 btrfs_set_stack_inode_nlink(&inode_item, 1);
2319 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2320 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
/* A dir item decides for directory, even if file extents were seen too */
2321 if (rec->found_dir_item) {
2322 if (rec->found_file_extent)
2323 fprintf(stderr, "root %llu inode %llu has both a dir "
2324 "item and extents, unsure if it is a dir or a "
2325 "regular file so setting it as a directory\n",
2326 (unsigned long long)root->objectid,
2327 (unsigned long long)rec->ino);
2328 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2329 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2330 } else if (!rec->found_dir_item) {
2331 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2332 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
/* Original timestamps are lost; use the time of the repair */
2334 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2335 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2336 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2337 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2338 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2339 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2340 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2341 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2343 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2345 btrfs_commit_transaction(trans, root);
/*
 * Go through the backrefs of @rec and repair the inconsistent ones: bad dir
 * index entries are deleted, missing dir index items or dir index/item pairs
 * are added, and a missing inode item is recreated.
 *
 * @root:        tree that holds the inode
 * @rec:         inode record whose backrefs are examined
 * @inode_cache: inode cache of the tree, passed on to the helpers
 *
 * Returns a non-zero ret from a failed step, otherwise the value of the
 * repaired counter.
 *
 * NOTE(review): one condition below tests backref->found_dir_index twice in
 * a row; the second test is redundant and may have been meant to check a
 * different flag.
 */
2349 static int repair_inode_backrefs(struct btrfs_root *root,
2350 struct inode_record *rec,
2351 struct cache_tree *inode_cache,
2354 struct inode_backref *tmp, *backref;
2355 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* The safe iterator is needed because backrefs are unlinked while looping */
2359 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* A root directory without inode item gets one recreated */
2360 if (!delete && rec->ino == root_dirid) {
2361 if (!rec->found_inode_item) {
2362 ret = create_inode_item(root, rec, backref, 1);
2369 /* Index 0 for root dir's are special, don't mess with it */
2370 if (rec->ino == root_dirid && backref->index == 0)
/* Dir index without inode ref, or with a mismatching index: delete it */
2374 ((backref->found_dir_index && !backref->found_inode_ref) ||
2375 (backref->found_dir_index && backref->found_inode_ref &&
2376 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2377 ret = delete_dir_index(root, inode_cache, rec, backref);
2381 list_del(&backref->list);
/* Dir item and inode ref exist but the dir index is missing: add it */
2385 if (!delete && !backref->found_dir_index &&
2386 backref->found_dir_item && backref->found_inode_ref) {
2387 ret = add_missing_dir_index(root, inode_cache, rec,
/* A backref that is complete and free of errors leaves the list */
2392 if (backref->found_dir_item &&
2393 backref->found_dir_index &&
2394 backref->found_dir_index) {
2395 if (!backref->errors &&
2396 backref->found_inode_ref) {
2397 list_del(&backref->list);
/* Only the inode ref exists: add the dir index/item pair */
2403 if (!delete && (!backref->found_dir_index &&
2404 !backref->found_dir_item &&
2405 backref->found_inode_ref)) {
2406 struct btrfs_trans_handle *trans;
2407 struct btrfs_key location;
2409 ret = check_dir_conflict(root, backref->name,
2415 * let nlink fixing routine to handle it,
2416 * which can do it better.
/* The new directory entry points at the inode item of @rec */
2421 location.objectid = rec->ino;
2422 location.type = BTRFS_INODE_ITEM_KEY;
2423 location.offset = 0;
2425 trans = btrfs_start_transaction(root, 1);
2426 if (IS_ERR(trans)) {
2427 ret = PTR_ERR(trans);
2430 fprintf(stderr, "adding missing dir index/item pair "
2432 (unsigned long long)rec->ino);
2433 ret = btrfs_insert_dir_item(trans, root, backref->name,
2435 backref->dir, &location,
2436 imode_to_type(rec->imode),
2439 btrfs_commit_transaction(trans, root);
/* All three references agree but the inode item is missing: recreate it */
2443 if (!delete && (backref->found_inode_ref &&
2444 backref->found_dir_index &&
2445 backref->found_dir_item &&
2446 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2447 !rec->found_inode_item)) {
2448 ret = create_inode_item(root, rec, backref, 0);
2455 return ret ? ret : repaired;
2459 * To determine the file type for nlink/inode_item repair
2461 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2462 * Return -ENOENT if file type is not found.
/*
 * Find the BTRFS_FT_* type of the inode behind @rec: taken from the mode of
 * the inode item when one was found, otherwise from a backref that has a
 * dir index or dir item.
 *
 * @rec:  inode record to inspect
 * @type: output for the file type
 */
2464 static int find_file_type(struct inode_record *rec, u8 *type)
2466 struct inode_backref *backref;
2468 /* For inode item recovered case */
2469 if (rec->found_inode_item) {
2470 *type = imode_to_type(rec->imode);
/* Otherwise a backref backed by a dir index or dir item supplies the type */
2474 list_for_each_entry(backref, &rec->backrefs, list) {
2475 if (backref->found_dir_index || backref->found_dir_item) {
2476 *type = backref->filetype;
2484 * To determine the file name for nlink repair
2486 * Return 0 if file name is found, set name and namelen.
2487 * Return -ENOENT if file name is not found.
/*
 * Find a name for the inode behind @rec from a backref that is backed by a
 * dir index, a dir item or an inode ref.
 *
 * @rec:     inode record to inspect
 * @name:    output buffer; must have room for backref->namelen bytes, the
 *           copy is not NUL terminated here
 * @namelen: output for the length of the copied name
 */
2489 static int find_file_name(struct inode_record *rec,
2490 char *name, int *namelen)
2492 struct inode_backref *backref;
2494 list_for_each_entry(backref, &rec->backrefs, list) {
2495 if (backref->found_dir_index || backref->found_dir_item ||
2496 backref->found_inode_ref) {
2497 memcpy(name, backref->name, backref->namelen);
2498 *namelen = backref->namelen;
2505 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of @rec from scratch: every backref is unlinked,
 * incomplete backrefs are dropped from the list, nlink in the inode item is
 * set to 0, and the remaining backrefs are linked again.
 *
 * @trans: running transaction
 * @root:  tree that holds the inode
 * @path:  scratch path, released before returning
 * @rec:   inode record to repair
 */
2506 static int reset_nlink(struct btrfs_trans_handle *trans,
2507 struct btrfs_root *root,
2508 struct btrfs_path *path,
2509 struct inode_record *rec)
2511 struct inode_backref *backref;
2512 struct inode_backref *tmp;
2513 struct btrfs_key key;
2514 struct btrfs_inode_item *inode_item;
2517 /* We don't believe this either, reset it and iterate backref */
2518 rec->found_link = 0;
2520 /* Remove all backref including the valid ones */
2521 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2522 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2523 backref->index, backref->name,
2524 backref->namelen, 0);
2528 /* remove invalid backref, so it won't be added back */
2529 if (!(backref->found_dir_index &&
2530 backref->found_dir_item &&
2531 backref->found_inode_ref)) {
2532 list_del(&backref->list);
2539 /* Set nlink to 0 */
2540 key.objectid = rec->ino;
2541 key.type = BTRFS_INODE_ITEM_KEY;
2543 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2550 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2551 struct btrfs_inode_item);
2552 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2553 btrfs_mark_buffer_dirty(path->nodes[0]);
2554 btrfs_release_path(path);
2557 * Add back valid inode_ref/dir_item/dir_index,
2558 * add_link() will handle the nlink inc, so new nlink must be correct
2560 list_for_each_entry(backref, &rec->backrefs, list) {
2561 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2562 backref->name, backref->namelen,
2563 backref->filetype, &backref->index, 1);
2568 btrfs_release_path(path);
/*
 * Repair a wrong link count on @rec. Name and type are recovered first, the
 * links are rebuilt with reset_nlink(), and an inode left without any valid
 * link is linked into a lost+found directory. On a name clash there, a
 * suffix built from the inode number is appended and the link is retried.
 * I_ERR_LINK_COUNT_WRONG is cleared in the end and @path is released.
 *
 * @trans: running transaction
 * @root:  tree that holds the inode
 * @path:  scratch path
 * @rec:   inode record to repair
 */
2572 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2573 struct btrfs_root *root,
2574 struct btrfs_path *path,
2575 struct inode_record *rec)
2577 char *dir_name = "lost+found";
2578 char namebuf[BTRFS_NAME_LEN] = {0};
2583 int name_recovered = 0;
2584 int type_recovered = 0;
2588 * Get file name and type first before these invalid inode ref
2589 * are deleted by remove_all_invalid_backref()
/* Both helpers return 0 on success, hence the negation */
2591 name_recovered = !find_file_name(rec, namebuf, &namelen);
2592 type_recovered = !find_file_type(rec, &type);
/* Without a recovered name the decimal inode number serves as the name */
2594 if (!name_recovered) {
2595 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2596 rec->ino, rec->ino);
2597 namelen = count_digits(rec->ino);
2598 sprintf(namebuf, "%llu", rec->ino);
/* Without a recovered type the inode is treated as a regular file */
2601 if (!type_recovered) {
2602 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2604 type = BTRFS_FT_REG_FILE;
2608 ret = reset_nlink(trans, root, path, rec);
2611 "Failed to reset nlink for inode %llu: %s\n",
2612 rec->ino, strerror(-ret));
/* No valid link is left after the reset: move the inode to lost+found */
2616 if (rec->found_link == 0) {
2617 lost_found_ino = root->highest_inode;
2618 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2623 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2624 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2627 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2628 dir_name, strerror(-ret));
2631 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2632 namebuf, namelen, type, NULL, 1);
2634 * Add ".INO" suffix several times to handle case where
2635 * "FILENAME.INO" is already taken by another file.
2637 while (ret == -EEXIST) {
2639 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2641 if (namelen + count_digits(rec->ino) + 1 >
2646 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2648 namelen += count_digits(rec->ino) + 1;
2649 ret = btrfs_add_link(trans, root, rec->ino,
2650 lost_found_ino, namebuf,
2651 namelen, type, NULL, 1);
2655 "Failed to link the inode %llu to %s dir: %s\n",
2656 rec->ino, dir_name, strerror(-ret));
2660 * Just increase the found_link, don't actually add the
2661 * backref. This will make things easier and this inode
2662 * record will be freed after the repair is done.
2663 * So fsck will not report problem about this inode.
2666 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2667 namelen, namebuf, dir_name);
2669 printf("Fixed the nlink of inode %llu\n", rec->ino);
2672 * Clear the flag anyway, or we will loop forever for the same inode
2673 * as it will not be removed from the bad inode list and the dead loop
2676 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2677 btrfs_release_path(path);
2682 * Check if there is any normal(reg or prealloc) file extent for given
2684 * This is used to determine the file type when neither its dir_index/item or
2685 * inode_item exists.
2687 * This will *NOT* report error, if any error happens, just consider it does
2688 * not have any normal file extent.
/*
 * Look at the EXTENT_DATA item found for inode @ino in @root and test
 * whether it is something other than an inline extent.
 *
 * @root: tree to search
 * @ino:  inode number whose file extents are examined
 */
2690 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2692 struct btrfs_path *path;
2693 struct btrfs_key key;
2694 struct btrfs_key found_key;
2695 struct btrfs_file_extent_item *fi;
2699 path = btrfs_alloc_path();
2703 key.type = BTRFS_EXTENT_DATA_KEY;
/* Read-only lookup: no transaction, no insert, no copy-on-write */
2706 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
/* Not an exact hit and the slot is past this leaf: move to the next leaf */
2711 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2712 ret = btrfs_next_leaf(root, path);
/* The item must be a file extent of the requested inode */
2719 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2721 if (found_key.objectid != ino ||
2722 found_key.type != BTRFS_EXTENT_DATA_KEY)
2724 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2725 struct btrfs_file_extent_item);
/* Anything that is not inline counts as a normal file extent */
2726 type = btrfs_file_extent_type(path->nodes[0], fi);
2727 if (type != BTRFS_FILE_EXTENT_INLINE) {
2733 btrfs_free_path(path);
2737 static u32 btrfs_type_to_imode(u8 type)
2739 static u32 imode_by_btrfs_type[] = {
2740 [BTRFS_FT_REG_FILE] = S_IFREG,
2741 [BTRFS_FT_DIR] = S_IFDIR,
2742 [BTRFS_FT_CHRDEV] = S_IFCHR,
2743 [BTRFS_FT_BLKDEV] = S_IFBLK,
2744 [BTRFS_FT_FIFO] = S_IFIFO,
2745 [BTRFS_FT_SOCK] = S_IFSOCK,
2746 [BTRFS_FT_SYMLINK] = S_IFLNK,
2749 return imode_by_btrfs_type[(type)];
/*
 * Rebuild the missing inode item of @rec. The file type is taken from
 * find_file_type() when possible, otherwise guessed from what was found for
 * the inode, with regular file as the last resort. After btrfs_new_inode()
 * the record is updated: I_ERR_NO_INODE_ITEM is cleared and
 * I_ERR_LINK_COUNT_WRONG is set so that the nlink repair runs afterwards.
 *
 * @trans: running transaction
 * @root:  tree that should hold the inode
 * @path:  scratch path (not referenced in the visible code)
 * @rec:   inode record to rebuild the item for
 *
 * NOTE(review): after the rebuild the record gets found_dir_item = 1 even
 * though an inode item was created; confirm that found_inode_item was not
 * the intended field.
 */
2752 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2753 struct btrfs_root *root,
2754 struct btrfs_path *path,
2755 struct inode_record *rec)
2759 int type_recovered = 0;
2762 printf("Trying to rebuild inode:%llu\n", rec->ino);
/* find_file_type() returns 0 on success, hence the negation */
2764 type_recovered = !find_file_type(rec, &filetype);
2767 * Try to determine inode type if type not found.
2769 * For found regular file extent, it must be FILE.
2770 * For found dir_item/index, it must be DIR.
2772 * For undetermined one, use FILE as fallback.
2775 * 1. If found backref(inode_index/item is already handled) to it,
2777 * Need new inode-inode ref structure to allow search for that.
2779 if (!type_recovered) {
2780 if (rec->found_file_extent &&
2781 find_normal_file_extent(root, rec->ino)) {
2783 filetype = BTRFS_FT_REG_FILE;
2784 } else if (rec->found_dir_item) {
2786 filetype = BTRFS_FT_DIR;
2787 } else if (!list_empty(&rec->orphan_extents)) {
2789 filetype = BTRFS_FT_REG_FILE;
2791 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2794 filetype = BTRFS_FT_REG_FILE;
/* Create the inode item with the chosen type bits merged into the mode */
2798 ret = btrfs_new_inode(trans, root, rec->ino,
2799 mode | btrfs_type_to_imode(filetype));
2804 * Here inode rebuild is done, we only rebuild the inode item,
2805 * don't repair the nlink(like move to lost+found).
2806 * That is the job of nlink repair.
2808 * We just fill the record and return
2810 rec->found_dir_item = 1;
2811 rec->imode = mode | btrfs_type_to_imode(filetype);
2813 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2814 /* Ensure the inode_nlinks repair function will be called */
2815 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Process every orphan data extent queued on @rec->orphan_extents.
 *
 * For each entry the visible code:
 *   - asks btrfs_get_extent() whether a file extent already covers
 *     [offset, offset + disk_len) of the owning inode, then releases @path;
 *   - contains a path that prints "conflicts, delete the orphan" and drops
 *     the extent with btrfs_free_extent() on the extent root;
 *   - contains a path that inserts a file extent with
 *     btrfs_insert_file_extent(), using disk_len for both length arguments
 *     (the extent is assumed to be uncompressed, see the comment below);
 *   - adds disk_len to rec->found_size and clears I_ERR_FILE_NBYTES_WRONG
 *     once found_size equals rec->nbytes;
 *   - removes the range from rec->holes and clears
 *     I_ERR_FILE_EXTENT_DISCOUNT when no hole is left;
 *   - unlinks the entry from the list.
 * After the loop I_ERR_FILE_EXTENT_ORPHAN is cleared.
 *
 * NOTE(review): the conditions that choose between the free and the insert
 * path, the error exits and the return statement are not visible here.
 */
2820 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2821 struct btrfs_root *root,
2822 struct btrfs_path *path,
2823 struct inode_record *rec)
2825 struct orphan_data_extent *orphan;
2826 struct orphan_data_extent *tmp;
/* The _safe iterator is needed because entries are unlinked inside the loop. */
2829 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2831 * Check for conflicting file extents
2833 * Here we don't know whether the extents is compressed or not,
2834 * so we can only assume it not compressed nor data offset,
2835 * and use its disk_len as extent length.
2837 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2838 orphan->offset, orphan->disk_len, 0);
2839 btrfs_release_path(path);
2844 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2845 orphan->disk_bytenr, orphan->disk_len);
2846 ret = btrfs_free_extent(trans,
2847 root->fs_info->extent_root,
2848 orphan->disk_bytenr, orphan->disk_len,
2849 0, root->objectid, orphan->objectid,
2854 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2855 orphan->offset, orphan->disk_bytenr,
2856 orphan->disk_len, orphan->disk_len);
2860 /* Update file size info */
2861 rec->found_size += orphan->disk_len;
2862 if (rec->found_size == rec->nbytes)
2863 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2865 /* Update the file extent hole info too */
2866 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2870 if (RB_EMPTY_ROOT(&rec->holes))
2871 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2873 list_del(&orphan->list);
2876 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps recorded in @rec->holes by punching explicit holes.
 *
 * The hole tree is consumed from its first node: each hole is written with
 * btrfs_punch_hole(), removed with del_file_extent_hole(), and the first
 * node is fetched again. I_ERR_FILE_EXTENT_DISCOUNT is cleared as soon as the
 * tree becomes empty. A separate call punches one hole covering
 * [0, round_up(isize, sectorsize)) for a file that lost all of its extents.
 * A confirmation message naming the inode and root is printed.
 *
 * @path is accepted but not used in the visible code.
 * NOTE(review): the loop header, the condition guarding the special case,
 * the error exits and the return statement are not visible here.
 */
2881 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2882 struct btrfs_root *root,
2883 struct btrfs_path *path,
2884 struct inode_record *rec)
2886 struct rb_node *node;
2887 struct file_extent_hole *hole;
2891 node = rb_first(&rec->holes);
2895 hole = rb_entry(node, struct file_extent_hole, node);
2896 ret = btrfs_punch_hole(trans, root, rec->ino,
2897 hole->start, hole->len);
2900 ret = del_file_extent_hole(&rec->holes, hole->start,
2904 if (RB_EMPTY_ROOT(&rec->holes))
2905 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* The current node was just removed, so restart from the new first node. */
2906 node = rb_first(&rec->holes);
2908 /* special case for a file losing all its file extent */
2910 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2911 round_up(rec->isize, root->sectorsize));
2915 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2916 rec->ino, root->objectid);
/*
 * Run every applicable repair helper for @rec inside a single transaction.
 *
 * The opening test checks whether @rec carries at least one of the error
 * bits this function knows how to repair. A path is allocated and a
 * transaction reserving 7 items is started; if starting it fails the path is
 * freed and PTR_ERR(trans) is returned.
 *
 * Repairs run in a fixed order - missing inode item, orphan extents,
 * discount extents, directory isize, orphan item, link count, nbytes - and
 * every step after the first is attempted only while ret is still 0. Each
 * helper may change rec->errors, which later tests read again. Finally the
 * transaction is committed and the path freed.
 *
 * NOTE(review): the result of btrfs_commit_transaction() is not stored in
 * the visible code. The action taken when no repairable bit is set, the
 * initial value of ret and the final return are not visible here.
 */
2921 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2923 struct btrfs_trans_handle *trans;
2924 struct btrfs_path *path;
2927 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2928 I_ERR_NO_ORPHAN_ITEM |
2929 I_ERR_LINK_COUNT_WRONG |
2930 I_ERR_NO_INODE_ITEM |
2931 I_ERR_FILE_EXTENT_ORPHAN |
2932 I_ERR_FILE_EXTENT_DISCOUNT|
2933 I_ERR_FILE_NBYTES_WRONG)))
2936 path = btrfs_alloc_path();
2941 * For nlink repair, it may create a dir and add link, so
2942 * 2 for parent(256)'s dir_index and dir_item
2943 * 2 for lost+found dir's inode_item and inode_ref
2944 * 1 for the new inode_ref of the file
2945 * 2 for lost+found dir's dir_index and dir_item for the file
2947 trans = btrfs_start_transaction(root, 7);
2948 if (IS_ERR(trans)) {
2949 btrfs_free_path(path);
2950 return PTR_ERR(trans);
2953 if (rec->errors & I_ERR_NO_INODE_ITEM)
2954 ret = repair_inode_no_item(trans, root, path, rec);
2955 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2956 ret = repair_inode_orphan_extent(trans, root, path, rec);
2957 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2958 ret = repair_inode_discount_extent(trans, root, path, rec);
2959 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2960 ret = repair_inode_isize(trans, root, path, rec);
2961 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2962 ret = repair_inode_orphan_item(trans, root, path, rec);
2963 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2964 ret = repair_inode_nlinks(trans, root, path, rec);
2965 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2966 ret = repair_inode_nbytes(trans, root, path, rec);
2967 btrfs_commit_transaction(trans, root);
2968 btrfs_free_path(path);
/*
 * Validate (and, when the global repair flag is set, try to fix) all inode
 * records gathered for @root in @inode_cache. The cache is drained.
 *
 * Phases visible in this function:
 *   1. A root with zero references only produces a warning if its inode
 *      cache is not empty.
 *   2. The inode number of the last cached record raises
 *      root->highest_inode, to be used later when picking a lost+found ino.
 *   3. In repair mode the records are walked and repair_inode_backrefs() is
 *      called for records that have backrefs; the pass is staged (a check
 *      for stage == 3 is visible).
 *   4. The record of the root directory is looked up and checked with
 *      check_root_dir(); problems are printed. A path that recreates a
 *      missing root directory with btrfs_make_root_dir() inside a one-item
 *      transaction is present, as is a "root dir not found" message.
 *   5. Every remaining record is removed from the cache. The root directory
 *      and BTRFS_ORPHAN_OBJECTID records are freed directly. For the others
 *      I_ERR_NO_ORPHAN_ITEM is re-evaluated with check_orphan_item(),
 *      I_ERR_NO_INODE_ITEM and I_ERR_LINK_COUNT_WRONG are derived from the
 *      record, try_repair_inode() is called, and what is left is printed
 *      together with every backref and its missing-item flags.
 *
 * Return: -1 if the error counter is above zero, otherwise 0.
 *
 * NOTE(review): loop headers, several conditions (including where the
 * repair flag gates steps 3-5) and early exits are not visible here; the
 * phase list above names only calls that are visible.
 */
2972 static int check_inode_recs(struct btrfs_root *root,
2973 struct cache_tree *inode_cache)
2975 struct cache_extent *cache;
2976 struct ptr_node *node;
2977 struct inode_record *rec;
2978 struct inode_backref *backref;
2983 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2985 if (btrfs_root_refs(&root->root_item) == 0) {
2986 if (!cache_tree_empty(inode_cache))
2987 fprintf(stderr, "warning line %d\n", __LINE__);
2992 * We need to record the highest inode number for later 'lost+found'
2994 * We must select an ino not used/referred by any existing inode, or
2995 * 'lost+found' ino may be a missing ino in a corrupted leaf,
2996 * this may cause 'lost+found' dir has wrong nlinks.
2998 cache = last_cache_extent(inode_cache);
3000 node = container_of(cache, struct ptr_node, cache);
3002 if (rec->ino > root->highest_inode)
3003 root->highest_inode = rec->ino;
3007 * We need to repair backrefs first because we could change some of the
3008 * errors in the inode recs.
3010 * We also need to go through and delete invalid backrefs first and then
3011 * add the correct ones second. We do this because we may get EEXIST
3012 * when adding back the correct index because we hadn't yet deleted the
3015 * For example, if we were missing a dir index then the directories
3016 * isize would be wrong, so if we fixed the isize to what we thought it
3017 * would be and then fixed the backref we'd still have a invalid fs, so
3018 * we need to add back the dir index and then check to see if the isize
3023 if (stage == 3 && !err)
3026 cache = search_cache_extent(inode_cache, 0);
3027 while (repair && cache) {
3028 node = container_of(cache, struct ptr_node, cache);
3030 cache = next_cache_extent(cache);
3032 /* Need to free everything up and rescan */
3034 remove_cache_extent(inode_cache, &node->cache);
3036 free_inode_rec(rec);
3040 if (list_empty(&rec->backrefs))
3043 ret = repair_inode_backrefs(root, rec, inode_cache,
3057 rec = get_inode_rec(inode_cache, root_dirid, 0);
3058 BUG_ON(IS_ERR(rec));
3060 ret = check_root_dir(rec);
3062 fprintf(stderr, "root %llu root dir %llu error\n",
3063 (unsigned long long)root->root_key.objectid,
3064 (unsigned long long)root_dirid);
3065 print_inode_error(root, rec);
3070 struct btrfs_trans_handle *trans;
3072 trans = btrfs_start_transaction(root, 1);
3073 if (IS_ERR(trans)) {
3074 err = PTR_ERR(trans);
3079 "root %llu missing its root dir, recreating\n",
3080 (unsigned long long)root->objectid);
3082 ret = btrfs_make_root_dir(trans, root, root_dirid);
3085 btrfs_commit_transaction(trans, root);
3089 fprintf(stderr, "root %llu root dir %llu not found\n",
3090 (unsigned long long)root->root_key.objectid,
3091 (unsigned long long)root_dirid);
3095 cache = search_cache_extent(inode_cache, 0);
3098 node = container_of(cache, struct ptr_node, cache);
3100 remove_cache_extent(inode_cache, &node->cache);
3102 if (rec->ino == root_dirid ||
3103 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3104 free_inode_rec(rec);
3108 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3109 ret = check_orphan_item(root, rec->ino);
3111 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3112 if (can_free_inode_rec(rec)) {
3113 free_inode_rec(rec);
3118 if (!rec->found_inode_item)
3119 rec->errors |= I_ERR_NO_INODE_ITEM;
3120 if (rec->found_link != rec->nlink)
3121 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3123 ret = try_repair_inode(root, rec);
3124 if (ret == 0 && can_free_inode_rec(rec)) {
3125 free_inode_rec(rec);
3131 if (!(repair && ret == 0))
3133 print_inode_error(root, rec);
3134 list_for_each_entry(backref, &rec->backrefs, list) {
3135 if (!backref->found_dir_item)
3136 backref->errors |= REF_ERR_NO_DIR_ITEM;
3137 if (!backref->found_dir_index)
3138 backref->errors |= REF_ERR_NO_DIR_INDEX;
3139 if (!backref->found_inode_ref)
3140 backref->errors |= REF_ERR_NO_INODE_REF;
3141 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3142 " namelen %u name %s filetype %d errors %x",
3143 (unsigned long long)backref->dir,
3144 (unsigned long long)backref->index,
3145 backref->namelen, backref->name,
3146 backref->filetype, backref->errors);
3147 print_ref_error(backref->errors);
3149 free_inode_rec(rec);
3151 return (error > 0) ? -1 : 0;
/*
 * Find the root record keyed by objectid in @root_cache, or create it.
 *
 * The cache is queried for the one-unit range starting at objectid. When a
 * new record is needed it is zero-allocated, its backref list is
 * initialised, and it is inserted with cache.start = objectid and
 * cache.size = 1.
 *
 * Return: the record, or an ERR_PTR value; ERR_PTR(-ENOMEM) and
 * ERR_PTR(-EEXIST) are the two error returns visible here.
 * NOTE(review): the conditions guarding each return are not visible;
 * presumably -ENOMEM follows a failed calloc() and -EEXIST a failed insert.
 */
3154 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3157 struct cache_extent *cache;
3158 struct root_record *rec = NULL;
3161 cache = lookup_cache_extent(root_cache, objectid, 1);
3163 rec = container_of(cache, struct root_record, cache);
3165 rec = calloc(1, sizeof(*rec));
3167 return ERR_PTR(-ENOMEM);
3168 rec->objectid = objectid;
3169 INIT_LIST_HEAD(&rec->backrefs);
3170 rec->cache.start = objectid;
3171 rec->cache.size = 1;
3173 ret = insert_cache_extent(root_cache, &rec->cache);
3175 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec that matches (@ref_root, @dir, @name), or create
 * a new one and append it to @rec->backrefs.
 *
 * Matching compares ref_root, dir, namelen and the @namelen name bytes;
 * @index does not take part in the comparison and is only stored in a newly
 * created entry. A new entry is zero-allocated with @namelen + 1 extra bytes
 * so that the copied name can be NUL-terminated.
 *
 * NOTE(review): the allocation-failure handling and the return statements
 * are not visible here.
 */
3180 static struct root_backref *get_root_backref(struct root_record *rec,
3181 u64 ref_root, u64 dir, u64 index,
3182 const char *name, int namelen)
3184 struct root_backref *backref;
3186 list_for_each_entry(backref, &rec->backrefs, list) {
3187 if (backref->ref_root != ref_root || backref->dir != dir ||
3188 backref->namelen != namelen)
3190 if (memcmp(name, backref->name, namelen))
/* Trailing bytes hold the name plus its terminating NUL. */
3195 backref = calloc(1, sizeof(*backref) + namelen + 1);
3198 backref->ref_root = ref_root;
3200 backref->index = index;
3201 backref->namelen = namelen;
3202 memcpy(backref->name, name, namelen);
3203 backref->name[namelen] = '\0';
3204 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Tear down the root record that embeds @cache: each backref is unlinked
 * from the head of rec->backrefs until the list is empty.
 * NOTE(review): the statements that release the backrefs and the record
 * itself are not visible here.
 */
3208 static void free_root_record(struct cache_extent *cache)
3210 struct root_record *rec;
3211 struct root_backref *backref;
3213 rec = container_of(cache, struct root_record, cache);
3214 while (!list_empty(&rec->backrefs)) {
3215 backref = to_root_backref(rec->backrefs.next);
3216 list_del(&backref->list);
/*
 * Instantiates the cache-tree freeing helper for root records on top of
 * free_root_record(); presumably this is what defines the
 * free_root_recs_tree() used elsewhere in this file - confirm in the macro.
 */
3223 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Note in @root_cache that an item of type @item_type links root @root_id
 * to (@ref_root, @dir, @name), and accumulate @errors on that backref.
 *
 * The root record and the backref are looked up or created. For every item
 * type except DIR_ITEM the index is cross-checked: if a dir index, back ref
 * or forward ref was already seen and the stored index differs,
 * REF_ERR_INDEX_UNMATCH is set; an assignment of backref->index is also
 * present. The per-type found_* flag is then set, flagging
 * REF_ERR_DUP_ROOT_REF / REF_ERR_DUP_ROOT_BACKREF when the same kind of
 * root ref was already recorded. A backref is marked reachable once both
 * its forward ref and its dir item have been seen.
 *
 * A failure of get_root_rec() trips BUG_ON().
 * NOTE(review): the statements on the found_forward_ref / found_dir_item
 * branches, the handling of a NULL backref and the return value are not
 * visible here.
 */
3225 static int add_root_backref(struct cache_tree *root_cache,
3226 u64 root_id, u64 ref_root, u64 dir, u64 index,
3227 const char *name, int namelen,
3228 int item_type, int errors)
3230 struct root_record *rec;
3231 struct root_backref *backref;
3233 rec = get_root_rec(root_cache, root_id);
3234 BUG_ON(IS_ERR(rec));
3235 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3238 backref->errors |= errors;
3240 if (item_type != BTRFS_DIR_ITEM_KEY) {
3241 if (backref->found_dir_index || backref->found_back_ref ||
3242 backref->found_forward_ref) {
3243 if (backref->index != index)
3244 backref->errors |= REF_ERR_INDEX_UNMATCH;
3246 backref->index = index;
3250 if (item_type == BTRFS_DIR_ITEM_KEY) {
3251 if (backref->found_forward_ref)
3253 backref->found_dir_item = 1;
3254 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3255 backref->found_dir_index = 1;
3256 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3257 if (backref->found_forward_ref)
3258 backref->errors |= REF_ERR_DUP_ROOT_REF;
3259 else if (backref->found_dir_item)
3261 backref->found_forward_ref = 1;
3262 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3263 if (backref->found_back_ref)
3264 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3265 backref->found_back_ref = 1;
/* Reachable means: both the ROOT_REF and the matching dir item were seen. */
3270 if (backref->found_forward_ref && backref->found_dir_item)
3271 backref->reachable = 1;
/*
 * Move what was learnt about child roots while scanning @root from
 * @src_cache (inode-record shaped entries) into @dst_cache (root records).
 *
 * For the tree-reloc root the source cache is simply freed. Otherwise every
 * entry is removed from @src_cache, is_child_root() is consulted for the
 * entry's ino, and each of its backrefs is replayed through
 * add_root_backref() - once as DIR_ITEM if a dir item was found and once as
 * DIR_INDEX if a dir index was found. A backref carrying found_inode_ref
 * trips BUG_ON(). The entry is then freed.
 *
 * NOTE(review): the loop header, the reaction to the is_child_root()
 * result, the errors argument passed on and the return value are not
 * visible here.
 */
3275 static int merge_root_recs(struct btrfs_root *root,
3276 struct cache_tree *src_cache,
3277 struct cache_tree *dst_cache)
3279 struct cache_extent *cache;
3280 struct ptr_node *node;
3281 struct inode_record *rec;
3282 struct inode_backref *backref;
3285 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3286 free_inode_recs_tree(src_cache);
3291 cache = search_cache_extent(src_cache, 0);
3294 node = container_of(cache, struct ptr_node, cache);
3296 remove_cache_extent(src_cache, &node->cache);
3299 ret = is_child_root(root, root->objectid, rec->ino);
3305 list_for_each_entry(backref, &rec->backrefs, list) {
3306 BUG_ON(backref->found_inode_ref);
3307 if (backref->found_dir_item)
3308 add_root_backref(dst_cache, rec->ino,
3309 root->root_key.objectid, backref->dir,
3310 backref->index, backref->name,
3311 backref->namelen, BTRFS_DIR_ITEM_KEY,
3313 if (backref->found_dir_index)
3314 add_root_backref(dst_cache, rec->ino,
3315 root->root_key.objectid, backref->dir,
3316 backref->index, backref->name,
3317 backref->namelen, BTRFS_DIR_INDEX_KEY,
3321 free_inode_rec(rec);
/*
 * Cross-check the root records in @root_cache and report subvolume trees
 * that are unreferenced or whose references are inconsistent.
 *
 * Visible structure:
 *   - The record of BTRFS_FS_TREE_OBJECTID is fetched (created if needed);
 *     a failure trips BUG_ON().
 *   - A first pass over all records looks at each reachable backref, fetches
 *     the record of the referencing root and inspects its found_ref count;
 *     backref->reachable is cleared in this pass. As the fixme below says,
 *     circular references are not detected.
 *   - A second pass handles records with found_ref == 0 whose objectid lies
 *     in [BTRFS_FIRST_FREE_OBJECTID, BTRFS_LAST_FREE_OBJECTID]: an orphan
 *     item is looked up in the tree root, records without a root item are
 *     treated as meaningless, and "fs tree ... not referenced" is printed.
 *   - For each record the backrefs get REF_ERR_NO_DIR_ITEM,
 *     REF_ERR_NO_DIR_INDEX, REF_ERR_NO_ROOT_BACKREF and REF_ERR_NO_ROOT_REF
 *     as applicable; a summary line and one "unresolved ref" line per
 *     offending reachable backref are printed.
 *
 * Return: 1 if the error counter is above zero, otherwise 0.
 *
 * NOTE(review): loop headers, counter updates and several branch bodies are
 * not visible here; the description names only visible statements.
 */
3328 static int check_root_refs(struct btrfs_root *root,
3329 struct cache_tree *root_cache)
3331 struct root_record *rec;
3332 struct root_record *ref_root;
3333 struct root_backref *backref;
3334 struct cache_extent *cache;
3340 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3341 BUG_ON(IS_ERR(rec));
3344 /* fixme: this can not detect circular references */
3347 cache = search_cache_extent(root_cache, 0);
3351 rec = container_of(cache, struct root_record, cache);
3352 cache = next_cache_extent(cache);
3354 if (rec->found_ref == 0)
3357 list_for_each_entry(backref, &rec->backrefs, list) {
3358 if (!backref->reachable)
3361 ref_root = get_root_rec(root_cache,
3363 BUG_ON(IS_ERR(ref_root));
3364 if (ref_root->found_ref > 0)
3367 backref->reachable = 0;
3369 if (rec->found_ref == 0)
3375 cache = search_cache_extent(root_cache, 0);
3379 rec = container_of(cache, struct root_record, cache);
3380 cache = next_cache_extent(cache);
3382 if (rec->found_ref == 0 &&
3383 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3384 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3385 ret = check_orphan_item(root->fs_info->tree_root,
3391 * If we don't have a root item then we likely just have
3392 * a dir item in a snapshot for this root but no actual
3393 * ref key or anything so it's meaningless.
3395 if (!rec->found_root_item)
3398 fprintf(stderr, "fs tree %llu not referenced\n",
3399 (unsigned long long)rec->objectid);
3403 if (rec->found_ref > 0 && !rec->found_root_item)
3405 list_for_each_entry(backref, &rec->backrefs, list) {
3406 if (!backref->found_dir_item)
3407 backref->errors |= REF_ERR_NO_DIR_ITEM;
3408 if (!backref->found_dir_index)
3409 backref->errors |= REF_ERR_NO_DIR_INDEX;
3410 if (!backref->found_back_ref)
3411 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3412 if (!backref->found_forward_ref)
3413 backref->errors |= REF_ERR_NO_ROOT_REF;
3414 if (backref->reachable && backref->errors)
3421 fprintf(stderr, "fs tree %llu refs %u %s\n",
3422 (unsigned long long)rec->objectid, rec->found_ref,
3423 rec->found_root_item ? "" : "not found");
3425 list_for_each_entry(backref, &rec->backrefs, list) {
3426 if (!backref->reachable)
3428 if (!backref->errors && rec->found_root_item)
3430 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3431 " index %llu namelen %u name %s errors %x\n",
3432 (unsigned long long)backref->ref_root,
3433 (unsigned long long)backref->dir,
3434 (unsigned long long)backref->index,
3435 backref->namelen, backref->name,
3437 print_ref_error(backref->errors);
3440 return errors > 0 ? 1 : 0;
/*
 * Decode the btrfs_root_ref item at @slot of @eb and record it in
 * @root_cache through add_root_backref().
 *
 * dirid, sequence (used as the index) and name length are read from the
 * item; the name bytes are read from directly behind the fixed-size struct.
 * A name longer than BTRFS_NAME_LEN is cut to BTRFS_NAME_LEN and reported
 * with REF_ERR_NAME_TOO_LONG. For a ROOT_REF key, @key->offset is passed as
 * the referenced root and @key->objectid as the referencing root; for the
 * other key type the two are swapped.
 *
 * NOTE(review): namebuf holds BTRFS_NAME_LEN bytes and is passed on with an
 * explicit length, not NUL-terminated. The return value is not visible.
 */
3443 static int process_root_ref(struct extent_buffer *eb, int slot,
3444 struct btrfs_key *key,
3445 struct cache_tree *root_cache)
3451 struct btrfs_root_ref *ref;
3452 char namebuf[BTRFS_NAME_LEN];
3455 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3457 dirid = btrfs_root_ref_dirid(eb, ref);
3458 index = btrfs_root_ref_sequence(eb, ref);
3459 name_len = btrfs_root_ref_name_len(eb, ref);
3461 if (name_len <= BTRFS_NAME_LEN) {
3465 len = BTRFS_NAME_LEN;
3466 error = REF_ERR_NAME_TOO_LONG;
/* The name is stored immediately after the btrfs_root_ref structure. */
3468 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3470 if (key->type == BTRFS_ROOT_REF_KEY) {
3471 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3472 index, namebuf, len, key->type, error);
3474 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3475 index, namebuf, len, key->type, error);
/*
 * Destructor callback for one entry of a corrupt-block cache tree: recovers
 * the btrfs_corrupt_block that embeds @cache.
 * NOTE(review): the statement that releases it is not visible here.
 */
3480 static void free_corrupt_block(struct cache_extent *cache)
3482 struct btrfs_corrupt_block *corrupt;
3484 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/*
 * Instantiates the cache-tree freeing helper for corrupt blocks; presumably
 * this defines the free_corrupt_blocks_tree() used elsewhere in this file.
 */
3488 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3491 * Repair the btree of the given root.
3493 * The fix is to remove the node key in corrupt_blocks cache_tree.
3494 * and rebalance the tree.
3495 * After the fix, the btree should be writeable.
/*
 * Cut the tree blocks listed in @corrupt_blocks out of @root.
 *
 * An empty @corrupt_blocks is tested for first. Otherwise a path is
 * allocated and a one-item transaction started (a failure is reported on
 * stderr). Two passes over the cache follow:
 *   pass 1 - for each corrupt block, search down to the recorded level with
 *            ins_len 0 and cow 1 (no balancing), remember the block pointer
 *            found at that slot, delete the pointer with btrfs_del_ptr() and
 *            free the extent it referred to;
 *   pass 2 - search for each recorded key again with ins_len -1 so that
 *            btrfs_search_slot() rebalances the tree.
 * The transaction is then committed and the path freed.
 *
 * NOTE(review): loop headers, error exits between the calls and the return
 * value are not visible here.
 */
3497 static int repair_btree(struct btrfs_root *root,
3498 struct cache_tree *corrupt_blocks)
3500 struct btrfs_trans_handle *trans;
3501 struct btrfs_path *path;
3502 struct btrfs_corrupt_block *corrupt;
3503 struct cache_extent *cache;
3504 struct btrfs_key key;
3509 if (cache_tree_empty(corrupt_blocks))
3512 path = btrfs_alloc_path();
3516 trans = btrfs_start_transaction(root, 1);
3517 if (IS_ERR(trans)) {
3518 ret = PTR_ERR(trans);
3519 fprintf(stderr, "Error starting transaction: %s\n",
3523 cache = first_cache_extent(corrupt_blocks);
3525 corrupt = container_of(cache, struct btrfs_corrupt_block,
3527 level = corrupt->level;
/* Make the search stop at the level that holds the pointer to remove. */
3528 path->lowest_level = level;
3529 key.objectid = corrupt->key.objectid;
3530 key.type = corrupt->key.type;
3531 key.offset = corrupt->key.offset;
3534 * Here we don't want to do any tree balance, since it may
3535 * cause a balance with corrupted brother leaf/node,
3536 * so ins_len set to 0 here.
3537 * Balance will be done after all corrupt node/leaf is deleted.
3539 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3542 offset = btrfs_node_blockptr(path->nodes[level],
3543 path->slots[level]);
3545 /* Remove the ptr */
3546 ret = btrfs_del_ptr(trans, root, path, level,
3547 path->slots[level]);
3551 * Remove the corresponding extent
3552 * return value is not concerned.
3554 btrfs_release_path(path);
3555 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3556 0, root->root_key.objectid,
3558 cache = next_cache_extent(cache);
3561 /* Balance the btree using btrfs_search_slot() */
3562 cache = first_cache_extent(corrupt_blocks);
3564 corrupt = container_of(cache, struct btrfs_corrupt_block,
3566 memcpy(&key, &corrupt->key, sizeof(key));
3567 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3570 /* return will always >0 since it won't find the item */
3572 btrfs_release_path(path);
3573 cache = next_cache_extent(cache);
3576 btrfs_commit_transaction(trans, root);
3578 btrfs_free_path(path);
/*
 * Check one fs/subvolume tree @root, feeding root-level findings into
 * @root_cache and using @wc as the tree-walk state.
 *
 * Visible sequence:
 *   - A function-local corrupt_blocks cache tree is initialised and
 *     published through root->fs_info->corrupt_blocks for the duration of
 *     the call; it is freed and the pointer reset to NULL at the end.
 *   - Unless @root is the tree-reloc root, its root record is fetched and
 *     found_root_item is set when the root item has references.
 *   - Path, shared root node, its two caches and the node-ref state are
 *     initialised.
 *   - Orphan data extents queued on the root are moved onto the inode
 *     records they belong to, which get I_ERR_FILE_EXTENT_ORPHAN.
 *   - The walk control is pointed at the level of the root block, and the
 *     root block itself is checked with btrfs_check_leaf() or
 *     btrfs_check_node().
 *   - The walk starts at slot 0 of the root block when the root has
 *     references or drop_progress has objectid 0; otherwise it resumes from
 *     the drop_progress key at drop_level, which is rejected with an error
 *     message when it exceeds the root level or BTRFS_MAX_LEVEL.
 *   - walk_down_tree() / walk_up_tree() do the traversal.
 *   - Corrupted blocks collected on the way are listed, and repair_btree()
 *     is invoked on them with the outcome printed.
 *   - merge_root_recs() and check_inode_recs() post-process what the walk
 *     gathered; the last visited inode record is marked checked first.
 *   - Remaining orphan data extents of the root are freed.
 *
 * NOTE(review): loop headers, the repair-mode guards, error propagation and
 * the return value are not visible here.
 */
3582 static int check_fs_root(struct btrfs_root *root,
3583 struct cache_tree *root_cache,
3584 struct walk_control *wc)
3590 struct btrfs_path path;
3591 struct shared_node root_node;
3592 struct root_record *rec;
3593 struct btrfs_root_item *root_item = &root->root_item;
3594 struct cache_tree corrupt_blocks;
3595 struct orphan_data_extent *orphan;
3596 struct orphan_data_extent *tmp;
3597 enum btrfs_tree_block_status status;
3598 struct node_refs nrefs;
3601 * Reuse the corrupt_block cache tree to record corrupted tree block
3603 * Unlike the usage in extent tree check, here we do it in a per
3604 * fs/subvol tree base.
3606 cache_tree_init(&corrupt_blocks);
3607 root->fs_info->corrupt_blocks = &corrupt_blocks;
3609 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3610 rec = get_root_rec(root_cache, root->root_key.objectid);
3611 BUG_ON(IS_ERR(rec));
3612 if (btrfs_root_refs(root_item) > 0)
3613 rec->found_root_item = 1;
3616 btrfs_init_path(&path);
3617 memset(&root_node, 0, sizeof(root_node));
3618 cache_tree_init(&root_node.root_cache);
3619 cache_tree_init(&root_node.inode_cache);
3620 memset(&nrefs, 0, sizeof(nrefs));
3622 /* Move the orphan extent record to corresponding inode_record */
3623 list_for_each_entry_safe(orphan, tmp,
3624 &root->orphan_data_extents, list) {
3625 struct inode_record *inode;
3627 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3629 BUG_ON(IS_ERR(inode));
3630 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3631 list_move(&orphan->list, &inode->orphan_extents);
3634 level = btrfs_header_level(root->node);
3635 memset(wc->nodes, 0, sizeof(wc->nodes));
3636 wc->nodes[level] = &root_node;
3637 wc->active_node = level;
3638 wc->root_level = level;
3640 /* We may not have checked the root block, lets do that now */
3641 if (btrfs_is_leaf(root->node))
3642 status = btrfs_check_leaf(root, NULL, root->node);
3644 status = btrfs_check_node(root, NULL, root->node);
3645 if (status != BTRFS_TREE_BLOCK_CLEAN)
3648 if (btrfs_root_refs(root_item) > 0 ||
3649 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3650 path.nodes[level] = root->node;
3651 extent_buffer_get(root->node);
3652 path.slots[level] = 0;
3654 struct btrfs_key key;
3655 struct btrfs_disk_key found_key;
3657 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3658 level = root_item->drop_level;
3659 path.lowest_level = level;
3660 if (level > btrfs_header_level(root->node) ||
3661 level >= BTRFS_MAX_LEVEL) {
3662 error("ignoring invalid drop level: %u", level);
3665 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3668 btrfs_node_key(path.nodes[level], &found_key,
3670 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3671 sizeof(found_key)));
3675 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3681 wret = walk_up_tree(root, &path, wc, &level);
3688 btrfs_release_path(&path);
3690 if (!cache_tree_empty(&corrupt_blocks)) {
3691 struct cache_extent *cache;
3692 struct btrfs_corrupt_block *corrupt;
3694 printf("The following tree block(s) is corrupted in tree %llu:\n",
3695 root->root_key.objectid);
3696 cache = first_cache_extent(&corrupt_blocks);
3698 corrupt = container_of(cache,
3699 struct btrfs_corrupt_block,
3701 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3702 cache->start, corrupt->level,
3703 corrupt->key.objectid, corrupt->key.type,
3704 corrupt->key.offset);
3705 cache = next_cache_extent(cache);
3708 printf("Try to repair the btree for root %llu\n",
3709 root->root_key.objectid);
3710 ret = repair_btree(root, &corrupt_blocks);
3712 fprintf(stderr, "Failed to repair btree: %s\n",
3715 printf("Btree for root %llu is fixed\n",
3716 root->root_key.objectid);
3720 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3724 if (root_node.current) {
3725 root_node.current->checked = 1;
3726 maybe_free_inode_rec(&root_node.inode_cache,
3730 err = check_inode_recs(root, &root_node.inode_cache);
3734 free_corrupt_blocks_tree(&corrupt_blocks);
3735 root->fs_info->corrupt_blocks = NULL;
3736 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Tell whether @objectid names a tree that check_fs_roots() should walk as
 * an fs tree. The tree-reloc and data-reloc objectids are special-cased
 * ahead of the generic is_fstree() test.
 * NOTE(review): the value returned for the two special-cased ids is not
 * visible here.
 */
3740 static int fs_root_objectid(u64 objectid)
3742 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3743 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3745 return is_fstree(objectid);
/*
 * Iterate over the tree of tree roots and check every fs/subvolume tree,
 * collecting root reference information in @root_cache.
 *
 * Visible sequence:
 *   - When progress reporting is enabled the task position is set to
 *     TASK_FS_ROOTS and the progress task is started; task_stop() is called
 *     at the end.
 *   - Cached block groups are reset, the walk control and path are
 *     initialised, and the tree root is searched for ROOT_ITEM keys.
 *   - The tree root node seen at the start is remembered; if
 *     tree_root->node later differs, the collected root records are freed
 *     and the path released.
 *   - For a ROOT_ITEM whose objectid passes fs_root_objectid(), the root is
 *     read (uncached for the tree-reloc root, otherwise through
 *     btrfs_read_fs_root() with key.offset set to (u64)-1) and handed to
 *     check_fs_root(). A return of -EAGAIN also frees the collected root
 *     records and releases the path. A tree-reloc root is freed after use.
 *   - ROOT_REF and ROOT_BACKREF items go to process_root_ref().
 *   - Finally the path is released, the shared-node cache is freed and a
 *     warning is printed if it is still not empty.
 *
 * NOTE(review): the loop construct, the restart jumps that presumably
 * follow the two "free and release" sites, the guard in front of
 * free_extent_cache_tree() and the return value are not visible here.
 */
3748 static int check_fs_roots(struct btrfs_root *root,
3749 struct cache_tree *root_cache)
3751 struct btrfs_path path;
3752 struct btrfs_key key;
3753 struct walk_control wc;
3754 struct extent_buffer *leaf, *tree_node;
3755 struct btrfs_root *tmp_root;
3756 struct btrfs_root *tree_root = root->fs_info->tree_root;
3760 if (ctx.progress_enabled) {
3761 ctx.tp = TASK_FS_ROOTS;
3762 task_start(ctx.info);
3766 * Just in case we made any changes to the extent tree that weren't
3767 * reflected into the free space cache yet.
3770 reset_cached_block_groups(root->fs_info);
3771 memset(&wc, 0, sizeof(wc));
3772 cache_tree_init(&wc.shared);
3773 btrfs_init_path(&path);
3778 key.type = BTRFS_ROOT_ITEM_KEY;
3779 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3784 tree_node = tree_root->node;
3786 if (tree_node != tree_root->node) {
3787 free_root_recs_tree(root_cache);
3788 btrfs_release_path(&path);
3791 leaf = path.nodes[0];
3792 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3793 ret = btrfs_next_leaf(tree_root, &path);
3799 leaf = path.nodes[0];
3801 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3802 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3803 fs_root_objectid(key.objectid)) {
3804 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3805 tmp_root = btrfs_read_fs_root_no_cache(
3806 root->fs_info, &key);
3808 key.offset = (u64)-1;
3809 tmp_root = btrfs_read_fs_root(
3810 root->fs_info, &key);
3812 if (IS_ERR(tmp_root)) {
3816 ret = check_fs_root(tmp_root, root_cache, &wc);
3817 if (ret == -EAGAIN) {
3818 free_root_recs_tree(root_cache);
3819 btrfs_release_path(&path);
3824 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3825 btrfs_free_fs_root(tmp_root);
3826 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3827 key.type == BTRFS_ROOT_BACKREF_KEY) {
3828 process_root_ref(leaf, path.slots[0], &key,
3835 btrfs_release_path(&path);
3837 free_extent_cache_tree(&wc.shared);
3838 if (!cache_tree_empty(&wc.shared))
3839 fprintf(stderr, "warning line %d\n", __LINE__);
3841 task_stop(ctx.info);
/*
 * Verify every backref attached to extent record @rec; @print_errs selects
 * whether diagnostics are written to stderr.
 *
 * Conditions examined per backref, each with its own message:
 *   - the backref was not found in the extent tree;
 *   - a tree backref was never referenced back (found_ref unset);
 *   - a data backref whose found_ref count differs from num_refs;
 *   - a data backref whose disk_bytenr differs from rec->start;
 *   - a data backref whose byte count differs from rec->nr.
 * Data backrefs add their found_ref to a running total, which is compared
 * with rec->refs after the loop ("Incorrect global backref count").
 *
 * maybe_free_extent_rec() in this file treats a zero return as "all fine".
 * NOTE(review): the use of @print_errs, the contribution of tree backrefs
 * to the total, the error bookkeeping and the return statement are not
 * visible here.
 */
3846 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3848 struct list_head *cur = rec->backrefs.next;
3849 struct extent_backref *back;
3850 struct tree_backref *tback;
3851 struct data_backref *dback;
3855 while(cur != &rec->backrefs) {
3856 back = to_extent_backref(cur);
3858 if (!back->found_extent_tree) {
3862 if (back->is_data) {
3863 dback = to_data_backref(back);
3864 fprintf(stderr, "Backref %llu %s %llu"
3865 " owner %llu offset %llu num_refs %lu"
3866 " not found in extent tree\n",
3867 (unsigned long long)rec->start,
3868 back->full_backref ?
3870 back->full_backref ?
3871 (unsigned long long)dback->parent:
3872 (unsigned long long)dback->root,
3873 (unsigned long long)dback->owner,
3874 (unsigned long long)dback->offset,
3875 (unsigned long)dback->num_refs);
3877 tback = to_tree_backref(back);
3878 fprintf(stderr, "Backref %llu parent %llu"
3879 " root %llu not found in extent tree\n",
3880 (unsigned long long)rec->start,
3881 (unsigned long long)tback->parent,
3882 (unsigned long long)tback->root);
3885 if (!back->is_data && !back->found_ref) {
3889 tback = to_tree_backref(back);
3890 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3891 (unsigned long long)rec->start,
3892 back->full_backref ? "parent" : "root",
3893 back->full_backref ?
3894 (unsigned long long)tback->parent :
3895 (unsigned long long)tback->root, back);
3897 if (back->is_data) {
3898 dback = to_data_backref(back);
3899 if (dback->found_ref != dback->num_refs) {
3903 fprintf(stderr, "Incorrect local backref count"
3904 " on %llu %s %llu owner %llu"
3905 " offset %llu found %u wanted %u back %p\n",
3906 (unsigned long long)rec->start,
3907 back->full_backref ?
3909 back->full_backref ?
3910 (unsigned long long)dback->parent:
3911 (unsigned long long)dback->root,
3912 (unsigned long long)dback->owner,
3913 (unsigned long long)dback->offset,
3914 dback->found_ref, dback->num_refs, back);
3916 if (dback->disk_bytenr != rec->start) {
3920 fprintf(stderr, "Backref disk bytenr does not"
3921 " match extent record, bytenr=%llu, "
3922 "ref bytenr=%llu\n",
3923 (unsigned long long)rec->start,
3924 (unsigned long long)dback->disk_bytenr);
3927 if (dback->bytes != rec->nr) {
3931 fprintf(stderr, "Backref bytes do not match "
3932 "extent backref, bytenr=%llu, ref "
3933 "bytes=%llu, backref bytes=%llu\n",
3934 (unsigned long long)rec->start,
3935 (unsigned long long)rec->nr,
3936 (unsigned long long)dback->bytes);
3939 if (!back->is_data) {
3942 dback = to_data_backref(back);
3943 found += dback->found_ref;
/* The accumulated reference total must match what the record expects. */
3946 if (found != rec->refs) {
3950 fprintf(stderr, "Incorrect global backref count "
3951 "on %llu found %llu wanted %llu\n",
3952 (unsigned long long)rec->start,
3953 (unsigned long long)found,
3954 (unsigned long long)rec->refs);
/*
 * Drain @rec->backrefs, taking the first list entry on each iteration until
 * the list is empty.
 * NOTE(review): the unlink/free statements and the return value are not
 * visible here.
 */
3960 static int free_all_extent_backrefs(struct extent_record *rec)
3962 struct extent_backref *back;
3963 struct list_head *cur;
3964 while (!list_empty(&rec->backrefs)) {
3965 cur = rec->backrefs.next;
3966 back = to_extent_backref(cur);
/*
 * Empty @extent_cache: starting from the first cache entry, the enclosing
 * extent record is removed from the tree and its backrefs are released.
 * @fs_info is not used in the visible code.
 * NOTE(review): the loop construct and the release of the record itself are
 * not visible here.
 */
3973 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3974 struct cache_tree *extent_cache)
3976 struct cache_extent *cache;
3977 struct extent_record *rec;
3980 cache = first_cache_extent(extent_cache);
3983 rec = container_of(cache, struct extent_record, cache);
3984 remove_cache_extent(extent_cache, cache);
3985 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing is left to verify on it.
 *
 * All of the following must hold: content and owner ref were checked, the
 * reference count from the extent item equals the counted refs and is
 * non-zero, there are no duplicates, all_backpointers_checked() returns 0
 * (called with printing disabled), and none of bad_full_backref,
 * crossing_stripes or wrong_chunk_type is set. The record is then removed
 * from the cache, its backrefs are freed and it is unlinked from its list.
 *
 * NOTE(review): the release of the record memory and the return value are
 * not visible here.
 */
3990 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3991 struct extent_record *rec)
3993 if (rec->content_checked && rec->owner_ref_checked &&
3994 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3995 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3996 !rec->bad_full_backref && !rec->crossing_stripes &&
3997 !rec->wrong_chunk_type) {
3998 remove_cache_extent(extent_cache, &rec->cache);
3999 free_all_extent_backrefs(rec);
4000 list_del_init(&rec->list);
/*
 * Check that tree block @buf is really referenced by the tree named in its
 * header owner field.
 *
 * Fast path: the backrefs of @rec are scanned for a tree backref whose root
 * equals btrfs_header_owner(@buf); found_ref and full_backref are consulted
 * on the way. Slow path: the owner root is read with btrfs_read_fs_root(),
 * the first key of @buf is searched in it with lowest_level set one level
 * above @buf, and the block pointer found in that parent slot is compared
 * with buf->start.
 *
 * Return: on the slow path 0 if the parent points at @buf, 1 otherwise.
 * A record flagged is_root that reaches the slow path trips BUG_ON().
 * NOTE(review): the outcome of the fast path and the handling of read or
 * search failures are not visible here.
 */
4006 static int check_owner_ref(struct btrfs_root *root,
4007 struct extent_record *rec,
4008 struct extent_buffer *buf)
4010 struct extent_backref *node;
4011 struct tree_backref *back;
4012 struct btrfs_root *ref_root;
4013 struct btrfs_key key;
4014 struct btrfs_path path;
4015 struct extent_buffer *parent;
4020 list_for_each_entry(node, &rec->backrefs, list) {
4023 if (!node->found_ref)
4025 if (node->full_backref)
4027 back = to_tree_backref(node);
4028 if (btrfs_header_owner(buf) == back->root)
4031 BUG_ON(rec->is_root);
4033 /* try to find the block by search corresponding fs tree */
4034 key.objectid = btrfs_header_owner(buf);
4035 key.type = BTRFS_ROOT_ITEM_KEY;
4036 key.offset = (u64)-1;
4038 ref_root = btrfs_read_fs_root(root->fs_info, &key);
4039 if (IS_ERR(ref_root))
4042 level = btrfs_header_level(buf);
/* Take the first key of the block: item key or node key (selection not visible). */
4044 btrfs_item_key_to_cpu(buf, &key, 0);
4046 btrfs_node_key_to_cpu(buf, &key, 0);
4048 btrfs_init_path(&path);
/* Stop the search at the parent level so its slot can be inspected. */
4049 path.lowest_level = level + 1;
4050 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4054 parent = path.nodes[level + 1];
4055 if (parent && buf->start == btrfs_node_blockptr(parent,
4056 path.slots[level + 1]))
4059 btrfs_release_path(&path);
4060 return found ? 0 : 1;
/*
 * Scan the backrefs of @rec for a tree backref whose root is
 * BTRFS_EXTENT_TREE_OBJECTID; full_backref is consulted for each entry
 * before the root comparison.
 * NOTE(review): the handling of data backrefs and of full backrefs, and the
 * values returned, are not visible here.
 */
4063 static int is_extent_tree_record(struct extent_record *rec)
4065 struct list_head *cur = rec->backrefs.next;
4066 struct extent_backref *node;
4067 struct tree_backref *back;
4070 while(cur != &rec->backrefs) {
4071 node = to_extent_backref(cur);
4075 back = to_tree_backref(node);
4076 if (node->full_backref)
4078 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Register the block range (start, len) as a corrupt extent record.
 *
 * The range is looked up in @extent_cache and the matching extent record is
 * tested with is_extent_tree_record(). The parent key stored in the record
 * is converted to CPU byte order and passed, together with the range and
 * level 0, to btrfs_add_corrupt_extent_record(), whose result is returned.
 *
 * NOTE(review): the returns taken when the lookup finds nothing or when the
 * is_extent_tree_record() test fails are not visible here.
 */
4085 static int record_bad_block_io(struct btrfs_fs_info *info,
4086 struct cache_tree *extent_cache,
4089 struct extent_record *rec;
4090 struct cache_extent *cache;
4091 struct btrfs_key key;
4093 cache = lookup_cache_extent(extent_cache, start, len);
4097 rec = container_of(cache, struct extent_record, cache);
4098 if (!is_extent_tree_record(rec))
4101 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4102 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of tree block @buf.
 *
 * Node (header level != 0): the two btrfs_key_ptr entries are read and
 * written back crosswise; the key of slot 0 is then propagated upwards with
 * btrfs_fixup_low_keys() (the guarding condition is not visible here).
 *
 * Leaf: keys, data offsets and sizes of both items are read, the item data
 * is copied into two heap buffers and written back crosswise, the
 * offset/size fields of the two item headers are exchanged, and the keys are
 * exchanged through btrfs_set_item_key_unsafe() with path->slots[0] pointed
 * at each slot in turn.
 *
 * NOTE(review): item1_data is allocated with item1_size but is written to
 * item2_offset using item2_size (and the reverse for item2_data). If the two
 * sizes differ, one write reads past the end of its source buffer - please
 * verify whether equal sizes are guaranteed by the callers.
 * NOTE(review): allocation-failure handling, the release of the two buffers
 * and the return value are not visible here.
 */
4105 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4106 struct extent_buffer *buf, int slot)
4108 if (btrfs_header_level(buf)) {
4109 struct btrfs_key_ptr ptr1, ptr2;
4111 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4112 sizeof(struct btrfs_key_ptr));
4113 read_extent_buffer(buf, &ptr2,
4114 btrfs_node_key_ptr_offset(slot + 1),
4115 sizeof(struct btrfs_key_ptr));
4116 write_extent_buffer(buf, &ptr1,
4117 btrfs_node_key_ptr_offset(slot + 1),
4118 sizeof(struct btrfs_key_ptr));
4119 write_extent_buffer(buf, &ptr2,
4120 btrfs_node_key_ptr_offset(slot),
4121 sizeof(struct btrfs_key_ptr));
4123 struct btrfs_disk_key key;
4124 btrfs_node_key(buf, &key, 0);
4125 btrfs_fixup_low_keys(root, path, &key,
4126 btrfs_header_level(buf) + 1);
4129 struct btrfs_item *item1, *item2;
4130 struct btrfs_key k1, k2;
4131 char *item1_data, *item2_data;
4132 u32 item1_offset, item2_offset, item1_size, item2_size;
4134 item1 = btrfs_item_nr(slot);
4135 item2 = btrfs_item_nr(slot + 1);
4136 btrfs_item_key_to_cpu(buf, &k1, slot);
4137 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4138 item1_offset = btrfs_item_offset(buf, item1);
4139 item2_offset = btrfs_item_offset(buf, item2);
4140 item1_size = btrfs_item_size(buf, item1);
4141 item2_size = btrfs_item_size(buf, item2);
4143 item1_data = malloc(item1_size);
4146 item2_data = malloc(item2_size);
4152 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4153 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/* Crosswise write-back; note the length used is that of the destination item. */
4155 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4156 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4160 btrfs_set_item_offset(buf, item1, item2_offset);
4161 btrfs_set_item_offset(buf, item2, item1_offset);
4162 btrfs_set_item_size(buf, item1, item2_size);
4163 btrfs_set_item_size(buf, item2, item1_size);
/* btrfs_set_item_key_unsafe() acts on path->slots[0], so retarget it per slot. */
4165 path->slots[0] = slot;
4166 btrfs_set_item_key_unsafe(root, path, &k2);
4167 path->slots[0] = slot + 1;
4168 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Walk the block at path->nodes[path->lowest_level] and repair misordered
 * neighbouring keys.
 *
 * For each index i the keys at i and i + 1 are loaded, as node keys or as
 * item keys (presumably chosen by level - the selecting condition is not
 * visible here), and compared with btrfs_comp_cpu_keys().  Pairs that compare
 * as ascending are presumably skipped; otherwise swap_values() is applied at
 * i and the buffer is marked dirty.
 *
 * @trans is not referenced by the visible code.
 *
 * NOTE(review): the loop bound is btrfs_header_nritems(buf) - 1; if that
 * accessor returns an unsigned type an empty block would wrap the bound -
 * confirm callers never pass one.
 */
4173 static int fix_key_order(struct btrfs_trans_handle *trans,
4174 struct btrfs_root *root,
4175 struct btrfs_path *path)
4177 struct extent_buffer *buf;
4178 struct btrfs_key k1, k2;
4180 int level = path->lowest_level;
4183 buf = path->nodes[level];
4184 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4186 btrfs_node_key_to_cpu(buf, &k1, i);
4187 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4189 btrfs_item_key_to_cpu(buf, &k1, i);
4190 btrfs_item_key_to_cpu(buf, &k2, i + 1);
4192 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4194 ret = swap_values(root, path, buf, i);
4197 btrfs_mark_buffer_dirty(buf);
/*
 * Drop the item header at @slot from leaf @buf, for a limited set of key
 * types.
 *
 * Only DIR_INDEX, EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF and
 * EXTENT_DATA_REF keys pass the type filter (what is returned for other
 * types is not visible in this listing).  The deletion is reported on stdout,
 * the item headers after @slot are moved down by one with
 * memmove_extent_buffer(), nritems is decremented and the buffer is marked
 * dirty.  The key of item 0 is pushed to the parent via
 * btrfs_fixup_low_keys() (presumably only when slot 0 was removed - the
 * condition is not visible here).
 *
 * Only the btrfs_item header array is shifted by the visible code; item data
 * is not moved.  @trans is not referenced by the visible code.
 */
4203 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4204 struct btrfs_root *root,
4205 struct btrfs_path *path,
4206 struct extent_buffer *buf, int slot)
4208 struct btrfs_key key;
4209 int nritems = btrfs_header_nritems(buf);
4211 btrfs_item_key_to_cpu(buf, &key, slot);
4213 /* These are all the keys we can deal with missing. */
4214 if (key.type != BTRFS_DIR_INDEX_KEY &&
4215 key.type != BTRFS_EXTENT_ITEM_KEY &&
4216 key.type != BTRFS_METADATA_ITEM_KEY &&
4217 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4218 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4221 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4222 (unsigned long long)key.objectid, key.type,
4223 (unsigned long long)key.offset, slot, buf->start);
4224 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4225 btrfs_item_nr_offset(slot + 1),
4226 sizeof(struct btrfs_item) *
4227 (nritems - slot - 1));
4228 btrfs_set_header_nritems(buf, nritems - 1);
4230 struct btrfs_disk_key disk_key;
4232 btrfs_item_key(buf, &disk_key, 0);
4233 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4235 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * Layout rule being restored, as read from the comparisons below: item 0 must
 * end at BTRFS_LEAF_DATA_SIZE(root) and every later item must end where the
 * previous item starts.
 *
 *  - An item ending beyond that point is handed to delete_bogus_item(); an
 *    error message is printed on stderr on that path (presumably only when
 *    the deletion fails - the guard is not visible in this listing).
 *  - An item ending short of that point is shifted up by the gap: its data
 *    is moved with memmove_extent_buffer(), its offset field is rewritten
 *    and the buffer is marked dirty.
 *
 * BUG_ON(path->lowest_level) asserts that only leaves reach this function.
 */
4239 static int fix_item_offset(struct btrfs_trans_handle *trans,
4240 struct btrfs_root *root,
4241 struct btrfs_path *path)
4243 struct extent_buffer *buf;
4247 /* We should only get this for leaves */
4248 BUG_ON(path->lowest_level);
4249 buf = path->nodes[0];
4251 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4252 unsigned int shift = 0, offset;
4254 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4255 BTRFS_LEAF_DATA_SIZE(root)) {
4256 if (btrfs_item_end_nr(buf, i) >
4257 BTRFS_LEAF_DATA_SIZE(root)) {
4258 ret = delete_bogus_item(trans, root, path,
4262 fprintf(stderr, "item is off the end of the "
4263 "leaf, can't fix\n");
4267 shift = BTRFS_LEAF_DATA_SIZE(root) -
4268 btrfs_item_end_nr(buf, i);
4269 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4270 btrfs_item_offset_nr(buf, i - 1)) {
4271 if (btrfs_item_end_nr(buf, i) >
4272 btrfs_item_offset_nr(buf, i - 1)) {
4273 ret = delete_bogus_item(trans, root, path,
4277 fprintf(stderr, "items overlap, can't fix\n");
4281 shift = btrfs_item_offset_nr(buf, i - 1) -
4282 btrfs_item_end_nr(buf, i);
4287 printf("Shifting item nr %d by %u bytes in block %llu\n",
4288 i, shift, (unsigned long long)buf->start);
4289 offset = btrfs_item_offset_nr(buf, i);
4290 memmove_extent_buffer(buf,
4291 btrfs_leaf_data(buf) + offset + shift,
4292 btrfs_leaf_data(buf) + offset,
4293 btrfs_item_size_nr(buf, i));
4294 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4296 btrfs_mark_buffer_dirty(buf);
4300 * We may have moved things, in which case we want to exit so we don't
4301 * write those changes out. Once we have proper abort functionality in
4302 * progs this can be changed to something nicer.
4309 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4310 * then just return -EIO.
/*
 * Try to repair @buf in every root that references it.
 *
 * Only BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS
 * pass the status filter.  The referencing roots are collected with
 * btrfs_find_all_roots() for buf->start.  For each of them the fs root is
 * read, a transaction is started, and btrfs_search_slot() is run toward the
 * first key of @buf with path->lowest_level set to the header level of @buf
 * and path->skip_check_block set (last argument 1, presumably cow).  Then
 * fix_key_order() or fix_item_offset() is applied depending on @status and
 * the transaction is committed; commits are also visible on the failure
 * paths.
 *
 * NOTE(review): the error checks, the cleanup of the roots ulist and the
 * final return are not visible in this listing.  check_block() compares the
 * result against BTRFS_TREE_BLOCK_CLEAN.
 */
4312 static int try_to_fix_bad_block(struct btrfs_root *root,
4313 struct extent_buffer *buf,
4314 enum btrfs_tree_block_status status)
4316 struct btrfs_trans_handle *trans;
4317 struct ulist *roots;
4318 struct ulist_node *node;
4319 struct btrfs_root *search_root;
4320 struct btrfs_path *path;
4321 struct ulist_iterator iter;
4322 struct btrfs_key root_key, key;
4325 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4326 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4329 path = btrfs_alloc_path();
4333 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4336 btrfs_free_path(path);
4340 ULIST_ITER_INIT(&iter);
4341 while ((node = ulist_next(roots, &iter))) {
4342 root_key.objectid = node->val;
4343 root_key.type = BTRFS_ROOT_ITEM_KEY;
4344 root_key.offset = (u64)-1;
4346 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4353 trans = btrfs_start_transaction(search_root, 0);
4354 if (IS_ERR(trans)) {
4355 ret = PTR_ERR(trans);
4359 path->lowest_level = btrfs_header_level(buf);
4360 path->skip_check_block = 1;
4361 if (path->lowest_level)
4362 btrfs_node_key_to_cpu(buf, &key, 0);
4364 btrfs_item_key_to_cpu(buf, &key, 0);
4365 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4368 btrfs_commit_transaction(trans, search_root);
4371 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4372 ret = fix_key_order(trans, search_root, path);
4373 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4374 ret = fix_item_offset(trans, search_root, path);
4376 btrfs_commit_transaction(trans, search_root);
4379 btrfs_release_path(path);
4380 btrfs_commit_transaction(trans, search_root);
4383 btrfs_free_path(path);
/*
 * Validate a tree block that was just read and update its extent record.
 *
 * The record covering (buf->start, buf->len) is looked up in @extent_cache.
 * Its generation and info_level are filled from the header of @buf, and
 * info_objectid from the objectid of the first key when the block has items.
 * The block is then checked with btrfs_check_leaf() or btrfs_check_node()
 * against rec->parent_key.  A block that is not BTRFS_TREE_BLOCK_CLEAN may be
 * passed to try_to_fix_bad_block() (any extra guard is not visible in this
 * listing); if it is still not clean a bad block message goes to stderr.
 *
 * On the clean path content_checked is set, and owner_ref_checked is set
 * either because @flags carries BTRFS_BLOCK_FLAG_FULL_BACKREF or after
 * check_owner_ref() (presumably only when that call succeeds - the condition
 * is not visible here).  maybe_free_extent_rec() is called at the end.
 */
4387 static int check_block(struct btrfs_root *root,
4388 struct cache_tree *extent_cache,
4389 struct extent_buffer *buf, u64 flags)
4391 struct extent_record *rec;
4392 struct cache_extent *cache;
4393 struct btrfs_key key;
4394 enum btrfs_tree_block_status status;
4398 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4401 rec = container_of(cache, struct extent_record, cache);
4402 rec->generation = btrfs_header_generation(buf);
4404 level = btrfs_header_level(buf);
4405 if (btrfs_header_nritems(buf) > 0) {
4408 btrfs_item_key_to_cpu(buf, &key, 0);
4410 btrfs_node_key_to_cpu(buf, &key, 0);
4412 rec->info_objectid = key.objectid;
4414 rec->info_level = level;
4416 if (btrfs_is_leaf(buf))
4417 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4419 status = btrfs_check_node(root, &rec->parent_key, buf);
4421 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4423 status = try_to_fix_bad_block(root, buf, status);
4424 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4426 fprintf(stderr, "bad block %llu\n",
4427 (unsigned long long)buf->start);
4430 * Signal to callers we need to start the scan over
4431 * again since we'll have cowed blocks.
4436 rec->content_checked = 1;
4437 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4438 rec->owner_ref_checked = 1;
4440 ret = check_owner_ref(root, rec, buf);
4442 rec->owner_ref_checked = 1;
4446 maybe_free_extent_rec(extent_cache, rec);
/*
 * Search rec->backrefs for a tree backref matching (@parent, @root).
 *
 * Visible comparisons: back->parent against @parent and back->root against
 * @root, each paired with a test of node->full_backref.  Presumably a
 * non-zero @parent selects full backrefs matched by parent and otherwise
 * normal backrefs are matched by root - the selecting condition, the skip of
 * data backrefs and the return statements are not visible in this listing.
 */
4450 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4451 u64 parent, u64 root)
4453 struct list_head *cur = rec->backrefs.next;
4454 struct extent_backref *node;
4455 struct tree_backref *back;
4457 while(cur != &rec->backrefs) {
4458 node = to_extent_backref(cur);
4462 back = to_tree_backref(node);
4464 if (!node->full_backref)
4466 if (parent == back->parent)
4469 if (node->full_backref)
4471 if (back->root == root)
/*
 * Allocate a tree_backref, zero its embedded node and append it to
 * rec->backrefs.
 *
 * Two initialisations are visible: one stores @parent and sets
 * node.full_backref, the other clears node.full_backref.  Which one runs is
 * decided by a condition that is not visible in this listing (presumably
 * whether @parent is non-zero, with @root stored in the other case).
 *
 * Only ref->node is cleared by memset(); the remaining fields hold whatever
 * is assigned explicitly.
 */
4478 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4479 u64 parent, u64 root)
4481 struct tree_backref *ref = malloc(sizeof(*ref));
4485 memset(&ref->node, 0, sizeof(ref->node));
4487 ref->parent = parent;
4488 ref->node.full_backref = 1;
4491 ref->node.full_backref = 0;
4493 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Search rec->backrefs for a data backref.
 *
 * One branch matches on back->parent == @parent, the other on the triple
 * (root, owner, offset).  In the second branch, when found_ref is set and the
 * candidate already has node->found_ref, a candidate whose bytes or
 * disk_bytenr differ from @bytes / @disk_bytenr is treated specially
 * (presumably skipped so that the caller allocates a separate backref - the
 * statement is not visible in this listing).
 */
4498 static struct data_backref *find_data_backref(struct extent_record *rec,
4499 u64 parent, u64 root,
4500 u64 owner, u64 offset,
4502 u64 disk_bytenr, u64 bytes)
4504 struct list_head *cur = rec->backrefs.next;
4505 struct extent_backref *node;
4506 struct data_backref *back;
4508 while(cur != &rec->backrefs) {
4509 node = to_extent_backref(cur);
4513 back = to_data_backref(node);
4515 if (!node->full_backref)
4517 if (parent == back->parent)
4520 if (node->full_backref)
4522 if (back->root == root && back->owner == owner &&
4523 back->offset == offset) {
4524 if (found_ref && node->found_ref &&
4525 (back->bytes != bytes ||
4526 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data_backref, zero its embedded node, mark it is_data and append
 * it to rec->backrefs.
 *
 * Two initialisations are visible: one stores @parent and sets
 * node.full_backref, the other stores @offset and clears node.full_backref
 * (which one runs is decided by a condition not visible in this listing).
 * ref->bytes is set to max_size, and rec->max_size is raised to max_size
 * when it is smaller.
 */
4535 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4536 u64 parent, u64 root,
4537 u64 owner, u64 offset,
4540 struct data_backref *ref = malloc(sizeof(*ref));
4544 memset(&ref->node, 0, sizeof(ref->node));
4545 ref->node.is_data = 1;
4548 ref->parent = parent;
4551 ref->node.full_backref = 1;
4555 ref->offset = offset;
4556 ref->node.full_backref = 0;
4558 ref->bytes = max_size;
4561 list_add_tail(&ref->node.list, &rec->backrefs);
4562 if (max_size > rec->max_size)
4563 rec->max_size = max_size;
4567 /* Check if the type of extent matches with its chunk */
/*
 * Flag rec->wrong_chunk_type when the extent sits in a block group of the
 * wrong type.
 *
 * The block group is found with btrfs_lookup_first_block_group() for
 * rec->start.  A data extent (rec->metadata clear) must be in a
 * BTRFS_BLOCK_GROUP_DATA group.  A metadata extent must be in a SYSTEM or
 * METADATA group; when rec->backrefs is not empty the first backref refines
 * the check: a data backref on a tree block is flagged, a tree backref rooted
 * in BTRFS_CHUNK_TREE_OBJECTID requires a SYSTEM group, and the other
 * assignment visible below requires a METADATA group.
 */
4568 static void check_extent_type(struct extent_record *rec)
4570 struct btrfs_block_group_cache *bg_cache;
4572 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4576 /* data extent, check chunk directly*/
4577 if (!rec->metadata) {
4578 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4579 rec->wrong_chunk_type = 1;
4583 /* metadata extent, check the obvious case first */
4584 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4585 BTRFS_BLOCK_GROUP_METADATA))) {
4586 rec->wrong_chunk_type = 1;
4591 * Check SYSTEM extent, as it's also marked as metadata, we can only
4592 * make sure it's a SYSTEM extent by its backref
4594 if (!list_empty(&rec->backrefs)) {
4595 struct extent_backref *node;
4596 struct tree_backref *tback;
4599 node = to_extent_backref(rec->backrefs.next);
4600 if (node->is_data) {
4601 /* tree block shouldn't have data backref */
4602 rec->wrong_chunk_type = 1;
4605 tback = container_of(node, struct tree_backref, node);
4607 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4608 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4610 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4611 if (!(bg_cache->flags & bg_type))
4612 rec->wrong_chunk_type = 1;
4617 * Allocate a new extent record, fill default values from @tmpl and insert into
4618 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4619 * the cache, otherwise it fails.
/*
 * Build a new extent_record from @tmpl and insert it into @extent_cache.
 *
 * Copied from @tmpl: start, max_size, found_rec, content_checked,
 * owner_ref_checked, metadata, is_root, refs, extent_item_refs,
 * parent_generation and parent_key.  rec->nr is the larger of tmpl->nr and
 * tmpl->max_size, while the cache extent size is tmpl->nr.  num_duplicates,
 * bad_full_backref, crossing_stripes and wrong_chunk_type start at 0 and
 * flag_block_full_backref at FLAG_UNSET.
 *
 * After the insertion rec->nr is added to the file-level bytes_used counter,
 * crossing_stripes is computed with check_crossing_stripes() (presumably for
 * metadata only - the guard is not visible in this listing) and
 * check_extent_type() is run on the record.
 */
4621 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4622 struct extent_record *tmpl)
4624 struct extent_record *rec;
4627 rec = malloc(sizeof(*rec));
4630 rec->start = tmpl->start;
4631 rec->max_size = tmpl->max_size;
4632 rec->nr = max(tmpl->nr, tmpl->max_size);
4633 rec->found_rec = tmpl->found_rec;
4634 rec->content_checked = tmpl->content_checked;
4635 rec->owner_ref_checked = tmpl->owner_ref_checked;
4636 rec->num_duplicates = 0;
4637 rec->metadata = tmpl->metadata;
4638 rec->flag_block_full_backref = FLAG_UNSET;
4639 rec->bad_full_backref = 0;
4640 rec->crossing_stripes = 0;
4641 rec->wrong_chunk_type = 0;
4642 rec->is_root = tmpl->is_root;
4643 rec->refs = tmpl->refs;
4644 rec->extent_item_refs = tmpl->extent_item_refs;
4645 rec->parent_generation = tmpl->parent_generation;
4646 INIT_LIST_HEAD(&rec->backrefs);
4647 INIT_LIST_HEAD(&rec->dups);
4648 INIT_LIST_HEAD(&rec->list);
4649 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4650 rec->cache.start = tmpl->start;
4651 rec->cache.size = tmpl->nr;
4652 ret = insert_cache_extent(extent_cache, &rec->cache);
4657 bytes_used += rec->nr;
4660 rec->crossing_stripes = check_crossing_stripes(global_info,
4661 rec->start, global_info->tree_root->nodesize);
4662 check_extent_type(rec);
4667 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4669 * - refs - if found, increase refs
4670 * - is_root - if found, set
4671 * - content_checked - if found, set
4672 * - owner_ref_checked - if found, set
4674 * If not found, create a new one, initialize and insert.
/*
 * Merge @tmpl into the extent record covering (tmpl->start, tmpl->nr), or
 * create one through add_extent_rec_nolookup() when none is cached.
 *
 * On a cache hit, as far as the visible code shows:
 *  - rec->nr is reset from @tmpl;
 *  - when tmpl->found_rec is set and either the start differs or an extent
 *    item was already seen for the record, the record is queued on the
 *    file-level duplicate_extents list and a reduced copy of @tmpl is chained
 *    on rec->dups, bumping rec->num_duplicates;
 *  - extent_item_refs is taken from @tmpl, and a record that already had a
 *    value gets a message on stderr;
 *  - content_checked, owner_ref_checked, parent_key, parent_generation and
 *    max_size are updated from @tmpl;
 *  - the record goes through check_crossing_stripes(), check_extent_type()
 *    and maybe_free_extent_rec().
 *
 * NOTE(review): several guards and assignments (for example how dup is set)
 * are not visible in this listing.
 */
4676 static int add_extent_rec(struct cache_tree *extent_cache,
4677 struct extent_record *tmpl)
4679 struct extent_record *rec;
4680 struct cache_extent *cache;
4684 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4686 rec = container_of(cache, struct extent_record, cache);
4690 rec->nr = max(tmpl->nr, tmpl->max_size);
4693 * We need to make sure to reset nr to whatever the extent
4694 * record says was the real size, this way we can compare it to
4697 if (tmpl->found_rec) {
4698 if (tmpl->start != rec->start || rec->found_rec) {
4699 struct extent_record *tmp;
4702 if (list_empty(&rec->list))
4703 list_add_tail(&rec->list,
4704 &duplicate_extents);
4707 * We have to do this song and dance in case we
4708 * find an extent record that falls inside of
4709 * our current extent record but does not have
4710 * the same objectid.
4712 tmp = malloc(sizeof(*tmp));
4715 tmp->start = tmpl->start;
4716 tmp->max_size = tmpl->max_size;
4719 tmp->metadata = tmpl->metadata;
4720 tmp->extent_item_refs = tmpl->extent_item_refs;
4721 INIT_LIST_HEAD(&tmp->list);
4722 list_add_tail(&tmp->list, &rec->dups);
4723 rec->num_duplicates++;
4730 if (tmpl->extent_item_refs && !dup) {
4731 if (rec->extent_item_refs) {
4732 fprintf(stderr, "block %llu rec "
4733 "extent_item_refs %llu, passed %llu\n",
4734 (unsigned long long)tmpl->start,
4735 (unsigned long long)
4736 rec->extent_item_refs,
4737 (unsigned long long)tmpl->extent_item_refs);
4739 rec->extent_item_refs = tmpl->extent_item_refs;
4743 if (tmpl->content_checked)
4744 rec->content_checked = 1;
4745 if (tmpl->owner_ref_checked)
4746 rec->owner_ref_checked = 1;
4747 memcpy(&rec->parent_key, &tmpl->parent_key,
4748 sizeof(tmpl->parent_key));
4749 if (tmpl->parent_generation)
4750 rec->parent_generation = tmpl->parent_generation;
4751 if (rec->max_size < tmpl->max_size)
4752 rec->max_size = tmpl->max_size;
4755 * A metadata extent can't cross stripe_len boundary, otherwise
4756 * kernel scrub won't be able to handle it.
4757 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4761 rec->crossing_stripes = check_crossing_stripes(
4762 global_info, rec->start,
4763 global_info->tree_root->nodesize);
4764 check_extent_type(rec);
4765 maybe_free_extent_rec(extent_cache, rec);
4769 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Note a tree backref (@parent, @root) for the extent at @bytenr.
 *
 * When no record is cached for @bytenr a template record is inserted first
 * through add_extent_rec_nolookup() and the lookup is repeated.  A record
 * whose start differs from @bytenr is handled as an error case (the
 * statements are not visible in this listing).  The matching backref is
 * found with find_tree_backref() or allocated with alloc_tree_backref(),
 * then either node.found_ref or node.found_extent_tree is set on it; a
 * backref that already had the flag is reported on stderr.  Which of the two
 * flags is used presumably follows @found_ref (condition not visible here).
 *
 * The record is finally passed to check_extent_type() and
 * maybe_free_extent_rec().
 */
4774 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4775 u64 parent, u64 root, int found_ref)
4777 struct extent_record *rec;
4778 struct tree_backref *back;
4779 struct cache_extent *cache;
4782 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4784 struct extent_record tmpl;
4786 memset(&tmpl, 0, sizeof(tmpl));
4787 tmpl.start = bytenr;
4791 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4795 /* really a bug in the cache_extent implementation now */
4796 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4801 rec = container_of(cache, struct extent_record, cache);
4802 if (rec->start != bytenr) {
4804 * Several cause, from unaligned bytenr to over lapping extents
4809 back = find_tree_backref(rec, parent, root);
4811 back = alloc_tree_backref(rec, parent, root);
4817 if (back->node.found_ref) {
4818 fprintf(stderr, "Extent back ref already exists "
4819 "for %llu parent %llu root %llu \n",
4820 (unsigned long long)bytenr,
4821 (unsigned long long)parent,
4822 (unsigned long long)root);
4824 back->node.found_ref = 1;
4826 if (back->node.found_extent_tree) {
4827 fprintf(stderr, "Extent back ref already exists "
4828 "for %llu parent %llu root %llu \n",
4829 (unsigned long long)bytenr,
4830 (unsigned long long)parent,
4831 (unsigned long long)root);
4833 back->node.found_extent_tree = 1;
4835 check_extent_type(rec);
4836 maybe_free_extent_rec(extent_cache, rec);
/*
 * Note a data backref for the extent at @bytenr.
 *
 * When no record is cached for @bytenr a template record carrying @max_size
 * is inserted through add_extent_rec_nolookup() and the lookup is repeated.
 * rec->max_size is raised to @max_size when smaller.  The backref is found
 * with find_data_backref() or allocated with alloc_data_backref().
 *
 * Two update paths are visible (presumably selected by @found_ref - the
 * condition is not visible in this listing):
 *  - the reference path asserts num_refs == 1 and, for an already found
 *    backref, that its bytes equal @max_size; it then sets node.found_ref,
 *    increments back->found_ref, stores bytes and disk_bytenr and marks the
 *    record content_checked and owner_ref_checked;
 *  - the extent tree path reports an already flagged backref on stderr,
 *    stores num_refs and sets node.found_extent_tree.
 *
 * The record is finally passed to maybe_free_extent_rec().
 */
4840 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4841 u64 parent, u64 root, u64 owner, u64 offset,
4842 u32 num_refs, int found_ref, u64 max_size)
4844 struct extent_record *rec;
4845 struct data_backref *back;
4846 struct cache_extent *cache;
4849 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4851 struct extent_record tmpl;
4853 memset(&tmpl, 0, sizeof(tmpl));
4854 tmpl.start = bytenr;
4856 tmpl.max_size = max_size;
4858 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4862 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4867 rec = container_of(cache, struct extent_record, cache);
4868 if (rec->max_size < max_size)
4869 rec->max_size = max_size;
4872 * If found_ref is set then max_size is the real size and must match the
4873 * existing refs. So if we have already found a ref then we need to
4874 * make sure that this ref matches the existing one, otherwise we need
4875 * to add a new backref so we can notice that the backrefs don't match
4876 * and we need to figure out who is telling the truth. This is to
4877 * account for that awful fsync bug I introduced where we'd end up with
4878 * a btrfs_file_extent_item that would have its length include multiple
4879 * prealloc extents or point inside of a prealloc extent.
4881 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4884 back = alloc_data_backref(rec, parent, root, owner, offset,
4890 BUG_ON(num_refs != 1);
4891 if (back->node.found_ref)
4892 BUG_ON(back->bytes != max_size);
4893 back->node.found_ref = 1;
4894 back->found_ref += 1;
4895 back->bytes = max_size;
4896 back->disk_bytenr = bytenr;
4898 rec->content_checked = 1;
4899 rec->owner_ref_checked = 1;
4901 if (back->node.found_extent_tree) {
4902 fprintf(stderr, "Extent back ref already exists "
4903 "for %llu parent %llu root %llu "
4904 "owner %llu offset %llu num_refs %lu\n",
4905 (unsigned long long)bytenr,
4906 (unsigned long long)parent,
4907 (unsigned long long)root,
4908 (unsigned long long)owner,
4909 (unsigned long long)offset,
4910 (unsigned long)num_refs);
4912 back->num_refs = num_refs;
4913 back->node.found_extent_tree = 1;
4915 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue (bytenr, size) on @pending after recording it in @seen.
 *
 * The range is first added to @seen and the result kept in ret (presumably
 * returned early on failure so that an already seen range is not queued
 * again - that statement is not visible in this listing).  The result of the
 * insertion into @pending is not checked.
 */
4919 static int add_pending(struct cache_tree *pending,
4920 struct cache_tree *seen, u64 bytenr, u32 size)
4923 ret = add_cache_extent(seen, bytenr, size);
4926 add_cache_extent(pending, bytenr, size);
/*
 * Choose the next batch of blocks to process, filling @bits with
 * (start, size) pairs, at most @bits_nr of them.
 *
 * Sources are consulted in this order, as far as the visible code shows:
 *  1. the first entry of @reada, copied into bits[0];
 *  2. @nodes, searched from a position up to 32768 bytes before @last, with a
 *     fallback search from offset 0;
 *  3. @pending, searched from offset 0.
 * Consecutive cache entries are copied while entries remain and the count is
 * below @bits_nr.  When more than 8 slots are still free, entries of @pending
 * that follow bits[0] are appended as long as the gap to the previous one is
 * not larger than 32768 bytes.
 *
 * NOTE(review): node_start is unsigned long while @last is u64, so the value
 * is truncated where unsigned long is 32 bits wide.
 */
4930 static int pick_next_pending(struct cache_tree *pending,
4931 struct cache_tree *reada,
4932 struct cache_tree *nodes,
4933 u64 last, struct block_info *bits, int bits_nr,
4936 unsigned long node_start = last;
4937 struct cache_extent *cache;
4940 cache = search_cache_extent(reada, 0);
4942 bits[0].start = cache->start;
4943 bits[0].size = cache->size;
4948 if (node_start > 32768)
4949 node_start -= 32768;
4951 cache = search_cache_extent(nodes, node_start);
4953 cache = search_cache_extent(nodes, 0);
4956 cache = search_cache_extent(pending, 0);
4961 bits[ret].start = cache->start;
4962 bits[ret].size = cache->size;
4963 cache = next_cache_extent(cache);
4965 } while (cache && ret < bits_nr);
4971 bits[ret].start = cache->start;
4972 bits[ret].size = cache->size;
4973 cache = next_cache_extent(cache);
4975 } while (cache && ret < bits_nr);
4977 if (bits_nr - ret > 8) {
4978 u64 lookup = bits[0].start + bits[0].size;
4979 struct cache_extent *next;
4980 next = search_cache_extent(pending, lookup);
4982 if (next->start - lookup > 32768)
4984 bits[ret].start = next->start;
4985 bits[ret].size = next->size;
4986 lookup = next->start + next->size;
4990 next = next_cache_extent(next);
/*
 * Per-extent callback used by free_chunk_cache_tree(): recovers the
 * chunk_record that embeds @cache and unlinks it from its list and dextents
 * lists.  NOTE(review): the release of the record itself is not visible in
 * this listing - presumably it is freed right after.
 */
4998 static void free_chunk_record(struct cache_extent *cache)
5000 struct chunk_record *rec;
5002 rec = container_of(cache, struct chunk_record, cache);
5003 list_del_init(&rec->list);
5004 list_del_init(&rec->dextents);
/*
 * Tear down @chunk_cache by handing every cached extent to
 * free_chunk_record() through cache_tree_free_extents().
 */
5008 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5010 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node callback for the device cache: recovers the device_record that
 * embeds @node.  NOTE(review): the release of the record is not visible in
 * this listing - presumably it is freed right after.
 */
5013 static void free_device_record(struct rb_node *node)
5015 struct device_record *rec;
5017 rec = container_of(node, struct device_record, node);
/*
 * Instantiate the FREE_RB_BASED_TREE macro for device_cache with
 * free_device_record() as the per-node callback.  NOTE(review): the macro is
 * defined outside this file section; presumably it generates the function
 * that frees the whole device rb-tree - confirm against its definition.
 */
5021 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Register @bg_rec in @tree: the embedded cache extent is inserted into
 * tree->tree and the record is appended to the tree->block_groups list.
 * The result of insert_cache_extent() is kept in ret (presumably returned
 * early on failure, before the list insertion - not visible in this listing).
 */
5023 int insert_block_group_record(struct block_group_tree *tree,
5024 struct block_group_record *bg_rec)
5028 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5032 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Per-extent callback used by free_block_group_tree(): recovers the
 * block_group_record that embeds @cache and unlinks it from its list.
 * NOTE(review): the release of the record itself is not visible in this
 * listing - presumably it is freed right after.
 */
5036 static void free_block_group_record(struct cache_extent *cache)
5038 struct block_group_record *rec;
5040 rec = container_of(cache, struct block_group_record, cache);
5041 list_del_init(&rec->list);
/*
 * Tear down the block group cache of @tree by handing every cached extent to
 * free_block_group_record() through cache_tree_free_extents().
 */
5045 void free_block_group_tree(struct block_group_tree *tree)
5047 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Register @de_rec in @tree: the embedded cache extent is inserted with
 * insert_cache_extent2() and the record is appended to both orphan lists
 * (no_chunk_orphans and no_device_orphans) of the tree.  The insertion result
 * is kept in ret (presumably returned early on failure - not visible in this
 * listing).
 */
5050 int insert_device_extent_record(struct device_extent_tree *tree,
5051 struct device_extent_record *de_rec)
5056 * Device extent is a bit different from the other extents, because
5057 * the extents which belong to the different devices may have the
5058 * same start and size, so we need use the special extent cache
5059 * search/insert functions.
5061 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5065 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5066 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Per-extent callback used by free_device_extent_tree(): recovers the
 * device_extent_record that embeds @cache and unlinks it from chunk_list and
 * device_list when those are not empty.  NOTE(review): the release of the
 * record itself is not visible in this listing - presumably it is freed
 * right after.
 */
5070 static void free_device_extent_record(struct cache_extent *cache)
5072 struct device_extent_record *rec;
5074 rec = container_of(cache, struct device_extent_record, cache);
5075 if (!list_empty(&rec->chunk_list))
5076 list_del_init(&rec->chunk_list);
5077 if (!list_empty(&rec->device_list))
5078 list_del_init(&rec->device_list);
/*
 * Tear down the device extent cache of @tree by handing every cached extent
 * to free_device_extent_record() through cache_tree_free_extents().
 */
5082 void free_device_extent_tree(struct device_extent_tree *tree)
5084 cache_tree_free_extents(&tree->tree, free_device_extent_record);
5087 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Convert a v0 extent ref item at @slot of @leaf into a backref.
 *
 * The ref objectid decides the kind: below BTRFS_FIRST_FREE_OBJECTID the item
 * is recorded with add_tree_backref(), otherwise (presumably the else branch,
 * which is not visible in this listing) with add_data_backref() using the v0
 * ref count.  In both calls key.objectid is the extent bytenr and key.offset
 * is passed as the parent.
 */
5088 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5089 struct extent_buffer *leaf, int slot)
5091 struct btrfs_extent_ref_v0 *ref0;
5092 struct btrfs_key key;
5095 btrfs_item_key_to_cpu(leaf, &key, slot);
5096 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5097 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5098 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
5101 ret = add_data_backref(extent_cache, key.objectid, key.offset,
5102 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Allocate a chunk_record describing the chunk item at slot of @leaf.
 *
 * The record is calloc()ed with room for num_stripes stripes
 * (btrfs_chunk_record_size()).  It is keyed in the cache by
 * (key->offset, chunk length) and carries the leaf generation, the three key
 * fields, the chunk attributes read from the item (owner, stripe_len, type,
 * io_width, io_align, sector_size, num_stripes, sub_stripes) and, for each
 * stripe, devid, offset and dev_uuid.
 *
 * An allocation failure is reported on stderr; what follows the message is
 * not visible in this listing.
 */
5108 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5109 struct btrfs_key *key,
5112 struct btrfs_chunk *ptr;
5113 struct chunk_record *rec;
5116 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5117 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5119 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5121 fprintf(stderr, "memory allocation failed\n");
5125 INIT_LIST_HEAD(&rec->list);
5126 INIT_LIST_HEAD(&rec->dextents);
5129 rec->cache.start = key->offset;
5130 rec->cache.size = btrfs_chunk_length(leaf, ptr);
5132 rec->generation = btrfs_header_generation(leaf);
5134 rec->objectid = key->objectid;
5135 rec->type = key->type;
5136 rec->offset = key->offset;
5138 rec->length = rec->cache.size;
5139 rec->owner = btrfs_chunk_owner(leaf, ptr);
5140 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5141 rec->type_flags = btrfs_chunk_type(leaf, ptr);
5142 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5143 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5144 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5145 rec->num_stripes = num_stripes;
5146 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5148 for (i = 0; i < rec->num_stripes; ++i) {
5149 rec->stripes[i].devid =
5150 btrfs_stripe_devid_nr(leaf, ptr, i);
5151 rec->stripes[i].offset =
5152 btrfs_stripe_offset_nr(leaf, ptr, i);
5153 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5154 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at slot of @eb and add it to @chunk_cache.
 *
 * The item is first checked with btrfs_check_chunk_valid(); an invalid chunk
 * is reported through error() and, per the message text, ignored.  A valid
 * one is turned into a chunk_record by btrfs_new_chunk_record() and inserted
 * into @chunk_cache; a failed insertion is reported on stderr as an already
 * existing chunk (the cleanup after that message is not visible in this
 * listing).
 */
5161 static int process_chunk_item(struct cache_tree *chunk_cache,
5162 struct btrfs_key *key, struct extent_buffer *eb,
5165 struct chunk_record *rec;
5166 struct btrfs_chunk *chunk;
5169 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5171 * Do extra check for this chunk item,
5173 * It's still possible one can craft a leaf with CHUNK_ITEM, with
5174 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5175 * and owner<->key_type check.
5177 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5180 error("chunk(%llu, %llu) is not valid, ignore it",
5181 key->offset, btrfs_chunk_length(eb, chunk));
5184 rec = btrfs_new_chunk_record(eb, key, slot);
5185 ret = insert_cache_extent(chunk_cache, &rec->cache);
5187 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5188 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it into
 * the @dev_cache rb-tree ordered by device_record_compare.
 *
 * The record carries the leaf generation, the three key fields and the
 * devid, total_byte and byte_used values read from the item.  A failed
 * rb_insert() is reported on stderr as an already existing device (the
 * cleanup after that message is not visible in this listing).
 *
 * NOTE(review): rec->devid is assigned twice, first from key->offset and
 * then from btrfs_device_id(); only the second value survives.  The record
 * comes from malloc(), so fields not assigned here are uninitialised.
 */
5195 static int process_device_item(struct rb_root *dev_cache,
5196 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5198 struct btrfs_dev_item *ptr;
5199 struct device_record *rec;
5202 ptr = btrfs_item_ptr(eb,
5203 slot, struct btrfs_dev_item);
5205 rec = malloc(sizeof(*rec));
5207 fprintf(stderr, "memory allocation failed\n");
5211 rec->devid = key->offset;
5212 rec->generation = btrfs_header_generation(eb);
5214 rec->objectid = key->objectid;
5215 rec->type = key->type;
5216 rec->offset = key->offset;
5218 rec->devid = btrfs_device_id(eb, ptr);
5219 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5220 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5222 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5224 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Allocate a zeroed block_group_record for the block group item at slot of
 * @leaf.
 *
 * The cache extent is keyed by (key->objectid, key->offset); the record also
 * stores the leaf generation, the three key fields and the flags read from
 * the item with btrfs_disk_block_group_flags().  An allocation failure is
 * reported on stderr; what follows the message is not visible in this
 * listing.
 */
5231 struct block_group_record *
5232 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5235 struct btrfs_block_group_item *ptr;
5236 struct block_group_record *rec;
5238 rec = calloc(1, sizeof(*rec));
5240 fprintf(stderr, "memory allocation failed\n");
5244 rec->cache.start = key->objectid;
5245 rec->cache.size = key->offset;
5247 rec->generation = btrfs_header_generation(leaf);
5249 rec->objectid = key->objectid;
5250 rec->type = key->type;
5251 rec->offset = key->offset;
5253 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5254 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5256 INIT_LIST_HEAD(&rec->list);
/*
 * Create a block_group_record for the item at @slot of @eb and register it in
 * @block_group_cache through insert_block_group_record().  A failed insertion
 * is reported on stderr as an already existing block group (the cleanup
 * after that message is not visible in this listing).
 */
5261 static int process_block_group_item(struct block_group_tree *block_group_cache,
5262 struct btrfs_key *key,
5263 struct extent_buffer *eb, int slot)
5265 struct block_group_record *rec;
5268 rec = btrfs_new_block_group_record(eb, key, slot);
5269 ret = insert_block_group_record(block_group_cache, rec);
5271 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5272 rec->objectid, rec->offset);
/*
 * Allocate a zeroed device_extent_record for the dev extent item at @slot of
 * @leaf.
 *
 * The cache extent is keyed by objectid = key->objectid, start = key->offset
 * and size = the dev extent length.  The record also stores the leaf
 * generation, the three key fields, and the chunk objectid and chunk offset
 * read from the item (the field receiving the chunk offset is on a line not
 * visible in this listing).  An allocation failure is reported on stderr;
 * what follows the message is not visible either.
 */
5279 struct device_extent_record *
5280 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5281 struct btrfs_key *key, int slot)
5283 struct device_extent_record *rec;
5284 struct btrfs_dev_extent *ptr;
5286 rec = calloc(1, sizeof(*rec));
5288 fprintf(stderr, "memory allocation failed\n");
5292 rec->cache.objectid = key->objectid;
5293 rec->cache.start = key->offset;
5295 rec->generation = btrfs_header_generation(leaf);
5297 rec->objectid = key->objectid;
5298 rec->type = key->type;
5299 rec->offset = key->offset;
5301 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5302 rec->chunk_objecteid =
5303 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5305 btrfs_dev_extent_chunk_offset(leaf, ptr);
5306 rec->length = btrfs_dev_extent_length(leaf, ptr);
5307 rec->cache.size = rec->length;
5309 INIT_LIST_HEAD(&rec->chunk_list);
5310 INIT_LIST_HEAD(&rec->device_list);
/*
 * Create a device_extent_record for the dev extent item of @eb and register
 * it in @dev_extent_cache through insert_device_extent_record().  A failed
 * insertion is reported as an already existing device extent (the cleanup
 * after that message is not visible in this listing).
 */
5316 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5317 struct btrfs_key *key, struct extent_buffer *eb,
5320 struct device_extent_record *rec;
5323 rec = btrfs_new_device_extent_record(eb, key, slot);
5324 ret = insert_device_extent_record(dev_extent_cache, rec);
5327 "Device extent[%llu, %llu, %llu] existed.\n",
5328 rec->objectid, rec->offset, rec->length);
/*
 * Turn the EXTENT_ITEM or METADATA_ITEM at @slot of leaf @eb into an extent
 * record plus its inline backrefs.
 *
 * Size and sanity checks, as far as the visible code shows:
 *  - num_bytes is root->nodesize for BTRFS_METADATA_ITEM_KEY and key.offset
 *    for the other assignment visible below;
 *  - a bytenr not aligned to root->sectorsize is reported and ignored;
 *  - an item smaller than struct btrfs_extent_item is handled as a v0 extent
 *    item when BTRFS_COMPAT_EXTENT_TREE_V0 is defined, ending in a direct
 *    add_extent_rec() call;
 *  - a metadata extent whose length differs from nodesize, or a data extent
 *    whose length is not sectorsize aligned, is reported and ignored.
 *
 * After add_extent_rec() the inline refs are walked from just past the extent
 * item (skipping struct btrfs_tree_block_info for tree blocks keyed by
 * EXTENT_ITEM) to the end of the item.  Each ref is dispatched on its type to
 * add_tree_backref() or add_data_backref(); any other type is reported as a
 * corrupt extent record.  The walk advances by
 * btrfs_extent_inline_ref_size(type).
 *
 * NOTE(review): on the main path the results of add_extent_rec() and of both
 * add_data_backref() calls are not stored by the visible code.
 */
5335 static int process_extent_item(struct btrfs_root *root,
5336 struct cache_tree *extent_cache,
5337 struct extent_buffer *eb, int slot)
5339 struct btrfs_extent_item *ei;
5340 struct btrfs_extent_inline_ref *iref;
5341 struct btrfs_extent_data_ref *dref;
5342 struct btrfs_shared_data_ref *sref;
5343 struct btrfs_key key;
5344 struct extent_record tmpl;
5349 u32 item_size = btrfs_item_size_nr(eb, slot);
5355 btrfs_item_key_to_cpu(eb, &key, slot);
5357 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5359 num_bytes = root->nodesize;
5361 num_bytes = key.offset;
5364 if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5365 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5366 key.objectid, root->sectorsize);
5369 if (item_size < sizeof(*ei)) {
5370 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5371 struct btrfs_extent_item_v0 *ei0;
5372 BUG_ON(item_size != sizeof(*ei0));
5373 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5374 refs = btrfs_extent_refs_v0(eb, ei0);
5378 memset(&tmpl, 0, sizeof(tmpl));
5379 tmpl.start = key.objectid;
5380 tmpl.nr = num_bytes;
5381 tmpl.extent_item_refs = refs;
5382 tmpl.metadata = metadata;
5384 tmpl.max_size = num_bytes;
5386 return add_extent_rec(extent_cache, &tmpl);
5389 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5390 refs = btrfs_extent_refs(eb, ei);
5391 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5395 if (metadata && num_bytes != root->nodesize) {
5396 error("ignore invalid metadata extent, length %llu does not equal to %u",
5397 num_bytes, root->nodesize);
5400 if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5401 error("ignore invalid data extent, length %llu is not aligned to %u",
5402 num_bytes, root->sectorsize);
5406 memset(&tmpl, 0, sizeof(tmpl));
5407 tmpl.start = key.objectid;
5408 tmpl.nr = num_bytes;
5409 tmpl.extent_item_refs = refs;
5410 tmpl.metadata = metadata;
5412 tmpl.max_size = num_bytes;
5413 add_extent_rec(extent_cache, &tmpl);
5415 ptr = (unsigned long)(ei + 1);
5416 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5417 key.type == BTRFS_EXTENT_ITEM_KEY)
5418 ptr += sizeof(struct btrfs_tree_block_info);
5420 end = (unsigned long)ei + item_size;
5422 iref = (struct btrfs_extent_inline_ref *)ptr;
5423 type = btrfs_extent_inline_ref_type(eb, iref);
5424 offset = btrfs_extent_inline_ref_offset(eb, iref);
5426 case BTRFS_TREE_BLOCK_REF_KEY:
5427 ret = add_tree_backref(extent_cache, key.objectid,
5430 error("add_tree_backref failed: %s",
5433 case BTRFS_SHARED_BLOCK_REF_KEY:
5434 ret = add_tree_backref(extent_cache, key.objectid,
5437 error("add_tree_backref failed: %s",
5440 case BTRFS_EXTENT_DATA_REF_KEY:
5441 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5442 add_data_backref(extent_cache, key.objectid, 0,
5443 btrfs_extent_data_ref_root(eb, dref),
5444 btrfs_extent_data_ref_objectid(eb,
5446 btrfs_extent_data_ref_offset(eb, dref),
5447 btrfs_extent_data_ref_count(eb, dref),
5450 case BTRFS_SHARED_DATA_REF_KEY:
5451 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5452 add_data_backref(extent_cache, key.objectid, offset,
5454 btrfs_shared_data_ref_count(eb, sref),
5458 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5459 key.objectid, key.type, num_bytes);
5462 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the range [offset, offset + bytes) of a block group is described
 * by exactly one entry in cache->free_space_ctl, then unlink that entry.
 *
 * For every superblock mirror, btrfs_rmap_block() reports the logical
 * addresses inside this block group that back the mirror. Any such stripe
 * that overlaps the range is cut out of it first. A stripe that lands in the
 * middle of the range splits it: the left part is checked by a recursive
 * call and the loop continues with the right part.
 *
 * NOTE(review): short lines (braces, returns, simple assignments) are absent
 * from this listing, so the exact return values are not visible here.
 */
5469 static int check_cache_range(struct btrfs_root *root,
5470 struct btrfs_block_group_cache *cache,
5471 u64 offset, u64 bytes)
5473 struct btrfs_free_space *entry;
/* Trim the range around each superblock mirror mapped into this group. */
5479 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5480 bytenr = btrfs_sb_offset(i);
5481 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5482 cache->key.objectid, bytenr, 0,
5483 &logical, &nr, &stripe_len);
5488 if (logical[nr] + stripe_len <= offset)
5490 if (offset + bytes <= logical[nr])
5492 if (logical[nr] == offset) {
5493 if (stripe_len >= bytes) {
5497 bytes -= stripe_len;
5498 offset += stripe_len;
5499 } else if (logical[nr] < offset) {
5500 if (logical[nr] + stripe_len >=
5505 bytes = (offset + bytes) -
5506 (logical[nr] + stripe_len);
5507 offset = logical[nr] + stripe_len;
5510 * Could be tricky, the super may land in the
5511 * middle of the area we're checking. First
5512 * check the easiest case, it's at the end.
5514 if (logical[nr] + stripe_len >=
5516 bytes = logical[nr] - offset;
5520 /* Check the left side */
5521 ret = check_cache_range(root, cache,
5523 logical[nr] - offset);
5529 /* Now we continue with the right side */
5530 bytes = (offset + bytes) -
5531 (logical[nr] + stripe_len);
5532 offset = logical[nr] + stripe_len;
/*
 * What is left of the range must match a single free space entry, both in
 * start offset and in length; each mismatch is reported on stderr.
 */
5539 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5541 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5542 offset, offset+bytes);
5546 if (entry->offset != offset) {
5547 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5552 if (entry->bytes != bytes) {
5553 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5554 bytes, entry->bytes, offset);
/* Entry accounted for: remove it so that leftovers can be detected later. */
5558 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Compare the loaded free space entries of one block group against the
 * extent tree.
 *
 * The extent tree is walked from max(block group start,
 * BTRFS_SUPER_INFO_OFFSET). `last` tracks the end of the most recent extent;
 * every gap between `last` and the next EXTENT_ITEM/METADATA_ITEM key is
 * handed to check_cache_range(), as is the tail between `last` and the end of
 * the block group. Entries still present in the free_space_offset tree
 * afterwards are reported on stderr.
 *
 * NOTE(review): the `root` argument is replaced by fs_info->extent_root before
 * the walk. Return values are not visible in this listing.
 */
5563 static int verify_space_cache(struct btrfs_root *root,
5564 struct btrfs_block_group_cache *cache)
5566 struct btrfs_path *path;
5567 struct extent_buffer *leaf;
5568 struct btrfs_key key;
5572 path = btrfs_alloc_path();
5576 root = root->fs_info->extent_root;
5578 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5580 key.objectid = last;
5582 key.type = BTRFS_EXTENT_ITEM_KEY;
5584 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5589 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5590 ret = btrfs_next_leaf(root, path);
5598 leaf = path->nodes[0];
5599 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
/* Keys at or beyond the end of the block group are outside the walk. */
5600 if (key.objectid >= cache->key.offset + cache->key.objectid)
5602 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5603 key.type != BTRFS_METADATA_ITEM_KEY) {
/*
 * An extent starting exactly at `last` leaves no gap. EXTENT_ITEM keys carry
 * the length in key.offset, METADATA_ITEM extents are nodesize long.
 */
5608 if (last == key.objectid) {
5609 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5610 last = key.objectid + key.offset;
5612 last = key.objectid + root->nodesize;
/* Gap between the previous extent and this one: must be free space. */
5617 ret = check_cache_range(root, cache, last,
5618 key.objectid - last);
5621 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5622 last = key.objectid + key.offset;
5624 last = key.objectid + root->nodesize;
/* Tail of the block group after the last extent. */
5628 if (last < cache->key.objectid + cache->key.offset)
5629 ret = check_cache_range(root, cache, last,
5630 cache->key.objectid +
5631 cache->key.offset - last);
5634 btrfs_free_path(path);
5637 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5638 fprintf(stderr, "There are still entries left in the space "
/*
 * Verify the free space information of every block group.
 *
 * Block groups are looked up one after another starting behind the primary
 * superblock. For each one the free space is loaded either from the free
 * space tree (when the FREE_SPACE_TREE compat_ro bit is set, with the super
 * stripes excluded around the load) or from the free space cache, and then
 * checked with verify_space_cache().
 *
 * A message is printed when the cache generation in the superblock is not
 * -1ULL and differs from the super generation.
 *
 * Returns -EINVAL when the local `error` value is non-zero, 0 otherwise.
 * NOTE(review): the lines that set `error` are not visible in this listing.
 */
5646 static int check_space_cache(struct btrfs_root *root)
5648 struct btrfs_block_group_cache *cache;
5649 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5653 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5654 btrfs_super_generation(root->fs_info->super_copy) !=
5655 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5656 printf("cache and super generation don't match, space cache "
5657 "will be invalidated\n");
/* Progress reporting is optional and driven by the global task context. */
5661 if (ctx.progress_enabled) {
5662 ctx.tp = TASK_FREE_SPACE;
5663 task_start(ctx.info);
5667 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* The next lookup starts right behind the current block group. */
5671 start = cache->key.objectid + cache->key.offset;
5672 if (!cache->free_space_ctl) {
5673 if (btrfs_init_free_space_ctl(cache,
5674 root->sectorsize)) {
5679 btrfs_remove_free_space_cache(cache);
5682 if (btrfs_fs_compat_ro(root->fs_info,
5683 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5684 ret = exclude_super_stripes(root, cache);
5686 fprintf(stderr, "could not exclude super stripes: %s\n",
5691 ret = load_free_space_tree(root->fs_info, cache);
5692 free_excluded_extents(root, cache);
5694 fprintf(stderr, "could not load free space tree: %s\n",
5701 ret = load_free_space_cache(root->fs_info, cache);
5706 ret = verify_space_cache(root, cache);
5708 fprintf(stderr, "cache appears valid but isn't %Lu\n",
5709 cache->key.objectid);
5714 task_stop(ctx.info);
5716 return error ? -EINVAL : 0;
/*
 * Verify the data checksums of one csum item.
 *
 * @bytenr:      logical start of the data covered by the csum item
 * @num_bytes:   length of the covered data
 * @leaf_offset: offset of the csum item payload inside @eb
 * @eb:          leaf that holds the csum item
 *
 * The data is read into a temporary buffer with read_extent_data() and
 * checksummed one sectorsize block at a time. Each result is compared with
 * the expected value read from @eb at
 * leaf_offset + (block index) * csum_size. A mismatch is reported on stderr,
 * and btrfs_num_copies() is consulted to see whether another mirror exists.
 *
 * NOTE(review): the retry and return paths are not visible in this listing.
 */
5719 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5720 u64 num_bytes, unsigned long leaf_offset,
5721 struct extent_buffer *eb) {
5724 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5726 unsigned long csum_offset;
5730 u64 data_checked = 0;
/* Only whole sectors can be checksummed. */
5736 if (num_bytes % root->sectorsize)
5739 data = malloc(num_bytes);
5743 while (offset < num_bytes) {
5746 read_len = num_bytes - offset;
5747 /* read as much as possible in one call */
5748 ret = read_extent_data(root, data + offset,
5749 bytenr + offset, &read_len, mirror);
5753 /* verify the checksum of every sectorsize block that was read */
5754 while (data_checked < read_len) {
5756 tmp = offset + data_checked;
5758 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5759 csum, root->sectorsize);
5760 btrfs_csum_final(csum, (u8 *)&csum);
/* One csum_size slot per sector, indexed from the start of the item. */
5762 csum_offset = leaf_offset +
5763 tmp / root->sectorsize * csum_size;
5764 read_extent_buffer(eb, (char *)&csum_expected,
5765 csum_offset, csum_size);
5766 /* try another mirror */
5767 if (csum != csum_expected) {
5768 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5769 mirror, bytenr + tmp,
5770 csum, csum_expected);
5771 num_copies = btrfs_num_copies(
5772 &root->fs_info->mapping_tree,
5774 if (mirror < num_copies - 1) {
5779 data_checked += root->sectorsize;
/*
 * Check that the range [bytenr, bytenr + num_bytes) is covered by
 * EXTENT_ITEMs in the extent tree.
 *
 * The search starts at (bytenr, EXTENT_ITEM, -1), steps back to the closest
 * preceding items and then walks forward. Every extent that overlaps the
 * range trims it. An extent that sits in the middle of the range is handled
 * by a recursive call for the right part while the left part stays with the
 * current walk. If part of the range is still uncovered at the end and no
 * error occurred, the remaining range is reported on stderr.
 *
 * NOTE(review): return values are not visible in this listing.
 */
5788 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5791 struct btrfs_path *path;
5792 struct extent_buffer *leaf;
5793 struct btrfs_key key;
5796 path = btrfs_alloc_path();
5798 fprintf(stderr, "Error allocating path\n");
/* Offset -1 positions the search behind every extent item at bytenr. */
5802 key.objectid = bytenr;
5803 key.type = BTRFS_EXTENT_ITEM_KEY;
5804 key.offset = (u64)-1;
5807 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5810 fprintf(stderr, "Error looking up extent record %d\n", ret);
5811 btrfs_free_path(path);
5814 if (path->slots[0] > 0) {
5817 ret = btrfs_prev_leaf(root, path);
5820 } else if (ret > 0) {
5827 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5830 * Block group items come before extent items if they have the same
5831 * bytenr, so walk back one more just in case. Dear future traveller,
5832 * first congrats on mastering time travel. Now if it's not too much
5833 * trouble could you go back to 2006 and tell Chris to make the
5834 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5835 * EXTENT_ITEM_KEY please?
5837 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5838 if (path->slots[0] > 0) {
5841 ret = btrfs_prev_leaf(root, path);
5844 } else if (ret > 0) {
5849 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5853 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5854 ret = btrfs_next_leaf(root, path);
5856 fprintf(stderr, "Error going to next leaf "
5858 btrfs_free_path(path);
5864 leaf = path->nodes[0];
5865 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5866 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5870 if (key.objectid + key.offset < bytenr) {
5874 if (key.objectid > bytenr + num_bytes)
5877 if (key.objectid == bytenr) {
5878 if (key.offset >= num_bytes) {
5882 num_bytes -= key.offset;
5883 bytenr += key.offset;
5884 } else if (key.objectid < bytenr) {
5885 if (key.objectid + key.offset >= bytenr + num_bytes) {
5889 num_bytes = (bytenr + num_bytes) -
5890 (key.objectid + key.offset);
5891 bytenr = key.objectid + key.offset;
5893 if (key.objectid + key.offset < bytenr + num_bytes) {
5894 u64 new_start = key.objectid + key.offset;
5895 u64 new_bytes = bytenr + num_bytes - new_start;
5898 * Weird case, the extent is in the middle of
5899 * our range, we'll have to search one side
5900 * and then the other. Not sure if this happens
5901 * in real life, but no harm in coding it up
5902 * anyway just in case.
5904 btrfs_release_path(path);
5905 ret = check_extent_exists(root, new_start,
5908 fprintf(stderr, "Right section didn't "
5912 num_bytes = key.objectid - bytenr;
5915 num_bytes = key.objectid - bytenr;
/* Whatever is still uncovered here has no extent record. */
5922 if (num_bytes && !ret) {
5923 fprintf(stderr, "There are no extents for csum range "
5924 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5928 btrfs_free_path(path);
/*
 * Walk the csum tree and make sure every checksummed range has an extent
 * record.
 *
 * For each EXTENT_CSUM item the covered data length is
 * (item size / csum_size) * sectorsize. When the global check_data_csum is
 * set, the data itself is verified with check_extent_csums(). Consecutive
 * items are merged into one (offset, num_bytes) range. When a key does not
 * continue the current range, the accumulated range is passed to
 * check_extent_exists() and a new range starts at that key.
 *
 * NOTE(review): the `root` argument is replaced by fs_info->csum_root. Return
 * values are not visible in this listing.
 */
5932 static int check_csums(struct btrfs_root *root)
5934 struct btrfs_path *path;
5935 struct extent_buffer *leaf;
5936 struct btrfs_key key;
5937 u64 offset = 0, num_bytes = 0;
5938 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5942 unsigned long leaf_offset;
5944 root = root->fs_info->csum_root;
5945 if (!extent_buffer_uptodate(root->node)) {
5946 fprintf(stderr, "No valid csum tree found\n");
5950 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5951 key.type = BTRFS_EXTENT_CSUM_KEY;
5954 path = btrfs_alloc_path();
5958 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5960 fprintf(stderr, "Error searching csum tree %d\n", ret);
5961 btrfs_free_path(path);
5965 if (ret > 0 && path->slots[0])
5970 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5971 ret = btrfs_next_leaf(root, path);
5973 fprintf(stderr, "Error going to next leaf "
5980 leaf = path->nodes[0];
5982 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5983 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Each csum_size slot of the item covers one sector of data. */
5988 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5989 csum_size) * root->sectorsize;
/* Reading and checksumming the data is optional. */
5990 if (!check_data_csum)
5991 goto skip_csum_check;
5992 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5993 ret = check_extent_csums(root, key.offset, data_len,
5999 offset = key.offset;
/* Key does not continue the current range: check it, then start anew. */
6000 } else if (key.offset != offset + num_bytes) {
6001 ret = check_extent_exists(root, offset, num_bytes);
6003 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6004 "there is no extent record\n",
6005 offset, offset+num_bytes);
6008 offset = key.offset;
6011 num_bytes += data_len;
6015 btrfs_free_path(path);
/*
 * Compare @key against @drop_key field by field in the order objectid, type,
 * offset. A later field is only consulted when the earlier ones are equal.
 *
 * NOTE(review): the return statements are not visible in this listing.
 * Presumably a non-zero result means that @key sorts before @drop_key; confirm
 * against the full source.
 */
6019 static int is_dropped_key(struct btrfs_key *key,
6020 struct btrfs_key *drop_key) {
6021 if (key->objectid < drop_key->objectid)
6023 else if (key->objectid == drop_key->objectid) {
6024 if (key->type < drop_key->type)
6026 else if (key->type == drop_key->type) {
6027 if (key->offset < drop_key->offset)
6035 * Here are the rules for FULL_BACKREF.
6037 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6038 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6040 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
6041 * if it happened after the relocation occurred since we'll have dropped the
6042 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6043 * have no real way to know for sure.
6045 * We process the blocks one root at a time, and we start from the lowest root
6046 * objectid and go to the highest. So we can just lookup the owner backref for
6047 * the record and if we don't find it then we know it doesn't exist and we have
6050 * FIXME: if we ever start reclaiming root objectids then we need to fix this
6051 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6052 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether BTRFS_BLOCK_FLAG_FULL_BACKREF applies to tree block @buf
 * when it is reached through the root described by @ri, and store the result
 * in *flags.
 *
 * The decision uses the extent record cached for buf->start together with
 * the root objectid, the root bytenr, the RELOC header flag, the header owner
 * and a lookup of the owner tree backref. Whenever the outcome disagrees with
 * a value already stored in rec->flag_block_full_backref (anything other than
 * FLAG_UNSET), rec->bad_full_backref is set.
 *
 * NOTE(review): the branch targets and return values are not visible in this
 * listing.
 */
6054 static int calc_extent_flag(struct btrfs_root *root,
6055 struct cache_tree *extent_cache,
6056 struct extent_buffer *buf,
6057 struct root_item_record *ri,
6060 struct extent_record *rec;
6061 struct cache_extent *cache;
6062 struct tree_backref *tback;
6065 cache = lookup_cache_extent(extent_cache, buf->start, 1);
6066 /* we have added this extent before */
6070 rec = container_of(cache, struct extent_record, cache);
6073 * Except file/reloc tree, we can not have
6076 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6081 if (buf->start == ri->bytenr)
6084 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6087 owner = btrfs_header_owner(buf);
6088 if (owner == ri->objectid)
6091 tback = find_tree_backref(rec, 0, owner);
/* Outcome without FULL_BACKREF: flag a record that was stored differently. */
6096 if (rec->flag_block_full_backref != FLAG_UNSET &&
6097 rec->flag_block_full_backref != 0)
6098 rec->bad_full_backref = 1;
/* Outcome with FULL_BACKREF: same consistency check against the record. */
6101 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6102 if (rec->flag_block_full_backref != FLAG_UNSET &&
6103 rec->flag_block_full_backref != 1)
6104 rec->bad_full_backref = 1;
/*
 * Print a one-line diagnostic on stderr naming a key type that was found in
 * a root where it is not expected. The output has the form
 * "Invalid key type(<type>) found in root(<root>)".
 */
6108 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6110 fprintf(stderr, "Invalid key type(");
6111 print_key_type(stderr, 0, key_type);
6112 fprintf(stderr, ") found in root(");
6113 print_objectid(stderr, rootid, 0);
6114 fprintf(stderr, ")\n");
6118 * Check if the key is valid with its extent buffer.
6120 * This is an early check in case an invalid key exists in an extent buffer.
6121 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check that an item of type @key_type may appear in the tree @rootid.
 *
 * Visible rules:
 *   DEV_ITEM, CHUNK_ITEM                       - chunk tree only
 *   EXTENT_ITEM, METADATA_ITEM, BLOCK_GROUP_ITEM - extent tree only
 *   ROOT_ITEM                                  - root tree only
 *   DEV_EXTENT                                 - device tree only
 * A violation is reported through report_mismatch_key_root().
 *
 * NOTE(review): return values are not visible in this listing.
 */
6124 static int check_type_with_root(u64 rootid, u8 key_type)
6127 /* Only valid in chunk tree */
6128 case BTRFS_DEV_ITEM_KEY:
6129 case BTRFS_CHUNK_ITEM_KEY:
6130 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6133 /* valid in csum and log tree */
/*
 * NOTE(review): the label below is a tree objectid, while every neighbouring
 * label is a key type. BTRFS_EXTENT_CSUM_KEY may have been intended. The rest
 * of the condition is not visible here, so confirm against the full source
 * before changing it.
 */
6134 case BTRFS_CSUM_TREE_OBJECTID:
6135 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6139 case BTRFS_EXTENT_ITEM_KEY:
6140 case BTRFS_METADATA_ITEM_KEY:
6141 case BTRFS_BLOCK_GROUP_ITEM_KEY:
6142 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6145 case BTRFS_ROOT_ITEM_KEY:
6146 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6149 case BTRFS_DEV_EXTENT_KEY:
6150 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6156 report_mismatch_key_root(key_type, rootid);
/*
 * Process the next tree block of the current root scan.
 *
 * Steps visible in this listing:
 *   1. pick_next_pending() selects a batch of blocks; readahead is issued for
 *      the batch when it is not already tracked in @reada.
 *   2. The first block of the batch is removed from @pending, @reada and
 *      @nodes and read with read_tree_block(). An unreadable block is
 *      recorded with record_bad_block_io().
 *   3. The FULL_BACKREF flag is taken from the extent tree
 *      (btrfs_lookup_extent_info) or computed with calc_extent_flag(),
 *      cross-checked against the owner, and stored in the extent record.
 *   4. check_block() validates the block.
 *   5. Leaf: every item is dispatched on its key type to the matching
 *      process_*() or add_*_backref() helper. File extents also update the
 *      global data byte counters.
 *      Node: every child pointer gets an extent record and a tree backref and
 *      is queued in @nodes or @pending.
 *   6. Global btree statistics are updated and the buffer is released.
 *
 * NOTE(review): several parameters, branch targets and the return values are
 * on lines that are absent from this listing.
 */
6160 static int run_next_block(struct btrfs_root *root,
6161 struct block_info *bits,
6164 struct cache_tree *pending,
6165 struct cache_tree *seen,
6166 struct cache_tree *reada,
6167 struct cache_tree *nodes,
6168 struct cache_tree *extent_cache,
6169 struct cache_tree *chunk_cache,
6170 struct rb_root *dev_cache,
6171 struct block_group_tree *block_group_cache,
6172 struct device_extent_tree *dev_extent_cache,
6173 struct root_item_record *ri)
6175 struct extent_buffer *buf;
6176 struct extent_record *rec = NULL;
6187 struct btrfs_key key;
6188 struct cache_extent *cache;
/* Pick the next batch of blocks and start readahead for it. */
6191 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6192 bits_nr, &reada_bits);
6197 for(i = 0; i < nritems; i++) {
6198 ret = add_cache_extent(reada, bits[i].start,
6203 /* fixme, get the parent transid */
6204 readahead_tree_block(root, bits[i].start,
/* Only the first block of the batch is processed in this call. */
6208 *last = bits[0].start;
6209 bytenr = bits[0].start;
6210 size = bits[0].size;
/* Drop the block from every work list that may still track it. */
6212 cache = lookup_cache_extent(pending, bytenr, size);
6214 remove_cache_extent(pending, cache);
6217 cache = lookup_cache_extent(reada, bytenr, size);
6219 remove_cache_extent(reada, cache);
6222 cache = lookup_cache_extent(nodes, bytenr, size);
6224 remove_cache_extent(nodes, cache);
/* The extent record, when present, supplies the expected generation. */
6227 cache = lookup_cache_extent(extent_cache, bytenr, size);
6229 rec = container_of(cache, struct extent_record, cache);
6230 gen = rec->parent_generation;
6233 /* fixme, get the real parent transid */
6234 buf = read_tree_block(root, bytenr, size, gen);
6235 if (!extent_buffer_uptodate(buf)) {
6236 record_bad_block_io(root->fs_info,
6237 extent_cache, bytenr, size);
6241 nritems = btrfs_header_nritems(buf);
/* With init_extent_tree set the extent tree is not consulted for flags. */
6244 if (!init_extent_tree) {
6245 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6246 btrfs_header_level(buf), 1, NULL,
6249 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6251 fprintf(stderr, "Couldn't calc extent flags\n");
6252 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6257 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6259 fprintf(stderr, "Couldn't calc extent flags\n");
6260 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6264 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6266 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6267 ri->objectid == btrfs_header_owner(buf)) {
6269 * Ok we got to this block from it's original owner and
6270 * we have FULL_BACKREF set. Relocation can leave
6271 * converted blocks over so this is altogether possible,
6272 * however it's not possible if the generation > the
6273 * last snapshot, so check for this case.
6275 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6276 btrfs_header_generation(buf) > ri->last_snapshot) {
6277 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6278 rec->bad_full_backref = 1;
6283 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6284 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6285 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6286 rec->bad_full_backref = 1;
/* Remember the final decision in the extent record. */
6290 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6291 rec->flag_block_full_backref = 1;
6295 rec->flag_block_full_backref = 0;
6297 owner = btrfs_header_owner(buf);
6300 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: dispatch every item on its key type. */
6304 if (btrfs_is_leaf(buf)) {
6305 btree_space_waste += btrfs_leaf_free_space(root, buf);
6306 for (i = 0; i < nritems; i++) {
6307 struct btrfs_file_extent_item *fi;
6308 btrfs_item_key_to_cpu(buf, &key, i);
6310 * Check key type against the leaf owner.
6311 * Could filter quite a lot of early error if
6314 if (check_type_with_root(btrfs_header_owner(buf),
6316 fprintf(stderr, "ignoring invalid key\n");
6319 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6320 process_extent_item(root, extent_cache, buf,
6324 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6325 process_extent_item(root, extent_cache, buf,
6329 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6331 btrfs_item_size_nr(buf, i);
6334 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6335 process_chunk_item(chunk_cache, &key, buf, i);
6338 if (key.type == BTRFS_DEV_ITEM_KEY) {
6339 process_device_item(dev_cache, &key, buf, i);
6342 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6343 process_block_group_item(block_group_cache,
6347 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6348 process_device_extent_item(dev_extent_cache,
6353 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6354 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6355 process_extent_ref_v0(extent_cache, buf, i);
/* Keyed (non-inline) backref items found in the extent tree. */
6362 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6363 ret = add_tree_backref(extent_cache,
6364 key.objectid, 0, key.offset, 0);
6366 error("add_tree_backref failed: %s",
6370 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6371 ret = add_tree_backref(extent_cache,
6372 key.objectid, key.offset, 0, 0);
6374 error("add_tree_backref failed: %s",
6378 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6379 struct btrfs_extent_data_ref *ref;
6380 ref = btrfs_item_ptr(buf, i,
6381 struct btrfs_extent_data_ref);
6382 add_data_backref(extent_cache,
6384 btrfs_extent_data_ref_root(buf, ref),
6385 btrfs_extent_data_ref_objectid(buf,
6387 btrfs_extent_data_ref_offset(buf, ref),
6388 btrfs_extent_data_ref_count(buf, ref),
6389 0, root->sectorsize);
6392 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6393 struct btrfs_shared_data_ref *ref;
6394 ref = btrfs_item_ptr(buf, i,
6395 struct btrfs_shared_data_ref);
6396 add_data_backref(extent_cache,
6397 key.objectid, key.offset, 0, 0, 0,
6398 btrfs_shared_data_ref_count(buf, ref),
6399 0, root->sectorsize);
/* Orphan items may be queued on the global delete_items list. */
6402 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6403 struct bad_item *bad;
6405 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6409 bad = malloc(sizeof(struct bad_item));
6412 INIT_LIST_HEAD(&bad->list);
6413 memcpy(&bad->key, &key,
6414 sizeof(struct btrfs_key));
6415 bad->root_id = owner;
6416 list_add_tail(&bad->list, &delete_items);
/* From here on only regular file extent items are of interest. */
6419 if (key.type != BTRFS_EXTENT_DATA_KEY)
6421 fi = btrfs_item_ptr(buf, i,
6422 struct btrfs_file_extent_item);
6423 if (btrfs_file_extent_type(buf, fi) ==
6424 BTRFS_FILE_EXTENT_INLINE)
6426 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6429 data_bytes_allocated +=
6430 btrfs_file_extent_disk_num_bytes(buf, fi);
6431 if (data_bytes_allocated < root->sectorsize) {
6434 data_bytes_referenced +=
6435 btrfs_file_extent_num_bytes(buf, fi);
6436 add_data_backref(extent_cache,
6437 btrfs_file_extent_disk_bytenr(buf, fi),
6438 parent, owner, key.objectid, key.offset -
6439 btrfs_file_extent_offset(buf, fi), 1, 1,
6440 btrfs_file_extent_disk_num_bytes(buf, fi));
6444 struct btrfs_key first_key;
6446 first_key.objectid = 0;
6449 btrfs_item_key_to_cpu(buf, &first_key, 0);
6450 level = btrfs_header_level(buf);
/* Node: record every child pointer and queue the child block. */
6451 for (i = 0; i < nritems; i++) {
6452 struct extent_record tmpl;
6454 ptr = btrfs_node_blockptr(buf, i);
6455 size = root->nodesize;
6456 btrfs_node_key_to_cpu(buf, &key, i);
6458 if ((level == ri->drop_level)
6459 && is_dropped_key(&key, &ri->drop_key)) {
6464 memset(&tmpl, 0, sizeof(tmpl));
6465 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6466 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6471 tmpl.max_size = size;
6472 ret = add_extent_rec(extent_cache, &tmpl);
6476 ret = add_tree_backref(extent_cache, ptr, parent,
6479 error("add_tree_backref failed: %s",
6485 add_pending(nodes, seen, ptr, size);
6487 add_pending(pending, seen, ptr, size);
6490 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6491 nritems) * sizeof(struct btrfs_key_ptr);
/* Global statistics, keyed on the owner recorded in the block header. */
6493 total_btree_bytes += buf->len;
6494 if (fs_root_objectid(btrfs_header_owner(buf)))
6495 total_fs_tree_bytes += buf->len;
6496 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6497 total_extent_tree_bytes += buf->len;
6498 if (!found_old_backref &&
6499 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6500 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6501 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6502 found_old_backref = 1;
6504 free_extent_buffer(buf);
/*
 * Seed the scan with the root block @buf of a tree.
 *
 * The block is queued in @nodes when its level is above 0 and in @pending
 * otherwise. An extent record covering buf->start / buf->len is added to
 * @extent_cache, followed by a tree backref. The backref uses buf->start as
 * the parent for the reloc tree or for blocks with a backref revision older
 * than BTRFS_MIXED_BACKREF_REV, and the root objectid otherwise.
 *
 * NOTE(review): the remaining parameters and the return value are on lines
 * that are absent from this listing.
 */
6508 static int add_root_to_pending(struct extent_buffer *buf,
6509 struct cache_tree *extent_cache,
6510 struct cache_tree *pending,
6511 struct cache_tree *seen,
6512 struct cache_tree *nodes,
6515 struct extent_record tmpl;
/* Interior blocks and leaves are tracked on separate work lists. */
6518 if (btrfs_header_level(buf) > 0)
6519 add_pending(nodes, seen, buf->start, buf->len);
6521 add_pending(pending, seen, buf->start, buf->len);
6523 memset(&tmpl, 0, sizeof(tmpl));
6524 tmpl.start = buf->start;
6529 tmpl.max_size = buf->len;
6530 add_extent_rec(extent_cache, &tmpl);
6532 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6533 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6534 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6537 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6542 /* as we fix the tree, we might be deleting blocks that
6543 * we're tracking for repair. This hook makes sure we
6544 * remove any backrefs for blocks as we are fixing them.
/*
 * Hook called when an extent reference is dropped while the tree is being
 * repaired. It keeps the cached extent records in
 * fs_info->fsck_extent_cache in step with the change.
 *
 * The reference is treated as a data reference when @owner is at least
 * BTRFS_FIRST_FREE_OBJECTID, and as a tree block reference otherwise. The
 * matching backref of the cached record has its counters and found flags
 * reduced, and finally maybe_free_extent_rec() is given the record.
 *
 * NOTE(review): the last parameter, the early exits and the return value are
 * on lines that are absent from this listing.
 */
6546 static int free_extent_hook(struct btrfs_trans_handle *trans,
6547 struct btrfs_root *root,
6548 u64 bytenr, u64 num_bytes, u64 parent,
6549 u64 root_objectid, u64 owner, u64 offset,
6552 struct extent_record *rec;
6553 struct cache_extent *cache;
6555 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6557 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6558 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6562 rec = container_of(cache, struct extent_record, cache);
/* Data reference: counters are reduced by refs_to_drop. */
6564 struct data_backref *back;
6565 back = find_data_backref(rec, parent, root_objectid, owner,
6566 offset, 1, bytenr, num_bytes);
6569 if (back->node.found_ref) {
6570 back->found_ref -= refs_to_drop;
6572 rec->refs -= refs_to_drop;
6574 if (back->node.found_extent_tree) {
6575 back->num_refs -= refs_to_drop;
6576 if (rec->extent_item_refs)
6577 rec->extent_item_refs -= refs_to_drop;
6579 if (back->found_ref == 0)
6580 back->node.found_ref = 0;
6581 if (back->num_refs == 0)
6582 back->node.found_extent_tree = 0;
/*
 * NOTE(review): the backref is unlinked when it is no longer found in the
 * extent tree but found_ref is still set. Confirm that this condition (rather
 * than !found_ref) is the intended one. The same test is used for tree
 * backrefs further down.
 */
6584 if (!back->node.found_extent_tree && back->node.found_ref) {
6585 list_del(&back->node.list);
/* Tree block reference: the flags are simply cleared. */
6589 struct tree_backref *back;
6590 back = find_tree_backref(rec, parent, root_objectid);
6593 if (back->node.found_ref) {
6596 back->node.found_ref = 0;
6598 if (back->node.found_extent_tree) {
6599 if (rec->extent_item_refs)
6600 rec->extent_item_refs--;
6601 back->node.found_extent_tree = 0;
6603 if (!back->node.found_extent_tree && back->node.found_ref) {
6604 list_del(&back->node.list);
6608 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent tree items that describe the extent at @bytenr.
 *
 * The extent tree is searched repeatedly from (bytenr, <type>, -1). An item
 * whose objectid equals @bytenr and whose type is an extent item, metadata
 * item or one of the backref item types is deleted. For any other type the
 * search key is stepped to just below the found key and the search is
 * repeated. After an EXTENT_ITEM or METADATA_ITEM has been deleted,
 * btrfs_update_block_group() is called with the extent length (key.offset
 * for EXTENT_ITEM, nodesize for METADATA_ITEM).
 *
 * NOTE(review): no use of @new_len is visible in this listing, and the loop
 * exits and return values are on lines that are absent from it.
 */
6613 static int delete_extent_records(struct btrfs_trans_handle *trans,
6614 struct btrfs_root *root,
6615 struct btrfs_path *path,
6616 u64 bytenr, u64 new_len)
6618 struct btrfs_key key;
6619 struct btrfs_key found_key;
6620 struct extent_buffer *leaf;
6625 key.objectid = bytenr;
6627 key.offset = (u64)-1;
6630 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6637 if (path->slots[0] == 0)
6643 leaf = path->nodes[0];
6644 slot = path->slots[0];
6646 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6647 if (found_key.objectid != bytenr)
/* Item types that do not describe the extent are stepped over. */
6650 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6651 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6652 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6653 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6654 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6655 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6656 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6657 btrfs_release_path(path);
/* Continue the search just below the key that was found. */
6658 if (found_key.type == 0) {
6659 if (found_key.offset == 0)
6661 key.offset = found_key.offset - 1;
6662 key.type = found_key.type;
6664 key.type = found_key.type - 1;
6665 key.offset = (u64)-1;
6669 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6670 found_key.objectid, found_key.type, found_key.offset);
6672 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6675 btrfs_release_path(path);
/* Removing the extent item itself also changes block group accounting. */
6677 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6678 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6679 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6680 found_key.offset : root->nodesize;
6682 ret = btrfs_update_block_group(trans, root, bytenr,
6689 btrfs_release_path(path);
6694 * for a single backref, this will allocate a new extent
6695 * and add the backref to it.
/*
 * Write one backref of an extent record back into the extent tree.
 *
 * @trans:     running transaction
 * @info:      filesystem the extent tree belongs to
 * @path:      scratch path, released before returning
 * @rec:       extent record being repaired
 * @back:      the backref to add (data or tree block)
 * @allocated: NOTE(review): presumably tells whether the extent item already
 *             exists; the test on it is not visible in this listing
 * @flags:     extra extent flags OR-ed into a tree block extent item
 *
 * Visible behavior: an empty EXTENT_ITEM keyed (rec->start, rec->max_size) is
 * inserted with a reference count of 0 and rec->generation. A tree block
 * extent additionally gets a btrfs_tree_block_info carrying rec->info_level
 * and a key whose objectid is rec->info_objectid. The block group accounting
 * is updated, and the backref is then added with btrfs_inc_extent_ref(): once
 * per found reference for data backrefs, once for tree backrefs.
 *
 * NOTE(review): error handling and return values are on lines that are absent
 * from this listing.
 */
6697 static int record_extent(struct btrfs_trans_handle *trans,
6698 struct btrfs_fs_info *info,
6699 struct btrfs_path *path,
6700 struct extent_record *rec,
6701 struct extent_backref *back,
6702 int allocated, u64 flags)
6705 struct btrfs_root *extent_root = info->extent_root;
6706 struct extent_buffer *leaf;
6707 struct btrfs_key ins_key;
6708 struct btrfs_extent_item *ei;
6709 struct tree_backref *tback;
6710 struct data_backref *dback;
6711 struct btrfs_tree_block_info *bi;
/* The recorded extent is at least one tree block long. */
6714 rec->max_size = max_t(u64, rec->max_size,
6715 info->extent_root->nodesize);
6718 u32 item_size = sizeof(*ei);
6721 item_size += sizeof(*bi);
6723 ins_key.objectid = rec->start;
6724 ins_key.offset = rec->max_size;
6725 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6727 ret = btrfs_insert_empty_item(trans, extent_root, path,
6728 &ins_key, item_size);
6732 leaf = path->nodes[0];
6733 ei = btrfs_item_ptr(leaf, path->slots[0],
6734 struct btrfs_extent_item);
/* References are added one by one further down, so start from zero. */
6736 btrfs_set_extent_refs(leaf, ei, 0);
6737 btrfs_set_extent_generation(leaf, ei, rec->generation);
6739 if (back->is_data) {
6740 btrfs_set_extent_flags(leaf, ei,
6741 BTRFS_EXTENT_FLAG_DATA);
6743 struct btrfs_disk_key copy_key;
6745 tback = to_tree_backref(back);
6746 bi = (struct btrfs_tree_block_info *)(ei + 1);
6747 memset_extent_buffer(leaf, 0, (unsigned long)bi,
/* Only the objectid of the first key is known; type and offset stay 0. */
6750 btrfs_set_disk_key_objectid(&copy_key,
6751 rec->info_objectid);
6752 btrfs_set_disk_key_type(&copy_key, 0);
6753 btrfs_set_disk_key_offset(&copy_key, 0);
6755 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6756 btrfs_set_tree_block_key(leaf, bi, &copy_key);
6758 btrfs_set_extent_flags(leaf, ei,
6759 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6762 btrfs_mark_buffer_dirty(leaf);
6763 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6764 rec->max_size, 1, 0);
6767 btrfs_release_path(path);
6770 if (back->is_data) {
6774 dback = to_data_backref(back);
6775 if (back->full_backref)
6776 parent = dback->parent;
6780 for (i = 0; i < dback->found_ref; i++) {
6781 /* if parent != 0, we're doing a full backref
6782 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6783 * just makes the backref allocator create a data
6786 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6787 rec->start, rec->max_size,
6791 BTRFS_FIRST_FREE_OBJECTID :
6797 fprintf(stderr, "adding new data backref"
6798 " on %llu %s %llu owner %llu"
6799 " offset %llu found %d\n",
6800 (unsigned long long)rec->start,
6801 back->full_backref ?
6803 back->full_backref ?
6804 (unsigned long long)parent :
6805 (unsigned long long)dback->root,
6806 (unsigned long long)dback->owner,
6807 (unsigned long long)dback->offset,
6812 tback = to_tree_backref(back);
6813 if (back->full_backref)
6814 parent = tback->parent;
6818 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6819 rec->start, rec->max_size,
6820 parent, tback->root, 0, 0);
6821 fprintf(stderr, "adding new tree backref on "
6822 "start %llu len %llu parent %llu root %llu\n",
6823 rec->start, rec->max_size, parent, tback->root);
6826 btrfs_release_path(path);
/*
 * Scan the list @entries for an extent_entry whose bytenr and bytes both
 * match the given values.
 *
 * NOTE(review): the return statements are not visible in this listing.
 * Presumably the matching entry is returned, or NULL when there is none.
 */
6830 static struct extent_entry *find_entry(struct list_head *entries,
6831 u64 bytenr, u64 bytes)
6833 struct extent_entry *entry = NULL;
6835 list_for_each_entry(entry, entries, list) {
6836 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Choose the most trustworthy extent_entry from the list @entries.
 *
 * Visible rules: an entry whose broken count equals its count is not
 * trusted, candidates are compared by their count, and a tie between
 * candidates is not accepted as a decision, so the scan keeps going.
 *
 * NOTE(review): several branches and the return statement are on lines that
 * are absent from this listing. Presumably the winner is returned, or NULL
 * when no entry can be preferred.
 */
6843 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6845 struct extent_entry *entry, *best = NULL, *prev = NULL;
6847 list_for_each_entry(entry, entries, list) {
6854 * If there are as many broken entries as entries then we know
6855 * not to trust this particular entry.
6857 if (entry->broken == entry->count)
6861 * If our current entry == best then we can't be sure our best
6862 * is really the best, so we need to keep searching.
6864 if (best && best->count == entry->count) {
6870 /* Prev == entry, not good enough, have to keep searching */
6871 if (!prev->broken && prev->count == entry->count)
6875 best = (prev->count > entry->count) ? prev : entry;
6876 else if (best->count < entry->count)
6884 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6885 struct data_backref *dback, struct extent_entry *entry)
6887 struct btrfs_trans_handle *trans;
6888 struct btrfs_root *root;
6889 struct btrfs_file_extent_item *fi;
6890 struct extent_buffer *leaf;
6891 struct btrfs_key key;
6895 key.objectid = dback->root;
6896 key.type = BTRFS_ROOT_ITEM_KEY;
6897 key.offset = (u64)-1;
6898 root = btrfs_read_fs_root(info, &key);
6900 fprintf(stderr, "Couldn't find root for our ref\n");
6905 * The backref points to the original offset of the extent if it was
6906 * split, so we need to search down to the offset we have and then walk
6907 * forward until we find the backref we're looking for.
6909 key.objectid = dback->owner;
6910 key.type = BTRFS_EXTENT_DATA_KEY;
6911 key.offset = dback->offset;
6912 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6914 fprintf(stderr, "Error looking up ref %d\n", ret);
6919 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6920 ret = btrfs_next_leaf(root, path);
6922 fprintf(stderr, "Couldn't find our ref, next\n");
6926 leaf = path->nodes[0];
6927 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6928 if (key.objectid != dback->owner ||
6929 key.type != BTRFS_EXTENT_DATA_KEY) {
6930 fprintf(stderr, "Couldn't find our ref, search\n");
6933 fi = btrfs_item_ptr(leaf, path->slots[0],
6934 struct btrfs_file_extent_item);
6935 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6936 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6938 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6943 btrfs_release_path(path);
6945 trans = btrfs_start_transaction(root, 1);
6947 return PTR_ERR(trans);
6950 * Ok we have the key of the file extent we want to fix, now we can cow
6951 * down to the thing and fix it.
6953 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6955 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6956 key.objectid, key.type, key.offset, ret);
6960 fprintf(stderr, "Well that's odd, we just found this key "
6961 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6966 leaf = path->nodes[0];
6967 fi = btrfs_item_ptr(leaf, path->slots[0],
6968 struct btrfs_file_extent_item);
6970 if (btrfs_file_extent_compression(leaf, fi) &&
6971 dback->disk_bytenr != entry->bytenr) {
6972 fprintf(stderr, "Ref doesn't match the record start and is "
6973 "compressed, please take a btrfs-image of this file "
6974 "system and send it to a btrfs developer so they can "
6975 "complete this functionality for bytenr %Lu\n",
6976 dback->disk_bytenr);
6981 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6982 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6983 } else if (dback->disk_bytenr > entry->bytenr) {
6984 u64 off_diff, offset;
6986 off_diff = dback->disk_bytenr - entry->bytenr;
6987 offset = btrfs_file_extent_offset(leaf, fi);
6988 if (dback->disk_bytenr + offset +
6989 btrfs_file_extent_num_bytes(leaf, fi) >
6990 entry->bytenr + entry->bytes) {
6991 fprintf(stderr, "Ref is past the entry end, please "
6992 "take a btrfs-image of this file system and "
6993 "send it to a btrfs developer, ref %Lu\n",
6994 dback->disk_bytenr);
6999 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7000 btrfs_set_file_extent_offset(leaf, fi, offset);
7001 } else if (dback->disk_bytenr < entry->bytenr) {
7004 offset = btrfs_file_extent_offset(leaf, fi);
7005 if (dback->disk_bytenr + offset < entry->bytenr) {
7006 fprintf(stderr, "Ref is before the entry start, please"
7007 " take a btrfs-image of this file system and "
7008 "send it to a btrfs developer, ref %Lu\n",
7009 dback->disk_bytenr);
7014 offset += dback->disk_bytenr;
7015 offset -= entry->bytenr;
7016 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7017 btrfs_set_file_extent_offset(leaf, fi, offset);
7020 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7023 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7024 * only do this if we aren't using compression, otherwise it's a
7027 if (!btrfs_file_extent_compression(leaf, fi))
7028 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7030 printf("ram bytes may be wrong?\n");
7031 btrfs_mark_buffer_dirty(leaf);
7033 err = btrfs_commit_transaction(trans, root);
7034 btrfs_release_path(path);
7035 return ret ? ret : err;
/*
 * verify_backrefs - make the data backrefs of @rec agree on one extent.
 *
 * Walks rec->backrefs looking at data backrefs that are not full backrefs
 * and collects their (disk_bytenr, bytes) pairs in a local list of
 * extent_entry candidates (looked up with find_entry(), allocated when a
 * pair is new).  A backref whose values differ from rec->start / rec->nr,
 * or that is flagged broken, is tested separately right after that.
 *
 * When repair is needed, find_most_right_entry() selects the preferred
 * candidate.  The range of the extent record itself (rec->start, rec->nr)
 * is looked up among the candidates and may be appended as one more
 * candidate before find_most_right_entry() is asked again.  A second walk
 * over rec->backrefs then hands every data backref that does not match the
 * chosen entry to repair_ref().  The candidate list is emptied at the end.
 *
 * @info: filesystem handle, forwarded to repair_ref()
 * @path: scratch path, forwarded to repair_ref()
 * @rec:  extent record whose backrefs are verified
 *
 * NOTE(review): the declarations of entries, nr_entries and mismatch and
 * all return statements are on lines not visible in this copy - confirm
 * the return codes against the caller before relying on them.
 */
7038 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7039 struct extent_record *rec)
7041 struct extent_backref *back;
7042 struct data_backref *dback;
7043 struct extent_entry *entry, *best = NULL;
7046 int broken_entries = 0;
7051 * Metadata is easy and the backrefs should always agree on bytenr and
7052 * size, if not we've got bigger issues.
7057 list_for_each_entry(back, &rec->backrefs, list) {
7058 if (back->full_backref || !back->is_data)
7061 dback = to_data_backref(back);
7064 * We only pay attention to backrefs that we found a real
7067 if (dback->found_ref == 0)
7071 * For now we only catch when the bytes don't match, not the
7072 * bytenr. We can easily do this at the same time, but I want
7073 * to have a fs image to test on before we just add repair
7074 * functionality willy-nilly so we know we won't screw up the
7078 entry = find_entry(&entries, dback->disk_bytenr,
7081 entry = malloc(sizeof(struct extent_entry));
7086 memset(entry, 0, sizeof(*entry));
7087 entry->bytenr = dback->disk_bytenr;
7088 entry->bytes = dback->bytes;
7089 list_add_tail(&entry->list, &entries);
7094 * If we only have on entry we may think the entries agree when
7095 * in reality they don't so we have to do some extra checking.
7097 if (dback->disk_bytenr != rec->start ||
7098 dback->bytes != rec->nr || back->broken)
7109 /* Yay all the backrefs agree, carry on good sir */
7110 if (nr_entries <= 1 && !mismatch)
7113 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7114 "%Lu\n", rec->start);
7117 * First we want to see if the backrefs can agree amongst themselves who
7118 * is right, so figure out which one of the entries has the highest
7121 best = find_most_right_entry(&entries);
7124 * Ok so we may have an even split between what the backrefs think, so
7125 * this is where we use the extent ref to see what it thinks.
7128 entry = find_entry(&entries, rec->start, rec->nr);
7129 if (!entry && (!broken_entries || !rec->found_rec)) {
7130 fprintf(stderr, "Backrefs don't agree with each other "
7131 "and extent record doesn't agree with anybody,"
7132 " so we can't fix bytenr %Lu bytes %Lu\n",
7133 rec->start, rec->nr);
7136 } else if (!entry) {
7138 * Ok our backrefs were broken, we'll assume this is the
7139 * correct value and add an entry for this range.
7141 entry = malloc(sizeof(struct extent_entry));
7146 memset(entry, 0, sizeof(*entry));
7147 entry->bytenr = rec->start;
7148 entry->bytes = rec->nr;
7149 list_add_tail(&entry->list, &entries);
7153 best = find_most_right_entry(&entries);
7155 fprintf(stderr, "Backrefs and extent record evenly "
7156 "split on who is right, this is going to "
7157 "require user input to fix bytenr %Lu bytes "
7158 "%Lu\n", rec->start, rec->nr);
7165 * I don't think this can happen currently as we'll abort() if we catch
7166 * this case higher up, but in case somebody removes that we still can't
7167 * deal with it properly here yet, so just bail out of that's the case.
7169 if (best->bytenr != rec->start) {
7170 fprintf(stderr, "Extent start and backref starts don't match, "
7171 "please use btrfs-image on this file system and send "
7172 "it to a btrfs developer so they can make fsck fix "
7173 "this particular case. bytenr is %Lu, bytes is %Lu\n",
7174 rec->start, rec->nr);
7180 * Ok great we all agreed on an extent record, let's go find the real
7181 * references and fix up the ones that don't match.
7183 list_for_each_entry(back, &rec->backrefs, list) {
7184 if (back->full_backref || !back->is_data)
7187 dback = to_data_backref(back);
7190 * Still ignoring backrefs that don't have a real ref attached
7193 if (dback->found_ref == 0)
7196 if (dback->bytes == best->bytes &&
7197 dback->disk_bytenr == best->bytenr)
7200 ret = repair_ref(info, path, dback, best);
7206 * Ok we messed with the actual refs, which means we need to drop our
7207 * entire cache and go back and rescan. I know this is a huge pain and
7208 * adds a lot of extra work, but it's the only way to be safe. Once all
7209 * the backrefs agree we may not need to do anything to the extent
7214 while (!list_empty(&entries)) {
7215 entry = list_entry(entries.next, struct extent_entry, list);
7216 list_del_init(&entry->list);
/*
 * process_duplicates - replace @rec by the duplicate that reflects the
 * extent tree.
 *
 * Nothing is rearranged when @rec already has found_rec set or carries
 * more than one duplicate.  Otherwise @rec is removed from @extent_cache
 * and the first entry of rec->dups (named good below) takes its place:
 * its list heads and check flags are reset, its cache range is set from
 * good->start / good->nr, and it inherits rec->refs and all backrefs of
 * @rec.  Records found by lookup_cache_extent() for the range of good are
 * then merged in: a record with found_rec set or with duplicates of its
 * own is moved onto good->dups (and good is queued on the global
 * duplicate_extents list), any other record only contributes its refs
 * and backrefs.  Finally good is inserted into @extent_cache.
 *
 * @root:         not referenced by the statements visible here
 * @extent_cache: cache tree holding the extent records
 * @rec:          record that was queued on duplicate_extents
 *
 * The last statement returns 0 when good still has duplicates and 1 when
 * it has none.  NOTE(review): the early return value and the handling of
 * a failed insert_cache_extent() are on lines not visible in this copy.
 */
7222 static int process_duplicates(struct btrfs_root *root,
7223 struct cache_tree *extent_cache,
7224 struct extent_record *rec)
7226 struct extent_record *good, *tmp;
7227 struct cache_extent *cache;
7231 * If we found a extent record for this extent then return, or if we
7232 * have more than one duplicate we are likely going to need to delete
7235 if (rec->found_rec || rec->num_duplicates > 1)
7238 /* Shouldn't happen but just in case */
7239 BUG_ON(!rec->num_duplicates);
7242 * So this happens if we end up with a backref that doesn't match the
7243 * actual extent entry. So either the backref is bad or the extent
7244 * entry is bad. Either way we want to have the extent_record actually
7245 * reflect what we found in the extent_tree, so we need to take the
7246 * duplicate out and use that as the extent_record since the only way we
7247 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7249 remove_cache_extent(extent_cache, &rec->cache);
7251 good = to_extent_record(rec->dups.next);
7252 list_del_init(&good->list);
7253 INIT_LIST_HEAD(&good->backrefs);
7254 INIT_LIST_HEAD(&good->dups);
7255 good->cache.start = good->start;
7256 good->cache.size = good->nr;
7257 good->content_checked = 0;
7258 good->owner_ref_checked = 0;
7259 good->num_duplicates = 0;
7260 good->refs = rec->refs;
7261 list_splice_init(&rec->backrefs, &good->backrefs);
7263 cache = lookup_cache_extent(extent_cache, good->start,
7267 tmp = container_of(cache, struct extent_record, cache);
7270 * If we find another overlapping extent and it's found_rec is
7271 * set then it's a duplicate and we need to try and delete
7274 if (tmp->found_rec || tmp->num_duplicates > 0) {
7275 if (list_empty(&good->list))
7276 list_add_tail(&good->list,
7277 &duplicate_extents);
7278 good->num_duplicates += tmp->num_duplicates + 1;
7279 list_splice_init(&tmp->dups, &good->dups);
7280 list_del_init(&tmp->list);
7281 list_add_tail(&tmp->list, &good->dups);
7282 remove_cache_extent(extent_cache, &tmp->cache);
7287 * Ok we have another non extent item backed extent rec, so lets
7288 * just add it to this extent and carry on like we did above.
7290 good->refs += tmp->refs;
7291 list_splice_init(&tmp->backrefs, &good->backrefs);
7292 remove_cache_extent(extent_cache, &tmp->cache);
7295 ret = insert_cache_extent(extent_cache, &good->cache);
7298 return good->num_duplicates ? 0 : 1;
/*
 * delete_duplicate_records - remove the extent items of records that
 * duplicate the range of @rec.
 *
 * The entries on rec->dups are compared by start and length to find the
 * record that covers all the others; a partial overlap is reported on
 * stderr.  @rec and entries of rec->dups are then collected on a local
 * delete list.  Inside one transaction on the extent root the
 * BTRFS_EXTENT_ITEM_KEY item (start, nr) of the queued records is looked
 * up and deleted; a record flagged as metadata is reported instead.
 * Before returning, the delete list and rec->dups are emptied and the
 * path is freed.
 *
 * @root: only root->fs_info->extent_root is used
 * @rec:  record heading the list of duplicates
 *
 * Returns ret when it is non-zero, otherwise nr_del.  When both are zero
 * rec->num_duplicates is reset to 0.
 *
 * NOTE(review): the initial value of good, the statements that skip the
 * covering record and records with found_rec == 0, and the place where
 * nr_del is incremented are on lines not visible in this copy.
 */
7301 static int delete_duplicate_records(struct btrfs_root *root,
7302 struct extent_record *rec)
7304 struct btrfs_trans_handle *trans;
7305 LIST_HEAD(delete_list);
7306 struct btrfs_path *path;
7307 struct extent_record *tmp, *good, *n;
7310 struct btrfs_key key;
7312 path = btrfs_alloc_path();
7319 /* Find the record that covers all of the duplicates. */
7320 list_for_each_entry(tmp, &rec->dups, list) {
7321 if (good->start < tmp->start)
7323 if (good->nr > tmp->nr)
7326 if (tmp->start + tmp->nr < good->start + good->nr) {
7327 fprintf(stderr, "Ok we have overlapping extents that "
7328 "aren't completely covered by each other, this "
7329 "is going to require more careful thought. "
7330 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7331 tmp->start, tmp->nr, good->start, good->nr);
7338 list_add_tail(&rec->list, &delete_list);
7340 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7343 list_move_tail(&tmp->list, &delete_list);
/* Every deletion below is done against the extent tree. */
7346 root = root->fs_info->extent_root;
7347 trans = btrfs_start_transaction(root, 1);
7348 if (IS_ERR(trans)) {
7349 ret = PTR_ERR(trans);
7353 list_for_each_entry(tmp, &delete_list, list) {
7354 if (tmp->found_rec == 0)
7356 key.objectid = tmp->start;
7357 key.type = BTRFS_EXTENT_ITEM_KEY;
7358 key.offset = tmp->nr;
7360 /* Shouldn't happen but just in case */
7361 if (tmp->metadata) {
7362 fprintf(stderr, "Well this shouldn't happen, extent "
7363 "record overlaps but is metadata? "
7364 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
/* The item located here is the one removed by btrfs_del_item() below. */
7368 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7374 ret = btrfs_del_item(trans, root, path);
7377 btrfs_release_path(path);
7380 err = btrfs_commit_transaction(trans, root);
7384 while (!list_empty(&delete_list)) {
7385 tmp = to_extent_record(delete_list.next);
7386 list_del_init(&tmp->list);
7392 while (!list_empty(&rec->dups)) {
7393 tmp = to_extent_record(rec->dups.next);
7394 list_del_init(&tmp->list);
7398 btrfs_free_path(path);
7400 if (!ret && !nr_del)
7401 rec->num_duplicates = 0;
7403 return ret ? ret : nr_del;
/*
 * find_possible_backrefs - try to locate the file extents behind data
 * backrefs of @rec that the scan did not match.
 *
 * For each data backref that is not a full backref and has found_ref
 * unset, the fs root named by dback->root is read and searched for the
 * BTRFS_EXTENT_DATA_KEY item (dback->owner, dback->offset).  When the item
 * is found, its disk_bytenr and disk_num_bytes are read and
 * lookup_cache_extent() checks whether @extent_cache already holds a
 * record at that bytenr.  For a backref that is kept, found_ref is
 * incremented and disk_bytenr / bytes are overwritten with the values read
 * from the file extent item.
 *
 * @info:         filesystem handle used to read the fs roots
 * @path:         scratch path, released after every search
 * @extent_cache: cache tree of extent records
 * @rec:          record whose backrefs are examined
 *
 * A root that fails to load with -ENOENT is treated as a bad ref; any
 * other root error is returned as PTR_ERR(root).
 *
 * NOTE(review): the test applied to the record found in @extent_cache, the
 * statement marking the backref as untrusted and the final return are on
 * lines not visible in this copy.
 */
7406 static int find_possible_backrefs(struct btrfs_fs_info *info,
7407 struct btrfs_path *path,
7408 struct cache_tree *extent_cache,
7409 struct extent_record *rec)
7411 struct btrfs_root *root;
7412 struct extent_backref *back;
7413 struct data_backref *dback;
7414 struct cache_extent *cache;
7415 struct btrfs_file_extent_item *fi;
7416 struct btrfs_key key;
7420 list_for_each_entry(back, &rec->backrefs, list) {
7421 /* Don't care about full backrefs (poor unloved backrefs) */
7422 if (back->full_backref || !back->is_data)
7425 dback = to_data_backref(back);
7427 /* We found this one, we don't need to do a lookup */
7428 if (dback->found_ref)
/* Root item key with the highest possible offset for this root id. */
7431 key.objectid = dback->root;
7432 key.type = BTRFS_ROOT_ITEM_KEY;
7433 key.offset = (u64)-1;
7435 root = btrfs_read_fs_root(info, &key);
7437 /* No root, definitely a bad ref, skip */
7438 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7440 /* Other err, exit */
7442 return PTR_ERR(root);
7444 key.objectid = dback->owner;
7445 key.type = BTRFS_EXTENT_DATA_KEY;
7446 key.offset = dback->offset;
7447 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7449 btrfs_release_path(path);
7452 /* Didn't find it, we can carry on */
7457 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7458 struct btrfs_file_extent_item);
7459 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7460 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7461 btrfs_release_path(path);
7462 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7464 struct extent_record *tmp;
7465 tmp = container_of(cache, struct extent_record, cache);
7468 * If we found an extent record for the bytenr for this
7469 * particular backref then we can't add it to our
7470 * current extent record. We only want to add backrefs
7471 * that don't have a corresponding extent item in the
7472 * extent tree since they likely belong to this record
7473 * and we need to fix it if it doesn't match bytenrs.
7479 dback->found_ref += 1;
7480 dback->disk_bytenr = bytenr;
7481 dback->bytes = bytes;
7484 * Set this so the verify backref code knows not to trust the
7485 * values in this backref.
/*
 * record_orphan_data_extents - remember data extents of @rec whose file
 * extent item is missing, so that the owning root can rebuild it later.
 *
 * Only data backrefs that are not full backrefs, were found in the extent
 * tree (found_extent_tree) and have found_ref unset are considered.  The
 * fs root named by dback->root is read and searched for the
 * BTRFS_EXTENT_DATA_KEY item (dback->owner, dback->offset).  For a backref
 * that is recorded, an orphan_data_extent is allocated and filled with the
 * root id, owner, offset and the cache range of @rec, then linked with the
 * orphan_data_extents list of that root.
 *
 * @fs_info: filesystem handle used to read the destination roots
 * @rec:     extent record being examined
 *
 * The last visible statement returns 0 when at least one data ref was
 * recorded and 1 otherwise.
 *
 * NOTE(review): list_add() in the kernel-style list API takes the new
 * entry first and the list head second; the call below passes the root
 * list head first and the new orphan second.  If that API applies here,
 * each call would re-link the root list head so that only the most recent
 * orphan stays reachable - confirm against the list helper in use.
 * NOTE(review): the checks on the search result, the allocation failure
 * path and the error return are on lines not visible in this copy.
 */
7494 * Record orphan data ref into corresponding root.
7496 * Return 0 if the extent item contains data ref and recorded.
7497 * Return 1 if the extent item contains no useful data ref
7498 * On that case, it may contains only shared_dataref or metadata backref
7499 * or the file extent exists(this should be handled by the extent bytenr
7501 * Return <0 if something goes wrong.
7503 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7504 struct extent_record *rec)
7506 struct btrfs_key key;
7507 struct btrfs_root *dest_root;
7508 struct extent_backref *back;
7509 struct data_backref *dback;
7510 struct orphan_data_extent *orphan;
7511 struct btrfs_path *path;
7512 int recorded_data_ref = 0;
7517 path = btrfs_alloc_path();
7520 list_for_each_entry(back, &rec->backrefs, list) {
7521 if (back->full_backref || !back->is_data ||
7522 !back->found_extent_tree)
7524 dback = to_data_backref(back);
7525 if (dback->found_ref)
7527 key.objectid = dback->root;
7528 key.type = BTRFS_ROOT_ITEM_KEY;
7529 key.offset = (u64)-1;
7531 dest_root = btrfs_read_fs_root(fs_info, &key);
7533 /* For non-exist root we just skip it */
7534 if (IS_ERR(dest_root) || !dest_root)
7537 key.objectid = dback->owner;
7538 key.type = BTRFS_EXTENT_DATA_KEY;
7539 key.offset = dback->offset;
7541 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7542 btrfs_release_path(path);
7544 * For ret < 0, it's OK since the fs-tree may be corrupted,
7545 * we need to record it for inode/file extent rebuild.
7546 * For ret > 0, we record it only for file extent rebuild.
7547 * For ret == 0, the file extent exists but only bytenr
7548 * mismatch, let the original bytenr fix routine to handle,
7554 orphan = malloc(sizeof(*orphan));
7559 INIT_LIST_HEAD(&orphan->list);
7560 orphan->root = dback->root;
7561 orphan->objectid = dback->owner;
7562 orphan->offset = dback->offset;
7563 orphan->disk_bytenr = rec->cache.start;
7564 orphan->disk_len = rec->cache.size;
7565 list_add(&dest_root->orphan_data_extents, &orphan->list);
7566 recorded_data_ref = 1;
7569 btrfs_free_path(path);
7571 return !recorded_data_ref;
/*
 * fixup_extent_refs - rebuild the extent tree entries of @rec from the
 * backrefs collected during the scan.
 *
 * Step one: for a non-metadata record whose refs differ from
 * extent_item_refs, find_possible_backrefs() first tries to attach
 * backrefs that were never matched; verify_backrefs() then checks that
 * the backrefs agree.
 * Step two: inside a transaction on the extent root,
 * delete_extent_records() removes the existing records for the range
 * (rec->start, rec->max_size).
 * Step three: unless the range is listed in info->corrupt_blocks, the
 * backrefs of @rec are walked and record_extent() is called for those
 * with found_ref set; bad_full_backref is cleared on the way.
 * The transaction is committed and the path freed at the end.
 *
 * @info:         filesystem handle
 * @extent_cache: cache tree of extent records, forwarded to
 *                find_possible_backrefs()
 * @rec:          extent record to repair
 *
 * NOTE(review): the declarations of flags and allocated, the error
 * branches after each step and the final return are on lines not visible
 * in this copy.
 */
7577 * when an incorrect extent item is found, this will delete
7578 * all of the existing entries for it and recreate them
7579 * based on what the tree scan found.
7581 static int fixup_extent_refs(struct btrfs_fs_info *info,
7582 struct cache_tree *extent_cache,
7583 struct extent_record *rec)
7585 struct btrfs_trans_handle *trans = NULL;
7587 struct btrfs_path *path;
/* Cursor for the step-three walk over rec->backrefs. */
7588 struct list_head *cur = rec->backrefs.next;
7589 struct cache_extent *cache;
7590 struct extent_backref *back;
7594 if (rec->flag_block_full_backref)
7595 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7597 path = btrfs_alloc_path();
7601 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7603 * Sometimes the backrefs themselves are so broken they don't
7604 * get attached to any meaningful rec, so first go back and
7605 * check any of our backrefs that we couldn't find and throw
7606 * them into the list if we find the backref so that
7607 * verify_backrefs can figure out what to do.
7609 ret = find_possible_backrefs(info, path, extent_cache, rec);
7614 /* step one, make sure all of the backrefs agree */
7615 ret = verify_backrefs(info, path, rec);
7619 trans = btrfs_start_transaction(info->extent_root, 1);
7620 if (IS_ERR(trans)) {
7621 ret = PTR_ERR(trans);
7625 /* step two, delete all the existing records */
7626 ret = delete_extent_records(trans, info->extent_root, path,
7627 rec->start, rec->max_size);
7632 /* was this block corrupt? If so, don't add references to it */
7633 cache = lookup_cache_extent(info->corrupt_blocks,
7634 rec->start, rec->max_size);
7640 /* step three, recreate all the refs we did find */
7641 while(cur != &rec->backrefs) {
7642 back = to_extent_backref(cur);
7646 * if we didn't find any references, don't create a
7649 if (!back->found_ref)
7652 rec->bad_full_backref = 0;
7653 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7661 int err = btrfs_commit_transaction(trans, info->extent_root);
7666 btrfs_free_path(path);
/*
 * fixup_extent_flags - rewrite the FULL_BACKREF flag of the extent item
 * of @rec.
 *
 * Looks up the extent item of @rec in the extent root inside a
 * transaction, reads its flags and sets BTRFS_BLOCK_FLAG_FULL_BACKREF when
 * rec->flag_block_full_backref is set, or clears it otherwise.  The leaf
 * is marked dirty, the path freed and the transaction committed.
 *
 * @fs_info: filesystem handle; fs_info->extent_root is searched
 * @rec:     extent record describing the item to fix
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started and the
 * result of btrfs_commit_transaction() on the success path.  On the two
 * failure paths after the search the path is freed and the transaction is
 * still committed.  NOTE(review): the values returned on those two paths
 * are on lines not visible in this copy.
 */
7670 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7671 struct extent_record *rec)
7673 struct btrfs_trans_handle *trans;
7674 struct btrfs_root *root = fs_info->extent_root;
7675 struct btrfs_path *path;
7676 struct btrfs_extent_item *ei;
7677 struct btrfs_key key;
7681 key.objectid = rec->start;
/* Metadata records use rec->info_level as key offset, others rec->max_size. */
7682 if (rec->metadata) {
7683 key.type = BTRFS_METADATA_ITEM_KEY;
7684 key.offset = rec->info_level;
7686 key.type = BTRFS_EXTENT_ITEM_KEY;
7687 key.offset = rec->max_size;
7690 path = btrfs_alloc_path();
7694 trans = btrfs_start_transaction(root, 0);
7695 if (IS_ERR(trans)) {
7696 btrfs_free_path(path);
7697 return PTR_ERR(trans);
7700 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7702 btrfs_free_path(path);
7703 btrfs_commit_transaction(trans, root);
7706 fprintf(stderr, "Didn't find extent for %llu\n",
7707 (unsigned long long)rec->start);
7708 btrfs_free_path(path);
7709 btrfs_commit_transaction(trans, root);
7713 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7714 struct btrfs_extent_item);
7715 flags = btrfs_extent_flags(path->nodes[0], ei);
/* Make the flag stored in the item match what the scan recorded in @rec. */
7716 if (rec->flag_block_full_backref) {
7717 fprintf(stderr, "setting full backref on %llu\n",
7718 (unsigned long long)key.objectid);
7719 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7721 fprintf(stderr, "clearing full backref on %llu\n",
7722 (unsigned long long)key.objectid);
7723 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7725 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7726 btrfs_mark_buffer_dirty(path->nodes[0]);
7727 btrfs_free_path(path);
7728 return btrfs_commit_transaction(trans, root);
/*
 * prune_one_block - drop the parent pointer to one corrupt block of the
 * extent tree.
 *
 * Searches the extent root for corrupt->key with path.lowest_level set to
 * corrupt->level + 1, so the search stops at the parent of the corrupt
 * block.  The slot returned by the search is checked first; when its block
 * pointer is not corrupt->cache.start, every slot of that node is scanned
 * for a matching pointer.  The pointer that was found is removed with
 * btrfs_del_ptr() and the path is released.
 *
 * @trans:   running transaction
 * @info:    filesystem handle; only info->extent_root is searched
 * @corrupt: description of the corrupt block (key, level, cache range)
 *
 * NOTE(review): the handling of a failed search, of a missing node at the
 * requested level and of the not-found case, as well as the return
 * values, are on lines not visible in this copy.
 */
7731 /* right now we only prune from the extent allocation tree */
7732 static int prune_one_block(struct btrfs_trans_handle *trans,
7733 struct btrfs_fs_info *info,
7734 struct btrfs_corrupt_block *corrupt)
7737 struct btrfs_path path;
7738 struct extent_buffer *eb;
7742 int level = corrupt->level + 1;
7744 btrfs_init_path(&path);
7746 /* we want to stop at the parent to our busted block */
7747 path.lowest_level = level;
7749 ret = btrfs_search_slot(trans, info->extent_root,
7750 &corrupt->key, &path, -1, 1);
7755 eb = path.nodes[level];
7762 * hopefully the search gave us the block we want to prune,
7763 * lets try that first
7765 slot = path.slots[level];
7766 found = btrfs_node_blockptr(eb, slot);
7767 if (found == corrupt->cache.start)
7770 nritems = btrfs_header_nritems(eb);
7772 /* the search failed, lets scan this node and hope we find it */
7773 for (slot = 0; slot < nritems; slot++) {
7774 found = btrfs_node_blockptr(eb, slot);
7775 if (found == corrupt->cache.start)
7779 * we couldn't find the bad block. TODO, search all the nodes for pointers
7782 if (eb == info->extent_root->node) {
7787 btrfs_release_path(&path);
7792 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7793 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7796 btrfs_release_path(&path);
/*
 * prune_corrupt_blocks - prune the blocks recorded in info->corrupt_blocks
 * from the extent tree.
 *
 * Takes the entry returned by search_cache_extent() on
 * info->corrupt_blocks, passes it to prune_one_block() under a transaction
 * on the extent root and removes it from the cache.  The transaction is
 * committed by the final return statement.
 *
 * @info: filesystem handle
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started, otherwise
 * the result of btrfs_commit_transaction() on the visible return path.
 *
 * NOTE(review): the return value of prune_one_block() is not used by the
 * call below.  The loop construct and the conditions guarding the
 * transaction start and commit are on lines not visible in this copy.
 */
7800 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7802 struct btrfs_trans_handle *trans = NULL;
7803 struct cache_extent *cache;
7804 struct btrfs_corrupt_block *corrupt;
7807 cache = search_cache_extent(info->corrupt_blocks, 0);
7811 trans = btrfs_start_transaction(info->extent_root, 1);
7813 return PTR_ERR(trans);
7815 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7816 prune_one_block(trans, info, corrupt);
7817 remove_cache_extent(info->corrupt_blocks, cache);
7820 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * reset_cached_block_groups - clear cached free space state.
 *
 * Uses find_first_extent_bit() to locate ranges marked EXTENT_DIRTY in
 * fs_info->free_space_cache and clears them with clear_extent_dirty().
 * Block groups are then visited with btrfs_lookup_first_block_group(),
 * advancing start to the end of each group (key.objectid + key.offset).
 *
 * @fs_info: filesystem handle
 *
 * NOTE(review): the loop constructs, their exit conditions and whatever
 * is done to each block group are on lines not visible in this copy.
 */
7824 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7826 struct btrfs_block_group_cache *cache;
7831 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7832 &start, &end, EXTENT_DIRTY);
7835 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7841 cache = btrfs_lookup_first_block_group(fs_info, start);
7846 start = cache->key.objectid + cache->key.offset;
/*
 * check_extent_refs - report, and in repair mode try to fix, every
 * inconsistent extent record in @extent_cache.
 *
 * Preparation: the range of every record in @extent_cache and of every
 * entry in fs_info->corrupt_blocks is marked dirty in
 * fs_info->excluded_extents, corrupt blocks are pruned and the cached
 * block groups are reset.
 *
 * Duplicates: while repair is set and the global duplicate_extents list
 * is not empty, each queued record goes through process_duplicates() and,
 * depending on its result, delete_duplicate_records().
 *
 * Main pass: each record of @extent_cache is checked and a message is
 * printed on stderr for multiple extent items, a ref count mismatch, a
 * backpointer mismatch, a failed owner ref check, a bad full backref,
 * metadata crossing a stripe boundary and a chunk type mismatch.  With
 * repair set, fixup_extent_refs() or fixup_extent_flags() is attempted
 * for the matching problems; record_orphan_data_extents() is called on a
 * ref count mismatch.  The record is then removed from the cache, its
 * backrefs are freed and, under the condition visible near the end of the
 * loop, its range is cleared from excluded_extents again.
 *
 * Completion: a failure other than -EAGAIN prints an abort message;
 * otherwise block accounting is fixed inside a transaction on the extent
 * root and a success message is printed.
 *
 * @root:         any root of the filesystem; root->fs_info is used
 * @extent_cache: cache tree of extent records; emptied by the main pass
 *
 * NOTE(review): most branch conditions, the error counters and all return
 * statements are on lines not visible in this copy.
 */
7850 static int check_extent_refs(struct btrfs_root *root,
7851 struct cache_tree *extent_cache)
7853 struct extent_record *rec;
7854 struct cache_extent *cache;
7863 * if we're doing a repair, we have to make sure
7864 * we don't allocate from the problem extents.
7865 * In the worst case, this will be all the
7868 cache = search_cache_extent(extent_cache, 0);
7870 rec = container_of(cache, struct extent_record, cache);
7871 set_extent_dirty(root->fs_info->excluded_extents,
7873 rec->start + rec->max_size - 1,
7875 cache = next_cache_extent(cache);
7878 /* pin down all the corrupted blocks too */
7879 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7881 set_extent_dirty(root->fs_info->excluded_extents,
7883 cache->start + cache->size - 1,
7885 cache = next_cache_extent(cache);
/* NOTE(review): the result of prune_corrupt_blocks() is not checked here. */
7887 prune_corrupt_blocks(root->fs_info);
7888 reset_cached_block_groups(root->fs_info);
7891 reset_cached_block_groups(root->fs_info);
7894 * We need to delete any duplicate entries we find first otherwise we
7895 * could mess up the extent tree when we have backrefs that actually
7896 * belong to a different extent item and not the weird duplicate one.
7898 while (repair && !list_empty(&duplicate_extents)) {
7899 rec = to_extent_record(duplicate_extents.next);
7900 list_del_init(&rec->list);
7902 /* Sometimes we can find a backref before we find an actual
7903 * extent, so we need to process it a little bit to see if there
7904 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7905 * if this is a backref screwup. If we need to delete stuff
7906 * process_duplicates() will return 0, otherwise it will return
7909 if (process_duplicates(root, extent_cache, rec))
7911 ret = delete_duplicate_records(root, rec);
7915 * delete_duplicate_records will return the number of entries
7916 * deleted, so if it's greater than 0 then we know we actually
7917 * did something and we need to remove.
7931 cache = search_cache_extent(extent_cache, 0);
7934 rec = container_of(cache, struct extent_record, cache);
7935 if (rec->num_duplicates) {
7936 fprintf(stderr, "extent item %llu has multiple extent "
7937 "items\n", (unsigned long long)rec->start);
7942 if (rec->refs != rec->extent_item_refs) {
7943 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7944 (unsigned long long)rec->start,
7945 (unsigned long long)rec->nr);
7946 fprintf(stderr, "extent item %llu, found %llu\n",
7947 (unsigned long long)rec->extent_item_refs,
7948 (unsigned long long)rec->refs);
7949 ret = record_orphan_data_extents(root->fs_info, rec);
7956 * we can't use the extent to repair file
7957 * extent, let the fallback method handle it.
7959 if (!fixed && repair) {
7960 ret = fixup_extent_refs(
7971 if (all_backpointers_checked(rec, 1)) {
7972 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7973 (unsigned long long)rec->start,
7974 (unsigned long long)rec->nr);
7976 if (!fixed && !recorded && repair) {
7977 ret = fixup_extent_refs(root->fs_info,
7986 if (!rec->owner_ref_checked) {
7987 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7988 (unsigned long long)rec->start,
7989 (unsigned long long)rec->nr);
7990 if (!fixed && !recorded && repair) {
7991 ret = fixup_extent_refs(root->fs_info,
8000 if (rec->bad_full_backref) {
8001 fprintf(stderr, "bad full backref, on [%llu]\n",
8002 (unsigned long long)rec->start);
8004 ret = fixup_extent_flags(root->fs_info, rec);
8013 * Although it's not a extent ref's problem, we reuse this
8014 * routine for error reporting.
8015 * No repair function yet.
8017 if (rec->crossing_stripes) {
8019 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8020 rec->start, rec->start + rec->max_size);
8025 if (rec->wrong_chunk_type) {
8027 "bad extent [%llu, %llu), type mismatch with chunk\n",
8028 rec->start, rec->start + rec->max_size);
8033 remove_cache_extent(extent_cache, cache);
8034 free_all_extent_backrefs(rec);
8035 if (!init_extent_tree && repair && (!cur_err || fixed))
8036 clear_extent_dirty(root->fs_info->excluded_extents,
8038 rec->start + rec->max_size - 1,
8044 if (ret && ret != -EAGAIN) {
8045 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8048 struct btrfs_trans_handle *trans;
8050 root = root->fs_info->extent_root;
8051 trans = btrfs_start_transaction(root, 1);
8052 if (IS_ERR(trans)) {
8053 ret = PTR_ERR(trans);
8057 btrfs_fix_block_accounting(trans, root);
8058 ret = btrfs_commit_transaction(trans, root);
8063 fprintf(stderr, "repaired damaged extent references\n");
/*
 * calc_stripe_length - length of one stripe of a chunk.
 *
 * Derives the per-stripe length from the chunk length according to the
 * profile bits in @type:
 *   RAID0:  length / num_stripes
 *   RAID10: length * 2 / num_stripes
 *   RAID5:  length / (num_stripes - 1)
 *   RAID6:  length / (num_stripes - 2)
 *   other:  length
 * The profile bits are tested in that order and the first match wins.
 *
 * @type:        chunk type flags (BTRFS_BLOCK_GROUP_* bits)
 * @length:      length of the chunk
 * @num_stripes: number of stripes of the chunk
 *
 * NOTE(review): no visible check keeps the divisor non-zero; @num_stripes
 * of 0 (RAID0/RAID10), 1 (RAID5) or 2 (RAID6) would divide by zero, and
 * smaller values make the int divisor negative.  Confirm that callers
 * validate @num_stripes for chunks read from a damaged filesystem.
 */
8069 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8073 if (type & BTRFS_BLOCK_GROUP_RAID0) {
8074 stripe_size = length;
8075 stripe_size /= num_stripes;
8076 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8077 stripe_size = length * 2;
8078 stripe_size /= num_stripes;
8079 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8080 stripe_size = length;
8081 stripe_size /= (num_stripes - 1);
8082 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8083 stripe_size = length;
8084 stripe_size /= (num_stripes - 2);
8086 stripe_size = length;
/*
 * check_chunk_refs - cross-check one chunk against its block group item
 * and its device extents.
 *
 * Block group: the item is looked up in block_group_cache->tree.  When it
 * exists, its offset / objectid / flags are compared with the chunk
 * length / offset / type_flags; on agreement the block group record is
 * unlinked from its list and stored in chunk_rec->bg_rec.  A mismatch or
 * a missing item produces the messages below.
 *
 * Device extents: the expected stripe length comes from
 * calc_stripe_length().  For every stripe the dev extent
 * (devid, offset, length) is looked up with lookup_cache_extent2() and its
 * objectid, offset, chunk_offset and length are compared with the stripe;
 * on agreement the record is moved to chunk_rec->dextents.
 *
 * @chunk_rec:         chunk to verify
 * @block_group_cache: block group records found by the scan
 * @dev_extent_cache:  device extent records found by the scan
 * A fourth parameter is declared on a line not visible in this copy;
 * check_chunks() passes its silent argument in that position.
 *
 * NOTE(review): the statements that set the return value and the guards
 * around the messages are on lines not visible in this copy; the return
 * contract is the one listed in the lines that follow.
 */
8092 * Check the chunk with its block group/dev list ref:
8093 * Return 0 if all refs seems valid.
8094 * Return 1 if part of refs seems valid, need later check for rebuild ref
8095 * like missing block group and needs to search extent tree to rebuild them.
8096 * Return -1 if essential refs are missing and unable to rebuild.
8098 static int check_chunk_refs(struct chunk_record *chunk_rec,
8099 struct block_group_tree *block_group_cache,
8100 struct device_extent_tree *dev_extent_cache,
8103 struct cache_extent *block_group_item;
8104 struct block_group_record *block_group_rec;
8105 struct cache_extent *dev_extent_item;
8106 struct device_extent_record *dev_extent_rec;
8110 int metadump_v2 = 0;
8114 block_group_item = lookup_cache_extent(&block_group_cache->tree,
8117 if (block_group_item) {
8118 block_group_rec = container_of(block_group_item,
8119 struct block_group_record,
8121 if (chunk_rec->length != block_group_rec->offset ||
8122 chunk_rec->offset != block_group_rec->objectid ||
8124 chunk_rec->type_flags != block_group_rec->flags)) {
8127 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8128 chunk_rec->objectid,
8133 chunk_rec->type_flags,
8134 block_group_rec->objectid,
8135 block_group_rec->type,
8136 block_group_rec->offset,
8137 block_group_rec->offset,
8138 block_group_rec->objectid,
8139 block_group_rec->flags);
8142 list_del_init(&block_group_rec->list);
8143 chunk_rec->bg_rec = block_group_rec;
8148 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8149 chunk_rec->objectid,
8154 chunk_rec->type_flags);
/* From here on length is the expected size of each device extent. */
8161 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8162 chunk_rec->num_stripes);
8163 for (i = 0; i < chunk_rec->num_stripes; ++i) {
8164 devid = chunk_rec->stripes[i].devid;
8165 offset = chunk_rec->stripes[i].offset;
8166 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8167 devid, offset, length);
8168 if (dev_extent_item) {
8169 dev_extent_rec = container_of(dev_extent_item,
8170 struct device_extent_record,
8172 if (dev_extent_rec->objectid != devid ||
8173 dev_extent_rec->offset != offset ||
8174 dev_extent_rec->chunk_offset != chunk_rec->offset ||
8175 dev_extent_rec->length != length) {
8178 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8179 chunk_rec->objectid,
8182 chunk_rec->stripes[i].devid,
8183 chunk_rec->stripes[i].offset,
8184 dev_extent_rec->objectid,
8185 dev_extent_rec->offset,
8186 dev_extent_rec->length);
8189 list_move(&dev_extent_rec->chunk_list,
8190 &chunk_rec->dextents);
8195 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8196 chunk_rec->objectid,
8199 chunk_rec->stripes[i].devid,
8200 chunk_rec->stripes[i].offset);
/*
 * check_chunks - verify every chunk in @chunk_cache and sort the chunks
 * by result.
 *
 * Each chunk record is passed to check_chunk_refs().  A result of 0 puts
 * the record on @good and a positive result puts it on @rebuild, in both
 * cases only when that list pointer is non-NULL; records can also be put
 * on @bad.  Afterwards the block group records still linked on
 * block_group_cache->block_groups and the device extents on
 * dev_extent_cache->no_chunk_orphans are reported as having no chunk.
 *
 * @chunk_cache:       chunk records found by the scan
 * @block_group_cache: block group records found by the scan
 * @dev_extent_cache:  device extent records found by the scan
 * @good:              optional list receiving chunks with valid refs
 * @bad:               list receiving chunks that cannot be rebuilt
 * @rebuild:           optional list receiving chunks needing a rebuild
 * @silent:            forwarded to check_chunk_refs()
 *
 * NOTE(review): the condition guarding the @bad list, the guards around
 * the messages and the return value are on lines not visible in this copy.
 */
8207 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8208 int check_chunks(struct cache_tree *chunk_cache,
8209 struct block_group_tree *block_group_cache,
8210 struct device_extent_tree *dev_extent_cache,
8211 struct list_head *good, struct list_head *bad,
8212 struct list_head *rebuild, int silent)
8214 struct cache_extent *chunk_item;
8215 struct chunk_record *chunk_rec;
8216 struct block_group_record *bg_rec;
8217 struct device_extent_record *dext_rec;
8221 chunk_item = first_cache_extent(chunk_cache);
8222 while (chunk_item) {
8223 chunk_rec = container_of(chunk_item, struct chunk_record,
8225 err = check_chunk_refs(chunk_rec, block_group_cache,
8226 dev_extent_cache, silent);
8229 if (err == 0 && good)
8230 list_add_tail(&chunk_rec->list, good);
8231 if (err > 0 && rebuild)
8232 list_add_tail(&chunk_rec->list, rebuild);
8234 list_add_tail(&chunk_rec->list, bad);
8235 chunk_item = next_cache_extent(chunk_item);
/* Block groups still on this list were not claimed by any chunk above. */
8238 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8241 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8249 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8253 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * check_device_used - compare the bytes used recorded for a device with
 * the sum of its device extents.
 *
 * Starting at the first cache entry for dev_rec->devid, walks the device
 * extent tree, unlinks each extent from its device_list and adds its
 * length to total_byte.  The objectid of each extent is compared with
 * dev_rec->devid on the way.  A total that differs from
 * dev_rec->byte_used is reported on stderr.
 *
 * @dev_rec:    device record from the scan
 * @dext_cache: device extent records found by the scan
 *
 * NOTE(review): the loop construct, the action taken for an extent of a
 * different device and the return values are on lines not visible in this
 * copy.
 */
8264 static int check_device_used(struct device_record *dev_rec,
8265 struct device_extent_tree *dext_cache)
8267 struct cache_extent *cache;
8268 struct device_extent_record *dev_extent_rec;
8271 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8273 dev_extent_rec = container_of(cache,
8274 struct device_extent_record,
8276 if (dev_extent_rec->objectid != dev_rec->devid)
8279 list_del_init(&dev_extent_rec->device_list);
8280 total_byte += dev_extent_rec->length;
8281 cache = next_cache_extent(cache);
8284 if (total_byte != dev_rec->byte_used) {
8286 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8287 total_byte, dev_rec->byte_used, dev_rec->objectid,
8288 dev_rec->type, dev_rec->offset);
/*
 * check_devices - run check_device_used() on every device record and
 * report device extents that belong to no device.
 *
 * Walks the rb-tree @dev_cache from rb_first() with rb_next(), calling
 * check_device_used() for each device record.  Afterwards every device
 * extent still linked on dev_extent_cache->no_device_orphans is reported
 * on stderr.
 *
 * @dev_cache:        rb-tree of device records
 * @dev_extent_cache: device extent records found by the scan
 *
 * NOTE(review): how the per-device result is accumulated and what is
 * returned are on lines not visible in this copy.
 */
8295 /* check btrfs_dev_item -> btrfs_dev_extent */
8296 static int check_devices(struct rb_root *dev_cache,
8297 struct device_extent_tree *dev_extent_cache)
8299 struct rb_node *dev_node;
8300 struct device_record *dev_rec;
8301 struct device_extent_record *dext_rec;
8305 dev_node = rb_first(dev_cache);
8307 dev_rec = container_of(dev_node, struct device_record, node);
8308 err = check_device_used(dev_rec, dev_extent_cache);
8312 dev_node = rb_next(dev_node);
8314 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8317 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8318 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * add_root_item_to_list - queue a description of one tree root.
 *
 * Allocates a root_item_record, copies the given values into it and
 * appends it to the tail of @head.
 *
 * @head:          list receiving the new record
 * @objectid:      stored in ri_rec->objectid
 * @bytenr:        stored in ri_rec->bytenr
 * @last_snapshot: stored in ri_rec->last_snapshot
 * @level:         stored in ri_rec->level
 * @drop_level:    stored in ri_rec->drop_level
 * @level_size:    stored in ri_rec->level_size
 * @drop_key:      copied into ri_rec->drop_key
 *
 * NOTE(review): the check of the malloc() result, any guard around the
 * copy of @drop_key and the return value are on lines not visible in this
 * copy - confirm whether @drop_key may be NULL.
 */
8325 static int add_root_item_to_list(struct list_head *head,
8326 u64 objectid, u64 bytenr, u64 last_snapshot,
8327 u8 level, u8 drop_level,
8328 int level_size, struct btrfs_key *drop_key)
8331 struct root_item_record *ri_rec;
8332 ri_rec = malloc(sizeof(*ri_rec));
8335 ri_rec->bytenr = bytenr;
8336 ri_rec->objectid = objectid;
8337 ri_rec->level = level;
8338 ri_rec->level_size = level_size;
8339 ri_rec->drop_level = drop_level;
8340 ri_rec->last_snapshot = last_snapshot;
8342 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8343 list_add_tail(&ri_rec->list, head);
/*
 * Drain @list: repeatedly take the first root_item_record and unlink it
 * until the list is empty.
 *
 * NOTE(review): the record is presumably freed after being unlinked --
 * confirm.
 */
8348 static void free_root_item_list(struct list_head *list)
8350 struct root_item_record *ri_rec;
8352 while (!list_empty(list)) {
8353 ri_rec = list_first_entry(list, struct root_item_record,
8355 list_del_init(&ri_rec->list);
/*
 * Process every root_item_record queued on @list.
 *
 * For each record the root block is read at rec->bytenr, handed to
 * add_root_to_pending(), and the record is unlinked from the list once it
 * has been handled.  run_next_block() is called with the current record
 * and, further down, with NULL in place of a record.
 *
 * @list: list of root_item_record entries to consume
 * @root: its fs_info supplies the tree root used for reading blocks
 * @bits: passed through to run_next_block()
 *
 * The cache and tree arguments are passed through unchanged to
 * add_root_to_pending() and run_next_block().
 */
8360 static int deal_root_from_list(struct list_head *list,
8361 struct btrfs_root *root,
8362 struct block_info *bits,
8364 struct cache_tree *pending,
8365 struct cache_tree *seen,
8366 struct cache_tree *reada,
8367 struct cache_tree *nodes,
8368 struct cache_tree *extent_cache,
8369 struct cache_tree *chunk_cache,
8370 struct rb_root *dev_cache,
8371 struct block_group_tree *block_group_cache,
8372 struct device_extent_tree *dev_extent_cache)
8377 while (!list_empty(list)) {
8378 struct root_item_record *rec;
8379 struct extent_buffer *buf;
/* Always work on the head of the list */
8380 rec = list_entry(list->next,
8381 struct root_item_record, list);
/* Read the root block; a buffer that is not uptodate is released */
8383 buf = read_tree_block(root->fs_info->tree_root,
8384 rec->bytenr, rec->level_size, 0);
8385 if (!extent_buffer_uptodate(buf)) {
8386 free_extent_buffer(buf);
/* Queue the root block, tagged with the objectid of its record */
8390 ret = add_root_to_pending(buf, extent_cache, pending,
8391 seen, nodes, rec->objectid);
8395 * To rebuild extent tree, we need deal with snapshot
8396 * one by one, otherwise we deal with node firstly which
8397 * can maximize readahead.
8400 ret = run_next_block(root, bits, bits_nr, &last,
8401 pending, seen, reada, nodes,
8402 extent_cache, chunk_cache,
8403 dev_cache, block_group_cache,
8404 dev_extent_cache, rec);
/* The record is done: drop the buffer reference and unlink the record */
8408 free_extent_buffer(buf);
8409 list_del(&rec->list);
/* Same call as above but without a root_item_record */
8415 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8416 reada, nodes, extent_cache, chunk_cache,
8417 dev_cache, block_group_cache,
8418 dev_extent_cache, NULL);
/*
 * Top level driver of the chunk, extent and device checks.
 *
 * Visible steps, in order:
 *   1. initialise the local caches and lists and publish some of them
 *      through root->fs_info (excluded_extents, fsck_extent_cache,
 *      free_extent_hook, corrupt_blocks);
 *   2. queue the tree root and the chunk root on the normal_trees list;
 *   3. walk the ROOT_ITEM keys of the tree root and queue each root on
 *      normal_trees or, when its drop_progress objectid is non-zero, on
 *      dropping_trees together with the drop key;
 *   4. run deal_root_from_list() on normal_trees, then on dropping_trees;
 *   5. run check_chunks(), check_extent_refs() and check_devices();
 *   6. stop the progress task, clear the fs_info hooks and free the caches.
 */
8428 static int check_chunks_and_extents(struct btrfs_root *root)
8430 struct rb_root dev_cache;
8431 struct cache_tree chunk_cache;
8432 struct block_group_tree block_group_cache;
8433 struct device_extent_tree dev_extent_cache;
8434 struct cache_tree extent_cache;
8435 struct cache_tree seen;
8436 struct cache_tree pending;
8437 struct cache_tree reada;
8438 struct cache_tree nodes;
8439 struct extent_io_tree excluded_extents;
8440 struct cache_tree corrupt_blocks;
8441 struct btrfs_path path;
8442 struct btrfs_key key;
8443 struct btrfs_key found_key;
8445 struct block_info *bits;
8447 struct extent_buffer *leaf;
8449 struct btrfs_root_item ri;
8450 struct list_head dropping_trees;
8451 struct list_head normal_trees;
8452 struct btrfs_root *root1;
/* Step 1: every cache and list starts out empty */
8457 dev_cache = RB_ROOT;
8458 cache_tree_init(&chunk_cache);
8459 block_group_tree_init(&block_group_cache);
8460 device_extent_tree_init(&dev_extent_cache);
8462 cache_tree_init(&extent_cache);
8463 cache_tree_init(&seen);
8464 cache_tree_init(&pending);
8465 cache_tree_init(&nodes);
8466 cache_tree_init(&reada);
8467 cache_tree_init(&corrupt_blocks);
8468 extent_io_tree_init(&excluded_extents);
8469 INIT_LIST_HEAD(&dropping_trees);
8470 INIT_LIST_HEAD(&normal_trees);
/* Publish the local state through fs_info; it is reset to NULL at the end */
8473 root->fs_info->excluded_extents = &excluded_extents;
8474 root->fs_info->fsck_extent_cache = &extent_cache;
8475 root->fs_info->free_extent_hook = free_extent_hook;
8476 root->fs_info->corrupt_blocks = &corrupt_blocks;
8480 bits = malloc(bits_nr * sizeof(struct block_info));
/* Progress reporting is optional and tagged as the extents phase */
8486 if (ctx.progress_enabled) {
8487 ctx.tp = TASK_EXTENTS;
8488 task_start(ctx.info);
/* Step 2: the tree root goes first on the normal list ... */
8492 root1 = root->fs_info->tree_root;
8493 level = btrfs_header_level(root1->node);
8494 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8495 root1->node->start, 0, level, 0,
8496 root1->nodesize, NULL);
/* ... followed by the chunk root */
8499 root1 = root->fs_info->chunk_root;
8500 level = btrfs_header_level(root1->node);
8501 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8502 root1->node->start, 0, level, 0,
8503 root1->nodesize, NULL);
/* Step 3: iterate the items of the tree root looking for ROOT_ITEM keys */
8506 btrfs_init_path(&path);
8509 key.type = BTRFS_ROOT_ITEM_KEY;
8510 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8515 leaf = path.nodes[0];
8516 slot = path.slots[0];
/* Past the last item of this leaf: move on to the next leaf */
8517 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8518 ret = btrfs_next_leaf(root, &path);
8521 leaf = path.nodes[0];
8522 slot = path.slots[0];
8524 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8525 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
8526 unsigned long offset;
/* Copy the root item out of the leaf before reading its fields */
8529 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8530 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8531 last_snapshot = btrfs_root_last_snapshot(&ri);
/* A zero drop_progress objectid selects the normal list */
8532 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8533 level = btrfs_root_level(&ri);
8534 level_size = root->nodesize;
8535 ret = add_root_item_to_list(&normal_trees,
8537 btrfs_root_bytenr(&ri),
8538 last_snapshot, level,
8539 0, level_size, NULL);
/* Otherwise the root is queued on the dropping list with a drop key */
8543 level = btrfs_root_level(&ri);
8544 level_size = root->nodesize;
8545 objectid = found_key.objectid;
8546 btrfs_disk_key_to_cpu(&found_key,
8548 ret = add_root_item_to_list(&dropping_trees,
8550 btrfs_root_bytenr(&ri),
8551 last_snapshot, level,
8553 level_size, &found_key);
8560 btrfs_release_path(&path);
8563 * check_block can return -EAGAIN if it fixes something, please keep
8564 * this in mind when dealing with return values from these functions, if
8565 * we get -EAGAIN we want to fall through and restart the loop.
/* Step 4: normal trees are processed before dropping trees */
8567 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8568 &seen, &reada, &nodes, &extent_cache,
8569 &chunk_cache, &dev_cache, &block_group_cache,
8576 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8577 &pending, &seen, &reada, &nodes,
8578 &extent_cache, &chunk_cache, &dev_cache,
8579 &block_group_cache, &dev_extent_cache);
/* Step 5: cross checks on the data collected above */
8586 ret = check_chunks(&chunk_cache, &block_group_cache,
8587 &dev_extent_cache, NULL, NULL, NULL, 0);
8594 ret = check_extent_refs(root, &extent_cache);
8601 ret = check_devices(&dev_cache, &dev_extent_cache);
/* Step 6: stop progress reporting and undo the fs_info setup */
8606 task_stop(ctx.info);
8608 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8609 extent_io_tree_cleanup(&excluded_extents);
8610 root->fs_info->fsck_extent_cache = NULL;
8611 root->fs_info->free_extent_hook = NULL;
8612 root->fs_info->corrupt_blocks = NULL;
8613 root->fs_info->excluded_extents = NULL;
8616 free_chunk_cache_tree(&chunk_cache);
8617 free_device_cache_tree(&dev_cache);
8618 free_block_group_tree(&block_group_cache);
8619 free_device_extent_tree(&dev_extent_cache);
8620 free_extent_cache_tree(&seen);
8621 free_extent_cache_tree(&pending);
8622 free_extent_cache_tree(&reada);
8623 free_extent_cache_tree(&nodes);
/*
 * NOTE(review): the calls below free the same caches a second time and
 * additionally release the extent record cache and both root item lists.
 * This looks like a separate exit path, possibly the restart case that the
 * -EAGAIN remark above refers to -- confirm.
 */
8626 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8627 free_extent_cache_tree(&seen);
8628 free_extent_cache_tree(&pending);
8629 free_extent_cache_tree(&reada);
8630 free_extent_cache_tree(&nodes);
8631 free_chunk_cache_tree(&chunk_cache);
8632 free_block_group_tree(&block_group_cache);
8633 free_device_cache_tree(&dev_cache);
8634 free_device_extent_tree(&dev_extent_cache);
8635 free_extent_record_cache(root->fs_info, &extent_cache);
8636 free_root_item_list(&normal_trees);
8637 free_root_item_list(&dropping_trees);
8638 extent_io_tree_cleanup(&excluded_extents);
8643 * Check backrefs of a tree block given by @bytenr or @eb.
8645 * @root: the root containing the @bytenr or @eb
8646 * @eb: tree block extent buffer, can be NULL
8647 * @bytenr: bytenr of the tree block to search
8648 * @level: tree level of the tree block
8649 * @owner: owner of the tree block
8651 * Return >0 for any error found and output error message
8652 * Return 0 for no error found
/*
 * Verify that the extent tree holds a backref for one tree block.
 *
 * The extent or metadata item for @bytenr is located in the extent tree.
 * Its flags, generation, level and ref count are compared against the
 * block, and its inline refs are scanned for either a TREE_BLOCK_REF whose
 * offset equals root->objectid or @owner, or a SHARED_BLOCK_REF whose
 * parent passes this same check.  A keyed TREE_BLOCK_REF item is searched
 * afterwards as a fallback.
 *
 * Error bits set in the visible code: BACKREF_MISSING, BACKREF_MISMATCH.
 * The final message is printed only when @eb is not NULL.
 */
8654 static int check_tree_block_ref(struct btrfs_root *root,
8655 struct extent_buffer *eb, u64 bytenr,
8656 int level, u64 owner)
8658 struct btrfs_key key;
8659 struct btrfs_root *extent_root = root->fs_info->extent_root;
8660 struct btrfs_path path;
8661 struct btrfs_extent_item *ei;
8662 struct btrfs_extent_inline_ref *iref;
8663 struct extent_buffer *leaf;
8669 u32 nodesize = root->nodesize;
8676 btrfs_init_path(&path);
8677 key.objectid = bytenr;
/* The key type depends on the SKINNY_METADATA incompat feature */
8678 if (btrfs_fs_incompat(root->fs_info,
8679 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8680 key.type = BTRFS_METADATA_ITEM_KEY;
8682 key.type = BTRFS_EXTENT_ITEM_KEY;
/* Maximum offset: the search is followed by a step back to the item */
8683 key.offset = (u64)-1;
8685 /* Search for the backref in extent tree */
8686 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8688 err |= BACKREF_MISSING;
8691 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8693 err |= BACKREF_MISSING;
8697 leaf = path.nodes[0];
8698 slot = path.slots[0];
8699 btrfs_item_key_to_cpu(leaf, &key, slot);
8701 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* METADATA_ITEM keeps the level in key.offset; inline refs follow the item */
8703 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8704 skinny_level = (int)key.offset;
8705 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
/* Otherwise a btrfs_tree_block_info sits between the item and the refs */
8707 struct btrfs_tree_block_info *info;
8709 info = (struct btrfs_tree_block_info *)(ei + 1);
8710 skinny_level = btrfs_tree_block_level(leaf, info);
8711 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* The extent must be flagged as a tree block */
8718 if (!(btrfs_extent_flags(leaf, ei) &
8719 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8721 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8722 key.objectid, nodesize,
8723 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8724 err = BACKREF_MISMATCH;
/* Generation in the block header versus generation in the extent item */
8726 header_gen = btrfs_header_generation(eb);
8727 extent_gen = btrfs_extent_generation(leaf, ei);
8728 if (header_gen != extent_gen) {
8730 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8731 key.objectid, nodesize, header_gen,
8733 err = BACKREF_MISMATCH;
/* Level given by the caller versus level recorded in the extent tree */
8735 if (level != skinny_level) {
8737 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8738 key.objectid, nodesize, level, skinny_level);
8739 err = BACKREF_MISMATCH;
/* A block not owned by an fs tree is expected to have exactly one ref */
8741 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8743 "extent[%llu %u] is referred by other roots than %llu",
8744 key.objectid, nodesize, root->objectid);
8745 err = BACKREF_MISMATCH;
8750 * Iterate the extent/metadata item to find the exact backref
/* ptr walks the inline refs; end marks the end of the item */
8752 item_size = btrfs_item_size_nr(leaf, slot);
8753 ptr = (unsigned long)iref;
8754 end = (unsigned long)ei + item_size;
8756 iref = (struct btrfs_extent_inline_ref *)ptr;
8757 type = btrfs_extent_inline_ref_type(leaf, iref);
8758 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8760 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8761 (offset == root->objectid || offset == owner)) {
8763 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8764 /* Check if the backref points to valid referencer */
8765 found_ref = !check_tree_block_ref(root, NULL, offset,
/* Advance by the size of the ref type that was just examined */
8771 ptr += btrfs_extent_inline_ref_size(type);
8775 * Inlined extent item doesn't have what we need, check
8776 * TREE_BLOCK_REF_KEY
8779 btrfs_release_path(&path);
8780 key.objectid = bytenr;
8781 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8782 key.offset = root->objectid;
8784 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8789 err |= BACKREF_MISSING;
8791 btrfs_release_path(&path);
8792 if (eb && (err & BACKREF_MISSING))
8793 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8794 bytenr, nodesize, owner, level);
8799 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8801 * Return >0 for any error found and output error message
8802 * Return 0 for no error found
/*
 * Check one EXTENT_DATA item of a file tree leaf against the extent tree.
 *
 * @root: tree the leaf belongs to; root->objectid and root->sectorsize are
 *        used
 * @eb:   leaf holding the file extent item
 * @slot: slot of the item inside @eb
 *
 * Visible checks: sector alignment of disk_num_bytes and num_bytes,
 * presence of the matching EXTENT_ITEM, its DATA flag and generation, and
 * presence of a data backref either inline (EXTENT_DATA_REF or
 * SHARED_DATA_REF) or as a keyed EXTENT_DATA_REF item.
 *
 * Error bits set in the visible code: BYTES_UNALIGNED, BACKREF_MISSING,
 * BACKREF_MISMATCH.  The global byte counters data_bytes_allocated and
 * data_bytes_referenced are updated here as well.
 */
8804 static int check_extent_data_item(struct btrfs_root *root,
8805 struct extent_buffer *eb, int slot)
8807 struct btrfs_file_extent_item *fi;
8808 struct btrfs_path path;
8809 struct btrfs_root *extent_root = root->fs_info->extent_root;
8810 struct btrfs_key fi_key;
8811 struct btrfs_key dbref_key;
8812 struct extent_buffer *leaf;
8813 struct btrfs_extent_item *ei;
8814 struct btrfs_extent_inline_ref *iref;
8815 struct btrfs_extent_data_ref *dref;
8817 u64 file_extent_gen;
8820 u64 extent_num_bytes;
8828 int found_dbackref = 0;
8832 btrfs_item_key_to_cpu(eb, &fi_key, slot);
8833 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8834 file_extent_gen = btrfs_file_extent_generation(eb, fi);
8836 /* Nothing to check for hole and inline data extents */
8837 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8838 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8841 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8842 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8843 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8845 /* Check unaligned disk_num_bytes and num_bytes */
8846 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8848 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8849 fi_key.objectid, fi_key.offset, disk_num_bytes,
8851 err |= BYTES_UNALIGNED;
8853 data_bytes_allocated += disk_num_bytes;
8855 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8857 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8858 fi_key.objectid, fi_key.offset, extent_num_bytes,
8860 err |= BYTES_UNALIGNED;
8862 data_bytes_referenced += extent_num_bytes;
/* Owner of the leaf, compared with the root of inline data refs below */
8864 owner = btrfs_header_owner(eb);
8866 /* Check the extent item of the file extent in extent tree */
8867 btrfs_init_path(&path);
8868 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8869 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8870 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8872 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8874 err |= BACKREF_MISSING;
/* From here on slot refers to the extent tree leaf, not to @eb */
8878 leaf = path.nodes[0];
8879 slot = path.slots[0];
8880 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8882 extent_flags = btrfs_extent_flags(leaf, ei);
8883 extent_gen = btrfs_extent_generation(leaf, ei);
8885 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8887 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8888 disk_bytenr, disk_num_bytes,
8889 BTRFS_EXTENT_FLAG_DATA);
8890 err |= BACKREF_MISMATCH;
/* The file extent generation must not be older than the extent item */
8893 if (file_extent_gen < extent_gen) {
8895 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8896 disk_bytenr, disk_num_bytes, file_extent_gen,
8898 err |= BACKREF_MISMATCH;
8901 /* Check data backref inside that extent item */
8902 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8903 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8904 ptr = (unsigned long)iref;
8905 end = (unsigned long)ei + item_size;
8907 iref = (struct btrfs_extent_inline_ref *)ptr;
8908 type = btrfs_extent_inline_ref_type(leaf, iref);
/* For data refs the payload overlays the offset field of the inline ref */
8909 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8911 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8912 ref_root = btrfs_extent_data_ref_root(leaf, dref);
8913 if (ref_root == owner || ref_root == root->objectid)
8915 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/* A shared data ref counts when its parent tree block has a valid backref */
8916 found_dbackref = !check_tree_block_ref(root, NULL,
8917 btrfs_extent_inline_ref_offset(leaf, iref),
8923 ptr += btrfs_extent_inline_ref_size(type);
8926 /* Didn't find an inlined data backref, try EXTENT_DATA_REF_KEY */
8927 if (!found_dbackref) {
8928 btrfs_release_path(&path);
8930 btrfs_init_path(&path);
8931 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8932 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
/* Keyed data refs are indexed by a hash of root, inode and file offset */
8933 dbref_key.offset = hash_extent_data_ref(root->objectid,
8934 fi_key.objectid, fi_key.offset);
8936 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8937 &dbref_key, &path, 0, 0);
8942 if (!found_dbackref)
8943 err |= BACKREF_MISSING;
8945 btrfs_release_path(&path);
8946 if (err & BACKREF_MISSING) {
8947 error("data extent[%llu %llu] backref lost",
8948 disk_bytenr, disk_num_bytes);
8954 * Get real tree block level for the case like shared block
8955 * Return >= 0 as tree level
8956 * Return <0 for error
/*
 * Resolve the level of the tree block at @bytenr from two sources: the
 * extent tree (key.offset of a METADATA_ITEM, or the btrfs_tree_block_info
 * that follows an EXTENT_ITEM) and the header of the block itself.
 *
 * The header level is what gets returned on the visible success path; the
 * two levels are compared before that.
 */
8958 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8960 struct extent_buffer *eb;
8961 struct btrfs_path path;
8962 struct btrfs_key key;
8963 struct btrfs_extent_item *ei;
8966 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8971 /* Search extent tree for extent generation and level */
8972 key.objectid = bytenr;
8973 key.type = BTRFS_METADATA_ITEM_KEY;
8974 key.offset = (u64)-1;
8976 btrfs_init_path(&path);
8977 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
/* Step back to the extent item that covers bytenr */
8980 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8988 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8989 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8990 struct btrfs_extent_item);
/* Only extents flagged as tree blocks carry a level */
8991 flags = btrfs_extent_flags(path.nodes[0], ei);
8992 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8997 /* Get transid for later read_tree_block() check */
8998 transid = btrfs_extent_generation(path.nodes[0], ei);
9000 /* Get backref level as one source */
9001 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9002 backref_level = key.offset;
9004 struct btrfs_tree_block_info *info;
9006 info = (struct btrfs_tree_block_info *)(ei + 1);
9007 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9009 btrfs_release_path(&path);
9011 /* Get level from tree block as an alternative source */
9012 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9013 if (!extent_buffer_uptodate(eb)) {
9014 free_extent_buffer(eb);
9017 header_level = btrfs_header_level(eb);
9018 free_extent_buffer(eb);
/* Both sources have to agree */
9020 if (header_level != backref_level)
9022 return header_level;
9025 btrfs_release_path(&path);
9030 * Check if a tree block backref is valid (points to a valid tree block)
9031 * if level == -1, level will be resolved
9032 * Return >0 for any error found and print error message
/*
 * Check that a tree block backref points to a block that can really be
 * reached from the tree identified by @root_id.
 *
 * The block at @bytenr is read to obtain its first key, the tree is then
 * searched for that key with path.lowest_level set to @level, and the
 * block found at that level is compared with @bytenr and @level.
 *
 * Error bits set in the visible code: REFERENCER_MISSING,
 * REFERENCER_MISMATCH.
 */
9034 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9035 u64 bytenr, int level)
9037 struct btrfs_root *root;
9038 struct btrfs_key key;
9039 struct btrfs_path path;
9040 struct extent_buffer *eb;
9041 struct extent_buffer *node;
9042 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9046 /* Query level for level == -1 special case */
9048 level = query_tree_block_level(fs_info, bytenr);
9050 err |= REFERENCER_MISSING;
/* Look up the referencing tree by its root id */
9054 key.objectid = root_id;
9055 key.type = BTRFS_ROOT_ITEM_KEY;
9056 key.offset = (u64)-1;
9058 root = btrfs_read_fs_root(fs_info, &key);
9060 err |= REFERENCER_MISSING;
9064 /* Read out the tree block to get item/node key */
9065 eb = read_tree_block(root, bytenr, root->nodesize, 0);
9066 if (!extent_buffer_uptodate(eb)) {
9067 err |= REFERENCER_MISSING;
9068 free_extent_buffer(eb);
9072 /* Empty tree, no need to check key */
9073 if (!btrfs_header_nritems(eb) && !level) {
9074 free_extent_buffer(eb);
/* Two ways to read the first key: node key or item key */
9079 btrfs_node_key_to_cpu(eb, &key, 0);
9081 btrfs_item_key_to_cpu(eb, &key, 0);
9083 free_extent_buffer(eb);
9085 btrfs_init_path(&path);
/* Stop the descent at the level of the block under test */
9086 path.lowest_level = level;
9087 /* Search with the first key, to ensure we can reach it */
9088 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9090 err |= REFERENCER_MISSING;
/* The block reached at that level must be the one the backref names */
9094 node = path.nodes[level];
9095 if (btrfs_header_bytenr(node) != bytenr) {
9097 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9098 bytenr, nodesize, bytenr,
9099 btrfs_header_bytenr(node));
9100 err |= REFERENCER_MISMATCH;
9102 if (btrfs_header_level(node) != level) {
9104 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9105 bytenr, nodesize, level,
9106 btrfs_header_level(node));
9107 err |= REFERENCER_MISMATCH;
9111 btrfs_release_path(&path);
/* Two message variants exist: without and with the level */
9113 if (err & REFERENCER_MISSING) {
9115 error("extent [%llu %d] lost referencer (owner: %llu)",
9116 bytenr, nodesize, root_id);
9119 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9120 bytenr, nodesize, root_id, level);
9127 * Check referencer for shared block backref
9128 * If level == -1, this function will resolve the level.
/*
 * Check that the block at @parent really references the tree block at
 * @bytenr.
 *
 * The parent block is read, its level is compared with @level + 1, and its
 * block pointers are scanned for @bytenr.  REFERENCER_MISSING is returned
 * when no such pointer is found.
 */
9130 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9131 u64 parent, u64 bytenr, int level)
9133 struct extent_buffer *eb;
9134 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9136 int found_parent = 0;
9139 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9140 if (!extent_buffer_uptodate(eb))
/* Level resolved from the extent tree and the block header */
9144 level = query_tree_block_level(fs_info, bytenr);
/* A parent has to sit exactly one level above its child */
9148 if (level + 1 != btrfs_header_level(eb))
9151 nr = btrfs_header_nritems(eb);
9152 for (i = 0; i < nr; i++) {
9153 if (bytenr == btrfs_node_blockptr(eb, i)) {
9159 free_extent_buffer(eb);
9160 if (!found_parent) {
9162 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9163 bytenr, nodesize, parent, level);
9164 return REFERENCER_MISSING;
9170 * Check referencer for normal (inlined) data ref
9171 * If len == 0, it will be resolved by searching in extent tree
/*
 * Check a keyed or inline EXTENT_DATA_REF: count the file extents of inode
 * @objectid in tree @root_id that point at the data extent
 * [@bytenr, @len] and compare the result with @count.
 *
 * @offset is the offset stored in the data backref.
 * REFERENCER_MISSING is returned when the counts differ.
 */
9173 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9174 u64 root_id, u64 objectid, u64 offset,
9175 u64 bytenr, u64 len, u32 count)
9177 struct btrfs_root *root;
9178 struct btrfs_root *extent_root = fs_info->extent_root;
9179 struct btrfs_key key;
9180 struct btrfs_path path;
9181 struct extent_buffer *leaf;
9182 struct btrfs_file_extent_item *fi;
9183 u32 found_count = 0;
/* Look up the EXTENT_ITEM of bytenr in the extent tree */
9188 key.objectid = bytenr;
9189 key.type = BTRFS_EXTENT_ITEM_KEY;
9190 key.offset = (u64)-1;
9192 btrfs_init_path(&path);
9193 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9196 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9199 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9200 if (key.objectid != bytenr ||
9201 key.type != BTRFS_EXTENT_ITEM_KEY)
9204 btrfs_release_path(&path);
/* Read the tree that the backref names as its root */
9206 key.objectid = root_id;
9207 key.type = BTRFS_ROOT_ITEM_KEY;
9208 key.offset = (u64)-1;
9209 btrfs_init_path(&path);
9211 root = btrfs_read_fs_root(fs_info, &key);
9215 key.objectid = objectid;
9216 key.type = BTRFS_EXTENT_DATA_KEY;
9218 * It can be nasty as data backref offset is
9219 * file offset - file extent offset, which is smaller or
9220 * equal to original backref offset. The only special case is
9221 * overflow. So we need to special check and do further search.
/* An offset with the top bit set makes the search start from 0 */
9223 key.offset = offset & (1ULL << 63) ? 0 : offset;
9225 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9230 * Search afterwards to get correct one
9231 * NOTE: As we must do a comprehensive check on the data backref to
9232 * make sure the dref count also matches, we must iterate all file
9233 * extents for that inode.
9236 leaf = path.nodes[0];
9237 slot = path.slots[0];
9239 btrfs_item_key_to_cpu(leaf, &key, slot);
/* Only EXTENT_DATA items of this inode are relevant */
9240 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9242 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9244 * Except normal disk bytenr and disk num bytes, we still
9245 * need to do extra check on dbackref offset as
9246 * dbackref offset = file_offset - file_extent_offset
9248 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9249 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9250 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9254 ret = btrfs_next_item(root, &path);
9259 btrfs_release_path(&path);
/* The number of matching file extents must equal the backref count */
9260 if (found_count != count) {
9262 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9263 bytenr, len, root_id, objectid, offset, count, found_count);
9264 return REFERENCER_MISSING;
9270 * Check if the referencer of a shared data backref exists
/*
 * Check a SHARED_DATA_REF: read the block at @parent and look through its
 * EXTENT_DATA items for a non-inline file extent whose disk bytenr equals
 * @bytenr.  REFERENCER_MISSING is returned when none is found.
 */
9272 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9273 u64 parent, u64 bytenr)
9275 struct extent_buffer *eb;
9276 struct btrfs_key key;
9277 struct btrfs_file_extent_item *fi;
9278 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9280 int found_parent = 0;
9283 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9284 if (!extent_buffer_uptodate(eb))
9287 nr = btrfs_header_nritems(eb);
9288 for (i = 0; i < nr; i++) {
9289 btrfs_item_key_to_cpu(eb, &key, i);
/* Items of other key types are not file extents */
9290 if (key.type != BTRFS_EXTENT_DATA_KEY)
9293 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
/* Inline file extents are tested separately from regular ones */
9294 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9297 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9304 free_extent_buffer(eb);
9305 if (!found_parent) {
9306 error("shared extent %llu referencer lost (parent: %llu)",
9308 return REFERENCER_MISSING;
9314 * This function will check a given extent item, including its backref and
9315 * itself (like crossing stripe boundary and type)
9317 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Check one EXTENT_ITEM or METADATA_ITEM of the extent tree.
 *
 * @eb:   extent tree leaf holding the item
 * @slot: slot of the item inside @eb
 *
 * The item size is validated, metadata extents are tested for crossing a
 * stripe boundary, and every inline ref is dispatched by type to
 * check_tree_block_backref(), check_shared_block_backref(),
 * check_extent_data_backref() or check_shared_data_backref().
 *
 * The global bytes_used counter grows by key.offset for an EXTENT_ITEM
 * key and by nodesize otherwise.
 *
 * Error bits set in the visible code: CROSSING_STRIPE_BOUNDARY,
 * ITEM_SIZE_MISMATCH, UNKNOWN_TYPE.
 */
9319 static int check_extent_item(struct btrfs_fs_info *fs_info,
9320 struct extent_buffer *eb, int slot)
9322 struct btrfs_extent_item *ei;
9323 struct btrfs_extent_inline_ref *iref;
9324 struct btrfs_extent_data_ref *dref;
9328 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9329 u32 item_size = btrfs_item_size_nr(eb, slot);
9334 struct btrfs_key key;
9338 btrfs_item_key_to_cpu(eb, &key, slot);
9339 if (key.type == BTRFS_EXTENT_ITEM_KEY)
9340 bytes_used += key.offset;
9342 bytes_used += nodesize;
/* An item smaller than btrfs_extent_item is treated as the old V0 format */
9344 if (item_size < sizeof(*ei)) {
9346 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9347 * old thing when on disk format is still un-determined.
9348 * No need to care about it anymore
9350 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9354 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9355 flags = btrfs_extent_flags(eb, ei);
9357 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9359 if (metadata && check_crossing_stripes(global_info, key.objectid,
9361 error("bad metadata [%llu, %llu) crossing stripe boundary",
9362 key.objectid, key.objectid + nodesize);
9363 err |= CROSSING_STRIPE_BOUNDARY;
/* ptr starts right behind the fixed part of the extent item */
9366 ptr = (unsigned long)(ei + 1);
9368 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9369 /* Old EXTENT_ITEM metadata */
9370 struct btrfs_tree_block_info *info;
9372 info = (struct btrfs_tree_block_info *)ptr;
9373 level = btrfs_tree_block_level(eb, info);
9374 ptr += sizeof(struct btrfs_tree_block_info);
9376 /* New METADATA_ITEM */
9379 end = (unsigned long)ei + item_size;
9382 err |= ITEM_SIZE_MISMATCH;
9386 /* Now check every backref in this extent item */
9388 iref = (struct btrfs_extent_inline_ref *)ptr;
9389 type = btrfs_extent_inline_ref_type(eb, iref);
9390 offset = btrfs_extent_inline_ref_offset(eb, iref);
9392 case BTRFS_TREE_BLOCK_REF_KEY:
9393 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9397 case BTRFS_SHARED_BLOCK_REF_KEY:
9398 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9402 case BTRFS_EXTENT_DATA_REF_KEY:
9403 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9404 ret = check_extent_data_backref(fs_info,
9405 btrfs_extent_data_ref_root(eb, dref),
9406 btrfs_extent_data_ref_objectid(eb, dref),
9407 btrfs_extent_data_ref_offset(eb, dref),
9408 key.objectid, key.offset,
9409 btrfs_extent_data_ref_count(eb, dref));
9412 case BTRFS_SHARED_DATA_REF_KEY:
9413 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9417 error("extent[%llu %d %llu] has unknown ref type: %d",
9418 key.objectid, key.type, key.offset, type);
9419 err |= UNKNOWN_TYPE;
/* Advance by the size of the ref type that was just handled */
9423 ptr += btrfs_extent_inline_ref_size(type);
9432 * Check if a dev extent item is referred correctly by its chunk
/*
 * Check that the chunk named by a DEV_EXTENT item exists and refers back
 * to this dev extent.
 *
 * @eb:   leaf holding the dev extent item
 * @slot: slot of the item inside @eb
 *
 * The chunk is searched in the chunk tree by the chunk objectid and chunk
 * offset stored in the dev extent.  Its length is compared with the dev
 * extent length, and its stripes are scanned for one whose devid and
 * offset equal the objectid and offset of the dev extent key.
 * REFERENCER_MISSING is returned after the error message below.
 */
9434 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9435 struct extent_buffer *eb, int slot)
9437 struct btrfs_root *chunk_root = fs_info->chunk_root;
9438 struct btrfs_dev_extent *ptr;
9439 struct btrfs_path path;
9440 struct btrfs_key chunk_key;
9441 struct btrfs_key devext_key;
9442 struct btrfs_chunk *chunk;
9443 struct extent_buffer *l;
9447 int found_chunk = 0;
9450 btrfs_item_key_to_cpu(eb, &devext_key, slot);
9451 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9452 length = btrfs_dev_extent_length(eb, ptr);
/* Build the key of the chunk that this dev extent claims to belong to */
9454 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9455 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9456 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9458 btrfs_init_path(&path);
9459 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9464 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
/* Chunk length and dev extent length are compared directly */
9465 if (btrfs_chunk_length(l, chunk) != length)
9468 num_stripes = btrfs_chunk_num_stripes(l, chunk);
9469 for (i = 0; i < num_stripes; i++) {
9470 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9471 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
/* A stripe matches when both device and physical offset agree */
9473 if (devid == devext_key.objectid &&
9474 offset == devext_key.offset) {
9480 btrfs_release_path(&path);
9483 "device extent[%llu, %llu, %llu] did not find the related chunk",
9484 devext_key.objectid, devext_key.offset, length);
9485 return REFERENCER_MISSING;
9491 * Check if the used space is correct with the dev item
/*
 * Check the bytes_used value of a DEV_ITEM against the DEV_EXTENT items
 * of the same device.
 *
 * @eb:   leaf holding the dev item
 * @slot: slot of the item inside @eb
 *
 * The dev tree is searched for DEV_EXTENT keys of this device id and the
 * lengths of the extents found are added up.  REFERENCER_MISSING is
 * returned when the search reports that no related dev extent exists,
 * ACCOUNTING_MISMATCH when the sum differs from bytes_used.
 */
9493 static int check_dev_item(struct btrfs_fs_info *fs_info,
9494 struct extent_buffer *eb, int slot)
9496 struct btrfs_root *dev_root = fs_info->dev_root;
9497 struct btrfs_dev_item *dev_item;
9498 struct btrfs_path path;
9499 struct btrfs_key key;
9500 struct btrfs_dev_extent *ptr;
9506 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9507 dev_id = btrfs_device_id(eb, dev_item);
9508 used = btrfs_device_bytes_used(eb, dev_item);
9510 key.objectid = dev_id;
9511 key.type = BTRFS_DEV_EXTENT_KEY;
9514 btrfs_init_path(&path);
9515 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* On failure key is reloaded from the dev item for the error message */
9517 btrfs_item_key_to_cpu(eb, &key, slot);
9518 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9519 key.objectid, key.type, key.offset);
9520 btrfs_release_path(&path);
9521 return REFERENCER_MISSING;
9524 /* Iterate dev_extents to calculate the used space of a device */
9526 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9528 if (key.objectid > dev_id)
9530 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9533 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9534 struct btrfs_dev_extent);
9535 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9537 ret = btrfs_next_item(dev_root, &path);
9541 btrfs_release_path(&path);
9543 if (used != total) {
9544 btrfs_item_key_to_cpu(eb, &key, slot);
/*
 * NOTE(review): the key loaded just above is not among the visible
 * arguments of this message; two constants and dev_id are printed
 * instead -- confirm that this is intended.
 */
9546 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9547 total, used, BTRFS_ROOT_TREE_OBJECTID,
9548 BTRFS_DEV_EXTENT_KEY, dev_id);
9549 return ACCOUNTING_MISMATCH;
9555 * Check a block group item with its referencer (chunk) and its used space
9556 * with extent/metadata item
/*
 * Check one BLOCK_GROUP_ITEM against its chunk and against the extent
 * items located inside the block group.
 *
 * @eb:   leaf holding the block group item
 * @slot: slot of the item inside @eb
 *
 * The chunk is looked up in the chunk tree at the block group bytenr and
 * its length is checked.  The extent tree is then walked from the start
 * of the block group to its end; the space of the EXTENT_ITEM and
 * METADATA_ITEM entries found is added up and compared with the used
 * value of the block group, and the type flag of every extent is compared
 * with the block group flags.
 *
 * Error bits set in the visible code: REFERENCER_MISSING,
 * REFERENCER_MISMATCH, CHUNK_TYPE_MISMATCH, ACCOUNTING_MISMATCH.
 */
9558 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9559 struct extent_buffer *eb, int slot)
9561 struct btrfs_root *extent_root = fs_info->extent_root;
9562 struct btrfs_root *chunk_root = fs_info->chunk_root;
9563 struct btrfs_block_group_item *bi;
9564 struct btrfs_block_group_item bg_item;
9565 struct btrfs_path path;
9566 struct btrfs_key bg_key;
9567 struct btrfs_key chunk_key;
9568 struct btrfs_key extent_key;
9569 struct btrfs_chunk *chunk;
9570 struct extent_buffer *leaf;
9571 struct btrfs_extent_item *ei;
9572 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9580 btrfs_item_key_to_cpu(eb, &bg_key, slot);
/* Copy the item out of the leaf, then read used and flags from the copy */
9581 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9582 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9583 used = btrfs_block_group_used(&bg_item);
9584 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk key offset is the bytenr of the block group */
9586 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9587 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9588 chunk_key.offset = bg_key.objectid;
9590 btrfs_init_path(&path);
9591 /* Search for the referencer chunk */
9592 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9595 "block group[%llu %llu] did not find the related chunk item",
9596 bg_key.objectid, bg_key.offset);
9597 err |= REFERENCER_MISSING;
9599 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9600 struct btrfs_chunk);
9601 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9604 "block group[%llu %llu] related chunk item length does not match",
9605 bg_key.objectid, bg_key.offset);
9606 err |= REFERENCER_MISMATCH;
9609 btrfs_release_path(&path);
9611 /* Search from the block group bytenr */
9612 extent_key.objectid = bg_key.objectid;
9613 extent_key.type = 0;
9614 extent_key.offset = 0;
9616 btrfs_init_path(&path);
9617 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9621 /* Iterate extent tree to account used space */
9623 leaf = path.nodes[0];
9624 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* Keys at or beyond the end of the block group are outside its range */
9625 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9628 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9629 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9631 if (extent_key.objectid < bg_key.objectid)
/* METADATA_ITEM keys are accounted separately from EXTENT_ITEM keys */
9634 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9637 total += extent_key.offset;
9639 ei = btrfs_item_ptr(leaf, path.slots[0],
9640 struct btrfs_extent_item);
9641 flags = btrfs_extent_flags(leaf, ei);
/* Data extents need a DATA block group */
9642 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9643 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9645 "bad extent[%llu, %llu) type mismatch with chunk",
9646 extent_key.objectid,
9647 extent_key.objectid + extent_key.offset);
9648 err |= CHUNK_TYPE_MISMATCH;
/* Tree blocks need a SYSTEM or METADATA block group */
9650 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9651 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9652 BTRFS_BLOCK_GROUP_METADATA))) {
9654 "bad extent[%llu, %llu) type mismatch with chunk",
9655 extent_key.objectid,
9656 extent_key.objectid + nodesize);
9657 err |= CHUNK_TYPE_MISMATCH;
9661 ret = btrfs_next_item(extent_root, &path);
9667 btrfs_release_path(&path);
/* Space summed from the extent tree versus used in the block group item */
9669 if (total != used) {
9671 "block group[%llu %llu] used %llu but extent items used %llu",
9672 bg_key.objectid, bg_key.offset, used, total);
9673 err |= ACCOUNTING_MISMATCH;
9679 * Check a chunk item.
9680 * Including checking all referred dev_extents and block group
/*
 * check_chunk_item - cross-check the chunk item at @slot of leaf @eb.
 *
 * @fs_info: supplies the extent root, the device root and the super block copy
 * @eb:      leaf holding the chunk item
 * @slot:    slot of the chunk item inside @eb
 *
 * Checks performed by the code below:
 *  - the chunk length is aligned to the super block sectorsize;
 *  - the chunk type has a bit of BTRFS_BLOCK_GROUP_TYPE_MASK set and at most
 *    one profile bit;
 *  - a block group item keyed (chunk offset, BLOCK_GROUP_ITEM, length) is
 *    searched in the extent tree and its flags are compared with the chunk
 *    type;
 *  - for every stripe, a dev extent keyed (devid, DEV_EXTENT, stripe offset)
 *    is searched in the device tree and its chunk objectid, chunk offset and
 *    length are compared with this chunk.
 * Every problem found is ORed into err as a bit flag.
 */
9682 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9683 struct extent_buffer *eb, int slot)
9685 struct btrfs_root *extent_root = fs_info->extent_root;
9686 struct btrfs_root *dev_root = fs_info->dev_root;
9687 struct btrfs_path path;
9688 struct btrfs_key chunk_key;
9689 struct btrfs_key bg_key;
9690 struct btrfs_key devext_key;
9691 struct btrfs_chunk *chunk;
9692 struct extent_buffer *leaf;
9693 struct btrfs_block_group_item *bi;
9694 struct btrfs_block_group_item bg_item;
9695 struct btrfs_dev_extent *ptr;
9696 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
/* The chunk covers [chunk_key.offset, chunk_end). */
9708 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9709 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9710 length = btrfs_chunk_length(eb, chunk);
9711 chunk_end = chunk_key.offset + length;
9712 if (!IS_ALIGNED(length, sectorsize)) {
9713 error("chunk[%llu %llu) not aligned to %u",
9714 chunk_key.offset, chunk_end, sectorsize);
9715 err |= BYTES_UNALIGNED;
9719 type = btrfs_chunk_type(eb, chunk);
9720 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9721 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9722 error("chunk[%llu %llu) has no chunk type",
9723 chunk_key.offset, chunk_end);
9724 err |= UNKNOWN_TYPE;
/* x & (x - 1) is non-zero only when more than one bit of x is set. */
9726 if (profile && (profile & (profile - 1))) {
9727 error("chunk[%llu %llu) multiple profiles detected: %llx",
9728 chunk_key.offset, chunk_end, profile);
9729 err |= UNKNOWN_TYPE;
/* The matching block group item is keyed by the chunk start and length. */
9732 bg_key.objectid = chunk_key.offset;
9733 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9734 bg_key.offset = length;
9736 btrfs_init_path(&path);
9737 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9740 "chunk[%llu %llu) did not find the related block group item",
9741 chunk_key.offset, chunk_end);
9742 err |= REFERENCER_MISSING;
9744 leaf = path.nodes[0];
9745 bi = btrfs_item_ptr(leaf, path.slots[0],
9746 struct btrfs_block_group_item);
9747 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9749 if (btrfs_block_group_flags(&bg_item) != type) {
9751 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9752 chunk_key.offset, chunk_end, type,
9753 btrfs_block_group_flags(&bg_item));
/*
 * NOTE(review): a flags mismatch is recorded as REFERENCER_MISSING, while the
 * length mismatch in check_block_group_item() uses REFERENCER_MISMATCH -
 * confirm this is intended.
 */
9754 err |= REFERENCER_MISSING;
/* Check the dev extent backing each stripe of the chunk. */
9758 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9759 for (i = 0; i < num_stripes; i++) {
9760 btrfs_release_path(&path);
9761 btrfs_init_path(&path);
9762 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9763 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9764 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9766 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9771 leaf = path.nodes[0];
9772 ptr = btrfs_item_ptr(leaf, path.slots[0],
9773 struct btrfs_dev_extent);
9774 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9775 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9776 if (objectid != chunk_key.objectid ||
9777 offset != chunk_key.offset ||
9778 btrfs_dev_extent_length(leaf, ptr) != length)
/*
 * NOTE(review): the message below prints chunk_key.objectid as the chunk
 * start, whereas every other message in this function prints
 * chunk_key.offset - confirm which one is intended.
 */
9782 err |= BACKREF_MISSING;
9784 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9785 chunk_key.objectid, chunk_end, i);
9788 btrfs_release_path(&path);
9794 * Main entry function to check known items and update related accounting info
/*
 * check_leaf_items - dispatch the items of leaf @eb to the per-type checkers.
 *
 * @root: tree the leaf belongs to; root->fs_info is handed to most checkers
 * @eb:   leaf whose items are examined
 *
 * The key at the current slot is read and its type selects the checker.
 * BTRFS_EXTENT_CSUM_KEY items are not checked, only accounted into
 * total_csum_bytes. The slot is advanced while it stays below
 * btrfs_header_nritems(eb).
 */
9796 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9798 struct btrfs_fs_info *fs_info = root->fs_info;
9799 struct btrfs_key key;
9802 struct btrfs_extent_data_ref *dref;
9807 btrfs_item_key_to_cpu(eb, &key, slot);
/* Items found in fs/subvolume trees. */
9811 case BTRFS_EXTENT_DATA_KEY:
9812 ret = check_extent_data_item(root, eb, slot);
/* Block group, device and chunk bookkeeping items. */
9815 case BTRFS_BLOCK_GROUP_ITEM_KEY:
9816 ret = check_block_group_item(fs_info, eb, slot);
9819 case BTRFS_DEV_ITEM_KEY:
9820 ret = check_dev_item(fs_info, eb, slot);
9823 case BTRFS_CHUNK_ITEM_KEY:
9824 ret = check_chunk_item(fs_info, eb, slot);
9827 case BTRFS_DEV_EXTENT_KEY:
9828 ret = check_dev_extent_item(fs_info, eb, slot);
/* Both extent item flavours share one checker. */
9831 case BTRFS_EXTENT_ITEM_KEY:
9832 case BTRFS_METADATA_ITEM_KEY:
9833 ret = check_extent_item(fs_info, eb, slot);
/* Csum items only contribute their item size to the global counter. */
9836 case BTRFS_EXTENT_CSUM_KEY:
9837 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* Keyed (non-inline) backref items. */
9839 case BTRFS_TREE_BLOCK_REF_KEY:
9840 ret = check_tree_block_backref(fs_info, key.offset,
9844 case BTRFS_EXTENT_DATA_REF_KEY:
9845 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9846 ret = check_extent_data_backref(fs_info,
9847 btrfs_extent_data_ref_root(eb, dref),
9848 btrfs_extent_data_ref_objectid(eb, dref),
9849 btrfs_extent_data_ref_offset(eb, dref),
9851 btrfs_extent_data_ref_count(eb, dref));
9854 case BTRFS_SHARED_BLOCK_REF_KEY:
9855 ret = check_shared_block_backref(fs_info, key.offset,
9859 case BTRFS_SHARED_DATA_REF_KEY:
9860 ret = check_shared_data_backref(fs_info, key.offset,
/* Move on to the next slot while items remain in the leaf. */
9868 if (++slot < btrfs_header_nritems(eb))
9875 * Helper function for later fs/subvol tree check. To determine if a tree
9876 * block should be checked.
9877 * This function ensures that only the direct referencer with the lowest
9878 * rootid checks a fs/subvolume tree block.
9880 * Backref check at extent tree would detect errors like missing subvolume
9881 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * should_check - decide whether @root is the one that checks tree block @eb.
 *
 * @root: fs/subvolume root currently being traversed
 * @eb:   tree block under consideration
 *
 * The extent item describing @eb is located in the extent tree and its
 * inline refs are walked. An inline BTRFS_TREE_BLOCK_REF_KEY whose offset
 * (the referencing root id) is lower than root->objectid means another,
 * lower root references the block. Keyed (non-inline) refs are not looked at.
 */
9883 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
9885 struct btrfs_root *extent_root = root->fs_info->extent_root;
9886 struct btrfs_key key;
9887 struct btrfs_path path;
9888 struct extent_buffer *leaf;
9890 struct btrfs_extent_item *ei;
9896 struct btrfs_extent_inline_ref *iref;
/*
 * Search with the block's bytenr and the largest possible offset;
 * btrfs_previous_extent_item() is then used to reach the extent item of
 * this bytenr.
 */
9899 btrfs_init_path(&path);
9900 key.objectid = btrfs_header_bytenr(eb);
9901 key.type = BTRFS_METADATA_ITEM_KEY;
9902 key.offset = (u64)-1;
9905 * Any failure in backref resolving means we can't determine
9906 * whom the tree block belongs to.
9907 * So in that case, we need to check that tree block
9909 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9913 ret = btrfs_previous_extent_item(extent_root, &path,
9914 btrfs_header_bytenr(eb));
9918 leaf = path.nodes[0];
9919 slot = path.slots[0];
9920 btrfs_item_key_to_cpu(leaf, &key, slot);
9921 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * For a METADATA_ITEM the inline refs start right after the extent item;
 * otherwise a btrfs_tree_block_info sits between the two.
 */
9923 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9924 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9926 struct btrfs_tree_block_info *info;
9928 info = (struct btrfs_tree_block_info *)(ei + 1);
9929 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* ptr walks the inline refs; end marks the end of the whole item. */
9932 item_size = btrfs_item_size_nr(leaf, slot);
9933 ptr = (unsigned long)iref;
9934 end = (unsigned long)ei + item_size;
9936 iref = (struct btrfs_extent_inline_ref *)ptr;
9937 type = btrfs_extent_inline_ref_type(leaf, iref);
9938 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9941 * We only check the tree block if current root is
9942 * the lowest referencer of it.
9944 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9945 offset < root->objectid) {
9946 btrfs_release_path(&path);
9950 ptr += btrfs_extent_inline_ref_size(type);
9953 * Normally we should also check keyed tree block ref, but that may be
9954 * very time consuming. Inlined ref should already make us skip a lot
9955 * of refs now. So skip search keyed tree block ref.
9959 btrfs_release_path(&path);
9964 * Traversal function for tree block. We will do:
9965 * 1) Skip shared fs/subvolume tree blocks
9966 * 2) Update related bytes accounting
9967 * 3) Pre-order traversal
/*
 * traverse_tree_block - check tree block @node of @root, then its children.
 *
 * @root: tree being walked
 * @node: tree block to check
 *
 * For fs trees (is_fstree(root->objectid)) a block for which should_check()
 * returns 0 is not processed here. Otherwise the global byte counters are
 * updated, the block's own backref is verified with check_tree_block_ref(),
 * leaf items go through check_leaf_items(), and each child block that reads
 * back up to date is visited by a recursive call.
 */
9969 static int traverse_tree_block(struct btrfs_root *root,
9970 struct extent_buffer *node)
9972 struct extent_buffer *eb;
9973 struct btrfs_key key;
9974 struct btrfs_key drop_key;
9982 * Skip shared fs/subvolume tree block, in that case they will
9983 * be checked by referencer with lowest rootid
9985 if (is_fstree(root->objectid) && !should_check(root, node))
9988 /* Update bytes accounting */
9989 total_btree_bytes += node->len;
9990 if (fs_root_objectid(btrfs_header_owner(node)))
9991 total_fs_tree_bytes += node->len;
9992 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
9993 total_extent_tree_bytes += node->len;
/*
 * Latch found_old_backref once a reloc-tree-owned block with mixed backref
 * revision but without the RELOC header flag is seen.
 */
9994 if (!found_old_backref &&
9995 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
9996 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
9997 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
9998 found_old_backref = 1;
10000 /* pre-order traversal, check itself first */
10001 level = btrfs_header_level(node);
10002 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
10003 btrfs_header_level(node),
10004 btrfs_header_owner(node));
10008 "check %s failed root %llu bytenr %llu level %d, force continue check",
10009 level ? "node":"leaf", root->objectid,
10010 btrfs_header_bytenr(node), btrfs_header_level(node));
/* Leaf handling: account the free space and check every item. */
10013 btree_space_waste += btrfs_leaf_free_space(root, node);
10014 ret = check_leaf_items(root, node);
/* Node handling: unused key pointer slots count as wasted space. */
10019 nr = btrfs_header_nritems(node);
10020 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
10021 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10022 sizeof(struct btrfs_key_ptr);
10024 /* Then check all its children */
10025 for (i = 0; i < nr; i++) {
10026 u64 blocknr = btrfs_node_blockptr(node, i);
10028 btrfs_node_key_to_cpu(node, &key, i);
/*
 * At the root item's drop_level, children whose key is reported by
 * is_dropped_key() against drop_progress get special treatment
 * (presumably they are skipped - TODO confirm).
 */
10029 if (level == root->root_item.drop_level &&
10030 is_dropped_key(&key, &drop_key))
10034 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10035 * to call the function itself.
10037 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10038 if (extent_buffer_uptodate(eb)) {
10039 ret = traverse_tree_block(root, eb);
10042 free_extent_buffer(eb);
10049 * Low memory usage version check_chunks_and_extents.
/*
 * check_chunks_and_extents_v2 - tree-walking check of chunks and extents.
 *
 * @root: any root of the filesystem; only root->fs_info is used
 *
 * Order of work: traverse the chunk tree, traverse the tree root, then
 * search the tree root from key (BTRFS_EXTENT_TREE_OBJECTID, ROOT_ITEM) and
 * iterate with btrfs_next_item(); every ROOT_ITEM found is read with
 * btrfs_read_fs_root() and traversed with traverse_tree_block().
 */
10051 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10053 struct btrfs_path path;
10054 struct btrfs_key key;
10055 struct btrfs_root *root1;
10056 struct btrfs_root *cur_root;
10060 root1 = root->fs_info->chunk_root;
10061 ret = traverse_tree_block(root1, root1->node);
10064 root1 = root->fs_info->tree_root;
10065 ret = traverse_tree_block(root1, root1->node);
/* root1 is the tree root from here on; look up the first root item. */
10068 btrfs_init_path(&path);
10069 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10071 key.type = BTRFS_ROOT_ITEM_KEY;
10073 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
/* NOTE(review): the message below contains the typo "treet". */
10075 error("cannot find extent treet in tree_root");
10080 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10081 if (key.type != BTRFS_ROOT_ITEM_KEY)
10083 key.offset = (u64)-1;
10085 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10086 if (IS_ERR(cur_root) || !cur_root) {
/*
 * NOTE(review): key.objectid is printed with %lld; other messages in this
 * file print objectids with %llu - confirm the intended format.
 */
10087 error("failed to read tree: %lld", key.objectid);
10091 ret = traverse_tree_block(cur_root, cur_root->node);
10095 ret = btrfs_next_item(root1, &path);
10101 btrfs_release_path(&path);
/*
 * btrfs_fsck_reinit_root - give @root a fresh root block inside @trans.
 *
 * @trans:     running transaction; its transid becomes the block generation
 * @root:      root to re-initialise
 * @overwrite: selects how the new block c is obtained.
 *             NOTE(review): the branch using @overwrite is in lines not
 *             visible in this excerpt - confirm against the full source.
 *
 * The header of the new block is zeroed and refilled (level, bytenr,
 * generation, mixed backref revision, owner, fsid, chunk tree uuid) and the
 * buffer is marked dirty. When the new block starts at the same bytenr as
 * the old root node, the root item's generation and level are refreshed and
 * written with btrfs_update_root(). The root is finally put on the dirty
 * list.
 */
10105 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10106 struct btrfs_root *root, int overwrite)
10108 struct extent_buffer *c;
10109 struct extent_buffer *old = root->node;
10112 struct btrfs_disk_key disk_key = {0,0,0};
10118 extent_buffer_get(c);
10121 c = btrfs_alloc_free_block(trans, root,
10123 root->root_key.objectid,
10124 &disk_key, level, 0, 0);
10127 extent_buffer_get(c);
/* Start from an all-zero header and fill in the fields of a new root. */
10131 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10132 btrfs_set_header_level(c, level);
10133 btrfs_set_header_bytenr(c, c->start);
10134 btrfs_set_header_generation(c, trans->transid);
10135 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10136 btrfs_set_header_owner(c, root->root_key.objectid);
10138 write_extent_buffer(c, root->fs_info->fsid,
10139 btrfs_header_fsid(), BTRFS_FSID_SIZE);
10141 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10142 btrfs_header_chunk_tree_uuid(c),
10145 btrfs_mark_buffer_dirty(c);
10147 * this case can happen in the following case:
10149 * 1.overwrite previous root.
10151 * 2.reinit reloc data root, this is because we skip pin
10152 * down reloc data tree before which means we can allocate
10153 * same block bytenr here.
10155 if (old->start == c->start) {
10156 btrfs_set_root_generation(&root->root_item,
10158 root->root_item.level = btrfs_header_level(root->node);
10159 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10160 &root->root_key, &root->root_item);
10162 free_extent_buffer(c);
10166 free_extent_buffer(old);
10168 add_root_to_dirty_list(root);
/*
 * pin_down_tree_blocks - pin @eb and the tree blocks reachable from it.
 *
 * @fs_info:   filesystem whose pinned_extents tree is updated
 * @eb:        tree block to start from
 * @tree_root: passed unchanged to the recursion over child nodes; a root
 *             block reached through a ROOT_ITEM is walked with 0
 *
 * A block whose range is already marked EXTENT_DIRTY in
 * fs_info->pinned_extents is not pinned again. For ROOT_ITEM keys (other
 * than the extent root and the two reloc roots) the referenced root block is
 * read and pinned recursively. For node pointers, when level == 1 and
 * @tree_root is zero, the child is pinned by bytenr/nodesize without being
 * read; otherwise the child is read and walked recursively.
 */
10172 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10173 struct extent_buffer *eb, int tree_root)
10175 struct extent_buffer *tmp;
10176 struct btrfs_root_item *ri;
10177 struct btrfs_key key;
10180 int level = btrfs_header_level(eb);
10186 * If we have pinned this block before, don't pin it again.
10187 * This can not only avoid forever loop with broken filesystem
10188 * but also give us some speedups.
10190 if (test_range_bit(&fs_info->pinned_extents, eb->start,
10191 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10194 btrfs_pin_extent(fs_info, eb->start, eb->len);
10196 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10197 nritems = btrfs_header_nritems(eb);
10198 for (i = 0; i < nritems; i++) {
/* Item handling: only ROOT_ITEM keys are of interest. */
10200 btrfs_item_key_to_cpu(eb, &key, i);
10201 if (key.type != BTRFS_ROOT_ITEM_KEY)
10203 /* Skip the extent root and reloc roots */
10204 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10205 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10206 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10208 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10209 bytenr = btrfs_disk_root_bytenr(eb, ri);
10212 * If at any point we start needing the real root we
10213 * will have to build a stump root for the root we are
10214 * in, but for now this doesn't actually use the root so
10215 * just pass in extent_root.
10217 tmp = read_tree_block(fs_info->extent_root, bytenr,
10219 if (!extent_buffer_uptodate(tmp)) {
10220 fprintf(stderr, "Error reading root block\n");
10223 ret = pin_down_tree_blocks(fs_info, tmp, 0);
10224 free_extent_buffer(tmp);
/* Node pointer handling: bytenr is the child block of slot i. */
10228 bytenr = btrfs_node_blockptr(eb, i);
10230 /* If we aren't the tree root don't read the block */
10231 if (level == 1 && !tree_root) {
10232 btrfs_pin_extent(fs_info, bytenr, nodesize);
10236 tmp = read_tree_block(fs_info->extent_root, bytenr,
10238 if (!extent_buffer_uptodate(tmp)) {
10239 fprintf(stderr, "Error reading tree block\n");
10242 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10243 free_extent_buffer(tmp);
/*
 * pin_metadata_blocks - pin the blocks of the chunk tree (tree_root = 0) and
 * then of the tree root (tree_root = 1) via pin_down_tree_blocks(). The
 * result of the second walk is returned directly.
 */
10252 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10256 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10260 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * reset_block_groups - rebuild the in-memory block groups from the chunk tree.
 *
 * @fs_info: filesystem whose block group state is recreated
 *
 * The avail_*_alloc_bits are cleared, then the chunk tree is walked: every
 * CHUNK_ITEM becomes a block group through btrfs_add_block_group() and its
 * byte range is marked dirty in fs_info->free_space_cache. A second pass
 * steps through the block groups with btrfs_lookup_first_block_group().
 */
10263 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10265 struct btrfs_block_group_cache *cache;
10266 struct btrfs_path *path;
10267 struct extent_buffer *leaf;
10268 struct btrfs_chunk *chunk;
10269 struct btrfs_key key;
10273 path = btrfs_alloc_path();
10278 key.type = BTRFS_CHUNK_ITEM_KEY;
10281 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
10283 btrfs_free_path(path);
10288 * We do this in case the block groups were screwed up and had alloc
10289 * bits that aren't actually set on the chunks. This happens with
10290 * restored images every time and could happen in real life I guess.
10292 fs_info->avail_data_alloc_bits = 0;
10293 fs_info->avail_metadata_alloc_bits = 0;
10294 fs_info->avail_system_alloc_bits = 0;
10296 /* First we need to create the in-memory block groups */
10298 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10299 ret = btrfs_next_leaf(fs_info->chunk_root, path);
10301 btrfs_free_path(path);
10309 leaf = path->nodes[0];
10310 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10311 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
/* key.offset is the chunk start; the length comes from the chunk item. */
10316 chunk = btrfs_item_ptr(leaf, path->slots[0],
10317 struct btrfs_chunk);
10318 btrfs_add_block_group(fs_info, 0,
10319 btrfs_chunk_type(leaf, chunk),
10320 key.objectid, key.offset,
10321 btrfs_chunk_length(leaf, chunk));
10322 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10323 key.offset + btrfs_chunk_length(leaf, chunk),
/* Second pass: step from one block group to the next by its end offset. */
10329 cache = btrfs_lookup_first_block_group(fs_info, start);
10333 start = cache->key.objectid + cache->key.offset;
10336 btrfs_free_path(path);
/*
 * reset_balance - remove pending balance state from the tree root.
 *
 * @trans:   running transaction used for all modifications
 * @fs_info: filesystem being repaired
 *
 * Three stages are visible below:
 *  1. the balance item (BTRFS_BALANCE_OBJECTID, BALANCE_ITEM) is searched
 *     and removed with btrfs_del_item();
 *  2. items with objectid BTRFS_TREE_RELOC_OBJECTID are collected in
 *     del_slot/del_nr and removed with btrfs_del_items();
 *  3. at label reinit_data_reloc the data reloc tree is read, recorded in
 *     the transaction, re-initialised with btrfs_fsck_reinit_root() and
 *     given a new root directory.
 */
10340 static int reset_balance(struct btrfs_trans_handle *trans,
10341 struct btrfs_fs_info *fs_info)
10343 struct btrfs_root *root = fs_info->tree_root;
10344 struct btrfs_path *path;
10345 struct extent_buffer *leaf;
10346 struct btrfs_key key;
10347 int del_slot, del_nr = 0;
10351 path = btrfs_alloc_path();
/* Stage 1: the balance item. */
10355 key.objectid = BTRFS_BALANCE_OBJECTID;
10356 key.type = BTRFS_BALANCE_ITEM_KEY;
10359 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10364 goto reinit_data_reloc;
10369 ret = btrfs_del_item(trans, root, path);
10372 btrfs_release_path(path);
/* Stage 2: root items of reloc trees. */
10374 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10375 key.type = BTRFS_ROOT_ITEM_KEY;
10378 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10382 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10387 ret = btrfs_del_items(trans, root, path,
10394 btrfs_release_path(path);
10397 ret = btrfs_search_slot(trans, root, &key, path,
10404 leaf = path->nodes[0];
10405 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10406 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10408 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10413 del_slot = path->slots[0];
10422 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
10426 btrfs_release_path(path);
/* Stage 3: from here on root refers to the data reloc tree. */
10429 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10430 key.type = BTRFS_ROOT_ITEM_KEY;
10431 key.offset = (u64)-1;
10432 root = btrfs_read_fs_root(fs_info, &key);
10433 if (IS_ERR(root)) {
10434 fprintf(stderr, "Error reading data reloc tree\n");
10435 ret = PTR_ERR(root);
10438 record_root_in_trans(trans, root);
10439 ret = btrfs_fsck_reinit_root(trans, root, 0);
10442 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10444 btrfs_free_path(path);
/*
 * reinit_extent_tree - rebuild an empty extent tree inside @trans.
 *
 * @trans:   running transaction
 * @fs_info: filesystem being repaired
 *
 * Sequence visible below: filesystems with the MIXED_GROUPS incompat flag get
 * only an "unsupported" message; otherwise metadata blocks are pinned
 * (pin_metadata_blocks), the in-memory block groups are dropped and recreated
 * (btrfs_free_block_groups + reset_block_groups), the extent root is
 * re-initialised (btrfs_fsck_reinit_root), a block group item is inserted for
 * every in-memory block group, and finally reset_balance() is called.
 */
10448 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10449 struct btrfs_fs_info *fs_info)
10455 * The only reason we don't do this is because right now we're just
10456 * walking the trees we find and pinning down their bytes, we don't look
10457 * at any of the leaves. In order to do mixed groups we'd have to check
10458 * the leaves of any fs roots and pin down the bytes for any file
10459 * extents we find. Not hard but why do it if we don't have to?
10461 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10462 fprintf(stderr, "We don't support re-initing the extent tree "
10463 "for mixed block groups yet, please notify a btrfs "
10464 "developer you want to do this so they can add this "
10465 "functionality.\n");
10470 * first we need to walk all of the trees except the extent tree and pin
10471 * down the bytes that are in use so we don't overwrite any existing
10474 ret = pin_metadata_blocks(fs_info);
10476 fprintf(stderr, "error pinning down used bytes\n");
10481 * Need to drop all the block groups since we're going to recreate all
10484 btrfs_free_block_groups(fs_info);
10485 ret = reset_block_groups(fs_info);
10487 fprintf(stderr, "error resetting the block groups\n");
10491 /* Ok we can allocate now, reinit the extent root */
10492 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10494 fprintf(stderr, "extent root initialization failed\n");
10496 * When the transaction code is updated we should end the
10497 * transaction, but for now progs only knows about commit so
10498 * just return an error.
10504 * Now we have all the in-memory block groups setup so we can make
10505 * allocations properly, and the metadata we care about is safe since we
10506 * pinned all of it above.
10509 struct btrfs_block_group_cache *cache;
10511 cache = btrfs_lookup_first_block_group(fs_info, start);
10514 start = cache->key.objectid + cache->key.offset;
10515 ret = btrfs_insert_item(trans, fs_info->extent_root,
10516 &cache->key, &cache->item,
10517 sizeof(cache->item));
10519 fprintf(stderr, "Error adding block group\n");
10522 btrfs_extent_post_op(trans, fs_info->extent_root);
/* Last step: clear any pending balance state. */
10525 ret = reset_balance(trans, fs_info);
10527 fprintf(stderr, "error resetting the pending balance\n");
/*
 * recow_extent_buffer - rewrite tree block @eb through a tree search.
 *
 * @root: any root of the filesystem; replaced by the owner root of @eb
 * @eb:   tree block to recow
 *
 * The owner root is looked up from btrfs_header_owner(eb). Inside a new
 * transaction the first key of @eb is searched with path->lowest_level set
 * to the block's level and the last btrfs_search_slot() argument set to 1
 * (presumably the cow flag - TODO confirm), then the transaction is
 * committed. Returns PTR_ERR() of the failing call when the owner root or
 * the transaction cannot be obtained.
 */
10532 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10534 struct btrfs_path *path;
10535 struct btrfs_trans_handle *trans;
10536 struct btrfs_key key;
10539 printf("Recowing metadata block %llu\n", eb->start);
10540 key.objectid = btrfs_header_owner(eb);
10541 key.type = BTRFS_ROOT_ITEM_KEY;
10542 key.offset = (u64)-1;
10544 root = btrfs_read_fs_root(root->fs_info, &key);
10545 if (IS_ERR(root)) {
10546 fprintf(stderr, "Couldn't find owner root %llu\n",
10548 return PTR_ERR(root);
10551 path = btrfs_alloc_path();
10555 trans = btrfs_start_transaction(root, 1);
10556 if (IS_ERR(trans)) {
10557 btrfs_free_path(path);
10558 return PTR_ERR(trans);
/* Use the first key of the block: node key for nodes, item key for leaves. */
10561 path->lowest_level = btrfs_header_level(eb);
10562 if (path->lowest_level)
10563 btrfs_node_key_to_cpu(eb, &key, 0);
10565 btrfs_item_key_to_cpu(eb, &key, 0);
10567 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* NOTE(review): the result of btrfs_commit_transaction() is not captured. */
10568 btrfs_commit_transaction(trans, root);
10569 btrfs_free_path(path);
/*
 * delete_bad_item - delete the item described by @bad from its owner tree.
 *
 * @root: any root of the filesystem; replaced by the root bad->root_id
 * @bad:  key of the item to delete plus the id of the tree that holds it
 *
 * The owner root is read, a transaction is started, bad->key is searched
 * and the item at the resulting path is removed with btrfs_del_item(); the
 * transaction is then committed. Returns PTR_ERR() of the failing call when
 * the owner root or the transaction cannot be obtained.
 */
10573 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10575 struct btrfs_path *path;
10576 struct btrfs_trans_handle *trans;
10577 struct btrfs_key key;
10580 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10581 bad->key.type, bad->key.offset);
10582 key.objectid = bad->root_id;
10583 key.type = BTRFS_ROOT_ITEM_KEY;
10584 key.offset = (u64)-1;
10586 root = btrfs_read_fs_root(root->fs_info, &key);
10587 if (IS_ERR(root)) {
10588 fprintf(stderr, "Couldn't find owner root %llu\n",
10590 return PTR_ERR(root);
10593 path = btrfs_alloc_path();
10597 trans = btrfs_start_transaction(root, 1);
10598 if (IS_ERR(trans)) {
10599 btrfs_free_path(path);
10600 return PTR_ERR(trans);
10603 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10609 ret = btrfs_del_item(trans, root, path);
/* NOTE(review): the result of btrfs_commit_transaction() is not captured. */
10611 btrfs_commit_transaction(trans, root);
10612 btrfs_free_path(path);
/*
 * zero_log_tree - clear the log tree reference in the super block.
 *
 * @root: root used to start and commit the transaction
 *
 * Sets log_root and log_root_level of root->fs_info->super_copy to 0 inside
 * a transaction; ret takes the result of btrfs_commit_transaction(), or
 * PTR_ERR(trans) when the transaction cannot be started.
 */
10616 static int zero_log_tree(struct btrfs_root *root)
10618 struct btrfs_trans_handle *trans;
10621 trans = btrfs_start_transaction(root, 1);
10622 if (IS_ERR(trans)) {
10623 ret = PTR_ERR(trans);
10626 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10627 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10628 ret = btrfs_commit_transaction(trans, root);
/*
 * populate_csum - checksum a byte range one sector at a time.
 *
 * @trans:     running transaction
 * @csum_root: csum tree; also supplies the sectorsize
 * @buf:       scratch buffer; the callers in this file allocate sectorsize
 *             bytes for it
 * @start:     first byte of the range
 *
 * While offset < len, the data at start + offset is read into @buf with
 * read_extent_data() and handed to btrfs_csum_file_block(); offset then
 * advances by sectorsize.
 */
10632 static int populate_csum(struct btrfs_trans_handle *trans,
10633 struct btrfs_root *csum_root, char *buf, u64 start,
10640 while (offset < len) {
10641 sectorsize = csum_root->sectorsize;
10642 ret = read_extent_data(csum_root, buf, start + offset,
10646 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10647 start + offset, buf, sectorsize);
10650 offset += sectorsize;
/*
 * fill_csum_tree_from_one_fs_root - checksum the file extents of one tree.
 *
 * @trans:     running transaction
 * @csum_root: csum tree receiving the checksums
 * @cur_root:  fs/subvolume tree whose file extents are scanned
 *
 * Items of @cur_root are iterated with btrfs_next_item(). For EXTENT_DATA
 * items of type BTRFS_FILE_EXTENT_REG, populate_csum() is run over the
 * on-disk range (disk_bytenr, disk_num_bytes). A populate_csum() result of
 * -EEXIST is tested for explicitly.
 */
10655 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10656 struct btrfs_root *csum_root,
10657 struct btrfs_root *cur_root)
10659 struct btrfs_path *path;
10660 struct btrfs_key key;
10661 struct extent_buffer *node;
10662 struct btrfs_file_extent_item *fi;
10669 path = btrfs_alloc_path();
/* One sector of scratch space, reused for every extent. */
10672 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10682 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10685 /* Iterate all regular file extents and fill its csum */
10687 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10689 if (key.type != BTRFS_EXTENT_DATA_KEY)
10691 node = path->nodes[0];
10692 slot = path->slots[0];
10693 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10694 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* Checksums cover the extent as stored on disk. */
10696 start = btrfs_file_extent_disk_bytenr(node, fi);
10697 len = btrfs_file_extent_disk_num_bytes(node, fi);
10699 ret = populate_csum(trans, csum_root, buf, start, len);
10700 if (ret == -EEXIST)
10706 * TODO: if next leaf is corrupted, jump to nearest next valid
10709 ret = btrfs_next_item(cur_root, path);
10719 btrfs_free_path(path);
/*
 * fill_csum_tree_from_fs - rebuild checksums by scanning fs/subvolume trees.
 *
 * @trans:     running transaction
 * @csum_root: csum tree receiving the checksums
 *
 * The tree root is searched from objectid BTRFS_FS_TREE_OBJECTID and
 * iterated with btrfs_next_item(). For ROOT_ITEM keys whose objectid passes
 * is_fstree(), the root is read with btrfs_read_fs_root() and handed to
 * fill_csum_tree_from_one_fs_root(). Objectids above
 * BTRFS_LAST_FREE_OBJECTID are tested for separately.
 */
10724 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10725 struct btrfs_root *csum_root)
10727 struct btrfs_fs_info *fs_info = csum_root->fs_info;
10728 struct btrfs_path *path;
10729 struct btrfs_root *tree_root = fs_info->tree_root;
10730 struct btrfs_root *cur_root;
10731 struct extent_buffer *node;
10732 struct btrfs_key key;
10736 path = btrfs_alloc_path();
10740 key.objectid = BTRFS_FS_TREE_OBJECTID;
10742 key.type = BTRFS_ROOT_ITEM_KEY;
10744 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10753 node = path->nodes[0];
10754 slot = path->slots[0];
10755 btrfs_item_key_to_cpu(node, &key, slot);
10756 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10758 if (key.type != BTRFS_ROOT_ITEM_KEY)
10760 if (!is_fstree(key.objectid))
/* Offset (u64)-1 is the form of root key btrfs_read_fs_root() gets here. */
10762 key.offset = (u64)-1;
10764 cur_root = btrfs_read_fs_root(fs_info, &key);
10765 if (IS_ERR(cur_root) || !cur_root) {
10766 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10770 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10775 ret = btrfs_next_item(tree_root, path);
10785 btrfs_free_path(path);
/*
 * fill_csum_tree_from_extent - rebuild checksums by scanning the extent tree.
 *
 * @trans:     running transaction
 * @csum_root: csum tree receiving the checksums; its fs_info supplies the
 *             extent root
 *
 * The extent tree is walked leaf by leaf (btrfs_next_leaf() once the slot
 * runs past the last item). For EXTENT_ITEM keys whose extent flags include
 * BTRFS_EXTENT_FLAG_DATA, populate_csum() is called for the range starting
 * at key.objectid.
 */
10789 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10790 struct btrfs_root *csum_root)
10792 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10793 struct btrfs_path *path;
10794 struct btrfs_extent_item *ei;
10795 struct extent_buffer *leaf;
10797 struct btrfs_key key;
10800 path = btrfs_alloc_path();
10805 key.type = BTRFS_EXTENT_ITEM_KEY;
10808 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10810 btrfs_free_path(path);
/* One sector of scratch space, reused for every extent. */
10814 buf = malloc(csum_root->sectorsize);
10816 btrfs_free_path(path);
10821 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10822 ret = btrfs_next_leaf(extent_root, path);
10830 leaf = path->nodes[0];
10832 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10833 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10838 ei = btrfs_item_ptr(leaf, path->slots[0],
10839 struct btrfs_extent_item);
10840 if (!(btrfs_extent_flags(leaf, ei) &
10841 BTRFS_EXTENT_FLAG_DATA)) {
10846 ret = populate_csum(trans, csum_root, buf, key.objectid,
10853 btrfs_free_path(path);
10859 * Recalculate the csum and put it into the csum tree.
10861 * Extent tree init will wipe out all the extent info, so in that case, we
10862 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
10863 * will use fs/subvol trees to init the csum tree.
/*
 * fill_csum_tree - pick the data source used to rebuild the csum tree.
 *
 * @trans:          running transaction
 * @csum_root:      csum tree receiving the checksums
 * @search_fs_tree: non-zero selects fill_csum_tree_from_fs(); otherwise
 *                  fill_csum_tree_from_extent() is used
 *
 * Returns whatever the selected helper returns.
 */
10865 static int fill_csum_tree(struct btrfs_trans_handle *trans,
10866 struct btrfs_root *csum_root,
10867 int search_fs_tree)
10869 if (search_fs_tree)
10870 return fill_csum_tree_from_fs(trans, csum_root);
10872 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * free_roots_info_cache - tear down the global roots_info_cache.
 *
 * Entries are removed from the tree one at a time until
 * cache_tree_empty() reports it empty; the tree itself is then freed and
 * the global pointer reset to NULL. The pointer is tested for NULL first.
 */
10875 static void free_roots_info_cache(void)
10877 if (!roots_info_cache)
10880 while (!cache_tree_empty(roots_info_cache)) {
10881 struct cache_extent *entry;
10882 struct root_item_info *rii;
10884 entry = first_cache_extent(roots_info_cache);
10887 remove_cache_extent(roots_info_cache, entry);
/* The cache_extent is embedded in the root_item_info that owns it. */
10888 rii = container_of(entry, struct root_item_info, cache_extent);
10892 free(roots_info_cache);
10893 roots_info_cache = NULL;
/*
 * build_roots_info_cache - record, per root id, the top tree block found in
 * the extent tree.
 *
 * @info: filesystem whose extent tree is scanned
 *
 * roots_info_cache is allocated on first use. The extent tree is walked;
 * for tree block extents whose first inline ref is a
 * BTRFS_TREE_BLOCK_REF_KEY, the ref offset is taken as the root id and a
 * root_item_info entry keyed by that id is created or looked up. The entry
 * keeps the level, bytenr and generation of the highest-level block seen
 * for that root.
 */
10896 static int build_roots_info_cache(struct btrfs_fs_info *info)
10899 struct btrfs_key key;
10900 struct extent_buffer *leaf;
10901 struct btrfs_path *path;
10903 if (!roots_info_cache) {
10904 roots_info_cache = malloc(sizeof(*roots_info_cache));
10905 if (!roots_info_cache)
10907 cache_tree_init(roots_info_cache);
10910 path = btrfs_alloc_path();
10915 key.type = BTRFS_EXTENT_ITEM_KEY;
10918 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10921 leaf = path->nodes[0];
10924 struct btrfs_key found_key;
10925 struct btrfs_extent_item *ei;
10926 struct btrfs_extent_inline_ref *iref;
10927 int slot = path->slots[0];
10932 struct cache_extent *entry;
10933 struct root_item_info *rii;
/* Past the last item of this leaf: move on to the next leaf. */
10935 if (slot >= btrfs_header_nritems(leaf)) {
10936 ret = btrfs_next_leaf(info->extent_root, path);
10943 leaf = path->nodes[0];
10944 slot = path->slots[0];
10947 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10949 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10950 found_key.type != BTRFS_METADATA_ITEM_KEY)
10953 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10954 flags = btrfs_extent_flags(leaf, ei);
10956 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10957 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * METADATA_ITEM: the level is the key offset and the inline refs follow the
 * extent item directly. EXTENT_ITEM: a btrfs_tree_block_info sits in
 * between and carries the level.
 */
10960 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10961 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10962 level = found_key.offset;
10964 struct btrfs_tree_block_info *binfo;
10966 binfo = (struct btrfs_tree_block_info *)(ei + 1);
10967 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10968 level = btrfs_tree_block_level(leaf, binfo);
10972 * For a root extent, it must be of the following type and the
10973 * first (and only one) iref in the item.
10975 type = btrfs_extent_inline_ref_type(leaf, iref);
10976 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10979 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
10980 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10982 rii = malloc(sizeof(struct root_item_info));
10987 rii->cache_extent.start = root_id;
10988 rii->cache_extent.size = 1;
10989 rii->level = (u8)-1;
10990 entry = &rii->cache_extent;
10991 ret = insert_cache_extent(roots_info_cache, entry);
10994 rii = container_of(entry, struct root_item_info,
10998 ASSERT(rii->cache_extent.start == root_id);
10999 ASSERT(rii->cache_extent.size == 1);
/*
 * A level of (u8)-1 marks an entry that has not recorded a block yet; a
 * higher level replaces what was recorded before.
 */
11001 if (level > rii->level || rii->level == (u8)-1) {
11002 rii->level = level;
11003 rii->bytenr = found_key.objectid;
11004 rii->gen = btrfs_extent_generation(leaf, ei);
11005 rii->node_count = 1;
11006 } else if (level == rii->level) {
11014 btrfs_free_path(path);
/*
 * maybe_repair_root_item - compare a root item with the cached extent info.
 *
 * @info:           filesystem being checked
 * @path:           path positioned on the root item
 * @root_key:       key of that root item; its objectid is the root id
 * @read_only_mode: when non-zero the root item is only compared, never
 *                  rewritten
 *
 * The roots_info_cache entry for the root id must exist and have
 * node_count == 1. The on-disk root item is copied into ri and its bytenr,
 * level and generation are compared with the cached values. On a mismatch
 * a message is printed (unless read_only_mode and repair are both set), a
 * root item generation newer than the cached one is reported separately,
 * and outside read-only mode the three fields are overwritten and the item
 * is written back into the leaf.
 */
11019 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11020 struct btrfs_path *path,
11021 const struct btrfs_key *root_key,
11022 const int read_only_mode)
11024 const u64 root_id = root_key->objectid;
11025 struct cache_extent *entry;
11026 struct root_item_info *rii;
11027 struct btrfs_root_item ri;
11028 unsigned long offset;
11030 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11033 "Error: could not find extent items for root %llu\n",
11034 root_key->objectid);
11038 rii = container_of(entry, struct root_item_info, cache_extent);
11039 ASSERT(rii->cache_extent.start == root_id);
11040 ASSERT(rii->cache_extent.size == 1);
11042 if (rii->node_count != 1) {
11044 "Error: could not find btree root extent for root %llu\n",
11049 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11050 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
/* ri is a private copy of the on-disk root item; compare it with rii. */
11052 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11053 btrfs_root_level(&ri) != rii->level ||
11054 btrfs_root_generation(&ri) != rii->gen) {
11057 * If we're in repair mode but our caller told us to not update
11058 * the root item, i.e. just check if it needs to be updated, don't
11059 * print this message, since the caller will call us again shortly
11060 * for the same root item without read only mode (the caller will
11061 * open a transaction first).
11063 if (!(read_only_mode && repair))
11065 "%sroot item for root %llu,"
11066 " current bytenr %llu, current gen %llu, current level %u,"
11067 " new bytenr %llu, new gen %llu, new level %u\n",
11068 (read_only_mode ? "" : "fixing "),
11070 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11071 btrfs_root_level(&ri),
11072 rii->bytenr, rii->gen, rii->level);
11074 if (btrfs_root_generation(&ri) > rii->gen) {
11076 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11077 root_id, btrfs_root_generation(&ri), rii->gen);
/* Only touch the leaf when the caller allows modification. */
11081 if (!read_only_mode) {
11082 btrfs_set_root_bytenr(&ri, rii->bytenr);
11083 btrfs_set_root_level(&ri, rii->level);
11084 btrfs_set_root_generation(&ri, rii->gen);
11085 write_extent_buffer(path->nodes[0], &ri,
11086 offset, sizeof(ri));
11096 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11097 * caused read-only snapshots to be corrupted if they were created at a moment
11098 * when the source subvolume/snapshot had orphan items. The issue was that the
11099 * on-disk root items became incorrect, referring to the pre orphan cleanup root
11100 * node instead of the post orphan cleanup root node.
11101 * So this function, and its callees, just detect and fix those cases. Even
11102 * though the regression was for read-only snapshots, this function applies to
11103 * any snapshot/subvolume root.
11104 * This must be run before any other repair code - not doing so makes other
11105 * repair code delete or modify backrefs in the extent tree for example, which
11106 * will result in an inconsistent fs after repairing the root items.
/*
 * Walk the root items in info->tree_root, starting at objectid
 * BTRFS_FIRST_FREE_OBJECTID, and hand each one to maybe_repair_root_item().
 *
 * The caller prints a positive result as a count of roots ("Fixed %d roots."
 * or "Found %d roots with an outdated root item.").
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably a negative value signals an error - confirm against the full
 * source.
 */
11108 static int repair_root_items(struct btrfs_fs_info *info)
11110 struct btrfs_path *path = NULL;
11111 struct btrfs_key key;
11112 struct extent_buffer *leaf;
11113 struct btrfs_trans_handle *trans = NULL;
11116 int need_trans = 0;
/* Populate roots_info_cache, which maybe_repair_root_item() consults. */
11118 ret = build_roots_info_cache(info);
11122 path = btrfs_alloc_path();
/* Search key: root items, beginning at BTRFS_FIRST_FREE_OBJECTID. */
11128 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11129 key.type = BTRFS_ROOT_ITEM_KEY;
11134 * Avoid opening and committing transactions if a leaf doesn't have
11135 * any root items that need to be fixed, so that we avoid rotating
11136 * backup roots unnecessarily.
11139 trans = btrfs_start_transaction(info->tree_root, 1);
11140 if (IS_ERR(trans)) {
11141 ret = PTR_ERR(trans);
11146 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11150 leaf = path->nodes[0];
11153 struct btrfs_key found_key;
/* Ran past the last item of this leaf: look up the next key to continue from. */
11155 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11156 int no_more_keys = find_next_key(path, &key);
11158 btrfs_release_path(path);
11160 ret = btrfs_commit_transaction(trans,
11172 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
/*
 * Keys that are not ROOT_ITEM keys, and the tree-reloc root, are
 * special-cased here (the branch bodies are not visible in this excerpt).
 */
11174 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11176 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11179 ret = maybe_repair_root_item(info, path, &found_key,
11184 if (!trans && repair) {
11187 btrfs_release_path(path);
/* Cleanup: release the roots info cache and the path. */
11197 free_roots_info_cache();
11198 btrfs_free_path(path);
11200 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the free space cache of the block groups found through
 * btrfs_lookup_first_block_group(), then set the super block's
 * cache_generation to (u64)-1 inside a transaction on fs_info->tree_root.
 *
 * Returns PTR_ERR(trans) if the transaction cannot be started.
 * NOTE(review): the loop header and the remaining return statements are not
 * visible in this excerpt - confirm against the full source.
 */
11207 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
11209 struct btrfs_trans_handle *trans;
11210 struct btrfs_block_group_cache *bg_cache;
11214 /* Clear all free space cache inodes and its extent data */
11216 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
11219 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
/* The next lookup starts at key.objectid + key.offset of this block group. */
11222 current = bg_cache->key.objectid + bg_cache->key.offset;
11225 /* Don't forget to set cache_generation to -1 */
11226 trans = btrfs_start_transaction(fs_info->tree_root, 0);
11227 if (IS_ERR(trans)) {
11228 error("failed to update super block cache generation");
11229 return PTR_ERR(trans);
11231 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
11232 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Help text for "btrfs check"; cmd_check() passes this table to usage().
 * The entries below are the command synopsis, its descriptions, and one
 * line (or continuation line) per supported option.
 * NOTE(review): presumably each new option parsed by cmd_check() should get
 * an entry here - confirm against project conventions.
 */
11237 const char * const cmd_check_usage[] = {
11238 "btrfs check [options] <device>",
11239 "Check structural integrity of a filesystem (unmounted).",
11240 "Check structural integrity of an unmounted filesystem. Verify internal",
11241 "trees' consistency and item connectivity. In the repair mode try to",
11242 "fix the problems found. ",
11243 "WARNING: the repair mode is considered dangerous",
11245 "-s|--super <superblock> use this superblock copy",
11246 "-b|--backup use the first valid backup root copy",
11247 "--repair try to repair the filesystem",
11248 "--readonly run in read-only mode (default)",
11249 "--init-csum-tree create a new CRC tree",
11250 "--init-extent-tree create a new extent tree",
11251 "--mode <MODE> allows choice of memory/IO trade-offs",
11252 " where MODE is one of:",
11253 " original - read inodes and extents to memory (requires",
11254 " more memory, does less IO)",
11255 " lowmem - try to use less memory but read blocks again",
11257 "--check-data-csum verify checksums of data blocks",
11258 "-Q|--qgroup-report print a report on qgroup consistency",
11259 "-E|--subvol-extents <subvolid>",
11260 " print subvolume extents and sharing state",
11261 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
11262 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
11263 "-p|--progress indicate progress",
11264 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
11265 " NOTE: v1 support implemented",
11269 int cmd_check(int argc, char **argv)
11271 struct cache_tree root_cache;
11272 struct btrfs_root *root;
11273 struct btrfs_fs_info *info;
11276 u64 tree_root_bytenr = 0;
11277 u64 chunk_root_bytenr = 0;
11278 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11281 int init_csum_tree = 0;
11283 int clear_space_cache = 0;
11284 int qgroup_report = 0;
11285 int qgroups_repaired = 0;
11286 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
11290 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11291 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11292 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11293 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
11294 static const struct option long_options[] = {
11295 { "super", required_argument, NULL, 's' },
11296 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11297 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11298 { "init-csum-tree", no_argument, NULL,
11299 GETOPT_VAL_INIT_CSUM },
11300 { "init-extent-tree", no_argument, NULL,
11301 GETOPT_VAL_INIT_EXTENT },
11302 { "check-data-csum", no_argument, NULL,
11303 GETOPT_VAL_CHECK_CSUM },
11304 { "backup", no_argument, NULL, 'b' },
11305 { "subvol-extents", required_argument, NULL, 'E' },
11306 { "qgroup-report", no_argument, NULL, 'Q' },
11307 { "tree-root", required_argument, NULL, 'r' },
11308 { "chunk-root", required_argument, NULL,
11309 GETOPT_VAL_CHUNK_TREE },
11310 { "progress", no_argument, NULL, 'p' },
11311 { "mode", required_argument, NULL,
11313 { "clear-space-cache", required_argument, NULL,
11314 GETOPT_VAL_CLEAR_SPACE_CACHE},
11315 { NULL, 0, NULL, 0}
11318 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11322 case 'a': /* ignored */ break;
11324 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11327 num = arg_strtou64(optarg);
11328 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11330 "super mirror should be less than %d",
11331 BTRFS_SUPER_MIRROR_MAX);
11334 bytenr = btrfs_sb_offset(((int)num));
11335 printf("using SB copy %llu, bytenr %llu\n", num,
11336 (unsigned long long)bytenr);
11342 subvolid = arg_strtou64(optarg);
11345 tree_root_bytenr = arg_strtou64(optarg);
11347 case GETOPT_VAL_CHUNK_TREE:
11348 chunk_root_bytenr = arg_strtou64(optarg);
11351 ctx.progress_enabled = true;
11355 usage(cmd_check_usage);
11356 case GETOPT_VAL_REPAIR:
11357 printf("enabling repair mode\n");
11359 ctree_flags |= OPEN_CTREE_WRITES;
11361 case GETOPT_VAL_READONLY:
11364 case GETOPT_VAL_INIT_CSUM:
11365 printf("Creating a new CRC tree\n");
11366 init_csum_tree = 1;
11368 ctree_flags |= OPEN_CTREE_WRITES;
11370 case GETOPT_VAL_INIT_EXTENT:
11371 init_extent_tree = 1;
11372 ctree_flags |= (OPEN_CTREE_WRITES |
11373 OPEN_CTREE_NO_BLOCK_GROUPS);
11376 case GETOPT_VAL_CHECK_CSUM:
11377 check_data_csum = 1;
11379 case GETOPT_VAL_MODE:
11380 check_mode = parse_check_mode(optarg);
11381 if (check_mode == CHECK_MODE_UNKNOWN) {
11382 error("unknown mode: %s", optarg);
11386 case GETOPT_VAL_CLEAR_SPACE_CACHE:
11387 if (strcmp(optarg, "v1") != 0) {
11389 "only v1 support implmented, unrecognized value %s",
11393 clear_space_cache = 1;
11394 ctree_flags |= OPEN_CTREE_WRITES;
11399 if (check_argc_exact(argc - optind, 1))
11400 usage(cmd_check_usage);
11402 if (ctx.progress_enabled) {
11403 ctx.tp = TASK_NOTHING;
11404 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11407 /* This check is the only reason for --readonly to exist */
11408 if (readonly && repair) {
11409 error("repair options are not compatible with --readonly");
11414 * Not supported yet
11416 if (repair && check_mode == CHECK_MODE_LOWMEM) {
11417 error("low memory mode doesn't support repair yet");
11422 cache_tree_init(&root_cache);
11424 if((ret = check_mounted(argv[optind])) < 0) {
11425 error("could not check mount status: %s", strerror(-ret));
11428 error("%s is currently mounted, aborting", argv[optind]);
11433 /* only allow partial opening under repair mode */
11435 ctree_flags |= OPEN_CTREE_PARTIAL;
11437 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11438 chunk_root_bytenr, ctree_flags);
11440 error("cannot open file system");
11445 global_info = info;
11446 root = info->fs_root;
11447 if (clear_space_cache) {
11448 if (btrfs_fs_compat_ro(info,
11449 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
11451 "free space cache v2 detected, clearing not implemented");
11455 printf("Clearing free space cache\n");
11456 ret = clear_free_space_cache(info);
11458 error("failed to clear free space cache");
11461 printf("Free space cache cleared\n");
11467 * repair mode will force us to commit transaction which
11468 * will make us fail to load log tree when mounting.
11470 if (repair && btrfs_super_log_root(info->super_copy)) {
11471 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
11476 ret = zero_log_tree(root);
11478 error("failed to zero log tree: %d", ret);
11483 uuid_unparse(info->super_copy->fsid, uuidbuf);
11484 if (qgroup_report) {
11485 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11487 ret = qgroup_verify_all(info);
11493 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11494 subvolid, argv[optind], uuidbuf);
11495 ret = print_extent_state(info, subvolid);
11498 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11500 if (!extent_buffer_uptodate(info->tree_root->node) ||
11501 !extent_buffer_uptodate(info->dev_root->node) ||
11502 !extent_buffer_uptodate(info->chunk_root->node)) {
11503 error("critical roots corrupted, unable to check the filesystem");
11508 if (init_extent_tree || init_csum_tree) {
11509 struct btrfs_trans_handle *trans;
11511 trans = btrfs_start_transaction(info->extent_root, 0);
11512 if (IS_ERR(trans)) {
11513 error("error starting transaction");
11514 ret = PTR_ERR(trans);
11518 if (init_extent_tree) {
11519 printf("Creating a new extent tree\n");
11520 ret = reinit_extent_tree(trans, info);
11525 if (init_csum_tree) {
11526 printf("Reinitialize checksum tree\n");
11527 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11529 error("checksum tree initialization failed: %d",
11535 ret = fill_csum_tree(trans, info->csum_root,
11538 error("checksum tree refilling failed: %d", ret);
11543 * Ok now we commit and run the normal fsck, which will add
11544 * extent entries for all of the items it finds.
11546 ret = btrfs_commit_transaction(trans, info->extent_root);
11550 if (!extent_buffer_uptodate(info->extent_root->node)) {
11551 error("critical: extent_root, unable to check the filesystem");
11555 if (!extent_buffer_uptodate(info->csum_root->node)) {
11556 error("critical: csum_root, unable to check the filesystem");
11561 if (!ctx.progress_enabled)
11562 printf("checking extents");
11563 if (check_mode == CHECK_MODE_LOWMEM)
11564 ret = check_chunks_and_extents_v2(root);
11566 ret = check_chunks_and_extents(root);
11568 printf("Errors found in extent allocation tree or chunk allocation");
11570 ret = repair_root_items(info);
11574 fprintf(stderr, "Fixed %d roots.\n", ret);
11576 } else if (ret > 0) {
11578 "Found %d roots with an outdated root item.\n",
11581 "Please run a filesystem check with the option --repair to fix them.\n");
11586 if (!ctx.progress_enabled) {
11587 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11588 fprintf(stderr, "checking free space tree\n");
11590 fprintf(stderr, "checking free space cache\n");
11592 ret = check_space_cache(root);
11597 * We used to have to have these hole extents in between our real
11598 * extents so if we don't have this flag set we need to make sure there
11599 * are no gaps in the file extents for inodes, otherwise we can just
11600 * ignore it when this happens.
11602 no_holes = btrfs_fs_incompat(root->fs_info,
11603 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11604 if (!ctx.progress_enabled)
11605 fprintf(stderr, "checking fs roots\n");
11606 ret = check_fs_roots(root, &root_cache);
11610 fprintf(stderr, "checking csums\n");
11611 ret = check_csums(root);
11615 fprintf(stderr, "checking root refs\n");
11616 ret = check_root_refs(root, &root_cache);
11620 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11621 struct extent_buffer *eb;
11623 eb = list_first_entry(&root->fs_info->recow_ebs,
11624 struct extent_buffer, recow);
11625 list_del_init(&eb->recow);
11626 ret = recow_extent_buffer(root, eb);
11631 while (!list_empty(&delete_items)) {
11632 struct bad_item *bad;
11634 bad = list_first_entry(&delete_items, struct bad_item, list);
11635 list_del_init(&bad->list);
11637 ret = delete_bad_item(root, bad);
11641 if (info->quota_enabled) {
11643 fprintf(stderr, "checking quota groups\n");
11644 err = qgroup_verify_all(info);
11648 err = repair_qgroups(info, &qgroups_repaired);
11653 if (!list_empty(&root->fs_info->recow_ebs)) {
11654 error("transid errors in file system");
11658 /* Don't override original ret */
11659 if (!ret && qgroups_repaired)
11660 ret = qgroups_repaired;
11662 if (found_old_backref) { /*
11663 * there was a disk format change when mixed
11664 * backref was in testing tree. The old format
11665 * existed about one week.
11667 printf("\n * Found old mixed backref format. "
11668 "The old format is not supported! *"
11669 "\n * Please mount the FS in readonly mode, "
11670 "backup data and re-format the FS. *\n\n");
11673 printf("found %llu bytes used err is %d\n",
11674 (unsigned long long)bytes_used, ret);
11675 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11676 printf("total tree bytes: %llu\n",
11677 (unsigned long long)total_btree_bytes);
11678 printf("total fs tree bytes: %llu\n",
11679 (unsigned long long)total_fs_tree_bytes);
11680 printf("total extent tree bytes: %llu\n",
11681 (unsigned long long)total_extent_tree_bytes);
11682 printf("btree space waste bytes: %llu\n",
11683 (unsigned long long)btree_space_waste);
11684 printf("file data blocks allocated: %llu\n referenced %llu\n",
11685 (unsigned long long)data_bytes_allocated,
11686 (unsigned long long)data_bytes_referenced);
11688 free_qgroup_counts();
11689 free_root_recs_tree(&root_cache);
11693 if (ctx.progress_enabled)
11694 task_deinit(ctx.info);