2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87 struct extent_backref {
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
/*
 * Map an rb_node back to the extent_backref that embeds it as member
 * "node" (thin wrapper around rb_entry()).
 */
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
98 return rb_entry(node, struct extent_backref, node);
101 struct data_backref {
102 struct extent_backref node;
116 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
117 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
118 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
119 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
120 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
121 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but not match */
122 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
123 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
124 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
125 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
126 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
127 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
128 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM no reference */
129 #define NO_INODE_ITEM (1<<14) /* no inode_item */
130 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
131 #define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
132 #define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but not match */
133 #define DIR_INDEX_MISSING (1<<18) /* INODE_INDEX not found */
134 #define DIR_INDEX_MISMATCH (1<<19) /* INODE_INDEX found but not match */
/*
 * Convert a generic extent_backref into the data_backref that contains it;
 * the extent_backref is the member named "node", so container_of() is used.
 */
136 static inline struct data_backref* to_data_backref(struct extent_backref *back)
138 return container_of(back, struct data_backref, node);
/*
 * rb-tree comparison callback for two data backrefs.
 *
 * Both nodes are expected to be flagged is_data (WARN_ON fires otherwise).
 * The visible comparisons establish this key order: parent (which shares a
 * union with root), then owner, then offset, and - only when found_ref is
 * set on both sides - disk_bytenr and bytes.
 *
 * NOTE(review): the statements executed under each condition are not
 * visible in this listing. Presumably every ">" test yields a positive
 * result, every "<" test a negative one, and the full_backref check ends
 * the comparison early once the parents match - confirm against the
 * complete source.
 */
141 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
143 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
144 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
145 struct data_backref *back1 = to_data_backref(ext1);
146 struct data_backref *back2 = to_data_backref(ext2);
148 WARN_ON(!ext1->is_data);
149 WARN_ON(!ext2->is_data);
151 /* parent and root are a union, so this covers both */
152 if (back1->parent > back2->parent)
154 if (back1->parent < back2->parent)
157 /* This is a full backref and the parents match. */
158 if (back1->node.full_backref)
161 if (back1->owner > back2->owner)
163 if (back1->owner < back2->owner)
166 if (back1->offset > back2->offset)
168 if (back1->offset < back2->offset)
/* The on-disk location is only compared when both sides have found_ref set */
171 if (back1->found_ref && back2->found_ref) {
172 if (back1->disk_bytenr > back2->disk_bytenr)
174 if (back1->disk_bytenr < back2->disk_bytenr)
177 if (back1->bytes > back2->bytes)
179 if (back1->bytes < back2->bytes)
187 * Much like data_backref, but with the undetermined members removed
188 * and a list_head used for linkage.
189 * During extent scan, it is stored in root->orphan_data_extent.
190 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
192 struct orphan_data_extent {
193 struct list_head list;
201 struct tree_backref {
202 struct extent_backref node;
/*
 * Convert a generic extent_backref into the tree_backref that contains it;
 * the extent_backref is the member named "node", so container_of() is used.
 */
209 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
211 return container_of(back, struct tree_backref, node);
/*
 * rb-tree comparison callback for two tree backrefs.
 *
 * WARN_ON fires if either node is flagged is_data. The only key compared
 * in the visible lines is parent, which shares storage with root.
 *
 * NOTE(review): the result statements are not visible in this listing;
 * presumably positive for ">" and negative for "<".
 */
214 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
216 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
217 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
218 struct tree_backref *back1 = to_tree_backref(ext1);
219 struct tree_backref *back2 = to_tree_backref(ext2);
221 WARN_ON(ext1->is_data);
222 WARN_ON(ext2->is_data);
224 /* parent and root are a union, so this covers both */
225 if (back1->parent > back2->parent)
227 if (back1->parent < back2->parent)
/*
 * Generic rb-tree comparison callback for extent backrefs of either kind.
 *
 * The is_data flag is compared first, then full_backref; what remains is
 * delegated to compare_data_backref() or compare_tree_backref().
 *
 * NOTE(review): the condition that selects between the two delegates is
 * not visible in this listing - presumably it tests is_data.
 */
233 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
235 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
236 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
238 if (ext1->is_data > ext2->is_data)
241 if (ext1->is_data < ext2->is_data)
244 if (ext1->full_backref > ext2->full_backref)
246 if (ext1->full_backref < ext2->full_backref)
250 return compare_data_backref(node1, node2);
252 return compare_tree_backref(node1, node2);
255 /* Explicit initialization for extent_record::flag_block_full_backref */
256 enum { FLAG_UNSET = 2 };
258 struct extent_record {
259 struct list_head backrefs;
260 struct list_head dups;
261 struct rb_root backref_tree;
262 struct list_head list;
263 struct cache_extent cache;
264 struct btrfs_disk_key parent_key;
269 u64 extent_item_refs;
271 u64 parent_generation;
275 unsigned int flag_block_full_backref:2;
276 unsigned int found_rec:1;
277 unsigned int content_checked:1;
278 unsigned int owner_ref_checked:1;
279 unsigned int is_root:1;
280 unsigned int metadata:1;
281 unsigned int bad_full_backref:1;
282 unsigned int crossing_stripes:1;
283 unsigned int wrong_chunk_type:1;
/*
 * Map a list_head back to the extent_record that embeds it as member
 * "list" (not the "backrefs" or "dups" heads of the same struct).
 */
286 static inline struct extent_record* to_extent_record(struct list_head *entry)
288 return container_of(entry, struct extent_record, list);
291 struct inode_backref {
292 struct list_head list;
293 unsigned int found_dir_item:1;
294 unsigned int found_dir_index:1;
295 unsigned int found_inode_ref:1;
/* Map a list_head back to the inode_backref that embeds it as member "list". */
305 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
307 return list_entry(entry, struct inode_backref, list);
310 struct root_item_record {
311 struct list_head list;
317 struct btrfs_key drop_key;
/*
 * Error bits recorded per backref (OR-ed into a backref's "errors" field,
 * see add_inode_backref()) and turned into text by print_ref_error().
 * Each macro occupies its own bit.
 */
320 #define REF_ERR_NO_DIR_ITEM (1 << 0)
321 #define REF_ERR_NO_DIR_INDEX (1 << 1)
322 #define REF_ERR_NO_INODE_REF (1 << 2)
323 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
324 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
325 #define REF_ERR_DUP_INODE_REF (1 << 5)
326 #define REF_ERR_INDEX_UNMATCH (1 << 6)
327 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
328 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 0x100 (hexadecimal value of this bit)
329 #define REF_ERR_NO_ROOT_REF (1 << 9)
330 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
331 #define REF_ERR_DUP_ROOT_REF (1 << 11)
332 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
334 struct file_extent_hole {
340 struct inode_record {
341 struct list_head backrefs;
342 unsigned int checked:1;
343 unsigned int merging:1;
344 unsigned int found_inode_item:1;
345 unsigned int found_dir_item:1;
346 unsigned int found_file_extent:1;
347 unsigned int found_csum_item:1;
348 unsigned int some_csum_missing:1;
349 unsigned int nodatasum:1;
362 struct rb_root holes;
363 struct list_head orphan_extents;
/*
 * Error bits recorded per inode in inode_record::errors.
 * print_inode_error() prints the combined mask in hexadecimal followed by
 * one text fragment per set bit; the hex markers below help map that
 * printed mask back to a bit.
 */
368 #define I_ERR_NO_INODE_ITEM (1 << 0)
369 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
370 #define I_ERR_DUP_INODE_ITEM (1 << 2)
371 #define I_ERR_DUP_DIR_INDEX (1 << 3)
372 #define I_ERR_ODD_DIR_ITEM (1 << 4)
373 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
374 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
375 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
376 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 0x100
377 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
378 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 0x400
379 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
380 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
381 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
382 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
384 struct root_backref {
385 struct list_head list;
386 unsigned int found_dir_item:1;
387 unsigned int found_dir_index:1;
388 unsigned int found_back_ref:1;
389 unsigned int found_forward_ref:1;
390 unsigned int reachable:1;
/* Map a list_head back to the root_backref that embeds it as member "list". */
399 static inline struct root_backref* to_root_backref(struct list_head *entry)
401 return list_entry(entry, struct root_backref, list);
405 struct list_head backrefs;
406 struct cache_extent cache;
407 unsigned int found_root_item:1;
413 struct cache_extent cache;
418 struct cache_extent cache;
419 struct cache_tree root_cache;
420 struct cache_tree inode_cache;
421 struct inode_record *current;
430 struct walk_control {
431 struct cache_tree shared;
432 struct shared_node *nodes[BTRFS_MAX_LEVEL];
438 struct btrfs_key key;
440 struct list_head list;
443 struct extent_entry {
448 struct list_head list;
451 struct root_item_info {
452 /* level of the root */
454 /* number of nodes at this level, must be 1 for a root */
458 struct cache_extent cache_extent;
462 * Error bit for low memory mode check.
464 * Currently no caller cares about it yet. Just internal use for error
/*
 * NOTE(review): REFERENCER_MISMATCH and CROSSING_STRIPE_BOUNDARY below are
 * both defined as (1 << 4), so the two conditions cannot be told apart in
 * an error mask; every other bit in this group is unique. If the aliasing
 * is unintentional, CROSSING_STRIPE_BOUNDARY needs a free bit (for example
 * 1 << 9) once its users have been checked.
 */
467 #define BACKREF_MISSING (1 << 0) /* Backref missing in extent tree */
468 #define BACKREF_MISMATCH (1 << 1) /* Backref exists but does not match */
469 #define BYTES_UNALIGNED (1 << 2) /* Some bytes are not aligned */
470 #define REFERENCER_MISSING (1 << 3) /* Referencer not found */
471 #define REFERENCER_MISMATCH (1 << 4) /* Referencer found but does not match */
472 #define CROSSING_STRIPE_BOUNDARY (1 << 4) /* For kernel scrub workaround */
473 #define ITEM_SIZE_MISMATCH (1 << 5) /* Bad item size */
474 #define UNKNOWN_TYPE (1 << 6) /* Unknown type */
475 #define ACCOUNTING_MISMATCH (1 << 7) /* Used space accounting error */
476 #define CHUNK_TYPE_MISMATCH (1 << 8)
/*
 * Progress-reporting routine; p is the struct task_ctx to report on.
 *
 * It starts a periodic timer on priv->info (period argument 1000,
 * annotated "1s"), prints the description of the current task position
 * followed by one character of a four-step work indicator, and waits for
 * the next period. The output line ends with '\r' rather than '\n'.
 *
 * NOTE(review): the loop structure, the "count" variable and the handling
 * of TASK_NOTHING are not visible in this listing.
 */
478 static void *print_status_check(void *p)
480 struct task_ctx *priv = p;
481 const char work_indicator[] = { '.', 'o', 'O', 'o' };
/* Indexed by priv->tp when printing */
483 static char *task_position_string[] = {
485 "checking free space cache",
489 task_period_start(priv->info, 1000 /* 1s */);
491 if (priv->tp == TASK_NOTHING)
495 printf("%s [%c]\r", task_position_string[priv->tp],
496 work_indicator[count % 4]);
499 task_period_wait(priv->info);
504 static int print_status_return(void *p)
/*
 * Translate a mode name into an enum btrfs_check_mode value.
 *
 * "lowmem" selects CHECK_MODE_LOWMEM; "orig" and "original" both select
 * CHECK_MODE_ORIGINAL; any other string yields CHECK_MODE_UNKNOWN.
 * Matching uses strcmp(), so it is case-sensitive and exact.
 */
512 static enum btrfs_check_mode parse_check_mode(const char *str)
514 if (strcmp(str, "lowmem") == 0)
515 return CHECK_MODE_LOWMEM;
516 if (strcmp(str, "orig") == 0)
517 return CHECK_MODE_ORIGINAL;
518 if (strcmp(str, "original") == 0)
519 return CHECK_MODE_ORIGINAL;
521 return CHECK_MODE_UNKNOWN;
524 /* Compatibility helper that allows old code to be reused */
/*
 * Look at the first (lowest-ordered) hole in the rb-tree of file extent
 * holes.
 *
 * NOTE(review): both return statements are missing from this listing.
 * Callers compare the result against an inode size, so presumably the
 * start of the first hole is returned, with a sentinel value when the tree
 * is empty - confirm against the complete source.
 */
525 static u64 first_extent_gap(struct rb_root *holes)
527 struct file_extent_hole *hole;
529 if (RB_EMPTY_ROOT(holes))
532 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * rb-tree comparison callback for file_extent_hole nodes.
 *
 * Holes are ordered by start; holes with the same start are further
 * distinguished by len (hole1->len >= hole2->len versus the rest), so two
 * distinct nodes are not treated as equal here - add_file_extent_hole()
 * relies on that when inserting.
 *
 * NOTE(review): the result statements are not visible in this listing.
 */
536 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
538 struct file_extent_hole *hole1;
539 struct file_extent_hole *hole2;
541 hole1 = rb_entry(node1, struct file_extent_hole, node);
542 hole2 = rb_entry(node2, struct file_extent_hole, node);
544 if (hole1->start > hole2->start)
546 if (hole1->start < hole2->start)
548 /* Now hole1->start == hole2->start */
549 if (hole1->len >= hole2->len)
551 * Hole 1 will be merge center
552 * Same hole will be merged later
555 /* Hole 2 will be merge center */
560 * Add a hole to the record
562 * This will do hole merge for copy_file_extent_holes(),
563 * which ensures no two holes in the tree are adjacent or overlapping.
/*
 * Insert a new hole into the rb-tree "holes" and merge it with its
 * neighbours.
 *
 * A node is allocated with malloc() and inserted using compare_hole().
 * If the previous hole reaches or overlaps the new one, the new node is
 * widened to start at the previous hole's start and the previous node is
 * erased from the tree. Following holes are then absorbed for as long as
 * the new hole reaches or overlaps the next one.
 *
 * NOTE(review): the remaining parameters (presumably the hole's start and
 * length), the allocation-failure path, the freeing of erased nodes and
 * the return value are not visible in this listing.
 */
565 static int add_file_extent_hole(struct rb_root *holes,
568 struct file_extent_hole *hole;
569 struct file_extent_hole *prev = NULL;
570 struct file_extent_hole *next = NULL;
572 hole = malloc(sizeof(*hole));
577 /* Since compare will not return 0, no -EEXIST will happen */
578 rb_insert(holes, &hole->node, compare_hole);
580 /* simple merge with previous hole */
581 if (rb_prev(&hole->node))
582 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
/* ">=" means holes that merely touch are merged as well */
584 if (prev && prev->start + prev->len >= hole->start) {
585 hole->len = hole->start + hole->len - prev->start;
586 hole->start = prev->start;
587 rb_erase(&prev->node, holes);
592 /* iterate merge with next holes */
594 if (!rb_next(&hole->node))
596 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
598 if (hole->start + hole->len >= next->start) {
/* Only grow when the next hole ends at or beyond the current end */
599 if (hole->start + hole->len <= next->start + next->len)
600 hole->len = next->start + next->len -
602 rb_erase(&next->node, holes);
/*
 * Search callback passed to rb_search() by del_file_extent_hole().
 *
 * "data" is a struct file_extent_hole carrying the search key; "node" is
 * the tree node being tested. The key's start is compared against the
 * node's range [hole->start, hole->start + hole->len).
 *
 * NOTE(review): the line that reads "start" from the key and all result
 * statements are missing from this listing.
 */
611 static int compare_hole_range(struct rb_node *node, void *data)
613 struct file_extent_hole *hole;
616 hole = (struct file_extent_hole *)data;
619 hole = rb_entry(node, struct file_extent_hole, node);
620 if (start < hole->start)
622 if (start >= hole->start && start < hole->start + hole->len)
628 * Delete a hole in the record
630 * This will split the hole if needed and is much stricter than add.
/*
 * Remove the range [start, start + len) from the rb-tree of holes.
 *
 * The hole containing the range is located with rb_search() and
 * compare_hole_range(). The range is checked against the end of that hole
 * (start + len > hole->start + hole->len). The parts of the hole that lie
 * before and after the range are computed, the hole is erased, and those
 * parts are re-inserted through add_file_extent_hole().
 *
 * NOTE(review): the remaining parameters, the setup of "tmp", the error
 * results for "not found" and "range exceeds hole", the conditions guarding
 * the two re-adds and the freeing of the erased node are not visible in
 * this listing.
 */
632 static int del_file_extent_hole(struct rb_root *holes,
635 struct file_extent_hole *hole;
636 struct file_extent_hole tmp;
641 struct rb_node *node;
648 node = rb_search(holes, &tmp, compare_hole_range, NULL);
651 hole = rb_entry(node, struct file_extent_hole, node);
652 if (start + len > hole->start + hole->len)
656 * Now there will be no overlap, delete the hole and re-add the
657 * split(s) if they exists.
/* Part of the hole that precedes the deleted range */
659 if (start > hole->start) {
660 prev_start = hole->start;
661 prev_len = start - hole->start;
/* Part of the hole that follows the deleted range */
664 if (hole->start + hole->len > start + len) {
665 next_start = start + len;
666 next_len = hole->start + hole->len - start - len;
669 rb_erase(node, holes);
672 ret = add_file_extent_hole(holes, prev_start, prev_len);
677 ret = add_file_extent_hole(holes, next_start, next_len);
/*
 * Add every hole found in the source tree to "dst".
 *
 * The source is walked in order with rb_first()/rb_next() and each hole is
 * inserted with add_file_extent_hole(), so the merging done there applies
 * to the destination tree.
 *
 * NOTE(review): the second parameter, the loop condition and the handling
 * of a failed add are not visible in this listing.
 */
684 static int copy_file_extent_holes(struct rb_root *dst,
687 struct file_extent_hole *hole;
688 struct rb_node *node;
691 node = rb_first(src);
693 hole = rb_entry(node, struct file_extent_hole, node);
694 ret = add_file_extent_hole(dst, hole->start, hole->len);
697 node = rb_next(node);
/*
 * Empty the rb-tree of holes: repeatedly take the first node and erase it
 * from the tree until none is left.
 *
 * NOTE(review): the loop condition and the release of each hole's memory
 * are not visible in this listing; presumably each hole is freed after
 * being erased.
 */
702 static void free_file_extent_holes(struct rb_root *holes)
704 struct rb_node *node;
705 struct file_extent_hole *hole;
707 node = rb_first(holes);
709 hole = rb_entry(node, struct file_extent_hole, node);
710 rb_erase(node, holes);
/* Restart from the new first node instead of walking with rb_next() */
712 node = rb_first(holes);
716 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
/*
 * Mark "root" as participating in transaction "trans".
 *
 * When the root has not yet been recorded for this transaction id, it is
 * flagged track_dirty, stamped with the transaction id, and its current
 * node becomes the commit root; extent_buffer_get() is called on that node
 * for the additional pointer. Calling this again within the same
 * transaction changes nothing.
 */
718 static void record_root_in_trans(struct btrfs_trans_handle *trans,
719 struct btrfs_root *root)
721 if (root->last_trans != trans->transid) {
722 root->track_dirty = 1;
723 root->last_trans = trans->transid;
724 root->commit_root = root->node;
725 extent_buffer_get(root->node);
/*
 * Convert the file-type bits of an inode mode into a BTRFS_FT_* directory
 * entry type.
 *
 * The S_IFMT bits of imode are shifted down by S_SHIFT and used as an
 * index into a static lookup table. Table slots without an initializer are
 * zero, as usual for static storage.
 *
 * NOTE(review): the definition of S_SHIFT is not visible in this listing.
 */
729 static u8 imode_to_type(u32 imode)
732 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
733 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
734 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
735 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
736 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
737 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
738 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
739 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
742 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
/*
 * rb-tree comparison callback for device_record nodes; the only key is
 * devid.
 *
 * NOTE(review): the result statements are not visible in this listing.
 */
746 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
748 struct device_record *rec1;
749 struct device_record *rec2;
751 rec1 = rb_entry(node1, struct device_record, node);
752 rec2 = rb_entry(node2, struct device_record, node);
753 if (rec1->devid > rec2->devid)
755 else if (rec1->devid < rec2->devid)
/*
 * Create a private copy of an inode record.
 *
 * The record itself is copied with memcpy(), after which the embedded list
 * heads and the hole tree are re-initialised and rebuilt from the original:
 * every backref (including its trailing name and terminating byte), every
 * orphan data extent, and every file extent hole.
 *
 * Returns ERR_PTR(-ENOMEM) when the record itself cannot be allocated.
 *
 * NOTE(review): the failure branches after the inner allocations, the
 * cleanup label and the final return are not visible in this listing; the
 * tail of the function appears to be the cleanup path that tears down the
 * partially built copy.
 */
761 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
763 struct inode_record *rec;
764 struct inode_backref *backref;
765 struct inode_backref *orig;
766 struct inode_backref *tmp;
767 struct orphan_data_extent *src_orphan;
768 struct orphan_data_extent *dst_orphan;
773 rec = malloc(sizeof(*rec));
775 return ERR_PTR(-ENOMEM);
776 memcpy(rec, orig_rec, sizeof(*rec));
/* The memcpy() above copied list/tree pointers of the original; reset them */
778 INIT_LIST_HEAD(&rec->backrefs);
779 INIT_LIST_HEAD(&rec->orphan_extents);
780 rec->holes = RB_ROOT;
782 list_for_each_entry(orig, &orig_rec->backrefs, list) {
/* Header plus name bytes plus one terminating byte */
783 size = sizeof(*orig) + orig->namelen + 1;
784 backref = malloc(size);
789 memcpy(backref, orig, size);
790 list_add_tail(&backref->list, &rec->backrefs);
792 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
793 dst_orphan = malloc(sizeof(*dst_orphan));
798 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
799 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
801 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
808 rb = rb_first(&rec->holes);
810 struct file_extent_hole *hole;
812 hole = rb_entry(rb, struct file_extent_hole, node);
818 if (!list_empty(&rec->backrefs))
819 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
820 list_del(&orig->list);
824 if (!list_empty(&rec->orphan_extents))
/*
 * NOTE(review): this list holds struct orphan_data_extent entries, yet it
 * is walked with the struct inode_backref cursors "orig"/"tmp". That only
 * works if both structs keep "list" at the same offset - consider using
 * orphan_data_extent cursors here.
 */
825 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
826 list_del(&orig->list);
/*
 * Print the orphan data extents on the given list to stdout.
 *
 * When the list is non-empty, a header line naming the tree is printed,
 * followed by one line per extent with its inode, file offset, disk byte
 * number and a disk length.
 *
 * NOTE(review): the second parameter (the tree id printed in the header),
 * the early exit for an empty list and the last printf argument are not
 * visible in this listing.
 */
835 static void print_orphan_data_extents(struct list_head *orphan_extents,
838 struct orphan_data_extent *orphan;
840 if (list_empty(orphan_extents))
842 printf("The following data extent is lost in tree %llu:\n",
844 list_for_each_entry(orphan, orphan_extents, list) {
845 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
846 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Report the errors recorded for one inode on stderr.
 *
 * Output format: "root <id> inode <ino> errors <hex mask>" followed by one
 * ", <description>" fragment per I_ERR_* bit that is set, then a newline.
 * For a reloc tree the line is prefixed with "reloc" and the id printed is
 * the root key's offset rather than its objectid.
 *
 * After the summary line, orphan data extents are listed when
 * I_ERR_FILE_EXTENT_ORPHAN is set, and the recorded file extent holes when
 * I_ERR_FILE_EXTENT_DISCOUNT is set.
 *
 * NOTE(review): several lines are missing from this listing (anything
 * between the declarations and the reloc check, the hole loop condition,
 * and the circumstances of the final "start: 0" line).
 */
851 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
853 u64 root_objectid = root->root_key.objectid;
854 int errors = rec->errors;
858 /* reloc root errors, we print its corresponding fs root objectid*/
859 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
860 root_objectid = root->root_key.offset;
861 fprintf(stderr, "reloc");
863 fprintf(stderr, "root %llu inode %llu errors %x",
864 (unsigned long long) root_objectid,
865 (unsigned long long) rec->ino, rec->errors);
867 if (errors & I_ERR_NO_INODE_ITEM)
868 fprintf(stderr, ", no inode item");
869 if (errors & I_ERR_NO_ORPHAN_ITEM)
870 fprintf(stderr, ", no orphan item");
871 if (errors & I_ERR_DUP_INODE_ITEM)
872 fprintf(stderr, ", dup inode item");
873 if (errors & I_ERR_DUP_DIR_INDEX)
874 fprintf(stderr, ", dup dir index");
875 if (errors & I_ERR_ODD_DIR_ITEM)
876 fprintf(stderr, ", odd dir item");
877 if (errors & I_ERR_ODD_FILE_EXTENT)
878 fprintf(stderr, ", odd file extent");
879 if (errors & I_ERR_BAD_FILE_EXTENT)
880 fprintf(stderr, ", bad file extent");
881 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
882 fprintf(stderr, ", file extent overlap");
883 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
884 fprintf(stderr, ", file extent discount");
885 if (errors & I_ERR_DIR_ISIZE_WRONG)
886 fprintf(stderr, ", dir isize wrong");
887 if (errors & I_ERR_FILE_NBYTES_WRONG)
888 fprintf(stderr, ", nbytes wrong");
889 if (errors & I_ERR_ODD_CSUM_ITEM)
890 fprintf(stderr, ", odd csum item");
891 if (errors & I_ERR_SOME_CSUM_MISSING)
892 fprintf(stderr, ", some csum missing");
893 if (errors & I_ERR_LINK_COUNT_WRONG)
894 fprintf(stderr, ", link count wrong");
895 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
896 fprintf(stderr, ", orphan file extent");
897 fprintf(stderr, "\n");
898 /* Print the orphan extents if needed */
899 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
/*
 * NOTE(review): this passes root->objectid, not the local root_objectid
 * that was adjusted for reloc trees above - confirm which id is intended.
 */
900 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
902 /* Print the holes if needed */
903 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
904 struct file_extent_hole *hole;
905 struct rb_node *node;
908 node = rb_first(&rec->holes);
909 fprintf(stderr, "Found file extent holes:\n");
912 hole = rb_entry(node, struct file_extent_hole, node);
913 fprintf(stderr, "\tstart: %llu, len: %llu\n",
914 hole->start, hole->len);
915 node = rb_next(node);
918 fprintf(stderr, "\tstart: 0, len: %llu\n",
920 root->fs_info->sectorsize));
/*
 * Append a textual description of a backref error mask to stderr.
 *
 * For each REF_ERR_* bit set in "errors" a ", <description>" fragment is
 * written, in ascending bit order, and the line is finished with a newline
 * (which is written even when no bit is set). The caller is expected to
 * have printed the beginning of the line already, since every fragment
 * starts with a comma.
 */
924 static void print_ref_error(int errors)
926 if (errors & REF_ERR_NO_DIR_ITEM)
927 fprintf(stderr, ", no dir item");
928 if (errors & REF_ERR_NO_DIR_INDEX)
929 fprintf(stderr, ", no dir index");
930 if (errors & REF_ERR_NO_INODE_REF)
931 fprintf(stderr, ", no inode ref");
932 if (errors & REF_ERR_DUP_DIR_ITEM)
933 fprintf(stderr, ", dup dir item");
934 if (errors & REF_ERR_DUP_DIR_INDEX)
935 fprintf(stderr, ", dup dir index");
936 if (errors & REF_ERR_DUP_INODE_REF)
937 fprintf(stderr, ", dup inode ref");
938 if (errors & REF_ERR_INDEX_UNMATCH)
939 fprintf(stderr, ", index mismatch");
940 if (errors & REF_ERR_FILETYPE_UNMATCH)
941 fprintf(stderr, ", filetype mismatch");
942 if (errors & REF_ERR_NAME_TOO_LONG)
943 fprintf(stderr, ", name too long");
944 if (errors & REF_ERR_NO_ROOT_REF)
945 fprintf(stderr, ", no root ref");
946 if (errors & REF_ERR_NO_ROOT_BACKREF)
947 fprintf(stderr, ", no root backref");
948 if (errors & REF_ERR_DUP_ROOT_REF)
949 fprintf(stderr, ", dup root ref");
950 if (errors & REF_ERR_DUP_ROOT_BACKREF)
951 fprintf(stderr, ", dup root backref");
952 fprintf(stderr, "\n");
/*
 * Look up, and if necessary create, the inode record for "ino" in
 * inode_cache.
 *
 * Existing entry: when "mod" is set and the record is shared (refs > 1),
 * the cache node is given a private copy made by clone_inode_rec().
 * New entry: a zeroed record is allocated and initialised (extent_start is
 * set to (u64)-1, lists and hole tree are emptied), wrapped in a ptr_node
 * keyed by [ino, ino + 1) and inserted into the cache.
 *
 * Returns an ERR_PTR() on failure: -ENOMEM for allocation failures and
 * -EEXIST when the insert fails.
 *
 * NOTE(review): the remaining parameters, the branch conditions, the
 * reference-count updates, the BTRFS_FREE_INO_OBJECTID special case and
 * the success return are not visible in this listing.
 */
955 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
958 struct ptr_node *node;
959 struct cache_extent *cache;
960 struct inode_record *rec = NULL;
963 cache = lookup_cache_extent(inode_cache, ino, 1);
965 node = container_of(cache, struct ptr_node, cache);
/* A record shared by several nodes is cloned before being modified */
967 if (mod && rec->refs > 1) {
968 node->data = clone_inode_rec(rec);
969 if (IS_ERR(node->data))
975 rec = calloc(1, sizeof(*rec));
977 return ERR_PTR(-ENOMEM);
979 rec->extent_start = (u64)-1;
981 INIT_LIST_HEAD(&rec->backrefs);
982 INIT_LIST_HEAD(&rec->orphan_extents);
983 rec->holes = RB_ROOT;
985 node = malloc(sizeof(*node));
988 return ERR_PTR(-ENOMEM);
990 node->cache.start = ino;
991 node->cache.size = 1;
994 if (ino == BTRFS_FREE_INO_OBJECTID)
997 ret = insert_cache_extent(inode_cache, &node->cache);
999 return ERR_PTR(-EEXIST);
/*
 * Drain a list of orphan_data_extent entries: the first entry is unlinked
 * repeatedly until the list is empty.
 *
 * NOTE(review): the statement releasing each entry's memory is not visible
 * in this listing; presumably the entry is freed after list_del().
 */
1004 static void free_orphan_data_extents(struct list_head *orphan_extents)
1006 struct orphan_data_extent *orphan;
1008 while (!list_empty(orphan_extents)) {
1009 orphan = list_entry(orphan_extents->next,
1010 struct orphan_data_extent, list);
1011 list_del(&orphan->list);
/*
 * Drop one reference on an inode record and tear it down when that was the
 * last one.
 *
 * The reference count is decremented first; the teardown below it unlinks
 * every backref and releases the orphan data extents and the hole tree.
 *
 * NOTE(review): the early exit taken while references remain, and the
 * statements freeing each backref and the record itself, are not visible
 * in this listing.
 */
1016 static void free_inode_rec(struct inode_record *rec)
1018 struct inode_backref *backref;
1020 if (--rec->refs > 0)
1023 while (!list_empty(&rec->backrefs)) {
1024 backref = to_inode_backref(rec->backrefs.next);
1025 list_del(&backref->list);
1028 free_orphan_data_extents(&rec->orphan_extents);
1029 free_file_extent_holes(&rec->holes);
/*
 * Test whether an inode record has nothing left to report: no errors, fully
 * checked, inode item seen, link count matching the links found, and no
 * backrefs still pending.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * presumably non-zero when all of the conditions hold.
 */
1033 static int can_free_inode_rec(struct inode_record *rec)
1035 if (!rec->errors && rec->checked && rec->found_inode_item &&
1036 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Finish validating an inode record and release it when nothing is left to
 * report.
 *
 * Visible steps:
 *  - The inode item must have been seen (tested first).
 *  - Backrefs that have both a dir item and a dir index get their filetype
 *    compared with the type derived from the inode mode; a mismatch sets
 *    REF_ERR_FILETYPE_UNMATCH. Backrefs that are error-free, have an inode
 *    ref, and belong to a record whose nlink equals found_link are unlinked
 *    from the list.
 *  - The record must be checked and not in the middle of a merge before the
 *    per-type consistency checks run; those accumulate I_ERR_* bits in
 *    rec->errors for directories and for regular files/symlinks.
 *  - If can_free_inode_rec() agrees, the record's node is removed from
 *    inode_cache and the record is passed to free_inode_rec().
 *
 * NOTE(review): the early-exit statements and the freeing of unlinked
 * backrefs and of the cache node are not visible in this listing.
 */
1041 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1042 struct inode_record *rec)
1044 struct cache_extent *cache;
1045 struct inode_backref *tmp, *backref;
1046 struct ptr_node *node;
1049 if (!rec->found_inode_item)
1052 filetype = imode_to_type(rec->imode);
1053 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1054 if (backref->found_dir_item && backref->found_dir_index) {
1055 if (backref->filetype != filetype)
1056 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1057 if (!backref->errors && backref->found_inode_ref &&
1058 rec->nlink == rec->found_link) {
1059 list_del(&backref->list);
1065 if (!rec->checked || rec->merging)
1068 if (S_ISDIR(rec->imode)) {
1069 if (rec->found_size != rec->isize)
1070 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1071 if (rec->found_file_extent)
1072 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1073 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1074 if (rec->found_dir_item)
1075 rec->errors |= I_ERR_ODD_DIR_ITEM;
1076 if (rec->found_size != rec->nbytes)
1077 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
/* Gap check is skipped for unlinked inodes and when no_holes is set */
1078 if (rec->nlink > 0 && !no_holes &&
1079 (rec->extent_end < rec->isize ||
1080 first_extent_gap(&rec->holes) < rec->isize))
1081 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1084 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1085 if (rec->found_csum_item && rec->nodatasum)
1086 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1087 if (rec->some_csum_missing && !rec->nodatasum)
1088 rec->errors |= I_ERR_SOME_CSUM_MISSING;
/* From here on the record must not be shared with another node */
1091 BUG_ON(rec->refs != 1);
1092 if (can_free_inode_rec(rec)) {
1093 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1094 node = container_of(cache, struct ptr_node, cache);
1095 BUG_ON(node->data != rec);
1096 remove_cache_extent(inode_cache, &node->cache);
1098 free_inode_rec(rec);
/*
 * Search "root" for an orphan item, using a key with objectid
 * BTRFS_ORPHAN_OBJECTID and type BTRFS_ORPHAN_ITEM_KEY.
 *
 * The search is read-only (no transaction handle, ins_len and cow both 0)
 * and the path is released right after it.
 *
 * NOTE(review): the key's offset (presumably "ino") and the translation of
 * the search result into the return value are not visible in this listing.
 */
1102 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1104 struct btrfs_path path;
1105 struct btrfs_key key;
1108 key.objectid = BTRFS_ORPHAN_OBJECTID;
1109 key.type = BTRFS_ORPHAN_ITEM_KEY;
1112 btrfs_init_path(&path);
1113 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1114 btrfs_release_path(&path);
/*
 * Record the contents of an INODE_ITEM in the active inode record.
 *
 * The active record must belong to key->objectid and must not be shared
 * (BUG_ON otherwise). A second inode item for the same record sets
 * I_ERR_DUP_INODE_ITEM. Otherwise nlink, size, nbytes and mode are read
 * from the item at eb[slot], found_inode_item is set, and a link count of
 * zero sets I_ERR_NO_ORPHAN_ITEM. The record is then handed to
 * maybe_free_inode_rec().
 *
 * NOTE(review): the statement following the duplicate check, the
 * assignment made when BTRFS_INODE_NODATASUM is set (presumably
 * rec->nodatasum) and the return value are not visible in this listing.
 */
1120 static int process_inode_item(struct extent_buffer *eb,
1121 int slot, struct btrfs_key *key,
1122 struct shared_node *active_node)
1124 struct inode_record *rec;
1125 struct btrfs_inode_item *item;
1127 rec = active_node->current;
1128 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1129 if (rec->found_inode_item) {
1130 rec->errors |= I_ERR_DUP_INODE_ITEM;
1133 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1134 rec->nlink = btrfs_inode_nlink(eb, item);
1135 rec->isize = btrfs_inode_size(eb, item);
1136 rec->nbytes = btrfs_inode_nbytes(eb, item);
1137 rec->imode = btrfs_inode_mode(eb, item);
1138 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1140 rec->found_inode_item = 1;
1141 if (rec->nlink == 0)
1142 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1143 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find the backref of "rec" that matches (dir, name, namelen), or create it.
 *
 * Existing backrefs are matched on dir, namelen and the name bytes. When
 * none matches, a new backref is allocated with room for the name plus a
 * terminating NUL, its header is zeroed, the name is copied in, and it is
 * appended to rec->backrefs.
 *
 * NOTE(review): the "name" parameter line, the effect of the
 * BTRFS_MULTIPLE_OBJECTIDS test, the allocation-failure path, the
 * assignment of backref->dir and the returns are not visible in this
 * listing.
 */
1147 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1149 int namelen, u64 dir)
1151 struct inode_backref *backref;
1153 list_for_each_entry(backref, &rec->backrefs, list) {
1154 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1156 if (backref->dir != dir || backref->namelen != namelen)
1158 if (memcmp(name, backref->name, namelen))
1163 backref = malloc(sizeof(*backref) + namelen + 1);
/* Only the fixed-size header is cleared; the name is filled in below */
1166 memset(backref, 0, sizeof(*backref));
1168 backref->namelen = namelen;
1169 memcpy(backref->name, name, namelen);
1170 backref->name[namelen] = '\0';
1171 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Record that an item of type "itemtype" referencing inode "ino" from
 * directory "dir" under "name" has been seen.
 *
 * The inode record is fetched for modification and the matching backref is
 * looked up or created; "errors" is OR-ed into the backref. Depending on
 * the item type:
 *  - BTRFS_DIR_INDEX_KEY: flags a duplicate dir index, an index that
 *    differs from an already seen inode ref, and a filetype that differs
 *    from an already seen dir item; then stores index and filetype.
 *  - BTRFS_DIR_ITEM_KEY: flags a duplicate dir item and a filetype that
 *    differs from an already seen dir index; then stores the filetype.
 *  - BTRFS_INODE_REF_KEY / BTRFS_INODE_EXTREF_KEY: flags a duplicate inode
 *    ref and an index that differs from an already seen dir index; then
 *    stores the index and the ref type.
 * Finally the record is passed to maybe_free_inode_rec().
 *
 * A failure of get_inode_rec() triggers BUG_ON.
 *
 * NOTE(review): lines between the visible statements (for example link or
 * size accounting) and the return value are missing from this listing.
 */
1175 static int add_inode_backref(struct cache_tree *inode_cache,
1176 u64 ino, u64 dir, u64 index,
1177 const char *name, int namelen,
1178 u8 filetype, u8 itemtype, int errors)
1180 struct inode_record *rec;
1181 struct inode_backref *backref;
1183 rec = get_inode_rec(inode_cache, ino, 1);
1184 BUG_ON(IS_ERR(rec));
1185 backref = get_inode_backref(rec, name, namelen, dir);
1188 backref->errors |= errors;
1189 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1190 if (backref->found_dir_index)
1191 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1192 if (backref->found_inode_ref && backref->index != index)
1193 backref->errors |= REF_ERR_INDEX_UNMATCH;
1194 if (backref->found_dir_item && backref->filetype != filetype)
1195 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1197 backref->index = index;
1198 backref->filetype = filetype;
1199 backref->found_dir_index = 1;
1200 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1202 if (backref->found_dir_item)
1203 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1204 if (backref->found_dir_index && backref->filetype != filetype)
1205 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1207 backref->filetype = filetype;
1208 backref->found_dir_item = 1;
1209 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1210 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1211 if (backref->found_inode_ref)
1212 backref->errors |= REF_ERR_DUP_INODE_REF;
1213 if (backref->found_dir_index && backref->index != index)
1214 backref->errors |= REF_ERR_INDEX_UNMATCH;
1216 backref->index = index;
1218 backref->ref_type = itemtype;
1219 backref->found_inode_ref = 1;
1224 maybe_free_inode_rec(inode_cache, rec);
/*
 * Fold everything known about "src" into "dst", which lives in dst_cache.
 *
 * Visible steps:
 *  - Every backref of src is replayed onto dst through add_inode_backref(),
 *    once per kind of item that was found for it (dir index, dir item,
 *    inode ref), carrying the backref's error bits along.
 *  - The found_* and some_csum_missing flags are OR-ed into dst.
 *  - src's holes are copied into dst when dst's first gap lies beyond
 *    src's first gap.
 *  - found_link (minus dir_count) and found_size are added up.
 *  - The extent ranges are combined: an overlap sets
 *    I_ERR_FILE_EXTENT_OVERLAP, a gap between dst's end and src's start is
 *    recorded as a hole, and dst's extent_end is extended if needed.
 *  - Error bits are OR-ed, and the inode item fields are taken from src if
 *    dst has none yet; otherwise I_ERR_DUP_INODE_ITEM is set.
 *
 * NOTE(review): the maintenance of "dir_count", the handling of "ret" and
 * any merging-flag updates are not visible in this listing.
 */
1228 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1229 struct cache_tree *dst_cache)
1231 struct inode_backref *backref;
1236 list_for_each_entry(backref, &src->backrefs, list) {
1237 if (backref->found_dir_index) {
1238 add_inode_backref(dst_cache, dst->ino, backref->dir,
1239 backref->index, backref->name,
1240 backref->namelen, backref->filetype,
1241 BTRFS_DIR_INDEX_KEY, backref->errors);
1243 if (backref->found_dir_item) {
1245 add_inode_backref(dst_cache, dst->ino,
1246 backref->dir, 0, backref->name,
1247 backref->namelen, backref->filetype,
1248 BTRFS_DIR_ITEM_KEY, backref->errors);
1250 if (backref->found_inode_ref) {
1251 add_inode_backref(dst_cache, dst->ino,
1252 backref->dir, backref->index,
1253 backref->name, backref->namelen, 0,
1254 backref->ref_type, backref->errors);
1258 if (src->found_dir_item)
1259 dst->found_dir_item = 1;
1260 if (src->found_file_extent)
1261 dst->found_file_extent = 1;
1262 if (src->found_csum_item)
1263 dst->found_csum_item = 1;
1264 if (src->some_csum_missing)
1265 dst->some_csum_missing = 1;
1266 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1267 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1272 BUG_ON(src->found_link < dir_count);
1273 dst->found_link += src->found_link - dir_count;
1274 dst->found_size += src->found_size;
/* (u64)-1 in extent_start means no extent has been recorded yet */
1275 if (src->extent_start != (u64)-1) {
1276 if (dst->extent_start == (u64)-1) {
1277 dst->extent_start = src->extent_start;
1278 dst->extent_end = src->extent_end;
1280 if (dst->extent_end > src->extent_start)
1281 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1282 else if (dst->extent_end < src->extent_start) {
1283 ret = add_file_extent_hole(&dst->holes,
1285 src->extent_start - dst->extent_end);
1287 if (dst->extent_end < src->extent_end)
1288 dst->extent_end = src->extent_end;
1292 dst->errors |= src->errors;
1293 if (src->found_inode_item) {
1294 if (!dst->found_inode_item) {
1295 dst->nlink = src->nlink;
1296 dst->isize = src->isize;
1297 dst->nbytes = src->nbytes;
1298 dst->imode = src->imode;
1299 dst->nodatasum = src->nodatasum;
1300 dst->found_inode_item = 1;
1302 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Transfer the inode records collected in src_node into dst_node.
 *
 * One reference on src_node is dropped first. The records of
 * src_node->root_cache are processed, then those of
 * src_node->inode_cache, each going into the cache of the same name in
 * dst_node. For every record a new ptr_node with the same [start, size) is
 * inserted into the destination; if the destination already has an entry
 * (-EEXIST), the record is merged into the existing one with
 * merge_inode_recs(), the existing one is marked checked and offered to
 * maybe_free_inode_rec(), and the source record is released.
 *
 * Afterwards, if src_node had a current inode and dst_node has none or an
 * older one (smaller ino), dst_node's current record is closed (marked
 * checked, offered for freeing) and replaced by the record for
 * src_node's current ino.
 *
 * NOTE(review): what the reference-count test selects (presumably move
 * versus share), the loop structure and the return value are not visible
 * in this listing.
 */
1310 static int splice_shared_node(struct shared_node *src_node,
1311 struct shared_node *dst_node)
1313 struct cache_extent *cache;
1314 struct ptr_node *node, *ins;
1315 struct cache_tree *src, *dst;
1316 struct inode_record *rec, *conflict;
1317 u64 current_ino = 0;
1321 if (--src_node->refs == 0)
1323 if (src_node->current)
1324 current_ino = src_node->current->ino;
1326 src = &src_node->root_cache;
1327 dst = &dst_node->root_cache;
1329 cache = search_cache_extent(src, 0);
1331 node = container_of(cache, struct ptr_node, cache);
/* Advance before the current node is possibly removed from the tree */
1333 cache = next_cache_extent(cache);
1336 remove_cache_extent(src, &node->cache);
1339 ins = malloc(sizeof(*ins));
1341 ins->cache.start = node->cache.start;
1342 ins->cache.size = node->cache.size;
1346 ret = insert_cache_extent(dst, &ins->cache);
1347 if (ret == -EEXIST) {
1348 conflict = get_inode_rec(dst, rec->ino, 1);
1349 BUG_ON(IS_ERR(conflict));
1350 merge_inode_recs(rec, conflict, dst);
1352 conflict->checked = 1;
1353 if (dst_node->current == conflict)
1354 dst_node->current = NULL;
1356 maybe_free_inode_rec(dst, conflict);
1357 free_inode_rec(rec);
/* Second pass: switch from the root caches to the inode caches */
1364 if (src == &src_node->root_cache) {
1365 src = &src_node->inode_cache;
1366 dst = &dst_node->inode_cache;
1370 if (current_ino > 0 && (!dst_node->current ||
1371 current_ino > dst_node->current->ino)) {
1372 if (dst_node->current) {
1373 dst_node->current->checked = 1;
1374 maybe_free_inode_rec(dst, dst_node->current);
1376 dst_node->current = get_inode_rec(dst, current_ino, 1);
1377 BUG_ON(IS_ERR(dst_node->current));
1382 static void free_inode_ptr(struct cache_extent *cache)
1384 struct ptr_node *node;
1385 struct inode_record *rec;
1387 node = container_of(cache, struct ptr_node, cache);
1389 free_inode_rec(rec);
1393 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1395 static struct shared_node *find_shared_node(struct cache_tree *shared,
1398 struct cache_extent *cache;
1399 struct shared_node *node;
1401 cache = lookup_cache_extent(shared, bytenr, 1);
1403 node = container_of(cache, struct shared_node, cache);
1409 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1412 struct shared_node *node;
1414 node = calloc(1, sizeof(*node));
1417 node->cache.start = bytenr;
1418 node->cache.size = 1;
1419 cache_tree_init(&node->root_cache);
1420 cache_tree_init(&node->inode_cache);
1423 ret = insert_cache_extent(shared, &node->cache);
1428 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1429 struct walk_control *wc, int level)
1431 struct shared_node *node;
1432 struct shared_node *dest;
1435 if (level == wc->active_node)
1438 BUG_ON(wc->active_node <= level);
1439 node = find_shared_node(&wc->shared, bytenr);
1441 ret = add_shared_node(&wc->shared, bytenr, refs);
1443 node = find_shared_node(&wc->shared, bytenr);
1444 wc->nodes[level] = node;
1445 wc->active_node = level;
1449 if (wc->root_level == wc->active_node &&
1450 btrfs_root_refs(&root->root_item) == 0) {
1451 if (--node->refs == 0) {
1452 free_inode_recs_tree(&node->root_cache);
1453 free_inode_recs_tree(&node->inode_cache);
1454 remove_cache_extent(&wc->shared, &node->cache);
1460 dest = wc->nodes[wc->active_node];
1461 splice_shared_node(node, dest);
1462 if (node->refs == 0) {
1463 remove_cache_extent(&wc->shared, &node->cache);
1469 static int leave_shared_node(struct btrfs_root *root,
1470 struct walk_control *wc, int level)
1472 struct shared_node *node;
1473 struct shared_node *dest;
1476 if (level == wc->root_level)
1479 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1483 BUG_ON(i >= BTRFS_MAX_LEVEL);
1485 node = wc->nodes[wc->active_node];
1486 wc->nodes[wc->active_node] = NULL;
1487 wc->active_node = i;
1489 dest = wc->nodes[wc->active_node];
1490 if (wc->active_node < wc->root_level ||
1491 btrfs_root_refs(&root->root_item) > 0) {
1492 BUG_ON(node->refs <= 1);
1493 splice_shared_node(node, dest);
1495 BUG_ON(node->refs < 2);
1504 * 1 - if the root with id child_root_id is a child of root parent_root_id
1505 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1506 * has other root(s) as parent(s)
1507 * 2 - if the root child_root_id doesn't have any parent roots
1509 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1512 struct btrfs_path path;
1513 struct btrfs_key key;
1514 struct extent_buffer *leaf;
1518 btrfs_init_path(&path);
1520 key.objectid = parent_root_id;
1521 key.type = BTRFS_ROOT_REF_KEY;
1522 key.offset = child_root_id;
1523 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1527 btrfs_release_path(&path);
1531 key.objectid = child_root_id;
1532 key.type = BTRFS_ROOT_BACKREF_KEY;
1534 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1540 leaf = path.nodes[0];
1541 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1542 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1545 leaf = path.nodes[0];
1548 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1549 if (key.objectid != child_root_id ||
1550 key.type != BTRFS_ROOT_BACKREF_KEY)
1555 if (key.offset == parent_root_id) {
1556 btrfs_release_path(&path);
1563 btrfs_release_path(&path);
1566 return has_parent ? 0 : 2;
1569 static int process_dir_item(struct extent_buffer *eb,
1570 int slot, struct btrfs_key *key,
1571 struct shared_node *active_node)
1581 struct btrfs_dir_item *di;
1582 struct inode_record *rec;
1583 struct cache_tree *root_cache;
1584 struct cache_tree *inode_cache;
1585 struct btrfs_key location;
1586 char namebuf[BTRFS_NAME_LEN];
1588 root_cache = &active_node->root_cache;
1589 inode_cache = &active_node->inode_cache;
1590 rec = active_node->current;
1591 rec->found_dir_item = 1;
1593 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1594 total = btrfs_item_size_nr(eb, slot);
1595 while (cur < total) {
1597 btrfs_dir_item_key_to_cpu(eb, di, &location);
1598 name_len = btrfs_dir_name_len(eb, di);
1599 data_len = btrfs_dir_data_len(eb, di);
1600 filetype = btrfs_dir_type(eb, di);
1602 rec->found_size += name_len;
1603 if (cur + sizeof(*di) + name_len > total ||
1604 name_len > BTRFS_NAME_LEN) {
1605 error = REF_ERR_NAME_TOO_LONG;
1607 if (cur + sizeof(*di) > total)
1609 len = min_t(u32, total - cur - sizeof(*di),
1616 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1618 if (key->type == BTRFS_DIR_ITEM_KEY &&
1619 key->offset != btrfs_name_hash(namebuf, len)) {
1620 rec->errors |= I_ERR_ODD_DIR_ITEM;
1621 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1622 key->objectid, key->offset, namebuf, len, filetype,
1623 key->offset, btrfs_name_hash(namebuf, len));
1626 if (location.type == BTRFS_INODE_ITEM_KEY) {
1627 add_inode_backref(inode_cache, location.objectid,
1628 key->objectid, key->offset, namebuf,
1629 len, filetype, key->type, error);
1630 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1631 add_inode_backref(root_cache, location.objectid,
1632 key->objectid, key->offset,
1633 namebuf, len, filetype,
1636 fprintf(stderr, "invalid location in dir item %u\n",
1638 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1639 key->objectid, key->offset, namebuf,
1640 len, filetype, key->type, error);
1643 len = sizeof(*di) + name_len + data_len;
1644 di = (struct btrfs_dir_item *)((char *)di + len);
1647 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1648 rec->errors |= I_ERR_DUP_DIR_INDEX;
1653 static int process_inode_ref(struct extent_buffer *eb,
1654 int slot, struct btrfs_key *key,
1655 struct shared_node *active_node)
1663 struct cache_tree *inode_cache;
1664 struct btrfs_inode_ref *ref;
1665 char namebuf[BTRFS_NAME_LEN];
1667 inode_cache = &active_node->inode_cache;
1669 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1670 total = btrfs_item_size_nr(eb, slot);
1671 while (cur < total) {
1672 name_len = btrfs_inode_ref_name_len(eb, ref);
1673 index = btrfs_inode_ref_index(eb, ref);
1675 /* inode_ref + namelen should not cross item boundary */
1676 if (cur + sizeof(*ref) + name_len > total ||
1677 name_len > BTRFS_NAME_LEN) {
1678 if (total < cur + sizeof(*ref))
1681 /* Still try to read out the remaining part */
1682 len = min_t(u32, total - cur - sizeof(*ref),
1684 error = REF_ERR_NAME_TOO_LONG;
1690 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1691 add_inode_backref(inode_cache, key->objectid, key->offset,
1692 index, namebuf, len, 0, key->type, error);
1694 len = sizeof(*ref) + name_len;
1695 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1701 static int process_inode_extref(struct extent_buffer *eb,
1702 int slot, struct btrfs_key *key,
1703 struct shared_node *active_node)
1712 struct cache_tree *inode_cache;
1713 struct btrfs_inode_extref *extref;
1714 char namebuf[BTRFS_NAME_LEN];
1716 inode_cache = &active_node->inode_cache;
1718 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1719 total = btrfs_item_size_nr(eb, slot);
1720 while (cur < total) {
1721 name_len = btrfs_inode_extref_name_len(eb, extref);
1722 index = btrfs_inode_extref_index(eb, extref);
1723 parent = btrfs_inode_extref_parent(eb, extref);
1724 if (name_len <= BTRFS_NAME_LEN) {
1728 len = BTRFS_NAME_LEN;
1729 error = REF_ERR_NAME_TOO_LONG;
1731 read_extent_buffer(eb, namebuf,
1732 (unsigned long)(extref + 1), len);
1733 add_inode_backref(inode_cache, key->objectid, parent,
1734 index, namebuf, len, 0, key->type, error);
1736 len = sizeof(*extref) + name_len;
1737 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1744 static int count_csum_range(struct btrfs_root *root, u64 start,
1745 u64 len, u64 *found)
1747 struct btrfs_key key;
1748 struct btrfs_path path;
1749 struct extent_buffer *leaf;
1754 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1756 btrfs_init_path(&path);
1758 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1760 key.type = BTRFS_EXTENT_CSUM_KEY;
1762 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1766 if (ret > 0 && path.slots[0] > 0) {
1767 leaf = path.nodes[0];
1768 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1769 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1770 key.type == BTRFS_EXTENT_CSUM_KEY)
1775 leaf = path.nodes[0];
1776 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1777 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1782 leaf = path.nodes[0];
1785 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1786 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1787 key.type != BTRFS_EXTENT_CSUM_KEY)
1790 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1791 if (key.offset >= start + len)
1794 if (key.offset > start)
1797 size = btrfs_item_size_nr(leaf, path.slots[0]);
1798 csum_end = key.offset + (size / csum_size) *
1799 root->fs_info->sectorsize;
1800 if (csum_end > start) {
1801 size = min(csum_end - start, len);
1810 btrfs_release_path(&path);
1816 static int process_file_extent(struct btrfs_root *root,
1817 struct extent_buffer *eb,
1818 int slot, struct btrfs_key *key,
1819 struct shared_node *active_node)
1821 struct inode_record *rec;
1822 struct btrfs_file_extent_item *fi;
1824 u64 disk_bytenr = 0;
1825 u64 extent_offset = 0;
1826 u64 mask = root->fs_info->sectorsize - 1;
1830 rec = active_node->current;
1831 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1832 rec->found_file_extent = 1;
1834 if (rec->extent_start == (u64)-1) {
1835 rec->extent_start = key->offset;
1836 rec->extent_end = key->offset;
1839 if (rec->extent_end > key->offset)
1840 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1841 else if (rec->extent_end < key->offset) {
1842 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1843 key->offset - rec->extent_end);
1848 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1849 extent_type = btrfs_file_extent_type(eb, fi);
1851 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1852 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1854 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1855 rec->found_size += num_bytes;
1856 num_bytes = (num_bytes + mask) & ~mask;
1857 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1858 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1859 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1860 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1861 extent_offset = btrfs_file_extent_offset(eb, fi);
1862 if (num_bytes == 0 || (num_bytes & mask))
1863 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1864 if (num_bytes + extent_offset >
1865 btrfs_file_extent_ram_bytes(eb, fi))
1866 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1867 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1868 (btrfs_file_extent_compression(eb, fi) ||
1869 btrfs_file_extent_encryption(eb, fi) ||
1870 btrfs_file_extent_other_encoding(eb, fi)))
1871 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1872 if (disk_bytenr > 0)
1873 rec->found_size += num_bytes;
1875 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1877 rec->extent_end = key->offset + num_bytes;
1880 * The data reloc tree will copy full extents into its inode and then
1881 * copy the corresponding csums. Because the extent it copied could be
1882 * a preallocated extent that hasn't been written to yet there may be no
1883 * csums to copy, ergo we won't have csums for our file extent. This is
1884 * ok so just don't bother checking csums if the inode belongs to the
1887 if (disk_bytenr > 0 &&
1888 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1890 if (btrfs_file_extent_compression(eb, fi))
1891 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1893 disk_bytenr += extent_offset;
1895 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1898 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1900 rec->found_csum_item = 1;
1901 if (found < num_bytes)
1902 rec->some_csum_missing = 1;
1903 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1905 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1911 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1912 struct walk_control *wc)
1914 struct btrfs_key key;
1918 struct cache_tree *inode_cache;
1919 struct shared_node *active_node;
1921 if (wc->root_level == wc->active_node &&
1922 btrfs_root_refs(&root->root_item) == 0)
1925 active_node = wc->nodes[wc->active_node];
1926 inode_cache = &active_node->inode_cache;
1927 nritems = btrfs_header_nritems(eb);
1928 for (i = 0; i < nritems; i++) {
1929 btrfs_item_key_to_cpu(eb, &key, i);
1931 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1933 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1936 if (active_node->current == NULL ||
1937 active_node->current->ino < key.objectid) {
1938 if (active_node->current) {
1939 active_node->current->checked = 1;
1940 maybe_free_inode_rec(inode_cache,
1941 active_node->current);
1943 active_node->current = get_inode_rec(inode_cache,
1945 BUG_ON(IS_ERR(active_node->current));
1948 case BTRFS_DIR_ITEM_KEY:
1949 case BTRFS_DIR_INDEX_KEY:
1950 ret = process_dir_item(eb, i, &key, active_node);
1952 case BTRFS_INODE_REF_KEY:
1953 ret = process_inode_ref(eb, i, &key, active_node);
1955 case BTRFS_INODE_EXTREF_KEY:
1956 ret = process_inode_extref(eb, i, &key, active_node);
1958 case BTRFS_INODE_ITEM_KEY:
1959 ret = process_inode_item(eb, i, &key, active_node);
1961 case BTRFS_EXTENT_DATA_KEY:
1962 ret = process_file_extent(root, eb, i, &key,
1973 u64 bytenr[BTRFS_MAX_LEVEL];
1974 u64 refs[BTRFS_MAX_LEVEL];
1975 int need_check[BTRFS_MAX_LEVEL];
1978 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1979 struct node_refs *nrefs, u64 level);
1980 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1981 unsigned int ext_ref);
1984 * Returns >0 Found error, not fatal, should continue
1985 * Returns <0 Fatal error, must exit the whole check
1986 * Returns 0 No errors found
1988 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1989 struct node_refs *nrefs, int *level, int ext_ref)
1991 struct extent_buffer *cur = path->nodes[0];
1992 struct btrfs_key key;
1996 int root_level = btrfs_header_level(root->node);
1998 int ret = 0; /* Final return value */
1999 int err = 0; /* Positive error bitmap */
2001 cur_bytenr = cur->start;
2003 /* skip to first inode item or the first inode number change */
2004 nritems = btrfs_header_nritems(cur);
2005 for (i = 0; i < nritems; i++) {
2006 btrfs_item_key_to_cpu(cur, &key, i);
2008 first_ino = key.objectid;
2009 if (key.type == BTRFS_INODE_ITEM_KEY ||
2010 (first_ino && first_ino != key.objectid))
2014 path->slots[0] = nritems;
2020 err |= check_inode_item(root, path, ext_ref);
2022 /* modify cur since check_inode_item may change path */
2023 cur = path->nodes[0];
2025 if (err & LAST_ITEM)
2028 /* still have inode items in this leaf */
2029 if (cur->start == cur_bytenr)
2033 * we have switched to another leaf, above nodes may
2034 * have changed, here walk down the path, if a node
2035 * or leaf is shared, check whether we can skip this
2038 for (i = root_level; i >= 0; i--) {
2039 if (path->nodes[i]->start == nrefs->bytenr[i])
2042 ret = update_nodes_refs(root,
2043 path->nodes[i]->start,
2048 if (!nrefs->need_check[i]) {
2054 for (i = 0; i < *level; i++) {
2055 free_extent_buffer(path->nodes[i]);
2056 path->nodes[i] = NULL;
2065 static void reada_walk_down(struct btrfs_root *root,
2066 struct extent_buffer *node, int slot)
2068 struct btrfs_fs_info *fs_info = root->fs_info;
2075 level = btrfs_header_level(node);
2079 nritems = btrfs_header_nritems(node);
2080 for (i = slot; i < nritems; i++) {
2081 bytenr = btrfs_node_blockptr(node, i);
2082 ptr_gen = btrfs_node_ptr_generation(node, i);
2083 readahead_tree_block(fs_info, bytenr, ptr_gen);
2088 * Check the child node/leaf by the following condition:
2089 * 1. the first item key of the node/leaf should be the same with the one
2091 * 2. block in parent node should match the child node/leaf.
2092 * 3. generation of parent node and child's header should be consistent.
2094 * Or the child node/leaf pointed by the key in parent is not valid.
2096 * We hope to check leaf owner too, but since subvol may share leaves,
2097 * which makes leaf owner check not so strong, key check should be
2098 * sufficient enough for that case.
2100 static int check_child_node(struct extent_buffer *parent, int slot,
2101 struct extent_buffer *child)
2103 struct btrfs_key parent_key;
2104 struct btrfs_key child_key;
2107 btrfs_node_key_to_cpu(parent, &parent_key, slot);
2108 if (btrfs_header_level(child) == 0)
2109 btrfs_item_key_to_cpu(child, &child_key, 0);
2111 btrfs_node_key_to_cpu(child, &child_key, 0);
2113 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2116 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2117 parent_key.objectid, parent_key.type, parent_key.offset,
2118 child_key.objectid, child_key.type, child_key.offset);
2120 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2122 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2123 btrfs_node_blockptr(parent, slot),
2124 btrfs_header_bytenr(child));
2126 if (btrfs_node_ptr_generation(parent, slot) !=
2127 btrfs_header_generation(child)) {
2129 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2130 btrfs_header_generation(child),
2131 btrfs_node_ptr_generation(parent, slot));
2137 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2138 * in every fs or file tree check. Here we find its all root ids, and only check
2139 * it in the fs or file tree which has the smallest root id.
2141 static int need_check(struct btrfs_root *root, struct ulist *roots)
2143 struct rb_node *node;
2144 struct ulist_node *u;
2146 if (roots->nnodes == 1)
2149 node = rb_first(&roots->root);
2150 u = rb_entry(node, struct ulist_node, rb_node);
2152 * current root id is not smallest, we skip it and let it be checked
2153 * in the fs or file tree who hash the smallest root id.
2155 if (root->objectid != u->val)
2162 * for a tree node or leaf, we record its reference count, so later if we still
2163 * process this node or leaf, don't need to compute its reference count again.
2165 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2166 struct node_refs *nrefs, u64 level)
2170 struct ulist *roots;
2172 if (nrefs->bytenr[level] != bytenr) {
2173 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2174 level, 1, &refs, NULL);
2178 nrefs->bytenr[level] = bytenr;
2179 nrefs->refs[level] = refs;
2181 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2186 check = need_check(root, roots);
2188 nrefs->need_check[level] = check;
2190 nrefs->need_check[level] = 1;
2197 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2198 struct walk_control *wc, int *level,
2199 struct node_refs *nrefs)
2201 enum btrfs_tree_block_status status;
2204 struct btrfs_fs_info *fs_info = root->fs_info;
2205 struct extent_buffer *next;
2206 struct extent_buffer *cur;
2210 WARN_ON(*level < 0);
2211 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2213 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2214 refs = nrefs->refs[*level];
2217 ret = btrfs_lookup_extent_info(NULL, root,
2218 path->nodes[*level]->start,
2219 *level, 1, &refs, NULL);
2224 nrefs->bytenr[*level] = path->nodes[*level]->start;
2225 nrefs->refs[*level] = refs;
2229 ret = enter_shared_node(root, path->nodes[*level]->start,
2237 while (*level >= 0) {
2238 WARN_ON(*level < 0);
2239 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2240 cur = path->nodes[*level];
2242 if (btrfs_header_level(cur) != *level)
2245 if (path->slots[*level] >= btrfs_header_nritems(cur))
2248 ret = process_one_leaf(root, cur, wc);
2253 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2254 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2256 if (bytenr == nrefs->bytenr[*level - 1]) {
2257 refs = nrefs->refs[*level - 1];
2259 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2260 *level - 1, 1, &refs, NULL);
2264 nrefs->bytenr[*level - 1] = bytenr;
2265 nrefs->refs[*level - 1] = refs;
2270 ret = enter_shared_node(root, bytenr, refs,
2273 path->slots[*level]++;
2278 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2279 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2280 free_extent_buffer(next);
2281 reada_walk_down(root, cur, path->slots[*level]);
2282 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2283 if (!extent_buffer_uptodate(next)) {
2284 struct btrfs_key node_key;
2286 btrfs_node_key_to_cpu(path->nodes[*level],
2288 path->slots[*level]);
2289 btrfs_add_corrupt_extent_record(root->fs_info,
2291 path->nodes[*level]->start,
2292 root->fs_info->nodesize,
2299 ret = check_child_node(cur, path->slots[*level], next);
2301 free_extent_buffer(next);
2306 if (btrfs_is_leaf(next))
2307 status = btrfs_check_leaf(root, NULL, next);
2309 status = btrfs_check_node(root, NULL, next);
2310 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2311 free_extent_buffer(next);
2316 *level = *level - 1;
2317 free_extent_buffer(path->nodes[*level]);
2318 path->nodes[*level] = next;
2319 path->slots[*level] = 0;
2322 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2326 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2327 unsigned int ext_ref);
2330 * Returns >0 Found error, should continue
2331 * Returns <0 Fatal error, must exit the whole check
2332 * Returns 0 No errors found
2334 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2335 int *level, struct node_refs *nrefs, int ext_ref)
2337 enum btrfs_tree_block_status status;
2340 struct btrfs_fs_info *fs_info = root->fs_info;
2341 struct extent_buffer *next;
2342 struct extent_buffer *cur;
2345 WARN_ON(*level < 0);
2346 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2348 ret = update_nodes_refs(root, path->nodes[*level]->start,
2353 while (*level >= 0) {
2354 WARN_ON(*level < 0);
2355 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2356 cur = path->nodes[*level];
2358 if (btrfs_header_level(cur) != *level)
2361 if (path->slots[*level] >= btrfs_header_nritems(cur))
2363 /* Don't forget to check leaf/node validity */
2365 ret = btrfs_check_leaf(root, NULL, cur);
2366 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2370 ret = process_one_leaf_v2(root, path, nrefs,
2372 cur = path->nodes[*level];
2375 ret = btrfs_check_node(root, NULL, cur);
2376 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2381 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2382 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2384 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2387 if (!nrefs->need_check[*level - 1]) {
2388 path->slots[*level]++;
2392 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2393 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2394 free_extent_buffer(next);
2395 reada_walk_down(root, cur, path->slots[*level]);
2396 next = read_tree_block(fs_info, bytenr, ptr_gen);
2397 if (!extent_buffer_uptodate(next)) {
2398 struct btrfs_key node_key;
2400 btrfs_node_key_to_cpu(path->nodes[*level],
2402 path->slots[*level]);
2403 btrfs_add_corrupt_extent_record(fs_info,
2405 path->nodes[*level]->start,
2413 ret = check_child_node(cur, path->slots[*level], next);
2417 if (btrfs_is_leaf(next))
2418 status = btrfs_check_leaf(root, NULL, next);
2420 status = btrfs_check_node(root, NULL, next);
2421 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2422 free_extent_buffer(next);
2427 *level = *level - 1;
2428 free_extent_buffer(path->nodes[*level]);
2429 path->nodes[*level] = next;
2430 path->slots[*level] = 0;
2435 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2436 struct walk_control *wc, int *level)
2439 struct extent_buffer *leaf;
2441 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2442 leaf = path->nodes[i];
2443 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2448 free_extent_buffer(path->nodes[*level]);
2449 path->nodes[*level] = NULL;
2450 BUG_ON(*level > wc->active_node);
2451 if (*level == wc->active_node)
2452 leave_shared_node(root, wc, *level);
2459 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2463 struct extent_buffer *leaf;
2465 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2466 leaf = path->nodes[i];
2467 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2472 free_extent_buffer(path->nodes[*level]);
2473 path->nodes[*level] = NULL;
2480 static int check_root_dir(struct inode_record *rec)
2482 struct inode_backref *backref;
2485 if (!rec->found_inode_item || rec->errors)
2487 if (rec->nlink != 1 || rec->found_link != 0)
2489 if (list_empty(&rec->backrefs))
2491 backref = to_inode_backref(rec->backrefs.next);
2492 if (!backref->found_inode_ref)
2494 if (backref->index != 0 || backref->namelen != 2 ||
2495 memcmp(backref->name, "..", 2))
2497 if (backref->found_dir_index || backref->found_dir_item)
2504 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2505 struct btrfs_root *root, struct btrfs_path *path,
2506 struct inode_record *rec)
2508 struct btrfs_inode_item *ei;
2509 struct btrfs_key key;
2512 key.objectid = rec->ino;
2513 key.type = BTRFS_INODE_ITEM_KEY;
2514 key.offset = (u64)-1;
2516 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2520 if (!path->slots[0]) {
2527 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2528 if (key.objectid != rec->ino) {
2533 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2534 struct btrfs_inode_item);
2535 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2536 btrfs_mark_buffer_dirty(path->nodes[0]);
2537 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2538 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2539 root->root_key.objectid);
2541 btrfs_release_path(path);
2545 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2546 struct btrfs_root *root,
2547 struct btrfs_path *path,
2548 struct inode_record *rec)
2552 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2553 btrfs_release_path(path);
2555 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2559 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2560 struct btrfs_root *root,
2561 struct btrfs_path *path,
2562 struct inode_record *rec)
2564 struct btrfs_inode_item *ei;
2565 struct btrfs_key key;
2568 key.objectid = rec->ino;
2569 key.type = BTRFS_INODE_ITEM_KEY;
2572 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2579 /* Since ret == 0, no need to check anything */
2580 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2581 struct btrfs_inode_item);
2582 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2583 btrfs_mark_buffer_dirty(path->nodes[0]);
2584 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2585 printf("reset nbytes for ino %llu root %llu\n",
2586 rec->ino, root->root_key.objectid);
2588 btrfs_release_path(path);
2592 static int add_missing_dir_index(struct btrfs_root *root,
2593 struct cache_tree *inode_cache,
2594 struct inode_record *rec,
2595 struct inode_backref *backref)
2597 struct btrfs_path path;
2598 struct btrfs_trans_handle *trans;
2599 struct btrfs_dir_item *dir_item;
2600 struct extent_buffer *leaf;
2601 struct btrfs_key key;
2602 struct btrfs_disk_key disk_key;
2603 struct inode_record *dir_rec;
2604 unsigned long name_ptr;
2605 u32 data_size = sizeof(*dir_item) + backref->namelen;
2608 trans = btrfs_start_transaction(root, 1);
2610 return PTR_ERR(trans);
2612 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2613 (unsigned long long)rec->ino);
2615 btrfs_init_path(&path);
2616 key.objectid = backref->dir;
2617 key.type = BTRFS_DIR_INDEX_KEY;
2618 key.offset = backref->index;
2619 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2622 leaf = path.nodes[0];
2623 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2625 disk_key.objectid = cpu_to_le64(rec->ino);
2626 disk_key.type = BTRFS_INODE_ITEM_KEY;
2627 disk_key.offset = 0;
2629 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2630 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2631 btrfs_set_dir_data_len(leaf, dir_item, 0);
2632 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2633 name_ptr = (unsigned long)(dir_item + 1);
2634 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2635 btrfs_mark_buffer_dirty(leaf);
2636 btrfs_release_path(&path);
2637 btrfs_commit_transaction(trans, root);
2639 backref->found_dir_index = 1;
2640 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2641 BUG_ON(IS_ERR(dir_rec));
2644 dir_rec->found_size += backref->namelen;
2645 if (dir_rec->found_size == dir_rec->isize &&
2646 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2647 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2648 if (dir_rec->found_size != dir_rec->isize)
2649 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2654 static int delete_dir_index(struct btrfs_root *root,
2655 struct inode_backref *backref)
2657 struct btrfs_trans_handle *trans;
2658 struct btrfs_dir_item *di;
2659 struct btrfs_path path;
2662 trans = btrfs_start_transaction(root, 1);
2664 return PTR_ERR(trans);
2666 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2667 (unsigned long long)backref->dir,
2668 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2669 (unsigned long long)root->objectid);
2671 btrfs_init_path(&path);
2672 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2673 backref->name, backref->namelen,
2674 backref->index, -1);
2677 btrfs_release_path(&path);
2678 btrfs_commit_transaction(trans, root);
2685 ret = btrfs_del_item(trans, root, &path);
2687 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2689 btrfs_release_path(&path);
2690 btrfs_commit_transaction(trans, root);
/*
 * Recreate a missing inode item for @rec in @root from what the check
 * collected about the inode.
 *
 * The item is assembled on the stack and zero-filled first.  Generation is
 * the transid of the transaction started here, nbytes is rec->found_size,
 * atime/ctime/mtime are the current wall-clock time and otime is 0.
 * If any dir item was found the inode becomes a directory (S_IFDIR | 0755,
 * size = rec->found_size); otherwise it becomes a regular file
 * (S_IFREG | 0755, size = rec->extent_end).
 *
 * The work runs in its own transaction, which is committed after the
 * btrfs_insert_inode() call.
 *
 * NOTE(review): short lines (third parameter, braces, returns) are not
 * visible in this listing; the condition selecting between the two nlink
 * assignments below is presumably the flag callers pass as third argument
 * (1 for the root directory) - confirm.
 */
2694 static int create_inode_item(struct btrfs_root *root,
2695 struct inode_record *rec,
2698 struct btrfs_trans_handle *trans;
2699 struct btrfs_inode_item inode_item;
2700 time_t now = time(NULL);
2703 trans = btrfs_start_transaction(root, 1);
2704 if (IS_ERR(trans)) {
2705 ret = PTR_ERR(trans);
2709 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2710 "be incomplete, please check permissions and content after "
2711 "the fsck completes.\n", (unsigned long long)root->objectid,
2712 (unsigned long long)rec->ino);
2714 memset(&inode_item, 0, sizeof(inode_item));
2715 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
/* nlink is either forced to 1 or taken from the number of links found. */
2717 btrfs_set_stack_inode_nlink(&inode_item, 1);
2719 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2720 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2721 if (rec->found_dir_item) {
2722 if (rec->found_file_extent)
2723 fprintf(stderr, "root %llu inode %llu has both a dir "
2724 "item and extents, unsure if it is a dir or a "
2725 "regular file so setting it as a directory\n",
2726 (unsigned long long)root->objectid,
2727 (unsigned long long)rec->ino);
2728 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2729 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/*
 * The test below is always true when reached: the preceding branch already
 * handled rec->found_dir_item being set.
 */
2730 } else if (!rec->found_dir_item) {
2731 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2732 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
/* Access/change/modify times are "now"; the creation time is left at 0. */
2734 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2735 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2736 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2737 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2738 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2739 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2740 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2741 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2743 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2745 btrfs_commit_transaction(trans, root);
/*
 * Walk every backref of @rec and repair mismatches between its dir item,
 * dir index, inode ref and the inode item itself.
 *
 * Visible repairs:
 *  - recreate the root directory's inode item when it is missing;
 *  - delete a dir index that has no inode ref or whose index mismatches
 *    (delete_dir_index()), then drop the backref from the list;
 *  - add a missing dir index when dir item and inode ref both exist;
 *  - insert the dir item/index pair when only the inode ref exists;
 *  - recreate the inode item when all three references agree but no inode
 *    item was found.
 * A backref with dir item, dir index and inode ref and no errors is removed
 * from rec->backrefs.
 *
 * Returns ret when it is non-zero, otherwise the value of "repaired".
 *
 * NOTE(review): the last parameter and several short lines are not visible
 * in this listing; the branches guarded by !delete are presumably the "add"
 * pass and the delete_dir_index() branch the "delete" pass - confirm.
 */
2749 static int repair_inode_backrefs(struct btrfs_root *root,
2750 struct inode_record *rec,
2751 struct cache_tree *inode_cache,
2754 struct inode_backref *tmp, *backref;
2755 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2759 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2760 if (!delete && rec->ino == root_dirid) {
2761 if (!rec->found_inode_item) {
2762 ret = create_inode_item(root, rec, 1);
2769 /* Index 0 for root dir's are special, don't mess with it */
2770 if (rec->ino == root_dirid && backref->index == 0)
2774 ((backref->found_dir_index && !backref->found_inode_ref) ||
2775 (backref->found_dir_index && backref->found_inode_ref &&
2776 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2777 ret = delete_dir_index(root, backref);
2781 list_del(&backref->list);
/* Dir item and inode ref are present but the dir index is not: add it. */
2786 if (!delete && !backref->found_dir_index &&
2787 backref->found_dir_item && backref->found_inode_ref) {
2788 ret = add_missing_dir_index(root, inode_cache, rec,
2793 if (backref->found_dir_item &&
2794 backref->found_dir_index) {
2795 if (!backref->errors &&
2796 backref->found_inode_ref) {
2797 list_del(&backref->list);
/* Only the inode ref exists: re-insert the dir item/index pair. */
2804 if (!delete && (!backref->found_dir_index &&
2805 !backref->found_dir_item &&
2806 backref->found_inode_ref)) {
2807 struct btrfs_trans_handle *trans;
2808 struct btrfs_key location;
2810 ret = check_dir_conflict(root, backref->name,
2816 * let nlink fixing routine to handle it,
2817 * which can do it better.
2822 location.objectid = rec->ino;
2823 location.type = BTRFS_INODE_ITEM_KEY;
2824 location.offset = 0;
2826 trans = btrfs_start_transaction(root, 1);
2827 if (IS_ERR(trans)) {
2828 ret = PTR_ERR(trans);
2831 fprintf(stderr, "adding missing dir index/item pair "
2833 (unsigned long long)rec->ino);
2834 ret = btrfs_insert_dir_item(trans, root, backref->name,
2836 backref->dir, &location,
2837 imode_to_type(rec->imode),
2840 btrfs_commit_transaction(trans, root);
/* All three references agree, but the inode item itself is missing. */
2844 if (!delete && (backref->found_inode_ref &&
2845 backref->found_dir_index &&
2846 backref->found_dir_item &&
2847 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2848 !rec->found_inode_item)) {
2849 ret = create_inode_item(root, rec, 0);
2856 return ret ? ret : repaired;
2860 * To determine the file type for nlink/inode_item repair
2862 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2863 * Return -ENOENT if file type is not found.
/*
 * Look up the BTRFS_FT_* file type of @rec and store it in *type.
 *
 * The inode item is the preferred source: when one was found, the type is
 * derived from rec->imode via imode_to_type().  Otherwise the filetype of
 * the first backref that has a dir index or a dir item is used.
 */
2865 static int find_file_type(struct inode_record *rec, u8 *type)
2867 struct inode_backref *backref;
2869 /* For inode item recovered case */
2870 if (rec->found_inode_item) {
2871 *type = imode_to_type(rec->imode);
2875 list_for_each_entry(backref, &rec->backrefs, list) {
2876 if (backref->found_dir_index || backref->found_dir_item) {
2877 *type = backref->filetype;
2885 * To determine the file name for nlink repair
2887 * Return 0 if file name is found, set name and namelen.
2888 * Return -ENOENT if file name is not found.
/*
 * Copy the name of the first backref of @rec that has a dir index, a dir
 * item or an inode ref into @name and store its length in *namelen.
 *
 * Exactly backref->namelen bytes are copied with memcpy(); this copy does
 * not append a terminating NUL, so callers must use *namelen.  The caller
 * must supply a buffer large enough for the name (the visible caller passes
 * a BTRFS_NAME_LEN-byte buffer).
 */
2890 static int find_file_name(struct inode_record *rec,
2891 char *name, int *namelen)
2893 struct inode_backref *backref;
2895 list_for_each_entry(backref, &rec->backrefs, list) {
2896 if (backref->found_dir_index || backref->found_dir_item ||
2897 backref->found_inode_ref) {
2898 memcpy(name, backref->name, backref->namelen);
2899 *namelen = backref->namelen;
2906 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of @rec from scratch inside transaction @trans.
 *
 * Steps, in order:
 *  1. rec->found_link is reset to 0;
 *  2. every backref is unlinked with btrfs_unlink(); backrefs that do not
 *     have all of dir index, dir item and inode ref are also removed from
 *     rec->backrefs so they are not re-added;
 *  3. the on-disk inode item's nlink is set to 0;
 *  4. each remaining (valid) backref is re-added with btrfs_add_link().
 *
 * @path is used for the inode item lookup and is released before return.
 */
2907 static int reset_nlink(struct btrfs_trans_handle *trans,
2908 struct btrfs_root *root,
2909 struct btrfs_path *path,
2910 struct inode_record *rec)
2912 struct inode_backref *backref;
2913 struct inode_backref *tmp;
2914 struct btrfs_key key;
2915 struct btrfs_inode_item *inode_item;
2918 /* We don't believe this either, reset it and iterate backref */
2919 rec->found_link = 0;
2921 /* Remove all backref including the valid ones */
2922 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2923 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2924 backref->index, backref->name,
2925 backref->namelen, 0);
2929 /* remove invalid backref, so it won't be added back */
2930 if (!(backref->found_dir_index &&
2931 backref->found_dir_item &&
2932 backref->found_inode_ref)) {
2933 list_del(&backref->list);
2940 /* Set nlink to 0 */
2941 key.objectid = rec->ino;
2942 key.type = BTRFS_INODE_ITEM_KEY;
2944 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2951 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2952 struct btrfs_inode_item);
2953 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2954 btrfs_mark_buffer_dirty(path->nodes[0]);
2955 btrfs_release_path(path);
2958 * Add back valid inode_ref/dir_item/dir_index,
2959 * add_link() will handle the nlink inc, so new nlink must be correct
2961 list_for_each_entry(backref, &rec->backrefs, list) {
2962 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2963 backref->name, backref->namelen,
2964 backref->filetype, &backref->index, 1);
2969 btrfs_release_path(path);
/*
 * Store in *highest_ino the objectid of the item that sorts immediately
 * before the search position for (BTRFS_LAST_FREE_OBJECTID, INODE_ITEM).
 *
 * @path is (re)initialised here and released before returning.  The search
 * is issued with ins_len = -1 and cow = 1.  The result is then checked
 * against BTRFS_LAST_FREE_OBJECTID.
 *
 * NOTE(review): the key is read from path->slots[0] - 1; no guard for
 * slots[0] == 0 is visible in this listing (short lines are missing) -
 * confirm one exists.
 */
2973 static int get_highest_inode(struct btrfs_trans_handle *trans,
2974 struct btrfs_root *root,
2975 struct btrfs_path *path,
2978 struct btrfs_key key, found_key;
2981 btrfs_init_path(path);
2982 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2984 key.type = BTRFS_INODE_ITEM_KEY;
2985 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2987 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2988 path->slots[0] - 1);
2989 *highest_ino = found_key.objectid;
2992 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2994 btrfs_release_path(path);
/*
 * Fix a wrong link count on @rec and, when no valid link remains, move the
 * inode into a "lost+found" directory.
 *
 * The file name and type are captured first, because reset_nlink() drops
 * invalid backrefs.  Fallbacks: the decimal inode number as name, and
 * BTRFS_FT_REG_FILE as type.  After reset_nlink(), if rec->found_link is
 * still 0, the directory "lost+found" is created under
 * BTRFS_FIRST_FREE_OBJECTID and the inode is linked into it; on -EEXIST a
 * ".INO" suffix is appended to the name and the link is retried while the
 * result still fits in BTRFS_NAME_LEN.
 *
 * I_ERR_LINK_COUNT_WRONG is cleared unconditionally before return so the
 * caller does not retry the same inode forever.
 *
 * NOTE(review): the existing comment below mentions
 * remove_all_invalid_backref(); the call visible in this function is
 * reset_nlink().
 */
2998 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2999 struct btrfs_root *root,
3000 struct btrfs_path *path,
3001 struct inode_record *rec)
3003 char *dir_name = "lost+found";
3004 char namebuf[BTRFS_NAME_LEN] = {0};
3009 int name_recovered = 0;
3010 int type_recovered = 0;
3014 * Get file name and type first before these invalid inode ref
3015 * are deleted by remove_all_invalid_backref()
3017 name_recovered = !find_file_name(rec, namebuf, &namelen);
3018 type_recovered = !find_file_type(rec, &type);
3020 if (!name_recovered) {
3021 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3022 rec->ino, rec->ino);
3023 namelen = count_digits(rec->ino);
3024 sprintf(namebuf, "%llu", rec->ino);
3027 if (!type_recovered) {
3028 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3030 type = BTRFS_FT_REG_FILE;
3034 ret = reset_nlink(trans, root, path, rec);
3037 "Failed to reset nlink for inode %llu: %s\n",
3038 rec->ino, strerror(-ret));
/* No valid link survived the reset: park the inode in lost+found. */
3042 if (rec->found_link == 0) {
3043 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3047 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3048 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3051 fprintf(stderr, "Failed to create '%s' dir: %s\n",
3052 dir_name, strerror(-ret));
3055 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
3056 namebuf, namelen, type, NULL, 1);
3058 * Add ".INO" suffix several times to handle case where
3059 * "FILENAME.INO" is already taken by another file.
3061 while (ret == -EEXIST) {
3063 * Conflicting file name, add ".INO" as suffix * +1 for '.'
3065 if (namelen + count_digits(rec->ino) + 1 >
3070 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
3072 namelen += count_digits(rec->ino) + 1;
3073 ret = btrfs_add_link(trans, root, rec->ino,
3074 lost_found_ino, namebuf,
3075 namelen, type, NULL, 1);
3079 "Failed to link the inode %llu to %s dir: %s\n",
3080 rec->ino, dir_name, strerror(-ret));
3084 * Just increase the found_link, don't actually add the
3085 * backref. This will make things easier and this inode
3086 * record will be freed after the repair is done.
3087 * So fsck will not report problem about this inode.
3090 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3091 namelen, namebuf, dir_name);
3093 printf("Fixed the nlink of inode %llu\n", rec->ino);
3096 * Clear the flag anyway, or we will loop forever for the same inode
3097 * as it will not be removed from the bad inode list and the dead loop
3100 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3101 btrfs_release_path(path);
3106 * Check if there is any normal(reg or prealloc) file extent for given
3108 * This is used to determine the file type when neither its dir_index/item nor
3109 * inode_item exists.
3111 * This will *NOT* report error, if any error happens, just consider it does
3112 * not have any normal file extent.
/*
 * Read-only scan of @root for a file extent item of inode @ino whose type
 * is not BTRFS_FILE_EXTENT_INLINE.
 *
 * The search runs without a transaction (NULL) and without COW.  If the
 * search lands past the last item of a leaf, the next leaf is fetched.
 * Items are only examined while their key still has objectid @ino and type
 * BTRFS_EXTENT_DATA_KEY.  The local path is released before return.
 */
3114 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3116 struct btrfs_path path;
3117 struct btrfs_key key;
3118 struct btrfs_key found_key;
3119 struct btrfs_file_extent_item *fi;
3123 btrfs_init_path(&path);
3125 key.type = BTRFS_EXTENT_DATA_KEY;
3128 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3133 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3134 ret = btrfs_next_leaf(root, &path);
3141 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3143 if (found_key.objectid != ino ||
3144 found_key.type != BTRFS_EXTENT_DATA_KEY)
3146 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3147 struct btrfs_file_extent_item);
3148 type = btrfs_file_extent_type(path.nodes[0], fi);
3149 if (type != BTRFS_FILE_EXTENT_INLINE) {
3155 btrfs_release_path(&path);
/*
 * Translate a BTRFS_FT_* directory-entry file type into the matching
 * S_IF* file-type bits of an inode mode, using a static lookup table.
 *
 * NOTE(review): @type indexes the table directly; no range check is
 * visible here, so callers are presumably expected to pass only the
 * BTRFS_FT_* values listed below - confirm.
 */
3159 static u32 btrfs_type_to_imode(u8 type)
3161 static u32 imode_by_btrfs_type[] = {
3162 [BTRFS_FT_REG_FILE] = S_IFREG,
3163 [BTRFS_FT_DIR] = S_IFDIR,
3164 [BTRFS_FT_CHRDEV] = S_IFCHR,
3165 [BTRFS_FT_BLKDEV] = S_IFBLK,
3166 [BTRFS_FT_FIFO] = S_IFIFO,
3167 [BTRFS_FT_SOCK] = S_IFSOCK,
3168 [BTRFS_FT_SYMLINK] = S_IFLNK,
3171 return imode_by_btrfs_type[(type)];
/*
 * Rebuild the inode item of @rec when none was found.
 *
 * The file type comes from find_file_type() when possible.  Otherwise it is
 * guessed: a non-inline file extent means regular file, a found dir item
 * means directory, orphan extents mean regular file, and regular file is
 * the final fallback (with a message).  The inode is then created with
 * btrfs_new_inode() using mode | type bits.
 *
 * Only the inode item is rebuilt here.  I_ERR_NO_INODE_ITEM is cleared and
 * I_ERR_LINK_COUNT_WRONG is set so that the nlink repair runs afterwards.
 *
 * NOTE(review): after creating the inode item the code sets
 * rec->found_dir_item = 1 (not found_inode_item) - confirm this is
 * intended.
 */
3174 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3175 struct btrfs_root *root,
3176 struct btrfs_path *path,
3177 struct inode_record *rec)
3181 int type_recovered = 0;
3184 printf("Trying to rebuild inode:%llu\n", rec->ino);
3186 type_recovered = !find_file_type(rec, &filetype);
3189 * Try to determine inode type if type not found.
3191 * For found regular file extent, it must be FILE.
3192 * For found dir_item/index, it must be DIR.
3194 * For undetermined one, use FILE as fallback.
3197 * 1. If found backref(inode_index/item is already handled) to it,
3199 * Need new inode-inode ref structure to allow search for that.
3201 if (!type_recovered) {
3202 if (rec->found_file_extent &&
3203 find_normal_file_extent(root, rec->ino)) {
3205 filetype = BTRFS_FT_REG_FILE;
3206 } else if (rec->found_dir_item) {
3208 filetype = BTRFS_FT_DIR;
3209 } else if (!list_empty(&rec->orphan_extents)) {
3211 filetype = BTRFS_FT_REG_FILE;
3213 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3216 filetype = BTRFS_FT_REG_FILE;
3220 ret = btrfs_new_inode(trans, root, rec->ino,
3221 mode | btrfs_type_to_imode(filetype));
3226 * Here inode rebuild is done, we only rebuild the inode item,
3227 * don't repair the nlink(like move to lost+found).
3228 * That is the job of nlink repair.
3230 * We just fill the record and return
3232 rec->found_dir_item = 1;
3233 rec->imode = mode | btrfs_type_to_imode(filetype);
3235 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3236 /* Ensure the inode_nlinks repair function will be called */
3237 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach the orphan data extents recorded on @rec to their inode.
 *
 * For each orphan extent a conflict check is made with btrfs_get_extent()
 * over [offset, offset + disk_len).  A conflicting orphan is reported and
 * its extent freed from the extent root; otherwise a file extent item is
 * inserted using disk_len as both lengths (the extent is assumed to be
 * uncompressed with no data offset).  rec->found_size and the hole tree are
 * updated, and I_ERR_FILE_NBYTES_WRONG / I_ERR_FILE_EXTENT_DISCOUNT are
 * cleared once found_size matches nbytes / no holes remain.  Each handled
 * orphan is removed from the list; I_ERR_FILE_EXTENT_ORPHAN is cleared at
 * the end.
 */
3242 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3243 struct btrfs_root *root,
3244 struct btrfs_path *path,
3245 struct inode_record *rec)
3247 struct orphan_data_extent *orphan;
3248 struct orphan_data_extent *tmp;
3251 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3253 * Check for conflicting file extents
3255 * Here we don't know whether the extents is compressed or not,
3256 * so we can only assume it not compressed nor data offset,
3257 * and use its disk_len as extent length.
3259 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3260 orphan->offset, orphan->disk_len, 0);
3261 btrfs_release_path(path);
3266 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3267 orphan->disk_bytenr, orphan->disk_len);
3268 ret = btrfs_free_extent(trans,
3269 root->fs_info->extent_root,
3270 orphan->disk_bytenr, orphan->disk_len,
3271 0, root->objectid, orphan->objectid,
3276 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3277 orphan->offset, orphan->disk_bytenr,
3278 orphan->disk_len, orphan->disk_len);
3282 /* Update file size info */
3283 rec->found_size += orphan->disk_len;
3284 if (rec->found_size == rec->nbytes)
3285 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3287 /* Update the file extent hole info too */
3288 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3292 if (RB_EMPTY_ROOT(&rec->holes))
3293 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3295 list_del(&orphan->list);
3298 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps recorded in rec->holes by punching explicit holes.
 *
 * The hole tree is consumed from its first node: each hole is punched with
 * btrfs_punch_hole() and then removed from the tree; once the tree is empty
 * I_ERR_FILE_EXTENT_DISCOUNT is cleared.  A separate case punches one hole
 * covering [0, round_up(isize, sectorsize)) for a file that lost all its
 * file extents.
 */
3303 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3304 struct btrfs_root *root,
3305 struct btrfs_path *path,
3306 struct inode_record *rec)
3308 struct rb_node *node;
3309 struct file_extent_hole *hole;
3313 node = rb_first(&rec->holes);
3317 hole = rb_entry(node, struct file_extent_hole, node);
3318 ret = btrfs_punch_hole(trans, root, rec->ino,
3319 hole->start, hole->len);
3322 ret = del_file_extent_hole(&rec->holes, hole->start,
3326 if (RB_EMPTY_ROOT(&rec->holes))
3327 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* Re-read the first node: the hole just handled was removed from the tree. */
3328 node = rb_first(&rec->holes);
3330 /* special case for a file losing all its file extent */
3332 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3333 round_up(rec->isize,
3334 root->fs_info->sectorsize));
3338 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3339 rec->ino, root->objectid);
/*
 * Run the individual inode repair routines for every repairable error bit
 * set in rec->errors, all inside one transaction reserving 7 items.
 *
 * The routines run in a fixed order and each one only runs if the previous
 * ones returned 0: missing inode item, orphan extents, discount extents,
 * dir isize, orphan item, link count, nbytes.  The order matters because
 * earlier repairs can set or clear bits consumed by later ones (e.g.
 * repair_inode_no_item() sets I_ERR_LINK_COUNT_WRONG).
 *
 * The transaction is committed regardless of the repair result; the return
 * value of btrfs_commit_transaction() is not examined here.
 */
3344 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3346 struct btrfs_trans_handle *trans;
3347 struct btrfs_path path;
3350 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3351 I_ERR_NO_ORPHAN_ITEM |
3352 I_ERR_LINK_COUNT_WRONG |
3353 I_ERR_NO_INODE_ITEM |
3354 I_ERR_FILE_EXTENT_ORPHAN |
3355 I_ERR_FILE_EXTENT_DISCOUNT|
3356 I_ERR_FILE_NBYTES_WRONG)))
3360 * For nlink repair, it may create a dir and add link, so
3361 * 2 for parent(256)'s dir_index and dir_item
3362 * 2 for lost+found dir's inode_item and inode_ref
3363 * 1 for the new inode_ref of the file
3364 * 2 for lost+found dir's dir_index and dir_item for the file
3366 trans = btrfs_start_transaction(root, 7);
3368 return PTR_ERR(trans);
3370 btrfs_init_path(&path);
3371 if (rec->errors & I_ERR_NO_INODE_ITEM)
3372 ret = repair_inode_no_item(trans, root, &path, rec);
3373 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3374 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3375 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3376 ret = repair_inode_discount_extent(trans, root, &path, rec);
3377 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3378 ret = repair_inode_isize(trans, root, &path, rec);
3379 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3380 ret = repair_inode_orphan_item(trans, root, &path, rec);
3381 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3382 ret = repair_inode_nlinks(trans, root, &path, rec);
3383 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3384 ret = repair_inode_nbytes(trans, root, &path, rec);
3385 btrfs_commit_transaction(trans, root);
3386 btrfs_release_path(&path);
/*
 * Final pass over the inode records collected for @root: repair backrefs
 * (when repairing), verify the root directory, then report or repair every
 * remaining inode record.  All records are removed from @inode_cache and
 * freed along the way.
 *
 * Visible phases:
 *  1. A root with zero refs only gets a warning if its cache is not empty.
 *  2. With repair enabled, repair_inode_backrefs() is run over all records
 *     in stages (see the existing comment below for why deletion and
 *     addition are separate stages).
 *  3. The root directory record is looked up and checked with
 *     check_root_dir(); a missing root dir can be recreated with
 *     btrfs_make_root_dir() in its own transaction.
 *  4. Every remaining record is drained from the cache.  The root dir and
 *     BTRFS_ORPHAN_OBJECTID records are just freed.  Others get
 *     I_ERR_NO_INODE_ITEM / I_ERR_LINK_COUNT_WRONG flagged as needed, go
 *     through try_repair_inode(), and anything unresolved is printed with
 *     its backref errors.
 *
 * Returns -1 if the error counter is positive, 0 otherwise.
 *
 * NOTE(review): many short control-flow lines (loop heads, continue/goto,
 * braces) are not visible in this listing; the description above covers
 * only the visible statements.
 */
3390 static int check_inode_recs(struct btrfs_root *root,
3391 struct cache_tree *inode_cache)
3393 struct cache_extent *cache;
3394 struct ptr_node *node;
3395 struct inode_record *rec;
3396 struct inode_backref *backref;
3401 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3403 if (btrfs_root_refs(&root->root_item) == 0) {
3404 if (!cache_tree_empty(inode_cache))
3405 fprintf(stderr, "warning line %d\n", __LINE__);
3410 * We need to repair backrefs first because we could change some of the
3411 * errors in the inode recs.
3413 * We also need to go through and delete invalid backrefs first and then
3414 * add the correct ones second. We do this because we may get EEXIST
3415 * when adding back the correct index because we hadn't yet deleted the
3418 * For example, if we were missing a dir index then the directories
3419 * isize would be wrong, so if we fixed the isize to what we thought it
3420 * would be and then fixed the backref we'd still have a invalid fs, so
3421 * we need to add back the dir index and then check to see if the isize
3426 if (stage == 3 && !err)
3429 cache = search_cache_extent(inode_cache, 0);
3430 while (repair && cache) {
3431 node = container_of(cache, struct ptr_node, cache);
3433 cache = next_cache_extent(cache);
3435 /* Need to free everything up and rescan */
3437 remove_cache_extent(inode_cache, &node->cache);
3439 free_inode_rec(rec);
3443 if (list_empty(&rec->backrefs))
3446 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Look up the record of the root directory itself and validate it. */
3460 rec = get_inode_rec(inode_cache, root_dirid, 0);
3461 BUG_ON(IS_ERR(rec));
3463 ret = check_root_dir(rec);
3465 fprintf(stderr, "root %llu root dir %llu error\n",
3466 (unsigned long long)root->root_key.objectid,
3467 (unsigned long long)root_dirid);
3468 print_inode_error(root, rec);
3473 struct btrfs_trans_handle *trans;
3475 trans = btrfs_start_transaction(root, 1);
3476 if (IS_ERR(trans)) {
3477 err = PTR_ERR(trans);
3482 "root %llu missing its root dir, recreating\n",
3483 (unsigned long long)root->objectid);
3485 ret = btrfs_make_root_dir(trans, root, root_dirid);
3488 btrfs_commit_transaction(trans, root);
3492 fprintf(stderr, "root %llu root dir %llu not found\n",
3493 (unsigned long long)root->root_key.objectid,
3494 (unsigned long long)root_dirid);
/* Drain the cache: each record is removed, then freed, repaired or reported. */
3498 cache = search_cache_extent(inode_cache, 0);
3501 node = container_of(cache, struct ptr_node, cache);
3503 remove_cache_extent(inode_cache, &node->cache);
3505 if (rec->ino == root_dirid ||
3506 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3507 free_inode_rec(rec);
3511 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3512 ret = check_orphan_item(root, rec->ino);
3514 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3515 if (can_free_inode_rec(rec)) {
3516 free_inode_rec(rec);
3521 if (!rec->found_inode_item)
3522 rec->errors |= I_ERR_NO_INODE_ITEM;
3523 if (rec->found_link != rec->nlink)
3524 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3526 ret = try_repair_inode(root, rec);
3527 if (ret == 0 && can_free_inode_rec(rec)) {
3528 free_inode_rec(rec);
3534 if (!(repair && ret == 0))
3536 print_inode_error(root, rec);
/* Derive the "missing" error bits for each backref before printing it. */
3537 list_for_each_entry(backref, &rec->backrefs, list) {
3538 if (!backref->found_dir_item)
3539 backref->errors |= REF_ERR_NO_DIR_ITEM;
3540 if (!backref->found_dir_index)
3541 backref->errors |= REF_ERR_NO_DIR_INDEX;
3542 if (!backref->found_inode_ref)
3543 backref->errors |= REF_ERR_NO_INODE_REF;
3544 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3545 " namelen %u name %s filetype %d errors %x",
3546 (unsigned long long)backref->dir,
3547 (unsigned long long)backref->index,
3548 backref->namelen, backref->name,
3549 backref->filetype, backref->errors);
3550 print_ref_error(backref->errors);
3552 free_inode_rec(rec);
3554 return (error > 0) ? -1 : 0;
/*
 * Return the root_record for @objectid in @root_cache, creating and
 * inserting a zero-initialised one (with an empty backref list and a cache
 * extent of start = objectid, size = 1) when none exists yet.
 *
 * Returns ERR_PTR(-ENOMEM) or ERR_PTR(-EEXIST) on failure; callers in this
 * file check the result with IS_ERR().
 *
 * NOTE(review): whether the freshly allocated record is freed on the
 * -EEXIST path is not visible in this listing - confirm.
 */
3557 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3560 struct cache_extent *cache;
3561 struct root_record *rec = NULL;
3564 cache = lookup_cache_extent(root_cache, objectid, 1);
3566 rec = container_of(cache, struct root_record, cache);
3568 rec = calloc(1, sizeof(*rec));
3570 return ERR_PTR(-ENOMEM);
3571 rec->objectid = objectid;
3572 INIT_LIST_HEAD(&rec->backrefs);
3573 rec->cache.start = objectid;
3574 rec->cache.size = 1;
3576 ret = insert_cache_extent(root_cache, &rec->cache);
3578 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec matching (@ref_root, @dir, @name/@namelen), or
 * allocate a new one and append it to rec->backrefs.
 *
 * Matching compares ref_root, dir, the name length and the name bytes;
 * @index is not part of the match and is only stored on a newly created
 * backref.  The name is stored inline after the structure and is
 * NUL-terminated (namelen + 1 extra bytes are allocated).
 */
3583 static struct root_backref *get_root_backref(struct root_record *rec,
3584 u64 ref_root, u64 dir, u64 index,
3585 const char *name, int namelen)
3587 struct root_backref *backref;
3589 list_for_each_entry(backref, &rec->backrefs, list) {
3590 if (backref->ref_root != ref_root || backref->dir != dir ||
3591 backref->namelen != namelen)
3593 if (memcmp(name, backref->name, namelen))
3598 backref = calloc(1, sizeof(*backref) + namelen + 1);
3601 backref->ref_root = ref_root;
3603 backref->index = index;
3604 backref->namelen = namelen;
3605 memcpy(backref->name, name, namelen);
3606 backref->name[namelen] = '\0';
3607 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Destructor for a root_record embedded in a cache tree: recovers the
 * record from its cache_extent and unlinks every backref from
 * rec->backrefs until the list is empty.
 *
 * NOTE(review): the free() calls for the backrefs and the record are
 * presumably on lines not visible in this listing - confirm.
 */
3611 static void free_root_record(struct cache_extent *cache)
3613 struct root_record *rec;
3614 struct root_backref *backref;
3616 rec = container_of(cache, struct root_record, cache);
3617 while (!list_empty(&rec->backrefs)) {
3618 backref = to_root_backref(rec->backrefs.next);
3619 list_del(&backref->list);
/* Macro instantiation keyed on "root_recs", using free_root_record as the per-entry destructor. */
3626 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that an item of type @item_type was seen linking root @root_id to
 * directory @dir of root @ref_root under @name, and accumulate any
 * inconsistencies on the matching root_backref.
 *
 * - @errors is OR-ed into backref->errors.
 * - For anything but a DIR_ITEM, a differing @index on a backref that
 *   already saw a dir index / back ref / forward ref raises
 *   REF_ERR_INDEX_UNMATCH; otherwise the index is recorded.
 * - The found_* flag for @item_type is set; a second ROOT_REF or
 *   ROOT_BACKREF raises REF_ERR_DUP_ROOT_REF / REF_ERR_DUP_ROOT_BACKREF.
 * - The backref becomes reachable once both the forward ref and the dir
 *   item have been seen.
 *
 * The record for @root_id is created on demand; failure to obtain it is
 * fatal (BUG_ON).
 */
3628 static int add_root_backref(struct cache_tree *root_cache,
3629 u64 root_id, u64 ref_root, u64 dir, u64 index,
3630 const char *name, int namelen,
3631 int item_type, int errors)
3633 struct root_record *rec;
3634 struct root_backref *backref;
3636 rec = get_root_rec(root_cache, root_id);
3637 BUG_ON(IS_ERR(rec));
3638 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3641 backref->errors |= errors;
/* DIR_ITEM keys carry no index, so the index check is skipped for them. */
3643 if (item_type != BTRFS_DIR_ITEM_KEY) {
3644 if (backref->found_dir_index || backref->found_back_ref ||
3645 backref->found_forward_ref) {
3646 if (backref->index != index)
3647 backref->errors |= REF_ERR_INDEX_UNMATCH;
3649 backref->index = index;
3653 if (item_type == BTRFS_DIR_ITEM_KEY) {
3654 if (backref->found_forward_ref)
3656 backref->found_dir_item = 1;
3657 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3658 backref->found_dir_index = 1;
3659 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3660 if (backref->found_forward_ref)
3661 backref->errors |= REF_ERR_DUP_ROOT_REF;
3662 else if (backref->found_dir_item)
3664 backref->found_forward_ref = 1;
3665 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3666 if (backref->found_back_ref)
3667 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3668 backref->found_back_ref = 1;
3673 if (backref->found_forward_ref && backref->found_dir_item)
3674 backref->reachable = 1;
/*
 * Convert the per-tree records in @src_cache (inode records whose "ino" is
 * a root objectid) into root backrefs in @dst_cache.
 *
 * For the tree-reloc root the source cache is simply freed.  Otherwise each
 * record is removed from @src_cache, checked with is_child_root(), and for
 * every backref a DIR_ITEM and/or DIR_INDEX root backref is added according
 * to the found_* flags.  Such backrefs must not carry an inode ref
 * (BUG_ON).  Each record is freed after processing.
 */
3678 static int merge_root_recs(struct btrfs_root *root,
3679 struct cache_tree *src_cache,
3680 struct cache_tree *dst_cache)
3682 struct cache_extent *cache;
3683 struct ptr_node *node;
3684 struct inode_record *rec;
3685 struct inode_backref *backref;
3688 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3689 free_inode_recs_tree(src_cache);
3694 cache = search_cache_extent(src_cache, 0);
3697 node = container_of(cache, struct ptr_node, cache);
3699 remove_cache_extent(src_cache, &node->cache);
3702 ret = is_child_root(root, root->objectid, rec->ino);
3708 list_for_each_entry(backref, &rec->backrefs, list) {
3709 BUG_ON(backref->found_inode_ref);
3710 if (backref->found_dir_item)
3711 add_root_backref(dst_cache, rec->ino,
3712 root->root_key.objectid, backref->dir,
3713 backref->index, backref->name,
3714 backref->namelen, BTRFS_DIR_ITEM_KEY,
3716 if (backref->found_dir_index)
3717 add_root_backref(dst_cache, rec->ino,
3718 root->root_key.objectid, backref->dir,
3719 backref->index, backref->name,
3720 backref->namelen, BTRFS_DIR_INDEX_KEY,
3724 free_inode_rec(rec);
/*
 * Verify that every fs tree recorded in @root_cache is referenced and that
 * its root refs are consistent, printing a report for each problem.
 *
 * Visible structure:
 *  1. The record for BTRFS_FS_TREE_OBJECTID is fetched first.
 *  2. A pass over all records re-evaluates backref reachability: a backref
 *     is marked unreachable when the record of the root it comes from
 *     (looked up with get_root_rec()) has no found_ref.  As the existing
 *     fixme notes, circular references are not detected.
 *  3. A second pass reports problems: unreferenced roots in the
 *     FIRST_FREE..LAST_FREE objectid range (after consulting
 *     check_orphan_item() on the tree root, and ignoring records without a
 *     root item), referenced roots without a root item, and reachable
 *     backrefs carrying errors after the REF_ERR_NO_* bits are derived.
 *
 * Returns 1 if any error was counted, 0 otherwise.
 */
3731 static int check_root_refs(struct btrfs_root *root,
3732 struct cache_tree *root_cache)
3734 struct root_record *rec;
3735 struct root_record *ref_root;
3736 struct root_backref *backref;
3737 struct cache_extent *cache;
3743 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3744 BUG_ON(IS_ERR(rec));
3747 /* fixme: this can not detect circular references */
3750 cache = search_cache_extent(root_cache, 0);
3754 rec = container_of(cache, struct root_record, cache);
3755 cache = next_cache_extent(cache);
3757 if (rec->found_ref == 0)
3760 list_for_each_entry(backref, &rec->backrefs, list) {
3761 if (!backref->reachable)
3764 ref_root = get_root_rec(root_cache,
3766 BUG_ON(IS_ERR(ref_root));
3767 if (ref_root->found_ref > 0)
3770 backref->reachable = 0;
3772 if (rec->found_ref == 0)
/* Reporting pass: walk all records again from the start of the cache. */
3778 cache = search_cache_extent(root_cache, 0);
3782 rec = container_of(cache, struct root_record, cache);
3783 cache = next_cache_extent(cache);
3785 if (rec->found_ref == 0 &&
3786 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3787 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3788 ret = check_orphan_item(root->fs_info->tree_root,
3794 * If we don't have a root item then we likely just have
3795 * a dir item in a snapshot for this root but no actual
3796 * ref key or anything so it's meaningless.
3798 if (!rec->found_root_item)
3801 fprintf(stderr, "fs tree %llu not referenced\n",
3802 (unsigned long long)rec->objectid);
3806 if (rec->found_ref > 0 && !rec->found_root_item)
3808 list_for_each_entry(backref, &rec->backrefs, list) {
3809 if (!backref->found_dir_item)
3810 backref->errors |= REF_ERR_NO_DIR_ITEM;
3811 if (!backref->found_dir_index)
3812 backref->errors |= REF_ERR_NO_DIR_INDEX;
3813 if (!backref->found_back_ref)
3814 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3815 if (!backref->found_forward_ref)
3816 backref->errors |= REF_ERR_NO_ROOT_REF;
3817 if (backref->reachable && backref->errors)
3824 fprintf(stderr, "fs tree %llu refs %u %s\n",
3825 (unsigned long long)rec->objectid, rec->found_ref,
3826 rec->found_root_item ? "" : "not found");
3828 list_for_each_entry(backref, &rec->backrefs, list) {
3829 if (!backref->reachable)
3831 if (!backref->errors && rec->found_root_item)
3833 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3834 " index %llu namelen %u name %s errors %x\n",
3835 (unsigned long long)backref->ref_root,
3836 (unsigned long long)backref->dir,
3837 (unsigned long long)backref->index,
3838 backref->namelen, backref->name,
3840 print_ref_error(backref->errors);
3843 return errors > 0 ? 1 : 0;
/*
 * Decode the btrfs_root_ref item at @slot of leaf @eb and feed it to
 * add_root_backref().
 *
 * dirid, sequence (index) and name length are read from the item; the name
 * bytes follow the structure directly (ref + 1).  A name longer than
 * BTRFS_NAME_LEN is truncated to BTRFS_NAME_LEN and flagged with
 * REF_ERR_NAME_TOO_LONG.
 *
 * The key layout differs by type: for ROOT_REF the referenced root is
 * key->offset and the referencing root key->objectid; for the other key
 * type handled here the two are swapped.
 */
3846 static int process_root_ref(struct extent_buffer *eb, int slot,
3847 struct btrfs_key *key,
3848 struct cache_tree *root_cache)
3854 struct btrfs_root_ref *ref;
3855 char namebuf[BTRFS_NAME_LEN];
3858 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3860 dirid = btrfs_root_ref_dirid(eb, ref);
3861 index = btrfs_root_ref_sequence(eb, ref);
3862 name_len = btrfs_root_ref_name_len(eb, ref);
3864 if (name_len <= BTRFS_NAME_LEN) {
3868 len = BTRFS_NAME_LEN;
3869 error = REF_ERR_NAME_TOO_LONG;
/* namebuf receives exactly len bytes; it is passed on together with len. */
3871 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3873 if (key->type == BTRFS_ROOT_REF_KEY) {
3874 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3875 index, namebuf, len, key->type, error);
3877 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3878 index, namebuf, len, key->type, error);
/*
 * Destructor for a btrfs_corrupt_block embedded in a cache tree: recovers
 * the containing structure from its cache_extent.
 *
 * NOTE(review): the statement releasing the structure is presumably on a
 * line not visible in this listing - confirm.
 */
3883 static void free_corrupt_block(struct cache_extent *cache)
3885 struct btrfs_corrupt_block *corrupt;
3887 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/* Macro instantiation keyed on "corrupt_blocks"; check_fs_root() calls free_corrupt_blocks_tree(). */
3891 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3894 * Repair the btree of the given root.
3896 * The fix is to remove the node key in corrupt_blocks cache_tree
3897 * and rebalance the tree.
3898 * After the fix, the btree should be writeable.
/*
 * Cut every tree block listed in @corrupt_blocks out of @root's btree.
 *
 * Nothing is done for an empty list.  Otherwise, inside one transaction:
 *  Pass 1: for each corrupt block, search to its recorded key at its
 *          recorded level (path.lowest_level) with ins_len = 0 so no
 *          balancing touches neighbouring blocks, read the block pointer at
 *          that slot, delete the pointer with btrfs_del_ptr() and free the
 *          extent it pointed to (nodesize bytes).
 *  Pass 2: search each recorded key again with ins_len = -1 so that
 *          btrfs_search_slot() rebalances the tree; the key itself is no
 *          longer expected to be found.
 * The transaction is committed at the end.
 */
3900 static int repair_btree(struct btrfs_root *root,
3901 struct cache_tree *corrupt_blocks)
3903 struct btrfs_trans_handle *trans;
3904 struct btrfs_path path;
3905 struct btrfs_corrupt_block *corrupt;
3906 struct cache_extent *cache;
3907 struct btrfs_key key;
3912 if (cache_tree_empty(corrupt_blocks))
3915 trans = btrfs_start_transaction(root, 1);
3916 if (IS_ERR(trans)) {
3917 ret = PTR_ERR(trans);
3918 fprintf(stderr, "Error starting transaction: %s\n",
3922 btrfs_init_path(&path);
3923 cache = first_cache_extent(corrupt_blocks);
3925 corrupt = container_of(cache, struct btrfs_corrupt_block,
3927 level = corrupt->level;
3928 path.lowest_level = level;
3929 key.objectid = corrupt->key.objectid;
3930 key.type = corrupt->key.type;
3931 key.offset = corrupt->key.offset;
3934 * Here we don't want to do any tree balance, since it may
3935 * cause a balance with corrupted brother leaf/node,
3936 * so ins_len set to 0 here.
3937 * Balance will be done after all corrupt node/leaf is deleted.
3939 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3942 offset = btrfs_node_blockptr(path.nodes[level],
3945 /* Remove the ptr */
3946 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3950 * Remove the corresponding extent
3951 * return value is not concerned.
3953 btrfs_release_path(&path);
3954 ret = btrfs_free_extent(trans, root, offset,
3955 root->fs_info->nodesize, 0,
3956 root->root_key.objectid, level - 1, 0);
3957 cache = next_cache_extent(cache);
3960 /* Balance the btree using btrfs_search_slot() */
3961 cache = first_cache_extent(corrupt_blocks);
3963 corrupt = container_of(cache, struct btrfs_corrupt_block,
3965 memcpy(&key, &corrupt->key, sizeof(key));
3966 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3969 /* return will always >0 since it won't find the item */
3971 btrfs_release_path(&path);
3972 cache = next_cache_extent(cache);
3975 btrfs_commit_transaction(trans, root);
3976 btrfs_release_path(&path);
/*
 * Check a single fs/subvolume tree @root: walk the whole tree collecting
 * inode and root records, report (and try to repair) corrupted tree blocks,
 * merge root backrefs into @root_cache and finally validate the inode
 * records.
 *
 * Visible sequence:
 *  - a local corrupt_blocks tree is installed in fs_info for the duration
 *    of the check and removed again at the end;
 *  - for non-reloc trees the root record is fetched and found_root_item is
 *    set when the root item has refs;
 *  - orphan data extents attached to the root are moved to their inode
 *    records and flagged I_ERR_FILE_EXTENT_ORPHAN;
 *  - the root block is checked with btrfs_check_leaf()/btrfs_check_node();
 *  - the walk starts at the root node, or resumes at root_item's
 *    drop_progress key / drop_level when the root has no refs and a drop
 *    was in progress (an invalid drop level is rejected with an error);
 *  - walk_down_tree()/walk_up_tree() traverse the tree;
 *  - corrupted blocks are listed and passed to repair_btree();
 *  - merge_root_recs() and check_inode_recs() consume the collected
 *    records.
 *
 * NOTE(review): loop heads, error accumulation and the return statement are
 * on lines not visible in this listing.
 */
3980 static int check_fs_root(struct btrfs_root *root,
3981 struct cache_tree *root_cache,
3982 struct walk_control *wc)
3988 struct btrfs_path path;
3989 struct shared_node root_node;
3990 struct root_record *rec;
3991 struct btrfs_root_item *root_item = &root->root_item;
3992 struct cache_tree corrupt_blocks;
3993 struct orphan_data_extent *orphan;
3994 struct orphan_data_extent *tmp;
3995 enum btrfs_tree_block_status status;
3996 struct node_refs nrefs;
3999 * Reuse the corrupt_block cache tree to record corrupted tree block
4001 * Unlike the usage in extent tree check, here we do it in a per
4002 * fs/subvol tree base.
4004 cache_tree_init(&corrupt_blocks);
4005 root->fs_info->corrupt_blocks = &corrupt_blocks;
4007 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4008 rec = get_root_rec(root_cache, root->root_key.objectid);
4009 BUG_ON(IS_ERR(rec));
4010 if (btrfs_root_refs(root_item) > 0)
4011 rec->found_root_item = 1;
4014 btrfs_init_path(&path);
4015 memset(&root_node, 0, sizeof(root_node));
4016 cache_tree_init(&root_node.root_cache);
4017 cache_tree_init(&root_node.inode_cache);
4018 memset(&nrefs, 0, sizeof(nrefs));
4020 /* Move the orphan extent record to corresponding inode_record */
4021 list_for_each_entry_safe(orphan, tmp,
4022 &root->orphan_data_extents, list) {
4023 struct inode_record *inode;
4025 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4027 BUG_ON(IS_ERR(inode));
4028 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4029 list_move(&orphan->list, &inode->orphan_extents);
/* Prime the walk control so the walk begins at the root's level. */
4032 level = btrfs_header_level(root->node);
4033 memset(wc->nodes, 0, sizeof(wc->nodes));
4034 wc->nodes[level] = &root_node;
4035 wc->active_node = level;
4036 wc->root_level = level;
4038 /* We may not have checked the root block, lets do that now */
4039 if (btrfs_is_leaf(root->node))
4040 status = btrfs_check_leaf(root, NULL, root->node);
4042 status = btrfs_check_node(root, NULL, root->node);
4043 if (status != BTRFS_TREE_BLOCK_CLEAN)
/*
 * A live root (refs > 0) or one with no recorded drop progress is walked
 * from its root node; otherwise the walk resumes at the drop_progress key.
 */
4046 if (btrfs_root_refs(root_item) > 0 ||
4047 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4048 path.nodes[level] = root->node;
4049 extent_buffer_get(root->node);
4050 path.slots[level] = 0;
4052 struct btrfs_key key;
4053 struct btrfs_disk_key found_key;
4055 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4056 level = root_item->drop_level;
4057 path.lowest_level = level;
4058 if (level > btrfs_header_level(root->node) ||
4059 level >= BTRFS_MAX_LEVEL) {
4060 error("ignoring invalid drop level: %u", level);
4063 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4066 btrfs_node_key(path.nodes[level], &found_key,
4068 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4069 sizeof(found_key)));
4073 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4079 wret = walk_up_tree(root, &path, wc, &level);
4086 btrfs_release_path(&path);
4088 if (!cache_tree_empty(&corrupt_blocks)) {
4089 struct cache_extent *cache;
4090 struct btrfs_corrupt_block *corrupt;
4092 printf("The following tree block(s) is corrupted in tree %llu:\n",
4093 root->root_key.objectid);
4094 cache = first_cache_extent(&corrupt_blocks);
4096 corrupt = container_of(cache,
4097 struct btrfs_corrupt_block,
4099 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4100 cache->start, corrupt->level,
4101 corrupt->key.objectid, corrupt->key.type,
4102 corrupt->key.offset);
4103 cache = next_cache_extent(cache);
4106 printf("Try to repair the btree for root %llu\n",
4107 root->root_key.objectid);
4108 ret = repair_btree(root, &corrupt_blocks);
4110 fprintf(stderr, "Failed to repair btree: %s\n",
4113 printf("Btree for root %llu is fixed\n",
4114 root->root_key.objectid);
4118 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4122 if (root_node.current) {
4123 root_node.current->checked = 1;
4124 maybe_free_inode_rec(&root_node.inode_cache,
4128 err = check_inode_recs(root, &root_node.inode_cache);
/* Tear down the per-root state installed at the top of the function. */
4132 free_corrupt_blocks_tree(&corrupt_blocks);
4133 root->fs_info->corrupt_blocks = NULL;
4134 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Decide whether the tree identified by @objectid should be treated as an
 * fs tree by the caller.  The two relocation tree ids are tested explicitly;
 * any other id is decided by is_fstree().
 *
 * NOTE(review): this listing omits source lines; the statement executed for
 * the relocation ids is not visible here - presumably "return 1", confirm.
 */
4138 static int fs_root_objectid(u64 objectid)
4140 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4141 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4143 return is_fstree(objectid);
/*
 * Scan the items of the tree root and run check_fs_root() on every ROOT_ITEM
 * whose objectid is accepted by fs_root_objectid().  ROOT_REF and
 * ROOT_BACKREF items are handed to process_root_ref().
 *
 * @fs_info:    filesystem whose tree_root is scanned
 * @root_cache: root record cache passed on to check_fs_root(); it is emptied
 *              with free_root_recs_tree() on the two visible restart paths
 *
 * NOTE(review): this listing omits source lines (declarations, braces,
 * gotos, returns), so the loop structure and the return value are not
 * visible here; comments describe the visible statements only.
 */
4146 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4147 struct cache_tree *root_cache)
4149 struct btrfs_path path;
4150 struct btrfs_key key;
4151 struct walk_control wc;
4152 struct extent_buffer *leaf, *tree_node;
4153 struct btrfs_root *tmp_root;
4154 struct btrfs_root *tree_root = fs_info->tree_root;
/* Switch the progress reporter to the fs roots phase when it is enabled */
4158 if (ctx.progress_enabled) {
4159 ctx.tp = TASK_FS_ROOTS;
4160 task_start(ctx.info);
4164 * Just in case we made any changes to the extent tree that weren't
4165 * reflected into the free space cache yet.
4168 reset_cached_block_groups(fs_info);
4169 memset(&wc, 0, sizeof(wc));
4170 cache_tree_init(&wc.shared);
4171 btrfs_init_path(&path);
4176 key.type = BTRFS_ROOT_ITEM_KEY;
4177 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember the current tree root node so a later replacement is noticed */
4182 tree_node = tree_root->node;
/*
 * The tree root node changed since it was recorded: drop the collected root
 * records and the path (presumably the scan is then restarted - the jump is
 * not visible in this listing).
 */
4184 if (tree_node != tree_root->node) {
4185 free_root_recs_tree(root_cache);
4186 btrfs_release_path(&path);
4189 leaf = path.nodes[0];
/* Ran off the end of the current leaf: advance to the next one */
4190 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4191 ret = btrfs_next_leaf(tree_root, &path);
4197 leaf = path.nodes[0];
4199 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4200 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4201 fs_root_objectid(key.objectid)) {
/* Reloc trees are read with the no-cache reader and freed after the check */
4202 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4203 tmp_root = btrfs_read_fs_root_no_cache(
4206 key.offset = (u64)-1;
4207 tmp_root = btrfs_read_fs_root(
4210 if (IS_ERR(tmp_root)) {
4214 ret = check_fs_root(tmp_root, root_cache, &wc);
/* -EAGAIN from check_fs_root(): discard the root records and the path */
4215 if (ret == -EAGAIN) {
4216 free_root_recs_tree(root_cache);
4217 btrfs_release_path(&path);
4222 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4223 btrfs_free_fs_root(tmp_root);
4224 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4225 key.type == BTRFS_ROOT_BACKREF_KEY) {
4226 process_root_ref(leaf, path.slots[0], &key,
4233 btrfs_release_path(&path);
/* The shared cache must be empty once freed; anything left is reported */
4235 free_extent_cache_tree(&wc.shared);
4236 if (!cache_tree_empty(&wc.shared))
4237 fprintf(stderr, "warning line %d\n", __LINE__);
4239 task_stop(ctx.info);
4245 * Find the @index according to @ino and name.
4246 * Notice: time efficiency is O(N)
4248 * @root: the root of the fs/file tree
4249 * @index_ret: the index as return value
4250 * @namebuf: the name to match
4251 * @name_len: the length of name to match
4252 * @file_type: the file_type of INODE_ITEM to match
4254 * Returns 0 if found and *@index_ret will be modified with right value
4255 * Returns <0 if not found and *@index_ret will be (u64)-1
/*
 * Walk the DIR_INDEX items of directory @dirid backwards, starting from the
 * highest possible index, and look for the entry that points at inode
 * @location_id with the given name and file type.
 *
 * @root:        the fs/file tree to search
 * @dirid:       objectid of the directory
 * @location_id: inode number the entry must point to (INODE_ITEM key with
 *               offset 0)
 * @index_ret:   receives the key offset (the dir index) of the matching item;
 *               set to (u64)-1 on the failure path that follows
 *               btrfs_previous_item()
 * @namebuf:     name to match
 * @name_len:    length of @namebuf
 * @file_type:   file type compared with btrfs_dir_type() of each entry
 *
 * NOTE(review): this listing omits source lines (part of the signature,
 * declarations, braces, labels, gotos, returns); comments describe the
 * visible statements only.
 */
4257 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4258 u64 *index_ret, char *namebuf, u32 name_len,
4261 struct btrfs_path path;
4262 struct extent_buffer *node;
4263 struct btrfs_dir_item *di;
4264 struct btrfs_key key;
4265 struct btrfs_key location;
4266 char name[BTRFS_NAME_LEN] = {0};
4278 /* search from the last index */
4279 key.objectid = dirid;
4280 key.offset = (u64)-1;
4281 key.type = BTRFS_DIR_INDEX_KEY;
4283 btrfs_init_path(&path);
4284 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* Step back to the previous DIR_INDEX item of this directory */
4289 ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
/*
 * Fixed: this used to be "(64)-1", which is the plain integer 63 and could be
 * mistaken for a real dir index.  The documented "not found" value is the
 * all-ones sentinel (u64)-1.
 */
4292 *index_ret = (u64)-1;
4295 /* Check whether inode_id/filetype/name match */
4296 node = path.nodes[0];
4297 slot = path.slots[0];
4298 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4299 total = btrfs_item_size_nr(node, slot);
4300 while (cur < total) {
4302 len = btrfs_dir_name_len(node, di);
4303 data_len = btrfs_dir_data_len(node, di);
/* The entry must point at the INODE_ITEM of @location_id */
4305 btrfs_dir_item_key_to_cpu(node, di, &location);
4306 if (location.objectid != location_id ||
4307 location.type != BTRFS_INODE_ITEM_KEY ||
4308 location.offset != 0)
4311 filetype = btrfs_dir_type(node, di);
4312 if (file_type != filetype)
/* Never copy more than the local name buffer can hold */
4315 if (len > BTRFS_NAME_LEN)
4316 len = BTRFS_NAME_LEN;
4318 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4319 if (len != name_len || strncmp(namebuf, name, len))
/* Match: the item key offset is the dir index */
4322 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4323 *index_ret = key.offset;
/* Advance to the next entry packed in the same item */
4327 len += sizeof(*di) + data_len;
4328 di = (struct btrfs_dir_item *)((char *)di + len);
4334 btrfs_release_path(&path);
4339 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4340 * INODE_REF/INODE_EXTREF match.
4342 * @root: the root of the fs/file tree
4343 * @key: the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4344 * value while find index
4345 * @location_key: location key of the struct btrfs_dir_item to match
4346 * @name: the name to match
4347 * @namelen: the length of name
4348 * @file_type: the type of file to match
4350 * Return 0 if no error occurred.
4351 * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4352 * DIR_ITEM/DIR_INDEX
4353 * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4354 * and DIR_ITEM/DIR_INDEX mismatch
/*
 * Look up the DIR_ITEM or DIR_INDEX item at @key and scan its entries for one
 * whose location key, file type and name all match the arguments.
 *
 * @root:         the fs/file tree to search
 * @key:          key of the DIR_ITEM/DIR_INDEX; for a DIR_INDEX key with
 *                offset (u64)-1 the index is first resolved through
 *                find_dir_index(), which writes it into key->offset
 * @location_key: location key every candidate entry must carry
 * @name:         name to match
 * @namelen:      length of @name
 * @file_type:    value compared with btrfs_dir_type() of each entry
 *
 * While scanning, the result is preset to DIR_ITEM_MISMATCH or
 * DIR_INDEX_MISMATCH depending on key->type.
 *
 * NOTE(review): this listing omits source lines (declarations, braces,
 * labels, gotos, returns); comments describe the visible statements only.
 */
4356 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4357 struct btrfs_key *location_key, char *name,
4358 u32 namelen, u8 file_type)
4360 struct btrfs_path path;
4361 struct extent_buffer *node;
4362 struct btrfs_dir_item *di;
4363 struct btrfs_key location;
4364 char namebuf[BTRFS_NAME_LEN] = {0};
4373 /* get the index by traversing all index */
4374 if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4375 ret = find_dir_index(root, key->objectid,
4376 location_key->objectid, &key->offset,
4377 name, namelen, file_type);
4379 ret = DIR_INDEX_MISSING;
4383 btrfs_init_path(&path);
4384 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4386 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4391 /* Check whether inode_id/filetype/name match */
4392 node = path.nodes[0];
4393 slot = path.slots[0];
4394 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4395 total = btrfs_item_size_nr(node, slot);
4396 while (cur < total) {
/* Assume a mismatch until an entry passes every comparison below */
4397 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4398 DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4400 len = btrfs_dir_name_len(node, di);
4401 data_len = btrfs_dir_data_len(node, di);
4403 btrfs_dir_item_key_to_cpu(node, di, &location);
4404 if (location.objectid != location_key->objectid ||
4405 location.type != location_key->type ||
4406 location.offset != location_key->offset)
4409 filetype = btrfs_dir_type(node, di);
4410 if (file_type != filetype)
/*
 * NOTE(review): len is clamped before the warning is formatted, so the length
 * printed by the message is always BTRFS_NAME_LEN, not the on-disk value.
 */
4413 if (len > BTRFS_NAME_LEN) {
4414 len = BTRFS_NAME_LEN;
4415 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4417 key->type == BTRFS_DIR_ITEM_KEY ?
4418 "DIR_ITEM" : "DIR_INDEX",
4419 key->objectid, key->offset, len);
4421 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4423 if (len != namelen || strncmp(namebuf, name, len))
/* Advance to the next entry packed in the same item */
4429 len += sizeof(*di) + data_len;
4430 di = (struct btrfs_dir_item *)((char *)di + len);
4435 btrfs_release_path(&path);
4440 * Prints inode ref error message
/*
 * Report the DIR_ITEM/DIR_INDEX problems found for one INODE_REF entry.
 *
 * @root:     tree the ref belongs to (its objectid is printed)
 * @key:      key of the INODE_REF; key->offset is printed as the directory id
 * @index:    dir index stored in the ref
 * @namebuf:  name stored in the ref
 * @name_len: length of @namebuf, used to compute the name hash
 * @filetype: file type printed with the message
 * @err:      bitmap of DIR_ITEM_ and DIR_INDEX_ MISMATCH/MISSING bits
 *
 * NOTE(review): this listing omits source lines (braces, an error() call
 * head, trailing arguments, returns); comments describe the visible
 * statements only.
 */
4442 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4443 u64 index, const char *namebuf, int name_len,
4444 u8 filetype, int err)
4449 /* root dir error */
4450 if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4452 "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4453 root->objectid, key->objectid, key->offset, namebuf);
4458 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4459 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4460 root->objectid, key->offset,
4461 btrfs_name_hash(namebuf, name_len),
4462 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4464 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4465 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4466 root->objectid, key->offset, index,
/*
 * Fixed: the DIR INDEX message used to test DIR_ITEM_MISMATCH, so a
 * mismatching index was reported as "missing" unless the DIR_ITEM happened
 * to mismatch as well.
 */
4467 err & DIR_INDEX_MISMATCH ? "mismatch" : "missing",
4472 * Traverse the given INODE_REF and call find_dir_item() to find related
4473 * DIR_ITEM/DIR_INDEX.
4475 * @root: the root of the fs/file tree
4476 * @ref_key: the key of the INODE_REF
4477 * @refs: the count of INODE_REF
4478 * @mode: the st_mode of INODE_ITEM
4480 * Return 0 if no error occurred.
/*
 * Walk the entries packed in one INODE_REF item and verify that each of them
 * has a matching DIR_INDEX and DIR_ITEM in the parent directory.
 *
 * @root:        the fs/file tree
 * @ref_key:     key of the INODE_REF (objectid = inode, offset = parent dir)
 * @path:        path positioned on the INODE_REF item
 * @name_ret:    if not NULL, receives the name of the ref seen while *@refs
 *               is 1
 * @namelen_ret: companion of @name_ret (its update is not visible here)
 * @refs:        running count of refs for the inode
 * @mode:        st_mode of the inode; converted with imode_to_type() wherever
 *               a directory entry file type is needed
 *
 * NOTE(review): this listing omits source lines (declarations, braces,
 * labels, gotos, returns); comments describe the visible statements only.
 */
4482 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4483 struct btrfs_path *path, char *name_ret,
4484 u32 *namelen_ret, u64 *refs, int mode)
4486 struct btrfs_key key;
4487 struct btrfs_key location;
4488 struct btrfs_inode_ref *ref;
4489 struct extent_buffer *node;
4490 char namebuf[BTRFS_NAME_LEN] = {0};
/* Directory entries for this ref must point at the inode's INODE_ITEM */
4500 location.objectid = ref_key->objectid;
4501 location.type = BTRFS_INODE_ITEM_KEY;
4502 location.offset = 0;
4503 node = path->nodes[0];
4504 slot = path->slots[0];
4506 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4507 total = btrfs_item_size_nr(node, slot);
4510 /* Update inode ref count */
4514 index = btrfs_inode_ref_index(node, ref);
4515 name_len = btrfs_inode_ref_name_len(node, ref);
/* The name must fit both the remaining item space and BTRFS_NAME_LEN */
4516 if (cur + sizeof(*ref) + name_len > total ||
4517 name_len > BTRFS_NAME_LEN) {
4518 warning("root %llu INODE_REF[%llu %llu] name too long",
4519 root->objectid, ref_key->objectid, ref_key->offset);
4521 if (total < cur + sizeof(*ref))
4523 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4528 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4530 /* copy the first name found to name_ret */
4531 if (*refs == 1 && name_ret) {
4532 memcpy(name_ret, namebuf, len);
4536 /* Check root dir ref */
4537 if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4538 if (index != 0 || len != strlen("..") ||
4539 strncmp("..", namebuf, len) ||
4540 ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4541 /* set err bits then repair will delete the ref */
4542 err |= DIR_INDEX_MISSING;
4543 err |= DIR_ITEM_MISSING;
4548 /* Find related DIR_INDEX */
4549 key.objectid = ref_key->offset;
4550 key.type = BTRFS_DIR_INDEX_KEY;
/*
 * Fixed: find_dir_item() takes a u8 file type and compares it with
 * btrfs_dir_type(); the raw st_mode used to be passed here and was truncated
 * to its low byte.
 */
4552 tmp_err |= find_dir_item(root, &key, &location, namebuf, len, imode_to_type(mode));
4554 /* Find related dir_item */
4555 key.objectid = ref_key->offset;
4556 key.type = BTRFS_DIR_ITEM_KEY;
4557 key.offset = btrfs_name_hash(namebuf, len);
4558 tmp_err |= find_dir_item(root, &key, &location, namebuf, len, imode_to_type(mode));
4561 print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4562 imode_to_type(mode), tmp_err);
/* Advance to the next ref packed in the same item */
4564 len = sizeof(*ref) + name_len;
4565 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4575 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4576 * DIR_ITEM/DIR_INDEX.
4578 * @root: the root of the fs/file tree
4579 * @ref_key: the key of the INODE_EXTREF
4580 * @refs: the count of INODE_EXTREF
4581 * @mode: the st_mode of INODE_ITEM
4583 * Return 0 if no error occurred.
/*
 * Walk the entries packed in one INODE_EXTREF item and verify that each of
 * them has a matching DIR_INDEX and DIR_ITEM in its parent directory.
 *
 * @root:    the fs/file tree
 * @ref_key: key of the INODE_EXTREF (objectid = inode)
 * @node:    leaf holding the item
 * @slot:    slot of the item in @node
 * @refs:    running count of refs for the inode
 * @mode:    st_mode of the inode (parameter line not visible in this
 *           listing); converted with imode_to_type() before it is compared
 *           with directory entry file types
 *
 * NOTE(review): this listing omits source lines (part of the signature,
 * declarations, braces, labels, returns); comments describe the visible
 * statements only.
 */
4585 static int check_inode_extref(struct btrfs_root *root,
4586 struct btrfs_key *ref_key,
4587 struct extent_buffer *node, int slot, u64 *refs,
4590 struct btrfs_key key;
4591 struct btrfs_key location;
4592 struct btrfs_inode_extref *extref;
4593 char namebuf[BTRFS_NAME_LEN] = {0};
/* Directory entries for this ref must point at the inode's INODE_ITEM */
4603 location.objectid = ref_key->objectid;
4604 location.type = BTRFS_INODE_ITEM_KEY;
4605 location.offset = 0;
4607 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4608 total = btrfs_item_size_nr(node, slot);
4611 /* update inode ref count */
4613 name_len = btrfs_inode_extref_name_len(node, extref);
4614 index = btrfs_inode_extref_index(node, extref);
4615 parent = btrfs_inode_extref_parent(node, extref);
/* Names longer than BTRFS_NAME_LEN are trimmed to the buffer size */
4616 if (name_len <= BTRFS_NAME_LEN) {
4619 len = BTRFS_NAME_LEN;
4620 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4621 root->objectid, ref_key->objectid, ref_key->offset);
4623 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4625 /* Check root dir ref name */
4626 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4627 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4628 root->objectid, ref_key->objectid, ref_key->offset,
4630 err |= ROOT_DIR_ERROR;
4633 /* find related dir_index */
4634 key.objectid = parent;
4635 key.type = BTRFS_DIR_INDEX_KEY;
/*
 * Fixed: find_dir_item() takes a u8 file type and compares it with
 * btrfs_dir_type(); the raw st_mode used to be passed here.
 * NOTE(review): assumes the caller passes the raw st_mode, as the function
 * documentation states - confirm against the call site.
 */
4637 ret = find_dir_item(root, &key, &location, namebuf, len, imode_to_type(mode));
4640 /* find related dir_item */
4641 key.objectid = parent;
4642 key.type = BTRFS_DIR_ITEM_KEY;
4643 key.offset = btrfs_name_hash(namebuf, len);
4644 ret = find_dir_item(root, &key, &location, namebuf, len, imode_to_type(mode));
/* Advance to the next extref packed in the same item */
4647 len = sizeof(*extref) + name_len;
4648 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4658 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4659 * DIR_ITEM/DIR_INDEX match.
4660 * Return with @index_ret.
4662 * @root: the root of the fs/file tree
4663 * @key: the key of the INODE_REF/INODE_EXTREF
4664 * @name: the name in the INODE_REF/INODE_EXTREF
4665 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4666 * @index_ret: the index in the INODE_REF/INODE_EXTREF,
4667 * value (u64)-1 means do not check index
4668 * @ext_ref: the EXTENDED_IREF feature
4670 * Return 0 if no error occurred.
4671 * Return >0 for error bitmap
/*
 * Search the INODE_REF item at @key for an entry with the given name and, if
 * *@index_ret is not (u64)-1, the given index.  A second pass does the same
 * on the INODE_EXTREF item of the inode.
 *
 * @root:      the fs/file tree
 * @key:       key of the INODE_REF (offset = parent dir id); for the EXTREF
 *             pass its type and offset are overwritten in place, so the
 *             caller sees the modified key afterwards
 * @name:      name to match
 * @namelen:   length of @name
 * @index_ret: index to match, or (u64)-1 to accept any; receives the index of
 *             the matching entry
 * @ext_ref:   EXTENDED_IREF feature flag (its test is not visible in this
 *             listing)
 *
 * Both loops preset the result to INODE_REF_MISSING before examining an
 * entry.
 *
 * NOTE(review): this listing omits source lines (declarations, braces,
 * labels, gotos, returns); comments describe the visible statements only.
 */
4673 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4674 char *name, int namelen, u64 *index_ret,
4675 unsigned int ext_ref)
4677 struct btrfs_path path;
4678 struct btrfs_inode_ref *ref;
4679 struct btrfs_inode_extref *extref;
4680 struct extent_buffer *node;
4681 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4694 btrfs_init_path(&path);
4695 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4697 ret = INODE_REF_MISSING;
4701 node = path.nodes[0];
4702 slot = path.slots[0];
4704 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4705 total = btrfs_item_size_nr(node, slot);
4707 /* Iterate all entry of INODE_REF */
4708 while (cur < total) {
4709 ret = INODE_REF_MISSING;
4711 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4712 ref_index = btrfs_inode_ref_index(node, ref);
/* A concrete index was requested and this entry has a different one */
4713 if (*index_ret != (u64)-1 && *index_ret != ref_index)
/* The name must fit both the remaining item space and BTRFS_NAME_LEN */
4716 if (cur + sizeof(*ref) + ref_namelen > total ||
4717 ref_namelen > BTRFS_NAME_LEN) {
4718 warning("root %llu INODE %s[%llu %llu] name too long",
4720 key->type == BTRFS_INODE_REF_KEY ?
4722 key->objectid, key->offset);
4724 if (cur + sizeof(*ref) > total)
4726 len = min_t(u32, total - cur - sizeof(*ref),
4732 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4735 if (len != namelen || strncmp(ref_namebuf, name, len))
4738 *index_ret = ref_index;
/* Advance to the next ref packed in the same item */
4742 len = sizeof(*ref) + ref_namelen;
4743 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4748 /* Skip if not support EXTENDED_IREF feature */
4752 btrfs_release_path(&path);
4753 btrfs_init_path(&path);
/* Re-key the search for the extref item: offset becomes the extref hash */
4755 dir_id = key->offset;
4756 key->type = BTRFS_INODE_EXTREF_KEY;
4757 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4759 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4761 ret = INODE_REF_MISSING;
4765 node = path.nodes[0];
4766 slot = path.slots[0];
4768 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4770 total = btrfs_item_size_nr(node, slot);
4772 /* Iterate all entry of INODE_EXTREF */
4773 while (cur < total) {
4774 ret = INODE_REF_MISSING;
4776 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4777 ref_index = btrfs_inode_extref_index(node, extref);
4778 parent = btrfs_inode_extref_parent(node, extref);
4779 if (*index_ret != (u64)-1 && *index_ret != ref_index)
/* Entries with the same hash may belong to another directory */
4782 if (parent != dir_id)
4785 if (ref_namelen <= BTRFS_NAME_LEN) {
4788 len = BTRFS_NAME_LEN;
4789 warning("root %llu INODE %s[%llu %llu] name too long",
4791 key->type == BTRFS_INODE_REF_KEY ?
4793 key->objectid, key->offset);
4795 read_extent_buffer(node, ref_namebuf,
4796 (unsigned long)(extref + 1), len);
4798 if (len != namelen || strncmp(ref_namebuf, name, len))
4801 *index_ret = ref_index;
/* Advance to the next extref packed in the same item */
4806 len = sizeof(*extref) + ref_namelen;
4807 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4812 btrfs_release_path(&path);
/*
 * Report the problems found for one DIR_ITEM/DIR_INDEX entry.
 *
 * @root:     tree the entry belongs to (its objectid is printed)
 * @key:      key of the DIR_ITEM/DIR_INDEX being checked
 * @ino:      inode number the entry points to
 * @index:    dir index of the entry
 * @namebuf:  name of the entry
 * @name_len: length of @namebuf (not used by the visible statements)
 * @filetype: file type stored in the entry
 * @err:      bitmap of DIR_ITEM_, DIR_INDEX_, INODE_ITEM_ MISMATCH/MISSING
 *            bits and INODE_REF_MISSING
 *
 * NOTE(review): this listing omits source lines (braces, some error() call
 * heads and arguments); comments describe the visible statements only.
 */
4816 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
4817 u64 ino, u64 index, const char *namebuf,
4818 int name_len, u8 filetype, int err)
4820 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
4821 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
4822 root->objectid, key->objectid, key->offset, namebuf,
/* Fixed: the message text used to read "mismath" */
4824 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing");
4827 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
4828 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
4829 root->objectid, key->objectid, index, namebuf, filetype,
/*
 * Fixed: the DIR INDEX message used to test DIR_ITEM_MISMATCH, so a
 * mismatching index was reported as "missing".
 */
4830 err & DIR_INDEX_MISMATCH ? "mismatch" : "missing");
4833 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
4835 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
4836 root->objectid, ino, index, namebuf, filetype,
4837 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
4840 if (err & INODE_REF_MISSING)
4842 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
4843 root->objectid, ino, key->objectid, namebuf, filetype);
4848 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4849 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4851 * @root: the root of the fs/file tree
4852 * @di_key: the key of the DIR_ITEM/DIR_INDEX
4854 * @size: the st_size of the INODE_ITEM
4855 * @ext_ref: the EXTENDED_IREF feature
4857 * Return 0 if no error occurred.
/*
 * Walk the entries of one DIR_ITEM or DIR_INDEX item.  For each entry the
 * referenced INODE_ITEM is looked up and its type compared with the entry's
 * file type, the matching INODE_REF/INODE_EXTREF is searched with
 * find_inode_ref(), and the counterpart directory entry (DIR_INDEX for a
 * DIR_ITEM, DIR_ITEM for a DIR_INDEX) is searched with find_dir_item().
 *
 * @root:    the fs/file tree
 * @di_key:  key of the DIR_ITEM/DIR_INDEX being checked
 * @path:    path positioned on the item; it is released during the checks
 *           and searched again for @di_key at the end
 * @size:    accumulates the name lengths of the entries
 * @ext_ref: EXTENDED_IREF feature flag
 *
 * NOTE(review): this listing omits source lines (declarations, braces,
 * labels, gotos, returns); comments describe the visible statements only.
 */
4859 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
4860 struct btrfs_path *path, u64 *size,
4861 unsigned int ext_ref)
4863 struct btrfs_dir_item *di;
4864 struct btrfs_inode_item *ii;
4865 struct btrfs_key key;
4866 struct btrfs_key location;
4867 struct extent_buffer *node;
4869 char namebuf[BTRFS_NAME_LEN] = {0};
4882 node = path->nodes[0];
4883 slot = path->slots[0];
4885 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4886 * ignore index check.
4888 if (di_key->type == BTRFS_DIR_INDEX_KEY)
4889 index = di_key->offset;
4893 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4894 total = btrfs_item_size_nr(node, slot);
4895 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4897 while (cur < total) {
4898 data_len = btrfs_dir_data_len(node, di);
4901 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4903 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
4904 di_key->objectid, di_key->offset, data_len);
4906 name_len = btrfs_dir_name_len(node, di);
/* Names longer than BTRFS_NAME_LEN are trimmed to the buffer size */
4907 if (name_len <= BTRFS_NAME_LEN) {
4910 len = BTRFS_NAME_LEN;
4911 warning("root %llu %s[%llu %llu] name too long",
4913 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
4914 di_key->objectid, di_key->offset);
4916 (*size) += name_len;
4917 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4919 filetype = btrfs_dir_type(node, di);
/* A DIR_ITEM key offset must be the hash of the name it stores */
4921 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
4922 di_key->offset != btrfs_name_hash(namebuf, len)) {
4924 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4925 root->objectid, di_key->objectid, di_key->offset,
4926 namebuf, len, filetype, di_key->offset,
4927 btrfs_name_hash(namebuf, len));
4930 btrfs_dir_item_key_to_cpu(node, di, &location);
4931 /* Ignore related ROOT_ITEM check */
4932 if (location.type == BTRFS_ROOT_ITEM_KEY)
4935 btrfs_release_path(path);
4936 /* Check relative INODE_ITEM(existence/filetype) */
4937 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
4939 tmp_err |= INODE_ITEM_MISSING;
4943 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
4944 struct btrfs_inode_item);
4945 mode = btrfs_inode_mode(path->nodes[0], ii);
4946 if (imode_to_type(mode) != filetype) {
4947 tmp_err |= INODE_ITEM_MISMATCH;
4951 /* Check relative INODE_REF/INODE_EXTREF */
4952 key.objectid = location.objectid;
4953 key.type = BTRFS_INODE_REF_KEY;
4954 key.offset = di_key->objectid;
4955 tmp_err |= find_inode_ref(root, &key, namebuf, len,
4958 /* check relative INDEX/ITEM */
4959 key.objectid = di_key->objectid;
/*
 * Fixed: this used to test key.type, which at this point holds the
 * INODE_REF/INODE_EXTREF type set for find_inode_ref() and can never be
 * BTRFS_DIR_ITEM_KEY.  The counterpart to look up depends on the type of the
 * item being checked, i.e. di_key->type.
 */
4960 if (di_key->type == BTRFS_DIR_ITEM_KEY) {
4961 key.type = BTRFS_DIR_INDEX_KEY;
4964 key.type = BTRFS_DIR_ITEM_KEY;
4965 key.offset = btrfs_name_hash(namebuf, name_len);
4968 tmp_err |= find_dir_item(root, &key, &location, namebuf,
4969 name_len, filetype);
4970 /* find_dir_item may find index */
4971 if (key.type == BTRFS_DIR_INDEX_KEY)
4974 btrfs_release_path(path);
4975 print_dir_item_err(root, di_key, location.objectid, index,
4976 namebuf, name_len, filetype, tmp_err);
/* Advance to the next entry packed in the same item */
4978 len = sizeof(*di) + name_len + data_len;
4979 di = (struct btrfs_dir_item *)((char *)di + len);
4982 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4983 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4984 root->objectid, di_key->objectid,
/* Put the path back on the item the caller was iterating */
4991 btrfs_release_path(path);
4992 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
4994 err |= ret > 0 ? -ENOENT : ret;
4999 * Check file extent datasum/hole, update the size of the file extents,
5000 * check and update the last offset of the file extent.
5002 * @root: the root of fs/file tree.
5003 * @fkey: the key of the file extent.
5004 * @nodatasum: INODE_NODATASUM feature.
5005 * @size: the sum of all EXTENT_DATA items size for this inode.
5006 * @end: the offset of the last extent.
5008 * Return 0 if no error occurred.
/*
 * Validate one EXTENT_DATA item: inline extent sizes, extent type, checksum
 * coverage and contiguity with the previous extent, while accumulating the
 * extent size and the running end offset.
 *
 * @root:      the fs/file tree
 * @fkey:      key of the EXTENT_DATA item (offset = file offset)
 * @node:      leaf holding the item
 * @slot:      slot of the item in @node
 * @nodatasum: non-zero when the inode must not have checksums
 * @size:      accumulates extent_num_bytes of the checked extents
 * @end:       running end offset, compared with fkey->offset to detect gaps
 *
 * Problems are recorded in an error bitmap using FILE_EXTENT_ERROR,
 * ODD_CSUM_ITEM and CSUM_ITEM_MISSING.
 *
 * NOTE(review): this listing omits source lines (declarations, braces,
 * conditions, returns); comments describe the visible statements only.
 */
5010 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5011 struct extent_buffer *node, int slot,
5012 unsigned int nodatasum, u64 *size, u64 *end)
5014 struct btrfs_file_extent_item *fi;
5017 u64 extent_num_bytes;
5019 u64 csum_found; /* In byte size, sectorsize aligned */
5020 u64 search_start; /* Logical range start we search for csum */
5021 u64 search_len; /* Logical range len we search for csum */
5022 unsigned int extent_type;
5023 unsigned int is_hole;
5028 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5030 /* Check inline extent */
5031 extent_type = btrfs_file_extent_type(node, fi);
5032 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5033 struct btrfs_item *e = btrfs_item_nr(slot);
5034 u32 item_inline_len;
5036 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5037 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5038 compressed = btrfs_file_extent_compression(node, fi);
5039 if (extent_num_bytes == 0) {
5041 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5042 root->objectid, fkey->objectid, fkey->offset);
5043 err |= FILE_EXTENT_ERROR;
/* Uncompressed inline data must be exactly as long as the item payload */
5045 if (!compressed && extent_num_bytes != item_inline_len) {
5047 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5048 root->objectid, fkey->objectid, fkey->offset,
5049 extent_num_bytes, item_inline_len);
5050 err |= FILE_EXTENT_ERROR;
5052 *end += extent_num_bytes;
5053 *size += extent_num_bytes;
5057 /* Check extent type */
5058 if (extent_type != BTRFS_FILE_EXTENT_REG &&
5059 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5060 err |= FILE_EXTENT_ERROR;
5061 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5062 root->objectid, fkey->objectid, fkey->offset);
5066 /* Check REG_EXTENT/PREALLOC_EXTENT */
5067 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5068 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5069 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5070 extent_offset = btrfs_file_extent_offset(node, fi);
5071 compressed = btrfs_file_extent_compression(node, fi);
/* An extent with no disk location and no disk length is a hole */
5072 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5075 * Check EXTENT_DATA csum
5077 * For plain (uncompressed) extent, we should only check the range
5078 * we're referring to, as it's possible that part of prealloc extent
5079 * has been written, and has csum:
5081 * |<--- Original large preallocated extent A ---->|
5082 * |<- Prealloc File Extent ->|<- Regular Extent ->|
5085 * For compressed extent, we should check the whole range.
5088 search_start = disk_bytenr + extent_offset;
5089 search_len = extent_num_bytes;
5091 search_start = disk_bytenr;
5092 search_len = disk_num_bytes;
5094 ret = count_csum_range(root, search_start, search_len, &csum_found);
5095 if (csum_found > 0 && nodatasum) {
5096 err |= ODD_CSUM_ITEM;
5097 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5098 root->objectid, fkey->objectid, fkey->offset);
5099 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5100 !is_hole && (ret < 0 || csum_found < search_len)) {
5101 err |= CSUM_ITEM_MISSING;
5102 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5103 root->objectid, fkey->objectid, fkey->offset,
5104 csum_found, search_len);
5105 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5106 err |= ODD_CSUM_ITEM;
5107 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5108 root->objectid, fkey->objectid, fkey->offset, csum_found);
5111 /* Check EXTENT_DATA hole */
/* Without the no_holes feature every extent must start where the last ended */
5112 if (!no_holes && *end != fkey->offset) {
5113 err |= FILE_EXTENT_ERROR;
5114 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
5115 root->objectid, fkey->objectid, fkey->offset);
5118 *end += extent_num_bytes;
/* NOTE(review): any condition guarding the next update is not visible here */
5120 *size += extent_num_bytes;
5126 * Set inode item nbytes to @nbytes
5128 * Returns 0 on success
5129 * Returns != 0 on error
/*
 * Rewrite the nbytes field of the INODE_ITEM of inode @ino inside a
 * transaction, then reposition @path on the item it pointed to on entry.
 *
 * @root:   the fs/file tree
 * @path:   path positioned by the caller; its current key is saved first and
 *          searched again at the end
 * @ino:    inode number to fix
 * @nbytes: value stored with btrfs_set_inode_nbytes()
 *
 * NOTE(review): this listing omits source lines (declarations, braces, error
 * checks, gotos, returns); comments describe the visible statements only.
 */
5131 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5132 struct btrfs_path *path,
5133 u64 ino, u64 nbytes)
5135 struct btrfs_trans_handle *trans;
5136 struct btrfs_inode_item *ii;
5137 struct btrfs_key key;
5138 struct btrfs_key research_key;
/* Save the caller's position so it can be restored after the repair */
5142 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5145 key.type = BTRFS_INODE_ITEM_KEY;
5148 trans = btrfs_start_transaction(root, 1);
5149 if (IS_ERR(trans)) {
5150 ret = PTR_ERR(trans);
5155 btrfs_release_path(path);
5156 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5164 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5165 struct btrfs_inode_item);
5166 btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5167 btrfs_mark_buffer_dirty(path->nodes[0]);
5169 btrfs_commit_transaction(trans, root);
5172 error("failed to set nbytes in inode %llu root %llu",
5173 ino, root->root_key.objectid);
/*
 * Fixed: the newline used to sit in the middle of the message
 * ("...root %llu\n to %llu"), splitting it over two lines and leaving the
 * output without a trailing newline.
 */
5175 printf("Set nbytes in inode item %llu root %llu to %llu\n", ino,
5176 root->root_key.objectid, nbytes);
/* Restore the caller's position */
5179 btrfs_release_path(path);
5180 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5187 * Set directory inode isize to @isize.
5189 * Returns 0 on success.
5190 * Returns != 0 on error.
/*
 * Rewrite the size field of a directory's INODE_ITEM inside a transaction,
 * then reposition @path on the item it pointed to on entry.
 *
 * @root: the fs/file tree
 * @path: path positioned by the caller; its current key is saved first and
 *        searched again at the end
 *
 * The inode number (ino) and the new size (isize) are used below; their
 * parameter line is not visible in this listing.
 *
 * NOTE(review): this listing omits source lines (part of the signature,
 * declarations, braces, error checks, gotos, returns); comments describe the
 * visible statements only.
 */
5192 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5193 struct btrfs_path *path,
5196 struct btrfs_trans_handle *trans;
5197 struct btrfs_inode_item *ii;
5198 struct btrfs_key key;
5199 struct btrfs_key research_key;
/* Save the caller's position so it can be restored after the repair */
5203 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5206 key.type = BTRFS_INODE_ITEM_KEY;
5209 trans = btrfs_start_transaction(root, 1);
5210 if (IS_ERR(trans)) {
5211 ret = PTR_ERR(trans);
5216 btrfs_release_path(path);
5217 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5225 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5226 struct btrfs_inode_item);
5227 btrfs_set_inode_size(path->nodes[0], ii, isize);
5228 btrfs_mark_buffer_dirty(path->nodes[0]);
/* NOTE(review): the return value of the commit is not checked here */
5230 btrfs_commit_transaction(trans, root);
5233 error("failed to set isize in inode %llu root %llu",
5234 ino, root->root_key.objectid);
5236 printf("Set isize in inode %llu root %llu to %llu\n",
5237 ino, root->root_key.objectid, isize);
/* Restore the caller's position */
5239 btrfs_release_path(path);
5240 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5247 * Wrapper function for btrfs_add_orphan_item().
5249 * Returns 0 on success.
5250 * Returns != 0 on error.
/*
 * Add an orphan item for inode @ino through btrfs_add_orphan_item() inside a
 * transaction, then reposition @path on the item it pointed to on entry.
 *
 * @root: the fs/file tree
 * @path: path positioned by the caller; its current key is saved first and
 *        searched again at the end
 * @ino:  inode number to record as orphan
 *
 * NOTE(review): this listing omits source lines (declarations, braces, error
 * checks, gotos, returns); comments describe the visible statements only.
 */
5252 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5253 struct btrfs_path *path, u64 ino)
5255 struct btrfs_trans_handle *trans;
5256 struct btrfs_key research_key;
/* Save the caller's position so it can be restored after the repair */
5260 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5262 trans = btrfs_start_transaction(root, 1);
5263 if (IS_ERR(trans)) {
5264 ret = PTR_ERR(trans);
5269 btrfs_release_path(path);
5270 ret = btrfs_add_orphan_item(trans, root, path, ino);
/* NOTE(review): the return value of the commit is not checked here */
5272 btrfs_commit_transaction(trans, root);
5275 error("failed to add inode %llu as orphan item root %llu",
5276 ino, root->root_key.objectid);
5278 printf("Added inode %llu as orphan item root %llu\n",
5279 ino, root->root_key.objectid);
/* Restore the caller's position */
5281 btrfs_release_path(path);
5282 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5289 * Check INODE_ITEM and related ITEMs (the same inode number)
5290 * 1. check link count
5291 * 2. check inode ref/extref
5292 * 3. check dir item/index
5294 * @ext_ref: the EXTENDED_IREF feature
5296 * Return 0 if no error occurred.
5297 * Return >0 for error or hit the traversal is done(by error bitmap)
/*
 * Check one INODE_ITEM together with the following items that share its
 * objectid.  The inode fields are read first, then each following item is
 * dispatched by key type to check_inode_ref(), check_inode_extref(),
 * check_dir_item() or check_file_extent(); finally nlink, isize and nbytes
 * are verified against the collected totals.
 *
 * @root:    the fs/file tree
 * @path:    path positioned on the INODE_ITEM; it is advanced with
 *           btrfs_next_item() while the related items are walked
 * @ext_ref: EXTENDED_IREF feature flag
 *
 * Size and nbytes mismatches and orphan inodes are handed to the
 * repair_*_lowmem() helpers; the error is recorded when "!repair || ret".
 *
 * NOTE(review): this listing omits source lines (declarations, braces, the
 * loop and switch heads, some conditions, gotos, returns); comments describe
 * the visible statements only.
 */
5299 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5300 unsigned int ext_ref)
5302 struct extent_buffer *node;
5303 struct btrfs_inode_item *ii;
5304 struct btrfs_key key;
5313 u64 extent_size = 0;
5315 unsigned int nodatasum;
5319 char namebuf[BTRFS_NAME_LEN] = {0};
5322 node = path->nodes[0];
5323 slot = path->slots[0];
5325 btrfs_item_key_to_cpu(node, &key, slot);
5326 inode_id = key.objectid;
/* Items under the orphan objectid are stepped over, not checked as inodes */
5328 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5329 ret = btrfs_next_item(root, path);
/* Cache the inode fields that the later checks compare against */
5335 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5336 isize = btrfs_inode_size(node, ii);
5337 nbytes = btrfs_inode_nbytes(node, ii);
5338 mode = btrfs_inode_mode(node, ii);
5339 dir = imode_to_type(mode) == BTRFS_FT_DIR;
5340 nlink = btrfs_inode_nlink(node, ii);
5341 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5344 ret = btrfs_next_item(root, path);
5346 /* out will fill 'err' using current statistics */
5348 } else if (ret > 0) {
5353 node = path->nodes[0];
5354 slot = path->slots[0];
5355 btrfs_item_key_to_cpu(node, &key, slot);
/* The next item belongs to another objectid: this inode is finished */
5356 if (key.objectid != inode_id)
5360 case BTRFS_INODE_REF_KEY:
5361 ret = check_inode_ref(root, &key, path, namebuf,
5362 &name_len, &refs, mode);
5365 case BTRFS_INODE_EXTREF_KEY:
5366 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5367 warning("root %llu EXTREF[%llu %llu] isn't supported",
5368 root->objectid, key.objectid,
5370 ret = check_inode_extref(root, &key, node, slot, &refs,
5374 case BTRFS_DIR_ITEM_KEY:
5375 case BTRFS_DIR_INDEX_KEY:
5377 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5378 root->objectid, inode_id,
5379 imode_to_type(mode), key.objectid,
5382 ret = check_dir_item(root, &key, path, &size, ext_ref);
5385 case BTRFS_EXTENT_DATA_KEY:
5387 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5388 root->objectid, inode_id, key.objectid,
5391 ret = check_file_extent(root, &key, node, slot,
5392 nodatasum, &extent_size,
5396 case BTRFS_XATTR_ITEM_KEY:
5399 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5400 key.objectid, key.type, key.offset);
5405 /* verify INODE_ITEM nlink/isize/nbytes */
5408 err |= LINK_COUNT_ERROR;
5409 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5410 root->objectid, inode_id, nlink);
5414 * Just a warning, as dir inode nbytes is just an
5415 * instructive value.
5417 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5418 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5419 root->objectid, inode_id,
5420 root->fs_info->nodesize);
/* Directory isize must equal the name-length total from check_dir_item() */
5423 if (isize != size) {
5425 ret = repair_dir_isize_lowmem(root, path,
5427 if (!repair || ret) {
5430 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5431 root->objectid, inode_id, isize, size);
5435 if (nlink != refs) {
5436 err |= LINK_COUNT_ERROR;
5437 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5438 root->objectid, inode_id, nlink, refs);
5439 } else if (!nlink) {
5441 ret = repair_inode_orphan_item_lowmem(root,
5443 if (!repair || ret) {
5445 error("root %llu INODE[%llu] is orphan item",
5446 root->objectid, inode_id);
/* No data, holes required, yet extents end before isize: a hole is missing */
5450 if (!nbytes && !no_holes && extent_end < isize) {
5451 err |= NBYTES_ERROR;
5452 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5453 root->objectid, inode_id, isize);
/* nbytes must equal the extent size total from check_file_extent() */
5456 if (nbytes != extent_size) {
5458 ret = repair_inode_nbytes_lowmem(root, path,
5459 inode_id, extent_size);
5460 if (!repair || ret) {
5461 err |= NBYTES_ERROR;
5463 "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5464 root->objectid, inode_id, nbytes,
5474 * check first root dir's inode_item and inode_ref
5476 * returns 0 means no error
5477 * returns >0 means error
5478 * returns <0 means fatal error
/*
 * Look up the INODE_ITEM whose objectid is BTRFS_FIRST_FREE_OBJECTID in
 * @root, verify that its mode describes a directory, then look for its
 * ".." INODE_REF. Problems are collected as bits in err and reported
 * once the path has been released.
 */
5480 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5482 struct btrfs_path path;
5483 struct btrfs_key key;
5484 struct btrfs_inode_item *ii;
5490 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5491 key.type = BTRFS_INODE_ITEM_KEY;
5494 /* For root being dropped, we don't need to check first inode */
5495 if (btrfs_root_refs(&root->root_item) == 0 &&
5496 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5497 BTRFS_FIRST_FREE_OBJECTID)
/* Lookup only: no transaction handle, ins_len 0 and cow 0. */
5500 btrfs_init_path(&path);
5501 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5506 err |= INODE_ITEM_MISSING;
/* The item exists: any mode that is not a directory is a mismatch. */
5508 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
5509 struct btrfs_inode_item);
5510 mode = btrfs_inode_mode(path.nodes[0], ii);
5511 if (imode_to_type(mode) != BTRFS_FT_DIR)
5512 err |= INODE_ITEM_MISMATCH;
5515 /* lookup first inode ref */
5516 key.offset = BTRFS_FIRST_FREE_OBJECTID;
5517 key.type = BTRFS_INODE_REF_KEY;
5518 /* special index value */
/* The same key struct is reused; only type and offset are changed. */
5521 ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
5527 btrfs_release_path(&path);
/* MISMATCH is tested first, so it wins if both item bits are set. */
5528 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
5529 error("root dir INODE_ITEM is %s",
5530 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
5531 if (err & INODE_REF_MISSING)
5532 error("root dir INODE_REF is missing");
/* A negative ret takes precedence over the accumulated error bits. */
5534 return ret < 0 ? ret : err;
/*
 * Search the backref rb-tree of @rec for a tree backref. A template
 * backref (match) is built on the stack and handed to rb_search() with
 * compare_extent_backref as the comparison function. back starts out as
 * NULL and is only replaced by the converted search result.
 *
 * NOTE(review): the condition selecting between the parent and the root
 * form of the template is not visible in this excerpt; presumably the
 * parent form is used when @parent is non-zero. Confirm.
 */
5537 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5538 u64 parent, u64 root)
5540 struct rb_node *node;
5541 struct tree_backref *back = NULL;
5542 struct tree_backref match = {
/* Parent form of the template: keyed by parent, marked full_backref. */
5549 match.parent = parent;
5550 match.node.full_backref = 1;
5555 node = rb_search(&rec->backref_tree, &match.node.node,
5556 (rb_compare_keys)compare_extent_backref, NULL);
/* Convert rb_node -> extent_backref -> tree_backref. */
5558 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Search the backref rb-tree of @rec for a data backref. The template
 * (match) carries found_ref and disk_bytenr from the arguments and is
 * compared with compare_extent_backref by rb_search(). back starts out
 * as NULL and is only replaced by the converted search result.
 *
 * NOTE(review): several initializer lines and the condition choosing the
 * parent form are not visible in this excerpt. Confirm against the full
 * source before relying on which fields take part in the comparison.
 */
5563 static struct data_backref *find_data_backref(struct extent_record *rec,
5564 u64 parent, u64 root,
5565 u64 owner, u64 offset,
5567 u64 disk_bytenr, u64 bytes)
5569 struct rb_node *node;
5570 struct data_backref *back = NULL;
5571 struct data_backref match = {
5578 .found_ref = found_ref,
5579 .disk_bytenr = disk_bytenr,
/* Parent form of the template: keyed by parent, marked full_backref. */
5583 match.parent = parent;
5584 match.node.full_backref = 1;
5589 node = rb_search(&rec->backref_tree, &match.node.node,
5590 (rb_compare_keys)compare_extent_backref, NULL);
/* Convert rb_node -> extent_backref -> data_backref. */
5592 back = to_data_backref(rb_node_to_extent_backref(node));
5597 * Iterate over all items in the tree and call check_inode_item() to check them.
5599 * @root: the root of the tree to be checked.
5600 * @ext_ref: the EXTENDED_IREF feature
5602 * Return 0 if no error found.
5603 * Return <0 for error.
5605 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5607 struct btrfs_path path;
5608 struct node_refs nrefs;
5609 struct btrfs_root_item *root_item = &root->root_item;
5615 * We need to manually check the first inode item(256)
5616 * As the following traversal function will only start from
5617 * the first inode item in the leaf, if inode item(256) is missing
5618 * we will just skip it forever.
5620 ret = check_fs_first_inode(root, ext_ref);
/* Fresh walk state: zeroed node_refs and an empty path. */
5625 memset(&nrefs, 0, sizeof(nrefs));
5626 level = btrfs_header_level(root->node);
5627 btrfs_init_path(&path);
/*
 * Root is still referenced, or has no recorded drop progress: start the
 * walk at slot 0 of the root node. extent_buffer_get() is called on the
 * node that was just stored in the path (presumably to hold a reference
 * that btrfs_release_path() drops later; confirm).
 */
5629 if (btrfs_root_refs(root_item) > 0 ||
5630 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5631 path.nodes[level] = root->node;
5632 path.slots[level] = 0;
5633 extent_buffer_get(root->node);
/*
 * Otherwise resume from the key saved in drop_progress, searching down
 * only as far as the saved drop_level.
 */
5635 struct btrfs_key key;
5637 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5638 level = root_item->drop_level;
5639 path.lowest_level = level;
5640 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* Alternate walking down and back up; level is updated by both calls. */
5647 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5650 /* if ret is negative, walk shall stop */
5656 ret = walk_up_tree_v2(root, &path, &level);
5658 /* Normal exit, reset ret to err */
5665 btrfs_release_path(&path);
5670 * Find the relative ref for root_ref and root_backref.
5672 * @root: the root of the root tree.
5673 * @ref_key: the key of the root ref.
5675 * Return 0 if no error occurred.
5677 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5678 struct extent_buffer *node, int slot)
5680 struct btrfs_path path;
5681 struct btrfs_key key;
5682 struct btrfs_root_ref *ref;
5683 struct btrfs_root_ref *backref;
/* Name buffers are zero-filled and hold at most BTRFS_NAME_LEN bytes. */
5684 char ref_name[BTRFS_NAME_LEN] = {0};
5685 char backref_name[BTRFS_NAME_LEN] = {0};
5691 u32 backref_namelen;
/* Decode dirid, sequence and name length of the item at @slot. */
5696 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5697 ref_dirid = btrfs_root_ref_dirid(node, ref);
5698 ref_seq = btrfs_root_ref_sequence(node, ref);
5699 ref_namelen = btrfs_root_ref_name_len(node, ref);
/*
 * Clamp the number of bytes copied to BTRFS_NAME_LEN so that an oversized
 * on-disk length cannot overflow ref_name; the oversize is only warned
 * about here.
 */
5701 if (ref_namelen <= BTRFS_NAME_LEN) {
5704 len = BTRFS_NAME_LEN;
5705 warning("%s[%llu %llu] ref_name too long",
5706 ref_key->type == BTRFS_ROOT_REF_KEY ?
5707 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
/* The name bytes are stored directly after the btrfs_root_ref struct. */
5710 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5712 /* Find relative root_ref */
/*
 * The counterpart item swaps objectid and offset. The type expression
 * turns ROOT_REF into ROOT_BACKREF and ROOT_BACKREF into ROOT_REF.
 */
5713 key.objectid = ref_key->offset;
5714 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5715 key.offset = ref_key->objectid;
5717 btrfs_init_path(&path);
5718 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5720 err |= ROOT_REF_MISSING;
5721 error("%s[%llu %llu] couldn't find relative ref",
5722 ref_key->type == BTRFS_ROOT_REF_KEY ?
5723 "ROOT_REF" : "ROOT_BACKREF",
5724 ref_key->objectid, ref_key->offset);
/* Decode the counterpart item the same way as the original one. */
5728 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5729 struct btrfs_root_ref);
5730 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5731 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5732 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
/* Same clamping as above; len now describes the counterpart name. */
5734 if (backref_namelen <= BTRFS_NAME_LEN) {
5735 len = backref_namelen;
5737 len = BTRFS_NAME_LEN;
5738 warning("%s[%llu %llu] ref_name too long",
5739 key.type == BTRFS_ROOT_REF_KEY ?
5740 "ROOT_REF" : "ROOT_BACKREF",
5741 key.objectid, key.offset);
5743 read_extent_buffer(path.nodes[0], backref_name,
5744 (unsigned long)(backref + 1), len);
/*
 * Both items must agree on dirid, sequence, name length and name bytes.
 * The name comparison only runs when the two lengths are equal, so using
 * the counterpart len for strncmp() covers both names.
 */
5746 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5747 ref_namelen != backref_namelen ||
5748 strncmp(ref_name, backref_name, len)) {
5749 err |= ROOT_REF_MISMATCH;
5750 error("%s[%llu %llu] mismatch relative ref",
5751 ref_key->type == BTRFS_ROOT_REF_KEY ?
5752 "ROOT_REF" : "ROOT_BACKREF",
5753 ref_key->objectid, ref_key->offset);
5756 btrfs_release_path(&path);
5761 * Check all fs/file tree in low_memory mode.
5763 * 1. for fs tree root item, call check_fs_root_v2()
5764 * 2. for fs tree root ref/backref, call check_root_ref()
5766 * Return 0 if no error occurred.
5768 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5770 struct btrfs_root *tree_root = fs_info->tree_root;
5771 struct btrfs_root *cur_root = NULL;
5772 struct btrfs_path path;
5773 struct btrfs_key key;
5774 struct extent_buffer *node;
5775 unsigned int ext_ref;
/* Whether the filesystem has the EXTENDED_IREF incompat feature. */
5780 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
/* Position the path in the tree root at the FS_TREE root item key. */
5782 btrfs_init_path(&path);
5783 key.objectid = BTRFS_FS_TREE_OBJECTID;
5785 key.type = BTRFS_ROOT_ITEM_KEY;
5787 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5791 } else if (ret > 0) {
/* Examine the item the path currently points at. */
5797 node = path.nodes[0];
5798 slot = path.slots[0];
5799 btrfs_item_key_to_cpu(node, &key, slot);
/*
 * Objectids above BTRFS_LAST_FREE_OBJECTID are handled by a statement
 * that is not visible in this excerpt (presumably it ends the scan;
 * confirm).
 */
5800 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5802 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5803 fs_root_objectid(key.objectid)) {
/*
 * The reloc tree is read with the no_cache variant and is therefore
 * freed explicitly with btrfs_free_fs_root() once it has been checked.
 * Every other root is read through btrfs_read_fs_root() with the key
 * offset set to (u64)-1.
 */
5804 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5805 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5808 key.offset = (u64)-1;
5809 cur_root = btrfs_read_fs_root(fs_info, &key);
5812 if (IS_ERR(cur_root)) {
5813 error("Fail to read fs/subvol tree: %lld",
5819 ret = check_fs_root_v2(cur_root, ext_ref);
5822 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5823 btrfs_free_fs_root(cur_root);
/* ROOT_REF and ROOT_BACKREF items are cross-checked with each other. */
5824 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5825 key.type == BTRFS_ROOT_BACKREF_KEY) {
5826 ret = check_root_ref(tree_root, &key, node, slot);
/* Advance to the next item of the tree root. */
5830 ret = btrfs_next_item(tree_root, &path);
5840 btrfs_release_path(&path);
/*
 * Entry point for the fs roots check. Prints a banner to stderr unless the
 * progress indicator is enabled, then dispatches on the global check_mode:
 * check_fs_roots_v2() for CHECK_MODE_LOWMEM, check_fs_roots() otherwise.
 * @root_cache is only passed to the non-lowmem checker.
 */
5844 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
5845 struct cache_tree *root_cache)
5849 if (!ctx.progress_enabled)
5850 fprintf(stderr, "checking fs roots\n");
5851 if (check_mode == CHECK_MODE_LOWMEM)
5852 ret = check_fs_roots_v2(fs_info);
5854 ret = check_fs_roots(fs_info, root_cache);
/*
 * Walk every backref attached to @rec and compare what the scan found with
 * what the extent tree records. Each mismatch has a diagnostic on stderr.
 *
 * NOTE(review): the lines that consult @print_errs and that compute the
 * return value are not visible in this excerpt. maybe_free_extent_rec()
 * treats a zero return as meaning that nothing is wrong.
 */
5859 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5861 struct extent_backref *back, *tmp;
5862 struct tree_backref *tback;
5863 struct data_backref *dback;
5867 rbtree_postorder_for_each_entry_safe(back, tmp,
5868 &rec->backref_tree, node) {
/* Case 1: the backref has no matching item in the extent tree. */
5869 if (!back->found_extent_tree) {
5873 if (back->is_data) {
5874 dback = to_data_backref(back);
5875 fprintf(stderr, "Data backref %llu %s %llu"
5876 " owner %llu offset %llu num_refs %lu"
5877 " not found in extent tree\n",
5878 (unsigned long long)rec->start,
5879 back->full_backref ?
5881 back->full_backref ?
5882 (unsigned long long)dback->parent:
5883 (unsigned long long)dback->root,
5884 (unsigned long long)dback->owner,
5885 (unsigned long long)dback->offset,
5886 (unsigned long)dback->num_refs);
5888 tback = to_tree_backref(back);
5889 fprintf(stderr, "Tree backref %llu parent %llu"
5890 " root %llu not found in extent tree\n",
5891 (unsigned long long)rec->start,
5892 (unsigned long long)tback->parent,
5893 (unsigned long long)tback->root);
/* Case 2: a tree backref that no tree block referenced back. */
5896 if (!back->is_data && !back->found_ref) {
5900 tback = to_tree_backref(back);
5901 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5902 (unsigned long long)rec->start,
5903 back->full_backref ? "parent" : "root",
5904 back->full_backref ?
5905 (unsigned long long)tback->parent :
5906 (unsigned long long)tback->root, back);
/* Case 3: data backref checks (ref count, disk bytenr, byte length). */
5908 if (back->is_data) {
5909 dback = to_data_backref(back);
5910 if (dback->found_ref != dback->num_refs) {
5914 fprintf(stderr, "Incorrect local backref count"
5915 " on %llu %s %llu owner %llu"
5916 " offset %llu found %u wanted %u back %p\n",
5917 (unsigned long long)rec->start,
5918 back->full_backref ?
5920 back->full_backref ?
5921 (unsigned long long)dback->parent:
5922 (unsigned long long)dback->root,
5923 (unsigned long long)dback->owner,
5924 (unsigned long long)dback->offset,
5925 dback->found_ref, dback->num_refs, back);
5927 if (dback->disk_bytenr != rec->start) {
5931 fprintf(stderr, "Backref disk bytenr does not"
5932 " match extent record, bytenr=%llu, "
5933 "ref bytenr=%llu\n",
5934 (unsigned long long)rec->start,
5935 (unsigned long long)dback->disk_bytenr);
5938 if (dback->bytes != rec->nr) {
5942 fprintf(stderr, "Backref bytes do not match "
5943 "extent backref, bytenr=%llu, ref "
5944 "bytes=%llu, backref bytes=%llu\n",
5945 (unsigned long long)rec->start,
5946 (unsigned long long)rec->nr,
5947 (unsigned long long)dback->bytes);
/*
 * Accumulate the global reference count. Data backrefs contribute their
 * found_ref count; the contribution of tree backrefs is on a line that is
 * not visible in this excerpt.
 */
5950 if (!back->is_data) {
5953 dback = to_data_backref(back);
5954 found += dback->found_ref;
/* After the walk: the accumulated total must equal rec->refs. */
5957 if (found != rec->refs) {
5961 fprintf(stderr, "Incorrect global backref count "
5962 "on %llu found %llu wanted %llu\n",
5963 (unsigned long long)rec->start,
5964 (unsigned long long)found,
5965 (unsigned long long)rec->refs);
/*
 * Per-node callback handed to rb_free_nodes() by
 * free_all_extent_backrefs(). Converts the rb_node back into its
 * extent_backref; the statement that disposes of it is not visible in
 * this excerpt (presumably a free; confirm).
 */
5971 static void __free_one_backref(struct rb_node *node)
5973 struct extent_backref *back = rb_node_to_extent_backref(node);
/*
 * Hand the backref rb-tree of @rec to rb_free_nodes() with
 * __free_one_backref() as the per-node callback.
 */
5978 static void free_all_extent_backrefs(struct extent_record *rec)
5980 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Tear down @extent_cache: take the first cache extent, map it back to its
 * extent_record, unlink it from the cache and release its backrefs.
 *
 * NOTE(review): the loop construct and the release of the record itself
 * are not visible in this excerpt.
 */
5983 static void free_extent_record_cache(struct cache_tree *extent_cache)
5985 struct cache_extent *cache;
5986 struct extent_record *rec;
5989 cache = first_cache_extent(extent_cache);
5992 rec = container_of(cache, struct extent_record, cache);
5993 remove_cache_extent(extent_cache, cache);
5994 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing about it is left to verify or
 * report. All of the following must hold:
 *  - content_checked and owner_ref_checked are set
 *  - extent_item_refs equals refs, and refs is non-zero
 *  - there are no duplicates
 *  - all_backpointers_checked() returns 0 (called with print_errs == 0)
 *  - bad_full_backref, crossing_stripes and wrong_chunk_type are all clear
 */
5999 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6000 struct extent_record *rec)
6002 if (rec->content_checked && rec->owner_ref_checked &&
6003 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6004 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6005 !rec->bad_full_backref && !rec->crossing_stripes &&
6006 !rec->wrong_chunk_type) {
/* Unlink from the cache, release the backrefs, leave any list. */
6007 remove_cache_extent(extent_cache, &rec->cache);
6008 free_all_extent_backrefs(rec);
6009 list_del_init(&rec->list);
/*
 * Check whether the owner stored in the header of @buf is backed by a
 * reference. The final return yields 0 when a match was found and 1
 * otherwise; earlier return paths are not visible in this excerpt.
 */
6015 static int check_owner_ref(struct btrfs_root *root,
6016 struct extent_record *rec,
6017 struct extent_buffer *buf)
6019 struct extent_backref *node, *tmp;
6020 struct tree_backref *back;
6021 struct btrfs_root *ref_root;
6022 struct btrfs_key key;
6023 struct btrfs_path path;
6024 struct extent_buffer *parent;
/*
 * First pass: look through the backrefs of @rec for a tree backref whose
 * root equals the header owner, testing found_ref and full_backref on
 * the way. The statements taken by these tests are not visible here.
 */
6029 rbtree_postorder_for_each_entry_safe(node, tmp,
6030 &rec->backref_tree, node) {
6033 if (!node->found_ref)
6035 if (node->full_backref)
6037 back = to_tree_backref(node);
6038 if (btrfs_header_owner(buf) == back->root)
6041 BUG_ON(rec->is_root);
6043 /* try to find the block by search corresponding fs tree */
6044 key.objectid = btrfs_header_owner(buf);
6045 key.type = BTRFS_ROOT_ITEM_KEY;
6046 key.offset = (u64)-1;
6048 ref_root = btrfs_read_fs_root(root->fs_info, &key);
6049 if (IS_ERR(ref_root))
/* Use the first key of @buf (item key or node key) as the target. */
6052 level = btrfs_header_level(buf);
6054 btrfs_item_key_to_cpu(buf, &key, 0);
6056 btrfs_node_key_to_cpu(buf, &key, 0);
/*
 * Stop the search one level above @buf, then check that the parent slot
 * reached by the search points at the start of @buf.
 */
6058 btrfs_init_path(&path);
6059 path.lowest_level = level + 1;
6060 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6064 parent = path.nodes[level + 1];
6065 if (parent && buf->start == btrfs_node_blockptr(parent,
6066 path.slots[level + 1]))
6069 btrfs_release_path(&path);
6070 return found ? 0 : 1;
/*
 * Scan the backrefs of @rec for a tree backref whose root is
 * BTRFS_EXTENT_TREE_OBJECTID. full_backref entries are tested separately.
 *
 * NOTE(review): the statements taken by the two tests and the return value
 * are not visible in this excerpt. record_bad_block_io() treats a non-zero
 * result as meaning that the record belongs to the extent tree.
 */
6073 static int is_extent_tree_record(struct extent_record *rec)
6075 struct extent_backref *node, *tmp;
6076 struct tree_backref *back;
6079 rbtree_postorder_for_each_entry_safe(node, tmp,
6080 &rec->backref_tree, node) {
6083 back = to_tree_backref(node);
6084 if (node->full_backref)
6086 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Look up the extent record covering the range given by start and len in
 * @extent_cache. When the record passes is_extent_tree_record(), register
 * the range as a corrupt extent keyed by the parent key of the record.
 * The early exits for a failed lookup and for other records are on lines
 * that are not visible in this excerpt.
 */
6093 static int record_bad_block_io(struct btrfs_fs_info *info,
6094 struct cache_tree *extent_cache,
6097 struct extent_record *rec;
6098 struct cache_extent *cache;
6099 struct btrfs_key key;
6101 cache = lookup_cache_extent(extent_cache, start, len);
6105 rec = container_of(cache, struct extent_record, cache);
6106 if (!is_extent_tree_record(rec))
/* parent_key is stored in disk format; convert it before use. */
6109 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6110 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of @buf. Nodes and leaves are
 * handled separately, selected by the header level of @buf.
 */
6113 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6114 struct extent_buffer *buf, int slot)
/* Node: swap the two key pointers through stack temporaries. */
6116 if (btrfs_header_level(buf)) {
6117 struct btrfs_key_ptr ptr1, ptr2;
6119 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6120 sizeof(struct btrfs_key_ptr));
6121 read_extent_buffer(buf, &ptr2,
6122 btrfs_node_key_ptr_offset(slot + 1),
6123 sizeof(struct btrfs_key_ptr));
6124 write_extent_buffer(buf, &ptr1,
6125 btrfs_node_key_ptr_offset(slot + 1),
6126 sizeof(struct btrfs_key_ptr));
6127 write_extent_buffer(buf, &ptr2,
6128 btrfs_node_key_ptr_offset(slot),
6129 sizeof(struct btrfs_key_ptr));
/*
 * Propagate the first key of the node to the levels above it. The
 * condition guarding this step is not visible in this excerpt
 * (presumably it applies when slot 0 changed; confirm).
 */
6131 struct btrfs_disk_key key;
6132 btrfs_node_key(buf, &key, 0);
6133 btrfs_fixup_low_keys(root, path, &key,
6134 btrfs_header_level(buf) + 1);
/* Leaf: swap item data, then item offsets and sizes, then the keys. */
6137 struct btrfs_item *item1, *item2;
6138 struct btrfs_key k1, k2;
6139 char *item1_data, *item2_data;
6140 u32 item1_offset, item2_offset, item1_size, item2_size;
6142 item1 = btrfs_item_nr(slot);
6143 item2 = btrfs_item_nr(slot + 1);
6144 btrfs_item_key_to_cpu(buf, &k1, slot);
6145 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6146 item1_offset = btrfs_item_offset(buf, item1);
6147 item2_offset = btrfs_item_offset(buf, item2);
6148 item1_size = btrfs_item_size(buf, item1);
6149 item2_size = btrfs_item_size(buf, item2);
/* Temporary heap copies of both payloads, sized by their own lengths. */
6151 item1_data = malloc(item1_size);
6154 item2_data = malloc(item2_size);
6160 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6161 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): each write below uses the length of the other item, so
 * with unequal sizes the first or second call reads past the end of the
 * shorter temporary buffer. Confirm whether equal sizes are guaranteed.
 */
6163 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6164 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6168 btrfs_set_item_offset(buf, item1, item2_offset);
6169 btrfs_set_item_offset(buf, item2, item1_offset);
6170 btrfs_set_item_size(buf, item1, item2_size);
6171 btrfs_set_item_size(buf, item2, item1_size);
/* The key setter acts on path->slots[0], so retarget it for each key. */
6173 path->slots[0] = slot;
6174 btrfs_set_item_key_unsafe(root, path, &k2);
6175 path->slots[0] = slot + 1;
6176 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair misordered keys in the block at path->lowest_level. Adjacent keys
 * are compared pairwise; a pair whose comparison is not below zero is
 * exchanged with swap_values() and the buffer is marked dirty.
 *
 * NOTE(review): the statements that skip ordered pairs, handle a failed
 * swap and possibly restart the scan are not visible in this excerpt.
 */
6181 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6183 struct extent_buffer *buf;
6184 struct btrfs_key k1, k2;
6186 int level = path->lowest_level;
6189 buf = path->nodes[level];
6190 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
/* Node keys and item keys are read with different accessors. */
6192 btrfs_node_key_to_cpu(buf, &k1, i);
6193 btrfs_node_key_to_cpu(buf, &k2, i + 1);
6195 btrfs_item_key_to_cpu(buf, &k1, i);
6196 btrfs_item_key_to_cpu(buf, &k2, i + 1);
6198 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6200 ret = swap_values(root, path, buf, i);
6203 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf, but only for the key
 * types listed below. The visible lines move item headers only: the
 * following headers slide down one position and nritems is decremented.
 */
6209 static int delete_bogus_item(struct btrfs_root *root,
6210 struct btrfs_path *path,
6211 struct extent_buffer *buf, int slot)
6213 struct btrfs_key key;
6214 int nritems = btrfs_header_nritems(buf);
6216 btrfs_item_key_to_cpu(buf, &key, slot);
6218 /* These are all the keys we can deal with missing. */
6219 if (key.type != BTRFS_DIR_INDEX_KEY &&
6220 key.type != BTRFS_EXTENT_ITEM_KEY &&
6221 key.type != BTRFS_METADATA_ITEM_KEY &&
6222 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6223 key.type != BTRFS_EXTENT_DATA_REF_KEY)
6226 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6227 (unsigned long long)key.objectid, key.type,
6228 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array. */
6229 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6230 btrfs_item_nr_offset(slot + 1),
6231 sizeof(struct btrfs_item) *
6232 (nritems - slot - 1));
6233 btrfs_set_header_nritems(buf, nritems - 1);
/*
 * Push the new first key of the leaf up into the parents. The guarding
 * condition is not visible in this excerpt (presumably @slot == 0;
 * confirm).
 */
6235 struct btrfs_disk_key disk_key;
6237 btrfs_item_key(buf, &disk_key, 0);
6238 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6240 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0]. The data of item
 * 0 must end at BTRFS_LEAF_DATA_SIZE(root), and every later item must end
 * where the previous item starts. An item that ends too high is handed to
 * delete_bogus_item(); one that ends too low is shifted up by the gap.
 */
6244 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6246 struct extent_buffer *buf;
6250 /* We should only get this for leaves */
6251 BUG_ON(path->lowest_level);
6252 buf = path->nodes[0];
6254 for (i = 0; i < btrfs_header_nritems(buf); i++) {
6255 unsigned int shift = 0, offset;
/* First item: its end is compared with the end of the leaf data area. */
6257 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6258 BTRFS_LEAF_DATA_SIZE(root)) {
6259 if (btrfs_item_end_nr(buf, i) >
6260 BTRFS_LEAF_DATA_SIZE(root)) {
6261 ret = delete_bogus_item(root, path, buf, i);
6264 fprintf(stderr, "item is off the end of the "
6265 "leaf, can't fix\n");
6269 shift = BTRFS_LEAF_DATA_SIZE(root) -
6270 btrfs_item_end_nr(buf, i);
/* Later items: the end is compared with the start of the previous item. */
6271 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6272 btrfs_item_offset_nr(buf, i - 1)) {
6273 if (btrfs_item_end_nr(buf, i) >
6274 btrfs_item_offset_nr(buf, i - 1)) {
6275 ret = delete_bogus_item(root, path, buf, i);
6278 fprintf(stderr, "items overlap, can't fix\n");
6282 shift = btrfs_item_offset_nr(buf, i - 1) -
6283 btrfs_item_end_nr(buf, i);
/* Move the item data up by shift bytes and record the new offset. */
6288 printf("Shifting item nr %d by %u bytes in block %llu\n",
6289 i, shift, (unsigned long long)buf->start);
6290 offset = btrfs_item_offset_nr(buf, i);
6291 memmove_extent_buffer(buf,
6292 btrfs_leaf_data(buf) + offset + shift,
6293 btrfs_leaf_data(buf) + offset,
6294 btrfs_item_size_nr(buf, i));
6295 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6297 btrfs_mark_buffer_dirty(buf);
6301 * We may have moved things, in which case we want to exit so we don't
6302 * write those changes out. Once we have proper abort functionality in
6303 * progs this can be changed to something nicer.
6310 * Attempt to fix basic block failures. If we can't fix it for whatever reason
6311 * then just return -EIO.
6313 static int try_to_fix_bad_block(struct btrfs_root *root,
6314 struct extent_buffer *buf,
6315 enum btrfs_tree_block_status status)
6317 struct btrfs_trans_handle *trans;
6318 struct ulist *roots;
6319 struct ulist_node *node;
6320 struct btrfs_root *search_root;
6321 struct btrfs_path path;
6322 struct ulist_iterator iter;
6323 struct btrfs_key root_key, key;
/* Only bad key order and invalid item offsets are handled here. */
6326 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6327 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
/* Collect every root that references this block into a ulist. */
6330 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6334 btrfs_init_path(&path);
6335 ULIST_ITER_INIT(&iter);
/* Repeat the repair through each referencing root. */
6336 while ((node = ulist_next(roots, &iter))) {
6337 root_key.objectid = node->val;
6338 root_key.type = BTRFS_ROOT_ITEM_KEY;
6339 root_key.offset = (u64)-1;
6341 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6348 trans = btrfs_start_transaction(search_root, 0);
6349 if (IS_ERR(trans)) {
6350 ret = PTR_ERR(trans);
/*
 * Search for the first key of the block, stopping at the level of the
 * block, with cow set (last argument) inside the transaction.
 * skip_check_block is set on the path (presumably so that the search
 * does not reject the known-bad block; confirm).
 */
6354 path.lowest_level = btrfs_header_level(buf);
6355 path.skip_check_block = 1;
6356 if (path.lowest_level)
6357 btrfs_node_key_to_cpu(buf, &key, 0);
6359 btrfs_item_key_to_cpu(buf, &key, 0);
6360 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6363 btrfs_commit_transaction(trans, search_root);
/* Dispatch to the repair routine that matches the reported status. */
6366 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6367 ret = fix_key_order(search_root, &path);
6368 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6369 ret = fix_item_offset(search_root, &path);
6371 btrfs_commit_transaction(trans, search_root);
6374 btrfs_release_path(&path);
6375 btrfs_commit_transaction(trans, search_root);
6378 btrfs_release_path(&path);
/*
 * Validate one tree block against its extent record: record generation,
 * first key objectid and level, run the leaf or node checker, attempt a
 * repair when the block is not clean, then mark the record as content
 * checked and resolve its owner reference.
 */
6382 static int check_block(struct btrfs_root *root,
6383 struct cache_tree *extent_cache,
6384 struct extent_buffer *buf, u64 flags)
6386 struct extent_record *rec;
6387 struct cache_extent *cache;
6388 struct btrfs_key key;
6389 enum btrfs_tree_block_status status;
6393 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6396 rec = container_of(cache, struct extent_record, cache);
6397 rec->generation = btrfs_header_generation(buf);
/* For a non-empty block, remember the objectid of its first key. */
6399 level = btrfs_header_level(buf);
6400 if (btrfs_header_nritems(buf) > 0) {
6403 btrfs_item_key_to_cpu(buf, &key, 0);
6405 btrfs_node_key_to_cpu(buf, &key, 0);
6407 rec->info_objectid = key.objectid;
6409 rec->info_level = level;
6411 if (btrfs_is_leaf(buf))
6412 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6414 status = btrfs_check_node(root, &rec->parent_key, buf);
/*
 * NOTE(review): try_to_fix_bad_block() returns an int, which is stored in
 * the enum and compared with BTRFS_TREE_BLOCK_CLEAN. This relies on CLEAN
 * having the value 0; confirm. The condition that enables the repair
 * attempt is not visible in this excerpt.
 */
6416 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6418 status = try_to_fix_bad_block(root, buf, status);
6419 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6421 fprintf(stderr, "bad block %llu\n",
6422 (unsigned long long)buf->start);
6425 * Signal to callers we need to start the scan over
6426 * again since we'll have cowed blocks.
6431 rec->content_checked = 1;
/* Blocks flagged FULL_BACKREF are marked owner-checked directly. */
6432 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6433 rec->owner_ref_checked = 1;
6435 ret = check_owner_ref(root, rec, buf);
6437 rec->owner_ref_checked = 1;
/* The record may now be complete enough to leave the cache. */
6441 maybe_free_extent_rec(extent_cache, rec);
/*
 * List-based lookup of a tree backref: walks rec->backrefs linearly and
 * matches on parent for full backrefs or on root otherwise.
 *
 * NOTE(review): a function with the same name and signature appears
 * earlier in this file using the rb-tree in rec->backref_tree. Presumably
 * only one of the two is compiled (for example through preprocessor
 * guards that are not visible in this excerpt); confirm.
 */
6446 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6447 u64 parent, u64 root)
6449 struct list_head *cur = rec->backrefs.next;
6450 struct extent_backref *node;
6451 struct tree_backref *back;
6453 while(cur != &rec->backrefs) {
6454 node = to_extent_backref(cur);
6458 back = to_tree_backref(node);
6460 if (!node->full_backref)
6462 if (parent == back->parent)
6465 if (node->full_backref)
6467 if (back->root == root)
/*
 * Allocate a tree_backref with malloc() and zero its embedded node. Two
 * forms are visible: one stores @parent and sets full_backref, the other
 * clears full_backref.
 *
 * NOTE(review): the condition choosing between the two forms, the
 * assignment of the root field, the handling of a failed allocation and
 * how the new backref is attached to @rec are not visible in this excerpt.
 */
6475 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6476 u64 parent, u64 root)
6478 struct tree_backref *ref = malloc(sizeof(*ref));
6482 memset(&ref->node, 0, sizeof(ref->node));
6484 ref->parent = parent;
6485 ref->node.full_backref = 1;
6488 ref->node.full_backref = 0;
/*
 * List-based lookup of a data backref: walks rec->backrefs linearly. Full
 * backrefs are matched on parent; others on root, owner and offset. When
 * the found_ref argument and the found_ref bit of the node are both set,
 * bytes and disk_bytenr are compared as well.
 *
 * NOTE(review): a function with the same name appears earlier in this
 * file using the rb-tree in rec->backref_tree. Presumably only one of the
 * two is compiled; confirm.
 */
6495 static struct data_backref *find_data_backref(struct extent_record *rec,
6496 u64 parent, u64 root,
6497 u64 owner, u64 offset,
6499 u64 disk_bytenr, u64 bytes)
6501 struct list_head *cur = rec->backrefs.next;
6502 struct extent_backref *node;
6503 struct data_backref *back;
6505 while(cur != &rec->backrefs) {
6506 node = to_extent_backref(cur);
6510 back = to_data_backref(node);
6512 if (!node->full_backref)
6514 if (parent == back->parent)
6517 if (node->full_backref)
6519 if (back->root == root && back->owner == owner &&
6520 back->offset == offset) {
6521 if (found_ref && node->found_ref &&
6522 (back->bytes != bytes ||
6523 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data_backref with malloc(), zero its embedded node and mark it
 * as a data backref. Two forms are visible: one stores @parent and sets
 * full_backref, the other stores @offset and clears full_backref. bytes is
 * set to max_size, and the max_size of @rec is raised when the new value
 * is larger.
 *
 * NOTE(review): the condition choosing between the two forms, the
 * remaining field assignments and the handling of a failed allocation are
 * not visible in this excerpt.
 */
6533 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6534 u64 parent, u64 root,
6535 u64 owner, u64 offset,
6538 struct data_backref *ref = malloc(sizeof(*ref));
6542 memset(&ref->node, 0, sizeof(ref->node));
6543 ref->node.is_data = 1;
6546 ref->parent = parent;
6549 ref->node.full_backref = 1;
6553 ref->offset = offset;
6554 ref->node.full_backref = 0;
6556 ref->bytes = max_size;
6559 if (max_size > rec->max_size)
6560 rec->max_size = max_size;
6564 /* Check if the type of extent matches with its chunk */
6565 static void check_extent_type(struct extent_record *rec)
6567 struct btrfs_block_group_cache *bg_cache;
/* Find the block group via the global fs_info, starting at rec->start. */
6569 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6573 /* data extent, check chunk directly*/
6574 if (!rec->metadata) {
6575 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6576 rec->wrong_chunk_type = 1;
6580 /* metadata extent, check the obvious case first */
6581 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6582 BTRFS_BLOCK_GROUP_METADATA))) {
6583 rec->wrong_chunk_type = 1;
6588 * Check SYSTEM extent, as it's also marked as metadata, we can only
6589 * make sure it's a SYSTEM extent by its backref
/* Only the first backref in the rb-tree is consulted. */
6591 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
6592 struct extent_backref *node;
6593 struct tree_backref *tback;
6596 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
6597 if (node->is_data) {
6598 /* tree block shouldn't have data backref */
6599 rec->wrong_chunk_type = 1;
6602 tback = container_of(node, struct tree_backref, node);
/* Chunk tree blocks must be in SYSTEM groups, all others in METADATA. */
6604 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6605 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6607 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6608 if (!(bg_cache->flags & bg_type))
6609 rec->wrong_chunk_type = 1;
6614 * Allocate a new extent record, fill default values from @tmpl and insert into
6615 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6616 * the cache, otherwise it fails.
6618 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6619 struct extent_record *tmpl)
6621 struct extent_record *rec;
/* A template without a max_size is treated as a programming error. */
6624 BUG_ON(tmpl->max_size == 0);
6625 rec = malloc(sizeof(*rec));
/*
 * Copy the template fields one by one. The record is not zeroed as a
 * whole in the visible lines, so the remaining fields are set explicitly.
 */
6628 rec->start = tmpl->start;
6629 rec->max_size = tmpl->max_size;
6630 rec->nr = max(tmpl->nr, tmpl->max_size);
6631 rec->found_rec = tmpl->found_rec;
6632 rec->content_checked = tmpl->content_checked;
6633 rec->owner_ref_checked = tmpl->owner_ref_checked;
6634 rec->num_duplicates = 0;
6635 rec->metadata = tmpl->metadata;
6636 rec->flag_block_full_backref = FLAG_UNSET;
6637 rec->bad_full_backref = 0;
6638 rec->crossing_stripes = 0;
6639 rec->wrong_chunk_type = 0;
6640 rec->is_root = tmpl->is_root;
6641 rec->refs = tmpl->refs;
6642 rec->extent_item_refs = tmpl->extent_item_refs;
6643 rec->parent_generation = tmpl->parent_generation;
6644 INIT_LIST_HEAD(&rec->backrefs);
6645 INIT_LIST_HEAD(&rec->dups);
6646 INIT_LIST_HEAD(&rec->list);
6647 rec->backref_tree = RB_ROOT;
6648 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
/*
 * The cache entry size is tmpl->nr, while rec->nr above is the larger of
 * tmpl->nr and tmpl->max_size.
 */
6649 rec->cache.start = tmpl->start;
6650 rec->cache.size = tmpl->nr;
6651 ret = insert_cache_extent(extent_cache, &rec->cache);
/* Account the new record in the file-wide bytes_used counter. */
6656 bytes_used += rec->nr;
/*
 * The stripe crossing check uses nodesize as the length. Its guarding
 * condition is not visible in this excerpt (presumably metadata only;
 * confirm).
 */
6659 rec->crossing_stripes = check_crossing_stripes(global_info,
6660 rec->start, global_info->nodesize);
6661 check_extent_type(rec);
6666 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6668 * - refs - if found, increase refs
6669 * - is_root - if found, set
6670 * - content_checked - if found, set
6671 * - owner_ref_checked - if found, set
6673 * If not found, create a new one, initialize and insert.
6675 static int add_extent_rec(struct cache_tree *extent_cache,
6676 struct extent_record *tmpl)
6678 struct extent_record *rec;
6679 struct cache_extent *cache;
/* Look for an existing record overlapping the template range. */
6683 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6685 rec = container_of(cache, struct extent_record, cache);
6689 rec->nr = max(tmpl->nr, tmpl->max_size);
6692 * We need to make sure to reset nr to whatever the extent
6693 * record says was the real size, this way we can compare it to
/*
 * A second extent item for this record (or one starting at a different
 * bytenr) is a duplicate: queue the record on duplicate_extents and hang
 * a stub record on rec->dups.
 */
6696 if (tmpl->found_rec) {
6697 if (tmpl->start != rec->start || rec->found_rec) {
6698 struct extent_record *tmp;
6701 if (list_empty(&rec->list))
6702 list_add_tail(&rec->list,
6703 &duplicate_extents);
6706 * We have to do this song and dance in case we
6707 * find an extent record that falls inside of
6708 * our current extent record but does not have
6709 * the same objectid.
6711 tmp = malloc(sizeof(*tmp));
6714 tmp->start = tmpl->start;
6715 tmp->max_size = tmpl->max_size;
6718 tmp->metadata = tmpl->metadata;
6719 tmp->extent_item_refs = tmpl->extent_item_refs;
6720 INIT_LIST_HEAD(&tmp->list);
6721 list_add_tail(&tmp->list, &rec->dups);
6722 rec->num_duplicates++;
/* Report when a ref count from an extent item was already recorded. */
6729 if (tmpl->extent_item_refs && !dup) {
6730 if (rec->extent_item_refs) {
6731 fprintf(stderr, "block %llu rec "
6732 "extent_item_refs %llu, passed %llu\n",
6733 (unsigned long long)tmpl->start,
6734 (unsigned long long)
6735 rec->extent_item_refs,
6736 (unsigned long long)tmpl->extent_item_refs);
6738 rec->extent_item_refs = tmpl->extent_item_refs;
/* Checked flags are only ever set here, never cleared. */
6742 if (tmpl->content_checked)
6743 rec->content_checked = 1;
6744 if (tmpl->owner_ref_checked)
6745 rec->owner_ref_checked = 1;
6746 memcpy(&rec->parent_key, &tmpl->parent_key,
6747 sizeof(tmpl->parent_key));
6748 if (tmpl->parent_generation)
6749 rec->parent_generation = tmpl->parent_generation;
6750 if (rec->max_size < tmpl->max_size)
6751 rec->max_size = tmpl->max_size;
6754 * A metadata extent can't cross stripe_len boundary, otherwise
6755 * kernel scrub won't be able to handle it.
6756 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6760 rec->crossing_stripes = check_crossing_stripes(
6761 global_info, rec->start,
6762 global_info->nodesize);
6763 check_extent_type(rec);
6764 maybe_free_extent_rec(extent_cache, rec);
/* No existing record: create one from the template. */
6768 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref for the extent at @bytenr, creating the extent
 * record and the backref when they do not exist yet. One of two flags on
 * the backref is set, found_ref or found_extent_tree, and a repeated
 * sighting of either is reported on stderr.
 *
 * NOTE(review): the test that chooses between the two flags is not visible
 * in this excerpt; presumably it is @found_ref. Confirm.
 */
6773 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6774 u64 parent, u64 root, int found_ref)
6776 struct extent_record *rec;
6777 struct tree_backref *back;
6778 struct cache_extent *cache;
6780 bool insert = false;
/* Find the record covering @bytenr, or create one from a zeroed template. */
6782 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6784 struct extent_record tmpl;
6786 memset(&tmpl, 0, sizeof(tmpl));
6787 tmpl.start = bytenr;
6792 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6796 /* really a bug in cache_extent implement now */
6797 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6802 rec = container_of(cache, struct extent_record, cache);
6803 if (rec->start != bytenr) {
6805 * Several cause, from unaligned bytenr to over lapping extents
/* Reuse an existing backref when one matches, otherwise allocate one. */
6810 back = find_tree_backref(rec, parent, root);
6812 back = alloc_tree_backref(rec, parent, root);
6819 if (back->node.found_ref) {
6820 fprintf(stderr, "Extent back ref already exists "
6821 "for %llu parent %llu root %llu \n",
6822 (unsigned long long)bytenr,
6823 (unsigned long long)parent,
6824 (unsigned long long)root);
6826 back->node.found_ref = 1;
6828 if (back->node.found_extent_tree) {
6829 fprintf(stderr, "Extent back ref already exists "
6830 "for %llu parent %llu root %llu \n",
6831 (unsigned long long)bytenr,
6832 (unsigned long long)parent,
6833 (unsigned long long)root);
6835 back->node.found_extent_tree = 1;
/*
 * Insert into the rb-tree of the record (presumably only when the backref
 * was newly allocated, see the insert flag; confirm), then re-evaluate the
 * chunk type and whether the record can be dropped.
 */
6838 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6839 compare_extent_backref));
6840 check_extent_type(rec);
6841 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data-extent back reference for the extent at @bytenr in
 * @extent_cache.
 *
 * Steps visible below:
 *  - look up the extent record at @bytenr; a zeroed template with
 *    start = @bytenr and max_size = @max_size is handed to
 *    add_extent_rec_nolookup() and the lookup repeated;
 *  - raise rec->max_size to @max_size when it is smaller;
 *  - find the data backref for (@parent, @root, @owner, @offset), with
 *    alloc_data_backref() as the alternative;
 *  - "found ref" bookkeeping: BUG_ON() unless @num_refs is 1, BUG_ON() if an
 *    already-found backref has a different byte count, bump back->found_ref,
 *    refresh back->bytes / back->disk_bytenr from @max_size / @bytenr, and
 *    mark the record content_checked and owner_ref_checked;
 *  - "extent tree" bookkeeping: complain on stderr if the backref was
 *    already seen in the extent tree, then store @num_refs and set
 *    node.found_extent_tree;
 *  - rb_insert() the backref into rec->backref_tree (wrapped in WARN_ON) and
 *    call maybe_free_extent_rec().
 *
 * NOTE(review): presumably @found_ref chooses between the two kinds of
 * bookkeeping and the local "insert" flag guards the rb_insert(); those
 * conditions and the return statements are not among the lines shown here.
 */
6845 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6846 u64 parent, u64 root, u64 owner, u64 offset,
6847 u32 num_refs, int found_ref, u64 max_size)
6849 struct extent_record *rec;
6850 struct data_backref *back;
6851 struct cache_extent *cache;
6853 bool insert = false;
6855 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6857 struct extent_record tmpl;
6859 memset(&tmpl, 0, sizeof(tmpl));
6860 tmpl.start = bytenr;
6862 tmpl.max_size = max_size;
6864 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6868 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6873 rec = container_of(cache, struct extent_record, cache);
6874 if (rec->max_size < max_size)
6875 rec->max_size = max_size;
6878 * If found_ref is set then max_size is the real size and must match the
6879 * existing refs. So if we have already found a ref then we need to
6880 * make sure that this ref matches the existing one, otherwise we need
6881 * to add a new backref so we can notice that the backrefs don't match
6882 * and we need to figure out who is telling the truth. This is to
6883 * account for that awful fsync bug I introduced where we'd end up with
6884 * a btrfs_file_extent_item that would have its length include multiple
6885 * prealloc extents or point inside of a prealloc extent.
6887 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6890 back = alloc_data_backref(rec, parent, root, owner, offset,
6897 BUG_ON(num_refs != 1);
6898 if (back->node.found_ref)
6899 BUG_ON(back->bytes != max_size);
6900 back->node.found_ref = 1;
6901 back->found_ref += 1;
6902 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
6903 back->bytes = max_size;
6904 back->disk_bytenr = bytenr;
6906 /* Need to reinsert if not already in the tree */
6908 rb_erase(&back->node.node, &rec->backref_tree);
6913 rec->content_checked = 1;
6914 rec->owner_ref_checked = 1;
6916 if (back->node.found_extent_tree) {
6917 fprintf(stderr, "Extent back ref already exists "
6918 "for %llu parent %llu root %llu "
6919 "owner %llu offset %llu num_refs %lu\n",
6920 (unsigned long long)bytenr,
6921 (unsigned long long)parent,
6922 (unsigned long long)root,
6923 (unsigned long long)owner,
6924 (unsigned long long)offset,
6925 (unsigned long)num_refs);
6927 back->num_refs = num_refs;
6928 back->node.found_extent_tree = 1;
6931 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6932 compare_extent_backref));
6934 maybe_free_extent_rec(extent_cache, rec);
/*
 * Register the extent (@bytenr, @size) in the @seen tree and in the
 * @pending tree.
 *
 * The result of the @seen insertion is stored in ret; the result of the
 * @pending insertion is discarded.
 * NOTE(review): presumably a failed @seen insertion (extent already seen)
 * returns early so the block is not queued twice; that check and the return
 * statements are not among the lines shown here.
 */
6938 static int add_pending(struct cache_tree *pending,
6939 struct cache_tree *seen, u64 bytenr, u32 size)
6942 ret = add_cache_extent(seen, bytenr, size);
6945 add_cache_extent(pending, bytenr, size);
/*
 * Choose the next batch of blocks to process and store their start/size in
 * @bits (at most @bits_nr entries are written by the loops below).
 *
 * Candidate sources, in the order they are consulted:
 *  - the first extent of @reada (search from 0), copied into bits[0];
 *  - @nodes, searched from @last backed off by 32768 when @last is larger
 *    than that, with a search from 0 as the alternative;
 *  - @pending, searched from 0.
 * Consecutive extents are copied while entries remain and fewer than
 * @bits_nr slots are used. When more than 8 slots are still free, further
 * @pending extents are considered, starting at the end of bits[0]; the
 * distance test against 32768 decides whether an extent is close enough.
 *
 * NOTE(review): node_start is an unsigned long initialised from the u64
 * @last, so the value is narrowed on platforms where unsigned long is
 * smaller than 64 bits.
 * NOTE(review): the last parameter of the signature, the branch conditions
 * and the return statements are not among the lines shown here.
 */
6949 static int pick_next_pending(struct cache_tree *pending,
6950 struct cache_tree *reada,
6951 struct cache_tree *nodes,
6952 u64 last, struct block_info *bits, int bits_nr,
6955 unsigned long node_start = last;
6956 struct cache_extent *cache;
6959 cache = search_cache_extent(reada, 0);
6961 bits[0].start = cache->start;
6962 bits[0].size = cache->size;
6967 if (node_start > 32768)
6968 node_start -= 32768;
6970 cache = search_cache_extent(nodes, node_start);
6972 cache = search_cache_extent(nodes, 0);
6975 cache = search_cache_extent(pending, 0);
6980 bits[ret].start = cache->start;
6981 bits[ret].size = cache->size;
6982 cache = next_cache_extent(cache);
6984 } while (cache && ret < bits_nr);
6990 bits[ret].start = cache->start;
6991 bits[ret].size = cache->size;
6992 cache = next_cache_extent(cache);
6994 } while (cache && ret < bits_nr);
6996 if (bits_nr - ret > 8) {
6997 u64 lookup = bits[0].start + bits[0].size;
6998 struct cache_extent *next;
6999 next = search_cache_extent(pending, lookup);
7001 if (next->start - lookup > 32768)
7003 bits[ret].start = next->start;
7004 bits[ret].size = next->size;
7005 lookup = next->start + next->size;
7009 next = next_cache_extent(next);
/*
 * Per-extent callback used by free_chunk_cache_tree(): recover the
 * chunk_record that embeds @cache and unlink it from both of its lists
 * (rec->list and rec->dextents).
 * NOTE(review): any statement releasing the record itself is not among the
 * lines shown here.
 */
7017 static void free_chunk_record(struct cache_extent *cache)
7019 struct chunk_record *rec;
7021 rec = container_of(cache, struct chunk_record, cache);
7022 list_del_init(&rec->list);
7023 list_del_init(&rec->dextents);
/*
 * Tear down @chunk_cache by handing every cached extent to
 * free_chunk_record() through cache_tree_free_extents().
 */
7027 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
7029 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node callback for the device rb-tree: recover the device_record that
 * embeds @node.
 * NOTE(review): the statement that disposes of the record is not among the
 * lines shown here.
 */
7032 static void free_device_record(struct rb_node *node)
7034 struct device_record *rec;
7036 rec = container_of(node, struct device_record, node);
/*
 * Instantiate the FREE_RB_BASED_TREE helper for "device_cache" with
 * free_device_record() as the per-node callback.
 * NOTE(review): the macro is defined elsewhere; presumably it expands to the
 * function that frees the whole device rb-tree - confirm its generated name.
 */
7040 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Add @bg_rec to @tree: insert its cache extent into tree->tree and append
 * it to the tree->block_groups list.
 * NOTE(review): presumably a failed insert_cache_extent() returns before the
 * list append; that check and the return statements are not among the lines
 * shown here.
 */
7042 int insert_block_group_record(struct block_group_tree *tree,
7043 struct block_group_record *bg_rec)
7047 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7051 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Per-extent callback used by free_block_group_tree(): recover the
 * block_group_record that embeds @cache and unlink it from rec->list.
 * NOTE(review): any statement releasing the record itself is not among the
 * lines shown here.
 */
7055 static void free_block_group_record(struct cache_extent *cache)
7057 struct block_group_record *rec;
7059 rec = container_of(cache, struct block_group_record, cache);
7060 list_del_init(&rec->list);
/*
 * Tear down @tree by handing every extent cached in tree->tree to
 * free_block_group_record() through cache_tree_free_extents().
 */
7064 void free_block_group_tree(struct block_group_tree *tree)
7066 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Add @de_rec to @tree: insert its cache extent with insert_cache_extent2()
 * (the variant used because extents of different devices may share start and
 * size) and queue the record on both orphan lists, tree->no_chunk_orphans
 * and tree->no_device_orphans.
 * NOTE(review): presumably a failed insertion returns before the list
 * appends; that check and the return statements are not among the lines
 * shown here.
 */
7069 int insert_device_extent_record(struct device_extent_tree *tree,
7070 struct device_extent_record *de_rec)
7075 * Device extent is a bit different from the other extents, because
7076 * the extents which belong to the different devices may have the
7077 * same start and size, so we need use the special extent cache
7078 * search/insert functions.
7080 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7084 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7085 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Per-extent callback used by free_device_extent_tree(): recover the
 * device_extent_record that embeds @cache and unlink it from chunk_list and
 * device_list, each only when that list head is not already empty.
 * NOTE(review): any statement releasing the record itself is not among the
 * lines shown here.
 */
7089 static void free_device_extent_record(struct cache_extent *cache)
7091 struct device_extent_record *rec;
7093 rec = container_of(cache, struct device_extent_record, cache);
7094 if (!list_empty(&rec->chunk_list))
7095 list_del_init(&rec->chunk_list);
7096 if (!list_empty(&rec->device_list))
7097 list_del_init(&rec->device_list);
/*
 * Tear down @tree by handing every extent cached in tree->tree to
 * free_device_extent_record() through cache_tree_free_extents().
 */
7101 void free_device_extent_tree(struct device_extent_tree *tree)
7103 cache_tree_free_extents(&tree->tree, free_device_extent_record);
7106 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref in
 * @extent_cache (only built with BTRFS_COMPAT_EXTENT_TREE_V0).
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded with
 * add_tree_backref(); the other visible call records a data backref with
 * add_data_backref(), passing the v0 ref count as num_refs and zeros for
 * root, owner, offset, found_ref and max_size. In both calls key.objectid is
 * the extent bytenr and key.offset the parent.
 * NOTE(review): the remaining add_tree_backref() arguments, the else branch
 * and the return statement are not among the lines shown here.
 */
7107 static int process_extent_ref_v0(struct cache_tree *extent_cache,
7108 struct extent_buffer *leaf, int slot)
7110 struct btrfs_extent_ref_v0 *ref0;
7111 struct btrfs_key key;
7114 btrfs_item_key_to_cpu(leaf, &key, slot);
7115 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
7116 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
7117 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
7120 ret = add_data_backref(extent_cache, key.objectid, key.offset,
7121 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the chunk item at @slot of @leaf.
 *
 * The record is calloc()ed with room for num_stripes stripes
 * (btrfs_chunk_record_size()). Its cache extent covers
 * [key->offset, key->offset + chunk length); the key fields, the leaf
 * generation and the chunk attributes (owner, stripe_len, type flags,
 * io_width, io_align, sector_size, num_stripes, sub_stripes) are copied in,
 * followed by devid, offset and dev_uuid of every stripe.
 * NOTE(review): the handling after the "memory allocation failed" message,
 * the last parameter of the signature and the return statement are not among
 * the lines shown here.
 */
7127 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7128 struct btrfs_key *key,
7131 struct btrfs_chunk *ptr;
7132 struct chunk_record *rec;
7135 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7136 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7138 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7140 fprintf(stderr, "memory allocation failed\n");
7144 INIT_LIST_HEAD(&rec->list);
7145 INIT_LIST_HEAD(&rec->dextents);
7148 rec->cache.start = key->offset;
7149 rec->cache.size = btrfs_chunk_length(leaf, ptr);
7151 rec->generation = btrfs_header_generation(leaf);
7153 rec->objectid = key->objectid;
7154 rec->type = key->type;
7155 rec->offset = key->offset;
7157 rec->length = rec->cache.size;
7158 rec->owner = btrfs_chunk_owner(leaf, ptr);
7159 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7160 rec->type_flags = btrfs_chunk_type(leaf, ptr);
7161 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7162 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7163 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7164 rec->num_stripes = num_stripes;
7165 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
7167 for (i = 0; i < rec->num_stripes; ++i) {
7168 rec->stripes[i].devid =
7169 btrfs_stripe_devid_nr(leaf, ptr, i);
7170 rec->stripes[i].offset =
7171 btrfs_stripe_offset_nr(leaf, ptr, i);
7172 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7173 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb and add it to @chunk_cache.
 *
 * The item is first run through btrfs_check_chunk_valid(); an invalid chunk
 * is reported with error() ("... is not valid, ignore it"). A chunk_record
 * is then built with btrfs_new_chunk_record() and inserted with
 * insert_cache_extent(); the "Chunk[...] existed." message is the visible
 * reaction to a failed insertion.
 * NOTE(review): rec is dereferenced right after btrfs_new_chunk_record()
 * with no visible NULL check; confirm that the constructor cannot return
 * NULL. The branch conditions and return statements are not among the lines
 * shown here.
 */
7180 static int process_chunk_item(struct cache_tree *chunk_cache,
7181 struct btrfs_key *key, struct extent_buffer *eb,
7184 struct chunk_record *rec;
7185 struct btrfs_chunk *chunk;
7188 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7190 * Do extra check for this chunk item,
7192 * It's still possible one can craft a leaf with CHUNK_ITEM, with
7193 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7194 * and owner<->key_type check.
7196 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7199 error("chunk(%llu, %llu) is not valid, ignore it",
7200 key->offset, btrfs_chunk_length(eb, chunk));
7203 rec = btrfs_new_chunk_record(eb, key, slot);
7204 ret = insert_cache_extent(chunk_cache, &rec->cache);
7206 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7207 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it
 * into the @dev_cache rb-tree, ordered by device_record_compare().
 *
 * The record stores the key fields, the leaf generation, and the device id,
 * total bytes and bytes used read from the item. The "Device[...] existed."
 * message is the visible reaction to a failed rb_insert().
 *
 * NOTE(review): rec->devid is assigned twice - first from key->offset, then
 * from btrfs_device_id(); only the second value survives.
 * NOTE(review): the record comes from malloc(), so any field not assigned
 * below is left uninitialised; check struct device_record for others.
 * The branch conditions and return statements are not among the lines shown
 * here.
 */
7214 static int process_device_item(struct rb_root *dev_cache,
7215 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7217 struct btrfs_dev_item *ptr;
7218 struct device_record *rec;
7221 ptr = btrfs_item_ptr(eb,
7222 slot, struct btrfs_dev_item);
7224 rec = malloc(sizeof(*rec));
7226 fprintf(stderr, "memory allocation failed\n");
7230 rec->devid = key->offset;
7231 rec->generation = btrfs_header_generation(eb);
7233 rec->objectid = key->objectid;
7234 rec->type = key->type;
7235 rec->offset = key->offset;
7237 rec->devid = btrfs_device_id(eb, ptr);
7238 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7239 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7241 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7243 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a block_group_record from the block group item at @slot of @leaf.
 *
 * The record is calloc()ed; its cache extent starts at key->objectid with
 * size key->offset. The key fields, the leaf generation and the block group
 * flags read from the item are copied in and rec->list is initialised.
 * NOTE(review): the handling after the "memory allocation failed" message,
 * the last parameter of the signature and the return statement are not among
 * the lines shown here.
 */
7250 struct block_group_record *
7251 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7254 struct btrfs_block_group_item *ptr;
7255 struct block_group_record *rec;
7257 rec = calloc(1, sizeof(*rec));
7259 fprintf(stderr, "memory allocation failed\n");
7263 rec->cache.start = key->objectid;
7264 rec->cache.size = key->offset;
7266 rec->generation = btrfs_header_generation(leaf);
7268 rec->objectid = key->objectid;
7269 rec->type = key->type;
7270 rec->offset = key->offset;
7272 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7273 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7275 INIT_LIST_HEAD(&rec->list);
/*
 * Build a block_group_record for the item at @slot of @eb and add it to
 * @block_group_cache with insert_block_group_record(). The
 * "Block Group[...] existed." message is the visible reaction to a failed
 * insertion.
 * NOTE(review): rec is used with no visible NULL check; the branch condition
 * and return statements are not among the lines shown here.
 */
7280 static int process_block_group_item(struct block_group_tree *block_group_cache,
7281 struct btrfs_key *key,
7282 struct extent_buffer *eb, int slot)
7284 struct block_group_record *rec;
7287 rec = btrfs_new_block_group_record(eb, key, slot);
7288 ret = insert_block_group_record(block_group_cache, rec);
7290 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7291 rec->objectid, rec->offset);
/*
 * Build a device_extent_record from the dev extent item at @slot of @leaf.
 *
 * The record is calloc()ed. Its cache extent is keyed by key->objectid
 * (cache.objectid) and key->offset (cache.start), with the dev extent length
 * as size. The key fields, the leaf generation, and the chunk objectid,
 * chunk offset and length read from the item are copied in, and both list
 * heads (chunk_list, device_list) are initialised.
 * NOTE(review): the field name "chunk_objecteid" is spelled this way here;
 * it has to match the struct declaration elsewhere. The handling after the
 * "memory allocation failed" message and the return statement are not among
 * the lines shown here.
 */
7298 struct device_extent_record *
7299 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7300 struct btrfs_key *key, int slot)
7302 struct device_extent_record *rec;
7303 struct btrfs_dev_extent *ptr;
7305 rec = calloc(1, sizeof(*rec));
7307 fprintf(stderr, "memory allocation failed\n");
7311 rec->cache.objectid = key->objectid;
7312 rec->cache.start = key->offset;
7314 rec->generation = btrfs_header_generation(leaf);
7316 rec->objectid = key->objectid;
7317 rec->type = key->type;
7318 rec->offset = key->offset;
7320 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7321 rec->chunk_objecteid =
7322 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7324 btrfs_dev_extent_chunk_offset(leaf, ptr);
7325 rec->length = btrfs_dev_extent_length(leaf, ptr);
7326 rec->cache.size = rec->length;
7328 INIT_LIST_HEAD(&rec->chunk_list);
7329 INIT_LIST_HEAD(&rec->device_list);
/*
 * Build a device_extent_record for the item at @slot of @eb and add it to
 * @dev_extent_cache with insert_device_extent_record(). The
 * "Device extent[...] existed." message is the visible reaction to a failed
 * insertion.
 * NOTE(review): the return type, the last parameter, the branch condition
 * and the return statements are not among the lines shown here.
 */
7335 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7336 struct btrfs_key *key, struct extent_buffer *eb,
7339 struct device_extent_record *rec;
7342 rec = btrfs_new_device_extent_record(eb, key, slot);
7343 ret = insert_device_extent_record(dev_extent_cache, rec);
7346 "Device extent[%llu, %llu, %llu] existed.\n",
7347 rec->objectid, rec->offset, rec->length);
/*
 * Feed the EXTENT_ITEM / METADATA_ITEM at @slot of leaf @eb into
 * @extent_cache, together with its inline back references.
 *
 * Steps visible below:
 *  - num_bytes is root->fs_info->nodesize for a BTRFS_METADATA_ITEM_KEY and
 *    key.offset in the other visible assignment;
 *  - a key.objectid that is not aligned to sectorsize is reported with
 *    error() ("ignoring invalid extent");
 *  - an item smaller than struct btrfs_extent_item is treated as a v0 item
 *    (only with BTRFS_COMPAT_EXTENT_TREE_V0): the ref count is read from the
 *    v0 layout and a template record is returned through add_extent_rec();
 *  - otherwise the ref count comes from the item, a metadata length other
 *    than nodesize or an unaligned data length is reported with error(), and
 *    a template record is passed to add_extent_rec();
 *  - the inline refs that follow the item (after a struct
 *    btrfs_tree_block_info when the TREE_BLOCK flag is set on an
 *    EXTENT_ITEM_KEY) are walked up to the end of the item and dispatched by
 *    type to add_tree_backref() or add_data_backref(); the
 *    "corrupt extent record" message covers the remaining case.
 *
 * NOTE(review): the results of the second add_extent_rec() call and of both
 * add_data_backref() calls are not captured, unlike add_tree_backref()'s.
 * NOTE(review): several local declarations, branch conditions, the loop
 * header and the return statements are not among the lines shown here.
 */
7354 static int process_extent_item(struct btrfs_root *root,
7355 struct cache_tree *extent_cache,
7356 struct extent_buffer *eb, int slot)
7358 struct btrfs_extent_item *ei;
7359 struct btrfs_extent_inline_ref *iref;
7360 struct btrfs_extent_data_ref *dref;
7361 struct btrfs_shared_data_ref *sref;
7362 struct btrfs_key key;
7363 struct extent_record tmpl;
7368 u32 item_size = btrfs_item_size_nr(eb, slot);
7374 btrfs_item_key_to_cpu(eb, &key, slot);
7376 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7378 num_bytes = root->fs_info->nodesize;
7380 num_bytes = key.offset;
7383 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7384 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7385 key.objectid, root->fs_info->sectorsize);
7388 if (item_size < sizeof(*ei)) {
7389 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7390 struct btrfs_extent_item_v0 *ei0;
7391 BUG_ON(item_size != sizeof(*ei0));
7392 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7393 refs = btrfs_extent_refs_v0(eb, ei0);
7397 memset(&tmpl, 0, sizeof(tmpl));
7398 tmpl.start = key.objectid;
7399 tmpl.nr = num_bytes;
7400 tmpl.extent_item_refs = refs;
7401 tmpl.metadata = metadata;
7403 tmpl.max_size = num_bytes;
7405 return add_extent_rec(extent_cache, &tmpl);
7408 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7409 refs = btrfs_extent_refs(eb, ei);
7410 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7414 if (metadata && num_bytes != root->fs_info->nodesize) {
7415 error("ignore invalid metadata extent, length %llu does not equal to %u",
7416 num_bytes, root->fs_info->nodesize);
7419 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7420 error("ignore invalid data extent, length %llu is not aligned to %u",
7421 num_bytes, root->fs_info->sectorsize);
7425 memset(&tmpl, 0, sizeof(tmpl));
7426 tmpl.start = key.objectid;
7427 tmpl.nr = num_bytes;
7428 tmpl.extent_item_refs = refs;
7429 tmpl.metadata = metadata;
7431 tmpl.max_size = num_bytes;
7432 add_extent_rec(extent_cache, &tmpl);
7434 ptr = (unsigned long)(ei + 1);
7435 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7436 key.type == BTRFS_EXTENT_ITEM_KEY)
7437 ptr += sizeof(struct btrfs_tree_block_info);
7439 end = (unsigned long)ei + item_size;
7441 iref = (struct btrfs_extent_inline_ref *)ptr;
7442 type = btrfs_extent_inline_ref_type(eb, iref);
7443 offset = btrfs_extent_inline_ref_offset(eb, iref);
7445 case BTRFS_TREE_BLOCK_REF_KEY:
7446 ret = add_tree_backref(extent_cache, key.objectid,
7450 "add_tree_backref failed (extent items tree block): %s",
7453 case BTRFS_SHARED_BLOCK_REF_KEY:
7454 ret = add_tree_backref(extent_cache, key.objectid,
7458 "add_tree_backref failed (extent items shared block): %s",
7461 case BTRFS_EXTENT_DATA_REF_KEY:
7462 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7463 add_data_backref(extent_cache, key.objectid, 0,
7464 btrfs_extent_data_ref_root(eb, dref),
7465 btrfs_extent_data_ref_objectid(eb,
7467 btrfs_extent_data_ref_offset(eb, dref),
7468 btrfs_extent_data_ref_count(eb, dref),
7471 case BTRFS_SHARED_DATA_REF_KEY:
7472 sref = (struct btrfs_shared_data_ref *)(iref + 1);
7473 add_data_backref(extent_cache, key.objectid, offset,
7475 btrfs_shared_data_ref_count(eb, sref),
7479 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7480 key.objectid, key.type, num_bytes);
7483 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the free-space cache of block group @cache holds one entry
 * that exactly matches the range starting at @offset and spanning @bytes,
 * after the areas occupied by super block copies have been carved out.
 *
 * For every super mirror (up to BTRFS_SUPER_MIRROR_MAX) the mirror offset is
 * mapped back into the block group with btrfs_rmap_block(). Each returned
 * logical stripe is compared with the range:
 *  - stripes entirely before or after the range are skipped (presumably -
 *    the statements under those two tests are not visible);
 *  - a stripe at the start or overlapping the start trims the front;
 *  - a stripe reaching the end trims the tail;
 *  - a stripe in the middle makes the function recurse on the left part and
 *    carry on with the right part.
 * What is left is looked up with btrfs_find_free_space(). A missing entry, a
 * different offset or a different byte count is reported on stderr; a
 * matching entry is removed with unlink_free_space() so that leftovers can
 * be detected by the caller.
 * NOTE(review): the inner loop header, the early exits and the return
 * statements are not among the lines shown here.
 */
7490 static int check_cache_range(struct btrfs_root *root,
7491 struct btrfs_block_group_cache *cache,
7492 u64 offset, u64 bytes)
7494 struct btrfs_free_space *entry;
7500 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
7501 bytenr = btrfs_sb_offset(i);
7502 ret = btrfs_rmap_block(root->fs_info,
7503 cache->key.objectid, bytenr, 0,
7504 &logical, &nr, &stripe_len);
7509 if (logical[nr] + stripe_len <= offset)
7511 if (offset + bytes <= logical[nr])
7513 if (logical[nr] == offset) {
7514 if (stripe_len >= bytes) {
7518 bytes -= stripe_len;
7519 offset += stripe_len;
7520 } else if (logical[nr] < offset) {
7521 if (logical[nr] + stripe_len >=
7526 bytes = (offset + bytes) -
7527 (logical[nr] + stripe_len);
7528 offset = logical[nr] + stripe_len;
7531 * Could be tricky, the super may land in the
7532 * middle of the area we're checking. First
7533 * check the easiest case, it's at the end.
7535 if (logical[nr] + stripe_len >=
7537 bytes = logical[nr] - offset;
7541 /* Check the left side */
7542 ret = check_cache_range(root, cache,
7544 logical[nr] - offset);
7550 /* Now we continue with the right side */
7551 bytes = (offset + bytes) -
7552 (logical[nr] + stripe_len);
7553 offset = logical[nr] + stripe_len;
7560 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7562 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7563 offset, offset+bytes);
7567 if (entry->offset != offset) {
7568 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7573 if (entry->bytes != bytes) {
7574 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7575 bytes, entry->bytes, offset);
7579 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the loaded free-space entries of block group @cache against
 * the extent tree.
 *
 * The walk uses root->fs_info->extent_root and starts at the larger of the
 * block group start and BTRFS_SUPER_INFO_OFFSET. For each EXTENT_ITEM /
 * METADATA_ITEM inside the block group:
 *  - if the item begins exactly at "last", "last" just moves to the end of
 *    that extent (key.offset for extent items, nodesize for metadata items);
 *  - otherwise the gap [last, key.objectid) must be free space and is
 *    verified with check_cache_range() before "last" is advanced.
 * After the walk the tail up to the end of the block group is verified the
 * same way, the path is released, and leftover entries in
 * free_space_ctl->free_space_offset are reported on stderr.
 * NOTE(review): the loop header, error exits and return statements are not
 * among the lines shown here.
 */
7584 static int verify_space_cache(struct btrfs_root *root,
7585 struct btrfs_block_group_cache *cache)
7587 struct btrfs_path path;
7588 struct extent_buffer *leaf;
7589 struct btrfs_key key;
7593 root = root->fs_info->extent_root;
7595 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7597 btrfs_init_path(&path);
7598 key.objectid = last;
7600 key.type = BTRFS_EXTENT_ITEM_KEY;
7601 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7606 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7607 ret = btrfs_next_leaf(root, &path);
7615 leaf = path.nodes[0];
7616 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7617 if (key.objectid >= cache->key.offset + cache->key.objectid)
7619 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7620 key.type != BTRFS_METADATA_ITEM_KEY) {
7625 if (last == key.objectid) {
7626 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7627 last = key.objectid + key.offset;
7629 last = key.objectid + root->fs_info->nodesize;
7634 ret = check_cache_range(root, cache, last,
7635 key.objectid - last);
7638 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7639 last = key.objectid + key.offset;
7641 last = key.objectid + root->fs_info->nodesize;
7645 if (last < cache->key.objectid + cache->key.offset)
7646 ret = check_cache_range(root, cache, last,
7647 cache->key.objectid +
7648 cache->key.offset - last);
7651 btrfs_release_path(&path);
7654 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7655 fprintf(stderr, "There are still entries left in the space "
/*
 * Verify the free-space information of every block group.
 *
 * If the super block records a cache generation other than -1 that differs
 * from the super generation, a message says the space cache will be
 * invalidated. With progress reporting enabled, the TASK_FREE_SPACE task is
 * started. Block groups are then visited in order, starting just past the
 * primary super block. For each one:
 *  - its free_space_ctl is initialised if absent, and
 *    btrfs_remove_free_space_cache() is the other visible preparation call;
 *  - with the FREE_SPACE_TREE compat_ro feature, super stripes are excluded,
 *    the free space tree is loaded and the exclusions are dropped again;
 *  - the other visible path loads the cache with load_free_space_cache();
 *  - the result is checked with verify_space_cache(), and a failure prints
 *    "cache appears valid but isn't".
 * The task is stopped at the end; the function returns -EINVAL when the
 * local error flag is set and 0 otherwise.
 * NOTE(review): the loop header, several branch conditions and the early
 * returns are not among the lines shown here.
 */
7663 static int check_space_cache(struct btrfs_root *root)
7665 struct btrfs_block_group_cache *cache;
7666 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7670 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7671 btrfs_super_generation(root->fs_info->super_copy) !=
7672 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7673 printf("cache and super generation don't match, space cache "
7674 "will be invalidated\n");
7678 if (ctx.progress_enabled) {
7679 ctx.tp = TASK_FREE_SPACE;
7680 task_start(ctx.info);
7684 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7688 start = cache->key.objectid + cache->key.offset;
7689 if (!cache->free_space_ctl) {
7690 if (btrfs_init_free_space_ctl(cache,
7691 root->fs_info->sectorsize)) {
7696 btrfs_remove_free_space_cache(cache);
7699 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7700 ret = exclude_super_stripes(root, cache);
7702 fprintf(stderr, "could not exclude super stripes: %s\n",
7707 ret = load_free_space_tree(root->fs_info, cache);
7708 free_excluded_extents(root, cache);
7710 fprintf(stderr, "could not load free space tree: %s\n",
7717 ret = load_free_space_cache(root->fs_info, cache);
7722 ret = verify_space_cache(root, cache);
7724 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7725 cache->key.objectid);
7730 task_stop(ctx.info);
7732 return error ? -EINVAL : 0;
/*
 * Read the data range starting at @bytenr and spanning @num_bytes, and
 * compare the checksum of every sectorsize block with the value stored in
 * leaf @eb.
 *
 * @num_bytes is tested for being a multiple of the sector size, and a buffer
 * of @num_bytes is allocated for the whole range. Data is read with
 * read_extent_data() from the current mirror; for each sector the checksum
 * is computed with btrfs_csum_data()/btrfs_csum_final(), and the expected
 * value is read from @eb at
 * @leaf_offset + (sector index within the range) * csum_size.
 * A mismatch is printed to stderr with the mirror number, and
 * btrfs_num_copies() is consulted to decide whether another mirror can be
 * tried.
 * NOTE(review): the retry/advance logic, the cleanup of the data buffer and
 * the return statements are not among the lines shown here.
 */
7735 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7736 u64 num_bytes, unsigned long leaf_offset,
7737 struct extent_buffer *eb) {
7739 struct btrfs_fs_info *fs_info = root->fs_info;
7741 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7743 unsigned long csum_offset;
7747 u64 data_checked = 0;
7753 if (num_bytes % fs_info->sectorsize)
7756 data = malloc(num_bytes);
7760 while (offset < num_bytes) {
7763 read_len = num_bytes - offset;
7764 /* read as much space once a time */
7765 ret = read_extent_data(fs_info, data + offset,
7766 bytenr + offset, &read_len, mirror);
7770 /* verify every 4k data's checksum */
7771 while (data_checked < read_len) {
7773 tmp = offset + data_checked;
7775 csum = btrfs_csum_data((char *)data + tmp,
7776 csum, fs_info->sectorsize);
7777 btrfs_csum_final(csum, (u8 *)&csum);
7779 csum_offset = leaf_offset +
7780 tmp / fs_info->sectorsize * csum_size;
7781 read_extent_buffer(eb, (char *)&csum_expected,
7782 csum_offset, csum_size);
7783 /* try another mirror */
7784 if (csum != csum_expected) {
7785 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7786 mirror, bytenr + tmp,
7787 csum, csum_expected);
7788 num_copies = btrfs_num_copies(root->fs_info,
7790 if (mirror < num_copies - 1) {
7795 data_checked += fs_info->sectorsize;
/*
 * Check that the range starting at @bytenr and spanning @num_bytes is
 * covered by EXTENT_ITEMs in the extent tree.
 *
 * The search key is (@bytenr, EXTENT_ITEM, -1), so the search lands just
 * past any extent item starting at @bytenr; the code then steps back a slot
 * (or a leaf), and keeps stepping back while the key type sorts after
 * EXTENT_ITEM, because block group items with the same bytenr precede
 * extent items. From there items are walked forward and the range is shrunk
 * by every extent that overlaps it:
 *  - an extent starting at @bytenr or before it consumes the front;
 *  - an extent starting inside the range either consumes the tail or, when
 *    it sits in the middle, causes a recursive check of the right-hand part
 *    while this invocation continues with the left-hand part.
 * If bytes remain uncovered and no error occurred, "There are no extents
 * for csum range" is printed. The path is released before leaving.
 * NOTE(review): the loop header, slot adjustments, exits and return
 * statements are not among the lines shown here.
 */
7804 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7807 struct btrfs_path path;
7808 struct extent_buffer *leaf;
7809 struct btrfs_key key;
7812 btrfs_init_path(&path);
7813 key.objectid = bytenr;
7814 key.type = BTRFS_EXTENT_ITEM_KEY;
7815 key.offset = (u64)-1;
7818 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7821 fprintf(stderr, "Error looking up extent record %d\n", ret);
7822 btrfs_release_path(&path);
7825 if (path.slots[0] > 0) {
7828 ret = btrfs_prev_leaf(root, &path);
7831 } else if (ret > 0) {
7838 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7841 * Block group items come before extent items if they have the same
7842 * bytenr, so walk back one more just in case. Dear future traveller,
7843 * first congrats on mastering time travel. Now if it's not too much
7844 * trouble could you go back to 2006 and tell Chris to make the
7845 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7846 * EXTENT_ITEM_KEY please?
7848 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7849 if (path.slots[0] > 0) {
7852 ret = btrfs_prev_leaf(root, &path);
7855 } else if (ret > 0) {
7860 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7864 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7865 ret = btrfs_next_leaf(root, &path);
7867 fprintf(stderr, "Error going to next leaf "
7869 btrfs_release_path(&path);
7875 leaf = path.nodes[0];
7876 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7877 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7881 if (key.objectid + key.offset < bytenr) {
7885 if (key.objectid > bytenr + num_bytes)
7888 if (key.objectid == bytenr) {
7889 if (key.offset >= num_bytes) {
7893 num_bytes -= key.offset;
7894 bytenr += key.offset;
7895 } else if (key.objectid < bytenr) {
7896 if (key.objectid + key.offset >= bytenr + num_bytes) {
7900 num_bytes = (bytenr + num_bytes) -
7901 (key.objectid + key.offset);
7902 bytenr = key.objectid + key.offset;
7904 if (key.objectid + key.offset < bytenr + num_bytes) {
7905 u64 new_start = key.objectid + key.offset;
7906 u64 new_bytes = bytenr + num_bytes - new_start;
7909 * Weird case, the extent is in the middle of
7910 * our range, we'll have to search one side
7911 * and then the other. Not sure if this happens
7912 * in real life, but no harm in coding it up
7913 * anyway just in case.
7915 btrfs_release_path(&path);
7916 ret = check_extent_exists(root, new_start,
7919 fprintf(stderr, "Right section didn't "
7923 num_bytes = key.objectid - bytenr;
7926 num_bytes = key.objectid - bytenr;
7933 if (num_bytes && !ret) {
7934 fprintf(stderr, "There are no extents for csum range "
7935 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7939 btrfs_release_path(&path);
/*
 * Walk the checksum tree and make sure every contiguous run of checksummed
 * data is backed by extent records.
 *
 * The walk uses root->fs_info->csum_root and gives up with
 * "No valid csum tree found" when its root node is not up to date. Items of
 * type BTRFS_EXTENT_CSUM_KEY are visited in key order. For each item the
 * covered data length is
 * (item size / csum_size) * sectorsize. Unless check_data_csum is zero, the
 * data itself is verified with check_extent_csums(). Adjacent items are
 * merged into one run (offset, num_bytes); when an item does not start where
 * the current run ends, the finished run is checked with
 * check_extent_exists() and a new run starts at key.offset.
 * NOTE(review): the loop header, the skip_csum_check label, the handling of
 * the final run and the return statements are not among the lines shown
 * here.
 */
7943 static int check_csums(struct btrfs_root *root)
7945 struct btrfs_path path;
7946 struct extent_buffer *leaf;
7947 struct btrfs_key key;
7948 u64 offset = 0, num_bytes = 0;
7949 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7953 unsigned long leaf_offset;
7955 root = root->fs_info->csum_root;
7956 if (!extent_buffer_uptodate(root->node)) {
7957 fprintf(stderr, "No valid csum tree found\n");
7961 btrfs_init_path(&path);
7962 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7963 key.type = BTRFS_EXTENT_CSUM_KEY;
7965 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7967 fprintf(stderr, "Error searching csum tree %d\n", ret);
7968 btrfs_release_path(&path);
7972 if (ret > 0 && path.slots[0])
7977 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7978 ret = btrfs_next_leaf(root, &path);
7980 fprintf(stderr, "Error going to next leaf "
7987 leaf = path.nodes[0];
7989 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7990 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7995 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7996 csum_size) * root->fs_info->sectorsize;
7997 if (!check_data_csum)
7998 goto skip_csum_check;
7999 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8000 ret = check_extent_csums(root, key.offset, data_len,
8006 offset = key.offset;
8007 } else if (key.offset != offset + num_bytes) {
8008 ret = check_extent_exists(root, offset, num_bytes);
8010 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8011 "there is no extent record\n",
8012 offset, offset+num_bytes);
8015 offset = key.offset;
8018 num_bytes += data_len;
8022 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field in key order (objectid, then
 * type, then offset), using a strict "less than" test at each level and
 * descending to the next field only on equality.
 * NOTE(review): the return statements are not among the lines shown here;
 * presumably the result is non-zero exactly when @key sorts before
 * @drop_key.
 */
8026 static int is_dropped_key(struct btrfs_key *key,
8027 struct btrfs_key *drop_key) {
8028 if (key->objectid < drop_key->objectid)
8030 else if (key->objectid == drop_key->objectid) {
8031 if (key->type < drop_key->type)
8033 else if (key->type == drop_key->type) {
8034 if (key->offset < drop_key->offset)
8042 * Here are the rules for FULL_BACKREF.
8044 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8045 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8047 * 3) We COWed the block while walking down a reloc tree. This is impossible to tell
8048 * if it happened after the relocation occurred since we'll have dropped the
8049 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8050 * have no real way to know for sure.
8052 * We process the blocks one root at a time, and we start from the lowest root
8053 * objectid and go to the highest. So we can just lookup the owner backref for
8054 * the record and if we don't find it then we know it doesn't exist and we have
8057 * FIXME: if we ever start reclaiming root objectids then we need to fix this
8058 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8059 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether tree block @buf, reached through the root described by
 * @ri, should carry BTRFS_BLOCK_FLAG_FULL_BACKREF, and record the verdict.
 *
 * The extent record for buf->start is looked up in @extent_cache. The
 * visible tests, in order, are:
 *  - ri->objectid below BTRFS_FIRST_FREE_OBJECTID;
 *  - @buf being the root block of @ri (buf->start == ri->bytenr);
 *  - the BTRFS_HEADER_FLAG_RELOC header flag;
 *  - the header owner being ri->objectid;
 *  - a tree backref keyed by (parent 0, owner) found with
 *    find_tree_backref().
 * Two outcomes follow. In one, a previously recorded
 * rec->flag_block_full_backref that is set and not 0 marks the record
 * bad_full_backref. In the other, FULL_BACKREF is or-ed into *flags and a
 * previously recorded value that is set and not 1 marks the record
 * bad_full_backref.
 * NOTE(review): the last parameter of the signature, the jump targets of the
 * individual tests and the return statements are not among the lines shown
 * here.
 */
8061 static int calc_extent_flag(struct cache_tree *extent_cache,
8062 struct extent_buffer *buf,
8063 struct root_item_record *ri,
8066 struct extent_record *rec;
8067 struct cache_extent *cache;
8068 struct tree_backref *tback;
8071 cache = lookup_cache_extent(extent_cache, buf->start, 1);
8072 /* we have added this extent before */
8076 rec = container_of(cache, struct extent_record, cache);
8079 * Except file/reloc tree, we can not have
8082 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8087 if (buf->start == ri->bytenr)
8090 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8093 owner = btrfs_header_owner(buf);
8094 if (owner == ri->objectid)
8097 tback = find_tree_backref(rec, 0, owner);
8102 if (rec->flag_block_full_backref != FLAG_UNSET &&
8103 rec->flag_block_full_backref != 0)
8104 rec->bad_full_backref = 1;
8107 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8108 if (rec->flag_block_full_backref != FLAG_UNSET &&
8109 rec->flag_block_full_backref != 1)
8110 rec->bad_full_backref = 1;
/*
 * Print "Invalid key type(<type>) found in root(<root>)" to stderr, using
 * print_key_type() for @key_type and print_objectid() for @rootid.
 */
8114 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8116 fprintf(stderr, "Invalid key type(");
8117 print_key_type(stderr, 0, key_type);
8118 fprintf(stderr, ") found in root(");
8119 print_objectid(stderr, rootid, 0);
8120 fprintf(stderr, ")\n");
8124 * Check if the key is valid with its extent buffer.
8126 * This is an early check in case an invalid key exists in an extent buffer
8127 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check that an item of type @key_type may appear in the tree whose root
 * objectid is @rootid.
 *
 * Visible rules:
 *  - DEV_ITEM and CHUNK_ITEM are tested against the chunk tree;
 *  - EXTENT_ITEM, METADATA_ITEM and BLOCK_GROUP_ITEM against the extent
 *    tree;
 *  - ROOT_ITEM against the root tree;
 *  - DEV_EXTENT against the device tree.
 * A mismatch is reported through report_mismatch_key_root().
 *
 * NOTE(review): the label "case BTRFS_CSUM_TREE_OBJECTID:" is a tree
 * objectid, while every other label (and the u8 @key_type parameter) is an
 * item key type. As written this branch matches whatever key type shares
 * that numeric value rather than checksum items; BTRFS_EXTENT_CSUM_KEY was
 * presumably intended. The guarded condition continues on a line not shown
 * here, so confirm the intended set of trees before changing it.
 * NOTE(review): the switch header, the jumps taken on mismatch and the
 * return statements are not among the lines shown here.
 */
8130 static int check_type_with_root(u64 rootid, u8 key_type)
8133 /* Only valid in chunk tree */
8134 case BTRFS_DEV_ITEM_KEY:
8135 case BTRFS_CHUNK_ITEM_KEY:
8136 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8139 /* valid in csum and log tree */
8140 case BTRFS_CSUM_TREE_OBJECTID:
8141 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8145 case BTRFS_EXTENT_ITEM_KEY:
8146 case BTRFS_METADATA_ITEM_KEY:
8147 case BTRFS_BLOCK_GROUP_ITEM_KEY:
8148 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8151 case BTRFS_ROOT_ITEM_KEY:
8152 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8155 case BTRFS_DEV_EXTENT_KEY:
8156 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8162 report_mismatch_key_root(key_type, rootid);
/*
 * Process the next pending tree block.
 *
 * pick_next_pending() selects a batch of blocks into @bits; readahead is
 * issued for the batch, but only bits[0] is handled by this call.  That
 * block is dropped from the @pending, @reada and @nodes caches, read with
 * read_tree_block(), given its extent flags, passed to check_block() and
 * then walked:
 *  - leaf: every item is dispatched on key.type to the matching
 *    process_*() / add_*_backref() helper, and file extents feed the
 *    data_bytes_allocated / data_bytes_referenced counters;
 *  - node: every child pointer gets an extent record plus a tree backref
 *    and is queued with add_pending().
 * The global btree byte counters are updated before the buffer is freed.
 *
 * NOTE(review): return-value contract to be confirmed against the callers.
 */
8166 static int run_next_block(struct btrfs_root *root,
8167 struct block_info *bits,
8170 struct cache_tree *pending,
8171 struct cache_tree *seen,
8172 struct cache_tree *reada,
8173 struct cache_tree *nodes,
8174 struct cache_tree *extent_cache,
8175 struct cache_tree *chunk_cache,
8176 struct rb_root *dev_cache,
8177 struct block_group_tree *block_group_cache,
8178 struct device_extent_tree *dev_extent_cache,
8179 struct root_item_record *ri)
8181 struct btrfs_fs_info *fs_info = root->fs_info;
8182 struct extent_buffer *buf;
8183 struct extent_record *rec = NULL;
8194 struct btrfs_key key;
8195 struct cache_extent *cache;
/* Pick the next batch of blocks and start readahead for each of them. */
8198 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8199 bits_nr, &reada_bits);
8204 for(i = 0; i < nritems; i++) {
8205 ret = add_cache_extent(reada, bits[i].start,
8210 /* fixme, get the parent transid */
8211 readahead_tree_block(fs_info, bits[i].start, 0);
/* Only the first picked block is processed; *last remembers its start. */
8214 *last = bits[0].start;
8215 bytenr = bits[0].start;
8216 size = bits[0].size;
/* The block is handled now, so take it out of the tracking caches. */
8218 cache = lookup_cache_extent(pending, bytenr, size);
8220 remove_cache_extent(pending, cache);
8223 cache = lookup_cache_extent(reada, bytenr, size);
8225 remove_cache_extent(reada, cache);
8228 cache = lookup_cache_extent(nodes, bytenr, size);
8230 remove_cache_extent(nodes, cache);
/* The extent record supplies the generation stored in the parent pointer. */
8233 cache = lookup_cache_extent(extent_cache, bytenr, size);
8235 rec = container_of(cache, struct extent_record, cache);
8236 gen = rec->parent_generation;
8239 /* fixme, get the real parent transid */
8240 buf = read_tree_block(root->fs_info, bytenr, gen);
8241 if (!extent_buffer_uptodate(buf)) {
8242 record_bad_block_io(root->fs_info,
8243 extent_cache, bytenr, size);
8247 nritems = btrfs_header_nritems(buf);
/*
 * Work out the extent flags for this block.  With a usable extent tree
 * btrfs_lookup_extent_info() and calc_extent_flag() are involved; when
 * init_extent_tree is set only calc_extent_flag() is.  If the flags cannot
 * be calculated, FULL_BACKREF is assumed.
 */
8250 if (!init_extent_tree) {
8251 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8252 btrfs_header_level(buf), 1, NULL,
8255 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8257 fprintf(stderr, "Couldn't calc extent flags\n");
8258 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8263 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8265 fprintf(stderr, "Couldn't calc extent flags\n");
8266 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/*
 * Cross-check FULL_BACKREF against the way the block was reached.  A flag
 * value that cannot be right is corrected in 'flags' and remembered in
 * rec->bad_full_backref.
 */
8270 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8272 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8273 ri->objectid == btrfs_header_owner(buf)) {
8275 * Ok we got to this block from it's original owner and
8276 * we have FULL_BACKREF set. Relocation can leave
8277 * converted blocks over so this is altogether possible,
8278 * however it's not possible if the generation > the
8279 * last snapshot, so check for this case.
8281 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8282 btrfs_header_generation(buf) > ri->last_snapshot) {
8283 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8284 rec->bad_full_backref = 1;
8289 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8290 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8291 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8292 rec->bad_full_backref = 1;
8296 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8297 rec->flag_block_full_backref = 1;
8301 rec->flag_block_full_backref = 0;
8303 owner = btrfs_header_owner(buf);
8306 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: account the unused space, then dispatch every item by key type. */
8310 if (btrfs_is_leaf(buf)) {
8311 btree_space_waste += btrfs_leaf_free_space(root, buf);
8312 for (i = 0; i < nritems; i++) {
8313 struct btrfs_file_extent_item *fi;
8314 btrfs_item_key_to_cpu(buf, &key, i);
8316 * Check key type against the leaf owner.
8317 * Could filter quite a lot of early error if
8320 if (check_type_with_root(btrfs_header_owner(buf),
8322 fprintf(stderr, "ignoring invalid key\n");
8325 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8326 process_extent_item(root, extent_cache, buf,
8330 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8331 process_extent_item(root, extent_cache, buf,
8335 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8337 btrfs_item_size_nr(buf, i);
8340 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8341 process_chunk_item(chunk_cache, &key, buf, i);
8344 if (key.type == BTRFS_DEV_ITEM_KEY) {
8345 process_device_item(dev_cache, &key, buf, i);
8348 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8349 process_block_group_item(block_group_cache,
8353 if (key.type == BTRFS_DEV_EXTENT_KEY) {
8354 process_device_extent_item(dev_extent_cache,
8359 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8360 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8361 process_extent_ref_v0(extent_cache, buf, i);
/* Keyed tree backref: key.offset is passed in the root position. */
8368 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8369 ret = add_tree_backref(extent_cache,
8370 key.objectid, 0, key.offset, 0);
8373 "add_tree_backref failed (leaf tree block): %s",
/* Shared tree backref: key.offset is passed in the parent position. */
8377 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8378 ret = add_tree_backref(extent_cache,
8379 key.objectid, key.offset, 0, 0);
8382 "add_tree_backref failed (leaf shared block): %s",
8386 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8387 struct btrfs_extent_data_ref *ref;
8388 ref = btrfs_item_ptr(buf, i,
8389 struct btrfs_extent_data_ref);
8390 add_data_backref(extent_cache,
8392 btrfs_extent_data_ref_root(buf, ref),
8393 btrfs_extent_data_ref_objectid(buf,
8395 btrfs_extent_data_ref_offset(buf, ref),
8396 btrfs_extent_data_ref_count(buf, ref),
8397 0, root->fs_info->sectorsize);
8400 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8401 struct btrfs_shared_data_ref *ref;
8402 ref = btrfs_item_ptr(buf, i,
8403 struct btrfs_shared_data_ref);
8404 add_data_backref(extent_cache,
8405 key.objectid, key.offset, 0, 0, 0,
8406 btrfs_shared_data_ref_count(buf, ref),
8407 0, root->fs_info->sectorsize);
/* Orphan items may be queued on the global delete_items list. */
8410 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8411 struct bad_item *bad;
8413 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8417 bad = malloc(sizeof(struct bad_item));
8420 INIT_LIST_HEAD(&bad->list);
8421 memcpy(&bad->key, &key,
8422 sizeof(struct btrfs_key));
8423 bad->root_id = owner;
8424 list_add_tail(&bad->list, &delete_items);
/*
 * The remaining work concerns EXTENT_DATA items; inline extents and extents
 * with a zero disk bytenr are tested for separately below.
 */
8427 if (key.type != BTRFS_EXTENT_DATA_KEY)
8429 fi = btrfs_item_ptr(buf, i,
8430 struct btrfs_file_extent_item);
8431 if (btrfs_file_extent_type(buf, fi) ==
8432 BTRFS_FILE_EXTENT_INLINE)
8434 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8437 data_bytes_allocated +=
8438 btrfs_file_extent_disk_num_bytes(buf, fi);
8439 if (data_bytes_allocated < root->fs_info->sectorsize) {
8442 data_bytes_referenced +=
8443 btrfs_file_extent_num_bytes(buf, fi);
8444 add_data_backref(extent_cache,
8445 btrfs_file_extent_disk_bytenr(buf, fi),
8446 parent, owner, key.objectid, key.offset -
8447 btrfs_file_extent_offset(buf, fi), 1, 1,
8448 btrfs_file_extent_disk_num_bytes(buf, fi));
8452 struct btrfs_key first_key;
8454 first_key.objectid = 0;
8457 btrfs_item_key_to_cpu(buf, &first_key, 0);
8458 level = btrfs_header_level(buf);
/* Node: record every child pointer and queue the child block. */
8459 for (i = 0; i < nritems; i++) {
8460 struct extent_record tmpl;
8462 ptr = btrfs_node_blockptr(buf, i);
8463 size = root->fs_info->nodesize;
8464 btrfs_node_key_to_cpu(buf, &key, i);
8466 if ((level == ri->drop_level)
8467 && is_dropped_key(&key, &ri->drop_key)) {
8472 memset(&tmpl, 0, sizeof(tmpl));
8473 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8474 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8479 tmpl.max_size = size;
8480 ret = add_extent_rec(extent_cache, &tmpl);
8484 ret = add_tree_backref(extent_cache, ptr, parent,
8488 "add_tree_backref failed (non-leaf block): %s",
8494 add_pending(nodes, seen, ptr, size);
8496 add_pending(pending, seen, ptr, size);
8499 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
8500 nritems) * sizeof(struct btrfs_key_ptr);
/* Global accounting of btree bytes, split out by the block's owner. */
8502 total_btree_bytes += buf->len;
8503 if (fs_root_objectid(btrfs_header_owner(buf)))
8504 total_fs_tree_bytes += buf->len;
8505 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8506 total_extent_tree_bytes += buf->len;
8508 free_extent_buffer(buf);
/*
 * Queue a tree root block for scanning and seed the extent cache for it.
 *
 * A block above level 0 is queued on @nodes, a level-0 block on @pending.
 * An extent record covering [buf->start, buf->len) is added, followed by a
 * tree backref: for the reloc tree, or a block older than the mixed backref
 * revision, buf->start is passed in the parent position; otherwise the
 * parent is 0 and objectid is passed in the root position.
 */
8512 static int add_root_to_pending(struct extent_buffer *buf,
8513 struct cache_tree *extent_cache,
8514 struct cache_tree *pending,
8515 struct cache_tree *seen,
8516 struct cache_tree *nodes,
8519 struct extent_record tmpl;
8522 if (btrfs_header_level(buf) > 0)
8523 add_pending(nodes, seen, buf->start, buf->len);
8525 add_pending(pending, seen, buf->start, buf->len);
/* Template record describing the root block itself. */
8527 memset(&tmpl, 0, sizeof(tmpl));
8528 tmpl.start = buf->start;
8533 tmpl.max_size = buf->len;
8534 add_extent_rec(extent_cache, &tmpl);
8536 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8537 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8538 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8541 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8546 /* as we fix the tree, we might be deleting blocks that
8547 * we're tracking for repair. This hook makes sure we
8548 * remove any backrefs for blocks as we are fixing them.
/*
 * Looks up the extent record covering [bytenr, num_bytes) in
 * root->fs_info->fsck_extent_cache and adjusts the backref that matches
 * (parent, root_objectid[, owner, offset]) for the references being dropped.
 */
8550 static int free_extent_hook(struct btrfs_trans_handle *trans,
8551 struct btrfs_root *root,
8552 u64 bytenr, u64 num_bytes, u64 parent,
8553 u64 root_objectid, u64 owner, u64 offset,
8556 struct extent_record *rec;
8557 struct cache_extent *cache;
8559 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* Owners at or above BTRFS_FIRST_FREE_OBJECTID are treated as data. */
8561 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8562 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8566 rec = container_of(cache, struct extent_record, cache);
/* Data extent: subtract refs_to_drop from the backref and record counters. */
8568 struct data_backref *back;
8569 back = find_data_backref(rec, parent, root_objectid, owner,
8570 offset, 1, bytenr, num_bytes);
8573 if (back->node.found_ref) {
8574 back->found_ref -= refs_to_drop;
8576 rec->refs -= refs_to_drop;
8578 if (back->node.found_extent_tree) {
8579 back->num_refs -= refs_to_drop;
8580 if (rec->extent_item_refs)
8581 rec->extent_item_refs -= refs_to_drop;
/* Clear the per-node flags once the matching counter has reached zero. */
8583 if (back->found_ref == 0)
8584 back->node.found_ref = 0;
8585 if (back->num_refs == 0)
8586 back->node.found_extent_tree = 0;
/*
 * NOTE(review): the backref is erased while node.found_ref is still set;
 * confirm whether '!back->node.found_ref' was intended here and in the
 * tree-block branch below.
 */
8588 if (!back->node.found_extent_tree && back->node.found_ref) {
8589 rb_erase(&back->node.node, &rec->backref_tree);
/* Tree block: clear the found flags on the matching tree backref. */
8593 struct tree_backref *back;
8594 back = find_tree_backref(rec, parent, root_objectid);
8597 if (back->node.found_ref) {
8600 back->node.found_ref = 0;
8602 if (back->node.found_extent_tree) {
8603 if (rec->extent_item_refs)
8604 rec->extent_item_refs--;
8605 back->node.found_extent_tree = 0;
8607 if (!back->node.found_extent_tree && back->node.found_ref) {
8608 rb_erase(&back->node.node, &rec->backref_tree);
/* Presumably releases the record once nothing needs it - confirm. */
8612 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent-tree items whose key objectid equals bytenr.
 *
 * The search key starts at offset (u64)-1 and is stepped downwards.  Items
 * of the extent item / metadata item / backref key types listed below are
 * removed with btrfs_del_item(), each removal being reported on stderr.
 * When an EXTENT_ITEM or METADATA_ITEM is removed the block group
 * accounting is adjusted through btrfs_update_block_group().
 */
8617 static int delete_extent_records(struct btrfs_trans_handle *trans,
8618 struct btrfs_root *root,
8619 struct btrfs_path *path,
8622 struct btrfs_key key;
8623 struct btrfs_key found_key;
8624 struct extent_buffer *leaf;
8629 key.objectid = bytenr;
8631 key.offset = (u64)-1;
8634 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8641 if (path->slots[0] == 0)
8647 leaf = path->nodes[0];
8648 slot = path->slots[0];
8650 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8651 if (found_key.objectid != bytenr)
/* Not an extent-related item: move the search key below it and retry. */
8654 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8655 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8656 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8657 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8658 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8659 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8660 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8661 btrfs_release_path(path);
8662 if (found_key.type == 0) {
8663 if (found_key.offset == 0)
8665 key.offset = found_key.offset - 1;
8666 key.type = found_key.type;
8668 key.type = found_key.type - 1;
8669 key.offset = (u64)-1;
8673 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8674 found_key.objectid, found_key.type, found_key.offset);
8676 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8679 btrfs_release_path(path);
/*
 * An EXTENT_ITEM key carries the extent length in its offset; for a
 * METADATA_ITEM the length used is fs_info->nodesize.
 */
8681 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8682 found_key.type == BTRFS_METADATA_ITEM_KEY) {
8683 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8684 found_key.offset : root->fs_info->nodesize;
8686 ret = btrfs_update_block_group(trans, root, bytenr,
8693 btrfs_release_path(path);
8698 * for a single backref, this will allocate a new extent
8699 * and add the backref to it.
/*
 * Two stages are visible: an extent item keyed (rec->start,
 * BTRFS_EXTENT_ITEM_KEY, rec->max_size) is inserted into the extent root
 * with a reference count of zero, then btrfs_inc_extent_ref() is called to
 * add the reference(s) described by @back.
 */
8701 static int record_extent(struct btrfs_trans_handle *trans,
8702 struct btrfs_fs_info *info,
8703 struct btrfs_path *path,
8704 struct extent_record *rec,
8705 struct extent_backref *back,
8706 int allocated, u64 flags)
8709 struct btrfs_root *extent_root = info->extent_root;
8710 struct extent_buffer *leaf;
8711 struct btrfs_key ins_key;
8712 struct btrfs_extent_item *ei;
8713 struct data_backref *dback;
8714 struct btrfs_tree_block_info *bi;
8717 rec->max_size = max_t(u64, rec->max_size,
/* Item size: the extent item, plus a btrfs_tree_block_info where added. */
8721 u32 item_size = sizeof(*ei);
8724 item_size += sizeof(*bi);
8726 ins_key.objectid = rec->start;
8727 ins_key.offset = rec->max_size;
8728 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8730 ret = btrfs_insert_empty_item(trans, extent_root, path,
8731 &ins_key, item_size);
8735 leaf = path->nodes[0];
8736 ei = btrfs_item_ptr(leaf, path->slots[0],
8737 struct btrfs_extent_item);
/* Start at zero refs; btrfs_inc_extent_ref() below adds the references. */
8739 btrfs_set_extent_refs(leaf, ei, 0);
8740 btrfs_set_extent_generation(leaf, ei, rec->generation);
8742 if (back->is_data) {
8743 btrfs_set_extent_flags(leaf, ei,
8744 BTRFS_EXTENT_FLAG_DATA);
/*
 * NOTE(review): the '©_key' tokens below look like a mis-encoded
 * '&copy_key', and the declaration ends in a stray second ';' - confirm
 * against the original file.
 */
8746 struct btrfs_disk_key copy_key;;
/* The tree block info sits directly after the extent item in the leaf. */
8748 bi = (struct btrfs_tree_block_info *)(ei + 1);
8749 memset_extent_buffer(leaf, 0, (unsigned long)bi,
8752 btrfs_set_disk_key_objectid(©_key,
8753 rec->info_objectid);
8754 btrfs_set_disk_key_type(©_key, 0);
8755 btrfs_set_disk_key_offset(©_key, 0);
8757 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8758 btrfs_set_tree_block_key(leaf, bi, ©_key);
8760 btrfs_set_extent_flags(leaf, ei,
8761 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8764 btrfs_mark_buffer_dirty(leaf);
8765 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8766 rec->max_size, 1, 0);
8769 btrfs_release_path(path);
/* Data backref: one btrfs_inc_extent_ref() call per reference found. */
8772 if (back->is_data) {
8776 dback = to_data_backref(back);
8777 if (back->full_backref)
8778 parent = dback->parent;
8782 for (i = 0; i < dback->found_ref; i++) {
8783 /* if parent != 0, we're doing a full backref
8784 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8785 * just makes the backref allocator create a data
8788 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8789 rec->start, rec->max_size,
8793 BTRFS_FIRST_FREE_OBJECTID :
8799 fprintf(stderr, "adding new data backref"
8800 " on %llu %s %llu owner %llu"
8801 " offset %llu found %d\n",
8802 (unsigned long long)rec->start,
8803 back->full_backref ?
8805 back->full_backref ?
8806 (unsigned long long)parent :
8807 (unsigned long long)dback->root,
8808 (unsigned long long)dback->owner,
8809 (unsigned long long)dback->offset,
/* Tree backref: a single btrfs_inc_extent_ref() call. */
8813 struct tree_backref *tback;
8815 tback = to_tree_backref(back);
8816 if (back->full_backref)
8817 parent = tback->parent;
8821 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8822 rec->start, rec->max_size,
8823 parent, tback->root, 0, 0);
8824 fprintf(stderr, "adding new tree backref on "
8825 "start %llu len %llu parent %llu root %llu\n",
8826 rec->start, rec->max_size, parent, tback->root);
8829 btrfs_release_path(path);
/*
 * Search the @entries list for the extent_entry whose bytenr and bytes
 * both equal the given values.
 */
8833 static struct extent_entry *find_entry(struct list_head *entries,
8834 u64 bytenr, u64 bytes)
8836 struct extent_entry *entry = NULL;
8838 list_for_each_entry(entry, entries, list) {
8839 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Choose the most trustworthy entry from @entries, judged by 'count'.
 *
 * Entries whose 'broken' tally equals their 'count' are not trusted.  An
 * entry that merely ties with the current best (or with the previous
 * entry) does not settle the choice, so the scan keeps going.
 */
8846 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8848 struct extent_entry *entry, *best = NULL, *prev = NULL;
8850 list_for_each_entry(entry, entries, list) {
8852 * If there are as many broken entries as entries then we know
8853 * not to trust this particular entry.
8855 if (entry->broken == entry->count)
8859 * Special case, when there are only two entries and 'best' is
8869 * If our current entry == best then we can't be sure our best
8870 * is really the best, so we need to keep searching.
8872 if (best && best->count == entry->count) {
8878 /* Prev == entry, not good enough, have to keep searching */
8879 if (!prev->broken && prev->count == entry->count)
8883 best = (prev->count > entry->count) ? prev : entry;
8884 else if (best->count < entry->count)
/*
 * Rewrite the file extent item behind @dback so that it agrees with @entry.
 *
 * The fs root dback->root is read and the EXTENT_DATA items of inode
 * dback->owner are searched from dback->offset for the one whose
 * disk_bytenr / disk_num_bytes equal dback's.  Inside a transaction the
 * item is then looked up again with cow set and its disk_bytenr, offset
 * and disk_num_bytes are adjusted to entry->bytenr / entry->bytes;
 * ram_bytes is only rewritten for uncompressed extents.  The transaction
 * is committed and the final return is 'ret ? ret : err'.
 */
8892 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8893 struct data_backref *dback, struct extent_entry *entry)
8895 struct btrfs_trans_handle *trans;
8896 struct btrfs_root *root;
8897 struct btrfs_file_extent_item *fi;
8898 struct extent_buffer *leaf;
8899 struct btrfs_key key;
/* Find the fs root that the reference belongs to. */
8903 key.objectid = dback->root;
8904 key.type = BTRFS_ROOT_ITEM_KEY;
8905 key.offset = (u64)-1;
8906 root = btrfs_read_fs_root(info, &key);
8908 fprintf(stderr, "Couldn't find root for our ref\n");
8913 * The backref points to the original offset of the extent if it was
8914 * split, so we need to search down to the offset we have and then walk
8915 * forward until we find the backref we're looking for.
8917 key.objectid = dback->owner;
8918 key.type = BTRFS_EXTENT_DATA_KEY;
8919 key.offset = dback->offset;
8920 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8922 fprintf(stderr, "Error looking up ref %d\n", ret);
8927 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8928 ret = btrfs_next_leaf(root, path);
8930 fprintf(stderr, "Couldn't find our ref, next\n");
8934 leaf = path->nodes[0];
8935 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8936 if (key.objectid != dback->owner ||
8937 key.type != BTRFS_EXTENT_DATA_KEY) {
8938 fprintf(stderr, "Couldn't find our ref, search\n");
8941 fi = btrfs_item_ptr(leaf, path->slots[0],
8942 struct btrfs_file_extent_item);
8943 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8944 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* This is the file extent the backref was built from. */
8946 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8951 btrfs_release_path(path);
8953 trans = btrfs_start_transaction(root, 1);
8955 return PTR_ERR(trans);
8958 * Ok we have the key of the file extent we want to fix, now we can cow
8959 * down to the thing and fix it.
8961 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8963 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8964 key.objectid, key.type, key.offset, ret);
8968 fprintf(stderr, "Well that's odd, we just found this key "
8969 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8974 leaf = path->nodes[0];
8975 fi = btrfs_item_ptr(leaf, path->slots[0],
8976 struct btrfs_file_extent_item);
/* A compressed extent with a different start is reported, not repaired. */
8978 if (btrfs_file_extent_compression(leaf, fi) &&
8979 dback->disk_bytenr != entry->bytenr) {
8980 fprintf(stderr, "Ref doesn't match the record start and is "
8981 "compressed, please take a btrfs-image of this file "
8982 "system and send it to a btrfs developer so they can "
8983 "complete this functionality for bytenr %Lu\n",
8984 dback->disk_bytenr);
/*
 * Three cases: a broken backref simply takes the entry's bytenr; a ref
 * that starts after or before the entry also has its in-extent offset
 * handled so the referenced range stays inside the entry.
 */
8989 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8990 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8991 } else if (dback->disk_bytenr > entry->bytenr) {
8992 u64 off_diff, offset;
8994 off_diff = dback->disk_bytenr - entry->bytenr;
8995 offset = btrfs_file_extent_offset(leaf, fi);
8996 if (dback->disk_bytenr + offset +
8997 btrfs_file_extent_num_bytes(leaf, fi) >
8998 entry->bytenr + entry->bytes) {
8999 fprintf(stderr, "Ref is past the entry end, please "
9000 "take a btrfs-image of this file system and "
9001 "send it to a btrfs developer, ref %Lu\n",
9002 dback->disk_bytenr);
9007 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9008 btrfs_set_file_extent_offset(leaf, fi, offset);
9009 } else if (dback->disk_bytenr < entry->bytenr) {
9012 offset = btrfs_file_extent_offset(leaf, fi);
9013 if (dback->disk_bytenr + offset < entry->bytenr) {
9014 fprintf(stderr, "Ref is before the entry start, please"
9015 " take a btrfs-image of this file system and "
9016 "send it to a btrfs developer, ref %Lu\n",
9017 dback->disk_bytenr);
9022 offset += dback->disk_bytenr;
9023 offset -= entry->bytenr;
9024 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9025 btrfs_set_file_extent_offset(leaf, fi, offset);
9028 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9031 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9032 * only do this if we aren't using compression, otherwise it's a
9035 if (!btrfs_file_extent_compression(leaf, fi))
9036 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9038 printf("ram bytes may be wrong?\n");
9039 btrfs_mark_buffer_dirty(leaf);
/* A failure from the steps above takes precedence over the commit result. */
9041 err = btrfs_commit_transaction(trans, root);
9042 btrfs_release_path(path);
9043 return ret ? ret : err;
/*
 * Make the data backrefs of @rec agree on (disk_bytenr, bytes).
 *
 * First pass: every non-full data backref with a real file extent ref is
 * tallied into a temporary list of (bytenr, bytes) entries.  If the
 * entries disagree, find_most_right_entry() picks a winner; when that is
 * inconclusive the extent record's own (start, nr) is consulted, and added
 * as an entry if necessary.  Second pass: each backref that differs from
 * the winner is handed to repair_ref().  The temporary list is drained
 * before returning.
 */
9046 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9047 struct extent_record *rec)
9049 struct extent_backref *back, *tmp;
9050 struct data_backref *dback;
9051 struct extent_entry *entry, *best = NULL;
9054 int broken_entries = 0;
9059 * Metadata is easy and the backrefs should always agree on bytenr and
9060 * size, if not we've got bigger issues.
/* Pass 1: tally the (disk_bytenr, bytes) pairs the backrefs claim. */
9065 rbtree_postorder_for_each_entry_safe(back, tmp,
9066 &rec->backref_tree, node) {
9067 if (back->full_backref || !back->is_data)
9070 dback = to_data_backref(back);
9073 * We only pay attention to backrefs that we found a real
9076 if (dback->found_ref == 0)
9080 * For now we only catch when the bytes don't match, not the
9081 * bytenr. We can easily do this at the same time, but I want
9082 * to have a fs image to test on before we just add repair
9083 * functionality willy-nilly so we know we won't screw up the
9087 entry = find_entry(&entries, dback->disk_bytenr,
9090 entry = malloc(sizeof(struct extent_entry));
9095 memset(entry, 0, sizeof(*entry));
9096 entry->bytenr = dback->disk_bytenr;
9097 entry->bytes = dback->bytes;
9098 list_add_tail(&entry->list, &entries);
9103 * If we only have on entry we may think the entries agree when
9104 * in reality they don't so we have to do some extra checking.
9106 if (dback->disk_bytenr != rec->start ||
9107 dback->bytes != rec->nr || back->broken)
9118 /* Yay all the backrefs agree, carry on good sir */
9119 if (nr_entries <= 1 && !mismatch)
9122 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9123 "%Lu\n", rec->start);
9126 * First we want to see if the backrefs can agree amongst themselves who
9127 * is right, so figure out which one of the entries has the highest
9130 best = find_most_right_entry(&entries);
9133 * Ok so we may have an even split between what the backrefs think, so
9134 * this is where we use the extent ref to see what it thinks.
9137 entry = find_entry(&entries, rec->start, rec->nr);
9138 if (!entry && (!broken_entries || !rec->found_rec)) {
9139 fprintf(stderr, "Backrefs don't agree with each other "
9140 "and extent record doesn't agree with anybody,"
9141 " so we can't fix bytenr %Lu bytes %Lu\n",
9142 rec->start, rec->nr);
9145 } else if (!entry) {
9147 * Ok our backrefs were broken, we'll assume this is the
9148 * correct value and add an entry for this range.
9150 entry = malloc(sizeof(struct extent_entry));
9155 memset(entry, 0, sizeof(*entry));
9156 entry->bytenr = rec->start;
9157 entry->bytes = rec->nr;
9158 list_add_tail(&entry->list, &entries);
9162 best = find_most_right_entry(&entries);
9164 fprintf(stderr, "Backrefs and extent record evenly "
9165 "split on who is right, this is going to "
9166 "require user input to fix bytenr %Lu bytes "
9167 "%Lu\n", rec->start, rec->nr);
9174 * I don't think this can happen currently as we'll abort() if we catch
9175 * this case higher up, but in case somebody removes that we still can't
9176 * deal with it properly here yet, so just bail out of that's the case.
9178 if (best->bytenr != rec->start) {
9179 fprintf(stderr, "Extent start and backref starts don't match, "
9180 "please use btrfs-image on this file system and send "
9181 "it to a btrfs developer so they can make fsck fix "
9182 "this particular case. bytenr is %Lu, bytes is %Lu\n",
9183 rec->start, rec->nr);
9189 * Ok great we all agreed on an extent record, let's go find the real
9190 * references and fix up the ones that don't match.
/* Pass 2: repair every backref that disagrees with the chosen entry. */
9192 rbtree_postorder_for_each_entry_safe(back, tmp,
9193 &rec->backref_tree, node) {
9194 if (back->full_backref || !back->is_data)
9197 dback = to_data_backref(back);
9200 * Still ignoring backrefs that don't have a real ref attached
9203 if (dback->found_ref == 0)
9206 if (dback->bytes == best->bytes &&
9207 dback->disk_bytenr == best->bytenr)
9210 ret = repair_ref(info, path, dback, best);
9216 * Ok we messed with the actual refs, which means we need to drop our
9217 * entire cache and go back and rescan. I know this is a huge pain and
9218 * adds a lot of extra work, but it's the only way to be safe. Once all
9219 * the backrefs agree we may not need to do anything to the extent
/* Drain the temporary tally list. */
9224 while (!list_empty(&entries)) {
9225 entry = list_entry(entries.next, struct extent_entry, list);
9226 list_del_init(&entry->list);
/*
 * Replace @rec in @extent_cache with its first duplicate record.
 *
 * Nothing is done when @rec already has found_rec set or carries more than
 * one duplicate.  Otherwise the first entry of rec->dups ('good') takes
 * over rec's refs and backrefs, is re-keyed to its own start/nr, and
 * absorbs every other cached record that overlaps that range: records with
 * found_rec or duplicates of their own become duplicates of 'good', the
 * others are merged into it.  The final return is 0 when 'good' still has
 * duplicates and 1 when it has none.
 *
 * NOTE(review): this function moves backrefs through the 'backrefs' list,
 * while the other functions in this file walk 'backref_tree' - confirm the
 * list is still the right container.
 */
9232 static int process_duplicates(struct cache_tree *extent_cache,
9233 struct extent_record *rec)
9235 struct extent_record *good, *tmp;
9236 struct cache_extent *cache;
9240 * If we found a extent record for this extent then return, or if we
9241 * have more than one duplicate we are likely going to need to delete
9244 if (rec->found_rec || rec->num_duplicates > 1)
9247 /* Shouldn't happen but just in case */
9248 BUG_ON(!rec->num_duplicates);
9251 * So this happens if we end up with a backref that doesn't match the
9252 * actual extent entry. So either the backref is bad or the extent
9253 * entry is bad. Either way we want to have the extent_record actually
9254 * reflect what we found in the extent_tree, so we need to take the
9255 * duplicate out and use that as the extent_record since the only way we
9256 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9258 remove_cache_extent(extent_cache, &rec->cache);
/* Promote the first duplicate and reset its bookkeeping. */
9260 good = to_extent_record(rec->dups.next);
9261 list_del_init(&good->list);
9262 INIT_LIST_HEAD(&good->backrefs);
9263 INIT_LIST_HEAD(&good->dups);
9264 good->cache.start = good->start;
9265 good->cache.size = good->nr;
9266 good->content_checked = 0;
9267 good->owner_ref_checked = 0;
9268 good->num_duplicates = 0;
9269 good->refs = rec->refs;
9270 list_splice_init(&rec->backrefs, &good->backrefs);
/* Absorb any other cached record overlapping the promoted range. */
9272 cache = lookup_cache_extent(extent_cache, good->start,
9276 tmp = container_of(cache, struct extent_record, cache);
9279 * If we find another overlapping extent and it's found_rec is
9280 * set then it's a duplicate and we need to try and delete
9283 if (tmp->found_rec || tmp->num_duplicates > 0) {
9284 if (list_empty(&good->list))
9285 list_add_tail(&good->list,
9286 &duplicate_extents);
9287 good->num_duplicates += tmp->num_duplicates + 1;
9288 list_splice_init(&tmp->dups, &good->dups);
9289 list_del_init(&tmp->list);
9290 list_add_tail(&tmp->list, &good->dups);
9291 remove_cache_extent(extent_cache, &tmp->cache);
9296 * Ok we have another non extent item backed extent rec, so lets
9297 * just add it to this extent and carry on like we did above.
9299 good->refs += tmp->refs;
9300 list_splice_init(&tmp->backrefs, &good->backrefs);
9301 remove_cache_extent(extent_cache, &tmp->cache);
9304 ret = insert_cache_extent(extent_cache, &good->cache);
9307 return good->num_duplicates ? 0 : 1;
/*
 * Delete the extent items of duplicate records of @rec.
 *
 * One record among @rec and rec->dups is selected as the one covering the
 * others; overlapping records that do not fully cover each other are
 * reported and not handled.  The remaining records are moved to a local
 * delete list, and for each of them with found_rec set the item keyed
 * (start, BTRFS_EXTENT_ITEM_KEY, nr) is looked up and removed from the
 * extent root inside one transaction.  rec->num_duplicates is cleared when
 * there was no error and nothing had to be deleted.  The final return is
 * 'ret ? ret : nr_del'.
 */
9310 static int delete_duplicate_records(struct btrfs_root *root,
9311 struct extent_record *rec)
9313 struct btrfs_trans_handle *trans;
9314 LIST_HEAD(delete_list);
9315 struct btrfs_path path;
9316 struct extent_record *tmp, *good, *n;
9319 struct btrfs_key key;
9321 btrfs_init_path(&path);
9324 /* Find the record that covers all of the duplicates. */
9325 list_for_each_entry(tmp, &rec->dups, list) {
9326 if (good->start < tmp->start)
9328 if (good->nr > tmp->nr)
9331 if (tmp->start + tmp->nr < good->start + good->nr) {
9332 fprintf(stderr, "Ok we have overlapping extents that "
9333 "aren't completely covered by each other, this "
9334 "is going to require more careful thought. "
9335 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9336 tmp->start, tmp->nr, good->start, good->nr);
/* Collect the records to delete on a local list. */
9343 list_add_tail(&rec->list, &delete_list);
9345 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9348 list_move_tail(&tmp->list, &delete_list);
/* All deletions happen in the extent root, in one transaction. */
9351 root = root->fs_info->extent_root;
9352 trans = btrfs_start_transaction(root, 1);
9353 if (IS_ERR(trans)) {
9354 ret = PTR_ERR(trans);
9358 list_for_each_entry(tmp, &delete_list, list) {
9359 if (tmp->found_rec == 0)
9361 key.objectid = tmp->start;
9362 key.type = BTRFS_EXTENT_ITEM_KEY;
9363 key.offset = tmp->nr;
9365 /* Shouldn't happen but just in case */
9366 if (tmp->metadata) {
9367 fprintf(stderr, "Well this shouldn't happen, extent "
9368 "record overlaps but is metadata? "
9369 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9373 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9379 ret = btrfs_del_item(trans, root, &path);
9382 btrfs_release_path(&path);
9385 err = btrfs_commit_transaction(trans, root);
/* Unlink whatever is left on the local delete list and on rec->dups. */
9389 while (!list_empty(&delete_list)) {
9390 tmp = to_extent_record(delete_list.next);
9391 list_del_init(&tmp->list);
9397 while (!list_empty(&rec->dups)) {
9398 tmp = to_extent_record(rec->dups.next);
9399 list_del_init(&tmp->list);
9403 btrfs_release_path(&path);
9405 if (!ret && !nr_del)
9406 rec->num_duplicates = 0;
9408 return ret ? ret : nr_del;
/*
 * Try to attach real file extents to the data backrefs of @rec that have
 * none yet.
 *
 * For every non-full data backref with found_ref == 0, the fs root
 * dback->root is read and the key (dback->owner, EXTENT_DATA,
 * dback->offset) is searched for.  A root that does not exist (-ENOENT) is
 * skipped; any other root error is returned.  When the file extent exists,
 * the backref takes over its disk_bytenr / disk_num_bytes and found_ref is
 * incremented, subject to the extent-cache check described in the comment
 * inside the loop.
 */
9411 static int find_possible_backrefs(struct btrfs_fs_info *info,
9412 struct btrfs_path *path,
9413 struct cache_tree *extent_cache,
9414 struct extent_record *rec)
9416 struct btrfs_root *root;
9417 struct extent_backref *back, *tmp;
9418 struct data_backref *dback;
9419 struct cache_extent *cache;
9420 struct btrfs_file_extent_item *fi;
9421 struct btrfs_key key;
9425 rbtree_postorder_for_each_entry_safe(back, tmp,
9426 &rec->backref_tree, node) {
9427 /* Don't care about full backrefs (poor unloved backrefs) */
9428 if (back->full_backref || !back->is_data)
9431 dback = to_data_backref(back);
9433 /* We found this one, we don't need to do a lookup */
9434 if (dback->found_ref)
9437 key.objectid = dback->root;
9438 key.type = BTRFS_ROOT_ITEM_KEY;
9439 key.offset = (u64)-1;
9441 root = btrfs_read_fs_root(info, &key);
9443 /* No root, definitely a bad ref, skip */
9444 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9446 /* Other err, exit */
9448 return PTR_ERR(root);
/* Look for the file extent this backref describes. */
9450 key.objectid = dback->owner;
9451 key.type = BTRFS_EXTENT_DATA_KEY;
9452 key.offset = dback->offset;
9453 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9455 btrfs_release_path(path);
9458 /* Didn't find it, we can carry on */
9463 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9464 struct btrfs_file_extent_item);
9465 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9466 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9467 btrfs_release_path(path);
9468 cache = lookup_cache_extent(extent_cache, bytenr, 1);
/* NOTE(review): this inner 'tmp' shadows the loop cursor declared above. */
9470 struct extent_record *tmp;
9471 tmp = container_of(cache, struct extent_record, cache);
9474 * If we found an extent record for the bytenr for this
9475 * particular backref then we can't add it to our
9476 * current extent record. We only want to add backrefs
9477 * that don't have a corresponding extent item in the
9478 * extent tree since they likely belong to this record
9479 * and we need to fix it if it doesn't match bytenrs.
/* Adopt the values read from the file extent item. */
9485 dback->found_ref += 1;
9486 dback->disk_bytenr = bytenr;
9487 dback->bytes = bytes;
9490 * Set this so the verify backref code knows not to trust the
9491 * values in this backref.
9500 * Record orphan data ref into corresponding root.
9502 * Return 0 if the extent item contains data ref and recorded.
9503 * Return 1 if the extent item contains no useful data ref
9504 * In that case, it may contain only shared_dataref or metadata backref
9505 * or the file extent exists(this should be handled by the extent bytenr
9507 * Return <0 if something goes wrong.
/*
 * Walks the non-full data backrefs of @rec that were found in the extent
 * tree but have no file extent ref, and allocates an orphan_data_extent
 * describing (root, owner, offset, extent start, extent size) for them.
 * The final return is !recorded_data_ref.
 */
9509 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9510 struct extent_record *rec)
9512 struct btrfs_key key;
9513 struct btrfs_root *dest_root;
9514 struct extent_backref *back, *tmp;
9515 struct data_backref *dback;
9516 struct orphan_data_extent *orphan;
9517 struct btrfs_path path;
9518 int recorded_data_ref = 0;
9523 btrfs_init_path(&path);
9524 rbtree_postorder_for_each_entry_safe(back, tmp,
9525 &rec->backref_tree, node) {
9526 if (back->full_backref || !back->is_data ||
9527 !back->found_extent_tree)
9529 dback = to_data_backref(back);
9530 if (dback->found_ref)
9532 key.objectid = dback->root;
9533 key.type = BTRFS_ROOT_ITEM_KEY;
9534 key.offset = (u64)-1;
9536 dest_root = btrfs_read_fs_root(fs_info, &key);
9538 /* For non-exist root we just skip it */
9539 if (IS_ERR(dest_root) || !dest_root)
/* Check whether the file extent the backref describes exists. */
9542 key.objectid = dback->owner;
9543 key.type = BTRFS_EXTENT_DATA_KEY;
9544 key.offset = dback->offset;
9546 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9547 btrfs_release_path(&path);
9549 * For ret < 0, it's OK since the fs-tree may be corrupted,
9550 * we need to record it for inode/file extent rebuild.
9551 * For ret > 0, we record it only for file extent rebuild.
9552 * For ret == 0, the file extent exists but only bytenr
9553 * mismatch, let the original bytenr fix routine to handle,
9559 orphan = malloc(sizeof(*orphan));
9564 INIT_LIST_HEAD(&orphan->list);
9565 orphan->root = dback->root;
9566 orphan->objectid = dback->owner;
9567 orphan->offset = dback->offset;
9568 orphan->disk_bytenr = rec->cache.start;
9569 orphan->disk_len = rec->cache.size;
/*
 * NOTE(review): list_add() in kernel-style list.h takes (new, head).  Here
 * the root's list head is passed as the new node and the freshly
 * initialised orphan->list as the head, which looks swapped - confirm.
 */
9570 list_add(&dest_root->orphan_data_extents, &orphan->list);
9571 recorded_data_ref = 1;
9574 btrfs_release_path(&path);
9576 return !recorded_data_ref;
9582 * when an incorrect extent item is found, this will delete
9583 * all of the existing entries for it and recreate them
9584 * based on what the tree scan found.
/*
 * Visible sequence: for a data record whose refs differ from
 * extent_item_refs, find_possible_backrefs() and verify_backrefs() run
 * first; then, in a transaction on the extent root, the existing records
 * are removed with delete_extent_records() and record_extent() is called
 * for the backrefs with found_ref set.  info->corrupt_blocks is consulted
 * before any reference is re-created.
 */
9586 static int fixup_extent_refs(struct btrfs_fs_info *info,
9587 struct cache_tree *extent_cache,
9588 struct extent_record *rec)
9590 struct btrfs_trans_handle *trans = NULL;
9592 struct btrfs_path path;
9593 struct cache_extent *cache;
9594 struct extent_backref *back, *tmp;
/* Carry the record's FULL_BACKREF state into the re-created extent item. */
9598 if (rec->flag_block_full_backref)
9599 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9601 btrfs_init_path(&path);
9602 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9604 * Sometimes the backrefs themselves are so broken they don't
9605 * get attached to any meaningful rec, so first go back and
9606 * check any of our backrefs that we couldn't find and throw
9607 * them into the list if we find the backref so that
9608 * verify_backrefs can figure out what to do.
9610 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9615 /* step one, make sure all of the backrefs agree */
9616 ret = verify_backrefs(info, &path, rec);
9620 trans = btrfs_start_transaction(info->extent_root, 1);
9621 if (IS_ERR(trans)) {
9622 ret = PTR_ERR(trans);
9626 /* step two, delete all the existing records */
9627 ret = delete_extent_records(trans, info->extent_root, &path,
9633 /* was this block corrupt? If so, don't add references to it */
9634 cache = lookup_cache_extent(info->corrupt_blocks,
9635 rec->start, rec->max_size);
9641 /* step three, recreate all the refs we did find */
9642 rbtree_postorder_for_each_entry_safe(back, tmp,
9643 &rec->backref_tree, node) {
9645 * if we didn't find any references, don't create a
9648 if (!back->found_ref)
/* The record is being rewritten, so the bad-full-backref mark is dropped. */
9651 rec->bad_full_backref = 0;
9652 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
/* Commit the transaction on the extent root. */
9660 int err = btrfs_commit_transaction(trans, info->extent_root);
9666 fprintf(stderr, "Repaired extent references for %llu\n",
9667 (unsigned long long)rec->start);
9669 btrfs_release_path(&path);
9673 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9674 struct extent_record *rec)
9676 struct btrfs_trans_handle *trans;
9677 struct btrfs_root *root = fs_info->extent_root;
9678 struct btrfs_path path;
9679 struct btrfs_extent_item *ei;
9680 struct btrfs_key key;
9684 key.objectid = rec->start;
9685 if (rec->metadata) {
9686 key.type = BTRFS_METADATA_ITEM_KEY;
9687 key.offset = rec->info_level;
9689 key.type = BTRFS_EXTENT_ITEM_KEY;
9690 key.offset = rec->max_size;
9693 trans = btrfs_start_transaction(root, 0);
9695 return PTR_ERR(trans);
9697 btrfs_init_path(&path);
9698 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9700 btrfs_release_path(&path);
9701 btrfs_commit_transaction(trans, root);
9704 fprintf(stderr, "Didn't find extent for %llu\n",
9705 (unsigned long long)rec->start);
9706 btrfs_release_path(&path);
9707 btrfs_commit_transaction(trans, root);
9711 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9712 struct btrfs_extent_item);
9713 flags = btrfs_extent_flags(path.nodes[0], ei);
9714 if (rec->flag_block_full_backref) {
9715 fprintf(stderr, "setting full backref on %llu\n",
9716 (unsigned long long)key.objectid);
9717 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9719 fprintf(stderr, "clearing full backref on %llu\n",
9720 (unsigned long long)key.objectid);
9721 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9723 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9724 btrfs_mark_buffer_dirty(path.nodes[0]);
9725 btrfs_release_path(&path);
9726 ret = btrfs_commit_transaction(trans, root);
9728 fprintf(stderr, "Repaired extent flags for %llu\n",
9729 (unsigned long long)rec->start);
/*
 * prune_one_block - drop the parent's pointer to one corrupt block.
 *
 * @trans:   running transaction
 * @info:    filesystem; only info->extent_root is searched
 * @corrupt: describes the bad block (key, level, cache.start)
 *
 * Searches the extent root for corrupt->key with the path's lowest_level
 * set one level above the corrupt block, locates the slot whose block
 * pointer equals corrupt->cache.start and removes it with btrfs_del_ptr().
 *
 * NOTE(review): labels, error checks and return statements are not visible
 * in this excerpt; confirm the return values against the full file.
 */
9734 /* right now we only prune from the extent allocation tree */
9735 static int prune_one_block(struct btrfs_trans_handle *trans,
9736 struct btrfs_fs_info *info,
9737 struct btrfs_corrupt_block *corrupt)
9740 struct btrfs_path path;
9741 struct extent_buffer *eb;
/* The node holding the pointer sits one level above the corrupt block */
9745 int level = corrupt->level + 1;
9747 btrfs_init_path(&path);
9749 /* we want to stop at the parent to our busted block */
9750 path.lowest_level = level;
/* ins_len is -1 and cow is 1 here; presumably the usual "search for deletion" form - confirm */
9752 ret = btrfs_search_slot(trans, info->extent_root,
9753 &corrupt->key, &path, -1, 1);
9758 eb = path.nodes[level];
9765 * hopefully the search gave us the block we want to prune,
9766 * lets try that first
9768 slot = path.slots[level];
9769 found = btrfs_node_blockptr(eb, slot);
9770 if (found == corrupt->cache.start)
9773 nritems = btrfs_header_nritems(eb);
9775 /* the search failed, lets scan this node and hope we find it */
9776 for (slot = 0; slot < nritems; slot++) {
9777 found = btrfs_node_blockptr(eb, slot);
9778 if (found == corrupt->cache.start)
9782 * we couldn't find the bad block. TODO, search all the nodes for pointers
/* Special case when the node examined is the extent root's own node */
9785 if (eb == info->extent_root->node) {
9790 btrfs_release_path(&path);
9795 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9796 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9799 btrfs_release_path(&path);
9803 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9805 struct btrfs_trans_handle *trans = NULL;
9806 struct cache_extent *cache;
9807 struct btrfs_corrupt_block *corrupt;
9810 cache = search_cache_extent(info->corrupt_blocks, 0);
9814 trans = btrfs_start_transaction(info->extent_root, 1);
9816 return PTR_ERR(trans);
9818 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9819 prune_one_block(trans, info, corrupt);
9820 remove_cache_extent(info->corrupt_blocks, cache);
9823 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * reset_cached_block_groups - clear dirty ranges of the free space cache
 * and walk the block groups.
 *
 * @fs_info: filesystem whose free_space_cache extent tree is processed
 *
 * Ranges carrying EXTENT_DIRTY are found with find_first_extent_bit() and
 * cleared with clear_extent_dirty().  Block groups are then looked up from
 * @start onwards, advancing past each one (key.objectid + key.offset).
 *
 * NOTE(review): the loop constructs and whatever is done to each block
 * group are not visible in this excerpt.
 */
9827 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9829 struct btrfs_block_group_cache *cache;
9834 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9835 &start, &end, EXTENT_DIRTY);
9838 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9843 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Continue the lookup right after the end of this block group */
9848 start = cache->key.objectid + cache->key.offset;
/*
 * check_extent_refs - report (and, when repairing, fix) extent records
 * whose reference information is inconsistent.
 *
 * @root:         a root of the filesystem; root->fs_info is what gets used
 * @extent_cache: cache of struct extent_record entries; each record is
 *                removed from it and its backrefs freed as it is processed
 *
 * Visible phases:
 *  1. Mark the range of every extent record and every corrupt block dirty
 *     in fs_info->excluded_extents, then call prune_corrupt_blocks() and
 *     reset_cached_block_groups().
 *  2. While repair is set, drain the duplicate_extents list through
 *     process_duplicates() and delete_duplicate_records().
 *  3. For each record print the applicable messages (multiple extent
 *     items, ref mismatch, backpointer mismatch, owner ref check failed,
 *     bad full backref, stripe crossing, chunk type mismatch) and call
 *     fixup_extent_refs() / fixup_extent_flags() along the way.
 *  4. After the walk a failure message is printed when ret is set and is
 *     not -EAGAIN; the visible code also runs btrfs_fix_block_accounting()
 *     inside a transaction on the extent root.
 *
 * NOTE(review): many lines (declarations, conditions, braces, returns) are
 * missing from this excerpt, so the exact guards around each phase must be
 * confirmed against the full file.
 */
9852 static int check_extent_refs(struct btrfs_root *root,
9853 struct cache_tree *extent_cache)
9855 struct extent_record *rec;
9856 struct cache_extent *cache;
9862 * if we're doing a repair, we have to make sure
9863 * we don't allocate from the problem extents.
9864 * In the worst case, this will be all the
/* Exclude the range of every known extent record */
9867 cache = search_cache_extent(extent_cache, 0);
9869 rec = container_of(cache, struct extent_record, cache);
9870 set_extent_dirty(root->fs_info->excluded_extents,
9872 rec->start + rec->max_size - 1);
9873 cache = next_cache_extent(cache);
9876 /* pin down all the corrupted blocks too */
9877 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9879 set_extent_dirty(root->fs_info->excluded_extents,
9881 cache->start + cache->size - 1);
9882 cache = next_cache_extent(cache);
9884 prune_corrupt_blocks(root->fs_info);
9885 reset_cached_block_groups(root->fs_info);
/* NOTE(review): second reset call; the branch it belongs to is not visible here */
9888 reset_cached_block_groups(root->fs_info);
9891 * We need to delete any duplicate entries we find first otherwise we
9892 * could mess up the extent tree when we have backrefs that actually
9893 * belong to a different extent item and not the weird duplicate one.
9895 while (repair && !list_empty(&duplicate_extents)) {
9896 rec = to_extent_record(duplicate_extents.next);
9897 list_del_init(&rec->list);
9899 /* Sometimes we can find a backref before we find an actual
9900 * extent, so we need to process it a little bit to see if there
9901 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9902 * if this is a backref screwup. If we need to delete stuff
9903 * process_duplicates() will return 0, otherwise it will return
9906 if (process_duplicates(extent_cache, rec))
9908 ret = delete_duplicate_records(root, rec);
9912 * delete_duplicate_records will return the number of entries
9913 * deleted, so if it's greater than 0 then we know we actually
9914 * did something and we need to remove.
9927 cache = search_cache_extent(extent_cache, 0);
9930 rec = container_of(cache, struct extent_record, cache);
9931 if (rec->num_duplicates) {
9932 fprintf(stderr, "extent item %llu has multiple extent "
9933 "items\n", (unsigned long long)rec->start);
9937 if (rec->refs != rec->extent_item_refs) {
9938 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9939 (unsigned long long)rec->start,
9940 (unsigned long long)rec->nr);
9941 fprintf(stderr, "extent item %llu, found %llu\n",
9942 (unsigned long long)rec->extent_item_refs,
9943 (unsigned long long)rec->refs);
9944 ret = record_orphan_data_extents(root->fs_info, rec);
9950 if (all_backpointers_checked(rec, 1)) {
9951 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9952 (unsigned long long)rec->start,
9953 (unsigned long long)rec->nr);
9957 if (!rec->owner_ref_checked) {
9958 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9959 (unsigned long long)rec->start,
9960 (unsigned long long)rec->nr);
9965 if (repair && fix) {
9966 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9972 if (rec->bad_full_backref) {
9973 fprintf(stderr, "bad full backref, on [%llu]\n",
9974 (unsigned long long)rec->start);
9976 ret = fixup_extent_flags(root->fs_info, rec);
9984 * Although it's not a extent ref's problem, we reuse this
9985 * routine for error reporting.
9986 * No repair function yet.
9988 if (rec->crossing_stripes) {
9990 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9991 rec->start, rec->start + rec->max_size);
9995 if (rec->wrong_chunk_type) {
9997 "bad extent [%llu, %llu), type mismatch with chunk\n",
9998 rec->start, rec->start + rec->max_size);
10002 remove_cache_extent(extent_cache, cache);
10003 free_all_extent_backrefs(rec);
10004 if (!init_extent_tree && repair && (!cur_err || fix))
10005 clear_extent_dirty(root->fs_info->excluded_extents,
10007 rec->start + rec->max_size - 1);
10012 if (ret && ret != -EAGAIN) {
10013 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10016 struct btrfs_trans_handle *trans;
10018 root = root->fs_info->extent_root;
10019 trans = btrfs_start_transaction(root, 1);
10020 if (IS_ERR(trans)) {
10021 ret = PTR_ERR(trans);
10025 ret = btrfs_fix_block_accounting(trans, root);
10028 ret = btrfs_commit_transaction(trans, root);
10037 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10041 if (type & BTRFS_BLOCK_GROUP_RAID0) {
10042 stripe_size = length;
10043 stripe_size /= num_stripes;
10044 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10045 stripe_size = length * 2;
10046 stripe_size /= num_stripes;
10047 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10048 stripe_size = length;
10049 stripe_size /= (num_stripes - 1);
10050 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10051 stripe_size = length;
10052 stripe_size /= (num_stripes - 2);
10054 stripe_size = length;
10056 return stripe_size;
/*
 * check_chunk_refs - cross-check one chunk record against the block group
 * cache and the device extent cache.
 *
 * @chunk_rec:         chunk being verified
 * @block_group_cache: block group records, looked up through ->tree
 * @dev_extent_cache:  device extent records, looked up through ->tree
 *
 * A block group record found for the chunk is unlinked from its list and
 * stored in chunk_rec->bg_rec.  For every stripe the device extent at
 * (devid, offset) is looked up, compared with the expected per-stripe
 * length from calc_stripe_length(), and moved onto chunk_rec->dextents.
 *
 * NOTE(review): the fourth parameter, several conditions and the return
 * statements are not visible in this excerpt; the return convention below
 * is the one stated by the original comment.
 */
10060 * Check the chunk with its block group/dev list ref:
10061 * Return 0 if all refs seems valid.
10062 * Return 1 if part of refs seems valid, need later check for rebuild ref
10063 * like missing block group and needs to search extent tree to rebuild them.
10064 * Return -1 if essential refs are missing and unable to rebuild.
10066 static int check_chunk_refs(struct chunk_record *chunk_rec,
10067 struct block_group_tree *block_group_cache,
10068 struct device_extent_tree *dev_extent_cache,
10071 struct cache_extent *block_group_item;
10072 struct block_group_record *block_group_rec;
10073 struct cache_extent *dev_extent_item;
10074 struct device_extent_record *dev_extent_rec;
10078 int metadump_v2 = 0;
/* Find the block group record for this chunk */
10082 block_group_item = lookup_cache_extent(&block_group_cache->tree,
10084 chunk_rec->length);
10085 if (block_group_item) {
10086 block_group_rec = container_of(block_group_item,
10087 struct block_group_record,
/* Chunk length / offset / type_flags pair up with block group offset / objectid / flags */
10089 if (chunk_rec->length != block_group_rec->offset ||
10090 chunk_rec->offset != block_group_rec->objectid ||
10092 chunk_rec->type_flags != block_group_rec->flags)) {
10095 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10096 chunk_rec->objectid,
10101 chunk_rec->type_flags,
10102 block_group_rec->objectid,
10103 block_group_rec->type,
10104 block_group_rec->offset,
10105 block_group_rec->offset,
10106 block_group_rec->objectid,
10107 block_group_rec->flags);
/* Detach the block group from its list and remember it on the chunk */
10110 list_del_init(&block_group_rec->list);
10111 chunk_rec->bg_rec = block_group_rec;
10116 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10117 chunk_rec->objectid,
10122 chunk_rec->type_flags);
/* Length every device extent of this chunk is expected to have */
10129 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10130 chunk_rec->num_stripes);
10131 for (i = 0; i < chunk_rec->num_stripes; ++i) {
10132 devid = chunk_rec->stripes[i].devid;
10133 offset = chunk_rec->stripes[i].offset;
10134 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10135 devid, offset, length);
10136 if (dev_extent_item) {
10137 dev_extent_rec = container_of(dev_extent_item,
10138 struct device_extent_record,
10140 if (dev_extent_rec->objectid != devid ||
10141 dev_extent_rec->offset != offset ||
10142 dev_extent_rec->chunk_offset != chunk_rec->offset ||
10143 dev_extent_rec->length != length) {
10146 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10147 chunk_rec->objectid,
10150 chunk_rec->stripes[i].devid,
10151 chunk_rec->stripes[i].offset,
10152 dev_extent_rec->objectid,
10153 dev_extent_rec->offset,
10154 dev_extent_rec->length);
/* Move the device extent onto the list owned by this chunk */
10157 list_move(&dev_extent_rec->chunk_list,
10158 &chunk_rec->dextents);
10163 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10164 chunk_rec->objectid,
10167 chunk_rec->stripes[i].devid,
10168 chunk_rec->stripes[i].offset);
10175 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
10176 int check_chunks(struct cache_tree *chunk_cache,
10177 struct block_group_tree *block_group_cache,
10178 struct device_extent_tree *dev_extent_cache,
10179 struct list_head *good, struct list_head *bad,
10180 struct list_head *rebuild, int silent)
10182 struct cache_extent *chunk_item;
10183 struct chunk_record *chunk_rec;
10184 struct block_group_record *bg_rec;
10185 struct device_extent_record *dext_rec;
10189 chunk_item = first_cache_extent(chunk_cache);
10190 while (chunk_item) {
10191 chunk_rec = container_of(chunk_item, struct chunk_record,
10193 err = check_chunk_refs(chunk_rec, block_group_cache,
10194 dev_extent_cache, silent);
10197 if (err == 0 && good)
10198 list_add_tail(&chunk_rec->list, good);
10199 if (err > 0 && rebuild)
10200 list_add_tail(&chunk_rec->list, rebuild);
10201 if (err < 0 && bad)
10202 list_add_tail(&chunk_rec->list, bad);
10203 chunk_item = next_cache_extent(chunk_item);
10206 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10209 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10217 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10221 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10222 dext_rec->objectid,
10232 static int check_device_used(struct device_record *dev_rec,
10233 struct device_extent_tree *dext_cache)
10235 struct cache_extent *cache;
10236 struct device_extent_record *dev_extent_rec;
10237 u64 total_byte = 0;
10239 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10241 dev_extent_rec = container_of(cache,
10242 struct device_extent_record,
10244 if (dev_extent_rec->objectid != dev_rec->devid)
10247 list_del_init(&dev_extent_rec->device_list);
10248 total_byte += dev_extent_rec->length;
10249 cache = next_cache_extent(cache);
10252 if (total_byte != dev_rec->byte_used) {
10254 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10255 total_byte, dev_rec->byte_used, dev_rec->objectid,
10256 dev_rec->type, dev_rec->offset);
10263 /* check btrfs_dev_item -> btrfs_dev_extent */
10264 static int check_devices(struct rb_root *dev_cache,
10265 struct device_extent_tree *dev_extent_cache)
10267 struct rb_node *dev_node;
10268 struct device_record *dev_rec;
10269 struct device_extent_record *dext_rec;
10273 dev_node = rb_first(dev_cache);
10275 dev_rec = container_of(dev_node, struct device_record, node);
10276 err = check_device_used(dev_rec, dev_extent_cache);
10280 dev_node = rb_next(dev_node);
10282 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10285 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10286 dext_rec->objectid, dext_rec->offset, dext_rec->length);
10293 static int add_root_item_to_list(struct list_head *head,
10294 u64 objectid, u64 bytenr, u64 last_snapshot,
10295 u8 level, u8 drop_level,
10296 struct btrfs_key *drop_key)
10299 struct root_item_record *ri_rec;
10300 ri_rec = malloc(sizeof(*ri_rec));
10303 ri_rec->bytenr = bytenr;
10304 ri_rec->objectid = objectid;
10305 ri_rec->level = level;
10306 ri_rec->drop_level = drop_level;
10307 ri_rec->last_snapshot = last_snapshot;
10309 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10310 list_add_tail(&ri_rec->list, head);
10315 static void free_root_item_list(struct list_head *list)
10317 struct root_item_record *ri_rec;
10319 while (!list_empty(list)) {
10320 ri_rec = list_first_entry(list, struct root_item_record,
10322 list_del_init(&ri_rec->list);
/*
 * deal_root_from_list - scan the trees queued on @list.
 *
 * @list: list of struct root_item_record entries; a processed entry is
 *        unlinked with list_del()
 * @root: root whose fs_info is used to read tree blocks; also handed to
 *        run_next_block()
 * @bits: block_info array handed to run_next_block()
 * @pending, @seen, @reada, @nodes, @extent_cache, @chunk_cache, @dev_cache,
 * @block_group_cache, @dev_extent_cache: caches passed straight through to
 *        add_root_to_pending() and run_next_block()
 *
 * For each record the root node at rec->bytenr is read and queued with
 * add_root_to_pending(), and run_next_block() is called with the record.
 * A further run_next_block() call without a record follows.
 *
 * NOTE(review): one parameter, the loop structure around run_next_block(),
 * the error handling and the return are not visible in this excerpt.
 */
10327 static int deal_root_from_list(struct list_head *list,
10328 struct btrfs_root *root,
10329 struct block_info *bits,
10331 struct cache_tree *pending,
10332 struct cache_tree *seen,
10333 struct cache_tree *reada,
10334 struct cache_tree *nodes,
10335 struct cache_tree *extent_cache,
10336 struct cache_tree *chunk_cache,
10337 struct rb_root *dev_cache,
10338 struct block_group_tree *block_group_cache,
10339 struct device_extent_tree *dev_extent_cache)
10344 while (!list_empty(list)) {
10345 struct root_item_record *rec;
10346 struct extent_buffer *buf;
10347 rec = list_entry(list->next,
10348 struct root_item_record, list);
/* Read the root node recorded for this tree */
10350 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10351 if (!extent_buffer_uptodate(buf)) {
10352 free_extent_buffer(buf);
10356 ret = add_root_to_pending(buf, extent_cache, pending,
10357 seen, nodes, rec->objectid);
10361 * To rebuild extent tree, we need deal with snapshot
10362 * one by one, otherwise we deal with node firstly which
10363 * can maximize readahead.
10366 ret = run_next_block(root, bits, bits_nr, &last,
10367 pending, seen, reada, nodes,
10368 extent_cache, chunk_cache,
10369 dev_cache, block_group_cache,
10370 dev_extent_cache, rec);
/* Done with this record: drop the buffer reference and unlink the record */
10374 free_extent_buffer(buf);
10375 list_del(&rec->list);
/* This call passes NULL instead of a root_item_record */
10381 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10382 reada, nodes, extent_cache, chunk_cache,
10383 dev_cache, block_group_cache,
10384 dev_extent_cache, NULL);
/*
 * check_chunks_and_extents - scan all tree roots and cross-check chunks,
 * extent references and devices.
 *
 * @fs_info: filesystem to check; its excluded_extents, fsck_extent_cache,
 *           free_extent_hook and corrupt_blocks members are pointed at
 *           locals of this function and are reset to NULL in the visible
 *           clean-up code
 *
 * Visible flow: initialise the local caches, queue the tree root and the
 * chunk root, walk the items of the tree root and queue every ROOT_ITEM on
 * normal_trees (drop_progress objectid of 0) or dropping_trees, process
 * both lists with deal_root_from_list(), then run check_chunks(),
 * check_extent_refs() and check_devices().
 *
 * NOTE(review): declarations, labels, the -EAGAIN handling bodies and the
 * return statements are not visible in this excerpt.
 */
10394 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10396 struct rb_root dev_cache;
10397 struct cache_tree chunk_cache;
10398 struct block_group_tree block_group_cache;
10399 struct device_extent_tree dev_extent_cache;
10400 struct cache_tree extent_cache;
10401 struct cache_tree seen;
10402 struct cache_tree pending;
10403 struct cache_tree reada;
10404 struct cache_tree nodes;
10405 struct extent_io_tree excluded_extents;
10406 struct cache_tree corrupt_blocks;
10407 struct btrfs_path path;
10408 struct btrfs_key key;
10409 struct btrfs_key found_key;
10411 struct block_info *bits;
10413 struct extent_buffer *leaf;
10415 struct btrfs_root_item ri;
10416 struct list_head dropping_trees;
10417 struct list_head normal_trees;
10418 struct btrfs_root *root1;
10419 struct btrfs_root *root;
10423 root = fs_info->fs_root;
10424 dev_cache = RB_ROOT;
10425 cache_tree_init(&chunk_cache);
10426 block_group_tree_init(&block_group_cache);
10427 device_extent_tree_init(&dev_extent_cache);
10429 cache_tree_init(&extent_cache);
10430 cache_tree_init(&seen);
10431 cache_tree_init(&pending);
10432 cache_tree_init(&nodes);
10433 cache_tree_init(&reada);
10434 cache_tree_init(&corrupt_blocks);
10435 extent_io_tree_init(&excluded_extents);
10436 INIT_LIST_HEAD(&dropping_trees);
10437 INIT_LIST_HEAD(&normal_trees);
/* Expose the local trees and the free-extent hook through fs_info */
10440 fs_info->excluded_extents = &excluded_extents;
10441 fs_info->fsck_extent_cache = &extent_cache;
10442 fs_info->free_extent_hook = free_extent_hook;
10443 fs_info->corrupt_blocks = &corrupt_blocks;
10447 bits = malloc(bits_nr * sizeof(struct block_info));
10453 if (ctx.progress_enabled) {
10454 ctx.tp = TASK_EXTENTS;
10455 task_start(ctx.info);
/* The tree root and the chunk root are queued with drop_level 0 and no drop key */
10459 root1 = fs_info->tree_root;
10460 level = btrfs_header_level(root1->node);
10461 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10462 root1->node->start, 0, level, 0, NULL);
10465 root1 = fs_info->chunk_root;
10466 level = btrfs_header_level(root1->node);
10467 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10468 root1->node->start, 0, level, 0, NULL);
10471 btrfs_init_path(&path);
10474 key.type = BTRFS_ROOT_ITEM_KEY;
10475 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10479 leaf = path.nodes[0];
10480 slot = path.slots[0];
10481 if (slot >= btrfs_header_nritems(path.nodes[0])) {
/* NOTE(review): root is fs_info->fs_root here while the path was searched in fs_info->tree_root - confirm this is intended */
10482 ret = btrfs_next_leaf(root, &path);
10485 leaf = path.nodes[0];
10486 slot = path.slots[0];
10488 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10489 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10490 unsigned long offset;
/* Copy the root item out of the leaf before reading its fields */
10493 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10494 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10495 last_snapshot = btrfs_root_last_snapshot(&ri);
/* A drop_progress objectid of 0 selects normal_trees; other roots go to dropping_trees with their drop key */
10496 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10497 level = btrfs_root_level(&ri);
10498 ret = add_root_item_to_list(&normal_trees,
10499 found_key.objectid,
10500 btrfs_root_bytenr(&ri),
10501 last_snapshot, level,
10506 level = btrfs_root_level(&ri);
10507 objectid = found_key.objectid;
/* found_key is reused to hold the CPU-order drop_progress key */
10508 btrfs_disk_key_to_cpu(&found_key,
10509 &ri.drop_progress);
10510 ret = add_root_item_to_list(&dropping_trees,
10512 btrfs_root_bytenr(&ri),
10513 last_snapshot, level,
10514 ri.drop_level, &found_key);
10521 btrfs_release_path(&path);
10524 * check_block can return -EAGAIN if it fixes something, please keep
10525 * this in mind when dealing with return values from these functions, if
10526 * we get -EAGAIN we want to fall through and restart the loop.
10528 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
10529 &seen, &reada, &nodes, &extent_cache,
10530 &chunk_cache, &dev_cache, &block_group_cache,
10531 &dev_extent_cache);
10533 if (ret == -EAGAIN)
10537 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
10538 &pending, &seen, &reada, &nodes,
10539 &extent_cache, &chunk_cache, &dev_cache,
10540 &block_group_cache, &dev_extent_cache);
10542 if (ret == -EAGAIN)
/* No good/bad/rebuild lists are requested and messages are not silenced */
10547 ret = check_chunks(&chunk_cache, &block_group_cache,
10548 &dev_extent_cache, NULL, NULL, NULL, 0);
10550 if (ret == -EAGAIN)
10555 ret = check_extent_refs(root, &extent_cache);
10557 if (ret == -EAGAIN)
10562 ret = check_devices(&dev_cache, &dev_extent_cache);
10567 task_stop(ctx.info);
/* Two similar clean-up sequences follow; NOTE(review): the labels or conditions selecting between them are not visible here */
10569 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10570 extent_io_tree_cleanup(&excluded_extents);
10571 fs_info->fsck_extent_cache = NULL;
10572 fs_info->free_extent_hook = NULL;
10573 fs_info->corrupt_blocks = NULL;
10574 fs_info->excluded_extents = NULL;
10577 free_chunk_cache_tree(&chunk_cache);
10578 free_device_cache_tree(&dev_cache);
10579 free_block_group_tree(&block_group_cache);
10580 free_device_extent_tree(&dev_extent_cache);
10581 free_extent_cache_tree(&seen);
10582 free_extent_cache_tree(&pending);
10583 free_extent_cache_tree(&reada);
10584 free_extent_cache_tree(&nodes);
10585 free_root_item_list(&normal_trees);
10586 free_root_item_list(&dropping_trees);
/* This sequence additionally calls free_extent_record_cache() on extent_cache */
10589 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10590 free_extent_cache_tree(&seen);
10591 free_extent_cache_tree(&pending);
10592 free_extent_cache_tree(&reada);
10593 free_extent_cache_tree(&nodes);
10594 free_chunk_cache_tree(&chunk_cache);
10595 free_block_group_tree(&block_group_cache);
10596 free_device_cache_tree(&dev_cache);
10597 free_device_extent_tree(&dev_extent_cache);
10598 free_extent_record_cache(&extent_cache);
10599 free_root_item_list(&normal_trees);
10600 free_root_item_list(&dropping_trees);
10601 extent_io_tree_cleanup(&excluded_extents);
/*
 * check_tree_block_ref - look up the extent tree backref of a tree block.
 *
 * The extent (or skinny metadata) item for @bytenr is located in the extent
 * root, its flags, generation, level and ref count are compared with the
 * caller's expectations, and its inline refs are scanned for a
 * TREE_BLOCK_REF or SHARED_BLOCK_REF that matches.  A keyed
 * TREE_BLOCK_REF item is searched afterwards.  Problems are accumulated as
 * BACKREF_MISSING / BACKREF_MISMATCH bits in err.
 *
 * NOTE(review): declarations, several conditions and the return are not
 * visible in this excerpt.
 */
10606 * Check backrefs of a tree block given by @bytenr or @eb.
10608 * @root: the root containing the @bytenr or @eb
10609 * @eb: tree block extent buffer, can be NULL
10610 * @bytenr: bytenr of the tree block to search
10611 * @level: tree level of the tree block
10612 * @owner: owner of the tree block
10614 * Return >0 for any error found and output error message
10615 * Return 0 for no error found
10617 static int check_tree_block_ref(struct btrfs_root *root,
10618 struct extent_buffer *eb, u64 bytenr,
10619 int level, u64 owner)
10621 struct btrfs_key key;
10622 struct btrfs_root *extent_root = root->fs_info->extent_root;
10623 struct btrfs_path path;
10624 struct btrfs_extent_item *ei;
10625 struct btrfs_extent_inline_ref *iref;
10626 struct extent_buffer *leaf;
10632 u32 nodesize = root->fs_info->nodesize;
10635 int tree_reloc_root = 0;
/* Remember whether @bytenr is the root node of a tree reloc root */
10640 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10641 btrfs_header_bytenr(root->node) == bytenr)
10642 tree_reloc_root = 1;
10644 btrfs_init_path(&path);
10645 key.objectid = bytenr;
10646 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10647 key.type = BTRFS_METADATA_ITEM_KEY;
10649 key.type = BTRFS_EXTENT_ITEM_KEY;
/* Search with the largest offset, then step back with btrfs_previous_extent_item() */
10650 key.offset = (u64)-1;
10652 /* Search for the backref in extent tree */
10653 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10655 err |= BACKREF_MISSING;
10658 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10660 err |= BACKREF_MISSING;
10664 leaf = path.nodes[0];
10665 slot = path.slots[0];
10666 btrfs_item_key_to_cpu(leaf, &key, slot);
10668 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* A METADATA_ITEM key carries the level in key.offset; otherwise it is read from the btrfs_tree_block_info after the extent item */
10670 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10671 skinny_level = (int)key.offset;
10672 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10674 struct btrfs_tree_block_info *info;
10676 info = (struct btrfs_tree_block_info *)(ei + 1);
10677 skinny_level = btrfs_tree_block_level(leaf, info);
10678 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10685 if (!(btrfs_extent_flags(leaf, ei) &
10686 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10688 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10689 key.objectid, nodesize,
10690 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10691 err = BACKREF_MISMATCH;
/* The block header generation is compared with the one stored in the extent item */
10693 header_gen = btrfs_header_generation(eb);
10694 extent_gen = btrfs_extent_generation(leaf, ei);
10695 if (header_gen != extent_gen) {
10697 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10698 key.objectid, nodesize, header_gen,
10700 err = BACKREF_MISMATCH;
10702 if (level != skinny_level) {
10704 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10705 key.objectid, nodesize, level, skinny_level);
10706 err = BACKREF_MISMATCH;
/* A block owned by a non-fs tree is expected to have exactly one reference */
10708 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10710 "extent[%llu %u] is referred by other roots than %llu",
10711 key.objectid, nodesize, root->objectid);
10712 err = BACKREF_MISMATCH;
10717 * Iterate the extent/metadata item to find the exact backref
/* Walk the inline refs from the first one up to the end of the item */
10719 item_size = btrfs_item_size_nr(leaf, slot);
10720 ptr = (unsigned long)iref;
10721 end = (unsigned long)ei + item_size;
10722 while (ptr < end) {
10723 iref = (struct btrfs_extent_inline_ref *)ptr;
10724 type = btrfs_extent_inline_ref_type(leaf, iref);
10725 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10727 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10728 (offset == root->objectid || offset == owner)) {
10730 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10732 * Backref of tree reloc root points to itself, no need
10733 * to check backref any more.
10735 if (tree_reloc_root)
10738 /* Check if the backref points to valid referencer */
/* The ref offset is used as the bytenr of a block one level up, checked recursively without an extent buffer */
10739 found_ref = !check_tree_block_ref(root, NULL,
10740 offset, level + 1, owner);
10745 ptr += btrfs_extent_inline_ref_size(type);
10749 * Inlined extent item doesn't have what we need, check
10750 * TREE_BLOCK_REF_KEY
10753 btrfs_release_path(&path);
10754 key.objectid = bytenr;
10755 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10756 key.offset = root->objectid;
10758 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10763 err |= BACKREF_MISSING;
10765 btrfs_release_path(&path);
/* The error message is only printed when an extent buffer was supplied */
10766 if (eb && (err & BACKREF_MISSING))
10767 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10768 bytenr, nodesize, owner, level);
/*
 * check_extent_data_item - verify one EXTENT_DATA item against the extent tree.
 *
 * @root: root the file extent belongs to
 * @eb:   leaf holding the file extent item
 * @slot: slot of the item inside @eb
 *
 * Inline extents and holes (disk_bytenr of 0) are not checked.  For the
 * rest, the alignment of disk_num_bytes and num_bytes to the sector size is
 * verified, the file-level counters data_bytes_allocated and
 * data_bytes_referenced are increased, and a data backref is searched:
 * first among the inline refs of the extent item, then as a keyed
 * EXTENT_DATA_REF item, finally as a keyed SHARED_DATA_REF item.  Problems
 * are accumulated as BYTES_UNALIGNED / BACKREF_MISMATCH / BACKREF_MISSING
 * bits in err.
 *
 * NOTE(review): declarations, several conditions and the return are not
 * visible in this excerpt.
 */
10773 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10775 * Return >0 any error found and output error message
10776 * Return 0 for no error found
10778 static int check_extent_data_item(struct btrfs_root *root,
10779 struct extent_buffer *eb, int slot)
10781 struct btrfs_file_extent_item *fi;
10782 struct btrfs_path path;
10783 struct btrfs_root *extent_root = root->fs_info->extent_root;
10784 struct btrfs_key fi_key;
10785 struct btrfs_key dbref_key;
10786 struct extent_buffer *leaf;
10787 struct btrfs_extent_item *ei;
10788 struct btrfs_extent_inline_ref *iref;
10789 struct btrfs_extent_data_ref *dref;
10792 u64 disk_num_bytes;
10793 u64 extent_num_bytes;
10800 int found_dbackref = 0;
10804 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10805 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10807 /* Nothing to check for hole and inline data extents */
10808 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10809 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10812 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10813 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10814 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10816 /* Check unaligned disk_num_bytes and num_bytes */
10817 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10819 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10820 fi_key.objectid, fi_key.offset, disk_num_bytes,
10821 root->fs_info->sectorsize);
10822 err |= BYTES_UNALIGNED;
/* Accumulates into the file-level data_bytes_allocated counter */
10824 data_bytes_allocated += disk_num_bytes;
10826 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10828 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10829 fi_key.objectid, fi_key.offset, extent_num_bytes,
10830 root->fs_info->sectorsize);
10831 err |= BYTES_UNALIGNED;
/* Accumulates into the file-level data_bytes_referenced counter */
10833 data_bytes_referenced += extent_num_bytes;
10835 owner = btrfs_header_owner(eb);
10837 /* Check the extent item of the file extent in extent tree */
10838 btrfs_init_path(&path);
/* The extent item key is (disk_bytenr, EXTENT_ITEM, disk_num_bytes) */
10839 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10840 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10841 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10843 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
/* From here on slot refers to the extent tree leaf, not to @eb */
10847 leaf = path.nodes[0];
10848 slot = path.slots[0];
10849 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10851 extent_flags = btrfs_extent_flags(leaf, ei);
10853 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10855 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10856 disk_bytenr, disk_num_bytes,
10857 BTRFS_EXTENT_FLAG_DATA);
10858 err |= BACKREF_MISMATCH;
10861 /* Check data backref inside that extent item */
10862 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10863 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10864 ptr = (unsigned long)iref;
10865 end = (unsigned long)ei + item_size;
10866 while (ptr < end) {
10867 iref = (struct btrfs_extent_inline_ref *)ptr;
10868 type = btrfs_extent_inline_ref_type(leaf, iref);
/* The data ref is read from the position of the inline ref's offset field */
10869 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10871 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10872 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10873 if (ref_root == owner || ref_root == root->objectid)
10874 found_dbackref = 1;
10875 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/* Shared data ref: the inline ref offset is passed to check_tree_block_ref() as the block to validate */
10876 found_dbackref = !check_tree_block_ref(root, NULL,
10877 btrfs_extent_inline_ref_offset(leaf, iref),
10881 if (found_dbackref)
10883 ptr += btrfs_extent_inline_ref_size(type);
10886 if (!found_dbackref) {
10887 btrfs_release_path(&path);
10889 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10890 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10891 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
/* NOTE(review): fi_key.offset is hashed as is; confirm whether the file extent item's own offset field has to be subtracted first */
10892 dbref_key.offset = hash_extent_data_ref(root->objectid,
10893 fi_key.objectid, fi_key.offset);
10895 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10896 &dbref_key, &path, 0, 0);
10898 found_dbackref = 1;
10902 btrfs_release_path(&path);
10905 * Neither inlined nor EXTENT_DATA_REF found, try
10906 * SHARED_DATA_REF as last chance.
/* The shared data ref is keyed by the start of the leaf holding the file extent */
10908 dbref_key.objectid = disk_bytenr;
10909 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10910 dbref_key.offset = eb->start;
10912 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10913 &dbref_key, &path, 0, 0);
10915 found_dbackref = 1;
10921 if (!found_dbackref)
10922 err |= BACKREF_MISSING;
10923 btrfs_release_path(&path);
10924 if (err & BACKREF_MISSING) {
10925 error("data extent[%llu %llu] backref lost",
10926 disk_bytenr, disk_num_bytes);
10932 * Get real tree block level for the case like shared block
10933 * Return >= 0 as tree level
10934 * Return <0 for error
/*
 * Resolve the level of the tree block at @bytenr.
 *
 * Two sources are consulted in the visible code: the extent tree record
 * found for @bytenr (backref_level) and the header of the block itself
 * (header_level).  The two values are compared, and header_level is the
 * value returned on the visible return path.
 *
 * NOTE(review): several short lines (declarations, error branches, labels)
 * are not visible in this extract, so the exact error paths cannot be
 * described from here -- confirm against the full file.
 */
10936 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10938 struct extent_buffer *eb;
10939 struct btrfs_path path;
10940 struct btrfs_key key;
10941 struct btrfs_extent_item *ei;
10948 /* Search extent tree for extent generation and level */
10949 key.objectid = bytenr;
10950 key.type = BTRFS_METADATA_ITEM_KEY;
10951 key.offset = (u64)-1;
10953 btrfs_init_path(&path);
10954 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10957 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
/* Load the key and extent item at the slot the path now points to */
10965 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10966 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10967 struct btrfs_extent_item);
10968 flags = btrfs_extent_flags(path.nodes[0], ei);
10969 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10974 /* Get transid for later read_tree_block() check */
10975 transid = btrfs_extent_generation(path.nodes[0], ei);
10977 /* Get backref level as one source */
10978 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10979 backref_level = key.offset;
/*
 * Other key type: the level is read from the btrfs_tree_block_info
 * located directly behind the extent item.
 */
10981 struct btrfs_tree_block_info *info;
10983 info = (struct btrfs_tree_block_info *)(ei + 1);
10984 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10986 btrfs_release_path(&path);
10988 /* Get level from tree block as an alternative source */
10989 eb = read_tree_block(fs_info, bytenr, transid);
10990 if (!extent_buffer_uptodate(eb)) {
10991 free_extent_buffer(eb);
10994 header_level = btrfs_header_level(eb);
10995 free_extent_buffer(eb);
10997 if (header_level != backref_level)
10999 return header_level;
11002 btrfs_release_path(&path);
11007 * Check if a tree block backref is valid (points to a valid tree block)
11008 * if level == -1, level will be resolved
11009 * Return >0 for any error found and print error message
/*
 * Verify that the tree identified by @root_id references the tree block at
 * @bytenr, at tree level @level.
 *
 * Visible steps:
 *  - the level is resolved through query_tree_block_level();
 *  - the root with key (@root_id, BTRFS_ROOT_ITEM_KEY, -1) is loaded;
 *  - the block at @bytenr is read and its first key (node key or item key)
 *    is taken; a level-0 block with no items is treated as an empty tree;
 *  - the root is searched for that key with path.lowest_level = @level and
 *    the bytenr and level of the block found are compared with the
 *    expected values.
 *
 * Problems are accumulated in err as REFERENCER_MISSING or
 * REFERENCER_MISMATCH bits, and an error message is printed.
 *
 * NOTE(review): nodesize is declared u32 but printed with %d, and @level
 * (int) is printed with %u in the last message -- confirm the intended
 * format specifiers.
 */
11011 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11012 u64 bytenr, int level)
11014 struct btrfs_root *root;
11015 struct btrfs_key key;
11016 struct btrfs_path path;
11017 struct extent_buffer *eb;
11018 struct extent_buffer *node;
11019 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11023 /* Query level for level == -1 special case */
11025 level = query_tree_block_level(fs_info, bytenr);
11027 err |= REFERENCER_MISSING;
/* Look up the referencing root by its ROOT_ITEM key */
11031 key.objectid = root_id;
11032 key.type = BTRFS_ROOT_ITEM_KEY;
11033 key.offset = (u64)-1;
11035 root = btrfs_read_fs_root(fs_info, &key);
11036 if (IS_ERR(root)) {
11037 err |= REFERENCER_MISSING;
11041 /* Read out the tree block to get item/node key */
11042 eb = read_tree_block(fs_info, bytenr, 0);
11043 if (!extent_buffer_uptodate(eb)) {
11044 err |= REFERENCER_MISSING;
11045 free_extent_buffer(eb);
11049 /* Empty tree, no need to check key */
11050 if (!btrfs_header_nritems(eb) && !level) {
11051 free_extent_buffer(eb);
11056 btrfs_node_key_to_cpu(eb, &key, 0);
11058 btrfs_item_key_to_cpu(eb, &key, 0);
11060 free_extent_buffer(eb);
11062 btrfs_init_path(&path);
11063 path.lowest_level = level;
11064 /* Search with the first key, to ensure we can reach it */
11065 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11067 err |= REFERENCER_MISSING;
/* The block reached at @level must be the one the backref describes */
11071 node = path.nodes[level];
11072 if (btrfs_header_bytenr(node) != bytenr) {
11074 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
11075 bytenr, nodesize, bytenr,
11076 btrfs_header_bytenr(node));
11077 err |= REFERENCER_MISMATCH;
11079 if (btrfs_header_level(node) != level) {
11081 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
11082 bytenr, nodesize, level,
11083 btrfs_header_level(node));
11084 err |= REFERENCER_MISMATCH;
11088 btrfs_release_path(&path);
11090 if (err & REFERENCER_MISSING) {
11092 error("extent [%llu %d] lost referencer (owner: %llu)",
11093 bytenr, nodesize, root_id);
11096 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11097 bytenr, nodesize, root_id, level);
11104 * Check if tree block @eb is tree reloc root.
11105 * Return 0 if it's not or any problem happens
11106 * Return 1 if it's a tree reloc root
/*
 * Test whether @eb is the root node of a tree reloc root.
 *
 * The reloc root is read with btrfs_read_fs_root_no_cache() using the key
 * (BTRFS_TREE_RELOC_OBJECTID, BTRFS_ROOT_ITEM_KEY, owner of @eb).  The
 * bytenr of @eb is compared with the bytenr of that root's node, and the
 * root is released again with btrfs_free_fs_root().
 */
11108 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11109 struct extent_buffer *eb)
11111 struct btrfs_root *tree_reloc_root;
11112 struct btrfs_key key;
11113 u64 bytenr = btrfs_header_bytenr(eb);
11114 u64 owner = btrfs_header_owner(eb);
11117 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11118 key.offset = owner;
11119 key.type = BTRFS_ROOT_ITEM_KEY;
11121 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11122 if (IS_ERR(tree_reloc_root))
11125 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
11127 btrfs_free_fs_root(tree_reloc_root);
11132 * Check referencer for shared block backref
11133 * If level == -1, this function will resolve the level.
/*
 * Verify a shared block backref of the tree block at @bytenr.
 *
 * The block at @parent is read; its header level is compared with
 * @level + 1 and its block pointers are scanned for @bytenr.  The level is
 * obtained from query_tree_block_level() in the visible code.  A backref
 * whose @parent equals @bytenr is additionally tested with
 * is_tree_reloc_root().
 *
 * When no parent is found an error is printed and REFERENCER_MISSING is
 * returned.
 */
11135 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11136 u64 parent, u64 bytenr, int level)
11138 struct extent_buffer *eb;
11140 int found_parent = 0;
11143 eb = read_tree_block(fs_info, parent, 0);
11144 if (!extent_buffer_uptodate(eb))
11148 level = query_tree_block_level(fs_info, bytenr);
11152 /* It's possible it's a tree reloc root */
11153 if (parent == bytenr) {
11154 if (is_tree_reloc_root(fs_info, eb))
11159 if (level + 1 != btrfs_header_level(eb))
11162 nr = btrfs_header_nritems(eb);
11163 for (i = 0; i < nr; i++) {
11164 if (bytenr == btrfs_node_blockptr(eb, i)) {
11170 free_extent_buffer(eb);
11171 if (!found_parent) {
11173 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
11174 bytenr, fs_info->nodesize, parent, level);
11175 return REFERENCER_MISSING;
11181 * Check referencer for normal (inlined) data ref
11182 * If len == 0, it will be resolved by searching in extent tree
/*
 * Verify an EXTENT_DATA_REF backref of the data extent at @bytenr.
 *
 * @root_id, @objectid and @offset are the values stored in the backref,
 * @len is the extent length and @count the reference count the backref
 * claims.
 *
 * Visible steps:
 *  - the extent tree is searched for the EXTENT_ITEM of @bytenr and the key
 *    found is checked against @bytenr / BTRFS_EXTENT_ITEM_KEY;
 *  - the root @root_id is loaded and searched for EXTENT_DATA items of
 *    inode @objectid; the search starts at file offset 0 when bit 63 of
 *    @offset is set, otherwise at @offset;
 *  - file extents are walked with btrfs_next_item() while the key still
 *    belongs to @objectid / BTRFS_EXTENT_DATA_KEY, and each one is compared
 *    on disk bytenr, disk num bytes and (file offset - extent offset).
 *
 * If found_count differs from @count an error is printed and
 * REFERENCER_MISSING is returned.
 *
 * NOTE(review): the statement that updates found_count is not visible in
 * this extract -- confirm against the full file.
 */
11184 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
11185 u64 root_id, u64 objectid, u64 offset,
11186 u64 bytenr, u64 len, u32 count)
11188 struct btrfs_root *root;
11189 struct btrfs_root *extent_root = fs_info->extent_root;
11190 struct btrfs_key key;
11191 struct btrfs_path path;
11192 struct extent_buffer *leaf;
11193 struct btrfs_file_extent_item *fi;
11194 u32 found_count = 0;
11199 key.objectid = bytenr;
11200 key.type = BTRFS_EXTENT_ITEM_KEY;
11201 key.offset = (u64)-1;
11203 btrfs_init_path(&path);
11204 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11207 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11210 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11211 if (key.objectid != bytenr ||
11212 key.type != BTRFS_EXTENT_ITEM_KEY)
11215 btrfs_release_path(&path);
/* Switch from the extent tree to the referencing fs/subvolume root */
11217 key.objectid = root_id;
11218 key.type = BTRFS_ROOT_ITEM_KEY;
11219 key.offset = (u64)-1;
11220 btrfs_init_path(&path);
11222 root = btrfs_read_fs_root(fs_info, &key);
11226 key.objectid = objectid;
11227 key.type = BTRFS_EXTENT_DATA_KEY;
11229 * It can be nasty as data backref offset is
11230 * file offset - file extent offset, which is smaller or
11231 * equal to original backref offset. The only special case is
11232 * overflow. So we need to special check and do further search.
11234 key.offset = offset & (1ULL << 63) ? 0 : offset;
11236 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11241 * Search afterwards to get correct one
11242 * NOTE: As we must do a comprehensive check on the data backref to
11243 * make sure the dref count also matches, we must iterate all file
11244 * extents for that inode.
11247 leaf = path.nodes[0];
11248 slot = path.slots[0];
11250 if (slot >= btrfs_header_nritems(leaf))
11252 btrfs_item_key_to_cpu(leaf, &key, slot);
11253 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11255 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11257 * Except normal disk bytenr and disk num bytes, we still
11258 * need to do extra check on dbackref offset as
11259 * dbackref offset = file_offset - file_extent_offset
11261 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11262 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11263 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11268 ret = btrfs_next_item(root, &path);
11273 btrfs_release_path(&path);
11274 if (found_count != count) {
11276 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
11277 bytenr, len, root_id, objectid, offset, count, found_count);
11278 return REFERENCER_MISSING;
11284 * Check if the referencer of a shared data backref exists
/*
 * Verify a shared data backref of the data extent at @bytenr.
 *
 * The block at @parent is read and its items are scanned: only
 * BTRFS_EXTENT_DATA_KEY items are looked at, the file extent type is
 * tested against BTRFS_FILE_EXTENT_INLINE, and the disk bytenr of the file
 * extent is compared with @bytenr.
 *
 * When no referencing file extent is found an error is printed and
 * REFERENCER_MISSING is returned.
 */
11286 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
11287 u64 parent, u64 bytenr)
11289 struct extent_buffer *eb;
11290 struct btrfs_key key;
11291 struct btrfs_file_extent_item *fi;
11293 int found_parent = 0;
11296 eb = read_tree_block(fs_info, parent, 0);
11297 if (!extent_buffer_uptodate(eb))
11300 nr = btrfs_header_nritems(eb);
11301 for (i = 0; i < nr; i++) {
11302 btrfs_item_key_to_cpu(eb, &key, i);
11303 if (key.type != BTRFS_EXTENT_DATA_KEY)
11306 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
11307 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
11310 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
11317 free_extent_buffer(eb);
11318 if (!found_parent) {
11319 error("shared extent %llu referencer lost (parent: %llu)",
11321 return REFERENCER_MISSING;
11327 * This function will check a given extent item, including its backref and
11328 * itself (like crossing stripe boundary and type)
11330 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Check the EXTENT_ITEM / METADATA_ITEM stored in slot @slot of @eb.
 *
 * Visible steps:
 *  - the global bytes_used counter is increased by key.offset for
 *    EXTENT_ITEM keys and by nodesize on the other visible branch;
 *  - an item smaller than struct btrfs_extent_item is reported as an
 *    unsupported COMPAT_EXTENT_TREE_V0 item;
 *  - metadata extents are tested with check_crossing_stripes();
 *  - the tree level is taken from the btrfs_tree_block_info (old
 *    EXTENT_ITEM metadata) or from key.offset (METADATA_ITEM);
 *  - every inline backref between the end of the fixed part and the end of
 *    the item is dispatched by type to check_tree_block_backref(),
 *    check_shared_block_backref(), check_extent_data_backref() or
 *    check_shared_data_backref(); an unrecognized type sets UNKNOWN_TYPE.
 *
 * Error bits seen here: CROSSING_STRIPE_BOUNDARY, ITEM_SIZE_MISMATCH and
 * UNKNOWN_TYPE are OR-ed into err.
 *
 * NOTE(review): how the helpers' return values are merged into err is not
 * visible in this extract -- confirm against the full file.
 */
11332 static int check_extent_item(struct btrfs_fs_info *fs_info,
11333 struct extent_buffer *eb, int slot)
11335 struct btrfs_extent_item *ei;
11336 struct btrfs_extent_inline_ref *iref;
11337 struct btrfs_extent_data_ref *dref;
11341 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11342 u32 item_size = btrfs_item_size_nr(eb, slot);
11347 struct btrfs_key key;
11351 btrfs_item_key_to_cpu(eb, &key, slot);
11352 if (key.type == BTRFS_EXTENT_ITEM_KEY)
11353 bytes_used += key.offset;
11355 bytes_used += nodesize;
11357 if (item_size < sizeof(*ei)) {
11359 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
11360 * old thing when on disk format is still un-determined.
11361 * No need to care about it anymore
11363 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
11367 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
11368 flags = btrfs_extent_flags(eb, ei);
11370 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
11372 if (metadata && check_crossing_stripes(global_info, key.objectid,
11374 error("bad metadata [%llu, %llu) crossing stripe boundary",
11375 key.objectid, key.objectid + nodesize);
11376 err |= CROSSING_STRIPE_BOUNDARY;
/* ptr walks the variable part of the item, starting right behind *ei */
11379 ptr = (unsigned long)(ei + 1);
11381 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
11382 /* Old EXTENT_ITEM metadata */
11383 struct btrfs_tree_block_info *info;
11385 info = (struct btrfs_tree_block_info *)ptr;
11386 level = btrfs_tree_block_level(eb, info);
11387 ptr += sizeof(struct btrfs_tree_block_info);
11389 /* New METADATA_ITEM */
11390 level = key.offset;
11392 end = (unsigned long)ei + item_size;
11395 /* Reached extent item end normally */
11399 /* Beyond extent item end, wrong item size */
11401 err |= ITEM_SIZE_MISMATCH;
11402 error("extent item at bytenr %llu slot %d has wrong size",
11407 /* Now check every backref in this extent item */
11408 iref = (struct btrfs_extent_inline_ref *)ptr;
11409 type = btrfs_extent_inline_ref_type(eb, iref);
11410 offset = btrfs_extent_inline_ref_offset(eb, iref);
11412 case BTRFS_TREE_BLOCK_REF_KEY:
11413 ret = check_tree_block_backref(fs_info, offset, key.objectid,
11417 case BTRFS_SHARED_BLOCK_REF_KEY:
11418 ret = check_shared_block_backref(fs_info, offset, key.objectid,
11422 case BTRFS_EXTENT_DATA_REF_KEY:
11423 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11424 ret = check_extent_data_backref(fs_info,
11425 btrfs_extent_data_ref_root(eb, dref),
11426 btrfs_extent_data_ref_objectid(eb, dref),
11427 btrfs_extent_data_ref_offset(eb, dref),
11428 key.objectid, key.offset,
11429 btrfs_extent_data_ref_count(eb, dref));
11432 case BTRFS_SHARED_DATA_REF_KEY:
11433 ret = check_shared_data_backref(fs_info, offset, key.objectid);
11437 error("extent[%llu %d %llu] has unknown ref type: %d",
11438 key.objectid, key.type, key.offset, type);
11439 err |= UNKNOWN_TYPE;
/* Advance to the next inline ref; its size depends on the ref type */
11443 ptr += btrfs_extent_inline_ref_size(type);
11451 * Check if a dev extent item is referred correctly by its chunk
/*
 * Check that the DEV_EXTENT item in slot @slot of @eb is referenced by the
 * chunk it names.
 *
 * The chunk key is built from the chunk objectid and chunk offset stored
 * in the dev extent and looked up in the chunk tree.  The chunk found is
 * validated with btrfs_check_chunk_valid(), its stripe length is compared
 * with the dev extent length, and its stripes are scanned for one whose
 * devid and offset equal the dev extent key's objectid and offset.
 *
 * When no such chunk stripe is found an error is printed and
 * REFERENCER_MISSING is returned.
 */
11453 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
11454 struct extent_buffer *eb, int slot)
11456 struct btrfs_root *chunk_root = fs_info->chunk_root;
11457 struct btrfs_dev_extent *ptr;
11458 struct btrfs_path path;
11459 struct btrfs_key chunk_key;
11460 struct btrfs_key devext_key;
11461 struct btrfs_chunk *chunk;
11462 struct extent_buffer *l;
11466 int found_chunk = 0;
11469 btrfs_item_key_to_cpu(eb, &devext_key, slot);
11470 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
11471 length = btrfs_dev_extent_length(eb, ptr);
/* Key of the chunk this dev extent claims to belong to */
11473 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
11474 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11475 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
11477 btrfs_init_path(&path);
11478 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11483 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
11484 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
11489 if (btrfs_stripe_length(fs_info, l, chunk) != length)
11492 num_stripes = btrfs_chunk_num_stripes(l, chunk);
11493 for (i = 0; i < num_stripes; i++) {
11494 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
11495 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
11497 if (devid == devext_key.objectid &&
11498 offset == devext_key.offset) {
11504 btrfs_release_path(&path);
11505 if (!found_chunk) {
11507 "device extent[%llu, %llu, %llu] did not find the related chunk",
11508 devext_key.objectid, devext_key.offset, length);
11509 return REFERENCER_MISSING;
11515 * Check if the used space is correct with the dev item
/*
 * Check that the bytes_used value of the DEV_ITEM in slot @slot of @eb
 * equals the summed length of the device's dev extents.
 *
 * The device tree is searched for DEV_EXTENT items of the device id, then
 * items are walked with btrfs_next_item() and the length of every
 * DEV_EXTENT whose objectid equals the device id is added to total.
 *
 * Returns REFERENCER_MISSING from the visible branch that reports "cannot
 * find any related dev extent", and ACCOUNTING_MISMATCH when used and
 * total differ.
 *
 * NOTE(review): the mismatch message prints BTRFS_ROOT_TREE_OBJECTID,
 * BTRFS_DEV_EXTENT_KEY and dev_id instead of the key that was loaded just
 * before it; that key load looks unused in the visible lines -- confirm
 * which values were meant to be printed.
 */
11517 static int check_dev_item(struct btrfs_fs_info *fs_info,
11518 struct extent_buffer *eb, int slot)
11520 struct btrfs_root *dev_root = fs_info->dev_root;
11521 struct btrfs_dev_item *dev_item;
11522 struct btrfs_path path;
11523 struct btrfs_key key;
11524 struct btrfs_dev_extent *ptr;
11530 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
11531 dev_id = btrfs_device_id(eb, dev_item);
11532 used = btrfs_device_bytes_used(eb, dev_item);
11534 key.objectid = dev_id;
11535 key.type = BTRFS_DEV_EXTENT_KEY;
11538 btrfs_init_path(&path);
11539 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
11541 btrfs_item_key_to_cpu(eb, &key, slot);
11542 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
11543 key.objectid, key.type, key.offset);
11544 btrfs_release_path(&path);
11545 return REFERENCER_MISSING;
11548 /* Iterate dev_extents to calculate the used space of a device */
11550 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11553 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11554 if (key.objectid > dev_id)
11556 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11559 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11560 struct btrfs_dev_extent);
11561 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11563 ret = btrfs_next_item(dev_root, &path);
11567 btrfs_release_path(&path);
11569 if (used != total) {
11570 btrfs_item_key_to_cpu(eb, &key, slot);
11572 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11573 total, used, BTRFS_ROOT_TREE_OBJECTID,
11574 BTRFS_DEV_EXTENT_KEY, dev_id);
11575 return ACCOUNTING_MISMATCH;
11581 * Check a block group item with its referencer (chunk) and its used space
11582 * with extent/metadata item
/*
 * Check the BLOCK_GROUP_ITEM in slot @slot of @eb.
 *
 * Part 1 - referencer: the chunk tree is searched for the chunk item with
 * key (BTRFS_FIRST_CHUNK_TREE_OBJECTID, BTRFS_CHUNK_ITEM_KEY, block group
 * start).  A missing chunk sets REFERENCER_MISSING; a chunk length mismatch
 * sets REFERENCER_MISMATCH.
 *
 * Part 2 - accounting: the extent tree is walked from the block group
 * start until an item at or beyond (start + length) is met.  Only
 * METADATA_ITEM and EXTENT_ITEM keys are considered; EXTENT_ITEM keys add
 * key.offset to total.  Each extent's flags are checked against the block
 * group flags: data extents need BTRFS_BLOCK_GROUP_DATA, tree blocks need
 * SYSTEM or METADATA, otherwise CHUNK_TYPE_MISMATCH is set.
 *
 * If total differs from the used value of the block group item,
 * ACCOUNTING_MISMATCH is set.
 *
 * NOTE(review): the amount added for METADATA_ITEM keys is not visible in
 * this extract -- confirm against the full file.
 */
11584 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11585 struct extent_buffer *eb, int slot)
11587 struct btrfs_root *extent_root = fs_info->extent_root;
11588 struct btrfs_root *chunk_root = fs_info->chunk_root;
11589 struct btrfs_block_group_item *bi;
11590 struct btrfs_block_group_item bg_item;
11591 struct btrfs_path path;
11592 struct btrfs_key bg_key;
11593 struct btrfs_key chunk_key;
11594 struct btrfs_key extent_key;
11595 struct btrfs_chunk *chunk;
11596 struct extent_buffer *leaf;
11597 struct btrfs_extent_item *ei;
11598 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
/* Copy the on-disk item into bg_item so its fields can be read directly */
11606 btrfs_item_key_to_cpu(eb, &bg_key, slot);
11607 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11608 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11609 used = btrfs_block_group_used(&bg_item);
11610 bg_flags = btrfs_block_group_flags(&bg_item);
11612 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11613 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11614 chunk_key.offset = bg_key.objectid;
11616 btrfs_init_path(&path);
11617 /* Search for the referencer chunk */
11618 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11621 "block group[%llu %llu] did not find the related chunk item",
11622 bg_key.objectid, bg_key.offset);
11623 err |= REFERENCER_MISSING;
11625 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11626 struct btrfs_chunk);
11627 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11630 "block group[%llu %llu] related chunk item length does not match",
11631 bg_key.objectid, bg_key.offset);
11632 err |= REFERENCER_MISMATCH;
11635 btrfs_release_path(&path);
11637 /* Search from the block group bytenr */
11638 extent_key.objectid = bg_key.objectid;
11639 extent_key.type = 0;
11640 extent_key.offset = 0;
11642 btrfs_init_path(&path);
11643 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11647 /* Iterate extent tree to account used space */
11649 leaf = path.nodes[0];
11651 /* Search slot can point to the last item beyond leaf nritems */
11652 if (path.slots[0] >= btrfs_header_nritems(leaf))
11655 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11656 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11659 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11660 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11662 if (extent_key.objectid < bg_key.objectid)
11665 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11668 total += extent_key.offset;
11670 ei = btrfs_item_ptr(leaf, path.slots[0],
11671 struct btrfs_extent_item);
11672 flags = btrfs_extent_flags(leaf, ei);
11673 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11674 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11676 "bad extent[%llu, %llu) type mismatch with chunk",
11677 extent_key.objectid,
11678 extent_key.objectid + extent_key.offset);
11679 err |= CHUNK_TYPE_MISMATCH;
11681 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11682 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11683 BTRFS_BLOCK_GROUP_METADATA))) {
11685 "bad extent[%llu, %llu) type mismatch with chunk",
11686 extent_key.objectid,
11687 extent_key.objectid + nodesize);
11688 err |= CHUNK_TYPE_MISMATCH;
11692 ret = btrfs_next_item(extent_root, &path);
11698 btrfs_release_path(&path);
11700 if (total != used) {
11702 "block group[%llu %llu] used %llu but extent items used %llu",
11703 bg_key.objectid, bg_key.offset, used, total);
11704 err |= ACCOUNTING_MISMATCH;
11710 * Check a chunk item.
11711 * Including checking all referred dev_extents and block group
/*
 * Check the CHUNK_ITEM in slot @slot of @eb together with the items that
 * must refer back to it.
 *
 * Visible steps:
 *  - the chunk is validated with btrfs_check_chunk_valid(); the visible
 *    failure branch sets BYTES_UNALIGNED | UNKNOWN_TYPE;
 *  - the extent tree is searched for the block group item with key
 *    (chunk start, BTRFS_BLOCK_GROUP_ITEM_KEY, chunk length); a missing
 *    item sets REFERENCER_MISSING and the item's flags are compared with
 *    the chunk type;
 *  - for every stripe, the device tree is searched for the DEV_EXTENT with
 *    key (stripe devid, BTRFS_DEV_EXTENT_KEY, stripe offset); its chunk
 *    objectid, chunk offset and length must equal the chunk key objectid,
 *    chunk key offset and stripe length, otherwise BACKREF_MISSING is set.
 *
 * NOTE(review): a block group flags mismatch sets REFERENCER_MISSING, not
 * REFERENCER_MISMATCH, and the dev extent message prints
 * chunk_key.objectid where the other "chunk[%llu %llu)" messages print
 * chunk_key.offset -- confirm both are intended.
 */
11713 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11714 struct extent_buffer *eb, int slot)
11716 struct btrfs_root *extent_root = fs_info->extent_root;
11717 struct btrfs_root *dev_root = fs_info->dev_root;
11718 struct btrfs_path path;
11719 struct btrfs_key chunk_key;
11720 struct btrfs_key bg_key;
11721 struct btrfs_key devext_key;
11722 struct btrfs_chunk *chunk;
11723 struct extent_buffer *leaf;
11724 struct btrfs_block_group_item *bi;
11725 struct btrfs_block_group_item bg_item;
11726 struct btrfs_dev_extent *ptr;
11738 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11739 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11740 length = btrfs_chunk_length(eb, chunk);
11741 chunk_end = chunk_key.offset + length;
11742 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11745 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11747 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11750 type = btrfs_chunk_type(eb, chunk);
/* Key of the block group item that must exist for this chunk */
11752 bg_key.objectid = chunk_key.offset;
11753 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11754 bg_key.offset = length;
11756 btrfs_init_path(&path);
11757 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11760 "chunk[%llu %llu) did not find the related block group item",
11761 chunk_key.offset, chunk_end);
11762 err |= REFERENCER_MISSING;
11764 leaf = path.nodes[0];
11765 bi = btrfs_item_ptr(leaf, path.slots[0],
11766 struct btrfs_block_group_item);
11767 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11769 if (btrfs_block_group_flags(&bg_item) != type) {
11771 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11772 chunk_key.offset, chunk_end, type,
11773 btrfs_block_group_flags(&bg_item));
11774 err |= REFERENCER_MISSING;
11778 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11779 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11780 for (i = 0; i < num_stripes; i++) {
11781 btrfs_release_path(&path);
11782 btrfs_init_path(&path);
11783 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11784 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11785 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11787 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11790 goto not_match_dev;
11792 leaf = path.nodes[0];
11793 ptr = btrfs_item_ptr(leaf, path.slots[0],
11794 struct btrfs_dev_extent);
11795 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11796 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11797 if (objectid != chunk_key.objectid ||
11798 offset != chunk_key.offset ||
11799 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11800 goto not_match_dev;
11803 err |= BACKREF_MISSING;
11805 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11806 chunk_key.objectid, chunk_end, i);
11809 btrfs_release_path(&path);
11815 * Main entry function to check known items and update related accounting info
/*
 * Walk the items of leaf @eb and dispatch each one, by key type, to the
 * matching checker:
 *
 *   EXTENT_DATA                 -> check_extent_data_item()
 *   BLOCK_GROUP_ITEM            -> check_block_group_item()
 *   DEV_ITEM                    -> check_dev_item()
 *   CHUNK_ITEM                  -> check_chunk_item()
 *   DEV_EXTENT                  -> check_dev_extent_item()
 *   EXTENT_ITEM / METADATA_ITEM -> check_extent_item()
 *   EXTENT_CSUM                 -> adds the item size to total_csum_bytes
 *   TREE_BLOCK_REF              -> check_tree_block_backref()
 *   EXTENT_DATA_REF             -> check_extent_data_backref()
 *   SHARED_BLOCK_REF            -> check_shared_block_backref()
 *   SHARED_DATA_REF             -> check_shared_data_backref()
 *
 * The slot is advanced while it stays below the leaf's item count.
 *
 * NOTE(review): how the per-item results are merged and returned is not
 * visible in this extract -- confirm against the full file.
 */
11817 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11819 struct btrfs_fs_info *fs_info = root->fs_info;
11820 struct btrfs_key key;
11823 struct btrfs_extent_data_ref *dref;
11828 btrfs_item_key_to_cpu(eb, &key, slot);
11832 case BTRFS_EXTENT_DATA_KEY:
11833 ret = check_extent_data_item(root, eb, slot);
11836 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11837 ret = check_block_group_item(fs_info, eb, slot);
11840 case BTRFS_DEV_ITEM_KEY:
11841 ret = check_dev_item(fs_info, eb, slot);
11844 case BTRFS_CHUNK_ITEM_KEY:
11845 ret = check_chunk_item(fs_info, eb, slot);
11848 case BTRFS_DEV_EXTENT_KEY:
11849 ret = check_dev_extent_item(fs_info, eb, slot);
11852 case BTRFS_EXTENT_ITEM_KEY:
11853 case BTRFS_METADATA_ITEM_KEY:
11854 ret = check_extent_item(fs_info, eb, slot);
11857 case BTRFS_EXTENT_CSUM_KEY:
11858 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11860 case BTRFS_TREE_BLOCK_REF_KEY:
11861 ret = check_tree_block_backref(fs_info, key.offset,
11865 case BTRFS_EXTENT_DATA_REF_KEY:
11866 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11867 ret = check_extent_data_backref(fs_info,
11868 btrfs_extent_data_ref_root(eb, dref),
11869 btrfs_extent_data_ref_objectid(eb, dref),
11870 btrfs_extent_data_ref_offset(eb, dref),
11872 btrfs_extent_data_ref_count(eb, dref));
11875 case BTRFS_SHARED_BLOCK_REF_KEY:
11876 ret = check_shared_block_backref(fs_info, key.offset,
11880 case BTRFS_SHARED_DATA_REF_KEY:
11881 ret = check_shared_data_backref(fs_info, key.offset,
11889 if (++slot < btrfs_header_nritems(eb))
11896 * Helper function for later fs/subvol tree check. To determine if a tree
11897 * block should be checked.
11898 * This function ensures that only the direct referencer with the lowest rootid
11899 * checks a fs/subvolume tree block.
11901 * Backref check at extent tree would detect errors like missing subvolume
11902 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * Decide whether @root should check tree block @eb.
 *
 * The extent tree record of @eb is looked up and its inline backrefs are
 * walked.  The walk starts right behind the extent item for METADATA_ITEM
 * keys, and behind the additional btrfs_tree_block_info otherwise.  If an
 * inline TREE_BLOCK_REF names a root id lower than root->objectid, the
 * path is released in that branch, since a lower root also references the
 * block.  Keyed (non-inline) tree block refs are not examined.
 *
 * NOTE(review): the return statements are not visible in this extract; the
 * caller treats a zero result as "skip this block" -- confirm the values
 * against the full file.
 */
11904 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11906 struct btrfs_root *extent_root = root->fs_info->extent_root;
11907 struct btrfs_key key;
11908 struct btrfs_path path;
11909 struct extent_buffer *leaf;
11911 struct btrfs_extent_item *ei;
11917 struct btrfs_extent_inline_ref *iref;
11920 btrfs_init_path(&path);
11921 key.objectid = btrfs_header_bytenr(eb);
11922 key.type = BTRFS_METADATA_ITEM_KEY;
11923 key.offset = (u64)-1;
11926 * Any failure in backref resolving means we can't determine
11927 * whom the tree block belongs to.
11928 * So in that case, we need to check that tree block
11930 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11934 ret = btrfs_previous_extent_item(extent_root, &path,
11935 btrfs_header_bytenr(eb));
11939 leaf = path.nodes[0];
11940 slot = path.slots[0];
11941 btrfs_item_key_to_cpu(leaf, &key, slot);
11942 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11944 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11945 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11947 struct btrfs_tree_block_info *info;
11949 info = (struct btrfs_tree_block_info *)(ei + 1);
11950 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* Walk the inline refs from iref up to the end of the item */
11953 item_size = btrfs_item_size_nr(leaf, slot);
11954 ptr = (unsigned long)iref;
11955 end = (unsigned long)ei + item_size;
11956 while (ptr < end) {
11957 iref = (struct btrfs_extent_inline_ref *)ptr;
11958 type = btrfs_extent_inline_ref_type(leaf, iref);
11959 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11962 * We only check the tree block if current root is
11963 * the lowest referencer of it.
11965 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11966 offset < root->objectid) {
11967 btrfs_release_path(&path);
11971 ptr += btrfs_extent_inline_ref_size(type);
11974 * Normally we should also check keyed tree block ref, but that may be
11975 * very time consuming. Inlined ref should already make us skip a lot
11976 * of refs now. So skip search keyed tree block ref.
11980 btrfs_release_path(&path);
11985 * Traversal function for tree block. We will do:
11986 * 1) Skip shared fs/subvolume tree blocks
11987 * 2) Update related bytes accounting
11988 * 3) Pre-order traversal
/*
 * Recursively check the tree block @node of @root, in pre-order.
 *
 * Visible steps:
 *  - for fs trees (is_fstree()) the block is only processed when
 *    should_check() returns non-zero;
 *  - the global counters total_btree_bytes, total_fs_tree_bytes and
 *    total_extent_tree_bytes are increased by node->len depending on the
 *    header owner;
 *  - the block's own backref is checked with check_tree_block_ref();
 *  - the leaf free space is added to btree_space_waste and the block is
 *    passed to check_leaf_items();
 *  - unused key pointer slots are added to btree_space_waste and every
 *    child is read and traversed, with a test for keys at
 *    root_item.drop_level that is_dropped_key() reports as dropped.
 *
 * NOTE(review): the leaf/node branch condition and the handling of child
 * read failures are not visible in this extract -- confirm against the
 * full file.
 */
11990 static int traverse_tree_block(struct btrfs_root *root,
11991 struct extent_buffer *node)
11993 struct extent_buffer *eb;
11994 struct btrfs_key key;
11995 struct btrfs_key drop_key;
12003 * Skip shared fs/subvolume tree block, in that case they will
12004 * be checked by referencer with lowest rootid
12006 if (is_fstree(root->objectid) && !should_check(root, node))
12009 /* Update bytes accounting */
12010 total_btree_bytes += node->len;
12011 if (fs_root_objectid(btrfs_header_owner(node)))
12012 total_fs_tree_bytes += node->len;
12013 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
12014 total_extent_tree_bytes += node->len;
12016 /* pre-order tranversal, check itself first */
12017 level = btrfs_header_level(node);
12018 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
12019 btrfs_header_level(node),
12020 btrfs_header_owner(node));
12024 "check %s failed root %llu bytenr %llu level %d, force continue check",
12025 level ? "node":"leaf", root->objectid,
12026 btrfs_header_bytenr(node), btrfs_header_level(node));
12029 btree_space_waste += btrfs_leaf_free_space(root, node);
12030 ret = check_leaf_items(root, node);
12035 nr = btrfs_header_nritems(node);
12036 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
12037 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
12038 sizeof(struct btrfs_key_ptr);
12040 /* Then check all its children */
12041 for (i = 0; i < nr; i++) {
12042 u64 blocknr = btrfs_node_blockptr(node, i);
12044 btrfs_node_key_to_cpu(node, &key, i);
12045 if (level == root->root_item.drop_level &&
12046 is_dropped_key(&key, &drop_key))
12050 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
12051 * to call the function itself.
12053 eb = read_tree_block(root->fs_info, blocknr, 0);
12054 if (extent_buffer_uptodate(eb)) {
12055 ret = traverse_tree_block(root, eb);
12058 free_extent_buffer(eb);
12065 * Low memory usage version of check_chunks_and_extents.
/*
 * Low-memory check of chunks and extents.
 *
 * The chunk root and the tree root are traversed first with
 * traverse_tree_block().  Then the tree root is searched from
 * BTRFS_EXTENT_TREE_OBJECTID onwards and, for every ROOT_ITEM found, the
 * corresponding root is loaded and traversed.  Tree reloc roots
 * (BTRFS_TREE_RELOC_OBJECTID) are read with btrfs_read_fs_root_no_cache()
 * and freed again after the traversal; all other roots are read with
 * btrfs_read_fs_root().
 *
 * NOTE(review): "extent treet" in the first error message looks like a
 * typo for "extent tree", and key.objectid is printed with %lld in the
 * second one -- confirm the intended text and specifier.
 */
12067 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
12069 struct btrfs_path path;
12070 struct btrfs_key key;
12071 struct btrfs_root *root1;
12072 struct btrfs_root *root;
12073 struct btrfs_root *cur_root;
12077 root = fs_info->fs_root;
12079 root1 = root->fs_info->chunk_root;
12080 ret = traverse_tree_block(root1, root1->node);
12083 root1 = root->fs_info->tree_root;
12084 ret = traverse_tree_block(root1, root1->node);
/* Iterate the ROOT_ITEMs of the tree root, starting at the extent tree */
12087 btrfs_init_path(&path);
12088 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
12090 key.type = BTRFS_ROOT_ITEM_KEY;
12092 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
12094 error("cannot find extent treet in tree_root");
12099 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12100 if (key.type != BTRFS_ROOT_ITEM_KEY)
12102 key.offset = (u64)-1;
12104 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12105 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
12108 cur_root = btrfs_read_fs_root(root->fs_info, &key);
12109 if (IS_ERR(cur_root) || !cur_root) {
12110 error("failed to read tree: %lld", key.objectid);
12114 ret = traverse_tree_block(cur_root, cur_root->node);
12117 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12118 btrfs_free_fs_root(cur_root);
12120 ret = btrfs_next_item(root1, &path);
12126 btrfs_release_path(&path);
/*
 * Entry point for the chunk/extent check.
 *
 * Prints "checking extents" to stderr unless ctx.progress_enabled is set,
 * then calls check_chunks_and_extents_v2() when check_mode is
 * CHECK_MODE_LOWMEM and check_chunks_and_extents() otherwise.
 */
12130 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
12134 if (!ctx.progress_enabled)
12135 fprintf(stderr, "checking extents\n");
12136 if (check_mode == CHECK_MODE_LOWMEM)
12137 ret = check_chunks_and_extents_v2(fs_info);
12139 ret = check_chunks_and_extents(fs_info);
/*
 * Re-initialize the root node of @root inside transaction @trans.
 *
 * Visible steps: a block c is obtained (one visible path allocates it with
 * btrfs_alloc_free_block()), its header is zeroed and refilled with level,
 * bytenr, generation, backref revision and owner, the filesystem id and
 * chunk tree uuid are written into it, and it is marked dirty.  When the
 * previous root node and c share the same start address, the root item is
 * updated in the tree root through btrfs_update_root().  The previous root
 * node is freed and the root is added to the dirty list.
 *
 * NOTE(review): the role of @overwrite and the assignment of root->node
 * are not visible in this extract -- confirm against the full file.
 */
12144 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
12145 struct btrfs_root *root, int overwrite)
12147 struct extent_buffer *c;
12148 struct extent_buffer *old = root->node;
12151 struct btrfs_disk_key disk_key = {0,0,0};
12157 extent_buffer_get(c);
12160 c = btrfs_alloc_free_block(trans, root,
12161 root->fs_info->nodesize,
12162 root->root_key.objectid,
12163 &disk_key, level, 0, 0);
12166 extent_buffer_get(c);
/* Start from a zeroed header, then fill in the fields that identify c */
12170 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
12171 btrfs_set_header_level(c, level);
12172 btrfs_set_header_bytenr(c, c->start);
12173 btrfs_set_header_generation(c, trans->transid);
12174 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
12175 btrfs_set_header_owner(c, root->root_key.objectid);
12177 write_extent_buffer(c, root->fs_info->fsid,
12178 btrfs_header_fsid(), BTRFS_FSID_SIZE);
12180 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
12181 btrfs_header_chunk_tree_uuid(c),
12184 btrfs_mark_buffer_dirty(c);
12186 * this case can happen in the following case:
12188 * 1.overwrite previous root.
12190 * 2.reinit reloc data root, this is because we skip pin
12191 * down reloc data tree before which means we can allocate
12192 * same block bytenr here.
12194 if (old->start == c->start) {
12195 btrfs_set_root_generation(&root->root_item,
12197 root->root_item.level = btrfs_header_level(root->node);
12198 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12199 &root->root_key, &root->root_item);
12201 free_extent_buffer(c);
12205 free_extent_buffer(old);
12207 add_root_to_dirty_list(root);
/*
 * Recursively pin the tree block @eb and the tree blocks reachable from it.
 *
 * The range of @eb is first tested against EXTENT_DIRTY in
 * fs_info->pinned_extents (see the comment below about blocks pinned
 * before) and is then pinned with btrfs_pin_extent().  Each slot is then
 * examined:
 *  - ROOT_ITEM keys, except the extent root and the reloc roots: the root
 *    block recorded in the item is read and walked with tree_root = 0.
 *  - node pointers: when level == 1 and @tree_root is zero, the child is
 *    pinned by bytenr/nodesize without being read; otherwise the child is
 *    read and walked with the same @tree_root value.
 *
 * @tree_root: non-zero when the walk started from the tree root (see
 *             pin_metadata_blocks()).
 */
12211 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12212 struct extent_buffer *eb, int tree_root)
12214 struct extent_buffer *tmp;
12215 struct btrfs_root_item *ri;
12216 struct btrfs_key key;
12218 int level = btrfs_header_level(eb);
12224 * If we have pinned this block before, don't pin it again.
12225 * This can not only avoid forever loop with broken filesystem
12226 * but also give us some speedups.
12228 if (test_range_bit(&fs_info->pinned_extents, eb->start,
12229 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
12232 btrfs_pin_extent(fs_info, eb->start, eb->len);
12234 nritems = btrfs_header_nritems(eb);
12235 for (i = 0; i < nritems; i++) {
12237 btrfs_item_key_to_cpu(eb, &key, i);
12238 if (key.type != BTRFS_ROOT_ITEM_KEY)
12240 /* Skip the extent root and reloc roots */
12241 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
12242 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
12243 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
12245 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
12246 bytenr = btrfs_disk_root_bytenr(eb, ri);
12249 * If at any point we start needing the real root we
12250 * will have to build a stump root for the root we are
12251 * in, but for now this doesn't actually use the root so
12252 * just pass in extent_root.
12254 tmp = read_tree_block(fs_info, bytenr, 0);
12255 if (!extent_buffer_uptodate(tmp)) {
12256 fprintf(stderr, "Error reading root block\n");
12259 ret = pin_down_tree_blocks(fs_info, tmp, 0);
12260 free_extent_buffer(tmp);
/* Node pointer case: take the child block address from slot i */
12264 bytenr = btrfs_node_blockptr(eb, i);
12266 /* If we aren't the tree root don't read the block */
12267 if (level == 1 && !tree_root) {
12268 btrfs_pin_extent(fs_info, bytenr,
12269 fs_info->nodesize);
12273 tmp = read_tree_block(fs_info, bytenr, 0);
12274 if (!extent_buffer_uptodate(tmp)) {
12275 fprintf(stderr, "Error reading tree block\n");
12278 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
12279 free_extent_buffer(tmp);
/*
 * Pin the blocks of the chunk tree (walked with tree_root = 0) and then
 * those reachable from the tree root (walked with tree_root = 1).  The
 * result of the tree-root walk is what the visible return statement hands
 * back.
 */
12288 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
12292 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
12296 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Recreate the in-memory block groups from the chunk items in the chunk
 * tree.
 *
 * The avail_{data,metadata,system}_alloc_bits of @fs_info are cleared, then
 * every BTRFS_CHUNK_ITEM_KEY item found while walking the chunk tree is
 * turned into a block group with btrfs_add_block_group() and its range is
 * marked dirty in fs_info->free_space_cache.  A second pass iterates the
 * resulting block groups via btrfs_lookup_first_block_group(); what it does
 * per block group beyond advancing 'start' needs confirming - TODO.
 */
12299 static int reset_block_groups(struct btrfs_fs_info *fs_info)
12301 struct btrfs_block_group_cache *cache;
12302 struct btrfs_path path;
12303 struct extent_buffer *leaf;
12304 struct btrfs_chunk *chunk;
12305 struct btrfs_key key;
12309 btrfs_init_path(&path);
12311 key.type = BTRFS_CHUNK_ITEM_KEY;
12313 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
12315 btrfs_release_path(&path);
12320 * We do this in case the block groups were screwed up and had alloc
12321 * bits that aren't actually set on the chunks. This happens with
12322 * restored images every time and could happen in real life I guess.
12324 fs_info->avail_data_alloc_bits = 0;
12325 fs_info->avail_metadata_alloc_bits = 0;
12326 fs_info->avail_system_alloc_bits = 0;
12328 /* First we need to create the in-memory block groups */
12330 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12331 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
12333 btrfs_release_path(&path);
12341 leaf = path.nodes[0];
12342 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12343 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
12348 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
12349 btrfs_add_block_group(fs_info, 0,
12350 btrfs_chunk_type(leaf, chunk),
12351 key.objectid, key.offset,
12352 btrfs_chunk_length(leaf, chunk));
12353 set_extent_dirty(&fs_info->free_space_cache, key.offset,
12354 key.offset + btrfs_chunk_length(leaf, chunk));
/* Walk the block groups created above, one after another */
12359 cache = btrfs_lookup_first_block_group(fs_info, start);
12363 start = cache->key.objectid + cache->key.offset;
12366 btrfs_release_path(&path);
/*
 * Remove the state of a pending balance from the tree root.
 *
 * Steps visible here:
 *  1. look up the BTRFS_BALANCE_OBJECTID / BTRFS_BALANCE_ITEM_KEY item and
 *     delete it (jumping to reinit_data_reloc on one of the lookup
 *     outcomes),
 *  2. delete the items whose objectid is BTRFS_TREE_RELOC_OBJECTID, batching
 *     adjacent slots through del_slot/del_nr and btrfs_del_items(),
 *  3. read the data reloc tree root, record it in the transaction,
 *     re-initialize it with btrfs_fsck_reinit_root() and recreate its root
 *     directory with btrfs_make_root_dir().
 */
12370 static int reset_balance(struct btrfs_trans_handle *trans,
12371 struct btrfs_fs_info *fs_info)
12373 struct btrfs_root *root = fs_info->tree_root;
12374 struct btrfs_path path;
12375 struct extent_buffer *leaf;
12376 struct btrfs_key key;
12377 int del_slot, del_nr = 0;
12381 btrfs_init_path(&path);
12382 key.objectid = BTRFS_BALANCE_OBJECTID;
12383 key.type = BTRFS_BALANCE_ITEM_KEY;
12385 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12390 goto reinit_data_reloc;
12395 ret = btrfs_del_item(trans, root, &path);
12398 btrfs_release_path(&path);
/* Next: drop the root items of the tree reloc roots */
12400 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12401 key.type = BTRFS_ROOT_ITEM_KEY;
12403 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12407 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12412 ret = btrfs_del_items(trans, root, &path,
12419 btrfs_release_path(&path);
12422 ret = btrfs_search_slot(trans, root, &key, &path,
12429 leaf = path.nodes[0];
12430 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12431 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
12433 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
12438 del_slot = path.slots[0];
12447 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
12451 btrfs_release_path(&path);
/* Finally re-create an empty data reloc tree */
12454 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
12455 key.type = BTRFS_ROOT_ITEM_KEY;
12456 key.offset = (u64)-1;
12457 root = btrfs_read_fs_root(fs_info, &key);
12458 if (IS_ERR(root)) {
12459 fprintf(stderr, "Error reading data reloc tree\n");
12460 ret = PTR_ERR(root);
12463 record_root_in_trans(trans, root);
12464 ret = btrfs_fsck_reinit_root(trans, root, 0);
12467 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
12469 btrfs_release_path(&path);
/*
 * Rebuild the extent tree from scratch inside @trans.
 *
 * Sequence visible here:
 *  - filesystems with the MIXED_GROUPS incompat flag get the "not
 *    supported" message below,
 *  - pin_metadata_blocks() pins the metadata currently in use,
 *  - the block groups are freed and recreated by reset_block_groups(),
 *  - the extent root is re-initialized with btrfs_fsck_reinit_root(),
 *  - a block group item is inserted into the new extent root for every
 *    in-memory block group, followed by btrfs_extent_post_op(),
 *  - reset_balance() clears any pending balance.
 * Each failing step reports its own message on stderr.
 */
12473 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
12474 struct btrfs_fs_info *fs_info)
12480 * The only reason we don't do this is because right now we're just
12481 * walking the trees we find and pinning down their bytes, we don't look
12482 * at any of the leaves. In order to do mixed groups we'd have to check
12483 * the leaves of any fs roots and pin down the bytes for any file
12484 * extents we find. Not hard but why do it if we don't have to?
12486 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
12487 fprintf(stderr, "We don't support re-initing the extent tree "
12488 "for mixed block groups yet, please notify a btrfs "
12489 "developer you want to do this so they can add this "
12490 "functionality.\n");
12495 * first we need to walk all of the trees except the extent tree and pin
12496 * down the bytes that are in use so we don't overwrite any existing
12499 ret = pin_metadata_blocks(fs_info);
12501 fprintf(stderr, "error pinning down used bytes\n");
12506 * Need to drop all the block groups since we're going to recreate all
12509 btrfs_free_block_groups(fs_info);
12510 ret = reset_block_groups(fs_info);
12512 fprintf(stderr, "error resetting the block groups\n");
12516 /* Ok we can allocate now, reinit the extent root */
12517 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
12519 fprintf(stderr, "extent root initialization failed\n");
12521 * When the transaction code is updated we should end the
12522 * transaction, but for now progs only knows about commit so
12523 * just return an error.
12529 * Now we have all the in-memory block groups setup so we can make
12530 * allocations properly, and the metadata we care about is safe since we
12531 * pinned all of it above.
12534 struct btrfs_block_group_cache *cache;
12536 cache = btrfs_lookup_first_block_group(fs_info, start);
12539 start = cache->key.objectid + cache->key.offset;
12540 ret = btrfs_insert_item(trans, fs_info->extent_root,
12541 &cache->key, &cache->item,
12542 sizeof(cache->item));
12544 fprintf(stderr, "Error adding block group\n");
12547 btrfs_extent_post_op(trans, fs_info->extent_root);
12550 ret = reset_balance(trans, fs_info);
12552 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force tree block @eb to be COWed.
 *
 * The owning root is looked up from the block header's owner field (the
 * incoming @root is only used to reach fs_info and is then replaced).  A
 * transaction is started and btrfs_search_slot() is run for the first key
 * of @eb with path.lowest_level set to the block's level and the last
 * argument (cow) set to 1; the transaction is then committed.
 *
 * Returns PTR_ERR() of the failing call when the owner root cannot be read
 * or the transaction cannot be started.
 */
12557 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
12559 struct btrfs_path path;
12560 struct btrfs_trans_handle *trans;
12561 struct btrfs_key key;
12564 printf("Recowing metadata block %llu\n", eb->start);
12565 key.objectid = btrfs_header_owner(eb);
12566 key.type = BTRFS_ROOT_ITEM_KEY;
12567 key.offset = (u64)-1;
12569 root = btrfs_read_fs_root(root->fs_info, &key);
12570 if (IS_ERR(root)) {
12571 fprintf(stderr, "Couldn't find owner root %llu\n",
12573 return PTR_ERR(root);
12576 trans = btrfs_start_transaction(root, 1);
12578 return PTR_ERR(trans);
12580 btrfs_init_path(&path);
12581 path.lowest_level = btrfs_header_level(eb);
/* Nodes and leaves store their first key differently */
12582 if (path.lowest_level)
12583 btrfs_node_key_to_cpu(eb, &key, 0);
12585 btrfs_item_key_to_cpu(eb, &key, 0);
12587 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
/* NOTE(review): the result of the commit is not captured on this line */
12588 btrfs_commit_transaction(trans, root);
12589 btrfs_release_path(&path);
/*
 * Delete the item described by @bad from the root it belongs to.
 *
 * The root is looked up by bad->root_id (the incoming @root is only used to
 * reach fs_info and is then replaced).  Within its own transaction the item
 * is searched for by bad->key and removed with btrfs_del_item(); the
 * transaction is then committed.
 *
 * Returns PTR_ERR() of the failing call when the root cannot be read or the
 * transaction cannot be started.
 */
12593 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12595 struct btrfs_path path;
12596 struct btrfs_trans_handle *trans;
12597 struct btrfs_key key;
12600 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12601 bad->key.type, bad->key.offset);
12602 key.objectid = bad->root_id;
12603 key.type = BTRFS_ROOT_ITEM_KEY;
12604 key.offset = (u64)-1;
12606 root = btrfs_read_fs_root(root->fs_info, &key);
12607 if (IS_ERR(root)) {
12608 fprintf(stderr, "Couldn't find owner root %llu\n",
12610 return PTR_ERR(root);
12613 trans = btrfs_start_transaction(root, 1);
12615 return PTR_ERR(trans);
12617 btrfs_init_path(&path);
12618 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12624 ret = btrfs_del_item(trans, root, &path);
/* NOTE(review): the result of the commit is not captured on this line */
12626 btrfs_commit_transaction(trans, root);
12627 btrfs_release_path(&path);
/*
 * Drop the log tree: within a transaction on @root, set the super block
 * copy's log root pointer and log root level to 0 and commit.
 */
12631 static int zero_log_tree(struct btrfs_root *root)
12633 struct btrfs_trans_handle *trans;
12636 trans = btrfs_start_transaction(root, 1);
12637 if (IS_ERR(trans)) {
12638 ret = PTR_ERR(trans);
12641 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12642 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12643 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert checksums for a data range, one sector at a time.
 *
 * For each sectorsize step of 'offset' below 'len', the data at
 * @start + offset is read into @buf with read_extent_data() and handed to
 * btrfs_csum_file_block() for insertion into @csum_root.
 *
 * @buf: scratch buffer; the callers in this file allocate sectorsize bytes
 *       for it.
 */
12647 static int populate_csum(struct btrfs_trans_handle *trans,
12648 struct btrfs_root *csum_root, char *buf, u64 start,
12651 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12656 while (offset < len) {
12657 sectorsize = fs_info->sectorsize;
12658 ret = read_extent_data(fs_info, buf, start + offset,
12662 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12663 start + offset, buf, sectorsize);
12666 offset += sectorsize;
/*
 * Fill @csum_root with checksums for the regular file extents referenced
 * by one fs/subvolume tree, @cur_root.
 *
 * Items of @cur_root are iterated with btrfs_next_item().  For each
 * BTRFS_EXTENT_DATA_KEY item whose file extent type is
 * BTRFS_FILE_EXTENT_REG, populate_csum() is called on the extent's disk
 * bytenr / disk_num_bytes.  A -EEXIST result from populate_csum() is
 * special-cased (presumably tolerated for extents shared between files -
 * TODO confirm).
 */
12671 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12672 struct btrfs_root *csum_root,
12673 struct btrfs_root *cur_root)
12675 struct btrfs_path path;
12676 struct btrfs_key key;
12677 struct extent_buffer *node;
12678 struct btrfs_file_extent_item *fi;
/* One sector of scratch space, reused for every extent */
12685 buf = malloc(cur_root->fs_info->sectorsize);
12689 btrfs_init_path(&path);
12693 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12696 /* Iterate all regular file extents and fill its csum */
12698 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12700 if (key.type != BTRFS_EXTENT_DATA_KEY)
12702 node = path.nodes[0];
12703 slot = path.slots[0];
12704 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12705 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12707 start = btrfs_file_extent_disk_bytenr(node, fi);
12708 len = btrfs_file_extent_disk_num_bytes(node, fi);
12710 ret = populate_csum(trans, csum_root, buf, start, len);
12711 if (ret == -EEXIST)
12717 * TODO: if next leaf is corrupted, jump to nearest next valid
12720 ret = btrfs_next_item(cur_root, &path);
12730 btrfs_release_path(&path);
/*
 * Fill @csum_root by walking every fs/subvolume tree.
 *
 * The tree root is searched starting at BTRFS_FS_TREE_OBJECTID and iterated
 * with btrfs_next_item().  Keys with objectid above
 * BTRFS_LAST_FREE_OBJECTID, keys that are not ROOT_ITEMs and objectids
 * rejected by is_fstree() are filtered out; for the rest the root is read
 * with btrfs_read_fs_root() and passed to
 * fill_csum_tree_from_one_fs_root().
 */
12735 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12736 struct btrfs_root *csum_root)
12738 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12739 struct btrfs_path path;
12740 struct btrfs_root *tree_root = fs_info->tree_root;
12741 struct btrfs_root *cur_root;
12742 struct extent_buffer *node;
12743 struct btrfs_key key;
12747 btrfs_init_path(&path);
12748 key.objectid = BTRFS_FS_TREE_OBJECTID;
12750 key.type = BTRFS_ROOT_ITEM_KEY;
12751 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12760 node = path.nodes[0];
12761 slot = path.slots[0];
12762 btrfs_item_key_to_cpu(node, &key, slot);
12763 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12765 if (key.type != BTRFS_ROOT_ITEM_KEY)
12767 if (!is_fstree(key.objectid))
/* Offset (u64)-1 is the lookup form used with btrfs_read_fs_root() in this file */
12769 key.offset = (u64)-1;
12771 cur_root = btrfs_read_fs_root(fs_info, &key);
12772 if (IS_ERR(cur_root) || !cur_root) {
12773 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12777 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12782 ret = btrfs_next_item(tree_root, &path);
12792 btrfs_release_path(&path);
/*
 * Fill @csum_root using the extent tree as the source of data extents.
 *
 * The extent tree is walked leaf by leaf.  For every
 * BTRFS_EXTENT_ITEM_KEY item whose extent flags include
 * BTRFS_EXTENT_FLAG_DATA, populate_csum() is called with key.objectid as
 * the start of the range.  A sectorsize scratch buffer is allocated for the
 * reads.
 */
12796 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12797 struct btrfs_root *csum_root)
12799 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12800 struct btrfs_path path;
12801 struct btrfs_extent_item *ei;
12802 struct extent_buffer *leaf;
12804 struct btrfs_key key;
12807 btrfs_init_path(&path);
12809 key.type = BTRFS_EXTENT_ITEM_KEY;
12811 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12813 btrfs_release_path(&path);
12817 buf = malloc(csum_root->fs_info->sectorsize);
12819 btrfs_release_path(&path);
12824 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12825 ret = btrfs_next_leaf(extent_root, &path);
12833 leaf = path.nodes[0];
12835 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12836 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12841 ei = btrfs_item_ptr(leaf, path.slots[0],
12842 struct btrfs_extent_item);
/* Only data extents carry checksums; other extent items are filtered here */
12843 if (!(btrfs_extent_flags(leaf, ei) &
12844 BTRFS_EXTENT_FLAG_DATA)) {
12849 ret = populate_csum(trans, csum_root, buf, key.objectid,
12856 btrfs_release_path(&path);
12862 * Recalculate the csum and put it into the csum tree.
12864 * Extent tree init will wipe out all the extent info, so in that case, we
12865 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
12866 * will use fs/subvol trees to init the csum tree.
/*
 * Dispatcher: a non-zero @search_fs_tree selects
 * fill_csum_tree_from_fs(), otherwise fill_csum_tree_from_extent() is
 * used.  The chosen helper's return value is passed through unchanged.
 */
12868 static int fill_csum_tree(struct btrfs_trans_handle *trans,
12869 struct btrfs_root *csum_root,
12870 int search_fs_tree)
12872 if (search_fs_tree)
12873 return fill_csum_tree_from_fs(trans, csum_root);
12875 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache: remove every cache extent from
 * the tree (each one is embedded in a struct root_item_info), free the
 * tree itself and reset the global pointer to NULL.  The early check
 * handles the case where the cache was never allocated.
 */
12878 static void free_roots_info_cache(void)
12880 if (!roots_info_cache)
12883 while (!cache_tree_empty(roots_info_cache)) {
12884 struct cache_extent *entry;
12885 struct root_item_info *rii;
12887 entry = first_cache_extent(roots_info_cache);
12890 remove_cache_extent(roots_info_cache, entry);
12891 rii = container_of(entry, struct root_item_info, cache_extent);
12895 free(roots_info_cache);
12896 roots_info_cache = NULL;
/*
 * Build roots_info_cache: for every root id referenced from the extent
 * tree, remember the highest-level tree block that belongs to it.
 *
 * The extent tree of @info is walked leaf by leaf.  Only tree block
 * extents are considered (METADATA_ITEM keys, or EXTENT_ITEM keys carrying
 * BTRFS_EXTENT_FLAG_TREE_BLOCK) and only when their first inline ref is of
 * type BTRFS_TREE_BLOCK_REF_KEY.  The inline ref offset is used as the root
 * id, which keys a struct root_item_info in the cache (start = root id,
 * size = 1).  A new entry starts with level (u8)-1, meaning "nothing
 * recorded yet".  When a block with a higher level is seen, its level,
 * bytenr and generation are recorded and node_count is reset to 1.
 */
12899 static int build_roots_info_cache(struct btrfs_fs_info *info)
12902 struct btrfs_key key;
12903 struct extent_buffer *leaf;
12904 struct btrfs_path path;
/* Allocate the global cache tree the first time through */
12906 if (!roots_info_cache) {
12907 roots_info_cache = malloc(sizeof(*roots_info_cache));
12908 if (!roots_info_cache)
12910 cache_tree_init(roots_info_cache);
12913 btrfs_init_path(&path);
12915 key.type = BTRFS_EXTENT_ITEM_KEY;
12917 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12920 leaf = path.nodes[0];
12923 struct btrfs_key found_key;
12924 struct btrfs_extent_item *ei;
12925 struct btrfs_extent_inline_ref *iref;
12926 int slot = path.slots[0];
12931 struct cache_extent *entry;
12932 struct root_item_info *rii;
12934 if (slot >= btrfs_header_nritems(leaf)) {
12935 ret = btrfs_next_leaf(info->extent_root, &path);
12942 leaf = path.nodes[0];
12943 slot = path.slots[0];
12946 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12948 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12949 found_key.type != BTRFS_METADATA_ITEM_KEY)
12952 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12953 flags = btrfs_extent_flags(leaf, ei);
12955 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12956 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * METADATA_ITEM: the level is the key offset and the inline refs follow
 * the extent item directly.  Otherwise a btrfs_tree_block_info sits in
 * between and holds the level.
 */
12959 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12960 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12961 level = found_key.offset;
12963 struct btrfs_tree_block_info *binfo;
12965 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12966 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12967 level = btrfs_tree_block_level(leaf, binfo);
12971 * For a root extent, it must be of the following type and the
12972 * first (and only one) iref in the item.
12974 type = btrfs_extent_inline_ref_type(leaf, iref);
12975 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12978 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12979 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12981 rii = malloc(sizeof(struct root_item_info));
12986 rii->cache_extent.start = root_id;
12987 rii->cache_extent.size = 1;
12988 rii->level = (u8)-1;
12989 entry = &rii->cache_extent;
12990 ret = insert_cache_extent(roots_info_cache, entry);
12993 rii = container_of(entry, struct root_item_info,
12997 ASSERT(rii->cache_extent.start == root_id);
12998 ASSERT(rii->cache_extent.size == 1);
/* Keep the highest-level block seen so far for this root id */
13000 if (level > rii->level || rii->level == (u8)-1) {
13001 rii->level = level;
13002 rii->bytenr = found_key.objectid;
13003 rii->gen = btrfs_extent_generation(leaf, ei);
13004 rii->node_count = 1;
13005 } else if (level == rii->level) {
13013 btrfs_release_path(&path);
/*
 * Check, and optionally fix, the root item that @path currently points at.
 *
 * The root id from @root_key is looked up in roots_info_cache.  A missing
 * entry, or an entry whose node_count is not 1, is reported as an error.
 * The on-leaf root item is copied out and its bytenr, level and generation
 * are compared with the cached values.  On a mismatch:
 *  - a message with the current and new values is printed, unless both
 *    @read_only_mode and the global 'repair' are set,
 *  - a root item generation newer than the cached one is reported as an
 *    error,
 *  - when @read_only_mode is zero, the three fields are overwritten with
 *    the cached values and written back into the leaf.
 */
13018 static int maybe_repair_root_item(struct btrfs_path *path,
13019 const struct btrfs_key *root_key,
13020 const int read_only_mode)
13022 const u64 root_id = root_key->objectid;
13023 struct cache_extent *entry;
13024 struct root_item_info *rii;
13025 struct btrfs_root_item ri;
13026 unsigned long offset;
13028 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13031 "Error: could not find extent items for root %llu\n",
13032 root_key->objectid);
13036 rii = container_of(entry, struct root_item_info, cache_extent);
13037 ASSERT(rii->cache_extent.start == root_id);
13038 ASSERT(rii->cache_extent.size == 1);
13040 if (rii->node_count != 1) {
13042 "Error: could not find btree root extent for root %llu\n",
/* Work on a local copy of the root item stored in the leaf */
13047 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
13048 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
13050 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
13051 btrfs_root_level(&ri) != rii->level ||
13052 btrfs_root_generation(&ri) != rii->gen) {
13055 * If we're in repair mode but our caller told us to not update
13056 * the root item, i.e. just check if it needs to be updated, don't
13057 * print this message, since the caller will call us again shortly
13058 * for the same root item without read only mode (the caller will
13059 * open a transaction first).
13061 if (!(read_only_mode && repair))
13063 "%sroot item for root %llu,"
13064 " current bytenr %llu, current gen %llu, current level %u,"
13065 " new bytenr %llu, new gen %llu, new level %u\n",
13066 (read_only_mode ? "" : "fixing "),
13068 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
13069 btrfs_root_level(&ri),
13070 rii->bytenr, rii->gen, rii->level);
13072 if (btrfs_root_generation(&ri) > rii->gen) {
13074 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
13075 root_id, btrfs_root_generation(&ri), rii->gen);
13079 if (!read_only_mode) {
13080 btrfs_set_root_bytenr(&ri, rii->bytenr);
13081 btrfs_set_root_level(&ri, rii->level);
13082 btrfs_set_root_generation(&ri, rii->gen);
13083 write_extent_buffer(path->nodes[0], &ri,
13084 offset, sizeof(ri));
13094 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
13095 * caused read-only snapshots to be corrupted if they were created at a moment
13096 * when the source subvolume/snapshot had orphan items. The issue was that the
13097 * on-disk root items became incorrect, referring to the pre orphan cleanup root
13098 * node instead of the post orphan cleanup root node.
13099 * So this function, and its callees, just detects and fixes those cases. Even
13100 * though the regression was for read-only snapshots, this function applies to
13101 * any snapshot/subvolume root.
13102 * This must be run before any other repair code; otherwise that other repair
13103 * code may, for example, delete or modify backrefs in the extent tree, which
13104 * will result in an inconsistent fs after repairing the root items.
/*
 * Detect, and in repair mode fix, outdated root items in the tree root.
 *
 * build_roots_info_cache() is run first.  The tree root is then scanned
 * from BTRFS_FIRST_FREE_OBJECTID; ROOT_ITEM keys other than
 * BTRFS_TREE_RELOC_OBJECTID are passed to maybe_repair_root_item().  That
 * call is made in read-only mode while no transaction is open; a
 * transaction is only started when needed (see the comment about backup
 * roots below).  At the end of a leaf the open transaction, if any, is
 * committed before moving to the next key.  The cache is freed before
 * returning.
 *
 * NOTE(review): the caller's messages suggest a positive return is the
 * number of affected root items and a negative one an errno - confirm.
 */
13106 static int repair_root_items(struct btrfs_fs_info *info)
13108 struct btrfs_path path;
13109 struct btrfs_key key;
13110 struct extent_buffer *leaf;
13111 struct btrfs_trans_handle *trans = NULL;
13114 int need_trans = 0;
13116 btrfs_init_path(&path);
13118 ret = build_roots_info_cache(info);
13122 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
13123 key.type = BTRFS_ROOT_ITEM_KEY;
13128 * Avoid opening and committing transactions if a leaf doesn't have
13129 * any root items that need to be fixed, so that we avoid rotating
13130 * backup roots unnecessarily.
13133 trans = btrfs_start_transaction(info->tree_root, 1);
13134 if (IS_ERR(trans)) {
13135 ret = PTR_ERR(trans);
13140 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
13144 leaf = path.nodes[0];
13147 struct btrfs_key found_key;
13149 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
13150 int no_more_keys = find_next_key(&path, &key);
13152 btrfs_release_path(&path);
13154 ret = btrfs_commit_transaction(trans,
13166 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13168 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
13170 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* Without an open transaction this is a check-only (read-only) call */
13173 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
13177 if (!trans && repair) {
13180 btrfs_release_path(&path);
13190 free_roots_info_cache();
13191 btrfs_release_path(&path);
13193 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the v1 free space cache of every block group.
 *
 * Block groups are visited in order through
 * btrfs_lookup_first_block_group(), advancing 'current' past each one, and
 * btrfs_clear_free_space_cache() is called on each.  Afterwards a
 * transaction is used to set the super block's cache_generation to
 * (u64)-1.
 *
 * Returns PTR_ERR(trans) if the final transaction cannot be started.
 */
13200 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13202 struct btrfs_trans_handle *trans;
13203 struct btrfs_block_group_cache *bg_cache;
13207 /* Clear all free space cache inodes and its extent data */
13209 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13212 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
13215 current = bg_cache->key.objectid + bg_cache->key.offset;
13218 /* Don't forget to set cache_generation to -1 */
13219 trans = btrfs_start_transaction(fs_info->tree_root, 0);
13220 if (IS_ERR(trans)) {
13221 error("failed to update super block cache generation");
13222 return PTR_ERR(trans);
13224 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
/* NOTE(review): the result of the commit is not captured on this line */
13225 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Handle the --clear-space-cache request for the given cache version.
 *
 * clear_version == 1: when the FREE_SPACE_TREE compat_ro flag is set, the
 *   "free space cache v2 detected" message is issued; otherwise the v1
 *   cache is cleared with clear_free_space_cache().
 * clear_version == 2: when the flag is not set, "no free space cache v2 to
 *   clear" is printed; otherwise the free space tree is cleared with
 *   btrfs_clear_free_space_tree().
 * Progress and failures are reported via printf()/error().
 */
13230 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
13235 if (clear_version == 1) {
13236 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13238 "free space cache v2 detected, use --clear-space-cache v2");
13242 printf("Clearing free space cache\n");
13243 ret = clear_free_space_cache(fs_info);
13245 error("failed to clear free space cache");
13248 printf("Free space cache cleared\n");
13250 } else if (clear_version == 2) {
13251 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13252 printf("no free space cache v2 to clear\n");
13256 printf("Clear free space cache v2\n");
13257 ret = btrfs_clear_free_space_tree(fs_info);
13259 error("failed to clear free space cache v2: %d", ret);
13262 printf("free space cache v2 cleared\n");
/*
 * Help text for "btrfs check", passed to usage() by cmd_check().  It
 * holds, in order, the synopsis line, a short description, the longer
 * description and one entry per command line option.
 */
13269 const char * const cmd_check_usage[] = {
13270 "btrfs check [options] <device>",
13271 "Check structural integrity of a filesystem (unmounted).",
13272 "Check structural integrity of an unmounted filesystem. Verify internal",
13273 "trees' consistency and item connectivity. In the repair mode try to",
13274 "fix the problems found. ",
13275 "WARNING: the repair mode is considered dangerous",
13277 "-s|--super <superblock> use this superblock copy",
13278 "-b|--backup use the first valid backup root copy",
13279 "--force skip mount checks, repair is not possible",
13280 "--repair try to repair the filesystem",
13281 "--readonly run in read-only mode (default)",
13282 "--init-csum-tree create a new CRC tree",
13283 "--init-extent-tree create a new extent tree",
13284 "--mode <MODE> allows choice of memory/IO trade-offs",
13285 " where MODE is one of:",
13286 " original - read inodes and extents to memory (requires",
13287 " more memory, does less IO)",
13288 " lowmem - try to use less memory but read blocks again",
13290 "--check-data-csum verify checksums of data blocks",
13291 "-Q|--qgroup-report print a report on qgroup consistency",
13292 "-E|--subvol-extents <subvolid>",
13293 " print subvolume extents and sharing state",
13294 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
13295 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
13296 "-p|--progress indicate progress",
13297 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
13301 int cmd_check(int argc, char **argv)
13303 struct cache_tree root_cache;
13304 struct btrfs_root *root;
13305 struct btrfs_fs_info *info;
13308 u64 tree_root_bytenr = 0;
13309 u64 chunk_root_bytenr = 0;
13310 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
13314 int init_csum_tree = 0;
13316 int clear_space_cache = 0;
13317 int qgroup_report = 0;
13318 int qgroups_repaired = 0;
13319 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
13324 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
13325 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
13326 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
13327 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
13328 GETOPT_VAL_FORCE };
13329 static const struct option long_options[] = {
13330 { "super", required_argument, NULL, 's' },
13331 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
13332 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
13333 { "init-csum-tree", no_argument, NULL,
13334 GETOPT_VAL_INIT_CSUM },
13335 { "init-extent-tree", no_argument, NULL,
13336 GETOPT_VAL_INIT_EXTENT },
13337 { "check-data-csum", no_argument, NULL,
13338 GETOPT_VAL_CHECK_CSUM },
13339 { "backup", no_argument, NULL, 'b' },
13340 { "subvol-extents", required_argument, NULL, 'E' },
13341 { "qgroup-report", no_argument, NULL, 'Q' },
13342 { "tree-root", required_argument, NULL, 'r' },
13343 { "chunk-root", required_argument, NULL,
13344 GETOPT_VAL_CHUNK_TREE },
13345 { "progress", no_argument, NULL, 'p' },
13346 { "mode", required_argument, NULL,
13348 { "clear-space-cache", required_argument, NULL,
13349 GETOPT_VAL_CLEAR_SPACE_CACHE},
13350 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
13351 { NULL, 0, NULL, 0}
13354 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
13358 case 'a': /* ignored */ break;
13360 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
13363 num = arg_strtou64(optarg);
13364 if (num >= BTRFS_SUPER_MIRROR_MAX) {
13366 "super mirror should be less than %d",
13367 BTRFS_SUPER_MIRROR_MAX);
13370 bytenr = btrfs_sb_offset(((int)num));
13371 printf("using SB copy %llu, bytenr %llu\n", num,
13372 (unsigned long long)bytenr);
13378 subvolid = arg_strtou64(optarg);
13381 tree_root_bytenr = arg_strtou64(optarg);
13383 case GETOPT_VAL_CHUNK_TREE:
13384 chunk_root_bytenr = arg_strtou64(optarg);
13387 ctx.progress_enabled = true;
13391 usage(cmd_check_usage);
13392 case GETOPT_VAL_REPAIR:
13393 printf("enabling repair mode\n");
13395 ctree_flags |= OPEN_CTREE_WRITES;
13397 case GETOPT_VAL_READONLY:
13400 case GETOPT_VAL_INIT_CSUM:
13401 printf("Creating a new CRC tree\n");
13402 init_csum_tree = 1;
13404 ctree_flags |= OPEN_CTREE_WRITES;
13406 case GETOPT_VAL_INIT_EXTENT:
13407 init_extent_tree = 1;
13408 ctree_flags |= (OPEN_CTREE_WRITES |
13409 OPEN_CTREE_NO_BLOCK_GROUPS);
13412 case GETOPT_VAL_CHECK_CSUM:
13413 check_data_csum = 1;
13415 case GETOPT_VAL_MODE:
13416 check_mode = parse_check_mode(optarg);
13417 if (check_mode == CHECK_MODE_UNKNOWN) {
13418 error("unknown mode: %s", optarg);
13422 case GETOPT_VAL_CLEAR_SPACE_CACHE:
13423 if (strcmp(optarg, "v1") == 0) {
13424 clear_space_cache = 1;
13425 } else if (strcmp(optarg, "v2") == 0) {
13426 clear_space_cache = 2;
13427 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
13430 "invalid argument to --clear-space-cache, must be v1 or v2");
13433 ctree_flags |= OPEN_CTREE_WRITES;
13435 case GETOPT_VAL_FORCE:
13441 if (check_argc_exact(argc - optind, 1))
13442 usage(cmd_check_usage);
13444 if (ctx.progress_enabled) {
13445 ctx.tp = TASK_NOTHING;
13446 ctx.info = task_init(print_status_check, print_status_return, &ctx);
13449 /* This check is the only reason for --readonly to exist */
13450 if (readonly && repair) {
13451 error("repair options are not compatible with --readonly");
13456 * experimental and dangerous
13458 if (repair && check_mode == CHECK_MODE_LOWMEM)
13459 warning("low-memory mode repair support is only partial");
13462 cache_tree_init(&root_cache);
13464 ret = check_mounted(argv[optind]);
13467 error("could not check mount status: %s",
13473 "%s is currently mounted, use --force if you really intend to check the filesystem",
13481 error("repair and --force is not yet supported");
13488 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
13492 "filesystem mounted, continuing because of --force");
13494 /* A block device is mounted in exclusive mode by kernel */
13495 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
13498 /* only allow partial opening under repair mode */
13500 ctree_flags |= OPEN_CTREE_PARTIAL;
13502 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
13503 chunk_root_bytenr, ctree_flags);
13505 error("cannot open file system");
13511 global_info = info;
13512 root = info->fs_root;
13513 uuid_unparse(info->super_copy->fsid, uuidbuf);
13515 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
13518 * Check the bare minimum before starting anything else that could rely
13519 * on it, namely the tree roots, any local consistency checks
13521 if (!extent_buffer_uptodate(info->tree_root->node) ||
13522 !extent_buffer_uptodate(info->dev_root->node) ||
13523 !extent_buffer_uptodate(info->chunk_root->node)) {
13524 error("critical roots corrupted, unable to check the filesystem");
13530 if (clear_space_cache) {
13531 ret = do_clear_free_space_cache(info, clear_space_cache);
13537 * repair mode will force us to commit transaction which
13538 * will make us fail to load log tree when mounting.
13540 if (repair && btrfs_super_log_root(info->super_copy)) {
13541 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
13547 ret = zero_log_tree(root);
13550 error("failed to zero log tree: %d", ret);
13555 if (qgroup_report) {
13556 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
13558 ret = qgroup_verify_all(info);
13565 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
13566 subvolid, argv[optind], uuidbuf);
13567 ret = print_extent_state(info, subvolid);
13572 if (init_extent_tree || init_csum_tree) {
13573 struct btrfs_trans_handle *trans;
13575 trans = btrfs_start_transaction(info->extent_root, 0);
13576 if (IS_ERR(trans)) {
13577 error("error starting transaction");
13578 ret = PTR_ERR(trans);
13583 if (init_extent_tree) {
13584 printf("Creating a new extent tree\n");
13585 ret = reinit_extent_tree(trans, info);
13591 if (init_csum_tree) {
13592 printf("Reinitialize checksum tree\n");
13593 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
13595 error("checksum tree initialization failed: %d",
13602 ret = fill_csum_tree(trans, info->csum_root,
13606 error("checksum tree refilling failed: %d", ret);
13611 * Ok now we commit and run the normal fsck, which will add
13612 * extent entries for all of the items it finds.
13614 ret = btrfs_commit_transaction(trans, info->extent_root);
13619 if (!extent_buffer_uptodate(info->extent_root->node)) {
13620 error("critical: extent_root, unable to check the filesystem");
13625 if (!extent_buffer_uptodate(info->csum_root->node)) {
13626 error("critical: csum_root, unable to check the filesystem");
13632 ret = do_check_chunks_and_extents(info);
13636 "errors found in extent allocation tree or chunk allocation");
13638 ret = repair_root_items(info);
13641 error("failed to repair root items: %s", strerror(-ret));
13645 fprintf(stderr, "Fixed %d roots.\n", ret);
13647 } else if (ret > 0) {
13649 "Found %d roots with an outdated root item.\n",
13652 "Please run a filesystem check with the option --repair to fix them.\n");
13658 if (!ctx.progress_enabled) {
13659 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13660 fprintf(stderr, "checking free space tree\n");
13662 fprintf(stderr, "checking free space cache\n");
13664 ret = check_space_cache(root);
13667 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13668 error("errors found in free space tree");
13670 error("errors found in free space cache");
13675 * We used to have to have these hole extents in between our real
13676 * extents so if we don't have this flag set we need to make sure there
13677 * are no gaps in the file extents for inodes, otherwise we can just
13678 * ignore it when this happens.
13680 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13681 ret = do_check_fs_roots(info, &root_cache);
13684 error("errors found in fs roots");
13688 fprintf(stderr, "checking csums\n");
13689 ret = check_csums(root);
13692 error("errors found in csum tree");
13696 fprintf(stderr, "checking root refs\n");
13697 /* For low memory mode, check_fs_roots_v2 handles root refs */
13698 if (check_mode != CHECK_MODE_LOWMEM) {
13699 ret = check_root_refs(root, &root_cache);
13702 error("errors found in root refs");
13707 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13708 struct extent_buffer *eb;
13710 eb = list_first_entry(&root->fs_info->recow_ebs,
13711 struct extent_buffer, recow);
13712 list_del_init(&eb->recow);
13713 ret = recow_extent_buffer(root, eb);
13716 error("fails to fix transid errors");
13721 while (!list_empty(&delete_items)) {
13722 struct bad_item *bad;
13724 bad = list_first_entry(&delete_items, struct bad_item, list);
13725 list_del_init(&bad->list);
13727 ret = delete_bad_item(root, bad);
13733 if (info->quota_enabled) {
13734 fprintf(stderr, "checking quota groups\n");
13735 ret = qgroup_verify_all(info);
13738 error("failed to check quota groups");
13742 ret = repair_qgroups(info, &qgroups_repaired);
13745 error("failed to repair quota groups");
13751 if (!list_empty(&root->fs_info->recow_ebs)) {
13752 error("transid errors in file system");
13757 printf("found %llu bytes used, ",
13758 (unsigned long long)bytes_used);
13760 printf("error(s) found\n");
13762 printf("no error found\n");
13763 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13764 printf("total tree bytes: %llu\n",
13765 (unsigned long long)total_btree_bytes);
13766 printf("total fs tree bytes: %llu\n",
13767 (unsigned long long)total_fs_tree_bytes);
13768 printf("total extent tree bytes: %llu\n",
13769 (unsigned long long)total_extent_tree_bytes);
13770 printf("btree space waste bytes: %llu\n",
13771 (unsigned long long)btree_space_waste);
13772 printf("file data blocks allocated: %llu\n referenced %llu\n",
13773 (unsigned long long)data_bytes_allocated,
13774 (unsigned long long)data_bytes_referenced);
13776 free_qgroup_counts();
13777 free_root_recs_tree(&root_cache);
13781 if (ctx.progress_enabled)
13782 task_deinit(ctx.info);