2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87 struct extent_backref {
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
/*
 * Map an rb_node back to the struct extent_backref that embeds it as its
 * 'node' member (thin wrapper around rb_entry()).
 */
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
98 return rb_entry(node, struct extent_backref, node);
101 struct data_backref {
102 struct extent_backref node;
/*
 * Error bit flags.  Each flag is a single distinct bit (bits 1 through 19),
 * so several of them can be OR-ed together into one value.
 */
116 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
117 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
118 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
119 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
120 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
121 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but not match */
122 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
123 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
124 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
125 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
126 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
127 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
128 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM no reference */
129 #define NO_INODE_ITEM (1<<14) /* no inode_item */
130 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
131 #define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
132 #define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but not match */
133 #define DIR_INDEX_MISSING (1<<18) /* DIR_INDEX not found */
134 #define DIR_INDEX_MISMATCH (1<<19) /* DIR_INDEX found but not match */
/*
 * Downcast a generic extent_backref to the struct data_backref that embeds
 * it as its 'node' member.  The caller must know that 'back' really lives
 * inside a data_backref; nothing is checked here.
 */
136 static inline struct data_backref* to_data_backref(struct extent_backref *back)
138 return container_of(back, struct data_backref, node);
/*
 * Comparator for two data backrefs, passed as rb_nodes.
 *
 * Both nodes are expected to be data backrefs (WARN_ON fires otherwise).
 * Keys are examined in this order: parent, then owner, then offset, and
 * only when both sides have found_ref set, disk_bytenr and bytes.  A full
 * backref gets special handling right after the parent comparison.
 *
 * NOTE(review): the return statements are elided in this listing; confirm
 * the >0 / <0 / 0 convention and the full_backref shortcut against the
 * complete source.
 */
141 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
143 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
144 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
145 struct data_backref *back1 = to_data_backref(ext1);
146 struct data_backref *back2 = to_data_backref(ext2);
148 WARN_ON(!ext1->is_data);
149 WARN_ON(!ext2->is_data);
151 /* parent and root are a union, so this covers both */
152 if (back1->parent > back2->parent)
154 if (back1->parent < back2->parent)
157 /* This is a full backref and the parents match. */
158 if (back1->node.full_backref)
161 if (back1->owner > back2->owner)
163 if (back1->owner < back2->owner)
166 if (back1->offset > back2->offset)
168 if (back1->offset < back2->offset)
/* The on-disk location only takes part when both sides have found_ref set. */
171 if (back1->found_ref && back2->found_ref) {
172 if (back1->disk_bytenr > back2->disk_bytenr)
174 if (back1->disk_bytenr < back2->disk_bytenr)
177 if (back1->bytes > back2->bytes)
179 if (back1->bytes < back2->bytes)
187 * Much like data_backref, but with the undetermined members removed
188 * and changed to use list_head.
189 * During extent scan, it is stored in root->orphan_data_extent.
190 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
192 struct orphan_data_extent {
193 struct list_head list;
201 struct tree_backref {
202 struct extent_backref node;
/*
 * Downcast a generic extent_backref to the struct tree_backref that embeds
 * it as its 'node' member.  No type check is performed.
 */
209 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
211 return container_of(back, struct tree_backref, node);
/*
 * Comparator for two tree backrefs, passed as rb_nodes.
 *
 * Both nodes are expected to be non-data backrefs (WARN_ON fires
 * otherwise).  The only key compared in the visible code is 'parent'.
 *
 * NOTE(review): the return statements are elided in this listing; confirm
 * the >0 / <0 / 0 convention against the complete source.
 */
214 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
216 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
217 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
218 struct tree_backref *back1 = to_tree_backref(ext1);
219 struct tree_backref *back2 = to_tree_backref(ext2);
221 WARN_ON(ext1->is_data);
222 WARN_ON(ext2->is_data);
224 /* parent and root are a union, so this covers both */
225 if (back1->parent > back2->parent)
227 if (back1->parent < back2->parent)
/*
 * Top-level comparator for extent backrefs of either kind.
 *
 * Orders first by the is_data bit, then by the full_backref bit, and
 * finally delegates to compare_data_backref() or compare_tree_backref().
 *
 * NOTE(review): the condition choosing between the two delegates and the
 * early return values are elided in this listing; presumably the choice is
 * made on is_data -- confirm against the complete source.
 */
233 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
235 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
236 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
238 if (ext1->is_data > ext2->is_data)
241 if (ext1->is_data < ext2->is_data)
244 if (ext1->full_backref > ext2->full_backref)
246 if (ext1->full_backref < ext2->full_backref)
250 return compare_data_backref(node1, node2);
252 return compare_tree_backref(node1, node2);
255 /* Explicit initialization for extent_record::flag_block_full_backref */
256 enum { FLAG_UNSET = 2 };
258 struct extent_record {
259 struct list_head backrefs;
260 struct list_head dups;
261 struct rb_root backref_tree;
262 struct list_head list;
263 struct cache_extent cache;
264 struct btrfs_disk_key parent_key;
269 u64 extent_item_refs;
271 u64 parent_generation;
275 unsigned int flag_block_full_backref:2;
276 unsigned int found_rec:1;
277 unsigned int content_checked:1;
278 unsigned int owner_ref_checked:1;
279 unsigned int is_root:1;
280 unsigned int metadata:1;
281 unsigned int bad_full_backref:1;
282 unsigned int crossing_stripes:1;
283 unsigned int wrong_chunk_type:1;
/*
 * Map a list_head back to the struct extent_record that embeds it as its
 * 'list' member (not 'backrefs' or 'dups').
 */
286 static inline struct extent_record* to_extent_record(struct list_head *entry)
288 return container_of(entry, struct extent_record, list);
291 struct inode_backref {
292 struct list_head list;
293 unsigned int found_dir_item:1;
294 unsigned int found_dir_index:1;
295 unsigned int found_inode_ref:1;
/*
 * Map a list_head back to the struct inode_backref that embeds it as its
 * 'list' member.
 */
305 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
307 return list_entry(entry, struct inode_backref, list);
310 struct root_item_record {
311 struct list_head list;
317 struct btrfs_key drop_key;
/*
 * Error bits recorded per backref ('errors' member) and turned into text by
 * print_ref_error().  One bit per condition, bits 0 through 12.
 */
320 #define REF_ERR_NO_DIR_ITEM (1 << 0)
321 #define REF_ERR_NO_DIR_INDEX (1 << 1)
322 #define REF_ERR_NO_INODE_REF (1 << 2)
323 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
324 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
325 #define REF_ERR_DUP_INODE_REF (1 << 5)
326 #define REF_ERR_INDEX_UNMATCH (1 << 6)
327 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
328 #define REF_ERR_NAME_TOO_LONG (1 << 8) /* 0x100 */
329 #define REF_ERR_NO_ROOT_REF (1 << 9)
330 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
331 #define REF_ERR_DUP_ROOT_REF (1 << 11)
332 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
334 struct file_extent_hole {
340 struct inode_record {
341 struct list_head backrefs;
342 unsigned int checked:1;
343 unsigned int merging:1;
344 unsigned int found_inode_item:1;
345 unsigned int found_dir_item:1;
346 unsigned int found_file_extent:1;
347 unsigned int found_csum_item:1;
348 unsigned int some_csum_missing:1;
349 unsigned int nodatasum:1;
362 struct rb_root holes;
363 struct list_head orphan_extents;
/*
 * Error bits recorded per inode record ('errors' member) and turned into
 * text by print_inode_error().  One bit per condition, bits 0 through 14.
 */
368 #define I_ERR_NO_INODE_ITEM (1 << 0)
369 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
370 #define I_ERR_DUP_INODE_ITEM (1 << 2)
371 #define I_ERR_DUP_DIR_INDEX (1 << 3)
372 #define I_ERR_ODD_DIR_ITEM (1 << 4)
373 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
374 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
375 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
376 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) /* 0x100 */
377 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
378 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) /* 0x400 */
379 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
380 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
381 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
382 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
384 struct root_backref {
385 struct list_head list;
386 unsigned int found_dir_item:1;
387 unsigned int found_dir_index:1;
388 unsigned int found_back_ref:1;
389 unsigned int found_forward_ref:1;
390 unsigned int reachable:1;
/*
 * Map a list_head back to the struct root_backref that embeds it as its
 * 'list' member.
 */
399 static inline struct root_backref* to_root_backref(struct list_head *entry)
401 return list_entry(entry, struct root_backref, list);
405 struct list_head backrefs;
406 struct cache_extent cache;
407 unsigned int found_root_item:1;
413 struct cache_extent cache;
418 struct cache_extent cache;
419 struct cache_tree root_cache;
420 struct cache_tree inode_cache;
421 struct inode_record *current;
430 struct walk_control {
431 struct cache_tree shared;
432 struct shared_node *nodes[BTRFS_MAX_LEVEL];
438 struct btrfs_key key;
440 struct list_head list;
443 struct extent_entry {
448 struct list_head list;
451 struct root_item_info {
452 /* level of the root */
454 /* number of nodes at this level, must be 1 for a root */
458 struct cache_extent cache_extent;
462 * Error bit for low memory mode check.
464 * Currently no caller cares about it yet. Just internal use for error
/*
 * Every flag below must be a distinct single bit so that several errors can
 * be OR-ed into one value and still be tested individually.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
/*
 * Must not alias REFERENCER_MISMATCH, so it takes the first bit above
 * CHUNK_TYPE_MISMATCH.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
/*
 * Progress-reporting routine.
 *
 * 'p' is a struct task_ctx.  The routine prints the label that corresponds
 * to priv->tp followed by one of four spinner characters (selected by
 * count % 4) and a carriage return, pacing itself with
 * task_period_start(..., 1000) and task_period_wait().  TASK_NOTHING is
 * tested before printing.
 *
 * NOTE(review): the loop structure, the 'count' variable and the return
 * value are elided in this listing.
 */
478 static void *print_status_check(void *p)
480 struct task_ctx *priv = p;
481 const char work_indicator[] = { '.', 'o', 'O', 'o' };
483 static char *task_position_string[] = {
485 "checking free space cache",
489 task_period_start(priv->info, 1000 /* 1s */);
491 if (priv->tp == TASK_NOTHING)
495 printf("%s [%c]\r", task_position_string[priv->tp],
496 work_indicator[count % 4]);
499 task_period_wait(priv->info);
504 static int print_status_return(void *p)
/*
 * Translate a mode name into an enum btrfs_check_mode value.
 *
 * "lowmem" maps to CHECK_MODE_LOWMEM; "orig" and "original" both map to
 * CHECK_MODE_ORIGINAL; the final return is CHECK_MODE_UNKNOWN.  The match
 * is exact and case-sensitive (strcmp), and 'str' must not be NULL.
 */
512 static enum btrfs_check_mode parse_check_mode(const char *str)
514 if (strcmp(str, "lowmem") == 0)
515 return CHECK_MODE_LOWMEM;
516 if (strcmp(str, "orig") == 0)
517 return CHECK_MODE_ORIGINAL;
518 if (strcmp(str, "original") == 0)
519 return CHECK_MODE_ORIGINAL;
521 return CHECK_MODE_UNKNOWN;
524 /* Compatibility function to allow reuse of old code */
/*
 * Inspects the first (leftmost) hole of the 'holes' tree, with a separate
 * path for an empty tree.
 *
 * NOTE(review): both return values are elided in this listing; callers
 * compare the result against an inode size, so presumably it is the start
 * offset of the first hole -- confirm against the complete source.
 */
525 static u64 first_extent_gap(struct rb_root *holes)
527 struct file_extent_hole *hole;
529 if (RB_EMPTY_ROOT(holes))
532 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * Comparator used when inserting a hole into the hole tree.
 *
 * Orders by 'start' first; for equal starts the lengths are compared to
 * decide which hole becomes the centre of the later merge.
 *
 * NOTE(review): the return statements are elided in this listing.
 */
536 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
538 struct file_extent_hole *hole1;
539 struct file_extent_hole *hole2;
541 hole1 = rb_entry(node1, struct file_extent_hole, node);
542 hole2 = rb_entry(node2, struct file_extent_hole, node);
544 if (hole1->start > hole2->start)
546 if (hole1->start < hole2->start)
548 /* Now hole1->start == hole2->start */
549 if (hole1->len >= hole2->len)
551 * Hole 1 will be merge center
552 * Same hole will be merged later
555 /* Hole 2 will be merge center */
560 * Add a hole to the record
562 * This will do hole merging for copy_file_extent_holes(),
563 * which ensures there won't be contiguous holes.
/*
 * Insert a newly allocated hole into 'holes' and merge it with neighbours.
 *
 * After insertion the new hole absorbs the previous hole when that one
 * reaches or overlaps its start, then keeps absorbing following holes for
 * as long as they start at or before its end.  Absorbed nodes are erased
 * from the tree.
 *
 * NOTE(review): the remaining parameters, the allocation-failure path, the
 * field initialisation, the freeing of absorbed nodes and the return value
 * are elided in this listing.
 */
565 static int add_file_extent_hole(struct rb_root *holes,
568 struct file_extent_hole *hole;
569 struct file_extent_hole *prev = NULL;
570 struct file_extent_hole *next = NULL;
572 hole = malloc(sizeof(*hole));
577 /* Since compare will not return 0, no -EEXIST will happen */
578 rb_insert(holes, &hole->node, compare_hole);
580 /* simple merge with previous hole */
581 if (rb_prev(&hole->node))
582 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
/* prev touches or overlaps us: grow this hole backwards to prev's start. */
584 if (prev && prev->start + prev->len >= hole->start) {
585 hole->len = hole->start + hole->len - prev->start;
586 hole->start = prev->start;
587 rb_erase(&prev->node, holes);
592 /* iterate merge with next holes */
594 if (!rb_next(&hole->node))
596 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
598 if (hole->start + hole->len >= next->start) {
/* Only extend when next ends at or past our end; otherwise it is fully covered. */
599 if (hole->start + hole->len <= next->start + next->len)
600 hole->len = next->start + next->len -
602 rb_erase(&next->node, holes);
/*
 * Search callback passed to rb_search() by del_file_extent_hole().
 *
 * 'data' is a struct file_extent_hole used as the search key; 'node' is
 * the tree node under test.  The visible checks distinguish a key start
 * that lies before the node's hole from one that lies inside
 * [hole->start, hole->start + hole->len).
 *
 * NOTE(review): the assignment of 'start' and all return statements are
 * elided in this listing.
 */
611 static int compare_hole_range(struct rb_node *node, void *data)
613 struct file_extent_hole *hole;
616 hole = (struct file_extent_hole *)data;
619 hole = rb_entry(node, struct file_extent_hole, node);
620 if (start < hole->start)
622 if (start >= hole->start && start < hole->start + hole->len)
628 * Delete a hole in the record
630 * This will do the hole split and is much stricter than add.
/*
 * Remove the range [start, start + len) from the hole tree.
 *
 * The hole containing 'start' is located with rb_search() and
 * compare_hole_range().  A range extending past the end of that hole is
 * tested for explicitly.  The hole is then erased and whatever remains in
 * front of and/or behind the deleted range is re-added through
 * add_file_extent_hole().
 *
 * NOTE(review): the remaining parameters, the not-found and out-of-range
 * return values and the conditions guarding the two re-adds are elided in
 * this listing.
 */
632 static int del_file_extent_hole(struct rb_root *holes,
635 struct file_extent_hole *hole;
636 struct file_extent_hole tmp;
641 struct rb_node *node;
648 node = rb_search(holes, &tmp, compare_hole_range, NULL);
651 hole = rb_entry(node, struct file_extent_hole, node);
652 if (start + len > hole->start + hole->len)
656 * Now there will be no overlap, delete the hole and re-add the
657 * split(s) if they exists.
/* Leading remainder: the part of the hole in front of the deleted range. */
659 if (start > hole->start) {
660 prev_start = hole->start;
661 prev_len = start - hole->start;
/* Trailing remainder: the part of the hole behind the deleted range. */
664 if (hole->start + hole->len > start + len) {
665 next_start = start + len;
666 next_len = hole->start + hole->len - start - len;
669 rb_erase(node, holes);
672 ret = add_file_extent_hole(holes, prev_start, prev_len);
677 ret = add_file_extent_hole(holes, next_start, next_len);
/*
 * Add every hole of the source tree to 'dst'.
 *
 * The source is walked in order with rb_first()/rb_next() and each hole is
 * inserted via add_file_extent_hole(), so holes are merged in 'dst' as
 * they arrive.  The source tree itself is only read.
 *
 * NOTE(review): the second parameter, the loop construct and the error
 * handling of 'ret' are elided in this listing.
 */
684 static int copy_file_extent_holes(struct rb_root *dst,
687 struct file_extent_hole *hole;
688 struct rb_node *node;
691 node = rb_first(src);
693 hole = rb_entry(node, struct file_extent_hole, node);
694 ret = add_file_extent_hole(dst, hole->start, hole->len);
697 node = rb_next(node);
/*
 * Empty a hole tree by repeatedly taking its first node and erasing it.
 *
 * NOTE(review): the loop construct and the release of each hole are elided
 * in this listing.
 */
702 static void free_file_extent_holes(struct rb_root *holes)
704 struct rb_node *node;
705 struct file_extent_hole *hole;
707 node = rb_first(holes);
709 hole = rb_entry(node, struct file_extent_hole, node);
710 rb_erase(node, holes);
/* Restart from the new first node, since the previous one was just erased. */
712 node = rb_first(holes);
716 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
/*
 * Mark 'root' as participating in transaction 'trans'.
 *
 * Does nothing when root->last_trans already equals trans->transid.
 * Otherwise it sets track_dirty, records the transaction id, points
 * commit_root at the current root node and takes an extra reference on
 * that node with extent_buffer_get().
 */
718 static void record_root_in_trans(struct btrfs_trans_handle *trans,
719 struct btrfs_root *root)
721 if (root->last_trans != trans->transid) {
722 root->track_dirty = 1;
723 root->last_trans = trans->transid;
724 root->commit_root = root->node;
725 extent_buffer_get(root->node);
/*
 * Convert the file-type bits of an inode mode into a BTRFS_FT_* value.
 *
 * A static table indexed by (mode & S_IFMT) >> S_SHIFT holds the mapping
 * for regular files, directories, character and block devices, FIFOs,
 * sockets and symlinks.  Slots not listed in the initialiser are zero.
 *
 * NOTE(review): the definition of S_SHIFT is not visible in this listing.
 */
729 static u8 imode_to_type(u32 imode)
732 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
733 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
734 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
735 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
736 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
737 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
738 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
739 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
742 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
/*
 * Comparator for device records, passed as rb_nodes; the ordering key is
 * 'devid'.
 *
 * NOTE(review): the return statements are elided in this listing.
 */
746 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
748 struct device_record *rec1;
749 struct device_record *rec2;
751 rec1 = rb_entry(node1, struct device_record, node);
752 rec2 = rb_entry(node2, struct device_record, node);
753 if (rec1->devid > rec2->devid)
755 else if (rec1->devid < rec2->devid)
/*
 * Make a deep copy of an inode record.
 *
 * The record is copied with memcpy(), then its backref list, orphan extent
 * list and hole tree are re-initialised and refilled with freshly
 * allocated copies, so the clone shares no list or tree nodes with
 * 'orig_rec'.  ERR_PTR(-ENOMEM) is returned on the allocation-failure path
 * for the record itself.
 *
 * The tail of the function is the failure cleanup; it walks the clone's
 * holes, backrefs and orphan extents.
 *
 * NOTE(review): the success return, the error labels and the free() calls
 * of the cleanup are elided in this listing.
 */
761 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
763 struct inode_record *rec;
764 struct inode_backref *backref;
765 struct inode_backref *orig;
766 struct inode_backref *tmp;
767 struct orphan_data_extent *src_orphan;
768 struct orphan_data_extent *dst_orphan;
773 rec = malloc(sizeof(*rec));
775 return ERR_PTR(-ENOMEM);
776 memcpy(rec, orig_rec, sizeof(*rec));
/* The memcpy duplicated the list heads and tree root; reset them before refilling. */
778 INIT_LIST_HEAD(&rec->backrefs);
779 INIT_LIST_HEAD(&rec->orphan_extents);
780 rec->holes = RB_ROOT;
782 list_for_each_entry(orig, &orig_rec->backrefs, list) {
/* A backref is followed by its name; copy namelen bytes plus the terminator. */
783 size = sizeof(*orig) + orig->namelen + 1;
784 backref = malloc(size);
789 memcpy(backref, orig, size);
790 list_add_tail(&backref->list, &rec->backrefs);
792 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
793 dst_orphan = malloc(sizeof(*dst_orphan));
798 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
799 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
801 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
808 rb = rb_first(&rec->holes);
810 struct file_extent_hole *hole;
812 hole = rb_entry(rb, struct file_extent_hole, node);
818 if (!list_empty(&rec->backrefs))
819 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
820 list_del(&orig->list);
/*
 * NOTE(review): orphan_extents holds struct orphan_data_extent entries, yet
 * it is walked with the struct inode_backref cursors 'orig' and 'tmp'.  That
 * is only safe while both structs keep 'list' at the same offset; cursors
 * of type struct orphan_data_extent would be the robust choice.
 */
824 if (!list_empty(&rec->orphan_extents))
825 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
826 list_del(&orig->list);
/*
 * Print one line to stdout for every entry of 'orphan_extents', preceded
 * by a heading that names the tree.  An empty list is tested for up front.
 *
 * NOTE(review): the second parameter and the trailing printf arguments are
 * elided in this listing.
 */
835 static void print_orphan_data_extents(struct list_head *orphan_extents,
838 struct orphan_data_extent *orphan;
840 if (list_empty(orphan_extents))
842 printf("The following data extent is lost in tree %llu:\n",
844 list_for_each_entry(orphan, orphan_extents, list) {
845 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
846 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Report all errors recorded for an inode on stderr.
 *
 * Output is one line of the form "root R inode I errors X" followed by a
 * ", <description>" fragment for every I_ERR_* bit set in rec->errors.
 * For a reloc root the line is prefixed with "reloc" and R is taken from
 * root_key.offset instead of root_key.objectid.  Orphan data extents and
 * file extent holes are listed afterwards when the matching error bits
 * are set.
 */
851 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
853 u64 root_objectid = root->root_key.objectid;
854 int errors = rec->errors;
858 /* For reloc root errors, print the objectid of the corresponding fs root */
859 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
860 root_objectid = root->root_key.offset;
861 fprintf(stderr, "reloc");
863 fprintf(stderr, "root %llu inode %llu errors %x",
864 (unsigned long long) root_objectid,
865 (unsigned long long) rec->ino, rec->errors);
867 if (errors & I_ERR_NO_INODE_ITEM)
868 fprintf(stderr, ", no inode item");
869 if (errors & I_ERR_NO_ORPHAN_ITEM)
870 fprintf(stderr, ", no orphan item");
871 if (errors & I_ERR_DUP_INODE_ITEM)
872 fprintf(stderr, ", dup inode item");
873 if (errors & I_ERR_DUP_DIR_INDEX)
874 fprintf(stderr, ", dup dir index");
875 if (errors & I_ERR_ODD_DIR_ITEM)
876 fprintf(stderr, ", odd dir item");
877 if (errors & I_ERR_ODD_FILE_EXTENT)
878 fprintf(stderr, ", odd file extent");
879 if (errors & I_ERR_BAD_FILE_EXTENT)
880 fprintf(stderr, ", bad file extent");
881 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
882 fprintf(stderr, ", file extent overlap");
883 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
884 fprintf(stderr, ", file extent discount");
885 if (errors & I_ERR_DIR_ISIZE_WRONG)
886 fprintf(stderr, ", dir isize wrong");
887 if (errors & I_ERR_FILE_NBYTES_WRONG)
888 fprintf(stderr, ", nbytes wrong");
889 if (errors & I_ERR_ODD_CSUM_ITEM)
890 fprintf(stderr, ", odd csum item");
891 if (errors & I_ERR_SOME_CSUM_MISSING)
892 fprintf(stderr, ", some csum missing");
893 if (errors & I_ERR_LINK_COUNT_WRONG)
894 fprintf(stderr, ", link count wrong");
895 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
896 fprintf(stderr, ", orphan file extent");
897 fprintf(stderr, "\n");
898 /* Print the orphan extents if needed */
/*
 * NOTE(review): the heading above prints root_objectid (remapped for reloc
 * roots), whereas root->objectid is passed here -- confirm this is intended.
 */
899 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
900 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
902 /* Print the holes if needed */
903 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
904 struct file_extent_hole *hole;
905 struct rb_node *node;
908 node = rb_first(&rec->holes);
909 fprintf(stderr, "Found file extent holes:\n");
912 hole = rb_entry(node, struct file_extent_hole, node);
913 fprintf(stderr, "\tstart: %llu, len: %llu\n",
914 hole->start, hole->len);
915 node = rb_next(node);
918 fprintf(stderr, "\tstart: 0, len: %llu\n",
920 root->fs_info->sectorsize));
/*
 * Append a ", <description>" fragment to stderr for every REF_ERR_* bit
 * set in 'errors', then terminate the line with a newline.  The caller is
 * expected to have printed the start of the line already, since every
 * fragment begins with a comma.
 */
924 static void print_ref_error(int errors)
926 if (errors & REF_ERR_NO_DIR_ITEM)
927 fprintf(stderr, ", no dir item");
928 if (errors & REF_ERR_NO_DIR_INDEX)
929 fprintf(stderr, ", no dir index");
930 if (errors & REF_ERR_NO_INODE_REF)
931 fprintf(stderr, ", no inode ref");
932 if (errors & REF_ERR_DUP_DIR_ITEM)
933 fprintf(stderr, ", dup dir item");
934 if (errors & REF_ERR_DUP_DIR_INDEX)
935 fprintf(stderr, ", dup dir index");
936 if (errors & REF_ERR_DUP_INODE_REF)
937 fprintf(stderr, ", dup inode ref");
938 if (errors & REF_ERR_INDEX_UNMATCH)
939 fprintf(stderr, ", index mismatch");
940 if (errors & REF_ERR_FILETYPE_UNMATCH)
941 fprintf(stderr, ", filetype mismatch");
942 if (errors & REF_ERR_NAME_TOO_LONG)
943 fprintf(stderr, ", name too long");
944 if (errors & REF_ERR_NO_ROOT_REF)
945 fprintf(stderr, ", no root ref");
946 if (errors & REF_ERR_NO_ROOT_BACKREF)
947 fprintf(stderr, ", no root backref");
948 if (errors & REF_ERR_DUP_ROOT_REF)
949 fprintf(stderr, ", dup root ref");
950 if (errors & REF_ERR_DUP_ROOT_BACKREF)
951 fprintf(stderr, ", dup root backref");
952 fprintf(stderr, "\n");
/*
 * Look up the inode record for 'ino' in 'inode_cache'.
 *
 * An existing record that is about to be modified ('mod') while shared
 * (refs > 1) is replaced in its cache node by a private clone from
 * clone_inode_rec().  On the creation path a zeroed record is allocated
 * with extent_start set to (u64)-1 and empty lists and hole tree, wrapped
 * in a ptr_node keyed by [ino, ino + 1) and inserted into the cache.
 *
 * Error returns visible here are ERR_PTR(-ENOMEM) and ERR_PTR(-EEXIST).
 *
 * NOTE(review): the remaining parameters, the branch conditions (including
 * what selects the creation path) and the success return are elided in
 * this listing.
 */
955 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
958 struct ptr_node *node;
959 struct cache_extent *cache;
960 struct inode_record *rec = NULL;
963 cache = lookup_cache_extent(inode_cache, ino, 1);
965 node = container_of(cache, struct ptr_node, cache);
967 if (mod && rec->refs > 1) {
968 node->data = clone_inode_rec(rec);
969 if (IS_ERR(node->data))
975 rec = calloc(1, sizeof(*rec));
977 return ERR_PTR(-ENOMEM);
/* (u64)-1 marks "no extent seen yet"; merge_inode_recs() tests for it. */
979 rec->extent_start = (u64)-1;
981 INIT_LIST_HEAD(&rec->backrefs);
982 INIT_LIST_HEAD(&rec->orphan_extents);
983 rec->holes = RB_ROOT;
985 node = malloc(sizeof(*node));
988 return ERR_PTR(-ENOMEM);
990 node->cache.start = ino;
991 node->cache.size = 1;
994 if (ino == BTRFS_FREE_INO_OBJECTID)
997 ret = insert_cache_extent(inode_cache, &node->cache);
999 return ERR_PTR(-EEXIST);
/*
 * Drain 'orphan_extents': while the list is not empty, take its first
 * entry and unlink it.
 *
 * NOTE(review): the release of each entry is elided in this listing.
 */
1004 static void free_orphan_data_extents(struct list_head *orphan_extents)
1006 struct orphan_data_extent *orphan;
1008 while (!list_empty(orphan_extents)) {
1009 orphan = list_entry(orphan_extents->next,
1010 struct orphan_data_extent, list);
1011 list_del(&orphan->list);
/*
 * Drop one reference on an inode record.
 *
 * The reference count is decremented first and tested for remaining
 * references.  The teardown that follows unlinks every backref and
 * releases the orphan extent list and the hole tree.
 *
 * NOTE(review): the early return and the free() calls are elided in this
 * listing.
 */
1016 static void free_inode_rec(struct inode_record *rec)
1018 struct inode_backref *backref;
1020 if (--rec->refs > 0)
1023 while (!list_empty(&rec->backrefs)) {
1024 backref = to_inode_backref(rec->backrefs.next);
1025 list_del(&backref->list);
1028 free_orphan_data_extents(&rec->orphan_extents);
1029 free_file_extent_holes(&rec->holes);
/*
 * Test whether an inode record holds nothing worth keeping: no errors,
 * already checked, inode item found, nlink equal to the links found, and
 * no backrefs left on its list.
 *
 * NOTE(review): the return statements are elided in this listing.
 */
1033 static int can_free_inode_rec(struct inode_record *rec)
1035 if (!rec->errors && rec->checked && rec->found_inode_item &&
1036 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Finalise the checks on an inode record and release it if it is clean.
 *
 * Steps, in order:
 *  - records without an inode item are tested for first;
 *  - backrefs that have both a dir item and a dir index are checked
 *    against the file type derived from the inode mode; those without
 *    errors that also have an inode ref (and a matching link count) are
 *    unlinked from the list;
 *  - records that are not yet checked, or are being merged, are tested for;
 *  - directories get size and file-extent checks, regular files and
 *    symlinks get dir-item, nbytes, hole and checksum checks, each setting
 *    the matching I_ERR_* bit;
 *  - a record that passes can_free_inode_rec() is removed from
 *    'inode_cache' and released with free_inode_rec().
 *
 * The record must hold exactly one reference at that point (BUG_ON).
 *
 * NOTE(review): the early returns and some free() calls are elided in this
 * listing.
 */
1041 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1042 struct inode_record *rec)
1044 struct cache_extent *cache;
1045 struct inode_backref *tmp, *backref;
1046 struct ptr_node *node;
1049 if (!rec->found_inode_item)
1052 filetype = imode_to_type(rec->imode);
1053 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1054 if (backref->found_dir_item && backref->found_dir_index) {
1055 if (backref->filetype != filetype)
1056 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1057 if (!backref->errors && backref->found_inode_ref &&
1058 rec->nlink == rec->found_link) {
1059 list_del(&backref->list);
1065 if (!rec->checked || rec->merging)
1068 if (S_ISDIR(rec->imode)) {
1069 if (rec->found_size != rec->isize)
1070 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1071 if (rec->found_file_extent)
1072 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1073 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1074 if (rec->found_dir_item)
1075 rec->errors |= I_ERR_ODD_DIR_ITEM;
1076 if (rec->found_size != rec->nbytes)
1077 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
/* Hole check: skipped for unlinked inodes and when no_holes is set. */
1078 if (rec->nlink > 0 && !no_holes &&
1079 (rec->extent_end < rec->isize ||
1080 first_extent_gap(&rec->holes) < rec->isize))
1081 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1084 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1085 if (rec->found_csum_item && rec->nodatasum)
1086 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1087 if (rec->some_csum_missing && !rec->nodatasum)
1088 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1091 BUG_ON(rec->refs != 1);
1092 if (can_free_inode_rec(rec)) {
1093 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1094 node = container_of(cache, struct ptr_node, cache);
1095 BUG_ON(node->data != rec);
1096 remove_cache_extent(inode_cache, &node->cache);
1098 free_inode_rec(rec);
/*
 * Search 'root' for an orphan item.
 *
 * The key uses objectid BTRFS_ORPHAN_OBJECTID and type
 * BTRFS_ORPHAN_ITEM_KEY.  The search passes no transaction handle and
 * zero for the last two arguments, and the path is released immediately
 * afterwards.
 *
 * NOTE(review): the key offset assignment (presumably 'ino') and the
 * mapping of 'ret' to the return value are elided in this listing.
 */
1102 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1104 struct btrfs_path path;
1105 struct btrfs_key key;
1108 key.objectid = BTRFS_ORPHAN_OBJECTID;
1109 key.type = BTRFS_ORPHAN_ITEM_KEY;
1112 btrfs_init_path(&path);
1113 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1114 btrfs_release_path(&path);
/*
 * Record the contents of an INODE_ITEM in the current inode record.
 *
 * The record is active_node->current; it must match key->objectid and
 * hold at most one reference (BUG_ON).  A second inode item for the same
 * record sets I_ERR_DUP_INODE_ITEM.  Otherwise nlink, size, nbytes and
 * mode are read from the item in leaf 'eb' at 'slot', the NODATASUM inode
 * flag is tested, found_inode_item is set, and an inode with nlink == 0
 * is flagged I_ERR_NO_ORPHAN_ITEM.  The record is then handed to
 * maybe_free_inode_rec().
 *
 * NOTE(review): the return statements and the statement guarded by the
 * NODATASUM test are elided in this listing.
 */
1120 static int process_inode_item(struct extent_buffer *eb,
1121 int slot, struct btrfs_key *key,
1122 struct shared_node *active_node)
1124 struct inode_record *rec;
1125 struct btrfs_inode_item *item;
1127 rec = active_node->current;
1128 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1129 if (rec->found_inode_item) {
1130 rec->errors |= I_ERR_DUP_INODE_ITEM;
1133 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1134 rec->nlink = btrfs_inode_nlink(eb, item);
1135 rec->isize = btrfs_inode_size(eb, item);
1136 rec->nbytes = btrfs_inode_nbytes(eb, item);
1137 rec->imode = btrfs_inode_mode(eb, item);
1138 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1140 rec->found_inode_item = 1;
1141 if (rec->nlink == 0)
1142 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1143 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find the backref of 'rec' for the given directory and name, creating it
 * if necessary.
 *
 * The existing list is scanned for an entry with the same 'dir', the same
 * 'namelen' and an identical name.  The creation path allocates the
 * backref together with room for the name and a terminating NUL, zeroes
 * the fixed part, stores the name and appends the entry to rec->backrefs.
 *
 * NOTE(review): the 'name' parameter line, the loop's break/continue/return
 * statements, the allocation-failure handling and the 'dir' assignment are
 * elided in this listing.
 */
1147 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1149 int namelen, u64 dir)
1151 struct inode_backref *backref;
1153 list_for_each_entry(backref, &rec->backrefs, list) {
1154 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1156 if (backref->dir != dir || backref->namelen != namelen)
1158 if (memcmp(name, backref->name, namelen))
1163 backref = malloc(sizeof(*backref) + namelen + 1);
/* Only the fixed part is zeroed; the name bytes are written just below. */
1166 memset(backref, 0, sizeof(*backref));
1168 backref->namelen = namelen;
1169 memcpy(backref->name, name, namelen);
1170 backref->name[namelen] = '\0';
1171 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Record one reference to inode 'ino' (a DIR_INDEX, DIR_ITEM, INODE_REF or
 * INODE_EXTREF item) in its inode record.
 *
 * The record is obtained with get_inode_rec() in modify mode and the
 * backref for (dir, name) with get_inode_backref().  'errors' is OR-ed
 * into the backref.  Depending on 'itemtype' the function then:
 *  - DIR_INDEX: flags a duplicate, an index that disagrees with an inode
 *    ref already seen, or a file type that disagrees with a dir item
 *    already seen; stores index and file type;
 *  - DIR_ITEM: flags a duplicate or a file type that disagrees with a dir
 *    index already seen; stores the file type;
 *  - INODE_REF / INODE_EXTREF: flags a duplicate or an index that
 *    disagrees with a dir index already seen; stores index and ref type.
 * Finally the record is passed to maybe_free_inode_rec().
 *
 * A failing get_inode_rec() triggers BUG_ON.
 *
 * NOTE(review): link counting, the handling of an unknown item type and
 * the return value are elided in this listing.
 */
1175 static int add_inode_backref(struct cache_tree *inode_cache,
1176 u64 ino, u64 dir, u64 index,
1177 const char *name, int namelen,
1178 u8 filetype, u8 itemtype, int errors)
1180 struct inode_record *rec;
1181 struct inode_backref *backref;
1183 rec = get_inode_rec(inode_cache, ino, 1);
1184 BUG_ON(IS_ERR(rec));
1185 backref = get_inode_backref(rec, name, namelen, dir);
1188 backref->errors |= errors;
1189 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1190 if (backref->found_dir_index)
1191 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1192 if (backref->found_inode_ref && backref->index != index)
1193 backref->errors |= REF_ERR_INDEX_UNMATCH;
1194 if (backref->found_dir_item && backref->filetype != filetype)
1195 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1197 backref->index = index;
1198 backref->filetype = filetype;
1199 backref->found_dir_index = 1;
1200 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1202 if (backref->found_dir_item)
1203 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1204 if (backref->found_dir_index && backref->filetype != filetype)
1205 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1207 backref->filetype = filetype;
1208 backref->found_dir_item = 1;
1209 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1210 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1211 if (backref->found_inode_ref)
1212 backref->errors |= REF_ERR_DUP_INODE_REF;
1213 if (backref->found_dir_index && backref->index != index)
1214 backref->errors |= REF_ERR_INDEX_UNMATCH;
1216 backref->index = index;
1218 backref->ref_type = itemtype;
1219 backref->found_inode_ref = 1;
1224 maybe_free_inode_rec(inode_cache, rec);
/*
 * Fold everything known about 'src' into 'dst' (two records for the same
 * inode); 'dst_cache' is the cache that owns 'dst'.
 *
 * - Every backref of 'src' is replayed into 'dst' through
 *   add_inode_backref(), once per kind of item that was seen for it.
 * - The found_dir_item, found_file_extent, found_csum_item and
 *   some_csum_missing flags are propagated when set in 'src'.
 * - Holes of 'src' are copied when its first gap lies before that of
 *   'dst'.
 * - found_link (less 'dir_count') and found_size are accumulated.
 * - Extent ranges are merged: an overlap sets I_ERR_FILE_EXTENT_OVERLAP,
 *   a gap between dst->extent_end and src->extent_start is added as a
 *   hole, and extent_end is advanced as needed.
 * - Error bits are OR-ed, and the inode item fields are taken over from
 *   'src' if 'dst' has none yet.
 *
 * NOTE(review): the maintenance of 'dir_count', the 'merging' flag
 * handling, error checks on 'ret', the else branch that sets
 * I_ERR_DUP_INODE_ITEM and the return value are elided in this listing.
 */
1228 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1229 struct cache_tree *dst_cache)
1231 struct inode_backref *backref;
1236 list_for_each_entry(backref, &src->backrefs, list) {
1237 if (backref->found_dir_index) {
1238 add_inode_backref(dst_cache, dst->ino, backref->dir,
1239 backref->index, backref->name,
1240 backref->namelen, backref->filetype,
1241 BTRFS_DIR_INDEX_KEY, backref->errors);
1243 if (backref->found_dir_item) {
1245 add_inode_backref(dst_cache, dst->ino,
1246 backref->dir, 0, backref->name,
1247 backref->namelen, backref->filetype,
1248 BTRFS_DIR_ITEM_KEY, backref->errors);
1250 if (backref->found_inode_ref) {
1251 add_inode_backref(dst_cache, dst->ino,
1252 backref->dir, backref->index,
1253 backref->name, backref->namelen, 0,
1254 backref->ref_type, backref->errors);
1258 if (src->found_dir_item)
1259 dst->found_dir_item = 1;
1260 if (src->found_file_extent)
1261 dst->found_file_extent = 1;
1262 if (src->found_csum_item)
1263 dst->found_csum_item = 1;
1264 if (src->some_csum_missing)
1265 dst->some_csum_missing = 1;
1266 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1267 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1272 BUG_ON(src->found_link < dir_count);
1273 dst->found_link += src->found_link - dir_count;
1274 dst->found_size += src->found_size;
/* (u64)-1 in extent_start means "no extent recorded" for that record. */
1275 if (src->extent_start != (u64)-1) {
1276 if (dst->extent_start == (u64)-1) {
1277 dst->extent_start = src->extent_start;
1278 dst->extent_end = src->extent_end;
1280 if (dst->extent_end > src->extent_start)
1281 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1282 else if (dst->extent_end < src->extent_start) {
1283 ret = add_file_extent_hole(&dst->holes,
1285 src->extent_start - dst->extent_end);
1287 if (dst->extent_end < src->extent_end)
1288 dst->extent_end = src->extent_end;
1292 dst->errors |= src->errors;
1293 if (src->found_inode_item) {
1294 if (!dst->found_inode_item) {
1295 dst->nlink = src->nlink;
1296 dst->isize = src->isize;
1297 dst->nbytes = src->nbytes;
1298 dst->imode = src->imode;
1299 dst->nodatasum = src->nodatasum;
1300 dst->found_inode_item = 1;
1302 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Transfer the inode records collected in 'src_node' into 'dst_node'.
 *
 * One reference is dropped from 'src_node' first.  The root_cache of the
 * source is processed, then its inode_cache, each into the cache of the
 * same name in the destination.  For every entry a new ptr_node covering
 * the same range is inserted into the destination; on -EEXIST the record
 * is merged into the conflicting destination record with
 * merge_inode_recs(), which is then marked checked, passed to
 * maybe_free_inode_rec(), and the source record is released.
 *
 * Afterwards, if the source had a current inode that is newer than the
 * destination's (or the destination has none), the destination's current
 * record is marked checked and replaced by the record for that inode.
 *
 * Failures of get_inode_rec() trigger BUG_ON.
 *
 * NOTE(review): the loop/goto structure, the branch taken when the source
 * still has references, the assignments of 'rec' and 'ins->data' and the
 * return value are elided in this listing.
 */
1310 static int splice_shared_node(struct shared_node *src_node,
1311 struct shared_node *dst_node)
1313 struct cache_extent *cache;
1314 struct ptr_node *node, *ins;
1315 struct cache_tree *src, *dst;
1316 struct inode_record *rec, *conflict;
1317 u64 current_ino = 0;
1321 if (--src_node->refs == 0)
1323 if (src_node->current)
1324 current_ino = src_node->current->ino;
1326 src = &src_node->root_cache;
1327 dst = &dst_node->root_cache;
1329 cache = search_cache_extent(src, 0);
1331 node = container_of(cache, struct ptr_node, cache);
/* Advance before the current node may be removed from the source cache. */
1333 cache = next_cache_extent(cache);
1336 remove_cache_extent(src, &node->cache);
1339 ins = malloc(sizeof(*ins));
1341 ins->cache.start = node->cache.start;
1342 ins->cache.size = node->cache.size;
1346 ret = insert_cache_extent(dst, &ins->cache);
1347 if (ret == -EEXIST) {
1348 conflict = get_inode_rec(dst, rec->ino, 1);
1349 BUG_ON(IS_ERR(conflict));
1350 merge_inode_recs(rec, conflict, dst);
1352 conflict->checked = 1;
1353 if (dst_node->current == conflict)
1354 dst_node->current = NULL;
1356 maybe_free_inode_rec(dst, conflict);
1357 free_inode_rec(rec);
/* Second pass: after the root caches, repeat for the inode caches. */
1364 if (src == &src_node->root_cache) {
1365 src = &src_node->inode_cache;
1366 dst = &dst_node->inode_cache;
1370 if (current_ino > 0 && (!dst_node->current ||
1371 current_ino > dst_node->current->ino)) {
1372 if (dst_node->current) {
1373 dst_node->current->checked = 1;
1374 maybe_free_inode_rec(dst, dst_node->current);
1376 dst_node->current = get_inode_rec(dst, current_ino, 1);
1377 BUG_ON(IS_ERR(dst_node->current));
/*
 * Per-entry destructor for a cache tree of ptr_node entries: recovers the
 * enclosing ptr_node from the embedded cache_extent and frees the inode
 * record.  NOTE(review): the assignment of rec is on a line not shown in
 * this view; presumably it is taken from the ptr_node - confirm.
 */
1382 static void free_inode_ptr(struct cache_extent *cache)
1384 struct ptr_node *node;
1385 struct inode_record *rec;
1387 node = container_of(cache, struct ptr_node, cache);
1389 free_inode_rec(rec);
/*
 * Instantiates a tree-freeing helper built around free_inode_ptr.
 * NOTE(review): presumably this is what defines free_inode_recs_tree(),
 * which enter_shared_node() calls - confirm against the macro definition.
 */
1393 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the shared_node registered for bytenr in the given cache tree.
 * The lookup uses a one byte range starting at bytenr, matching the size
 * used when nodes are inserted by add_shared_node().
 * NOTE(review): the miss path is on lines not shown here; enter_shared_node()
 * adds a node and looks it up again, so a miss presumably yields NULL.
 */
1395 static struct shared_node *find_shared_node(struct cache_tree *shared,
1398 struct cache_extent *cache;
1399 struct shared_node *node;
1401 cache = lookup_cache_extent(shared, bytenr, 1);
/* The cache_extent is embedded in shared_node; step back to the container. */
1403 node = container_of(cache, struct shared_node, cache);
/*
 * Allocate a zeroed shared_node for the tree block at bytenr, initialise its
 * two record caches and insert it into the shared tree.
 * NOTE(review): the use of the refs argument is on a line not shown in this
 * view; presumably it seeds node->refs - confirm.
 */
1409 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1412 struct shared_node *node;
1414 node = calloc(1, sizeof(*node));
/* Keyed by block address with a length of one, as find_shared_node() expects. */
1417 node->cache.start = bytenr;
1418 node->cache.size = 1;
1419 cache_tree_init(&node->root_cache);
1420 cache_tree_init(&node->inode_cache);
1423 ret = insert_cache_extent(shared, &node->cache);
/*
 * Called while walking down into the tree block at bytenr on the given level.
 * Either registers a new shared_node for the block and makes it the active
 * node, or, for a block that is already registered, drops a reference and
 * hands its cached records over to the currently active node.
 * NOTE(review): the return values are on lines not shown in this view.
 */
1428 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1429 struct walk_control *wc, int level)
1431 struct shared_node *node;
1432 struct shared_node *dest;
/* Nothing to do when this level already is the active one. */
1435 if (level == wc->active_node)
/* The active node must sit above the level being entered. */
1438 BUG_ON(wc->active_node <= level);
1439 node = find_shared_node(&wc->shared, bytenr);
/*
 * NOTE(review): the condition guarding the next four statements is not
 * visible; presumably they run only when the lookup above found nothing.
 */
1441 ret = add_shared_node(&wc->shared, bytenr, refs);
1443 node = find_shared_node(&wc->shared, bytenr);
1444 wc->nodes[level] = node;
1445 wc->active_node = level;
/*
 * The active node is the root level and the root item has zero refs:
 * only drop a reference, and tear the node down when it was the last one.
 */
1449 if (wc->root_level == wc->active_node &&
1450 btrfs_root_refs(&root->root_item) == 0) {
1451 if (--node->refs == 0) {
1452 free_inode_recs_tree(&node->root_cache);
1453 free_inode_recs_tree(&node->inode_cache);
1454 remove_cache_extent(&wc->shared, &node->cache);
/* Otherwise merge the records into the active node (this drops one ref). */
1460 dest = wc->nodes[wc->active_node];
1461 splice_shared_node(node, dest);
1462 if (node->refs == 0) {
1463 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Called while walking back up past the active shared node at the given
 * level.  The active node is detached from wc->nodes, a higher level becomes
 * active, and the records collected so far are spliced into that level
 * unless the new active node is the root level of a root with zero refs.
 */
1469 static int leave_shared_node(struct btrfs_root *root,
1470 struct walk_control *wc, int level)
1472 struct shared_node *node;
1473 struct shared_node *dest;
/* The root level has no parent to hand records to. */
1476 if (level == wc->root_level)
/*
 * Scan upwards for the next level to activate.
 * NOTE(review): the loop body is not visible; presumably it stops at the
 * first level with a non-NULL wc->nodes[] entry.
 */
1479 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1483 BUG_ON(i >= BTRFS_MAX_LEVEL);
1485 node = wc->nodes[wc->active_node];
1486 wc->nodes[wc->active_node] = NULL;
1487 wc->active_node = i;
1489 dest = wc->nodes[wc->active_node];
1490 if (wc->active_node < wc->root_level ||
1491 btrfs_root_refs(&root->root_item) > 0) {
/* splice_shared_node() drops one ref; the node must survive that. */
1492 BUG_ON(node->refs <= 1);
1493 splice_shared_node(node, dest);
1495 BUG_ON(node->refs < 2);
1504 * 1 - if the root with id child_root_id is a child of root parent_root_id
1505 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1506 * has other root(s) as parent(s)
1507 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Decide whether child_root_id is a child of parent_root_id by consulting the
 * tree root: first a direct ROOT_REF lookup keyed by (parent, child), then a
 * scan of the ROOT_BACKREF items of the child.
 */
1509 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1512 struct btrfs_path path;
1513 struct btrfs_key key;
1514 struct extent_buffer *leaf;
1518 btrfs_init_path(&path);
/* Step 1: look for the forward reference parent -> child. */
1520 key.objectid = parent_root_id;
1521 key.type = BTRFS_ROOT_REF_KEY;
1522 key.offset = child_root_id;
1523 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1527 btrfs_release_path(&path);
/* Step 2: walk the back references stored under the child root id. */
1531 key.objectid = child_root_id;
1532 key.type = BTRFS_ROOT_BACKREF_KEY;
1534 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1540 leaf = path.nodes[0];
/* Ran off the end of this leaf: continue in the next one. */
1541 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1542 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1545 leaf = path.nodes[0];
1548 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Stop once the items no longer are back references of this child. */
1549 if (key.objectid != child_root_id ||
1550 key.type != BTRFS_ROOT_BACKREF_KEY)
/* For a back reference the key offset holds the parent root id. */
1555 if (key.offset == parent_root_id) {
1556 btrfs_release_path(&path);
1563 btrfs_release_path(&path);
/* No matching parent found: 0 if some other parent exists, 2 if none. */
1566 return has_parent ? 0 : 2;
/*
 * Record every name stored in one DIR_ITEM or DIR_INDEX item as a backref on
 * the inode (or subvolume root) it points to.  The directory itself is the
 * current record of active_node; its found_size grows by each name length.
 *
 * eb/slot:      leaf and slot holding the item.
 * key:          key of the item (directory inode, type, hash or index).
 * active_node:  shared node whose caches receive the backrefs.
 */
1569 static int process_dir_item(struct extent_buffer *eb,
1570 int slot, struct btrfs_key *key,
1571 struct shared_node *active_node)
1581 struct btrfs_dir_item *di;
1582 struct inode_record *rec;
1583 struct cache_tree *root_cache;
1584 struct cache_tree *inode_cache;
1585 struct btrfs_key location;
/* Holds at most BTRFS_NAME_LEN bytes and is NOT NUL terminated. */
1586 char namebuf[BTRFS_NAME_LEN];
1588 root_cache = &active_node->root_cache;
1589 inode_cache = &active_node->inode_cache;
1590 rec = active_node->current;
1591 rec->found_dir_item = 1;
1593 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1594 total = btrfs_item_size_nr(eb, slot);
/* One item may pack several entries back to back. */
1595 while (cur < total) {
1597 btrfs_dir_item_key_to_cpu(eb, di, &location);
1598 name_len = btrfs_dir_name_len(eb, di);
1599 data_len = btrfs_dir_data_len(eb, di);
1600 filetype = btrfs_dir_type(eb, di);
1602 rec->found_size += name_len;
/* The name must fit inside the item and within the name length limit. */
1603 if (cur + sizeof(*di) + name_len > total ||
1604 name_len > BTRFS_NAME_LEN) {
1605 error = REF_ERR_NAME_TOO_LONG;
/* Not even the fixed header fits: nothing more can be read. */
1607 if (cur + sizeof(*di) > total)
/* Clamp the copy so it stays inside both the item and namebuf. */
1609 len = min_t(u32, total - cur - sizeof(*di),
1616 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
/* For DIR_ITEM keys the key offset must equal the hash of the name. */
1618 if (key->type == BTRFS_DIR_ITEM_KEY &&
1619 key->offset != btrfs_name_hash(namebuf, len)) {
1620 rec->errors |= I_ERR_ODD_DIR_ITEM;
/*
 * namebuf has no terminator and len may equal its full size, so the name is
 * printed with an explicit precision instead of a plain %s.
 */
1621 error("DIR_ITEM[%llu %llu] name %.*s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1622 key->objectid, key->offset, (int)len, namebuf, len, filetype,
1623 key->offset, btrfs_name_hash(namebuf, len));
/* Entries pointing at an inode are tracked in the inode cache ... */
1626 if (location.type == BTRFS_INODE_ITEM_KEY) {
1627 add_inode_backref(inode_cache, location.objectid,
1628 key->objectid, key->offset, namebuf,
1629 len, filetype, key->type, error);
/* ... entries pointing at a subvolume root in the root cache. */
1630 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1631 add_inode_backref(root_cache, location.objectid,
1632 key->objectid, key->offset,
1633 namebuf, len, filetype,
/* Any other location type is reported and filed under a placeholder id. */
1636 fprintf(stderr, "invalid location in dir item %u\n",
1638 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1639 key->objectid, key->offset, namebuf,
1640 len, filetype, key->type, error);
/* Advance past header, name and attached data to the next entry. */
1643 len = sizeof(*di) + name_len + data_len;
1644 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item is expected to carry a single entry only. */
1647 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1648 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record every name stored in one INODE_REF item as a backref of the inode
 * key->objectid inside directory key->offset.
 */
1653 static int process_inode_ref(struct extent_buffer *eb,
1654 int slot, struct btrfs_key *key,
1655 struct shared_node *active_node)
1663 struct cache_tree *inode_cache;
1664 struct btrfs_inode_ref *ref;
/* Raw name bytes, not NUL terminated. */
1665 char namebuf[BTRFS_NAME_LEN];
1667 inode_cache = &active_node->inode_cache;
1669 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1670 total = btrfs_item_size_nr(eb, slot);
/* One item may pack several refs back to back. */
1671 while (cur < total) {
1672 name_len = btrfs_inode_ref_name_len(eb, ref);
1673 index = btrfs_inode_ref_index(eb, ref);
1675 /* inode_ref + namelen should not cross item boundary */
1676 if (cur + sizeof(*ref) + name_len > total ||
1677 name_len > BTRFS_NAME_LEN) {
/* Not even the fixed header fits inside the item. */
1678 if (total < cur + sizeof(*ref))
1681 /* Still try to read out the remaining part */
1682 len = min_t(u32, total - cur - sizeof(*ref),
1684 error = REF_ERR_NAME_TOO_LONG;
1690 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1691 add_inode_backref(inode_cache, key->objectid, key->offset,
1692 index, namebuf, len, 0, key->type, error);
/* Step over this ref using the on-disk name length. */
1694 len = sizeof(*ref) + name_len;
1695 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Record every name stored in one INODE_EXTREF item as a backref of the inode
 * key->objectid.  Unlike INODE_REF, the parent directory is read from the
 * extref entry itself rather than from the key offset.
 */
1701 static int process_inode_extref(struct extent_buffer *eb,
1702 int slot, struct btrfs_key *key,
1703 struct shared_node *active_node)
1712 struct cache_tree *inode_cache;
1713 struct btrfs_inode_extref *extref;
/* Raw name bytes, not NUL terminated. */
1714 char namebuf[BTRFS_NAME_LEN];
1716 inode_cache = &active_node->inode_cache;
1718 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1719 total = btrfs_item_size_nr(eb, slot);
1720 while (cur < total) {
1721 name_len = btrfs_inode_extref_name_len(eb, extref);
1722 index = btrfs_inode_extref_index(eb, extref);
1723 parent = btrfs_inode_extref_parent(eb, extref);
/*
 * NOTE(review): only the BTRFS_NAME_LEN limit is visible here; no check
 * against the item size like the one in process_inode_ref() can be seen.
 * Confirm whether a corrupted name_len can make the read below cross the
 * item boundary.
 */
1724 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: copy only what namebuf can hold and flag the backref. */
1728 len = BTRFS_NAME_LEN;
1729 error = REF_ERR_NAME_TOO_LONG;
1731 read_extent_buffer(eb, namebuf,
1732 (unsigned long)(extref + 1), len);
1733 add_inode_backref(inode_cache, key->objectid, parent,
1734 index, namebuf, len, 0, key->type, error);
/* Step over this entry using the on-disk name length. */
1736 len = sizeof(*extref) + name_len;
1737 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Walk the checksum tree over the byte range [start, start + len) and report
 * through *found how much of it is covered by checksum items.
 * NOTE(review): the statements updating *found, start and len are on lines
 * not shown in this view; the description follows the visible control flow.
 */
1744 static int count_csum_range(struct btrfs_root *root, u64 start,
1745 u64 len, u64 *found)
1747 struct btrfs_key key;
1748 struct btrfs_path path;
1749 struct extent_buffer *leaf;
1754 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1756 btrfs_init_path(&path);
1758 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1760 key.type = BTRFS_EXTENT_CSUM_KEY;
1762 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * No exact match: the preceding item may still cover the start of the
 * range, so inspect it when it is a checksum item as well.
 */
1766 if (ret > 0 && path.slots[0] > 0) {
1767 leaf = path.nodes[0];
1768 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1769 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1770 key.type == BTRFS_EXTENT_CSUM_KEY)
1775 leaf = path.nodes[0];
/* Ran off the end of this leaf: continue in the next one. */
1776 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1777 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1782 leaf = path.nodes[0];
1785 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Left the checksum items altogether. */
1786 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1787 key.type != BTRFS_EXTENT_CSUM_KEY)
1790 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* This item starts at or beyond the end of the requested range. */
1791 if (key.offset >= start + len)
/* This item starts after the current position, leaving a gap before it. */
1794 if (key.offset > start)
/*
 * Each csum_size bytes of item payload cover one sector, so the item
 * covers data up to csum_end.
 */
1797 size = btrfs_item_size_nr(leaf, path.slots[0]);
1798 csum_end = key.offset + (size / csum_size) *
1799 root->fs_info->sectorsize;
1800 if (csum_end > start) {
/* Only the part overlapping the remaining range is of interest. */
1801 size = min(csum_end - start, len);
1810 btrfs_release_path(&path);
/*
 * Account one EXTENT_DATA item on the current inode record of active_node:
 * tracks the covered file range (holes and overlaps), validates the extent
 * fields, accumulates found_size and checks the presence of checksums.
 */
1816 static int process_file_extent(struct btrfs_root *root,
1817 struct extent_buffer *eb,
1818 int slot, struct btrfs_key *key,
1819 struct shared_node *active_node)
1821 struct inode_record *rec;
1822 struct btrfs_file_extent_item *fi;
1824 u64 disk_bytenr = 0;
1825 u64 extent_offset = 0;
/* Low bits that must be clear in a sector aligned value. */
1826 u64 mask = root->fs_info->sectorsize - 1;
1830 rec = active_node->current;
1831 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1832 rec->found_file_extent = 1;
/* First extent seen for this inode: start tracking at its file offset. */
1834 if (rec->extent_start == (u64)-1) {
1835 rec->extent_start = key->offset;
1836 rec->extent_end = key->offset;
/* Starts before the previous extent ended: overlap. */
1839 if (rec->extent_end > key->offset)
1840 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
/* Starts after the previous extent ended: remember the gap as a hole. */
1841 else if (rec->extent_end < key->offset) {
1842 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1843 key->offset - rec->extent_end);
1848 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1849 extent_type = btrfs_file_extent_type(eb, fi);
/* Inline data: counted byte exact, then rounded up to a full sector. */
1851 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1852 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1854 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1855 rec->found_size += num_bytes;
1856 num_bytes = (num_bytes + mask) & ~mask;
1857 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1858 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1859 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1860 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1861 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned. */
1862 if (num_bytes == 0 || (num_bytes & mask))
1863 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced slice must lie within the uncompressed extent size. */
1864 if (num_bytes + extent_offset >
1865 btrfs_file_extent_ram_bytes(eb, fi))
1866 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not carry any encoding. */
1867 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1868 (btrfs_file_extent_compression(eb, fi) ||
1869 btrfs_file_extent_encryption(eb, fi) ||
1870 btrfs_file_extent_other_encoding(eb, fi)))
1871 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* A zero disk_bytenr contributes nothing to found_size. */
1872 if (disk_bytenr > 0)
1873 rec->found_size += num_bytes;
1875 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1877 rec->extent_end = key->offset + num_bytes;
1880 * The data reloc tree will copy full extents into its inode and then
1881 * copy the corresponding csums. Because the extent it copied could be
1882 * a preallocated extent that hasn't been written to yet there may be no
1883 * csums to copy, ergo we won't have csums for our file extent. This is
1884 * ok so just don't bother checking csums if the inode belongs to the
1887 if (disk_bytenr > 0 &&
1888 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/*
 * Compressed extents are checked over their whole on-disk length.
 * NOTE(review): the offset adjustment below presumably sits in the else
 * branch (uncompressed case); the branch keyword is not visible here.
 */
1890 if (btrfs_file_extent_compression(eb, fi))
1891 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1893 disk_bytenr += extent_offset;
1895 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
/* Regular extents: note partial checksum coverage. */
1898 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1900 rec->found_csum_item = 1;
1901 if (found < num_bytes)
1902 rec->some_csum_missing = 1;
/* Preallocated extents can be flagged as carrying odd checksum items. */
1903 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1905 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Feed every item of one fs-tree leaf to the matching process_*() helper,
 * keeping active_node->current pointed at the inode record of the item's
 * objectid.
 */
1911 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1912 struct walk_control *wc)
1914 struct btrfs_key key;
1918 struct cache_tree *inode_cache;
1919 struct shared_node *active_node;
/* Special case: active node is the root level of a root with zero refs. */
1921 if (wc->root_level == wc->active_node &&
1922 btrfs_root_refs(&root->root_item) == 0)
1925 active_node = wc->nodes[wc->active_node];
1926 inode_cache = &active_node->inode_cache;
1927 nritems = btrfs_header_nritems(eb);
1928 for (i = 0; i < nritems; i++) {
1929 btrfs_item_key_to_cpu(eb, &key, i);
/* Free space cache objects and orphan items get special treatment. */
1931 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1933 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/*
 * Moved on to a higher inode number: close the previous record
 * (it may be freed) and fetch the record for the new inode.
 */
1936 if (active_node->current == NULL ||
1937 active_node->current->ino < key.objectid) {
1938 if (active_node->current) {
1939 active_node->current->checked = 1;
1940 maybe_free_inode_rec(inode_cache,
1941 active_node->current);
1943 active_node->current = get_inode_rec(inode_cache,
1945 BUG_ON(IS_ERR(active_node->current));
/* Dispatch on the item type. */
1948 case BTRFS_DIR_ITEM_KEY:
1949 case BTRFS_DIR_INDEX_KEY:
1950 ret = process_dir_item(eb, i, &key, active_node);
1952 case BTRFS_INODE_REF_KEY:
1953 ret = process_inode_ref(eb, i, &key, active_node);
1955 case BTRFS_INODE_EXTREF_KEY:
1956 ret = process_inode_extref(eb, i, &key, active_node);
1958 case BTRFS_INODE_ITEM_KEY:
1959 ret = process_inode_item(eb, i, &key, active_node);
1961 case BTRFS_EXTENT_DATA_KEY:
1962 ret = process_file_extent(root, eb, i, &key,
/*
 * Members of struct node_refs (its opening line is outside this view):
 * three arrays indexed by tree level that cache extent lookups so a block is
 * not looked up again while it stays on the path.
 */
/* Address of the block most recently looked up on each level. */
1973 u64 bytenr[BTRFS_MAX_LEVEL];
/* Reference count recorded for that block. */
1974 u64 refs[BTRFS_MAX_LEVEL];
/* Nonzero when the block on that level has to be checked in this root. */
1975 int need_check[BTRFS_MAX_LEVEL];
/* Forward declarations for helpers defined further down in the file. */
1978 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1979 struct node_refs *nrefs, u64 level);
1980 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1981 unsigned int ext_ref);
1984 * Returns >0 Found error, not fatal, should continue
1985 * Returns <0 Fatal error, must exit the whole check
1986 * Returns 0 No errors found
/*
 * Check the inodes stored in the leaf at path->nodes[0] one after another
 * with check_inode_item().  Because that helper may move the path into the
 * next leaf, the cached per-level node information is refreshed afterwards
 * and path levels below *level are released.
 */
1988 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1989 struct node_refs *nrefs, int *level, int ext_ref)
1991 struct extent_buffer *cur = path->nodes[0];
1992 struct btrfs_key key;
1996 int root_level = btrfs_header_level(root->node);
1998 int ret = 0; /* Final return value */
1999 int err = 0; /* Positive error bitmap */
/* Remember the starting leaf to detect a switch to another leaf later. */
2001 cur_bytenr = cur->start;
2003 /* skip to first inode item or the first inode number change */
2004 nritems = btrfs_header_nritems(cur);
2005 for (i = 0; i < nritems; i++) {
2006 btrfs_item_key_to_cpu(cur, &key, i);
2008 first_ino = key.objectid;
2009 if (key.type == BTRFS_INODE_ITEM_KEY ||
2010 (first_ino && first_ino != key.objectid))
/* Mark the leaf as fully consumed. */
2014 path->slots[0] = nritems;
2020 err |= check_inode_item(root, path, ext_ref);
2022 /* modify cur since check_inode_item may change path */
2023 cur = path->nodes[0];
2025 if (err & LAST_ITEM)
2028 /* still have inode items in this leaf */
2029 if (cur->start == cur_bytenr)
2033 * we have switched to another leaf, above nodes may
2034 * have changed, here walk down the path, if a node
2035 * or leaf is shared, check whether we can skip this
2038 for (i = root_level; i >= 0; i--) {
/* Same block as cached for this level: nothing to refresh. */
2039 if (path->nodes[i]->start == nrefs->bytenr[i])
2042 ret = update_nodes_refs(root,
2043 path->nodes[i]->start,
2048 if (!nrefs->need_check[i]) {
/* Drop the buffers held for the levels below *level. */
2054 for (i = 0; i < *level; i++) {
2055 free_extent_buffer(path->nodes[i]);
2056 path->nodes[i] = NULL;
/*
 * Issue readahead for the children of node, starting at the given slot and
 * continuing to the last pointer in the node.
 */
2065 static void reada_walk_down(struct btrfs_root *root,
2066 struct extent_buffer *node, int slot)
2068 struct btrfs_fs_info *fs_info = root->fs_info;
2075 level = btrfs_header_level(node);
2079 nritems = btrfs_header_nritems(node);
2080 for (i = slot; i < nritems; i++) {
/* Child address and the generation the parent expects it to have. */
2081 bytenr = btrfs_node_blockptr(node, i);
2082 ptr_gen = btrfs_node_ptr_generation(node, i);
2083 readahead_tree_block(fs_info, bytenr, ptr_gen);
2088 * Check the child node/leaf by the following condition:
2089 * 1. the first item key of the node/leaf should be the same with the one
2091 * 2. block in parent node should match the child node/leaf.
2092 * 3. generation of parent node and child's header should be consistent.
2094 * Or the child node/leaf pointed by the key in parent is not valid.
2096 * We hope to check leaf owner too, but since subvol may share leaves,
2097 * which makes leaf owner check not so strong, key check should be
2098 * sufficient enough for that case.
/*
 * Cross-check a child block against the pointer stored in its parent:
 * first key, block address and generation must all agree.  Every mismatch is
 * reported on stderr as "wanted" (what the parent records) versus "have"
 * (what the child header contains).
 *
 * parent/slot:  node and slot holding the pointer to the child.
 * child:        the block that was read through that pointer.
 */
2100 static int check_child_node(struct extent_buffer *parent, int slot,
2101 struct extent_buffer *child)
2103 struct btrfs_key parent_key;
2104 struct btrfs_key child_key;
2107 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* Leaves and nodes store their first key in different item formats. */
2108 if (btrfs_header_level(child) == 0)
2109 btrfs_item_key_to_cpu(child, &child_key, 0);
2111 btrfs_node_key_to_cpu(child, &child_key, 0);
2113 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2116 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2117 parent_key.objectid, parent_key.type, parent_key.offset,
2118 child_key.objectid, child_key.type, child_key.offset);
2120 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2122 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2123 btrfs_node_blockptr(parent, slot),
2124 btrfs_header_bytenr(child));
2126 if (btrfs_node_ptr_generation(parent, slot) !=
2127 btrfs_header_generation(child)) {
/*
 * Same argument order as the two messages above: the value recorded in the
 * parent is the wanted one, the value in the child header is what we have.
 */
2129 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2130 btrfs_node_ptr_generation(parent, slot),
2131 btrfs_header_generation(child));
2137 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2138 * in every fs or file tree check. Here we find all of its root ids, and only check
2139 * it in the fs or file tree which has the smallest root id.
/*
 * Decide whether a block referenced by the roots in the given ulist has to
 * be checked while walking this root.  A block with a single referencing
 * root is handled separately from the shared case; for a shared block the
 * decision depends on whether this root is the first entry of the ulist.
 * NOTE(review): the return statements are on lines not shown in this view.
 */
2141 static int need_check(struct btrfs_root *root, struct ulist *roots)
2143 struct rb_node *node;
2144 struct ulist_node *u;
/* Only one root references the block. */
2146 if (roots->nnodes == 1)
/* First entry of the rb-tree, i.e. the smallest root id. */
2149 node = rb_first(&roots->root);
2150 u = rb_entry(node, struct ulist_node, rb_node);
2152 * current root id is not smallest, we skip it and let it be checked
2153 * in the fs or file tree who hash the smallest root id.
2155 if (root->objectid != u->val)
2162 * for a tree node or leaf, we record its reference count, so later if we still
2163 * process this node or leaf, don't need to compute its reference count again.
/*
 * Refresh the cached information for one tree level in nrefs.  When the
 * block at bytenr differs from the one cached for that level, its reference
 * count is looked up and stored, and need_check[level] is recomputed.
 * NOTE(review): the conditions choosing between the two need_check
 * assignments are on lines not shown in this view.
 */
2165 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2166 struct node_refs *nrefs, u64 level)
2170 struct ulist *roots;
/* Cache miss for this level. */
2172 if (nrefs->bytenr[level] != bytenr) {
2173 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2174 level, 1, &refs, NULL);
2178 nrefs->bytenr[level] = bytenr;
2179 nrefs->refs[level] = refs;
/* Collect every root that references the block. */
2181 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2186 check = need_check(root, roots);
2188 nrefs->need_check[level] = check;
/* Fallback: always check the block. */
2190 nrefs->need_check[level] = 1;
/*
 * Original-mode descent: starting from path->nodes[*level], follow the
 * current slot downwards until a leaf has been processed or a block cannot
 * be entered.  Reference counts are cached per level in nrefs, shared blocks
 * are registered through enter_shared_node(), and each child is validated
 * before the walk steps into it.
 */
2197 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2198 struct walk_control *wc, int *level,
2199 struct node_refs *nrefs)
2201 enum btrfs_tree_block_status status;
2204 struct btrfs_fs_info *fs_info = root->fs_info;
2205 struct extent_buffer *next;
2206 struct extent_buffer *cur;
2210 WARN_ON(*level < 0);
2211 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached ref count when the starting block did not change. */
2213 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2214 refs = nrefs->refs[*level];
2217 ret = btrfs_lookup_extent_info(NULL, root,
2218 path->nodes[*level]->start,
2219 *level, 1, &refs, NULL);
2224 nrefs->bytenr[*level] = path->nodes[*level]->start;
2225 nrefs->refs[*level] = refs;
/*
 * NOTE(review): the condition guarding this call is not visible;
 * presumably it applies to blocks with more than one reference.
 */
2229 ret = enter_shared_node(root, path->nodes[*level]->start,
2237 while (*level >= 0) {
2238 WARN_ON(*level < 0);
2239 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2240 cur = path->nodes[*level];
/* The header level must match the level the walk believes it is on. */
2242 if (btrfs_header_level(cur) != *level)
/* All slots of this block have been consumed. */
2245 if (path->slots[*level] >= btrfs_header_nritems(cur))
2248 ret = process_one_leaf(root, cur, wc);
/* Interior node: pick up the child pointer of the current slot. */
2253 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2254 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
/* Same ref count caching as above, one level further down. */
2256 if (bytenr == nrefs->bytenr[*level - 1]) {
2257 refs = nrefs->refs[*level - 1];
2259 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2260 *level - 1, 1, &refs, NULL);
2264 nrefs->bytenr[*level - 1] = bytenr;
2265 nrefs->refs[*level - 1] = refs;
2270 ret = enter_shared_node(root, bytenr, refs,
/* Move on to the next slot without descending into this child. */
2273 path->slots[*level]++;
/* Use a cached, up to date copy of the child if there is one. */
2278 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2279 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2280 free_extent_buffer(next);
2281 reada_walk_down(root, cur, path->slots[*level]);
2282 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
/* Unreadable child: record it as a corrupt extent. */
2283 if (!extent_buffer_uptodate(next)) {
2284 struct btrfs_key node_key;
2286 btrfs_node_key_to_cpu(path->nodes[*level],
2288 path->slots[*level]);
2289 btrfs_add_corrupt_extent_record(root->fs_info,
2291 path->nodes[*level]->start,
2292 root->fs_info->nodesize,
/* The child must agree with the pointer stored in its parent. */
2299 ret = check_child_node(cur, path->slots[*level], next);
2301 free_extent_buffer(next);
/* Structural validation of the child itself. */
2306 if (btrfs_is_leaf(next))
2307 status = btrfs_check_leaf(root, NULL, next);
2309 status = btrfs_check_node(root, NULL, next);
2310 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2311 free_extent_buffer(next);
/* Descend: the child replaces whatever the path held one level down. */
2316 *level = *level - 1;
2317 free_extent_buffer(path->nodes[*level]);
2318 path->nodes[*level] = next;
2319 path->slots[*level] = 0;
/* Mark the block on the final level as fully consumed. */
2322 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Forward declaration of check_inode_item(); an identical prototype already
 * appears earlier in this file, so this one is redundant but harmless.
 */
2326 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2327 unsigned int ext_ref);
2330 * Returns >0 Found error, should continue
2331 * Returns <0 Fatal error, must exit the whole check
2332 * Returns 0 No errors found
/*
 * Low-memory-mode descent: starting from path->nodes[*level], validate each
 * block, process leaves with process_one_leaf_v2() and step into children.
 * Children whose need_check flag is clear are skipped.
 */
2334 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2335 int *level, struct node_refs *nrefs, int ext_ref)
2337 enum btrfs_tree_block_status status;
2340 struct btrfs_fs_info *fs_info = root->fs_info;
2341 struct extent_buffer *next;
2342 struct extent_buffer *cur;
2345 WARN_ON(*level < 0);
2346 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Make sure the cached info for the starting block is current. */
2348 ret = update_nodes_refs(root, path->nodes[*level]->start,
2353 while (*level >= 0) {
2354 WARN_ON(*level < 0);
2355 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2356 cur = path->nodes[*level];
/* The header level must match the level the walk believes it is on. */
2358 if (btrfs_header_level(cur) != *level)
/* All slots of this block have been consumed. */
2361 if (path->slots[*level] >= btrfs_header_nritems(cur))
2363 /* Don't forget to validate the leaf/node */
2365 ret = btrfs_check_leaf(root, NULL, cur);
2366 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2370 ret = process_one_leaf_v2(root, path, nrefs,
/* The helper may have moved the path, so reload the block. */
2372 cur = path->nodes[*level];
2375 ret = btrfs_check_node(root, NULL, cur);
2376 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
/* Interior node: pick up the child pointer of the current slot. */
2381 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2382 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2384 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
/* The child does not need checking in this root: skip the slot. */
2387 if (!nrefs->need_check[*level - 1]) {
2388 path->slots[*level]++;
/* Use a cached, up to date copy of the child if there is one. */
2392 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2393 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2394 free_extent_buffer(next);
2395 reada_walk_down(root, cur, path->slots[*level]);
2396 next = read_tree_block(fs_info, bytenr, ptr_gen);
/* Unreadable child: record it as a corrupt extent. */
2397 if (!extent_buffer_uptodate(next)) {
2398 struct btrfs_key node_key;
2400 btrfs_node_key_to_cpu(path->nodes[*level],
2402 path->slots[*level]);
2403 btrfs_add_corrupt_extent_record(fs_info,
2405 path->nodes[*level]->start,
/* The child must agree with the pointer stored in its parent. */
2413 ret = check_child_node(cur, path->slots[*level], next);
2417 if (btrfs_is_leaf(next))
2418 status = btrfs_check_leaf(root, NULL, next);
2420 status = btrfs_check_node(root, NULL, next);
2421 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2422 free_extent_buffer(next);
/* Descend: the child replaces whatever the path held one level down. */
2427 *level = *level - 1;
2428 free_extent_buffer(path->nodes[*level]);
2429 path->nodes[*level] = next;
2430 path->slots[*level] = 0;
/*
 * Original-mode ascent: climb from *level until a block with an unvisited
 * slot is found.  Exhausted blocks are released from the path, and the
 * shared-node bookkeeping is updated when the active node is left.
 */
2435 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2436 struct walk_control *wc, int *level)
2439 struct extent_buffer *leaf;
2441 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite the name this may be an interior node as well. */
2442 leaf = path->nodes[i];
/* Another slot is left on this level. */
2443 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* This block is exhausted: drop it from the path. */
2448 free_extent_buffer(path->nodes[*level]);
2449 path->nodes[*level] = NULL;
2450 BUG_ON(*level > wc->active_node);
2451 if (*level == wc->active_node)
2452 leave_shared_node(root, wc, *level);
/*
 * Low-memory-mode ascent: same climbing loop as walk_up_tree(), but without
 * any shared-node bookkeeping.
 */
2459 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2463 struct extent_buffer *leaf;
2465 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2466 leaf = path->nodes[i];
/* Another slot is left on this level. */
2467 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* This block is exhausted: drop it from the path. */
2472 free_extent_buffer(path->nodes[*level]);
2473 path->nodes[*level] = NULL;
/*
 * Sanity check the inode record of a root directory.  The record passes when
 * it has an inode item and no errors, nlink is 1 with no links found, and
 * its first backref is an inode ref named ".." at index 0 that has neither a
 * dir index nor a dir item.
 * NOTE(review): the return values are on lines not shown in this view.
 */
2480 static int check_root_dir(struct inode_record *rec)
2482 struct inode_backref *backref;
2485 if (!rec->found_inode_item || rec->errors)
2487 if (rec->nlink != 1 || rec->found_link != 0)
2489 if (list_empty(&rec->backrefs))
/* Only the first backref on the list is inspected. */
2491 backref = to_inode_backref(rec->backrefs.next);
2492 if (!backref->found_inode_ref)
2494 if (backref->index != 0 || backref->namelen != 2 ||
2495 memcmp(backref->name, "..", 2))
2497 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size field of a directory inode item with the size computed
 * during the check (rec->found_size) and clear I_ERR_DIR_ISIZE_WRONG.
 */
2504 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2505 struct btrfs_root *root, struct btrfs_path *path,
2506 struct inode_record *rec)
2508 struct btrfs_inode_item *ei;
2509 struct btrfs_key key;
/*
 * Search with the maximum offset so the path lands just past the inode
 * item.  NOTE(review): the step back to the previous slot is presumably
 * done on a line not shown here.
 */
2512 key.objectid = rec->ino;
2513 key.type = BTRFS_INODE_ITEM_KEY;
2514 key.offset = (u64)-1;
2516 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Slot zero leaves no previous item to step back to. */
2520 if (!path->slots[0]) {
2527 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* The item found belongs to a different inode. */
2528 if (key.objectid != rec->ino) {
2533 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2534 struct btrfs_inode_item);
2535 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2536 btrfs_mark_buffer_dirty(path->nodes[0]);
2537 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2538 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2539 root->root_key.objectid);
2541 btrfs_release_path(path);
/*
 * Add the missing orphan item for rec->ino and clear I_ERR_NO_ORPHAN_ITEM.
 * NOTE(review): the condition guarding the flag update is on a line not
 * shown here; presumably it is cleared only when the insertion succeeded.
 */
2545 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2546 struct btrfs_root *root,
2547 struct btrfs_path *path,
2548 struct inode_record *rec)
2552 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2553 btrfs_release_path(path);
2555 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field of an inode item with the byte count computed
 * during the check (rec->found_size) and clear I_ERR_FILE_NBYTES_WRONG.
 */
2559 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2560 struct btrfs_root *root,
2561 struct btrfs_path *path,
2562 struct inode_record *rec)
2564 struct btrfs_inode_item *ei;
2565 struct btrfs_key key;
/* NOTE(review): key.offset is set on a line not shown in this view. */
2568 key.objectid = rec->ino;
2569 key.type = BTRFS_INODE_ITEM_KEY;
2572 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2579 /* Since ret == 0, no need to check anything */
2580 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2581 struct btrfs_inode_item);
2582 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2583 btrfs_mark_buffer_dirty(path->nodes[0]);
2584 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2585 printf("reset nbytes for ino %llu root %llu\n",
2586 rec->ino, root->root_key.objectid);
2588 btrfs_release_path(path);
/*
 * Recreate a missing DIR_INDEX item for the given backref inside its own
 * transaction, then update the in-memory records: the backref is marked as
 * having a dir index and the size accounting of the parent directory is
 * adjusted.
 */
2592 static int add_missing_dir_index(struct btrfs_root *root,
2593 struct cache_tree *inode_cache,
2594 struct inode_record *rec,
2595 struct inode_backref *backref)
2597 struct btrfs_path path;
2598 struct btrfs_trans_handle *trans;
2599 struct btrfs_dir_item *dir_item;
2600 struct extent_buffer *leaf;
2601 struct btrfs_key key;
2602 struct btrfs_disk_key disk_key;
2603 struct inode_record *dir_rec;
2604 unsigned long name_ptr;
/* Item payload: fixed dir item header followed by the name bytes. */
2605 u32 data_size = sizeof(*dir_item) + backref->namelen;
2608 trans = btrfs_start_transaction(root, 1);
2610 return PTR_ERR(trans);
2612 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2613 (unsigned long long)rec->ino);
/* The index item lives under the parent directory at the backref index. */
2615 btrfs_init_path(&path);
2616 key.objectid = backref->dir;
2617 key.type = BTRFS_DIR_INDEX_KEY;
2618 key.offset = backref->index;
2619 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2622 leaf = path.nodes[0];
2623 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* Location key stored in the entry: the inode item of rec. */
2625 disk_key.objectid = cpu_to_le64(rec->ino);
2626 disk_key.type = BTRFS_INODE_ITEM_KEY;
2627 disk_key.offset = 0;
2629 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2630 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2631 btrfs_set_dir_data_len(leaf, dir_item, 0);
2632 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name follows directly after the fixed header. */
2633 name_ptr = (unsigned long)(dir_item + 1);
2634 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2635 btrfs_mark_buffer_dirty(leaf);
2636 btrfs_release_path(&path);
/* NOTE(review): the result of the commit is not examined here. */
2637 btrfs_commit_transaction(trans, root);
2639 backref->found_dir_index = 1;
2640 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2641 BUG_ON(IS_ERR(dir_rec));
/* The new entry adds its name length to the directory size found. */
2644 dir_rec->found_size += backref->namelen;
/* Re-evaluate the isize error flag against the updated size. */
2645 if (dir_rec->found_size == dir_rec->isize &&
2646 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2647 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2648 if (dir_rec->found_size != dir_rec->isize)
2649 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Remove the DIR_INDEX entry described by the backref inside its own
 * transaction.
 * NOTE(review): the branches selecting between the early exit and the two
 * deletion calls are on lines not shown in this view.
 */
2654 static int delete_dir_index(struct btrfs_root *root,
2655 struct inode_backref *backref)
2657 struct btrfs_trans_handle *trans;
2658 struct btrfs_dir_item *di;
2659 struct btrfs_path path;
2662 trans = btrfs_start_transaction(root, 1);
2664 return PTR_ERR(trans);
2666 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2667 (unsigned long long)backref->dir,
2668 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2669 (unsigned long long)root->objectid);
/* Locate the entry by directory, name and index. */
2671 btrfs_init_path(&path);
2672 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2673 backref->name, backref->namelen,
2674 backref->index, -1);
/* Early exit path: release the path and close the transaction. */
2677 btrfs_release_path(&path);
2678 btrfs_commit_transaction(trans, root);
/*
 * Two ways of deleting: drop the whole item, or remove only the matching
 * name from it.
 */
2685 ret = btrfs_del_item(trans, root, &path);
2687 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2689 btrfs_release_path(&path);
2690 btrfs_commit_transaction(trans, root);
2694 static int __create_inode_item(struct btrfs_trans_handle *trans,
2695 struct btrfs_root *root, u64 ino, u64 size,
2696 u64 nbytes, u64 nlink, u32 mode)
2698 struct btrfs_inode_item ii;
2699 time_t now = time(NULL);
2702 btrfs_set_stack_inode_size(&ii, size);
2703 btrfs_set_stack_inode_nbytes(&ii, nbytes);
2704 btrfs_set_stack_inode_nlink(&ii, nlink);
2705 btrfs_set_stack_inode_mode(&ii, mode);
2706 btrfs_set_stack_inode_generation(&ii, trans->transid);
2707 btrfs_set_stack_timespec_nsec(&ii.atime, 0);
2708 btrfs_set_stack_timespec_sec(&ii.ctime, now);
2709 btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
2710 btrfs_set_stack_timespec_sec(&ii.mtime, now);
2711 btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
2712 btrfs_set_stack_timespec_sec(&ii.otime, 0);
2713 btrfs_set_stack_timespec_nsec(&ii.otime, 0);
2715 ret = btrfs_insert_inode(trans, root, ino, &ii);
2718 warning("root %llu inode %llu recreating inode item, this may "
2719 "be incomplete, please check permissions and content after "
2720 "the fsck completes.\n", (unsigned long long)root->objectid,
2721 (unsigned long long)ino);
/*
 * Low-memory-mode wrapper: recreate an inode item with zero size, nbytes and
 * nlink.  The mode is a directory for BTRFS_FT_DIR and a regular file for
 * every other file type, with permission bits 0755.
 */
2726 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2727 struct btrfs_root *root, u64 ino,
2730 u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2732 return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
/*
 * Original-mode wrapper: recreate the inode item for rec inside its own
 * transaction, deriving type and size from what the check has found.
 *
 * root_dir: nonzero when rec is the root directory, which gets nlink 1.
 */
2735 static int create_inode_item(struct btrfs_root *root,
2736 struct inode_record *rec, int root_dir)
2738 struct btrfs_trans_handle *trans;
2744 trans = btrfs_start_transaction(root, 1);
2745 if (IS_ERR(trans)) {
2746 ret = PTR_ERR(trans);
2750 nlink = root_dir ? 1 : rec->found_link;
/* Any dir item seen makes this a directory, even if extents exist too. */
2751 if (rec->found_dir_item) {
2752 if (rec->found_file_extent)
2753 fprintf(stderr, "root %llu inode %llu has both a dir "
2754 "item and extents, unsure if it is a dir or a "
2755 "regular file so setting it as a directory\n",
2756 (unsigned long long)root->objectid,
2757 (unsigned long long)rec->ino);
2758 mode = S_IFDIR | 0755;
2759 size = rec->found_size;
/* Otherwise a regular file whose size is the end of its last extent. */
2760 } else if (!rec->found_dir_item) {
2761 size = rec->extent_end;
2762 mode = S_IFREG | 0755;
2765 ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
/* NOTE(review): the result of the commit is not examined here. */
2767 btrfs_commit_transaction(trans, root);
/*
 * Walk every backref of @rec and fix what can be fixed for it: recreate a
 * missing inode item, delete a bad dir index, add a missing dir index, or
 * re-insert a missing dir index/item pair.
 *
 * Every add/create branch visible here is guarded by !delete, so the delete
 * argument selects between a removal pass and an add pass.
 *
 * Returns ret when it is non-zero, otherwise the value of repaired.
 */
2771 static int repair_inode_backrefs(struct btrfs_root *root,
2772 struct inode_record *rec,
2773 struct cache_tree *inode_cache,
2776 struct inode_backref *tmp, *backref;
2777 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2781 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* The root directory lost its inode item: recreate it as a root dir */
2782 if (!delete && rec->ino == root_dirid) {
2783 if (!rec->found_inode_item) {
2784 ret = create_inode_item(root, rec, 1);
2791 /* Index 0 for root dir's are special, don't mess with it */
2792 if (rec->ino == root_dirid && backref->index == 0)
/*
 * A dir index without an inode ref, or one whose index disagrees with the
 * inode ref, is removed and the backref is dropped from the list.
 */
2796 ((backref->found_dir_index && !backref->found_inode_ref) ||
2797 (backref->found_dir_index && backref->found_inode_ref &&
2798 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2799 ret = delete_dir_index(root, backref);
2803 list_del(&backref->list);
/* Dir item and inode ref exist but the dir index is missing: add it */
2808 if (!delete && !backref->found_dir_index &&
2809 backref->found_dir_item && backref->found_inode_ref) {
2810 ret = add_missing_dir_index(root, inode_cache, rec,
2815 if (backref->found_dir_item &&
2816 backref->found_dir_index) {
2817 if (!backref->errors &&
2818 backref->found_inode_ref) {
2819 list_del(&backref->list);
/* Only the inode ref exists: re-insert the dir index/item pair */
2826 if (!delete && (!backref->found_dir_index &&
2827 !backref->found_dir_item &&
2828 backref->found_inode_ref)) {
2829 struct btrfs_trans_handle *trans;
2830 struct btrfs_key location;
2832 ret = check_dir_conflict(root, backref->name,
2838 * let nlink fixing routine to handle it,
2839 * which can do it better.
2844 location.objectid = rec->ino;
2845 location.type = BTRFS_INODE_ITEM_KEY;
2846 location.offset = 0;
2848 trans = btrfs_start_transaction(root, 1);
2849 if (IS_ERR(trans)) {
2850 ret = PTR_ERR(trans);
2853 fprintf(stderr, "adding missing dir index/item pair "
2855 (unsigned long long)rec->ino);
2856 ret = btrfs_insert_dir_item(trans, root, backref->name,
2858 backref->dir, &location,
2859 imode_to_type(rec->imode),
2862 btrfs_commit_transaction(trans, root);
/* All three references agree but the inode item itself is missing */
2866 if (!delete && (backref->found_inode_ref &&
2867 backref->found_dir_index &&
2868 backref->found_dir_item &&
2869 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2870 !rec->found_inode_item)) {
2871 ret = create_inode_item(root, rec, 0);
2878 return ret ? ret : repaired;
2882 * To determine the file type for nlink/inode_item repair
2884 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2885 * Return -ENOENT if file type is not found.
/*
 * Source of the type, in order of preference: the mode of an inode item that
 * was already found for @rec, then the filetype stored in a backref that has
 * a dir index or a dir item.
 */
2887 static int find_file_type(struct inode_record *rec, u8 *type)
2889 struct inode_backref *backref;
2891 /* For inode item recovered case */
2892 if (rec->found_inode_item) {
2893 *type = imode_to_type(rec->imode);
2897 list_for_each_entry(backref, &rec->backrefs, list) {
2898 if (backref->found_dir_index || backref->found_dir_item) {
2899 *type = backref->filetype;
2907 * To determine the file name for nlink repair
2909 * Return 0 if file name is found, set name and namelen.
2910 * Return -ENOENT if file name is not found.
/*
 * Copy the name of a backref that has at least one of dir index, dir item or
 * inode ref into @name and store its length in @namelen.
 *
 * The copy is a plain memcpy of namelen bytes, no terminator is appended.
 * NOTE(review): assumes the caller buffer holds at least backref->namelen
 * bytes (the visible caller passes a BTRFS_NAME_LEN sized buffer) - confirm.
 */
2912 static int find_file_name(struct inode_record *rec,
2913 char *name, int *namelen)
2915 struct inode_backref *backref;
2917 list_for_each_entry(backref, &rec->backrefs, list) {
2918 if (backref->found_dir_index || backref->found_dir_item ||
2919 backref->found_inode_ref) {
2920 memcpy(name, backref->name, backref->namelen);
2921 *namelen = backref->namelen;
2928 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of @rec from scratch in three phases:
 *   1. unlink every recorded backref and drop the incomplete ones from the
 *      list,
 *   2. write nlink = 0 into the on-disk inode item,
 *   3. add the remaining backrefs back with btrfs_add_link().
 */
2929 static int reset_nlink(struct btrfs_trans_handle *trans,
2930 struct btrfs_root *root,
2931 struct btrfs_path *path,
2932 struct inode_record *rec)
2934 struct inode_backref *backref;
2935 struct inode_backref *tmp;
2936 struct btrfs_key key;
2937 struct btrfs_inode_item *inode_item;
2940 /* We don't believe this either, reset it and iterate backref */
2941 rec->found_link = 0;
2943 /* Remove all backref including the valid ones */
2944 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2945 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2946 backref->index, backref->name,
2947 backref->namelen, 0);
2951 /* remove invalid backref, so it won't be added back */
2952 if (!(backref->found_dir_index &&
2953 backref->found_dir_item &&
2954 backref->found_inode_ref)) {
2955 list_del(&backref->list);
2962 /* Set nlink to 0 */
2963 key.objectid = rec->ino;
2964 key.type = BTRFS_INODE_ITEM_KEY;
2966 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2973 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2974 struct btrfs_inode_item);
2975 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2976 btrfs_mark_buffer_dirty(path->nodes[0]);
2977 btrfs_release_path(path);
2980 * Add back valid inode_ref/dir_item/dir_index,
2981 * add_link() will handle the nlink inc, so new nlink must be correct
/* Only backrefs that survived the pruning loop are still on the list */
2983 list_for_each_entry(backref, &rec->backrefs, list) {
2984 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2985 backref->name, backref->namelen,
2986 backref->filetype, &backref->index, 1, 0);
2991 btrfs_release_path(path);
/*
 * Find the highest inode number in @root and store it in *highest_ino.
 *
 * Searches for an INODE_ITEM key at BTRFS_LAST_FREE_OBJECTID and reads the
 * objectid of the item in the slot just before the returned position.
 *
 * NOTE(review): the guard around the slots[0] - 1 access is not visible
 * here - confirm slots[0] cannot be 0 at that point.
 */
2995 static int get_highest_inode(struct btrfs_trans_handle *trans,
2996 struct btrfs_root *root,
2997 struct btrfs_path *path,
3000 struct btrfs_key key, found_key;
3003 btrfs_init_path(path);
3004 key.objectid = BTRFS_LAST_FREE_OBJECTID;
3006 key.type = BTRFS_INODE_ITEM_KEY;
3007 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3009 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3010 path->slots[0] - 1);
3011 *highest_ino = found_key.objectid;
/* A result at or above the last free objectid gets special handling */
3014 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3016 btrfs_release_path(path);
/*
 * Repair a wrong link count for @rec.
 *
 * The file name and type are recovered from the backrefs first (with the
 * inode number and a regular file as fallbacks), then reset_nlink() rebuilds
 * the links. An inode that ends up with no link at all is linked into a
 * lost+found directory created under BTRFS_FIRST_FREE_OBJECTID; name clashes
 * there are resolved by appending a suffix built from the inode number.
 *
 * I_ERR_LINK_COUNT_WRONG is cleared from the record at the end.
 */
3020 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3021 struct btrfs_root *root,
3022 struct btrfs_path *path,
3023 struct inode_record *rec)
3025 char *dir_name = "lost+found";
3026 char namebuf[BTRFS_NAME_LEN] = {0};
3031 int name_recovered = 0;
3032 int type_recovered = 0;
3036 * Get file name and type first before these invalid inode ref
3037 * are deleted by remove_all_invalid_backref()
/* Both helpers return 0 on success, hence the negation */
3039 name_recovered = !find_file_name(rec, namebuf, &namelen);
3040 type_recovered = !find_file_type(rec, &type);
3042 if (!name_recovered) {
3043 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3044 rec->ino, rec->ino);
3045 namelen = count_digits(rec->ino);
3046 sprintf(namebuf, "%llu", rec->ino);
3049 if (!type_recovered) {
3050 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3052 type = BTRFS_FT_REG_FILE;
3056 ret = reset_nlink(trans, root, path, rec);
3059 "Failed to reset nlink for inode %llu: %s\n",
3060 rec->ino, strerror(-ret));
/* reset_nlink() found no valid link at all: move the inode to lost+found */
3064 if (rec->found_link == 0) {
3065 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3069 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3070 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3073 fprintf(stderr, "Failed to create '%s' dir: %s\n",
3074 dir_name, strerror(-ret));
3077 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
3078 namebuf, namelen, type, NULL, 1, 0);
3080 * Add ".INO" suffix several times to handle case where
3081 * "FILENAME.INO" is already taken by another file.
3083 while (ret == -EEXIST) {
3085 * Conflicting file name, add ".INO" as suffix * +1 for '.'
3087 if (namelen + count_digits(rec->ino) + 1 >
3092 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
3094 namelen += count_digits(rec->ino) + 1;
3095 ret = btrfs_add_link(trans, root, rec->ino,
3096 lost_found_ino, namebuf,
3097 namelen, type, NULL, 1, 0);
3101 "Failed to link the inode %llu to %s dir: %s\n",
3102 rec->ino, dir_name, strerror(-ret));
3106 * Just increase the found_link, don't actually add the
3107 * backref. This will make things easier and this inode
3108 * record will be freed after the repair is done.
3109 * So fsck will not report problem about this inode.
3112 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3113 namelen, namebuf, dir_name);
3115 printf("Fixed the nlink of inode %llu\n", rec->ino);
3118 * Clear the flag anyway, or we will loop forever for the same inode
3119 * as it will not be removed from the bad inode list and the dead loop
3122 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3123 btrfs_release_path(path);
3128 * Check if there is any normal(reg or prealloc) file extent for given
3130 * This is used to determine the file type when neither its dir_index/item or
3131 * inode_item exists.
3133 * This will *NOT* report error, if any error happens, just consider it does
3134 * not have any normal file extent.
/*
 * Read-only lookup (no transaction, no COW) of EXTENT_DATA items belonging
 * to @ino in @root. An item whose extent type is anything other than
 * BTRFS_FILE_EXTENT_INLINE is what the function is looking for.
 */
3136 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3138 struct btrfs_path path;
3139 struct btrfs_key key;
3140 struct btrfs_key found_key;
3141 struct btrfs_file_extent_item *fi;
3145 btrfs_init_path(&path);
3147 key.type = BTRFS_EXTENT_DATA_KEY;
3150 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* The search ended past the last item of the leaf: step to the next leaf */
3155 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3156 ret = btrfs_next_leaf(root, &path);
3163 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
/* Stop as soon as the items no longer belong to this inode or key type */
3165 if (found_key.objectid != ino ||
3166 found_key.type != BTRFS_EXTENT_DATA_KEY)
3168 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3169 struct btrfs_file_extent_item);
3170 type = btrfs_file_extent_type(path.nodes[0], fi);
3171 if (type != BTRFS_FILE_EXTENT_INLINE) {
3177 btrfs_release_path(&path);
/*
 * Translate a BTRFS_FT_* directory entry type into the matching S_IF* file
 * type bits through a static lookup table.
 *
 * NOTE(review): the table index is not range checked in the visible code -
 * confirm callers only pass the BTRFS_FT_* values listed in the table.
 */
3181 static u32 btrfs_type_to_imode(u8 type)
3183 static u32 imode_by_btrfs_type[] = {
3184 [BTRFS_FT_REG_FILE] = S_IFREG,
3185 [BTRFS_FT_DIR] = S_IFDIR,
3186 [BTRFS_FT_CHRDEV] = S_IFCHR,
3187 [BTRFS_FT_BLKDEV] = S_IFBLK,
3188 [BTRFS_FT_FIFO] = S_IFIFO,
3189 [BTRFS_FT_SOCK] = S_IFSOCK,
3190 [BTRFS_FT_SYMLINK] = S_IFLNK,
3193 return imode_by_btrfs_type[(type)];
/*
 * Rebuild the missing inode item of @rec with btrfs_new_inode().
 *
 * The file type comes from find_file_type() when possible; otherwise it is
 * guessed from the evidence in the record (regular file extents, dir items,
 * orphan extents) with a regular file as the last resort.
 *
 * On the way out the record is updated: I_ERR_NO_INODE_ITEM is cleared and
 * I_ERR_LINK_COUNT_WRONG is set so that the nlink repair runs afterwards.
 */
3196 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3197 struct btrfs_root *root,
3198 struct btrfs_path *path,
3199 struct inode_record *rec)
3203 int type_recovered = 0;
3206 printf("Trying to rebuild inode:%llu\n", rec->ino);
/* find_file_type() returns 0 on success, hence the negation */
3208 type_recovered = !find_file_type(rec, &filetype);
3211 * Try to determine inode type if type not found.
3213 * For found regular file extent, it must be FILE.
3214 * For found dir_item/index, it must be DIR.
3216 * For undetermined one, use FILE as fallback.
3219 * 1. If found backref(inode_index/item is already handled) to it,
3221 * Need new inode-inode ref structure to allow search for that.
3223 if (!type_recovered) {
3224 if (rec->found_file_extent &&
3225 find_normal_file_extent(root, rec->ino)) {
3227 filetype = BTRFS_FT_REG_FILE;
3228 } else if (rec->found_dir_item) {
3230 filetype = BTRFS_FT_DIR;
3231 } else if (!list_empty(&rec->orphan_extents)) {
3233 filetype = BTRFS_FT_REG_FILE;
3235 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3238 filetype = BTRFS_FT_REG_FILE;
3242 ret = btrfs_new_inode(trans, root, rec->ino,
3243 mode | btrfs_type_to_imode(filetype));
3248 * Here inode rebuild is done, we only rebuild the inode item,
3249 * don't repair the nlink(like move to lost+found).
3250 * That is the job of nlink repair.
3252 * We just fill the record and return
3254 rec->found_dir_item = 1;
3255 rec->imode = mode | btrfs_type_to_imode(filetype);
3257 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3258 /* Ensure the inode_nlinks repair function will be called */
3259 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Deal with every orphan data extent attached to @rec.
 *
 * Each orphan is first probed with btrfs_get_extent() for a conflicting file
 * extent. A conflicting orphan is released with btrfs_free_extent(); the
 * others are inserted as file extents, after which the size accounting and
 * the hole tree of the record are updated. Handled orphans are removed from
 * the list and I_ERR_FILE_EXTENT_ORPHAN is cleared at the end.
 */
3264 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3265 struct btrfs_root *root,
3266 struct btrfs_path *path,
3267 struct inode_record *rec)
3269 struct orphan_data_extent *orphan;
3270 struct orphan_data_extent *tmp;
3273 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3275 * Check for conflicting file extents
3277 * Here we don't know whether the extents is compressed or not,
3278 * so we can only assume it not compressed nor data offset,
3279 * and use its disk_len as extent length.
3281 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3282 orphan->offset, orphan->disk_len, 0);
3283 btrfs_release_path(path);
3288 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3289 orphan->disk_bytenr, orphan->disk_len);
3290 ret = btrfs_free_extent(trans,
3291 root->fs_info->extent_root,
3292 orphan->disk_bytenr, orphan->disk_len,
3293 0, root->objectid, orphan->objectid,
/* disk_len doubles as the file extent length, see the assumption above */
3298 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3299 orphan->offset, orphan->disk_bytenr,
3300 orphan->disk_len, orphan->disk_len);
3304 /* Update file size info */
3305 rec->found_size += orphan->disk_len;
3306 if (rec->found_size == rec->nbytes)
3307 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3309 /* Update the file extent hole info too */
3310 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3314 if (RB_EMPTY_ROOT(&rec->holes))
3315 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3317 list_del(&orphan->list);
3320 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps between file extents of @rec with explicit hole extents.
 *
 * Holes are taken one at a time from the front of rec->holes, punched with
 * btrfs_punch_hole() and then removed from the tree. Once the tree is empty
 * I_ERR_FILE_EXTENT_DISCOUNT is cleared.
 */
3325 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3326 struct btrfs_root *root,
3327 struct btrfs_path *path,
3328 struct inode_record *rec)
3330 struct rb_node *node;
3331 struct file_extent_hole *hole;
3335 node = rb_first(&rec->holes);
3339 hole = rb_entry(node, struct file_extent_hole, node);
3340 ret = btrfs_punch_hole(trans, root, rec->ino,
3341 hole->start, hole->len);
3344 ret = del_file_extent_hole(&rec->holes, hole->start,
3348 if (RB_EMPTY_ROOT(&rec->holes))
3349 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* The tree changed, so restart from its current first node */
3350 node = rb_first(&rec->holes);
3352 /* special case for a file losing all its file extent */
/* One hole from offset 0 covering isize rounded up to the sector size */
3354 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3355 round_up(rec->isize,
3356 root->fs_info->sectorsize));
3360 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3361 rec->ino, root->objectid);
/*
 * Run every applicable repair helper for @rec inside one transaction.
 *
 * Only records carrying at least one of the error bits listed below are
 * considered. The helpers run in a fixed order and each one after the first
 * is skipped once an earlier helper has returned non-zero. The transaction
 * is committed regardless of the outcome.
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started.
 */
3366 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3368 struct btrfs_trans_handle *trans;
3369 struct btrfs_path path;
3372 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3373 I_ERR_NO_ORPHAN_ITEM |
3374 I_ERR_LINK_COUNT_WRONG |
3375 I_ERR_NO_INODE_ITEM |
3376 I_ERR_FILE_EXTENT_ORPHAN |
3377 I_ERR_FILE_EXTENT_DISCOUNT|
3378 I_ERR_FILE_NBYTES_WRONG)))
3382 * For nlink repair, it may create a dir and add link, so
3383 * 2 for parent(256)'s dir_index and dir_item
3384 * 2 for lost+found dir's inode_item and inode_ref
3385 * 1 for the new inode_ref of the file
3386 * 2 for lost+found dir's dir_index and dir_item for the file
3388 trans = btrfs_start_transaction(root, 7);
3390 return PTR_ERR(trans);
3392 btrfs_init_path(&path);
/* The inode item goes first: the later helpers operate on that item */
3393 if (rec->errors & I_ERR_NO_INODE_ITEM)
3394 ret = repair_inode_no_item(trans, root, &path, rec);
3395 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3396 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3397 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3398 ret = repair_inode_discount_extent(trans, root, &path, rec);
3399 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3400 ret = repair_inode_isize(trans, root, &path, rec);
3401 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3402 ret = repair_inode_orphan_item(trans, root, &path, rec);
3403 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3404 ret = repair_inode_nlinks(trans, root, &path, rec);
3405 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3406 ret = repair_inode_nbytes(trans, root, &path, rec);
3407 btrfs_commit_transaction(trans, root);
3408 btrfs_release_path(&path);
/*
 * Validate, and in repair mode try to fix, every inode record collected for
 * @root in @inode_cache.
 *
 * Visible phases:
 *   - staged backref repair over the whole cache (only when repair is set),
 *   - a check of the root directory record, recreating the root dir with
 *     btrfs_make_root_dir() where the visible code takes that path,
 *   - a final drain of the cache in which each record is checked, optionally
 *     repaired through try_repair_inode(), reported and freed.
 *
 * Returns -1 when error is greater than 0, otherwise 0.
 */
3412 static int check_inode_recs(struct btrfs_root *root,
3413 struct cache_tree *inode_cache)
3415 struct cache_extent *cache;
3416 struct ptr_node *node;
3417 struct inode_record *rec;
3418 struct inode_backref *backref;
3423 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* A root with zero refs is not expected to have any inode records */
3425 if (btrfs_root_refs(&root->root_item) == 0) {
3426 if (!cache_tree_empty(inode_cache))
3427 fprintf(stderr, "warning line %d\n", __LINE__);
3432 * We need to repair backrefs first because we could change some of the
3433 * errors in the inode recs.
3435 * We also need to go through and delete invalid backrefs first and then
3436 * add the correct ones second. We do this because we may get EEXIST
3437 * when adding back the correct index because we hadn't yet deleted the
3440 * For example, if we were missing a dir index then the directories
3441 * isize would be wrong, so if we fixed the isize to what we thought it
3442 * would be and then fixed the backref we'd still have a invalid fs, so
3443 * we need to add back the dir index and then check to see if the isize
3448 if (stage == 3 && !err)
3451 cache = search_cache_extent(inode_cache, 0);
3452 while (repair && cache) {
3453 node = container_of(cache, struct ptr_node, cache);
3455 cache = next_cache_extent(cache);
3457 /* Need to free everything up and rescan */
3459 remove_cache_extent(inode_cache, &node->cache);
3461 free_inode_rec(rec);
3465 if (list_empty(&rec->backrefs))
3468 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Look up the record of the root directory itself */
3482 rec = get_inode_rec(inode_cache, root_dirid, 0);
3483 BUG_ON(IS_ERR(rec));
3485 ret = check_root_dir(rec);
3487 fprintf(stderr, "root %llu root dir %llu error\n",
3488 (unsigned long long)root->root_key.objectid,
3489 (unsigned long long)root_dirid);
3490 print_inode_error(root, rec);
3495 struct btrfs_trans_handle *trans;
3497 trans = btrfs_start_transaction(root, 1);
3498 if (IS_ERR(trans)) {
3499 err = PTR_ERR(trans);
3504 "root %llu missing its root dir, recreating\n",
3505 (unsigned long long)root->objectid);
3507 ret = btrfs_make_root_dir(trans, root, root_dirid);
3510 btrfs_commit_transaction(trans, root);
3514 fprintf(stderr, "root %llu root dir %llu not found\n",
3515 (unsigned long long)root->root_key.objectid,
3516 (unsigned long long)root_dirid);
/* Final pass: take every remaining record out of the cache and judge it */
3520 cache = search_cache_extent(inode_cache, 0);
3523 node = container_of(cache, struct ptr_node, cache);
3525 remove_cache_extent(inode_cache, &node->cache);
/* The root dir was handled above and the orphan objectid is not a file */
3527 if (rec->ino == root_dirid ||
3528 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3529 free_inode_rec(rec);
3533 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3534 ret = check_orphan_item(root, rec->ino);
3536 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3537 if (can_free_inode_rec(rec)) {
3538 free_inode_rec(rec);
/* Derive the remaining error bits from what was actually found */
3543 if (!rec->found_inode_item)
3544 rec->errors |= I_ERR_NO_INODE_ITEM;
3545 if (rec->found_link != rec->nlink)
3546 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3548 ret = try_repair_inode(root, rec);
3549 if (ret == 0 && can_free_inode_rec(rec)) {
3550 free_inode_rec(rec);
3556 if (!(repair && ret == 0))
3558 print_inode_error(root, rec);
/* Report every backref still on the record together with what it lacks */
3559 list_for_each_entry(backref, &rec->backrefs, list) {
3560 if (!backref->found_dir_item)
3561 backref->errors |= REF_ERR_NO_DIR_ITEM;
3562 if (!backref->found_dir_index)
3563 backref->errors |= REF_ERR_NO_DIR_INDEX;
3564 if (!backref->found_inode_ref)
3565 backref->errors |= REF_ERR_NO_INODE_REF;
3566 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3567 " namelen %u name %s filetype %d errors %x",
3568 (unsigned long long)backref->dir,
3569 (unsigned long long)backref->index,
3570 backref->namelen, backref->name,
3571 backref->filetype, backref->errors);
3572 print_ref_error(backref->errors);
3574 free_inode_rec(rec);
3576 return (error > 0) ? -1 : 0;
/*
 * Return the root_record for objectid from @root_cache, allocating and
 * inserting a zeroed one (cache range start = objectid, size = 1) when the
 * lookup finds nothing.
 *
 * Returns ERR_PTR(-ENOMEM) or ERR_PTR(-EEXIST) on failure, so callers must
 * test the result with IS_ERR().
 */
3579 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3582 struct cache_extent *cache;
3583 struct root_record *rec = NULL;
3586 cache = lookup_cache_extent(root_cache, objectid, 1);
3588 rec = container_of(cache, struct root_record, cache);
3590 rec = calloc(1, sizeof(*rec));
3592 return ERR_PTR(-ENOMEM);
3593 rec->objectid = objectid;
3594 INIT_LIST_HEAD(&rec->backrefs);
3595 rec->cache.start = objectid;
3596 rec->cache.size = 1;
3598 ret = insert_cache_extent(root_cache, &rec->cache);
3600 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec that matches @ref_root, @dir and the name, or
 * allocate a new one and append it to the backref list of the record.
 *
 * @index is not part of the match; it is only stored in a newly allocated
 * backref. The name is copied into the allocation and NUL terminated, so
 * @name itself does not need a terminator.
 */
3605 static struct root_backref *get_root_backref(struct root_record *rec,
3606 u64 ref_root, u64 dir, u64 index,
3607 const char *name, int namelen)
3609 struct root_backref *backref;
3611 list_for_each_entry(backref, &rec->backrefs, list) {
3612 if (backref->ref_root != ref_root || backref->dir != dir ||
3613 backref->namelen != namelen)
3615 if (memcmp(name, backref->name, namelen))
/* One extra byte for the terminator written below */
3620 backref = calloc(1, sizeof(*backref) + namelen + 1);
3623 backref->ref_root = ref_root;
3625 backref->index = index;
3626 backref->namelen = namelen;
3627 memcpy(backref->name, name, namelen);
3628 backref->name[namelen] = '\0';
3629 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Per-entry destructor for the root record cache: takes every backref off
 * the list of the record that embeds @cache.
 */
3633 static void free_root_record(struct cache_extent *cache)
3635 struct root_record *rec;
3636 struct root_backref *backref;
3638 rec = container_of(cache, struct root_record, cache);
3639 while (!list_empty(&rec->backrefs)) {
3640 backref = to_root_backref(rec->backrefs.next);
3641 list_del(&backref->list);
/*
 * Instantiates the tree freeing helper for root_recs with free_root_record
 * as the per-entry destructor - presumably named free_root_recs_tree(),
 * confirm against the macro definition.
 */
3648 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one piece of evidence (dir item, dir index, root ref or root
 * backref, selected by @item_type) for the link from @ref_root to @root_id.
 *
 * The matching backref is looked up or created, @errors is merged into it,
 * and the found_* flag for the item type is set. A backref counts as
 * reachable once both its forward ref and its dir item have been seen.
 */
3650 static int add_root_backref(struct cache_tree *root_cache,
3651 u64 root_id, u64 ref_root, u64 dir, u64 index,
3652 const char *name, int namelen,
3653 int item_type, int errors)
3655 struct root_record *rec;
3656 struct root_backref *backref;
3658 rec = get_root_rec(root_cache, root_id);
3659 BUG_ON(IS_ERR(rec));
3660 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3663 backref->errors |= errors;
/*
 * Every item type except DIR_ITEM takes part in the index bookkeeping: an
 * index that differs from one recorded by an earlier item is flagged.
 */
3665 if (item_type != BTRFS_DIR_ITEM_KEY) {
3666 if (backref->found_dir_index || backref->found_back_ref ||
3667 backref->found_forward_ref) {
3668 if (backref->index != index)
3669 backref->errors |= REF_ERR_INDEX_UNMATCH;
3671 backref->index = index;
3675 if (item_type == BTRFS_DIR_ITEM_KEY) {
3676 if (backref->found_forward_ref)
3678 backref->found_dir_item = 1;
3679 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3680 backref->found_dir_index = 1;
3681 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3682 if (backref->found_forward_ref)
3683 backref->errors |= REF_ERR_DUP_ROOT_REF;
3684 else if (backref->found_dir_item)
3686 backref->found_forward_ref = 1;
3687 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3688 if (backref->found_back_ref)
3689 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3690 backref->found_back_ref = 1;
3695 if (backref->found_forward_ref && backref->found_dir_item)
3696 backref->reachable = 1;
/*
 * Drain the inode records in @src_cache and turn the directory entries that
 * point at child roots into root backrefs in @dst_cache.
 *
 * For the tree reloc root the source cache is simply freed. For every other
 * root each record is removed from the cache, tested with is_child_root()
 * and its dir item / dir index backrefs are forwarded through
 * add_root_backref(). The record is freed afterwards.
 */
3700 static int merge_root_recs(struct btrfs_root *root,
3701 struct cache_tree *src_cache,
3702 struct cache_tree *dst_cache)
3704 struct cache_extent *cache;
3705 struct ptr_node *node;
3706 struct inode_record *rec;
3707 struct inode_backref *backref;
3710 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3711 free_inode_recs_tree(src_cache);
3716 cache = search_cache_extent(src_cache, 0);
3719 node = container_of(cache, struct ptr_node, cache);
3721 remove_cache_extent(src_cache, &node->cache);
3724 ret = is_child_root(root, root->objectid, rec->ino);
3730 list_for_each_entry(backref, &rec->backrefs, list) {
/* These records are not expected to carry an inode ref */
3731 BUG_ON(backref->found_inode_ref);
3732 if (backref->found_dir_item)
3733 add_root_backref(dst_cache, rec->ino,
3734 root->root_key.objectid, backref->dir,
3735 backref->index, backref->name,
3736 backref->namelen, BTRFS_DIR_ITEM_KEY,
3738 if (backref->found_dir_index)
3739 add_root_backref(dst_cache, rec->ino,
3740 root->root_key.objectid, backref->dir,
3741 backref->index, backref->name,
3742 backref->namelen, BTRFS_DIR_INDEX_KEY,
3746 free_inode_rec(rec);
/*
 * Verify that every fs tree recorded in @root_cache is referenced and that
 * its root refs are consistent.
 *
 * First the reachability information is propagated: a backref whose
 * referencing root has no reference of its own loses its reachable flag.
 * Then every record is inspected, unreferenced trees and backrefs with
 * missing pieces are reported on stderr.
 *
 * Returns 1 when errors is greater than 0, otherwise 0.
 */
3753 static int check_root_refs(struct btrfs_root *root,
3754 struct cache_tree *root_cache)
3756 struct root_record *rec;
3757 struct root_record *ref_root;
3758 struct root_backref *backref;
3759 struct cache_extent *cache;
/* The top level fs tree is the starting point of the reference walk */
3765 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3766 BUG_ON(IS_ERR(rec));
3769 /* fixme: this can not detect circular references */
3772 cache = search_cache_extent(root_cache, 0);
3776 rec = container_of(cache, struct root_record, cache);
3777 cache = next_cache_extent(cache);
3779 if (rec->found_ref == 0)
3782 list_for_each_entry(backref, &rec->backrefs, list) {
3783 if (!backref->reachable)
3786 ref_root = get_root_rec(root_cache,
3788 BUG_ON(IS_ERR(ref_root));
3789 if (ref_root->found_ref > 0)
3792 backref->reachable = 0;
3794 if (rec->found_ref == 0)
/* Second walk: report what is still wrong after the propagation */
3800 cache = search_cache_extent(root_cache, 0);
3804 rec = container_of(cache, struct root_record, cache);
3805 cache = next_cache_extent(cache);
/* Only ids in the regular subvolume range are checked for an orphan item */
3807 if (rec->found_ref == 0 &&
3808 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3809 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3810 ret = check_orphan_item(root->fs_info->tree_root,
3816 * If we don't have a root item then we likely just have
3817 * a dir item in a snapshot for this root but no actual
3818 * ref key or anything so it's meaningless.
3820 if (!rec->found_root_item)
3823 fprintf(stderr, "fs tree %llu not referenced\n",
3824 (unsigned long long)rec->objectid);
3828 if (rec->found_ref > 0 && !rec->found_root_item)
3830 list_for_each_entry(backref, &rec->backrefs, list) {
3831 if (!backref->found_dir_item)
3832 backref->errors |= REF_ERR_NO_DIR_ITEM;
3833 if (!backref->found_dir_index)
3834 backref->errors |= REF_ERR_NO_DIR_INDEX;
3835 if (!backref->found_back_ref)
3836 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3837 if (!backref->found_forward_ref)
3838 backref->errors |= REF_ERR_NO_ROOT_REF;
3839 if (backref->reachable && backref->errors)
3846 fprintf(stderr, "fs tree %llu refs %u %s\n",
3847 (unsigned long long)rec->objectid, rec->found_ref,
3848 rec->found_root_item ? "" : "not found");
3850 list_for_each_entry(backref, &rec->backrefs, list) {
3851 if (!backref->reachable)
3853 if (!backref->errors && rec->found_root_item)
3855 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3856 " index %llu namelen %u name %s errors %x\n",
3857 (unsigned long long)backref->ref_root,
3858 (unsigned long long)backref->dir,
3859 (unsigned long long)backref->index,
3860 backref->namelen, backref->name,
3862 print_ref_error(backref->errors);
3865 return errors > 0 ? 1 : 0;
/*
 * Decode the root ref item at @slot of @eb and record it in @root_cache.
 *
 * The directory id, sequence (index) and name are read from the item. The
 * copied name is limited to BTRFS_NAME_LEN bytes and REF_ERR_NAME_TOO_LONG
 * is recorded for the clamped case.
 *
 * For a ROOT_REF key the referenced root is key->offset and the referencing
 * root is key->objectid; for the other key type handled here the two are
 * swapped.
 */
3868 static int process_root_ref(struct extent_buffer *eb, int slot,
3869 struct btrfs_key *key,
3870 struct cache_tree *root_cache)
3876 struct btrfs_root_ref *ref;
3877 char namebuf[BTRFS_NAME_LEN];
3880 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3882 dirid = btrfs_root_ref_dirid(eb, ref);
3883 index = btrfs_root_ref_sequence(eb, ref);
3884 name_len = btrfs_root_ref_name_len(eb, ref);
3886 if (name_len <= BTRFS_NAME_LEN) {
3890 len = BTRFS_NAME_LEN;
3891 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow directly after the fixed size ref structure */
3893 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3895 if (key->type == BTRFS_ROOT_REF_KEY) {
3896 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3897 index, namebuf, len, key->type, error);
3899 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3900 index, namebuf, len, key->type, error);
/*
 * Per-entry destructor for the corrupt block cache: resolves the
 * btrfs_corrupt_block that embeds @cache.
 */
3905 static void free_corrupt_block(struct cache_extent *cache)
3907 struct btrfs_corrupt_block *corrupt;
3909 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/*
 * Instantiates the tree freeing helper for corrupt_blocks, used in this file
 * as free_corrupt_blocks_tree().
 */
3913 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3916 * Repair the btree of the given root.
3918 * The fix is to remove the node key in corrupt_blocks cache_tree.
3919 * and rebalance the tree.
3920 * After the fix, the btree should be writeable.
/*
 * Two passes over @corrupt_blocks inside one transaction:
 *   1. for each entry, search down to the recorded key at the recorded
 *      level, delete the node pointer found there and free the extent it
 *      pointed to,
 *   2. search for each recorded key again with a negative ins_len so that
 *      the tree gets rebalanced.
 *
 * Nothing is done when the cache tree is empty.
 */
3922 static int repair_btree(struct btrfs_root *root,
3923 struct cache_tree *corrupt_blocks)
3925 struct btrfs_trans_handle *trans;
3926 struct btrfs_path path;
3927 struct btrfs_corrupt_block *corrupt;
3928 struct cache_extent *cache;
3929 struct btrfs_key key;
3934 if (cache_tree_empty(corrupt_blocks))
3937 trans = btrfs_start_transaction(root, 1);
3938 if (IS_ERR(trans)) {
3939 ret = PTR_ERR(trans);
3940 fprintf(stderr, "Error starting transaction: %s\n",
3944 btrfs_init_path(&path);
3945 cache = first_cache_extent(corrupt_blocks);
3947 corrupt = container_of(cache, struct btrfs_corrupt_block,
3949 level = corrupt->level;
/* Stop the search at the level of the parent pointer to be removed */
3950 path.lowest_level = level;
3951 key.objectid = corrupt->key.objectid;
3952 key.type = corrupt->key.type;
3953 key.offset = corrupt->key.offset;
3956 * Here we don't want to do any tree balance, since it may
3957 * cause a balance with corrupted brother leaf/node,
3958 * so ins_len set to 0 here.
3959 * Balance will be done after all corrupt node/leaf is deleted.
3961 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
/* Remember the block address before the pointer is deleted */
3964 offset = btrfs_node_blockptr(path.nodes[level],
3967 /* Remove the ptr */
3968 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3972 * Remove the corresponding extent
3973 * return value is not concerned.
3975 btrfs_release_path(&path);
3976 ret = btrfs_free_extent(trans, root, offset,
3977 root->fs_info->nodesize, 0,
3978 root->root_key.objectid, level - 1, 0);
3979 cache = next_cache_extent(cache);
3982 /* Balance the btree using btrfs_search_slot() */
3983 cache = first_cache_extent(corrupt_blocks);
3985 corrupt = container_of(cache, struct btrfs_corrupt_block,
3987 memcpy(&key, &corrupt->key, sizeof(key));
3988 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3991 /* return will always >0 since it won't find the item */
3993 btrfs_release_path(&path);
3994 cache = next_cache_extent(cache);
3997 btrfs_commit_transaction(trans, root);
3998 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree.
 *
 * Visible steps:
 *   - install a per-call corrupt block cache in fs_info,
 *   - register the root in @root_cache (tree reloc roots are skipped),
 *   - move the orphan data extents of the root onto their inode records,
 *   - verify the root block, then walk the tree down and up, starting from
 *     the root node or from the recorded drop progress,
 *   - list the corrupted tree blocks and call repair_btree() for them,
 *   - merge the collected root records into @root_cache and check the inode
 *     records,
 *   - release the corrupt block cache and the orphan extent list.
 */
4002 static int check_fs_root(struct btrfs_root *root,
4003 struct cache_tree *root_cache,
4004 struct walk_control *wc)
4010 struct btrfs_path path;
4011 struct shared_node root_node;
4012 struct root_record *rec;
4013 struct btrfs_root_item *root_item = &root->root_item;
4014 struct cache_tree corrupt_blocks;
4015 struct orphan_data_extent *orphan;
4016 struct orphan_data_extent *tmp;
4017 enum btrfs_tree_block_status status;
4018 struct node_refs nrefs;
4021 * Reuse the corrupt_block cache tree to record corrupted tree block
4023 * Unlike the usage in extent tree check, here we do it in a per
4024 * fs/subvol tree base.
4026 cache_tree_init(&corrupt_blocks);
4027 root->fs_info->corrupt_blocks = &corrupt_blocks;
4029 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4030 rec = get_root_rec(root_cache, root->root_key.objectid);
4031 BUG_ON(IS_ERR(rec));
4032 if (btrfs_root_refs(root_item) > 0)
4033 rec->found_root_item = 1;
4036 btrfs_init_path(&path);
4037 memset(&root_node, 0, sizeof(root_node));
4038 cache_tree_init(&root_node.root_cache);
4039 cache_tree_init(&root_node.inode_cache);
4040 memset(&nrefs, 0, sizeof(nrefs));
4042 /* Move the orphan extent record to corresponding inode_record */
4043 list_for_each_entry_safe(orphan, tmp,
4044 &root->orphan_data_extents, list) {
4045 struct inode_record *inode;
4047 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4049 BUG_ON(IS_ERR(inode));
4050 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4051 list_move(&orphan->list, &inode->orphan_extents);
/* Point the walk control at the root level before the tree walk starts */
4054 level = btrfs_header_level(root->node);
4055 memset(wc->nodes, 0, sizeof(wc->nodes));
4056 wc->nodes[level] = &root_node;
4057 wc->active_node = level;
4058 wc->root_level = level;
4060 /* We may not have checked the root block, lets do that now */
4061 if (btrfs_is_leaf(root->node))
4062 status = btrfs_check_leaf(root, NULL, root->node);
4064 status = btrfs_check_node(root, NULL, root->node);
4065 if (status != BTRFS_TREE_BLOCK_CLEAN)
/*
 * A live root, or one without recorded drop progress, is walked from its
 * root node; otherwise the walk resumes at the drop progress key.
 */
4068 if (btrfs_root_refs(root_item) > 0 ||
4069 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4070 path.nodes[level] = root->node;
4071 extent_buffer_get(root->node);
4072 path.slots[level] = 0;
4074 struct btrfs_key key;
4075 struct btrfs_disk_key found_key;
4077 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4078 level = root_item->drop_level;
4079 path.lowest_level = level;
/* drop_level comes from disk, so validate it before using it as an index */
4080 if (level > btrfs_header_level(root->node) ||
4081 level >= BTRFS_MAX_LEVEL) {
4082 error("ignoring invalid drop level: %u", level);
4085 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4088 btrfs_node_key(path.nodes[level], &found_key,
4090 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4091 sizeof(found_key)));
4095 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4101 wret = walk_up_tree(root, &path, wc, &level);
4108 btrfs_release_path(&path);
4110 if (!cache_tree_empty(&corrupt_blocks)) {
4111 struct cache_extent *cache;
4112 struct btrfs_corrupt_block *corrupt;
4114 printf("The following tree block(s) is corrupted in tree %llu:\n",
4115 root->root_key.objectid);
4116 cache = first_cache_extent(&corrupt_blocks);
4118 corrupt = container_of(cache,
4119 struct btrfs_corrupt_block,
4121 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4122 cache->start, corrupt->level,
4123 corrupt->key.objectid, corrupt->key.type,
4124 corrupt->key.offset);
4125 cache = next_cache_extent(cache);
4128 printf("Try to repair the btree for root %llu\n",
4129 root->root_key.objectid);
4130 ret = repair_btree(root, &corrupt_blocks);
4132 fprintf(stderr, "Failed to repair btree: %s\n",
4135 printf("Btree for root %llu is fixed\n",
4136 root->root_key.objectid);
4140 err = merge_root_recs(root, &root_node.root_cache, root_cache);
/* Mark the inode record still in progress as checked before the final pass */
4144 if (root_node.current) {
4145 root_node.current->checked = 1;
4146 maybe_free_inode_rec(&root_node.inode_cache,
4150 err = check_inode_recs(root, &root_node.inode_cache);
/* corrupt_blocks lives on this stack frame, so unhook it from fs_info */
4154 free_corrupt_blocks_tree(&corrupt_blocks);
4155 root->fs_info->corrupt_blocks = NULL;
4156 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Decide whether the root with id @objectid is one that check_fs_roots()
 * should hand to check_fs_root().
 *
 * The two relocation tree ids are special-cased by the explicit test below;
 * every other id is classified by is_fstree().
 */
4160 static int fs_root_objectid(u64 objectid)
4162 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4163 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4165 return is_fstree(objectid);
/*
 * Iterate over the items of the tree root and run check_fs_root() on every
 * ROOT_ITEM whose objectid is accepted by fs_root_objectid().  ROOT_REF and
 * ROOT_BACKREF items are passed to process_root_ref() instead.
 *
 * @fs_info:    filesystem being checked; its tree_root is the tree scanned
 * @root_cache: root record cache, forwarded to check_fs_root() and emptied
 *              with free_root_recs_tree() when the scan has to be redone
 */
4168 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4169 struct cache_tree *root_cache)
4171 struct btrfs_path path;
4172 struct btrfs_key key;
4173 struct walk_control wc;
4174 struct extent_buffer *leaf, *tree_node;
4175 struct btrfs_root *tmp_root;
4176 struct btrfs_root *tree_root = fs_info->tree_root;
/* Switch the progress indicator to the fs roots phase, if it is enabled */
4180 if (ctx.progress_enabled) {
4181 ctx.tp = TASK_FS_ROOTS;
4182 task_start(ctx.info);
4186 * Just in case we made any changes to the extent tree that weren't
4187 * reflected into the free space cache yet.
4190 reset_cached_block_groups(fs_info);
4191 memset(&wc, 0, sizeof(wc));
4192 cache_tree_init(&wc.shared);
4193 btrfs_init_path(&path);
4198 key.type = BTRFS_ROOT_ITEM_KEY;
4199 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember the current tree root node so that a later change is noticed */
4204 tree_node = tree_root->node;
/* The tree root node was replaced: drop the collected records and the path */
4206 if (tree_node != tree_root->node) {
4207 free_root_recs_tree(root_cache);
4208 btrfs_release_path(&path);
4211 leaf = path.nodes[0];
/* Past the last item of this leaf: advance to the next leaf */
4212 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4213 ret = btrfs_next_leaf(tree_root, &path);
4219 leaf = path.nodes[0];
4221 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4222 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4223 fs_root_objectid(key.objectid)) {
/*
 * Reloc trees are read with the _no_cache variant; all other roots go
 * through btrfs_read_fs_root() with key.offset set to (u64)-1.
 */
4224 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4225 tmp_root = btrfs_read_fs_root_no_cache(
4228 key.offset = (u64)-1;
4229 tmp_root = btrfs_read_fs_root(
4232 if (IS_ERR(tmp_root)) {
4236 ret = check_fs_root(tmp_root, root_cache, &wc);
/* -EAGAIN from check_fs_root(): discard the records gathered so far */
4237 if (ret == -EAGAIN) {
4238 free_root_recs_tree(root_cache);
4239 btrfs_release_path(&path);
/* Only the reloc root (read without the cache) is freed here */
4244 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4245 btrfs_free_fs_root(tmp_root);
4246 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4247 key.type == BTRFS_ROOT_BACKREF_KEY) {
4248 process_root_ref(leaf, path.slots[0], &key,
4255 btrfs_release_path(&path);
/*
 * NOTE(review): the emptiness test runs right after the tree has been
 * freed - confirm whether the warning below is still reachable.
 */
4257 free_extent_cache_tree(&wc.shared);
4258 if (!cache_tree_empty(&wc.shared))
4259 fprintf(stderr, "warning line %d\n", __LINE__);
4261 task_stop(ctx.info);
4267 * Find the DIR_INDEX offset (@index_ret) of the entry matching @location_id and name.
4268 * Notice: time efficiency is O(N)
4270 * @root: the root of the fs/file tree
4271 * @index_ret: the index as return value
4272 * @namebuf: the name to match
4273 * @name_len: the length of name to match
4274 * @file_type: the file_type of INODE_ITEM to match
4276 * Returns 0 if found and *@index_ret will be modified with right value
4277 * Returns <0 if not found, and *@index_ret will be (u64)-1
/*
 * Look for the DIR_INDEX item of directory @dirid whose entry points at inode
 * @location_id (INODE_ITEM key, offset 0) and has the given file type and
 * name.
 *
 * The search key uses offset (u64)-1, i.e. it starts behind the last possible
 * index, and btrfs_previous_item() is used to reach existing DIR_INDEX items
 * of @dirid.
 *
 * On a match *@index_ret is set to the offset of the DIR_INDEX key found;
 * when nothing is found it is set to the sentinel (u64)-1.
 */
4279 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4280 u64 *index_ret, char *namebuf, u32 name_len,
4283 struct btrfs_path path;
4284 struct extent_buffer *node;
4285 struct btrfs_dir_item *di;
4286 struct btrfs_key key;
4287 struct btrfs_key location;
4288 char name[BTRFS_NAME_LEN] = {0};
4300 /* search from the last index */
4301 key.objectid = dirid;
4302 key.offset = (u64)-1;
4303 key.type = BTRFS_DIR_INDEX_KEY;
4305 btrfs_init_path(&path);
4306 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4311 ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
/*
 * "Not found" sentinel.  It has to be the all-ones u64 value that callers
 * compare against; the previous spelling "(64)-1" was the integer
 * expression 64 - 1 and stored 63.
 */
4314 *index_ret = (u64)-1;
4317 /* Check whether inode_id/filetype/name match */
4318 node = path.nodes[0];
4319 slot = path.slots[0];
4320 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4321 total = btrfs_item_size_nr(node, slot);
4322 while (cur < total) {
4324 len = btrfs_dir_name_len(node, di);
4325 data_len = btrfs_dir_data_len(node, di);
4327 btrfs_dir_item_key_to_cpu(node, di, &location);
/* The entry must reference the INODE_ITEM of @location_id */
4328 if (location.objectid != location_id ||
4329 location.type != BTRFS_INODE_ITEM_KEY ||
4330 location.offset != 0)
4333 filetype = btrfs_dir_type(node, di);
4334 if (file_type != filetype)
/* Clamp the on-disk length so the copy below fits into name[] */
4337 if (len > BTRFS_NAME_LEN)
4338 len = BTRFS_NAME_LEN;
4340 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4341 if (len != name_len || strncmp(namebuf, name, len))
/* Match: report the offset of the DIR_INDEX key as the index */
4344 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4345 *index_ret = key.offset;
/* Step over this entry: header + name + data */
4349 len += sizeof(*di) + data_len;
4350 di = (struct btrfs_dir_item *)((char *)di + len);
4356 btrfs_release_path(&path);
4361 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4362 * INODE_REF/INODE_EXTREF match.
4364 * @root: the root of the fs/file tree
4365 * @key: the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4366 * value while find index
4367 * @location_key: location key of the struct btrfs_dir_item to match
4368 * @name: the name to match
4369 * @namelen: the length of name
4370 * @file_type: the type of file to match
4372 * Return 0 if no error occurred.
4373 * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4374 * DIR_ITEM/DIR_INDEX
4375 * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4376 * and DIR_ITEM/DIR_INDEX mismatch
/*
 * Search @root for the DIR_ITEM/DIR_INDEX described by @key and compare each
 * entry stored in that item against @location_key, @file_type and the name
 * (@name, @namelen).
 *
 * If @key is a DIR_INDEX key with offset (u64)-1 the index is first resolved
 * through find_dir_index(), which writes the result into key->offset.
 */
4378 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4379 struct btrfs_key *location_key, char *name,
4380 u32 namelen, u8 file_type)
4382 struct btrfs_path path;
4383 struct extent_buffer *node;
4384 struct btrfs_dir_item *di;
4385 struct btrfs_key location;
4386 char namebuf[BTRFS_NAME_LEN] = {0};
4395 /* get the index by traversing all index */
4396 if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4397 ret = find_dir_index(root, key->objectid,
4398 location_key->objectid, &key->offset,
4399 name, namelen, file_type);
4401 ret = DIR_INDEX_MISSING;
4405 btrfs_init_path(&path);
4406 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4408 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4413 /* Check whether inode_id/filetype/name match */
4414 node = path.nodes[0];
4415 slot = path.slots[0];
4416 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4417 total = btrfs_item_size_nr(node, slot);
4418 while (cur < total) {
/* Assume a mismatch for this entry until every comparison has passed */
4419 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4420 DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4422 len = btrfs_dir_name_len(node, di);
4423 data_len = btrfs_dir_data_len(node, di);
4425 btrfs_dir_item_key_to_cpu(node, di, &location);
4426 if (location.objectid != location_key->objectid ||
4427 location.type != location_key->type ||
4428 location.offset != location_key->offset)
4431 filetype = btrfs_dir_type(node, di);
4432 if (file_type != filetype)
/*
 * Clamp over-long names to the size of namebuf[].
 * NOTE(review): len is already clamped when the warning prints it, so the
 * message shows BTRFS_NAME_LEN rather than the on-disk length.
 */
4435 if (len > BTRFS_NAME_LEN) {
4436 len = BTRFS_NAME_LEN;
4437 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4439 key->type == BTRFS_DIR_ITEM_KEY ?
4440 "DIR_ITEM" : "DIR_INDEX",
4441 key->objectid, key->offset, len);
4443 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4445 if (len != namelen || strncmp(namebuf, name, len))
/* Advance to the next entry packed in the same item */
4451 len += sizeof(*di) + data_len;
4452 di = (struct btrfs_dir_item *)((char *)di + len);
4457 btrfs_release_path(&path);
4462 * Prints inode ref error message
/*
 * Report the problems recorded in the bitmap @err for one INODE_REF entry.
 *
 * @root:     root the INODE_REF lives in (only its objectid is printed)
 * @key:      key of the INODE_REF; key->offset is printed as the directory id
 * @index:    directory index stored in the ref
 * @namebuf:  name stored in the ref
 * @name_len: length of @namebuf, used to compute the DIR_ITEM name hash
 * @filetype: file type printed in the message
 * @err:      DIR_ITEM_ and DIR_INDEX_ MISSING/MISMATCH bits
 */
4464 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4465 u64 index, const char *namebuf, int name_len,
4466 u8 filetype, int err)
4471 /* root dir error */
4472 if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4474 "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4475 root->objectid, key->objectid, key->offset, namebuf);
4480 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4481 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4482 root->objectid, key->offset,
4483 btrfs_name_hash(namebuf, name_len),
4484 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
/*
 * The DIR INDEX wording is chosen from the DIR_INDEX bit; testing
 * DIR_ITEM_MISMATCH here reported a mismatching index as "missing".
 */
4486 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4487 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4488 root->objectid, key->offset, index,
4489 err & DIR_INDEX_MISMATCH ? "mismatch" : "missing",
4494 * Insert the missing inode item.
4496 * Returns 0 means success.
4497 * Returns <0 means error.
/*
 * Create the INODE_ITEM for @ino in @root inside its own transaction, using
 * create_inode_item_lowmem() with the given file type.
 */
4499 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4502 struct btrfs_key key;
4503 struct btrfs_trans_handle *trans;
4504 struct btrfs_path path;
4508 key.type = BTRFS_INODE_ITEM_KEY;
4511 btrfs_init_path(&path);
4512 trans = btrfs_start_transaction(root, 1);
4513 if (IS_ERR(trans)) {
/*
 * Search with insert intent.  A negative result or 0 is singled out before
 * the insert; presumably 0 means the item already exists - TODO confirm.
 */
4518 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4519 if (ret < 0 || !ret)
4522 /* insert inode item */
/* NOTE(review): the result of this call is not captured on this line */
4523 create_inode_item_lowmem(trans, root, ino, filetype);
4526 btrfs_commit_transaction(trans, root);
4529 error("failed to repair root %llu INODE ITEM[%llu] missing",
4530 root->objectid, ino);
4531 btrfs_release_path(&path);
4536 * The ternary means dir item, dir index and relative inode ref.
4537 * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4538 * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4540 * If two of three is missing or mismatched, delete the existing one.
4541 * If one of three is missing or mismatched, add the missing one.
4543 * returns 0 means success.
4544 * returns not 0 means on error;
/*
 * Repair the (dir item, dir index, inode ref) triple of one directory entry.
 *
 * The bits in @err decide how many members of the triple are broken; the
 * resulting "stage" selects between removing the entry with btrfs_unlink()
 * and re-creating it with btrfs_add_link().  The operation runs in its own
 * transaction and the outcome is printed.
 */
4546 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4547 u64 index, char *name, int name_len, u8 filetype,
4550 struct btrfs_trans_handle *trans;
4555 * stage shall be one of following valild values:
4556 * 0: Fine, nothing to do.
4557 * 1: One of three is wrong, so add missing one.
4558 * 2: Two of three is wrong, so delete existed one.
/* Each group of error bits below accounts for one member of the triple */
4560 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4562 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4564 if (err & (INODE_REF_MISSING))
4567 /* stage must be smaller than 3 */
4570 trans = btrfs_start_transaction(root, 1);
4572 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4577 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4578 filetype, &index, 1, 1);
4582 btrfs_commit_transaction(trans, root);
4585 error("fail to repair inode %llu name %s filetype %u",
4586 ino, name, filetype);
/* The message names the action by stage: 2 prints "Delete", else "Add" */
4588 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
4589 stage == 2 ? "Delete" : "Add",
4590 ino, name, filetype);
4596 * Traverse the given INODE_REF and call find_dir_item() to find related
4597 * DIR_ITEM/DIR_INDEX.
4599 * @root: the root of the fs/file tree
4600 * @ref_key: the key of the INODE_REF
4601 * @path the path provides node and slot
4602 * @refs_ret: the count of INODE_REF
4603 * @mode: the st_mode of INODE_ITEM
4604 * @name_ret: returns with the first ref's name
4605 * @namelen_ret: len of the name_ret
4607 * Return 0 if no error occurred.
/*
 * Walk every entry packed in the INODE_REF item that @path points at and
 * verify that the matching DIR_INDEX and DIR_ITEM exist in the parent
 * directory (ref_key->offset).  Problems are either repaired through
 * repair_ternary_lowmem() (when the global "repair" flag is set) or reported
 * with print_inode_ref_err().
 */
4609 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4610 struct btrfs_path *path, char *name_ret,
4611 u32 *namelen_ret, u64 *refs_ret, int mode)
4613 struct btrfs_key key;
4614 struct btrfs_key location;
4615 struct btrfs_inode_ref *ref;
4616 struct extent_buffer *node;
4617 char namebuf[BTRFS_NAME_LEN] = {0};
4627 int need_research = 0;
4635 /* since after repair, path and the dir item may be changed */
4636 if (need_research) {
4638 btrfs_release_path(path);
4639 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
4640 /* the item was deleted, let path point to the last checked item */
4642 if (path->slots[0] == 0)
4643 btrfs_prev_leaf(root, path);
/* Every directory entry must point back at this inode's INODE_ITEM */
4651 location.objectid = ref_key->objectid;
4652 location.type = BTRFS_INODE_ITEM_KEY;
4653 location.offset = 0;
4654 node = path->nodes[0];
4655 slot = path->slots[0];
/* namebuf is a char array, so the element count equals its byte size */
4657 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4658 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4659 total = btrfs_item_size_nr(node, slot);
4662 /* Update inode ref count */
4665 index = btrfs_inode_ref_index(node, ref);
4666 name_len = btrfs_inode_ref_name_len(node, ref);
/* Names longer than BTRFS_NAME_LEN are read truncated and warned about */
4668 if (name_len <= BTRFS_NAME_LEN) {
4671 len = BTRFS_NAME_LEN;
4672 warning("root %llu INODE_REF[%llu %llu] name too long",
4673 root->objectid, ref_key->objectid, ref_key->offset);
4676 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4678 /* copy the first name found to name_ret */
4679 if (refs == 1 && name_ret) {
4680 memcpy(name_ret, namebuf, len);
4684 /* Check root dir ref */
4685 if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4686 if (index != 0 || len != strlen("..") ||
4687 strncmp("..", namebuf, len) ||
4688 ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4689 /* set err bits then repair will delete the ref */
4690 err |= DIR_INDEX_MISSING;
4691 err |= DIR_ITEM_MISSING;
4696 /* Find related DIR_INDEX */
4697 key.objectid = ref_key->offset;
4698 key.type = BTRFS_DIR_INDEX_KEY;
4700 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4701 imode_to_type(mode));
4703 /* Find related dir_item */
4704 key.objectid = ref_key->offset;
4705 key.type = BTRFS_DIR_ITEM_KEY;
4706 key.offset = btrfs_name_hash(namebuf, len);
4707 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4708 imode_to_type(mode));
/*
 * NOTE(review): repair and reporting receive name_len (the on-disk length),
 * which can exceed the clamped len used to fill namebuf - confirm.
 */
4710 if (tmp_err && repair) {
4711 ret = repair_ternary_lowmem(root, ref_key->offset,
4712 ref_key->objectid, index, namebuf,
4713 name_len, imode_to_type(mode),
4720 print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4721 imode_to_type(mode), tmp_err);
/* Advance to the next ref stored in the same item */
4723 len = sizeof(*ref) + name_len;
4724 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4735 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4736 * DIR_ITEM/DIR_INDEX.
4738 * @root: the root of the fs/file tree
4739 * @ref_key: the key of the INODE_EXTREF
4740 * @refs: the count of INODE_EXTREF
4741 * @mode: the st_mode of INODE_ITEM
4743 * Return 0 if no error occurred.
/*
 * Walk every entry packed in the INODE_EXTREF item at @node/@slot and verify
 * that the matching DIR_INDEX and DIR_ITEM exist in the parent directory
 * recorded in each extref.
 *
 * @mode is the st_mode of the inode.  find_dir_item() compares its file_type
 * argument with the directory entry's type, so the mode is converted with
 * imode_to_type() before it is passed on, as check_inode_ref() does.
 */
4745 static int check_inode_extref(struct btrfs_root *root,
4746 struct btrfs_key *ref_key,
4747 struct extent_buffer *node, int slot, u64 *refs,
4750 struct btrfs_key key;
4751 struct btrfs_key location;
4752 struct btrfs_inode_extref *extref;
4753 char namebuf[BTRFS_NAME_LEN] = {0};
/* Every directory entry must point back at this inode's INODE_ITEM */
4763 location.objectid = ref_key->objectid;
4764 location.type = BTRFS_INODE_ITEM_KEY;
4765 location.offset = 0;
4767 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4768 total = btrfs_item_size_nr(node, slot);
4771 /* update inode ref count */
4773 name_len = btrfs_inode_extref_name_len(node, extref);
4774 index = btrfs_inode_extref_index(node, extref);
4775 parent = btrfs_inode_extref_parent(node, extref);
/* Names longer than BTRFS_NAME_LEN are read truncated and warned about */
4776 if (name_len <= BTRFS_NAME_LEN) {
4779 len = BTRFS_NAME_LEN;
4780 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4781 root->objectid, ref_key->objectid, ref_key->offset);
4783 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4785 /* Check root dir ref name */
4786 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4787 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4788 root->objectid, ref_key->objectid, ref_key->offset,
4790 err |= ROOT_DIR_ERROR;
4793 /* find related dir_index */
4794 key.objectid = parent;
4795 key.type = BTRFS_DIR_INDEX_KEY;
/* Pass the directory file type, not the raw st_mode */
4797 ret = find_dir_item(root, &key, &location, namebuf, len, imode_to_type(mode));
4800 /* find related dir_item */
4801 key.objectid = parent;
4802 key.type = BTRFS_DIR_ITEM_KEY;
4803 key.offset = btrfs_name_hash(namebuf, len);
4804 ret = find_dir_item(root, &key, &location, namebuf, len, imode_to_type(mode));
/* Advance to the next extref stored in the same item */
4807 len = sizeof(*extref) + name_len;
4808 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4818 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4819 * DIR_ITEM/DIR_INDEX match.
4820 * Return with @index_ret.
4822 * @root: the root of the fs/file tree
4823 * @key: the key of the INODE_REF/INODE_EXTREF
4824 * @name: the name in the INODE_REF/INODE_EXTREF
4825 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4826 * @index_ret: the index in the INODE_REF/INODE_EXTREF,
4827 * value (u64)-1 means do not check index
4828 * @ext_ref: the EXTENDED_IREF feature
4830 * Return 0 if no error occurred.
4831 * Return >0 for error bitmap
/*
 * Look for an INODE_REF entry (and afterwards an INODE_EXTREF entry) that
 * matches @name/@namelen and, unless *@index_ret is (u64)-1, the given index.
 *
 * On a match *@index_ret is set to the index stored in the ref.
 *
 * Side effect: for the INODE_EXTREF pass @key itself is rewritten (type and
 * offset), so the caller's key no longer holds its original values.
 */
4833 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4834 char *name, int namelen, u64 *index_ret,
4835 unsigned int ext_ref)
4837 struct btrfs_path path;
4838 struct btrfs_inode_ref *ref;
4839 struct btrfs_inode_extref *extref;
4840 struct extent_buffer *node;
4841 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4854 btrfs_init_path(&path);
4855 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4857 ret = INODE_REF_MISSING;
4861 node = path.nodes[0];
4862 slot = path.slots[0];
4864 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4865 total = btrfs_item_size_nr(node, slot);
4867 /* Iterate all entry of INODE_REF */
4868 while (cur < total) {
/* Assume "missing" until this entry has passed every comparison */
4869 ret = INODE_REF_MISSING;
4871 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4872 ref_index = btrfs_inode_ref_index(node, ref);
/* (u64)-1 in *index_ret disables the index comparison */
4873 if (*index_ret != (u64)-1 && *index_ret != ref_index)
/* The name must fit both inside the item and inside ref_namebuf[] */
4876 if (cur + sizeof(*ref) + ref_namelen > total ||
4877 ref_namelen > BTRFS_NAME_LEN) {
4878 warning("root %llu INODE %s[%llu %llu] name too long",
4880 key->type == BTRFS_INODE_REF_KEY ?
4882 key->objectid, key->offset);
4884 if (cur + sizeof(*ref) > total)
4886 len = min_t(u32, total - cur - sizeof(*ref),
4892 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4895 if (len != namelen || strncmp(ref_namebuf, name, len))
4898 *index_ret = ref_index;
/* Advance to the next ref stored in the same item */
4902 len = sizeof(*ref) + ref_namelen;
4903 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4908 /* Skip if not support EXTENDED_IREF feature */
4912 btrfs_release_path(&path);
4913 btrfs_init_path(&path);
/* Re-key the search: extrefs are keyed by the hash of (parent dir, name) */
4915 dir_id = key->offset;
4916 key->type = BTRFS_INODE_EXTREF_KEY;
4917 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4919 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4921 ret = INODE_REF_MISSING;
4925 node = path.nodes[0];
4926 slot = path.slots[0];
4928 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4930 total = btrfs_item_size_nr(node, slot);
4932 /* Iterate all entry of INODE_EXTREF */
4933 while (cur < total) {
4934 ret = INODE_REF_MISSING;
4936 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4937 ref_index = btrfs_inode_extref_index(node, extref);
4938 parent = btrfs_inode_extref_parent(node, extref);
4939 if (*index_ret != (u64)-1 && *index_ret != ref_index)
/* Entries sharing the hash may belong to a different parent directory */
4942 if (parent != dir_id)
4945 if (ref_namelen <= BTRFS_NAME_LEN) {
4948 len = BTRFS_NAME_LEN;
4949 warning("root %llu INODE %s[%llu %llu] name too long",
4951 key->type == BTRFS_INODE_REF_KEY ?
4953 key->objectid, key->offset);
4955 read_extent_buffer(node, ref_namebuf,
4956 (unsigned long)(extref + 1), len);
4958 if (len != namelen || strncmp(ref_namebuf, name, len))
4961 *index_ret = ref_index;
/* Advance to the next extref stored in the same item */
4966 len = sizeof(*extref) + ref_namelen;
4967 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4972 btrfs_release_path(&path);
/*
 * Report the problems recorded in the bitmap @err for one directory entry.
 *
 * @root:     root the entry lives in (only its objectid is printed)
 * @key:      key of the DIR_ITEM/DIR_INDEX being checked
 * @ino:      inode number the entry points at
 * @index:    directory index of the entry
 * @namebuf:  entry name
 * @name_len: length of @namebuf (not used by the visible messages)
 * @filetype: file type recorded in the entry
 * @err:      DIR_ITEM_, DIR_INDEX_, INODE_ITEM_ and INODE_REF_ error bits
 */
4976 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
4977 u64 ino, u64 index, const char *namebuf,
4978 int name_len, u8 filetype, int err)
4980 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
4981 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
4982 root->objectid, key->objectid, key->offset, namebuf,
4984 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing");
/*
 * The DIR INDEX wording is chosen from the DIR_INDEX bit; testing
 * DIR_ITEM_MISMATCH here reported a mismatching index as "missing".
 */
4987 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
4988 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
4989 root->objectid, key->objectid, index, namebuf, filetype,
4990 err & DIR_INDEX_MISMATCH ? "mismatch" : "missing");
4993 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
4995 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
4996 root->objectid, ino, index, namebuf, filetype,
4997 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
5000 if (err & INODE_REF_MISSING)
5002 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5003 root->objectid, ino, key->objectid, namebuf, filetype);
5008 * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5010 * Returns error after repair
/*
 * Repair the problems in @err for one directory entry in two steps: a missing
 * INODE_ITEM is handled by repair_inode_item_missing(), every remaining bit
 * by repair_ternary_lowmem().  Bits belonging to a step are cleared from @err
 * after that step.
 */
5012 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5013 u64 index, u8 filetype, char *namebuf, u32 name_len,
5018 if (err & INODE_ITEM_MISSING) {
5019 ret = repair_inode_item_missing(root, ino, filetype);
5021 err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
/* Anything left besides the INODE_ITEM bits concerns the ref/item/index triple */
5024 if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5025 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5026 name_len, filetype, err);
5028 err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5029 err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5030 err &= ~(INODE_REF_MISSING);
5037 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5038 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5040 * @root: the root of the fs/file tree
5041 * @di_key: the key of the DIR_ITEM/DIR_INDEX
5043 * @size: the st_size of the INODE_ITEM
5044 * @ext_ref: the EXTENDED_IREF feature
5046 * Return 0 if no error occurred.
/*
 * Walk every entry packed in the DIR_ITEM/DIR_INDEX item that @path points
 * at.  For each entry the function checks
 *   - the target INODE_ITEM (existence and file type),
 *   - the INODE_REF/INODE_EXTREF pointing back (find_inode_ref()), and
 *   - the counterpart entry: the DIR_INDEX when @di_key is a DIR_ITEM, the
 *     DIR_ITEM when @di_key is a DIR_INDEX (find_dir_item()).
 * Problems are repaired through repair_dir_item() when the global "repair"
 * flag is set and reported with print_dir_item_err().
 *
 * *@size is increased by the name length of every entry.  @path is released
 * and re-searched several times; before returning it is searched for
 * @di_key again.
 */
5048 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5049 struct btrfs_path *path, u64 *size,
5050 unsigned int ext_ref)
5052 struct btrfs_dir_item *di;
5053 struct btrfs_inode_item *ii;
5054 struct btrfs_key key;
5055 struct btrfs_key location;
5056 struct extent_buffer *node;
5058 char namebuf[BTRFS_NAME_LEN] = {0};
5070 int need_research = 0;
5073 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5074 * ignore index check.
5076 if (di_key->type == BTRFS_DIR_INDEX_KEY)
5077 index = di_key->offset;
5084 /* since after repair, path and the dir item may be changed */
5085 if (need_research) {
5087 btrfs_release_path(path);
5088 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5089 /* the item was deleted, let path point the last checked item */
5091 if (path->slots[0] == 0)
5092 btrfs_prev_leaf(root, path);
5100 node = path->nodes[0];
5101 slot = path->slots[0];
5103 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5104 total = btrfs_item_size_nr(node, slot);
/* namebuf is a char array, so the element count equals its byte size */
5105 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5107 while (cur < total) {
5108 data_len = btrfs_dir_data_len(node, di);
5111 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5113 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5114 di_key->objectid, di_key->offset, data_len);
5116 name_len = btrfs_dir_name_len(node, di);
/* Names longer than BTRFS_NAME_LEN are read truncated and warned about */
5117 if (name_len <= BTRFS_NAME_LEN) {
5120 len = BTRFS_NAME_LEN;
5121 warning("root %llu %s[%llu %llu] name too long",
5123 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5124 di_key->objectid, di_key->offset);
5126 (*size) += name_len;
5127 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5129 filetype = btrfs_dir_type(node, di);
/* A DIR_ITEM key offset must equal the hash of the name it stores */
5131 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5132 di_key->offset != btrfs_name_hash(namebuf, len)) {
5134 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5135 root->objectid, di_key->objectid, di_key->offset,
5136 namebuf, len, filetype, di_key->offset,
5137 btrfs_name_hash(namebuf, len));
5140 btrfs_dir_item_key_to_cpu(node, di, &location);
5141 /* Ignore related ROOT_ITEM check */
5142 if (location.type == BTRFS_ROOT_ITEM_KEY)
5145 btrfs_release_path(path);
5146 /* Check relative INODE_ITEM(existence/filetype) */
5147 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5149 tmp_err |= INODE_ITEM_MISSING;
5153 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5154 struct btrfs_inode_item);
5155 mode = btrfs_inode_mode(path->nodes[0], ii);
5156 if (imode_to_type(mode) != filetype) {
5157 tmp_err |= INODE_ITEM_MISMATCH;
5161 /* Check relative INODE_REF/INODE_EXTREF */
5162 key.objectid = location.objectid;
5163 key.type = BTRFS_INODE_REF_KEY;
5164 key.offset = di_key->objectid;
5165 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5168 /* check relative INDEX/ITEM */
5169 key.objectid = di_key->objectid;
/*
 * Choose the counterpart by the type of the item being checked.  "key" was
 * just set up as an INODE_REF key (find_inode_ref() may turn it into an
 * INODE_EXTREF key), so testing key.type here could never select the
 * DIR_INDEX branch.
 * NOTE(review): name_len rather than the clamped len is used for the hash
 * and the lookup below - confirm this is intended for over-long names.
 */
5170 if (di_key->type == BTRFS_DIR_ITEM_KEY) {
5171 key.type = BTRFS_DIR_INDEX_KEY;
5174 key.type = BTRFS_DIR_ITEM_KEY;
5175 key.offset = btrfs_name_hash(namebuf, name_len);
5178 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5179 name_len, filetype);
5180 /* find_dir_item may find index */
5181 if (key.type == BTRFS_DIR_INDEX_KEY)
5185 if (tmp_err && repair) {
5186 ret = repair_dir_item(root, di_key->objectid,
5187 location.objectid, index,
5188 imode_to_type(mode), namebuf,
/* repair_dir_item() returns the bits that are still set after the repair */
5190 if (ret != tmp_err) {
5195 btrfs_release_path(path);
5196 print_dir_item_err(root, di_key, location.objectid, index,
5197 namebuf, name_len, filetype, tmp_err);
/* Advance to the next entry packed in the same item */
5199 len = sizeof(*di) + name_len + data_len;
5200 di = (struct btrfs_dir_item *)((char *)di + len);
5203 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5204 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5205 root->objectid, di_key->objectid,
/* Leave @path pointing at the original DIR_ITEM/DIR_INDEX again */
5212 btrfs_release_path(path);
5213 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5215 err |= ret > 0 ? -ENOENT : ret;
5220 * Check file extent datasum/hole, update the size of the file extents,
5221 * check and update the last offset of the file extent.
5223 * @root: the root of fs/file tree.
5224 * @fkey: the key of the file extent.
5225 * @nodatasum: INODE_NODATASUM feature.
5226 * @size: the sum of all EXTENT_DATA items size for this inode.
5227 * @end: the offset of the last extent.
5229 * Return 0 if no error occurred.
/*
 * Validate one EXTENT_DATA item (@node/@slot, key @fkey) and account for it.
 *
 * Inline extents are checked for a non-empty size and, when uncompressed,
 * for agreement between the recorded size and the item's inline length.
 * Regular and preallocated extents are checked against the csum tree and for
 * gaps between consecutive extents.
 *
 * *@end and *@size are advanced by the extent's byte count; problems are
 * collected as FILE_EXTENT_ERROR, ODD_CSUM_ITEM and CSUM_ITEM_MISSING bits.
 */
5231 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5232 struct extent_buffer *node, int slot,
5233 unsigned int nodatasum, u64 *size, u64 *end)
5235 struct btrfs_file_extent_item *fi;
5238 u64 extent_num_bytes;
5240 u64 csum_found; /* In byte size, sectorsize aligned */
5241 u64 search_start; /* Logical range start we search for csum */
5242 u64 search_len; /* Logical range len we search for csum */
5243 unsigned int extent_type;
5244 unsigned int is_hole;
5249 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5251 /* Check inline extent */
5252 extent_type = btrfs_file_extent_type(node, fi);
5253 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5254 struct btrfs_item *e = btrfs_item_nr(slot);
5255 u32 item_inline_len;
5257 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5258 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5259 compressed = btrfs_file_extent_compression(node, fi);
5260 if (extent_num_bytes == 0) {
5262 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5263 root->objectid, fkey->objectid, fkey->offset);
5264 err |= FILE_EXTENT_ERROR;
/* The two sizes are only compared for uncompressed inline data */
5266 if (!compressed && extent_num_bytes != item_inline_len) {
5268 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5269 root->objectid, fkey->objectid, fkey->offset,
5270 extent_num_bytes, item_inline_len);
5271 err |= FILE_EXTENT_ERROR;
5273 *end += extent_num_bytes;
5274 *size += extent_num_bytes;
5278 /* Check extent type */
5279 if (extent_type != BTRFS_FILE_EXTENT_REG &&
5280 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5281 err |= FILE_EXTENT_ERROR;
5282 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5283 root->objectid, fkey->objectid, fkey->offset);
5287 /* Check REG_EXTENT/PREALLOC_EXTENT */
5288 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5289 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5290 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5291 extent_offset = btrfs_file_extent_offset(node, fi);
5292 compressed = btrfs_file_extent_compression(node, fi);
/* A hole is an extent with neither a disk address nor a disk length */
5293 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5296 * Check EXTENT_DATA csum
5298 * For plain (uncompressed) extent, we should only check the range
5299 * we're referring to, as it's possible that part of prealloc extent
5300 * has been written, and has csum:
5302 * |<--- Original large preallocated extent A ---->|
5303 * |<- Prealloc File Extent ->|<- Regular Extent ->|
5306 * For compressed extent, we should check the whole range.
5309 search_start = disk_bytenr + extent_offset;
5310 search_len = extent_num_bytes;
5312 search_start = disk_bytenr;
5313 search_len = disk_num_bytes;
5315 ret = count_csum_range(root, search_start, search_len, &csum_found);
/*
 * Three csum inconsistencies are told apart: csums on a nodatasum inode,
 * too few csums for a regular non-hole extent, csums on a prealloc extent.
 */
5316 if (csum_found > 0 && nodatasum) {
5317 err |= ODD_CSUM_ITEM;
5318 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5319 root->objectid, fkey->objectid, fkey->offset);
5320 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5321 !is_hole && (ret < 0 || csum_found < search_len)) {
5322 err |= CSUM_ITEM_MISSING;
5323 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5324 root->objectid, fkey->objectid, fkey->offset,
5325 csum_found, search_len);
5326 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5327 err |= ODD_CSUM_ITEM;
5328 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5329 root->objectid, fkey->objectid, fkey->offset, csum_found);
5332 /* Check EXTENT_DATA hole */
/* Unless no_holes is set, this extent must start where the previous one ended */
5333 if (!no_holes && *end != fkey->offset) {
5334 err |= FILE_EXTENT_ERROR;
5335 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
5336 root->objectid, fkey->objectid, fkey->offset);
5339 *end += extent_num_bytes;
5341 *size += extent_num_bytes;
5347 * Set inode item nbytes to @nbytes
5349 * Returns 0 on success
5350 * Returns != 0 on error
/*
 * Rewrite the nbytes field of the INODE_ITEM of @ino in @root to @nbytes.
 *
 * The update runs in its own transaction.  The key @path pointed at on entry
 * is remembered, and @path is searched for that key again before returning.
 */
5352 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5353 struct btrfs_path *path,
5354 u64 ino, u64 nbytes)
5356 struct btrfs_trans_handle *trans;
5357 struct btrfs_inode_item *ii;
5358 struct btrfs_key key;
5359 struct btrfs_key research_key;
/* Remember the caller's position so the path can be restored afterwards */
5363 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5366 key.type = BTRFS_INODE_ITEM_KEY;
5369 trans = btrfs_start_transaction(root, 1);
5370 if (IS_ERR(trans)) {
5371 ret = PTR_ERR(trans);
5376 btrfs_release_path(path);
5377 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5385 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5386 struct btrfs_inode_item);
5387 btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5388 btrfs_mark_buffer_dirty(path->nodes[0]);
5390 btrfs_commit_transaction(trans, root);
5393 error("failed to set nbytes in inode %llu root %llu",
5394 ino, root->root_key.objectid);
/* The newline belongs at the end of the message, not in the middle */
5396 printf("Set nbytes in inode item %llu root %llu to %llu\n", ino,
5397 root->root_key.objectid, nbytes);
/* Point @path back at the item the caller was looking at */
5400 btrfs_release_path(path);
5401 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5408 * Set directory inode isize to @isize.
5410 * Returns 0 on success.
5411 * Returns != 0 on error.
/*
 * Rewrite the size field of the INODE_ITEM of a directory inode to @isize.
 *
 * The update runs in its own transaction.  The key @path pointed at on entry
 * is remembered, and @path is searched for that key again before returning.
 */
5413 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5414 struct btrfs_path *path,
5417 struct btrfs_trans_handle *trans;
5418 struct btrfs_inode_item *ii;
5419 struct btrfs_key key;
5420 struct btrfs_key research_key;
/* Remember the caller's position so the path can be restored afterwards */
5424 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5427 key.type = BTRFS_INODE_ITEM_KEY;
5430 trans = btrfs_start_transaction(root, 1);
5431 if (IS_ERR(trans)) {
5432 ret = PTR_ERR(trans);
5437 btrfs_release_path(path);
5438 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5446 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5447 struct btrfs_inode_item);
5448 btrfs_set_inode_size(path->nodes[0], ii, isize);
5449 btrfs_mark_buffer_dirty(path->nodes[0]);
5451 btrfs_commit_transaction(trans, root);
5454 error("failed to set isize in inode %llu root %llu",
5455 ino, root->root_key.objectid);
5457 printf("Set isize in inode %llu root %llu to %llu\n",
5458 ino, root->root_key.objectid, isize);
/* Point @path back at the item the caller was looking at */
5460 btrfs_release_path(path);
5461 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5468 * Wrapper function for btrfs_add_orphan_item().
5470 * Returns 0 on success.
5471 * Returns != 0 on error.
/*
 * Add an orphan item for inode @ino to @root with btrfs_add_orphan_item(),
 * inside its own transaction, and report the outcome.
 *
 * The key @path pointed at on entry is saved first and searched for again
 * before returning, because the path is released and handed to
 * btrfs_add_orphan_item().
 */
5473 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5474 struct btrfs_path *path, u64 ino)
5476 struct btrfs_trans_handle *trans;
5477 struct btrfs_key research_key;
/* Remember the item @path currently points at. */
5481 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5483 trans = btrfs_start_transaction(root, 1);
5484 if (IS_ERR(trans)) {
5485 ret = PTR_ERR(trans);
5490 btrfs_release_path(path);
5491 ret = btrfs_add_orphan_item(trans, root, path, ino);
5493 btrfs_commit_transaction(trans, root);
5496 error("failed to add inode %llu as orphan item root %llu",
5497 ino, root->root_key.objectid);
5499 printf("Added inode %llu as orphan item root %llu\n",
5500 ino, root->root_key.objectid);
/* Point @path back at the item it referenced on entry. */
5502 btrfs_release_path(path);
5503 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5510 * Check INODE_ITEM and related ITEMs (the same inode number)
5511 * 1. check link count
5512 * 2. check inode ref/extref
5513 * 3. check dir item/index
5515 * @ext_ref: the EXTENDED_IREF feature
5517 * Return 0 if no error occurred.
5518 * Return >0 if an error was found or the traversal is done (as an error bitmap)
/*
 * Check the INODE_ITEM that @path points at together with the following
 * items that carry the same objectid.
 *
 * The inode fields (size, nbytes, mode, nlink, NODATASUM flag) are read
 * first. The following items are then visited with btrfs_next_item() and
 * dispatched on their key type to check_inode_ref(), check_inode_extref(),
 * check_dir_item() or check_file_extent(). Finally nlink, isize and nbytes
 * are compared against the values gathered by those helpers. Problems are
 * reported through error()/warning() and several of them are recorded as
 * bits in err.
 *
 * @ext_ref is forwarded to check_dir_item() and controls the EXTREF warning.
 */
5520 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5521 unsigned int ext_ref)
5523 struct extent_buffer *node;
5524 struct btrfs_inode_item *ii;
5525 struct btrfs_key key;
5534 u64 extent_size = 0;
5536 unsigned int nodatasum;
5540 char namebuf[BTRFS_NAME_LEN] = {0};
5543 node = path->nodes[0];
5544 slot = path->slots[0];
5546 btrfs_item_key_to_cpu(node, &key, slot);
5547 inode_id = key.objectid;
/* Items of the orphan objectid are not inspected, only stepped over. */
5549 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5550 ret = btrfs_next_item(root, path);
/* Cache the inode item fields that are verified after the item walk. */
5556 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5557 isize = btrfs_inode_size(node, ii);
5558 nbytes = btrfs_inode_nbytes(node, ii);
5559 mode = btrfs_inode_mode(node, ii);
5560 dir = imode_to_type(mode) == BTRFS_FT_DIR;
5561 nlink = btrfs_inode_nlink(node, ii);
5562 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5565 ret = btrfs_next_item(root, path);
5567 /* out will fill 'err' using current statistics */
5569 } else if (ret > 0) {
5574 node = path->nodes[0];
5575 slot = path->slots[0];
5576 btrfs_item_key_to_cpu(node, &key, slot);
/* An item with a different objectid belongs to another inode. */
5577 if (key.objectid != inode_id)
5581 case BTRFS_INODE_REF_KEY:
5582 ret = check_inode_ref(root, &key, path, namebuf,
5583 &name_len, &refs, mode);
5586 case BTRFS_INODE_EXTREF_KEY:
/* The key type test repeats what the case label already guarantees. */
5587 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5588 warning("root %llu EXTREF[%llu %llu] isn't supported",
5589 root->objectid, key.objectid,
5591 ret = check_inode_extref(root, &key, node, slot, &refs,
5595 case BTRFS_DIR_ITEM_KEY:
5596 case BTRFS_DIR_INDEX_KEY:
5598 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5599 root->objectid, inode_id,
5600 imode_to_type(mode), key.objectid,
/* check_dir_item() accumulates the directory size into size. */
5603 ret = check_dir_item(root, &key, path, &size, ext_ref);
5606 case BTRFS_EXTENT_DATA_KEY:
5608 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5609 root->objectid, inode_id, key.objectid,
/* check_file_extent() accumulates into extent_size. */
5612 ret = check_file_extent(root, &key, node, slot,
5613 nodatasum, &extent_size,
5617 case BTRFS_XATTR_ITEM_KEY:
5620 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5621 key.objectid, key.type, key.offset);
5626 /* verify INODE_ITEM nlink/isize/nbytes */
5629 err |= LINK_COUNT_ERROR;
5630 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5631 root->objectid, inode_id, nlink);
5635 * Just a warning, as dir inode nbytes is just an
5636 * instructive value.
5638 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5639 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5640 root->objectid, inode_id,
5641 root->fs_info->nodesize);
/* Directory isize must equal the size gathered from its dir items. */
5644 if (isize != size) {
5646 ret = repair_dir_isize_lowmem(root, path,
5648 if (!repair || ret) {
5651 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5652 root->objectid, inode_id, isize, size);
/* nlink must equal the number of inode refs counted during the walk. */
5656 if (nlink != refs) {
5657 err |= LINK_COUNT_ERROR;
5658 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5659 root->objectid, inode_id, nlink, refs);
5660 } else if (!nlink) {
5662 ret = repair_inode_orphan_item_lowmem(root,
5664 if (!repair || ret) {
5666 error("root %llu INODE[%llu] is orphan item",
5667 root->objectid, inode_id);
5671 if (!nbytes && !no_holes && extent_end < isize) {
5672 err |= NBYTES_ERROR;
5673 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5674 root->objectid, inode_id, isize);
/* nbytes must equal the extent size gathered from the file extents. */
5677 if (nbytes != extent_size) {
5679 ret = repair_inode_nbytes_lowmem(root, path,
5680 inode_id, extent_size);
5681 if (!repair || ret) {
5682 err |= NBYTES_ERROR;
5684 "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5685 root->objectid, inode_id, nbytes,
5695 * Insert the missing inode item and inode ref.
5697 * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref dir.
5698 * Root dir should be handled specially because root dir is the root of fs.
5700 * returns err (>0 or 0) after repair
/*
 * Try to repair the problems flagged in @err for the first inode of @root
 * (objectid BTRFS_FIRST_FREE_OBJECTID).
 *
 * INODE_REF_MISSING: an INODE_REF with the .. name is inserted with
 * btrfs_insert_inode_ref() inside a transaction.
 * INODE_ITEM_MISSING: handed to repair_inode_item_missing() with file type
 * BTRFS_FT_DIR.
 *
 * The corresponding bit is cleared from @err when a repair is made.
 */
5702 static int repair_fs_first_inode(struct btrfs_root *root, int err)
5704 struct btrfs_trans_handle *trans;
5705 struct btrfs_key key;
5706 struct btrfs_path path;
5707 int filetype = BTRFS_FT_DIR;
5710 btrfs_init_path(&path);
5712 if (err & INODE_REF_MISSING) {
/* The first inode references itself: objectid and offset are equal. */
5713 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5714 key.type = BTRFS_INODE_REF_KEY;
5715 key.offset = BTRFS_FIRST_FREE_OBJECTID;
5717 trans = btrfs_start_transaction(root, 1);
5718 if (IS_ERR(trans)) {
5719 ret = PTR_ERR(trans);
5723 btrfs_release_path(&path);
5724 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
5728 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
5729 BTRFS_FIRST_FREE_OBJECTID,
5730 BTRFS_FIRST_FREE_OBJECTID, 0);
5734 printf("Add INODE_REF[%llu %llu] name %s\n",
5735 BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
5737 err &= ~INODE_REF_MISSING;
5740 error("fail to insert first inode's ref");
5741 btrfs_commit_transaction(trans, root);
5744 if (err & INODE_ITEM_MISSING) {
5745 ret = repair_inode_item_missing(root,
5746 BTRFS_FIRST_FREE_OBJECTID, filetype);
5749 err &= ~INODE_ITEM_MISSING;
5753 error("fail to repair first inode");
5754 btrfs_release_path(&path);
5759 * check first root dir's inode_item and inode_ref
5761 * returns 0 means no error
5762 * returns >0 means error
5763 * returns <0 means fatal error
/*
 * Check the first inode (objectid BTRFS_FIRST_FREE_OBJECTID) of @root.
 *
 * The INODE_ITEM is looked up and its mode must map to BTRFS_FT_DIR,
 * otherwise INODE_ITEM_MISMATCH is set; INODE_ITEM_MISSING is the other
 * inode item bit used here. The .. INODE_REF is then looked up with
 * find_inode_ref(). The collected bits go through repair_fs_first_inode()
 * and whatever is still set afterwards is reported with error().
 *
 * The final return is ret when it is negative, otherwise the error bits.
 */
5765 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5767 struct btrfs_path path;
5768 struct btrfs_key key;
5769 struct btrfs_inode_item *ii;
5775 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5776 key.type = BTRFS_INODE_ITEM_KEY;
5779 /* For root being dropped, we don't need to check first inode */
5780 if (btrfs_root_refs(&root->root_item) == 0 &&
5781 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5782 BTRFS_FIRST_FREE_OBJECTID)
5785 btrfs_init_path(&path);
5786 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5791 err |= INODE_ITEM_MISSING;
5793 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
5794 struct btrfs_inode_item);
5795 mode = btrfs_inode_mode(path.nodes[0], ii);
5796 if (imode_to_type(mode) != BTRFS_FT_DIR)
5797 err |= INODE_ITEM_MISMATCH;
5800 /* lookup first inode ref */
5801 key.offset = BTRFS_FIRST_FREE_OBJECTID;
5802 key.type = BTRFS_INODE_REF_KEY;
5803 /* special index value */
5806 ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
5812 btrfs_release_path(&path);
5815 err = repair_fs_first_inode(root, err);
/* Report whatever is still flagged after the repair attempt. */
5817 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
5818 error("root dir INODE_ITEM is %s",
5819 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
5820 if (err & INODE_REF_MISSING)
5821 error("root dir INODE_REF is missing");
5823 return ret < 0 ? ret : err;
/*
 * Look up a tree backref of @rec in rec->backref_tree.
 *
 * A template tree_backref is filled in on the stack and handed to
 * rb_search() with compare_extent_backref as the comparison callback; a
 * node returned by the search is converted back to its tree_backref.
 */
5826 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5827 u64 parent, u64 root)
5829 struct rb_node *node;
5830 struct tree_backref *back = NULL;
5831 struct tree_backref match = {
5838 match.parent = parent;
5839 match.node.full_backref = 1;
5844 node = rb_search(&rec->backref_tree, &match.node.node,
5845 (rb_compare_keys)compare_extent_backref, NULL);
5847 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Look up a data backref of @rec in rec->backref_tree.
 *
 * A template data_backref (including found_ref and disk_bytenr) is filled
 * in on the stack and handed to rb_search() with compare_extent_backref as
 * the comparison callback; a node returned by the search is converted back
 * to its data_backref.
 */
5852 static struct data_backref *find_data_backref(struct extent_record *rec,
5853 u64 parent, u64 root,
5854 u64 owner, u64 offset,
5856 u64 disk_bytenr, u64 bytes)
5858 struct rb_node *node;
5859 struct data_backref *back = NULL;
5860 struct data_backref match = {
5867 .found_ref = found_ref,
5868 .disk_bytenr = disk_bytenr,
5872 match.parent = parent;
5873 match.node.full_backref = 1;
5878 node = rb_search(&rec->backref_tree, &match.node.node,
5879 (rb_compare_keys)compare_extent_backref, NULL);
5881 back = to_data_backref(rb_node_to_extent_backref(node));
5886 * Iterate all item on the tree and call check_inode_item() to check.
5888 * @root: the root of the tree to be checked.
5889 * @ext_ref: the EXTENDED_IREF feature
5891 * Return 0 if no error found.
5892 * Return <0 for error.
/*
 * Check one fs/subvolume tree.
 *
 * The first inode is verified separately with check_fs_first_inode(), then
 * the tree is traversed with walk_down_tree_v2() and walk_up_tree_v2().
 * The traversal starts at the root node when the root item has references
 * or its drop_progress objectid is zero; otherwise it starts at the
 * drop_progress key, at level drop_level.
 */
5894 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5896 struct btrfs_path path;
5897 struct node_refs nrefs;
5898 struct btrfs_root_item *root_item = &root->root_item;
5904 * We need to manually check the first inode item(256)
5905 * As the following traversal function will only start from
5906 * the first inode item in the leaf, if inode item(256) is missing
5907 * we will just skip it forever.
5909 ret = check_fs_first_inode(root, ext_ref);
5914 memset(&nrefs, 0, sizeof(nrefs));
5915 level = btrfs_header_level(root->node);
5916 btrfs_init_path(&path);
5918 if (btrfs_root_refs(root_item) > 0 ||
5919 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
/* Start from the root node; the path takes its own buffer reference. */
5920 path.nodes[level] = root->node;
5921 path.slots[level] = 0;
5922 extent_buffer_get(root->node);
5924 struct btrfs_key key;
/* Start from the position recorded in the root item drop_progress. */
5926 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5927 level = root_item->drop_level;
5928 path.lowest_level = level;
5929 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5936 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5939 /* if ret is negative, walk shall stop */
5945 ret = walk_up_tree_v2(root, &path, &level);
5947 /* Normal exit, reset ret to err */
5954 btrfs_release_path(&path);
5959 * Find the relative ref for root_ref and root_backref.
5961 * @root: the root of the root tree.
5962 * @ref_key: the key of the root ref.
5964 * Return 0 if no error occurred.
/*
 * Check that the ROOT_REF or ROOT_BACKREF item at @node/@slot has a
 * counterpart of the opposite type in @root and that both agree.
 *
 * The counterpart key swaps objectid and offset of @ref_key and uses the
 * other of the two key types. dirid, sequence, name length and name are
 * compared. A missing counterpart sets ROOT_REF_MISSING in err, a
 * disagreement sets ROOT_REF_MISMATCH. Names longer than BTRFS_NAME_LEN
 * trigger a warning and only BTRFS_NAME_LEN bytes are read.
 */
5966 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5967 struct extent_buffer *node, int slot)
5969 struct btrfs_path path;
5970 struct btrfs_key key;
5971 struct btrfs_root_ref *ref;
5972 struct btrfs_root_ref *backref;
5973 char ref_name[BTRFS_NAME_LEN] = {0};
5974 char backref_name[BTRFS_NAME_LEN] = {0};
5980 u32 backref_namelen;
5985 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5986 ref_dirid = btrfs_root_ref_dirid(node, ref);
5987 ref_seq = btrfs_root_ref_sequence(node, ref);
5988 ref_namelen = btrfs_root_ref_name_len(node, ref);
5990 if (ref_namelen <= BTRFS_NAME_LEN) {
5993 len = BTRFS_NAME_LEN;
5994 warning("%s[%llu %llu] ref_name too long",
5995 ref_key->type == BTRFS_ROOT_REF_KEY ?
5996 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
/* The name is stored directly after the btrfs_root_ref structure. */
5999 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6001 /* Find relative root_ref */
6002 key.objectid = ref_key->offset;
/* BACKREF + REF - type maps each of the two key types onto the other. */
6003 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6004 key.offset = ref_key->objectid;
6006 btrfs_init_path(&path);
6007 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6009 err |= ROOT_REF_MISSING;
6010 error("%s[%llu %llu] couldn't find relative ref",
6011 ref_key->type == BTRFS_ROOT_REF_KEY ?
6012 "ROOT_REF" : "ROOT_BACKREF",
6013 ref_key->objectid, ref_key->offset);
6017 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6018 struct btrfs_root_ref);
6019 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6020 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6021 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6023 if (backref_namelen <= BTRFS_NAME_LEN) {
6024 len = backref_namelen;
6026 len = BTRFS_NAME_LEN;
6027 warning("%s[%llu %llu] ref_name too long",
6028 key.type == BTRFS_ROOT_REF_KEY ?
6029 "ROOT_REF" : "ROOT_BACKREF",
6030 key.objectid, key.offset);
6032 read_extent_buffer(path.nodes[0], backref_name,
6033 (unsigned long)(backref + 1), len);
/* Both items must carry the same dirid, sequence, name length and name. */
6035 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6036 ref_namelen != backref_namelen ||
6037 strncmp(ref_name, backref_name, len)) {
6038 err |= ROOT_REF_MISMATCH;
6039 error("%s[%llu %llu] mismatch relative ref",
6040 ref_key->type == BTRFS_ROOT_REF_KEY ?
6041 "ROOT_REF" : "ROOT_BACKREF",
6042 ref_key->objectid, ref_key->offset);
6045 btrfs_release_path(&path);
6050 * Check all fs/file tree in low_memory mode.
6052 * 1. for fs tree root item, call check_fs_root_v2()
6053 * 2. for fs tree root ref/backref, call check_root_ref()
6055 * Return 0 if no error occurred.
/*
 * Walk the root tree of @fs_info, starting the search at objectid
 * BTRFS_FS_TREE_OBJECTID with key type ROOT_ITEM.
 *
 * Every ROOT_ITEM whose objectid passes fs_root_objectid() is read and
 * checked with check_fs_root_v2(); every ROOT_REF and ROOT_BACKREF item is
 * checked with check_root_ref(). The EXTENDED_IREF incompat flag is read
 * once and forwarded as ext_ref.
 */
6057 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6059 struct btrfs_root *tree_root = fs_info->tree_root;
6060 struct btrfs_root *cur_root = NULL;
6061 struct btrfs_path path;
6062 struct btrfs_key key;
6063 struct extent_buffer *node;
6064 unsigned int ext_ref;
6069 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6071 btrfs_init_path(&path);
6072 key.objectid = BTRFS_FS_TREE_OBJECTID;
6074 key.type = BTRFS_ROOT_ITEM_KEY;
6076 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6080 } else if (ret > 0) {
6086 node = path.nodes[0];
6087 slot = path.slots[0];
6088 btrfs_item_key_to_cpu(node, &key, slot);
6089 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6091 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6092 fs_root_objectid(key.objectid)) {
/*
 * The tree reloc root is read with the no_cache variant and is freed
 * explicitly after the check; other roots use btrfs_read_fs_root().
 */
6093 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6094 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6097 key.offset = (u64)-1;
6098 cur_root = btrfs_read_fs_root(fs_info, &key);
6101 if (IS_ERR(cur_root)) {
6102 error("Fail to read fs/subvol tree: %lld",
6108 ret = check_fs_root_v2(cur_root, ext_ref);
6111 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6112 btrfs_free_fs_root(cur_root);
6113 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6114 key.type == BTRFS_ROOT_BACKREF_KEY) {
6115 ret = check_root_ref(tree_root, &key, node, slot);
6119 ret = btrfs_next_item(tree_root, &path);
6129 btrfs_release_path(&path);
/*
 * Run the fs roots check in the implementation selected by check_mode:
 * check_fs_roots_v2() for CHECK_MODE_LOWMEM, check_fs_roots() with
 * @root_cache in the other visible call. A status line is written to
 * stderr unless ctx.progress_enabled is set.
 */
6133 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6134 struct cache_tree *root_cache)
6138 if (!ctx.progress_enabled)
6139 fprintf(stderr, "checking fs roots\n");
6140 if (check_mode == CHECK_MODE_LOWMEM)
6141 ret = check_fs_roots_v2(fs_info);
6143 ret = check_fs_roots(fs_info, root_cache);
/*
 * Walk every backref in rec->backref_tree and compare it with the extent
 * record @rec.
 *
 * Per backref: found_extent_tree must be set; a tree backref must have
 * found_ref set; a data backref must have found_ref equal to num_refs,
 * disk_bytenr equal to rec->start and bytes equal to rec->nr. After the
 * walk the accumulated reference count is compared with rec->refs.
 * The diagnostics shown here are written to stderr.
 */
6148 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6150 struct extent_backref *back, *tmp;
6151 struct tree_backref *tback;
6152 struct data_backref *dback;
6156 rbtree_postorder_for_each_entry_safe(back, tmp,
6157 &rec->backref_tree, node) {
/* Backref without the found_extent_tree flag. */
6158 if (!back->found_extent_tree) {
6162 if (back->is_data) {
6163 dback = to_data_backref(back);
6164 fprintf(stderr, "Data backref %llu %s %llu"
6165 " owner %llu offset %llu num_refs %lu"
6166 " not found in extent tree\n",
6167 (unsigned long long)rec->start,
6168 back->full_backref ?
6170 back->full_backref ?
6171 (unsigned long long)dback->parent:
6172 (unsigned long long)dback->root,
6173 (unsigned long long)dback->owner,
6174 (unsigned long long)dback->offset,
6175 (unsigned long)dback->num_refs);
6177 tback = to_tree_backref(back);
6178 fprintf(stderr, "Tree backref %llu parent %llu"
6179 " root %llu not found in extent tree\n",
6180 (unsigned long long)rec->start,
6181 (unsigned long long)tback->parent,
6182 (unsigned long long)tback->root);
/* Tree backref without the found_ref flag. */
6185 if (!back->is_data && !back->found_ref) {
6189 tback = to_tree_backref(back);
6190 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6191 (unsigned long long)rec->start,
6192 back->full_backref ? "parent" : "root",
6193 back->full_backref ?
6194 (unsigned long long)tback->parent :
6195 (unsigned long long)tback->root, back);
6197 if (back->is_data) {
6198 dback = to_data_backref(back);
/* Data backref: found references must match the recorded count. */
6199 if (dback->found_ref != dback->num_refs) {
6203 fprintf(stderr, "Incorrect local backref count"
6204 " on %llu %s %llu owner %llu"
6205 " offset %llu found %u wanted %u back %p\n",
6206 (unsigned long long)rec->start,
6207 back->full_backref ?
6209 back->full_backref ?
6210 (unsigned long long)dback->parent:
6211 (unsigned long long)dback->root,
6212 (unsigned long long)dback->owner,
6213 (unsigned long long)dback->offset,
6214 dback->found_ref, dback->num_refs, back);
6216 if (dback->disk_bytenr != rec->start) {
6220 fprintf(stderr, "Backref disk bytenr does not"
6221 " match extent record, bytenr=%llu, "
6222 "ref bytenr=%llu\n",
6223 (unsigned long long)rec->start,
6224 (unsigned long long)dback->disk_bytenr);
6227 if (dback->bytes != rec->nr) {
6231 fprintf(stderr, "Backref bytes do not match "
6232 "extent backref, bytenr=%llu, ref "
6233 "bytes=%llu, backref bytes=%llu\n",
6234 (unsigned long long)rec->start,
6235 (unsigned long long)rec->nr,
6236 (unsigned long long)dback->bytes);
6239 if (!back->is_data) {
6242 dback = to_data_backref(back);
6243 found += dback->found_ref;
/* The total over all backrefs must match the count in the record. */
6246 if (found != rec->refs) {
6250 fprintf(stderr, "Incorrect global backref count "
6251 "on %llu found %llu wanted %llu\n",
6252 (unsigned long long)rec->start,
6253 (unsigned long long)found,
6254 (unsigned long long)rec->refs);
/*
 * Per-node callback passed to rb_free_nodes() by free_all_extent_backrefs();
 * converts the rb_node back to the extent_backref that embeds it.
 */
6260 static void __free_one_backref(struct rb_node *node)
6262 struct extent_backref *back = rb_node_to_extent_backref(node);
/*
 * Release every node of rec->backref_tree through rb_free_nodes(), using
 * __free_one_backref as the per-node callback.
 */
6267 static void free_all_extent_backrefs(struct extent_record *rec)
6269 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Tear down @extent_cache: take the first cache extent, map it to its
 * extent_record, remove it from the cache and release its backrefs.
 */
6272 static void free_extent_record_cache(struct cache_tree *extent_cache)
6274 struct cache_extent *cache;
6275 struct extent_record *rec;
6278 cache = first_cache_extent(extent_cache);
6281 rec = container_of(cache, struct extent_record, cache);
6282 remove_cache_extent(extent_cache, cache);
6283 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing is left to verify on it.
 *
 * All of the following must hold: content and owner ref were checked, the
 * extent item ref count equals the found ref count and is non-zero, there
 * are no duplicates, all_backpointers_checked() returns 0, and none of
 * bad_full_backref, crossing_stripes or wrong_chunk_type is set.
 */
6288 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6289 struct extent_record *rec)
6291 if (rec->content_checked && rec->owner_ref_checked &&
6292 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6293 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6294 !rec->bad_full_backref && !rec->crossing_stripes &&
6295 !rec->wrong_chunk_type) {
6296 remove_cache_extent(extent_cache, &rec->cache);
6297 free_all_extent_backrefs(rec);
6298 list_del_init(&rec->list);
/*
 * Check whether the owner stored in the header of @buf is backed by a
 * reference.
 *
 * The backrefs of @rec are scanned first, comparing the header owner with
 * the root of each tree backref. The second part reads the root named by
 * the header owner, searches it for the first key of @buf one level above
 * @buf, and compares the block pointer found in that parent slot with
 * buf->start.
 *
 * The final return is 0 when found is set and 1 otherwise.
 */
6304 static int check_owner_ref(struct btrfs_root *root,
6305 struct extent_record *rec,
6306 struct extent_buffer *buf)
6308 struct extent_backref *node, *tmp;
6309 struct tree_backref *back;
6310 struct btrfs_root *ref_root;
6311 struct btrfs_key key;
6312 struct btrfs_path path;
6313 struct extent_buffer *parent;
6318 rbtree_postorder_for_each_entry_safe(node, tmp,
6319 &rec->backref_tree, node) {
6322 if (!node->found_ref)
6324 if (node->full_backref)
6326 back = to_tree_backref(node);
6327 if (btrfs_header_owner(buf) == back->root)
6330 BUG_ON(rec->is_root);
6332 /* try to find the block by search corresponding fs tree */
6333 key.objectid = btrfs_header_owner(buf);
6334 key.type = BTRFS_ROOT_ITEM_KEY;
6335 key.offset = (u64)-1;
6337 ref_root = btrfs_read_fs_root(root->fs_info, &key);
6338 if (IS_ERR(ref_root))
6341 level = btrfs_header_level(buf);
/* Use the first key stored in @buf as the search key. */
6343 btrfs_item_key_to_cpu(buf, &key, 0);
6345 btrfs_node_key_to_cpu(buf, &key, 0);
6347 btrfs_init_path(&path);
/* Stop the search one level above @buf so the parent slot is visible. */
6348 path.lowest_level = level + 1;
6349 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6353 parent = path.nodes[level + 1];
6354 if (parent && buf->start == btrfs_node_blockptr(parent,
6355 path.slots[level + 1]))
6358 btrfs_release_path(&path);
6359 return found ? 0 : 1;
/*
 * Scan the backrefs of @rec, testing the full_backref flag of each and
 * comparing the root of each tree backref with
 * BTRFS_EXTENT_TREE_OBJECTID.
 */
6362 static int is_extent_tree_record(struct extent_record *rec)
6364 struct extent_backref *node, *tmp;
6365 struct tree_backref *back;
6368 rbtree_postorder_for_each_entry_safe(node, tmp,
6369 &rec->backref_tree, node) {
6372 back = to_tree_backref(node);
6373 if (node->full_backref)
6375 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Look up the extent record covering [start, start + len) in
 * @extent_cache, test it with is_extent_tree_record(), and register the
 * range with btrfs_add_corrupt_extent_record() under the parent key
 * stored in the record.
 */
6382 static int record_bad_block_io(struct btrfs_fs_info *info,
6383 struct cache_tree *extent_cache,
6386 struct extent_record *rec;
6387 struct cache_extent *cache;
6388 struct btrfs_key key;
6390 cache = lookup_cache_extent(extent_cache, start, len);
6394 rec = container_of(cache, struct extent_record, cache);
6395 if (!is_extent_tree_record(rec))
6398 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6399 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot + 1 of tree block @buf.
 *
 * Node (header level non-zero): the two btrfs_key_ptr entries are copied
 * out and written back in exchanged positions.
 * Leaf: the data, offset, size and key of the two items are exchanged,
 * using two temporary heap buffers for the item data.
 */
6402 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6403 struct extent_buffer *buf, int slot)
6405 if (btrfs_header_level(buf)) {
6406 struct btrfs_key_ptr ptr1, ptr2;
6408 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6409 sizeof(struct btrfs_key_ptr));
6410 read_extent_buffer(buf, &ptr2,
6411 btrfs_node_key_ptr_offset(slot + 1),
6412 sizeof(struct btrfs_key_ptr));
6413 write_extent_buffer(buf, &ptr1,
6414 btrfs_node_key_ptr_offset(slot + 1),
6415 sizeof(struct btrfs_key_ptr));
6416 write_extent_buffer(buf, &ptr2,
6417 btrfs_node_key_ptr_offset(slot),
6418 sizeof(struct btrfs_key_ptr));
6420 struct btrfs_disk_key key;
/* Hand the first key of this node to btrfs_fixup_low_keys(). */
6421 btrfs_node_key(buf, &key, 0);
6422 btrfs_fixup_low_keys(root, path, &key,
6423 btrfs_header_level(buf) + 1);
6426 struct btrfs_item *item1, *item2;
6427 struct btrfs_key k1, k2;
6428 char *item1_data, *item2_data;
6429 u32 item1_offset, item2_offset, item1_size, item2_size;
6431 item1 = btrfs_item_nr(slot);
6432 item2 = btrfs_item_nr(slot + 1);
6433 btrfs_item_key_to_cpu(buf, &k1, slot);
6434 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6435 item1_offset = btrfs_item_offset(buf, item1);
6436 item2_offset = btrfs_item_offset(buf, item2);
6437 item1_size = btrfs_item_size(buf, item1);
6438 item2_size = btrfs_item_size(buf, item2);
6440 item1_data = malloc(item1_size);
6443 item2_data = malloc(item2_size);
6449 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6450 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): item1_data (item1_size bytes) is written with item2_size
 * and item2_data with item1_size; confirm this is intended when the two
 * item sizes differ.
 */
6452 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6453 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6457 btrfs_set_item_offset(buf, item1, item2_offset);
6458 btrfs_set_item_offset(buf, item2, item1_offset);
6459 btrfs_set_item_size(buf, item1, item2_size);
6460 btrfs_set_item_size(buf, item2, item1_size);
/* Exchange the keys; path->slots[0] selects which item gets the key. */
6462 path->slots[0] = slot;
6463 btrfs_set_item_key_unsafe(root, path, &k2);
6464 path->slots[0] = slot + 1;
6465 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Scan adjacent key pairs of the tree block at path->lowest_level. Each
 * pair is compared with btrfs_comp_cpu_keys(); swap_values() is called
 * with the index of the first key of a pair, and the buffer is marked
 * dirty.
 */
6470 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6472 struct extent_buffer *buf;
6473 struct btrfs_key k1, k2;
6475 int level = path->lowest_level;
6478 buf = path->nodes[level];
6479 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6481 btrfs_node_key_to_cpu(buf, &k1, i);
6482 btrfs_node_key_to_cpu(buf, &k2, i + 1);
6484 btrfs_item_key_to_cpu(buf, &k1, i);
6485 btrfs_item_key_to_cpu(buf, &k2, i + 1);
6487 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6489 ret = swap_values(root, path, buf, i);
6492 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item at @slot from leaf @buf: the item headers after @slot
 * are moved down by one position and the header item count is decremented.
 * Only the headers are moved here; the buffer is marked dirty afterwards.
 *
 * The key type is tested against DIR_INDEX, EXTENT_ITEM, METADATA_ITEM,
 * TREE_BLOCK_REF and EXTENT_DATA_REF before anything is changed.
 */
6498 static int delete_bogus_item(struct btrfs_root *root,
6499 struct btrfs_path *path,
6500 struct extent_buffer *buf, int slot)
6502 struct btrfs_key key;
6503 int nritems = btrfs_header_nritems(buf);
6505 btrfs_item_key_to_cpu(buf, &key, slot);
6507 /* These are all the keys we can deal with missing. */
6508 if (key.type != BTRFS_DIR_INDEX_KEY &&
6509 key.type != BTRFS_EXTENT_ITEM_KEY &&
6510 key.type != BTRFS_METADATA_ITEM_KEY &&
6511 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6512 key.type != BTRFS_EXTENT_DATA_REF_KEY)
6515 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6516 (unsigned long long)key.objectid, key.type,
6517 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array. */
6518 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6519 btrfs_item_nr_offset(slot + 1),
6520 sizeof(struct btrfs_item) *
6521 (nritems - slot - 1));
6522 btrfs_set_header_nritems(buf, nritems - 1);
6524 struct btrfs_disk_key disk_key;
/* Hand the new first key of the leaf to btrfs_fixup_low_keys(). */
6526 btrfs_item_key(buf, &disk_key, 0);
6527 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6529 btrfs_mark_buffer_dirty(buf);
/*
 * Make the item data of the leaf at path->nodes[0] contiguous.
 *
 * The first item must end at BTRFS_LEAF_DATA_SIZE(root); every later item
 * must end where the previous item starts. An item that ends beyond that
 * limit is handed to delete_bogus_item(). An item that ends before it
 * leaves a gap, which is closed by moving the item data up by the missing
 * amount and updating the item offset.
 */
6533 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6535 struct extent_buffer *buf;
6539 /* We should only get this for leaves */
6540 BUG_ON(path->lowest_level);
6541 buf = path->nodes[0];
6543 for (i = 0; i < btrfs_header_nritems(buf); i++) {
6544 unsigned int shift = 0, offset;
/* First item: compare its end with the end of the leaf data area. */
6546 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6547 BTRFS_LEAF_DATA_SIZE(root)) {
6548 if (btrfs_item_end_nr(buf, i) >
6549 BTRFS_LEAF_DATA_SIZE(root)) {
6550 ret = delete_bogus_item(root, path, buf, i);
6553 fprintf(stderr, "item is off the end of the "
6554 "leaf, can't fix\n");
6558 shift = BTRFS_LEAF_DATA_SIZE(root) -
6559 btrfs_item_end_nr(buf, i);
/* Later items: compare the end with the start of the previous item. */
6560 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6561 btrfs_item_offset_nr(buf, i - 1)) {
6562 if (btrfs_item_end_nr(buf, i) >
6563 btrfs_item_offset_nr(buf, i - 1)) {
6564 ret = delete_bogus_item(root, path, buf, i);
6567 fprintf(stderr, "items overlap, can't fix\n");
6571 shift = btrfs_item_offset_nr(buf, i - 1) -
6572 btrfs_item_end_nr(buf, i);
6577 printf("Shifting item nr %d by %u bytes in block %llu\n",
6578 i, shift, (unsigned long long)buf->start);
6579 offset = btrfs_item_offset_nr(buf, i);
/* Move the item data up by shift bytes within the leaf data area. */
6580 memmove_extent_buffer(buf,
6581 btrfs_leaf_data(buf) + offset + shift,
6582 btrfs_leaf_data(buf) + offset,
6583 btrfs_item_size_nr(buf, i));
6584 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6586 btrfs_mark_buffer_dirty(buf);
6590 * We may have moved things, in which case we want to exit so we don't
6591 * write those changes out. Once we have proper abort functionality in
6592 * progs this can be changed to something nicer.
6599 * Attempt to fix basic block failures. If we can't fix it for whatever reason
6600 * then just return -EIO.
/*
 * Attempt to repair tree block @buf whose check reported @status.
 *
 * Only BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS
 * are tested for. The roots referencing the block are collected with
 * btrfs_find_all_roots(); for each of them the block is reached through a
 * search inside a transaction and fix_key_order() or fix_item_offset() is
 * applied according to @status.
 */
6602 static int try_to_fix_bad_block(struct btrfs_root *root,
6603 struct extent_buffer *buf,
6604 enum btrfs_tree_block_status status)
6606 struct btrfs_trans_handle *trans;
6607 struct ulist *roots;
6608 struct ulist_node *node;
6609 struct btrfs_root *search_root;
6610 struct btrfs_path path;
6611 struct ulist_iterator iter;
6612 struct btrfs_key root_key, key;
6615 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6616 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6619 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6623 btrfs_init_path(&path);
6624 ULIST_ITER_INIT(&iter);
6625 while ((node = ulist_next(roots, &iter))) {
/* Each ulist value is used as the objectid of a root to read. */
6626 root_key.objectid = node->val;
6627 root_key.type = BTRFS_ROOT_ITEM_KEY;
6628 root_key.offset = (u64)-1;
6630 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6637 trans = btrfs_start_transaction(search_root, 0);
6638 if (IS_ERR(trans)) {
6639 ret = PTR_ERR(trans);
/*
 * Search down to the level of @buf using its first key, with the
 * skip_check_block flag set on the path.
 */
6643 path.lowest_level = btrfs_header_level(buf);
6644 path.skip_check_block = 1;
6645 if (path.lowest_level)
6646 btrfs_node_key_to_cpu(buf, &key, 0);
6648 btrfs_item_key_to_cpu(buf, &key, 0);
6649 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6652 btrfs_commit_transaction(trans, search_root);
6655 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6656 ret = fix_key_order(search_root, &path);
6657 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6658 ret = fix_item_offset(search_root, &path);
6660 btrfs_commit_transaction(trans, search_root);
6663 btrfs_release_path(&path);
6664 btrfs_commit_transaction(trans, search_root);
6667 btrfs_release_path(&path);
/*
 * Check tree block @buf and update its extent record in @extent_cache.
 *
 * The record is looked up by buf->start / buf->len and receives the header
 * generation, the objectid of the first key and the level. The block is
 * verified with btrfs_check_leaf() or btrfs_check_node() against the
 * parent key stored in the record; a block that is not clean is passed to
 * try_to_fix_bad_block(). The record is then marked content_checked, its
 * owner ref is marked checked directly for full-backref blocks or
 * verified with check_owner_ref(), and maybe_free_extent_rec() gets the
 * chance to drop it.
 */
6671 static int check_block(struct btrfs_root *root,
6672 struct cache_tree *extent_cache,
6673 struct extent_buffer *buf, u64 flags)
6675 struct extent_record *rec;
6676 struct cache_extent *cache;
6677 struct btrfs_key key;
6678 enum btrfs_tree_block_status status;
6682 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6685 rec = container_of(cache, struct extent_record, cache);
6686 rec->generation = btrfs_header_generation(buf);
6688 level = btrfs_header_level(buf);
/* Record the objectid of the first key when the block has items. */
6689 if (btrfs_header_nritems(buf) > 0) {
6692 btrfs_item_key_to_cpu(buf, &key, 0);
6694 btrfs_node_key_to_cpu(buf, &key, 0);
6696 rec->info_objectid = key.objectid;
6698 rec->info_level = level;
6700 if (btrfs_is_leaf(buf))
6701 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6703 status = btrfs_check_node(root, &rec->parent_key, buf);
6705 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6707 status = try_to_fix_bad_block(root, buf, status);
6708 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6710 fprintf(stderr, "bad block %llu\n",
6711 (unsigned long long)buf->start);
6714 * Signal to callers we need to start the scan over
6715 * again since we'll have cowed blocks.
6720 rec->content_checked = 1;
6721 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6722 rec->owner_ref_checked = 1;
6724 ret = check_owner_ref(root, rec, buf);
6726 rec->owner_ref_checked = 1;
6730 maybe_free_extent_rec(extent_cache, rec);
/*
 * List-based variant: walk the rec->backrefs list looking for a tree
 * backref. Entries are compared by parent or by root, depending on the
 * full_backref flag of the entry.
 */
6735 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6736 u64 parent, u64 root)
6738 struct list_head *cur = rec->backrefs.next;
6739 struct extent_backref *node;
6740 struct tree_backref *back;
6742 while(cur != &rec->backrefs) {
6743 node = to_extent_backref(cur);
6747 back = to_tree_backref(node);
6749 if (!node->full_backref)
6751 if (parent == back->parent)
6754 if (node->full_backref)
6756 if (back->root == root)
/*
 * Allocate a tree_backref with malloc() and zero its embedded node.
 * One branch stores @parent and sets full_backref to 1; the other sets
 * full_backref to 0.
 */
6764 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6765 u64 parent, u64 root)
6767 struct tree_backref *ref = malloc(sizeof(*ref));
6771 memset(&ref->node, 0, sizeof(ref->node));
6773 ref->parent = parent;
6774 ref->node.full_backref = 1;
6777 ref->node.full_backref = 0;
/*
 * List-based variant: walk the rec->backrefs list looking for a data
 * backref. Entries are compared by parent, or by root, owner and offset,
 * depending on the full_backref flag of the entry. For a root/owner/offset
 * match with both found_ref values set, bytes and disk_bytenr are compared
 * as well.
 */
6784 static struct data_backref *find_data_backref(struct extent_record *rec,
6785 u64 parent, u64 root,
6786 u64 owner, u64 offset,
6788 u64 disk_bytenr, u64 bytes)
6790 struct list_head *cur = rec->backrefs.next;
6791 struct extent_backref *node;
6792 struct data_backref *back;
6794 while(cur != &rec->backrefs) {
6795 node = to_extent_backref(cur);
6799 back = to_data_backref(node);
6801 if (!node->full_backref)
6803 if (parent == back->parent)
6806 if (node->full_backref)
6808 if (back->root == root && back->owner == owner &&
6809 back->offset == offset) {
6810 if (found_ref && node->found_ref &&
6811 (back->bytes != bytes ||
6812 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data_backref with malloc(), zero its embedded node and mark
 * it as a data backref. One branch stores @parent and sets full_backref to
 * 1; the other stores @offset and sets full_backref to 0. bytes is set
 * from max_size, and rec->max_size is raised when max_size is larger.
 */
6822 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6823 u64 parent, u64 root,
6824 u64 owner, u64 offset,
6827 struct data_backref *ref = malloc(sizeof(*ref));
6831 memset(&ref->node, 0, sizeof(ref->node));
6832 ref->node.is_data = 1;
6835 ref->parent = parent;
6838 ref->node.full_backref = 1;
6842 ref->offset = offset;
6843 ref->node.full_backref = 0;
6845 ref->bytes = max_size;
/* Keep the largest size seen for this extent record. */
6848 if (max_size > rec->max_size)
6849 rec->max_size = max_size;
6853 /* Check if the type of extent matches with its chunk */
/*
 * The block group is looked up by rec->start through global_info and its
 * flags are compared with the kind of extent; rec->wrong_chunk_type is set
 * on a mismatch. Data extents need the DATA flag. Metadata extents need
 * SYSTEM or METADATA, and the first backref of the record then decides
 * which of the two is expected: SYSTEM when the backref root is the chunk
 * tree, METADATA otherwise.
 */
6854 static void check_extent_type(struct extent_record *rec)
6856 struct btrfs_block_group_cache *bg_cache;
6858 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6862 /* data extent, check chunk directly*/
6863 if (!rec->metadata) {
6864 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6865 rec->wrong_chunk_type = 1;
6869 /* metadata extent, check the obvious case first */
6870 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6871 BTRFS_BLOCK_GROUP_METADATA))) {
6872 rec->wrong_chunk_type = 1;
6877 * Check SYSTEM extent, as it's also marked as metadata, we can only
6878 * make sure it's a SYSTEM extent by its backref
6880 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
6881 struct extent_backref *node;
6882 struct tree_backref *tback;
6885 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
6886 if (node->is_data) {
6887 /* tree block shouldn't have data backref */
6888 rec->wrong_chunk_type = 1;
6891 tback = container_of(node, struct tree_backref, node);
6893 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6894 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6896 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6897 if (!(bg_cache->flags & bg_type))
6898 rec->wrong_chunk_type = 1;
6903 * Allocate a new extent record, fill default values from @tmpl and insert into
6904 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6905 * the cache, otherwise it fails.
6907 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6908 struct extent_record *tmpl)
6910 struct extent_record *rec;
/* a template with max_size == 0 trips BUG_ON */
6913 BUG_ON(tmpl->max_size == 0);
6914 rec = malloc(sizeof(*rec));
6917 rec->start = tmpl->start;
6918 rec->max_size = tmpl->max_size;
/* nr is never smaller than max_size */
6919 rec->nr = max(tmpl->nr, tmpl->max_size);
6920 rec->found_rec = tmpl->found_rec;
6921 rec->content_checked = tmpl->content_checked;
6922 rec->owner_ref_checked = tmpl->owner_ref_checked;
/* fields below start from fixed defaults rather than from the template */
6923 rec->num_duplicates = 0;
6924 rec->metadata = tmpl->metadata;
6925 rec->flag_block_full_backref = FLAG_UNSET;
6926 rec->bad_full_backref = 0;
6927 rec->crossing_stripes = 0;
6928 rec->wrong_chunk_type = 0;
6929 rec->is_root = tmpl->is_root;
6930 rec->refs = tmpl->refs;
6931 rec->extent_item_refs = tmpl->extent_item_refs;
6932 rec->parent_generation = tmpl->parent_generation;
6933 INIT_LIST_HEAD(&rec->backrefs);
6934 INIT_LIST_HEAD(&rec->dups);
6935 INIT_LIST_HEAD(&rec->list);
6936 rec->backref_tree = RB_ROOT;
6937 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
/* the cache entry uses tmpl->nr as its size, not the max() stored in rec->nr */
6938 rec->cache.start = tmpl->start;
6939 rec->cache.size = tmpl->nr;
6940 ret = insert_cache_extent(extent_cache, &rec->cache);
/* account the new extent in the file-wide bytes_used counter */
6945 bytes_used += rec->nr;
/* the stripe-crossing check is given a nodesize-long range at rec->start */
6948 rec->crossing_stripes = check_crossing_stripes(global_info,
6949 rec->start, global_info->nodesize);
6950 check_extent_type(rec);
6955 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6957 * - refs - if found, increase refs
6958 * - is_root - if found, set
6959 * - content_checked - if found, set
6960 * - owner_ref_checked - if found, set
6962 * If not found, create a new one, initialize and insert.
6964 static int add_extent_rec(struct cache_tree *extent_cache,
6965 struct extent_record *tmpl)
6967 struct extent_record *rec;
6968 struct cache_extent *cache;
/* look for an existing record covering any part of [start, start + nr) */
6972 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6974 rec = container_of(cache, struct extent_record, cache);
6978 rec->nr = max(tmpl->nr, tmpl->max_size);
6981 * We need to make sure to reset nr to whatever the extent
6982 * record says was the real size, this way we can compare it to
6985 if (tmpl->found_rec) {
/*
 * A different start, or a record that was already found once, is handled as
 * a duplicate: rec is queued on duplicate_extents and a copy of the
 * template's identifying fields is appended to rec->dups.
 */
6986 if (tmpl->start != rec->start || rec->found_rec) {
6987 struct extent_record *tmp;
6990 if (list_empty(&rec->list))
6991 list_add_tail(&rec->list,
6992 &duplicate_extents);
6995 * We have to do this song and dance in case we
6996 * find an extent record that falls inside of
6997 * our current extent record but does not have
6998 * the same objectid.
7000 tmp = malloc(sizeof(*tmp));
7003 tmp->start = tmpl->start;
7004 tmp->max_size = tmpl->max_size;
7007 tmp->metadata = tmpl->metadata;
7008 tmp->extent_item_refs = tmpl->extent_item_refs;
7009 INIT_LIST_HEAD(&tmp->list);
7010 list_add_tail(&tmp->list, &rec->dups);
7011 rec->num_duplicates++;
/* for non-duplicates, report a record that already carries extent_item_refs */
7018 if (tmpl->extent_item_refs && !dup) {
7019 if (rec->extent_item_refs) {
7020 fprintf(stderr, "block %llu rec "
7021 "extent_item_refs %llu, passed %llu\n",
7022 (unsigned long long)tmpl->start,
7023 (unsigned long long)
7024 rec->extent_item_refs,
7025 (unsigned long long)tmpl->extent_item_refs);
7027 rec->extent_item_refs = tmpl->extent_item_refs;
/* the checked flags are only ever set here, never cleared */
7031 if (tmpl->content_checked)
7032 rec->content_checked = 1;
7033 if (tmpl->owner_ref_checked)
7034 rec->owner_ref_checked = 1;
7035 memcpy(&rec->parent_key, &tmpl->parent_key,
7036 sizeof(tmpl->parent_key));
7037 if (tmpl->parent_generation)
7038 rec->parent_generation = tmpl->parent_generation;
7039 if (rec->max_size < tmpl->max_size)
7040 rec->max_size = tmpl->max_size;
7043 * A metadata extent can't cross stripe_len boundary, otherwise
7044 * kernel scrub won't be able to handle it.
7045 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7049 rec->crossing_stripes = check_crossing_stripes(
7050 global_info, rec->start,
7051 global_info->nodesize);
7052 check_extent_type(rec);
7053 maybe_free_extent_rec(extent_cache, rec);
/* creation path: build a fresh record from the template */
7057 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree-block backref, identified by @parent and @root, for the
 * extent at @bytenr. The extent record is looked up in @extent_cache and a
 * template-based record is inserted when needed; the backref is found with
 * find_tree_backref() or allocated with alloc_tree_backref(). The visible
 * code marks the backref with found_ref or with found_extent_tree, and prints
 * a message when the respective flag was already set.
 * NOTE(review): which of the two flags is updated presumably depends on
 * @found_ref -- confirm.
 */
7062 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7063 u64 parent, u64 root, int found_ref)
7065 struct extent_record *rec;
7066 struct tree_backref *back;
7067 struct cache_extent *cache;
7069 bool insert = false;
7071 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7073 struct extent_record tmpl;
/* build a zeroed template that starts at bytenr */
7075 memset(&tmpl, 0, sizeof(tmpl));
7076 tmpl.start = bytenr;
7081 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7085 /* really a bug in the cache_extent implementation now */
7086 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7091 rec = container_of(cache, struct extent_record, cache);
/* the record found must start exactly at bytenr */
7092 if (rec->start != bytenr) {
7094 * Several cause, from unaligned bytenr to over lapping extents
7099 back = find_tree_backref(rec, parent, root);
7101 back = alloc_tree_backref(rec, parent, root);
7108 if (back->node.found_ref) {
7109 fprintf(stderr, "Extent back ref already exists "
7110 "for %llu parent %llu root %llu \n",
7111 (unsigned long long)bytenr,
7112 (unsigned long long)parent,
7113 (unsigned long long)root);
7115 back->node.found_ref = 1;
7117 if (back->node.found_extent_tree) {
7118 fprintf(stderr, "Extent back ref already exists "
7119 "for %llu parent %llu root %llu \n",
7120 (unsigned long long)bytenr,
7121 (unsigned long long)parent,
7122 (unsigned long long)root);
7124 back->node.found_extent_tree = 1;
/* a non-zero rb_insert() result only triggers WARN_ON */
7127 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7128 compare_extent_backref));
7129 check_extent_type(rec);
7130 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr, identified by @parent or
 * by (@root, @owner, @offset). The extent record is looked up in
 * @extent_cache and a template-based record with @max_size is inserted when
 * needed; rec->max_size is raised to @max_size if smaller. The backref is
 * found with find_data_backref() or allocated with alloc_data_backref().
 * NOTE(review): the found_ref / found_extent_tree handling below is
 * presumably selected by @found_ref -- confirm.
 */
7134 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7135 u64 parent, u64 root, u64 owner, u64 offset,
7136 u32 num_refs, int found_ref, u64 max_size)
7138 struct extent_record *rec;
7139 struct data_backref *back;
7140 struct cache_extent *cache;
7142 bool insert = false;
7144 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7146 struct extent_record tmpl;
7148 memset(&tmpl, 0, sizeof(tmpl));
7149 tmpl.start = bytenr;
7151 tmpl.max_size = max_size;
7153 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
/* look the extent up again after inserting the template */
7157 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7162 rec = container_of(cache, struct extent_record, cache);
7163 if (rec->max_size < max_size)
7164 rec->max_size = max_size;
7167 * If found_ref is set then max_size is the real size and must match the
7168 * existing refs. So if we have already found a ref then we need to
7169 * make sure that this ref matches the existing one, otherwise we need
7170 * to add a new backref so we can notice that the backrefs don't match
7171 * and we need to figure out who is telling the truth. This is to
7172 * account for that awful fsync bug I introduced where we'd end up with
7173 * a btrfs_file_extent_item that would have its length include multiple
7174 * prealloc extents or point inside of a prealloc extent.
7176 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7179 back = alloc_data_backref(rec, parent, root, owner, offset,
/* this path only accepts a single reference per call */
7186 BUG_ON(num_refs != 1);
7187 if (back->node.found_ref)
7188 BUG_ON(back->bytes != max_size);
7189 back->node.found_ref = 1;
7190 back->found_ref += 1;
/* bring the backref's bytes/disk_bytenr in line with this reference */
7191 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7192 back->bytes = max_size;
7193 back->disk_bytenr = bytenr;
7195 /* Need to reinsert if not already in the tree */
7197 rb_erase(&back->node.node, &rec->backref_tree);
7202 rec->content_checked = 1;
7203 rec->owner_ref_checked = 1;
7205 if (back->node.found_extent_tree) {
7206 fprintf(stderr, "Extent back ref already exists "
7207 "for %llu parent %llu root %llu "
7208 "owner %llu offset %llu num_refs %lu\n",
7209 (unsigned long long)bytenr,
7210 (unsigned long long)parent,
7211 (unsigned long long)root,
7212 (unsigned long long)owner,
7213 (unsigned long long)offset,
7214 (unsigned long)num_refs);
7216 back->num_refs = num_refs;
7217 back->node.found_extent_tree = 1;
/* a non-zero rb_insert() result only triggers WARN_ON */
7220 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7221 compare_extent_backref));
7223 maybe_free_extent_rec(extent_cache, rec);
/*
 * Add the range (@bytenr, @size) to the @seen cache tree and to the @pending
 * cache tree.
 * NOTE(review): only the result of the @seen insertion is captured in ret;
 * the result of the @pending insertion is discarded.
 */
7227 static int add_pending(struct cache_tree *pending,
7228 struct cache_tree *seen, u64 bytenr, u32 size)
7231 ret = add_cache_extent(seen, bytenr, size);
7234 add_cache_extent(pending, bytenr, size);
/*
 * Fill @bits (capacity @bits_nr) with the start/size of the next extents to
 * process, drawing on the @reada, @nodes and @pending cache trees.
 * NOTE(review): @last is u64 but is copied into an unsigned long
 * (node_start); on targets where unsigned long is 32 bits this truncates.
 */
7238 static int pick_next_pending(struct cache_tree *pending,
7239 struct cache_tree *reada,
7240 struct cache_tree *nodes,
7241 u64 last, struct block_info *bits, int bits_nr,
7244 unsigned long node_start = last;
7245 struct cache_extent *cache;
/* an entry in the readahead tree fills slot 0 */
7248 cache = search_cache_extent(reada, 0);
7250 bits[0].start = cache->start;
7251 bits[0].size = cache->size;
/* start the node search 32768 bytes before @last when possible */
7256 if (node_start > 32768)
7257 node_start -= 32768;
7259 cache = search_cache_extent(nodes, node_start);
/* search again from the beginning of the nodes tree */
7261 cache = search_cache_extent(nodes, 0);
7264 cache = search_cache_extent(pending, 0);
7269 bits[ret].start = cache->start;
7270 bits[ret].size = cache->size;
7271 cache = next_cache_extent(cache);
7273 } while (cache && ret < bits_nr);
7279 bits[ret].start = cache->start;
7280 bits[ret].size = cache->size;
7281 cache = next_cache_extent(cache);
7283 } while (cache && ret < bits_nr);
/* with more than 8 free slots, also pick pending extents close to bits[0] */
7285 if (bits_nr - ret > 8) {
7286 u64 lookup = bits[0].start + bits[0].size;
7287 struct cache_extent *next;
7288 next = search_cache_extent(pending, lookup);
/* a gap larger than 32768 bytes is tested here */
7290 if (next->start - lookup > 32768)
7292 bits[ret].start = next->start;
7293 bits[ret].size = next->size;
7294 lookup = next->start + next->size;
7298 next = next_cache_extent(next);
/*
 * Per-extent callback used by free_chunk_cache_tree(): resolves the
 * chunk_record embedding @cache and unlinks it from its list and dextents
 * lists.
 */
7306 static void free_chunk_record(struct cache_extent *cache)
7308 struct chunk_record *rec;
7310 rec = container_of(cache, struct chunk_record, cache);
7311 list_del_init(&rec->list);
7312 list_del_init(&rec->dextents);
/* Release all extents of @chunk_cache, calling free_chunk_record() on each. */
7316 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
7318 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node callback passed to FREE_RB_BASED_TREE for the device cache:
 * resolves the device_record that embeds the rb-tree node @node.
 */
7321 static void free_device_record(struct rb_node *node)
7323 struct device_record *rec;
7325 rec = container_of(node, struct device_record, node);
/*
 * Macro invocation pairing the name device_cache with free_device_record.
 * NOTE(review): the macro is defined elsewhere; presumably it expands to the
 * rb-tree teardown function for the device cache -- confirm.
 */
7329 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the cache tree of @tree and append it to the
 * tree->block_groups list. ret carries the result of insert_cache_extent().
 */
7331 int insert_block_group_record(struct block_group_tree *tree,
7332 struct block_group_record *bg_rec)
7336 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7340 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Per-extent callback used by free_block_group_tree(): resolves the
 * block_group_record embedding @cache and unlinks it from its list.
 */
7344 static void free_block_group_record(struct cache_extent *cache)
7346 struct block_group_record *rec;
7348 rec = container_of(cache, struct block_group_record, cache);
7349 list_del_init(&rec->list);
/* Release all extents of tree->tree, calling free_block_group_record() on each. */
7353 void free_block_group_tree(struct block_group_tree *tree)
7355 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the cache tree of @tree using insert_cache_extent2(),
 * and queue it on both orphan lists (no_chunk_orphans and no_device_orphans).
 */
7358 int insert_device_extent_record(struct device_extent_tree *tree,
7359 struct device_extent_record *de_rec)
7364 * Device extent is a bit different from the other extents, because
7365 * the extents which belong to the different devices may have the
7366 * same start and size, so we need use the special extent cache
7367 * search/insert functions.
7369 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
/* a new record starts out on both orphan lists */
7373 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7374 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Per-extent callback used by free_device_extent_tree(): resolves the
 * device_extent_record embedding @cache and unlinks it from its chunk_list
 * and device_list when those are non-empty.
 */
7378 static void free_device_extent_record(struct cache_extent *cache)
7380 struct device_extent_record *rec;
7382 rec = container_of(cache, struct device_extent_record, cache);
7383 if (!list_empty(&rec->chunk_list))
7384 list_del_init(&rec->chunk_list);
7385 if (!list_empty(&rec->device_list))
7386 list_del_init(&rec->device_list);
/* Release all extents of tree->tree, calling free_device_extent_record() on each. */
7390 void free_device_extent_tree(struct device_extent_tree *tree)
7392 cache_tree_free_extents(&tree->tree, free_device_extent_record);
7395 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 extent reference item at @slot of @leaf. The item key supplies
 * the extent bytenr (key.objectid) and the parent (key.offset). A reference
 * objectid below BTRFS_FIRST_FREE_OBJECTID leads to add_tree_backref();
 * the add_data_backref() call passes the v0 reference count.
 */
7396 static int process_extent_ref_v0(struct cache_tree *extent_cache,
7397 struct extent_buffer *leaf, int slot)
7399 struct btrfs_extent_ref_v0 *ref0;
7400 struct btrfs_key key;
7403 btrfs_item_key_to_cpu(leaf, &key, slot);
7404 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
7405 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
7406 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
/* root, owner and offset are passed as 0; so are found_ref and max_size */
7409 ret = add_data_backref(extent_cache, key.objectid, key.offset,
7410 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the chunk item at @slot of @leaf. The record is
 * calloc'ed with a size that depends on the chunk's stripe count, then filled
 * from @key, the leaf header generation and the chunk item fields, followed
 * by one entry per stripe (devid, offset, device uuid).
 */
7416 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7417 struct btrfs_key *key,
7420 struct btrfs_chunk *ptr;
7421 struct chunk_record *rec;
7424 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7425 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
/* allocation size comes from btrfs_chunk_record_size(num_stripes) */
7427 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7429 fprintf(stderr, "memory allocation failed\n");
7433 INIT_LIST_HEAD(&rec->list);
7434 INIT_LIST_HEAD(&rec->dextents);
/* cache range: key offset as start, chunk length as size */
7437 rec->cache.start = key->offset;
7438 rec->cache.size = btrfs_chunk_length(leaf, ptr);
7440 rec->generation = btrfs_header_generation(leaf);
7442 rec->objectid = key->objectid;
7443 rec->type = key->type;
7444 rec->offset = key->offset;
7446 rec->length = rec->cache.size;
7447 rec->owner = btrfs_chunk_owner(leaf, ptr);
7448 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7449 rec->type_flags = btrfs_chunk_type(leaf, ptr);
7450 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7451 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7452 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7453 rec->num_stripes = num_stripes;
7454 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* copy the per-stripe data out of the leaf */
7456 for (i = 0; i < rec->num_stripes; ++i) {
7457 rec->stripes[i].devid =
7458 btrfs_stripe_devid_nr(leaf, ptr, i);
7459 rec->stripes[i].offset =
7460 btrfs_stripe_offset_nr(leaf, ptr, i);
7461 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7462 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Handle a chunk item at @slot of @eb: validate it with
 * btrfs_check_chunk_valid(), build a chunk_record from it and insert the
 * record into @chunk_cache. An invalid chunk and a failed insertion are each
 * reported with a message.
 */
7469 static int process_chunk_item(struct cache_tree *chunk_cache,
7470 struct btrfs_key *key, struct extent_buffer *eb,
7473 struct chunk_record *rec;
7474 struct btrfs_chunk *chunk;
7477 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7479 * Do extra check for this chunk item,
7481 * It's still possible one can craft a leaf with CHUNK_ITEM, with
7482 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7483 * and owner<->key_type check.
7485 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7488 error("chunk(%llu, %llu) is not valid, ignore it",
7489 key->offset, btrfs_chunk_length(eb, chunk));
7492 rec = btrfs_new_chunk_record(eb, key, slot);
7493 ret = insert_cache_extent(chunk_cache, &rec->cache);
/* message printed for a chunk whose insertion is reported as failed */
7495 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7496 rec->offset, rec->length);
/*
 * Handle a device item at @slot of @eb: allocate a device_record, fill it
 * from @key, the leaf header generation and the dev item, and insert it into
 * the @dev_cache rb-tree ordered by device_record_compare.
 * NOTE(review): rec->devid is assigned twice, first from key->offset and
 * then from btrfs_device_id(); the second value is the one that remains.
 */
7503 static int process_device_item(struct rb_root *dev_cache,
7504 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7506 struct btrfs_dev_item *ptr;
7507 struct device_record *rec;
7510 ptr = btrfs_item_ptr(eb,
7511 slot, struct btrfs_dev_item);
7513 rec = malloc(sizeof(*rec));
7515 fprintf(stderr, "memory allocation failed\n");
7519 rec->devid = key->offset;
7520 rec->generation = btrfs_header_generation(eb);
7522 rec->objectid = key->objectid;
7523 rec->type = key->type;
7524 rec->offset = key->offset;
/* values read from the on-disk dev item */
7526 rec->devid = btrfs_device_id(eb, ptr);
7527 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7528 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7530 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7532 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a zero-initialised block_group_record from the block group item at
 * @slot of @leaf. The cache range is (key->objectid, key->offset); the flags
 * are read from the on-disk item.
 */
7539 struct block_group_record *
7540 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7543 struct btrfs_block_group_item *ptr;
7544 struct block_group_record *rec;
7546 rec = calloc(1, sizeof(*rec));
7548 fprintf(stderr, "memory allocation failed\n");
/* key objectid is used as the start, key offset as the size */
7552 rec->cache.start = key->objectid;
7553 rec->cache.size = key->offset;
7555 rec->generation = btrfs_header_generation(leaf);
7557 rec->objectid = key->objectid;
7558 rec->type = key->type;
7559 rec->offset = key->offset;
7561 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7562 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7564 INIT_LIST_HEAD(&rec->list);
/*
 * Handle a block group item at @slot of @eb: build a block_group_record and
 * insert it into @block_group_cache; a message is printed for a record whose
 * insertion is reported as failed.
 */
7569 static int process_block_group_item(struct block_group_tree *block_group_cache,
7570 struct btrfs_key *key,
7571 struct extent_buffer *eb, int slot)
7573 struct block_group_record *rec;
7576 rec = btrfs_new_block_group_record(eb, key, slot);
7577 ret = insert_block_group_record(block_group_cache, rec);
7579 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7580 rec->objectid, rec->offset);
/*
 * Build a zero-initialised device_extent_record from the dev extent item at
 * @slot of @leaf. The cache entry is keyed by key->objectid and key->offset,
 * and sized by the on-disk extent length.
 */
7587 struct device_extent_record *
7588 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7589 struct btrfs_key *key, int slot)
7591 struct device_extent_record *rec;
7592 struct btrfs_dev_extent *ptr;
7594 rec = calloc(1, sizeof(*rec));
7596 fprintf(stderr, "memory allocation failed\n");
/* the cache entry carries an objectid in addition to its start */
7600 rec->cache.objectid = key->objectid;
7601 rec->cache.start = key->offset;
7603 rec->generation = btrfs_header_generation(leaf);
7605 rec->objectid = key->objectid;
7606 rec->type = key->type;
7607 rec->offset = key->offset;
7609 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7610 rec->chunk_objecteid =
7611 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7613 btrfs_dev_extent_chunk_offset(leaf, ptr);
7614 rec->length = btrfs_dev_extent_length(leaf, ptr);
7615 rec->cache.size = rec->length;
7617 INIT_LIST_HEAD(&rec->chunk_list);
7618 INIT_LIST_HEAD(&rec->device_list);
/*
 * Handle a dev extent item at @slot of @eb: build a device_extent_record and
 * insert it into @dev_extent_cache; a message is printed for a record whose
 * insertion is reported as failed.
 */
7624 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7625 struct btrfs_key *key, struct extent_buffer *eb,
7628 struct device_extent_record *rec;
7631 rec = btrfs_new_device_extent_record(eb, key, slot);
7632 ret = insert_device_extent_record(dev_extent_cache, rec);
7635 "Device extent[%llu, %llu, %llu] existed.\n",
7636 rec->objectid, rec->offset, rec->length);
/*
 * Parse the extent item at @slot of @eb (EXTENT_ITEM or METADATA_ITEM key).
 * The bytenr must be sectorsize-aligned; a metadata length must equal
 * nodesize and a data length must be sectorsize-aligned, otherwise the item
 * is reported as invalid. The extent is registered through add_extent_rec()
 * and each inline reference is turned into a tree or data backref.
 * NOTE(review): the result of the second add_extent_rec() call and of both
 * add_data_backref() calls is not captured.
 */
7643 static int process_extent_item(struct btrfs_root *root,
7644 struct cache_tree *extent_cache,
7645 struct extent_buffer *eb, int slot)
7647 struct btrfs_extent_item *ei;
7648 struct btrfs_extent_inline_ref *iref;
7649 struct btrfs_extent_data_ref *dref;
7650 struct btrfs_shared_data_ref *sref;
7651 struct btrfs_key key;
7652 struct extent_record tmpl;
7657 u32 item_size = btrfs_item_size_nr(eb, slot);
7663 btrfs_item_key_to_cpu(eb, &key, slot);
/* METADATA_ITEM keys use nodesize as length; otherwise key.offset is the length */
7665 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7667 num_bytes = root->fs_info->nodesize;
7669 num_bytes = key.offset;
7672 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7673 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7674 key.objectid, root->fs_info->sectorsize);
/* an item smaller than btrfs_extent_item is only handled as the v0 format */
7677 if (item_size < sizeof(*ei)) {
7678 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7679 struct btrfs_extent_item_v0 *ei0;
7680 BUG_ON(item_size != sizeof(*ei0));
7681 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7682 refs = btrfs_extent_refs_v0(eb, ei0);
7686 memset(&tmpl, 0, sizeof(tmpl));
7687 tmpl.start = key.objectid;
7688 tmpl.nr = num_bytes;
7689 tmpl.extent_item_refs = refs;
7690 tmpl.metadata = metadata;
7692 tmpl.max_size = num_bytes;
7694 return add_extent_rec(extent_cache, &tmpl);
7697 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7698 refs = btrfs_extent_refs(eb, ei);
7699 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7703 if (metadata && num_bytes != root->fs_info->nodesize) {
7704 error("ignore invalid metadata extent, length %llu does not equal to %u",
7705 num_bytes, root->fs_info->nodesize);
7708 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7709 error("ignore invalid data extent, length %llu is not aligned to %u",
7710 num_bytes, root->fs_info->sectorsize);
7714 memset(&tmpl, 0, sizeof(tmpl));
7715 tmpl.start = key.objectid;
7716 tmpl.nr = num_bytes;
7717 tmpl.extent_item_refs = refs;
7718 tmpl.metadata = metadata;
7720 tmpl.max_size = num_bytes;
7721 add_extent_rec(extent_cache, &tmpl);
/* inline refs start right after the extent item ... */
7723 ptr = (unsigned long)(ei + 1);
/* ... or after the tree block info for tree blocks keyed as EXTENT_ITEM */
7724 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7725 key.type == BTRFS_EXTENT_ITEM_KEY)
7726 ptr += sizeof(struct btrfs_tree_block_info);
7728 end = (unsigned long)ei + item_size;
7730 iref = (struct btrfs_extent_inline_ref *)ptr;
7731 type = btrfs_extent_inline_ref_type(eb, iref);
7732 offset = btrfs_extent_inline_ref_offset(eb, iref);
7734 case BTRFS_TREE_BLOCK_REF_KEY:
7735 ret = add_tree_backref(extent_cache, key.objectid,
7739 "add_tree_backref failed (extent items tree block): %s",
7742 case BTRFS_SHARED_BLOCK_REF_KEY:
7743 ret = add_tree_backref(extent_cache, key.objectid,
7747 "add_tree_backref failed (extent items shared block): %s",
7750 case BTRFS_EXTENT_DATA_REF_KEY:
/* the data ref structure overlays the inline ref's offset field */
7751 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7752 add_data_backref(extent_cache, key.objectid, 0,
7753 btrfs_extent_data_ref_root(eb, dref),
7754 btrfs_extent_data_ref_objectid(eb,
7756 btrfs_extent_data_ref_offset(eb, dref),
7757 btrfs_extent_data_ref_count(eb, dref),
7760 case BTRFS_SHARED_DATA_REF_KEY:
/* the shared data ref follows the inline ref header */
7761 sref = (struct btrfs_shared_data_ref *)(iref + 1);
7762 add_data_backref(extent_cache, key.objectid, offset,
7764 btrfs_shared_data_ref_count(eb, sref),
7768 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7769 key.objectid, key.type, num_bytes);
/* advance by the size of this inline ref type */
7772 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the free space cache of block group @cache has an entry that
 * starts at @offset and is @bytes long. Before the lookup, the range is
 * adjusted around the superblock mirror stripes that btrfs_rmap_block()
 * reports for this block group; a stripe in the middle of the range makes the
 * function recurse on the left part and continue with the right part. A
 * matching entry is unlinked from the free space ctl.
 */
7779 static int check_cache_range(struct btrfs_root *root,
7780 struct btrfs_block_group_cache *cache,
7781 u64 offset, u64 bytes)
7783 struct btrfs_free_space *entry;
/* one pass per superblock mirror */
7789 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
7790 bytenr = btrfs_sb_offset(i);
7791 ret = btrfs_rmap_block(root->fs_info,
7792 cache->key.objectid, bytenr, 0,
7793 &logical, &nr, &stripe_len);
/* stripe ends at or before the range start */
7798 if (logical[nr] + stripe_len <= offset)
/* stripe starts at or after the range end */
7800 if (offset + bytes <= logical[nr])
7802 if (logical[nr] == offset) {
7803 if (stripe_len >= bytes) {
7807 bytes -= stripe_len;
7808 offset += stripe_len;
7809 } else if (logical[nr] < offset) {
7810 if (logical[nr] + stripe_len >=
7815 bytes = (offset + bytes) -
7816 (logical[nr] + stripe_len);
7817 offset = logical[nr] + stripe_len;
7820 * Could be tricky, the super may land in the
7821 * middle of the area we're checking. First
7822 * check the easiest case, it's at the end.
7824 if (logical[nr] + stripe_len >=
7826 bytes = logical[nr] - offset;
7830 /* Check the left side */
7831 ret = check_cache_range(root, cache,
7833 logical[nr] - offset);
7839 /* Now we continue with the right side */
7840 bytes = (offset + bytes) -
7841 (logical[nr] + stripe_len);
7842 offset = logical[nr] + stripe_len;
/* the remaining range must match one free space entry exactly */
7849 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7851 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7852 offset, offset+bytes);
7856 if (entry->offset != offset) {
7857 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7862 if (entry->bytes != bytes) {
7863 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7864 bytes, entry->bytes, offset);
7868 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Compare the free space cache of block group @cache against the extent tree.
 * Walks EXTENT_ITEM/METADATA_ITEM keys inside the block group, tracks in
 * `last` the end of the previous extent, and passes each gap between extents
 * (and the tail of the block group) to check_cache_range(). A message is
 * printed when free space entries are still left afterwards.
 */
7873 static int verify_space_cache(struct btrfs_root *root,
7874 struct btrfs_block_group_cache *cache)
7876 struct btrfs_path path;
7877 struct extent_buffer *leaf;
7878 struct btrfs_key key;
/* the walk always uses the extent root, whatever root was passed in */
7882 root = root->fs_info->extent_root;
/* never start below BTRFS_SUPER_INFO_OFFSET */
7884 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7886 btrfs_init_path(&path);
7887 key.objectid = last;
7889 key.type = BTRFS_EXTENT_ITEM_KEY;
7890 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7895 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7896 ret = btrfs_next_leaf(root, &path);
7904 leaf = path.nodes[0];
7905 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* key lies at or beyond the end of this block group */
7906 if (key.objectid >= cache->key.offset + cache->key.objectid)
7908 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7909 key.type != BTRFS_METADATA_ITEM_KEY) {
/* extent starts right at `last`: no gap, just advance past it */
7914 if (last == key.objectid) {
7915 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7916 last = key.objectid + key.offset;
7918 last = key.objectid + root->fs_info->nodesize;
/* the gap [last, key.objectid) is checked against the cache */
7923 ret = check_cache_range(root, cache, last,
7924 key.objectid - last);
7927 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7928 last = key.objectid + key.offset;
7930 last = key.objectid + root->fs_info->nodesize;
/* tail between the last extent and the end of the block group */
7934 if (last < cache->key.objectid + cache->key.offset)
7935 ret = check_cache_range(root, cache, last,
7936 cache->key.objectid +
7937 cache->key.offset - last);
7940 btrfs_release_path(&path);
7943 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7944 fprintf(stderr, "There are still entries left in the space "
/*
 * Verify the free space information of the block groups, starting the lookup
 * just past the primary superblock. Each block group gets a free space ctl
 * initialised, or its existing cache removed; its free space is then loaded
 * from the free space tree (when the FREE_SPACE_TREE compat_ro bit is set) or
 * from the free space cache, and checked with verify_space_cache().
 * The final return is -EINVAL when the local `error` is non-zero, else 0.
 */
7952 static int check_space_cache(struct btrfs_root *root)
7954 struct btrfs_block_group_cache *cache;
7955 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
/* a cache generation other than -1 that differs from the super generation */
7959 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7960 btrfs_super_generation(root->fs_info->super_copy) !=
7961 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7962 printf("cache and super generation don't match, space cache "
7963 "will be invalidated\n");
7967 if (ctx.progress_enabled) {
7968 ctx.tp = TASK_FREE_SPACE;
7969 task_start(ctx.info);
7973 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* the next lookup continues after this block group */
7977 start = cache->key.objectid + cache->key.offset;
7978 if (!cache->free_space_ctl) {
7979 if (btrfs_init_free_space_ctl(cache,
7980 root->fs_info->sectorsize)) {
7985 btrfs_remove_free_space_cache(cache);
7988 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
/* super stripes are excluded only while the free space tree is loaded */
7989 ret = exclude_super_stripes(root, cache);
7991 fprintf(stderr, "could not exclude super stripes: %s\n",
7996 ret = load_free_space_tree(root->fs_info, cache);
7997 free_excluded_extents(root, cache);
7999 fprintf(stderr, "could not load free space tree: %s\n",
8006 ret = load_free_space_cache(root->fs_info, cache);
8011 ret = verify_space_cache(root, cache);
8013 fprintf(stderr, "cache appears valid but isn't %Lu\n",
8014 cache->key.objectid);
8019 task_stop(ctx.info);
8021 return error ? -EINVAL : 0;
/*
 * Verify the data checksums of the range starting at @bytenr and @num_bytes
 * long. The data is read into a malloc'ed buffer with read_extent_data(),
 * then one checksum per sectorsize block is computed and compared with the
 * expected value read from @eb at @leaf_offset plus the block's csum slot.
 * A mismatch is reported together with the mirror number, and the number of
 * copies is queried so that another mirror can be considered.
 */
8024 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8025 u64 num_bytes, unsigned long leaf_offset,
8026 struct extent_buffer *eb) {
8028 struct btrfs_fs_info *fs_info = root->fs_info;
8030 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8032 unsigned long csum_offset;
8036 u64 data_checked = 0;
/* the length is tested for being a multiple of sectorsize */
8042 if (num_bytes % fs_info->sectorsize)
8045 data = malloc(num_bytes);
8049 while (offset < num_bytes) {
8052 read_len = num_bytes - offset;
8053 /* read as much as possible at a time */
8054 ret = read_extent_data(fs_info, data + offset,
8055 bytenr + offset, &read_len, mirror);
8059 /* verify every 4k data's checksum */
8060 while (data_checked < read_len) {
8062 tmp = offset + data_checked;
8064 csum = btrfs_csum_data((char *)data + tmp,
8065 csum, fs_info->sectorsize);
8066 btrfs_csum_final(csum, (u8 *)&csum);
/* slot of this block's checksum inside the csum item */
8068 csum_offset = leaf_offset +
8069 tmp / fs_info->sectorsize * csum_size;
8070 read_extent_buffer(eb, (char *)&csum_expected,
8071 csum_offset, csum_size);
8072 /* try another mirror */
8073 if (csum != csum_expected) {
8074 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8075 mirror, bytenr + tmp,
8076 csum, csum_expected);
8077 num_copies = btrfs_num_copies(root->fs_info,
8079 if (mirror < num_copies - 1) {
8084 data_checked += fs_info->sectorsize;
/*
 * Check that the range starting at @bytenr and @num_bytes long is covered by
 * extent items in the extent tree. The search starts at
 * (bytenr, EXTENT_ITEM, -1) and steps back to earlier items; the range is
 * then trimmed by every overlapping extent item. An extent in the middle of
 * the range makes the function recurse on the right-hand part. Any part left
 * uncovered is reported as having no extents.
 */
8093 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8096 struct btrfs_path path;
8097 struct extent_buffer *leaf;
8098 struct btrfs_key key;
8101 btrfs_init_path(&path);
8102 key.objectid = bytenr;
8103 key.type = BTRFS_EXTENT_ITEM_KEY;
8104 key.offset = (u64)-1;
8107 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8110 fprintf(stderr, "Error looking up extent record %d\n", ret);
8111 btrfs_release_path(&path);
/* step back within the leaf, or to the previous leaf */
8114 if (path.slots[0] > 0) {
8117 ret = btrfs_prev_leaf(root, &path);
8120 } else if (ret > 0) {
8127 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8130 * Block group items come before extent items if they have the same
8131 * bytenr, so walk back one more just in case. Dear future traveller,
8132 * first congrats on mastering time travel. Now if it's not too much
8133 * trouble could you go back to 2006 and tell Chris to make the
8134 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8135 * EXTENT_ITEM_KEY please?
8137 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8138 if (path.slots[0] > 0) {
8141 ret = btrfs_prev_leaf(root, &path);
8144 } else if (ret > 0) {
8149 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8153 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8154 ret = btrfs_next_leaf(root, &path);
8156 fprintf(stderr, "Error going to next leaf "
8158 btrfs_release_path(&path);
8164 leaf = path.nodes[0];
8165 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8166 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* extent ends before the range starts */
8170 if (key.objectid + key.offset < bytenr) {
/* extent starts after the range ends */
8174 if (key.objectid > bytenr + num_bytes)
8177 if (key.objectid == bytenr) {
8178 if (key.offset >= num_bytes) {
8182 num_bytes -= key.offset;
8183 bytenr += key.offset;
8184 } else if (key.objectid < bytenr) {
8185 if (key.objectid + key.offset >= bytenr + num_bytes) {
8189 num_bytes = (bytenr + num_bytes) -
8190 (key.objectid + key.offset);
8191 bytenr = key.objectid + key.offset;
8193 if (key.objectid + key.offset < bytenr + num_bytes) {
8194 u64 new_start = key.objectid + key.offset;
8195 u64 new_bytes = bytenr + num_bytes - new_start;
8198 * Weird case, the extent is in the middle of
8199 * our range, we'll have to search one side
8200 * and then the other. Not sure if this happens
8201 * in real life, but no harm in coding it up
8202 * anyway just in case.
8204 btrfs_release_path(&path);
8205 ret = check_extent_exists(root, new_start,
8208 fprintf(stderr, "Right section didn't "
8212 num_bytes = key.objectid - bytenr;
8215 num_bytes = key.objectid - bytenr;
/* bytes are left over and no earlier error was recorded */
8222 if (num_bytes && !ret) {
8223 fprintf(stderr, "There are no extents for csum range "
8224 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8228 btrfs_release_path(&path);
/*
 * Walk the csum tree and check that checksummed ranges have matching extent
 * records. Each EXTENT_CSUM item covers data_len bytes (number of checksums
 * in the item times sectorsize). Ranges are accumulated in offset/num_bytes,
 * and check_extent_exists() is called on the accumulated range once an item
 * does not start where that range ends. The data itself is verified with
 * check_extent_csums() unless check_data_csum is zero.
 */
8232 static int check_csums(struct btrfs_root *root)
8234 struct btrfs_path path;
8235 struct extent_buffer *leaf;
8236 struct btrfs_key key;
8237 u64 offset = 0, num_bytes = 0;
8238 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8242 unsigned long leaf_offset;
/* from here on, root refers to the csum root */
8244 root = root->fs_info->csum_root;
8245 if (!extent_buffer_uptodate(root->node)) {
8246 fprintf(stderr, "No valid csum tree found\n");
8250 btrfs_init_path(&path);
8251 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8252 key.type = BTRFS_EXTENT_CSUM_KEY;
8254 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8256 fprintf(stderr, "Error searching csum tree %d\n", ret);
8257 btrfs_release_path(&path);
8261 if (ret > 0 && path.slots[0])
8266 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8267 ret = btrfs_next_leaf(root, &path);
8269 fprintf(stderr, "Error going to next leaf "
8276 leaf = path.nodes[0];
8278 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8279 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* bytes of data covered by the checksums stored in this item */
8284 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8285 csum_size) * root->fs_info->sectorsize;
8286 if (!check_data_csum)
8287 goto skip_csum_check;
8288 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8289 ret = check_extent_csums(root, key.offset, data_len,
8295 offset = key.offset;
/* this item does not start where the accumulated range ends */
8296 } else if (key.offset != offset + num_bytes) {
8297 ret = check_extent_exists(root, offset, num_bytes);
8299 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8300 "there is no extent record\n",
8301 offset, offset+num_bytes);
8304 offset = key.offset;
8307 num_bytes += data_len;
8311 btrfs_release_path(&path);
/*
 * Compare @key against @drop_key field by field, in (objectid, type, offset)
 * order.  run_next_block() calls this for node pointers found at
 * ri->drop_level, with ri->drop_key as the reference key.
 *
 * NOTE(review): presumably returns non-zero when @key sorts strictly before
 * @drop_key and 0 otherwise - confirm against the return statements.
 */
8315 static int is_dropped_key(struct btrfs_key *key,
8316 struct btrfs_key *drop_key) {
8317 if (key->objectid < drop_key->objectid)
8319 else if (key->objectid == drop_key->objectid) {
8320 if (key->type < drop_key->type)
8322 else if (key->type == drop_key->type) {
8323 if (key->offset < drop_key->offset)
8331 * Here are the rules for FULL_BACKREF.
8333 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8334 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8336 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
8337 * if it happened after the relocation occurred since we'll have dropped the
8338 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8339 * have no real way to know for sure.
8341 * We process the blocks one root at a time, and we start from the lowest root
8342 * objectid and go to the highest. So we can just lookup the owner backref for
8343 * the record and if we don't find it then we know it doesn't exist and we have
8346 * FIXME: if we ever start reclaiming root objectids then we need to fix this
8347 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8348 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Decide whether tree block @buf, reached while walking the tree described
 * by @ri, should carry BTRFS_BLOCK_FLAG_FULL_BACKREF.
 *
 * The decision is based on the extent_record cached for buf->start and on:
 * ri->objectid compared with BTRFS_FIRST_FREE_OBJECTID, whether @buf is the
 * root block of @ri (buf->start == ri->bytenr), the RELOC header flag, the
 * header owner compared with ri->objectid, and a tree backref lookup for
 * that owner.
 *
 * When FULL_BACKREF is chosen it is ORed into *flags.  In both outcomes the
 * result is compared with rec->flag_block_full_backref; if that field already
 * holds the opposite value (anything other than FLAG_UNSET or the value just
 * computed), rec->bad_full_backref is set.
 *
 * NOTE(review): run_next_block() treats a non-zero return as failure;
 * the return statements themselves are not visible here.
 */
8350 static int calc_extent_flag(struct cache_tree *extent_cache,
8351 struct extent_buffer *buf,
8352 struct root_item_record *ri,
8355 struct extent_record *rec;
8356 struct cache_extent *cache;
8357 struct tree_backref *tback;
8360 cache = lookup_cache_extent(extent_cache, buf->start, 1);
8361 /* we have added this extent before */
8365 rec = container_of(cache, struct extent_record, cache);
8368 * Except file/reloc tree, we can not have
8371 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8376 if (buf->start == ri->bytenr)
8379 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8382 owner = btrfs_header_owner(buf);
8383 if (owner == ri->objectid)
8386 tback = find_tree_backref(rec, 0, owner);
8391 if (rec->flag_block_full_backref != FLAG_UNSET &&
8392 rec->flag_block_full_backref != 0)
8393 rec->bad_full_backref = 1;
8396 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8397 if (rec->flag_block_full_backref != FLAG_UNSET &&
8398 rec->flag_block_full_backref != 1)
8399 rec->bad_full_backref = 1;
/*
 * Report on stderr that an item of @key_type was found in root @rootid.
 * The message has the form "Invalid key type(<type>) found in root(<root>)",
 * with the two names produced by print_key_type() and print_objectid().
 */
8403 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8405 fprintf(stderr, "Invalid key type(");
8406 print_key_type(stderr, 0, key_type);
8407 fprintf(stderr, ") found in root(");
8408 print_objectid(stderr, rootid, 0);
8409 fprintf(stderr, ")\n");
8413 * Check if the key is valid with its extent buffer.
8415 * This is an early check in case an invalid key exists in an extent buffer
8416 * This is not comprehensive yet, but should prevent wrong key/item passed
8419 static int check_type_with_root(u64 rootid, u8 key_type)
8422 /* Only valid in chunk tree */
8423 case BTRFS_DEV_ITEM_KEY:
8424 case BTRFS_CHUNK_ITEM_KEY:
8425 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8428 /* valid in csum and log tree */
8429 case BTRFS_CSUM_TREE_OBJECTID:
8430 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8434 case BTRFS_EXTENT_ITEM_KEY:
8435 case BTRFS_METADATA_ITEM_KEY:
8436 case BTRFS_BLOCK_GROUP_ITEM_KEY:
8437 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8440 case BTRFS_ROOT_ITEM_KEY:
8441 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8444 case BTRFS_DEV_EXTENT_KEY:
8445 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8451 report_mismatch_key_root(key_type, rootid);
/*
 * Process one pending tree block of the extent scan.
 *
 * pick_next_pending() fills @bits with candidate blocks; all of them are
 * added to @reada and submitted for readahead, and bits[0] is the block that
 * is actually processed.  That block is removed from @pending, @reada and
 * @nodes, read with read_tree_block() (a failed read is recorded through
 * record_bad_block_io()) and checked with check_block().
 *
 * Its FULL_BACKREF state comes from btrfs_lookup_extent_info() and/or
 * calc_extent_flag() (only the latter when init_extent_tree is set).  The
 * state is then corrected against @ri, the RELOC header flag and
 * ri->last_snapshot; a correction sets rec->bad_full_backref.  The final
 * state is stored in rec->flag_block_full_backref.
 *
 * Leaf: every item key is filtered through check_type_with_root() and then
 * dispatched on key.type to the matching process_*() or add_*_backref()
 * helper.  Orphan items are queued on the global delete_items list.  File
 * extent items that are not inline and have a non-zero disk_bytenr update
 * data_bytes_allocated / data_bytes_referenced and add a data backref.
 *
 * Node: every child pointer gets an extent record (add_extent_rec()) and a
 * tree backref, and the child is queued with add_pending() on either @nodes
 * or @pending.  Pointers at ri->drop_level are tested with is_dropped_key().
 *
 * On the way out btree_space_waste, total_btree_bytes, total_fs_tree_bytes
 * and total_extent_tree_bytes are updated and the buffer is released.
 *
 * @root: root used for fs_info and passed on to the helpers
 * @bits: scratch array filled by pick_next_pending()
 * @ri:   record of the root item whose tree is being walked
 * The cache arguments are the trees and lists that the helpers fill in.
 *
 * NOTE(review): rec starts as NULL and is only assigned from the
 * extent_cache lookup, yet rec->bad_full_backref and
 * rec->flag_block_full_backref are written further down - confirm that a
 * missing cache entry cannot reach those writes.
 */
8455 static int run_next_block(struct btrfs_root *root,
8456 struct block_info *bits,
8459 struct cache_tree *pending,
8460 struct cache_tree *seen,
8461 struct cache_tree *reada,
8462 struct cache_tree *nodes,
8463 struct cache_tree *extent_cache,
8464 struct cache_tree *chunk_cache,
8465 struct rb_root *dev_cache,
8466 struct block_group_tree *block_group_cache,
8467 struct device_extent_tree *dev_extent_cache,
8468 struct root_item_record *ri)
8470 struct btrfs_fs_info *fs_info = root->fs_info;
8471 struct extent_buffer *buf;
8472 struct extent_record *rec = NULL;
8483 struct btrfs_key key;
8484 struct cache_extent *cache;
8487 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8488 bits_nr, &reada_bits);
8493 for(i = 0; i < nritems; i++) {
8494 ret = add_cache_extent(reada, bits[i].start,
8499 /* fixme, get the parent transid */
8500 readahead_tree_block(fs_info, bits[i].start, 0);
/* bits[0] is the block handled by this call. */
8503 *last = bits[0].start;
8504 bytenr = bits[0].start;
8505 size = bits[0].size;
8507 cache = lookup_cache_extent(pending, bytenr, size);
8509 remove_cache_extent(pending, cache);
8512 cache = lookup_cache_extent(reada, bytenr, size);
8514 remove_cache_extent(reada, cache);
8517 cache = lookup_cache_extent(nodes, bytenr, size);
8519 remove_cache_extent(nodes, cache);
8522 cache = lookup_cache_extent(extent_cache, bytenr, size);
8524 rec = container_of(cache, struct extent_record, cache);
8525 gen = rec->parent_generation;
8528 /* fixme, get the real parent transid */
8529 buf = read_tree_block(root->fs_info, bytenr, gen);
8530 if (!extent_buffer_uptodate(buf)) {
8531 record_bad_block_io(root->fs_info,
8532 extent_cache, bytenr, size);
8536 nritems = btrfs_header_nritems(buf);
8539 if (!init_extent_tree) {
8540 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8541 btrfs_header_level(buf), 1, NULL,
8544 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8546 fprintf(stderr, "Couldn't calc extent flags\n");
8547 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8552 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8554 fprintf(stderr, "Couldn't calc extent flags\n");
8555 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8559 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8561 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8562 ri->objectid == btrfs_header_owner(buf)) {
8564 * Ok we got to this block from it's original owner and
8565 * we have FULL_BACKREF set. Relocation can leave
8566 * converted blocks over so this is altogether possible,
8567 * however it's not possible if the generation > the
8568 * last snapshot, so check for this case.
8570 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8571 btrfs_header_generation(buf) > ri->last_snapshot) {
8572 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8573 rec->bad_full_backref = 1;
8578 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8579 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8580 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8581 rec->bad_full_backref = 1;
8585 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8586 rec->flag_block_full_backref = 1;
8590 rec->flag_block_full_backref = 0;
8592 owner = btrfs_header_owner(buf);
8595 ret = check_block(root, extent_cache, buf, flags);
8599 if (btrfs_is_leaf(buf)) {
8600 btree_space_waste += btrfs_leaf_free_space(root, buf);
8601 for (i = 0; i < nritems; i++) {
8602 struct btrfs_file_extent_item *fi;
8603 btrfs_item_key_to_cpu(buf, &key, i);
8605 * Check key type against the leaf owner.
8606 * Could filter quite a lot of early error if
8609 if (check_type_with_root(btrfs_header_owner(buf),
8611 fprintf(stderr, "ignoring invalid key\n");
8614 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8615 process_extent_item(root, extent_cache, buf,
8619 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8620 process_extent_item(root, extent_cache, buf,
8624 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8626 btrfs_item_size_nr(buf, i);
8629 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8630 process_chunk_item(chunk_cache, &key, buf, i);
8633 if (key.type == BTRFS_DEV_ITEM_KEY) {
8634 process_device_item(dev_cache, &key, buf, i);
8637 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8638 process_block_group_item(block_group_cache,
8642 if (key.type == BTRFS_DEV_EXTENT_KEY) {
8643 process_device_extent_item(dev_extent_cache,
8648 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8649 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8650 process_extent_ref_v0(extent_cache, buf, i);
8657 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8658 ret = add_tree_backref(extent_cache,
8659 key.objectid, 0, key.offset, 0);
8662 "add_tree_backref failed (leaf tree block): %s",
8666 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8667 ret = add_tree_backref(extent_cache,
8668 key.objectid, key.offset, 0, 0);
8671 "add_tree_backref failed (leaf shared block): %s",
8675 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8676 struct btrfs_extent_data_ref *ref;
8677 ref = btrfs_item_ptr(buf, i,
8678 struct btrfs_extent_data_ref);
8679 add_data_backref(extent_cache,
8681 btrfs_extent_data_ref_root(buf, ref),
8682 btrfs_extent_data_ref_objectid(buf,
8684 btrfs_extent_data_ref_offset(buf, ref),
8685 btrfs_extent_data_ref_count(buf, ref),
8686 0, root->fs_info->sectorsize);
8689 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8690 struct btrfs_shared_data_ref *ref;
8691 ref = btrfs_item_ptr(buf, i,
8692 struct btrfs_shared_data_ref);
8693 add_data_backref(extent_cache,
8694 key.objectid, key.offset, 0, 0, 0,
8695 btrfs_shared_data_ref_count(buf, ref),
8696 0, root->fs_info->sectorsize);
8699 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8700 struct bad_item *bad;
8702 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8706 bad = malloc(sizeof(struct bad_item));
8709 INIT_LIST_HEAD(&bad->list);
8710 memcpy(&bad->key, &key,
8711 sizeof(struct btrfs_key));
8712 bad->root_id = owner;
8713 list_add_tail(&bad->list, &delete_items);
8716 if (key.type != BTRFS_EXTENT_DATA_KEY)
8718 fi = btrfs_item_ptr(buf, i,
8719 struct btrfs_file_extent_item);
8720 if (btrfs_file_extent_type(buf, fi) ==
8721 BTRFS_FILE_EXTENT_INLINE)
8723 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8726 data_bytes_allocated +=
8727 btrfs_file_extent_disk_num_bytes(buf, fi);
8728 if (data_bytes_allocated < root->fs_info->sectorsize) {
8731 data_bytes_referenced +=
8732 btrfs_file_extent_num_bytes(buf, fi);
8733 add_data_backref(extent_cache,
8734 btrfs_file_extent_disk_bytenr(buf, fi),
8735 parent, owner, key.objectid, key.offset -
8736 btrfs_file_extent_offset(buf, fi), 1, 1,
8737 btrfs_file_extent_disk_num_bytes(buf, fi));
8741 struct btrfs_key first_key;
8743 first_key.objectid = 0;
8746 btrfs_item_key_to_cpu(buf, &first_key, 0);
8747 level = btrfs_header_level(buf);
8748 for (i = 0; i < nritems; i++) {
8749 struct extent_record tmpl;
8751 ptr = btrfs_node_blockptr(buf, i);
8752 size = root->fs_info->nodesize;
8753 btrfs_node_key_to_cpu(buf, &key, i);
8755 if ((level == ri->drop_level)
8756 && is_dropped_key(&key, &ri->drop_key)) {
8761 memset(&tmpl, 0, sizeof(tmpl));
8762 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8763 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8768 tmpl.max_size = size;
8769 ret = add_extent_rec(extent_cache, &tmpl);
8773 ret = add_tree_backref(extent_cache, ptr, parent,
8777 "add_tree_backref failed (non-leaf block): %s",
8783 add_pending(nodes, seen, ptr, size);
8785 add_pending(pending, seen, ptr, size);
8788 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
8789 nritems) * sizeof(struct btrfs_key_ptr);
8791 total_btree_bytes += buf->len;
8792 if (fs_root_objectid(btrfs_header_owner(buf)))
8793 total_fs_tree_bytes += buf->len;
8794 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8795 total_extent_tree_bytes += buf->len;
8797 free_extent_buffer(buf);
/*
 * Queue the root block @buf of a tree for the extent scan.
 *
 * The block is queued with add_pending() on @nodes when its header level is
 * above 0 and on @pending otherwise.  An extent record covering
 * [buf->start, buf->start + buf->len) is added to @extent_cache, followed by
 * a tree backref: a backref with parent buf->start when @objectid is
 * BTRFS_TREE_RELOC_OBJECTID or the block predates BTRFS_MIXED_BACKREF_REV,
 * and a backref keyed by root @objectid otherwise.
 *
 * NOTE(review): the result of add_extent_rec() is not stored here; only the
 * add_tree_backref() result is kept in ret.
 */
8801 static int add_root_to_pending(struct extent_buffer *buf,
8802 struct cache_tree *extent_cache,
8803 struct cache_tree *pending,
8804 struct cache_tree *seen,
8805 struct cache_tree *nodes,
8808 struct extent_record tmpl;
8811 if (btrfs_header_level(buf) > 0)
8812 add_pending(nodes, seen, buf->start, buf->len);
8814 add_pending(pending, seen, buf->start, buf->len);
8816 memset(&tmpl, 0, sizeof(tmpl));
8817 tmpl.start = buf->start;
8822 tmpl.max_size = buf->len;
8823 add_extent_rec(extent_cache, &tmpl);
8825 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8826 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8827 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8830 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8835 /* as we fix the tree, we might be deleting blocks that
8836 * we're tracking for repair. This hook makes sure we
8837 * remove any backrefs for blocks as we are fixing them.
/*
 * Keep the in-memory extent records in step with an extent reference that is
 * being dropped during repair.
 *
 * The record covering [bytenr, bytenr + num_bytes) is looked up in
 * fs_info->fsck_extent_cache.  is_data is derived from @owner
 * (owner >= BTRFS_FIRST_FREE_OBJECTID); one branch works on the matching
 * data backref, the other on the matching tree backref.  In both, the
 * found_ref / found_extent_tree state of the backref and the counters
 * rec->refs and rec->extent_item_refs are reduced, by refs_to_drop for data
 * and by one for tree blocks.  maybe_free_extent_rec() is called last so the
 * record can be released.
 *
 * NOTE(review): both branches erase the backref from rec->backref_tree when
 * found_extent_tree is clear but found_ref is still set.  Erasing when both
 * are clear looks like the more natural condition - confirm the intent.
 */
8839 static int free_extent_hook(struct btrfs_trans_handle *trans,
8840 struct btrfs_root *root,
8841 u64 bytenr, u64 num_bytes, u64 parent,
8842 u64 root_objectid, u64 owner, u64 offset,
8845 struct extent_record *rec;
8846 struct cache_extent *cache;
8848 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8850 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8851 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8855 rec = container_of(cache, struct extent_record, cache);
8857 struct data_backref *back;
8858 back = find_data_backref(rec, parent, root_objectid, owner,
8859 offset, 1, bytenr, num_bytes);
8862 if (back->node.found_ref) {
8863 back->found_ref -= refs_to_drop;
8865 rec->refs -= refs_to_drop;
8867 if (back->node.found_extent_tree) {
8868 back->num_refs -= refs_to_drop;
8869 if (rec->extent_item_refs)
8870 rec->extent_item_refs -= refs_to_drop;
8872 if (back->found_ref == 0)
8873 back->node.found_ref = 0;
8874 if (back->num_refs == 0)
8875 back->node.found_extent_tree = 0;
8877 if (!back->node.found_extent_tree && back->node.found_ref) {
8878 rb_erase(&back->node.node, &rec->backref_tree);
8882 struct tree_backref *back;
8883 back = find_tree_backref(rec, parent, root_objectid);
8886 if (back->node.found_ref) {
8889 back->node.found_ref = 0;
8891 if (back->node.found_extent_tree) {
8892 if (rec->extent_item_refs)
8893 rec->extent_item_refs--;
8894 back->node.found_extent_tree = 0;
8896 if (!back->node.found_extent_tree && back->node.found_ref) {
8897 rb_erase(&back->node.node, &rec->backref_tree);
8901 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete from the extent tree the extent-related items whose key objectid is
 * @bytenr.
 *
 * The search key starts with offset (u64)-1 and is lowered as the loop
 * proceeds.  Items of type EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF,
 * EXTENT_DATA_REF, EXTENT_REF_V0, SHARED_BLOCK_REF and SHARED_DATA_REF are
 * reported on stderr and removed with btrfs_del_item().  Any other type is
 * stepped over by moving the search key just below the item that was found.
 *
 * After an EXTENT_ITEM or METADATA_ITEM has been deleted the block group
 * accounting is adjusted through btrfs_update_block_group(); the length is
 * found_key.offset for EXTENT_ITEM and the nodesize for METADATA_ITEM.
 *
 * @path is released before the function leaves.
 */
8906 static int delete_extent_records(struct btrfs_trans_handle *trans,
8907 struct btrfs_root *root,
8908 struct btrfs_path *path,
8911 struct btrfs_key key;
8912 struct btrfs_key found_key;
8913 struct extent_buffer *leaf;
8918 key.objectid = bytenr;
8920 key.offset = (u64)-1;
8923 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8930 if (path->slots[0] == 0)
8936 leaf = path->nodes[0];
8937 slot = path->slots[0];
8939 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8940 if (found_key.objectid != bytenr)
/* Not an extent-related item: step the search key below it. */
8943 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8944 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8945 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8946 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8947 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8948 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8949 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8950 btrfs_release_path(path);
8951 if (found_key.type == 0) {
8952 if (found_key.offset == 0)
8954 key.offset = found_key.offset - 1;
8955 key.type = found_key.type;
8957 key.type = found_key.type - 1;
8958 key.offset = (u64)-1;
8962 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8963 found_key.objectid, found_key.type, found_key.offset);
8965 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8968 btrfs_release_path(path);
8970 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8971 found_key.type == BTRFS_METADATA_ITEM_KEY) {
8972 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8973 found_key.offset : root->fs_info->nodesize;
8975 ret = btrfs_update_block_group(trans, root, bytenr,
8982 btrfs_release_path(path);
8987 * for a single backref, this will allocate a new extent
8988 * and add the backref to it.
8990 static int record_extent(struct btrfs_trans_handle *trans,
8991 struct btrfs_fs_info *info,
8992 struct btrfs_path *path,
8993 struct extent_record *rec,
8994 struct extent_backref *back,
8995 int allocated, u64 flags)
8998 struct btrfs_root *extent_root = info->extent_root;
8999 struct extent_buffer *leaf;
9000 struct btrfs_key ins_key;
9001 struct btrfs_extent_item *ei;
9002 struct data_backref *dback;
9003 struct btrfs_tree_block_info *bi;
9006 rec->max_size = max_t(u64, rec->max_size,
9010 u32 item_size = sizeof(*ei);
9013 item_size += sizeof(*bi);
9015 ins_key.objectid = rec->start;
9016 ins_key.offset = rec->max_size;
9017 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9019 ret = btrfs_insert_empty_item(trans, extent_root, path,
9020 &ins_key, item_size);
9024 leaf = path->nodes[0];
9025 ei = btrfs_item_ptr(leaf, path->slots[0],
9026 struct btrfs_extent_item);
9028 btrfs_set_extent_refs(leaf, ei, 0);
9029 btrfs_set_extent_generation(leaf, ei, rec->generation);
9031 if (back->is_data) {
9032 btrfs_set_extent_flags(leaf, ei,
9033 BTRFS_EXTENT_FLAG_DATA);
9035 struct btrfs_disk_key copy_key;;
9037 bi = (struct btrfs_tree_block_info *)(ei + 1);
9038 memset_extent_buffer(leaf, 0, (unsigned long)bi,
9041 btrfs_set_disk_key_objectid(©_key,
9042 rec->info_objectid);
9043 btrfs_set_disk_key_type(©_key, 0);
9044 btrfs_set_disk_key_offset(©_key, 0);
9046 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9047 btrfs_set_tree_block_key(leaf, bi, ©_key);
9049 btrfs_set_extent_flags(leaf, ei,
9050 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9053 btrfs_mark_buffer_dirty(leaf);
9054 ret = btrfs_update_block_group(trans, extent_root, rec->start,
9055 rec->max_size, 1, 0);
9058 btrfs_release_path(path);
9061 if (back->is_data) {
9065 dback = to_data_backref(back);
9066 if (back->full_backref)
9067 parent = dback->parent;
9071 for (i = 0; i < dback->found_ref; i++) {
9072 /* if parent != 0, we're doing a full backref
9073 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9074 * just makes the backref allocator create a data
9077 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9078 rec->start, rec->max_size,
9082 BTRFS_FIRST_FREE_OBJECTID :
9088 fprintf(stderr, "adding new data backref"
9089 " on %llu %s %llu owner %llu"
9090 " offset %llu found %d\n",
9091 (unsigned long long)rec->start,
9092 back->full_backref ?
9094 back->full_backref ?
9095 (unsigned long long)parent :
9096 (unsigned long long)dback->root,
9097 (unsigned long long)dback->owner,
9098 (unsigned long long)dback->offset,
9102 struct tree_backref *tback;
9104 tback = to_tree_backref(back);
9105 if (back->full_backref)
9106 parent = tback->parent;
9110 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9111 rec->start, rec->max_size,
9112 parent, tback->root, 0, 0);
9113 fprintf(stderr, "adding new tree backref on "
9114 "start %llu len %llu parent %llu root %llu\n",
9115 rec->start, rec->max_size, parent, tback->root);
9118 btrfs_release_path(path);
/*
 * Linear search of the list @entries for the extent_entry whose bytenr and
 * bytes both equal the arguments.
 *
 * NOTE(review): presumably returns the matching entry, or NULL when there is
 * none - the return statements are not visible here.
 */
9122 static struct extent_entry *find_entry(struct list_head *entries,
9123 u64 bytenr, u64 bytes)
9125 struct extent_entry *entry = NULL;
9127 list_for_each_entry(entry, entries, list) {
9128 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Choose, from the list @entries, the extent_entry that should be trusted,
 * based on the count field of each entry.
 *
 * Entries whose broken counter equals their count are treated specially
 * (they are not trusted, per the comment below).  While walking the list the
 * function tracks the current candidate in best and the previously visited
 * entry in prev; a candidate whose count equals that of another entry is not
 * considered conclusive and the search continues.
 *
 * NOTE(review): presumably returns NULL when no entry has a strictly highest
 * count - callers print an "evenly split" message in that case; confirm.
 */
9135 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9137 struct extent_entry *entry, *best = NULL, *prev = NULL;
9139 list_for_each_entry(entry, entries, list) {
9141 * If there are as many broken entries as entries then we know
9142 * not to trust this particular entry.
9144 if (entry->broken == entry->count)
9148 * Special case, when there are only two entries and 'best' is
9158 * If our current entry == best then we can't be sure our best
9159 * is really the best, so we need to keep searching.
9161 if (best && best->count == entry->count) {
9167 /* Prev == entry, not good enough, have to keep searching */
9168 if (!prev->broken && prev->count == entry->count)
9172 best = (prev->count > entry->count) ? prev : entry;
9173 else if (best->count < entry->count)
/*
 * Rewrite the file extent item behind data backref @dback so that it refers
 * to the extent described by @entry.
 *
 * Step 1 (read only): open root dback->root, search from
 * (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset) and walk forward until
 * a file extent item with disk_bytenr == dback->disk_bytenr and
 * disk_num_bytes == dback->bytes is found.  The walk gives up when the items
 * of dback->owner run out.
 *
 * Step 2: start a transaction and search the key found in step 1 again with
 * COW enabled.
 *
 * Step 3: fix the item.  disk_bytenr becomes entry->bytenr and the extent
 * offset is shifted to account for the moved start.  A reference that would
 * fall outside [entry->bytenr, entry->bytenr + entry->bytes) is refused, and
 * so is a compressed extent whose start differs from the entry.
 * disk_num_bytes is set to entry->bytes; ram_bytes is set as well unless the
 * extent is compressed.
 *
 * The transaction is committed and @path released in all step 3 outcomes
 * that reach the end of the function.  Returns ret when it is non-zero,
 * otherwise the result of btrfs_commit_transaction().
 */
9181 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9182 struct data_backref *dback, struct extent_entry *entry)
9184 struct btrfs_trans_handle *trans;
9185 struct btrfs_root *root;
9186 struct btrfs_file_extent_item *fi;
9187 struct extent_buffer *leaf;
9188 struct btrfs_key key;
9192 key.objectid = dback->root;
9193 key.type = BTRFS_ROOT_ITEM_KEY;
9194 key.offset = (u64)-1;
9195 root = btrfs_read_fs_root(info, &key);
9197 fprintf(stderr, "Couldn't find root for our ref\n");
9202 * The backref points to the original offset of the extent if it was
9203 * split, so we need to search down to the offset we have and then walk
9204 * forward until we find the backref we're looking for.
9206 key.objectid = dback->owner;
9207 key.type = BTRFS_EXTENT_DATA_KEY;
9208 key.offset = dback->offset;
9209 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9211 fprintf(stderr, "Error looking up ref %d\n", ret);
9216 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9217 ret = btrfs_next_leaf(root, path);
9219 fprintf(stderr, "Couldn't find our ref, next\n");
9223 leaf = path->nodes[0];
9224 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9225 if (key.objectid != dback->owner ||
9226 key.type != BTRFS_EXTENT_DATA_KEY) {
9227 fprintf(stderr, "Couldn't find our ref, search\n");
9230 fi = btrfs_item_ptr(leaf, path->slots[0],
9231 struct btrfs_file_extent_item);
9232 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9233 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9235 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9240 btrfs_release_path(path);
9242 trans = btrfs_start_transaction(root, 1);
9244 return PTR_ERR(trans);
9247 * Ok we have the key of the file extent we want to fix, now we can cow
9248 * down to the thing and fix it.
9250 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9252 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9253 key.objectid, key.type, key.offset, ret);
9257 fprintf(stderr, "Well that's odd, we just found this key "
9258 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9263 leaf = path->nodes[0];
9264 fi = btrfs_item_ptr(leaf, path->slots[0],
9265 struct btrfs_file_extent_item);
9267 if (btrfs_file_extent_compression(leaf, fi) &&
9268 dback->disk_bytenr != entry->bytenr) {
9269 fprintf(stderr, "Ref doesn't match the record start and is "
9270 "compressed, please take a btrfs-image of this file "
9271 "system and send it to a btrfs developer so they can "
9272 "complete this functionality for bytenr %Lu\n",
9273 dback->disk_bytenr);
9278 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9279 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9280 } else if (dback->disk_bytenr > entry->bytenr) {
9281 u64 off_diff, offset;
9283 off_diff = dback->disk_bytenr - entry->bytenr;
9284 offset = btrfs_file_extent_offset(leaf, fi);
9285 if (dback->disk_bytenr + offset +
9286 btrfs_file_extent_num_bytes(leaf, fi) >
9287 entry->bytenr + entry->bytes) {
9288 fprintf(stderr, "Ref is past the entry end, please "
9289 "take a btrfs-image of this file system and "
9290 "send it to a btrfs developer, ref %Lu\n",
9291 dback->disk_bytenr);
9296 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9297 btrfs_set_file_extent_offset(leaf, fi, offset);
9298 } else if (dback->disk_bytenr < entry->bytenr) {
9301 offset = btrfs_file_extent_offset(leaf, fi);
9302 if (dback->disk_bytenr + offset < entry->bytenr) {
9303 fprintf(stderr, "Ref is before the entry start, please"
9304 " take a btrfs-image of this file system and "
9305 "send it to a btrfs developer, ref %Lu\n",
9306 dback->disk_bytenr);
9311 offset += dback->disk_bytenr;
9312 offset -= entry->bytenr;
9313 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9314 btrfs_set_file_extent_offset(leaf, fi, offset);
9317 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9320 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9321 * only do this if we aren't using compression, otherwise it's a
9324 if (!btrfs_file_extent_compression(leaf, fi))
9325 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9327 printf("ram bytes may be wrong?\n");
9328 btrfs_mark_buffer_dirty(leaf);
9330 err = btrfs_commit_transaction(trans, root);
9331 btrfs_release_path(path);
9332 return ret ? ret : err;
/*
 * Make the data backrefs of @rec agree on one (disk_bytenr, bytes) pair and
 * repair the file extent items that disagree.
 *
 * Pass 1 walks rec->backref_tree and groups every data backref that is not a
 * full backref and has found_ref set into extent_entry buckets keyed by
 * (disk_bytenr, bytes).  It also notes whether any of them differs from
 * (rec->start, rec->nr) or is marked broken.  With at most one bucket and no
 * mismatch there is nothing to do.
 *
 * Otherwise find_most_right_entry() picks the bucket to trust.  When the
 * backrefs cannot decide among themselves, the extent record is consulted:
 * if no bucket matches (rec->start, rec->nr), one is added for it, but only
 * when broken backrefs were seen and the record was really found in the
 * extent tree (rec->found_rec).  If there is still no winner, or the winner
 * does not start at rec->start, the case is reported as not repairable.
 *
 * Pass 2 calls repair_ref() for every counted backref that does not match
 * the winning bucket.  The bucket list is emptied before returning.
 */
9335 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9336 struct extent_record *rec)
9338 struct extent_backref *back, *tmp;
9339 struct data_backref *dback;
9340 struct extent_entry *entry, *best = NULL;
9343 int broken_entries = 0;
9348 * Metadata is easy and the backrefs should always agree on bytenr and
9349 * size, if not we've got bigger issues.
9354 rbtree_postorder_for_each_entry_safe(back, tmp,
9355 &rec->backref_tree, node) {
9356 if (back->full_backref || !back->is_data)
9359 dback = to_data_backref(back);
9362 * We only pay attention to backrefs that we found a real
9365 if (dback->found_ref == 0)
9369 * For now we only catch when the bytes don't match, not the
9370 * bytenr. We can easily do this at the same time, but I want
9371 * to have a fs image to test on before we just add repair
9372 * functionality willy-nilly so we know we won't screw up the
9376 entry = find_entry(&entries, dback->disk_bytenr,
9379 entry = malloc(sizeof(struct extent_entry));
9384 memset(entry, 0, sizeof(*entry));
9385 entry->bytenr = dback->disk_bytenr;
9386 entry->bytes = dback->bytes;
9387 list_add_tail(&entry->list, &entries);
9392 * If we only have on entry we may think the entries agree when
9393 * in reality they don't so we have to do some extra checking.
9395 if (dback->disk_bytenr != rec->start ||
9396 dback->bytes != rec->nr || back->broken)
9407 /* Yay all the backrefs agree, carry on good sir */
9408 if (nr_entries <= 1 && !mismatch)
9411 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9412 "%Lu\n", rec->start);
9415 * First we want to see if the backrefs can agree amongst themselves who
9416 * is right, so figure out which one of the entries has the highest
9419 best = find_most_right_entry(&entries);
9422 * Ok so we may have an even split between what the backrefs think, so
9423 * this is where we use the extent ref to see what it thinks.
9426 entry = find_entry(&entries, rec->start, rec->nr);
9427 if (!entry && (!broken_entries || !rec->found_rec)) {
9428 fprintf(stderr, "Backrefs don't agree with each other "
9429 "and extent record doesn't agree with anybody,"
9430 " so we can't fix bytenr %Lu bytes %Lu\n",
9431 rec->start, rec->nr);
9434 } else if (!entry) {
9436 * Ok our backrefs were broken, we'll assume this is the
9437 * correct value and add an entry for this range.
9439 entry = malloc(sizeof(struct extent_entry));
9444 memset(entry, 0, sizeof(*entry));
9445 entry->bytenr = rec->start;
9446 entry->bytes = rec->nr;
9447 list_add_tail(&entry->list, &entries);
9451 best = find_most_right_entry(&entries);
9453 fprintf(stderr, "Backrefs and extent record evenly "
9454 "split on who is right, this is going to "
9455 "require user input to fix bytenr %Lu bytes "
9456 "%Lu\n", rec->start, rec->nr);
9463 * I don't think this can happen currently as we'll abort() if we catch
9464 * this case higher up, but in case somebody removes that we still can't
9465 * deal with it properly here yet, so just bail out of that's the case.
9467 if (best->bytenr != rec->start) {
9468 fprintf(stderr, "Extent start and backref starts don't match, "
9469 "please use btrfs-image on this file system and send "
9470 "it to a btrfs developer so they can make fsck fix "
9471 "this particular case. bytenr is %Lu, bytes is %Lu\n",
9472 rec->start, rec->nr);
9478 * Ok great we all agreed on an extent record, let's go find the real
9479 * references and fix up the ones that don't match.
9481 rbtree_postorder_for_each_entry_safe(back, tmp,
9482 &rec->backref_tree, node) {
9483 if (back->full_backref || !back->is_data)
9486 dback = to_data_backref(back);
9489 * Still ignoring backrefs that don't have a real ref attached
9492 if (dback->found_ref == 0)
9495 if (dback->bytes == best->bytes &&
9496 dback->disk_bytenr == best->bytenr)
9499 ret = repair_ref(info, path, dback, best);
9505 * Ok we messed with the actual refs, which means we need to drop our
9506 * entire cache and go back and rescan. I know this is a huge pain and
9507 * adds a lot of extra work, but it's the only way to be safe. Once all
9508 * the backrefs agree we may not need to do anything to the extent
9513 while (!list_empty(&entries)) {
9514 entry = list_entry(entries.next, struct extent_entry, list);
9515 list_del_init(&entry->list);
/*
 * Replace @rec with its single duplicate when @rec itself was never found in
 * the extent tree.
 *
 * Nothing is done when rec->found_rec is set or when there is more than one
 * duplicate.  Otherwise @rec is removed from @extent_cache and the first
 * record on rec->dups ("good") takes its place: its list heads and check
 * state are reset, it inherits rec->refs and the rec->backrefs list, and its
 * cache range is set to [good->start, good->start + good->nr).
 *
 * Records still in the cache that overlap the new range are then absorbed.
 * A record that has found_rec set or has duplicates of its own becomes a
 * duplicate of "good" (and "good" is put on the global duplicate_extents
 * list); any other record just contributes its refs and backrefs.  Finally
 * "good" is inserted into @extent_cache.
 *
 * The last return yields 0 when "good" ended up with duplicates of its own
 * and 1 when it did not.
 */
9521 static int process_duplicates(struct cache_tree *extent_cache,
9522 struct extent_record *rec)
9524 struct extent_record *good, *tmp;
9525 struct cache_extent *cache;
9529 * If we found a extent record for this extent then return, or if we
9530 * have more than one duplicate we are likely going to need to delete
9533 if (rec->found_rec || rec->num_duplicates > 1)
9536 /* Shouldn't happen but just in case */
9537 BUG_ON(!rec->num_duplicates);
9540 * So this happens if we end up with a backref that doesn't match the
9541 * actual extent entry. So either the backref is bad or the extent
9542 * entry is bad. Either way we want to have the extent_record actually
9543 * reflect what we found in the extent_tree, so we need to take the
9544 * duplicate out and use that as the extent_record since the only way we
9545 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9547 remove_cache_extent(extent_cache, &rec->cache);
9549 good = to_extent_record(rec->dups.next);
9550 list_del_init(&good->list);
9551 INIT_LIST_HEAD(&good->backrefs);
9552 INIT_LIST_HEAD(&good->dups);
9553 good->cache.start = good->start;
9554 good->cache.size = good->nr;
9555 good->content_checked = 0;
9556 good->owner_ref_checked = 0;
9557 good->num_duplicates = 0;
9558 good->refs = rec->refs;
9559 list_splice_init(&rec->backrefs, &good->backrefs);
9561 cache = lookup_cache_extent(extent_cache, good->start,
9565 tmp = container_of(cache, struct extent_record, cache);
9568 * If we find another overlapping extent and it's found_rec is
9569 * set then it's a duplicate and we need to try and delete
9572 if (tmp->found_rec || tmp->num_duplicates > 0) {
9573 if (list_empty(&good->list))
9574 list_add_tail(&good->list,
9575 &duplicate_extents);
9576 good->num_duplicates += tmp->num_duplicates + 1;
9577 list_splice_init(&tmp->dups, &good->dups);
9578 list_del_init(&tmp->list);
9579 list_add_tail(&tmp->list, &good->dups);
9580 remove_cache_extent(extent_cache, &tmp->cache);
9585 * Ok we have another non extent item backed extent rec, so lets
9586 * just add it to this extent and carry on like we did above.
9588 good->refs += tmp->refs;
9589 list_splice_init(&tmp->backrefs, &good->backrefs);
9590 remove_cache_extent(extent_cache, &tmp->cache);
9593 ret = insert_cache_extent(extent_cache, &good->cache);
9596 return good->num_duplicates ? 0 : 1;
/*
 * Delete the duplicate extent items of @rec from the extent tree, keeping
 * one record ("good") that covers the others.
 *
 * The first loop over rec->dups selects "good" by comparing start and nr of
 * the candidates; overlapping extents that do not completely cover each
 * other are reported and not handled.  Every record other than "good" is
 * moved to a local delete list.
 *
 * Inside one transaction on the extent root, each listed record that was
 * really found in the extent tree (found_rec set) is looked up as
 * (start, BTRFS_EXTENT_ITEM_KEY, nr) and removed with btrfs_del_item().
 * A metadata record on the list is reported as unexpected.
 *
 * Both the delete list and rec->dups are emptied before returning, and
 * rec->num_duplicates is cleared when nothing failed and nothing was
 * deleted.
 *
 * Returns ret when it is non-zero, otherwise nr_del.
 */
9599 static int delete_duplicate_records(struct btrfs_root *root,
9600 struct extent_record *rec)
9602 struct btrfs_trans_handle *trans;
9603 LIST_HEAD(delete_list);
9604 struct btrfs_path path;
9605 struct extent_record *tmp, *good, *n;
9608 struct btrfs_key key;
9610 btrfs_init_path(&path);
9613 /* Find the record that covers all of the duplicates. */
9614 list_for_each_entry(tmp, &rec->dups, list) {
9615 if (good->start < tmp->start)
9617 if (good->nr > tmp->nr)
9620 if (tmp->start + tmp->nr < good->start + good->nr) {
9621 fprintf(stderr, "Ok we have overlapping extents that "
9622 "aren't completely covered by each other, this "
9623 "is going to require more careful thought. "
9624 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9625 tmp->start, tmp->nr, good->start, good->nr);
9632 list_add_tail(&rec->list, &delete_list);
9634 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9637 list_move_tail(&tmp->list, &delete_list);
/* All deletions below run against the extent tree. */
9640 root = root->fs_info->extent_root;
9641 trans = btrfs_start_transaction(root, 1);
9642 if (IS_ERR(trans)) {
9643 ret = PTR_ERR(trans);
9647 list_for_each_entry(tmp, &delete_list, list) {
9648 if (tmp->found_rec == 0)
9650 key.objectid = tmp->start;
9651 key.type = BTRFS_EXTENT_ITEM_KEY;
9652 key.offset = tmp->nr;
9654 /* Shouldn't happen but just in case */
9655 if (tmp->metadata) {
9656 fprintf(stderr, "Well this shouldn't happen, extent "
9657 "record overlaps but is metadata? "
9658 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9662 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9668 ret = btrfs_del_item(trans, root, &path);
9671 btrfs_release_path(&path);
9674 err = btrfs_commit_transaction(trans, root);
9678 while (!list_empty(&delete_list)) {
9679 tmp = to_extent_record(delete_list.next);
9680 list_del_init(&tmp->list);
9686 while (!list_empty(&rec->dups)) {
9687 tmp = to_extent_record(rec->dups.next);
9688 list_del_init(&tmp->list);
9692 btrfs_release_path(&path);
9694 if (!ret && !nr_del)
9695 rec->num_duplicates = 0;
9697 return ret ? ret : nr_del;
/*
 * Walk the backref tree of @rec and, for data backrefs that are not full
 * backrefs and have not been found yet, look up the EXTENT_DATA item
 * (owner, offset) in the fs root named by the backref. The disk bytenr and
 * length read from that item are stored into the backref and its found_ref
 * count is bumped.
 *
 * @path is used for the lookups and released after each one.
 * Returns PTR_ERR(root) when reading the fs root fails with an error other
 * than -ENOENT.
 *
 * NOTE(review): the inner "struct extent_record *tmp" shadows the iteration
 * cursor "tmp" declared at function scope.
 */
9700 static int find_possible_backrefs(struct btrfs_fs_info *info,
9701 struct btrfs_path *path,
9702 struct cache_tree *extent_cache,
9703 struct extent_record *rec)
9705 struct btrfs_root *root;
9706 struct extent_backref *back, *tmp;
9707 struct data_backref *dback;
9708 struct cache_extent *cache;
9709 struct btrfs_file_extent_item *fi;
9710 struct btrfs_key key;
9714 rbtree_postorder_for_each_entry_safe(back, tmp,
9715 &rec->backref_tree, node) {
9716 /* Don't care about full backrefs (poor unloved backrefs) */
9717 if (back->full_backref || !back->is_data)
9720 dback = to_data_backref(back);
9722 /* We found this one, we don't need to do a lookup */
9723 if (dback->found_ref)
/* Resolve the root the backref claims to belong to. */
9726 key.objectid = dback->root;
9727 key.type = BTRFS_ROOT_ITEM_KEY;
9728 key.offset = (u64)-1;
9730 root = btrfs_read_fs_root(info, &key);
9732 /* No root, definitely a bad ref, skip */
9733 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9735 /* Other err, exit */
9737 return PTR_ERR(root);
/* Look up the file extent item the backref describes. */
9739 key.objectid = dback->owner;
9740 key.type = BTRFS_EXTENT_DATA_KEY;
9741 key.offset = dback->offset;
9742 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9744 btrfs_release_path(path);
9747 /* Didn't find it, we can carry on */
/* Read the on-disk location recorded in the file extent item. */
9752 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9753 struct btrfs_file_extent_item);
9754 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9755 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9756 btrfs_release_path(path);
/* Is there already an extent record covering that bytenr? */
9757 cache = lookup_cache_extent(extent_cache, bytenr, 1);
9759 struct extent_record *tmp;
9760 tmp = container_of(cache, struct extent_record, cache);
9763 * If we found an extent record for the bytenr for this
9764 * particular backref then we can't add it to our
9765 * current extent record. We only want to add backrefs
9766 * that don't have a corresponding extent item in the
9767 * extent tree since they likely belong to this record
9768 * and we need to fix it if it doesn't match bytenrs.
/* Adopt the location found in the fs tree for this backref. */
9774 dback->found_ref += 1;
9775 dback->disk_bytenr = bytenr;
9776 dback->bytes = bytes;
9779 * Set this so the verify backref code knows not to trust the
9780 * values in this backref.
9789 * Record orphan data ref into corresponding root.
9791 * Return 0 if the extent item contains data ref and recorded.
9792 * Return 1 if the extent item contains no useful data ref
9793 * On that case, it may contains only shared_dataref or metadata backref
9794 * or the file extent exists(this should be handled by the extent bytenr
9796 * Return <0 if something goes wrong.
9798 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9799 struct extent_record *rec)
9801 struct btrfs_key key;
9802 struct btrfs_root *dest_root;
9803 struct extent_backref *back, *tmp;
9804 struct data_backref *dback;
9805 struct orphan_data_extent *orphan;
9806 struct btrfs_path path;
9807 int recorded_data_ref = 0;
9812 btrfs_init_path(&path);
9813 rbtree_postorder_for_each_entry_safe(back, tmp,
9814 &rec->backref_tree, node) {
9815 if (back->full_backref || !back->is_data ||
9816 !back->found_extent_tree)
9818 dback = to_data_backref(back);
9819 if (dback->found_ref)
9821 key.objectid = dback->root;
9822 key.type = BTRFS_ROOT_ITEM_KEY;
9823 key.offset = (u64)-1;
9825 dest_root = btrfs_read_fs_root(fs_info, &key);
9827 /* For non-exist root we just skip it */
9828 if (IS_ERR(dest_root) || !dest_root)
9831 key.objectid = dback->owner;
9832 key.type = BTRFS_EXTENT_DATA_KEY;
9833 key.offset = dback->offset;
9835 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9836 btrfs_release_path(&path);
9838 * For ret < 0, it's OK since the fs-tree may be corrupted,
9839 * we need to record it for inode/file extent rebuild.
9840 * For ret > 0, we record it only for file extent rebuild.
9841 * For ret == 0, the file extent exists but only bytenr
9842 * mismatch, let the original bytenr fix routine to handle,
9848 orphan = malloc(sizeof(*orphan));
9853 INIT_LIST_HEAD(&orphan->list);
9854 orphan->root = dback->root;
9855 orphan->objectid = dback->owner;
9856 orphan->offset = dback->offset;
9857 orphan->disk_bytenr = rec->cache.start;
9858 orphan->disk_len = rec->cache.size;
9859 list_add(&dest_root->orphan_data_extents, &orphan->list);
9860 recorded_data_ref = 1;
9863 btrfs_release_path(&path);
9865 return !recorded_data_ref;
9871 * when an incorrect extent item is found, this will delete
9872 * all of the existing entries for it and recreate them
9873 * based on what the tree scan found.
/*
 * Steps visible below: when the ref counts of a data extent disagree, first
 * collect stray backrefs with find_possible_backrefs(); verify the backrefs;
 * then, in a transaction on info->extent_root, delete the existing records
 * and call record_extent() for the backrefs in rec->backref_tree. A message
 * is printed to stderr once the references were repaired.
 */
9875 static int fixup_extent_refs(struct btrfs_fs_info *info,
9876 struct cache_tree *extent_cache,
9877 struct extent_record *rec)
9879 struct btrfs_trans_handle *trans = NULL;
9881 struct btrfs_path path;
9882 struct cache_extent *cache;
9883 struct extent_backref *back, *tmp;
/* Carry the record's full-backref state into the flags given to record_extent(). */
9887 if (rec->flag_block_full_backref)
9888 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9890 btrfs_init_path(&path);
9891 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9893 * Sometimes the backrefs themselves are so broken they don't
9894 * get attached to any meaningful rec, so first go back and
9895 * check any of our backrefs that we couldn't find and throw
9896 * them into the list if we find the backref so that
9897 * verify_backrefs can figure out what to do.
9899 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9904 /* step one, make sure all of the backrefs agree */
9905 ret = verify_backrefs(info, &path, rec);
9909 trans = btrfs_start_transaction(info->extent_root, 1);
9910 if (IS_ERR(trans)) {
9911 ret = PTR_ERR(trans);
9915 /* step two, delete all the existing records */
9916 ret = delete_extent_records(trans, info->extent_root, &path,
9922 /* was this block corrupt? If so, don't add references to it */
9923 cache = lookup_cache_extent(info->corrupt_blocks,
9924 rec->start, rec->max_size);
9930 /* step three, recreate all the refs we did find */
9931 rbtree_postorder_for_each_entry_safe(back, tmp,
9932 &rec->backref_tree, node) {
9934 * if we didn't find any references, don't create a
9937 if (!back->found_ref)
9940 rec->bad_full_backref = 0;
9941 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
/* The commit result is held in a local err, separate from ret. */
9949 int err = btrfs_commit_transaction(trans, info->extent_root);
9955 fprintf(stderr, "Repaired extent references for %llu\n",
9956 (unsigned long long)rec->start);
9958 btrfs_release_path(&path);
/*
 * Rewrite the flags of the extent item of @rec so that
 * BTRFS_BLOCK_FLAG_FULL_BACKREF matches rec->flag_block_full_backref.
 *
 * The item is looked up in fs_info->extent_root as a METADATA_ITEM
 * (offset = rec->info_level) for metadata records and as an EXTENT_ITEM
 * (offset = rec->max_size) otherwise. The change is made inside its own
 * transaction; every step is reported on stderr.
 */
9962 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9963 struct extent_record *rec)
9965 struct btrfs_trans_handle *trans;
9966 struct btrfs_root *root = fs_info->extent_root;
9967 struct btrfs_path path;
9968 struct btrfs_extent_item *ei;
9969 struct btrfs_key key;
/* Build the key of the item that describes this extent. */
9973 key.objectid = rec->start;
9974 if (rec->metadata) {
9975 key.type = BTRFS_METADATA_ITEM_KEY;
9976 key.offset = rec->info_level;
9978 key.type = BTRFS_EXTENT_ITEM_KEY;
9979 key.offset = rec->max_size;
9982 trans = btrfs_start_transaction(root, 0);
9984 return PTR_ERR(trans);
9986 btrfs_init_path(&path);
9987 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
/* NOTE(review): the result of btrfs_commit_transaction() is discarded here. */
9989 btrfs_release_path(&path);
9990 btrfs_commit_transaction(trans, root);
9993 fprintf(stderr, "Didn't find extent for %llu\n",
9994 (unsigned long long)rec->start);
/* NOTE(review): the result of btrfs_commit_transaction() is discarded here as well. */
9995 btrfs_release_path(&path);
9996 btrfs_commit_transaction(trans, root);
/* Read the current flags, then set or clear the full-backref bit. */
10000 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10001 struct btrfs_extent_item);
10002 flags = btrfs_extent_flags(path.nodes[0], ei);
10003 if (rec->flag_block_full_backref) {
10004 fprintf(stderr, "setting full backref on %llu\n",
10005 (unsigned long long)key.objectid);
10006 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10008 fprintf(stderr, "clearing full backref on %llu\n",
10009 (unsigned long long)key.objectid);
10010 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* Write the flags back into the leaf and mark it dirty before committing. */
10012 btrfs_set_extent_flags(path.nodes[0], ei, flags);
10013 btrfs_mark_buffer_dirty(path.nodes[0]);
10014 btrfs_release_path(&path);
10015 ret = btrfs_commit_transaction(trans, root);
10017 fprintf(stderr, "Repaired extent flags for %llu\n",
10018 (unsigned long long)rec->start);
10023 /* right now we only prune from the extent allocation tree */
/*
 * Remove the pointer to the corrupt block @corrupt from a node of the extent
 * tree. The search uses corrupt->key and stops at corrupt->level + 1; the
 * slot returned by the search is compared with corrupt->cache.start first,
 * and the whole node is scanned as a fallback. The pointer is dropped with
 * btrfs_del_ptr().
 */
10024 static int prune_one_block(struct btrfs_trans_handle *trans,
10025 struct btrfs_fs_info *info,
10026 struct btrfs_corrupt_block *corrupt)
10029 struct btrfs_path path;
10030 struct extent_buffer *eb;
/* One level above the corrupt block. */
10034 int level = corrupt->level + 1;
10036 btrfs_init_path(&path);
10038 /* we want to stop at the parent to our busted block */
10039 path.lowest_level = level;
10041 ret = btrfs_search_slot(trans, info->extent_root,
10042 &corrupt->key, &path, -1, 1);
10047 eb = path.nodes[level];
10054 * hopefully the search gave us the block we want to prune,
10055 * lets try that first
10057 slot = path.slots[level];
10058 found = btrfs_node_blockptr(eb, slot);
10059 if (found == corrupt->cache.start)
10062 nritems = btrfs_header_nritems(eb);
10064 /* the search failed, lets scan this node and hope we find it */
10065 for (slot = 0; slot < nritems; slot++) {
10066 found = btrfs_node_blockptr(eb, slot);
10067 if (found == corrupt->cache.start)
10071 * we couldn't find the bad block. TODO, search all the nodes for pointers
/* Special-cases the node being the root node of the extent tree. */
10074 if (eb == info->extent_root->node) {
10079 btrfs_release_path(&path);
10084 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10085 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10088 btrfs_release_path(&path);
/*
 * Take the entry returned by search_cache_extent(info->corrupt_blocks, 0),
 * hand it to prune_one_block() and remove it from the cache. The work is
 * done in a transaction on info->extent_root whose commit result is
 * returned; PTR_ERR(trans) is returned when the transaction cannot be
 * started.
 *
 * NOTE(review): the return value of prune_one_block() is not checked.
 */
10092 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10094 struct btrfs_trans_handle *trans = NULL;
10095 struct cache_extent *cache;
10096 struct btrfs_corrupt_block *corrupt;
10099 cache = search_cache_extent(info->corrupt_blocks, 0);
10103 trans = btrfs_start_transaction(info->extent_root, 1);
10105 return PTR_ERR(trans);
10107 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10108 prune_one_block(trans, info, corrupt);
10109 remove_cache_extent(info->corrupt_blocks, cache);
10112 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Clear EXTENT_DIRTY ranges from fs_info->free_space_cache, then step
 * through the block groups, advancing start past each one
 * (key.objectid + key.offset).
 * NOTE(review): presumably per-block-group cached state is reset in the
 * second pass - confirm against the full function body.
 */
10116 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10118 struct btrfs_block_group_cache *cache;
10123 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10124 &start, &end, EXTENT_DIRTY);
10127 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10132 cache = btrfs_lookup_first_block_group(fs_info, start);
10137 start = cache->key.objectid + cache->key.offset;
/*
 * Verify the extent records in @extent_cache and report problems on stderr:
 * multiple extent items, ref count mismatches, backpointer mismatches, failed
 * owner ref checks, bad full backrefs, metadata crossing a stripe boundary
 * and chunk type mismatches.
 *
 * With repair enabled, problem extents and corrupt blocks are first marked in
 * fs_info->excluded_extents, duplicate extent items are deleted, and broken
 * refs/flags are rewritten through fixup_extent_refs() and
 * fixup_extent_flags(). Block accounting is fixed in a final transaction.
 */
10141 static int check_extent_refs(struct btrfs_root *root,
10142 struct cache_tree *extent_cache)
10144 struct extent_record *rec;
10145 struct cache_extent *cache;
10151 * if we're doing a repair, we have to make sure
10152 * we don't allocate from the problem extents.
10153 * In the worst case, this will be all the
10154 * extents in the FS
10156 cache = search_cache_extent(extent_cache, 0);
10158 rec = container_of(cache, struct extent_record, cache);
10159 set_extent_dirty(root->fs_info->excluded_extents,
10161 rec->start + rec->max_size - 1);
10162 cache = next_cache_extent(cache);
10165 /* pin down all the corrupted blocks too */
10166 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10168 set_extent_dirty(root->fs_info->excluded_extents,
10170 cache->start + cache->size - 1);
10171 cache = next_cache_extent(cache);
10173 prune_corrupt_blocks(root->fs_info);
10174 reset_cached_block_groups(root->fs_info);
10177 reset_cached_block_groups(root->fs_info);
10180 * We need to delete any duplicate entries we find first otherwise we
10181 * could mess up the extent tree when we have backrefs that actually
10182 * belong to a different extent item and not the weird duplicate one.
10184 while (repair && !list_empty(&duplicate_extents)) {
10185 rec = to_extent_record(duplicate_extents.next);
10186 list_del_init(&rec->list);
10188 /* Sometimes we can find a backref before we find an actual
10189 * extent, so we need to process it a little bit to see if there
10190 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10191 * if this is a backref screwup. If we need to delete stuff
10192 * process_duplicates() will return 0, otherwise it will return
10195 if (process_duplicates(extent_cache, rec))
10197 ret = delete_duplicate_records(root, rec);
10201 * delete_duplicate_records will return the number of entries
10202 * deleted, so if it's greater than 0 then we know we actually
10203 * did something and we need to remove.
/* Main pass: examine the records of the extent cache. */
10216 cache = search_cache_extent(extent_cache, 0);
10219 rec = container_of(cache, struct extent_record, cache);
10220 if (rec->num_duplicates) {
10221 fprintf(stderr, "extent item %llu has multiple extent "
10222 "items\n", (unsigned long long)rec->start);
10226 if (rec->refs != rec->extent_item_refs) {
10227 fprintf(stderr, "ref mismatch on [%llu %llu] ",
10228 (unsigned long long)rec->start,
10229 (unsigned long long)rec->nr);
10230 fprintf(stderr, "extent item %llu, found %llu\n",
10231 (unsigned long long)rec->extent_item_refs,
10232 (unsigned long long)rec->refs);
10233 ret = record_orphan_data_extents(root->fs_info, rec);
10239 if (all_backpointers_checked(rec, 1)) {
10240 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10241 (unsigned long long)rec->start,
10242 (unsigned long long)rec->nr);
10246 if (!rec->owner_ref_checked) {
10247 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10248 (unsigned long long)rec->start,
10249 (unsigned long long)rec->nr);
/* Rewrite the references only in repair mode and when a fix was requested above. */
10254 if (repair && fix) {
10255 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10261 if (rec->bad_full_backref) {
10262 fprintf(stderr, "bad full backref, on [%llu]\n",
10263 (unsigned long long)rec->start);
10265 ret = fixup_extent_flags(root->fs_info, rec);
10273 * Although it's not a extent ref's problem, we reuse this
10274 * routine for error reporting.
10275 * No repair function yet.
10277 if (rec->crossing_stripes) {
10279 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10280 rec->start, rec->start + rec->max_size);
10284 if (rec->wrong_chunk_type) {
10286 "bad extent [%llu, %llu), type mismatch with chunk\n",
10287 rec->start, rec->start + rec->max_size);
/* The record is done: drop it from the cache and release its backrefs. */
10291 remove_cache_extent(extent_cache, cache);
10292 free_all_extent_backrefs(rec);
10293 if (!init_extent_tree && repair && (!cur_err || fix))
10294 clear_extent_dirty(root->fs_info->excluded_extents,
10296 rec->start + rec->max_size - 1);
/* -EAGAIN is not treated as a repair failure here. */
10301 if (ret && ret != -EAGAIN) {
10302 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10305 struct btrfs_trans_handle *trans;
10307 root = root->fs_info->extent_root;
10308 trans = btrfs_start_transaction(root, 1);
10309 if (IS_ERR(trans)) {
10310 ret = PTR_ERR(trans);
10314 ret = btrfs_fix_block_accounting(trans, root);
10317 ret = btrfs_commit_transaction(trans, root);
10326 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10330 if (type & BTRFS_BLOCK_GROUP_RAID0) {
10331 stripe_size = length;
10332 stripe_size /= num_stripes;
10333 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10334 stripe_size = length * 2;
10335 stripe_size /= num_stripes;
10336 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10337 stripe_size = length;
10338 stripe_size /= (num_stripes - 1);
10339 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10340 stripe_size = length;
10341 stripe_size /= (num_stripes - 2);
10343 stripe_size = length;
10345 return stripe_size;
10349 * Check the chunk with its block group/dev list ref:
10350 * Return 0 if all refs seem valid.
10351 * Return 1 if only part of the refs seem valid and a later check is needed
10352 * to rebuild the rest, e.g. a missing block group rebuilt from the extent tree.
10353 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * Cross-check one chunk record against the block group cache and the device
 * extent cache.
 *
 * The block group is looked up by the chunk's range and compared on length,
 * offset and type flags; a matching record is unlinked from its list and
 * stored in chunk_rec->bg_rec. Then, for every stripe, the device extent at
 * (devid, offset) is looked up with the length from calc_stripe_length() and
 * compared; a matching record is moved onto chunk_rec->dextents.
 */
10355 static int check_chunk_refs(struct chunk_record *chunk_rec,
10356 struct block_group_tree *block_group_cache,
10357 struct device_extent_tree *dev_extent_cache,
10360 struct cache_extent *block_group_item;
10361 struct block_group_record *block_group_rec;
10362 struct cache_extent *dev_extent_item;
10363 struct device_extent_record *dev_extent_rec;
10367 int metadump_v2 = 0;
/* Part 1: chunk <-> block group item. */
10371 block_group_item = lookup_cache_extent(&block_group_cache->tree,
10373 chunk_rec->length);
10374 if (block_group_item) {
10375 block_group_rec = container_of(block_group_item,
10376 struct block_group_record,
10378 if (chunk_rec->length != block_group_rec->offset ||
10379 chunk_rec->offset != block_group_rec->objectid ||
10381 chunk_rec->type_flags != block_group_rec->flags)) {
10384 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10385 chunk_rec->objectid,
10390 chunk_rec->type_flags,
10391 block_group_rec->objectid,
10392 block_group_rec->type,
10393 block_group_rec->offset,
10394 block_group_rec->offset,
10395 block_group_rec->objectid,
10396 block_group_rec->flags);
10399 list_del_init(&block_group_rec->list);
10400 chunk_rec->bg_rec = block_group_rec;
10405 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10406 chunk_rec->objectid,
10411 chunk_rec->type_flags);
/* Part 2: every stripe of the chunk <-> device extent item. */
10418 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10419 chunk_rec->num_stripes);
10420 for (i = 0; i < chunk_rec->num_stripes; ++i) {
10421 devid = chunk_rec->stripes[i].devid;
10422 offset = chunk_rec->stripes[i].offset;
10423 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10424 devid, offset, length);
10425 if (dev_extent_item) {
10426 dev_extent_rec = container_of(dev_extent_item,
10427 struct device_extent_record,
10429 if (dev_extent_rec->objectid != devid ||
10430 dev_extent_rec->offset != offset ||
10431 dev_extent_rec->chunk_offset != chunk_rec->offset ||
10432 dev_extent_rec->length != length) {
10435 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10436 chunk_rec->objectid,
10439 chunk_rec->stripes[i].devid,
10440 chunk_rec->stripes[i].offset,
10441 dev_extent_rec->objectid,
10442 dev_extent_rec->offset,
10443 dev_extent_rec->length);
10446 list_move(&dev_extent_rec->chunk_list,
10447 &chunk_rec->dextents);
10452 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10453 chunk_rec->objectid,
10456 chunk_rec->stripes[i].devid,
10457 chunk_rec->stripes[i].offset);
10464 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Run check_chunk_refs() on every record of @chunk_cache and sort the chunk
 * records by result: 0 goes to @good, >0 to @rebuild, <0 to @bad. Each of
 * the three lists may be NULL, in which case that class is not collected.
 *
 * Afterwards the block groups still linked on block_group_cache->block_groups
 * and the device extents on dev_extent_cache->no_chunk_orphans are walked
 * and reported as lacking a chunk.
 */
10465 int check_chunks(struct cache_tree *chunk_cache,
10466 struct block_group_tree *block_group_cache,
10467 struct device_extent_tree *dev_extent_cache,
10468 struct list_head *good, struct list_head *bad,
10469 struct list_head *rebuild, int silent)
10471 struct cache_extent *chunk_item;
10472 struct chunk_record *chunk_rec;
10473 struct block_group_record *bg_rec;
10474 struct device_extent_record *dext_rec;
10478 chunk_item = first_cache_extent(chunk_cache);
10479 while (chunk_item) {
10480 chunk_rec = container_of(chunk_item, struct chunk_record,
10482 err = check_chunk_refs(chunk_rec, block_group_cache,
10483 dev_extent_cache, silent);
/* Classify the chunk by the result of the reference check. */
10486 if (err == 0 && good)
10487 list_add_tail(&chunk_rec->list, good);
10488 if (err > 0 && rebuild)
10489 list_add_tail(&chunk_rec->list, rebuild);
10490 if (err < 0 && bad)
10491 list_add_tail(&chunk_rec->list, bad);
10492 chunk_item = next_cache_extent(chunk_item);
/* Block groups left on this list were not claimed by check_chunk_refs(). */
10495 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10498 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10506 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10510 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10511 dext_rec->objectid,
/*
 * Sum the lengths of the device extents of @dev_rec found in @dext_cache
 * (starting at (devid, 0)) and compare the total with dev_rec->byte_used.
 * Every extent counted is unlinked from its device_list. A mismatch is
 * reported with the message below.
 */
10521 static int check_device_used(struct device_record *dev_rec,
10522 struct device_extent_tree *dext_cache)
10524 struct cache_extent *cache;
10525 struct device_extent_record *dev_extent_rec;
10526 u64 total_byte = 0;
10528 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10530 dev_extent_rec = container_of(cache,
10531 struct device_extent_record,
/* Tests whether the extent belongs to a different device than dev_rec. */
10533 if (dev_extent_rec->objectid != dev_rec->devid)
10536 list_del_init(&dev_extent_rec->device_list);
10537 total_byte += dev_extent_rec->length;
10538 cache = next_cache_extent(cache);
10541 if (total_byte != dev_rec->byte_used) {
10543 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10544 total_byte, dev_rec->byte_used, dev_rec->objectid,
10545 dev_rec->type, dev_rec->offset);
10552 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Run check_device_used() on the device records of the rb-tree @dev_cache,
 * then report the device extents still linked on
 * dev_extent_cache->no_device_orphans as having no device.
 */
10553 static int check_devices(struct rb_root *dev_cache,
10554 struct device_extent_tree *dev_extent_cache)
10556 struct rb_node *dev_node;
10557 struct device_record *dev_rec;
10558 struct device_extent_record *dext_rec;
10562 dev_node = rb_first(dev_cache);
10564 dev_rec = container_of(dev_node, struct device_record, node);
10565 err = check_device_used(dev_rec, dev_extent_cache);
10569 dev_node = rb_next(dev_node);
10571 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10574 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10575 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record, fill it from the arguments and append it to
 * the tail of @head.
 *
 * NOTE(review): callers in this file pass drop_key == NULL for non-dropping
 * roots; the record comes from malloc(), so drop_key is presumably left
 * uninitialized in that case - confirm it is never read then.
 */
10582 static int add_root_item_to_list(struct list_head *head,
10583 u64 objectid, u64 bytenr, u64 last_snapshot,
10584 u8 level, u8 drop_level,
10585 struct btrfs_key *drop_key)
10588 struct root_item_record *ri_rec;
10589 ri_rec = malloc(sizeof(*ri_rec));
10592 ri_rec->bytenr = bytenr;
10593 ri_rec->objectid = objectid;
10594 ri_rec->level = level;
10595 ri_rec->drop_level = drop_level;
10596 ri_rec->last_snapshot = last_snapshot;
10598 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10599 list_add_tail(&ri_rec->list, head);
/*
 * Empty @list: take the first root_item_record and unlink it, until the
 * list is empty.
 */
10604 static void free_root_item_list(struct list_head *list)
10606 struct root_item_record *ri_rec;
10608 while (!list_empty(list)) {
10609 ri_rec = list_first_entry(list, struct root_item_record,
10611 list_del_init(&ri_rec->list);
/*
 * Process the root_item_records queued on @list: read the root's tree block,
 * register it with add_root_to_pending() and feed blocks to
 * run_next_block(). A record is unlinked from @list once handled. A further
 * run_next_block() call with a NULL record follows the list loop.
 *
 * The cache arguments are passed through unchanged to add_root_to_pending()
 * and run_next_block().
 */
10616 static int deal_root_from_list(struct list_head *list,
10617 struct btrfs_root *root,
10618 struct block_info *bits,
10620 struct cache_tree *pending,
10621 struct cache_tree *seen,
10622 struct cache_tree *reada,
10623 struct cache_tree *nodes,
10624 struct cache_tree *extent_cache,
10625 struct cache_tree *chunk_cache,
10626 struct rb_root *dev_cache,
10627 struct block_group_tree *block_group_cache,
10628 struct device_extent_tree *dev_extent_cache)
10633 while (!list_empty(list)) {
10634 struct root_item_record *rec;
10635 struct extent_buffer *buf;
10636 rec = list_entry(list->next,
10637 struct root_item_record, list);
/* Read the root node of this tree; the buffer is released if it is not up to date. */
10639 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10640 if (!extent_buffer_uptodate(buf)) {
10641 free_extent_buffer(buf);
10645 ret = add_root_to_pending(buf, extent_cache, pending,
10646 seen, nodes, rec->objectid);
10650 * To rebuild extent tree, we need deal with snapshot
10651 * one by one, otherwise we deal with node firstly which
10652 * can maximize readahead.
10655 ret = run_next_block(root, bits, bits_nr, &last,
10656 pending, seen, reada, nodes,
10657 extent_cache, chunk_cache,
10658 dev_cache, block_group_cache,
10659 dev_extent_cache, rec);
10663 free_extent_buffer(buf);
10664 list_del(&rec->list);
/* Same call as above, but without a root item record. */
10670 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10671 reada, nodes, extent_cache, chunk_cache,
10672 dev_cache, block_group_cache,
10673 dev_extent_cache, NULL);
/*
 * Entry point of the chunk/extent check.
 *
 * Sets up the local caches and hooks them into @fs_info, queues the tree
 * root, the chunk root and every ROOT_ITEM found in the tree root on either
 * normal_trees or dropping_trees (roots with a non-zero drop_progress
 * objectid), walks both lists with deal_root_from_list(), and then runs
 * check_chunks(), check_extent_refs() and check_devices(). All caches are
 * released and the fs_info hooks are reset before returning.
 *
 * -EAGAIN from a step is tested separately after each call; see the comment
 * ahead of the first deal_root_from_list() call.
 */
10683 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10685 struct rb_root dev_cache;
10686 struct cache_tree chunk_cache;
10687 struct block_group_tree block_group_cache;
10688 struct device_extent_tree dev_extent_cache;
10689 struct cache_tree extent_cache;
10690 struct cache_tree seen;
10691 struct cache_tree pending;
10692 struct cache_tree reada;
10693 struct cache_tree nodes;
10694 struct extent_io_tree excluded_extents;
10695 struct cache_tree corrupt_blocks;
10696 struct btrfs_path path;
10697 struct btrfs_key key;
10698 struct btrfs_key found_key;
10700 struct block_info *bits;
10702 struct extent_buffer *leaf;
10704 struct btrfs_root_item ri;
10705 struct list_head dropping_trees;
10706 struct list_head normal_trees;
10707 struct btrfs_root *root1;
10708 struct btrfs_root *root;
10712 root = fs_info->fs_root;
/* Initialize every local cache, tree and list used by the check. */
10713 dev_cache = RB_ROOT;
10714 cache_tree_init(&chunk_cache);
10715 block_group_tree_init(&block_group_cache);
10716 device_extent_tree_init(&dev_extent_cache);
10718 cache_tree_init(&extent_cache);
10719 cache_tree_init(&seen);
10720 cache_tree_init(&pending);
10721 cache_tree_init(&nodes);
10722 cache_tree_init(&reada);
10723 cache_tree_init(&corrupt_blocks);
10724 extent_io_tree_init(&excluded_extents);
10725 INIT_LIST_HEAD(&dropping_trees);
10726 INIT_LIST_HEAD(&normal_trees);
/* Expose the local state through fs_info; reset again at the end. */
10729 fs_info->excluded_extents = &excluded_extents;
10730 fs_info->fsck_extent_cache = &extent_cache;
10731 fs_info->free_extent_hook = free_extent_hook;
10732 fs_info->corrupt_blocks = &corrupt_blocks;
10736 bits = malloc(bits_nr * sizeof(struct block_info));
10742 if (ctx.progress_enabled) {
10743 ctx.tp = TASK_EXTENTS;
10744 task_start(ctx.info);
/* Queue the tree root and the chunk root first. */
10748 root1 = fs_info->tree_root;
10749 level = btrfs_header_level(root1->node);
10750 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10751 root1->node->start, 0, level, 0, NULL);
10754 root1 = fs_info->chunk_root;
10755 level = btrfs_header_level(root1->node);
10756 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10757 root1->node->start, 0, level, 0, NULL);
/* Then scan the tree root for ROOT_ITEMs and queue each root found. */
10760 btrfs_init_path(&path);
10763 key.type = BTRFS_ROOT_ITEM_KEY;
10764 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10768 leaf = path.nodes[0];
10769 slot = path.slots[0];
10770 if (slot >= btrfs_header_nritems(path.nodes[0])) {
10771 ret = btrfs_next_leaf(root, &path);
10774 leaf = path.nodes[0];
10775 slot = path.slots[0];
10777 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10778 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10779 unsigned long offset;
10782 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10783 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10784 last_snapshot = btrfs_root_last_snapshot(&ri);
/* A zero drop_progress objectid puts the root on normal_trees. */
10785 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10786 level = btrfs_root_level(&ri);
10787 ret = add_root_item_to_list(&normal_trees,
10788 found_key.objectid,
10789 btrfs_root_bytenr(&ri),
10790 last_snapshot, level,
/* Otherwise the root goes on dropping_trees together with its drop key. */
10795 level = btrfs_root_level(&ri);
10796 objectid = found_key.objectid;
10797 btrfs_disk_key_to_cpu(&found_key,
10798 &ri.drop_progress);
10799 ret = add_root_item_to_list(&dropping_trees,
10801 btrfs_root_bytenr(&ri),
10802 last_snapshot, level,
10803 ri.drop_level, &found_key);
10810 btrfs_release_path(&path);
10813 * check_block can return -EAGAIN if it fixes something, please keep
10814 * this in mind when dealing with return values from these functions, if
10815 * we get -EAGAIN we want to fall through and restart the loop.
10817 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
10818 &seen, &reada, &nodes, &extent_cache,
10819 &chunk_cache, &dev_cache, &block_group_cache,
10820 &dev_extent_cache);
10822 if (ret == -EAGAIN)
10826 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
10827 &pending, &seen, &reada, &nodes,
10828 &extent_cache, &chunk_cache, &dev_cache,
10829 &block_group_cache, &dev_extent_cache);
10831 if (ret == -EAGAIN)
/* No good/bad/rebuild lists are collected here, and silent is 0. */
10836 ret = check_chunks(&chunk_cache, &block_group_cache,
10837 &dev_extent_cache, NULL, NULL, NULL, 0);
10839 if (ret == -EAGAIN)
10844 ret = check_extent_refs(root, &extent_cache);
10846 if (ret == -EAGAIN)
10851 ret = check_devices(&dev_cache, &dev_extent_cache);
10856 task_stop(ctx.info);
/* First teardown sequence: detach the hooks from fs_info and free the caches. */
10858 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10859 extent_io_tree_cleanup(&excluded_extents);
10860 fs_info->fsck_extent_cache = NULL;
10861 fs_info->free_extent_hook = NULL;
10862 fs_info->corrupt_blocks = NULL;
10863 fs_info->excluded_extents = NULL;
10866 free_chunk_cache_tree(&chunk_cache);
10867 free_device_cache_tree(&dev_cache);
10868 free_block_group_tree(&block_group_cache);
10869 free_device_extent_tree(&dev_extent_cache);
10870 free_extent_cache_tree(&seen);
10871 free_extent_cache_tree(&pending);
10872 free_extent_cache_tree(&reada);
10873 free_extent_cache_tree(&nodes);
10874 free_root_item_list(&normal_trees);
10875 free_root_item_list(&dropping_trees);
/* Second teardown sequence; unlike the first it also frees the extent record cache. */
10878 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10879 free_extent_cache_tree(&seen);
10880 free_extent_cache_tree(&pending);
10881 free_extent_cache_tree(&reada);
10882 free_extent_cache_tree(&nodes);
10883 free_chunk_cache_tree(&chunk_cache);
10884 free_block_group_tree(&block_group_cache);
10885 free_device_cache_tree(&dev_cache);
10886 free_device_extent_tree(&dev_extent_cache);
10887 free_extent_record_cache(&extent_cache);
10888 free_root_item_list(&normal_trees);
10889 free_root_item_list(&dropping_trees);
10890 extent_io_tree_cleanup(&excluded_extents);
10895 * Check backrefs of a tree block given by @bytenr or @eb.
10897 * @root: the root containing the @bytenr or @eb
10898 * @eb: tree block extent buffer, can be NULL
10899 * @bytenr: bytenr of the tree block to search
10900 * @level: tree level of the tree block
10901 * @owner: owner of the tree block
10903 * Return >0 for any error found and output error message
10904 * Return 0 for no error found
/*
 * Outline of the lookup below: find the extent/metadata item for @bytenr in
 * the extent tree, compare its flags, generation, level and ref count with
 * what the caller expects, then walk the inline refs for a TREE_BLOCK_REF
 * matching root->objectid or @owner, or a SHARED_BLOCK_REF whose parent
 * checks out. If the inline refs do not settle it, a keyed
 * TREE_BLOCK_REF item (bytenr, TREE_BLOCK_REF, root->objectid) is searched.
 *
 * Problems are accumulated in err as BACKREF_MISSING / BACKREF_MISMATCH.
 */
10906 static int check_tree_block_ref(struct btrfs_root *root,
10907 struct extent_buffer *eb, u64 bytenr,
10908 int level, u64 owner)
10910 struct btrfs_key key;
10911 struct btrfs_root *extent_root = root->fs_info->extent_root;
10912 struct btrfs_path path;
10913 struct btrfs_extent_item *ei;
10914 struct btrfs_extent_inline_ref *iref;
10915 struct extent_buffer *leaf;
10921 u32 nodesize = root->fs_info->nodesize;
10924 int tree_reloc_root = 0;
/* Remember when @bytenr is the root node of a tree reloc root. */
10929 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10930 btrfs_header_bytenr(root->node) == bytenr)
10931 tree_reloc_root = 1;
10933 btrfs_init_path(&path);
10934 key.objectid = bytenr;
10935 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10936 key.type = BTRFS_METADATA_ITEM_KEY;
10938 key.type = BTRFS_EXTENT_ITEM_KEY;
10939 key.offset = (u64)-1;
10941 /* Search for the backref in extent tree */
10942 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10944 err |= BACKREF_MISSING;
/* The key offset was (u64)-1, so step back to the extent item of bytenr. */
10947 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10949 err |= BACKREF_MISSING;
10953 leaf = path.nodes[0];
10954 slot = path.slots[0];
10955 btrfs_item_key_to_cpu(leaf, &key, slot);
10957 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* METADATA_ITEM keeps the level in key.offset; otherwise it is read from the tree block info. */
10959 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10960 skinny_level = (int)key.offset;
10961 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10963 struct btrfs_tree_block_info *info;
10965 info = (struct btrfs_tree_block_info *)(ei + 1);
10966 skinny_level = btrfs_tree_block_level(leaf, info);
10967 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10974 if (!(btrfs_extent_flags(leaf, ei) &
10975 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10977 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10978 key.objectid, nodesize,
10979 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10980 err = BACKREF_MISMATCH;
10982 header_gen = btrfs_header_generation(eb);
10983 extent_gen = btrfs_extent_generation(leaf, ei);
10984 if (header_gen != extent_gen) {
10986 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10987 key.objectid, nodesize, header_gen,
10989 err = BACKREF_MISMATCH;
10991 if (level != skinny_level) {
10993 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10994 key.objectid, nodesize, level, skinny_level);
10995 err = BACKREF_MISMATCH;
10997 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10999 "extent[%llu %u] is referred by other roots than %llu",
11000 key.objectid, nodesize, root->objectid);
11001 err = BACKREF_MISMATCH;
11006 * Iterate the extent/metadata item to find the exact backref
11008 item_size = btrfs_item_size_nr(leaf, slot);
11009 ptr = (unsigned long)iref;
11010 end = (unsigned long)ei + item_size;
11011 while (ptr < end) {
11012 iref = (struct btrfs_extent_inline_ref *)ptr;
11013 type = btrfs_extent_inline_ref_type(leaf, iref);
11014 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11016 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11017 (offset == root->objectid || offset == owner)) {
11019 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11021 * Backref of tree reloc root points to itself, no need
11022 * to check backref any more.
11024 if (tree_reloc_root)
11027 /* Check if the backref points to valid referencer */
11028 found_ref = !check_tree_block_ref(root, NULL,
11029 offset, level + 1, owner);
/* Advance by the size of an inline ref of this type. */
11034 ptr += btrfs_extent_inline_ref_size(type);
11038 * Inlined extent item doesn't have what we need, check
11039 * TREE_BLOCK_REF_KEY
11042 btrfs_release_path(&path);
11043 key.objectid = bytenr;
11044 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11045 key.offset = root->objectid;
11047 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11052 err |= BACKREF_MISSING;
11054 btrfs_release_path(&path);
/* The "backref lost" message is only printed when an extent buffer was given. */
11055 if (eb && (err & BACKREF_MISSING))
11056 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
11057 bytenr, nodesize, owner, level);
11062 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11064 * Return >0 any error found and output error message
11065 * Return 0 for no error found
11067 static int check_extent_data_item(struct btrfs_root *root,
11068 struct extent_buffer *eb, int slot)
11070 struct btrfs_file_extent_item *fi;
11071 struct btrfs_path path;
11072 struct btrfs_root *extent_root = root->fs_info->extent_root;
11073 struct btrfs_key fi_key;
11074 struct btrfs_key dbref_key;
11075 struct extent_buffer *leaf;
11076 struct btrfs_extent_item *ei;
11077 struct btrfs_extent_inline_ref *iref;
11078 struct btrfs_extent_data_ref *dref;
11081 u64 disk_num_bytes;
11082 u64 extent_num_bytes;
11089 int found_dbackref = 0;
11093 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11094 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11096 /* Nothing to check for hole and inline data extents */
11097 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11098 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11101 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11102 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11103 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11105 /* Check unaligned disk_num_bytes and num_bytes */
11106 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11108 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11109 fi_key.objectid, fi_key.offset, disk_num_bytes,
11110 root->fs_info->sectorsize);
11111 err |= BYTES_UNALIGNED;
11113 data_bytes_allocated += disk_num_bytes;
11115 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11117 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11118 fi_key.objectid, fi_key.offset, extent_num_bytes,
11119 root->fs_info->sectorsize);
11120 err |= BYTES_UNALIGNED;
11122 data_bytes_referenced += extent_num_bytes;
11124 owner = btrfs_header_owner(eb);
11126 /* Check the extent item of the file extent in extent tree */
11127 btrfs_init_path(&path);
11128 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11129 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11130 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11132 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11136 leaf = path.nodes[0];
11137 slot = path.slots[0];
11138 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11140 extent_flags = btrfs_extent_flags(leaf, ei);
11142 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11144 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11145 disk_bytenr, disk_num_bytes,
11146 BTRFS_EXTENT_FLAG_DATA);
11147 err |= BACKREF_MISMATCH;
11150 /* Check data backref inside that extent item */
11151 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11152 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11153 ptr = (unsigned long)iref;
11154 end = (unsigned long)ei + item_size;
11155 while (ptr < end) {
11156 iref = (struct btrfs_extent_inline_ref *)ptr;
11157 type = btrfs_extent_inline_ref_type(leaf, iref);
11158 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11160 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11161 ref_root = btrfs_extent_data_ref_root(leaf, dref);
11162 if (ref_root == owner || ref_root == root->objectid)
11163 found_dbackref = 1;
11164 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
11165 found_dbackref = !check_tree_block_ref(root, NULL,
11166 btrfs_extent_inline_ref_offset(leaf, iref),
11170 if (found_dbackref)
11172 ptr += btrfs_extent_inline_ref_size(type);
11175 if (!found_dbackref) {
11176 btrfs_release_path(&path);
11178 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
11179 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11180 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
11181 dbref_key.offset = hash_extent_data_ref(root->objectid,
11182 fi_key.objectid, fi_key.offset);
11184 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11185 &dbref_key, &path, 0, 0);
11187 found_dbackref = 1;
11191 btrfs_release_path(&path);
11194 * Neither inlined nor EXTENT_DATA_REF found, try
11195 * SHARED_DATA_REF as last chance.
11197 dbref_key.objectid = disk_bytenr;
11198 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
11199 dbref_key.offset = eb->start;
11201 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11202 &dbref_key, &path, 0, 0);
11204 found_dbackref = 1;
11210 if (!found_dbackref)
11211 err |= BACKREF_MISSING;
11212 btrfs_release_path(&path);
11213 if (err & BACKREF_MISSING) {
11214 error("data extent[%llu %llu] backref lost",
11215 disk_bytenr, disk_num_bytes);
11221 * Get real tree block level for the case like shared block
11222 * Return >= 0 as tree level
11223 * Return <0 for error
11225 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11227 struct extent_buffer *eb;
11228 struct btrfs_path path;
11229 struct btrfs_key key;
11230 struct btrfs_extent_item *ei;
11237 /* Search extent tree for extent generation and level */
11238 key.objectid = bytenr;
11239 key.type = BTRFS_METADATA_ITEM_KEY;
11240 key.offset = (u64)-1;
11242 btrfs_init_path(&path);
11243 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
11246 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11254 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11255 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11256 struct btrfs_extent_item);
11257 flags = btrfs_extent_flags(path.nodes[0], ei);
11258 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11263 /* Get transid for later read_tree_block() check */
11264 transid = btrfs_extent_generation(path.nodes[0], ei);
11266 /* Get backref level as one source */
11267 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11268 backref_level = key.offset;
11270 struct btrfs_tree_block_info *info;
11272 info = (struct btrfs_tree_block_info *)(ei + 1);
11273 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11275 btrfs_release_path(&path);
11277 /* Get level from tree block as an alternative source */
11278 eb = read_tree_block(fs_info, bytenr, transid);
11279 if (!extent_buffer_uptodate(eb)) {
11280 free_extent_buffer(eb);
11283 header_level = btrfs_header_level(eb);
11284 free_extent_buffer(eb);
11286 if (header_level != backref_level)
11288 return header_level;
11291 btrfs_release_path(&path);
11296 * Check if a tree block backref is valid (points to a valid tree block)
11297 * if level == -1, level will be resolved
11298 * Return >0 for any error found and print error message
11300 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11301 u64 bytenr, int level)
11303 struct btrfs_root *root;
11304 struct btrfs_key key;
11305 struct btrfs_path path;
11306 struct extent_buffer *eb;
11307 struct extent_buffer *node;
11308 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11312 /* Query level for level == -1 special case */
11314 level = query_tree_block_level(fs_info, bytenr);
11316 err |= REFERENCER_MISSING;
11320 key.objectid = root_id;
11321 key.type = BTRFS_ROOT_ITEM_KEY;
11322 key.offset = (u64)-1;
11324 root = btrfs_read_fs_root(fs_info, &key);
11325 if (IS_ERR(root)) {
11326 err |= REFERENCER_MISSING;
11330 /* Read out the tree block to get item/node key */
11331 eb = read_tree_block(fs_info, bytenr, 0);
11332 if (!extent_buffer_uptodate(eb)) {
11333 err |= REFERENCER_MISSING;
11334 free_extent_buffer(eb);
11338 /* Empty tree, no need to check key */
11339 if (!btrfs_header_nritems(eb) && !level) {
11340 free_extent_buffer(eb);
11345 btrfs_node_key_to_cpu(eb, &key, 0);
11347 btrfs_item_key_to_cpu(eb, &key, 0);
11349 free_extent_buffer(eb);
11351 btrfs_init_path(&path);
11352 path.lowest_level = level;
11353 /* Search with the first key, to ensure we can reach it */
11354 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11356 err |= REFERENCER_MISSING;
11360 node = path.nodes[level];
11361 if (btrfs_header_bytenr(node) != bytenr) {
11363 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
11364 bytenr, nodesize, bytenr,
11365 btrfs_header_bytenr(node));
11366 err |= REFERENCER_MISMATCH;
11368 if (btrfs_header_level(node) != level) {
11370 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
11371 bytenr, nodesize, level,
11372 btrfs_header_level(node));
11373 err |= REFERENCER_MISMATCH;
11377 btrfs_release_path(&path);
11379 if (err & REFERENCER_MISSING) {
11381 error("extent [%llu %d] lost referencer (owner: %llu)",
11382 bytenr, nodesize, root_id);
11385 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11386 bytenr, nodesize, root_id, level);
11393 * Check if tree block @eb is tree reloc root.
11394 * Return 0 if it's not or any problem happens
11395 * Return 1 if it's a tree reloc root
11397 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11398 struct extent_buffer *eb)
11400 struct btrfs_root *tree_reloc_root;
11401 struct btrfs_key key;
11402 u64 bytenr = btrfs_header_bytenr(eb);
11403 u64 owner = btrfs_header_owner(eb);
11406 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11407 key.offset = owner;
11408 key.type = BTRFS_ROOT_ITEM_KEY;
11410 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11411 if (IS_ERR(tree_reloc_root))
11414 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
11416 btrfs_free_fs_root(tree_reloc_root);
11421 * Check referencer for shared block backref
11422 * If level == -1, this function will resolve the level.
11424 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11425 u64 parent, u64 bytenr, int level)
11427 struct extent_buffer *eb;
11429 int found_parent = 0;
11432 eb = read_tree_block(fs_info, parent, 0);
11433 if (!extent_buffer_uptodate(eb))
11437 level = query_tree_block_level(fs_info, bytenr);
11441 /* It's possible it's a tree reloc root */
11442 if (parent == bytenr) {
11443 if (is_tree_reloc_root(fs_info, eb))
11448 if (level + 1 != btrfs_header_level(eb))
11451 nr = btrfs_header_nritems(eb);
11452 for (i = 0; i < nr; i++) {
11453 if (bytenr == btrfs_node_blockptr(eb, i)) {
11459 free_extent_buffer(eb);
11460 if (!found_parent) {
11462 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
11463 bytenr, fs_info->nodesize, parent, level);
11464 return REFERENCER_MISSING;
11470 * Check referencer for normal (inlined) data ref
11471 * If len == 0, it will be resolved by searching in extent tree
11473 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
11474 u64 root_id, u64 objectid, u64 offset,
11475 u64 bytenr, u64 len, u32 count)
11477 struct btrfs_root *root;
11478 struct btrfs_root *extent_root = fs_info->extent_root;
11479 struct btrfs_key key;
11480 struct btrfs_path path;
11481 struct extent_buffer *leaf;
11482 struct btrfs_file_extent_item *fi;
11483 u32 found_count = 0;
11488 key.objectid = bytenr;
11489 key.type = BTRFS_EXTENT_ITEM_KEY;
11490 key.offset = (u64)-1;
11492 btrfs_init_path(&path);
11493 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11496 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11499 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11500 if (key.objectid != bytenr ||
11501 key.type != BTRFS_EXTENT_ITEM_KEY)
11504 btrfs_release_path(&path);
11506 key.objectid = root_id;
11507 key.type = BTRFS_ROOT_ITEM_KEY;
11508 key.offset = (u64)-1;
11509 btrfs_init_path(&path);
11511 root = btrfs_read_fs_root(fs_info, &key);
11515 key.objectid = objectid;
11516 key.type = BTRFS_EXTENT_DATA_KEY;
11518 * It can be nasty as data backref offset is
11519 * file offset - file extent offset, which is smaller or
11520 * equal to original backref offset. The only special case is
11521 * overflow. So we need to special check and do further search.
11523 key.offset = offset & (1ULL << 63) ? 0 : offset;
11525 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11530 * Search afterwards to get correct one
11531 * NOTE: As we must do a comprehensive check on the data backref to
11532 * make sure the dref count also matches, we must iterate all file
11533 * extents for that inode.
11536 leaf = path.nodes[0];
11537 slot = path.slots[0];
11539 if (slot >= btrfs_header_nritems(leaf))
11541 btrfs_item_key_to_cpu(leaf, &key, slot);
11542 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11544 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11546 * Except normal disk bytenr and disk num bytes, we still
11547 * need to do extra check on dbackref offset as
11548 * dbackref offset = file_offset - file_extent_offset
11550 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11551 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11552 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11557 ret = btrfs_next_item(root, &path);
11562 btrfs_release_path(&path);
11563 if (found_count != count) {
11565 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
11566 bytenr, len, root_id, objectid, offset, count, found_count);
11567 return REFERENCER_MISSING;
11573 * Check if the referencer of a shared data backref exists
11575 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
11576 u64 parent, u64 bytenr)
11578 struct extent_buffer *eb;
11579 struct btrfs_key key;
11580 struct btrfs_file_extent_item *fi;
11582 int found_parent = 0;
11585 eb = read_tree_block(fs_info, parent, 0);
11586 if (!extent_buffer_uptodate(eb))
11589 nr = btrfs_header_nritems(eb);
11590 for (i = 0; i < nr; i++) {
11591 btrfs_item_key_to_cpu(eb, &key, i);
11592 if (key.type != BTRFS_EXTENT_DATA_KEY)
11595 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
11596 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
11599 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
11606 free_extent_buffer(eb);
11607 if (!found_parent) {
11608 error("shared extent %llu referencer lost (parent: %llu)",
11610 return REFERENCER_MISSING;
11616 * This function will check a given extent item, including its backref and
11617 * itself (like crossing stripe boundary and type)
11619 * Since we don't use extent_record anymore, introduce new error bit
11621 static int check_extent_item(struct btrfs_fs_info *fs_info,
11622 struct extent_buffer *eb, int slot)
11624 struct btrfs_extent_item *ei;
11625 struct btrfs_extent_inline_ref *iref;
11626 struct btrfs_extent_data_ref *dref;
11630 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11631 u32 item_size = btrfs_item_size_nr(eb, slot);
11636 struct btrfs_key key;
11640 btrfs_item_key_to_cpu(eb, &key, slot);
11641 if (key.type == BTRFS_EXTENT_ITEM_KEY)
11642 bytes_used += key.offset;
11644 bytes_used += nodesize;
11646 if (item_size < sizeof(*ei)) {
11648 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
11649 * old thing when on disk format is still un-determined.
11650 * No need to care about it anymore
11652 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
11656 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
11657 flags = btrfs_extent_flags(eb, ei);
11659 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
11661 if (metadata && check_crossing_stripes(global_info, key.objectid,
11663 error("bad metadata [%llu, %llu) crossing stripe boundary",
11664 key.objectid, key.objectid + nodesize);
11665 err |= CROSSING_STRIPE_BOUNDARY;
11668 ptr = (unsigned long)(ei + 1);
11670 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
11671 /* Old EXTENT_ITEM metadata */
11672 struct btrfs_tree_block_info *info;
11674 info = (struct btrfs_tree_block_info *)ptr;
11675 level = btrfs_tree_block_level(eb, info);
11676 ptr += sizeof(struct btrfs_tree_block_info);
11678 /* New METADATA_ITEM */
11679 level = key.offset;
11681 end = (unsigned long)ei + item_size;
11684 /* Reached extent item end normally */
11688 /* Beyond extent item end, wrong item size */
11690 err |= ITEM_SIZE_MISMATCH;
11691 error("extent item at bytenr %llu slot %d has wrong size",
11696 /* Now check every backref in this extent item */
11697 iref = (struct btrfs_extent_inline_ref *)ptr;
11698 type = btrfs_extent_inline_ref_type(eb, iref);
11699 offset = btrfs_extent_inline_ref_offset(eb, iref);
11701 case BTRFS_TREE_BLOCK_REF_KEY:
11702 ret = check_tree_block_backref(fs_info, offset, key.objectid,
11706 case BTRFS_SHARED_BLOCK_REF_KEY:
11707 ret = check_shared_block_backref(fs_info, offset, key.objectid,
11711 case BTRFS_EXTENT_DATA_REF_KEY:
11712 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11713 ret = check_extent_data_backref(fs_info,
11714 btrfs_extent_data_ref_root(eb, dref),
11715 btrfs_extent_data_ref_objectid(eb, dref),
11716 btrfs_extent_data_ref_offset(eb, dref),
11717 key.objectid, key.offset,
11718 btrfs_extent_data_ref_count(eb, dref));
11721 case BTRFS_SHARED_DATA_REF_KEY:
11722 ret = check_shared_data_backref(fs_info, offset, key.objectid);
11726 error("extent[%llu %d %llu] has unknown ref type: %d",
11727 key.objectid, key.type, key.offset, type);
11728 err |= UNKNOWN_TYPE;
11732 ptr += btrfs_extent_inline_ref_size(type);
11740 * Check if a dev extent item is referred correctly by its chunk
11742 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
11743 struct extent_buffer *eb, int slot)
11745 struct btrfs_root *chunk_root = fs_info->chunk_root;
11746 struct btrfs_dev_extent *ptr;
11747 struct btrfs_path path;
11748 struct btrfs_key chunk_key;
11749 struct btrfs_key devext_key;
11750 struct btrfs_chunk *chunk;
11751 struct extent_buffer *l;
11755 int found_chunk = 0;
11758 btrfs_item_key_to_cpu(eb, &devext_key, slot);
11759 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
11760 length = btrfs_dev_extent_length(eb, ptr);
11762 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
11763 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11764 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
11766 btrfs_init_path(&path);
11767 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11772 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
11773 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
11778 if (btrfs_stripe_length(fs_info, l, chunk) != length)
11781 num_stripes = btrfs_chunk_num_stripes(l, chunk);
11782 for (i = 0; i < num_stripes; i++) {
11783 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
11784 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
11786 if (devid == devext_key.objectid &&
11787 offset == devext_key.offset) {
11793 btrfs_release_path(&path);
11794 if (!found_chunk) {
11796 "device extent[%llu, %llu, %llu] did not find the related chunk",
11797 devext_key.objectid, devext_key.offset, length);
11798 return REFERENCER_MISSING;
11804 * Check if the used space is correct with the dev item
11806 static int check_dev_item(struct btrfs_fs_info *fs_info,
11807 struct extent_buffer *eb, int slot)
11809 struct btrfs_root *dev_root = fs_info->dev_root;
11810 struct btrfs_dev_item *dev_item;
11811 struct btrfs_path path;
11812 struct btrfs_key key;
11813 struct btrfs_dev_extent *ptr;
11819 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
11820 dev_id = btrfs_device_id(eb, dev_item);
11821 used = btrfs_device_bytes_used(eb, dev_item);
11823 key.objectid = dev_id;
11824 key.type = BTRFS_DEV_EXTENT_KEY;
11827 btrfs_init_path(&path);
11828 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
11830 btrfs_item_key_to_cpu(eb, &key, slot);
11831 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
11832 key.objectid, key.type, key.offset);
11833 btrfs_release_path(&path);
11834 return REFERENCER_MISSING;
11837 /* Iterate dev_extents to calculate the used space of a device */
11839 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11842 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11843 if (key.objectid > dev_id)
11845 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11848 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11849 struct btrfs_dev_extent);
11850 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11852 ret = btrfs_next_item(dev_root, &path);
11856 btrfs_release_path(&path);
11858 if (used != total) {
11859 btrfs_item_key_to_cpu(eb, &key, slot);
11861 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11862 total, used, BTRFS_ROOT_TREE_OBJECTID,
11863 BTRFS_DEV_EXTENT_KEY, dev_id);
11864 return ACCOUNTING_MISMATCH;
11870 * Check a block group item with its referener (chunk) and its used space
11871 * with extent/metadata item
11873 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11874 struct extent_buffer *eb, int slot)
11876 struct btrfs_root *extent_root = fs_info->extent_root;
11877 struct btrfs_root *chunk_root = fs_info->chunk_root;
11878 struct btrfs_block_group_item *bi;
11879 struct btrfs_block_group_item bg_item;
11880 struct btrfs_path path;
11881 struct btrfs_key bg_key;
11882 struct btrfs_key chunk_key;
11883 struct btrfs_key extent_key;
11884 struct btrfs_chunk *chunk;
11885 struct extent_buffer *leaf;
11886 struct btrfs_extent_item *ei;
11887 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11895 btrfs_item_key_to_cpu(eb, &bg_key, slot);
11896 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11897 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11898 used = btrfs_block_group_used(&bg_item);
11899 bg_flags = btrfs_block_group_flags(&bg_item);
11901 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11902 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11903 chunk_key.offset = bg_key.objectid;
11905 btrfs_init_path(&path);
11906 /* Search for the referencer chunk */
11907 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11910 "block group[%llu %llu] did not find the related chunk item",
11911 bg_key.objectid, bg_key.offset);
11912 err |= REFERENCER_MISSING;
11914 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11915 struct btrfs_chunk);
11916 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11919 "block group[%llu %llu] related chunk item length does not match",
11920 bg_key.objectid, bg_key.offset);
11921 err |= REFERENCER_MISMATCH;
11924 btrfs_release_path(&path);
11926 /* Search from the block group bytenr */
11927 extent_key.objectid = bg_key.objectid;
11928 extent_key.type = 0;
11929 extent_key.offset = 0;
11931 btrfs_init_path(&path);
11932 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11936 /* Iterate extent tree to account used space */
11938 leaf = path.nodes[0];
11940 /* Search slot can point to the last item beyond leaf nritems */
11941 if (path.slots[0] >= btrfs_header_nritems(leaf))
11944 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11945 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11948 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11949 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11951 if (extent_key.objectid < bg_key.objectid)
11954 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11957 total += extent_key.offset;
11959 ei = btrfs_item_ptr(leaf, path.slots[0],
11960 struct btrfs_extent_item);
11961 flags = btrfs_extent_flags(leaf, ei);
11962 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11963 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11965 "bad extent[%llu, %llu) type mismatch with chunk",
11966 extent_key.objectid,
11967 extent_key.objectid + extent_key.offset);
11968 err |= CHUNK_TYPE_MISMATCH;
11970 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11971 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11972 BTRFS_BLOCK_GROUP_METADATA))) {
11974 "bad extent[%llu, %llu) type mismatch with chunk",
11975 extent_key.objectid,
11976 extent_key.objectid + nodesize);
11977 err |= CHUNK_TYPE_MISMATCH;
11981 ret = btrfs_next_item(extent_root, &path);
11987 btrfs_release_path(&path);
11989 if (total != used) {
11991 "block group[%llu %llu] used %llu but extent items used %llu",
11992 bg_key.objectid, bg_key.offset, used, total);
11993 err |= ACCOUNTING_MISMATCH;
11999 * Check a chunk item.
12000 * Including checking all referred dev_extents and block group
12002 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12003 struct extent_buffer *eb, int slot)
12005 struct btrfs_root *extent_root = fs_info->extent_root;
12006 struct btrfs_root *dev_root = fs_info->dev_root;
12007 struct btrfs_path path;
12008 struct btrfs_key chunk_key;
12009 struct btrfs_key bg_key;
12010 struct btrfs_key devext_key;
12011 struct btrfs_chunk *chunk;
12012 struct extent_buffer *leaf;
12013 struct btrfs_block_group_item *bi;
12014 struct btrfs_block_group_item bg_item;
12015 struct btrfs_dev_extent *ptr;
12027 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12028 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12029 length = btrfs_chunk_length(eb, chunk);
12030 chunk_end = chunk_key.offset + length;
12031 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12034 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12036 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12039 type = btrfs_chunk_type(eb, chunk);
12041 bg_key.objectid = chunk_key.offset;
12042 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12043 bg_key.offset = length;
12045 btrfs_init_path(&path);
12046 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
12049 "chunk[%llu %llu) did not find the related block group item",
12050 chunk_key.offset, chunk_end);
12051 err |= REFERENCER_MISSING;
12053 leaf = path.nodes[0];
12054 bi = btrfs_item_ptr(leaf, path.slots[0],
12055 struct btrfs_block_group_item);
12056 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
12058 if (btrfs_block_group_flags(&bg_item) != type) {
12060 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
12061 chunk_key.offset, chunk_end, type,
12062 btrfs_block_group_flags(&bg_item));
12063 err |= REFERENCER_MISSING;
12067 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
12068 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
12069 for (i = 0; i < num_stripes; i++) {
12070 btrfs_release_path(&path);
12071 btrfs_init_path(&path);
12072 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
12073 devext_key.type = BTRFS_DEV_EXTENT_KEY;
12074 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
12076 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
12079 goto not_match_dev;
12081 leaf = path.nodes[0];
12082 ptr = btrfs_item_ptr(leaf, path.slots[0],
12083 struct btrfs_dev_extent);
12084 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
12085 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
12086 if (objectid != chunk_key.objectid ||
12087 offset != chunk_key.offset ||
12088 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
12089 goto not_match_dev;
12092 err |= BACKREF_MISSING;
12094 "chunk[%llu %llu) stripe %d did not find the related dev extent",
12095 chunk_key.objectid, chunk_end, i);
12098 btrfs_release_path(&path);
12104 * Main entry function to check known items and update related accounting info
12106 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
12108 struct btrfs_fs_info *fs_info = root->fs_info;
12109 struct btrfs_key key;
12112 struct btrfs_extent_data_ref *dref;
12117 btrfs_item_key_to_cpu(eb, &key, slot);
12121 case BTRFS_EXTENT_DATA_KEY:
12122 ret = check_extent_data_item(root, eb, slot);
12125 case BTRFS_BLOCK_GROUP_ITEM_KEY:
12126 ret = check_block_group_item(fs_info, eb, slot);
12129 case BTRFS_DEV_ITEM_KEY:
12130 ret = check_dev_item(fs_info, eb, slot);
12133 case BTRFS_CHUNK_ITEM_KEY:
12134 ret = check_chunk_item(fs_info, eb, slot);
12137 case BTRFS_DEV_EXTENT_KEY:
12138 ret = check_dev_extent_item(fs_info, eb, slot);
12141 case BTRFS_EXTENT_ITEM_KEY:
12142 case BTRFS_METADATA_ITEM_KEY:
12143 ret = check_extent_item(fs_info, eb, slot);
12146 case BTRFS_EXTENT_CSUM_KEY:
12147 total_csum_bytes += btrfs_item_size_nr(eb, slot);
12149 case BTRFS_TREE_BLOCK_REF_KEY:
12150 ret = check_tree_block_backref(fs_info, key.offset,
12154 case BTRFS_EXTENT_DATA_REF_KEY:
12155 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
12156 ret = check_extent_data_backref(fs_info,
12157 btrfs_extent_data_ref_root(eb, dref),
12158 btrfs_extent_data_ref_objectid(eb, dref),
12159 btrfs_extent_data_ref_offset(eb, dref),
12161 btrfs_extent_data_ref_count(eb, dref));
12164 case BTRFS_SHARED_BLOCK_REF_KEY:
12165 ret = check_shared_block_backref(fs_info, key.offset,
12169 case BTRFS_SHARED_DATA_REF_KEY:
12170 ret = check_shared_data_backref(fs_info, key.offset,
12178 if (++slot < btrfs_header_nritems(eb))
12185 * Helper function for later fs/subvol tree check. To determine if a tree
12186 * block should be checked.
12187 * This function will ensure only the direct referencer with lowest rootid to
12188 * check a fs/subvolume tree block.
12190 * Backref check at extent tree would detect errors like missing subvolume
12191 * tree, so we can do aggressive check to reduce duplicated checks.
12193 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
12195 struct btrfs_root *extent_root = root->fs_info->extent_root;
12196 struct btrfs_key key;
12197 struct btrfs_path path;
12198 struct extent_buffer *leaf;
12200 struct btrfs_extent_item *ei;
12206 struct btrfs_extent_inline_ref *iref;
12209 btrfs_init_path(&path);
12210 key.objectid = btrfs_header_bytenr(eb);
12211 key.type = BTRFS_METADATA_ITEM_KEY;
12212 key.offset = (u64)-1;
12215 * Any failure in backref resolving means we can't determine
12216 * whom the tree block belongs to.
12217 * So in that case, we need to check that tree block
12219 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12223 ret = btrfs_previous_extent_item(extent_root, &path,
12224 btrfs_header_bytenr(eb));
12228 leaf = path.nodes[0];
12229 slot = path.slots[0];
12230 btrfs_item_key_to_cpu(leaf, &key, slot);
12231 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12233 if (key.type == BTRFS_METADATA_ITEM_KEY) {
12234 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12236 struct btrfs_tree_block_info *info;
12238 info = (struct btrfs_tree_block_info *)(ei + 1);
12239 iref = (struct btrfs_extent_inline_ref *)(info + 1);
12242 item_size = btrfs_item_size_nr(leaf, slot);
12243 ptr = (unsigned long)iref;
12244 end = (unsigned long)ei + item_size;
12245 while (ptr < end) {
12246 iref = (struct btrfs_extent_inline_ref *)ptr;
12247 type = btrfs_extent_inline_ref_type(leaf, iref);
12248 offset = btrfs_extent_inline_ref_offset(leaf, iref);
12251 * We only check the tree block if current root is
12252 * the lowest referencer of it.
12254 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
12255 offset < root->objectid) {
12256 btrfs_release_path(&path);
12260 ptr += btrfs_extent_inline_ref_size(type);
12263 * Normally we should also check keyed tree block ref, but that may be
12264 * very time consuming. Inlined ref should already make us skip a lot
12265 * of refs now. So skip search keyed tree block ref.
12269 btrfs_release_path(&path);
12274 * Traversal function for tree block. We will do:
12275 * 1) Skip shared fs/subvolume tree blocks
12276 * 2) Update related bytes accounting
12277 * 3) Pre-order traversal
12279 static int traverse_tree_block(struct btrfs_root *root,
12280 struct extent_buffer *node)
12282 struct extent_buffer *eb;
12283 struct btrfs_key key;
12284 struct btrfs_key drop_key;
12292 * Skip shared fs/subvolume tree block, in that case they will
12293 * be checked by referencer with lowest rootid
12295 if (is_fstree(root->objectid) && !should_check(root, node))
12298 /* Update bytes accounting */
12299 total_btree_bytes += node->len;
12300 if (fs_root_objectid(btrfs_header_owner(node)))
12301 total_fs_tree_bytes += node->len;
12302 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
12303 total_extent_tree_bytes += node->len;
12305 /* pre-order tranversal, check itself first */
12306 level = btrfs_header_level(node);
12307 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
12308 btrfs_header_level(node),
12309 btrfs_header_owner(node));
12313 "check %s failed root %llu bytenr %llu level %d, force continue check",
12314 level ? "node":"leaf", root->objectid,
12315 btrfs_header_bytenr(node), btrfs_header_level(node));
12318 btree_space_waste += btrfs_leaf_free_space(root, node);
12319 ret = check_leaf_items(root, node);
12324 nr = btrfs_header_nritems(node);
12325 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
12326 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
12327 sizeof(struct btrfs_key_ptr);
12329 /* Then check all its children */
12330 for (i = 0; i < nr; i++) {
12331 u64 blocknr = btrfs_node_blockptr(node, i);
12333 btrfs_node_key_to_cpu(node, &key, i);
12334 if (level == root->root_item.drop_level &&
12335 is_dropped_key(&key, &drop_key))
12339 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
12340 * to call the function itself.
12342 eb = read_tree_block(root->fs_info, blocknr, 0);
12343 if (extent_buffer_uptodate(eb)) {
12344 ret = traverse_tree_block(root, eb);
12347 free_extent_buffer(eb);
12354 * Low memory usage version check_chunks_and_extents.
12356 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
12358 struct btrfs_path path;
12359 struct btrfs_key key;
12360 struct btrfs_root *root1;
12361 struct btrfs_root *root;
12362 struct btrfs_root *cur_root;
12366 root = fs_info->fs_root;
12368 root1 = root->fs_info->chunk_root;
12369 ret = traverse_tree_block(root1, root1->node);
12372 root1 = root->fs_info->tree_root;
12373 ret = traverse_tree_block(root1, root1->node);
12376 btrfs_init_path(&path);
12377 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
12379 key.type = BTRFS_ROOT_ITEM_KEY;
12381 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
12383 error("cannot find extent treet in tree_root");
12388 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12389 if (key.type != BTRFS_ROOT_ITEM_KEY)
12391 key.offset = (u64)-1;
12393 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12394 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
12397 cur_root = btrfs_read_fs_root(root->fs_info, &key);
12398 if (IS_ERR(cur_root) || !cur_root) {
12399 error("failed to read tree: %lld", key.objectid);
12403 ret = traverse_tree_block(cur_root, cur_root->node);
12406 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12407 btrfs_free_fs_root(cur_root);
12409 ret = btrfs_next_item(root1, &path);
12415 btrfs_release_path(&path);
12419 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
12423 if (!ctx.progress_enabled)
12424 fprintf(stderr, "checking extents\n");
12425 if (check_mode == CHECK_MODE_LOWMEM)
12426 ret = check_chunks_and_extents_v2(fs_info);
12428 ret = check_chunks_and_extents(fs_info);
/*
 * Give @root a freshly initialised root tree block inside @trans.
 *
 * The block header is zeroed and rewritten (level, bytenr, generation taken
 * from trans->transid, mixed backref revision, owner, fsid and chunk tree
 * uuid) and the buffer is marked dirty.  If the block sits at the same
 * bytenr as the previous root node, the root item is refreshed and written
 * with btrfs_update_root(); the visible tail otherwise drops the old node
 * and puts @root on the dirty list.
 *
 * NOTE(review): the branch that tests @overwrite is not visible here;
 * presumably it selects reusing the existing root node instead of
 * allocating a new block -- confirm.
 */
12433 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
12434 struct btrfs_root *root, int overwrite)
12436 struct extent_buffer *c;
/* Previous root node, needed for the same-bytenr test below. */
12437 struct extent_buffer *old = root->node;
12440 struct btrfs_disk_key disk_key = {0,0,0};
12446 extent_buffer_get(c);
12449 c = btrfs_alloc_free_block(trans, root,
12450 root->fs_info->nodesize,
12451 root->root_key.objectid,
12452 &disk_key, level, 0, 0);
12455 extent_buffer_get(c);
/* Wipe the header and rebuild it field by field. */
12459 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
12460 btrfs_set_header_level(c, level);
12461 btrfs_set_header_bytenr(c, c->start);
12462 btrfs_set_header_generation(c, trans->transid);
12463 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
12464 btrfs_set_header_owner(c, root->root_key.objectid);
12466 write_extent_buffer(c, root->fs_info->fsid,
12467 btrfs_header_fsid(), BTRFS_FSID_SIZE);
12469 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
12470 btrfs_header_chunk_tree_uuid(c),
12473 btrfs_mark_buffer_dirty(c);
12475 * this case can happen in the following case:
12477 * 1.overwrite previous root.
12479 * 2.reinit reloc data root, this is because we skip pin
12480 * down reloc data tree before which means we can allocate
12481 * same block bytenr here.
12483 if (old->start == c->start) {
12484 btrfs_set_root_generation(&root->root_item,
12486 root->root_item.level = btrfs_header_level(root->node);
12487 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12488 &root->root_key, &root->root_item);
12490 free_extent_buffer(c);
12494 free_extent_buffer(old);
12496 add_root_to_dirty_list(root);
/*
 * Recursively pin the extent occupied by @eb and by the tree blocks
 * reachable from it, so that later allocations cannot reuse them.
 *
 * Blocks whose range is already marked EXTENT_DIRTY in
 * fs_info->pinned_extents are not processed again.  ROOT_ITEM entries
 * (except the extent root and the reloc roots) are followed to the root
 * block they point at; for child pointers the block is pinned and, unless
 * level == 1 with @tree_root == 0, also read and descended into.
 *
 * @tree_root is passed on unchanged when following child pointers and is
 * 0 for blocks reached through a root item.
 */
12500 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12501 struct extent_buffer *eb, int tree_root)
12503 struct extent_buffer *tmp;
12504 struct btrfs_root_item *ri;
12505 struct btrfs_key key;
12507 int level = btrfs_header_level(eb);
12513 * If we have pinned this block before, don't pin it again.
12514 * This can not only avoid forever loop with broken filesystem
12515 * but also give us some speedups.
12517 if (test_range_bit(&fs_info->pinned_extents, eb->start,
12518 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
12521 btrfs_pin_extent(fs_info, eb->start, eb->len);
12523 nritems = btrfs_header_nritems(eb);
12524 for (i = 0; i < nritems; i++) {
12526 btrfs_item_key_to_cpu(eb, &key, i);
12527 if (key.type != BTRFS_ROOT_ITEM_KEY)
12529 /* Skip the extent root and reloc roots */
12530 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
12531 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
12532 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
12534 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
12535 bytenr = btrfs_disk_root_bytenr(eb, ri);
12538 * If at any point we start needing the real root we
12539 * will have to build a stump root for the root we are
12540 * in, but for now this doesn't actually use the root so
12541 * just pass in extent_root.
12543 tmp = read_tree_block(fs_info, bytenr, 0);
12544 if (!extent_buffer_uptodate(tmp)) {
12545 fprintf(stderr, "Error reading root block\n");
/* Blocks found through a root item are never treated as the tree root. */
12548 ret = pin_down_tree_blocks(fs_info, tmp, 0);
12549 free_extent_buffer(tmp);
12553 bytenr = btrfs_node_blockptr(eb, i);
12555 /* If we aren't the tree root don't read the block */
12556 if (level == 1 && !tree_root) {
12557 btrfs_pin_extent(fs_info, bytenr,
12558 fs_info->nodesize);
12562 tmp = read_tree_block(fs_info, bytenr, 0);
12563 if (!extent_buffer_uptodate(tmp)) {
12564 fprintf(stderr, "Error reading tree block\n");
12567 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
12568 free_extent_buffer(tmp);
12577 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
12581 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
12585 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Recreate the in-memory block groups from the chunk tree.
 *
 * The avail_{data,metadata,system}_alloc_bits are cleared first, then every
 * CHUNK_ITEM found in the chunk tree is turned into a block group with
 * btrfs_add_block_group() and its range is marked dirty in
 * fs_info->free_space_cache.  A final pass iterates the block groups with
 * btrfs_lookup_first_block_group().
 *
 * NOTE(review): what the final pass does to each block group is not
 * visible in this excerpt.
 */
12588 static int reset_block_groups(struct btrfs_fs_info *fs_info)
12590 struct btrfs_block_group_cache *cache;
12591 struct btrfs_path path;
12592 struct extent_buffer *leaf;
12593 struct btrfs_chunk *chunk;
12594 struct btrfs_key key;
12598 btrfs_init_path(&path);
12600 key.type = BTRFS_CHUNK_ITEM_KEY;
12602 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
12604 btrfs_release_path(&path);
12609 * We do this in case the block groups were screwed up and had alloc
12610 * bits that aren't actually set on the chunks. This happens with
12611 * restored images every time and could happen in real life I guess.
12613 fs_info->avail_data_alloc_bits = 0;
12614 fs_info->avail_metadata_alloc_bits = 0;
12615 fs_info->avail_system_alloc_bits = 0;
12617 /* First we need to create the in-memory block groups */
/* Step to the next leaf once the current one is exhausted. */
12619 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12620 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
12622 btrfs_release_path(&path);
12630 leaf = path.nodes[0];
12631 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12632 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
12637 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
/* The chunk key offset and the chunk length give the block group range. */
12638 btrfs_add_block_group(fs_info, 0,
12639 btrfs_chunk_type(leaf, chunk),
12640 key.objectid, key.offset,
12641 btrfs_chunk_length(leaf, chunk));
12642 set_extent_dirty(&fs_info->free_space_cache, key.offset,
12643 key.offset + btrfs_chunk_length(leaf, chunk));
12648 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Continue the lookup right behind the block group just found. */
12652 start = cache->key.objectid + cache->key.offset;
12655 btrfs_release_path(&path);
/*
 * Remove what an interrupted balance left behind, inside @trans.
 *
 * Three steps are visible:
 *  1) the BTRFS_BALANCE_OBJECTID / BALANCE_ITEM entry is searched in the
 *     tree root and deleted,
 *  2) items with objectid BTRFS_TREE_RELOC_OBJECTID are deleted from the
 *     tree root in batches via btrfs_del_items(),
 *  3) the data reloc tree is read, recorded in the transaction,
 *     re-initialised with btrfs_fsck_reinit_root() and given a new root
 *     directory.
 *
 * NOTE(review): several branch conditions and labels are missing from this
 * excerpt, so the exact conditions under which step 1 jumps straight to
 * step 3 cannot be stated from here.
 */
12659 static int reset_balance(struct btrfs_trans_handle *trans,
12660 struct btrfs_fs_info *fs_info)
12662 struct btrfs_root *root = fs_info->tree_root;
12663 struct btrfs_path path;
12664 struct extent_buffer *leaf;
12665 struct btrfs_key key;
12666 int del_slot, del_nr = 0;
12670 btrfs_init_path(&path);
12671 key.objectid = BTRFS_BALANCE_OBJECTID;
12672 key.type = BTRFS_BALANCE_ITEM_KEY;
12674 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12679 goto reinit_data_reloc;
12684 ret = btrfs_del_item(trans, root, &path);
12687 btrfs_release_path(&path);
12689 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12690 key.type = BTRFS_ROOT_ITEM_KEY;
12692 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12696 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12701 ret = btrfs_del_items(trans, root, &path,
12708 btrfs_release_path(&path);
12711 ret = btrfs_search_slot(trans, root, &key, &path,
12718 leaf = path.nodes[0];
12719 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12720 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
12722 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
12727 del_slot = path.slots[0];
12736 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
12740 btrfs_release_path(&path);
/* From here on "root" refers to the data reloc tree, not the tree root. */
12743 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
12744 key.type = BTRFS_ROOT_ITEM_KEY;
12745 key.offset = (u64)-1;
12746 root = btrfs_read_fs_root(fs_info, &key);
12747 if (IS_ERR(root)) {
12748 fprintf(stderr, "Error reading data reloc tree\n");
12749 ret = PTR_ERR(root);
12752 record_root_in_trans(trans, root);
12753 ret = btrfs_fsck_reinit_root(trans, root, 0);
12756 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
12758 btrfs_release_path(&path);
/*
 * Rebuild the extent tree from scratch inside @trans.
 *
 * Visible sequence: refuse filesystems with the MIXED_GROUPS incompat flag,
 * pin all metadata blocks, free and recreate the in-memory block groups,
 * re-initialise the extent root, insert one item per in-memory block group
 * into the new extent root, and finally reset any pending balance.
 * Each failing step prints a message to stderr.
 */
12762 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
12763 struct btrfs_fs_info *fs_info)
12769 * The only reason we don't do this is because right now we're just
12770 * walking the trees we find and pinning down their bytes, we don't look
12771 * at any of the leaves. In order to do mixed groups we'd have to check
12772 * the leaves of any fs roots and pin down the bytes for any file
12773 * extents we find. Not hard but why do it if we don't have to?
12775 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
12776 fprintf(stderr, "We don't support re-initing the extent tree "
12777 "for mixed block groups yet, please notify a btrfs "
12778 "developer you want to do this so they can add this "
12779 "functionality.\n");
12784 * first we need to walk all of the trees except the extent tree and pin
12785 * down the bytes that are in use so we don't overwrite any existing
12788 ret = pin_metadata_blocks(fs_info);
12790 fprintf(stderr, "error pinning down used bytes\n");
12795 * Need to drop all the block groups since we're going to recreate all
12798 btrfs_free_block_groups(fs_info);
12799 ret = reset_block_groups(fs_info);
12801 fprintf(stderr, "error resetting the block groups\n");
12805 /* Ok we can allocate now, reinit the extent root */
12806 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
12808 fprintf(stderr, "extent root initialization failed\n");
12810 * When the transaction code is updated we should end the
12811 * transaction, but for now progs only knows about commit so
12812 * just return an error.
12818 * Now we have all the in-memory block groups setup so we can make
12819 * allocations properly, and the metadata we care about is safe since we
12820 * pinned all of it above.
12823 struct btrfs_block_group_cache *cache;
12825 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Advance past this block group before inserting its item. */
12828 start = cache->key.objectid + cache->key.offset;
12829 ret = btrfs_insert_item(trans, fs_info->extent_root,
12830 &cache->key, &cache->item,
12831 sizeof(cache->item));
12833 fprintf(stderr, "Error adding block group\n");
12836 btrfs_extent_post_op(trans, fs_info->extent_root);
12839 ret = reset_balance(trans, fs_info);
12841 fprintf(stderr, "error resetting the pending balance\n");
12846 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
12848 struct btrfs_path path;
12849 struct btrfs_trans_handle *trans;
12850 struct btrfs_key key;
12853 printf("Recowing metadata block %llu\n", eb->start);
12854 key.objectid = btrfs_header_owner(eb);
12855 key.type = BTRFS_ROOT_ITEM_KEY;
12856 key.offset = (u64)-1;
12858 root = btrfs_read_fs_root(root->fs_info, &key);
12859 if (IS_ERR(root)) {
12860 fprintf(stderr, "Couldn't find owner root %llu\n",
12862 return PTR_ERR(root);
12865 trans = btrfs_start_transaction(root, 1);
12867 return PTR_ERR(trans);
12869 btrfs_init_path(&path);
12870 path.lowest_level = btrfs_header_level(eb);
12871 if (path.lowest_level)
12872 btrfs_node_key_to_cpu(eb, &key, 0);
12874 btrfs_item_key_to_cpu(eb, &key, 0);
12876 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12877 btrfs_commit_transaction(trans, root);
12878 btrfs_release_path(&path);
12882 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12884 struct btrfs_path path;
12885 struct btrfs_trans_handle *trans;
12886 struct btrfs_key key;
12889 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12890 bad->key.type, bad->key.offset);
12891 key.objectid = bad->root_id;
12892 key.type = BTRFS_ROOT_ITEM_KEY;
12893 key.offset = (u64)-1;
12895 root = btrfs_read_fs_root(root->fs_info, &key);
12896 if (IS_ERR(root)) {
12897 fprintf(stderr, "Couldn't find owner root %llu\n",
12899 return PTR_ERR(root);
12902 trans = btrfs_start_transaction(root, 1);
12904 return PTR_ERR(trans);
12906 btrfs_init_path(&path);
12907 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12913 ret = btrfs_del_item(trans, root, &path);
12915 btrfs_commit_transaction(trans, root);
12916 btrfs_release_path(&path);
12920 static int zero_log_tree(struct btrfs_root *root)
12922 struct btrfs_trans_handle *trans;
12925 trans = btrfs_start_transaction(root, 1);
12926 if (IS_ERR(trans)) {
12927 ret = PTR_ERR(trans);
12930 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12931 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12932 ret = btrfs_commit_transaction(trans, root);
12936 static int populate_csum(struct btrfs_trans_handle *trans,
12937 struct btrfs_root *csum_root, char *buf, u64 start,
12940 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12945 while (offset < len) {
12946 sectorsize = fs_info->sectorsize;
12947 ret = read_extent_data(fs_info, buf, start + offset,
12951 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12952 start + offset, buf, sectorsize);
12955 offset += sectorsize;
12960 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12961 struct btrfs_root *csum_root,
12962 struct btrfs_root *cur_root)
12964 struct btrfs_path path;
12965 struct btrfs_key key;
12966 struct extent_buffer *node;
12967 struct btrfs_file_extent_item *fi;
12974 buf = malloc(cur_root->fs_info->sectorsize);
12978 btrfs_init_path(&path);
12982 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12985 /* Iterate all regular file extents and fill its csum */
12987 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12989 if (key.type != BTRFS_EXTENT_DATA_KEY)
12991 node = path.nodes[0];
12992 slot = path.slots[0];
12993 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12994 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12996 start = btrfs_file_extent_disk_bytenr(node, fi);
12997 len = btrfs_file_extent_disk_num_bytes(node, fi);
12999 ret = populate_csum(trans, csum_root, buf, start, len);
13000 if (ret == -EEXIST)
13006 * TODO: if next leaf is corrupted, jump to nearest next valid
13009 ret = btrfs_next_item(cur_root, &path);
13019 btrfs_release_path(&path);
13024 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13025 struct btrfs_root *csum_root)
13027 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13028 struct btrfs_path path;
13029 struct btrfs_root *tree_root = fs_info->tree_root;
13030 struct btrfs_root *cur_root;
13031 struct extent_buffer *node;
13032 struct btrfs_key key;
13036 btrfs_init_path(&path);
13037 key.objectid = BTRFS_FS_TREE_OBJECTID;
13039 key.type = BTRFS_ROOT_ITEM_KEY;
13040 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13049 node = path.nodes[0];
13050 slot = path.slots[0];
13051 btrfs_item_key_to_cpu(node, &key, slot);
13052 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13054 if (key.type != BTRFS_ROOT_ITEM_KEY)
13056 if (!is_fstree(key.objectid))
13058 key.offset = (u64)-1;
13060 cur_root = btrfs_read_fs_root(fs_info, &key);
13061 if (IS_ERR(cur_root) || !cur_root) {
13062 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13066 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13071 ret = btrfs_next_item(tree_root, &path);
13081 btrfs_release_path(&path);
13085 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13086 struct btrfs_root *csum_root)
13088 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13089 struct btrfs_path path;
13090 struct btrfs_extent_item *ei;
13091 struct extent_buffer *leaf;
13093 struct btrfs_key key;
13096 btrfs_init_path(&path);
13098 key.type = BTRFS_EXTENT_ITEM_KEY;
13100 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
13102 btrfs_release_path(&path);
13106 buf = malloc(csum_root->fs_info->sectorsize);
13108 btrfs_release_path(&path);
13113 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13114 ret = btrfs_next_leaf(extent_root, &path);
13122 leaf = path.nodes[0];
13124 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13125 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
13130 ei = btrfs_item_ptr(leaf, path.slots[0],
13131 struct btrfs_extent_item);
13132 if (!(btrfs_extent_flags(leaf, ei) &
13133 BTRFS_EXTENT_FLAG_DATA)) {
13138 ret = populate_csum(trans, csum_root, buf, key.objectid,
13145 btrfs_release_path(&path);
/*
 * Recalculate all data checksums and store them in @csum_root.
 *
 * After an extent tree re-init the extent tree no longer describes the data
 * extents, so the caller can ask for the fs/subvolume trees to be used as
 * the source instead by passing a non-zero @search_fs_tree.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ?
		fill_csum_tree_from_fs(trans, csum_root) :
		fill_csum_tree_from_extent(trans, csum_root);
}
13167 static void free_roots_info_cache(void)
13169 if (!roots_info_cache)
13172 while (!cache_tree_empty(roots_info_cache)) {
13173 struct cache_extent *entry;
13174 struct root_item_info *rii;
13176 entry = first_cache_extent(roots_info_cache);
13179 remove_cache_extent(roots_info_cache, entry);
13180 rii = container_of(entry, struct root_item_info, cache_extent);
13184 free(roots_info_cache);
13185 roots_info_cache = NULL;
/*
 * Scan the extent tree and record, per root id, the highest-level tree
 * block found for it in the global roots_info_cache.
 *
 * Only tree block extents are considered (METADATA_ITEM, or EXTENT_ITEM
 * with BTRFS_EXTENT_FLAG_TREE_BLOCK) and only those whose first inline ref
 * is a TREE_BLOCK_REF; the ref offset is used as the root id.  For each
 * root the entry keeps level, bytenr and generation of the best candidate;
 * node_count is reset to 1 whenever a higher level is found.
 *
 * The cache tree is allocated on first use; free_roots_info_cache()
 * releases it.
 */
13188 static int build_roots_info_cache(struct btrfs_fs_info *info)
13191 struct btrfs_key key;
13192 struct extent_buffer *leaf;
13193 struct btrfs_path path;
13195 if (!roots_info_cache) {
13196 roots_info_cache = malloc(sizeof(*roots_info_cache));
13197 if (!roots_info_cache)
13199 cache_tree_init(roots_info_cache);
13202 btrfs_init_path(&path);
13204 key.type = BTRFS_EXTENT_ITEM_KEY;
13206 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
13209 leaf = path.nodes[0];
13212 struct btrfs_key found_key;
13213 struct btrfs_extent_item *ei;
13214 struct btrfs_extent_inline_ref *iref;
13215 int slot = path.slots[0];
13220 struct cache_extent *entry;
13221 struct root_item_info *rii;
/* Leaf exhausted: move on and refresh the cached leaf/slot. */
13223 if (slot >= btrfs_header_nritems(leaf)) {
13224 ret = btrfs_next_leaf(info->extent_root, &path);
13231 leaf = path.nodes[0];
13232 slot = path.slots[0];
13235 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13237 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
13238 found_key.type != BTRFS_METADATA_ITEM_KEY)
13241 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
13242 flags = btrfs_extent_flags(leaf, ei);
13244 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
13245 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/* METADATA_ITEM keeps the level in the key offset, EXTENT_ITEM in the block info. */
13248 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
13249 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
13250 level = found_key.offset;
13252 struct btrfs_tree_block_info *binfo;
13254 binfo = (struct btrfs_tree_block_info *)(ei + 1);
13255 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
13256 level = btrfs_tree_block_level(leaf, binfo);
13260 * For a root extent, it must be of the following type and the
13261 * first (and only one) iref in the item.
13263 type = btrfs_extent_inline_ref_type(leaf, iref);
13264 if (type != BTRFS_TREE_BLOCK_REF_KEY)
13267 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
13268 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13270 rii = malloc(sizeof(struct root_item_info));
/* New entries are keyed by root id; level (u8)-1 marks "nothing recorded yet". */
13275 rii->cache_extent.start = root_id;
13276 rii->cache_extent.size = 1;
13277 rii->level = (u8)-1;
13278 entry = &rii->cache_extent;
13279 ret = insert_cache_extent(roots_info_cache, entry);
13282 rii = container_of(entry, struct root_item_info,
13286 ASSERT(rii->cache_extent.start == root_id);
13287 ASSERT(rii->cache_extent.size == 1);
13289 if (level > rii->level || rii->level == (u8)-1) {
13290 rii->level = level;
13291 rii->bytenr = found_key.objectid;
13292 rii->gen = btrfs_extent_generation(leaf, ei);
13293 rii->node_count = 1;
13294 } else if (level == rii->level) {
13302 btrfs_release_path(&path);
/*
 * Compare the root item at path->nodes[0] / path->slots[0] with what
 * roots_info_cache recorded for root_key->objectid and optionally fix it.
 *
 * The item is considered wrong when its bytenr, level or generation differ
 * from the cached values.  With @read_only_mode == 0 those three fields are
 * overwritten in the leaf via write_extent_buffer().  A root item whose
 * generation is newer than the root node that was found gets its own
 * message.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * check the caller for the meaning of the return value.
 */
13307 static int maybe_repair_root_item(struct btrfs_path *path,
13308 const struct btrfs_key *root_key,
13309 const int read_only_mode)
13311 const u64 root_id = root_key->objectid;
13312 struct cache_extent *entry;
13313 struct root_item_info *rii;
13314 struct btrfs_root_item ri;
13315 unsigned long offset;
13317 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13320 "Error: could not find extent items for root %llu\n",
13321 root_key->objectid);
13325 rii = container_of(entry, struct root_item_info, cache_extent);
13326 ASSERT(rii->cache_extent.start == root_id);
13327 ASSERT(rii->cache_extent.size == 1);
/* Anything but exactly one candidate root node is reported as an error. */
13329 if (rii->node_count != 1) {
13331 "Error: could not find btree root extent for root %llu\n",
/* Work on a local copy of the on-disk root item. */
13336 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
13337 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
13339 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
13340 btrfs_root_level(&ri) != rii->level ||
13341 btrfs_root_generation(&ri) != rii->gen) {
13344 * If we're in repair mode but our caller told us to not update
13345 * the root item, i.e. just check if it needs to be updated, don't
13346 * print this message, since the caller will call us again shortly
13347 * for the same root item without read only mode (the caller will
13348 * open a transaction first).
13350 if (!(read_only_mode && repair))
13352 "%sroot item for root %llu,"
13353 " current bytenr %llu, current gen %llu, current level %u,"
13354 " new bytenr %llu, new gen %llu, new level %u\n",
13355 (read_only_mode ? "" : "fixing "),
13357 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
13358 btrfs_root_level(&ri),
13359 rii->bytenr, rii->gen, rii->level);
13361 if (btrfs_root_generation(&ri) > rii->gen) {
13363 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
13364 root_id, btrfs_root_generation(&ri), rii->gen);
/* Write the corrected copy back into the leaf. */
13368 if (!read_only_mode) {
13369 btrfs_set_root_bytenr(&ri, rii->bytenr);
13370 btrfs_set_root_level(&ri, rii->level);
13371 btrfs_set_root_generation(&ri, rii->gen);
13372 write_extent_buffer(path->nodes[0], &ri,
13373 offset, sizeof(ri));
13383 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
13384 * caused read-only snapshots to be corrupted if they were created at a moment
13385 * when the source subvolume/snapshot had orphan items. The issue was that the
13386 * on-disk root items became incorrect, referring to the pre orphan cleanup root
13387 * node instead of the post orphan cleanup root node.
13388 * So this function, and its callees, just detects and fixes those cases. Even
13389 * though the regression was for read-only snapshots, this function applies to
13390 * any snapshot/subvolume root.
13391 * This must be run before any other repair code - not doing it so, makes other
13392 * repair code delete or modify backrefs in the extent tree for example, which
13393 * will result in an inconsistent fs after repairing the root items.
/*
 * Flow visible below: build roots_info_cache, then walk the ROOT_ITEM keys
 * of the tree root starting at BTRFS_FIRST_FREE_OBJECTID (reloc tree items
 * are skipped) and hand each one to maybe_repair_root_item().  That call is
 * read-only as long as no transaction is open (trans == NULL); a
 * transaction is started only when one is needed.  The cache is freed on
 * the way out.
 */
13395 static int repair_root_items(struct btrfs_fs_info *info)
13397 struct btrfs_path path;
13398 struct btrfs_key key;
13399 struct extent_buffer *leaf;
13400 struct btrfs_trans_handle *trans = NULL;
13403 int need_trans = 0;
13405 btrfs_init_path(&path);
13407 ret = build_roots_info_cache(info);
13411 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
13412 key.type = BTRFS_ROOT_ITEM_KEY;
13417 * Avoid opening and committing transactions if a leaf doesn't have
13418 * any root items that need to be fixed, so that we avoid rotating
13419 * backup roots unnecessarily.
13422 trans = btrfs_start_transaction(info->tree_root, 1);
13423 if (IS_ERR(trans)) {
13424 ret = PTR_ERR(trans);
13429 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
13433 leaf = path.nodes[0];
13436 struct btrfs_key found_key;
/* End of leaf: find the key to resume from before the path is released. */
13438 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
13439 int no_more_keys = find_next_key(&path, &key);
13441 btrfs_release_path(&path);
13443 ret = btrfs_commit_transaction(trans,
13455 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13457 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
13459 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* Third argument is read_only_mode: 1 while no transaction is open. */
13462 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
13466 if (!trans && repair) {
13469 btrfs_release_path(&path);
13479 free_roots_info_cache();
13480 btrfs_release_path(&path);
13482 btrfs_commit_transaction(trans, info->tree_root);
13489 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13491 struct btrfs_trans_handle *trans;
13492 struct btrfs_block_group_cache *bg_cache;
13496 /* Clear all free space cache inodes and its extent data */
13498 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13501 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
13504 current = bg_cache->key.objectid + bg_cache->key.offset;
13507 /* Don't forget to set cache_generation to -1 */
13508 trans = btrfs_start_transaction(fs_info->tree_root, 0);
13509 if (IS_ERR(trans)) {
13510 error("failed to update super block cache generation");
13511 return PTR_ERR(trans);
13513 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
13514 btrfs_commit_transaction(trans, fs_info->tree_root);
13519 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
13524 if (clear_version == 1) {
13525 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13527 "free space cache v2 detected, use --clear-space-cache v2");
13531 printf("Clearing free space cache\n");
13532 ret = clear_free_space_cache(fs_info);
13534 error("failed to clear free space cache");
13537 printf("Free space cache cleared\n");
13539 } else if (clear_version == 2) {
13540 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13541 printf("no free space cache v2 to clear\n");
13545 printf("Clear free space cache v2\n");
13546 ret = btrfs_clear_free_space_tree(fs_info);
13548 error("failed to clear free space cache v2: %d", ret);
13551 printf("free space cache v2 cleared\n");
/*
 * Usage text for "btrfs check": synopsis line, short description, long
 * description, then one entry per option.  Passed to usage() by cmd_check()
 * when option parsing fails.
 */
13558 const char * const cmd_check_usage[] = {
13559 "btrfs check [options] <device>",
13560 "Check structural integrity of a filesystem (unmounted).",
13561 "Check structural integrity of an unmounted filesystem. Verify internal",
13562 "trees' consistency and item connectivity. In the repair mode try to",
13563 "fix the problems found. ",
13564 "WARNING: the repair mode is considered dangerous",
13566 "-s|--super <superblock> use this superblock copy",
13567 "-b|--backup use the first valid backup root copy",
13568 "--force skip mount checks, repair is not possible",
13569 "--repair try to repair the filesystem",
13570 "--readonly run in read-only mode (default)",
13571 "--init-csum-tree create a new CRC tree",
13572 "--init-extent-tree create a new extent tree",
13573 "--mode <MODE> allows choice of memory/IO trade-offs",
13574 " where MODE is one of:",
13575 " original - read inodes and extents to memory (requires",
13576 " more memory, does less IO)",
13577 " lowmem - try to use less memory but read blocks again",
13579 "--check-data-csum verify checksums of data blocks",
13580 "-Q|--qgroup-report print a report on qgroup consistency",
13581 "-E|--subvol-extents <subvolid>",
13582 " print subvolume extents and sharing state",
13583 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
13584 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
13585 "-p|--progress indicate progress",
13586 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
13590 int cmd_check(int argc, char **argv)
13592 struct cache_tree root_cache;
13593 struct btrfs_root *root;
13594 struct btrfs_fs_info *info;
13597 u64 tree_root_bytenr = 0;
13598 u64 chunk_root_bytenr = 0;
13599 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
13603 int init_csum_tree = 0;
13605 int clear_space_cache = 0;
13606 int qgroup_report = 0;
13607 int qgroups_repaired = 0;
13608 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
13613 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
13614 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
13615 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
13616 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
13617 GETOPT_VAL_FORCE };
13618 static const struct option long_options[] = {
13619 { "super", required_argument, NULL, 's' },
13620 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
13621 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
13622 { "init-csum-tree", no_argument, NULL,
13623 GETOPT_VAL_INIT_CSUM },
13624 { "init-extent-tree", no_argument, NULL,
13625 GETOPT_VAL_INIT_EXTENT },
13626 { "check-data-csum", no_argument, NULL,
13627 GETOPT_VAL_CHECK_CSUM },
13628 { "backup", no_argument, NULL, 'b' },
13629 { "subvol-extents", required_argument, NULL, 'E' },
13630 { "qgroup-report", no_argument, NULL, 'Q' },
13631 { "tree-root", required_argument, NULL, 'r' },
13632 { "chunk-root", required_argument, NULL,
13633 GETOPT_VAL_CHUNK_TREE },
13634 { "progress", no_argument, NULL, 'p' },
13635 { "mode", required_argument, NULL,
13637 { "clear-space-cache", required_argument, NULL,
13638 GETOPT_VAL_CLEAR_SPACE_CACHE},
13639 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
13640 { NULL, 0, NULL, 0}
13643 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
13647 case 'a': /* ignored */ break;
13649 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
13652 num = arg_strtou64(optarg);
13653 if (num >= BTRFS_SUPER_MIRROR_MAX) {
13655 "super mirror should be less than %d",
13656 BTRFS_SUPER_MIRROR_MAX);
13659 bytenr = btrfs_sb_offset(((int)num));
13660 printf("using SB copy %llu, bytenr %llu\n", num,
13661 (unsigned long long)bytenr);
13667 subvolid = arg_strtou64(optarg);
13670 tree_root_bytenr = arg_strtou64(optarg);
13672 case GETOPT_VAL_CHUNK_TREE:
13673 chunk_root_bytenr = arg_strtou64(optarg);
13676 ctx.progress_enabled = true;
13680 usage(cmd_check_usage);
13681 case GETOPT_VAL_REPAIR:
13682 printf("enabling repair mode\n");
13684 ctree_flags |= OPEN_CTREE_WRITES;
13686 case GETOPT_VAL_READONLY:
13689 case GETOPT_VAL_INIT_CSUM:
13690 printf("Creating a new CRC tree\n");
13691 init_csum_tree = 1;
13693 ctree_flags |= OPEN_CTREE_WRITES;
13695 case GETOPT_VAL_INIT_EXTENT:
13696 init_extent_tree = 1;
13697 ctree_flags |= (OPEN_CTREE_WRITES |
13698 OPEN_CTREE_NO_BLOCK_GROUPS);
13701 case GETOPT_VAL_CHECK_CSUM:
13702 check_data_csum = 1;
13704 case GETOPT_VAL_MODE:
13705 check_mode = parse_check_mode(optarg);
13706 if (check_mode == CHECK_MODE_UNKNOWN) {
13707 error("unknown mode: %s", optarg);
13711 case GETOPT_VAL_CLEAR_SPACE_CACHE:
13712 if (strcmp(optarg, "v1") == 0) {
13713 clear_space_cache = 1;
13714 } else if (strcmp(optarg, "v2") == 0) {
13715 clear_space_cache = 2;
13716 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
13719 "invalid argument to --clear-space-cache, must be v1 or v2");
13722 ctree_flags |= OPEN_CTREE_WRITES;
13724 case GETOPT_VAL_FORCE:
13730 if (check_argc_exact(argc - optind, 1))
13731 usage(cmd_check_usage);
13733 if (ctx.progress_enabled) {
13734 ctx.tp = TASK_NOTHING;
13735 ctx.info = task_init(print_status_check, print_status_return, &ctx);
13738 /* This check is the only reason for --readonly to exist */
13739 if (readonly && repair) {
13740 error("repair options are not compatible with --readonly");
13745 * experimental and dangerous
13747 if (repair && check_mode == CHECK_MODE_LOWMEM)
13748 warning("low-memory mode repair support is only partial");
13751 cache_tree_init(&root_cache);
13753 ret = check_mounted(argv[optind]);
13756 error("could not check mount status: %s",
13762 "%s is currently mounted, use --force if you really intend to check the filesystem",
13770 error("repair and --force is not yet supported");
13777 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
13781 "filesystem mounted, continuing because of --force");
13783 /* A block device is mounted in exclusive mode by kernel */
13784 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
13787 /* only allow partial opening under repair mode */
13789 ctree_flags |= OPEN_CTREE_PARTIAL;
13791 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
13792 chunk_root_bytenr, ctree_flags);
13794 error("cannot open file system");
13800 global_info = info;
13801 root = info->fs_root;
13802 uuid_unparse(info->super_copy->fsid, uuidbuf);
13804 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
13807 * Check the bare minimum before starting anything else that could rely
13808 * on it, namely the tree roots, any local consistency checks
13810 if (!extent_buffer_uptodate(info->tree_root->node) ||
13811 !extent_buffer_uptodate(info->dev_root->node) ||
13812 !extent_buffer_uptodate(info->chunk_root->node)) {
13813 error("critical roots corrupted, unable to check the filesystem");
13819 if (clear_space_cache) {
13820 ret = do_clear_free_space_cache(info, clear_space_cache);
13826 * repair mode will force us to commit transaction which
13827 * will make us fail to load log tree when mounting.
13829 if (repair && btrfs_super_log_root(info->super_copy)) {
13830 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
13836 ret = zero_log_tree(root);
13839 error("failed to zero log tree: %d", ret);
13844 if (qgroup_report) {
13845 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
13847 ret = qgroup_verify_all(info);
13854 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
13855 subvolid, argv[optind], uuidbuf);
13856 ret = print_extent_state(info, subvolid);
13861 if (init_extent_tree || init_csum_tree) {
13862 struct btrfs_trans_handle *trans;
13864 trans = btrfs_start_transaction(info->extent_root, 0);
13865 if (IS_ERR(trans)) {
13866 error("error starting transaction");
13867 ret = PTR_ERR(trans);
13872 if (init_extent_tree) {
13873 printf("Creating a new extent tree\n");
13874 ret = reinit_extent_tree(trans, info);
13880 if (init_csum_tree) {
13881 printf("Reinitialize checksum tree\n");
13882 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
13884 error("checksum tree initialization failed: %d",
13891 ret = fill_csum_tree(trans, info->csum_root,
13895 error("checksum tree refilling failed: %d", ret);
13900 * Ok now we commit and run the normal fsck, which will add
13901 * extent entries for all of the items it finds.
13903 ret = btrfs_commit_transaction(trans, info->extent_root);
13908 if (!extent_buffer_uptodate(info->extent_root->node)) {
13909 error("critical: extent_root, unable to check the filesystem");
13914 if (!extent_buffer_uptodate(info->csum_root->node)) {
13915 error("critical: csum_root, unable to check the filesystem");
13921 ret = do_check_chunks_and_extents(info);
13925 "errors found in extent allocation tree or chunk allocation");
13927 ret = repair_root_items(info);
13930 error("failed to repair root items: %s", strerror(-ret));
13934 fprintf(stderr, "Fixed %d roots.\n", ret);
13936 } else if (ret > 0) {
13938 "Found %d roots with an outdated root item.\n",
13941 "Please run a filesystem check with the option --repair to fix them.\n");
13947 if (!ctx.progress_enabled) {
13948 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13949 fprintf(stderr, "checking free space tree\n");
13951 fprintf(stderr, "checking free space cache\n");
13953 ret = check_space_cache(root);
13956 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13957 error("errors found in free space tree");
13959 error("errors found in free space cache");
13964 * We used to have to have these hole extents in between our real
13965 * extents so if we don't have this flag set we need to make sure there
13966 * are no gaps in the file extents for inodes, otherwise we can just
13967 * ignore it when this happens.
13969 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13970 ret = do_check_fs_roots(info, &root_cache);
13973 error("errors found in fs roots");
13977 fprintf(stderr, "checking csums\n");
13978 ret = check_csums(root);
13981 error("errors found in csum tree");
13985 fprintf(stderr, "checking root refs\n");
13986 /* For low memory mode, check_fs_roots_v2 handles root refs */
13987 if (check_mode != CHECK_MODE_LOWMEM) {
13988 ret = check_root_refs(root, &root_cache);
13991 error("errors found in root refs");
13996 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13997 struct extent_buffer *eb;
13999 eb = list_first_entry(&root->fs_info->recow_ebs,
14000 struct extent_buffer, recow);
14001 list_del_init(&eb->recow);
14002 ret = recow_extent_buffer(root, eb);
14005 error("fails to fix transid errors");
14010 while (!list_empty(&delete_items)) {
14011 struct bad_item *bad;
14013 bad = list_first_entry(&delete_items, struct bad_item, list);
14014 list_del_init(&bad->list);
14016 ret = delete_bad_item(root, bad);
14022 if (info->quota_enabled) {
14023 fprintf(stderr, "checking quota groups\n");
14024 ret = qgroup_verify_all(info);
14027 error("failed to check quota groups");
14031 ret = repair_qgroups(info, &qgroups_repaired);
14034 error("failed to repair quota groups");
14040 if (!list_empty(&root->fs_info->recow_ebs)) {
14041 error("transid errors in file system");
14046 printf("found %llu bytes used, ",
14047 (unsigned long long)bytes_used);
14049 printf("error(s) found\n");
14051 printf("no error found\n");
14052 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14053 printf("total tree bytes: %llu\n",
14054 (unsigned long long)total_btree_bytes);
14055 printf("total fs tree bytes: %llu\n",
14056 (unsigned long long)total_fs_tree_bytes);
14057 printf("total extent tree bytes: %llu\n",
14058 (unsigned long long)total_extent_tree_bytes);
14059 printf("btree space waste bytes: %llu\n",
14060 (unsigned long long)btree_space_waste);
14061 printf("file data blocks allocated: %llu\n referenced %llu\n",
14062 (unsigned long long)data_bytes_allocated,
14063 (unsigned long long)data_bytes_referenced);
14065 free_qgroup_counts();
14066 free_root_recs_tree(&root_cache);
14070 if (ctx.progress_enabled)
14071 task_deinit(ctx.info);