2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
/*
 * File-local state of the checker: running byte totals, two global
 * lists, integer switches and a few cached pointers.  All of it has
 * internal linkage.
 */
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
/* Read by maybe_free_inode_rec(): when set, the extent-gap check is skipped. */
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
/* Operating mode of the checker; parse_check_mode() maps names to these. */
78 enum btrfs_check_mode {
/* The default mode is an alias for the original mode. */
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
/* Mode currently selected; starts out as the default. */
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
/*
 * Header common to both kinds of backref: data_backref and tree_backref
 * each embed one as their "node" member.  is_data tells the two kinds
 * apart (the compare helpers WARN when it does not match); the other
 * one-bit fields are status flags.
 */
87 struct extent_backref {
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
98 return rb_entry(node, struct extent_backref, node);
/*
 * Backref belonging to a data extent.  The generic extent_backref is
 * embedded as "node", which is what to_data_backref() relies on to get
 * back to the enclosing structure.
 */
101 struct data_backref {
102 struct extent_backref node;
/*
 * Single-bit error flags (bits 1 through 19).  Each condition has its
 * own bit, so several of them can be carried in one mask.
 */
116 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
117 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
118 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
119 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
120 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
121 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but not match */
122 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
123 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
124 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
125 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
126 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
127 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
128 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM no reference */
129 #define NO_INODE_ITEM (1<<14) /* no inode_item */
130 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
131 #define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
132 #define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but not match */
133 #define DIR_INDEX_MISSING (1<<18) /* INODE_INDEX not found */
134 #define DIR_INDEX_MISMATCH (1<<19) /* INODE_INDEX found but not match */
136 static inline struct data_backref* to_data_backref(struct extent_backref *back)
138 return container_of(back, struct data_backref, node);
/*
 * rb-tree ordering callback for two data backrefs.
 *
 * Both nodes are expected to be data backrefs; a warning is raised if
 * either is not.  Fields are compared in this order: parent (which
 * shares storage with root), then owner and offset, and - when both
 * entries have found_ref set - disk_bytenr and bytes.  A full backref
 * is special-cased right after the parent comparison.
 */
141 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
143 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
144 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
145 struct data_backref *back1 = to_data_backref(ext1);
146 struct data_backref *back2 = to_data_backref(ext2);
148 WARN_ON(!ext1->is_data);
149 WARN_ON(!ext2->is_data);
151 /* parent and root are a union, so this covers both */
152 if (back1->parent > back2->parent)
154 if (back1->parent < back2->parent)
157 /* This is a full backref and the parents match. */
158 if (back1->node.full_backref)
161 if (back1->owner > back2->owner)
163 if (back1->owner < back2->owner)
166 if (back1->offset > back2->offset)
168 if (back1->offset < back2->offset)
/* The on-disk location only takes part when both sides have found_ref set. */
171 if (back1->found_ref && back2->found_ref) {
172 if (back1->disk_bytenr > back2->disk_bytenr)
174 if (back1->disk_bytenr < back2->disk_bytenr)
177 if (back1->bytes > back2->bytes)
179 if (back1->bytes < back2->bytes)
187 * Much like data_backref, but with the undetermined members removed
188 * and linked through a list_head instead.
189 * During extent scan, it is stored in root->orphan_data_extent.
190 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
/*
 * A data extent without an owner.  Entries are chained through "list";
 * clone_inode_rec() copies them and print_orphan_data_extents() prints
 * their objectid, offset and disk_bytenr.
 */
192 struct orphan_data_extent {
193 struct list_head list;
/*
 * Backref belonging to a tree block.  Embeds the generic extent_backref
 * as "node" so that to_tree_backref() can get back to it.
 */
201 struct tree_backref {
202 struct extent_backref node;
209 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
211 return container_of(back, struct tree_backref, node);
/*
 * rb-tree ordering callback for two tree backrefs.
 *
 * Neither node may be a data backref (a warning is raised otherwise).
 * Ordering is by parent, which shares storage with root.
 */
214 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
216 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
217 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
218 struct tree_backref *back1 = to_tree_backref(ext1);
219 struct tree_backref *back2 = to_tree_backref(ext2);
221 WARN_ON(ext1->is_data);
222 WARN_ON(ext2->is_data);
224 /* parent and root are a union, so this covers both */
225 if (back1->parent > back2->parent)
227 if (back1->parent < back2->parent)
/*
 * Generic rb-tree comparator for backrefs of either kind.
 *
 * Entries are ordered by is_data first and by full_backref second; the
 * remaining decision is delegated to compare_data_backref() or
 * compare_tree_backref().
 */
233 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
235 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
236 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
238 if (ext1->is_data > ext2->is_data)
241 if (ext1->is_data < ext2->is_data)
244 if (ext1->full_backref > ext2->full_backref)
246 if (ext1->full_backref < ext2->full_backref)
250 return compare_data_backref(node1, node2);
252 return compare_tree_backref(node1, node2);
255 /* Explicit initialization for extent_record::flag_block_full_backref */
/* The value 2 still fits that two-bit field while being neither 0 nor 1. */
256 enum { FLAG_UNSET = 2 };
/*
 * Bookkeeping for one extent.  A record can sit in several containers
 * at once: the list heads below, the backref_tree rb tree and an
 * extent cache entry.  The trailing bit-fields are per-extent status
 * flags.
 */
258 struct extent_record {
259 struct list_head backrefs;
260 struct list_head dups;
261 struct rb_root backref_tree;
/* Linkage used by to_extent_record(). */
262 struct list_head list;
263 struct cache_extent cache;
264 struct btrfs_disk_key parent_key;
269 u64 extent_item_refs;
271 u64 parent_generation;
/* Two bits wide so that it can also hold FLAG_UNSET. */
275 unsigned int flag_block_full_backref:2;
276 unsigned int found_rec:1;
277 unsigned int content_checked:1;
278 unsigned int owner_ref_checked:1;
279 unsigned int is_root:1;
280 unsigned int metadata:1;
281 unsigned int bad_full_backref:1;
282 unsigned int crossing_stripes:1;
283 unsigned int wrong_chunk_type:1;
286 static inline struct extent_record* to_extent_record(struct list_head *entry)
288 return container_of(entry, struct extent_record, list);
/*
 * One named reference to an inode from a directory.  The found_* bits
 * note which item kinds have been seen for this reference; they are set
 * in add_inode_backref().  Entries hang off inode_record::backrefs.
 */
291 struct inode_backref {
292 struct list_head list;
293 unsigned int found_dir_item:1;
294 unsigned int found_dir_index:1;
295 unsigned int found_inode_ref:1;
305 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
307 return list_entry(entry, struct inode_backref, list);
/* Per-root record; chained through "list" and carrying a drop_key. */
310 struct root_item_record {
311 struct list_head list;
317 struct btrfs_key drop_key;
/*
 * Error bits for a single backref.  add_inode_backref() and
 * maybe_free_inode_rec() store them in inode_backref::errors, and
 * print_ref_error() turns a mask of them into text.
 */
320 #define REF_ERR_NO_DIR_ITEM (1 << 0)
321 #define REF_ERR_NO_DIR_INDEX (1 << 1)
322 #define REF_ERR_NO_INODE_REF (1 << 2)
323 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
324 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
325 #define REF_ERR_DUP_INODE_REF (1 << 5)
326 #define REF_ERR_INDEX_UNMATCH (1 << 6)
327 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
328 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
329 #define REF_ERR_NO_ROOT_REF (1 << 9)
330 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
331 #define REF_ERR_DUP_ROOT_REF (1 << 11)
332 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
/*
 * One hole in the extent coverage of a file.  The helpers in this file
 * use its rb-tree linkage ("node") together with "start" and "len".
 */
334 struct file_extent_hole {
/*
 * Everything gathered about one inode.  "backrefs" holds inode_backref
 * entries, "holes" is an rb tree of file_extent_hole nodes and
 * "orphan_extents" a list of orphan_data_extent entries.  The one-bit
 * fields are status flags.
 */
340 struct inode_record {
341 struct list_head backrefs;
342 unsigned int checked:1;
343 unsigned int merging:1;
344 unsigned int found_inode_item:1;
345 unsigned int found_dir_item:1;
346 unsigned int found_file_extent:1;
347 unsigned int found_csum_item:1;
348 unsigned int some_csum_missing:1;
349 unsigned int nodatasum:1;
362 struct rb_root holes;
363 struct list_head orphan_extents;
/*
 * Error bits kept in inode_record::errors.  print_inode_error() prints
 * one message per bit that is set.
 */
368 #define I_ERR_NO_INODE_ITEM (1 << 0)
369 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
370 #define I_ERR_DUP_INODE_ITEM (1 << 2)
371 #define I_ERR_DUP_DIR_INDEX (1 << 3)
372 #define I_ERR_ODD_DIR_ITEM (1 << 4)
373 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
374 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
375 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
376 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
377 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
378 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
379 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
380 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
381 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
382 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
/*
 * One reference to a root.  Chained through "list" (see
 * to_root_backref()); the one-bit fields are status flags.
 */
384 struct root_backref {
385 struct list_head list;
386 unsigned int found_dir_item:1;
387 unsigned int found_dir_index:1;
388 unsigned int found_back_ref:1;
389 unsigned int found_forward_ref:1;
390 unsigned int reachable:1;
399 static inline struct root_backref* to_root_backref(struct list_head *entry)
401 return list_entry(entry, struct root_backref, list);
405 struct list_head backrefs;
406 struct cache_extent cache;
407 unsigned int found_root_item:1;
413 struct cache_extent cache;
418 struct cache_extent cache;
419 struct cache_tree root_cache;
420 struct cache_tree inode_cache;
421 struct inode_record *current;
/*
 * State of a tree walk.  "shared" indexes shared_node entries by block
 * number (see find_shared_node()); nodes[] holds one shared_node
 * pointer per tree level.
 */
430 struct walk_control {
431 struct cache_tree shared;
432 struct shared_node *nodes[BTRFS_MAX_LEVEL];
438 struct btrfs_key key;
440 struct list_head list;
/* List-linked entry describing an extent. */
443 struct extent_entry {
448 struct list_head list;
/* Per-root summary; embeds a cache_extent. */
451 struct root_item_info {
452 /* level of the root */
454 /* number of nodes at this level, must be 1 for a root */
458 struct cache_extent cache_extent;
462 * Error bit for low memory mode check.
464 * Currently no caller cares about it yet. Just internal use for error
#define BACKREF_MISSING (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH (1 << 4) /* Referencer found but does not match */
/*
 * CROSSING_STRIPE_BOUNDARY used to be (1 << 4) as well, which made it
 * indistinguishable from REFERENCER_MISMATCH in an error mask.  It now
 * has the first bit not used by any other flag in this group.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH (1 << 8)
/*
 * Progress indicator routine; p points to the struct task_ctx of the
 * run.
 *
 * Starts a periodic timer with a 1000 ms interval, prints the text for
 * the current task position followed by one of four spinner characters
 * chosen by "count % 4", ends the line with '\r' so the next print
 * overwrites it, and then waits for the next period.  The position
 * TASK_NOTHING is special-cased before anything is printed.
 */
478 static void *print_status_check(void *p)
480 struct task_ctx *priv = p;
481 const char work_indicator[] = { '.', 'o', 'O', 'o' };
483 static char *task_position_string[] = {
485 "checking free space cache",
489 task_period_start(priv->info, 1000 /* 1s */);
491 if (priv->tp == TASK_NOTHING)
495 printf("%s [%c]\r", task_position_string[priv->tp],
496 work_indicator[count % 4]);
499 task_period_wait(priv->info);
504 static int print_status_return(void *p)
512 static enum btrfs_check_mode parse_check_mode(const char *str)
514 if (strcmp(str, "lowmem") == 0)
515 return CHECK_MODE_LOWMEM;
516 if (strcmp(str, "orig") == 0)
517 return CHECK_MODE_ORIGINAL;
518 if (strcmp(str, "original") == 0)
519 return CHECK_MODE_ORIGINAL;
521 return CHECK_MODE_UNKNOWN;
524 /* Compatible function to allow reuse of old code */
/*
 * Looks at the lowest hole recorded in "holes".  An empty tree is
 * handled up front; otherwise the leftmost node of the tree is used.
 */
525 static u64 first_extent_gap(struct rb_root *holes)
527 struct file_extent_hole *hole;
529 if (RB_EMPTY_ROOT(holes))
532 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * rb-tree ordering callback for holes.
 *
 * Holes are ordered by start offset.  When the starts are equal the
 * lengths break the tie, so the callback never reports two holes as
 * equal; add_file_extent_hole() depends on that to avoid -EEXIST.
 */
536 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
538 struct file_extent_hole *hole1;
539 struct file_extent_hole *hole2;
541 hole1 = rb_entry(node1, struct file_extent_hole, node);
542 hole2 = rb_entry(node2, struct file_extent_hole, node);
544 if (hole1->start > hole2->start)
546 if (hole1->start < hole2->start)
548 /* Now hole1->start == hole2->start */
549 if (hole1->len >= hole2->len)
551 * Hole 1 will be merge center
552 * Same hole will be merged later
555 /* Hole 2 will be merge center */
560 * Add a hole to the record
562 * This merges holes on behalf of copy_file_extent_holes(),
563 * which ensures that no contiguous holes are left behind.
/*
 * Insert a new hole into "holes" and coalesce it with its neighbours.
 *
 * A node is allocated and inserted with compare_hole().  If the hole
 * that precedes it reaches or overlaps the new one, the two are folded
 * together and the predecessor is erased from the tree.  After that the
 * following holes are absorbed one by one for as long as they touch or
 * overlap the (possibly grown) new hole.
 *
 * NOTE(review): when merging with the predecessor the new length is
 * computed from the end of the new hole only.  If the predecessor
 * extends beyond that end, the merged hole comes out shorter than the
 * predecessor was - confirm callers never add a hole that lies strictly
 * inside an existing one.
 */
565 static int add_file_extent_hole(struct rb_root *holes,
568 struct file_extent_hole *hole;
569 struct file_extent_hole *prev = NULL;
570 struct file_extent_hole *next = NULL;
572 hole = malloc(sizeof(*hole));
577 /* Since compare will not return 0, no -EEXIST will happen */
578 rb_insert(holes, &hole->node, compare_hole);
580 /* simple merge with previous hole */
581 if (rb_prev(&hole->node))
582 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
584 if (prev && prev->start + prev->len >= hole->start) {
585 hole->len = hole->start + hole->len - prev->start;
586 hole->start = prev->start;
587 rb_erase(&prev->node, holes);
592 /* iterate merge with next holes */
594 if (!rb_next(&hole->node))
596 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
598 if (hole->start + hole->len >= next->start) {
/* Only grow when the next hole ends later; a contained hole is just dropped. */
599 if (hole->start + hole->len <= next->start + next->len)
600 hole->len = next->start + next->len -
602 rb_erase(&next->node, holes);
/*
 * Search callback for rb_search() on a hole tree.
 *
 * "data" is a file_extent_hole that carries the offset being looked
 * up.  A tree node matches when that offset lies inside the node's
 * range [start, start + len).
 */
611 static int compare_hole_range(struct rb_node *node, void *data)
613 struct file_extent_hole *hole;
616 hole = (struct file_extent_hole *)data;
619 hole = rb_entry(node, struct file_extent_hole, node);
620 if (start < hole->start)
622 if (start >= hole->start && start < hole->start + hole->len)
628 * Delete a hole in the record
630 * This splits the hole where needed and is much stricter than add.
/*
 * Remove the range [start, start + len) from the hole that contains
 * "start".
 *
 * The hole is located with compare_hole_range().  The range must not
 * extend past the end of that hole.  The hole is erased from the tree
 * and whatever is left of it in front of and behind the removed range
 * is put back with add_file_extent_hole().
 */
632 static int del_file_extent_hole(struct rb_root *holes,
635 struct file_extent_hole *hole;
636 struct file_extent_hole tmp;
641 struct rb_node *node;
648 node = rb_search(holes, &tmp, compare_hole_range, NULL);
651 hole = rb_entry(node, struct file_extent_hole, node);
652 if (start + len > hole->start + hole->len)
656 * Now there will be no overlap, delete the hole and re-add the
657 * split(s) if they exists.
/* Part of the old hole that lies in front of the removed range. */
659 if (start > hole->start) {
660 prev_start = hole->start;
661 prev_len = start - hole->start;
/* Part of the old hole that lies behind the removed range. */
664 if (hole->start + hole->len > start + len) {
665 next_start = start + len;
666 next_len = hole->start + hole->len - start - len;
669 rb_erase(node, holes);
672 ret = add_file_extent_hole(holes, prev_start, prev_len);
677 ret = add_file_extent_hole(holes, next_start, next_len);
/*
 * Add every hole of "src" to "dst".  The source tree is walked in
 * order and each hole goes through add_file_extent_hole(), so holes are
 * merged in the destination as they arrive.
 */
684 static int copy_file_extent_holes(struct rb_root *dst,
687 struct file_extent_hole *hole;
688 struct rb_node *node;
691 node = rb_first(src);
693 hole = rb_entry(node, struct file_extent_hole, node);
694 ret = add_file_extent_hole(dst, hole->start, hole->len);
697 node = rb_next(node);
/*
 * Empty a hole tree: the current first node is taken and erased from
 * the tree over and over.
 */
702 static void free_file_extent_holes(struct rb_root *holes)
704 struct rb_node *node;
705 struct file_extent_hole *hole;
707 node = rb_first(holes);
709 hole = rb_entry(node, struct file_extent_hole, node);
710 rb_erase(node, holes);
712 node = rb_first(holes);
716 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
718 static void record_root_in_trans(struct btrfs_trans_handle *trans,
719 struct btrfs_root *root)
721 if (root->last_trans != trans->transid) {
722 root->track_dirty = 1;
723 root->last_trans = trans->transid;
724 root->commit_root = root->node;
725 extent_buffer_get(root->node);
/*
 * Convert the file type bits of an inode mode into a BTRFS_FT_* value
 * by table lookup.  File types without a table entry map to 0, because
 * the table is static and only the listed slots are initialized.
 *
 * NOTE(review): the table has S_IFMT >> S_SHIFT entries, but the index
 * (imode & S_IFMT) >> S_SHIFT can be as large as S_IFMT >> S_SHIFT
 * itself when every S_IFMT bit is set in imode, which is one past the
 * end - confirm that callers never pass such a mode.
 */
729 static u8 imode_to_type(u32 imode)
732 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
733 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
734 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
735 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
736 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
737 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
738 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
739 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
742 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
/* rb-tree ordering callback: device records are ordered by devid. */
746 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
748 struct device_record *rec1;
749 struct device_record *rec2;
751 rec1 = rb_entry(node1, struct device_record, node);
752 rec2 = rb_entry(node2, struct device_record, node);
753 if (rec1->devid > rec2->devid)
755 else if (rec1->devid < rec2->devid)
/*
 * Make a deep copy of an inode record.
 *
 * The record itself is copied byte for byte, after which its two list
 * heads and its hole tree are re-initialized and refilled with private
 * copies of the original's backrefs (including the name stored behind
 * each backref), orphan extents and holes.  Returns ERR_PTR(-ENOMEM)
 * when the record cannot be allocated.  The tail of the function walks
 * the copied holes, backrefs and orphan extents again to undo a
 * partially built clone.
 *
 * NOTE(review): the second cleanup loop walks rec->orphan_extents using
 * "orig" and "tmp", which are declared as struct inode_backref
 * pointers, although that list holds struct orphan_data_extent entries.
 * The "list" member offsets of the two types need not agree - confirm
 * and switch to orphan_data_extent cursors.
 */
761 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
763 struct inode_record *rec;
764 struct inode_backref *backref;
765 struct inode_backref *orig;
766 struct inode_backref *tmp;
767 struct orphan_data_extent *src_orphan;
768 struct orphan_data_extent *dst_orphan;
773 rec = malloc(sizeof(*rec));
775 return ERR_PTR(-ENOMEM);
776 memcpy(rec, orig_rec, sizeof(*rec));
/* The memcpy above copied the original's links; start with empty containers. */
778 INIT_LIST_HEAD(&rec->backrefs);
779 INIT_LIST_HEAD(&rec->orphan_extents);
780 rec->holes = RB_ROOT;
782 list_for_each_entry(orig, &orig_rec->backrefs, list) {
/* The name is stored right behind the structure, plus a terminating NUL. */
783 size = sizeof(*orig) + orig->namelen + 1;
784 backref = malloc(size);
789 memcpy(backref, orig, size);
790 list_add_tail(&backref->list, &rec->backrefs);
792 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
793 dst_orphan = malloc(sizeof(*dst_orphan));
798 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
799 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
801 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
808 rb = rb_first(&rec->holes);
810 struct file_extent_hole *hole;
812 hole = rb_entry(rb, struct file_extent_hole, node);
818 if (!list_empty(&rec->backrefs))
819 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
820 list_del(&orig->list);
824 if (!list_empty(&rec->orphan_extents))
825 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
826 list_del(&orig->list);
/*
 * Print the orphan data extents of a tree to stdout, one line per
 * extent after a heading line.  Nothing is printed for an empty list.
 */
835 static void print_orphan_data_extents(struct list_head *orphan_extents,
838 struct orphan_data_extent *orphan;
840 if (list_empty(orphan_extents))
842 printf("The following data extent is lost in tree %llu:\n",
844 list_for_each_entry(orphan, orphan_extents, list) {
845 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
846 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Report the errors recorded for one inode on stderr.
 *
 * A header with the root id, the inode number and the raw error mask
 * is printed first, followed by one short text per I_ERR_* bit that is
 * set.  For a reloc root the header is prefixed with "reloc" and shows
 * the key offset of the root instead of its objectid.  Orphan extents
 * and file extent holes are listed in detail when the corresponding
 * error bits are set.
 */
851 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
853 u64 root_objectid = root->root_key.objectid;
854 int errors = rec->errors;
858 /* reloc root errors, we print its corresponding fs root objectid*/
859 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
860 root_objectid = root->root_key.offset;
861 fprintf(stderr, "reloc");
863 fprintf(stderr, "root %llu inode %llu errors %x",
864 (unsigned long long) root_objectid,
865 (unsigned long long) rec->ino, rec->errors);
867 if (errors & I_ERR_NO_INODE_ITEM)
868 fprintf(stderr, ", no inode item");
869 if (errors & I_ERR_NO_ORPHAN_ITEM)
870 fprintf(stderr, ", no orphan item");
871 if (errors & I_ERR_DUP_INODE_ITEM)
872 fprintf(stderr, ", dup inode item");
873 if (errors & I_ERR_DUP_DIR_INDEX)
874 fprintf(stderr, ", dup dir index");
875 if (errors & I_ERR_ODD_DIR_ITEM)
876 fprintf(stderr, ", odd dir item");
877 if (errors & I_ERR_ODD_FILE_EXTENT)
878 fprintf(stderr, ", odd file extent");
879 if (errors & I_ERR_BAD_FILE_EXTENT)
880 fprintf(stderr, ", bad file extent");
881 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
882 fprintf(stderr, ", file extent overlap");
883 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
884 fprintf(stderr, ", file extent discount");
885 if (errors & I_ERR_DIR_ISIZE_WRONG)
886 fprintf(stderr, ", dir isize wrong");
887 if (errors & I_ERR_FILE_NBYTES_WRONG)
888 fprintf(stderr, ", nbytes wrong");
889 if (errors & I_ERR_ODD_CSUM_ITEM)
890 fprintf(stderr, ", odd csum item");
891 if (errors & I_ERR_SOME_CSUM_MISSING)
892 fprintf(stderr, ", some csum missing");
893 if (errors & I_ERR_LINK_COUNT_WRONG)
894 fprintf(stderr, ", link count wrong");
895 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
896 fprintf(stderr, ", orphan file extent");
897 fprintf(stderr, "\n");
898 /* Print the orphan extents if needed */
899 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
900 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
902 /* Print the holes if needed */
903 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
904 struct file_extent_hole *hole;
905 struct rb_node *node;
908 node = rb_first(&rec->holes);
909 fprintf(stderr, "Found file extent holes:\n");
912 hole = rb_entry(node, struct file_extent_hole, node);
913 fprintf(stderr, "\tstart: %llu, len: %llu\n",
914 hole->start, hole->len);
915 node = rb_next(node);
/* Alternative line describing a single hole that starts at offset 0. */
918 fprintf(stderr, "\tstart: 0, len: %llu\n",
920 root->fs_info->sectorsize));
924 static void print_ref_error(int errors)
926 if (errors & REF_ERR_NO_DIR_ITEM)
927 fprintf(stderr, ", no dir item");
928 if (errors & REF_ERR_NO_DIR_INDEX)
929 fprintf(stderr, ", no dir index");
930 if (errors & REF_ERR_NO_INODE_REF)
931 fprintf(stderr, ", no inode ref");
932 if (errors & REF_ERR_DUP_DIR_ITEM)
933 fprintf(stderr, ", dup dir item");
934 if (errors & REF_ERR_DUP_DIR_INDEX)
935 fprintf(stderr, ", dup dir index");
936 if (errors & REF_ERR_DUP_INODE_REF)
937 fprintf(stderr, ", dup inode ref");
938 if (errors & REF_ERR_INDEX_UNMATCH)
939 fprintf(stderr, ", index mismatch");
940 if (errors & REF_ERR_FILETYPE_UNMATCH)
941 fprintf(stderr, ", filetype mismatch");
942 if (errors & REF_ERR_NAME_TOO_LONG)
943 fprintf(stderr, ", name too long");
944 if (errors & REF_ERR_NO_ROOT_REF)
945 fprintf(stderr, ", no root ref");
946 if (errors & REF_ERR_NO_ROOT_BACKREF)
947 fprintf(stderr, ", no root backref");
948 if (errors & REF_ERR_DUP_ROOT_REF)
949 fprintf(stderr, ", dup root ref");
950 if (errors & REF_ERR_DUP_ROOT_BACKREF)
951 fprintf(stderr, ", dup root backref");
952 fprintf(stderr, "\n");
/*
 * Look up the record of inode "ino" in inode_cache, creating it if
 * necessary.
 *
 * When a record exists, is shared (refs > 1) and the caller asked to
 * modify it, the cache entry is switched over to a private clone made
 * with clone_inode_rec().  A new record starts out zeroed with
 * extent_start set to (u64)-1, empty lists and an empty hole tree, and
 * is inserted under a cache entry of size 1.
 *
 * Errors are reported as ERR_PTR values: -ENOMEM for allocation
 * failures and -EEXIST when the insertion fails.
 */
955 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
958 struct ptr_node *node;
959 struct cache_extent *cache;
960 struct inode_record *rec = NULL;
963 cache = lookup_cache_extent(inode_cache, ino, 1);
965 node = container_of(cache, struct ptr_node, cache);
967 if (mod && rec->refs > 1) {
968 node->data = clone_inode_rec(rec);
969 if (IS_ERR(node->data))
975 rec = calloc(1, sizeof(*rec));
977 return ERR_PTR(-ENOMEM);
/* (u64)-1 is the "no extent seen yet" marker tested in merge_inode_recs(). */
979 rec->extent_start = (u64)-1;
981 INIT_LIST_HEAD(&rec->backrefs);
982 INIT_LIST_HEAD(&rec->orphan_extents);
983 rec->holes = RB_ROOT;
985 node = malloc(sizeof(*node));
988 return ERR_PTR(-ENOMEM);
990 node->cache.start = ino;
991 node->cache.size = 1;
994 if (ino == BTRFS_FREE_INO_OBJECTID)
997 ret = insert_cache_extent(inode_cache, &node->cache);
999 return ERR_PTR(-EEXIST);
/* Empty the list by unlinking its first entry until none is left. */
1004 static void free_orphan_data_extents(struct list_head *orphan_extents)
1006 struct orphan_data_extent *orphan;
1008 while (!list_empty(orphan_extents)) {
1009 orphan = list_entry(orphan_extents->next,
1010 struct orphan_data_extent, list);
1011 list_del(&orphan->list);
/*
 * Drop one reference to an inode record.  The reference count is
 * decremented first and tested for remaining users; the teardown below
 * unlinks every backref and releases the orphan extents and the holes.
 */
1016 static void free_inode_rec(struct inode_record *rec)
1018 struct inode_backref *backref;
1020 if (--rec->refs > 0)
1023 while (!list_empty(&rec->backrefs)) {
1024 backref = to_inode_backref(rec->backrefs.next);
1025 list_del(&backref->list);
1028 free_orphan_data_extents(&rec->orphan_extents);
1029 free_file_extent_holes(&rec->holes);
/*
 * Test whether a record is clean: no error bits, already checked, inode
 * item seen, link count equal to the number of links found, and no
 * backrefs left on its list.
 */
1033 static int can_free_inode_rec(struct inode_record *rec)
1035 if (!rec->errors && rec->checked && rec->found_inode_item &&
1036 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Run the final consistency checks on an inode record and release it
 * when nothing is wrong with it.
 *
 * The record is looked at only after its inode item has been seen.
 * Backrefs that have both a dir item and a dir index get their file
 * type compared with the type derived from the inode mode; those that
 * are error free, have an inode ref and belong to a record whose link
 * count matches are unlinked from the list.  Further checks require
 * the record to be marked checked and not to be in the middle of a
 * merge: they set I_ERR_* bits depending on whether the inode is a
 * directory or a regular file/symlink.  A record that then passes
 * can_free_inode_rec() is removed from inode_cache and freed.
 */
1041 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1042 struct inode_record *rec)
1044 struct cache_extent *cache;
1045 struct inode_backref *tmp, *backref;
1046 struct ptr_node *node;
1049 if (!rec->found_inode_item)
1052 filetype = imode_to_type(rec->imode);
1053 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1054 if (backref->found_dir_item && backref->found_dir_index) {
1055 if (backref->filetype != filetype)
1056 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1057 if (!backref->errors && backref->found_inode_ref &&
1058 rec->nlink == rec->found_link) {
1059 list_del(&backref->list);
1065 if (!rec->checked || rec->merging)
1068 if (S_ISDIR(rec->imode)) {
1069 if (rec->found_size != rec->isize)
1070 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1071 if (rec->found_file_extent)
1072 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1073 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1074 if (rec->found_dir_item)
1075 rec->errors |= I_ERR_ODD_DIR_ITEM;
1076 if (rec->found_size != rec->nbytes)
1077 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
/* Gap check: skipped for unlinked inodes and when no_holes is set. */
1078 if (rec->nlink > 0 && !no_holes &&
1079 (rec->extent_end < rec->isize ||
1080 first_extent_gap(&rec->holes) < rec->isize))
1081 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1084 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1085 if (rec->found_csum_item && rec->nodatasum)
1086 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1087 if (rec->some_csum_missing && !rec->nodatasum)
1088 rec->errors |= I_ERR_SOME_CSUM_MISSING;
/* Only a record with exactly one reference may reach this point. */
1091 BUG_ON(rec->refs != 1);
1092 if (can_free_inode_rec(rec)) {
1093 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1094 node = container_of(cache, struct ptr_node, cache);
1095 BUG_ON(node->data != rec);
1096 remove_cache_extent(inode_cache, &node->cache);
1098 free_inode_rec(rec);
/*
 * Search "root" for an orphan item (objectid BTRFS_ORPHAN_OBJECTID,
 * type BTRFS_ORPHAN_ITEM_KEY).  The search runs without a transaction
 * and with the last two btrfs_search_slot() arguments set to 0; the
 * path is released again right after the search.
 */
1102 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1104 struct btrfs_path path;
1105 struct btrfs_key key;
1108 key.objectid = BTRFS_ORPHAN_OBJECTID;
1109 key.type = BTRFS_ORPHAN_ITEM_KEY;
1112 btrfs_init_path(&path);
1113 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1114 btrfs_release_path(&path);
/*
 * Take over the fields of an INODE_ITEM into the record that is current
 * in active_node.
 *
 * The current record must belong to key->objectid and must not be
 * shared.  A second inode item for the same record is flagged with
 * I_ERR_DUP_INODE_ITEM.  Otherwise nlink, size, nbytes and mode are
 * read from the item in slot "slot" of "eb", the NODATASUM inode flag
 * is tested, and an inode with a link count of zero is provisionally
 * flagged with I_ERR_NO_ORPHAN_ITEM.  The record is then handed to
 * maybe_free_inode_rec().
 */
1120 static int process_inode_item(struct extent_buffer *eb,
1121 int slot, struct btrfs_key *key,
1122 struct shared_node *active_node)
1124 struct inode_record *rec;
1125 struct btrfs_inode_item *item;
1127 rec = active_node->current;
1128 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1129 if (rec->found_inode_item) {
1130 rec->errors |= I_ERR_DUP_INODE_ITEM;
1133 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1134 rec->nlink = btrfs_inode_nlink(eb, item);
1135 rec->isize = btrfs_inode_size(eb, item);
1136 rec->nbytes = btrfs_inode_nbytes(eb, item);
1137 rec->imode = btrfs_inode_mode(eb, item);
1138 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1140 rec->found_inode_item = 1;
1141 if (rec->nlink == 0)
1142 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1143 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find the backref of "rec" for the given directory and name, creating
 * it when it does not exist yet.
 *
 * Existing entries are matched on directory, name length and name
 * bytes; records for BTRFS_MULTIPLE_OBJECTIDS are special-cased inside
 * the search loop.  A new entry is allocated with room for the name
 * plus a terminating NUL, has its fixed part zeroed, stores a
 * NUL-terminated copy of the name and is appended to rec->backrefs.
 */
1147 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1149 int namelen, u64 dir)
1151 struct inode_backref *backref;
1153 list_for_each_entry(backref, &rec->backrefs, list) {
1154 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1156 if (backref->dir != dir || backref->namelen != namelen)
1158 if (memcmp(name, backref->name, namelen))
1163 backref = malloc(sizeof(*backref) + namelen + 1);
1166 memset(backref, 0, sizeof(*backref));
1168 backref->namelen = namelen;
1169 memcpy(backref->name, name, namelen);
1170 backref->name[namelen] = '\0';
1171 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Record that an item referencing inode "ino" was found.
 *
 * The inode record is fetched for modification (a failure there is
 * fatal) and the backref for (dir, name) is found or created.  The
 * caller's error bits are merged in and the backref is then updated
 * according to the item type:
 *  - DIR_INDEX: flags duplicates, an index that differs from an inode
 *    ref seen earlier and a file type that differs from a dir item seen
 *    earlier; stores index and file type.
 *  - DIR_ITEM: flags duplicates and a file type that differs from a dir
 *    index seen earlier; stores the file type.
 *  - INODE_REF / INODE_EXTREF: flags duplicates and an index that
 *    differs from a dir index seen earlier; stores index and item type.
 * Finally the record goes through maybe_free_inode_rec().
 */
1175 static int add_inode_backref(struct cache_tree *inode_cache,
1176 u64 ino, u64 dir, u64 index,
1177 const char *name, int namelen,
1178 u8 filetype, u8 itemtype, int errors)
1180 struct inode_record *rec;
1181 struct inode_backref *backref;
1183 rec = get_inode_rec(inode_cache, ino, 1);
1184 BUG_ON(IS_ERR(rec));
1185 backref = get_inode_backref(rec, name, namelen, dir);
1188 backref->errors |= errors;
1189 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1190 if (backref->found_dir_index)
1191 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1192 if (backref->found_inode_ref && backref->index != index)
1193 backref->errors |= REF_ERR_INDEX_UNMATCH;
1194 if (backref->found_dir_item && backref->filetype != filetype)
1195 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1197 backref->index = index;
1198 backref->filetype = filetype;
1199 backref->found_dir_index = 1;
1200 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1202 if (backref->found_dir_item)
1203 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1204 if (backref->found_dir_index && backref->filetype != filetype)
1205 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1207 backref->filetype = filetype;
1208 backref->found_dir_item = 1;
1209 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1210 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1211 if (backref->found_inode_ref)
1212 backref->errors |= REF_ERR_DUP_INODE_REF;
1213 if (backref->found_dir_index && backref->index != index)
1214 backref->errors |= REF_ERR_INDEX_UNMATCH;
1216 backref->index = index;
1218 backref->ref_type = itemtype;
1219 backref->found_inode_ref = 1;
1224 maybe_free_inode_rec(inode_cache, rec);
/*
 * Fold the information collected in "src" into "dst", which lives in
 * dst_cache.
 *
 * Every backref of src is replayed into dst through add_inode_backref(),
 * once for each item kind that was seen for it.  The found_* flags are
 * OR-ed together, the holes of src are copied when the first gap of dst
 * lies beyond the first gap of src, and the link and size totals are
 * added up (links minus dir_count).  The extent ranges are merged:
 * I_ERR_FILE_EXTENT_OVERLAP is flagged when dst ends after src begins,
 * and a hole is added when there is a gap between the two.  Error bits
 * are OR-ed.  If src has seen an inode item and dst has not, the inode
 * fields are taken over; I_ERR_DUP_INODE_ITEM covers the duplicate
 * case.
 */
1228 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1229 struct cache_tree *dst_cache)
1231 struct inode_backref *backref;
1236 list_for_each_entry(backref, &src->backrefs, list) {
1237 if (backref->found_dir_index) {
1238 add_inode_backref(dst_cache, dst->ino, backref->dir,
1239 backref->index, backref->name,
1240 backref->namelen, backref->filetype,
1241 BTRFS_DIR_INDEX_KEY, backref->errors);
1243 if (backref->found_dir_item) {
1245 add_inode_backref(dst_cache, dst->ino,
1246 backref->dir, 0, backref->name,
1247 backref->namelen, backref->filetype,
1248 BTRFS_DIR_ITEM_KEY, backref->errors);
1250 if (backref->found_inode_ref) {
1251 add_inode_backref(dst_cache, dst->ino,
1252 backref->dir, backref->index,
1253 backref->name, backref->namelen, 0,
1254 backref->ref_type, backref->errors);
1258 if (src->found_dir_item)
1259 dst->found_dir_item = 1;
1260 if (src->found_file_extent)
1261 dst->found_file_extent = 1;
1262 if (src->found_csum_item)
1263 dst->found_csum_item = 1;
1264 if (src->some_csum_missing)
1265 dst->some_csum_missing = 1;
1266 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1267 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1272 BUG_ON(src->found_link < dir_count);
1273 dst->found_link += src->found_link - dir_count;
1274 dst->found_size += src->found_size;
/* (u64)-1 in extent_start means that no extent has been recorded. */
1275 if (src->extent_start != (u64)-1) {
1276 if (dst->extent_start == (u64)-1) {
1277 dst->extent_start = src->extent_start;
1278 dst->extent_end = src->extent_end;
1280 if (dst->extent_end > src->extent_start)
1281 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1282 else if (dst->extent_end < src->extent_start) {
1283 ret = add_file_extent_hole(&dst->holes,
1285 src->extent_start - dst->extent_end);
1287 if (dst->extent_end < src->extent_end)
1288 dst->extent_end = src->extent_end;
1292 dst->errors |= src->errors;
1293 if (src->found_inode_item) {
1294 if (!dst->found_inode_item) {
1295 dst->nlink = src->nlink;
1296 dst->isize = src->isize;
1297 dst->nbytes = src->nbytes;
1298 dst->imode = src->imode;
1299 dst->nodatasum = src->nodatasum;
1300 dst->found_inode_item = 1;
1302 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Transfer the inode records gathered under src_node to dst_node.
 *
 * One reference on src_node is dropped first.  The entries of
 * root_cache are processed, then those of inode_cache.  Each entry is
 * inserted into the matching cache of dst_node; when an entry for the
 * same inode already exists there (-EEXIST) the two records are merged
 * with merge_inode_recs(), the surviving record goes through
 * maybe_free_inode_rec() and the source record is released.
 *
 * Afterwards, if src_node had a current inode and dst_node either has
 * none or has one with a lower inode number, the current record of
 * dst_node is marked checked and replaced by the record for the inode
 * that was current in src_node.
 */
1310 static int splice_shared_node(struct shared_node *src_node,
1311 struct shared_node *dst_node)
1313 struct cache_extent *cache;
1314 struct ptr_node *node, *ins;
1315 struct cache_tree *src, *dst;
1316 struct inode_record *rec, *conflict;
1317 u64 current_ino = 0;
1321 if (--src_node->refs == 0)
1323 if (src_node->current)
1324 current_ino = src_node->current->ino;
1326 src = &src_node->root_cache;
1327 dst = &dst_node->root_cache;
1329 cache = search_cache_extent(src, 0);
1331 node = container_of(cache, struct ptr_node, cache);
/* Step to the successor before the entry may be taken out of the tree. */
1333 cache = next_cache_extent(cache);
1336 remove_cache_extent(src, &node->cache);
1339 ins = malloc(sizeof(*ins));
1341 ins->cache.start = node->cache.start;
1342 ins->cache.size = node->cache.size;
1346 ret = insert_cache_extent(dst, &ins->cache);
1347 if (ret == -EEXIST) {
1348 conflict = get_inode_rec(dst, rec->ino, 1);
1349 BUG_ON(IS_ERR(conflict));
1350 merge_inode_recs(rec, conflict, dst);
1352 conflict->checked = 1;
1353 if (dst_node->current == conflict)
1354 dst_node->current = NULL;
1356 maybe_free_inode_rec(dst, conflict);
1357 free_inode_rec(rec);
/* Second pass: switch from the root caches to the inode caches. */
1364 if (src == &src_node->root_cache) {
1365 src = &src_node->inode_cache;
1366 dst = &dst_node->inode_cache;
1370 if (current_ino > 0 && (!dst_node->current ||
1371 current_ino > dst_node->current->ino)) {
1372 if (dst_node->current) {
1373 dst_node->current->checked = 1;
1374 maybe_free_inode_rec(dst, dst_node->current);
1376 dst_node->current = get_inode_rec(dst, current_ino, 1);
1377 BUG_ON(IS_ERR(dst_node->current));
/*
 * Per-entry release callback for inode record trees: converts the
 * cache_extent back to its enclosing ptr_node and frees the inode record.
 * The assignment of rec is on a line elided from this listing.
 */
1382 static void free_inode_ptr(struct cache_extent *cache)
1384 struct ptr_node *node;
1385 struct inode_record *rec;
1387 node = container_of(cache, struct ptr_node, cache);
1389 free_inode_rec(rec);
/*
 * Instantiate the tree-freeing helper for inode record trees with
 * free_inode_ptr as its per-entry callback. Presumably this is what defines
 * free_inode_recs_tree(), used by enter_shared_node() -- confirm against the
 * macro definition.
 */
1393 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the shared_node registered in the `shared` cache tree at bytenr
 * (lookup size 1) and convert the cache_extent to its enclosing shared_node.
 * The bytenr parameter declaration and the return statements are on lines
 * elided from this listing.
 */
1395 static struct shared_node *find_shared_node(struct cache_tree *shared,
1398 struct cache_extent *cache;
1399 struct shared_node *node;
1401 cache = lookup_cache_extent(shared, bytenr, 1);
1403 node = container_of(cache, struct shared_node, cache);
/*
 * Allocate a zero-filled shared_node keyed by bytenr (size 1), initialise its
 * root_cache and inode_cache trees and insert it into `shared`.
 * The handling of `refs`, of allocation failure and the return statement are
 * on lines elided from this listing.
 */
1409 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1412 struct shared_node *node;
1414 node = calloc(1, sizeof(*node));
1417 node->cache.start = bytenr;
1418 node->cache.size = 1;
1419 cache_tree_init(&node->root_cache);
1420 cache_tree_init(&node->inode_cache);
1423 ret = insert_cache_extent(shared, &node->cache);
/*
 * Record that the tree walk is entering the block at bytenr on the given
 * level, using the table of shared nodes kept in wc->shared.
 *
 * NOTE(review): braces, return statements and the condition guarding the
 * "add" path are elided from this listing; comments cover visible code only.
 */
1428 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1429 struct walk_control *wc, int level)
1431 struct shared_node *node;
1432 struct shared_node *dest;
/* Compare the requested level with the active one; the action taken is elided. */
1435 if (level == wc->active_node)
/* From here on the active node must be above `level`. */
1438 BUG_ON(wc->active_node <= level);
1439 node = find_shared_node(&wc->shared, bytenr);
/* Add path: create an entry for bytenr, look it up again and make it the active node. */
1441 ret = add_shared_node(&wc->shared, bytenr, refs);
1443 node = find_shared_node(&wc->shared, bytenr);
1444 wc->nodes[level] = node;
1445 wc->active_node = level;
/*
 * Active node is at the root level and the root item has zero refs: drop one
 * reference and, once none remain, free both record caches and unlink the
 * node from wc->shared.
 */
1449 if (wc->root_level == wc->active_node &&
1450 btrfs_root_refs(&root->root_item) == 0) {
1451 if (--node->refs == 0) {
1452 free_inode_recs_tree(&node->root_cache);
1453 free_inode_recs_tree(&node->inode_cache);
1454 remove_cache_extent(&wc->shared, &node->cache);
/* Merge path: hand this node's records to the active node, then unlink it once its refs reach zero. */
1460 dest = wc->nodes[wc->active_node];
1461 splice_shared_node(node, dest);
1462 if (node->refs == 0) {
1463 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Counterpart of enter_shared_node(): stop using the active shared node at
 * `level` and make a higher level of wc->nodes[] the active one.
 *
 * NOTE(review): the body of the scan loop, braces and returns are elided
 * from this listing.
 */
1469 static int leave_shared_node(struct btrfs_root *root,
1470 struct walk_control *wc, int level)
1472 struct shared_node *node;
1473 struct shared_node *dest;
/* Compare with the root level; the action taken is on an elided line. */
1476 if (level == wc->root_level)
/* Scan the levels above `level`; the loop body (stop condition) is elided. */
1479 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
/* The scan must have stopped before running off the end of the level range. */
1483 BUG_ON(i >= BTRFS_MAX_LEVEL);
/* Detach the old active node and activate level i. */
1485 node = wc->nodes[wc->active_node];
1486 wc->nodes[wc->active_node] = NULL;
1487 wc->active_node = i;
1489 dest = wc->nodes[wc->active_node];
/*
 * New active node below the root level, or the root item still has refs:
 * the old node must hold more than one reference and its records are spliced
 * into dest. The final BUG_ON belongs to a branch whose opening is elided.
 */
1490 if (wc->active_node < wc->root_level ||
1491 btrfs_root_refs(&root->root_item) > 0) {
1492 BUG_ON(node->refs <= 1);
1493 splice_shared_node(node, dest);
1495 BUG_ON(node->refs < 2);
/*
 * is_child_root: decide how root child_root_id relates to root
 * parent_root_id by searching the tree root. The return codes are listed in
 * the original comment lines that follow (their comment delimiters are
 * elided from this listing, as is the child_root_id parameter declaration).
 */
1504 * 1 - if the root with id child_root_id is a child of root parent_root_id
1505 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1506 * has other root(s) as parent(s)
1507 * 2 - if the root child_root_id doesn't have any parent roots
1509 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1512 struct btrfs_path path;
1513 struct btrfs_key key;
1514 struct extent_buffer *leaf;
1518 btrfs_init_path(&path);
/* First probe: exact lookup of (parent_root_id, ROOT_REF, child_root_id). */
1520 key.objectid = parent_root_id;
1521 key.type = BTRFS_ROOT_REF_KEY;
1522 key.offset = child_root_id;
1523 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1527 btrfs_release_path(&path);
/* Second phase: position on the ROOT_BACKREF items of child_root_id. */
1531 key.objectid = child_root_id;
1532 key.type = BTRFS_ROOT_BACKREF_KEY;
1534 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1540 leaf = path.nodes[0];
/* Slot past the end of this leaf: move on to the next leaf. */
1541 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1542 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1545 leaf = path.nodes[0];
1548 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Test for leaving the backref items of child_root_id; the action is elided. */
1549 if (key.objectid != child_root_id ||
1550 key.type != BTRFS_ROOT_BACKREF_KEY)
/* For ROOT_BACKREF items the key offset is compared against the parent root id. */
1555 if (key.offset == parent_root_id) {
1556 btrfs_release_path(&path);
1563 btrfs_release_path(&path);
/* has_parent is declared and updated on elided lines. */
1566 return has_parent ? 0 : 2;
/*
 * Process one DIR_ITEM / DIR_INDEX item found in leaf eb at slot: walk every
 * btrfs_dir_item packed in the item and record a backref for the inode or
 * root it points to in the caches of active_node. Also marks
 * active_node->current as having a dir item and accumulates name lengths
 * into its found_size.
 *
 * NOTE(review): this listing omits some original lines (local declarations,
 * braces, else branches, break/return); only visible statements are
 * described.
 *
 * Fix: namebuf holds `len` raw bytes and is never NUL-terminated (it is
 * exactly BTRFS_NAME_LEN bytes, so there is no room for a terminator when
 * len == BTRFS_NAME_LEN). The hash-mismatch message now prints it with a
 * "%.*s" precision bound instead of "%s", so printing cannot run past the
 * buffer.
 */
1569 static int process_dir_item(struct extent_buffer *eb,
1570 int slot, struct btrfs_key *key,
1571 struct shared_node *active_node)
1581 struct btrfs_dir_item *di;
1582 struct inode_record *rec;
1583 struct cache_tree *root_cache;
1584 struct cache_tree *inode_cache;
1585 struct btrfs_key location;
1586 char namebuf[BTRFS_NAME_LEN];
1588 root_cache = &active_node->root_cache;
1589 inode_cache = &active_node->inode_cache;
1590 rec = active_node->current;
1591 rec->found_dir_item = 1;
1593 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1594 total = btrfs_item_size_nr(eb, slot);
1595 while (cur < total) {
1597 btrfs_dir_item_key_to_cpu(eb, di, &location);
1598 name_len = btrfs_dir_name_len(eb, di);
1599 data_len = btrfs_dir_data_len(eb, di);
1600 filetype = btrfs_dir_type(eb, di);
1602 rec->found_size += name_len;
/* Header plus name must fit in the item and the name must not exceed BTRFS_NAME_LEN. */
1603 if (cur + sizeof(*di) + name_len > total ||
1604 name_len > BTRFS_NAME_LEN) {
1605 error = REF_ERR_NAME_TOO_LONG;
/* Not even the dir_item header fits; the action taken is on an elided line. */
1607 if (cur + sizeof(*di) > total)
/* Clamp the readable name length to what is left in the item (second bound elided). */
1609 len = min_t(u32, total - cur - sizeof(*di),
/* The name bytes follow the dir_item header directly. */
1616 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
/* For DIR_ITEM keys the key offset must equal the hash of the name. */
1618 if (key->type == BTRFS_DIR_ITEM_KEY &&
1619 key->offset != btrfs_name_hash(namebuf, len)) {
1620 rec->errors |= I_ERR_ODD_DIR_ITEM;
/* namebuf is not NUL-terminated: bound the print to len bytes. */
1621 error("DIR_ITEM[%llu %llu] name %.*s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1622 key->objectid, key->offset, (int)len, namebuf, len, filetype,
1623 key->offset, btrfs_name_hash(namebuf, len));
/* Entry points at an inode: record the backref in the inode cache. */
1626 if (location.type == BTRFS_INODE_ITEM_KEY) {
1627 add_inode_backref(inode_cache, location.objectid,
1628 key->objectid, key->offset, namebuf,
1629 len, filetype, key->type, error);
/* Entry points at a root item: record the backref in the root cache. */
1630 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1631 add_inode_backref(root_cache, location.objectid,
1632 key->objectid, key->offset,
1633 namebuf, len, filetype,
/* Any other location type is reported and filed under BTRFS_MULTIPLE_OBJECTIDS. */
1636 fprintf(stderr, "invalid location in dir item %u\n",
1638 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1639 key->objectid, key->offset, namebuf,
1640 len, filetype, key->type, error);
/* Step to the next dir_item packed in the same item. */
1643 len = sizeof(*di) + name_len + data_len;
1644 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item carrying more than one entry is flagged as an error. */
1647 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1648 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Process one INODE_REF item found in leaf eb at slot: walk every
 * btrfs_inode_ref packed in the item and record a backref in the inode
 * cache of active_node, using key->objectid as the inode and key->offset as
 * the directory argument.
 *
 * NOTE(review): local declarations, braces, else branches and returns are
 * elided from this listing.
 */
1653 static int process_inode_ref(struct extent_buffer *eb,
1654 int slot, struct btrfs_key *key,
1655 struct shared_node *active_node)
1663 struct cache_tree *inode_cache;
1664 struct btrfs_inode_ref *ref;
1665 char namebuf[BTRFS_NAME_LEN];
1667 inode_cache = &active_node->inode_cache;
1669 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1670 total = btrfs_item_size_nr(eb, slot);
1671 while (cur < total) {
1672 name_len = btrfs_inode_ref_name_len(eb, ref);
1673 index = btrfs_inode_ref_index(eb, ref);
1675 /* inode_ref + namelen should not cross item boundary */
1676 if (cur + sizeof(*ref) + name_len > total ||
1677 name_len > BTRFS_NAME_LEN) {
/* Not even the ref header fits in the item; the action taken is elided. */
1678 if (total < cur + sizeof(*ref))
1681 /* Still try to read out the remaining part */
1682 len = min_t(u32, total - cur - sizeof(*ref),
1684 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the ref header directly; namebuf is not NUL-terminated here. */
1690 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1691 add_inode_backref(inode_cache, key->objectid, key->offset,
1692 index, namebuf, len, 0, key->type, error);
/* Step to the next ref packed in the same item. */
1694 len = sizeof(*ref) + name_len;
1695 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Process one INODE_EXTREF item found in leaf eb at slot: walk every
 * btrfs_inode_extref packed in the item and record a backref in the inode
 * cache of active_node, using the parent stored in each extref.
 *
 * NOTE(review): local declarations, braces, the else keyword and returns are
 * elided from this listing.
 * NOTE(review): unlike process_inode_ref(), no visible line checks that the
 * name stays within the item size `total` -- confirm whether that is
 * intended.
 */
1701 static int process_inode_extref(struct extent_buffer *eb,
1702 int slot, struct btrfs_key *key,
1703 struct shared_node *active_node)
1712 struct cache_tree *inode_cache;
1713 struct btrfs_inode_extref *extref;
1714 char namebuf[BTRFS_NAME_LEN];
1716 inode_cache = &active_node->inode_cache;
1718 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1719 total = btrfs_item_size_nr(eb, slot);
1720 while (cur < total) {
1721 name_len = btrfs_inode_extref_name_len(eb, extref);
1722 index = btrfs_inode_extref_index(eb, extref);
1723 parent = btrfs_inode_extref_parent(eb, extref);
/* Names within the limit take this branch (body elided); longer ones are truncated and flagged. */
1724 if (name_len <= BTRFS_NAME_LEN) {
1728 len = BTRFS_NAME_LEN;
1729 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the extref header directly. */
1731 read_extent_buffer(eb, namebuf,
1732 (unsigned long)(extref + 1), len);
1733 add_inode_backref(inode_cache, key->objectid, parent,
1734 index, namebuf, len, 0, key->type, error);
/* Step to the next extref packed in the same item. */
1736 len = sizeof(*extref) + name_len;
1737 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Scan the csum tree for EXTENT_CSUM items overlapping [start, start + len)
 * and report the result through *found.
 *
 * NOTE(review): the accumulation into *found, the loop header, braces and
 * several assignments are elided from this listing; only visible statements
 * are described.
 */
1744 static int count_csum_range(struct btrfs_root *root, u64 start,
1745 u64 len, u64 *found)
1747 struct btrfs_key key;
1748 struct btrfs_path path;
1749 struct extent_buffer *leaf;
/* On-disk size of one checksum, taken from the superblock copy. */
1754 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1756 btrfs_init_path(&path);
1758 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1760 key.type = BTRFS_EXTENT_CSUM_KEY;
1762 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* No exact match: inspect the previous item, which may be a csum item too (action elided). */
1766 if (ret > 0 && path.slots[0] > 0) {
1767 leaf = path.nodes[0];
1768 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1769 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1770 key.type == BTRFS_EXTENT_CSUM_KEY)
1775 leaf = path.nodes[0];
/* Slot past the end of this leaf: move on to the next leaf. */
1776 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1777 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1782 leaf = path.nodes[0];
1785 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Test for leaving the csum items; the action taken is elided. */
1786 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1787 key.type != BTRFS_EXTENT_CSUM_KEY)
1790 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Item begins at or beyond the end of the requested range. */
1791 if (key.offset >= start + len)
1794 if (key.offset > start)
/* One checksum of csum_size bytes accounts for one sector of data. */
1797 size = btrfs_item_size_nr(leaf, path.slots[0]);
1798 csum_end = key.offset + (size / csum_size) *
1799 root->fs_info->sectorsize;
/* Item overlaps the range: covered bytes are capped by the remaining length. */
1800 if (csum_end > start) {
1801 size = min(csum_end - start, len);
1810 btrfs_release_path(&path);
/*
 * Process one EXTENT_DATA item found in leaf eb at slot for the inode record
 * active_node->current: track the covered file range (overlaps and holes),
 * validate the extent fields, accumulate found_size and check checksum
 * coverage through count_csum_range().
 *
 * NOTE(review): local declarations, braces, else keywords, some conditions
 * and returns are elided from this listing; only visible statements are
 * described.
 */
1816 static int process_file_extent(struct btrfs_root *root,
1817 struct extent_buffer *eb,
1818 int slot, struct btrfs_key *key,
1819 struct shared_node *active_node)
1821 struct inode_record *rec;
1822 struct btrfs_file_extent_item *fi;
1824 u64 disk_bytenr = 0;
1825 u64 extent_offset = 0;
/* Low bits that must be clear for a sector-aligned value. */
1826 u64 mask = root->fs_info->sectorsize - 1;
1830 rec = active_node->current;
1831 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1832 rec->found_file_extent = 1;
/* (u64)-1 marks that no extent has been seen yet for this inode. */
1834 if (rec->extent_start == (u64)-1) {
1835 rec->extent_start = key->offset;
1836 rec->extent_end = key->offset;
/* Starts before the previous extent ended: overlap. Starts after it: record the gap as a hole. */
1839 if (rec->extent_end > key->offset)
1840 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1841 else if (rec->extent_end < key->offset) {
1842 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1843 key->offset - rec->extent_end);
1848 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1849 extent_type = btrfs_file_extent_type(eb, fi);
1851 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1852 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1854 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1855 rec->found_size += num_bytes;
/* Round the inline length up to a whole sector. */
1856 num_bytes = (num_bytes + mask) & ~mask;
1857 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1858 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1859 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1860 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1861 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned. */
1862 if (num_bytes == 0 || (num_bytes & mask))
1863 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced range must not exceed ram_bytes. */
1864 if (num_bytes + extent_offset >
1865 btrfs_file_extent_ram_bytes(eb, fi))
1866 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* A preallocated extent with compression, encryption or other encoding set is flagged as bad. */
1867 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1868 (btrfs_file_extent_compression(eb, fi) ||
1869 btrfs_file_extent_encryption(eb, fi) ||
1870 btrfs_file_extent_other_encoding(eb, fi)))
1871 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents with a non-zero disk_bytenr count towards found_size. */
1872 if (disk_bytenr > 0)
1873 rec->found_size += num_bytes;
1875 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1877 rec->extent_end = key->offset + num_bytes;
1880 * The data reloc tree will copy full extents into its inode and then
1881 * copy the corresponding csums. Because the extent it copied could be
1882 * a preallocated extent that hasn't been written to yet there may be no
1883 * csums to copy, ergo we won't have csums for our file extent. This is
1884 * ok so just don't bother checking csums if the inode belongs to the
1887 if (disk_bytenr > 0 &&
1888 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* Compressed extents are checked over disk_num_bytes; otherwise (else elided) the range is shifted by extent_offset. */
1890 if (btrfs_file_extent_compression(eb, fi))
1891 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1893 disk_bytenr += extent_offset;
1895 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
/* Regular extent: note csum presence and partial coverage. Preallocated extent: flag an odd csum item (conditions on `found` elided). */
1898 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1900 rec->found_csum_item = 1;
1901 if (found < num_bytes)
1902 rec->some_csum_missing = 1;
1903 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1905 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item of leaf eb for the walk described by wc: keep
 * active_node->current pointing at the inode record for the key being
 * processed and dispatch each item to its per-type handler.
 *
 * NOTE(review): local declarations, the switch header, break/continue
 * statements, braces and the return are elided from this listing.
 */
1911 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1912 struct walk_control *wc)
1914 struct btrfs_key key;
1918 struct cache_tree *inode_cache;
1919 struct shared_node *active_node;
/* Active node at the root level of a root with zero refs; the action taken is elided. */
1921 if (wc->root_level == wc->active_node &&
1922 btrfs_root_refs(&root->root_item) == 0)
1925 active_node = wc->nodes[wc->active_node];
1926 inode_cache = &active_node->inode_cache;
1927 nritems = btrfs_header_nritems(eb);
1928 for (i = 0; i < nritems; i++) {
1929 btrfs_item_key_to_cpu(eb, &key, i);
/* Free-space objects and orphan items get special treatment (actions elided). */
1931 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1933 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Key belongs to a later inode: mark the current record checked and fetch the record for the new one. */
1936 if (active_node->current == NULL ||
1937 active_node->current->ino < key.objectid) {
1938 if (active_node->current) {
1939 active_node->current->checked = 1;
1940 maybe_free_inode_rec(inode_cache,
1941 active_node->current);
1943 active_node->current = get_inode_rec(inode_cache,
1945 BUG_ON(IS_ERR(active_node->current));
/* Per-type dispatch (switch header elided). */
1948 case BTRFS_DIR_ITEM_KEY:
1949 case BTRFS_DIR_INDEX_KEY:
1950 ret = process_dir_item(eb, i, &key, active_node);
1952 case BTRFS_INODE_REF_KEY:
1953 ret = process_inode_ref(eb, i, &key, active_node);
1955 case BTRFS_INODE_EXTREF_KEY:
1956 ret = process_inode_extref(eb, i, &key, active_node);
1958 case BTRFS_INODE_ITEM_KEY:
1959 ret = process_inode_item(eb, i, &key, active_node);
1961 case BTRFS_EXTENT_DATA_KEY:
1962 ret = process_file_extent(root, eb, i, &key,
/*
 * Per-level cache fields (the enclosing declaration line is elided from this
 * listing; the functions below use them through `struct node_refs *`).
 */
/* Block address last looked up at each level. */
1973 u64 bytenr[BTRFS_MAX_LEVEL];
/* Reference count stored alongside bytenr[] for that block. */
1974 u64 refs[BTRFS_MAX_LEVEL];
/* Non-zero when the block at that level has to be checked in the current root. */
1975 int need_check[BTRFS_MAX_LEVEL];
/* Forward declarations needed by process_one_leaf_v2(), which calls both functions. */
1978 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1979 struct node_refs *nrefs, u64 level);
1980 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1981 unsigned int ext_ref);
/*
 * process_one_leaf_v2: check the inode items reachable from the leaf in
 * path->nodes[0] by calling check_inode_item() repeatedly, and refresh the
 * per-level reference cache in nrefs when the path moves to another leaf.
 * Return codes are listed in the original comment lines that follow (their
 * comment delimiters, and some code lines, are elided from this listing).
 */
1984 * Returns >0 Found error, not fatal, should continue
1985 * Returns <0 Fatal error, must exit the whole check
1986 * Returns 0 No errors found
1988 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1989 struct node_refs *nrefs, int *level, int ext_ref)
1991 struct extent_buffer *cur = path->nodes[0];
1992 struct btrfs_key key;
1996 int root_level = btrfs_header_level(root->node);
1998 int ret = 0; /* Final return value */
1999 int err = 0; /* Positive error bitmap */
/* Remember which leaf the scan started on, to detect a leaf switch later. */
2001 cur_bytenr = cur->start;
2003 /* skip to first inode item or the first inode number change */
2004 nritems = btrfs_header_nritems(cur);
2005 for (i = 0; i < nritems; i++) {
2006 btrfs_item_key_to_cpu(cur, &key, i);
2008 first_ino = key.objectid;
2009 if (key.type == BTRFS_INODE_ITEM_KEY ||
2010 (first_ino && first_ino != key.objectid))
/* Point the slot past the last item of the leaf (guarding condition elided). */
2014 path->slots[0] = nritems;
/* Accumulate the error bits reported for the inode at the current slot. */
2020 err |= check_inode_item(root, path, ext_ref);
2022 /* modify cur since check_inode_item may change path */
2023 cur = path->nodes[0];
/* LAST_ITEM bit set in the result; the action taken is on an elided line. */
2025 if (err & LAST_ITEM)
2028 /* still have inode items in this leaf */
2029 if (cur->start == cur_bytenr)
2033 * we have switched to another leaf, above nodes may
2034 * have changed, here walk down the path, if a node
2035 * or leaf is shared, check whether we can skip this
2038 for (i = root_level; i >= 0; i--) {
/* Level unchanged since the last lookup; the action taken is elided. */
2039 if (path->nodes[i]->start == nrefs->bytenr[i])
2042 ret = update_nodes_refs(root,
2043 path->nodes[i]->start,
2048 if (!nrefs->need_check[i]) {
/* Release the path buffers below *level. */
2054 for (i = 0; i < *level; i++) {
2055 free_extent_buffer(path->nodes[i]);
2056 path->nodes[i] = NULL;
/*
 * Issue readahead for the child blocks referenced by `node`, starting at
 * pointer index `slot` and continuing to the last pointer.
 * Local declarations and any check on `level` are elided from this listing.
 */
2065 static void reada_walk_down(struct btrfs_root *root,
2066 struct extent_buffer *node, int slot)
2068 struct btrfs_fs_info *fs_info = root->fs_info;
2075 level = btrfs_header_level(node);
2079 nritems = btrfs_header_nritems(node);
2080 for (i = slot; i < nritems; i++) {
/* Each pointer supplies the child address and the generation expected for it. */
2081 bytenr = btrfs_node_blockptr(node, i);
2082 ptr_gen = btrfs_node_ptr_generation(node, i);
2083 readahead_tree_block(fs_info, bytenr, ptr_gen);
/*
 * check_child_node: verify that `child` is the block that pointer `slot` of
 * `parent` refers to. Three properties are compared: the first key, the
 * block address and the generation. The original description follows (its
 * comment delimiters, and some code lines such as the error-code
 * assignments and the return, are elided from this listing).
 *
 * Fix: the generation message printed its arguments in the opposite order
 * to the key and block messages. "wanted" is now the value recorded in the
 * parent pointer and "have" the value found in the child header, as in the
 * other two messages.
 */
2088 * Check the child node/leaf by the following condition:
2089 * 1. the first item key of the node/leaf should be the same with the one
2091 * 2. block in parent node should match the child node/leaf.
2092 * 3. generation of parent node and child's header should be consistent.
2094 * Or the child node/leaf pointed by the key in parent is not valid.
2096 * We hope to check leaf owner too, but since subvol may share leaves,
2097 * which makes leaf owner check not so strong, key check should be
2098 * sufficient enough for that case.
2100 static int check_child_node(struct extent_buffer *parent, int slot,
2101 struct extent_buffer *child)
2103 struct btrfs_key parent_key;
2104 struct btrfs_key child_key;
2107 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* A level-0 child is a leaf and stores item keys; otherwise (else elided) it stores node keys. */
2108 if (btrfs_header_level(child) == 0)
2109 btrfs_item_key_to_cpu(child, &child_key, 0);
2111 btrfs_node_key_to_cpu(child, &child_key, 0);
/* 1. The first key of the child must equal the key stored in the parent slot. */
2113 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2116 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2117 parent_key.objectid, parent_key.type, parent_key.offset,
2118 child_key.objectid, child_key.type, child_key.offset);
/* 2. The child header address must equal the parent's block pointer. */
2120 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2122 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2123 btrfs_node_blockptr(parent, slot),
2124 btrfs_header_bytenr(child));
/* 3. The child header generation must equal the generation in the parent pointer. */
2126 if (btrfs_node_ptr_generation(parent, slot) !=
2127 btrfs_header_generation(child)) {
2129 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2130 btrfs_node_ptr_generation(parent, slot),
2131 btrfs_header_generation(child));
/*
 * need_check: given the ulist of root ids referencing a tree block, decide
 * whether `root` is the one that should check it. The original description
 * follows (comment delimiters and the return statements are elided from this
 * listing).
 */
2137 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2138 * in every fs or file tree check. Here we find its all root ids, and only check
2139 * it in the fs or file tree which has the smallest root id.
2141 static int need_check(struct btrfs_root *root, struct ulist *roots)
2143 struct rb_node *node;
2144 struct ulist_node *u;
/* Exactly one referencing root; the value returned is on an elided line. */
2146 if (roots->nnodes == 1)
/* First node of the ulist rb-tree, treated below as the smallest root id. */
2149 node = rb_first(&roots->root);
2150 u = rb_entry(node, struct ulist_node, rb_node);
2152 * current root id is not smallest, we skip it and let it be checked
2153 * in the fs or file tree who hash the smallest root id.
2155 if (root->objectid != u->val)
/*
 * update_nodes_refs: refresh the cache slot nrefs->{bytenr,refs,need_check}
 * for `level` when bytenr differs from the cached block. The original
 * description follows (comment delimiters, error handling, the condition
 * choosing between the two need_check assignments and the return are elided
 * from this listing).
 */
2162 * for a tree node or leaf, we record its reference count, so later if we still
2163 * process this node or leaf, don't need to compute its reference count again.
2165 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2166 struct node_refs *nrefs, u64 level)
2170 struct ulist *roots;
/* Cache miss: look up the extent's reference count and store it. */
2172 if (nrefs->bytenr[level] != bytenr) {
2173 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2174 level, 1, &refs, NULL);
2178 nrefs->bytenr[level] = bytenr;
2179 nrefs->refs[level] = refs;
/* Collect the roots referencing the block and let need_check() decide. */
2181 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2186 check = need_check(root, roots);
2188 nrefs->need_check[level] = check;
/* Alternative branch (its condition is elided): always check. */
2190 nrefs->need_check[level] = 1;
/*
 * Walk down from path->nodes[*level] towards the leaves: process leaves with
 * process_one_leaf(), register blocks via enter_shared_node(), and validate
 * each child block before descending into it.
 *
 * NOTE(review): local declarations, braces, else keywords, break/goto/return
 * statements and several conditions are elided from this listing; only the
 * visible statements are described.
 */
2197 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2198 struct walk_control *wc, int *level,
2199 struct node_refs *nrefs)
2201 enum btrfs_tree_block_status status;
2204 struct btrfs_fs_info *fs_info = root->fs_info;
2205 struct extent_buffer *next;
2206 struct extent_buffer *cur;
2210 WARN_ON(*level < 0);
2211 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached reference count for the starting block, or look it up and cache it. */
2213 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2214 refs = nrefs->refs[*level];
2217 ret = btrfs_lookup_extent_info(NULL, root,
2218 path->nodes[*level]->start,
2219 *level, 1, &refs, NULL);
2224 nrefs->bytenr[*level] = path->nodes[*level]->start;
2225 nrefs->refs[*level] = refs;
2229 ret = enter_shared_node(root, path->nodes[*level]->start,
2237 while (*level >= 0) {
2238 WARN_ON(*level < 0);
2239 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2240 cur = path->nodes[*level];
/* Header level must match the walk level; the action on mismatch is elided. */
2242 if (btrfs_header_level(cur) != *level)
/* All slots of this block have been consumed; the action taken is elided. */
2245 if (path->slots[*level] >= btrfs_header_nritems(cur))
2248 ret = process_one_leaf(root, cur, wc);
/* Interior node: read the child pointer at the current slot. */
2253 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2254 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
/* Reuse the cached reference count for the child, or look it up and cache it. */
2256 if (bytenr == nrefs->bytenr[*level - 1]) {
2257 refs = nrefs->refs[*level - 1];
2259 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2260 *level - 1, 1, &refs, NULL);
2264 nrefs->bytenr[*level - 1] = bytenr;
2265 nrefs->refs[*level - 1] = refs;
2270 ret = enter_shared_node(root, bytenr, refs,
/* Advance to the next slot (the condition guarding this is elided). */
2273 path->slots[*level]++;
/* Use a cached, up-to-date buffer when available; otherwise read the block from disk. */
2278 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2279 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2280 free_extent_buffer(next);
2281 reada_walk_down(root, cur, path->slots[*level]);
2282 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
/* Read failed: record a corrupt extent for the parent block. */
2283 if (!extent_buffer_uptodate(next)) {
2284 struct btrfs_key node_key;
2286 btrfs_node_key_to_cpu(path->nodes[*level],
2288 path->slots[*level]);
2289 btrfs_add_corrupt_extent_record(root->fs_info,
2291 path->nodes[*level]->start,
2292 root->fs_info->nodesize,
/* Cross-check the child against the parent pointer; the visible free belongs to the failure handling. */
2299 ret = check_child_node(cur, path->slots[*level], next);
2301 free_extent_buffer(next);
/* Structural validation of the child; a non-clean block is released. */
2306 if (btrfs_is_leaf(next))
2307 status = btrfs_check_leaf(root, NULL, next);
2309 status = btrfs_check_node(root, NULL, next);
2310 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2311 free_extent_buffer(next);
/* Descend: install the child at the lower level and start at slot 0. */
2316 *level = *level - 1;
2317 free_extent_buffer(path->nodes[*level]);
2318 path->nodes[*level] = next;
2319 path->slots[*level] = 0;
/* Mark the block at *level as fully consumed. */
2322 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/* Forward declaration of check_inode_item(); the definition is outside this section. */
2326 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2327 unsigned int ext_ref);
/*
 * walk_down_tree_v2: walk down from path->nodes[*level], validating each
 * block, processing leaves with process_one_leaf_v2() and skipping children
 * whose need_check flag is clear. Return codes are listed in the original
 * comment lines that follow (their comment delimiters, and code lines such
 * as braces, else keywords, break/return statements and some conditions,
 * are elided from this listing).
 */
2330 * Returns >0 Found error, should continue
2331 * Returns <0 Fatal error, must exit the whole check
2332 * Returns 0 No errors found
2334 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2335 int *level, struct node_refs *nrefs, int ext_ref)
2337 enum btrfs_tree_block_status status;
2340 struct btrfs_fs_info *fs_info = root->fs_info;
2341 struct extent_buffer *next;
2342 struct extent_buffer *cur;
2345 WARN_ON(*level < 0);
2346 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Refresh the reference cache for the starting block. */
2348 ret = update_nodes_refs(root, path->nodes[*level]->start,
2353 while (*level >= 0) {
2354 WARN_ON(*level < 0);
2355 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2356 cur = path->nodes[*level];
/* Header level must match the walk level; the action on mismatch is elided. */
2358 if (btrfs_header_level(cur) != *level)
/* All slots of this block have been consumed; the action taken is elided. */
2361 if (path->slots[*level] >= btrfs_header_nritems(cur))
2363 /* Don't forget to check leaf/node validation */
/* Leaf case (condition elided): validate, then process its inode items. */
2365 ret = btrfs_check_leaf(root, NULL, cur);
2366 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2370 ret = process_one_leaf_v2(root, path, nrefs,
/* Re-read cur from the path after the call. */
2372 cur = path->nodes[*level];
/* Node case: validate the interior node before following its pointers. */
2375 ret = btrfs_check_node(root, NULL, cur);
2376 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2381 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2382 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
/* Refresh the cache for the child and skip it when it need not be checked in this root. */
2384 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2387 if (!nrefs->need_check[*level - 1]) {
2388 path->slots[*level]++;
/* Use a cached, up-to-date buffer when available; otherwise read the block from disk. */
2392 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2393 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2394 free_extent_buffer(next);
2395 reada_walk_down(root, cur, path->slots[*level]);
2396 next = read_tree_block(fs_info, bytenr, ptr_gen);
/* Read failed: record a corrupt extent for the parent block. */
2397 if (!extent_buffer_uptodate(next)) {
2398 struct btrfs_key node_key;
2400 btrfs_node_key_to_cpu(path->nodes[*level],
2402 path->slots[*level]);
2403 btrfs_add_corrupt_extent_record(fs_info,
2405 path->nodes[*level]->start,
/* Cross-check the child against the parent pointer. */
2413 ret = check_child_node(cur, path->slots[*level], next);
/* Structural validation of the child; a non-clean block is released. */
2417 if (btrfs_is_leaf(next))
2418 status = btrfs_check_leaf(root, NULL, next);
2420 status = btrfs_check_node(root, NULL, next);
2421 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2422 free_extent_buffer(next);
/* Descend: install the child at the lower level and start at slot 0. */
2427 *level = *level - 1;
2428 free_extent_buffer(path->nodes[*level]);
2429 path->nodes[*level] = next;
2430 path->slots[*level] = 0;
/*
 * Walk back up the path from *level looking for a block that still has
 * unvisited slots; blocks that are exhausted are released, and the shared
 * node for a level is left when that level was the active one.
 *
 * NOTE(review): the body of the "has more slots" branch, braces and return
 * statements are elided from this listing.
 */
2435 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2436 struct walk_control *wc, int *level)
2439 struct extent_buffer *leaf;
2441 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite its name, `leaf` holds the block at level i, which may be an interior node. */
2442 leaf = path->nodes[i];
/* Another slot remains in this block; the handling is on elided lines. */
2443 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* Release the path buffer at *level. */
2448 free_extent_buffer(path->nodes[*level]);
2449 path->nodes[*level] = NULL;
2450 BUG_ON(*level > wc->active_node);
2451 if (*level == wc->active_node)
2452 leave_shared_node(root, wc, *level);
/*
 * Variant of walk_up_tree() without shared-node bookkeeping: walk back up
 * the path from *level looking for a block with unvisited slots and release
 * exhausted blocks.
 *
 * NOTE(review): the remaining parameters, the body of the "has more slots"
 * branch, braces and return statements are elided from this listing.
 */
2459 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2463 struct extent_buffer *leaf;
2465 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite its name, `leaf` holds the block at level i, which may be an interior node. */
2466 leaf = path->nodes[i];
/* Another slot remains in this block; the handling is on elided lines. */
2467 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* Release the path buffer at *level. */
2472 free_extent_buffer(path->nodes[*level]);
2473 path->nodes[*level] = NULL;
/*
 * Run a series of sanity tests on the inode record of a root directory.
 * The statement executed when a test fires, and the final return, are on
 * lines elided from this listing; presumably each test leads to the failure
 * path -- confirm against the full source.
 */
2480 static int check_root_dir(struct inode_record *rec)
2482 struct inode_backref *backref;
/* Inode item missing, or errors already recorded on the record. */
2485 if (!rec->found_inode_item || rec->errors)
/* nlink other than 1, or any link found. */
2487 if (rec->nlink != 1 || rec->found_link != 0)
/* No backref recorded at all. */
2489 if (list_empty(&rec->backrefs))
/* Only the first backref on the list is inspected. */
2491 backref = to_inode_backref(rec->backrefs.next);
2492 if (!backref->found_inode_ref)
/* Tests for an inode ref other than index 0 with the two-byte name "..". */
2494 if (backref->index != 0 || backref->namelen != 2 ||
2495 memcmp(backref->name, "..", 2))
/* Tests for a dir index or dir item having been found for this backref. */
2497 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Repair a wrong directory isize: locate the INODE_ITEM of rec->ino, set its
 * size field to rec->found_size, mark the leaf dirty and clear
 * I_ERR_DIR_ISIZE_WRONG on the record.
 *
 * NOTE(review): local declarations, error handling after the search, the
 * slot adjustment, braces and the return are elided from this listing.
 *
 * Fix: the message used "%Lu"; the L length modifier is only defined for
 * long double conversions in standard C. It now uses "%llu", matching
 * repair_inode_nbytes(), which prints the same two fields.
 */
2504 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2505 struct btrfs_root *root, struct btrfs_path *path,
2506 struct inode_record *rec)
2508 struct btrfs_inode_item *ei;
2509 struct btrfs_key key;
/* Search with the largest possible offset so the slot lands after the inode item. */
2512 key.objectid = rec->ino;
2513 key.type = BTRFS_INODE_ITEM_KEY;
2514 key.offset = (u64)-1;
2516 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Slot 0 means there is no previous item to step back to; the handling is elided. */
2520 if (!path->slots[0]) {
2527 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* The item found must belong to the inode being repaired. */
2528 if (key.objectid != rec->ino) {
2533 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2534 struct btrfs_inode_item);
2535 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2536 btrfs_mark_buffer_dirty(path->nodes[0]);
2537 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2538 printf("reset isize for dir %llu root %llu\n", rec->ino,
2539 root->root_key.objectid);
2541 btrfs_release_path(path);
/*
 * Add an orphan item for rec->ino inside the given transaction, release the
 * path and clear I_ERR_NO_ORPHAN_ITEM on the record.
 * The condition guarding the flag update and the return are on lines elided
 * from this listing.
 */
2545 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2546 struct btrfs_root *root,
2547 struct btrfs_path *path,
2548 struct inode_record *rec)
2552 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2553 btrfs_release_path(path);
2555 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Repair a wrong nbytes value: locate the INODE_ITEM of rec->ino, set its
 * nbytes field to rec->found_size, mark the leaf dirty and clear
 * I_ERR_FILE_NBYTES_WRONG on the record.
 *
 * NOTE(review): the key.offset assignment, the handling of the search
 * result, braces and the return are elided from this listing.
 */
2559 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2560 struct btrfs_root *root,
2561 struct btrfs_path *path,
2562 struct inode_record *rec)
2564 struct btrfs_inode_item *ei;
2565 struct btrfs_key key;
2568 key.objectid = rec->ino;
2569 key.type = BTRFS_INODE_ITEM_KEY;
2572 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2579 /* Since ret == 0, no need to check anything */
2580 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2581 struct btrfs_inode_item);
2582 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2583 btrfs_mark_buffer_dirty(path->nodes[0]);
2584 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2585 printf("reset nbytes for ino %llu root %llu\n",
2586 rec->ino, root->root_key.objectid);
2588 btrfs_release_path(path);
/*
 * Re-create a missing DIR_INDEX item for `backref` of inode `rec`: insert an
 * empty item keyed (backref->dir, DIR_INDEX, backref->index), fill in a
 * dir_item pointing at rec->ino with the backref name, commit, and then
 * update the cached record of the parent directory.
 *
 * NOTE(review): the IS_ERR test on trans, the check of the insert result,
 * braces and the return are elided from this listing. The call to
 * btrfs_commit_transaction() is a bare statement, so its result is unused.
 */
2592 static int add_missing_dir_index(struct btrfs_root *root,
2593 struct cache_tree *inode_cache,
2594 struct inode_record *rec,
2595 struct inode_backref *backref)
2597 struct btrfs_path path;
2598 struct btrfs_trans_handle *trans;
2599 struct btrfs_dir_item *dir_item;
2600 struct extent_buffer *leaf;
2601 struct btrfs_key key;
2602 struct btrfs_disk_key disk_key;
2603 struct inode_record *dir_rec;
2604 unsigned long name_ptr;
/* Item payload: the dir_item header followed by the name bytes. */
2605 u32 data_size = sizeof(*dir_item) + backref->namelen;
2608 trans = btrfs_start_transaction(root, 1);
2610 return PTR_ERR(trans);
2612 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2613 (unsigned long long)rec->ino);
2615 btrfs_init_path(&path);
2616 key.objectid = backref->dir;
2617 key.type = BTRFS_DIR_INDEX_KEY;
2618 key.offset = backref->index;
2619 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2622 leaf = path.nodes[0];
2623 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* Location key of the new entry: the inode item of rec->ino, stored in little-endian disk format. */
2625 disk_key.objectid = cpu_to_le64(rec->ino);
2626 disk_key.type = BTRFS_INODE_ITEM_KEY;
2627 disk_key.offset = 0;
2629 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2630 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2631 btrfs_set_dir_data_len(leaf, dir_item, 0);
2632 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is written immediately after the dir_item header. */
2633 name_ptr = (unsigned long)(dir_item + 1);
2634 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2635 btrfs_mark_buffer_dirty(leaf);
2636 btrfs_release_path(&path);
2637 btrfs_commit_transaction(trans, root);
/* Reflect the repair in the in-memory state. */
2639 backref->found_dir_index = 1;
2640 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2641 BUG_ON(IS_ERR(dir_rec));
/* The new entry adds its name length to the directory size; re-evaluate the isize error flag. */
2644 dir_rec->found_size += backref->namelen;
2645 if (dir_rec->found_size == dir_rec->isize &&
2646 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2647 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2648 if (dir_rec->found_size != dir_rec->isize)
2649 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by `backref` from `root` in its own
 * transaction.
 *
 * NOTE(review): the IS_ERR test on trans, the tests on the lookup result,
 * the condition choosing between the two delete calls, braces and returns
 * are elided from this listing. The results of both
 * btrfs_commit_transaction() calls are unused.
 */
2654 static int delete_dir_index(struct btrfs_root *root,
2655 struct inode_backref *backref)
2657 struct btrfs_trans_handle *trans;
2658 struct btrfs_dir_item *di;
2659 struct btrfs_path path;
2662 trans = btrfs_start_transaction(root, 1);
2664 return PTR_ERR(trans);
2666 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2667 (unsigned long long)backref->dir,
2668 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2669 (unsigned long long)root->objectid);
2671 btrfs_init_path(&path);
/* Look the entry up by directory, name and index; the last argument (-1) is passed through to the lookup helper. */
2672 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2673 backref->name, backref->namelen,
2674 backref->index, -1);
/* Exit taken on a lookup outcome tested on elided lines: release the path and commit. */
2677 btrfs_release_path(&path);
2678 btrfs_commit_transaction(trans, root);
/* Two deletion variants: remove the whole item, or only this name from it (the choice is on an elided line). */
2685 ret = btrfs_del_item(trans, root, &path);
2687 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2689 btrfs_release_path(&path);
2690 btrfs_commit_transaction(trans, root);
/*
 * Re-create a missing INODE_ITEM for rec->ino in `root` from what the check
 * has learned about the inode (link count, sizes, whether dir items or file
 * extents were seen), in its own transaction.
 *
 * NOTE(review): the third parameter, the condition choosing between the two
 * nlink assignments, error handling, braces and the return are elided from
 * this listing. The result of btrfs_commit_transaction() is unused.
 */
2694 static int create_inode_item(struct btrfs_root *root,
2695 struct inode_record *rec,
2698 struct btrfs_trans_handle *trans;
2699 struct btrfs_inode_item inode_item;
/* Current time, used for atime, ctime and mtime below. */
2700 time_t now = time(NULL);
2703 trans = btrfs_start_transaction(root, 1);
2704 if (IS_ERR(trans)) {
2705 ret = PTR_ERR(trans);
2709 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2710 "be incomplete, please check permissions and content after "
2711 "the fsck completes.\n", (unsigned long long)root->objectid,
2712 (unsigned long long)rec->ino);
2714 memset(&inode_item, 0, sizeof(inode_item));
2715 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
/* nlink is either 1 or the number of links found (the condition is elided). */
2717 btrfs_set_stack_inode_nlink(&inode_item, 1);
2719 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2720 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
/* With a dir item seen the inode becomes a directory sized by found_size; otherwise a regular file sized by extent_end. */
2721 if (rec->found_dir_item) {
2722 if (rec->found_file_extent)
2723 fprintf(stderr, "root %llu inode %llu has both a dir "
2724 "item and extents, unsure if it is a dir or a "
2725 "regular file so setting it as a directory\n",
2726 (unsigned long long)root->objectid,
2727 (unsigned long long)rec->ino);
2728 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2729 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/* NOTE(review): this condition is always true here, since the branch above tested the opposite. */
2730 } else if (!rec->found_dir_item) {
2731 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2732 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
/* atime, ctime and mtime are set to now; otime is zeroed. */
2734 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2735 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2736 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2737 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2738 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2739 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2740 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2741 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2743 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2745 btrfs_commit_transaction(trans, root);
/*
 * Repair the backrefs (dir item / dir index / inode ref) of one inode
 * record.
 *
 * Judging by the !delete checks below, the function is meant to be run in
 * separate passes: one that removes bad dir index items and one that adds
 * missing pieces back (a dir index, a dir item/index pair, or the inode item
 * itself).
 *
 * Returns the non-zero result of a failed step, otherwise the value of the
 * local "repaired" counter.
 */
2749 static int repair_inode_backrefs(struct btrfs_root *root,
2750 struct inode_record *rec,
2751 struct cache_tree *inode_cache,
2754 struct inode_backref *tmp, *backref;
2755 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* The _safe iterator is needed because entries are unlinked while walking */
2759 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* In the add pass, the root directory's inode item is recreated if missing */
2760 if (!delete && rec->ino == root_dirid) {
2761 if (!rec->found_inode_item) {
2762 ret = create_inode_item(root, rec, 1);
2769 /* Index 0 for root dir's are special, don't mess with it */
2770 if (rec->ino == root_dirid && backref->index == 0)
/*
 * A dir index is deleted when it has no matching inode ref, or when both
 * exist but their index values disagree (REF_ERR_INDEX_UNMATCH).
 */
2774 ((backref->found_dir_index && !backref->found_inode_ref) ||
2775 (backref->found_dir_index && backref->found_inode_ref &&
2776 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2777 ret = delete_dir_index(root, backref);
2781 list_del(&backref->list);
/* Dir item and inode ref exist but the dir index is missing: add it back */
2786 if (!delete && !backref->found_dir_index &&
2787 backref->found_dir_item && backref->found_inode_ref) {
2788 ret = add_missing_dir_index(root, inode_cache, rec,
/* A backref with all three items and no errors needs no further work */
2793 if (backref->found_dir_item &&
2794 backref->found_dir_index) {
2795 if (!backref->errors &&
2796 backref->found_inode_ref) {
2797 list_del(&backref->list);
/*
 * Only the inode ref exists: after checking for a name conflict in the
 * parent directory, the dir item/index pair is inserted in its own
 * transaction.
 */
2804 if (!delete && (!backref->found_dir_index &&
2805 !backref->found_dir_item &&
2806 backref->found_inode_ref)) {
2807 struct btrfs_trans_handle *trans;
2808 struct btrfs_key location;
2810 ret = check_dir_conflict(root, backref->name,
2816 * let nlink fixing routine to handle it,
2817 * which can do it better.
2822 location.objectid = rec->ino;
2823 location.type = BTRFS_INODE_ITEM_KEY;
2824 location.offset = 0;
2826 trans = btrfs_start_transaction(root, 1);
2827 if (IS_ERR(trans)) {
2828 ret = PTR_ERR(trans);
2831 fprintf(stderr, "adding missing dir index/item pair "
2833 (unsigned long long)rec->ino);
2834 ret = btrfs_insert_dir_item(trans, root, backref->name,
2836 backref->dir, &location,
2837 imode_to_type(rec->imode),
2840 btrfs_commit_transaction(trans, root);
/*
 * All three ref items are present and their indexes match, but the inode
 * item itself is missing: recreate it (not as a root directory).
 */
2844 if (!delete && (backref->found_inode_ref &&
2845 backref->found_dir_index &&
2846 backref->found_dir_item &&
2847 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2848 !rec->found_inode_item)) {
2849 ret = create_inode_item(root, rec, 0);
/* An error takes precedence over the count of repairs */
2856 return ret ? ret : repaired;
2860 * To determine the file type for nlink/inode_item repair
2862 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2863 * Return -ENOENT if file type is not found.
/*
 * Store the BTRFS_FT_* type of @rec into @type.
 *
 * When the inode item exists the type is derived from the recorded imode;
 * otherwise it is taken from the first backref that has a dir index or a
 * dir item.
 */
2865 static int find_file_type(struct inode_record *rec, u8 *type)
2867 struct inode_backref *backref;
2869 /* For inode item recovered case */
2870 if (rec->found_inode_item) {
2871 *type = imode_to_type(rec->imode);
/* No inode item: fall back to the type recorded in a directory entry */
2875 list_for_each_entry(backref, &rec->backrefs, list) {
2876 if (backref->found_dir_index || backref->found_dir_item) {
2877 *type = backref->filetype;
2885 * To determine the file name for nlink repair
2887 * Return 0 if file name is found, set name and namelen.
2888 * Return -ENOENT if file name is not found.
/*
 * Copy the name of @rec into @name and its length into @namelen.
 *
 * The name comes from the first backref that has any of dir index, dir item
 * or inode ref. Exactly backref->namelen bytes are copied, so @name must have
 * room for that many; callers should rely on *namelen for the length.
 */
2890 static int find_file_name(struct inode_record *rec,
2891 char *name, int *namelen)
2893 struct inode_backref *backref;
2895 list_for_each_entry(backref, &rec->backrefs, list) {
2896 if (backref->found_dir_index || backref->found_dir_item ||
2897 backref->found_inode_ref) {
2898 memcpy(name, backref->name, backref->namelen);
2899 *namelen = backref->namelen;
2906 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of @rec from scratch inside transaction @trans.
 *
 * Steps visible below:
 *  1. forget the link count found by the scan (rec->found_link = 0);
 *  2. btrfs_unlink() every backref, dropping from the list those that do not
 *     have all of dir index, dir item and inode ref;
 *  3. write nlink = 0 into the on-disk inode item;
 *  4. btrfs_add_link() each backref left on the list, which is expected to
 *     bump nlink once per link.
 */
2907 static int reset_nlink(struct btrfs_trans_handle *trans,
2908 struct btrfs_root *root,
2909 struct btrfs_path *path,
2910 struct inode_record *rec)
2912 struct inode_backref *backref;
2913 struct inode_backref *tmp;
2914 struct btrfs_key key;
2915 struct btrfs_inode_item *inode_item;
2918 /* We don't believe this either, reset it and iterate backref */
2919 rec->found_link = 0;
2921 /* Remove all backref including the valid ones */
2922 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2923 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2924 backref->index, backref->name,
2925 backref->namelen, 0);
2929 /* remove invalid backref, so it won't be added back */
2930 if (!(backref->found_dir_index &&
2931 backref->found_dir_item &&
2932 backref->found_inode_ref)) {
2933 list_del(&backref->list);
2940 /* Set nlink to 0 */
2941 key.objectid = rec->ino;
2942 key.type = BTRFS_INODE_ITEM_KEY;
/* ins_len 0, cow 1: the item is modified in place, nothing is inserted */
2944 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2951 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2952 struct btrfs_inode_item);
2953 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2954 btrfs_mark_buffer_dirty(path->nodes[0]);
2955 btrfs_release_path(path);
2958 * Add back valid inode_ref/dir_item/dir_index,
2959 * add_link() will handle the nlink inc, so new nlink must be correct
2961 list_for_each_entry(backref, &rec->backrefs, list) {
2962 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2963 backref->name, backref->namelen,
2964 backref->filetype, &backref->index, 1);
2969 btrfs_release_path(path);
/*
 * Find the highest objectid currently used in @root and store it in
 * *highest_ino.
 *
 * The tree is searched for an INODE_ITEM key with objectid
 * BTRFS_LAST_FREE_OBJECTID; the key of the item just before the returned
 * slot supplies the result. A result at or above BTRFS_LAST_FREE_OBJECTID is
 * treated specially (the handling itself is not visible here).
 */
2973 static int get_highest_inode(struct btrfs_trans_handle *trans,
2974 struct btrfs_root *root,
2975 struct btrfs_path *path,
2978 struct btrfs_key key, found_key;
2981 btrfs_init_path(path);
2982 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2984 key.type = BTRFS_INODE_ITEM_KEY;
2985 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
/*
 * NOTE(review): slots[0] - 1 assumes the search did not land on slot 0 -
 * confirm that this cannot happen or that it is guarded.
 */
2987 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2988 path->slots[0] - 1);
2989 *highest_ino = found_key.objectid;
2992 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2994 btrfs_release_path(path);
/*
 * Fix a wrong link count for @rec inside transaction @trans.
 *
 * The name and type of the file are salvaged first (with the inode number
 * and "regular file" as fallbacks), then reset_nlink() rebuilds the links.
 * If no link is left afterwards, the inode is linked into a "lost+found"
 * directory created under BTRFS_FIRST_FREE_OBJECTID, appending ".INO"
 * suffixes to the name while it collides with an existing entry.
 *
 * I_ERR_LINK_COUNT_WRONG is cleared on the way out regardless of the result,
 * see the comment near the end.
 */
2998 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2999 struct btrfs_root *root,
3000 struct btrfs_path *path,
3001 struct inode_record *rec)
3003 char *dir_name = "lost+found";
3004 char namebuf[BTRFS_NAME_LEN] = {0};
3009 int name_recovered = 0;
3010 int type_recovered = 0;
3014 * Get file name and type first before these invalid inode ref
3015 * are deleted by remove_all_invalid_backref()
/* Both finders return 0 on success, hence the negation */
3017 name_recovered = !find_file_name(rec, namebuf, &namelen);
3018 type_recovered = !find_file_type(rec, &type);
3020 if (!name_recovered) {
3021 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3022 rec->ino, rec->ino);
3023 namelen = count_digits(rec->ino);
3024 sprintf(namebuf, "%llu", rec->ino);
3027 if (!type_recovered) {
3028 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3030 type = BTRFS_FT_REG_FILE;
3034 ret = reset_nlink(trans, root, path, rec);
3037 "Failed to reset nlink for inode %llu: %s\n",
3038 rec->ino, strerror(-ret));
/* No link found for the inode: move it to lost+found */
3042 if (rec->found_link == 0) {
3043 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3047 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3048 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3051 fprintf(stderr, "Failed to create '%s' dir: %s\n",
3052 dir_name, strerror(-ret));
3055 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
3056 namebuf, namelen, type, NULL, 1);
3058 * Add ".INO" suffix several times to handle case where
3059 * "FILENAME.INO" is already taken by another file.
3061 while (ret == -EEXIST) {
3063 * Conflicting file name, add ".INO" as suffix * +1 for '.'
3065 if (namelen + count_digits(rec->ino) + 1 >
/* The suffix is written right after the current name, bounded by the buffer */
3070 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
3072 namelen += count_digits(rec->ino) + 1;
3073 ret = btrfs_add_link(trans, root, rec->ino,
3074 lost_found_ino, namebuf,
3075 namelen, type, NULL, 1);
3079 "Failed to link the inode %llu to %s dir: %s\n",
3080 rec->ino, dir_name, strerror(-ret));
3084 * Just increase the found_link, don't actually add the
3085 * backref. This will make things easier and this inode
3086 * record will be freed after the repair is done.
3087 * So fsck will not report problem about this inode.
3090 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3091 namelen, namebuf, dir_name);
3093 printf("Fixed the nlink of inode %llu\n", rec->ino);
3096 * Clear the flag anyway, or we will loop forever for the same inode
3097 * as it will not be removed from the bad inode list and the dead loop
3100 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3101 btrfs_release_path(path);
3106 * Check if there is any normal(reg or prealloc) file extent for given
3108 * This is used to determine the file type when neither its dir_index/item or
3109 * inode_item exists.
3111 * This will *NOT* report error, if any error happens, just consider it does
3112 * not have any normal file extent.
/*
 * Look for a non-inline file extent item belonging to inode @ino in @root.
 *
 * The lookup is read-only (NULL transaction, no COW). The item found must
 * carry an EXTENT_DATA key for @ino; its extent type is then compared with
 * BTRFS_FILE_EXTENT_INLINE.
 */
3114 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3116 struct btrfs_path path;
3117 struct btrfs_key key;
3118 struct btrfs_key found_key;
3119 struct btrfs_file_extent_item *fi;
3123 btrfs_init_path(&path);
3125 key.type = BTRFS_EXTENT_DATA_KEY;
3128 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* No exact match and the slot is past the last item: step to the next leaf */
3133 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3134 ret = btrfs_next_leaf(root, &path);
3141 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
/* The item must still belong to this inode and be a file extent */
3143 if (found_key.objectid != ino ||
3144 found_key.type != BTRFS_EXTENT_DATA_KEY)
3146 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3147 struct btrfs_file_extent_item);
3148 type = btrfs_file_extent_type(path.nodes[0], fi);
/* Anything that is not an inline extent counts as a normal extent */
3149 if (type != BTRFS_FILE_EXTENT_INLINE) {
3155 btrfs_release_path(&path);
3159 static u32 btrfs_type_to_imode(u8 type)
3161 static u32 imode_by_btrfs_type[] = {
3162 [BTRFS_FT_REG_FILE] = S_IFREG,
3163 [BTRFS_FT_DIR] = S_IFDIR,
3164 [BTRFS_FT_CHRDEV] = S_IFCHR,
3165 [BTRFS_FT_BLKDEV] = S_IFBLK,
3166 [BTRFS_FT_FIFO] = S_IFIFO,
3167 [BTRFS_FT_SOCK] = S_IFSOCK,
3168 [BTRFS_FT_SYMLINK] = S_IFLNK,
3171 return imode_by_btrfs_type[(type)];
/*
 * Rebuild the missing inode item of @rec inside transaction @trans.
 *
 * The file type is taken from find_file_type() when possible; otherwise it
 * is guessed from what the scan found (file extents, dir items, orphan
 * extents) with "regular file" as the last resort. Only the inode item is
 * recreated with btrfs_new_inode(); the link count is deliberately left to
 * the nlink repair, which is forced to run by setting
 * I_ERR_LINK_COUNT_WRONG.
 */
3174 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3175 struct btrfs_root *root,
3176 struct btrfs_path *path,
3177 struct inode_record *rec)
3181 int type_recovered = 0;
3184 printf("Trying to rebuild inode:%llu\n", rec->ino);
/* find_file_type() returns 0 on success, hence the negation */
3186 type_recovered = !find_file_type(rec, &filetype);
3189 * Try to determine inode type if type not found.
3191 * For found regular file extent, it must be FILE.
3192 * For found dir_item/index, it must be DIR.
3194 * For undetermined one, use FILE as fallback.
3197 * 1. If found backref(inode_index/item is already handled) to it,
3199 * Need new inode-inode ref structure to allow search for that.
3201 if (!type_recovered) {
3202 if (rec->found_file_extent &&
3203 find_normal_file_extent(root, rec->ino)) {
3205 filetype = BTRFS_FT_REG_FILE;
3206 } else if (rec->found_dir_item) {
3208 filetype = BTRFS_FT_DIR;
3209 } else if (!list_empty(&rec->orphan_extents)) {
3211 filetype = BTRFS_FT_REG_FILE;
3213 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3216 filetype = BTRFS_FT_REG_FILE;
3220 ret = btrfs_new_inode(trans, root, rec->ino,
3221 mode | btrfs_type_to_imode(filetype));
3226 * Here inode rebuild is done, we only rebuild the inode item,
3227 * don't repair the nlink(like move to lost+found).
3228 * That is the job of nlink repair.
3230 * We just fill the record and return
/*
 * NOTE(review): the flag set here is found_dir_item although the item just
 * rebuilt is the inode item - confirm that found_inode_item is not what was
 * intended.
 */
3232 rec->found_dir_item = 1;
3233 rec->imode = mode | btrfs_type_to_imode(filetype);
3235 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3236 /* Ensure the inode_nlinks repair function will be called */
3237 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach the orphan data extents recorded for @rec to the inode.
 *
 * For each orphan extent the file range is first probed with
 * btrfs_get_extent(). A conflicting orphan is released with
 * btrfs_free_extent(); otherwise a file extent item is inserted for it and
 * the record's size and hole bookkeeping is updated. Processed orphans are
 * unlinked from the list and I_ERR_FILE_EXTENT_ORPHAN is cleared at the end.
 */
3242 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3243 struct btrfs_root *root,
3244 struct btrfs_path *path,
3245 struct inode_record *rec)
3247 struct orphan_data_extent *orphan;
3248 struct orphan_data_extent *tmp;
/* The _safe iterator is needed because entries are unlinked while walking */
3251 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3253 * Check for conflicting file extents
3255 * Here we don't know whether the extents is compressed or not,
3256 * so we can only assume it not compressed nor data offset,
3257 * and use its disk_len as extent length.
3259 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3260 orphan->offset, orphan->disk_len, 0);
3261 btrfs_release_path(path);
3266 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3267 orphan->disk_bytenr, orphan->disk_len);
3268 ret = btrfs_free_extent(trans,
3269 root->fs_info->extent_root,
3270 orphan->disk_bytenr, orphan->disk_len,
3271 0, root->objectid, orphan->objectid,
/* disk_len is used for both length arguments, matching the assumption above */
3276 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3277 orphan->offset, orphan->disk_bytenr,
3278 orphan->disk_len, orphan->disk_len);
3282 /* Update file size info */
3283 rec->found_size += orphan->disk_len;
3284 if (rec->found_size == rec->nbytes)
3285 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3287 /* Update the file extent hole info too */
3288 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3292 if (RB_EMPTY_ROOT(&rec->holes))
3293 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3295 list_del(&orphan->list);
3298 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps between the file extents of @rec with hole extents.
 *
 * Every entry of the rec->holes rb-tree is punched with btrfs_punch_hole()
 * and then removed from the tree; I_ERR_FILE_EXTENT_DISCOUNT is cleared once
 * the tree is empty. A file that lost all of its extents is handled by
 * punching a single hole covering the sector-aligned isize.
 */
3303 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3304 struct btrfs_root *root,
3305 struct btrfs_path *path,
3306 struct inode_record *rec)
3308 struct rb_node *node;
3309 struct file_extent_hole *hole;
3313 node = rb_first(&rec->holes);
3317 hole = rb_entry(node, struct file_extent_hole, node);
3318 ret = btrfs_punch_hole(trans, root, rec->ino,
3319 hole->start, hole->len);
3322 ret = del_file_extent_hole(&rec->holes, hole->start,
3326 if (RB_EMPTY_ROOT(&rec->holes))
3327 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* The tree was modified above, so restart from its first node */
3328 node = rb_first(&rec->holes);
3330 /* special case for a file losing all its file extent */
3332 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3333 round_up(rec->isize,
3334 root->fs_info->sectorsize));
3338 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3339 rec->ino, root->objectid);
/*
 * Run every applicable inode repair for @rec inside a single transaction.
 *
 * Records without any of the repairable error bits tested first are not
 * repaired here. The repairs run in a fixed order and each one is attempted
 * only while all previous ones returned 0. The transaction is committed
 * afterwards; the result of the commit itself is not checked.
 */
3344 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3346 struct btrfs_trans_handle *trans;
3347 struct btrfs_path path;
3350 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3351 I_ERR_NO_ORPHAN_ITEM |
3352 I_ERR_LINK_COUNT_WRONG |
3353 I_ERR_NO_INODE_ITEM |
3354 I_ERR_FILE_EXTENT_ORPHAN |
3355 I_ERR_FILE_EXTENT_DISCOUNT|
3356 I_ERR_FILE_NBYTES_WRONG)))
3360 * For nlink repair, it may create a dir and add link, so
3361 * 2 for parent(256)'s dir_index and dir_item
3362 * 2 for lost+found dir's inode_item and inode_ref
3363 * 1 for the new inode_ref of the file
3364 * 2 for lost+found dir's dir_index and dir_item for the file
3366 trans = btrfs_start_transaction(root, 7);
3368 return PTR_ERR(trans);
3370 btrfs_init_path(&path);
/*
 * The inode item is rebuilt first; that repair also sets
 * I_ERR_LINK_COUNT_WRONG so the nlink repair further down runs.
 */
3371 if (rec->errors & I_ERR_NO_INODE_ITEM)
3372 ret = repair_inode_no_item(trans, root, &path, rec);
3373 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3374 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3375 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3376 ret = repair_inode_discount_extent(trans, root, &path, rec);
3377 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3378 ret = repair_inode_isize(trans, root, &path, rec);
3379 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3380 ret = repair_inode_orphan_item(trans, root, &path, rec);
3381 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3382 ret = repair_inode_nlinks(trans, root, &path, rec);
3383 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3384 ret = repair_inode_nbytes(trans, root, &path, rec);
3385 btrfs_commit_transaction(trans, root);
3386 btrfs_release_path(&path);
/*
 * Verify, and in repair mode try to fix, every inode record collected for
 * @root in @inode_cache.
 *
 * Phases visible below:
 *  - a root with zero refs only gets a warning if its cache is not empty;
 *  - in repair mode the backrefs of each record are repaired first through
 *    repair_inode_backrefs(), for the ordering reasons explained in the
 *    comment further down;
 *  - the root directory record is looked up and checked, and may be
 *    recreated with btrfs_make_root_dir();
 *  - every remaining record is removed from the cache, gets its error bits
 *    finalised, is passed to try_repair_inode(), and whatever is still wrong
 *    is printed together with its unresolved backrefs before being freed.
 *
 * Returns -1 when the local error counter is positive, 0 otherwise.
 */
3390 static int check_inode_recs(struct btrfs_root *root,
3391 struct cache_tree *inode_cache)
3393 struct cache_extent *cache;
3394 struct ptr_node *node;
3395 struct inode_record *rec;
3396 struct inode_backref *backref;
3401 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3403 if (btrfs_root_refs(&root->root_item) == 0) {
3404 if (!cache_tree_empty(inode_cache))
3405 fprintf(stderr, "warning line %d\n", __LINE__);
3410 * We need to repair backrefs first because we could change some of the
3411 * errors in the inode recs.
3413 * We also need to go through and delete invalid backrefs first and then
3414 * add the correct ones second. We do this because we may get EEXIST
3415 * when adding back the correct index because we hadn't yet deleted the
3418 * For example, if we were missing a dir index then the directories
3419 * isize would be wrong, so if we fixed the isize to what we thought it
3420 * would be and then fixed the backref we'd still have a invalid fs, so
3421 * we need to add back the dir index and then check to see if the isize
3426 if (stage == 3 && !err)
3429 cache = search_cache_extent(inode_cache, 0);
3430 while (repair && cache) {
3431 node = container_of(cache, struct ptr_node, cache);
/* Advance before the current node can be removed from the cache */
3433 cache = next_cache_extent(cache);
3435 /* Need to free everything up and rescan */
3437 remove_cache_extent(inode_cache, &node->cache);
3439 free_inode_rec(rec);
3443 if (list_empty(&rec->backrefs))
3446 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Root directory handling */
3460 rec = get_inode_rec(inode_cache, root_dirid, 0);
3461 BUG_ON(IS_ERR(rec));
3463 ret = check_root_dir(rec);
3465 fprintf(stderr, "root %llu root dir %llu error\n",
3466 (unsigned long long)root->root_key.objectid,
3467 (unsigned long long)root_dirid);
3468 print_inode_error(root, rec);
3473 struct btrfs_trans_handle *trans;
3475 trans = btrfs_start_transaction(root, 1);
3476 if (IS_ERR(trans)) {
3477 err = PTR_ERR(trans);
3482 "root %llu missing its root dir, recreating\n",
3483 (unsigned long long)root->objectid);
3485 ret = btrfs_make_root_dir(trans, root, root_dirid);
3488 btrfs_commit_transaction(trans, root);
3492 fprintf(stderr, "root %llu root dir %llu not found\n",
3493 (unsigned long long)root->root_key.objectid,
3494 (unsigned long long)root_dirid);
/* Final pass: drain the cache, repairing or reporting each record */
3498 cache = search_cache_extent(inode_cache, 0);
3501 node = container_of(cache, struct ptr_node, cache);
3503 remove_cache_extent(inode_cache, &node->cache);
/* The root dir was handled above and the orphan objectid is not a real inode */
3505 if (rec->ino == root_dirid ||
3506 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3507 free_inode_rec(rec);
3511 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3512 ret = check_orphan_item(root, rec->ino);
3514 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3515 if (can_free_inode_rec(rec)) {
3516 free_inode_rec(rec);
/* Derive the remaining error bits from what the scan found */
3521 if (!rec->found_inode_item)
3522 rec->errors |= I_ERR_NO_INODE_ITEM;
3523 if (rec->found_link != rec->nlink)
3524 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3526 ret = try_repair_inode(root, rec);
3527 if (ret == 0 && can_free_inode_rec(rec)) {
3528 free_inode_rec(rec);
3534 if (!(repair && ret == 0))
3536 print_inode_error(root, rec);
/* Flag every missing piece of each backref before printing it */
3537 list_for_each_entry(backref, &rec->backrefs, list) {
3538 if (!backref->found_dir_item)
3539 backref->errors |= REF_ERR_NO_DIR_ITEM;
3540 if (!backref->found_dir_index)
3541 backref->errors |= REF_ERR_NO_DIR_INDEX;
3542 if (!backref->found_inode_ref)
3543 backref->errors |= REF_ERR_NO_INODE_REF;
3544 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3545 " namelen %u name %s filetype %d errors %x",
3546 (unsigned long long)backref->dir,
3547 (unsigned long long)backref->index,
3548 backref->namelen, backref->name,
3549 backref->filetype, backref->errors);
3550 print_ref_error(backref->errors);
3552 free_inode_rec(rec);
3554 return (error > 0) ? -1 : 0;
3557 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3560 struct cache_extent *cache;
3561 struct root_record *rec = NULL;
3564 cache = lookup_cache_extent(root_cache, objectid, 1);
3566 rec = container_of(cache, struct root_record, cache);
3568 rec = calloc(1, sizeof(*rec));
3570 return ERR_PTR(-ENOMEM);
3571 rec->objectid = objectid;
3572 INIT_LIST_HEAD(&rec->backrefs);
3573 rec->cache.start = objectid;
3574 rec->cache.size = 1;
3576 ret = insert_cache_extent(root_cache, &rec->cache);
3578 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec matching (@ref_root, @dir, @name), or allocate a
 * new one and append it to rec->backrefs.
 *
 * Existing entries are matched on ref_root, dir, name length and name bytes;
 * @index is not part of the comparison and is only stored in a newly created
 * entry. The name is stored inline after the structure, NUL-terminated.
 */
3583 static struct root_backref *get_root_backref(struct root_record *rec,
3584 u64 ref_root, u64 dir, u64 index,
3585 const char *name, int namelen)
3587 struct root_backref *backref;
3589 list_for_each_entry(backref, &rec->backrefs, list) {
3590 if (backref->ref_root != ref_root || backref->dir != dir ||
3591 backref->namelen != namelen)
3593 if (memcmp(name, backref->name, namelen))
/* Not found: one allocation holds the struct, the name and its terminator */
3598 backref = calloc(1, sizeof(*backref) + namelen + 1);
3601 backref->ref_root = ref_root;
3603 backref->index = index;
3604 backref->namelen = namelen;
3605 memcpy(backref->name, name, namelen);
3606 backref->name[namelen] = '\0';
3607 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Release the root record embedding @cache: every backref is unlinked from
 * rec->backrefs one at a time, starting from the head of the list.
 * This is the per-entry callback handed to FREE_EXTENT_CACHE_BASED_TREE
 * below for the root_recs tree.
 */
3611 static void free_root_record(struct cache_extent *cache)
3613 struct root_record *rec;
3614 struct root_backref *backref;
3616 rec = container_of(cache, struct root_record, cache);
3617 while (!list_empty(&rec->backrefs)) {
3618 backref = to_root_backref(rec->backrefs.next);
3619 list_del(&backref->list);
3626 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that an item of type @item_type referring to root @root_id was seen
 * in @ref_root, directory @dir, under @name.
 *
 * The matching root record and backref are looked up (or created), @errors
 * is merged into the backref, and the found_* flag for @item_type is set.
 * For every item type except DIR_ITEM the index is cross-checked against
 * what was recorded earlier, flagging REF_ERR_INDEX_UNMATCH on a mismatch.
 * A backref becomes reachable once both its forward root ref and its dir
 * item have been seen.
 */
3628 static int add_root_backref(struct cache_tree *root_cache,
3629 u64 root_id, u64 ref_root, u64 dir, u64 index,
3630 const char *name, int namelen,
3631 int item_type, int errors)
3633 struct root_record *rec;
3634 struct root_backref *backref;
3636 rec = get_root_rec(root_cache, root_id);
3637 BUG_ON(IS_ERR(rec));
3638 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3641 backref->errors |= errors;
/* The index of a DIR_ITEM is not compared with the other items */
3643 if (item_type != BTRFS_DIR_ITEM_KEY) {
3644 if (backref->found_dir_index || backref->found_back_ref ||
3645 backref->found_forward_ref) {
3646 if (backref->index != index)
3647 backref->errors |= REF_ERR_INDEX_UNMATCH;
3649 backref->index = index;
3653 if (item_type == BTRFS_DIR_ITEM_KEY) {
3654 if (backref->found_forward_ref)
3656 backref->found_dir_item = 1;
3657 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3658 backref->found_dir_index = 1;
3659 } else if (item_type == BTRFS_ROOT_REF_KEY) {
/* Seeing the same forward ref twice is an error */
3660 if (backref->found_forward_ref)
3661 backref->errors |= REF_ERR_DUP_ROOT_REF;
3662 else if (backref->found_dir_item)
3664 backref->found_forward_ref = 1;
3665 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3666 if (backref->found_back_ref)
3667 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3668 backref->found_back_ref = 1;
3673 if (backref->found_forward_ref && backref->found_dir_item)
3674 backref->reachable = 1;
/*
 * Move the records collected in @src_cache for @root into the global root
 * backref cache @dst_cache.
 *
 * Records of the tree reloc root are simply discarded. Every other record is
 * removed from @src_cache, tested with is_child_root(), and its dir item and
 * dir index backrefs are registered through add_root_backref() with the
 * record's ino as the referenced root id. The record is freed afterwards.
 */
3678 static int merge_root_recs(struct btrfs_root *root,
3679 struct cache_tree *src_cache,
3680 struct cache_tree *dst_cache)
3682 struct cache_extent *cache;
3683 struct ptr_node *node;
3684 struct inode_record *rec;
3685 struct inode_backref *backref;
3688 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3689 free_inode_recs_tree(src_cache);
3694 cache = search_cache_extent(src_cache, 0);
3697 node = container_of(cache, struct ptr_node, cache);
3699 remove_cache_extent(src_cache, &node->cache);
3702 ret = is_child_root(root, root->objectid, rec->ino);
3708 list_for_each_entry(backref, &rec->backrefs, list) {
/* These records are not expected to carry inode refs */
3709 BUG_ON(backref->found_inode_ref);
3710 if (backref->found_dir_item)
3711 add_root_backref(dst_cache, rec->ino,
3712 root->root_key.objectid, backref->dir,
3713 backref->index, backref->name,
3714 backref->namelen, BTRFS_DIR_ITEM_KEY,
3716 if (backref->found_dir_index)
3717 add_root_backref(dst_cache, rec->ino,
3718 root->root_key.objectid, backref->dir,
3719 backref->index, backref->name,
3720 backref->namelen, BTRFS_DIR_INDEX_KEY,
3724 free_inode_rec(rec);
/*
 * Check the references between subvolume roots recorded in @root_cache.
 *
 * Two passes over the cache are visible:
 *  1. starting from the record of BTRFS_FS_TREE_OBJECTID, backrefs whose
 *     referencing root has no reference itself lose their "reachable" flag
 *     (see the fixme about circular references);
 *  2. every record is then inspected: unreferenced fs trees in the
 *     BTRFS_FIRST_FREE_OBJECTID..BTRFS_LAST_FREE_OBJECTID range are checked
 *     for an orphan item and reported, missing ref components are flagged on
 *     each backref, and unresolved refs are printed to stderr.
 *
 * Returns 1 when the local error counter is positive, 0 otherwise.
 */
3731 static int check_root_refs(struct btrfs_root *root,
3732 struct cache_tree *root_cache)
3734 struct root_record *rec;
3735 struct root_record *ref_root;
3736 struct root_backref *backref;
3737 struct cache_extent *cache;
3743 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3744 BUG_ON(IS_ERR(rec));
3747 /* fixme: this can not detect circular references */
3750 cache = search_cache_extent(root_cache, 0);
3754 rec = container_of(cache, struct root_record, cache);
3755 cache = next_cache_extent(cache);
3757 if (rec->found_ref == 0)
3760 list_for_each_entry(backref, &rec->backrefs, list) {
3761 if (!backref->reachable)
3764 ref_root = get_root_rec(root_cache,
3766 BUG_ON(IS_ERR(ref_root));
/* A backref stays reachable only while its referencing root is referenced */
3767 if (ref_root->found_ref > 0)
3770 backref->reachable = 0;
3772 if (rec->found_ref == 0)
/* Reporting pass */
3778 cache = search_cache_extent(root_cache, 0);
3782 rec = container_of(cache, struct root_record, cache);
3783 cache = next_cache_extent(cache);
3785 if (rec->found_ref == 0 &&
3786 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3787 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3788 ret = check_orphan_item(root->fs_info->tree_root,
3794 * If we don't have a root item then we likely just have
3795 * a dir item in a snapshot for this root but no actual
3796 * ref key or anything so it's meaningless.
3798 if (!rec->found_root_item)
3801 fprintf(stderr, "fs tree %llu not referenced\n",
3802 (unsigned long long)rec->objectid);
3806 if (rec->found_ref > 0 && !rec->found_root_item)
/* Flag every missing component of each backref */
3808 list_for_each_entry(backref, &rec->backrefs, list) {
3809 if (!backref->found_dir_item)
3810 backref->errors |= REF_ERR_NO_DIR_ITEM;
3811 if (!backref->found_dir_index)
3812 backref->errors |= REF_ERR_NO_DIR_INDEX;
3813 if (!backref->found_back_ref)
3814 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3815 if (!backref->found_forward_ref)
3816 backref->errors |= REF_ERR_NO_ROOT_REF;
3817 if (backref->reachable && backref->errors)
3824 fprintf(stderr, "fs tree %llu refs %u %s\n",
3825 (unsigned long long)rec->objectid, rec->found_ref,
3826 rec->found_root_item ? "" : "not found");
3828 list_for_each_entry(backref, &rec->backrefs, list) {
3829 if (!backref->reachable)
3831 if (!backref->errors && rec->found_root_item)
3833 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3834 " index %llu namelen %u name %s errors %x\n",
3835 (unsigned long long)backref->ref_root,
3836 (unsigned long long)backref->dir,
3837 (unsigned long long)backref->index,
3838 backref->namelen, backref->name,
3840 print_ref_error(backref->errors);
3843 return errors > 0 ? 1 : 0;
/*
 * Decode the root ref item at @slot of leaf @eb and register it in
 * @root_cache through add_root_backref().
 *
 * The directory id, sequence (index) and name are read from the item. A
 * name longer than BTRFS_NAME_LEN is truncated to that length and reported
 * with REF_ERR_NAME_TOO_LONG. For a ROOT_REF key the referenced root is
 * key->offset and the referencing root is key->objectid; for the other key
 * type handled here the two are swapped.
 */
3846 static int process_root_ref(struct extent_buffer *eb, int slot,
3847 struct btrfs_key *key,
3848 struct cache_tree *root_cache)
3854 struct btrfs_root_ref *ref;
3855 char namebuf[BTRFS_NAME_LEN];
3858 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3860 dirid = btrfs_root_ref_dirid(eb, ref);
3861 index = btrfs_root_ref_sequence(eb, ref);
3862 name_len = btrfs_root_ref_name_len(eb, ref);
3864 if (name_len <= BTRFS_NAME_LEN) {
3868 len = BTRFS_NAME_LEN;
3869 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes are stored directly after the fixed-size ref structure */
3871 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3873 if (key->type == BTRFS_ROOT_REF_KEY) {
3874 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3875 index, namebuf, len, key->type, error);
3877 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3878 index, namebuf, len, key->type, error);
/*
 * Per-entry callback for the corrupt_blocks tree: resolves the
 * btrfs_corrupt_block that embeds @cache. It is handed to
 * FREE_EXTENT_CACHE_BASED_TREE below.
 */
3883 static void free_corrupt_block(struct cache_extent *cache)
3885 struct btrfs_corrupt_block *corrupt;
3887 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3891 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3894 * Repair the btree of the given root.
3896 * The fix is to remove the node pointers whose keys are recorded in the
3897 * corrupt_blocks cache_tree, and then rebalance the tree.
3898 * After the fix, the btree should be writeable.
/*
 * Cut the tree blocks recorded in @corrupt_blocks out of the btree of @root.
 *
 * Nothing is done for an empty cache. Otherwise, inside one transaction:
 *  1. for each recorded block, the tree is searched down to the block's
 *     level for its key without any balancing, the block pointer found there
 *     is removed with btrfs_del_ptr() and the extent it pointed to is freed;
 *  2. the same keys are searched again with a negative ins_len so that
 *     btrfs_search_slot() rebalances the tree.
 * The transaction is committed at the end.
 */
3900 static int repair_btree(struct btrfs_root *root,
3901 struct cache_tree *corrupt_blocks)
3903 struct btrfs_trans_handle *trans;
3904 struct btrfs_path path;
3905 struct btrfs_corrupt_block *corrupt;
3906 struct cache_extent *cache;
3907 struct btrfs_key key;
3912 if (cache_tree_empty(corrupt_blocks))
3915 trans = btrfs_start_transaction(root, 1);
3916 if (IS_ERR(trans)) {
3917 ret = PTR_ERR(trans);
3918 fprintf(stderr, "Error starting transaction: %s\n",
3922 btrfs_init_path(&path);
3923 cache = first_cache_extent(corrupt_blocks);
3925 corrupt = container_of(cache, struct btrfs_corrupt_block,
/* Stop the search at the level of the corrupted block */
3927 level = corrupt->level;
3928 path.lowest_level = level;
3929 key.objectid = corrupt->key.objectid;
3930 key.type = corrupt->key.type;
3931 key.offset = corrupt->key.offset;
3934 * Here we don't want to do any tree balance, since it may
3935 * cause a balance with corrupted brother leaf/node,
3936 * so ins_len set to 0 here.
3937 * Balance will be done after all corrupt node/leaf is deleted.
3939 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
/* Remember the block address before its pointer is removed */
3942 offset = btrfs_node_blockptr(path.nodes[level],
3945 /* Remove the ptr */
3946 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3950 * Remove the corresponding extent
3951 * return value is not concerned.
3953 btrfs_release_path(&path);
3954 ret = btrfs_free_extent(trans, root, offset,
3955 root->fs_info->nodesize, 0,
3956 root->root_key.objectid, level - 1, 0);
3957 cache = next_cache_extent(cache);
3960 /* Balance the btree using btrfs_search_slot() */
3961 cache = first_cache_extent(corrupt_blocks);
3963 corrupt = container_of(cache, struct btrfs_corrupt_block,
3965 memcpy(&key, &corrupt->key, sizeof(key));
3966 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3969 /* return will always >0 since it won't find the item */
3971 btrfs_release_path(&path);
3972 cache = next_cache_extent(cache);
3975 btrfs_commit_transaction(trans, root);
3976 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree.
 *
 * Outline of the visible steps:
 *  - a per-root corrupt_blocks cache is installed in fs_info so corrupted
 *    tree blocks met during the walk are recorded for this tree only;
 *  - the root record is looked up in @root_cache (not for the tree reloc
 *    root) and marked as having a root item when the item has refs;
 *  - orphan data extents attached to the root are moved to the inode
 *    records they belong to;
 *  - the root block itself is verified, then the tree is walked with
 *    walk_down_tree()/walk_up_tree(), starting either at the root node or at
 *    the recorded drop_progress key of a partially dropped root;
 *  - recorded corrupt blocks are listed and passed to repair_btree();
 *  - the collected root and inode records are processed with
 *    merge_root_recs() and check_inode_recs();
 *  - the per-root caches are released again.
 */
3980 static int check_fs_root(struct btrfs_root *root,
3981 struct cache_tree *root_cache,
3982 struct walk_control *wc)
3988 struct btrfs_path path;
3989 struct shared_node root_node;
3990 struct root_record *rec;
3991 struct btrfs_root_item *root_item = &root->root_item;
3992 struct cache_tree corrupt_blocks;
3993 struct orphan_data_extent *orphan;
3994 struct orphan_data_extent *tmp;
3995 enum btrfs_tree_block_status status;
3996 struct node_refs nrefs;
3999 * Reuse the corrupt_block cache tree to record corrupted tree block
4001 * Unlike the usage in extent tree check, here we do it in a per
4002 * fs/subvol tree base.
4004 cache_tree_init(&corrupt_blocks);
4005 root->fs_info->corrupt_blocks = &corrupt_blocks;
4007 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4008 rec = get_root_rec(root_cache, root->root_key.objectid);
4009 BUG_ON(IS_ERR(rec));
4010 if (btrfs_root_refs(root_item) > 0)
4011 rec->found_root_item = 1;
4014 btrfs_init_path(&path);
4015 memset(&root_node, 0, sizeof(root_node));
4016 cache_tree_init(&root_node.root_cache);
4017 cache_tree_init(&root_node.inode_cache);
4018 memset(&nrefs, 0, sizeof(nrefs));
4020 /* Move the orphan extent record to corresponding inode_record */
4021 list_for_each_entry_safe(orphan, tmp,
4022 &root->orphan_data_extents, list) {
4023 struct inode_record *inode;
4025 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4027 BUG_ON(IS_ERR(inode));
4028 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4029 list_move(&orphan->list, &inode->orphan_extents);
/* The walk starts at the root level with root_node as the active node */
4032 level = btrfs_header_level(root->node);
4033 memset(wc->nodes, 0, sizeof(wc->nodes));
4034 wc->nodes[level] = &root_node;
4035 wc->active_node = level;
4036 wc->root_level = level;
4038 /* We may not have checked the root block, lets do that now */
4039 if (btrfs_is_leaf(root->node))
4040 status = btrfs_check_leaf(root, NULL, root->node);
4042 status = btrfs_check_node(root, NULL, root->node);
4043 if (status != BTRFS_TREE_BLOCK_CLEAN)
/*
 * A root that still has refs, or has no recorded drop progress, is walked
 * from its root node; the path takes its own reference on that buffer.
 */
4046 if (btrfs_root_refs(root_item) > 0 ||
4047 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4048 path.nodes[level] = root->node;
4049 extent_buffer_get(root->node);
4050 path.slots[level] = 0;
4052 struct btrfs_key key;
4053 struct btrfs_disk_key found_key;
/* Otherwise the walk resumes at the recorded drop_progress key and level */
4055 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4056 level = root_item->drop_level;
4057 path.lowest_level = level;
4058 if (level > btrfs_header_level(root->node) ||
4059 level >= BTRFS_MAX_LEVEL) {
4060 error("ignoring invalid drop level: %u", level);
4063 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4066 btrfs_node_key(path.nodes[level], &found_key,
4068 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4069 sizeof(found_key)));
4073 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4079 wret = walk_up_tree(root, &path, wc, &level);
4086 btrfs_release_path(&path);
4088 if (!cache_tree_empty(&corrupt_blocks)) {
4089 struct cache_extent *cache;
4090 struct btrfs_corrupt_block *corrupt;
4092 printf("The following tree block(s) is corrupted in tree %llu:\n",
4093 root->root_key.objectid);
4094 cache = first_cache_extent(&corrupt_blocks);
4096 corrupt = container_of(cache,
4097 struct btrfs_corrupt_block,
4099 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4100 cache->start, corrupt->level,
4101 corrupt->key.objectid, corrupt->key.type,
4102 corrupt->key.offset);
4103 cache = next_cache_extent(cache);
4106 printf("Try to repair the btree for root %llu\n",
4107 root->root_key.objectid);
4108 ret = repair_btree(root, &corrupt_blocks);
4110 fprintf(stderr, "Failed to repair btree: %s\n",
4113 printf("Btree for root %llu is fixed\n",
4114 root->root_key.objectid);
4118 err = merge_root_recs(root, &root_node.root_cache, root_cache);
/* The record still being accumulated is marked as checked before the final pass */
4122 if (root_node.current) {
4123 root_node.current->checked = 1;
4124 maybe_free_inode_rec(&root_node.inode_cache,
4128 err = check_inode_recs(root, &root_node.inode_cache);
/* Undo the per-root setup done at the top */
4132 free_corrupt_blocks_tree(&corrupt_blocks);
4133 root->fs_info->corrupt_blocks = NULL;
4134 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Filter used by check_fs_roots() to decide which ROOT_ITEMs to check.
 * The tree-reloc and data-reloc objectids are tested explicitly; every
 * other objectid is classified by is_fstree().
 * NOTE(review): the statement taken for the two reloc objectids is not
 * visible here - presumably it returns non-zero; confirm.
 */
4138 static int fs_root_objectid(u64 objectid)
4140 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4141 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4143 return is_fstree(objectid);
/*
 * Walk the items of fs_info->tree_root.  Every ROOT_ITEM whose objectid is
 * accepted by fs_root_objectid() is read and handed to check_fs_root();
 * ROOT_REF and ROOT_BACKREF items are handed to process_root_ref().
 *
 * @fs_info:    filesystem being checked
 * @root_cache: root record cache passed to check_fs_root(); it is released
 *              with free_root_recs_tree() when the tree root node changes or
 *              check_fs_root() returns -EAGAIN
 */
4146 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4147 struct cache_tree *root_cache)
4149 struct btrfs_path path;
4150 struct btrfs_key key;
4151 struct walk_control wc;
4152 struct extent_buffer *leaf, *tree_node;
4153 struct btrfs_root *tmp_root;
4154 struct btrfs_root *tree_root = fs_info->tree_root;
/* Switch the progress reporter to the fs-roots phase when it is enabled */
4158 if (ctx.progress_enabled) {
4159 ctx.tp = TASK_FS_ROOTS;
4160 task_start(ctx.info);
4164 * Just in case we made any changes to the extent tree that weren't
4165 * reflected into the free space cache yet.
4168 reset_cached_block_groups(fs_info);
4169 memset(&wc, 0, sizeof(wc));
4170 cache_tree_init(&wc.shared);
4171 btrfs_init_path(&path);
4176 key.type = BTRFS_ROOT_ITEM_KEY;
4177 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember the tree root node so a later change of it can be detected */
4182 tree_node = tree_root->node;
4184 if (tree_node != tree_root->node) {
4185 free_root_recs_tree(root_cache);
4186 btrfs_release_path(&path);
4189 leaf = path.nodes[0];
4190 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4191 ret = btrfs_next_leaf(tree_root, &path);
4197 leaf = path.nodes[0];
4199 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4200 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4201 fs_root_objectid(key.objectid)) {
/*
 * Tree-reloc roots are read without the root cache and are released again
 * with btrfs_free_fs_root() after the check; the other branch sets
 * key.offset to (u64)-1 before calling btrfs_read_fs_root().
 */
4202 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4203 tmp_root = btrfs_read_fs_root_no_cache(
4206 key.offset = (u64)-1;
4207 tmp_root = btrfs_read_fs_root(
4210 if (IS_ERR(tmp_root)) {
4214 ret = check_fs_root(tmp_root, root_cache, &wc);
/* -EAGAIN: drop the collected root records and the path */
4215 if (ret == -EAGAIN) {
4216 free_root_recs_tree(root_cache);
4217 btrfs_release_path(&path);
4222 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4223 btrfs_free_fs_root(tmp_root);
4224 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4225 key.type == BTRFS_ROOT_BACKREF_KEY) {
4226 process_root_ref(leaf, path.slots[0], &key,
4233 btrfs_release_path(&path);
/* NOTE(review): wc.shared is freed right before it is tested for emptiness */
4235 free_extent_cache_tree(&wc.shared);
4236 if (!cache_tree_empty(&wc.shared))
4237 fprintf(stderr, "warning line %d\n", __LINE__);
4239 task_stop(ctx.info);
4245 * Find the @index of the DIR_INDEX matching @location_id, name and type.
4246 * Notice: time efficiency is O(N)
4248 * @root: the root of the fs/file tree
4249 * @index_ret: the index as return value
4250 * @namebuf: the name to match
4251 * @name_len: the length of name to match
4252 * @file_type: the file_type of INODE_ITEM to match
4254 * Returns 0 if found and *@index_ret will be modified with right value
4255 * Returns <0 if not found and *@index_ret will be (u64)-1
/*
 * @dirid is the objectid used for the DIR_INDEX search key; @location_id is
 * the objectid the dir item's location key must carry (with type
 * BTRFS_INODE_ITEM_KEY and offset 0).
 */
4257 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4258 u64 *index_ret, char *namebuf, u32 name_len,
4261 struct btrfs_path path;
4262 struct extent_buffer *node;
4263 struct btrfs_dir_item *di;
4264 struct btrfs_key key;
4265 struct btrfs_key location;
4266 char name[BTRFS_NAME_LEN] = {0};
4278 /* search from the last index */
4279 key.objectid = dirid;
4280 key.offset = (u64)-1;
4281 key.type = BTRFS_DIR_INDEX_KEY;
4283 btrfs_init_path(&path);
4284 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* Step backwards through the DIR_INDEX items of @dirid */
4289 ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
/*
 * Not-found sentinel.  This must be the u64 cast: "(64)-1" is the integer
 * expression 64 - 1 and would store 63, which callers that compare against
 * (u64)-1 would take for a real index.
 */
4292 *index_ret = (u64)-1;
4295 /* Check whether inode_id/filetype/name match */
4296 node = path.nodes[0];
4297 slot = path.slots[0];
4298 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4299 total = btrfs_item_size_nr(node, slot);
4300 while (cur < total) {
4302 len = btrfs_dir_name_len(node, di);
4303 data_len = btrfs_dir_data_len(node, di);
4305 btrfs_dir_item_key_to_cpu(node, di, &location);
4306 if (location.objectid != location_id ||
4307 location.type != BTRFS_INODE_ITEM_KEY ||
4308 location.offset != 0)
4311 filetype = btrfs_dir_type(node, di);
4312 if (file_type != filetype)
/* Never copy more than the local name buffer can hold */
4315 if (len > BTRFS_NAME_LEN)
4316 len = BTRFS_NAME_LEN;
4318 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4319 if (len != name_len || strncmp(namebuf, name, len))
/* Match: the index is the offset of the DIR_INDEX item key */
4322 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4323 *index_ret = key.offset;
/* Advance to the next entry packed in the same item */
4327 len += sizeof(*di) + data_len;
4328 di = (struct btrfs_dir_item *)((char *)di + len);
4334 btrfs_release_path(&path);
4339 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4340 * INODE_REF/INODE_EXTREF match.
4342 * @root: the root of the fs/file tree
4343 * @key: the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4344 * value while find index
4345 * @location_key: location key of the struct btrfs_dir_item to match
4346 * @name: the name to match
4347 * @namelen: the length of name
4348 * @file_type: the type of file to match
4350 * Return 0 if no error occurred.
4351 * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4352 * DIR_ITEM/DIR_INDEX
4353 * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4354 * and DIR_ITEM/DIR_INDEX mismatch
/*
 * Look up the DIR_ITEM/DIR_INDEX item at @key and scan the entries packed in
 * it for one whose location key, file type and name all match.  When @key is
 * a DIR_INDEX key with offset (u64)-1 the index is first resolved through
 * find_dir_index(), which writes the result into key->offset.
 */
4356 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4357 struct btrfs_key *location_key, char *name,
4358 u32 namelen, u8 file_type)
4360 struct btrfs_path path;
4361 struct extent_buffer *node;
4362 struct btrfs_dir_item *di;
4363 struct btrfs_key location;
4364 char namebuf[BTRFS_NAME_LEN] = {0};
4373 /* get the index by traversing all index */
4374 if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4375 ret = find_dir_index(root, key->objectid,
4376 location_key->objectid, &key->offset,
4377 name, namelen, file_type);
4379 ret = DIR_INDEX_MISSING;
4383 btrfs_init_path(&path);
4384 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4386 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4391 /* Check whether inode_id/filetype/name match */
4392 node = path.nodes[0];
4393 slot = path.slots[0];
4394 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4395 total = btrfs_item_size_nr(node, slot);
4396 while (cur < total) {
/* Start each entry from the "mismatch" code for the kind of key searched */
4397 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4398 DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4400 len = btrfs_dir_name_len(node, di);
4401 data_len = btrfs_dir_data_len(node, di);
4403 btrfs_dir_item_key_to_cpu(node, di, &location);
4404 if (location.objectid != location_key->objectid ||
4405 location.type != location_key->type ||
4406 location.offset != location_key->offset)
4409 filetype = btrfs_dir_type(node, di);
4410 if (file_type != filetype)
/*
 * NOTE(review): len is clamped before the warning is issued, so the %u
 * printed below is BTRFS_NAME_LEN rather than the length stored on disk.
 */
4413 if (len > BTRFS_NAME_LEN) {
4414 len = BTRFS_NAME_LEN;
4415 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4417 key->type == BTRFS_DIR_ITEM_KEY ?
4418 "DIR_ITEM" : "DIR_INDEX",
4419 key->objectid, key->offset, len);
4421 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4423 if (len != namelen || strncmp(namebuf, name, len))
/* Advance to the next entry packed in the same item */
4429 len += sizeof(*di) + data_len;
4430 di = (struct btrfs_dir_item *)((char *)di + len);
4435 btrfs_release_path(&path);
4440 * Prints inode ref error message
/*
 * @key is the INODE_REF key being reported, @index/@namebuf/@name_len/
 * @filetype describe the ref entry, and @err is the error bitmap
 * (DIR_ITEM_* / DIR_INDEX_* bits) that selects which messages are printed.
 */
4442 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4443 u64 index, const char *namebuf, int name_len,
4444 u8 filetype, int err)
4449 /* root dir error */
4450 if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4452 "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4453 root->objectid, key->objectid, key->offset, namebuf);
4458 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4459 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4460 root->objectid, key->offset,
4461 btrfs_name_hash(namebuf, name_len),
4462 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4464 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4465 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4466 root->objectid, key->offset, index,
/*
 * The DIR INDEX wording must be chosen from the DIR_INDEX bit; testing
 * DIR_ITEM_MISMATCH here reported the state of the DIR_ITEM instead.
 */
4467 err & DIR_INDEX_MISMATCH ? "mismatch" : "missing",
4472 * Traverse the given INODE_REF and call find_dir_item() to find related
4473 * DIR_ITEM/DIR_INDEX.
4475 * @root: the root of the fs/file tree
4476 * @ref_key: the key of the INODE_REF
4477 * @refs: the count of INODE_REF
4478 * @mode: the st_mode of INODE_ITEM
4480 * Return 0 if no error occurred.
/*
 * Walk every entry packed in the INODE_REF item at @path and look up the
 * matching DIR_INDEX and DIR_ITEM through find_dir_item().  The name that is
 * read while *@refs equals 1 is copied to @name_ret when that is non-NULL.
 */
4482 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4483 struct btrfs_path *path, char *name_ret,
4484 u32 *namelen_ret, u64 *refs, int mode)
4486 struct btrfs_key key;
4487 struct btrfs_key location;
4488 struct btrfs_inode_ref *ref;
4489 struct extent_buffer *node;
4490 char namebuf[BTRFS_NAME_LEN] = {0};
/* The dir items must point back at this inode's INODE_ITEM */
4500 location.objectid = ref_key->objectid;
4501 location.type = BTRFS_INODE_ITEM_KEY;
4502 location.offset = 0;
4503 node = path->nodes[0];
4504 slot = path->slots[0];
4506 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4507 total = btrfs_item_size_nr(node, slot);
4510 /* Update inode ref count */
4514 index = btrfs_inode_ref_index(node, ref);
4515 name_len = btrfs_inode_ref_name_len(node, ref);
/* Clamp the name to what fits in the item and in namebuf */
4516 if (cur + sizeof(*ref) + name_len > total ||
4517 name_len > BTRFS_NAME_LEN) {
4518 warning("root %llu INODE_REF[%llu %llu] name too long",
4519 root->objectid, ref_key->objectid, ref_key->offset);
4521 if (total < cur + sizeof(*ref))
4523 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4528 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4530 /* copy the first name found to name_ret */
4531 if (*refs == 1 && name_ret) {
4532 memcpy(name_ret, namebuf, len);
4536 /* Check root dir ref */
4537 if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4538 if (index != 0 || len != strlen("..") ||
4539 strncmp("..", namebuf, len) ||
4540 ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4541 /* set err bits then repair will delete the ref */
4542 err |= DIR_INDEX_MISSING;
4543 err |= DIR_ITEM_MISSING;
/*
 * find_dir_item() takes a u8 directory file type and compares it directly
 * with btrfs_dir_type(); @mode is the inode's st_mode, so it has to be
 * converted with imode_to_type() instead of being truncated to u8.
 */
4548 /* Find related DIR_INDEX */
4549 key.objectid = ref_key->offset;
4550 key.type = BTRFS_DIR_INDEX_KEY;
4552 tmp_err |= find_dir_item(root, &key, &location, namebuf, len, imode_to_type(mode));
4554 /* Find related dir_item */
4555 key.objectid = ref_key->offset;
4556 key.type = BTRFS_DIR_ITEM_KEY;
4557 key.offset = btrfs_name_hash(namebuf, len);
4558 tmp_err |= find_dir_item(root, &key, &location, namebuf, len, imode_to_type(mode));
4561 print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4562 imode_to_type(mode), tmp_err);
/* Advance to the next ref entry packed in the same item */
4564 len = sizeof(*ref) + name_len;
4565 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4575 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4576 * DIR_ITEM/DIR_INDEX.
4578 * @root: the root of the fs/file tree
4579 * @ref_key: the key of the INODE_EXTREF
4580 * @refs: the count of INODE_EXTREF
4581 * @mode: the st_mode of INODE_ITEM
4583 * Return 0 if no error occurred.
/*
 * Walk every entry packed in the INODE_EXTREF item at @node/@slot and look up
 * the matching DIR_INDEX and DIR_ITEM in the entry's parent directory through
 * find_dir_item().
 */
4585 static int check_inode_extref(struct btrfs_root *root,
4586 struct btrfs_key *ref_key,
4587 struct extent_buffer *node, int slot, u64 *refs,
4590 struct btrfs_key key;
4591 struct btrfs_key location;
4592 struct btrfs_inode_extref *extref;
4593 char namebuf[BTRFS_NAME_LEN] = {0};
/* The dir items must point back at this inode's INODE_ITEM */
4603 location.objectid = ref_key->objectid;
4604 location.type = BTRFS_INODE_ITEM_KEY;
4605 location.offset = 0;
4607 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4608 total = btrfs_item_size_nr(node, slot);
4611 /* update inode ref count */
4613 name_len = btrfs_inode_extref_name_len(node, extref);
4614 index = btrfs_inode_extref_index(node, extref);
4615 parent = btrfs_inode_extref_parent(node, extref);
/* Names longer than BTRFS_NAME_LEN are read truncated, with a warning */
4616 if (name_len <= BTRFS_NAME_LEN) {
4619 len = BTRFS_NAME_LEN;
4620 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4621 root->objectid, ref_key->objectid, ref_key->offset);
4623 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4625 /* Check root dir ref name */
4626 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4627 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4628 root->objectid, ref_key->objectid, ref_key->offset,
4630 err |= ROOT_DIR_ERROR;
/*
 * find_dir_item() takes a u8 directory file type and compares it directly
 * with btrfs_dir_type(); @mode is the inode's st_mode, so it has to be
 * converted with imode_to_type() instead of being truncated to u8.
 */
4633 /* find related dir_index */
4634 key.objectid = parent;
4635 key.type = BTRFS_DIR_INDEX_KEY;
4637 ret = find_dir_item(root, &key, &location, namebuf, len, imode_to_type(mode));
4640 /* find related dir_item */
4641 key.objectid = parent;
4642 key.type = BTRFS_DIR_ITEM_KEY;
4643 key.offset = btrfs_name_hash(namebuf, len);
4644 ret = find_dir_item(root, &key, &location, namebuf, len, imode_to_type(mode));
/* Advance to the next extref entry packed in the same item */
4647 len = sizeof(*extref) + name_len;
4648 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4658 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4659 * DIR_ITEM/DIR_INDEX match.
4660 * Return with @index_ret.
4662 * @root: the root of the fs/file tree
4663 * @key: the key of the INODE_REF/INODE_EXTREF
4664 * @name: the name in the INODE_REF/INODE_EXTREF
4665 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4666 * @index_ret: the index in the INODE_REF/INODE_EXTREF,
4667 * value (u64)-1 means do not check index
4668 * @ext_ref: the EXTENDED_IREF feature
4670 * Return 0 if no error occurred.
4671 * Return >0 for error bitmap
/*
 * Search the INODE_REF item at @key for an entry with the given name (and,
 * unless *@index_ret is (u64)-1, the given index).  On a match *@index_ret is
 * set to the index stored in the ref.  The INODE_EXTREF lookup that follows
 * rewrites @key in place: key->type becomes BTRFS_INODE_EXTREF_KEY and
 * key->offset becomes the extref hash of (old key->offset, name).
 */
4673 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4674 char *name, int namelen, u64 *index_ret,
4675 unsigned int ext_ref)
4677 struct btrfs_path path;
4678 struct btrfs_inode_ref *ref;
4679 struct btrfs_inode_extref *extref;
4680 struct extent_buffer *node;
4681 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4694 btrfs_init_path(&path);
4695 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4697 ret = INODE_REF_MISSING;
4701 node = path.nodes[0];
4702 slot = path.slots[0];
4704 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4705 total = btrfs_item_size_nr(node, slot);
4707 /* Iterate all entry of INODE_REF */
4708 while (cur < total) {
4709 ret = INODE_REF_MISSING;
4711 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4712 ref_index = btrfs_inode_ref_index(node, ref);
/* An index of (u64)-1 from the caller disables the index comparison */
4713 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4716 if (cur + sizeof(*ref) + ref_namelen > total ||
4717 ref_namelen > BTRFS_NAME_LEN) {
4718 warning("root %llu INODE %s[%llu %llu] name too long",
4720 key->type == BTRFS_INODE_REF_KEY ?
4722 key->objectid, key->offset);
4724 if (cur + sizeof(*ref) > total)
4726 len = min_t(u32, total - cur - sizeof(*ref),
4732 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4735 if (len != namelen || strncmp(ref_namebuf, name, len))
4738 *index_ret = ref_index;
/* Advance to the next ref entry packed in the same item */
4742 len = sizeof(*ref) + ref_namelen;
4743 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4748 /* Skip if not support EXTENDED_IREF feature */
4752 btrfs_release_path(&path);
4753 btrfs_init_path(&path);
/* The old key offset is kept as dir_id and compared with each extref parent */
4755 dir_id = key->offset;
4756 key->type = BTRFS_INODE_EXTREF_KEY;
4757 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4759 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4761 ret = INODE_REF_MISSING;
4765 node = path.nodes[0];
4766 slot = path.slots[0];
4768 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4770 total = btrfs_item_size_nr(node, slot);
4772 /* Iterate all entry of INODE_EXTREF */
4773 while (cur < total) {
4774 ret = INODE_REF_MISSING;
4776 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4777 ref_index = btrfs_inode_extref_index(node, extref);
4778 parent = btrfs_inode_extref_parent(node, extref);
4779 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4782 if (parent != dir_id)
4785 if (ref_namelen <= BTRFS_NAME_LEN) {
4788 len = BTRFS_NAME_LEN;
4789 warning("root %llu INODE %s[%llu %llu] name too long",
4791 key->type == BTRFS_INODE_REF_KEY ?
4793 key->objectid, key->offset);
4795 read_extent_buffer(node, ref_namebuf,
4796 (unsigned long)(extref + 1), len);
4798 if (len != namelen || strncmp(ref_namebuf, name, len))
4801 *index_ret = ref_index;
/* Advance to the next extref entry packed in the same item */
4806 len = sizeof(*extref) + ref_namelen;
4807 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4812 btrfs_release_path(&path);
4817 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4818 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4820 * @root: the root of the fs/file tree
4821 * @key: the key of the DIR_ITEM/DIR_INDEX
4822 * @size: the st_size of the INODE_ITEM
4823 * @ext_ref: the EXTENDED_IREF feature
4825 * Return 0 if no error occurred.
/*
 * For every entry packed in the DIR_ITEM/DIR_INDEX item at @node/@slot:
 * add its name length to *@size, verify the name hash (DIR_ITEM only), look
 * up the INODE_ITEM named by the entry's location key and compare its file
 * type, then look for the matching INODE_REF/INODE_EXTREF via
 * find_inode_ref().  Entries whose location is a ROOT_ITEM are not checked
 * against an inode.
 */
4827 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4828 struct extent_buffer *node, int slot, u64 *size,
4829 unsigned int ext_ref)
4831 struct btrfs_dir_item *di;
4832 struct btrfs_inode_item *ii;
4833 struct btrfs_path path;
4834 struct btrfs_key location;
4835 char namebuf[BTRFS_NAME_LEN] = {0};
4848 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4849 * ignore index check.
4851 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4853 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4854 total = btrfs_item_size_nr(node, slot);
4856 while (cur < total) {
4857 data_len = btrfs_dir_data_len(node, di);
4859 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4860 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4861 "DIR_ITEM" : "DIR_INDEX",
4862 key->objectid, key->offset, data_len);
4864 name_len = btrfs_dir_name_len(node, di);
/* Clamp the name to what fits in the item and in namebuf */
4865 if (cur + sizeof(*di) + name_len > total ||
4866 name_len > BTRFS_NAME_LEN) {
4867 warning("root %llu %s[%llu %llu] name too long",
4869 key->type == BTRFS_DIR_ITEM_KEY ?
4870 "DIR_ITEM" : "DIR_INDEX",
4871 key->objectid, key->offset);
4873 if (cur + sizeof(*di) > total)
4875 len = min_t(u32, total - cur - sizeof(*di),
/* The accumulated size uses the on-disk name_len, not the clamped len */
4880 (*size) += name_len;
4882 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4883 filetype = btrfs_dir_type(node, di);
/* A DIR_ITEM key offset must equal the hash of the name it stores */
4885 if (key->type == BTRFS_DIR_ITEM_KEY &&
4886 key->offset != btrfs_name_hash(namebuf, len)) {
4888 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4889 root->objectid, key->objectid, key->offset,
4890 namebuf, len, filetype, key->offset,
4891 btrfs_name_hash(namebuf, len));
4894 btrfs_init_path(&path);
4895 btrfs_dir_item_key_to_cpu(node, di, &location);
4897 /* Ignore related ROOT_ITEM check */
4898 if (location.type == BTRFS_ROOT_ITEM_KEY)
4901 /* Check relative INODE_ITEM(existence/filetype) */
4902 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4904 err |= INODE_ITEM_MISSING;
4905 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4906 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4907 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4908 key->offset, location.objectid, name_len,
4913 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4914 struct btrfs_inode_item);
4915 mode = btrfs_inode_mode(path.nodes[0], ii);
4917 if (imode_to_type(mode) != filetype) {
4918 err |= INODE_ITEM_MISMATCH;
4919 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4920 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4921 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4922 key->offset, name_len, namebuf, filetype);
4925 /* Check relative INODE_REF/INODE_EXTREF */
/* Reuse location as the INODE_REF key: (inode objectid, INODE_REF, dir id) */
4926 location.type = BTRFS_INODE_REF_KEY;
4927 location.offset = key->objectid;
4928 ret = find_inode_ref(root, &location, namebuf, len,
4931 if (ret & INODE_REF_MISSING)
4932 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4933 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4934 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4935 key->offset, name_len, namebuf, filetype);
4938 btrfs_release_path(&path);
/* Advance to the next entry packed in the same item */
4939 len = sizeof(*di) + name_len + data_len;
4940 di = (struct btrfs_dir_item *)((char *)di + len);
4943 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4944 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4945 root->objectid, key->objectid, key->offset);
4954 * Check file extent datasum/hole, update the size of the file extents,
4955 * check and update the last offset of the file extent.
4957 * @root: the root of fs/file tree.
4958 * @fkey: the key of the file extent.
4959 * @nodatasum: INODE_NODATASUM feature.
4960 * @size: the sum of all EXTENT_DATA items size for this inode.
4961 * @end: the offset of the last extent.
4963 * Return 0 if no error occurred.
/*
 * Validate one EXTENT_DATA item.  Inline extents are checked for a non-zero
 * length that (when uncompressed) equals the inline item length.  REG and
 * PREALLOC extents have the csum coverage of their range counted with
 * count_csum_range() and compared against the inode's nodatasum flag and the
 * extent type.  *@size and *@end are advanced by the extent's byte count.
 */
4965 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4966 struct extent_buffer *node, int slot,
4967 unsigned int nodatasum, u64 *size, u64 *end)
4969 struct btrfs_file_extent_item *fi;
4972 u64 extent_num_bytes;
4974 u64 csum_found; /* In byte size, sectorsize aligned */
4975 u64 search_start; /* Logical range start we search for csum */
4976 u64 search_len; /* Logical range len we search for csum */
4977 unsigned int extent_type;
4978 unsigned int is_hole;
4983 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4985 /* Check inline extent */
4986 extent_type = btrfs_file_extent_type(node, fi);
4987 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4988 struct btrfs_item *e = btrfs_item_nr(slot);
4989 u32 item_inline_len;
4991 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4992 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4993 compressed = btrfs_file_extent_compression(node, fi);
4994 if (extent_num_bytes == 0) {
4996 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4997 root->objectid, fkey->objectid, fkey->offset);
4998 err |= FILE_EXTENT_ERROR;
/* The length comparison is only made for uncompressed inline data */
5000 if (!compressed && extent_num_bytes != item_inline_len) {
5002 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5003 root->objectid, fkey->objectid, fkey->offset,
5004 extent_num_bytes, item_inline_len);
5005 err |= FILE_EXTENT_ERROR;
5007 *end += extent_num_bytes;
5008 *size += extent_num_bytes;
5012 /* Check extent type */
5013 if (extent_type != BTRFS_FILE_EXTENT_REG &&
5014 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5015 err |= FILE_EXTENT_ERROR;
5016 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5017 root->objectid, fkey->objectid, fkey->offset);
5021 /* Check REG_EXTENT/PREALLOC_EXTENT */
5022 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5023 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5024 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5025 extent_offset = btrfs_file_extent_offset(node, fi);
5026 compressed = btrfs_file_extent_compression(node, fi);
/* A hole is an extent whose disk bytenr and disk length are both zero */
5027 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5030 * Check EXTENT_DATA csum
5032 * For plain (uncompressed) extent, we should only check the range
5033 * we're referring to, as it's possible that part of prealloc extent
5034 * has been written, and has csum:
5036 * |<--- Original large preallocated extent A ---->|
5037 * |<- Prealloc File Extent ->|<- Regular Extent ->|
5040 * For compressed extent, we should check the whole range.
5043 search_start = disk_bytenr + extent_offset;
5044 search_len = extent_num_bytes;
5046 search_start = disk_bytenr;
5047 search_len = disk_num_bytes;
5049 ret = count_csum_range(root, search_start, search_len, &csum_found);
/*
 * Three mutually exclusive complaints: csums on a nodatasum inode, missing
 * csums on a regular non-hole extent, or csums on a prealloc extent.
 */
5050 if (csum_found > 0 && nodatasum) {
5051 err |= ODD_CSUM_ITEM;
5052 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5053 root->objectid, fkey->objectid, fkey->offset);
5054 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5055 !is_hole && (ret < 0 || csum_found < search_len)) {
5056 err |= CSUM_ITEM_MISSING;
5057 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5058 root->objectid, fkey->objectid, fkey->offset,
5059 csum_found, search_len);
5060 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5061 err |= ODD_CSUM_ITEM;
5062 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5063 root->objectid, fkey->objectid, fkey->offset, csum_found);
5066 /* Check EXTENT_DATA hole */
/* Without no_holes, each extent must start where the previous one ended */
5067 if (!no_holes && *end != fkey->offset) {
5068 err |= FILE_EXTENT_ERROR;
5069 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
5070 root->objectid, fkey->objectid, fkey->offset);
5073 *end += extent_num_bytes;
5075 *size += extent_num_bytes;
5081 * Set inode item nbytes to @nbytes
5083 * Returns 0 on success
5084 * Returns != 0 on error
/*
 * The key at the current @path position is saved in research_key before the
 * path is released for the update, and is searched for again (read-only)
 * once the transaction has been committed.
 */
5086 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5087 struct btrfs_path *path,
5088 u64 ino, u64 nbytes)
5090 struct btrfs_trans_handle *trans;
5091 struct btrfs_inode_item *ii;
5092 struct btrfs_key key;
5093 struct btrfs_key research_key;
5097 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5100 key.type = BTRFS_INODE_ITEM_KEY;
5103 trans = btrfs_start_transaction(root, 1);
5104 if (IS_ERR(trans)) {
5105 ret = PTR_ERR(trans);
5110 btrfs_release_path(path);
/* Search with cow enabled so the inode item can be modified in place */
5111 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5119 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5120 struct btrfs_inode_item);
5121 btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5122 btrfs_mark_buffer_dirty(path->nodes[0]);
5124 btrfs_commit_transaction(trans, root);
5127 error("failed to set nbytes in inode %llu root %llu",
5128 ino, root->root_key.objectid);
/* Newline belongs at the end of the message, not in the middle of it */
5130 printf("Set nbytes in inode item %llu root %llu to %llu\n", ino,
5131 root->root_key.objectid, nbytes);
5134 btrfs_release_path(path);
5135 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5142 * Set directory inode isize to @isize.
5144 * Returns 0 on success.
5145 * Returns != 0 on error.
/*
 * Rewrite the size field of an INODE_ITEM inside a transaction.  The key at
 * the current @path position is saved in research_key first and searched for
 * again (read-only) after the commit.
 */
5147 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5148 struct btrfs_path *path,
5151 struct btrfs_trans_handle *trans;
5152 struct btrfs_inode_item *ii;
5153 struct btrfs_key key;
5154 struct btrfs_key research_key;
/* Remember which item @path points at before it gets released */
5158 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5161 key.type = BTRFS_INODE_ITEM_KEY;
5164 trans = btrfs_start_transaction(root, 1);
5165 if (IS_ERR(trans)) {
5166 ret = PTR_ERR(trans);
5171 btrfs_release_path(path);
/* Search with cow enabled so the inode item can be modified in place */
5172 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5180 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5181 struct btrfs_inode_item);
5182 btrfs_set_inode_size(path->nodes[0], ii, isize);
5183 btrfs_mark_buffer_dirty(path->nodes[0]);
5185 btrfs_commit_transaction(trans, root);
5188 error("failed to set isize in inode %llu root %llu",
5189 ino, root->root_key.objectid);
5191 printf("Set isize in inode %llu root %llu to %llu\n",
5192 ino, root->root_key.objectid, isize);
/* Re-search the saved key without a transaction */
5194 btrfs_release_path(path);
5195 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5202 * Wrapper function for btrfs_add_orphan_item().
5204 * Returns 0 on success.
5205 * Returns != 0 on error.
/*
 * Add an orphan item for inode @ino inside a transaction.  The key at the
 * current @path position is saved in research_key first and searched for
 * again (read-only) after the commit.
 */
5207 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5208 struct btrfs_path *path, u64 ino)
5210 struct btrfs_trans_handle *trans;
5211 struct btrfs_key research_key;
/* Remember which item @path points at before it gets released */
5215 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5217 trans = btrfs_start_transaction(root, 1);
5218 if (IS_ERR(trans)) {
5219 ret = PTR_ERR(trans);
5224 btrfs_release_path(path);
5225 ret = btrfs_add_orphan_item(trans, root, path, ino);
5227 btrfs_commit_transaction(trans, root);
5230 error("failed to add inode %llu as orphan item root %llu",
5231 ino, root->root_key.objectid);
5233 printf("Added inode %llu as orphan item root %llu\n",
5234 ino, root->root_key.objectid);
/* Re-search the saved key without a transaction */
5236 btrfs_release_path(path);
5237 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5244 * Check INODE_ITEM and related ITEMs (the same inode number)
5245 * 1. check link count
5246 * 2. check inode ref/extref
5247 * 3. check dir item/index
5249 * @ext_ref: the EXTENDED_IREF feature
5251 * Return 0 if no error occurred.
5252 * Return >0 for error or hit the traversal is done(by error bitmap)
/*
 * Starting at the INODE_ITEM that @path points to, read the inode's size,
 * nbytes, mode, nlink and NODATASUM flag, then step through the following
 * items with btrfs_next_item() while their objectid equals the inode number,
 * dispatching each by key type to check_inode_ref(), check_inode_extref(),
 * check_dir_item() or check_file_extent().  The totals gathered that way
 * (refs, size, extent_size, extent_end) are then compared with the values
 * stored in the INODE_ITEM.
 */
5254 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5255 unsigned int ext_ref)
5257 struct extent_buffer *node;
5258 struct btrfs_inode_item *ii;
5259 struct btrfs_key key;
5268 u64 extent_size = 0;
5270 unsigned int nodatasum;
5274 char namebuf[BTRFS_NAME_LEN] = {0};
5277 node = path->nodes[0];
5278 slot = path->slots[0];
5280 btrfs_item_key_to_cpu(node, &key, slot);
5281 inode_id = key.objectid;
/* The orphan objectid is not a real inode: just step past it */
5283 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5284 ret = btrfs_next_item(root, path);
5290 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5291 isize = btrfs_inode_size(node, ii);
5292 nbytes = btrfs_inode_nbytes(node, ii);
5293 mode = btrfs_inode_mode(node, ii);
5294 dir = imode_to_type(mode) == BTRFS_FT_DIR;
5295 nlink = btrfs_inode_nlink(node, ii);
5296 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5299 ret = btrfs_next_item(root, path);
5301 /* out will fill 'err' using current statistics */
5303 } else if (ret > 0) {
5308 node = path->nodes[0];
5309 slot = path->slots[0];
5310 btrfs_item_key_to_cpu(node, &key, slot);
/* Items belonging to this inode end when the objectid changes */
5311 if (key.objectid != inode_id)
5315 case BTRFS_INODE_REF_KEY:
5316 ret = check_inode_ref(root, &key, path, namebuf,
5317 &name_len, &refs, mode);
5320 case BTRFS_INODE_EXTREF_KEY:
5321 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5322 warning("root %llu EXTREF[%llu %llu] isn't supported",
5323 root->objectid, key.objectid,
5325 ret = check_inode_extref(root, &key, node, slot, &refs,
5329 case BTRFS_DIR_ITEM_KEY:
5330 case BTRFS_DIR_INDEX_KEY:
5332 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5333 root->objectid, inode_id,
5334 imode_to_type(mode), key.objectid,
5337 ret = check_dir_item(root, &key, node, slot, &size,
5341 case BTRFS_EXTENT_DATA_KEY:
5343 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5344 root->objectid, inode_id, key.objectid,
5347 ret = check_file_extent(root, &key, node, slot,
5348 nodatasum, &extent_size,
5352 case BTRFS_XATTR_ITEM_KEY:
5355 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5356 key.objectid, key.type, key.offset);
5361 /* verify INODE_ITEM nlink/isize/nbytes */
5364 err |= LINK_COUNT_ERROR;
5365 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5366 root->objectid, inode_id, nlink);
5370 * Just a warning, as dir inode nbytes is just an
5371 * instructive value.
5373 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5374 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5375 root->objectid, inode_id,
5376 root->fs_info->nodesize);
/* Directory isize is compared with the name lengths summed by check_dir_item() */
5379 if (isize != size) {
5381 ret = repair_dir_isize_lowmem(root, path,
5383 if (!repair || ret) {
5386 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5387 root->objectid, inode_id, isize, size);
5391 if (nlink != refs) {
5392 err |= LINK_COUNT_ERROR;
5393 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5394 root->objectid, inode_id, nlink, refs);
5395 } else if (!nlink) {
5397 ret = repair_inode_orphan_item_lowmem(root,
5399 if (!repair || ret) {
5401 error("root %llu INODE[%llu] is orphan item",
5402 root->objectid, inode_id);
5406 if (!nbytes && !no_holes && extent_end < isize) {
5407 err |= NBYTES_ERROR;
5408 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5409 root->objectid, inode_id, isize);
/* nbytes is compared with the extent bytes summed by check_file_extent() */
5412 if (nbytes != extent_size) {
5414 ret = repair_inode_nbytes_lowmem(root, path,
5415 inode_id, extent_size);
5416 if (!repair || ret) {
5417 err |= NBYTES_ERROR;
5419 "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5420 root->objectid, inode_id, nbytes,
5430 * Check the first root dir's inode_item and inode_ref.
5432 * Returns 0 if no error was found.
5433 * Returns >0 if an error was found.
5434 * Returns <0 on fatal error.
5436 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5438 struct btrfs_path path;
5439 struct btrfs_key key;
5440 struct btrfs_inode_item *ii;
5446 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5447 key.type = BTRFS_INODE_ITEM_KEY;
5450 /* For root being dropped, we don't need to check first inode */
5451 if (btrfs_root_refs(&root->root_item) == 0 &&
5452 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5453 BTRFS_FIRST_FREE_OBJECTID)
5456 btrfs_init_path(&path);
5457 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5462 err |= INODE_ITEM_MISSING;
5464 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
5465 struct btrfs_inode_item);
5466 mode = btrfs_inode_mode(path.nodes[0], ii);
5467 if (imode_to_type(mode) != BTRFS_FT_DIR)
5468 err |= INODE_ITEM_MISMATCH;
5471 /* lookup first inode ref */
5472 key.offset = BTRFS_FIRST_FREE_OBJECTID;
5473 key.type = BTRFS_INODE_REF_KEY;
5474 /* special index value */
5477 ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
5483 btrfs_release_path(&path);
5484 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
5485 error("root dir INODE_ITEM is %s",
5486 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
5487 if (err & INODE_REF_MISSING)
5488 error("root dir INODE_REF is missing");
5490 return ret < 0 ? ret : err;
5493 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5494 u64 parent, u64 root)
5496 struct rb_node *node;
5497 struct tree_backref *back = NULL;
5498 struct tree_backref match = {
5505 match.parent = parent;
5506 match.node.full_backref = 1;
5511 node = rb_search(&rec->backref_tree, &match.node.node,
5512 (rb_compare_keys)compare_extent_backref, NULL);
5514 back = to_tree_backref(rb_node_to_extent_backref(node));
5519 static struct data_backref *find_data_backref(struct extent_record *rec,
5520 u64 parent, u64 root,
5521 u64 owner, u64 offset,
5523 u64 disk_bytenr, u64 bytes)
5525 struct rb_node *node;
5526 struct data_backref *back = NULL;
5527 struct data_backref match = {
5534 .found_ref = found_ref,
5535 .disk_bytenr = disk_bytenr,
5539 match.parent = parent;
5540 match.node.full_backref = 1;
5545 node = rb_search(&rec->backref_tree, &match.node.node,
5546 (rb_compare_keys)compare_extent_backref, NULL);
5548 back = to_data_backref(rb_node_to_extent_backref(node));
5553 * Iterate over all items in the tree and call check_inode_item() to check them.
5555 * @root: the root of the tree to be checked.
5556 * @ext_ref: the EXTENDED_IREF feature
5558 * Return 0 if no error found.
5559 * Return <0 for error.
5561 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5563 struct btrfs_path path;
5564 struct node_refs nrefs;
5565 struct btrfs_root_item *root_item = &root->root_item;
5571 * We need to manually check the first inode item (256),
5572 * as the following traversal function will only start from
5573 * the first inode item in the leaf; if inode item (256) is missing,
5574 * we would just skip it forever.
5576 ret = check_fs_first_inode(root, ext_ref);
5581 memset(&nrefs, 0, sizeof(nrefs));
5582 level = btrfs_header_level(root->node);
5583 btrfs_init_path(&path);
5585 if (btrfs_root_refs(root_item) > 0 ||
5586 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5587 path.nodes[level] = root->node;
5588 path.slots[level] = 0;
5589 extent_buffer_get(root->node);
5591 struct btrfs_key key;
5593 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5594 level = root_item->drop_level;
5595 path.lowest_level = level;
5596 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5603 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5606 /* if ret is negative, walk shall stop */
5612 ret = walk_up_tree_v2(root, &path, &level);
5614 /* Normal exit, reset ret to err */
5621 btrfs_release_path(&path);
5626 * Find the relative ref for root_ref and root_backref.
5628 * @root: the root of the root tree.
5629 * @ref_key: the key of the root ref.
5631 * Return 0 if no error occurred.
5633 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5634 struct extent_buffer *node, int slot)
5636 struct btrfs_path path;
5637 struct btrfs_key key;
5638 struct btrfs_root_ref *ref;
5639 struct btrfs_root_ref *backref;
5640 char ref_name[BTRFS_NAME_LEN] = {0};
5641 char backref_name[BTRFS_NAME_LEN] = {0};
5647 u32 backref_namelen;
5652 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5653 ref_dirid = btrfs_root_ref_dirid(node, ref);
5654 ref_seq = btrfs_root_ref_sequence(node, ref);
5655 ref_namelen = btrfs_root_ref_name_len(node, ref);
5657 if (ref_namelen <= BTRFS_NAME_LEN) {
5660 len = BTRFS_NAME_LEN;
5661 warning("%s[%llu %llu] ref_name too long",
5662 ref_key->type == BTRFS_ROOT_REF_KEY ?
5663 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5666 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5668 /* Find relative root_ref */
5669 key.objectid = ref_key->offset;
5670 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5671 key.offset = ref_key->objectid;
5673 btrfs_init_path(&path);
5674 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5676 err |= ROOT_REF_MISSING;
5677 error("%s[%llu %llu] couldn't find relative ref",
5678 ref_key->type == BTRFS_ROOT_REF_KEY ?
5679 "ROOT_REF" : "ROOT_BACKREF",
5680 ref_key->objectid, ref_key->offset);
5684 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5685 struct btrfs_root_ref);
5686 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5687 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5688 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5690 if (backref_namelen <= BTRFS_NAME_LEN) {
5691 len = backref_namelen;
5693 len = BTRFS_NAME_LEN;
5694 warning("%s[%llu %llu] ref_name too long",
5695 key.type == BTRFS_ROOT_REF_KEY ?
5696 "ROOT_REF" : "ROOT_BACKREF",
5697 key.objectid, key.offset);
5699 read_extent_buffer(path.nodes[0], backref_name,
5700 (unsigned long)(backref + 1), len);
5702 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5703 ref_namelen != backref_namelen ||
5704 strncmp(ref_name, backref_name, len)) {
5705 err |= ROOT_REF_MISMATCH;
5706 error("%s[%llu %llu] mismatch relative ref",
5707 ref_key->type == BTRFS_ROOT_REF_KEY ?
5708 "ROOT_REF" : "ROOT_BACKREF",
5709 ref_key->objectid, ref_key->offset);
5712 btrfs_release_path(&path);
5717 * Check all fs/file tree in low_memory mode.
5719 * 1. for fs tree root item, call check_fs_root_v2()
5720 * 2. for fs tree root ref/backref, call check_root_ref()
5722 * Return 0 if no error occurred.
5724 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5726 struct btrfs_root *tree_root = fs_info->tree_root;
5727 struct btrfs_root *cur_root = NULL;
5728 struct btrfs_path path;
5729 struct btrfs_key key;
5730 struct extent_buffer *node;
5731 unsigned int ext_ref;
5736 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5738 btrfs_init_path(&path);
5739 key.objectid = BTRFS_FS_TREE_OBJECTID;
5741 key.type = BTRFS_ROOT_ITEM_KEY;
5743 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5747 } else if (ret > 0) {
5753 node = path.nodes[0];
5754 slot = path.slots[0];
5755 btrfs_item_key_to_cpu(node, &key, slot);
5756 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5758 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5759 fs_root_objectid(key.objectid)) {
5760 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5761 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5764 key.offset = (u64)-1;
5765 cur_root = btrfs_read_fs_root(fs_info, &key);
5768 if (IS_ERR(cur_root)) {
5769 error("Fail to read fs/subvol tree: %lld",
5775 ret = check_fs_root_v2(cur_root, ext_ref);
5778 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5779 btrfs_free_fs_root(cur_root);
5780 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5781 key.type == BTRFS_ROOT_BACKREF_KEY) {
5782 ret = check_root_ref(tree_root, &key, node, slot);
5786 ret = btrfs_next_item(tree_root, &path);
5796 btrfs_release_path(&path);
5800 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
5801 struct cache_tree *root_cache)
5805 if (!ctx.progress_enabled)
5806 fprintf(stderr, "checking fs roots\n");
5807 if (check_mode == CHECK_MODE_LOWMEM)
5808 ret = check_fs_roots_v2(fs_info);
5810 ret = check_fs_roots(fs_info, root_cache);
5815 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5817 struct extent_backref *back, *tmp;
5818 struct tree_backref *tback;
5819 struct data_backref *dback;
5823 rbtree_postorder_for_each_entry_safe(back, tmp,
5824 &rec->backref_tree, node) {
5825 if (!back->found_extent_tree) {
5829 if (back->is_data) {
5830 dback = to_data_backref(back);
5831 fprintf(stderr, "Data backref %llu %s %llu"
5832 " owner %llu offset %llu num_refs %lu"
5833 " not found in extent tree\n",
5834 (unsigned long long)rec->start,
5835 back->full_backref ?
5837 back->full_backref ?
5838 (unsigned long long)dback->parent:
5839 (unsigned long long)dback->root,
5840 (unsigned long long)dback->owner,
5841 (unsigned long long)dback->offset,
5842 (unsigned long)dback->num_refs);
5844 tback = to_tree_backref(back);
5845 fprintf(stderr, "Tree backref %llu parent %llu"
5846 " root %llu not found in extent tree\n",
5847 (unsigned long long)rec->start,
5848 (unsigned long long)tback->parent,
5849 (unsigned long long)tback->root);
5852 if (!back->is_data && !back->found_ref) {
5856 tback = to_tree_backref(back);
5857 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5858 (unsigned long long)rec->start,
5859 back->full_backref ? "parent" : "root",
5860 back->full_backref ?
5861 (unsigned long long)tback->parent :
5862 (unsigned long long)tback->root, back);
5864 if (back->is_data) {
5865 dback = to_data_backref(back);
5866 if (dback->found_ref != dback->num_refs) {
5870 fprintf(stderr, "Incorrect local backref count"
5871 " on %llu %s %llu owner %llu"
5872 " offset %llu found %u wanted %u back %p\n",
5873 (unsigned long long)rec->start,
5874 back->full_backref ?
5876 back->full_backref ?
5877 (unsigned long long)dback->parent:
5878 (unsigned long long)dback->root,
5879 (unsigned long long)dback->owner,
5880 (unsigned long long)dback->offset,
5881 dback->found_ref, dback->num_refs, back);
5883 if (dback->disk_bytenr != rec->start) {
5887 fprintf(stderr, "Backref disk bytenr does not"
5888 " match extent record, bytenr=%llu, "
5889 "ref bytenr=%llu\n",
5890 (unsigned long long)rec->start,
5891 (unsigned long long)dback->disk_bytenr);
5894 if (dback->bytes != rec->nr) {
5898 fprintf(stderr, "Backref bytes do not match "
5899 "extent backref, bytenr=%llu, ref "
5900 "bytes=%llu, backref bytes=%llu\n",
5901 (unsigned long long)rec->start,
5902 (unsigned long long)rec->nr,
5903 (unsigned long long)dback->bytes);
5906 if (!back->is_data) {
5909 dback = to_data_backref(back);
5910 found += dback->found_ref;
5913 if (found != rec->refs) {
5917 fprintf(stderr, "Incorrect global backref count "
5918 "on %llu found %llu wanted %llu\n",
5919 (unsigned long long)rec->start,
5920 (unsigned long long)found,
5921 (unsigned long long)rec->refs);
5927 static void __free_one_backref(struct rb_node *node)
5929 struct extent_backref *back = rb_node_to_extent_backref(node);
5934 static void free_all_extent_backrefs(struct extent_record *rec)
5936 rb_free_nodes(&rec->backref_tree, __free_one_backref);
5939 static void free_extent_record_cache(struct cache_tree *extent_cache)
5941 struct cache_extent *cache;
5942 struct extent_record *rec;
5945 cache = first_cache_extent(extent_cache);
5948 rec = container_of(cache, struct extent_record, cache);
5949 remove_cache_extent(extent_cache, cache);
5950 free_all_extent_backrefs(rec);
5955 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5956 struct extent_record *rec)
5958 if (rec->content_checked && rec->owner_ref_checked &&
5959 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5960 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5961 !rec->bad_full_backref && !rec->crossing_stripes &&
5962 !rec->wrong_chunk_type) {
5963 remove_cache_extent(extent_cache, &rec->cache);
5964 free_all_extent_backrefs(rec);
5965 list_del_init(&rec->list);
5971 static int check_owner_ref(struct btrfs_root *root,
5972 struct extent_record *rec,
5973 struct extent_buffer *buf)
5975 struct extent_backref *node, *tmp;
5976 struct tree_backref *back;
5977 struct btrfs_root *ref_root;
5978 struct btrfs_key key;
5979 struct btrfs_path path;
5980 struct extent_buffer *parent;
5985 rbtree_postorder_for_each_entry_safe(node, tmp,
5986 &rec->backref_tree, node) {
5989 if (!node->found_ref)
5991 if (node->full_backref)
5993 back = to_tree_backref(node);
5994 if (btrfs_header_owner(buf) == back->root)
5997 BUG_ON(rec->is_root);
5999 /* try to find the block by search corresponding fs tree */
6000 key.objectid = btrfs_header_owner(buf);
6001 key.type = BTRFS_ROOT_ITEM_KEY;
6002 key.offset = (u64)-1;
6004 ref_root = btrfs_read_fs_root(root->fs_info, &key);
6005 if (IS_ERR(ref_root))
6008 level = btrfs_header_level(buf);
6010 btrfs_item_key_to_cpu(buf, &key, 0);
6012 btrfs_node_key_to_cpu(buf, &key, 0);
6014 btrfs_init_path(&path);
6015 path.lowest_level = level + 1;
6016 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6020 parent = path.nodes[level + 1];
6021 if (parent && buf->start == btrfs_node_blockptr(parent,
6022 path.slots[level + 1]))
6025 btrfs_release_path(&path);
6026 return found ? 0 : 1;
6029 static int is_extent_tree_record(struct extent_record *rec)
6031 struct extent_backref *node, *tmp;
6032 struct tree_backref *back;
6035 rbtree_postorder_for_each_entry_safe(node, tmp,
6036 &rec->backref_tree, node) {
6039 back = to_tree_backref(node);
6040 if (node->full_backref)
6042 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
6049 static int record_bad_block_io(struct btrfs_fs_info *info,
6050 struct cache_tree *extent_cache,
6053 struct extent_record *rec;
6054 struct cache_extent *cache;
6055 struct btrfs_key key;
6057 cache = lookup_cache_extent(extent_cache, start, len);
6061 rec = container_of(cache, struct extent_record, cache);
6062 if (!is_extent_tree_record(rec))
6065 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6066 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
6069 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6070 struct extent_buffer *buf, int slot)
6072 if (btrfs_header_level(buf)) {
6073 struct btrfs_key_ptr ptr1, ptr2;
6075 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6076 sizeof(struct btrfs_key_ptr));
6077 read_extent_buffer(buf, &ptr2,
6078 btrfs_node_key_ptr_offset(slot + 1),
6079 sizeof(struct btrfs_key_ptr));
6080 write_extent_buffer(buf, &ptr1,
6081 btrfs_node_key_ptr_offset(slot + 1),
6082 sizeof(struct btrfs_key_ptr));
6083 write_extent_buffer(buf, &ptr2,
6084 btrfs_node_key_ptr_offset(slot),
6085 sizeof(struct btrfs_key_ptr));
6087 struct btrfs_disk_key key;
6088 btrfs_node_key(buf, &key, 0);
6089 btrfs_fixup_low_keys(root, path, &key,
6090 btrfs_header_level(buf) + 1);
6093 struct btrfs_item *item1, *item2;
6094 struct btrfs_key k1, k2;
6095 char *item1_data, *item2_data;
6096 u32 item1_offset, item2_offset, item1_size, item2_size;
6098 item1 = btrfs_item_nr(slot);
6099 item2 = btrfs_item_nr(slot + 1);
6100 btrfs_item_key_to_cpu(buf, &k1, slot);
6101 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6102 item1_offset = btrfs_item_offset(buf, item1);
6103 item2_offset = btrfs_item_offset(buf, item2);
6104 item1_size = btrfs_item_size(buf, item1);
6105 item2_size = btrfs_item_size(buf, item2);
6107 item1_data = malloc(item1_size);
6110 item2_data = malloc(item2_size);
6116 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6117 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
6119 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6120 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6124 btrfs_set_item_offset(buf, item1, item2_offset);
6125 btrfs_set_item_offset(buf, item2, item1_offset);
6126 btrfs_set_item_size(buf, item1, item2_size);
6127 btrfs_set_item_size(buf, item2, item1_size);
6129 path->slots[0] = slot;
6130 btrfs_set_item_key_unsafe(root, path, &k2);
6131 path->slots[0] = slot + 1;
6132 btrfs_set_item_key_unsafe(root, path, &k1);
6137 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6139 struct extent_buffer *buf;
6140 struct btrfs_key k1, k2;
6142 int level = path->lowest_level;
6145 buf = path->nodes[level];
6146 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6148 btrfs_node_key_to_cpu(buf, &k1, i);
6149 btrfs_node_key_to_cpu(buf, &k2, i + 1);
6151 btrfs_item_key_to_cpu(buf, &k1, i);
6152 btrfs_item_key_to_cpu(buf, &k2, i + 1);
6154 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6156 ret = swap_values(root, path, buf, i);
6159 btrfs_mark_buffer_dirty(buf);
6165 static int delete_bogus_item(struct btrfs_root *root,
6166 struct btrfs_path *path,
6167 struct extent_buffer *buf, int slot)
6169 struct btrfs_key key;
6170 int nritems = btrfs_header_nritems(buf);
6172 btrfs_item_key_to_cpu(buf, &key, slot);
6174 /* These are all the keys we can deal with missing. */
6175 if (key.type != BTRFS_DIR_INDEX_KEY &&
6176 key.type != BTRFS_EXTENT_ITEM_KEY &&
6177 key.type != BTRFS_METADATA_ITEM_KEY &&
6178 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6179 key.type != BTRFS_EXTENT_DATA_REF_KEY)
6182 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6183 (unsigned long long)key.objectid, key.type,
6184 (unsigned long long)key.offset, slot, buf->start);
6185 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6186 btrfs_item_nr_offset(slot + 1),
6187 sizeof(struct btrfs_item) *
6188 (nritems - slot - 1));
6189 btrfs_set_header_nritems(buf, nritems - 1);
6191 struct btrfs_disk_key disk_key;
6193 btrfs_item_key(buf, &disk_key, 0);
6194 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6196 btrfs_mark_buffer_dirty(buf);
6200 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6202 struct extent_buffer *buf;
6206 /* We should only get this for leaves */
6207 BUG_ON(path->lowest_level);
6208 buf = path->nodes[0];
6210 for (i = 0; i < btrfs_header_nritems(buf); i++) {
6211 unsigned int shift = 0, offset;
6213 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6214 BTRFS_LEAF_DATA_SIZE(root)) {
6215 if (btrfs_item_end_nr(buf, i) >
6216 BTRFS_LEAF_DATA_SIZE(root)) {
6217 ret = delete_bogus_item(root, path, buf, i);
6220 fprintf(stderr, "item is off the end of the "
6221 "leaf, can't fix\n");
6225 shift = BTRFS_LEAF_DATA_SIZE(root) -
6226 btrfs_item_end_nr(buf, i);
6227 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6228 btrfs_item_offset_nr(buf, i - 1)) {
6229 if (btrfs_item_end_nr(buf, i) >
6230 btrfs_item_offset_nr(buf, i - 1)) {
6231 ret = delete_bogus_item(root, path, buf, i);
6234 fprintf(stderr, "items overlap, can't fix\n");
6238 shift = btrfs_item_offset_nr(buf, i - 1) -
6239 btrfs_item_end_nr(buf, i);
6244 printf("Shifting item nr %d by %u bytes in block %llu\n",
6245 i, shift, (unsigned long long)buf->start);
6246 offset = btrfs_item_offset_nr(buf, i);
6247 memmove_extent_buffer(buf,
6248 btrfs_leaf_data(buf) + offset + shift,
6249 btrfs_leaf_data(buf) + offset,
6250 btrfs_item_size_nr(buf, i));
6251 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6253 btrfs_mark_buffer_dirty(buf);
6257 * We may have moved things, in which case we want to exit so we don't
6258 * write those changes out. Once we have proper abort functionality in
6259 * progs this can be changed to something nicer.
6266 * Attempt to fix basic block failures. If we can't fix them for whatever
6267 * reason, then just return -EIO.
6269 static int try_to_fix_bad_block(struct btrfs_root *root,
6270 struct extent_buffer *buf,
6271 enum btrfs_tree_block_status status)
6273 struct btrfs_trans_handle *trans;
6274 struct ulist *roots;
6275 struct ulist_node *node;
6276 struct btrfs_root *search_root;
6277 struct btrfs_path path;
6278 struct ulist_iterator iter;
6279 struct btrfs_key root_key, key;
6282 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6283 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6286 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6290 btrfs_init_path(&path);
6291 ULIST_ITER_INIT(&iter);
6292 while ((node = ulist_next(roots, &iter))) {
6293 root_key.objectid = node->val;
6294 root_key.type = BTRFS_ROOT_ITEM_KEY;
6295 root_key.offset = (u64)-1;
6297 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6304 trans = btrfs_start_transaction(search_root, 0);
6305 if (IS_ERR(trans)) {
6306 ret = PTR_ERR(trans);
6310 path.lowest_level = btrfs_header_level(buf);
6311 path.skip_check_block = 1;
6312 if (path.lowest_level)
6313 btrfs_node_key_to_cpu(buf, &key, 0);
6315 btrfs_item_key_to_cpu(buf, &key, 0);
6316 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6319 btrfs_commit_transaction(trans, search_root);
6322 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6323 ret = fix_key_order(search_root, &path);
6324 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6325 ret = fix_item_offset(search_root, &path);
6327 btrfs_commit_transaction(trans, search_root);
6330 btrfs_release_path(&path);
6331 btrfs_commit_transaction(trans, search_root);
6334 btrfs_release_path(&path);
6338 static int check_block(struct btrfs_root *root,
6339 struct cache_tree *extent_cache,
6340 struct extent_buffer *buf, u64 flags)
6342 struct extent_record *rec;
6343 struct cache_extent *cache;
6344 struct btrfs_key key;
6345 enum btrfs_tree_block_status status;
6349 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6352 rec = container_of(cache, struct extent_record, cache);
6353 rec->generation = btrfs_header_generation(buf);
6355 level = btrfs_header_level(buf);
6356 if (btrfs_header_nritems(buf) > 0) {
6359 btrfs_item_key_to_cpu(buf, &key, 0);
6361 btrfs_node_key_to_cpu(buf, &key, 0);
6363 rec->info_objectid = key.objectid;
6365 rec->info_level = level;
6367 if (btrfs_is_leaf(buf))
6368 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6370 status = btrfs_check_node(root, &rec->parent_key, buf);
6372 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6374 status = try_to_fix_bad_block(root, buf, status);
6375 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6377 fprintf(stderr, "bad block %llu\n",
6378 (unsigned long long)buf->start);
6381 * Signal to callers we need to start the scan over
6382 * again since we'll have cowed blocks.
6387 rec->content_checked = 1;
6388 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6389 rec->owner_ref_checked = 1;
6391 ret = check_owner_ref(root, rec, buf);
6393 rec->owner_ref_checked = 1;
6397 maybe_free_extent_rec(extent_cache, rec);
6402 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6403 u64 parent, u64 root)
6405 struct list_head *cur = rec->backrefs.next;
6406 struct extent_backref *node;
6407 struct tree_backref *back;
6409 while(cur != &rec->backrefs) {
6410 node = to_extent_backref(cur);
6414 back = to_tree_backref(node);
6416 if (!node->full_backref)
6418 if (parent == back->parent)
6421 if (node->full_backref)
6423 if (back->root == root)
6431 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6432 u64 parent, u64 root)
6434 struct tree_backref *ref = malloc(sizeof(*ref));
6438 memset(&ref->node, 0, sizeof(ref->node));
6440 ref->parent = parent;
6441 ref->node.full_backref = 1;
6444 ref->node.full_backref = 0;
6451 static struct data_backref *find_data_backref(struct extent_record *rec,
6452 u64 parent, u64 root,
6453 u64 owner, u64 offset,
6455 u64 disk_bytenr, u64 bytes)
6457 struct list_head *cur = rec->backrefs.next;
6458 struct extent_backref *node;
6459 struct data_backref *back;
6461 while(cur != &rec->backrefs) {
6462 node = to_extent_backref(cur);
6466 back = to_data_backref(node);
6468 if (!node->full_backref)
6470 if (parent == back->parent)
6473 if (node->full_backref)
6475 if (back->root == root && back->owner == owner &&
6476 back->offset == offset) {
6477 if (found_ref && node->found_ref &&
6478 (back->bytes != bytes ||
6479 back->disk_bytenr != disk_bytenr))
6489 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6490 u64 parent, u64 root,
6491 u64 owner, u64 offset,
6494 struct data_backref *ref = malloc(sizeof(*ref));
6498 memset(&ref->node, 0, sizeof(ref->node));
6499 ref->node.is_data = 1;
6502 ref->parent = parent;
6505 ref->node.full_backref = 1;
6509 ref->offset = offset;
6510 ref->node.full_backref = 0;
6512 ref->bytes = max_size;
6515 if (max_size > rec->max_size)
6516 rec->max_size = max_size;
6520 /* Check if the type of extent matches with its chunk */
6521 static void check_extent_type(struct extent_record *rec)
6523 struct btrfs_block_group_cache *bg_cache;
6525 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6529 /* data extent, check chunk directly */
6530 if (!rec->metadata) {
6531 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6532 rec->wrong_chunk_type = 1;
6536 /* metadata extent, check the obvious case first */
6537 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6538 BTRFS_BLOCK_GROUP_METADATA))) {
6539 rec->wrong_chunk_type = 1;
6544 * Check SYSTEM extent, as it's also marked as metadata, we can only
6545 * make sure it's a SYSTEM extent by its backref
6547 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
6548 struct extent_backref *node;
6549 struct tree_backref *tback;
6552 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
6553 if (node->is_data) {
6554 /* tree block shouldn't have data backref */
6555 rec->wrong_chunk_type = 1;
6558 tback = container_of(node, struct tree_backref, node);
6560 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6561 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6563 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6564 if (!(bg_cache->flags & bg_type))
6565 rec->wrong_chunk_type = 1;
6570 * Allocate a new extent record, fill default values from @tmpl and insert it
6571 * into @extent_cache. The caller is supposed to make sure [start,nr) is not in
6572 * the cache, otherwise it fails.
6574 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6575 struct extent_record *tmpl)
6577 struct extent_record *rec;
6580 BUG_ON(tmpl->max_size == 0);
6581 rec = malloc(sizeof(*rec));
6584 rec->start = tmpl->start;
6585 rec->max_size = tmpl->max_size;
6586 rec->nr = max(tmpl->nr, tmpl->max_size);
6587 rec->found_rec = tmpl->found_rec;
6588 rec->content_checked = tmpl->content_checked;
6589 rec->owner_ref_checked = tmpl->owner_ref_checked;
6590 rec->num_duplicates = 0;
6591 rec->metadata = tmpl->metadata;
6592 rec->flag_block_full_backref = FLAG_UNSET;
6593 rec->bad_full_backref = 0;
6594 rec->crossing_stripes = 0;
6595 rec->wrong_chunk_type = 0;
6596 rec->is_root = tmpl->is_root;
6597 rec->refs = tmpl->refs;
6598 rec->extent_item_refs = tmpl->extent_item_refs;
6599 rec->parent_generation = tmpl->parent_generation;
6600 INIT_LIST_HEAD(&rec->backrefs);
6601 INIT_LIST_HEAD(&rec->dups);
6602 INIT_LIST_HEAD(&rec->list);
6603 rec->backref_tree = RB_ROOT;
6604 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6605 rec->cache.start = tmpl->start;
6606 rec->cache.size = tmpl->nr;
6607 ret = insert_cache_extent(extent_cache, &rec->cache);
6612 bytes_used += rec->nr;
6615 rec->crossing_stripes = check_crossing_stripes(global_info,
6616 rec->start, global_info->nodesize);
6617 check_extent_type(rec);
6622 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6624 * - refs - if found, increase refs
6625 * - is_root - if found, set
6626 * - content_checked - if found, set
6627 * - owner_ref_checked - if found, set
6629 * If not found, create a new one, initialize and insert.
6631 static int add_extent_rec(struct cache_tree *extent_cache,
6632 struct extent_record *tmpl)
6634 struct extent_record *rec;
6635 struct cache_extent *cache;
6639 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6641 rec = container_of(cache, struct extent_record, cache);
6645 rec->nr = max(tmpl->nr, tmpl->max_size);
6648 * We need to make sure to reset nr to whatever the extent
6649 * record says was the real size, this way we can compare it to
6652 if (tmpl->found_rec) {
6653 if (tmpl->start != rec->start || rec->found_rec) {
6654 struct extent_record *tmp;
6657 if (list_empty(&rec->list))
6658 list_add_tail(&rec->list,
6659 &duplicate_extents);
6662 * We have to do this song and dance in case we
6663 * find an extent record that falls inside of
6664 * our current extent record but does not have
6665 * the same objectid.
6667 tmp = malloc(sizeof(*tmp));
6670 tmp->start = tmpl->start;
6671 tmp->max_size = tmpl->max_size;
6674 tmp->metadata = tmpl->metadata;
6675 tmp->extent_item_refs = tmpl->extent_item_refs;
6676 INIT_LIST_HEAD(&tmp->list);
6677 list_add_tail(&tmp->list, &rec->dups);
6678 rec->num_duplicates++;
6685 if (tmpl->extent_item_refs && !dup) {
6686 if (rec->extent_item_refs) {
6687 fprintf(stderr, "block %llu rec "
6688 "extent_item_refs %llu, passed %llu\n",
6689 (unsigned long long)tmpl->start,
6690 (unsigned long long)
6691 rec->extent_item_refs,
6692 (unsigned long long)tmpl->extent_item_refs);
6694 rec->extent_item_refs = tmpl->extent_item_refs;
6698 if (tmpl->content_checked)
6699 rec->content_checked = 1;
6700 if (tmpl->owner_ref_checked)
6701 rec->owner_ref_checked = 1;
6702 memcpy(&rec->parent_key, &tmpl->parent_key,
6703 sizeof(tmpl->parent_key));
6704 if (tmpl->parent_generation)
6705 rec->parent_generation = tmpl->parent_generation;
6706 if (rec->max_size < tmpl->max_size)
6707 rec->max_size = tmpl->max_size;
6710 * A metadata extent can't cross stripe_len boundary, otherwise
6711 * kernel scrub won't be able to handle it.
6712 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6716 rec->crossing_stripes = check_crossing_stripes(
6717 global_info, rec->start,
6718 global_info->nodesize);
6719 check_extent_type(rec);
6720 maybe_free_extent_rec(extent_cache, rec);
6724 ret = add_extent_rec_nolookup(extent_cache, tmpl);
6729 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6730 u64 parent, u64 root, int found_ref)
6732 struct extent_record *rec;
6733 struct tree_backref *back;
6734 struct cache_extent *cache;
6736 bool insert = false;
6738 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6740 struct extent_record tmpl;
6742 memset(&tmpl, 0, sizeof(tmpl));
6743 tmpl.start = bytenr;
6748 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6752 /* really a bug in the cache_extent implementation now */
6753 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6758 rec = container_of(cache, struct extent_record, cache);
6759 if (rec->start != bytenr) {
6761 * Several causes, from unaligned bytenr to overlapping extents
6766 back = find_tree_backref(rec, parent, root);
6768 back = alloc_tree_backref(rec, parent, root);
6775 if (back->node.found_ref) {
6776 fprintf(stderr, "Extent back ref already exists "
6777 "for %llu parent %llu root %llu \n",
6778 (unsigned long long)bytenr,
6779 (unsigned long long)parent,
6780 (unsigned long long)root);
6782 back->node.found_ref = 1;
6784 if (back->node.found_extent_tree) {
6785 fprintf(stderr, "Extent back ref already exists "
6786 "for %llu parent %llu root %llu \n",
6787 (unsigned long long)bytenr,
6788 (unsigned long long)parent,
6789 (unsigned long long)root);
6791 back->node.found_extent_tree = 1;
6794 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6795 compare_extent_backref));
6796 check_extent_type(rec);
6797 maybe_free_extent_rec(extent_cache, rec);
/*
 * add_data_backref - record one data-extent back reference for @bytenr.
 *
 * Visible flow: look up (or create from a template with start = @bytenr
 * and max_size = @max_size) the extent record, raise rec->max_size to
 * @max_size if it is larger, find or allocate the data backref for
 * (@parent, @root, @owner, @offset), then update it. One set of updates
 * bumps back->found_ref and stores @max_size / @bytenr into
 * back->bytes / back->disk_bytenr; the other stores @num_refs and sets
 * found_extent_tree. Which set runs is presumably chosen by @found_ref.
 *
 * NOTE(review): several lines (conditionals, braces, returns) are missing
 * from this excerpt; the branch structure and return value are not
 * visible here.
 * NOTE(review): the BUG_ON() checks below abort the program when
 * @num_refs or the recorded size differ from expectations rather than
 * reporting an error to the caller.
 */
6801 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6802 u64 parent, u64 root, u64 owner, u64 offset,
6803 u32 num_refs, int found_ref, u64 max_size)
6805 struct extent_record *rec;
6806 struct data_backref *back;
6807 struct cache_extent *cache;
6809 bool insert = false;
6811 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6813 struct extent_record tmpl;
6815 memset(&tmpl, 0, sizeof(tmpl));
6816 tmpl.start = bytenr;
6818 tmpl.max_size = max_size;
6820 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6824 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6829 rec = container_of(cache, struct extent_record, cache);
/* Keep the largest size seen so far for this extent record. */
6830 if (rec->max_size < max_size)
6831 rec->max_size = max_size;
6834 * If found_ref is set then max_size is the real size and must match the
6835 * existing refs. So if we have already found a ref then we need to
6836 * make sure that this ref matches the existing one, otherwise we need
6837 * to add a new backref so we can notice that the backrefs don't match
6838 * and we need to figure out who is telling the truth. This is to
6839 * account for that awful fsync bug I introduced where we'd end up with
6840 * a btrfs_file_extent_item that would have its length include multiple
6841 * prealloc extents or point inside of a prealloc extent.
6843 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6846 back = alloc_data_backref(rec, parent, root, owner, offset,
6853 BUG_ON(num_refs != 1);
6854 if (back->node.found_ref)
6855 BUG_ON(back->bytes != max_size);
6856 back->node.found_ref = 1;
6857 back->found_ref += 1;
6858 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
6859 back->bytes = max_size;
6860 back->disk_bytenr = bytenr;
6862 /* Need to reinsert if not already in the tree */
6864 rb_erase(&back->node.node, &rec->backref_tree);
6869 rec->content_checked = 1;
6870 rec->owner_ref_checked = 1;
6872 if (back->node.found_extent_tree) {
6873 fprintf(stderr, "Extent back ref already exists "
6874 "for %llu parent %llu root %llu "
6875 "owner %llu offset %llu num_refs %lu\n",
6876 (unsigned long long)bytenr,
6877 (unsigned long long)parent,
6878 (unsigned long long)root,
6879 (unsigned long long)owner,
6880 (unsigned long long)offset,
6881 (unsigned long)num_refs);
6883 back->num_refs = num_refs;
6884 back->node.found_extent_tree = 1;
6887 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6888 compare_extent_backref));
6890 maybe_free_extent_rec(extent_cache, rec);
/*
 * add_pending - register the range [@bytenr, @bytenr + @size) in @seen and
 * in @pending.
 *
 * NOTE(review): the result of the second add_cache_extent() call (into
 * @pending) is not stored or checked. The lines between the two calls are
 * not visible in this excerpt; presumably a failure to add to @seen
 * returns early - TODO confirm.
 */
6894 static int add_pending(struct cache_tree *pending,
6895 struct cache_tree *seen, u64 bytenr, u32 size)
6898 ret = add_cache_extent(seen, bytenr, size);
6901 add_cache_extent(pending, bytenr, size);
/*
 * pick_next_pending - fill @bits with up to @bits_nr block ranges to
 * process next.
 *
 * Visible flow: take the first entry of @reada if there is one; otherwise
 * search @nodes starting slightly before @last (falling back to the start
 * of @nodes), or search @pending from 0, and copy consecutive cache
 * extents into @bits. When more than 8 slots remain, further entries from
 * @pending are appended while the gap to the previous range stays within
 * 32768 bytes.
 *
 * NOTE(review): several lines (conditionals, returns, loop heads) are
 * missing from this excerpt, so the exact priority between the caches and
 * the return value are not confirmed here.
 * NOTE(review): @last is a u64 but is stored into `unsigned long
 * node_start`; where unsigned long is 32 bits this truncates the address.
 * NOTE(review): 32768 is an unnamed constant used both as the look-behind
 * distance and as the maximum gap.
 */
6905 static int pick_next_pending(struct cache_tree *pending,
6906 struct cache_tree *reada,
6907 struct cache_tree *nodes,
6908 u64 last, struct block_info *bits, int bits_nr,
6911 unsigned long node_start = last;
6912 struct cache_extent *cache;
6915 cache = search_cache_extent(reada, 0);
6917 bits[0].start = cache->start;
6918 bits[0].size = cache->size;
/* Back up by 32768 bytes, but only when that cannot underflow. */
6923 if (node_start > 32768)
6924 node_start -= 32768;
6926 cache = search_cache_extent(nodes, node_start);
6928 cache = search_cache_extent(nodes, 0);
6931 cache = search_cache_extent(pending, 0);
6936 bits[ret].start = cache->start;
6937 bits[ret].size = cache->size;
6938 cache = next_cache_extent(cache);
6940 } while (cache && ret < bits_nr);
6946 bits[ret].start = cache->start;
6947 bits[ret].size = cache->size;
6948 cache = next_cache_extent(cache);
6950 } while (cache && ret < bits_nr);
/* Only top up from @pending when more than 8 slots are still free. */
6952 if (bits_nr - ret > 8) {
6953 u64 lookup = bits[0].start + bits[0].size;
6954 struct cache_extent *next;
6955 next = search_cache_extent(pending, lookup);
6957 if (next->start - lookup > 32768)
6959 bits[ret].start = next->start;
6960 bits[ret].size = next->size;
6961 lookup = next->start + next->size;
6965 next = next_cache_extent(next);
/*
 * free_chunk_record - callback for a chunk cache entry being torn down.
 *
 * Recovers the enclosing chunk_record from @cache and unlinks it from both
 * of its lists (rec->list and rec->dextents).
 * NOTE(review): the remaining lines of the function are not visible in
 * this excerpt; presumably the record itself is released afterwards.
 */
6973 static void free_chunk_record(struct cache_extent *cache)
6975 struct chunk_record *rec;
6977 rec = container_of(cache, struct chunk_record, cache);
6978 list_del_init(&rec->list);
6979 list_del_init(&rec->dextents);
/*
 * free_chunk_cache_tree - tear down @chunk_cache, passing
 * free_chunk_record() to cache_tree_free_extents() as the per-entry
 * callback.
 */
6983 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6985 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * free_device_record - per-node callback for the device rb-tree.
 *
 * Recovers the enclosing device_record from its embedded rb_node.
 * NOTE(review): the rest of the body is not visible in this excerpt;
 * presumably the record is released here.
 */
6988 static void free_device_record(struct rb_node *node)
6990 struct device_record *rec;
6992 rec = container_of(node, struct device_record, node);
/*
 * Macro invocation pairing the name `device_cache` with the
 * free_device_record() callback.
 * NOTE(review): the macro is defined outside this excerpt; presumably it
 * expands to a function that frees every node of a device rb-tree -
 * confirm against the macro definition.
 */
6996 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * insert_block_group_record - add @bg_rec to @tree.
 *
 * Inserts the record's cache extent into tree->tree and appends the record
 * to the tree->block_groups list.
 * NOTE(review): the lines between the two calls are not visible here;
 * presumably the list append only happens when the insert succeeded, and
 * the insert result is what gets returned - TODO confirm.
 */
6998 int insert_block_group_record(struct block_group_tree *tree,
6999 struct block_group_record *bg_rec)
7003 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7007 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * free_block_group_record - callback for a block group cache entry being
 * torn down.
 *
 * Recovers the enclosing block_group_record and unlinks it from its list.
 * NOTE(review): the remainder of the body is not visible in this excerpt;
 * presumably the record is released afterwards.
 */
7011 static void free_block_group_record(struct cache_extent *cache)
7013 struct block_group_record *rec;
7015 rec = container_of(cache, struct block_group_record, cache);
7016 list_del_init(&rec->list);
/*
 * free_block_group_tree - tear down tree->tree, passing
 * free_block_group_record() to cache_tree_free_extents() as the per-entry
 * callback.
 */
7020 void free_block_group_tree(struct block_group_tree *tree)
7022 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * insert_device_extent_record - add @de_rec to @tree.
 *
 * Uses insert_cache_extent2() rather than insert_cache_extent() (the
 * partial comment below gives the reason), then appends the record to both
 * orphan lists, tree->no_chunk_orphans and tree->no_device_orphans.
 * NOTE(review): the lines between the insert and the list appends are not
 * visible here; presumably a failed insert returns before the appends.
 */
7025 int insert_device_extent_record(struct device_extent_tree *tree,
7026 struct device_extent_record *de_rec)
7031 * Device extent is a bit different from the other extents, because
7032 * the extents which belong to the different devices may have the
7033 * same start and size, so we need use the special extent cache
7034 * search/insert functions.
7036 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7040 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7041 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * free_device_extent_record - callback for a device extent cache entry
 * being torn down.
 *
 * Recovers the enclosing device_extent_record and unlinks it from
 * chunk_list and device_list, each only when that list head is not empty.
 * NOTE(review): the remainder of the body is not visible in this excerpt;
 * presumably the record is released afterwards.
 */
7045 static void free_device_extent_record(struct cache_extent *cache)
7047 struct device_extent_record *rec;
7049 rec = container_of(cache, struct device_extent_record, cache);
7050 if (!list_empty(&rec->chunk_list))
7051 list_del_init(&rec->chunk_list);
7052 if (!list_empty(&rec->device_list))
7053 list_del_init(&rec->device_list);
/*
 * free_device_extent_tree - tear down tree->tree, passing
 * free_device_extent_record() to cache_tree_free_extents() as the
 * per-entry callback.
 */
7057 void free_device_extent_tree(struct device_extent_tree *tree)
7059 cache_tree_free_extents(&tree->tree, free_device_extent_record);
/* Only built when BTRFS_COMPAT_EXTENT_TREE_V0 is defined. */
7062 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * process_extent_ref_v0 - turn a v0 extent ref item at @slot of @leaf into
 * a backref in @extent_cache.
 *
 * The ref's objectid decides the kind: below BTRFS_FIRST_FREE_OBJECTID a
 * tree backref is added, otherwise a data backref carrying the v0 ref
 * count. In both cases key.objectid is the extent address and key.offset
 * is passed as the parent.
 * NOTE(review): some lines (the else, the return) are missing from this
 * excerpt.
 */
7063 static int process_extent_ref_v0(struct cache_tree *extent_cache,
7064 struct extent_buffer *leaf, int slot)
7066 struct btrfs_extent_ref_v0 *ref0;
7067 struct btrfs_key key;
7070 btrfs_item_key_to_cpu(leaf, &key, slot);
7071 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
7072 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
7073 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
7076 ret = add_data_backref(extent_cache, key.objectid, key.offset,
7077 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * btrfs_new_chunk_record - build an in-memory chunk_record from the chunk
 * item at @slot of @leaf.
 *
 * The record is allocated zeroed with calloc(), sized by
 * btrfs_chunk_record_size(num_stripes). The cache range is
 * [key->offset, key->offset + chunk length); the key fields, the leaf
 * generation and the chunk attributes are copied in, followed by the
 * devid, offset and device UUID of each stripe.
 * NOTE(review): the allocation-failure handling after the fprintf() and
 * the return statement are not visible in this excerpt.
 */
7083 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7084 struct btrfs_key *key,
7087 struct btrfs_chunk *ptr;
7088 struct chunk_record *rec;
7091 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7092 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7094 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7096 fprintf(stderr, "memory allocation failed\n");
7100 INIT_LIST_HEAD(&rec->list);
7101 INIT_LIST_HEAD(&rec->dextents);
7104 rec->cache.start = key->offset;
7105 rec->cache.size = btrfs_chunk_length(leaf, ptr);
7107 rec->generation = btrfs_header_generation(leaf);
7109 rec->objectid = key->objectid;
7110 rec->type = key->type;
7111 rec->offset = key->offset;
7113 rec->length = rec->cache.size;
7114 rec->owner = btrfs_chunk_owner(leaf, ptr);
7115 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7116 rec->type_flags = btrfs_chunk_type(leaf, ptr);
7117 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7118 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7119 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7120 rec->num_stripes = num_stripes;
7121 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* Copy the per-stripe fields of every stripe in the chunk item. */
7123 for (i = 0; i < rec->num_stripes; ++i) {
7124 rec->stripes[i].devid =
7125 btrfs_stripe_devid_nr(leaf, ptr, i);
7126 rec->stripes[i].offset =
7127 btrfs_stripe_offset_nr(leaf, ptr, i);
7128 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7129 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * process_chunk_item - validate the chunk item at @slot of @eb and cache
 * it in @chunk_cache.
 *
 * The item is first run through btrfs_check_chunk_valid(); the error()
 * message below reports a chunk that "is not valid, ignore it". A
 * chunk_record is then built and inserted, and "Chunk[...] existed." is
 * printed, presumably when the insert fails.
 * NOTE(review): the conditionals and return statements are not visible in
 * this excerpt.
 */
7136 static int process_chunk_item(struct cache_tree *chunk_cache,
7137 struct btrfs_key *key, struct extent_buffer *eb,
7140 struct chunk_record *rec;
7141 struct btrfs_chunk *chunk;
7144 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7146 * Do extra check for this chunk item,
7148 * It's still possible one can craft a leaf with CHUNK_ITEM, with
7149 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7150 * and owner<->key_type check.
7152 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7155 error("chunk(%llu, %llu) is not valid, ignore it",
7156 key->offset, btrfs_chunk_length(eb, chunk));
7159 rec = btrfs_new_chunk_record(eb, key, slot);
7160 ret = insert_cache_extent(chunk_cache, &rec->cache);
7162 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7163 rec->offset, rec->length);
/*
 * process_device_item - build a device_record from the dev item at @slot
 * of @eb and insert it into the @dev_cache rb-tree.
 *
 * "Device[...] existed." is printed, presumably when rb_insert() reports a
 * duplicate.
 * NOTE(review): the record comes from malloc(), so any field not assigned
 * in this function starts uninitialized; the sibling constructors in this
 * file use calloc().
 * NOTE(review): rec->devid is assigned twice, first from key->offset and
 * later from btrfs_device_id(); the second value is the one that remains.
 * NOTE(review): conditionals and return statements are not visible in this
 * excerpt.
 */
7170 static int process_device_item(struct rb_root *dev_cache,
7171 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7173 struct btrfs_dev_item *ptr;
7174 struct device_record *rec;
7177 ptr = btrfs_item_ptr(eb,
7178 slot, struct btrfs_dev_item);
7180 rec = malloc(sizeof(*rec));
7182 fprintf(stderr, "memory allocation failed\n");
7186 rec->devid = key->offset;
7187 rec->generation = btrfs_header_generation(eb);
7189 rec->objectid = key->objectid;
7190 rec->type = key->type;
7191 rec->offset = key->offset;
7193 rec->devid = btrfs_device_id(eb, ptr);
7194 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7195 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7197 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7199 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * btrfs_new_block_group_record - build an in-memory block_group_record
 * from the block group item at @slot of @leaf.
 *
 * The record is allocated zeroed with calloc(). Its cache range is
 * [key->objectid, key->objectid + key->offset); the key fields, the leaf
 * generation and the on-disk block group flags are copied in.
 * NOTE(review): the allocation-failure handling after the fprintf() and
 * the return statement are not visible in this excerpt.
 */
7206 struct block_group_record *
7207 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7210 struct btrfs_block_group_item *ptr;
7211 struct block_group_record *rec;
7213 rec = calloc(1, sizeof(*rec));
7215 fprintf(stderr, "memory allocation failed\n");
7219 rec->cache.start = key->objectid;
7220 rec->cache.size = key->offset;
7222 rec->generation = btrfs_header_generation(leaf);
7224 rec->objectid = key->objectid;
7225 rec->type = key->type;
7226 rec->offset = key->offset;
7228 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7229 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7231 INIT_LIST_HEAD(&rec->list);
/*
 * process_block_group_item - build a block_group_record from the item at
 * @slot of @eb and insert it into @block_group_cache.
 *
 * "Block Group[...] existed." is printed, presumably when the insert
 * reports a duplicate.
 * NOTE(review): conditionals and return statements are not visible in this
 * excerpt.
 */
7236 static int process_block_group_item(struct block_group_tree *block_group_cache,
7237 struct btrfs_key *key,
7238 struct extent_buffer *eb, int slot)
7240 struct block_group_record *rec;
7243 rec = btrfs_new_block_group_record(eb, key, slot);
7244 ret = insert_block_group_record(block_group_cache, rec);
7246 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7247 rec->objectid, rec->offset);
/*
 * btrfs_new_device_extent_record - build an in-memory
 * device_extent_record from the dev extent item at @slot of @leaf.
 *
 * The record is allocated zeroed with calloc(). The cache entry is keyed
 * by (key->objectid, key->offset) and sized by the dev extent length; the
 * key fields, the leaf generation and the owning chunk's objectid are
 * copied in, and both list heads are initialized.
 * NOTE(review): the left-hand side of the btrfs_dev_extent_chunk_offset()
 * assignment, the allocation-failure handling and the return statement are
 * not visible in this excerpt.
 */
7254 struct device_extent_record *
7255 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7256 struct btrfs_key *key, int slot)
7258 struct device_extent_record *rec;
7259 struct btrfs_dev_extent *ptr;
7261 rec = calloc(1, sizeof(*rec));
7263 fprintf(stderr, "memory allocation failed\n");
7267 rec->cache.objectid = key->objectid;
7268 rec->cache.start = key->offset;
7270 rec->generation = btrfs_header_generation(leaf);
7272 rec->objectid = key->objectid;
7273 rec->type = key->type;
7274 rec->offset = key->offset;
7276 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7277 rec->chunk_objecteid =
7278 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7280 btrfs_dev_extent_chunk_offset(leaf, ptr);
7281 rec->length = btrfs_dev_extent_length(leaf, ptr);
7282 rec->cache.size = rec->length;
7284 INIT_LIST_HEAD(&rec->chunk_list);
7285 INIT_LIST_HEAD(&rec->device_list);
/*
 * process_device_extent_item - build a device_extent_record from the item
 * at @slot of @eb and insert it into @dev_extent_cache.
 *
 * "Device extent[...] existed." is printed, presumably when the insert
 * reports a duplicate.
 * NOTE(review): the return-type line, conditionals and return statements
 * are not visible in this excerpt.
 */
7291 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7292 struct btrfs_key *key, struct extent_buffer *eb,
7295 struct device_extent_record *rec;
7298 rec = btrfs_new_device_extent_record(eb, key, slot);
7299 ret = insert_device_extent_record(dev_extent_cache, rec);
7302 "Device extent[%llu, %llu, %llu] existed.\n",
7303 rec->objectid, rec->offset, rec->length);
/*
 * process_extent_item - parse the EXTENT_ITEM / METADATA_ITEM at @slot of
 * @eb, register the extent in @extent_cache and add a backref for every
 * inline reference in the item.
 *
 * Visible flow:
 *  - the extent length is fs_info->nodesize for METADATA_ITEM keys and
 *    key.offset otherwise;
 *  - an extent whose bytenr is not sectorsize-aligned is reported and
 *    ignored;
 *  - items smaller than struct btrfs_extent_item take the v0 path
 *    (when BTRFS_COMPAT_EXTENT_TREE_V0 is defined);
 *  - metadata extents must be exactly nodesize long, data extents must be
 *    sectorsize-aligned;
 *  - the inline refs after the extent item (and after the
 *    btrfs_tree_block_info for EXTENT_ITEM tree blocks) are walked and
 *    dispatched by type.
 *
 * NOTE(review): several lines (conditionals, loop head, returns) are
 * missing from this excerpt.
 * NOTE(review): the second add_extent_rec() call and both
 * add_data_backref() calls do not store their return values, unlike the
 * add_tree_backref() calls in the same switch.
 * NOTE(review): BUG_ON(item_size != sizeof(*ei0)) aborts the program on an
 * unexpected item size read from disk.
 */
7310 static int process_extent_item(struct btrfs_root *root,
7311 struct cache_tree *extent_cache,
7312 struct extent_buffer *eb, int slot)
7314 struct btrfs_extent_item *ei;
7315 struct btrfs_extent_inline_ref *iref;
7316 struct btrfs_extent_data_ref *dref;
7317 struct btrfs_shared_data_ref *sref;
7318 struct btrfs_key key;
7319 struct extent_record tmpl;
7324 u32 item_size = btrfs_item_size_nr(eb, slot);
7330 btrfs_item_key_to_cpu(eb, &key, slot);
7332 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7334 num_bytes = root->fs_info->nodesize;
7336 num_bytes = key.offset;
7339 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7340 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7341 key.objectid, root->fs_info->sectorsize);
/* Items shorter than the current extent item layout use the v0 format. */
7344 if (item_size < sizeof(*ei)) {
7345 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7346 struct btrfs_extent_item_v0 *ei0;
7347 BUG_ON(item_size != sizeof(*ei0));
7348 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7349 refs = btrfs_extent_refs_v0(eb, ei0);
7353 memset(&tmpl, 0, sizeof(tmpl));
7354 tmpl.start = key.objectid;
7355 tmpl.nr = num_bytes;
7356 tmpl.extent_item_refs = refs;
7357 tmpl.metadata = metadata;
7359 tmpl.max_size = num_bytes;
7361 return add_extent_rec(extent_cache, &tmpl);
7364 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7365 refs = btrfs_extent_refs(eb, ei);
7366 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7370 if (metadata && num_bytes != root->fs_info->nodesize) {
7371 error("ignore invalid metadata extent, length %llu does not equal to %u",
7372 num_bytes, root->fs_info->nodesize);
7375 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7376 error("ignore invalid data extent, length %llu is not aligned to %u",
7377 num_bytes, root->fs_info->sectorsize);
7381 memset(&tmpl, 0, sizeof(tmpl));
7382 tmpl.start = key.objectid;
7383 tmpl.nr = num_bytes;
7384 tmpl.extent_item_refs = refs;
7385 tmpl.metadata = metadata;
7387 tmpl.max_size = num_bytes;
7388 add_extent_rec(extent_cache, &tmpl);
/* Inline refs start right after the extent item ... */
7390 ptr = (unsigned long)(ei + 1);
/* ... or after the tree block info for EXTENT_ITEM-keyed tree blocks. */
7391 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7392 key.type == BTRFS_EXTENT_ITEM_KEY)
7393 ptr += sizeof(struct btrfs_tree_block_info);
7395 end = (unsigned long)ei + item_size;
7397 iref = (struct btrfs_extent_inline_ref *)ptr;
7398 type = btrfs_extent_inline_ref_type(eb, iref);
7399 offset = btrfs_extent_inline_ref_offset(eb, iref);
7401 case BTRFS_TREE_BLOCK_REF_KEY:
7402 ret = add_tree_backref(extent_cache, key.objectid,
7406 "add_tree_backref failed (extent items tree block): %s",
7409 case BTRFS_SHARED_BLOCK_REF_KEY:
7410 ret = add_tree_backref(extent_cache, key.objectid,
7414 "add_tree_backref failed (extent items shared block): %s",
7417 case BTRFS_EXTENT_DATA_REF_KEY:
7418 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7419 add_data_backref(extent_cache, key.objectid, 0,
7420 btrfs_extent_data_ref_root(eb, dref),
7421 btrfs_extent_data_ref_objectid(eb,
7423 btrfs_extent_data_ref_offset(eb, dref),
7424 btrfs_extent_data_ref_count(eb, dref),
7427 case BTRFS_SHARED_DATA_REF_KEY:
7428 sref = (struct btrfs_shared_data_ref *)(iref + 1);
7429 add_data_backref(extent_cache, key.objectid, offset,
7431 btrfs_shared_data_ref_count(eb, sref),
7435 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7436 key.objectid, key.type, num_bytes);
/* Advance by the size of the inline ref type just handled. */
7439 ptr += btrfs_extent_inline_ref_size(type);
/*
 * check_cache_range - verify that [@offset, @offset + @bytes) is described
 * by exactly one free space entry of @cache, after carving out any
 * superblock mirror stripes that fall inside the range.
 *
 * For each superblock mirror, btrfs_rmap_block() yields the logical
 * addresses (and stripe length) that map to it within this block group.
 * Stripes overlapping the range are trimmed from the front or the back of
 * the range; a stripe in the middle makes the function recurse on the
 * left part and continue with the right part. The remaining range must
 * then match a free space entry in both offset and length, and that entry
 * is unlinked from the free space ctl.
 *
 * NOTE(review): several lines (loop head over `nr`, early returns, frees)
 * are missing from this excerpt, so ownership of `logical` and the return
 * values cannot be confirmed from here.
 */
7446 static int check_cache_range(struct btrfs_root *root,
7447 struct btrfs_block_group_cache *cache,
7448 u64 offset, u64 bytes)
7450 struct btrfs_free_space *entry;
7456 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
7457 bytenr = btrfs_sb_offset(i);
7458 ret = btrfs_rmap_block(root->fs_info,
7459 cache->key.objectid, bytenr, 0,
7460 &logical, &nr, &stripe_len);
/* Stripe entirely before, or entirely after, the range being checked. */
7465 if (logical[nr] + stripe_len <= offset)
7467 if (offset + bytes <= logical[nr])
7469 if (logical[nr] == offset) {
7470 if (stripe_len >= bytes) {
7474 bytes -= stripe_len;
7475 offset += stripe_len;
7476 } else if (logical[nr] < offset) {
7477 if (logical[nr] + stripe_len >=
7482 bytes = (offset + bytes) -
7483 (logical[nr] + stripe_len);
7484 offset = logical[nr] + stripe_len;
7487 * Could be tricky, the super may land in the
7488 * middle of the area we're checking. First
7489 * check the easiest case, it's at the end.
7491 if (logical[nr] + stripe_len >=
7493 bytes = logical[nr] - offset;
7497 /* Check the left side */
7498 ret = check_cache_range(root, cache,
7500 logical[nr] - offset);
7506 /* Now we continue with the right side */
7507 bytes = (offset + bytes) -
7508 (logical[nr] + stripe_len);
7509 offset = logical[nr] + stripe_len;
7516 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7518 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7519 offset, offset+bytes);
7523 if (entry->offset != offset) {
7524 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7529 if (entry->bytes != bytes) {
7530 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7531 bytes, entry->bytes, offset);
/* Matched entries are removed from the free space ctl as they are verified. */
7535 unlink_free_space(cache->free_space_ctl, entry);
/*
 * verify_space_cache - cross-check the free space entries loaded for
 * @cache against the extent tree.
 *
 * Walks EXTENT_ITEM / METADATA_ITEM keys of the extent root inside the
 * block group, tracking in `last` the end of the previous extent. Every
 * gap between `last` and the next extent, and the tail up to the end of
 * the block group, is passed to check_cache_range(). A message is printed
 * if entries are still left in the free space rb-tree afterwards.
 *
 * The walk starts at the larger of the block group start and
 * BTRFS_SUPER_INFO_OFFSET. Note that @root is reassigned to
 * fs_info->extent_root at the top.
 * NOTE(review): several lines (loop head, error paths, returns) are
 * missing from this excerpt.
 */
7540 static int verify_space_cache(struct btrfs_root *root,
7541 struct btrfs_block_group_cache *cache)
7543 struct btrfs_path path;
7544 struct extent_buffer *leaf;
7545 struct btrfs_key key;
7549 root = root->fs_info->extent_root;
7551 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7553 btrfs_init_path(&path);
7554 key.objectid = last;
7556 key.type = BTRFS_EXTENT_ITEM_KEY;
7557 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7562 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7563 ret = btrfs_next_leaf(root, &path);
7571 leaf = path.nodes[0];
7572 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Compare against the end of this block group. */
7573 if (key.objectid >= cache->key.offset + cache->key.objectid)
7575 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7576 key.type != BTRFS_METADATA_ITEM_KEY) {
/* EXTENT_ITEM length is key.offset; METADATA_ITEM extents are nodesize long. */
7581 if (last == key.objectid) {
7582 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7583 last = key.objectid + key.offset;
7585 last = key.objectid + root->fs_info->nodesize;
7590 ret = check_cache_range(root, cache, last,
7591 key.objectid - last);
7594 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7595 last = key.objectid + key.offset;
7597 last = key.objectid + root->fs_info->nodesize;
/* Check the space between the last extent and the end of the block group. */
7601 if (last < cache->key.objectid + cache->key.offset)
7602 ret = check_cache_range(root, cache, last,
7603 cache->key.objectid +
7604 cache->key.offset - last);
7607 btrfs_release_path(&path);
7610 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7611 fprintf(stderr, "There are still entries left in the space "
/*
 * check_space_cache - validate the free space cache (or free space tree)
 * of every block group.
 *
 * If the superblock's cache generation is set (not -1ULL) and differs from
 * the super generation, a message says the cache will be invalidated.
 * Otherwise block groups are iterated starting just past the primary
 * superblock. For each one the free space ctl is initialized on first use,
 * free space is loaded from the free space tree (when the FREE_SPACE_TREE
 * compat_ro bit is set, with super stripes excluded around the load) or
 * from the v1 cache, and the result is checked by verify_space_cache().
 *
 * Returns -EINVAL if `error` is non-zero, 0 otherwise (visible in the
 * final return). The statements that set `error` are not visible in this
 * excerpt.
 * NOTE(review): several lines (loop head, conditionals, early return) are
 * missing from this excerpt.
 */
7619 static int check_space_cache(struct btrfs_root *root)
7621 struct btrfs_block_group_cache *cache;
7622 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7626 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7627 btrfs_super_generation(root->fs_info->super_copy) !=
7628 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7629 printf("cache and super generation don't match, space cache "
7630 "will be invalidated\n");
7634 if (ctx.progress_enabled) {
7635 ctx.tp = TASK_FREE_SPACE;
7636 task_start(ctx.info);
7640 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* Advance the cursor to the end of this block group for the next lookup. */
7644 start = cache->key.objectid + cache->key.offset;
7645 if (!cache->free_space_ctl) {
7646 if (btrfs_init_free_space_ctl(cache,
7647 root->fs_info->sectorsize)) {
7652 btrfs_remove_free_space_cache(cache);
7655 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7656 ret = exclude_super_stripes(root, cache);
7658 fprintf(stderr, "could not exclude super stripes: %s\n",
7663 ret = load_free_space_tree(root->fs_info, cache);
7664 free_excluded_extents(root, cache);
7666 fprintf(stderr, "could not load free space tree: %s\n",
7673 ret = load_free_space_cache(root->fs_info, cache);
7678 ret = verify_space_cache(root, cache);
7680 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7681 cache->key.objectid);
7686 task_stop(ctx.info);
7688 return error ? -EINVAL : 0;
/*
 * check_extent_csums - read the data range [@bytenr, @bytenr + @num_bytes)
 * and compare the checksum of every sectorsize block against the checksums
 * stored in @eb starting at @leaf_offset.
 *
 * @num_bytes is tested for being a multiple of the sector size before a
 * buffer of @num_bytes is allocated. Data is read with
 * read_extent_data(); each sector is checksummed with btrfs_csum_data() /
 * btrfs_csum_final() and compared with the stored value. On a mismatch a
 * message is printed and, if more copies exist, another mirror is tried.
 *
 * NOTE(review): several lines (declarations, the retry label, cleanup and
 * returns) are missing from this excerpt.
 * NOTE(review): csum_size bytes are copied into csum_expected, which is
 * printed with %u; if csum_size can exceed the size of that variable the
 * read_extent_buffer() call would overflow it - confirm the declared type
 * and the supported checksum sizes.
 * NOTE(review): btrfs_num_copies() is called inside the per-sector loop on
 * each mismatch.
 */
7691 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7692 u64 num_bytes, unsigned long leaf_offset,
7693 struct extent_buffer *eb) {
7695 struct btrfs_fs_info *fs_info = root->fs_info;
7697 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7699 unsigned long csum_offset;
7703 u64 data_checked = 0;
7709 if (num_bytes % fs_info->sectorsize)
7712 data = malloc(num_bytes);
7716 while (offset < num_bytes) {
7719 read_len = num_bytes - offset;
7720 /* read as much space once a time */
7721 ret = read_extent_data(fs_info, data + offset,
7722 bytenr + offset, &read_len, mirror);
7726 /* verify every 4k data's checksum */
7727 while (data_checked < read_len) {
7729 tmp = offset + data_checked;
7731 csum = btrfs_csum_data((char *)data + tmp,
7732 csum, fs_info->sectorsize);
7733 btrfs_csum_final(csum, (u8 *)&csum);
/* Stored checksums are packed: one csum_size entry per sector. */
7735 csum_offset = leaf_offset +
7736 tmp / fs_info->sectorsize * csum_size;
7737 read_extent_buffer(eb, (char *)&csum_expected,
7738 csum_offset, csum_size);
7739 /* try another mirror */
7740 if (csum != csum_expected) {
7741 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7742 mirror, bytenr + tmp,
7743 csum, csum_expected);
7744 num_copies = btrfs_num_copies(root->fs_info,
7746 if (mirror < num_copies - 1) {
7751 data_checked += fs_info->sectorsize;
/*
 * check_extent_exists - verify that the byte range starting at @bytenr
 * (length num_bytes) is covered by EXTENT_ITEMs in the extent tree.
 *
 * Searches the extent root for (@bytenr, EXTENT_ITEM, -1), steps back to
 * the preceding item (and further back past keys whose type sorts after
 * EXTENT_ITEM), then walks forward. Each extent item overlapping the range
 * shrinks the range from the front or the back; an extent in the middle
 * makes the function recurse on the right-hand part and keep the left-hand
 * part. If bytes remain uncovered and no error occurred, "There are no
 * extents for csum range" is printed.
 *
 * NOTE(review): several lines (the second parameter line, loop head,
 * error paths and returns) are missing from this excerpt, so the return
 * values cannot be confirmed from here.
 */
7760 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7763 struct btrfs_path path;
7764 struct extent_buffer *leaf;
7765 struct btrfs_key key;
7768 btrfs_init_path(&path);
7769 key.objectid = bytenr;
7770 key.type = BTRFS_EXTENT_ITEM_KEY;
7771 key.offset = (u64)-1;
7774 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7777 fprintf(stderr, "Error looking up extent record %d\n", ret);
7778 btrfs_release_path(&path);
7781 if (path.slots[0] > 0) {
7784 ret = btrfs_prev_leaf(root, &path);
7787 } else if (ret > 0) {
7794 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7797 * Block group items come before extent items if they have the same
7798 * bytenr, so walk back one more just in case. Dear future traveller,
7799 * first congrats on mastering time travel. Now if it's not too much
7800 * trouble could you go back to 2006 and tell Chris to make the
7801 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7802 * EXTENT_ITEM_KEY please?
7804 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7805 if (path.slots[0] > 0) {
7808 ret = btrfs_prev_leaf(root, &path);
7811 } else if (ret > 0) {
7816 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7820 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7821 ret = btrfs_next_leaf(root, &path);
7823 fprintf(stderr, "Error going to next leaf "
7825 btrfs_release_path(&path);
7831 leaf = path.nodes[0];
7832 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7833 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent ends before the range starts. */
7837 if (key.objectid + key.offset < bytenr) {
/* Extent starts after the range ends. */
7841 if (key.objectid > bytenr + num_bytes)
7844 if (key.objectid == bytenr) {
7845 if (key.offset >= num_bytes) {
7849 num_bytes -= key.offset;
7850 bytenr += key.offset;
7851 } else if (key.objectid < bytenr) {
7852 if (key.objectid + key.offset >= bytenr + num_bytes) {
7856 num_bytes = (bytenr + num_bytes) -
7857 (key.objectid + key.offset);
7858 bytenr = key.objectid + key.offset;
7860 if (key.objectid + key.offset < bytenr + num_bytes) {
7861 u64 new_start = key.objectid + key.offset;
7862 u64 new_bytes = bytenr + num_bytes - new_start;
7865 * Weird case, the extent is in the middle of
7866 * our range, we'll have to search one side
7867 * and then the other. Not sure if this happens
7868 * in real life, but no harm in coding it up
7869 * anyway just in case.
7871 btrfs_release_path(&path);
7872 ret = check_extent_exists(root, new_start,
7875 fprintf(stderr, "Right section didn't "
7879 num_bytes = key.objectid - bytenr;
7882 num_bytes = key.objectid - bytenr;
7889 if (num_bytes && !ret) {
7890 fprintf(stderr, "There are no extents for csum range "
7891 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7895 btrfs_release_path(&path);
/*
 * check_csums - walk the checksum tree and verify that every contiguous
 * run of checksummed bytes is backed by extent records.
 *
 * @root is replaced by fs_info->csum_root; the function reports "No valid
 * csum tree found" if that root's node is not up to date. For each
 * EXTENT_CSUM item the covered data length is
 * (item size / csum size) * sectorsize. When the global check_data_csum is
 * set, the data itself is verified with check_extent_csums(). Adjacent
 * items are merged into one run (offset, num_bytes); when a gap is found
 * the finished run is passed to check_extent_exists().
 *
 * NOTE(review): several lines (loop head, the skip_csum_check label,
 * error accounting and returns) are missing from this excerpt.
 */
7899 static int check_csums(struct btrfs_root *root)
7901 struct btrfs_path path;
7902 struct extent_buffer *leaf;
7903 struct btrfs_key key;
7904 u64 offset = 0, num_bytes = 0;
7905 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7909 unsigned long leaf_offset;
7911 root = root->fs_info->csum_root;
7912 if (!extent_buffer_uptodate(root->node)) {
7913 fprintf(stderr, "No valid csum tree found\n");
7917 btrfs_init_path(&path);
7918 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7919 key.type = BTRFS_EXTENT_CSUM_KEY;
7921 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7923 fprintf(stderr, "Error searching csum tree %d\n", ret);
7924 btrfs_release_path(&path);
7928 if (ret > 0 && path.slots[0])
7933 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7934 ret = btrfs_next_leaf(root, &path);
7936 fprintf(stderr, "Error going to next leaf "
7943 leaf = path.nodes[0];
7945 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7946 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Number of data bytes covered by the checksums stored in this item. */
7951 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7952 csum_size) * root->fs_info->sectorsize;
7953 if (!check_data_csum)
7954 goto skip_csum_check;
7955 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7956 ret = check_extent_csums(root, key.offset, data_len,
7962 offset = key.offset;
/* This item does not continue the current run: check the run, start a new one. */
7963 } else if (key.offset != offset + num_bytes) {
7964 ret = check_extent_exists(root, offset, num_bytes);
7966 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7967 "there is no extent record\n",
7968 offset, offset+num_bytes);
7971 offset = key.offset;
7974 num_bytes += data_len;
7978 btrfs_release_path(&path);
/*
 * is_dropped_key - compare @key with @drop_key field by field, in the
 * order objectid, type, offset.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably a non-zero result means @key sorts before @drop_key -
 * TODO confirm.
 */
7982 static int is_dropped_key(struct btrfs_key *key,
7983 struct btrfs_key *drop_key) {
7984 if (key->objectid < drop_key->objectid)
7986 else if (key->objectid == drop_key->objectid) {
7987 if (key->type < drop_key->type)
7989 else if (key->type == drop_key->type) {
7990 if (key->offset < drop_key->offset)
7998 * Here are the rules for FULL_BACKREF.
8000 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8001 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8003 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
8004 * if it happened after the relocation occurred since we'll have dropped the
8005 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8006 * have no real way to know for sure.
8008 * We process the blocks one root at a time, and we start from the lowest root
8009 * objectid and go to the highest. So we can just lookup the owner backref for
8010 * the record and if we don't find it then we know it doesn't exist and we have
8013 * FIXME: if we ever start reclaiming root objectids then we need to fix this
8014 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8015 * be set or not and then we can check later once we've gathered all the refs.
/*
 * calc_extent_flag - work out whether tree block @buf should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF and store the result in *flags.
 *
 * Looks up the extent record for buf->start in @extent_cache, then applies
 * a sequence of tests: root objectid below BTRFS_FIRST_FREE_OBJECTID,
 * @buf being the root node itself (buf->start == ri->bytenr), the RELOC
 * header flag, the header owner matching ri->objectid, and finally whether
 * a tree backref for (parent 0, owner) exists. The result is compared with
 * the value previously stored in rec->flag_block_full_backref; a
 * disagreement sets rec->bad_full_backref.
 *
 * NOTE(review): the gotos/labels, the last parameter line and the return
 * statements are missing from this excerpt, so which test leads to which
 * outcome is only partly visible here.
 */
8017 static int calc_extent_flag(struct cache_tree *extent_cache,
8018 struct extent_buffer *buf,
8019 struct root_item_record *ri,
8022 struct extent_record *rec;
8023 struct cache_extent *cache;
8024 struct tree_backref *tback;
8027 cache = lookup_cache_extent(extent_cache, buf->start, 1);
8028 /* we have added this extent before */
8032 rec = container_of(cache, struct extent_record, cache);
8035 * Except file/reloc tree, we can not have
8038 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8043 if (buf->start == ri->bytenr)
8046 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8049 owner = btrfs_header_owner(buf);
8050 if (owner == ri->objectid)
8053 tback = find_tree_backref(rec, 0, owner);
/* A previously recorded value other than 0 conflicts with this outcome. */
8058 if (rec->flag_block_full_backref != FLAG_UNSET &&
8059 rec->flag_block_full_backref != 0)
8060 rec->bad_full_backref = 1;
8063 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* A previously recorded value other than 1 conflicts with FULL_BACKREF. */
8064 if (rec->flag_block_full_backref != FLAG_UNSET &&
8065 rec->flag_block_full_backref != 1)
8066 rec->bad_full_backref = 1;
/*
 * report_mismatch_key_root - print to stderr a one-line message of the
 * form "Invalid key type(<type>) found in root(<root>)", using
 * print_key_type() and print_objectid() for the two names.
 */
8070 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8072 fprintf(stderr, "Invalid key type(");
8073 print_key_type(stderr, 0, key_type);
8074 fprintf(stderr, ") found in root(");
8075 print_objectid(stderr, rootid, 0);
8076 fprintf(stderr, ")\n");
8080 * Check if the key is valid with its extent buffer.
8082 * This is an early check in case an invalid key exists in an extent buffer
8083 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * check_type_with_root - check that an item of @key_type may appear in the
 * tree identified by @rootid.
 *
 * Visible rules: DEV_ITEM and CHUNK_ITEM only in the chunk tree;
 * EXTENT_ITEM, METADATA_ITEM and BLOCK_GROUP_ITEM only in the extent tree;
 * ROOT_ITEM only in the root tree; DEV_EXTENT only in the device tree.
 * A mismatch is reported through report_mismatch_key_root().
 *
 * NOTE(review): the case label BTRFS_CSUM_TREE_OBJECTID below is a tree
 * objectid, not a key type, although the switch is presumably on
 * @key_type. Judging by the "valid in csum and log tree" comment, a
 * checksum item key type (such as BTRFS_EXTENT_CSUM_KEY) was probably
 * intended. The accompanying condition is only partly visible in this
 * excerpt, so the correct fix must be confirmed against the full source.
 * NOTE(review): the switch head, gotos and return statements are missing
 * from this excerpt.
 */
8086 static int check_type_with_root(u64 rootid, u8 key_type)
8089 /* Only valid in chunk tree */
8090 case BTRFS_DEV_ITEM_KEY:
8091 case BTRFS_CHUNK_ITEM_KEY:
8092 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8095 /* valid in csum and log tree */
8096 case BTRFS_CSUM_TREE_OBJECTID:
8097 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8101 case BTRFS_EXTENT_ITEM_KEY:
8102 case BTRFS_METADATA_ITEM_KEY:
8103 case BTRFS_BLOCK_GROUP_ITEM_KEY:
8104 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8107 case BTRFS_ROOT_ITEM_KEY:
8108 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8111 case BTRFS_DEV_EXTENT_KEY:
8112 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8118 report_mismatch_key_root(key_type, rootid);
8122 static int run_next_block(struct btrfs_root *root,
8123 struct block_info *bits,
8126 struct cache_tree *pending,
8127 struct cache_tree *seen,
8128 struct cache_tree *reada,
8129 struct cache_tree *nodes,
8130 struct cache_tree *extent_cache,
8131 struct cache_tree *chunk_cache,
8132 struct rb_root *dev_cache,
8133 struct block_group_tree *block_group_cache,
8134 struct device_extent_tree *dev_extent_cache,
8135 struct root_item_record *ri)
8137 struct btrfs_fs_info *fs_info = root->fs_info;
8138 struct extent_buffer *buf;
8139 struct extent_record *rec = NULL;
8150 struct btrfs_key key;
8151 struct cache_extent *cache;
8154 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8155 bits_nr, &reada_bits);
8160 for(i = 0; i < nritems; i++) {
8161 ret = add_cache_extent(reada, bits[i].start,
8166 /* fixme, get the parent transid */
8167 readahead_tree_block(fs_info, bits[i].start, 0);
8170 *last = bits[0].start;
8171 bytenr = bits[0].start;
8172 size = bits[0].size;
8174 cache = lookup_cache_extent(pending, bytenr, size);
8176 remove_cache_extent(pending, cache);
8179 cache = lookup_cache_extent(reada, bytenr, size);
8181 remove_cache_extent(reada, cache);
8184 cache = lookup_cache_extent(nodes, bytenr, size);
8186 remove_cache_extent(nodes, cache);
8189 cache = lookup_cache_extent(extent_cache, bytenr, size);
8191 rec = container_of(cache, struct extent_record, cache);
8192 gen = rec->parent_generation;
8195 /* fixme, get the real parent transid */
8196 buf = read_tree_block(root->fs_info, bytenr, gen);
8197 if (!extent_buffer_uptodate(buf)) {
8198 record_bad_block_io(root->fs_info,
8199 extent_cache, bytenr, size);
8203 nritems = btrfs_header_nritems(buf);
8206 if (!init_extent_tree) {
8207 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8208 btrfs_header_level(buf), 1, NULL,
8211 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8213 fprintf(stderr, "Couldn't calc extent flags\n");
8214 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8219 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8221 fprintf(stderr, "Couldn't calc extent flags\n");
8222 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8226 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8228 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8229 ri->objectid == btrfs_header_owner(buf)) {
8231 * Ok we got to this block from it's original owner and
8232 * we have FULL_BACKREF set. Relocation can leave
8233 * converted blocks over so this is altogether possible,
8234 * however it's not possible if the generation > the
8235 * last snapshot, so check for this case.
8237 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8238 btrfs_header_generation(buf) > ri->last_snapshot) {
8239 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8240 rec->bad_full_backref = 1;
8245 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8246 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8247 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8248 rec->bad_full_backref = 1;
8252 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8253 rec->flag_block_full_backref = 1;
8257 rec->flag_block_full_backref = 0;
8259 owner = btrfs_header_owner(buf);
8262 ret = check_block(root, extent_cache, buf, flags);
8266 if (btrfs_is_leaf(buf)) {
8267 btree_space_waste += btrfs_leaf_free_space(root, buf);
8268 for (i = 0; i < nritems; i++) {
8269 struct btrfs_file_extent_item *fi;
8270 btrfs_item_key_to_cpu(buf, &key, i);
8272 * Check key type against the leaf owner.
8273 * Could filter quite a lot of early error if
8276 if (check_type_with_root(btrfs_header_owner(buf),
8278 fprintf(stderr, "ignoring invalid key\n");
8281 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8282 process_extent_item(root, extent_cache, buf,
8286 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8287 process_extent_item(root, extent_cache, buf,
8291 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8293 btrfs_item_size_nr(buf, i);
8296 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8297 process_chunk_item(chunk_cache, &key, buf, i);
8300 if (key.type == BTRFS_DEV_ITEM_KEY) {
8301 process_device_item(dev_cache, &key, buf, i);
8304 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8305 process_block_group_item(block_group_cache,
8309 if (key.type == BTRFS_DEV_EXTENT_KEY) {
8310 process_device_extent_item(dev_extent_cache,
8315 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8316 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8317 process_extent_ref_v0(extent_cache, buf, i);
8324 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8325 ret = add_tree_backref(extent_cache,
8326 key.objectid, 0, key.offset, 0);
8329 "add_tree_backref failed (leaf tree block): %s",
8333 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8334 ret = add_tree_backref(extent_cache,
8335 key.objectid, key.offset, 0, 0);
8338 "add_tree_backref failed (leaf shared block): %s",
8342 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8343 struct btrfs_extent_data_ref *ref;
8344 ref = btrfs_item_ptr(buf, i,
8345 struct btrfs_extent_data_ref);
8346 add_data_backref(extent_cache,
8348 btrfs_extent_data_ref_root(buf, ref),
8349 btrfs_extent_data_ref_objectid(buf,
8351 btrfs_extent_data_ref_offset(buf, ref),
8352 btrfs_extent_data_ref_count(buf, ref),
8353 0, root->fs_info->sectorsize);
8356 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8357 struct btrfs_shared_data_ref *ref;
8358 ref = btrfs_item_ptr(buf, i,
8359 struct btrfs_shared_data_ref);
8360 add_data_backref(extent_cache,
8361 key.objectid, key.offset, 0, 0, 0,
8362 btrfs_shared_data_ref_count(buf, ref),
8363 0, root->fs_info->sectorsize);
8366 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8367 struct bad_item *bad;
8369 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8373 bad = malloc(sizeof(struct bad_item));
8376 INIT_LIST_HEAD(&bad->list);
8377 memcpy(&bad->key, &key,
8378 sizeof(struct btrfs_key));
8379 bad->root_id = owner;
8380 list_add_tail(&bad->list, &delete_items);
8383 if (key.type != BTRFS_EXTENT_DATA_KEY)
8385 fi = btrfs_item_ptr(buf, i,
8386 struct btrfs_file_extent_item);
8387 if (btrfs_file_extent_type(buf, fi) ==
8388 BTRFS_FILE_EXTENT_INLINE)
8390 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8393 data_bytes_allocated +=
8394 btrfs_file_extent_disk_num_bytes(buf, fi);
8395 if (data_bytes_allocated < root->fs_info->sectorsize) {
8398 data_bytes_referenced +=
8399 btrfs_file_extent_num_bytes(buf, fi);
8400 add_data_backref(extent_cache,
8401 btrfs_file_extent_disk_bytenr(buf, fi),
8402 parent, owner, key.objectid, key.offset -
8403 btrfs_file_extent_offset(buf, fi), 1, 1,
8404 btrfs_file_extent_disk_num_bytes(buf, fi));
8408 struct btrfs_key first_key;
8410 first_key.objectid = 0;
8413 btrfs_item_key_to_cpu(buf, &first_key, 0);
8414 level = btrfs_header_level(buf);
8415 for (i = 0; i < nritems; i++) {
8416 struct extent_record tmpl;
8418 ptr = btrfs_node_blockptr(buf, i);
8419 size = root->fs_info->nodesize;
8420 btrfs_node_key_to_cpu(buf, &key, i);
8422 if ((level == ri->drop_level)
8423 && is_dropped_key(&key, &ri->drop_key)) {
8428 memset(&tmpl, 0, sizeof(tmpl));
8429 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8430 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8435 tmpl.max_size = size;
8436 ret = add_extent_rec(extent_cache, &tmpl);
8440 ret = add_tree_backref(extent_cache, ptr, parent,
8444 "add_tree_backref failed (non-leaf block): %s",
8450 add_pending(nodes, seen, ptr, size);
8452 add_pending(pending, seen, ptr, size);
8455 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
8456 nritems) * sizeof(struct btrfs_key_ptr);
8458 total_btree_bytes += buf->len;
8459 if (fs_root_objectid(btrfs_header_owner(buf)))
8460 total_fs_tree_bytes += buf->len;
8461 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8462 total_extent_tree_bytes += buf->len;
8464 free_extent_buffer(buf);
/*
 * add_root_to_pending - queue the root block @buf of a tree for scanning and
 * seed @extent_cache with an extent record and a tree backref for it.
 *
 * NOTE(review): this listing omits some lines of the function (the end of
 * the parameter list, the declaration of ret, the branch keyword between the
 * two add_pending() calls, several tmpl assignments and the return), so only
 * the visible statements are described here.
 */
8468 static int add_root_to_pending(struct extent_buffer *buf,
8469 struct cache_tree *extent_cache,
8470 struct cache_tree *pending,
8471 struct cache_tree *seen,
8472 struct cache_tree *nodes,
8475 struct extent_record tmpl;
/* Blocks with level > 0 are queued on @nodes; the other call queues on @pending. */
8478 if (btrfs_header_level(buf) > 0)
8479 add_pending(nodes, seen, buf->start, buf->len);
8481 add_pending(pending, seen, buf->start, buf->len);
/* Template extent record that starts at this block and spans buf->len bytes. */
8483 memset(&tmpl, 0, sizeof(tmpl));
8484 tmpl.start = buf->start;
8489 tmpl.max_size = buf->len;
/* The result of add_extent_rec() is not stored by this statement. */
8490 add_extent_rec(extent_cache, &tmpl);
/*
 * Relocation tree roots and blocks older than the mixed backref revision get
 * a backref whose parent argument is the block itself; every other root gets
 * a backref with parent 0 that is keyed by objectid.
 */
8492 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8493 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8494 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8497 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8502 /* as we fix the tree, we might be deleting blocks that
8503 * we're tracking for repair. This hook makes sure we
8504 * remove any backrefs for blocks as we are fixing them.
/*
 * free_extent_hook - keep the in-memory fsck extent cache in step with extent
 * references that are dropped while the filesystem is being repaired.
 *
 * Looks up the extent record covering [@bytenr, @bytenr + @num_bytes) in
 * root->fs_info->fsck_extent_cache, finds the matching data or tree backref
 * and lowers its counters, then hands the record to maybe_free_extent_rec().
 *
 * NOTE(review): several lines (the last parameter, NULL checks, the branch on
 * is_data, free() calls and the return) are not part of this listing.
 */
8506 static int free_extent_hook(struct btrfs_trans_handle *trans,
8507 struct btrfs_root *root,
8508 u64 bytenr, u64 num_bytes, u64 parent,
8509 u64 root_objectid, u64 owner, u64 offset,
8512 struct extent_record *rec;
8513 struct cache_extent *cache;
8515 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* Owners at or above BTRFS_FIRST_FREE_OBJECTID are treated as data extents. */
8517 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8518 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8522 rec = container_of(cache, struct extent_record, cache);
/* Data extent: a backref can carry several refs, so subtract refs_to_drop. */
8524 struct data_backref *back;
8525 back = find_data_backref(rec, parent, root_objectid, owner,
8526 offset, 1, bytenr, num_bytes);
8529 if (back->node.found_ref) {
8530 back->found_ref -= refs_to_drop;
8532 rec->refs -= refs_to_drop;
8534 if (back->node.found_extent_tree) {
8535 back->num_refs -= refs_to_drop;
8536 if (rec->extent_item_refs)
8537 rec->extent_item_refs -= refs_to_drop;
/* Clear the per-backref flags once the matching counter reaches zero. */
8539 if (back->found_ref == 0)
8540 back->node.found_ref = 0;
8541 if (back->num_refs == 0)
8542 back->node.found_extent_tree = 0;
/*
 * NOTE(review): the backref is erased when it is absent from the extent tree
 * but found_ref is still set; confirm whether a cleared found_ref was meant.
 */
8544 if (!back->node.found_extent_tree && back->node.found_ref) {
8545 rb_erase(&back->node.node, &rec->backref_tree);
/* Tree block: the backref flags are simply cleared. */
8549 struct tree_backref *back;
8550 back = find_tree_backref(rec, parent, root_objectid);
8553 if (back->node.found_ref) {
8556 back->node.found_ref = 0;
8558 if (back->node.found_extent_tree) {
8559 if (rec->extent_item_refs)
8560 rec->extent_item_refs--;
8561 back->node.found_extent_tree = 0;
/* NOTE(review): same erase condition as in the data branch above. */
8563 if (!back->node.found_extent_tree && back->node.found_ref) {
8564 rb_erase(&back->node.node, &rec->backref_tree);
8568 maybe_free_extent_rec(extent_cache, rec);
/*
 * delete_extent_records - remove extent tree items whose key objectid is
 * bytenr.
 *
 * The search key starts at offset (u64)-1 and is searched in
 * root->fs_info->extent_root.  Items of the extent item, metadata item and
 * backref key types are deleted with btrfs_del_item(); for extent and
 * metadata items the block group accounting is updated as well.
 *
 * NOTE(review): the loop construct, the initial key type, error checks and
 * the return statement are on lines that this listing does not show.
 */
8573 static int delete_extent_records(struct btrfs_trans_handle *trans,
8574 struct btrfs_root *root,
8575 struct btrfs_path *path,
8578 struct btrfs_key key;
8579 struct btrfs_key found_key;
8580 struct extent_buffer *leaf;
8585 key.objectid = bytenr;
8587 key.offset = (u64)-1;
8590 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8597 if (path->slots[0] == 0)
8603 leaf = path->nodes[0];
8604 slot = path->slots[0];
8606 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8607 if (found_key.objectid != bytenr)
/* Items of any other key type are stepped over by lowering the search key. */
8610 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8611 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8612 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8613 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8614 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8615 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8616 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8617 btrfs_release_path(path);
8618 if (found_key.type == 0) {
8619 if (found_key.offset == 0)
8621 key.offset = found_key.offset - 1;
8622 key.type = found_key.type;
/*
 * NOTE(review): the line closing the type == 0 branch is not shown.  If it is
 * a plain closing brace rather than an else, the two assignments below also
 * run for type 0 and overwrite the key computed just above - verify.
 */
8624 key.type = found_key.type - 1;
8625 key.offset = (u64)-1;
8629 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8630 found_key.objectid, found_key.type, found_key.offset);
8632 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8635 btrfs_release_path(path);
/*
 * For an extent item the length is the key offset; for a metadata item the
 * filesystem nodesize is used instead.
 */
8637 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8638 found_key.type == BTRFS_METADATA_ITEM_KEY) {
8639 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8640 found_key.offset : root->fs_info->nodesize;
8642 ret = btrfs_update_block_group(trans, root, bytenr,
8649 btrfs_release_path(path);
8654 * for a single backref, this will allocate a new extent
8655 * and add the backref to it.
/*
 * record_extent - write extent tree state for @rec based on one backref.
 *
 * The visible code inserts an EXTENT_ITEM keyed (rec->start, rec->max_size)
 * with zero refs and rec->generation, marks it as data or as a tree block
 * (tree blocks also get a btrfs_tree_block_info with rec->info_level and a
 * key built from rec->info_objectid), updates the block group accounting and
 * then adds references with btrfs_inc_extent_ref(): once per found_ref for a
 * data backref, once for a tree backref.
 *
 * Fixed here: the address-of expressions on copy_key had been garbled into a
 * copyright sign followed by _key, and the declaration carried a stray
 * second semicolon.
 *
 * NOTE(review): lines not present in this listing (the test on @allocated,
 * error checks, some declarations and the return) are left untouched.
 */
8657 static int record_extent(struct btrfs_trans_handle *trans,
8658 struct btrfs_fs_info *info,
8659 struct btrfs_path *path,
8660 struct extent_record *rec,
8661 struct extent_backref *back,
8662 int allocated, u64 flags)
8665 struct btrfs_root *extent_root = info->extent_root;
8666 struct extent_buffer *leaf;
8667 struct btrfs_key ins_key;
8668 struct btrfs_extent_item *ei;
8669 struct data_backref *dback;
8670 struct btrfs_tree_block_info *bi;
8673 rec->max_size = max_t(u64, rec->max_size,
8677 u32 item_size = sizeof(*ei);
8680 item_size += sizeof(*bi);
8682 ins_key.objectid = rec->start;
8683 ins_key.offset = rec->max_size;
8684 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8686 ret = btrfs_insert_empty_item(trans, extent_root, path,
8687 &ins_key, item_size);
8691 leaf = path->nodes[0];
8692 ei = btrfs_item_ptr(leaf, path->slots[0],
8693 struct btrfs_extent_item);
/* The item starts with zero refs; refs are added further down. */
8695 btrfs_set_extent_refs(leaf, ei, 0);
8696 btrfs_set_extent_generation(leaf, ei, rec->generation);
8698 if (back->is_data) {
8699 btrfs_set_extent_flags(leaf, ei,
8700 BTRFS_EXTENT_FLAG_DATA);
8702 struct btrfs_disk_key copy_key;
/* The tree block info sits directly behind the extent item. */
8704 bi = (struct btrfs_tree_block_info *)(ei + 1);
8705 memset_extent_buffer(leaf, 0, (unsigned long)bi,
8708 btrfs_set_disk_key_objectid(&copy_key,
8709 rec->info_objectid);
8710 btrfs_set_disk_key_type(&copy_key, 0);
8711 btrfs_set_disk_key_offset(&copy_key, 0);
8713 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8714 btrfs_set_tree_block_key(leaf, bi, &copy_key);
8716 btrfs_set_extent_flags(leaf, ei,
8717 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8720 btrfs_mark_buffer_dirty(leaf);
8721 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8722 rec->max_size, 1, 0);
8725 btrfs_release_path(path);
8728 if (back->is_data) {
8732 dback = to_data_backref(back);
8733 if (back->full_backref)
8734 parent = dback->parent;
8738 for (i = 0; i < dback->found_ref; i++) {
8739 /* if parent != 0, we're doing a full backref
8740 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8741 * just makes the backref allocator create a data
8744 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8745 rec->start, rec->max_size,
8749 BTRFS_FIRST_FREE_OBJECTID :
8755 fprintf(stderr, "adding new data backref"
8756 " on %llu %s %llu owner %llu"
8757 " offset %llu found %d\n",
8758 (unsigned long long)rec->start,
8759 back->full_backref ?
8761 back->full_backref ?
8762 (unsigned long long)parent :
8763 (unsigned long long)dback->root,
8764 (unsigned long long)dback->owner,
8765 (unsigned long long)dback->offset,
8769 struct tree_backref *tback;
8771 tback = to_tree_backref(back);
8772 if (back->full_backref)
8773 parent = tback->parent;
8777 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8778 rec->start, rec->max_size,
8779 parent, tback->root, 0, 0);
8780 fprintf(stderr, "adding new tree backref on "
8781 "start %llu len %llu parent %llu root %llu\n",
8782 rec->start, rec->max_size, parent, tback->root);
8785 btrfs_release_path(path);
/*
 * find_entry - scan the list @entries for an extent_entry whose bytenr and
 * bytes both equal the given values.
 *
 * NOTE(review): the statement executed on a match and the not-found return
 * are on lines that this listing does not show.
 */
8789 static struct extent_entry *find_entry(struct list_head *entries,
8790 u64 bytenr, u64 bytes)
8792 struct extent_entry *entry = NULL;
8794 list_for_each_entry(entry, entries, list) {
8795 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * find_most_right_entry - pick the entry from @entries that the backrefs
 * agree on most, judged by the count field of each entry.
 *
 * Entries whose broken counter equals their count are distrusted, and ties
 * in count between candidates are treated as inconclusive.
 *
 * NOTE(review): most branch bodies and the return statement are on lines
 * that this listing does not show, so the exact tie handling cannot be
 * confirmed from here.
 */
8802 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8804 struct extent_entry *entry, *best = NULL, *prev = NULL;
8806 list_for_each_entry(entry, entries, list) {
8808 * If there are as many broken entries as entries then we know
8809 * not to trust this particular entry.
8811 if (entry->broken == entry->count)
8815 * Special case, when there are only two entries and 'best' is
8825 * If our current entry == best then we can't be sure our best
8826 * is really the best, so we need to keep searching.
8828 if (best && best->count == entry->count) {
8834 /* Prev == entry, not good enough, have to keep searching */
8835 if (!prev->broken && prev->count == entry->count)
8839 best = (prev->count > entry->count) ? prev : entry;
8840 else if (best->count < entry->count)
/*
 * repair_ref - rewrite the file extent item behind @dback so that its disk
 * bytenr and disk_num_bytes match @entry.
 *
 * Steps visible in this listing:
 *   1. read the fs root named by dback->root;
 *   2. search from (dback->owner, EXTENT_DATA, dback->offset) and walk
 *      forward until an item with dback->disk_bytenr and dback->bytes is met;
 *   3. start a transaction and search again with COW enabled;
 *   4. adjust disk_bytenr, offset, disk_num_bytes and, for uncompressed
 *      extents, ram_bytes;
 *   5. commit and return ret if it is non-zero, otherwise the commit result.
 *
 * NOTE(review): error returns, loop constructs and some declarations are on
 * lines that this listing does not show.
 */
8848 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8849 struct data_backref *dback, struct extent_entry *entry)
8851 struct btrfs_trans_handle *trans;
8852 struct btrfs_root *root;
8853 struct btrfs_file_extent_item *fi;
8854 struct extent_buffer *leaf;
8855 struct btrfs_key key;
8859 key.objectid = dback->root;
8860 key.type = BTRFS_ROOT_ITEM_KEY;
8861 key.offset = (u64)-1;
8862 root = btrfs_read_fs_root(info, &key);
8864 fprintf(stderr, "Couldn't find root for our ref\n");
8869 * The backref points to the original offset of the extent if it was
8870 * split, so we need to search down to the offset we have and then walk
8871 * forward until we find the backref we're looking for.
8873 key.objectid = dback->owner;
8874 key.type = BTRFS_EXTENT_DATA_KEY;
8875 key.offset = dback->offset;
8876 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8878 fprintf(stderr, "Error looking up ref %d\n", ret);
8883 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8884 ret = btrfs_next_leaf(root, path);
8886 fprintf(stderr, "Couldn't find our ref, next\n");
8890 leaf = path->nodes[0];
8891 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8892 if (key.objectid != dback->owner ||
8893 key.type != BTRFS_EXTENT_DATA_KEY) {
8894 fprintf(stderr, "Couldn't find our ref, search\n");
8897 fi = btrfs_item_ptr(leaf, path->slots[0],
8898 struct btrfs_file_extent_item);
8899 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8900 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8902 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8907 btrfs_release_path(path);
8909 trans = btrfs_start_transaction(root, 1);
8911 return PTR_ERR(trans);
8914 * Ok we have the key of the file extent we want to fix, now we can cow
8915 * down to the thing and fix it.
8917 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8919 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8920 key.objectid, key.type, key.offset, ret);
8924 fprintf(stderr, "Well that's odd, we just found this key "
8925 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8930 leaf = path->nodes[0];
8931 fi = btrfs_item_ptr(leaf, path->slots[0],
8932 struct btrfs_file_extent_item);
/* A compressed extent whose start differs from the entry is only reported. */
8934 if (btrfs_file_extent_compression(leaf, fi) &&
8935 dback->disk_bytenr != entry->bytenr) {
8936 fprintf(stderr, "Ref doesn't match the record start and is "
8937 "compressed, please take a btrfs-image of this file "
8938 "system and send it to a btrfs developer so they can "
8939 "complete this functionality for bytenr %Lu\n",
8940 dback->disk_bytenr);
/*
 * Three cases follow: a broken backref simply takes the entry start; a ref
 * that starts after the entry and a ref that starts before it both move to
 * the entry start and get their in-extent offset rewritten.
 */
8945 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8946 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8947 } else if (dback->disk_bytenr > entry->bytenr) {
8948 u64 off_diff, offset;
8950 off_diff = dback->disk_bytenr - entry->bytenr;
8951 offset = btrfs_file_extent_offset(leaf, fi);
8952 if (dback->disk_bytenr + offset +
8953 btrfs_file_extent_num_bytes(leaf, fi) >
8954 entry->bytenr + entry->bytes) {
8955 fprintf(stderr, "Ref is past the entry end, please "
8956 "take a btrfs-image of this file system and "
8957 "send it to a btrfs developer, ref %Lu\n",
8958 dback->disk_bytenr);
8963 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8964 btrfs_set_file_extent_offset(leaf, fi, offset);
8965 } else if (dback->disk_bytenr < entry->bytenr) {
8968 offset = btrfs_file_extent_offset(leaf, fi);
8969 if (dback->disk_bytenr + offset < entry->bytenr) {
8970 fprintf(stderr, "Ref is before the entry start, please"
8971 " take a btrfs-image of this file system and "
8972 "send it to a btrfs developer, ref %Lu\n",
8973 dback->disk_bytenr);
8978 offset += dback->disk_bytenr;
8979 offset -= entry->bytenr;
8980 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8981 btrfs_set_file_extent_offset(leaf, fi, offset);
8984 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8987 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8988 * only do this if we aren't using compression, otherwise it's a
8991 if (!btrfs_file_extent_compression(leaf, fi))
8992 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8994 printf("ram bytes may be wrong?\n");
8995 btrfs_mark_buffer_dirty(leaf);
8997 err = btrfs_commit_transaction(trans, root);
8998 btrfs_release_path(path);
/* An earlier failure in ret takes precedence over the commit result. */
8999 return ret ? ret : err;
/*
 * verify_backrefs - check that the data backrefs of @rec agree on the extent
 * start and length, and repair the file extents that disagree.
 *
 * Visible flow: every non-full data backref with found_ref set is grouped
 * into an extent_entry keyed by (disk_bytenr, bytes); find_most_right_entry()
 * picks the winner; if there is none, the extent record itself is consulted
 * and may be added as one more entry; backrefs that differ from the winner
 * are passed to repair_ref(); finally the temporary entry list is emptied.
 *
 * NOTE(review): counters, error paths, the local list declaration and the
 * return statement are on lines that this listing does not show.
 */
9002 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9003 struct extent_record *rec)
9005 struct extent_backref *back, *tmp;
9006 struct data_backref *dback;
9007 struct extent_entry *entry, *best = NULL;
9010 int broken_entries = 0;
9015 * Metadata is easy and the backrefs should always agree on bytenr and
9016 * size, if not we've got bigger issues.
9021 rbtree_postorder_for_each_entry_safe(back, tmp,
9022 &rec->backref_tree, node) {
9023 if (back->full_backref || !back->is_data)
9026 dback = to_data_backref(back);
9029 * We only pay attention to backrefs that we found a real
9032 if (dback->found_ref == 0)
9036 * For now we only catch when the bytes don't match, not the
9037 * bytenr. We can easily do this at the same time, but I want
9038 * to have a fs image to test on before we just add repair
9039 * functionality willy-nilly so we know we won't screw up the
9043 entry = find_entry(&entries, dback->disk_bytenr,
9046 entry = malloc(sizeof(struct extent_entry));
9051 memset(entry, 0, sizeof(*entry));
9052 entry->bytenr = dback->disk_bytenr;
9053 entry->bytes = dback->bytes;
9054 list_add_tail(&entry->list, &entries);
9059 * If we only have on entry we may think the entries agree when
9060 * in reality they don't so we have to do some extra checking.
9062 if (dback->disk_bytenr != rec->start ||
9063 dback->bytes != rec->nr || back->broken)
9074 /* Yay all the backrefs agree, carry on good sir */
9075 if (nr_entries <= 1 && !mismatch)
9078 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9079 "%Lu\n", rec->start);
9082 * First we want to see if the backrefs can agree amongst themselves who
9083 * is right, so figure out which one of the entries has the highest
9086 best = find_most_right_entry(&entries);
9089 * Ok so we may have an even split between what the backrefs think, so
9090 * this is where we use the extent ref to see what it thinks.
9093 entry = find_entry(&entries, rec->start, rec->nr);
9094 if (!entry && (!broken_entries || !rec->found_rec)) {
9095 fprintf(stderr, "Backrefs don't agree with each other "
9096 "and extent record doesn't agree with anybody,"
9097 " so we can't fix bytenr %Lu bytes %Lu\n",
9098 rec->start, rec->nr);
9101 } else if (!entry) {
9103 * Ok our backrefs were broken, we'll assume this is the
9104 * correct value and add an entry for this range.
9106 entry = malloc(sizeof(struct extent_entry));
9111 memset(entry, 0, sizeof(*entry));
9112 entry->bytenr = rec->start;
9113 entry->bytes = rec->nr;
9114 list_add_tail(&entry->list, &entries);
9118 best = find_most_right_entry(&entries);
9120 fprintf(stderr, "Backrefs and extent record evenly "
9121 "split on who is right, this is going to "
9122 "require user input to fix bytenr %Lu bytes "
9123 "%Lu\n", rec->start, rec->nr);
9130 * I don't think this can happen currently as we'll abort() if we catch
9131 * this case higher up, but in case somebody removes that we still can't
9132 * deal with it properly here yet, so just bail out of that's the case.
9134 if (best->bytenr != rec->start) {
9135 fprintf(stderr, "Extent start and backref starts don't match, "
9136 "please use btrfs-image on this file system and send "
9137 "it to a btrfs developer so they can make fsck fix "
9138 "this particular case. bytenr is %Lu, bytes is %Lu\n",
9139 rec->start, rec->nr);
9145 * Ok great we all agreed on an extent record, let's go find the real
9146 * references and fix up the ones that don't match.
9148 rbtree_postorder_for_each_entry_safe(back, tmp,
9149 &rec->backref_tree, node) {
9150 if (back->full_backref || !back->is_data)
9153 dback = to_data_backref(back);
9156 * Still ignoring backrefs that don't have a real ref attached
9159 if (dback->found_ref == 0)
9162 if (dback->bytes == best->bytes &&
9163 dback->disk_bytenr == best->bytenr)
9166 ret = repair_ref(info, path, dback, best);
9172 * Ok we messed with the actual refs, which means we need to drop our
9173 * entire cache and go back and rescan. I know this is a huge pain and
9174 * adds a lot of extra work, but it's the only way to be safe. Once all
9175 * the backrefs agree we may not need to do anything to the extent
/* Unlink every temporary entry from the local list before leaving. */
9180 while (!list_empty(&entries)) {
9181 entry = list_entry(entries.next, struct extent_entry, list);
9182 list_del_init(&entry->list);
/*
 * process_duplicates - replace @rec in @extent_cache by its single duplicate
 * record, so that the cached record reflects what the extent tree contains.
 *
 * Nothing is done when @rec was itself found in the extent tree or has more
 * than one duplicate.  Otherwise the duplicate takes over the refs and the
 * backrefs list of @rec, absorbs any other cached records overlapping its
 * range and is inserted into the cache.  The visible return yields 0 when
 * the new record still has duplicates and 1 when it has none.
 *
 * NOTE(review): loop constructs, early returns and the freeing of replaced
 * records are on lines that this listing does not show.
 * NOTE(review): this function moves the backrefs list while other functions
 * in this file walk backref_tree; confirm both containers stay in sync.
 */
9188 static int process_duplicates(struct cache_tree *extent_cache,
9189 struct extent_record *rec)
9191 struct extent_record *good, *tmp;
9192 struct cache_extent *cache;
9196 * If we found a extent record for this extent then return, or if we
9197 * have more than one duplicate we are likely going to need to delete
9200 if (rec->found_rec || rec->num_duplicates > 1)
9203 /* Shouldn't happen but just in case */
9204 BUG_ON(!rec->num_duplicates);
9207 * So this happens if we end up with a backref that doesn't match the
9208 * actual extent entry. So either the backref is bad or the extent
9209 * entry is bad. Either way we want to have the extent_record actually
9210 * reflect what we found in the extent_tree, so we need to take the
9211 * duplicate out and use that as the extent_record since the only way we
9212 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9214 remove_cache_extent(extent_cache, &rec->cache);
/* Promote the first duplicate and reset its bookkeeping. */
9216 good = to_extent_record(rec->dups.next);
9217 list_del_init(&good->list);
9218 INIT_LIST_HEAD(&good->backrefs);
9219 INIT_LIST_HEAD(&good->dups);
9220 good->cache.start = good->start;
9221 good->cache.size = good->nr;
9222 good->content_checked = 0;
9223 good->owner_ref_checked = 0;
9224 good->num_duplicates = 0;
9225 good->refs = rec->refs;
9226 list_splice_init(&rec->backrefs, &good->backrefs);
9228 cache = lookup_cache_extent(extent_cache, good->start,
9232 tmp = container_of(cache, struct extent_record, cache);
9235 * If we find another overlapping extent and it's found_rec is
9236 * set then it's a duplicate and we need to try and delete
9239 if (tmp->found_rec || tmp->num_duplicates > 0) {
9240 if (list_empty(&good->list))
9241 list_add_tail(&good->list,
9242 &duplicate_extents);
9243 good->num_duplicates += tmp->num_duplicates + 1;
9244 list_splice_init(&tmp->dups, &good->dups);
9245 list_del_init(&tmp->list);
9246 list_add_tail(&tmp->list, &good->dups);
9247 remove_cache_extent(extent_cache, &tmp->cache);
9252 * Ok we have another non extent item backed extent rec, so lets
9253 * just add it to this extent and carry on like we did above.
9255 good->refs += tmp->refs;
9256 list_splice_init(&tmp->backrefs, &good->backrefs);
9257 remove_cache_extent(extent_cache, &tmp->cache);
9260 ret = insert_cache_extent(extent_cache, &good->cache);
9263 return good->num_duplicates ? 0 : 1;
/*
 * delete_duplicate_records - delete the extent items of the duplicates of
 * @rec, keeping the one record that covers all the others.
 *
 * Visible flow: choose the covering record by comparing start and nr of each
 * duplicate, collect the others on a local delete_list, start a transaction
 * on the extent root, delete the EXTENT_ITEM of every listed record that has
 * found_rec set, commit, then empty delete_list and rec->dups.
 *
 * Returns ret when it is non-zero, otherwise nr_del.  rec->num_duplicates is
 * reset only when both ret and nr_del are zero.
 *
 * NOTE(review): the selection of the initial good record, the counting of
 * nr_del, error jumps and the freeing of records are on lines that this
 * listing does not show.
 */
9266 static int delete_duplicate_records(struct btrfs_root *root,
9267 struct extent_record *rec)
9269 struct btrfs_trans_handle *trans;
9270 LIST_HEAD(delete_list);
9271 struct btrfs_path path;
9272 struct extent_record *tmp, *good, *n;
9275 struct btrfs_key key;
9277 btrfs_init_path(&path);
9280 /* Find the record that covers all of the duplicates. */
9281 list_for_each_entry(tmp, &rec->dups, list) {
9282 if (good->start < tmp->start)
9284 if (good->nr > tmp->nr)
9287 if (tmp->start + tmp->nr < good->start + good->nr) {
9288 fprintf(stderr, "Ok we have overlapping extents that "
9289 "aren't completely covered by each other, this "
9290 "is going to require more careful thought. "
9291 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9292 tmp->start, tmp->nr, good->start, good->nr);
9299 list_add_tail(&rec->list, &delete_list);
9301 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9304 list_move_tail(&tmp->list, &delete_list);
/* From here on root refers to the extent root. */
9307 root = root->fs_info->extent_root;
9308 trans = btrfs_start_transaction(root, 1);
9309 if (IS_ERR(trans)) {
9310 ret = PTR_ERR(trans);
9314 list_for_each_entry(tmp, &delete_list, list) {
9315 if (tmp->found_rec == 0)
9317 key.objectid = tmp->start;
9318 key.type = BTRFS_EXTENT_ITEM_KEY;
9319 key.offset = tmp->nr;
9321 /* Shouldn't happen but just in case */
9322 if (tmp->metadata) {
9323 fprintf(stderr, "Well this shouldn't happen, extent "
9324 "record overlaps but is metadata? "
9325 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9329 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9335 ret = btrfs_del_item(trans, root, &path);
9338 btrfs_release_path(&path);
9341 err = btrfs_commit_transaction(trans, root);
9345 while (!list_empty(&delete_list)) {
9346 tmp = to_extent_record(delete_list.next);
9347 list_del_init(&tmp->list);
9353 while (!list_empty(&rec->dups)) {
9354 tmp = to_extent_record(rec->dups.next);
9355 list_del_init(&tmp->list);
9359 btrfs_release_path(&path);
9361 if (!ret && !nr_del)
9362 rec->num_duplicates = 0;
9364 return ret ? ret : nr_del;
/*
 * find_possible_backrefs - for each data backref of @rec that has no found
 * ref yet, look up the file extent it names and, when one exists, record the
 * disk bytenr and length that the file extent actually points at.
 *
 * Full backrefs and tree backrefs are ignored.  A root that cannot be read
 * with -ENOENT is skipped; any other root read error is returned.
 *
 * NOTE(review): the checks on the search result, the test applied to the
 * cached extent record, the marking of the backref as not trustworthy and
 * the final return are on lines that this listing does not show.
 */
9367 static int find_possible_backrefs(struct btrfs_fs_info *info,
9368 struct btrfs_path *path,
9369 struct cache_tree *extent_cache,
9370 struct extent_record *rec)
9372 struct btrfs_root *root;
9373 struct extent_backref *back, *tmp;
9374 struct data_backref *dback;
9375 struct cache_extent *cache;
9376 struct btrfs_file_extent_item *fi;
9377 struct btrfs_key key;
9381 rbtree_postorder_for_each_entry_safe(back, tmp,
9382 &rec->backref_tree, node) {
9383 /* Don't care about full backrefs (poor unloved backrefs) */
9384 if (back->full_backref || !back->is_data)
9387 dback = to_data_backref(back);
9389 /* We found this one, we don't need to do a lookup */
9390 if (dback->found_ref)
9393 key.objectid = dback->root;
9394 key.type = BTRFS_ROOT_ITEM_KEY;
9395 key.offset = (u64)-1;
9397 root = btrfs_read_fs_root(info, &key);
9399 /* No root, definitely a bad ref, skip */
9400 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9402 /* Other err, exit */
9404 return PTR_ERR(root);
9406 key.objectid = dback->owner;
9407 key.type = BTRFS_EXTENT_DATA_KEY;
9408 key.offset = dback->offset;
9409 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9411 btrfs_release_path(path);
9414 /* Didn't find it, we can carry on */
9419 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9420 struct btrfs_file_extent_item);
9421 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9422 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9423 btrfs_release_path(path);
/* Probe the cache with a one byte range at the start of the file extent. */
9424 cache = lookup_cache_extent(extent_cache, bytenr, 1);
/* NOTE(review): a second variable named tmp is declared here; the loop cursor above uses the same name. */
9426 struct extent_record *tmp;
9427 tmp = container_of(cache, struct extent_record, cache);
9430 * If we found an extent record for the bytenr for this
9431 * particular backref then we can't add it to our
9432 * current extent record. We only want to add backrefs
9433 * that don't have a corresponding extent item in the
9434 * extent tree since they likely belong to this record
9435 * and we need to fix it if it doesn't match bytenrs.
/* Adopt the location that the file extent item reports. */
9441 dback->found_ref += 1;
9442 dback->disk_bytenr = bytenr;
9443 dback->bytes = bytes;
9446 * Set this so the verify backref code knows not to trust the
9447 * values in this backref.
9456 * Record orphan data ref into corresponding root.
9458 * Return 0 if the extent item contains data ref and recorded.
9459 * Return 1 if the extent item contains no useful data ref
9460 * On that case, it may contains only shared_dataref or metadata backref
9461 * or the file extent exists(this should be handled by the extent bytenr
9463 * Return <0 if something goes wrong.
/*
 * record_orphan_data_extents - remember data extents of @rec that the extent
 * tree references but whose file extent item cannot be found in the owning
 * root, by queueing an orphan_data_extent on that root.
 *
 * Only non-full data backrefs that exist in the extent tree and have no
 * found ref are considered.  Roots that cannot be read are skipped.  The
 * visible return yields 0 when at least one orphan was recorded and 1 when
 * none was.
 *
 * Fixed here: list_add() takes the new node first and the list head second.
 * The call had the two swapped, which re-linked the list head of the root
 * behind every new orphan and dropped the orphans queued before it.
 *
 * NOTE(review): the checks on the search result and on malloc(), the early
 * return for metadata and the error return are on lines that this listing
 * does not show; they are left untouched.
 */
9465 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9466 struct extent_record *rec)
9468 struct btrfs_key key;
9469 struct btrfs_root *dest_root;
9470 struct extent_backref *back, *tmp;
9471 struct data_backref *dback;
9472 struct orphan_data_extent *orphan;
9473 struct btrfs_path path;
9474 int recorded_data_ref = 0;
9479 btrfs_init_path(&path);
9480 rbtree_postorder_for_each_entry_safe(back, tmp,
9481 &rec->backref_tree, node) {
9482 if (back->full_backref || !back->is_data ||
9483 !back->found_extent_tree)
9485 dback = to_data_backref(back);
9486 if (dback->found_ref)
9488 key.objectid = dback->root;
9489 key.type = BTRFS_ROOT_ITEM_KEY;
9490 key.offset = (u64)-1;
9492 dest_root = btrfs_read_fs_root(fs_info, &key);
9494 /* For non-exist root we just skip it */
9495 if (IS_ERR(dest_root) || !dest_root)
9498 key.objectid = dback->owner;
9499 key.type = BTRFS_EXTENT_DATA_KEY;
9500 key.offset = dback->offset;
9502 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9503 btrfs_release_path(&path);
9505 * For ret < 0, it's OK since the fs-tree may be corrupted,
9506 * we need to record it for inode/file extent rebuild.
9507 * For ret > 0, we record it only for file extent rebuild.
9508 * For ret == 0, the file extent exists but only bytenr
9509 * mismatch, let the original bytenr fix routine to handle,
9515 orphan = malloc(sizeof(*orphan));
9520 INIT_LIST_HEAD(&orphan->list);
9521 orphan->root = dback->root;
9522 orphan->objectid = dback->owner;
9523 orphan->offset = dback->offset;
9524 orphan->disk_bytenr = rec->cache.start;
9525 orphan->disk_len = rec->cache.size;
/* New node first, list head second. */
9526 list_add(&orphan->list, &dest_root->orphan_data_extents);
9527 recorded_data_ref = 1;
9530 btrfs_release_path(&path);
9532 return !recorded_data_ref;
9538 * when an incorrect extent item is found, this will delete
9539 * all of the existing entries for it and recreate them
9540 * based on what the tree scan found.
/*
 * fixup_extent_refs - rebuild the extent tree entries of @rec from the
 * backrefs that the tree scan found.
 *
 * Visible flow:
 *   - for data extents whose ref counts disagree, first run
 *     find_possible_backrefs() and verify_backrefs();
 *   - start a transaction on the extent root;
 *   - delete the existing records with delete_extent_records();
 *   - look the range up in info->corrupt_blocks;
 *   - call record_extent() for every backref that has found_ref set;
 *   - commit the transaction and report the repair.
 *
 * NOTE(review): the result checks after each step, the handling of a hit in
 * corrupt_blocks, the updates of the allocated flag and the return are on
 * lines that this listing does not show.
 */
9542 static int fixup_extent_refs(struct btrfs_fs_info *info,
9543 struct cache_tree *extent_cache,
9544 struct extent_record *rec)
9546 struct btrfs_trans_handle *trans = NULL;
9548 struct btrfs_path path;
9549 struct cache_extent *cache;
9550 struct extent_backref *back, *tmp;
/* Carry the full backref state of the record into the recreated item. */
9554 if (rec->flag_block_full_backref)
9555 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9557 btrfs_init_path(&path);
9558 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9560 * Sometimes the backrefs themselves are so broken they don't
9561 * get attached to any meaningful rec, so first go back and
9562 * check any of our backrefs that we couldn't find and throw
9563 * them into the list if we find the backref so that
9564 * verify_backrefs can figure out what to do.
9566 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9571 /* step one, make sure all of the backrefs agree */
9572 ret = verify_backrefs(info, &path, rec);
9576 trans = btrfs_start_transaction(info->extent_root, 1);
9577 if (IS_ERR(trans)) {
9578 ret = PTR_ERR(trans);
9582 /* step two, delete all the existing records */
9583 ret = delete_extent_records(trans, info->extent_root, &path,
9589 /* was this block corrupt? If so, don't add references to it */
9590 cache = lookup_cache_extent(info->corrupt_blocks,
9591 rec->start, rec->max_size);
9597 /* step three, recreate all the refs we did find */
9598 rbtree_postorder_for_each_entry_safe(back, tmp,
9599 &rec->backref_tree, node) {
9601 * if we didn't find any references, don't create a
9604 if (!back->found_ref)
9607 rec->bad_full_backref = 0;
9608 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9616 int err = btrfs_commit_transaction(trans, info->extent_root);
9622 fprintf(stderr, "Repaired extent references for %llu\n",
9623 (unsigned long long)rec->start);
9625 btrfs_release_path(&path);
/*
 * Make the FULL_BACKREF flag of the extent item of @rec agree with
 * rec->flag_block_full_backref.
 *
 * @fs_info: filesystem being repaired; fs_info->extent_root is searched
 * @rec:     extent record describing the item to fix
 *
 * The flag is set when rec->flag_block_full_backref is non-zero and cleared
 * otherwise.  The new flags are written back into the leaf, the leaf is
 * marked dirty and the transaction is committed.  Both visible failure paths
 * (search error, item not found) release the path and still commit the
 * transaction.
 *
 * Visible returns: PTR_ERR(trans) for the transaction handle.
 * NOTE(review): the remaining return values are presumably 0 or a negative
 * errno - confirm.
 */
9629 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9630 struct extent_record *rec)
9632 struct btrfs_trans_handle *trans;
9633 struct btrfs_root *root = fs_info->extent_root;
9634 struct btrfs_path path;
9635 struct btrfs_extent_item *ei;
9636 struct btrfs_key key;
/*
 * Search key of the extent item: metadata records use a METADATA_ITEM key
 * with the tree level as offset, other records use an EXTENT_ITEM key with
 * the extent size as offset.
 */
9640 key.objectid = rec->start;
9641 if (rec->metadata) {
9642 key.type = BTRFS_METADATA_ITEM_KEY;
9643 key.offset = rec->info_level;
9645 key.type = BTRFS_EXTENT_ITEM_KEY;
9646 key.offset = rec->max_size;
9649 trans = btrfs_start_transaction(root, 0);
9651 return PTR_ERR(trans);
9653 btrfs_init_path(&path);
9654 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9656 btrfs_release_path(&path);
9657 btrfs_commit_transaction(trans, root);
9660 fprintf(stderr, "Didn't find extent for %llu\n",
9661 (unsigned long long)rec->start);
9662 btrfs_release_path(&path);
9663 btrfs_commit_transaction(trans, root);
9667 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9668 struct btrfs_extent_item);
9669 flags = btrfs_extent_flags(path.nodes[0], ei);
9670 if (rec->flag_block_full_backref) {
9671 fprintf(stderr, "setting full backref on %llu\n",
9672 (unsigned long long)key.objectid);
9673 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9675 fprintf(stderr, "clearing full backref on %llu\n",
9676 (unsigned long long)key.objectid);
9677 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9679 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9680 btrfs_mark_buffer_dirty(path.nodes[0]);
9681 btrfs_release_path(&path);
9682 ret = btrfs_commit_transaction(trans, root);
9684 fprintf(stderr, "Repaired extent flags for %llu\n",
9685 (unsigned long long)rec->start);
9690 /* right now we only prune from the extent allocation tree */
/*
 * Remove the pointer to one corrupt block from its parent node in the
 * extent tree.
 *
 * @trans:   transaction the deletion runs in
 * @info:    filesystem; only info->extent_root is searched and modified
 * @corrupt: corrupt block record; its key, level and cache.start are used
 *
 * The tree search is limited to one level above the corrupt block
 * (path.lowest_level = corrupt->level + 1).  The slot the search landed on
 * is tried first; when its block pointer is not corrupt->cache.start every
 * slot of that node is scanned for it.  The matching pointer is removed with
 * btrfs_del_ptr().
 *
 * NOTE(review): presumably returns 0 on success and a negative errno when
 * the pointer cannot be located or deleted - confirm.
 */
9691 static int prune_one_block(struct btrfs_trans_handle *trans,
9692 struct btrfs_fs_info *info,
9693 struct btrfs_corrupt_block *corrupt)
9696 struct btrfs_path path;
9697 struct extent_buffer *eb;
9701 int level = corrupt->level + 1;
9703 btrfs_init_path(&path);
9705 /* we want to stop at the parent to our busted block */
9706 path.lowest_level = level;
9708 ret = btrfs_search_slot(trans, info->extent_root,
9709 &corrupt->key, &path, -1, 1);
9714 eb = path.nodes[level];
9721 * hopefully the search gave us the block we want to prune,
9722 * lets try that first
9724 slot = path.slots[level];
9725 found = btrfs_node_blockptr(eb, slot);
9726 if (found == corrupt->cache.start)
9729 nritems = btrfs_header_nritems(eb);
9731 /* the search failed, lets scan this node and hope we find it */
9732 for (slot = 0; slot < nritems; slot++) {
9733 found = btrfs_node_blockptr(eb, slot);
9734 if (found == corrupt->cache.start)
9738 * we couldn't find the bad block. TODO, search all the nodes for pointers
9741 if (eb == info->extent_root->node) {
9746 btrfs_release_path(&path);
9751 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9752 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9755 btrfs_release_path(&path);
/*
 * Prune the blocks recorded in info->corrupt_blocks from the extent tree.
 *
 * @info: filesystem whose corrupt block cache is processed
 *
 * Entries are fetched with search_cache_extent() from offset 0, handed to
 * prune_one_block() and then removed from the cache tree.  The return value
 * of prune_one_block() is not checked here.  The work runs in a transaction
 * started on info->extent_root.
 *
 * Visible returns: PTR_ERR(trans) for the transaction handle, and the result
 * of btrfs_commit_transaction().
 */
9759 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9761 struct btrfs_trans_handle *trans = NULL;
9762 struct cache_extent *cache;
9763 struct btrfs_corrupt_block *corrupt;
9766 cache = search_cache_extent(info->corrupt_blocks, 0);
9770 trans = btrfs_start_transaction(info->extent_root, 1);
9772 return PTR_ERR(trans);
9774 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9775 prune_one_block(trans, info, corrupt);
9776 remove_cache_extent(info->corrupt_blocks, cache);
9779 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Drop cached free space state of @fs_info.
 *
 * EXTENT_DIRTY ranges reported by find_first_extent_bit() on
 * fs_info->free_space_cache are cleared with clear_extent_dirty().  Block
 * groups are then visited through btrfs_lookup_first_block_group(), moving
 * past each one by key.objectid + key.offset.
 *
 * NOTE(review): what is reset on each visited block group is presumably its
 * cached flag - confirm.
 */
9783 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9785 struct btrfs_block_group_cache *cache;
9790 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9791 &start, &end, EXTENT_DIRTY);
9794 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9799 cache = btrfs_lookup_first_block_group(fs_info, start);
9804 start = cache->key.objectid + cache->key.offset;
/*
 * Verify every extent record in @extent_cache and, in repair mode, fix what
 * can be fixed.
 *
 * @root:         any root of the filesystem; root->fs_info is what is used
 * @extent_cache: cache of extent records built by the tree scan; records are
 *                removed from it and their backrefs freed as they are
 *                processed
 *
 * Preparation: the range of every extent record and of every corrupt block
 * is marked dirty in fs_info->excluded_extents, corrupt blocks are pruned
 * with prune_corrupt_blocks() and reset_cached_block_groups() is called.
 *
 * Duplicates: while repairing, records on the duplicate_extents list go
 * through process_duplicates() and delete_duplicate_records().
 *
 * Per record, the following problems are reported on stderr:
 *   - rec->num_duplicates set
 *   - rec->refs differing from rec->extent_item_refs (the record is also
 *     passed to record_orphan_data_extents())
 *   - all_backpointers_checked() returning non-zero
 *   - rec->owner_ref_checked not set
 *   - rec->bad_full_backref, rec->crossing_stripes, rec->wrong_chunk_type
 * Reference problems are repaired with fixup_extent_refs(), a bad full
 * backref flag with fixup_extent_flags().  The comment in the body states
 * that crossing stripes have no repair function yet.
 *
 * End: a return value other than 0 and -EAGAIN prints the abort message;
 * the other visible branch runs btrfs_fix_block_accounting() in a
 * transaction on the extent root.
 *
 * NOTE(review): presumably returns 0 when no error is left, -EAGAIN when the
 * caller has to rescan, and another non-zero value on errors - confirm.
 */
9808 static int check_extent_refs(struct btrfs_root *root,
9809 struct cache_tree *extent_cache)
9811 struct extent_record *rec;
9812 struct cache_extent *cache;
9818 * if we're doing a repair, we have to make sure
9819 * we don't allocate from the problem extents.
9820 * In the worst case, this will be all the
9823 cache = search_cache_extent(extent_cache, 0);
9825 rec = container_of(cache, struct extent_record, cache);
9826 set_extent_dirty(root->fs_info->excluded_extents,
9828 rec->start + rec->max_size - 1);
9829 cache = next_cache_extent(cache);
9832 /* pin down all the corrupted blocks too */
9833 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9835 set_extent_dirty(root->fs_info->excluded_extents,
9837 cache->start + cache->size - 1);
9838 cache = next_cache_extent(cache);
9840 prune_corrupt_blocks(root->fs_info);
9841 reset_cached_block_groups(root->fs_info);
9844 reset_cached_block_groups(root->fs_info);
9847 * We need to delete any duplicate entries we find first otherwise we
9848 * could mess up the extent tree when we have backrefs that actually
9849 * belong to a different extent item and not the weird duplicate one.
9851 while (repair && !list_empty(&duplicate_extents)) {
9852 rec = to_extent_record(duplicate_extents.next);
9853 list_del_init(&rec->list);
9855 /* Sometimes we can find a backref before we find an actual
9856 * extent, so we need to process it a little bit to see if there
9857 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9858 * if this is a backref screwup. If we need to delete stuff
9859 * process_duplicates() will return 0, otherwise it will return
9862 if (process_duplicates(extent_cache, rec))
9864 ret = delete_duplicate_records(root, rec);
9868 * delete_duplicate_records will return the number of entries
9869 * deleted, so if it's greater than 0 then we know we actually
9870 * did something and we need to remove.
9883 cache = search_cache_extent(extent_cache, 0);
9886 rec = container_of(cache, struct extent_record, cache);
9887 if (rec->num_duplicates) {
9888 fprintf(stderr, "extent item %llu has multiple extent "
9889 "items\n", (unsigned long long)rec->start);
9893 if (rec->refs != rec->extent_item_refs) {
9894 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9895 (unsigned long long)rec->start,
9896 (unsigned long long)rec->nr);
9897 fprintf(stderr, "extent item %llu, found %llu\n",
9898 (unsigned long long)rec->extent_item_refs,
9899 (unsigned long long)rec->refs);
9900 ret = record_orphan_data_extents(root->fs_info, rec);
9906 if (all_backpointers_checked(rec, 1)) {
9907 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9908 (unsigned long long)rec->start,
9909 (unsigned long long)rec->nr);
9913 if (!rec->owner_ref_checked) {
9914 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9915 (unsigned long long)rec->start,
9916 (unsigned long long)rec->nr);
9921 if (repair && fix) {
9922 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9928 if (rec->bad_full_backref) {
9929 fprintf(stderr, "bad full backref, on [%llu]\n",
9930 (unsigned long long)rec->start);
9932 ret = fixup_extent_flags(root->fs_info, rec);
9940 * Although it's not a extent ref's problem, we reuse this
9941 * routine for error reporting.
9942 * No repair function yet.
9944 if (rec->crossing_stripes) {
9946 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9947 rec->start, rec->start + rec->max_size);
9951 if (rec->wrong_chunk_type) {
9953 "bad extent [%llu, %llu), type mismatch with chunk\n",
9954 rec->start, rec->start + rec->max_size);
9958 remove_cache_extent(extent_cache, cache);
9959 free_all_extent_backrefs(rec);
9960 if (!init_extent_tree && repair && (!cur_err || fix))
9961 clear_extent_dirty(root->fs_info->excluded_extents,
9963 rec->start + rec->max_size - 1);
9968 if (ret && ret != -EAGAIN) {
9969 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9972 struct btrfs_trans_handle *trans;
9974 root = root->fs_info->extent_root;
9975 trans = btrfs_start_transaction(root, 1);
9976 if (IS_ERR(trans)) {
9977 ret = PTR_ERR(trans);
9981 ret = btrfs_fix_block_accounting(trans, root);
9984 ret = btrfs_commit_transaction(trans, root);
9993 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9997 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9998 stripe_size = length;
9999 stripe_size /= num_stripes;
10000 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10001 stripe_size = length * 2;
10002 stripe_size /= num_stripes;
10003 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10004 stripe_size = length;
10005 stripe_size /= (num_stripes - 1);
10006 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10007 stripe_size = length;
10008 stripe_size /= (num_stripes - 2);
10010 stripe_size = length;
10012 return stripe_size;
10016 * Check the chunk with its block group/dev list ref:
10017 * Return 0 if all refs seem valid.
10018 * Return 1 if only part of the refs seem valid and a later check is needed to rebuild refs,
10019 * e.g. a missing block group that requires searching the extent tree to rebuild it.
10020 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * Cross-check one chunk record against its block group item and its device
 * extents.
 *
 * @chunk_rec:         chunk being checked
 * @block_group_cache: block group records; a matching record is unlinked
 *                     from its list and stored in chunk_rec->bg_rec
 * @dev_extent_cache:  device extent records; each matching record is moved
 *                     onto chunk_rec->dextents
 *
 * Block group check: the record found for the chunk range must have
 * offset == chunk length, objectid == chunk offset and matching flags.
 * Device extent check: for every stripe a dev extent is looked up by
 * (devid, offset, stripe length) and must match devid, offset, chunk offset
 * and the length from calc_stripe_length().
 * Mismatches and missing records are described by the format strings below.
 */
10022 static int check_chunk_refs(struct chunk_record *chunk_rec,
10023 struct block_group_tree *block_group_cache,
10024 struct device_extent_tree *dev_extent_cache,
10027 struct cache_extent *block_group_item;
10028 struct block_group_record *block_group_rec;
10029 struct cache_extent *dev_extent_item;
10030 struct device_extent_record *dev_extent_rec;
10034 int metadump_v2 = 0;
/* Look up the block group record that belongs to this chunk. */
10038 block_group_item = lookup_cache_extent(&block_group_cache->tree,
10040 chunk_rec->length);
10041 if (block_group_item) {
10042 block_group_rec = container_of(block_group_item,
10043 struct block_group_record,
10045 if (chunk_rec->length != block_group_rec->offset ||
10046 chunk_rec->offset != block_group_rec->objectid ||
10048 chunk_rec->type_flags != block_group_rec->flags)) {
10051 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10052 chunk_rec->objectid,
10057 chunk_rec->type_flags,
10058 block_group_rec->objectid,
10059 block_group_rec->type,
10060 block_group_rec->offset,
10061 block_group_rec->offset,
10062 block_group_rec->objectid,
10063 block_group_rec->flags);
/* Claim the block group for this chunk and take it off its list. */
10066 list_del_init(&block_group_rec->list);
10067 chunk_rec->bg_rec = block_group_rec;
10072 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10073 chunk_rec->objectid,
10078 chunk_rec->type_flags);
/* Length each device extent of this chunk is expected to have. */
10085 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10086 chunk_rec->num_stripes);
10087 for (i = 0; i < chunk_rec->num_stripes; ++i) {
10088 devid = chunk_rec->stripes[i].devid;
10089 offset = chunk_rec->stripes[i].offset;
10090 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10091 devid, offset, length);
10092 if (dev_extent_item) {
10093 dev_extent_rec = container_of(dev_extent_item,
10094 struct device_extent_record,
10096 if (dev_extent_rec->objectid != devid ||
10097 dev_extent_rec->offset != offset ||
10098 dev_extent_rec->chunk_offset != chunk_rec->offset ||
10099 dev_extent_rec->length != length) {
10102 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10103 chunk_rec->objectid,
10106 chunk_rec->stripes[i].devid,
10107 chunk_rec->stripes[i].offset,
10108 dev_extent_rec->objectid,
10109 dev_extent_rec->offset,
10110 dev_extent_rec->length);
/* Move the dev extent onto the list of dev extents owned by the chunk. */
10113 list_move(&dev_extent_rec->chunk_list,
10114 &chunk_rec->dextents);
10119 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10120 chunk_rec->objectid,
10123 chunk_rec->stripes[i].devid,
10124 chunk_rec->stripes[i].offset);
10131 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Check every chunk in @chunk_cache and sort the chunks by result.
 *
 * @chunk_cache:       cache tree of chunk records
 * @block_group_cache: block group records, consumed by check_chunk_refs()
 * @dev_extent_cache:  device extent records, consumed by check_chunk_refs()
 * @good:              list receiving chunks with result 0, may be NULL
 * @bad:               list receiving chunks with a negative result, may be
 *                     NULL
 * @rebuild:           list receiving chunks with a positive result, may be
 *                     NULL
 * @silent:            passed through to check_chunk_refs()
 *
 * After the chunk walk, block groups still linked on
 * block_group_cache->block_groups and device extents on
 * dev_extent_cache->no_chunk_orphans are walked; the format strings report
 * them as having no chunk.
 */
10132 int check_chunks(struct cache_tree *chunk_cache,
10133 struct block_group_tree *block_group_cache,
10134 struct device_extent_tree *dev_extent_cache,
10135 struct list_head *good, struct list_head *bad,
10136 struct list_head *rebuild, int silent)
10138 struct cache_extent *chunk_item;
10139 struct chunk_record *chunk_rec;
10140 struct block_group_record *bg_rec;
10141 struct device_extent_record *dext_rec;
10145 chunk_item = first_cache_extent(chunk_cache);
10146 while (chunk_item) {
10147 chunk_rec = container_of(chunk_item, struct chunk_record,
10149 err = check_chunk_refs(chunk_rec, block_group_cache,
10150 dev_extent_cache, silent);
/* File the chunk on the list that matches the result; NULL lists are skipped. */
10153 if (err == 0 && good)
10154 list_add_tail(&chunk_rec->list, good);
10155 if (err > 0 && rebuild)
10156 list_add_tail(&chunk_rec->list, rebuild);
10157 if (err < 0 && bad)
10158 list_add_tail(&chunk_rec->list, bad);
10159 chunk_item = next_cache_extent(chunk_item);
10162 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10165 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10173 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10177 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10178 dext_rec->objectid,
/*
 * Compare the bytes used recorded in a device item with the sum of its
 * device extents.
 *
 * @dev_rec:    device record; devid and byte_used are read
 * @dext_cache: device extent records
 *
 * Device extents are walked from search_cache_extent2(devid, 0); each visited
 * extent is unlinked from its device_list and its length added to the total.
 * The objectid of each extent is compared with the devid.  A total different
 * from dev_rec->byte_used is reported.
 *
 * NOTE(review): presumably returns 0 when the totals match and a non-zero
 * value otherwise - confirm.
 */
10188 static int check_device_used(struct device_record *dev_rec,
10189 struct device_extent_tree *dext_cache)
10191 struct cache_extent *cache;
10192 struct device_extent_record *dev_extent_rec;
10193 u64 total_byte = 0;
10195 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10197 dev_extent_rec = container_of(cache,
10198 struct device_extent_record,
10200 if (dev_extent_rec->objectid != dev_rec->devid)
10203 list_del_init(&dev_extent_rec->device_list);
10204 total_byte += dev_extent_rec->length;
10205 cache = next_cache_extent(cache);
10208 if (total_byte != dev_rec->byte_used) {
10210 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10211 total_byte, dev_rec->byte_used, dev_rec->objectid,
10212 dev_rec->type, dev_rec->offset);
10219 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Check every device record against its device extents.
 *
 * @dev_cache:        rb-tree of device records, walked with rb_first() and
 *                    rb_next()
 * @dev_extent_cache: device extent records
 *
 * Each device goes through check_device_used().  Afterwards the device
 * extents linked on dev_extent_cache->no_device_orphans are walked and
 * reported as having no device.
 *
 * NOTE(review): presumably returns 0 when everything matches and a non-zero
 * value otherwise - confirm.
 */
10220 static int check_devices(struct rb_root *dev_cache,
10221 struct device_extent_tree *dev_extent_cache)
10223 struct rb_node *dev_node;
10224 struct device_record *dev_rec;
10225 struct device_extent_record *dext_rec;
10229 dev_node = rb_first(dev_cache);
10231 dev_rec = container_of(dev_node, struct device_record, node);
10232 err = check_device_used(dev_rec, dev_extent_cache);
10236 dev_node = rb_next(dev_node);
10238 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10241 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10242 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record describing one tree root and append it to
 * @head.
 *
 * @head:          list the new record is added to, at the tail
 * @objectid:      stored in the objectid field of the record
 * @bytenr:        stored in the bytenr field of the record
 * @last_snapshot: stored in the last_snapshot field of the record
 * @level:         stored in the level field of the record
 * @drop_level:    stored in the drop_level field of the record
 * @drop_key:      copied into the drop_key field of the record; callers in
 *                 this file also pass NULL, so the copy is presumably guarded
 *                 by a NULL check - confirm
 *
 * The record comes from malloc(), so fields that are not assigned here hold
 * indeterminate values.
 *
 * NOTE(review): presumably returns 0 on success and -ENOMEM when the
 * allocation fails - confirm.
 */
10249 static int add_root_item_to_list(struct list_head *head,
10250 u64 objectid, u64 bytenr, u64 last_snapshot,
10251 u8 level, u8 drop_level,
10252 struct btrfs_key *drop_key)
10255 struct root_item_record *ri_rec;
10256 ri_rec = malloc(sizeof(*ri_rec));
10259 ri_rec->bytenr = bytenr;
10260 ri_rec->objectid = objectid;
10261 ri_rec->level = level;
10262 ri_rec->drop_level = drop_level;
10263 ri_rec->last_snapshot = last_snapshot;
10265 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10266 list_add_tail(&ri_rec->list, head);
/*
 * Empty a list of root_item_record entries.
 *
 * @list: list head; entries are taken from the front and unlinked until the
 *        list is empty
 *
 * NOTE(review): each unlinked record is presumably also freed - confirm.
 */
10271 static void free_root_item_list(struct list_head *list)
10273 struct root_item_record *ri_rec;
10275 while (!list_empty(list)) {
10276 ri_rec = list_first_entry(list, struct root_item_record,
10278 list_del_init(&ri_rec->list);
/*
 * Scan the trees whose roots are queued on @list.
 *
 * @list: list of root_item_record entries; each entry is unlinked once its
 *        root block has been handled
 * @root: root used to reach fs_info and passed to run_next_block()
 * @bits: work array passed to run_next_block()
 * @pending, @seen, @reada, @nodes, @extent_cache, @chunk_cache, @dev_cache,
 * @block_group_cache, @dev_extent_cache: caches passed through unchanged to
 *        add_root_to_pending() and run_next_block()
 *
 * For each queued root the root block is read with read_tree_block() and
 * queued through add_root_to_pending().  run_next_block() is called both
 * with the current record and, later, with a NULL record.
 *
 * NOTE(review): the return value is presumably that of the failing helper,
 * and may be -EAGAIN (the caller tests for it) - confirm.
 */
10283 static int deal_root_from_list(struct list_head *list,
10284 struct btrfs_root *root,
10285 struct block_info *bits,
10287 struct cache_tree *pending,
10288 struct cache_tree *seen,
10289 struct cache_tree *reada,
10290 struct cache_tree *nodes,
10291 struct cache_tree *extent_cache,
10292 struct cache_tree *chunk_cache,
10293 struct rb_root *dev_cache,
10294 struct block_group_tree *block_group_cache,
10295 struct device_extent_tree *dev_extent_cache)
10300 while (!list_empty(list)) {
10301 struct root_item_record *rec;
10302 struct extent_buffer *buf;
10303 rec = list_entry(list->next,
10304 struct root_item_record, list);
10306 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10307 if (!extent_buffer_uptodate(buf)) {
10308 free_extent_buffer(buf);
10312 ret = add_root_to_pending(buf, extent_cache, pending,
10313 seen, nodes, rec->objectid);
10317 * To rebuild extent tree, we need deal with snapshot
10318 * one by one, otherwise we deal with node firstly which
10319 * can maximize readahead.
10322 ret = run_next_block(root, bits, bits_nr, &last,
10323 pending, seen, reada, nodes,
10324 extent_cache, chunk_cache,
10325 dev_cache, block_group_cache,
10326 dev_extent_cache, rec);
10330 free_extent_buffer(buf);
10331 list_del(&rec->list);
10337 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10338 reada, nodes, extent_cache, chunk_cache,
10339 dev_cache, block_group_cache,
10340 dev_extent_cache, NULL);
/*
 * Top-level check of chunks, block groups, device extents and extent
 * references of a filesystem.
 *
 * @fs_info: filesystem to check
 *
 * Phases visible in the body:
 *   1. all caches and lists are initialised and the excluded_extents,
 *      fsck_extent_cache, free_extent_hook and corrupt_blocks members of
 *      fs_info are pointed at the local objects
 *   2. the tree root and the chunk root are queued on normal_trees
 *   3. the root tree is walked; every ROOT_ITEM is queued on normal_trees
 *      when the objectid of its drop_progress key is 0, otherwise on
 *      dropping_trees together with drop_level and the drop progress key
 *   4. deal_root_from_list() runs on normal_trees and then on
 *      dropping_trees
 *   5. check_chunks(), check_extent_refs() and check_devices() run on the
 *      collected caches
 *   6. the fs_info members are reset to NULL and all caches are released
 * Each of the calls in phases 4 and 5 (except check_devices) has its result
 * compared with -EAGAIN.
 *
 * NOTE(review): the two cleanup sequences at the end presumably belong to
 * the normal exit and to the -EAGAIN restart path - confirm.
 */
10350 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10352 struct rb_root dev_cache;
10353 struct cache_tree chunk_cache;
10354 struct block_group_tree block_group_cache;
10355 struct device_extent_tree dev_extent_cache;
10356 struct cache_tree extent_cache;
10357 struct cache_tree seen;
10358 struct cache_tree pending;
10359 struct cache_tree reada;
10360 struct cache_tree nodes;
10361 struct extent_io_tree excluded_extents;
10362 struct cache_tree corrupt_blocks;
10363 struct btrfs_path path;
10364 struct btrfs_key key;
10365 struct btrfs_key found_key;
10367 struct block_info *bits;
10369 struct extent_buffer *leaf;
10371 struct btrfs_root_item ri;
10372 struct list_head dropping_trees;
10373 struct list_head normal_trees;
10374 struct btrfs_root *root1;
10375 struct btrfs_root *root;
10379 root = fs_info->fs_root;
10380 dev_cache = RB_ROOT;
10381 cache_tree_init(&chunk_cache);
10382 block_group_tree_init(&block_group_cache);
10383 device_extent_tree_init(&dev_extent_cache);
10385 cache_tree_init(&extent_cache);
10386 cache_tree_init(&seen);
10387 cache_tree_init(&pending);
10388 cache_tree_init(&nodes);
10389 cache_tree_init(&reada);
10390 cache_tree_init(&corrupt_blocks);
10391 extent_io_tree_init(&excluded_extents);
10392 INIT_LIST_HEAD(&dropping_trees);
10393 INIT_LIST_HEAD(&normal_trees);
/* Publish the local state through fs_info for the duration of the check. */
10396 fs_info->excluded_extents = &excluded_extents;
10397 fs_info->fsck_extent_cache = &extent_cache;
10398 fs_info->free_extent_hook = free_extent_hook;
10399 fs_info->corrupt_blocks = &corrupt_blocks;
10403 bits = malloc(bits_nr * sizeof(struct block_info));
10409 if (ctx.progress_enabled) {
10410 ctx.tp = TASK_EXTENTS;
10411 task_start(ctx.info);
/* Queue the tree root and the chunk root before walking the root items. */
10415 root1 = fs_info->tree_root;
10416 level = btrfs_header_level(root1->node);
10417 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10418 root1->node->start, 0, level, 0, NULL);
10421 root1 = fs_info->chunk_root;
10422 level = btrfs_header_level(root1->node);
10423 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10424 root1->node->start, 0, level, 0, NULL);
10427 btrfs_init_path(&path);
10430 key.type = BTRFS_ROOT_ITEM_KEY;
10431 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10435 leaf = path.nodes[0];
10436 slot = path.slots[0];
10437 if (slot >= btrfs_header_nritems(path.nodes[0])) {
10438 ret = btrfs_next_leaf(root, &path);
10441 leaf = path.nodes[0];
10442 slot = path.slots[0];
10444 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10445 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10446 unsigned long offset;
10449 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10450 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10451 last_snapshot = btrfs_root_last_snapshot(&ri);
/* A zero drop_progress objectid selects the normal list, anything else the dropping list. */
10452 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10453 level = btrfs_root_level(&ri);
10454 ret = add_root_item_to_list(&normal_trees,
10455 found_key.objectid,
10456 btrfs_root_bytenr(&ri),
10457 last_snapshot, level,
10462 level = btrfs_root_level(&ri);
10463 objectid = found_key.objectid;
10464 btrfs_disk_key_to_cpu(&found_key,
10465 &ri.drop_progress);
10466 ret = add_root_item_to_list(&dropping_trees,
10468 btrfs_root_bytenr(&ri),
10469 last_snapshot, level,
10470 ri.drop_level, &found_key);
10477 btrfs_release_path(&path);
10480 * check_block can return -EAGAIN if it fixes something, please keep
10481 * this in mind when dealing with return values from these functions, if
10482 * we get -EAGAIN we want to fall through and restart the loop.
10484 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
10485 &seen, &reada, &nodes, &extent_cache,
10486 &chunk_cache, &dev_cache, &block_group_cache,
10487 &dev_extent_cache);
10489 if (ret == -EAGAIN)
10493 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
10494 &pending, &seen, &reada, &nodes,
10495 &extent_cache, &chunk_cache, &dev_cache,
10496 &block_group_cache, &dev_extent_cache);
10498 if (ret == -EAGAIN)
10503 ret = check_chunks(&chunk_cache, &block_group_cache,
10504 &dev_extent_cache, NULL, NULL, NULL, 0);
10506 if (ret == -EAGAIN)
10511 ret = check_extent_refs(root, &extent_cache);
10513 if (ret == -EAGAIN)
10518 ret = check_devices(&dev_cache, &dev_extent_cache);
10523 task_stop(ctx.info);
/* Detach the local state from fs_info before the locals go out of scope. */
10525 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10526 extent_io_tree_cleanup(&excluded_extents);
10527 fs_info->fsck_extent_cache = NULL;
10528 fs_info->free_extent_hook = NULL;
10529 fs_info->corrupt_blocks = NULL;
10530 fs_info->excluded_extents = NULL;
10533 free_chunk_cache_tree(&chunk_cache);
10534 free_device_cache_tree(&dev_cache);
10535 free_block_group_tree(&block_group_cache);
10536 free_device_extent_tree(&dev_extent_cache);
10537 free_extent_cache_tree(&seen);
10538 free_extent_cache_tree(&pending);
10539 free_extent_cache_tree(&reada);
10540 free_extent_cache_tree(&nodes);
10541 free_root_item_list(&normal_trees);
10542 free_root_item_list(&dropping_trees);
10545 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10546 free_extent_cache_tree(&seen);
10547 free_extent_cache_tree(&pending);
10548 free_extent_cache_tree(&reada);
10549 free_extent_cache_tree(&nodes);
10550 free_chunk_cache_tree(&chunk_cache);
10551 free_block_group_tree(&block_group_cache);
10552 free_device_cache_tree(&dev_cache);
10553 free_device_extent_tree(&dev_extent_cache);
10554 free_extent_record_cache(&extent_cache);
10555 free_root_item_list(&normal_trees);
10556 free_root_item_list(&dropping_trees);
10557 extent_io_tree_cleanup(&excluded_extents);
10562 * Check backrefs of a tree block given by @bytenr or @eb.
10564 * @root: the root containing the @bytenr or @eb
10565 * @eb: tree block extent buffer, can be NULL
10566 * @bytenr: bytenr of the tree block to search
10567 * @level: tree level of the tree block
10568 * @owner: owner of the tree block
10570 * Return >0 for any error found and output error message
10571 * Return 0 for no error found
/*
 * Implementation notes for the tree block backref check.
 *
 * Lookup: the extent tree is searched with key (bytenr, type, -1), where the
 * type is METADATA_ITEM when the SKINNY_METADATA incompat flag is set and
 * EXTENT_ITEM otherwise, followed by btrfs_previous_extent_item().  A failed
 * lookup sets BACKREF_MISSING.
 *
 * Item checks, each setting BACKREF_MISMATCH on failure:
 *   - the TREE_BLOCK flag must be set in the extent flags
 *   - the header generation of @eb must equal the extent generation
 *   - @level must equal the level stored in the item (key offset for
 *     METADATA_ITEM, btrfs_tree_block_info for EXTENT_ITEM)
 *   - a tree block that is not owned by a fs tree must have exactly one ref
 *
 * Backref search: the inline refs are walked first.  A TREE_BLOCK_REF whose
 * offset is root->objectid or @owner is a match.  A SHARED_BLOCK_REF is
 * followed by calling this function again for the referencing block at
 * level + 1, except that the root block of a tree reloc root is accepted
 * without the recursive check.  A separate TREE_BLOCK_REF item keyed by
 * root->objectid is searched as well.
 *
 * The backref lost message is printed only when @eb is not NULL.
 */
10573 static int check_tree_block_ref(struct btrfs_root *root,
10574 struct extent_buffer *eb, u64 bytenr,
10575 int level, u64 owner)
10577 struct btrfs_key key;
10578 struct btrfs_root *extent_root = root->fs_info->extent_root;
10579 struct btrfs_path path;
10580 struct btrfs_extent_item *ei;
10581 struct btrfs_extent_inline_ref *iref;
10582 struct extent_buffer *leaf;
10588 u32 nodesize = root->fs_info->nodesize;
10591 int tree_reloc_root = 0;
10596 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10597 btrfs_header_bytenr(root->node) == bytenr)
10598 tree_reloc_root = 1;
10600 btrfs_init_path(&path);
10601 key.objectid = bytenr;
10602 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10603 key.type = BTRFS_METADATA_ITEM_KEY;
10605 key.type = BTRFS_EXTENT_ITEM_KEY;
10606 key.offset = (u64)-1;
10608 /* Search for the backref in extent tree */
10609 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10611 err |= BACKREF_MISSING;
10614 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10616 err |= BACKREF_MISSING;
10620 leaf = path.nodes[0];
10621 slot = path.slots[0];
10622 btrfs_item_key_to_cpu(leaf, &key, slot);
10624 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10626 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10627 skinny_level = (int)key.offset;
10628 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10630 struct btrfs_tree_block_info *info;
10632 info = (struct btrfs_tree_block_info *)(ei + 1);
10633 skinny_level = btrfs_tree_block_level(leaf, info);
10634 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10641 if (!(btrfs_extent_flags(leaf, ei) &
10642 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10644 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10645 key.objectid, nodesize,
10646 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10647 err = BACKREF_MISMATCH;
10649 header_gen = btrfs_header_generation(eb);
10650 extent_gen = btrfs_extent_generation(leaf, ei);
10651 if (header_gen != extent_gen) {
10653 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10654 key.objectid, nodesize, header_gen,
10656 err = BACKREF_MISMATCH;
10658 if (level != skinny_level) {
10660 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10661 key.objectid, nodesize, level, skinny_level);
10662 err = BACKREF_MISMATCH;
10664 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10666 "extent[%llu %u] is referred by other roots than %llu",
10667 key.objectid, nodesize, root->objectid);
10668 err = BACKREF_MISMATCH;
10673 * Iterate the extent/metadata item to find the exact backref
10675 item_size = btrfs_item_size_nr(leaf, slot);
10676 ptr = (unsigned long)iref;
10677 end = (unsigned long)ei + item_size;
10678 while (ptr < end) {
10679 iref = (struct btrfs_extent_inline_ref *)ptr;
10680 type = btrfs_extent_inline_ref_type(leaf, iref);
10681 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10683 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10684 (offset == root->objectid || offset == owner)) {
10686 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10688 * Backref of tree reloc root points to itself, no need
10689 * to check backref any more.
10691 if (tree_reloc_root)
10694 /* Check if the backref points to valid referencer */
10695 found_ref = !check_tree_block_ref(root, NULL,
10696 offset, level + 1, owner);
10701 ptr += btrfs_extent_inline_ref_size(type);
10705 * Inlined extent item doesn't have what we need, check
10706 * TREE_BLOCK_REF_KEY
10709 btrfs_release_path(&path);
10710 key.objectid = bytenr;
10711 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10712 key.offset = root->objectid;
10714 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10719 err |= BACKREF_MISSING;
10721 btrfs_release_path(&path);
10722 if (eb && (err & BACKREF_MISSING))
10723 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10724 bytenr, nodesize, owner, level);
10729 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10731 * Return >0 any error found and output error message
10732 * Return 0 for no error found
/*
 * Implementation notes for the EXTENT_DATA item check.
 *
 * @root: root the file extent belongs to
 * @eb:   leaf holding the EXTENT_DATA item
 * @slot: slot of the item inside @eb
 *
 * Inline extents and extents with disk_bytenr == 0 are not checked.
 *
 * Alignment: disk_num_bytes and num_bytes must be aligned to the sector size
 * of the filesystem, otherwise BYTES_UNALIGNED is set.  The two sizes are
 * accounted in data_bytes_allocated and data_bytes_referenced.
 *
 * Extent item: the EXTENT_ITEM keyed by (disk_bytenr, disk_num_bytes) is
 * looked up in the extent tree and must carry the DATA flag, otherwise
 * BACKREF_MISMATCH is set.
 *
 * Data backref search, in this order:
 *   1. inline refs of the extent item: an EXTENT_DATA_REF whose root is the
 *      owner of @eb or root->objectid, or a SHARED_DATA_REF that passes
 *      check_tree_block_ref()
 *   2. a separate EXTENT_DATA_REF item keyed by hash_extent_data_ref() of
 *      root->objectid and the file extent key
 *   3. a separate SHARED_DATA_REF item keyed by eb->start
 * When none is found BACKREF_MISSING is set and the backref lost error is
 * printed.
 */
10734 static int check_extent_data_item(struct btrfs_root *root,
10735 struct extent_buffer *eb, int slot)
10737 struct btrfs_file_extent_item *fi;
10738 struct btrfs_path path;
10739 struct btrfs_root *extent_root = root->fs_info->extent_root;
10740 struct btrfs_key fi_key;
10741 struct btrfs_key dbref_key;
10742 struct extent_buffer *leaf;
10743 struct btrfs_extent_item *ei;
10744 struct btrfs_extent_inline_ref *iref;
10745 struct btrfs_extent_data_ref *dref;
10748 u64 disk_num_bytes;
10749 u64 extent_num_bytes;
10756 int found_dbackref = 0;
10760 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10761 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10763 /* Nothing to check for hole and inline data extents */
10764 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10765 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10768 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10769 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10770 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10772 /* Check unaligned disk_num_bytes and num_bytes */
10773 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10775 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10776 fi_key.objectid, fi_key.offset, disk_num_bytes,
10777 root->fs_info->sectorsize);
10778 err |= BYTES_UNALIGNED;
10780 data_bytes_allocated += disk_num_bytes;
10782 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10784 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10785 fi_key.objectid, fi_key.offset, extent_num_bytes,
10786 root->fs_info->sectorsize);
10787 err |= BYTES_UNALIGNED;
10789 data_bytes_referenced += extent_num_bytes;
10791 owner = btrfs_header_owner(eb);
10793 /* Check the extent item of the file extent in extent tree */
10794 btrfs_init_path(&path);
10795 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10796 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10797 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10799 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10803 leaf = path.nodes[0];
10804 slot = path.slots[0];
10805 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10807 extent_flags = btrfs_extent_flags(leaf, ei);
10809 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10811 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10812 disk_bytenr, disk_num_bytes,
10813 BTRFS_EXTENT_FLAG_DATA);
10814 err |= BACKREF_MISMATCH;
10817 /* Check data backref inside that extent item */
10818 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10819 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10820 ptr = (unsigned long)iref;
10821 end = (unsigned long)ei + item_size;
10822 while (ptr < end) {
10823 iref = (struct btrfs_extent_inline_ref *)ptr;
10824 type = btrfs_extent_inline_ref_type(leaf, iref);
10825 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10827 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10828 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10829 if (ref_root == owner || ref_root == root->objectid)
10830 found_dbackref = 1;
10831 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10832 found_dbackref = !check_tree_block_ref(root, NULL,
10833 btrfs_extent_inline_ref_offset(leaf, iref),
10837 if (found_dbackref)
10839 ptr += btrfs_extent_inline_ref_size(type);
10842 if (!found_dbackref) {
10843 btrfs_release_path(&path);
10845 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10846 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10847 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10848 dbref_key.offset = hash_extent_data_ref(root->objectid,
10849 fi_key.objectid, fi_key.offset);
10851 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10852 &dbref_key, &path, 0, 0);
10854 found_dbackref = 1;
10858 btrfs_release_path(&path);
10861 * Neither inlined nor EXTENT_DATA_REF found, try
10862 * SHARED_DATA_REF as last chance.
10864 dbref_key.objectid = disk_bytenr;
10865 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10866 dbref_key.offset = eb->start;
10868 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10869 &dbref_key, &path, 0, 0);
10871 found_dbackref = 1;
10877 if (!found_dbackref)
10878 err |= BACKREF_MISSING;
10879 btrfs_release_path(&path);
10880 if (err & BACKREF_MISSING) {
10881 error("data extent[%llu %llu] backref lost",
10882 disk_bytenr, disk_num_bytes);
10888 * Get real tree block level for the case like shared block
10889 * Return >= 0 as tree level
10890 * Return <0 for error
/*
 * Implementation notes for the tree block level query.
 *
 * @fs_info: filesystem to query
 * @bytenr:  logical address of the tree block
 *
 * Two sources are read and compared:
 *   - the extent tree: the item found for (bytenr, METADATA_ITEM, -1) via
 *     btrfs_previous_extent_item() must have the TREE_BLOCK flag; its level
 *     is the key offset for a METADATA_ITEM key and the level stored in
 *     btrfs_tree_block_info otherwise; its generation is kept as transid
 *   - the tree block itself, read with read_tree_block() using that transid;
 *     its level comes from the block header
 * The header level is the value returned when both sources agree.
 */
10892 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10894 struct extent_buffer *eb;
10895 struct btrfs_path path;
10896 struct btrfs_key key;
10897 struct btrfs_extent_item *ei;
10904 /* Search extent tree for extent generation and level */
10905 key.objectid = bytenr;
10906 key.type = BTRFS_METADATA_ITEM_KEY;
10907 key.offset = (u64)-1;
10909 btrfs_init_path(&path);
10910 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10913 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10921 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10922 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10923 struct btrfs_extent_item);
10924 flags = btrfs_extent_flags(path.nodes[0], ei);
10925 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10930 /* Get transid for later read_tree_block() check */
10931 transid = btrfs_extent_generation(path.nodes[0], ei);
10933 /* Get backref level as one source */
10934 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10935 backref_level = key.offset;
10937 struct btrfs_tree_block_info *info;
10939 info = (struct btrfs_tree_block_info *)(ei + 1);
10940 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10942 btrfs_release_path(&path);
10944 /* Get level from tree block as an alternative source */
10945 eb = read_tree_block(fs_info, bytenr, transid);
10946 if (!extent_buffer_uptodate(eb)) {
10947 free_extent_buffer(eb);
10950 header_level = btrfs_header_level(eb);
10951 free_extent_buffer(eb);
10953 if (header_level != backref_level)
10955 return header_level;
10958 btrfs_release_path(&path);
10963 * Check if a tree block backref is valid (points to a valid tree block)
10964 * if level == -1, level will be resolved
10965 * Return >0 for any error found and print error message
/*
 * check_tree_block_backref() - verify a TREE_BLOCK_REF backref.
 *
 * @fs_info: filesystem being checked
 * @root_id: objectid of the root that the backref names as referencer
 * @bytenr:  start of the referenced tree block
 * @level:   expected level of the block; the -1 case is resolved with
 *           query_tree_block_level()
 *
 * The root @root_id is read, the first key of the block at @bytenr is
 * fetched, and that key is searched in the root with path.lowest_level set
 * to @level.  The node found at that level is compared with @bytenr and
 * @level.  Problems are collected in err as REFERENCER_MISSING or
 * REFERENCER_MISMATCH bits and reported through error().
 *
 * NOTE(review): the final return statement and several condition lines are
 * not visible in this excerpt.
 */
10967 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10968 u64 bytenr, int level)
10970 struct btrfs_root *root;
10971 struct btrfs_key key;
10972 struct btrfs_path path;
10973 struct extent_buffer *eb;
10974 struct extent_buffer *node;
10975 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10979 /* Query level for level == -1 special case */
10981 level = query_tree_block_level(fs_info, bytenr);
10983 err |= REFERENCER_MISSING;
/* Look up the root that is supposed to own the block */
10987 key.objectid = root_id;
10988 key.type = BTRFS_ROOT_ITEM_KEY;
10989 key.offset = (u64)-1;
10991 root = btrfs_read_fs_root(fs_info, &key);
10992 if (IS_ERR(root)) {
10993 err |= REFERENCER_MISSING;
10997 /* Read out the tree block to get item/node key */
10998 eb = read_tree_block(fs_info, bytenr, 0);
10999 if (!extent_buffer_uptodate(eb)) {
11000 err |= REFERENCER_MISSING;
11001 free_extent_buffer(eb);
11005 /* Empty tree, no need to check key */
11006 if (!btrfs_header_nritems(eb) && !level) {
11007 free_extent_buffer(eb);
/*
 * The first key is read with the node accessor or the item accessor; the
 * condition choosing between the two is not visible in this excerpt.
 */
11012 btrfs_node_key_to_cpu(eb, &key, 0);
11014 btrfs_item_key_to_cpu(eb, &key, 0);
11016 free_extent_buffer(eb);
11018 btrfs_init_path(&path);
11019 path.lowest_level = level;
11020 /* Search with the first key, to ensure we can reach it */
11021 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11023 err |= REFERENCER_MISSING;
/* The node reached at the wanted level must be the block under test */
11027 node = path.nodes[level];
11028 if (btrfs_header_bytenr(node) != bytenr) {
11030 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
11031 bytenr, nodesize, bytenr,
11032 btrfs_header_bytenr(node));
11033 err |= REFERENCER_MISMATCH;
11035 if (btrfs_header_level(node) != level) {
11037 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
11038 bytenr, nodesize, level,
11039 btrfs_header_level(node));
11040 err |= REFERENCER_MISMATCH;
11044 btrfs_release_path(&path);
/* Two message variants exist: one without and one with the level */
11046 if (err & REFERENCER_MISSING) {
11048 error("extent [%llu %d] lost referencer (owner: %llu)",
11049 bytenr, nodesize, root_id);
11052 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11053 bytenr, nodesize, root_id, level);
11060 * Check if tree block @eb is tree reloc root.
11061 * Return 0 if it's not or any problem happens
11062 * Return 1 if it's a tree reloc root
/*
 * is_tree_reloc_root() - test whether @eb is the root node of a tree reloc
 * tree.
 *
 * @fs_info: filesystem being checked
 * @eb:      tree block to test
 *
 * The root keyed (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM, owner of @eb) is
 * read with btrfs_read_fs_root_no_cache() and the bytenr of its node is
 * compared with the bytenr of @eb.  The root obtained here is released with
 * btrfs_free_fs_root().
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 */
11064 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11065 struct extent_buffer *eb)
11067 struct btrfs_root *tree_reloc_root;
11068 struct btrfs_key key;
11069 u64 bytenr = btrfs_header_bytenr(eb);
11070 u64 owner = btrfs_header_owner(eb);
/* The key offset of the reloc root is the owner recorded in the header */
11073 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11074 key.offset = owner;
11075 key.type = BTRFS_ROOT_ITEM_KEY;
11077 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11078 if (IS_ERR(tree_reloc_root))
11081 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
11083 btrfs_free_fs_root(tree_reloc_root);
11088 * Check referencer for shared block backref
11089 * If level == -1, this function will resolve the level.
/*
 * check_shared_block_backref() - verify a SHARED_BLOCK_REF backref.
 *
 * @fs_info: filesystem being checked
 * @parent:  bytenr of the tree block that should point at @bytenr
 * @bytenr:  start of the referenced tree block
 * @level:   level of the referenced block; query_tree_block_level() is
 *           called to resolve it (the guarding condition is not visible)
 *
 * The parent block is read, its header level is compared with @level + 1,
 * and its block pointers are scanned for @bytenr.  parent == bytenr is
 * handled separately through is_tree_reloc_root().  When found_parent stays
 * zero an error is printed and REFERENCER_MISSING is returned.
 */
11091 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11092 u64 parent, u64 bytenr, int level)
11094 struct extent_buffer *eb;
11096 int found_parent = 0;
11099 eb = read_tree_block(fs_info, parent, 0);
11100 if (!extent_buffer_uptodate(eb))
11104 level = query_tree_block_level(fs_info, bytenr);
11108 /* It's possible it's a tree reloc root */
11109 if (parent == bytenr) {
11110 if (is_tree_reloc_root(fs_info, eb))
11115 if (level + 1 != btrfs_header_level(eb))
/* Look for a child pointer in the parent that equals bytenr */
11118 nr = btrfs_header_nritems(eb);
11119 for (i = 0; i < nr; i++) {
11120 if (bytenr == btrfs_node_blockptr(eb, i)) {
11126 free_extent_buffer(eb);
11127 if (!found_parent) {
11129 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
11130 bytenr, fs_info->nodesize, parent, level);
11131 return REFERENCER_MISSING;
11137 * Check referencer for normal (inlined) data ref
11138 * If len == 0, it will be resolved by searching in extent tree
/*
 * check_extent_data_backref() - verify an EXTENT_DATA_REF against the file
 * extents of the inode it names.
 *
 * @fs_info:  filesystem being checked
 * @root_id:  root objectid recorded in the data ref
 * @objectid: inode objectid recorded in the data ref
 * @offset:   offset recorded in the data ref
 * @bytenr:   start of the data extent
 * @len:      length of the data extent
 * @count:    reference count recorded in the data ref
 *
 * The EXTENT_DATA items of inode @objectid in root @root_id are iterated
 * and matched on disk bytenr, disk num bytes and
 * (key.offset - file extent offset).  When found_count differs from @count
 * an error is printed and REFERENCER_MISSING is returned.
 *
 * NOTE(review): the extent tree lookup at the top presumably serves the
 * len == 0 case; its guarding condition, the right-hand side of the offset
 * comparison and the found_count update are not visible in this excerpt.
 */
11140 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
11141 u64 root_id, u64 objectid, u64 offset,
11142 u64 bytenr, u64 len, u32 count)
11144 struct btrfs_root *root;
11145 struct btrfs_root *extent_root = fs_info->extent_root;
11146 struct btrfs_key key;
11147 struct btrfs_path path;
11148 struct extent_buffer *leaf;
11149 struct btrfs_file_extent_item *fi;
11150 u32 found_count = 0;
/* Find the EXTENT_ITEM recorded for bytenr in the extent tree */
11155 key.objectid = bytenr;
11156 key.type = BTRFS_EXTENT_ITEM_KEY;
11157 key.offset = (u64)-1;
11159 btrfs_init_path(&path);
11160 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11163 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11166 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11167 if (key.objectid != bytenr ||
11168 key.type != BTRFS_EXTENT_ITEM_KEY)
11171 btrfs_release_path(&path);
/* Switch to the referencing root named by the data ref */
11173 key.objectid = root_id;
11174 key.type = BTRFS_ROOT_ITEM_KEY;
11175 key.offset = (u64)-1;
11176 btrfs_init_path(&path);
11178 root = btrfs_read_fs_root(fs_info, &key);
11182 key.objectid = objectid;
11183 key.type = BTRFS_EXTENT_DATA_KEY;
11185 * It can be nasty as data backref offset is
11186 * file offset - file extent offset, which is smaller or
11187 * equal to original backref offset. The only special case is
11188 * overflow. So we need to special check and do further search.
/* An offset with bit 63 set makes the search start from file offset 0 */
11190 key.offset = offset & (1ULL << 63) ? 0 : offset;
11192 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11197 * Search afterwards to get correct one
11198 * NOTE: As we must do a comprehensive check on the data backref to
11199 * make sure the dref count also matches, we must iterate all file
11200 * extents for that inode.
11203 leaf = path.nodes[0];
11204 slot = path.slots[0];
11206 if (slot >= btrfs_header_nritems(leaf))
11208 btrfs_item_key_to_cpu(leaf, &key, slot);
11209 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11211 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11213 * Except normal disk bytenr and disk num bytes, we still
11214 * need to do extra check on dbackref offset as
11215 * dbackref offset = file_offset - file_extent_offset
11217 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11218 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11219 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11224 ret = btrfs_next_item(root, &path);
11229 btrfs_release_path(&path);
/* The number of matching file extents must equal the recorded count */
11230 if (found_count != count) {
11232 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
11233 bytenr, len, root_id, objectid, offset, count, found_count);
11234 return REFERENCER_MISSING;
11240 * Check if the referencer of a shared data backref exists
/*
 * check_shared_data_backref() - verify a SHARED_DATA_REF backref.
 *
 * @fs_info: filesystem being checked
 * @parent:  bytenr of the tree block that should reference the data extent
 * @bytenr:  start of the data extent
 *
 * The parent block is read and its items are scanned for an EXTENT_DATA
 * item whose disk bytenr equals @bytenr; inline file extents are tested
 * for separately before the disk bytenr is read.  When found_parent stays
 * zero an error is printed and REFERENCER_MISSING is returned.
 *
 * NOTE(review): the statements executed on a match and on a read failure
 * are not visible in this excerpt.
 */
11242 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
11243 u64 parent, u64 bytenr)
11245 struct extent_buffer *eb;
11246 struct btrfs_key key;
11247 struct btrfs_file_extent_item *fi;
11249 int found_parent = 0;
11252 eb = read_tree_block(fs_info, parent, 0);
11253 if (!extent_buffer_uptodate(eb))
/* Scan every item of the parent block for a file extent using bytenr */
11256 nr = btrfs_header_nritems(eb);
11257 for (i = 0; i < nr; i++) {
11258 btrfs_item_key_to_cpu(eb, &key, i);
11259 if (key.type != BTRFS_EXTENT_DATA_KEY)
11262 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
11263 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
11266 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
11273 free_extent_buffer(eb);
11274 if (!found_parent) {
11275 error("shared extent %llu referencer lost (parent: %llu)",
11277 return REFERENCER_MISSING;
11283 * This function will check a given extent item, including its backref and
11284 * itself (like crossing stripe boundary and type)
11286 * Since we don't use extent_record anymore, introduce new error bit
/*
 * check_extent_item() - check one EXTENT_ITEM / METADATA_ITEM and all of its
 * inline backrefs.
 *
 * @fs_info: filesystem being checked
 * @eb:      extent tree leaf holding the item
 * @slot:    slot of the item inside @eb
 *
 * Visible steps:
 *  - bytes_used is increased by key.offset for EXTENT_ITEM keys and by
 *    nodesize on the other branch;
 *  - an item smaller than struct btrfs_extent_item is reported as
 *    unsupported COMPAT_EXTENT_TREE_V0;
 *  - metadata extents are passed to check_crossing_stripes() and flagged
 *    with CROSSING_STRIPE_BOUNDARY;
 *  - the level comes from btrfs_tree_block_info for old style metadata
 *    EXTENT_ITEMs and from key.offset otherwise;
 *  - each inline ref between the end of the fixed part and
 *    ei + item_size is dispatched by type to check_tree_block_backref(),
 *    check_shared_block_backref(), check_extent_data_backref() or
 *    check_shared_data_backref(); an unknown type sets UNKNOWN_TYPE and
 *    running past the item end sets ITEM_SIZE_MISMATCH.
 *
 * NOTE(review): the loop construct, the accumulation of ret into err and the
 * return statement are not visible in this excerpt.
 */
11288 static int check_extent_item(struct btrfs_fs_info *fs_info,
11289 struct extent_buffer *eb, int slot)
11291 struct btrfs_extent_item *ei;
11292 struct btrfs_extent_inline_ref *iref;
11293 struct btrfs_extent_data_ref *dref;
11297 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11298 u32 item_size = btrfs_item_size_nr(eb, slot);
11303 struct btrfs_key key;
11307 btrfs_item_key_to_cpu(eb, &key, slot);
/* Global accounting: EXTENT_ITEM keys carry their length in key.offset */
11308 if (key.type == BTRFS_EXTENT_ITEM_KEY)
11309 bytes_used += key.offset;
11311 bytes_used += nodesize;
11313 if (item_size < sizeof(*ei)) {
11315 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
11316 * old thing when on disk format is still un-determined.
11317 * No need to care about it anymore
11319 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
11323 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
11324 flags = btrfs_extent_flags(eb, ei);
11326 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
11328 if (metadata && check_crossing_stripes(global_info, key.objectid,
11330 error("bad metadata [%llu, %llu) crossing stripe boundary",
11331 key.objectid, key.objectid + nodesize);
11332 err |= CROSSING_STRIPE_BOUNDARY;
11335 ptr = (unsigned long)(ei + 1);
11337 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
11338 /* Old EXTENT_ITEM metadata */
11339 struct btrfs_tree_block_info *info;
11341 info = (struct btrfs_tree_block_info *)ptr;
11342 level = btrfs_tree_block_level(eb, info);
11343 ptr += sizeof(struct btrfs_tree_block_info);
11345 /* New METADATA_ITEM */
11346 level = key.offset;
11348 end = (unsigned long)ei + item_size;
11351 /* Reached extent item end normally */
11355 /* Beyond extent item end, wrong item size */
11357 err |= ITEM_SIZE_MISMATCH;
11358 error("extent item at bytenr %llu slot %d has wrong size",
11363 /* Now check every backref in this extent item */
11364 iref = (struct btrfs_extent_inline_ref *)ptr;
11365 type = btrfs_extent_inline_ref_type(eb, iref);
11366 offset = btrfs_extent_inline_ref_offset(eb, iref);
11368 case BTRFS_TREE_BLOCK_REF_KEY:
11369 ret = check_tree_block_backref(fs_info, offset, key.objectid,
11373 case BTRFS_SHARED_BLOCK_REF_KEY:
11374 ret = check_shared_block_backref(fs_info, offset, key.objectid,
11378 case BTRFS_EXTENT_DATA_REF_KEY:
11379 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11380 ret = check_extent_data_backref(fs_info,
11381 btrfs_extent_data_ref_root(eb, dref),
11382 btrfs_extent_data_ref_objectid(eb, dref),
11383 btrfs_extent_data_ref_offset(eb, dref),
11384 key.objectid, key.offset,
11385 btrfs_extent_data_ref_count(eb, dref));
11388 case BTRFS_SHARED_DATA_REF_KEY:
11389 ret = check_shared_data_backref(fs_info, offset, key.objectid);
11393 error("extent[%llu %d %llu] has unknown ref type: %d",
11394 key.objectid, key.type, key.offset, type);
11395 err |= UNKNOWN_TYPE;
11399 ptr += btrfs_extent_inline_ref_size(type);
11407 * Check if a dev extent item is referred correctly by its chunk
/*
 * check_dev_extent_item() - check that a DEV_EXTENT is referenced by the
 * chunk it names.
 *
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the dev extent item
 * @slot:    slot of the item inside @eb
 *
 * The chunk keyed (chunk_objectid, CHUNK_ITEM, chunk_offset) from the dev
 * extent is searched in the chunk tree, validated with
 * btrfs_check_chunk_valid(), and its stripe length is compared with the dev
 * extent length.  The stripes are then scanned for one whose devid and
 * offset equal the objectid and offset of the dev extent key.  When
 * found_chunk stays zero an error is printed and REFERENCER_MISSING is
 * returned.
 *
 * NOTE(review): the assignment of l, the ret checks and the statement run
 * on a stripe match are not visible in this excerpt.
 */
11409 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
11410 struct extent_buffer *eb, int slot)
11412 struct btrfs_root *chunk_root = fs_info->chunk_root;
11413 struct btrfs_dev_extent *ptr;
11414 struct btrfs_path path;
11415 struct btrfs_key chunk_key;
11416 struct btrfs_key devext_key;
11417 struct btrfs_chunk *chunk;
11418 struct extent_buffer *l;
11422 int found_chunk = 0;
11425 btrfs_item_key_to_cpu(eb, &devext_key, slot);
11426 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
11427 length = btrfs_dev_extent_length(eb, ptr);
/* Build the key of the chunk that this dev extent claims to belong to */
11429 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
11430 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11431 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
11433 btrfs_init_path(&path);
11434 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11439 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
11440 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
11445 if (btrfs_stripe_length(fs_info, l, chunk) != length)
/* One stripe of the chunk must sit on this device at this offset */
11448 num_stripes = btrfs_chunk_num_stripes(l, chunk);
11449 for (i = 0; i < num_stripes; i++) {
11450 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
11451 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
11453 if (devid == devext_key.objectid &&
11454 offset == devext_key.offset) {
11460 btrfs_release_path(&path);
11461 if (!found_chunk) {
11463 "device extent[%llu, %llu, %llu] did not find the related chunk",
11464 devext_key.objectid, devext_key.offset, length);
11465 return REFERENCER_MISSING;
11471 * Check if the used space is correct with the dev item
/*
 * check_dev_item() - compare the bytes_used of a DEV_ITEM with the sum of
 * the dev extents of that device.
 *
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the dev item
 * @slot:    slot of the item inside @eb
 *
 * The device tree is searched from (dev_id, DEV_EXTENT, ...) and the
 * lengths of the DEV_EXTENT items with that objectid are added to total.
 * REFERENCER_MISSING is returned when the initial search takes the error
 * branch, and ACCOUNTING_MISMATCH when total differs from the bytes_used
 * field of the dev item.
 *
 * NOTE(review): the key offset initialisation, the loop construct and the
 * ret checks are not visible in this excerpt.
 */
11473 static int check_dev_item(struct btrfs_fs_info *fs_info,
11474 struct extent_buffer *eb, int slot)
11476 struct btrfs_root *dev_root = fs_info->dev_root;
11477 struct btrfs_dev_item *dev_item;
11478 struct btrfs_path path;
11479 struct btrfs_key key;
11480 struct btrfs_dev_extent *ptr;
11486 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
11487 dev_id = btrfs_device_id(eb, dev_item);
11488 used = btrfs_device_bytes_used(eb, dev_item);
11490 key.objectid = dev_id;
11491 key.type = BTRFS_DEV_EXTENT_KEY;
11494 btrfs_init_path(&path);
11495 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* key is reloaded from the dev item itself so the message names that item */
11497 btrfs_item_key_to_cpu(eb, &key, slot);
11498 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
11499 key.objectid, key.type, key.offset);
11500 btrfs_release_path(&path);
11501 return REFERENCER_MISSING;
11504 /* Iterate dev_extents to calculate the used space of a device */
11506 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11509 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11510 if (key.objectid > dev_id)
11512 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11515 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11516 struct btrfs_dev_extent);
11517 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11519 ret = btrfs_next_item(dev_root, &path);
11523 btrfs_release_path(&path);
/*
 * NOTE(review): key is reloaded from the dev item just below, but the
 * message prints BTRFS_ROOT_TREE_OBJECTID and BTRFS_DEV_EXTENT_KEY instead of
 * key.objectid / key.type - confirm whether that is intended.
 */
11525 if (used != total) {
11526 btrfs_item_key_to_cpu(eb, &key, slot);
11528 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11529 total, used, BTRFS_ROOT_TREE_OBJECTID,
11530 BTRFS_DEV_EXTENT_KEY, dev_id);
11531 return ACCOUNTING_MISMATCH;
11537 * Check a block group item with its referencer (chunk) and its used space
11538 * with extent/metadata item
/*
 * check_block_group_item() - cross-check one BLOCK_GROUP_ITEM.
 *
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the block group item
 * @slot:    slot of the item inside @eb
 *
 * Two checks are visible:
 *  1) the chunk keyed (FIRST_CHUNK_TREE_OBJECTID, CHUNK_ITEM, block group
 *     start) is searched in the chunk tree; a failed lookup sets
 *     REFERENCER_MISSING and a chunk length mismatch sets
 *     REFERENCER_MISMATCH;
 *  2) the extent tree is walked from the block group start up to
 *     start + length, extent sizes are summed into total, and each extent
 *     is checked against the block group flags (DATA extents need a DATA
 *     block group, TREE_BLOCK extents a SYSTEM or METADATA one, otherwise
 *     CHUNK_TYPE_MISMATCH).  total != used sets ACCOUNTING_MISMATCH.
 *
 * NOTE(review): the loop construct, the amount added for METADATA_ITEM keys
 * and the return statement are not visible in this excerpt.
 */
11540 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11541 struct extent_buffer *eb, int slot)
11543 struct btrfs_root *extent_root = fs_info->extent_root;
11544 struct btrfs_root *chunk_root = fs_info->chunk_root;
11545 struct btrfs_block_group_item *bi;
11546 struct btrfs_block_group_item bg_item;
11547 struct btrfs_path path;
11548 struct btrfs_key bg_key;
11549 struct btrfs_key chunk_key;
11550 struct btrfs_key extent_key;
11551 struct btrfs_chunk *chunk;
11552 struct extent_buffer *leaf;
11553 struct btrfs_extent_item *ei;
11554 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11562 btrfs_item_key_to_cpu(eb, &bg_key, slot);
11563 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* Work on a copy of the item taken out of the extent buffer */
11564 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11565 used = btrfs_block_group_used(&bg_item);
11566 bg_flags = btrfs_block_group_flags(&bg_item);
11568 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11569 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11570 chunk_key.offset = bg_key.objectid;
11572 btrfs_init_path(&path);
11573 /* Search for the referencer chunk */
11574 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11577 "block group[%llu %llu] did not find the related chunk item",
11578 bg_key.objectid, bg_key.offset);
11579 err |= REFERENCER_MISSING;
11581 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11582 struct btrfs_chunk);
11583 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11586 "block group[%llu %llu] related chunk item length does not match",
11587 bg_key.objectid, bg_key.offset);
11588 err |= REFERENCER_MISMATCH;
11591 btrfs_release_path(&path);
11593 /* Search from the block group bytenr */
11594 extent_key.objectid = bg_key.objectid;
11595 extent_key.type = 0;
11596 extent_key.offset = 0;
11598 btrfs_init_path(&path);
11599 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11603 /* Iterate extent tree to account used space */
11605 leaf = path.nodes[0];
11607 /* Search slot can point to the last item beyond leaf nritems */
11608 if (path.slots[0] >= btrfs_header_nritems(leaf))
11611 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* Extents starting at or after the block group end are outside its range */
11612 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11615 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11616 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11618 if (extent_key.objectid < bg_key.objectid)
11621 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11624 total += extent_key.offset;
11626 ei = btrfs_item_ptr(leaf, path.slots[0],
11627 struct btrfs_extent_item);
11628 flags = btrfs_extent_flags(leaf, ei);
11629 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11630 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11632 "bad extent[%llu, %llu) type mismatch with chunk",
11633 extent_key.objectid,
11634 extent_key.objectid + extent_key.offset);
11635 err |= CHUNK_TYPE_MISMATCH;
11637 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11638 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11639 BTRFS_BLOCK_GROUP_METADATA))) {
11641 "bad extent[%llu, %llu) type mismatch with chunk",
11642 extent_key.objectid,
11643 extent_key.objectid + nodesize);
11644 err |= CHUNK_TYPE_MISMATCH;
11648 ret = btrfs_next_item(extent_root, &path);
11654 btrfs_release_path(&path);
/* The summed extent sizes must equal the used field of the block group */
11656 if (total != used) {
11658 "block group[%llu %llu] used %llu but extent items used %llu",
11659 bg_key.objectid, bg_key.offset, used, total);
11660 err |= ACCOUNTING_MISMATCH;
11666 * Check a chunk item.
11667 * Including checking all referred dev_extents and block group
/*
 * check_chunk_item() - cross-check one CHUNK_ITEM.
 *
 * @fs_info: filesystem being checked
 * @eb:      chunk tree leaf holding the chunk item
 * @slot:    slot of the item inside @eb
 *
 * Visible steps:
 *  - the chunk is validated with btrfs_check_chunk_valid(); the failure
 *    branch sets BYTES_UNALIGNED | UNKNOWN_TYPE;
 *  - the block group keyed (chunk start, BLOCK_GROUP_ITEM, chunk length) is
 *    searched in the extent tree; a failed lookup or a flags mismatch sets
 *    REFERENCER_MISSING;
 *  - for every stripe, the dev extent keyed (devid, DEV_EXTENT, stripe
 *    offset) is searched in the device tree and must point back to this
 *    chunk with a length equal to the stripe length, otherwise
 *    BACKREF_MISSING is set.
 *
 * NOTE(review): labels, ret checks and the return statement are not visible
 * in this excerpt.
 */
11669 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11670 struct extent_buffer *eb, int slot)
11672 struct btrfs_root *extent_root = fs_info->extent_root;
11673 struct btrfs_root *dev_root = fs_info->dev_root;
11674 struct btrfs_path path;
11675 struct btrfs_key chunk_key;
11676 struct btrfs_key bg_key;
11677 struct btrfs_key devext_key;
11678 struct btrfs_chunk *chunk;
11679 struct extent_buffer *leaf;
11680 struct btrfs_block_group_item *bi;
11681 struct btrfs_block_group_item bg_item;
11682 struct btrfs_dev_extent *ptr;
11694 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11695 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11696 length = btrfs_chunk_length(eb, chunk);
11697 chunk_end = chunk_key.offset + length;
11698 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11701 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11703 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11706 type = btrfs_chunk_type(eb, chunk);
/* The matching block group starts at the chunk offset and has its length */
11708 bg_key.objectid = chunk_key.offset;
11709 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11710 bg_key.offset = length;
11712 btrfs_init_path(&path);
11713 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11716 "chunk[%llu %llu) did not find the related block group item",
11717 chunk_key.offset, chunk_end);
11718 err |= REFERENCER_MISSING;
11720 leaf = path.nodes[0];
11721 bi = btrfs_item_ptr(leaf, path.slots[0],
11722 struct btrfs_block_group_item);
11723 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11725 if (btrfs_block_group_flags(&bg_item) != type) {
11727 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11728 chunk_key.offset, chunk_end, type,
11729 btrfs_block_group_flags(&bg_item));
11730 err |= REFERENCER_MISSING;
11734 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11735 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
/* Every stripe needs a dev extent that points back to this chunk */
11736 for (i = 0; i < num_stripes; i++) {
11737 btrfs_release_path(&path);
11738 btrfs_init_path(&path);
11739 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11740 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11741 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11743 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11746 goto not_match_dev;
11748 leaf = path.nodes[0];
11749 ptr = btrfs_item_ptr(leaf, path.slots[0],
11750 struct btrfs_dev_extent);
11751 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11752 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11753 if (objectid != chunk_key.objectid ||
11754 offset != chunk_key.offset ||
11755 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11756 goto not_match_dev;
/*
 * NOTE(review): the message below prints chunk_key.objectid as the range
 * start while every other message in this function prints chunk_key.offset -
 * looks inconsistent, confirm which one is intended.
 */
11759 err |= BACKREF_MISSING;
11761 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11762 chunk_key.objectid, chunk_end, i);
11765 btrfs_release_path(&path);
11771 * Main entry function to check known items and update related accounting info
/*
 * check_leaf_items() - run the per-item checks on every item of a leaf.
 *
 * @root: root the leaf is being traversed from
 * @eb:   leaf to check
 *
 * Each item key is read and dispatched on key.type to the matching helper
 * (check_extent_data_item(), check_block_group_item(), check_dev_item(),
 * check_chunk_item(), check_dev_extent_item(), check_extent_item() and the
 * four keyed backref checkers).  EXTENT_CSUM items only add their item size
 * to total_csum_bytes.  slot is advanced until it reaches the number of
 * items in @eb.
 *
 * NOTE(review): the slot initialisation, the accumulation of ret into err,
 * the loop jump and the return statement are not visible in this excerpt.
 */
11773 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11775 struct btrfs_fs_info *fs_info = root->fs_info;
11776 struct btrfs_key key;
11779 struct btrfs_extent_data_ref *dref;
11784 btrfs_item_key_to_cpu(eb, &key, slot);
11788 case BTRFS_EXTENT_DATA_KEY:
11789 ret = check_extent_data_item(root, eb, slot);
11792 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11793 ret = check_block_group_item(fs_info, eb, slot);
11796 case BTRFS_DEV_ITEM_KEY:
11797 ret = check_dev_item(fs_info, eb, slot);
11800 case BTRFS_CHUNK_ITEM_KEY:
11801 ret = check_chunk_item(fs_info, eb, slot);
11804 case BTRFS_DEV_EXTENT_KEY:
11805 ret = check_dev_extent_item(fs_info, eb, slot);
11808 case BTRFS_EXTENT_ITEM_KEY:
11809 case BTRFS_METADATA_ITEM_KEY:
11810 ret = check_extent_item(fs_info, eb, slot);
11813 case BTRFS_EXTENT_CSUM_KEY:
11814 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* The cases below handle backrefs stored as separate keyed items */
11816 case BTRFS_TREE_BLOCK_REF_KEY:
11817 ret = check_tree_block_backref(fs_info, key.offset,
11821 case BTRFS_EXTENT_DATA_REF_KEY:
11822 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11823 ret = check_extent_data_backref(fs_info,
11824 btrfs_extent_data_ref_root(eb, dref),
11825 btrfs_extent_data_ref_objectid(eb, dref),
11826 btrfs_extent_data_ref_offset(eb, dref),
11828 btrfs_extent_data_ref_count(eb, dref));
11831 case BTRFS_SHARED_BLOCK_REF_KEY:
11832 ret = check_shared_block_backref(fs_info, key.offset,
11836 case BTRFS_SHARED_DATA_REF_KEY:
11837 ret = check_shared_data_backref(fs_info, key.offset,
11845 if (++slot < btrfs_header_nritems(eb))
11852 * Helper function for later fs/subvol tree check. To determine if a tree
11853 * block should be checked.
11854 * This function ensures that only the direct referencer with the lowest
11855 * rootid checks a fs/subvolume tree block.
11857 * Backref check at extent tree would detect errors like missing subvolume
11858 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * should_check() - decide whether @root is the one that checks tree block
 * @eb.
 *
 * @root: root currently being traversed
 * @eb:   tree block reached from @root
 *
 * The extent item of @eb is looked up in the extent tree and its inline
 * refs are walked.  The walk starts right after the extent item for
 * METADATA_ITEM keys and after the additional btrfs_tree_block_info
 * otherwise.  An inline TREE_BLOCK_REF whose offset is smaller than
 * root->objectid takes the early-exit branch, which releases the path.
 * Keyed refs are not searched.
 *
 * NOTE(review): the return statements and the ret checks after the two
 * lookups are not visible in this excerpt.
 */
11860 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11862 struct btrfs_root *extent_root = root->fs_info->extent_root;
11863 struct btrfs_key key;
11864 struct btrfs_path path;
11865 struct extent_buffer *leaf;
11867 struct btrfs_extent_item *ei;
11873 struct btrfs_extent_inline_ref *iref;
11876 btrfs_init_path(&path);
11877 key.objectid = btrfs_header_bytenr(eb);
11878 key.type = BTRFS_METADATA_ITEM_KEY;
11879 key.offset = (u64)-1;
11882 * Any failure in backref resolving means we can't determine
11883 * whom the tree block belongs to.
11884 * So in that case, we need to check that tree block
11886 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11890 ret = btrfs_previous_extent_item(extent_root, &path,
11891 btrfs_header_bytenr(eb));
11895 leaf = path.nodes[0];
11896 slot = path.slots[0];
11897 btrfs_item_key_to_cpu(leaf, &key, slot);
11898 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11900 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11901 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11903 struct btrfs_tree_block_info *info;
11905 info = (struct btrfs_tree_block_info *)(ei + 1);
11906 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11909 item_size = btrfs_item_size_nr(leaf, slot);
11910 ptr = (unsigned long)iref;
11911 end = (unsigned long)ei + item_size;
11912 while (ptr < end) {
11913 iref = (struct btrfs_extent_inline_ref *)ptr;
11914 type = btrfs_extent_inline_ref_type(leaf, iref);
11915 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11918 * We only check the tree block if current root is
11919 * the lowest referencer of it.
11921 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11922 offset < root->objectid) {
11923 btrfs_release_path(&path);
11927 ptr += btrfs_extent_inline_ref_size(type);
11930 * Normally we should also check keyed tree block ref, but that may be
11931 * very time consuming. Inlined ref should already make us skip a lot
11932 * of refs now. So skip search keyed tree block ref.
11936 btrfs_release_path(&path);
11941 * Traversal function for tree block. We will do:
11942 * 1) Skip shared fs/subvolume tree blocks
11943 * 2) Update related bytes accounting
11944 * 3) Pre-order traversal
/*
 * traverse_tree_block() - check @node and, recursively, everything below it.
 *
 * @root: root the traversal belongs to
 * @node: tree block to check
 *
 * Visible steps:
 *  - for fs trees, blocks rejected by should_check() take an early branch;
 *  - total_btree_bytes, total_fs_tree_bytes and total_extent_tree_bytes are
 *    updated from node->len according to the header owner;
 *  - the block itself is verified with check_tree_block_ref(); a failure is
 *    reported and the check continues;
 *  - leaves add their free space to btree_space_waste and are handed to
 *    check_leaf_items();
 *  - nodes add the unused key pointer slots to btree_space_waste, then each
 *    child is read and traversed recursively, with a separate branch for
 *    keys that is_dropped_key() matches against drop_progress at
 *    drop_level.
 *
 * NOTE(review): the leaf/node condition, the handling of ret and the return
 * statements are not visible in this excerpt.
 */
11946 static int traverse_tree_block(struct btrfs_root *root,
11947 struct extent_buffer *node)
11949 struct extent_buffer *eb;
11950 struct btrfs_key key;
11951 struct btrfs_key drop_key;
11959 * Skip shared fs/subvolume tree block, in that case they will
11960 * be checked by referencer with lowest rootid
11962 if (is_fstree(root->objectid) && !should_check(root, node))
11965 /* Update bytes accounting */
11966 total_btree_bytes += node->len;
11967 if (fs_root_objectid(btrfs_header_owner(node)))
11968 total_fs_tree_bytes += node->len;
11969 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11970 total_extent_tree_bytes += node->len;
11972 /* pre-order traversal, check itself first */
11973 level = btrfs_header_level(node);
11974 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11975 btrfs_header_level(node),
11976 btrfs_header_owner(node));
11980 "check %s failed root %llu bytenr %llu level %d, force continue check",
11981 level ? "node":"leaf", root->objectid,
11982 btrfs_header_bytenr(node), btrfs_header_level(node));
/* Leaf handling: account free space, then check every item */
11985 btree_space_waste += btrfs_leaf_free_space(root, node);
11986 ret = check_leaf_items(root, node);
/* Node handling: account unused key pointer slots, then descend */
11991 nr = btrfs_header_nritems(node);
11992 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11993 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11994 sizeof(struct btrfs_key_ptr);
11996 /* Then check all its children */
11997 for (i = 0; i < nr; i++) {
11998 u64 blocknr = btrfs_node_blockptr(node, i);
12000 btrfs_node_key_to_cpu(node, &key, i);
12001 if (level == root->root_item.drop_level &&
12002 is_dropped_key(&key, &drop_key))
12006 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
12007 * to call the function itself.
12009 eb = read_tree_block(root->fs_info, blocknr, 0);
12010 if (extent_buffer_uptodate(eb)) {
12011 ret = traverse_tree_block(root, eb);
12014 free_extent_buffer(eb);
12021 * Low memory usage version check_chunks_and_extents.
/*
 * check_chunks_and_extents_v2() - chunk and extent check used by the
 * low-memory mode.
 *
 * @fs_info: filesystem being checked
 *
 * The chunk tree and the tree root are traversed first with
 * traverse_tree_block().  The tree root is then searched from
 * BTRFS_EXTENT_TREE_OBJECTID onwards and every ROOT_ITEM found is read and
 * traversed.  Roots with objectid BTRFS_TREE_RELOC_OBJECTID are read with
 * btrfs_read_fs_root_no_cache() and released with btrfs_free_fs_root() after
 * the traversal.
 *
 * NOTE(review): the loop construct, the accumulation of ret into err and the
 * return statement are not visible in this excerpt.
 */
12023 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
12025 struct btrfs_path path;
12026 struct btrfs_key key;
12027 struct btrfs_root *root1;
12028 struct btrfs_root *root;
12029 struct btrfs_root *cur_root;
12033 root = fs_info->fs_root;
12035 root1 = root->fs_info->chunk_root;
12036 ret = traverse_tree_block(root1, root1->node);
12039 root1 = root->fs_info->tree_root;
12040 ret = traverse_tree_block(root1, root1->node);
/* Walk the root items of the tree root, starting at the extent tree */
12043 btrfs_init_path(&path);
12044 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
12046 key.type = BTRFS_ROOT_ITEM_KEY;
12048 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
/* NOTE(review): "treet" in the message below looks like a typo for "tree" */
12050 error("cannot find extent treet in tree_root");
12055 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12056 if (key.type != BTRFS_ROOT_ITEM_KEY)
12058 key.offset = (u64)-1;
12060 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12061 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
12064 cur_root = btrfs_read_fs_root(root->fs_info, &key);
/* NOTE(review): key.objectid is a u64 but is printed with %lld below */
12065 if (IS_ERR(cur_root) || !cur_root) {
12066 error("failed to read tree: %lld", key.objectid);
12070 ret = traverse_tree_block(cur_root, cur_root->node);
12073 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12074 btrfs_free_fs_root(cur_root);
12076 ret = btrfs_next_item(root1, &path);
12082 btrfs_release_path(&path);
/*
 * do_check_chunks_and_extents() - run the chunk/extent check for the
 * selected check mode.
 *
 * @fs_info: filesystem being checked
 *
 * "checking extents" is printed to stderr unless ctx.progress_enabled is
 * set.  CHECK_MODE_LOWMEM uses check_chunks_and_extents_v2(); the other
 * call, check_chunks_and_extents(), is presumably the else branch (the else
 * line and the return statement are not visible in this excerpt).
 */
12086 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
12090 if (!ctx.progress_enabled)
12091 fprintf(stderr, "checking extents\n");
12092 if (check_mode == CHECK_MODE_LOWMEM)
12093 ret = check_chunks_and_extents_v2(fs_info);
12095 ret = check_chunks_and_extents(fs_info);
/*
 * btrfs_fsck_reinit_root() - give @root a fresh, empty root node.
 *
 * @trans:     running transaction
 * @root:      root to re-initialise
 * @overwrite: selects between the two ways of obtaining the new node;
 *             presumably non-zero reuses the current node and zero
 *             allocates a new block with btrfs_alloc_free_block() - the
 *             condition itself is not visible, TODO confirm
 *
 * The header of the chosen buffer is zeroed and rewritten (level, bytenr,
 * generation, backref rev, owner, fsid, chunk tree uuid) and the buffer is
 * marked dirty.  When the new buffer starts at the same bytenr as the old
 * root node, the root item generation and level are updated and written
 * with btrfs_update_root().  The root is finally passed to
 * add_root_to_dirty_list().
 *
 * NOTE(review): the ret checks, the assignment of root->node and the return
 * statements are not visible in this excerpt.
 */
12100 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
12101 struct btrfs_root *root, int overwrite)
12103 struct extent_buffer *c;
12104 struct extent_buffer *old = root->node;
12107 struct btrfs_disk_key disk_key = {0,0,0};
12113 extent_buffer_get(c);
12116 c = btrfs_alloc_free_block(trans, root,
12117 root->fs_info->nodesize,
12118 root->root_key.objectid,
12119 &disk_key, level, 0, 0);
12122 extent_buffer_get(c);
/* Rebuild the block header from scratch */
12126 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
12127 btrfs_set_header_level(c, level);
12128 btrfs_set_header_bytenr(c, c->start);
12129 btrfs_set_header_generation(c, trans->transid);
12130 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
12131 btrfs_set_header_owner(c, root->root_key.objectid);
12133 write_extent_buffer(c, root->fs_info->fsid,
12134 btrfs_header_fsid(), BTRFS_FSID_SIZE);
12136 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
12137 btrfs_header_chunk_tree_uuid(c),
12140 btrfs_mark_buffer_dirty(c);
12142 * this case can happen in the following case:
12144 * 1.overwrite previous root.
12146 * 2.reinit reloc data root, this is because we skip pin
12147 * down reloc data tree before which means we can allocate
12148 * same block bytenr here.
12150 if (old->start == c->start) {
12151 btrfs_set_root_generation(&root->root_item,
12153 root->root_item.level = btrfs_header_level(root->node);
12154 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12155 &root->root_key, &root->root_item);
12157 free_extent_buffer(c);
12161 free_extent_buffer(old);
12163 add_root_to_dirty_list(root);
/*
 * pin_down_tree_blocks() - pin @eb and the tree blocks reachable from it.
 *
 * @fs_info:   filesystem being processed
 * @eb:        tree block to start from
 * @tree_root: non-zero when @eb belongs to the tree root; it is passed on
 *             unchanged when descending through nodes and as 0 when
 *             descending into a root found in a ROOT_ITEM
 *
 * A block whose range already has EXTENT_DIRTY set in
 * fs_info->pinned_extents takes an early branch; otherwise it is pinned
 * with btrfs_pin_extent().  Two kinds of entries are then followed:
 *  - ROOT_ITEM keys, except the extent tree, tree reloc and data reloc
 *    roots: the root block is read and pinned recursively;
 *  - node pointers: at level 1 outside the tree root the child is pinned
 *    without being read, otherwise the child is read and pinned
 *    recursively.
 *
 * NOTE(review): the condition separating the leaf and node handling, the
 * ret checks and the return statements are not visible in this excerpt.
 */
12167 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12168 struct extent_buffer *eb, int tree_root)
12170 struct extent_buffer *tmp;
12171 struct btrfs_root_item *ri;
12172 struct btrfs_key key;
12174 int level = btrfs_header_level(eb);
12180 * If we have pinned this block before, don't pin it again.
12181 * This can not only avoid forever loop with broken filesystem
12182 * but also give us some speedups.
12184 if (test_range_bit(&fs_info->pinned_extents, eb->start,
12185 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
12188 btrfs_pin_extent(fs_info, eb->start, eb->len);
12190 nritems = btrfs_header_nritems(eb);
12191 for (i = 0; i < nritems; i++) {
12193 btrfs_item_key_to_cpu(eb, &key, i);
12194 if (key.type != BTRFS_ROOT_ITEM_KEY)
12196 /* Skip the extent root and reloc roots */
12197 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
12198 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
12199 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
12201 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
12202 bytenr = btrfs_disk_root_bytenr(eb, ri);
12205 * If at any point we start needing the real root we
12206 * will have to build a stump root for the root we are
12207 * in, but for now this doesn't actually use the root so
12208 * just pass in extent_root.
12210 tmp = read_tree_block(fs_info, bytenr, 0);
12211 if (!extent_buffer_uptodate(tmp)) {
12212 fprintf(stderr, "Error reading root block\n");
12215 ret = pin_down_tree_blocks(fs_info, tmp, 0);
12216 free_extent_buffer(tmp);
12220 bytenr = btrfs_node_blockptr(eb, i);
12222 /* If we aren't the tree root don't read the block */
12223 if (level == 1 && !tree_root) {
12224 btrfs_pin_extent(fs_info, bytenr,
12225 fs_info->nodesize);
12229 tmp = read_tree_block(fs_info, bytenr, 0);
12230 if (!extent_buffer_uptodate(tmp)) {
12231 fprintf(stderr, "Error reading tree block\n");
12234 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
12235 free_extent_buffer(tmp);
/*
 * Pin the metadata reachable from the chunk tree root (walked with
 * tree_root == 0) and then from the root tree root (walked with
 * tree_root == 1).
 *
 * Returns the result of the root tree walk.
 * NOTE(review): how the result of the chunk tree walk is handled is not
 * visible in this listing.
 */
12244 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
12248 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
12252 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Recreate the in-memory block groups from the chunk items stored in the
 * chunk tree.
 *
 * The avail_{data,metadata,system}_alloc_bits fields of @fs_info are cleared
 * first. Then, for every CHUNK_ITEM found while iterating the chunk tree, a
 * block group is added with the chunk's type and length, and the range
 * starting at key.offset is marked dirty in fs_info->free_space_cache.
 * Finally the block groups are iterated with
 * btrfs_lookup_first_block_group(), advancing past each one.
 * NOTE(review): what the final loop does with each block group is not
 * visible in this listing.
 */
12255 static int reset_block_groups(struct btrfs_fs_info *fs_info)
12257 struct btrfs_block_group_cache *cache;
12258 struct btrfs_path path;
12259 struct extent_buffer *leaf;
12260 struct btrfs_chunk *chunk;
12261 struct btrfs_key key;
12265 btrfs_init_path(&path);
12267 key.type = BTRFS_CHUNK_ITEM_KEY;
/* Read-only search: no transaction handle, no insertion, no COW. */
12269 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
12271 btrfs_release_path(&path);
12276 * We do this in case the block groups were screwed up and had alloc
12277 * bits that aren't actually set on the chunks. This happens with
12278 * restored images every time and could happen in real life I guess.
12280 fs_info->avail_data_alloc_bits = 0;
12281 fs_info->avail_metadata_alloc_bits = 0;
12282 fs_info->avail_system_alloc_bits = 0;
12284 /* First we need to create the in-memory block groups */
12286 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12287 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
12289 btrfs_release_path(&path);
12297 leaf = path.nodes[0];
12298 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12299 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
12304 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
12305 btrfs_add_block_group(fs_info, 0,
12306 btrfs_chunk_type(leaf, chunk),
12307 key.objectid, key.offset,
12308 btrfs_chunk_length(leaf, chunk));
/* The end passed here is key.offset + chunk length (no "- 1"). */
12309 set_extent_dirty(&fs_info->free_space_cache, key.offset,
12310 key.offset + btrfs_chunk_length(leaf, chunk));
12315 cache = btrfs_lookup_first_block_group(fs_info, start);
12319 start = cache->key.objectid + cache->key.offset;
12322 btrfs_release_path(&path);
/*
 * Drop the state of a pending balance from the root tree and reinitialize
 * the data reloc tree.
 *
 * Steps visible in this function:
 *  1. search for the balance item (BTRFS_BALANCE_OBJECTID /
 *     BTRFS_BALANCE_ITEM_KEY) and delete it; one path jumps straight to the
 *     reinit_data_reloc label instead;
 *  2. search for ROOT_ITEMs with objectid BTRFS_TREE_RELOC_OBJECTID and
 *     delete them in batches of del_nr items starting at del_slot;
 *  3. read the data reloc root, record it in @trans, reinitialize it with
 *     btrfs_fsck_reinit_root() and recreate its root directory with
 *     objectid BTRFS_FIRST_FREE_OBJECTID.
 *
 * @trans:   running transaction used for all modifications
 * @fs_info: filesystem to operate on; the searches run on fs_info->tree_root
 */
12326 static int reset_balance(struct btrfs_trans_handle *trans,
12327 struct btrfs_fs_info *fs_info)
12329 struct btrfs_root *root = fs_info->tree_root;
12330 struct btrfs_path path;
12331 struct extent_buffer *leaf;
12332 struct btrfs_key key;
12333 int del_slot, del_nr = 0;
12337 btrfs_init_path(&path);
12338 key.objectid = BTRFS_BALANCE_OBJECTID;
12339 key.type = BTRFS_BALANCE_ITEM_KEY;
/* ins_len == -1 and cow == 1: search in preparation for a deletion. */
12341 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12346 goto reinit_data_reloc;
12351 ret = btrfs_del_item(trans, root, &path);
12354 btrfs_release_path(&path);
12356 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12357 key.type = BTRFS_ROOT_ITEM_KEY;
12359 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12363 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12368 ret = btrfs_del_items(trans, root, &path,
12375 btrfs_release_path(&path);
12378 ret = btrfs_search_slot(trans, root, &key, &path,
12385 leaf = path.nodes[0];
12386 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12387 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
12389 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
12394 del_slot = path.slots[0];
12403 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
12407 btrfs_release_path(&path);
/* From here on "root" is re-pointed at the data reloc tree. */
12410 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
12411 key.type = BTRFS_ROOT_ITEM_KEY;
12412 key.offset = (u64)-1;
12413 root = btrfs_read_fs_root(fs_info, &key);
12414 if (IS_ERR(root)) {
12415 fprintf(stderr, "Error reading data reloc tree\n");
12416 ret = PTR_ERR(root);
12419 record_root_in_trans(trans, root);
12420 ret = btrfs_fsck_reinit_root(trans, root, 0);
12423 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
12425 btrfs_release_path(&path);
/*
 * Rebuild the extent tree from scratch inside transaction @trans.
 *
 * Sequence visible in this function:
 *  1. bail out with a message when the MIXED_GROUPS incompat flag is set;
 *  2. pin all in-use metadata with pin_metadata_blocks();
 *  3. free the existing block groups and recreate them with
 *     reset_block_groups();
 *  4. reinitialize fs_info->extent_root with btrfs_fsck_reinit_root();
 *  5. insert one item per in-memory block group (cache->key / cache->item)
 *     into the extent root, calling btrfs_extent_post_op() afterwards;
 *  6. clear pending balance state with reset_balance().
 *
 * Each step prints a message to stderr when it fails.
 */
12429 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
12430 struct btrfs_fs_info *fs_info)
12436 * The only reason we don't do this is because right now we're just
12437 * walking the trees we find and pinning down their bytes, we don't look
12438 * at any of the leaves. In order to do mixed groups we'd have to check
12439 * the leaves of any fs roots and pin down the bytes for any file
12440 * extents we find. Not hard but why do it if we don't have to?
12442 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
12443 fprintf(stderr, "We don't support re-initing the extent tree "
12444 "for mixed block groups yet, please notify a btrfs "
12445 "developer you want to do this so they can add this "
12446 "functionality.\n");
12451 * first we need to walk all of the trees except the extent tree and pin
12452 * down the bytes that are in use so we don't overwrite any existing
12455 ret = pin_metadata_blocks(fs_info);
12457 fprintf(stderr, "error pinning down used bytes\n");
12462 * Need to drop all the block groups since we're going to recreate all
12465 btrfs_free_block_groups(fs_info);
12466 ret = reset_block_groups(fs_info);
12468 fprintf(stderr, "error resetting the block groups\n");
12472 /* Ok we can allocate now, reinit the extent root */
12473 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
12475 fprintf(stderr, "extent root initialization failed\n");
12477 * When the transaction code is updated we should end the
12478 * transaction, but for now progs only knows about commit so
12479 * just return an error.
12485 * Now we have all the in-memory block groups setup so we can make
12486 * allocations properly, and the metadata we care about is safe since we
12487 * pinned all of it above.
12490 struct btrfs_block_group_cache *cache;
12492 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Advance past this block group before inserting its item. */
12495 start = cache->key.objectid + cache->key.offset;
12496 ret = btrfs_insert_item(trans, fs_info->extent_root,
12497 &cache->key, &cache->item,
12498 sizeof(cache->item));
12500 fprintf(stderr, "Error adding block group\n");
12503 btrfs_extent_post_op(trans, fs_info->extent_root);
12506 ret = reset_balance(trans, fs_info);
12508 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force a copy-on-write of metadata block @eb by searching down to it inside
 * a fresh transaction.
 *
 * @root: only used to reach fs_info; it is then replaced by the root named in
 *        the header owner field of @eb
 * @eb:   block to re-COW
 *
 * The search uses cow == 1 with path.lowest_level set to the level of @eb and
 * the first key of @eb as the target (node key for level > 0, item key
 * otherwise). Returns PTR_ERR() when the owner root cannot be read or the
 * transaction cannot be started.
 * NOTE(review): the result of btrfs_commit_transaction() is not captured on
 * the visible line, so a failed commit would go unreported - confirm.
 */
12513 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
12515 struct btrfs_path path;
12516 struct btrfs_trans_handle *trans;
12517 struct btrfs_key key;
12520 printf("Recowing metadata block %llu\n", eb->start);
12521 key.objectid = btrfs_header_owner(eb);
12522 key.type = BTRFS_ROOT_ITEM_KEY;
12523 key.offset = (u64)-1;
12525 root = btrfs_read_fs_root(root->fs_info, &key);
12526 if (IS_ERR(root)) {
12527 fprintf(stderr, "Couldn't find owner root %llu\n",
12529 return PTR_ERR(root);
12532 trans = btrfs_start_transaction(root, 1);
12534 return PTR_ERR(trans);
12536 btrfs_init_path(&path);
/* Stop the search at the level of @eb so that @eb itself gets COWed. */
12537 path.lowest_level = btrfs_header_level(eb);
12538 if (path.lowest_level)
12539 btrfs_node_key_to_cpu(eb, &key, 0);
12541 btrfs_item_key_to_cpu(eb, &key, 0);
12543 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12544 btrfs_commit_transaction(trans, root);
12545 btrfs_release_path(&path);
/*
 * Delete the item described by @bad from the tree identified by
 * bad->root_id, inside its own transaction.
 *
 * @root: only used to reach fs_info; it is then replaced by the root looked
 *        up from bad->root_id
 * @bad:  record carrying the owning root id and the key of the item to remove
 *
 * Returns PTR_ERR() when the root cannot be read or the transaction cannot be
 * started.
 * NOTE(review): the result of btrfs_commit_transaction() is not captured on
 * the visible line - confirm whether commit failures should be reported.
 */
12549 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12551 struct btrfs_path path;
12552 struct btrfs_trans_handle *trans;
12553 struct btrfs_key key;
12556 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12557 bad->key.type, bad->key.offset);
12558 key.objectid = bad->root_id;
12559 key.type = BTRFS_ROOT_ITEM_KEY;
12560 key.offset = (u64)-1;
12562 root = btrfs_read_fs_root(root->fs_info, &key);
12563 if (IS_ERR(root)) {
12564 fprintf(stderr, "Couldn't find owner root %llu\n",
12566 return PTR_ERR(root);
12569 trans = btrfs_start_transaction(root, 1);
12571 return PTR_ERR(trans);
12573 btrfs_init_path(&path);
/* ins_len == -1 and cow == 1: search in preparation for a deletion. */
12574 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12580 ret = btrfs_del_item(trans, root, &path);
12582 btrfs_commit_transaction(trans, root);
12583 btrfs_release_path(&path);
/*
 * Clear the log tree reference from the superblock copy: log_root and
 * log_root_level are both set to 0 and the change is written out by
 * committing a transaction started on @root.
 *
 * On failure to start the transaction, ret is set from PTR_ERR(trans);
 * otherwise ret is the result of btrfs_commit_transaction().
 */
12587 static int zero_log_tree(struct btrfs_root *root)
12589 struct btrfs_trans_handle *trans;
12592 trans = btrfs_start_transaction(root, 1);
12593 if (IS_ERR(trans)) {
12594 ret = PTR_ERR(trans);
12597 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12598 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12599 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert checksums for the data range [@start, @start + len).
 *
 * The range is processed one sector (fs_info->sectorsize) at a time: each
 * sector is read into @buf with read_extent_data() and handed to
 * btrfs_csum_file_block() together with its logical address start + offset.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree receiving the new items
 * @buf:       scratch buffer; the callers in this file allocate sectorsize
 *             bytes for it
 * @start:     logical start address of the data range
 */
12603 static int populate_csum(struct btrfs_trans_handle *trans,
12604 struct btrfs_root *csum_root, char *buf, u64 start,
12607 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12612 while (offset < len) {
12613 sectorsize = fs_info->sectorsize;
12614 ret = read_extent_data(fs_info, buf, start + offset,
/* The third argument is the end of the whole range, not of this sector. */
12618 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12619 start + offset, buf, sectorsize);
12622 offset += sectorsize;
/*
 * Generate checksums for the regular file extents referenced by one
 * fs/subvolume tree.
 *
 * Items of @cur_root are iterated with btrfs_next_item(). For EXTENT_DATA
 * items whose file extent type is BTRFS_FILE_EXTENT_REG, the on-disk range
 * (disk_bytenr, disk_num_bytes) is passed to populate_csum(). A return of
 * -EEXIST from populate_csum() is tested for separately.
 * NOTE(review): how -EEXIST is handled is not visible in this listing;
 * presumably it is tolerated - confirm.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree to fill
 * @cur_root:  fs/subvolume tree to scan
 */
12627 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12628 struct btrfs_root *csum_root,
12629 struct btrfs_root *cur_root)
12631 struct btrfs_path path;
12632 struct btrfs_key key;
12633 struct extent_buffer *node;
12634 struct btrfs_file_extent_item *fi;
/* One sector of scratch space, reused for every populate_csum() call. */
12641 buf = malloc(cur_root->fs_info->sectorsize);
12645 btrfs_init_path(&path);
12649 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12652 /* Iterate all regular file extents and fill its csum */
12654 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12656 if (key.type != BTRFS_EXTENT_DATA_KEY)
12658 node = path.nodes[0];
12659 slot = path.slots[0];
12660 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12661 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12663 start = btrfs_file_extent_disk_bytenr(node, fi);
12664 len = btrfs_file_extent_disk_num_bytes(node, fi);
12666 ret = populate_csum(trans, csum_root, buf, start, len);
12667 if (ret == -EEXIST)
12673 * TODO: if next leaf is corrupted, jump to nearest next valid
12676 ret = btrfs_next_item(cur_root, &path);
12686 btrfs_release_path(&path);
/*
 * Fill the checksum tree by scanning the fs/subvolume trees listed in the
 * root tree.
 *
 * The root tree is searched starting at the ROOT_ITEM key of
 * BTRFS_FS_TREE_OBJECTID and iterated with btrfs_next_item(). Each key is
 * tested against BTRFS_LAST_FREE_OBJECTID, the ROOT_ITEM type and
 * is_fstree(); roots passing these tests are read with btrfs_read_fs_root()
 * (key.offset forced to (u64)-1) and processed by
 * fill_csum_tree_from_one_fs_root().
 * NOTE(review): the statements executed when a test fails are not visible in
 * this listing.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree to fill
 */
12691 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12692 struct btrfs_root *csum_root)
12694 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12695 struct btrfs_path path;
12696 struct btrfs_root *tree_root = fs_info->tree_root;
12697 struct btrfs_root *cur_root;
12698 struct extent_buffer *node;
12699 struct btrfs_key key;
12703 btrfs_init_path(&path);
12704 key.objectid = BTRFS_FS_TREE_OBJECTID;
12706 key.type = BTRFS_ROOT_ITEM_KEY;
12707 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12716 node = path.nodes[0];
12717 slot = path.slots[0];
12718 btrfs_item_key_to_cpu(node, &key, slot);
12719 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12721 if (key.type != BTRFS_ROOT_ITEM_KEY)
12723 if (!is_fstree(key.objectid))
12725 key.offset = (u64)-1;
12727 cur_root = btrfs_read_fs_root(fs_info, &key);
/* Both an ERR_PTR value and a NULL result are treated as failure. */
12728 if (IS_ERR(cur_root) || !cur_root) {
12729 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12733 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12738 ret = btrfs_next_item(tree_root, &path);
12748 btrfs_release_path(&path);
/*
 * Fill the checksum tree by scanning the extent tree.
 *
 * The extent tree is iterated leaf by leaf. Items are tested for the
 * EXTENT_ITEM key type and for the BTRFS_EXTENT_FLAG_DATA flag, and
 * populate_csum() is called with key.objectid as the start of the range.
 * NOTE(review): the statements executed when a test fails, and the length
 * argument passed to populate_csum(), are not visible in this listing.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree to fill; also used to reach the extent root
 */
12752 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12753 struct btrfs_root *csum_root)
12755 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12756 struct btrfs_path path;
12757 struct btrfs_extent_item *ei;
12758 struct extent_buffer *leaf;
12760 struct btrfs_key key;
12763 btrfs_init_path(&path);
12765 key.type = BTRFS_EXTENT_ITEM_KEY;
12767 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12769 btrfs_release_path(&path);
/* One sector of scratch space, reused for every populate_csum() call. */
12773 buf = malloc(csum_root->fs_info->sectorsize);
12775 btrfs_release_path(&path);
12780 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12781 ret = btrfs_next_leaf(extent_root, &path);
12789 leaf = path.nodes[0];
12791 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12792 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12797 ei = btrfs_item_ptr(leaf, path.slots[0],
12798 struct btrfs_extent_item);
12799 if (!(btrfs_extent_flags(leaf, ei) &
12800 BTRFS_EXTENT_FLAG_DATA)) {
12805 ret = populate_csum(trans, csum_root, buf, key.objectid,
12812 btrfs_release_path(&path);
12818 * Recalculate the csum and put it into the csum tree.
12820 * Extent tree init will wipe out all the extent info, so in that case, we
12821 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
12822 * will use fs/subvol trees to init the csum tree.
/*
 * Dispatch checksum tree refilling to one of the two scanners.
 *
 * @trans:          running transaction
 * @csum_root:      checksum tree to fill
 * @search_fs_tree: non-zero selects fill_csum_tree_from_fs(), zero selects
 *                  fill_csum_tree_from_extent()
 *
 * Returns whatever the selected scanner returns.
 */
12824 static int fill_csum_tree(struct btrfs_trans_handle *trans,
12825 struct btrfs_root *csum_root,
12826 int search_fs_tree)
12828 if (search_fs_tree)
12829 return fill_csum_tree_from_fs(trans, csum_root);
12831 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache: entries are removed from the cache
 * tree one at a time until it is empty, then the tree itself is freed and the
 * global pointer is reset to NULL. Nothing is done when the cache was never
 * allocated.
 * NOTE(review): the release of each root_item_info entry is not visible in
 * this listing.
 */
12834 static void free_roots_info_cache(void)
12836 if (!roots_info_cache)
12839 while (!cache_tree_empty(roots_info_cache)) {
12840 struct cache_extent *entry;
12841 struct root_item_info *rii;
12843 entry = first_cache_extent(roots_info_cache);
12846 remove_cache_extent(roots_info_cache, entry);
/* Recover the containing record from its embedded cache_extent. */
12847 rii = container_of(entry, struct root_item_info, cache_extent);
12851 free(roots_info_cache);
12852 roots_info_cache = NULL;
/*
 * Scan the extent tree and record, per root id, the highest-level tree block
 * that carries an inline TREE_BLOCK_REF for that root.
 *
 * The global roots_info_cache is allocated on first use. For every
 * EXTENT_ITEM flagged as a tree block and every METADATA_ITEM, the first
 * inline ref is inspected; its type is tested against
 * BTRFS_TREE_BLOCK_REF_KEY and its offset is taken as the root id. The level
 * comes from key.offset for METADATA_ITEMs and from the
 * btrfs_tree_block_info that follows the extent item otherwise.
 *
 * Each root_item_info entry is keyed by root id (cache_extent.start, size 1)
 * and starts with level (u8)-1 meaning "nothing recorded yet". A block with
 * a higher level than the recorded one replaces bytenr/gen and resets
 * node_count to 1; a block at the same level takes a separate branch.
 * NOTE(review): the body of the equal-level branch is not visible in this
 * listing; maybe_repair_root_item() requires node_count == 1.
 */
12855 static int build_roots_info_cache(struct btrfs_fs_info *info)
12858 struct btrfs_key key;
12859 struct extent_buffer *leaf;
12860 struct btrfs_path path;
12862 if (!roots_info_cache) {
12863 roots_info_cache = malloc(sizeof(*roots_info_cache));
12864 if (!roots_info_cache)
12866 cache_tree_init(roots_info_cache);
12869 btrfs_init_path(&path);
12871 key.type = BTRFS_EXTENT_ITEM_KEY;
12873 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12876 leaf = path.nodes[0];
12879 struct btrfs_key found_key;
12880 struct btrfs_extent_item *ei;
12881 struct btrfs_extent_inline_ref *iref;
12882 int slot = path.slots[0];
12887 struct cache_extent *entry;
12888 struct root_item_info *rii;
12890 if (slot >= btrfs_header_nritems(leaf)) {
12891 ret = btrfs_next_leaf(info->extent_root, &path);
12898 leaf = path.nodes[0];
12899 slot = path.slots[0];
12902 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12904 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12905 found_key.type != BTRFS_METADATA_ITEM_KEY)
12908 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12909 flags = btrfs_extent_flags(leaf, ei);
12911 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12912 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12915 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
/* METADATA_ITEM: inline refs directly follow the extent item. */
12916 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12917 level = found_key.offset;
12919 struct btrfs_tree_block_info *binfo;
/* Otherwise a btrfs_tree_block_info sits between the item and the refs. */
12921 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12922 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12923 level = btrfs_tree_block_level(leaf, binfo);
12927 * For a root extent, it must be of the following type and the
12928 * first (and only one) iref in the item.
12930 type = btrfs_extent_inline_ref_type(leaf, iref);
12931 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12934 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12935 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12937 rii = malloc(sizeof(struct root_item_info));
12942 rii->cache_extent.start = root_id;
12943 rii->cache_extent.size = 1;
12944 rii->level = (u8)-1;
12945 entry = &rii->cache_extent;
12946 ret = insert_cache_extent(roots_info_cache, entry);
12949 rii = container_of(entry, struct root_item_info,
12953 ASSERT(rii->cache_extent.start == root_id);
12954 ASSERT(rii->cache_extent.size == 1);
12956 if (level > rii->level || rii->level == (u8)-1) {
12957 rii->level = level;
12958 rii->bytenr = found_key.objectid;
12959 rii->gen = btrfs_extent_generation(leaf, ei);
12960 rii->node_count = 1;
12961 } else if (level == rii->level) {
12969 btrfs_release_path(&path);
/*
 * Compare the root item at the current position of @path with the
 * information collected in roots_info_cache and, if allowed, rewrite it.
 *
 * @path:           positioned on the root item to examine
 *                  (path->nodes[0], path->slots[0])
 * @root_key:       key of that root item; its objectid is the root id
 * @read_only_mode: non-zero to only detect a mismatch without modifying
 *                  the leaf
 *
 * An error is reported when the cache has no entry for the root or when the
 * entry's node_count is not 1. The root item is copied out of the leaf and
 * its bytenr, level and generation are compared with the cached values. On
 * mismatch a message is printed, except when both @read_only_mode and the
 * global repair flag are set. A root item generation newer than the cached
 * one is reported separately. When @read_only_mode is zero the three fields
 * are updated and the item is written back into the leaf.
 * NOTE(review): the return values are not visible in this listing.
 */
12974 static int maybe_repair_root_item(struct btrfs_path *path,
12975 const struct btrfs_key *root_key,
12976 const int read_only_mode)
12978 const u64 root_id = root_key->objectid;
12979 struct cache_extent *entry;
12980 struct root_item_info *rii;
12981 struct btrfs_root_item ri;
12982 unsigned long offset;
12984 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12987 "Error: could not find extent items for root %llu\n",
12988 root_key->objectid);
12992 rii = container_of(entry, struct root_item_info, cache_extent);
12993 ASSERT(rii->cache_extent.start == root_id);
12994 ASSERT(rii->cache_extent.size == 1);
12996 if (rii->node_count != 1) {
12998 "Error: could not find btree root extent for root %llu\n",
/* Work on a local copy of the on-disk root item. */
13003 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
13004 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
13006 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
13007 btrfs_root_level(&ri) != rii->level ||
13008 btrfs_root_generation(&ri) != rii->gen) {
13011 * If we're in repair mode but our caller told us to not update
13012 * the root item, i.e. just check if it needs to be updated, don't
13013 * print this message, since the caller will call us again shortly
13014 * for the same root item without read only mode (the caller will
13015 * open a transaction first).
13017 if (!(read_only_mode && repair))
13019 "%sroot item for root %llu,"
13020 " current bytenr %llu, current gen %llu, current level %u,"
13021 " new bytenr %llu, new gen %llu, new level %u\n",
13022 (read_only_mode ? "" : "fixing "),
13024 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
13025 btrfs_root_level(&ri),
13026 rii->bytenr, rii->gen, rii->level);
13028 if (btrfs_root_generation(&ri) > rii->gen) {
13030 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
13031 root_id, btrfs_root_generation(&ri), rii->gen);
13035 if (!read_only_mode) {
13036 btrfs_set_root_bytenr(&ri, rii->bytenr);
13037 btrfs_set_root_level(&ri, rii->level);
13038 btrfs_set_root_generation(&ri, rii->gen);
/* Copy the corrected item back over the original in the leaf. */
13039 write_extent_buffer(path->nodes[0], &ri,
13040 offset, sizeof(ri));
13050 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
13051 * caused read-only snapshots to be corrupted if they were created at a moment
13052 * when the source subvolume/snapshot had orphan items. The issue was that the
13053 * on-disk root items became incorrect, referring to the pre orphan cleanup root
13054 * node instead of the post orphan cleanup root node.
13055 * So this function, and its callees, just detects and fixes those cases. Even
13056 * though the regression was for read-only snapshots, this function applies to
13057 * any snapshot/subvolume root.
13058 * This must be run before any other repair code; otherwise other repair
13059 * code will delete or modify backrefs in the extent tree, for example, which
13060 * will result in an inconsistent fs after repairing the root items.
/*
 * Walk the root items in the root tree, starting at
 * BTRFS_FIRST_FREE_OBJECTID, and check each one with
 * maybe_repair_root_item().
 *
 * build_roots_info_cache() is run first to collect the expected values. A
 * transaction is started on info->tree_root only when needed (tracked by
 * need_trans), so that leaves without outdated root items do not cause a
 * commit. maybe_repair_root_item() is called in read-only mode while no
 * transaction is open and in fixing mode once one is. Keys are tested for the
 * ROOT_ITEM type and for BTRFS_TREE_RELOC_OBJECTID before that call.
 * On the way out the cache is freed, the path is released and the
 * transaction is committed.
 * NOTE(review): the return value and the conditions guarding the
 * transaction start/commit are not visible in this listing.
 */
13062 static int repair_root_items(struct btrfs_fs_info *info)
13064 struct btrfs_path path;
13065 struct btrfs_key key;
13066 struct extent_buffer *leaf;
13067 struct btrfs_trans_handle *trans = NULL;
13070 int need_trans = 0;
13072 btrfs_init_path(&path);
13074 ret = build_roots_info_cache(info);
13078 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
13079 key.type = BTRFS_ROOT_ITEM_KEY;
13084 * Avoid opening and committing transactions if a leaf doesn't have
13085 * any root items that need to be fixed, so that we avoid rotating
13086 * backup roots unnecessarily.
13089 trans = btrfs_start_transaction(info->tree_root, 1);
13090 if (IS_ERR(trans)) {
13091 ret = PTR_ERR(trans);
13096 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
13100 leaf = path.nodes[0];
13103 struct btrfs_key found_key;
13105 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
/* End of this leaf: compute the key at which to resume the search. */
13106 int no_more_keys = find_next_key(&path, &key);
13108 btrfs_release_path(&path);
13110 ret = btrfs_commit_transaction(trans,
13122 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13124 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
13126 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* Read-only probe unless a transaction is already open. */
13129 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
13133 if (!trans && repair) {
13136 btrfs_release_path(&path);
13146 free_roots_info_cache();
13147 btrfs_release_path(&path);
13149 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the free space cache of every block group and mark the superblock
 * cache generation as invalid.
 *
 * Block groups are visited in order with btrfs_lookup_first_block_group(),
 * calling btrfs_clear_free_space_cache() on each. Afterwards a transaction is
 * started on the root tree, the superblock copy's cache_generation is set to
 * (u64)-1 and the transaction is committed.
 *
 * Returns PTR_ERR() when the transaction cannot be started.
 * NOTE(review): the result of btrfs_commit_transaction() is not captured on
 * the visible line - confirm whether commit failures should be reported.
 */
13156 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13158 struct btrfs_trans_handle *trans;
13159 struct btrfs_block_group_cache *bg_cache;
13163 /* Clear all free space cache inodes and its extent data */
13165 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13168 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
/* Continue the lookup right after the block group just handled. */
13171 current = bg_cache->key.objectid + bg_cache->key.offset;
13174 /* Don't forget to set cache_generation to -1 */
13175 trans = btrfs_start_transaction(fs_info->tree_root, 0);
13176 if (IS_ERR(trans)) {
13177 error("failed to update super block cache generation");
13178 return PTR_ERR(trans);
13180 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
13181 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Clear the free space cache in the version selected by clear_version.
 *
 * Version 1: when the FREE_SPACE_TREE compat_ro flag is set, a message
 * pointing the user to "--clear-space-cache v2" is emitted; otherwise
 * clear_free_space_cache() is run.
 * Version 2: when the FREE_SPACE_TREE compat_ro flag is not set there is
 * nothing to clear and this is reported; otherwise
 * btrfs_clear_free_space_tree() is run.
 *
 * Progress and failures are reported with printf()/error().
 * NOTE(review): the return values are not visible in this listing.
 */
13186 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
13191 if (clear_version == 1) {
13192 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13194 "free space cache v2 detected, use --clear-space-cache v2");
13198 printf("Clearing free space cache\n");
13199 ret = clear_free_space_cache(fs_info);
13201 error("failed to clear free space cache");
13204 printf("Free space cache cleared\n");
13206 } else if (clear_version == 2) {
13207 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13208 printf("no free space cache v2 to clear\n");
13212 printf("Clear free space cache v2\n");
13213 ret = btrfs_clear_free_space_tree(fs_info);
13215 error("failed to clear free space cache v2: %d", ret);
13218 printf("free space cache v2 cleared\n");
/*
 * Help text for the "btrfs check" command, one string per output line:
 * synopsis, short description, long description, then the option list.
 * cmd_check() passes this array to usage() when the arguments are invalid.
 */
13225 const char * const cmd_check_usage[] = {
13226 "btrfs check [options] <device>",
13227 "Check structural integrity of a filesystem (unmounted).",
13228 "Check structural integrity of an unmounted filesystem. Verify internal",
13229 "trees' consistency and item connectivity. In the repair mode try to",
13230 "fix the problems found. ",
13231 "WARNING: the repair mode is considered dangerous",
13233 "-s|--super <superblock> use this superblock copy",
13234 "-b|--backup use the first valid backup root copy",
13235 "--force skip mount checks, repair is not possible",
13236 "--repair try to repair the filesystem",
13237 "--readonly run in read-only mode (default)",
13238 "--init-csum-tree create a new CRC tree",
13239 "--init-extent-tree create a new extent tree",
13240 "--mode <MODE> allows choice of memory/IO trade-offs",
13241 " where MODE is one of:",
13242 " original - read inodes and extents to memory (requires",
13243 " more memory, does less IO)",
13244 " lowmem - try to use less memory but read blocks again",
13246 "--check-data-csum verify checksums of data blocks",
13247 "-Q|--qgroup-report print a report on qgroup consistency",
13248 "-E|--subvol-extents <subvolid>",
13249 " print subvolume extents and sharing state",
13250 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
13251 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
13252 "-p|--progress indicate progress",
13253 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
13257 int cmd_check(int argc, char **argv)
13259 struct cache_tree root_cache;
13260 struct btrfs_root *root;
13261 struct btrfs_fs_info *info;
13264 u64 tree_root_bytenr = 0;
13265 u64 chunk_root_bytenr = 0;
13266 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
13270 int init_csum_tree = 0;
13272 int clear_space_cache = 0;
13273 int qgroup_report = 0;
13274 int qgroups_repaired = 0;
13275 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
13280 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
13281 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
13282 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
13283 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
13284 GETOPT_VAL_FORCE };
13285 static const struct option long_options[] = {
13286 { "super", required_argument, NULL, 's' },
13287 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
13288 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
13289 { "init-csum-tree", no_argument, NULL,
13290 GETOPT_VAL_INIT_CSUM },
13291 { "init-extent-tree", no_argument, NULL,
13292 GETOPT_VAL_INIT_EXTENT },
13293 { "check-data-csum", no_argument, NULL,
13294 GETOPT_VAL_CHECK_CSUM },
13295 { "backup", no_argument, NULL, 'b' },
13296 { "subvol-extents", required_argument, NULL, 'E' },
13297 { "qgroup-report", no_argument, NULL, 'Q' },
13298 { "tree-root", required_argument, NULL, 'r' },
13299 { "chunk-root", required_argument, NULL,
13300 GETOPT_VAL_CHUNK_TREE },
13301 { "progress", no_argument, NULL, 'p' },
13302 { "mode", required_argument, NULL,
13304 { "clear-space-cache", required_argument, NULL,
13305 GETOPT_VAL_CLEAR_SPACE_CACHE},
13306 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
13307 { NULL, 0, NULL, 0}
13310 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
13314 case 'a': /* ignored */ break;
13316 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
13319 num = arg_strtou64(optarg);
13320 if (num >= BTRFS_SUPER_MIRROR_MAX) {
13322 "super mirror should be less than %d",
13323 BTRFS_SUPER_MIRROR_MAX);
13326 bytenr = btrfs_sb_offset(((int)num));
13327 printf("using SB copy %llu, bytenr %llu\n", num,
13328 (unsigned long long)bytenr);
13334 subvolid = arg_strtou64(optarg);
13337 tree_root_bytenr = arg_strtou64(optarg);
13339 case GETOPT_VAL_CHUNK_TREE:
13340 chunk_root_bytenr = arg_strtou64(optarg);
13343 ctx.progress_enabled = true;
13347 usage(cmd_check_usage);
13348 case GETOPT_VAL_REPAIR:
13349 printf("enabling repair mode\n");
13351 ctree_flags |= OPEN_CTREE_WRITES;
13353 case GETOPT_VAL_READONLY:
13356 case GETOPT_VAL_INIT_CSUM:
13357 printf("Creating a new CRC tree\n");
13358 init_csum_tree = 1;
13360 ctree_flags |= OPEN_CTREE_WRITES;
13362 case GETOPT_VAL_INIT_EXTENT:
13363 init_extent_tree = 1;
13364 ctree_flags |= (OPEN_CTREE_WRITES |
13365 OPEN_CTREE_NO_BLOCK_GROUPS);
13368 case GETOPT_VAL_CHECK_CSUM:
13369 check_data_csum = 1;
13371 case GETOPT_VAL_MODE:
13372 check_mode = parse_check_mode(optarg);
13373 if (check_mode == CHECK_MODE_UNKNOWN) {
13374 error("unknown mode: %s", optarg);
13378 case GETOPT_VAL_CLEAR_SPACE_CACHE:
13379 if (strcmp(optarg, "v1") == 0) {
13380 clear_space_cache = 1;
13381 } else if (strcmp(optarg, "v2") == 0) {
13382 clear_space_cache = 2;
13383 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
13386 "invalid argument to --clear-space-cache, must be v1 or v2");
13389 ctree_flags |= OPEN_CTREE_WRITES;
13391 case GETOPT_VAL_FORCE:
13397 if (check_argc_exact(argc - optind, 1))
13398 usage(cmd_check_usage);
13400 if (ctx.progress_enabled) {
13401 ctx.tp = TASK_NOTHING;
13402 ctx.info = task_init(print_status_check, print_status_return, &ctx);
13405 /* This check is the only reason for --readonly to exist */
13406 if (readonly && repair) {
13407 error("repair options are not compatible with --readonly");
13412 * experimental and dangerous
13414 if (repair && check_mode == CHECK_MODE_LOWMEM)
13415 warning("low-memory mode repair support is only partial");
13418 cache_tree_init(&root_cache);
13420 ret = check_mounted(argv[optind]);
13423 error("could not check mount status: %s",
13429 "%s is currently mounted, use --force if you really intend to check the filesystem",
13437 error("repair and --force is not yet supported");
13444 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
13448 "filesystem mounted, continuing because of --force");
13450 /* A block device is mounted in exclusive mode by kernel */
13451 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
13454 /* only allow partial opening under repair mode */
13456 ctree_flags |= OPEN_CTREE_PARTIAL;
13458 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
13459 chunk_root_bytenr, ctree_flags);
13461 error("cannot open file system");
13467 global_info = info;
13468 root = info->fs_root;
13469 uuid_unparse(info->super_copy->fsid, uuidbuf);
13471 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
13474 * Check the bare minimum before starting anything else that could rely
13475 * on it, namely the tree roots, any local consistency checks
13477 if (!extent_buffer_uptodate(info->tree_root->node) ||
13478 !extent_buffer_uptodate(info->dev_root->node) ||
13479 !extent_buffer_uptodate(info->chunk_root->node)) {
13480 error("critical roots corrupted, unable to check the filesystem");
13486 if (clear_space_cache) {
13487 ret = do_clear_free_space_cache(info, clear_space_cache);
13493 * repair mode will force us to commit transaction which
13494 * will make us fail to load log tree when mounting.
13496 if (repair && btrfs_super_log_root(info->super_copy)) {
13497 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
13503 ret = zero_log_tree(root);
13506 error("failed to zero log tree: %d", ret);
13511 if (qgroup_report) {
13512 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
13514 ret = qgroup_verify_all(info);
13521 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
13522 subvolid, argv[optind], uuidbuf);
13523 ret = print_extent_state(info, subvolid);
13528 if (init_extent_tree || init_csum_tree) {
13529 struct btrfs_trans_handle *trans;
13531 trans = btrfs_start_transaction(info->extent_root, 0);
13532 if (IS_ERR(trans)) {
13533 error("error starting transaction");
13534 ret = PTR_ERR(trans);
13539 if (init_extent_tree) {
13540 printf("Creating a new extent tree\n");
13541 ret = reinit_extent_tree(trans, info);
13547 if (init_csum_tree) {
13548 printf("Reinitialize checksum tree\n");
13549 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
13551 error("checksum tree initialization failed: %d",
13558 ret = fill_csum_tree(trans, info->csum_root,
13562 error("checksum tree refilling failed: %d", ret);
13567 * Ok now we commit and run the normal fsck, which will add
13568 * extent entries for all of the items it finds.
13570 ret = btrfs_commit_transaction(trans, info->extent_root);
13575 if (!extent_buffer_uptodate(info->extent_root->node)) {
13576 error("critical: extent_root, unable to check the filesystem");
13581 if (!extent_buffer_uptodate(info->csum_root->node)) {
13582 error("critical: csum_root, unable to check the filesystem");
13588 ret = do_check_chunks_and_extents(info);
13592 "errors found in extent allocation tree or chunk allocation");
13594 ret = repair_root_items(info);
13597 error("failed to repair root items: %s", strerror(-ret));
13601 fprintf(stderr, "Fixed %d roots.\n", ret);
13603 } else if (ret > 0) {
13605 "Found %d roots with an outdated root item.\n",
13608 "Please run a filesystem check with the option --repair to fix them.\n");
13614 if (!ctx.progress_enabled) {
13615 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13616 fprintf(stderr, "checking free space tree\n");
13618 fprintf(stderr, "checking free space cache\n");
13620 ret = check_space_cache(root);
13623 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13624 error("errors found in free space tree");
13626 error("errors found in free space cache");
13631 * We used to have to have these hole extents in between our real
13632 * extents so if we don't have this flag set we need to make sure there
13633 * are no gaps in the file extents for inodes, otherwise we can just
13634 * ignore it when this happens.
13636 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13637 ret = do_check_fs_roots(info, &root_cache);
13640 error("errors found in fs roots");
13644 fprintf(stderr, "checking csums\n");
13645 ret = check_csums(root);
13648 error("errors found in csum tree");
13652 fprintf(stderr, "checking root refs\n");
13653 /* For low memory mode, check_fs_roots_v2 handles root refs */
13654 if (check_mode != CHECK_MODE_LOWMEM) {
13655 ret = check_root_refs(root, &root_cache);
13658 error("errors found in root refs");
13663 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13664 struct extent_buffer *eb;
13666 eb = list_first_entry(&root->fs_info->recow_ebs,
13667 struct extent_buffer, recow);
13668 list_del_init(&eb->recow);
13669 ret = recow_extent_buffer(root, eb);
13672 error("fails to fix transid errors");
13677 while (!list_empty(&delete_items)) {
13678 struct bad_item *bad;
13680 bad = list_first_entry(&delete_items, struct bad_item, list);
13681 list_del_init(&bad->list);
13683 ret = delete_bad_item(root, bad);
13689 if (info->quota_enabled) {
13690 fprintf(stderr, "checking quota groups\n");
13691 ret = qgroup_verify_all(info);
13694 error("failed to check quota groups");
13698 ret = repair_qgroups(info, &qgroups_repaired);
13701 error("failed to repair quota groups");
13707 if (!list_empty(&root->fs_info->recow_ebs)) {
13708 error("transid errors in file system");
13713 printf("found %llu bytes used, ",
13714 (unsigned long long)bytes_used);
13716 printf("error(s) found\n");
13718 printf("no error found\n");
13719 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13720 printf("total tree bytes: %llu\n",
13721 (unsigned long long)total_btree_bytes);
13722 printf("total fs tree bytes: %llu\n",
13723 (unsigned long long)total_fs_tree_bytes);
13724 printf("total extent tree bytes: %llu\n",
13725 (unsigned long long)total_extent_tree_bytes);
13726 printf("btree space waste bytes: %llu\n",
13727 (unsigned long long)btree_space_waste);
13728 printf("file data blocks allocated: %llu\n referenced %llu\n",
13729 (unsigned long long)data_bytes_allocated,
13730 (unsigned long long)data_bytes_referenced);
13732 free_qgroup_counts();
13733 free_root_recs_tree(&root_cache);
13737 if (ctx.progress_enabled)
13738 task_deinit(ctx.info);