2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87 struct extent_backref {
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
98 return rb_entry(node, struct extent_backref, node);
101 struct data_backref {
102 struct extent_backref node;
/*
 * Single-bit flags occupying bits 1 through 17; bit 0 is not assigned in
 * this group. Each flag has its own bit, so several of them can be combined
 * in one integer without losing information.
 */
116 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
117 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
118 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
119 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
120 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
121 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but not match */
122 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
123 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
124 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
125 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
126 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
127 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
128 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM no reference */
129 #define NO_INODE_ITEM (1<<14) /* no inode_item */
130 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
131 #define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
132 #define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but not match */
134 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 return container_of(back, struct data_backref, node);
139 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
141 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
142 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
143 struct data_backref *back1 = to_data_backref(ext1);
144 struct data_backref *back2 = to_data_backref(ext2);
146 WARN_ON(!ext1->is_data);
147 WARN_ON(!ext2->is_data);
149 /* parent and root are a union, so this covers both */
150 if (back1->parent > back2->parent)
152 if (back1->parent < back2->parent)
155 /* This is a full backref and the parents match. */
156 if (back1->node.full_backref)
159 if (back1->owner > back2->owner)
161 if (back1->owner < back2->owner)
164 if (back1->offset > back2->offset)
166 if (back1->offset < back2->offset)
169 if (back1->found_ref && back2->found_ref) {
170 if (back1->disk_bytenr > back2->disk_bytenr)
172 if (back1->disk_bytenr < back2->disk_bytenr)
175 if (back1->bytes > back2->bytes)
177 if (back1->bytes < back2->bytes)
185 * Much like data_backref, but with the undetermined members removed
186 * and linked through a list_head instead.
187 * During extent scan, it is stored in root->orphan_data_extent.
188 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
190 struct orphan_data_extent {
191 struct list_head list;
199 struct tree_backref {
200 struct extent_backref node;
207 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
209 return container_of(back, struct tree_backref, node);
212 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
214 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
215 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
216 struct tree_backref *back1 = to_tree_backref(ext1);
217 struct tree_backref *back2 = to_tree_backref(ext2);
219 WARN_ON(ext1->is_data);
220 WARN_ON(ext2->is_data);
222 /* parent and root are a union, so this covers both */
223 if (back1->parent > back2->parent)
225 if (back1->parent < back2->parent)
231 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
233 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
234 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
236 if (ext1->is_data > ext2->is_data)
239 if (ext1->is_data < ext2->is_data)
242 if (ext1->full_backref > ext2->full_backref)
244 if (ext1->full_backref < ext2->full_backref)
248 return compare_data_backref(node1, node2);
250 return compare_tree_backref(node1, node2);
253 /* Explicit initialization for extent_record::flag_block_full_backref */
254 enum { FLAG_UNSET = 2 };
256 struct extent_record {
257 struct list_head backrefs;
258 struct list_head dups;
259 struct rb_root backref_tree;
260 struct list_head list;
261 struct cache_extent cache;
262 struct btrfs_disk_key parent_key;
267 u64 extent_item_refs;
269 u64 parent_generation;
273 unsigned int flag_block_full_backref:2;
274 unsigned int found_rec:1;
275 unsigned int content_checked:1;
276 unsigned int owner_ref_checked:1;
277 unsigned int is_root:1;
278 unsigned int metadata:1;
279 unsigned int bad_full_backref:1;
280 unsigned int crossing_stripes:1;
281 unsigned int wrong_chunk_type:1;
284 static inline struct extent_record* to_extent_record(struct list_head *entry)
286 return container_of(entry, struct extent_record, list);
289 struct inode_backref {
290 struct list_head list;
291 unsigned int found_dir_item:1;
292 unsigned int found_dir_index:1;
293 unsigned int found_inode_ref:1;
303 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
305 return list_entry(entry, struct inode_backref, list);
308 struct root_item_record {
309 struct list_head list;
315 struct btrfs_key drop_key;
/*
 * Error bits recorded per name backref (OR-ed into backref->errors).
 * print_ref_error() turns every set bit into a short message.
 */
318 #define REF_ERR_NO_DIR_ITEM (1 << 0)
319 #define REF_ERR_NO_DIR_INDEX (1 << 1)
320 #define REF_ERR_NO_INODE_REF (1 << 2)
321 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
322 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
323 #define REF_ERR_DUP_INODE_REF (1 << 5)
324 #define REF_ERR_INDEX_UNMATCH (1 << 6)
325 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
326 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 0x100
327 #define REF_ERR_NO_ROOT_REF (1 << 9)
328 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
329 #define REF_ERR_DUP_ROOT_REF (1 << 11)
330 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
332 struct file_extent_hole {
338 struct inode_record {
339 struct list_head backrefs;
340 unsigned int checked:1;
341 unsigned int merging:1;
342 unsigned int found_inode_item:1;
343 unsigned int found_dir_item:1;
344 unsigned int found_file_extent:1;
345 unsigned int found_csum_item:1;
346 unsigned int some_csum_missing:1;
347 unsigned int nodatasum:1;
360 struct rb_root holes;
361 struct list_head orphan_extents;
/*
 * Error bits recorded per inode record (OR-ed into rec->errors).
 * print_inode_error() prints the raw mask in hex followed by one message
 * per set bit.
 */
366 #define I_ERR_NO_INODE_ITEM (1 << 0)
367 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
368 #define I_ERR_DUP_INODE_ITEM (1 << 2)
369 #define I_ERR_DUP_DIR_INDEX (1 << 3)
370 #define I_ERR_ODD_DIR_ITEM (1 << 4)
371 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
372 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
373 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
374 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 0x100
375 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
376 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 0x400
377 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
378 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
379 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
380 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
382 struct root_backref {
383 struct list_head list;
384 unsigned int found_dir_item:1;
385 unsigned int found_dir_index:1;
386 unsigned int found_back_ref:1;
387 unsigned int found_forward_ref:1;
388 unsigned int reachable:1;
397 static inline struct root_backref* to_root_backref(struct list_head *entry)
399 return list_entry(entry, struct root_backref, list);
403 struct list_head backrefs;
404 struct cache_extent cache;
405 unsigned int found_root_item:1;
411 struct cache_extent cache;
416 struct cache_extent cache;
417 struct cache_tree root_cache;
418 struct cache_tree inode_cache;
419 struct inode_record *current;
428 struct walk_control {
429 struct cache_tree shared;
430 struct shared_node *nodes[BTRFS_MAX_LEVEL];
436 struct btrfs_key key;
438 struct list_head list;
441 struct extent_entry {
446 struct list_head list;
449 struct root_item_info {
450 /* level of the root */
452 /* number of nodes at this level, must be 1 for a root */
456 struct cache_extent cache_extent;
460 * Error bit for low memory mode check.
462 * Currently no caller cares about it yet. Just internal use for error
/*
 * Error bits reported by the low memory mode checks.
 *
 * Every flag owns a distinct bit so that several errors can be OR-ed into
 * one value and still be told apart afterwards.  CROSSING_STRIPE_BOUNDARY
 * used to share bit 4 with REFERENCER_MISMATCH, which made the two
 * conditions indistinguishable in a combined mask; it now uses the first
 * free bit (bit 9).  All other values are unchanged.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
476 static void *print_status_check(void *p)
478 struct task_ctx *priv = p;
479 const char work_indicator[] = { '.', 'o', 'O', 'o' };
481 static char *task_position_string[] = {
483 "checking free space cache",
487 task_period_start(priv->info, 1000 /* 1s */);
489 if (priv->tp == TASK_NOTHING)
493 printf("%s [%c]\r", task_position_string[priv->tp],
494 work_indicator[count % 4]);
497 task_period_wait(priv->info);
502 static int print_status_return(void *p)
510 static enum btrfs_check_mode parse_check_mode(const char *str)
512 if (strcmp(str, "lowmem") == 0)
513 return CHECK_MODE_LOWMEM;
514 if (strcmp(str, "orig") == 0)
515 return CHECK_MODE_ORIGINAL;
516 if (strcmp(str, "original") == 0)
517 return CHECK_MODE_ORIGINAL;
519 return CHECK_MODE_UNKNOWN;
522 /* Compatibility helper that allows old code to be reused */
523 static u64 first_extent_gap(struct rb_root *holes)
525 struct file_extent_hole *hole;
527 if (RB_EMPTY_ROOT(holes))
530 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
534 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
536 struct file_extent_hole *hole1;
537 struct file_extent_hole *hole2;
539 hole1 = rb_entry(node1, struct file_extent_hole, node);
540 hole2 = rb_entry(node2, struct file_extent_hole, node);
542 if (hole1->start > hole2->start)
544 if (hole1->start < hole2->start)
546 /* Now hole1->start == hole2->start */
547 if (hole1->len >= hole2->len)
549 * Hole 1 will be merge center
550 * Same hole will be merged later
553 /* Hole 2 will be merge center */
558 * Add a hole to the record
560 * This will do hole merge for copy_file_extent_holes(),
561 * which will ensure there won't be continuous holes.
563 static int add_file_extent_hole(struct rb_root *holes,
566 struct file_extent_hole *hole;
567 struct file_extent_hole *prev = NULL;
568 struct file_extent_hole *next = NULL;
570 hole = malloc(sizeof(*hole));
575 /* Since compare will not return 0, no -EEXIST will happen */
576 rb_insert(holes, &hole->node, compare_hole);
578 /* simple merge with previous hole */
579 if (rb_prev(&hole->node))
580 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
582 if (prev && prev->start + prev->len >= hole->start) {
583 hole->len = hole->start + hole->len - prev->start;
584 hole->start = prev->start;
585 rb_erase(&prev->node, holes);
590 /* iterate merge with next holes */
592 if (!rb_next(&hole->node))
594 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
596 if (hole->start + hole->len >= next->start) {
597 if (hole->start + hole->len <= next->start + next->len)
598 hole->len = next->start + next->len -
600 rb_erase(&next->node, holes);
609 static int compare_hole_range(struct rb_node *node, void *data)
611 struct file_extent_hole *hole;
614 hole = (struct file_extent_hole *)data;
617 hole = rb_entry(node, struct file_extent_hole, node);
618 if (start < hole->start)
620 if (start >= hole->start && start < hole->start + hole->len)
626 * Delete a hole in the record
628 * This will split the hole if needed and is much stricter than add.
630 static int del_file_extent_hole(struct rb_root *holes,
633 struct file_extent_hole *hole;
634 struct file_extent_hole tmp;
639 struct rb_node *node;
646 node = rb_search(holes, &tmp, compare_hole_range, NULL);
649 hole = rb_entry(node, struct file_extent_hole, node);
650 if (start + len > hole->start + hole->len)
654 * Now there will be no overlap, delete the hole and re-add the
655 * split(s) if they exist.
657 if (start > hole->start) {
658 prev_start = hole->start;
659 prev_len = start - hole->start;
662 if (hole->start + hole->len > start + len) {
663 next_start = start + len;
664 next_len = hole->start + hole->len - start - len;
667 rb_erase(node, holes);
670 ret = add_file_extent_hole(holes, prev_start, prev_len);
675 ret = add_file_extent_hole(holes, next_start, next_len);
682 static int copy_file_extent_holes(struct rb_root *dst,
685 struct file_extent_hole *hole;
686 struct rb_node *node;
689 node = rb_first(src);
691 hole = rb_entry(node, struct file_extent_hole, node);
692 ret = add_file_extent_hole(dst, hole->start, hole->len);
695 node = rb_next(node);
700 static void free_file_extent_holes(struct rb_root *holes)
702 struct rb_node *node;
703 struct file_extent_hole *hole;
705 node = rb_first(holes);
707 hole = rb_entry(node, struct file_extent_hole, node);
708 rb_erase(node, holes);
710 node = rb_first(holes);
714 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
716 static void record_root_in_trans(struct btrfs_trans_handle *trans,
717 struct btrfs_root *root)
719 if (root->last_trans != trans->transid) {
720 root->track_dirty = 1;
721 root->last_trans = trans->transid;
722 root->commit_root = root->node;
723 extent_buffer_get(root->node);
727 static u8 imode_to_type(u32 imode)
730 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
731 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
732 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
733 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
734 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
735 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
736 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
737 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
740 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
744 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
746 struct device_record *rec1;
747 struct device_record *rec2;
749 rec1 = rb_entry(node1, struct device_record, node);
750 rec2 = rb_entry(node2, struct device_record, node);
751 if (rec1->devid > rec2->devid)
753 else if (rec1->devid < rec2->devid)
/*
 * Duplicate an inode record.
 *
 * A new record is allocated and filled with a byte copy of @orig_rec.  The
 * containers embedded in the copy (backref list, orphan extent list, hole
 * tree) are then reinitialized and refilled with freshly allocated copies
 * of the entries found in @orig_rec, so the clone does not share list or
 * tree nodes with the original.
 *
 * Returns ERR_PTR(-ENOMEM) when the record itself cannot be allocated.
 */
759 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
761 struct inode_record *rec;
762 struct inode_backref *backref;
763 struct inode_backref *orig;
764 struct inode_backref *tmp;
765 struct orphan_data_extent *src_orphan;
766 struct orphan_data_extent *dst_orphan;
771 rec = malloc(sizeof(*rec));
773 return ERR_PTR(-ENOMEM);
774 memcpy(rec, orig_rec, sizeof(*rec));
/* The byte copy duplicated the original's list heads and tree root; reset them. */
776 INIT_LIST_HEAD(&rec->backrefs);
777 INIT_LIST_HEAD(&rec->orphan_extents);
778 rec->holes = RB_ROOT;
/* Copy every name backref, including the name stored behind the struct. */
780 list_for_each_entry(orig, &orig_rec->backrefs, list) {
/* struct + name bytes + 1, the same size get_inode_backref() allocates */
781 size = sizeof(*orig) + orig->namelen + 1;
782 backref = malloc(size);
787 memcpy(backref, orig, size);
788 list_add_tail(&backref->list, &rec->backrefs);
/* Copy every orphan data extent entry. */
790 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
791 dst_orphan = malloc(sizeof(*dst_orphan));
796 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
797 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
/* Re-add the original's holes into the clone's own tree. */
799 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
/* From here on: undo the partial copy (holes, backrefs, orphan extents). */
806 rb = rb_first(&rec->holes);
808 struct file_extent_hole *hole;
810 hole = rb_entry(rb, struct file_extent_hole, node);
816 if (!list_empty(&rec->backrefs))
817 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
818 list_del(&orig->list);
822 if (!list_empty(&rec->orphan_extents))
/*
 * NOTE(review): orig and tmp are struct inode_backref pointers, but the
 * entries linked into rec->orphan_extents above are struct
 * orphan_data_extent.  The entry address is therefore computed with the
 * layout of the wrong structure.  Both structures appear to start with
 * their list member, so this probably works by accident -- confirm, and
 * prefer dedicated struct orphan_data_extent cursors for this loop.
 */
823 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
824 list_del(&orig->list);
833 static void print_orphan_data_extents(struct list_head *orphan_extents,
836 struct orphan_data_extent *orphan;
838 if (list_empty(orphan_extents))
840 printf("The following data extent is lost in tree %llu:\n",
842 list_for_each_entry(orphan, orphan_extents, list) {
843 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
844 orphan->objectid, orphan->offset, orphan->disk_bytenr,
849 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
851 u64 root_objectid = root->root_key.objectid;
852 int errors = rec->errors;
856 /* reloc root errors, we print its corresponding fs root objectid*/
857 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
858 root_objectid = root->root_key.offset;
859 fprintf(stderr, "reloc");
861 fprintf(stderr, "root %llu inode %llu errors %x",
862 (unsigned long long) root_objectid,
863 (unsigned long long) rec->ino, rec->errors);
865 if (errors & I_ERR_NO_INODE_ITEM)
866 fprintf(stderr, ", no inode item");
867 if (errors & I_ERR_NO_ORPHAN_ITEM)
868 fprintf(stderr, ", no orphan item");
869 if (errors & I_ERR_DUP_INODE_ITEM)
870 fprintf(stderr, ", dup inode item");
871 if (errors & I_ERR_DUP_DIR_INDEX)
872 fprintf(stderr, ", dup dir index");
873 if (errors & I_ERR_ODD_DIR_ITEM)
874 fprintf(stderr, ", odd dir item");
875 if (errors & I_ERR_ODD_FILE_EXTENT)
876 fprintf(stderr, ", odd file extent");
877 if (errors & I_ERR_BAD_FILE_EXTENT)
878 fprintf(stderr, ", bad file extent");
879 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
880 fprintf(stderr, ", file extent overlap");
881 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
882 fprintf(stderr, ", file extent discount");
883 if (errors & I_ERR_DIR_ISIZE_WRONG)
884 fprintf(stderr, ", dir isize wrong");
885 if (errors & I_ERR_FILE_NBYTES_WRONG)
886 fprintf(stderr, ", nbytes wrong");
887 if (errors & I_ERR_ODD_CSUM_ITEM)
888 fprintf(stderr, ", odd csum item");
889 if (errors & I_ERR_SOME_CSUM_MISSING)
890 fprintf(stderr, ", some csum missing");
891 if (errors & I_ERR_LINK_COUNT_WRONG)
892 fprintf(stderr, ", link count wrong");
893 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
894 fprintf(stderr, ", orphan file extent");
895 fprintf(stderr, "\n");
896 /* Print the orphan extents if needed */
897 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
898 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
900 /* Print the holes if needed */
901 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
902 struct file_extent_hole *hole;
903 struct rb_node *node;
906 node = rb_first(&rec->holes);
907 fprintf(stderr, "Found file extent holes:\n");
910 hole = rb_entry(node, struct file_extent_hole, node);
911 fprintf(stderr, "\tstart: %llu, len: %llu\n",
912 hole->start, hole->len);
913 node = rb_next(node);
916 fprintf(stderr, "\tstart: 0, len: %llu\n",
918 root->fs_info->sectorsize));
922 static void print_ref_error(int errors)
924 if (errors & REF_ERR_NO_DIR_ITEM)
925 fprintf(stderr, ", no dir item");
926 if (errors & REF_ERR_NO_DIR_INDEX)
927 fprintf(stderr, ", no dir index");
928 if (errors & REF_ERR_NO_INODE_REF)
929 fprintf(stderr, ", no inode ref");
930 if (errors & REF_ERR_DUP_DIR_ITEM)
931 fprintf(stderr, ", dup dir item");
932 if (errors & REF_ERR_DUP_DIR_INDEX)
933 fprintf(stderr, ", dup dir index");
934 if (errors & REF_ERR_DUP_INODE_REF)
935 fprintf(stderr, ", dup inode ref");
936 if (errors & REF_ERR_INDEX_UNMATCH)
937 fprintf(stderr, ", index mismatch");
938 if (errors & REF_ERR_FILETYPE_UNMATCH)
939 fprintf(stderr, ", filetype mismatch");
940 if (errors & REF_ERR_NAME_TOO_LONG)
941 fprintf(stderr, ", name too long");
942 if (errors & REF_ERR_NO_ROOT_REF)
943 fprintf(stderr, ", no root ref");
944 if (errors & REF_ERR_NO_ROOT_BACKREF)
945 fprintf(stderr, ", no root backref");
946 if (errors & REF_ERR_DUP_ROOT_REF)
947 fprintf(stderr, ", dup root ref");
948 if (errors & REF_ERR_DUP_ROOT_BACKREF)
949 fprintf(stderr, ", dup root backref");
950 fprintf(stderr, "\n");
953 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
956 struct ptr_node *node;
957 struct cache_extent *cache;
958 struct inode_record *rec = NULL;
961 cache = lookup_cache_extent(inode_cache, ino, 1);
963 node = container_of(cache, struct ptr_node, cache);
965 if (mod && rec->refs > 1) {
966 node->data = clone_inode_rec(rec);
967 if (IS_ERR(node->data))
973 rec = calloc(1, sizeof(*rec));
975 return ERR_PTR(-ENOMEM);
977 rec->extent_start = (u64)-1;
979 INIT_LIST_HEAD(&rec->backrefs);
980 INIT_LIST_HEAD(&rec->orphan_extents);
981 rec->holes = RB_ROOT;
983 node = malloc(sizeof(*node));
986 return ERR_PTR(-ENOMEM);
988 node->cache.start = ino;
989 node->cache.size = 1;
992 if (ino == BTRFS_FREE_INO_OBJECTID)
995 ret = insert_cache_extent(inode_cache, &node->cache);
997 return ERR_PTR(-EEXIST);
1002 static void free_orphan_data_extents(struct list_head *orphan_extents)
1004 struct orphan_data_extent *orphan;
1006 while (!list_empty(orphan_extents)) {
1007 orphan = list_entry(orphan_extents->next,
1008 struct orphan_data_extent, list);
1009 list_del(&orphan->list);
1014 static void free_inode_rec(struct inode_record *rec)
1016 struct inode_backref *backref;
1018 if (--rec->refs > 0)
1021 while (!list_empty(&rec->backrefs)) {
1022 backref = to_inode_backref(rec->backrefs.next);
1023 list_del(&backref->list);
1026 free_orphan_data_extents(&rec->orphan_extents);
1027 free_file_extent_holes(&rec->holes);
1031 static int can_free_inode_rec(struct inode_record *rec)
1033 if (!rec->errors && rec->checked && rec->found_inode_item &&
1034 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1039 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1040 struct inode_record *rec)
1042 struct cache_extent *cache;
1043 struct inode_backref *tmp, *backref;
1044 struct ptr_node *node;
1047 if (!rec->found_inode_item)
1050 filetype = imode_to_type(rec->imode);
1051 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1052 if (backref->found_dir_item && backref->found_dir_index) {
1053 if (backref->filetype != filetype)
1054 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1055 if (!backref->errors && backref->found_inode_ref &&
1056 rec->nlink == rec->found_link) {
1057 list_del(&backref->list);
1063 if (!rec->checked || rec->merging)
1066 if (S_ISDIR(rec->imode)) {
1067 if (rec->found_size != rec->isize)
1068 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1069 if (rec->found_file_extent)
1070 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1071 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1072 if (rec->found_dir_item)
1073 rec->errors |= I_ERR_ODD_DIR_ITEM;
1074 if (rec->found_size != rec->nbytes)
1075 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1076 if (rec->nlink > 0 && !no_holes &&
1077 (rec->extent_end < rec->isize ||
1078 first_extent_gap(&rec->holes) < rec->isize))
1079 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1082 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1083 if (rec->found_csum_item && rec->nodatasum)
1084 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1085 if (rec->some_csum_missing && !rec->nodatasum)
1086 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1089 BUG_ON(rec->refs != 1);
1090 if (can_free_inode_rec(rec)) {
1091 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1092 node = container_of(cache, struct ptr_node, cache);
1093 BUG_ON(node->data != rec);
1094 remove_cache_extent(inode_cache, &node->cache);
1096 free_inode_rec(rec);
1100 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1102 struct btrfs_path path;
1103 struct btrfs_key key;
1106 key.objectid = BTRFS_ORPHAN_OBJECTID;
1107 key.type = BTRFS_ORPHAN_ITEM_KEY;
1110 btrfs_init_path(&path);
1111 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1112 btrfs_release_path(&path);
1118 static int process_inode_item(struct extent_buffer *eb,
1119 int slot, struct btrfs_key *key,
1120 struct shared_node *active_node)
1122 struct inode_record *rec;
1123 struct btrfs_inode_item *item;
1125 rec = active_node->current;
1126 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1127 if (rec->found_inode_item) {
1128 rec->errors |= I_ERR_DUP_INODE_ITEM;
1131 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1132 rec->nlink = btrfs_inode_nlink(eb, item);
1133 rec->isize = btrfs_inode_size(eb, item);
1134 rec->nbytes = btrfs_inode_nbytes(eb, item);
1135 rec->imode = btrfs_inode_mode(eb, item);
1136 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1138 rec->found_inode_item = 1;
1139 if (rec->nlink == 0)
1140 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1141 maybe_free_inode_rec(&active_node->inode_cache, rec);
1145 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1147 int namelen, u64 dir)
1149 struct inode_backref *backref;
1151 list_for_each_entry(backref, &rec->backrefs, list) {
1152 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1154 if (backref->dir != dir || backref->namelen != namelen)
1156 if (memcmp(name, backref->name, namelen))
1161 backref = malloc(sizeof(*backref) + namelen + 1);
1164 memset(backref, 0, sizeof(*backref));
1166 backref->namelen = namelen;
1167 memcpy(backref->name, name, namelen);
1168 backref->name[namelen] = '\0';
1169 list_add_tail(&backref->list, &rec->backrefs);
1173 static int add_inode_backref(struct cache_tree *inode_cache,
1174 u64 ino, u64 dir, u64 index,
1175 const char *name, int namelen,
1176 u8 filetype, u8 itemtype, int errors)
1178 struct inode_record *rec;
1179 struct inode_backref *backref;
1181 rec = get_inode_rec(inode_cache, ino, 1);
1182 BUG_ON(IS_ERR(rec));
1183 backref = get_inode_backref(rec, name, namelen, dir);
1186 backref->errors |= errors;
1187 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1188 if (backref->found_dir_index)
1189 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1190 if (backref->found_inode_ref && backref->index != index)
1191 backref->errors |= REF_ERR_INDEX_UNMATCH;
1192 if (backref->found_dir_item && backref->filetype != filetype)
1193 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1195 backref->index = index;
1196 backref->filetype = filetype;
1197 backref->found_dir_index = 1;
1198 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1200 if (backref->found_dir_item)
1201 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1202 if (backref->found_dir_index && backref->filetype != filetype)
1203 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1205 backref->filetype = filetype;
1206 backref->found_dir_item = 1;
1207 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1208 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1209 if (backref->found_inode_ref)
1210 backref->errors |= REF_ERR_DUP_INODE_REF;
1211 if (backref->found_dir_index && backref->index != index)
1212 backref->errors |= REF_ERR_INDEX_UNMATCH;
1214 backref->index = index;
1216 backref->ref_type = itemtype;
1217 backref->found_inode_ref = 1;
1222 maybe_free_inode_rec(inode_cache, rec);
1226 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1227 struct cache_tree *dst_cache)
1229 struct inode_backref *backref;
1234 list_for_each_entry(backref, &src->backrefs, list) {
1235 if (backref->found_dir_index) {
1236 add_inode_backref(dst_cache, dst->ino, backref->dir,
1237 backref->index, backref->name,
1238 backref->namelen, backref->filetype,
1239 BTRFS_DIR_INDEX_KEY, backref->errors);
1241 if (backref->found_dir_item) {
1243 add_inode_backref(dst_cache, dst->ino,
1244 backref->dir, 0, backref->name,
1245 backref->namelen, backref->filetype,
1246 BTRFS_DIR_ITEM_KEY, backref->errors);
1248 if (backref->found_inode_ref) {
1249 add_inode_backref(dst_cache, dst->ino,
1250 backref->dir, backref->index,
1251 backref->name, backref->namelen, 0,
1252 backref->ref_type, backref->errors);
1256 if (src->found_dir_item)
1257 dst->found_dir_item = 1;
1258 if (src->found_file_extent)
1259 dst->found_file_extent = 1;
1260 if (src->found_csum_item)
1261 dst->found_csum_item = 1;
1262 if (src->some_csum_missing)
1263 dst->some_csum_missing = 1;
1264 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1265 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1270 BUG_ON(src->found_link < dir_count);
1271 dst->found_link += src->found_link - dir_count;
1272 dst->found_size += src->found_size;
1273 if (src->extent_start != (u64)-1) {
1274 if (dst->extent_start == (u64)-1) {
1275 dst->extent_start = src->extent_start;
1276 dst->extent_end = src->extent_end;
1278 if (dst->extent_end > src->extent_start)
1279 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1280 else if (dst->extent_end < src->extent_start) {
1281 ret = add_file_extent_hole(&dst->holes,
1283 src->extent_start - dst->extent_end);
1285 if (dst->extent_end < src->extent_end)
1286 dst->extent_end = src->extent_end;
1290 dst->errors |= src->errors;
1291 if (src->found_inode_item) {
1292 if (!dst->found_inode_item) {
1293 dst->nlink = src->nlink;
1294 dst->isize = src->isize;
1295 dst->nbytes = src->nbytes;
1296 dst->imode = src->imode;
1297 dst->nodatasum = src->nodatasum;
1298 dst->found_inode_item = 1;
1300 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Move the inode records cached on src_node over to dst_node.
 *
 * The root_cache of the source is processed first and then its
 * inode_cache.  Each entry is removed from the source tree and a new
 * ptr_node with the same start/size is inserted into the destination.
 * When the destination already holds an entry for that key (-EEXIST),
 * the record is merged into the existing one instead.
 *
 * Finally dst_node->current is advanced to the inode that was current on
 * src_node if that inode number is larger (or if dst had no current).
 */
1308 static int splice_shared_node(struct shared_node *src_node,
1309 struct shared_node *dst_node)
1311 struct cache_extent *cache;
1312 struct ptr_node *node, *ins;
1313 struct cache_tree *src, *dst;
1314 struct inode_record *rec, *conflict;
1315 u64 current_ino = 0;
/* Drop one reference on the source node; zero means this is the last user */
1319 if (--src_node->refs == 0)
/* Remember which inode was being processed on the source side */
1321 if (src_node->current)
1322 current_ino = src_node->current->ino;
/* First pass: root records */
1324 src = &src_node->root_cache;
1325 dst = &dst_node->root_cache;
1327 cache = search_cache_extent(src, 0);
1329 node = container_of(cache, struct ptr_node, cache);
/* Fetch the successor before the current entry may be unlinked */
1331 cache = next_cache_extent(cache);
1334 remove_cache_extent(src, &node->cache);
1337 ins = malloc(sizeof(*ins));
1339 ins->cache.start = node->cache.start;
1340 ins->cache.size = node->cache.size;
1344 ret = insert_cache_extent(dst, &ins->cache);
1345 if (ret == -EEXIST) {
/* Destination already tracks this inode: fold rec into it */
1346 conflict = get_inode_rec(dst, rec->ino, 1);
1347 BUG_ON(IS_ERR(conflict));
1348 merge_inode_recs(rec, conflict, dst);
1350 conflict->checked = 1;
1351 if (dst_node->current == conflict)
1352 dst_node->current = NULL;
1354 maybe_free_inode_rec(dst, conflict);
1355 free_inode_rec(rec);
/* Second pass: switch from the root caches to the inode caches */
1362 if (src == &src_node->root_cache) {
1363 src = &src_node->inode_cache;
1364 dst = &dst_node->inode_cache;
/* Carry the source's current inode forward when it is ahead of dst's */
1368 if (current_ino > 0 && (!dst_node->current ||
1369 current_ino > dst_node->current->ino)) {
1370 if (dst_node->current) {
1371 dst_node->current->checked = 1;
1372 maybe_free_inode_rec(dst, dst_node->current);
1374 dst_node->current = get_inode_rec(dst, current_ino, 1);
1375 BUG_ON(IS_ERR(dst_node->current));
/*
 * Per-entry destructor for trees of inode records: map the cache extent
 * back to the ptr_node that embeds it and release the inode record with
 * free_inode_rec().
 */
1380 static void free_inode_ptr(struct cache_extent *cache)
1382 struct ptr_node *node;
1383 struct inode_record *rec;
1385 node = container_of(cache, struct ptr_node, cache);
1387 free_inode_rec(rec);
/*
 * Instantiate the tree-freeing helper for inode record trees with
 * free_inode_ptr as the per-entry callback.
 * NOTE(review): presumably this defines free_inode_recs_tree(), which is
 * called elsewhere in this file -- confirm against the macro definition.
 */
1391 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the shared_node registered in @shared for the tree block at
 * bytenr, using a lookup range of size 1, and convert the cache extent
 * back into its containing shared_node.
 */
1393 static struct shared_node *find_shared_node(struct cache_tree *shared,
1396 struct cache_extent *cache;
1397 struct shared_node *node;
1399 cache = lookup_cache_extent(shared, bytenr, 1);
1401 node = container_of(cache, struct shared_node, cache);
/*
 * Allocate a zero-filled shared_node for the tree block at @bytenr,
 * initialise its root and inode record caches and insert it into @shared.
 * The node is keyed by [bytenr, bytenr + 1).
 */
1407 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1410 struct shared_node *node;
1412 node = calloc(1, sizeof(*node));
1415 node->cache.start = bytenr;
1416 node->cache.size = 1;
1417 cache_tree_init(&node->root_cache);
1418 cache_tree_init(&node->inode_cache);
1421 ret = insert_cache_extent(shared, &node->cache);
/*
 * Called when the walk reaches a tree block (at @level, located at
 * @bytenr) that is referenced more than once.
 *
 * If no shared_node exists for the block yet, one is created and becomes
 * the active node of the walk.  If one exists already, its cached records
 * are spliced into the currently active node, or simply dropped when the
 * active node is the root level of a root whose root item has zero refs.
 */
1426 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1427 struct walk_control *wc, int level)
1429 struct shared_node *node;
1430 struct shared_node *dest;
1433 if (level == wc->active_node)
/* The active node must always sit above the level being entered */
1436 BUG_ON(wc->active_node <= level);
1437 node = find_shared_node(&wc->shared, bytenr);
/* First visit: register the block and make it the active node */
1439 ret = add_shared_node(&wc->shared, bytenr, refs);
1441 node = find_shared_node(&wc->shared, bytenr);
1442 wc->nodes[level] = node;
1443 wc->active_node = level;
/* Root with zero refs at root level: discard the records once unused */
1447 if (wc->root_level == wc->active_node &&
1448 btrfs_root_refs(&root->root_item) == 0) {
1449 if (--node->refs == 0) {
1450 free_inode_recs_tree(&node->root_cache);
1451 free_inode_recs_tree(&node->inode_cache);
1452 remove_cache_extent(&wc->shared, &node->cache);
/* Otherwise hand the cached records over to the active node */
1458 dest = wc->nodes[wc->active_node];
1459 splice_shared_node(node, dest);
1460 if (node->refs == 0) {
1461 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Called when the walk moves back up past the active shared node at
 * @level.  A node above @level becomes the new active node, and the
 * records collected on the node being left are spliced into it when the
 * new active node is below the root level or the root item still has
 * references.
 */
1467 static int leave_shared_node(struct btrfs_root *root,
1468 struct walk_control *wc, int level)
1470 struct shared_node *node;
1471 struct shared_node *dest;
1474 if (level == wc->root_level)
/* Search upwards for the next level that will become the active node */
1477 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1481 BUG_ON(i >= BTRFS_MAX_LEVEL);
/* Detach the node being left and activate level i */
1483 node = wc->nodes[wc->active_node];
1484 wc->nodes[wc->active_node] = NULL;
1485 wc->active_node = i;
1487 dest = wc->nodes[wc->active_node];
1488 if (wc->active_node < wc->root_level ||
1489 btrfs_root_refs(&root->root_item) > 0) {
1490 BUG_ON(node->refs <= 1);
1491 splice_shared_node(node, dest);
1493 BUG_ON(node->refs < 2);
1502 * 1 - if the root with id child_root_id is a child of root parent_root_id
1503 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1504 * has other root(s) as parent(s)
1505 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Work out how root child_root_id relates to parent_root_id using the
 * tree root: first search for the ROOT_REF item keyed
 * (parent_root_id, ROOT_REF, child_root_id), then walk the ROOT_BACKREF
 * items of child_root_id looking for one whose offset is parent_root_id.
 * When no matching parent is found the result is 0 if some other parent
 * was seen and 2 otherwise.
 */
1507 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1510 struct btrfs_path path;
1511 struct btrfs_key key;
1512 struct extent_buffer *leaf;
1516 btrfs_init_path(&path);
/* Direct lookup of the forward reference parent -> child */
1518 key.objectid = parent_root_id;
1519 key.type = BTRFS_ROOT_REF_KEY;
1520 key.offset = child_root_id;
1521 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1525 btrfs_release_path(&path);
/* Fall back to scanning the child's back references */
1529 key.objectid = child_root_id;
1530 key.type = BTRFS_ROOT_BACKREF_KEY;
1532 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1538 leaf = path.nodes[0];
/* Past the last slot of this leaf: continue in the next one */
1539 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1540 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1543 leaf = path.nodes[0];
1546 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Left the run of ROOT_BACKREF items belonging to the child */
1547 if (key.objectid != child_root_id ||
1548 key.type != BTRFS_ROOT_BACKREF_KEY)
1553 if (key.offset == parent_root_id) {
1554 btrfs_release_path(&path);
1561 btrfs_release_path(&path);
1564 return has_parent ? 0 : 2;
1567 static int process_dir_item(struct extent_buffer *eb,
1568 int slot, struct btrfs_key *key,
1569 struct shared_node *active_node)
1579 struct btrfs_dir_item *di;
1580 struct inode_record *rec;
1581 struct cache_tree *root_cache;
1582 struct cache_tree *inode_cache;
1583 struct btrfs_key location;
1584 char namebuf[BTRFS_NAME_LEN];
1586 root_cache = &active_node->root_cache;
1587 inode_cache = &active_node->inode_cache;
1588 rec = active_node->current;
1589 rec->found_dir_item = 1;
1591 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1592 total = btrfs_item_size_nr(eb, slot);
1593 while (cur < total) {
1595 btrfs_dir_item_key_to_cpu(eb, di, &location);
1596 name_len = btrfs_dir_name_len(eb, di);
1597 data_len = btrfs_dir_data_len(eb, di);
1598 filetype = btrfs_dir_type(eb, di);
1600 rec->found_size += name_len;
1601 if (cur + sizeof(*di) + name_len > total ||
1602 name_len > BTRFS_NAME_LEN) {
1603 error = REF_ERR_NAME_TOO_LONG;
1605 if (cur + sizeof(*di) > total)
1607 len = min_t(u32, total - cur - sizeof(*di),
1614 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1616 if (key->type == BTRFS_DIR_ITEM_KEY &&
1617 key->offset != btrfs_name_hash(namebuf, len)) {
1618 rec->errors |= I_ERR_ODD_DIR_ITEM;
1619 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1620 key->objectid, key->offset, namebuf, len, filetype,
1621 key->offset, btrfs_name_hash(namebuf, len));
1624 if (location.type == BTRFS_INODE_ITEM_KEY) {
1625 add_inode_backref(inode_cache, location.objectid,
1626 key->objectid, key->offset, namebuf,
1627 len, filetype, key->type, error);
1628 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1629 add_inode_backref(root_cache, location.objectid,
1630 key->objectid, key->offset,
1631 namebuf, len, filetype,
1634 fprintf(stderr, "invalid location in dir item %u\n",
1636 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1637 key->objectid, key->offset, namebuf,
1638 len, filetype, key->type, error);
1641 len = sizeof(*di) + name_len + data_len;
1642 di = (struct btrfs_dir_item *)((char *)di + len);
1645 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1646 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Walk all btrfs_inode_ref entries packed into the item at @slot and add
 * an inode backref to the inode cache for each of them.  key->objectid
 * is passed as the inode and key->offset as the directory.  A name that
 * would cross the item boundary or exceeds BTRFS_NAME_LEN is truncated
 * and flagged with REF_ERR_NAME_TOO_LONG.
 */
1651 static int process_inode_ref(struct extent_buffer *eb,
1652 int slot, struct btrfs_key *key,
1653 struct shared_node *active_node)
1661 struct cache_tree *inode_cache;
1662 struct btrfs_inode_ref *ref;
1663 char namebuf[BTRFS_NAME_LEN];
1665 inode_cache = &active_node->inode_cache;
1667 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1668 total = btrfs_item_size_nr(eb, slot);
1669 while (cur < total) {
1670 name_len = btrfs_inode_ref_name_len(eb, ref);
1671 index = btrfs_inode_ref_index(eb, ref);
1673 /* inode_ref + namelen should not cross item boundary */
1674 if (cur + sizeof(*ref) + name_len > total ||
1675 name_len > BTRFS_NAME_LEN) {
1676 if (total < cur + sizeof(*ref))
1679 /* Still try to read out the remaining part */
1680 len = min_t(u32, total - cur - sizeof(*ref),
1682 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the fixed-size ref header */
1688 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1689 add_inode_backref(inode_cache, key->objectid, key->offset,
1690 index, namebuf, len, 0, key->type, error);
/* Step to the next packed ref using the on-disk name length */
1692 len = sizeof(*ref) + name_len;
1693 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Walk all btrfs_inode_extref entries packed into the item at @slot and
 * add an inode backref to the inode cache for each of them.  Unlike an
 * inode_ref, the parent directory is stored inside the extref itself.
 * Names longer than BTRFS_NAME_LEN are truncated and flagged with
 * REF_ERR_NAME_TOO_LONG.
 *
 * NOTE(review): unlike process_inode_ref(), no check that the extref and
 * its name stay inside the item is visible here -- confirm whether one
 * is needed.
 */
1699 static int process_inode_extref(struct extent_buffer *eb,
1700 int slot, struct btrfs_key *key,
1701 struct shared_node *active_node)
1710 struct cache_tree *inode_cache;
1711 struct btrfs_inode_extref *extref;
1712 char namebuf[BTRFS_NAME_LEN];
1714 inode_cache = &active_node->inode_cache;
1716 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1717 total = btrfs_item_size_nr(eb, slot);
1718 while (cur < total) {
1719 name_len = btrfs_inode_extref_name_len(eb, extref);
1720 index = btrfs_inode_extref_index(eb, extref);
1721 parent = btrfs_inode_extref_parent(eb, extref);
1722 if (name_len <= BTRFS_NAME_LEN) {
1726 len = BTRFS_NAME_LEN;
1727 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the fixed-size extref header */
1729 read_extent_buffer(eb, namebuf,
1730 (unsigned long)(extref + 1), len);
1731 add_inode_backref(inode_cache, key->objectid, parent,
1732 index, namebuf, len, 0, key->type, error);
/* Step to the next packed extref using the on-disk name length */
1734 len = sizeof(*extref) + name_len;
1735 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Walk the EXTENT_CSUM items of the csum root that overlap the byte
 * range [start, start + len) and measure how much of it they cover.
 * NOTE(review): the result is presumably reported through @found --
 * confirm against the full function body.
 *
 * One checksum of csum_size bytes covers one sector, so an item of
 * `size` bytes ends at key.offset + (size / csum_size) * sectorsize.
 */
1742 static int count_csum_range(struct btrfs_root *root, u64 start,
1743 u64 len, u64 *found)
1745 struct btrfs_key key;
1746 struct btrfs_path path;
1747 struct extent_buffer *leaf;
1752 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1754 btrfs_init_path(&path);
1756 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1758 key.type = BTRFS_EXTENT_CSUM_KEY;
1760 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* No exact match: the preceding csum item may still cover `start` */
1764 if (ret > 0 && path.slots[0] > 0) {
1765 leaf = path.nodes[0];
1766 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1767 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1768 key.type == BTRFS_EXTENT_CSUM_KEY)
1773 leaf = path.nodes[0];
/* Past the last slot of this leaf: continue in the next one */
1774 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1775 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1780 leaf = path.nodes[0];
1783 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1784 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1785 key.type != BTRFS_EXTENT_CSUM_KEY)
/* NOTE(review): this re-reads the same key as just above */
1788 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Item starts at or beyond the end of the requested range */
1789 if (key.offset >= start + len)
1792 if (key.offset > start)
1795 size = btrfs_item_size_nr(leaf, path.slots[0]);
1796 csum_end = key.offset + (size / csum_size) *
1797 root->fs_info->sectorsize;
1798 if (csum_end > start) {
/* Covered bytes are capped by what remains of the range */
1799 size = min(csum_end - start, len);
1808 btrfs_release_path(&path);
/*
 * Account one EXTENT_DATA item against the current inode record.
 *
 * Tracks the covered file range (extent_start/extent_end), records holes
 * and overlaps between consecutive extents, validates the extent fields
 * for its type (inline, regular or preallocated), adds to found_size and
 * finally counts the checksums covering the on-disk data to set the
 * csum-related flags of the record.
 */
1814 static int process_file_extent(struct btrfs_root *root,
1815 struct extent_buffer *eb,
1816 int slot, struct btrfs_key *key,
1817 struct shared_node *active_node)
1819 struct inode_record *rec;
1820 struct btrfs_file_extent_item *fi;
1822 u64 disk_bytenr = 0;
1823 u64 extent_offset = 0;
/* Sectors are a power of two in size, so this masks the in-sector bits */
1824 u64 mask = root->fs_info->sectorsize - 1;
1828 rec = active_node->current;
1829 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1830 rec->found_file_extent = 1;
/* (u64)-1 marks "no extent seen yet" for this inode */
1832 if (rec->extent_start == (u64)-1) {
1833 rec->extent_start = key->offset;
1834 rec->extent_end = key->offset;
/* Compare with the end of the previous extent: overlap or hole */
1837 if (rec->extent_end > key->offset)
1838 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1839 else if (rec->extent_end < key->offset) {
1840 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1841 key->offset - rec->extent_end);
1846 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1847 extent_type = btrfs_file_extent_type(eb, fi);
1849 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1850 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1852 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1853 rec->found_size += num_bytes;
/* Round the inline length up to a whole sector */
1854 num_bytes = (num_bytes + mask) & ~mask;
1855 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1856 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1857 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1858 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1859 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned */
1860 if (num_bytes == 0 || (num_bytes & mask))
1861 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced slice must fit inside the uncompressed extent */
1862 if (num_bytes + extent_offset >
1863 btrfs_file_extent_ram_bytes(eb, fi))
1864 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not carry any encoding */
1865 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1866 (btrfs_file_extent_compression(eb, fi) ||
1867 btrfs_file_extent_encryption(eb, fi) ||
1868 btrfs_file_extent_other_encoding(eb, fi)))
1869 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* disk_bytenr == 0 contributes nothing to found_size */
1870 if (disk_bytenr > 0)
1871 rec->found_size += num_bytes;
1873 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1875 rec->extent_end = key->offset + num_bytes;
1878 * The data reloc tree will copy full extents into its inode and then
1879 * copy the corresponding csums. Because the extent it copied could be
1880 * a preallocated extent that hasn't been written to yet there may be no
1881 * csums to copy, ergo we won't have csums for our file extent. This is
1882 * ok so just don't bother checking csums if the inode belongs to the
1885 if (disk_bytenr > 0 &&
1886 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* Compressed extents are checksummed over their on-disk size */
1888 if (btrfs_file_extent_compression(eb, fi))
1889 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1891 disk_bytenr += extent_offset;
1893 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1896 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1898 rec->found_csum_item = 1;
1899 if (found < num_bytes)
1900 rec->some_csum_missing = 1;
1901 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1903 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item of leaf @eb for the original-mode check.
 *
 * Items are attributed to the active shared node of the walk.  Whenever
 * the key's objectid moves past the current inode, the previous inode
 * record is marked checked (and possibly freed) and the record for the
 * new objectid becomes current.  Each item is then dispatched on its key
 * type to the matching process_*() helper.
 */
1909 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1910 struct walk_control *wc)
1912 struct btrfs_key key;
1916 struct cache_tree *inode_cache;
1917 struct shared_node *active_node;
/* Special case: active node is the root level of a root with zero refs */
1919 if (wc->root_level == wc->active_node &&
1920 btrfs_root_refs(&root->root_item) == 0)
1923 active_node = wc->nodes[wc->active_node];
1924 inode_cache = &active_node->inode_cache;
1925 nritems = btrfs_header_nritems(eb);
1926 for (i = 0; i < nritems; i++) {
1927 btrfs_item_key_to_cpu(eb, &key, i);
/* Free space objects and orphan items get dedicated handling */
1929 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1931 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Moved on to a higher inode number: switch the current record */
1934 if (active_node->current == NULL ||
1935 active_node->current->ino < key.objectid) {
1936 if (active_node->current) {
1937 active_node->current->checked = 1;
1938 maybe_free_inode_rec(inode_cache,
1939 active_node->current);
1941 active_node->current = get_inode_rec(inode_cache,
1943 BUG_ON(IS_ERR(active_node->current));
1946 case BTRFS_DIR_ITEM_KEY:
1947 case BTRFS_DIR_INDEX_KEY:
1948 ret = process_dir_item(eb, i, &key, active_node);
1950 case BTRFS_INODE_REF_KEY:
1951 ret = process_inode_ref(eb, i, &key, active_node);
1953 case BTRFS_INODE_EXTREF_KEY:
1954 ret = process_inode_extref(eb, i, &key, active_node);
1956 case BTRFS_INODE_ITEM_KEY:
1957 ret = process_inode_item(eb, i, &key, active_node);
1959 case BTRFS_EXTENT_DATA_KEY:
1960 ret = process_file_extent(root, eb, i, &key,
/*
 * Per-level cache used while walking a tree (struct node_refs): for each
 * level the block address last looked up, its reference count, and
 * whether that block has to be checked in the current root.
 */
1971 u64 bytenr[BTRFS_MAX_LEVEL];
1972 u64 refs[BTRFS_MAX_LEVEL];
1973 int need_check[BTRFS_MAX_LEVEL];
/* Forward declarations for helpers defined further down in this file */
1976 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1977 struct node_refs *nrefs, u64 level);
1978 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1979 unsigned int ext_ref);
1982 * Returns >0 Found error, not fatal, should continue
1983 * Returns <0 Fatal error, must exit the whole check
1984 * Returns 0 No errors found
/*
 * Check the inodes stored in the leaf at path->nodes[0] by calling
 * check_inode_item() on them.
 *
 * Items at the start of the leaf that belong to an inode begun in an
 * earlier leaf are skipped first.  check_inode_item() may move the path
 * to a following leaf; when that happens the per-level node_refs cache
 * is refreshed from the root level down and path levels below *level
 * are released.
 */
1986 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1987 struct node_refs *nrefs, int *level, int ext_ref)
1989 struct extent_buffer *cur = path->nodes[0];
1990 struct btrfs_key key;
1994 int root_level = btrfs_header_level(root->node);
1996 int ret = 0; /* Final return value */
1997 int err = 0; /* Positive error bitmap */
/* Remember the starting leaf to detect when the path moves on */
1999 cur_bytenr = cur->start;
2001 /* skip to first inode item or the first inode number change */
2002 nritems = btrfs_header_nritems(cur);
2003 for (i = 0; i < nritems; i++) {
2004 btrfs_item_key_to_cpu(cur, &key, i);
2006 first_ino = key.objectid;
2007 if (key.type == BTRFS_INODE_ITEM_KEY ||
2008 (first_ino && first_ino != key.objectid))
/* Nothing to check in this leaf: mark it as fully consumed */
2012 path->slots[0] = nritems;
2018 err |= check_inode_item(root, path, ext_ref);
2020 if (err & LAST_ITEM)
2023 /* still have inode items in this leaf */
2024 if (cur->start == cur_bytenr)
2028 * we have switched to another leaf, above nodes may
2029 * have changed, here walk down the path, if a node
2030 * or leaf is shared, check whether we can skip this
2033 for (i = root_level; i >= 0; i--) {
/* Level unchanged since the last lookup: cached info is still valid */
2034 if (path->nodes[i]->start == nrefs->bytenr[i])
2037 ret = update_nodes_refs(root,
2038 path->nodes[i]->start,
2043 if (!nrefs->need_check[i]) {
/* Release the path buffers below the level the walk resumes from */
2049 for (i = 0; i < *level; i++) {
2050 free_extent_buffer(path->nodes[i]);
2051 path->nodes[i] = NULL;
/*
 * Issue readahead for the children of @node, starting at @slot and
 * running to the last pointer in the node, so that the blocks about to
 * be walked are already being read.
 */
2060 static void reada_walk_down(struct btrfs_root *root,
2061 struct extent_buffer *node, int slot)
2063 struct btrfs_fs_info *fs_info = root->fs_info;
2070 level = btrfs_header_level(node);
2074 nritems = btrfs_header_nritems(node);
2075 for (i = slot; i < nritems; i++) {
2076 bytenr = btrfs_node_blockptr(node, i);
2077 ptr_gen = btrfs_node_ptr_generation(node, i);
2078 readahead_tree_block(fs_info, bytenr, ptr_gen);
2083 * Check the child node/leaf by the following condition:
2084 * 1. the first item key of the node/leaf should be the same with the one
2086 * 2. block in parent node should match the child node/leaf.
2087 * 3. generation of parent node and child's header should be consistent.
2089 * Or the child node/leaf pointed by the key in parent is not valid.
2091 * We hope to check leaf owner too, but since subvol may share leaves,
2092 * which makes leaf owner check not so strong, key check should be
2093 * sufficient enough for that case.
2095 static int check_child_node(struct extent_buffer *parent, int slot,
2096 struct extent_buffer *child)
2098 struct btrfs_key parent_key;
2099 struct btrfs_key child_key;
2102 btrfs_node_key_to_cpu(parent, &parent_key, slot);
2103 if (btrfs_header_level(child) == 0)
2104 btrfs_item_key_to_cpu(child, &child_key, 0);
2106 btrfs_node_key_to_cpu(child, &child_key, 0);
2108 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2111 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2112 parent_key.objectid, parent_key.type, parent_key.offset,
2113 child_key.objectid, child_key.type, child_key.offset);
2115 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2117 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2118 btrfs_node_blockptr(parent, slot),
2119 btrfs_header_bytenr(child));
2121 if (btrfs_node_ptr_generation(parent, slot) !=
2122 btrfs_header_generation(child)) {
2124 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2125 btrfs_header_generation(child),
2126 btrfs_node_ptr_generation(parent, slot));
2132 * If a tree node or leaf is shared, we don't need to iterate it in every
2133 * fs or file tree check. Here we find all its root ids, and only check
2134 * it in the fs or file tree which has the smallest root id.
/*
 * Decide whether a tree block referenced by the roots listed in @roots
 * has to be checked while processing @root.  The decision compares
 * root->objectid with the first value of the list's rb-tree; a list with
 * a single entry is handled as a special case up front.
 */
2136 static int need_check(struct btrfs_root *root, struct ulist *roots)
2138 struct rb_node *node;
2139 struct ulist_node *u;
2141 if (roots->nnodes == 1)
/* rb_first() yields the first (lowest-ordered) entry of the ulist */
2144 node = rb_first(&roots->root);
2145 u = rb_entry(node, struct ulist_node, rb_node);
2147 * current root id is not smallest, we skip it and let it be checked
2148 * in the fs or file tree who hash the smallest root id.
2150 if (root->objectid != u->val)
2157 * For a tree node or leaf, we record its reference count, so that if we
2158 * process this node or leaf again later, we don't need to compute it again.
/*
 * Refresh the node_refs cache entry for @level if it does not already
 * describe the block at @bytenr: look up the block's reference count,
 * store it together with the address, then find all roots referencing
 * the block and record via need_check() whether @root has to check it.
 */
2160 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2161 struct node_refs *nrefs, u64 level)
2165 struct ulist *roots;
/* Only redo the lookups when this level now points at another block */
2167 if (nrefs->bytenr[level] != bytenr) {
2168 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2169 level, 1, &refs, NULL);
2173 nrefs->bytenr[level] = bytenr;
2174 nrefs->refs[level] = refs;
2176 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2181 check = need_check(root, roots);
2183 nrefs->need_check[level] = check;
2185 nrefs->need_check[level] = 1;
/*
 * Descend from path->nodes[*level] towards the leaves (original mode).
 *
 * At each step the reference count of the block is taken from the
 * node_refs cache or looked up in the extent tree; enter_shared_node()
 * is involved for shared blocks.  Leaves are handed to
 * process_one_leaf().  For interior nodes the child at the current slot
 * is read (with readahead), validated against its parent pointer and by
 * the generic leaf/node checker, and then becomes the new bottom of the
 * path.
 */
2192 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2193 struct walk_control *wc, int *level,
2194 struct node_refs *nrefs)
2196 enum btrfs_tree_block_status status;
2199 struct btrfs_fs_info *fs_info = root->fs_info;
2200 struct extent_buffer *next;
2201 struct extent_buffer *cur;
2205 WARN_ON(*level < 0);
2206 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached refcount when this level still holds the same block */
2208 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2209 refs = nrefs->refs[*level];
2212 ret = btrfs_lookup_extent_info(NULL, root,
2213 path->nodes[*level]->start,
2214 *level, 1, &refs, NULL);
2219 nrefs->bytenr[*level] = path->nodes[*level]->start;
2220 nrefs->refs[*level] = refs;
2224 ret = enter_shared_node(root, path->nodes[*level]->start,
2232 while (*level >= 0) {
2233 WARN_ON(*level < 0);
2234 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2235 cur = path->nodes[*level];
/* The header level must agree with the position in the path */
2237 if (btrfs_header_level(cur) != *level)
/* All slots of this block have been consumed */
2240 if (path->slots[*level] >= btrfs_header_nritems(cur))
2243 ret = process_one_leaf(root, cur, wc);
2248 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2249 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
/* Same caching scheme for the child one level down */
2251 if (bytenr == nrefs->bytenr[*level - 1]) {
2252 refs = nrefs->refs[*level - 1];
2254 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2255 *level - 1, 1, &refs, NULL);
2259 nrefs->bytenr[*level - 1] = bytenr;
2260 nrefs->refs[*level - 1] = refs;
2265 ret = enter_shared_node(root, bytenr, refs,
2268 path->slots[*level]++;
/* Try the block cache first, fall back to reading from disk */
2273 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2274 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2275 free_extent_buffer(next);
2276 reada_walk_down(root, cur, path->slots[*level]);
2277 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2278 if (!extent_buffer_uptodate(next)) {
2279 struct btrfs_key node_key;
/* Unreadable child: record the parent as a corrupt extent */
2281 btrfs_node_key_to_cpu(path->nodes[*level],
2283 path->slots[*level]);
2284 btrfs_add_corrupt_extent_record(root->fs_info,
2286 path->nodes[*level]->start,
2287 root->fs_info->nodesize,
2294 ret = check_child_node(cur, path->slots[*level], next);
2296 free_extent_buffer(next);
2301 if (btrfs_is_leaf(next))
2302 status = btrfs_check_leaf(root, NULL, next);
2304 status = btrfs_check_node(root, NULL, next);
2305 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2306 free_extent_buffer(next);
/* Step down: the child replaces whatever the path held at that level */
2311 *level = *level - 1;
2312 free_extent_buffer(path->nodes[*level]);
2313 path->nodes[*level] = next;
2314 path->slots[*level] = 0;
/* Mark the current block as fully consumed */
2317 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Forward declaration of check_inode_item().
 * NOTE(review): the same prototype is already declared earlier in this
 * file, so this one looks redundant -- confirm before removing.
 */
2321 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2322 unsigned int ext_ref);
2325 * Returns >0 Found error, should continue
2326 * Returns <0 Fatal error, must exit the whole check
2327 * Returns 0 No errors found
/*
 * Descend from path->nodes[*level] towards the leaves, using the
 * node_refs cache to skip blocks that need_check[] marks as not needing
 * a check in this root.
 *
 * Every block is validated with btrfs_check_leaf()/btrfs_check_node()
 * before use; leaves are handed to process_one_leaf_v2().  Children are
 * read (with readahead), cross-checked against their parent pointer and
 * then become the new bottom of the path.
 */
2329 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2330 int *level, struct node_refs *nrefs, int ext_ref)
2332 enum btrfs_tree_block_status status;
2335 struct btrfs_fs_info *fs_info = root->fs_info;
2336 struct extent_buffer *next;
2337 struct extent_buffer *cur;
2340 WARN_ON(*level < 0);
2341 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2343 ret = update_nodes_refs(root, path->nodes[*level]->start,
2348 while (*level >= 0) {
2349 WARN_ON(*level < 0);
2350 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2351 cur = path->nodes[*level];
/* The header level must agree with the position in the path */
2353 if (btrfs_header_level(cur) != *level)
/* All slots of this block have been consumed */
2356 if (path->slots[*level] >= btrfs_header_nritems(cur))
2358 /* Don't forget to check leaf/node validity */
2360 ret = btrfs_check_leaf(root, NULL, cur);
2361 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2365 ret = process_one_leaf_v2(root, path, nrefs,
2369 ret = btrfs_check_node(root, NULL, cur);
2370 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2375 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2376 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2378 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
/* Child does not need checking in this root: move to the next slot */
2381 if (!nrefs->need_check[*level - 1]) {
2382 path->slots[*level]++;
/* Try the block cache first, fall back to reading from disk */
2386 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2387 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2388 free_extent_buffer(next);
2389 reada_walk_down(root, cur, path->slots[*level]);
2390 next = read_tree_block(fs_info, bytenr, ptr_gen);
2391 if (!extent_buffer_uptodate(next)) {
2392 struct btrfs_key node_key;
/* Unreadable child: record the parent as a corrupt extent */
2394 btrfs_node_key_to_cpu(path->nodes[*level],
2396 path->slots[*level]);
2397 btrfs_add_corrupt_extent_record(fs_info,
2399 path->nodes[*level]->start,
2407 ret = check_child_node(cur, path->slots[*level], next);
2411 if (btrfs_is_leaf(next))
2412 status = btrfs_check_leaf(root, NULL, next);
2414 status = btrfs_check_node(root, NULL, next);
2415 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2416 free_extent_buffer(next);
/* Step down: the child replaces whatever the path held at that level */
2421 *level = *level - 1;
2422 free_extent_buffer(path->nodes[*level]);
2423 path->nodes[*level] = next;
2424 path->slots[*level] = 0;
/*
 * Climb back up the path after a subtree has been exhausted (original
 * mode).  Levels whose slots are all consumed are released from the
 * path, and leave_shared_node() is called when the level being left is
 * the walk's active shared node.  The climb stops at the first level
 * that still has a slot to the right of the current one.
 */
2429 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2430 struct walk_control *wc, int *level)
2433 struct extent_buffer *leaf;
2435 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2436 leaf = path->nodes[i];
/* This level still has unvisited slots */
2437 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* Level exhausted: drop its buffer from the path */
2442 free_extent_buffer(path->nodes[*level]);
2443 path->nodes[*level] = NULL;
2444 BUG_ON(*level > wc->active_node);
2445 if (*level == wc->active_node)
2446 leave_shared_node(root, wc, *level);
/*
 * Climb back up the path after a subtree has been exhausted.  Same
 * traversal as walk_up_tree() but without any shared node bookkeeping:
 * exhausted levels are simply released from the path.
 */
2453 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2457 struct extent_buffer *leaf;
2459 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2460 leaf = path->nodes[i];
/* This level still has unvisited slots */
2461 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* Level exhausted: drop its buffer from the path */
2466 free_extent_buffer(path->nodes[*level]);
2467 path->nodes[*level] = NULL;
/*
 * Sanity-check the inode record of a root directory.  The conditions
 * tested are: an inode item was found and no errors are recorded; nlink
 * is 1 and found_link is 0; there is a backref list whose first entry
 * has an inode ref with index 0 and the two-byte name "..", and that
 * entry has neither a dir index nor a dir item.
 */
2474 static int check_root_dir(struct inode_record *rec)
2476 struct inode_backref *backref;
2479 if (!rec->found_inode_item || rec->errors)
2481 if (rec->nlink != 1 || rec->found_link != 0)
2483 if (list_empty(&rec->backrefs))
/* Only the first backref is inspected */
2485 backref = to_inode_backref(rec->backrefs.next);
2486 if (!backref->found_inode_ref)
2488 if (backref->index != 0 || backref->namelen != 2 ||
2489 memcmp(backref->name, "..", 2))
2491 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Repair a directory's i_size: locate the inode item of rec->ino and
 * overwrite its size with rec->found_size, then clear
 * I_ERR_DIR_ISIZE_WRONG on the record.
 */
2498 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2499 struct btrfs_root *root, struct btrfs_path *path,
2500 struct inode_record *rec)
2502 struct btrfs_inode_item *ei;
2503 struct btrfs_key key;
/* Search with the maximum offset to land after the inode item */
2506 key.objectid = rec->ino;
2507 key.type = BTRFS_INODE_ITEM_KEY;
2508 key.offset = (u64)-1;
2510 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Slot 0 means there is no preceding item in this leaf */
2514 if (!path->slots[0]) {
2521 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* The item found must belong to the inode being repaired */
2522 if (key.objectid != rec->ino) {
2527 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2528 struct btrfs_inode_item);
2529 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2530 btrfs_mark_buffer_dirty(path->nodes[0]);
2531 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2532 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2533 root->root_key.objectid);
2535 btrfs_release_path(path);
/*
 * Add the missing orphan item for rec->ino in @root and clear
 * I_ERR_NO_ORPHAN_ITEM on the record.
 */
2539 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2540 struct btrfs_root *root,
2541 struct btrfs_path *path,
2542 struct inode_record *rec)
2546 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2547 btrfs_release_path(path);
2549 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Repair a file's nbytes: look up the inode item of rec->ino, overwrite
 * its nbytes with rec->found_size and clear I_ERR_FILE_NBYTES_WRONG on
 * the record.
 */
2553 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2554 struct btrfs_root *root,
2555 struct btrfs_path *path,
2556 struct inode_record *rec)
2558 struct btrfs_inode_item *ei;
2559 struct btrfs_key key;
2562 key.objectid = rec->ino;
2563 key.type = BTRFS_INODE_ITEM_KEY;
2566 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2573 /* Since ret == 0, no need to check anything */
2574 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2575 struct btrfs_inode_item);
2576 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2577 btrfs_mark_buffer_dirty(path->nodes[0]);
2578 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2579 printf("reset nbytes for ino %llu root %llu\n",
2580 rec->ino, root->root_key.objectid);
2582 btrfs_release_path(path);
/*
 * Insert the DIR_INDEX item that is missing for @backref.
 *
 * A new item keyed (backref->dir, DIR_INDEX, backref->index) is created
 * in its own transaction and filled with a dir item pointing at the
 * inode item of rec->ino, carrying the backref's name and a type derived
 * from rec->imode.  Afterwards the in-memory state is updated: the
 * backref is marked as having a dir index and the parent directory's
 * found_size grows by the name length, which may set or clear
 * I_ERR_DIR_ISIZE_WRONG on the directory record.
 */
2586 static int add_missing_dir_index(struct btrfs_root *root,
2587 struct cache_tree *inode_cache,
2588 struct inode_record *rec,
2589 struct inode_backref *backref)
2591 struct btrfs_path path;
2592 struct btrfs_trans_handle *trans;
2593 struct btrfs_dir_item *dir_item;
2594 struct extent_buffer *leaf;
2595 struct btrfs_key key;
2596 struct btrfs_disk_key disk_key;
2597 struct inode_record *dir_rec;
2598 unsigned long name_ptr;
/* Item payload: fixed dir item header followed by the name */
2599 u32 data_size = sizeof(*dir_item) + backref->namelen;
2602 trans = btrfs_start_transaction(root, 1);
2604 return PTR_ERR(trans);
2606 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2607 (unsigned long long)rec->ino);
2609 btrfs_init_path(&path);
2610 key.objectid = backref->dir;
2611 key.type = BTRFS_DIR_INDEX_KEY;
2612 key.offset = backref->index;
2613 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2616 leaf = path.nodes[0];
2617 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* The location key is stored in on-disk (little endian) form */
2619 disk_key.objectid = cpu_to_le64(rec->ino);
2620 disk_key.type = BTRFS_INODE_ITEM_KEY;
2621 disk_key.offset = 0;
2623 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2624 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2625 btrfs_set_dir_data_len(leaf, dir_item, 0);
2626 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is written directly after the dir item header */
2627 name_ptr = (unsigned long)(dir_item + 1);
2628 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2629 btrfs_mark_buffer_dirty(leaf);
2630 btrfs_release_path(&path);
2631 btrfs_commit_transaction(trans, root);
/* Bring the in-memory records in line with what was just written */
2633 backref->found_dir_index = 1;
2634 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2635 BUG_ON(IS_ERR(dir_rec));
2638 dir_rec->found_size += backref->namelen;
2639 if (dir_rec->found_size == dir_rec->isize &&
2640 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2641 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2642 if (dir_rec->found_size != dir_rec->isize)
2643 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the bad DIR_INDEX entry described by @backref.
 *
 * The entry is looked up by directory, name and index inside its own
 * transaction and then removed, either with btrfs_del_item() or with
 * btrfs_delete_one_dir_name() using the dir item that was found.
 */
2648 static int delete_dir_index(struct btrfs_root *root,
2649 struct inode_backref *backref)
2651 struct btrfs_trans_handle *trans;
2652 struct btrfs_dir_item *di;
2653 struct btrfs_path path;
2656 trans = btrfs_start_transaction(root, 1);
2658 return PTR_ERR(trans);
2660 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2661 (unsigned long long)backref->dir,
2662 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2663 (unsigned long long)root->objectid);
2665 btrfs_init_path(&path);
2666 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2667 backref->name, backref->namelen,
2668 backref->index, -1);
/* Early exit path: release the path and close the transaction */
2671 btrfs_release_path(&path);
2672 btrfs_commit_transaction(trans, root);
2679 ret = btrfs_del_item(trans, root, &path);
2681 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2683 btrfs_release_path(&path);
2684 btrfs_commit_transaction(trans, root);
/*
 * Recreate a missing inode item for rec->ino from what the check has
 * learned about the inode.
 *
 * The new item gets the current transaction id as generation, nlink of
 * either 1 or rec->found_link, and nbytes = rec->found_size.  If a dir
 * item was seen the inode becomes a directory (S_IFDIR | 0755, size
 * found_size); otherwise a regular file (S_IFREG | 0755, size
 * extent_end).  atime/ctime/mtime are set to "now", otime to zero.
 */
2688 static int create_inode_item(struct btrfs_root *root,
2689 struct inode_record *rec,
2692 struct btrfs_trans_handle *trans;
2693 struct btrfs_inode_item inode_item;
2694 time_t now = time(NULL);
2697 trans = btrfs_start_transaction(root, 1);
2698 if (IS_ERR(trans)) {
2699 ret = PTR_ERR(trans);
2703 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2704 "be incomplete, please check permissions and content after "
2705 "the fsck completes.\n", (unsigned long long)root->objectid,
2706 (unsigned long long)rec->ino);
/* Build the item on the stack, starting from all zeroes */
2708 memset(&inode_item, 0, sizeof(inode_item));
2709 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2711 btrfs_set_stack_inode_nlink(&inode_item, 1);
2713 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2714 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2715 if (rec->found_dir_item) {
2716 if (rec->found_file_extent)
2717 fprintf(stderr, "root %llu inode %llu has both a dir "
2718 "item and extents, unsure if it is a dir or a "
2719 "regular file so setting it as a directory\n",
2720 (unsigned long long)root->objectid,
2721 (unsigned long long)rec->ino);
2722 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2723 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/* NOTE(review): this condition is always true here; a plain else would do */
2724 } else if (!rec->found_dir_item) {
2725 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2726 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2728 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2729 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2730 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2731 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2732 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2733 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2734 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2735 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2737 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2739 btrfs_commit_transaction(trans, root);
/*
 * Repair the directory-side back references of one inode record.
 *
 * Walks rec->backrefs and, depending on which of dir item / dir index /
 * inode ref were found for each entry:
 *  - recreates a missing inode item for the root directory,
 *  - deletes a dir index that has no inode ref, or whose index does not
 *    match (REF_ERR_INDEX_UNMATCH), and drops the entry from the list,
 *  - adds a missing dir index when dir item and inode ref both exist,
 *  - drops entries that have all three items and no errors,
 *  - adds a missing dir index/item pair when only the inode ref exists,
 *    after consulting check_dir_conflict(),
 *  - recreates the inode item when all three refs agree but the inode item
 *    itself is missing.
 *
 * Every addition is guarded by !delete, so additions and deletions happen in
 * different calls. NOTE(review): confirm the guard on the dir index deletion.
 *
 * Returns ret when it is non-zero, otherwise the value of repaired.
 */
2743 static int repair_inode_backrefs(struct btrfs_root *root,
2744 struct inode_record *rec,
2745 struct cache_tree *inode_cache,
2748 struct inode_backref *tmp, *backref;
2749 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2753 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* Root directory without an inode item: rebuild it (third argument 1). */
2754 if (!delete && rec->ino == root_dirid) {
2755 if (!rec->found_inode_item) {
2756 ret = create_inode_item(root, rec, 1);
2763 /* Index 0 for root dir's are special, don't mess with it */
2764 if (rec->ino == root_dirid && backref->index == 0)
2768 ((backref->found_dir_index && !backref->found_inode_ref) ||
2769 (backref->found_dir_index && backref->found_inode_ref &&
2770 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2771 ret = delete_dir_index(root, backref);
2775 list_del(&backref->list);
/* Dir item and inode ref exist but the dir index is missing: add it. */
2780 if (!delete && !backref->found_dir_index &&
2781 backref->found_dir_item && backref->found_inode_ref) {
2782 ret = add_missing_dir_index(root, inode_cache, rec,
2787 if (backref->found_dir_item &&
2788 backref->found_dir_index) {
2789 if (!backref->errors &&
2790 backref->found_inode_ref) {
2791 list_del(&backref->list);
/* Only the inode ref exists: insert the dir index/item pair. */
2798 if (!delete && (!backref->found_dir_index &&
2799 !backref->found_dir_item &&
2800 backref->found_inode_ref)) {
2801 struct btrfs_trans_handle *trans;
2802 struct btrfs_key location;
2804 ret = check_dir_conflict(root, backref->name,
2810 * let nlink fixing routine to handle it,
2811 * which can do it better.
2816 location.objectid = rec->ino;
2817 location.type = BTRFS_INODE_ITEM_KEY;
2818 location.offset = 0;
2820 trans = btrfs_start_transaction(root, 1);
2821 if (IS_ERR(trans)) {
2822 ret = PTR_ERR(trans);
2825 fprintf(stderr, "adding missing dir index/item pair "
2827 (unsigned long long)rec->ino);
2828 ret = btrfs_insert_dir_item(trans, root, backref->name,
2830 backref->dir, &location,
2831 imode_to_type(rec->imode),
2834 btrfs_commit_transaction(trans, root);
/* All three refs agree but the inode item is missing: rebuild it. */
2838 if (!delete && (backref->found_inode_ref &&
2839 backref->found_dir_index &&
2840 backref->found_dir_item &&
2841 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2842 !rec->found_inode_item)) {
2843 ret = create_inode_item(root, rec, 0);
2850 return ret ? ret : repaired;
2854 * To determine the file type for nlink/inode_item repair
2856 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2857 * Return -ENOENT if file type is not found.
/*
 * Choose a directory-entry file type for @rec and store it in *type.
 *
 * When the inode item exists, its mode is converted with imode_to_type().
 * Otherwise the filetype recorded in a backref that has a dir index or a
 * dir item is used.
 */
2859 static int find_file_type(struct inode_record *rec, u8 *type)
2861 struct inode_backref *backref;
2863 /* For inode item recovered case */
2864 if (rec->found_inode_item) {
2865 *type = imode_to_type(rec->imode);
/* No inode item: fall back to the type stored in a directory entry. */
2869 list_for_each_entry(backref, &rec->backrefs, list) {
2870 if (backref->found_dir_index || backref->found_dir_item) {
2871 *type = backref->filetype;
2879 * To determine the file name for nlink repair
2881 * Return 0 if file name is found, set name and namelen.
2882 * Return -ENOENT if file name is not found.
/*
 * Copy a name for @rec into @name and its length into *namelen.
 *
 * The name comes from a backref that has a dir index, a dir item or an
 * inode ref. Exactly backref->namelen bytes are copied and the memcpy adds
 * no terminator, so @name must be able to hold that many bytes (the caller
 * in this file passes a BTRFS_NAME_LEN sized buffer).
 */
2884 static int find_file_name(struct inode_record *rec,
2885 char *name, int *namelen)
2887 struct inode_backref *backref;
2889 list_for_each_entry(backref, &rec->backrefs, list) {
2890 if (backref->found_dir_index || backref->found_dir_item ||
2891 backref->found_inode_ref) {
2892 memcpy(name, backref->name, backref->namelen);
2893 *namelen = backref->namelen;
2900 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of @rec from scratch inside @trans.
 *
 * Steps, in order:
 *  1. rec->found_link is cleared.
 *  2. Every backref, valid or not, is unlinked with btrfs_unlink(); entries
 *     lacking any of dir index / dir item / inode ref are also removed from
 *     rec->backrefs so they are not re-added.
 *  3. The inode item is looked up and its nlink is set to 0.
 *  4. The backrefs still on the list are re-added with btrfs_add_link(),
 *     which the existing comment below relies on to bring nlink back up.
 *
 * @path is released before returning.
 */
2901 static int reset_nlink(struct btrfs_trans_handle *trans,
2902 struct btrfs_root *root,
2903 struct btrfs_path *path,
2904 struct inode_record *rec)
2906 struct inode_backref *backref;
2907 struct inode_backref *tmp;
2908 struct btrfs_key key;
2909 struct btrfs_inode_item *inode_item;
2912 /* We don't believe this either, reset it and iterate backref */
2913 rec->found_link = 0;
2915 /* Remove all backref including the valid ones */
2916 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2917 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2918 backref->index, backref->name,
2919 backref->namelen, 0);
2923 /* remove invalid backref, so it won't be added back */
2924 if (!(backref->found_dir_index &&
2925 backref->found_dir_item &&
2926 backref->found_inode_ref)) {
2927 list_del(&backref->list);
2934 /* Set nlink to 0 */
2935 key.objectid = rec->ino;
2936 key.type = BTRFS_INODE_ITEM_KEY;
2938 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2945 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2946 struct btrfs_inode_item);
2947 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2948 btrfs_mark_buffer_dirty(path->nodes[0]);
2949 btrfs_release_path(path);
2952 * Add back valid inode_ref/dir_item/dir_index,
2953 * add_link() will handle the nlink inc, so new nlink must be correct
2955 list_for_each_entry(backref, &rec->backrefs, list) {
2956 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2957 backref->name, backref->namelen,
2958 backref->filetype, &backref->index, 1);
2963 btrfs_release_path(path);
/*
 * Store in *highest_ino the objectid of the key that sits just before the
 * position of (BTRFS_LAST_FREE_OBJECTID, BTRFS_INODE_ITEM_KEY) in @root.
 *
 * The result is then compared against BTRFS_LAST_FREE_OBJECTID, and @path is
 * released before returning.
 *
 * NOTE(review): the key is read from slot path->slots[0] - 1; confirm that
 * the search cannot leave slots[0] at 0 on this path.
 * NOTE(review): the search passes -1 and 1 as its last two arguments although
 * the visible code only reads the tree; confirm that is intended.
 */
2967 static int get_highest_inode(struct btrfs_trans_handle *trans,
2968 struct btrfs_root *root,
2969 struct btrfs_path *path,
2972 struct btrfs_key key, found_key;
2975 btrfs_init_path(path);
2976 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2978 key.type = BTRFS_INODE_ITEM_KEY;
2979 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2981 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2982 path->slots[0] - 1);
2983 *highest_ino = found_key.objectid;
2986 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2988 btrfs_release_path(path);
/*
 * Fix a wrong link count on @rec, moving the inode into a lost+found
 * directory when no valid link remains.
 *
 * Sequence visible here:
 *  1. A name and a file type are recovered from the backrefs before they are
 *     torn down; the fallbacks are the decimal inode number as name and
 *     BTRFS_FT_REG_FILE as type.
 *  2. reset_nlink() rebuilds the links from the valid backrefs.
 *  3. If rec->found_link is still 0, get_highest_inode() provides the inode
 *     number for a lost+found directory created with btrfs_mkdir() under
 *     BTRFS_FIRST_FREE_OBJECTID, and the inode is linked into it. While the
 *     link fails with -EEXIST, a .INO suffix is appended to the name and the
 *     link is retried, as long as the name still fits.
 *  4. I_ERR_LINK_COUNT_WRONG is cleared unconditionally so the caller does
 *     not retry the same inode forever, and @path is released.
 */
2992 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2993 struct btrfs_root *root,
2994 struct btrfs_path *path,
2995 struct inode_record *rec)
2997 char *dir_name = "lost+found";
2998 char namebuf[BTRFS_NAME_LEN] = {0};
3003 int name_recovered = 0;
3004 int type_recovered = 0;
3008 * Get file name and type first before these invalid inode ref
3009 * are deleted by remove_all_invalid_backref()
3011 name_recovered = !find_file_name(rec, namebuf, &namelen);
3012 type_recovered = !find_file_type(rec, &type);
/* Name fallback: the inode number printed in decimal. */
3014 if (!name_recovered) {
3015 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3016 rec->ino, rec->ino);
3017 namelen = count_digits(rec->ino);
3018 sprintf(namebuf, "%llu", rec->ino);
/* Type fallback: regular file. */
3021 if (!type_recovered) {
3022 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3024 type = BTRFS_FT_REG_FILE;
3028 ret = reset_nlink(trans, root, path, rec);
3031 "Failed to reset nlink for inode %llu: %s\n",
3032 rec->ino, strerror(-ret));
/* No valid link survived the reset: park the inode in lost+found. */
3036 if (rec->found_link == 0) {
3037 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3041 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3042 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3045 fprintf(stderr, "Failed to create '%s' dir: %s\n",
3046 dir_name, strerror(-ret));
3049 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
3050 namebuf, namelen, type, NULL, 1);
3052 * Add ".INO" suffix several times to handle case where
3053 * "FILENAME.INO" is already taken by another file.
3055 while (ret == -EEXIST) {
3057 * Conflicting file name, add ".INO" as suffix * +1 for '.'
3059 if (namelen + count_digits(rec->ino) + 1 >
3064 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
3066 namelen += count_digits(rec->ino) + 1;
3067 ret = btrfs_add_link(trans, root, rec->ino,
3068 lost_found_ino, namebuf,
3069 namelen, type, NULL, 1);
3073 "Failed to link the inode %llu to %s dir: %s\n",
3074 rec->ino, dir_name, strerror(-ret));
3078 * Just increase the found_link, don't actually add the
3079 * backref. This will make things easier and this inode
3080 * record will be freed after the repair is done.
3081 * So fsck will not report problem about this inode.
3084 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3085 namelen, namebuf, dir_name);
3087 printf("Fixed the nlink of inode %llu\n", rec->ino);
3090 * Clear the flag anyway, or we will loop forever for the same inode
3091 * as it will not be removed from the bad inode list and the dead loop
3094 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3095 btrfs_release_path(path);
3100 * Check if there is any normal(reg or prealloc) file extent for given
3102 * This is used to determine the file type when neither its dir_index/item or
3103 * inode_item exists.
3105 * This will *NOT* report error, if any error happens, just consider it does
3106 * not have any normal file extent.
/*
 * Look in @root for a file extent item of inode @ino that is not inline.
 *
 * The search is done without a transaction (NULL handle, last two search
 * arguments 0) on a local path that is released before returning. Iteration
 * stops as soon as a key belongs to another inode or is not an
 * EXTENT_DATA key. Any extent type other than BTRFS_FILE_EXTENT_INLINE
 * counts as a normal extent.
 */
3108 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3110 struct btrfs_path path;
3111 struct btrfs_key key;
3112 struct btrfs_key found_key;
3113 struct btrfs_file_extent_item *fi;
3117 btrfs_init_path(&path);
3119 key.type = BTRFS_EXTENT_DATA_KEY;
3122 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* The search may stop one past the last item of a leaf: step to the next leaf. */
3127 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3128 ret = btrfs_next_leaf(root, &path);
3135 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
/* Stop once the items no longer belong to this inode or are not extents. */
3137 if (found_key.objectid != ino ||
3138 found_key.type != BTRFS_EXTENT_DATA_KEY)
3140 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3141 struct btrfs_file_extent_item);
3142 type = btrfs_file_extent_type(path.nodes[0], fi);
3143 if (type != BTRFS_FILE_EXTENT_INLINE) {
3149 btrfs_release_path(&path);
3153 static u32 btrfs_type_to_imode(u8 type)
3155 static u32 imode_by_btrfs_type[] = {
3156 [BTRFS_FT_REG_FILE] = S_IFREG,
3157 [BTRFS_FT_DIR] = S_IFDIR,
3158 [BTRFS_FT_CHRDEV] = S_IFCHR,
3159 [BTRFS_FT_BLKDEV] = S_IFBLK,
3160 [BTRFS_FT_FIFO] = S_IFIFO,
3161 [BTRFS_FT_SOCK] = S_IFSOCK,
3162 [BTRFS_FT_SYMLINK] = S_IFLNK,
3165 return imode_by_btrfs_type[(type)];
/*
 * Rebuild the missing inode item of @rec inside @trans.
 *
 * The file type comes from find_file_type() when possible. Otherwise it is
 * guessed, in this order:
 *  - a regular (non-inline) file extent exists: BTRFS_FT_REG_FILE,
 *  - a dir item was found for the inode:        BTRFS_FT_DIR,
 *  - orphan data extents are attached:          BTRFS_FT_REG_FILE,
 *  - nothing known:                             BTRFS_FT_REG_FILE, with a
 *    message to the user.
 *
 * The inode is created with btrfs_new_inode() using mode combined with the
 * type bits from btrfs_type_to_imode(). Afterwards the record is updated:
 * imode is filled in, I_ERR_NO_INODE_ITEM is cleared and
 * I_ERR_LINK_COUNT_WRONG is set so that the nlink repair runs later.
 *
 * NOTE(review): rec->found_dir_item is set to 1 regardless of the type that
 * was chosen; confirm that this is the field meant to be updated.
 */
3168 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3169 struct btrfs_root *root,
3170 struct btrfs_path *path,
3171 struct inode_record *rec)
3175 int type_recovered = 0;
3178 printf("Trying to rebuild inode:%llu\n", rec->ino);
3180 type_recovered = !find_file_type(rec, &filetype);
3183 * Try to determine inode type if type not found.
3185 * For found regular file extent, it must be FILE.
3186 * For found dir_item/index, it must be DIR.
3188 * For undetermined one, use FILE as fallback.
3191 * 1. If found backref(inode_index/item is already handled) to it,
3193 * Need new inode-inode ref structure to allow search for that.
3195 if (!type_recovered) {
3196 if (rec->found_file_extent &&
3197 find_normal_file_extent(root, rec->ino)) {
3199 filetype = BTRFS_FT_REG_FILE;
3200 } else if (rec->found_dir_item) {
3202 filetype = BTRFS_FT_DIR;
3203 } else if (!list_empty(&rec->orphan_extents)) {
3205 filetype = BTRFS_FT_REG_FILE;
3207 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3210 filetype = BTRFS_FT_REG_FILE;
3214 ret = btrfs_new_inode(trans, root, rec->ino,
3215 mode | btrfs_type_to_imode(filetype));
3220 * Here inode rebuild is done, we only rebuild the inode item,
3221 * don't repair the nlink(like move to lost+found).
3222 * That is the job of nlink repair.
3224 * We just fill the record and return
3226 rec->found_dir_item = 1;
3227 rec->imode = mode | btrfs_type_to_imode(filetype);
3229 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3230 /* Ensure the inode_nlinks repair function will be called */
3231 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach the orphan data extents recorded on @rec to their inode.
 *
 * For every entry of rec->orphan_extents:
 *  - btrfs_get_extent() checks whether the range (offset, disk_len) of the
 *    inode is already covered; disk_len is used as the extent length since
 *    compression and data offset are unknown at this point.
 *  - A conflicting orphan is reported and released with btrfs_free_extent();
 *    otherwise (presumably only then - confirm) a file extent is inserted
 *    with btrfs_insert_file_extent(), using disk_len for both lengths.
 *  - rec->found_size grows by disk_len, and I_ERR_FILE_NBYTES_WRONG is
 *    cleared once it equals rec->nbytes.
 *  - The range is removed from rec->holes, and I_ERR_FILE_EXTENT_DISCOUNT is
 *    cleared when no hole is left.
 *  - The orphan entry is unlinked from the list.
 *
 * I_ERR_FILE_EXTENT_ORPHAN is cleared after the loop.
 */
3236 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3237 struct btrfs_root *root,
3238 struct btrfs_path *path,
3239 struct inode_record *rec)
3241 struct orphan_data_extent *orphan;
3242 struct orphan_data_extent *tmp;
3245 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3247 * Check for conflicting file extents
3249 * Here we don't know whether the extents is compressed or not,
3250 * so we can only assume it not compressed nor data offset,
3251 * and use its disk_len as extent length.
3253 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3254 orphan->offset, orphan->disk_len, 0);
3255 btrfs_release_path(path);
3260 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3261 orphan->disk_bytenr, orphan->disk_len);
3262 ret = btrfs_free_extent(trans,
3263 root->fs_info->extent_root,
3264 orphan->disk_bytenr, orphan->disk_len,
3265 0, root->objectid, orphan->objectid,
3270 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3271 orphan->offset, orphan->disk_bytenr,
3272 orphan->disk_len, orphan->disk_len);
3276 /* Update file size info */
3277 rec->found_size += orphan->disk_len;
3278 if (rec->found_size == rec->nbytes)
3279 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3281 /* Update the file extent hole info too */
3282 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3286 if (RB_EMPTY_ROOT(&rec->holes))
3287 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3289 list_del(&orphan->list);
3292 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps between the file extents of @rec with hole extents.
 *
 * Each entry of the rec->holes tree is punched with btrfs_punch_hole() and
 * then removed from the tree; the loop always restarts from rb_first()
 * because removing an entry changes the tree. I_ERR_FILE_EXTENT_DISCOUNT is
 * cleared once the tree is empty.
 *
 * A file that lost all of its extents is handled separately by punching one
 * hole from offset 0 up to isize rounded up to the sector size.
 */
3297 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3298 struct btrfs_root *root,
3299 struct btrfs_path *path,
3300 struct inode_record *rec)
3302 struct rb_node *node;
3303 struct file_extent_hole *hole;
3307 node = rb_first(&rec->holes);
3311 hole = rb_entry(node, struct file_extent_hole, node);
3312 ret = btrfs_punch_hole(trans, root, rec->ino,
3313 hole->start, hole->len);
/* The hole is now on disk: drop it from the in-memory hole tree. */
3316 ret = del_file_extent_hole(&rec->holes, hole->start,
3320 if (RB_EMPTY_ROOT(&rec->holes))
3321 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3322 node = rb_first(&rec->holes);
3324 /* special case for a file losing all its file extent */
3326 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3327 round_up(rec->isize,
3328 root->fs_info->sectorsize));
3332 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3333 rec->ino, root->objectid);
/*
 * Run every applicable repair on one inode record inside one transaction.
 *
 * Nothing is attempted unless rec->errors contains at least one of the
 * repairable flags tested first. The repairs then run in a fixed order and
 * each later step is skipped once an earlier one failed:
 *   no inode item -> orphan extents -> discount extents -> dir isize ->
 *   orphan item -> link count -> nbytes.
 * Rebuilding the inode item comes first and the link count repair comes
 * after it; repair_inode_no_item() sets I_ERR_LINK_COUNT_WRONG so that the
 * later step picks the inode up.
 *
 * The transaction reserves 7 items, as itemised in the comment below, and is
 * committed even when a step failed.
 * NOTE(review): the return value of the commit is not checked.
 */
3338 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3340 struct btrfs_trans_handle *trans;
3341 struct btrfs_path path;
3344 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3345 I_ERR_NO_ORPHAN_ITEM |
3346 I_ERR_LINK_COUNT_WRONG |
3347 I_ERR_NO_INODE_ITEM |
3348 I_ERR_FILE_EXTENT_ORPHAN |
3349 I_ERR_FILE_EXTENT_DISCOUNT|
3350 I_ERR_FILE_NBYTES_WRONG)))
3354 * For nlink repair, it may create a dir and add link, so
3355 * 2 for parent(256)'s dir_index and dir_item
3356 * 2 for lost+found dir's inode_item and inode_ref
3357 * 1 for the new inode_ref of the file
3358 * 2 for lost+found dir's dir_index and dir_item for the file
3360 trans = btrfs_start_transaction(root, 7);
3362 return PTR_ERR(trans);
3364 btrfs_init_path(&path);
3365 if (rec->errors & I_ERR_NO_INODE_ITEM)
3366 ret = repair_inode_no_item(trans, root, &path, rec);
3367 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3368 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3369 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3370 ret = repair_inode_discount_extent(trans, root, &path, rec);
3371 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3372 ret = repair_inode_isize(trans, root, &path, rec);
3373 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3374 ret = repair_inode_orphan_item(trans, root, &path, rec);
3375 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3376 ret = repair_inode_nlinks(trans, root, &path, rec);
3377 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3378 ret = repair_inode_nbytes(trans, root, &path, rec);
3379 btrfs_commit_transaction(trans, root);
3380 btrfs_release_path(&path);
/*
 * Final pass over the inode records collected for one fs root.
 *
 * Phases visible here:
 *  1. A root with zero references is not checked further; a warning is
 *     printed if records were nevertheless collected for it.
 *  2. When repair is enabled, the backrefs of every record are repaired in
 *     several stages through repair_inode_backrefs(), for the ordering
 *     reasons given in the comment below.
 *  3. The root directory record is looked up and checked with
 *     check_root_dir(); a missing root directory can be recreated with
 *     btrfs_make_root_dir() in its own transaction.
 *  4. Every remaining record is removed from @inode_cache. The root dir and
 *     the orphan objectid records are simply freed. For the others, the
 *     orphan item, inode item and link count flags are refreshed,
 *     try_repair_inode() is called, and whatever is still wrong is printed
 *     together with its unresolved backrefs before the record is freed.
 *
 * Returns -1 when error is greater than 0, otherwise 0.
 */
3384 static int check_inode_recs(struct btrfs_root *root,
3385 struct cache_tree *inode_cache)
3387 struct cache_extent *cache;
3388 struct ptr_node *node;
3389 struct inode_record *rec;
3390 struct inode_backref *backref;
3395 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* Root without references: only warn if records were collected for it. */
3397 if (btrfs_root_refs(&root->root_item) == 0) {
3398 if (!cache_tree_empty(inode_cache))
3399 fprintf(stderr, "warning line %d\n", __LINE__);
3404 * We need to repair backrefs first because we could change some of the
3405 * errors in the inode recs.
3407 * We also need to go through and delete invalid backrefs first and then
3408 * add the correct ones second. We do this because we may get EEXIST
3409 * when adding back the correct index because we hadn't yet deleted the
3412 * For example, if we were missing a dir index then the directories
3413 * isize would be wrong, so if we fixed the isize to what we thought it
3414 * would be and then fixed the backref we'd still have a invalid fs, so
3415 * we need to add back the dir index and then check to see if the isize
3420 if (stage == 3 && !err)
/* Backref repair pass: the loop body only runs when repair is set. */
3423 cache = search_cache_extent(inode_cache, 0);
3424 while (repair && cache) {
3425 node = container_of(cache, struct ptr_node, cache);
3427 cache = next_cache_extent(cache);
3429 /* Need to free everything up and rescan */
3431 remove_cache_extent(inode_cache, &node->cache);
3433 free_inode_rec(rec);
3437 if (list_empty(&rec->backrefs))
3440 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Root directory check. */
3454 rec = get_inode_rec(inode_cache, root_dirid, 0);
3455 BUG_ON(IS_ERR(rec));
3457 ret = check_root_dir(rec);
3459 fprintf(stderr, "root %llu root dir %llu error\n",
3460 (unsigned long long)root->root_key.objectid,
3461 (unsigned long long)root_dirid);
3462 print_inode_error(root, rec);
3467 struct btrfs_trans_handle *trans;
3469 trans = btrfs_start_transaction(root, 1);
3470 if (IS_ERR(trans)) {
3471 err = PTR_ERR(trans);
3476 "root %llu missing its root dir, recreating\n",
3477 (unsigned long long)root->objectid);
3479 ret = btrfs_make_root_dir(trans, root, root_dirid);
3482 btrfs_commit_transaction(trans, root);
3486 fprintf(stderr, "root %llu root dir %llu not found\n",
3487 (unsigned long long)root->root_key.objectid,
3488 (unsigned long long)root_dirid);
/* Drain the cache: each record is removed, examined and finally freed. */
3492 cache = search_cache_extent(inode_cache, 0);
3495 node = container_of(cache, struct ptr_node, cache);
3497 remove_cache_extent(inode_cache, &node->cache);
3499 if (rec->ino == root_dirid ||
3500 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3501 free_inode_rec(rec);
/* Re-check the orphan item flag against the tree before reporting it. */
3505 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3506 ret = check_orphan_item(root, rec->ino);
3508 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3509 if (can_free_inode_rec(rec)) {
3510 free_inode_rec(rec);
3515 if (!rec->found_inode_item)
3516 rec->errors |= I_ERR_NO_INODE_ITEM;
3517 if (rec->found_link != rec->nlink)
3518 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3520 ret = try_repair_inode(root, rec);
3521 if (ret == 0 && can_free_inode_rec(rec)) {
3522 free_inode_rec(rec);
3528 if (!(repair && ret == 0))
3530 print_inode_error(root, rec);
/* Derive the missing-item flags of each backref right before printing it. */
3531 list_for_each_entry(backref, &rec->backrefs, list) {
3532 if (!backref->found_dir_item)
3533 backref->errors |= REF_ERR_NO_DIR_ITEM;
3534 if (!backref->found_dir_index)
3535 backref->errors |= REF_ERR_NO_DIR_INDEX;
3536 if (!backref->found_inode_ref)
3537 backref->errors |= REF_ERR_NO_INODE_REF;
3538 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3539 " namelen %u name %s filetype %d errors %x",
3540 (unsigned long long)backref->dir,
3541 (unsigned long long)backref->index,
3542 backref->namelen, backref->name,
3543 backref->filetype, backref->errors);
3544 print_ref_error(backref->errors);
3546 free_inode_rec(rec);
3548 return (error > 0) ? -1 : 0;
/*
 * Return the root_record for @objectid from @root_cache, creating it when it
 * does not exist yet.
 *
 * A new record is zero-allocated, keyed in the cache by the one-element
 * range starting at objectid, and starts with an empty backref list.
 * Failures are reported as ERR_PTR values: -ENOMEM and -EEXIST are the two
 * codes returned here.
 */
3551 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3554 struct cache_extent *cache;
3555 struct root_record *rec = NULL;
3558 cache = lookup_cache_extent(root_cache, objectid, 1);
3560 rec = container_of(cache, struct root_record, cache);
3562 rec = calloc(1, sizeof(*rec));
3564 return ERR_PTR(-ENOMEM);
3565 rec->objectid = objectid;
3566 INIT_LIST_HEAD(&rec->backrefs);
3567 rec->cache.start = objectid;
3568 rec->cache.size = 1;
3570 ret = insert_cache_extent(root_cache, &rec->cache);
3572 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec that matches (ref_root, dir, name), or create it.
 *
 * Existing entries are compared by ref_root, dir, name length and name
 * bytes; the index is not part of the comparison. A new entry is allocated
 * with room for the name plus a terminating NUL, gets a private copy of
 * the name, records @index and is appended to rec->backrefs.
 */
3577 static struct root_backref *get_root_backref(struct root_record *rec,
3578 u64 ref_root, u64 dir, u64 index,
3579 const char *name, int namelen)
3581 struct root_backref *backref;
3583 list_for_each_entry(backref, &rec->backrefs, list) {
3584 if (backref->ref_root != ref_root || backref->dir != dir ||
3585 backref->namelen != namelen)
3587 if (memcmp(name, backref->name, namelen))
/* No match: allocate header plus name bytes plus one byte for the NUL. */
3592 backref = calloc(1, sizeof(*backref) + namelen + 1);
3595 backref->ref_root = ref_root;
3597 backref->index = index;
3598 backref->namelen = namelen;
3599 memcpy(backref->name, name, namelen);
3600 backref->name[namelen] = '\0';
3601 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Release one root_record given its embedded cache_extent.
 *
 * Every backref is unlinked from rec->backrefs until the list is empty.
 * The function is handed to FREE_EXTENT_CACHE_BASED_TREE() below as the
 * per-entry callback for the root_recs tree.
 */
3605 static void free_root_record(struct cache_extent *cache)
3607 struct root_record *rec;
3608 struct root_backref *backref;
3610 rec = container_of(cache, struct root_record, cache);
3611 while (!list_empty(&rec->backrefs)) {
3612 backref = to_root_backref(rec->backrefs.next);
3613 list_del(&backref->list);
3620 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that an item of type @item_type referring to root @root_id was
 * seen in root @ref_root, directory @dir, under @name.
 *
 * The matching root_record and backref are looked up or created, and
 * @errors is OR-ed into the backref. Then:
 *  - For every item type except DIR_ITEM the index is cross-checked: when a
 *    dir index, back ref or forward ref was already seen and the stored
 *    index differs, REF_ERR_INDEX_UNMATCH is set.
 *  - The found_* flag matching @item_type is set. Seeing a second ROOT_REF
 *    or ROOT_BACKREF for the same backref sets REF_ERR_DUP_ROOT_REF or
 *    REF_ERR_DUP_ROOT_BACKREF.
 *  - A backref with both a forward ref and a dir item is marked reachable.
 *
 * NOTE(review): a failure of get_root_rec() ends in BUG_ON rather than an
 * error return.
 */
3622 static int add_root_backref(struct cache_tree *root_cache,
3623 u64 root_id, u64 ref_root, u64 dir, u64 index,
3624 const char *name, int namelen,
3625 int item_type, int errors)
3627 struct root_record *rec;
3628 struct root_backref *backref;
3630 rec = get_root_rec(root_cache, root_id);
3631 BUG_ON(IS_ERR(rec));
3632 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3635 backref->errors |= errors;
/* Index consistency check; dir items are excluded from it. */
3637 if (item_type != BTRFS_DIR_ITEM_KEY) {
3638 if (backref->found_dir_index || backref->found_back_ref ||
3639 backref->found_forward_ref) {
3640 if (backref->index != index)
3641 backref->errors |= REF_ERR_INDEX_UNMATCH;
3643 backref->index = index;
/* Remember which kind of item was seen. */
3647 if (item_type == BTRFS_DIR_ITEM_KEY) {
3648 if (backref->found_forward_ref)
3650 backref->found_dir_item = 1;
3651 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3652 backref->found_dir_index = 1;
3653 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3654 if (backref->found_forward_ref)
3655 backref->errors |= REF_ERR_DUP_ROOT_REF;
3656 else if (backref->found_dir_item)
3658 backref->found_forward_ref = 1;
3659 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3660 if (backref->found_back_ref)
3661 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3662 backref->found_back_ref = 1;
/* Forward ref plus dir item means the root can be reached through this name. */
3667 if (backref->found_forward_ref && backref->found_dir_item)
3668 backref->reachable = 1;
/*
 * Move the per-root records in @src_cache into the global root record
 * cache @dst_cache.
 *
 * For the tree reloc root the source records are just freed. Otherwise each
 * record is removed from @src_cache, checked with is_child_root(), and its
 * dir item / dir index backrefs are forwarded to add_root_backref() with
 * rec->ino as the referenced root id and this root as the referencing root.
 * A backref carrying an inode ref is treated as a fatal inconsistency
 * (BUG_ON). The source record is freed afterwards.
 */
3672 static int merge_root_recs(struct btrfs_root *root,
3673 struct cache_tree *src_cache,
3674 struct cache_tree *dst_cache)
3676 struct cache_extent *cache;
3677 struct ptr_node *node;
3678 struct inode_record *rec;
3679 struct inode_backref *backref;
3682 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3683 free_inode_recs_tree(src_cache);
3688 cache = search_cache_extent(src_cache, 0);
3691 node = container_of(cache, struct ptr_node, cache);
3693 remove_cache_extent(src_cache, &node->cache);
3696 ret = is_child_root(root, root->objectid, rec->ino);
3702 list_for_each_entry(backref, &rec->backrefs, list) {
3703 BUG_ON(backref->found_inode_ref);
3704 if (backref->found_dir_item)
3705 add_root_backref(dst_cache, rec->ino,
3706 root->root_key.objectid, backref->dir,
3707 backref->index, backref->name,
3708 backref->namelen, BTRFS_DIR_ITEM_KEY,
3710 if (backref->found_dir_index)
3711 add_root_backref(dst_cache, rec->ino,
3712 root->root_key.objectid, backref->dir,
3713 backref->index, backref->name,
3714 backref->namelen, BTRFS_DIR_INDEX_KEY,
3718 free_inode_rec(rec);
/*
 * Verify that every fs tree recorded in @root_cache is properly referenced.
 *
 * The walk starts from the record of BTRFS_FS_TREE_OBJECTID. The first pass
 * over the cache looks at reachable backrefs and at the found_ref count of
 * the roots they come from, clearing reachable on some of them.
 * NOTE(review): the exact propagation rule should be confirmed against the
 * full loop; per the existing fixme, circular references are not detected.
 *
 * The second pass reports problems:
 *  - a root in the FIRST_FREE..LAST_FREE objectid range with no reference is
 *    looked up with check_orphan_item() in the tree root, and is reported as
 *    not referenced only when it also has a root item,
 *  - each backref gets its missing-item flags (dir item, dir index, root
 *    backref, root ref) derived from its found_* bits,
 *  - roots with problems are printed together with their unresolved refs.
 *
 * Returns 1 when errors is greater than 0, otherwise 0.
 */
3725 static int check_root_refs(struct btrfs_root *root,
3726 struct cache_tree *root_cache)
3728 struct root_record *rec;
3729 struct root_record *ref_root;
3730 struct root_backref *backref;
3731 struct cache_extent *cache;
3737 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3738 BUG_ON(IS_ERR(rec));
3741 /* fixme: this can not detect circular references */
3744 cache = search_cache_extent(root_cache, 0);
3748 rec = container_of(cache, struct root_record, cache);
3749 cache = next_cache_extent(cache);
3751 if (rec->found_ref == 0)
3754 list_for_each_entry(backref, &rec->backrefs, list) {
3755 if (!backref->reachable)
3758 ref_root = get_root_rec(root_cache,
3760 BUG_ON(IS_ERR(ref_root));
3761 if (ref_root->found_ref > 0)
3764 backref->reachable = 0;
3766 if (rec->found_ref == 0)
/* Reporting pass. */
3772 cache = search_cache_extent(root_cache, 0);
3776 rec = container_of(cache, struct root_record, cache);
3777 cache = next_cache_extent(cache);
3779 if (rec->found_ref == 0 &&
3780 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3781 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3782 ret = check_orphan_item(root->fs_info->tree_root,
3788 * If we don't have a root item then we likely just have
3789 * a dir item in a snapshot for this root but no actual
3790 * ref key or anything so it's meaningless.
3792 if (!rec->found_root_item)
3795 fprintf(stderr, "fs tree %llu not referenced\n",
3796 (unsigned long long)rec->objectid);
3800 if (rec->found_ref > 0 && !rec->found_root_item)
/* Derive the missing-item flags of each backref from its found bits. */
3802 list_for_each_entry(backref, &rec->backrefs, list) {
3803 if (!backref->found_dir_item)
3804 backref->errors |= REF_ERR_NO_DIR_ITEM;
3805 if (!backref->found_dir_index)
3806 backref->errors |= REF_ERR_NO_DIR_INDEX;
3807 if (!backref->found_back_ref)
3808 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3809 if (!backref->found_forward_ref)
3810 backref->errors |= REF_ERR_NO_ROOT_REF;
3811 if (backref->reachable && backref->errors)
3818 fprintf(stderr, "fs tree %llu refs %u %s\n",
3819 (unsigned long long)rec->objectid, rec->found_ref,
3820 rec->found_root_item ? "" : "not found");
3822 list_for_each_entry(backref, &rec->backrefs, list) {
3823 if (!backref->reachable)
3825 if (!backref->errors && rec->found_root_item)
3827 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3828 " index %llu namelen %u name %s errors %x\n",
3829 (unsigned long long)backref->ref_root,
3830 (unsigned long long)backref->dir,
3831 (unsigned long long)backref->index,
3832 backref->namelen, backref->name,
3834 print_ref_error(backref->errors);
3837 return errors > 0 ? 1 : 0;
/*
 * Feed one root ref item found in slot @slot of @eb into @root_cache.
 *
 * The directory id, sequence (index) and name are read from the item. A
 * name longer than BTRFS_NAME_LEN is truncated to that length when copied
 * into the local buffer and flagged with REF_ERR_NAME_TOO_LONG.
 *
 * For a ROOT_REF key, key->offset is passed to add_root_backref() as the
 * referenced root and key->objectid as the referencing root; for the other
 * key type handled here the two are swapped.
 */
3840 static int process_root_ref(struct extent_buffer *eb, int slot,
3841 struct btrfs_key *key,
3842 struct cache_tree *root_cache)
3848 struct btrfs_root_ref *ref;
3849 char namebuf[BTRFS_NAME_LEN];
3852 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3854 dirid = btrfs_root_ref_dirid(eb, ref);
3855 index = btrfs_root_ref_sequence(eb, ref);
3856 name_len = btrfs_root_ref_name_len(eb, ref);
/* Never read more than the local buffer can hold. */
3858 if (name_len <= BTRFS_NAME_LEN) {
3862 len = BTRFS_NAME_LEN;
3863 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes are stored right behind the fixed-size ref structure. */
3865 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3867 if (key->type == BTRFS_ROOT_REF_KEY) {
3868 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3869 index, namebuf, len, key->type, error);
3871 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3872 index, namebuf, len, key->type, error);
/*
 * Per-entry callback for the corrupt_blocks tree: maps the embedded
 * cache_extent back to its btrfs_corrupt_block. It is registered through
 * FREE_EXTENT_CACHE_BASED_TREE() on the line that follows.
 */
3877 static void free_corrupt_block(struct cache_extent *cache)
3879 struct btrfs_corrupt_block *corrupt;
3881 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3885 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3888 * Repair the btree of the given root.
3890 * The fix is to remove the node key in corrupt_blocks cache_tree,
3891 * and rebalance the tree.
3892 * After the fix, the btree should be writeable.
/*
 * Cut the tree blocks listed in @corrupt_blocks out of the btree of @root.
 *
 * Nothing is done for an empty list. Otherwise one transaction is used for
 * two passes over the list:
 *  1. For each corrupt block the parent slot is located by searching for the
 *     recorded key with path.lowest_level set to the recorded level. The
 *     block pointer is read from that slot, the slot is removed with
 *     btrfs_del_ptr() and the extent is released with btrfs_free_extent().
 *     The search uses ins_len 0 so that no balancing touches a corrupted
 *     sibling.
 *  2. Each recorded key is searched again with ins_len -1, which is what
 *     rebalances the tree now that the corrupt blocks are gone.
 *
 * The transaction is committed and the path released at the end.
 * NOTE(review): the return value of the commit is not checked.
 */
3894 static int repair_btree(struct btrfs_root *root,
3895 struct cache_tree *corrupt_blocks)
3897 struct btrfs_trans_handle *trans;
3898 struct btrfs_path path;
3899 struct btrfs_corrupt_block *corrupt;
3900 struct cache_extent *cache;
3901 struct btrfs_key key;
3906 if (cache_tree_empty(corrupt_blocks))
3909 trans = btrfs_start_transaction(root, 1);
3910 if (IS_ERR(trans)) {
3911 ret = PTR_ERR(trans);
3912 fprintf(stderr, "Error starting transaction: %s\n",
/* Pass 1: remove the pointer to every corrupt block. */
3916 btrfs_init_path(&path);
3917 cache = first_cache_extent(corrupt_blocks);
3919 corrupt = container_of(cache, struct btrfs_corrupt_block,
3921 level = corrupt->level;
3922 path.lowest_level = level;
3923 key.objectid = corrupt->key.objectid;
3924 key.type = corrupt->key.type;
3925 key.offset = corrupt->key.offset;
3928 * Here we don't want to do any tree balance, since it may
3929 * cause a balance with corrupted brother leaf/node,
3930 * so ins_len set to 0 here.
3931 * Balance will be done after all corrupt node/leaf is deleted.
3933 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3936 offset = btrfs_node_blockptr(path.nodes[level],
3939 /* Remove the ptr */
3940 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3944 * Remove the corresponding extent
3945 * return value is not concerned.
3947 btrfs_release_path(&path);
3948 ret = btrfs_free_extent(trans, root, offset,
3949 root->fs_info->nodesize, 0,
3950 root->root_key.objectid, level - 1, 0);
3951 cache = next_cache_extent(cache);
3954 /* Balance the btree using btrfs_search_slot() */
3955 cache = first_cache_extent(corrupt_blocks);
3957 corrupt = container_of(cache, struct btrfs_corrupt_block,
3959 memcpy(&key, &corrupt->key, sizeof(key));
3960 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3963 /* return will always >0 since it won't find the item */
3965 btrfs_release_path(&path);
3966 cache = next_cache_extent(cache);
3969 btrfs_commit_transaction(trans, root);
3970 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree.
 *
 * Outline of the visible steps:
 *  1. A local corrupt_blocks tree is installed in fs_info for the duration
 *     of the check and removed again at the end.
 *  2. Unless this is the tree reloc root, its root_record is fetched and
 *     found_root_item is set when the root item has references.
 *  3. Orphan data extents queued on the root are moved to the inode records
 *     they belong to, which are flagged I_ERR_FILE_EXTENT_ORPHAN.
 *  4. The root block itself is validated with btrfs_check_leaf() or
 *     btrfs_check_node().
 *  5. The tree is walked with walk_down_tree() / walk_up_tree(). The walk
 *     starts at the root node, or, for a root with no references and a
 *     non-zero drop_progress, at the drop_progress key and drop_level
 *     recorded in the root item; an out-of-range drop level is rejected.
 *  6. Corrupt tree blocks found on the way are listed and handed to
 *     repair_btree().
 *  7. Root records are merged into @root_cache, the inode record still being
 *     processed is marked checked, and check_inode_recs() runs over the
 *     collected inode records.
 *  8. The corrupt block tree and the orphan data extents are freed.
 */
3974 static int check_fs_root(struct btrfs_root *root,
3975 struct cache_tree *root_cache,
3976 struct walk_control *wc)
3982 struct btrfs_path path;
3983 struct shared_node root_node;
3984 struct root_record *rec;
3985 struct btrfs_root_item *root_item = &root->root_item;
3986 struct cache_tree corrupt_blocks;
3987 struct orphan_data_extent *orphan;
3988 struct orphan_data_extent *tmp;
3989 enum btrfs_tree_block_status status;
3990 struct node_refs nrefs;
3993 * Reuse the corrupt_block cache tree to record corrupted tree block
3995 * Unlike the usage in extent tree check, here we do it in a per
3996 * fs/subvol tree base.
3998 cache_tree_init(&corrupt_blocks);
3999 root->fs_info->corrupt_blocks = &corrupt_blocks;
4001 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4002 rec = get_root_rec(root_cache, root->root_key.objectid);
4003 BUG_ON(IS_ERR(rec));
4004 if (btrfs_root_refs(root_item) > 0)
4005 rec->found_root_item = 1;
4008 btrfs_init_path(&path);
4009 memset(&root_node, 0, sizeof(root_node));
4010 cache_tree_init(&root_node.root_cache);
4011 cache_tree_init(&root_node.inode_cache);
4012 memset(&nrefs, 0, sizeof(nrefs));
4014 /* Move the orphan extent record to corresponding inode_record */
4015 list_for_each_entry_safe(orphan, tmp,
4016 &root->orphan_data_extents, list) {
4017 struct inode_record *inode;
4019 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4021 BUG_ON(IS_ERR(inode));
4022 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4023 list_move(&orphan->list, &inode->orphan_extents);
/* Set up the walk control so that the walk starts at the root level. */
4026 level = btrfs_header_level(root->node);
4027 memset(wc->nodes, 0, sizeof(wc->nodes));
4028 wc->nodes[level] = &root_node;
4029 wc->active_node = level;
4030 wc->root_level = level;
4032 /* We may not have checked the root block, lets do that now */
4033 if (btrfs_is_leaf(root->node))
4034 status = btrfs_check_leaf(root, NULL, root->node);
4036 status = btrfs_check_node(root, NULL, root->node);
4037 if (status != BTRFS_TREE_BLOCK_CLEAN)
/* Start at the root node, or resume at the recorded drop progress. */
4040 if (btrfs_root_refs(root_item) > 0 ||
4041 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4042 path.nodes[level] = root->node;
4043 extent_buffer_get(root->node);
4044 path.slots[level] = 0;
4046 struct btrfs_key key;
4047 struct btrfs_disk_key found_key;
4049 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4050 level = root_item->drop_level;
4051 path.lowest_level = level;
4052 if (level > btrfs_header_level(root->node) ||
4053 level >= BTRFS_MAX_LEVEL) {
4054 error("ignoring invalid drop level: %u", level);
4057 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4060 btrfs_node_key(path.nodes[level], &found_key,
4062 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4063 sizeof(found_key)));
4067 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4073 wret = walk_up_tree(root, &path, wc, &level);
4080 btrfs_release_path(&path);
/* Report the corrupt tree blocks found by the walk and try to repair them. */
4082 if (!cache_tree_empty(&corrupt_blocks)) {
4083 struct cache_extent *cache;
4084 struct btrfs_corrupt_block *corrupt;
4086 printf("The following tree block(s) is corrupted in tree %llu:\n",
4087 root->root_key.objectid);
4088 cache = first_cache_extent(&corrupt_blocks);
4090 corrupt = container_of(cache,
4091 struct btrfs_corrupt_block,
4093 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4094 cache->start, corrupt->level,
4095 corrupt->key.objectid, corrupt->key.type,
4096 corrupt->key.offset);
4097 cache = next_cache_extent(cache);
4100 printf("Try to repair the btree for root %llu\n",
4101 root->root_key.objectid);
4102 ret = repair_btree(root, &corrupt_blocks);
4104 fprintf(stderr, "Failed to repair btree: %s\n",
4107 printf("Btree for root %llu is fixed\n",
4108 root->root_key.objectid);
4112 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4116 if (root_node.current) {
4117 root_node.current->checked = 1;
4118 maybe_free_inode_rec(&root_node.inode_cache,
4122 err = check_inode_recs(root, &root_node.inode_cache);
/* Tear down the per-root state installed at the top of the function. */
4126 free_corrupt_blocks_tree(&corrupt_blocks);
4127 root->fs_info->corrupt_blocks = NULL;
4128 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Filter used by the root-tree walkers in this file to pick the ROOT_ITEMs
 * that get checked.  BTRFS_TREE_RELOC_OBJECTID and
 * BTRFS_DATA_RELOC_TREE_OBJECTID are tested explicitly; every other id is
 * classified by is_fstree().
 * NOTE(review): the value returned for the two reloc ids is not visible in
 * this excerpt; callers handle BTRFS_TREE_RELOC_OBJECTID inside the accepted
 * branch, so it is presumably non-zero - confirm.
 */
4132 static int fs_root_objectid(u64 objectid)
4134 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4135 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4137 return is_fstree(objectid);
/*
 * Walk the items of the tree root: every ROOT_ITEM accepted by
 * fs_root_objectid() is read and handed to check_fs_root(), and every
 * ROOT_REF/ROOT_BACKREF item is handed to process_root_ref().
 *
 * @fs_info:    supplies the tree root that is walked
 * @root_cache: passed to check_fs_root(); it is emptied with
 *              free_root_recs_tree() on the two paths commented below
 */
4140 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4141 struct cache_tree *root_cache)
4143 struct btrfs_path path;
4144 struct btrfs_key key;
4145 struct walk_control wc;
4146 struct extent_buffer *leaf, *tree_node;
4147 struct btrfs_root *tmp_root;
4148 struct btrfs_root *tree_root = fs_info->tree_root;
/* With progress reporting on, mark the shared task context as being in the fs-roots phase. */
4152 if (ctx.progress_enabled) {
4153 ctx.tp = TASK_FS_ROOTS;
4154 task_start(ctx.info);
4158 * Just in case we made any changes to the extent tree that weren't
4159 * reflected into the free space cache yet.
4162 reset_cached_block_groups(fs_info);
4163 memset(&wc, 0, sizeof(wc));
4164 cache_tree_init(&wc.shared);
4165 btrfs_init_path(&path);
4170 key.type = BTRFS_ROOT_ITEM_KEY;
4171 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember which buffer currently is the tree root's node so a later swap can be detected. */
4176 tree_node = tree_root->node;
/*
 * The tree root's node differs from the sampled one: drop the root records
 * collected so far and release the path.
 * NOTE(review): what follows (presumably a restart of the walk) is not
 * visible in this excerpt.
 */
4178 if (tree_node != tree_root->node) {
4179 free_root_recs_tree(root_cache);
4180 btrfs_release_path(&path);
4183 leaf = path.nodes[0];
/* The slot is past the last item of this leaf: advance to the next leaf. */
4184 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4185 ret = btrfs_next_leaf(tree_root, &path);
4191 leaf = path.nodes[0];
4193 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4194 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4195 fs_root_objectid(key.objectid)) {
/*
 * Tree-reloc roots are read with btrfs_read_fs_root_no_cache() and released
 * with btrfs_free_fs_root() after the check; other roots are looked up with
 * key.offset forced to (u64)-1 and are not freed here.
 */
4196 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4197 tmp_root = btrfs_read_fs_root_no_cache(
4200 key.offset = (u64)-1;
4201 tmp_root = btrfs_read_fs_root(
4204 if (IS_ERR(tmp_root)) {
4208 ret = check_fs_root(tmp_root, root_cache, &wc);
/*
 * -EAGAIN from check_fs_root(): discard the collected root records and
 * release the path (the continuation is not visible in this excerpt).
 */
4209 if (ret == -EAGAIN) {
4210 free_root_recs_tree(root_cache);
4211 btrfs_release_path(&path);
4216 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4217 btrfs_free_fs_root(tmp_root);
4218 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4219 key.type == BTRFS_ROOT_BACKREF_KEY) {
4220 process_root_ref(leaf, path.slots[0], &key,
4227 btrfs_release_path(&path);
4229 free_extent_cache_tree(&wc.shared);
/* The shared-node cache was just freed, so it is expected to be empty; warn if it is not. */
4230 if (!cache_tree_empty(&wc.shared))
4231 fprintf(stderr, "warning line %d\n", __LINE__);
4233 task_stop(ctx.info);
4239 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4240 * INODE_REF/INODE_EXTREF match.
4242 * @root: the root of the fs/file tree
4243 * @ref_key: the key of the INODE_REF/INODE_EXTREF
4244 * @key: the key of the DIR_ITEM/DIR_INDEX
4245 * @index: the index in the INODE_REF/INODE_EXTREF, be used to
4246 * distinguish root_dir between normal dir/file
4247 * @name: the name in the INODE_REF/INODE_EXTREF
4248 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4249 * @mode: the st_mode of INODE_ITEM
4251 * Return 0 if no error occurred.
4252 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4253 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4255 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4256 * not match for normal dir/file.
4258 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4259 struct btrfs_key *key, u64 index, char *name,
4260 u32 namelen, u32 mode)
4262 struct btrfs_path path;
4263 struct extent_buffer *node;
4264 struct btrfs_dir_item *di;
4265 struct btrfs_key location;
4266 char namebuf[BTRFS_NAME_LEN] = {0};
/* Read-only lookup (no transaction, no insert/delete reservation, no COW) of the exact @key. */
4276 btrfs_init_path(&path);
4277 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4279 ret = DIR_ITEM_MISSING;
4283 /* Process root dir and goto out */
4286 ret = ROOT_DIR_ERROR;
4288 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4290 ref_key->type == BTRFS_INODE_REF_KEY ?
4292 ref_key->objectid, ref_key->offset,
4293 key->type == BTRFS_DIR_ITEM_KEY ?
4294 "DIR_ITEM" : "DIR_INDEX");
4302 /* Process normal file/dir */
4304 ret = DIR_ITEM_MISSING;
4306 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4308 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4309 ref_key->objectid, ref_key->offset,
4310 key->type == BTRFS_DIR_ITEM_KEY ?
4311 "DIR_ITEM" : "DIR_INDEX",
4312 key->objectid, key->offset, namelen, name,
4313 imode_to_type(mode));
4317 /* Check whether inode_id/filetype/name match */
4318 node = path.nodes[0];
4319 slot = path.slots[0];
4320 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4321 total = btrfs_item_size_nr(node, slot);
/*
 * One item can pack several dir entries; visit each of them.  ret is reset
 * to DIR_ITEM_MISMATCH at the top of every pass, so that value survives the
 * loop unless some entry matches (the statement recording a match is not
 * visible in this excerpt).
 */
4322 while (cur < total) {
4323 ret = DIR_ITEM_MISMATCH;
4324 name_len = btrfs_dir_name_len(node, di);
4325 data_len = btrfs_dir_data_len(node, di);
/* The entry must point at the INODE_ITEM (offset 0) of the inode that owns the ref. */
4327 btrfs_dir_item_key_to_cpu(node, di, &location);
4328 if (location.objectid != ref_key->objectid ||
4329 location.type != BTRFS_INODE_ITEM_KEY ||
4330 location.offset != 0)
4333 filetype = btrfs_dir_type(node, di);
4334 if (imode_to_type(mode) != filetype)
/* The name would run past the item or exceeds BTRFS_NAME_LEN: warn and clamp the length read. */
4337 if (cur + sizeof(*di) + name_len > total ||
4338 name_len > BTRFS_NAME_LEN) {
4339 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4341 key->type == BTRFS_DIR_ITEM_KEY ?
4342 "DIR_ITEM" : "DIR_INDEX",
4343 key->objectid, key->offset, name_len);
4345 if (cur + sizeof(*di) > total)
4347 len = min_t(u32, total - cur - sizeof(*di),
/* The name bytes sit directly behind the fixed-size entry header. */
4353 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4354 if (len != namelen || strncmp(namebuf, name, len))
/* Step over header, name and data to reach the next packed entry. */
4360 len = sizeof(*di) + name_len + data_len;
4361 di = (struct btrfs_dir_item *)((char *)di + len);
4364 if (ret == DIR_ITEM_MISMATCH)
4366 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4368 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4369 ref_key->objectid, ref_key->offset,
4370 key->type == BTRFS_DIR_ITEM_KEY ?
4371 "DIR_ITEM" : "DIR_INDEX",
4372 key->objectid, key->offset, namelen, name,
4373 imode_to_type(mode));
4375 btrfs_release_path(&path);
4380 * Traverse the given INODE_REF and call find_dir_item() to find related
4381 * DIR_ITEM/DIR_INDEX.
4383 * @root: the root of the fs/file tree
4384 * @ref_key: the key of the INODE_REF
4385 * @refs: the count of INODE_REF
4386 * @mode: the st_mode of INODE_ITEM
4388 * Return 0 if no error occurred.
4390 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4391 struct extent_buffer *node, int slot, u64 *refs,
4394 struct btrfs_key key;
4395 struct btrfs_inode_ref *ref;
4396 char namebuf[BTRFS_NAME_LEN] = {0};
4404 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4405 total = btrfs_item_size_nr(node, slot);
4408 /* Update inode ref count */
4411 index = btrfs_inode_ref_index(node, ref);
4412 name_len = btrfs_inode_ref_name_len(node, ref);
/* The name would run past the item or exceeds BTRFS_NAME_LEN: warn and clamp the length read. */
4413 if (cur + sizeof(*ref) + name_len > total ||
4414 name_len > BTRFS_NAME_LEN) {
4415 warning("root %llu INODE_REF[%llu %llu] name too long",
4416 root->objectid, ref_key->objectid, ref_key->offset);
4418 if (total < cur + sizeof(*ref))
4420 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
/* The name bytes sit directly behind the fixed-size ref header. */
4425 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4427 /* Check root dir ref name */
/*
 * A ref with index 0 is treated as the root directory's ref and its name is
 * compared with "..".
 * NOTE(review): strncmp() is bounded by name_len (the on-disk value, not the
 * clamped len), so a name that is a proper prefix of ".." such as "." also
 * compares equal - confirm this is intended.
 */
4428 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4429 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4430 root->objectid, ref_key->objectid, ref_key->offset,
4432 err |= ROOT_DIR_ERROR;
4435 /* Find related DIR_INDEX */
/* For an INODE_REF the key offset is used as the objectid of the directory to search. */
4436 key.objectid = ref_key->offset;
4437 key.type = BTRFS_DIR_INDEX_KEY;
4439 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4442 /* Find related dir_item */
/* A DIR_ITEM is keyed by the hash of the name rather than by the index. */
4443 key.objectid = ref_key->offset;
4444 key.type = BTRFS_DIR_ITEM_KEY;
4445 key.offset = btrfs_name_hash(namebuf, len);
4446 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Step over header and name to reach the next ref packed in the same item. */
4449 len = sizeof(*ref) + name_len;
4450 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4460 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4461 * DIR_ITEM/DIR_INDEX.
4463 * @root: the root of the fs/file tree
4464 * @ref_key: the key of the INODE_EXTREF
4465 * @refs: the count of INODE_EXTREF
4466 * @mode: the st_mode of INODE_ITEM
4468 * Return 0 if no error occurred.
4470 static int check_inode_extref(struct btrfs_root *root,
4471 struct btrfs_key *ref_key,
4472 struct extent_buffer *node, int slot, u64 *refs,
4475 struct btrfs_key key;
4476 struct btrfs_inode_extref *extref;
4477 char namebuf[BTRFS_NAME_LEN] = {0};
4487 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4488 total = btrfs_item_size_nr(node, slot);
4491 /* update inode ref count */
4493 name_len = btrfs_inode_extref_name_len(node, extref);
4494 index = btrfs_inode_extref_index(node, extref);
4495 parent = btrfs_inode_extref_parent(node, extref);
/*
 * Names longer than BTRFS_NAME_LEN are read truncated, with a warning.
 * NOTE(review): no comparison of name_len against the item size (total) is
 * visible here, unlike the INODE_REF counterpart - confirm.
 */
4496 if (name_len <= BTRFS_NAME_LEN) {
4499 len = BTRFS_NAME_LEN;
4500 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4501 root->objectid, ref_key->objectid, ref_key->offset);
/* The name bytes sit directly behind the fixed-size extref header. */
4503 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4505 /* Check root dir ref name */
/*
 * NOTE(review): strncmp() is bounded by name_len (the on-disk value, not the
 * clamped len), so a proper prefix of ".." such as "." also compares equal.
 */
4506 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4507 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4508 root->objectid, ref_key->objectid, ref_key->offset,
4510 err |= ROOT_DIR_ERROR;
4513 /* find related dir_index */
/* For an extref the directory id comes from the entry's parent field, not from the key. */
4514 key.objectid = parent;
4515 key.type = BTRFS_DIR_INDEX_KEY;
4517 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4520 /* find related dir_item */
4521 key.objectid = parent;
4522 key.type = BTRFS_DIR_ITEM_KEY;
4523 key.offset = btrfs_name_hash(namebuf, len);
4524 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Step over header and name to reach the next extref packed in the same item. */
4527 len = sizeof(*extref) + name_len;
4528 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4538 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4539 * DIR_ITEM/DIR_INDEX match.
4541 * @root: the root of the fs/file tree
4542 * @key: the key of the INODE_REF/INODE_EXTREF
4543 * @name: the name in the INODE_REF/INODE_EXTREF
4544 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4545 * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4547 * @ext_ref: the EXTENDED_IREF feature
4549 * Return 0 if no error occurred.
4550 * Return >0 for error bitmap
4552 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4553 char *name, int namelen, u64 index,
4554 unsigned int ext_ref)
4556 struct btrfs_path path;
4557 struct btrfs_inode_ref *ref;
4558 struct btrfs_inode_extref *extref;
4559 struct extent_buffer *node;
4560 char ref_namebuf[BTRFS_NAME_LEN] = {0};
/* First attempt: read-only lookup of @key as given by the caller. */
4571 btrfs_init_path(&path);
4572 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4574 ret = INODE_REF_MISSING;
4578 node = path.nodes[0];
4579 slot = path.slots[0];
4581 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4582 total = btrfs_item_size_nr(node, slot);
4584 /* Iterate all entry of INODE_REF */
4585 while (cur < total) {
4586 ret = INODE_REF_MISSING;
4588 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4589 ref_index = btrfs_inode_ref_index(node, ref);
/* An index of (u64)-1 means "do not compare the index". */
4590 if (index != (u64)-1 && index != ref_index)
/* The name would run past the item or exceeds BTRFS_NAME_LEN: warn and clamp the length read. */
4593 if (cur + sizeof(*ref) + ref_namelen > total ||
4594 ref_namelen > BTRFS_NAME_LEN) {
4595 warning("root %llu INODE %s[%llu %llu] name too long",
4597 key->type == BTRFS_INODE_REF_KEY ?
4599 key->objectid, key->offset);
4601 if (cur + sizeof(*ref) > total)
4603 len = min_t(u32, total - cur - sizeof(*ref),
4609 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4612 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Step over header and name to reach the next ref packed in the same item. */
4618 len = sizeof(*ref) + ref_namelen;
4619 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4624 /* Skip if not support EXTENDED_IREF feature */
4628 btrfs_release_path(&path);
4629 btrfs_init_path(&path);
/*
 * Second attempt through INODE_EXTREF.  This writes through @key: the
 * caller's key has its type and offset overwritten, with the old offset
 * kept in dir_id as the directory id to match against.
 */
4631 dir_id = key->offset;
4632 key->type = BTRFS_INODE_EXTREF_KEY;
4633 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4635 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4637 ret = INODE_REF_MISSING;
4641 node = path.nodes[0];
4642 slot = path.slots[0];
4644 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4646 total = btrfs_item_size_nr(node, slot);
4648 /* Iterate all entry of INODE_EXTREF */
4649 while (cur < total) {
4650 ret = INODE_REF_MISSING;
4652 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4653 ref_index = btrfs_inode_extref_index(node, extref);
4654 parent = btrfs_inode_extref_parent(node, extref);
4655 if (index != (u64)-1 && index != ref_index)
/* The extref key offset is a hash, so the parent directory is compared explicitly. */
4658 if (parent != dir_id)
4661 if (ref_namelen <= BTRFS_NAME_LEN) {
4664 len = BTRFS_NAME_LEN;
/*
 * NOTE(review): key->type was set to BTRFS_INODE_EXTREF_KEY above and no
 * visible statement changes it back, so the comparison against
 * BTRFS_INODE_REF_KEY in this warning looks always false here - confirm.
 */
4665 warning("root %llu INODE %s[%llu %llu] name too long",
4667 key->type == BTRFS_INODE_REF_KEY ?
4669 key->objectid, key->offset);
4671 read_extent_buffer(node, ref_namebuf,
4672 (unsigned long)(extref + 1), len);
4674 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Step over header and name to reach the next extref packed in the same item. */
4681 len = sizeof(*extref) + ref_namelen;
4682 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4687 btrfs_release_path(&path);
4692 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4693 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4695 * @root: the root of the fs/file tree
4696 * @key: the key of the DIR_ITEM/DIR_INDEX
4697 * @size: running total; the name length of every entry is added to it
4698 * @ext_ref: the EXTENDED_IREF feature
4700 * Return 0 if no error occurred.
4702 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4703 struct extent_buffer *node, int slot, u64 *size,
4704 unsigned int ext_ref)
4706 struct btrfs_dir_item *di;
4707 struct btrfs_inode_item *ii;
4708 struct btrfs_path path;
4709 struct btrfs_key location;
4710 char namebuf[BTRFS_NAME_LEN] = {0};
4723 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4724 * ignore index check.
4726 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4728 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4729 total = btrfs_item_size_nr(node, slot);
/* One item can pack several dir entries; visit each of them. */
4731 while (cur < total) {
4732 data_len = btrfs_dir_data_len(node, di);
4734 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4735 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4736 "DIR_ITEM" : "DIR_INDEX",
4737 key->objectid, key->offset, data_len);
4739 name_len = btrfs_dir_name_len(node, di);
/* The name would run past the item or exceeds BTRFS_NAME_LEN: warn and clamp the length read. */
4740 if (cur + sizeof(*di) + name_len > total ||
4741 name_len > BTRFS_NAME_LEN) {
4742 warning("root %llu %s[%llu %llu] name too long",
4744 key->type == BTRFS_DIR_ITEM_KEY ?
4745 "DIR_ITEM" : "DIR_INDEX",
4746 key->objectid, key->offset);
4748 if (cur + sizeof(*di) > total)
4750 len = min_t(u32, total - cur - sizeof(*di),
/* The caller's running total grows by the on-disk name length, not by the clamped len. */
4755 (*size) += name_len;
4757 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4758 filetype = btrfs_dir_type(node, di);
/* A DIR_ITEM key offset must equal the hash of the name it stores. */
4760 if (key->type == BTRFS_DIR_ITEM_KEY &&
4761 key->offset != btrfs_name_hash(namebuf, len)) {
4763 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4764 root->objectid, key->objectid, key->offset,
4765 namebuf, len, filetype, key->offset,
4766 btrfs_name_hash(namebuf, len));
4769 btrfs_init_path(&path);
4770 btrfs_dir_item_key_to_cpu(node, di, &location);
4772 /* Ignore related ROOT_ITEM check */
4773 if (location.type == BTRFS_ROOT_ITEM_KEY)
4776 /* Check relative INODE_ITEM(existence/filetype) */
4777 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4779 err |= INODE_ITEM_MISSING;
4780 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4781 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4782 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4783 key->offset, location.objectid, name_len,
4788 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4789 struct btrfs_inode_item);
4790 mode = btrfs_inode_mode(path.nodes[0], ii);
/* The file type stored in the dir entry must agree with the type derived from the inode mode. */
4792 if (imode_to_type(mode) != filetype) {
4793 err |= INODE_ITEM_MISMATCH;
4794 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4795 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4796 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4797 key->offset, name_len, namebuf, filetype);
4800 /* Check relative INODE_REF/INODE_EXTREF */
/* Reuse location as the INODE_REF key: same inode objectid, offset = this directory's objectid. */
4801 location.type = BTRFS_INODE_REF_KEY;
4802 location.offset = key->objectid;
4803 ret = find_inode_ref(root, &location, namebuf, len,
4806 if (ret & INODE_REF_MISSING)
4807 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4808 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4809 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4810 key->offset, name_len, namebuf, filetype);
4813 btrfs_release_path(&path);
/* Step over header, name and data to reach the next packed entry. */
4814 len = sizeof(*di) + name_len + data_len;
4815 di = (struct btrfs_dir_item *)((char *)di + len);
/* Bytes left over after an entry of a DIR_INDEX item are reported as an error. */
4818 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4819 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4820 root->objectid, key->objectid, key->offset);
4829 * Check file extent datasum/hole, update the size of the file extents,
4830 * check and update the last offset of the file extent.
4832 * @root: the root of fs/file tree.
4833 * @fkey: the key of the file extent.
4834 * @nodatasum: INODE_NODATASUM feature.
4835 * @size: the sum of all EXTENT_DATA items size for this inode.
4836 * @end: the offset of the last extent.
4838 * Return 0 if no error occurred.
4840 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4841 struct extent_buffer *node, int slot,
4842 unsigned int nodatasum, u64 *size, u64 *end)
4844 struct btrfs_file_extent_item *fi;
4847 u64 extent_num_bytes;
4849 u64 csum_found; /* In byte size, sectorsize aligned */
4850 u64 search_start; /* Logical range start we search for csum */
4851 u64 search_len; /* Logical range len we search for csum */
4852 unsigned int extent_type;
4853 unsigned int is_hole;
4858 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4860 /* Check inline extent */
4861 extent_type = btrfs_file_extent_type(node, fi);
4862 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4863 struct btrfs_item *e = btrfs_item_nr(slot);
4864 u32 item_inline_len;
4866 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4867 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4868 compressed = btrfs_file_extent_compression(node, fi);
4869 if (extent_num_bytes == 0) {
4871 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4872 root->objectid, fkey->objectid, fkey->offset);
4873 err |= FILE_EXTENT_ERROR;
/* Only for uncompressed inline data must the recorded length equal the inline payload length of the item. */
4875 if (!compressed && extent_num_bytes != item_inline_len) {
4877 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4878 root->objectid, fkey->objectid, fkey->offset,
4879 extent_num_bytes, item_inline_len);
4880 err |= FILE_EXTENT_ERROR;
/* Inline extents advance both the caller's end offset and its size total. */
4882 *end += extent_num_bytes;
4883 *size += extent_num_bytes;
4887 /* Check extent type */
4888 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4889 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4890 err |= FILE_EXTENT_ERROR;
4891 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4892 root->objectid, fkey->objectid, fkey->offset);
4896 /* Check REG_EXTENT/PREALLOC_EXTENT */
4897 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4898 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4899 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4900 extent_offset = btrfs_file_extent_offset(node, fi);
4901 compressed = btrfs_file_extent_compression(node, fi);
/* An extent with neither a disk address nor an on-disk length is treated as a hole. */
4902 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4905 * Check EXTENT_DATA csum
4907 * For plain (uncompressed) extent, we should only check the range
4908 * we're referring to, as it's possible that part of prealloc extent
4909 * has been written, and has csum:
4911 * |<--- Original large preallocated extent A ---->|
4912 * |<- Prealloc File Extent ->|<- Regular Extent ->|
4915 * For compressed extent, we should check the whole range.
4918 search_start = disk_bytenr + extent_offset;
4919 search_len = extent_num_bytes;
4921 search_start = disk_bytenr;
4922 search_len = disk_num_bytes;
/* The return value of count_csum_range() is only consulted in the regular-extent branch below. */
4924 ret = count_csum_range(root, search_start, search_len, &csum_found);
4925 if (csum_found > 0 && nodatasum) {
4926 err |= ODD_CSUM_ITEM;
4927 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4928 root->objectid, fkey->objectid, fkey->offset);
4929 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4930 !is_hole && (ret < 0 || csum_found < search_len)) {
4931 err |= CSUM_ITEM_MISSING;
4932 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4933 root->objectid, fkey->objectid, fkey->offset,
4934 csum_found, search_len);
4935 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4936 err |= ODD_CSUM_ITEM;
4937 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4938 root->objectid, fkey->objectid, fkey->offset, csum_found);
4941 /* Check EXTENT_DATA hole */
/* Unless the global no_holes flag is set, each extent must start exactly where the previous one ended. */
4942 if (!no_holes && *end != fkey->offset) {
4943 err |= FILE_EXTENT_ERROR;
4944 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4945 root->objectid, fkey->objectid, fkey->offset);
4948 *end += extent_num_bytes;
4950 *size += extent_num_bytes;
4956 * Check INODE_ITEM and related ITEMs (the same inode number)
4957 * 1. check link count
4958 * 2. check inode ref/extref
4959 * 3. check dir item/index
4961 * @ext_ref: the EXTENDED_IREF feature
4963 * Return 0 if no error occurred.
4964 * Return >0 for error or hit the traversal is done(by error bitmap)
4966 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4967 unsigned int ext_ref)
4969 struct extent_buffer *node;
4970 struct btrfs_inode_item *ii;
4971 struct btrfs_key key;
4980 u64 extent_size = 0;
4982 unsigned int nodatasum;
4987 node = path->nodes[0];
4988 slot = path->slots[0];
4990 btrfs_item_key_to_cpu(node, &key, slot);
4991 inode_id = key.objectid;
/* Items under BTRFS_ORPHAN_OBJECTID are not inodes; the path is just advanced past them. */
4993 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4994 ret = btrfs_next_item(root, path);
/* Snapshot the INODE_ITEM fields that the checks at the end compare against. */
5000 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5001 isize = btrfs_inode_size(node, ii);
5002 nbytes = btrfs_inode_nbytes(node, ii);
5003 mode = btrfs_inode_mode(node, ii);
5004 dir = imode_to_type(mode) == BTRFS_FT_DIR;
5005 nlink = btrfs_inode_nlink(node, ii);
5006 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5009 ret = btrfs_next_item(root, path);
5011 /* out will fill 'err' using current statistics */
5013 } else if (ret > 0) {
5018 node = path->nodes[0];
5019 slot = path->slots[0];
5020 btrfs_item_key_to_cpu(node, &key, slot);
/* Items of one inode share its objectid; a different objectid means this inode's items are exhausted. */
5021 if (key.objectid != inode_id)
5025 case BTRFS_INODE_REF_KEY:
5026 ret = check_inode_ref(root, &key, node, slot, &refs,
5030 case BTRFS_INODE_EXTREF_KEY:
/* NOTE(review): under this case label the key.type test is always true; only !ext_ref matters. */
5031 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5032 warning("root %llu EXTREF[%llu %llu] isn't supported",
5033 root->objectid, key.objectid,
5035 ret = check_inode_extref(root, &key, node, slot, &refs,
5039 case BTRFS_DIR_ITEM_KEY:
5040 case BTRFS_DIR_INDEX_KEY:
5042 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5043 root->objectid, inode_id,
5044 imode_to_type(mode), key.objectid,
5047 ret = check_dir_item(root, &key, node, slot, &size,
5051 case BTRFS_EXTENT_DATA_KEY:
5053 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5054 root->objectid, inode_id, key.objectid,
5057 ret = check_file_extent(root, &key, node, slot,
5058 nodatasum, &extent_size,
5062 case BTRFS_XATTR_ITEM_KEY:
5065 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5066 key.objectid, key.type, key.offset);
5071 /* verify INODE_ITEM nlink/isize/nbytes */
5074 err |= LINK_COUNT_ERROR;
5075 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5076 root->objectid, inode_id, nlink);
5080 * Just a warning, as dir inode nbytes is just an
5081 * instructive value.
5083 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5084 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5085 root->objectid, inode_id,
5086 root->fs_info->nodesize);
/* For a directory, the inode size is compared with the total accumulated by check_dir_item(). */
5089 if (isize != size) {
5091 error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
5092 root->objectid, inode_id, isize, size);
/* The link count must equal the number of refs counted by the ref/extref checks above. */
5095 if (nlink != refs) {
5096 err |= LINK_COUNT_ERROR;
5097 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5098 root->objectid, inode_id, nlink, refs);
5099 } else if (!nlink) {
5103 if (!nbytes && !no_holes && extent_end < isize) {
5104 err |= NBYTES_ERROR;
5105 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5106 root->objectid, inode_id, isize);
/* nbytes must equal the extent sizes summed by check_file_extent(). */
5109 if (nbytes != extent_size) {
5110 err |= NBYTES_ERROR;
5111 error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5112 root->objectid, inode_id, nbytes, extent_size);
/*
 * Look up the INODE_ITEM whose objectid is BTRFS_FIRST_FREE_OBJECTID in
 * @root.  Depending on the lookup result (the branch conditions are not
 * visible in this excerpt) either INODE_ITEM_MISSING is recorded and
 * reported, or check_inode_item() runs on the found position and its error
 * bits are merged into err.
 *
 * @root:    the fs/file tree to inspect
 * @ext_ref: forwarded unchanged to check_inode_item()
 */
5119 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5121 struct btrfs_path path;
5122 struct btrfs_key key;
5126 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5127 key.type = BTRFS_INODE_ITEM_KEY;
5130 /* For root being dropped, we don't need to check first inode */
5131 if (btrfs_root_refs(&root->root_item) == 0 &&
5132 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5136 btrfs_init_path(&path);
/* Read-only lookup: no transaction handle, no insert/delete reservation, no COW. */
5138 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5143 err |= INODE_ITEM_MISSING;
5144 error("first inode item of root %llu is missing",
5148 err |= check_inode_item(root, &path, ext_ref);
5153 btrfs_release_path(&path);
/*
 * Search rec->backref_tree for a tree backref, using a stack-built template
 * that is compared with compare_extent_backref().
 * NOTE(review): the template initialiser and the condition guarding the
 * parent/full_backref assignment are not visible in this excerpt, nor is the
 * handling of a failed search.
 */
5157 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5158 u64 parent, u64 root)
5160 struct rb_node *node;
5161 struct tree_backref *back = NULL;
5162 struct tree_backref match = {
/* Keying the template by parent goes together with marking it as a full backref. */
5169 match.parent = parent;
5170 match.node.full_backref = 1;
5175 node = rb_search(&rec->backref_tree, &match.node.node,
5176 (rb_compare_keys)compare_extent_backref, NULL);
/* Convert the rb-tree node back into the tree_backref that embeds it. */
5178 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Search rec->backref_tree for a data backref, using a stack-built template
 * that is compared with compare_extent_backref().  The visible initialisers
 * copy found_ref and disk_bytenr into the template.
 * NOTE(review): the remaining template fields, the condition guarding the
 * parent/full_backref assignment and the handling of a failed search are not
 * visible in this excerpt.
 */
5183 static struct data_backref *find_data_backref(struct extent_record *rec,
5184 u64 parent, u64 root,
5185 u64 owner, u64 offset,
5187 u64 disk_bytenr, u64 bytes)
5189 struct rb_node *node;
5190 struct data_backref *back = NULL;
5191 struct data_backref match = {
5198 .found_ref = found_ref,
5199 .disk_bytenr = disk_bytenr,
/* Keying the template by parent goes together with marking it as a full backref. */
5203 match.parent = parent;
5204 match.node.full_backref = 1;
5209 node = rb_search(&rec->backref_tree, &match.node.node,
5210 (rb_compare_keys)compare_extent_backref, NULL);
/* Convert the rb-tree node back into the data_backref that embeds it. */
5212 back = to_data_backref(rb_node_to_extent_backref(node));
5217 * Iterate all item on the tree and call check_inode_item() to check.
5219 * @root: the root of the tree to be checked.
5220 * @ext_ref: the EXTENDED_IREF feature
5222 * Return 0 if no error found.
5223 * Return <0 for error.
5225 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5227 struct btrfs_path path;
5228 struct node_refs nrefs;
5229 struct btrfs_root_item *root_item = &root->root_item;
5235 * We need to manually check the first inode item(256)
5236 * As the following traversal function will only start from
5237 * the first inode item in the leaf, if inode item(256) is missing
5238 * we will just skip it forever.
5240 ret = check_fs_first_inode(root, ext_ref);
5244 memset(&nrefs, 0, sizeof(nrefs));
5245 level = btrfs_header_level(root->node);
5246 btrfs_init_path(&path);
/*
 * A root that still has references, or whose drop_progress objectid is 0,
 * is walked from its root node; an extra reference is taken on the buffer
 * that is stored in the path.
 */
5248 if (btrfs_root_refs(root_item) > 0 ||
5249 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5250 path.nodes[level] = root->node;
5251 path.slots[level] = 0;
5252 extent_buffer_get(root->node);
5254 struct btrfs_key key;
/*
 * Otherwise the walk starts at the recorded drop progress, with the search
 * stopping at drop_level.
 * NOTE(review): no range check of drop_level is visible here, whereas
 * check_fs_root() in this file rejects a level above the root's level or
 * >= BTRFS_MAX_LEVEL - confirm whether one is needed.
 */
5256 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5257 level = root_item->drop_level;
5258 path.lowest_level = level;
5259 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5266 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5269 /* if ret is negative, walk shall stop */
5275 ret = walk_up_tree_v2(root, &path, &level);
5277 /* Normal exit, reset ret to err */
5284 btrfs_release_path(&path);
5289 * Find the relative ref for root_ref and root_backref.
5291 * @root: the root of the root tree.
5292 * @ref_key: the key of the root ref.
5294 * Return 0 if no error occurred.
5296 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5297 struct extent_buffer *node, int slot)
5299 struct btrfs_path path;
5300 struct btrfs_key key;
5301 struct btrfs_root_ref *ref;
5302 struct btrfs_root_ref *backref;
5303 char ref_name[BTRFS_NAME_LEN] = {0};
5304 char backref_name[BTRFS_NAME_LEN] = {0};
5310 u32 backref_namelen;
/* Decode the item being checked: directory id, sequence and name. */
5315 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5316 ref_dirid = btrfs_root_ref_dirid(node, ref);
5317 ref_seq = btrfs_root_ref_sequence(node, ref);
5318 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* Names longer than BTRFS_NAME_LEN are read truncated, with a warning. */
5320 if (ref_namelen <= BTRFS_NAME_LEN) {
5323 len = BTRFS_NAME_LEN;
5324 warning("%s[%llu %llu] ref_name too long",
5325 ref_key->type == BTRFS_ROOT_REF_KEY ?
5326 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5329 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5331 /* Find relative root_ref */
/*
 * Build the key of the counterpart item: objectid and offset are swapped,
 * and the type is flipped (REF + BACKREF - type maps each of the two key
 * types to the other one).
 */
5332 key.objectid = ref_key->offset;
5333 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5334 key.offset = ref_key->objectid;
5336 btrfs_init_path(&path);
5337 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5339 err |= ROOT_REF_MISSING;
5340 error("%s[%llu %llu] couldn't find relative ref",
5341 ref_key->type == BTRFS_ROOT_REF_KEY ?
5342 "ROOT_REF" : "ROOT_BACKREF",
5343 ref_key->objectid, ref_key->offset);
/* Decode the same three fields from the counterpart item that was found. */
5347 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5348 struct btrfs_root_ref);
5349 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5350 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5351 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5353 if (backref_namelen <= BTRFS_NAME_LEN) {
5354 len = backref_namelen;
5356 len = BTRFS_NAME_LEN;
5357 warning("%s[%llu %llu] ref_name too long",
5358 key.type == BTRFS_ROOT_REF_KEY ?
5359 "ROOT_REF" : "ROOT_BACKREF",
5360 key.objectid, key.offset);
5362 read_extent_buffer(path.nodes[0], backref_name,
5363 (unsigned long)(backref + 1), len);
/* Both items must agree on dirid, sequence, name length and the first len name bytes. */
5365 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5366 ref_namelen != backref_namelen ||
5367 strncmp(ref_name, backref_name, len)) {
5368 err |= ROOT_REF_MISMATCH;
5369 error("%s[%llu %llu] mismatch relative ref",
5370 ref_key->type == BTRFS_ROOT_REF_KEY ?
5371 "ROOT_REF" : "ROOT_BACKREF",
5372 ref_key->objectid, ref_key->offset);
5375 btrfs_release_path(&path);
5380 * Check all fs/file tree in low_memory mode.
5382 * 1. for fs tree root item, call check_fs_root_v2()
5383 * 2. for fs tree root ref/backref, call check_root_ref()
5385 * Return 0 if no error occurred.
5387 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5389 struct btrfs_root *tree_root = fs_info->tree_root;
5390 struct btrfs_root *cur_root = NULL;
5391 struct btrfs_path path;
5392 struct btrfs_key key;
5393 struct extent_buffer *node;
5394 unsigned int ext_ref;
/* Sample the EXTENDED_IREF incompat feature once; it is forwarded to every per-root check. */
5399 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
/* Position the path in the tree root at the first ROOT_ITEM key of BTRFS_FS_TREE_OBJECTID. */
5401 btrfs_init_path(&path);
5402 key.objectid = BTRFS_FS_TREE_OBJECTID;
5404 key.type = BTRFS_ROOT_ITEM_KEY;
5406 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5410 } else if (ret > 0) {
5416 node = path.nodes[0];
5417 slot = path.slots[0];
5418 btrfs_item_key_to_cpu(node, &key, slot);
/* Keys beyond BTRFS_LAST_FREE_OBJECTID are outside the range handled by this walk. */
5419 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5421 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5422 fs_root_objectid(key.objectid)) {
/*
 * Tree-reloc roots are read with btrfs_read_fs_root_no_cache() and released
 * with btrfs_free_fs_root() after the check; other roots are looked up with
 * key.offset forced to (u64)-1 and are not freed here.
 */
5423 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5424 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5427 key.offset = (u64)-1;
5428 cur_root = btrfs_read_fs_root(fs_info, &key);
5431 if (IS_ERR(cur_root)) {
5432 error("Fail to read fs/subvol tree: %lld",
5438 ret = check_fs_root_v2(cur_root, ext_ref);
5441 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5442 btrfs_free_fs_root(cur_root);
5443 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5444 key.type == BTRFS_ROOT_BACKREF_KEY) {
/* Root ref/backref items are cross-checked against their counterpart in the tree root. */
5445 ret = check_root_ref(tree_root, &key, node, slot);
5449 ret = btrfs_next_item(tree_root, &path);
5459 btrfs_release_path(&path);
/*
 * Entry point for the fs-roots pass.  Prints "checking fs roots" to stderr
 * unless ctx.progress_enabled is set, then runs check_fs_roots_v2() when
 * check_mode is CHECK_MODE_LOWMEM and check_fs_roots() (which also takes
 * @root_cache) otherwise.
 */
5463 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
5464 struct cache_tree *root_cache)
5468 if (!ctx.progress_enabled)
5469 fprintf(stderr, "checking fs roots\n");
5470 if (check_mode == CHECK_MODE_LOWMEM)
5471 ret = check_fs_roots_v2(fs_info);
5473 ret = check_fs_roots(fs_info, root_cache);
/*
 * Walk every backref in rec->backref_tree and cross-check it against @rec:
 *  - the backref was found in the extent tree (found_extent_tree),
 *  - tree backrefs were referenced back (found_ref),
 *  - data backrefs have found_ref == num_refs, and their disk_bytenr and
 *    bytes match rec->start and rec->nr,
 *  - the accumulated reference count equals rec->refs.
 * Each mismatch has a matching diagnostic written to stderr.
 *
 * NOTE(review): the lines that gate the messages on @print_errs and set
 * the return value are not visible here; maybe_free_extent_rec() treats a
 * zero return as "everything consistent".
 */
5478 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5480 struct extent_backref *back, *tmp;
5481 struct tree_backref *tback;
5482 struct data_backref *dback;
5486 rbtree_postorder_for_each_entry_safe(back, tmp,
5487 &rec->backref_tree, node) {
/* Backref seen while walking trees but absent from the extent tree. */
5488 if (!back->found_extent_tree) {
5492 if (back->is_data) {
5493 dback = to_data_backref(back);
5494 fprintf(stderr, "Data backref %llu %s %llu"
5495 " owner %llu offset %llu num_refs %lu"
5496 " not found in extent tree\n",
5497 (unsigned long long)rec->start,
5498 back->full_backref ?
5500 back->full_backref ?
5501 (unsigned long long)dback->parent:
5502 (unsigned long long)dback->root,
5503 (unsigned long long)dback->owner,
5504 (unsigned long long)dback->offset,
5505 (unsigned long)dback->num_refs);
5507 tback = to_tree_backref(back);
5508 fprintf(stderr, "Tree backref %llu parent %llu"
5509 " root %llu not found in extent tree\n",
5510 (unsigned long long)rec->start,
5511 (unsigned long long)tback->parent,
5512 (unsigned long long)tback->root);
/* Tree backref recorded in the extent tree but never referenced back. */
5515 if (!back->is_data && !back->found_ref) {
5519 tback = to_tree_backref(back);
5520 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5521 (unsigned long long)rec->start,
5522 back->full_backref ? "parent" : "root",
5523 back->full_backref ?
5524 (unsigned long long)tback->parent :
5525 (unsigned long long)tback->root, back);
/* Per-backref consistency checks that only apply to data backrefs. */
5527 if (back->is_data) {
5528 dback = to_data_backref(back);
5529 if (dback->found_ref != dback->num_refs) {
5533 fprintf(stderr, "Incorrect local backref count"
5534 " on %llu %s %llu owner %llu"
5535 " offset %llu found %u wanted %u back %p\n",
5536 (unsigned long long)rec->start,
5537 back->full_backref ?
5539 back->full_backref ?
5540 (unsigned long long)dback->parent:
5541 (unsigned long long)dback->root,
5542 (unsigned long long)dback->owner,
5543 (unsigned long long)dback->offset,
5544 dback->found_ref, dback->num_refs, back);
5546 if (dback->disk_bytenr != rec->start) {
5550 fprintf(stderr, "Backref disk bytenr does not"
5551 " match extent record, bytenr=%llu, "
5552 "ref bytenr=%llu\n",
5553 (unsigned long long)rec->start,
5554 (unsigned long long)dback->disk_bytenr);
5557 if (dback->bytes != rec->nr) {
5561 fprintf(stderr, "Backref bytes do not match "
5562 "extent backref, bytenr=%llu, ref "
5563 "bytes=%llu, backref bytes=%llu\n",
5564 (unsigned long long)rec->start,
5565 (unsigned long long)rec->nr,
5566 (unsigned long long)dback->bytes);
/* Global tally: data backrefs contribute their found_ref count. */
5569 if (!back->is_data) {
5572 dback = to_data_backref(back);
5573 found += dback->found_ref;
5576 if (found != rec->refs) {
5580 fprintf(stderr, "Incorrect global backref count "
5581 "on %llu found %llu wanted %llu\n",
5582 (unsigned long long)rec->start,
5583 (unsigned long long)found,
5584 (unsigned long long)rec->refs);
/*
 * Per-node callback passed to rb_free_nodes(): converts the rb_node back to
 * its containing extent_backref.  The statement that disposes of the
 * backref is not visible in this listing (presumably a free() -- confirm).
 */
5590 static void __free_one_backref(struct rb_node *node)
5592 struct extent_backref *back = rb_node_to_extent_backref(node);
/*
 * Hand every node of rec->backref_tree to __free_one_backref() through
 * rb_free_nodes().
 */
5597 static void free_all_extent_backrefs(struct extent_record *rec)
5599 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Tear down @extent_cache: take the first cache extent, map it to its
 * extent_record, unlink it from the cache and release the record's
 * backrefs.  The enclosing loop, its termination test and the release of
 * the record itself are not visible here -- presumably this repeats until
 * first_cache_extent() returns NULL.
 */
5602 static void free_extent_record_cache(struct cache_tree *extent_cache)
5604 struct cache_extent *cache;
5605 struct extent_record *rec;
5608 cache = first_cache_extent(extent_cache);
5611 rec = container_of(cache, struct extent_record, cache);
5612 remove_cache_extent(extent_cache, cache);
5613 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing more can be learned about it.
 * The record is removed only when all of the following hold:
 *  - content_checked and owner_ref_checked are set,
 *  - extent_item_refs equals refs and refs is non-zero,
 *  - there are no duplicates,
 *  - all_backpointers_checked(rec, 0) returns 0,
 *  - none of bad_full_backref, crossing_stripes, wrong_chunk_type is set.
 * On removal the backrefs are released and the record is unlinked from
 * whatever list it sits on.  The return statements are not visible here.
 */
5618 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5619 struct extent_record *rec)
5621 if (rec->content_checked && rec->owner_ref_checked &&
5622 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5623 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5624 !rec->bad_full_backref && !rec->crossing_stripes &&
5625 !rec->wrong_chunk_type) {
5626 remove_cache_extent(extent_cache, &rec->cache);
5627 free_all_extent_backrefs(rec);
5628 list_del_init(&rec->list);
/*
 * Verify that the owner stored in @buf's header really references this
 * block.
 *
 * Step 1: scan @rec's backrefs for a found, non-full tree backref whose
 *         root equals btrfs_header_owner(buf).
 * Step 2: otherwise read the owner's root, search it for the block's first
 *         key with lowest_level = level + 1, and compare the parent's
 *         block pointer at that slot with buf->start.
 *
 * Returns 0 when the reference was found by the search, 1 otherwise.
 * NOTE(review): the early-exit statements (after the step-1 match and on
 * IS_ERR(ref_root) / search failure) are not visible in this listing.
 */
5634 static int check_owner_ref(struct btrfs_root *root,
5635 struct extent_record *rec,
5636 struct extent_buffer *buf)
5638 struct extent_backref *node, *tmp;
5639 struct tree_backref *back;
5640 struct btrfs_root *ref_root;
5641 struct btrfs_key key;
5642 struct btrfs_path path;
5643 struct extent_buffer *parent;
5648 rbtree_postorder_for_each_entry_safe(node, tmp,
5649 &rec->backref_tree, node) {
5652 if (!node->found_ref)
5654 if (node->full_backref)
5656 back = to_tree_backref(node);
5657 if (btrfs_header_owner(buf) == back->root)
/* A root block is not expected to reach the tree search below. */
5660 BUG_ON(rec->is_root);
5662 /* try to find the block by search corresponding fs tree */
5663 key.objectid = btrfs_header_owner(buf);
5664 key.type = BTRFS_ROOT_ITEM_KEY;
5665 key.offset = (u64)-1;
5667 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5668 if (IS_ERR(ref_root))
5671 level = btrfs_header_level(buf);
/* First key of the block: item key for one kind of block, node key for the other. */
5673 btrfs_item_key_to_cpu(buf, &key, 0);
5675 btrfs_node_key_to_cpu(buf, &key, 0);
5677 btrfs_init_path(&path);
5678 path.lowest_level = level + 1;
5679 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5683 parent = path.nodes[level + 1];
5684 if (parent && buf->start == btrfs_node_blockptr(parent,
5685 path.slots[level + 1]))
5688 btrfs_release_path(&path);
5689 return found ? 0 : 1;
/*
 * Scan @rec's backrefs for a tree backref whose root is
 * BTRFS_EXTENT_TREE_OBJECTID.  Full backrefs get separate handling.
 *
 * NOTE(review): the statements taken on each branch and the return values
 * are not visible here; record_bad_block_io() treats a zero result as
 * "not an extent-tree block".
 */
5692 static int is_extent_tree_record(struct extent_record *rec)
5694 struct extent_backref *node, *tmp;
5695 struct tree_backref *back;
5698 rbtree_postorder_for_each_entry_safe(node, tmp,
5699 &rec->backref_tree, node) {
5702 back = to_tree_backref(node);
5703 if (node->full_backref)
5705 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Register an unreadable block as a corrupt extent record.  The range
 * (start, len) is looked up in @extent_cache; when the matching record is
 * an extent-tree block (is_extent_tree_record()), its parent key is
 * converted to CPU order and passed to btrfs_add_corrupt_extent_record().
 *
 * NOTE(review): the remaining parameters (start, len) and the early
 * returns for "not cached" / "not an extent-tree record" are not visible
 * in this listing.
 */
5712 static int record_bad_block_io(struct btrfs_fs_info *info,
5713 struct cache_tree *extent_cache,
5716 struct extent_record *rec;
5717 struct cache_extent *cache;
5718 struct btrfs_key key;
5720 cache = lookup_cache_extent(extent_cache, start, len);
5724 rec = container_of(cache, struct extent_record, cache);
5725 if (!is_extent_tree_record(rec))
5728 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5729 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot + 1 of @buf.
 *
 * Node case (header level != 0): the two btrfs_key_ptr structures are
 * exchanged in place; btrfs_fixup_low_keys() is then called with the
 * block's first key (presumably only when slot 0 was involved -- the
 * guarding condition is not visible here).
 *
 * Leaf case: both item payloads are copied out, written back at each
 * other's offset, the item offsets and sizes are exchanged, and the two
 * keys are rewritten through btrfs_set_item_key_unsafe().
 *
 * NOTE(review): item1_data holds item1_size bytes but is written with
 * length item2_size (and vice versa).  If the two sizes differ, one of the
 * writes reads past its malloc'd buffer -- confirm the items are expected
 * to be equal-sized.
 */
5732 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5733 struct extent_buffer *buf, int slot)
5735 if (btrfs_header_level(buf)) {
5736 struct btrfs_key_ptr ptr1, ptr2;
5738 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5739 sizeof(struct btrfs_key_ptr));
5740 read_extent_buffer(buf, &ptr2,
5741 btrfs_node_key_ptr_offset(slot + 1),
5742 sizeof(struct btrfs_key_ptr));
5743 write_extent_buffer(buf, &ptr1,
5744 btrfs_node_key_ptr_offset(slot + 1),
5745 sizeof(struct btrfs_key_ptr));
5746 write_extent_buffer(buf, &ptr2,
5747 btrfs_node_key_ptr_offset(slot),
5748 sizeof(struct btrfs_key_ptr));
/* Propagate the (possibly new) first key of this node to its parents. */
5750 struct btrfs_disk_key key;
5751 btrfs_node_key(buf, &key, 0);
5752 btrfs_fixup_low_keys(root, path, &key,
5753 btrfs_header_level(buf) + 1);
5756 struct btrfs_item *item1, *item2;
5757 struct btrfs_key k1, k2;
5758 char *item1_data, *item2_data;
5759 u32 item1_offset, item2_offset, item1_size, item2_size;
5761 item1 = btrfs_item_nr(slot);
5762 item2 = btrfs_item_nr(slot + 1);
5763 btrfs_item_key_to_cpu(buf, &k1, slot);
5764 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5765 item1_offset = btrfs_item_offset(buf, item1);
5766 item2_offset = btrfs_item_offset(buf, item2);
5767 item1_size = btrfs_item_size(buf, item1);
5768 item2_size = btrfs_item_size(buf, item2);
5770 item1_data = malloc(item1_size);
5773 item2_data = malloc(item2_size);
5779 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5780 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5782 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5783 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5787 btrfs_set_item_offset(buf, item1, item2_offset);
5788 btrfs_set_item_offset(buf, item2, item1_offset);
5789 btrfs_set_item_size(buf, item1, item2_size);
5790 btrfs_set_item_size(buf, item2, item1_size);
/* Keys are swapped last, via the path, one slot at a time. */
5792 path->slots[0] = slot;
5793 btrfs_set_item_key_unsafe(root, path, &k2);
5794 path->slots[0] = slot + 1;
5795 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair key ordering in the block at path->lowest_level.  Adjacent key
 * pairs (i, i + 1) are read as node keys or item keys depending on the
 * level and compared with btrfs_comp_cpu_keys(); pairs that are not in
 * strictly ascending order are exchanged with swap_values(), after which
 * the buffer is marked dirty.
 *
 * NOTE(review): the statement taken for already-ordered pairs and the
 * handling of a swap_values() failure are not visible in this listing.
 */
5800 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5802 struct extent_buffer *buf;
5803 struct btrfs_key k1, k2;
5805 int level = path->lowest_level;
5808 buf = path->nodes[level];
5809 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5811 btrfs_node_key_to_cpu(buf, &k1, i);
5812 btrfs_node_key_to_cpu(buf, &k2, i + 1);
5814 btrfs_item_key_to_cpu(buf, &k1, i);
5815 btrfs_item_key_to_cpu(buf, &k2, i + 1);
5817 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5819 ret = swap_values(root, path, buf, i);
5822 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.  Only key types listed
 * below (DIR_INDEX, EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF,
 * EXTENT_DATA_REF) are eligible; what happens for other types is not
 * visible here (presumably an error return).
 *
 * The deletion shifts the following item headers down by one with
 * memmove_extent_buffer() and decrements nritems; the item payload itself
 * is not touched by the visible code.  The low keys are fixed up from the
 * leaf's first key (presumably only when slot 0 was removed -- the guard
 * is not shown) and the buffer is marked dirty.
 */
5828 static int delete_bogus_item(struct btrfs_root *root,
5829 struct btrfs_path *path,
5830 struct extent_buffer *buf, int slot)
5832 struct btrfs_key key;
5833 int nritems = btrfs_header_nritems(buf);
5835 btrfs_item_key_to_cpu(buf, &key, slot);
5837 /* These are all the keys we can deal with missing. */
5838 if (key.type != BTRFS_DIR_INDEX_KEY &&
5839 key.type != BTRFS_EXTENT_ITEM_KEY &&
5840 key.type != BTRFS_METADATA_ITEM_KEY &&
5841 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5842 key.type != BTRFS_EXTENT_DATA_REF_KEY)
5845 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5846 (unsigned long long)key.objectid, key.type,
5847 (unsigned long long)key.offset, slot, buf->start);
5848 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5849 btrfs_item_nr_offset(slot + 1),
5850 sizeof(struct btrfs_item) *
5851 (nritems - slot - 1));
5852 btrfs_set_header_nritems(buf, nritems - 1);
5854 struct btrfs_disk_key disk_key;
5856 btrfs_item_key(buf, &disk_key, 0);
5857 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5859 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * Item 0 must end exactly at BTRFS_LEAF_DATA_SIZE(root); every later item
 * must end where the previous item's data starts.  An item that ends
 * beyond that bound is handed to delete_bogus_item(); if that does not
 * help, a "can't fix" message is printed.  An item that ends short of the
 * bound is moved up by the gap ("shift"): its payload is memmove'd, its
 * offset updated and the buffer marked dirty.
 *
 * NOTE(review): the control flow after delete_bogus_item() and the final
 * return value are not visible in this listing; the trailing comment
 * suggests the function deliberately returns a "restart" style code.
 */
5863 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5865 struct extent_buffer *buf;
5869 /* We should only get this for leaves */
5870 BUG_ON(path->lowest_level);
5871 buf = path->nodes[0];
5873 for (i = 0; i < btrfs_header_nritems(buf); i++) {
5874 unsigned int shift = 0, offset;
/* First item: its end is measured against the leaf data size. */
5876 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5877 BTRFS_LEAF_DATA_SIZE(root)) {
5878 if (btrfs_item_end_nr(buf, i) >
5879 BTRFS_LEAF_DATA_SIZE(root)) {
5880 ret = delete_bogus_item(root, path, buf, i);
5883 fprintf(stderr, "item is off the end of the "
5884 "leaf, can't fix\n");
5888 shift = BTRFS_LEAF_DATA_SIZE(root) -
5889 btrfs_item_end_nr(buf, i);
/* Later items: their end is measured against the previous item's offset. */
5890 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5891 btrfs_item_offset_nr(buf, i - 1)) {
5892 if (btrfs_item_end_nr(buf, i) >
5893 btrfs_item_offset_nr(buf, i - 1)) {
5894 ret = delete_bogus_item(root, path, buf, i);
5897 fprintf(stderr, "items overlap, can't fix\n");
5901 shift = btrfs_item_offset_nr(buf, i - 1) -
5902 btrfs_item_end_nr(buf, i);
5907 printf("Shifting item nr %d by %u bytes in block %llu\n",
5908 i, shift, (unsigned long long)buf->start);
5909 offset = btrfs_item_offset_nr(buf, i);
5910 memmove_extent_buffer(buf,
5911 btrfs_leaf_data(buf) + offset + shift,
5912 btrfs_leaf_data(buf) + offset,
5913 btrfs_item_size_nr(buf, i));
5914 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5916 btrfs_mark_buffer_dirty(buf);
5920 * We may have moved things, in which case we want to exit so we don't
5921 * write those changes out. Once we have proper abort functionality in
5922 * progs this can be changed to something nicer.
5929 * Attempt to fix basic block failures. If we can't fix it for whatever reason
5930 * then just return -EIO.
/*
 * Try to repair @buf for the two statuses this code knows how to handle:
 * BTRFS_TREE_BLOCK_BAD_KEY_ORDER (via fix_key_order()) and
 * BTRFS_TREE_BLOCK_INVALID_OFFSETS (via fix_item_offset()).
 *
 * All roots referencing the block are collected with
 * btrfs_find_all_roots().  For each one, the root is read, a transaction
 * is started, and the tree is searched for the block's first key (with
 * cow enabled, lowest_level set to the block's level and
 * skip_check_block set) before the fixer runs.
 *
 * NOTE(review): none of the visible btrfs_commit_transaction() calls has
 * its return value checked.  The error/cleanup paths and the release of
 * the @roots ulist are not visible in this listing.
 */
5932 static int try_to_fix_bad_block(struct btrfs_root *root,
5933 struct extent_buffer *buf,
5934 enum btrfs_tree_block_status status)
5936 struct btrfs_trans_handle *trans;
5937 struct ulist *roots;
5938 struct ulist_node *node;
5939 struct btrfs_root *search_root;
5940 struct btrfs_path path;
5941 struct ulist_iterator iter;
5942 struct btrfs_key root_key, key;
5945 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5946 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5949 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5953 btrfs_init_path(&path);
5954 ULIST_ITER_INIT(&iter);
5955 while ((node = ulist_next(roots, &iter))) {
5956 root_key.objectid = node->val;
5957 root_key.type = BTRFS_ROOT_ITEM_KEY;
5958 root_key.offset = (u64)-1;
5960 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5967 trans = btrfs_start_transaction(search_root, 0);
5968 if (IS_ERR(trans)) {
5969 ret = PTR_ERR(trans);
/* Search down to the level of the bad block, skipping block checks. */
5973 path.lowest_level = btrfs_header_level(buf);
5974 path.skip_check_block = 1;
5975 if (path.lowest_level)
5976 btrfs_node_key_to_cpu(buf, &key, 0);
5978 btrfs_item_key_to_cpu(buf, &key, 0);
5979 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5982 btrfs_commit_transaction(trans, search_root);
5985 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5986 ret = fix_key_order(search_root, &path);
5987 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5988 ret = fix_item_offset(search_root, &path);
5990 btrfs_commit_transaction(trans, search_root);
5993 btrfs_release_path(&path);
5994 btrfs_commit_transaction(trans, search_root);
5997 btrfs_release_path(&path);
/*
 * Validate one tree block and update its extent record.
 *
 * The record for [buf->start, buf->len) is looked up in @extent_cache and
 * stamped with the header generation, the first key's objectid and the
 * level.  The block is then checked with btrfs_check_leaf() or
 * btrfs_check_node() against the recorded parent key.  A non-clean status
 * leads to try_to_fix_bad_block() (the condition enabling repair is not
 * visible here) and, if still not clean, a "bad block" message.
 *
 * On a clean block content_checked is set; owner_ref_checked is set when
 * @flags carries BTRFS_BLOCK_FLAG_FULL_BACKREF, otherwise
 * check_owner_ref() is consulted.  Finally the record is offered to
 * maybe_free_extent_rec().
 */
6001 static int check_block(struct btrfs_root *root,
6002 struct cache_tree *extent_cache,
6003 struct extent_buffer *buf, u64 flags)
6005 struct extent_record *rec;
6006 struct cache_extent *cache;
6007 struct btrfs_key key;
6008 enum btrfs_tree_block_status status;
6012 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6015 rec = container_of(cache, struct extent_record, cache);
6016 rec->generation = btrfs_header_generation(buf);
6018 level = btrfs_header_level(buf);
6019 if (btrfs_header_nritems(buf) > 0) {
6022 btrfs_item_key_to_cpu(buf, &key, 0);
6024 btrfs_node_key_to_cpu(buf, &key, 0);
6026 rec->info_objectid = key.objectid;
6028 rec->info_level = level;
6030 if (btrfs_is_leaf(buf))
6031 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6033 status = btrfs_check_node(root, &rec->parent_key, buf);
6035 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6037 status = try_to_fix_bad_block(root, buf, status);
6038 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6040 fprintf(stderr, "bad block %llu\n",
6041 (unsigned long long)buf->start);
6044 * Signal to callers we need to start the scan over
6045 * again since we'll have cowed blocks.
6050 rec->content_checked = 1;
6051 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6052 rec->owner_ref_checked = 1;
6054 ret = check_owner_ref(root, rec, buf);
6056 rec->owner_ref_checked = 1;
6060 maybe_free_extent_rec(extent_cache, rec);
/*
 * Look up an existing tree backref of @rec.  The rec->backrefs list is
 * walked; full backrefs are matched on @parent, the others on @root.
 * Which of the two comparisons applies is decided by a test that is not
 * visible here (presumably on @parent being non-zero).
 *
 * NOTE(review): this walks the rec->backrefs list, while
 * add_tree_backref() inserts new backrefs into rec->backref_tree.  Confirm
 * both containers are kept in sync, otherwise freshly inserted backrefs
 * would never be found here.
 */
6065 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6066 u64 parent, u64 root)
6068 struct list_head *cur = rec->backrefs.next;
6069 struct extent_backref *node;
6070 struct tree_backref *back;
6072 while(cur != &rec->backrefs) {
6073 node = to_extent_backref(cur);
6077 back = to_tree_backref(node);
6079 if (!node->full_backref)
6081 if (parent == back->parent)
6084 if (node->full_backref)
6086 if (back->root == root)
/*
 * Allocate a tree_backref with malloc() and zero its embedded
 * extent_backref node.  One branch stores @parent and marks the node as a
 * full backref; the other clears full_backref (and presumably stores
 * @root -- that assignment is not visible here).
 *
 * NOTE(review): the allocation-failure check, the branch condition and
 * the return statement are not shown in this listing.
 */
6094 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6095 u64 parent, u64 root)
6097 struct tree_backref *ref = malloc(sizeof(*ref));
6101 memset(&ref->node, 0, sizeof(ref->node));
6103 ref->parent = parent;
6104 ref->node.full_backref = 1;
6107 ref->node.full_backref = 0;
/*
 * Look up an existing data backref of @rec.  Full backrefs are matched on
 * @parent; the others on the (@root, @owner, @offset) triple.  For the
 * latter, when the caller passes found_ref and the candidate already has
 * found_ref set, a candidate whose bytes or disk_bytenr differ from the
 * supplied values gets special handling (presumably it is skipped -- the
 * statement is not visible here).
 *
 * NOTE(review): this walks the rec->backrefs list, while
 * add_data_backref() inserts new backrefs into rec->backref_tree.  Confirm
 * both containers are kept in sync.
 */
6114 static struct data_backref *find_data_backref(struct extent_record *rec,
6115 u64 parent, u64 root,
6116 u64 owner, u64 offset,
6118 u64 disk_bytenr, u64 bytes)
6120 struct list_head *cur = rec->backrefs.next;
6121 struct extent_backref *node;
6122 struct data_backref *back;
6124 while(cur != &rec->backrefs) {
6125 node = to_extent_backref(cur);
6129 back = to_data_backref(node);
6131 if (!node->full_backref)
6133 if (parent == back->parent)
6136 if (node->full_backref)
6138 if (back->root == root && back->owner == owner &&
6139 back->offset == offset) {
6140 if (found_ref && node->found_ref &&
6141 (back->bytes != bytes ||
6142 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data_backref with malloc(), zero its embedded node and mark
 * it as a data backref.  One branch stores @parent and sets full_backref;
 * the other stores @offset and clears full_backref (further field
 * assignments on both branches are not visible here).  The backref's
 * bytes field is set to max_size, and rec->max_size is raised to max_size
 * when that is larger.
 *
 * NOTE(review): the last parameter, the allocation-failure check and the
 * return statement are not shown in this listing.
 */
6152 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6153 u64 parent, u64 root,
6154 u64 owner, u64 offset,
6157 struct data_backref *ref = malloc(sizeof(*ref));
6161 memset(&ref->node, 0, sizeof(ref->node));
6162 ref->node.is_data = 1;
6165 ref->parent = parent;
6168 ref->node.full_backref = 1;
6172 ref->offset = offset;
6173 ref->node.full_backref = 0;
6175 ref->bytes = max_size;
6178 if (max_size > rec->max_size)
6179 rec->max_size = max_size;
/*
 * Sets rec->wrong_chunk_type when the block group found for rec->start
 * (looked up through global_info) has flags that do not fit the extent:
 *  - data extents need BTRFS_BLOCK_GROUP_DATA,
 *  - metadata extents need SYSTEM or METADATA,
 *  - for metadata with at least one backref, the first backref in
 *    rec->backref_tree decides: a chunk-tree root requires SYSTEM, any
 *    other root requires METADATA; a data backref on a tree block is
 *    itself flagged.
 * What happens when no block group is found is not visible here.
 */
6183 /* Check if the type of extent matches with its chunk */
6184 static void check_extent_type(struct extent_record *rec)
6186 struct btrfs_block_group_cache *bg_cache;
6188 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6192 /* data extent, check chunk directly*/
6193 if (!rec->metadata) {
6194 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6195 rec->wrong_chunk_type = 1;
6199 /* metadata extent, check the obvious case first */
6200 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6201 BTRFS_BLOCK_GROUP_METADATA))) {
6202 rec->wrong_chunk_type = 1;
6207 * Check SYSTEM extent, as it's also marked as metadata, we can only
6208 * make sure it's a SYSTEM extent by its backref
6210 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
6211 struct extent_backref *node;
6212 struct tree_backref *tback;
6215 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
6216 if (node->is_data) {
6217 /* tree block shouldn't have data backref */
6218 rec->wrong_chunk_type = 1;
6221 tback = container_of(node, struct tree_backref, node);
6223 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6224 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6226 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6227 if (!(bg_cache->flags & bg_type))
6228 rec->wrong_chunk_type = 1;
6233 * Allocate a new extent record, fill default values from @tmpl and insert into
6234 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6235 * the cache, otherwise it fails.
/*
 * Create an extent_record from @tmpl and insert it into @extent_cache.
 * tmpl->max_size must be non-zero (BUG_ON).  Counters and flags not taken
 * from the template start out cleared (flag_block_full_backref starts as
 * FLAG_UNSET); the list heads and the backref rb-tree start empty.  After
 * a successful insert the global bytes_used counter grows by rec->nr, the
 * crossing-stripes check runs (presumably only for metadata -- the guard
 * is not visible here) and check_extent_type() is applied.
 *
 * NOTE(review): rec->nr is max(tmpl->nr, tmpl->max_size) whereas
 * rec->cache.size is plain tmpl->nr; confirm the difference is intended.
 * The malloc() failure check and insert-failure handling are not shown.
 */
6237 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6238 struct extent_record *tmpl)
6240 struct extent_record *rec;
6243 BUG_ON(tmpl->max_size == 0);
6244 rec = malloc(sizeof(*rec));
6247 rec->start = tmpl->start;
6248 rec->max_size = tmpl->max_size;
6249 rec->nr = max(tmpl->nr, tmpl->max_size);
6250 rec->found_rec = tmpl->found_rec;
6251 rec->content_checked = tmpl->content_checked;
6252 rec->owner_ref_checked = tmpl->owner_ref_checked;
6253 rec->num_duplicates = 0;
6254 rec->metadata = tmpl->metadata;
6255 rec->flag_block_full_backref = FLAG_UNSET;
6256 rec->bad_full_backref = 0;
6257 rec->crossing_stripes = 0;
6258 rec->wrong_chunk_type = 0;
6259 rec->is_root = tmpl->is_root;
6260 rec->refs = tmpl->refs;
6261 rec->extent_item_refs = tmpl->extent_item_refs;
6262 rec->parent_generation = tmpl->parent_generation;
6263 INIT_LIST_HEAD(&rec->backrefs);
6264 INIT_LIST_HEAD(&rec->dups);
6265 INIT_LIST_HEAD(&rec->list);
6266 rec->backref_tree = RB_ROOT;
6267 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6268 rec->cache.start = tmpl->start;
6269 rec->cache.size = tmpl->nr;
6270 ret = insert_cache_extent(extent_cache, &rec->cache);
6275 bytes_used += rec->nr;
6278 rec->crossing_stripes = check_crossing_stripes(global_info,
6279 rec->start, global_info->nodesize);
6280 check_extent_type(rec);
6285 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6287 * - refs - if found, increase refs
6288 * - is_root - if found, set
6289 * - content_checked - if found, set
6290 * - owner_ref_checked - if found, set
6292 * If not found, create a new one, initialize and insert.
/*
 * Merge @tmpl into the extent record covering [tmpl->start, tmpl->nr), or
 * create a new record through add_extent_rec_nolookup() when none exists.
 *
 * For an existing record:
 *  - rec->nr may be reset to max(tmpl->nr, tmpl->max_size) (the guarding
 *    condition is not visible here);
 *  - if @tmpl comes from an extent item (found_rec) and either starts at a
 *    different bytenr or the record already had one, the record is queued
 *    on duplicate_extents and a stub record describing the duplicate is
 *    appended to rec->dups;
 *  - extent_item_refs is taken from the template unless this was a
 *    duplicate; a message is printed if the record already had a value;
 *  - content_checked / owner_ref_checked are sticky, parent_key is always
 *    overwritten, parent_generation only when non-zero, max_size only
 *    grows;
 *  - crossing-stripes and chunk-type checks are re-run and the record is
 *    offered to maybe_free_extent_rec().
 */
6294 static int add_extent_rec(struct cache_tree *extent_cache,
6295 struct extent_record *tmpl)
6297 struct extent_record *rec;
6298 struct cache_extent *cache;
6302 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6304 rec = container_of(cache, struct extent_record, cache);
6308 rec->nr = max(tmpl->nr, tmpl->max_size);
6311 * We need to make sure to reset nr to whatever the extent
6312 * record says was the real size, this way we can compare it to
6315 if (tmpl->found_rec) {
6316 if (tmpl->start != rec->start || rec->found_rec) {
6317 struct extent_record *tmp;
6320 if (list_empty(&rec->list))
6321 list_add_tail(&rec->list,
6322 &duplicate_extents);
6325 * We have to do this song and dance in case we
6326 * find an extent record that falls inside of
6327 * our current extent record but does not have
6328 * the same objectid.
6330 tmp = malloc(sizeof(*tmp));
6333 tmp->start = tmpl->start;
6334 tmp->max_size = tmpl->max_size;
6337 tmp->metadata = tmpl->metadata;
6338 tmp->extent_item_refs = tmpl->extent_item_refs;
6339 INIT_LIST_HEAD(&tmp->list);
6340 list_add_tail(&tmp->list, &rec->dups);
6341 rec->num_duplicates++;
6348 if (tmpl->extent_item_refs && !dup) {
6349 if (rec->extent_item_refs) {
6350 fprintf(stderr, "block %llu rec "
6351 "extent_item_refs %llu, passed %llu\n",
6352 (unsigned long long)tmpl->start,
6353 (unsigned long long)
6354 rec->extent_item_refs,
6355 (unsigned long long)tmpl->extent_item_refs);
6357 rec->extent_item_refs = tmpl->extent_item_refs;
6361 if (tmpl->content_checked)
6362 rec->content_checked = 1;
6363 if (tmpl->owner_ref_checked)
6364 rec->owner_ref_checked = 1;
6365 memcpy(&rec->parent_key, &tmpl->parent_key,
6366 sizeof(tmpl->parent_key));
6367 if (tmpl->parent_generation)
6368 rec->parent_generation = tmpl->parent_generation;
6369 if (rec->max_size < tmpl->max_size)
6370 rec->max_size = tmpl->max_size;
6373 * A metadata extent can't cross stripe_len boundary, otherwise
6374 * kernel scrub won't be able to handle it.
6375 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6379 rec->crossing_stripes = check_crossing_stripes(
6380 global_info, rec->start,
6381 global_info->nodesize);
6382 check_extent_type(rec);
6383 maybe_free_extent_rec(extent_cache, rec);
/* No record covers the range yet: create one from the template. */
6387 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref (@parent or @root) for the extent at @bytenr.
 *
 * If no extent record covers @bytenr, a template record is inserted first
 * and looked up again.  The backref is then searched with
 * find_tree_backref() and allocated with alloc_tree_backref() when absent.
 * Depending on @found_ref (the branch test is not visible here) either
 * found_ref or found_extent_tree is set on the backref; if the flag was
 * already set, an "already exists" message is printed.  A newly allocated
 * backref is inserted into rec->backref_tree (presumably guarded by the
 * local `insert` flag), after which the chunk type is re-checked and the
 * record offered to maybe_free_extent_rec().
 */
6392 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6393 u64 parent, u64 root, int found_ref)
6395 struct extent_record *rec;
6396 struct tree_backref *back;
6397 struct cache_extent *cache;
6399 bool insert = false;
6401 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6403 struct extent_record tmpl;
6405 memset(&tmpl, 0, sizeof(tmpl));
6406 tmpl.start = bytenr;
6411 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6415 /* really a bug in cache_extent implement now */
6416 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6421 rec = container_of(cache, struct extent_record, cache);
6422 if (rec->start != bytenr) {
6424 * Several cause, from unaligned bytenr to over lapping extents
6429 back = find_tree_backref(rec, parent, root);
6431 back = alloc_tree_backref(rec, parent, root);
6438 if (back->node.found_ref) {
6439 fprintf(stderr, "Extent back ref already exists "
6440 "for %llu parent %llu root %llu \n",
6441 (unsigned long long)bytenr,
6442 (unsigned long long)parent,
6443 (unsigned long long)root);
6445 back->node.found_ref = 1;
6447 if (back->node.found_extent_tree) {
6448 fprintf(stderr, "Extent back ref already exists "
6449 "for %llu parent %llu root %llu \n",
6450 (unsigned long long)bytenr,
6451 (unsigned long long)parent,
6452 (unsigned long long)root);
6454 back->node.found_extent_tree = 1;
/* rb_insert() returning non-zero is only warned about, not treated as fatal. */
6457 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6458 compare_extent_backref));
6459 check_extent_type(rec);
6460 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * A missing extent record is created from a template carrying @max_size;
 * rec->max_size only grows.  The backref is located with
 * find_data_backref() or allocated with alloc_data_backref().
 *
 * On the found_ref path (file extent side; the branch test is not
 * visible) num_refs must be 1, a second sighting must agree on bytes
 * (both enforced with BUG_ON), the found_ref flag and counter are bumped,
 * and a change of bytes/disk_bytenr updates the backref -- erasing it from
 * the rb-tree first so it can be re-inserted.  The record is also marked
 * content_checked and owner_ref_checked.
 *
 * On the other path (extent tree side) a repeated sighting is reported,
 * and num_refs / found_extent_tree are stored.
 *
 * Finally the backref is (re)inserted into rec->backref_tree when needed
 * and the record offered to maybe_free_extent_rec().
 */
6464 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6465 u64 parent, u64 root, u64 owner, u64 offset,
6466 u32 num_refs, int found_ref, u64 max_size)
6468 struct extent_record *rec;
6469 struct data_backref *back;
6470 struct cache_extent *cache;
6472 bool insert = false;
6474 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6476 struct extent_record tmpl;
6478 memset(&tmpl, 0, sizeof(tmpl));
6479 tmpl.start = bytenr;
6481 tmpl.max_size = max_size;
6483 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6487 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6492 rec = container_of(cache, struct extent_record, cache);
6493 if (rec->max_size < max_size)
6494 rec->max_size = max_size;
6497 * If found_ref is set then max_size is the real size and must match the
6498 * existing refs. So if we have already found a ref then we need to
6499 * make sure that this ref matches the existing one, otherwise we need
6500 * to add a new backref so we can notice that the backrefs don't match
6501 * and we need to figure out who is telling the truth. This is to
6502 * account for that awful fsync bug I introduced where we'd end up with
6503 * a btrfs_file_extent_item that would have its length include multiple
6504 * prealloc extents or point inside of a prealloc extent.
6506 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6509 back = alloc_data_backref(rec, parent, root, owner, offset,
6516 BUG_ON(num_refs != 1);
6517 if (back->node.found_ref)
6518 BUG_ON(back->bytes != max_size);
6519 back->node.found_ref = 1;
6520 back->found_ref += 1;
6521 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
6522 back->bytes = max_size;
6523 back->disk_bytenr = bytenr;
6525 /* Need to reinsert if not already in the tree */
6527 rb_erase(&back->node.node, &rec->backref_tree);
6532 rec->content_checked = 1;
6533 rec->owner_ref_checked = 1;
6535 if (back->node.found_extent_tree) {
6536 fprintf(stderr, "Extent back ref already exists "
6537 "for %llu parent %llu root %llu "
6538 "owner %llu offset %llu num_refs %lu\n",
6539 (unsigned long long)bytenr,
6540 (unsigned long long)parent,
6541 (unsigned long long)root,
6542 (unsigned long long)owner,
6543 (unsigned long long)offset,
6544 (unsigned long)num_refs);
6546 back->num_refs = num_refs;
6547 back->node.found_extent_tree = 1;
6550 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6551 compare_extent_backref));
6553 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue the range [bytenr, bytenr + size) for processing: it is first
 * added to @seen and then to @pending.  The handling of a failed insert
 * into @seen is not visible here (presumably the error is returned so an
 * already-seen range is not queued twice).
 *
 * NOTE(review): the result of the second add_cache_extent() call (into
 * @pending) is not checked.
 */
6557 static int add_pending(struct cache_tree *pending,
6558 struct cache_tree *seen, u64 bytenr, u32 size)
6561 ret = add_cache_extent(seen, bytenr, size);
6564 add_cache_extent(pending, bytenr, size);
/*
 * Choose the next batch of blocks to process and store them in @bits
 * (at most @bits_nr entries).
 *
 * Sources are consulted in this order: the first entry of @reada; entries
 * of @nodes starting shortly before @last (@last minus 32768 when @last
 * is larger than that), falling back to the start of @nodes; and entries
 * of @pending.  When more than 8 slots are still free after filling from
 * the chosen tree, further @pending entries lying within 32768 bytes of
 * the running end offset are appended.
 *
 * NOTE(review): node_start is unsigned long while @last is u64; where
 * long is 32 bits the value is truncated -- consider u64.  The final
 * parameter, the NULL checks between the searches and the return
 * statements are not visible in this listing.
 */
6568 static int pick_next_pending(struct cache_tree *pending,
6569 struct cache_tree *reada,
6570 struct cache_tree *nodes,
6571 u64 last, struct block_info *bits, int bits_nr,
6574 unsigned long node_start = last;
6575 struct cache_extent *cache;
6578 cache = search_cache_extent(reada, 0);
6580 bits[0].start = cache->start;
6581 bits[0].size = cache->size;
6586 if (node_start > 32768)
6587 node_start -= 32768;
6589 cache = search_cache_extent(nodes, node_start);
6591 cache = search_cache_extent(nodes, 0);
6594 cache = search_cache_extent(pending, 0);
6599 bits[ret].start = cache->start;
6600 bits[ret].size = cache->size;
6601 cache = next_cache_extent(cache);
6603 } while (cache && ret < bits_nr);
6609 bits[ret].start = cache->start;
6610 bits[ret].size = cache->size;
6611 cache = next_cache_extent(cache);
6613 } while (cache && ret < bits_nr);
/* Room left in the batch: top it up with nearby pending extents. */
6615 if (bits_nr - ret > 8) {
6616 u64 lookup = bits[0].start + bits[0].size;
6617 struct cache_extent *next;
6618 next = search_cache_extent(pending, lookup);
6620 if (next->start - lookup > 32768)
6622 bits[ret].start = next->start;
6623 bits[ret].size = next->size;
6624 lookup = next->start + next->size;
6628 next = next_cache_extent(next);
/*
 * cache_tree_free_extents() callback for chunk records: maps the cache
 * extent to its chunk_record and unlinks it from both lists it can be on
 * (rec->list and rec->dextents).  The statement that releases the record
 * is not visible here (presumably free(rec)).
 */
6636 static void free_chunk_record(struct cache_extent *cache)
6638 struct chunk_record *rec;
6640 rec = container_of(cache, struct chunk_record, cache);
6641 list_del_init(&rec->list);
6642 list_del_init(&rec->dextents);
/*
 * Dispose of every entry in @chunk_cache by running free_chunk_record()
 * on each through cache_tree_free_extents().
 */
6646 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6648 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node destructor for the device rb-tree: maps the rb_node to its
 * device_record.  The statement that releases the record is not visible
 * here (presumably free(rec)).
 */
6651 static void free_device_record(struct rb_node *node)
6653 struct device_record *rec;
6655 rec = container_of(node, struct device_record, node);
/*
 * Instantiates the tree-free helper for device_cache with
 * free_device_record as the per-node callback.  NOTE(review): the name of
 * the generated function depends on the FREE_RB_BASED_TREE definition,
 * which is not visible here.
 */
6659 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into @tree: first into the cache tree keyed by its cache
 * extent, then onto the tree's block_groups list.  Failure handling for
 * insert_cache_extent() is not visible here (presumably the error is
 * returned before the list is touched).
 */
6661 int insert_block_group_record(struct block_group_tree *tree,
6662 struct block_group_record *bg_rec)
6666 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6670 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * cache_tree_free_extents() callback for block group records: maps the
 * cache extent to its block_group_record and unlinks it from its list.
 * The statement that releases the record is not visible here (presumably
 * free(rec)).
 */
6674 static void free_block_group_record(struct cache_extent *cache)
6676 struct block_group_record *rec;
6678 rec = container_of(cache, struct block_group_record, cache);
6679 list_del_init(&rec->list);
/*
 * Dispose of every entry in @tree's cache tree by running
 * free_block_group_record() on each through cache_tree_free_extents().
 */
6683 void free_block_group_tree(struct block_group_tree *tree)
6685 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into @tree using insert_cache_extent2() and then queue it
 * on both orphan lists (no_chunk_orphans and no_device_orphans).
 * Failure handling for the insert is not visible here (presumably the
 * error is returned before the lists are touched).
 */
6688 int insert_device_extent_record(struct device_extent_tree *tree,
6689 struct device_extent_record *de_rec)
6694 * Device extent is a bit different from the other extents, because
6695 * the extents which belong to the different devices may have the
6696 * same start and size, so we need use the special extent cache
6697 * search/insert functions.
6699 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6703 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6704 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * cache_tree_free_extents() callback for device extent records: maps the
 * cache extent to its device_extent_record and unlinks it from the chunk
 * and device lists when it is on them.  The statement that releases the
 * record is not visible here (presumably free(rec)).
 */
6708 static void free_device_extent_record(struct cache_extent *cache)
6710 struct device_extent_record *rec;
6712 rec = container_of(cache, struct device_extent_record, cache);
6713 if (!list_empty(&rec->chunk_list))
6714 list_del_init(&rec->chunk_list);
6715 if (!list_empty(&rec->device_list))
6716 list_del_init(&rec->device_list);
/*
 * Dispose of every entry in @tree's cache tree by running
 * free_device_extent_record() on each through cache_tree_free_extents().
 */
6720 void free_device_extent_tree(struct device_extent_tree *tree)
6722 cache_tree_free_extents(&tree->tree, free_device_extent_record);
/*
 * Compiled only with BTRFS_COMPAT_EXTENT_TREE_V0.  Converts a v0 extent
 * ref item at @slot of @leaf into a backref: refs whose objectid is below
 * BTRFS_FIRST_FREE_OBJECTID become tree backrefs, the others become data
 * backrefs carrying the v0 ref count.  In both cases key.objectid is the
 * extent bytenr and key.offset is passed as the parent.
 *
 * NOTE(review): the remaining add_tree_backref() arguments, the return
 * statement and the closing #endif are not visible in this listing.
 */
6725 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6726 static int process_extent_ref_v0(struct cache_tree *extent_cache,
6727 struct extent_buffer *leaf, int slot)
6729 struct btrfs_extent_ref_v0 *ref0;
6730 struct btrfs_key key;
6733 btrfs_item_key_to_cpu(leaf, &key, slot);
6734 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
6735 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
6736 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
6739 ret = add_data_backref(extent_cache, key.objectid, key.offset,
6740 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the btrfs_chunk item at @slot of @leaf.
 *
 * The record is calloc'ed with room for num_stripes stripes
 * (btrfs_chunk_record_size()).  The cache extent covers
 * [key->offset, key->offset + chunk length); the key fields, the leaf
 * generation, the chunk attributes and each stripe's devid, offset and
 * device UUID are copied from the leaf.
 *
 * NOTE(review): on allocation failure a message is printed; the action
 * taken afterwards is not visible here, and neither are the last
 * parameter (slot) and the return statement.
 */
6746 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6747 struct btrfs_key *key,
6750 struct btrfs_chunk *ptr;
6751 struct chunk_record *rec;
6754 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6755 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6757 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6759 fprintf(stderr, "memory allocation failed\n");
6763 INIT_LIST_HEAD(&rec->list);
6764 INIT_LIST_HEAD(&rec->dextents);
6767 rec->cache.start = key->offset;
6768 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6770 rec->generation = btrfs_header_generation(leaf);
6772 rec->objectid = key->objectid;
6773 rec->type = key->type;
6774 rec->offset = key->offset;
6776 rec->length = rec->cache.size;
6777 rec->owner = btrfs_chunk_owner(leaf, ptr);
6778 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6779 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6780 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6781 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6782 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6783 rec->num_stripes = num_stripes;
6784 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6786 for (i = 0; i < rec->num_stripes; ++i) {
6787 rec->stripes[i].devid =
6788 btrfs_stripe_devid_nr(leaf, ptr, i);
6789 rec->stripes[i].offset =
6790 btrfs_stripe_offset_nr(leaf, ptr, i);
6791 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6792 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb and record it in @chunk_cache.
 *
 * btrfs_check_chunk_valid() runs first; a chunk that fails it is reported
 * with its offset and length (the message states that it is ignored).  A
 * chunk_record is then built with btrfs_new_chunk_record() and inserted with
 * insert_cache_extent().  The message printed after the insert, naming the
 * record offset and length, presumably covers the case where an overlapping
 * entry already existed.
 */
6799 static int process_chunk_item(struct cache_tree *chunk_cache,
6800 struct btrfs_key *key, struct extent_buffer *eb,
6803 struct chunk_record *rec;
6804 struct btrfs_chunk *chunk;
6807 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6809 * Do extra check for this chunk item,
6811 * It's still possible one can craft a leaf with CHUNK_ITEM, with
6812 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6813 * and owner<->key_type check.
6815 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
6818 error("chunk(%llu, %llu) is not valid, ignore it",
6819 key->offset, btrfs_chunk_length(eb, chunk));
6822 rec = btrfs_new_chunk_record(eb, key, slot);
6823 ret = insert_cache_extent(chunk_cache, &rec->cache);
6825 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6826 rec->offset, rec->length);
/*
 * Record the device item at @slot of @eb in the @dev_cache rb-tree.
 *
 * A device_record is allocated with malloc() (failure is reported on
 * stderr) and filled from the key, the leaf header generation and the
 * on-disk dev item: device id, total bytes and bytes used.  It is inserted
 * with rb_insert() using device_record_compare; the message printed after
 * the insert names the device id.
 *
 * NOTE(review): rec->devid is first set from key->offset and later
 * overwritten with btrfs_device_id(); the first assignment looks redundant.
 */
6833 static int process_device_item(struct rb_root *dev_cache,
6834 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6836 struct btrfs_dev_item *ptr;
6837 struct device_record *rec;
6840 ptr = btrfs_item_ptr(eb,
6841 slot, struct btrfs_dev_item);
6843 rec = malloc(sizeof(*rec));
6845 fprintf(stderr, "memory allocation failed\n");
6849 rec->devid = key->offset;
6850 rec->generation = btrfs_header_generation(eb);
6852 rec->objectid = key->objectid;
6853 rec->type = key->type;
6854 rec->offset = key->offset;
6856 rec->devid = btrfs_device_id(eb, ptr);
6857 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6858 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6860 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6862 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build an in-memory block_group_record from the block group item at @slot
 * of @leaf.
 *
 * The record is zero-allocated with calloc(); failure is reported on
 * stderr.  The cache entry uses key->objectid as start and key->offset as
 * size.  The key, the leaf header generation and the on-disk block group
 * flags are copied into the record and its list head is initialised.
 */
6869 struct block_group_record *
6870 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6873 struct btrfs_block_group_item *ptr;
6874 struct block_group_record *rec;
6876 rec = calloc(1, sizeof(*rec));
6878 fprintf(stderr, "memory allocation failed\n");
6882 rec->cache.start = key->objectid;
6883 rec->cache.size = key->offset;
6885 rec->generation = btrfs_header_generation(leaf);
6887 rec->objectid = key->objectid;
6888 rec->type = key->type;
6889 rec->offset = key->offset;
6891 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6892 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6894 INIT_LIST_HEAD(&rec->list);
/*
 * Create a block_group_record for the item at @slot of @eb with
 * btrfs_new_block_group_record() and insert it into @block_group_cache with
 * insert_block_group_record().  The message printed after the insert names
 * the record objectid and offset.
 */
6899 static int process_block_group_item(struct block_group_tree *block_group_cache,
6900 struct btrfs_key *key,
6901 struct extent_buffer *eb, int slot)
6903 struct block_group_record *rec;
6906 rec = btrfs_new_block_group_record(eb, key, slot);
6907 ret = insert_block_group_record(block_group_cache, rec);
6909 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6910 rec->objectid, rec->offset);
/*
 * Build an in-memory device_extent_record from the dev extent item at @slot
 * of @leaf.
 *
 * The record is zero-allocated with calloc(); failure is reported on
 * stderr.  The cache entry takes key->objectid as its objectid,
 * key->offset as its start and the extent length as its size.  The key,
 * the leaf header generation and the chunk objectid, chunk offset and
 * length read from the item are stored, and both list heads are
 * initialised.
 */
6917 struct device_extent_record *
6918 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6919 struct btrfs_key *key, int slot)
6921 struct device_extent_record *rec;
6922 struct btrfs_dev_extent *ptr;
6924 rec = calloc(1, sizeof(*rec));
6926 fprintf(stderr, "memory allocation failed\n");
6930 rec->cache.objectid = key->objectid;
6931 rec->cache.start = key->offset;
6933 rec->generation = btrfs_header_generation(leaf);
6935 rec->objectid = key->objectid;
6936 rec->type = key->type;
6937 rec->offset = key->offset;
6939 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6940 rec->chunk_objecteid =
6941 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6943 btrfs_dev_extent_chunk_offset(leaf, ptr);
6944 rec->length = btrfs_dev_extent_length(leaf, ptr);
6945 rec->cache.size = rec->length;
6947 INIT_LIST_HEAD(&rec->chunk_list);
6948 INIT_LIST_HEAD(&rec->device_list);
/*
 * Create a device_extent_record for the item at @slot of @eb with
 * btrfs_new_device_extent_record() and insert it into @dev_extent_cache
 * with insert_device_extent_record().  The message printed after the insert
 * names the record objectid, offset and length.
 */
6954 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6955 struct btrfs_key *key, struct extent_buffer *eb,
6958 struct device_extent_record *rec;
6961 rec = btrfs_new_device_extent_record(eb, key, slot);
6962 ret = insert_device_extent_record(dev_extent_cache, rec);
6965 "Device extent[%llu, %llu, %llu] existed.\n",
6966 rec->objectid, rec->offset, rec->length);
/*
 * Parse the EXTENT_ITEM or METADATA_ITEM at @slot of @eb and record it,
 * together with its inline refs, in @extent_cache.
 *
 * The extent length is the filesystem nodesize for a METADATA_ITEM key and
 * key.offset for the other key type.  An extent whose bytenr is not aligned
 * to sectorsize is reported as ignored.  An item smaller than
 * struct btrfs_extent_item is treated as a v0 extent item when
 * BTRFS_COMPAT_EXTENT_TREE_V0 is defined: its ref count is read and an
 * extent record is added directly.
 *
 * For a regular item the ref count and the TREE_BLOCK flag are read.  A
 * metadata extent whose length differs from nodesize, or a data extent
 * whose length is not sectorsize aligned, is reported as ignored.  After
 * the extent record is added, the inline refs that follow the item (past
 * the btrfs_tree_block_info for tree blocks keyed by EXTENT_ITEM) are
 * walked up to the end of the item and dispatched by type:
 * TREE_BLOCK_REF and SHARED_BLOCK_REF go to add_tree_backref(),
 * EXTENT_DATA_REF and SHARED_DATA_REF go to add_data_backref(), and the
 * remaining message reports a corrupt extent record.
 */
6973 static int process_extent_item(struct btrfs_root *root,
6974 struct cache_tree *extent_cache,
6975 struct extent_buffer *eb, int slot)
6977 struct btrfs_extent_item *ei;
6978 struct btrfs_extent_inline_ref *iref;
6979 struct btrfs_extent_data_ref *dref;
6980 struct btrfs_shared_data_ref *sref;
6981 struct btrfs_key key;
6982 struct extent_record tmpl;
6987 u32 item_size = btrfs_item_size_nr(eb, slot);
6993 btrfs_item_key_to_cpu(eb, &key, slot);
6995 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6997 num_bytes = root->fs_info->nodesize;
6999 num_bytes = key.offset;
7002 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7003 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7004 key.objectid, root->fs_info->sectorsize);
7007 if (item_size < sizeof(*ei)) {
7008 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7009 struct btrfs_extent_item_v0 *ei0;
7010 BUG_ON(item_size != sizeof(*ei0));
7011 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7012 refs = btrfs_extent_refs_v0(eb, ei0);
7016 memset(&tmpl, 0, sizeof(tmpl));
7017 tmpl.start = key.objectid;
7018 tmpl.nr = num_bytes;
7019 tmpl.extent_item_refs = refs;
7020 tmpl.metadata = metadata;
7022 tmpl.max_size = num_bytes;
7024 return add_extent_rec(extent_cache, &tmpl);
7027 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7028 refs = btrfs_extent_refs(eb, ei);
7029 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7033 if (metadata && num_bytes != root->fs_info->nodesize) {
7034 error("ignore invalid metadata extent, length %llu does not equal to %u",
7035 num_bytes, root->fs_info->nodesize);
7038 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7039 error("ignore invalid data extent, length %llu is not aligned to %u",
7040 num_bytes, root->fs_info->sectorsize);
7044 memset(&tmpl, 0, sizeof(tmpl));
7045 tmpl.start = key.objectid;
7046 tmpl.nr = num_bytes;
7047 tmpl.extent_item_refs = refs;
7048 tmpl.metadata = metadata;
7050 tmpl.max_size = num_bytes;
7051 add_extent_rec(extent_cache, &tmpl);
7053 ptr = (unsigned long)(ei + 1);
7054 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7055 key.type == BTRFS_EXTENT_ITEM_KEY)
7056 ptr += sizeof(struct btrfs_tree_block_info);
7058 end = (unsigned long)ei + item_size;
7060 iref = (struct btrfs_extent_inline_ref *)ptr;
7061 type = btrfs_extent_inline_ref_type(eb, iref);
7062 offset = btrfs_extent_inline_ref_offset(eb, iref);
7064 case BTRFS_TREE_BLOCK_REF_KEY:
7065 ret = add_tree_backref(extent_cache, key.objectid,
7069 "add_tree_backref failed (extent items tree block): %s",
7072 case BTRFS_SHARED_BLOCK_REF_KEY:
7073 ret = add_tree_backref(extent_cache, key.objectid,
7077 "add_tree_backref failed (extent items shared block): %s",
7080 case BTRFS_EXTENT_DATA_REF_KEY:
7081 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7082 add_data_backref(extent_cache, key.objectid, 0,
7083 btrfs_extent_data_ref_root(eb, dref),
7084 btrfs_extent_data_ref_objectid(eb,
7086 btrfs_extent_data_ref_offset(eb, dref),
7087 btrfs_extent_data_ref_count(eb, dref),
7090 case BTRFS_SHARED_DATA_REF_KEY:
7091 sref = (struct btrfs_shared_data_ref *)(iref + 1);
7092 add_data_backref(extent_cache, key.objectid, offset,
7094 btrfs_shared_data_ref_count(eb, sref),
7098 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7099 key.objectid, key.type, num_bytes);
7102 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the range starting at @offset and spanning @bytes inside block
 * group @cache is described by one matching free space entry, after super
 * block mirror stripes overlapping the range have been carved out.
 *
 * For every super block mirror, btrfs_rmap_block() returns the logical
 * addresses and stripe length of the stripes that hold it.  Stripes that do
 * not overlap the range are skipped by the two comparisons at the top of
 * the inner loop.  A stripe at the start of the range, or one that begins
 * before it, moves @offset forward and shrinks @bytes; a stripe reaching
 * the end of the range trims the tail; a stripe in the middle makes the
 * function call itself for the left part and continue with the right part.
 *
 * The remaining range is looked up with btrfs_find_free_space().  A missing
 * entry, or an entry whose offset or length differs from what is wanted, is
 * reported on stderr.  A matching entry is removed from the ctl with
 * unlink_free_space().
 */
7109 static int check_cache_range(struct btrfs_root *root,
7110 struct btrfs_block_group_cache *cache,
7111 u64 offset, u64 bytes)
7113 struct btrfs_free_space *entry;
7119 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
7120 bytenr = btrfs_sb_offset(i);
7121 ret = btrfs_rmap_block(root->fs_info,
7122 cache->key.objectid, bytenr, 0,
7123 &logical, &nr, &stripe_len);
7128 if (logical[nr] + stripe_len <= offset)
7130 if (offset + bytes <= logical[nr])
7132 if (logical[nr] == offset) {
7133 if (stripe_len >= bytes) {
7137 bytes -= stripe_len;
7138 offset += stripe_len;
7139 } else if (logical[nr] < offset) {
7140 if (logical[nr] + stripe_len >=
7145 bytes = (offset + bytes) -
7146 (logical[nr] + stripe_len);
7147 offset = logical[nr] + stripe_len;
7150 * Could be tricky, the super may land in the
7151 * middle of the area we're checking. First
7152 * check the easiest case, it's at the end.
7154 if (logical[nr] + stripe_len >=
7156 bytes = logical[nr] - offset;
7160 /* Check the left side */
7161 ret = check_cache_range(root, cache,
7163 logical[nr] - offset);
7169 /* Now we continue with the right side */
7170 bytes = (offset + bytes) -
7171 (logical[nr] + stripe_len);
7172 offset = logical[nr] + stripe_len;
7179 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7181 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7182 offset, offset+bytes);
7186 if (entry->offset != offset) {
7187 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7192 if (entry->bytes != bytes) {
7193 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7194 bytes, entry->bytes, offset);
7198 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the free space loaded for block group @cache against the
 * extent tree.
 *
 * The walk uses the extent root and starts at the larger of the block group
 * start and BTRFS_SUPER_INFO_OFFSET.  Items are visited until the key
 * objectid reaches the end of the block group; only EXTENT_ITEM and
 * METADATA_ITEM keys are considered.  An extent that begins exactly at
 * @last only advances @last past it (by key.offset for EXTENT_ITEM, by
 * nodesize for METADATA_ITEM).  Otherwise the gap between @last and the
 * extent start is passed to check_cache_range() before @last is advanced.
 * A final gap up to the end of the block group is checked the same way.
 *
 * After the path is released, entries still present in the free space
 * rb-tree are reported on stderr.
 */
7203 static int verify_space_cache(struct btrfs_root *root,
7204 struct btrfs_block_group_cache *cache)
7206 struct btrfs_path path;
7207 struct extent_buffer *leaf;
7208 struct btrfs_key key;
7212 root = root->fs_info->extent_root;
7214 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7216 btrfs_init_path(&path);
7217 key.objectid = last;
7219 key.type = BTRFS_EXTENT_ITEM_KEY;
7220 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7225 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7226 ret = btrfs_next_leaf(root, &path);
7234 leaf = path.nodes[0];
7235 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7236 if (key.objectid >= cache->key.offset + cache->key.objectid)
7238 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7239 key.type != BTRFS_METADATA_ITEM_KEY) {
7244 if (last == key.objectid) {
7245 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7246 last = key.objectid + key.offset;
7248 last = key.objectid + root->fs_info->nodesize;
7253 ret = check_cache_range(root, cache, last,
7254 key.objectid - last);
7257 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7258 last = key.objectid + key.offset;
7260 last = key.objectid + root->fs_info->nodesize;
7264 if (last < cache->key.objectid + cache->key.offset)
7265 ret = check_cache_range(root, cache, last,
7266 cache->key.objectid +
7267 cache->key.offset - last);
7270 btrfs_release_path(&path);
7273 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7274 fprintf(stderr, "There are still entries left in the space "
/*
 * Verify the free space information of the block groups of the filesystem.
 *
 * When the cache generation in the super block is not -1 and differs from
 * the super generation, a message states that the space cache will be
 * invalidated.  When progress reporting is enabled the task position is set
 * to TASK_FREE_SPACE and the task is started; it is stopped before
 * returning.
 *
 * Block groups are looked up with btrfs_lookup_first_block_group(),
 * beginning just past the primary super block, and @start is advanced to
 * the end of each block group found.  A block group without a free space
 * ctl gets one from btrfs_init_free_space_ctl();
 * btrfs_remove_free_space_cache() is presumably used to empty an existing
 * one.  With the FREE_SPACE_TREE compat_ro feature the super stripes are
 * excluded and load_free_space_tree() is used; load_free_space_cache() is
 * the other loader.  The result is checked with verify_space_cache() and a
 * failure is reported with the block group start.
 *
 * Returns -EINVAL when the error counter is non-zero, 0 otherwise.
 */
7282 static int check_space_cache(struct btrfs_root *root)
7284 struct btrfs_block_group_cache *cache;
7285 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7289 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7290 btrfs_super_generation(root->fs_info->super_copy) !=
7291 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7292 printf("cache and super generation don't match, space cache "
7293 "will be invalidated\n");
7297 if (ctx.progress_enabled) {
7298 ctx.tp = TASK_FREE_SPACE;
7299 task_start(ctx.info);
7303 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7307 start = cache->key.objectid + cache->key.offset;
7308 if (!cache->free_space_ctl) {
7309 if (btrfs_init_free_space_ctl(cache,
7310 root->fs_info->sectorsize)) {
7315 btrfs_remove_free_space_cache(cache);
7318 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7319 ret = exclude_super_stripes(root, cache);
7321 fprintf(stderr, "could not exclude super stripes: %s\n",
7326 ret = load_free_space_tree(root->fs_info, cache);
7327 free_excluded_extents(root, cache);
7329 fprintf(stderr, "could not load free space tree: %s\n",
7336 ret = load_free_space_cache(root->fs_info, cache);
7341 ret = verify_space_cache(root, cache);
7343 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7344 cache->key.objectid);
7349 task_stop(ctx.info);
7351 return error ? -EINVAL : 0;
/*
 * Read the data range starting at @bytenr and spanning @num_bytes, and
 * compare the checksum of every sector with the value stored in the csum
 * item held by @eb at byte offset @leaf_offset.
 *
 * @num_bytes is tested against sectorsize before a buffer of that size is
 * obtained from malloc().  Data is fetched with read_extent_data() from the
 * current mirror.  For each sectorsize block the checksum is computed with
 * btrfs_csum_data() and btrfs_csum_final(), and the expected value is read
 * from the leaf at
 * leaf_offset + (byte offset / sectorsize) * csum_size.  A mismatch is
 * printed with the mirror number, the bytenr and both checksums, and
 * btrfs_num_copies() is consulted to see whether another mirror remains.
 */
7354 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7355 u64 num_bytes, unsigned long leaf_offset,
7356 struct extent_buffer *eb) {
7358 struct btrfs_fs_info *fs_info = root->fs_info;
7360 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7362 unsigned long csum_offset;
7366 u64 data_checked = 0;
7372 if (num_bytes % fs_info->sectorsize)
7375 data = malloc(num_bytes);
7379 while (offset < num_bytes) {
7382 read_len = num_bytes - offset;
7383 /* read as much space once a time */
7384 ret = read_extent_data(fs_info, data + offset,
7385 bytenr + offset, &read_len, mirror);
7389 /* verify every 4k data's checksum */
7390 while (data_checked < read_len) {
7392 tmp = offset + data_checked;
7394 csum = btrfs_csum_data((char *)data + tmp,
7395 csum, fs_info->sectorsize);
7396 btrfs_csum_final(csum, (u8 *)&csum);
7398 csum_offset = leaf_offset +
7399 tmp / fs_info->sectorsize * csum_size;
7400 read_extent_buffer(eb, (char *)&csum_expected,
7401 csum_offset, csum_size);
7402 /* try another mirror */
7403 if (csum != csum_expected) {
7404 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7405 mirror, bytenr + tmp,
7406 csum, csum_expected);
7407 num_copies = btrfs_num_copies(root->fs_info,
7409 if (mirror < num_copies - 1) {
7414 data_checked += fs_info->sectorsize;
/*
 * Check that the byte range starting at @bytenr and spanning @num_bytes is
 * covered by EXTENT_ITEMs in the extent tree.
 *
 * The search key is (bytenr, EXTENT_ITEM, -1) in the extent root.  The
 * position is first moved back by one item (using btrfs_prev_leaf() at a
 * leaf boundary) and then further back while the key type is greater than
 * EXTENT_ITEM.  From there the items are walked forward; keys of other
 * types and extents ending before @bytenr are not used, and the test
 * against bytenr + num_bytes bounds the walk.
 *
 * An extent starting at @bytenr, or starting before it, consumes the front
 * of the range.  An extent starting inside the range either trims the tail
 * or, when it ends before the range does, makes the function call itself
 * for the part to the right of the extent and keep the part to the left.
 * When bytes remain and no error was returned, the range is reported as
 * having no extents.  The path is released before returning.
 */
7423 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7426 struct btrfs_path path;
7427 struct extent_buffer *leaf;
7428 struct btrfs_key key;
7431 btrfs_init_path(&path);
7432 key.objectid = bytenr;
7433 key.type = BTRFS_EXTENT_ITEM_KEY;
7434 key.offset = (u64)-1;
7437 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7440 fprintf(stderr, "Error looking up extent record %d\n", ret);
7441 btrfs_release_path(&path);
7444 if (path.slots[0] > 0) {
7447 ret = btrfs_prev_leaf(root, &path);
7450 } else if (ret > 0) {
7457 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7460 * Block group items come before extent items if they have the same
7461 * bytenr, so walk back one more just in case. Dear future traveller,
7462 * first congrats on mastering time travel. Now if it's not too much
7463 * trouble could you go back to 2006 and tell Chris to make the
7464 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7465 * EXTENT_ITEM_KEY please?
7467 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7468 if (path.slots[0] > 0) {
7471 ret = btrfs_prev_leaf(root, &path);
7474 } else if (ret > 0) {
7479 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7483 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7484 ret = btrfs_next_leaf(root, &path);
7486 fprintf(stderr, "Error going to next leaf "
7488 btrfs_release_path(&path);
7494 leaf = path.nodes[0];
7495 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7496 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7500 if (key.objectid + key.offset < bytenr) {
7504 if (key.objectid > bytenr + num_bytes)
7507 if (key.objectid == bytenr) {
7508 if (key.offset >= num_bytes) {
7512 num_bytes -= key.offset;
7513 bytenr += key.offset;
7514 } else if (key.objectid < bytenr) {
7515 if (key.objectid + key.offset >= bytenr + num_bytes) {
7519 num_bytes = (bytenr + num_bytes) -
7520 (key.objectid + key.offset);
7521 bytenr = key.objectid + key.offset;
7523 if (key.objectid + key.offset < bytenr + num_bytes) {
7524 u64 new_start = key.objectid + key.offset;
7525 u64 new_bytes = bytenr + num_bytes - new_start;
7528 * Weird case, the extent is in the middle of
7529 * our range, we'll have to search one side
7530 * and then the other. Not sure if this happens
7531 * in real life, but no harm in coding it up
7532 * anyway just in case.
7534 btrfs_release_path(&path);
7535 ret = check_extent_exists(root, new_start,
7538 fprintf(stderr, "Right section didn't "
7542 num_bytes = key.objectid - bytenr;
7545 num_bytes = key.objectid - bytenr;
7552 if (num_bytes && !ret) {
7553 fprintf(stderr, "There are no extents for csum range "
7554 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7558 btrfs_release_path(&path);
/*
 * Walk the csum tree and verify that every checksummed range is backed by
 * extent records.
 *
 * The csum root must have an uptodate root node, otherwise a message is
 * printed.  The walk starts from a key with objectid
 * BTRFS_EXTENT_CSUM_OBJECTID and type BTRFS_EXTENT_CSUM_KEY and visits
 * EXTENT_CSUM items.  Each item covers
 * (item size / csum size) * sectorsize bytes starting at key.offset.
 *
 * When check_data_csum is set the data itself is verified with
 * check_extent_csums(); otherwise that step is jumped over.  Items that
 * follow each other without a gap are accumulated in @offset and
 * @num_bytes.  When an item does not start at offset + num_bytes, the
 * accumulated range is passed to check_extent_exists(), a failure is
 * reported, and a new range starts at the current key offset.  The path is
 * released at the end.
 */
7562 static int check_csums(struct btrfs_root *root)
7564 struct btrfs_path path;
7565 struct extent_buffer *leaf;
7566 struct btrfs_key key;
7567 u64 offset = 0, num_bytes = 0;
7568 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7572 unsigned long leaf_offset;
7574 root = root->fs_info->csum_root;
7575 if (!extent_buffer_uptodate(root->node)) {
7576 fprintf(stderr, "No valid csum tree found\n");
7580 btrfs_init_path(&path);
7581 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7582 key.type = BTRFS_EXTENT_CSUM_KEY;
7584 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7586 fprintf(stderr, "Error searching csum tree %d\n", ret);
7587 btrfs_release_path(&path);
7591 if (ret > 0 && path.slots[0])
7596 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7597 ret = btrfs_next_leaf(root, &path);
7599 fprintf(stderr, "Error going to next leaf "
7606 leaf = path.nodes[0];
7608 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7609 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7614 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7615 csum_size) * root->fs_info->sectorsize;
7616 if (!check_data_csum)
7617 goto skip_csum_check;
7618 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7619 ret = check_extent_csums(root, key.offset, data_len,
7625 offset = key.offset;
7626 } else if (key.offset != offset + num_bytes) {
7627 ret = check_extent_exists(root, offset, num_bytes);
7629 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7630 "there is no extent record\n",
7631 offset, offset+num_bytes);
7634 offset = key.offset;
7637 num_bytes += data_len;
7641 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field in the order objectid, type,
 * offset, testing at each level whether @key is strictly smaller.  A later
 * field is only consulted when all earlier fields are equal.
 * Presumably the result is non-zero when @key sorts before @drop_key;
 * verify against the return statements.
 */
7645 static int is_dropped_key(struct btrfs_key *key,
7646 struct btrfs_key *drop_key) {
7647 if (key->objectid < drop_key->objectid)
7649 else if (key->objectid == drop_key->objectid) {
7650 if (key->type < drop_key->type)
7652 else if (key->type == drop_key->type) {
7653 if (key->offset < drop_key->offset)
7661 * Here are the rules for FULL_BACKREF.
7663 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7664 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7666 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7667 * if it happened after the relocation occurred since we'll have dropped the
7668 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7669 * have no real way to know for sure.
7671 * We process the blocks one root at a time, and we start from the lowest root
7672 * objectid and go to the highest. So we can just lookup the owner backref for
7673 * the record and if we don't find it then we know it doesn't exist and we have
7676 * FIXME: if we ever start reclaiming root objectids then we need to fix this
7677 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7678 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Decide whether tree block @buf should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF, and record a disagreement with what is
 * already stored in its extent_record.
 *
 * The record for buf->start is looked up in @extent_cache.  The tests, in
 * order, look at: root objectids below BTRFS_FIRST_FREE_OBJECTID, the block
 * being the root node of @ri (buf->start == ri->bytenr), the RELOC header
 * flag, the header owner matching ri->objectid, and finally whether a tree
 * backref for the header owner exists in the record.
 *
 * Two outcomes are visible.  In one, rec->bad_full_backref is set when
 * rec->flag_block_full_backref is already set to something other than 0.
 * In the other, BTRFS_BLOCK_FLAG_FULL_BACKREF is ORed into *flags and
 * rec->bad_full_backref is set when the stored value is already set to
 * something other than 1.
 */
7680 static int calc_extent_flag(struct cache_tree *extent_cache,
7681 struct extent_buffer *buf,
7682 struct root_item_record *ri,
7685 struct extent_record *rec;
7686 struct cache_extent *cache;
7687 struct tree_backref *tback;
7690 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7691 /* we have added this extent before */
7695 rec = container_of(cache, struct extent_record, cache);
7698 * Except file/reloc tree, we can not have
7701 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7706 if (buf->start == ri->bytenr)
7709 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7712 owner = btrfs_header_owner(buf);
7713 if (owner == ri->objectid)
7716 tback = find_tree_backref(rec, 0, owner);
7721 if (rec->flag_block_full_backref != FLAG_UNSET &&
7722 rec->flag_block_full_backref != 0)
7723 rec->bad_full_backref = 1;
7726 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7727 if (rec->flag_block_full_backref != FLAG_UNSET &&
7728 rec->flag_block_full_backref != 1)
7729 rec->bad_full_backref = 1;
/*
 * Print a one-line diagnostic on stderr saying that @key_type was found in
 * the root @rootid.  The key type is rendered by print_key_type() and the
 * root by print_objectid().
 */
7733 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7735 fprintf(stderr, "Invalid key type(");
7736 print_key_type(stderr, 0, key_type);
7737 fprintf(stderr, ") found in root(");
7738 print_objectid(stderr, rootid, 0);
7739 fprintf(stderr, ")\n");
7743 * Check if the key is valid with its extent buffer.
7745 * This is an early check in case an invalid key exists in an extent buffer
7746 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Tell whether an item of @key_type may appear in the tree identified by
 * @rootid.
 *
 * DEV_ITEM and CHUNK_ITEM keys are tested against the chunk tree objectid;
 * EXTENT_ITEM, METADATA_ITEM and BLOCK_GROUP_ITEM keys against the extent
 * tree objectid; ROOT_ITEM keys against the root tree objectid; DEV_EXTENT
 * keys against the dev tree objectid.  A mismatch is reported through
 * report_mismatch_key_root().
 *
 * NOTE(review): BTRFS_CSUM_TREE_OBJECTID is a tree objectid, yet it is used
 * below as a case label alongside key types.  BTRFS_EXTENT_CSUM_KEY looks
 * like the intended label - confirm.
 */
7749 static int check_type_with_root(u64 rootid, u8 key_type)
7752 /* Only valid in chunk tree */
7753 case BTRFS_DEV_ITEM_KEY:
7754 case BTRFS_CHUNK_ITEM_KEY:
7755 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7758 /* valid in csum and log tree */
7759 case BTRFS_CSUM_TREE_OBJECTID:
7760 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7764 case BTRFS_EXTENT_ITEM_KEY:
7765 case BTRFS_METADATA_ITEM_KEY:
7766 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7767 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7770 case BTRFS_ROOT_ITEM_KEY:
7771 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7774 case BTRFS_DEV_EXTENT_KEY:
7775 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7781 report_mismatch_key_root(key_type, rootid);
/*
 * Pick the next pending tree block, read it and feed its contents into the
 * caches used by the extent tree check.
 *
 * Selection and read: pick_next_pending() fills @bits with candidate
 * blocks.  The candidates are added to @reada and readahead_tree_block() is
 * issued for each.  The first candidate becomes the block to process; its
 * start is stored in *last and it is looked up in, and removed from,
 * @pending, @reada and @nodes.  If @extent_cache has a record for it, the
 * parent generation from that record is passed to read_tree_block().  A
 * buffer that is not uptodate is reported through record_bad_block_io().
 *
 * FULL_BACKREF state: unless init_extent_tree is set the flags come from
 * btrfs_lookup_extent_info(), with calc_extent_flag() also used on that
 * path; with init_extent_tree set calc_extent_flag() alone is used.  A
 * calc_extent_flag() failure prints a message and sets
 * BTRFS_BLOCK_FLAG_FULL_BACKREF.  The flag is then cross-checked against
 * the root objectid, the header owner, the RELOC header flag and
 * ri->last_snapshot; a disagreement flips the flag and sets
 * rec->bad_full_backref.  The result is stored in
 * rec->flag_block_full_backref and the block is passed to check_block().
 *
 * Leaf: the free space of the leaf is added to btree_space_waste.  Every
 * item key is checked with check_type_with_root() against the header owner
 * and then dispatched on key.type.  Extent and metadata items, chunk,
 * device, block group and device extent items go to their process_*
 * helpers.  TREE_BLOCK_REF and SHARED_BLOCK_REF keys are recorded with
 * add_tree_backref(); EXTENT_DATA_REF and SHARED_DATA_REF keys with
 * add_data_backref().  For ORPHAN_ITEM keys a bad_item holding the key and
 * the owner can be queued on delete_items.  For EXTENT_DATA keys the file
 * extent type and disk bytenr are examined, the data byte counters are
 * updated and a data backref for the disk extent is added.
 *
 * Node: for every child pointer the key is compared with ri->drop_key when
 * the level equals ri->drop_level.  An extent record carrying the parent
 * key and pointer generation is added, followed by a tree backref, and the
 * child is queued with add_pending() on @nodes or @pending.  The unused
 * pointer slots are added to btree_space_waste.
 *
 * Finally the btree byte totals are updated from buf->len according to the
 * header owner and the buffer is released with free_extent_buffer().
 */
7785 static int run_next_block(struct btrfs_root *root,
7786 struct block_info *bits,
7789 struct cache_tree *pending,
7790 struct cache_tree *seen,
7791 struct cache_tree *reada,
7792 struct cache_tree *nodes,
7793 struct cache_tree *extent_cache,
7794 struct cache_tree *chunk_cache,
7795 struct rb_root *dev_cache,
7796 struct block_group_tree *block_group_cache,
7797 struct device_extent_tree *dev_extent_cache,
7798 struct root_item_record *ri)
7800 struct btrfs_fs_info *fs_info = root->fs_info;
7801 struct extent_buffer *buf;
7802 struct extent_record *rec = NULL;
7813 struct btrfs_key key;
7814 struct cache_extent *cache;
7817 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7818 bits_nr, &reada_bits);
7823 for(i = 0; i < nritems; i++) {
7824 ret = add_cache_extent(reada, bits[i].start,
7829 /* fixme, get the parent transid */
7830 readahead_tree_block(fs_info, bits[i].start, 0);
7833 *last = bits[0].start;
7834 bytenr = bits[0].start;
7835 size = bits[0].size;
7837 cache = lookup_cache_extent(pending, bytenr, size);
7839 remove_cache_extent(pending, cache);
7842 cache = lookup_cache_extent(reada, bytenr, size);
7844 remove_cache_extent(reada, cache);
7847 cache = lookup_cache_extent(nodes, bytenr, size);
7849 remove_cache_extent(nodes, cache);
7852 cache = lookup_cache_extent(extent_cache, bytenr, size);
7854 rec = container_of(cache, struct extent_record, cache);
7855 gen = rec->parent_generation;
7858 /* fixme, get the real parent transid */
7859 buf = read_tree_block(root->fs_info, bytenr, gen);
7860 if (!extent_buffer_uptodate(buf)) {
7861 record_bad_block_io(root->fs_info,
7862 extent_cache, bytenr, size);
7866 nritems = btrfs_header_nritems(buf);
7869 if (!init_extent_tree) {
7870 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7871 btrfs_header_level(buf), 1, NULL,
7874 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7876 fprintf(stderr, "Couldn't calc extent flags\n");
7877 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7882 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7884 fprintf(stderr, "Couldn't calc extent flags\n");
7885 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7889 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7891 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7892 ri->objectid == btrfs_header_owner(buf)) {
7894 * Ok we got to this block from it's original owner and
7895 * we have FULL_BACKREF set. Relocation can leave
7896 * converted blocks over so this is altogether possible,
7897 * however it's not possible if the generation > the
7898 * last snapshot, so check for this case.
7900 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7901 btrfs_header_generation(buf) > ri->last_snapshot) {
7902 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7903 rec->bad_full_backref = 1;
7908 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7909 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7910 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7911 rec->bad_full_backref = 1;
7915 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7916 rec->flag_block_full_backref = 1;
7920 rec->flag_block_full_backref = 0;
7922 owner = btrfs_header_owner(buf);
7925 ret = check_block(root, extent_cache, buf, flags);
7929 if (btrfs_is_leaf(buf)) {
7930 btree_space_waste += btrfs_leaf_free_space(root, buf);
7931 for (i = 0; i < nritems; i++) {
7932 struct btrfs_file_extent_item *fi;
7933 btrfs_item_key_to_cpu(buf, &key, i);
7935 * Check key type against the leaf owner.
7936 * Could filter quite a lot of early error if
7939 if (check_type_with_root(btrfs_header_owner(buf),
7941 fprintf(stderr, "ignoring invalid key\n");
7944 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7945 process_extent_item(root, extent_cache, buf,
7949 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7950 process_extent_item(root, extent_cache, buf,
7954 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7956 btrfs_item_size_nr(buf, i);
7959 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7960 process_chunk_item(chunk_cache, &key, buf, i);
7963 if (key.type == BTRFS_DEV_ITEM_KEY) {
7964 process_device_item(dev_cache, &key, buf, i);
7967 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7968 process_block_group_item(block_group_cache,
7972 if (key.type == BTRFS_DEV_EXTENT_KEY) {
7973 process_device_extent_item(dev_extent_cache,
7978 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7979 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7980 process_extent_ref_v0(extent_cache, buf, i);
7987 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7988 ret = add_tree_backref(extent_cache,
7989 key.objectid, 0, key.offset, 0);
7992 "add_tree_backref failed (leaf tree block): %s",
7996 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7997 ret = add_tree_backref(extent_cache,
7998 key.objectid, key.offset, 0, 0);
8001 "add_tree_backref failed (leaf shared block): %s",
8005 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8006 struct btrfs_extent_data_ref *ref;
8007 ref = btrfs_item_ptr(buf, i,
8008 struct btrfs_extent_data_ref);
8009 add_data_backref(extent_cache,
8011 btrfs_extent_data_ref_root(buf, ref),
8012 btrfs_extent_data_ref_objectid(buf,
8014 btrfs_extent_data_ref_offset(buf, ref),
8015 btrfs_extent_data_ref_count(buf, ref),
8016 0, root->fs_info->sectorsize);
8019 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8020 struct btrfs_shared_data_ref *ref;
8021 ref = btrfs_item_ptr(buf, i,
8022 struct btrfs_shared_data_ref);
8023 add_data_backref(extent_cache,
8024 key.objectid, key.offset, 0, 0, 0,
8025 btrfs_shared_data_ref_count(buf, ref),
8026 0, root->fs_info->sectorsize);
8029 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8030 struct bad_item *bad;
8032 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8036 bad = malloc(sizeof(struct bad_item));
8039 INIT_LIST_HEAD(&bad->list);
8040 memcpy(&bad->key, &key,
8041 sizeof(struct btrfs_key));
8042 bad->root_id = owner;
8043 list_add_tail(&bad->list, &delete_items);
8046 if (key.type != BTRFS_EXTENT_DATA_KEY)
8048 fi = btrfs_item_ptr(buf, i,
8049 struct btrfs_file_extent_item);
8050 if (btrfs_file_extent_type(buf, fi) ==
8051 BTRFS_FILE_EXTENT_INLINE)
8053 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8056 data_bytes_allocated +=
8057 btrfs_file_extent_disk_num_bytes(buf, fi);
8058 if (data_bytes_allocated < root->fs_info->sectorsize) {
8061 data_bytes_referenced +=
8062 btrfs_file_extent_num_bytes(buf, fi);
8063 add_data_backref(extent_cache,
8064 btrfs_file_extent_disk_bytenr(buf, fi),
8065 parent, owner, key.objectid, key.offset -
8066 btrfs_file_extent_offset(buf, fi), 1, 1,
8067 btrfs_file_extent_disk_num_bytes(buf, fi));
8071 struct btrfs_key first_key;
8073 first_key.objectid = 0;
8076 btrfs_item_key_to_cpu(buf, &first_key, 0);
8077 level = btrfs_header_level(buf);
8078 for (i = 0; i < nritems; i++) {
8079 struct extent_record tmpl;
8081 ptr = btrfs_node_blockptr(buf, i);
8082 size = root->fs_info->nodesize;
8083 btrfs_node_key_to_cpu(buf, &key, i);
8085 if ((level == ri->drop_level)
8086 && is_dropped_key(&key, &ri->drop_key)) {
8091 memset(&tmpl, 0, sizeof(tmpl));
8092 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8093 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8098 tmpl.max_size = size;
8099 ret = add_extent_rec(extent_cache, &tmpl);
8103 ret = add_tree_backref(extent_cache, ptr, parent,
8107 "add_tree_backref failed (non-leaf block): %s",
8113 add_pending(nodes, seen, ptr, size);
8115 add_pending(pending, seen, ptr, size);
8118 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
8119 nritems) * sizeof(struct btrfs_key_ptr);
8121 total_btree_bytes += buf->len;
8122 if (fs_root_objectid(btrfs_header_owner(buf)))
8123 total_fs_tree_bytes += buf->len;
8124 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8125 total_extent_tree_bytes += buf->len;
8127 free_extent_buffer(buf);
/*
 * Queue the root node @buf of a tree for processing and seed
 * @extent_cache with it.
 *
 * A block whose header level is above 0 is queued on @nodes with
 * add_pending(); @pending is presumably used for a level 0 root.  An extent
 * record starting at buf->start with max_size buf->len is added.  A tree
 * backref follows: for the reloc tree objectid, or when the header backref
 * revision is older than BTRFS_MIXED_BACKREF_REV, the backref uses
 * buf->start as parent; the other call records it with parent 0 and the
 * root objectid.
 */
8131 static int add_root_to_pending(struct extent_buffer *buf,
8132 struct cache_tree *extent_cache,
8133 struct cache_tree *pending,
8134 struct cache_tree *seen,
8135 struct cache_tree *nodes,
8138 struct extent_record tmpl;
8141 if (btrfs_header_level(buf) > 0)
8142 add_pending(nodes, seen, buf->start, buf->len);
8144 add_pending(pending, seen, buf->start, buf->len);
8146 memset(&tmpl, 0, sizeof(tmpl));
8147 tmpl.start = buf->start;
8152 tmpl.max_size = buf->len;
8153 add_extent_rec(extent_cache, &tmpl);
8155 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8156 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8157 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8160 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8165 /* as we fix the tree, we might be deleting blocks that
8166 * we're tracking for repair. This hook makes sure we
8167 * remove any backrefs for blocks as we are fixing them.
/*
 * Keep the fsck extent cache in step with extent references that are
 * dropped: update the extent_record for the range in
 * root->fs_info->fsck_extent_cache.
 *
 * A reference whose @owner is at or above BTRFS_FIRST_FREE_OBJECTID is
 * treated as data.  The record covering @bytenr and @num_bytes is looked up
 * in the cache.
 *
 * Data: the backref is found with find_data_backref().  When it was seen as
 * a ref, its found_ref count and rec->refs are reduced by refs_to_drop;
 * when it was seen in the extent tree, its num_refs and a non-zero
 * rec->extent_item_refs are reduced as well.  Counts that reach zero clear
 * the matching found flag.
 *
 * Tree: the backref is found with find_tree_backref(); its found_ref and
 * found_extent_tree flags are cleared and a non-zero rec->extent_item_refs
 * is decremented.
 *
 * In both cases the backref is erased from rec->backref_tree under the
 * condition tested in the code, and maybe_free_extent_rec() is called on
 * the record at the end.
 *
 * NOTE(review): the erase condition is
 * !found_extent_tree && found_ref.  Since found_ref has just been cleared
 * in the tree case, confirm whether found_ref was meant to be negated too.
 */
8169 static int free_extent_hook(struct btrfs_trans_handle *trans,
8170 struct btrfs_root *root,
8171 u64 bytenr, u64 num_bytes, u64 parent,
8172 u64 root_objectid, u64 owner, u64 offset,
8175 struct extent_record *rec;
8176 struct cache_extent *cache;
8178 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8180 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8181 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8185 rec = container_of(cache, struct extent_record, cache);
8187 struct data_backref *back;
8188 back = find_data_backref(rec, parent, root_objectid, owner,
8189 offset, 1, bytenr, num_bytes);
8192 if (back->node.found_ref) {
8193 back->found_ref -= refs_to_drop;
8195 rec->refs -= refs_to_drop;
8197 if (back->node.found_extent_tree) {
8198 back->num_refs -= refs_to_drop;
8199 if (rec->extent_item_refs)
8200 rec->extent_item_refs -= refs_to_drop;
8202 if (back->found_ref == 0)
8203 back->node.found_ref = 0;
8204 if (back->num_refs == 0)
8205 back->node.found_extent_tree = 0;
8207 if (!back->node.found_extent_tree && back->node.found_ref) {
8208 rb_erase(&back->node.node, &rec->backref_tree);
8212 struct tree_backref *back;
8213 back = find_tree_backref(rec, parent, root_objectid);
8216 if (back->node.found_ref) {
8219 back->node.found_ref = 0;
8221 if (back->node.found_extent_tree) {
8222 if (rec->extent_item_refs)
8223 rec->extent_item_refs--;
8224 back->node.found_extent_tree = 0;
8226 if (!back->node.found_extent_tree && back->node.found_ref) {
8227 rb_erase(&back->node.node, &rec->backref_tree);
8231 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent tree items that describe the extent at bytenr.
 *
 * The search key uses bytenr as objectid and (u64)-1 as offset, and
 * btrfs_search_slot() is run on the extent root with the transaction
 * handle.  The slot reached is examined; items whose objectid differs from
 * bytenr are not handled.
 *
 * An item whose type is not one of EXTENT_ITEM, METADATA_ITEM,
 * TREE_BLOCK_REF, EXTENT_DATA_REF, EXTENT_REF_V0, SHARED_BLOCK_REF or
 * SHARED_DATA_REF is left in place: the path is released and the search
 * key is moved to just before it, either by lowering the offset (when the
 * type is already 0) or by lowering the type and resetting the offset to
 * (u64)-1.
 *
 * A matching item is announced on stderr and removed with
 * btrfs_del_item().  When the removed item was an EXTENT_ITEM or
 * METADATA_ITEM, btrfs_update_block_group() is called with its size:
 * the key offset for an EXTENT_ITEM, the nodesize for a METADATA_ITEM.
 * The path is released before returning.
 */
8236 static int delete_extent_records(struct btrfs_trans_handle *trans,
8237 struct btrfs_root *root,
8238 struct btrfs_path *path,
8241 struct btrfs_key key;
8242 struct btrfs_key found_key;
8243 struct extent_buffer *leaf;
8248 key.objectid = bytenr;
8250 key.offset = (u64)-1;
8253 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8260 if (path->slots[0] == 0)
8266 leaf = path->nodes[0];
8267 slot = path->slots[0];
8269 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8270 if (found_key.objectid != bytenr)
8273 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8274 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8275 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8276 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8277 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8278 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8279 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8280 btrfs_release_path(path);
8281 if (found_key.type == 0) {
8282 if (found_key.offset == 0)
8284 key.offset = found_key.offset - 1;
8285 key.type = found_key.type;
8287 key.type = found_key.type - 1;
8288 key.offset = (u64)-1;
8292 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8293 found_key.objectid, found_key.type, found_key.offset);
8295 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8298 btrfs_release_path(path);
8300 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8301 found_key.type == BTRFS_METADATA_ITEM_KEY) {
8302 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8303 found_key.offset : root->fs_info->nodesize;
8305 ret = btrfs_update_block_group(trans, root, bytenr,
8312 btrfs_release_path(path);
8317 * for a single backref, this will allocate a new extent
8318 * and add the backref to it.
/*
 * record_extent - recreate extent tree state for one backref of @rec.
 *
 * @trans:     running transaction
 * @info:      fs_info; items go into info->extent_root
 * @path:      scratch path, released on the way out
 * @rec:       extent record being rebuilt
 * @back:      the backref to re-add (data or tree)
 * @allocated: NOTE(review): presumably tells whether the extent item was
 *             already inserted by an earlier call - confirm against caller
 * @flags:     extra extent flags OR-ed in for tree blocks
 *
 * An item keyed (rec->start, BTRFS_EXTENT_ITEM_KEY, rec->max_size) is
 * inserted with a ref count of 0 and generation rec->generation.  Data
 * backrefs get BTRFS_EXTENT_FLAG_DATA; otherwise a btrfs_tree_block_info
 * follows the extent item (zeroed, then filled with level rec->info_level
 * and a disk key whose objectid is rec->info_objectid) and the flags are
 * BTRFS_EXTENT_FLAG_TREE_BLOCK | @flags.  Block group accounting is then
 * updated through btrfs_update_block_group().
 *
 * The reference itself is added with btrfs_inc_extent_ref(): inside a loop
 * over dback->found_ref for a data backref, once for a tree backref.  For
 * a full backref the parent bytenr is passed.  Each addition is logged to
 * stderr.
 */
8320 static int record_extent(struct btrfs_trans_handle *trans,
8321 struct btrfs_fs_info *info,
8322 struct btrfs_path *path,
8323 struct extent_record *rec,
8324 struct extent_backref *back,
8325 int allocated, u64 flags)
8328 struct btrfs_root *extent_root = info->extent_root;
8329 struct extent_buffer *leaf;
8330 struct btrfs_key ins_key;
8331 struct btrfs_extent_item *ei;
8332 struct data_backref *dback;
8333 struct btrfs_tree_block_info *bi;
8336 rec->max_size = max_t(u64, rec->max_size,
8340 u32 item_size = sizeof(*ei);
8343 item_size += sizeof(*bi);
8345 ins_key.objectid = rec->start;
8346 ins_key.offset = rec->max_size;
8347 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8349 ret = btrfs_insert_empty_item(trans, extent_root, path,
8350 &ins_key, item_size);
8354 leaf = path->nodes[0];
8355 ei = btrfs_item_ptr(leaf, path->slots[0],
8356 struct btrfs_extent_item);
8358 btrfs_set_extent_refs(leaf, ei, 0);
8359 btrfs_set_extent_generation(leaf, ei, rec->generation);
8361 if (back->is_data) {
8362 btrfs_set_extent_flags(leaf, ei,
8363 BTRFS_EXTENT_FLAG_DATA);
8365 struct btrfs_disk_key copy_key;
8367 bi = (struct btrfs_tree_block_info *)(ei + 1);
8368 memset_extent_buffer(leaf, 0, (unsigned long)bi,
8371 btrfs_set_disk_key_objectid(&copy_key,
8372 rec->info_objectid);
8373 btrfs_set_disk_key_type(&copy_key, 0);
8374 btrfs_set_disk_key_offset(&copy_key, 0);
8376 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8377 btrfs_set_tree_block_key(leaf, bi, &copy_key);
8379 btrfs_set_extent_flags(leaf, ei,
8380 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8383 btrfs_mark_buffer_dirty(leaf);
8384 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8385 rec->max_size, 1, 0);
8388 btrfs_release_path(path);
8391 if (back->is_data) {
8395 dback = to_data_backref(back);
8396 if (back->full_backref)
8397 parent = dback->parent;
8401 for (i = 0; i < dback->found_ref; i++) {
8402 /* if parent != 0, we're doing a full backref
8403 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8404 * just makes the backref allocator create a data
8407 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8408 rec->start, rec->max_size,
8412 BTRFS_FIRST_FREE_OBJECTID :
8418 fprintf(stderr, "adding new data backref"
8419 " on %llu %s %llu owner %llu"
8420 " offset %llu found %d\n",
8421 (unsigned long long)rec->start,
8422 back->full_backref ?
8424 back->full_backref ?
8425 (unsigned long long)parent :
8426 (unsigned long long)dback->root,
8427 (unsigned long long)dback->owner,
8428 (unsigned long long)dback->offset,
8432 struct tree_backref *tback;
8434 tback = to_tree_backref(back);
8435 if (back->full_backref)
8436 parent = tback->parent;
8440 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8441 rec->start, rec->max_size,
8442 parent, tback->root, 0, 0);
8443 fprintf(stderr, "adding new tree backref on "
8444 "start %llu len %llu parent %llu root %llu\n",
8445 rec->start, rec->max_size, parent, tback->root);
8448 btrfs_release_path(path);
/*
 * find_entry - look up an extent_entry on @entries by exact range.
 *
 * Walks the list and tests each entry for bytenr == @bytenr and
 * bytes == @bytes.
 */
8452 static struct extent_entry *find_entry(struct list_head *entries,
8453 u64 bytenr, u64 bytes)
8455 struct extent_entry *entry = NULL;
8457 list_for_each_entry(entry, entries, list) {
8458 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * find_most_right_entry - pick the entry that the most backrefs agree on.
 *
 * Iterates @entries while tracking the previous entry and the best
 * candidate so far.  An entry whose ->broken count equals its ->count is
 * not trusted.  Candidates are compared by ->count; a tie with the current
 * best, or between the previous (non-broken) entry and the current one,
 * means the winner is not certain yet and the search has to continue.
 */
8465 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8467 struct extent_entry *entry, *best = NULL, *prev = NULL;
8469 list_for_each_entry(entry, entries, list) {
8471 * If there are as many broken entries as entries then we know
8472 * not to trust this particular entry.
8474 if (entry->broken == entry->count)
8478 * Special case, when there are only two entries and 'best' is
8488 * If our current entry == best then we can't be sure our best
8489 * is really the best, so we need to keep searching.
8491 if (best && best->count == entry->count) {
8497 /* Prev == entry, not good enough, have to keep searching */
8498 if (!prev->broken && prev->count == entry->count)
8502 best = (prev->count > entry->count) ? prev : entry;
8503 else if (best->count < entry->count)
/*
 * repair_ref - rewrite a file extent item so that it matches @entry.
 *
 * @info:  fs_info used to read the owning fs root
 * @path:  scratch path, released before returning
 * @dback: data backref whose file extent item needs fixing
 * @entry: the (bytenr, bytes) range the backrefs agreed on
 *
 * Reads the root dback->root and searches it for
 * (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset), then inspects file
 * extent items of that inode until one whose disk_bytenr/disk_num_bytes
 * equal dback->disk_bytenr/dback->bytes is found.  A transaction is
 * started and the same key is searched again with COW enabled.
 *
 * A compressed extent whose disk_bytenr differs from entry->bytenr is
 * refused with a message.  Otherwise disk_bytenr is set to entry->bytenr;
 * when the ref starts after or before the entry, the file extent offset
 * is rewritten as well, after checking the ref stays inside the entry.
 * disk_num_bytes is set to entry->bytes, and ram_bytes too when the
 * extent is not compressed.  The leaf is marked dirty and the transaction
 * committed.
 *
 * Returns ret when it is non-zero, otherwise the commit result.
 */
8511 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8512 struct data_backref *dback, struct extent_entry *entry)
8514 struct btrfs_trans_handle *trans;
8515 struct btrfs_root *root;
8516 struct btrfs_file_extent_item *fi;
8517 struct extent_buffer *leaf;
8518 struct btrfs_key key;
8522 key.objectid = dback->root;
8523 key.type = BTRFS_ROOT_ITEM_KEY;
8524 key.offset = (u64)-1;
8525 root = btrfs_read_fs_root(info, &key);
8527 fprintf(stderr, "Couldn't find root for our ref\n");
8532 * The backref points to the original offset of the extent if it was
8533 * split, so we need to search down to the offset we have and then walk
8534 * forward until we find the backref we're looking for.
8536 key.objectid = dback->owner;
8537 key.type = BTRFS_EXTENT_DATA_KEY;
8538 key.offset = dback->offset;
8539 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8541 fprintf(stderr, "Error looking up ref %d\n", ret);
8546 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8547 ret = btrfs_next_leaf(root, path);
8549 fprintf(stderr, "Couldn't find our ref, next\n");
8553 leaf = path->nodes[0];
8554 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8555 if (key.objectid != dback->owner ||
8556 key.type != BTRFS_EXTENT_DATA_KEY) {
8557 fprintf(stderr, "Couldn't find our ref, search\n");
8560 fi = btrfs_item_ptr(leaf, path->slots[0],
8561 struct btrfs_file_extent_item);
8562 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8563 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8565 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8570 btrfs_release_path(path);
8572 trans = btrfs_start_transaction(root, 1);
8574 return PTR_ERR(trans);
8577 * Ok we have the key of the file extent we want to fix, now we can cow
8578 * down to the thing and fix it.
8580 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8582 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8583 key.objectid, key.type, key.offset, ret);
8587 fprintf(stderr, "Well that's odd, we just found this key "
8588 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8593 leaf = path->nodes[0];
8594 fi = btrfs_item_ptr(leaf, path->slots[0],
8595 struct btrfs_file_extent_item);
8597 if (btrfs_file_extent_compression(leaf, fi) &&
8598 dback->disk_bytenr != entry->bytenr) {
8599 fprintf(stderr, "Ref doesn't match the record start and is "
8600 "compressed, please take a btrfs-image of this file "
8601 "system and send it to a btrfs developer so they can "
8602 "complete this functionality for bytenr %Lu\n",
8603 dback->disk_bytenr);
8608 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8609 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8610 } else if (dback->disk_bytenr > entry->bytenr) {
8611 u64 off_diff, offset;
8613 off_diff = dback->disk_bytenr - entry->bytenr;
8614 offset = btrfs_file_extent_offset(leaf, fi);
8615 if (dback->disk_bytenr + offset +
8616 btrfs_file_extent_num_bytes(leaf, fi) >
8617 entry->bytenr + entry->bytes) {
8618 fprintf(stderr, "Ref is past the entry end, please "
8619 "take a btrfs-image of this file system and "
8620 "send it to a btrfs developer, ref %Lu\n",
8621 dback->disk_bytenr);
8626 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8627 btrfs_set_file_extent_offset(leaf, fi, offset);
8628 } else if (dback->disk_bytenr < entry->bytenr) {
8631 offset = btrfs_file_extent_offset(leaf, fi);
8632 if (dback->disk_bytenr + offset < entry->bytenr) {
8633 fprintf(stderr, "Ref is before the entry start, please"
8634 " take a btrfs-image of this file system and "
8635 "send it to a btrfs developer, ref %Lu\n",
8636 dback->disk_bytenr);
8641 offset += dback->disk_bytenr;
8642 offset -= entry->bytenr;
8643 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8644 btrfs_set_file_extent_offset(leaf, fi, offset);
8647 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8650 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8651 * only do this if we aren't using compression, otherwise it's a
8654 if (!btrfs_file_extent_compression(leaf, fi))
8655 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8657 printf("ram bytes may be wrong?\n");
8658 btrfs_mark_buffer_dirty(leaf);
8660 err = btrfs_commit_transaction(trans, root);
8661 btrfs_release_path(path);
8662 return ret ? ret : err;
/*
 * verify_backrefs - make the data backrefs of @rec agree on one range.
 *
 * First pass: every non-full data backref with found_ref != 0 is looked
 * up in, or added to, a local list of extent_entry buckets keyed by
 * (dback->disk_bytenr, dback->bytes).  A backref whose range differs from
 * rec->start/rec->nr, or that is marked broken, is detected separately.
 * When at most one bucket exists and nothing mismatched, the backrefs
 * already agree.
 *
 * Otherwise find_most_right_entry() picks a winner.  The extent record's
 * own (rec->start, rec->nr) range is looked up among the buckets, and may
 * be added as a bucket of its own, before find_most_right_entry() is
 * asked again; an unresolvable split is reported on stderr.  A winner
 * whose bytenr differs from rec->start is reported as unsupported.
 *
 * Second pass: each considered backref whose bytes/disk_bytenr differ from
 * the winner is handed to repair_ref().
 *
 * All buckets are unlinked from the local list before returning.
 */
8665 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8666 struct extent_record *rec)
8668 struct extent_backref *back, *tmp;
8669 struct data_backref *dback;
8670 struct extent_entry *entry, *best = NULL;
8673 int broken_entries = 0;
8678 * Metadata is easy and the backrefs should always agree on bytenr and
8679 * size, if not we've got bigger issues.
8684 rbtree_postorder_for_each_entry_safe(back, tmp,
8685 &rec->backref_tree, node) {
8686 if (back->full_backref || !back->is_data)
8689 dback = to_data_backref(back);
8692 * We only pay attention to backrefs that we found a real
8695 if (dback->found_ref == 0)
8699 * For now we only catch when the bytes don't match, not the
8700 * bytenr. We can easily do this at the same time, but I want
8701 * to have a fs image to test on before we just add repair
8702 * functionality willy-nilly so we know we won't screw up the
8706 entry = find_entry(&entries, dback->disk_bytenr,
8709 entry = malloc(sizeof(struct extent_entry));
8714 memset(entry, 0, sizeof(*entry));
8715 entry->bytenr = dback->disk_bytenr;
8716 entry->bytes = dback->bytes;
8717 list_add_tail(&entry->list, &entries);
8722 * If we only have on entry we may think the entries agree when
8723 * in reality they don't so we have to do some extra checking.
8725 if (dback->disk_bytenr != rec->start ||
8726 dback->bytes != rec->nr || back->broken)
8737 /* Yay all the backrefs agree, carry on good sir */
8738 if (nr_entries <= 1 && !mismatch)
8741 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8742 "%Lu\n", rec->start);
8745 * First we want to see if the backrefs can agree amongst themselves who
8746 * is right, so figure out which one of the entries has the highest
8749 best = find_most_right_entry(&entries);
8752 * Ok so we may have an even split between what the backrefs think, so
8753 * this is where we use the extent ref to see what it thinks.
8756 entry = find_entry(&entries, rec->start, rec->nr);
8757 if (!entry && (!broken_entries || !rec->found_rec)) {
8758 fprintf(stderr, "Backrefs don't agree with each other "
8759 "and extent record doesn't agree with anybody,"
8760 " so we can't fix bytenr %Lu bytes %Lu\n",
8761 rec->start, rec->nr);
8764 } else if (!entry) {
8766 * Ok our backrefs were broken, we'll assume this is the
8767 * correct value and add an entry for this range.
8769 entry = malloc(sizeof(struct extent_entry));
8774 memset(entry, 0, sizeof(*entry));
8775 entry->bytenr = rec->start;
8776 entry->bytes = rec->nr;
8777 list_add_tail(&entry->list, &entries);
8781 best = find_most_right_entry(&entries);
8783 fprintf(stderr, "Backrefs and extent record evenly "
8784 "split on who is right, this is going to "
8785 "require user input to fix bytenr %Lu bytes "
8786 "%Lu\n", rec->start, rec->nr);
8793 * I don't think this can happen currently as we'll abort() if we catch
8794 * this case higher up, but in case somebody removes that we still can't
8795 * deal with it properly here yet, so just bail out of that's the case.
8797 if (best->bytenr != rec->start) {
8798 fprintf(stderr, "Extent start and backref starts don't match, "
8799 "please use btrfs-image on this file system and send "
8800 "it to a btrfs developer so they can make fsck fix "
8801 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8802 rec->start, rec->nr);
8808 * Ok great we all agreed on an extent record, let's go find the real
8809 * references and fix up the ones that don't match.
8811 rbtree_postorder_for_each_entry_safe(back, tmp,
8812 &rec->backref_tree, node) {
8813 if (back->full_backref || !back->is_data)
8816 dback = to_data_backref(back);
8819 * Still ignoring backrefs that don't have a real ref attached
8822 if (dback->found_ref == 0)
8825 if (dback->bytes == best->bytes &&
8826 dback->disk_bytenr == best->bytenr)
8829 ret = repair_ref(info, path, dback, best);
8835 * Ok we messed with the actual refs, which means we need to drop our
8836 * entire cache and go back and rescan. I know this is a huge pain and
8837 * adds a lot of extra work, but it's the only way to be safe. Once all
8838 * the backrefs agree we may not need to do anything to the extent
8843 while (!list_empty(&entries)) {
8844 entry = list_entry(entries.next, struct extent_entry, list);
8845 list_del_init(&entry->list);
/*
 * process_duplicates - promote a duplicate to be the canonical record.
 *
 * Does nothing further when @rec has found_rec set or more than one
 * duplicate.  Otherwise @rec is removed from @extent_cache and the first
 * record on rec->dups ("good") is re-initialised to take its place: its
 * cache range is set from good->start/good->nr, its check flags and
 * duplicate count are reset, and it inherits rec->refs and rec->backrefs.
 *
 * Records found in the cache for good's range are then merged.  One with
 * found_rec set or with duplicates of its own is moved onto good->dups
 * (together with its own dups) and good is queued on the global
 * duplicate_extents list; any other record only contributes its refs and
 * backrefs.  Each merged record is removed from the cache, and good is
 * finally inserted into @extent_cache.
 *
 * The visible final return yields 0 when good ended up with duplicates
 * and 1 otherwise.
 */
8851 static int process_duplicates(struct cache_tree *extent_cache,
8852 struct extent_record *rec)
8854 struct extent_record *good, *tmp;
8855 struct cache_extent *cache;
8859 * If we found a extent record for this extent then return, or if we
8860 * have more than one duplicate we are likely going to need to delete
8863 if (rec->found_rec || rec->num_duplicates > 1)
8866 /* Shouldn't happen but just in case */
8867 BUG_ON(!rec->num_duplicates);
8870 * So this happens if we end up with a backref that doesn't match the
8871 * actual extent entry. So either the backref is bad or the extent
8872 * entry is bad. Either way we want to have the extent_record actually
8873 * reflect what we found in the extent_tree, so we need to take the
8874 * duplicate out and use that as the extent_record since the only way we
8875 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8877 remove_cache_extent(extent_cache, &rec->cache);
8879 good = to_extent_record(rec->dups.next);
8880 list_del_init(&good->list);
8881 INIT_LIST_HEAD(&good->backrefs);
8882 INIT_LIST_HEAD(&good->dups);
8883 good->cache.start = good->start;
8884 good->cache.size = good->nr;
8885 good->content_checked = 0;
8886 good->owner_ref_checked = 0;
8887 good->num_duplicates = 0;
8888 good->refs = rec->refs;
8889 list_splice_init(&rec->backrefs, &good->backrefs);
8891 cache = lookup_cache_extent(extent_cache, good->start,
8895 tmp = container_of(cache, struct extent_record, cache);
8898 * If we find another overlapping extent and it's found_rec is
8899 * set then it's a duplicate and we need to try and delete
8902 if (tmp->found_rec || tmp->num_duplicates > 0) {
8903 if (list_empty(&good->list))
8904 list_add_tail(&good->list,
8905 &duplicate_extents);
8906 good->num_duplicates += tmp->num_duplicates + 1;
8907 list_splice_init(&tmp->dups, &good->dups);
8908 list_del_init(&tmp->list);
8909 list_add_tail(&tmp->list, &good->dups);
8910 remove_cache_extent(extent_cache, &tmp->cache);
8915 * Ok we have another non extent item backed extent rec, so lets
8916 * just add it to this extent and carry on like we did above.
8918 good->refs += tmp->refs;
8919 list_splice_init(&tmp->backrefs, &good->backrefs);
8920 remove_cache_extent(extent_cache, &tmp->cache);
8923 ret = insert_cache_extent(extent_cache, &good->cache);
8926 return good->num_duplicates ? 0 : 1;
/*
 * delete_duplicate_records - delete redundant EXTENT_ITEMs of @rec.
 *
 * Compares the records on rec->dups by start and length to find the one
 * that covers all the others; extents that overlap without one fully
 * covering the other are reported on stderr and not handled.
 *
 * Records are collected on a local delete list.  Inside one transaction on
 * fs_info->extent_root, each listed record is tested for found_rec, its
 * (start, BTRFS_EXTENT_ITEM_KEY, nr) key is searched for deletion and
 * removed with btrfs_del_item().  A metadata record is unexpected here and
 * is reported.  Afterwards both the delete list and rec->dups are
 * emptied, and rec->num_duplicates is reset when ret and nr_del are both
 * zero.
 *
 * Returns ret when it is non-zero, otherwise nr_del.
 *
 * NOTE(review): the overlap message prints start and nr (a length) as if
 * they were a [start-end] range.
 */
8929 static int delete_duplicate_records(struct btrfs_root *root,
8930 struct extent_record *rec)
8932 struct btrfs_trans_handle *trans;
8933 LIST_HEAD(delete_list);
8934 struct btrfs_path path;
8935 struct extent_record *tmp, *good, *n;
8938 struct btrfs_key key;
8940 btrfs_init_path(&path);
8943 /* Find the record that covers all of the duplicates. */
8944 list_for_each_entry(tmp, &rec->dups, list) {
8945 if (good->start < tmp->start)
8947 if (good->nr > tmp->nr)
8950 if (tmp->start + tmp->nr < good->start + good->nr) {
8951 fprintf(stderr, "Ok we have overlapping extents that "
8952 "aren't completely covered by each other, this "
8953 "is going to require more careful thought. "
8954 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8955 tmp->start, tmp->nr, good->start, good->nr);
8962 list_add_tail(&rec->list, &delete_list);
8964 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8967 list_move_tail(&tmp->list, &delete_list);
8970 root = root->fs_info->extent_root;
8971 trans = btrfs_start_transaction(root, 1);
8972 if (IS_ERR(trans)) {
8973 ret = PTR_ERR(trans);
8977 list_for_each_entry(tmp, &delete_list, list) {
8978 if (tmp->found_rec == 0)
8980 key.objectid = tmp->start;
8981 key.type = BTRFS_EXTENT_ITEM_KEY;
8982 key.offset = tmp->nr;
8984 /* Shouldn't happen but just in case */
8985 if (tmp->metadata) {
8986 fprintf(stderr, "Well this shouldn't happen, extent "
8987 "record overlaps but is metadata? "
8988 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8992 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8998 ret = btrfs_del_item(trans, root, &path);
9001 btrfs_release_path(&path);
9004 err = btrfs_commit_transaction(trans, root);
9008 while (!list_empty(&delete_list)) {
9009 tmp = to_extent_record(delete_list.next);
9010 list_del_init(&tmp->list);
9016 while (!list_empty(&rec->dups)) {
9017 tmp = to_extent_record(rec->dups.next);
9018 list_del_init(&tmp->list);
9022 btrfs_release_path(&path);
9024 if (!ret && !nr_del)
9025 rec->num_duplicates = 0;
9027 return ret ? ret : nr_del;
/*
 * find_possible_backrefs - try to resolve data backrefs with no found ref.
 *
 * For every non-full data backref of @rec whose found_ref is 0, the fs
 * root dback->root is read.  A root that fails with -ENOENT marks a bad
 * ref; any other root error is returned via PTR_ERR().  The root is then
 * searched for (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset).
 *
 * When the file extent item exists, its disk_bytenr and disk_num_bytes are
 * read and @extent_cache is probed at that bytenr.  Per the inline
 * comment, a backref whose bytenr already has an extent record of its own
 * is not attached to @rec.  Otherwise the backref gets found_ref
 * incremented and its disk_bytenr/bytes overwritten with the values read
 * from the file extent item.
 */
9030 static int find_possible_backrefs(struct btrfs_fs_info *info,
9031 struct btrfs_path *path,
9032 struct cache_tree *extent_cache,
9033 struct extent_record *rec)
9035 struct btrfs_root *root;
9036 struct extent_backref *back, *tmp;
9037 struct data_backref *dback;
9038 struct cache_extent *cache;
9039 struct btrfs_file_extent_item *fi;
9040 struct btrfs_key key;
9044 rbtree_postorder_for_each_entry_safe(back, tmp,
9045 &rec->backref_tree, node) {
9046 /* Don't care about full backrefs (poor unloved backrefs) */
9047 if (back->full_backref || !back->is_data)
9050 dback = to_data_backref(back);
9052 /* We found this one, we don't need to do a lookup */
9053 if (dback->found_ref)
9056 key.objectid = dback->root;
9057 key.type = BTRFS_ROOT_ITEM_KEY;
9058 key.offset = (u64)-1;
9060 root = btrfs_read_fs_root(info, &key);
9062 /* No root, definitely a bad ref, skip */
9063 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9065 /* Other err, exit */
9067 return PTR_ERR(root);
9069 key.objectid = dback->owner;
9070 key.type = BTRFS_EXTENT_DATA_KEY;
9071 key.offset = dback->offset;
9072 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9074 btrfs_release_path(path);
9077 /* Didn't find it, we can carry on */
9082 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9083 struct btrfs_file_extent_item);
9084 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9085 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9086 btrfs_release_path(path);
9087 cache = lookup_cache_extent(extent_cache, bytenr, 1);
9089 struct extent_record *tmp;
9090 tmp = container_of(cache, struct extent_record, cache);
9093 * If we found an extent record for the bytenr for this
9094 * particular backref then we can't add it to our
9095 * current extent record. We only want to add backrefs
9096 * that don't have a corresponding extent item in the
9097 * extent tree since they likely belong to this record
9098 * and we need to fix it if it doesn't match bytenrs.
9104 dback->found_ref += 1;
9105 dback->disk_bytenr = bytenr;
9106 dback->bytes = bytes;
9109 * Set this so the verify backref code knows not to trust the
9110 * values in this backref.
9119 * Record orphan data ref into corresponding root.
9121 * Return 0 if the extent item contains data ref and recorded.
9122 * Return 1 if the extent item contains no useful data ref
9123 * In that case, it may contain only shared_dataref or metadata backref
9124 * or the file extent exists (this should be handled by the extent bytenr
9126 * Return <0 if something goes wrong.
/*
 * record_orphan_data_extents - queue orphan data refs of @rec on their roots.
 *
 * Considers the non-full data backrefs of @rec that were found in the
 * extent tree, testing each one's found_ref.  For each, the owning root
 * dback->root is read (a root that cannot be read is skipped) and searched
 * for (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset).  An
 * orphan_data_extent holding the root, objectid, offset and the extent
 * range (rec->cache.start, rec->cache.size) is allocated and linked onto
 * dest_root->orphan_data_extents.
 *
 * Returns 0 when at least one data ref was recorded and 1 when none was
 * (on the visible final return).
 */
9128 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9129 struct extent_record *rec)
9131 struct btrfs_key key;
9132 struct btrfs_root *dest_root;
9133 struct extent_backref *back, *tmp;
9134 struct data_backref *dback;
9135 struct orphan_data_extent *orphan;
9136 struct btrfs_path path;
9137 int recorded_data_ref = 0;
9142 btrfs_init_path(&path);
9143 rbtree_postorder_for_each_entry_safe(back, tmp,
9144 &rec->backref_tree, node) {
9145 if (back->full_backref || !back->is_data ||
9146 !back->found_extent_tree)
9148 dback = to_data_backref(back);
9149 if (dback->found_ref)
9151 key.objectid = dback->root;
9152 key.type = BTRFS_ROOT_ITEM_KEY;
9153 key.offset = (u64)-1;
9155 dest_root = btrfs_read_fs_root(fs_info, &key);
9157 /* For non-exist root we just skip it */
9158 if (IS_ERR(dest_root) || !dest_root)
9161 key.objectid = dback->owner;
9162 key.type = BTRFS_EXTENT_DATA_KEY;
9163 key.offset = dback->offset;
9165 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9166 btrfs_release_path(&path);
9168 * For ret < 0, it's OK since the fs-tree may be corrupted,
9169 * we need to record it for inode/file extent rebuild.
9170 * For ret > 0, we record it only for file extent rebuild.
9171 * For ret == 0, the file extent exists but only bytenr
9172 * mismatch, let the original bytenr fix routine to handle,
9178 orphan = malloc(sizeof(*orphan));
9183 INIT_LIST_HEAD(&orphan->list);
9184 orphan->root = dback->root;
9185 orphan->objectid = dback->owner;
9186 orphan->offset = dback->offset;
9187 orphan->disk_bytenr = rec->cache.start;
9188 orphan->disk_len = rec->cache.size;
/* list_add(new, head): the new entry goes first, the list head second */
9189 list_add(&orphan->list, &dest_root->orphan_data_extents);
9190 recorded_data_ref = 1;
9193 btrfs_release_path(&path);
9195 return !recorded_data_ref;
9201 * when an incorrect extent item is found, this will delete
9202 * all of the existing entries for it and recreate them
9203 * based on what the tree scan found.
/*
 * fixup_extent_refs - rebuild the extent tree items for @rec.
 *
 * BTRFS_BLOCK_FLAG_FULL_BACKREF is added to the flags passed on when
 * rec->flag_block_full_backref is set.
 *
 * For a non-metadata record whose refs differ from extent_item_refs,
 * find_possible_backrefs() is run first to pick up stray backrefs.
 * The numbered steps then follow: (1) verify_backrefs() makes the
 * backrefs agree; a transaction is started on info->extent_root;
 * (2) delete_extent_records() removes the existing items; the
 * info->corrupt_blocks cache is consulted for the extent range;
 * (3) record_extent() re-adds every backref that has found_ref set, and
 * rec->bad_full_backref is cleared in that loop.
 *
 * The transaction is committed, a success message is printed to stderr
 * and the path is released.
 */
9205 static int fixup_extent_refs(struct btrfs_fs_info *info,
9206 struct cache_tree *extent_cache,
9207 struct extent_record *rec)
9209 struct btrfs_trans_handle *trans = NULL;
9211 struct btrfs_path path;
9212 struct cache_extent *cache;
9213 struct extent_backref *back, *tmp;
9217 if (rec->flag_block_full_backref)
9218 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9220 btrfs_init_path(&path);
9221 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9223 * Sometimes the backrefs themselves are so broken they don't
9224 * get attached to any meaningful rec, so first go back and
9225 * check any of our backrefs that we couldn't find and throw
9226 * them into the list if we find the backref so that
9227 * verify_backrefs can figure out what to do.
9229 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9234 /* step one, make sure all of the backrefs agree */
9235 ret = verify_backrefs(info, &path, rec);
9239 trans = btrfs_start_transaction(info->extent_root, 1);
9240 if (IS_ERR(trans)) {
9241 ret = PTR_ERR(trans);
9245 /* step two, delete all the existing records */
9246 ret = delete_extent_records(trans, info->extent_root, &path,
9252 /* was this block corrupt? If so, don't add references to it */
9253 cache = lookup_cache_extent(info->corrupt_blocks,
9254 rec->start, rec->max_size);
9260 /* step three, recreate all the refs we did find */
9261 rbtree_postorder_for_each_entry_safe(back, tmp,
9262 &rec->backref_tree, node) {
9264 * if we didn't find any references, don't create a
9267 if (!back->found_ref)
9270 rec->bad_full_backref = 0;
9271 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9279 int err = btrfs_commit_transaction(trans, info->extent_root);
9285 fprintf(stderr, "Repaired extent references for %llu\n",
9286 (unsigned long long)rec->start);
9288 btrfs_release_path(&path);
/*
 * fixup_extent_flags - set or clear the full-backref flag of an extent item.
 *
 * The key is (rec->start, BTRFS_METADATA_ITEM_KEY, rec->info_level) for a
 * metadata record and (rec->start, BTRFS_EXTENT_ITEM_KEY, rec->max_size)
 * otherwise.  The item is searched for, with COW, in a new transaction on
 * fs_info->extent_root; a missing item is reported on stderr.
 *
 * BTRFS_BLOCK_FLAG_FULL_BACKREF is set in the item's flags when
 * rec->flag_block_full_backref is set and cleared otherwise.  The leaf is
 * marked dirty, the path released and the transaction committed; a
 * "Repaired extent flags" message is printed.
 *
 * NOTE(review): the two early btrfs_commit_transaction() calls after a
 * failed lookup ignore their return value.
 */
9292 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9293 struct extent_record *rec)
9295 struct btrfs_trans_handle *trans;
9296 struct btrfs_root *root = fs_info->extent_root;
9297 struct btrfs_path path;
9298 struct btrfs_extent_item *ei;
9299 struct btrfs_key key;
9303 key.objectid = rec->start;
9304 if (rec->metadata) {
9305 key.type = BTRFS_METADATA_ITEM_KEY;
9306 key.offset = rec->info_level;
9308 key.type = BTRFS_EXTENT_ITEM_KEY;
9309 key.offset = rec->max_size;
9312 trans = btrfs_start_transaction(root, 0);
9314 return PTR_ERR(trans);
9316 btrfs_init_path(&path);
9317 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9319 btrfs_release_path(&path);
9320 btrfs_commit_transaction(trans, root);
9323 fprintf(stderr, "Didn't find extent for %llu\n",
9324 (unsigned long long)rec->start);
9325 btrfs_release_path(&path);
9326 btrfs_commit_transaction(trans, root);
9330 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9331 struct btrfs_extent_item);
9332 flags = btrfs_extent_flags(path.nodes[0], ei);
9333 if (rec->flag_block_full_backref) {
9334 fprintf(stderr, "setting full backref on %llu\n",
9335 (unsigned long long)key.objectid);
9336 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9338 fprintf(stderr, "clearing full backref on %llu\n",
9339 (unsigned long long)key.objectid);
9340 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9342 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9343 btrfs_mark_buffer_dirty(path.nodes[0]);
9344 btrfs_release_path(&path);
9345 ret = btrfs_commit_transaction(trans, root);
9347 fprintf(stderr, "Repaired extent flags for %llu\n",
9348 (unsigned long long)rec->start);
9353 /* right now we only prune from the extent allocation tree */
/*
 * prune_one_block - remove the parent's pointer to a corrupt block.
 *
 * Searches info->extent_root for corrupt->key with path.lowest_level set
 * to corrupt->level + 1, so the search stops at the parent of the corrupt
 * block.  The slot returned by the search is tried first; if its block
 * pointer is not corrupt->cache.start, every slot of that node is scanned
 * for a matching pointer.  Reaching the extent root's own node is tested
 * separately.
 *
 * A matching pointer is announced with printk() and deleted with
 * btrfs_del_ptr().  The path is released on the way out.
 */
9354 static int prune_one_block(struct btrfs_trans_handle *trans,
9355 struct btrfs_fs_info *info,
9356 struct btrfs_corrupt_block *corrupt)
9359 struct btrfs_path path;
9360 struct extent_buffer *eb;
9364 int level = corrupt->level + 1;
9366 btrfs_init_path(&path);
9368 /* we want to stop at the parent to our busted block */
9369 path.lowest_level = level;
9371 ret = btrfs_search_slot(trans, info->extent_root,
9372 &corrupt->key, &path, -1, 1);
9377 eb = path.nodes[level];
9384 * hopefully the search gave us the block we want to prune,
9385 * lets try that first
9387 slot = path.slots[level];
9388 found = btrfs_node_blockptr(eb, slot);
9389 if (found == corrupt->cache.start)
9392 nritems = btrfs_header_nritems(eb);
9394 /* the search failed, lets scan this node and hope we find it */
9395 for (slot = 0; slot < nritems; slot++) {
9396 found = btrfs_node_blockptr(eb, slot);
9397 if (found == corrupt->cache.start)
9401 * we couldn't find the bad block. TODO, search all the nodes for pointers
9404 if (eb == info->extent_root->node) {
9409 btrfs_release_path(&path);
9414 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9415 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9418 btrfs_release_path(&path);
/*
 * prune_corrupt_blocks - prune the entries of info->corrupt_blocks.
 *
 * Takes the first entry of the corrupt block cache, starts a transaction
 * on info->extent_root, calls prune_one_block() for the entry and removes
 * it from the cache.  Returns the result of committing the transaction,
 * or PTR_ERR() of the transaction handle.
 *
 * NOTE(review): the return value of prune_one_block() is ignored.
 */
9422 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9424 struct btrfs_trans_handle *trans = NULL;
9425 struct cache_extent *cache;
9426 struct btrfs_corrupt_block *corrupt;
9429 cache = search_cache_extent(info->corrupt_blocks, 0);
9433 trans = btrfs_start_transaction(info->extent_root, 1);
9435 return PTR_ERR(trans);
9437 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9438 prune_one_block(trans, info, corrupt);
9439 remove_cache_extent(info->corrupt_blocks, cache);
9442 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * reset_cached_block_groups - drop cached free-space state.
 *
 * Finds EXTENT_DIRTY ranges in fs_info->free_space_cache and clears them
 * with clear_extent_dirty().  Block groups are then visited through
 * btrfs_lookup_first_block_group(), advancing start past each group
 * (key.objectid + key.offset).
 */
9446 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9448 struct btrfs_block_group_cache *cache;
9453 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9454 &start, &end, EXTENT_DIRTY);
9457 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9462 cache = btrfs_lookup_first_block_group(fs_info, start);
9467 start = cache->key.objectid + cache->key.offset;
/*
 * check_extent_refs - report, and optionally repair, extent ref problems.
 *
 * Preparation: every record in @extent_cache and every entry of
 * fs_info->corrupt_blocks is marked dirty in fs_info->excluded_extents so
 * that nothing is allocated from problem extents; corrupt blocks are
 * pruned and the cached block group state is reset.  Records queued on
 * the global duplicate_extents list are run through process_duplicates()
 * and delete_duplicate_records().
 *
 * Main loop: each record in @extent_cache is checked for
 *   - multiple extent items (num_duplicates),
 *   - refs != extent_item_refs (also fed to record_orphan_data_extents()),
 *   - a backpointer mismatch (all_backpointers_checked()),
 *   - a failed owner ref check,
 *   - a bad full backref,
 *   - metadata crossing a stripe boundary,
 *   - an extent whose type mismatches its chunk.
 * Problems are printed to stderr.  With repair enabled,
 * fixup_extent_refs() and fixup_extent_flags() attempt the fix.  The
 * record is removed from the cache and its backrefs are freed; its range
 * may be cleared from excluded_extents again.
 *
 * An error other than -EAGAIN aborts with a message.  Block accounting is
 * fixed up via btrfs_fix_block_accounting() in its own transaction on the
 * extent root.
 */
9471 static int check_extent_refs(struct btrfs_root *root,
9472 struct cache_tree *extent_cache)
9474 struct extent_record *rec;
9475 struct cache_extent *cache;
9481 * if we're doing a repair, we have to make sure
9482 * we don't allocate from the problem extents.
9483 * In the worst case, this will be all the
9486 cache = search_cache_extent(extent_cache, 0);
9488 rec = container_of(cache, struct extent_record, cache);
9489 set_extent_dirty(root->fs_info->excluded_extents,
9491 rec->start + rec->max_size - 1);
9492 cache = next_cache_extent(cache);
9495 /* pin down all the corrupted blocks too */
9496 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9498 set_extent_dirty(root->fs_info->excluded_extents,
9500 cache->start + cache->size - 1);
9501 cache = next_cache_extent(cache);
9503 prune_corrupt_blocks(root->fs_info);
9504 reset_cached_block_groups(root->fs_info);
9507 reset_cached_block_groups(root->fs_info);
9510 * We need to delete any duplicate entries we find first otherwise we
9511 * could mess up the extent tree when we have backrefs that actually
9512 * belong to a different extent item and not the weird duplicate one.
9514 while (repair && !list_empty(&duplicate_extents)) {
9515 rec = to_extent_record(duplicate_extents.next);
9516 list_del_init(&rec->list);
9518 /* Sometimes we can find a backref before we find an actual
9519 * extent, so we need to process it a little bit to see if there
9520 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9521 * if this is a backref screwup. If we need to delete stuff
9522 * process_duplicates() will return 0, otherwise it will return
9525 if (process_duplicates(extent_cache, rec))
9527 ret = delete_duplicate_records(root, rec);
9531 * delete_duplicate_records will return the number of entries
9532 * deleted, so if it's greater than 0 then we know we actually
9533 * did something and we need to remove.
9546 cache = search_cache_extent(extent_cache, 0);
9549 rec = container_of(cache, struct extent_record, cache);
9550 if (rec->num_duplicates) {
9551 fprintf(stderr, "extent item %llu has multiple extent "
9552 "items\n", (unsigned long long)rec->start);
9556 if (rec->refs != rec->extent_item_refs) {
9557 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9558 (unsigned long long)rec->start,
9559 (unsigned long long)rec->nr);
9560 fprintf(stderr, "extent item %llu, found %llu\n",
9561 (unsigned long long)rec->extent_item_refs,
9562 (unsigned long long)rec->refs);
9563 ret = record_orphan_data_extents(root->fs_info, rec);
9569 if (all_backpointers_checked(rec, 1)) {
9570 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9571 (unsigned long long)rec->start,
9572 (unsigned long long)rec->nr);
9576 if (!rec->owner_ref_checked) {
9577 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9578 (unsigned long long)rec->start,
9579 (unsigned long long)rec->nr);
9584 if (repair && fix) {
9585 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9591 if (rec->bad_full_backref) {
9592 fprintf(stderr, "bad full backref, on [%llu]\n",
9593 (unsigned long long)rec->start);
9595 ret = fixup_extent_flags(root->fs_info, rec);
9603 * Although it's not a extent ref's problem, we reuse this
9604 * routine for error reporting.
9605 * No repair function yet.
9607 if (rec->crossing_stripes) {
9609 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9610 rec->start, rec->start + rec->max_size);
9614 if (rec->wrong_chunk_type) {
9616 "bad extent [%llu, %llu), type mismatch with chunk\n",
9617 rec->start, rec->start + rec->max_size);
9621 remove_cache_extent(extent_cache, cache);
9622 free_all_extent_backrefs(rec);
9623 if (!init_extent_tree && repair && (!cur_err || fix))
9624 clear_extent_dirty(root->fs_info->excluded_extents,
9626 rec->start + rec->max_size - 1);
9631 if (ret && ret != -EAGAIN) {
9632 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9635 struct btrfs_trans_handle *trans;
9637 root = root->fs_info->extent_root;
9638 trans = btrfs_start_transaction(root, 1);
9639 if (IS_ERR(trans)) {
9640 ret = PTR_ERR(trans);
9644 ret = btrfs_fix_block_accounting(trans, root);
9647 ret = btrfs_commit_transaction(trans, root);
/*
 * Work out the size of a single stripe for a chunk of @length bytes laid
 * out over @num_stripes stripes, depending on the profile bits in @type:
 *   RAID0  -> length / num_stripes
 *   RAID10 -> length * 2 / num_stripes
 *   RAID5  -> length / (num_stripes - 1)
 *   RAID6  -> length / (num_stripes - 2)
 * The last assignment (stripe_size = length) looks like the fallback for
 * every other profile.
 *
 * NOTE(review): no divisor is validated in the code shown here, so e.g. a
 * RAID5 chunk with a single stripe would divide by zero -- confirm that
 * callers only pass sane stripe counts.
 */
9656 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9660 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9661 stripe_size = length;
9662 stripe_size /= num_stripes;
9663 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
/* RAID10: length is doubled before being divided by the stripe count */
9664 stripe_size = length * 2;
9665 stripe_size /= num_stripes;
9666 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
/* RAID5: one stripe is left out of the divisor */
9667 stripe_size = length;
9668 stripe_size /= (num_stripes - 1);
9669 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
/* RAID6: two stripes are left out of the divisor */
9670 stripe_size = length;
9671 stripe_size /= (num_stripes - 2);
9673 stripe_size = length;
9679 * Check the chunk with its block group/dev list ref:
9680 * Return 0 if all refs seems valid.
9681 * Return 1 if only some of the refs seem valid; a later check is needed to
9682 * rebuild the others, e.g. a missing block group, which requires searching the extent tree.
9683 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * Cross-check one chunk record against the cached block group items and
 * the cached device extents.
 *
 * Block group side: the block group cache is searched for the chunk; the
 * chunk's length/offset/type_flags are compared with the block group's
 * offset/objectid/flags and the message below describes a mismatch.
 * The block group is unlinked from its list and stored in
 * chunk_rec->bg_rec (presumably only when it matched -- confirm).
 *
 * Device extent side: for every stripe, the dev extent at
 * (devid, stripe offset) is looked up with the per-stripe length from
 * calc_stripe_length(), and its objectid/offset/chunk_offset/length are
 * compared with the stripe. Dev extents are moved onto chunk_rec->dextents
 * (presumably only when they matched -- confirm).
 *
 * NOTE(review): metadump_v2 starts at 0; where it is updated and how it
 * takes part in the type comparison is not visible here -- confirm.
 */
9685 static int check_chunk_refs(struct chunk_record *chunk_rec,
9686 struct block_group_tree *block_group_cache,
9687 struct device_extent_tree *dev_extent_cache,
9690 struct cache_extent *block_group_item;
9691 struct block_group_record *block_group_rec;
9692 struct cache_extent *dev_extent_item;
9693 struct device_extent_record *dev_extent_rec;
9697 int metadump_v2 = 0;
9701 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9704 if (block_group_item) {
9705 block_group_rec = container_of(block_group_item,
9706 struct block_group_record,
/* chunk length/offset are compared with the block group's offset/objectid */
9708 if (chunk_rec->length != block_group_rec->offset ||
9709 chunk_rec->offset != block_group_rec->objectid ||
9711 chunk_rec->type_flags != block_group_rec->flags)) {
9714 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9715 chunk_rec->objectid,
9720 chunk_rec->type_flags,
9721 block_group_rec->objectid,
9722 block_group_rec->type,
9723 block_group_rec->offset,
9724 block_group_rec->offset,
9725 block_group_rec->objectid,
9726 block_group_rec->flags);
/* take the block group off its list and attach it to the chunk */
9729 list_del_init(&block_group_rec->list);
9730 chunk_rec->bg_rec = block_group_rec;
9735 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9736 chunk_rec->objectid,
9741 chunk_rec->type_flags);
/* per-stripe length expected for each dev extent of this chunk */
9748 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9749 chunk_rec->num_stripes);
9750 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9751 devid = chunk_rec->stripes[i].devid;
9752 offset = chunk_rec->stripes[i].offset;
9753 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9754 devid, offset, length);
9755 if (dev_extent_item) {
9756 dev_extent_rec = container_of(dev_extent_item,
9757 struct device_extent_record,
9759 if (dev_extent_rec->objectid != devid ||
9760 dev_extent_rec->offset != offset ||
9761 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9762 dev_extent_rec->length != length) {
9765 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9766 chunk_rec->objectid,
9769 chunk_rec->stripes[i].devid,
9770 chunk_rec->stripes[i].offset,
9771 dev_extent_rec->objectid,
9772 dev_extent_rec->offset,
9773 dev_extent_rec->length);
/* move the dev extent from whatever chunk_list it was on to this chunk */
9776 list_move(&dev_extent_rec->chunk_list,
9777 &chunk_rec->dextents);
9782 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9783 chunk_rec->objectid,
9786 chunk_rec->stripes[i].devid,
9787 chunk_rec->stripes[i].offset);
9794 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Walk every chunk in @chunk_cache and validate it with check_chunk_refs().
 *
 * Depending on the result the chunk record is appended to one of the
 * caller-supplied lists: @good when the result is 0, @rebuild when it is
 * > 0, and @bad otherwise (the condition guarding @bad is presumably
 * err < 0 with a NULL check like the other two -- confirm). @good and
 * @rebuild may be NULL, in which case the record is not queued there.
 *
 * Afterwards the block groups still linked on
 * block_group_cache->block_groups and the dev extents still linked on
 * dev_extent_cache->no_chunk_orphans are visited; the message strings
 * below describe them as having found no matching chunk.
 *
 * @silent is handed through to check_chunk_refs().
 */
9795 int check_chunks(struct cache_tree *chunk_cache,
9796 struct block_group_tree *block_group_cache,
9797 struct device_extent_tree *dev_extent_cache,
9798 struct list_head *good, struct list_head *bad,
9799 struct list_head *rebuild, int silent)
9801 struct cache_extent *chunk_item;
9802 struct chunk_record *chunk_rec;
9803 struct block_group_record *bg_rec;
9804 struct device_extent_record *dext_rec;
9808 chunk_item = first_cache_extent(chunk_cache);
9809 while (chunk_item) {
9810 chunk_rec = container_of(chunk_item, struct chunk_record,
9812 err = check_chunk_refs(chunk_rec, block_group_cache,
9813 dev_extent_cache, silent);
/* sort the chunk record into the list matching the check result */
9816 if (err == 0 && good)
9817 list_add_tail(&chunk_rec->list, good);
9818 if (err > 0 && rebuild)
9819 list_add_tail(&chunk_rec->list, rebuild);
9821 list_add_tail(&chunk_rec->list, bad);
9822 chunk_item = next_cache_extent(chunk_item);
/* block groups left on this list were not claimed by any chunk above */
9825 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9828 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9836 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9840 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Add up the length of the cached device extents that belong to @dev_rec
 * and compare the sum with the byte_used value of the device record.
 *
 * The walk starts at the first cached dev extent found for
 * dev_rec->devid at offset 0 and inspects the objectid of each entry to
 * detect the end of this device's extents. Every counted extent is
 * unlinked from its device_list. The message below is used when the sum
 * and byte_used differ.
 */
9851 static int check_device_used(struct device_record *dev_rec,
9852 struct device_extent_tree *dext_cache)
9854 struct cache_extent *cache;
9855 struct device_extent_record *dev_extent_rec;
9858 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9860 dev_extent_rec = container_of(cache,
9861 struct device_extent_record,
/* an entry with a different objectid belongs to another device */
9863 if (dev_extent_rec->objectid != dev_rec->devid)
9866 list_del_init(&dev_extent_rec->device_list);
9867 total_byte += dev_extent_rec->length;
9868 cache = next_cache_extent(cache);
9871 if (total_byte != dev_rec->byte_used) {
9873 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9874 total_byte, dev_rec->byte_used, dev_rec->objectid,
9875 dev_rec->type, dev_rec->offset);
9882 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Run check_device_used() on every device record stored in the @dev_cache
 * rb-tree (iterated with rb_first()/rb_next()), then visit the dev extents
 * linked on dev_extent_cache->no_device_orphans; the message below
 * describes those as dev extents that did not find their device.
 */
9883 static int check_devices(struct rb_root *dev_cache,
9884 struct device_extent_tree *dev_extent_cache)
9886 struct rb_node *dev_node;
9887 struct device_record *dev_rec;
9888 struct device_extent_record *dext_rec;
9892 dev_node = rb_first(dev_cache);
9894 dev_rec = container_of(dev_node, struct device_record, node);
9895 err = check_device_used(dev_rec, dev_extent_cache);
9899 dev_node = rb_next(dev_node);
9901 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9904 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9905 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record, fill it from the arguments (bytenr,
 * objectid, level, drop_level, last_snapshot) and append it to the tail of
 * @head.
 *
 * @drop_key is copied into the record with memcpy(). Callers in this file
 * also pass NULL for it, so the copy is presumably guarded by a NULL
 * check -- confirm.
 * NOTE(review): the handling of a failed malloc() is not visible here.
 */
9912 static int add_root_item_to_list(struct list_head *head,
9913 u64 objectid, u64 bytenr, u64 last_snapshot,
9914 u8 level, u8 drop_level,
9915 struct btrfs_key *drop_key)
9918 struct root_item_record *ri_rec;
9919 ri_rec = malloc(sizeof(*ri_rec));
9922 ri_rec->bytenr = bytenr;
9923 ri_rec->objectid = objectid;
9924 ri_rec->level = level;
9925 ri_rec->drop_level = drop_level;
9926 ri_rec->last_snapshot = last_snapshot;
9928 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9929 list_add_tail(&ri_rec->list, head);
/*
 * Empty @list: as long as it has entries, take the first root_item_record
 * and unlink it. Releasing the record's memory presumably follows the
 * unlink -- confirm.
 */
9934 static void free_root_item_list(struct list_head *list)
9936 struct root_item_record *ri_rec;
9938 while (!list_empty(list)) {
9939 ri_rec = list_first_entry(list, struct root_item_record,
9941 list_del_init(&ri_rec->list);
/*
 * Process the queued root items in @list one at a time.
 *
 * For each record the root's tree block is read with read_tree_block(),
 * registered through add_root_to_pending() and then handed to
 * run_next_block() together with the record; afterwards the buffer is
 * released and the record is removed from @list. A block that is not
 * uptodate after the read is released again (the error path that follows
 * is not visible here).
 *
 * run_next_block() is also called with a NULL record after the per-record
 * handling, presumably to work through whatever is still pending --
 * confirm.
 *
 * The cache/tree arguments are only forwarded to the helpers.
 */
9946 static int deal_root_from_list(struct list_head *list,
9947 struct btrfs_root *root,
9948 struct block_info *bits,
9950 struct cache_tree *pending,
9951 struct cache_tree *seen,
9952 struct cache_tree *reada,
9953 struct cache_tree *nodes,
9954 struct cache_tree *extent_cache,
9955 struct cache_tree *chunk_cache,
9956 struct rb_root *dev_cache,
9957 struct block_group_tree *block_group_cache,
9958 struct device_extent_tree *dev_extent_cache)
9963 while (!list_empty(list)) {
9964 struct root_item_record *rec;
9965 struct extent_buffer *buf;
/* always work on the first queued record */
9966 rec = list_entry(list->next,
9967 struct root_item_record, list);
9969 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
9970 if (!extent_buffer_uptodate(buf)) {
9971 free_extent_buffer(buf);
9975 ret = add_root_to_pending(buf, extent_cache, pending,
9976 seen, nodes, rec->objectid);
9980 * To rebuild extent tree, we need deal with snapshot
9981 * one by one, otherwise we deal with node firstly which
9982 * can maximize readahead.
9985 ret = run_next_block(root, bits, bits_nr, &last,
9986 pending, seen, reada, nodes,
9987 extent_cache, chunk_cache,
9988 dev_cache, block_group_cache,
9989 dev_extent_cache, rec);
9993 free_extent_buffer(buf);
9994 list_del(&rec->list);
/* same call as above, but without a root item record */
10000 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10001 reada, nodes, extent_cache, chunk_cache,
10002 dev_cache, block_group_cache,
10003 dev_extent_cache, NULL);
/*
 * Top-level driver for the chunk and extent checks.
 *
 * Steps visible below:
 *  - initialise the local caches/lists and hook some of them into @fs_info
 *    (excluded_extents, fsck_extent_cache, free_extent_hook,
 *    corrupt_blocks)
 *  - queue the tree root and the chunk root on normal_trees, then walk the
 *    ROOT_ITEMs of the tree root: a root whose drop_progress objectid is 0
 *    is queued on normal_trees, the remaining branch queues on
 *    dropping_trees together with the drop key and drop level
 *  - run deal_root_from_list() on both lists, then check_chunks(),
 *    check_extent_refs() and check_devices()
 *  - unhook the caches from @fs_info and free everything
 *
 * -EAGAIN is tested after most stages; the comment ahead of the first
 * deal_root_from_list() call explains that it means something was fixed
 * and the loop should be restarted. Two separate cleanup sequences appear
 * near the end; which control path reaches which one is not visible here.
 */
10013 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10015 struct rb_root dev_cache;
10016 struct cache_tree chunk_cache;
10017 struct block_group_tree block_group_cache;
10018 struct device_extent_tree dev_extent_cache;
10019 struct cache_tree extent_cache;
10020 struct cache_tree seen;
10021 struct cache_tree pending;
10022 struct cache_tree reada;
10023 struct cache_tree nodes;
10024 struct extent_io_tree excluded_extents;
10025 struct cache_tree corrupt_blocks;
10026 struct btrfs_path path;
10027 struct btrfs_key key;
10028 struct btrfs_key found_key;
10030 struct block_info *bits;
10032 struct extent_buffer *leaf;
10034 struct btrfs_root_item ri;
10035 struct list_head dropping_trees;
10036 struct list_head normal_trees;
10037 struct btrfs_root *root1;
10038 struct btrfs_root *root;
10042 root = fs_info->fs_root;
10043 dev_cache = RB_ROOT;
10044 cache_tree_init(&chunk_cache);
10045 block_group_tree_init(&block_group_cache);
10046 device_extent_tree_init(&dev_extent_cache);
10048 cache_tree_init(&extent_cache);
10049 cache_tree_init(&seen);
10050 cache_tree_init(&pending);
10051 cache_tree_init(&nodes);
10052 cache_tree_init(&reada);
10053 cache_tree_init(&corrupt_blocks);
10054 extent_io_tree_init(&excluded_extents);
10055 INIT_LIST_HEAD(&dropping_trees);
10056 INIT_LIST_HEAD(&normal_trees);
/* expose the function-local caches through fs_info for the duration of the check */
10059 fs_info->excluded_extents = &excluded_extents;
10060 fs_info->fsck_extent_cache = &extent_cache;
10061 fs_info->free_extent_hook = free_extent_hook;
10062 fs_info->corrupt_blocks = &corrupt_blocks;
10066 bits = malloc(bits_nr * sizeof(struct block_info));
10072 if (ctx.progress_enabled) {
10073 ctx.tp = TASK_EXTENTS;
10074 task_start(ctx.info);
/* queue the tree root itself */
10078 root1 = fs_info->tree_root;
10079 level = btrfs_header_level(root1->node);
10080 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10081 root1->node->start, 0, level, 0, NULL);
/* queue the chunk root */
10084 root1 = fs_info->chunk_root;
10085 level = btrfs_header_level(root1->node);
10086 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10087 root1->node->start, 0, level, 0, NULL);
/* walk the items of the tree root and queue every ROOT_ITEM found */
10090 btrfs_init_path(&path);
10093 key.type = BTRFS_ROOT_ITEM_KEY;
10094 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10098 leaf = path.nodes[0];
10099 slot = path.slots[0];
10100 if (slot >= btrfs_header_nritems(path.nodes[0])) {
10101 ret = btrfs_next_leaf(root, &path);
10104 leaf = path.nodes[0];
10105 slot = path.slots[0];
10107 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10108 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10109 unsigned long offset;
10112 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10113 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10114 last_snapshot = btrfs_root_last_snapshot(&ri);
/* drop_progress objectid of 0: queue on normal_trees */
10115 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10116 level = btrfs_root_level(&ri);
10117 ret = add_root_item_to_list(&normal_trees,
10118 found_key.objectid,
10119 btrfs_root_bytenr(&ri),
10120 last_snapshot, level,
/* found_key is overwritten with the drop progress key, so the objectid is saved first */
10125 level = btrfs_root_level(&ri);
10126 objectid = found_key.objectid;
10127 btrfs_disk_key_to_cpu(&found_key,
10128 &ri.drop_progress);
10129 ret = add_root_item_to_list(&dropping_trees,
10131 btrfs_root_bytenr(&ri),
10132 last_snapshot, level,
10133 ri.drop_level, &found_key);
10140 btrfs_release_path(&path);
10143 * check_block can return -EAGAIN if it fixes something, please keep
10144 * this in mind when dealing with return values from these functions, if
10145 * we get -EAGAIN we want to fall through and restart the loop.
10147 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
10148 &seen, &reada, &nodes, &extent_cache,
10149 &chunk_cache, &dev_cache, &block_group_cache,
10150 &dev_extent_cache);
10152 if (ret == -EAGAIN)
10156 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
10157 &pending, &seen, &reada, &nodes,
10158 &extent_cache, &chunk_cache, &dev_cache,
10159 &block_group_cache, &dev_extent_cache);
10161 if (ret == -EAGAIN)
/* no good/bad/rebuild lists are requested and silent is 0 */
10166 ret = check_chunks(&chunk_cache, &block_group_cache,
10167 &dev_extent_cache, NULL, NULL, NULL, 0);
10169 if (ret == -EAGAIN)
10174 ret = check_extent_refs(root, &extent_cache);
10176 if (ret == -EAGAIN)
10181 ret = check_devices(&dev_cache, &dev_extent_cache);
10186 task_stop(ctx.info);
/* first cleanup sequence: detach the local caches from fs_info and free them */
10188 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10189 extent_io_tree_cleanup(&excluded_extents);
10190 fs_info->fsck_extent_cache = NULL;
10191 fs_info->free_extent_hook = NULL;
10192 fs_info->corrupt_blocks = NULL;
10193 fs_info->excluded_extents = NULL;
10196 free_chunk_cache_tree(&chunk_cache);
10197 free_device_cache_tree(&dev_cache);
10198 free_block_group_tree(&block_group_cache);
10199 free_device_extent_tree(&dev_extent_cache);
10200 free_extent_cache_tree(&seen);
10201 free_extent_cache_tree(&pending);
10202 free_extent_cache_tree(&reada);
10203 free_extent_cache_tree(&nodes);
10204 free_root_item_list(&normal_trees);
10205 free_root_item_list(&dropping_trees);
/* second cleanup sequence: this one also frees the extent record cache */
10208 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10209 free_extent_cache_tree(&seen);
10210 free_extent_cache_tree(&pending);
10211 free_extent_cache_tree(&reada);
10212 free_extent_cache_tree(&nodes);
10213 free_chunk_cache_tree(&chunk_cache);
10214 free_block_group_tree(&block_group_cache);
10215 free_device_cache_tree(&dev_cache);
10216 free_device_extent_tree(&dev_extent_cache);
10217 free_extent_record_cache(&extent_cache);
10218 free_root_item_list(&normal_trees);
10219 free_root_item_list(&dropping_trees);
10220 extent_io_tree_cleanup(&excluded_extents);
10225 * Check backrefs of a tree block given by @bytenr or @eb.
10227 * @root: the root containing the @bytenr or @eb
10228 * @eb: tree block extent buffer, can be NULL
10229 * @bytenr: bytenr of the tree block to search
10230 * @level: tree level of the tree block
10231 * @owner: owner of the tree block
10233 * Return >0 for any error found and output error message
10234 * Return 0 for no error found
/*
 * Look up and validate the extent tree backref of the tree block at
 * @bytenr.
 *
 * The extent tree is searched for the METADATA_ITEM/EXTENT_ITEM of @bytenr
 * (the key type depends on the SKINNY_METADATA incompat flag). The item's
 * flags, generation, level and ref count are compared with what is
 * expected, then the inline refs are walked looking for either
 *  - a TREE_BLOCK_REF whose offset is root->objectid or @owner, or
 *  - a SHARED_BLOCK_REF whose parent is accepted by a recursive call with
 *    level + 1 (skipped for a tree reloc root, whose backref points to
 *    itself).
 * A keyed TREE_BLOCK_REF item for root->objectid is searched as well; the
 * comment ahead of that search says this is for the case where the inline
 * refs did not contain the backref.
 *
 * Problems are recorded in err as BACKREF_MISSING / BACKREF_MISMATCH; the
 * "backref lost" message is only printed when @eb is non-NULL.
 *
 * NOTE(review): btrfs_header_generation(eb) is called although @eb may be
 * NULL; the guard is presumably on a line not visible here -- confirm.
 */
10236 static int check_tree_block_ref(struct btrfs_root *root,
10237 struct extent_buffer *eb, u64 bytenr,
10238 int level, u64 owner)
10240 struct btrfs_key key;
10241 struct btrfs_root *extent_root = root->fs_info->extent_root;
10242 struct btrfs_path path;
10243 struct btrfs_extent_item *ei;
10244 struct btrfs_extent_inline_ref *iref;
10245 struct extent_buffer *leaf;
10251 u32 nodesize = root->fs_info->nodesize;
10254 int tree_reloc_root = 0;
/* a tree reloc root is recognised by its objectid and by @bytenr being its root node */
10259 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10260 btrfs_header_bytenr(root->node) == bytenr)
10261 tree_reloc_root = 1;
10263 btrfs_init_path(&path);
10264 key.objectid = bytenr;
10265 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10266 key.type = BTRFS_METADATA_ITEM_KEY;
10268 key.type = BTRFS_EXTENT_ITEM_KEY;
/* search with the largest possible offset, then step back to the extent item */
10269 key.offset = (u64)-1;
10271 /* Search for the backref in extent tree */
10272 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10274 err |= BACKREF_MISSING;
10277 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10279 err |= BACKREF_MISSING;
10283 leaf = path.nodes[0];
10284 slot = path.slots[0];
10285 btrfs_item_key_to_cpu(leaf, &key, slot);
10287 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* METADATA_ITEM: level is the key offset, inline refs follow the extent item directly */
10289 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10290 skinny_level = (int)key.offset;
10291 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
/* otherwise a btrfs_tree_block_info sits between the extent item and the inline refs */
10293 struct btrfs_tree_block_info *info;
10295 info = (struct btrfs_tree_block_info *)(ei + 1);
10296 skinny_level = btrfs_tree_block_level(leaf, info);
10297 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10304 if (!(btrfs_extent_flags(leaf, ei) &
10305 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10307 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10308 key.objectid, nodesize,
10309 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10310 err = BACKREF_MISMATCH;
10312 header_gen = btrfs_header_generation(eb);
10313 extent_gen = btrfs_extent_generation(leaf, ei);
10314 if (header_gen != extent_gen) {
10316 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10317 key.objectid, nodesize, header_gen,
10319 err = BACKREF_MISMATCH;
10321 if (level != skinny_level) {
10323 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10324 key.objectid, nodesize, level, skinny_level);
10325 err = BACKREF_MISMATCH;
10327 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10329 "extent[%llu %u] is referred by other roots than %llu",
10330 key.objectid, nodesize, root->objectid);
10331 err = BACKREF_MISMATCH;
10336 * Iterate the extent/metadata item to find the exact backref
10338 item_size = btrfs_item_size_nr(leaf, slot);
10339 ptr = (unsigned long)iref;
10340 end = (unsigned long)ei + item_size;
10341 while (ptr < end) {
10342 iref = (struct btrfs_extent_inline_ref *)ptr;
10343 type = btrfs_extent_inline_ref_type(leaf, iref);
10344 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10346 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10347 (offset == root->objectid || offset == owner)) {
10349 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10351 * Backref of tree reloc root points to itself, no need
10352 * to check backref any more.
10354 if (tree_reloc_root)
10357 /* Check if the backref points to valid referencer */
10358 found_ref = !check_tree_block_ref(root, NULL,
10359 offset, level + 1, owner);
/* advance by the size of this inline ref type */
10364 ptr += btrfs_extent_inline_ref_size(type);
10368 * Inlined extent item doesn't have what we need, check
10369 * TREE_BLOCK_REF_KEY
10372 btrfs_release_path(&path);
10373 key.objectid = bytenr;
10374 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10375 key.offset = root->objectid;
10377 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10382 err |= BACKREF_MISSING;
10384 btrfs_release_path(&path);
10385 if (eb && (err & BACKREF_MISSING))
10386 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10387 bytenr, nodesize, owner, level);
10392 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10394 * Return >0 any error found and output error message
10395 * Return 0 for no error found
/*
 * Check the EXTENT_DATA item at @slot of leaf @eb against the extent tree.
 *
 * Inline and hole (disk_bytenr == 0) extents are not checked further. For
 * regular extents the function
 *  - checks that disk_num_bytes and num_bytes are aligned to the sector
 *    size (BYTES_UNALIGNED otherwise) and adds them to the global
 *    data_bytes_allocated / data_bytes_referenced counters,
 *  - looks up the EXTENT_ITEM keyed (disk_bytenr, disk_num_bytes) and
 *    checks that it carries BTRFS_EXTENT_FLAG_DATA (BACKREF_MISMATCH
 *    otherwise),
 *  - searches for a data backref in three places: the inline refs of the
 *    extent item, a keyed EXTENT_DATA_REF item, and finally a keyed
 *    SHARED_DATA_REF item whose offset is eb->start.
 * If no backref is found BACKREF_MISSING is set and reported.
 */
10397 static int check_extent_data_item(struct btrfs_root *root,
10398 struct extent_buffer *eb, int slot)
10400 struct btrfs_file_extent_item *fi;
10401 struct btrfs_path path;
10402 struct btrfs_root *extent_root = root->fs_info->extent_root;
10403 struct btrfs_key fi_key;
10404 struct btrfs_key dbref_key;
10405 struct extent_buffer *leaf;
10406 struct btrfs_extent_item *ei;
10407 struct btrfs_extent_inline_ref *iref;
10408 struct btrfs_extent_data_ref *dref;
10411 u64 disk_num_bytes;
10412 u64 extent_num_bytes;
10419 int found_dbackref = 0;
10423 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10424 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10426 /* Nothing to check for hole and inline data extents */
10427 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10428 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10431 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10432 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10433 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10435 /* Check unaligned disk_num_bytes and num_bytes */
10436 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10438 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10439 fi_key.objectid, fi_key.offset, disk_num_bytes,
10440 root->fs_info->sectorsize);
10441 err |= BYTES_UNALIGNED;
10443 data_bytes_allocated += disk_num_bytes;
10445 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10447 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10448 fi_key.objectid, fi_key.offset, extent_num_bytes,
10449 root->fs_info->sectorsize);
10450 err |= BYTES_UNALIGNED;
10452 data_bytes_referenced += extent_num_bytes;
10454 owner = btrfs_header_owner(eb);
10456 /* Check the extent item of the file extent in extent tree */
10457 btrfs_init_path(&path);
10458 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10459 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10460 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10462 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
/* from here on, slot refers to the extent tree leaf, not to @eb */
10466 leaf = path.nodes[0];
10467 slot = path.slots[0];
10468 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10470 extent_flags = btrfs_extent_flags(leaf, ei);
10472 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10474 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10475 disk_bytenr, disk_num_bytes,
10476 BTRFS_EXTENT_FLAG_DATA);
10477 err |= BACKREF_MISMATCH;
10480 /* Check data backref inside that extent item */
10481 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10482 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10483 ptr = (unsigned long)iref;
10484 end = (unsigned long)ei + item_size;
10485 while (ptr < end) {
10486 iref = (struct btrfs_extent_inline_ref *)ptr;
10487 type = btrfs_extent_inline_ref_type(leaf, iref);
/* the data ref is read starting at the inline ref's offset field */
10488 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10490 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10491 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10492 if (ref_root == owner || ref_root == root->objectid)
10493 found_dbackref = 1;
10494 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/* shared ref: accepted when check_tree_block_ref() reports no error for the parent block */
10495 found_dbackref = !check_tree_block_ref(root, NULL,
10496 btrfs_extent_inline_ref_offset(leaf, iref),
10500 if (found_dbackref)
10502 ptr += btrfs_extent_inline_ref_size(type);
10505 if (!found_dbackref) {
10506 btrfs_release_path(&path);
10508 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10509 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10510 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10511 dbref_key.offset = hash_extent_data_ref(root->objectid,
10512 fi_key.objectid, fi_key.offset);
10514 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10515 &dbref_key, &path, 0, 0);
10517 found_dbackref = 1;
10521 btrfs_release_path(&path);
10524 * Neither inlined nor EXTENT_DATA_REF found, try
10525 * SHARED_DATA_REF as last chance.
10527 dbref_key.objectid = disk_bytenr;
10528 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10529 dbref_key.offset = eb->start;
10531 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10532 &dbref_key, &path, 0, 0);
10534 found_dbackref = 1;
10540 if (!found_dbackref)
10541 err |= BACKREF_MISSING;
10542 btrfs_release_path(&path);
10543 if (err & BACKREF_MISSING) {
10544 error("data extent[%llu %llu] backref lost",
10545 disk_bytenr, disk_num_bytes);
10551 * Get the real tree block level, for cases such as a shared block
10552 * Return >= 0 as tree level
10553 * Return <0 for error
/*
 * Determine the level of the tree block at @bytenr from two sources and
 * return it only when they agree.
 *
 * Source one is the extent tree: the extent item for @bytenr must carry
 * BTRFS_EXTENT_FLAG_TREE_BLOCK; the level is the key offset for a
 * METADATA_ITEM, otherwise it is read from the btrfs_tree_block_info that
 * follows the extent item. Source two is the header of the block itself,
 * read with the generation taken from the extent item.
 *
 * The header level is returned on the path shown below; the values used on
 * the error paths are not visible here.
 */
10555 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10557 struct extent_buffer *eb;
10558 struct btrfs_path path;
10559 struct btrfs_key key;
10560 struct btrfs_extent_item *ei;
10567 /* Search extent tree for extent generation and level */
10568 key.objectid = bytenr;
10569 key.type = BTRFS_METADATA_ITEM_KEY;
10570 key.offset = (u64)-1;
10572 btrfs_init_path(&path);
10573 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10576 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10584 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10585 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10586 struct btrfs_extent_item);
10587 flags = btrfs_extent_flags(path.nodes[0], ei);
/* extent items without the TREE_BLOCK flag are handled separately here */
10588 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10593 /* Get transid for later read_tree_block() check */
10594 transid = btrfs_extent_generation(path.nodes[0], ei);
10596 /* Get backref level as one source */
10597 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10598 backref_level = key.offset;
10600 struct btrfs_tree_block_info *info;
10602 info = (struct btrfs_tree_block_info *)(ei + 1);
10603 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10605 btrfs_release_path(&path);
10607 /* Get level from tree block as an alternative source */
10608 eb = read_tree_block(fs_info, bytenr, transid);
10609 if (!extent_buffer_uptodate(eb)) {
10610 free_extent_buffer(eb);
10613 header_level = btrfs_header_level(eb);
10614 free_extent_buffer(eb);
/* the two sources must agree */
10616 if (header_level != backref_level)
10618 return header_level;
10621 btrfs_release_path(&path);
10626 * Check if a tree block backref is valid (points to a valid tree block)
10627 * if level == -1, level will be resolved
10628 * Return >0 for any error found and print error message
/*
 * Verify that root @root_id really references the tree block at @bytenr.
 *
 * The level may be resolved through query_tree_block_level() (the comment
 * above the function says this happens for level == -1). The root is read
 * with btrfs_read_fs_root(), the block is read to obtain its first key,
 * and the root is then searched for that key with path.lowest_level set to
 * the block's level. The node found at that level must have the same
 * bytenr and level as requested.
 *
 * Failures are recorded as REFERENCER_MISSING or REFERENCER_MISMATCH. An
 * empty leaf (no items, level 0) is released without a key check.
 */
10630 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10631 u64 bytenr, int level)
10633 struct btrfs_root *root;
10634 struct btrfs_key key;
10635 struct btrfs_path path;
10636 struct extent_buffer *eb;
10637 struct extent_buffer *node;
10638 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10642 /* Query level for level == -1 special case */
10644 level = query_tree_block_level(fs_info, bytenr);
10646 err |= REFERENCER_MISSING;
10650 key.objectid = root_id;
10651 key.type = BTRFS_ROOT_ITEM_KEY;
10652 key.offset = (u64)-1;
10654 root = btrfs_read_fs_root(fs_info, &key);
10655 if (IS_ERR(root)) {
10656 err |= REFERENCER_MISSING;
10660 /* Read out the tree block to get item/node key */
10661 eb = read_tree_block(fs_info, bytenr, 0);
10662 if (!extent_buffer_uptodate(eb)) {
10663 err |= REFERENCER_MISSING;
10664 free_extent_buffer(eb);
10668 /* Empty tree, no need to check key */
10669 if (!btrfs_header_nritems(eb) && !level) {
10670 free_extent_buffer(eb);
/* take the first key of the block: node key or item key (presumably chosen by level -- confirm) */
10675 btrfs_node_key_to_cpu(eb, &key, 0);
10677 btrfs_item_key_to_cpu(eb, &key, 0);
10679 free_extent_buffer(eb);
10681 btrfs_init_path(&path);
10682 path.lowest_level = level;
10683 /* Search with the first key, to ensure we can reach it */
10684 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10686 err |= REFERENCER_MISSING;
10690 node = path.nodes[level];
10691 if (btrfs_header_bytenr(node) != bytenr) {
10693 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10694 bytenr, nodesize, bytenr,
10695 btrfs_header_bytenr(node));
10696 err |= REFERENCER_MISMATCH;
10698 if (btrfs_header_level(node) != level) {
10700 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10701 bytenr, nodesize, level,
10702 btrfs_header_level(node));
10703 err |= REFERENCER_MISMATCH;
10707 btrfs_release_path(&path);
/* two message variants: one without and one with the level */
10709 if (err & REFERENCER_MISSING) {
10711 error("extent [%llu %d] lost referencer (owner: %llu)",
10712 bytenr, nodesize, root_id);
10715 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10716 bytenr, nodesize, root_id, level);
10723 * Check if tree block @eb is tree reloc root.
10724 * Return 0 if it's not or any problem happens
10725 * Return 1 if it's a tree reloc root
/*
 * Decide whether tree block @eb is the root node of a tree reloc root.
 *
 * The reloc root is looked up with the key
 * (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM, owner of @eb) via
 * btrfs_read_fs_root_no_cache(); @eb matches when its bytenr equals the
 * bytenr of that root's node. The root read here is released with
 * btrfs_free_fs_root() before returning.
 */
10727 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10728 struct extent_buffer *eb)
10730 struct btrfs_root *tree_reloc_root;
10731 struct btrfs_key key;
10732 u64 bytenr = btrfs_header_bytenr(eb);
10733 u64 owner = btrfs_header_owner(eb);
/* the key offset is the owner recorded in the header of @eb */
10736 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10737 key.offset = owner;
10738 key.type = BTRFS_ROOT_ITEM_KEY;
10740 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10741 if (IS_ERR(tree_reloc_root))
10744 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10746 btrfs_free_fs_root(tree_reloc_root);
10751 * Check referencer for shared block backref
10752 * If level == -1, this function will resolve the level.
/*
 * Verify a shared block backref: the tree block at @parent must really
 * point to @bytenr.
 *
 * The parent block is read and, after the level has been resolved through
 * query_tree_block_level() where needed, two cases are handled:
 *  - @parent == @bytenr: accepted when is_tree_reloc_root() says the block
 *    is a tree reloc root;
 *  - otherwise the parent's header level must be level + 1 and one of its
 *    block pointers must equal @bytenr.
 * Returns REFERENCER_MISSING (after printing a message) when no parent was
 * found; the success return value is not visible here.
 */
10754 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10755 u64 parent, u64 bytenr, int level)
10757 struct extent_buffer *eb;
10759 int found_parent = 0;
10762 eb = read_tree_block(fs_info, parent, 0);
10763 if (!extent_buffer_uptodate(eb))
10767 level = query_tree_block_level(fs_info, bytenr);
10771 /* It's possible it's a tree reloc root */
10772 if (parent == bytenr) {
10773 if (is_tree_reloc_root(fs_info, eb))
/* a real parent sits exactly one level above the child */
10778 if (level + 1 != btrfs_header_level(eb))
10781 nr = btrfs_header_nritems(eb);
10782 for (i = 0; i < nr; i++) {
10783 if (bytenr == btrfs_node_blockptr(eb, i)) {
10789 free_extent_buffer(eb);
10790 if (!found_parent) {
10792 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10793 bytenr, fs_info->nodesize, parent, level);
10794 return REFERENCER_MISSING;
10800 * Check referencer for normal (inlined) data ref
10801 * If len == 0, it will be resolved by searching in extent tree
/*
 * Verify a keyed/inline EXTENT_DATA_REF: root @root_id, inode @objectid
 * must contain exactly @count file extents that point at the data extent
 * (@bytenr, @len) with the backref offset @offset.
 *
 * The extent tree is first searched for the EXTENT_ITEM of @bytenr (the
 * comment above the function says this resolves @len when it is 0). Then
 * the referencing root is read and the inode's EXTENT_DATA items are
 * walked with btrfs_next_item(); an item matches when its disk_bytenr and
 * disk_num_bytes equal @bytenr/@len and
 * (file offset - file extent offset) equals the backref offset.
 *
 * Returns REFERENCER_MISSING (after printing a message) when the number of
 * matches differs from @count; the success return value is not visible
 * here.
 */
10803 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10804 u64 root_id, u64 objectid, u64 offset,
10805 u64 bytenr, u64 len, u32 count)
10807 struct btrfs_root *root;
10808 struct btrfs_root *extent_root = fs_info->extent_root;
10809 struct btrfs_key key;
10810 struct btrfs_path path;
10811 struct extent_buffer *leaf;
10812 struct btrfs_file_extent_item *fi;
10813 u32 found_count = 0;
10818 key.objectid = bytenr;
10819 key.type = BTRFS_EXTENT_ITEM_KEY;
10820 key.offset = (u64)-1;
10822 btrfs_init_path(&path);
10823 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10826 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10829 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
/* the item found must be the EXTENT_ITEM of exactly this bytenr */
10830 if (key.objectid != bytenr ||
10831 key.type != BTRFS_EXTENT_ITEM_KEY)
10834 btrfs_release_path(&path);
10836 key.objectid = root_id;
10837 key.type = BTRFS_ROOT_ITEM_KEY;
10838 key.offset = (u64)-1;
10839 btrfs_init_path(&path);
10841 root = btrfs_read_fs_root(fs_info, &key);
10845 key.objectid = objectid;
10846 key.type = BTRFS_EXTENT_DATA_KEY;
10848 * It can be nasty as data backref offset is
10849 * file offset - file extent offset, which is smaller or
10850 * equal to original backref offset. The only special case is
10851 * overflow. So we need to special check and do further search.
/* start from file offset 0 when the top bit of the backref offset is set */
10853 key.offset = offset & (1ULL << 63) ? 0 : offset;
10855 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10860 * Search afterwards to get correct one
10861 * NOTE: As we must do a comprehensive check on the data backref to
10862 * make sure the dref count also matches, we must iterate all file
10863 * extents for that inode.
10866 leaf = path.nodes[0];
10867 slot = path.slots[0];
10869 if (slot >= btrfs_header_nritems(leaf))
10871 btrfs_item_key_to_cpu(leaf, &key, slot);
10872 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10874 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10876 * Except normal disk bytenr and disk num bytes, we still
10877 * need to do extra check on dbackref offset as
10878 * dbackref offset = file_offset - file_extent_offset
10880 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10881 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10882 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10887 ret = btrfs_next_item(root, &path);
10892 btrfs_release_path(&path);
10893 if (found_count != count) {
10895 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10896 bytenr, len, root_id, objectid, offset, count, found_count);
10897 return REFERENCER_MISSING;
10903 * Check if the referencer of a shared data backref exists
/*
 * Look for the file extent item behind a shared data backref.
 *
 * The tree block at @parent is read and its items are walked. Items are
 * filtered on key type (EXTENT_DATA) and on the inline extent type before
 * the disk bytenr of the file extent is compared with @bytenr. When
 * found_parent is still clear after the walk, an error is reported and
 * REFERENCER_MISSING is returned.
 *
 * @fs_info: filesystem the tree block is read from
 * @parent:  bytenr of the tree block named by the shared backref
 * @bytenr:  disk bytenr of the data extent being referenced
 */
10905 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10906 u64 parent, u64 bytenr)
10908 struct extent_buffer *eb;
10909 struct btrfs_key key;
10910 struct btrfs_file_extent_item *fi;
10912 int found_parent = 0;
/* Third argument of read_tree_block is passed as 0 here. */
10915 eb = read_tree_block(fs_info, parent, 0);
10916 if (!extent_buffer_uptodate(eb))
10919 nr = btrfs_header_nritems(eb);
10920 for (i = 0; i < nr; i++) {
10921 btrfs_item_key_to_cpu(eb, &key, i);
10922 if (key.type != BTRFS_EXTENT_DATA_KEY)
10925 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10926 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
/* A file extent pointing at @bytenr is the referencer being searched for. */
10929 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10936 free_extent_buffer(eb);
10937 if (!found_parent) {
10938 error("shared extent %llu referencer lost (parent: %llu)",
10940 return REFERENCER_MISSING;
10946 * This function will check a given extent item, including its backref and
10947 * itself (like crossing stripe boundary and type)
10949 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Check one EXTENT_ITEM or METADATA_ITEM and every inline backref in it.
 *
 * Visible steps:
 *  - add the extent size to the global bytes_used (key.offset for an
 *    EXTENT_ITEM key, nodesize on the other branch),
 *  - reject items smaller than struct btrfs_extent_item,
 *  - for metadata, report extents crossing a stripe boundary,
 *  - walk the inline refs that follow the extent item (and, for old style
 *    metadata EXTENT_ITEMs, the btrfs_tree_block_info) and dispatch each
 *    one to the matching backref checker.
 *
 * Problems are accumulated as error bits in err (CROSSING_STRIPE_BOUNDARY,
 * ITEM_SIZE_MISMATCH, UNKNOWN_TYPE are set on visible lines).
 *
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the extent item
 * @slot:    slot of the extent item inside @eb
 *
 * NOTE(review): the stripe check passes global_info rather than @fs_info;
 * presumably both refer to the same filesystem, confirm.
 */
10951 static int check_extent_item(struct btrfs_fs_info *fs_info,
10952 struct extent_buffer *eb, int slot)
10954 struct btrfs_extent_item *ei;
10955 struct btrfs_extent_inline_ref *iref;
10956 struct btrfs_extent_data_ref *dref;
10960 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10961 u32 item_size = btrfs_item_size_nr(eb, slot);
10966 struct btrfs_key key;
10970 btrfs_item_key_to_cpu(eb, &key, slot);
/* Account the extent in the global used-bytes counter. */
10971 if (key.type == BTRFS_EXTENT_ITEM_KEY)
10972 bytes_used += key.offset;
10974 bytes_used += nodesize;
10976 if (item_size < sizeof(*ei)) {
10978 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10979 * old thing when on disk format is still un-determined.
10980 * No need to care about it anymore
10982 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10986 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10987 flags = btrfs_extent_flags(eb, ei);
10989 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10991 if (metadata && check_crossing_stripes(global_info, key.objectid,
10993 error("bad metadata [%llu, %llu) crossing stripe boundary",
10994 key.objectid, key.objectid + nodesize);
10995 err |= CROSSING_STRIPE_BOUNDARY;
/* Inline data starts right behind the fixed size extent item. */
10998 ptr = (unsigned long)(ei + 1);
11000 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
11001 /* Old EXTENT_ITEM metadata */
11002 struct btrfs_tree_block_info *info;
11004 info = (struct btrfs_tree_block_info *)ptr;
11005 level = btrfs_tree_block_level(eb, info);
11006 ptr += sizeof(struct btrfs_tree_block_info);
11008 /* New METADATA_ITEM */
11009 level = key.offset;
/* ptr is compared against end to detect the end of the inline refs. */
11011 end = (unsigned long)ei + item_size;
11014 /* Reached extent item end normally */
11018 /* Beyond extent item end, wrong item size */
11020 err |= ITEM_SIZE_MISMATCH;
11021 error("extent item at bytenr %llu slot %d has wrong size",
11026 /* Now check every backref in this extent item */
11027 iref = (struct btrfs_extent_inline_ref *)ptr;
11028 type = btrfs_extent_inline_ref_type(eb, iref);
11029 offset = btrfs_extent_inline_ref_offset(eb, iref);
11031 case BTRFS_TREE_BLOCK_REF_KEY:
11032 ret = check_tree_block_backref(fs_info, offset, key.objectid,
11036 case BTRFS_SHARED_BLOCK_REF_KEY:
11037 ret = check_shared_block_backref(fs_info, offset, key.objectid,
11041 case BTRFS_EXTENT_DATA_REF_KEY:
/* For this ref type the data ref structure overlays the offset field. */
11042 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11043 ret = check_extent_data_backref(fs_info,
11044 btrfs_extent_data_ref_root(eb, dref),
11045 btrfs_extent_data_ref_objectid(eb, dref),
11046 btrfs_extent_data_ref_offset(eb, dref),
11047 key.objectid, key.offset,
11048 btrfs_extent_data_ref_count(eb, dref));
11051 case BTRFS_SHARED_DATA_REF_KEY:
11052 ret = check_shared_data_backref(fs_info, offset, key.objectid);
11056 error("extent[%llu %d %llu] has unknown ref type: %d",
11057 key.objectid, key.type, key.offset, type);
11058 err |= UNKNOWN_TYPE;
/* Step to the next inline ref; its size depends on the ref type. */
11062 ptr += btrfs_extent_inline_ref_size(type);
11070 * Check if a dev extent item is referred correctly by its chunk
/*
 * Check that a dev extent is referenced back by the chunk it names.
 *
 * The chunk objectid and chunk offset stored in the dev extent are used to
 * search the chunk tree. The chunk found there is validated, its stripe
 * length is compared with the dev extent length, and its stripes are
 * scanned for one whose devid and offset equal the dev extent key
 * (objectid and offset). When found_chunk stays clear an error is reported
 * and REFERENCER_MISSING is returned.
 *
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the dev extent item
 * @slot:    slot of the dev extent item inside @eb
 */
11072 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
11073 struct extent_buffer *eb, int slot)
11075 struct btrfs_root *chunk_root = fs_info->chunk_root;
11076 struct btrfs_dev_extent *ptr;
11077 struct btrfs_path path;
11078 struct btrfs_key chunk_key;
11079 struct btrfs_key devext_key;
11080 struct btrfs_chunk *chunk;
11081 struct extent_buffer *l;
11085 int found_chunk = 0;
11088 btrfs_item_key_to_cpu(eb, &devext_key, slot);
11089 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
11090 length = btrfs_dev_extent_length(eb, ptr);
/* Build the key of the chunk item this dev extent claims to belong to. */
11092 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
11093 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11094 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
11096 btrfs_init_path(&path);
11097 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11102 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
11103 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
/* The per stripe length of the chunk has to equal the dev extent length. */
11108 if (btrfs_stripe_length(fs_info, l, chunk) != length)
11111 num_stripes = btrfs_chunk_num_stripes(l, chunk);
11112 for (i = 0; i < num_stripes; i++) {
11113 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
11114 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
/* A stripe with the same devid and offset as the dev extent key matches. */
11116 if (devid == devext_key.objectid &&
11117 offset == devext_key.offset) {
11123 btrfs_release_path(&path);
11124 if (!found_chunk) {
11126 "device extent[%llu, %llu, %llu] did not find the related chunk",
11127 devext_key.objectid, devext_key.offset, length);
11128 return REFERENCER_MISSING;
11134 * Check if the used space is correct with the dev item
/*
 * Compare the bytes_used field of a dev item with the summed length of the
 * dev extents recorded for that device in the dev tree.
 *
 * The dev tree is searched from the first possible DEV_EXTENT key of the
 * device id, then walked item by item, adding every dev extent length to
 * total. The walk is bounded by checks on the key objectid and key type.
 *
 * Returns REFERENCER_MISSING when the initial search fails and
 * ACCOUNTING_MISMATCH when the summed length differs from bytes_used.
 *
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the dev item
 * @slot:    slot of the dev item inside @eb
 *
 * Fix: the mismatch message now prints the key of the dev item itself,
 * which is reloaded into key right before the message, in the same way the
 * first error message of this function does. Previously it printed
 * unrelated constants and left the reloaded key unused.
 */
11136 static int check_dev_item(struct btrfs_fs_info *fs_info,
11137 struct extent_buffer *eb, int slot)
11139 struct btrfs_root *dev_root = fs_info->dev_root;
11140 struct btrfs_dev_item *dev_item;
11141 struct btrfs_path path;
11142 struct btrfs_key key;
11143 struct btrfs_dev_extent *ptr;
11149 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
11150 dev_id = btrfs_device_id(eb, dev_item);
11151 used = btrfs_device_bytes_used(eb, dev_item);
/* Dev extents are keyed by device id, so start the search there. */
11153 key.objectid = dev_id;
11154 key.type = BTRFS_DEV_EXTENT_KEY;
11157 btrfs_init_path(&path);
11158 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* key was reused for the search; reload the dev item key for the message. */
11160 btrfs_item_key_to_cpu(eb, &key, slot);
11161 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
11162 key.objectid, key.type, key.offset);
11163 btrfs_release_path(&path);
11164 return REFERENCER_MISSING;
11167 /* Iterate dev_extents to calculate the used space of a device */
11169 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11172 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11173 if (key.objectid > dev_id)
11175 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11178 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11179 struct btrfs_dev_extent);
11180 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11182 ret = btrfs_next_item(dev_root, &path);
11186 btrfs_release_path(&path);
11188 if (used != total) {
/* Reload the dev item key so the message identifies the checked item. */
11189 btrfs_item_key_to_cpu(eb, &key, slot);
11191 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11192 total, used, key.objectid,
11193 key.type, key.offset);
11194 return ACCOUNTING_MISMATCH;
11200 * Check a block group item with its referencer (chunk) and its used space
11201 * with extent/metadata item
/*
 * Check one block group item against its chunk and against the extents
 * that live inside it.
 *
 * Part 1: the chunk tree is searched for the chunk item keyed by the block
 * group start (bg_key.objectid). A failed search sets REFERENCER_MISSING,
 * a chunk length mismatch sets REFERENCER_MISMATCH.
 *
 * Part 2: the extent tree is walked from the block group start up to
 * bg_key.objectid + bg_key.offset. EXTENT_ITEM and METADATA_ITEM sizes are
 * summed into total, and the extent flags (DATA or TREE_BLOCK) are compared
 * with the block group flags; a disagreement sets CHUNK_TYPE_MISMATCH.
 * Finally total is compared with the used field of the block group item
 * and a difference sets ACCOUNTING_MISMATCH.
 *
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the block group item
 * @slot:    slot of the block group item inside @eb
 */
11203 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11204 struct extent_buffer *eb, int slot)
11206 struct btrfs_root *extent_root = fs_info->extent_root;
11207 struct btrfs_root *chunk_root = fs_info->chunk_root;
11208 struct btrfs_block_group_item *bi;
11209 struct btrfs_block_group_item bg_item;
11210 struct btrfs_path path;
11211 struct btrfs_key bg_key;
11212 struct btrfs_key chunk_key;
11213 struct btrfs_key extent_key;
11214 struct btrfs_chunk *chunk;
11215 struct extent_buffer *leaf;
11216 struct btrfs_extent_item *ei;
11217 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11225 btrfs_item_key_to_cpu(eb, &bg_key, slot);
11226 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* Copy the item out of the extent buffer before reading its fields. */
11227 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11228 used = btrfs_block_group_used(&bg_item);
11229 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk item is keyed by the logical start of the block group. */
11231 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11232 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11233 chunk_key.offset = bg_key.objectid;
11235 btrfs_init_path(&path);
11236 /* Search for the referencer chunk */
11237 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11240 "block group[%llu %llu] did not find the related chunk item",
11241 bg_key.objectid, bg_key.offset);
11242 err |= REFERENCER_MISSING;
11244 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11245 struct btrfs_chunk);
11246 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11249 "block group[%llu %llu] related chunk item length does not match",
11250 bg_key.objectid, bg_key.offset);
11251 err |= REFERENCER_MISMATCH;
11254 btrfs_release_path(&path);
11256 /* Search from the block group bytenr */
11257 extent_key.objectid = bg_key.objectid;
11258 extent_key.type = 0;
11259 extent_key.offset = 0;
11261 btrfs_init_path(&path);
11262 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11266 /* Iterate extent tree to account used space */
11268 leaf = path.nodes[0];
11270 /* Search slot can point to the last item beyond leaf nritems */
11271 if (path.slots[0] >= btrfs_header_nritems(leaf))
11274 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* Keys at or past the end of the block group are outside its range. */
11275 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11278 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11279 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11281 if (extent_key.objectid < bg_key.objectid)
11284 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11287 total += extent_key.offset;
11289 ei = btrfs_item_ptr(leaf, path.slots[0],
11290 struct btrfs_extent_item);
11291 flags = btrfs_extent_flags(leaf, ei);
/* Data extents need a DATA block group, tree blocks a SYSTEM or METADATA one. */
11292 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11293 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11295 "bad extent[%llu, %llu) type mismatch with chunk",
11296 extent_key.objectid,
11297 extent_key.objectid + extent_key.offset);
11298 err |= CHUNK_TYPE_MISMATCH;
11300 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11301 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11302 BTRFS_BLOCK_GROUP_METADATA))) {
11304 "bad extent[%llu, %llu) type mismatch with chunk",
11305 extent_key.objectid,
11306 extent_key.objectid + nodesize);
11307 err |= CHUNK_TYPE_MISMATCH;
11311 ret = btrfs_next_item(extent_root, &path);
11317 btrfs_release_path(&path);
11319 if (total != used) {
11321 "block group[%llu %llu] used %llu but extent items used %llu",
11322 bg_key.objectid, bg_key.offset, used, total);
11323 err |= ACCOUNTING_MISMATCH;
11329 * Check a chunk item.
11330 * Including checking all referred dev_extents and block group
/*
 * Check one chunk item together with the block group item and the dev
 * extents it should be backed by.
 *
 * Visible steps:
 *  - validate the chunk with btrfs_check_chunk_valid; an invalid chunk
 *    sets BYTES_UNALIGNED | UNKNOWN_TYPE,
 *  - search the extent tree for the block group item keyed by
 *    (chunk start, BLOCK_GROUP_ITEM, chunk length) and compare its flags
 *    with the chunk type; both failures set REFERENCER_MISSING,
 *  - for every stripe, search the dev tree for the dev extent keyed by
 *    (stripe devid, DEV_EXTENT, stripe offset) and compare its chunk
 *    objectid, chunk offset and length with the chunk; a failure sets
 *    BACKREF_MISSING.
 *
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the chunk item
 * @slot:    slot of the chunk item inside @eb
 *
 * Fix: the per stripe error message now prints chunk_key.offset as the
 * chunk start, like every other message in this function. It used to print
 * chunk_key.objectid next to chunk_end, which does not describe the
 * [start, end) range of the chunk.
 */
11332 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11333 struct extent_buffer *eb, int slot)
11335 struct btrfs_root *extent_root = fs_info->extent_root;
11336 struct btrfs_root *dev_root = fs_info->dev_root;
11337 struct btrfs_path path;
11338 struct btrfs_key chunk_key;
11339 struct btrfs_key bg_key;
11340 struct btrfs_key devext_key;
11341 struct btrfs_chunk *chunk;
11342 struct extent_buffer *leaf;
11343 struct btrfs_block_group_item *bi;
11344 struct btrfs_block_group_item bg_item;
11345 struct btrfs_dev_extent *ptr;
11357 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11358 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11359 length = btrfs_chunk_length(eb, chunk);
/* The chunk covers the logical range [chunk_key.offset, chunk_end). */
11360 chunk_end = chunk_key.offset + length;
11361 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11364 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11366 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11369 type = btrfs_chunk_type(eb, chunk);
/* The block group item is keyed by chunk start and chunk length. */
11371 bg_key.objectid = chunk_key.offset;
11372 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11373 bg_key.offset = length;
11375 btrfs_init_path(&path);
11376 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11379 "chunk[%llu %llu) did not find the related block group item",
11380 chunk_key.offset, chunk_end);
11381 err |= REFERENCER_MISSING;
11383 leaf = path.nodes[0];
11384 bi = btrfs_item_ptr(leaf, path.slots[0],
11385 struct btrfs_block_group_item);
11386 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11388 if (btrfs_block_group_flags(&bg_item) != type) {
11390 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11391 chunk_key.offset, chunk_end, type,
11392 btrfs_block_group_flags(&bg_item));
/* NOTE(review): a flags mismatch is reported with the MISSING bit; confirm intent. */
11393 err |= REFERENCER_MISSING;
11397 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11398 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11399 for (i = 0; i < num_stripes; i++) {
/* The path is released and reinitialised at the top of every iteration. */
11400 btrfs_release_path(&path);
11401 btrfs_init_path(&path);
11402 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11403 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11404 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11406 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11409 goto not_match_dev;
11411 leaf = path.nodes[0];
11412 ptr = btrfs_item_ptr(leaf, path.slots[0],
11413 struct btrfs_dev_extent);
11414 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11415 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
/* The dev extent has to point back at this chunk with the stripe length. */
11416 if (objectid != chunk_key.objectid ||
11417 offset != chunk_key.offset ||
11418 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11419 goto not_match_dev;
11422 err |= BACKREF_MISSING;
11424 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11425 chunk_key.offset, chunk_end, i);
11428 btrfs_release_path(&path);
11434 * Main entry function to check known items and update related accounting info
/*
 * Dispatch every item of a leaf to the checker for its key type.
 *
 * Starting from slot, the key of each item is read and its type selects
 * one of the check_* helpers. EXTENT_CSUM items only add their item size
 * to the global total_csum_bytes. The walk goes on while the incremented
 * slot is below the number of items in @eb.
 *
 * @root: root the leaf is being checked for; also provides fs_info
 * @eb:   leaf whose items are checked
 *
 * NOTE(review): how the per item results are combined into the return
 * value is not visible in this listing.
 */
11436 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11438 struct btrfs_fs_info *fs_info = root->fs_info;
11439 struct btrfs_key key;
11442 struct btrfs_extent_data_ref *dref;
11447 btrfs_item_key_to_cpu(eb, &key, slot);
11451 case BTRFS_EXTENT_DATA_KEY:
11452 ret = check_extent_data_item(root, eb, slot);
11455 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11456 ret = check_block_group_item(fs_info, eb, slot);
11459 case BTRFS_DEV_ITEM_KEY:
11460 ret = check_dev_item(fs_info, eb, slot);
11463 case BTRFS_CHUNK_ITEM_KEY:
11464 ret = check_chunk_item(fs_info, eb, slot);
11467 case BTRFS_DEV_EXTENT_KEY:
11468 ret = check_dev_extent_item(fs_info, eb, slot);
/* Both extent item flavours share one checker. */
11471 case BTRFS_EXTENT_ITEM_KEY:
11472 case BTRFS_METADATA_ITEM_KEY:
11473 ret = check_extent_item(fs_info, eb, slot);
/* Checksum items are only accounted, not verified, at this point. */
11476 case BTRFS_EXTENT_CSUM_KEY:
11477 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* The remaining cases handle backrefs stored as separate keyed items. */
11479 case BTRFS_TREE_BLOCK_REF_KEY:
11480 ret = check_tree_block_backref(fs_info, key.offset,
11484 case BTRFS_EXTENT_DATA_REF_KEY:
11485 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11486 ret = check_extent_data_backref(fs_info,
11487 btrfs_extent_data_ref_root(eb, dref),
11488 btrfs_extent_data_ref_objectid(eb, dref),
11489 btrfs_extent_data_ref_offset(eb, dref),
11491 btrfs_extent_data_ref_count(eb, dref));
11494 case BTRFS_SHARED_BLOCK_REF_KEY:
11495 ret = check_shared_block_backref(fs_info, key.offset,
11499 case BTRFS_SHARED_DATA_REF_KEY:
11500 ret = check_shared_data_backref(fs_info, key.offset,
11508 if (++slot < btrfs_header_nritems(eb))
11515 * Helper function for later fs/subvol tree check. To determine if a tree
11516 * block should be checked.
11517 * This function ensures that only the direct referencer with the lowest
11518 * rootid checks a fs/subvolume tree block.
11520 * Backref check at extent tree would detect errors like missing subvolume
11521 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * Decide whether @root is the one that should check tree block @eb.
 *
 * The extent tree is searched for the extent item of the block (search key
 * uses METADATA_ITEM with offset -1, followed by
 * btrfs_previous_extent_item). The inline refs of that item are then
 * scanned: a TREE_BLOCK_REF whose offset is lower than root->objectid
 * means a lower root also references the block, and the path is released
 * on that branch.
 *
 * @root: root currently being traversed
 * @eb:   tree block in question
 *
 * NOTE(review): the concrete return values are not visible in this
 * listing; the caller treats a zero result as skip this block.
 */
11523 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11525 struct btrfs_root *extent_root = root->fs_info->extent_root;
11526 struct btrfs_key key;
11527 struct btrfs_path path;
11528 struct extent_buffer *leaf;
11530 struct btrfs_extent_item *ei;
11536 struct btrfs_extent_inline_ref *iref;
11539 btrfs_init_path(&path);
11540 key.objectid = btrfs_header_bytenr(eb);
11541 key.type = BTRFS_METADATA_ITEM_KEY;
11542 key.offset = (u64)-1;
11545 * Any failure in backref resolving means we can't determine
11546 * whom the tree block belongs to.
11547 * So in that case, we need to check that tree block
11549 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11553 ret = btrfs_previous_extent_item(extent_root, &path,
11554 btrfs_header_bytenr(eb));
11558 leaf = path.nodes[0];
11559 slot = path.slots[0];
11560 btrfs_item_key_to_cpu(leaf, &key, slot);
11561 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* METADATA_ITEM has inline refs right after the extent item. */
11563 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11564 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
/* Otherwise a btrfs_tree_block_info sits between the item and the refs. */
11566 struct btrfs_tree_block_info *info;
11568 info = (struct btrfs_tree_block_info *)(ei + 1);
11569 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11572 item_size = btrfs_item_size_nr(leaf, slot);
11573 ptr = (unsigned long)iref;
11574 end = (unsigned long)ei + item_size;
11575 while (ptr < end) {
11576 iref = (struct btrfs_extent_inline_ref *)ptr;
11577 type = btrfs_extent_inline_ref_type(leaf, iref);
11578 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11581 * We only check the tree block if current root is
11582 * the lowest referencer of it.
11584 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11585 offset < root->objectid) {
11586 btrfs_release_path(&path);
11590 ptr += btrfs_extent_inline_ref_size(type);
11593 * Normally we should also check keyed tree block ref, but that may be
11594 * very time consuming. Inlined ref should already make us skip a lot
11595 * of refs now. So skip search keyed tree block ref.
11599 btrfs_release_path(&path);
11604 * Traversal function for tree block. We will do:
11605 * 1) Skip shared fs/subvolume tree blocks
11606 * 2) Update related bytes accounting
11607 * 3) Pre-order traversal
/*
 * Recursively check a tree block and everything below it.
 *
 * For fs trees the block is first passed to should_check. Then the global
 * byte counters are updated from node->len, the block is verified with
 * check_tree_block_ref, and either its items are checked (check_leaf_items,
 * with the leaf free space added to btree_space_waste) or its children are
 * read with read_tree_block and traversed by a recursive call. Child keys
 * are compared against the drop_progress key of the root when the level
 * equals the drop_level of the root item.
 *
 * @root: root the block is traversed for
 * @node: tree block to check
 */
11609 static int traverse_tree_block(struct btrfs_root *root,
11610 struct extent_buffer *node)
11612 struct extent_buffer *eb;
11613 struct btrfs_key key;
11614 struct btrfs_key drop_key;
11622 * Skip shared fs/subvolume tree block, in that case they will
11623 * be checked by referencer with lowest rootid
11625 if (is_fstree(root->objectid) && !should_check(root, node))
11628 /* Update bytes accounting */
11629 total_btree_bytes += node->len;
11630 if (fs_root_objectid(btrfs_header_owner(node)))
11631 total_fs_tree_bytes += node->len;
11632 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11633 total_extent_tree_bytes += node->len;
11635 /* pre-order traversal, check itself first */
11636 level = btrfs_header_level(node);
11637 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11638 btrfs_header_level(node),
11639 btrfs_header_owner(node));
11643 "check %s failed root %llu bytenr %llu level %d, force continue check",
11644 level ? "node":"leaf", root->objectid,
11645 btrfs_header_bytenr(node), btrfs_header_level(node));
/* Leaf handling: account free space, then check every item. */
11648 btree_space_waste += btrfs_leaf_free_space(root, node);
11649 ret = check_leaf_items(root, node);
/* Node handling: unused key pointer slots count as wasted space. */
11654 nr = btrfs_header_nritems(node);
11655 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11656 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11657 sizeof(struct btrfs_key_ptr);
11659 /* Then check all its children */
11660 for (i = 0; i < nr; i++) {
11661 u64 blocknr = btrfs_node_blockptr(node, i);
11663 btrfs_node_key_to_cpu(node, &key, i);
11664 if (level == root->root_item.drop_level &&
11665 is_dropped_key(&key, &drop_key))
11669 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11670 * to call the function itself.
11672 eb = read_tree_block(root->fs_info, blocknr, 0);
11673 if (extent_buffer_uptodate(eb)) {
11674 ret = traverse_tree_block(root, eb);
11677 free_extent_buffer(eb);
11684 * Low memory usage version check_chunks_and_extents.
11686 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
11688 struct btrfs_path path;
11689 struct btrfs_key key;
11690 struct btrfs_root *root1;
11691 struct btrfs_root *root;
11692 struct btrfs_root *cur_root;
11696 root = fs_info->fs_root;
11698 root1 = root->fs_info->chunk_root;
11699 ret = traverse_tree_block(root1, root1->node);
11702 root1 = root->fs_info->tree_root;
11703 ret = traverse_tree_block(root1, root1->node);
11706 btrfs_init_path(&path);
11707 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11709 key.type = BTRFS_ROOT_ITEM_KEY;
11711 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11713 error("cannot find extent treet in tree_root");
11718 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11719 if (key.type != BTRFS_ROOT_ITEM_KEY)
11721 key.offset = (u64)-1;
11723 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11724 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11727 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11728 if (IS_ERR(cur_root) || !cur_root) {
11729 error("failed to read tree: %lld", key.objectid);
11733 ret = traverse_tree_block(cur_root, cur_root->node);
11736 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11737 btrfs_free_fs_root(cur_root);
11739 ret = btrfs_next_item(root1, &path);
11745 btrfs_release_path(&path);
/*
 * Run the chunk and extent check in the selected check mode.
 *
 * Prints a status line to stderr unless ctx.progress_enabled is set, then
 * calls check_chunks_and_extents_v2 when check_mode is CHECK_MODE_LOWMEM
 * and check_chunks_and_extents otherwise.
 *
 * @fs_info: filesystem being checked
 */
11749 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11753 if (!ctx.progress_enabled)
11754 fprintf(stderr, "checking extents\n");
11755 if (check_mode == CHECK_MODE_LOWMEM)
11756 ret = check_chunks_and_extents_v2(fs_info);
11758 ret = check_chunks_and_extents(fs_info);
/*
 * Replace the root node of @root with an empty, freshly initialised block.
 *
 * A block is obtained (one visible path allocates it with
 * btrfs_alloc_free_block), its header is zeroed and rewritten (level,
 * bytenr, generation, backref revision, owner, fsid, chunk tree uuid) and
 * the buffer is marked dirty. When the new block starts at the same bytenr
 * as the old root node, the root item generation and level are refreshed
 * and the root item is written with btrfs_update_root. The old node is
 * freed and the root is added to the dirty list.
 *
 * @trans:     running transaction
 * @root:      root to reinitialise
 * @overwrite: NOTE(review): its use is not visible in this listing;
 *             presumably selects reusing the current root block instead of
 *             allocating a new one, confirm
 */
11763 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11764 struct btrfs_root *root, int overwrite)
11766 struct extent_buffer *c;
11767 struct extent_buffer *old = root->node;
11770 struct btrfs_disk_key disk_key = {0,0,0};
11776 extent_buffer_get(c);
11779 c = btrfs_alloc_free_block(trans, root,
11780 root->fs_info->nodesize,
11781 root->root_key.objectid,
11782 &disk_key, level, 0, 0);
11785 extent_buffer_get(c);
/* Wipe the header and fill in the fields of an empty block. */
11789 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11790 btrfs_set_header_level(c, level);
11791 btrfs_set_header_bytenr(c, c->start);
11792 btrfs_set_header_generation(c, trans->transid);
11793 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11794 btrfs_set_header_owner(c, root->root_key.objectid);
11796 write_extent_buffer(c, root->fs_info->fsid,
11797 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11799 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11800 btrfs_header_chunk_tree_uuid(c),
11803 btrfs_mark_buffer_dirty(c);
11805 * this case can happen in the following case:
11807 * 1.overwrite previous root.
11809 * 2.reinit reloc data root, this is because we skip pin
11810 * down reloc data tree before which means we can allocate
11811 * same block bytenr here.
11813 if (old->start == c->start) {
11814 btrfs_set_root_generation(&root->root_item,
11816 root->root_item.level = btrfs_header_level(root->node);
11817 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11818 &root->root_key, &root->root_item);
11820 free_extent_buffer(c);
11824 free_extent_buffer(old);
11826 add_root_to_dirty_list(root);
/*
 * Recursively pin the tree block @eb and the blocks reachable from it.
 *
 * A block whose range already has EXTENT_DIRTY set in
 * fs_info->pinned_extents is not pinned again. Otherwise the block is
 * pinned with btrfs_pin_extent and its entries are walked:
 *  - ROOT_ITEM entries (except the extent tree, tree reloc and data reloc
 *    objectids) have their root block read and pinned recursively with
 *    tree_root passed as 0,
 *  - node pointers are either pinned directly (level 1 and not the tree
 *    root) or read and pinned recursively.
 *
 * @fs_info:   filesystem whose blocks are pinned
 * @eb:        tree block to start from
 * @tree_root: non-zero when @eb belongs to the tree root; passed through
 *             unchanged on the node pointer recursion
 */
11830 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11831 struct extent_buffer *eb, int tree_root)
11833 struct extent_buffer *tmp;
11834 struct btrfs_root_item *ri;
11835 struct btrfs_key key;
11837 int level = btrfs_header_level(eb);
11843 * If we have pinned this block before, don't pin it again.
11844 * This can not only avoid forever loop with broken filesystem
11845 * but also give us some speedups.
11847 if (test_range_bit(&fs_info->pinned_extents, eb->start,
11848 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11851 btrfs_pin_extent(fs_info, eb->start, eb->len);
11853 nritems = btrfs_header_nritems(eb);
11854 for (i = 0; i < nritems; i++) {
11856 btrfs_item_key_to_cpu(eb, &key, i);
11857 if (key.type != BTRFS_ROOT_ITEM_KEY)
11859 /* Skip the extent root and reloc roots */
11860 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11861 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11862 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11864 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11865 bytenr = btrfs_disk_root_bytenr(eb, ri);
11868 * If at any point we start needing the real root we
11869 * will have to build a stump root for the root we are
11870 * in, but for now this doesn't actually use the root so
11871 * just pass in extent_root.
11873 tmp = read_tree_block(fs_info, bytenr, 0);
11874 if (!extent_buffer_uptodate(tmp)) {
11875 fprintf(stderr, "Error reading root block\n");
11878 ret = pin_down_tree_blocks(fs_info, tmp, 0);
11879 free_extent_buffer(tmp);
11883 bytenr = btrfs_node_blockptr(eb, i);
11885 /* If we aren't the tree root don't read the block */
11886 if (level == 1 && !tree_root) {
11887 btrfs_pin_extent(fs_info, bytenr,
11888 fs_info->nodesize);
11892 tmp = read_tree_block(fs_info, bytenr, 0);
11893 if (!extent_buffer_uptodate(tmp)) {
11894 fprintf(stderr, "Error reading tree block\n");
11897 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11898 free_extent_buffer(tmp);
/*
 * Pin the blocks of the chunk tree and then those reachable from the tree
 * root. The tree root call passes tree_root = 1, the chunk tree call 0.
 *
 * @fs_info: filesystem whose metadata is pinned
 */
11907 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11911 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11915 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk tree.
 *
 * The avail_*_alloc_bits of @fs_info are cleared, then the chunk tree is
 * walked: for every CHUNK_ITEM a block group is added with
 * btrfs_add_block_group (type, objectid, offset and length taken from the
 * chunk) and the chunk range is marked dirty in fs_info->free_space_cache.
 * A second loop walks the created block groups with
 * btrfs_lookup_first_block_group.
 *
 * @fs_info: filesystem whose block groups are recreated
 *
 * NOTE(review): what the second loop does with each cache entry is not
 * visible in this listing.
 */
11918 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11920 struct btrfs_block_group_cache *cache;
11921 struct btrfs_path path;
11922 struct extent_buffer *leaf;
11923 struct btrfs_chunk *chunk;
11924 struct btrfs_key key;
11928 btrfs_init_path(&path);
11930 key.type = BTRFS_CHUNK_ITEM_KEY;
11932 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11934 btrfs_release_path(&path);
11939 * We do this in case the block groups were screwed up and had alloc
11940 * bits that aren't actually set on the chunks. This happens with
11941 * restored images every time and could happen in real life I guess.
11943 fs_info->avail_data_alloc_bits = 0;
11944 fs_info->avail_metadata_alloc_bits = 0;
11945 fs_info->avail_system_alloc_bits = 0;
11947 /* First we need to create the in-memory block groups */
11949 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11950 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11952 btrfs_release_path(&path);
11960 leaf = path.nodes[0];
11961 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11962 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11967 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11968 btrfs_add_block_group(fs_info, 0,
11969 btrfs_chunk_type(leaf, chunk),
11970 key.objectid, key.offset,
11971 btrfs_chunk_length(leaf, chunk));
11972 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11973 key.offset + btrfs_chunk_length(leaf, chunk));
11978 cache = btrfs_lookup_first_block_group(fs_info, start);
11982 start = cache->key.objectid + cache->key.offset;
11985 btrfs_release_path(&path);
/*
 * Remove the traces of a pending balance from the tree root and
 * reinitialise the data reloc tree.
 *
 * Visible steps:
 *  - search for the BALANCE_ITEM and delete it with btrfs_del_item; one
 *    branch jumps straight to reinit_data_reloc,
 *  - search from the TREE_RELOC objectid and delete the matching root
 *    items in batches with btrfs_del_items, re-searching in between,
 *  - read the DATA_RELOC tree root, record it in the transaction,
 *    reinitialise it with btrfs_fsck_reinit_root (overwrite = 0) and create
 *    its root directory with btrfs_make_root_dir.
 *
 * @trans:   running transaction
 * @fs_info: filesystem being repaired
 */
11989 static int reset_balance(struct btrfs_trans_handle *trans,
11990 struct btrfs_fs_info *fs_info)
11992 struct btrfs_root *root = fs_info->tree_root;
11993 struct btrfs_path path;
11994 struct extent_buffer *leaf;
11995 struct btrfs_key key;
11996 int del_slot, del_nr = 0;
12000 btrfs_init_path(&path);
12001 key.objectid = BTRFS_BALANCE_OBJECTID;
12002 key.type = BTRFS_BALANCE_ITEM_KEY;
/* ins_len -1 and cow 1: the search prepares the path for a deletion. */
12004 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12009 goto reinit_data_reloc;
12014 ret = btrfs_del_item(trans, root, &path);
12017 btrfs_release_path(&path);
12019 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12020 key.type = BTRFS_ROOT_ITEM_KEY;
12022 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12026 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12031 ret = btrfs_del_items(trans, root, &path,
12038 btrfs_release_path(&path);
12041 ret = btrfs_search_slot(trans, root, &key, &path,
12048 leaf = path.nodes[0];
12049 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12050 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
12052 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
12057 del_slot = path.slots[0];
12066 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
12070 btrfs_release_path(&path);
/* From here on root refers to the data reloc tree, not the tree root. */
12073 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
12074 key.type = BTRFS_ROOT_ITEM_KEY;
12075 key.offset = (u64)-1;
12076 root = btrfs_read_fs_root(fs_info, &key);
12077 if (IS_ERR(root)) {
12078 fprintf(stderr, "Error reading data reloc tree\n");
12079 ret = PTR_ERR(root);
12082 record_root_in_trans(trans, root);
12083 ret = btrfs_fsck_reinit_root(trans, root, 0);
12086 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
12088 btrfs_release_path(&path);
/*
 * Throw away the extent tree and rebuild an empty one with block group
 * items only.
 *
 * Order of operations visible here:
 *  1. refuse filesystems with the MIXED_GROUPS incompat flag,
 *  2. pin all metadata blocks (pin_metadata_blocks),
 *  3. free the in-memory block groups and recreate them from the chunk
 *     tree (reset_block_groups),
 *  4. reinitialise the extent root (btrfs_fsck_reinit_root, overwrite = 0),
 *  5. insert one block group item per in-memory block group into the new
 *     extent tree, calling btrfs_extent_post_op after each insert,
 *  6. clear any pending balance (reset_balance).
 *
 * @trans:   running transaction
 * @fs_info: filesystem being repaired
 */
12092 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
12093 struct btrfs_fs_info *fs_info)
12099 * The only reason we don't do this is because right now we're just
12100 * walking the trees we find and pinning down their bytes, we don't look
12101 * at any of the leaves. In order to do mixed groups we'd have to check
12102 * the leaves of any fs roots and pin down the bytes for any file
12103 * extents we find. Not hard but why do it if we don't have to?
12105 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
12106 fprintf(stderr, "We don't support re-initing the extent tree "
12107 "for mixed block groups yet, please notify a btrfs "
12108 "developer you want to do this so they can add this "
12109 "functionality.\n");
12114 * first we need to walk all of the trees except the extent tree and pin
12115 * down the bytes that are in use so we don't overwrite any existing
12118 ret = pin_metadata_blocks(fs_info);
12120 fprintf(stderr, "error pinning down used bytes\n");
12125 * Need to drop all the block groups since we're going to recreate all
12128 btrfs_free_block_groups(fs_info);
12129 ret = reset_block_groups(fs_info);
12131 fprintf(stderr, "error resetting the block groups\n");
12135 /* Ok we can allocate now, reinit the extent root */
12136 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
12138 fprintf(stderr, "extent root initialization failed\n");
12140 * When the transaction code is updated we should end the
12141 * transaction, but for now progs only knows about commit so
12142 * just return an error.
12148 * Now we have all the in-memory block groups setup so we can make
12149 * allocations properly, and the metadata we care about is safe since we
12150 * pinned all of it above.
12153 struct btrfs_block_group_cache *cache;
12155 cache = btrfs_lookup_first_block_group(fs_info, start);
12158 start = cache->key.objectid + cache->key.offset;
12159 ret = btrfs_insert_item(trans, fs_info->extent_root,
12160 &cache->key, &cache->item,
12161 sizeof(cache->item));
12163 fprintf(stderr, "Error adding block group\n");
12166 btrfs_extent_post_op(trans, fs_info->extent_root);
12169 ret = reset_balance(trans, fs_info);
12171 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force a copy-on-write of the metadata block @eb.
 *
 * The owner root is looked up from the block header, a transaction is
 * started, and the tree is searched with cow = 1 for the first key of the
 * block with path.lowest_level set to the level of the block. The
 * transaction is then committed and the path released.
 *
 * @root: any root of the filesystem; only root->fs_info is used before the
 *        variable is overwritten with the owner root
 * @eb:   metadata block to be rewritten
 *
 * NOTE(review): the result of btrfs_commit_transaction is not stored on
 * the visible line; confirm a commit failure cannot go unnoticed.
 */
12176 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
12178 struct btrfs_path path;
12179 struct btrfs_trans_handle *trans;
12180 struct btrfs_key key;
12183 printf("Recowing metadata block %llu\n", eb->start);
12184 key.objectid = btrfs_header_owner(eb);
12185 key.type = BTRFS_ROOT_ITEM_KEY;
12186 key.offset = (u64)-1;
12188 root = btrfs_read_fs_root(root->fs_info, &key);
12189 if (IS_ERR(root)) {
12190 fprintf(stderr, "Couldn't find owner root %llu\n",
12192 return PTR_ERR(root);
12195 trans = btrfs_start_transaction(root, 1);
12197 return PTR_ERR(trans);
12199 btrfs_init_path(&path);
/* Stop the search at the level of the block so that this block is cowed. */
12200 path.lowest_level = btrfs_header_level(eb);
/* Nodes and leaves store their first key in different structures. */
12201 if (path.lowest_level)
12202 btrfs_node_key_to_cpu(eb, &key, 0);
12204 btrfs_item_key_to_cpu(eb, &key, 0);
12206 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12207 btrfs_commit_transaction(trans, root);
12208 btrfs_release_path(&path);
/*
 * Delete one item that was recorded as bad.
 *
 * The root named by bad->root_id is read, a transaction is started, the
 * item is located with a deleting search (ins_len -1, cow 1) for bad->key
 * and removed with btrfs_del_item. The transaction is committed and the
 * path released afterwards.
 *
 * @root: any root of the filesystem; only root->fs_info is used before the
 *        variable is overwritten with the owning root
 * @bad:  record holding the root id and the key of the item to delete
 *
 * NOTE(review): the result of btrfs_commit_transaction is not stored on
 * the visible line; confirm a commit failure cannot go unnoticed.
 */
12212 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12214 struct btrfs_path path;
12215 struct btrfs_trans_handle *trans;
12216 struct btrfs_key key;
12219 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12220 bad->key.type, bad->key.offset);
12221 key.objectid = bad->root_id;
12222 key.type = BTRFS_ROOT_ITEM_KEY;
12223 key.offset = (u64)-1;
12225 root = btrfs_read_fs_root(root->fs_info, &key);
12226 if (IS_ERR(root)) {
12227 fprintf(stderr, "Couldn't find owner root %llu\n",
12229 return PTR_ERR(root);
12232 trans = btrfs_start_transaction(root, 1);
12234 return PTR_ERR(trans);
12236 btrfs_init_path(&path);
12237 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12243 ret = btrfs_del_item(trans, root, &path);
12245 btrfs_commit_transaction(trans, root);
12246 btrfs_release_path(&path);
/*
 * Drop the log tree reference from the super block.
 *
 * Starts a transaction on root, sets both the log root and the log root level
 * in fs_info->super_copy to 0, and commits the transaction.  ret is set to
 * PTR_ERR(trans) when the transaction cannot be started, otherwise to the
 * result of the commit.
 */
12250 static int zero_log_tree(struct btrfs_root *root)
12252 struct btrfs_trans_handle *trans;
12255 trans = btrfs_start_transaction(root, 1);
12256 if (IS_ERR(trans)) {
12257 ret = PTR_ERR(trans);
12260 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12261 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12262 ret = btrfs_commit_transaction(trans, root);
/*
 * Checksum a data range one sector at a time and insert the results into
 * csum_root.
 *
 * For every step of fs_info->sectorsize bytes while offset < len, the sector
 * at start + offset is read with read_extent_data() into buf and handed to
 * btrfs_csum_file_block().  buf is supplied by the caller; the callers in this
 * file allocate it with fs_info->sectorsize bytes.
 */
12266 static int populate_csum(struct btrfs_trans_handle *trans,
12267 struct btrfs_root *csum_root, char *buf, u64 start,
12270 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12275 while (offset < len) {
12276 sectorsize = fs_info->sectorsize;
12277 ret = read_extent_data(fs_info, buf, start + offset,
/*
 * The third argument is the end of the whole range (start + len) on every
 * iteration; the fourth is the bytenr of the sector just read.
 */
12281 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12282 start + offset, buf, sectorsize);
12285 offset += sectorsize;
/*
 * Generate checksums for the regular file extents referenced by one
 * fs/subvolume tree.
 *
 * cur_root is walked without a transaction (btrfs_search_slot() with a NULL
 * handle, then btrfs_next_item()).  For each BTRFS_EXTENT_DATA_KEY item whose
 * file extent type is BTRFS_FILE_EXTENT_REG, populate_csum() is called on the
 * extent's on-disk range (disk_bytenr, disk_num_bytes) using a scratch buffer
 * of one sector allocated here.
 */
12290 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12291 struct btrfs_root *csum_root,
12292 struct btrfs_root *cur_root)
12294 struct btrfs_path path;
12295 struct btrfs_key key;
12296 struct extent_buffer *node;
12297 struct btrfs_file_extent_item *fi;
/* Scratch buffer for one sector of file data. */
12304 buf = malloc(cur_root->fs_info->sectorsize);
12308 btrfs_init_path(&path);
12312 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12315 /* Iterate all regular file extents and fill its csum */
12317 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12319 if (key.type != BTRFS_EXTENT_DATA_KEY)
12321 node = path.nodes[0];
12322 slot = path.slots[0];
12323 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12324 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* The checksummed range is the extent as stored on disk. */
12326 start = btrfs_file_extent_disk_bytenr(node, fi);
12327 len = btrfs_file_extent_disk_num_bytes(node, fi);
12329 ret = populate_csum(trans, csum_root, buf, start, len);
/*
 * -EEXIST from populate_csum() is special-cased.
 * NOTE(review): presumably tolerated because an extent shared by several
 * files is visited more than once - confirm.
 */
12330 if (ret == -EEXIST)
12336 * TODO: if next leaf is corrupted, jump to nearest next valid
12339 ret = btrfs_next_item(cur_root, &path);
12349 btrfs_release_path(&path);
/*
 * Generate checksums by visiting every fs/subvolume tree.
 *
 * The tree root is scanned without a transaction, starting at
 * BTRFS_FS_TREE_OBJECTID.  Each BTRFS_ROOT_ITEM_KEY item whose objectid
 * passes is_fstree() is read with btrfs_read_fs_root() (key.offset forced to
 * (u64)-1) and handed to fill_csum_tree_from_one_fs_root().
 */
12354 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12355 struct btrfs_root *csum_root)
12357 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12358 struct btrfs_path path;
12359 struct btrfs_root *tree_root = fs_info->tree_root;
12360 struct btrfs_root *cur_root;
12361 struct extent_buffer *node;
12362 struct btrfs_key key;
12366 btrfs_init_path(&path);
12367 key.objectid = BTRFS_FS_TREE_OBJECTID;
12369 key.type = BTRFS_ROOT_ITEM_KEY;
12370 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12379 node = path.nodes[0];
12380 slot = path.slots[0];
12381 btrfs_item_key_to_cpu(node, &key, slot);
/*
 * Three filters on the current key: objectid beyond BTRFS_LAST_FREE_OBJECTID
 * (presumably ends the scan), non root-item keys and non-fs trees
 * (presumably skipped).
 */
12382 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12384 if (key.type != BTRFS_ROOT_ITEM_KEY)
12386 if (!is_fstree(key.objectid))
12388 key.offset = (u64)-1;
12390 cur_root = btrfs_read_fs_root(fs_info, &key);
12391 if (IS_ERR(cur_root) || !cur_root) {
12392 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12396 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12401 ret = btrfs_next_item(tree_root, &path);
12411 btrfs_release_path(&path);
/*
 * Generate checksums from the data extents recorded in the extent tree.
 *
 * The extent tree is scanned without a transaction, moving to the next leaf
 * whenever the slot runs past the current leaf's item count.  For each
 * BTRFS_EXTENT_ITEM_KEY item carrying BTRFS_EXTENT_FLAG_DATA, populate_csum()
 * is called with the item's objectid as the start of the range.  Items that
 * fail either test are presumably skipped.
 */
12415 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12416 struct btrfs_root *csum_root)
12418 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12419 struct btrfs_path path;
12420 struct btrfs_extent_item *ei;
12421 struct extent_buffer *leaf;
12423 struct btrfs_key key;
12426 btrfs_init_path(&path);
12428 key.type = BTRFS_EXTENT_ITEM_KEY;
12430 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12432 btrfs_release_path(&path);
/* Scratch buffer for one sector of data, passed down to populate_csum(). */
12436 buf = malloc(csum_root->fs_info->sectorsize);
12438 btrfs_release_path(&path);
12443 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12444 ret = btrfs_next_leaf(extent_root, &path);
12452 leaf = path.nodes[0];
12454 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12455 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12460 ei = btrfs_item_ptr(leaf, path.slots[0],
12461 struct btrfs_extent_item);
12462 if (!(btrfs_extent_flags(leaf, ei) &
12463 BTRFS_EXTENT_FLAG_DATA)) {
12468 ret = populate_csum(trans, csum_root, buf, key.objectid,
12475 btrfs_release_path(&path);
12481 * Recalculate the csum and put it into the csum tree.
12483 * Extent tree init will wipe out all the extent info, so in that case, we
12484 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
12485 * will use fs/subvol trees to init the csum tree.
/*
 * Dispatcher: a non-zero search_fs_tree selects the fs/subvolume tree walk
 * (fill_csum_tree_from_fs()); the extent tree walk
 * (fill_csum_tree_from_extent()) is the other path.  The chosen helper's
 * return value is passed straight back to the caller.
 */
12487 static int fill_csum_tree(struct btrfs_trans_handle *trans,
12488 struct btrfs_root *csum_root,
12489 int search_fs_tree)
12491 if (search_fs_tree)
12492 return fill_csum_tree_from_fs(trans, csum_root);
12494 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache.
 *
 * Entries are removed from the tree one at a time, starting from the first,
 * and mapped back to their enclosing root_item_info (presumably so it can be
 * freed).  The tree itself is then freed and the global pointer reset to NULL,
 * which makes build_roots_info_cache() allocate a fresh tree on its next call.
 */
12497 static void free_roots_info_cache(void)
12499 if (!roots_info_cache)
12502 while (!cache_tree_empty(roots_info_cache)) {
12503 struct cache_extent *entry;
12504 struct root_item_info *rii;
12506 entry = first_cache_extent(roots_info_cache);
12509 remove_cache_extent(roots_info_cache, entry);
12510 rii = container_of(entry, struct root_item_info, cache_extent);
12514 free(roots_info_cache);
12515 roots_info_cache = NULL;
/*
 * Populate roots_info_cache with one root_item_info per root id, describing
 * the highest-level tree block found for that root in the extent tree.
 *
 * The extent tree is scanned without a transaction.  Only tree-block extent
 * items are considered: BTRFS_METADATA_ITEM_KEY items, or
 * BTRFS_EXTENT_ITEM_KEY items with BTRFS_EXTENT_FLAG_TREE_BLOCK set.  When the
 * first inline ref of such an item is a BTRFS_TREE_BLOCK_REF_KEY, its offset
 * is taken as the root id and the cache entry keyed by that id (start =
 * root_id, size = 1) is created or updated.
 *
 * The global cache tree is allocated on first use.
 */
12518 static int build_roots_info_cache(struct btrfs_fs_info *info)
12521 struct btrfs_key key;
12522 struct extent_buffer *leaf;
12523 struct btrfs_path path;
12525 if (!roots_info_cache) {
12526 roots_info_cache = malloc(sizeof(*roots_info_cache));
12527 if (!roots_info_cache)
12529 cache_tree_init(roots_info_cache);
12532 btrfs_init_path(&path);
12534 key.type = BTRFS_EXTENT_ITEM_KEY;
12536 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12539 leaf = path.nodes[0];
12542 struct btrfs_key found_key;
12543 struct btrfs_extent_item *ei;
12544 struct btrfs_extent_inline_ref *iref;
12545 int slot = path.slots[0];
12550 struct cache_extent *entry;
12551 struct root_item_info *rii;
12553 if (slot >= btrfs_header_nritems(leaf)) {
12554 ret = btrfs_next_leaf(info->extent_root, &path);
12561 leaf = path.nodes[0];
12562 slot = path.slots[0];
12565 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12567 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12568 found_key.type != BTRFS_METADATA_ITEM_KEY)
12571 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12572 flags = btrfs_extent_flags(leaf, ei);
12574 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12575 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * Locate the first inline ref and the block level.  A metadata item has the
 * ref directly after the extent item and the level in the key offset; the
 * other layout has a btrfs_tree_block_info in between, which carries the
 * level.
 */
12578 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12579 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12580 level = found_key.offset;
12582 struct btrfs_tree_block_info *binfo;
12584 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12585 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12586 level = btrfs_tree_block_level(leaf, binfo);
12590 * For a root extent, it must be of the following type and the
12591 * first (and only one) iref in the item.
12593 type = btrfs_extent_inline_ref_type(leaf, iref);
12594 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12597 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12598 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12600 rii = malloc(sizeof(struct root_item_info));
12605 rii->cache_extent.start = root_id;
12606 rii->cache_extent.size = 1;
12607 rii->level = (u8)-1;
12608 entry = &rii->cache_extent;
12609 ret = insert_cache_extent(roots_info_cache, entry);
12612 rii = container_of(entry, struct root_item_info,
12616 ASSERT(rii->cache_extent.start == root_id);
12617 ASSERT(rii->cache_extent.size == 1);
/*
 * A level of (u8)-1 marks an entry with no block recorded yet.  A first or
 * higher-level block replaces the recorded bytenr, generation and level and
 * resets node_count to 1; a block at the same level takes the other branch
 * (presumably counting it as an additional candidate).
 */
12619 if (level > rii->level || rii->level == (u8)-1) {
12620 rii->level = level;
12621 rii->bytenr = found_key.objectid;
12622 rii->gen = btrfs_extent_generation(leaf, ei);
12623 rii->node_count = 1;
12624 } else if (level == rii->level) {
12632 btrfs_release_path(&path);
/*
 * Check one root item against roots_info_cache and optionally fix it.
 *
 * path must point at the root item (nodes[0] / slots[0]) whose key is
 * root_key.  The item is copied out of the leaf and its bytenr, level and
 * generation are compared with the values recorded for that root id in the
 * cache.  On a mismatch a message is printed (suppressed for the read-only
 * probe made in repair mode), and unless read_only_mode is set the three
 * fields are overwritten with the cached values and the item is written back
 * into the leaf buffer.
 *
 * Error messages are emitted when the root id has no cache entry, when
 * node_count is not exactly 1, and when the root item's generation is newer
 * than the cached one.
 */
12637 static int maybe_repair_root_item(struct btrfs_path *path,
12638 const struct btrfs_key *root_key,
12639 const int read_only_mode)
12641 const u64 root_id = root_key->objectid;
12642 struct cache_extent *entry;
12643 struct root_item_info *rii;
12644 struct btrfs_root_item ri;
12645 unsigned long offset;
12647 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12650 "Error: could not find extent items for root %llu\n",
12651 root_key->objectid);
12655 rii = container_of(entry, struct root_item_info, cache_extent);
12656 ASSERT(rii->cache_extent.start == root_id);
12657 ASSERT(rii->cache_extent.size == 1);
12659 if (rii->node_count != 1) {
12661 "Error: could not find btree root extent for root %llu\n",
/* Work on a local copy of the on-disk root item. */
12666 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12667 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12669 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12670 btrfs_root_level(&ri) != rii->level ||
12671 btrfs_root_generation(&ri) != rii->gen) {
12674 * If we're in repair mode but our caller told us to not update
12675 * the root item, i.e. just check if it needs to be updated, don't
12676 * print this message, since the caller will call us again shortly
12677 * for the same root item without read only mode (the caller will
12678 * open a transaction first).
12680 if (!(read_only_mode && repair))
12682 "%sroot item for root %llu,"
12683 " current bytenr %llu, current gen %llu, current level %u,"
12684 " new bytenr %llu, new gen %llu, new level %u\n",
12685 (read_only_mode ? "" : "fixing "),
12687 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12688 btrfs_root_level(&ri),
12689 rii->bytenr, rii->gen, rii->level);
12691 if (btrfs_root_generation(&ri) > rii->gen) {
12693 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12694 root_id, btrfs_root_generation(&ri), rii->gen);
12698 if (!read_only_mode) {
12699 btrfs_set_root_bytenr(&ri, rii->bytenr);
12700 btrfs_set_root_level(&ri, rii->level);
12701 btrfs_set_root_generation(&ri, rii->gen);
12702 write_extent_buffer(path->nodes[0], &ri,
12703 offset, sizeof(ri));
12713 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12714 * caused read-only snapshots to be corrupted if they were created at a moment
12715 * when the source subvolume/snapshot had orphan items. The issue was that the
12716 * on-disk root items became incorrect, referring to the pre orphan cleanup root
12717 * node instead of the post orphan cleanup root node.
12718 * So this function, and its callees, just detects and fixes those cases. Even
12719 * though the regression was for read-only snapshots, this function applies to
12720 * any snapshot/subvolume root.
12721 * This must be run before any other repair code - not doing so makes other
12722 * repair code delete or modify backrefs in the extent tree for example, which
12723 * will result in an inconsistent fs after repairing the root items.
/*
 * Mechanics: roots_info_cache is built first, then the tree root is walked
 * from BTRFS_FIRST_FREE_OBJECTID.  Items are filtered on
 * BTRFS_ROOT_ITEM_KEY and on BTRFS_TREE_RELOC_OBJECTID, and
 * maybe_repair_root_item() is called per root item - in read-only mode while
 * no transaction is open (trans == NULL), in fixing mode once one is.
 *
 * When the end of a leaf is reached the path is released and, if a
 * transaction is open, it is committed before the walk continues from the
 * next key.  The cache is freed before returning.
 */
12725 static int repair_root_items(struct btrfs_fs_info *info)
12727 struct btrfs_path path;
12728 struct btrfs_key key;
12729 struct extent_buffer *leaf;
12730 struct btrfs_trans_handle *trans = NULL;
12733 int need_trans = 0;
12735 btrfs_init_path(&path);
12737 ret = build_roots_info_cache(info);
12741 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12742 key.type = BTRFS_ROOT_ITEM_KEY;
12747 * Avoid opening and committing transactions if a leaf doesn't have
12748 * any root items that need to be fixed, so that we avoid rotating
12749 * backup roots unnecessarily.
12752 trans = btrfs_start_transaction(info->tree_root, 1);
12753 if (IS_ERR(trans)) {
12754 ret = PTR_ERR(trans);
12759 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12763 leaf = path.nodes[0];
12766 struct btrfs_key found_key;
12768 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12769 int no_more_keys = find_next_key(&path, &key);
12771 btrfs_release_path(&path);
12773 ret = btrfs_commit_transaction(trans,
12785 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12787 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12789 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* Third argument is read_only_mode: 1 while no transaction is open. */
12792 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
/*
 * A fixable item was seen during the read-only probe in repair mode:
 * presumably need_trans is raised and the leaf revisited with a transaction.
 */
12796 if (!trans && repair) {
12799 btrfs_release_path(&path);
12809 free_roots_info_cache();
12810 btrfs_release_path(&path);
12812 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the free space cache of the block groups and invalidate the cache
 * generation in the super block.
 *
 * Block groups are visited by looking up the first group at "current" and
 * then advancing "current" to the end of that group (objectid + offset);
 * btrfs_clear_free_space_cache() is called for each.  Afterwards a
 * transaction on the tree root sets cache_generation in super_copy to
 * (u64)-1 and is committed.
 *
 * NOTE(review): the result of the final btrfs_commit_transaction() is not
 * stored - confirm that ignoring a commit failure is intended.
 */
12819 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12821 struct btrfs_trans_handle *trans;
12822 struct btrfs_block_group_cache *bg_cache;
12826 /* Clear all free space cache inodes and its extent data */
12828 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12831 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12834 current = bg_cache->key.objectid + bg_cache->key.offset;
12837 /* Don't forget to set cache_generation to -1 */
12838 trans = btrfs_start_transaction(fs_info->tree_root, 0);
12839 if (IS_ERR(trans)) {
12840 error("failed to update super block cache generation");
12841 return PTR_ERR(trans);
12843 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12844 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Handle --clear-space-cache for the requested version.
 *
 * clear_version 1: if the filesystem has the FREE_SPACE_TREE compat_ro bit a
 * message pointing at "--clear-space-cache v2" is emitted; the v1 cache is
 * cleared through clear_free_space_cache().
 *
 * clear_version 2: if the FREE_SPACE_TREE compat_ro bit is absent there is
 * nothing to clear and a message says so; the free space tree is cleared
 * through btrfs_clear_free_space_tree().
 *
 * Progress and failure messages are printed around each operation.
 */
12849 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
12854 if (clear_version == 1) {
12855 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
12857 "free space cache v2 detected, use --clear-space-cache v2");
12861 printf("Clearing free space cache\n");
12862 ret = clear_free_space_cache(fs_info);
12864 error("failed to clear free space cache");
12867 printf("Free space cache cleared\n");
12869 } else if (clear_version == 2) {
12870 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
12871 printf("no free space cache v2 to clear\n");
12875 printf("Clear free space cache v2\n");
12876 ret = btrfs_clear_free_space_tree(fs_info);
12878 error("failed to clear free space cache v2: %d", ret);
12881 printf("free space cache v2 cleared\n");
/*
 * Help text for "btrfs check", one string per output line: synopsis, short
 * and long description, then the option list.  cmd_check() passes this array
 * to usage() on a command line error.
 */
12888 const char * const cmd_check_usage[] = {
12889 "btrfs check [options] <device>",
12890 "Check structural integrity of a filesystem (unmounted).",
12891 "Check structural integrity of an unmounted filesystem. Verify internal",
12892 "trees' consistency and item connectivity. In the repair mode try to",
12893 "fix the problems found. ",
12894 "WARNING: the repair mode is considered dangerous",
12896 "-s|--super <superblock> use this superblock copy",
12897 "-b|--backup use the first valid backup root copy",
12898 "--force skip mount checks, repair is not possible",
12899 "--repair try to repair the filesystem",
12900 "--readonly run in read-only mode (default)",
12901 "--init-csum-tree create a new CRC tree",
12902 "--init-extent-tree create a new extent tree",
12903 "--mode <MODE> allows choice of memory/IO trade-offs",
12904 " where MODE is one of:",
12905 " original - read inodes and extents to memory (requires",
12906 " more memory, does less IO)",
12907 " lowmem - try to use less memory but read blocks again",
12909 "--check-data-csum verify checksums of data blocks",
12910 "-Q|--qgroup-report print a report on qgroup consistency",
12911 "-E|--subvol-extents <subvolid>",
12912 " print subvolume extents and sharing state",
12913 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
12914 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
12915 "-p|--progress indicate progress",
12916 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of "btrfs check".
 *
 * Parses the command line, opens the filesystem named by the single
 * positional argument and then, in source order, performs: optional free
 * space cache clearing, log tree zeroing in repair mode (after asking the
 * user), the optional qgroup report and subvolume extent report, optional
 * re-initialisation of the extent and/or csum tree, the chunk/extent check,
 * root item repair, the free space cache/tree check, the fs roots check, the
 * csum check, the root refs check (not in lowmem mode), re-COW of blocks
 * queued on recow_ebs, deletion of queued bad items, and quota group
 * verification/repair.  The accumulated byte counters are printed at the end.
 *
 * NOTE(review): several of the optional steps presumably end the run early;
 * confirm against the exit paths before relying on the order above.
 */
12920 int cmd_check(int argc, char **argv)
12922 struct cache_tree root_cache;
12923 struct btrfs_root *root;
12924 struct btrfs_fs_info *info;
12927 u64 tree_root_bytenr = 0;
12928 u64 chunk_root_bytenr = 0;
12929 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12933 int init_csum_tree = 0;
12935 int clear_space_cache = 0;
12936 int qgroup_report = 0;
12937 int qgroups_repaired = 0;
12938 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* Codes for long-only options; they start at 257, above any char value. */
12943 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12944 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12945 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12946 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
12947 GETOPT_VAL_FORCE };
12948 static const struct option long_options[] = {
12949 { "super", required_argument, NULL, 's' },
12950 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12951 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12952 { "init-csum-tree", no_argument, NULL,
12953 GETOPT_VAL_INIT_CSUM },
12954 { "init-extent-tree", no_argument, NULL,
12955 GETOPT_VAL_INIT_EXTENT },
12956 { "check-data-csum", no_argument, NULL,
12957 GETOPT_VAL_CHECK_CSUM },
12958 { "backup", no_argument, NULL, 'b' },
12959 { "subvol-extents", required_argument, NULL, 'E' },
12960 { "qgroup-report", no_argument, NULL, 'Q' },
12961 { "tree-root", required_argument, NULL, 'r' },
12962 { "chunk-root", required_argument, NULL,
12963 GETOPT_VAL_CHUNK_TREE },
12964 { "progress", no_argument, NULL, 'p' },
12965 { "mode", required_argument, NULL,
12967 { "clear-space-cache", required_argument, NULL,
12968 GETOPT_VAL_CLEAR_SPACE_CACHE},
12969 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
12970 { NULL, 0, NULL, 0}
/*
 * NOTE(review): the short-option string lists 'E' without a trailing ':',
 * while the "subvol-extents" long option is required_argument and the usage
 * text documents "-E|--subvol-extents <subvolid>".  With the short form
 * getopt would not set optarg for 'E', and optarg is later passed to
 * arg_strtou64() for subvolid - confirm and consider "E:".
 */
12973 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
12977 case 'a': /* ignored */ break;
12979 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12982 num = arg_strtou64(optarg);
12983 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12985 "super mirror should be less than %d",
12986 BTRFS_SUPER_MIRROR_MAX);
12989 bytenr = btrfs_sb_offset(((int)num));
12990 printf("using SB copy %llu, bytenr %llu\n", num,
12991 (unsigned long long)bytenr);
12997 subvolid = arg_strtou64(optarg);
13000 tree_root_bytenr = arg_strtou64(optarg);
13002 case GETOPT_VAL_CHUNK_TREE:
13003 chunk_root_bytenr = arg_strtou64(optarg);
13006 ctx.progress_enabled = true;
13010 usage(cmd_check_usage);
13011 case GETOPT_VAL_REPAIR:
13012 printf("enabling repair mode\n");
13014 ctree_flags |= OPEN_CTREE_WRITES;
13016 case GETOPT_VAL_READONLY:
13019 case GETOPT_VAL_INIT_CSUM:
13020 printf("Creating a new CRC tree\n");
13021 init_csum_tree = 1;
13023 ctree_flags |= OPEN_CTREE_WRITES;
13025 case GETOPT_VAL_INIT_EXTENT:
13026 init_extent_tree = 1;
13027 ctree_flags |= (OPEN_CTREE_WRITES |
13028 OPEN_CTREE_NO_BLOCK_GROUPS);
13031 case GETOPT_VAL_CHECK_CSUM:
13032 check_data_csum = 1;
13034 case GETOPT_VAL_MODE:
13035 check_mode = parse_check_mode(optarg);
13036 if (check_mode == CHECK_MODE_UNKNOWN) {
13037 error("unknown mode: %s", optarg);
13041 case GETOPT_VAL_CLEAR_SPACE_CACHE:
13042 if (strcmp(optarg, "v1") == 0) {
13043 clear_space_cache = 1;
13044 } else if (strcmp(optarg, "v2") == 0) {
13045 clear_space_cache = 2;
13046 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
13049 "invalid argument to --clear-space-cache, must be v1 or v2");
13052 ctree_flags |= OPEN_CTREE_WRITES;
13054 case GETOPT_VAL_FORCE:
/* Exactly one positional argument (the device) is accepted. */
13060 if (check_argc_exact(argc - optind, 1))
13061 usage(cmd_check_usage);
13063 if (ctx.progress_enabled) {
13064 ctx.tp = TASK_NOTHING;
13065 ctx.info = task_init(print_status_check, print_status_return, &ctx);
13068 /* This check is the only reason for --readonly to exist */
13069 if (readonly && repair) {
13070 error("repair options are not compatible with --readonly");
13075 * experimental and dangerous
13077 if (repair && check_mode == CHECK_MODE_LOWMEM)
13078 warning("low-memory mode repair support is only partial");
13081 cache_tree_init(&root_cache);
13083 ret = check_mounted(argv[optind]);
13086 error("could not check mount status: %s",
13092 "%s is currently mounted, use --force if you really intend to check the filesystem",
13100 error("repair and --force is not yet supported");
13107 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
13111 "filesystem mounted, continuing because of --force");
13113 /* A block device is mounted in exclusive mode by kernel */
13114 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
13117 /* only allow partial opening under repair mode */
13119 ctree_flags |= OPEN_CTREE_PARTIAL;
13121 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
13122 chunk_root_bytenr, ctree_flags);
13124 error("cannot open file system");
/* Publish the fs_info through the file-level global as well. */
13130 global_info = info;
13131 root = info->fs_root;
13132 uuid_unparse(info->super_copy->fsid, uuidbuf);
13134 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
13137 * Check the bare minimum before starting anything else that could rely
13138 * on it, namely the tree roots, any local consistency checks
13140 if (!extent_buffer_uptodate(info->tree_root->node) ||
13141 !extent_buffer_uptodate(info->dev_root->node) ||
13142 !extent_buffer_uptodate(info->chunk_root->node)) {
13143 error("critical roots corrupted, unable to check the filesystem");
13149 if (clear_space_cache) {
13150 ret = do_clear_free_space_cache(info, clear_space_cache);
13156 * repair mode will force us to commit transaction which
13157 * will make us fail to load log tree when mounting.
13159 if (repair && btrfs_super_log_root(info->super_copy)) {
13160 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
13166 ret = zero_log_tree(root);
13169 error("failed to zero log tree: %d", ret);
13174 if (qgroup_report) {
13175 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
13177 ret = qgroup_verify_all(info);
13184 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
13185 subvolid, argv[optind], uuidbuf);
13186 ret = print_extent_state(info, subvolid);
/*
 * Tree re-initialisation: both rebuilds share one transaction started on the
 * extent root and committed once below.
 */
13191 if (init_extent_tree || init_csum_tree) {
13192 struct btrfs_trans_handle *trans;
13194 trans = btrfs_start_transaction(info->extent_root, 0);
13195 if (IS_ERR(trans)) {
13196 error("error starting transaction");
13197 ret = PTR_ERR(trans);
13202 if (init_extent_tree) {
13203 printf("Creating a new extent tree\n");
13204 ret = reinit_extent_tree(trans, info);
13210 if (init_csum_tree) {
13211 printf("Reinitialize checksum tree\n");
13212 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
13214 error("checksum tree initialization failed: %d",
13221 ret = fill_csum_tree(trans, info->csum_root,
13225 error("checksum tree refilling failed: %d", ret);
13230 * Ok now we commit and run the normal fsck, which will add
13231 * extent entries for all of the items it finds.
13233 ret = btrfs_commit_transaction(trans, info->extent_root);
13238 if (!extent_buffer_uptodate(info->extent_root->node)) {
13239 error("critical: extent_root, unable to check the filesystem");
13244 if (!extent_buffer_uptodate(info->csum_root->node)) {
13245 error("critical: csum_root, unable to check the filesystem");
13251 ret = do_check_chunks_and_extents(info);
13255 "errors found in extent allocation tree or chunk allocation");
13257 ret = repair_root_items(info);
13260 error("failed to repair root items: %s", strerror(-ret));
13264 fprintf(stderr, "Fixed %d roots.\n", ret);
13266 } else if (ret > 0) {
13268 "Found %d roots with an outdated root item.\n",
13271 "Please run a filesystem check with the option --repair to fix them.\n");
13277 if (!ctx.progress_enabled) {
13278 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13279 fprintf(stderr, "checking free space tree\n");
13281 fprintf(stderr, "checking free space cache\n");
13283 ret = check_space_cache(root);
13286 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13287 error("errors found in free space tree");
13289 error("errors found in free space cache");
13294 * We used to have to have these hole extents in between our real
13295 * extents so if we don't have this flag set we need to make sure there
13296 * are no gaps in the file extents for inodes, otherwise we can just
13297 * ignore it when this happens.
13299 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13300 ret = do_check_fs_roots(info, &root_cache);
13303 error("errors found in fs roots");
13307 fprintf(stderr, "checking csums\n");
13308 ret = check_csums(root);
13311 error("errors found in csum tree");
13315 fprintf(stderr, "checking root refs\n");
13316 /* For low memory mode, check_fs_roots_v2 handles root refs */
13317 if (check_mode != CHECK_MODE_LOWMEM) {
13318 ret = check_root_refs(root, &root_cache);
13321 error("errors found in root refs");
/* In repair mode, re-COW every block queued on fs_info->recow_ebs. */
13326 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13327 struct extent_buffer *eb;
13329 eb = list_first_entry(&root->fs_info->recow_ebs,
13330 struct extent_buffer, recow);
13331 list_del_init(&eb->recow);
13332 ret = recow_extent_buffer(root, eb);
13335 error("fails to fix transid errors");
/* Drain the file-level delete_items list. */
13340 while (!list_empty(&delete_items)) {
13341 struct bad_item *bad;
13343 bad = list_first_entry(&delete_items, struct bad_item, list);
13344 list_del_init(&bad->list);
13346 ret = delete_bad_item(root, bad);
13352 if (info->quota_enabled) {
13353 fprintf(stderr, "checking quota groups\n");
13354 ret = qgroup_verify_all(info);
13357 error("failed to check quota groups");
13361 ret = repair_qgroups(info, &qgroups_repaired);
13364 error("failed to repair quota groups");
/* Anything still queued on recow_ebs at this point is reported as an error. */
13370 if (!list_empty(&root->fs_info->recow_ebs)) {
13371 error("transid errors in file system");
/* Final summary built from the file-level byte counters. */
13376 printf("found %llu bytes used, ",
13377 (unsigned long long)bytes_used);
13379 printf("error(s) found\n");
13381 printf("no error found\n");
13382 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13383 printf("total tree bytes: %llu\n",
13384 (unsigned long long)total_btree_bytes);
13385 printf("total fs tree bytes: %llu\n",
13386 (unsigned long long)total_fs_tree_bytes);
13387 printf("total extent tree bytes: %llu\n",
13388 (unsigned long long)total_extent_tree_bytes);
13389 printf("btree space waste bytes: %llu\n",
13390 (unsigned long long)btree_space_waste);
13391 printf("file data blocks allocated: %llu\n referenced %llu\n",
13392 (unsigned long long)data_bytes_allocated,
13393 (unsigned long long)data_bytes_referenced);
13395 free_qgroup_counts();
13396 free_root_recs_tree(&root_cache);
13400 if (ctx.progress_enabled)
13401 task_deinit(ctx.info);