2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
/*
 * Sentinel member of enum task_position (the type of task_ctx.tp below).
 * print_status_check() compares priv->tp against this value.
 * NOTE(review): the enum header and the other members are missing from this
 * listing.
 */
51 TASK_NOTHING, /* have to be the last element */
/*
 * Members of the progress-reporting context (struct task_ctx, instantiated
 * as the file-scope `ctx`): `tp` selects the status string printed by
 * print_status_check(), `info` is the handle passed to task_period_start()
 * and task_period_wait().
 * NOTE(review): the struct header is missing from this listing.
 */
56 enum task_position tp;
58 struct task_info *info;
/*
 * File-scope state shared by the checker. Everything starts out as zero,
 * NULL or an empty list. In the part of the file visible here only
 * `no_holes` is read (maybe_free_inode_rec() skips the file-extent gap test
 * when it is non-zero); the remaining counters, lists and option flags are
 * consumed outside this excerpt.
 */
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
/*
 * Check implementation selector. parse_check_mode() returns
 * CHECK_MODE_LOWMEM, CHECK_MODE_ORIGINAL or CHECK_MODE_UNKNOWN;
 * CHECK_MODE_DEFAULT is an alias for CHECK_MODE_ORIGINAL and is the initial
 * value of `check_mode`.
 * NOTE(review): the non-default enumerator lines are missing from this
 * listing.
 */
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
/*
 * Common header of a backref record. It is embedded as the member `node` in
 * both struct data_backref and struct tree_backref, and itself contains an
 * rb_node member named `node` (see rb_node_to_extent_backref()).
 * `is_data` tells the two containing types apart: compare_extent_backref()
 * dispatches on it and the type-specific comparators WARN_ON a mismatch.
 */
87 struct extent_backref {
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
/* Convert an rb_node embedded in a struct extent_backref back to its container. */
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
98 return rb_entry(node, struct extent_backref, node);
/*
 * Backref of a data extent. Starts with the common extent_backref header
 * (`node`). compare_data_backref() additionally reads the members parent,
 * owner, offset, disk_bytenr, bytes and found_ref, whose declarations are
 * missing from this listing.
 */
101 struct data_backref {
102 struct extent_backref node;
/*
 * Error flags, one bit each (bits 1..19 are visible here), meant to be OR-ed
 * into a bitmask. None of them is referenced in the visible part of the file.
 * NOTE(review): the line that would define bit 0 is missing from this listing.
 */
116 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
117 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
118 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
119 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
120 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
121 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but not match */
122 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
123 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
124 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
125 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
126 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
127 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
128 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM no reference */
129 #define NO_INODE_ITEM (1<<14) /* no inode_item */
130 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
131 #define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
132 #define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but not match */
133 #define DIR_INDEX_MISSING (1<<18) /* INODE_INDEX not found */
134 #define DIR_INDEX_MISMATCH (1<<19) /* INODE_INDEX found but not match */
/* Convert the embedded extent_backref header back to its enclosing struct data_backref. */
136 static inline struct data_backref* to_data_backref(struct extent_backref *back)
138 return container_of(back, struct data_backref, node);
/*
 * rb-tree comparator for two data backrefs.
 *
 * Both nodes are expected to be data backrefs (WARN_ON otherwise). Keys are
 * compared in this order: parent (which shares storage with root), then
 * owner, then offset, and - only when both sides have found_ref set -
 * disk_bytenr and bytes. A full backref on the first node is tested right
 * after the parent comparison.
 *
 * NOTE(review): every `return` line is missing from this listing, so the
 * sign convention and the effect of the full_backref test (presumably an
 * early "equal" result) cannot be confirmed here.
 */
141 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
143 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
144 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
145 struct data_backref *back1 = to_data_backref(ext1);
146 struct data_backref *back2 = to_data_backref(ext2);
148 WARN_ON(!ext1->is_data);
149 WARN_ON(!ext2->is_data);
151 /* parent and root are a union, so this covers both */
152 if (back1->parent > back2->parent)
154 if (back1->parent < back2->parent)
157 /* This is a full backref and the parents match. */
158 if (back1->node.full_backref)
161 if (back1->owner > back2->owner)
163 if (back1->owner < back2->owner)
166 if (back1->offset > back2->offset)
168 if (back1->offset < back2->offset)
171 if (back1->found_ref && back2->found_ref) {
172 if (back1->disk_bytenr > back2->disk_bytenr)
174 if (back1->disk_bytenr < back2->disk_bytenr)
177 if (back1->bytes > back2->bytes)
179 if (back1->bytes < back2->bytes)
187 * Much like data_backref, just removed the undetermined members
188 * and change it to use list_head.
189 * During extent scan, it is stored in root->orphan_data_extent.
190 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
/*
 * List-linked record of an orphan data extent (chained through `list`).
 * print_orphan_data_extents() reads objectid, offset and disk_bytenr from
 * it; those member declarations are missing from this listing.
 */
192 struct orphan_data_extent {
193 struct list_head list;
/*
 * Backref of a tree block. Starts with the common extent_backref header
 * (`node`); compare_tree_backref() orders instances by the `parent` member,
 * which shares storage with `root` (declarations missing from this listing).
 */
201 struct tree_backref {
202 struct extent_backref node;
/* Convert the embedded extent_backref header back to its enclosing struct tree_backref. */
209 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
211 return container_of(back, struct tree_backref, node);
/*
 * rb-tree comparator for two tree-block backrefs. Both nodes are expected
 * to be non-data backrefs (WARN_ON otherwise); the only key compared is
 * parent, which shares storage with root.
 * NOTE(review): the `return` lines are missing from this listing.
 */
214 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
216 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
217 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
218 struct tree_backref *back1 = to_tree_backref(ext1);
219 struct tree_backref *back2 = to_tree_backref(ext2);
221 WARN_ON(ext1->is_data);
222 WARN_ON(ext2->is_data);
224 /* parent and root are a union, so this covers both */
225 if (back1->parent > back2->parent)
227 if (back1->parent < back2->parent)
/*
 * Top-level rb-tree comparator for backrefs of either kind. Orders first by
 * is_data, then by full_backref, and finally delegates to
 * compare_data_backref() or compare_tree_backref().
 * NOTE(review): the `return` lines for the first two keys and the condition
 * selecting between the two delegates are missing from this listing.
 */
233 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
235 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
236 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
238 if (ext1->is_data > ext2->is_data)
241 if (ext1->is_data < ext2->is_data)
244 if (ext1->full_backref > ext2->full_backref)
246 if (ext1->full_backref < ext2->full_backref)
250 return compare_data_backref(node1, node2);
252 return compare_tree_backref(node1, node2);
255 /* Explicit initialization for extent_record::flag_block_full_backref */
/* The value 2 fits the 2-bit bitfield and differs from the boolean values 0 and 1. */
256 enum { FLAG_UNSET = 2 };
/*
 * Per-extent bookkeeping record. It carries list heads for backrefs and
 * duplicates, an rb_root of backrefs, a `list` linkage (the member
 * to_extent_record() converts from), and an embedded cache_extent.
 * flag_block_full_backref is two bits wide so that it can hold FLAG_UNSET
 * in addition to 0/1; the remaining bitfields are single-bit status flags.
 * NOTE(review): several member declarations are missing from this listing.
 */
258 struct extent_record {
259 struct list_head backrefs;
260 struct list_head dups;
261 struct rb_root backref_tree;
262 struct list_head list;
263 struct cache_extent cache;
264 struct btrfs_disk_key parent_key;
269 u64 extent_item_refs;
271 u64 parent_generation;
275 unsigned int flag_block_full_backref:2;
276 unsigned int found_rec:1;
277 unsigned int content_checked:1;
278 unsigned int owner_ref_checked:1;
279 unsigned int is_root:1;
280 unsigned int metadata:1;
281 unsigned int bad_full_backref:1;
282 unsigned int crossing_stripes:1;
283 unsigned int wrong_chunk_type:1;
/* Convert the `list` linkage of a struct extent_record back to the record. */
286 static inline struct extent_record* to_extent_record(struct list_head *entry)
288 return container_of(entry, struct extent_record, list);
/*
 * One (dir, name) reference to an inode, chained on inode_record.backrefs.
 * The three found_* bits record which item kinds have been seen for this
 * name (set in add_inode_backref()). Other members used in this file - dir,
 * index, namelen, filetype, ref_type, errors and a trailing name buffer
 * sized at allocation time (get_inode_backref()) - are missing from this
 * listing.
 */
291 struct inode_backref {
292 struct list_head list;
293 unsigned int found_dir_item:1;
294 unsigned int found_dir_index:1;
295 unsigned int found_inode_ref:1;
/* Convert the `list` linkage of a struct inode_backref back to the backref. */
305 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
307 return list_entry(entry, struct inode_backref, list);
/*
 * List-linked record describing a root item; only the `list` linkage and
 * `drop_key` are visible. Not used in the visible part of the file.
 * NOTE(review): the other member declarations are missing from this listing.
 */
310 struct root_item_record {
311 struct list_head list;
317 struct btrfs_key drop_key;
/*
 * Per-backref error bits, OR-ed into inode_backref.errors and decoded into
 * text by print_ref_error().
 */
320 #define REF_ERR_NO_DIR_ITEM (1 << 0)
321 #define REF_ERR_NO_DIR_INDEX (1 << 1)
322 #define REF_ERR_NO_INODE_REF (1 << 2)
323 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
324 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
325 #define REF_ERR_DUP_INODE_REF (1 << 5)
326 #define REF_ERR_INDEX_UNMATCH (1 << 6)
327 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
328 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 0x100
329 #define REF_ERR_NO_ROOT_REF (1 << 9)
330 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
331 #define REF_ERR_DUP_ROOT_REF (1 << 11)
332 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
/*
 * A hole (gap) in a file's extents, kept in an rb-tree. The code in this
 * file accesses the members `node` (rb_node), `start` and `len`.
 * NOTE(review): the member declarations are missing from this listing.
 */
334 struct file_extent_hole {
/*
 * Everything collected about one inode while walking a tree: the list of
 * name backrefs, single-bit status flags, an rb-tree of file-extent holes
 * and a list of orphan data extents. Members used elsewhere in this file
 * but missing from this listing include ino, refs, errors, nlink,
 * found_link, isize, nbytes, imode, found_size, extent_start and
 * extent_end.
 */
340 struct inode_record {
341 struct list_head backrefs;
342 unsigned int checked:1;
343 unsigned int merging:1;
344 unsigned int found_inode_item:1;
345 unsigned int found_dir_item:1;
346 unsigned int found_file_extent:1;
347 unsigned int found_csum_item:1;
348 unsigned int some_csum_missing:1;
349 unsigned int nodatasum:1;
362 struct rb_root holes;
363 struct list_head orphan_extents;
/*
 * Per-inode error bits, OR-ed into inode_record.errors and decoded into
 * text by print_inode_error().
 */
368 #define I_ERR_NO_INODE_ITEM (1 << 0)
369 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
370 #define I_ERR_DUP_INODE_ITEM (1 << 2)
371 #define I_ERR_DUP_DIR_INDEX (1 << 3)
372 #define I_ERR_ODD_DIR_ITEM (1 << 4)
373 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
374 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
375 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
376 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 0x100
377 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
378 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 0x400
379 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
380 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
381 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
382 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
/*
 * One reference to a root, chained through `list` (see to_root_backref()),
 * with single-bit flags for the item kinds found and for reachability.
 * Not otherwise used in the visible part of the file.
 * NOTE(review): further member declarations are missing from this listing.
 */
384 struct root_backref {
385 struct list_head list;
386 unsigned int found_dir_item:1;
387 unsigned int found_dir_index:1;
388 unsigned int found_back_ref:1;
389 unsigned int found_forward_ref:1;
390 unsigned int reachable:1;
/* Convert the `list` linkage of a struct root_backref back to the backref. */
399 static inline struct root_backref* to_root_backref(struct list_head *entry)
401 return list_entry(entry, struct root_backref, list);
/*
 * NOTE(review): members of a struct whose header is missing from this
 * listing (presumably a per-root record - confirm against the full file).
 * Visible: a backref list head, an embedded cache_extent and a
 * found_root_item flag.
 */
405 struct list_head backrefs;
406 struct cache_extent cache;
407 unsigned int found_root_item:1;
/*
 * NOTE(review): member of a struct whose header is missing from this
 * listing. Presumably struct ptr_node, which this file accesses through
 * container_of(cache, struct ptr_node, cache) and whose `data` pointer holds
 * an inode_record - confirm against the full file.
 */
413 struct cache_extent cache;
/*
 * Members of struct shared_node (header missing from this listing): the
 * cache_extent that indexes the node in walk_control.shared, two caches of
 * inode records (root_cache and inode_cache), and the inode record
 * currently being processed. A `refs` counter is also used by
 * splice_shared_node()/enter_shared_node() but its declaration is not
 * visible.
 */
418 struct cache_extent cache;
419 struct cache_tree root_cache;
420 struct cache_tree inode_cache;
421 struct inode_record *current;
/*
 * Tree-walk state: `shared` indexes shared_node objects by bytenr (see
 * find_shared_node()/add_shared_node()), `nodes` holds one shared_node
 * pointer per tree level. enter_shared_node() also uses the members
 * active_node and root_level, whose declarations are missing from this
 * listing.
 */
430 struct walk_control {
431 struct cache_tree shared;
432 struct shared_node *nodes[BTRFS_MAX_LEVEL];
/*
 * NOTE(review): members of a struct whose header is missing from this
 * listing; it pairs a btrfs_key with a list linkage. Not used in the
 * visible part of the file.
 */
438 struct btrfs_key key;
440 struct list_head list;
/*
 * List-linked extent entry; only the `list` linkage is visible. Not used in
 * the visible part of the file.
 * NOTE(review): the other member declarations are missing from this listing.
 */
443 struct extent_entry {
448 struct list_head list;
/*
 * Information about a root item, indexed through the embedded cache_extent.
 * NOTE(review): the member declarations that the two inner comments
 * describe (root level, node count) are missing from this listing.
 */
451 struct root_item_info {
452 /* level of the root */
454 /* number of nodes at this level, must be 1 for a root */
458 struct cache_extent cache_extent;
462 * Error bit for low memory mode check.
464 * Currently no caller cares about it yet. Just internal use for error
/*
 * Error bits for the extent-tree checks. They are OR-ed together into one
 * bitmask, so every flag must occupy its own bit.
 *
 * CROSSING_STRIPE_BOUNDARY used to share (1 << 4) with REFERENCER_MISMATCH,
 * which made the two conditions indistinguishable once combined. It now
 * uses the first free bit; all other values are unchanged.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8) /* Chunk type does not match */
/*
 * Progress-reporting routine; `p` is a struct task_ctx.
 *
 * Starts a periodic timer on priv->info (period argument 1000, annotated as
 * 1s), then prints the status string selected by priv->tp followed by a
 * character from work_indicator chosen by `count % 4`, ending with '\r' so
 * the next print overwrites the line, and waits for the next period.
 * priv->tp == TASK_NOTHING is tested before printing.
 *
 * NOTE(review): the loop construct, the declaration/update of `count`, most
 * of the string table and the return statement are missing from this
 * listing.
 */
478 static void *print_status_check(void *p)
480 struct task_ctx *priv = p;
481 const char work_indicator[] = { '.', 'o', 'O', 'o' };
483 static char *task_position_string[] = {
485 "checking free space cache",
489 task_period_start(priv->info, 1000 /* 1s */);
491 if (priv->tp == TASK_NOTHING)
495 printf("%s [%c]\r", task_position_string[priv->tp],
496 work_indicator[count % 4]);
499 task_period_wait(priv->info);
/*
 * Companion of print_status_check() taking the same opaque pointer.
 * NOTE(review): the body is missing from this listing.
 */
504 static int print_status_return(void *p)
/*
 * Translate a mode name into a check mode.
 *
 * "lowmem" selects CHECK_MODE_LOWMEM; "orig" and "original" both select
 * CHECK_MODE_ORIGINAL. The comparison is exact and case-sensitive (strcmp).
 * Any other string yields CHECK_MODE_UNKNOWN. `str` must not be NULL.
 */
512 static enum btrfs_check_mode parse_check_mode(const char *str)
514 if (strcmp(str, "lowmem") == 0)
515 return CHECK_MODE_LOWMEM;
516 if (strcmp(str, "orig") == 0)
517 return CHECK_MODE_ORIGINAL;
518 if (strcmp(str, "original") == 0)
519 return CHECK_MODE_ORIGINAL;
521 return CHECK_MODE_UNKNOWN;
524 /* Compatible function to allow reuse of old code */
/*
 * Look at the first (lowest-ordered) hole of `holes`. An empty tree is
 * handled separately before rb_first() is used.
 * NOTE(review): both return statements are missing from this listing.
 * Callers compare the result against an inode size and against the value
 * for another tree, so it is presumably the start offset of the first hole,
 * with a large sentinel for "no hole" - confirm against the full file.
 */
525 static u64 first_extent_gap(struct rb_root *holes)
527 struct file_extent_hole *hole;
529 if (RB_EMPTY_ROOT(holes))
532 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * rb-tree comparator for file_extent_hole nodes: orders by start, and for
 * equal starts distinguishes by length (hole1->len >= hole2->len versus the
 * opposite). Per the comment in add_file_extent_hole(), it never reports
 * equality, so insertion cannot fail with -EEXIST.
 * NOTE(review): the `return` lines are missing from this listing.
 */
536 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
538 struct file_extent_hole *hole1;
539 struct file_extent_hole *hole2;
541 hole1 = rb_entry(node1, struct file_extent_hole, node);
542 hole2 = rb_entry(node2, struct file_extent_hole, node);
544 if (hole1->start > hole2->start)
546 if (hole1->start < hole2->start)
548 /* Now hole1->start == hole2->start */
549 if (hole1->len >= hole2->len)
551 * Hole 1 will be merge center
552 * Same hole will be merged later
555 /* Hole 2 will be merge center */
560 * Add a hole to the record
562 * This will do hole merge for copy_file_extent_holes(),
563 * which will ensure there won't be continuous holes.
/*
 * Insert a newly allocated hole into `holes` and coalesce it with its
 * neighbours.
 *
 * After insertion the new hole absorbs the previous hole when that one
 * reaches or overlaps its start (the new hole takes over prev->start and
 * prev is erased). It then repeatedly absorbs following holes that start at
 * or before its end, extending its length only when the following hole ends
 * at or beyond the current end.
 *
 * NOTE(review): the remaining parameters (start and length, judging by the
 * call sites), the allocation-failure check, the field initialisation, the
 * free() of erased nodes, the loop construct and the return statements are
 * missing from this listing.
 */
565 static int add_file_extent_hole(struct rb_root *holes,
568 struct file_extent_hole *hole;
569 struct file_extent_hole *prev = NULL;
570 struct file_extent_hole *next = NULL;
572 hole = malloc(sizeof(*hole));
577 /* Since compare will not return 0, no -EEXIST will happen */
578 rb_insert(holes, &hole->node, compare_hole);
580 /* simple merge with previous hole */
581 if (rb_prev(&hole->node))
582 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
584 if (prev && prev->start + prev->len >= hole->start) {
585 hole->len = hole->start + hole->len - prev->start;
586 hole->start = prev->start;
587 rb_erase(&prev->node, holes);
592 /* iterate merge with next holes */
594 if (!rb_next(&hole->node))
596 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
598 if (hole->start + hole->len >= next->start) {
599 if (hole->start + hole->len <= next->start + next->len)
600 hole->len = next->start + next->len -
602 rb_erase(&next->node, holes);
/*
 * Search callback for rb_search(): `data` is a file_extent_hole used as the
 * search key, `node` is the tree node under test. The key's position is
 * compared with the node's hole: lying before the hole, or falling inside
 * [hole->start, hole->start + hole->len), are distinguished.
 * NOTE(review): the declaration/assignment of `start` and all `return`
 * lines are missing from this listing.
 */
611 static int compare_hole_range(struct rb_node *node, void *data)
613 struct file_extent_hole *hole;
616 hole = (struct file_extent_hole *)data;
619 hole = rb_entry(node, struct file_extent_hole, node);
620 if (start < hole->start)
622 if (start >= hole->start && start < hole->start + hole->len)
628 * Delete a hole in the record
630 * This will do the hole split and is much stricter than add.
/*
 * Remove the range [start, start + len) from the hole tree.
 *
 * The hole containing `start` is located with rb_search() and
 * compare_hole_range(). A range that extends past that hole's end is
 * handled separately (check at "start + len > hole->start + hole->len").
 * Otherwise the hole is erased and whatever remains of it before and/or
 * after the range is re-inserted through add_file_extent_hole().
 *
 * NOTE(review): the remaining parameters, the declarations of prev_* and
 * next_*, the search-key setup, the not-found handling, the free() of the
 * erased hole and the return statements are missing from this listing.
 */
632 static int del_file_extent_hole(struct rb_root *holes,
635 struct file_extent_hole *hole;
636 struct file_extent_hole tmp;
641 struct rb_node *node;
648 node = rb_search(holes, &tmp, compare_hole_range, NULL);
651 hole = rb_entry(node, struct file_extent_hole, node);
652 if (start + len > hole->start + hole->len)
656 * Now there will be no overlap, delete the hole and re-add the
657 * split(s) if they exists.
659 if (start > hole->start) {
660 prev_start = hole->start;
661 prev_len = start - hole->start;
664 if (hole->start + hole->len > start + len) {
665 next_start = start + len;
666 next_len = hole->start + hole->len - start - len;
669 rb_erase(node, holes);
672 ret = add_file_extent_hole(holes, prev_start, prev_len);
677 ret = add_file_extent_hole(holes, next_start, next_len);
/*
 * Add every hole of the source tree to `dst`, walking the source in order
 * with rb_first()/rb_next(). Each hole goes through add_file_extent_hole(),
 * so holes are merged with what `dst` already contains.
 * NOTE(review): the second parameter, the loop construct, the error check
 * on `ret` and the return statement are missing from this listing.
 */
684 static int copy_file_extent_holes(struct rb_root *dst,
687 struct file_extent_hole *hole;
688 struct rb_node *node;
691 node = rb_first(src);
693 hole = rb_entry(node, struct file_extent_hole, node);
694 ret = add_file_extent_hole(dst, hole->start, hole->len);
697 node = rb_next(node);
/*
 * Empty a hole tree: repeatedly take the first node, erase it from the tree
 * and fetch the new first node.
 * NOTE(review): the loop construct and the free() of each hole are missing
 * from this listing.
 */
702 static void free_file_extent_holes(struct rb_root *holes)
704 struct rb_node *node;
705 struct file_extent_hole *hole;
707 node = rb_first(holes);
709 hole = rb_entry(node, struct file_extent_hole, node);
710 rb_erase(node, holes);
712 node = rb_first(holes);
/* Forward declaration; the definition is outside the visible part of the file. */
716 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
/*
 * Note that `root` takes part in transaction `trans`.
 *
 * Does nothing when root->last_trans already equals trans->transid.
 * Otherwise marks the root as dirty-tracked, stores the transaction id, and
 * makes the current root node the commit root, calling extent_buffer_get()
 * on it (presumably to take a reference for the commit_root pointer).
 */
718 static void record_root_in_trans(struct btrfs_trans_handle *trans,
719 struct btrfs_root *root)
721 if (root->last_trans != trans->transid) {
722 root->track_dirty = 1;
723 root->last_trans = trans->transid;
724 root->commit_root = root->node;
725 extent_buffer_get(root->node);
/*
 * Map the file-type bits of an inode mode to the matching BTRFS_FT_*
 * directory-entry type using a lookup table indexed by
 * (imode & S_IFMT) >> S_SHIFT. Table slots without an initialiser are zero.
 * NOTE(review): the definition of S_SHIFT is missing from this listing.
 */
729 static u8 imode_to_type(u32 imode)
732 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
733 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
734 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
735 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
736 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
737 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
738 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
739 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
742 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
/*
 * rb-tree comparator for device_record nodes, ordering by devid.
 * NOTE(review): the `return` lines are missing from this listing.
 */
746 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
748 struct device_record *rec1;
749 struct device_record *rec2;
751 rec1 = rb_entry(node1, struct device_record, node);
752 rec2 = rb_entry(node2, struct device_record, node);
753 if (rec1->devid > rec2->devid)
755 else if (rec1->devid < rec2->devid)
/*
 * Deep-copy an inode record.
 *
 * The record itself is copied with memcpy(), then its three containers are
 * re-initialised and rebuilt: every backref is duplicated including its
 * trailing name (sizeof(*orig) + namelen + 1 bytes), every orphan data
 * extent is duplicated, and the hole tree is copied with
 * copy_file_extent_holes(). Returns ERR_PTR(-ENOMEM) when the record cannot
 * be allocated. The tail of the function tears the partially built clone
 * down again.
 *
 * NOTE(review): the cleanup loop over rec->orphan_extents reuses the
 * cursors `orig` and `tmp`, which are declared as struct inode_backref *,
 * although that list holds struct orphan_data_extent entries - this looks
 * like a type mix-up and should be checked against the full source.
 * NOTE(review): the allocation-failure checks inside the loops, the refs
 * reset, the free() calls, labels and return statements are missing from
 * this listing.
 */
761 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
763 struct inode_record *rec;
764 struct inode_backref *backref;
765 struct inode_backref *orig;
766 struct inode_backref *tmp;
767 struct orphan_data_extent *src_orphan;
768 struct orphan_data_extent *dst_orphan;
773 rec = malloc(sizeof(*rec));
775 return ERR_PTR(-ENOMEM);
776 memcpy(rec, orig_rec, sizeof(*rec));
778 INIT_LIST_HEAD(&rec->backrefs);
779 INIT_LIST_HEAD(&rec->orphan_extents);
780 rec->holes = RB_ROOT;
782 list_for_each_entry(orig, &orig_rec->backrefs, list) {
783 size = sizeof(*orig) + orig->namelen + 1;
784 backref = malloc(size);
789 memcpy(backref, orig, size);
790 list_add_tail(&backref->list, &rec->backrefs);
792 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
793 dst_orphan = malloc(sizeof(*dst_orphan));
798 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
799 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
801 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
808 rb = rb_first(&rec->holes);
810 struct file_extent_hole *hole;
812 hole = rb_entry(rb, struct file_extent_hole, node);
818 if (!list_empty(&rec->backrefs))
819 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
820 list_del(&orig->list);
824 if (!list_empty(&rec->orphan_extents))
825 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
826 list_del(&orig->list);
/*
 * Print the orphan data extents of one tree to stdout: a heading naming the
 * tree, then one line per extent with inode number, file offset, disk
 * bytenr and disk length. An empty list is tested for first.
 * NOTE(review): the second parameter (the tree's objectid, judging by the
 * call site), the early return and the trailing printf arguments are
 * missing from this listing.
 */
835 static void print_orphan_data_extents(struct list_head *orphan_extents,
838 struct orphan_data_extent *orphan;
840 if (list_empty(orphan_extents))
842 printf("The following data extent is lost in tree %llu:\n",
844 list_for_each_entry(orphan, orphan_extents, list) {
845 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
846 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Report the errors recorded for one inode on stderr.
 *
 * Prints "root <id> inode <ino> errors <hex>" followed by one ", <text>"
 * fragment per I_ERR_* bit set in rec->errors. For a relocation root the
 * line is prefixed with "reloc" and the id shown is root_key.offset, i.e.
 * the fs root it corresponds to. After the summary line, orphan extents are
 * listed when I_ERR_FILE_EXTENT_ORPHAN is set and the hole tree is dumped
 * when I_ERR_FILE_EXTENT_DISCOUNT is set; a separate "start: 0" line exists
 * for the case handled after the loop.
 *
 * NOTE(review): the loop construct around the hole dump, the condition
 * guarding the final "start: 0" line and part of its argument list are
 * missing from this listing.
 */
851 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
853 u64 root_objectid = root->root_key.objectid;
854 int errors = rec->errors;
858 /* reloc root errors, we print its corresponding fs root objectid*/
859 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
860 root_objectid = root->root_key.offset;
861 fprintf(stderr, "reloc");
863 fprintf(stderr, "root %llu inode %llu errors %x",
864 (unsigned long long) root_objectid,
865 (unsigned long long) rec->ino, rec->errors);
867 if (errors & I_ERR_NO_INODE_ITEM)
868 fprintf(stderr, ", no inode item");
869 if (errors & I_ERR_NO_ORPHAN_ITEM)
870 fprintf(stderr, ", no orphan item");
871 if (errors & I_ERR_DUP_INODE_ITEM)
872 fprintf(stderr, ", dup inode item");
873 if (errors & I_ERR_DUP_DIR_INDEX)
874 fprintf(stderr, ", dup dir index");
875 if (errors & I_ERR_ODD_DIR_ITEM)
876 fprintf(stderr, ", odd dir item");
877 if (errors & I_ERR_ODD_FILE_EXTENT)
878 fprintf(stderr, ", odd file extent");
879 if (errors & I_ERR_BAD_FILE_EXTENT)
880 fprintf(stderr, ", bad file extent");
881 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
882 fprintf(stderr, ", file extent overlap");
883 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
884 fprintf(stderr, ", file extent discount");
885 if (errors & I_ERR_DIR_ISIZE_WRONG)
886 fprintf(stderr, ", dir isize wrong");
887 if (errors & I_ERR_FILE_NBYTES_WRONG)
888 fprintf(stderr, ", nbytes wrong");
889 if (errors & I_ERR_ODD_CSUM_ITEM)
890 fprintf(stderr, ", odd csum item");
891 if (errors & I_ERR_SOME_CSUM_MISSING)
892 fprintf(stderr, ", some csum missing");
893 if (errors & I_ERR_LINK_COUNT_WRONG)
894 fprintf(stderr, ", link count wrong");
895 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
896 fprintf(stderr, ", orphan file extent");
897 fprintf(stderr, "\n");
898 /* Print the orphan extents if needed */
899 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
900 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
902 /* Print the holes if needed */
903 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
904 struct file_extent_hole *hole;
905 struct rb_node *node;
908 node = rb_first(&rec->holes);
909 fprintf(stderr, "Found file extent holes:\n");
912 hole = rb_entry(node, struct file_extent_hole, node);
913 fprintf(stderr, "\tstart: %llu, len: %llu\n",
914 hole->start, hole->len);
915 node = rb_next(node);
918 fprintf(stderr, "\tstart: 0, len: %llu\n",
920 root->fs_info->sectorsize));
/*
 * Decode a REF_ERR_* bitmask to stderr: one ", <text>" fragment per set
 * bit, in ascending bit order, followed by a newline. The caller is
 * expected to have printed the start of the line already.
 */
924 static void print_ref_error(int errors)
926 if (errors & REF_ERR_NO_DIR_ITEM)
927 fprintf(stderr, ", no dir item");
928 if (errors & REF_ERR_NO_DIR_INDEX)
929 fprintf(stderr, ", no dir index");
930 if (errors & REF_ERR_NO_INODE_REF)
931 fprintf(stderr, ", no inode ref");
932 if (errors & REF_ERR_DUP_DIR_ITEM)
933 fprintf(stderr, ", dup dir item");
934 if (errors & REF_ERR_DUP_DIR_INDEX)
935 fprintf(stderr, ", dup dir index");
936 if (errors & REF_ERR_DUP_INODE_REF)
937 fprintf(stderr, ", dup inode ref");
938 if (errors & REF_ERR_INDEX_UNMATCH)
939 fprintf(stderr, ", index mismatch");
940 if (errors & REF_ERR_FILETYPE_UNMATCH)
941 fprintf(stderr, ", filetype mismatch");
942 if (errors & REF_ERR_NAME_TOO_LONG)
943 fprintf(stderr, ", name too long");
944 if (errors & REF_ERR_NO_ROOT_REF)
945 fprintf(stderr, ", no root ref");
946 if (errors & REF_ERR_NO_ROOT_BACKREF)
947 fprintf(stderr, ", no root backref");
948 if (errors & REF_ERR_DUP_ROOT_REF)
949 fprintf(stderr, ", dup root ref");
950 if (errors & REF_ERR_DUP_ROOT_BACKREF)
951 fprintf(stderr, ", dup root backref");
952 fprintf(stderr, "\n");
/*
 * Look up - and, on the creation path, create - the inode record for `ino`
 * in `inode_cache`.
 *
 * When an entry exists and the caller intends to modify it (`mod`) while
 * the record is shared (refs > 1), the record is replaced in the cache node
 * by a private clone from clone_inode_rec(). On the creation path a
 * zero-initialised record is allocated, extent_start is set to (u64)-1,
 * its lists and hole tree are initialised, and it is inserted under a new
 * ptr_node covering [ino, ino + 1). BTRFS_FREE_INO_OBJECTID gets special
 * treatment before insertion.
 *
 * Returns the record, or an ERR_PTR: -ENOMEM on allocation failure and
 * -EEXIST when the insertion fails.
 *
 * NOTE(review): the remaining parameters (ino and mod), the branch
 * structure, the handling of a failed clone, the refs/ino initialisation
 * and the success return are missing from this listing.
 */
955 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
958 struct ptr_node *node;
959 struct cache_extent *cache;
960 struct inode_record *rec = NULL;
963 cache = lookup_cache_extent(inode_cache, ino, 1);
965 node = container_of(cache, struct ptr_node, cache);
967 if (mod && rec->refs > 1) {
968 node->data = clone_inode_rec(rec);
969 if (IS_ERR(node->data))
975 rec = calloc(1, sizeof(*rec));
977 return ERR_PTR(-ENOMEM);
979 rec->extent_start = (u64)-1;
981 INIT_LIST_HEAD(&rec->backrefs);
982 INIT_LIST_HEAD(&rec->orphan_extents);
983 rec->holes = RB_ROOT;
985 node = malloc(sizeof(*node));
988 return ERR_PTR(-ENOMEM);
990 node->cache.start = ino;
991 node->cache.size = 1;
994 if (ino == BTRFS_FREE_INO_OBJECTID)
997 ret = insert_cache_extent(inode_cache, &node->cache);
999 return ERR_PTR(-EEXIST);
/*
 * Drain a list of orphan_data_extent records, unlinking the head entry
 * until the list is empty.
 * NOTE(review): the free() of each entry is missing from this listing.
 */
1004 static void free_orphan_data_extents(struct list_head *orphan_extents)
1006 struct orphan_data_extent *orphan;
1008 while (!list_empty(orphan_extents)) {
1009 orphan = list_entry(orphan_extents->next,
1010 struct orphan_data_extent, list);
1011 list_del(&orphan->list);
/*
 * Drop one reference on an inode record. While other references remain
 * (refs still > 0 after the decrement) the record is kept; otherwise its
 * backrefs, orphan data extents and hole tree are released.
 * NOTE(review): the early return and the free() calls for the backrefs and
 * the record itself are missing from this listing.
 */
1016 static void free_inode_rec(struct inode_record *rec)
1018 struct inode_backref *backref;
1020 if (--rec->refs > 0)
1023 while (!list_empty(&rec->backrefs)) {
1024 backref = to_inode_backref(rec->backrefs.next);
1025 list_del(&backref->list);
1028 free_orphan_data_extents(&rec->orphan_extents);
1029 free_file_extent_holes(&rec->holes);
/*
 * Tests whether a record holds nothing worth keeping: no errors, fully
 * checked, inode item seen, link count matching the links found, and no
 * outstanding backrefs.
 * NOTE(review): the return statements are missing from this listing.
 */
1033 static int can_free_inode_rec(struct inode_record *rec)
1035 if (!rec->errors && rec->checked && rec->found_inode_item &&
1036 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Finish validating an inode record and release it if it is clean.
 *
 * Nothing is done until the inode item has been seen. Then:
 *  1. Backrefs that have both a dir item and a dir index are checked against
 *     the file type derived from the inode mode; fully consistent ones
 *     (no errors, inode ref found, nlink == found_link) are unlinked.
 *  2. Once the record is checked and not being merged, type-specific
 *     consistency errors are recorded: directory size / stray file extents
 *     for directories; stray dir items, nbytes mismatch and uncovered file
 *     ranges (unless no_holes is set) for regular files and symlinks; and
 *     checksum inconsistencies with respect to the nodatasum flag.
 *  3. If can_free_inode_rec() agrees, the record is removed from
 *     `inode_cache` and its reference dropped. The record must be unshared
 *     at this point (BUG_ON(refs != 1)).
 *
 * NOTE(review): the early returns, the free() of unlinked backrefs and of
 * the cache node are missing from this listing.
 */
1041 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1042 struct inode_record *rec)
1044 struct cache_extent *cache;
1045 struct inode_backref *tmp, *backref;
1046 struct ptr_node *node;
1049 if (!rec->found_inode_item)
1052 filetype = imode_to_type(rec->imode);
1053 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1054 if (backref->found_dir_item && backref->found_dir_index) {
1055 if (backref->filetype != filetype)
1056 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1057 if (!backref->errors && backref->found_inode_ref &&
1058 rec->nlink == rec->found_link) {
1059 list_del(&backref->list);
1065 if (!rec->checked || rec->merging)
1068 if (S_ISDIR(rec->imode)) {
1069 if (rec->found_size != rec->isize)
1070 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1071 if (rec->found_file_extent)
1072 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1073 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1074 if (rec->found_dir_item)
1075 rec->errors |= I_ERR_ODD_DIR_ITEM;
1076 if (rec->found_size != rec->nbytes)
1077 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1078 if (rec->nlink > 0 && !no_holes &&
1079 (rec->extent_end < rec->isize ||
1080 first_extent_gap(&rec->holes) < rec->isize))
1081 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1084 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1085 if (rec->found_csum_item && rec->nodatasum)
1086 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1087 if (rec->some_csum_missing && !rec->nodatasum)
1088 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1091 BUG_ON(rec->refs != 1);
1092 if (can_free_inode_rec(rec)) {
1093 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1094 node = container_of(cache, struct ptr_node, cache);
1095 BUG_ON(node->data != rec);
1096 remove_cache_extent(inode_cache, &node->cache);
1098 free_inode_rec(rec);
/*
 * Search `root` for an orphan item (objectid BTRFS_ORPHAN_OBJECTID, type
 * BTRFS_ORPHAN_ITEM_KEY). The search passes a NULL transaction and zeros
 * for the last two arguments, and the path is released right after.
 * NOTE(review): the key.offset assignment (presumably `ino`) and the
 * translation of `ret` into the return value are missing from this listing.
 */
1102 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1104 struct btrfs_path path;
1105 struct btrfs_key key;
1108 key.objectid = BTRFS_ORPHAN_OBJECTID;
1109 key.type = BTRFS_ORPHAN_ITEM_KEY;
1112 btrfs_init_path(&path);
1113 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1114 btrfs_release_path(&path);
/*
 * Record the contents of an INODE_ITEM in the active node's current inode
 * record.
 *
 * The current record must belong to key->objectid and be unshared (BUG_ON
 * otherwise). A second inode item for the same inode is flagged with
 * I_ERR_DUP_INODE_ITEM. Otherwise nlink, size, nbytes and mode are read
 * from the item, the NODATASUM inode flag is tested, found_inode_item is
 * set, and an inode with nlink == 0 is provisionally flagged
 * I_ERR_NO_ORPHAN_ITEM. Finally the record is handed to
 * maybe_free_inode_rec().
 *
 * NOTE(review): the return statements and the statement that acts on the
 * NODATASUM test are missing from this listing.
 */
1120 static int process_inode_item(struct extent_buffer *eb,
1121 int slot, struct btrfs_key *key,
1122 struct shared_node *active_node)
1124 struct inode_record *rec;
1125 struct btrfs_inode_item *item;
1127 rec = active_node->current;
1128 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1129 if (rec->found_inode_item) {
1130 rec->errors |= I_ERR_DUP_INODE_ITEM;
1133 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1134 rec->nlink = btrfs_inode_nlink(eb, item);
1135 rec->isize = btrfs_inode_size(eb, item);
1136 rec->nbytes = btrfs_inode_nbytes(eb, item);
1137 rec->imode = btrfs_inode_mode(eb, item);
1138 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1140 rec->found_inode_item = 1;
1141 if (rec->nlink == 0)
1142 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1143 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find the backref of `rec` for directory `dir` and the given name, or
 * create it.
 *
 * Existing backrefs are matched on dir, namelen and the name bytes;
 * BTRFS_MULTIPLE_OBJECTIDS is special-cased inside the search loop. A new
 * backref is allocated with room for the name plus a terminating NUL, its
 * fixed part is zeroed, the name is copied in and NUL-terminated, and it is
 * appended to rec->backrefs.
 *
 * NOTE(review): the `name` parameter line, the loop's continue/break/return
 * statements, the allocation-failure check, the `dir` assignment and the
 * final return are missing from this listing.
 */
1147 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1149 int namelen, u64 dir)
1151 struct inode_backref *backref;
1153 list_for_each_entry(backref, &rec->backrefs, list) {
1154 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1156 if (backref->dir != dir || backref->namelen != namelen)
1158 if (memcmp(name, backref->name, namelen))
1163 backref = malloc(sizeof(*backref) + namelen + 1);
1166 memset(backref, 0, sizeof(*backref));
1168 backref->namelen = namelen;
1169 memcpy(backref->name, name, namelen);
1170 backref->name[namelen] = '\0';
1171 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Record that an item of type `itemtype` references inode `ino` from
 * directory `dir` under `name`.
 *
 * The inode record is fetched for modification (failure is fatal via
 * BUG_ON) and the matching backref is found or created. `errors` is OR-ed
 * into the backref, then, depending on the item type:
 *  - DIR_INDEX: flags duplicates, index mismatch against an inode ref and
 *    filetype mismatch against a dir item; stores index and filetype.
 *  - DIR_ITEM: flags duplicates and filetype mismatch against a dir index;
 *    stores filetype.
 *  - INODE_REF / INODE_EXTREF: flags duplicates and index mismatch against a
 *    dir index; stores index and the ref type.
 * Finally the record is passed to maybe_free_inode_rec().
 *
 * NOTE(review): the backref NULL check, the found_link accounting, the
 * handling of other item types and the return statement are missing from
 * this listing.
 */
1175 static int add_inode_backref(struct cache_tree *inode_cache,
1176 u64 ino, u64 dir, u64 index,
1177 const char *name, int namelen,
1178 u8 filetype, u8 itemtype, int errors)
1180 struct inode_record *rec;
1181 struct inode_backref *backref;
1183 rec = get_inode_rec(inode_cache, ino, 1);
1184 BUG_ON(IS_ERR(rec));
1185 backref = get_inode_backref(rec, name, namelen, dir);
1188 backref->errors |= errors;
1189 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1190 if (backref->found_dir_index)
1191 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1192 if (backref->found_inode_ref && backref->index != index)
1193 backref->errors |= REF_ERR_INDEX_UNMATCH;
1194 if (backref->found_dir_item && backref->filetype != filetype)
1195 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1197 backref->index = index;
1198 backref->filetype = filetype;
1199 backref->found_dir_index = 1;
1200 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1202 if (backref->found_dir_item)
1203 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1204 if (backref->found_dir_index && backref->filetype != filetype)
1205 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1207 backref->filetype = filetype;
1208 backref->found_dir_item = 1;
1209 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1210 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1211 if (backref->found_inode_ref)
1212 backref->errors |= REF_ERR_DUP_INODE_REF;
1213 if (backref->found_dir_index && backref->index != index)
1214 backref->errors |= REF_ERR_INDEX_UNMATCH;
1216 backref->index = index;
1218 backref->ref_type = itemtype;
1219 backref->found_inode_ref = 1;
1224 maybe_free_inode_rec(inode_cache, rec);
/*
 * Fold everything known about `src` into `dst` (both describe the same
 * inode; `dst` lives in `dst_cache`).
 *
 * Each backref of `src` is replayed through add_inode_backref() once per
 * item kind it was found in. The found_* status bits are OR-ed over, hole
 * information is copied when src's first gap precedes dst's, found_link
 * (minus `dir_count`) and found_size are accumulated, and the extent ranges
 * are combined: an overlap sets I_ERR_FILE_EXTENT_OVERLAP, a gap between
 * dst's end and src's start becomes a hole. Error bits are OR-ed, and the
 * inode-item fields are taken from `src` only if `dst` has none yet;
 * otherwise I_ERR_DUP_INODE_ITEM is set.
 *
 * NOTE(review): the declarations of `ret` and `dir_count`, the statement
 * that updates dir_count in the dir-item branch, the merging flag handling,
 * error checks and the return statement are missing from this listing.
 */
1228 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1229 struct cache_tree *dst_cache)
1231 struct inode_backref *backref;
1236 list_for_each_entry(backref, &src->backrefs, list) {
1237 if (backref->found_dir_index) {
1238 add_inode_backref(dst_cache, dst->ino, backref->dir,
1239 backref->index, backref->name,
1240 backref->namelen, backref->filetype,
1241 BTRFS_DIR_INDEX_KEY, backref->errors);
1243 if (backref->found_dir_item) {
1245 add_inode_backref(dst_cache, dst->ino,
1246 backref->dir, 0, backref->name,
1247 backref->namelen, backref->filetype,
1248 BTRFS_DIR_ITEM_KEY, backref->errors);
1250 if (backref->found_inode_ref) {
1251 add_inode_backref(dst_cache, dst->ino,
1252 backref->dir, backref->index,
1253 backref->name, backref->namelen, 0,
1254 backref->ref_type, backref->errors);
1258 if (src->found_dir_item)
1259 dst->found_dir_item = 1;
1260 if (src->found_file_extent)
1261 dst->found_file_extent = 1;
1262 if (src->found_csum_item)
1263 dst->found_csum_item = 1;
1264 if (src->some_csum_missing)
1265 dst->some_csum_missing = 1;
1266 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1267 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1272 BUG_ON(src->found_link < dir_count);
1273 dst->found_link += src->found_link - dir_count;
1274 dst->found_size += src->found_size;
1275 if (src->extent_start != (u64)-1) {
1276 if (dst->extent_start == (u64)-1) {
1277 dst->extent_start = src->extent_start;
1278 dst->extent_end = src->extent_end;
1280 if (dst->extent_end > src->extent_start)
1281 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1282 else if (dst->extent_end < src->extent_start) {
1283 ret = add_file_extent_hole(&dst->holes,
1285 src->extent_start - dst->extent_end);
1287 if (dst->extent_end < src->extent_end)
1288 dst->extent_end = src->extent_end;
1292 dst->errors |= src->errors;
1293 if (src->found_inode_item) {
1294 if (!dst->found_inode_item) {
1295 dst->nlink = src->nlink;
1296 dst->isize = src->isize;
1297 dst->nbytes = src->nbytes;
1298 dst->imode = src->imode;
1299 dst->nodatasum = src->nodatasum;
1300 dst->found_inode_item = 1;
1302 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Move the inode records collected in `src_node` into `dst_node`.
 *
 * One reference on `src_node` is dropped first. Then, for root_cache and
 * afterwards inode_cache, every entry of the source cache is visited; a new
 * ptr_node covering the same range is inserted into the destination. When
 * the destination already has a record for that inode (-EEXIST), the two
 * are merged with merge_inode_recs(), the merged record is marked checked
 * (clearing dst_node->current if it pointed at it) and passed to
 * maybe_free_inode_rec(), and the source record's reference is dropped.
 *
 * Finally, if the source had a current inode beyond the destination's
 * current one, the destination's current record is closed out (checked +
 * maybe_free_inode_rec()) and replaced by the record for that inode.
 *
 * NOTE(review): the meaning of the refs == 0 test (presumably selecting
 * between moving and sharing records), the loop constructs, the data
 * pointer hand-over, free() calls and return statement are missing from
 * this listing.
 */
1310 static int splice_shared_node(struct shared_node *src_node,
1311 struct shared_node *dst_node)
1313 struct cache_extent *cache;
1314 struct ptr_node *node, *ins;
1315 struct cache_tree *src, *dst;
1316 struct inode_record *rec, *conflict;
1317 u64 current_ino = 0;
1321 if (--src_node->refs == 0)
1323 if (src_node->current)
1324 current_ino = src_node->current->ino;
1326 src = &src_node->root_cache;
1327 dst = &dst_node->root_cache;
1329 cache = search_cache_extent(src, 0);
1331 node = container_of(cache, struct ptr_node, cache);
1333 cache = next_cache_extent(cache);
1336 remove_cache_extent(src, &node->cache);
1339 ins = malloc(sizeof(*ins));
1341 ins->cache.start = node->cache.start;
1342 ins->cache.size = node->cache.size;
1346 ret = insert_cache_extent(dst, &ins->cache);
1347 if (ret == -EEXIST) {
1348 conflict = get_inode_rec(dst, rec->ino, 1);
1349 BUG_ON(IS_ERR(conflict));
1350 merge_inode_recs(rec, conflict, dst);
1352 conflict->checked = 1;
1353 if (dst_node->current == conflict)
1354 dst_node->current = NULL;
1356 maybe_free_inode_rec(dst, conflict);
1357 free_inode_rec(rec);
1364 if (src == &src_node->root_cache) {
1365 src = &src_node->inode_cache;
1366 dst = &dst_node->inode_cache;
1370 if (current_ino > 0 && (!dst_node->current ||
1371 current_ino > dst_node->current->ino)) {
1372 if (dst_node->current) {
1373 dst_node->current->checked = 1;
1374 maybe_free_inode_rec(dst, dst_node->current);
1376 dst_node->current = get_inode_rec(dst, current_ino, 1);
1377 BUG_ON(IS_ERR(dst_node->current));
/*
 * Free callback for cache trees that hold inode records: map @cache back to
 * the ptr_node embedding it and release the record with free_inode_rec().
 */
1382 static void free_inode_ptr(struct cache_extent *cache)
1384 struct ptr_node *node;
1385 struct inode_record *rec;
1387 node = container_of(cache, struct ptr_node, cache);
1389 free_inode_rec(rec);
/*
 * Instantiate the tree-freeing helper for "inode_recs" with the callback
 * above (presumably this is what provides free_inode_recs_tree() - confirm
 * against the macro definition).
 */
1393 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the shared_node registered for @bytenr in @shared. The cache tree
 * is queried with a range of size 1 and the hit is converted back to its
 * enclosing shared_node.
 */
1395 static struct shared_node *find_shared_node(struct cache_tree *shared,
1398 struct cache_extent *cache;
1399 struct shared_node *node;
1401 cache = lookup_cache_extent(shared, bytenr, 1);
1403 node = container_of(cache, struct shared_node, cache);
/*
 * Allocate a zeroed shared_node keyed by @bytenr (size 1), initialise its
 * root and inode caches, and insert it into @shared.
 */
1409 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1412 struct shared_node *node;
1414 node = calloc(1, sizeof(*node));
1417 node->cache.start = bytenr;
1418 node->cache.size = 1;
1419 cache_tree_init(&node->root_cache);
1420 cache_tree_init(&node->inode_cache);
1423 ret = insert_cache_extent(shared, &node->cache);
/*
 * Called by the tree walk for the tree block at @bytenr on @level, which has
 * @refs references. Looks up the shared_node for the block in wc->shared;
 * a newly added node becomes the active node for @level, while the records
 * of an already known node are spliced into the currently active node.
 */
1428 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1429 struct walk_control *wc, int level)
1431 struct shared_node *node;
1432 struct shared_node *dest;
1435 if (level == wc->active_node)
/* A shared block must sit below the currently active node. */
1438 BUG_ON(wc->active_node <= level);
1439 node = find_shared_node(&wc->shared, bytenr);
/* Register the block and make it the active node for this level. */
1441 ret = add_shared_node(&wc->shared, bytenr, refs);
1443 node = find_shared_node(&wc->shared, bytenr);
1444 wc->nodes[level] = node;
1445 wc->active_node = level;
/*
 * Active node is the root level of a root whose root item has zero refs:
 * only drop a reference, and tear the node down once it reaches zero.
 */
1449 if (wc->root_level == wc->active_node &&
1450 btrfs_root_refs(&root->root_item) == 0) {
1451 if (--node->refs == 0) {
1452 free_inode_recs_tree(&node->root_cache);
1453 free_inode_recs_tree(&node->inode_cache);
1454 remove_cache_extent(&wc->shared, &node->cache);
/* Hand the cached records over to the currently active node. */
1460 dest = wc->nodes[wc->active_node];
1461 splice_shared_node(node, dest);
/* splice_shared_node() dropped a reference; unlink the node when unused. */
1462 if (node->refs == 0) {
1463 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Called when the walk moves back up past the active shared node at @level.
 * Scans the levels above @level for the next node to activate, detaches the
 * node that was active, and splices its records into the new active node
 * unless that node is the root level of a root whose root item has zero
 * refs.
 */
1469 static int leave_shared_node(struct btrfs_root *root,
1470 struct walk_control *wc, int level)
1472 struct shared_node *node;
1473 struct shared_node *dest;
1476 if (level == wc->root_level)
/* Search upwards, starting one level above, for the next active level. */
1479 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1483 BUG_ON(i >= BTRFS_MAX_LEVEL);
/* Detach the node we are leaving and activate level i instead. */
1485 node = wc->nodes[wc->active_node];
1486 wc->nodes[wc->active_node] = NULL;
1487 wc->active_node = i;
1489 dest = wc->nodes[wc->active_node];
1490 if (wc->active_node < wc->root_level ||
1491 btrfs_root_refs(&root->root_item) > 0) {
/* The node must still be referenced after the splice drops one ref. */
1492 BUG_ON(node->refs <= 1);
1493 splice_shared_node(node, dest);
1495 BUG_ON(node->refs < 2);
1504 * 1 - if the root with id child_root_id is a child of root parent_root_id
1505 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1506 * has other root(s) as parent(s)
1507 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Determine the parent/child relation between two roots using the tree root:
 * first search for the ROOT_REF item (parent_root_id, child_root_id), then
 * walk the ROOT_BACKREF items of child_root_id looking for one whose offset
 * is parent_root_id.
 */
1509 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1512 struct btrfs_path path;
1513 struct btrfs_key key;
1514 struct extent_buffer *leaf;
1518 btrfs_init_path(&path);
/* Direct lookup: ROOT_REF keyed by parent with the child as offset. */
1520 key.objectid = parent_root_id;
1521 key.type = BTRFS_ROOT_REF_KEY;
1522 key.offset = child_root_id;
1523 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1527 btrfs_release_path(&path);
/* Fallback: iterate the child's ROOT_BACKREF items. */
1531 key.objectid = child_root_id;
1532 key.type = BTRFS_ROOT_BACKREF_KEY;
1534 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1540 leaf = path.nodes[0];
/* Past the last item of this leaf: continue in the next leaf. */
1541 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1542 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1545 leaf = path.nodes[0];
1548 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Left the run of backref items belonging to the child root. */
1549 if (key.objectid != child_root_id ||
1550 key.type != BTRFS_ROOT_BACKREF_KEY)
/* This backref names the parent we were asked about. */
1555 if (key.offset == parent_root_id) {
1556 btrfs_release_path(&path);
1563 btrfs_release_path(&path);
/* No matching parent: 0 if other parents were seen, 2 if none at all. */
1566 return has_parent ? 0 : 2;
/*
 * Process one DIR_ITEM or DIR_INDEX item of the active node's current inode.
 * Every btrfs_dir_item packed into the item is visited: its name length is
 * added to the directory's found_size, the name length (and, for DIR_ITEM
 * keys, the name hash) is validated, and a backref is recorded against the
 * inode or root the entry points to.
 */
1569 static int process_dir_item(struct extent_buffer *eb,
1570 int slot, struct btrfs_key *key,
1571 struct shared_node *active_node)
1581 struct btrfs_dir_item *di;
1582 struct inode_record *rec;
1583 struct cache_tree *root_cache;
1584 struct cache_tree *inode_cache;
1585 struct btrfs_key location;
1586 char namebuf[BTRFS_NAME_LEN];
1588 root_cache = &active_node->root_cache;
1589 inode_cache = &active_node->inode_cache;
1590 rec = active_node->current;
1591 rec->found_dir_item = 1;
1593 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1594 total = btrfs_item_size_nr(eb, slot);
1595 while (cur < total) {
1597 btrfs_dir_item_key_to_cpu(eb, di, &location);
1598 name_len = btrfs_dir_name_len(eb, di);
1599 data_len = btrfs_dir_data_len(eb, di);
1600 filetype = btrfs_dir_type(eb, di);
1602 rec->found_size += name_len;
/* The entry header plus name must fit in the item and in namebuf. */
1603 if (cur + sizeof(*di) + name_len > total ||
1604 name_len > BTRFS_NAME_LEN) {
1605 error = REF_ERR_NAME_TOO_LONG;
1607 if (cur + sizeof(*di) > total)
1609 len = min_t(u32, total - cur - sizeof(*di),
/* The name is stored directly after the btrfs_dir_item header. */
1616 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
/* For DIR_ITEM keys the key offset must equal the hash of the name. */
1618 if (key->type == BTRFS_DIR_ITEM_KEY &&
1619 key->offset != btrfs_name_hash(namebuf, len)) {
1620 rec->errors |= I_ERR_ODD_DIR_ITEM;
1621 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1622 key->objectid, key->offset, namebuf, len, filetype,
1623 key->offset, btrfs_name_hash(namebuf, len));
/* Record the backref in the cache matching the kind of target. */
1626 if (location.type == BTRFS_INODE_ITEM_KEY) {
1627 add_inode_backref(inode_cache, location.objectid,
1628 key->objectid, key->offset, namebuf,
1629 len, filetype, key->type, error);
1630 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1631 add_inode_backref(root_cache, location.objectid,
1632 key->objectid, key->offset,
1633 namebuf, len, filetype,
1636 fprintf(stderr, "invalid location in dir item %u\n",
/* Unknown target type: file the backref under BTRFS_MULTIPLE_OBJECTIDS. */
1638 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1639 key->objectid, key->offset, namebuf,
1640 len, filetype, key->type, error);
/* Step over header, name and data to the next packed entry. */
1643 len = sizeof(*di) + name_len + data_len;
1644 di = (struct btrfs_dir_item *)((char *)di + len);
/* More than one entry in a DIR_INDEX item is flagged as a duplicate index. */
1647 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1648 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Process one INODE_REF item: walk every btrfs_inode_ref packed into it and
 * record a backref (child = key->objectid, dir = key->offset) in the active
 * node's inode cache. Names that cross the item boundary or exceed
 * BTRFS_NAME_LEN are truncated and flagged with REF_ERR_NAME_TOO_LONG.
 */
1653 static int process_inode_ref(struct extent_buffer *eb,
1654 int slot, struct btrfs_key *key,
1655 struct shared_node *active_node)
1663 struct cache_tree *inode_cache;
1664 struct btrfs_inode_ref *ref;
1665 char namebuf[BTRFS_NAME_LEN];
1667 inode_cache = &active_node->inode_cache;
1669 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1670 total = btrfs_item_size_nr(eb, slot);
1671 while (cur < total) {
1672 name_len = btrfs_inode_ref_name_len(eb, ref);
1673 index = btrfs_inode_ref_index(eb, ref);
1675 /* inode_ref + namelen should not cross item boundary */
1676 if (cur + sizeof(*ref) + name_len > total ||
1677 name_len > BTRFS_NAME_LEN) {
1678 if (total < cur + sizeof(*ref))
1681 /* Still try to read out the remaining part */
1682 len = min_t(u32, total - cur - sizeof(*ref),
1684 error = REF_ERR_NAME_TOO_LONG;
/* The name is stored directly after the btrfs_inode_ref header. */
1690 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1691 add_inode_backref(inode_cache, key->objectid, key->offset,
1692 index, namebuf, len, 0, key->type, error);
/* Advance to the next packed reference. */
1694 len = sizeof(*ref) + name_len;
1695 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Process one INODE_EXTREF item: walk every btrfs_inode_extref packed into
 * it and record a backref (child = key->objectid, dir = the extref's parent)
 * in the active node's inode cache. Names longer than BTRFS_NAME_LEN are
 * truncated and flagged with REF_ERR_NAME_TOO_LONG.
 *
 * NOTE(review): unlike process_inode_ref(), no check that the entry stays
 * inside the item is visible here - confirm whether one is needed.
 */
1701 static int process_inode_extref(struct extent_buffer *eb,
1702 int slot, struct btrfs_key *key,
1703 struct shared_node *active_node)
1712 struct cache_tree *inode_cache;
1713 struct btrfs_inode_extref *extref;
1714 char namebuf[BTRFS_NAME_LEN];
1716 inode_cache = &active_node->inode_cache;
1718 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1719 total = btrfs_item_size_nr(eb, slot);
1720 while (cur < total) {
1721 name_len = btrfs_inode_extref_name_len(eb, extref);
1722 index = btrfs_inode_extref_index(eb, extref);
1723 parent = btrfs_inode_extref_parent(eb, extref);
1724 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: clamp the copy to the buffer size and flag the ref. */
1728 len = BTRFS_NAME_LEN;
1729 error = REF_ERR_NAME_TOO_LONG;
/* The name is stored directly after the btrfs_inode_extref header. */
1731 read_extent_buffer(eb, namebuf,
1732 (unsigned long)(extref + 1), len);
1733 add_inode_backref(inode_cache, key->objectid, parent,
1734 index, namebuf, len, 0, key->type, error);
/* Advance to the next packed extended reference. */
1736 len = sizeof(*extref) + name_len;
1737 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Walk the EXTENT_CSUM items of the csum root that overlap the byte range
 * [@start, @start + @len) and work out how much of the range they cover.
 * Each item covers (item size / csum size) sectors starting at its key
 * offset. The covered total is presumably reported through @found - confirm.
 */
1744 static int count_csum_range(struct btrfs_root *root, u64 start,
1745 u64 len, u64 *found)
1747 struct btrfs_key key;
1748 struct btrfs_path path;
1749 struct extent_buffer *leaf;
1754 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1756 btrfs_init_path(&path);
1758 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1760 key.type = BTRFS_EXTENT_CSUM_KEY;
1762 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * No exact match: inspect the preceding item, which may be a csum item
 * that starts before @start but still covers it.
 */
1766 if (ret > 0 && path.slots[0] > 0) {
1767 leaf = path.nodes[0];
1768 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1769 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1770 key.type == BTRFS_EXTENT_CSUM_KEY)
1775 leaf = path.nodes[0];
/* Past the last item of this leaf: continue in the next leaf. */
1776 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1777 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1782 leaf = path.nodes[0];
1785 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Left the run of csum items. */
1786 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1787 key.type != BTRFS_EXTENT_CSUM_KEY)
1790 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Item starts at or beyond the end of the requested range. */
1791 if (key.offset >= start + len)
1794 if (key.offset > start)
/* End of the byte range covered by the checksums in this item. */
1797 size = btrfs_item_size_nr(leaf, path.slots[0]);
1798 csum_end = key.offset + (size / csum_size) *
1799 root->fs_info->sectorsize;
1800 if (csum_end > start) {
1801 size = min(csum_end - start, len);
1810 btrfs_release_path(&path);
/*
 * Process one EXTENT_DATA item of the active node's current inode: track
 * extent_start/extent_end (detecting overlaps and holes), validate the
 * extent fields for inline, regular and preallocated extents, accumulate
 * found_size, and compare the extent against the checksums found for it.
 */
1816 static int process_file_extent(struct btrfs_root *root,
1817 struct extent_buffer *eb,
1818 int slot, struct btrfs_key *key,
1819 struct shared_node *active_node)
1821 struct inode_record *rec;
1822 struct btrfs_file_extent_item *fi;
1824 u64 disk_bytenr = 0;
1825 u64 extent_offset = 0;
/* Low bits that must be clear for a sector-aligned value. */
1826 u64 mask = root->fs_info->sectorsize - 1;
1830 rec = active_node->current;
1831 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1832 rec->found_file_extent = 1;
/* First extent seen for this inode: initialise the tracked range. */
1834 if (rec->extent_start == (u64)-1) {
1835 rec->extent_start = key->offset;
1836 rec->extent_end = key->offset;
/* Starting before the previous end is an overlap; after it, a hole. */
1839 if (rec->extent_end > key->offset)
1840 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1841 else if (rec->extent_end < key->offset) {
1842 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1843 key->offset - rec->extent_end);
1848 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1849 extent_type = btrfs_file_extent_type(eb, fi);
1851 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1852 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1854 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1855 rec->found_size += num_bytes;
/* Round the inline length up to a sector multiple for extent_end. */
1856 num_bytes = (num_bytes + mask) & ~mask;
1857 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1858 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1859 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1860 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1861 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned. */
1862 if (num_bytes == 0 || (num_bytes & mask))
1863 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced slice must lie within the extent's ram_bytes. */
1864 if (num_bytes + extent_offset >
1865 btrfs_file_extent_ram_bytes(eb, fi))
1866 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not carry any encoding. */
1867 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1868 (btrfs_file_extent_compression(eb, fi) ||
1869 btrfs_file_extent_encryption(eb, fi) ||
1870 btrfs_file_extent_other_encoding(eb, fi)))
1871 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents with a non-zero disk_bytenr count towards found_size. */
1872 if (disk_bytenr > 0)
1873 rec->found_size += num_bytes;
1875 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1877 rec->extent_end = key->offset + num_bytes;
1880 * The data reloc tree will copy full extents into its inode and then
1881 * copy the corresponding csums. Because the extent it copied could be
1882 * a preallocated extent that hasn't been written to yet there may be no
1883 * csums to copy, ergo we won't have csums for our file extent. This is
1884 * ok so just don't bother checking csums if the inode belongs to the
1887 if (disk_bytenr > 0 &&
1888 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* For compressed extents the lookup length is the on-disk extent size. */
1890 if (btrfs_file_extent_compression(eb, fi))
1891 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1893 disk_bytenr += extent_offset;
1895 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
/* Regular extents: note found csums and whether the range is fully covered. */
1898 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1900 rec->found_csum_item = 1;
1901 if (found < num_bytes)
1902 rec->some_csum_missing = 1;
1903 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1905 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item of leaf @eb for the active shared node: keep
 * active_node->current positioned on the inode record matching the item's
 * objectid and dispatch the item to the handler for its key type.
 */
1911 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1912 struct walk_control *wc)
1914 struct btrfs_key key;
1918 struct cache_tree *inode_cache;
1919 struct shared_node *active_node;
/* Special case: active node is the root level of a root with zero refs. */
1921 if (wc->root_level == wc->active_node &&
1922 btrfs_root_refs(&root->root_item) == 0)
1925 active_node = wc->nodes[wc->active_node];
1926 inode_cache = &active_node->inode_cache;
1927 nritems = btrfs_header_nritems(eb);
1928 for (i = 0; i < nritems; i++) {
1929 btrfs_item_key_to_cpu(eb, &key, i);
/* Free space objects and orphan items get separate handling. */
1931 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1933 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/*
 * Moved on to a higher inode number: finish the previous record and
 * fetch the record for the new inode.
 */
1936 if (active_node->current == NULL ||
1937 active_node->current->ino < key.objectid) {
1938 if (active_node->current) {
1939 active_node->current->checked = 1;
1940 maybe_free_inode_rec(inode_cache,
1941 active_node->current);
1943 active_node->current = get_inode_rec(inode_cache,
1945 BUG_ON(IS_ERR(active_node->current));
/* Dispatch on the item type. */
1948 case BTRFS_DIR_ITEM_KEY:
1949 case BTRFS_DIR_INDEX_KEY:
1950 ret = process_dir_item(eb, i, &key, active_node);
1952 case BTRFS_INODE_REF_KEY:
1953 ret = process_inode_ref(eb, i, &key, active_node);
1955 case BTRFS_INODE_EXTREF_KEY:
1956 ret = process_inode_extref(eb, i, &key, active_node);
1958 case BTRFS_INODE_ITEM_KEY:
1959 ret = process_inode_item(eb, i, &key, active_node);
1961 case BTRFS_EXTENT_DATA_KEY:
1962 ret = process_file_extent(root, eb, i, &key,
/* Address of the tree block last looked up at each level. */
1973 u64 bytenr[BTRFS_MAX_LEVEL];
/* Reference count recorded for that block. */
1974 u64 refs[BTRFS_MAX_LEVEL];
/* Non-zero when the block at that level has to be checked from this root. */
1975 int need_check[BTRFS_MAX_LEVEL];
/* Forward declarations for helpers defined further down in this file. */
1978 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1979 struct node_refs *nrefs, u64 level);
1980 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1981 unsigned int ext_ref);
1984 * Returns >0 Found error, not fatal, should continue
1985 * Returns <0 Fatal error, must exit the whole check
1986 * Returns 0 No errors found
/*
 * Check the inodes stored in the leaf at path->nodes[0]: position the path
 * on the first inode item (or first inode number change) and run
 * check_inode_item() from there. If the check moved the path to another
 * leaf, the reference information of the nodes along the path is refreshed
 * and path nodes below *level are released.
 */
1988 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1989 struct node_refs *nrefs, int *level, int ext_ref)
1991 struct extent_buffer *cur = path->nodes[0];
1992 struct btrfs_key key;
1996 int root_level = btrfs_header_level(root->node);
1998 int ret = 0; /* Final return value */
1999 int err = 0; /* Positive error bitmap */
/* Remember the starting leaf so a switch to another leaf can be detected. */
2001 cur_bytenr = cur->start;
2003 /* skip to first inode item or the first inode number change */
2004 nritems = btrfs_header_nritems(cur);
2005 for (i = 0; i < nritems; i++) {
2006 btrfs_item_key_to_cpu(cur, &key, i);
2008 first_ino = key.objectid;
2009 if (key.type == BTRFS_INODE_ITEM_KEY ||
2010 (first_ino && first_ino != key.objectid))
2014 path->slots[0] = nritems;
2020 err |= check_inode_item(root, path, ext_ref);
2022 /* modify cur since check_inode_item may change path */
2023 cur = path->nodes[0];
2025 if (err & LAST_ITEM)
2028 /* still have inode items in this leaf */
2029 if (cur->start == cur_bytenr)
2033 * we have switched to another leaf, above nodes may
2034 * have changed, here walk down the path, if a node
2035 * or leaf is shared, check whether we can skip this
2038 for (i = root_level; i >= 0; i--) {
/* Block at this level is the one already recorded in nrefs. */
2039 if (path->nodes[i]->start == nrefs->bytenr[i])
2042 ret = update_nodes_refs(root,
2043 path->nodes[i]->start,
2048 if (!nrefs->need_check[i]) {
/* Release the path nodes below the level reported back to the caller. */
2054 for (i = 0; i < *level; i++) {
2055 free_extent_buffer(path->nodes[i]);
2056 path->nodes[i] = NULL;
/*
 * Issue readahead for the children of @node, starting at pointer @slot and
 * continuing to the last pointer in the node.
 */
2065 static void reada_walk_down(struct btrfs_root *root,
2066 struct extent_buffer *node, int slot)
2068 struct btrfs_fs_info *fs_info = root->fs_info;
2075 level = btrfs_header_level(node);
2079 nritems = btrfs_header_nritems(node);
2080 for (i = slot; i < nritems; i++) {
2081 bytenr = btrfs_node_blockptr(node, i);
2082 ptr_gen = btrfs_node_ptr_generation(node, i);
2083 readahead_tree_block(fs_info, bytenr, ptr_gen);
2088 * Check the child node/leaf by the following condition:
2089 * 1. the first item key of the node/leaf should be the same with the one
2091 * 2. block in parent node should match the child node/leaf.
2092 * 3. generation of parent node and child's header should be consistent.
2094 * Or the child node/leaf pointed by the key in parent is not valid.
2096 * We hope to check leaf owner too, but since subvol may share leaves,
2097 * which makes leaf owner check not so strong, key check should be
2098 * sufficient enough for that case.
/*
 * Verify that @child is the block that pointer @slot of @parent refers to,
 * by comparing the first key, the block address and the generation, and
 * report each mismatch on stderr.
 */
2100 static int check_child_node(struct extent_buffer *parent, int slot,
2101 struct extent_buffer *child)
2103 struct btrfs_key parent_key;
2104 struct btrfs_key child_key;
2107 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* The first key of a leaf is an item key; of a node, a node key. */
2108 if (btrfs_header_level(child) == 0)
2109 btrfs_item_key_to_cpu(child, &child_key, 0);
2111 btrfs_node_key_to_cpu(child, &child_key, 0);
2113 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2116 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2117 parent_key.objectid, parent_key.type, parent_key.offset,
2118 child_key.objectid, child_key.type, child_key.offset);
2120 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2122 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2123 btrfs_node_blockptr(parent, slot),
2124 btrfs_header_bytenr(child));
2126 if (btrfs_node_ptr_generation(parent, slot) !=
2127 btrfs_header_generation(child)) {
/*
 * NOTE(review): the two messages above print the parent's value as
 * "wanted" and the child's as "have"; here the child's generation is
 * passed first - confirm whether the arguments should be swapped.
 */
2129 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2130 btrfs_header_generation(child),
2131 btrfs_node_ptr_generation(parent, slot));
2137 * For a tree node or leaf that is shared, we don't need to iterate it
2138 * in every fs or file tree check. Here we find all of its root ids, and only
2139 * check it in the fs or file tree which has the smallest root id.
/*
 * Decide whether a tree block referenced by the roots in @roots has to be
 * checked while processing @root: a single referencing root is a special
 * case, otherwise @root's objectid is compared with the first entry of the
 * ulist's rb-tree.
 */
2141 static int need_check(struct btrfs_root *root, struct ulist *roots)
2143 struct rb_node *node;
2144 struct ulist_node *u;
/* Only one root references the block. */
2146 if (roots->nnodes == 1)
/* Take the first entry of the rb-tree that backs the ulist. */
2149 node = rb_first(&roots->root);
2150 u = rb_entry(node, struct ulist_node, rb_node);
2152 * current root id is not smallest, we skip it and let it be checked
2153 * in the fs or file tree who hash the smallest root id.
2155 if (root->objectid != u->val)
2162 * For a tree node or leaf, we record its reference count, so that if we
2163 * process this node or leaf again later, we don't need to compute it again.
/*
 * Refresh the cached information in @nrefs for the tree block at @bytenr on
 * @level. Nothing is looked up when the block is already the one recorded
 * for that level; otherwise its reference count is read from the extent
 * tree and need_check[level] is recomputed.
 */
2165 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2166 struct node_refs *nrefs, u64 level)
2170 struct ulist *roots;
2172 if (nrefs->bytenr[level] != bytenr) {
2173 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2174 level, 1, &refs, NULL);
2178 nrefs->bytenr[level] = bytenr;
2179 nrefs->refs[level] = refs;
/* Collect the roots referencing the block and let need_check() decide. */
2181 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2186 check = need_check(root, roots);
2188 nrefs->need_check[level] = check;
/* Otherwise the block is always checked. */
2190 nrefs->need_check[level] = 1;
/*
 * Descend from path->nodes[*level] towards the leaves, processing each leaf
 * with process_one_leaf(). Reference counts of visited blocks are cached in
 * @nrefs, shared blocks are registered through enter_shared_node(), and
 * each child is validated (check_child_node() plus the leaf/node checker)
 * before the path steps into it.
 */
2197 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2198 struct walk_control *wc, int *level,
2199 struct node_refs *nrefs)
2201 enum btrfs_tree_block_status status;
2204 struct btrfs_fs_info *fs_info = root->fs_info;
2205 struct extent_buffer *next;
2206 struct extent_buffer *cur;
2210 WARN_ON(*level < 0);
2211 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached ref count when the block at *level is unchanged. */
2213 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2214 refs = nrefs->refs[*level];
2217 ret = btrfs_lookup_extent_info(NULL, root,
2218 path->nodes[*level]->start,
2219 *level, 1, &refs, NULL);
2224 nrefs->bytenr[*level] = path->nodes[*level]->start;
2225 nrefs->refs[*level] = refs;
2229 ret = enter_shared_node(root, path->nodes[*level]->start,
2237 while (*level >= 0) {
2238 WARN_ON(*level < 0);
2239 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2240 cur = path->nodes[*level];
/* The level stored in the header must match the walk's current level. */
2242 if (btrfs_header_level(cur) != *level)
/* All pointers/items of this block have been consumed. */
2245 if (path->slots[*level] >= btrfs_header_nritems(cur))
2248 ret = process_one_leaf(root, cur, wc);
/* Interior node: look at the child the current slot points to. */
2253 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2254 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2256 if (bytenr == nrefs->bytenr[*level - 1]) {
2257 refs = nrefs->refs[*level - 1];
2259 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2260 *level - 1, 1, &refs, NULL);
2264 nrefs->bytenr[*level - 1] = bytenr;
2265 nrefs->refs[*level - 1] = refs;
2270 ret = enter_shared_node(root, bytenr, refs,
2273 path->slots[*level]++;
/* Try the block cache first; read from disk if missing or stale. */
2278 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2279 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2280 free_extent_buffer(next);
2281 reada_walk_down(root, cur, path->slots[*level]);
2282 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
/* Unreadable child: record it as a corrupt extent. */
2283 if (!extent_buffer_uptodate(next)) {
2284 struct btrfs_key node_key;
2286 btrfs_node_key_to_cpu(path->nodes[*level],
2288 path->slots[*level]);
2289 btrfs_add_corrupt_extent_record(root->fs_info,
2291 path->nodes[*level]->start,
2292 root->fs_info->nodesize,
2299 ret = check_child_node(cur, path->slots[*level], next);
2301 free_extent_buffer(next);
2306 if (btrfs_is_leaf(next))
2307 status = btrfs_check_leaf(root, NULL, next);
2309 status = btrfs_check_node(root, NULL, next);
2310 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2311 free_extent_buffer(next);
/* Step down: the child replaces whatever the path held one level below. */
2316 *level = *level - 1;
2317 free_extent_buffer(path->nodes[*level]);
2318 path->nodes[*level] = next;
2319 path->slots[*level] = 0;
/* Mark the current block as fully consumed. */
2322 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/* Forward declaration; repeats the prototype declared earlier in this file. */
2326 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2327 unsigned int ext_ref);
2330 * Returns >0 Found error, should continue
2331 * Returns <0 Fatal error, must exit the whole check
2332 * Returns 0 No errors found
/*
 * Variant of walk_down_tree() built on update_nodes_refs(): descend from
 * path->nodes[*level], validate every block with btrfs_check_leaf() or
 * btrfs_check_node(), process leaves with process_one_leaf_v2(), and move
 * past children whose need_check flag is clear.
 */
2334 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2335 int *level, struct node_refs *nrefs, int ext_ref)
2337 enum btrfs_tree_block_status status;
2340 struct btrfs_fs_info *fs_info = root->fs_info;
2341 struct extent_buffer *next;
2342 struct extent_buffer *cur;
2345 WARN_ON(*level < 0);
2346 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2348 ret = update_nodes_refs(root, path->nodes[*level]->start,
2353 while (*level >= 0) {
2354 WARN_ON(*level < 0);
2355 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2356 cur = path->nodes[*level];
/* The level stored in the header must match the walk's current level. */
2358 if (btrfs_header_level(cur) != *level)
/* All pointers/items of this block have been consumed. */
2361 if (path->slots[*level] >= btrfs_header_nritems(cur))
2363 /* Don't forget to check leaf/node validation */
2365 ret = btrfs_check_leaf(root, NULL, cur);
2366 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2370 ret = process_one_leaf_v2(root, path, nrefs,
/* process_one_leaf_v2() may have changed the path; reload cur. */
2372 cur = path->nodes[*level];
2375 ret = btrfs_check_node(root, NULL, cur);
2376 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
/* Interior node: look at the child the current slot points to. */
2381 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2382 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2384 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
/* Child does not need checking from this root: move to the next slot. */
2387 if (!nrefs->need_check[*level - 1]) {
2388 path->slots[*level]++;
/* Try the block cache first; read from disk if missing or stale. */
2392 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2393 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2394 free_extent_buffer(next);
2395 reada_walk_down(root, cur, path->slots[*level]);
2396 next = read_tree_block(fs_info, bytenr, ptr_gen);
/* Unreadable child: record it as a corrupt extent. */
2397 if (!extent_buffer_uptodate(next)) {
2398 struct btrfs_key node_key;
2400 btrfs_node_key_to_cpu(path->nodes[*level],
2402 path->slots[*level]);
2403 btrfs_add_corrupt_extent_record(fs_info,
2405 path->nodes[*level]->start,
2413 ret = check_child_node(cur, path->slots[*level], next);
2417 if (btrfs_is_leaf(next))
2418 status = btrfs_check_leaf(root, NULL, next);
2420 status = btrfs_check_node(root, NULL, next);
2421 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2422 free_extent_buffer(next);
/* Step down: the child replaces whatever the path held one level below. */
2427 *level = *level - 1;
2428 free_extent_buffer(path->nodes[*level]);
2429 path->nodes[*level] = next;
2430 path->slots[*level] = 0;
/*
 * Climb back up the path from *level, looking for a level whose node still
 * has a slot after the current one. Exhausted nodes are released, and when
 * the level being left is the active shared node, leave_shared_node() is
 * called for it.
 */
2435 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2436 struct walk_control *wc, int *level)
2439 struct extent_buffer *leaf;
2441 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2442 leaf = path->nodes[i];
/* A sibling slot remains at this level. */
2443 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* Level exhausted: drop its buffer from the path. */
2448 free_extent_buffer(path->nodes[*level]);
2449 path->nodes[*level] = NULL;
2450 BUG_ON(*level > wc->active_node);
2451 if (*level == wc->active_node)
2452 leave_shared_node(root, wc, *level);
/*
 * Variant of walk_up_tree() without shared-node bookkeeping: climb the path
 * from *level, looking for a level with a slot after the current one, and
 * release the buffers of exhausted levels.
 */
2459 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2463 struct extent_buffer *leaf;
2465 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2466 leaf = path->nodes[i];
/* A sibling slot remains at this level. */
2467 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* Level exhausted: drop its buffer from the path. */
2472 free_extent_buffer(path->nodes[*level]);
2473 path->nodes[*level] = NULL;
/*
 * Sanity-check the inode record of a root directory. The checks cover: an
 * inode item was found and no errors are recorded, nlink is 1 and
 * found_link is 0, and the first backref is an inode ref with index 0 and
 * name ".." that has no dir index or dir item.
 */
2480 static int check_root_dir(struct inode_record *rec)
2482 struct inode_backref *backref;
2485 if (!rec->found_inode_item || rec->errors)
2487 if (rec->nlink != 1 || rec->found_link != 0)
2489 if (list_empty(&rec->backrefs))
/* Only the first backref on the list is examined. */
2491 backref = to_inode_backref(rec->backrefs.next);
2492 if (!backref->found_inode_ref)
2494 if (backref->index != 0 || backref->namelen != 2 ||
2495 memcmp(backref->name, "..", 2))
2497 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Repair a directory's size: locate the inode item of rec->ino, set its
 * size to rec->found_size, mark the leaf dirty and clear
 * I_ERR_DIR_ISIZE_WRONG on the record.
 */
2504 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2505 struct btrfs_root *root, struct btrfs_path *path,
2506 struct inode_record *rec)
2508 struct btrfs_inode_item *ei;
2509 struct btrfs_key key;
/* Search with the largest possible offset for the inode item key. */
2512 key.objectid = rec->ino;
2513 key.type = BTRFS_INODE_ITEM_KEY;
2514 key.offset = (u64)-1;
2516 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2520 if (!path->slots[0]) {
2527 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* The slot we ended up on belongs to a different inode. */
2528 if (key.objectid != rec->ino) {
2533 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2534 struct btrfs_inode_item);
2535 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2536 btrfs_mark_buffer_dirty(path->nodes[0]);
2537 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2538 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2539 root->root_key.objectid);
2541 btrfs_release_path(path);
/*
 * Add an orphan item for rec->ino with btrfs_add_orphan_item(), release the
 * path and clear I_ERR_NO_ORPHAN_ITEM on the record.
 */
2545 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2546 struct btrfs_root *root,
2547 struct btrfs_path *path,
2548 struct inode_record *rec)
2552 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2553 btrfs_release_path(path);
2555 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Repair an inode's nbytes: look up the inode item of rec->ino, set its
 * nbytes to rec->found_size, mark the leaf dirty and clear
 * I_ERR_FILE_NBYTES_WRONG on the record.
 */
2559 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2560 struct btrfs_root *root,
2561 struct btrfs_path *path,
2562 struct inode_record *rec)
2564 struct btrfs_inode_item *ei;
2565 struct btrfs_key key;
2568 key.objectid = rec->ino;
2569 key.type = BTRFS_INODE_ITEM_KEY;
2572 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2579 /* Since ret == 0, no need to check anything */
2580 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2581 struct btrfs_inode_item);
2582 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2583 btrfs_mark_buffer_dirty(path->nodes[0]);
2584 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2585 printf("reset nbytes for ino %llu root %llu\n",
2586 rec->ino, root->root_key.objectid);
2588 btrfs_release_path(path);
/*
 * Recreate a missing DIR_INDEX item for @backref in its own transaction: an
 * empty item keyed (backref->dir, DIR_INDEX, backref->index) is inserted
 * and filled with a dir item pointing at rec->ino under the backref's name.
 * Afterwards the backref is marked as having a dir index and the parent
 * directory's found_size and isize error flag are brought up to date.
 */
2592 static int add_missing_dir_index(struct btrfs_root *root,
2593 struct cache_tree *inode_cache,
2594 struct inode_record *rec,
2595 struct inode_backref *backref)
2597 struct btrfs_path path;
2598 struct btrfs_trans_handle *trans;
2599 struct btrfs_dir_item *dir_item;
2600 struct extent_buffer *leaf;
2601 struct btrfs_key key;
2602 struct btrfs_disk_key disk_key;
2603 struct inode_record *dir_rec;
2604 unsigned long name_ptr;
/* Item payload: the dir item header followed by the name. */
2605 u32 data_size = sizeof(*dir_item) + backref->namelen;
2608 trans = btrfs_start_transaction(root, 1);
2610 return PTR_ERR(trans);
2612 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2613 (unsigned long long)rec->ino);
2615 btrfs_init_path(&path);
2616 key.objectid = backref->dir;
2617 key.type = BTRFS_DIR_INDEX_KEY;
2618 key.offset = backref->index;
2619 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2622 leaf = path.nodes[0];
2623 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* Location key of the entry: the inode item of rec->ino. */
2625 disk_key.objectid = cpu_to_le64(rec->ino);
2626 disk_key.type = BTRFS_INODE_ITEM_KEY;
2627 disk_key.offset = 0;
2629 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2630 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2631 btrfs_set_dir_data_len(leaf, dir_item, 0);
2632 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is written directly after the dir item header. */
2633 name_ptr = (unsigned long)(dir_item + 1);
2634 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2635 btrfs_mark_buffer_dirty(leaf);
2636 btrfs_release_path(&path);
/* The return value of the commit is not captured here. */
2637 btrfs_commit_transaction(trans, root);
2639 backref->found_dir_index = 1;
2640 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2641 BUG_ON(IS_ERR(dir_rec));
/* Account for the new entry and re-evaluate the directory isize error. */
2644 dir_rec->found_size += backref->namelen;
2645 if (dir_rec->found_size == dir_rec->isize &&
2646 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2647 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2648 if (dir_rec->found_size != dir_rec->isize)
2649 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by @backref in its own transaction:
 * the entry is looked up by directory, name and index, then removed either
 * as a whole item or as a single name within the item.
 */
2654 static int delete_dir_index(struct btrfs_root *root,
2655 struct inode_backref *backref)
2657 struct btrfs_trans_handle *trans;
2658 struct btrfs_dir_item *di;
2659 struct btrfs_path path;
2662 trans = btrfs_start_transaction(root, 1);
2664 return PTR_ERR(trans);
2666 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2667 (unsigned long long)backref->dir,
2668 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2669 (unsigned long long)root->objectid);
2671 btrfs_init_path(&path);
2672 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2673 backref->name, backref->namelen,
2674 backref->index, -1);
/* Lookup did not yield an entry to delete: release and commit. */
2677 btrfs_release_path(&path);
2678 btrfs_commit_transaction(trans, root);
/* Remove the whole item, or just this name from the item. */
2685 ret = btrfs_del_item(trans, root, &path);
2687 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2689 btrfs_release_path(&path);
2690 btrfs_commit_transaction(trans, root);
/*
 * Build an inode item on the stack from @size, @nbytes, @nlink and @mode,
 * stamp it with the transaction id and the current time, insert it for
 * @ino, and warn that the recreated inode may be incomplete.
 *
 * NOTE(review): no setter for the atime seconds appears next to the
 * nanoseconds one - presumably it relies on the item being zeroed
 * beforehand; confirm.
 */
2694 static int __create_inode_item(struct btrfs_trans_handle *trans,
2695 struct btrfs_root *root, u64 ino, u64 size,
2696 u64 nbytes, u64 nlink, u32 mode)
2698 struct btrfs_inode_item ii;
2699 time_t now = time(NULL);
2702 btrfs_set_stack_inode_size(&ii, size);
2703 btrfs_set_stack_inode_nbytes(&ii, nbytes);
2704 btrfs_set_stack_inode_nlink(&ii, nlink);
2705 btrfs_set_stack_inode_mode(&ii, mode);
2706 btrfs_set_stack_inode_generation(&ii, trans->transid);
/* ctime and mtime are set to "now"; otime is zeroed. */
2707 btrfs_set_stack_timespec_nsec(&ii.atime, 0);
2708 btrfs_set_stack_timespec_sec(&ii.ctime, now);
2709 btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
2710 btrfs_set_stack_timespec_sec(&ii.mtime, now);
2711 btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
2712 btrfs_set_stack_timespec_sec(&ii.otime, 0);
2713 btrfs_set_stack_timespec_nsec(&ii.otime, 0);
2715 ret = btrfs_insert_inode(trans, root, ino, &ii);
2718 warning("root %llu inode %llu recreating inode item, this may "
2719 "be incomplete, please check permissions and content after "
2720 "the fsck completes.\n", (unsigned long long)root->objectid,
2721 (unsigned long long)ino);
/*
 * Create a placeholder inode item for @ino with size, nbytes and nlink all
 * zero. The mode is a directory for BTRFS_FT_DIR and a regular file for
 * any other file type, with permission bits 0755.
 */
2726 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2727 struct btrfs_root *root, u64 ino,
2730 u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2732 return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
/*
 * Recreate the inode item for @rec in its own transaction. The link count
 * is 1 for a root directory and rec->found_link otherwise. The inode is
 * treated as a directory (size = found_size) when a dir item was seen, and
 * as a regular file (size = extent_end) when not.
 */
2735 static int create_inode_item(struct btrfs_root *root,
2736 struct inode_record *rec, int root_dir)
2738 struct btrfs_trans_handle *trans;
2744 trans = btrfs_start_transaction(root, 1);
2745 if (IS_ERR(trans)) {
2746 ret = PTR_ERR(trans);
2750 nlink = root_dir ? 1 : rec->found_link;
2751 if (rec->found_dir_item) {
/* Ambiguous evidence: dir item and file extents both present. */
2752 if (rec->found_file_extent)
2753 fprintf(stderr, "root %llu inode %llu has both a dir "
2754 "item and extents, unsure if it is a dir or a "
2755 "regular file so setting it as a directory\n",
2756 (unsigned long long)root->objectid,
2757 (unsigned long long)rec->ino);
2758 mode = S_IFDIR | 0755;
2759 size = rec->found_size;
/* This condition is the exact negation of the one above (a plain else). */
2760 } else if (!rec->found_dir_item) {
2761 size = rec->extent_end;
2762 mode = S_IFREG | 0755;
2765 ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
2767 btrfs_commit_transaction(trans, root);
/*
 * Walk rec->backrefs and repair inconsistent combinations of dir item,
 * dir index and inode ref for @rec.
 *
 * Most repair branches are guarded by `!delete`, so the `delete` flag
 * distinguishes an add/recreate pass from a pass that removes bad dir
 * indexes via delete_dir_index().
 *
 * Returns ret when it is non-zero, otherwise the `repaired` counter.
 */
2771 static int repair_inode_backrefs(struct btrfs_root *root,
2772 struct inode_record *rec,
2773 struct cache_tree *inode_cache,
2776 struct inode_backref *tmp, *backref;
2777 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2781 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* Root directory without an inode item: recreate it with root_dir = 1. */
2782 if (!delete && rec->ino == root_dirid) {
2783 if (!rec->found_inode_item) {
2784 ret = create_inode_item(root, rec, 1);
2791 /* Index 0 for root dir's are special, don't mess with it */
2792 if (rec->ino == root_dirid && backref->index == 0)
/*
 * A dir index with no inode ref, or one whose index disagrees with the
 * inode ref (REF_ERR_INDEX_UNMATCH), is deleted and the backref dropped.
 */
2796 ((backref->found_dir_index && !backref->found_inode_ref) ||
2797 (backref->found_dir_index && backref->found_inode_ref &&
2798 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2799 ret = delete_dir_index(root, backref);
2803 list_del(&backref->list);
/* Dir item and inode ref exist but the dir index is missing: add it. */
2808 if (!delete && !backref->found_dir_index &&
2809 backref->found_dir_item && backref->found_inode_ref) {
2810 ret = add_missing_dir_index(root, inode_cache, rec,
/* A fully consistent, error-free backref needs no further handling. */
2815 if (backref->found_dir_item &&
2816 backref->found_dir_index) {
2817 if (!backref->errors &&
2818 backref->found_inode_ref) {
2819 list_del(&backref->list);
/* Only the inode ref exists: insert the missing dir item/index pair. */
2826 if (!delete && (!backref->found_dir_index &&
2827 !backref->found_dir_item &&
2828 backref->found_inode_ref)) {
2829 struct btrfs_trans_handle *trans;
2830 struct btrfs_key location;
2832 ret = check_dir_conflict(root, backref->name,
2838 * let nlink fixing routine to handle it,
2839 * which can do it better.
2844 location.objectid = rec->ino;
2845 location.type = BTRFS_INODE_ITEM_KEY;
2846 location.offset = 0;
2848 trans = btrfs_start_transaction(root, 1);
2849 if (IS_ERR(trans)) {
2850 ret = PTR_ERR(trans);
2853 fprintf(stderr, "adding missing dir index/item pair "
2855 (unsigned long long)rec->ino);
2856 ret = btrfs_insert_dir_item(trans, root, backref->name,
2858 backref->dir, &location,
2859 imode_to_type(rec->imode),
2862 btrfs_commit_transaction(trans, root);
/*
 * All three references exist and the index matches, but the inode item
 * itself is missing: recreate it as a non-root inode.
 */
2866 if (!delete && (backref->found_inode_ref &&
2867 backref->found_dir_index &&
2868 backref->found_dir_item &&
2869 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2870 !rec->found_inode_item)) {
2871 ret = create_inode_item(root, rec, 0);
2878 return ret ? ret : repaired;
2882 * To determine the file type for nlink/inode_item repair
2884 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2885 * Return -ENOENT if file type is not found.
/*
 * Store a BTRFS_FT_* value for @rec into *type: derived from rec->imode
 * when the inode item was found, otherwise taken from the filetype of a
 * backref that has a dir index or a dir item.
 */
2887 static int find_file_type(struct inode_record *rec, u8 *type)
2889 struct inode_backref *backref;
2891 /* For inode item recovered case */
2892 if (rec->found_inode_item) {
2893 *type = imode_to_type(rec->imode);
2897 list_for_each_entry(backref, &rec->backrefs, list) {
2898 if (backref->found_dir_index || backref->found_dir_item) {
2899 *type = backref->filetype;
2907 * To determine the file name for nlink repair
2909 * Return 0 if file name is found, set name and namelen.
2910 * Return -ENOENT if file name is not found.
/*
 * Copy the name of a backref that has a dir index, dir item or inode ref
 * into @name and store its length in *namelen.
 *
 * NOTE(review): memcpy() copies backref->namelen bytes without a bound and
 * without adding a NUL terminator, so @name must be large enough for any
 * stored name — confirm against the callers' buffer size.
 */
2912 static int find_file_name(struct inode_record *rec,
2913 char *name, int *namelen)
2915 struct inode_backref *backref;
2917 list_for_each_entry(backref, &rec->backrefs, list) {
2918 if (backref->found_dir_index || backref->found_dir_item ||
2919 backref->found_inode_ref) {
2920 memcpy(name, backref->name, backref->namelen);
2921 *namelen = backref->namelen;
2928 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of @rec from its backrefs in three phases:
 *  1. btrfs_unlink() every backref, dropping from the list those that lack
 *     any of dir index, dir item or inode ref;
 *  2. set nlink of the on-disk inode item to 0;
 *  3. re-add the backrefs still on the list with btrfs_add_link().
 * rec->found_link is reset to 0 before the first phase.
 */
2929 static int reset_nlink(struct btrfs_trans_handle *trans,
2930 struct btrfs_root *root,
2931 struct btrfs_path *path,
2932 struct inode_record *rec)
2934 struct inode_backref *backref;
2935 struct inode_backref *tmp;
2936 struct btrfs_key key;
2937 struct btrfs_inode_item *inode_item;
2940 /* We don't believe this either, reset it and iterate backref */
2941 rec->found_link = 0;
2943 /* Remove all backref including the valid ones */
2944 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2945 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2946 backref->index, backref->name,
2947 backref->namelen, 0);
2951 /* remove invalid backref, so it won't be added back */
2952 if (!(backref->found_dir_index &&
2953 backref->found_dir_item &&
2954 backref->found_inode_ref)) {
2955 list_del(&backref->list);
2962 /* Set nlink to 0 */
2963 key.objectid = rec->ino;
2964 key.type = BTRFS_INODE_ITEM_KEY;
2966 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2973 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2974 struct btrfs_inode_item);
2975 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2976 btrfs_mark_buffer_dirty(path->nodes[0]);
2977 btrfs_release_path(path);
2980 * Add back valid inode_ref/dir_item/dir_index,
2981 * add_link() will handle the nlink inc, so new nlink must be correct
2983 list_for_each_entry(backref, &rec->backrefs, list) {
2984 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2985 backref->name, backref->namelen,
2986 backref->filetype, &backref->index, 1, 0);
2991 btrfs_release_path(path);
/*
 * Search @root for a key with objectid BTRFS_LAST_FREE_OBJECTID and type
 * BTRFS_INODE_ITEM_KEY, then store the objectid of the item in the slot
 * just before the returned position into *highest_ino. The result is
 * compared against BTRFS_LAST_FREE_OBJECTID before the path is released.
 *
 * NOTE(review): `path->slots[0] - 1` assumes slots[0] > 0 at that point —
 * confirm the guarding condition.
 */
2995 static int get_highest_inode(struct btrfs_trans_handle *trans,
2996 struct btrfs_root *root,
2997 struct btrfs_path *path,
3000 struct btrfs_key key, found_key;
3003 btrfs_init_path(path);
3004 key.objectid = BTRFS_LAST_FREE_OBJECTID;
3006 key.type = BTRFS_INODE_ITEM_KEY;
3007 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3009 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3010 path->slots[0] - 1);
3011 *highest_ino = found_key.objectid;
3014 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3016 btrfs_release_path(path);
/*
 * Fix the link count of @rec.
 *
 * The file name and type are recovered from the record first, falling back
 * to the decimal inode number as name and BTRFS_FT_REG_FILE as type. Then
 * reset_nlink() rebuilds the links. If rec->found_link is still 0 afterwards
 * the inode is linked into a "lost+found" directory, extending the name with
 * a suffix for as long as btrfs_add_link() reports -EEXIST.
 * I_ERR_LINK_COUNT_WRONG is cleared at the end.
 */
3020 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3021 struct btrfs_root *root,
3022 struct btrfs_path *path,
3023 struct inode_record *rec)
3025 char *dir_name = "lost+found";
3026 char namebuf[BTRFS_NAME_LEN] = {0};
3031 int name_recovered = 0;
3032 int type_recovered = 0;
3036 * Get file name and type first before these invalid inode ref
3037 * are deleted by remove_all_invalid_backref()
3039 name_recovered = !find_file_name(rec, namebuf, &namelen);
3040 type_recovered = !find_file_type(rec, &type);
/* Fallback name: the inode number printed in decimal. */
3042 if (!name_recovered) {
3043 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3044 rec->ino, rec->ino);
3045 namelen = count_digits(rec->ino);
3046 sprintf(namebuf, "%llu", rec->ino);
3049 if (!type_recovered) {
3050 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3052 type = BTRFS_FT_REG_FILE;
3056 ret = reset_nlink(trans, root, path, rec);
3059 "Failed to reset nlink for inode %llu: %s\n",
3060 rec->ino, strerror(-ret));
/* No link survived reset_nlink(): move the inode under lost+found. */
3064 if (rec->found_link == 0) {
3065 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3069 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3070 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3073 fprintf(stderr, "Failed to create '%s' dir: %s\n",
3074 dir_name, strerror(-ret));
3077 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
3078 namebuf, namelen, type, NULL, 1, 0);
3080 * Add ".INO" suffix several times to handle case where
3081 * "FILENAME.INO" is already taken by another file.
3083 while (ret == -EEXIST) {
3085 * Conflicting file name, add ".INO" as suffix * +1 for '.'
3087 if (namelen + count_digits(rec->ino) + 1 >
3092 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
3094 namelen += count_digits(rec->ino) + 1;
3095 ret = btrfs_add_link(trans, root, rec->ino,
3096 lost_found_ino, namebuf,
3097 namelen, type, NULL, 1, 0);
3101 "Failed to link the inode %llu to %s dir: %s\n",
3102 rec->ino, dir_name, strerror(-ret));
3106 * Just increase the found_link, don't actually add the
3107 * backref. This will make things easier and this inode
3108 * record will be freed after the repair is done.
3109 * So fsck will not report problem about this inode.
3112 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3113 namelen, namebuf, dir_name);
3115 printf("Fixed the nlink of inode %llu\n", rec->ino);
3118 * Clear the flag anyway, or we will loop forever for the same inode
3119 * as it will not be removed from the bad inode list and the dead loop
3122 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3123 btrfs_release_path(path);
3128 * Check if there is any normal(reg or prealloc) file extent for given
3130 * This is used to determine the file type when neither its dir_index/item or
3131 * inode_item exists.
3133 * This will *NOT* report error, if any error happens, just consider it does
3134 * not have any normal file extent.
/*
 * Look in @root, without a transaction, for an EXTENT_DATA item belonging to
 * inode @ino whose extent type is not BTRFS_FILE_EXTENT_INLINE. Items with a
 * different objectid or key type are not considered.
 */
3136 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3138 struct btrfs_path path;
3139 struct btrfs_key key;
3140 struct btrfs_key found_key;
3141 struct btrfs_file_extent_item *fi;
3145 btrfs_init_path(&path);
3147 key.type = BTRFS_EXTENT_DATA_KEY;
3150 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* The search ended past the last item of the leaf: step to the next leaf. */
3155 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3156 ret = btrfs_next_leaf(root, &path);
3163 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3165 if (found_key.objectid != ino ||
3166 found_key.type != BTRFS_EXTENT_DATA_KEY)
3168 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3169 struct btrfs_file_extent_item);
3170 type = btrfs_file_extent_type(path.nodes[0], fi);
3171 if (type != BTRFS_FILE_EXTENT_INLINE) {
3177 btrfs_release_path(&path);
/*
 * Map a BTRFS_FT_* directory entry type to the matching S_IF* file type
 * bits through a static lookup table. Indexes that have no designated
 * initializer below are implicitly zero.
 *
 * NOTE(review): @type is used as an array index without a range check —
 * confirm that callers only pass one of the BTRFS_FT_* values listed here.
 */
3181 static u32 btrfs_type_to_imode(u8 type)
3183 static u32 imode_by_btrfs_type[] = {
3184 [BTRFS_FT_REG_FILE] = S_IFREG,
3185 [BTRFS_FT_DIR] = S_IFDIR,
3186 [BTRFS_FT_CHRDEV] = S_IFCHR,
3187 [BTRFS_FT_BLKDEV] = S_IFBLK,
3188 [BTRFS_FT_FIFO] = S_IFIFO,
3189 [BTRFS_FT_SOCK] = S_IFSOCK,
3190 [BTRFS_FT_SYMLINK] = S_IFLNK,
3193 return imode_by_btrfs_type[(type)];
/*
 * Rebuild a missing inode item for @rec.
 *
 * The file type comes from find_file_type() when available; otherwise it is
 * guessed from what was found for the inode (file extents, dir items, orphan
 * extents), with regular file as the final fallback. The inode is created
 * with btrfs_new_inode(), then the record is updated: I_ERR_NO_INODE_ITEM is
 * cleared and I_ERR_LINK_COUNT_WRONG is set so the nlink repair runs later.
 */
3196 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3197 struct btrfs_root *root,
3198 struct btrfs_path *path,
3199 struct inode_record *rec)
3203 int type_recovered = 0;
3206 printf("Trying to rebuild inode:%llu\n", rec->ino);
3208 type_recovered = !find_file_type(rec, &filetype);
3211 * Try to determine inode type if type not found.
3213 * For found regular file extent, it must be FILE.
3214 * For found dir_item/index, it must be DIR.
3216 * For undetermined one, use FILE as fallback.
3219 * 1. If found backref(inode_index/item is already handled) to it,
3221 * Need new inode-inode ref structure to allow search for that.
3223 if (!type_recovered) {
3224 if (rec->found_file_extent &&
3225 find_normal_file_extent(root, rec->ino)) {
3227 filetype = BTRFS_FT_REG_FILE;
3228 } else if (rec->found_dir_item) {
3230 filetype = BTRFS_FT_DIR;
3231 } else if (!list_empty(&rec->orphan_extents)) {
3233 filetype = BTRFS_FT_REG_FILE;
3235 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3238 filetype = BTRFS_FT_REG_FILE;
3242 ret = btrfs_new_inode(trans, root, rec->ino,
3243 mode | btrfs_type_to_imode(filetype));
3248 * Here inode rebuild is done, we only rebuild the inode item,
3249 * don't repair the nlink(like move to lost+found).
3250 * That is the job of nlink repair.
3252 * We just fill the record and return
/*
 * NOTE(review): found_dir_item is set here although it is the inode item
 * that was just created; find_file_type() keys on found_inode_item.
 * Confirm which flag is intended.
 */
3254 rec->found_dir_item = 1;
3255 rec->imode = mode | btrfs_type_to_imode(filetype);
3257 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3258 /* Ensure the inode_nlinks repair function will be called */
3259 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Process every orphan data extent attached to @rec.
 *
 * Each orphan is first probed with btrfs_get_extent() over its
 * [offset, offset + disk_len) range. The visible code has two outcomes:
 * a conflicting orphan is released with btrfs_free_extent(), otherwise a
 * file extent item is inserted for it, after which found_size and the hole
 * tree of the record are updated. Orphans are removed from the list and
 * I_ERR_FILE_EXTENT_ORPHAN is cleared at the end.
 */
3264 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3265 struct btrfs_root *root,
3266 struct btrfs_path *path,
3267 struct inode_record *rec)
3269 struct orphan_data_extent *orphan;
3270 struct orphan_data_extent *tmp;
3273 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3275 * Check for conflicting file extents
3277 * Here we don't know whether the extents is compressed or not,
3278 * so we can only assume it not compressed nor data offset,
3279 * and use its disk_len as extent length.
3281 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3282 orphan->offset, orphan->disk_len, 0);
3283 btrfs_release_path(path);
3288 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3289 orphan->disk_bytenr, orphan->disk_len);
3290 ret = btrfs_free_extent(trans,
3291 root->fs_info->extent_root,
3292 orphan->disk_bytenr, orphan->disk_len,
3293 0, root->objectid, orphan->objectid,
3298 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3299 orphan->offset, orphan->disk_bytenr,
3300 orphan->disk_len, orphan->disk_len);
3304 /* Update file size info */
3305 rec->found_size += orphan->disk_len;
3306 if (rec->found_size == rec->nbytes)
3307 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3309 /* Update the file extent hole info too */
3310 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3314 if (RB_EMPTY_ROOT(&rec->holes))
3315 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3317 list_del(&orphan->list);
3320 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps recorded in rec->holes: for each hole, punch a hole extent
 * with btrfs_punch_hole() and remove the entry from the tree, restarting
 * from rb_first() each time. I_ERR_FILE_EXTENT_DISCOUNT is cleared once the
 * tree is empty. A separate call covers a file that lost all its extents by
 * punching from 0 to isize rounded up to the sector size.
 */
3325 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3326 struct btrfs_root *root,
3327 struct btrfs_path *path,
3328 struct inode_record *rec)
3330 struct rb_node *node;
3331 struct file_extent_hole *hole;
3335 node = rb_first(&rec->holes);
3339 hole = rb_entry(node, struct file_extent_hole, node);
3340 ret = btrfs_punch_hole(trans, root, rec->ino,
3341 hole->start, hole->len);
3344 ret = del_file_extent_hole(&rec->holes, hole->start,
3348 if (RB_EMPTY_ROOT(&rec->holes))
3349 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3350 node = rb_first(&rec->holes);
3352 /* special case for a file losing all its file extent */
3354 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3355 round_up(rec->isize,
3356 root->fs_info->sectorsize));
3360 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3361 rec->ino, root->objectid);
/*
 * Run the per-inode repair helpers for every repairable error bit set in
 * rec->errors, all inside one transaction reserving 7 items.
 *
 * The helpers run in a fixed order and each later one is skipped as soon as
 * an earlier one fails (`!ret`). Records carrying none of the listed error
 * bits are not handled here.
 */
3366 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3368 struct btrfs_trans_handle *trans;
3369 struct btrfs_path path;
3372 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3373 I_ERR_NO_ORPHAN_ITEM |
3374 I_ERR_LINK_COUNT_WRONG |
3375 I_ERR_NO_INODE_ITEM |
3376 I_ERR_FILE_EXTENT_ORPHAN |
3377 I_ERR_FILE_EXTENT_DISCOUNT|
3378 I_ERR_FILE_NBYTES_WRONG)))
3382 * For nlink repair, it may create a dir and add link, so
3383 * 2 for parent(256)'s dir_index and dir_item
3384 * 2 for lost+found dir's inode_item and inode_ref
3385 * 1 for the new inode_ref of the file
3386 * 2 for lost+found dir's dir_index and dir_item for the file
3388 trans = btrfs_start_transaction(root, 7);
3390 return PTR_ERR(trans);
3392 btrfs_init_path(&path);
/*
 * The inode item is rebuilt first; repair_inode_no_item() sets
 * I_ERR_LINK_COUNT_WRONG so the nlink step further down also runs.
 */
3393 if (rec->errors & I_ERR_NO_INODE_ITEM)
3394 ret = repair_inode_no_item(trans, root, &path, rec);
3395 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3396 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3397 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3398 ret = repair_inode_discount_extent(trans, root, &path, rec);
3399 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3400 ret = repair_inode_isize(trans, root, &path, rec);
3401 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3402 ret = repair_inode_orphan_item(trans, root, &path, rec);
3403 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3404 ret = repair_inode_nlinks(trans, root, &path, rec);
3405 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3406 ret = repair_inode_nbytes(trans, root, &path, rec);
/* NOTE(review): the commit result is not folded into ret. */
3407 btrfs_commit_transaction(trans, root);
3408 btrfs_release_path(&path);
/*
 * Validate, and in repair mode fix, all inode records collected for @root.
 *
 * Visible phases:
 *  - a repair-only loop that runs repair_inode_backrefs() on records that
 *    still have backrefs (the `stage` variable suggests several passes);
 *  - a check of the root directory record, recreating the root dir with
 *    btrfs_make_root_dir() when repairing;
 *  - a final drain of the cache where each record is repaired and freed, or
 *    reported together with its unresolved backrefs.
 *
 * Returns -1 when `error` is positive, 0 otherwise.
 */
3412 static int check_inode_recs(struct btrfs_root *root,
3413 struct cache_tree *inode_cache)
3415 struct cache_extent *cache;
3416 struct ptr_node *node;
3417 struct inode_record *rec;
3418 struct inode_backref *backref;
3423 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* A root with zero refs is only checked for an unexpectedly non-empty cache. */
3425 if (btrfs_root_refs(&root->root_item) == 0) {
3426 if (!cache_tree_empty(inode_cache))
3427 fprintf(stderr, "warning line %d\n", __LINE__);
3432 * We need to repair backrefs first because we could change some of the
3433 * errors in the inode recs.
3435 * We also need to go through and delete invalid backrefs first and then
3436 * add the correct ones second. We do this because we may get EEXIST
3437 * when adding back the correct index because we hadn't yet deleted the
3440 * For example, if we were missing a dir index then the directories
3441 * isize would be wrong, so if we fixed the isize to what we thought it
3442 * would be and then fixed the backref we'd still have a invalid fs, so
3443 * we need to add back the dir index and then check to see if the isize
3448 if (stage == 3 && !err)
3451 cache = search_cache_extent(inode_cache, 0);
3452 while (repair && cache) {
3453 node = container_of(cache, struct ptr_node, cache);
3455 cache = next_cache_extent(cache);
3457 /* Need to free everything up and rescan */
3459 remove_cache_extent(inode_cache, &node->cache);
3461 free_inode_rec(rec);
3465 if (list_empty(&rec->backrefs))
3468 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Root directory: look up its record and validate it. */
3482 rec = get_inode_rec(inode_cache, root_dirid, 0);
3483 BUG_ON(IS_ERR(rec));
3485 ret = check_root_dir(rec);
3487 fprintf(stderr, "root %llu root dir %llu error\n",
3488 (unsigned long long)root->root_key.objectid,
3489 (unsigned long long)root_dirid);
3490 print_inode_error(root, rec);
3495 struct btrfs_trans_handle *trans;
3497 trans = btrfs_start_transaction(root, 1);
3498 if (IS_ERR(trans)) {
3499 err = PTR_ERR(trans);
3504 "root %llu missing its root dir, recreating\n",
3505 (unsigned long long)root->objectid);
3507 ret = btrfs_make_root_dir(trans, root, root_dirid);
3510 btrfs_commit_transaction(trans, root);
3514 fprintf(stderr, "root %llu root dir %llu not found\n",
3515 (unsigned long long)root->root_key.objectid,
3516 (unsigned long long)root_dirid);
/* Drain the cache: every record is removed, then repaired or reported. */
3520 cache = search_cache_extent(inode_cache, 0);
3523 node = container_of(cache, struct ptr_node, cache);
3525 remove_cache_extent(inode_cache, &node->cache);
/* The root dir and the orphan objectid are not reported in this loop. */
3527 if (rec->ino == root_dirid ||
3528 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3529 free_inode_rec(rec);
3533 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3534 ret = check_orphan_item(root, rec->ino);
3536 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3537 if (can_free_inode_rec(rec)) {
3538 free_inode_rec(rec);
/* Derive the two error bits that depend on the completed scan. */
3543 if (!rec->found_inode_item)
3544 rec->errors |= I_ERR_NO_INODE_ITEM;
3545 if (rec->found_link != rec->nlink)
3546 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3548 ret = try_repair_inode(root, rec);
3549 if (ret == 0 && can_free_inode_rec(rec)) {
3550 free_inode_rec(rec);
3556 if (!(repair && ret == 0))
3558 print_inode_error(root, rec);
/* Flag whichever of the three references is missing, then print the backref. */
3559 list_for_each_entry(backref, &rec->backrefs, list) {
3560 if (!backref->found_dir_item)
3561 backref->errors |= REF_ERR_NO_DIR_ITEM;
3562 if (!backref->found_dir_index)
3563 backref->errors |= REF_ERR_NO_DIR_INDEX;
3564 if (!backref->found_inode_ref)
3565 backref->errors |= REF_ERR_NO_INODE_REF;
3566 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3567 " namelen %u name %s filetype %d errors %x",
3568 (unsigned long long)backref->dir,
3569 (unsigned long long)backref->index,
3570 backref->namelen, backref->name,
3571 backref->filetype, backref->errors);
3572 print_ref_error(backref->errors);
3574 free_inode_rec(rec);
3576 return (error > 0) ? -1 : 0;
/*
 * Return the root_record for @objectid from @root_cache, allocating and
 * inserting a zeroed one (cache range [objectid, objectid + 1)) when the
 * lookup finds nothing.
 *
 * Errors are reported as ERR_PTR(-ENOMEM) or ERR_PTR(-EEXIST).
 */
3579 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3582 struct cache_extent *cache;
3583 struct root_record *rec = NULL;
3586 cache = lookup_cache_extent(root_cache, objectid, 1);
3588 rec = container_of(cache, struct root_record, cache);
3590 rec = calloc(1, sizeof(*rec));
3592 return ERR_PTR(-ENOMEM);
3593 rec->objectid = objectid;
3594 INIT_LIST_HEAD(&rec->backrefs);
3595 rec->cache.start = objectid;
3596 rec->cache.size = 1;
3598 ret = insert_cache_extent(root_cache, &rec->cache);
3600 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec matching (@ref_root, @dir, @name/@namelen), or
 * allocate a new zeroed one and append it to rec->backrefs.
 *
 * A new backref stores @index and a NUL-terminated copy of the name in the
 * space allocated right after the structure. @index is not part of the
 * lookup key.
 */
3605 static struct root_backref *get_root_backref(struct root_record *rec,
3606 u64 ref_root, u64 dir, u64 index,
3607 const char *name, int namelen)
3609 struct root_backref *backref;
3611 list_for_each_entry(backref, &rec->backrefs, list) {
3612 if (backref->ref_root != ref_root || backref->dir != dir ||
3613 backref->namelen != namelen)
3615 if (memcmp(name, backref->name, namelen))
/* No match: allocate room for the struct plus the name and its terminator. */
3620 backref = calloc(1, sizeof(*backref) + namelen + 1);
3623 backref->ref_root = ref_root;
3625 backref->index = index;
3626 backref->namelen = namelen;
3627 memcpy(backref->name, name, namelen);
3628 backref->name[namelen] = '\0';
3629 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Release callback for a root_record stored in a cache tree: unlinks every
 * entry of rec->backrefs, taking them from the head of the list one by one.
 */
3633 static void free_root_record(struct cache_extent *cache)
3635 struct root_record *rec;
3636 struct root_backref *backref;
3638 rec = container_of(cache, struct root_record, cache);
3639 while (!list_empty(&rec->backrefs)) {
3640 backref = to_root_backref(rec->backrefs.next);
3641 list_del(&backref->list);
/*
 * Instantiates the tree-freeing helper for root records on top of
 * free_root_record(). NOTE(review): presumably expands to a function named
 * free_root_recs_tree() — confirm against the macro definition.
 */
3648 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that an item of type @item_type referencing root @root_id was seen
 * in @ref_root, directory @dir, under @name.
 *
 * The matching root_backref is looked up or created, @errors is OR-ed into
 * it, and the found_* flag for the item type is set. Duplicate root refs and
 * root backrefs are flagged as errors. The backref becomes reachable once
 * both a forward ref and a dir item have been seen.
 */
3650 static int add_root_backref(struct cache_tree *root_cache,
3651 u64 root_id, u64 ref_root, u64 dir, u64 index,
3652 const char *name, int namelen,
3653 int item_type, int errors)
3655 struct root_record *rec;
3656 struct root_backref *backref;
3658 rec = get_root_rec(root_cache, root_id);
3659 BUG_ON(IS_ERR(rec));
3660 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3663 backref->errors |= errors;
/*
 * Items other than DIR_ITEM carry an index: a value differing from one
 * recorded by an earlier dir index / root ref / root backref is flagged.
 */
3665 if (item_type != BTRFS_DIR_ITEM_KEY) {
3666 if (backref->found_dir_index || backref->found_back_ref ||
3667 backref->found_forward_ref) {
3668 if (backref->index != index)
3669 backref->errors |= REF_ERR_INDEX_UNMATCH;
3671 backref->index = index;
3675 if (item_type == BTRFS_DIR_ITEM_KEY) {
3676 if (backref->found_forward_ref)
3678 backref->found_dir_item = 1;
3679 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3680 backref->found_dir_index = 1;
3681 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3682 if (backref->found_forward_ref)
3683 backref->errors |= REF_ERR_DUP_ROOT_REF;
3684 else if (backref->found_dir_item)
3686 backref->found_forward_ref = 1;
3687 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3688 if (backref->found_back_ref)
3689 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3690 backref->found_back_ref = 1;
3695 if (backref->found_forward_ref && backref->found_dir_item)
3696 backref->reachable = 1;
/*
 * Move the per-root records gathered in @src_cache into the global root
 * cache @dst_cache.
 *
 * For the tree-reloc root the source cache is simply freed. Otherwise each
 * record is removed from @src_cache, checked with is_child_root(), and its
 * dir item / dir index backrefs are replayed through add_root_backref()
 * with rec->ino as the referenced root id. Every processed record is freed.
 */
3700 static int merge_root_recs(struct btrfs_root *root,
3701 struct cache_tree *src_cache,
3702 struct cache_tree *dst_cache)
3704 struct cache_extent *cache;
3705 struct ptr_node *node;
3706 struct inode_record *rec;
3707 struct inode_backref *backref;
3710 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3711 free_inode_recs_tree(src_cache);
3716 cache = search_cache_extent(src_cache, 0);
3719 node = container_of(cache, struct ptr_node, cache);
3721 remove_cache_extent(src_cache, &node->cache);
3724 ret = is_child_root(root, root->objectid, rec->ino);
3730 list_for_each_entry(backref, &rec->backrefs, list) {
/* Backrefs in this cache must not have an inode ref recorded. */
3731 BUG_ON(backref->found_inode_ref);
3732 if (backref->found_dir_item)
3733 add_root_backref(dst_cache, rec->ino,
3734 root->root_key.objectid, backref->dir,
3735 backref->index, backref->name,
3736 backref->namelen, BTRFS_DIR_ITEM_KEY,
3738 if (backref->found_dir_index)
3739 add_root_backref(dst_cache, rec->ino,
3740 root->root_key.objectid, backref->dir,
3741 backref->index, backref->name,
3742 backref->namelen, BTRFS_DIR_INDEX_KEY,
3746 free_inode_rec(rec);
/*
 * Verify that the roots recorded in @root_cache are referenced consistently.
 *
 * The first walk over the cache deals with reachability: backrefs whose
 * referencing root has no found_ref are marked unreachable. The second walk
 * reports unreferenced fs trees (after consulting check_orphan_item() on the
 * tree root), flags missing dir item / dir index / root ref / root backref
 * on each backref, and prints the unresolved ones.
 *
 * Returns 1 when `errors` is positive, 0 otherwise.
 */
3753 static int check_root_refs(struct btrfs_root *root,
3754 struct cache_tree *root_cache)
3756 struct root_record *rec;
3757 struct root_record *ref_root;
3758 struct root_backref *backref;
3759 struct cache_extent *cache;
3765 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3766 BUG_ON(IS_ERR(rec));
3769 /* fixme: this can not detect circular references */
3772 cache = search_cache_extent(root_cache, 0);
3776 rec = container_of(cache, struct root_record, cache);
3777 cache = next_cache_extent(cache);
3779 if (rec->found_ref == 0)
3782 list_for_each_entry(backref, &rec->backrefs, list) {
3783 if (!backref->reachable)
3786 ref_root = get_root_rec(root_cache,
3788 BUG_ON(IS_ERR(ref_root));
3789 if (ref_root->found_ref > 0)
3792 backref->reachable = 0;
3794 if (rec->found_ref == 0)
/* Second walk: report unreferenced roots and broken backrefs. */
3800 cache = search_cache_extent(root_cache, 0);
3804 rec = container_of(cache, struct root_record, cache);
3805 cache = next_cache_extent(cache);
3807 if (rec->found_ref == 0 &&
3808 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3809 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3810 ret = check_orphan_item(root->fs_info->tree_root,
3816 * If we don't have a root item then we likely just have
3817 * a dir item in a snapshot for this root but no actual
3818 * ref key or anything so it's meaningless.
3820 if (!rec->found_root_item)
3823 fprintf(stderr, "fs tree %llu not referenced\n",
3824 (unsigned long long)rec->objectid);
3828 if (rec->found_ref > 0 && !rec->found_root_item)
3830 list_for_each_entry(backref, &rec->backrefs, list) {
3831 if (!backref->found_dir_item)
3832 backref->errors |= REF_ERR_NO_DIR_ITEM;
3833 if (!backref->found_dir_index)
3834 backref->errors |= REF_ERR_NO_DIR_INDEX;
3835 if (!backref->found_back_ref)
3836 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3837 if (!backref->found_forward_ref)
3838 backref->errors |= REF_ERR_NO_ROOT_REF;
3839 if (backref->reachable && backref->errors)
3846 fprintf(stderr, "fs tree %llu refs %u %s\n",
3847 (unsigned long long)rec->objectid, rec->found_ref,
3848 rec->found_root_item ? "" : "not found");
3850 list_for_each_entry(backref, &rec->backrefs, list) {
3851 if (!backref->reachable)
3853 if (!backref->errors && rec->found_root_item)
3855 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3856 " index %llu namelen %u name %s errors %x\n",
3857 (unsigned long long)backref->ref_root,
3858 (unsigned long long)backref->dir,
3859 (unsigned long long)backref->index,
3860 backref->namelen, backref->name,
3862 print_ref_error(backref->errors);
3865 return errors > 0 ? 1 : 0;
/*
 * Decode the btrfs_root_ref item at @slot of @eb and register it in
 * @root_cache through add_root_backref().
 *
 * The name stored after the item is read into a local buffer; a name longer
 * than BTRFS_NAME_LEN is clamped to that length and reported as
 * REF_ERR_NAME_TOO_LONG. The buffer is passed on with an explicit length,
 * not as a NUL-terminated string.
 */
3868 static int process_root_ref(struct extent_buffer *eb, int slot,
3869 struct btrfs_key *key,
3870 struct cache_tree *root_cache)
3876 struct btrfs_root_ref *ref;
3877 char namebuf[BTRFS_NAME_LEN];
3880 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3882 dirid = btrfs_root_ref_dirid(eb, ref);
3883 index = btrfs_root_ref_sequence(eb, ref);
3884 name_len = btrfs_root_ref_name_len(eb, ref);
3886 if (name_len <= BTRFS_NAME_LEN) {
3890 len = BTRFS_NAME_LEN;
3891 error = REF_ERR_NAME_TOO_LONG;
3893 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
/*
 * For a ROOT_REF key the offset is the referenced root and the objectid the
 * referencing one; the other call passes the two fields swapped.
 */
3895 if (key->type == BTRFS_ROOT_REF_KEY) {
3896 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3897 index, namebuf, len, key->type, error);
3899 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3900 index, namebuf, len, key->type, error);
/*
 * Release callback for a btrfs_corrupt_block stored in a cache tree; the
 * containing structure is recovered from the embedded cache_extent.
 */
3905 static void free_corrupt_block(struct cache_extent *cache)
3907 struct btrfs_corrupt_block *corrupt;
3909 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/*
 * Instantiates the tree-freeing helper for corrupt block records on top of
 * free_corrupt_block(). NOTE(review): presumably this is what defines
 * free_corrupt_blocks_tree(), which check_fs_root() calls — confirm against
 * the macro definition.
 */
3913 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3916 * Repair the btree of the given root.
3918 * The fix is to remove the node key in corrupt_blocks cache_tree.
3919 * and rebalance the tree.
3920 * After the fix, the btree should be writeable.
/*
 * Remove every tree block listed in @corrupt_blocks from @root and then
 * rebalance the tree, all in one transaction.
 *
 * First pass: for each corrupt block, search down to its level with
 * ins_len 0 (no balancing), delete the node pointer with btrfs_del_ptr()
 * and free the extent it pointed to. Second pass: search each key again
 * with ins_len -1 so that btrfs_search_slot() rebalances along the path.
 */
3922 static int repair_btree(struct btrfs_root *root,
3923 struct cache_tree *corrupt_blocks)
3925 struct btrfs_trans_handle *trans;
3926 struct btrfs_path path;
3927 struct btrfs_corrupt_block *corrupt;
3928 struct cache_extent *cache;
3929 struct btrfs_key key;
3934 if (cache_tree_empty(corrupt_blocks))
3937 trans = btrfs_start_transaction(root, 1);
3938 if (IS_ERR(trans)) {
3939 ret = PTR_ERR(trans);
3940 fprintf(stderr, "Error starting transaction: %s\n",
3944 btrfs_init_path(&path);
3945 cache = first_cache_extent(corrupt_blocks);
3947 corrupt = container_of(cache, struct btrfs_corrupt_block,
3949 level = corrupt->level;
3950 path.lowest_level = level;
3951 key.objectid = corrupt->key.objectid;
3952 key.type = corrupt->key.type;
3953 key.offset = corrupt->key.offset;
3956 * Here we don't want to do any tree balance, since it may
3957 * cause a balance with corrupted brother leaf/node,
3958 * so ins_len set to 0 here.
3959 * Balance will be done after all corrupt node/leaf is deleted.
3961 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3964 offset = btrfs_node_blockptr(path.nodes[level],
3967 /* Remove the ptr */
3968 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3972 * Remove the corresponding extent
3973 * return value is not concerned.
3975 btrfs_release_path(&path);
3976 ret = btrfs_free_extent(trans, root, offset,
3977 root->fs_info->nodesize, 0,
3978 root->root_key.objectid, level - 1, 0);
3979 cache = next_cache_extent(cache);
3982 /* Balance the btree using btrfs_search_slot() */
3983 cache = first_cache_extent(corrupt_blocks);
3985 corrupt = container_of(cache, struct btrfs_corrupt_block,
3987 memcpy(&key, &corrupt->key, sizeof(key));
3988 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3991 /* return will always >0 since it won't find the item */
3993 btrfs_release_path(&path);
3994 cache = next_cache_extent(cache);
3997 btrfs_commit_transaction(trans, root);
3998 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree.
 *
 * Outline of the visible steps:
 *  - install a per-root corrupt_blocks cache in fs_info;
 *  - register the root in @root_cache (except for the tree-reloc root);
 *  - attach pending orphan data extents to their inode records;
 *  - validate the root block, then walk the tree with walk_down_tree() /
 *    walk_up_tree(), starting either at the root node or at the recorded
 *    drop_progress position;
 *  - report corrupted tree blocks and try repair_btree();
 *  - merge the collected root records and check the inode records;
 *  - free the corrupt block cache and orphan extents.
 */
4002 static int check_fs_root(struct btrfs_root *root,
4003 struct cache_tree *root_cache,
4004 struct walk_control *wc)
4010 struct btrfs_path path;
4011 struct shared_node root_node;
4012 struct root_record *rec;
4013 struct btrfs_root_item *root_item = &root->root_item;
4014 struct cache_tree corrupt_blocks;
4015 struct orphan_data_extent *orphan;
4016 struct orphan_data_extent *tmp;
4017 enum btrfs_tree_block_status status;
4018 struct node_refs nrefs;
4021 * Reuse the corrupt_block cache tree to record corrupted tree block
4023 * Unlike the usage in extent tree check, here we do it in a per
4024 * fs/subvol tree base.
4026 cache_tree_init(&corrupt_blocks);
4027 root->fs_info->corrupt_blocks = &corrupt_blocks;
/* Every root except the tree-reloc root gets a record in root_cache. */
4029 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4030 rec = get_root_rec(root_cache, root->root_key.objectid);
4031 BUG_ON(IS_ERR(rec));
4032 if (btrfs_root_refs(root_item) > 0)
4033 rec->found_root_item = 1;
4036 btrfs_init_path(&path);
4037 memset(&root_node, 0, sizeof(root_node));
4038 cache_tree_init(&root_node.root_cache);
4039 cache_tree_init(&root_node.inode_cache);
4040 memset(&nrefs, 0, sizeof(nrefs));
4042 /* Move the orphan extent record to corresponding inode_record */
4043 list_for_each_entry_safe(orphan, tmp,
4044 &root->orphan_data_extents, list) {
4045 struct inode_record *inode;
4047 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4049 BUG_ON(IS_ERR(inode));
4050 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4051 list_move(&orphan->list, &inode->orphan_extents);
/* The walk control starts at the level of the root node. */
4054 level = btrfs_header_level(root->node);
4055 memset(wc->nodes, 0, sizeof(wc->nodes));
4056 wc->nodes[level] = &root_node;
4057 wc->active_node = level;
4058 wc->root_level = level;
4060 /* We may not have checked the root block, lets do that now */
4061 if (btrfs_is_leaf(root->node))
4062 status = btrfs_check_leaf(root, NULL, root->node);
4064 status = btrfs_check_node(root, NULL, root->node);
4065 if (status != BTRFS_TREE_BLOCK_CLEAN)
/*
 * A root that still has refs, or whose drop_progress objectid is 0, is
 * walked from its root node; otherwise the walk resumes at the key and
 * level stored in drop_progress / drop_level.
 */
4068 if (btrfs_root_refs(root_item) > 0 ||
4069 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4070 path.nodes[level] = root->node;
4071 extent_buffer_get(root->node);
4072 path.slots[level] = 0;
4074 struct btrfs_key key;
4075 struct btrfs_disk_key found_key;
4077 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4078 level = root_item->drop_level;
4079 path.lowest_level = level;
4080 if (level > btrfs_header_level(root->node) ||
4081 level >= BTRFS_MAX_LEVEL) {
4082 error("ignoring invalid drop level: %u", level);
4085 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4088 btrfs_node_key(path.nodes[level], &found_key,
4090 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4091 sizeof(found_key)));
4095 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4101 wret = walk_up_tree(root, &path, wc, &level);
4108 btrfs_release_path(&path);
/* List the tree blocks recorded as corrupt during the walk. */
4110 if (!cache_tree_empty(&corrupt_blocks)) {
4111 struct cache_extent *cache;
4112 struct btrfs_corrupt_block *corrupt;
4114 printf("The following tree block(s) is corrupted in tree %llu:\n",
4115 root->root_key.objectid);
4116 cache = first_cache_extent(&corrupt_blocks);
4118 corrupt = container_of(cache,
4119 struct btrfs_corrupt_block,
4121 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4122 cache->start, corrupt->level,
4123 corrupt->key.objectid, corrupt->key.type,
4124 corrupt->key.offset);
4125 cache = next_cache_extent(cache);
4128 printf("Try to repair the btree for root %llu\n",
4129 root->root_key.objectid);
4130 ret = repair_btree(root, &corrupt_blocks);
4132 fprintf(stderr, "Failed to repair btree: %s\n",
4135 printf("Btree for root %llu is fixed\n",
4136 root->root_key.objectid);
4140 err = merge_root_recs(root, &root_node.root_cache, root_cache);
/* Mark the inode record still being processed as checked. */
4144 if (root_node.current) {
4145 root_node.current->checked = 1;
4146 maybe_free_inode_rec(&root_node.inode_cache,
4150 err = check_inode_recs(root, &root_node.inode_cache);
/* Tear down the per-root state installed at the top of the function. */
4154 free_corrupt_blocks_tree(&corrupt_blocks);
4155 root->fs_info->corrupt_blocks = NULL;
4156 free_orphan_data_extents(&root->orphan_data_extents);
4160 static int fs_root_objectid(u64 objectid)
4162 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4163 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4165 return is_fstree(objectid);
4168 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4169 struct cache_tree *root_cache)
4171 struct btrfs_path path;
4172 struct btrfs_key key;
4173 struct walk_control wc;
4174 struct extent_buffer *leaf, *tree_node;
4175 struct btrfs_root *tmp_root;
4176 struct btrfs_root *tree_root = fs_info->tree_root;
4180 if (ctx.progress_enabled) {
4181 ctx.tp = TASK_FS_ROOTS;
4182 task_start(ctx.info);
4186 * Just in case we made any changes to the extent tree that weren't
4187 * reflected into the free space cache yet.
4190 reset_cached_block_groups(fs_info);
4191 memset(&wc, 0, sizeof(wc));
4192 cache_tree_init(&wc.shared);
4193 btrfs_init_path(&path);
4198 key.type = BTRFS_ROOT_ITEM_KEY;
4199 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4204 tree_node = tree_root->node;
4206 if (tree_node != tree_root->node) {
4207 free_root_recs_tree(root_cache);
4208 btrfs_release_path(&path);
4211 leaf = path.nodes[0];
4212 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4213 ret = btrfs_next_leaf(tree_root, &path);
4219 leaf = path.nodes[0];
4221 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4222 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4223 fs_root_objectid(key.objectid)) {
4224 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4225 tmp_root = btrfs_read_fs_root_no_cache(
4228 key.offset = (u64)-1;
4229 tmp_root = btrfs_read_fs_root(
4232 if (IS_ERR(tmp_root)) {
4236 ret = check_fs_root(tmp_root, root_cache, &wc);
4237 if (ret == -EAGAIN) {
4238 free_root_recs_tree(root_cache);
4239 btrfs_release_path(&path);
4244 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4245 btrfs_free_fs_root(tmp_root);
4246 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4247 key.type == BTRFS_ROOT_BACKREF_KEY) {
4248 process_root_ref(leaf, path.slots[0], &key,
4255 btrfs_release_path(&path);
4257 free_extent_cache_tree(&wc.shared);
4258 if (!cache_tree_empty(&wc.shared))
4259 fprintf(stderr, "warning line %d\n", __LINE__);
4261 task_stop(ctx.info);
4267 * Find the DIR_INDEX offset of the entry in @dirid that matches @location_id, name and file type.
4268 * Notice:time efficiency is O(N)
4270 * @root: the root of the fs/file tree
4271 * @index_ret: the index as return value
4272 * @namebuf: the name to match
4273 * @name_len: the length of name to match
4274 * @file_type: the file_type of INODE_ITEM to match
4276 * Returns 0 if found and *@index_ret will be modified with right value
4277 * Returns <0 if not found; *@index_ret is then meant to be (u64)-1
4279 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4280 u64 *index_ret, char *namebuf, u32 name_len,
4283 struct btrfs_path path;
4284 struct extent_buffer *node;
4285 struct btrfs_dir_item *di;
4286 struct btrfs_key key;
4287 struct btrfs_key location;
4288 char name[BTRFS_NAME_LEN] = {0};
4300 /* Search backwards, starting from the largest possible DIR_INDEX key of @dirid */
4301 key.objectid = dirid;
4302 key.offset = (u64)-1;
4303 key.type = BTRFS_DIR_INDEX_KEY;
4305 btrfs_init_path(&path);
4306 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4311 ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4314 *index_ret = (u64)-1; /* all-ones u64 is the "not found" marker; a plain (64)-1 would store 63 */
4317 /* Check whether inode_id/filetype/name match */
4318 node = path.nodes[0];
4319 slot = path.slots[0];
4320 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4321 total = btrfs_item_size_nr(node, slot);
4322 while (cur < total) {
4324 len = btrfs_dir_name_len(node, di);
4325 data_len = btrfs_dir_data_len(node, di);
4327 btrfs_dir_item_key_to_cpu(node, di, &location);
4328 if (location.objectid != location_id ||
4329 location.type != BTRFS_INODE_ITEM_KEY ||
4330 location.offset != 0)
4333 filetype = btrfs_dir_type(node, di);
4334 if (file_type != filetype)
4337 if (len > BTRFS_NAME_LEN)
4338 len = BTRFS_NAME_LEN; /* clamp so the copy into name[] below stays in bounds */
4340 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4341 if (len != name_len || strncmp(namebuf, name, len))
4344 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4345 *index_ret = key.offset; /* the DIR_INDEX key offset is the index being looked for */
4349 len += sizeof(*di) + data_len;
4350 di = (struct btrfs_dir_item *)((char *)di + len);
4356 btrfs_release_path(&path);
4361 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4362 * INODE_REF/INODE_EXTREF match.
4364 * @root: the root of the fs/file tree
4365 * @key: the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4366 * value while find index
4367 * @location_key: location key of the struct btrfs_dir_item to match
4368 * @name: the name to match
4369 * @namelen: the length of name
4370 * @file_type: the type of file to match
4372 * Return 0 if no error occurred.
4373 * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4374 * DIR_ITEM/DIR_INDEX
4375 * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4376 * and DIR_ITEM/DIR_INDEX mismatch
4378 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4379 struct btrfs_key *location_key, char *name,
4380 u32 namelen, u8 file_type)
4382 struct btrfs_path path;
4383 struct extent_buffer *node;
4384 struct btrfs_dir_item *di;
4385 struct btrfs_key location;
4386 char namebuf[BTRFS_NAME_LEN] = {0};
4395 /* get the index by traversing all index */
4396 if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4397 ret = find_dir_index(root, key->objectid,
4398 location_key->objectid, &key->offset,
4399 name, namelen, file_type);
4401 ret = DIR_INDEX_MISSING;
4405 btrfs_init_path(&path);
4406 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4408 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4413 /* Check whether inode_id/filetype/name match */
4414 node = path.nodes[0];
4415 slot = path.slots[0];
4416 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4417 total = btrfs_item_size_nr(node, slot);
4418 while (cur < total) {
4419 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4420 DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4422 len = btrfs_dir_name_len(node, di);
4423 data_len = btrfs_dir_data_len(node, di);
4425 btrfs_dir_item_key_to_cpu(node, di, &location);
4426 if (location.objectid != location_key->objectid ||
4427 location.type != location_key->type ||
4428 location.offset != location_key->offset)
4431 filetype = btrfs_dir_type(node, di);
4432 if (file_type != filetype)
4435 if (len > BTRFS_NAME_LEN) {
4436 len = BTRFS_NAME_LEN;
4437 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4439 key->type == BTRFS_DIR_ITEM_KEY ?
4440 "DIR_ITEM" : "DIR_INDEX",
4441 key->objectid, key->offset, len);
4443 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4445 if (len != namelen || strncmp(namebuf, name, len))
4451 len += sizeof(*di) + data_len;
4452 di = (struct btrfs_dir_item *)((char *)di + len);
4457 btrfs_release_path(&path);
4462 * Prints inode ref error message
4464 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4465 u64 index, const char *namebuf, int name_len,
4466 u8 filetype, int err)
4471 /* root dir error */
4472 if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4474 "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4475 root->objectid, key->objectid, key->offset, namebuf);
4480 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4481 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4482 root->objectid, key->offset,
4483 btrfs_name_hash(namebuf, name_len),
4484 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4486 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4487 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4488 root->objectid, key->offset, index,
4489 err & DIR_INDEX_MISMATCH ? "mismatch" : "missing", /* pick the wording from the DIR_INDEX bit, not the DIR_ITEM one */
4494 * Insert the missing inode item.
4496 * Returns 0 means success.
4497 * Returns <0 means error.
4499 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4502 struct btrfs_key key;
4503 struct btrfs_trans_handle *trans;
4504 struct btrfs_path path;
4508 key.type = BTRFS_INODE_ITEM_KEY;
4511 btrfs_init_path(&path);
4512 trans = btrfs_start_transaction(root, 1);
4513 if (IS_ERR(trans)) {
4518 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4519 if (ret < 0 || !ret)
4522 /* insert inode item */
4523 create_inode_item_lowmem(trans, root, ino, filetype);
4526 btrfs_commit_transaction(trans, root);
4529 error("failed to repair root %llu INODE ITEM[%llu] missing",
4530 root->objectid, ino);
4531 btrfs_release_path(&path);
4536 * Traverse the given INODE_REF and call find_dir_item() to find related
4537 * DIR_ITEM/DIR_INDEX.
4539 * @root: the root of the fs/file tree
4540 * @ref_key: the key of the INODE_REF
4541 * @refs: the count of INODE_REF
4542 * @mode: the st_mode of INODE_ITEM
4544 * Return 0 if no error occurred.
4546 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4547 struct btrfs_path *path, char *name_ret,
4548 u32 *namelen_ret, u64 *refs, int mode)
4550 struct btrfs_key key;
4551 struct btrfs_key location;
4552 struct btrfs_inode_ref *ref;
4553 struct extent_buffer *node;
4554 char namebuf[BTRFS_NAME_LEN] = {0};
4564 location.objectid = ref_key->objectid;
4565 location.type = BTRFS_INODE_ITEM_KEY;
4566 location.offset = 0;
4567 node = path->nodes[0];
4568 slot = path->slots[0];
4570 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4571 total = btrfs_item_size_nr(node, slot);
4574 /* Update inode ref count */
4578 index = btrfs_inode_ref_index(node, ref);
4579 name_len = btrfs_inode_ref_name_len(node, ref);
4580 if (cur + sizeof(*ref) + name_len > total ||
4581 name_len > BTRFS_NAME_LEN) {
4582 warning("root %llu INODE_REF[%llu %llu] name too long",
4583 root->objectid, ref_key->objectid, ref_key->offset);
4585 if (total < cur + sizeof(*ref))
4587 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4592 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4594 /* copy the first name found to name_ret */
4595 if (*refs == 1 && name_ret) {
4596 memcpy(name_ret, namebuf, len);
4600 /* Check root dir ref */
4601 if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4602 if (index != 0 || len != strlen("..") ||
4603 strncmp("..", namebuf, len) ||
4604 ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4605 /* set err bits then repair will delete the ref */
4606 err |= DIR_INDEX_MISSING;
4607 err |= DIR_ITEM_MISSING;
4612 /* Find related DIR_INDEX; find_dir_item() compares its file_type with btrfs_dir_type(), so convert the st_mode first */
4613 key.objectid = ref_key->offset;
4614 key.type = BTRFS_DIR_INDEX_KEY;
4616 tmp_err |= find_dir_item(root, &key, &location, namebuf, len, imode_to_type(mode));
4618 /* Find related dir_item (same st_mode -> file type conversion as above) */
4619 key.objectid = ref_key->offset;
4620 key.type = BTRFS_DIR_ITEM_KEY;
4621 key.offset = btrfs_name_hash(namebuf, len);
4622 tmp_err |= find_dir_item(root, &key, &location, namebuf, len, imode_to_type(mode));
4625 print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4626 imode_to_type(mode), tmp_err);
4628 len = sizeof(*ref) + name_len;
4629 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4639 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4640 * DIR_ITEM/DIR_INDEX.
4642 * @root: the root of the fs/file tree
4643 * @ref_key: the key of the INODE_EXTREF
4644 * @refs: the count of INODE_EXTREF
4645 * @mode: the st_mode of INODE_ITEM
4647 * Return 0 if no error occurred.
4649 static int check_inode_extref(struct btrfs_root *root,
4650 struct btrfs_key *ref_key,
4651 struct extent_buffer *node, int slot, u64 *refs,
4654 struct btrfs_key key;
4655 struct btrfs_key location;
4656 struct btrfs_inode_extref *extref;
4657 char namebuf[BTRFS_NAME_LEN] = {0};
4667 location.objectid = ref_key->objectid;
4668 location.type = BTRFS_INODE_ITEM_KEY;
4669 location.offset = 0;
4671 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4672 total = btrfs_item_size_nr(node, slot);
4675 /* update inode ref count */
4677 name_len = btrfs_inode_extref_name_len(node, extref);
4678 index = btrfs_inode_extref_index(node, extref);
4679 parent = btrfs_inode_extref_parent(node, extref);
4680 if (name_len <= BTRFS_NAME_LEN) {
4683 len = BTRFS_NAME_LEN;
4684 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4685 root->objectid, ref_key->objectid, ref_key->offset);
4687 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4689 /* Check root dir ref name */
4690 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4691 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4692 root->objectid, ref_key->objectid, ref_key->offset,
4694 err |= ROOT_DIR_ERROR;
4697 /* find related dir_index; @mode is documented as st_mode while find_dir_item() matches against btrfs_dir_type(), so convert it */
4698 key.objectid = parent;
4699 key.type = BTRFS_DIR_INDEX_KEY;
4701 ret = find_dir_item(root, &key, &location, namebuf, len, imode_to_type(mode));
4704 /* find related dir_item (same st_mode -> file type conversion as above) */
4705 key.objectid = parent;
4706 key.type = BTRFS_DIR_ITEM_KEY;
4707 key.offset = btrfs_name_hash(namebuf, len);
4708 ret = find_dir_item(root, &key, &location, namebuf, len, imode_to_type(mode));
4711 len = sizeof(*extref) + name_len;
4712 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4722 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4723 * DIR_ITEM/DIR_INDEX match.
4724 * Return with @index_ret.
4726 * @root: the root of the fs/file tree
4727 * @key: the key of the INODE_REF/INODE_EXTREF
4728 * @name: the name in the INODE_REF/INODE_EXTREF
4729 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4730 * @index_ret: the index in the INODE_REF/INODE_EXTREF,
4731 * value (u64)-1 means do not check index
4732 * @ext_ref: the EXTENDED_IREF feature
4734 * Return 0 if no error occurred.
4735 * Return >0 for error bitmap
4737 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4738 char *name, int namelen, u64 *index_ret,
4739 unsigned int ext_ref)
4741 struct btrfs_path path;
4742 struct btrfs_inode_ref *ref;
4743 struct btrfs_inode_extref *extref;
4744 struct extent_buffer *node;
4745 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4758 btrfs_init_path(&path);
4759 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4761 ret = INODE_REF_MISSING;
4765 node = path.nodes[0];
4766 slot = path.slots[0];
4768 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4769 total = btrfs_item_size_nr(node, slot);
4771 /* Iterate all entry of INODE_REF */
4772 while (cur < total) {
4773 ret = INODE_REF_MISSING;
4775 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4776 ref_index = btrfs_inode_ref_index(node, ref);
4777 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4780 if (cur + sizeof(*ref) + ref_namelen > total ||
4781 ref_namelen > BTRFS_NAME_LEN) {
4782 warning("root %llu INODE %s[%llu %llu] name too long",
4784 key->type == BTRFS_INODE_REF_KEY ?
4786 key->objectid, key->offset);
4788 if (cur + sizeof(*ref) > total)
4790 len = min_t(u32, total - cur - sizeof(*ref),
4796 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4799 if (len != namelen || strncmp(ref_namebuf, name, len))
4802 *index_ret = ref_index;
4806 len = sizeof(*ref) + ref_namelen;
4807 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4812 /* Skip if not support EXTENDED_IREF feature */
4816 btrfs_release_path(&path);
4817 btrfs_init_path(&path);
4819 dir_id = key->offset;
4820 key->type = BTRFS_INODE_EXTREF_KEY;
4821 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4823 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4825 ret = INODE_REF_MISSING;
4829 node = path.nodes[0];
4830 slot = path.slots[0];
4832 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4834 total = btrfs_item_size_nr(node, slot);
4836 /* Iterate all entry of INODE_EXTREF */
4837 while (cur < total) {
4838 ret = INODE_REF_MISSING;
4840 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4841 ref_index = btrfs_inode_extref_index(node, extref);
4842 parent = btrfs_inode_extref_parent(node, extref);
4843 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4846 if (parent != dir_id)
4849 if (ref_namelen <= BTRFS_NAME_LEN) {
4852 len = BTRFS_NAME_LEN;
4853 warning("root %llu INODE %s[%llu %llu] name too long",
4855 key->type == BTRFS_INODE_REF_KEY ?
4857 key->objectid, key->offset);
4859 read_extent_buffer(node, ref_namebuf,
4860 (unsigned long)(extref + 1), len);
4862 if (len != namelen || strncmp(ref_namebuf, name, len))
4865 *index_ret = ref_index;
4870 len = sizeof(*extref) + ref_namelen;
4871 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4876 btrfs_release_path(&path);
/* Report the DIR_ITEM / DIR_INDEX / INODE_ITEM / INODE_REF problems flagged in the @err bitmap. */
4880 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
4881 u64 ino, u64 index, const char *namebuf,
4882 int name_len, u8 filetype, int err)
4884 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
4885 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
4886 root->objectid, key->objectid, key->offset, namebuf,
4888 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing");
4891 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
4892 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
4893 root->objectid, key->objectid, index, namebuf, filetype,
4894 err & DIR_INDEX_MISMATCH ? "mismatch" : "missing"); /* pick the wording from the DIR_INDEX bit, not the DIR_ITEM one */
4897 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
4899 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
4900 root->objectid, ino, index, namebuf, filetype,
4901 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
4904 if (err & INODE_REF_MISSING)
4906 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
4907 root->objectid, ino, key->objectid, namebuf, filetype);
4912 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4913 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4915 * @root: the root of the fs/file tree
4916 * @di_key: the key of the DIR_ITEM/DIR_INDEX being checked
4918 * @size: accumulates the name length of every entry visited
4919 * @ext_ref: the EXTENDED_IREF feature
4921 * Return 0 if no error occurred.
4923 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
4924 struct btrfs_path *path, u64 *size,
4925 unsigned int ext_ref)
4927 struct btrfs_dir_item *di;
4928 struct btrfs_inode_item *ii;
4929 struct btrfs_key key;
4930 struct btrfs_key location;
4931 struct extent_buffer *node;
4933 char namebuf[BTRFS_NAME_LEN] = {0};
4946 node = path->nodes[0];
4947 slot = path->slots[0];
4949 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4950 * ignore index check.
4952 if (di_key->type == BTRFS_DIR_INDEX_KEY)
4953 index = di_key->offset;
4957 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4958 total = btrfs_item_size_nr(node, slot);
4959 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4961 while (cur < total) {
4962 data_len = btrfs_dir_data_len(node, di);
4965 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4967 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
4968 di_key->objectid, di_key->offset, data_len);
4970 name_len = btrfs_dir_name_len(node, di);
4971 if (name_len <= BTRFS_NAME_LEN) {
4974 len = BTRFS_NAME_LEN;
4975 warning("root %llu %s[%llu %llu] name too long",
4977 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
4978 di_key->objectid, di_key->offset);
4980 (*size) += name_len;
4981 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4983 filetype = btrfs_dir_type(node, di);
4985 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
4986 di_key->offset != btrfs_name_hash(namebuf, len)) {
4988 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4989 root->objectid, di_key->objectid, di_key->offset,
4990 namebuf, len, filetype, di_key->offset,
4991 btrfs_name_hash(namebuf, len));
4994 btrfs_dir_item_key_to_cpu(node, di, &location);
4995 /* Ignore related ROOT_ITEM check */
4996 if (location.type == BTRFS_ROOT_ITEM_KEY)
4999 btrfs_release_path(path);
5000 /* Check relative INODE_ITEM(existence/filetype) */
5001 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5003 tmp_err |= INODE_ITEM_MISSING;
5007 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5008 struct btrfs_inode_item);
5009 mode = btrfs_inode_mode(path->nodes[0], ii);
5010 if (imode_to_type(mode) != filetype) {
5011 tmp_err |= INODE_ITEM_MISMATCH;
5015 /* Check relative INODE_REF/INODE_EXTREF */
5016 key.objectid = location.objectid;
5017 key.type = BTRFS_INODE_REF_KEY;
5018 key.offset = di_key->objectid;
5019 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5022 /* check relative INDEX/ITEM: a DIR_ITEM is cross-checked against its DIR_INDEX and vice versa */
5023 key.objectid = di_key->objectid;
5024 if (di_key->type == BTRFS_DIR_ITEM_KEY) { /* key.type holds an inode ref type at this point, so test the item being checked */
5025 key.type = BTRFS_DIR_INDEX_KEY;
5028 key.type = BTRFS_DIR_ITEM_KEY;
5029 key.offset = btrfs_name_hash(namebuf, name_len);
5032 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5033 name_len, filetype);
5034 /* find_dir_item may find index */
5035 if (key.type == BTRFS_DIR_INDEX_KEY)
5038 btrfs_release_path(path);
5039 print_dir_item_err(root, di_key, location.objectid, index,
5040 namebuf, name_len, filetype, tmp_err);
5042 len = sizeof(*di) + name_len + data_len;
5043 di = (struct btrfs_dir_item *)((char *)di + len);
5046 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5047 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5048 root->objectid, di_key->objectid,
5055 btrfs_release_path(path);
5056 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0); /* point @path back at the item the caller passed in */
5058 err |= ret > 0 ? -ENOENT : ret;
5063 * Check file extent datasum/hole, update the size of the file extents,
5064 * check and update the last offset of the file extent.
5066 * @root: the root of fs/file tree.
5067 * @fkey: the key of the file extent.
5068 * @nodatasum: INODE_NODATASUM feature.
5069 * @size: the sum of all EXTENT_DATA items size for this inode.
5070 * @end: the offset of the last extent.
5072 * Return 0 if no error occurred.
5074 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5075 struct extent_buffer *node, int slot,
5076 unsigned int nodatasum, u64 *size, u64 *end)
5078 struct btrfs_file_extent_item *fi;
5081 u64 extent_num_bytes;
5083 u64 csum_found; /* In byte size, sectorsize aligned */
5084 u64 search_start; /* Logical range start we search for csum */
5085 u64 search_len; /* Logical range len we search for csum */
5086 unsigned int extent_type;
5087 unsigned int is_hole;
5092 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5094 /* Check inline extent */
5095 extent_type = btrfs_file_extent_type(node, fi);
5096 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5097 struct btrfs_item *e = btrfs_item_nr(slot);
5098 u32 item_inline_len;
5100 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5101 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5102 compressed = btrfs_file_extent_compression(node, fi);
5103 if (extent_num_bytes == 0) {
5105 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5106 root->objectid, fkey->objectid, fkey->offset);
5107 err |= FILE_EXTENT_ERROR;
5109 if (!compressed && extent_num_bytes != item_inline_len) {
5111 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5112 root->objectid, fkey->objectid, fkey->offset,
5113 extent_num_bytes, item_inline_len);
5114 err |= FILE_EXTENT_ERROR;
5116 *end += extent_num_bytes;
5117 *size += extent_num_bytes;
5121 /* Check extent type */
5122 if (extent_type != BTRFS_FILE_EXTENT_REG &&
5123 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5124 err |= FILE_EXTENT_ERROR;
5125 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5126 root->objectid, fkey->objectid, fkey->offset);
5130 /* Check REG_EXTENT/PREALLOC_EXTENT */
5131 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5132 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5133 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5134 extent_offset = btrfs_file_extent_offset(node, fi);
5135 compressed = btrfs_file_extent_compression(node, fi);
5136 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5139 * Check EXTENT_DATA csum
5141 * For plain (uncompressed) extent, we should only check the range
5142 * we're referring to, as it's possible that part of prealloc extent
5143 * has been written, and has csum:
5145 * |<--- Original large preallocated extent A ---->|
5146 * |<- Prealloc File Extent ->|<- Regular Extent ->|
5149 * For compressed extent, we should check the whole range.
5152 search_start = disk_bytenr + extent_offset;
5153 search_len = extent_num_bytes;
5155 search_start = disk_bytenr;
5156 search_len = disk_num_bytes;
5158 ret = count_csum_range(root, search_start, search_len, &csum_found);
5159 if (csum_found > 0 && nodatasum) {
5160 err |= ODD_CSUM_ITEM;
5161 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5162 root->objectid, fkey->objectid, fkey->offset);
5163 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5164 !is_hole && (ret < 0 || csum_found < search_len)) {
5165 err |= CSUM_ITEM_MISSING;
5166 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5167 root->objectid, fkey->objectid, fkey->offset,
5168 csum_found, search_len);
5169 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5170 err |= ODD_CSUM_ITEM;
5171 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5172 root->objectid, fkey->objectid, fkey->offset, csum_found);
5175 /* Check EXTENT_DATA hole */
5176 if (!no_holes && *end != fkey->offset) {
5177 err |= FILE_EXTENT_ERROR;
5178 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
5179 root->objectid, fkey->objectid, fkey->offset);
5182 *end += extent_num_bytes;
5184 *size += extent_num_bytes;
5190 * Set inode item nbytes to @nbytes
5192 * Returns 0 on success
5193 * Returns != 0 on error
5195 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5196 struct btrfs_path *path,
5197 u64 ino, u64 nbytes)
5199 struct btrfs_trans_handle *trans;
5200 struct btrfs_inode_item *ii;
5201 struct btrfs_key key;
5202 struct btrfs_key research_key;
5206 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]); /* remember the current item so @path can be re-searched at the end */
5209 key.type = BTRFS_INODE_ITEM_KEY;
5212 trans = btrfs_start_transaction(root, 1);
5213 if (IS_ERR(trans)) {
5214 ret = PTR_ERR(trans);
5219 btrfs_release_path(path);
5220 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5228 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5229 struct btrfs_inode_item);
5230 btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5231 btrfs_mark_buffer_dirty(path->nodes[0]);
5233 btrfs_commit_transaction(trans, root);
5236 error("failed to set nbytes in inode %llu root %llu",
5237 ino, root->root_key.objectid);
5239 printf("Set nbytes in inode item %llu root %llu to %llu\n", ino, /* newline belongs at the end of the message */
5240 root->root_key.objectid, nbytes);
5243 btrfs_release_path(path);
5244 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5251 * Set directory inode isize to @isize.
5253 * Returns 0 on success.
5254 * Returns != 0 on error.
5256 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5257 struct btrfs_path *path,
5260 struct btrfs_trans_handle *trans;
5261 struct btrfs_inode_item *ii;
5262 struct btrfs_key key;
5263 struct btrfs_key research_key;
5267 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5270 key.type = BTRFS_INODE_ITEM_KEY;
5273 trans = btrfs_start_transaction(root, 1);
5274 if (IS_ERR(trans)) {
5275 ret = PTR_ERR(trans);
5280 btrfs_release_path(path);
5281 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5289 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5290 struct btrfs_inode_item);
5291 btrfs_set_inode_size(path->nodes[0], ii, isize);
5292 btrfs_mark_buffer_dirty(path->nodes[0]);
5294 btrfs_commit_transaction(trans, root);
5297 error("failed to set isize in inode %llu root %llu",
5298 ino, root->root_key.objectid);
5300 printf("Set isize in inode %llu root %llu to %llu\n",
5301 ino, root->root_key.objectid, isize);
5303 btrfs_release_path(path);
5304 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5311 * Wrapper function for btrfs_add_orphan_item().
5313 * Returns 0 on success.
5314 * Returns != 0 on error.
5316 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5317 struct btrfs_path *path, u64 ino)
5319 struct btrfs_trans_handle *trans;
5320 struct btrfs_key research_key;
5324 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5326 trans = btrfs_start_transaction(root, 1);
5327 if (IS_ERR(trans)) {
5328 ret = PTR_ERR(trans);
5333 btrfs_release_path(path);
5334 ret = btrfs_add_orphan_item(trans, root, path, ino);
5336 btrfs_commit_transaction(trans, root);
5339 error("failed to add inode %llu as orphan item root %llu",
5340 ino, root->root_key.objectid);
5342 printf("Added inode %llu as orphan item root %llu\n",
5343 ino, root->root_key.objectid);
5345 btrfs_release_path(path);
5346 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5353 * Check INODE_ITEM and related ITEMs (the same inode number)
5354 * 1. check link count
5355 * 2. check inode ref/extref
5356 * 3. check dir item/index
5358 * @ext_ref: the EXTENDED_IREF feature
5360 * Return 0 if no error occurred.
5361 * Return >0 for error or hit the traversal is done(by error bitmap)
5363 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5364 unsigned int ext_ref)
5366 struct extent_buffer *node;
5367 struct btrfs_inode_item *ii;
5368 struct btrfs_key key;
5377 u64 extent_size = 0;
5379 unsigned int nodatasum;
5383 char namebuf[BTRFS_NAME_LEN] = {0};
5386 node = path->nodes[0];
5387 slot = path->slots[0];
5389 btrfs_item_key_to_cpu(node, &key, slot);
5390 inode_id = key.objectid;
5392 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5393 ret = btrfs_next_item(root, path);
5399 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5400 isize = btrfs_inode_size(node, ii);
5401 nbytes = btrfs_inode_nbytes(node, ii);
5402 mode = btrfs_inode_mode(node, ii);
5403 dir = imode_to_type(mode) == BTRFS_FT_DIR;
5404 nlink = btrfs_inode_nlink(node, ii);
5405 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5408 ret = btrfs_next_item(root, path);
5410 /* out will fill 'err' using current statistics */
5412 } else if (ret > 0) {
5417 node = path->nodes[0];
5418 slot = path->slots[0];
5419 btrfs_item_key_to_cpu(node, &key, slot);
5420 if (key.objectid != inode_id)
5424 case BTRFS_INODE_REF_KEY:
5425 ret = check_inode_ref(root, &key, path, namebuf,
5426 &name_len, &refs, mode);
5429 case BTRFS_INODE_EXTREF_KEY:
5430 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5431 warning("root %llu EXTREF[%llu %llu] isn't supported",
5432 root->objectid, key.objectid,
5434 ret = check_inode_extref(root, &key, node, slot, &refs,
5438 case BTRFS_DIR_ITEM_KEY:
5439 case BTRFS_DIR_INDEX_KEY:
5441 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5442 root->objectid, inode_id,
5443 imode_to_type(mode), key.objectid,
5446 ret = check_dir_item(root, &key, path, &size, ext_ref);
5449 case BTRFS_EXTENT_DATA_KEY:
5451 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5452 root->objectid, inode_id, key.objectid,
5455 ret = check_file_extent(root, &key, node, slot,
5456 nodatasum, &extent_size,
5460 case BTRFS_XATTR_ITEM_KEY:
5463 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5464 key.objectid, key.type, key.offset);
5469 /* verify INODE_ITEM nlink/isize/nbytes */
5472 err |= LINK_COUNT_ERROR;
5473 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5474 root->objectid, inode_id, nlink);
5478 * Just a warning, as dir inode nbytes is just an
5479 * instructive value.
5481 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5482 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5483 root->objectid, inode_id,
5484 root->fs_info->nodesize);
5487 if (isize != size) {
5489 ret = repair_dir_isize_lowmem(root, path,
5491 if (!repair || ret) {
5494 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5495 root->objectid, inode_id, isize, size);
5499 if (nlink != refs) {
5500 err |= LINK_COUNT_ERROR;
5501 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5502 root->objectid, inode_id, nlink, refs);
5503 } else if (!nlink) {
5505 ret = repair_inode_orphan_item_lowmem(root,
5507 if (!repair || ret) {
5509 error("root %llu INODE[%llu] is orphan item",
5510 root->objectid, inode_id);
5514 if (!nbytes && !no_holes && extent_end < isize) {
5515 err |= NBYTES_ERROR;
5516 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5517 root->objectid, inode_id, isize);
5520 if (nbytes != extent_size) {
5522 ret = repair_inode_nbytes_lowmem(root, path,
5523 inode_id, extent_size);
5524 if (!repair || ret) {
5525 err |= NBYTES_ERROR;
5527 "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5528 root->objectid, inode_id, nbytes,
5538 * check first root dir's inode_item and inode_ref
5540 * returns 0 means no error
5541 * returns >0 means error
5542 * returns <0 means fatal error
5544 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5546 struct btrfs_path path;
5547 struct btrfs_key key;
5548 struct btrfs_inode_item *ii;
/* Search key: the INODE_ITEM whose objectid is BTRFS_FIRST_FREE_OBJECTID. */
5554 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5555 key.type = BTRFS_INODE_ITEM_KEY;
5558 /* For root being dropped, we don't need to check first inode */
5559 if (btrfs_root_refs(&root->root_item) == 0 &&
5560 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5561 BTRFS_FIRST_FREE_OBJECTID)
5564 btrfs_init_path(&path);
5565 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5570 err |= INODE_ITEM_MISSING;
/* The inode found must have a directory mode, otherwise it is a mismatch. */
5572 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
5573 struct btrfs_inode_item);
5574 mode = btrfs_inode_mode(path.nodes[0], ii);
5575 if (imode_to_type(mode) != BTRFS_FT_DIR)
5576 err |= INODE_ITEM_MISMATCH;
5579 /* lookup first inode ref */
5580 key.offset = BTRFS_FIRST_FREE_OBJECTID;
5581 key.type = BTRFS_INODE_REF_KEY;
5582 /* special index value */
/* The ref looked up is named ".."; objectid and offset are both BTRFS_FIRST_FREE_OBJECTID. */
5585 ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
5591 btrfs_release_path(&path);
5592 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
5593 error("root dir INODE_ITEM is %s",
5594 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
5595 if (err & INODE_REF_MISSING)
5596 error("root dir INODE_REF is missing");
/* A negative ret takes precedence over the error bitmap. */
5598 return ret < 0 ? ret : err;
/*
 * Look up a tree backref of @rec in rec->backref_tree.
 *
 * A stack-allocated tree_backref is filled in as the search key and handed
 * to rb_search() with compare_extent_backref() as the comparison function.
 * In the visible branch the key carries @parent with full_backref set.
 *
 * NOTE(review): the condition selecting that branch, the alternative
 * branch, the check of the rb_search() result and the return statement
 * are not visible in this excerpt.
 */
5601 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5602 u64 parent, u64 root)
5604 struct rb_node *node;
5605 struct tree_backref *back = NULL;
5606 struct tree_backref match = {
5613 match.parent = parent;
5614 match.node.full_backref = 1;
5619 node = rb_search(&rec->backref_tree, &match.node.node,
5620 (rb_compare_keys)compare_extent_backref, NULL);
5622 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Look up a data backref of @rec in rec->backref_tree.
 *
 * A stack-allocated data_backref is used as the search key; the visible
 * initializers copy found_ref and @disk_bytenr into it, and the visible
 * branch sets @parent with full_backref. The lookup uses rb_search() with
 * compare_extent_backref().
 *
 * NOTE(review): the remaining initializers, the branch condition, the
 * result check and the return statement are not visible in this excerpt.
 */
5627 static struct data_backref *find_data_backref(struct extent_record *rec,
5628 u64 parent, u64 root,
5629 u64 owner, u64 offset,
5631 u64 disk_bytenr, u64 bytes)
5633 struct rb_node *node;
5634 struct data_backref *back = NULL;
5635 struct data_backref match = {
5642 .found_ref = found_ref,
5643 .disk_bytenr = disk_bytenr,
5647 match.parent = parent;
5648 match.node.full_backref = 1;
5653 node = rb_search(&rec->backref_tree, &match.node.node,
5654 (rb_compare_keys)compare_extent_backref, NULL);
5656 back = to_data_backref(rb_node_to_extent_backref(node));
5661 * Iterate all items in the tree and call check_inode_item() to check them.
5663 * @root: the root of the tree to be checked.
5664 * @ext_ref: the EXTENDED_IREF feature
5666 * Return 0 if no error found.
5667 * Return <0 for error.
5669 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5671 struct btrfs_path path;
5672 struct node_refs nrefs;
5673 struct btrfs_root_item *root_item = &root->root_item;
5679 * We need to manually check the first inode item(256)
5680 * As the following traversal function will only start from
5681 * the first inode item in the leaf, if inode item(256) is missing
5682 * we will just skip it forever.
5684 ret = check_fs_first_inode(root, ext_ref);
5689 memset(&nrefs, 0, sizeof(nrefs));
5690 level = btrfs_header_level(root->node);
5691 btrfs_init_path(&path);
/*
 * A root that still has refs, or has no drop progress recorded, is walked
 * from its root node. Otherwise (presumably the else branch, not visible)
 * the walk resumes at drop_progress on drop_level.
 */
5693 if (btrfs_root_refs(root_item) > 0 ||
5694 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5695 path.nodes[level] = root->node;
5696 path.slots[level] = 0;
5697 extent_buffer_get(root->node);
5699 struct btrfs_key key;
5701 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5702 level = root_item->drop_level;
5703 path.lowest_level = level;
5704 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* Alternate walking down and up the tree (loop construct not visible here). */
5711 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5714 /* if ret is negative, walk shall stop */
5720 ret = walk_up_tree_v2(root, &path, &level);
5722 /* Normal exit, reset ret to err */
5729 btrfs_release_path(&path);
5734 * Find the relative ref for root_ref and root_backref.
5736 * @root: the root of the root tree.
5737 * @ref_key: the key of the root ref.
5739 * Return 0 if no error occurred.
5741 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5742 struct extent_buffer *node, int slot)
5744 struct btrfs_path path;
5745 struct btrfs_key key;
5746 struct btrfs_root_ref *ref;
5747 struct btrfs_root_ref *backref;
5748 char ref_name[BTRFS_NAME_LEN] = {0};
5749 char backref_name[BTRFS_NAME_LEN] = {0};
5755 u32 backref_namelen;
/* Read dirid, sequence and name length of the ref item at @node/@slot. */
5760 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5761 ref_dirid = btrfs_root_ref_dirid(node, ref);
5762 ref_seq = btrfs_root_ref_sequence(node, ref);
5763 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* An over-long name is cut to BTRFS_NAME_LEN, the size of ref_name, with a warning. */
5765 if (ref_namelen <= BTRFS_NAME_LEN) {
5768 len = BTRFS_NAME_LEN;
5769 warning("%s[%llu %llu] ref_name too long",
5770 ref_key->type == BTRFS_ROOT_REF_KEY ?
5771 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
/* The name bytes are read from directly behind the btrfs_root_ref structure. */
5774 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5776 /* Find relative root_ref */
/*
 * The counterpart key swaps objectid and offset, and the type arithmetic
 * maps ROOT_REF to ROOT_BACKREF and vice versa.
 */
5777 key.objectid = ref_key->offset;
5778 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5779 key.offset = ref_key->objectid;
5781 btrfs_init_path(&path);
5782 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5784 err |= ROOT_REF_MISSING;
5785 error("%s[%llu %llu] couldn't find relative ref",
5786 ref_key->type == BTRFS_ROOT_REF_KEY ?
5787 "ROOT_REF" : "ROOT_BACKREF",
5788 ref_key->objectid, ref_key->offset);
5792 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5793 struct btrfs_root_ref);
5794 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5795 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5796 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5798 if (backref_namelen <= BTRFS_NAME_LEN) {
5799 len = backref_namelen;
5801 len = BTRFS_NAME_LEN;
5802 warning("%s[%llu %llu] ref_name too long",
5803 key.type == BTRFS_ROOT_REF_KEY ?
5804 "ROOT_REF" : "ROOT_BACKREF",
5805 key.objectid, key.offset);
5807 read_extent_buffer(path.nodes[0], backref_name,
5808 (unsigned long)(backref + 1), len);
/* Both items must agree on dirid, sequence, name length and name bytes. */
5810 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5811 ref_namelen != backref_namelen ||
5812 strncmp(ref_name, backref_name, len)) {
5813 err |= ROOT_REF_MISMATCH;
5814 error("%s[%llu %llu] mismatch relative ref",
5815 ref_key->type == BTRFS_ROOT_REF_KEY ?
5816 "ROOT_REF" : "ROOT_BACKREF",
5817 ref_key->objectid, ref_key->offset);
5820 btrfs_release_path(&path);
5825 * Check all fs/file tree in low_memory mode.
5827 * 1. for fs tree root item, call check_fs_root_v2()
5828 * 2. for fs tree root ref/backref, call check_root_ref()
5830 * Return 0 if no error occurred.
5832 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5834 struct btrfs_root *tree_root = fs_info->tree_root;
5835 struct btrfs_root *cur_root = NULL;
5836 struct btrfs_path path;
5837 struct btrfs_key key;
5838 struct extent_buffer *node;
5839 unsigned int ext_ref;
/* ext_ref reflects the EXTENDED_IREF incompat flag of the filesystem. */
5844 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
/* The scan of the root tree starts at the ROOT_ITEM of BTRFS_FS_TREE_OBJECTID. */
5846 btrfs_init_path(&path);
5847 key.objectid = BTRFS_FS_TREE_OBJECTID;
5849 key.type = BTRFS_ROOT_ITEM_KEY;
5851 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5855 } else if (ret > 0) {
5861 node = path.nodes[0];
5862 slot = path.slots[0];
5863 btrfs_item_key_to_cpu(node, &key, slot);
/* Objectids above BTRFS_LAST_FREE_OBJECTID get special handling (action not visible here). */
5864 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5866 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5867 fs_root_objectid(key.objectid)) {
/*
 * A BTRFS_TREE_RELOC_OBJECTID root is read through the no-cache reader
 * and freed again right after its check; the other branch (presumably
 * an else, not visible) reads the root with key.offset set to (u64)-1.
 */
5868 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5869 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5872 key.offset = (u64)-1;
5873 cur_root = btrfs_read_fs_root(fs_info, &key);
5876 if (IS_ERR(cur_root)) {
5877 error("Fail to read fs/subvol tree: %lld",
5883 ret = check_fs_root_v2(cur_root, ext_ref);
5886 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5887 btrfs_free_fs_root(cur_root);
5888 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5889 key.type == BTRFS_ROOT_BACKREF_KEY) {
5890 ret = check_root_ref(tree_root, &key, node, slot);
5894 ret = btrfs_next_item(tree_root, &path);
5904 btrfs_release_path(&path);
/*
 * Entry point for the fs roots check.
 *
 * Prints "checking fs roots" to stderr unless ctx.progress_enabled is set,
 * then calls check_fs_roots_v2() when check_mode is CHECK_MODE_LOWMEM and
 * check_fs_roots() otherwise (the `else` line is presumably among the
 * lines not visible here, as is the return statement).
 */
5908 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
5909 struct cache_tree *root_cache)
5913 if (!ctx.progress_enabled)
5914 fprintf(stderr, "checking fs roots\n");
5915 if (check_mode == CHECK_MODE_LOWMEM)
5916 ret = check_fs_roots_v2(fs_info);
5918 ret = check_fs_roots(fs_info, root_cache);
/*
 * Cross-check every backref in rec->backref_tree against the extent record.
 *
 * The visible checks cover: backrefs not found in the extent tree, tree
 * backrefs that were never referenced, data backrefs whose found_ref differs
 * from num_refs, data backrefs whose disk_bytenr or bytes differ from the
 * record, and a total found count that differs from rec->refs. Each problem
 * has a matching message written to stderr.
 *
 * NOTE(review): how @print_errs gates the messages and what is returned is
 * not visible in this excerpt; maybe_free_extent_rec() treats a zero return
 * as one of the conditions for freeing the record.
 */
5923 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5925 struct extent_backref *back, *tmp;
5926 struct tree_backref *tback;
5927 struct data_backref *dback;
5931 rbtree_postorder_for_each_entry_safe(back, tmp,
5932 &rec->backref_tree, node) {
/* Check 1: the backref has no counterpart in the extent tree. */
5933 if (!back->found_extent_tree) {
5937 if (back->is_data) {
5938 dback = to_data_backref(back);
5939 fprintf(stderr, "Data backref %llu %s %llu"
5940 " owner %llu offset %llu num_refs %lu"
5941 " not found in extent tree\n",
5942 (unsigned long long)rec->start,
5943 back->full_backref ?
5945 back->full_backref ?
5946 (unsigned long long)dback->parent:
5947 (unsigned long long)dback->root,
5948 (unsigned long long)dback->owner,
5949 (unsigned long long)dback->offset,
5950 (unsigned long)dback->num_refs);
5952 tback = to_tree_backref(back);
5953 fprintf(stderr, "Tree backref %llu parent %llu"
5954 " root %llu not found in extent tree\n",
5955 (unsigned long long)rec->start,
5956 (unsigned long long)tback->parent,
5957 (unsigned long long)tback->root);
/* Check 2: a tree backref that was never seen as a reference. */
5960 if (!back->is_data && !back->found_ref) {
5964 tback = to_tree_backref(back);
5965 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5966 (unsigned long long)rec->start,
5967 back->full_backref ? "parent" : "root",
5968 back->full_backref ?
5969 (unsigned long long)tback->parent :
5970 (unsigned long long)tback->root, back);
/* Check 3: per-backref consistency of data backrefs. */
5972 if (back->is_data) {
5973 dback = to_data_backref(back);
5974 if (dback->found_ref != dback->num_refs) {
5978 fprintf(stderr, "Incorrect local backref count"
5979 " on %llu %s %llu owner %llu"
5980 " offset %llu found %u wanted %u back %p\n",
5981 (unsigned long long)rec->start,
5982 back->full_backref ?
5984 back->full_backref ?
5985 (unsigned long long)dback->parent:
5986 (unsigned long long)dback->root,
5987 (unsigned long long)dback->owner,
5988 (unsigned long long)dback->offset,
5989 dback->found_ref, dback->num_refs, back);
5991 if (dback->disk_bytenr != rec->start) {
5995 fprintf(stderr, "Backref disk bytenr does not"
5996 " match extent record, bytenr=%llu, "
5997 "ref bytenr=%llu\n",
5998 (unsigned long long)rec->start,
5999 (unsigned long long)dback->disk_bytenr);
6002 if (dback->bytes != rec->nr) {
6006 fprintf(stderr, "Backref bytes do not match "
6007 "extent backref, bytenr=%llu, ref "
6008 "bytes=%llu, backref bytes=%llu\n",
6009 (unsigned long long)rec->start,
6010 (unsigned long long)rec->nr,
6011 (unsigned long long)dback->bytes);
/* Accumulate the global count; data backrefs contribute their found_ref. */
6014 if (!back->is_data) {
6017 dback = to_data_backref(back);
6018 found += dback->found_ref;
/* Check 4: the accumulated count must equal the record's refs. */
6021 if (found != rec->refs) {
6025 fprintf(stderr, "Incorrect global backref count "
6026 "on %llu found %llu wanted %llu\n",
6027 (unsigned long long)rec->start,
6028 (unsigned long long)found,
6029 (unsigned long long)rec->refs);
/*
 * Per-node callback passed to rb_free_nodes() by free_all_extent_backrefs().
 * Converts @node to its enclosing extent_backref.
 * NOTE(review): the statement that releases the backref is not visible here.
 */
6035 static void __free_one_backref(struct rb_node *node)
6037 struct extent_backref *back = rb_node_to_extent_backref(node);
/* Hand rec->backref_tree to rb_free_nodes() with __free_one_backref() as the per-node callback. */
6042 static void free_all_extent_backrefs(struct extent_record *rec)
6044 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Tear down @extent_cache: take the first cache extent, remove it from the
 * cache and free the backrefs of the extent_record that embeds it.
 * NOTE(review): the loop construct, its termination test and the release of
 * the record itself are presumably among the lines not visible here.
 */
6047 static void free_extent_record_cache(struct cache_tree *extent_cache)
6049 struct cache_extent *cache;
6050 struct extent_record *rec;
6053 cache = first_cache_extent(extent_cache);
6056 rec = container_of(cache, struct extent_record, cache);
6057 remove_cache_extent(extent_cache, cache);
6058 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing is left to verify on it.
 *
 * The record is removed only when its content and owner ref were checked,
 * extent_item_refs equals a non-zero refs, it has no duplicates,
 * all_backpointers_checked() returns 0, and none of bad_full_backref,
 * crossing_stripes or wrong_chunk_type is set.
 * NOTE(review): the return value is not visible in this excerpt.
 */
6063 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6064 struct extent_record *rec)
6066 if (rec->content_checked && rec->owner_ref_checked &&
6067 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6068 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6069 !rec->bad_full_backref && !rec->crossing_stripes &&
6070 !rec->wrong_chunk_type) {
6071 remove_cache_extent(extent_cache, &rec->cache);
6072 free_all_extent_backrefs(rec);
6073 list_del_init(&rec->list);
/*
 * Check that the owner stored in the header of @buf really references it.
 *
 * The backrefs of @rec are scanned first, looking at found_ref,
 * full_backref and whether back->root equals the header owner. After that
 * the owner's tree is read and searched down to level + 1 with the first
 * key of @buf; the block pointer in that parent slot is compared with
 * buf->start.
 *
 * Return 0 if the reference was found, 1 otherwise. Other return paths,
 * and what each visible `if` does on a match, are not visible here.
 */
6079 static int check_owner_ref(struct btrfs_root *root,
6080 struct extent_record *rec,
6081 struct extent_buffer *buf)
6083 struct extent_backref *node, *tmp;
6084 struct tree_backref *back;
6085 struct btrfs_root *ref_root;
6086 struct btrfs_key key;
6087 struct btrfs_path path;
6088 struct extent_buffer *parent;
6093 rbtree_postorder_for_each_entry_safe(node, tmp,
6094 &rec->backref_tree, node) {
6097 if (!node->found_ref)
6099 if (node->full_backref)
6101 back = to_tree_backref(node);
6102 if (btrfs_header_owner(buf) == back->root)
6105 BUG_ON(rec->is_root);
6107 /* try to find the block by search corresponding fs tree */
6108 key.objectid = btrfs_header_owner(buf);
6109 key.type = BTRFS_ROOT_ITEM_KEY;
6110 key.offset = (u64)-1;
6112 ref_root = btrfs_read_fs_root(root->fs_info, &key);
6113 if (IS_ERR(ref_root))
/* The search key is the first key of @buf: item key or node key, depending on the level. */
6116 level = btrfs_header_level(buf);
6118 btrfs_item_key_to_cpu(buf, &key, 0);
6120 btrfs_node_key_to_cpu(buf, &key, 0);
/* Stop the search one level above @buf so that the path holds its would-be parent. */
6122 btrfs_init_path(&path);
6123 path.lowest_level = level + 1;
6124 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6128 parent = path.nodes[level + 1];
6129 if (parent && buf->start == btrfs_node_blockptr(parent,
6130 path.slots[level + 1]))
6133 btrfs_release_path(&path);
6134 return found ? 0 : 1;
/*
 * Walk the backrefs of @rec and test each one's full_backref flag and
 * whether its root is BTRFS_EXTENT_TREE_OBJECTID.
 * NOTE(review): the result of each test and the return values are not
 * visible here; record_bad_block_io() tests the result for zero.
 */
6137 static int is_extent_tree_record(struct extent_record *rec)
6139 struct extent_backref *node, *tmp;
6140 struct tree_backref *back;
6143 rbtree_postorder_for_each_entry_safe(node, tmp,
6144 &rec->backref_tree, node) {
6147 back = to_tree_backref(node);
6148 if (node->full_backref)
6150 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Register the range [start, start + len) as a corrupt extent record.
 *
 * The range is looked up in @extent_cache, the matching extent_record is
 * tested with is_extent_tree_record(), and the record's parent_key is then
 * passed to btrfs_add_corrupt_extent_record(), whose result is returned.
 * NOTE(review): the early-exit paths (no cache hit, is_extent_tree_record()
 * returning 0) are not visible in this excerpt.
 */
6157 static int record_bad_block_io(struct btrfs_fs_info *info,
6158 struct cache_tree *extent_cache,
6161 struct extent_record *rec;
6162 struct cache_extent *cache;
6163 struct btrfs_key key;
6165 cache = lookup_cache_extent(extent_cache, start, len);
6169 rec = container_of(cache, struct extent_record, cache);
6170 if (!is_extent_tree_record(rec))
6173 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6174 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot + 1 inside @buf.
 *
 * For a node (header level != 0) the two btrfs_key_ptr entries are
 * exchanged. For a leaf the item data, item offsets, item sizes and item
 * keys are exchanged.
 * NOTE(review): allocation failure handling, the release of the temporary
 * buffers and the return value are not visible in this excerpt.
 */
6177 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6178 struct extent_buffer *buf, int slot)
6180 if (btrfs_header_level(buf)) {
6181 struct btrfs_key_ptr ptr1, ptr2;
/* Node case: read both key pointers, then write them back crosswise. */
6183 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6184 sizeof(struct btrfs_key_ptr));
6185 read_extent_buffer(buf, &ptr2,
6186 btrfs_node_key_ptr_offset(slot + 1),
6187 sizeof(struct btrfs_key_ptr));
6188 write_extent_buffer(buf, &ptr1,
6189 btrfs_node_key_ptr_offset(slot + 1),
6190 sizeof(struct btrfs_key_ptr));
6191 write_extent_buffer(buf, &ptr2,
6192 btrfs_node_key_ptr_offset(slot),
6193 sizeof(struct btrfs_key_ptr));
/* Propagate the key at slot 0 upwards (the guarding condition is not visible here). */
6195 struct btrfs_disk_key key;
6196 btrfs_node_key(buf, &key, 0);
6197 btrfs_fixup_low_keys(root, path, &key,
6198 btrfs_header_level(buf) + 1);
6201 struct btrfs_item *item1, *item2;
6202 struct btrfs_key k1, k2;
6203 char *item1_data, *item2_data;
6204 u32 item1_offset, item2_offset, item1_size, item2_size;
/* Leaf case: capture keys, offsets and sizes of both items before touching anything. */
6206 item1 = btrfs_item_nr(slot);
6207 item2 = btrfs_item_nr(slot + 1);
6208 btrfs_item_key_to_cpu(buf, &k1, slot);
6209 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6210 item1_offset = btrfs_item_offset(buf, item1);
6211 item2_offset = btrfs_item_offset(buf, item2);
6212 item1_size = btrfs_item_size(buf, item1);
6213 item2_size = btrfs_item_size(buf, item2);
6215 item1_data = malloc(item1_size);
6218 item2_data = malloc(item2_size);
6224 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6225 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): item1_data holds item1_size bytes but is written with
 * item2_size (and item2_data with item1_size). When the two sizes differ
 * this reads past the smaller buffer - confirm whether this is intended.
 */
6227 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6228 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6232 btrfs_set_item_offset(buf, item1, item2_offset);
6233 btrfs_set_item_offset(buf, item2, item1_offset);
6234 btrfs_set_item_size(buf, item1, item2_size);
6235 btrfs_set_item_size(buf, item2, item1_size);
/* Finally exchange the keys by pointing the path at each slot in turn. */
6237 path->slots[0] = slot;
6238 btrfs_set_item_key_unsafe(root, path, &k2);
6239 path->slots[0] = slot + 1;
6240 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair key ordering in the block at path->lowest_level.
 *
 * Adjacent keys (node keys or item keys, depending on the level) are
 * compared pairwise. Pairs already in strictly ascending order are
 * presumably skipped (the statement after the comparison is not visible);
 * the others are exchanged with swap_values() and the buffer is marked dirty.
 * NOTE(review): the loop bound is `nritems - 1`; if nritems can be 0 and is
 * an unsigned quantity this wraps around - confirm against the callers.
 */
6245 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6247 struct extent_buffer *buf;
6248 struct btrfs_key k1, k2;
6250 int level = path->lowest_level;
6253 buf = path->nodes[level];
6254 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6256 btrfs_node_key_to_cpu(buf, &k1, i);
6257 btrfs_node_key_to_cpu(buf, &k2, i + 1);
6259 btrfs_item_key_to_cpu(buf, &k1, i);
6260 btrfs_item_key_to_cpu(buf, &k2, i + 1);
6262 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6264 ret = swap_values(root, path, buf, i);
6267 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * The key type is first tested against the list below. The following item
 * headers are then moved down by one, nritems is decremented and the buffer
 * is marked dirty. A message naming the deleted key is printed to stdout.
 * NOTE(review): the return values and the condition guarding the low-key
 * fixup are not visible in this excerpt.
 */
6273 static int delete_bogus_item(struct btrfs_root *root,
6274 struct btrfs_path *path,
6275 struct extent_buffer *buf, int slot)
6277 struct btrfs_key key;
6278 int nritems = btrfs_header_nritems(buf);
6280 btrfs_item_key_to_cpu(buf, &key, slot);
6282 /* These are all the keys we can deal with missing. */
6283 if (key.type != BTRFS_DIR_INDEX_KEY &&
6284 key.type != BTRFS_EXTENT_ITEM_KEY &&
6285 key.type != BTRFS_METADATA_ITEM_KEY &&
6286 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6287 key.type != BTRFS_EXTENT_DATA_REF_KEY)
6290 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6291 (unsigned long long)key.objectid, key.type,
6292 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array: move headers slot+1.. down by one. */
6293 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6294 btrfs_item_nr_offset(slot + 1),
6295 sizeof(struct btrfs_item) *
6296 (nritems - slot - 1));
6297 btrfs_set_header_nritems(buf, nritems - 1);
6299 struct btrfs_disk_key disk_key;
6301 btrfs_item_key(buf, &disk_key, 0);
6302 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6304 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * Item 0 is expected to end at BTRFS_LEAF_DATA_SIZE(root); every later item
 * is expected to end where the previous item starts. An item ending beyond
 * that point is passed to delete_bogus_item(); an item ending before it has
 * its data shifted up by the size of the gap.
 * NOTE(review): the handling of delete_bogus_item()'s result and the return
 * values are not visible in this excerpt.
 */
6308 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6310 struct extent_buffer *buf;
6314 /* We should only get this for leaves */
6315 BUG_ON(path->lowest_level);
6316 buf = path->nodes[0];
6318 for (i = 0; i < btrfs_header_nritems(buf); i++) {
6319 unsigned int shift = 0, offset;
6321 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6322 BTRFS_LEAF_DATA_SIZE(root)) {
6323 if (btrfs_item_end_nr(buf, i) >
6324 BTRFS_LEAF_DATA_SIZE(root)) {
6325 ret = delete_bogus_item(root, path, buf, i);
6328 fprintf(stderr, "item is off the end of the "
6329 "leaf, can't fix\n");
6333 shift = BTRFS_LEAF_DATA_SIZE(root) -
6334 btrfs_item_end_nr(buf, i);
6335 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6336 btrfs_item_offset_nr(buf, i - 1)) {
6337 if (btrfs_item_end_nr(buf, i) >
6338 btrfs_item_offset_nr(buf, i - 1)) {
6339 ret = delete_bogus_item(root, path, buf, i);
6342 fprintf(stderr, "items overlap, can't fix\n");
6346 shift = btrfs_item_offset_nr(buf, i - 1) -
6347 btrfs_item_end_nr(buf, i);
6352 printf("Shifting item nr %d by %u bytes in block %llu\n",
6353 i, shift, (unsigned long long)buf->start);
/* Move the item's data up by `shift` bytes and update its offset. */
6354 offset = btrfs_item_offset_nr(buf, i);
6355 memmove_extent_buffer(buf,
6356 btrfs_leaf_data(buf) + offset + shift,
6357 btrfs_leaf_data(buf) + offset,
6358 btrfs_item_size_nr(buf, i));
6359 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6361 btrfs_mark_buffer_dirty(buf);
6365 * We may have moved things, in which case we want to exit so we don't
6366 * write those changes out. Once we have proper abort functionality in
6367 * progs this can be changed to something nicer.
6374 * Attempt to fix basic block failures. If we can't fix it for whatever reason
6375 * then just return -EIO.
6377 static int try_to_fix_bad_block(struct btrfs_root *root,
6378 struct extent_buffer *buf,
6379 enum btrfs_tree_block_status status)
6381 struct btrfs_trans_handle *trans;
6382 struct ulist *roots;
6383 struct ulist_node *node;
6384 struct btrfs_root *search_root;
6385 struct btrfs_path path;
6386 struct ulist_iterator iter;
6387 struct btrfs_key root_key, key;
/* Only bad key order and invalid item offsets are handled by this function. */
6390 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6391 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
/* Collect the roots that btrfs_find_all_roots() reports for this block. */
6394 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6398 btrfs_init_path(&path);
6399 ULIST_ITER_INIT(&iter);
6400 while ((node = ulist_next(roots, &iter))) {
6401 root_key.objectid = node->val;
6402 root_key.type = BTRFS_ROOT_ITEM_KEY;
6403 root_key.offset = (u64)-1;
6405 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6412 trans = btrfs_start_transaction(search_root, 0);
6413 if (IS_ERR(trans)) {
6414 ret = PTR_ERR(trans);
/*
 * Search down to the level of @buf using its first key, with block checks
 * skipped. The last argument of the search is 1 - presumably the COW flag;
 * confirm against btrfs_search_slot().
 */
6418 path.lowest_level = btrfs_header_level(buf);
6419 path.skip_check_block = 1;
6420 if (path.lowest_level)
6421 btrfs_node_key_to_cpu(buf, &key, 0);
6423 btrfs_item_key_to_cpu(buf, &key, 0);
6424 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6427 btrfs_commit_transaction(trans, search_root);
/* Dispatch to the repair routine that matches the reported status. */
6430 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6431 ret = fix_key_order(search_root, &path);
6432 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6433 ret = fix_item_offset(search_root, &path);
6435 btrfs_commit_transaction(trans, search_root);
6438 btrfs_release_path(&path);
6439 btrfs_commit_transaction(trans, search_root);
6442 btrfs_release_path(&path);
/*
 * Verify one tree block and update its extent record.
 *
 * The record for @buf is looked up in @extent_cache and updated with the
 * header generation, the level and the objectid of the first key. The block
 * is then validated with btrfs_check_leaf() or btrfs_check_node(); a block
 * that is not clean is passed to try_to_fix_bad_block(). On success
 * content_checked is set and the owner ref is either assumed (full-backref
 * flag in @flags) or verified through check_owner_ref().
 * NOTE(review): the conditions around the repair attempt and the return
 * values are not visible in this excerpt.
 */
6446 static int check_block(struct btrfs_root *root,
6447 struct cache_tree *extent_cache,
6448 struct extent_buffer *buf, u64 flags)
6450 struct extent_record *rec;
6451 struct cache_extent *cache;
6452 struct btrfs_key key;
6453 enum btrfs_tree_block_status status;
6457 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6460 rec = container_of(cache, struct extent_record, cache);
6461 rec->generation = btrfs_header_generation(buf);
6463 level = btrfs_header_level(buf);
/* Remember the objectid of the first key when the block is not empty. */
6464 if (btrfs_header_nritems(buf) > 0) {
6467 btrfs_item_key_to_cpu(buf, &key, 0);
6469 btrfs_node_key_to_cpu(buf, &key, 0);
6471 rec->info_objectid = key.objectid;
6473 rec->info_level = level;
/* Validate the block against its parent key with the leaf or node checker. */
6475 if (btrfs_is_leaf(buf))
6476 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6478 status = btrfs_check_node(root, &rec->parent_key, buf);
6480 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6482 status = try_to_fix_bad_block(root, buf, status);
6483 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6485 fprintf(stderr, "bad block %llu\n",
6486 (unsigned long long)buf->start);
6489 * Signal to callers we need to start the scan over
6490 * again since we'll have cowed blocks.
6495 rec->content_checked = 1;
6496 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6497 rec->owner_ref_checked = 1;
6499 ret = check_owner_ref(root, rec, buf);
6501 rec->owner_ref_checked = 1;
/* The record may now be complete; let maybe_free_extent_rec() decide. */
6505 maybe_free_extent_rec(extent_cache, rec);
/*
 * List-based lookup of a tree backref: walks rec->backrefs and compares each
 * entry's parent or root, depending on its full_backref flag.
 *
 * NOTE(review): a function of the same name and signature is defined
 * earlier in this file using rec->backref_tree. The two cannot both be
 * compiled, so one of them is presumably disabled by preprocessor lines
 * that are not visible in this excerpt - confirm. The loop advance, the
 * skip conditions and the return statements are not visible either.
 */
6510 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6511 u64 parent, u64 root)
6513 struct list_head *cur = rec->backrefs.next;
6514 struct extent_backref *node;
6515 struct tree_backref *back;
6517 while(cur != &rec->backrefs) {
6518 node = to_extent_backref(cur);
6522 back = to_tree_backref(node);
6524 if (!node->full_backref)
6526 if (parent == back->parent)
6529 if (node->full_backref)
6531 if (back->root == root)
/*
 * Allocate a tree_backref with malloc() and zero its embedded node.
 * One visible branch stores @parent and sets full_backref to 1; the other
 * clears full_backref.
 * NOTE(review): the allocation failure check, the branch condition, the
 * assignment of @root and the return statement are not visible here.
 */
6539 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6540 u64 parent, u64 root)
6542 struct tree_backref *ref = malloc(sizeof(*ref));
6546 memset(&ref->node, 0, sizeof(ref->node));
6548 ref->parent = parent;
6549 ref->node.full_backref = 1;
6552 ref->node.full_backref = 0;
/*
 * List-based lookup of a data backref: walks rec->backrefs and compares
 * parent for full backrefs, or root/owner/offset otherwise. For a
 * root/owner/offset match there is an additional test on found_ref
 * together with bytes and disk_bytenr.
 *
 * NOTE(review): a function of the same name and signature is defined
 * earlier in this file using rec->backref_tree. One of the two is
 * presumably disabled by preprocessor lines not visible in this excerpt -
 * confirm. The loop advance, the outcome of each test and the return
 * statements are not visible either.
 */
6559 static struct data_backref *find_data_backref(struct extent_record *rec,
6560 u64 parent, u64 root,
6561 u64 owner, u64 offset,
6563 u64 disk_bytenr, u64 bytes)
6565 struct list_head *cur = rec->backrefs.next;
6566 struct extent_backref *node;
6567 struct data_backref *back;
6569 while(cur != &rec->backrefs) {
6570 node = to_extent_backref(cur);
6574 back = to_data_backref(node);
6576 if (!node->full_backref)
6578 if (parent == back->parent)
6581 if (node->full_backref)
6583 if (back->root == root && back->owner == owner &&
6584 back->offset == offset) {
6585 if (found_ref && node->found_ref &&
6586 (back->bytes != bytes ||
6587 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data_backref with malloc(), zero its embedded node and mark it
 * as a data backref. One visible branch stores @parent with full_backref
 * set; the other stores @offset with full_backref cleared. bytes is set
 * from max_size, and rec->max_size is raised to max_size when smaller.
 * NOTE(review): the allocation failure check, the branch condition, the
 * remaining field assignments and the return statement are not visible here.
 */
6597 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6598 u64 parent, u64 root,
6599 u64 owner, u64 offset,
6602 struct data_backref *ref = malloc(sizeof(*ref));
6606 memset(&ref->node, 0, sizeof(ref->node));
6607 ref->node.is_data = 1;
6610 ref->parent = parent;
6613 ref->node.full_backref = 1;
6617 ref->offset = offset;
6618 ref->node.full_backref = 0;
6620 ref->bytes = max_size;
6623 if (max_size > rec->max_size)
6624 rec->max_size = max_size;
6628 /* Check if the type of extent matches with its chunk */
6629 static void check_extent_type(struct extent_record *rec)
6631 struct btrfs_block_group_cache *bg_cache;
/* Find the block group for rec->start (the handling of a failed lookup is not visible here). */
6633 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6637 /* data extent, check chunk directly*/
6638 if (!rec->metadata) {
6639 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6640 rec->wrong_chunk_type = 1;
6644 /* metadata extent, check the obvious case first */
6645 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6646 BTRFS_BLOCK_GROUP_METADATA))) {
6647 rec->wrong_chunk_type = 1;
6652 * Check SYSTEM extent, as it's also marked as metadata, we can only
6653 * make sure it's a SYSTEM extent by its backref
6655 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
6656 struct extent_backref *node;
6657 struct tree_backref *tback;
6660 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
6661 if (node->is_data) {
6662 /* tree block shouldn't have data backref */
6663 rec->wrong_chunk_type = 1;
6666 tback = container_of(node, struct tree_backref, node);
/* A backref rooted in the chunk tree expects a SYSTEM block group, anything else METADATA. */
6668 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6669 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6671 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6672 if (!(bg_cache->flags & bg_type))
6673 rec->wrong_chunk_type = 1;
6678 * Allocate a new extent record, fill default values from @tmpl and insert into
6679 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6680 * the cache, otherwise it fails.
6682 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6683 struct extent_record *tmpl)
6685 struct extent_record *rec;
/* A template with max_size == 0 is treated as a programming error. */
6688 BUG_ON(tmpl->max_size == 0);
6689 rec = malloc(sizeof(*rec));
/* Copy the template fields; counters and error flags start from fixed defaults. */
6692 rec->start = tmpl->start;
6693 rec->max_size = tmpl->max_size;
6694 rec->nr = max(tmpl->nr, tmpl->max_size);
6695 rec->found_rec = tmpl->found_rec;
6696 rec->content_checked = tmpl->content_checked;
6697 rec->owner_ref_checked = tmpl->owner_ref_checked;
6698 rec->num_duplicates = 0;
6699 rec->metadata = tmpl->metadata;
6700 rec->flag_block_full_backref = FLAG_UNSET;
6701 rec->bad_full_backref = 0;
6702 rec->crossing_stripes = 0;
6703 rec->wrong_chunk_type = 0;
6704 rec->is_root = tmpl->is_root;
6705 rec->refs = tmpl->refs;
6706 rec->extent_item_refs = tmpl->extent_item_refs;
6707 rec->parent_generation = tmpl->parent_generation;
6708 INIT_LIST_HEAD(&rec->backrefs);
6709 INIT_LIST_HEAD(&rec->dups);
6710 INIT_LIST_HEAD(&rec->list);
6711 rec->backref_tree = RB_ROOT;
6712 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
/*
 * NOTE(review): the cache entry size is tmpl->nr while rec->nr above is
 * max(tmpl->nr, tmpl->max_size) - confirm the difference is intended.
 */
6713 rec->cache.start = tmpl->start;
6714 rec->cache.size = tmpl->nr;
6715 ret = insert_cache_extent(extent_cache, &rec->cache);
/* Account the new record in the file-level bytes_used counter. */
6720 bytes_used += rec->nr;
6723 rec->crossing_stripes = check_crossing_stripes(global_info,
6724 rec->start, global_info->nodesize);
6725 check_extent_type(rec);
6730 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6732 * - refs - if found, increase refs
6733 * - is_root - if found, set
6734 * - content_checked - if found, set
6735 * - owner_ref_checked - if found, set
6737 * If not found, create a new one, initialize and insert.
6739 static int add_extent_rec(struct cache_tree *extent_cache,
6740 struct extent_record *tmpl)
6742 struct extent_record *rec;
6743 struct cache_extent *cache;
/* Look for an existing record covering the template's range. */
6747 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6749 rec = container_of(cache, struct extent_record, cache);
6753 rec->nr = max(tmpl->nr, tmpl->max_size);
6756 * We need to make sure to reset nr to whatever the extent
6757 * record says was the real size, this way we can compare it to
6760 if (tmpl->found_rec) {
/*
 * A second extent item for this record, or one starting at a different
 * bytenr, is tracked as a duplicate hanging off rec->dups.
 */
6761 if (tmpl->start != rec->start || rec->found_rec) {
6762 struct extent_record *tmp;
6765 if (list_empty(&rec->list))
6766 list_add_tail(&rec->list,
6767 &duplicate_extents);
6770 * We have to do this song and dance in case we
6771 * find an extent record that falls inside of
6772 * our current extent record but does not have
6773 * the same objectid.
6775 tmp = malloc(sizeof(*tmp));
6778 tmp->start = tmpl->start;
6779 tmp->max_size = tmpl->max_size;
6782 tmp->metadata = tmpl->metadata;
6783 tmp->extent_item_refs = tmpl->extent_item_refs;
6784 INIT_LIST_HEAD(&tmp->list);
6785 list_add_tail(&tmp->list, &rec->dups);
6786 rec->num_duplicates++;
/* Report when an extent_item_refs value is already recorded, then store the passed one. */
6793 if (tmpl->extent_item_refs && !dup) {
6794 if (rec->extent_item_refs) {
6795 fprintf(stderr, "block %llu rec "
6796 "extent_item_refs %llu, passed %llu\n",
6797 (unsigned long long)tmpl->start,
6798 (unsigned long long)
6799 rec->extent_item_refs,
6800 (unsigned long long)tmpl->extent_item_refs);
6802 rec->extent_item_refs = tmpl->extent_item_refs;
/* The checked flags are only ever set here, never cleared. */
6806 if (tmpl->content_checked)
6807 rec->content_checked = 1;
6808 if (tmpl->owner_ref_checked)
6809 rec->owner_ref_checked = 1;
6810 memcpy(&rec->parent_key, &tmpl->parent_key,
6811 sizeof(tmpl->parent_key));
6812 if (tmpl->parent_generation)
6813 rec->parent_generation = tmpl->parent_generation;
6814 if (rec->max_size < tmpl->max_size)
6815 rec->max_size = tmpl->max_size;
6818 * A metadata extent can't cross stripe_len boundary, otherwise
6819 * kernel scrub won't be able to handle it.
6820 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6824 rec->crossing_stripes = check_crossing_stripes(
6825 global_info, rec->start,
6826 global_info->nodesize);
6827 check_extent_type(rec);
6828 maybe_free_extent_rec(extent_cache, rec);
/* No existing record is handled by add_extent_rec_nolookup() (the control flow leading here is not visible). */
6832 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * add_tree_backref - record a tree-block back reference for the extent
 * at @bytenr.
 *
 * @extent_cache: cache tree holding the extent records
 * @bytenr:       start of the extent the back reference points at
 * @parent:       parent value passed to find/alloc_tree_backref()
 * @root:         root value passed to find/alloc_tree_backref()
 * @found_ref:    selects which of the two flags below is set (the branch
 *                itself is on a line this listing omits -- TODO confirm)
 *
 * Visible flow:
 *  - look up the record covering @bytenr; a zeroed template starting at
 *    @bytenr is added through add_extent_rec_nolookup() and the lookup is
 *    repeated (presumably only when the first lookup misses);
 *  - a record whose start differs from @bytenr is treated specially (the
 *    handling is on omitted lines);
 *  - the backref for (@parent, @root) is fetched with find_tree_backref()
 *    or created with alloc_tree_backref();
 *  - either node.found_ref or node.found_extent_tree is set; when the flag
 *    was already set a message is printed to stderr first;
 *  - rb_insert() into rec->backref_tree is wrapped in WARN_ON (presumably
 *    gated by the local `insert`), then check_extent_type() and
 *    maybe_free_extent_rec() run on the record.
 *
 * NOTE(review): return statements and several conditions are not visible
 * in this listing; confirm error codes against the full source.
 */
6837 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6838 u64 parent, u64 root, int found_ref)
6840 struct extent_record *rec;
6841 struct tree_backref *back;
6842 struct cache_extent *cache;
6844 bool insert = false;
6846 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6848 struct extent_record tmpl;
6850 memset(&tmpl, 0, sizeof(tmpl));
6851 tmpl.start = bytenr;
6856 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6860 /* really a bug in cache_extent implement now */
6861 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6866 rec = container_of(cache, struct extent_record, cache);
6867 if (rec->start != bytenr) {
6869 * Several cause, from unaligned bytenr to over lapping extents
6874 back = find_tree_backref(rec, parent, root);
6876 back = alloc_tree_backref(rec, parent, root);
6883 if (back->node.found_ref) {
6884 fprintf(stderr, "Extent back ref already exists "
6885 "for %llu parent %llu root %llu \n",
6886 (unsigned long long)bytenr,
6887 (unsigned long long)parent,
6888 (unsigned long long)root);
6890 back->node.found_ref = 1;
6892 if (back->node.found_extent_tree) {
6893 fprintf(stderr, "Extent back ref already exists "
6894 "for %llu parent %llu root %llu \n",
6895 (unsigned long long)bytenr,
6896 (unsigned long long)parent,
6897 (unsigned long long)root);
6899 back->node.found_extent_tree = 1;
6902 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6903 compare_extent_backref));
6904 check_extent_type(rec);
6905 maybe_free_extent_rec(extent_cache, rec);
/*
 * add_data_backref - record a data back reference for the extent at
 * @bytenr.
 *
 * @extent_cache: cache tree holding the extent records
 * @bytenr:       start of the data extent
 * @parent, @root, @owner, @offset: identify the backref; passed to
 *                find_data_backref() / alloc_data_backref()
 * @num_refs:     reference count to store on the backref
 * @found_ref:    passed to find_data_backref(); presumably also selects
 *                between the two update paths below (branch not visible)
 * @max_size:     size to record; rec->max_size is raised to it if larger
 *
 * Visible flow:
 *  - look up the record at @bytenr; a zeroed template with start and
 *    max_size set is added via add_extent_rec_nolookup() and the lookup is
 *    repeated (presumably only on a miss);
 *  - path A: BUG_ON unless @num_refs is 1; if the backref was already
 *    found, BUG_ON unless its bytes equal @max_size; set node.found_ref,
 *    bump back->found_ref; when bytes or disk_bytenr differ they are
 *    overwritten and the node is rb_erase()d for reinsertion; finally
 *    content_checked and owner_ref_checked are set on the record;
 *  - path B: print a message if node.found_extent_tree was already set,
 *    then store @num_refs and set node.found_extent_tree;
 *  - rb_insert() into rec->backref_tree under WARN_ON, then
 *    maybe_free_extent_rec().
 *
 * NOTE(review): return statements and the conditions choosing between the
 * paths fall on lines omitted from this listing.
 */
6909 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6910 u64 parent, u64 root, u64 owner, u64 offset,
6911 u32 num_refs, int found_ref, u64 max_size)
6913 struct extent_record *rec;
6914 struct data_backref *back;
6915 struct cache_extent *cache;
6917 bool insert = false;
6919 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6921 struct extent_record tmpl;
6923 memset(&tmpl, 0, sizeof(tmpl));
6924 tmpl.start = bytenr;
6926 tmpl.max_size = max_size;
6928 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6932 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6937 rec = container_of(cache, struct extent_record, cache);
6938 if (rec->max_size < max_size)
6939 rec->max_size = max_size;
6942 * If found_ref is set then max_size is the real size and must match the
6943 * existing refs. So if we have already found a ref then we need to
6944 * make sure that this ref matches the existing one, otherwise we need
6945 * to add a new backref so we can notice that the backrefs don't match
6946 * and we need to figure out who is telling the truth. This is to
6947 * account for that awful fsync bug I introduced where we'd end up with
6948 * a btrfs_file_extent_item that would have its length include multiple
6949 * prealloc extents or point inside of a prealloc extent.
6951 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6954 back = alloc_data_backref(rec, parent, root, owner, offset,
6961 BUG_ON(num_refs != 1);
6962 if (back->node.found_ref)
6963 BUG_ON(back->bytes != max_size);
6964 back->node.found_ref = 1;
6965 back->found_ref += 1;
6966 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
6967 back->bytes = max_size;
6968 back->disk_bytenr = bytenr;
6970 /* Need to reinsert if not already in the tree */
6972 rb_erase(&back->node.node, &rec->backref_tree);
6977 rec->content_checked = 1;
6978 rec->owner_ref_checked = 1;
6980 if (back->node.found_extent_tree) {
6981 fprintf(stderr, "Extent back ref already exists "
6982 "for %llu parent %llu root %llu "
6983 "owner %llu offset %llu num_refs %lu\n",
6984 (unsigned long long)bytenr,
6985 (unsigned long long)parent,
6986 (unsigned long long)root,
6987 (unsigned long long)owner,
6988 (unsigned long long)offset,
6989 (unsigned long)num_refs);
6991 back->num_refs = num_refs;
6992 back->node.found_extent_tree = 1;
6995 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6996 compare_extent_backref));
6998 maybe_free_extent_rec(extent_cache, rec);
/*
 * add_pending - queue a block (@bytenr, @size) for later processing.
 *
 * The range is inserted into @seen first and then into @pending, both via
 * add_cache_extent().
 *
 * NOTE(review): the test on `ret` between the two inserts is on a line
 * this listing omits; presumably a failed insert into @seen returns
 * early -- confirm.  The result of the insert into @pending is not
 * captured by the visible statement.
 */
7002 static int add_pending(struct cache_tree *pending,
7003 struct cache_tree *seen, u64 bytenr, u32 size)
7006 ret = add_cache_extent(seen, bytenr, size);
7009 add_cache_extent(pending, bytenr, size);
/*
 * pick_next_pending - choose the next batch of blocks and store their
 * start/size pairs in @bits (capacity @bits_nr).
 *
 * Visible steps (several branch conditions are on omitted lines):
 *  1. search @reada from 0 and copy that extent into bits[0];
 *  2. otherwise search @nodes from a position up to 32768 below @last,
 *     then from 0, then search @pending from 0;
 *  3. copy successive extents into @bits while extents remain and fewer
 *     than @bits_nr entries are used (two such loops are visible);
 *  4. when more than 8 slots are still free, walk @pending starting just
 *     after bits[0] and append extents, with a check on whether the gap
 *     to the next extent exceeds 32768.
 *
 * NOTE(review): node_start is declared `unsigned long` but initialised
 * from the u64 @last; on targets where unsigned long is 32 bits this
 * would truncate -- confirm whether that is intended.
 * NOTE(review): the return value is produced on lines not shown here;
 * presumably the number of entries written to @bits.
 */
7013 static int pick_next_pending(struct cache_tree *pending,
7014 struct cache_tree *reada,
7015 struct cache_tree *nodes,
7016 u64 last, struct block_info *bits, int bits_nr,
7019 unsigned long node_start = last;
7020 struct cache_extent *cache;
7023 cache = search_cache_extent(reada, 0);
7025 bits[0].start = cache->start;
7026 bits[0].size = cache->size;
7031 if (node_start > 32768)
7032 node_start -= 32768;
7034 cache = search_cache_extent(nodes, node_start);
7036 cache = search_cache_extent(nodes, 0);
7039 cache = search_cache_extent(pending, 0);
7044 bits[ret].start = cache->start;
7045 bits[ret].size = cache->size;
7046 cache = next_cache_extent(cache);
7048 } while (cache && ret < bits_nr);
7054 bits[ret].start = cache->start;
7055 bits[ret].size = cache->size;
7056 cache = next_cache_extent(cache);
7058 } while (cache && ret < bits_nr);
7060 if (bits_nr - ret > 8) {
7061 u64 lookup = bits[0].start + bits[0].size;
7062 struct cache_extent *next;
7063 next = search_cache_extent(pending, lookup);
7065 if (next->start - lookup > 32768)
7067 bits[ret].start = next->start;
7068 bits[ret].size = next->size;
7069 lookup = next->start + next->size;
7073 next = next_cache_extent(next);
/*
 * free_chunk_record - per-extent callback used when tearing down the
 * chunk cache tree.
 *
 * Recovers the chunk_record that embeds @cache and unlinks it from both
 * its `list` and `dextents` lists.  Releasing the record itself is
 * presumably done on a line this listing omits -- TODO confirm.
 */
7081 static void free_chunk_record(struct cache_extent *cache)
7083 struct chunk_record *rec;
7085 rec = container_of(cache, struct chunk_record, cache);
7086 list_del_init(&rec->list);
7087 list_del_init(&rec->dextents);
/*
 * free_chunk_cache_tree - release every entry of @chunk_cache by handing
 * free_chunk_record to cache_tree_free_extents().
 */
7091 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
7093 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * free_device_record - per-node callback for tearing down the device
 * rb-tree.
 *
 * Recovers the device_record that embeds @node.  What is then done with
 * the record is on a line this listing omits (presumably it is freed --
 * TODO confirm).
 */
7096 static void free_device_record(struct rb_node *node)
7098 struct device_record *rec;
7100 rec = container_of(node, struct device_record, node);
/*
 * Instantiate FREE_RB_BASED_TREE for the device cache, using
 * free_device_record as the per-node callback.
 * NOTE(review): the macro is defined outside this listing; presumably it
 * expands to the function that frees the whole device rb-tree -- confirm.
 */
7104 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * insert_block_group_record - add @bg_rec to @tree.
 *
 * The record is inserted into the cache tree (@tree->tree) through its
 * embedded cache extent and appended to the @tree->block_groups list.
 *
 * NOTE(review): the check on `ret` and the return statements are on
 * omitted lines; presumably the list append is skipped when the cache
 * insert fails -- confirm.
 */
7106 int insert_block_group_record(struct block_group_tree *tree,
7107 struct block_group_record *bg_rec)
7111 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7115 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * free_block_group_record - per-extent callback used when tearing down a
 * block group tree.
 *
 * Recovers the block_group_record that embeds @cache and unlinks it from
 * its `list`.  Releasing the record is presumably done on an omitted
 * line -- TODO confirm.
 */
7119 static void free_block_group_record(struct cache_extent *cache)
7121 struct block_group_record *rec;
7123 rec = container_of(cache, struct block_group_record, cache);
7124 list_del_init(&rec->list);
/*
 * free_block_group_tree - release every entry of @tree->tree by handing
 * free_block_group_record to cache_tree_free_extents().
 */
7128 void free_block_group_tree(struct block_group_tree *tree)
7130 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * insert_device_extent_record - add @de_rec to @tree.
 *
 * Uses insert_cache_extent2() rather than insert_cache_extent(); the
 * inline comment explains that extents on different devices may share the
 * same start and size.  The record is then appended to both the
 * no_chunk_orphans and no_device_orphans lists of @tree.
 *
 * NOTE(review): the check on `ret` and the return statements are on
 * omitted lines; presumably the list appends are skipped when the insert
 * fails -- confirm.
 */
7133 int insert_device_extent_record(struct device_extent_tree *tree,
7134 struct device_extent_record *de_rec)
7139 * Device extent is a bit different from the other extents, because
7140 * the extents which belong to the different devices may have the
7141 * same start and size, so we need use the special extent cache
7142 * search/insert functions.
7144 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7148 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7149 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * free_device_extent_record - per-extent callback used when tearing down
 * a device extent tree.
 *
 * Recovers the device_extent_record that embeds @cache and unlinks it
 * from chunk_list and device_list, each only when that list head is not
 * empty.  Releasing the record is presumably done on an omitted line --
 * TODO confirm.
 */
7153 static void free_device_extent_record(struct cache_extent *cache)
7155 struct device_extent_record *rec;
7157 rec = container_of(cache, struct device_extent_record, cache);
7158 if (!list_empty(&rec->chunk_list))
7159 list_del_init(&rec->chunk_list);
7160 if (!list_empty(&rec->device_list))
7161 list_del_init(&rec->device_list);
/*
 * free_device_extent_tree - release every entry of @tree->tree by handing
 * free_device_extent_record to cache_tree_free_extents().
 */
7165 void free_device_extent_tree(struct device_extent_tree *tree)
7167 cache_tree_free_extents(&tree->tree, free_device_extent_record);
/*
 * process_extent_ref_v0 - turn a v0 extent ref item at @slot of @leaf
 * into a backref in @extent_cache.  Only compiled when
 * BTRFS_COMPAT_EXTENT_TREE_V0 is defined.
 *
 * When the ref objectid is below BTRFS_FIRST_FREE_OBJECTID a tree backref
 * is added with bytenr = key.objectid and parent = key.offset (remaining
 * arguments are on an omitted line).  Otherwise a data backref is added
 * with the same bytenr/parent, zero for root, owner and offset, the count
 * from btrfs_ref_count_v0(), found_ref = 0 and max_size = 0.
 *
 * NOTE(review): the return statement and the matching #endif are on lines
 * omitted from this listing.
 */
7170 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7171 static int process_extent_ref_v0(struct cache_tree *extent_cache,
7172 struct extent_buffer *leaf, int slot)
7174 struct btrfs_extent_ref_v0 *ref0;
7175 struct btrfs_key key;
7178 btrfs_item_key_to_cpu(leaf, &key, slot);
7179 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
7180 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
7181 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
7184 ret = add_data_backref(extent_cache, key.objectid, key.offset,
7185 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * btrfs_new_chunk_record - build an in-memory chunk_record from the chunk
 * item at @slot of @leaf.
 *
 * The record is allocated zeroed with calloc(), sized by
 * btrfs_chunk_record_size(num_stripes).  On allocation failure a message
 * is printed to stderr (what follows is on an omitted line).
 *
 * Fields filled from visible statements:
 *  - cache.start / offset from key->offset, cache.size / length from
 *    btrfs_chunk_length();
 *  - generation from the leaf header; objectid and type from @key;
 *  - owner, stripe_len, type_flags, io_width, io_align, sector_size,
 *    num_stripes and sub_stripes from the chunk item accessors;
 *  - for each stripe: devid, offset and dev_uuid (copied out of the
 *    extent buffer with read_extent_buffer()).
 *
 * NOTE(review): the return statement is not visible; presumably the new
 * record is returned and ownership passes to the caller -- confirm.
 */
7191 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7192 struct btrfs_key *key,
7195 struct btrfs_chunk *ptr;
7196 struct chunk_record *rec;
7199 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7200 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7202 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7204 fprintf(stderr, "memory allocation failed\n");
7208 INIT_LIST_HEAD(&rec->list);
7209 INIT_LIST_HEAD(&rec->dextents);
7212 rec->cache.start = key->offset;
7213 rec->cache.size = btrfs_chunk_length(leaf, ptr);
7215 rec->generation = btrfs_header_generation(leaf);
7217 rec->objectid = key->objectid;
7218 rec->type = key->type;
7219 rec->offset = key->offset;
7221 rec->length = rec->cache.size;
7222 rec->owner = btrfs_chunk_owner(leaf, ptr);
7223 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7224 rec->type_flags = btrfs_chunk_type(leaf, ptr);
7225 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7226 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7227 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7228 rec->num_stripes = num_stripes;
7229 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
7231 for (i = 0; i < rec->num_stripes; ++i) {
7232 rec->stripes[i].devid =
7233 btrfs_stripe_devid_nr(leaf, ptr, i);
7234 rec->stripes[i].offset =
7235 btrfs_stripe_offset_nr(leaf, ptr, i);
7236 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7237 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * process_chunk_item - validate the chunk item at @slot of @eb and add a
 * record for it to @chunk_cache.
 *
 * The item is first checked with btrfs_check_chunk_valid(); an invalid
 * chunk is reported through error() with its offset and length.  A
 * chunk_record is then built with btrfs_new_chunk_record() and inserted
 * via insert_cache_extent(); when the insert fails a message saying the
 * chunk already existed is printed to stderr.
 *
 * NOTE(review): the conditions guarding the two messages, any cleanup of
 * the record on failure and the return statements are on lines omitted
 * from this listing.
 */
7244 static int process_chunk_item(struct cache_tree *chunk_cache,
7245 struct btrfs_key *key, struct extent_buffer *eb,
7248 struct chunk_record *rec;
7249 struct btrfs_chunk *chunk;
7252 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7254 * Do extra check for this chunk item,
7256 * It's still possible one can craft a leaf with CHUNK_ITEM, with
7257 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7258 * and owner<->key_type check.
7260 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7263 error("chunk(%llu, %llu) is not valid, ignore it",
7264 key->offset, btrfs_chunk_length(eb, chunk));
7267 rec = btrfs_new_chunk_record(eb, key, slot);
7268 ret = insert_cache_extent(chunk_cache, &rec->cache);
7270 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7271 rec->offset, rec->length);
/*
 * process_device_item - build a device_record from the dev item at @slot
 * of @eb and insert it into the @dev_cache rb-tree.
 *
 * Fields filled from visible statements: generation from the leaf header;
 * objectid, type and offset from @key; devid, total_byte and byte_used
 * from the dev item accessors.  Insertion uses rb_insert() with
 * device_record_compare; on failure a message saying the device already
 * existed is printed.
 *
 * NOTE(review): the record comes from malloc(), not calloc() as in the
 * sibling constructors, so members not assigned here (including the
 * embedded rb node) start uninitialised unless rb_insert() sets them --
 * confirm.
 * NOTE(review): rec->devid is assigned twice, first from key->offset and
 * later from btrfs_device_id(); the second assignment wins.
 * NOTE(review): return statements and failure cleanup are on omitted
 * lines.
 */
7278 static int process_device_item(struct rb_root *dev_cache,
7279 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7281 struct btrfs_dev_item *ptr;
7282 struct device_record *rec;
7285 ptr = btrfs_item_ptr(eb,
7286 slot, struct btrfs_dev_item);
7288 rec = malloc(sizeof(*rec));
7290 fprintf(stderr, "memory allocation failed\n");
7294 rec->devid = key->offset;
7295 rec->generation = btrfs_header_generation(eb);
7297 rec->objectid = key->objectid;
7298 rec->type = key->type;
7299 rec->offset = key->offset;
7301 rec->devid = btrfs_device_id(eb, ptr);
7302 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7303 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7305 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7307 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * btrfs_new_block_group_record - build an in-memory block_group_record
 * from the block group item at @slot of @leaf.
 *
 * The record is allocated zeroed with calloc(); on failure a message is
 * printed to stderr (what follows is on an omitted line).  cache.start
 * and objectid come from key->objectid, cache.size and offset from
 * key->offset, type from key->type, generation from the leaf header and
 * flags from btrfs_disk_block_group_flags().  The list head is
 * initialised empty.
 *
 * NOTE(review): the return statement is not visible; presumably the new
 * record is returned to the caller -- confirm.
 */
7314 struct block_group_record *
7315 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7318 struct btrfs_block_group_item *ptr;
7319 struct block_group_record *rec;
7321 rec = calloc(1, sizeof(*rec));
7323 fprintf(stderr, "memory allocation failed\n");
7327 rec->cache.start = key->objectid;
7328 rec->cache.size = key->offset;
7330 rec->generation = btrfs_header_generation(leaf);
7332 rec->objectid = key->objectid;
7333 rec->type = key->type;
7334 rec->offset = key->offset;
7336 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7337 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7339 INIT_LIST_HEAD(&rec->list);
/*
 * process_block_group_item - create a block_group_record for the item at
 * @slot of @eb and insert it into @block_group_cache.
 *
 * Uses btrfs_new_block_group_record() followed by
 * insert_block_group_record(); when the insert fails a message saying the
 * block group already existed is printed to stderr.
 *
 * NOTE(review): the condition on `ret`, failure cleanup and the return
 * statement are on lines omitted from this listing.
 */
7344 static int process_block_group_item(struct block_group_tree *block_group_cache,
7345 struct btrfs_key *key,
7346 struct extent_buffer *eb, int slot)
7348 struct block_group_record *rec;
7351 rec = btrfs_new_block_group_record(eb, key, slot);
7352 ret = insert_block_group_record(block_group_cache, rec);
7354 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7355 rec->objectid, rec->offset);
/*
 * btrfs_new_device_extent_record - build an in-memory
 * device_extent_record from the dev extent item at @slot of @leaf.
 *
 * The record is allocated zeroed with calloc(); on failure a message is
 * printed to stderr (what follows is on an omitted line).  The embedded
 * cache extent carries objectid = key->objectid, start = key->offset and
 * size = the dev extent length.  objectid, type and offset are copied
 * from @key, generation from the leaf header.  The member spelled
 * chunk_objecteid (sic) receives btrfs_dev_extent_chunk_objectid(); the
 * target of the btrfs_dev_extent_chunk_offset() assignment is on an
 * omitted line.  Both list heads are initialised empty.
 *
 * NOTE(review): the return statement is not visible; presumably the new
 * record is returned to the caller -- confirm.
 */
7362 struct device_extent_record *
7363 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7364 struct btrfs_key *key, int slot)
7366 struct device_extent_record *rec;
7367 struct btrfs_dev_extent *ptr;
7369 rec = calloc(1, sizeof(*rec));
7371 fprintf(stderr, "memory allocation failed\n");
7375 rec->cache.objectid = key->objectid;
7376 rec->cache.start = key->offset;
7378 rec->generation = btrfs_header_generation(leaf);
7380 rec->objectid = key->objectid;
7381 rec->type = key->type;
7382 rec->offset = key->offset;
7384 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7385 rec->chunk_objecteid =
7386 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7388 btrfs_dev_extent_chunk_offset(leaf, ptr);
7389 rec->length = btrfs_dev_extent_length(leaf, ptr);
7390 rec->cache.size = rec->length;
7392 INIT_LIST_HEAD(&rec->chunk_list);
7393 INIT_LIST_HEAD(&rec->device_list);
/*
 * process_device_extent_item - create a device_extent_record for the item
 * at @slot of @eb and insert it into @dev_extent_cache.
 *
 * Uses btrfs_new_device_extent_record() followed by
 * insert_device_extent_record(); a message with objectid, offset and
 * length is emitted when the extent already existed.
 *
 * NOTE(review): the return-type line, the condition on `ret`, the call
 * that prints the message, failure cleanup and the return statement are
 * on lines omitted from this listing.
 */
7399 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7400 struct btrfs_key *key, struct extent_buffer *eb,
7403 struct device_extent_record *rec;
7406 rec = btrfs_new_device_extent_record(eb, key, slot);
7407 ret = insert_device_extent_record(dev_extent_cache, rec);
7410 "Device extent[%llu, %llu, %llu] existed.\n",
7411 rec->objectid, rec->offset, rec->length);
/*
 * process_extent_item - record the extent item (EXTENT_ITEM or
 * METADATA_ITEM) at @slot of @eb in @extent_cache, together with all of
 * its inline back references.
 *
 * Visible flow:
 *  - num_bytes is the nodesize for METADATA_ITEM keys and key.offset
 *    otherwise;
 *  - a bytenr not aligned to the sector size is reported and ignored;
 *  - items smaller than struct btrfs_extent_item are handled as v0 items
 *    when BTRFS_COMPAT_EXTENT_TREE_V0 is defined: the refs are read and a
 *    template record is passed to add_extent_rec(), whose result is
 *    returned;
 *  - otherwise refs and the TREE_BLOCK flag are read from the item; a
 *    metadata extent whose length is not the nodesize, or a data extent
 *    whose length is not sector aligned, is reported and ignored;
 *  - a template (start, nr, extent_item_refs, metadata, max_size) is
 *    added with add_extent_rec();
 *  - the inline refs are walked from just past the extent item (skipping
 *    struct btrfs_tree_block_info for tree blocks stored under an
 *    EXTENT_ITEM key) up to the end of the item, dispatching on the ref
 *    type to add_tree_backref() or add_data_backref(); an unknown type
 *    prints a message about a corrupt extent record.
 *
 * NOTE(review): the results of the second add_extent_rec() call and of
 * both add_data_backref() calls are not captured by the visible
 * statements, while add_tree_backref() failures are reported -- confirm
 * whether ignoring them is intended.
 * NOTE(review): loop header, several conditions and return statements are
 * on lines omitted from this listing.
 */
7418 static int process_extent_item(struct btrfs_root *root,
7419 struct cache_tree *extent_cache,
7420 struct extent_buffer *eb, int slot)
7422 struct btrfs_extent_item *ei;
7423 struct btrfs_extent_inline_ref *iref;
7424 struct btrfs_extent_data_ref *dref;
7425 struct btrfs_shared_data_ref *sref;
7426 struct btrfs_key key;
7427 struct extent_record tmpl;
7432 u32 item_size = btrfs_item_size_nr(eb, slot);
7438 btrfs_item_key_to_cpu(eb, &key, slot);
7440 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7442 num_bytes = root->fs_info->nodesize;
7444 num_bytes = key.offset;
7447 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7448 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7449 key.objectid, root->fs_info->sectorsize);
7452 if (item_size < sizeof(*ei)) {
7453 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7454 struct btrfs_extent_item_v0 *ei0;
7455 BUG_ON(item_size != sizeof(*ei0));
7456 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7457 refs = btrfs_extent_refs_v0(eb, ei0);
7461 memset(&tmpl, 0, sizeof(tmpl));
7462 tmpl.start = key.objectid;
7463 tmpl.nr = num_bytes;
7464 tmpl.extent_item_refs = refs;
7465 tmpl.metadata = metadata;
7467 tmpl.max_size = num_bytes;
7469 return add_extent_rec(extent_cache, &tmpl);
7472 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7473 refs = btrfs_extent_refs(eb, ei);
7474 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7478 if (metadata && num_bytes != root->fs_info->nodesize) {
7479 error("ignore invalid metadata extent, length %llu does not equal to %u",
7480 num_bytes, root->fs_info->nodesize);
7483 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7484 error("ignore invalid data extent, length %llu is not aligned to %u",
7485 num_bytes, root->fs_info->sectorsize);
7489 memset(&tmpl, 0, sizeof(tmpl));
7490 tmpl.start = key.objectid;
7491 tmpl.nr = num_bytes;
7492 tmpl.extent_item_refs = refs;
7493 tmpl.metadata = metadata;
7495 tmpl.max_size = num_bytes;
7496 add_extent_rec(extent_cache, &tmpl);
7498 ptr = (unsigned long)(ei + 1);
7499 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7500 key.type == BTRFS_EXTENT_ITEM_KEY)
7501 ptr += sizeof(struct btrfs_tree_block_info);
7503 end = (unsigned long)ei + item_size;
7505 iref = (struct btrfs_extent_inline_ref *)ptr;
7506 type = btrfs_extent_inline_ref_type(eb, iref);
7507 offset = btrfs_extent_inline_ref_offset(eb, iref);
7509 case BTRFS_TREE_BLOCK_REF_KEY:
7510 ret = add_tree_backref(extent_cache, key.objectid,
7514 "add_tree_backref failed (extent items tree block): %s",
7517 case BTRFS_SHARED_BLOCK_REF_KEY:
7518 ret = add_tree_backref(extent_cache, key.objectid,
7522 "add_tree_backref failed (extent items shared block): %s",
7525 case BTRFS_EXTENT_DATA_REF_KEY:
7526 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7527 add_data_backref(extent_cache, key.objectid, 0,
7528 btrfs_extent_data_ref_root(eb, dref),
7529 btrfs_extent_data_ref_objectid(eb,
7531 btrfs_extent_data_ref_offset(eb, dref),
7532 btrfs_extent_data_ref_count(eb, dref),
7535 case BTRFS_SHARED_DATA_REF_KEY:
7536 sref = (struct btrfs_shared_data_ref *)(iref + 1);
7537 add_data_backref(extent_cache, key.objectid, offset,
7539 btrfs_shared_data_ref_count(eb, sref),
7543 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7544 key.objectid, key.type, num_bytes);
7547 ptr += btrfs_extent_inline_ref_size(type);
/*
 * check_cache_range - verify that the range (@offset, @bytes) of block
 * group @cache is present as exactly one free space entry, after carving
 * out any super block mirror stripes that overlap it.
 *
 * For each super block mirror (0 .. BTRFS_SUPER_MIRROR_MAX - 1) the
 * logical addresses mapping to it are obtained with btrfs_rmap_block().
 * Each returned stripe is compared with the range:
 *  - stripes entirely before or after the range are tested for first;
 *  - a stripe starting at @offset trims the front of the range;
 *  - a stripe starting before @offset trims up to the stripe end;
 *  - a stripe inside the range either trims the tail or, when it sits in
 *    the middle, causes a recursive call for the left part while the loop
 *    continues with the right part.
 * What remains is looked up with btrfs_find_free_space(); a missing
 * entry, a different offset or a different length is reported on stderr.
 * A matching entry is removed with unlink_free_space().
 *
 * NOTE(review): loop headers, early exits, release of the `logical`
 * array and return statements are on lines omitted from this listing.
 */
7554 static int check_cache_range(struct btrfs_root *root,
7555 struct btrfs_block_group_cache *cache,
7556 u64 offset, u64 bytes)
7558 struct btrfs_free_space *entry;
7564 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
7565 bytenr = btrfs_sb_offset(i);
7566 ret = btrfs_rmap_block(root->fs_info,
7567 cache->key.objectid, bytenr, 0,
7568 &logical, &nr, &stripe_len);
7573 if (logical[nr] + stripe_len <= offset)
7575 if (offset + bytes <= logical[nr])
7577 if (logical[nr] == offset) {
7578 if (stripe_len >= bytes) {
7582 bytes -= stripe_len;
7583 offset += stripe_len;
7584 } else if (logical[nr] < offset) {
7585 if (logical[nr] + stripe_len >=
7590 bytes = (offset + bytes) -
7591 (logical[nr] + stripe_len);
7592 offset = logical[nr] + stripe_len;
7595 * Could be tricky, the super may land in the
7596 * middle of the area we're checking. First
7597 * check the easiest case, it's at the end.
7599 if (logical[nr] + stripe_len >=
7601 bytes = logical[nr] - offset;
7605 /* Check the left side */
7606 ret = check_cache_range(root, cache,
7608 logical[nr] - offset);
7614 /* Now we continue with the right side */
7615 bytes = (offset + bytes) -
7616 (logical[nr] + stripe_len);
7617 offset = logical[nr] + stripe_len;
7624 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7626 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7627 offset, offset+bytes);
7631 if (entry->offset != offset) {
7632 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7637 if (entry->bytes != bytes) {
7638 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7639 bytes, entry->bytes, offset);
7643 unlink_free_space(cache->free_space_ctl, entry);
/*
 * verify_space_cache - cross-check the loaded free space entries of block
 * group @cache against the extent tree.
 *
 * The extent root is searched starting at the larger of the block group
 * start and BTRFS_SUPER_INFO_OFFSET.  Items are walked until the key
 * objectid reaches the end of the block group; keys other than
 * EXTENT_ITEM and METADATA_ITEM are tested for separately.  `last` tracks
 * the end of the previous extent (key.offset bytes for EXTENT_ITEM, the
 * nodesize for METADATA_ITEM).  Each gap between `last` and the next
 * extent, and the tail after the final extent, is passed to
 * check_cache_range().  After releasing the path, a free space rb-tree
 * that is still non-empty is reported on stderr.
 *
 * NOTE(review): the loop header, error branches and return statements are
 * on lines omitted from this listing.
 */
7648 static int verify_space_cache(struct btrfs_root *root,
7649 struct btrfs_block_group_cache *cache)
7651 struct btrfs_path path;
7652 struct extent_buffer *leaf;
7653 struct btrfs_key key;
7657 root = root->fs_info->extent_root;
7659 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7661 btrfs_init_path(&path);
7662 key.objectid = last;
7664 key.type = BTRFS_EXTENT_ITEM_KEY;
7665 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7670 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7671 ret = btrfs_next_leaf(root, &path);
7679 leaf = path.nodes[0];
7680 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7681 if (key.objectid >= cache->key.offset + cache->key.objectid)
7683 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7684 key.type != BTRFS_METADATA_ITEM_KEY) {
7689 if (last == key.objectid) {
7690 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7691 last = key.objectid + key.offset;
7693 last = key.objectid + root->fs_info->nodesize;
7698 ret = check_cache_range(root, cache, last,
7699 key.objectid - last);
7702 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7703 last = key.objectid + key.offset;
7705 last = key.objectid + root->fs_info->nodesize;
7709 if (last < cache->key.objectid + cache->key.offset)
7710 ret = check_cache_range(root, cache, last,
7711 cache->key.objectid +
7712 cache->key.offset - last);
7715 btrfs_release_path(&path);
7718 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7719 fprintf(stderr, "There are still entries left in the space "
/*
 * check_space_cache - load and verify the free space information of every
 * block group.
 *
 * If the super block cache generation is not -1ULL and differs from the
 * super block generation, a message says the space cache will be
 * invalidated.  When progress reporting is enabled the task position is
 * set to TASK_FREE_SPACE and the task is started; it is stopped before
 * returning.
 *
 * Block groups are visited via btrfs_lookup_first_block_group(), each
 * time continuing from the end of the previous group.  A missing
 * free_space_ctl is initialised with the sector size; the existing cache
 * contents are dropped with btrfs_remove_free_space_cache().  With the
 * FREE_SPACE_TREE compat_ro feature the super stripes are excluded, the
 * free space tree is loaded and the exclusions released again; otherwise
 * load_free_space_cache() is used.  The result is checked with
 * verify_space_cache() and a failure is reported on stderr.
 *
 * Returns -EINVAL when the local `error` is non-zero, else 0.
 *
 * NOTE(review): the loop header, the places where `error` is updated and
 * intermediate exits are on lines omitted from this listing.
 */
7727 static int check_space_cache(struct btrfs_root *root)
7729 struct btrfs_block_group_cache *cache;
7730 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7734 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7735 btrfs_super_generation(root->fs_info->super_copy) !=
7736 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7737 printf("cache and super generation don't match, space cache "
7738 "will be invalidated\n");
7742 if (ctx.progress_enabled) {
7743 ctx.tp = TASK_FREE_SPACE;
7744 task_start(ctx.info);
7748 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7752 start = cache->key.objectid + cache->key.offset;
7753 if (!cache->free_space_ctl) {
7754 if (btrfs_init_free_space_ctl(cache,
7755 root->fs_info->sectorsize)) {
7760 btrfs_remove_free_space_cache(cache);
7763 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7764 ret = exclude_super_stripes(root, cache);
7766 fprintf(stderr, "could not exclude super stripes: %s\n",
7771 ret = load_free_space_tree(root->fs_info, cache);
7772 free_excluded_extents(root, cache);
7774 fprintf(stderr, "could not load free space tree: %s\n",
7781 ret = load_free_space_cache(root->fs_info, cache);
7786 ret = verify_space_cache(root, cache);
7788 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7789 cache->key.objectid);
7794 task_stop(ctx.info);
7796 return error ? -EINVAL : 0;
/*
 * check_extent_csums - read the data at (@bytenr, @num_bytes) and compare
 * it against the checksums stored in leaf @eb.
 *
 * @leaf_offset is the offset inside @eb of the first checksum; the
 * checksum for the data at byte offset `tmp` is read from
 * leaf_offset + tmp / sectorsize * csum_size.
 *
 * Visible flow:
 *  - @num_bytes must be a multiple of the sector size (tested up front);
 *  - a buffer of @num_bytes is obtained with malloc();
 *  - data is read with read_extent_data() from the current mirror, asking
 *    for everything that is still missing;
 *  - each sectorsize piece is checksummed with btrfs_csum_data() and
 *    btrfs_csum_final() and compared with the stored value;
 *  - on a mismatch a message with mirror, bytenr and both checksums is
 *    printed and btrfs_num_copies() is consulted; another mirror is
 *    considered while mirror < num_copies - 1.
 *
 * NOTE(review): the inline comment speaks of 4k units, the code steps by
 * fs_info->sectorsize.
 * NOTE(review): buffer release, mirror retry details and return
 * statements are on lines omitted from this listing.
 */
7799 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7800 u64 num_bytes, unsigned long leaf_offset,
7801 struct extent_buffer *eb) {
7803 struct btrfs_fs_info *fs_info = root->fs_info;
7805 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7807 unsigned long csum_offset;
7811 u64 data_checked = 0;
7817 if (num_bytes % fs_info->sectorsize)
7820 data = malloc(num_bytes);
7824 while (offset < num_bytes) {
7827 read_len = num_bytes - offset;
7828 /* read as much space once a time */
7829 ret = read_extent_data(fs_info, data + offset,
7830 bytenr + offset, &read_len, mirror);
7834 /* verify every 4k data's checksum */
7835 while (data_checked < read_len) {
7837 tmp = offset + data_checked;
7839 csum = btrfs_csum_data((char *)data + tmp,
7840 csum, fs_info->sectorsize);
7841 btrfs_csum_final(csum, (u8 *)&csum);
7843 csum_offset = leaf_offset +
7844 tmp / fs_info->sectorsize * csum_size;
7845 read_extent_buffer(eb, (char *)&csum_expected,
7846 csum_offset, csum_size);
7847 /* try another mirror */
7848 if (csum != csum_expected) {
7849 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7850 mirror, bytenr + tmp,
7851 csum, csum_expected);
7852 num_copies = btrfs_num_copies(root->fs_info,
7854 if (mirror < num_copies - 1) {
7859 data_checked += fs_info->sectorsize;
/*
 * check_extent_exists - check that the byte range starting at @bytenr
 * (length num_bytes) is fully covered by EXTENT_ITEMs in the extent tree.
 *
 * Visible flow:
 *  - search the extent root for (@bytenr, EXTENT_ITEM, (u64)-1) and step
 *    back one slot, or one leaf through btrfs_prev_leaf(), so the walk
 *    starts at or before the range;
 *  - keep stepping back while the key type is greater than
 *    EXTENT_ITEM_KEY (the inline comment explains that block group items
 *    sort before extent items with the same bytenr);
 *  - walk forward over EXTENT_ITEM keys, skipping extents that end before
 *    @bytenr and testing for extents that start past the range;
 *  - an extent starting at @bytenr trims the front of the range, one
 *    starting earlier trims up to its end, one starting inside the range
 *    trims the tail; if such an extent also ends inside the range, the
 *    right-hand remainder is checked by a recursive call first;
 *  - if bytes remain uncovered and no error occurred, a message naming
 *    the csum range without extents is printed.
 *
 * The path is released before returning.
 *
 * NOTE(review): the loop header, early exits and return values are on
 * lines omitted from this listing.
 */
7868 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7871 struct btrfs_path path;
7872 struct extent_buffer *leaf;
7873 struct btrfs_key key;
7876 btrfs_init_path(&path);
7877 key.objectid = bytenr;
7878 key.type = BTRFS_EXTENT_ITEM_KEY;
7879 key.offset = (u64)-1;
7882 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7885 fprintf(stderr, "Error looking up extent record %d\n", ret);
7886 btrfs_release_path(&path);
7889 if (path.slots[0] > 0) {
7892 ret = btrfs_prev_leaf(root, &path);
7895 } else if (ret > 0) {
7902 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7905 * Block group items come before extent items if they have the same
7906 * bytenr, so walk back one more just in case. Dear future traveller,
7907 * first congrats on mastering time travel. Now if it's not too much
7908 * trouble could you go back to 2006 and tell Chris to make the
7909 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7910 * EXTENT_ITEM_KEY please?
7912 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7913 if (path.slots[0] > 0) {
7916 ret = btrfs_prev_leaf(root, &path);
7919 } else if (ret > 0) {
7924 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7928 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7929 ret = btrfs_next_leaf(root, &path);
7931 fprintf(stderr, "Error going to next leaf "
7933 btrfs_release_path(&path);
7939 leaf = path.nodes[0];
7940 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7941 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7945 if (key.objectid + key.offset < bytenr) {
7949 if (key.objectid > bytenr + num_bytes)
7952 if (key.objectid == bytenr) {
7953 if (key.offset >= num_bytes) {
7957 num_bytes -= key.offset;
7958 bytenr += key.offset;
7959 } else if (key.objectid < bytenr) {
7960 if (key.objectid + key.offset >= bytenr + num_bytes) {
7964 num_bytes = (bytenr + num_bytes) -
7965 (key.objectid + key.offset);
7966 bytenr = key.objectid + key.offset;
7968 if (key.objectid + key.offset < bytenr + num_bytes) {
7969 u64 new_start = key.objectid + key.offset;
7970 u64 new_bytes = bytenr + num_bytes - new_start;
7973 * Weird case, the extent is in the middle of
7974 * our range, we'll have to search one side
7975 * and then the other. Not sure if this happens
7976 * in real life, but no harm in coding it up
7977 * anyway just in case.
7979 btrfs_release_path(&path);
7980 ret = check_extent_exists(root, new_start,
7983 fprintf(stderr, "Right section didn't "
7987 num_bytes = key.objectid - bytenr;
7990 num_bytes = key.objectid - bytenr;
7997 if (num_bytes && !ret) {
7998 fprintf(stderr, "There are no extents for csum range "
7999 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8003 btrfs_release_path(&path);
/*
 * check_csums - walk the checksum tree and verify that every checksummed
 * range is backed by an extent record; optionally verify the data
 * checksums themselves.
 *
 * Visible flow:
 *  - switch to fs_info->csum_root and bail out with a message when its
 *    root node is not up to date;
 *  - search from (BTRFS_EXTENT_CSUM_OBJECTID, BTRFS_EXTENT_CSUM_KEY) and
 *    walk the items, moving to the next leaf as needed;
 *  - items whose key type is not EXTENT_CSUM are tested for separately;
 *  - data_len = item size / csum_size * sectorsize is the amount of data
 *    an item covers;
 *  - unless the global check_data_csum is zero, check_extent_csums() is
 *    called for the item;
 *  - contiguous items are merged into one run (offset, num_bytes); when
 *    an item does not start at offset + num_bytes the finished run is
 *    passed to check_extent_exists() and a failure is reported.
 *
 * The path is released before returning.
 *
 * NOTE(review): the loop header, the skip_csum_check label, error
 * accounting and return statements are on lines omitted from this
 * listing.
 */
8007 static int check_csums(struct btrfs_root *root)
8009 struct btrfs_path path;
8010 struct extent_buffer *leaf;
8011 struct btrfs_key key;
8012 u64 offset = 0, num_bytes = 0;
8013 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8017 unsigned long leaf_offset;
8019 root = root->fs_info->csum_root;
8020 if (!extent_buffer_uptodate(root->node)) {
8021 fprintf(stderr, "No valid csum tree found\n");
8025 btrfs_init_path(&path);
8026 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8027 key.type = BTRFS_EXTENT_CSUM_KEY;
8029 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8031 fprintf(stderr, "Error searching csum tree %d\n", ret);
8032 btrfs_release_path(&path);
8036 if (ret > 0 && path.slots[0])
8041 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8042 ret = btrfs_next_leaf(root, &path);
8044 fprintf(stderr, "Error going to next leaf "
8051 leaf = path.nodes[0];
8053 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8054 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
8059 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8060 csum_size) * root->fs_info->sectorsize;
8061 if (!check_data_csum)
8062 goto skip_csum_check;
8063 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8064 ret = check_extent_csums(root, key.offset, data_len,
8070 offset = key.offset;
8071 } else if (key.offset != offset + num_bytes) {
8072 ret = check_extent_exists(root, offset, num_bytes);
8074 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8075 "there is no extent record\n",
8076 offset, offset+num_bytes);
8079 offset = key.offset;
8082 num_bytes += data_len;
8086 btrfs_release_path(&path);
/*
 * is_dropped_key - compare @key with @drop_key field by field, in the
 * order objectid, type, offset.
 *
 * Each visible test asks whether the field of @key is smaller than the
 * one in @drop_key, moving on to the next field only when the current
 * ones are equal.
 *
 * NOTE(review): the return statements are on lines omitted from this
 * listing; presumably non-zero is returned when @key sorts before
 * @drop_key and zero otherwise -- confirm.
 */
8090 static int is_dropped_key(struct btrfs_key *key,
8091 struct btrfs_key *drop_key) {
8092 if (key->objectid < drop_key->objectid)
8094 else if (key->objectid == drop_key->objectid) {
8095 if (key->type < drop_key->type)
8097 else if (key->type == drop_key->type) {
8098 if (key->offset < drop_key->offset)
8106 * Here are the rules for FULL_BACKREF.
8108 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8109 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8111 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
8112 * if it happened after the relocation occurred since we'll have dropped the
8113 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8114 * have no real way to know for sure.
8116 * We process the blocks one root at a time, and we start from the lowest root
8117 * objectid and go to the highest. So we can just lookup the owner backref for
8118 * the record and if we don't find it then we know it doesn't exist and we have
8121 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
8122 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8123 * be set or not and then we can check later once we've gathered all the refs.
/*
 * calc_extent_flag - work out whether BTRFS_BLOCK_FLAG_FULL_BACKREF
 * applies to tree block @buf when reached from the root described by @ri,
 * and record disagreements on the extent record.
 *
 * The extent record for buf->start is looked up in @extent_cache.  The
 * visible tests, in order, are:
 *  - ri->objectid below BTRFS_FIRST_FREE_OBJECTID;
 *  - @buf being the root block itself (buf->start == ri->bytenr);
 *  - BTRFS_HEADER_FLAG_RELOC set in the block header;
 *  - the header owner equal to ri->objectid;
 *  - a lookup of the owner backref via find_tree_backref(rec, 0, owner).
 * Two outcomes are visible: one leaves the flag alone and marks
 * rec->bad_full_backref when flag_block_full_backref was already recorded
 * as something other than 0; the other ORs
 * BTRFS_BLOCK_FLAG_FULL_BACKREF into the flags output and marks
 * rec->bad_full_backref when the recorded value is something other
 * than 1.
 *
 * NOTE(review): the final parameter line, the jump targets of the tests
 * above and the return statements are on lines omitted from this listing.
 */
8125 static int calc_extent_flag(struct cache_tree *extent_cache,
8126 struct extent_buffer *buf,
8127 struct root_item_record *ri,
8130 struct extent_record *rec;
8131 struct cache_extent *cache;
8132 struct tree_backref *tback;
8135 cache = lookup_cache_extent(extent_cache, buf->start, 1);
8136 /* we have added this extent before */
8140 rec = container_of(cache, struct extent_record, cache);
8143 * Except file/reloc tree, we can not have
8146 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8151 if (buf->start == ri->bytenr)
8154 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8157 owner = btrfs_header_owner(buf);
8158 if (owner == ri->objectid)
8161 tback = find_tree_backref(rec, 0, owner);
8166 if (rec->flag_block_full_backref != FLAG_UNSET &&
8167 rec->flag_block_full_backref != 0)
8168 rec->bad_full_backref = 1;
8171 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8172 if (rec->flag_block_full_backref != FLAG_UNSET &&
8173 rec->flag_block_full_backref != 1)
8174 rec->bad_full_backref = 1;
/*
 * report_mismatch_key_root - print a one-line diagnostic to stderr saying
 * that @key_type is not valid in the tree identified by @rootid.
 *
 * The key type and root id are rendered by print_key_type() and
 * print_objectid() between the fixed text fragments.
 */
8178 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8180 fprintf(stderr, "Invalid key type(");
8181 print_key_type(stderr, 0, key_type);
8182 fprintf(stderr, ") found in root(");
8183 print_objectid(stderr, rootid, 0);
8184 fprintf(stderr, ")\n");
8188 * Check if the key is valid with its extent buffer.
8190 * This is a early check in case invalid key exists in a extent buffer
8191 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * check_type_with_root - early sanity check that an item of @key_type may
 * appear in the tree identified by @rootid.
 *
 * Visible rules:
 *  - DEV_ITEM and CHUNK_ITEM only in the chunk tree;
 *  - EXTENT_ITEM, METADATA_ITEM and BLOCK_GROUP_ITEM only in the extent
 *    tree;
 *  - ROOT_ITEM only in the root tree;
 *  - DEV_EXTENT only in the dev tree.
 * A violation ends in report_mismatch_key_root().
 *
 * NOTE(review): one case label is BTRFS_CSUM_TREE_OBJECTID, a tree
 * objectid constant, while every sibling label is a *_KEY constant and
 * the comment above it speaks of the csum and log trees.  This looks like
 * a key type constant was intended -- confirm against the key type
 * definitions before changing it.
 * NOTE(review): the switch header, the second half of the condition under
 * that label, the error jumps and return statements are on lines omitted
 * from this listing.
 */
8194 static int check_type_with_root(u64 rootid, u8 key_type)
8197 /* Only valid in chunk tree */
8198 case BTRFS_DEV_ITEM_KEY:
8199 case BTRFS_CHUNK_ITEM_KEY:
8200 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8203 /* valid in csum and log tree */
8204 case BTRFS_CSUM_TREE_OBJECTID:
8205 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8209 case BTRFS_EXTENT_ITEM_KEY:
8210 case BTRFS_METADATA_ITEM_KEY:
8211 case BTRFS_BLOCK_GROUP_ITEM_KEY:
8212 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8215 case BTRFS_ROOT_ITEM_KEY:
8216 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8219 case BTRFS_DEV_EXTENT_KEY:
8220 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8226 report_mismatch_key_root(key_type, rootid);
/*
 * Process the next queued tree block.
 *
 * Picks candidates with pick_next_pending(), issues readahead for them and
 * then handles bits[0]: its range is looked up in (and removed from) the
 * pending/reada/nodes caches, the block is read, its backref flags are looked
 * up with btrfs_lookup_extent_info() or computed with calc_extent_flag(), and
 * check_block() is run on it.
 *
 * For a leaf, every item is dispatched by key type to the matching
 * process_*() / add_*_backref() helper, and file extent items feed the
 * data_bytes_allocated / data_bytes_referenced counters. For a node, each
 * child pointer gets an extent record and a tree backref and is queued with
 * add_pending(). Finally the global btree byte counters are updated and the
 * buffer is released with free_extent_buffer().
 */
8230 static int run_next_block(struct btrfs_root *root,
8231 struct block_info *bits,
8234 struct cache_tree *pending,
8235 struct cache_tree *seen,
8236 struct cache_tree *reada,
8237 struct cache_tree *nodes,
8238 struct cache_tree *extent_cache,
8239 struct cache_tree *chunk_cache,
8240 struct rb_root *dev_cache,
8241 struct block_group_tree *block_group_cache,
8242 struct device_extent_tree *dev_extent_cache,
8243 struct root_item_record *ri)
8245 struct btrfs_fs_info *fs_info = root->fs_info;
8246 struct extent_buffer *buf;
8247 struct extent_record *rec = NULL;
8258 struct btrfs_key key;
8259 struct cache_extent *cache;
/* Choose the next batch of blocks; bits[0] is the one handled by this call. */
8262 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8263 bits_nr, &reada_bits);
/* Record each picked block in the readahead cache and start reading it. */
8268 for(i = 0; i < nritems; i++) {
8269 ret = add_cache_extent(reada, bits[i].start,
8274 /* fixme, get the parent transid */
8275 readahead_tree_block(fs_info, bits[i].start, 0);
8278 *last = bits[0].start;
8279 bytenr = bits[0].start;
8280 size = bits[0].size;
/* This range is being processed now: take it out of the work queues. */
8282 cache = lookup_cache_extent(pending, bytenr, size);
8284 remove_cache_extent(pending, cache);
8287 cache = lookup_cache_extent(reada, bytenr, size);
8289 remove_cache_extent(reada, cache);
8292 cache = lookup_cache_extent(nodes, bytenr, size);
8294 remove_cache_extent(nodes, cache);
/* The extent record, when found, supplies the expected parent generation. */
8297 cache = lookup_cache_extent(extent_cache, bytenr, size);
8299 rec = container_of(cache, struct extent_record, cache);
8300 gen = rec->parent_generation;
8303 /* fixme, get the real parent transid */
8304 buf = read_tree_block(root->fs_info, bytenr, gen);
8305 if (!extent_buffer_uptodate(buf)) {
8306 record_bad_block_io(root->fs_info,
8307 extent_cache, bytenr, size);
8311 nritems = btrfs_header_nritems(buf);
/*
 * Obtain the extent flags: from the extent tree unless it is being
 * re-initialised (init_extent_tree), otherwise computed from the caches.
 * A failed calc_extent_flag() is reported and FULL_BACKREF is assumed.
 */
8314 if (!init_extent_tree) {
8315 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8316 btrfs_header_level(buf), 1, NULL,
8319 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8321 fprintf(stderr, "Couldn't calc extent flags\n");
8322 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8327 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8329 fprintf(stderr, "Couldn't calc extent flags\n");
8330 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/*
 * Cross-check FULL_BACKREF against how the block was reached; on a
 * disagreement the flag is flipped and the record is marked
 * bad_full_backref.
 */
8334 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8336 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8337 ri->objectid == btrfs_header_owner(buf)) {
8339 * Ok we got to this block from it's original owner and
8340 * we have FULL_BACKREF set. Relocation can leave
8341 * converted blocks over so this is altogether possible,
8342 * however it's not possible if the generation > the
8343 * last snapshot, so check for this case.
8345 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8346 btrfs_header_generation(buf) > ri->last_snapshot) {
8347 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8348 rec->bad_full_backref = 1;
8353 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8354 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8355 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8356 rec->bad_full_backref = 1;
8360 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8361 rec->flag_block_full_backref = 1;
8365 rec->flag_block_full_backref = 0;
8367 owner = btrfs_header_owner(buf);
8370 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: walk every item and hand it to the handler for its key type. */
8374 if (btrfs_is_leaf(buf)) {
8375 btree_space_waste += btrfs_leaf_free_space(root, buf);
8376 for (i = 0; i < nritems; i++) {
8377 struct btrfs_file_extent_item *fi;
8378 btrfs_item_key_to_cpu(buf, &key, i);
8380 * Check key type against the leaf owner.
8381 * Could filter quite a lot of early error if
8384 if (check_type_with_root(btrfs_header_owner(buf),
8386 fprintf(stderr, "ignoring invalid key\n");
8389 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8390 process_extent_item(root, extent_cache, buf,
8394 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8395 process_extent_item(root, extent_cache, buf,
8399 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8401 btrfs_item_size_nr(buf, i);
8404 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8405 process_chunk_item(chunk_cache, &key, buf, i);
8408 if (key.type == BTRFS_DEV_ITEM_KEY) {
8409 process_device_item(dev_cache, &key, buf, i);
8412 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8413 process_block_group_item(block_group_cache,
8417 if (key.type == BTRFS_DEV_EXTENT_KEY) {
8418 process_device_extent_item(dev_extent_cache,
8423 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8424 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8425 process_extent_ref_v0(extent_cache, buf, i);
8432 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8433 ret = add_tree_backref(extent_cache,
8434 key.objectid, 0, key.offset, 0);
8437 "add_tree_backref failed (leaf tree block): %s",
8441 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8442 ret = add_tree_backref(extent_cache,
8443 key.objectid, key.offset, 0, 0);
8446 "add_tree_backref failed (leaf shared block): %s",
8450 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8451 struct btrfs_extent_data_ref *ref;
8452 ref = btrfs_item_ptr(buf, i,
8453 struct btrfs_extent_data_ref);
8454 add_data_backref(extent_cache,
8456 btrfs_extent_data_ref_root(buf, ref),
8457 btrfs_extent_data_ref_objectid(buf,
8459 btrfs_extent_data_ref_offset(buf, ref),
8460 btrfs_extent_data_ref_count(buf, ref),
8461 0, root->fs_info->sectorsize);
8464 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8465 struct btrfs_shared_data_ref *ref;
8466 ref = btrfs_item_ptr(buf, i,
8467 struct btrfs_shared_data_ref);
8468 add_data_backref(extent_cache,
8469 key.objectid, key.offset, 0, 0, 0,
8470 btrfs_shared_data_ref_count(buf, ref),
8471 0, root->fs_info->sectorsize);
8474 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8475 struct bad_item *bad;
8477 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
/* Remember this orphan item (key + owning root) on the global delete_items list. */
8481 bad = malloc(sizeof(struct bad_item));
8484 INIT_LIST_HEAD(&bad->list);
8485 memcpy(&bad->key, &key,
8486 sizeof(struct btrfs_key));
8487 bad->root_id = owner;
8488 list_add_tail(&bad->list, &delete_items);
8491 if (key.type != BTRFS_EXTENT_DATA_KEY)
8493 fi = btrfs_item_ptr(buf, i,
8494 struct btrfs_file_extent_item);
8495 if (btrfs_file_extent_type(buf, fi) ==
8496 BTRFS_FILE_EXTENT_INLINE)
8498 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8501 data_bytes_allocated +=
8502 btrfs_file_extent_disk_num_bytes(buf, fi);
8503 if (data_bytes_allocated < root->fs_info->sectorsize) {
8506 data_bytes_referenced +=
8507 btrfs_file_extent_num_bytes(buf, fi);
8508 add_data_backref(extent_cache,
8509 btrfs_file_extent_disk_bytenr(buf, fi),
8510 parent, owner, key.objectid, key.offset -
8511 btrfs_file_extent_offset(buf, fi), 1, 1,
8512 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Node: record every child pointer and queue the child for a later call. */
8516 struct btrfs_key first_key;
8518 first_key.objectid = 0;
8521 btrfs_item_key_to_cpu(buf, &first_key, 0);
8522 level = btrfs_header_level(buf);
8523 for (i = 0; i < nritems; i++) {
8524 struct extent_record tmpl;
8526 ptr = btrfs_node_blockptr(buf, i);
8527 size = root->fs_info->nodesize;
8528 btrfs_node_key_to_cpu(buf, &key, i);
8530 if ((level == ri->drop_level)
8531 && is_dropped_key(&key, &ri->drop_key)) {
8536 memset(&tmpl, 0, sizeof(tmpl));
8537 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8538 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8543 tmpl.max_size = size;
8544 ret = add_extent_rec(extent_cache, &tmpl);
8548 ret = add_tree_backref(extent_cache, ptr, parent,
8552 "add_tree_backref failed (non-leaf block): %s",
8558 add_pending(nodes, seen, ptr, size);
8560 add_pending(pending, seen, ptr, size);
8563 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
8564 nritems) * sizeof(struct btrfs_key_ptr);
/* Global size accounting, split out for fs trees and the extent tree. */
8566 total_btree_bytes += buf->len;
8567 if (fs_root_objectid(btrfs_header_owner(buf)))
8568 total_fs_tree_bytes += buf->len;
8569 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8570 total_extent_tree_bytes += buf->len;
8572 free_extent_buffer(buf);
/*
 * Seed the scan with a tree root block.
 *
 * @buf is queued with add_pending() into @nodes when its header level is
 * above zero and into @pending otherwise, an extent record covering
 * [buf->start, buf->start + buf->len) is added to @extent_cache, and a tree
 * backref is registered for it. For the relocation tree, or for blocks whose
 * backref revision is older than BTRFS_MIXED_BACKREF_REV, the backref uses
 * buf->start as the parent; otherwise it is keyed by the root objectid.
 */
8576 static int add_root_to_pending(struct extent_buffer *buf,
8577 struct cache_tree *extent_cache,
8578 struct cache_tree *pending,
8579 struct cache_tree *seen,
8580 struct cache_tree *nodes,
8583 struct extent_record tmpl;
8586 if (btrfs_header_level(buf) > 0)
8587 add_pending(nodes, seen, buf->start, buf->len);
8589 add_pending(pending, seen, buf->start, buf->len);
8591 memset(&tmpl, 0, sizeof(tmpl));
8592 tmpl.start = buf->start;
8597 tmpl.max_size = buf->len;
/* NOTE(review): the result of add_extent_rec() is not captured on this line. */
8598 add_extent_rec(extent_cache, &tmpl);
8600 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8601 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8602 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8605 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8610 /* as we fix the tree, we might be deleting blocks that
8611 * we're tracking for repair. This hook makes sure we
8612 * remove any backrefs for blocks as we are fixing them.
/*
 * Keep the cached extent records in sync when an extent reference is dropped.
 *
 * Looks up the record covering [bytenr, bytenr + num_bytes) in
 * root->fs_info->fsck_extent_cache and adjusts the bookkeeping of the
 * matching backref. @owner >= BTRFS_FIRST_FREE_OBJECTID selects the data
 * backref path (found via find_data_backref()), anything else the tree
 * backref path (find_tree_backref()). maybe_free_extent_rec() is called on
 * the record at the end.
 */
8614 static int free_extent_hook(struct btrfs_trans_handle *trans,
8615 struct btrfs_root *root,
8616 u64 bytenr, u64 num_bytes, u64 parent,
8617 u64 root_objectid, u64 owner, u64 offset,
8620 struct extent_record *rec;
8621 struct cache_extent *cache;
8623 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8625 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8626 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8630 rec = container_of(cache, struct extent_record, cache);
8632 struct data_backref *back;
8633 back = find_data_backref(rec, parent, root_objectid, owner,
8634 offset, 1, bytenr, num_bytes);
/* Drop the references from whichever side(s) had counted them. */
8637 if (back->node.found_ref) {
8638 back->found_ref -= refs_to_drop;
8640 rec->refs -= refs_to_drop;
8642 if (back->node.found_extent_tree) {
8643 back->num_refs -= refs_to_drop;
8644 if (rec->extent_item_refs)
8645 rec->extent_item_refs -= refs_to_drop;
/* Clear the "found" bits once the corresponding counter reaches zero. */
8647 if (back->found_ref == 0)
8648 back->node.found_ref = 0;
8649 if (back->num_refs == 0)
8650 back->node.found_extent_tree = 0;
/*
 * NOTE(review): the backref is erased only while found_ref is still set;
 * "neither found" (!found_ref) may have been intended - confirm.
 */
8652 if (!back->node.found_extent_tree && back->node.found_ref) {
8653 rb_erase(&back->node.node, &rec->backref_tree);
8657 struct tree_backref *back;
8658 back = find_tree_backref(rec, parent, root_objectid);
8661 if (back->node.found_ref) {
8664 back->node.found_ref = 0;
8666 if (back->node.found_extent_tree) {
8667 if (rec->extent_item_refs)
8668 rec->extent_item_refs--;
8669 back->node.found_extent_tree = 0;
/*
 * NOTE(review): found_ref has just been cleared above whenever it was set,
 * so this condition looks unreachable as written - confirm the intent.
 */
8671 if (!back->node.found_extent_tree && back->node.found_ref) {
8672 rb_erase(&back->node.node, &rec->backref_tree);
8676 maybe_free_extent_rec(extent_cache, rec);
/*
 * Remove the extent-tree items that describe the extent at @bytenr.
 *
 * The search key starts at (bytenr, <type>, (u64)-1) in the extent root and
 * is stepped downwards (offset - 1, then type - 1) past items that are not
 * one of the extent / backref key types listed below. Matching items are
 * logged and deleted with btrfs_del_item(); after deleting an EXTENT_ITEM or
 * METADATA_ITEM the block group accounting is adjusted through
 * btrfs_update_block_group(), using the key offset as the length for
 * EXTENT_ITEM and fs_info->nodesize for METADATA_ITEM.
 */
8681 static int delete_extent_records(struct btrfs_trans_handle *trans,
8682 struct btrfs_root *root,
8683 struct btrfs_path *path,
8686 struct btrfs_key key;
8687 struct btrfs_key found_key;
8688 struct extent_buffer *leaf;
8693 key.objectid = bytenr;
8695 key.offset = (u64)-1;
8698 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8705 if (path->slots[0] == 0)
8711 leaf = path->nodes[0];
8712 slot = path->slots[0];
8714 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8715 if (found_key.objectid != bytenr)
/* Not an extent or backref item: step the search key below it and retry. */
8718 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8719 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8720 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8721 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8722 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8723 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8724 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8725 btrfs_release_path(path);
8726 if (found_key.type == 0) {
8727 if (found_key.offset == 0)
8729 key.offset = found_key.offset - 1;
8730 key.type = found_key.type;
8732 key.type = found_key.type - 1;
8733 key.offset = (u64)-1;
8737 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8738 found_key.objectid, found_key.type, found_key.offset);
8740 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8743 btrfs_release_path(path);
8745 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8746 found_key.type == BTRFS_METADATA_ITEM_KEY) {
8747 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8748 found_key.offset : root->fs_info->nodesize;
8750 ret = btrfs_update_block_group(trans, root, bytenr,
8757 btrfs_release_path(path);
8762 * for a single backref, this will allocate a new extent
8763 * and add the backref to it.
/*
 * Re-create the extent-tree entries for one backref of @rec.
 *
 * An EXTENT_ITEM keyed (rec->start, BTRFS_EXTENT_ITEM_KEY, rec->max_size) is
 * inserted with a zero ref count and rec->generation, flagged as DATA for
 * data backrefs or as TREE_BLOCK | @flags (with a trailing
 * btrfs_tree_block_info) for tree backrefs, and the block group is updated.
 * References are then added with btrfs_inc_extent_ref(): once per found_ref
 * for a data backref, once for a tree backref. A full backref uses the
 * backref's parent as the parent argument.
 *
 * NOTE(review): @allocated presumably suppresses the item insertion when the
 * extent item already exists; the guarding condition is not visible here.
 */
8765 static int record_extent(struct btrfs_trans_handle *trans,
8766 struct btrfs_fs_info *info,
8767 struct btrfs_path *path,
8768 struct extent_record *rec,
8769 struct extent_backref *back,
8770 int allocated, u64 flags)
8773 struct btrfs_root *extent_root = info->extent_root;
8774 struct extent_buffer *leaf;
8775 struct btrfs_key ins_key;
8776 struct btrfs_extent_item *ei;
8777 struct data_backref *dback;
8778 struct btrfs_tree_block_info *bi;
8781 rec->max_size = max_t(u64, rec->max_size,
8785 u32 item_size = sizeof(*ei);
8788 item_size += sizeof(*bi);
8790 ins_key.objectid = rec->start;
8791 ins_key.offset = rec->max_size;
8792 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8794 ret = btrfs_insert_empty_item(trans, extent_root, path,
8795 &ins_key, item_size);
8799 leaf = path->nodes[0];
8800 ei = btrfs_item_ptr(leaf, path->slots[0],
8801 struct btrfs_extent_item);
/* Start from zero refs; btrfs_inc_extent_ref() below adds the real ones. */
8803 btrfs_set_extent_refs(leaf, ei, 0);
8804 btrfs_set_extent_generation(leaf, ei, rec->generation);
8806 if (back->is_data) {
8807 btrfs_set_extent_flags(leaf, ei,
8808 BTRFS_EXTENT_FLAG_DATA);
/* NOTE(review): stray second ';' on the declaration below (harmless). */
8810 struct btrfs_disk_key copy_key;;
/* The tree block info sits directly after the extent item in the leaf. */
8812 bi = (struct btrfs_tree_block_info *)(ei + 1);
8813 memset_extent_buffer(leaf, 0, (unsigned long)bi,
/*
 * NOTE(review): "©_key" in the calls below looks like a mis-encoded
 * "&copy_key" (the "&copy" prefix decoded as an HTML entity); as written it
 * is not valid C. Restore the address-of expression.
 */
8816 btrfs_set_disk_key_objectid(©_key,
8817 rec->info_objectid);
8818 btrfs_set_disk_key_type(©_key, 0);
8819 btrfs_set_disk_key_offset(©_key, 0);
8821 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8822 btrfs_set_tree_block_key(leaf, bi, ©_key);
8824 btrfs_set_extent_flags(leaf, ei,
8825 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8828 btrfs_mark_buffer_dirty(leaf);
8829 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8830 rec->max_size, 1, 0);
8833 btrfs_release_path(path);
8836 if (back->is_data) {
8840 dback = to_data_backref(back);
8841 if (back->full_backref)
8842 parent = dback->parent;
8846 for (i = 0; i < dback->found_ref; i++) {
8847 /* if parent != 0, we're doing a full backref
8848 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8849 * just makes the backref allocator create a data
8852 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8853 rec->start, rec->max_size,
8857 BTRFS_FIRST_FREE_OBJECTID :
8863 fprintf(stderr, "adding new data backref"
8864 " on %llu %s %llu owner %llu"
8865 " offset %llu found %d\n",
8866 (unsigned long long)rec->start,
8867 back->full_backref ?
8869 back->full_backref ?
8870 (unsigned long long)parent :
8871 (unsigned long long)dback->root,
8872 (unsigned long long)dback->owner,
8873 (unsigned long long)dback->offset,
8877 struct tree_backref *tback;
8879 tback = to_tree_backref(back);
8880 if (back->full_backref)
8881 parent = tback->parent;
8885 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8886 rec->start, rec->max_size,
8887 parent, tback->root, 0, 0);
/*
 * NOTE(review): unlike the data-backref message above, these %llu arguments
 * are passed without an (unsigned long long) cast - confirm u64 matches.
 */
8888 fprintf(stderr, "adding new tree backref on "
8889 "start %llu len %llu parent %llu root %llu\n",
8890 rec->start, rec->max_size, parent, tback->root);
8893 btrfs_release_path(path);
/*
 * Walk the @entries list looking for the extent_entry whose bytenr and bytes
 * both equal the given values. The return statements are not visible in this
 * excerpt; callers test the result against NULL to detect "no such entry".
 */
8897 static struct extent_entry *find_entry(struct list_head *entries,
8898 u64 bytenr, u64 bytes)
8900 struct extent_entry *entry = NULL;
8902 list_for_each_entry(entry, entries, list) {
8903 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Choose the entry in @entries that the backrefs agree on most, judged by
 * entry->count.
 *
 * Entries whose broken count equals their total count are not trusted. When
 * the current entry ties with the running best (or with the previous entry)
 * on count, no winner can be declared yet and the search continues. Callers
 * treat a NULL result as "no clear winner".
 */
8910 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8912 struct extent_entry *entry, *best = NULL, *prev = NULL;
8914 list_for_each_entry(entry, entries, list) {
8916 * If there are as many broken entries as entries then we know
8917 * not to trust this particular entry.
8919 if (entry->broken == entry->count)
8923 * Special case, when there are only two entries and 'best' is
8933 * If our current entry == best then we can't be sure our best
8934 * is really the best, so we need to keep searching.
8936 if (best && best->count == entry->count) {
8942 /* Prev == entry, not good enough, have to keep searching */
8943 if (!prev->broken && prev->count == entry->count)
8947 best = (prev->count > entry->count) ? prev : entry;
8948 else if (best->count < entry->count)
/*
 * Rewrite the file extent item behind @dback so that it matches @entry.
 *
 * The owning root is read via btrfs_read_fs_root(), the file extent item is
 * located read-only starting at (dback->owner, EXTENT_DATA, dback->offset)
 * and compared on disk_bytenr / disk_num_bytes, and then a transaction is
 * started and the item is COWed and updated: disk_bytenr and offset are
 * adjusted depending on whether the ref starts before or after
 * entry->bytenr, disk_num_bytes is set to entry->bytes, and ram_bytes too for
 * uncompressed extents. Compressed extents with a mismatching start and refs
 * that fall outside the entry are reported rather than handled.
 *
 * Returns the first non-zero of ret and the btrfs_commit_transaction()
 * result on the final path, or PTR_ERR(trans) on the visible
 * transaction-start error return.
 */
8956 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8957 struct data_backref *dback, struct extent_entry *entry)
8959 struct btrfs_trans_handle *trans;
8960 struct btrfs_root *root;
8961 struct btrfs_file_extent_item *fi;
8962 struct extent_buffer *leaf;
8963 struct btrfs_key key;
8967 key.objectid = dback->root;
8968 key.type = BTRFS_ROOT_ITEM_KEY;
8969 key.offset = (u64)-1;
8970 root = btrfs_read_fs_root(info, &key);
8972 fprintf(stderr, "Couldn't find root for our ref\n");
8977 * The backref points to the original offset of the extent if it was
8978 * split, so we need to search down to the offset we have and then walk
8979 * forward until we find the backref we're looking for.
8981 key.objectid = dback->owner;
8982 key.type = BTRFS_EXTENT_DATA_KEY;
8983 key.offset = dback->offset;
8984 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8986 fprintf(stderr, "Error looking up ref %d\n", ret);
8991 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8992 ret = btrfs_next_leaf(root, path);
8994 fprintf(stderr, "Couldn't find our ref, next\n");
8998 leaf = path->nodes[0];
8999 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9000 if (key.objectid != dback->owner ||
9001 key.type != BTRFS_EXTENT_DATA_KEY) {
9002 fprintf(stderr, "Couldn't find our ref, search\n");
9005 fi = btrfs_item_ptr(leaf, path->slots[0],
9006 struct btrfs_file_extent_item);
9007 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9008 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9010 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9015 btrfs_release_path(path);
9017 trans = btrfs_start_transaction(root, 1);
9019 return PTR_ERR(trans);
9022 * Ok we have the key of the file extent we want to fix, now we can cow
9023 * down to the thing and fix it.
9025 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9027 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9028 key.objectid, key.type, key.offset, ret);
9032 fprintf(stderr, "Well that's odd, we just found this key "
9033 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9038 leaf = path->nodes[0];
9039 fi = btrfs_item_ptr(leaf, path->slots[0],
9040 struct btrfs_file_extent_item);
9042 if (btrfs_file_extent_compression(leaf, fi) &&
9043 dback->disk_bytenr != entry->bytenr) {
9044 fprintf(stderr, "Ref doesn't match the record start and is "
9045 "compressed, please take a btrfs-image of this file "
9046 "system and send it to a btrfs developer so they can "
9047 "complete this functionality for bytenr %Lu\n",
9048 dback->disk_bytenr);
/*
 * Three cases: a broken backref simply takes the entry's bytenr; a ref that
 * starts after the entry, or before it, also has its offset adjusted.
 */
9053 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9054 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9055 } else if (dback->disk_bytenr > entry->bytenr) {
9056 u64 off_diff, offset;
9058 off_diff = dback->disk_bytenr - entry->bytenr;
9059 offset = btrfs_file_extent_offset(leaf, fi);
9060 if (dback->disk_bytenr + offset +
9061 btrfs_file_extent_num_bytes(leaf, fi) >
9062 entry->bytenr + entry->bytes) {
9063 fprintf(stderr, "Ref is past the entry end, please "
9064 "take a btrfs-image of this file system and "
9065 "send it to a btrfs developer, ref %Lu\n",
9066 dback->disk_bytenr);
9071 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9072 btrfs_set_file_extent_offset(leaf, fi, offset);
9073 } else if (dback->disk_bytenr < entry->bytenr) {
9076 offset = btrfs_file_extent_offset(leaf, fi);
9077 if (dback->disk_bytenr + offset < entry->bytenr) {
9078 fprintf(stderr, "Ref is before the entry start, please"
9079 " take a btrfs-image of this file system and "
9080 "send it to a btrfs developer, ref %Lu\n",
9081 dback->disk_bytenr);
/* Re-base the offset from the old extent start onto entry->bytenr. */
9086 offset += dback->disk_bytenr;
9087 offset -= entry->bytenr;
9088 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9089 btrfs_set_file_extent_offset(leaf, fi, offset);
9092 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9095 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9096 * only do this if we aren't using compression, otherwise it's a
9099 if (!btrfs_file_extent_compression(leaf, fi))
9100 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9102 printf("ram bytes may be wrong?\n");
9103 btrfs_mark_buffer_dirty(leaf);
9105 err = btrfs_commit_transaction(trans, root);
9106 btrfs_release_path(path);
9107 return ret ? ret : err;
/*
 * Make the data backrefs of @rec agree on one (bytenr, bytes) pair.
 *
 * Pass 1 groups every non-full data backref that has a real ref
 * (found_ref != 0) into extent_entry buckets keyed by disk_bytenr and bytes.
 * If the buckets disagree, find_most_right_entry() picks a winner; when it
 * cannot, the extent record's own (start, nr) is added as a tie breaker and
 * the choice is retried. Pass 2 calls repair_ref() for every backref that
 * differs from the winning entry. The temporary entry list is torn down at
 * the end.
 */
9110 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9111 struct extent_record *rec)
9113 struct extent_backref *back, *tmp;
9114 struct data_backref *dback;
9115 struct extent_entry *entry, *best = NULL;
9118 int broken_entries = 0;
9123 * Metadata is easy and the backrefs should always agree on bytenr and
9124 * size, if not we've got bigger issues.
9129 rbtree_postorder_for_each_entry_safe(back, tmp,
9130 &rec->backref_tree, node) {
9131 if (back->full_backref || !back->is_data)
9134 dback = to_data_backref(back);
9137 * We only pay attention to backrefs that we found a real
9140 if (dback->found_ref == 0)
9144 * For now we only catch when the bytes don't match, not the
9145 * bytenr. We can easily do this at the same time, but I want
9146 * to have a fs image to test on before we just add repair
9147 * functionality willy-nilly so we know we won't screw up the
9151 entry = find_entry(&entries, dback->disk_bytenr,
9154 entry = malloc(sizeof(struct extent_entry));
9159 memset(entry, 0, sizeof(*entry));
9160 entry->bytenr = dback->disk_bytenr;
9161 entry->bytes = dback->bytes;
9162 list_add_tail(&entry->list, &entries);
9167 * If we only have on entry we may think the entries agree when
9168 * in reality they don't so we have to do some extra checking.
9170 if (dback->disk_bytenr != rec->start ||
9171 dback->bytes != rec->nr || back->broken)
9182 /* Yay all the backrefs agree, carry on good sir */
9183 if (nr_entries <= 1 && !mismatch)
9186 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9187 "%Lu\n", rec->start);
9190 * First we want to see if the backrefs can agree amongst themselves who
9191 * is right, so figure out which one of the entries has the highest
9194 best = find_most_right_entry(&entries);
9197 * Ok so we may have an even split between what the backrefs think, so
9198 * this is where we use the extent ref to see what it thinks.
9201 entry = find_entry(&entries, rec->start, rec->nr);
9202 if (!entry && (!broken_entries || !rec->found_rec)) {
9203 fprintf(stderr, "Backrefs don't agree with each other "
9204 "and extent record doesn't agree with anybody,"
9205 " so we can't fix bytenr %Lu bytes %Lu\n",
9206 rec->start, rec->nr);
9209 } else if (!entry) {
9211 * Ok our backrefs were broken, we'll assume this is the
9212 * correct value and add an entry for this range.
9214 entry = malloc(sizeof(struct extent_entry));
9219 memset(entry, 0, sizeof(*entry));
9220 entry->bytenr = rec->start;
9221 entry->bytes = rec->nr;
9222 list_add_tail(&entry->list, &entries);
9226 best = find_most_right_entry(&entries);
9228 fprintf(stderr, "Backrefs and extent record evenly "
9229 "split on who is right, this is going to "
9230 "require user input to fix bytenr %Lu bytes "
9231 "%Lu\n", rec->start, rec->nr);
9238 * I don't think this can happen currently as we'll abort() if we catch
9239 * this case higher up, but in case somebody removes that we still can't
9240 * deal with it properly here yet, so just bail out of that's the case.
9242 if (best->bytenr != rec->start) {
9243 fprintf(stderr, "Extent start and backref starts don't match, "
9244 "please use btrfs-image on this file system and send "
9245 "it to a btrfs developer so they can make fsck fix "
9246 "this particular case. bytenr is %Lu, bytes is %Lu\n",
9247 rec->start, rec->nr);
9253 * Ok great we all agreed on an extent record, let's go find the real
9254 * references and fix up the ones that don't match.
9256 rbtree_postorder_for_each_entry_safe(back, tmp,
9257 &rec->backref_tree, node) {
9258 if (back->full_backref || !back->is_data)
9261 dback = to_data_backref(back);
9264 * Still ignoring backrefs that don't have a real ref attached
9267 if (dback->found_ref == 0)
9270 if (dback->bytes == best->bytes &&
9271 dback->disk_bytenr == best->bytenr)
9274 ret = repair_ref(info, path, dback, best);
9280 * Ok we messed with the actual refs, which means we need to drop our
9281 * entire cache and go back and rescan. I know this is a huge pain and
9282 * adds a lot of extra work, but it's the only way to be safe. Once all
9283 * the backrefs agree we may not need to do anything to the extent
/* Tear down the temporary entry list built above. */
9288 while (!list_empty(&entries)) {
9289 entry = list_entry(entries.next, struct extent_entry, list);
9290 list_del_init(&entry->list);
/*
 * Promote the single duplicate of @rec to be the primary extent record.
 *
 * Nothing is done when @rec already has an extent item (found_rec) or more
 * than one duplicate. Otherwise @rec is removed from @extent_cache, its first
 * duplicate ("good") is re-initialised to take over @rec's refs and backrefs,
 * and any other cached records overlapping good's range are folded in: as
 * additional duplicates when they are backed by an extent item or carry
 * duplicates of their own, otherwise by merging their refs and backrefs.
 * "good" is finally inserted into the cache.
 *
 * The visible final return yields 0 when "good" still has duplicates and 1
 * otherwise.
 */
9296 static int process_duplicates(struct cache_tree *extent_cache,
9297 struct extent_record *rec)
9299 struct extent_record *good, *tmp;
9300 struct cache_extent *cache;
9304 * If we found a extent record for this extent then return, or if we
9305 * have more than one duplicate we are likely going to need to delete
9308 if (rec->found_rec || rec->num_duplicates > 1)
9311 /* Shouldn't happen but just in case */
9312 BUG_ON(!rec->num_duplicates);
9315 * So this happens if we end up with a backref that doesn't match the
9316 * actual extent entry. So either the backref is bad or the extent
9317 * entry is bad. Either way we want to have the extent_record actually
9318 * reflect what we found in the extent_tree, so we need to take the
9319 * duplicate out and use that as the extent_record since the only way we
9320 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9322 remove_cache_extent(extent_cache, &rec->cache);
9324 good = to_extent_record(rec->dups.next);
9325 list_del_init(&good->list);
9326 INIT_LIST_HEAD(&good->backrefs);
9327 INIT_LIST_HEAD(&good->dups);
9328 good->cache.start = good->start;
9329 good->cache.size = good->nr;
9330 good->content_checked = 0;
9331 good->owner_ref_checked = 0;
9332 good->num_duplicates = 0;
9333 good->refs = rec->refs;
9334 list_splice_init(&rec->backrefs, &good->backrefs);
9336 cache = lookup_cache_extent(extent_cache, good->start,
9340 tmp = container_of(cache, struct extent_record, cache);
9343 * If we find another overlapping extent and it's found_rec is
9344 * set then it's a duplicate and we need to try and delete
9347 if (tmp->found_rec || tmp->num_duplicates > 0) {
9348 if (list_empty(&good->list))
9349 list_add_tail(&good->list,
9350 &duplicate_extents);
9351 good->num_duplicates += tmp->num_duplicates + 1;
9352 list_splice_init(&tmp->dups, &good->dups);
9353 list_del_init(&tmp->list);
9354 list_add_tail(&tmp->list, &good->dups);
9355 remove_cache_extent(extent_cache, &tmp->cache);
9360 * Ok we have another non extent item backed extent rec, so lets
9361 * just add it to this extent and carry on like we did above.
9363 good->refs += tmp->refs;
9364 list_splice_init(&tmp->backrefs, &good->backrefs);
9365 remove_cache_extent(extent_cache, &tmp->cache);
9368 ret = insert_cache_extent(extent_cache, &good->cache);
9371 return good->num_duplicates ? 0 : 1;
/*
 * Delete the duplicate EXTENT_ITEMs attached to @rec from the extent tree.
 *
 * One record is selected as the one covering all duplicates; the remaining
 * records are moved onto a local delete list. Within a transaction on the
 * extent root, every listed record that has an on-disk item (found_rec) is
 * looked up by (start, EXTENT_ITEM, nr) and removed with btrfs_del_item().
 * Metadata records are reported rather than handled. Both the delete list and
 * rec->dups are emptied before returning.
 *
 * Returns ret when it is non-zero, otherwise nr_del; rec->num_duplicates is
 * cleared only when both are zero.
 */
9374 static int delete_duplicate_records(struct btrfs_root *root,
9375 struct extent_record *rec)
9377 struct btrfs_trans_handle *trans;
9378 LIST_HEAD(delete_list);
9379 struct btrfs_path path;
9380 struct extent_record *tmp, *good, *n;
9383 struct btrfs_key key;
9385 btrfs_init_path(&path);
9388 /* Find the record that covers all of the duplicates. */
9389 list_for_each_entry(tmp, &rec->dups, list) {
9390 if (good->start < tmp->start)
9392 if (good->nr > tmp->nr)
9395 if (tmp->start + tmp->nr < good->start + good->nr) {
/*
 * NOTE(review): the message formats each pair as "[start-end]" but the
 * second value passed is the length (nr), not an end offset.
 */
9396 fprintf(stderr, "Ok we have overlapping extents that "
9397 "aren't completely covered by each other, this "
9398 "is going to require more careful thought. "
9399 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9400 tmp->start, tmp->nr, good->start, good->nr);
9407 list_add_tail(&rec->list, &delete_list);
9409 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9412 list_move_tail(&tmp->list, &delete_list);
/* All deletions go through the extent root from here on. */
9415 root = root->fs_info->extent_root;
9416 trans = btrfs_start_transaction(root, 1);
9417 if (IS_ERR(trans)) {
9418 ret = PTR_ERR(trans);
9422 list_for_each_entry(tmp, &delete_list, list) {
9423 if (tmp->found_rec == 0)
9425 key.objectid = tmp->start;
9426 key.type = BTRFS_EXTENT_ITEM_KEY;
9427 key.offset = tmp->nr;
9429 /* Shouldn't happen but just in case */
9430 if (tmp->metadata) {
9431 fprintf(stderr, "Well this shouldn't happen, extent "
9432 "record overlaps but is metadata? "
9433 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9437 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9443 ret = btrfs_del_item(trans, root, &path);
9446 btrfs_release_path(&path);
9449 err = btrfs_commit_transaction(trans, root);
/* Empty both lists so no stale links to these records remain. */
9453 while (!list_empty(&delete_list)) {
9454 tmp = to_extent_record(delete_list.next);
9455 list_del_init(&tmp->list);
9461 while (!list_empty(&rec->dups)) {
9462 tmp = to_extent_record(rec->dups.next);
9463 list_del_init(&tmp->list);
9467 btrfs_release_path(&path);
9469 if (!ret && !nr_del)
9470 rec->num_duplicates = 0;
9472 return ret ? ret : nr_del;
/*
 * Try to attach file extents to the data backrefs of @rec that the tree scan
 * did not find.
 *
 * For each non-full data backref with found_ref == 0, the owning root is read
 * and the file extent at (dback->owner, EXTENT_DATA, dback->offset) is looked
 * up. If it exists, its disk bytenr / disk_num_bytes are compared against
 * @extent_cache: a bytenr that already has its own extent record is left
 * alone, otherwise the backref adopts those values and found_ref is bumped so
 * that verify_backrefs() can reconcile it later.
 *
 * A root that cannot be read because it does not exist (-ENOENT) is skipped;
 * other root read errors are returned via PTR_ERR().
 */
9475 static int find_possible_backrefs(struct btrfs_fs_info *info,
9476 struct btrfs_path *path,
9477 struct cache_tree *extent_cache,
9478 struct extent_record *rec)
9480 struct btrfs_root *root;
9481 struct extent_backref *back, *tmp;
9482 struct data_backref *dback;
9483 struct cache_extent *cache;
9484 struct btrfs_file_extent_item *fi;
9485 struct btrfs_key key;
9489 rbtree_postorder_for_each_entry_safe(back, tmp,
9490 &rec->backref_tree, node) {
9491 /* Don't care about full backrefs (poor unloved backrefs) */
9492 if (back->full_backref || !back->is_data)
9495 dback = to_data_backref(back);
9497 /* We found this one, we don't need to do a lookup */
9498 if (dback->found_ref)
9501 key.objectid = dback->root;
9502 key.type = BTRFS_ROOT_ITEM_KEY;
9503 key.offset = (u64)-1;
9505 root = btrfs_read_fs_root(info, &key);
9507 /* No root, definitely a bad ref, skip */
9508 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9510 /* Other err, exit */
9512 return PTR_ERR(root);
9514 key.objectid = dback->owner;
9515 key.type = BTRFS_EXTENT_DATA_KEY;
9516 key.offset = dback->offset;
9517 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9519 btrfs_release_path(path);
9522 /* Didn't find it, we can carry on */
9527 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9528 struct btrfs_file_extent_item);
9529 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9530 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9531 btrfs_release_path(path);
9532 cache = lookup_cache_extent(extent_cache, bytenr, 1);
/* NOTE(review): this local "tmp" shadows the loop's extent_backref cursor "tmp". */
9534 struct extent_record *tmp;
9535 tmp = container_of(cache, struct extent_record, cache);
9538 * If we found an extent record for the bytenr for this
9539 * particular backref then we can't add it to our
9540 * current extent record. We only want to add backrefs
9541 * that don't have a corresponding extent item in the
9542 * extent tree since they likely belong to this record
9543 * and we need to fix it if it doesn't match bytenrs.
9549 dback->found_ref += 1;
9550 dback->disk_bytenr = bytenr;
9551 dback->bytes = bytes;
9554 * Set this so the verify backref code knows not to trust the
9555 * values in this backref.
9564 * Record orphan data refs into the corresponding root.
9566 * Return 0 if the extent item contains a data ref and it was recorded.
9567 * Return 1 if the extent item contains no useful data ref.
9568 * In that case, it may contain only shared_dataref or metadata backrefs,
9569 * or the file extent exists (this should be handled by the extent bytenr
9571 * Return <0 if something goes wrong.
/*
 * Attach orphan data extents of @rec to the roots that should reference them.
 *
 * Only non-full data backrefs that were seen in the extent tree
 * (found_extent_tree) but have no file extent pointing at them
 * (found_ref == 0) are considered. For each one the destination root is read
 * (unreadable or missing roots are skipped), the file extent key
 * (owner, EXTENT_DATA, offset) is searched, and an orphan_data_extent
 * describing the backref and rec's cached range is allocated and linked to
 * the root's orphan_data_extents list.
 *
 * The visible final return yields 0 when at least one data ref was recorded
 * and 1 when none was.
 */
9573 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9574 struct extent_record *rec)
9576 struct btrfs_key key;
9577 struct btrfs_root *dest_root;
9578 struct extent_backref *back, *tmp;
9579 struct data_backref *dback;
9580 struct orphan_data_extent *orphan;
9581 struct btrfs_path path;
9582 int recorded_data_ref = 0;
9587 btrfs_init_path(&path);
9588 rbtree_postorder_for_each_entry_safe(back, tmp,
9589 &rec->backref_tree, node) {
9590 if (back->full_backref || !back->is_data ||
9591 !back->found_extent_tree)
9593 dback = to_data_backref(back);
9594 if (dback->found_ref)
9596 key.objectid = dback->root;
9597 key.type = BTRFS_ROOT_ITEM_KEY;
9598 key.offset = (u64)-1;
9600 dest_root = btrfs_read_fs_root(fs_info, &key);
9602 /* For non-exist root we just skip it */
9603 if (IS_ERR(dest_root) || !dest_root)
9606 key.objectid = dback->owner;
9607 key.type = BTRFS_EXTENT_DATA_KEY;
9608 key.offset = dback->offset;
/* Only the search result matters here; the path is released right away. */
9610 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9611 btrfs_release_path(&path);
9613 * For ret < 0, it's OK since the fs-tree may be corrupted,
9614 * we need to record it for inode/file extent rebuild.
9615 * For ret > 0, we record it only for file extent rebuild.
9616 * For ret == 0, the file extent exists but only bytenr
9617 * mismatch, let the original bytenr fix routine to handle,
9623 orphan = malloc(sizeof(*orphan));
9628 INIT_LIST_HEAD(&orphan->list);
9629 orphan->root = dback->root;
9630 orphan->objectid = dback->owner;
9631 orphan->offset = dback->offset;
9632 orphan->disk_bytenr = rec->cache.start;
9633 orphan->disk_len = rec->cache.size;
/*
 * NOTE(review): elsewhere in this file list_add_tail() is called as
 * (new entry, list head); here the root's list head is passed first and
 * the new orphan's node second. Confirm this argument order is intended.
 */
9634 list_add(&dest_root->orphan_data_extents, &orphan->list);
9635 recorded_data_ref = 1;
9638 btrfs_release_path(&path);
9640 return !recorded_data_ref;
9646 * when an incorrect extent item is found, this will delete
9647 * all of the existing entries for it and recreate them
9648 * based on what the tree scan found.
/*
 * fixup_extent_refs - rebuild the extent tree entries of one extent record.
 *
 * @info:         fs_info; info->extent_root is the tree that gets modified
 *                and info->corrupt_blocks is consulted for the range
 * @extent_cache: extent record cache, handed to find_possible_backrefs()
 * @rec:          extent record whose references are rebuilt
 *
 * Visible steps: optionally collect stray backrefs for data extents whose
 * ref counts disagree, reconcile the backrefs with verify_backrefs(), start
 * a transaction on the extent root, delete the existing records of the
 * extent, re-insert one reference through record_extent() for each backref
 * that has found_ref set, then commit the transaction.
 *
 * NOTE(review): the error checks after the individual calls, the
 * declarations of flags, allocated and ret, and the return statement are
 * not visible in this excerpt - confirm the return contract against the
 * full file.
 */
9650 static int fixup_extent_refs(struct btrfs_fs_info *info,
9651 struct cache_tree *extent_cache,
9652 struct extent_record *rec)
9654 struct btrfs_trans_handle *trans = NULL;
9656 struct btrfs_path path;
9657 struct cache_extent *cache;
9658 struct extent_backref *back, *tmp;
/* Carry the full backref flag of the record over to the rebuilt item */
9662 if (rec->flag_block_full_backref)
9663 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9665 btrfs_init_path(&path);
/* Only data extents with a ref count mismatch take this extra pass */
9666 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9668 * Sometimes the backrefs themselves are so broken they don't
9669 * get attached to any meaningful rec, so first go back and
9670 * check any of our backrefs that we couldn't find and throw
9671 * them into the list if we find the backref so that
9672 * verify_backrefs can figure out what to do.
9674 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9679 /* step one, make sure all of the backrefs agree */
9680 ret = verify_backrefs(info, &path, rec);
9684 trans = btrfs_start_transaction(info->extent_root, 1);
9685 if (IS_ERR(trans)) {
9686 ret = PTR_ERR(trans);
9690 /* step two, delete all the existing records */
9691 ret = delete_extent_records(trans, info->extent_root, &path,
9697 /* was this block corrupt? If so, don't add references to it */
9698 cache = lookup_cache_extent(info->corrupt_blocks,
9699 rec->start, rec->max_size);
9705 /* step three, recreate all the refs we did find */
9706 rbtree_postorder_for_each_entry_safe(back, tmp,
9707 &rec->backref_tree, node) {
9709 * if we didn't find any references, don't create a
9712 if (!back->found_ref)
/* The flag is cleared on the record before each reference is re-inserted */
9715 rec->bad_full_backref = 0;
9716 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
/* The commit result is kept in a separate local named err */
9724 int err = btrfs_commit_transaction(trans, info->extent_root);
9730 fprintf(stderr, "Repaired extent references for %llu\n",
9731 (unsigned long long)rec->start);
9733 btrfs_release_path(&path);
/*
 * fixup_extent_flags - make the FULL_BACKREF flag of an extent item match
 * the extent record.
 *
 * @fs_info: fs_info; fs_info->extent_root is searched and modified
 * @rec:     extent record; start, metadata, info_level, max_size and
 *           flag_block_full_backref are read
 *
 * The item is looked up as a METADATA_ITEM keyed by level when
 * rec->metadata is set, otherwise as an EXTENT_ITEM keyed by size.
 * BTRFS_BLOCK_FLAG_FULL_BACKREF is then set or cleared in the item flags
 * according to rec->flag_block_full_backref, the leaf is marked dirty and
 * the transaction is committed.
 *
 * Returns the PTR_ERR() value when the transaction cannot be started.
 * NOTE(review): the return statements of the other paths are not visible
 * in this excerpt - confirm them against the full file.
 */
9737 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9738 struct extent_record *rec)
9740 struct btrfs_trans_handle *trans;
9741 struct btrfs_root *root = fs_info->extent_root;
9742 struct btrfs_path path;
9743 struct btrfs_extent_item *ei;
9744 struct btrfs_key key;
/* Build the key of the extent item that describes this record */
9748 key.objectid = rec->start;
9749 if (rec->metadata) {
9750 key.type = BTRFS_METADATA_ITEM_KEY;
9751 key.offset = rec->info_level;
9753 key.type = BTRFS_EXTENT_ITEM_KEY;
9754 key.offset = rec->max_size;
9757 trans = btrfs_start_transaction(root, 0);
9759 return PTR_ERR(trans);
9761 btrfs_init_path(&path);
/* Search with cow set (last argument) because the item is modified below */
9762 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
/* Both early exits below release the path and still commit the transaction */
9764 btrfs_release_path(&path);
9765 btrfs_commit_transaction(trans, root);
9768 fprintf(stderr, "Didn't find extent for %llu\n",
9769 (unsigned long long)rec->start);
9770 btrfs_release_path(&path);
9771 btrfs_commit_transaction(trans, root);
9775 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9776 struct btrfs_extent_item);
9777 flags = btrfs_extent_flags(path.nodes[0], ei);
9778 if (rec->flag_block_full_backref) {
9779 fprintf(stderr, "setting full backref on %llu\n",
9780 (unsigned long long)key.objectid);
9781 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9783 fprintf(stderr, "clearing full backref on %llu\n",
9784 (unsigned long long)key.objectid);
9785 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* Write the updated flags back into the leaf and flag it for writeout */
9787 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9788 btrfs_mark_buffer_dirty(path.nodes[0]);
9789 btrfs_release_path(&path);
9790 ret = btrfs_commit_transaction(trans, root);
9792 fprintf(stderr, "Repaired extent flags for %llu\n",
9793 (unsigned long long)rec->start);
9798 /* right now we only prune from the extent allocation tree */
/*
 * prune_one_block - remove the node pointer that refers to a corrupt block.
 *
 * @trans:   running transaction, passed to btrfs_search_slot()
 * @info:    fs_info; only info->extent_root is searched and modified
 * @corrupt: the corrupt block; its key, level and cache.start are used
 *
 * The extent root is searched for corrupt->key with path.lowest_level set
 * to one level above the corrupt block, so the search stops at the node
 * expected to hold the pointer.  The slot picked by the search is compared
 * first, then every slot of that node is scanned for a block pointer equal
 * to corrupt->cache.start.  The pointer is removed with btrfs_del_ptr().
 *
 * NOTE(review): the branches between these steps (error checks, what
 * happens when no slot matches, the return statement) are not visible in
 * this excerpt - confirm before relying on a specific return value.
 */
9799 static int prune_one_block(struct btrfs_trans_handle *trans,
9800 struct btrfs_fs_info *info,
9801 struct btrfs_corrupt_block *corrupt)
9804 struct btrfs_path path;
9805 struct extent_buffer *eb;
/* level is the level of the parent node of the corrupt block */
9809 int level = corrupt->level + 1;
9811 btrfs_init_path(&path);
9813 /* we want to stop at the parent to our busted block */
9814 path.lowest_level = level;
/* ins_len of -1 and cow of 1: the search prepares for a deletion */
9816 ret = btrfs_search_slot(trans, info->extent_root,
9817 &corrupt->key, &path, -1, 1);
9822 eb = path.nodes[level];
9829 * hopefully the search gave us the block we want to prune,
9830 * lets try that first
9832 slot = path.slots[level];
9833 found = btrfs_node_blockptr(eb, slot);
9834 if (found == corrupt->cache.start)
9837 nritems = btrfs_header_nritems(eb);
9839 /* the search failed, lets scan this node and hope we find it */
9840 for (slot = 0; slot < nritems; slot++) {
9841 found = btrfs_node_blockptr(eb, slot);
9842 if (found == corrupt->cache.start)
9846 * we couldn't find the bad block. TODO, search all the nodes for pointers
/* Reaching the root node of the extent tree is treated as a special case */
9849 if (eb == info->extent_root->node) {
9854 btrfs_release_path(&path);
9859 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9860 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9863 btrfs_release_path(&path);
9867 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9869 struct btrfs_trans_handle *trans = NULL;
9870 struct cache_extent *cache;
9871 struct btrfs_corrupt_block *corrupt;
9874 cache = search_cache_extent(info->corrupt_blocks, 0);
9878 trans = btrfs_start_transaction(info->extent_root, 1);
9880 return PTR_ERR(trans);
9882 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9883 prune_one_block(trans, info, corrupt);
9884 remove_cache_extent(info->corrupt_blocks, cache);
9887 return btrfs_commit_transaction(trans, info->extent_root);
9891 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9893 struct btrfs_block_group_cache *cache;
9898 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9899 &start, &end, EXTENT_DIRTY);
9902 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9907 cache = btrfs_lookup_first_block_group(fs_info, start);
9912 start = cache->key.objectid + cache->key.offset;
/*
 * check_extent_refs - report (and with the global repair flag, try to fix)
 * inconsistencies of every extent record in @extent_cache.
 *
 * @root:         any root of the filesystem; root->fs_info is what is used
 * @extent_cache: cache tree of extent records built by the tree scan
 *
 * Visible phases:
 *  1. The range of every extent record and of every corrupt block is marked
 *     dirty in fs_info->excluded_extents, corrupt blocks are pruned and the
 *     cached block group state is reset.  The original comment ties this to
 *     repair mode; the guarding condition itself is not visible here.
 *  2. While repairing, records on the duplicate_extents list are run
 *     through process_duplicates() and delete_duplicate_records().
 *  3. Every record is examined.  Messages are printed for multiple extent
 *     items, ref count mismatch, backpointer mismatch, failed owner ref
 *     check, bad full backref, metadata crossing a stripe boundary and
 *     chunk type mismatch.  fixup_extent_refs() and fixup_extent_flags()
 *     are the repair helpers called from here.  Each record is then removed
 *     from the cache and its backrefs are freed.
 *  4. At the end block accounting is fixed inside a transaction on the
 *     extent root via btrfs_fix_block_accounting().
 *
 * NOTE(review): several conditions, the error bookkeeping (cur_err, fix)
 * and the return statements are not visible in this excerpt.  The visible
 * check "ret && ret != -EAGAIN" indicates that -EAGAIN is not treated as a
 * repair failure - confirm the full return contract.
 */
9916 static int check_extent_refs(struct btrfs_root *root,
9917 struct cache_tree *extent_cache)
9919 struct extent_record *rec;
9920 struct cache_extent *cache;
9926 * if we're doing a repair, we have to make sure
9927 * we don't allocate from the problem extents.
9928 * In the worst case, this will be all the
/* Exclude the byte range of every known extent record */
9931 cache = search_cache_extent(extent_cache, 0);
9933 rec = container_of(cache, struct extent_record, cache);
9934 set_extent_dirty(root->fs_info->excluded_extents,
9936 rec->start + rec->max_size - 1);
9937 cache = next_cache_extent(cache);
9940 /* pin down all the corrupted blocks too */
9941 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9943 set_extent_dirty(root->fs_info->excluded_extents,
9945 cache->start + cache->size - 1);
9946 cache = next_cache_extent(cache);
/* The return value of prune_corrupt_blocks() is not used here */
9948 prune_corrupt_blocks(root->fs_info);
9949 reset_cached_block_groups(root->fs_info);
9952 reset_cached_block_groups(root->fs_info);
9955 * We need to delete any duplicate entries we find first otherwise we
9956 * could mess up the extent tree when we have backrefs that actually
9957 * belong to a different extent item and not the weird duplicate one.
9959 while (repair && !list_empty(&duplicate_extents)) {
9960 rec = to_extent_record(duplicate_extents.next);
9961 list_del_init(&rec->list);
9963 /* Sometimes we can find a backref before we find an actual
9964 * extent, so we need to process it a little bit to see if there
9965 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9966 * if this is a backref screwup. If we need to delete stuff
9967 * process_duplicates() will return 0, otherwise it will return
9970 if (process_duplicates(extent_cache, rec))
9972 ret = delete_duplicate_records(root, rec);
9976 * delete_duplicate_records will return the number of entries
9977 * deleted, so if it's greater than 0 then we know we actually
9978 * did something and we need to remove.
9991 cache = search_cache_extent(extent_cache, 0);
9994 rec = container_of(cache, struct extent_record, cache);
9995 if (rec->num_duplicates) {
9996 fprintf(stderr, "extent item %llu has multiple extent "
9997 "items\n", (unsigned long long)rec->start);
10001 if (rec->refs != rec->extent_item_refs) {
10002 fprintf(stderr, "ref mismatch on [%llu %llu] ",
10003 (unsigned long long)rec->start,
10004 (unsigned long long)rec->nr);
10005 fprintf(stderr, "extent item %llu, found %llu\n",
10006 (unsigned long long)rec->extent_item_refs,
10007 (unsigned long long)rec->refs);
10008 ret = record_orphan_data_extents(root->fs_info, rec);
10014 if (all_backpointers_checked(rec, 1)) {
10015 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10016 (unsigned long long)rec->start,
10017 (unsigned long long)rec->nr);
10021 if (!rec->owner_ref_checked) {
10022 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10023 (unsigned long long)rec->start,
10024 (unsigned long long)rec->nr);
10029 if (repair && fix) {
10030 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10036 if (rec->bad_full_backref) {
10037 fprintf(stderr, "bad full backref, on [%llu]\n",
10038 (unsigned long long)rec->start);
10040 ret = fixup_extent_flags(root->fs_info, rec);
10048 * Although it's not a extent ref's problem, we reuse this
10049 * routine for error reporting.
10050 * No repair function yet.
10052 if (rec->crossing_stripes) {
10054 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10055 rec->start, rec->start + rec->max_size);
10059 if (rec->wrong_chunk_type) {
10061 "bad extent [%llu, %llu), type mismatch with chunk\n",
10062 rec->start, rec->start + rec->max_size);
10066 remove_cache_extent(extent_cache, cache);
10067 free_all_extent_backrefs(rec);
10068 if (!init_extent_tree && repair && (!cur_err || fix))
10069 clear_extent_dirty(root->fs_info->excluded_extents,
10071 rec->start + rec->max_size - 1);
10076 if (ret && ret != -EAGAIN) {
10077 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10080 struct btrfs_trans_handle *trans;
10082 root = root->fs_info->extent_root;
10083 trans = btrfs_start_transaction(root, 1);
10084 if (IS_ERR(trans)) {
10085 ret = PTR_ERR(trans);
10089 ret = btrfs_fix_block_accounting(trans, root);
10092 ret = btrfs_commit_transaction(trans, root);
10101 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10105 if (type & BTRFS_BLOCK_GROUP_RAID0) {
10106 stripe_size = length;
10107 stripe_size /= num_stripes;
10108 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10109 stripe_size = length * 2;
10110 stripe_size /= num_stripes;
10111 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10112 stripe_size = length;
10113 stripe_size /= (num_stripes - 1);
10114 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10115 stripe_size = length;
10116 stripe_size /= (num_stripes - 2);
10118 stripe_size = length;
10120 return stripe_size;
10124 * Check the chunk with its block group/dev list ref:
10125 * Return 0 if all refs seems valid.
10126 * Return 1 if part of refs seems valid, need later check for rebuild ref
10127 * like missing block group and needs to search extent tree to rebuild them.
10128 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * check_chunk_refs - cross check one chunk record against the block group
 * and device extent records.
 *
 * @chunk_rec:         chunk to verify
 * @block_group_cache: block group records, looked up by chunk offset/length
 * @dev_extent_cache:  device extent records, looked up per stripe by
 *                     (devid, offset, stripe length)
 *
 * A matching block group is unlinked from its list and remembered in
 * chunk_rec->bg_rec.  A matching device extent is moved onto
 * chunk_rec->dextents.  Mismatches and missing records produce the
 * messages below.
 *
 * NOTE(review): the last parameter, the conditions around the messages
 * and the assignments to the return value are not visible in this
 * excerpt.  check_chunks() passes its silent argument as fourth argument.
 */
10130 static int check_chunk_refs(struct chunk_record *chunk_rec,
10131 struct block_group_tree *block_group_cache,
10132 struct device_extent_tree *dev_extent_cache,
10135 struct cache_extent *block_group_item;
10136 struct block_group_record *block_group_rec;
10137 struct cache_extent *dev_extent_item;
10138 struct device_extent_record *dev_extent_rec;
10142 int metadump_v2 = 0;
/* Part one: the block group that should cover the range of this chunk */
10146 block_group_item = lookup_cache_extent(&block_group_cache->tree,
10148 chunk_rec->length);
10149 if (block_group_item) {
10150 block_group_rec = container_of(block_group_item,
10151 struct block_group_record,
10153 if (chunk_rec->length != block_group_rec->offset ||
10154 chunk_rec->offset != block_group_rec->objectid ||
10156 chunk_rec->type_flags != block_group_rec->flags)) {
10159 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10160 chunk_rec->objectid,
10165 chunk_rec->type_flags,
10166 block_group_rec->objectid,
10167 block_group_rec->type,
10168 block_group_rec->offset,
10169 block_group_rec->offset,
10170 block_group_rec->objectid,
10171 block_group_rec->flags);
/* Claim the block group: take it off its list and link it to the chunk */
10174 list_del_init(&block_group_rec->list);
10175 chunk_rec->bg_rec = block_group_rec;
10180 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10181 chunk_rec->objectid,
10186 chunk_rec->type_flags);
/* Part two: one device extent of the computed stripe length per stripe */
10193 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10194 chunk_rec->num_stripes);
10195 for (i = 0; i < chunk_rec->num_stripes; ++i) {
10196 devid = chunk_rec->stripes[i].devid;
10197 offset = chunk_rec->stripes[i].offset;
10198 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10199 devid, offset, length);
10200 if (dev_extent_item) {
10201 dev_extent_rec = container_of(dev_extent_item,
10202 struct device_extent_record,
10204 if (dev_extent_rec->objectid != devid ||
10205 dev_extent_rec->offset != offset ||
10206 dev_extent_rec->chunk_offset != chunk_rec->offset ||
10207 dev_extent_rec->length != length) {
10210 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10211 chunk_rec->objectid,
10214 chunk_rec->stripes[i].devid,
10215 chunk_rec->stripes[i].offset,
10216 dev_extent_rec->objectid,
10217 dev_extent_rec->offset,
10218 dev_extent_rec->length);
/* Claim the device extent by moving it onto the list of this chunk */
10221 list_move(&dev_extent_rec->chunk_list,
10222 &chunk_rec->dextents);
10227 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10228 chunk_rec->objectid,
10231 chunk_rec->stripes[i].devid,
10232 chunk_rec->stripes[i].offset);
10239 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * check_chunks - verify every chunk record and sort the chunks by result.
 *
 * @chunk_cache:       all chunk records
 * @block_group_cache: block group records to match against
 * @dev_extent_cache:  device extent records to match against
 * @good:              optional list receiving chunks with result 0
 * @bad:               optional list receiving chunks with a negative result
 * @rebuild:           optional list receiving chunks with a positive result
 * @silent:            forwarded to check_chunk_refs()
 *
 * Each of the three lists may be NULL, in which case chunks of that class
 * are not collected.  After the chunk walk, the block groups still linked
 * on block_group_cache->block_groups and the device extents still on
 * dev_extent_cache->no_chunk_orphans are iterated and the messages below
 * are printed for them.
 *
 * NOTE(review): the handling of silent around the messages and the
 * computation of the return value are not visible in this excerpt.
 */
10240 int check_chunks(struct cache_tree *chunk_cache,
10241 struct block_group_tree *block_group_cache,
10242 struct device_extent_tree *dev_extent_cache,
10243 struct list_head *good, struct list_head *bad,
10244 struct list_head *rebuild, int silent)
10246 struct cache_extent *chunk_item;
10247 struct chunk_record *chunk_rec;
10248 struct block_group_record *bg_rec;
10249 struct device_extent_record *dext_rec;
10253 chunk_item = first_cache_extent(chunk_cache);
10254 while (chunk_item) {
10255 chunk_rec = container_of(chunk_item, struct chunk_record,
10257 err = check_chunk_refs(chunk_rec, block_group_cache,
10258 dev_extent_cache, silent);
/* Sort the chunk by the sign of the result, if the caller gave a list */
10261 if (err == 0 && good)
10262 list_add_tail(&chunk_rec->list, good);
10263 if (err > 0 && rebuild)
10264 list_add_tail(&chunk_rec->list, rebuild);
10265 if (err < 0 && bad)
10266 list_add_tail(&chunk_rec->list, bad);
10267 chunk_item = next_cache_extent(chunk_item);
/* check_chunk_refs() unlinks matched block groups, the rest have no chunk */
10270 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10273 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10281 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10285 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10286 dext_rec->objectid,
10296 static int check_device_used(struct device_record *dev_rec,
10297 struct device_extent_tree *dext_cache)
10299 struct cache_extent *cache;
10300 struct device_extent_record *dev_extent_rec;
10301 u64 total_byte = 0;
10303 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10305 dev_extent_rec = container_of(cache,
10306 struct device_extent_record,
10308 if (dev_extent_rec->objectid != dev_rec->devid)
10311 list_del_init(&dev_extent_rec->device_list);
10312 total_byte += dev_extent_rec->length;
10313 cache = next_cache_extent(cache);
10316 if (total_byte != dev_rec->byte_used) {
10318 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10319 total_byte, dev_rec->byte_used, dev_rec->objectid,
10320 dev_rec->type, dev_rec->offset);
10327 /* check btrfs_dev_item -> btrfs_dev_extent */
10328 static int check_devices(struct rb_root *dev_cache,
10329 struct device_extent_tree *dev_extent_cache)
10331 struct rb_node *dev_node;
10332 struct device_record *dev_rec;
10333 struct device_extent_record *dext_rec;
10337 dev_node = rb_first(dev_cache);
10339 dev_rec = container_of(dev_node, struct device_record, node);
10340 err = check_device_used(dev_rec, dev_extent_cache);
10344 dev_node = rb_next(dev_node);
10346 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10349 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10350 dext_rec->objectid, dext_rec->offset, dext_rec->length);
10357 static int add_root_item_to_list(struct list_head *head,
10358 u64 objectid, u64 bytenr, u64 last_snapshot,
10359 u8 level, u8 drop_level,
10360 struct btrfs_key *drop_key)
10363 struct root_item_record *ri_rec;
10364 ri_rec = malloc(sizeof(*ri_rec));
10367 ri_rec->bytenr = bytenr;
10368 ri_rec->objectid = objectid;
10369 ri_rec->level = level;
10370 ri_rec->drop_level = drop_level;
10371 ri_rec->last_snapshot = last_snapshot;
10373 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10374 list_add_tail(&ri_rec->list, head);
10379 static void free_root_item_list(struct list_head *list)
10381 struct root_item_record *ri_rec;
10383 while (!list_empty(list)) {
10384 ri_rec = list_first_entry(list, struct root_item_record,
10386 list_del_init(&ri_rec->list);
/*
 * deal_root_from_list - scan the trees queued on @list.
 *
 * @list: list of root_item_record entries; entries are unlinked as they
 *        are processed
 * @root: root passed to read_tree_block() (through root->fs_info) and to
 *        run_next_block()
 * @bits: block_info scratch array handed to run_next_block()
 * The remaining parameters are the cache trees and record trees that are
 * passed through unchanged to add_root_to_pending() and run_next_block().
 *
 * For every entry the root node is read, queued with add_root_to_pending()
 * and run_next_block() is called with the entry as last argument.  After
 * the list has been processed run_next_block() is called again with NULL
 * as last argument.
 *
 * NOTE(review): the loop conditions around the run_next_block() calls, the
 * error handling and the return statement are not visible in this excerpt.
 * A parameter named bits_nr is used below but its declaration is on a line
 * that is not visible.
 */
10391 static int deal_root_from_list(struct list_head *list,
10392 struct btrfs_root *root,
10393 struct block_info *bits,
10395 struct cache_tree *pending,
10396 struct cache_tree *seen,
10397 struct cache_tree *reada,
10398 struct cache_tree *nodes,
10399 struct cache_tree *extent_cache,
10400 struct cache_tree *chunk_cache,
10401 struct rb_root *dev_cache,
10402 struct block_group_tree *block_group_cache,
10403 struct device_extent_tree *dev_extent_cache)
10408 while (!list_empty(list)) {
10409 struct root_item_record *rec;
10410 struct extent_buffer *buf;
10411 rec = list_entry(list->next,
10412 struct root_item_record, list);
/* Read the root node of the queued tree */
10414 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10415 if (!extent_buffer_uptodate(buf)) {
10416 free_extent_buffer(buf);
10420 ret = add_root_to_pending(buf, extent_cache, pending,
10421 seen, nodes, rec->objectid);
10425 * To rebuild extent tree, we need deal with snapshot
10426 * one by one, otherwise we deal with node firstly which
10427 * can maximize readahead.
10430 ret = run_next_block(root, bits, bits_nr, &last,
10431 pending, seen, reada, nodes,
10432 extent_cache, chunk_cache,
10433 dev_cache, block_group_cache,
10434 dev_extent_cache, rec);
/* Done with this entry: drop the buffer reference and unlink the record */
10438 free_extent_buffer(buf);
10439 list_del(&rec->list);
/* Same call as above but without a root item record */
10445 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10446 reada, nodes, extent_cache, chunk_cache,
10447 dev_cache, block_group_cache,
10448 dev_extent_cache, NULL);
/*
 * check_chunks_and_extents - driver of the chunk and extent checks.
 *
 * @fs_info: filesystem to check
 *
 * Visible phases:
 *  1. Initialise the local cache trees and lists and publish some of them
 *     through fs_info (excluded_extents, fsck_extent_cache,
 *     free_extent_hook, corrupt_blocks).
 *  2. Queue the tree root and the chunk root on normal_trees, then walk
 *     the ROOT_ITEM entries of the tree root.  A root item whose
 *     drop_progress objectid is 0 goes to normal_trees, any other root
 *     item goes to dropping_trees together with its drop key.
 *  3. Scan both lists with deal_root_from_list(), then run check_chunks(),
 *     check_extent_refs() and check_devices().
 *  4. Stop the progress task, detach the published structures from fs_info
 *     and free the caches.
 *
 * NOTE(review): the labels, the reactions to -EAGAIN and to errors, and
 * the return statement are not visible in this excerpt.  The second group
 * of free calls at the end also releases extent_cache and presumably
 * belongs to a separate path - confirm against the full file.
 */
10458 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10460 struct rb_root dev_cache;
10461 struct cache_tree chunk_cache;
10462 struct block_group_tree block_group_cache;
10463 struct device_extent_tree dev_extent_cache;
10464 struct cache_tree extent_cache;
10465 struct cache_tree seen;
10466 struct cache_tree pending;
10467 struct cache_tree reada;
10468 struct cache_tree nodes;
10469 struct extent_io_tree excluded_extents;
10470 struct cache_tree corrupt_blocks;
10471 struct btrfs_path path;
10472 struct btrfs_key key;
10473 struct btrfs_key found_key;
10475 struct block_info *bits;
10477 struct extent_buffer *leaf;
10479 struct btrfs_root_item ri;
10480 struct list_head dropping_trees;
10481 struct list_head normal_trees;
10482 struct btrfs_root *root1;
10483 struct btrfs_root *root;
10487 root = fs_info->fs_root;
10488 dev_cache = RB_ROOT;
10489 cache_tree_init(&chunk_cache);
10490 block_group_tree_init(&block_group_cache);
10491 device_extent_tree_init(&dev_extent_cache);
10493 cache_tree_init(&extent_cache);
10494 cache_tree_init(&seen);
10495 cache_tree_init(&pending);
10496 cache_tree_init(&nodes);
10497 cache_tree_init(&reada);
10498 cache_tree_init(&corrupt_blocks);
10499 extent_io_tree_init(&excluded_extents);
10500 INIT_LIST_HEAD(&dropping_trees);
10501 INIT_LIST_HEAD(&normal_trees);
/* Publish the local structures through fs_info for the helpers to use */
10504 fs_info->excluded_extents = &excluded_extents;
10505 fs_info->fsck_extent_cache = &extent_cache;
10506 fs_info->free_extent_hook = free_extent_hook;
10507 fs_info->corrupt_blocks = &corrupt_blocks;
10511 bits = malloc(bits_nr * sizeof(struct block_info));
10517 if (ctx.progress_enabled) {
10518 ctx.tp = TASK_EXTENTS;
10519 task_start(ctx.info);
/* The tree root and the chunk root are queued directly, without drop key */
10523 root1 = fs_info->tree_root;
10524 level = btrfs_header_level(root1->node);
10525 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10526 root1->node->start, 0, level, 0, NULL);
10529 root1 = fs_info->chunk_root;
10530 level = btrfs_header_level(root1->node);
10531 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10532 root1->node->start, 0, level, 0, NULL);
/* Walk the tree root and queue every tree described by a ROOT_ITEM */
10535 btrfs_init_path(&path);
10538 key.type = BTRFS_ROOT_ITEM_KEY;
10539 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10543 leaf = path.nodes[0];
10544 slot = path.slots[0];
10545 if (slot >= btrfs_header_nritems(path.nodes[0])) {
10546 ret = btrfs_next_leaf(root, &path);
10549 leaf = path.nodes[0];
10550 slot = path.slots[0];
10552 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10553 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10554 unsigned long offset;
10557 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10558 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10559 last_snapshot = btrfs_root_last_snapshot(&ri);
/* A drop_progress objectid of 0 selects the normal_trees list */
10560 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10561 level = btrfs_root_level(&ri);
10562 ret = add_root_item_to_list(&normal_trees,
10563 found_key.objectid,
10564 btrfs_root_bytenr(&ri),
10565 last_snapshot, level,
10570 level = btrfs_root_level(&ri);
10571 objectid = found_key.objectid;
/* found_key is reused to carry the drop progress key from here on */
10572 btrfs_disk_key_to_cpu(&found_key,
10573 &ri.drop_progress);
10574 ret = add_root_item_to_list(&dropping_trees,
10576 btrfs_root_bytenr(&ri),
10577 last_snapshot, level,
10578 ri.drop_level, &found_key);
10585 btrfs_release_path(&path);
10588 * check_block can return -EAGAIN if it fixes something, please keep
10589 * this in mind when dealing with return values from these functions, if
10590 * we get -EAGAIN we want to fall through and restart the loop.
10592 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
10593 &seen, &reada, &nodes, &extent_cache,
10594 &chunk_cache, &dev_cache, &block_group_cache,
10595 &dev_extent_cache);
10597 if (ret == -EAGAIN)
10601 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
10602 &pending, &seen, &reada, &nodes,
10603 &extent_cache, &chunk_cache, &dev_cache,
10604 &block_group_cache, &dev_extent_cache);
10606 if (ret == -EAGAIN)
/* No result lists are requested from check_chunks() and silent is 0 */
10611 ret = check_chunks(&chunk_cache, &block_group_cache,
10612 &dev_extent_cache, NULL, NULL, NULL, 0);
10614 if (ret == -EAGAIN)
10619 ret = check_extent_refs(root, &extent_cache);
10621 if (ret == -EAGAIN)
10626 ret = check_devices(&dev_cache, &dev_extent_cache);
10631 task_stop(ctx.info);
/* Detach the structures published at the top before they go out of scope */
10633 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10634 extent_io_tree_cleanup(&excluded_extents);
10635 fs_info->fsck_extent_cache = NULL;
10636 fs_info->free_extent_hook = NULL;
10637 fs_info->corrupt_blocks = NULL;
10638 fs_info->excluded_extents = NULL;
10641 free_chunk_cache_tree(&chunk_cache);
10642 free_device_cache_tree(&dev_cache);
10643 free_block_group_tree(&block_group_cache);
10644 free_device_extent_tree(&dev_extent_cache);
10645 free_extent_cache_tree(&seen);
10646 free_extent_cache_tree(&pending);
10647 free_extent_cache_tree(&reada);
10648 free_extent_cache_tree(&nodes);
10649 free_root_item_list(&normal_trees);
10650 free_root_item_list(&dropping_trees);
/* Second cleanup sequence, this one also frees the extent record cache */
10653 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10654 free_extent_cache_tree(&seen);
10655 free_extent_cache_tree(&pending);
10656 free_extent_cache_tree(&reada);
10657 free_extent_cache_tree(&nodes);
10658 free_chunk_cache_tree(&chunk_cache);
10659 free_block_group_tree(&block_group_cache);
10660 free_device_cache_tree(&dev_cache);
10661 free_device_extent_tree(&dev_extent_cache);
10662 free_extent_record_cache(&extent_cache);
10663 free_root_item_list(&normal_trees);
10664 free_root_item_list(&dropping_trees);
10665 extent_io_tree_cleanup(&excluded_extents);
10670 * Check backrefs of a tree block given by @bytenr or @eb.
10672 * @root: the root containing the @bytenr or @eb
10673 * @eb: tree block extent buffer, can be NULL
10674 * @bytenr: bytenr of the tree block to search
10675 * @level: tree level of the tree block
10676 * @owner: owner of the tree block
10678 * Return >0 for any error found and output error message
10679 * Return 0 for no error found
/*
 * Outline of the visible steps:
 *  - find the extent item or metadata item of @bytenr in the extent tree
 *  - check the TREE_BLOCK flag, the generation, the level and the ref
 *    count of that item; failures set BACKREF_MISMATCH
 *  - walk the inline refs for a TREE_BLOCK_REF whose offset equals
 *    root->objectid or @owner, or for a SHARED_BLOCK_REF whose parent
 *    passes a recursive check_tree_block_ref() call at level + 1
 *  - if the inline refs gave no match, search a keyed TREE_BLOCK_REF item
 *  - the final lost backref message is only printed when @eb is given
 *
 * NOTE(review): the conditions guarding several of these checks are on
 * lines that are not visible in this excerpt; btrfs_header_generation(eb)
 * is called below, so the checks around it presumably only run with a non
 * NULL @eb - confirm against the full file.
 */
10681 static int check_tree_block_ref(struct btrfs_root *root,
10682 struct extent_buffer *eb, u64 bytenr,
10683 int level, u64 owner)
10685 struct btrfs_key key;
10686 struct btrfs_root *extent_root = root->fs_info->extent_root;
10687 struct btrfs_path path;
10688 struct btrfs_extent_item *ei;
10689 struct btrfs_extent_inline_ref *iref;
10690 struct extent_buffer *leaf;
10696 u32 nodesize = root->fs_info->nodesize;
10699 int tree_reloc_root = 0;
/* Remember whether @bytenr is the root node of a tree reloc root */
10704 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10705 btrfs_header_bytenr(root->node) == bytenr)
10706 tree_reloc_root = 1;
10708 btrfs_init_path(&path);
10709 key.objectid = bytenr;
10710 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10711 key.type = BTRFS_METADATA_ITEM_KEY;
10713 key.type = BTRFS_EXTENT_ITEM_KEY;
/* Offset (u64)-1 places the search behind every item of this bytenr and type */
10714 key.offset = (u64)-1;
10716 /* Search for the backref in extent tree */
10717 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10719 err |= BACKREF_MISSING;
10722 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10724 err |= BACKREF_MISSING;
10728 leaf = path.nodes[0];
10729 slot = path.slots[0];
10730 btrfs_item_key_to_cpu(leaf, &key, slot);
10732 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * A metadata item stores the level in key.offset and the inline refs
 * follow the extent item directly; otherwise a btrfs_tree_block_info sits
 * between the extent item and the inline refs and holds the level.
 */
10734 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10735 skinny_level = (int)key.offset;
10736 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10738 struct btrfs_tree_block_info *info;
10740 info = (struct btrfs_tree_block_info *)(ei + 1);
10741 skinny_level = btrfs_tree_block_level(leaf, info);
10742 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10749 if (!(btrfs_extent_flags(leaf, ei) &
10750 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10752 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10753 key.objectid, nodesize,
10754 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10755 err = BACKREF_MISMATCH;
10757 header_gen = btrfs_header_generation(eb);
10758 extent_gen = btrfs_extent_generation(leaf, ei);
10759 if (header_gen != extent_gen) {
10761 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10762 key.objectid, nodesize, header_gen,
10764 err = BACKREF_MISMATCH;
10766 if (level != skinny_level) {
10768 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10769 key.objectid, nodesize, level, skinny_level);
10770 err = BACKREF_MISMATCH;
/* A block owned by a non fs tree is expected to have exactly one ref */
10772 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10774 "extent[%llu %u] is referred by other roots than %llu",
10775 key.objectid, nodesize, root->objectid);
10776 err = BACKREF_MISMATCH;
10781 * Iterate the extent/metadata item to find the exact backref
10783 item_size = btrfs_item_size_nr(leaf, slot);
10784 ptr = (unsigned long)iref;
10785 end = (unsigned long)ei + item_size;
10786 while (ptr < end) {
10787 iref = (struct btrfs_extent_inline_ref *)ptr;
10788 type = btrfs_extent_inline_ref_type(leaf, iref);
10789 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10791 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10792 (offset == root->objectid || offset == owner)) {
10794 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10796 * Backref of tree reloc root points to itself, no need
10797 * to check backref any more.
10799 if (tree_reloc_root)
10802 /* Check if the backref points to valid referencer */
10803 found_ref = !check_tree_block_ref(root, NULL,
10804 offset, level + 1, owner);
/* Inline refs have a type dependent size, step to the next one */
10809 ptr += btrfs_extent_inline_ref_size(type);
10813 * Inlined extent item doesn't have what we need, check
10814 * TREE_BLOCK_REF_KEY
10817 btrfs_release_path(&path);
10818 key.objectid = bytenr;
10819 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10820 key.offset = root->objectid;
10822 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10827 err |= BACKREF_MISSING;
10829 btrfs_release_path(&path);
10830 if (eb && (err & BACKREF_MISSING))
10831 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10832 bytenr, nodesize, owner, level);
10837 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10839 * Return >0 any error found and output error message
10840 * Return 0 for no error found
/*
 * @root: root that owns the leaf @eb
 * @eb:   leaf containing the EXTENT_DATA item
 * @slot: slot of the item inside @eb
 *
 * Outline of the visible steps:
 *  - inline extents and extents with disk_bytenr 0 are not checked
 *  - disk_num_bytes and num_bytes must be aligned to the sector size,
 *    otherwise BYTES_UNALIGNED is set; aligned values are added to the
 *    global data_bytes_allocated / data_bytes_referenced counters
 *  - the EXTENT_ITEM keyed by (disk_bytenr, disk_num_bytes) is looked up
 *    in the extent tree and must carry BTRFS_EXTENT_FLAG_DATA
 *  - a data backref is searched in three places: the inline refs of the
 *    extent item, a keyed EXTENT_DATA_REF item, and finally a keyed
 *    SHARED_DATA_REF item whose offset is the start of @eb
 *  - BACKREF_MISSING is set when none of them is found
 *
 * NOTE(review): the result checks after the searches and the return
 * statement are not visible in this excerpt.
 */
10842 static int check_extent_data_item(struct btrfs_root *root,
10843 struct extent_buffer *eb, int slot)
10845 struct btrfs_file_extent_item *fi;
10846 struct btrfs_path path;
10847 struct btrfs_root *extent_root = root->fs_info->extent_root;
10848 struct btrfs_key fi_key;
10849 struct btrfs_key dbref_key;
10850 struct extent_buffer *leaf;
10851 struct btrfs_extent_item *ei;
10852 struct btrfs_extent_inline_ref *iref;
10853 struct btrfs_extent_data_ref *dref;
10856 u64 disk_num_bytes;
10857 u64 extent_num_bytes;
10864 int found_dbackref = 0;
10868 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10869 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10871 /* Nothing to check for hole and inline data extents */
10872 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10873 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10876 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10877 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10878 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10880 /* Check unaligned disk_num_bytes and num_bytes */
10881 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10883 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10884 fi_key.objectid, fi_key.offset, disk_num_bytes,
10885 root->fs_info->sectorsize);
10886 err |= BYTES_UNALIGNED;
10888 data_bytes_allocated += disk_num_bytes;
10890 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10892 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10893 fi_key.objectid, fi_key.offset, extent_num_bytes,
10894 root->fs_info->sectorsize);
10895 err |= BYTES_UNALIGNED;
10897 data_bytes_referenced += extent_num_bytes;
/* The owner recorded in the leaf header is accepted as backref root below */
10899 owner = btrfs_header_owner(eb);
10901 /* Check the extent item of the file extent in extent tree */
10902 btrfs_init_path(&path);
10903 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10904 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10905 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10907 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
/* From here on slot refers to the extent tree leaf, not to @eb */
10911 leaf = path.nodes[0];
10912 slot = path.slots[0];
10913 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10915 extent_flags = btrfs_extent_flags(leaf, ei);
10917 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10919 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10920 disk_bytenr, disk_num_bytes,
10921 BTRFS_EXTENT_FLAG_DATA);
10922 err |= BACKREF_MISMATCH;
10925 /* Check data backref inside that extent item */
10926 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10927 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10928 ptr = (unsigned long)iref;
10929 end = (unsigned long)ei + item_size;
10930 while (ptr < end) {
10931 iref = (struct btrfs_extent_inline_ref *)ptr;
10932 type = btrfs_extent_inline_ref_type(leaf, iref);
/* For EXTENT_DATA_REF the data ref is read from the offset field position */
10933 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10935 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10936 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10937 if (ref_root == owner || ref_root == root->objectid)
10938 found_dbackref = 1;
10939 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/* A shared ref counts if the referenced parent block passes its own check */
10940 found_dbackref = !check_tree_block_ref(root, NULL,
10941 btrfs_extent_inline_ref_offset(leaf, iref),
10945 if (found_dbackref)
10947 ptr += btrfs_extent_inline_ref_size(type);
10950 if (!found_dbackref) {
10951 btrfs_release_path(&path);
10953 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10954 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10955 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
/* Keyed data refs are indexed by a hash of root, inode and file offset */
10956 dbref_key.offset = hash_extent_data_ref(root->objectid,
10957 fi_key.objectid, fi_key.offset);
10959 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10960 &dbref_key, &path, 0, 0);
10962 found_dbackref = 1;
10966 btrfs_release_path(&path);
10969 * Neither inlined nor EXTENT_DATA_REF found, try
10970 * SHARED_DATA_REF as last chance.
10972 dbref_key.objectid = disk_bytenr;
10973 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10974 dbref_key.offset = eb->start;
10976 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10977 &dbref_key, &path, 0, 0);
10979 found_dbackref = 1;
10985 if (!found_dbackref)
10986 err |= BACKREF_MISSING;
10987 btrfs_release_path(&path);
10988 if (err & BACKREF_MISSING) {
10989 error("data extent[%llu %llu] backref lost",
10990 disk_bytenr, disk_num_bytes);
10996 * Get real tree block level for the case like shared block
10997 * Return >= 0 as tree level
10998 * Return <0 for error
/*
 * query_tree_block_level - resolve the level of the tree block at @bytenr.
 *
 * The extent tree is searched with a METADATA_ITEM key at offset (u64)-1 and
 * the path is then moved with btrfs_previous_extent_item().  One level value
 * is taken from the extent/metadata item (backref_level) and another from the
 * header of the block read with read_tree_block() (header_level); the two are
 * compared and header_level is returned.
 *
 * NOTE(review): this listing elides short lines (braces, local declarations,
 * error branches and their return values), so the exact failure paths are
 * not visible here.
 */
11000 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11002 struct extent_buffer *eb;
11003 struct btrfs_path path;
11004 struct btrfs_key key;
11005 struct btrfs_extent_item *ei;
11012 /* Search extent tree for extent generation and level */
11013 key.objectid = bytenr;
11014 key.type = BTRFS_METADATA_ITEM_KEY;
11015 key.offset = (u64)-1;
11017 btrfs_init_path(&path);
11018 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
/* presumably repositions path on the extent item of bytenr - TODO confirm */
11021 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11029 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11030 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11031 struct btrfs_extent_item);
11032 flags = btrfs_extent_flags(path.nodes[0], ei);
/* Only extents flagged as tree blocks carry a level */
11033 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11038 /* Get transid for later read_tree_block() check */
11039 transid = btrfs_extent_generation(path.nodes[0], ei);
11041 /* Get backref level as one source */
11042 if (key.type == BTRFS_METADATA_ITEM_KEY) {
/* METADATA_ITEM stores the level in key.offset */
11043 backref_level = key.offset;
11045 struct btrfs_tree_block_info *info;
/* Otherwise read it from the tree block info placed right after the item */
11047 info = (struct btrfs_tree_block_info *)(ei + 1);
11048 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11050 btrfs_release_path(&path);
11052 /* Get level from tree block as an alternative source */
11053 eb = read_tree_block(fs_info, bytenr, transid);
11054 if (!extent_buffer_uptodate(eb)) {
11055 free_extent_buffer(eb);
11058 header_level = btrfs_header_level(eb);
11059 free_extent_buffer(eb);
/* Both sources must agree before the level is trusted */
11061 if (header_level != backref_level)
11063 return header_level;
11066 btrfs_release_path(&path);
11071 * Check if a tree block backref is valid (points to a valid tree block)
11072 * if level == -1, level will be resolved
11073 * Return >0 for any error found and print error message
/*
 * check_tree_block_backref - verify that root @root_id really references the
 * tree block at @bytenr.
 *
 * The root is read with btrfs_read_fs_root(), the block itself is read to
 * fetch its first key (node key or item key), and @root_id is then searched
 * for that key with path.lowest_level set to @level.  The bytenr and level of
 * path.nodes[level] are compared with the expected values.
 *
 * The visible statements set REFERENCER_MISSING after the level query, the
 * root read, the block read and the search, and REFERENCER_MISMATCH when the
 * bytenr or the level differs; a matching error message is printed.
 *
 * NOTE(review): short lines (braces, conditions, gotos, the final return)
 * are elided from this listing, so the control flow between the visible
 * statements is not shown.
 */
11075 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11076 u64 bytenr, int level)
11078 struct btrfs_root *root;
11079 struct btrfs_key key;
11080 struct btrfs_path path;
11081 struct extent_buffer *eb;
11082 struct extent_buffer *node;
11083 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11087 /* Query level for level == -1 special case */
11089 level = query_tree_block_level(fs_info, bytenr);
11091 err |= REFERENCER_MISSING;
/* Build the root item key used to load the referencing root */
11095 key.objectid = root_id;
11096 key.type = BTRFS_ROOT_ITEM_KEY;
11097 key.offset = (u64)-1;
11099 root = btrfs_read_fs_root(fs_info, &key);
11100 if (IS_ERR(root)) {
11101 err |= REFERENCER_MISSING;
11105 /* Read out the tree block to get item/node key */
11106 eb = read_tree_block(fs_info, bytenr, 0);
11107 if (!extent_buffer_uptodate(eb)) {
11108 err |= REFERENCER_MISSING;
11109 free_extent_buffer(eb);
11113 /* Empty tree, no need to check key */
11114 if (!btrfs_header_nritems(eb) && !level) {
11115 free_extent_buffer(eb);
/* First key of the block: node key for a node, item key for a leaf */
11120 btrfs_node_key_to_cpu(eb, &key, 0);
11122 btrfs_item_key_to_cpu(eb, &key, 0);
11124 free_extent_buffer(eb);
11126 btrfs_init_path(&path);
/* Stop the search at the level of the block under test */
11127 path.lowest_level = level;
11128 /* Search with the first key, to ensure we can reach it */
11129 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11131 err |= REFERENCER_MISSING;
/* The block reached at this level must be the very block being checked */
11135 node = path.nodes[level];
11136 if (btrfs_header_bytenr(node) != bytenr) {
11138 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
11139 bytenr, nodesize, bytenr,
11140 btrfs_header_bytenr(node));
11141 err |= REFERENCER_MISMATCH;
11143 if (btrfs_header_level(node) != level) {
11145 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
11146 bytenr, nodesize, level,
11147 btrfs_header_level(node));
11148 err |= REFERENCER_MISMATCH;
11152 btrfs_release_path(&path);
/* Two message variants follow: without and with the level */
11154 if (err & REFERENCER_MISSING) {
11156 error("extent [%llu %d] lost referencer (owner: %llu)",
11157 bytenr, nodesize, root_id);
11160 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11161 bytenr, nodesize, root_id, level);
11168 * Check if tree block @eb is tree reloc root.
11169 * Return 0 if it's not or any problem happens
11170 * Return 1 if it's a tree reloc root
/*
 * is_tree_reloc_root - test whether @eb is the root node of a tree reloc
 * root.
 *
 * Builds the key (BTRFS_TREE_RELOC_OBJECTID, BTRFS_ROOT_ITEM_KEY, owner of
 * @eb), reads that root with btrfs_read_fs_root_no_cache() and compares the
 * bytenr of @eb with the bytenr of the node of the root that was read.  The
 * root is released with btrfs_free_fs_root() afterwards.
 *
 * NOTE(review): the return statements are elided from this listing.
 */
11172 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11173 struct extent_buffer *eb)
11175 struct btrfs_root *tree_reloc_root;
11176 struct btrfs_key key;
11177 u64 bytenr = btrfs_header_bytenr(eb);
11178 u64 owner = btrfs_header_owner(eb);
/* The reloc root is keyed by the owner recorded in the block header */
11181 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11182 key.offset = owner;
11183 key.type = BTRFS_ROOT_ITEM_KEY;
11185 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11186 if (IS_ERR(tree_reloc_root))
/* Same bytenr means @eb is the root node of that reloc tree */
11189 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
/* Read without the cache above, so it is released here */
11191 btrfs_free_fs_root(tree_reloc_root);
11196 * Check referencer for shared block backref
11197 * If level == -1, this function will resolve the level.
/*
 * check_shared_block_backref - verify that the tree block at @parent holds a
 * pointer to the tree block at @bytenr.
 *
 * Reads @parent, obtains @level from query_tree_block_level() (the guarding
 * condition is elided in this listing), handles the parent == bytenr case
 * through is_tree_reloc_root(), checks that the parent header level equals
 * level + 1 and scans every node pointer of the parent for @bytenr.
 *
 * When found_parent is not set an error is printed and REFERENCER_MISSING is
 * returned.
 */
11199 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11200 u64 parent, u64 bytenr, int level)
11202 struct extent_buffer *eb;
11204 int found_parent = 0;
11207 eb = read_tree_block(fs_info, parent, 0);
11208 if (!extent_buffer_uptodate(eb))
11212 level = query_tree_block_level(fs_info, bytenr);
11216 /* It's possible it's a tree reloc root */
11217 if (parent == bytenr) {
11218 if (is_tree_reloc_root(fs_info, eb))
/* A real parent sits exactly one level above its child */
11223 if (level + 1 != btrfs_header_level(eb))
/* Look for a node pointer in the parent that targets bytenr */
11226 nr = btrfs_header_nritems(eb);
11227 for (i = 0; i < nr; i++) {
11228 if (bytenr == btrfs_node_blockptr(eb, i)) {
11234 free_extent_buffer(eb);
11235 if (!found_parent) {
11237 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
11238 bytenr, fs_info->nodesize, parent, level);
11239 return REFERENCER_MISSING;
11245 * Check referencer for normal (inlined) data ref
11246 * If len == 0, it will be resolved by searching in extent tree
/*
 * check_extent_data_backref - count the file extents of inode @objectid in
 * root @root_id that reference the data extent at @bytenr and compare the
 * result with @count.
 *
 * The extent tree is first searched for the EXTENT_ITEM of @bytenr, then the
 * root is read and its EXTENT_DATA items for @objectid are walked with
 * btrfs_next_item().  A file extent is tested on its disk bytenr (against
 * @bytenr), its disk num bytes (against @len) and on key.offset minus the
 * file extent offset (the right-hand side of that comparison is elided in
 * this listing).
 *
 * Prints an error and returns REFERENCER_MISSING when found_count differs
 * from @count.
 *
 * NOTE(review): the code that resolves @len when it is zero, the loop
 * statement and the success return are not visible in this listing.
 */
11248 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
11249 u64 root_id, u64 objectid, u64 offset,
11250 u64 bytenr, u64 len, u32 count)
11252 struct btrfs_root *root;
11253 struct btrfs_root *extent_root = fs_info->extent_root;
11254 struct btrfs_key key;
11255 struct btrfs_path path;
11256 struct extent_buffer *leaf;
11257 struct btrfs_file_extent_item *fi;
11258 u32 found_count = 0;
/* Step 1: locate the extent item of bytenr in the extent tree */
11263 key.objectid = bytenr;
11264 key.type = BTRFS_EXTENT_ITEM_KEY;
11265 key.offset = (u64)-1;
11267 btrfs_init_path(&path);
11268 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11271 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11274 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11275 if (key.objectid != bytenr ||
11276 key.type != BTRFS_EXTENT_ITEM_KEY)
11279 btrfs_release_path(&path);
/* Step 2: load the root that is supposed to reference the extent */
11281 key.objectid = root_id;
11282 key.type = BTRFS_ROOT_ITEM_KEY;
11283 key.offset = (u64)-1;
11284 btrfs_init_path(&path);
11286 root = btrfs_read_fs_root(fs_info, &key);
/* Step 3: walk the file extents of the inode */
11290 key.objectid = objectid;
11291 key.type = BTRFS_EXTENT_DATA_KEY;
11293 * It can be nasty as data backref offset is
11294 * file offset - file extent offset, which is smaller or
11295 * equal to original backref offset. The only special case is
11296 * overflow. So we need to special check and do further search.
/* Parsed as (offset & (1ULL << 63)) ? 0 : offset */
11298 key.offset = offset & (1ULL << 63) ? 0 : offset;
11300 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11305 * Search afterwards to get correct one
11306 * NOTE: As we must do a comprehensive check on the data backref to
11307 * make sure the dref count also matches, we must iterate all file
11308 * extents for that inode.
11311 leaf = path.nodes[0];
11312 slot = path.slots[0];
11314 if (slot >= btrfs_header_nritems(leaf))
11316 btrfs_item_key_to_cpu(leaf, &key, slot);
11317 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11319 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11321 * Except normal disk bytenr and disk num bytes, we still
11322 * need to do extra check on dbackref offset as
11323 * dbackref offset = file_offset - file_extent_offset
11325 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11326 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11327 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11332 ret = btrfs_next_item(root, &path);
11337 btrfs_release_path(&path);
/* The number of matching file extents must equal the recorded ref count */
11338 if (found_count != count) {
11340 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
11341 bytenr, len, root_id, objectid, offset, count, found_count);
11342 return REFERENCER_MISSING;
11348 * Check if the referencer of a shared data backref exists
/*
 * check_shared_data_backref - verify that the leaf at @parent contains a file
 * extent item pointing at the data extent @bytenr.
 *
 * Reads @parent and walks all of its items.  Each item is tested for the
 * BTRFS_EXTENT_DATA_KEY type and for the inline file extent type, and the
 * disk bytenr of the file extent is compared with @bytenr.  If found_parent
 * is not set after the walk, an error is printed and REFERENCER_MISSING is
 * returned.
 *
 * NOTE(review): the bodies of the tests inside the loop are elided from this
 * listing.
 */
11350 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
11351 u64 parent, u64 bytenr)
11353 struct extent_buffer *eb;
11354 struct btrfs_key key;
11355 struct btrfs_file_extent_item *fi;
11357 int found_parent = 0;
11360 eb = read_tree_block(fs_info, parent, 0);
11361 if (!extent_buffer_uptodate(eb))
11364 nr = btrfs_header_nritems(eb);
11365 for (i = 0; i < nr; i++) {
11366 btrfs_item_key_to_cpu(eb, &key, i);
/* Only file extent items are of interest */
11367 if (key.type != BTRFS_EXTENT_DATA_KEY)
11370 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
/* Inline extents are tested for separately before the bytenr compare */
11371 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
11374 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
11381 free_extent_buffer(eb);
11382 if (!found_parent) {
11383 error("shared extent %llu referencer lost (parent: %llu)",
11385 return REFERENCER_MISSING;
11391 * This function will check a given extent item, including its backref and
11392 * itself (like crossing stripe boundary and type)
11394 * Since we don't use extent_record anymore, introduce new error bit
/*
 * check_extent_item - check the EXTENT_ITEM or METADATA_ITEM at @slot of @eb.
 *
 * Visible steps:
 *  - bytes_used is increased by key.offset for an EXTENT_ITEM key; a second
 *    statement adds nodesize (presumably the else branch - TODO confirm).
 *  - Items smaller than struct btrfs_extent_item are reported as the
 *    unsupported COMPAT_EXTENT_TREE_V0 format.
 *  - Metadata extents are passed to check_crossing_stripes(); a hit sets
 *    CROSSING_STRIPE_BOUNDARY.
 *  - For old style EXTENT_ITEM metadata the level is read from the
 *    btrfs_tree_block_info that follows the item and ptr skips over it;
 *    otherwise level is taken from key.offset.
 *  - Each inline ref between ptr and end is dispatched by type to
 *    check_tree_block_backref(), check_shared_block_backref(),
 *    check_extent_data_backref() or check_shared_data_backref().  An unknown
 *    type sets UNKNOWN_TYPE and a walk that ends beyond the item sets
 *    ITEM_SIZE_MISMATCH.
 *
 * NOTE(review): the loop or label structure, the handling of ret and the
 * return statement are elided from this listing.  check_crossing_stripes()
 * is called with global_info rather than @fs_info.
 */
11396 static int check_extent_item(struct btrfs_fs_info *fs_info,
11397 struct extent_buffer *eb, int slot)
11399 struct btrfs_extent_item *ei;
11400 struct btrfs_extent_inline_ref *iref;
11401 struct btrfs_extent_data_ref *dref;
11405 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11406 u32 item_size = btrfs_item_size_nr(eb, slot);
11411 struct btrfs_key key;
11415 btrfs_item_key_to_cpu(eb, &key, slot);
11416 if (key.type == BTRFS_EXTENT_ITEM_KEY)
11417 bytes_used += key.offset;
11419 bytes_used += nodesize;
11421 if (item_size < sizeof(*ei)) {
11423 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
11424 * old thing when on disk format is still un-determined.
11425 * No need to care about it anymore
11427 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
11431 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
11432 flags = btrfs_extent_flags(eb, ei);
11434 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
11436 if (metadata && check_crossing_stripes(global_info, key.objectid,
11438 error("bad metadata [%llu, %llu) crossing stripe boundary",
11439 key.objectid, key.objectid + nodesize);
11440 err |= CROSSING_STRIPE_BOUNDARY;
11443 ptr = (unsigned long)(ei + 1);
11445 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
11446 /* Old EXTENT_ITEM metadata */
11447 struct btrfs_tree_block_info *info;
11449 info = (struct btrfs_tree_block_info *)ptr;
11450 level = btrfs_tree_block_level(eb, info);
11451 ptr += sizeof(struct btrfs_tree_block_info);
11453 /* New METADATA_ITEM */
11454 level = key.offset;
11456 end = (unsigned long)ei + item_size;
11459 /* Reached extent item end normally */
11463 /* Beyond extent item end, wrong item size */
11465 err |= ITEM_SIZE_MISMATCH;
11466 error("extent item at bytenr %llu slot %d has wrong size",
11471 /* Now check every backref in this extent item */
11472 iref = (struct btrfs_extent_inline_ref *)ptr;
11473 type = btrfs_extent_inline_ref_type(eb, iref);
11474 offset = btrfs_extent_inline_ref_offset(eb, iref);
11476 case BTRFS_TREE_BLOCK_REF_KEY:
11477 ret = check_tree_block_backref(fs_info, offset, key.objectid,
11481 case BTRFS_SHARED_BLOCK_REF_KEY:
11482 ret = check_shared_block_backref(fs_info, offset, key.objectid,
11486 case BTRFS_EXTENT_DATA_REF_KEY:
11487 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11488 ret = check_extent_data_backref(fs_info,
11489 btrfs_extent_data_ref_root(eb, dref),
11490 btrfs_extent_data_ref_objectid(eb, dref),
11491 btrfs_extent_data_ref_offset(eb, dref),
11492 key.objectid, key.offset,
11493 btrfs_extent_data_ref_count(eb, dref));
11496 case BTRFS_SHARED_DATA_REF_KEY:
11497 ret = check_shared_data_backref(fs_info, offset, key.objectid);
11501 error("extent[%llu %d %llu] has unknown ref type: %d",
11502 key.objectid, key.type, key.offset, type);
11503 err |= UNKNOWN_TYPE;
11507 ptr += btrfs_extent_inline_ref_size(type);
11515 * Check if a dev extent item is referred correctly by its chunk
/*
 * check_dev_extent_item - verify that the dev extent at @slot of @eb is
 * referenced by its chunk.
 *
 * Looks up the chunk item named by the chunk objectid and chunk offset stored
 * in the dev extent, validates it with btrfs_check_chunk_valid(), compares
 * the value of btrfs_stripe_length() with the dev extent length and scans the
 * chunk stripes for one whose devid and offset equal the dev extent key.
 *
 * Prints an error and returns REFERENCER_MISSING when found_chunk is not set.
 *
 * NOTE(review): the branches taken on lookup or validation failure and the
 * success return are elided from this listing.
 */
11517 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
11518 struct extent_buffer *eb, int slot)
11520 struct btrfs_root *chunk_root = fs_info->chunk_root;
11521 struct btrfs_dev_extent *ptr;
11522 struct btrfs_path path;
11523 struct btrfs_key chunk_key;
11524 struct btrfs_key devext_key;
11525 struct btrfs_chunk *chunk;
11526 struct extent_buffer *l;
11530 int found_chunk = 0;
11533 btrfs_item_key_to_cpu(eb, &devext_key, slot);
11534 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
11535 length = btrfs_dev_extent_length(eb, ptr);
/* The dev extent records which chunk it belongs to */
11537 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
11538 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11539 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
11541 btrfs_init_path(&path);
11542 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11547 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
11548 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
/* The per-device stripe length of the chunk must match the dev extent */
11553 if (btrfs_stripe_length(fs_info, l, chunk) != length)
/* One stripe of the chunk must point back at this device and offset */
11556 num_stripes = btrfs_chunk_num_stripes(l, chunk);
11557 for (i = 0; i < num_stripes; i++) {
11558 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
11559 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
11561 if (devid == devext_key.objectid &&
11562 offset == devext_key.offset) {
11568 btrfs_release_path(&path);
11569 if (!found_chunk) {
11571 "device extent[%llu, %llu, %llu] did not find the related chunk",
11572 devext_key.objectid, devext_key.offset, length);
11573 return REFERENCER_MISSING;
11579 * Check if the used space is correct with the dev item
/*
 * check_dev_item - compare the bytes_used field of the dev item at @slot of
 * @eb with the sum of the lengths of all dev extents of that device.
 *
 * The dev tree is searched from (dev_id, BTRFS_DEV_EXTENT_KEY, ...) and
 * walked with btrfs_next_item(); the length of every DEV_EXTENT item whose
 * objectid equals dev_id is added to total.
 *
 * Returns REFERENCER_MISSING when the initial search fails and
 * ACCOUNTING_MISMATCH when total differs from the recorded bytes_used.  Both
 * error messages identify the device by the key of the dev item itself.
 *
 * NOTE(review): short lines (braces, local declarations, loop statement,
 * success return) are elided from this listing.
 */
11581 static int check_dev_item(struct btrfs_fs_info *fs_info,
11582 struct extent_buffer *eb, int slot)
11584 struct btrfs_root *dev_root = fs_info->dev_root;
11585 struct btrfs_dev_item *dev_item;
11586 struct btrfs_path path;
11587 struct btrfs_key key;
11588 struct btrfs_dev_extent *ptr;
11594 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
11595 dev_id = btrfs_device_id(eb, dev_item);
11596 used = btrfs_device_bytes_used(eb, dev_item);
11598 key.objectid = dev_id;
11599 key.type = BTRFS_DEV_EXTENT_KEY;
11602 btrfs_init_path(&path);
11603 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* Reload the key of the dev item so the message names the right item */
11605 btrfs_item_key_to_cpu(eb, &key, slot);
11606 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
11607 key.objectid, key.type, key.offset);
11608 btrfs_release_path(&path);
11609 return REFERENCER_MISSING;
11612 /* Iterate dev_extents to calculate the used space of a device */
11614 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11617 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
/* Keys are ordered by objectid, so a larger devid ends the walk */
11618 if (key.objectid > dev_id)
11620 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11623 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11624 struct btrfs_dev_extent);
11625 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11627 ret = btrfs_next_item(dev_root, &path);
11631 btrfs_release_path(&path);
11633 if (used != total) {
/*
 * Report the key of the dev item that was checked.  The key is reloaded
 * here because the walk above overwrote it with dev extent keys.
 */
11634 btrfs_item_key_to_cpu(eb, &key, slot);
11636 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11637 total, used, key.objectid, key.type,
11638 key.offset);
11639 return ACCOUNTING_MISMATCH;
11645 * Check a block group item with its referencer (chunk) and its used space
11646 * with extent/metadata item
/*
 * check_block_group_item - check the block group item at @slot of @eb against
 * its chunk and against the extents it contains.
 *
 * 1) The chunk tree is searched for (BTRFS_FIRST_CHUNK_TREE_OBJECTID,
 *    BTRFS_CHUNK_ITEM_KEY, block group start) and the chunk length is
 *    compared (right-hand side elided in this listing).
 * 2) The extent tree is walked from the block group start.  For EXTENT_ITEM
 *    and METADATA_ITEM keys inside the block group range the extent size is
 *    added to total and the extent flags (data or tree block) are checked
 *    against the block group flags.
 * 3) total is compared with the used value stored in the block group item.
 *
 * Error bits visible here: REFERENCER_MISSING, REFERENCER_MISMATCH,
 * CHUNK_TYPE_MISMATCH and ACCOUNTING_MISMATCH.
 *
 * NOTE(review): loop statements, branch targets and the final return are
 * elided from this listing.
 */
11648 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11649 struct extent_buffer *eb, int slot)
11651 struct btrfs_root *extent_root = fs_info->extent_root;
11652 struct btrfs_root *chunk_root = fs_info->chunk_root;
11653 struct btrfs_block_group_item *bi;
11654 struct btrfs_block_group_item bg_item;
11655 struct btrfs_path path;
11656 struct btrfs_key bg_key;
11657 struct btrfs_key chunk_key;
11658 struct btrfs_key extent_key;
11659 struct btrfs_chunk *chunk;
11660 struct extent_buffer *leaf;
11661 struct btrfs_extent_item *ei;
11662 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11670 btrfs_item_key_to_cpu(eb, &bg_key, slot);
11671 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* Copy the item out of the extent buffer before reading its fields */
11672 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11673 used = btrfs_block_group_used(&bg_item);
11674 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk item is keyed by the start bytenr of the block group */
11676 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11677 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11678 chunk_key.offset = bg_key.objectid;
11680 btrfs_init_path(&path);
11681 /* Search for the referencer chunk */
11682 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11685 "block group[%llu %llu] did not find the related chunk item",
11686 bg_key.objectid, bg_key.offset);
11687 err |= REFERENCER_MISSING;
11689 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11690 struct btrfs_chunk);
11691 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11694 "block group[%llu %llu] related chunk item length does not match",
11695 bg_key.objectid, bg_key.offset);
11696 err |= REFERENCER_MISMATCH;
11699 btrfs_release_path(&path);
11701 /* Search from the block group bytenr */
11702 extent_key.objectid = bg_key.objectid;
11703 extent_key.type = 0;
11704 extent_key.offset = 0;
11706 btrfs_init_path(&path);
11707 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11711 /* Iterate extent tree to account used space */
11713 leaf = path.nodes[0];
11715 /* Search slot can point to the last item beyond leaf nritems */
11716 if (path.slots[0] >= btrfs_header_nritems(leaf))
11719 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* Past the end of the block group range */
11720 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11723 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11724 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11726 if (extent_key.objectid < bg_key.objectid)
/* The METADATA_ITEM branch body is elided; EXTENT_ITEM adds key.offset */
11729 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11732 total += extent_key.offset;
11734 ei = btrfs_item_ptr(leaf, path.slots[0],
11735 struct btrfs_extent_item);
11736 flags = btrfs_extent_flags(leaf, ei);
/* Data extents must live in a DATA block group */
11737 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11738 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11740 "bad extent[%llu, %llu) type mismatch with chunk",
11741 extent_key.objectid,
11742 extent_key.objectid + extent_key.offset);
11743 err |= CHUNK_TYPE_MISMATCH;
/* Tree blocks must live in a SYSTEM or METADATA block group */
11745 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11746 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11747 BTRFS_BLOCK_GROUP_METADATA))) {
11749 "bad extent[%llu, %llu) type mismatch with chunk",
11750 extent_key.objectid,
11751 extent_key.objectid + nodesize);
11752 err |= CHUNK_TYPE_MISMATCH;
11756 ret = btrfs_next_item(extent_root, &path);
11762 btrfs_release_path(&path);
11764 if (total != used) {
11766 "block group[%llu %llu] used %llu but extent items used %llu",
11767 bg_key.objectid, bg_key.offset, used, total);
11768 err |= ACCOUNTING_MISMATCH;
11774 * Check a chunk item.
11775 * Including checking all referred dev_extents and block group
/*
 * check_chunk_item - check the chunk item at @slot of @eb together with the
 * block group and the dev extents it refers to.
 *
 * 1) The chunk is validated with btrfs_check_chunk_valid(); an invalid chunk
 *    sets BYTES_UNALIGNED | UNKNOWN_TYPE.
 * 2) The extent tree is searched for the block group item keyed by
 *    (chunk start, BTRFS_BLOCK_GROUP_ITEM_KEY, chunk length); a missing item
 *    or differing flags set REFERENCER_MISSING.
 * 3) For every stripe the dev tree is searched for the dev extent keyed by
 *    (devid, BTRFS_DEV_EXTENT_KEY, stripe offset) and its chunk objectid,
 *    chunk offset and length are compared; a failure sets BACKREF_MISSING.
 *
 * All messages identify the chunk by its logical range
 * [chunk_key.offset, chunk_end).
 *
 * NOTE(review): short lines (braces, local declarations, labels, continue
 * statements and the final return) are elided from this listing.  The flags
 * mismatch in step 2 reports REFERENCER_MISSING, whereas
 * check_block_group_item() uses REFERENCER_MISMATCH for its length mismatch -
 * confirm whether that is intended.
 */
11777 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11778 struct extent_buffer *eb, int slot)
11780 struct btrfs_root *extent_root = fs_info->extent_root;
11781 struct btrfs_root *dev_root = fs_info->dev_root;
11782 struct btrfs_path path;
11783 struct btrfs_key chunk_key;
11784 struct btrfs_key bg_key;
11785 struct btrfs_key devext_key;
11786 struct btrfs_chunk *chunk;
11787 struct extent_buffer *leaf;
11788 struct btrfs_block_group_item *bi;
11789 struct btrfs_block_group_item bg_item;
11790 struct btrfs_dev_extent *ptr;
11802 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11803 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11804 length = btrfs_chunk_length(eb, chunk);
/* The chunk covers the logical range [chunk_key.offset, chunk_end) */
11805 chunk_end = chunk_key.offset + length;
11806 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11809 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11811 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11814 type = btrfs_chunk_type(eb, chunk);
/* The block group item is keyed by the chunk start and length */
11816 bg_key.objectid = chunk_key.offset;
11817 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11818 bg_key.offset = length;
11820 btrfs_init_path(&path);
11821 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11824 "chunk[%llu %llu) did not find the related block group item",
11825 chunk_key.offset, chunk_end);
11826 err |= REFERENCER_MISSING;
11828 leaf = path.nodes[0];
11829 bi = btrfs_item_ptr(leaf, path.slots[0],
11830 struct btrfs_block_group_item);
11831 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11833 if (btrfs_block_group_flags(&bg_item) != type) {
11835 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11836 chunk_key.offset, chunk_end, type,
11837 btrfs_block_group_flags(&bg_item));
11838 err |= REFERENCER_MISSING;
11842 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11843 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11844 for (i = 0; i < num_stripes; i++) {
/* Drop the path left over from the previous lookup before reusing it */
11845 btrfs_release_path(&path);
11846 btrfs_init_path(&path);
11847 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11848 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11849 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11851 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11854 goto not_match_dev;
11856 leaf = path.nodes[0];
11857 ptr = btrfs_item_ptr(leaf, path.slots[0],
11858 struct btrfs_dev_extent);
11859 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11860 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
/* The dev extent must point back at this chunk and have the stripe length */
11861 if (objectid != chunk_key.objectid ||
11862 offset != chunk_key.offset ||
11863 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11864 goto not_match_dev;
11867 err |= BACKREF_MISSING;
/* Print the chunk start (key offset) like the other chunk messages here */
11869 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11870 chunk_key.offset, chunk_end, i);
11873 btrfs_release_path(&path);
11879 * Main entry function to check known items and update related accounting info
/*
 * check_leaf_items - run the matching checker on the items of leaf @eb.
 *
 * The type of the key at the current slot selects the checker:
 *   EXTENT_DATA                  -> check_extent_data_item()
 *   BLOCK_GROUP_ITEM             -> check_block_group_item()
 *   DEV_ITEM                     -> check_dev_item()
 *   CHUNK_ITEM                   -> check_chunk_item()
 *   DEV_EXTENT                   -> check_dev_extent_item()
 *   EXTENT_ITEM / METADATA_ITEM  -> check_extent_item()
 *   EXTENT_CSUM                  -> item size added to total_csum_bytes
 *   TREE_BLOCK_REF               -> check_tree_block_backref()
 *   EXTENT_DATA_REF              -> check_extent_data_backref()
 *   SHARED_BLOCK_REF             -> check_shared_block_backref()
 *   SHARED_DATA_REF              -> check_shared_data_backref()
 * The slot is advanced while ++slot is below the item count of the leaf.
 *
 * NOTE(review): the switch statement, the accumulation of ret and the return
 * are elided from this listing.
 */
11881 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11883 struct btrfs_fs_info *fs_info = root->fs_info;
11884 struct btrfs_key key;
11887 struct btrfs_extent_data_ref *dref;
11892 btrfs_item_key_to_cpu(eb, &key, slot);
11896 case BTRFS_EXTENT_DATA_KEY:
11897 ret = check_extent_data_item(root, eb, slot);
11900 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11901 ret = check_block_group_item(fs_info, eb, slot);
11904 case BTRFS_DEV_ITEM_KEY:
11905 ret = check_dev_item(fs_info, eb, slot);
11908 case BTRFS_CHUNK_ITEM_KEY:
11909 ret = check_chunk_item(fs_info, eb, slot);
11912 case BTRFS_DEV_EXTENT_KEY:
11913 ret = check_dev_extent_item(fs_info, eb, slot);
11916 case BTRFS_EXTENT_ITEM_KEY:
11917 case BTRFS_METADATA_ITEM_KEY:
11918 ret = check_extent_item(fs_info, eb, slot);
/* Checksum items are only accounted, not verified here */
11921 case BTRFS_EXTENT_CSUM_KEY:
11922 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* Keyed (non-inline) backref items follow */
11924 case BTRFS_TREE_BLOCK_REF_KEY:
11925 ret = check_tree_block_backref(fs_info, key.offset,
11929 case BTRFS_EXTENT_DATA_REF_KEY:
11930 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11931 ret = check_extent_data_backref(fs_info,
11932 btrfs_extent_data_ref_root(eb, dref),
11933 btrfs_extent_data_ref_objectid(eb, dref),
11934 btrfs_extent_data_ref_offset(eb, dref),
11936 btrfs_extent_data_ref_count(eb, dref));
11939 case BTRFS_SHARED_BLOCK_REF_KEY:
11940 ret = check_shared_block_backref(fs_info, key.offset,
11944 case BTRFS_SHARED_DATA_REF_KEY:
11945 ret = check_shared_data_backref(fs_info, key.offset,
11953 if (++slot < btrfs_header_nritems(eb))
11960 * Helper function for later fs/subvol tree check. To determine if a tree
11961 * block should be checked.
11962 * This function ensures that only the direct referencer with the lowest rootid
11963 * checks a fs/subvolume tree block.
11965 * Backref check at extent tree would detect errors like missing subvolume
11966 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * should_check - decide whether @root is the one that should check tree
 * block @eb.
 *
 * The extent tree is searched for the extent or metadata item of the block
 * (METADATA_ITEM key at offset (u64)-1, then btrfs_previous_extent_item()).
 * The inline refs of that item are walked; they start right after the extent
 * item for a METADATA_ITEM and after the btrfs_tree_block_info otherwise.
 * When a TREE_BLOCK_REF whose offset (a root id) is lower than
 * root->objectid is found, the path is released and the walk is left early.
 * Keyed tree block refs are not searched.
 *
 * NOTE(review): the return values and the error branches are elided from
 * this listing.
 */
11968 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11970 struct btrfs_root *extent_root = root->fs_info->extent_root;
11971 struct btrfs_key key;
11972 struct btrfs_path path;
11973 struct extent_buffer *leaf;
11975 struct btrfs_extent_item *ei;
11981 struct btrfs_extent_inline_ref *iref;
11984 btrfs_init_path(&path);
11985 key.objectid = btrfs_header_bytenr(eb);
11986 key.type = BTRFS_METADATA_ITEM_KEY;
11987 key.offset = (u64)-1;
11990 * Any failure in backref resolving means we can't determine
11991 * whom the tree block belongs to.
11992 * So in that case, we need to check that tree block
11994 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11998 ret = btrfs_previous_extent_item(extent_root, &path,
11999 btrfs_header_bytenr(eb));
12003 leaf = path.nodes[0];
12004 slot = path.slots[0];
12005 btrfs_item_key_to_cpu(leaf, &key, slot);
12006 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12008 if (key.type == BTRFS_METADATA_ITEM_KEY) {
12009 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12011 struct btrfs_tree_block_info *info;
12013 info = (struct btrfs_tree_block_info *)(ei + 1);
12014 iref = (struct btrfs_extent_inline_ref *)(info + 1);
12017 item_size = btrfs_item_size_nr(leaf, slot);
12018 ptr = (unsigned long)iref;
12019 end = (unsigned long)ei + item_size;
12020 while (ptr < end) {
12021 iref = (struct btrfs_extent_inline_ref *)ptr;
12022 type = btrfs_extent_inline_ref_type(leaf, iref);
12023 offset = btrfs_extent_inline_ref_offset(leaf, iref);
12026 * We only check the tree block if current root is
12027 * the lowest referencer of it.
12029 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
12030 offset < root->objectid) {
12031 btrfs_release_path(&path);
12035 ptr += btrfs_extent_inline_ref_size(type);
12038 * Normally we should also check keyed tree block ref, but that may be
12039 * very time consuming. Inlined ref should already make us skip a lot
12040 * of refs now. So skip search keyed tree block ref.
12044 btrfs_release_path(&path);
12049 * Traversal function for tree block. We will do:
12050 * 1) Skip shared fs/subvolume tree blocks
12051 * 2) Update related bytes accounting
12052 * 3) Pre-order traversal
/*
 * traverse_tree_block - check tree block @node of @root and, for a node,
 * recurse into its children.
 *
 * Blocks of fs trees for which should_check() reports false are tested for
 * first.  The block size is then added to total_btree_bytes (and to the fs
 * tree or extent tree totals depending on the header owner), the block is
 * checked with check_tree_block_ref(), and:
 *  - for a leaf, its free space is added to btree_space_waste and its items
 *    are checked with check_leaf_items();
 *  - for a node, the unused key pointer slots are added to
 *    btree_space_waste and every child is read with read_tree_block() and
 *    traversed recursively; children at the drop level of the root are
 *    tested with is_dropped_key() first.
 *
 * NOTE(review): the early return, the leaf or node branching, the error
 * accumulation and the final return are elided from this listing.
 */
12054 static int traverse_tree_block(struct btrfs_root *root,
12055 struct extent_buffer *node)
12057 struct extent_buffer *eb;
12058 struct btrfs_key key;
12059 struct btrfs_key drop_key;
12067 * Skip shared fs/subvolume tree block, in that case they will
12068 * be checked by referencer with lowest rootid
12070 if (is_fstree(root->objectid) && !should_check(root, node))
12073 /* Update bytes accounting */
12074 total_btree_bytes += node->len;
12075 if (fs_root_objectid(btrfs_header_owner(node)))
12076 total_fs_tree_bytes += node->len;
12077 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
12078 total_extent_tree_bytes += node->len;
12080 /* pre-order traversal, check itself first */
12081 level = btrfs_header_level(node);
12082 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
12083 btrfs_header_level(node),
12084 btrfs_header_owner(node));
12088 "check %s failed root %llu bytenr %llu level %d, force continue check",
12089 level ? "node":"leaf", root->objectid,
12090 btrfs_header_bytenr(node), btrfs_header_level(node));
/* Leaf: account the unused space and check every item */
12093 btree_space_waste += btrfs_leaf_free_space(root, node);
12094 ret = check_leaf_items(root, node);
/* Node: account the unused key pointer slots */
12099 nr = btrfs_header_nritems(node);
12100 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
12101 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
12102 sizeof(struct btrfs_key_ptr);
12104 /* Then check all its children */
12105 for (i = 0; i < nr; i++) {
12106 u64 blocknr = btrfs_node_blockptr(node, i);
12108 btrfs_node_key_to_cpu(node, &key, i);
/* Children at the drop level are tested against the drop progress key */
12109 if (level == root->root_item.drop_level &&
12110 is_dropped_key(&key, &drop_key))
12114 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
12115 * to call the function itself.
12117 eb = read_tree_block(root->fs_info, blocknr, 0);
12118 if (extent_buffer_uptodate(eb)) {
12119 ret = traverse_tree_block(root, eb);
12122 free_extent_buffer(eb);
12129 * Low memory usage version check_chunks_and_extents.
12131 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
12133 struct btrfs_path path;
12134 struct btrfs_key key;
12135 struct btrfs_root *root1;
12136 struct btrfs_root *root;
12137 struct btrfs_root *cur_root;
12141 root = fs_info->fs_root;
12143 root1 = root->fs_info->chunk_root;
12144 ret = traverse_tree_block(root1, root1->node);
12147 root1 = root->fs_info->tree_root;
12148 ret = traverse_tree_block(root1, root1->node);
12151 btrfs_init_path(&path);
12152 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
12154 key.type = BTRFS_ROOT_ITEM_KEY;
12156 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
12158 error("cannot find extent treet in tree_root");
12163 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12164 if (key.type != BTRFS_ROOT_ITEM_KEY)
12166 key.offset = (u64)-1;
12168 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12169 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
12172 cur_root = btrfs_read_fs_root(root->fs_info, &key);
12173 if (IS_ERR(cur_root) || !cur_root) {
12174 error("failed to read tree: %lld", key.objectid);
12178 ret = traverse_tree_block(cur_root, cur_root->node);
12181 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12182 btrfs_free_fs_root(cur_root);
12184 ret = btrfs_next_item(root1, &path);
12190 btrfs_release_path(&path);
/*
 * do_check_chunks_and_extents - entry point of the chunk and extent check.
 *
 * Prints the phase banner on stderr when progress reporting is disabled and
 * dispatches to check_chunks_and_extents_v2() in CHECK_MODE_LOWMEM or to
 * check_chunks_and_extents() otherwise.
 *
 * NOTE(review): the else keyword and the return are elided from this listing.
 */
12194 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
12198 if (!ctx.progress_enabled)
12199 fprintf(stderr, "checking extents\n");
12200 if (check_mode == CHECK_MODE_LOWMEM)
12201 ret = check_chunks_and_extents_v2(fs_info);
12203 ret = check_chunks_and_extents(fs_info);
/*
 * btrfs_fsck_reinit_root - give @root a freshly initialised root block.
 *
 * A block c is obtained either through a path that only takes a reference
 * with extent_buffer_get(c) (presumably reuse of the old node when
 * @overwrite is set - TODO confirm) or by allocating one with
 * btrfs_alloc_free_block().  Its header is zeroed and refilled with level,
 * bytenr, generation, backref revision and owner, followed by the fsid and
 * the chunk tree uuid, and the buffer is marked dirty.
 *
 * If the new block starts where the old root node started, the generation
 * and level of the root item are updated and written with
 * btrfs_update_root().  Finally the old buffer is released and the root is
 * put on the dirty list.
 *
 * NOTE(review): short lines (braces, conditions, error branches, the return)
 * are elided from this listing.
 */
12208 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
12209 struct btrfs_root *root, int overwrite)
12211 struct extent_buffer *c;
12212 struct extent_buffer *old = root->node;
12215 struct btrfs_disk_key disk_key = {0,0,0};
12221 extent_buffer_get(c);
12224 c = btrfs_alloc_free_block(trans, root,
12225 root->fs_info->nodesize,
12226 root->root_key.objectid,
12227 &disk_key, level, 0, 0);
12230 extent_buffer_get(c);
/* Start from an all-zero header and fill in the identifying fields */
12234 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
12235 btrfs_set_header_level(c, level);
12236 btrfs_set_header_bytenr(c, c->start);
12237 btrfs_set_header_generation(c, trans->transid);
12238 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
12239 btrfs_set_header_owner(c, root->root_key.objectid);
12241 write_extent_buffer(c, root->fs_info->fsid,
12242 btrfs_header_fsid(), BTRFS_FSID_SIZE);
12244 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
12245 btrfs_header_chunk_tree_uuid(c),
12248 btrfs_mark_buffer_dirty(c);
12250 * this case can happen in the following case:
12252 * 1.overwrite previous root.
12254 * 2.reinit reloc data root, this is because we skip pin
12255 * down reloc data tree before which means we can allocate
12256 * same block bytenr here.
12258 if (old->start == c->start) {
12259 btrfs_set_root_generation(&root->root_item,
12261 root->root_item.level = btrfs_header_level(root->node);
12262 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12263 &root->root_key, &root->root_item);
12265 free_extent_buffer(c);
12269 free_extent_buffer(old);
12271 add_root_to_dirty_list(root);
/*
 * Pin the extent of @eb and recurse into the blocks it references.
 *
 * @fs_info:   filesystem the blocks belong to
 * @eb:        tree block to pin
 * @tree_root: non-zero when walking the tree root (pin_metadata_blocks()
 *             passes 1 for the tree root node and 0 for the chunk root node)
 *
 * A block whose range already has EXTENT_DIRTY set in
 * fs_info->pinned_extents is not processed again. Items with a ROOT_ITEM key
 * lead to reading the root block recorded in the root item and recursing into
 * it with tree_root = 0; the extent tree, tree reloc and data reloc roots are
 * excluded. For node pointers, a child of a level 1 node outside the tree
 * root is pinned by bytenr (nodesize bytes) without being read; other
 * children are read and walked recursively.
 */
12275 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12276 struct extent_buffer *eb, int tree_root)
12278 struct extent_buffer *tmp;
12279 struct btrfs_root_item *ri;
12280 struct btrfs_key key;
12282 int level = btrfs_header_level(eb);
12288 * If we have pinned this block before, don't pin it again.
12289 * This can not only avoid forever loop with broken filesystem
12290 * but also give us some speedups.
12292 if (test_range_bit(&fs_info->pinned_extents, eb->start,
12293 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
12296 btrfs_pin_extent(fs_info, eb->start, eb->len);
12298 nritems = btrfs_header_nritems(eb);
12299 for (i = 0; i < nritems; i++) {
12301 btrfs_item_key_to_cpu(eb, &key, i);
12302 if (key.type != BTRFS_ROOT_ITEM_KEY)
12304 /* Skip the extent root and reloc roots */
12305 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
12306 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
12307 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
12309 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
12310 bytenr = btrfs_disk_root_bytenr(eb, ri);
12313 * If at any point we start needing the real root we
12314 * will have to build a stump root for the root we are
12315 * in, but for now this doesn't actually use the root so
12316 * just pass in extent_root.
12318 tmp = read_tree_block(fs_info, bytenr, 0);
12319 if (!extent_buffer_uptodate(tmp)) {
12320 fprintf(stderr, "Error reading root block\n");
12323 ret = pin_down_tree_blocks(fs_info, tmp, 0);
12324 free_extent_buffer(tmp);
12328 bytenr = btrfs_node_blockptr(eb, i);
12330 /* If we aren't the tree root don't read the block */
12331 if (level == 1 && !tree_root) {
12332 btrfs_pin_extent(fs_info, bytenr,
12333 fs_info->nodesize);
12337 tmp = read_tree_block(fs_info, bytenr, 0);
12338 if (!extent_buffer_uptodate(tmp)) {
12339 fprintf(stderr, "Error reading tree block\n");
12342 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
12343 free_extent_buffer(tmp);
/*
 * Pin the blocks reachable from the chunk root and then from the tree root by
 * calling pin_down_tree_blocks() on each root node. Only the tree root call
 * passes tree_root = 1; its result is the return value of this function.
 */
12352 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
12356 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
12360 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk tree.
 *
 * Clears the three avail_*_alloc_bits masks of @fs_info, then walks the
 * CHUNK_ITEM keys of the chunk root: each chunk is registered with
 * btrfs_add_block_group() using the chunk type and length, and its range is
 * marked dirty in fs_info->free_space_cache. Afterwards the block groups are
 * iterated with btrfs_lookup_first_block_group(), advancing start past each
 * group.
 */
12363 static int reset_block_groups(struct btrfs_fs_info *fs_info)
12365 struct btrfs_block_group_cache *cache;
12366 struct btrfs_path path;
12367 struct extent_buffer *leaf;
12368 struct btrfs_chunk *chunk;
12369 struct btrfs_key key;
12373 btrfs_init_path(&path);
12375 key.type = BTRFS_CHUNK_ITEM_KEY;
12377 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
12379 btrfs_release_path(&path);
12384 * We do this in case the block groups were screwed up and had alloc
12385 * bits that aren't actually set on the chunks. This happens with
12386 * restored images every time and could happen in real life I guess.
12388 fs_info->avail_data_alloc_bits = 0;
12389 fs_info->avail_metadata_alloc_bits = 0;
12390 fs_info->avail_system_alloc_bits = 0;
12392 /* First we need to create the in-memory block groups */
12394 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12395 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
12397 btrfs_release_path(&path);
12405 leaf = path.nodes[0];
12406 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12407 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
12412 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
12413 btrfs_add_block_group(fs_info, 0,
12414 btrfs_chunk_type(leaf, chunk),
12415 key.objectid, key.offset,
12416 btrfs_chunk_length(leaf, chunk));
12417 set_extent_dirty(&fs_info->free_space_cache, key.offset,
12418 key.offset + btrfs_chunk_length(leaf, chunk));
12423 cache = btrfs_lookup_first_block_group(fs_info, start);
12427 start = cache->key.objectid + cache->key.offset;
12430 btrfs_release_path(&path);
/*
 * Remove pending balance state and recreate the data reloc tree.
 *
 * In the tree root: looks up the BALANCE item and deletes it with
 * btrfs_del_item(), then deletes ROOT_ITEMs with objectid
 * BTRFS_TREE_RELOC_OBJECTID through btrfs_del_items(). After that the data
 * reloc tree is read, recorded in the transaction, its root node is
 * reinitialized with btrfs_fsck_reinit_root() and its root directory is
 * created again with btrfs_make_root_dir().
 */
12434 static int reset_balance(struct btrfs_trans_handle *trans,
12435 struct btrfs_fs_info *fs_info)
12437 struct btrfs_root *root = fs_info->tree_root;
12438 struct btrfs_path path;
12439 struct extent_buffer *leaf;
12440 struct btrfs_key key;
12441 int del_slot, del_nr = 0;
12445 btrfs_init_path(&path);
12446 key.objectid = BTRFS_BALANCE_OBJECTID;
12447 key.type = BTRFS_BALANCE_ITEM_KEY;
12449 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12454 goto reinit_data_reloc;
12459 ret = btrfs_del_item(trans, root, &path);
12462 btrfs_release_path(&path);
12464 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12465 key.type = BTRFS_ROOT_ITEM_KEY;
12467 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12471 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12476 ret = btrfs_del_items(trans, root, &path,
12483 btrfs_release_path(&path);
12486 ret = btrfs_search_slot(trans, root, &key, &path,
12493 leaf = path.nodes[0];
12494 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12495 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
12497 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
12502 del_slot = path.slots[0];
12511 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
12515 btrfs_release_path(&path);
12518 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
12519 key.type = BTRFS_ROOT_ITEM_KEY;
12520 key.offset = (u64)-1;
12521 root = btrfs_read_fs_root(fs_info, &key);
12522 if (IS_ERR(root)) {
12523 fprintf(stderr, "Error reading data reloc tree\n");
12524 ret = PTR_ERR(root);
12527 record_root_in_trans(trans, root);
12528 ret = btrfs_fsck_reinit_root(trans, root, 0);
12531 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
12533 btrfs_release_path(&path);
/*
 * Recreate the extent tree inside transaction @trans.
 *
 * Filesystems with the MIXED_GROUPS incompat flag get a "not supported yet"
 * message on stderr. The visible steps, in order:
 *   1. pin_metadata_blocks()      - protect metadata that is in use
 *   2. btrfs_free_block_groups() and reset_block_groups()
 *   3. btrfs_fsck_reinit_root() on the extent root
 *   4. one btrfs_insert_item() per in-memory block group, each followed by
 *      btrfs_extent_post_op()
 *   5. reset_balance()
 * A failure of a step is reported on stderr with a step specific message.
 */
12537 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
12538 struct btrfs_fs_info *fs_info)
12544 * The only reason we don't do this is because right now we're just
12545 * walking the trees we find and pinning down their bytes, we don't look
12546 * at any of the leaves. In order to do mixed groups we'd have to check
12547 * the leaves of any fs roots and pin down the bytes for any file
12548 * extents we find. Not hard but why do it if we don't have to?
12550 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
12551 fprintf(stderr, "We don't support re-initing the extent tree "
12552 "for mixed block groups yet, please notify a btrfs "
12553 "developer you want to do this so they can add this "
12554 "functionality.\n");
12559 * first we need to walk all of the trees except the extent tree and pin
12560 * down the bytes that are in use so we don't overwrite any existing
12563 ret = pin_metadata_blocks(fs_info);
12565 fprintf(stderr, "error pinning down used bytes\n");
12570 * Need to drop all the block groups since we're going to recreate all
12573 btrfs_free_block_groups(fs_info);
12574 ret = reset_block_groups(fs_info);
12576 fprintf(stderr, "error resetting the block groups\n");
12580 /* Ok we can allocate now, reinit the extent root */
12581 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
12583 fprintf(stderr, "extent root initialization failed\n");
12585 * When the transaction code is updated we should end the
12586 * transaction, but for now progs only knows about commit so
12587 * just return an error.
12593 * Now we have all the in-memory block groups setup so we can make
12594 * allocations properly, and the metadata we care about is safe since we
12595 * pinned all of it above.
12598 struct btrfs_block_group_cache *cache;
12600 cache = btrfs_lookup_first_block_group(fs_info, start);
12603 start = cache->key.objectid + cache->key.offset;
12604 ret = btrfs_insert_item(trans, fs_info->extent_root,
12605 &cache->key, &cache->item,
12606 sizeof(cache->item));
12608 fprintf(stderr, "Error adding block group\n");
12611 btrfs_extent_post_op(trans, fs_info->extent_root);
12614 ret = reset_balance(trans, fs_info);
12616 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Re-COW the path to metadata block @eb in the tree that owns it.
 *
 * The owning root is read using the header owner of @eb as objectid. Inside a
 * new transaction, btrfs_search_slot() is called with cow = 1 for the first
 * key of @eb, with path.lowest_level set to the level of @eb so the search
 * stops at that level. The transaction is then committed.
 *
 * A failed root lookup or transaction start is returned as PTR_ERR().
 * NOTE(review): the return value of btrfs_commit_transaction() is dropped.
 */
12621 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
12623 struct btrfs_path path;
12624 struct btrfs_trans_handle *trans;
12625 struct btrfs_key key;
12628 printf("Recowing metadata block %llu\n", eb->start);
12629 key.objectid = btrfs_header_owner(eb);
12630 key.type = BTRFS_ROOT_ITEM_KEY;
12631 key.offset = (u64)-1;
12633 root = btrfs_read_fs_root(root->fs_info, &key);
12634 if (IS_ERR(root)) {
12635 fprintf(stderr, "Couldn't find owner root %llu\n",
12637 return PTR_ERR(root);
12640 trans = btrfs_start_transaction(root, 1);
12642 return PTR_ERR(trans);
12644 btrfs_init_path(&path);
12645 path.lowest_level = btrfs_header_level(eb);
12646 if (path.lowest_level)
12647 btrfs_node_key_to_cpu(eb, &key, 0);
12649 btrfs_item_key_to_cpu(eb, &key, 0);
12651 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12652 btrfs_commit_transaction(trans, root);
12653 btrfs_release_path(&path);
/*
 * Delete the item recorded in @bad from the root named by bad->root_id.
 *
 * The root is read with bad->root_id as objectid, a transaction is started,
 * bad->key is searched with ins_len = -1 and cow = 1, and the item is removed
 * with btrfs_del_item(). The transaction is committed afterwards.
 *
 * A failed root lookup or transaction start is returned as PTR_ERR().
 * NOTE(review): the return value of btrfs_commit_transaction() is dropped.
 */
12657 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12659 struct btrfs_path path;
12660 struct btrfs_trans_handle *trans;
12661 struct btrfs_key key;
12664 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12665 bad->key.type, bad->key.offset);
12666 key.objectid = bad->root_id;
12667 key.type = BTRFS_ROOT_ITEM_KEY;
12668 key.offset = (u64)-1;
12670 root = btrfs_read_fs_root(root->fs_info, &key);
12671 if (IS_ERR(root)) {
12672 fprintf(stderr, "Couldn't find owner root %llu\n",
12674 return PTR_ERR(root);
12677 trans = btrfs_start_transaction(root, 1);
12679 return PTR_ERR(trans);
12681 btrfs_init_path(&path);
12682 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12688 ret = btrfs_del_item(trans, root, &path);
12690 btrfs_commit_transaction(trans, root);
12691 btrfs_release_path(&path);
/*
 * Drop the log tree reference from the super block.
 *
 * Starts a transaction on @root, sets log_root and log_root_level of the
 * super copy to 0 and commits. ret carries PTR_ERR() of a failed transaction
 * start, or the result of btrfs_commit_transaction().
 */
12695 static int zero_log_tree(struct btrfs_root *root)
12697 struct btrfs_trans_handle *trans;
12700 trans = btrfs_start_transaction(root, 1);
12701 if (IS_ERR(trans)) {
12702 ret = PTR_ERR(trans);
12705 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12706 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12707 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and store checksums for the data range beginning at @start.
 *
 * The range is processed in steps of fs_info->sectorsize: each step reads
 * data at start + offset into @buf with read_extent_data() and passes it to
 * btrfs_csum_file_block() on @csum_root, with start + len as the end of the
 * extent. Callers in this file allocate one sector for @buf.
 */
12711 static int populate_csum(struct btrfs_trans_handle *trans,
12712 struct btrfs_root *csum_root, char *buf, u64 start,
12715 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12720 while (offset < len) {
12721 sectorsize = fs_info->sectorsize;
12722 ret = read_extent_data(fs_info, buf, start + offset,
12726 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12727 start + offset, buf, sectorsize);
12730 offset += sectorsize;
/*
 * Add checksums for the regular file extents of one fs/subvolume tree.
 *
 * Iterates the items of @cur_root with btrfs_next_item(). For EXTENT_DATA
 * items whose file extent type is BTRFS_FILE_EXTENT_REG, populate_csum() is
 * called on the on-disk extent (disk_bytenr, disk_num_bytes). A scratch
 * buffer of one sector is allocated for the reads.
 *
 * NOTE(review): -EEXIST from populate_csum() is tested separately from other
 * results; presumably it is tolerated - confirm.
 */
12735 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12736 struct btrfs_root *csum_root,
12737 struct btrfs_root *cur_root)
12739 struct btrfs_path path;
12740 struct btrfs_key key;
12741 struct extent_buffer *node;
12742 struct btrfs_file_extent_item *fi;
12749 buf = malloc(cur_root->fs_info->sectorsize);
12753 btrfs_init_path(&path);
12757 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12760 /* Iterate all regular file extents and fill its csum */
12762 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12764 if (key.type != BTRFS_EXTENT_DATA_KEY)
12766 node = path.nodes[0];
12767 slot = path.slots[0];
12768 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12769 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12771 start = btrfs_file_extent_disk_bytenr(node, fi);
12772 len = btrfs_file_extent_disk_num_bytes(node, fi);
12774 ret = populate_csum(trans, csum_root, buf, start, len);
12775 if (ret == -EEXIST)
12781 * TODO: if next leaf is corrupted, jump to nearest next valid
12784 ret = btrfs_next_item(cur_root, &path);
12794 btrfs_release_path(&path);
/*
 * Fill the csum tree by walking the fs/subvolume trees.
 *
 * Scans the tree root from objectid BTRFS_FS_TREE_OBJECTID. Each item key is
 * checked against BTRFS_LAST_FREE_OBJECTID, BTRFS_ROOT_ITEM_KEY and
 * is_fstree(); for roots that pass, the root is read with
 * btrfs_read_fs_root() (key.offset set to (u64)-1) and handed to
 * fill_csum_tree_from_one_fs_root(). A root that cannot be read is reported
 * on stderr.
 */
12799 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12800 struct btrfs_root *csum_root)
12802 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12803 struct btrfs_path path;
12804 struct btrfs_root *tree_root = fs_info->tree_root;
12805 struct btrfs_root *cur_root;
12806 struct extent_buffer *node;
12807 struct btrfs_key key;
12811 btrfs_init_path(&path);
12812 key.objectid = BTRFS_FS_TREE_OBJECTID;
12814 key.type = BTRFS_ROOT_ITEM_KEY;
12815 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12824 node = path.nodes[0];
12825 slot = path.slots[0];
12826 btrfs_item_key_to_cpu(node, &key, slot);
12827 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12829 if (key.type != BTRFS_ROOT_ITEM_KEY)
12831 if (!is_fstree(key.objectid))
12833 key.offset = (u64)-1;
12835 cur_root = btrfs_read_fs_root(fs_info, &key);
12836 if (IS_ERR(cur_root) || !cur_root) {
12837 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12841 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12846 ret = btrfs_next_item(tree_root, &path);
12856 btrfs_release_path(&path);
/*
 * Fill the csum tree by walking the extent tree.
 *
 * Iterates the extent root leaf by leaf. For EXTENT_ITEM keys whose extent
 * flags include BTRFS_EXTENT_FLAG_DATA, populate_csum() is called with the
 * key objectid as the start of the range. A scratch buffer of one sector is
 * allocated for the reads.
 */
12860 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12861 struct btrfs_root *csum_root)
12863 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12864 struct btrfs_path path;
12865 struct btrfs_extent_item *ei;
12866 struct extent_buffer *leaf;
12868 struct btrfs_key key;
12871 btrfs_init_path(&path);
12873 key.type = BTRFS_EXTENT_ITEM_KEY;
12875 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12877 btrfs_release_path(&path);
12881 buf = malloc(csum_root->fs_info->sectorsize);
12883 btrfs_release_path(&path);
12888 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12889 ret = btrfs_next_leaf(extent_root, &path);
12897 leaf = path.nodes[0];
12899 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12900 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12905 ei = btrfs_item_ptr(leaf, path.slots[0],
12906 struct btrfs_extent_item);
12907 if (!(btrfs_extent_flags(leaf, ei) &
12908 BTRFS_EXTENT_FLAG_DATA)) {
12913 ret = populate_csum(trans, csum_root, buf, key.objectid,
12920 btrfs_release_path(&path);
12926 * Recalculate the csum and put it into the csum tree.
12928 * Extent tree init will wipe out all the extent info, so in that case, we
12929 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
12930 * will use fs/subvol trees to init the csum tree.
/*
 * Dispatcher for refilling the csum tree: a non-zero @search_fs_tree selects
 * fill_csum_tree_from_fs(), any other value ends up in
 * fill_csum_tree_from_extent(). The result of the chosen helper is returned
 * unchanged.
 */
12932 static int fill_csum_tree(struct btrfs_trans_handle *trans,
12933 struct btrfs_root *csum_root,
12934 int search_fs_tree)
12936 if (search_fs_tree)
12937 return fill_csum_tree_from_fs(trans, csum_root);
12939 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache.
 *
 * After checking that the cache pointer is set, entries are taken from the
 * tree one at a time with first_cache_extent(), unlinked with
 * remove_cache_extent() and mapped back to their root_item_info via
 * container_of(). Finally the tree itself is freed and roots_info_cache is
 * reset to NULL.
 */
12942 static void free_roots_info_cache(void)
12944 if (!roots_info_cache)
12947 while (!cache_tree_empty(roots_info_cache)) {
12948 struct cache_extent *entry;
12949 struct root_item_info *rii;
12951 entry = first_cache_extent(roots_info_cache);
12954 remove_cache_extent(roots_info_cache, entry);
12955 rii = container_of(entry, struct root_item_info, cache_extent);
12959 free(roots_info_cache);
12960 roots_info_cache = NULL;
/*
 * Populate roots_info_cache with one root_item_info per root id, describing
 * the highest-level tree block recorded for that root in the extent tree.
 *
 * The cache tree is allocated on first use. The extent root is walked leaf
 * by leaf; only EXTENT_ITEM keys flagged BTRFS_EXTENT_FLAG_TREE_BLOCK and
 * METADATA_ITEM keys are considered. The level comes from the key offset for
 * METADATA_ITEM and from the btrfs_tree_block_info for EXTENT_ITEM. Only
 * items whose first inline ref has type BTRFS_TREE_BLOCK_REF_KEY are used;
 * the offset of that ref is the root id.
 *
 * Entries are keyed by root id (cache_extent.start = root_id, size = 1). A
 * level of (u8)-1 marks an entry that has not been filled yet. When a block
 * with a higher level is found, level, bytenr and generation are replaced
 * and node_count restarts at 1; blocks at the same level as the current one
 * are handled by a separate branch.
 */
12963 static int build_roots_info_cache(struct btrfs_fs_info *info)
12966 struct btrfs_key key;
12967 struct extent_buffer *leaf;
12968 struct btrfs_path path;
12970 if (!roots_info_cache) {
12971 roots_info_cache = malloc(sizeof(*roots_info_cache));
12972 if (!roots_info_cache)
12974 cache_tree_init(roots_info_cache);
12977 btrfs_init_path(&path);
12979 key.type = BTRFS_EXTENT_ITEM_KEY;
12981 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12984 leaf = path.nodes[0];
12987 struct btrfs_key found_key;
12988 struct btrfs_extent_item *ei;
12989 struct btrfs_extent_inline_ref *iref;
12990 int slot = path.slots[0];
12995 struct cache_extent *entry;
12996 struct root_item_info *rii;
12998 if (slot >= btrfs_header_nritems(leaf)) {
12999 ret = btrfs_next_leaf(info->extent_root, &path);
13006 leaf = path.nodes[0];
13007 slot = path.slots[0];
13010 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13012 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
13013 found_key.type != BTRFS_METADATA_ITEM_KEY)
13016 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
13017 flags = btrfs_extent_flags(leaf, ei);
13019 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
13020 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
13023 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
13024 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
13025 level = found_key.offset;
13027 struct btrfs_tree_block_info *binfo;
13029 binfo = (struct btrfs_tree_block_info *)(ei + 1);
13030 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
13031 level = btrfs_tree_block_level(leaf, binfo);
13035 * For a root extent, it must be of the following type and the
13036 * first (and only one) iref in the item.
13038 type = btrfs_extent_inline_ref_type(leaf, iref);
13039 if (type != BTRFS_TREE_BLOCK_REF_KEY)
13042 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
13043 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13045 rii = malloc(sizeof(struct root_item_info));
13050 rii->cache_extent.start = root_id;
13051 rii->cache_extent.size = 1;
13052 rii->level = (u8)-1;
13053 entry = &rii->cache_extent;
13054 ret = insert_cache_extent(roots_info_cache, entry);
13057 rii = container_of(entry, struct root_item_info,
13061 ASSERT(rii->cache_extent.start == root_id);
13062 ASSERT(rii->cache_extent.size == 1);
13064 if (level > rii->level || rii->level == (u8)-1) {
13065 rii->level = level;
13066 rii->bytenr = found_key.objectid;
13067 rii->gen = btrfs_extent_generation(leaf, ei);
13068 rii->node_count = 1;
13069 } else if (level == rii->level) {
13077 btrfs_release_path(&path);
/*
 * Compare the root item at @path with the cached information for its root
 * and, when allowed, rewrite it.
 *
 * @path:           path whose nodes[0]/slots[0] point at the ROOT_ITEM
 * @root_key:       key of that item; its objectid is the root id
 * @read_only_mode: non-zero to only detect a mismatch, zero to also write
 *
 * The root item is copied out of the leaf and its bytenr, level and
 * generation are compared with the root_item_info entry from
 * roots_info_cache. On a mismatch a message with the current and new values
 * is printed, except when read_only_mode and the global repair flag are both
 * set. With read_only_mode == 0 the three fields are updated and the item is
 * written back into the leaf with write_extent_buffer().
 *
 * Reported as errors: no cache entry for the root, an entry whose node_count
 * is not 1, and a root item generation newer than the cached generation.
 */
13082 static int maybe_repair_root_item(struct btrfs_path *path,
13083 const struct btrfs_key *root_key,
13084 const int read_only_mode)
13086 const u64 root_id = root_key->objectid;
13087 struct cache_extent *entry;
13088 struct root_item_info *rii;
13089 struct btrfs_root_item ri;
13090 unsigned long offset;
13092 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13095 "Error: could not find extent items for root %llu\n",
13096 root_key->objectid);
13100 rii = container_of(entry, struct root_item_info, cache_extent);
13101 ASSERT(rii->cache_extent.start == root_id);
13102 ASSERT(rii->cache_extent.size == 1);
13104 if (rii->node_count != 1) {
13106 "Error: could not find btree root extent for root %llu\n",
13111 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
13112 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
13114 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
13115 btrfs_root_level(&ri) != rii->level ||
13116 btrfs_root_generation(&ri) != rii->gen) {
13119 * If we're in repair mode but our caller told us to not update
13120 * the root item, i.e. just check if it needs to be updated, don't
13121 * print this message, since the caller will call us again shortly
13122 * for the same root item without read only mode (the caller will
13123 * open a transaction first).
13125 if (!(read_only_mode && repair))
13127 "%sroot item for root %llu,"
13128 " current bytenr %llu, current gen %llu, current level %u,"
13129 " new bytenr %llu, new gen %llu, new level %u\n",
13130 (read_only_mode ? "" : "fixing "),
13132 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
13133 btrfs_root_level(&ri),
13134 rii->bytenr, rii->gen, rii->level);
13136 if (btrfs_root_generation(&ri) > rii->gen) {
13138 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
13139 root_id, btrfs_root_generation(&ri), rii->gen);
13143 if (!read_only_mode) {
13144 btrfs_set_root_bytenr(&ri, rii->bytenr);
13145 btrfs_set_root_level(&ri, rii->level);
13146 btrfs_set_root_generation(&ri, rii->gen);
13147 write_extent_buffer(path->nodes[0], &ri,
13148 offset, sizeof(ri));
13158 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
13159 * caused read-only snapshots to be corrupted if they were created at a moment
13160 * when the source subvolume/snapshot had orphan items. The issue was that the
13161 * on-disk root items became incorrect, referring to the pre orphan cleanup root
13162 * node instead of the post orphan cleanup root node.
13163 * So this function, and its callees, just detects and fixes those cases. Even
13164 * though the regression was for read-only snapshots, this function applies to
13165 * any snapshot/subvolume root.
13166 * This must be run before any other repair code - not doing so makes other
13167 * repair code delete or modify backrefs in the extent tree for example, which
13168 * will result in an inconsistent fs after repairing the root items.
/*
 * Check, and in repair mode fix, the root items stored in the tree root.
 *
 * Builds the roots info cache, then scans the tree root from objectid
 * BTRFS_FIRST_FREE_OBJECTID. Every ROOT_ITEM except those of tree reloc
 * roots is passed to maybe_repair_root_item(), in read-only mode while no
 * transaction is open (trans == NULL) and in write mode otherwise. When a
 * leaf is exhausted, find_next_key() provides the key to continue from and
 * an open transaction is committed. The cache is freed and the path released
 * before returning.
 */
13170 static int repair_root_items(struct btrfs_fs_info *info)
13172 struct btrfs_path path;
13173 struct btrfs_key key;
13174 struct extent_buffer *leaf;
13175 struct btrfs_trans_handle *trans = NULL;
13178 int need_trans = 0;
13180 btrfs_init_path(&path);
13182 ret = build_roots_info_cache(info);
13186 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
13187 key.type = BTRFS_ROOT_ITEM_KEY;
13192 * Avoid opening and committing transactions if a leaf doesn't have
13193 * any root items that need to be fixed, so that we avoid rotating
13194 * backup roots unnecessarily.
13197 trans = btrfs_start_transaction(info->tree_root, 1);
13198 if (IS_ERR(trans)) {
13199 ret = PTR_ERR(trans);
13204 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
13208 leaf = path.nodes[0];
13211 struct btrfs_key found_key;
13213 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
13214 int no_more_keys = find_next_key(&path, &key);
13216 btrfs_release_path(&path);
13218 ret = btrfs_commit_transaction(trans,
13230 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13232 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
13234 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13237 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
13241 if (!trans && repair) {
13244 btrfs_release_path(&path);
13254 free_roots_info_cache();
13255 btrfs_release_path(&path);
13257 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the v1 free space cache of the block groups and invalidate the cache
 * generation in the super block.
 *
 * Block groups are visited with btrfs_lookup_first_block_group(), calling
 * btrfs_clear_free_space_cache() on each and advancing past its end. After
 * that, cache_generation of the super copy is set to (u64)-1 inside a
 * transaction on the tree root.
 *
 * NOTE(review): the return value of btrfs_commit_transaction() is dropped.
 */
13264 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13266 struct btrfs_trans_handle *trans;
13267 struct btrfs_block_group_cache *bg_cache;
13271 /* Clear all free space cache inodes and its extent data */
13273 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13276 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
13279 current = bg_cache->key.objectid + bg_cache->key.offset;
13282 /* Don't forget to set cache_generation to -1 */
13283 trans = btrfs_start_transaction(fs_info->tree_root, 0);
13284 if (IS_ERR(trans)) {
13285 error("failed to update super block cache generation");
13286 return PTR_ERR(trans);
13288 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
13289 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Clear the free space cache in the version selected by clear_version.
 *
 * Version 1: when the FREE_SPACE_TREE compat_ro flag is set, a message
 * pointing at "--clear-space-cache v2" is produced; the v1 cache is cleared
 * through clear_free_space_cache().
 * Version 2: when the FREE_SPACE_TREE compat_ro flag is not set, "no free
 * space cache v2 to clear" is printed; the free space tree is cleared
 * through btrfs_clear_free_space_tree().
 * Progress and failure messages are printed for both versions.
 */
13294 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
13299 if (clear_version == 1) {
13300 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13302 "free space cache v2 detected, use --clear-space-cache v2");
13306 printf("Clearing free space cache\n");
13307 ret = clear_free_space_cache(fs_info);
13309 error("failed to clear free space cache");
13312 printf("Free space cache cleared\n");
13314 } else if (clear_version == 2) {
13315 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13316 printf("no free space cache v2 to clear\n");
13320 printf("Clear free space cache v2\n");
13321 ret = btrfs_clear_free_space_tree(fs_info);
13323 error("failed to clear free space cache v2: %d", ret);
13326 printf("free space cache v2 cleared\n");
/*
 * Usage text for "btrfs check": synopsis, short and long description, then
 * one line per option. cmd_check() hands this array to usage().
 */
13333 const char * const cmd_check_usage[] = {
13334 "btrfs check [options] <device>",
13335 "Check structural integrity of a filesystem (unmounted).",
13336 "Check structural integrity of an unmounted filesystem. Verify internal",
13337 "trees' consistency and item connectivity. In the repair mode try to",
13338 "fix the problems found. ",
13339 "WARNING: the repair mode is considered dangerous",
13341 "-s|--super <superblock> use this superblock copy",
13342 "-b|--backup use the first valid backup root copy",
13343 "--force skip mount checks, repair is not possible",
13344 "--repair try to repair the filesystem",
13345 "--readonly run in read-only mode (default)",
13346 "--init-csum-tree create a new CRC tree",
13347 "--init-extent-tree create a new extent tree",
13348 "--mode <MODE> allows choice of memory/IO trade-offs",
13349 " where MODE is one of:",
13350 " original - read inodes and extents to memory (requires",
13351 " more memory, does less IO)",
13352 " lowmem - try to use less memory but read blocks again",
13354 "--check-data-csum verify checksums of data blocks",
13355 "-Q|--qgroup-report print a report on qgroup consistency",
13356 "-E|--subvol-extents <subvolid>",
13357 " print subvolume extents and sharing state",
13358 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
13359 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
13360 "-p|--progress indicate progress",
13361 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
13365 int cmd_check(int argc, char **argv)
13367 struct cache_tree root_cache;
13368 struct btrfs_root *root;
13369 struct btrfs_fs_info *info;
13372 u64 tree_root_bytenr = 0;
13373 u64 chunk_root_bytenr = 0;
13374 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
13378 int init_csum_tree = 0;
13380 int clear_space_cache = 0;
13381 int qgroup_report = 0;
13382 int qgroups_repaired = 0;
13383 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
13388 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
13389 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
13390 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
13391 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
13392 GETOPT_VAL_FORCE };
13393 static const struct option long_options[] = {
13394 { "super", required_argument, NULL, 's' },
13395 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
13396 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
13397 { "init-csum-tree", no_argument, NULL,
13398 GETOPT_VAL_INIT_CSUM },
13399 { "init-extent-tree", no_argument, NULL,
13400 GETOPT_VAL_INIT_EXTENT },
13401 { "check-data-csum", no_argument, NULL,
13402 GETOPT_VAL_CHECK_CSUM },
13403 { "backup", no_argument, NULL, 'b' },
13404 { "subvol-extents", required_argument, NULL, 'E' },
13405 { "qgroup-report", no_argument, NULL, 'Q' },
13406 { "tree-root", required_argument, NULL, 'r' },
13407 { "chunk-root", required_argument, NULL,
13408 GETOPT_VAL_CHUNK_TREE },
13409 { "progress", no_argument, NULL, 'p' },
13410 { "mode", required_argument, NULL,
13412 { "clear-space-cache", required_argument, NULL,
13413 GETOPT_VAL_CLEAR_SPACE_CACHE},
13414 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
13415 { NULL, 0, NULL, 0}
13418 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
13422 case 'a': /* ignored */ break;
13424 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
13427 num = arg_strtou64(optarg);
13428 if (num >= BTRFS_SUPER_MIRROR_MAX) {
13430 "super mirror should be less than %d",
13431 BTRFS_SUPER_MIRROR_MAX);
13434 bytenr = btrfs_sb_offset(((int)num));
13435 printf("using SB copy %llu, bytenr %llu\n", num,
13436 (unsigned long long)bytenr);
13442 subvolid = arg_strtou64(optarg);
13445 tree_root_bytenr = arg_strtou64(optarg);
13447 case GETOPT_VAL_CHUNK_TREE:
13448 chunk_root_bytenr = arg_strtou64(optarg);
13451 ctx.progress_enabled = true;
13455 usage(cmd_check_usage);
13456 case GETOPT_VAL_REPAIR:
13457 printf("enabling repair mode\n");
13459 ctree_flags |= OPEN_CTREE_WRITES;
13461 case GETOPT_VAL_READONLY:
13464 case GETOPT_VAL_INIT_CSUM:
13465 printf("Creating a new CRC tree\n");
13466 init_csum_tree = 1;
13468 ctree_flags |= OPEN_CTREE_WRITES;
13470 case GETOPT_VAL_INIT_EXTENT:
13471 init_extent_tree = 1;
13472 ctree_flags |= (OPEN_CTREE_WRITES |
13473 OPEN_CTREE_NO_BLOCK_GROUPS);
13476 case GETOPT_VAL_CHECK_CSUM:
13477 check_data_csum = 1;
13479 case GETOPT_VAL_MODE:
13480 check_mode = parse_check_mode(optarg);
13481 if (check_mode == CHECK_MODE_UNKNOWN) {
13482 error("unknown mode: %s", optarg);
13486 case GETOPT_VAL_CLEAR_SPACE_CACHE:
13487 if (strcmp(optarg, "v1") == 0) {
13488 clear_space_cache = 1;
13489 } else if (strcmp(optarg, "v2") == 0) {
13490 clear_space_cache = 2;
13491 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
13494 "invalid argument to --clear-space-cache, must be v1 or v2");
13497 ctree_flags |= OPEN_CTREE_WRITES;
13499 case GETOPT_VAL_FORCE:
13505 if (check_argc_exact(argc - optind, 1))
13506 usage(cmd_check_usage);
13508 if (ctx.progress_enabled) {
13509 ctx.tp = TASK_NOTHING;
13510 ctx.info = task_init(print_status_check, print_status_return, &ctx);
13513 /* This check is the only reason for --readonly to exist */
13514 if (readonly && repair) {
13515 error("repair options are not compatible with --readonly");
13520 * experimental and dangerous
13522 if (repair && check_mode == CHECK_MODE_LOWMEM)
13523 warning("low-memory mode repair support is only partial");
13526 cache_tree_init(&root_cache);
13528 ret = check_mounted(argv[optind]);
13531 error("could not check mount status: %s",
13537 "%s is currently mounted, use --force if you really intend to check the filesystem",
13545 error("repair and --force is not yet supported");
13552 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
13556 "filesystem mounted, continuing because of --force");
13558 /* A block device is mounted in exclusive mode by kernel */
13559 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
13562 /* only allow partial opening under repair mode */
13564 ctree_flags |= OPEN_CTREE_PARTIAL;
13566 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
13567 chunk_root_bytenr, ctree_flags);
13569 error("cannot open file system");
13575 global_info = info;
13576 root = info->fs_root;
13577 uuid_unparse(info->super_copy->fsid, uuidbuf);
13579 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
13582 * Check the bare minimum before starting anything else that could rely
13583 * on it, namely the tree roots, any local consistency checks
13585 if (!extent_buffer_uptodate(info->tree_root->node) ||
13586 !extent_buffer_uptodate(info->dev_root->node) ||
13587 !extent_buffer_uptodate(info->chunk_root->node)) {
13588 error("critical roots corrupted, unable to check the filesystem");
13594 if (clear_space_cache) {
13595 ret = do_clear_free_space_cache(info, clear_space_cache);
13601 * repair mode will force us to commit transaction which
13602 * will make us fail to load log tree when mounting.
13604 if (repair && btrfs_super_log_root(info->super_copy)) {
13605 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
13611 ret = zero_log_tree(root);
13614 error("failed to zero log tree: %d", ret);
13619 if (qgroup_report) {
13620 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
13622 ret = qgroup_verify_all(info);
13629 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
13630 subvolid, argv[optind], uuidbuf);
13631 ret = print_extent_state(info, subvolid);
13636 if (init_extent_tree || init_csum_tree) {
13637 struct btrfs_trans_handle *trans;
13639 trans = btrfs_start_transaction(info->extent_root, 0);
13640 if (IS_ERR(trans)) {
13641 error("error starting transaction");
13642 ret = PTR_ERR(trans);
13647 if (init_extent_tree) {
13648 printf("Creating a new extent tree\n");
13649 ret = reinit_extent_tree(trans, info);
13655 if (init_csum_tree) {
13656 printf("Reinitialize checksum tree\n");
13657 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
13659 error("checksum tree initialization failed: %d",
13666 ret = fill_csum_tree(trans, info->csum_root,
13670 error("checksum tree refilling failed: %d", ret);
13675 * Ok now we commit and run the normal fsck, which will add
13676 * extent entries for all of the items it finds.
13678 ret = btrfs_commit_transaction(trans, info->extent_root);
13683 if (!extent_buffer_uptodate(info->extent_root->node)) {
13684 error("critical: extent_root, unable to check the filesystem");
13689 if (!extent_buffer_uptodate(info->csum_root->node)) {
13690 error("critical: csum_root, unable to check the filesystem");
13696 ret = do_check_chunks_and_extents(info);
13700 "errors found in extent allocation tree or chunk allocation");
13702 ret = repair_root_items(info);
13705 error("failed to repair root items: %s", strerror(-ret));
13709 fprintf(stderr, "Fixed %d roots.\n", ret);
13711 } else if (ret > 0) {
13713 "Found %d roots with an outdated root item.\n",
13716 "Please run a filesystem check with the option --repair to fix them.\n");
13722 if (!ctx.progress_enabled) {
13723 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13724 fprintf(stderr, "checking free space tree\n");
13726 fprintf(stderr, "checking free space cache\n");
13728 ret = check_space_cache(root);
13731 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13732 error("errors found in free space tree");
13734 error("errors found in free space cache");
13739 * We used to have to have these hole extents in between our real
13740 * extents so if we don't have this flag set we need to make sure there
13741 * are no gaps in the file extents for inodes, otherwise we can just
13742 * ignore it when this happens.
13744 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13745 ret = do_check_fs_roots(info, &root_cache);
13748 error("errors found in fs roots");
13752 fprintf(stderr, "checking csums\n");
13753 ret = check_csums(root);
13756 error("errors found in csum tree");
13760 fprintf(stderr, "checking root refs\n");
13761 /* For low memory mode, check_fs_roots_v2 handles root refs */
13762 if (check_mode != CHECK_MODE_LOWMEM) {
13763 ret = check_root_refs(root, &root_cache);
13766 error("errors found in root refs");
13771 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13772 struct extent_buffer *eb;
13774 eb = list_first_entry(&root->fs_info->recow_ebs,
13775 struct extent_buffer, recow);
13776 list_del_init(&eb->recow);
13777 ret = recow_extent_buffer(root, eb);
13780 error("fails to fix transid errors");
13785 while (!list_empty(&delete_items)) {
13786 struct bad_item *bad;
13788 bad = list_first_entry(&delete_items, struct bad_item, list);
13789 list_del_init(&bad->list);
13791 ret = delete_bad_item(root, bad);
13797 if (info->quota_enabled) {
13798 fprintf(stderr, "checking quota groups\n");
13799 ret = qgroup_verify_all(info);
13802 error("failed to check quota groups");
13806 ret = repair_qgroups(info, &qgroups_repaired);
13809 error("failed to repair quota groups");
13815 if (!list_empty(&root->fs_info->recow_ebs)) {
13816 error("transid errors in file system");
13821 printf("found %llu bytes used, ",
13822 (unsigned long long)bytes_used);
13824 printf("error(s) found\n");
13826 printf("no error found\n");
13827 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13828 printf("total tree bytes: %llu\n",
13829 (unsigned long long)total_btree_bytes);
13830 printf("total fs tree bytes: %llu\n",
13831 (unsigned long long)total_fs_tree_bytes);
13832 printf("total extent tree bytes: %llu\n",
13833 (unsigned long long)total_extent_tree_bytes);
13834 printf("btree space waste bytes: %llu\n",
13835 (unsigned long long)btree_space_waste);
13836 printf("file data blocks allocated: %llu\n referenced %llu\n",
13837 (unsigned long long)data_bytes_allocated,
13838 (unsigned long long)data_bytes_referenced);
13840 free_qgroup_counts();
13841 free_root_recs_tree(&root_cache);
13845 if (ctx.progress_enabled)
13846 task_deinit(ctx.info);