2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87 struct extent_backref {
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
98 return rb_entry(node, struct extent_backref, node);
101 struct data_backref {
102 struct extent_backref node;
#define ROOT_DIR_ERROR		(1<<1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1<<2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1<<3)	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1<<4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1<<5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1<<6)	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1<<7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1<<8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1<<9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1<<10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1<<11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1<<12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1<<13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1<<14)	/* no inode_item */
#define LAST_ITEM		(1<<15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1<<16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1<<17)	/* ROOT_REF found but not match */
#define DIR_INDEX_MISSING	(1<<18)	/* INODE_INDEX not found */
#define DIR_INDEX_MISMATCH	(1<<19)	/* INODE_INDEX found but not match */
136 static inline struct data_backref* to_data_backref(struct extent_backref *back)
138 return container_of(back, struct data_backref, node);
141 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
143 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
144 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
145 struct data_backref *back1 = to_data_backref(ext1);
146 struct data_backref *back2 = to_data_backref(ext2);
148 WARN_ON(!ext1->is_data);
149 WARN_ON(!ext2->is_data);
151 /* parent and root are a union, so this covers both */
152 if (back1->parent > back2->parent)
154 if (back1->parent < back2->parent)
157 /* This is a full backref and the parents match. */
158 if (back1->node.full_backref)
161 if (back1->owner > back2->owner)
163 if (back1->owner < back2->owner)
166 if (back1->offset > back2->offset)
168 if (back1->offset < back2->offset)
171 if (back1->found_ref && back2->found_ref) {
172 if (back1->disk_bytenr > back2->disk_bytenr)
174 if (back1->disk_bytenr < back2->disk_bytenr)
177 if (back1->bytes > back2->bytes)
179 if (back1->bytes < back2->bytes)
187 * Much like data_backref, but with the undetermined members removed
188 * and changed to use list_head.
189 * During extent scan, it is stored in root->orphan_data_extent.
190 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
192 struct orphan_data_extent {
193 struct list_head list;
201 struct tree_backref {
202 struct extent_backref node;
209 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
211 return container_of(back, struct tree_backref, node);
214 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
216 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
217 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
218 struct tree_backref *back1 = to_tree_backref(ext1);
219 struct tree_backref *back2 = to_tree_backref(ext2);
221 WARN_ON(ext1->is_data);
222 WARN_ON(ext2->is_data);
224 /* parent and root are a union, so this covers both */
225 if (back1->parent > back2->parent)
227 if (back1->parent < back2->parent)
233 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
235 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
236 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
238 if (ext1->is_data > ext2->is_data)
241 if (ext1->is_data < ext2->is_data)
244 if (ext1->full_backref > ext2->full_backref)
246 if (ext1->full_backref < ext2->full_backref)
250 return compare_data_backref(node1, node2);
252 return compare_tree_backref(node1, node2);
255 /* Explicit initialization for extent_record::flag_block_full_backref */
256 enum { FLAG_UNSET = 2 };
258 struct extent_record {
259 struct list_head backrefs;
260 struct list_head dups;
261 struct rb_root backref_tree;
262 struct list_head list;
263 struct cache_extent cache;
264 struct btrfs_disk_key parent_key;
269 u64 extent_item_refs;
271 u64 parent_generation;
275 unsigned int flag_block_full_backref:2;
276 unsigned int found_rec:1;
277 unsigned int content_checked:1;
278 unsigned int owner_ref_checked:1;
279 unsigned int is_root:1;
280 unsigned int metadata:1;
281 unsigned int bad_full_backref:1;
282 unsigned int crossing_stripes:1;
283 unsigned int wrong_chunk_type:1;
286 static inline struct extent_record* to_extent_record(struct list_head *entry)
288 return container_of(entry, struct extent_record, list);
291 struct inode_backref {
292 struct list_head list;
293 unsigned int found_dir_item:1;
294 unsigned int found_dir_index:1;
295 unsigned int found_inode_ref:1;
305 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
307 return list_entry(entry, struct inode_backref, list);
310 struct root_item_record {
311 struct list_head list;
317 struct btrfs_key drop_key;
/* Error bits recorded per backref; reported by print_ref_error(). */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
334 struct file_extent_hole {
340 struct inode_record {
341 struct list_head backrefs;
342 unsigned int checked:1;
343 unsigned int merging:1;
344 unsigned int found_inode_item:1;
345 unsigned int found_dir_item:1;
346 unsigned int found_file_extent:1;
347 unsigned int found_csum_item:1;
348 unsigned int some_csum_missing:1;
349 unsigned int nodatasum:1;
362 struct rb_root holes;
363 struct list_head orphan_extents;
/* Error bits recorded per inode record; reported by print_inode_error(). */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
384 struct root_backref {
385 struct list_head list;
386 unsigned int found_dir_item:1;
387 unsigned int found_dir_index:1;
388 unsigned int found_back_ref:1;
389 unsigned int found_forward_ref:1;
390 unsigned int reachable:1;
399 static inline struct root_backref* to_root_backref(struct list_head *entry)
401 return list_entry(entry, struct root_backref, list);
405 struct list_head backrefs;
406 struct cache_extent cache;
407 unsigned int found_root_item:1;
413 struct cache_extent cache;
418 struct cache_extent cache;
419 struct cache_tree root_cache;
420 struct cache_tree inode_cache;
421 struct inode_record *current;
430 struct walk_control {
431 struct cache_tree shared;
432 struct shared_node *nodes[BTRFS_MAX_LEVEL];
438 struct btrfs_key key;
440 struct list_head list;
443 struct extent_entry {
448 struct list_head list;
451 struct root_item_info {
452 /* level of the root */
454 /* number of nodes at this level, must be 1 for a root */
458 struct cache_extent cache_extent;
462 * Error bit for low memory mode check.
464 * Currently no caller cares about it yet. Just internal use for error
/*
 * Every flag owns a distinct bit so that a combined error value can be
 * decoded unambiguously.  CROSSING_STRIPE_BOUNDARY used to share bit 4 with
 * REFERENCER_MISMATCH; it now uses the first free bit and all other values
 * are unchanged.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
478 static void *print_status_check(void *p)
480 struct task_ctx *priv = p;
481 const char work_indicator[] = { '.', 'o', 'O', 'o' };
483 static char *task_position_string[] = {
485 "checking free space cache",
489 task_period_start(priv->info, 1000 /* 1s */);
491 if (priv->tp == TASK_NOTHING)
495 printf("%s [%c]\r", task_position_string[priv->tp],
496 work_indicator[count % 4]);
499 task_period_wait(priv->info);
504 static int print_status_return(void *p)
512 static enum btrfs_check_mode parse_check_mode(const char *str)
514 if (strcmp(str, "lowmem") == 0)
515 return CHECK_MODE_LOWMEM;
516 if (strcmp(str, "orig") == 0)
517 return CHECK_MODE_ORIGINAL;
518 if (strcmp(str, "original") == 0)
519 return CHECK_MODE_ORIGINAL;
521 return CHECK_MODE_UNKNOWN;
524 /* Compatibility function to allow reuse of old code */
525 static u64 first_extent_gap(struct rb_root *holes)
527 struct file_extent_hole *hole;
529 if (RB_EMPTY_ROOT(holes))
532 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
536 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
538 struct file_extent_hole *hole1;
539 struct file_extent_hole *hole2;
541 hole1 = rb_entry(node1, struct file_extent_hole, node);
542 hole2 = rb_entry(node2, struct file_extent_hole, node);
544 if (hole1->start > hole2->start)
546 if (hole1->start < hole2->start)
548 /* Now hole1->start == hole2->start */
549 if (hole1->len >= hole2->len)
551 * Hole 1 will be merge center
552 * Same hole will be merged later
555 /* Hole 2 will be merge center */
560 * Add a hole to the record
562 * This will do hole merge for copy_file_extent_holes(),
563 * which will ensure there won't be continuous holes.
565 static int add_file_extent_hole(struct rb_root *holes,
568 struct file_extent_hole *hole;
569 struct file_extent_hole *prev = NULL;
570 struct file_extent_hole *next = NULL;
572 hole = malloc(sizeof(*hole));
577 /* Since compare will not return 0, no -EEXIST will happen */
578 rb_insert(holes, &hole->node, compare_hole);
580 /* simple merge with previous hole */
581 if (rb_prev(&hole->node))
582 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
584 if (prev && prev->start + prev->len >= hole->start) {
585 hole->len = hole->start + hole->len - prev->start;
586 hole->start = prev->start;
587 rb_erase(&prev->node, holes);
592 /* iterate merge with next holes */
594 if (!rb_next(&hole->node))
596 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
598 if (hole->start + hole->len >= next->start) {
599 if (hole->start + hole->len <= next->start + next->len)
600 hole->len = next->start + next->len -
602 rb_erase(&next->node, holes);
611 static int compare_hole_range(struct rb_node *node, void *data)
613 struct file_extent_hole *hole;
616 hole = (struct file_extent_hole *)data;
619 hole = rb_entry(node, struct file_extent_hole, node);
620 if (start < hole->start)
622 if (start >= hole->start && start < hole->start + hole->len)
628 * Delete a hole in the record
630 * This will do the hole split and is much stricter than add.
632 static int del_file_extent_hole(struct rb_root *holes,
635 struct file_extent_hole *hole;
636 struct file_extent_hole tmp;
641 struct rb_node *node;
648 node = rb_search(holes, &tmp, compare_hole_range, NULL);
651 hole = rb_entry(node, struct file_extent_hole, node);
652 if (start + len > hole->start + hole->len)
656 * Now there will be no overlap, delete the hole and re-add the
657 * split(s) if they exist.
659 if (start > hole->start) {
660 prev_start = hole->start;
661 prev_len = start - hole->start;
664 if (hole->start + hole->len > start + len) {
665 next_start = start + len;
666 next_len = hole->start + hole->len - start - len;
669 rb_erase(node, holes);
672 ret = add_file_extent_hole(holes, prev_start, prev_len);
677 ret = add_file_extent_hole(holes, next_start, next_len);
684 static int copy_file_extent_holes(struct rb_root *dst,
687 struct file_extent_hole *hole;
688 struct rb_node *node;
691 node = rb_first(src);
693 hole = rb_entry(node, struct file_extent_hole, node);
694 ret = add_file_extent_hole(dst, hole->start, hole->len);
697 node = rb_next(node);
702 static void free_file_extent_holes(struct rb_root *holes)
704 struct rb_node *node;
705 struct file_extent_hole *hole;
707 node = rb_first(holes);
709 hole = rb_entry(node, struct file_extent_hole, node);
710 rb_erase(node, holes);
712 node = rb_first(holes);
716 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
718 static void record_root_in_trans(struct btrfs_trans_handle *trans,
719 struct btrfs_root *root)
721 if (root->last_trans != trans->transid) {
722 root->track_dirty = 1;
723 root->last_trans = trans->transid;
724 root->commit_root = root->node;
725 extent_buffer_get(root->node);
729 static u8 imode_to_type(u32 imode)
732 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
733 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
734 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
735 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
736 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
737 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
738 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
739 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
742 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
746 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
748 struct device_record *rec1;
749 struct device_record *rec2;
751 rec1 = rb_entry(node1, struct device_record, node);
752 rec2 = rb_entry(node2, struct device_record, node);
753 if (rec1->devid > rec2->devid)
755 else if (rec1->devid < rec2->devid)
761 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
763 struct inode_record *rec;
764 struct inode_backref *backref;
765 struct inode_backref *orig;
766 struct inode_backref *tmp;
767 struct orphan_data_extent *src_orphan;
768 struct orphan_data_extent *dst_orphan;
773 rec = malloc(sizeof(*rec));
775 return ERR_PTR(-ENOMEM);
776 memcpy(rec, orig_rec, sizeof(*rec));
778 INIT_LIST_HEAD(&rec->backrefs);
779 INIT_LIST_HEAD(&rec->orphan_extents);
780 rec->holes = RB_ROOT;
782 list_for_each_entry(orig, &orig_rec->backrefs, list) {
783 size = sizeof(*orig) + orig->namelen + 1;
784 backref = malloc(size);
789 memcpy(backref, orig, size);
790 list_add_tail(&backref->list, &rec->backrefs);
792 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
793 dst_orphan = malloc(sizeof(*dst_orphan));
798 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
799 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
801 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
808 rb = rb_first(&rec->holes);
810 struct file_extent_hole *hole;
812 hole = rb_entry(rb, struct file_extent_hole, node);
818 if (!list_empty(&rec->backrefs))
819 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
820 list_del(&orig->list);
824 if (!list_empty(&rec->orphan_extents))
825 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
826 list_del(&orig->list);
835 static void print_orphan_data_extents(struct list_head *orphan_extents,
838 struct orphan_data_extent *orphan;
840 if (list_empty(orphan_extents))
842 printf("The following data extent is lost in tree %llu:\n",
844 list_for_each_entry(orphan, orphan_extents, list) {
845 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
846 orphan->objectid, orphan->offset, orphan->disk_bytenr,
851 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
853 u64 root_objectid = root->root_key.objectid;
854 int errors = rec->errors;
858 /* reloc root errors, we print its corresponding fs root objectid*/
859 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
860 root_objectid = root->root_key.offset;
861 fprintf(stderr, "reloc");
863 fprintf(stderr, "root %llu inode %llu errors %x",
864 (unsigned long long) root_objectid,
865 (unsigned long long) rec->ino, rec->errors);
867 if (errors & I_ERR_NO_INODE_ITEM)
868 fprintf(stderr, ", no inode item");
869 if (errors & I_ERR_NO_ORPHAN_ITEM)
870 fprintf(stderr, ", no orphan item");
871 if (errors & I_ERR_DUP_INODE_ITEM)
872 fprintf(stderr, ", dup inode item");
873 if (errors & I_ERR_DUP_DIR_INDEX)
874 fprintf(stderr, ", dup dir index");
875 if (errors & I_ERR_ODD_DIR_ITEM)
876 fprintf(stderr, ", odd dir item");
877 if (errors & I_ERR_ODD_FILE_EXTENT)
878 fprintf(stderr, ", odd file extent");
879 if (errors & I_ERR_BAD_FILE_EXTENT)
880 fprintf(stderr, ", bad file extent");
881 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
882 fprintf(stderr, ", file extent overlap");
883 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
884 fprintf(stderr, ", file extent discount");
885 if (errors & I_ERR_DIR_ISIZE_WRONG)
886 fprintf(stderr, ", dir isize wrong");
887 if (errors & I_ERR_FILE_NBYTES_WRONG)
888 fprintf(stderr, ", nbytes wrong");
889 if (errors & I_ERR_ODD_CSUM_ITEM)
890 fprintf(stderr, ", odd csum item");
891 if (errors & I_ERR_SOME_CSUM_MISSING)
892 fprintf(stderr, ", some csum missing");
893 if (errors & I_ERR_LINK_COUNT_WRONG)
894 fprintf(stderr, ", link count wrong");
895 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
896 fprintf(stderr, ", orphan file extent");
897 fprintf(stderr, "\n");
898 /* Print the orphan extents if needed */
899 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
900 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
902 /* Print the holes if needed */
903 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
904 struct file_extent_hole *hole;
905 struct rb_node *node;
908 node = rb_first(&rec->holes);
909 fprintf(stderr, "Found file extent holes:\n");
912 hole = rb_entry(node, struct file_extent_hole, node);
913 fprintf(stderr, "\tstart: %llu, len: %llu\n",
914 hole->start, hole->len);
915 node = rb_next(node);
918 fprintf(stderr, "\tstart: 0, len: %llu\n",
920 root->fs_info->sectorsize));
924 static void print_ref_error(int errors)
926 if (errors & REF_ERR_NO_DIR_ITEM)
927 fprintf(stderr, ", no dir item");
928 if (errors & REF_ERR_NO_DIR_INDEX)
929 fprintf(stderr, ", no dir index");
930 if (errors & REF_ERR_NO_INODE_REF)
931 fprintf(stderr, ", no inode ref");
932 if (errors & REF_ERR_DUP_DIR_ITEM)
933 fprintf(stderr, ", dup dir item");
934 if (errors & REF_ERR_DUP_DIR_INDEX)
935 fprintf(stderr, ", dup dir index");
936 if (errors & REF_ERR_DUP_INODE_REF)
937 fprintf(stderr, ", dup inode ref");
938 if (errors & REF_ERR_INDEX_UNMATCH)
939 fprintf(stderr, ", index mismatch");
940 if (errors & REF_ERR_FILETYPE_UNMATCH)
941 fprintf(stderr, ", filetype mismatch");
942 if (errors & REF_ERR_NAME_TOO_LONG)
943 fprintf(stderr, ", name too long");
944 if (errors & REF_ERR_NO_ROOT_REF)
945 fprintf(stderr, ", no root ref");
946 if (errors & REF_ERR_NO_ROOT_BACKREF)
947 fprintf(stderr, ", no root backref");
948 if (errors & REF_ERR_DUP_ROOT_REF)
949 fprintf(stderr, ", dup root ref");
950 if (errors & REF_ERR_DUP_ROOT_BACKREF)
951 fprintf(stderr, ", dup root backref");
952 fprintf(stderr, "\n");
955 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
958 struct ptr_node *node;
959 struct cache_extent *cache;
960 struct inode_record *rec = NULL;
963 cache = lookup_cache_extent(inode_cache, ino, 1);
965 node = container_of(cache, struct ptr_node, cache);
967 if (mod && rec->refs > 1) {
968 node->data = clone_inode_rec(rec);
969 if (IS_ERR(node->data))
975 rec = calloc(1, sizeof(*rec));
977 return ERR_PTR(-ENOMEM);
979 rec->extent_start = (u64)-1;
981 INIT_LIST_HEAD(&rec->backrefs);
982 INIT_LIST_HEAD(&rec->orphan_extents);
983 rec->holes = RB_ROOT;
985 node = malloc(sizeof(*node));
988 return ERR_PTR(-ENOMEM);
990 node->cache.start = ino;
991 node->cache.size = 1;
994 if (ino == BTRFS_FREE_INO_OBJECTID)
997 ret = insert_cache_extent(inode_cache, &node->cache);
999 return ERR_PTR(-EEXIST);
1004 static void free_orphan_data_extents(struct list_head *orphan_extents)
1006 struct orphan_data_extent *orphan;
1008 while (!list_empty(orphan_extents)) {
1009 orphan = list_entry(orphan_extents->next,
1010 struct orphan_data_extent, list);
1011 list_del(&orphan->list);
1016 static void free_inode_rec(struct inode_record *rec)
1018 struct inode_backref *backref;
1020 if (--rec->refs > 0)
1023 while (!list_empty(&rec->backrefs)) {
1024 backref = to_inode_backref(rec->backrefs.next);
1025 list_del(&backref->list);
1028 free_orphan_data_extents(&rec->orphan_extents);
1029 free_file_extent_holes(&rec->holes);
1033 static int can_free_inode_rec(struct inode_record *rec)
1035 if (!rec->errors && rec->checked && rec->found_inode_item &&
1036 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1041 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1042 struct inode_record *rec)
1044 struct cache_extent *cache;
1045 struct inode_backref *tmp, *backref;
1046 struct ptr_node *node;
1049 if (!rec->found_inode_item)
1052 filetype = imode_to_type(rec->imode);
1053 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1054 if (backref->found_dir_item && backref->found_dir_index) {
1055 if (backref->filetype != filetype)
1056 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1057 if (!backref->errors && backref->found_inode_ref &&
1058 rec->nlink == rec->found_link) {
1059 list_del(&backref->list);
1065 if (!rec->checked || rec->merging)
1068 if (S_ISDIR(rec->imode)) {
1069 if (rec->found_size != rec->isize)
1070 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1071 if (rec->found_file_extent)
1072 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1073 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1074 if (rec->found_dir_item)
1075 rec->errors |= I_ERR_ODD_DIR_ITEM;
1076 if (rec->found_size != rec->nbytes)
1077 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1078 if (rec->nlink > 0 && !no_holes &&
1079 (rec->extent_end < rec->isize ||
1080 first_extent_gap(&rec->holes) < rec->isize))
1081 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1084 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1085 if (rec->found_csum_item && rec->nodatasum)
1086 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1087 if (rec->some_csum_missing && !rec->nodatasum)
1088 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1091 BUG_ON(rec->refs != 1);
1092 if (can_free_inode_rec(rec)) {
1093 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1094 node = container_of(cache, struct ptr_node, cache);
1095 BUG_ON(node->data != rec);
1096 remove_cache_extent(inode_cache, &node->cache);
1098 free_inode_rec(rec);
1102 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1104 struct btrfs_path path;
1105 struct btrfs_key key;
1108 key.objectid = BTRFS_ORPHAN_OBJECTID;
1109 key.type = BTRFS_ORPHAN_ITEM_KEY;
1112 btrfs_init_path(&path);
1113 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1114 btrfs_release_path(&path);
1120 static int process_inode_item(struct extent_buffer *eb,
1121 int slot, struct btrfs_key *key,
1122 struct shared_node *active_node)
1124 struct inode_record *rec;
1125 struct btrfs_inode_item *item;
1127 rec = active_node->current;
1128 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1129 if (rec->found_inode_item) {
1130 rec->errors |= I_ERR_DUP_INODE_ITEM;
1133 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1134 rec->nlink = btrfs_inode_nlink(eb, item);
1135 rec->isize = btrfs_inode_size(eb, item);
1136 rec->nbytes = btrfs_inode_nbytes(eb, item);
1137 rec->imode = btrfs_inode_mode(eb, item);
1138 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1140 rec->found_inode_item = 1;
1141 if (rec->nlink == 0)
1142 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1143 maybe_free_inode_rec(&active_node->inode_cache, rec);
1147 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1149 int namelen, u64 dir)
1151 struct inode_backref *backref;
1153 list_for_each_entry(backref, &rec->backrefs, list) {
1154 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1156 if (backref->dir != dir || backref->namelen != namelen)
1158 if (memcmp(name, backref->name, namelen))
1163 backref = malloc(sizeof(*backref) + namelen + 1);
1166 memset(backref, 0, sizeof(*backref));
1168 backref->namelen = namelen;
1169 memcpy(backref->name, name, namelen);
1170 backref->name[namelen] = '\0';
1171 list_add_tail(&backref->list, &rec->backrefs);
1175 static int add_inode_backref(struct cache_tree *inode_cache,
1176 u64 ino, u64 dir, u64 index,
1177 const char *name, int namelen,
1178 u8 filetype, u8 itemtype, int errors)
1180 struct inode_record *rec;
1181 struct inode_backref *backref;
1183 rec = get_inode_rec(inode_cache, ino, 1);
1184 BUG_ON(IS_ERR(rec));
1185 backref = get_inode_backref(rec, name, namelen, dir);
1188 backref->errors |= errors;
1189 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1190 if (backref->found_dir_index)
1191 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1192 if (backref->found_inode_ref && backref->index != index)
1193 backref->errors |= REF_ERR_INDEX_UNMATCH;
1194 if (backref->found_dir_item && backref->filetype != filetype)
1195 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1197 backref->index = index;
1198 backref->filetype = filetype;
1199 backref->found_dir_index = 1;
1200 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1202 if (backref->found_dir_item)
1203 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1204 if (backref->found_dir_index && backref->filetype != filetype)
1205 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1207 backref->filetype = filetype;
1208 backref->found_dir_item = 1;
1209 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1210 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1211 if (backref->found_inode_ref)
1212 backref->errors |= REF_ERR_DUP_INODE_REF;
1213 if (backref->found_dir_index && backref->index != index)
1214 backref->errors |= REF_ERR_INDEX_UNMATCH;
1216 backref->index = index;
1218 backref->ref_type = itemtype;
1219 backref->found_inode_ref = 1;
1224 maybe_free_inode_rec(inode_cache, rec);
1228 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1229 struct cache_tree *dst_cache)
1231 struct inode_backref *backref;
1236 list_for_each_entry(backref, &src->backrefs, list) {
1237 if (backref->found_dir_index) {
1238 add_inode_backref(dst_cache, dst->ino, backref->dir,
1239 backref->index, backref->name,
1240 backref->namelen, backref->filetype,
1241 BTRFS_DIR_INDEX_KEY, backref->errors);
1243 if (backref->found_dir_item) {
1245 add_inode_backref(dst_cache, dst->ino,
1246 backref->dir, 0, backref->name,
1247 backref->namelen, backref->filetype,
1248 BTRFS_DIR_ITEM_KEY, backref->errors);
1250 if (backref->found_inode_ref) {
1251 add_inode_backref(dst_cache, dst->ino,
1252 backref->dir, backref->index,
1253 backref->name, backref->namelen, 0,
1254 backref->ref_type, backref->errors);
1258 if (src->found_dir_item)
1259 dst->found_dir_item = 1;
1260 if (src->found_file_extent)
1261 dst->found_file_extent = 1;
1262 if (src->found_csum_item)
1263 dst->found_csum_item = 1;
1264 if (src->some_csum_missing)
1265 dst->some_csum_missing = 1;
1266 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1267 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1272 BUG_ON(src->found_link < dir_count);
1273 dst->found_link += src->found_link - dir_count;
1274 dst->found_size += src->found_size;
1275 if (src->extent_start != (u64)-1) {
1276 if (dst->extent_start == (u64)-1) {
1277 dst->extent_start = src->extent_start;
1278 dst->extent_end = src->extent_end;
1280 if (dst->extent_end > src->extent_start)
1281 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1282 else if (dst->extent_end < src->extent_start) {
1283 ret = add_file_extent_hole(&dst->holes,
1285 src->extent_start - dst->extent_end);
1287 if (dst->extent_end < src->extent_end)
1288 dst->extent_end = src->extent_end;
1292 dst->errors |= src->errors;
1293 if (src->found_inode_item) {
1294 if (!dst->found_inode_item) {
1295 dst->nlink = src->nlink;
1296 dst->isize = src->isize;
1297 dst->nbytes = src->nbytes;
1298 dst->imode = src->imode;
1299 dst->nodatasum = src->nodatasum;
1300 dst->found_inode_item = 1;
1302 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Fold the inode records cached in @src_node into @dst_node.
 *
 * Visible behaviour: one reference is dropped from @src_node, its caches are
 * walked from offset 0 and each entry is inserted into the matching cache of
 * @dst_node. An -EEXIST result from the insert is resolved by merging the
 * record into the one the destination already holds.
 *
 * NOTE(review): the embedded line numbers jump, so some original statements
 * are not part of this listing; the comments below describe only what is
 * visible.
 */
1310 static int splice_shared_node(struct shared_node *src_node,
1311 struct shared_node *dst_node)
1313 struct cache_extent *cache;
1314 struct ptr_node *node, *ins;
1315 struct cache_tree *src, *dst;
1316 struct inode_record *rec, *conflict;
1317 u64 current_ino = 0;
/* Drop one reference on the source node. */
1321 if (--src_node->refs == 0)
/* Remember which inode the source node was positioned on. */
1323 if (src_node->current)
1324 current_ino = src_node->current->ino;
/* Start with the root caches; the inode caches are selected further down. */
1326 src = &src_node->root_cache;
1327 dst = &dst_node->root_cache;
1329 cache = search_cache_extent(src, 0);
1331 node = container_of(cache, struct ptr_node, cache);
/* Fetch the successor before the current entry may be unlinked. */
1333 cache = next_cache_extent(cache);
1336 remove_cache_extent(src, &node->cache);
/* A new ptr_node covering the same range as the source entry. */
1339 ins = malloc(sizeof(*ins));
1341 ins->cache.start = node->cache.start;
1342 ins->cache.size = node->cache.size;
1346 ret = insert_cache_extent(dst, &ins->cache);
/* The destination already tracks this range: merge instead of inserting. */
1347 if (ret == -EEXIST) {
1348 conflict = get_inode_rec(dst, rec->ino, 1);
1349 BUG_ON(IS_ERR(conflict));
1350 merge_inode_recs(rec, conflict, dst);
1352 conflict->checked = 1;
/* Do not leave dst_node->current pointing at a record that may be freed. */
1353 if (dst_node->current == conflict)
1354 dst_node->current = NULL;
1356 maybe_free_inode_rec(dst, conflict);
1357 free_inode_rec(rec);
/* Switch both sides from the root caches to the inode caches. */
1364 if (src == &src_node->root_cache) {
1365 src = &src_node->inode_cache;
1366 dst = &dst_node->inode_cache;
/*
 * If the source was positioned on a later inode than the destination,
 * mark the destination's current record as checked and move the
 * destination to the source's inode.
 */
1370 if (current_ino > 0 && (!dst_node->current ||
1371 current_ino > dst_node->current->ino)) {
1372 if (dst_node->current) {
1373 dst_node->current->checked = 1;
1374 maybe_free_inode_rec(dst, dst_node->current);
1376 dst_node->current = get_inode_rec(dst, current_ino, 1);
1377 BUG_ON(IS_ERR(dst_node->current));
1382 static void free_inode_ptr(struct cache_extent *cache)
1384 struct ptr_node *node;
1385 struct inode_record *rec;
1387 node = container_of(cache, struct ptr_node, cache);
1389 free_inode_rec(rec);
1393 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1395 static struct shared_node *find_shared_node(struct cache_tree *shared,
1398 struct cache_extent *cache;
1399 struct shared_node *node;
1401 cache = lookup_cache_extent(shared, bytenr, 1);
1403 node = container_of(cache, struct shared_node, cache);
1409 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1412 struct shared_node *node;
1414 node = calloc(1, sizeof(*node));
1417 node->cache.start = bytenr;
1418 node->cache.size = 1;
1419 cache_tree_init(&node->root_cache);
1420 cache_tree_init(&node->inode_cache);
1423 ret = insert_cache_extent(shared, &node->cache);
/*
 * Called while walking down a tree when the block at @bytenr (on @level) is
 * treated as shared.
 *
 * Visible behaviour:
 *  - nothing is looked up when @level already is the active level;
 *  - a block without a shared_node gets one via add_shared_node() and
 *    becomes the active node for @level;
 *  - otherwise one reference is dropped, either directly (root item with
 *    zero refs while the active node is the root level) or through
 *    splice_shared_node() into the currently active node.
 *
 * NOTE(review): the return statements are not part of this listing.
 */
1428 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1429 struct walk_control *wc, int level)
1431 struct shared_node *node;
1432 struct shared_node *dest;
1435 if (level == wc->active_node)
/* A shared block can only be entered from a higher level. */
1438 BUG_ON(wc->active_node <= level);
1439 node = find_shared_node(&wc->shared, bytenr);
/* No node recorded for this block yet: create it and make it active. */
1441 ret = add_shared_node(&wc->shared, bytenr, refs);
1443 node = find_shared_node(&wc->shared, bytenr);
1444 wc->nodes[level] = node;
1445 wc->active_node = level;
/* Root item has zero refs: drop the reference without splicing records. */
1449 if (wc->root_level == wc->active_node &&
1450 btrfs_root_refs(&root->root_item) == 0) {
1451 if (--node->refs == 0) {
1452 free_inode_recs_tree(&node->root_cache);
1453 free_inode_recs_tree(&node->inode_cache);
1454 remove_cache_extent(&wc->shared, &node->cache);
/* Merge the records collected for this block into the active node. */
1460 dest = wc->nodes[wc->active_node];
1461 splice_shared_node(node, dest);
/* splice_shared_node() dropped a reference; unlink the node at zero. */
1462 if (node->refs == 0) {
1463 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Called while walking up a tree when the walk leaves the active shared
 * node at @level.
 *
 * Visible behaviour: the active node is detached from wc->nodes[], a higher
 * level becomes the active one, and the records of the node being left are
 * spliced into the new active node unless that node is the root level of a
 * root whose root item has zero refs.
 *
 * NOTE(review): the body of the level search loop and the return statements
 * are not part of this listing.
 */
1469 static int leave_shared_node(struct btrfs_root *root,
1470 struct walk_control *wc, int level)
1472 struct shared_node *node;
1473 struct shared_node *dest;
1476 if (level == wc->root_level)
/* Scan the levels above @level for the next active node. */
1479 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1483 BUG_ON(i >= BTRFS_MAX_LEVEL);
/* Detach the node being left and activate level i. */
1485 node = wc->nodes[wc->active_node];
1486 wc->nodes[wc->active_node] = NULL;
1487 wc->active_node = i;
1489 dest = wc->nodes[wc->active_node];
1490 if (wc->active_node < wc->root_level ||
1491 btrfs_root_refs(&root->root_item) > 0) {
/* More than one reference must remain before splicing. */
1492 BUG_ON(node->refs <= 1);
1493 splice_shared_node(node, dest);
1495 BUG_ON(node->refs < 2);
/*
 * Determine the relationship between two roots by searching the tree root:
 * first for a ROOT_REF item keyed (parent_root_id, child_root_id), then by
 * scanning the ROOT_BACKREF items of child_root_id. The original list of
 * return values follows.
 */
1504 * 1 - if the root with id child_root_id is a child of root parent_root_id
1505 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1506 * has other root(s) as parent(s)
1507 * 2 - if the root child_root_id doesn't have any parent roots
1509 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1512 struct btrfs_path path;
1513 struct btrfs_key key;
1514 struct extent_buffer *leaf;
1518 btrfs_init_path(&path);
/* Direct lookup: ROOT_REF item from the parent to the child. */
1520 key.objectid = parent_root_id;
1521 key.type = BTRFS_ROOT_REF_KEY;
1522 key.offset = child_root_id;
1523 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1527 btrfs_release_path(&path);
/* Fall back to scanning the ROOT_BACKREF items of the child. */
1531 key.objectid = child_root_id;
1532 key.type = BTRFS_ROOT_BACKREF_KEY;
1534 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1540 leaf = path.nodes[0];
/* Past the last item of this leaf: continue in the next leaf. */
1541 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1542 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1545 leaf = path.nodes[0];
1548 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Test for leaving the child's ROOT_BACKREF items. */
1549 if (key.objectid != child_root_id ||
1550 key.type != BTRFS_ROOT_BACKREF_KEY)
/* This backref names the parent being asked about. */
1555 if (key.offset == parent_root_id) {
1556 btrfs_release_path(&path);
1563 btrfs_release_path(&path);
1566 return has_parent ? 0 : 2;
1569 static int process_dir_item(struct extent_buffer *eb,
1570 int slot, struct btrfs_key *key,
1571 struct shared_node *active_node)
1581 struct btrfs_dir_item *di;
1582 struct inode_record *rec;
1583 struct cache_tree *root_cache;
1584 struct cache_tree *inode_cache;
1585 struct btrfs_key location;
1586 char namebuf[BTRFS_NAME_LEN];
1588 root_cache = &active_node->root_cache;
1589 inode_cache = &active_node->inode_cache;
1590 rec = active_node->current;
1591 rec->found_dir_item = 1;
1593 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1594 total = btrfs_item_size_nr(eb, slot);
1595 while (cur < total) {
1597 btrfs_dir_item_key_to_cpu(eb, di, &location);
1598 name_len = btrfs_dir_name_len(eb, di);
1599 data_len = btrfs_dir_data_len(eb, di);
1600 filetype = btrfs_dir_type(eb, di);
1602 rec->found_size += name_len;
1603 if (cur + sizeof(*di) + name_len > total ||
1604 name_len > BTRFS_NAME_LEN) {
1605 error = REF_ERR_NAME_TOO_LONG;
1607 if (cur + sizeof(*di) > total)
1609 len = min_t(u32, total - cur - sizeof(*di),
1616 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1618 if (key->type == BTRFS_DIR_ITEM_KEY &&
1619 key->offset != btrfs_name_hash(namebuf, len)) {
1620 rec->errors |= I_ERR_ODD_DIR_ITEM;
1621 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1622 key->objectid, key->offset, namebuf, len, filetype,
1623 key->offset, btrfs_name_hash(namebuf, len));
1626 if (location.type == BTRFS_INODE_ITEM_KEY) {
1627 add_inode_backref(inode_cache, location.objectid,
1628 key->objectid, key->offset, namebuf,
1629 len, filetype, key->type, error);
1630 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1631 add_inode_backref(root_cache, location.objectid,
1632 key->objectid, key->offset,
1633 namebuf, len, filetype,
1636 fprintf(stderr, "invalid location in dir item %u\n",
1638 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1639 key->objectid, key->offset, namebuf,
1640 len, filetype, key->type, error);
1643 len = sizeof(*di) + name_len + data_len;
1644 di = (struct btrfs_dir_item *)((char *)di + len);
1647 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1648 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record every name stored in one INODE_REF item as a backref in the inode
 * cache of @active_node: key->objectid is the inode, key->offset the
 * directory, and the index is taken from each btrfs_inode_ref.
 *
 * NOTE(review): the embedded line numbers jump, so some original statements
 * (local declarations, else branch, loop advance, return) are not part of
 * this listing.
 */
1653 static int process_inode_ref(struct extent_buffer *eb,
1654 int slot, struct btrfs_key *key,
1655 struct shared_node *active_node)
1663 struct cache_tree *inode_cache;
1664 struct btrfs_inode_ref *ref;
1665 char namebuf[BTRFS_NAME_LEN];
1667 inode_cache = &active_node->inode_cache;
1669 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1670 total = btrfs_item_size_nr(eb, slot);
/* One item can hold several refs back to back. */
1671 while (cur < total) {
1672 name_len = btrfs_inode_ref_name_len(eb, ref);
1673 index = btrfs_inode_ref_index(eb, ref);
1675 /* inode_ref + namelen should not cross item boundary */
1676 if (cur + sizeof(*ref) + name_len > total ||
1677 name_len > BTRFS_NAME_LEN) {
/* Test for the case where not even the fixed header fits. */
1678 if (total < cur + sizeof(*ref))
1681 /* Still try to read out the remaining part */
1682 len = min_t(u32, total - cur - sizeof(*ref),
1684 error = REF_ERR_NAME_TOO_LONG;
/* namebuf receives len raw bytes; no terminating NUL is written here. */
1690 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1691 add_inode_backref(inode_cache, key->objectid, key->offset,
1692 index, namebuf, len, 0, key->type, error);
/* Step to the next ref using the on-disk name length. */
1694 len = sizeof(*ref) + name_len;
1695 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1701 static int process_inode_extref(struct extent_buffer *eb,
1702 int slot, struct btrfs_key *key,
1703 struct shared_node *active_node)
1712 struct cache_tree *inode_cache;
1713 struct btrfs_inode_extref *extref;
1714 char namebuf[BTRFS_NAME_LEN];
1716 inode_cache = &active_node->inode_cache;
1718 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1719 total = btrfs_item_size_nr(eb, slot);
1720 while (cur < total) {
1721 name_len = btrfs_inode_extref_name_len(eb, extref);
1722 index = btrfs_inode_extref_index(eb, extref);
1723 parent = btrfs_inode_extref_parent(eb, extref);
1724 if (name_len <= BTRFS_NAME_LEN) {
1728 len = BTRFS_NAME_LEN;
1729 error = REF_ERR_NAME_TOO_LONG;
1731 read_extent_buffer(eb, namebuf,
1732 (unsigned long)(extref + 1), len);
1733 add_inode_backref(inode_cache, key->objectid, parent,
1734 index, namebuf, len, 0, key->type, error);
1736 len = sizeof(*extref) + name_len;
1737 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Walk the EXTENT_CSUM items of the csum root that overlap the byte range
 * [@start, @start + @len).
 *
 * NOTE(review): the statements that update *found, adjust start/len and
 * advance the slot are not part of this listing (the embedded line numbers
 * jump); presumably *found receives the number of covered bytes - confirm
 * against the full source.
 */
1744 static int count_csum_range(struct btrfs_root *root, u64 start,
1745 u64 len, u64 *found)
1747 struct btrfs_key key;
1748 struct btrfs_path path;
1749 struct extent_buffer *leaf;
/* Size in bytes of one checksum, taken from the super block. */
1754 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1756 btrfs_init_path(&path);
1758 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1760 key.type = BTRFS_EXTENT_CSUM_KEY;
1762 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* No exact match: look at whether the previous item is a csum item too. */
1766 if (ret > 0 && path.slots[0] > 0) {
1767 leaf = path.nodes[0];
1768 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1769 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1770 key.type == BTRFS_EXTENT_CSUM_KEY)
1775 leaf = path.nodes[0];
/* Past the last item of this leaf: continue in the next leaf. */
1776 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1777 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1782 leaf = path.nodes[0];
1785 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1786 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1787 key.type != BTRFS_EXTENT_CSUM_KEY)
1790 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Test for an item that starts at or beyond the end of the range. */
1791 if (key.offset >= start + len)
1794 if (key.offset > start)
/* Each csum_size bytes of item data cover one sector of data. */
1797 size = btrfs_item_size_nr(leaf, path.slots[0]);
1798 csum_end = key.offset + (size / csum_size) *
1799 root->fs_info->sectorsize;
1800 if (csum_end > start) {
1801 size = min(csum_end - start, len);
1810 btrfs_release_path(&path);
/*
 * Account one EXTENT_DATA item to the current inode record of @active_node:
 * track extent_start/extent_end, record holes and overlaps, validate the
 * item's fields and compare its checksum coverage with the extent type.
 *
 * NOTE(review): the embedded line numbers jump, so some original statements
 * (local declarations, a few conditions and the return) are not part of this
 * listing.
 */
1816 static int process_file_extent(struct btrfs_root *root,
1817 struct extent_buffer *eb,
1818 int slot, struct btrfs_key *key,
1819 struct shared_node *active_node)
1821 struct inode_record *rec;
1822 struct btrfs_file_extent_item *fi;
1824 u64 disk_bytenr = 0;
1825 u64 extent_offset = 0;
/* Low bits that must be clear in a sector aligned length. */
1826 u64 mask = root->fs_info->sectorsize - 1;
1830 rec = active_node->current;
1831 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1832 rec->found_file_extent = 1;
/* (u64)-1 marks a record that has not seen a file extent yet. */
1834 if (rec->extent_start == (u64)-1) {
1835 rec->extent_start = key->offset;
1836 rec->extent_end = key->offset;
/* Starting before the previous end is an overlap, after it a hole. */
1839 if (rec->extent_end > key->offset)
1840 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1841 else if (rec->extent_end < key->offset) {
1842 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1843 key->offset - rec->extent_end);
1848 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1849 extent_type = btrfs_file_extent_type(eb, fi);
1851 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1852 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1854 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1855 rec->found_size += num_bytes;
/* Round the inline length up to a whole sector. */
1856 num_bytes = (num_bytes + mask) & ~mask;
1857 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1858 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1859 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1860 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1861 extent_offset = btrfs_file_extent_offset(eb, fi);
/* The length must be non-zero and sector aligned. */
1862 if (num_bytes == 0 || (num_bytes & mask))
1863 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced range must fit inside ram_bytes. */
1864 if (num_bytes + extent_offset >
1865 btrfs_file_extent_ram_bytes(eb, fi))
1866 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not carry any encoding. */
1867 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1868 (btrfs_file_extent_compression(eb, fi) ||
1869 btrfs_file_extent_encryption(eb, fi) ||
1870 btrfs_file_extent_other_encoding(eb, fi)))
1871 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents with a non-zero disk_bytenr add to found_size. */
1872 if (disk_bytenr > 0)
1873 rec->found_size += num_bytes;
1875 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1877 rec->extent_end = key->offset + num_bytes;
1880 * The data reloc tree will copy full extents into its inode and then
1881 * copy the corresponding csums. Because the extent it copied could be
1882 * a preallocated extent that hasn't been written to yet there may be no
1883 * csums to copy, ergo we won't have csums for our file extent. This is
1884 * ok so just don't bother checking csums if the inode belongs to the
1887 if (disk_bytenr > 0 &&
1888 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* Compressed extents use the on-disk length for the csum lookup. */
1890 if (btrfs_file_extent_compression(eb, fi))
1891 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1893 disk_bytenr += extent_offset;
1895 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1898 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1900 rec->found_csum_item = 1;
/* Fewer checksummed bytes than the extent covers. */
1901 if (found < num_bytes)
1902 rec->some_csum_missing = 1;
1903 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1905 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item of leaf @eb for the original check mode: keep the
 * active node's current inode record in step with the item keys and
 * dispatch each item to the handler for its key type.
 *
 * NOTE(review): the switch header, the break statements and the return are
 * not part of this listing (the embedded line numbers jump).
 */
1911 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1912 struct walk_control *wc)
1914 struct btrfs_key key;
1918 struct cache_tree *inode_cache;
1919 struct shared_node *active_node;
/* Test for a root item with zero refs while the active node is the root level. */
1921 if (wc->root_level == wc->active_node &&
1922 btrfs_root_refs(&root->root_item) == 0)
1925 active_node = wc->nodes[wc->active_node];
1926 inode_cache = &active_node->inode_cache;
1927 nritems = btrfs_header_nritems(eb);
1928 for (i = 0; i < nritems; i++) {
1929 btrfs_item_key_to_cpu(eb, &key, i);
/* Free space objects and orphan items get their own tests. */
1931 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1933 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/*
 * A key with a larger objectid starts a new inode: mark the previous
 * record as checked and switch to the record of the new inode.
 */
1936 if (active_node->current == NULL ||
1937 active_node->current->ino < key.objectid) {
1938 if (active_node->current) {
1939 active_node->current->checked = 1;
1940 maybe_free_inode_rec(inode_cache,
1941 active_node->current);
1943 active_node->current = get_inode_rec(inode_cache,
1945 BUG_ON(IS_ERR(active_node->current));
/* Dispatch on the item type. */
1948 case BTRFS_DIR_ITEM_KEY:
1949 case BTRFS_DIR_INDEX_KEY:
1950 ret = process_dir_item(eb, i, &key, active_node);
1952 case BTRFS_INODE_REF_KEY:
1953 ret = process_inode_ref(eb, i, &key, active_node);
1955 case BTRFS_INODE_EXTREF_KEY:
1956 ret = process_inode_extref(eb, i, &key, active_node);
1958 case BTRFS_INODE_ITEM_KEY:
1959 ret = process_inode_item(eb, i, &key, active_node);
1961 case BTRFS_EXTENT_DATA_KEY:
1962 ret = process_file_extent(root, eb, i, &key,
/*
 * Members of struct node_refs, each array indexed by tree level
 * (the struct header line is not part of this listing).
 */
/* Byte number of the block last looked up at each level. */
1973 u64 bytenr[BTRFS_MAX_LEVEL];
/* Reference count cached for that block. */
1974 u64 refs[BTRFS_MAX_LEVEL];
/* Non-zero when the block at that level has to be checked by this root. */
1975 int need_check[BTRFS_MAX_LEVEL];
/* Forward declarations for helpers defined further down in the file. */
1978 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1979 struct node_refs *nrefs, u64 level);
1980 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1981 unsigned int ext_ref);
/*
 * Check the inodes of the leaf at path->nodes[0]: skip to the first inode
 * item (or the first change of inode number), then hand the path to
 * check_inode_item(). When that call moves the path to another leaf, the
 * cached node references in @nrefs are refreshed from the root level down.
 * The original list of return values follows.
 *
 * NOTE(review): loop headers, gotos and the return are not part of this
 * listing (the embedded line numbers jump).
 */
1984 * Returns >0 Found error, not fatal, should continue
1985 * Returns <0 Fatal error, must exit the whole check
1986 * Returns 0 No errors found
1988 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1989 struct node_refs *nrefs, int *level, int ext_ref)
1991 struct extent_buffer *cur = path->nodes[0];
1992 struct btrfs_key key;
1996 int root_level = btrfs_header_level(root->node);
1998 int ret = 0; /* Final return value */
1999 int err = 0; /* Positive error bitmap */
/* Remember which leaf the walk started on. */
2001 cur_bytenr = cur->start;
2003 /* skip to first inode item or the first inode number change */
2004 nritems = btrfs_header_nritems(cur);
2005 for (i = 0; i < nritems; i++) {
2006 btrfs_item_key_to_cpu(cur, &key, i);
2008 first_ino = key.objectid;
2009 if (key.type == BTRFS_INODE_ITEM_KEY ||
2010 (first_ino && first_ino != key.objectid))
2014 path->slots[0] = nritems;
2020 err |= check_inode_item(root, path, ext_ref);
2022 /* modify cur since check_inode_item may change path */
2023 cur = path->nodes[0];
2025 if (err & LAST_ITEM)
2028 /* still have inode items in this leaf */
2029 if (cur->start == cur_bytenr)
2033 * we have switched to another leaf, above nodes may
2034 * have changed, here walk down the path, if a node
2035 * or leaf is shared, check whether we can skip this
2038 for (i = root_level; i >= 0; i--) {
/* Test for a level whose cached byte number still matches the path. */
2039 if (path->nodes[i]->start == nrefs->bytenr[i])
2042 ret = update_nodes_refs(root,
2043 path->nodes[i]->start,
2048 if (!nrefs->need_check[i]) {
/* Release the buffers held by the path below *level. */
2054 for (i = 0; i < *level; i++) {
2055 free_extent_buffer(path->nodes[i]);
2056 path->nodes[i] = NULL;
/*
 * Issue readahead for the children of @node, starting at @slot and running
 * to the last pointer of the node.
 *
 * NOTE(review): the test applied to the header level is not part of this
 * listing (the embedded line numbers jump).
 */
2065 static void reada_walk_down(struct btrfs_root *root,
2066 struct extent_buffer *node, int slot)
2068 struct btrfs_fs_info *fs_info = root->fs_info;
2075 level = btrfs_header_level(node);
2079 nritems = btrfs_header_nritems(node);
2080 for (i = slot; i < nritems; i++) {
/* The pointer's generation is passed along with the block address. */
2081 bytenr = btrfs_node_blockptr(node, i);
2082 ptr_gen = btrfs_node_ptr_generation(node, i);
2083 readahead_tree_block(fs_info, bytenr, ptr_gen);
2088 * Check the child node/leaf by the following condition:
2089 * 1. the first item key of the node/leaf should be the same with the one
2091 * 2. block in parent node should match the child node/leaf.
2092 * 3. generation of parent node and child's header should be consistent.
2094 * Or the child node/leaf pointed by the key in parent is not valid.
2096 * We hope to check leaf owner too, but since subvol may share leaves,
2097 * which makes leaf owner check not so strong, key check should be
2098 * sufficient enough for that case.
2100 static int check_child_node(struct extent_buffer *parent, int slot,
2101 struct extent_buffer *child)
2103 struct btrfs_key parent_key;
2104 struct btrfs_key child_key;
2107 btrfs_node_key_to_cpu(parent, &parent_key, slot);
2108 if (btrfs_header_level(child) == 0)
2109 btrfs_item_key_to_cpu(child, &child_key, 0);
2111 btrfs_node_key_to_cpu(child, &child_key, 0);
2113 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2116 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2117 parent_key.objectid, parent_key.type, parent_key.offset,
2118 child_key.objectid, child_key.type, child_key.offset);
2120 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2122 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2123 btrfs_node_blockptr(parent, slot),
2124 btrfs_header_bytenr(child));
2126 if (btrfs_node_ptr_generation(parent, slot) !=
2127 btrfs_header_generation(child)) {
2129 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2130 btrfs_header_generation(child),
2131 btrfs_node_ptr_generation(parent, slot));
2137 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2138 * in every fs or file tree check. Here we find its all root ids, and only check
2139 * it in the fs or file tree which has the smallest root id.
2141 static int need_check(struct btrfs_root *root, struct ulist *roots)
2143 struct rb_node *node;
2144 struct ulist_node *u;
2146 if (roots->nnodes == 1)
2149 node = rb_first(&roots->root);
2150 u = rb_entry(node, struct ulist_node, rb_node);
2152 * current root id is not smallest, we skip it and let it be checked
2153 * in the fs or file tree who hash the smallest root id.
2155 if (root->objectid != u->val)
/*
 * Refresh the cached information in @nrefs for the block at @bytenr on
 * @level: its reference count and whether this root has to check it. The
 * lookups only run when the cached byte number for @level differs from
 * @bytenr. The original description follows.
 *
 * NOTE(review): error handling, the condition selecting between the two
 * need_check assignments and the return are not part of this listing.
 */
2162 * for a tree node or leaf, we record its reference count, so later if we still
2163 * process this node or leaf, don't need to compute its reference count again.
2165 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2166 struct node_refs *nrefs, u64 level)
2170 struct ulist *roots;
/* Cache miss for this level: look the block up again. */
2172 if (nrefs->bytenr[level] != bytenr) {
2173 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2174 level, 1, &refs, NULL);
2178 nrefs->bytenr[level] = bytenr;
2179 nrefs->refs[level] = refs;
/* Collect the roots referencing the block and let need_check() decide. */
2181 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2186 check = need_check(root, roots);
2188 nrefs->need_check[level] = check;
2190 nrefs->need_check[level] = 1;
/*
 * Original-mode tree walk: descend from path->nodes[*level] towards the
 * leaves, processing each leaf with process_one_leaf(), caching extent
 * reference counts in @nrefs and validating each child block before
 * stepping into it.
 *
 * NOTE(review): many control-flow lines (conditions on refs, breaks, gotos,
 * error handling and the return) are not part of this listing because the
 * embedded line numbers jump.
 */
2197 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2198 struct walk_control *wc, int *level,
2199 struct node_refs *nrefs)
2201 enum btrfs_tree_block_status status;
2204 struct btrfs_fs_info *fs_info = root->fs_info;
2205 struct extent_buffer *next;
2206 struct extent_buffer *cur;
2210 WARN_ON(*level < 0);
2211 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached reference count when the block is the cached one. */
2213 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2214 refs = nrefs->refs[*level];
2217 ret = btrfs_lookup_extent_info(NULL, root,
2218 path->nodes[*level]->start,
2219 *level, 1, &refs, NULL);
2224 nrefs->bytenr[*level] = path->nodes[*level]->start;
2225 nrefs->refs[*level] = refs;
2229 ret = enter_shared_node(root, path->nodes[*level]->start,
2237 while (*level >= 0) {
2238 WARN_ON(*level < 0);
2239 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2240 cur = path->nodes[*level];
/* The header level must agree with the position in the path. */
2242 if (btrfs_header_level(cur) != *level)
/* Test for a slot past the last item of this block. */
2245 if (path->slots[*level] >= btrfs_header_nritems(cur))
2248 ret = process_one_leaf(root, cur, wc);
/* Interior node: fetch the pointer for the current slot. */
2253 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2254 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2256 if (bytenr == nrefs->bytenr[*level - 1]) {
2257 refs = nrefs->refs[*level - 1];
2259 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2260 *level - 1, 1, &refs, NULL);
2264 nrefs->bytenr[*level - 1] = bytenr;
2265 nrefs->refs[*level - 1] = refs;
2270 ret = enter_shared_node(root, bytenr, refs,
2273 path->slots[*level]++;
/* Use the cached buffer when it is up to date, otherwise read the block. */
2278 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2279 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2280 free_extent_buffer(next);
2281 reada_walk_down(root, cur, path->slots[*level]);
2282 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
/* Unreadable child: record it as a corrupt extent. */
2283 if (!extent_buffer_uptodate(next)) {
2284 struct btrfs_key node_key;
2286 btrfs_node_key_to_cpu(path->nodes[*level],
2288 path->slots[*level]);
2289 btrfs_add_corrupt_extent_record(root->fs_info,
2291 path->nodes[*level]->start,
2292 root->fs_info->nodesize,
/* Key, block pointer and generation must match the parent. */
2299 ret = check_child_node(cur, path->slots[*level], next);
2301 free_extent_buffer(next);
2306 if (btrfs_is_leaf(next))
2307 status = btrfs_check_leaf(root, NULL, next);
2309 status = btrfs_check_node(root, NULL, next);
2310 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2311 free_extent_buffer(next);
/* Step down one level and continue from the child's first slot. */
2316 *level = *level - 1;
2317 free_extent_buffer(path->nodes[*level]);
2318 path->nodes[*level] = next;
2319 path->slots[*level] = 0;
/* Mark the current block as fully consumed. */
2322 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * NOTE(review): this prototype repeats the check_inode_item() declaration
 * that already appears earlier in the file.
 */
2326 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2327 unsigned int ext_ref);
/*
 * Tree walk that uses update_nodes_refs() to skip blocks this root does not
 * have to check: descend from path->nodes[*level], validate every leaf/node,
 * process leaves with process_one_leaf_v2() and validate each child block
 * before stepping into it. The original list of return values follows.
 *
 * NOTE(review): many control-flow lines (breaks, gotos, error handling and
 * the return) are not part of this listing because the embedded line
 * numbers jump.
 */
2330 * Returns >0 Found error, should continue
2331 * Returns <0 Fatal error, must exit the whole check
2332 * Returns 0 No errors found
2334 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2335 int *level, struct node_refs *nrefs, int ext_ref)
2337 enum btrfs_tree_block_status status;
2340 struct btrfs_fs_info *fs_info = root->fs_info;
2341 struct extent_buffer *next;
2342 struct extent_buffer *cur;
2345 WARN_ON(*level < 0);
2346 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Refresh the cached refs/need_check state for the starting block. */
2348 ret = update_nodes_refs(root, path->nodes[*level]->start,
2353 while (*level >= 0) {
2354 WARN_ON(*level < 0);
2355 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2356 cur = path->nodes[*level];
/* The header level must agree with the position in the path. */
2358 if (btrfs_header_level(cur) != *level)
/* Test for a slot past the last item of this block. */
2361 if (path->slots[*level] >= btrfs_header_nritems(cur))
2363 /* Don't forget to check leaf/node validation */
2365 ret = btrfs_check_leaf(root, NULL, cur);
2366 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2370 ret = process_one_leaf_v2(root, path, nrefs,
/* process_one_leaf_v2() may have moved the path; reload cur. */
2372 cur = path->nodes[*level];
2375 ret = btrfs_check_node(root, NULL, cur);
2376 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
/* Interior node: fetch the pointer for the current slot. */
2381 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2382 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2384 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
/* A child this root need not check is stepped over. */
2387 if (!nrefs->need_check[*level - 1]) {
2388 path->slots[*level]++;
/* Use the cached buffer when it is up to date, otherwise read the block. */
2392 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2393 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2394 free_extent_buffer(next);
2395 reada_walk_down(root, cur, path->slots[*level]);
2396 next = read_tree_block(fs_info, bytenr, ptr_gen);
/* Unreadable child: record it as a corrupt extent. */
2397 if (!extent_buffer_uptodate(next)) {
2398 struct btrfs_key node_key;
2400 btrfs_node_key_to_cpu(path->nodes[*level],
2402 path->slots[*level]);
2403 btrfs_add_corrupt_extent_record(fs_info,
2405 path->nodes[*level]->start,
/* Key, block pointer and generation must match the parent. */
2413 ret = check_child_node(cur, path->slots[*level], next);
2417 if (btrfs_is_leaf(next))
2418 status = btrfs_check_leaf(root, NULL, next);
2420 status = btrfs_check_node(root, NULL, next);
2421 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2422 free_extent_buffer(next);
/* Step down one level and continue from the child's first slot. */
2427 *level = *level - 1;
2428 free_extent_buffer(path->nodes[*level]);
2429 path->nodes[*level] = next;
2430 path->slots[*level] = 0;
/*
 * Original-mode counterpart of walk_down_tree(): climb from *level looking
 * for a block that still has an unvisited slot, releasing finished blocks
 * and leaving shared nodes on the way up.
 *
 * NOTE(review): the body of the "more slots" branch, the else keyword and
 * the return statements are not part of this listing.
 */
2435 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2436 struct walk_control *wc, int *level)
2439 struct extent_buffer *leaf;
2441 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2442 leaf = path->nodes[i];
/* Test for another slot after the current one in this block. */
2443 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* Release the buffer held for *level. */
2448 free_extent_buffer(path->nodes[*level]);
2449 path->nodes[*level] = NULL;
2450 BUG_ON(*level > wc->active_node);
/* Leaving the level of the active shared node. */
2451 if (*level == wc->active_node)
2452 leave_shared_node(root, wc, *level);
/*
 * Counterpart of walk_down_tree_v2(): climb from *level looking for a block
 * that still has an unvisited slot, releasing finished blocks on the way up.
 *
 * NOTE(review): the remaining parameters, the body of the "more slots"
 * branch and the return statements are not part of this listing.
 */
2459 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2463 struct extent_buffer *leaf;
2465 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2466 leaf = path->nodes[i];
/* Test for another slot after the current one in this block. */
2467 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* Release the buffer held for *level. */
2472 free_extent_buffer(path->nodes[*level]);
2473 path->nodes[*level] = NULL;
2480 static int check_root_dir(struct inode_record *rec)
2482 struct inode_backref *backref;
2485 if (!rec->found_inode_item || rec->errors)
2487 if (rec->nlink != 1 || rec->found_link != 0)
2489 if (list_empty(&rec->backrefs))
2491 backref = to_inode_backref(rec->backrefs.next);
2492 if (!backref->found_inode_ref)
2494 if (backref->index != 0 || backref->namelen != 2 ||
2495 memcmp(backref->name, "..", 2))
2497 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size stored in the inode item of @rec with the size found by
 * the check (rec->found_size) and clear I_ERR_DIR_ISIZE_WRONG.
 *
 * NOTE(review): error handling, the slot adjustment and the return are not
 * part of this listing (the embedded line numbers jump).
 */
2504 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2505 struct btrfs_root *root, struct btrfs_path *path,
2506 struct inode_record *rec)
2508 struct btrfs_inode_item *ei;
2509 struct btrfs_key key;
/* Search with the largest possible offset for this inode item key. */
2512 key.objectid = rec->ino;
2513 key.type = BTRFS_INODE_ITEM_KEY;
2514 key.offset = (u64)-1;
2516 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Slot 0 means there is no earlier item in this leaf to step back to. */
2520 if (!path->slots[0]) {
2527 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* The item found must belong to the inode being repaired. */
2528 if (key.objectid != rec->ino) {
2533 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2534 struct btrfs_inode_item);
2535 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2536 btrfs_mark_buffer_dirty(path->nodes[0]);
2537 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2538 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2539 root->root_key.objectid);
2541 btrfs_release_path(path);
2545 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2546 struct btrfs_root *root,
2547 struct btrfs_path *path,
2548 struct inode_record *rec)
2552 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2553 btrfs_release_path(path);
2555 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field stored in the inode item of @rec with the value
 * found by the check (rec->found_size) and clear I_ERR_FILE_NBYTES_WRONG.
 *
 * NOTE(review): the key offset assignment, error handling and the return
 * are not part of this listing (the embedded line numbers jump).
 */
2559 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2560 struct btrfs_root *root,
2561 struct btrfs_path *path,
2562 struct inode_record *rec)
2564 struct btrfs_inode_item *ei;
2565 struct btrfs_key key;
2568 key.objectid = rec->ino;
2569 key.type = BTRFS_INODE_ITEM_KEY;
2572 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2579 /* Since ret == 0, no need to check anything */
2580 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2581 struct btrfs_inode_item);
2582 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2583 btrfs_mark_buffer_dirty(path->nodes[0]);
2584 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2585 printf("reset nbytes for ino %llu root %llu\n",
2586 rec->ino, root->root_key.objectid);
2588 btrfs_release_path(path);
/*
 * Insert the DIR_INDEX item that is missing for @backref of inode @rec, in
 * its own transaction, then bring the cached record of the parent directory
 * up to date (found_size and the I_ERR_DIR_ISIZE_WRONG flag).
 *
 * NOTE(review): error handling after the insert, the handling of a missing
 * directory record and the final return are not part of this listing.
 */
2592 static int add_missing_dir_index(struct btrfs_root *root,
2593 struct cache_tree *inode_cache,
2594 struct inode_record *rec,
2595 struct inode_backref *backref)
2597 struct btrfs_path path;
2598 struct btrfs_trans_handle *trans;
2599 struct btrfs_dir_item *dir_item;
2600 struct extent_buffer *leaf;
2601 struct btrfs_key key;
2602 struct btrfs_disk_key disk_key;
2603 struct inode_record *dir_rec;
2604 unsigned long name_ptr;
/* The item holds one btrfs_dir_item header followed by the name. */
2605 u32 data_size = sizeof(*dir_item) + backref->namelen;
2608 trans = btrfs_start_transaction(root, 1);
2610 return PTR_ERR(trans);
2612 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2613 (unsigned long long)rec->ino);
2615 btrfs_init_path(&path);
/* DIR_INDEX items are keyed (directory, DIR_INDEX, index). */
2616 key.objectid = backref->dir;
2617 key.type = BTRFS_DIR_INDEX_KEY;
2618 key.offset = backref->index;
2619 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2622 leaf = path.nodes[0];
2623 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* The entry points at the inode item of @rec. */
2625 disk_key.objectid = cpu_to_le64(rec->ino);
2626 disk_key.type = BTRFS_INODE_ITEM_KEY;
2627 disk_key.offset = 0;
2629 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2630 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2631 btrfs_set_dir_data_len(leaf, dir_item, 0);
2632 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is stored directly behind the fixed header. */
2633 name_ptr = (unsigned long)(dir_item + 1);
2634 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2635 btrfs_mark_buffer_dirty(leaf);
2636 btrfs_release_path(&path);
/* NOTE(review): the result of the commit is not checked here. */
2637 btrfs_commit_transaction(trans, root);
2639 backref->found_dir_index = 1;
2640 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2641 BUG_ON(IS_ERR(dir_rec));
/* Add the new name to the directory's found size and re-evaluate the flag. */
2644 dir_rec->found_size += backref->namelen;
2645 if (dir_rec->found_size == dir_rec->isize &&
2646 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2647 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2648 if (dir_rec->found_size != dir_rec->isize)
2649 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by @backref, in its own transaction.
 *
 * NOTE(review): the checks on the lookup result, the condition choosing
 * between the two delete calls and the return statements are not part of
 * this listing (the embedded line numbers jump).
 */
2654 static int delete_dir_index(struct btrfs_root *root,
2655 struct inode_backref *backref)
2657 struct btrfs_trans_handle *trans;
2658 struct btrfs_dir_item *di;
2659 struct btrfs_path path;
2662 trans = btrfs_start_transaction(root, 1);
2664 return PTR_ERR(trans);
2666 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2667 (unsigned long long)backref->dir,
2668 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2669 (unsigned long long)root->objectid);
2671 btrfs_init_path(&path);
/* Locate the entry by directory, name and index. */
2672 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2673 backref->name, backref->namelen,
2674 backref->index, -1);
/* Release the path and commit on this exit path. */
2677 btrfs_release_path(&path);
2678 btrfs_commit_transaction(trans, root);
/* Two delete calls are visible: the whole item, or a single name inside it. */
2685 ret = btrfs_del_item(trans, root, &path);
2687 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2689 btrfs_release_path(&path);
/* NOTE(review): the result of the commit is not checked here. */
2690 btrfs_commit_transaction(trans, root);
/*
 * Recreate a missing inode item for @rec from what the check found: link
 * count, nbytes and size come from the record, the mode is a directory when
 * a dir item was found and a regular file otherwise, and the access, change
 * and modification times are set to the current time.
 *
 * NOTE(review): the third parameter, the condition selecting the nlink value
 * and the return statements are not part of this listing.
 */
2694 static int create_inode_item(struct btrfs_root *root,
2695 struct inode_record *rec,
2698 struct btrfs_trans_handle *trans;
2699 struct btrfs_inode_item inode_item;
2700 time_t now = time(NULL);
2703 trans = btrfs_start_transaction(root, 1);
2704 if (IS_ERR(trans)) {
2705 ret = PTR_ERR(trans);
2709 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2710 "be incomplete, please check permissions and content after "
2711 "the fsck completes.\n", (unsigned long long)root->objectid,
2712 (unsigned long long)rec->ino);
/* Start from an all-zero item and fill in what is known. */
2714 memset(&inode_item, 0, sizeof(inode_item));
2715 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2717 btrfs_set_stack_inode_nlink(&inode_item, 1);
2719 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2720 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2721 if (rec->found_dir_item) {
2722 if (rec->found_file_extent)
2723 fprintf(stderr, "root %llu inode %llu has both a dir "
2724 "item and extents, unsure if it is a dir or a "
2725 "regular file so setting it as a directory\n",
2726 (unsigned long long)root->objectid,
2727 (unsigned long long)rec->ino);
2728 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2729 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/* NOTE(review): this condition is always true when the branch is reached. */
2730 } else if (!rec->found_dir_item) {
2731 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2732 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
/* atime, ctime and mtime get the current time; otime is set to zero. */
2734 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2735 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2736 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2737 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2738 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2739 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2740 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2741 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2743 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
/* NOTE(review): the result of the commit is not checked here. */
2745 btrfs_commit_transaction(trans, root);
/*
 * Walk every backref of @rec and repair mismatches between the three pieces
 * that describe one name: dir_item, dir_index and inode_ref.
 *
 * Repairs visible in this listing:
 *  - recreate the inode item of the root directory when it is missing;
 *  - delete a dir_index that has no inode_ref, or whose index disagrees
 *    (REF_ERR_INDEX_UNMATCH), and drop that backref from the list;
 *  - add a missing dir_index when dir_item and inode_ref both exist;
 *  - insert a dir index/item pair when only the inode_ref exists;
 *  - recreate the inode item when all three refs exist and agree but the
 *    inode item itself was not found.
 *
 * Returns ret when it is non-zero, otherwise the value of `repaired`.
 *
 * NOTE(review): the end of the parameter list and several statements
 * (`goto`/`break`/`repaired++`, closing braces) are not part of this listing.
 * A flag named `delete` is tested below; presumably it is the last parameter
 * and selects a "remove bad entries" pass versus an "add missing entries"
 * pass -- confirm against the complete file.
 */
2749 static int repair_inode_backrefs(struct btrfs_root *root,
2750 struct inode_record *rec,
2751 struct cache_tree *inode_cache,
2754 struct inode_backref *tmp, *backref;
2755 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2759 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* Root directory without an inode item: rebuild it (third argument 1). */
2760 if (!delete && rec->ino == root_dirid) {
2761 if (!rec->found_inode_item) {
2762 ret = create_inode_item(root, rec, 1);
2769 /* Index 0 for root dir's are special, don't mess with it */
2770 if (rec->ino == root_dirid && backref->index == 0)
2774 ((backref->found_dir_index && !backref->found_inode_ref) ||
2775 (backref->found_dir_index && backref->found_inode_ref &&
2776 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2777 ret = delete_dir_index(root, backref);
2781 list_del(&backref->list);
/* dir_item and inode_ref are present, only the dir_index is missing. */
2786 if (!delete && !backref->found_dir_index &&
2787 backref->found_dir_item && backref->found_inode_ref) {
2788 ret = add_missing_dir_index(root, inode_cache, rec,
2793 if (backref->found_dir_item &&
2794 backref->found_dir_index) {
2795 if (!backref->errors &&
2796 backref->found_inode_ref) {
2797 list_del(&backref->list);
/* Only the inode_ref exists: re-insert the dir item for this name. */
2804 if (!delete && (!backref->found_dir_index &&
2805 !backref->found_dir_item &&
2806 backref->found_inode_ref)) {
2807 struct btrfs_trans_handle *trans;
2808 struct btrfs_key location;
2810 ret = check_dir_conflict(root, backref->name,
2816 * let nlink fixing routine to handle it,
2817 * which can do it better.
2822 location.objectid = rec->ino;
2823 location.type = BTRFS_INODE_ITEM_KEY;
2824 location.offset = 0;
2826 trans = btrfs_start_transaction(root, 1);
2827 if (IS_ERR(trans)) {
2828 ret = PTR_ERR(trans);
2831 fprintf(stderr, "adding missing dir index/item pair "
2833 (unsigned long long)rec->ino);
2834 ret = btrfs_insert_dir_item(trans, root, backref->name,
2836 backref->dir, &location,
2837 imode_to_type(rec->imode),
2840 btrfs_commit_transaction(trans, root);
/* All three refs exist and the index matches, but the inode item is gone. */
2844 if (!delete && (backref->found_inode_ref &&
2845 backref->found_dir_index &&
2846 backref->found_dir_item &&
2847 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2848 !rec->found_inode_item)) {
2849 ret = create_inode_item(root, rec, 0);
2856 return ret ? ret : repaired;
2860 * To determine the file type for nlink/inode_item repair
2862 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2863 * Return -ENOENT if file type is not found.
2865 static int find_file_type(struct inode_record *rec, u8 *type)
2867 struct inode_backref *backref;
2869 /* For inode item recovered case */
2870 if (rec->found_inode_item) {
2871 *type = imode_to_type(rec->imode);
2875 list_for_each_entry(backref, &rec->backrefs, list) {
2876 if (backref->found_dir_index || backref->found_dir_item) {
2877 *type = backref->filetype;
2885 * To determine the file name for nlink repair
2887 * Return 0 if file name is found, set name and namelen.
2888 * Return -ENOENT if file name is not found.
2890 static int find_file_name(struct inode_record *rec,
2891 char *name, int *namelen)
2893 struct inode_backref *backref;
2895 list_for_each_entry(backref, &rec->backrefs, list) {
2896 if (backref->found_dir_index || backref->found_dir_item ||
2897 backref->found_inode_ref) {
2898 memcpy(name, backref->name, backref->namelen);
2899 *namelen = backref->namelen;
/*
 * Rebuild the link count of rec->ino from scratch inside @trans.
 *
 * Steps visible below:
 *  1. rec->found_link is cleared, it is recomputed by this routine;
 *  2. every backref is unlinked with btrfs_unlink(); backrefs that do not
 *     have all of dir_index, dir_item and inode_ref are also removed from
 *     the in-memory list so they are not re-added;
 *  3. the on-disk inode item is looked up and its nlink set to 0;
 *  4. each remaining (fully valid) backref is re-added with btrfs_add_link(),
 *     which is relied upon to increment nlink.
 *
 * @path is released before the re-add loop and again at the end.
 *
 * NOTE(review): the error checks after each call, the `found_link`
 * accounting inside the last loop and the return statement are not part of
 * this listing.
 */
2906 /* Reset the nlink of the inode to the correct one */
2907 static int reset_nlink(struct btrfs_trans_handle *trans,
2908 struct btrfs_root *root,
2909 struct btrfs_path *path,
2910 struct inode_record *rec)
2912 struct inode_backref *backref;
2913 struct inode_backref *tmp;
2914 struct btrfs_key key;
2915 struct btrfs_inode_item *inode_item;
2918 /* We don't believe this either, reset it and iterate backref */
2919 rec->found_link = 0;
2921 /* Remove all backref including the valid ones */
2922 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2923 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2924 backref->index, backref->name,
2925 backref->namelen, 0);
2929 /* remove invalid backref, so it won't be added back */
2930 if (!(backref->found_dir_index &&
2931 backref->found_dir_item &&
2932 backref->found_inode_ref)) {
2933 list_del(&backref->list);
2940 /* Set nlink to 0 */
2941 key.objectid = rec->ino;
2942 key.type = BTRFS_INODE_ITEM_KEY;
/* cow = 1: the leaf is about to be modified in place. */
2944 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2951 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2952 struct btrfs_inode_item);
2953 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2954 btrfs_mark_buffer_dirty(path->nodes[0]);
2955 btrfs_release_path(path);
2958 * Add back valid inode_ref/dir_item/dir_index,
2959 * add_link() will handle the nlink inc, so new nlink must be correct
2961 list_for_each_entry(backref, &rec->backrefs, list) {
2962 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2963 backref->name, backref->namelen,
2964 backref->filetype, &backref->index, 1);
2969 btrfs_release_path(path);
2973 static int get_highest_inode(struct btrfs_trans_handle *trans,
2974 struct btrfs_root *root,
2975 struct btrfs_path *path,
2978 struct btrfs_key key, found_key;
2981 btrfs_init_path(path);
2982 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2984 key.type = BTRFS_INODE_ITEM_KEY;
2985 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2987 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2988 path->slots[0] - 1);
2989 *highest_ino = found_key.objectid;
2992 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2994 btrfs_release_path(path);
/*
 * Fix a wrong link count of rec->ino and, if the inode ends up with no valid
 * link at all, move it into a "lost+found" directory.
 *
 * Sequence visible below:
 *  - recover a name and a file type from the backrefs before they are
 *    removed; fall back to the decimal inode number as name and to
 *    BTRFS_FT_REG_FILE as type;
 *  - reset_nlink() recomputes the link count from the valid backrefs;
 *  - when no link remains (rec->found_link == 0): determine an objectid via
 *    get_highest_inode(), create "lost+found" under
 *    BTRFS_FIRST_FREE_OBJECTID with btrfs_mkdir(), and link the inode into
 *    it; on -EEXIST a ".INO" suffix is appended to the name and the link is
 *    retried for as long as the name still fits;
 *  - I_ERR_LINK_COUNT_WRONG is cleared and @path released in all visible
 *    exit paths.
 *
 * NOTE(review): several declarations (namelen, type, lost_found_ino, ret),
 * the error branches and the return statement are not part of this listing.
 */
2998 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2999 struct btrfs_root *root,
3000 struct btrfs_path *path,
3001 struct inode_record *rec)
3003 char *dir_name = "lost+found";
3004 char namebuf[BTRFS_NAME_LEN] = {0};
3009 int name_recovered = 0;
3010 int type_recovered = 0;
3014 * Get file name and type first before these invalid inode ref
3015 * are deleted by remove_all_invalid_backref()
/* Both helpers return 0 on success, hence the negation. */
3017 name_recovered = !find_file_name(rec, namebuf, &namelen);
3018 type_recovered = !find_file_type(rec, &type);
/* No name anywhere: use the inode number, printed in decimal, as name. */
3020 if (!name_recovered) {
3021 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3022 rec->ino, rec->ino);
3023 namelen = count_digits(rec->ino);
3024 sprintf(namebuf, "%llu", rec->ino);
3027 if (!type_recovered) {
3028 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3030 type = BTRFS_FT_REG_FILE;
3034 ret = reset_nlink(trans, root, path, rec);
3037 "Failed to reset nlink for inode %llu: %s\n",
3038 rec->ino, strerror(-ret));
/* Nothing links to the inode any more: park it in lost+found. */
3042 if (rec->found_link == 0) {
3043 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3047 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3048 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3051 fprintf(stderr, "Failed to create '%s' dir: %s\n",
3052 dir_name, strerror(-ret));
3055 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
3056 namebuf, namelen, type, NULL, 1);
3058 * Add ".INO" suffix several times to handle case where
3059 * "FILENAME.INO" is already taken by another file.
3061 while (ret == -EEXIST) {
3063 * Conflicting file name, add ".INO" as suffix * +1 for '.'
3065 if (namelen + count_digits(rec->ino) + 1 >
3070 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
3072 namelen += count_digits(rec->ino) + 1;
3073 ret = btrfs_add_link(trans, root, rec->ino,
3074 lost_found_ino, namebuf,
3075 namelen, type, NULL, 1);
3079 "Failed to link the inode %llu to %s dir: %s\n",
3080 rec->ino, dir_name, strerror(-ret));
3084 * Just increase the found_link, don't actually add the
3085 * backref. This will make things easier and this inode
3086 * record will be freed after the repair is done.
3087 * So fsck will not report problem about this inode.
3090 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3091 namelen, namebuf, dir_name);
3093 printf("Fixed the nlink of inode %llu\n", rec->ino);
3096 * Clear the flag anyway, or we will loop forever for the same inode
3097 * as it will not be removed from the bad inode list and the dead loop
3100 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3101 btrfs_release_path(path);
/*
 * Read-only probe (no transaction is passed to the search) that looks at the
 * first EXTENT_DATA item of inode @ino in @root and tests whether its type
 * differs from BTRFS_FILE_EXTENT_INLINE.
 *
 * The search may end one slot past the last item of a leaf; in that case the
 * next leaf is loaded before the key is examined.  The walk stops as soon as
 * the key no longer belongs to @ino or is not an EXTENT_DATA key.
 *
 * NOTE(review): key.objectid/key.offset setup, the loop construct, the
 * result assignment and the return statement are not part of this listing.
 * repair_inode_no_item() treats a non-zero result as "has a regular extent".
 */
3106 * Check if there is any normal(reg or prealloc) file extent for given
3108 * This is used to determine the file type when neither its dir_index/item or
3109 * inode_item exists.
3111 * This will *NOT* report error, if any error happens, just consider it does
3112 * not have any normal file extent.
3114 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3116 struct btrfs_path path;
3117 struct btrfs_key key;
3118 struct btrfs_key found_key;
3119 struct btrfs_file_extent_item *fi;
3123 btrfs_init_path(&path);
3125 key.type = BTRFS_EXTENT_DATA_KEY;
3128 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* Inexact match that landed past the end of the leaf: step to the next one. */
3133 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3134 ret = btrfs_next_leaf(root, &path);
3141 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3143 if (found_key.objectid != ino ||
3144 found_key.type != BTRFS_EXTENT_DATA_KEY)
3146 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3147 struct btrfs_file_extent_item);
3148 type = btrfs_file_extent_type(path.nodes[0], fi);
/* Anything that is not an inline extent counts as a normal file extent. */
3149 if (type != BTRFS_FILE_EXTENT_INLINE) {
3155 btrfs_release_path(&path);
3159 static u32 btrfs_type_to_imode(u8 type)
3161 static u32 imode_by_btrfs_type[] = {
3162 [BTRFS_FT_REG_FILE] = S_IFREG,
3163 [BTRFS_FT_DIR] = S_IFDIR,
3164 [BTRFS_FT_CHRDEV] = S_IFCHR,
3165 [BTRFS_FT_BLKDEV] = S_IFBLK,
3166 [BTRFS_FT_FIFO] = S_IFIFO,
3167 [BTRFS_FT_SOCK] = S_IFSOCK,
3168 [BTRFS_FT_SYMLINK] = S_IFLNK,
3171 return imode_by_btrfs_type[(type)];
/*
 * Rebuild the inode item of rec->ino when it is missing.
 *
 * The file type is taken from find_file_type() when possible.  Otherwise it
 * is guessed, in this order: a non-inline file extent means regular file, a
 * dir item means directory, orphan extents mean regular file, and regular
 * file is also the final fallback (with a message).
 *
 * The inode is created with btrfs_new_inode() using
 * mode | btrfs_type_to_imode(filetype).  Afterwards the record is updated:
 * I_ERR_NO_INODE_ITEM is cleared and I_ERR_LINK_COUNT_WRONG is set so that
 * the nlink repair runs for this inode as well.
 *
 * NOTE(review): the declarations of `filetype`, `mode` and `ret`, the error
 * check after btrfs_new_inode() and the return statement are not part of
 * this listing.
 */
3174 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3175 struct btrfs_root *root,
3176 struct btrfs_path *path,
3177 struct inode_record *rec)
3181 int type_recovered = 0;
3184 printf("Trying to rebuild inode:%llu\n", rec->ino);
/* find_file_type() returns 0 on success, hence the negation. */
3186 type_recovered = !find_file_type(rec, &filetype);
3189 * Try to determine inode type if type not found.
3191 * For found regular file extent, it must be FILE.
3192 * For found dir_item/index, it must be DIR.
3194 * For undetermined one, use FILE as fallback.
3197 * 1. If found backref(inode_index/item is already handled) to it,
3199 * Need new inode-inode ref structure to allow search for that.
/* Guess the type from what the check pass saw for this inode. */
3201 if (!type_recovered) {
3202 if (rec->found_file_extent &&
3203 find_normal_file_extent(root, rec->ino)) {
3205 filetype = BTRFS_FT_REG_FILE;
3206 } else if (rec->found_dir_item) {
3208 filetype = BTRFS_FT_DIR;
3209 } else if (!list_empty(&rec->orphan_extents)) {
3211 filetype = BTRFS_FT_REG_FILE;
3213 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3216 filetype = BTRFS_FT_REG_FILE;
3220 ret = btrfs_new_inode(trans, root, rec->ino,
3221 mode | btrfs_type_to_imode(filetype));
3226 * Here inode rebuild is done, we only rebuild the inode item,
3227 * don't repair the nlink(like move to lost+found).
3228 * That is the job of nlink repair.
3230 * We just fill the record and return
3232 rec->found_dir_item = 1;
3233 rec->imode = mode | btrfs_type_to_imode(filetype);
3235 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3236 /* Ensure the inode_nlinks repair function will be called */
3237 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach the orphan data extents recorded for an inode.
 *
 * For each entry of rec->orphan_extents the file range
 * [offset, offset + disk_len) is probed with btrfs_get_extent().  The
 * visible lines show two outcomes: a conflicting orphan is reported and its
 * extent released with btrfs_free_extent(); otherwise a file extent item
 * covering the orphan is inserted with btrfs_insert_file_extent(), using
 * disk_len for both length arguments.
 *
 * After an insertion the bookkeeping in @rec is refreshed: found_size grows
 * by disk_len (clearing I_ERR_FILE_NBYTES_WRONG once it equals nbytes), the
 * matching range is removed from rec->holes (clearing
 * I_ERR_FILE_EXTENT_DISCOUNT once no hole is left) and the orphan is removed
 * from the list.  I_ERR_FILE_EXTENT_ORPHAN is cleared after the loop.
 *
 * NOTE(review): the branch conditions on `ret`, the freeing of the orphan
 * entries and the return statement are not part of this listing.
 */
3242 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3243 struct btrfs_root *root,
3244 struct btrfs_path *path,
3245 struct inode_record *rec)
3247 struct orphan_data_extent *orphan;
3248 struct orphan_data_extent *tmp;
3251 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3253 * Check for conflicting file extents
3255 * Here we don't know whether the extents is compressed or not,
3256 * so we can only assume it not compressed nor data offset,
3257 * and use its disk_len as extent length.
3259 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3260 orphan->offset, orphan->disk_len, 0);
3261 btrfs_release_path(path);
3266 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3267 orphan->disk_bytenr, orphan->disk_len);
3268 ret = btrfs_free_extent(trans,
3269 root->fs_info->extent_root,
3270 orphan->disk_bytenr, orphan->disk_len,
3271 0, root->objectid, orphan->objectid,
3276 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3277 orphan->offset, orphan->disk_bytenr,
3278 orphan->disk_len, orphan->disk_len);
3282 /* Update file size info */
3283 rec->found_size += orphan->disk_len;
3284 if (rec->found_size == rec->nbytes)
3285 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3287 /* Update the file extent hole info too */
3288 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3292 if (RB_EMPTY_ROOT(&rec->holes))
3293 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3295 list_del(&orphan->list);
3298 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps between the file extents of an inode ("discount" extents).
 *
 * rec->holes is an rb-tree of file_extent_hole entries.  The tree is drained
 * from its first node: each hole is punched with btrfs_punch_hole() and then
 * removed with del_file_extent_hole(); I_ERR_FILE_EXTENT_DISCOUNT is cleared
 * once the tree is empty.
 *
 * A second btrfs_punch_hole() call covers the whole file, from offset 0 to
 * rec->isize rounded up to the sector size, for a file that lost all its
 * file extents.
 *
 * NOTE(review): the loop construct, the condition guarding the whole-file
 * case, the error checks and the return statement are not part of this
 * listing.
 */
3303 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3304 struct btrfs_root *root,
3305 struct btrfs_path *path,
3306 struct inode_record *rec)
3308 struct rb_node *node;
3309 struct file_extent_hole *hole;
3313 node = rb_first(&rec->holes);
3317 hole = rb_entry(node, struct file_extent_hole, node);
3318 ret = btrfs_punch_hole(trans, root, rec->ino,
3319 hole->start, hole->len);
3322 ret = del_file_extent_hole(&rec->holes, hole->start,
3326 if (RB_EMPTY_ROOT(&rec->holes))
3327 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* Re-read the first node: the previous one was just removed from the tree. */
3328 node = rb_first(&rec->holes);
3330 /* special case for a file losing all its file extent */
3332 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3333 round_up(rec->isize,
3334 root->fs_info->sectorsize));
3338 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3339 rec->ino, root->objectid);
/*
 * Run every applicable inode repair for @rec inside one transaction.
 *
 * Only the error bits listed in the first test are repairable here; the
 * transaction is started with 7 reserved items (the breakdown is given in
 * the comment above btrfs_start_transaction()).
 *
 * The repairs run in a fixed order and each one is attempted only while the
 * previous ones succeeded (ret == 0) and its error bit is still set:
 * missing inode item, orphan extents, discount extents, directory isize,
 * orphan item, link count, nbytes.  The order matters because earlier steps
 * modify rec->errors (repair_inode_no_item() sets I_ERR_LINK_COUNT_WRONG,
 * for instance).
 *
 * The transaction is committed and the path released whatever `ret` is.
 * Returns PTR_ERR(trans) if the transaction cannot be started.
 *
 * NOTE(review): the declaration/initialisation of `ret`, the early return
 * for non-repairable records and the final return are not part of this
 * listing.
 */
3344 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3346 struct btrfs_trans_handle *trans;
3347 struct btrfs_path path;
/* Nothing to do unless at least one repairable error bit is set. */
3350 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3351 I_ERR_NO_ORPHAN_ITEM |
3352 I_ERR_LINK_COUNT_WRONG |
3353 I_ERR_NO_INODE_ITEM |
3354 I_ERR_FILE_EXTENT_ORPHAN |
3355 I_ERR_FILE_EXTENT_DISCOUNT|
3356 I_ERR_FILE_NBYTES_WRONG)))
3360 * For nlink repair, it may create a dir and add link, so
3361 * 2 for parent(256)'s dir_index and dir_item
3362 * 2 for lost+found dir's inode_item and inode_ref
3363 * 1 for the new inode_ref of the file
3364 * 2 for lost+found dir's dir_index and dir_item for the file
3366 trans = btrfs_start_transaction(root, 7);
3368 return PTR_ERR(trans);
3370 btrfs_init_path(&path);
3371 if (rec->errors & I_ERR_NO_INODE_ITEM)
3372 ret = repair_inode_no_item(trans, root, &path, rec);
3373 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3374 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3375 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3376 ret = repair_inode_discount_extent(trans, root, &path, rec);
3377 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3378 ret = repair_inode_isize(trans, root, &path, rec);
3379 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3380 ret = repair_inode_orphan_item(trans, root, &path, rec);
3381 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3382 ret = repair_inode_nlinks(trans, root, &path, rec);
3383 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3384 ret = repair_inode_nbytes(trans, root, &path, rec);
3385 btrfs_commit_transaction(trans, root);
3386 btrfs_release_path(&path);
/*
 * Final pass over the inode records collected for one fs/subvolume tree:
 * repair what can be repaired, report the rest and free every record.
 *
 * Phases visible below:
 *  1. A root whose root item has zero refs is not checked further; a warning
 *     is printed if records were nevertheless collected for it.
 *  2. In repair mode the backrefs of every record are fixed first through
 *     repair_inode_backrefs(), driven by a `stage` counter.
 *  3. The root directory record is looked up and validated with
 *     check_root_dir(); a missing root directory can be recreated with
 *     btrfs_make_root_dir().
 *  4. Every remaining record is taken out of the cache.  The root dir and
 *     BTRFS_ORPHAN_OBJECTID records are simply freed.  For the others the
 *     error bits are completed (missing inode item, link count mismatch),
 *     try_repair_inode() is attempted, and anything still wrong is printed
 *     together with its unresolved backrefs.
 *
 * Returns -1 when `error` is positive, 0 otherwise.
 *
 * NOTE(review): declarations (stage, ret, err, error), loop constructs,
 * most branch conditions and `goto`/`continue` lines are not part of this
 * listing; the phase descriptions above are limited to what the remaining
 * lines show.
 */
3390 static int check_inode_recs(struct btrfs_root *root,
3391 struct cache_tree *inode_cache)
3393 struct cache_extent *cache;
3394 struct ptr_node *node;
3395 struct inode_record *rec;
3396 struct inode_backref *backref;
3401 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* Dead root (no refs): collected records are unexpected, warn about them. */
3403 if (btrfs_root_refs(&root->root_item) == 0) {
3404 if (!cache_tree_empty(inode_cache))
3405 fprintf(stderr, "warning line %d\n", __LINE__);
3410 * We need to repair backrefs first because we could change some of the
3411 * errors in the inode recs.
3413 * We also need to go through and delete invalid backrefs first and then
3414 * add the correct ones second. We do this because we may get EEXIST
3415 * when adding back the correct index because we hadn't yet deleted the
3418 * For example, if we were missing a dir index then the directories
3419 * isize would be wrong, so if we fixed the isize to what we thought it
3420 * would be and then fixed the backref we'd still have a invalid fs, so
3421 * we need to add back the dir index and then check to see if the isize
3426 if (stage == 3 && !err)
3429 cache = search_cache_extent(inode_cache, 0);
/* Backref repair only runs in repair mode. */
3430 while (repair && cache) {
3431 node = container_of(cache, struct ptr_node, cache);
3433 cache = next_cache_extent(cache);
3435 /* Need to free everything up and rescan */
3437 remove_cache_extent(inode_cache, &node->cache);
3439 free_inode_rec(rec);
3443 if (list_empty(&rec->backrefs))
3446 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Root directory: fetch its record (third argument 0) and validate it. */
3460 rec = get_inode_rec(inode_cache, root_dirid, 0);
3461 BUG_ON(IS_ERR(rec));
3463 ret = check_root_dir(rec);
3465 fprintf(stderr, "root %llu root dir %llu error\n",
3466 (unsigned long long)root->root_key.objectid,
3467 (unsigned long long)root_dirid);
3468 print_inode_error(root, rec);
3473 struct btrfs_trans_handle *trans;
3475 trans = btrfs_start_transaction(root, 1);
3476 if (IS_ERR(trans)) {
3477 err = PTR_ERR(trans);
3482 "root %llu missing its root dir, recreating\n",
3483 (unsigned long long)root->objectid);
3485 ret = btrfs_make_root_dir(trans, root, root_dirid);
3488 btrfs_commit_transaction(trans, root);
3492 fprintf(stderr, "root %llu root dir %llu not found\n",
3493 (unsigned long long)root->root_key.objectid,
3494 (unsigned long long)root_dirid);
/* Main pass: consume every record that is still in the cache. */
3498 cache = search_cache_extent(inode_cache, 0);
3501 node = container_of(cache, struct ptr_node, cache);
3503 remove_cache_extent(inode_cache, &node->cache);
/* The root dir was handled above; the orphan objectid is not a real inode. */
3505 if (rec->ino == root_dirid ||
3506 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3507 free_inode_rec(rec);
3511 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3512 ret = check_orphan_item(root, rec->ino);
3514 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3515 if (can_free_inode_rec(rec)) {
3516 free_inode_rec(rec);
/* Complete the error bits before attempting a repair. */
3521 if (!rec->found_inode_item)
3522 rec->errors |= I_ERR_NO_INODE_ITEM;
3523 if (rec->found_link != rec->nlink)
3524 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3526 ret = try_repair_inode(root, rec);
3527 if (ret == 0 && can_free_inode_rec(rec)) {
3528 free_inode_rec(rec);
3534 if (!(repair && ret == 0))
3536 print_inode_error(root, rec);
/* Derive the per-backref error bits from the found_* flags and print them. */
3537 list_for_each_entry(backref, &rec->backrefs, list) {
3538 if (!backref->found_dir_item)
3539 backref->errors |= REF_ERR_NO_DIR_ITEM;
3540 if (!backref->found_dir_index)
3541 backref->errors |= REF_ERR_NO_DIR_INDEX;
3542 if (!backref->found_inode_ref)
3543 backref->errors |= REF_ERR_NO_INODE_REF;
3544 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3545 " namelen %u name %s filetype %d errors %x",
3546 (unsigned long long)backref->dir,
3547 (unsigned long long)backref->index,
3548 backref->namelen, backref->name,
3549 backref->filetype, backref->errors);
3550 print_ref_error(backref->errors);
3552 free_inode_rec(rec);
3554 return (error > 0) ? -1 : 0;
/*
 * Look up the root_record for `objectid` in @root_cache, creating it when
 * it does not exist yet.
 *
 * A new record is zero-allocated, gets an empty backref list and is inserted
 * into the cache as the range [objectid, objectid + 1).
 *
 * Returns the record, ERR_PTR(-ENOMEM) when the allocation fails, or
 * ERR_PTR(-EEXIST) on the insertion failure path.
 *
 * NOTE(review): the second parameter (presumably `u64 objectid`), the
 * if/else structure and the final return are not part of this listing.
 */
3557 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3560 struct cache_extent *cache;
3561 struct root_record *rec = NULL;
/* Size-1 lookup: only an entry covering exactly this objectid matches. */
3564 cache = lookup_cache_extent(root_cache, objectid, 1);
3566 rec = container_of(cache, struct root_record, cache);
3568 rec = calloc(1, sizeof(*rec));
3570 return ERR_PTR(-ENOMEM);
3571 rec->objectid = objectid;
3572 INIT_LIST_HEAD(&rec->backrefs);
3573 rec->cache.start = objectid;
3574 rec->cache.size = 1;
3576 ret = insert_cache_extent(root_cache, &rec->cache);
3578 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec that matches (ref_root, dir, name), or append a
 * new one.
 *
 * An existing entry matches when ref_root, dir and namelen are equal and the
 * name bytes compare equal; @index is not part of the match.  A new entry is
 * allocated with room for the name plus a terminating NUL, filled in and
 * added to the tail of rec->backrefs.
 *
 * NOTE(review): the `continue`/`return` lines, the allocation check and the
 * assignment of backref->dir are not part of this listing.
 */
3583 static struct root_backref *get_root_backref(struct root_record *rec,
3584 u64 ref_root, u64 dir, u64 index,
3585 const char *name, int namelen)
3587 struct root_backref *backref;
3589 list_for_each_entry(backref, &rec->backrefs, list) {
3590 if (backref->ref_root != ref_root || backref->dir != dir ||
3591 backref->namelen != namelen)
3593 if (memcmp(name, backref->name, namelen))
/* The name is stored inline after the struct, hence the extra bytes. */
3598 backref = calloc(1, sizeof(*backref) + namelen + 1);
3601 backref->ref_root = ref_root;
3603 backref->index = index;
3604 backref->namelen = namelen;
3605 memcpy(backref->name, name, namelen);
3606 backref->name[namelen] = '\0';
3607 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Destructor for one root_record stored in a cache tree: detaches every
 * backref from rec->backrefs until the list is empty.
 *
 * NOTE(review): the statements that release the backrefs and the record
 * itself are not part of this listing.
 */
3611 static void free_root_record(struct cache_extent *cache)
3613 struct root_record *rec;
3614 struct root_backref *backref;
3616 rec = container_of(cache, struct root_record, cache);
3617 while (!list_empty(&rec->backrefs)) {
3618 backref = to_root_backref(rec->backrefs.next);
3619 list_del(&backref->list);
/*
 * Macro instantiation that takes free_root_record as the per-entry
 * destructor; presumably it defines the tree-wide free helper for root
 * records -- TODO confirm against the macro definition.
 */
3626 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that one item describing a subvolume reference was seen.
 *
 * @root_id is the referenced root, @ref_root the root that contains the
 * reference.  @item_type says which on-disk item was found (DIR_ITEM,
 * DIR_INDEX, ROOT_REF or ROOT_BACKREF) and sets the matching found_* flag on
 * the backref; @errors is OR-ed into backref->errors.
 *
 * For every item type except DIR_ITEM the index is cross-checked: if another
 * index-carrying item was already seen and the index differs,
 * REF_ERR_INDEX_UNMATCH is set; otherwise the index is stored.  Seeing a
 * ROOT_REF or ROOT_BACKREF twice sets the corresponding REF_ERR_DUP_* bit.
 *
 * A backref becomes reachable once both the forward ref and the dir item
 * have been found.
 *
 * NOTE(review): the found_ref accounting lines, the check of the
 * get_root_backref() result and the return statement are not part of this
 * listing.
 */
3628 static int add_root_backref(struct cache_tree *root_cache,
3629 u64 root_id, u64 ref_root, u64 dir, u64 index,
3630 const char *name, int namelen,
3631 int item_type, int errors)
3633 struct root_record *rec;
3634 struct root_backref *backref;
3636 rec = get_root_rec(root_cache, root_id);
3637 BUG_ON(IS_ERR(rec));
3638 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3641 backref->errors |= errors;
/* A DIR_ITEM carries no index, so it is excluded from the index check. */
3643 if (item_type != BTRFS_DIR_ITEM_KEY) {
3644 if (backref->found_dir_index || backref->found_back_ref ||
3645 backref->found_forward_ref) {
3646 if (backref->index != index)
3647 backref->errors |= REF_ERR_INDEX_UNMATCH;
3649 backref->index = index;
3653 if (item_type == BTRFS_DIR_ITEM_KEY) {
3654 if (backref->found_forward_ref)
3656 backref->found_dir_item = 1;
3657 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3658 backref->found_dir_index = 1;
3659 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3660 if (backref->found_forward_ref)
3661 backref->errors |= REF_ERR_DUP_ROOT_REF;
3662 else if (backref->found_dir_item)
3664 backref->found_forward_ref = 1;
3665 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3666 if (backref->found_back_ref)
3667 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3668 backref->found_back_ref = 1;
/* Reachable = the parent has both the ROOT_REF and the directory entry. */
3673 if (backref->found_forward_ref && backref->found_dir_item)
3674 backref->reachable = 1;
/*
 * Move the per-tree records collected in @src_cache into the global root
 * cache @dst_cache.
 *
 * Records found while walking the tree-reloc root are discarded wholesale.
 * Otherwise every record is removed from @src_cache and tested with
 * is_child_root(); for each of its backrefs the dir item and/or dir index
 * that was seen is forwarded to add_root_backref() with rec->ino as the
 * referenced root and this tree as the referencing root.  Such backrefs must
 * not carry an inode ref (BUG_ON).  The record is freed afterwards.
 *
 * NOTE(review): the loop construct, the handling of the is_child_root()
 * result, the last argument of both add_root_backref() calls and the return
 * statement are not part of this listing.
 */
3678 static int merge_root_recs(struct btrfs_root *root,
3679 struct cache_tree *src_cache,
3680 struct cache_tree *dst_cache)
3682 struct cache_extent *cache;
3683 struct ptr_node *node;
3684 struct inode_record *rec;
3685 struct inode_backref *backref;
3688 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3689 free_inode_recs_tree(src_cache);
3694 cache = search_cache_extent(src_cache, 0);
3697 node = container_of(cache, struct ptr_node, cache);
3699 remove_cache_extent(src_cache, &node->cache);
3702 ret = is_child_root(root, root->objectid, rec->ino);
3708 list_for_each_entry(backref, &rec->backrefs, list) {
3709 BUG_ON(backref->found_inode_ref);
3710 if (backref->found_dir_item)
3711 add_root_backref(dst_cache, rec->ino,
3712 root->root_key.objectid, backref->dir,
3713 backref->index, backref->name,
3714 backref->namelen, BTRFS_DIR_ITEM_KEY,
3716 if (backref->found_dir_index)
3717 add_root_backref(dst_cache, rec->ino,
3718 root->root_key.objectid, backref->dir,
3719 backref->index, backref->name,
3720 backref->namelen, BTRFS_DIR_INDEX_KEY,
3724 free_inode_rec(rec);
/*
 * Verify that every fs tree recorded in @root_cache is referenced and that
 * its references are consistent.
 *
 * Phase 1 walks all root records and, for records that still have refs,
 * clears `reachable` on backrefs whose referencing root (looked up through
 * get_root_rec()) has no refs itself.  As the existing fixme says, circular
 * references are not detected.
 *
 * Phase 2 walks the records again and reports:
 *  - unreferenced trees in the [BTRFS_FIRST_FREE_OBJECTID,
 *    BTRFS_LAST_FREE_OBJECTID] range, after consulting check_orphan_item()
 *    on the tree root and skipping records without a root item;
 *  - referenced trees without a root item;
 *  - backrefs with missing pieces (dir item, dir index, root backref, root
 *    ref), which are printed as "unresolved ref" lines.
 *
 * Returns 1 when `errors` is positive, 0 otherwise.
 *
 * NOTE(review): loop constructs, counters (`errors`, `loop`, found_ref
 * updates), `continue` lines and several conditions are not part of this
 * listing.
 */
3731 static int check_root_refs(struct btrfs_root *root,
3732 struct cache_tree *root_cache)
3734 struct root_record *rec;
3735 struct root_record *ref_root;
3736 struct root_backref *backref;
3737 struct cache_extent *cache;
/* The top-level fs tree always gets a record. */
3743 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3744 BUG_ON(IS_ERR(rec));
3747 /* fixme: this can not detect circular references */
3750 cache = search_cache_extent(root_cache, 0);
3754 rec = container_of(cache, struct root_record, cache);
3755 cache = next_cache_extent(cache);
3757 if (rec->found_ref == 0)
3760 list_for_each_entry(backref, &rec->backrefs, list) {
3761 if (!backref->reachable)
3764 ref_root = get_root_rec(root_cache,
3766 BUG_ON(IS_ERR(ref_root));
3767 if (ref_root->found_ref > 0)
3770 backref->reachable = 0;
3772 if (rec->found_ref == 0)
/* Second pass: report what is still unreferenced or inconsistent. */
3778 cache = search_cache_extent(root_cache, 0);
3782 rec = container_of(cache, struct root_record, cache);
3783 cache = next_cache_extent(cache);
3785 if (rec->found_ref == 0 &&
3786 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3787 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3788 ret = check_orphan_item(root->fs_info->tree_root,
3794 * If we don't have a root item then we likely just have
3795 * a dir item in a snapshot for this root but no actual
3796 * ref key or anything so it's meaningless.
3798 if (!rec->found_root_item)
3801 fprintf(stderr, "fs tree %llu not referenced\n",
3802 (unsigned long long)rec->objectid);
3806 if (rec->found_ref > 0 && !rec->found_root_item)
/* Derive the per-backref error bits from the found_* flags. */
3808 list_for_each_entry(backref, &rec->backrefs, list) {
3809 if (!backref->found_dir_item)
3810 backref->errors |= REF_ERR_NO_DIR_ITEM;
3811 if (!backref->found_dir_index)
3812 backref->errors |= REF_ERR_NO_DIR_INDEX;
3813 if (!backref->found_back_ref)
3814 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3815 if (!backref->found_forward_ref)
3816 backref->errors |= REF_ERR_NO_ROOT_REF;
3817 if (backref->reachable && backref->errors)
3824 fprintf(stderr, "fs tree %llu refs %u %s\n",
3825 (unsigned long long)rec->objectid, rec->found_ref,
3826 rec->found_root_item ? "" : "not found");
3828 list_for_each_entry(backref, &rec->backrefs, list) {
3829 if (!backref->reachable)
3831 if (!backref->errors && rec->found_root_item)
3833 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3834 " index %llu namelen %u name %s errors %x\n",
3835 (unsigned long long)backref->ref_root,
3836 (unsigned long long)backref->dir,
3837 (unsigned long long)backref->index,
3838 backref->namelen, backref->name,
3840 print_ref_error(backref->errors);
3843 return errors > 0 ? 1 : 0;
/*
 * Decode one ROOT_REF or ROOT_BACKREF item from leaf @eb at @slot and feed
 * it to add_root_backref().
 *
 * dirid, sequence (used as index) and name length are read from the item;
 * the name bytes follow the btrfs_root_ref structure.  A name longer than
 * BTRFS_NAME_LEN is truncated to BTRFS_NAME_LEN and flagged with
 * REF_ERR_NAME_TOO_LONG.
 *
 * The key is (parent, ROOT_REF, child) for a forward ref, so key->offset is
 * the referenced root and key->objectid the referencing one; in the other
 * branch the two arguments are swapped.
 *
 * NOTE(review): declarations, the `len = name_len` assignment, the `else`
 * keywords and the return statement are not part of this listing.
 */
3846 static int process_root_ref(struct extent_buffer *eb, int slot,
3847 struct btrfs_key *key,
3848 struct cache_tree *root_cache)
3854 struct btrfs_root_ref *ref;
3855 char namebuf[BTRFS_NAME_LEN];
3858 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3860 dirid = btrfs_root_ref_dirid(eb, ref);
3861 index = btrfs_root_ref_sequence(eb, ref);
3862 name_len = btrfs_root_ref_name_len(eb, ref);
/* Never copy more than namebuf can hold. */
3864 if (name_len <= BTRFS_NAME_LEN) {
3868 len = BTRFS_NAME_LEN;
3869 error = REF_ERR_NAME_TOO_LONG;
/* The name is stored directly after the fixed-size root ref structure. */
3871 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3873 if (key->type == BTRFS_ROOT_REF_KEY) {
3874 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3875 index, namebuf, len, key->type, error);
3877 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3878 index, namebuf, len, key->type, error);
/*
 * Destructor for one btrfs_corrupt_block stored in a cache tree: recovers
 * the containing structure from its embedded cache_extent.
 *
 * NOTE(review): the statement that releases `corrupt` is not part of this
 * listing.
 */
3883 static void free_corrupt_block(struct cache_extent *cache)
3885 struct btrfs_corrupt_block *corrupt;
3887 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/*
 * Macro instantiation that takes free_corrupt_block as the per-entry
 * destructor; check_fs_root() calls free_corrupt_blocks_tree(), presumably
 * the helper generated here -- TODO confirm against the macro definition.
 */
3891 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
/*
 * Two passes over @corrupt_blocks inside one transaction:
 *
 *  1. For every recorded block the tree is searched down to the recorded
 *     level (path.lowest_level) with ins_len 0, the pointer found there is
 *     removed from its parent with btrfs_del_ptr() and the extent it pointed
 *     to is released with btrfs_free_extent().
 *  2. The same keys are searched again with ins_len -1 so that
 *     btrfs_search_slot() rebalances the tree.
 *
 * Nothing is done when @corrupt_blocks is empty.  The transaction is
 * committed and the path released at the end.
 *
 * NOTE(review): loop constructs, error checks, the declarations of
 * `level`/`offset`/`ret` and the return statement are not part of this
 * listing.
 */
3894 * Repair the btree of the given root.
3896 * The fix is to remove the node key in corrupt_blocks cache_tree.
3897 * and rebalance the tree.
3898 * After the fix, the btree should be writeable.
3900 static int repair_btree(struct btrfs_root *root,
3901 struct cache_tree *corrupt_blocks)
3903 struct btrfs_trans_handle *trans;
3904 struct btrfs_path path;
3905 struct btrfs_corrupt_block *corrupt;
3906 struct cache_extent *cache;
3907 struct btrfs_key key;
3912 if (cache_tree_empty(corrupt_blocks))
3915 trans = btrfs_start_transaction(root, 1);
3916 if (IS_ERR(trans)) {
3917 ret = PTR_ERR(trans);
3918 fprintf(stderr, "Error starting transaction: %s\n",
3922 btrfs_init_path(&path);
/* Pass 1: unlink every corrupted block from its parent. */
3923 cache = first_cache_extent(corrupt_blocks);
3925 corrupt = container_of(cache, struct btrfs_corrupt_block,
3927 level = corrupt->level;
3928 path.lowest_level = level;
3929 key.objectid = corrupt->key.objectid;
3930 key.type = corrupt->key.type;
3931 key.offset = corrupt->key.offset;
3934 * Here we don't want to do any tree balance, since it may
3935 * cause a balance with corrupted brother leaf/node,
3936 * so ins_len set to 0 here.
3937 * Balance will be done after all corrupt node/leaf is deleted.
3939 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3942 offset = btrfs_node_blockptr(path.nodes[level],
3945 /* Remove the ptr */
3946 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3950 * Remove the corresponding extent
3951 * return value is not concerned.
3953 btrfs_release_path(&path);
3954 ret = btrfs_free_extent(trans, root, offset,
3955 root->fs_info->nodesize, 0,
3956 root->root_key.objectid, level - 1, 0);
3957 cache = next_cache_extent(cache);
3960 /* Balance the btree using btrfs_search_slot() */
3961 cache = first_cache_extent(corrupt_blocks);
3963 corrupt = container_of(cache, struct btrfs_corrupt_block,
3965 memcpy(&key, &corrupt->key, sizeof(key));
3966 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3969 /* return will always >0 since it won't find the item */
3971 btrfs_release_path(&path);
3972 cache = next_cache_extent(cache);
3975 btrfs_commit_transaction(trans, root);
3976 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree.
 *
 * Outline of the visible steps:
 *  - install a per-tree corrupt_blocks cache in fs_info so that corrupted
 *    tree blocks met during the walk are recorded;
 *  - for every tree except the tree-reloc root, fetch the root record and
 *    mark found_root_item when the root item has refs;
 *  - move the orphan data extents attached to the root onto the matching
 *    inode records, flagging them I_ERR_FILE_EXTENT_ORPHAN;
 *  - validate the root block itself (leaf or node check);
 *  - choose the starting point of the walk: the root node, or the position
 *    stored in drop_progress/drop_level for a tree that is being dropped;
 *  - walk the tree with walk_down_tree()/walk_up_tree();
 *  - list the corrupted blocks that were recorded and try repair_btree();
 *  - merge the collected root records into @root_cache and run
 *    check_inode_recs() on the collected inode records;
 *  - release the corrupt-block cache, detach it from fs_info and free the
 *    orphan data extents.
 *
 * NOTE(review): declarations (ret, err, wret, level), loop constructs,
 * several conditions (including the one guarding the repair attempt) and
 * the return statement are not part of this listing.
 */
3980 static int check_fs_root(struct btrfs_root *root,
3981 struct cache_tree *root_cache,
3982 struct walk_control *wc)
3988 struct btrfs_path path;
3989 struct shared_node root_node;
3990 struct root_record *rec;
3991 struct btrfs_root_item *root_item = &root->root_item;
3992 struct cache_tree corrupt_blocks;
3993 struct orphan_data_extent *orphan;
3994 struct orphan_data_extent *tmp;
3995 enum btrfs_tree_block_status status;
3996 struct node_refs nrefs;
3999 * Reuse the corrupt_block cache tree to record corrupted tree block
4001 * Unlike the usage in extent tree check, here we do it in a per
4002 * fs/subvol tree base.
4004 cache_tree_init(&corrupt_blocks);
4005 root->fs_info->corrupt_blocks = &corrupt_blocks;
4007 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4008 rec = get_root_rec(root_cache, root->root_key.objectid);
4009 BUG_ON(IS_ERR(rec));
4010 if (btrfs_root_refs(root_item) > 0)
4011 rec->found_root_item = 1;
4014 btrfs_init_path(&path);
4015 memset(&root_node, 0, sizeof(root_node));
4016 cache_tree_init(&root_node.root_cache);
4017 cache_tree_init(&root_node.inode_cache);
4018 memset(&nrefs, 0, sizeof(nrefs));
4020 /* Move the orphan extent record to corresponding inode_record */
4021 list_for_each_entry_safe(orphan, tmp,
4022 &root->orphan_data_extents, list) {
4023 struct inode_record *inode;
4025 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4027 BUG_ON(IS_ERR(inode));
4028 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4029 list_move(&orphan->list, &inode->orphan_extents);
/* Start the walk state at the level of the root block. */
4032 level = btrfs_header_level(root->node);
4033 memset(wc->nodes, 0, sizeof(wc->nodes));
4034 wc->nodes[level] = &root_node;
4035 wc->active_node = level;
4036 wc->root_level = level;
4038 /* We may not have checked the root block, lets do that now */
4039 if (btrfs_is_leaf(root->node))
4040 status = btrfs_check_leaf(root, NULL, root->node);
4042 status = btrfs_check_node(root, NULL, root->node);
4043 if (status != BTRFS_TREE_BLOCK_CLEAN)
/* Live root, or no drop in progress: walk from the root node, slot 0. */
4046 if (btrfs_root_refs(root_item) > 0 ||
4047 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4048 path.nodes[level] = root->node;
4049 extent_buffer_get(root->node);
4050 path.slots[level] = 0;
4052 struct btrfs_key key;
4053 struct btrfs_disk_key found_key;
/* Partially dropped root: resume from the recorded drop position. */
4055 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4056 level = root_item->drop_level;
4057 path.lowest_level = level;
4058 if (level > btrfs_header_level(root->node) ||
4059 level >= BTRFS_MAX_LEVEL) {
4060 error("ignoring invalid drop level: %u", level);
4063 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4066 btrfs_node_key(path.nodes[level], &found_key,
4068 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4069 sizeof(found_key)));
4073 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4079 wret = walk_up_tree(root, &path, wc, &level);
4086 btrfs_release_path(&path);
/* Report the tree blocks recorded as corrupted during the walk. */
4088 if (!cache_tree_empty(&corrupt_blocks)) {
4089 struct cache_extent *cache;
4090 struct btrfs_corrupt_block *corrupt;
4092 printf("The following tree block(s) is corrupted in tree %llu:\n",
4093 root->root_key.objectid);
4094 cache = first_cache_extent(&corrupt_blocks);
4096 corrupt = container_of(cache,
4097 struct btrfs_corrupt_block,
4099 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4100 cache->start, corrupt->level,
4101 corrupt->key.objectid, corrupt->key.type,
4102 corrupt->key.offset);
4103 cache = next_cache_extent(cache);
4106 printf("Try to repair the btree for root %llu\n",
4107 root->root_key.objectid);
4108 ret = repair_btree(root, &corrupt_blocks);
4110 fprintf(stderr, "Failed to repair btree: %s\n",
4113 printf("Btree for root %llu is fixed\n",
4114 root->root_key.objectid);
4118 err = merge_root_recs(root, &root_node.root_cache, root_cache);
/* Mark the inode that was still being processed as checked. */
4122 if (root_node.current) {
4123 root_node.current->checked = 1;
4124 maybe_free_inode_rec(&root_node.inode_cache,
4128 err = check_inode_recs(root, &root_node.inode_cache);
/* corrupt_blocks lives on this stack frame, so detach it from fs_info. */
4132 free_corrupt_blocks_tree(&corrupt_blocks);
4133 root->fs_info->corrupt_blocks = NULL;
4134 free_orphan_data_extents(&root->orphan_data_extents);
4138 static int fs_root_objectid(u64 objectid)
4140 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4141 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4143 return is_fstree(objectid);
4146 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4147 struct cache_tree *root_cache)
4149 struct btrfs_path path;
4150 struct btrfs_key key;
4151 struct walk_control wc;
4152 struct extent_buffer *leaf, *tree_node;
4153 struct btrfs_root *tmp_root;
4154 struct btrfs_root *tree_root = fs_info->tree_root;
4158 if (ctx.progress_enabled) {
4159 ctx.tp = TASK_FS_ROOTS;
4160 task_start(ctx.info);
4164 * Just in case we made any changes to the extent tree that weren't
4165 * reflected into the free space cache yet.
4168 reset_cached_block_groups(fs_info);
4169 memset(&wc, 0, sizeof(wc));
4170 cache_tree_init(&wc.shared);
4171 btrfs_init_path(&path);
4176 key.type = BTRFS_ROOT_ITEM_KEY;
4177 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4182 tree_node = tree_root->node;
4184 if (tree_node != tree_root->node) {
4185 free_root_recs_tree(root_cache);
4186 btrfs_release_path(&path);
4189 leaf = path.nodes[0];
4190 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4191 ret = btrfs_next_leaf(tree_root, &path);
4197 leaf = path.nodes[0];
4199 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4200 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4201 fs_root_objectid(key.objectid)) {
4202 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4203 tmp_root = btrfs_read_fs_root_no_cache(
4206 key.offset = (u64)-1;
4207 tmp_root = btrfs_read_fs_root(
4210 if (IS_ERR(tmp_root)) {
4214 ret = check_fs_root(tmp_root, root_cache, &wc);
4215 if (ret == -EAGAIN) {
4216 free_root_recs_tree(root_cache);
4217 btrfs_release_path(&path);
4222 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4223 btrfs_free_fs_root(tmp_root);
4224 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4225 key.type == BTRFS_ROOT_BACKREF_KEY) {
4226 process_root_ref(leaf, path.slots[0], &key,
4233 btrfs_release_path(&path);
4235 free_extent_cache_tree(&wc.shared);
4236 if (!cache_tree_empty(&wc.shared))
4237 fprintf(stderr, "warning line %d\n", __LINE__);
4239 task_stop(ctx.info);
4245 * Find the @index according by @ino and name.
4246 * Notice:time efficiency is O(N)
4248 * @root: the root of the fs/file tree
4249 * @index_ret: the index as return value
4250 * @namebuf: the name to match
4251 * @name_len: the length of name to match
4252 * @file_type: the file_type of INODE_ITEM to match
4254 * Returns 0 if found and *@index_ret will be modified with right value
4255 * Returns< 0 not found and *@index_ret will be (u64)-1
4257 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4258 u64 *index_ret, char *namebuf, u32 name_len,
4261 struct btrfs_path path;
4262 struct extent_buffer *node;
4263 struct btrfs_dir_item *di;
4264 struct btrfs_key key;
4265 struct btrfs_key location;
4266 char name[BTRFS_NAME_LEN] = {0};
4278 /* search from the last index */
4279 key.objectid = dirid;
4280 key.offset = (u64)-1;
4281 key.type = BTRFS_DIR_INDEX_KEY;
4283 btrfs_init_path(&path);
4284 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4289 ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4292 *index_ret = (64)-1;
4295 /* Check whether inode_id/filetype/name match */
4296 node = path.nodes[0];
4297 slot = path.slots[0];
4298 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4299 total = btrfs_item_size_nr(node, slot);
4300 while (cur < total) {
4302 len = btrfs_dir_name_len(node, di);
4303 data_len = btrfs_dir_data_len(node, di);
4305 btrfs_dir_item_key_to_cpu(node, di, &location);
4306 if (location.objectid != location_id ||
4307 location.type != BTRFS_INODE_ITEM_KEY ||
4308 location.offset != 0)
4311 filetype = btrfs_dir_type(node, di);
4312 if (file_type != filetype)
4315 if (len > BTRFS_NAME_LEN)
4316 len = BTRFS_NAME_LEN;
4318 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4319 if (len != name_len || strncmp(namebuf, name, len))
4322 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4323 *index_ret = key.offset;
4327 len += sizeof(*di) + data_len;
4328 di = (struct btrfs_dir_item *)((char *)di + len);
4334 btrfs_release_path(&path);
4339 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4340 * INODE_REF/INODE_EXTREF match.
4342 * @root: the root of the fs/file tree
4343 * @key: the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4344 * value while find index
4345 * @location_key: location key of the struct btrfs_dir_item to match
4346 * @name: the name to match
4347 * @namelen: the length of name
4348 * @file_type: the type of file to math
4350 * Return 0 if no error occurred.
4351 * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4352 * DIR_ITEM/DIR_INDEX
4353 * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4354 * and DIR_ITEM/DIR_INDEX mismatch
4356 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4357 struct btrfs_key *location_key, char *name,
4358 u32 namelen, u8 file_type)
4360 struct btrfs_path path;
4361 struct extent_buffer *node;
4362 struct btrfs_dir_item *di;
4363 struct btrfs_key location;
4364 char namebuf[BTRFS_NAME_LEN] = {0};
4373 /* get the index by traversing all index */
4374 if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4375 ret = find_dir_index(root, key->objectid,
4376 location_key->objectid, &key->offset,
4377 name, namelen, file_type);
4379 ret = DIR_INDEX_MISSING;
4383 btrfs_init_path(&path);
4384 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4386 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4391 /* Check whether inode_id/filetype/name match */
4392 node = path.nodes[0];
4393 slot = path.slots[0];
4394 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4395 total = btrfs_item_size_nr(node, slot);
4396 while (cur < total) {
4397 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4398 DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4400 len = btrfs_dir_name_len(node, di);
4401 data_len = btrfs_dir_data_len(node, di);
4403 btrfs_dir_item_key_to_cpu(node, di, &location);
4404 if (location.objectid != location_key->objectid ||
4405 location.type != location_key->type ||
4406 location.offset != location_key->offset)
4409 filetype = btrfs_dir_type(node, di);
4410 if (file_type != filetype)
4413 if (len > BTRFS_NAME_LEN) {
4414 len = BTRFS_NAME_LEN;
4415 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4417 key->type == BTRFS_DIR_ITEM_KEY ?
4418 "DIR_ITEM" : "DIR_INDEX",
4419 key->objectid, key->offset, len);
4421 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4423 if (len != namelen || strncmp(namebuf, name, len))
4429 len += sizeof(*di) + data_len;
4430 di = (struct btrfs_dir_item *)((char *)di + len);
4435 btrfs_release_path(&path);
4440 * Traverse the given INODE_REF and call find_dir_item() to find related
4441 * DIR_ITEM/DIR_INDEX.
4443 * @root: the root of the fs/file tree
4444 * @ref_key: the key of the INODE_REF
4445 * @refs: the count of INODE_REF
4446 * @mode: the st_mode of INODE_ITEM
4448 * Return 0 if no error occurred.
4450 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4451 struct extent_buffer *node, int slot, u64 *refs,
4454 struct btrfs_key key;
4455 struct btrfs_key location;
4456 struct btrfs_inode_ref *ref;
4457 char namebuf[BTRFS_NAME_LEN] = {0};
4466 location.objectid = ref_key->objectid;
4467 location.type = BTRFS_INODE_ITEM_KEY;
4468 location.offset = 0;
4470 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4471 total = btrfs_item_size_nr(node, slot);
4474 /* Update inode ref count */
4477 index = btrfs_inode_ref_index(node, ref);
4478 name_len = btrfs_inode_ref_name_len(node, ref);
4479 if (cur + sizeof(*ref) + name_len > total ||
4480 name_len > BTRFS_NAME_LEN) {
4481 warning("root %llu INODE_REF[%llu %llu] name too long",
4482 root->objectid, ref_key->objectid, ref_key->offset);
4484 if (total < cur + sizeof(*ref))
4486 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4491 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4493 /* Check root dir ref name */
4494 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4495 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4496 root->objectid, ref_key->objectid, ref_key->offset,
4498 err |= ROOT_DIR_ERROR;
4501 /* Find related DIR_INDEX */
4502 key.objectid = ref_key->offset;
4503 key.type = BTRFS_DIR_INDEX_KEY;
4505 ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4508 /* Find related dir_item */
4509 key.objectid = ref_key->offset;
4510 key.type = BTRFS_DIR_ITEM_KEY;
4511 key.offset = btrfs_name_hash(namebuf, len);
4512 ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4515 len = sizeof(*ref) + name_len;
4516 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4526 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4527 * DIR_ITEM/DIR_INDEX.
4529 * @root: the root of the fs/file tree
4530 * @ref_key: the key of the INODE_EXTREF
4531 * @refs: the count of INODE_EXTREF
4532 * @mode: the st_mode of INODE_ITEM
4534 * Return 0 if no error occurred.
4536 static int check_inode_extref(struct btrfs_root *root,
4537 struct btrfs_key *ref_key,
4538 struct extent_buffer *node, int slot, u64 *refs,
4541 struct btrfs_key key;
4542 struct btrfs_key location;
4543 struct btrfs_inode_extref *extref;
4544 char namebuf[BTRFS_NAME_LEN] = {0};
4554 location.objectid = ref_key->objectid;
4555 location.type = BTRFS_INODE_ITEM_KEY;
4556 location.offset = 0;
4558 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4559 total = btrfs_item_size_nr(node, slot);
4562 /* update inode ref count */
4564 name_len = btrfs_inode_extref_name_len(node, extref);
4565 index = btrfs_inode_extref_index(node, extref);
4566 parent = btrfs_inode_extref_parent(node, extref);
4567 if (name_len <= BTRFS_NAME_LEN) {
4570 len = BTRFS_NAME_LEN;
4571 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4572 root->objectid, ref_key->objectid, ref_key->offset);
4574 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4576 /* Check root dir ref name */
4577 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4578 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4579 root->objectid, ref_key->objectid, ref_key->offset,
4581 err |= ROOT_DIR_ERROR;
4584 /* find related dir_index */
4585 key.objectid = parent;
4586 key.type = BTRFS_DIR_INDEX_KEY;
4588 ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4591 /* find related dir_item */
4592 key.objectid = parent;
4593 key.type = BTRFS_DIR_ITEM_KEY;
4594 key.offset = btrfs_name_hash(namebuf, len);
4595 ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4598 len = sizeof(*extref) + name_len;
4599 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4609 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4610 * DIR_ITEM/DIR_INDEX match.
4611 * Return with @index_ret.
4613 * @root: the root of the fs/file tree
4614 * @key: the key of the INODE_REF/INODE_EXTREF
4615 * @name: the name in the INODE_REF/INODE_EXTREF
4616 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4617 * @index_ret: the index in the INODE_REF/INODE_EXTREF,
4618 * value (64)-1 means do not check index
4619 * @ext_ref: the EXTENDED_IREF feature
4621 * Return 0 if no error occurred.
4622 * Return >0 for error bitmap
4624 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4625 char *name, int namelen, u64 *index_ret,
4626 unsigned int ext_ref)
4628 struct btrfs_path path;
4629 struct btrfs_inode_ref *ref;
4630 struct btrfs_inode_extref *extref;
4631 struct extent_buffer *node;
4632 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4645 btrfs_init_path(&path);
4646 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4648 ret = INODE_REF_MISSING;
4652 node = path.nodes[0];
4653 slot = path.slots[0];
4655 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4656 total = btrfs_item_size_nr(node, slot);
4658 /* Iterate all entry of INODE_REF */
4659 while (cur < total) {
4660 ret = INODE_REF_MISSING;
4662 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4663 ref_index = btrfs_inode_ref_index(node, ref);
4664 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4667 if (cur + sizeof(*ref) + ref_namelen > total ||
4668 ref_namelen > BTRFS_NAME_LEN) {
4669 warning("root %llu INODE %s[%llu %llu] name too long",
4671 key->type == BTRFS_INODE_REF_KEY ?
4673 key->objectid, key->offset);
4675 if (cur + sizeof(*ref) > total)
4677 len = min_t(u32, total - cur - sizeof(*ref),
4683 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4686 if (len != namelen || strncmp(ref_namebuf, name, len))
4689 *index_ret = ref_index;
4693 len = sizeof(*ref) + ref_namelen;
4694 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4699 /* Skip if not support EXTENDED_IREF feature */
4703 btrfs_release_path(&path);
4704 btrfs_init_path(&path);
4706 dir_id = key->offset;
4707 key->type = BTRFS_INODE_EXTREF_KEY;
4708 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4710 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4712 ret = INODE_REF_MISSING;
4716 node = path.nodes[0];
4717 slot = path.slots[0];
4719 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4721 total = btrfs_item_size_nr(node, slot);
4723 /* Iterate all entry of INODE_EXTREF */
4724 while (cur < total) {
4725 ret = INODE_REF_MISSING;
4727 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4728 ref_index = btrfs_inode_extref_index(node, extref);
4729 parent = btrfs_inode_extref_parent(node, extref);
4730 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4733 if (parent != dir_id)
4736 if (ref_namelen <= BTRFS_NAME_LEN) {
4739 len = BTRFS_NAME_LEN;
4740 warning("root %llu INODE %s[%llu %llu] name too long",
4742 key->type == BTRFS_INODE_REF_KEY ?
4744 key->objectid, key->offset);
4746 read_extent_buffer(node, ref_namebuf,
4747 (unsigned long)(extref + 1), len);
4749 if (len != namelen || strncmp(ref_namebuf, name, len))
4752 *index_ret = ref_index;
4757 len = sizeof(*extref) + ref_namelen;
4758 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4763 btrfs_release_path(&path);
4768 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4769 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4771 * @root: the root of the fs/file tree
4772 * @key: the key of the INODE_REF/INODE_EXTREF
4773 * @size: the st_size of the INODE_ITEM
4774 * @ext_ref: the EXTENDED_IREF feature
4776 * Return 0 if no error occurred.
4778 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4779 struct extent_buffer *node, int slot, u64 *size,
4780 unsigned int ext_ref)
4782 struct btrfs_dir_item *di;
4783 struct btrfs_inode_item *ii;
4784 struct btrfs_path path;
4785 struct btrfs_key location;
4786 char namebuf[BTRFS_NAME_LEN] = {0};
4799 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4800 * ignore index check.
4802 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4804 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4805 total = btrfs_item_size_nr(node, slot);
4807 while (cur < total) {
4808 data_len = btrfs_dir_data_len(node, di);
4810 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4811 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4812 "DIR_ITEM" : "DIR_INDEX",
4813 key->objectid, key->offset, data_len);
4815 name_len = btrfs_dir_name_len(node, di);
4816 if (cur + sizeof(*di) + name_len > total ||
4817 name_len > BTRFS_NAME_LEN) {
4818 warning("root %llu %s[%llu %llu] name too long",
4820 key->type == BTRFS_DIR_ITEM_KEY ?
4821 "DIR_ITEM" : "DIR_INDEX",
4822 key->objectid, key->offset);
4824 if (cur + sizeof(*di) > total)
4826 len = min_t(u32, total - cur - sizeof(*di),
4831 (*size) += name_len;
4833 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4834 filetype = btrfs_dir_type(node, di);
4836 if (key->type == BTRFS_DIR_ITEM_KEY &&
4837 key->offset != btrfs_name_hash(namebuf, len)) {
4839 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4840 root->objectid, key->objectid, key->offset,
4841 namebuf, len, filetype, key->offset,
4842 btrfs_name_hash(namebuf, len));
4845 btrfs_init_path(&path);
4846 btrfs_dir_item_key_to_cpu(node, di, &location);
4848 /* Ignore related ROOT_ITEM check */
4849 if (location.type == BTRFS_ROOT_ITEM_KEY)
4852 /* Check relative INODE_ITEM(existence/filetype) */
4853 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4855 err |= INODE_ITEM_MISSING;
4856 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4857 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4858 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4859 key->offset, location.objectid, name_len,
4864 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4865 struct btrfs_inode_item);
4866 mode = btrfs_inode_mode(path.nodes[0], ii);
4868 if (imode_to_type(mode) != filetype) {
4869 err |= INODE_ITEM_MISMATCH;
4870 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4871 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4872 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4873 key->offset, name_len, namebuf, filetype);
4876 /* Check relative INODE_REF/INODE_EXTREF */
4877 location.type = BTRFS_INODE_REF_KEY;
4878 location.offset = key->objectid;
4879 ret = find_inode_ref(root, &location, namebuf, len,
4882 if (ret & INODE_REF_MISSING)
4883 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4884 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4885 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4886 key->offset, name_len, namebuf, filetype);
4889 btrfs_release_path(&path);
4890 len = sizeof(*di) + name_len + data_len;
4891 di = (struct btrfs_dir_item *)((char *)di + len);
4894 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4895 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4896 root->objectid, key->objectid, key->offset);
4905 * Check file extent datasum/hole, update the size of the file extents,
4906 * check and update the last offset of the file extent.
4908 * @root: the root of fs/file tree.
4909 * @fkey: the key of the file extent.
4910 * @nodatasum: INODE_NODATASUM feature.
4911 * @size: the sum of all EXTENT_DATA items size for this inode.
4912 * @end: the offset of the last extent.
4914 * Return 0 if no error occurred.
4916 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4917 struct extent_buffer *node, int slot,
4918 unsigned int nodatasum, u64 *size, u64 *end)
4920 struct btrfs_file_extent_item *fi;
4923 u64 extent_num_bytes;
4925 u64 csum_found; /* In byte size, sectorsize aligned */
4926 u64 search_start; /* Logical range start we search for csum */
4927 u64 search_len; /* Logical range len we search for csum */
4928 unsigned int extent_type;
4929 unsigned int is_hole;
4934 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4936 /* Check inline extent */
4937 extent_type = btrfs_file_extent_type(node, fi);
4938 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4939 struct btrfs_item *e = btrfs_item_nr(slot);
4940 u32 item_inline_len;
4942 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4943 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4944 compressed = btrfs_file_extent_compression(node, fi);
4945 if (extent_num_bytes == 0) {
4947 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4948 root->objectid, fkey->objectid, fkey->offset);
4949 err |= FILE_EXTENT_ERROR;
4951 if (!compressed && extent_num_bytes != item_inline_len) {
4953 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4954 root->objectid, fkey->objectid, fkey->offset,
4955 extent_num_bytes, item_inline_len);
4956 err |= FILE_EXTENT_ERROR;
4958 *end += extent_num_bytes;
4959 *size += extent_num_bytes;
4963 /* Check extent type */
4964 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4965 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4966 err |= FILE_EXTENT_ERROR;
4967 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4968 root->objectid, fkey->objectid, fkey->offset);
4972 /* Check REG_EXTENT/PREALLOC_EXTENT */
4973 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4974 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4975 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4976 extent_offset = btrfs_file_extent_offset(node, fi);
4977 compressed = btrfs_file_extent_compression(node, fi);
4978 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4981 * Check EXTENT_DATA csum
4983 * For plain (uncompressed) extent, we should only check the range
4984 * we're referring to, as it's possible that part of prealloc extent
4985 * has been written, and has csum:
4987 * |<--- Original large preallocated extent A ---->|
4988 * |<- Prealloc File Extent ->|<- Regular Extent ->|
4991 * For compressed extent, we should check the whole range.
4994 search_start = disk_bytenr + extent_offset;
4995 search_len = extent_num_bytes;
4997 search_start = disk_bytenr;
4998 search_len = disk_num_bytes;
5000 ret = count_csum_range(root, search_start, search_len, &csum_found);
5001 if (csum_found > 0 && nodatasum) {
5002 err |= ODD_CSUM_ITEM;
5003 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5004 root->objectid, fkey->objectid, fkey->offset);
5005 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5006 !is_hole && (ret < 0 || csum_found < search_len)) {
5007 err |= CSUM_ITEM_MISSING;
5008 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5009 root->objectid, fkey->objectid, fkey->offset,
5010 csum_found, search_len);
5011 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5012 err |= ODD_CSUM_ITEM;
5013 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5014 root->objectid, fkey->objectid, fkey->offset, csum_found);
5017 /* Check EXTENT_DATA hole */
5018 if (!no_holes && *end != fkey->offset) {
5019 err |= FILE_EXTENT_ERROR;
5020 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
5021 root->objectid, fkey->objectid, fkey->offset);
5024 *end += extent_num_bytes;
5026 *size += extent_num_bytes;
5032 * Set inode item nbytes to @nbytes
5034 * Returns 0 on success
5035 * Returns != 0 on error
5037 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5038 struct btrfs_path *path,
5039 u64 ino, u64 nbytes)
5041 struct btrfs_trans_handle *trans;
5042 struct btrfs_inode_item *ii;
5043 struct btrfs_key key;
5044 struct btrfs_key research_key;
5048 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5051 key.type = BTRFS_INODE_ITEM_KEY;
5054 trans = btrfs_start_transaction(root, 1);
5055 if (IS_ERR(trans)) {
5056 ret = PTR_ERR(trans);
5061 btrfs_release_path(path);
5062 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5070 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5071 struct btrfs_inode_item);
5072 btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5073 btrfs_mark_buffer_dirty(path->nodes[0]);
5075 btrfs_commit_transaction(trans, root);
5078 error("failed to set nbytes in inode %llu root %llu",
5079 ino, root->root_key.objectid);
5081 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5082 root->root_key.objectid, nbytes);
5085 btrfs_release_path(path);
5086 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5093 * Set directory inode isize to @isize.
5095 * Returns 0 on success.
5096 * Returns != 0 on error.
5098 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5099 struct btrfs_path *path,
5102 struct btrfs_trans_handle *trans;
5103 struct btrfs_inode_item *ii;
5104 struct btrfs_key key;
5105 struct btrfs_key research_key;
5109 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5112 key.type = BTRFS_INODE_ITEM_KEY;
5115 trans = btrfs_start_transaction(root, 1);
5116 if (IS_ERR(trans)) {
5117 ret = PTR_ERR(trans);
5122 btrfs_release_path(path);
5123 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5131 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5132 struct btrfs_inode_item);
5133 btrfs_set_inode_size(path->nodes[0], ii, isize);
5134 btrfs_mark_buffer_dirty(path->nodes[0]);
5136 btrfs_commit_transaction(trans, root);
5139 error("failed to set isize in inode %llu root %llu",
5140 ino, root->root_key.objectid);
5142 printf("Set isize in inode %llu root %llu to %llu\n",
5143 ino, root->root_key.objectid, isize);
5145 btrfs_release_path(path);
5146 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5153 * Wrapper function for btrfs_add_orphan_item().
5155 * Returns 0 on success.
5156 * Returns != 0 on error.
5158 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5159 struct btrfs_path *path, u64 ino)
5161 struct btrfs_trans_handle *trans;
5162 struct btrfs_key research_key;
5166 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5168 trans = btrfs_start_transaction(root, 1);
5169 if (IS_ERR(trans)) {
5170 ret = PTR_ERR(trans);
5175 btrfs_release_path(path);
5176 ret = btrfs_add_orphan_item(trans, root, path, ino);
5178 btrfs_commit_transaction(trans, root);
5181 error("failed to add inode %llu as orphan item root %llu",
5182 ino, root->root_key.objectid);
5184 printf("Added inode %llu as orphan item root %llu\n",
5185 ino, root->root_key.objectid);
5187 btrfs_release_path(path);
5188 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5195 * Check INODE_ITEM and related ITEMs (the same inode number)
5196 * 1. check link count
5197 * 2. check inode ref/extref
5198 * 3. check dir item/index
5200 * @ext_ref: the EXTENDED_IREF feature
5202 * Return 0 if no error occurred.
5203 * Return >0 for error or hit the traversal is done(by error bitmap)
5205 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5206 unsigned int ext_ref)
5208 struct extent_buffer *node;
5209 struct btrfs_inode_item *ii;
5210 struct btrfs_key key;
5219 u64 extent_size = 0;
5221 unsigned int nodatasum;
5226 node = path->nodes[0];
5227 slot = path->slots[0];
5229 btrfs_item_key_to_cpu(node, &key, slot);
5230 inode_id = key.objectid;
5232 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5233 ret = btrfs_next_item(root, path);
5239 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5240 isize = btrfs_inode_size(node, ii);
5241 nbytes = btrfs_inode_nbytes(node, ii);
5242 mode = btrfs_inode_mode(node, ii);
5243 dir = imode_to_type(mode) == BTRFS_FT_DIR;
5244 nlink = btrfs_inode_nlink(node, ii);
5245 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5248 ret = btrfs_next_item(root, path);
5250 /* out will fill 'err' rusing current statistics */
5252 } else if (ret > 0) {
5257 node = path->nodes[0];
5258 slot = path->slots[0];
5259 btrfs_item_key_to_cpu(node, &key, slot);
5260 if (key.objectid != inode_id)
5264 case BTRFS_INODE_REF_KEY:
5265 ret = check_inode_ref(root, &key, node, slot, &refs,
5269 case BTRFS_INODE_EXTREF_KEY:
5270 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5271 warning("root %llu EXTREF[%llu %llu] isn't supported",
5272 root->objectid, key.objectid,
5274 ret = check_inode_extref(root, &key, node, slot, &refs,
5278 case BTRFS_DIR_ITEM_KEY:
5279 case BTRFS_DIR_INDEX_KEY:
5281 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5282 root->objectid, inode_id,
5283 imode_to_type(mode), key.objectid,
5286 ret = check_dir_item(root, &key, node, slot, &size,
5290 case BTRFS_EXTENT_DATA_KEY:
5292 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5293 root->objectid, inode_id, key.objectid,
5296 ret = check_file_extent(root, &key, node, slot,
5297 nodatasum, &extent_size,
5301 case BTRFS_XATTR_ITEM_KEY:
5304 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5305 key.objectid, key.type, key.offset);
5310 /* verify INODE_ITEM nlink/isize/nbytes */
5313 err |= LINK_COUNT_ERROR;
5314 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5315 root->objectid, inode_id, nlink);
5319 * Just a warning, as dir inode nbytes is just an
5320 * instructive value.
5322 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5323 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5324 root->objectid, inode_id,
5325 root->fs_info->nodesize);
5328 if (isize != size) {
5330 ret = repair_dir_isize_lowmem(root, path,
5332 if (!repair || ret) {
5335 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5336 root->objectid, inode_id, isize, size);
5340 if (nlink != refs) {
5341 err |= LINK_COUNT_ERROR;
5342 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5343 root->objectid, inode_id, nlink, refs);
5344 } else if (!nlink) {
5346 ret = repair_inode_orphan_item_lowmem(root,
5348 if (!repair || ret) {
5350 error("root %llu INODE[%llu] is orphan item",
5351 root->objectid, inode_id);
5355 if (!nbytes && !no_holes && extent_end < isize) {
5356 err |= NBYTES_ERROR;
5357 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5358 root->objectid, inode_id, isize);
5361 if (nbytes != extent_size) {
5363 ret = repair_inode_nbytes_lowmem(root, path,
5364 inode_id, extent_size);
5365 if (!repair || ret) {
5366 err |= NBYTES_ERROR;
5368 "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5369 root->objectid, inode_id, nbytes,
5379 * check first root dir's inode_item and inode_ref
5381 * returns 0 means no error
5382 * returns >0 means error
5383 * returns <0 means fatal error
/*
 * Check the INODE_ITEM and the ".." INODE_REF of the first inode
 * (BTRFS_FIRST_FREE_OBJECTID) in @root.
 *
 * @root:	fs/subvolume root to examine
 * @ext_ref:	forwarded unchanged to find_inode_ref()
 *
 * Problems are accumulated as bits in err; the bits tested here are
 * INODE_ITEM_MISSING, INODE_ITEM_MISMATCH and INODE_REF_MISSING, each of
 * which is reported through error().  A negative ret takes precedence over
 * err in the return value.
 */
5385 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5387 struct btrfs_path path;
5388 struct btrfs_key key;
5389 struct btrfs_inode_item *ii;
5395 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5396 key.type = BTRFS_INODE_ITEM_KEY;
5399 /* For root being dropped, we don't need to check first inode */
5400 if (btrfs_root_refs(&root->root_item) == 0 &&
5401 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5402 BTRFS_FIRST_FREE_OBJECTID)
5405 btrfs_init_path(&path);
5406 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5411 err |= INODE_ITEM_MISSING;
/* The item found must describe a directory, otherwise it is a mismatch */
5413 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
5414 struct btrfs_inode_item);
5415 mode = btrfs_inode_mode(path.nodes[0], ii);
5416 if (imode_to_type(mode) != BTRFS_FT_DIR)
5417 err |= INODE_ITEM_MISMATCH;
5420 /* lookup first inode ref */
5421 key.offset = BTRFS_FIRST_FREE_OBJECTID;
5422 key.type = BTRFS_INODE_REF_KEY;
5423 /* special index value */
5426 ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
5432 btrfs_release_path(&path);
/* Report whatever was accumulated in err */
5433 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
5434 error("root dir INODE_ITEM is %s",
5435 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
5436 if (err & INODE_REF_MISSING)
5437 error("root dir INODE_REF is missing");
5439 return ret < 0 ? ret : err;
/*
 * Look up a tree backref of @rec in rec->backref_tree.
 *
 * A stack tree_backref (`match`) is used as the search key and is handed to
 * rb_search() together with compare_extent_backref() as the comparator.
 * `back` starts out NULL and is assigned from the rb node that the search
 * yields.
 *
 * NOTE(review): the key is given parent = @parent with full_backref = 1;
 * confirm how @root is applied to the key and under which condition.
 */
5442 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5443 u64 parent, u64 root)
5445 struct rb_node *node;
5446 struct tree_backref *back = NULL;
5447 struct tree_backref match = {
5454 match.parent = parent;
5455 match.node.full_backref = 1;
5460 node = rb_search(&rec->backref_tree, &match.node.node,
5461 (rb_compare_keys)compare_extent_backref, NULL);
5463 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Look up a data backref of @rec in rec->backref_tree.
 *
 * A stack data_backref (`match`) is initialised as the search key; the
 * visible initialisers copy found_ref and @disk_bytenr into it.  The key is
 * then passed to rb_search() with compare_extent_backref() as comparator,
 * and `back` (initially NULL) is assigned from the resulting rb node.
 *
 * NOTE(review): the key is given parent = @parent with full_backref = 1;
 * confirm how @root, @owner, @offset and @bytes are applied to the key.
 */
5468 static struct data_backref *find_data_backref(struct extent_record *rec,
5469 u64 parent, u64 root,
5470 u64 owner, u64 offset,
5472 u64 disk_bytenr, u64 bytes)
5474 struct rb_node *node;
5475 struct data_backref *back = NULL;
5476 struct data_backref match = {
5483 .found_ref = found_ref,
5484 .disk_bytenr = disk_bytenr,
5488 match.parent = parent;
5489 match.node.full_backref = 1;
5494 node = rb_search(&rec->backref_tree, &match.node.node,
5495 (rb_compare_keys)compare_extent_backref, NULL);
5497 back = to_data_backref(rb_node_to_extent_backref(node));
5502 * Iterate over all items in the tree and call check_inode_item() to check them.
5504 * @root: the root of the tree to be checked.
5505 * @ext_ref: the EXTENDED_IREF feature
5507 * Return 0 if no error found.
5508 * Return <0 for error.
/*
 * Walk one fs/subvolume tree with walk_down_tree_v2() / walk_up_tree_v2().
 *
 * The first inode is checked separately through check_fs_first_inode()
 * before the walk.  The starting position of the walk is either the root
 * node itself or the position recorded in the root item's drop_progress /
 * drop_level.  @ext_ref is forwarded to check_fs_first_inode() and
 * walk_down_tree_v2().
 */
5510 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5512 struct btrfs_path path;
5513 struct node_refs nrefs;
5514 struct btrfs_root_item *root_item = &root->root_item;
5520 * We need to manually check the first inode item(256)
5521 * As the following traversal function will only start from
5522 * the first inode item in the leaf, if inode item(256) is missing
5523 * we will just skip it forever.
5525 ret = check_fs_first_inode(root, ext_ref);
5530 memset(&nrefs, 0, sizeof(nrefs));
5531 level = btrfs_header_level(root->node);
5532 btrfs_init_path(&path);
/*
 * A root that is still referenced, or that has no drop progress recorded,
 * is walked from slot 0 of its root node; an extra reference is taken on
 * the node because the path now points at it.
 */
5534 if (btrfs_root_refs(root_item) > 0 ||
5535 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5536 path.nodes[level] = root->node;
5537 path.slots[level] = 0;
5538 extent_buffer_get(root->node);
5540 struct btrfs_key key;
/* Position the path at the saved drop_progress key, at drop_level */
5542 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5543 level = root_item->drop_level;
5544 path.lowest_level = level;
5545 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5552 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5555 /* if ret is negative, walk shall stop */
5561 ret = walk_up_tree_v2(root, &path, &level);
5563 /* Normal exit, reset ret to err */
5570 btrfs_release_path(&path);
5575 * Find the relative ref for root_ref and root_backref.
5577 * @root: the root of the root tree.
5578 * @ref_key: the key of the root ref.
5580 * Return 0 if no error occurred.
/*
 * Cross-check one ROOT_REF / ROOT_BACKREF item against its counterpart.
 *
 * @root:	tree that is searched for the counterpart item
 * @ref_key:	key of the item being checked
 * @node:	extent buffer holding that item
 * @slot:	slot of the item inside @node
 *
 * The counterpart key swaps objectid and offset of @ref_key and uses the
 * opposite key type.  dirid, sequence, name length and name of the two items
 * are then compared.  ROOT_REF_MISSING and ROOT_REF_MISMATCH are the error
 * bits set in err by the visible code.
 */
5582 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5583 struct extent_buffer *node, int slot)
5585 struct btrfs_path path;
5586 struct btrfs_key key;
5587 struct btrfs_root_ref *ref;
5588 struct btrfs_root_ref *backref;
5589 char ref_name[BTRFS_NAME_LEN] = {0};
5590 char backref_name[BTRFS_NAME_LEN] = {0};
5596 u32 backref_namelen;
5601 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5602 ref_dirid = btrfs_root_ref_dirid(node, ref);
5603 ref_seq = btrfs_root_ref_sequence(node, ref);
5604 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* The name buffers hold BTRFS_NAME_LEN bytes, so longer names are capped */
5606 if (ref_namelen <= BTRFS_NAME_LEN) {
5609 len = BTRFS_NAME_LEN;
5610 warning("%s[%llu %llu] ref_name too long",
5611 ref_key->type == BTRFS_ROOT_REF_KEY ?
5612 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
/* The name bytes are stored directly after the btrfs_root_ref structure */
5615 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5617 /* Find relative root_ref */
5618 key.objectid = ref_key->offset;
/* REF + BACKREF - type yields the other one of the two key types */
5619 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5620 key.offset = ref_key->objectid;
5622 btrfs_init_path(&path);
5623 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5625 err |= ROOT_REF_MISSING;
5626 error("%s[%llu %llu] couldn't find relative ref",
5627 ref_key->type == BTRFS_ROOT_REF_KEY ?
5628 "ROOT_REF" : "ROOT_BACKREF",
5629 ref_key->objectid, ref_key->offset);
5633 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5634 struct btrfs_root_ref);
5635 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5636 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5637 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5639 if (backref_namelen <= BTRFS_NAME_LEN) {
5640 len = backref_namelen;
5642 len = BTRFS_NAME_LEN;
5643 warning("%s[%llu %llu] ref_name too long",
5644 key.type == BTRFS_ROOT_REF_KEY ?
5645 "ROOT_REF" : "ROOT_BACKREF",
5646 key.objectid, key.offset);
5648 read_extent_buffer(path.nodes[0], backref_name,
5649 (unsigned long)(backref + 1), len);
/* Any difference in dirid, sequence, name length or name is a mismatch */
5651 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5652 ref_namelen != backref_namelen ||
5653 strncmp(ref_name, backref_name, len)) {
5654 err |= ROOT_REF_MISMATCH;
5655 error("%s[%llu %llu] mismatch relative ref",
5656 ref_key->type == BTRFS_ROOT_REF_KEY ?
5657 "ROOT_REF" : "ROOT_BACKREF",
5658 ref_key->objectid, ref_key->offset);
5661 btrfs_release_path(&path);
5666 * Check all fs/file tree in low_memory mode.
5668 * 1. for fs tree root item, call check_fs_root_v2()
5669 * 2. for fs tree root ref/backref, call check_root_ref()
5671 * Return 0 if no error occurred.
/*
 * Iterate the items of fs_info->tree_root, starting the search at
 * BTRFS_FS_TREE_OBJECTID / ROOT_ITEM, and dispatch on the key type:
 *
 *  - ROOT_ITEM of an fs tree (per fs_root_objectid()): read the root and run
 *    check_fs_root_v2() on it
 *  - ROOT_REF / ROOT_BACKREF: run check_root_ref() on the item
 *
 * The EXTENDED_IREF incompat flag is sampled once and handed to every
 * check_fs_root_v2() call.
 */
5673 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5675 struct btrfs_root *tree_root = fs_info->tree_root;
5676 struct btrfs_root *cur_root = NULL;
5677 struct btrfs_path path;
5678 struct btrfs_key key;
5679 struct extent_buffer *node;
5680 unsigned int ext_ref;
5685 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5687 btrfs_init_path(&path);
5688 key.objectid = BTRFS_FS_TREE_OBJECTID;
5690 key.type = BTRFS_ROOT_ITEM_KEY;
5692 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5696 } else if (ret > 0) {
5702 node = path.nodes[0];
5703 slot = path.slots[0];
5704 btrfs_item_key_to_cpu(node, &key, slot);
5705 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5707 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5708 fs_root_objectid(key.objectid)) {
/*
 * The tree-reloc root is read with the _no_cache variant and is freed
 * again below with btrfs_free_fs_root(); other roots go through
 * btrfs_read_fs_root().
 */
5709 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5710 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5713 key.offset = (u64)-1;
5714 cur_root = btrfs_read_fs_root(fs_info, &key);
5717 if (IS_ERR(cur_root)) {
5718 error("Fail to read fs/subvol tree: %lld",
5724 ret = check_fs_root_v2(cur_root, ext_ref);
5727 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5728 btrfs_free_fs_root(cur_root);
5729 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5730 key.type == BTRFS_ROOT_BACKREF_KEY) {
5731 ret = check_root_ref(tree_root, &key, node, slot);
5735 ret = btrfs_next_item(tree_root, &path);
5745 btrfs_release_path(&path);
/*
 * Entry point for the fs-roots check.
 *
 * Prints "checking fs roots" to stderr unless progress reporting is enabled,
 * then calls check_fs_roots_v2() when check_mode is CHECK_MODE_LOWMEM and
 * otherwise check_fs_roots(), which additionally takes @root_cache.
 */
5749 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
5750 struct cache_tree *root_cache)
5754 if (!ctx.progress_enabled)
5755 fprintf(stderr, "checking fs roots\n");
5756 if (check_mode == CHECK_MODE_LOWMEM)
5757 ret = check_fs_roots_v2(fs_info);
5759 ret = check_fs_roots(fs_info, root_cache);
/*
 * Validate every backref hanging off @rec->backref_tree.
 *
 * For each backref the following conditions are tested, and a diagnostic is
 * written to stderr for each one that fails:
 *  - the backref was seen in the extent tree (found_extent_tree)
 *  - a tree backref was actually referenced (found_ref)
 *  - a data backref's found_ref count equals its num_refs
 *  - a data backref's disk_bytenr equals rec->start
 *  - a data backref's bytes equals rec->nr
 * Finally the summed reference count `found` is compared with rec->refs.
 *
 * NOTE(review): @print_errs is presumably what gates the fprintf() calls;
 * the return value is not visible here, but maybe_free_extent_rec() treats
 * 0 as "everything checked out" — confirm both.
 */
5764 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5766 struct extent_backref *back, *tmp;
5767 struct tree_backref *tback;
5768 struct data_backref *dback;
5772 rbtree_postorder_for_each_entry_safe(back, tmp,
5773 &rec->backref_tree, node) {
5774 if (!back->found_extent_tree) {
5778 if (back->is_data) {
5779 dback = to_data_backref(back);
5780 fprintf(stderr, "Data backref %llu %s %llu"
5781 " owner %llu offset %llu num_refs %lu"
5782 " not found in extent tree\n",
5783 (unsigned long long)rec->start,
5784 back->full_backref ?
5786 back->full_backref ?
5787 (unsigned long long)dback->parent:
5788 (unsigned long long)dback->root,
5789 (unsigned long long)dback->owner,
5790 (unsigned long long)dback->offset,
5791 (unsigned long)dback->num_refs);
5793 tback = to_tree_backref(back);
5794 fprintf(stderr, "Tree backref %llu parent %llu"
5795 " root %llu not found in extent tree\n",
5796 (unsigned long long)rec->start,
5797 (unsigned long long)tback->parent,
5798 (unsigned long long)tback->root);
5801 if (!back->is_data && !back->found_ref) {
5805 tback = to_tree_backref(back);
5806 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5807 (unsigned long long)rec->start,
5808 back->full_backref ? "parent" : "root",
5809 back->full_backref ?
5810 (unsigned long long)tback->parent :
5811 (unsigned long long)tback->root, back);
5813 if (back->is_data) {
5814 dback = to_data_backref(back);
5815 if (dback->found_ref != dback->num_refs) {
5819 fprintf(stderr, "Incorrect local backref count"
5820 " on %llu %s %llu owner %llu"
5821 " offset %llu found %u wanted %u back %p\n",
5822 (unsigned long long)rec->start,
5823 back->full_backref ?
5825 back->full_backref ?
5826 (unsigned long long)dback->parent:
5827 (unsigned long long)dback->root,
5828 (unsigned long long)dback->owner,
5829 (unsigned long long)dback->offset,
5830 dback->found_ref, dback->num_refs, back);
5832 if (dback->disk_bytenr != rec->start) {
5836 fprintf(stderr, "Backref disk bytenr does not"
5837 " match extent record, bytenr=%llu, "
5838 "ref bytenr=%llu\n",
5839 (unsigned long long)rec->start,
5840 (unsigned long long)dback->disk_bytenr);
5843 if (dback->bytes != rec->nr) {
5847 fprintf(stderr, "Backref bytes do not match "
5848 "extent backref, bytenr=%llu, ref "
5849 "bytes=%llu, backref bytes=%llu\n",
5850 (unsigned long long)rec->start,
5851 (unsigned long long)rec->nr,
5852 (unsigned long long)dback->bytes);
/* Accumulate the global count; data backrefs add their found_ref */
5855 if (!back->is_data) {
5858 dback = to_data_backref(back);
5859 found += dback->found_ref;
5862 if (found != rec->refs) {
5866 fprintf(stderr, "Incorrect global backref count "
5867 "on %llu found %llu wanted %llu\n",
5868 (unsigned long long)rec->start,
5869 (unsigned long long)found,
5870 (unsigned long long)rec->refs);
/*
 * Per-node callback handed to rb_free_nodes() by free_all_extent_backrefs():
 * converts the rb node back into its enclosing extent_backref.
 */
5876 static void __free_one_backref(struct rb_node *node)
5878 struct extent_backref *back = rb_node_to_extent_backref(node);
/*
 * Release every backref stored in @rec->backref_tree by running
 * __free_one_backref() on each node via rb_free_nodes().
 */
5883 static void free_all_extent_backrefs(struct extent_record *rec)
5885 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Tear down @extent_cache: take the first cache extent, map it back to its
 * extent_record, unlink it from the cache and free the record's backrefs.
 */
5888 static void free_extent_record_cache(struct cache_tree *extent_cache)
5890 struct cache_extent *cache;
5891 struct extent_record *rec;
5894 cache = first_cache_extent(extent_cache);
5897 rec = container_of(cache, struct extent_record, cache);
5898 remove_cache_extent(extent_cache, cache);
5899 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing is left to verify on it.
 *
 * All of the following must hold: content and owner ref were checked,
 * extent_item_refs equals refs and refs is non-zero, there are no
 * duplicates, all_backpointers_checked(rec, 0) returns 0, and none of
 * bad_full_backref / crossing_stripes / wrong_chunk_type is set.
 *
 * When they do, the record is removed from the cache, its backrefs are
 * freed and it is unlinked from whatever list rec->list is on.
 */
5904 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5905 struct extent_record *rec)
5907 if (rec->content_checked && rec->owner_ref_checked &&
5908 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5909 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5910 !rec->bad_full_backref && !rec->crossing_stripes &&
5911 !rec->wrong_chunk_type) {
5912 remove_cache_extent(extent_cache, &rec->cache);
5913 free_all_extent_backrefs(rec);
5914 list_del_init(&rec->list);
/*
 * Check that the owner recorded in the header of @buf really references
 * the block.
 *
 * First the backrefs of @rec are scanned, looking at found_ref,
 * full_backref and whether back->root equals the header owner.  The second
 * stage reads the owner's fs root, searches it for the first key of @buf
 * with lowest_level set one level above @buf, and compares the parent's
 * block pointer at that slot with buf->start.
 *
 * Returns 0 if `found` is set, 1 otherwise (final return statement).
 */
5920 static int check_owner_ref(struct btrfs_root *root,
5921 struct extent_record *rec,
5922 struct extent_buffer *buf)
5924 struct extent_backref *node, *tmp;
5925 struct tree_backref *back;
5926 struct btrfs_root *ref_root;
5927 struct btrfs_key key;
5928 struct btrfs_path path;
5929 struct extent_buffer *parent;
5934 rbtree_postorder_for_each_entry_safe(node, tmp,
5935 &rec->backref_tree, node) {
5938 if (!node->found_ref)
5940 if (node->full_backref)
5942 back = to_tree_backref(node);
5943 if (btrfs_header_owner(buf) == back->root)
5946 BUG_ON(rec->is_root);
5948 /* try to find the block by search corresponding fs tree */
5949 key.objectid = btrfs_header_owner(buf);
5950 key.type = BTRFS_ROOT_ITEM_KEY;
5951 key.offset = (u64)-1;
5953 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5954 if (IS_ERR(ref_root))
5957 level = btrfs_header_level(buf);
/* Use the first key stored in buf (item key or node key) as search key */
5959 btrfs_item_key_to_cpu(buf, &key, 0);
5961 btrfs_node_key_to_cpu(buf, &key, 0);
5963 btrfs_init_path(&path);
/* Stop the search one level above buf so that the parent is in the path */
5964 path.lowest_level = level + 1;
5965 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5969 parent = path.nodes[level + 1];
5970 if (parent && buf->start == btrfs_node_blockptr(parent,
5971 path.slots[level + 1]))
5974 btrfs_release_path(&path);
5975 return found ? 0 : 1;
/*
 * Scan the backrefs of @rec for a tree backref whose root is
 * BTRFS_EXTENT_TREE_OBJECTID; full_backref entries are tested separately
 * before the root comparison.
 *
 * NOTE(review): record_bad_block_io() treats a zero return as "not an
 * extent tree record" — confirm the exact return values.
 */
5978 static int is_extent_tree_record(struct extent_record *rec)
5980 struct extent_backref *node, *tmp;
5981 struct tree_backref *back;
5984 rbtree_postorder_for_each_entry_safe(node, tmp,
5985 &rec->backref_tree, node) {
5988 back = to_tree_backref(node);
5989 if (node->full_backref)
5991 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Record a block that could not be read as a corrupt extent record.
 *
 * The range [start, start + len) is looked up in @extent_cache; the matching
 * extent_record is tested with is_extent_tree_record(), and its parent_key
 * is converted to CPU byte order and passed to
 * btrfs_add_corrupt_extent_record(), whose result is returned.
 */
5998 static int record_bad_block_io(struct btrfs_fs_info *info,
5999 struct cache_tree *extent_cache,
6002 struct extent_record *rec;
6003 struct cache_extent *cache;
6004 struct btrfs_key key;
6006 cache = lookup_cache_extent(extent_cache, start, len);
6010 rec = container_of(cache, struct extent_record, cache);
6011 if (!is_extent_tree_record(rec))
6014 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6015 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of @buf.
 *
 * For a node (header level != 0) the two btrfs_key_ptr entries are swapped
 * and btrfs_fixup_low_keys() is called with the node's first key.  For a
 * leaf the item payloads, item offsets, item sizes and item keys are
 * swapped, using two temporary heap buffers for the payloads.
 *
 * NOTE(review): in the leaf branch item1_data is allocated with item1_size
 * but written back with item2_size (and vice versa); if the two sizes differ
 * this reads past the smaller allocation — confirm whether callers guarantee
 * equal sizes.
 */
6018 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6019 struct extent_buffer *buf, int slot)
6021 if (btrfs_header_level(buf)) {
6022 struct btrfs_key_ptr ptr1, ptr2;
6024 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6025 sizeof(struct btrfs_key_ptr));
6026 read_extent_buffer(buf, &ptr2,
6027 btrfs_node_key_ptr_offset(slot + 1),
6028 sizeof(struct btrfs_key_ptr));
6029 write_extent_buffer(buf, &ptr1,
6030 btrfs_node_key_ptr_offset(slot + 1),
6031 sizeof(struct btrfs_key_ptr));
6032 write_extent_buffer(buf, &ptr2,
6033 btrfs_node_key_ptr_offset(slot),
6034 sizeof(struct btrfs_key_ptr));
6036 struct btrfs_disk_key key;
6037 btrfs_node_key(buf, &key, 0);
6038 btrfs_fixup_low_keys(root, path, &key,
6039 btrfs_header_level(buf) + 1);
6042 struct btrfs_item *item1, *item2;
6043 struct btrfs_key k1, k2;
6044 char *item1_data, *item2_data;
6045 u32 item1_offset, item2_offset, item1_size, item2_size;
6047 item1 = btrfs_item_nr(slot);
6048 item2 = btrfs_item_nr(slot + 1);
6049 btrfs_item_key_to_cpu(buf, &k1, slot);
6050 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6051 item1_offset = btrfs_item_offset(buf, item1);
6052 item2_offset = btrfs_item_offset(buf, item2);
6053 item1_size = btrfs_item_size(buf, item1);
6054 item2_size = btrfs_item_size(buf, item2);
6056 item1_data = malloc(item1_size);
6059 item2_data = malloc(item2_size);
6065 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6066 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
6068 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6069 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6073 btrfs_set_item_offset(buf, item1, item2_offset);
6074 btrfs_set_item_offset(buf, item2, item1_offset);
6075 btrfs_set_item_size(buf, item1, item2_size);
6076 btrfs_set_item_size(buf, item2, item1_size);
/* The key setter works on path->slots[0], so aim it at each slot in turn */
6078 path->slots[0] = slot;
6079 btrfs_set_item_key_unsafe(root, path, &k2);
6080 path->slots[0] = slot + 1;
6081 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair key ordering in the block at path->nodes[path->lowest_level].
 *
 * Adjacent keys are compared pairwise with btrfs_comp_cpu_keys(); a pair is
 * exchanged through swap_values() and the buffer is marked dirty.
 *
 * NOTE(review): the loop bound is btrfs_header_nritems(buf) - 1; confirm
 * that a block with zero items cannot reach this function.
 */
6086 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6088 struct extent_buffer *buf;
6089 struct btrfs_key k1, k2;
6091 int level = path->lowest_level;
6094 buf = path->nodes[level];
6095 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6097 btrfs_node_key_to_cpu(buf, &k1, i);
6098 btrfs_node_key_to_cpu(buf, &k2, i + 1);
6100 btrfs_item_key_to_cpu(buf, &k1, i);
6101 btrfs_item_key_to_cpu(buf, &k2, i + 1);
6103 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6105 ret = swap_values(root, path, buf, i);
6108 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * The key type at @slot is first tested against a fixed set: DIR_INDEX,
 * EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF and EXTENT_DATA_REF.  The
 * deletion is announced on stdout, the following item headers are moved
 * down by one, nritems is decremented and the buffer is marked dirty.
 * btrfs_fixup_low_keys() is called with the leaf's new first key (the
 * guarding condition is presumably slot == 0 — confirm).
 */
6114 static int delete_bogus_item(struct btrfs_root *root,
6115 struct btrfs_path *path,
6116 struct extent_buffer *buf, int slot)
6118 struct btrfs_key key;
6119 int nritems = btrfs_header_nritems(buf);
6121 btrfs_item_key_to_cpu(buf, &key, slot);
6123 /* These are all the keys we can deal with missing. */
6124 if (key.type != BTRFS_DIR_INDEX_KEY &&
6125 key.type != BTRFS_EXTENT_ITEM_KEY &&
6126 key.type != BTRFS_METADATA_ITEM_KEY &&
6127 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6128 key.type != BTRFS_EXTENT_DATA_REF_KEY)
6131 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6132 (unsigned long long)key.objectid, key.type,
6133 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array; item data is not touched here */
6134 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6135 btrfs_item_nr_offset(slot + 1),
6136 sizeof(struct btrfs_item) *
6137 (nritems - slot - 1));
6138 btrfs_set_header_nritems(buf, nritems - 1);
6140 struct btrfs_disk_key disk_key;
6142 btrfs_item_key(buf, &disk_key, 0);
6143 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6145 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * Item 0 is expected to end at BTRFS_LEAF_DATA_SIZE(root) and every later
 * item is expected to end where the previous item's data begins.  An item
 * that ends beyond that point is handed to delete_bogus_item(); an item that
 * ends short of it has its data moved up by the computed `shift` and its
 * offset rewritten, after which the buffer is marked dirty.
 */
6149 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6151 struct extent_buffer *buf;
6155 /* We should only get this for leaves */
6156 BUG_ON(path->lowest_level);
6157 buf = path->nodes[0];
6159 for (i = 0; i < btrfs_header_nritems(buf); i++) {
6160 unsigned int shift = 0, offset;
6162 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6163 BTRFS_LEAF_DATA_SIZE(root)) {
6164 if (btrfs_item_end_nr(buf, i) >
6165 BTRFS_LEAF_DATA_SIZE(root)) {
6166 ret = delete_bogus_item(root, path, buf, i);
6169 fprintf(stderr, "item is off the end of the "
6170 "leaf, can't fix\n");
6174 shift = BTRFS_LEAF_DATA_SIZE(root) -
6175 btrfs_item_end_nr(buf, i);
6176 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6177 btrfs_item_offset_nr(buf, i - 1)) {
6178 if (btrfs_item_end_nr(buf, i) >
6179 btrfs_item_offset_nr(buf, i - 1)) {
6180 ret = delete_bogus_item(root, path, buf, i);
6183 fprintf(stderr, "items overlap, can't fix\n");
6187 shift = btrfs_item_offset_nr(buf, i - 1) -
6188 btrfs_item_end_nr(buf, i);
6193 printf("Shifting item nr %d by %u bytes in block %llu\n",
6194 i, shift, (unsigned long long)buf->start);
6195 offset = btrfs_item_offset_nr(buf, i);
6196 memmove_extent_buffer(buf,
6197 btrfs_leaf_data(buf) + offset + shift,
6198 btrfs_leaf_data(buf) + offset,
6199 btrfs_item_size_nr(buf, i));
6200 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6202 btrfs_mark_buffer_dirty(buf);
6206 * We may have moved things, in which case we want to exit so we don't
6207 * write those changes out. Once we have proper abort functionality in
6208 * progs this can be changed to something nicer.
6215 * Attempt to fix basic block failures. If we can't fix it for whatever reason
6216 * then just return -EIO.
/*
 * Try to repair @buf when @status is BTRFS_TREE_BLOCK_BAD_KEY_ORDER or
 * BTRFS_TREE_BLOCK_INVALID_OFFSETS.
 *
 * All roots referencing buf->start are collected with
 * btrfs_find_all_roots().  For each of them the fs root is read, a
 * transaction is started, and the block is located with a COW search
 * (cow = 1) using the block's first key, lowest_level set to the block's
 * level and skip_check_block set.  fix_key_order() or fix_item_offset() is
 * then applied according to @status and the transaction is committed.
 */
6218 static int try_to_fix_bad_block(struct btrfs_root *root,
6219 struct extent_buffer *buf,
6220 enum btrfs_tree_block_status status)
6222 struct btrfs_trans_handle *trans;
6223 struct ulist *roots;
6224 struct ulist_node *node;
6225 struct btrfs_root *search_root;
6226 struct btrfs_path path;
6227 struct ulist_iterator iter;
6228 struct btrfs_key root_key, key;
6231 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6232 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6235 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6239 btrfs_init_path(&path);
6240 ULIST_ITER_INIT(&iter);
6241 while ((node = ulist_next(roots, &iter))) {
6242 root_key.objectid = node->val;
6243 root_key.type = BTRFS_ROOT_ITEM_KEY;
6244 root_key.offset = (u64)-1;
6246 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6253 trans = btrfs_start_transaction(search_root, 0);
6254 if (IS_ERR(trans)) {
6255 ret = PTR_ERR(trans);
6259 path.lowest_level = btrfs_header_level(buf);
6260 path.skip_check_block = 1;
6261 if (path.lowest_level)
6262 btrfs_node_key_to_cpu(buf, &key, 0);
6264 btrfs_item_key_to_cpu(buf, &key, 0);
6265 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6268 btrfs_commit_transaction(trans, search_root);
6271 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6272 ret = fix_key_order(search_root, &path);
6273 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6274 ret = fix_item_offset(search_root, &path);
6276 btrfs_commit_transaction(trans, search_root);
6279 btrfs_release_path(&path);
6280 btrfs_commit_transaction(trans, search_root);
6283 btrfs_release_path(&path);
/*
 * Validate one tree block and update its extent record.
 *
 * The record for [buf->start, buf->len) is looked up in @extent_cache and
 * updated with the block's generation, first-key objectid and level.  The
 * block is checked with btrfs_check_leaf() or btrfs_check_node() against
 * rec->parent_key; a block that is not clean is handed to
 * try_to_fix_bad_block() and reported as "bad block" if that does not yield
 * a clean status.
 *
 * content_checked is then set, and owner_ref_checked is set either because
 * @flags carries BTRFS_BLOCK_FLAG_FULL_BACKREF or based on the result of
 * check_owner_ref().  Finally maybe_free_extent_rec() gets a chance to drop
 * the record.
 */
6287 static int check_block(struct btrfs_root *root,
6288 struct cache_tree *extent_cache,
6289 struct extent_buffer *buf, u64 flags)
6291 struct extent_record *rec;
6292 struct cache_extent *cache;
6293 struct btrfs_key key;
6294 enum btrfs_tree_block_status status;
6298 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6301 rec = container_of(cache, struct extent_record, cache);
6302 rec->generation = btrfs_header_generation(buf);
6304 level = btrfs_header_level(buf);
6305 if (btrfs_header_nritems(buf) > 0) {
6308 btrfs_item_key_to_cpu(buf, &key, 0);
6310 btrfs_node_key_to_cpu(buf, &key, 0);
6312 rec->info_objectid = key.objectid;
6314 rec->info_level = level;
6316 if (btrfs_is_leaf(buf))
6317 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6319 status = btrfs_check_node(root, &rec->parent_key, buf);
6321 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6323 status = try_to_fix_bad_block(root, buf, status);
6324 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6326 fprintf(stderr, "bad block %llu\n",
6327 (unsigned long long)buf->start);
6330 * Signal to callers we need to start the scan over
6331 * again since we'll have cowed blocks.
6336 rec->content_checked = 1;
6337 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6338 rec->owner_ref_checked = 1;
6340 ret = check_owner_ref(root, rec, buf);
6342 rec->owner_ref_checked = 1;
6346 maybe_free_extent_rec(extent_cache, rec);
/*
 * List-based lookup of a tree backref on @rec->backrefs.
 *
 * Walks the list from rec->backrefs.next until it wraps around to the list
 * head.  Two kinds of match are tested: a full backref whose parent equals
 * @parent, and a non-full backref whose root equals @root.
 *
 * NOTE(review): presumably a non-zero @parent selects the first kind of
 * match — confirm.
 */
6351 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6352 u64 parent, u64 root)
6354 struct list_head *cur = rec->backrefs.next;
6355 struct extent_backref *node;
6356 struct tree_backref *back;
6358 while(cur != &rec->backrefs) {
6359 node = to_extent_backref(cur);
6363 back = to_tree_backref(node);
6365 if (!node->full_backref)
6367 if (parent == back->parent)
6370 if (node->full_backref)
6372 if (back->root == root)
/*
 * Allocate a tree_backref with malloc() and zero its embedded node.
 *
 * Two initialisations are visible: parent = @parent with full_backref = 1,
 * and full_backref = 0.
 *
 * NOTE(review): presumably the first applies when @parent is non-zero and
 * the second stores @root instead — confirm, along with the allocation
 * failure handling and any linking into @rec.
 */
6380 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6381 u64 parent, u64 root)
6383 struct tree_backref *ref = malloc(sizeof(*ref));
6387 memset(&ref->node, 0, sizeof(ref->node));
6389 ref->parent = parent;
6390 ref->node.full_backref = 1;
6393 ref->node.full_backref = 0;
/*
 * List-based lookup of a data backref on @rec->backrefs.
 *
 * Walks the list from rec->backrefs.next until it wraps around to the list
 * head.  A full backref is compared by parent; a non-full backref is
 * compared by (root, owner, offset).  For the latter, when both the
 * caller's found_ref and the candidate's found_ref are set, the candidate's
 * bytes and disk_bytenr are also compared with @bytes and @disk_bytenr.
 */
6400 static struct data_backref *find_data_backref(struct extent_record *rec,
6401 u64 parent, u64 root,
6402 u64 owner, u64 offset,
6404 u64 disk_bytenr, u64 bytes)
6406 struct list_head *cur = rec->backrefs.next;
6407 struct extent_backref *node;
6408 struct data_backref *back;
6410 while(cur != &rec->backrefs) {
6411 node = to_extent_backref(cur);
6415 back = to_data_backref(node);
6417 if (!node->full_backref)
6419 if (parent == back->parent)
6422 if (node->full_backref)
6424 if (back->root == root && back->owner == owner &&
6425 back->offset == offset) {
6426 if (found_ref && node->found_ref &&
6427 (back->bytes != bytes ||
6428 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data_backref with malloc(), zero its embedded node and mark it
 * as a data backref (is_data = 1).
 *
 * Two initialisations are visible: parent = @parent with full_backref = 1,
 * and offset = @offset with full_backref = 0.  bytes is set from max_size,
 * and rec->max_size is raised to max_size when max_size is larger.
 */
6438 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6439 u64 parent, u64 root,
6440 u64 owner, u64 offset,
6443 struct data_backref *ref = malloc(sizeof(*ref));
6447 memset(&ref->node, 0, sizeof(ref->node));
6448 ref->node.is_data = 1;
6451 ref->parent = parent;
6454 ref->node.full_backref = 1;
6458 ref->offset = offset;
6459 ref->node.full_backref = 0;
6461 ref->bytes = max_size;
6464 if (max_size > rec->max_size)
6465 rec->max_size = max_size;
6469 /* Check if the type of extent matches with its chunk */
/*
 * The block group covering rec->start is looked up through global_info and
 * rec->wrong_chunk_type is set when:
 *  - a data extent (rec->metadata == 0) lives in a block group without the
 *    DATA flag
 *  - a metadata extent lives in a block group with neither SYSTEM nor
 *    METADATA
 *  - the first backref of a metadata extent is a data backref
 *  - the block group lacks the flag expected from the first tree backref's
 *    root: SYSTEM for the chunk tree, METADATA otherwise
 */
6470 static void check_extent_type(struct extent_record *rec)
6472 struct btrfs_block_group_cache *bg_cache;
6474 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6478 /* data extent, check chunk directly*/
6479 if (!rec->metadata) {
6480 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6481 rec->wrong_chunk_type = 1;
6485 /* metadata extent, check the obvious case first */
6486 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6487 BTRFS_BLOCK_GROUP_METADATA))) {
6488 rec->wrong_chunk_type = 1;
6493 * Check SYSTEM extent, as it's also marked as metadata, we can only
6494 * make sure it's a SYSTEM extent by its backref
6496 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
6497 struct extent_backref *node;
6498 struct tree_backref *tback;
6501 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
6502 if (node->is_data) {
6503 /* tree block shouldn't have data backref */
6504 rec->wrong_chunk_type = 1;
6507 tback = container_of(node, struct tree_backref, node);
6509 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6510 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6512 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6513 if (!(bg_cache->flags & bg_type))
6514 rec->wrong_chunk_type = 1;
6519 * Allocate a new extent record, fill default values from @tmpl and insert it into
6520 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6521 * the cache, otherwise it fails.
/*
 * Create an extent_record from @tmpl and insert it into @extent_cache.
 *
 * @tmpl->max_size must be non-zero (BUG_ON).  Fields are either copied from
 * @tmpl or reset to defaults; rec->nr becomes the larger of tmpl->nr and
 * tmpl->max_size, while the cache entry itself spans
 * [tmpl->start, tmpl->start + tmpl->nr).  The global bytes_used counter is
 * increased by rec->nr, and crossing_stripes and the chunk type are
 * evaluated for the new record.
 */
6523 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6524 struct extent_record *tmpl)
6526 struct extent_record *rec;
6529 BUG_ON(tmpl->max_size == 0);
6530 rec = malloc(sizeof(*rec));
6533 rec->start = tmpl->start;
6534 rec->max_size = tmpl->max_size;
6535 rec->nr = max(tmpl->nr, tmpl->max_size);
6536 rec->found_rec = tmpl->found_rec;
6537 rec->content_checked = tmpl->content_checked;
6538 rec->owner_ref_checked = tmpl->owner_ref_checked;
6539 rec->num_duplicates = 0;
6540 rec->metadata = tmpl->metadata;
6541 rec->flag_block_full_backref = FLAG_UNSET;
6542 rec->bad_full_backref = 0;
6543 rec->crossing_stripes = 0;
6544 rec->wrong_chunk_type = 0;
6545 rec->is_root = tmpl->is_root;
6546 rec->refs = tmpl->refs;
6547 rec->extent_item_refs = tmpl->extent_item_refs;
6548 rec->parent_generation = tmpl->parent_generation;
6549 INIT_LIST_HEAD(&rec->backrefs);
6550 INIT_LIST_HEAD(&rec->dups);
6551 INIT_LIST_HEAD(&rec->list);
6552 rec->backref_tree = RB_ROOT;
6553 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6554 rec->cache.start = tmpl->start;
6555 rec->cache.size = tmpl->nr;
6556 ret = insert_cache_extent(extent_cache, &rec->cache);
6561 bytes_used += rec->nr;
6564 rec->crossing_stripes = check_crossing_stripes(global_info,
6565 rec->start, global_info->nodesize);
6566 check_extent_type(rec);
6571 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6573 * - refs - if found, increase refs
6574 * - is_root - if found, set
6575 * - content_checked - if found, set
6576 * - owner_ref_checked - if found, set
6578 * If not found, create a new one, initialize and insert.
/*
 * Merge @tmpl into the extent record covering [tmpl->start, tmpl->nr), or
 * create a new record through add_extent_rec_nolookup().
 *
 * On an existing record:
 *  - when @tmpl carries found_rec and either the start differs or the
 *    record already has found_rec set, the record is queued on
 *    duplicate_extents (if not already on a list) and a small duplicate
 *    record is appended to rec->dups
 *  - extent_item_refs is taken from @tmpl for the non-duplicate case, with
 *    a message on stderr if the record already had a value
 *  - content_checked / owner_ref_checked are sticky, parent_key is
 *    overwritten, parent_generation and max_size are updated from @tmpl
 *  - crossing_stripes and the chunk type are re-evaluated and the record
 *    may be released through maybe_free_extent_rec()
 */
6580 static int add_extent_rec(struct cache_tree *extent_cache,
6581 struct extent_record *tmpl)
6583 struct extent_record *rec;
6584 struct cache_extent *cache;
6588 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6590 rec = container_of(cache, struct extent_record, cache);
6594 rec->nr = max(tmpl->nr, tmpl->max_size);
6597 * We need to make sure to reset nr to whatever the extent
6598 * record says was the real size, this way we can compare it to
6601 if (tmpl->found_rec) {
6602 if (tmpl->start != rec->start || rec->found_rec) {
6603 struct extent_record *tmp;
6606 if (list_empty(&rec->list))
6607 list_add_tail(&rec->list,
6608 &duplicate_extents);
6611 * We have to do this song and dance in case we
6612 * find an extent record that falls inside of
6613 * our current extent record but does not have
6614 * the same objectid.
6616 tmp = malloc(sizeof(*tmp));
6619 tmp->start = tmpl->start;
6620 tmp->max_size = tmpl->max_size;
6623 tmp->metadata = tmpl->metadata;
6624 tmp->extent_item_refs = tmpl->extent_item_refs;
6625 INIT_LIST_HEAD(&tmp->list);
6626 list_add_tail(&tmp->list, &rec->dups);
6627 rec->num_duplicates++;
6634 if (tmpl->extent_item_refs && !dup) {
6635 if (rec->extent_item_refs) {
6636 fprintf(stderr, "block %llu rec "
6637 "extent_item_refs %llu, passed %llu\n",
6638 (unsigned long long)tmpl->start,
6639 (unsigned long long)
6640 rec->extent_item_refs,
6641 (unsigned long long)tmpl->extent_item_refs);
6643 rec->extent_item_refs = tmpl->extent_item_refs;
6647 if (tmpl->content_checked)
6648 rec->content_checked = 1;
6649 if (tmpl->owner_ref_checked)
6650 rec->owner_ref_checked = 1;
6651 memcpy(&rec->parent_key, &tmpl->parent_key,
6652 sizeof(tmpl->parent_key));
6653 if (tmpl->parent_generation)
6654 rec->parent_generation = tmpl->parent_generation;
6655 if (rec->max_size < tmpl->max_size)
6656 rec->max_size = tmpl->max_size;
6659 * A metadata extent can't cross stripe_len boundary, otherwise
6660 * kernel scrub won't be able to handle it.
6661 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6665 rec->crossing_stripes = check_crossing_stripes(
6666 global_info, rec->start,
6667 global_info->nodesize);
6668 check_extent_type(rec);
6669 maybe_free_extent_rec(extent_cache, rec);
6673 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref (@parent / @root) for the extent at @bytenr.
 *
 * An extent record is created from a zeroed template through
 * add_extent_rec_nolookup() if none covers @bytenr yet.  The backref is
 * looked up with find_tree_backref() or allocated with
 * alloc_tree_backref().  One of its two flags, found_ref or
 * found_extent_tree, is then set; a flag that was already set produces an
 * "already exists" message on stderr.
 *
 * NOTE(review): presumably @found_ref selects which of the two flags is
 * updated — confirm.
 *
 * The backref is inserted into rec->backref_tree (WARN_ON if the insertion
 * reports a problem), after which the chunk type is re-checked and the
 * record may be released.
 */
6678 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6679 u64 parent, u64 root, int found_ref)
6681 struct extent_record *rec;
6682 struct tree_backref *back;
6683 struct cache_extent *cache;
6685 bool insert = false;
6687 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6689 struct extent_record tmpl;
6691 memset(&tmpl, 0, sizeof(tmpl));
6692 tmpl.start = bytenr;
6697 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6701 /* really a bug in cache_extent implement now */
6702 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6707 rec = container_of(cache, struct extent_record, cache);
6708 if (rec->start != bytenr) {
6710 * Several cause, from unaligned bytenr to over lapping extents
6715 back = find_tree_backref(rec, parent, root);
6717 back = alloc_tree_backref(rec, parent, root);
6724 if (back->node.found_ref) {
6725 fprintf(stderr, "Extent back ref already exists "
6726 "for %llu parent %llu root %llu \n",
6727 (unsigned long long)bytenr,
6728 (unsigned long long)parent,
6729 (unsigned long long)root);
6731 back->node.found_ref = 1;
6733 if (back->node.found_extent_tree) {
6734 fprintf(stderr, "Extent back ref already exists "
6735 "for %llu parent %llu root %llu \n",
6736 (unsigned long long)bytenr,
6737 (unsigned long long)parent,
6738 (unsigned long long)root);
6740 back->node.found_extent_tree = 1;
6743 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6744 compare_extent_backref));
6745 check_extent_type(rec);
6746 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * An extent record is created through add_extent_rec_nolookup() if none
 * covers @bytenr yet, and rec->max_size is raised to @max_size if needed.
 * The backref is looked up with find_data_backref() or allocated with
 * alloc_data_backref().
 *
 * Two update paths are visible:
 *  - @num_refs must be 1 (BUG_ON); found_ref is set and the per-backref
 *    found_ref counter is incremented; bytes / disk_bytenr are refreshed
 *    from @max_size / @bytenr, with the node erased from the rb-tree first
 *    because those fields feed the comparator; the record is marked
 *    content_checked and owner_ref_checked
 *  - num_refs is stored and found_extent_tree is set, with an
 *    "already exists" message if it was already set
 *
 * NOTE(review): presumably @found_ref selects between the two paths —
 * confirm.
 */
6750 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6751 u64 parent, u64 root, u64 owner, u64 offset,
6752 u32 num_refs, int found_ref, u64 max_size)
6754 struct extent_record *rec;
6755 struct data_backref *back;
6756 struct cache_extent *cache;
6758 bool insert = false;
6760 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6762 struct extent_record tmpl;
6764 memset(&tmpl, 0, sizeof(tmpl));
6765 tmpl.start = bytenr;
6767 tmpl.max_size = max_size;
6769 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6773 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6778 rec = container_of(cache, struct extent_record, cache);
6779 if (rec->max_size < max_size)
6780 rec->max_size = max_size;
6783 * If found_ref is set then max_size is the real size and must match the
6784 * existing refs. So if we have already found a ref then we need to
6785 * make sure that this ref matches the existing one, otherwise we need
6786 * to add a new backref so we can notice that the backrefs don't match
6787 * and we need to figure out who is telling the truth. This is to
6788 * account for that awful fsync bug I introduced where we'd end up with
6789 * a btrfs_file_extent_item that would have its length include multiple
6790 * prealloc extents or point inside of a prealloc extent.
6792 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6795 back = alloc_data_backref(rec, parent, root, owner, offset,
6802 BUG_ON(num_refs != 1);
6803 if (back->node.found_ref)
6804 BUG_ON(back->bytes != max_size);
6805 back->node.found_ref = 1;
6806 back->found_ref += 1;
6807 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
6808 back->bytes = max_size;
6809 back->disk_bytenr = bytenr;
6811 /* Need to reinsert if not already in the tree */
6813 rb_erase(&back->node.node, &rec->backref_tree);
6818 rec->content_checked = 1;
6819 rec->owner_ref_checked = 1;
6821 if (back->node.found_extent_tree) {
6822 fprintf(stderr, "Extent back ref already exists "
6823 "for %llu parent %llu root %llu "
6824 "owner %llu offset %llu num_refs %lu\n",
6825 (unsigned long long)bytenr,
6826 (unsigned long long)parent,
6827 (unsigned long long)root,
6828 (unsigned long long)owner,
6829 (unsigned long long)offset,
6830 (unsigned long)num_refs);
6832 back->num_refs = num_refs;
6833 back->node.found_extent_tree = 1;
6836 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6837 compare_extent_backref));
6839 maybe_free_extent_rec(extent_cache, rec);
/*
 * add_pending - note a block as seen and queue it for processing.
 *
 * Adds the (bytenr, size) extent to @seen, keeping that result in ret, and
 * adds the same extent to @pending; the result of the second insertion is
 * not checked.
 * NOTE(review): what is done with ret between the two calls is on lines
 * not shown in this listing - confirm.
 */
6843 static int add_pending(struct cache_tree *pending,
6844 struct cache_tree *seen, u64 bytenr, u32 size)
6847 ret = add_cache_extent(seen, bytenr, size);
6850 add_cache_extent(pending, bytenr, size);
/*
 * pick_next_pending - choose the next block(s) to work on and store their
 * (start, size) pairs in @bits.
 *
 * Sources visible in this listing, in order of appearance:
 *  - the first extent of @reada goes into bits[0];
 *  - @nodes is searched from a start derived from @last (moved back by
 *    32768 when larger than that), then from offset 0;
 *  - @pending is searched from offset 0;
 *  - two copy loops fill @bits from consecutive extents and stop once
 *    @bits_nr entries are stored or the tree runs out;
 *  - when more than 8 slots are still free, further extents from @pending
 *    are considered starting at the end of bits[0], with the gap to the
 *    running lookup offset compared against 32768.
 *
 * NOTE(review): node_start is an unsigned long initialised from the u64
 * @last, which may truncate where unsigned long is 32 bits wide.
 * NOTE(review): the conditions selecting between these sources and the
 * return values are on lines missing from this listing.
 */
6854 static int pick_next_pending(struct cache_tree *pending,
6855 struct cache_tree *reada,
6856 struct cache_tree *nodes,
6857 u64 last, struct block_info *bits, int bits_nr,
6860 unsigned long node_start = last;
6861 struct cache_extent *cache;
6864 cache = search_cache_extent(reada, 0);
6866 bits[0].start = cache->start;
6867 bits[0].size = cache->size;
6872 if (node_start > 32768)
6873 node_start -= 32768;
6875 cache = search_cache_extent(nodes, node_start);
6877 cache = search_cache_extent(nodes, 0);
6880 cache = search_cache_extent(pending, 0);
6885 bits[ret].start = cache->start;
6886 bits[ret].size = cache->size;
6887 cache = next_cache_extent(cache);
6889 } while (cache && ret < bits_nr);
6895 bits[ret].start = cache->start;
6896 bits[ret].size = cache->size;
6897 cache = next_cache_extent(cache);
6899 } while (cache && ret < bits_nr);
/* Plenty of room left: also look at pending extents after bits[0]. */
6901 if (bits_nr - ret > 8) {
6902 u64 lookup = bits[0].start + bits[0].size;
6903 struct cache_extent *next;
6904 next = search_cache_extent(pending, lookup);
6906 if (next->start - lookup > 32768)
6908 bits[ret].start = next->start;
6909 bits[ret].size = next->size;
6910 lookup = next->start + next->size;
6914 next = next_cache_extent(next);
/*
 * free_chunk_record - release helper for a chunk_record reached through
 * its embedded cache_extent.
 *
 * Recovers the record with container_of() and unlinks it from both its
 * ->list and ->dextents lists.
 * NOTE(review): the release of the record memory itself is not visible in
 * this listing.
 */
6922 static void free_chunk_record(struct cache_extent *cache)
6924 struct chunk_record *rec;
6926 rec = container_of(cache, struct chunk_record, cache);
6927 list_del_init(&rec->list);
6928 list_del_init(&rec->dextents);
/*
 * free_chunk_cache_tree - free the extents of @chunk_cache through
 * cache_tree_free_extents(), passing free_chunk_record() as the helper
 * for each entry.
 */
6932 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6934 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * free_device_record - release helper for a device_record reached through
 * its embedded rb_node; the record is recovered with container_of().
 * NOTE(review): the statement that frees the record is not visible in
 * this listing.
 */
6937 static void free_device_record(struct rb_node *node)
6939 struct device_record *rec;
6941 rec = container_of(node, struct device_record, node);
/*
 * Expands FREE_RB_BASED_TREE for the name "device_cache" with
 * free_device_record as the per-node helper.
 * NOTE(review): presumably this generates the function that tears down the
 * device rb-tree - confirm against the macro definition.
 */
6945 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * insert_block_group_record - add @bg_rec to @tree.
 *
 * Inserts bg_rec->cache into tree->tree with insert_cache_extent() and
 * appends bg_rec->list to the tail of tree->block_groups.
 * NOTE(review): the handling of a failed insertion sits on lines not shown
 * in this listing.
 */
6947 int insert_block_group_record(struct block_group_tree *tree,
6948 struct block_group_record *bg_rec)
6952 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6956 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * free_block_group_record - release helper for a block_group_record
 * reached through its embedded cache_extent; the record is unlinked from
 * its ->list.
 * NOTE(review): the release of the record memory is not visible in this
 * listing.
 */
6960 static void free_block_group_record(struct cache_extent *cache)
6962 struct block_group_record *rec;
6964 rec = container_of(cache, struct block_group_record, cache);
6965 list_del_init(&rec->list);
/*
 * free_block_group_tree - free the extents of tree->tree through
 * cache_tree_free_extents(), passing free_block_group_record() as the
 * helper for each entry.
 */
6969 void free_block_group_tree(struct block_group_tree *tree)
6971 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * insert_device_extent_record - add @de_rec to @tree.
 *
 * Uses insert_cache_extent2() for the cache tree insertion, then appends
 * the record to both tree->no_chunk_orphans (via ->chunk_list) and
 * tree->no_device_orphans (via ->device_list).
 * NOTE(review): the handling of a failed insertion sits on lines not shown
 * in this listing.
 */
6974 int insert_device_extent_record(struct device_extent_tree *tree,
6975 struct device_extent_record *de_rec)
6980 * Device extent is a bit different from the other extents, because
6981 * the extents which belong to the different devices may have the
6982 * same start and size, so we need use the special extent cache
6983 * search/insert functions.
6985 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6989 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6990 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * free_device_extent_record - release helper for a device_extent_record
 * reached through its embedded cache_extent.
 *
 * Each of ->chunk_list and ->device_list is unlinked only when it is not
 * already empty.
 * NOTE(review): the release of the record memory is not visible in this
 * listing.
 */
6994 static void free_device_extent_record(struct cache_extent *cache)
6996 struct device_extent_record *rec;
6998 rec = container_of(cache, struct device_extent_record, cache);
6999 if (!list_empty(&rec->chunk_list))
7000 list_del_init(&rec->chunk_list);
7001 if (!list_empty(&rec->device_list))
7002 list_del_init(&rec->device_list);
/*
 * free_device_extent_tree - free the extents of tree->tree through
 * cache_tree_free_extents(), passing free_device_extent_record() as the
 * helper for each entry.
 */
7006 void free_device_extent_tree(struct device_extent_tree *tree)
7008 cache_tree_free_extents(&tree->tree, free_device_extent_record);
7011 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * process_extent_ref_v0 - turn the v0 extent ref item at @slot of @leaf
 * into a backref (only compiled with BTRFS_COMPAT_EXTENT_TREE_V0).
 *
 * When the ref's objectid is below BTRFS_FIRST_FREE_OBJECTID the item is
 * recorded with add_tree_backref(); the other visible call records a data
 * backref using key.objectid, key.offset and the v0 ref count, with all
 * remaining arguments zero.
 */
7012 static int process_extent_ref_v0(struct cache_tree *extent_cache,
7013 struct extent_buffer *leaf, int slot)
7015 struct btrfs_extent_ref_v0 *ref0;
7016 struct btrfs_key key;
7019 btrfs_item_key_to_cpu(leaf, &key, slot);
7020 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
7021 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
7022 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
7025 ret = add_data_backref(extent_cache, key.objectid, key.offset,
7026 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * btrfs_new_chunk_record - build a chunk_record from the chunk item at
 * @slot of @leaf.
 *
 * The record is allocated zeroed, sized by btrfs_chunk_record_size() for
 * the item's stripe count. Its cache extent spans key->offset for
 * btrfs_chunk_length() bytes; the remaining fields are copied from @key,
 * the leaf header generation and the chunk item. For every stripe the
 * devid, offset and dev_uuid are copied out of the leaf.
 *
 * NOTE(review): the allocation-failure branch that prints "memory
 * allocation failed" and the return statement are only partly visible in
 * this listing.
 */
7032 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7033 struct btrfs_key *key,
7036 struct btrfs_chunk *ptr;
7037 struct chunk_record *rec;
7040 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7041 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7043 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7045 fprintf(stderr, "memory allocation failed\n");
7049 INIT_LIST_HEAD(&rec->list);
7050 INIT_LIST_HEAD(&rec->dextents);
/* Cache extent: logical start and length of the chunk. */
7053 rec->cache.start = key->offset;
7054 rec->cache.size = btrfs_chunk_length(leaf, ptr);
7056 rec->generation = btrfs_header_generation(leaf);
/* Keep a copy of the item key. */
7058 rec->objectid = key->objectid;
7059 rec->type = key->type;
7060 rec->offset = key->offset;
7062 rec->length = rec->cache.size;
7063 rec->owner = btrfs_chunk_owner(leaf, ptr);
7064 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7065 rec->type_flags = btrfs_chunk_type(leaf, ptr);
7066 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7067 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7068 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7069 rec->num_stripes = num_stripes;
7070 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* Copy the per-stripe device id, offset and device UUID. */
7072 for (i = 0; i < rec->num_stripes; ++i) {
7073 rec->stripes[i].devid =
7074 btrfs_stripe_devid_nr(leaf, ptr, i);
7075 rec->stripes[i].offset =
7076 btrfs_stripe_offset_nr(leaf, ptr, i);
7077 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7078 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * process_chunk_item - validate the chunk item at @slot of @eb and add a
 * record for it to @chunk_cache.
 *
 * The item is first passed to btrfs_check_chunk_valid(); the visible
 * error() call reports an invalid chunk as ignored. Otherwise a record is
 * built with btrfs_new_chunk_record() and inserted with
 * insert_cache_extent(); the "Chunk[...] existed." message belongs to the
 * insertion-failure path.
 * NOTE(review): the conditions guarding both messages and the cleanup on
 * failure are on lines not shown in this listing.
 */
7085 static int process_chunk_item(struct cache_tree *chunk_cache,
7086 struct btrfs_key *key, struct extent_buffer *eb,
7089 struct chunk_record *rec;
7090 struct btrfs_chunk *chunk;
7093 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7095 * Do extra check for this chunk item,
7097 * It's still possible one can craft a leaf with CHUNK_ITEM, with
7098 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7099 * and owner<->key_type check.
7101 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7104 error("chunk(%llu, %llu) is not valid, ignore it",
7105 key->offset, btrfs_chunk_length(eb, chunk));
7108 rec = btrfs_new_chunk_record(eb, key, slot);
7109 ret = insert_cache_extent(chunk_cache, &rec->cache);
7111 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7112 rec->offset, rec->length);
/*
 * process_device_item - build a device_record from the dev item at @slot
 * of @eb and insert it into the @dev_cache rb-tree.
 *
 * Fields are taken from @key, the leaf header generation and the dev item
 * (device id, total bytes, bytes used). Insertion uses rb_insert() with
 * device_record_compare; the "Device[...] existed." message belongs to
 * the insertion-failure path.
 *
 * NOTE(review): the record comes from malloc() and no zeroing is visible
 * here, so any field not assigned below would be uninitialised - confirm.
 * NOTE(review): rec->devid is first set from key->offset and later
 * overwritten with btrfs_device_id(); the first assignment looks
 * redundant.
 */
7119 static int process_device_item(struct rb_root *dev_cache,
7120 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7122 struct btrfs_dev_item *ptr;
7123 struct device_record *rec;
7126 ptr = btrfs_item_ptr(eb,
7127 slot, struct btrfs_dev_item);
7129 rec = malloc(sizeof(*rec));
7131 fprintf(stderr, "memory allocation failed\n");
7135 rec->devid = key->offset;
7136 rec->generation = btrfs_header_generation(eb);
/* Keep a copy of the item key. */
7138 rec->objectid = key->objectid;
7139 rec->type = key->type;
7140 rec->offset = key->offset;
/* Values read from the dev item itself. */
7142 rec->devid = btrfs_device_id(eb, ptr);
7143 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7144 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7146 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7148 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * btrfs_new_block_group_record - build a zero-initialised
 * block_group_record from the block group item at @slot of @leaf.
 *
 * The cache extent starts at key->objectid and has size key->offset. The
 * key fields and the leaf header generation are copied, the flags are
 * read from the item, and the list head is initialised.
 * NOTE(review): the allocation-failure branch and the return statement
 * are only partly visible in this listing.
 */
7155 struct block_group_record *
7156 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7159 struct btrfs_block_group_item *ptr;
7160 struct block_group_record *rec;
7162 rec = calloc(1, sizeof(*rec));
7164 fprintf(stderr, "memory allocation failed\n");
7168 rec->cache.start = key->objectid;
7169 rec->cache.size = key->offset;
7171 rec->generation = btrfs_header_generation(leaf);
7173 rec->objectid = key->objectid;
7174 rec->type = key->type;
7175 rec->offset = key->offset;
7177 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7178 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7180 INIT_LIST_HEAD(&rec->list);
/*
 * process_block_group_item - create a record for the block group item at
 * @slot of @eb and insert it into @block_group_cache.
 *
 * The "Block Group[...] existed." message belongs to the
 * insertion-failure path.
 * NOTE(review): the condition guarding the message and any cleanup are on
 * lines not shown in this listing.
 */
7185 static int process_block_group_item(struct block_group_tree *block_group_cache,
7186 struct btrfs_key *key,
7187 struct extent_buffer *eb, int slot)
7189 struct block_group_record *rec;
7192 rec = btrfs_new_block_group_record(eb, key, slot);
7193 ret = insert_block_group_record(block_group_cache, rec);
7195 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7196 rec->objectid, rec->offset);
/*
 * btrfs_new_device_extent_record - build a zero-initialised
 * device_extent_record from the dev extent item at @slot of @leaf.
 *
 * The cache extent is keyed by key->objectid (cache.objectid) and
 * key->offset (cache.start); its size is the dev extent length. The key
 * fields, the leaf header generation and the chunk objectid/offset of the
 * item are copied, and both list heads are initialised.
 * NOTE(review): the allocation-failure branch and the return statement
 * are only partly visible in this listing.
 */
7203 struct device_extent_record *
7204 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7205 struct btrfs_key *key, int slot)
7207 struct device_extent_record *rec;
7208 struct btrfs_dev_extent *ptr;
7210 rec = calloc(1, sizeof(*rec));
7212 fprintf(stderr, "memory allocation failed\n");
7216 rec->cache.objectid = key->objectid;
7217 rec->cache.start = key->offset;
7219 rec->generation = btrfs_header_generation(leaf);
7221 rec->objectid = key->objectid;
7222 rec->type = key->type;
7223 rec->offset = key->offset;
7225 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7226 rec->chunk_objecteid =
7227 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7229 btrfs_dev_extent_chunk_offset(leaf, ptr);
7230 rec->length = btrfs_dev_extent_length(leaf, ptr);
7231 rec->cache.size = rec->length;
7233 INIT_LIST_HEAD(&rec->chunk_list);
7234 INIT_LIST_HEAD(&rec->device_list);
/*
 * process_device_extent_item - create a record for the dev extent item at
 * @slot of @eb and insert it into @dev_extent_cache.
 *
 * The "Device extent[...] existed." message belongs to the
 * insertion-failure path.
 * NOTE(review): the return type line, the condition guarding the message
 * and any cleanup are not shown in this listing.
 */
7240 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7241 struct btrfs_key *key, struct extent_buffer *eb,
7244 struct device_extent_record *rec;
7247 rec = btrfs_new_device_extent_record(eb, key, slot);
7248 ret = insert_device_extent_record(dev_extent_cache, rec);
7251 "Device extent[%llu, %llu, %llu] existed.\n",
7252 rec->objectid, rec->offset, rec->length);
/*
 * process_extent_item - record the extent (or metadata) item at @slot of
 * @eb in @extent_cache together with its inline back references.
 *
 * Visible flow:
 *  - the extent length is the nodesize for METADATA_ITEM keys and
 *    key.offset otherwise;
 *  - a start that is not sectorsize-aligned is reported as invalid;
 *  - items smaller than struct btrfs_extent_item are handled as v0 items
 *    (under BTRFS_COMPAT_EXTENT_TREE_V0) and recorded directly;
 *  - otherwise ref count and flags are read, the length is validated
 *    (nodesize for metadata, sectorsize alignment for data) and the
 *    extent is recorded with add_extent_rec();
 *  - the inline refs following the item (and the tree_block_info, when
 *    present) are walked and dispatched by type to add_tree_backref() or
 *    add_data_backref(); unknown types are reported as a corrupt record.
 *
 * NOTE(review): the return values of the second add_extent_rec() call and
 * of both add_data_backref() calls are not captured on the visible lines.
 * NOTE(review): loop/switch headers, early returns and several argument
 * lines are missing from this listing.
 */
7259 static int process_extent_item(struct btrfs_root *root,
7260 struct cache_tree *extent_cache,
7261 struct extent_buffer *eb, int slot)
7263 struct btrfs_extent_item *ei;
7264 struct btrfs_extent_inline_ref *iref;
7265 struct btrfs_extent_data_ref *dref;
7266 struct btrfs_shared_data_ref *sref;
7267 struct btrfs_key key;
7268 struct extent_record tmpl;
7273 u32 item_size = btrfs_item_size_nr(eb, slot);
7279 btrfs_item_key_to_cpu(eb, &key, slot);
/* Metadata items carry no length in the key; use the nodesize. */
7281 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7283 num_bytes = root->fs_info->nodesize;
7285 num_bytes = key.offset;
7288 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7289 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7290 key.objectid, root->fs_info->sectorsize);
/* Too small for the current item layout: old v0 format. */
7293 if (item_size < sizeof(*ei)) {
7294 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7295 struct btrfs_extent_item_v0 *ei0;
7296 BUG_ON(item_size != sizeof(*ei0));
7297 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7298 refs = btrfs_extent_refs_v0(eb, ei0);
7302 memset(&tmpl, 0, sizeof(tmpl));
7303 tmpl.start = key.objectid;
7304 tmpl.nr = num_bytes;
7305 tmpl.extent_item_refs = refs;
7306 tmpl.metadata = metadata;
7308 tmpl.max_size = num_bytes;
7310 return add_extent_rec(extent_cache, &tmpl);
7313 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7314 refs = btrfs_extent_refs(eb, ei);
7315 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
/* Length sanity checks: nodesize for metadata, aligned for data. */
7319 if (metadata && num_bytes != root->fs_info->nodesize) {
7320 error("ignore invalid metadata extent, length %llu does not equal to %u",
7321 num_bytes, root->fs_info->nodesize);
7324 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7325 error("ignore invalid data extent, length %llu is not aligned to %u",
7326 num_bytes, root->fs_info->sectorsize);
7330 memset(&tmpl, 0, sizeof(tmpl));
7331 tmpl.start = key.objectid;
7332 tmpl.nr = num_bytes;
7333 tmpl.extent_item_refs = refs;
7334 tmpl.metadata = metadata;
7336 tmpl.max_size = num_bytes;
7337 add_extent_rec(extent_cache, &tmpl);
/* Inline refs start right after the item, past any tree_block_info. */
7339 ptr = (unsigned long)(ei + 1);
7340 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7341 key.type == BTRFS_EXTENT_ITEM_KEY)
7342 ptr += sizeof(struct btrfs_tree_block_info);
7344 end = (unsigned long)ei + item_size;
7346 iref = (struct btrfs_extent_inline_ref *)ptr;
7347 type = btrfs_extent_inline_ref_type(eb, iref);
7348 offset = btrfs_extent_inline_ref_offset(eb, iref);
7350 case BTRFS_TREE_BLOCK_REF_KEY:
7351 ret = add_tree_backref(extent_cache, key.objectid,
7355 "add_tree_backref failed (extent items tree block): %s",
7358 case BTRFS_SHARED_BLOCK_REF_KEY:
7359 ret = add_tree_backref(extent_cache, key.objectid,
7363 "add_tree_backref failed (extent items shared block): %s",
7366 case BTRFS_EXTENT_DATA_REF_KEY:
7367 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7368 add_data_backref(extent_cache, key.objectid, 0,
7369 btrfs_extent_data_ref_root(eb, dref),
7370 btrfs_extent_data_ref_objectid(eb,
7372 btrfs_extent_data_ref_offset(eb, dref),
7373 btrfs_extent_data_ref_count(eb, dref),
7376 case BTRFS_SHARED_DATA_REF_KEY:
7377 sref = (struct btrfs_shared_data_ref *)(iref + 1);
7378 add_data_backref(extent_cache, key.objectid, offset,
7380 btrfs_shared_data_ref_count(eb, sref),
7384 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7385 key.objectid, key.type, num_bytes);
/* Step to the next inline ref; its size depends on the ref type. */
7388 ptr += btrfs_extent_inline_ref_size(type);
/*
 * check_cache_range - check that the range (@offset, @bytes) of block
 * group @cache matches exactly one free space entry, after cutting out
 * the parts covered by superblock mirror stripes.
 *
 * For each superblock mirror, btrfs_rmap_block() yields the logical
 * addresses and stripe length of the mirror inside this block group.
 * Each stripe is compared with the range:
 *  - stripes entirely before or after the range are tested first;
 *  - a stripe at the start, or overlapping the start, trims the front;
 *  - a stripe reaching the end trims the tail;
 *  - a stripe in the middle makes the function recurse on the left part
 *    and continue with the right part.
 * The remaining range is looked up with btrfs_find_free_space(); a
 * missing entry, a different offset or a different size each print a
 * message. A matching entry is removed with unlink_free_space().
 *
 * NOTE(review): loop headers, continue/return statements and some
 * operands are on lines missing from this listing.
 */
7395 static int check_cache_range(struct btrfs_root *root,
7396 struct btrfs_block_group_cache *cache,
7397 u64 offset, u64 bytes)
7399 struct btrfs_free_space *entry;
7405 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
7406 bytenr = btrfs_sb_offset(i);
7407 ret = btrfs_rmap_block(root->fs_info,
7408 cache->key.objectid, bytenr, 0,
7409 &logical, &nr, &stripe_len);
7414 if (logical[nr] + stripe_len <= offset)
7416 if (offset + bytes <= logical[nr])
7418 if (logical[nr] == offset) {
7419 if (stripe_len >= bytes) {
7423 bytes -= stripe_len;
7424 offset += stripe_len;
7425 } else if (logical[nr] < offset) {
7426 if (logical[nr] + stripe_len >=
7431 bytes = (offset + bytes) -
7432 (logical[nr] + stripe_len);
7433 offset = logical[nr] + stripe_len;
7436 * Could be tricky, the super may land in the
7437 * middle of the area we're checking. First
7438 * check the easiest case, it's at the end.
7440 if (logical[nr] + stripe_len >=
7442 bytes = logical[nr] - offset;
7446 /* Check the left side */
7447 ret = check_cache_range(root, cache,
7449 logical[nr] - offset);
7455 /* Now we continue with the right side */
7456 bytes = (offset + bytes) -
7457 (logical[nr] + stripe_len);
7458 offset = logical[nr] + stripe_len;
/* What is left must match exactly one free space entry. */
7465 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7467 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7468 offset, offset+bytes);
7472 if (entry->offset != offset) {
7473 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7478 if (entry->bytes != bytes) {
7479 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7480 bytes, entry->bytes, offset);
/* Consume the entry so leftovers can be detected by the caller. */
7484 unlink_free_space(cache->free_space_ctl, entry);
/*
 * verify_space_cache - compare the free space entries loaded for block
 * group @cache with the gaps between extent items in the extent tree.
 *
 * The extent tree is walked from max(block group start,
 * BTRFS_SUPER_INFO_OFFSET) up to the end of the block group. Only
 * EXTENT_ITEM and METADATA_ITEM keys are used. "last" tracks the end of
 * the previous extent (key.offset for extent items, nodesize for metadata
 * items). Each gap between "last" and the next extent, and the gap at the
 * end of the block group, is checked with check_cache_range(). Finally,
 * entries remaining in the free space rb-tree are reported.
 *
 * NOTE(review): loop headers, slot advancement and error exits are on
 * lines missing from this listing.
 */
7489 static int verify_space_cache(struct btrfs_root *root,
7490 struct btrfs_block_group_cache *cache)
7492 struct btrfs_path path;
7493 struct extent_buffer *leaf;
7494 struct btrfs_key key;
/* All lookups below go to the extent tree. */
7498 root = root->fs_info->extent_root;
7500 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7502 btrfs_init_path(&path);
7503 key.objectid = last;
7505 key.type = BTRFS_EXTENT_ITEM_KEY;
7506 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7511 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7512 ret = btrfs_next_leaf(root, &path);
7520 leaf = path.nodes[0];
7521 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7522 if (key.objectid >= cache->key.offset + cache->key.objectid)
7524 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7525 key.type != BTRFS_METADATA_ITEM_KEY) {
/* No gap before this extent: just advance past it. */
7530 if (last == key.objectid) {
7531 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7532 last = key.objectid + key.offset;
7534 last = key.objectid + root->fs_info->nodesize;
/* Gap between the previous extent and this one. */
7539 ret = check_cache_range(root, cache, last,
7540 key.objectid - last);
7543 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7544 last = key.objectid + key.offset;
7546 last = key.objectid + root->fs_info->nodesize;
/* Trailing gap up to the end of the block group. */
7550 if (last < cache->key.objectid + cache->key.offset)
7551 ret = check_cache_range(root, cache, last,
7552 cache->key.objectid +
7553 cache->key.offset - last);
7556 btrfs_release_path(&path);
7559 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7560 fprintf(stderr, "There are still entries left in the space "
/*
 * check_space_cache - load and verify the free space information of every
 * block group.
 *
 * If the superblock's cache generation is set (not -1ULL) and differs
 * from the superblock generation, a message says the space cache will be
 * invalidated. Otherwise block groups are visited starting after the
 * primary superblock; for each one the free space ctl is initialised when
 * missing, and free space is loaded from the free space tree (when the
 * FREE_SPACE_TREE compat_ro bit is set, with super stripes excluded around
 * the load) or from the free space cache, then checked with
 * verify_space_cache(). Progress reporting is started/stopped through the
 * global ctx when enabled.
 *
 * Returns -EINVAL when the local error counter is non-zero, 0 otherwise.
 *
 * NOTE(review): loop structure, early exits and the places where the
 * error counter is bumped are on lines missing from this listing.
 */
7568 static int check_space_cache(struct btrfs_root *root)
7570 struct btrfs_block_group_cache *cache;
7571 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7575 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7576 btrfs_super_generation(root->fs_info->super_copy) !=
7577 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7578 printf("cache and super generation don't match, space cache "
7579 "will be invalidated\n");
7583 if (ctx.progress_enabled) {
7584 ctx.tp = TASK_FREE_SPACE;
7585 task_start(ctx.info);
7589 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* Next lookup starts right after this block group. */
7593 start = cache->key.objectid + cache->key.offset;
7594 if (!cache->free_space_ctl) {
7595 if (btrfs_init_free_space_ctl(cache,
7596 root->fs_info->sectorsize)) {
7601 btrfs_remove_free_space_cache(cache);
7604 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7605 ret = exclude_super_stripes(root, cache);
7607 fprintf(stderr, "could not exclude super stripes: %s\n",
7612 ret = load_free_space_tree(root->fs_info, cache);
7613 free_excluded_extents(root, cache);
7615 fprintf(stderr, "could not load free space tree: %s\n",
7622 ret = load_free_space_cache(root->fs_info, cache);
7627 ret = verify_space_cache(root, cache);
7629 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7630 cache->key.objectid);
7635 task_stop(ctx.info);
7637 return error ? -EINVAL : 0;
/*
 * check_extent_csums - read the data at (@bytenr, @num_bytes) and compare
 * its checksums with the ones stored in @eb starting at @leaf_offset.
 *
 * A buffer of @num_bytes is allocated and filled with read_extent_data()
 * from the current mirror. The data is then checksummed one
 * fs_info->sectorsize block at a time; the expected value for each block
 * is read from @eb at leaf_offset + (block index * csum_size). On a
 * mismatch the mirror, address and both checksums are printed and the
 * number of copies is queried so that another mirror can be tried.
 *
 * NOTE(review): the in-line comment says "every 4k" but the step is
 * fs_info->sectorsize.
 * NOTE(review): the early exit for an unaligned @num_bytes, the malloc
 * failure check, the mirror retry and the cleanup are on lines missing
 * from this listing.
 */
7640 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7641 u64 num_bytes, unsigned long leaf_offset,
7642 struct extent_buffer *eb) {
7644 struct btrfs_fs_info *fs_info = root->fs_info;
7646 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7648 unsigned long csum_offset;
7652 u64 data_checked = 0;
7658 if (num_bytes % fs_info->sectorsize)
7661 data = malloc(num_bytes);
7665 while (offset < num_bytes) {
7668 read_len = num_bytes - offset;
7669 /* read as much space once a time */
7670 ret = read_extent_data(fs_info, data + offset,
7671 bytenr + offset, &read_len, mirror);
7675 /* verify every 4k data's checksum */
7676 while (data_checked < read_len) {
7678 tmp = offset + data_checked;
7680 csum = btrfs_csum_data((char *)data + tmp,
7681 csum, fs_info->sectorsize);
7682 btrfs_csum_final(csum, (u8 *)&csum);
/* Position of this block's stored checksum inside the leaf. */
7684 csum_offset = leaf_offset +
7685 tmp / fs_info->sectorsize * csum_size;
7686 read_extent_buffer(eb, (char *)&csum_expected,
7687 csum_offset, csum_size);
7688 /* try another mirror */
7689 if (csum != csum_expected) {
7690 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7691 mirror, bytenr + tmp,
7692 csum, csum_expected);
7693 num_copies = btrfs_num_copies(root->fs_info,
7695 if (mirror < num_copies - 1) {
7700 data_checked += fs_info->sectorsize;
/*
 * check_extent_exists - check that the range starting at @bytenr (length
 * num_bytes) is covered by extent items in the extent tree.
 *
 * The search starts at (bytenr, EXTENT_ITEM, -1) and steps back to the
 * preceding item, and further back while the key type sorts after
 * EXTENT_ITEM. It then walks forward over EXTENT_ITEM keys and subtracts
 * each overlapping extent from the range:
 *  - an extent starting at bytenr trims (or finishes) the front;
 *  - an extent starting before bytenr trims the front up to its end;
 *  - an extent starting inside the range splits it: the right part is
 *    checked by a recursive call and the left part is kept.
 * If bytes remain uncovered and no error was recorded, the missing range
 * is printed.
 *
 * NOTE(review): loop headers, slot stepping, early exits and return
 * values are on lines missing from this listing.
 */
7709 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7712 struct btrfs_path path;
7713 struct extent_buffer *leaf;
7714 struct btrfs_key key;
7717 btrfs_init_path(&path);
7718 key.objectid = bytenr;
7719 key.type = BTRFS_EXTENT_ITEM_KEY;
7720 key.offset = (u64)-1;
7723 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7726 fprintf(stderr, "Error looking up extent record %d\n", ret);
7727 btrfs_release_path(&path);
7730 if (path.slots[0] > 0) {
7733 ret = btrfs_prev_leaf(root, &path);
7736 } else if (ret > 0) {
7743 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7746 * Block group items come before extent items if they have the same
7747 * bytenr, so walk back one more just in case. Dear future traveller,
7748 * first congrats on mastering time travel. Now if it's not too much
7749 * trouble could you go back to 2006 and tell Chris to make the
7750 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7751 * EXTENT_ITEM_KEY please?
7753 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7754 if (path.slots[0] > 0) {
7757 ret = btrfs_prev_leaf(root, &path);
7760 } else if (ret > 0) {
7765 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7769 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7770 ret = btrfs_next_leaf(root, &path);
7772 fprintf(stderr, "Error going to next leaf "
7774 btrfs_release_path(&path);
7780 leaf = path.nodes[0];
7781 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7782 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7786 if (key.objectid + key.offset < bytenr) {
7790 if (key.objectid > bytenr + num_bytes)
/* Extent starts exactly at the front of the range. */
7793 if (key.objectid == bytenr) {
7794 if (key.offset >= num_bytes) {
7798 num_bytes -= key.offset;
7799 bytenr += key.offset;
7800 } else if (key.objectid < bytenr) {
7801 if (key.objectid + key.offset >= bytenr + num_bytes) {
7805 num_bytes = (bytenr + num_bytes) -
7806 (key.objectid + key.offset);
7807 bytenr = key.objectid + key.offset;
7809 if (key.objectid + key.offset < bytenr + num_bytes) {
7810 u64 new_start = key.objectid + key.offset;
7811 u64 new_bytes = bytenr + num_bytes - new_start;
7814 * Weird case, the extent is in the middle of
7815 * our range, we'll have to search one side
7816 * and then the other. Not sure if this happens
7817 * in real life, but no harm in coding it up
7818 * anyway just in case.
7820 btrfs_release_path(&path);
7821 ret = check_extent_exists(root, new_start,
7824 fprintf(stderr, "Right section didn't "
7828 num_bytes = key.objectid - bytenr;
7831 num_bytes = key.objectid - bytenr;
/* Anything still uncovered has no extent item behind it. */
7838 if (num_bytes && !ret) {
7839 fprintf(stderr, "There are no extents for csum range "
7840 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7844 btrfs_release_path(&path);
/*
 * check_csums - walk the csum tree and check that every checksummed range
 * is backed by an extent record.
 *
 * The csum root must have an up-to-date root node, otherwise "No valid
 * csum tree found" is printed. Items with key type EXTENT_CSUM are
 * visited in order. The data length covered by an item is
 * (item size / csum_size) * sectorsize. When the global check_data_csum
 * flag is set, the data itself is verified with check_extent_csums();
 * otherwise that step is skipped. Contiguous items are accumulated in
 * (offset, num_bytes); when an item does not start where the accumulated
 * range ends, the range is checked with check_extent_exists() and a new
 * range begins.
 *
 * NOTE(review): loop headers, slot stepping, resets of num_bytes and the
 * return value are on lines missing from this listing.
 */
7848 static int check_csums(struct btrfs_root *root)
7850 struct btrfs_path path;
7851 struct extent_buffer *leaf;
7852 struct btrfs_key key;
7853 u64 offset = 0, num_bytes = 0;
7854 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7858 unsigned long leaf_offset;
7860 root = root->fs_info->csum_root;
7861 if (!extent_buffer_uptodate(root->node)) {
7862 fprintf(stderr, "No valid csum tree found\n");
7866 btrfs_init_path(&path);
7867 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7868 key.type = BTRFS_EXTENT_CSUM_KEY;
7870 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7872 fprintf(stderr, "Error searching csum tree %d\n", ret);
7873 btrfs_release_path(&path);
7877 if (ret > 0 && path.slots[0])
7882 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7883 ret = btrfs_next_leaf(root, &path);
7885 fprintf(stderr, "Error going to next leaf "
7892 leaf = path.nodes[0];
7894 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7895 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Bytes of data covered by the checksums stored in this item. */
7900 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7901 csum_size) * root->fs_info->sectorsize;
7902 if (!check_data_csum)
7903 goto skip_csum_check;
7904 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7905 ret = check_extent_csums(root, key.offset, data_len,
7911 offset = key.offset;
/* Not contiguous with the accumulated range: verify it, start anew. */
7912 } else if (key.offset != offset + num_bytes) {
7913 ret = check_extent_exists(root, offset, num_bytes);
7915 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7916 "there is no extent record\n",
7917 offset, offset+num_bytes);
7920 offset = key.offset;
7923 num_bytes += data_len;
7927 btrfs_release_path(&path);
/*
 * is_dropped_key - compare @key with @drop_key field by field, in the
 * order objectid, type, offset, testing at each level whether @key is
 * smaller and descending to the next field only on equality.
 * NOTE(review): the return statements are not shown in this listing;
 * presumably the result is non-zero when @key sorts before @drop_key -
 * confirm.
 */
7931 static int is_dropped_key(struct btrfs_key *key,
7932 struct btrfs_key *drop_key) {
7933 if (key->objectid < drop_key->objectid)
7935 else if (key->objectid == drop_key->objectid) {
7936 if (key->type < drop_key->type)
7938 else if (key->type == drop_key->type) {
7939 if (key->offset < drop_key->offset)
7947 * Here are the rules for FULL_BACKREF.
7949 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7950 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7952 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7953 * if it happened after the relocation occurred since we'll have dropped the
7954 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7955 * have no real way to know for sure.
7957 * We process the blocks one root at a time, and we start from the lowest root
7958 * objectid and go to the highest. So we can just lookup the owner backref for
7959 * the record and if we don't find it then we know it doesn't exist and we have
7962 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7963 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7964 * be set or not and then we can check later once we've gathered all the refs.
/*
 * calc_extent_flag - work out whether BTRFS_BLOCK_FLAG_FULL_BACKREF
 * applies to tree block @buf while it is reached through root @ri, and
 * cross-check the result with what the extent record already holds.
 *
 * The extent record for buf->start is looked up in @extent_cache. The
 * visible tests, in order: root objectid below BTRFS_FIRST_FREE_OBJECTID;
 * @buf being the root block of @ri; the RELOC header flag; the header
 * owner equal to ri->objectid; and finally a lookup of the owner's tree
 * backref. On one outcome only the consistency check against
 * rec->flag_block_full_backref (expected 0) is done; on the other
 * FULL_BACKREF is OR-ed into *flags and the expected value is 1. A
 * mismatch with an already-set value marks rec->bad_full_backref.
 *
 * NOTE(review): the goto/label lines that connect each test to its
 * outcome are missing from this listing.
 */
7966 static int calc_extent_flag(struct cache_tree *extent_cache,
7967 struct extent_buffer *buf,
7968 struct root_item_record *ri,
7971 struct extent_record *rec;
7972 struct cache_extent *cache;
7973 struct tree_backref *tback;
7976 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7977 /* we have added this extent before */
7981 rec = container_of(cache, struct extent_record, cache);
7984 * Except file/reloc tree, we can not have
7987 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7992 if (buf->start == ri->bytenr)
7995 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7998 owner = btrfs_header_owner(buf);
7999 if (owner == ri->objectid)
8002 tback = find_tree_backref(rec, 0, owner);
/* Recorded state disagrees with "no full backref". */
8007 if (rec->flag_block_full_backref != FLAG_UNSET &&
8008 rec->flag_block_full_backref != 0)
8009 rec->bad_full_backref = 1;
8012 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* Recorded state disagrees with "full backref". */
8013 if (rec->flag_block_full_backref != FLAG_UNSET &&
8014 rec->flag_block_full_backref != 1)
8015 rec->bad_full_backref = 1;
/*
 * report_mismatch_key_root - print, on stderr, one line of the form
 * "Invalid key type(<type>) found in root(<root>)", using
 * print_key_type() and print_objectid() for the two names.
 */
8019 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8021 fprintf(stderr, "Invalid key type(");
8022 print_key_type(stderr, 0, key_type);
8023 fprintf(stderr, ") found in root(");
8024 print_objectid(stderr, rootid, 0);
8025 fprintf(stderr, ")\n");
8029 * Check if the key is valid with its extent buffer.
8031 * This is an early check in case an invalid key exists in an extent buffer.
8032 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * check_type_with_root - check that an item of @key_type may appear in
 * the tree identified by @rootid.
 *
 * Visible rules:
 *  - DEV_ITEM and CHUNK_ITEM: chunk tree only;
 *  - EXTENT_ITEM, METADATA_ITEM and BLOCK_GROUP_ITEM: extent tree only;
 *  - ROOT_ITEM: root tree only;
 *  - DEV_EXTENT: device tree only.
 * A violation is reported through report_mismatch_key_root().
 *
 * NOTE(review): the label BTRFS_CSUM_TREE_OBJECTID below is a tree
 * objectid, while every other label here is a *_KEY item type; it looks
 * like a key type such as BTRFS_EXTENT_CSUM_KEY was intended, so this
 * case may never match a real key type - confirm and fix in the full
 * file.
 * NOTE(review): the switch header, break/goto lines and return values are
 * missing from this listing.
 */
8035 static int check_type_with_root(u64 rootid, u8 key_type)
8038 /* Only valid in chunk tree */
8039 case BTRFS_DEV_ITEM_KEY:
8040 case BTRFS_CHUNK_ITEM_KEY:
8041 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8044 /* valid in csum and log tree */
8045 case BTRFS_CSUM_TREE_OBJECTID:
8046 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8050 case BTRFS_EXTENT_ITEM_KEY:
8051 case BTRFS_METADATA_ITEM_KEY:
8052 case BTRFS_BLOCK_GROUP_ITEM_KEY:
8053 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8056 case BTRFS_ROOT_ITEM_KEY:
8057 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8060 case BTRFS_DEV_EXTENT_KEY:
8061 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8067 report_mismatch_key_root(key_type, rootid);
8071 static int run_next_block(struct btrfs_root *root,
8072 struct block_info *bits,
8075 struct cache_tree *pending,
8076 struct cache_tree *seen,
8077 struct cache_tree *reada,
8078 struct cache_tree *nodes,
8079 struct cache_tree *extent_cache,
8080 struct cache_tree *chunk_cache,
8081 struct rb_root *dev_cache,
8082 struct block_group_tree *block_group_cache,
8083 struct device_extent_tree *dev_extent_cache,
8084 struct root_item_record *ri)
8086 struct btrfs_fs_info *fs_info = root->fs_info;
8087 struct extent_buffer *buf;
8088 struct extent_record *rec = NULL;
8099 struct btrfs_key key;
8100 struct cache_extent *cache;
8103 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8104 bits_nr, &reada_bits);
8109 for(i = 0; i < nritems; i++) {
8110 ret = add_cache_extent(reada, bits[i].start,
8115 /* fixme, get the parent transid */
8116 readahead_tree_block(fs_info, bits[i].start, 0);
8119 *last = bits[0].start;
8120 bytenr = bits[0].start;
8121 size = bits[0].size;
8123 cache = lookup_cache_extent(pending, bytenr, size);
8125 remove_cache_extent(pending, cache);
8128 cache = lookup_cache_extent(reada, bytenr, size);
8130 remove_cache_extent(reada, cache);
8133 cache = lookup_cache_extent(nodes, bytenr, size);
8135 remove_cache_extent(nodes, cache);
8138 cache = lookup_cache_extent(extent_cache, bytenr, size);
8140 rec = container_of(cache, struct extent_record, cache);
8141 gen = rec->parent_generation;
8144 /* fixme, get the real parent transid */
8145 buf = read_tree_block(root->fs_info, bytenr, gen);
8146 if (!extent_buffer_uptodate(buf)) {
8147 record_bad_block_io(root->fs_info,
8148 extent_cache, bytenr, size);
8152 nritems = btrfs_header_nritems(buf);
8155 if (!init_extent_tree) {
8156 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8157 btrfs_header_level(buf), 1, NULL,
8160 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8162 fprintf(stderr, "Couldn't calc extent flags\n");
8163 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8168 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8170 fprintf(stderr, "Couldn't calc extent flags\n");
8171 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8175 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8177 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8178 ri->objectid == btrfs_header_owner(buf)) {
8180 * Ok we got to this block from it's original owner and
8181 * we have FULL_BACKREF set. Relocation can leave
8182 * converted blocks over so this is altogether possible,
8183 * however it's not possible if the generation > the
8184 * last snapshot, so check for this case.
8186 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8187 btrfs_header_generation(buf) > ri->last_snapshot) {
8188 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8189 rec->bad_full_backref = 1;
8194 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8195 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8196 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8197 rec->bad_full_backref = 1;
8201 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8202 rec->flag_block_full_backref = 1;
8206 rec->flag_block_full_backref = 0;
8208 owner = btrfs_header_owner(buf);
8211 ret = check_block(root, extent_cache, buf, flags);
8215 if (btrfs_is_leaf(buf)) {
8216 btree_space_waste += btrfs_leaf_free_space(root, buf);
8217 for (i = 0; i < nritems; i++) {
8218 struct btrfs_file_extent_item *fi;
8219 btrfs_item_key_to_cpu(buf, &key, i);
8221 * Check key type against the leaf owner.
8222 * Could filter quite a lot of early error if
8225 if (check_type_with_root(btrfs_header_owner(buf),
8227 fprintf(stderr, "ignoring invalid key\n");
8230 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8231 process_extent_item(root, extent_cache, buf,
8235 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8236 process_extent_item(root, extent_cache, buf,
8240 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8242 btrfs_item_size_nr(buf, i);
8245 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8246 process_chunk_item(chunk_cache, &key, buf, i);
8249 if (key.type == BTRFS_DEV_ITEM_KEY) {
8250 process_device_item(dev_cache, &key, buf, i);
8253 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8254 process_block_group_item(block_group_cache,
8258 if (key.type == BTRFS_DEV_EXTENT_KEY) {
8259 process_device_extent_item(dev_extent_cache,
8264 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8265 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8266 process_extent_ref_v0(extent_cache, buf, i);
8273 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8274 ret = add_tree_backref(extent_cache,
8275 key.objectid, 0, key.offset, 0);
8278 "add_tree_backref failed (leaf tree block): %s",
8282 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8283 ret = add_tree_backref(extent_cache,
8284 key.objectid, key.offset, 0, 0);
8287 "add_tree_backref failed (leaf shared block): %s",
8291 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8292 struct btrfs_extent_data_ref *ref;
8293 ref = btrfs_item_ptr(buf, i,
8294 struct btrfs_extent_data_ref);
8295 add_data_backref(extent_cache,
8297 btrfs_extent_data_ref_root(buf, ref),
8298 btrfs_extent_data_ref_objectid(buf,
8300 btrfs_extent_data_ref_offset(buf, ref),
8301 btrfs_extent_data_ref_count(buf, ref),
8302 0, root->fs_info->sectorsize);
8305 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8306 struct btrfs_shared_data_ref *ref;
8307 ref = btrfs_item_ptr(buf, i,
8308 struct btrfs_shared_data_ref);
8309 add_data_backref(extent_cache,
8310 key.objectid, key.offset, 0, 0, 0,
8311 btrfs_shared_data_ref_count(buf, ref),
8312 0, root->fs_info->sectorsize);
8315 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8316 struct bad_item *bad;
8318 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8322 bad = malloc(sizeof(struct bad_item));
8325 INIT_LIST_HEAD(&bad->list);
8326 memcpy(&bad->key, &key,
8327 sizeof(struct btrfs_key));
8328 bad->root_id = owner;
8329 list_add_tail(&bad->list, &delete_items);
8332 if (key.type != BTRFS_EXTENT_DATA_KEY)
8334 fi = btrfs_item_ptr(buf, i,
8335 struct btrfs_file_extent_item);
8336 if (btrfs_file_extent_type(buf, fi) ==
8337 BTRFS_FILE_EXTENT_INLINE)
8339 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8342 data_bytes_allocated +=
8343 btrfs_file_extent_disk_num_bytes(buf, fi);
8344 if (data_bytes_allocated < root->fs_info->sectorsize) {
8347 data_bytes_referenced +=
8348 btrfs_file_extent_num_bytes(buf, fi);
8349 add_data_backref(extent_cache,
8350 btrfs_file_extent_disk_bytenr(buf, fi),
8351 parent, owner, key.objectid, key.offset -
8352 btrfs_file_extent_offset(buf, fi), 1, 1,
8353 btrfs_file_extent_disk_num_bytes(buf, fi));
8357 struct btrfs_key first_key;
8359 first_key.objectid = 0;
8362 btrfs_item_key_to_cpu(buf, &first_key, 0);
8363 level = btrfs_header_level(buf);
8364 for (i = 0; i < nritems; i++) {
8365 struct extent_record tmpl;
8367 ptr = btrfs_node_blockptr(buf, i);
8368 size = root->fs_info->nodesize;
8369 btrfs_node_key_to_cpu(buf, &key, i);
8371 if ((level == ri->drop_level)
8372 && is_dropped_key(&key, &ri->drop_key)) {
8377 memset(&tmpl, 0, sizeof(tmpl));
8378 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8379 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8384 tmpl.max_size = size;
8385 ret = add_extent_rec(extent_cache, &tmpl);
8389 ret = add_tree_backref(extent_cache, ptr, parent,
8393 "add_tree_backref failed (non-leaf block): %s",
8399 add_pending(nodes, seen, ptr, size);
8401 add_pending(pending, seen, ptr, size);
8404 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
8405 nritems) * sizeof(struct btrfs_key_ptr);
8407 total_btree_bytes += buf->len;
8408 if (fs_root_objectid(btrfs_header_owner(buf)))
8409 total_fs_tree_bytes += buf->len;
8410 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8411 total_extent_tree_bytes += buf->len;
8413 free_extent_buffer(buf);
/*
 * Queue a tree root block for scanning and seed its extent record.
 *
 * A block whose header level is above zero is queued on @nodes; the
 * following add_pending() call queues on @pending (the `else` between the
 * two calls is not visible in this listing).  @seen is passed through to
 * add_pending() in both cases.
 *
 * An extent_record template starting at buf->start with max_size buf->len
 * is handed to add_extent_rec().  A tree backref is then added for the
 * block: with the block's own start as parent when objectid is
 * BTRFS_TREE_RELOC_OBJECTID or the header's backref revision is older than
 * BTRFS_MIXED_BACKREF_REV, and with parent 0 and objectid as root in the
 * second call.
 *
 * NOTE(review): the remaining template fields and the return statement
 * are not visible here, so the return contract is not documented.
 */
8417 static int add_root_to_pending(struct extent_buffer *buf,
8418 struct cache_tree *extent_cache,
8419 struct cache_tree *pending,
8420 struct cache_tree *seen,
8421 struct cache_tree *nodes,
8424 struct extent_record tmpl;
8427 if (btrfs_header_level(buf) > 0)
8428 add_pending(nodes, seen, buf->start, buf->len);
8430 add_pending(pending, seen, buf->start, buf->len);
8432 memset(&tmpl, 0, sizeof(tmpl));
8433 tmpl.start = buf->start;
8438 tmpl.max_size = buf->len;
8439 add_extent_rec(extent_cache, &tmpl);
8441 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8442 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8443 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8446 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8451 /* as we fix the tree, we might be deleting blocks that
8452 * we're tracking for repair. This hook makes sure we
8453 * remove any backrefs for blocks as we are fixing them.
/*
 * Keep the fsck extent cache in sync when an extent is freed during repair.
 *
 * The extent_record for @bytenr/@num_bytes is looked up in
 * root->fs_info->fsck_extent_cache and the matching backref is adjusted.
 * is_data is set when @owner is at or above BTRFS_FIRST_FREE_OBJECTID.
 *
 * Data backref (find_data_backref): found_ref and rec->refs are reduced by
 * refs_to_drop when the backref was seen in a tree; num_refs and a
 * non-zero rec->extent_item_refs are reduced when it was seen in the
 * extent tree.  The found_ref / found_extent_tree flags are cleared once
 * the corresponding counter reaches zero.
 *
 * Tree backref (find_tree_backref): found_ref and found_extent_tree are
 * cleared, and a non-zero rec->extent_item_refs is decremented.
 *
 * maybe_free_extent_rec() is called on the record afterwards.
 *
 * NOTE(review): in the tree-backref branch found_ref is set to 0 right
 * before the `!found_extent_tree && found_ref` test, so as far as the
 * visible lines show that rb_erase() can never run.  `!found_ref` may have
 * been intended in both branches -- confirm.
 */
8455 static int free_extent_hook(struct btrfs_trans_handle *trans,
8456 struct btrfs_root *root,
8457 u64 bytenr, u64 num_bytes, u64 parent,
8458 u64 root_objectid, u64 owner, u64 offset,
8461 struct extent_record *rec;
8462 struct cache_extent *cache;
8464 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8466 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8467 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8471 rec = container_of(cache, struct extent_record, cache);
8473 struct data_backref *back;
8474 back = find_data_backref(rec, parent, root_objectid, owner,
8475 offset, 1, bytenr, num_bytes);
8478 if (back->node.found_ref) {
8479 back->found_ref -= refs_to_drop;
8481 rec->refs -= refs_to_drop;
8483 if (back->node.found_extent_tree) {
8484 back->num_refs -= refs_to_drop;
8485 if (rec->extent_item_refs)
8486 rec->extent_item_refs -= refs_to_drop;
8488 if (back->found_ref == 0)
8489 back->node.found_ref = 0;
8490 if (back->num_refs == 0)
8491 back->node.found_extent_tree = 0;
8493 if (!back->node.found_extent_tree && back->node.found_ref) {
8494 rb_erase(&back->node.node, &rec->backref_tree);
8498 struct tree_backref *back;
8499 back = find_tree_backref(rec, parent, root_objectid);
8502 if (back->node.found_ref) {
8505 back->node.found_ref = 0;
8507 if (back->node.found_extent_tree) {
8508 if (rec->extent_item_refs)
8509 rec->extent_item_refs--;
8510 back->node.found_extent_tree = 0;
8512 if (!back->node.found_extent_tree && back->node.found_ref) {
8513 rb_erase(&back->node.node, &rec->backref_tree);
8517 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent-tree items recorded for one bytenr.
 *
 * The search key starts at (bytenr, <type>, (u64)-1) and is looked up in
 * root->fs_info->extent_root.  For the item found at the current slot:
 *  - an objectid other than bytenr is tested for first (the action taken
 *    is not visible in this listing);
 *  - an item whose type is not one of EXTENT_ITEM, METADATA_ITEM,
 *    TREE_BLOCK_REF, EXTENT_DATA_REF, EXTENT_REF_V0, SHARED_BLOCK_REF or
 *    SHARED_DATA_REF is not deleted; the path is released and the search
 *    key is moved just below it (offset - 1 for type 0, otherwise
 *    type - 1 with offset (u64)-1);
 *  - any other item is reported on stderr and removed with
 *    btrfs_del_item().  For EXTENT_ITEM / METADATA_ITEM the block group
 *    accounting is updated via btrfs_update_block_group(), using the key
 *    offset as the byte count for EXTENT_ITEM and fs_info->nodesize for
 *    METADATA_ITEM.
 *
 * The path is released before returning.
 *
 * NOTE(review): the loop construct, error checks and return statement are
 * not visible here; the description covers only the visible statements.
 */
8522 static int delete_extent_records(struct btrfs_trans_handle *trans,
8523 struct btrfs_root *root,
8524 struct btrfs_path *path,
8527 struct btrfs_key key;
8528 struct btrfs_key found_key;
8529 struct extent_buffer *leaf;
8534 key.objectid = bytenr;
8536 key.offset = (u64)-1;
8539 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8546 if (path->slots[0] == 0)
8552 leaf = path->nodes[0];
8553 slot = path->slots[0];
8555 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8556 if (found_key.objectid != bytenr)
8559 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8560 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8561 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8562 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8563 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8564 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8565 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8566 btrfs_release_path(path);
8567 if (found_key.type == 0) {
8568 if (found_key.offset == 0)
8570 key.offset = found_key.offset - 1;
8571 key.type = found_key.type;
8573 key.type = found_key.type - 1;
8574 key.offset = (u64)-1;
8578 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8579 found_key.objectid, found_key.type, found_key.offset);
8581 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8584 btrfs_release_path(path);
8586 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8587 found_key.type == BTRFS_METADATA_ITEM_KEY) {
8588 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8589 found_key.offset : root->fs_info->nodesize;
8591 ret = btrfs_update_block_group(trans, root, bytenr,
8598 btrfs_release_path(path);
8603 * for a single backref, this will allocate a new extent
8604 * and add the backref to it.
/*
 * Re-create extent-tree state for one backref of @rec.
 *
 * Extent item creation (the guarding condition is not visible in this
 * listing): an item keyed (rec->start, BTRFS_EXTENT_ITEM_KEY,
 * rec->max_size) is inserted into info->extent_root with room for a
 * btrfs_extent_item, plus a btrfs_tree_block_info in the case that adds
 * sizeof(*bi).  Its refs are set to 0 and its generation to
 * rec->generation.  Data backrefs get BTRFS_EXTENT_FLAG_DATA; otherwise
 * the tree block info is zeroed, filled with rec->info_objectid and
 * rec->info_level, and the flags become
 * BTRFS_EXTENT_FLAG_TREE_BLOCK | @flags.  The leaf is marked dirty and
 * the block group accounting is updated for [rec->start, rec->max_size].
 *
 * Reference creation: for a data backref, btrfs_inc_extent_ref() is called
 * once per dback->found_ref; for a tree backref it is called once with
 * (parent, tback->root, 0, 0).  parent is taken from the backref when
 * back->full_backref is set.  Each addition is logged to stderr.
 *
 * NOTE(review): `©_key` below looks like a mis-decoded `&copy_key`
 * (an HTML "&copy" entity expansion) and will not compile as written;
 * the declaration of copy_key also ends in a stray second semicolon.
 * NOTE(review): the ret checks and the final return are not visible here.
 */
8606 static int record_extent(struct btrfs_trans_handle *trans,
8607 struct btrfs_fs_info *info,
8608 struct btrfs_path *path,
8609 struct extent_record *rec,
8610 struct extent_backref *back,
8611 int allocated, u64 flags)
8614 struct btrfs_root *extent_root = info->extent_root;
8615 struct extent_buffer *leaf;
8616 struct btrfs_key ins_key;
8617 struct btrfs_extent_item *ei;
8618 struct data_backref *dback;
8619 struct btrfs_tree_block_info *bi;
8622 rec->max_size = max_t(u64, rec->max_size,
8626 u32 item_size = sizeof(*ei);
8629 item_size += sizeof(*bi);
8631 ins_key.objectid = rec->start;
8632 ins_key.offset = rec->max_size;
8633 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8635 ret = btrfs_insert_empty_item(trans, extent_root, path,
8636 &ins_key, item_size);
8640 leaf = path->nodes[0];
8641 ei = btrfs_item_ptr(leaf, path->slots[0],
8642 struct btrfs_extent_item);
8644 btrfs_set_extent_refs(leaf, ei, 0);
8645 btrfs_set_extent_generation(leaf, ei, rec->generation);
8647 if (back->is_data) {
8648 btrfs_set_extent_flags(leaf, ei,
8649 BTRFS_EXTENT_FLAG_DATA);
8651 struct btrfs_disk_key copy_key;;
8653 bi = (struct btrfs_tree_block_info *)(ei + 1);
8654 memset_extent_buffer(leaf, 0, (unsigned long)bi,
8657 btrfs_set_disk_key_objectid(©_key,
8658 rec->info_objectid);
8659 btrfs_set_disk_key_type(©_key, 0);
8660 btrfs_set_disk_key_offset(©_key, 0);
8662 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8663 btrfs_set_tree_block_key(leaf, bi, ©_key);
8665 btrfs_set_extent_flags(leaf, ei,
8666 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8669 btrfs_mark_buffer_dirty(leaf);
8670 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8671 rec->max_size, 1, 0);
8674 btrfs_release_path(path);
8677 if (back->is_data) {
8681 dback = to_data_backref(back);
8682 if (back->full_backref)
8683 parent = dback->parent;
8687 for (i = 0; i < dback->found_ref; i++) {
8688 /* if parent != 0, we're doing a full backref
8689 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8690 * just makes the backref allocator create a data
8693 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8694 rec->start, rec->max_size,
8698 BTRFS_FIRST_FREE_OBJECTID :
8704 fprintf(stderr, "adding new data backref"
8705 " on %llu %s %llu owner %llu"
8706 " offset %llu found %d\n",
8707 (unsigned long long)rec->start,
8708 back->full_backref ?
8710 back->full_backref ?
8711 (unsigned long long)parent :
8712 (unsigned long long)dback->root,
8713 (unsigned long long)dback->owner,
8714 (unsigned long long)dback->offset,
8718 struct tree_backref *tback;
8720 tback = to_tree_backref(back);
8721 if (back->full_backref)
8722 parent = tback->parent;
8726 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8727 rec->start, rec->max_size,
8728 parent, tback->root, 0, 0);
8729 fprintf(stderr, "adding new tree backref on "
8730 "start %llu len %llu parent %llu root %llu\n",
8731 rec->start, rec->max_size, parent, tback->root);
8734 btrfs_release_path(path);
/*
 * Search @entries for the extent_entry whose bytenr and bytes both equal
 * the given @bytenr and @bytes.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * presumably the matching entry is returned, or NULL when none matches --
 * confirm against the full source.
 */
8738 static struct extent_entry *find_entry(struct list_head *entries,
8739 u64 bytenr, u64 bytes)
8741 struct extent_entry *entry = NULL;
8743 list_for_each_entry(entry, entries, list) {
8744 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Pick the extent_entry on @entries that the backrefs agree on most.
 *
 * Entries whose broken count equals their total count are tested for
 * first, as untrustworthy.  The remaining entries are compared by count:
 * the first candidate is chosen between the previous and the current
 * entry, and later entries replace `best` when their count is higher.
 * An entry that ties with the current best, or with an unbroken previous
 * entry, is treated as inconclusive, so the search continues.
 *
 * NOTE(review): most of the branch bodies and the return statement are
 * not visible in this listing; presumably NULL is returned when no
 * unambiguous winner exists -- confirm against the full source.
 */
8751 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8753 struct extent_entry *entry, *best = NULL, *prev = NULL;
8755 list_for_each_entry(entry, entries, list) {
8757 * If there are as many broken entries as entries then we know
8758 * not to trust this particular entry.
8760 if (entry->broken == entry->count)
8764 * Special case, when there are only two entries and 'best' is
8774 * If our current entry == best then we can't be sure our best
8775 * is really the best, so we need to keep searching.
8777 if (best && best->count == entry->count) {
8783 /* Prev == entry, not good enough, have to keep searching */
8784 if (!prev->broken && prev->count == entry->count)
8788 best = (prev->count > entry->count) ? prev : entry;
8789 else if (best->count < entry->count)
/*
 * Rewrite one file extent item so that it matches the agreed extent @entry.
 *
 * Steps, as far as visible in this listing:
 *  1. Read the fs root named by dback->root.
 *  2. Search it read-only for (dback->owner, BTRFS_EXTENT_DATA_KEY,
 *     dback->offset) and walk forward, moving to the next leaf when the
 *     slot runs off the end, until a file extent with disk_bytenr ==
 *     dback->disk_bytenr and disk_num_bytes == dback->bytes is found.
 *     Leaving the owner's EXTENT_DATA items is reported as an error.
 *  3. Release the path, start a transaction and search again for the key
 *     that was found, this time with COW enabled.
 *  4. A compressed extent whose disk_bytenr differs from entry->bytenr is
 *     reported as unsupported.
 *  5. Otherwise disk_bytenr is set to entry->bytenr, with the file extent
 *     offset adjusted when the ref started after or before the entry; refs
 *     reaching past the entry end or lying before its start are reported
 *     instead.
 *  6. disk_num_bytes is set to entry->bytes; ram_bytes is also set unless
 *     the extent is compressed, in which case a warning is printed.
 *  7. The leaf is marked dirty, the transaction committed, the path
 *     released.
 *
 * Returns ret if it is non-zero, otherwise the commit result.
 *
 * NOTE(review): in the `disk_bytenr > entry->bytenr` branch off_diff is
 * computed but no visible line applies it to `offset` before it is
 * written back; the line may simply be missing from this listing.
 */
8797 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8798 struct data_backref *dback, struct extent_entry *entry)
8800 struct btrfs_trans_handle *trans;
8801 struct btrfs_root *root;
8802 struct btrfs_file_extent_item *fi;
8803 struct extent_buffer *leaf;
8804 struct btrfs_key key;
8808 key.objectid = dback->root;
8809 key.type = BTRFS_ROOT_ITEM_KEY;
8810 key.offset = (u64)-1;
8811 root = btrfs_read_fs_root(info, &key);
8813 fprintf(stderr, "Couldn't find root for our ref\n");
8818 * The backref points to the original offset of the extent if it was
8819 * split, so we need to search down to the offset we have and then walk
8820 * forward until we find the backref we're looking for.
8822 key.objectid = dback->owner;
8823 key.type = BTRFS_EXTENT_DATA_KEY;
8824 key.offset = dback->offset;
8825 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8827 fprintf(stderr, "Error looking up ref %d\n", ret);
8832 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8833 ret = btrfs_next_leaf(root, path);
8835 fprintf(stderr, "Couldn't find our ref, next\n");
8839 leaf = path->nodes[0];
8840 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8841 if (key.objectid != dback->owner ||
8842 key.type != BTRFS_EXTENT_DATA_KEY) {
8843 fprintf(stderr, "Couldn't find our ref, search\n");
8846 fi = btrfs_item_ptr(leaf, path->slots[0],
8847 struct btrfs_file_extent_item);
8848 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8849 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8851 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8856 btrfs_release_path(path);
8858 trans = btrfs_start_transaction(root, 1);
8860 return PTR_ERR(trans);
8863 * Ok we have the key of the file extent we want to fix, now we can cow
8864 * down to the thing and fix it.
8866 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8868 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8869 key.objectid, key.type, key.offset, ret);
8873 fprintf(stderr, "Well that's odd, we just found this key "
8874 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8879 leaf = path->nodes[0];
8880 fi = btrfs_item_ptr(leaf, path->slots[0],
8881 struct btrfs_file_extent_item);
8883 if (btrfs_file_extent_compression(leaf, fi) &&
8884 dback->disk_bytenr != entry->bytenr) {
8885 fprintf(stderr, "Ref doesn't match the record start and is "
8886 "compressed, please take a btrfs-image of this file "
8887 "system and send it to a btrfs developer so they can "
8888 "complete this functionality for bytenr %Lu\n",
8889 dback->disk_bytenr);
8894 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8895 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8896 } else if (dback->disk_bytenr > entry->bytenr) {
8897 u64 off_diff, offset;
8899 off_diff = dback->disk_bytenr - entry->bytenr;
8900 offset = btrfs_file_extent_offset(leaf, fi);
8901 if (dback->disk_bytenr + offset +
8902 btrfs_file_extent_num_bytes(leaf, fi) >
8903 entry->bytenr + entry->bytes) {
8904 fprintf(stderr, "Ref is past the entry end, please "
8905 "take a btrfs-image of this file system and "
8906 "send it to a btrfs developer, ref %Lu\n",
8907 dback->disk_bytenr);
8912 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8913 btrfs_set_file_extent_offset(leaf, fi, offset);
8914 } else if (dback->disk_bytenr < entry->bytenr) {
8917 offset = btrfs_file_extent_offset(leaf, fi);
8918 if (dback->disk_bytenr + offset < entry->bytenr) {
8919 fprintf(stderr, "Ref is before the entry start, please"
8920 " take a btrfs-image of this file system and "
8921 "send it to a btrfs developer, ref %Lu\n",
8922 dback->disk_bytenr);
8927 offset += dback->disk_bytenr;
8928 offset -= entry->bytenr;
8929 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8930 btrfs_set_file_extent_offset(leaf, fi, offset);
8933 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8936 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8937 * only do this if we aren't using compression, otherwise it's a
8940 if (!btrfs_file_extent_compression(leaf, fi))
8941 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8943 printf("ram bytes may be wrong?\n");
8944 btrfs_mark_buffer_dirty(leaf);
8946 err = btrfs_commit_transaction(trans, root);
8947 btrfs_release_path(path);
8948 return ret ? ret : err;
/*
 * Make the data backrefs of @rec agree on a single (bytenr, bytes) pair.
 *
 * Pass 1: every data backref that is not a full backref and has a
 * non-zero found_ref is bucketed into a local list of extent_entry
 * items keyed by (disk_bytenr, bytes); a new zeroed entry is allocated
 * when find_entry() has no match.  A backref that differs from
 * rec->start / rec->nr, or that is flagged broken, is tested for
 * separately (the bookkeeping lines are not visible in this listing).
 *
 * With at most one entry and no mismatch nothing needs repair.
 * Otherwise find_most_right_entry() is asked for a winner.  In the
 * fallback path the extent record's own (rec->start, rec->nr) is looked
 * up among the entries:
 *  - no such entry, and either no broken entries or !rec->found_rec:
 *    give up with an error message;
 *  - no such entry otherwise: add one for the record's range and vote
 *    again; an even split is reported as needing user input.
 * A winner whose bytenr differs from rec->start is reported as
 * unsupported.
 *
 * Pass 2: every found data backref whose (bytes, disk_bytenr) differs
 * from the winner is handed to repair_ref().
 *
 * The local entry list is emptied before returning.
 *
 * NOTE(review): return values are not visible in this listing.
 */
8951 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8952 struct extent_record *rec)
8954 struct extent_backref *back, *tmp;
8955 struct data_backref *dback;
8956 struct extent_entry *entry, *best = NULL;
8959 int broken_entries = 0;
8964 * Metadata is easy and the backrefs should always agree on bytenr and
8965 * size, if not we've got bigger issues.
8970 rbtree_postorder_for_each_entry_safe(back, tmp,
8971 &rec->backref_tree, node) {
8972 if (back->full_backref || !back->is_data)
8975 dback = to_data_backref(back);
8978 * We only pay attention to backrefs that we found a real
8981 if (dback->found_ref == 0)
8985 * For now we only catch when the bytes don't match, not the
8986 * bytenr. We can easily do this at the same time, but I want
8987 * to have a fs image to test on before we just add repair
8988 * functionality willy-nilly so we know we won't screw up the
8992 entry = find_entry(&entries, dback->disk_bytenr,
8995 entry = malloc(sizeof(struct extent_entry));
9000 memset(entry, 0, sizeof(*entry));
9001 entry->bytenr = dback->disk_bytenr;
9002 entry->bytes = dback->bytes;
9003 list_add_tail(&entry->list, &entries);
9008 * If we only have on entry we may think the entries agree when
9009 * in reality they don't so we have to do some extra checking.
9011 if (dback->disk_bytenr != rec->start ||
9012 dback->bytes != rec->nr || back->broken)
9023 /* Yay all the backrefs agree, carry on good sir */
9024 if (nr_entries <= 1 && !mismatch)
9027 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9028 "%Lu\n", rec->start);
9031 * First we want to see if the backrefs can agree amongst themselves who
9032 * is right, so figure out which one of the entries has the highest
9035 best = find_most_right_entry(&entries);
9038 * Ok so we may have an even split between what the backrefs think, so
9039 * this is where we use the extent ref to see what it thinks.
9042 entry = find_entry(&entries, rec->start, rec->nr);
9043 if (!entry && (!broken_entries || !rec->found_rec)) {
9044 fprintf(stderr, "Backrefs don't agree with each other "
9045 "and extent record doesn't agree with anybody,"
9046 " so we can't fix bytenr %Lu bytes %Lu\n",
9047 rec->start, rec->nr);
9050 } else if (!entry) {
9052 * Ok our backrefs were broken, we'll assume this is the
9053 * correct value and add an entry for this range.
9055 entry = malloc(sizeof(struct extent_entry));
9060 memset(entry, 0, sizeof(*entry));
9061 entry->bytenr = rec->start;
9062 entry->bytes = rec->nr;
9063 list_add_tail(&entry->list, &entries);
9067 best = find_most_right_entry(&entries);
9069 fprintf(stderr, "Backrefs and extent record evenly "
9070 "split on who is right, this is going to "
9071 "require user input to fix bytenr %Lu bytes "
9072 "%Lu\n", rec->start, rec->nr);
9079 * I don't think this can happen currently as we'll abort() if we catch
9080 * this case higher up, but in case somebody removes that we still can't
9081 * deal with it properly here yet, so just bail out of that's the case.
9083 if (best->bytenr != rec->start) {
9084 fprintf(stderr, "Extent start and backref starts don't match, "
9085 "please use btrfs-image on this file system and send "
9086 "it to a btrfs developer so they can make fsck fix "
9087 "this particular case. bytenr is %Lu, bytes is %Lu\n",
9088 rec->start, rec->nr);
9094 * Ok great we all agreed on an extent record, let's go find the real
9095 * references and fix up the ones that don't match.
9097 rbtree_postorder_for_each_entry_safe(back, tmp,
9098 &rec->backref_tree, node) {
9099 if (back->full_backref || !back->is_data)
9102 dback = to_data_backref(back);
9105 * Still ignoring backrefs that don't have a real ref attached
9108 if (dback->found_ref == 0)
9111 if (dback->bytes == best->bytes &&
9112 dback->disk_bytenr == best->bytenr)
9115 ret = repair_ref(info, path, dback, best);
9121 * Ok we messed with the actual refs, which means we need to drop our
9122 * entire cache and go back and rescan. I know this is a huge pain and
9123 * adds a lot of extra work, but it's the only way to be safe. Once all
9124 * the backrefs agree we may not need to do anything to the extent
9129 while (!list_empty(&entries)) {
9130 entry = list_entry(entries.next, struct extent_entry, list);
9131 list_del_init(&entry->list);
/*
 * Promote the single duplicate of @rec to be the cached extent record.
 *
 * Nothing is promoted when @rec already has found_rec set or carries more
 * than one duplicate; a record with no duplicates at all trips BUG_ON().
 *
 * Otherwise @rec is removed from @extent_cache and the first record on
 * rec->dups ("good") takes its place: its list heads are re-initialised,
 * its cache range is set to (good->start, good->nr), its checked flags and
 * duplicate count are reset, and it inherits rec->refs and rec->backrefs.
 *
 * Records still overlapping good's range in the cache are then absorbed:
 *  - one with found_rec set or with duplicates of its own is itself a
 *    duplicate; good is queued on the global duplicate_extents list, and
 *    the record and its dups are moved onto good->dups;
 *  - otherwise its refs and backrefs are merged into good.
 * Either way the absorbed record is removed from the cache.  Finally good
 * is inserted into @extent_cache.
 *
 * Returns 0 when good ended up with duplicates, 1 when it did not.
 *
 * NOTE(review): this function moves the `backrefs` list heads, whereas the
 * other routines in this file walk rec->backref_tree; confirm the list is
 * still the live container for backrefs.
 */
9137 static int process_duplicates(struct cache_tree *extent_cache,
9138 struct extent_record *rec)
9140 struct extent_record *good, *tmp;
9141 struct cache_extent *cache;
9145 * If we found a extent record for this extent then return, or if we
9146 * have more than one duplicate we are likely going to need to delete
9149 if (rec->found_rec || rec->num_duplicates > 1)
9152 /* Shouldn't happen but just in case */
9153 BUG_ON(!rec->num_duplicates);
9156 * So this happens if we end up with a backref that doesn't match the
9157 * actual extent entry. So either the backref is bad or the extent
9158 * entry is bad. Either way we want to have the extent_record actually
9159 * reflect what we found in the extent_tree, so we need to take the
9160 * duplicate out and use that as the extent_record since the only way we
9161 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9163 remove_cache_extent(extent_cache, &rec->cache);
9165 good = to_extent_record(rec->dups.next);
9166 list_del_init(&good->list);
9167 INIT_LIST_HEAD(&good->backrefs);
9168 INIT_LIST_HEAD(&good->dups);
9169 good->cache.start = good->start;
9170 good->cache.size = good->nr;
9171 good->content_checked = 0;
9172 good->owner_ref_checked = 0;
9173 good->num_duplicates = 0;
9174 good->refs = rec->refs;
9175 list_splice_init(&rec->backrefs, &good->backrefs);
9177 cache = lookup_cache_extent(extent_cache, good->start,
9181 tmp = container_of(cache, struct extent_record, cache);
9184 * If we find another overlapping extent and it's found_rec is
9185 * set then it's a duplicate and we need to try and delete
9188 if (tmp->found_rec || tmp->num_duplicates > 0) {
9189 if (list_empty(&good->list))
9190 list_add_tail(&good->list,
9191 &duplicate_extents);
9192 good->num_duplicates += tmp->num_duplicates + 1;
9193 list_splice_init(&tmp->dups, &good->dups);
9194 list_del_init(&tmp->list);
9195 list_add_tail(&tmp->list, &good->dups);
9196 remove_cache_extent(extent_cache, &tmp->cache);
9201 * Ok we have another non extent item backed extent rec, so lets
9202 * just add it to this extent and carry on like we did above.
9204 good->refs += tmp->refs;
9205 list_splice_init(&tmp->backrefs, &good->backrefs);
9206 remove_cache_extent(extent_cache, &tmp->cache);
9209 ret = insert_cache_extent(extent_cache, &good->cache);
9212 return good->num_duplicates ? 0 : 1;
/*
 * Delete the extent items of the duplicates of @rec, keeping the one
 * record that covers them all.
 *
 * rec->dups is scanned for the covering record ("good"), comparing start
 * and length.  A duplicate that ends before good does is reported as a
 * partial overlap that this code cannot handle.
 *
 * @rec is queued on a local delete list and entries of rec->dups are
 * moved onto it as well (the test that spares the covering record is not
 * visible in this listing).  Inside a transaction on the extent root,
 * every queued record with found_rec set is searched for by
 * (start, BTRFS_EXTENT_ITEM_KEY, nr) and removed with btrfs_del_item();
 * a metadata record in this list is reported as unexpected.
 *
 * Afterwards both the delete list and rec->dups are emptied and the path
 * is released.  rec->num_duplicates is reset to 0 when ret and nr_del are
 * both zero.
 *
 * Returns ret if it is non-zero, otherwise nr_del.
 */
9215 static int delete_duplicate_records(struct btrfs_root *root,
9216 struct extent_record *rec)
9218 struct btrfs_trans_handle *trans;
9219 LIST_HEAD(delete_list);
9220 struct btrfs_path path;
9221 struct extent_record *tmp, *good, *n;
9224 struct btrfs_key key;
9226 btrfs_init_path(&path);
9229 /* Find the record that covers all of the duplicates. */
9230 list_for_each_entry(tmp, &rec->dups, list) {
9231 if (good->start < tmp->start)
9233 if (good->nr > tmp->nr)
9236 if (tmp->start + tmp->nr < good->start + good->nr) {
9237 fprintf(stderr, "Ok we have overlapping extents that "
9238 "aren't completely covered by each other, this "
9239 "is going to require more careful thought. "
9240 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9241 tmp->start, tmp->nr, good->start, good->nr);
9248 list_add_tail(&rec->list, &delete_list);
9250 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9253 list_move_tail(&tmp->list, &delete_list);
9256 root = root->fs_info->extent_root;
9257 trans = btrfs_start_transaction(root, 1);
9258 if (IS_ERR(trans)) {
9259 ret = PTR_ERR(trans);
9263 list_for_each_entry(tmp, &delete_list, list) {
9264 if (tmp->found_rec == 0)
9266 key.objectid = tmp->start;
9267 key.type = BTRFS_EXTENT_ITEM_KEY;
9268 key.offset = tmp->nr;
9270 /* Shouldn't happen but just in case */
9271 if (tmp->metadata) {
9272 fprintf(stderr, "Well this shouldn't happen, extent "
9273 "record overlaps but is metadata? "
9274 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9278 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9284 ret = btrfs_del_item(trans, root, &path);
9287 btrfs_release_path(&path);
9290 err = btrfs_commit_transaction(trans, root);
9294 while (!list_empty(&delete_list)) {
9295 tmp = to_extent_record(delete_list.next);
9296 list_del_init(&tmp->list);
9302 while (!list_empty(&rec->dups)) {
9303 tmp = to_extent_record(rec->dups.next);
9304 list_del_init(&tmp->list);
9308 btrfs_release_path(&path);
9310 if (!ret && !nr_del)
9311 rec->num_duplicates = 0;
9313 return ret ? ret : nr_del;
/*
 * Try to locate the file extents behind data backrefs of @rec that the
 * tree scan did not find.
 *
 * For each data backref that is not a full backref and whose found_ref is
 * still zero:
 *  - the fs root dback->root is read; a missing root (-ENOENT) is
 *    treated as a bad ref, any other error is returned;
 *  - (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset) is searched
 *    read-only in that root;
 *  - on a hit, the file extent's disk_bytenr and disk_num_bytes are read
 *    and @extent_cache is probed at that bytenr; a hit there is examined
 *    first (see the in-line comment; the test itself is not visible in
 *    this listing);
 *  - found_ref is then incremented and disk_bytenr / bytes are
 *    overwritten with the values read from the file extent.
 *
 * NOTE(review): the inner `struct extent_record *tmp` shadows the outer
 * `tmp` used by the rbtree iterator; harmless only as long as it stays
 * confined to that inner block.
 * NOTE(review): the final return is not visible in this listing.
 */
9316 static int find_possible_backrefs(struct btrfs_fs_info *info,
9317 struct btrfs_path *path,
9318 struct cache_tree *extent_cache,
9319 struct extent_record *rec)
9321 struct btrfs_root *root;
9322 struct extent_backref *back, *tmp;
9323 struct data_backref *dback;
9324 struct cache_extent *cache;
9325 struct btrfs_file_extent_item *fi;
9326 struct btrfs_key key;
9330 rbtree_postorder_for_each_entry_safe(back, tmp,
9331 &rec->backref_tree, node) {
9332 /* Don't care about full backrefs (poor unloved backrefs) */
9333 if (back->full_backref || !back->is_data)
9336 dback = to_data_backref(back);
9338 /* We found this one, we don't need to do a lookup */
9339 if (dback->found_ref)
9342 key.objectid = dback->root;
9343 key.type = BTRFS_ROOT_ITEM_KEY;
9344 key.offset = (u64)-1;
9346 root = btrfs_read_fs_root(info, &key);
9348 /* No root, definitely a bad ref, skip */
9349 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9351 /* Other err, exit */
9353 return PTR_ERR(root);
9355 key.objectid = dback->owner;
9356 key.type = BTRFS_EXTENT_DATA_KEY;
9357 key.offset = dback->offset;
9358 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9360 btrfs_release_path(path);
9363 /* Didn't find it, we can carry on */
9368 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9369 struct btrfs_file_extent_item);
9370 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9371 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9372 btrfs_release_path(path);
9373 cache = lookup_cache_extent(extent_cache, bytenr, 1);
9375 struct extent_record *tmp;
9376 tmp = container_of(cache, struct extent_record, cache);
9379 * If we found an extent record for the bytenr for this
9380 * particular backref then we can't add it to our
9381 * current extent record. We only want to add backrefs
9382 * that don't have a corresponding extent item in the
9383 * extent tree since they likely belong to this record
9384 * and we need to fix it if it doesn't match bytenrs.
9390 dback->found_ref += 1;
9391 dback->disk_bytenr = bytenr;
9392 dback->bytes = bytes;
9395 * Set this so the verify backref code knows not to trust the
9396 * values in this backref.
9405 * Record orphan data ref into corresponding root.
9407 * Return 0 if the extent item contains a data ref and it was recorded.
9408 * Return 1 if the extent item contains no useful data ref.
9409 * In that case, it may contain only shared_dataref or metadata backref
9410 * or the file extent exists (this should be handled by the extent bytenr
9412 * Return <0 if something goes wrong.
/*
 * Record data extents that the extent tree references but no file extent
 * item was found for, so they can be used later for rebuild.
 *
 * Only data backrefs of @rec that are not full backrefs, were found in
 * the extent tree (found_extent_tree) and have found_ref == 0 are
 * considered.  For each one the fs root dback->root is read (an error or
 * NULL result is tested for, as a root that does not exist) and
 * (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset) is searched
 * read-only.  Per the in-line comment, a negative or positive search
 * result leads to recording, while an exact hit is left to the bytenr fix
 * routine.
 *
 * Recording allocates an orphan_data_extent describing (root, objectid,
 * offset, disk_bytenr = rec->cache.start, disk_len = rec->cache.size) and
 * links it with dest_root->orphan_data_extents.
 *
 * Returns 0 when at least one data ref was recorded, 1 when none was.
 *
 * NOTE(review): list_add() is called with the root's list head as the
 * first argument and the new orphan's node as the second.  With the usual
 * list_add(new, head) convention this looks reversed and would drop
 * earlier orphans from the root's list -- confirm.
 */
9414 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9415 struct extent_record *rec)
9417 struct btrfs_key key;
9418 struct btrfs_root *dest_root;
9419 struct extent_backref *back, *tmp;
9420 struct data_backref *dback;
9421 struct orphan_data_extent *orphan;
9422 struct btrfs_path path;
9423 int recorded_data_ref = 0;
9428 btrfs_init_path(&path);
9429 rbtree_postorder_for_each_entry_safe(back, tmp,
9430 &rec->backref_tree, node) {
9431 if (back->full_backref || !back->is_data ||
9432 !back->found_extent_tree)
9434 dback = to_data_backref(back);
9435 if (dback->found_ref)
9437 key.objectid = dback->root;
9438 key.type = BTRFS_ROOT_ITEM_KEY;
9439 key.offset = (u64)-1;
9441 dest_root = btrfs_read_fs_root(fs_info, &key);
9443 /* For non-exist root we just skip it */
9444 if (IS_ERR(dest_root) || !dest_root)
9447 key.objectid = dback->owner;
9448 key.type = BTRFS_EXTENT_DATA_KEY;
9449 key.offset = dback->offset;
9451 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9452 btrfs_release_path(&path);
9454 * For ret < 0, it's OK since the fs-tree may be corrupted,
9455 * we need to record it for inode/file extent rebuild.
9456 * For ret > 0, we record it only for file extent rebuild.
9457 * For ret == 0, the file extent exists but only bytenr
9458 * mismatch, let the original bytenr fix routine to handle,
9464 orphan = malloc(sizeof(*orphan));
9469 INIT_LIST_HEAD(&orphan->list);
9470 orphan->root = dback->root;
9471 orphan->objectid = dback->owner;
9472 orphan->offset = dback->offset;
9473 orphan->disk_bytenr = rec->cache.start;
9474 orphan->disk_len = rec->cache.size;
9475 list_add(&dest_root->orphan_data_extents, &orphan->list);
9476 recorded_data_ref = 1;
9479 btrfs_release_path(&path);
9481 return !recorded_data_ref;
9487 * when an incorrect extent item is found, this will delete
9488 * all of the existing entries for it and recreate them
9489 * based on what the tree scan found.
/*
 * Rebuild the extent-tree entries of @rec from what the tree scan found.
 *
 * BTRFS_BLOCK_FLAG_FULL_BACKREF is added to the flags when
 * rec->flag_block_full_backref is set.
 *
 * For a data extent whose rec->refs differs from rec->extent_item_refs,
 * find_possible_backrefs() is run first to pick up backrefs that were not
 * attached to any record, followed by verify_backrefs() so that all data
 * backrefs agree.
 *
 * Then, inside a transaction on info->extent_root:
 *  1. delete_extent_records() removes the existing items;
 *  2. info->corrupt_blocks is probed for [rec->start, rec->max_size] (per
 *     the in-line comment no references are added to a corrupt block);
 *  3. every backref with found_ref set is re-created through
 *     record_extent(); rec->bad_full_backref is cleared on the way.
 * The transaction is committed, a "Repaired extent references" message is
 * printed and the path is released.
 *
 * NOTE(review): the ret checks, the `allocated` bookkeeping and the final
 * return are not visible in this listing.
 */
9491 static int fixup_extent_refs(struct btrfs_fs_info *info,
9492 struct cache_tree *extent_cache,
9493 struct extent_record *rec)
9495 struct btrfs_trans_handle *trans = NULL;
9497 struct btrfs_path path;
9498 struct cache_extent *cache;
9499 struct extent_backref *back, *tmp;
9503 if (rec->flag_block_full_backref)
9504 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9506 btrfs_init_path(&path);
9507 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9509 * Sometimes the backrefs themselves are so broken they don't
9510 * get attached to any meaningful rec, so first go back and
9511 * check any of our backrefs that we couldn't find and throw
9512 * them into the list if we find the backref so that
9513 * verify_backrefs can figure out what to do.
9515 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9520 /* step one, make sure all of the backrefs agree */
9521 ret = verify_backrefs(info, &path, rec);
9525 trans = btrfs_start_transaction(info->extent_root, 1);
9526 if (IS_ERR(trans)) {
9527 ret = PTR_ERR(trans);
9531 /* step two, delete all the existing records */
9532 ret = delete_extent_records(trans, info->extent_root, &path,
9538 /* was this block corrupt? If so, don't add references to it */
9539 cache = lookup_cache_extent(info->corrupt_blocks,
9540 rec->start, rec->max_size);
9546 /* step three, recreate all the refs we did find */
9547 rbtree_postorder_for_each_entry_safe(back, tmp,
9548 &rec->backref_tree, node) {
9550 * if we didn't find any references, don't create a
9553 if (!back->found_ref)
9556 rec->bad_full_backref = 0;
9557 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9565 int err = btrfs_commit_transaction(trans, info->extent_root);
9571 fprintf(stderr, "Repaired extent references for %llu\n",
9572 (unsigned long long)rec->start);
9574 btrfs_release_path(&path);
/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the extent item of @rec
 * so that it matches rec->flag_block_full_backref.
 *
 * The item is looked up in fs_info->extent_root under
 * (rec->start, BTRFS_METADATA_ITEM_KEY, rec->info_level) when
 * rec->metadata is set, and under
 * (rec->start, BTRFS_EXTENT_ITEM_KEY, rec->max_size) in the other branch.
 * The search runs with COW inside a transaction.
 *
 * On the search-failure path and on the "Didn't find extent" path the
 * path is released and the transaction is still committed.  Otherwise the
 * current flags are read, the full-backref bit is set or cleared (each
 * case is logged to stderr), the leaf is marked dirty and the transaction
 * is committed, followed by a "Repaired extent flags" message.
 *
 * A failed btrfs_start_transaction() is returned as PTR_ERR(trans).
 *
 * NOTE(review): the other return statements are not visible in this
 * listing.
 */
9578 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9579 struct extent_record *rec)
9581 struct btrfs_trans_handle *trans;
9582 struct btrfs_root *root = fs_info->extent_root;
9583 struct btrfs_path path;
9584 struct btrfs_extent_item *ei;
9585 struct btrfs_key key;
9589 key.objectid = rec->start;
9590 if (rec->metadata) {
9591 key.type = BTRFS_METADATA_ITEM_KEY;
9592 key.offset = rec->info_level;
9594 key.type = BTRFS_EXTENT_ITEM_KEY;
9595 key.offset = rec->max_size;
9598 trans = btrfs_start_transaction(root, 0);
9600 return PTR_ERR(trans);
9602 btrfs_init_path(&path);
9603 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9605 btrfs_release_path(&path);
9606 btrfs_commit_transaction(trans, root);
9609 fprintf(stderr, "Didn't find extent for %llu\n",
9610 (unsigned long long)rec->start);
9611 btrfs_release_path(&path);
9612 btrfs_commit_transaction(trans, root);
9616 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9617 struct btrfs_extent_item);
9618 flags = btrfs_extent_flags(path.nodes[0], ei);
9619 if (rec->flag_block_full_backref) {
9620 fprintf(stderr, "setting full backref on %llu\n",
9621 (unsigned long long)key.objectid);
9622 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9624 fprintf(stderr, "clearing full backref on %llu\n",
9625 (unsigned long long)key.objectid);
9626 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9628 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9629 btrfs_mark_buffer_dirty(path.nodes[0]);
9630 btrfs_release_path(&path);
9631 ret = btrfs_commit_transaction(trans, root);
9633 fprintf(stderr, "Repaired extent flags for %llu\n",
9634 (unsigned long long)rec->start);
9639 /* right now we only prune from the extent allocation tree */
/*
 * Delete the pointer to one corrupt block from its parent node in the
 * extent tree.
 *
 * @trans:   running transaction
 * @info:    filesystem info; only info->extent_root is searched
 * @corrupt: record of the corrupt block (key, level, cache.start)
 *
 * The search for corrupt->key stops one level above the corrupt block
 * (path.lowest_level = corrupt->level + 1).  The slot returned by the search
 * is tried first; if its block pointer is not corrupt->cache.start, every
 * slot of that node is scanned.  The matching pointer is removed with
 * btrfs_del_ptr().
 *
 * NOTE(review): branch targets and return statements are on lines not shown
 * in this listing, so the exact result codes cannot be stated here.
 */
9640 static int prune_one_block(struct btrfs_trans_handle *trans,
9641 struct btrfs_fs_info *info,
9642 struct btrfs_corrupt_block *corrupt)
9645 struct btrfs_path path;
9646 struct extent_buffer *eb;
/* Level of the parent node that holds the pointer to the corrupt block. */
9650 int level = corrupt->level + 1;
9652 btrfs_init_path(&path);
9654 /* we want to stop at the parent to our busted block */
9655 path.lowest_level = level;
9657 ret = btrfs_search_slot(trans, info->extent_root,
9658 &corrupt->key, &path, -1, 1);
9663 eb = path.nodes[level];
9670 * hopefully the search gave us the block we want to prune,
9671 * lets try that first
9673 slot = path.slots[level];
9674 found = btrfs_node_blockptr(eb, slot);
9675 if (found == corrupt->cache.start)
9678 nritems = btrfs_header_nritems(eb);
9680 /* the search failed, lets scan this node and hope we find it */
9681 for (slot = 0; slot < nritems; slot++) {
9682 found = btrfs_node_blockptr(eb, slot);
9683 if (found == corrupt->cache.start)
9687 * we couldn't find the bad block. TODO, search all the nodes for pointers
/* Special-cases the node being the extent tree root; the body is not shown. */
9690 if (eb == info->extent_root->node) {
9695 btrfs_release_path(&path);
9700 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9701 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9704 btrfs_release_path(&path);
/*
 * Prune the entries recorded in info->corrupt_blocks from the extent tree.
 *
 * Each entry found via search_cache_extent() is handed to prune_one_block()
 * and then removed from the cache.  A transaction on info->extent_root is
 * started for the work and the function returns the result of committing it,
 * or PTR_ERR(trans) on the path that follows btrfs_start_transaction().
 *
 * NOTE(review): the return value of prune_one_block() is discarded.  The
 * loop construct itself is on lines not shown in this listing.
 */
9708 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9710 struct btrfs_trans_handle *trans = NULL;
9711 struct cache_extent *cache;
9712 struct btrfs_corrupt_block *corrupt;
9715 cache = search_cache_extent(info->corrupt_blocks, 0);
9719 trans = btrfs_start_transaction(info->extent_root, 1);
9721 return PTR_ERR(trans);
9723 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9724 prune_one_block(trans, info, corrupt);
9725 remove_cache_extent(info->corrupt_blocks, cache);
9728 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Clear EXTENT_DIRTY ranges from fs_info->free_space_cache and step through
 * the block groups.
 *
 * Dirty ranges are located with find_first_extent_bit() and cleared with
 * clear_extent_dirty().  Block groups are then visited with
 * btrfs_lookup_first_block_group(), advancing `start` to the end of each one
 * (key.objectid + key.offset).
 *
 * NOTE(review): what is done to each block group is on lines not shown in
 * this listing; the function name suggests its cached state is reset, but
 * that needs confirming.
 */
9732 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9734 struct btrfs_block_group_cache *cache;
9739 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9740 &start, &end, EXTENT_DIRTY);
9743 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9748 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Continue the walk right after the block group just visited. */
9753 start = cache->key.objectid + cache->key.offset;
/*
 * Verify every extent record in @extent_cache and, in repair mode, try to
 * fix what is wrong.
 *
 * Preparation for repair: the range of every record and of every corrupt
 * block is marked in fs_info->excluded_extents, corrupt blocks are pruned
 * and the cached block groups are reset.  Duplicate extent records on the
 * global duplicate_extents list are processed first via process_duplicates()
 * and delete_duplicate_records().
 *
 * Per record, the following problems are reported on stderr:
 *   - rec->num_duplicates set ("has multiple extent items")
 *   - rec->refs != rec->extent_item_refs ("ref mismatch"), which also calls
 *     record_orphan_data_extents()
 *   - all_backpointers_checked() returning non-zero ("backpointer mismatch")
 *   - rec->owner_ref_checked not set ("owner ref check failed")
 *   - rec->bad_full_backref, rec->crossing_stripes, rec->wrong_chunk_type
 * When `repair && fix` holds, fixup_extent_refs() is called.  For a bad
 * full backref, fixup_extent_flags() is called.  No repair exists for the
 * stripe-crossing case (per the existing comment).  Each record is removed
 * from the cache and its backrefs are freed afterwards.
 *
 * At the end, a result other than 0 or -EAGAIN prints "failed to repair
 * damaged filesystem, aborting".  Another path runs
 * btrfs_fix_block_accounting() inside a transaction on the extent root.
 *
 * NOTE(review): this listing omits short lines (conditions, braces, returns
 * and comment terminators), so the guards around the steps above and the
 * final return value are not visible here.
 */
9757 static int check_extent_refs(struct btrfs_root *root,
9758 struct cache_tree *extent_cache)
9760 struct extent_record *rec;
9761 struct cache_extent *cache;
9767 * if we're doing a repair, we have to make sure
9768 * we don't allocate from the problem extents.
9769 * In the worst case, this will be all the
9772 cache = search_cache_extent(extent_cache, 0);
9774 rec = container_of(cache, struct extent_record, cache);
9775 set_extent_dirty(root->fs_info->excluded_extents,
9777 rec->start + rec->max_size - 1);
9778 cache = next_cache_extent(cache);
9781 /* pin down all the corrupted blocks too */
9782 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9784 set_extent_dirty(root->fs_info->excluded_extents,
9786 cache->start + cache->size - 1);
9787 cache = next_cache_extent(cache);
/* NOTE(review): the return value of prune_corrupt_blocks() is not checked. */
9789 prune_corrupt_blocks(root->fs_info);
9790 reset_cached_block_groups(root->fs_info);
9793 reset_cached_block_groups(root->fs_info);
9796 * We need to delete any duplicate entries we find first otherwise we
9797 * could mess up the extent tree when we have backrefs that actually
9798 * belong to a different extent item and not the weird duplicate one.
9800 while (repair && !list_empty(&duplicate_extents)) {
9801 rec = to_extent_record(duplicate_extents.next);
9802 list_del_init(&rec->list);
9804 /* Sometimes we can find a backref before we find an actual
9805 * extent, so we need to process it a little bit to see if there
9806 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9807 * if this is a backref screwup. If we need to delete stuff
9808 * process_duplicates() will return 0, otherwise it will return
9811 if (process_duplicates(extent_cache, rec))
9813 ret = delete_duplicate_records(root, rec);
9817 * delete_duplicate_records will return the number of entries
9818 * deleted, so if it's greater than 0 then we know we actually
9819 * did something and we need to remove.
9832 cache = search_cache_extent(extent_cache, 0);
9835 rec = container_of(cache, struct extent_record, cache);
9836 if (rec->num_duplicates) {
9837 fprintf(stderr, "extent item %llu has multiple extent "
9838 "items\n", (unsigned long long)rec->start);
9842 if (rec->refs != rec->extent_item_refs) {
9843 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9844 (unsigned long long)rec->start,
9845 (unsigned long long)rec->nr);
9846 fprintf(stderr, "extent item %llu, found %llu\n",
9847 (unsigned long long)rec->extent_item_refs,
9848 (unsigned long long)rec->refs);
9849 ret = record_orphan_data_extents(root->fs_info, rec);
9855 if (all_backpointers_checked(rec, 1)) {
9856 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9857 (unsigned long long)rec->start,
9858 (unsigned long long)rec->nr);
9862 if (!rec->owner_ref_checked) {
9863 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9864 (unsigned long long)rec->start,
9865 (unsigned long long)rec->nr);
9870 if (repair && fix) {
9871 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9877 if (rec->bad_full_backref) {
9878 fprintf(stderr, "bad full backref, on [%llu]\n",
9879 (unsigned long long)rec->start);
9881 ret = fixup_extent_flags(root->fs_info, rec);
9889 * Although it's not a extent ref's problem, we reuse this
9890 * routine for error reporting.
9891 * No repair function yet.
9893 if (rec->crossing_stripes) {
9895 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9896 rec->start, rec->start + rec->max_size);
9900 if (rec->wrong_chunk_type) {
9902 "bad extent [%llu, %llu), type mismatch with chunk\n",
9903 rec->start, rec->start + rec->max_size);
9907 remove_cache_extent(extent_cache, cache);
9908 free_all_extent_backrefs(rec);
9909 if (!init_extent_tree && repair && (!cur_err || fix))
9910 clear_extent_dirty(root->fs_info->excluded_extents,
9912 rec->start + rec->max_size - 1);
9917 if (ret && ret != -EAGAIN) {
9918 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9921 struct btrfs_trans_handle *trans;
9923 root = root->fs_info->extent_root;
9924 trans = btrfs_start_transaction(root, 1);
9925 if (IS_ERR(trans)) {
9926 ret = PTR_ERR(trans);
9930 ret = btrfs_fix_block_accounting(trans, root);
9933 ret = btrfs_commit_transaction(trans, root);
/*
 * Derive the stripe length of a chunk from its total @length, its block
 * group profile bits in @type and its @num_stripes.
 *
 *   RAID0:  length / num_stripes
 *   RAID10: length * 2 / num_stripes
 *   RAID5:  length / (num_stripes - 1)
 *   RAID6:  length / (num_stripes - 2)
 *   last branch (presumably every other profile): length
 *
 * The result is presumably returned in stripe_size; the return statement is
 * on a line not shown in this listing.
 *
 * NOTE(review): no guard against a zero divisor is visible (num_stripes == 0,
 * == 1 for RAID5, == 2 for RAID6).  Confirm that chunks are validated before
 * this is called on data read from a possibly corrupt filesystem.
 */
9942 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9946 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9947 stripe_size = length;
9948 stripe_size /= num_stripes;
9949 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9950 stripe_size = length * 2;
9951 stripe_size /= num_stripes;
9952 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9953 stripe_size = length;
9954 stripe_size /= (num_stripes - 1);
9955 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9956 stripe_size = length;
9957 stripe_size /= (num_stripes - 2);
9959 stripe_size = length;
9965 * Check the chunk with its block group/dev list ref:
9966 * Return 0 if all refs seem valid.
9967 * Return 1 if only part of the refs seem valid and a later pass must rebuild
9968 * the rest, e.g. a missing block group that is rebuilt by searching the extent tree.
9969 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * Cross-check one chunk record against the block group cache and the device
 * extent cache.
 *
 * @chunk_rec:         chunk to verify
 * @block_group_cache: cache of block group records
 * @dev_extent_cache:  cache of device extent records
 * (A further parameter is declared on a line not shown in this listing;
 * check_chunks() passes its `silent` argument in that position.)
 *
 * Block group check: the looked-up record must agree with the chunk on
 * length/offset and, unless metadump_v2 is set, on type flags.  On agreement
 * the block group record is unlinked from its list and stored in
 * chunk_rec->bg_rec.  Otherwise a mismatch or "not found" message is emitted.
 *
 * Stripe check: for each stripe a device extent is looked up by
 * (devid, offset, stripe length from calc_stripe_length()).  Its objectid,
 * offset, chunk_offset and length must all match.  Matching device extents
 * are moved onto chunk_rec->dextents.
 *
 * NOTE(review): the assignments that produce the return value are on lines
 * not shown here.
 */
9971 static int check_chunk_refs(struct chunk_record *chunk_rec,
9972 struct block_group_tree *block_group_cache,
9973 struct device_extent_tree *dev_extent_cache,
9976 struct cache_extent *block_group_item;
9977 struct block_group_record *block_group_rec;
9978 struct cache_extent *dev_extent_item;
9979 struct device_extent_record *dev_extent_rec;
9983 int metadump_v2 = 0;
9987 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9990 if (block_group_item) {
9991 block_group_rec = container_of(block_group_item,
9992 struct block_group_record,
/* A block group key stores the chunk offset as objectid and the length as offset. */
9994 if (chunk_rec->length != block_group_rec->offset ||
9995 chunk_rec->offset != block_group_rec->objectid ||
9997 chunk_rec->type_flags != block_group_rec->flags)) {
10000 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10001 chunk_rec->objectid,
10006 chunk_rec->type_flags,
10007 block_group_rec->objectid,
10008 block_group_rec->type,
10009 block_group_rec->offset,
10010 block_group_rec->offset,
10011 block_group_rec->objectid,
10012 block_group_rec->flags);
/* Matched: take the block group off its list and remember it on the chunk. */
10015 list_del_init(&block_group_rec->list);
10016 chunk_rec->bg_rec = block_group_rec;
10021 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10022 chunk_rec->objectid,
10027 chunk_rec->type_flags);
/* Every stripe must be backed by a device extent of the computed length. */
10034 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10035 chunk_rec->num_stripes);
10036 for (i = 0; i < chunk_rec->num_stripes; ++i) {
10037 devid = chunk_rec->stripes[i].devid;
10038 offset = chunk_rec->stripes[i].offset;
10039 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10040 devid, offset, length);
10041 if (dev_extent_item) {
10042 dev_extent_rec = container_of(dev_extent_item,
10043 struct device_extent_record,
10045 if (dev_extent_rec->objectid != devid ||
10046 dev_extent_rec->offset != offset ||
10047 dev_extent_rec->chunk_offset != chunk_rec->offset ||
10048 dev_extent_rec->length != length) {
10051 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10052 chunk_rec->objectid,
10055 chunk_rec->stripes[i].devid,
10056 chunk_rec->stripes[i].offset,
10057 dev_extent_rec->objectid,
10058 dev_extent_rec->offset,
10059 dev_extent_rec->length);
/* Matched: attach the device extent to this chunk. */
10062 list_move(&dev_extent_rec->chunk_list,
10063 &chunk_rec->dextents);
10068 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10069 chunk_rec->objectid,
10072 chunk_rec->stripes[i].devid,
10073 chunk_rec->stripes[i].offset);
10080 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Verify every chunk in @chunk_cache with check_chunk_refs() and then report
 * block groups and device extents still left on their lists.
 *
 * @good, @bad, @rebuild: optional lists (may be NULL).  A chunk is appended
 *     to @good when check_chunk_refs() returned 0, to @rebuild when it
 *     returned > 0 and to @bad when it returned < 0.
 * @silent: forwarded to check_chunk_refs().
 *
 * After the chunk pass, entries still on block_group_cache->block_groups and
 * dev_extent_cache->no_chunk_orphans are reported as having no related
 * chunk.
 *
 * NOTE(review): the return statement is on a line not shown in this listing.
 */
10081 int check_chunks(struct cache_tree *chunk_cache,
10082 struct block_group_tree *block_group_cache,
10083 struct device_extent_tree *dev_extent_cache,
10084 struct list_head *good, struct list_head *bad,
10085 struct list_head *rebuild, int silent)
10087 struct cache_extent *chunk_item;
10088 struct chunk_record *chunk_rec;
10089 struct block_group_record *bg_rec;
10090 struct device_extent_record *dext_rec;
10094 chunk_item = first_cache_extent(chunk_cache);
10095 while (chunk_item) {
10096 chunk_rec = container_of(chunk_item, struct chunk_record,
10098 err = check_chunk_refs(chunk_rec, block_group_cache,
10099 dev_extent_cache, silent);
/* Sort the chunk into the caller's list that matches the verdict. */
10102 if (err == 0 && good)
10103 list_add_tail(&chunk_rec->list, good);
10104 if (err > 0 && rebuild)
10105 list_add_tail(&chunk_rec->list, rebuild);
10106 if (err < 0 && bad)
10107 list_add_tail(&chunk_rec->list, bad);
10108 chunk_item = next_cache_extent(chunk_item);
/* Block groups still listed here were not claimed by a chunk in check_chunk_refs(). */
10111 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10114 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10122 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10126 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10127 dext_rec->objectid,
/*
 * Check that the device extents of one device add up to the bytes-used value
 * stored in its device record.
 *
 * Starting at (dev_rec->devid, 0) in @dext_cache, the lengths of the device
 * extents are summed and each visited extent is unlinked from its
 * device_list.  Extents whose objectid differs from dev_rec->devid are
 * tested for explicitly; the action taken is on a line not shown, presumably
 * ending the walk.  A sum different from dev_rec->byte_used is reported.
 *
 * NOTE(review): the return statements are on lines not shown in this listing.
 */
10137 static int check_device_used(struct device_record *dev_rec,
10138 struct device_extent_tree *dext_cache)
10140 struct cache_extent *cache;
10141 struct device_extent_record *dev_extent_rec;
10142 u64 total_byte = 0;
10144 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10146 dev_extent_rec = container_of(cache,
10147 struct device_extent_record,
10149 if (dev_extent_rec->objectid != dev_rec->devid)
/* Claimed by this device: unlink it and account its length. */
10152 list_del_init(&dev_extent_rec->device_list);
10153 total_byte += dev_extent_rec->length;
10154 cache = next_cache_extent(cache);
10157 if (total_byte != dev_rec->byte_used) {
10159 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10160 total_byte, dev_rec->byte_used, dev_rec->objectid,
10161 dev_rec->type, dev_rec->offset);
10168 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Run check_device_used() on every device record in the @dev_cache rb-tree,
 * then report the device extents still on
 * dev_extent_cache->no_device_orphans as having no device.
 *
 * NOTE(review): how the per-device result is folded into the return value is
 * on lines not shown in this listing.
 */
10169 static int check_devices(struct rb_root *dev_cache,
10170 struct device_extent_tree *dev_extent_cache)
10172 struct rb_node *dev_node;
10173 struct device_record *dev_rec;
10174 struct device_extent_record *dext_rec;
10178 dev_node = rb_first(dev_cache);
10180 dev_rec = container_of(dev_node, struct device_record, node);
10181 err = check_device_used(dev_rec, dev_extent_cache);
10185 dev_node = rb_next(dev_node);
/* check_device_used() unlinks every extent it visits, so the rest are orphans. */
10187 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10190 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10191 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record describing one tree root and append it to
 * @head.
 *
 * @objectid, @bytenr, @last_snapshot, @level, @drop_level are stored in the
 * new record as given.  @drop_key is copied into the record with memcpy().
 * Callers in this file also pass NULL for @drop_key, so the copy is
 * presumably guarded on a line not shown in this listing.
 *
 * NOTE(review): the check of the malloc() result and the return statements
 * are likewise not visible here.
 */
10198 static int add_root_item_to_list(struct list_head *head,
10199 u64 objectid, u64 bytenr, u64 last_snapshot,
10200 u8 level, u8 drop_level,
10201 struct btrfs_key *drop_key)
10204 struct root_item_record *ri_rec;
10205 ri_rec = malloc(sizeof(*ri_rec));
10208 ri_rec->bytenr = bytenr;
10209 ri_rec->objectid = objectid;
10210 ri_rec->level = level;
10211 ri_rec->drop_level = drop_level;
10212 ri_rec->last_snapshot = last_snapshot;
10214 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10215 list_add_tail(&ri_rec->list, head);
/*
 * Empty a list of root_item_record entries, unlinking the first entry until
 * the list is empty.
 *
 * NOTE(review): releasing the memory of each record presumably happens on a
 * line not shown in this listing.
 */
10220 static void free_root_item_list(struct list_head *list)
10222 struct root_item_record *ri_rec;
10224 while (!list_empty(list)) {
10225 ri_rec = list_first_entry(list, struct root_item_record,
10227 list_del_init(&ri_rec->list);
/*
 * Process every root queued on @list and then run the block scan with no
 * specific root record.
 *
 * For each root_item_record: its root block is read with read_tree_block(),
 * queued through add_root_to_pending(), and run_next_block() is called with
 * that record.  The buffer is then released and the record unlinked from the
 * list.  After the list is handled, run_next_block() is called with a NULL
 * record.
 *
 * The cache-tree, rb-root and device/block-group parameters are passed
 * through unchanged to add_root_to_pending() and run_next_block().
 * (`bits_nr` is declared on a parameter line not shown in this listing.)
 *
 * NOTE(review): loop conditions, error branches and return statements are on
 * lines not shown here.
 */
10232 static int deal_root_from_list(struct list_head *list,
10233 struct btrfs_root *root,
10234 struct block_info *bits,
10236 struct cache_tree *pending,
10237 struct cache_tree *seen,
10238 struct cache_tree *reada,
10239 struct cache_tree *nodes,
10240 struct cache_tree *extent_cache,
10241 struct cache_tree *chunk_cache,
10242 struct rb_root *dev_cache,
10243 struct block_group_tree *block_group_cache,
10244 struct device_extent_tree *dev_extent_cache)
10249 while (!list_empty(list)) {
10250 struct root_item_record *rec;
10251 struct extent_buffer *buf;
10252 rec = list_entry(list->next,
10253 struct root_item_record, list);
/* Read the root block of this tree; an unreadable block is released again. */
10255 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10256 if (!extent_buffer_uptodate(buf)) {
10257 free_extent_buffer(buf);
10261 ret = add_root_to_pending(buf, extent_cache, pending,
10262 seen, nodes, rec->objectid);
10266 * To rebuild extent tree, we need deal with snapshot
10267 * one by one, otherwise we deal with node firstly which
10268 * can maximize readahead.
10271 ret = run_next_block(root, bits, bits_nr, &last,
10272 pending, seen, reada, nodes,
10273 extent_cache, chunk_cache,
10274 dev_cache, block_group_cache,
10275 dev_extent_cache, rec);
10279 free_extent_buffer(buf);
10280 list_del(&rec->list);
/* Scan with no specific root record. */
10286 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10287 reada, nodes, extent_cache, chunk_cache,
10288 dev_cache, block_group_cache,
10289 dev_extent_cache, NULL);
/*
 * Top-level chunk/extent check for one filesystem.
 *
 * Steps visible in this function:
 *   1. Initialise the local caches (chunk, block group, device extent,
 *      extent, seen/pending/reada/nodes, corrupt blocks, excluded extents)
 *      and publish some of them through @fs_info, together with
 *      free_extent_hook.
 *   2. Queue the tree root and the chunk root on normal_trees, then walk the
 *      ROOT_ITEMs in the tree root.  Roots whose drop_progress objectid is 0
 *      go to normal_trees, the others to dropping_trees together with their
 *      drop key and drop level.
 *   3. Process both lists with deal_root_from_list().
 *   4. Run check_chunks(), check_extent_refs() and check_devices().
 *   5. Tear down the caches and clear the pointers installed in @fs_info.
 *
 * Several steps test for -EAGAIN (see the comment before the first
 * deal_root_from_list() call).  A second teardown sequence at the end also
 * frees the extent record cache, and presumably belongs to that restart
 * path.
 *
 * NOTE(review): this listing omits short lines (labels, gotos, braces,
 * returns), so the control flow between the steps is not visible here.
 */
10299 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10301 struct rb_root dev_cache;
10302 struct cache_tree chunk_cache;
10303 struct block_group_tree block_group_cache;
10304 struct device_extent_tree dev_extent_cache;
10305 struct cache_tree extent_cache;
10306 struct cache_tree seen;
10307 struct cache_tree pending;
10308 struct cache_tree reada;
10309 struct cache_tree nodes;
10310 struct extent_io_tree excluded_extents;
10311 struct cache_tree corrupt_blocks;
10312 struct btrfs_path path;
10313 struct btrfs_key key;
10314 struct btrfs_key found_key;
10316 struct block_info *bits;
10318 struct extent_buffer *leaf;
10320 struct btrfs_root_item ri;
10321 struct list_head dropping_trees;
10322 struct list_head normal_trees;
10323 struct btrfs_root *root1;
10324 struct btrfs_root *root;
/* Step 1: set up all local bookkeeping structures. */
10328 root = fs_info->fs_root;
10329 dev_cache = RB_ROOT;
10330 cache_tree_init(&chunk_cache);
10331 block_group_tree_init(&block_group_cache);
10332 device_extent_tree_init(&dev_extent_cache);
10334 cache_tree_init(&extent_cache);
10335 cache_tree_init(&seen);
10336 cache_tree_init(&pending);
10337 cache_tree_init(&nodes);
10338 cache_tree_init(&reada);
10339 cache_tree_init(&corrupt_blocks);
10340 extent_io_tree_init(&excluded_extents);
10341 INIT_LIST_HEAD(&dropping_trees);
10342 INIT_LIST_HEAD(&normal_trees);
/* Publish the local structures through fs_info; they are cleared again during teardown. */
10345 fs_info->excluded_extents = &excluded_extents;
10346 fs_info->fsck_extent_cache = &extent_cache;
10347 fs_info->free_extent_hook = free_extent_hook;
10348 fs_info->corrupt_blocks = &corrupt_blocks;
10352 bits = malloc(bits_nr * sizeof(struct block_info));
10358 if (ctx.progress_enabled) {
10359 ctx.tp = TASK_EXTENTS;
10360 task_start(ctx.info);
/* Step 2: queue the tree root and the chunk root as normal trees. */
10364 root1 = fs_info->tree_root;
10365 level = btrfs_header_level(root1->node);
10366 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10367 root1->node->start, 0, level, 0, NULL);
10370 root1 = fs_info->chunk_root;
10371 level = btrfs_header_level(root1->node);
10372 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10373 root1->node->start, 0, level, 0, NULL);
/* Walk the tree root looking for ROOT_ITEM entries. */
10376 btrfs_init_path(&path);
10379 key.type = BTRFS_ROOT_ITEM_KEY;
10380 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10384 leaf = path.nodes[0];
10385 slot = path.slots[0];
10386 if (slot >= btrfs_header_nritems(path.nodes[0])) {
/*
 * NOTE(review): `root` is fs_info->fs_root while the path was obtained from
 * fs_info->tree_root; confirm btrfs_next_leaf() does not depend on the root
 * argument.
 */
10387 ret = btrfs_next_leaf(root, &path);
10390 leaf = path.nodes[0];
10391 slot = path.slots[0];
10393 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10394 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10395 unsigned long offset;
10398 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10399 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10400 last_snapshot = btrfs_root_last_snapshot(&ri);
/* A zero drop_progress objectid means the root goes on the normal list. */
10401 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10402 level = btrfs_root_level(&ri);
10403 ret = add_root_item_to_list(&normal_trees,
10404 found_key.objectid,
10405 btrfs_root_bytenr(&ri),
10406 last_snapshot, level,
/* Otherwise queue it as a dropping tree with its drop key; found_key is reused as scratch. */
10411 level = btrfs_root_level(&ri);
10412 objectid = found_key.objectid;
10413 btrfs_disk_key_to_cpu(&found_key,
10414 &ri.drop_progress);
10415 ret = add_root_item_to_list(&dropping_trees,
10417 btrfs_root_bytenr(&ri),
10418 last_snapshot, level,
10419 ri.drop_level, &found_key);
10426 btrfs_release_path(&path);
10429 * check_block can return -EAGAIN if it fixes something, please keep
10430 * this in mind when dealing with return values from these functions, if
10431 * we get -EAGAIN we want to fall through and restart the loop.
/* Step 3: scan the normal trees first, then the trees being dropped. */
10433 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
10434 &seen, &reada, &nodes, &extent_cache,
10435 &chunk_cache, &dev_cache, &block_group_cache,
10436 &dev_extent_cache);
10438 if (ret == -EAGAIN)
10442 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
10443 &pending, &seen, &reada, &nodes,
10444 &extent_cache, &chunk_cache, &dev_cache,
10445 &block_group_cache, &dev_extent_cache);
10447 if (ret == -EAGAIN)
/* Step 4: cross-check chunks, extent references and devices. */
10452 ret = check_chunks(&chunk_cache, &block_group_cache,
10453 &dev_extent_cache, NULL, NULL, NULL, 0);
10455 if (ret == -EAGAIN)
10460 ret = check_extent_refs(root, &extent_cache);
10462 if (ret == -EAGAIN)
10467 ret = check_devices(&dev_cache, &dev_extent_cache);
/* Step 5: teardown; clear the pointers installed in fs_info. */
10472 task_stop(ctx.info);
10474 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10475 extent_io_tree_cleanup(&excluded_extents);
10476 fs_info->fsck_extent_cache = NULL;
10477 fs_info->free_extent_hook = NULL;
10478 fs_info->corrupt_blocks = NULL;
10479 fs_info->excluded_extents = NULL;
10482 free_chunk_cache_tree(&chunk_cache);
10483 free_device_cache_tree(&dev_cache);
10484 free_block_group_tree(&block_group_cache);
10485 free_device_extent_tree(&dev_extent_cache);
10486 free_extent_cache_tree(&seen);
10487 free_extent_cache_tree(&pending);
10488 free_extent_cache_tree(&reada);
10489 free_extent_cache_tree(&nodes);
10490 free_root_item_list(&normal_trees);
10491 free_root_item_list(&dropping_trees);
/*
 * Second teardown sequence: also frees the extent record cache and leaves
 * the fs_info pointers in place.  Presumably the restart path; its label is
 * on a line not shown.
 */
10494 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10495 free_extent_cache_tree(&seen);
10496 free_extent_cache_tree(&pending);
10497 free_extent_cache_tree(&reada);
10498 free_extent_cache_tree(&nodes);
10499 free_chunk_cache_tree(&chunk_cache);
10500 free_block_group_tree(&block_group_cache);
10501 free_device_cache_tree(&dev_cache);
10502 free_device_extent_tree(&dev_extent_cache);
10503 free_extent_record_cache(&extent_cache);
10504 free_root_item_list(&normal_trees);
10505 free_root_item_list(&dropping_trees);
10506 extent_io_tree_cleanup(&excluded_extents);
10511 * Check backrefs of a tree block given by @bytenr or @eb.
10513 * @root: the root containing the @bytenr or @eb
10514 * @eb: tree block extent buffer, can be NULL
10515 * @bytenr: bytenr of the tree block to search
10516 * @level: tree level of the tree block
10517 * @owner: owner of the tree block
10519 * Return >0 for any error found and output error message
10520 * Return 0 for no error found
/*
 * Check that a tree block has a valid backref in the extent tree.
 *
 * @root:   root that contains the block
 * @eb:     extent buffer of the block, may be NULL
 * @bytenr: start of the block
 * @level:  expected tree level
 * @owner:  expected owner root id
 *
 * The extent or metadata item for @bytenr is located by searching with
 * offset (u64)-1 and stepping back with btrfs_previous_extent_item().  Its
 * flags, generation, level and reference count are compared with the
 * expected values and BACKREF_MISMATCH is recorded on disagreement.  The
 * inline refs are then scanned: a TREE_BLOCK_REF whose offset equals
 * root->objectid or @owner is tested for, and a SHARED_BLOCK_REF is
 * validated by calling this function recursively on the parent block at
 * level + 1.  A reloc tree root skips that validation.  A keyed
 * TREE_BLOCK_REF item (offset = root->objectid) is also searched for; per
 * the existing comment this covers the case where the inline refs did not
 * contain the ref.
 *
 * Result bits seen here: BACKREF_MISSING, BACKREF_MISMATCH.  "backref lost"
 * is only printed when @eb is non-NULL.
 *
 * NOTE(review): this listing omits short lines (conditions, braces, gotos,
 * the return), so the guards around individual checks are not visible.
 */
10522 static int check_tree_block_ref(struct btrfs_root *root,
10523 struct extent_buffer *eb, u64 bytenr,
10524 int level, u64 owner)
10526 struct btrfs_key key;
10527 struct btrfs_root *extent_root = root->fs_info->extent_root;
10528 struct btrfs_path path;
10529 struct btrfs_extent_item *ei;
10530 struct btrfs_extent_inline_ref *iref;
10531 struct extent_buffer *leaf;
10537 u32 nodesize = root->fs_info->nodesize;
10540 int tree_reloc_root = 0;
/* Remember whether @bytenr is the root node of a tree reloc root. */
10545 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10546 btrfs_header_bytenr(root->node) == bytenr)
10547 tree_reloc_root = 1;
10549 btrfs_init_path(&path);
10550 key.objectid = bytenr;
10551 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10552 key.type = BTRFS_METADATA_ITEM_KEY;
10554 key.type = BTRFS_EXTENT_ITEM_KEY;
10555 key.offset = (u64)-1;
10557 /* Search for the backref in extent tree */
10558 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10560 err |= BACKREF_MISSING;
10563 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10565 err |= BACKREF_MISSING;
10569 leaf = path.nodes[0];
10570 slot = path.slots[0];
10571 btrfs_item_key_to_cpu(leaf, &key, slot);
10573 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * The level comes from key.offset for a METADATA_ITEM and from the tree
 * block info that follows the extent item otherwise; the inline refs start
 * after that.
 */
10575 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10576 skinny_level = (int)key.offset;
10577 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10579 struct btrfs_tree_block_info *info;
10581 info = (struct btrfs_tree_block_info *)(ei + 1);
10582 skinny_level = btrfs_tree_block_level(leaf, info);
10583 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10590 if (!(btrfs_extent_flags(leaf, ei) &
10591 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10593 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10594 key.objectid, nodesize,
10595 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10596 err = BACKREF_MISMATCH;
/* NOTE(review): reads @eb, which may be NULL; the guard is presumably on a line not shown. */
10598 header_gen = btrfs_header_generation(eb);
10599 extent_gen = btrfs_extent_generation(leaf, ei);
10600 if (header_gen != extent_gen) {
10602 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10603 key.objectid, nodesize, header_gen,
10605 err = BACKREF_MISMATCH;
10607 if (level != skinny_level) {
10609 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10610 key.objectid, nodesize, level, skinny_level);
10611 err = BACKREF_MISMATCH;
10613 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10615 "extent[%llu %u] is referred by other roots than %llu",
10616 key.objectid, nodesize, root->objectid);
10617 err = BACKREF_MISMATCH;
10622 * Iterate the extent/metadata item to find the exact backref
10624 item_size = btrfs_item_size_nr(leaf, slot);
10625 ptr = (unsigned long)iref;
10626 end = (unsigned long)ei + item_size;
10627 while (ptr < end) {
10628 iref = (struct btrfs_extent_inline_ref *)ptr;
10629 type = btrfs_extent_inline_ref_type(leaf, iref);
10630 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10632 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10633 (offset == root->objectid || offset == owner)) {
10635 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10637 * Backref of tree reloc root points to itself, no need
10638 * to check backref any more.
10640 if (tree_reloc_root)
10643 /* Check if the backref points to valid referencer */
10644 found_ref = !check_tree_block_ref(root, NULL,
10645 offset, level + 1, owner);
/* Step to the next inline ref; the stride depends on the ref type. */
10650 ptr += btrfs_extent_inline_ref_size(type);
10654 * Inlined extent item doesn't have what we need, check
10655 * TREE_BLOCK_REF_KEY
10658 btrfs_release_path(&path);
10659 key.objectid = bytenr;
10660 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10661 key.offset = root->objectid;
10663 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10668 err |= BACKREF_MISSING;
10670 btrfs_release_path(&path);
/* With eb == NULL (as in the recursive call above) no message is printed. */
10671 if (eb && (err & BACKREF_MISSING))
10672 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10673 bytenr, nodesize, owner, level);
10678 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10680 * Return >0 any error found and output error message
10681 * Return 0 for no error found
/*
 * Check one EXTENT_DATA item of a file tree leaf against the extent tree.
 *
 * @root: root that owns the leaf
 * @eb:   leaf containing the file extent item
 * @slot: slot of the item in @eb
 *
 * Inline extents and holes (disk bytenr 0) are tested for first; per the
 * existing comment there is nothing to check for them.  Otherwise:
 *   - disk_num_bytes and num_bytes must be aligned to the sector size
 *     (BYTES_UNALIGNED).  They are also added to the global counters
 *     data_bytes_allocated and data_bytes_referenced; whether only in the
 *     aligned case is on lines not shown.
 *   - the EXTENT_ITEM keyed (disk_bytenr, disk_num_bytes) must carry
 *     BTRFS_EXTENT_FLAG_DATA (BACKREF_MISMATCH).
 *   - a data backref is looked for in this order: inline refs, a keyed
 *     EXTENT_DATA_REF item, and finally a keyed SHARED_DATA_REF item whose
 *     offset is eb->start.  An inline EXTENT_DATA_REF counts when its root
 *     equals the leaf owner or root->objectid; an inline SHARED_DATA_REF is
 *     validated via check_tree_block_ref().  If none is found,
 *     BACKREF_MISSING is set and "backref lost" is printed.
 *
 * NOTE(review): this listing omits short lines (conditions, braces, gotos,
 * the return), so some guards are not visible here.
 */
10683 static int check_extent_data_item(struct btrfs_root *root,
10684 struct extent_buffer *eb, int slot)
10686 struct btrfs_file_extent_item *fi;
10687 struct btrfs_path path;
10688 struct btrfs_root *extent_root = root->fs_info->extent_root;
10689 struct btrfs_key fi_key;
10690 struct btrfs_key dbref_key;
10691 struct extent_buffer *leaf;
10692 struct btrfs_extent_item *ei;
10693 struct btrfs_extent_inline_ref *iref;
10694 struct btrfs_extent_data_ref *dref;
10697 u64 disk_num_bytes;
10698 u64 extent_num_bytes;
10705 int found_dbackref = 0;
10709 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10710 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10712 /* Nothing to check for hole and inline data extents */
10713 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10714 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10717 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10718 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10719 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10721 /* Check unaligned disk_num_bytes and num_bytes */
10722 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10724 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10725 fi_key.objectid, fi_key.offset, disk_num_bytes,
10726 root->fs_info->sectorsize);
10727 err |= BYTES_UNALIGNED;
10729 data_bytes_allocated += disk_num_bytes;
10731 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10733 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10734 fi_key.objectid, fi_key.offset, extent_num_bytes,
10735 root->fs_info->sectorsize);
10736 err |= BYTES_UNALIGNED;
10738 data_bytes_referenced += extent_num_bytes;
/* The owner recorded in the leaf header is one of the two accepted backref roots. */
10740 owner = btrfs_header_owner(eb);
10742 /* Check the extent item of the file extent in extent tree */
10743 btrfs_init_path(&path);
10744 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10745 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10746 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10748 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
/* From here on `slot` is reused for the position in the extent tree leaf. */
10752 leaf = path.nodes[0];
10753 slot = path.slots[0];
10754 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10756 extent_flags = btrfs_extent_flags(leaf, ei);
10758 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10760 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10761 disk_bytenr, disk_num_bytes,
10762 BTRFS_EXTENT_FLAG_DATA);
10763 err |= BACKREF_MISMATCH;
10766 /* Check data backref inside that extent item */
10767 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10768 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10769 ptr = (unsigned long)iref;
10770 end = (unsigned long)ei + item_size;
10771 while (ptr < end) {
10772 iref = (struct btrfs_extent_inline_ref *)ptr;
10773 type = btrfs_extent_inline_ref_type(leaf, iref);
/* For an EXTENT_DATA_REF the data ref payload starts at the offset field of the inline ref. */
10774 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10776 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10777 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10778 if (ref_root == owner || ref_root == root->objectid)
10779 found_dbackref = 1;
10780 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10781 found_dbackref = !check_tree_block_ref(root, NULL,
10782 btrfs_extent_inline_ref_offset(leaf, iref),
10786 if (found_dbackref)
10788 ptr += btrfs_extent_inline_ref_size(type);
10791 if (!found_dbackref) {
10792 btrfs_release_path(&path);
10794 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10795 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10796 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10797 dbref_key.offset = hash_extent_data_ref(root->objectid,
10798 fi_key.objectid, fi_key.offset);
10800 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10801 &dbref_key, &path, 0, 0);
10803 found_dbackref = 1;
10807 btrfs_release_path(&path);
10810 * Neither inlined nor EXTENT_DATA_REF found, try
10811 * SHARED_DATA_REF as last chance.
10813 dbref_key.objectid = disk_bytenr;
10814 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10815 dbref_key.offset = eb->start;
10817 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10818 &dbref_key, &path, 0, 0);
10820 found_dbackref = 1;
10826 if (!found_dbackref)
10827 err |= BACKREF_MISSING;
10828 btrfs_release_path(&path);
10829 if (err & BACKREF_MISSING) {
10830 error("data extent[%llu %llu] backref lost",
10831 disk_bytenr, disk_num_bytes);
10837 * Get real tree block level for the case like shared block
10838 * Return >= 0 as tree level
10839 * Return <0 for error
/*
 * Determine the tree level of the block at @bytenr from two sources and
 * return it when they agree.
 *
 * The extent tree is searched for the block's item (METADATA_ITEM key with
 * offset (u64)-1, then btrfs_previous_extent_item()).  The item must carry
 * BTRFS_EXTENT_FLAG_TREE_BLOCK.  Its generation is passed as the transid
 * argument of read_tree_block().  The backref level comes from key.offset
 * for a METADATA_ITEM and from the tree block info otherwise.  The block
 * itself is then read and its header level is compared with the backref
 * level; header_level is returned on the path after that comparison.
 *
 * Per the preceding comment, a negative value is returned on error.
 * NOTE(review): the error branches and their return values are on lines not
 * shown in this listing.
 */
10841 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10843 struct extent_buffer *eb;
10844 struct btrfs_path path;
10845 struct btrfs_key key;
10846 struct btrfs_extent_item *ei;
10853 /* Search extent tree for extent generation and level */
10854 key.objectid = bytenr;
10855 key.type = BTRFS_METADATA_ITEM_KEY;
10856 key.offset = (u64)-1;
10858 btrfs_init_path(&path);
10859 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10862 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10870 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10871 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10872 struct btrfs_extent_item);
10873 flags = btrfs_extent_flags(path.nodes[0], ei);
/* The item must carry the TREE_BLOCK flag. */
10874 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10879 /* Get transid for later read_tree_block() check */
10880 transid = btrfs_extent_generation(path.nodes[0], ei);
10882 /* Get backref level as one source */
10883 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10884 backref_level = key.offset;
10886 struct btrfs_tree_block_info *info;
10888 info = (struct btrfs_tree_block_info *)(ei + 1);
10889 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10891 btrfs_release_path(&path);
10893 /* Get level from tree block as an alternative source */
10894 eb = read_tree_block(fs_info, bytenr, transid);
10895 if (!extent_buffer_uptodate(eb)) {
10896 free_extent_buffer(eb);
10899 header_level = btrfs_header_level(eb);
10900 free_extent_buffer(eb);
/* Both sources are compared before header_level is returned. */
10902 if (header_level != backref_level)
10904 return header_level;
10907 btrfs_release_path(&path);
10912 * Check if a tree block backref is valid (points to a valid tree block)
10913 * if level == -1, level will be resolved
10914 * Return >0 for any error found and print error message
/*
 * Reads root @root_id, takes the first key of the tree block at @bytenr and
 * searches the root for that key with path.lowest_level = @level. The block
 * found at that level is compared with @bytenr and @level; mismatches set
 * REFERENCER_MISMATCH and lookup failures set REFERENCER_MISSING in err.
 */
10916 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10917 u64 bytenr, int level)
10919 struct btrfs_root *root;
10920 struct btrfs_key key;
10921 struct btrfs_path path;
10922 struct extent_buffer *eb;
10923 struct extent_buffer *node;
10924 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10928 /* Query level for level == -1 special case */
10930 level = query_tree_block_level(fs_info, bytenr);
10932 err |= REFERENCER_MISSING;
/* Key used to read the referencing root: (root_id, ROOT_ITEM, -1) */
10936 key.objectid = root_id;
10937 key.type = BTRFS_ROOT_ITEM_KEY;
10938 key.offset = (u64)-1;
10940 root = btrfs_read_fs_root(fs_info, &key);
10941 if (IS_ERR(root)) {
10942 err |= REFERENCER_MISSING;
10946 /* Read out the tree block to get item/node key */
10947 eb = read_tree_block(fs_info, bytenr, 0);
10948 if (!extent_buffer_uptodate(eb)) {
10949 err |= REFERENCER_MISSING;
10950 free_extent_buffer(eb);
10954 /* Empty tree, no need to check key */
10955 if (!btrfs_header_nritems(eb) && !level) {
10956 free_extent_buffer(eb);
/* First key of the block: read as a node key or as an item key */
10961 btrfs_node_key_to_cpu(eb, &key, 0);
10963 btrfs_item_key_to_cpu(eb, &key, 0);
10965 free_extent_buffer(eb);
10967 btrfs_init_path(&path);
/* lowest_level = level so that path.nodes[level] is inspected afterwards */
10968 path.lowest_level = level;
10969 /* Search with the first key, to ensure we can reach it */
10970 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10972 err |= REFERENCER_MISSING;
/* The block reached at @level must be the very block being checked */
10976 node = path.nodes[level];
10977 if (btrfs_header_bytenr(node) != bytenr) {
10979 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10980 bytenr, nodesize, bytenr,
10981 btrfs_header_bytenr(node));
10982 err |= REFERENCER_MISMATCH;
10984 if (btrfs_header_level(node) != level) {
10986 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10987 bytenr, nodesize, level,
10988 btrfs_header_level(node));
10989 err |= REFERENCER_MISMATCH;
10993 btrfs_release_path(&path);
/* Two report variants exist for a missing referencer: without and with level */
10995 if (err & REFERENCER_MISSING) {
10997 error("extent [%llu %d] lost referencer (owner: %llu)",
10998 bytenr, nodesize, root_id);
11001 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11002 bytenr, nodesize, root_id, level);
11009 * Check if tree block @eb is tree reloc root.
11010 * Return 0 if it's not or any problem happens
11011 * Return 1 if it's a tree reloc root
/*
 * Reads, without caching, the root keyed
 * (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM, owner of @eb) and compares the bytenr
 * of its root node with the bytenr of @eb. The root that was read is released
 * with btrfs_free_fs_root().
 */
11013 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11014 struct extent_buffer *eb)
11016 struct btrfs_root *tree_reloc_root;
11017 struct btrfs_key key;
11018 u64 bytenr = btrfs_header_bytenr(eb);
11019 u64 owner = btrfs_header_owner(eb);
/* The reloc root is keyed by the owner recorded in @eb's header */
11022 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11023 key.offset = owner;
11024 key.type = BTRFS_ROOT_ITEM_KEY;
11026 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11027 if (IS_ERR(tree_reloc_root))
/* @eb matches when it sits at the reloc root's node bytenr */
11030 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
11032 btrfs_free_fs_root(tree_reloc_root);
11037 * Check referencer for shared block backref
11038 * If level == -1, this function will resolve the level.
/*
 * Reads the tree block at @parent and scans its node pointers for one equal to
 * @bytenr. When no parent is found, an error is printed and REFERENCER_MISSING
 * is returned.
 */
11040 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11041 u64 parent, u64 bytenr, int level)
11043 struct extent_buffer *eb;
11045 int found_parent = 0;
11048 eb = read_tree_block(fs_info, parent, 0);
11049 if (!extent_buffer_uptodate(eb))
11053 level = query_tree_block_level(fs_info, bytenr);
11057 /* It's possible it's a tree reloc root */
11058 if (parent == bytenr) {
11059 if (is_tree_reloc_root(fs_info, eb))
/* The parent block's level is required to be exactly level + 1 */
11064 if (level + 1 != btrfs_header_level(eb))
11067 nr = btrfs_header_nritems(eb);
/* Look for a pointer slot in the parent that targets @bytenr */
11068 for (i = 0; i < nr; i++) {
11069 if (bytenr == btrfs_node_blockptr(eb, i)) {
11075 free_extent_buffer(eb);
/* NOTE(review): level is an int but is printed with %u below — confirm */
11076 if (!found_parent) {
11078 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
11079 bytenr, fs_info->nodesize, parent, level);
11080 return REFERENCER_MISSING;
11086 * Check referencer for normal (inlined) data ref
11087 * If len == 0, it will be resolved by searching in extent tree
/*
 * Iterates the EXTENT_DATA items of inode @objectid in root @root_id, matching
 * file extents against @bytenr, @len and the backref offset, and compares
 * found_count with @count. A differing count is reported and
 * REFERENCER_MISSING is returned.
 */
11089 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
11090 u64 root_id, u64 objectid, u64 offset,
11091 u64 bytenr, u64 len, u32 count)
11093 struct btrfs_root *root;
11094 struct btrfs_root *extent_root = fs_info->extent_root;
11095 struct btrfs_key key;
11096 struct btrfs_path path;
11097 struct extent_buffer *leaf;
11098 struct btrfs_file_extent_item *fi;
11099 u32 found_count = 0;
/* Extent tree lookup of the EXTENT_ITEM for @bytenr */
11104 key.objectid = bytenr;
11105 key.type = BTRFS_EXTENT_ITEM_KEY;
11106 key.offset = (u64)-1;
11108 btrfs_init_path(&path);
11109 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11112 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11115 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
/* The item found must be an EXTENT_ITEM for exactly @bytenr */
11116 if (key.objectid != bytenr ||
11117 key.type != BTRFS_EXTENT_ITEM_KEY)
11120 btrfs_release_path(&path);
/* Read the root that is supposed to reference the extent */
11122 key.objectid = root_id;
11123 key.type = BTRFS_ROOT_ITEM_KEY;
11124 key.offset = (u64)-1;
11125 btrfs_init_path(&path);
11127 root = btrfs_read_fs_root(fs_info, &key);
11131 key.objectid = objectid;
11132 key.type = BTRFS_EXTENT_DATA_KEY;
11134 * It can be nasty as data backref offset is
11135 * file offset - file extent offset, which is smaller or
11136 * equal to original backref offset. The only special case is
11137 * overflow. So we need to special check and do further search.
11139 key.offset = offset & (1ULL << 63) ? 0 : offset;
11141 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11146 * Search afterwards to get correct one
11147 * NOTE: As we must do a comprehensive check on the data backref to
11148 * make sure the dref count also matches, we must iterate all file
11149 * extents for that inode.
11152 leaf = path.nodes[0];
11153 slot = path.slots[0];
11155 if (slot >= btrfs_header_nritems(leaf))
11157 btrfs_item_key_to_cpu(leaf, &key, slot);
/* Items of another inode or another type are tested for here */
11158 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11160 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11162 * Except normal disk bytenr and disk num bytes, we still
11163 * need to do extra check on dbackref offset as
11164 * dbackref offset = file_offset - file_extent_offset
11166 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11167 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11168 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11173 ret = btrfs_next_item(root, &path);
11178 btrfs_release_path(&path);
/* The number of matching file extents must equal the backref's count */
11179 if (found_count != count) {
11181 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
11182 bytenr, len, root_id, objectid, offset, count, found_count);
11183 return REFERENCER_MISSING;
11189 * Check if the referencer of a shared data backref exists
/*
 * Reads the tree block at @parent and scans its items for an EXTENT_DATA file
 * extent whose disk bytenr equals @bytenr. When none is found, an error is
 * printed and REFERENCER_MISSING is returned.
 */
11191 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
11192 u64 parent, u64 bytenr)
11194 struct extent_buffer *eb;
11195 struct btrfs_key key;
11196 struct btrfs_file_extent_item *fi;
11198 int found_parent = 0;
11201 eb = read_tree_block(fs_info, parent, 0);
11202 if (!extent_buffer_uptodate(eb))
11205 nr = btrfs_header_nritems(eb);
11206 for (i = 0; i < nr; i++) {
11207 btrfs_item_key_to_cpu(eb, &key, i);
/* Only EXTENT_DATA items are of interest */
11208 if (key.type != BTRFS_EXTENT_DATA_KEY)
11211 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
/* Inline extents are singled out before the bytenr test (presumably skipped) */
11212 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
11215 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
11222 free_extent_buffer(eb);
11223 if (!found_parent) {
11224 error("shared extent %llu referencer lost (parent: %llu)",
11226 return REFERENCER_MISSING;
11232 * This function will check a given extent item, including its backref and
11233 * itself (like crossing stripe boundary and type)
11235 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Adds the extent's size to the global bytes_used, checks the item size and
 * (for metadata) stripe crossing, then walks the inline backrefs stored after
 * the extent item and dispatches each one to the matching check_*_backref()
 * helper according to its ref type.
 */
11237 static int check_extent_item(struct btrfs_fs_info *fs_info,
11238 struct extent_buffer *eb, int slot)
11240 struct btrfs_extent_item *ei;
11241 struct btrfs_extent_inline_ref *iref;
11242 struct btrfs_extent_data_ref *dref;
11246 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11247 u32 item_size = btrfs_item_size_nr(eb, slot);
11252 struct btrfs_key key;
11256 btrfs_item_key_to_cpu(eb, &key, slot);
/* EXTENT_ITEM accounts key.offset bytes; the other case accounts nodesize */
11257 if (key.type == BTRFS_EXTENT_ITEM_KEY)
11258 bytes_used += key.offset;
11260 bytes_used += nodesize;
11262 if (item_size < sizeof(*ei)) {
11264 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
11265 * old thing when on disk format is still un-determined.
11266 * No need to care about it anymore
11268 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
11272 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
11273 flags = btrfs_extent_flags(eb, ei);
11275 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
11277 if (metadata && check_crossing_stripes(global_info, key.objectid,
11279 error("bad metadata [%llu, %llu) crossing stripe boundary",
11280 key.objectid, key.objectid + nodesize);
11281 err |= CROSSING_STRIPE_BOUNDARY;
/* ptr starts right behind the fixed-size extent item */
11284 ptr = (unsigned long)(ei + 1);
11286 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
11287 /* Old EXTENT_ITEM metadata */
11288 struct btrfs_tree_block_info *info;
11290 info = (struct btrfs_tree_block_info *)ptr;
11291 level = btrfs_tree_block_level(eb, info);
/* Inline refs begin after the tree_block_info in this layout */
11292 ptr += sizeof(struct btrfs_tree_block_info);
11294 /* New METADATA_ITEM */
11295 level = key.offset;
/* end marks the first byte past this item */
11297 end = (unsigned long)ei + item_size;
11300 /* Reached extent item end normally */
11304 /* Beyond extent item end, wrong item size */
11306 err |= ITEM_SIZE_MISMATCH;
11307 error("extent item at bytenr %llu slot %d has wrong size",
11312 /* Now check every backref in this extent item */
11313 iref = (struct btrfs_extent_inline_ref *)ptr;
11314 type = btrfs_extent_inline_ref_type(eb, iref);
11315 offset = btrfs_extent_inline_ref_offset(eb, iref);
11317 case BTRFS_TREE_BLOCK_REF_KEY:
11318 ret = check_tree_block_backref(fs_info, offset, key.objectid,
11322 case BTRFS_SHARED_BLOCK_REF_KEY:
11323 ret = check_shared_block_backref(fs_info, offset, key.objectid,
11327 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref structure is overlaid on the inline ref's offset field */
11328 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11329 ret = check_extent_data_backref(fs_info,
11330 btrfs_extent_data_ref_root(eb, dref),
11331 btrfs_extent_data_ref_objectid(eb, dref),
11332 btrfs_extent_data_ref_offset(eb, dref),
11333 key.objectid, key.offset,
11334 btrfs_extent_data_ref_count(eb, dref));
11337 case BTRFS_SHARED_DATA_REF_KEY:
11338 ret = check_shared_data_backref(fs_info, offset, key.objectid);
11342 error("extent[%llu %d %llu] has unknown ref type: %d",
11343 key.objectid, key.type, key.offset, type);
11344 err |= UNKNOWN_TYPE;
/* Advance by the size of the inline ref of this type */
11348 ptr += btrfs_extent_inline_ref_size(type);
11356 * Check if a dev extent item is referred correctly by its chunk
/*
 * Looks up, in the chunk tree, the chunk named by the dev extent's chunk
 * objectid/offset and scans its stripes for one whose devid and offset equal
 * the dev extent key's objectid and offset. When no chunk is found, an error
 * is printed and REFERENCER_MISSING is returned.
 */
11358 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
11359 struct extent_buffer *eb, int slot)
11361 struct btrfs_root *chunk_root = fs_info->chunk_root;
11362 struct btrfs_dev_extent *ptr;
11363 struct btrfs_path path;
11364 struct btrfs_key chunk_key;
11365 struct btrfs_key devext_key;
11366 struct btrfs_chunk *chunk;
11367 struct extent_buffer *l;
11371 int found_chunk = 0;
11374 btrfs_item_key_to_cpu(eb, &devext_key, slot);
11375 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
11376 length = btrfs_dev_extent_length(eb, ptr);
/* The dev extent itself names the chunk it belongs to */
11378 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
11379 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11380 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
11382 btrfs_init_path(&path);
11383 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11388 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
11389 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
/* The chunk's stripe length is compared with the dev extent length */
11394 if (btrfs_stripe_length(fs_info, l, chunk) != length)
11397 num_stripes = btrfs_chunk_num_stripes(l, chunk);
11398 for (i = 0; i < num_stripes; i++) {
11399 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
11400 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
/* devext_key is (devid, DEV_EXTENT, physical offset) as compared here */
11402 if (devid == devext_key.objectid &&
11403 offset == devext_key.offset) {
11409 btrfs_release_path(&path);
11410 if (!found_chunk) {
11412 "device extent[%llu, %llu, %llu] did not find the related chunk",
11413 devext_key.objectid, devext_key.offset, length);
11414 return REFERENCER_MISSING;
11420 * Check if the used space is correct with the dev item
/*
 * Sums the lengths of the DEV_EXTENT items of this device in the dev tree and
 * compares the total with the bytes_used field of the dev item. A differing
 * total is reported and ACCOUNTING_MISMATCH is returned; a failed initial
 * search is reported and REFERENCER_MISSING is returned.
 */
11422 static int check_dev_item(struct btrfs_fs_info *fs_info,
11423 struct extent_buffer *eb, int slot)
11425 struct btrfs_root *dev_root = fs_info->dev_root;
11426 struct btrfs_dev_item *dev_item;
11427 struct btrfs_path path;
11428 struct btrfs_key key;
11429 struct btrfs_dev_extent *ptr;
11435 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
11436 dev_id = btrfs_device_id(eb, dev_item);
11437 used = btrfs_device_bytes_used(eb, dev_item);
/* Dev extents are keyed by the device id */
11439 key.objectid = dev_id;
11440 key.type = BTRFS_DEV_EXTENT_KEY;
11443 btrfs_init_path(&path);
11444 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* key is reloaded from the dev item so the message shows the dev item key */
11446 btrfs_item_key_to_cpu(eb, &key, slot);
11447 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
11448 key.objectid, key.type, key.offset);
11449 btrfs_release_path(&path);
11450 return REFERENCER_MISSING;
11453 /* Iterate dev_extents to calculate the used space of a device */
11455 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11458 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
/* Keys of a higher device id are tested for first */
11459 if (key.objectid > dev_id)
11461 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11464 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11465 struct btrfs_dev_extent);
11466 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11468 ret = btrfs_next_item(dev_root, &path);
11472 btrfs_release_path(&path);
/*
 * NOTE(review): key is re-read from the dev item just below, yet the message
 * arguments are BTRFS_ROOT_TREE_OBJECTID, BTRFS_DEV_EXTENT_KEY and dev_id
 * rather than the fields of key — confirm this is intended.
 */
11474 if (used != total) {
11475 btrfs_item_key_to_cpu(eb, &key, slot);
11477 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11478 total, used, BTRFS_ROOT_TREE_OBJECTID,
11479 BTRFS_DEV_EXTENT_KEY, dev_id);
11480 return ACCOUNTING_MISMATCH;
11486 * Check a block group item with its referencer (chunk) and its used space
11487 * with extent/metadata item
/*
 * Looks up the chunk item at the block group's start in the chunk tree and
 * compares its length, then iterates the extent tree over the block group's
 * range, summing extent sizes into total and checking each extent's DATA /
 * TREE_BLOCK flag against the block group flags. total is finally compared
 * with the block group's used value.
 */
11489 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11490 struct extent_buffer *eb, int slot)
11492 struct btrfs_root *extent_root = fs_info->extent_root;
11493 struct btrfs_root *chunk_root = fs_info->chunk_root;
11494 struct btrfs_block_group_item *bi;
11495 struct btrfs_block_group_item bg_item;
11496 struct btrfs_path path;
11497 struct btrfs_key bg_key;
11498 struct btrfs_key chunk_key;
11499 struct btrfs_key extent_key;
11500 struct btrfs_chunk *chunk;
11501 struct extent_buffer *leaf;
11502 struct btrfs_extent_item *ei;
11503 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11511 btrfs_item_key_to_cpu(eb, &bg_key, slot);
11512 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* Copy the item out of the extent buffer into a local struct */
11513 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11514 used = btrfs_block_group_used(&bg_item);
11515 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk is keyed by the block group start (bg_key.objectid) as offset */
11517 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11518 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11519 chunk_key.offset = bg_key.objectid;
11521 btrfs_init_path(&path);
11522 /* Search for the referencer chunk */
11523 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11526 "block group[%llu %llu] did not find the related chunk item",
11527 bg_key.objectid, bg_key.offset);
11528 err |= REFERENCER_MISSING;
11530 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11531 struct btrfs_chunk);
11532 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11535 "block group[%llu %llu] related chunk item length does not match",
11536 bg_key.objectid, bg_key.offset);
11537 err |= REFERENCER_MISMATCH;
11540 btrfs_release_path(&path);
11542 /* Search from the block group bytenr */
11543 extent_key.objectid = bg_key.objectid;
11544 extent_key.type = 0;
11545 extent_key.offset = 0;
11547 btrfs_init_path(&path);
11548 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11552 /* Iterate extent tree to account used space */
11554 leaf = path.nodes[0];
11556 /* Search slot can point to the last item beyond leaf nritems */
11557 if (path.slots[0] >= btrfs_header_nritems(leaf))
11560 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* bg_key.objectid + bg_key.offset is the end of the block group range */
11561 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11564 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11565 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11567 if (extent_key.objectid < bg_key.objectid)
/* METADATA_ITEM is sized separately; other extents add key.offset bytes */
11570 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11573 total += extent_key.offset;
11575 ei = btrfs_item_ptr(leaf, path.slots[0],
11576 struct btrfs_extent_item);
11577 flags = btrfs_extent_flags(leaf, ei);
/* Data extents need a DATA block group */
11578 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11579 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11581 "bad extent[%llu, %llu) type mismatch with chunk",
11582 extent_key.objectid,
11583 extent_key.objectid + extent_key.offset);
11584 err |= CHUNK_TYPE_MISMATCH;
/* Tree blocks need a SYSTEM or METADATA block group */
11586 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11587 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11588 BTRFS_BLOCK_GROUP_METADATA))) {
11590 "bad extent[%llu, %llu) type mismatch with chunk",
11591 extent_key.objectid,
11592 extent_key.objectid + nodesize);
11593 err |= CHUNK_TYPE_MISMATCH;
11597 ret = btrfs_next_item(extent_root, &path);
11603 btrfs_release_path(&path);
/* Sum of extent sizes must equal the block group's recorded usage */
11605 if (total != used) {
11607 "block group[%llu %llu] used %llu but extent items used %llu",
11608 bg_key.objectid, bg_key.offset, used, total);
11609 err |= ACCOUNTING_MISMATCH;
11615 * Check a chunk item.
11616 * Including checking all referred dev_extents and block group
/*
 * Validates the chunk with btrfs_check_chunk_valid(), looks up the block group
 * item keyed (chunk start, BLOCK_GROUP_ITEM, chunk length) in the extent tree
 * and compares its flags with the chunk type, then, for every stripe, looks up
 * the dev extent keyed (devid, DEV_EXTENT, stripe offset) in the dev tree and
 * compares its chunk objectid, chunk offset and length with this chunk.
 */
11618 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11619 struct extent_buffer *eb, int slot)
11621 struct btrfs_root *extent_root = fs_info->extent_root;
11622 struct btrfs_root *dev_root = fs_info->dev_root;
11623 struct btrfs_path path;
11624 struct btrfs_key chunk_key;
11625 struct btrfs_key bg_key;
11626 struct btrfs_key devext_key;
11627 struct btrfs_chunk *chunk;
11628 struct extent_buffer *leaf;
11629 struct btrfs_block_group_item *bi;
11630 struct btrfs_block_group_item bg_item;
11631 struct btrfs_dev_extent *ptr;
11643 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11644 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11645 length = btrfs_chunk_length(eb, chunk);
/* chunk_key.offset is the chunk start; chunk_end is the exclusive end */
11646 chunk_end = chunk_key.offset + length;
11647 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11650 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11652 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11655 type = btrfs_chunk_type(eb, chunk);
/* The matching block group item uses the same start and length */
11657 bg_key.objectid = chunk_key.offset;
11658 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11659 bg_key.offset = length;
11661 btrfs_init_path(&path);
11662 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11665 "chunk[%llu %llu) did not find the related block group item",
11666 chunk_key.offset, chunk_end);
11667 err |= REFERENCER_MISSING;
11669 leaf = path.nodes[0];
11670 bi = btrfs_item_ptr(leaf, path.slots[0],
11671 struct btrfs_block_group_item);
11672 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
/*
 * NOTE(review): a flags mismatch sets REFERENCER_MISSING below, while the
 * chunk length mismatch in check_block_group_item() sets REFERENCER_MISMATCH —
 * confirm which bit is intended here.
 */
11674 if (btrfs_block_group_flags(&bg_item) != type) {
11676 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11677 chunk_key.offset, chunk_end, type,
11678 btrfs_block_group_flags(&bg_item));
11679 err |= REFERENCER_MISSING;
11683 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11684 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11685 for (i = 0; i < num_stripes; i++) {
/* The path is released and re-initialised before every stripe lookup */
11686 btrfs_release_path(&path);
11687 btrfs_init_path(&path);
11688 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11689 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11690 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11692 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11695 goto not_match_dev;
11697 leaf = path.nodes[0];
11698 ptr = btrfs_item_ptr(leaf, path.slots[0],
11699 struct btrfs_dev_extent);
11700 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11701 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
/* The dev extent must point back at this chunk and have the stripe length */
11702 if (objectid != chunk_key.objectid ||
11703 offset != chunk_key.offset ||
11704 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11705 goto not_match_dev;
/*
 * NOTE(review): the message below passes chunk_key.objectid as the range
 * start, whereas the other "chunk[%llu %llu)" messages in this function pass
 * chunk_key.offset — likely an inconsistency, confirm.
 */
11708 err |= BACKREF_MISSING;
11710 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11711 chunk_key.objectid, chunk_end, i);
11714 btrfs_release_path(&path);
11720 * Main entry function to check known items and update related accounting info
/*
 * Reads the key at slot in leaf @eb and dispatches on key.type to the matching
 * check_*() helper; EXTENT_CSUM items only add their item size to
 * total_csum_bytes. slot is then advanced while items remain.
 */
11722 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11724 struct btrfs_fs_info *fs_info = root->fs_info;
11725 struct btrfs_key key;
11728 struct btrfs_extent_data_ref *dref;
11733 btrfs_item_key_to_cpu(eb, &key, slot);
11737 case BTRFS_EXTENT_DATA_KEY:
11738 ret = check_extent_data_item(root, eb, slot);
11741 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11742 ret = check_block_group_item(fs_info, eb, slot);
11745 case BTRFS_DEV_ITEM_KEY:
11746 ret = check_dev_item(fs_info, eb, slot);
11749 case BTRFS_CHUNK_ITEM_KEY:
11750 ret = check_chunk_item(fs_info, eb, slot);
11753 case BTRFS_DEV_EXTENT_KEY:
11754 ret = check_dev_extent_item(fs_info, eb, slot);
/* Both extent key types share one checker */
11757 case BTRFS_EXTENT_ITEM_KEY:
11758 case BTRFS_METADATA_ITEM_KEY:
11759 ret = check_extent_item(fs_info, eb, slot);
/* Checksum items are only accounted, not verified here */
11762 case BTRFS_EXTENT_CSUM_KEY:
11763 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* Keyed (non-inline) backrefs: key.offset is passed as the root id / parent */
11765 case BTRFS_TREE_BLOCK_REF_KEY:
11766 ret = check_tree_block_backref(fs_info, key.offset,
11770 case BTRFS_EXTENT_DATA_REF_KEY:
11771 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11772 ret = check_extent_data_backref(fs_info,
11773 btrfs_extent_data_ref_root(eb, dref),
11774 btrfs_extent_data_ref_objectid(eb, dref),
11775 btrfs_extent_data_ref_offset(eb, dref),
11777 btrfs_extent_data_ref_count(eb, dref));
11780 case BTRFS_SHARED_BLOCK_REF_KEY:
11781 ret = check_shared_block_backref(fs_info, key.offset,
11785 case BTRFS_SHARED_DATA_REF_KEY:
11786 ret = check_shared_data_backref(fs_info, key.offset,
/* Move on to the next slot while any remain in the leaf */
11794 if (++slot < btrfs_header_nritems(eb))
11801 * Helper function for later fs/subvol tree check. To determine if a tree
11802 * block should be checked.
11803 * This function ensures that only the direct referencer with the lowest
11804 * rootid checks a fs/subvolume tree block.
11806 * Backref check at extent tree would detect errors like missing subvolume
11807 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * Finds the extent item of @eb in the extent tree and walks its inline refs.
 * A TREE_BLOCK_REF whose offset (the referencing root id) is lower than
 * root->objectid ends the walk early after releasing the path.
 */
11809 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11811 struct btrfs_root *extent_root = root->fs_info->extent_root;
11812 struct btrfs_key key;
11813 struct btrfs_path path;
11814 struct extent_buffer *leaf;
11816 struct btrfs_extent_item *ei;
11822 struct btrfs_extent_inline_ref *iref;
11825 btrfs_init_path(&path);
11826 key.objectid = btrfs_header_bytenr(eb);
11827 key.type = BTRFS_METADATA_ITEM_KEY;
11828 key.offset = (u64)-1;
11831 * Any failure in backref resolving means we can't determine
11832 * whom the tree block belongs to.
11833 * So in that case, we need to check that tree block
11835 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11839 ret = btrfs_previous_extent_item(extent_root, &path,
11840 btrfs_header_bytenr(eb));
11844 leaf = path.nodes[0];
11845 slot = path.slots[0];
11846 btrfs_item_key_to_cpu(leaf, &key, slot);
11847 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* METADATA_ITEM: inline refs follow the extent item directly */
11849 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11850 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11852 struct btrfs_tree_block_info *info;
/* Otherwise a tree_block_info sits between the item and the inline refs */
11854 info = (struct btrfs_tree_block_info *)(ei + 1);
11855 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11858 item_size = btrfs_item_size_nr(leaf, slot);
11859 ptr = (unsigned long)iref;
11860 end = (unsigned long)ei + item_size;
11861 while (ptr < end) {
11862 iref = (struct btrfs_extent_inline_ref *)ptr;
11863 type = btrfs_extent_inline_ref_type(leaf, iref);
11864 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11867 * We only check the tree block if current root is
11868 * the lowest referencer of it.
11870 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11871 offset < root->objectid) {
11872 btrfs_release_path(&path);
11876 ptr += btrfs_extent_inline_ref_size(type);
11879 * Normally we should also check keyed tree block ref, but that may be
11880 * very time consuming. Inlined ref should already make us skip a lot
11881 * of refs now. So skip search keyed tree block ref.
11885 btrfs_release_path(&path);
11890 * Traversal function for tree block. We will do:
11891 * 1) Skip shared fs/subvolume tree blocks
11892 * 2) Update related bytes accounting
11893 * 3) Pre-order traversal
/*
 * Checks @node and then each of its children recursively. Byte counters
 * (total_btree_bytes, total_fs_tree_bytes, total_extent_tree_bytes,
 * btree_space_waste) are updated along the way; leaves are passed to
 * check_leaf_items().
 */
11895 static int traverse_tree_block(struct btrfs_root *root,
11896 struct extent_buffer *node)
11898 struct extent_buffer *eb;
11899 struct btrfs_key key;
11900 struct btrfs_key drop_key;
11908 * Skip shared fs/subvolume tree block, in that case they will
11909 * be checked by referencer with lowest rootid
11911 if (is_fstree(root->objectid) && !should_check(root, node))
11914 /* Update bytes accounting */
11915 total_btree_bytes += node->len;
11916 if (fs_root_objectid(btrfs_header_owner(node)))
11917 total_fs_tree_bytes += node->len;
11918 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11919 total_extent_tree_bytes += node->len;
11921 /* pre-order traversal, check itself first */
11922 level = btrfs_header_level(node);
11923 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11924 btrfs_header_level(node),
11925 btrfs_header_owner(node));
11929 "check %s failed root %llu bytenr %llu level %d, force continue check",
11930 level ? "node":"leaf", root->objectid,
11931 btrfs_header_bytenr(node), btrfs_header_level(node));
/* Leaf handling: account free space as waste and check every item */
11934 btree_space_waste += btrfs_leaf_free_space(root, node);
11935 ret = check_leaf_items(root, node);
11940 nr = btrfs_header_nritems(node);
11941 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
/* Node handling: unused key pointer slots count as waste */
11942 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11943 sizeof(struct btrfs_key_ptr);
11945 /* Then check all its children */
11946 for (i = 0; i < nr; i++) {
11947 u64 blocknr = btrfs_node_blockptr(node, i);
11949 btrfs_node_key_to_cpu(node, &key, i);
/* Children at drop_level are tested against the root's drop_progress key */
11950 if (level == root->root_item.drop_level &&
11951 is_dropped_key(&key, &drop_key))
11955 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11956 * to call the function itself.
11958 eb = read_tree_block(root->fs_info, blocknr, 0);
11959 if (extent_buffer_uptodate(eb)) {
11960 ret = traverse_tree_block(root, eb);
11963 free_extent_buffer(eb);
11970 * Low memory usage version of check_chunks_and_extents.
/*
 * Traverses the chunk tree and the tree root, then iterates the ROOT_ITEMs of
 * the tree root starting at BTRFS_EXTENT_TREE_OBJECTID and traverses every
 * tree that can be read. Reloc trees are read without caching and freed after
 * traversal.
 */
11972 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
11974 struct btrfs_path path;
11975 struct btrfs_key key;
11976 struct btrfs_root *root1;
11977 struct btrfs_root *root;
11978 struct btrfs_root *cur_root;
11982 root = fs_info->fs_root;
11984 root1 = root->fs_info->chunk_root;
11985 ret = traverse_tree_block(root1, root1->node);
11988 root1 = root->fs_info->tree_root;
11989 ret = traverse_tree_block(root1, root1->node);
11992 btrfs_init_path(&path);
11993 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11995 key.type = BTRFS_ROOT_ITEM_KEY;
/* root1 still points at the tree root here */
11997 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
/* NOTE(review): "treet" in the message below looks like a typo for "tree" */
11999 error("cannot find extent treet in tree_root");
12004 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12005 if (key.type != BTRFS_ROOT_ITEM_KEY)
12007 key.offset = (u64)-1;
/* Reloc trees bypass the root cache and are freed explicitly further down */
12009 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12010 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
12013 cur_root = btrfs_read_fs_root(root->fs_info, &key);
/* NOTE(review): key.objectid is printed with %lld; other messages use %llu */
12014 if (IS_ERR(cur_root) || !cur_root) {
12015 error("failed to read tree: %lld", key.objectid);
12019 ret = traverse_tree_block(cur_root, cur_root->node);
12022 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12023 btrfs_free_fs_root(cur_root);
12025 ret = btrfs_next_item(root1, &path);
12031 btrfs_release_path(&path);
/*
 * Entry point for the extent check: prints a banner when progress reporting is
 * disabled, then runs the low-memory checker if check_mode is
 * CHECK_MODE_LOWMEM and check_chunks_and_extents() otherwise.
 */
12035 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
12039 if (!ctx.progress_enabled)
12040 fprintf(stderr, "checking extents\n");
12041 if (check_mode == CHECK_MODE_LOWMEM)
12042 ret = check_chunks_and_extents_v2(fs_info);
12044 ret = check_chunks_and_extents(fs_info);
/*
 * Re-initialises the root node of @root: block c gets a zeroed header that is
 * refilled with level, bytenr, generation, backref revision, owner, fsid and
 * chunk tree uuid, and is marked dirty. When c sits at the same bytenr as the
 * old root node, the root item is updated in the tree root. The root is
 * finally added to the dirty list.
 */
12049 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
12050 struct btrfs_root *root, int overwrite)
12052 struct extent_buffer *c;
12053 struct extent_buffer *old = root->node;
12056 struct btrfs_disk_key disk_key = {0,0,0};
12062 extent_buffer_get(c);
/* A fresh block owned by this root is allocated here */
12065 c = btrfs_alloc_free_block(trans, root,
12066 root->fs_info->nodesize,
12067 root->root_key.objectid,
12068 &disk_key, level, 0, 0);
12071 extent_buffer_get(c);
/* Wipe the header, then fill in the fields of an empty root block */
12075 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
12076 btrfs_set_header_level(c, level);
12077 btrfs_set_header_bytenr(c, c->start);
12078 btrfs_set_header_generation(c, trans->transid);
12079 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
12080 btrfs_set_header_owner(c, root->root_key.objectid);
12082 write_extent_buffer(c, root->fs_info->fsid,
12083 btrfs_header_fsid(), BTRFS_FSID_SIZE);
12085 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
12086 btrfs_header_chunk_tree_uuid(c),
12089 btrfs_mark_buffer_dirty(c);
12091 * this case can happen in the following case:
12093 * 1.overwrite previous root.
12095 * 2.reinit reloc data root, this is because we skip pin
12096 * down reloc data tree before which means we can allocate
12097 * same block bytenr here.
12099 if (old->start == c->start) {
12100 btrfs_set_root_generation(&root->root_item,
12102 root->root_item.level = btrfs_header_level(root->node);
12103 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12104 &root->root_key, &root->root_item);
12106 free_extent_buffer(c);
12110 free_extent_buffer(old);
12112 add_root_to_dirty_list(root);
/*
 * Pins @eb and, recursively, the tree blocks reachable from it. Ranges already
 * marked EXTENT_DIRTY in fs_info->pinned_extents are tested for first.
 * ROOT_ITEMs found in @eb are followed to their root blocks (extent root and
 * reloc roots excepted); node pointers are followed too, except that at
 * level 1 with @tree_root == 0 the child is pinned by bytenr without being
 * read.
 */
12116 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12117 struct extent_buffer *eb, int tree_root)
12119 struct extent_buffer *tmp;
12120 struct btrfs_root_item *ri;
12121 struct btrfs_key key;
12123 int level = btrfs_header_level(eb);
12129 * If we have pinned this block before, don't pin it again.
12130 * This can not only avoid forever loop with broken filesystem
12131 * but also give us some speedups.
12133 if (test_range_bit(&fs_info->pinned_extents, eb->start,
12134 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
12137 btrfs_pin_extent(fs_info, eb->start, eb->len);
12139 nritems = btrfs_header_nritems(eb);
12140 for (i = 0; i < nritems; i++) {
12142 btrfs_item_key_to_cpu(eb, &key, i);
/* Only ROOT_ITEMs lead to further trees */
12143 if (key.type != BTRFS_ROOT_ITEM_KEY)
12145 /* Skip the extent root and reloc roots */
12146 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
12147 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
12148 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
12150 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
12151 bytenr = btrfs_disk_root_bytenr(eb, ri);
12154 * If at any point we start needing the real root we
12155 * will have to build a stump root for the root we are
12156 * in, but for now this doesn't actually use the root so
12157 * just pass in extent_root.
12159 tmp = read_tree_block(fs_info, bytenr, 0);
12160 if (!extent_buffer_uptodate(tmp)) {
12161 fprintf(stderr, "Error reading root block\n");
/* Trees reached through a ROOT_ITEM are walked with tree_root = 0 */
12164 ret = pin_down_tree_blocks(fs_info, tmp, 0);
12165 free_extent_buffer(tmp);
12169 bytenr = btrfs_node_blockptr(eb, i);
12171 /* If we aren't the tree root don't read the block */
12172 if (level == 1 && !tree_root) {
12173 btrfs_pin_extent(fs_info, bytenr,
12174 fs_info->nodesize);
12178 tmp = read_tree_block(fs_info, bytenr, 0);
12179 if (!extent_buffer_uptodate(tmp)) {
12180 fprintf(stderr, "Error reading tree block\n");
12183 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
12184 free_extent_buffer(tmp);
/*
 * Pins the chunk tree (tree_root = 0) and then the tree root (tree_root = 1)
 * via pin_down_tree_blocks(); the result of the second call is returned.
 */
12193 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
12197 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
12201 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Clears the avail_*_alloc_bits in @fs_info, then walks the CHUNK_ITEMs of the
 * chunk tree, adding a block group for each chunk and marking the chunk's
 * range dirty in fs_info->free_space_cache. Block groups are afterwards
 * iterated with btrfs_lookup_first_block_group().
 */
12204 static int reset_block_groups(struct btrfs_fs_info *fs_info)
12206 struct btrfs_block_group_cache *cache;
12207 struct btrfs_path path;
12208 struct extent_buffer *leaf;
12209 struct btrfs_chunk *chunk;
12210 struct btrfs_key key;
12214 btrfs_init_path(&path);
12216 key.type = BTRFS_CHUNK_ITEM_KEY;
12218 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
12220 btrfs_release_path(&path);
12225 * We do this in case the block groups were screwed up and had alloc
12226 * bits that aren't actually set on the chunks. This happens with
12227 * restored images every time and could happen in real life I guess.
12229 fs_info->avail_data_alloc_bits = 0;
12230 fs_info->avail_metadata_alloc_bits = 0;
12231 fs_info->avail_system_alloc_bits = 0;
12233 /* First we need to create the in-memory block groups */
/* Past the last slot of this leaf: move on to the next leaf */
12235 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12236 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
12238 btrfs_release_path(&path);
12246 leaf = path.nodes[0];
12247 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12248 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
12253 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
/* Block group: type from the chunk, start = key.offset, size = chunk length */
12254 btrfs_add_block_group(fs_info, 0,
12255 btrfs_chunk_type(leaf, chunk),
12256 key.objectid, key.offset,
12257 btrfs_chunk_length(leaf, chunk));
/*
 * NOTE(review): the range end passed below is start + length, whereas
 * test_range_bit() in pin_down_tree_blocks() is given start + len - 1 —
 * confirm which end convention set_extent_dirty() expects.
 */
12258 set_extent_dirty(&fs_info->free_space_cache, key.offset,
12259 key.offset + btrfs_chunk_length(leaf, chunk));
12264 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Continue right after the block group that was just found */
12268 start = cache->key.objectid + cache->key.offset;
12271 btrfs_release_path(&path);
12275 static int reset_balance(struct btrfs_trans_handle *trans,
12276 struct btrfs_fs_info *fs_info)
12278 struct btrfs_root *root = fs_info->tree_root;
12279 struct btrfs_path path;
12280 struct extent_buffer *leaf;
12281 struct btrfs_key key;
12282 int del_slot, del_nr = 0;
12286 btrfs_init_path(&path);
12287 key.objectid = BTRFS_BALANCE_OBJECTID;
12288 key.type = BTRFS_BALANCE_ITEM_KEY;
12290 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12295 goto reinit_data_reloc;
12300 ret = btrfs_del_item(trans, root, &path);
12303 btrfs_release_path(&path);
12305 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12306 key.type = BTRFS_ROOT_ITEM_KEY;
12308 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12312 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12317 ret = btrfs_del_items(trans, root, &path,
12324 btrfs_release_path(&path);
12327 ret = btrfs_search_slot(trans, root, &key, &path,
12334 leaf = path.nodes[0];
12335 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12336 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
12338 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
12343 del_slot = path.slots[0];
12352 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
12356 btrfs_release_path(&path);
12359 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
12360 key.type = BTRFS_ROOT_ITEM_KEY;
12361 key.offset = (u64)-1;
12362 root = btrfs_read_fs_root(fs_info, &key);
12363 if (IS_ERR(root)) {
12364 fprintf(stderr, "Error reading data reloc tree\n");
12365 ret = PTR_ERR(root);
12368 record_root_in_trans(trans, root);
12369 ret = btrfs_fsck_reinit_root(trans, root, 0);
12372 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
12374 btrfs_release_path(&path);
12378 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
12379 struct btrfs_fs_info *fs_info)
12385 * The only reason we don't do this is because right now we're just
12386 * walking the trees we find and pinning down their bytes, we don't look
12387 * at any of the leaves. In order to do mixed groups we'd have to check
12388 * the leaves of any fs roots and pin down the bytes for any file
12389 * extents we find. Not hard but why do it if we don't have to?
12391 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
12392 fprintf(stderr, "We don't support re-initing the extent tree "
12393 "for mixed block groups yet, please notify a btrfs "
12394 "developer you want to do this so they can add this "
12395 "functionality.\n");
12400 * first we need to walk all of the trees except the extent tree and pin
12401 * down the bytes that are in use so we don't overwrite any existing
12404 ret = pin_metadata_blocks(fs_info);
12406 fprintf(stderr, "error pinning down used bytes\n");
12411 * Need to drop all the block groups since we're going to recreate all
12414 btrfs_free_block_groups(fs_info);
12415 ret = reset_block_groups(fs_info);
12417 fprintf(stderr, "error resetting the block groups\n");
12421 /* Ok we can allocate now, reinit the extent root */
12422 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
12424 fprintf(stderr, "extent root initialization failed\n");
12426 * When the transaction code is updated we should end the
12427 * transaction, but for now progs only knows about commit so
12428 * just return an error.
12434 * Now we have all the in-memory block groups setup so we can make
12435 * allocations properly, and the metadata we care about is safe since we
12436 * pinned all of it above.
12439 struct btrfs_block_group_cache *cache;
12441 cache = btrfs_lookup_first_block_group(fs_info, start);
12444 start = cache->key.objectid + cache->key.offset;
12445 ret = btrfs_insert_item(trans, fs_info->extent_root,
12446 &cache->key, &cache->item,
12447 sizeof(cache->item));
12449 fprintf(stderr, "Error adding block group\n");
12452 btrfs_extent_post_op(trans, fs_info->extent_root);
12455 ret = reset_balance(trans, fs_info);
12457 fprintf(stderr, "error resetting the pending balance\n");
12462 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
12464 struct btrfs_path path;
12465 struct btrfs_trans_handle *trans;
12466 struct btrfs_key key;
12469 printf("Recowing metadata block %llu\n", eb->start);
12470 key.objectid = btrfs_header_owner(eb);
12471 key.type = BTRFS_ROOT_ITEM_KEY;
12472 key.offset = (u64)-1;
12474 root = btrfs_read_fs_root(root->fs_info, &key);
12475 if (IS_ERR(root)) {
12476 fprintf(stderr, "Couldn't find owner root %llu\n",
12478 return PTR_ERR(root);
12481 trans = btrfs_start_transaction(root, 1);
12483 return PTR_ERR(trans);
12485 btrfs_init_path(&path);
12486 path.lowest_level = btrfs_header_level(eb);
12487 if (path.lowest_level)
12488 btrfs_node_key_to_cpu(eb, &key, 0);
12490 btrfs_item_key_to_cpu(eb, &key, 0);
12492 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12493 btrfs_commit_transaction(trans, root);
12494 btrfs_release_path(&path);
12498 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12500 struct btrfs_path path;
12501 struct btrfs_trans_handle *trans;
12502 struct btrfs_key key;
12505 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12506 bad->key.type, bad->key.offset);
12507 key.objectid = bad->root_id;
12508 key.type = BTRFS_ROOT_ITEM_KEY;
12509 key.offset = (u64)-1;
12511 root = btrfs_read_fs_root(root->fs_info, &key);
12512 if (IS_ERR(root)) {
12513 fprintf(stderr, "Couldn't find owner root %llu\n",
12515 return PTR_ERR(root);
12518 trans = btrfs_start_transaction(root, 1);
12520 return PTR_ERR(trans);
12522 btrfs_init_path(&path);
12523 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12529 ret = btrfs_del_item(trans, root, &path);
12531 btrfs_commit_transaction(trans, root);
12532 btrfs_release_path(&path);
12536 static int zero_log_tree(struct btrfs_root *root)
12538 struct btrfs_trans_handle *trans;
12541 trans = btrfs_start_transaction(root, 1);
12542 if (IS_ERR(trans)) {
12543 ret = PTR_ERR(trans);
12546 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12547 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12548 ret = btrfs_commit_transaction(trans, root);
12552 static int populate_csum(struct btrfs_trans_handle *trans,
12553 struct btrfs_root *csum_root, char *buf, u64 start,
12556 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12561 while (offset < len) {
12562 sectorsize = fs_info->sectorsize;
12563 ret = read_extent_data(fs_info, buf, start + offset,
12567 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12568 start + offset, buf, sectorsize);
12571 offset += sectorsize;
12576 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12577 struct btrfs_root *csum_root,
12578 struct btrfs_root *cur_root)
12580 struct btrfs_path path;
12581 struct btrfs_key key;
12582 struct extent_buffer *node;
12583 struct btrfs_file_extent_item *fi;
12590 buf = malloc(cur_root->fs_info->sectorsize);
12594 btrfs_init_path(&path);
12598 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12601 /* Iterate all regular file extents and fill its csum */
12603 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12605 if (key.type != BTRFS_EXTENT_DATA_KEY)
12607 node = path.nodes[0];
12608 slot = path.slots[0];
12609 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12610 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12612 start = btrfs_file_extent_disk_bytenr(node, fi);
12613 len = btrfs_file_extent_disk_num_bytes(node, fi);
12615 ret = populate_csum(trans, csum_root, buf, start, len);
12616 if (ret == -EEXIST)
12622 * TODO: if next leaf is corrupted, jump to nearest next valid
12625 ret = btrfs_next_item(cur_root, &path);
12635 btrfs_release_path(&path);
12640 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12641 struct btrfs_root *csum_root)
12643 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12644 struct btrfs_path path;
12645 struct btrfs_root *tree_root = fs_info->tree_root;
12646 struct btrfs_root *cur_root;
12647 struct extent_buffer *node;
12648 struct btrfs_key key;
12652 btrfs_init_path(&path);
12653 key.objectid = BTRFS_FS_TREE_OBJECTID;
12655 key.type = BTRFS_ROOT_ITEM_KEY;
12656 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12665 node = path.nodes[0];
12666 slot = path.slots[0];
12667 btrfs_item_key_to_cpu(node, &key, slot);
12668 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12670 if (key.type != BTRFS_ROOT_ITEM_KEY)
12672 if (!is_fstree(key.objectid))
12674 key.offset = (u64)-1;
12676 cur_root = btrfs_read_fs_root(fs_info, &key);
12677 if (IS_ERR(cur_root) || !cur_root) {
12678 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12682 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12687 ret = btrfs_next_item(tree_root, &path);
12697 btrfs_release_path(&path);
12701 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12702 struct btrfs_root *csum_root)
12704 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12705 struct btrfs_path path;
12706 struct btrfs_extent_item *ei;
12707 struct extent_buffer *leaf;
12709 struct btrfs_key key;
12712 btrfs_init_path(&path);
12714 key.type = BTRFS_EXTENT_ITEM_KEY;
12716 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12718 btrfs_release_path(&path);
12722 buf = malloc(csum_root->fs_info->sectorsize);
12724 btrfs_release_path(&path);
12729 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12730 ret = btrfs_next_leaf(extent_root, &path);
12738 leaf = path.nodes[0];
12740 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12741 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12746 ei = btrfs_item_ptr(leaf, path.slots[0],
12747 struct btrfs_extent_item);
12748 if (!(btrfs_extent_flags(leaf, ei) &
12749 BTRFS_EXTENT_FLAG_DATA)) {
12754 ret = populate_csum(trans, csum_root, buf, key.objectid,
12761 btrfs_release_path(&path);
/*
 * Recalculate the csum and put it into the csum tree.
 *
 * Extent tree init will wipe out all the extent info, so in that case we
 * can't depend on the extent tree and must use the fs trees instead.  When
 * @search_fs_tree is non-zero the fs/subvol trees are used to fill the csum
 * tree; otherwise the extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root)
			      : fill_csum_tree_from_extent(trans, csum_root);
}
12783 static void free_roots_info_cache(void)
12785 if (!roots_info_cache)
12788 while (!cache_tree_empty(roots_info_cache)) {
12789 struct cache_extent *entry;
12790 struct root_item_info *rii;
12792 entry = first_cache_extent(roots_info_cache);
12795 remove_cache_extent(roots_info_cache, entry);
12796 rii = container_of(entry, struct root_item_info, cache_extent);
12800 free(roots_info_cache);
12801 roots_info_cache = NULL;
12804 static int build_roots_info_cache(struct btrfs_fs_info *info)
12807 struct btrfs_key key;
12808 struct extent_buffer *leaf;
12809 struct btrfs_path path;
12811 if (!roots_info_cache) {
12812 roots_info_cache = malloc(sizeof(*roots_info_cache));
12813 if (!roots_info_cache)
12815 cache_tree_init(roots_info_cache);
12818 btrfs_init_path(&path);
12820 key.type = BTRFS_EXTENT_ITEM_KEY;
12822 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12825 leaf = path.nodes[0];
12828 struct btrfs_key found_key;
12829 struct btrfs_extent_item *ei;
12830 struct btrfs_extent_inline_ref *iref;
12831 int slot = path.slots[0];
12836 struct cache_extent *entry;
12837 struct root_item_info *rii;
12839 if (slot >= btrfs_header_nritems(leaf)) {
12840 ret = btrfs_next_leaf(info->extent_root, &path);
12847 leaf = path.nodes[0];
12848 slot = path.slots[0];
12851 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12853 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12854 found_key.type != BTRFS_METADATA_ITEM_KEY)
12857 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12858 flags = btrfs_extent_flags(leaf, ei);
12860 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12861 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12864 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12865 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12866 level = found_key.offset;
12868 struct btrfs_tree_block_info *binfo;
12870 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12871 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12872 level = btrfs_tree_block_level(leaf, binfo);
12876 * For a root extent, it must be of the following type and the
12877 * first (and only one) iref in the item.
12879 type = btrfs_extent_inline_ref_type(leaf, iref);
12880 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12883 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12884 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12886 rii = malloc(sizeof(struct root_item_info));
12891 rii->cache_extent.start = root_id;
12892 rii->cache_extent.size = 1;
12893 rii->level = (u8)-1;
12894 entry = &rii->cache_extent;
12895 ret = insert_cache_extent(roots_info_cache, entry);
12898 rii = container_of(entry, struct root_item_info,
12902 ASSERT(rii->cache_extent.start == root_id);
12903 ASSERT(rii->cache_extent.size == 1);
12905 if (level > rii->level || rii->level == (u8)-1) {
12906 rii->level = level;
12907 rii->bytenr = found_key.objectid;
12908 rii->gen = btrfs_extent_generation(leaf, ei);
12909 rii->node_count = 1;
12910 } else if (level == rii->level) {
12918 btrfs_release_path(&path);
12923 static int maybe_repair_root_item(struct btrfs_path *path,
12924 const struct btrfs_key *root_key,
12925 const int read_only_mode)
12927 const u64 root_id = root_key->objectid;
12928 struct cache_extent *entry;
12929 struct root_item_info *rii;
12930 struct btrfs_root_item ri;
12931 unsigned long offset;
12933 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12936 "Error: could not find extent items for root %llu\n",
12937 root_key->objectid);
12941 rii = container_of(entry, struct root_item_info, cache_extent);
12942 ASSERT(rii->cache_extent.start == root_id);
12943 ASSERT(rii->cache_extent.size == 1);
12945 if (rii->node_count != 1) {
12947 "Error: could not find btree root extent for root %llu\n",
12952 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12953 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12955 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12956 btrfs_root_level(&ri) != rii->level ||
12957 btrfs_root_generation(&ri) != rii->gen) {
12960 * If we're in repair mode but our caller told us to not update
12961 * the root item, i.e. just check if it needs to be updated, don't
12962 * print this message, since the caller will call us again shortly
12963 * for the same root item without read only mode (the caller will
12964 * open a transaction first).
12966 if (!(read_only_mode && repair))
12968 "%sroot item for root %llu,"
12969 " current bytenr %llu, current gen %llu, current level %u,"
12970 " new bytenr %llu, new gen %llu, new level %u\n",
12971 (read_only_mode ? "" : "fixing "),
12973 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12974 btrfs_root_level(&ri),
12975 rii->bytenr, rii->gen, rii->level);
12977 if (btrfs_root_generation(&ri) > rii->gen) {
12979 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12980 root_id, btrfs_root_generation(&ri), rii->gen);
12984 if (!read_only_mode) {
12985 btrfs_set_root_bytenr(&ri, rii->bytenr);
12986 btrfs_set_root_level(&ri, rii->level);
12987 btrfs_set_root_generation(&ri, rii->gen);
12988 write_extent_buffer(path->nodes[0], &ri,
12989 offset, sizeof(ri));
12999 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
13000 * caused read-only snapshots to be corrupted if they were created at a moment
13001 * when the source subvolume/snapshot had orphan items. The issue was that the
13002 * on-disk root items became incorrect, referring to the pre orphan cleanup root
13003 * node instead of the post orphan cleanup root node.
13004 * So this function, and its callees, just detects and fixes those cases. Even
13005 * though the regression was for read-only snapshots, this function applies to
13006 * any snapshot/subvolume root.
13007 * This must be run before any other repair code - not doing it so, makes other
13008 * repair code delete or modify backrefs in the extent tree for example, which
13009 * will result in an inconsistent fs after repairing the root items.
13011 static int repair_root_items(struct btrfs_fs_info *info)
13013 struct btrfs_path path;
13014 struct btrfs_key key;
13015 struct extent_buffer *leaf;
13016 struct btrfs_trans_handle *trans = NULL;
13019 int need_trans = 0;
13021 btrfs_init_path(&path);
13023 ret = build_roots_info_cache(info);
13027 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
13028 key.type = BTRFS_ROOT_ITEM_KEY;
13033 * Avoid opening and committing transactions if a leaf doesn't have
13034 * any root items that need to be fixed, so that we avoid rotating
13035 * backup roots unnecessarily.
13038 trans = btrfs_start_transaction(info->tree_root, 1);
13039 if (IS_ERR(trans)) {
13040 ret = PTR_ERR(trans);
13045 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
13049 leaf = path.nodes[0];
13052 struct btrfs_key found_key;
13054 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
13055 int no_more_keys = find_next_key(&path, &key);
13057 btrfs_release_path(&path);
13059 ret = btrfs_commit_transaction(trans,
13071 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13073 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
13075 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13078 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
13082 if (!trans && repair) {
13085 btrfs_release_path(&path);
13095 free_roots_info_cache();
13096 btrfs_release_path(&path);
13098 btrfs_commit_transaction(trans, info->tree_root);
13105 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13107 struct btrfs_trans_handle *trans;
13108 struct btrfs_block_group_cache *bg_cache;
13112 /* Clear all free space cache inodes and its extent data */
13114 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13117 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
13120 current = bg_cache->key.objectid + bg_cache->key.offset;
13123 /* Don't forget to set cache_generation to -1 */
13124 trans = btrfs_start_transaction(fs_info->tree_root, 0);
13125 if (IS_ERR(trans)) {
13126 error("failed to update super block cache generation");
13127 return PTR_ERR(trans);
13129 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
13130 btrfs_commit_transaction(trans, fs_info->tree_root);
13135 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
13140 if (clear_version == 1) {
13141 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13143 "free space cache v2 detected, use --clear-space-cache v2");
13147 printf("Clearing free space cache\n");
13148 ret = clear_free_space_cache(fs_info);
13150 error("failed to clear free space cache");
13153 printf("Free space cache cleared\n");
13155 } else if (clear_version == 2) {
13156 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13157 printf("no free space cache v2 to clear\n");
13161 printf("Clear free space cache v2\n");
13162 ret = btrfs_clear_free_space_tree(fs_info);
13164 error("failed to clear free space cache v2: %d", ret);
13167 printf("free space cache v2 cleared\n");
/*
 * Usage text for "btrfs check": a synopsis line, a one-line summary, a longer
 * description, then one entry per option.  cmd_check() passes this table to
 * usage() when argument parsing fails.
 *
 * NOTE(review): the column-alignment spacing inside these strings appears to
 * have been collapsed in this copy of the file - restore it from the
 * canonical source rather than re-typing it.
 */
13174 const char * const cmd_check_usage[] = {
13175 "btrfs check [options] <device>",
13176 "Check structural integrity of a filesystem (unmounted).",
13177 "Check structural integrity of an unmounted filesystem. Verify internal",
13178 "trees' consistency and item connectivity. In the repair mode try to",
13179 "fix the problems found. ",
13180 "WARNING: the repair mode is considered dangerous",
13182 "-s|--super <superblock> use this superblock copy",
13183 "-b|--backup use the first valid backup root copy",
13184 "--force skip mount checks, repair is not possible",
13185 "--repair try to repair the filesystem",
13186 "--readonly run in read-only mode (default)",
13187 "--init-csum-tree create a new CRC tree",
13188 "--init-extent-tree create a new extent tree",
13189 "--mode <MODE> allows choice of memory/IO trade-offs",
13190 " where MODE is one of:",
13191 " original - read inodes and extents to memory (requires",
13192 " more memory, does less IO)",
13193 " lowmem - try to use less memory but read blocks again",
13195 "--check-data-csum verify checksums of data blocks",
13196 "-Q|--qgroup-report print a report on qgroup consistency",
13197 "-E|--subvol-extents <subvolid>",
13198 " print subvolume extents and sharing state",
13199 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
13200 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
13201 "-p|--progress indicate progress",
13202 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
13206 int cmd_check(int argc, char **argv)
13208 struct cache_tree root_cache;
13209 struct btrfs_root *root;
13210 struct btrfs_fs_info *info;
13213 u64 tree_root_bytenr = 0;
13214 u64 chunk_root_bytenr = 0;
13215 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
13219 int init_csum_tree = 0;
13221 int clear_space_cache = 0;
13222 int qgroup_report = 0;
13223 int qgroups_repaired = 0;
13224 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
13229 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
13230 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
13231 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
13232 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
13233 GETOPT_VAL_FORCE };
13234 static const struct option long_options[] = {
13235 { "super", required_argument, NULL, 's' },
13236 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
13237 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
13238 { "init-csum-tree", no_argument, NULL,
13239 GETOPT_VAL_INIT_CSUM },
13240 { "init-extent-tree", no_argument, NULL,
13241 GETOPT_VAL_INIT_EXTENT },
13242 { "check-data-csum", no_argument, NULL,
13243 GETOPT_VAL_CHECK_CSUM },
13244 { "backup", no_argument, NULL, 'b' },
13245 { "subvol-extents", required_argument, NULL, 'E' },
13246 { "qgroup-report", no_argument, NULL, 'Q' },
13247 { "tree-root", required_argument, NULL, 'r' },
13248 { "chunk-root", required_argument, NULL,
13249 GETOPT_VAL_CHUNK_TREE },
13250 { "progress", no_argument, NULL, 'p' },
13251 { "mode", required_argument, NULL,
13253 { "clear-space-cache", required_argument, NULL,
13254 GETOPT_VAL_CLEAR_SPACE_CACHE},
13255 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
13256 { NULL, 0, NULL, 0}
13259 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
13263 case 'a': /* ignored */ break;
13265 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
13268 num = arg_strtou64(optarg);
13269 if (num >= BTRFS_SUPER_MIRROR_MAX) {
13271 "super mirror should be less than %d",
13272 BTRFS_SUPER_MIRROR_MAX);
13275 bytenr = btrfs_sb_offset(((int)num));
13276 printf("using SB copy %llu, bytenr %llu\n", num,
13277 (unsigned long long)bytenr);
13283 subvolid = arg_strtou64(optarg);
13286 tree_root_bytenr = arg_strtou64(optarg);
13288 case GETOPT_VAL_CHUNK_TREE:
13289 chunk_root_bytenr = arg_strtou64(optarg);
13292 ctx.progress_enabled = true;
13296 usage(cmd_check_usage);
13297 case GETOPT_VAL_REPAIR:
13298 printf("enabling repair mode\n");
13300 ctree_flags |= OPEN_CTREE_WRITES;
13302 case GETOPT_VAL_READONLY:
13305 case GETOPT_VAL_INIT_CSUM:
13306 printf("Creating a new CRC tree\n");
13307 init_csum_tree = 1;
13309 ctree_flags |= OPEN_CTREE_WRITES;
13311 case GETOPT_VAL_INIT_EXTENT:
13312 init_extent_tree = 1;
13313 ctree_flags |= (OPEN_CTREE_WRITES |
13314 OPEN_CTREE_NO_BLOCK_GROUPS);
13317 case GETOPT_VAL_CHECK_CSUM:
13318 check_data_csum = 1;
13320 case GETOPT_VAL_MODE:
13321 check_mode = parse_check_mode(optarg);
13322 if (check_mode == CHECK_MODE_UNKNOWN) {
13323 error("unknown mode: %s", optarg);
13327 case GETOPT_VAL_CLEAR_SPACE_CACHE:
13328 if (strcmp(optarg, "v1") == 0) {
13329 clear_space_cache = 1;
13330 } else if (strcmp(optarg, "v2") == 0) {
13331 clear_space_cache = 2;
13332 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
13335 "invalid argument to --clear-space-cache, must be v1 or v2");
13338 ctree_flags |= OPEN_CTREE_WRITES;
13340 case GETOPT_VAL_FORCE:
13346 if (check_argc_exact(argc - optind, 1))
13347 usage(cmd_check_usage);
13349 if (ctx.progress_enabled) {
13350 ctx.tp = TASK_NOTHING;
13351 ctx.info = task_init(print_status_check, print_status_return, &ctx);
13354 /* This check is the only reason for --readonly to exist */
13355 if (readonly && repair) {
13356 error("repair options are not compatible with --readonly");
13361 * experimental and dangerous
13363 if (repair && check_mode == CHECK_MODE_LOWMEM)
13364 warning("low-memory mode repair support is only partial");
13367 cache_tree_init(&root_cache);
13369 ret = check_mounted(argv[optind]);
13372 error("could not check mount status: %s",
13378 "%s is currently mounted, use --force if you really intend to check the filesystem",
13386 error("repair and --force is not yet supported");
13393 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
13397 "filesystem mounted, continuing because of --force");
13399 /* A block device is mounted in exclusive mode by kernel */
13400 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
13403 /* only allow partial opening under repair mode */
13405 ctree_flags |= OPEN_CTREE_PARTIAL;
13407 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
13408 chunk_root_bytenr, ctree_flags);
13410 error("cannot open file system");
13416 global_info = info;
13417 root = info->fs_root;
13418 uuid_unparse(info->super_copy->fsid, uuidbuf);
13420 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
13423 * Check the bare minimum before starting anything else that could rely
13424 * on it, namely the tree roots, any local consistency checks
13426 if (!extent_buffer_uptodate(info->tree_root->node) ||
13427 !extent_buffer_uptodate(info->dev_root->node) ||
13428 !extent_buffer_uptodate(info->chunk_root->node)) {
13429 error("critical roots corrupted, unable to check the filesystem");
13435 if (clear_space_cache) {
13436 ret = do_clear_free_space_cache(info, clear_space_cache);
13442 * repair mode will force us to commit transaction which
13443 * will make us fail to load log tree when mounting.
13445 if (repair && btrfs_super_log_root(info->super_copy)) {
13446 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
13452 ret = zero_log_tree(root);
13455 error("failed to zero log tree: %d", ret);
13460 if (qgroup_report) {
13461 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
13463 ret = qgroup_verify_all(info);
13470 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
13471 subvolid, argv[optind], uuidbuf);
13472 ret = print_extent_state(info, subvolid);
13477 if (init_extent_tree || init_csum_tree) {
13478 struct btrfs_trans_handle *trans;
13480 trans = btrfs_start_transaction(info->extent_root, 0);
13481 if (IS_ERR(trans)) {
13482 error("error starting transaction");
13483 ret = PTR_ERR(trans);
13488 if (init_extent_tree) {
13489 printf("Creating a new extent tree\n");
13490 ret = reinit_extent_tree(trans, info);
13496 if (init_csum_tree) {
13497 printf("Reinitialize checksum tree\n");
13498 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
13500 error("checksum tree initialization failed: %d",
13507 ret = fill_csum_tree(trans, info->csum_root,
13511 error("checksum tree refilling failed: %d", ret);
13516 * Ok now we commit and run the normal fsck, which will add
13517 * extent entries for all of the items it finds.
13519 ret = btrfs_commit_transaction(trans, info->extent_root);
13524 if (!extent_buffer_uptodate(info->extent_root->node)) {
13525 error("critical: extent_root, unable to check the filesystem");
13530 if (!extent_buffer_uptodate(info->csum_root->node)) {
13531 error("critical: csum_root, unable to check the filesystem");
13537 ret = do_check_chunks_and_extents(info);
13541 "errors found in extent allocation tree or chunk allocation");
13543 ret = repair_root_items(info);
13546 error("failed to repair root items: %s", strerror(-ret));
13550 fprintf(stderr, "Fixed %d roots.\n", ret);
13552 } else if (ret > 0) {
13554 "Found %d roots with an outdated root item.\n",
13557 "Please run a filesystem check with the option --repair to fix them.\n");
13563 if (!ctx.progress_enabled) {
13564 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13565 fprintf(stderr, "checking free space tree\n");
13567 fprintf(stderr, "checking free space cache\n");
13569 ret = check_space_cache(root);
13572 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13573 error("errors found in free space tree");
13575 error("errors found in free space cache");
13580 * We used to have to have these hole extents in between our real
13581 * extents so if we don't have this flag set we need to make sure there
13582 * are no gaps in the file extents for inodes, otherwise we can just
13583 * ignore it when this happens.
13585 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13586 ret = do_check_fs_roots(info, &root_cache);
13589 error("errors found in fs roots");
13593 fprintf(stderr, "checking csums\n");
13594 ret = check_csums(root);
13597 error("errors found in csum tree");
13601 fprintf(stderr, "checking root refs\n");
13602 /* For low memory mode, check_fs_roots_v2 handles root refs */
13603 if (check_mode != CHECK_MODE_LOWMEM) {
13604 ret = check_root_refs(root, &root_cache);
13607 error("errors found in root refs");
13612 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13613 struct extent_buffer *eb;
13615 eb = list_first_entry(&root->fs_info->recow_ebs,
13616 struct extent_buffer, recow);
13617 list_del_init(&eb->recow);
13618 ret = recow_extent_buffer(root, eb);
13621 error("fails to fix transid errors");
13626 while (!list_empty(&delete_items)) {
13627 struct bad_item *bad;
13629 bad = list_first_entry(&delete_items, struct bad_item, list);
13630 list_del_init(&bad->list);
13632 ret = delete_bad_item(root, bad);
13638 if (info->quota_enabled) {
13639 fprintf(stderr, "checking quota groups\n");
13640 ret = qgroup_verify_all(info);
13643 error("failed to check quota groups");
13647 ret = repair_qgroups(info, &qgroups_repaired);
13650 error("failed to repair quota groups");
13656 if (!list_empty(&root->fs_info->recow_ebs)) {
13657 error("transid errors in file system");
13662 printf("found %llu bytes used, ",
13663 (unsigned long long)bytes_used);
13665 printf("error(s) found\n");
13667 printf("no error found\n");
13668 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13669 printf("total tree bytes: %llu\n",
13670 (unsigned long long)total_btree_bytes);
13671 printf("total fs tree bytes: %llu\n",
13672 (unsigned long long)total_fs_tree_bytes);
13673 printf("total extent tree bytes: %llu\n",
13674 (unsigned long long)total_extent_tree_bytes);
13675 printf("btree space waste bytes: %llu\n",
13676 (unsigned long long)btree_space_waste);
13677 printf("file data blocks allocated: %llu\n referenced %llu\n",
13678 (unsigned long long)data_bytes_allocated,
13679 (unsigned long long)data_bytes_referenced);
13681 free_qgroup_counts();
13682 free_root_recs_tree(&root_cache);
13686 if (ctx.progress_enabled)
13687 task_deinit(ctx.info);