2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
/*
 * Selectable check implementations.  parse_check_mode() maps user strings
 * onto CHECK_MODE_LOWMEM / CHECK_MODE_ORIGINAL / CHECK_MODE_UNKNOWN; the
 * default is an alias of the original mode.
 * NOTE(review): the individual enumerator lines are not visible in this
 * extract.
 */
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
/* Mode currently in effect; starts out as the default. */
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
/*
 * Common header embedded (as member "node") at the start of both
 * struct data_backref and struct tree_backref.  The one-bit flags record
 * the kind of reference and what has been observed for it.
 */
87 struct extent_backref {
/* set for data backrefs, clear for tree backrefs (see the comparators) */
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
/* compared before dispatching to the data/tree comparator */
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
98 return rb_entry(node, struct extent_backref, node);
/*
 * Backref describing a data extent reference.  Only the embedded common
 * header is visible in this extract; compare_data_backref() additionally
 * reads parent, owner, offset, disk_bytenr, bytes and found_ref.
 */
101 struct data_backref {
/* common header; to_data_backref() converts from it via container_of() */
102 struct extent_backref node;
/*
 * Error flag bits, one bit per condition so that several of them can be
 * OR-ed into a single integer.
 */
#define ROOT_DIR_ERROR		0x00002	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	0x00004	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	0x00008	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	0x00010	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	0x00020	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	0x00040	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	0x00080	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		0x00100	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	0x00200	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	0x00400	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		0x00800	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		0x01000	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		0x02000	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		0x04000	/* no inode_item */
#define LAST_ITEM		0x08000	/* Complete this tree traversal */
#define ROOT_REF_MISSING	0x10000	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	0x20000	/* ROOT_REF found but not match */
134 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 return container_of(back, struct data_backref, node);
/*
 * rb-tree comparator for two data backrefs.
 *
 * Keys are compared in this order: parent (which shares storage with root),
 * then - unless node1 is a full backref - owner and offset, and finally,
 * only when both entries have found_ref set, disk_bytenr and bytes.
 * Both nodes are expected to be data backrefs; a warning is raised otherwise.
 *
 * NOTE(review): the return statements are not visible in this extract;
 * presumably each ">" / "<" branch yields a positive / negative value and
 * the fall-through yields 0 - confirm against the full file.
 */
139 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
141 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
142 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
143 struct data_backref *back1 = to_data_backref(ext1);
144 struct data_backref *back2 = to_data_backref(ext2);
146 WARN_ON(!ext1->is_data);
147 WARN_ON(!ext2->is_data);
149 /* parent and root are a union, so this covers both */
150 if (back1->parent > back2->parent)
152 if (back1->parent < back2->parent)
155 /* This is a full backref and the parents match. */
156 if (back1->node.full_backref)
159 if (back1->owner > back2->owner)
161 if (back1->owner < back2->owner)
164 if (back1->offset > back2->offset)
166 if (back1->offset < back2->offset)
/* location and size only take part when both sides carry found_ref */
169 if (back1->found_ref && back2->found_ref) {
170 if (back1->disk_bytenr > back2->disk_bytenr)
172 if (back1->disk_bytenr < back2->disk_bytenr)
175 if (back1->bytes > back2->bytes)
177 if (back1->bytes < back2->bytes)
185 * Much like data_backref, but with the undetermined members removed
186 * and changed to use a list_head.
187 * During the extent scan, it is stored in root->orphan_data_extent.
188 * During the fs tree scan, it is then moved to inode_rec->orphan_data_extents.
190 struct orphan_data_extent {
/* linkage used by the list_for_each_entry() walks over orphan extents */
191 struct list_head list;
/*
 * Backref describing a tree block reference.  Only the embedded common
 * header is visible in this extract; compare_tree_backref() additionally
 * reads the parent member.
 */
199 struct tree_backref {
/* common header; to_tree_backref() converts from it via container_of() */
200 struct extent_backref node;
207 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
209 return container_of(back, struct tree_backref, node);
/*
 * rb-tree comparator for two tree backrefs; the only key is parent (which
 * shares storage with root).  Both nodes are expected to be tree backrefs;
 * a warning is raised if either is flagged as data.
 *
 * NOTE(review): return statements are not visible in this extract.
 */
212 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
214 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
215 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
216 struct tree_backref *back1 = to_tree_backref(ext1);
217 struct tree_backref *back2 = to_tree_backref(ext2);
219 WARN_ON(ext1->is_data);
220 WARN_ON(ext2->is_data);
222 /* parent and root are a union, so this covers both */
223 if (back1->parent > back2->parent)
225 if (back1->parent < back2->parent)
/*
 * Top-level rb-tree comparator for backrefs of either kind: orders first by
 * is_data, then by full_backref, and finally delegates to the data or tree
 * specific comparator.
 *
 * NOTE(review): the return statements and the condition selecting between
 * the two delegates are not visible in this extract.
 */
231 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
233 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
234 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
236 if (ext1->is_data > ext2->is_data)
239 if (ext1->is_data < ext2->is_data)
242 if (ext1->full_backref > ext2->full_backref)
244 if (ext1->full_backref < ext2->full_backref)
248 return compare_data_backref(node1, node2);
250 return compare_tree_backref(node1, node2);
/*
 * Explicit initialization value for extent_record::flag_block_full_backref
 * (a value outside the plain 0/1 range).
 */
enum {
	FLAG_UNSET = 2,
};
/*
 * Record kept for one extent: list heads for its backrefs and duplicates,
 * an rb-tree of backrefs, a cache_extent for lookup, plus status bits.
 * NOTE(review): several members are not visible in this extract.
 */
256 struct extent_record {
257 struct list_head backrefs;
258 struct list_head dups;
259 struct rb_root backref_tree;
/* linkage used by to_extent_record() */
260 struct list_head list;
261 struct cache_extent cache;
262 struct btrfs_disk_key parent_key;
267 u64 extent_item_refs;
269 u64 parent_generation;
/* two bits wide so that it can also hold FLAG_UNSET (2) */
273 unsigned int flag_block_full_backref:2;
274 unsigned int found_rec:1;
275 unsigned int content_checked:1;
276 unsigned int owner_ref_checked:1;
277 unsigned int is_root:1;
278 unsigned int metadata:1;
279 unsigned int bad_full_backref:1;
280 unsigned int crossing_stripes:1;
281 unsigned int wrong_chunk_type:1;
284 static inline struct extent_record* to_extent_record(struct list_head *entry)
286 return container_of(entry, struct extent_record, list);
/*
 * One name-based reference to an inode.  The found_* bits note which item
 * types (DIR_ITEM, DIR_INDEX, INODE_REF/EXTREF) have been seen for it; they
 * are set in add_inode_backref().
 * NOTE(review): remaining members (dir, index, namelen, filetype, ref_type,
 * errors, name, ...) are used elsewhere but not visible in this extract.
 */
289 struct inode_backref {
/* linkage into inode_record::backrefs */
290 struct list_head list;
291 unsigned int found_dir_item:1;
292 unsigned int found_dir_index:1;
293 unsigned int found_inode_ref:1;
303 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
305 return list_entry(entry, struct inode_backref, list);
/*
 * Record describing a root item.
 * NOTE(review): only the list linkage and drop_key are visible in this
 * extract; the remaining members are not shown.
 */
308 struct root_item_record {
309 struct list_head list;
315 struct btrfs_key drop_key;
/*
 * Error bits stored in a backref's "errors" field and decoded by
 * print_ref_error().  One bit per condition.
 */
#define REF_ERR_NO_DIR_ITEM		0x0001
#define REF_ERR_NO_DIR_INDEX		0x0002
#define REF_ERR_NO_INODE_REF		0x0004
#define REF_ERR_DUP_DIR_ITEM		0x0008
#define REF_ERR_DUP_DIR_INDEX		0x0010
#define REF_ERR_DUP_INODE_REF		0x0020
#define REF_ERR_INDEX_UNMATCH		0x0040
#define REF_ERR_FILETYPE_UNMATCH	0x0080
#define REF_ERR_NAME_TOO_LONG		0x0100
#define REF_ERR_NO_ROOT_REF		0x0200
#define REF_ERR_NO_ROOT_BACKREF		0x0400
#define REF_ERR_DUP_ROOT_REF		0x0800
#define REF_ERR_DUP_ROOT_BACKREF	0x1000
/*
 * One hole (a byte range with no file extent) kept in an rb-tree.
 * The hole helpers in this file access its "node", "start" and "len"
 * members; the member declarations are not visible in this extract.
 */
332 struct file_extent_hole {
/*
 * State accumulated for one inode while scanning.
 * NOTE(review): scalar members used elsewhere (ino, nlink, isize, nbytes,
 * imode, found_link, found_size, extent_start, extent_end, errors, refs)
 * are not visible in this extract.
 */
338 struct inode_record {
/* list of struct inode_backref */
339 struct list_head backrefs;
340 unsigned int checked:1;
341 unsigned int merging:1;
342 unsigned int found_inode_item:1;
343 unsigned int found_dir_item:1;
344 unsigned int found_file_extent:1;
345 unsigned int found_csum_item:1;
346 unsigned int some_csum_missing:1;
347 unsigned int nodatasum:1;
/* rb-tree of struct file_extent_hole */
360 struct rb_root holes;
/* list of struct orphan_data_extent */
361 struct list_head orphan_extents;
/*
 * Error bits stored in inode_record::errors and decoded by
 * print_inode_error().  One bit per condition.
 */
#define I_ERR_NO_INODE_ITEM		0x0001
#define I_ERR_NO_ORPHAN_ITEM		0x0002
#define I_ERR_DUP_INODE_ITEM		0x0004
#define I_ERR_DUP_DIR_INDEX		0x0008
#define I_ERR_ODD_DIR_ITEM		0x0010
#define I_ERR_ODD_FILE_EXTENT		0x0020
#define I_ERR_BAD_FILE_EXTENT		0x0040
#define I_ERR_FILE_EXTENT_OVERLAP	0x0080
#define I_ERR_FILE_EXTENT_DISCOUNT	0x0100
#define I_ERR_DIR_ISIZE_WRONG		0x0200
#define I_ERR_FILE_NBYTES_WRONG		0x0400
#define I_ERR_ODD_CSUM_ITEM		0x0800
#define I_ERR_SOME_CSUM_MISSING		0x1000
#define I_ERR_LINK_COUNT_WRONG		0x2000
#define I_ERR_FILE_EXTENT_ORPHAN	0x4000
/*
 * One reference to a root, with one-bit flags noting which pieces of
 * evidence have been found for it.
 * NOTE(review): further members are not visible in this extract.
 */
382 struct root_backref {
/* linkage used by to_root_backref() */
383 struct list_head list;
384 unsigned int found_dir_item:1;
385 unsigned int found_dir_index:1;
386 unsigned int found_back_ref:1;
387 unsigned int found_forward_ref:1;
388 unsigned int reachable:1;
397 static inline struct root_backref* to_root_backref(struct list_head *entry)
399 return list_entry(entry, struct root_backref, list);
403 struct list_head backrefs;
404 struct cache_extent cache;
405 unsigned int found_root_item:1;
411 struct cache_extent cache;
416 struct cache_extent cache;
417 struct cache_tree root_cache;
418 struct cache_tree inode_cache;
419 struct inode_record *current;
/*
 * Traversal state used by enter_shared_node()/leave_shared_node().
 * NOTE(review): the active_node and root_level members used by those
 * functions are not visible in this extract.
 */
428 struct walk_control {
/* cache tree of struct shared_node, looked up by bytenr */
429 struct cache_tree shared;
/* shared node recorded per tree level */
430 struct shared_node *nodes[BTRFS_MAX_LEVEL];
436 struct btrfs_key key;
438 struct list_head list;
/* NOTE(review): only the list linkage of this struct is visible here. */
441 struct extent_entry {
446 struct list_head list;
/*
 * Information about a root item, indexed through a cache_extent.
 * NOTE(review): the members the two comments below describe are not
 * visible in this extract.
 */
449 struct root_item_info {
450 /* level of the root */
452 /* number of nodes at this level, must be 1 for a root */
456 struct cache_extent cache_extent;
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual bits yet; they are for
 * internal use only.  Every flag must own a distinct bit so that OR-ed
 * results stay distinguishable: CROSSING_STRIPE_BOUNDARY used to share
 * bit 4 with REFERENCER_MISMATCH and has been moved to the first free bit,
 * leaving all other values unchanged.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
/*
 * Progress indicator callback.  @p is a struct task_ctx; the function
 * starts a 1000 ms period on priv->info and prints the label for priv->tp
 * followed by one of four rotating indicator characters, ending the line
 * with '\r' so it is overwritten in place.  Nothing is printed while the
 * position is TASK_NOTHING.
 *
 * NOTE(review): the loop construct, the "count" variable and the return
 * are not visible in this extract.
 */
476 static void *print_status_check(void *p)
478 struct task_ctx *priv = p;
479 const char work_indicator[] = { '.', 'o', 'O', 'o' };
/* labels indexed by enum task_position */
481 static char *task_position_string[] = {
483 "checking free space cache",
487 task_period_start(priv->info, 1000 /* 1s */);
489 if (priv->tp == TASK_NOTHING)
493 printf("%s [%c]\r", task_position_string[priv->tp],
494 work_indicator[count % 4]);
497 task_period_wait(priv->info);
/*
 * Companion of print_status_check().
 * NOTE(review): only the signature is visible in this extract; the body is
 * not shown, so its behaviour cannot be documented from here.
 */
502 static int print_status_return(void *p)
510 static enum btrfs_check_mode parse_check_mode(const char *str)
512 if (strcmp(str, "lowmem") == 0)
513 return CHECK_MODE_LOWMEM;
514 if (strcmp(str, "orig") == 0)
515 return CHECK_MODE_ORIGINAL;
516 if (strcmp(str, "original") == 0)
517 return CHECK_MODE_ORIGINAL;
519 return CHECK_MODE_UNKNOWN;
522 /* Compatibility helper that lets older code keep working with the hole tree */
/*
 * Looks at the first (lowest) hole in @holes.
 * NOTE(review): the return statements are not visible in this extract;
 * callers compare the result against isize, so it presumably is the start
 * of the first hole, with a sentinel for an empty tree - confirm.
 */
523 static u64 first_extent_gap(struct rb_root *holes)
525 struct file_extent_hole *hole;
527 if (RB_EMPTY_ROOT(holes))
530 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * rb-tree insertion comparator for holes: orders by start, and for equal
 * starts uses the length to decide which hole becomes the merge centre.
 * add_file_extent_hole() relies on this never reporting equality.
 *
 * NOTE(review): the return statements are not visible in this extract.
 */
534 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
536 struct file_extent_hole *hole1;
537 struct file_extent_hole *hole2;
539 hole1 = rb_entry(node1, struct file_extent_hole, node);
540 hole2 = rb_entry(node2, struct file_extent_hole, node);
542 if (hole1->start > hole2->start)
544 if (hole1->start < hole2->start)
546 /* Now hole1->start == hole2->start */
547 if (hole1->len >= hole2->len)
549 * Hole 1 will be merge center
550 * Same hole will be merged later
553 /* Hole 2 will be merge center */
558 * Add a hole to the record
560 * This will do hole merge for copy_file_extent_holes(),
561 * which will ensure there won't be continuous holes.
/*
 * Steps visible below: allocate a new hole, insert it with compare_hole(),
 * absorb the previous hole if it touches or overlaps the new one, then
 * repeatedly absorb following holes that touch or overlap.
 *
 * NOTE(review): the remaining parameters, the allocation-failure check,
 * the free() of absorbed holes and the return value are not visible in
 * this extract.
 */
563 static int add_file_extent_hole(struct rb_root *holes,
566 struct file_extent_hole *hole;
567 struct file_extent_hole *prev = NULL;
568 struct file_extent_hole *next = NULL;
570 hole = malloc(sizeof(*hole));
575 /* Since compare will not return 0, no -EEXIST will happen */
576 rb_insert(holes, &hole->node, compare_hole);
578 /* simple merge with previous hole */
579 if (rb_prev(&hole->node))
580 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
/* previous hole reaches the new one: grow the new hole backwards over it */
582 if (prev && prev->start + prev->len >= hole->start) {
583 hole->len = hole->start + hole->len - prev->start;
584 hole->start = prev->start;
585 rb_erase(&prev->node, holes);
590 /* iterate merge with next holes */
592 if (!rb_next(&hole->node))
594 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
596 if (hole->start + hole->len >= next->start) {
/* only extend when the next hole ends beyond the current end */
597 if (hole->start + hole->len <= next->start + next->len)
598 hole->len = next->start + next->len -
600 rb_erase(&next->node, holes);
/*
 * rb_search() comparator: @data is a struct file_extent_hole used as the
 * search key, @node is the tree entry under test.  The key's start is
 * checked against the entry's [start, start + len) range.
 *
 * NOTE(review): the assignment of "start" from the key and the return
 * values are not visible in this extract.
 */
609 static int compare_hole_range(struct rb_node *node, void *data)
611 struct file_extent_hole *hole;
614 hole = (struct file_extent_hole *)data;
617 hole = rb_entry(node, struct file_extent_hole, node);
618 if (start < hole->start)
620 if (start >= hole->start && start < hole->start + hole->len)
626 * Delete a hole in the record
628 * This will do the hole split and is much restrict than add.
/*
 * Steps visible below: find the hole containing "start" via
 * compare_hole_range(), check the requested range against the end of that
 * hole, work out the leftover piece before and/or after the range, erase
 * the hole, and re-add the leftovers through add_file_extent_hole().
 *
 * NOTE(review): the remaining parameters, the not-found / out-of-range
 * handling, the free() of the erased hole and the return value are not
 * visible in this extract.
 */
630 static int del_file_extent_hole(struct rb_root *holes,
633 struct file_extent_hole *hole;
634 struct file_extent_hole tmp;
639 struct rb_node *node;
646 node = rb_search(holes, &tmp, compare_hole_range, NULL);
649 hole = rb_entry(node, struct file_extent_hole, node);
650 if (start + len > hole->start + hole->len)
654 * Now there will be no overlap, delete the hole and re-add the
655 * split(s) if they exists.
/* leftover piece in front of the deleted range */
657 if (start > hole->start) {
658 prev_start = hole->start;
659 prev_len = start - hole->start;
/* leftover piece behind the deleted range */
662 if (hole->start + hole->len > start + len) {
663 next_start = start + len;
664 next_len = hole->start + hole->len - start - len;
667 rb_erase(node, holes);
670 ret = add_file_extent_hole(holes, prev_start, prev_len);
675 ret = add_file_extent_hole(holes, next_start, next_len);
/*
 * Add every hole of the source tree to @dst, in ascending order, through
 * add_file_extent_hole() (so touching holes get merged in @dst).
 *
 * NOTE(review): the second parameter's declaration (named "src" in the
 * body), the loop construct, the error check on ret and the return are not
 * visible in this extract.
 */
682 static int copy_file_extent_holes(struct rb_root *dst,
685 struct file_extent_hole *hole;
686 struct rb_node *node;
689 node = rb_first(src);
691 hole = rb_entry(node, struct file_extent_hole, node);
692 ret = add_file_extent_hole(dst, hole->start, hole->len);
695 node = rb_next(node);
/*
 * Empty the hole tree: repeatedly take the first node and erase it until
 * the tree has no nodes left.
 *
 * NOTE(review): the loop construct and the release of each hole are not
 * visible in this extract.
 */
700 static void free_file_extent_holes(struct rb_root *holes)
702 struct rb_node *node;
703 struct file_extent_hole *hole;
705 node = rb_first(holes);
707 hole = rb_entry(node, struct file_extent_hole, node);
708 rb_erase(node, holes);
/* restart from the new first node after every erase */
710 node = rb_first(holes);
714 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
716 static void record_root_in_trans(struct btrfs_trans_handle *trans,
717 struct btrfs_root *root)
719 if (root->last_trans != trans->transid) {
720 root->track_dirty = 1;
721 root->last_trans = trans->transid;
722 root->commit_root = root->node;
723 extent_buffer_get(root->node);
/*
 * Map the file-type bits of an inode mode to the matching BTRFS_FT_*
 * directory entry type using a lookup table indexed by
 * (imode & S_IFMT) >> S_SHIFT.  Modes without a table entry map to 0.
 *
 * NOTE(review): the definition of S_SHIFT is not visible in this extract.
 */
727 static u8 imode_to_type(u32 imode)
730 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
731 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
732 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
733 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
734 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
735 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
736 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
737 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
740 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
/*
 * rb-tree comparator ordering device records by devid.
 * NOTE(review): the return statements are not visible in this extract.
 */
744 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
746 struct device_record *rec1;
747 struct device_record *rec2;
749 rec1 = rb_entry(node1, struct device_record, node);
750 rec2 = rb_entry(node2, struct device_record, node);
751 if (rec1->devid > rec2->devid)
753 else if (rec1->devid < rec2->devid)
/*
 * Make a private deep copy of @orig_rec.
 *
 * The scalar contents are copied wholesale, then the three containers are
 * re-initialised and refilled with fresh copies: every backref (including
 * its trailing name, namelen + 1 bytes), every orphan data extent, and
 * every file extent hole.  Returns ERR_PTR(-ENOMEM) if the record itself
 * cannot be allocated.
 *
 * NOTE(review): the per-element allocation checks, the success return, the
 * cleanup label and the free() calls in the cleanup path are not visible
 * in this extract.
 */
759 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
761 struct inode_record *rec;
762 struct inode_backref *backref;
763 struct inode_backref *orig;
764 struct inode_backref *tmp;
765 struct orphan_data_extent *src_orphan;
766 struct orphan_data_extent *dst_orphan;
771 rec = malloc(sizeof(*rec));
773 return ERR_PTR(-ENOMEM);
774 memcpy(rec, orig_rec, sizeof(*rec));
/* the memcpy above copied the original's list/tree heads; reset them */
776 INIT_LIST_HEAD(&rec->backrefs);
777 INIT_LIST_HEAD(&rec->orphan_extents);
778 rec->holes = RB_ROOT;
780 list_for_each_entry(orig, &orig_rec->backrefs, list) {
/* header plus name plus terminating NUL */
781 size = sizeof(*orig) + orig->namelen + 1;
782 backref = malloc(size);
787 memcpy(backref, orig, size);
788 list_add_tail(&backref->list, &rec->backrefs);
790 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
791 dst_orphan = malloc(sizeof(*dst_orphan));
796 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
797 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
799 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
/* from here on: undo the partial copy (holes, backrefs, orphan extents) */
806 rb = rb_first(&rec->holes);
808 struct file_extent_hole *hole;
810 hole = rb_entry(rb, struct file_extent_hole, node);
816 if (!list_empty(&rec->backrefs))
817 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
818 list_del(&orig->list);
/*
 * NOTE(review): rec->orphan_extents holds struct orphan_data_extent
 * entries, yet this loop iterates with the struct inode_backref cursors
 * orig/tmp.  Both structs show "list" as their first visible member, which
 * is presumably why this works - consider dedicated orphan_data_extent
 * cursors.
 */
822 if (!list_empty(&rec->orphan_extents))
823 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
824 list_del(&orig->list);
/*
 * Print, to stdout, a header naming the tree followed by one line per
 * orphan data extent in @orphan_extents (inode, offset, disk_bytenr,
 * disk_len).  The empty-list check precedes all output.
 *
 * NOTE(review): the second parameter and the trailing printf arguments are
 * not visible in this extract.
 */
833 static void print_orphan_data_extents(struct list_head *orphan_extents,
836 struct orphan_data_extent *orphan;
838 if (list_empty(orphan_extents))
840 printf("The following data extent is lost in tree %llu:\n",
842 list_for_each_entry(orphan, orphan_extents, list) {
843 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
844 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Report the errors recorded for @rec on stderr.
 *
 * Output is one line of the form
 *   [reloc]root <id> inode <ino> errors <hex>, <description>, ...
 * with one description per I_ERR_* bit set.  For a reloc tree the id shown
 * is the key offset (the corresponding fs root).  Orphan extents and file
 * extent holes are listed afterwards when their error bits are set.
 *
 * NOTE(review): some lines (early-exit check, hole loop construct, the
 * final fprintf arguments) are not visible in this extract.
 */
849 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
851 u64 root_objectid = root->root_key.objectid;
852 int errors = rec->errors;
856 /* reloc root errors, we print its corresponding fs root objectid*/
857 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
858 root_objectid = root->root_key.offset;
859 fprintf(stderr, "reloc");
861 fprintf(stderr, "root %llu inode %llu errors %x",
862 (unsigned long long) root_objectid,
863 (unsigned long long) rec->ino, rec->errors);
865 if (errors & I_ERR_NO_INODE_ITEM)
866 fprintf(stderr, ", no inode item");
867 if (errors & I_ERR_NO_ORPHAN_ITEM)
868 fprintf(stderr, ", no orphan item");
869 if (errors & I_ERR_DUP_INODE_ITEM)
870 fprintf(stderr, ", dup inode item");
871 if (errors & I_ERR_DUP_DIR_INDEX)
872 fprintf(stderr, ", dup dir index");
873 if (errors & I_ERR_ODD_DIR_ITEM)
874 fprintf(stderr, ", odd dir item");
875 if (errors & I_ERR_ODD_FILE_EXTENT)
876 fprintf(stderr, ", odd file extent");
877 if (errors & I_ERR_BAD_FILE_EXTENT)
878 fprintf(stderr, ", bad file extent");
879 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
880 fprintf(stderr, ", file extent overlap");
881 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
882 fprintf(stderr, ", file extent discount");
883 if (errors & I_ERR_DIR_ISIZE_WRONG)
884 fprintf(stderr, ", dir isize wrong");
885 if (errors & I_ERR_FILE_NBYTES_WRONG)
886 fprintf(stderr, ", nbytes wrong");
887 if (errors & I_ERR_ODD_CSUM_ITEM)
888 fprintf(stderr, ", odd csum item");
889 if (errors & I_ERR_SOME_CSUM_MISSING)
890 fprintf(stderr, ", some csum missing");
891 if (errors & I_ERR_LINK_COUNT_WRONG)
892 fprintf(stderr, ", link count wrong");
893 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
894 fprintf(stderr, ", orphan file extent");
895 fprintf(stderr, "\n");
896 /* Print the orphan extents if needed */
/*
 * NOTE(review): this passes root->objectid, not the local root_objectid
 * that was adjusted for reloc roots above - confirm that is intended.
 */
897 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
898 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
900 /* Print the holes if needed */
901 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
902 struct file_extent_hole *hole;
903 struct rb_node *node;
906 node = rb_first(&rec->holes);
907 fprintf(stderr, "Found file extent holes:\n");
910 hole = rb_entry(node, struct file_extent_hole, node);
911 fprintf(stderr, "\tstart: %llu, len: %llu\n",
912 hole->start, hole->len);
913 node = rb_next(node);
916 fprintf(stderr, "\tstart: 0, len: %llu\n",
918 root->fs_info->sectorsize));
922 static void print_ref_error(int errors)
924 if (errors & REF_ERR_NO_DIR_ITEM)
925 fprintf(stderr, ", no dir item");
926 if (errors & REF_ERR_NO_DIR_INDEX)
927 fprintf(stderr, ", no dir index");
928 if (errors & REF_ERR_NO_INODE_REF)
929 fprintf(stderr, ", no inode ref");
930 if (errors & REF_ERR_DUP_DIR_ITEM)
931 fprintf(stderr, ", dup dir item");
932 if (errors & REF_ERR_DUP_DIR_INDEX)
933 fprintf(stderr, ", dup dir index");
934 if (errors & REF_ERR_DUP_INODE_REF)
935 fprintf(stderr, ", dup inode ref");
936 if (errors & REF_ERR_INDEX_UNMATCH)
937 fprintf(stderr, ", index mismatch");
938 if (errors & REF_ERR_FILETYPE_UNMATCH)
939 fprintf(stderr, ", filetype mismatch");
940 if (errors & REF_ERR_NAME_TOO_LONG)
941 fprintf(stderr, ", name too long");
942 if (errors & REF_ERR_NO_ROOT_REF)
943 fprintf(stderr, ", no root ref");
944 if (errors & REF_ERR_NO_ROOT_BACKREF)
945 fprintf(stderr, ", no root backref");
946 if (errors & REF_ERR_DUP_ROOT_REF)
947 fprintf(stderr, ", dup root ref");
948 if (errors & REF_ERR_DUP_ROOT_BACKREF)
949 fprintf(stderr, ", dup root backref");
950 fprintf(stderr, "\n");
/*
 * Look up the inode record for "ino" in @inode_cache, creating it when it
 * does not exist yet.
 *
 * Existing entry: when modification is requested ("mod") and the record is
 * shared (refs > 1) the cache entry is replaced by a private clone from
 * clone_inode_rec().
 * New entry: a zeroed record is allocated with extent_start set to
 * (u64)-1 and empty backref/orphan/hole containers, wrapped in a ptr_node
 * keyed by [ino, 1] and inserted into the cache.
 *
 * Returns an ERR_PTR on allocation failure (-ENOMEM) or failed insertion
 * (-EEXIST).
 *
 * NOTE(review): the remaining parameters, several assignments, the
 * BTRFS_FREE_INO_OBJECTID special case body and the success return are not
 * visible in this extract.
 */
953 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
956 struct ptr_node *node;
957 struct cache_extent *cache;
958 struct inode_record *rec = NULL;
961 cache = lookup_cache_extent(inode_cache, ino, 1);
963 node = container_of(cache, struct ptr_node, cache);
/* copy-before-modify for records referenced from more than one place */
965 if (mod && rec->refs > 1) {
966 node->data = clone_inode_rec(rec);
967 if (IS_ERR(node->data))
973 rec = calloc(1, sizeof(*rec));
975 return ERR_PTR(-ENOMEM);
977 rec->extent_start = (u64)-1;
979 INIT_LIST_HEAD(&rec->backrefs);
980 INIT_LIST_HEAD(&rec->orphan_extents);
981 rec->holes = RB_ROOT;
983 node = malloc(sizeof(*node));
986 return ERR_PTR(-ENOMEM);
988 node->cache.start = ino;
989 node->cache.size = 1;
992 if (ino == BTRFS_FREE_INO_OBJECTID)
995 ret = insert_cache_extent(inode_cache, &node->cache);
997 return ERR_PTR(-EEXIST);
/*
 * Drain @orphan_extents: unlink entries from the head until the list is
 * empty.
 * NOTE(review): the release of each unlinked entry is not visible in this
 * extract.
 */
1002 static void free_orphan_data_extents(struct list_head *orphan_extents)
1004 struct orphan_data_extent *orphan;
1006 while (!list_empty(orphan_extents)) {
1007 orphan = list_entry(orphan_extents->next,
1008 struct orphan_data_extent, list);
1009 list_del(&orphan->list);
/*
 * Drop one reference on @rec.  While references remain nothing else
 * happens; once the count reaches zero the backref list is drained and the
 * orphan extents and holes are released.
 *
 * NOTE(review): the early return and the free() calls for each backref and
 * for the record itself are not visible in this extract.
 */
1014 static void free_inode_rec(struct inode_record *rec)
1016 struct inode_backref *backref;
1018 if (--rec->refs > 0)
1021 while (!list_empty(&rec->backrefs)) {
1022 backref = to_inode_backref(rec->backrefs.next);
1023 list_del(&backref->list);
1026 free_orphan_data_extents(&rec->orphan_extents);
1027 free_file_extent_holes(&rec->holes);
/*
 * Test whether @rec is fully verified and clean: no errors, marked
 * checked, inode item seen, link count matching the links found, and no
 * outstanding backrefs.
 * NOTE(review): the return statements are not visible in this extract;
 * maybe_free_inode_rec() treats a true result as "may be dropped".
 */
1031 static int can_free_inode_rec(struct inode_record *rec)
1033 if (!rec->errors && rec->checked && rec->found_inode_item &&
1034 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Re-evaluate @rec and drop it from @inode_cache if nothing is left to
 * verify.
 *
 * Phase 1 (needs the inode item): backrefs with both DIR_ITEM and
 * DIR_INDEX seen are type-checked against the inode mode; those that are
 * also error-free, have an inode ref, and belong to an inode whose link
 * count matches are unlinked from the list.
 * Phase 2 (needs checked && !merging): size/nbytes/extent coverage and
 * checksum consistency are turned into I_ERR_* bits according to the
 * inode's type.
 * Phase 3: if can_free_inode_rec() agrees, the cache entry is removed and
 * the record is released.
 *
 * NOTE(review): early returns and some free() calls are not visible in
 * this extract.
 */
1039 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1040 struct inode_record *rec)
1042 struct cache_extent *cache;
1043 struct inode_backref *tmp, *backref;
1044 struct ptr_node *node;
1047 if (!rec->found_inode_item)
1050 filetype = imode_to_type(rec->imode);
1051 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1052 if (backref->found_dir_item && backref->found_dir_index) {
1053 if (backref->filetype != filetype)
1054 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1055 if (!backref->errors && backref->found_inode_ref &&
1056 rec->nlink == rec->found_link) {
1057 list_del(&backref->list);
1063 if (!rec->checked || rec->merging)
1066 if (S_ISDIR(rec->imode)) {
1067 if (rec->found_size != rec->isize)
1068 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1069 if (rec->found_file_extent)
1070 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1071 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1072 if (rec->found_dir_item)
1073 rec->errors |= I_ERR_ODD_DIR_ITEM;
1074 if (rec->found_size != rec->nbytes)
1075 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
/* extent coverage is only enforced for linked inodes when no_holes is off */
1076 if (rec->nlink > 0 && !no_holes &&
1077 (rec->extent_end < rec->isize ||
1078 first_extent_gap(&rec->holes) < rec->isize))
1079 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1082 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1083 if (rec->found_csum_item && rec->nodatasum)
1084 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1085 if (rec->some_csum_missing && !rec->nodatasum)
1086 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1089 BUG_ON(rec->refs != 1);
1090 if (can_free_inode_rec(rec)) {
1091 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1092 node = container_of(cache, struct ptr_node, cache);
1093 BUG_ON(node->data != rec);
1094 remove_cache_extent(inode_cache, &node->cache);
1096 free_inode_rec(rec);
/*
 * Search @root for an orphan item (objectid BTRFS_ORPHAN_OBJECTID, type
 * BTRFS_ORPHAN_ITEM_KEY) using a read-only btrfs_search_slot(); the path
 * is released before returning.
 *
 * NOTE(review): the key.offset assignment (presumably from @ino) and the
 * translation of the search result into the return value are not visible
 * in this extract.
 */
1100 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1102 struct btrfs_path path;
1103 struct btrfs_key key;
1106 key.objectid = BTRFS_ORPHAN_OBJECTID;
1107 key.type = BTRFS_ORPHAN_ITEM_KEY;
1110 btrfs_init_path(&path);
1111 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1112 btrfs_release_path(&path);
/*
 * Record the contents of the INODE_ITEM at @slot of @eb in the current
 * inode record of @active_node.
 *
 * A second inode item for the same record sets I_ERR_DUP_INODE_ITEM.
 * Otherwise nlink, size, nbytes and mode are copied from the item, the
 * record is marked as having its inode item, a zero link count sets
 * I_ERR_NO_ORPHAN_ITEM, and the record is handed to
 * maybe_free_inode_rec().
 *
 * NOTE(review): the return statements and the statement executed for
 * BTRFS_INODE_NODATASUM are not visible in this extract.
 */
1118 static int process_inode_item(struct extent_buffer *eb,
1119 int slot, struct btrfs_key *key,
1120 struct shared_node *active_node)
1122 struct inode_record *rec;
1123 struct btrfs_inode_item *item;
1125 rec = active_node->current;
/* the current record must match the key and must not be shared */
1126 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1127 if (rec->found_inode_item) {
1128 rec->errors |= I_ERR_DUP_INODE_ITEM;
1131 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1132 rec->nlink = btrfs_inode_nlink(eb, item);
1133 rec->isize = btrfs_inode_size(eb, item);
1134 rec->nbytes = btrfs_inode_nbytes(eb, item);
1135 rec->imode = btrfs_inode_mode(eb, item);
1136 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1138 rec->found_inode_item = 1;
1139 if (rec->nlink == 0)
1140 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1141 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find the backref of @rec matching (dir, name, namelen), or create one.
 *
 * A new backref is allocated with room for the name plus a terminating
 * NUL, its header is zeroed, the name is copied and terminated, and it is
 * appended to rec->backrefs.
 *
 * NOTE(review): the "name" parameter line, the loop's break/continue/return
 * statements, the allocation-failure handling, the dir assignment and the
 * final return are not visible in this extract.
 */
1145 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1147 int namelen, u64 dir)
1149 struct inode_backref *backref;
1151 list_for_each_entry(backref, &rec->backrefs, list) {
1152 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1154 if (backref->dir != dir || backref->namelen != namelen)
1156 if (memcmp(name, backref->name, namelen))
1161 backref = malloc(sizeof(*backref) + namelen + 1);
/* only the fixed header is cleared; the name bytes are written below */
1164 memset(backref, 0, sizeof(*backref));
1166 backref->namelen = namelen;
1167 memcpy(backref->name, name, namelen);
1168 backref->name[namelen] = '\0';
1169 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Register one piece of naming evidence for inode @ino in @inode_cache.
 *
 * The inode record is fetched for modification and the matching backref is
 * looked up or created.  @errors is OR-ed into the backref; then, based on
 * @itemtype (DIR_INDEX, DIR_ITEM, or INODE_REF/INODE_EXTREF), duplicates
 * and index/filetype disagreements with previously seen items are flagged,
 * the new values are stored and the matching found_* bit is set.  Finally
 * the record is offered to maybe_free_inode_rec().
 *
 * NOTE(review): a few statements (e.g. between the visible lines of each
 * branch) and the return are not visible in this extract.
 */
1173 static int add_inode_backref(struct cache_tree *inode_cache,
1174 u64 ino, u64 dir, u64 index,
1175 const char *name, int namelen,
1176 u8 filetype, u8 itemtype, int errors)
1178 struct inode_record *rec;
1179 struct inode_backref *backref;
1181 rec = get_inode_rec(inode_cache, ino, 1);
1182 BUG_ON(IS_ERR(rec));
1183 backref = get_inode_backref(rec, name, namelen, dir);
1186 backref->errors |= errors;
1187 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1188 if (backref->found_dir_index)
1189 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1190 if (backref->found_inode_ref && backref->index != index)
1191 backref->errors |= REF_ERR_INDEX_UNMATCH;
1192 if (backref->found_dir_item && backref->filetype != filetype)
1193 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1195 backref->index = index;
1196 backref->filetype = filetype;
1197 backref->found_dir_index = 1;
1198 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1200 if (backref->found_dir_item)
1201 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1202 if (backref->found_dir_index && backref->filetype != filetype)
1203 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1205 backref->filetype = filetype;
1206 backref->found_dir_item = 1;
1207 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1208 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1209 if (backref->found_inode_ref)
1210 backref->errors |= REF_ERR_DUP_INODE_REF;
1211 if (backref->found_dir_index && backref->index != index)
1212 backref->errors |= REF_ERR_INDEX_UNMATCH;
1214 backref->index = index;
/* remember whether this came from an INODE_REF or an INODE_EXTREF */
1216 backref->ref_type = itemtype;
1217 backref->found_inode_ref = 1;
1222 maybe_free_inode_rec(inode_cache, rec);
/*
 * Fold everything known about @src into @dst (which lives in @dst_cache).
 *
 * Backrefs are replayed through add_inode_backref() once per kind of
 * evidence they carry; found_* flags are OR-ed; holes are copied when
 * src's first gap starts earlier than dst's; link and size counters are
 * summed; extent ranges are combined (flagging overlap, or adding a hole
 * for a gap between dst's end and src's start); error bits are OR-ed; and
 * the inode item fields are taken from @src unless @dst already has an
 * inode item, in which case I_ERR_DUP_INODE_ITEM is set.
 *
 * NOTE(review): dir_count bookkeeping, the merging flag handling, error
 * checks on ret and the return are not visible in this extract.
 */
1226 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1227 struct cache_tree *dst_cache)
1229 struct inode_backref *backref;
1234 list_for_each_entry(backref, &src->backrefs, list) {
1235 if (backref->found_dir_index) {
1236 add_inode_backref(dst_cache, dst->ino, backref->dir,
1237 backref->index, backref->name,
1238 backref->namelen, backref->filetype,
1239 BTRFS_DIR_INDEX_KEY, backref->errors);
1241 if (backref->found_dir_item) {
1243 add_inode_backref(dst_cache, dst->ino,
1244 backref->dir, 0, backref->name,
1245 backref->namelen, backref->filetype,
1246 BTRFS_DIR_ITEM_KEY, backref->errors);
1248 if (backref->found_inode_ref) {
1249 add_inode_backref(dst_cache, dst->ino,
1250 backref->dir, backref->index,
1251 backref->name, backref->namelen, 0,
1252 backref->ref_type, backref->errors);
1256 if (src->found_dir_item)
1257 dst->found_dir_item = 1;
1258 if (src->found_file_extent)
1259 dst->found_file_extent = 1;
1260 if (src->found_csum_item)
1261 dst->found_csum_item = 1;
1262 if (src->some_csum_missing)
1263 dst->some_csum_missing = 1;
1264 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1265 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1270 BUG_ON(src->found_link < dir_count);
1271 dst->found_link += src->found_link - dir_count;
1272 dst->found_size += src->found_size;
/* (u64)-1 is the "no extent seen yet" marker set in get_inode_rec() */
1273 if (src->extent_start != (u64)-1) {
1274 if (dst->extent_start == (u64)-1) {
1275 dst->extent_start = src->extent_start;
1276 dst->extent_end = src->extent_end;
1278 if (dst->extent_end > src->extent_start)
1279 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1280 else if (dst->extent_end < src->extent_start) {
1281 ret = add_file_extent_hole(&dst->holes,
1283 src->extent_start - dst->extent_end);
1285 if (dst->extent_end < src->extent_end)
1286 dst->extent_end = src->extent_end;
1290 dst->errors |= src->errors;
1291 if (src->found_inode_item) {
1292 if (!dst->found_inode_item) {
1293 dst->nlink = src->nlink;
1294 dst->isize = src->isize;
1295 dst->nbytes = src->nbytes;
1296 dst->imode = src->imode;
1297 dst->nodatasum = src->nodatasum;
1298 dst->found_inode_item = 1;
1300 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Move the inode records collected in @src_node into @dst_node.
 *
 * One reference is dropped from @src_node first.  The root_cache is
 * processed, then the inode_cache: each entry is re-inserted into the
 * corresponding destination cache, and on -EEXIST the source record is
 * merged into the conflicting destination record via merge_inode_recs().
 * Afterwards, dst_node->current is advanced to src_node's current inode
 * if that inode number is higher (or if dst has no current record).
 *
 * NOTE(review): loop constructs, the "splice" decision that follows the
 * reference drop, assignments of rec / ins->data, and the return are not
 * visible in this extract.
 */
1308 static int splice_shared_node(struct shared_node *src_node,
1309 struct shared_node *dst_node)
1311 struct cache_extent *cache;
1312 struct ptr_node *node, *ins;
1313 struct cache_tree *src, *dst;
1314 struct inode_record *rec, *conflict;
1315 u64 current_ino = 0;
1319 if (--src_node->refs == 0)
1321 if (src_node->current)
1322 current_ino = src_node->current->ino;
1324 src = &src_node->root_cache;
1325 dst = &dst_node->root_cache;
1327 cache = search_cache_extent(src, 0);
1329 node = container_of(cache, struct ptr_node, cache);
/* advance before the current entry may be removed from the tree */
1331 cache = next_cache_extent(cache);
1334 remove_cache_extent(src, &node->cache);
1337 ins = malloc(sizeof(*ins));
1339 ins->cache.start = node->cache.start;
1340 ins->cache.size = node->cache.size;
1344 ret = insert_cache_extent(dst, &ins->cache);
1345 if (ret == -EEXIST) {
1346 conflict = get_inode_rec(dst, rec->ino, 1);
1347 BUG_ON(IS_ERR(conflict));
1348 merge_inode_recs(rec, conflict, dst);
1350 conflict->checked = 1;
1351 if (dst_node->current == conflict)
1352 dst_node->current = NULL;
1354 maybe_free_inode_rec(dst, conflict);
1355 free_inode_rec(rec);
/* second pass: repeat the same procedure for the inode caches */
1362 if (src == &src_node->root_cache) {
1363 src = &src_node->inode_cache;
1364 dst = &dst_node->inode_cache;
1368 if (current_ino > 0 && (!dst_node->current ||
1369 current_ino > dst_node->current->ino)) {
1370 if (dst_node->current) {
1371 dst_node->current->checked = 1;
1372 maybe_free_inode_rec(dst, dst_node->current);
1374 dst_node->current = get_inode_rec(dst, current_ino, 1);
1375 BUG_ON(IS_ERR(dst_node->current));
/*
 * Per-entry destructor for the inode record cache trees: recover the
 * ptr_node that embeds the given cache extent and free the inode record
 * attached to it.
 *
 * NOTE(review): the assignment of 'rec' (and any freeing of the ptr_node
 * itself) is on lines not visible in this excerpt.
 */
1380 static void free_inode_ptr(struct cache_extent *cache)
1382 struct ptr_node *node;
1383 struct inode_record *rec;
1385 node = container_of(cache, struct ptr_node, cache);
1387 free_inode_rec(rec);
/*
 * Instantiate the tree-freeing helper for inode record trees using
 * free_inode_ptr() as the per-entry callback; presumably this is what
 * defines free_inode_recs_tree(), which enter_shared_node() calls.
 */
1391 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the shared_node registered for tree block 'bytenr' in the
 * 'shared' cache tree.  The lookup uses bytenr as the start and a size of 1,
 * matching how add_shared_node() inserts entries.
 *
 * NOTE(review): the return statements are not visible here; presumably NULL
 * is returned when no entry exists -- confirm.
 */
1393 static struct shared_node *find_shared_node(struct cache_tree *shared,
1396 struct cache_extent *cache;
1397 struct shared_node *node;
1399 cache = lookup_cache_extent(shared, bytenr, 1);
1401 node = container_of(cache, struct shared_node, cache);
/*
 * Allocate a zeroed shared_node for tree block 'bytenr', initialise its
 * root and inode caches, and insert it into the 'shared' cache tree keyed
 * by [bytenr, bytenr + 1).
 *
 * NOTE(review): how 'refs' is stored and how allocation failure is handled
 * is on lines not visible in this excerpt.
 */
1407 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1410 struct shared_node *node;
1412 node = calloc(1, sizeof(*node));
1415 node->cache.start = bytenr;
1416 node->cache.size = 1;
1417 cache_tree_init(&node->root_cache);
1418 cache_tree_init(&node->inode_cache);
1421 ret = insert_cache_extent(shared, &node->cache);
/*
 * Called by walk_down_tree() when the walk reaches the tree block at
 * 'bytenr' on 'level', to register it as a shared node in 'wc'.
 *
 * Nothing is done when 'level' is already the active node level.  On what
 * appears to be the first visit, a shared node is created and becomes the
 * active node for this level.  Otherwise the records previously collected
 * for the block are either dropped (when walking the top level of a root
 * whose root item has zero refs) or spliced into the currently active node.
 *
 * NOTE(review): the branch conditions and return values sit on lines not
 * visible in this excerpt; the description above is inferred from the
 * visible statements and should be confirmed.
 */
1426 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1427 struct walk_control *wc, int level)
1429 struct shared_node *node;
1430 struct shared_node *dest;
1433 if (level == wc->active_node)
/* A shared block must sit strictly below the currently active node. */
1436 BUG_ON(wc->active_node <= level);
1437 node = find_shared_node(&wc->shared, bytenr);
/* Create the node, then make it the active node for this level. */
1439 ret = add_shared_node(&wc->shared, bytenr, refs);
1441 node = find_shared_node(&wc->shared, bytenr);
1442 wc->nodes[level] = node;
1443 wc->active_node = level;
/*
 * Top level of a root with zero root refs: just drop one reference and
 * discard the cached records once nobody else needs them.
 */
1447 if (wc->root_level == wc->active_node &&
1448 btrfs_root_refs(&root->root_item) == 0) {
1449 if (--node->refs == 0) {
1450 free_inode_recs_tree(&node->root_cache);
1451 free_inode_recs_tree(&node->inode_cache);
1452 remove_cache_extent(&wc->shared, &node->cache);
/* Otherwise hand the already collected records to the active node. */
1458 dest = wc->nodes[wc->active_node];
1459 splice_shared_node(node, dest);
1460 if (node->refs == 0) {
1461 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Called by walk_up_tree() when the walk leaves the active shared node at
 * 'level'.  The next shared node is looked for above 'level' and becomes
 * the active one; the records of the node being left are spliced into it
 * unless we are at the top level of a root whose root item has zero refs.
 *
 * NOTE(review): the loop body, the else branch and the return statements
 * are on lines not visible in this excerpt.
 */
1467 static int leave_shared_node(struct btrfs_root *root,
1468 struct walk_control *wc, int level)
1470 struct shared_node *node;
1471 struct shared_node *dest;
1474 if (level == wc->root_level)
/* Scan the levels above for the next entry to activate. */
1477 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1481 BUG_ON(i >= BTRFS_MAX_LEVEL);
/* Detach the node being left and switch the active level to 'i'. */
1483 node = wc->nodes[wc->active_node];
1484 wc->nodes[wc->active_node] = NULL;
1485 wc->active_node = i;
1487 dest = wc->nodes[wc->active_node];
1488 if (wc->active_node < wc->root_level ||
1489 btrfs_root_refs(&root->root_item) > 0) {
/* splice_shared_node() drops one ref; others must still be outstanding. */
1490 BUG_ON(node->refs <= 1);
1491 splice_shared_node(node, dest);
1493 BUG_ON(node->refs < 2);
1502 * 1 - if the root with id child_root_id is a child of root parent_root_id
1503 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1504 * has other root(s) as parent(s)
1505 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Determine the parent/child relation of two roots by consulting the tree
 * root: first an exact (parent, ROOT_REF, child) key is searched, then the
 * ROOT_BACKREF items of child_root_id are walked looking for one whose
 * offset equals parent_root_id.
 *
 * NOTE(review): error handling and most return statements are on lines not
 * visible in this excerpt.
 */
1507 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1510 struct btrfs_path path;
1511 struct btrfs_key key;
1512 struct extent_buffer *leaf;
1516 btrfs_init_path(&path);
/* Direct lookup of the forward reference parent -> child. */
1518 key.objectid = parent_root_id;
1519 key.type = BTRFS_ROOT_REF_KEY;
1520 key.offset = child_root_id;
1521 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1525 btrfs_release_path(&path);
/* Fall back to scanning the back references of the child. */
1529 key.objectid = child_root_id;
1530 key.type = BTRFS_ROOT_BACKREF_KEY;
1532 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1538 leaf = path.nodes[0];
/* Ran off the end of this leaf: continue in the next one. */
1539 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1540 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1543 leaf = path.nodes[0];
1546 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Stop once the items no longer belong to the child's back references. */
1547 if (key.objectid != child_root_id ||
1548 key.type != BTRFS_ROOT_BACKREF_KEY)
1553 if (key.offset == parent_root_id) {
1554 btrfs_release_path(&path);
1561 btrfs_release_path(&path);
/* Not a child of parent_root_id: 0 if some other parent exists, else 2. */
1564 return has_parent ? 0 : 2;
/*
 * Process the DIR_ITEM or DIR_INDEX item at 'slot' of leaf 'eb' on behalf
 * of the inode currently tracked by active_node.
 *
 * Every btrfs_dir_item packed into the item is walked.  For each entry the
 * name length is validated against the item size and BTRFS_NAME_LEN, the
 * name hash is verified for DIR_ITEM keys, and a backref is recorded with
 * add_inode_backref() in the inode cache (target is an inode), in the root
 * cache (target is a root item), or under BTRFS_MULTIPLE_OBJECTIDS when the
 * location type is invalid.
 *
 * NOTE(review): local declarations, the loop bookkeeping and the return
 * statement are on lines not visible in this excerpt.
 */
1567 static int process_dir_item(struct extent_buffer *eb,
1568 int slot, struct btrfs_key *key,
1569 struct shared_node *active_node)
1579 struct btrfs_dir_item *di;
1580 struct inode_record *rec;
1581 struct cache_tree *root_cache;
1582 struct cache_tree *inode_cache;
1583 struct btrfs_key location;
1584 char namebuf[BTRFS_NAME_LEN];
1586 root_cache = &active_node->root_cache;
1587 inode_cache = &active_node->inode_cache;
1588 rec = active_node->current;
1589 rec->found_dir_item = 1;
1591 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1592 total = btrfs_item_size_nr(eb, slot);
1593 while (cur < total) {
1595 btrfs_dir_item_key_to_cpu(eb, di, &location);
1596 name_len = btrfs_dir_name_len(eb, di);
1597 data_len = btrfs_dir_data_len(eb, di);
1598 filetype = btrfs_dir_type(eb, di);
/* Accumulate the name lengths seen for this directory inode. */
1600 rec->found_size += name_len;
/* Name must fit in the item and must not exceed BTRFS_NAME_LEN. */
1601 if (cur + sizeof(*di) + name_len > total ||
1602 name_len > BTRFS_NAME_LEN) {
1603 error = REF_ERR_NAME_TOO_LONG;
/* Not even the fixed dir_item header fits in what is left of the item. */
1605 if (cur + sizeof(*di) > total)
1607 len = min_t(u32, total - cur - sizeof(*di),
/* The name bytes follow the dir_item header directly. */
1614 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
/* For DIR_ITEM keys the key offset must be the hash of the name. */
1616 if (key->type == BTRFS_DIR_ITEM_KEY &&
1617 key->offset != btrfs_name_hash(namebuf, len)) {
1618 rec->errors |= I_ERR_ODD_DIR_ITEM;
1619 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1620 key->objectid, key->offset, namebuf, len, filetype,
1621 key->offset, btrfs_name_hash(namebuf, len));
/* Entry points at an inode of this tree. */
1624 if (location.type == BTRFS_INODE_ITEM_KEY) {
1625 add_inode_backref(inode_cache, location.objectid,
1626 key->objectid, key->offset, namebuf,
1627 len, filetype, key->type, error);
/* Entry points at another root. */
1628 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1629 add_inode_backref(root_cache, location.objectid,
1630 key->objectid, key->offset,
1631 namebuf, len, filetype,
1634 fprintf(stderr, "invalid location in dir item %u\n",
1636 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1637 key->objectid, key->offset, namebuf,
1638 len, filetype, key->type, error);
/* Step over header, name and data to the next packed entry. */
1641 len = sizeof(*di) + name_len + data_len;
1642 di = (struct btrfs_dir_item *)((char *)di + len);
/*
 * A DIR_INDEX item is flagged when nritems exceeds one; nritems is
 * presumably the number of entries walked above -- confirm.
 */
1645 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1646 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Process the INODE_REF item at 'slot' of leaf 'eb': walk every
 * btrfs_inode_ref packed into the item and record a backref for inode
 * key->objectid in directory key->offset through add_inode_backref().
 *
 * A name that crosses the item boundary or exceeds BTRFS_NAME_LEN is
 * truncated and reported with REF_ERR_NAME_TOO_LONG.
 *
 * NOTE(review): local declarations, the normal-length branch and the
 * return statement are on lines not visible in this excerpt.
 */
1651 static int process_inode_ref(struct extent_buffer *eb,
1652 int slot, struct btrfs_key *key,
1653 struct shared_node *active_node)
1661 struct cache_tree *inode_cache;
1662 struct btrfs_inode_ref *ref;
1663 char namebuf[BTRFS_NAME_LEN];
1665 inode_cache = &active_node->inode_cache;
1667 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1668 total = btrfs_item_size_nr(eb, slot);
1669 while (cur < total) {
1670 name_len = btrfs_inode_ref_name_len(eb, ref);
1671 index = btrfs_inode_ref_index(eb, ref);
1673 /* inode_ref + namelen should not cross item boundary */
1674 if (cur + sizeof(*ref) + name_len > total ||
1675 name_len > BTRFS_NAME_LEN) {
/* Not even the fixed inode_ref header fits in what is left. */
1676 if (total < cur + sizeof(*ref))
1679 /* Still try to read out the remaining part */
1680 len = min_t(u32, total - cur - sizeof(*ref),
1682 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the inode_ref header directly. */
1688 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1689 add_inode_backref(inode_cache, key->objectid, key->offset,
1690 index, namebuf, len, 0, key->type, error);
/* Step over header and name to the next packed reference. */
1692 len = sizeof(*ref) + name_len;
1693 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Process the INODE_EXTREF item at 'slot' of leaf 'eb': walk every
 * btrfs_inode_extref packed into the item and record a backref for inode
 * key->objectid in the parent directory stored in the extref through
 * add_inode_backref().
 *
 * Names longer than BTRFS_NAME_LEN are truncated to BTRFS_NAME_LEN and
 * reported with REF_ERR_NAME_TOO_LONG.
 *
 * NOTE(review): unlike process_inode_ref(), no check that header + name
 * stay inside the item is visible in this excerpt -- confirm whether a
 * corrupted name_len can make read_extent_buffer() read past the item.
 */
1699 static int process_inode_extref(struct extent_buffer *eb,
1700 int slot, struct btrfs_key *key,
1701 struct shared_node *active_node)
1710 struct cache_tree *inode_cache;
1711 struct btrfs_inode_extref *extref;
1712 char namebuf[BTRFS_NAME_LEN];
1714 inode_cache = &active_node->inode_cache;
1716 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1717 total = btrfs_item_size_nr(eb, slot);
1718 while (cur < total) {
1719 name_len = btrfs_inode_extref_name_len(eb, extref);
1720 index = btrfs_inode_extref_index(eb, extref);
1721 parent = btrfs_inode_extref_parent(eb, extref);
1722 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: clamp to the buffer size and flag the backref. */
1726 len = BTRFS_NAME_LEN;
1727 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the extref header directly. */
1729 read_extent_buffer(eb, namebuf,
1730 (unsigned long)(extref + 1), len);
1731 add_inode_backref(inode_cache, key->objectid, parent,
1732 index, namebuf, len, 0, key->type, error);
/* Step over header and name to the next packed extref. */
1734 len = sizeof(*extref) + name_len;
1735 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Walk the EXTENT_CSUM items of the csum tree that may overlap the byte
 * range [start, start + len) and work out how much of it is covered by
 * checksums.  The covered end of each item is computed from the item size,
 * the per-sector checksum size and the filesystem sector size.
 *
 * NOTE(review): the accumulation into *found, the loop control and the
 * return statement are on lines not visible in this excerpt; presumably
 * the number of covered bytes is returned through *found -- confirm.
 */
1742 static int count_csum_range(struct btrfs_root *root, u64 start,
1743 u64 len, u64 *found)
1745 struct btrfs_key key;
1746 struct btrfs_path path;
1747 struct extent_buffer *leaf;
1752 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1754 btrfs_init_path(&path);
1756 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1758 key.type = BTRFS_EXTENT_CSUM_KEY;
1760 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * No exact match: the preceding item may still cover 'start', so look at
 * it if it is a csum item as well.
 */
1764 if (ret > 0 && path.slots[0] > 0) {
1765 leaf = path.nodes[0];
1766 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1767 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1768 key.type == BTRFS_EXTENT_CSUM_KEY)
1773 leaf = path.nodes[0];
/* Ran off the end of this leaf: continue in the next one. */
1774 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1775 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1780 leaf = path.nodes[0];
1783 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1784 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1785 key.type != BTRFS_EXTENT_CSUM_KEY)
1788 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* This csum item starts at or beyond the end of the requested range. */
1789 if (key.offset >= start + len)
1792 if (key.offset > start)
/* Number of csums in the item times the sector size gives the coverage. */
1795 size = btrfs_item_size_nr(leaf, path.slots[0]);
1796 csum_end = key.offset + (size / csum_size) *
1797 root->fs_info->sectorsize;
1798 if (csum_end > start) {
1799 size = min(csum_end - start, len);
1808 btrfs_release_path(&path);
/*
 * Process the EXTENT_DATA item at 'slot' of leaf 'eb' for the inode
 * currently tracked by active_node.
 *
 * The function tracks the contiguous range covered by file extents
 * (recording holes and overlaps), validates the extent item fields by
 * extent type, accumulates rec->found_size, and finally compares the
 * amount of checksummed data reported by count_csum_range() against what
 * the extent type permits.
 *
 * NOTE(review): some declarations, else branches and return statements
 * are on lines not visible in this excerpt.
 */
1814 static int process_file_extent(struct btrfs_root *root,
1815 struct extent_buffer *eb,
1816 int slot, struct btrfs_key *key,
1817 struct shared_node *active_node)
1819 struct inode_record *rec;
1820 struct btrfs_file_extent_item *fi;
1822 u64 disk_bytenr = 0;
1823 u64 extent_offset = 0;
/* Mask of the bits below the sector size, used for alignment checks. */
1824 u64 mask = root->fs_info->sectorsize - 1;
1828 rec = active_node->current;
1829 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1830 rec->found_file_extent = 1;
/* First extent seen for this inode: initialise the covered range. */
1832 if (rec->extent_start == (u64)-1) {
1833 rec->extent_start = key->offset;
1834 rec->extent_end = key->offset;
/* Extent starts before the previous one ended: overlap. */
1837 if (rec->extent_end > key->offset)
1838 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
/* Extent starts after the previous one ended: record the gap as a hole. */
1839 else if (rec->extent_end < key->offset) {
1840 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1841 key->offset - rec->extent_end);
1846 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1847 extent_type = btrfs_file_extent_type(eb, fi);
1849 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1850 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1852 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1853 rec->found_size += num_bytes;
/* Round the inline length up to a sector boundary. */
1854 num_bytes = (num_bytes + mask) & ~mask;
1855 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1856 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1857 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1858 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1859 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned. */
1860 if (num_bytes == 0 || (num_bytes & mask))
1861 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced slice must fit inside the extent's ram_bytes. */
1862 if (num_bytes + extent_offset >
1863 btrfs_file_extent_ram_bytes(eb, fi))
1864 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not carry any encoding. */
1865 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1866 (btrfs_file_extent_compression(eb, fi) ||
1867 btrfs_file_extent_encryption(eb, fi) ||
1868 btrfs_file_extent_other_encoding(eb, fi)))
1869 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents with a disk_bytenr count towards the size found. */
1870 if (disk_bytenr > 0)
1871 rec->found_size += num_bytes;
1873 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1875 rec->extent_end = key->offset + num_bytes;
1878 * The data reloc tree will copy full extents into its inode and then
1879 * copy the corresponding csums. Because the extent it copied could be
1880 * a preallocated extent that hasn't been written to yet there may be no
1881 * csums to copy, ergo we won't have csums for our file extent. This is
1882 * ok so just don't bother checking csums if the inode belongs to the
1885 if (disk_bytenr > 0 &&
1886 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* For compressed extents the range to check is the on-disk length. */
1888 if (btrfs_file_extent_compression(eb, fi))
1889 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1891 disk_bytenr += extent_offset;
1893 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1896 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1898 rec->found_csum_item = 1;
1899 if (found < num_bytes)
1900 rec->some_csum_missing = 1;
1901 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1903 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item of leaf 'eb' for the original-mode check.
 *
 * Items are visited in key order.  Whenever the objectid moves past the
 * inode currently tracked by the active shared node, that record is marked
 * checked and a record for the new objectid is fetched from the inode
 * cache.  The item is then dispatched by key type to the matching
 * process_*() helper.
 *
 * NOTE(review): the early return, the 'continue' statements, the switch
 * header and the return statement are on lines not visible in this excerpt.
 */
1909 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1910 struct walk_control *wc)
1912 struct btrfs_key key;
1916 struct cache_tree *inode_cache;
1917 struct shared_node *active_node;
/* Top level of a root whose root item has zero refs. */
1919 if (wc->root_level == wc->active_node &&
1920 btrfs_root_refs(&root->root_item) == 0)
1923 active_node = wc->nodes[wc->active_node];
1924 inode_cache = &active_node->inode_cache;
1925 nritems = btrfs_header_nritems(eb);
1926 for (i = 0; i < nritems; i++) {
1927 btrfs_item_key_to_cpu(eb, &key, i);
/* Free space objects and orphan items get special handling here. */
1929 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1931 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Moved on to a higher inode number: retire the previous record. */
1934 if (active_node->current == NULL ||
1935 active_node->current->ino < key.objectid) {
1936 if (active_node->current) {
1937 active_node->current->checked = 1;
1938 maybe_free_inode_rec(inode_cache,
1939 active_node->current);
1941 active_node->current = get_inode_rec(inode_cache,
1943 BUG_ON(IS_ERR(active_node->current));
1946 case BTRFS_DIR_ITEM_KEY:
1947 case BTRFS_DIR_INDEX_KEY:
1948 ret = process_dir_item(eb, i, &key, active_node);
1950 case BTRFS_INODE_REF_KEY:
1951 ret = process_inode_ref(eb, i, &key, active_node);
1953 case BTRFS_INODE_EXTREF_KEY:
1954 ret = process_inode_extref(eb, i, &key, active_node);
1956 case BTRFS_INODE_ITEM_KEY:
1957 ret = process_inode_item(eb, i, &key, active_node);
1959 case BTRFS_EXTENT_DATA_KEY:
1960 ret = process_file_extent(root, eb, i, &key,
/* Per-level cache filled by update_nodes_refs(); index is the tree level. */
/* Start address of the block last looked up at each level. */
1971 u64 bytenr[BTRFS_MAX_LEVEL];
/* Extent reference count found for that block. */
1972 u64 refs[BTRFS_MAX_LEVEL];
/* Non-zero when the block has to be checked from the current root. */
1973 int need_check[BTRFS_MAX_LEVEL];
/* Forward declarations: both are used by process_one_leaf_v2() below. */
1976 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1977 struct node_refs *nrefs, u64 level);
1978 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1979 unsigned int ext_ref);
1982 * Returns >0 Found error, not fatal, should continue
1983 * Returns <0 Fatal error, must exit the whole check
1984 * Returns 0 No errors found
/*
 * Check the inodes found in the leaf at path->nodes[0].
 *
 * The leaf is scanned for the first INODE_ITEM or the first change of inode
 * number, then check_inode_item() is called from that position.  Because
 * check_inode_item() may move the path to another leaf, the upper levels
 * are re-validated against the cached nrefs afterwards.
 *
 * NOTE(review): loop control, gotos, labels and the return statement are
 * on lines not visible in this excerpt.
 */
1986 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1987 struct node_refs *nrefs, int *level, int ext_ref)
1989 struct extent_buffer *cur = path->nodes[0];
1990 struct btrfs_key key;
1994 int root_level = btrfs_header_level(root->node);
1996 int ret = 0; /* Final return value */
1997 int err = 0; /* Positive error bitmap */
/* Remember which leaf we started on so a leaf switch can be detected. */
1999 cur_bytenr = cur->start;
2001 /* skip to first inode item or the first inode number change */
2002 nritems = btrfs_header_nritems(cur);
2003 for (i = 0; i < nritems; i++) {
2004 btrfs_item_key_to_cpu(cur, &key, i);
2006 first_ino = key.objectid;
2007 if (key.type == BTRFS_INODE_ITEM_KEY ||
2008 (first_ino && first_ino != key.objectid))
2012 path->slots[0] = nritems;
2018 err |= check_inode_item(root, path, ext_ref);
2020 /* modify cur since check_inode_item may change path */
2021 cur = path->nodes[0];
2023 if (err & LAST_ITEM)
2026 /* still have inode items in this leaf */
2027 if (cur->start == cur_bytenr)
2031 * we have switched to another leaf, above nodes may
2032 * have changed, here walk down the path, if a node
2033 * or leaf is shared, check whether we can skip this
2036 for (i = root_level; i >= 0; i--) {
2037 if (path->nodes[i]->start == nrefs->bytenr[i])
2040 ret = update_nodes_refs(root,
2041 path->nodes[i]->start,
2046 if (!nrefs->need_check[i]) {
/* Drop the buffers held below *level and clear their path entries. */
2052 for (i = 0; i < *level; i++) {
2053 free_extent_buffer(path->nodes[i]);
2054 path->nodes[i] = NULL;
/*
 * Issue readahead for the children of 'node', starting at 'slot' and
 * running to the last pointer in the node.
 *
 * NOTE(review): a check on 'level' is presumably made on lines not visible
 * in this excerpt -- confirm.
 */
2063 static void reada_walk_down(struct btrfs_root *root,
2064 struct extent_buffer *node, int slot)
2066 struct btrfs_fs_info *fs_info = root->fs_info;
2073 level = btrfs_header_level(node);
2077 nritems = btrfs_header_nritems(node);
2078 for (i = slot; i < nritems; i++) {
2079 bytenr = btrfs_node_blockptr(node, i);
2080 ptr_gen = btrfs_node_ptr_generation(node, i);
2081 readahead_tree_block(fs_info, bytenr, ptr_gen);
2086 * Check the child node/leaf by the following condition:
2087 * 1. the first item key of the node/leaf should be the same with the one
2089 * 2. block in parent node should match the child node/leaf.
2090 * 3. generation of parent node and child's header should be consistent.
2092 * Or the child node/leaf pointed by the key in parent is not valid.
2094 * We hope to check leaf owner too, but since subvol may share leaves,
2095 * which makes leaf owner check not so strong, key check should be
2096 * sufficient enough for that case.
2098 static int check_child_node(struct extent_buffer *parent, int slot,
2099 struct extent_buffer *child)
2101 struct btrfs_key parent_key;
2102 struct btrfs_key child_key;
2105 btrfs_node_key_to_cpu(parent, &parent_key, slot);
2106 if (btrfs_header_level(child) == 0)
2107 btrfs_item_key_to_cpu(child, &child_key, 0);
2109 btrfs_node_key_to_cpu(child, &child_key, 0);
2111 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2114 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2115 parent_key.objectid, parent_key.type, parent_key.offset,
2116 child_key.objectid, child_key.type, child_key.offset);
2118 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2120 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2121 btrfs_node_blockptr(parent, slot),
2122 btrfs_header_bytenr(child));
2124 if (btrfs_node_ptr_generation(parent, slot) !=
2125 btrfs_header_generation(child)) {
2127 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2128 btrfs_header_generation(child),
2129 btrfs_node_ptr_generation(parent, slot));
2135 * For a tree node or leaf that is shared, we don't need to iterate it in
2136 * every fs or file tree check. Here we find all of its root ids, and only
2137 * check it in the fs or file tree which has the smallest root id.
/*
 * Decide whether 'root' is the tree from which a block should be checked,
 * given the list 'roots' of all root ids that reference the block.  A block
 * referenced by a single root is handled first; otherwise root->objectid is
 * compared with the first entry of the list's rb-tree.
 *
 * NOTE(review): the return statements are on lines not visible in this
 * excerpt; presumably non-zero means "check it here" -- confirm.
 */
2139 static int need_check(struct btrfs_root *root, struct ulist *roots)
2141 struct rb_node *node;
2142 struct ulist_node *u;
/* Only one root references the block. */
2144 if (roots->nnodes == 1)
2147 node = rb_first(&roots->root);
2148 u = rb_entry(node, struct ulist_node, rb_node);
2150 * current root id is not smallest, we skip it and let it be checked
2151 * in the fs or file tree who hash the smallest root id.
2153 if (root->objectid != u->val)
2160 * For a tree node or leaf, we record its reference count, so that if we
2161 * process this node or leaf again later, we don't need to compute it again.
/*
 * Refresh the nrefs cache entry for 'level' when it does not already
 * describe 'bytenr': look up the extent reference count, store it, and
 * compute need_check[level] from the roots that reference the block.
 *
 * NOTE(review): error handling, the condition selecting between the two
 * need_check assignments and the return statement are on lines not visible
 * in this excerpt.
 */
2163 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2164 struct node_refs *nrefs, u64 level)
2168 struct ulist *roots;
/* Cache miss for this level: look the block up again. */
2170 if (nrefs->bytenr[level] != bytenr) {
2171 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2172 level, 1, &refs, NULL);
2176 nrefs->bytenr[level] = bytenr;
2177 nrefs->refs[level] = refs;
2179 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2184 check = need_check(root, roots);
2186 nrefs->need_check[level] = check;
2188 nrefs->need_check[level] = 1;
/*
 * Original-mode descent: starting at path->nodes[*level], walk down
 * towards the leaves.  Leaves are handed to process_one_leaf(); for
 * internal nodes the child is validated with check_child_node() and
 * btrfs_check_leaf()/btrfs_check_node() before the path steps into it.
 * Extent reference counts are cached per level in nrefs, and blocks are
 * registered through enter_shared_node().
 *
 * NOTE(review): most branch conditions, gotos and the return statement
 * are on lines not visible in this excerpt.
 */
2195 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2196 struct walk_control *wc, int *level,
2197 struct node_refs *nrefs)
2199 enum btrfs_tree_block_status status;
2202 struct btrfs_fs_info *fs_info = root->fs_info;
2203 struct extent_buffer *next;
2204 struct extent_buffer *cur;
2208 WARN_ON(*level < 0);
2209 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached ref count of the starting block when it matches. */
2211 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2212 refs = nrefs->refs[*level];
2215 ret = btrfs_lookup_extent_info(NULL, root,
2216 path->nodes[*level]->start,
2217 *level, 1, &refs, NULL);
2222 nrefs->bytenr[*level] = path->nodes[*level]->start;
2223 nrefs->refs[*level] = refs;
2227 ret = enter_shared_node(root, path->nodes[*level]->start,
2235 while (*level >= 0) {
2236 WARN_ON(*level < 0);
2237 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2238 cur = path->nodes[*level];
/* The level stored in the block header must match where we are. */
2240 if (btrfs_header_level(cur) != *level)
/* All slots of this block have been visited. */
2243 if (path->slots[*level] >= btrfs_header_nritems(cur))
2246 ret = process_one_leaf(root, cur, wc);
2251 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2252 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
/* Same caching scheme for the child one level down. */
2254 if (bytenr == nrefs->bytenr[*level - 1]) {
2255 refs = nrefs->refs[*level - 1];
2257 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2258 *level - 1, 1, &refs, NULL);
2262 nrefs->bytenr[*level - 1] = bytenr;
2263 nrefs->refs[*level - 1] = refs;
2268 ret = enter_shared_node(root, bytenr, refs,
2271 path->slots[*level]++;
/* Use a cached, up-to-date buffer if there is one; otherwise read it. */
2276 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2277 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2278 free_extent_buffer(next);
2279 reada_walk_down(root, cur, path->slots[*level]);
2280 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
/* Unreadable child: record it as a corrupt extent. */
2281 if (!extent_buffer_uptodate(next)) {
2282 struct btrfs_key node_key;
2284 btrfs_node_key_to_cpu(path->nodes[*level],
2286 path->slots[*level]);
2287 btrfs_add_corrupt_extent_record(root->fs_info,
2289 path->nodes[*level]->start,
2290 root->fs_info->nodesize,
2297 ret = check_child_node(cur, path->slots[*level], next);
2299 free_extent_buffer(next);
2304 if (btrfs_is_leaf(next))
2305 status = btrfs_check_leaf(root, NULL, next);
2307 status = btrfs_check_node(root, NULL, next);
2308 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2309 free_extent_buffer(next);
/* Step down: the child replaces whatever the path held at that level. */
2314 *level = *level - 1;
2315 free_extent_buffer(path->nodes[*level]);
2316 path->nodes[*level] = next;
2317 path->slots[*level] = 0;
/* Mark the current block as fully consumed for the walk back up. */
2320 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/* Forward declaration; check_inode_item() is defined outside this excerpt. */
2324 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2325 unsigned int ext_ref);
2328 * Returns >0 Found error, should continue
2329 * Returns <0 Fatal error, must exit the whole check
2330 * Returns 0 No errors found
/*
 * Descent counterpart of walk_down_tree() built on process_one_leaf_v2().
 * Each block is validated with btrfs_check_leaf()/btrfs_check_node()
 * before use, leaves are handed to process_one_leaf_v2(), and children
 * whose need_check flag is clear in nrefs are skipped.
 *
 * NOTE(review): most branch conditions, gotos and the return statement
 * are on lines not visible in this excerpt.
 */
2332 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2333 int *level, struct node_refs *nrefs, int ext_ref)
2335 enum btrfs_tree_block_status status;
2338 struct btrfs_fs_info *fs_info = root->fs_info;
2339 struct extent_buffer *next;
2340 struct extent_buffer *cur;
2343 WARN_ON(*level < 0);
2344 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2346 ret = update_nodes_refs(root, path->nodes[*level]->start,
2351 while (*level >= 0) {
2352 WARN_ON(*level < 0);
2353 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2354 cur = path->nodes[*level];
/* The level stored in the block header must match where we are. */
2356 if (btrfs_header_level(cur) != *level)
/* All slots of this block have been visited. */
2359 if (path->slots[*level] >= btrfs_header_nritems(cur))
2361 /* Don't forget to check leaf/node validation */
2363 ret = btrfs_check_leaf(root, NULL, cur);
2364 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2368 ret = process_one_leaf_v2(root, path, nrefs,
/* process_one_leaf_v2() may have moved the path; reload the block. */
2370 cur = path->nodes[*level];
2373 ret = btrfs_check_node(root, NULL, cur);
2374 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2379 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2380 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2382 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
/* The child does not need checking from this root: skip its slot. */
2385 if (!nrefs->need_check[*level - 1]) {
2386 path->slots[*level]++;
/* Use a cached, up-to-date buffer if there is one; otherwise read it. */
2390 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2391 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2392 free_extent_buffer(next);
2393 reada_walk_down(root, cur, path->slots[*level]);
2394 next = read_tree_block(fs_info, bytenr, ptr_gen);
/* Unreadable child: record it as a corrupt extent. */
2395 if (!extent_buffer_uptodate(next)) {
2396 struct btrfs_key node_key;
2398 btrfs_node_key_to_cpu(path->nodes[*level],
2400 path->slots[*level]);
2401 btrfs_add_corrupt_extent_record(fs_info,
2403 path->nodes[*level]->start,
2411 ret = check_child_node(cur, path->slots[*level], next);
2415 if (btrfs_is_leaf(next))
2416 status = btrfs_check_leaf(root, NULL, next);
2418 status = btrfs_check_node(root, NULL, next);
2419 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2420 free_extent_buffer(next);
/* Step down: the child replaces whatever the path held at that level. */
2425 *level = *level - 1;
2426 free_extent_buffer(path->nodes[*level]);
2427 path->nodes[*level] = next;
2428 path->slots[*level] = 0;
/*
 * Climb the path after a subtree has been finished.  Levels are visited
 * from *level upwards; a level that still has an unvisited slot ends the
 * climb.  Otherwise the buffer at *level is released and, when that level
 * is the active shared node, leave_shared_node() is called.
 *
 * NOTE(review): the slot/level updates inside the loop and the return
 * statements are on lines not visible in this excerpt.
 */
2433 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2434 struct walk_control *wc, int *level)
2437 struct extent_buffer *leaf;
2439 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite the name, 'leaf' is the block at level i, not only a leaf. */
2440 leaf = path->nodes[i];
2441 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2446 free_extent_buffer(path->nodes[*level]);
2447 path->nodes[*level] = NULL;
2448 BUG_ON(*level > wc->active_node);
2449 if (*level == wc->active_node)
2450 leave_shared_node(root, wc, *level);
/*
 * Same climb as walk_up_tree(), but no walk_control or shared-node
 * handling appears in the visible lines: levels are visited from *level
 * upwards until one still has an unvisited slot, releasing the buffer at
 * *level otherwise.
 *
 * NOTE(review): the slot/level updates inside the loop and the return
 * statements are on lines not visible in this excerpt.
 */
2457 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2461 struct extent_buffer *leaf;
2463 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite the name, 'leaf' is the block at level i, not only a leaf. */
2464 leaf = path->nodes[i];
2465 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2470 free_extent_buffer(path->nodes[*level]);
2471 path->nodes[*level] = NULL;
/*
 * Sanity-check the inode record of a root directory.  The visible tests
 * look at: presence of the inode item and absence of errors, nlink and
 * found_link, a non-empty backref list, and the first backref (inode ref
 * found, index 0, name "..", no dir index or dir item).
 *
 * NOTE(review): the statements taken when each test fires and the return
 * value are on lines not visible in this excerpt.
 */
2478 static int check_root_dir(struct inode_record *rec)
2480 struct inode_backref *backref;
2483 if (!rec->found_inode_item || rec->errors)
2485 if (rec->nlink != 1 || rec->found_link != 0)
2487 if (list_empty(&rec->backrefs))
/* Only the first backref on the list is examined. */
2489 backref = to_inode_backref(rec->backrefs.next);
2490 if (!backref->found_inode_ref)
2492 if (backref->index != 0 || backref->namelen != 2 ||
2493 memcmp(backref->name, "..", 2))
2495 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size stored in the INODE_ITEM of rec->ino with
 * rec->found_size, mark the leaf dirty, and clear I_ERR_DIR_ISIZE_WRONG
 * on the record.  The path is released before returning.
 *
 * NOTE(review): error handling, the slot adjustment after the search and
 * the return statement are on lines not visible in this excerpt.
 */
2502 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2503 struct btrfs_root *root, struct btrfs_path *path,
2504 struct inode_record *rec)
2506 struct btrfs_inode_item *ei;
2507 struct btrfs_key key;
/*
 * Search with the maximum offset; an exact match is not expected, so the
 * inode item is presumably reached by stepping back one slot -- confirm.
 */
2510 key.objectid = rec->ino;
2511 key.type = BTRFS_INODE_ITEM_KEY;
2512 key.offset = (u64)-1;
2514 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2518 if (!path->slots[0]) {
2525 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* The slot does not belong to the inode being repaired. */
2526 if (key.objectid != rec->ino) {
2531 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2532 struct btrfs_inode_item);
2533 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2534 btrfs_mark_buffer_dirty(path->nodes[0]);
2535 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2536 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2537 root->root_key.objectid);
2539 btrfs_release_path(path);
/*
 * Add the orphan item for rec->ino with btrfs_add_orphan_item(), release
 * the path, and clear I_ERR_NO_ORPHAN_ITEM on the record.
 *
 * NOTE(review): whether the flag is cleared only on success, and the
 * return statement, are on lines not visible in this excerpt.
 */
2543 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2544 struct btrfs_root *root,
2545 struct btrfs_path *path,
2546 struct inode_record *rec)
2550 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2551 btrfs_release_path(path);
2553 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes stored in the INODE_ITEM of rec->ino with
 * rec->found_size, mark the leaf dirty, and clear I_ERR_FILE_NBYTES_WRONG
 * on the record.  The path is released before returning.
 *
 * NOTE(review): the key offset assignment, error handling and the return
 * statement are on lines not visible in this excerpt.
 */
2557 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2558 struct btrfs_root *root,
2559 struct btrfs_path *path,
2560 struct inode_record *rec)
2562 struct btrfs_inode_item *ei;
2563 struct btrfs_key key;
2566 key.objectid = rec->ino;
2567 key.type = BTRFS_INODE_ITEM_KEY;
2570 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2577 /* Since ret == 0, no need to check anything */
2578 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2579 struct btrfs_inode_item);
2580 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2581 btrfs_mark_buffer_dirty(path->nodes[0]);
2582 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2583 printf("reset nbytes for ino %llu root %llu\n",
2584 rec->ino, root->root_key.objectid);
2586 btrfs_release_path(path);
/*
 * Insert the DIR_INDEX item that 'backref' says is missing for inode
 * 'rec'.  A transaction is started, an empty item keyed
 * (backref->dir, DIR_INDEX, backref->index) is inserted and filled with a
 * dir item pointing at rec->ino, and the transaction is committed.
 * Afterwards the backref is marked as having a dir index, and the parent
 * directory's found_size and I_ERR_DIR_ISIZE_WRONG flag are updated.
 *
 * NOTE(review): error handling after btrfs_start_transaction() and
 * btrfs_insert_empty_item(), and the return statement, are on lines not
 * visible in this excerpt.  The result of btrfs_commit_transaction() is
 * not used in the visible code.
 */
2590 static int add_missing_dir_index(struct btrfs_root *root,
2591 struct cache_tree *inode_cache,
2592 struct inode_record *rec,
2593 struct inode_backref *backref)
2595 struct btrfs_path path;
2596 struct btrfs_trans_handle *trans;
2597 struct btrfs_dir_item *dir_item;
2598 struct extent_buffer *leaf;
2599 struct btrfs_key key;
2600 struct btrfs_disk_key disk_key;
2601 struct inode_record *dir_rec;
2602 unsigned long name_ptr;
/* Item payload: the dir_item header followed by the name bytes. */
2603 u32 data_size = sizeof(*dir_item) + backref->namelen;
2606 trans = btrfs_start_transaction(root, 1);
2608 return PTR_ERR(trans);
2610 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2611 (unsigned long long)rec->ino);
2613 btrfs_init_path(&path);
2614 key.objectid = backref->dir;
2615 key.type = BTRFS_DIR_INDEX_KEY;
2616 key.offset = backref->index;
2617 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2620 leaf = path.nodes[0];
2621 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* Location key of the entry: the inode item of rec->ino. */
2623 disk_key.objectid = cpu_to_le64(rec->ino);
2624 disk_key.type = BTRFS_INODE_ITEM_KEY;
2625 disk_key.offset = 0;
2627 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2628 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2629 btrfs_set_dir_data_len(leaf, dir_item, 0);
2630 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is stored right after the dir_item header. */
2631 name_ptr = (unsigned long)(dir_item + 1);
2632 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2633 btrfs_mark_buffer_dirty(leaf);
2634 btrfs_release_path(&path);
2635 btrfs_commit_transaction(trans, root);
2637 backref->found_dir_index = 1;
2638 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2639 BUG_ON(IS_ERR(dir_rec));
/* Account for the new entry and re-evaluate the directory size error. */
2642 dir_rec->found_size += backref->namelen;
2643 if (dir_rec->found_size == dir_rec->isize &&
2644 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2645 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2646 if (dir_rec->found_size != dir_rec->isize)
2647 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by 'backref' from 'root'.  A
 * transaction is started, the entry is looked up with
 * btrfs_lookup_dir_index(), and it is removed either by deleting the whole
 * item or by deleting just the one name inside it; the transaction is then
 * committed.
 *
 * NOTE(review): the handling of a failed lookup, the condition choosing
 * between btrfs_del_item() and btrfs_delete_one_dir_name(), and the return
 * statements are on lines not visible in this excerpt.
 */
2652 static int delete_dir_index(struct btrfs_root *root,
2653 struct inode_backref *backref)
2655 struct btrfs_trans_handle *trans;
2656 struct btrfs_dir_item *di;
2657 struct btrfs_path path;
2660 trans = btrfs_start_transaction(root, 1);
2662 return PTR_ERR(trans);
2664 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2665 (unsigned long long)backref->dir,
2666 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2667 (unsigned long long)root->objectid);
2669 btrfs_init_path(&path);
2670 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2671 backref->name, backref->namelen,
2672 backref->index, -1);
/* This path commits the transaction without deleting anything. */
2675 btrfs_release_path(&path);
2676 btrfs_commit_transaction(trans, root);
2683 ret = btrfs_del_item(trans, root, &path);
2685 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2687 btrfs_release_path(&path);
2688 btrfs_commit_transaction(trans, root);
/*
 * Recreate a missing INODE_ITEM for rec->ino in 'root'.
 *
 * The item is built on the stack: generation is the transaction id,
 * nbytes is rec->found_size, and the mode/size are guessed from what was
 * found -- a directory (S_IFDIR | 0755, size = found_size) when a dir item
 * exists, otherwise a regular file (S_IFREG | 0755, size = extent_end).
 * atime/ctime/mtime are set to the current time, otime to zero.  The item
 * is inserted with btrfs_insert_inode() and the transaction is committed.
 *
 * NOTE(review): the third parameter, the condition choosing between the
 * two nlink assignments, the check on the insert result and the return
 * statements are on lines not visible in this excerpt.
 */
2692 static int create_inode_item(struct btrfs_root *root,
2693 struct inode_record *rec,
2696 struct btrfs_trans_handle *trans;
2697 struct btrfs_inode_item inode_item;
2698 time_t now = time(NULL);
2701 trans = btrfs_start_transaction(root, 1);
2702 if (IS_ERR(trans)) {
2703 ret = PTR_ERR(trans);
2707 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2708 "be incomplete, please check permissions and content after "
2709 "the fsck completes.\n", (unsigned long long)root->objectid,
2710 (unsigned long long)rec->ino);
2712 memset(&inode_item, 0, sizeof(inode_item));
2713 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2715 btrfs_set_stack_inode_nlink(&inode_item, 1);
2717 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2718 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2719 if (rec->found_dir_item) {
2720 if (rec->found_file_extent)
2721 fprintf(stderr, "root %llu inode %llu has both a dir "
2722 "item and extents, unsure if it is a dir or a "
2723 "regular file so setting it as a directory\n",
2724 (unsigned long long)root->objectid,
2725 (unsigned long long)rec->ino);
2726 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2727 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/* This condition is always true here; it is the plain else case. */
2728 } else if (!rec->found_dir_item) {
2729 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2730 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2732 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2733 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2734 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2735 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2736 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2737 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2738 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2739 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2741 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2743 btrfs_commit_transaction(trans, root);
/*
 * Walk every backref recorded for @rec and try to repair inconsistencies
 * between its dir item, dir index and inode ref.
 *
 * The `delete` flag tested below is declared on a line not visible in this
 * excerpt. The visible branches that add or create items are all guarded by
 * !delete.
 *
 * Returns ret when it is non-zero, otherwise the `repaired` counter.
 */
2747 static int repair_inode_backrefs(struct btrfs_root *root,
2748 struct inode_record *rec,
2749 struct cache_tree *inode_cache,
2752 struct inode_backref *tmp, *backref;
2753 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2757 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* Root directory without an inode item: recreate it (third argument 1). */
2758 if (!delete && rec->ino == root_dirid) {
2759 if (!rec->found_inode_item) {
2760 ret = create_inode_item(root, rec, 1);
2767 /* Index 0 for root dir's are special, don't mess with it */
2768 if (rec->ino == root_dirid && backref->index == 0)
/*
 * A dir index without an inode ref, or one whose index disagrees with the
 * inode ref, is removed with delete_dir_index(). The opening of this
 * condition is on a line not visible in this excerpt.
 */
2772 ((backref->found_dir_index && !backref->found_inode_ref) ||
2773 (backref->found_dir_index && backref->found_inode_ref &&
2774 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2775 ret = delete_dir_index(root, backref);
2779 list_del(&backref->list);
/* Dir item and inode ref exist but the dir index is missing: add it. */
2784 if (!delete && !backref->found_dir_index &&
2785 backref->found_dir_item && backref->found_inode_ref) {
2786 ret = add_missing_dir_index(root, inode_cache, rec,
/* All three items present and no error bits: drop the backref from the list. */
2791 if (backref->found_dir_item &&
2792 backref->found_dir_index) {
2793 if (!backref->errors &&
2794 backref->found_inode_ref) {
2795 list_del(&backref->list);
/*
 * Only the inode ref exists: after check_dir_conflict() (result handling not
 * fully visible here), insert the dir item for it in its own transaction.
 */
2802 if (!delete && (!backref->found_dir_index &&
2803 !backref->found_dir_item &&
2804 backref->found_inode_ref)) {
2805 struct btrfs_trans_handle *trans;
2806 struct btrfs_key location;
2808 ret = check_dir_conflict(root, backref->name,
2814 * let nlink fixing routine to handle it,
2815 * which can do it better.
2820 location.objectid = rec->ino;
2821 location.type = BTRFS_INODE_ITEM_KEY;
2822 location.offset = 0;
2824 trans = btrfs_start_transaction(root, 1);
2825 if (IS_ERR(trans)) {
2826 ret = PTR_ERR(trans);
2829 fprintf(stderr, "adding missing dir index/item pair "
2831 (unsigned long long)rec->ino);
2832 ret = btrfs_insert_dir_item(trans, root, backref->name,
2834 backref->dir, &location,
2835 imode_to_type(rec->imode),
2838 btrfs_commit_transaction(trans, root);
/*
 * The backref is complete and its index matches, but the inode item itself
 * is missing: recreate it (third argument 0).
 */
2842 if (!delete && (backref->found_inode_ref &&
2843 backref->found_dir_index &&
2844 backref->found_dir_item &&
2845 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2846 !rec->found_inode_item)) {
2847 ret = create_inode_item(root, rec, 0);
2854 return ret ? ret : repaired;
2858 * To determine the file type for nlink/inode_item repair
2860 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2861 * Return -ENOENT if file type is not found.
2863 static int find_file_type(struct inode_record *rec, u8 *type)
2865 struct inode_backref *backref;
2867 /* For inode item recovered case */
/* The recorded inode mode is converted to a BTRFS_FT_* value. */
2868 if (rec->found_inode_item) {
2869 *type = imode_to_type(rec->imode);
/*
 * Otherwise use the filetype recorded on a backref that was seen as a dir
 * index or dir item. The value is copied as-is; no range check is visible
 * in this excerpt.
 */
2873 list_for_each_entry(backref, &rec->backrefs, list) {
2874 if (backref->found_dir_index || backref->found_dir_item) {
2875 *type = backref->filetype;
2883 * To determine the file name for nlink repair
2885 * Return 0 if file name is found, set name and namelen.
2886 * Return -ENOENT if file name is not found.
2888 static int find_file_name(struct inode_record *rec,
2889 char *name, int *namelen)
2891 struct inode_backref *backref;
/*
 * Any backref seen as dir index, dir item or inode ref carries a usable
 * name. Exactly backref->namelen bytes are copied, so the memcpy itself
 * writes no NUL terminator, and the capacity of the caller's buffer is not
 * checked here.
 */
2893 list_for_each_entry(backref, &rec->backrefs, list) {
2894 if (backref->found_dir_index || backref->found_dir_item ||
2895 backref->found_inode_ref) {
2896 memcpy(name, backref->name, backref->namelen);
2897 *namelen = backref->namelen;
2904 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of rec->ino from scratch: unlink every recorded
 * backref, write nlink = 0 into the on-disk inode item, then re-add the
 * backrefs still on rec->backrefs with btrfs_add_link().
 */
2905 static int reset_nlink(struct btrfs_trans_handle *trans,
2906 struct btrfs_root *root,
2907 struct btrfs_path *path,
2908 struct inode_record *rec)
2910 struct inode_backref *backref;
2911 struct inode_backref *tmp;
2912 struct btrfs_key key;
2913 struct btrfs_inode_item *inode_item;
2916 /* We don't believe this either, reset it and iterate backref */
2917 rec->found_link = 0;
2919 /* Remove all backref including the valid ones */
2920 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2921 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2922 backref->index, backref->name,
2923 backref->namelen, 0);
2927 /* remove invalid backref, so it won't be added back */
/* "Invalid" here means at least one of the three found_* flags is unset. */
2928 if (!(backref->found_dir_index &&
2929 backref->found_dir_item &&
2930 backref->found_inode_ref)) {
2931 list_del(&backref->list);
2938 /* Set nlink to 0 */
2939 key.objectid = rec->ino;
2940 key.type = BTRFS_INODE_ITEM_KEY;
2942 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* The inode item is modified in place in the leaf and the leaf marked dirty. */
2949 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2950 struct btrfs_inode_item);
2951 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2952 btrfs_mark_buffer_dirty(path->nodes[0]);
2953 btrfs_release_path(path);
2956 * Add back valid inode_ref/dir_item/dir_index,
2957 * add_link() will handle the nlink inc, so new nlink must be correct
2959 list_for_each_entry(backref, &rec->backrefs, list) {
2960 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2961 backref->name, backref->namelen,
2962 backref->filetype, &backref->index, 1);
2967 btrfs_release_path(path);
/*
 * Find the highest inode number in @root: search for an INODE_ITEM key at
 * BTRFS_LAST_FREE_OBJECTID and store the objectid of the key in the slot
 * just before the returned position into *highest_ino.
 * The declaration of highest_ino is on a line not visible in this excerpt.
 */
2971 static int get_highest_inode(struct btrfs_trans_handle *trans,
2972 struct btrfs_root *root,
2973 struct btrfs_path *path,
2976 struct btrfs_key key, found_key;
2979 btrfs_init_path(path);
2980 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2982 key.type = BTRFS_INODE_ITEM_KEY;
2983 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
/*
 * NOTE(review): slots[0] - 1 is read below; whether slots[0] == 0 is ruled
 * out beforehand cannot be told from the visible lines - confirm.
 */
2985 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2986 path->slots[0] - 1);
2987 *highest_ino = found_key.objectid;
/*
 * The result is compared against BTRFS_LAST_FREE_OBJECTID; what happens when
 * the limit is reached is on a line not visible in this excerpt.
 */
2990 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2992 btrfs_release_path(path);
/*
 * Repair a wrong link count for rec->ino.
 *
 * A name and file type are recovered first (with fallbacks), the link count
 * is rebuilt by reset_nlink(), and an inode left with found_link == 0 is
 * linked into a "lost+found" directory, extending the name with a suffix
 * while btrfs_add_link() reports -EEXIST.
 * I_ERR_LINK_COUNT_WRONG is cleared from rec->errors near the end.
 */
2996 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2997 struct btrfs_root *root,
2998 struct btrfs_path *path,
2999 struct inode_record *rec)
3001 char *dir_name = "lost+found";
3002 char namebuf[BTRFS_NAME_LEN] = {0};
3007 int name_recovered = 0;
3008 int type_recovered = 0;
3012 * Get file name and type first before these invalid inode ref
3013 * are deleted by remove_all_invalid_backref()
/* Both helpers return 0 on success, hence the negation. */
3015 name_recovered = !find_file_name(rec, namebuf, &namelen);
3016 type_recovered = !find_file_type(rec, &type);
/* Fallback name: the inode number printed in decimal. */
3018 if (!name_recovered) {
3019 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3020 rec->ino, rec->ino);
3021 namelen = count_digits(rec->ino);
3022 sprintf(namebuf, "%llu", rec->ino);
3025 if (!type_recovered) {
3026 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3028 type = BTRFS_FT_REG_FILE;
3032 ret = reset_nlink(trans, root, path, rec);
3035 "Failed to reset nlink for inode %llu: %s\n",
3036 rec->ino, strerror(-ret));
/* No valid link survived reset_nlink(): move the inode to lost+found. */
3040 if (rec->found_link == 0) {
3041 ret = get_highest_inode(trans, root, path, &lost_found_ino);
/*
 * lost+found is created under BTRFS_FIRST_FREE_OBJECTID; lost_found_ino is
 * passed by address to btrfs_mkdir().
 */
3045 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3046 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3049 fprintf(stderr, "Failed to create '%s' dir: %s\n",
3050 dir_name, strerror(-ret));
3053 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
3054 namebuf, namelen, type, NULL, 1);
3056 * Add ".INO" suffix several times to handle case where
3057 * "FILENAME.INO" is already taken by another file.
3059 while (ret == -EEXIST) {
3061 * Conflicting file name, add ".INO" as suffix * +1 for '.'
/* The suffix is appended in place; namelen grows by digits + 1 each round. */
3063 if (namelen + count_digits(rec->ino) + 1 >
3068 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
3070 namelen += count_digits(rec->ino) + 1;
3071 ret = btrfs_add_link(trans, root, rec->ino,
3072 lost_found_ino, namebuf,
3073 namelen, type, NULL, 1);
3077 "Failed to link the inode %llu to %s dir: %s\n",
3078 rec->ino, dir_name, strerror(-ret));
3082 * Just increase the found_link, don't actually add the
3083 * backref. This will make things easier and this inode
3084 * record will be freed after the repair is done.
3085 * So fsck will not report problem about this inode.
3088 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3089 namelen, namebuf, dir_name);
3091 printf("Fixed the nlink of inode %llu\n", rec->ino);
3094 * Clear the flag anyway, or we will loop forever for the same inode
3095 * as it will not be removed from the bad inode list and the dead loop
3098 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3099 btrfs_release_path(path);
3104 * Check if there is any normal (reg or prealloc) file extent for the given
3106 * This is used to determine the file type when neither its dir_index/item or
3107 * inode_item exists.
3109 * This will *NOT* report errors; if any error happens, just assume it does
3110 * not have any normal file extent.
3112 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3114 struct btrfs_path path;
3115 struct btrfs_key key;
3116 struct btrfs_key found_key;
3117 struct btrfs_file_extent_item *fi;
3121 btrfs_init_path(&path);
3123 key.type = BTRFS_EXTENT_DATA_KEY;
/* No transaction handle is passed to the search. */
3126 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* The search ended past the last item of the leaf: move on to the next leaf. */
3131 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3132 ret = btrfs_next_leaf(root, &path);
3139 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
/* Only EXTENT_DATA items that belong to @ino are of interest. */
3141 if (found_key.objectid != ino ||
3142 found_key.type != BTRFS_EXTENT_DATA_KEY)
3144 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3145 struct btrfs_file_extent_item);
3146 type = btrfs_file_extent_type(path.nodes[0], fi);
/*
 * Every extent type other than inline is treated as a "normal" one; the
 * statements inside this branch are on lines not visible in this excerpt.
 */
3147 if (type != BTRFS_FILE_EXTENT_INLINE) {
3153 btrfs_release_path(&path);
3157 static u32 btrfs_type_to_imode(u8 type)
3159 static u32 imode_by_btrfs_type[] = {
3160 [BTRFS_FT_REG_FILE] = S_IFREG,
3161 [BTRFS_FT_DIR] = S_IFDIR,
3162 [BTRFS_FT_CHRDEV] = S_IFCHR,
3163 [BTRFS_FT_BLKDEV] = S_IFBLK,
3164 [BTRFS_FT_FIFO] = S_IFIFO,
3165 [BTRFS_FT_SOCK] = S_IFSOCK,
3166 [BTRFS_FT_SYMLINK] = S_IFLNK,
3169 return imode_by_btrfs_type[(type)];
/*
 * Recreate the missing inode item for rec->ino.
 *
 * The file type comes from find_file_type() when that succeeds; otherwise it
 * is guessed from what was found for the inode (file extents, dir items,
 * orphan extents), with regular file as the last resort. Afterwards the
 * record is updated, I_ERR_NO_INODE_ITEM is cleared and
 * I_ERR_LINK_COUNT_WRONG is set so that the nlink repair runs as well.
 */
3172 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3173 struct btrfs_root *root,
3174 struct btrfs_path *path,
3175 struct inode_record *rec)
3179 int type_recovered = 0;
3182 printf("Trying to rebuild inode:%llu\n", rec->ino);
/* find_file_type() returns 0 on success, hence the negation. */
3184 type_recovered = !find_file_type(rec, &filetype);
3187 * Try to determine inode type if type not found.
3189 * For found regular file extent, it must be FILE.
3190 * For found dir_item/index, it must be DIR.
3192 * For undetermined one, use FILE as fallback.
3195 * 1. If found backref(inode_index/item is already handled) to it,
3197 * Need new inode-inode ref structure to allow search for that.
3199 if (!type_recovered) {
3200 if (rec->found_file_extent &&
3201 find_normal_file_extent(root, rec->ino)) {
3203 filetype = BTRFS_FT_REG_FILE;
3204 } else if (rec->found_dir_item) {
3206 filetype = BTRFS_FT_DIR;
3207 } else if (!list_empty(&rec->orphan_extents)) {
3209 filetype = BTRFS_FT_REG_FILE;
3211 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3214 filetype = BTRFS_FT_REG_FILE;
/* `mode` is set on lines not visible here; the type bits are OR-ed into it. */
3218 ret = btrfs_new_inode(trans, root, rec->ino,
3219 mode | btrfs_type_to_imode(filetype));
3224 * Here inode rebuild is done, we only rebuild the inode item,
3225 * don't repair the nlink(like move to lost+found).
3226 * That is the job of nlink repair.
3228 * We just fill the record and return
/*
 * NOTE(review): found_dir_item is set here although an inode item was just
 * created; found_inode_item may have been intended - confirm.
 */
3230 rec->found_dir_item = 1;
3231 rec->imode = mode | btrfs_type_to_imode(filetype);
3233 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3234 /* Ensure the inode_nlinks repair function will be called */
3235 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Process the orphan data extents queued on rec->orphan_extents.
 *
 * For each entry the file range is first looked up with btrfs_get_extent().
 * A conflicting orphan is released with btrfs_free_extent(); the other
 * visible path inserts a file extent item and updates the size and hole
 * bookkeeping of @rec. The exact conditions between these steps are on
 * lines not visible in this excerpt.
 */
3240 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3241 struct btrfs_root *root,
3242 struct btrfs_path *path,
3243 struct inode_record *rec)
3245 struct orphan_data_extent *orphan;
3246 struct orphan_data_extent *tmp;
3249 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3251 * Check for conflicting file extents
3253 * Here we don't know whether the extents is compressed or not,
3254 * so we can only assume it not compressed nor data offset,
3255 * and use its disk_len as extent length.
3257 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3258 orphan->offset, orphan->disk_len, 0);
3259 btrfs_release_path(path);
3264 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3265 orphan->disk_bytenr, orphan->disk_len);
3266 ret = btrfs_free_extent(trans,
3267 root->fs_info->extent_root,
3268 orphan->disk_bytenr, orphan->disk_len,
3269 0, root->objectid, orphan->objectid,
/* disk_len is passed for both of the last two length arguments. */
3274 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3275 orphan->offset, orphan->disk_bytenr,
3276 orphan->disk_len, orphan->disk_len);
3280 /* Update file size info */
3281 rec->found_size += orphan->disk_len;
3282 if (rec->found_size == rec->nbytes)
3283 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3285 /* Update the file extent hole info too */
3286 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3290 if (RB_EMPTY_ROOT(&rec->holes))
3291 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3293 list_del(&orphan->list);
3296 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fix the gaps recorded in rec->holes: each entry is passed to
 * btrfs_punch_hole() and then removed from the tree.
 * I_ERR_FILE_EXTENT_DISCOUNT is cleared once the tree is empty.
 */
3301 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3302 struct btrfs_root *root,
3303 struct btrfs_path *path,
3304 struct inode_record *rec)
3306 struct rb_node *node;
3307 struct file_extent_hole *hole;
3311 node = rb_first(&rec->holes);
3315 hole = rb_entry(node, struct file_extent_hole, node);
3316 ret = btrfs_punch_hole(trans, root, rec->ino,
3317 hole->start, hole->len);
3320 ret = del_file_extent_hole(&rec->holes, hole->start,
3324 if (RB_EMPTY_ROOT(&rec->holes))
3325 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* Re-read the first node: the entry just handled has been deleted. */
3326 node = rb_first(&rec->holes);
3328 /* special case for a file losing all its file extent */
/* One hole from offset 0 covering isize rounded up to the sector size. */
3330 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3331 round_up(rec->isize,
3332 root->fs_info->sectorsize));
3336 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3337 rec->ino, root->objectid);
/*
 * Run the individual repair helpers for every error bit in rec->errors that
 * is listed in the mask below, all inside one transaction.
 * After the first step, each later step runs only while ret is still 0.
 */
3342 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3344 struct btrfs_trans_handle *trans;
3345 struct btrfs_path path;
/* Nothing to do unless at least one of the repairable bits is set. */
3348 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3349 I_ERR_NO_ORPHAN_ITEM |
3350 I_ERR_LINK_COUNT_WRONG |
3351 I_ERR_NO_INODE_ITEM |
3352 I_ERR_FILE_EXTENT_ORPHAN |
3353 I_ERR_FILE_EXTENT_DISCOUNT|
3354 I_ERR_FILE_NBYTES_WRONG)))
3358 * For nlink repair, it may create a dir and add link, so
3359 * 2 for parent(256)'s dir_index and dir_item
3360 * 2 for lost+found dir's inode_item and inode_ref
3361 * 1 for the new inode_ref of the file
3362 * 2 for lost+found dir's dir_index and dir_item for the file
3364 trans = btrfs_start_transaction(root, 7);
3366 return PTR_ERR(trans);
3368 btrfs_init_path(&path);
/*
 * The missing inode item is rebuilt first; repair_inode_no_item() sets
 * I_ERR_LINK_COUNT_WRONG, so the nlink step further down runs after it.
 */
3369 if (rec->errors & I_ERR_NO_INODE_ITEM)
3370 ret = repair_inode_no_item(trans, root, &path, rec);
3371 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3372 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3373 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3374 ret = repair_inode_discount_extent(trans, root, &path, rec);
3375 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3376 ret = repair_inode_isize(trans, root, &path, rec);
3377 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3378 ret = repair_inode_orphan_item(trans, root, &path, rec);
3379 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3380 ret = repair_inode_nlinks(trans, root, &path, rec);
3381 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3382 ret = repair_inode_nbytes(trans, root, &path, rec);
/*
 * NOTE(review): the transaction is committed whatever ret is, and the return
 * value of btrfs_commit_transaction() is not checked.
 */
3383 btrfs_commit_transaction(trans, root);
3384 btrfs_release_path(&path);
/*
 * Check every inode record collected for @root in @inode_cache, trying to
 * repair problems when the file-level `repair` flag is set, and print what
 * remains unresolved. Records are removed from the cache and freed as they
 * are processed.
 *
 * Returns -1 if any error was counted, 0 otherwise.
 */
3388 static int check_inode_recs(struct btrfs_root *root,
3389 struct cache_tree *inode_cache)
3391 struct cache_extent *cache;
3392 struct ptr_node *node;
3393 struct inode_record *rec;
3394 struct inode_backref *backref;
3399 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/*
 * For a root with zero refs only a warning about leftover records is
 * visible; the rest of this branch is on lines not visible here.
 */
3401 if (btrfs_root_refs(&root->root_item) == 0) {
3402 if (!cache_tree_empty(inode_cache))
3403 fprintf(stderr, "warning line %d\n", __LINE__);
3408 * We need to repair backrefs first because we could change some of the
3409 * errors in the inode recs.
3411 * We also need to go through and delete invalid backrefs first and then
3412 * add the correct ones second. We do this because we may get EEXIST
3413 * when adding back the correct index because we hadn't yet deleted the
3416 * For example, if we were missing a dir index then the directories
3417 * isize would be wrong, so if we fixed the isize to what we thought it
3418 * would be and then fixed the backref we'd still have a invalid fs, so
3419 * we need to add back the dir index and then check to see if the isize
3424 if (stage == 3 && !err)
/* The backref repair pass only runs in repair mode. */
3427 cache = search_cache_extent(inode_cache, 0);
3428 while (repair && cache) {
3429 node = container_of(cache, struct ptr_node, cache);
3431 cache = next_cache_extent(cache);
3433 /* Need to free everything up and rescan */
3435 remove_cache_extent(inode_cache, &node->cache);
3437 free_inode_rec(rec);
3441 if (list_empty(&rec->backrefs))
3444 ret = repair_inode_backrefs(root, rec, inode_cache,
/*
 * Look up the root directory's record (third argument 0 presumably means
 * "do not create" - confirm against get_inode_rec()).
 */
3458 rec = get_inode_rec(inode_cache, root_dirid, 0);
3459 BUG_ON(IS_ERR(rec));
3461 ret = check_root_dir(rec);
3463 fprintf(stderr, "root %llu root dir %llu error\n",
3464 (unsigned long long)root->root_key.objectid,
3465 (unsigned long long)root_dirid);
3466 print_inode_error(root, rec);
/* The root directory is recreated in a transaction of its own. */
3471 struct btrfs_trans_handle *trans;
3473 trans = btrfs_start_transaction(root, 1);
3474 if (IS_ERR(trans)) {
3475 err = PTR_ERR(trans);
3480 "root %llu missing its root dir, recreating\n",
3481 (unsigned long long)root->objectid);
3483 ret = btrfs_make_root_dir(trans, root, root_dirid);
3486 btrfs_commit_transaction(trans, root);
3490 fprintf(stderr, "root %llu root dir %llu not found\n",
3491 (unsigned long long)root->root_key.objectid,
3492 (unsigned long long)root_dirid);
/* Main pass: take each remaining record out of the cache and check it. */
3496 cache = search_cache_extent(inode_cache, 0);
3499 node = container_of(cache, struct ptr_node, cache);
3501 remove_cache_extent(inode_cache, &node->cache);
/* Records for the root dir and the orphan objectid are simply freed. */
3503 if (rec->ino == root_dirid ||
3504 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3505 free_inode_rec(rec);
3509 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3510 ret = check_orphan_item(root, rec->ino);
3512 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3513 if (can_free_inode_rec(rec)) {
3514 free_inode_rec(rec);
/* Derive the remaining error bits from what was found for the inode. */
3519 if (!rec->found_inode_item)
3520 rec->errors |= I_ERR_NO_INODE_ITEM;
3521 if (rec->found_link != rec->nlink)
3522 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3524 ret = try_repair_inode(root, rec);
3525 if (ret == 0 && can_free_inode_rec(rec)) {
3526 free_inode_rec(rec);
3532 if (!(repair && ret == 0))
3534 print_inode_error(root, rec);
/* Turn missing found_* flags into error bits before printing each backref. */
3535 list_for_each_entry(backref, &rec->backrefs, list) {
3536 if (!backref->found_dir_item)
3537 backref->errors |= REF_ERR_NO_DIR_ITEM;
3538 if (!backref->found_dir_index)
3539 backref->errors |= REF_ERR_NO_DIR_INDEX;
3540 if (!backref->found_inode_ref)
3541 backref->errors |= REF_ERR_NO_INODE_REF;
3542 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3543 " namelen %u name %s filetype %d errors %x",
3544 (unsigned long long)backref->dir,
3545 (unsigned long long)backref->index,
3546 backref->namelen, backref->name,
3547 backref->filetype, backref->errors);
3548 print_ref_error(backref->errors);
3550 free_inode_rec(rec);
3552 return (error > 0) ? -1 : 0;
/*
 * Return the root_record for `objectid` from @root_cache, allocating and
 * inserting a zeroed record when the lookup finds none.
 * The declaration of `objectid` is on a line not visible in this excerpt.
 *
 * Returns ERR_PTR(-ENOMEM) or ERR_PTR(-EEXIST) on the visible failure paths.
 */
3555 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3558 struct cache_extent *cache;
3559 struct root_record *rec = NULL;
3562 cache = lookup_cache_extent(root_cache, objectid, 1);
3564 rec = container_of(cache, struct root_record, cache);
3566 rec = calloc(1, sizeof(*rec));
3568 return ERR_PTR(-ENOMEM);
3569 rec->objectid = objectid;
3570 INIT_LIST_HEAD(&rec->backrefs);
/* The cache entry covers exactly one objectid. */
3571 rec->cache.start = objectid;
3572 rec->cache.size = 1;
3574 ret = insert_cache_extent(root_cache, &rec->cache);
/*
 * NOTE(review): no free(rec) is visible between the failed insert and this
 * return, so the new record looks leaked on this path - confirm.
 */
3576 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec that matches @ref_root, @dir and the name, or
 * allocate a zeroed one, fill it in and append it to rec->backrefs.
 * @index is stored in a new backref but is not part of the match.
 */
3581 static struct root_backref *get_root_backref(struct root_record *rec,
3582 u64 ref_root, u64 dir, u64 index,
3583 const char *name, int namelen)
3585 struct root_backref *backref;
3587 list_for_each_entry(backref, &rec->backrefs, list) {
3588 if (backref->ref_root != ref_root || backref->dir != dir ||
3589 backref->namelen != namelen)
3591 if (memcmp(name, backref->name, namelen))
/* Room for the name and its NUL terminator is allocated with the struct. */
3596 backref = calloc(1, sizeof(*backref) + namelen + 1);
3599 backref->ref_root = ref_root;
3601 backref->index = index;
3602 backref->namelen = namelen;
3603 memcpy(backref->name, name, namelen);
3604 backref->name[namelen] = '\0';
3605 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Tear down the root_record embedding @cache: each backref is unlinked from
 * rec->backrefs in turn. The statements that release the memory are on
 * lines not visible in this excerpt.
 */
3609 static void free_root_record(struct cache_extent *cache)
3611 struct root_record *rec;
3612 struct root_backref *backref;
3614 rec = container_of(cache, struct root_record, cache);
3615 while (!list_empty(&rec->backrefs)) {
3616 backref = to_root_backref(rec->backrefs.next);
3617 list_del(&backref->list);
/*
 * Presumably generates the free_root_recs_tree() helper called elsewhere in
 * this file - confirm against the macro definition.
 */
3624 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that an item of type @item_type referring to root @root_id was
 * seen: the matching root_backref (created on demand) gets @errors OR-ed in,
 * its index checked or stored, and the found_* flag for the item type set.
 * A backref is marked reachable once both its forward root ref and its dir
 * item have been found.
 */
3626 static int add_root_backref(struct cache_tree *root_cache,
3627 u64 root_id, u64 ref_root, u64 dir, u64 index,
3628 const char *name, int namelen,
3629 int item_type, int errors)
3631 struct root_record *rec;
3632 struct root_backref *backref;
3634 rec = get_root_rec(root_cache, root_id);
3635 BUG_ON(IS_ERR(rec));
3636 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3639 backref->errors |= errors;
/*
 * The index is compared or stored for every item type except DIR_ITEM. If
 * another indexed item was already seen, a differing index is flagged; the
 * assignment below is presumably the else branch - confirm.
 */
3641 if (item_type != BTRFS_DIR_ITEM_KEY) {
3642 if (backref->found_dir_index || backref->found_back_ref ||
3643 backref->found_forward_ref) {
3644 if (backref->index != index)
3645 backref->errors |= REF_ERR_INDEX_UNMATCH;
3647 backref->index = index;
3651 if (item_type == BTRFS_DIR_ITEM_KEY) {
3652 if (backref->found_forward_ref)
3654 backref->found_dir_item = 1;
3655 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3656 backref->found_dir_index = 1;
3657 } else if (item_type == BTRFS_ROOT_REF_KEY) {
/* A second ROOT_REF for the same backref is recorded as a duplicate. */
3658 if (backref->found_forward_ref)
3659 backref->errors |= REF_ERR_DUP_ROOT_REF;
3660 else if (backref->found_dir_item)
3662 backref->found_forward_ref = 1;
3663 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3664 if (backref->found_back_ref)
3665 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3666 backref->found_back_ref = 1;
3671 if (backref->found_forward_ref && backref->found_dir_item)
3672 backref->reachable = 1;
/*
 * Drain @src_cache, the inode records gathered for @root. For each record,
 * is_child_root() is consulted and the record's dir item / dir index
 * backrefs are re-registered in @dst_cache through add_root_backref().
 * Every record is removed from @src_cache and freed.
 * For the tree reloc root the source cache is simply freed.
 */
3676 static int merge_root_recs(struct btrfs_root *root,
3677 struct cache_tree *src_cache,
3678 struct cache_tree *dst_cache)
3680 struct cache_extent *cache;
3681 struct ptr_node *node;
3682 struct inode_record *rec;
3683 struct inode_backref *backref;
3686 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3687 free_inode_recs_tree(src_cache);
3692 cache = search_cache_extent(src_cache, 0);
3695 node = container_of(cache, struct ptr_node, cache);
3697 remove_cache_extent(src_cache, &node->cache);
/* How the result of is_child_root() is used is on lines not visible here. */
3700 ret = is_child_root(root, root->objectid, rec->ino);
3706 list_for_each_entry(backref, &rec->backrefs, list) {
/* Backrefs handled here must not carry an inode ref. */
3707 BUG_ON(backref->found_inode_ref);
/* rec->ino is passed as the root id, this root as the referencing root. */
3708 if (backref->found_dir_item)
3709 add_root_backref(dst_cache, rec->ino,
3710 root->root_key.objectid, backref->dir,
3711 backref->index, backref->name,
3712 backref->namelen, BTRFS_DIR_ITEM_KEY,
3714 if (backref->found_dir_index)
3715 add_root_backref(dst_cache, rec->ino,
3716 root->root_key.objectid, backref->dir,
3717 backref->index, backref->name,
3718 backref->namelen, BTRFS_DIR_INDEX_KEY,
3722 free_inode_rec(rec);
/*
 * Check the references between the roots recorded in @root_cache and report
 * unreferenced roots and unresolved root backrefs on stderr.
 * The record for BTRFS_FS_TREE_OBJECTID is fetched first.
 *
 * Returns 1 if any error was counted, 0 otherwise.
 */
3729 static int check_root_refs(struct btrfs_root *root,
3730 struct cache_tree *root_cache)
3732 struct root_record *rec;
3733 struct root_record *ref_root;
3734 struct root_backref *backref;
3735 struct cache_extent *cache;
3741 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3742 BUG_ON(IS_ERR(rec));
3745 /* fixme: this can not detect circular references */
/*
 * First pass: a reachable backref whose referencing root has no reference
 * of its own (found_ref == 0) is marked unreachable.
 */
3748 cache = search_cache_extent(root_cache, 0);
3752 rec = container_of(cache, struct root_record, cache);
3753 cache = next_cache_extent(cache);
3755 if (rec->found_ref == 0)
3758 list_for_each_entry(backref, &rec->backrefs, list) {
3759 if (!backref->reachable)
3762 ref_root = get_root_rec(root_cache,
3764 BUG_ON(IS_ERR(ref_root));
3765 if (ref_root->found_ref > 0)
3768 backref->reachable = 0;
3770 if (rec->found_ref == 0)
/* Second pass: report what is left inconsistent. */
3776 cache = search_cache_extent(root_cache, 0);
3780 rec = container_of(cache, struct root_record, cache);
3781 cache = next_cache_extent(cache);
/* Unreferenced roots in the free objectid range get an orphan item check. */
3783 if (rec->found_ref == 0 &&
3784 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3785 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3786 ret = check_orphan_item(root->fs_info->tree_root,
3792 * If we don't have a root item then we likely just have
3793 * a dir item in a snapshot for this root but no actual
3794 * ref key or anything so it's meaningless.
3796 if (!rec->found_root_item)
3799 fprintf(stderr, "fs tree %llu not referenced\n",
3800 (unsigned long long)rec->objectid);
3804 if (rec->found_ref > 0 && !rec->found_root_item)
/* Turn missing found_* flags into error bits on each backref. */
3806 list_for_each_entry(backref, &rec->backrefs, list) {
3807 if (!backref->found_dir_item)
3808 backref->errors |= REF_ERR_NO_DIR_ITEM;
3809 if (!backref->found_dir_index)
3810 backref->errors |= REF_ERR_NO_DIR_INDEX;
3811 if (!backref->found_back_ref)
3812 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3813 if (!backref->found_forward_ref)
3814 backref->errors |= REF_ERR_NO_ROOT_REF;
3815 if (backref->reachable && backref->errors)
3822 fprintf(stderr, "fs tree %llu refs %u %s\n",
3823 (unsigned long long)rec->objectid, rec->found_ref,
3824 rec->found_root_item ? "" : "not found");
3826 list_for_each_entry(backref, &rec->backrefs, list) {
3827 if (!backref->reachable)
3829 if (!backref->errors && rec->found_root_item)
3831 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3832 " index %llu namelen %u name %s errors %x\n",
3833 (unsigned long long)backref->ref_root,
3834 (unsigned long long)backref->dir,
3835 (unsigned long long)backref->index,
3836 backref->namelen, backref->name,
3838 print_ref_error(backref->errors);
3841 return errors > 0 ? 1 : 0;
/*
 * Read the btrfs_root_ref item at @slot of @eb and register it in
 * @root_cache through add_root_backref().
 *
 * For a BTRFS_ROOT_REF_KEY, key->offset is passed as the root id and
 * key->objectid as the referencing root; for the other key type the two are
 * swapped. A name longer than BTRFS_NAME_LEN is truncated to that length
 * and flagged with REF_ERR_NAME_TOO_LONG.
 */
3844 static int process_root_ref(struct extent_buffer *eb, int slot,
3845 struct btrfs_key *key,
3846 struct cache_tree *root_cache)
3852 struct btrfs_root_ref *ref;
3853 char namebuf[BTRFS_NAME_LEN];
3856 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3858 dirid = btrfs_root_ref_dirid(eb, ref);
3859 index = btrfs_root_ref_sequence(eb, ref);
3860 name_len = btrfs_root_ref_name_len(eb, ref);
3862 if (name_len <= BTRFS_NAME_LEN) {
3866 len = BTRFS_NAME_LEN;
3867 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes are read from directly behind the root_ref structure. */
3869 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3871 if (key->type == BTRFS_ROOT_REF_KEY) {
3872 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3873 index, namebuf, len, key->type, error);
3875 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3876 index, namebuf, len, key->type, error);
/*
 * Callback that resolves @cache to its enclosing btrfs_corrupt_block; the
 * statement that releases it is on a line not visible in this excerpt.
 */
3881 static void free_corrupt_block(struct cache_extent *cache)
3883 struct btrfs_corrupt_block *corrupt;
3885 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/*
 * Presumably generates the free_corrupt_blocks_tree() helper called
 * elsewhere in this file - confirm against the macro definition.
 */
3889 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3892 * Repair the btree of the given root.
3894 * The fix is to remove the node key in corrupt_blocks cache_tree
3895 * and rebalance the tree.
3896 * After the fix, the btree should be writeable.
3898 static int repair_btree(struct btrfs_root *root,
3899 struct cache_tree *corrupt_blocks)
3901 struct btrfs_trans_handle *trans;
3902 struct btrfs_path path;
3903 struct btrfs_corrupt_block *corrupt;
3904 struct cache_extent *cache;
3905 struct btrfs_key key;
/* Nothing was recorded as corrupt: there is nothing to repair. */
3910 if (cache_tree_empty(corrupt_blocks))
3913 trans = btrfs_start_transaction(root, 1);
3914 if (IS_ERR(trans)) {
3915 ret = PTR_ERR(trans);
3916 fprintf(stderr, "Error starting transaction: %s\n",
/*
 * First pass: for every recorded corrupt block, search for its key down to
 * the block's recorded level, remove the pointer found there and free the
 * extent that pointer referenced.
 */
3920 btrfs_init_path(&path);
3921 cache = first_cache_extent(corrupt_blocks);
3923 corrupt = container_of(cache, struct btrfs_corrupt_block,
3925 level = corrupt->level;
3926 path.lowest_level = level;
3927 key.objectid = corrupt->key.objectid;
3928 key.type = corrupt->key.type;
3929 key.offset = corrupt->key.offset;
3932 * Here we don't want to do any tree balance, since it may
3933 * cause a balance with corrupted brother leaf/node,
3934 * so ins_len set to 0 here.
3935 * Balance will be done after all corrupt node/leaf is deleted.
3937 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
/* Remember the block address before the pointer to it is deleted. */
3940 offset = btrfs_node_blockptr(path.nodes[level],
3943 /* Remove the ptr */
3944 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3948 * Remove the corresponding extent
3949 * return value is not concerned.
3951 btrfs_release_path(&path);
3952 ret = btrfs_free_extent(trans, root, offset,
3953 root->fs_info->nodesize, 0,
3954 root->root_key.objectid, level - 1, 0);
3955 cache = next_cache_extent(cache);
3958 /* Balance the btree using btrfs_search_slot() */
/* Second pass over the same keys, this time with ins_len -1. */
3959 cache = first_cache_extent(corrupt_blocks);
3961 corrupt = container_of(cache, struct btrfs_corrupt_block,
3963 memcpy(&key, &corrupt->key, sizeof(key));
3964 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3967 /* return will always >0 since it won't find the item */
3969 btrfs_release_path(&path);
3970 cache = next_cache_extent(cache);
/* NOTE(review): the return value of btrfs_commit_transaction() is not checked. */
3973 btrfs_commit_transaction(trans, root);
3974 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree.
 *
 * The tree is walked with walk_down_tree()/walk_up_tree() while corrupted
 * tree blocks are collected in a local cache tree. Those blocks are
 * reported and passed to repair_btree(), child-root information is merged
 * into @root_cache, and the collected inode records are finally checked
 * with check_inode_recs().
 */
3978 static int check_fs_root(struct btrfs_root *root,
3979 struct cache_tree *root_cache,
3980 struct walk_control *wc)
3986 struct btrfs_path path;
3987 struct shared_node root_node;
3988 struct root_record *rec;
3989 struct btrfs_root_item *root_item = &root->root_item;
3990 struct cache_tree corrupt_blocks;
3991 struct orphan_data_extent *orphan;
3992 struct orphan_data_extent *tmp;
3993 enum btrfs_tree_block_status status;
3994 struct node_refs nrefs;
3997 * Reuse the corrupt_block cache tree to record corrupted tree block
3999 * Unlike the usage in extent tree check, here we do it in a per
4000 * fs/subvol tree base.
/* The local tree is published via fs_info and detached again at the end. */
4002 cache_tree_init(&corrupt_blocks);
4003 root->fs_info->corrupt_blocks = &corrupt_blocks;
/* Every root except the tree reloc root gets a root_record. */
4005 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4006 rec = get_root_rec(root_cache, root->root_key.objectid);
4007 BUG_ON(IS_ERR(rec));
4008 if (btrfs_root_refs(root_item) > 0)
4009 rec->found_root_item = 1;
4012 btrfs_init_path(&path);
4013 memset(&root_node, 0, sizeof(root_node));
4014 cache_tree_init(&root_node.root_cache);
4015 cache_tree_init(&root_node.inode_cache);
4016 memset(&nrefs, 0, sizeof(nrefs));
4018 /* Move the orphan extent record to corresponding inode_record */
4019 list_for_each_entry_safe(orphan, tmp,
4020 &root->orphan_data_extents, list) {
4021 struct inode_record *inode;
4023 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4025 BUG_ON(IS_ERR(inode));
4026 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4027 list_move(&orphan->list, &inode->orphan_extents);
/* The walk state starts at the level of the root node. */
4030 level = btrfs_header_level(root->node);
4031 memset(wc->nodes, 0, sizeof(wc->nodes));
4032 wc->nodes[level] = &root_node;
4033 wc->active_node = level;
4034 wc->root_level = level;
4036 /* We may not have checked the root block, lets do that now */
4037 if (btrfs_is_leaf(root->node))
4038 status = btrfs_check_leaf(root, NULL, root->node);
4040 status = btrfs_check_node(root, NULL, root->node);
4041 if (status != BTRFS_TREE_BLOCK_CLEAN)
/*
 * The root is referenced, or no drop progress is recorded: start the walk
 * from the root node, taking an extra reference on it.
 */
4044 if (btrfs_root_refs(root_item) > 0 ||
4045 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4046 path.nodes[level] = root->node;
4047 extent_buffer_get(root->node);
4048 path.slots[level] = 0;
/* Otherwise position the path at the recorded drop_progress key and level. */
4050 struct btrfs_key key;
4051 struct btrfs_disk_key found_key;
4053 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4054 level = root_item->drop_level;
4055 path.lowest_level = level;
4056 if (level > btrfs_header_level(root->node) ||
4057 level >= BTRFS_MAX_LEVEL) {
4058 error("ignoring invalid drop level: %u", level);
4061 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4064 btrfs_node_key(path.nodes[level], &found_key,
4066 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4067 sizeof(found_key)));
4071 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4077 wret = walk_up_tree(root, &path, wc, &level);
4084 btrfs_release_path(&path);
/* Report every corrupted block recorded during the walk. */
4086 if (!cache_tree_empty(&corrupt_blocks)) {
4087 struct cache_extent *cache;
4088 struct btrfs_corrupt_block *corrupt;
4090 printf("The following tree block(s) is corrupted in tree %llu:\n",
4091 root->root_key.objectid);
4092 cache = first_cache_extent(&corrupt_blocks);
4094 corrupt = container_of(cache,
4095 struct btrfs_corrupt_block,
4097 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4098 cache->start, corrupt->level,
4099 corrupt->key.objectid, corrupt->key.type,
4100 corrupt->key.offset);
4101 cache = next_cache_extent(cache);
4104 printf("Try to repair the btree for root %llu\n",
4105 root->root_key.objectid);
4106 ret = repair_btree(root, &corrupt_blocks);
4108 fprintf(stderr, "Failed to repair btree: %s\n",
4111 printf("Btree for root %llu is fixed\n",
4112 root->root_key.objectid);
4116 err = merge_root_recs(root, &root_node.root_cache, root_cache);
/* The inode record still in progress is marked checked before the final pass. */
4120 if (root_node.current) {
4121 root_node.current->checked = 1;
4122 maybe_free_inode_rec(&root_node.inode_cache,
4126 err = check_inode_recs(root, &root_node.inode_cache);
4130 free_corrupt_blocks_tree(&corrupt_blocks);
4131 root->fs_info->corrupt_blocks = NULL;
4132 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Decide whether the root with @objectid is one that check_fs_roots() hands
 * to check_fs_root().
 *
 * BTRFS_TREE_RELOC_OBJECTID and BTRFS_DATA_RELOC_TREE_OBJECTID are tested
 * explicitly first; every other objectid is classified by is_fstree().
 */
4136 static int fs_root_objectid(u64 objectid)
4138 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4139 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
/* Anything that is not one of the two relocation trees: defer to is_fstree() */
4141 return is_fstree(objectid);
/*
 * Scan the tree root for fs-tree root items and run check_fs_root() on each.
 *
 * Every BTRFS_ROOT_ITEM_KEY whose objectid passes fs_root_objectid() is
 * loaded and checked; BTRFS_ROOT_REF_KEY / BTRFS_ROOT_BACKREF_KEY items are
 * passed to process_root_ref().
 *
 * @fs_info:	filesystem whose tree_root is scanned
 * @root_cache:	root record cache handed to check_fs_root() and
 *		process_root_ref(); it is emptied with free_root_recs_tree()
 *		when the tree root node changes or check_fs_root() returns
 *		-EAGAIN
 */
4144 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4145 struct cache_tree *root_cache)
4147 struct btrfs_path path;
4148 struct btrfs_key key;
4149 struct walk_control wc;
4150 struct extent_buffer *leaf, *tree_node;
4151 struct btrfs_root *tmp_root;
4152 struct btrfs_root *tree_root = fs_info->tree_root;
/* Switch the progress indicator to the fs-roots stage when it is enabled */
4156 if (ctx.progress_enabled) {
4157 ctx.tp = TASK_FS_ROOTS;
4158 task_start(ctx.info);
4162 * Just in case we made any changes to the extent tree that weren't
4163 * reflected into the free space cache yet.
4166 reset_cached_block_groups(fs_info);
/* Fresh walk state; wc.shared is the cache tree checked again at the end */
4167 memset(&wc, 0, sizeof(wc));
4168 cache_tree_init(&wc.shared);
4169 btrfs_init_path(&path);
4174 key.type = BTRFS_ROOT_ITEM_KEY;
4175 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember the tree root node so that a replacement can be detected below */
4180 tree_node = tree_root->node;
/* The tree root node changed: drop the collected root records and the path */
4182 if (tree_node != tree_root->node) {
4183 free_root_recs_tree(root_cache);
4184 btrfs_release_path(&path);
4187 leaf = path.nodes[0];
/* Past the last item of this leaf: move on to the next leaf */
4188 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4189 ret = btrfs_next_leaf(tree_root, &path);
4195 leaf = path.nodes[0];
4197 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4198 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4199 fs_root_objectid(key.objectid)) {
/* Reloc tree roots are read without the cache and freed again further down */
4200 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4201 tmp_root = btrfs_read_fs_root_no_cache(
4204 key.offset = (u64)-1;
4205 tmp_root = btrfs_read_fs_root(
4208 if (IS_ERR(tmp_root)) {
4212 ret = check_fs_root(tmp_root, root_cache, &wc);
/* -EAGAIN from check_fs_root(): discard the root records and the path */
4213 if (ret == -EAGAIN) {
4214 free_root_recs_tree(root_cache);
4215 btrfs_release_path(&path);
4220 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4221 btrfs_free_fs_root(tmp_root);
4222 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4223 key.type == BTRFS_ROOT_BACKREF_KEY) {
4224 process_root_ref(leaf, path.slots[0], &key,
4231 btrfs_release_path(&path);
4233 free_extent_cache_tree(&wc.shared);
/* A non-empty shared cache at this point is only reported, not treated as fatal */
4234 if (!cache_tree_empty(&wc.shared))
4235 fprintf(stderr, "warning line %d\n", __LINE__);
4237 task_stop(ctx.info);
4243 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4244 * INODE_REF/INODE_EXTREF match.
4246 * @root: the root of the fs/file tree
4247 * @ref_key: the key of the INODE_REF/INODE_EXTREF
4248 * @key: the key of the DIR_ITEM/DIR_INDEX
4249 * @index: the index in the INODE_REF/INODE_EXTREF, used to
4250 * distinguish the root dir from a normal dir/file
4251 * @name: the name in the INODE_REF/INODE_EXTREF
4252 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4253 * @mode: the st_mode of INODE_ITEM
4255 * Return 0 if no error occurred.
4256 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4257 * Return DIR_ITEM_MISSING if the DIR_ITEM/DIR_INDEX of a normal dir/file could not be found.
4259 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4260 * not match for normal dir/file.
/*
 * Look up @key (a DIR_ITEM or DIR_INDEX key) in @root and compare the entries
 * stored in that item with the inode ref described by @ref_key, @name,
 * @namelen and @mode.
 *
 * An entry is compared on three things: its location must be the INODE_ITEM
 * of ref_key->objectid, its file type must equal imode_to_type(@mode), and
 * its name must equal @name over @namelen bytes.
 */
4262 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4263 struct btrfs_key *key, u64 index, char *name,
4264 u32 namelen, u32 mode)
4266 struct btrfs_path path;
4267 struct extent_buffer *node;
4268 struct btrfs_dir_item *di;
4269 struct btrfs_key location;
4270 char namebuf[BTRFS_NAME_LEN] = {0};
4280 btrfs_init_path(&path);
4281 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4283 ret = DIR_ITEM_MISSING;
4287 /* Process root dir and goto out*/
4290 ret = ROOT_DIR_ERROR;
4292 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4294 ref_key->type == BTRFS_INODE_REF_KEY ?
4296 ref_key->objectid, ref_key->offset,
4297 key->type == BTRFS_DIR_ITEM_KEY ?
4298 "DIR_ITEM" : "DIR_INDEX");
4306 /* Process normal file/dir */
4308 ret = DIR_ITEM_MISSING;
4310 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4312 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4313 ref_key->objectid, ref_key->offset,
4314 key->type == BTRFS_DIR_ITEM_KEY ?
4315 "DIR_ITEM" : "DIR_INDEX",
4316 key->objectid, key->offset, namelen, name,
4317 imode_to_type(mode));
4321 /* Check whether inode_id/filetype/name match */
4322 node = path.nodes[0];
4323 slot = path.slots[0];
4324 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4325 total = btrfs_item_size_nr(node, slot);
/* One item can pack several entries; cur is compared against the item size */
4326 while (cur < total) {
/* Assume a mismatch for the entry being examined */
4327 ret = DIR_ITEM_MISMATCH;
4328 name_len = btrfs_dir_name_len(node, di);
4329 data_len = btrfs_dir_data_len(node, di);
/* The entry must point at the INODE_ITEM (offset 0) of the referencing inode */
4331 btrfs_dir_item_key_to_cpu(node, di, &location);
4332 if (location.objectid != ref_key->objectid ||
4333 location.type != BTRFS_INODE_ITEM_KEY ||
4334 location.offset != 0)
4337 filetype = btrfs_dir_type(node, di);
4338 if (imode_to_type(mode) != filetype)
/* A name that overruns the item or BTRFS_NAME_LEN is trimmed before comparing */
4341 if (cur + sizeof(*di) + name_len > total ||
4342 name_len > BTRFS_NAME_LEN) {
4343 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4345 key->type == BTRFS_DIR_ITEM_KEY ?
4346 "DIR_ITEM" : "DIR_INDEX",
4347 key->objectid, key->offset, name_len);
4349 if (cur + sizeof(*di) > total)
4351 len = min_t(u32, total - cur - sizeof(*di),
/* The name bytes start right after the fixed-size entry header */
4357 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4358 if (len != namelen || strncmp(namebuf, name, len))
/* Step over header, name and data to reach the next packed entry */
4364 len = sizeof(*di) + name_len + data_len;
4365 di = (struct btrfs_dir_item *)((char *)di + len);
4368 if (ret == DIR_ITEM_MISMATCH)
4370 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4372 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4373 ref_key->objectid, ref_key->offset,
4374 key->type == BTRFS_DIR_ITEM_KEY ?
4375 "DIR_ITEM" : "DIR_INDEX",
4376 key->objectid, key->offset, namelen, name,
4377 imode_to_type(mode));
4379 btrfs_release_path(&path);
4384 * Traverse the given INODE_REF and call find_dir_item() to find related
4385 * DIR_ITEM/DIR_INDEX.
4387 * @root: the root of the fs/file tree
4388 * @ref_key: the key of the INODE_REF
4389 * @refs: the count of INODE_REF
4390 * @mode: the st_mode of INODE_ITEM
4392 * Return 0 if no error occurred.
4394 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4395 struct extent_buffer *node, int slot, u64 *refs,
4398 struct btrfs_key key;
4399 struct btrfs_inode_ref *ref;
4400 char namebuf[BTRFS_NAME_LEN] = {0};
4408 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4409 total = btrfs_item_size_nr(node, slot);
4412 /* Update inode ref count */
4415 index = btrfs_inode_ref_index(node, ref);
4416 name_len = btrfs_inode_ref_name_len(node, ref);
4417 if (cur + sizeof(*ref) + name_len > total ||
4418 name_len > BTRFS_NAME_LEN) {
4419 warning("root %llu INODE_REF[%llu %llu] name too long",
4420 root->objectid, ref_key->objectid, ref_key->offset);
4422 if (total < cur + sizeof(*ref))
4424 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4429 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4431 /* Check root dir ref name */
4432 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4433 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4434 root->objectid, ref_key->objectid, ref_key->offset,
4436 err |= ROOT_DIR_ERROR;
4439 /* Find related DIR_INDEX */
4440 key.objectid = ref_key->offset;
4441 key.type = BTRFS_DIR_INDEX_KEY;
4443 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4446 /* Find related dir_item */
4447 key.objectid = ref_key->offset;
4448 key.type = BTRFS_DIR_ITEM_KEY;
4449 key.offset = btrfs_name_hash(namebuf, len);
4450 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4453 len = sizeof(*ref) + name_len;
4454 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4464 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4465 * DIR_ITEM/DIR_INDEX.
4467 * @root: the root of the fs/file tree
4468 * @ref_key: the key of the INODE_EXTREF
4469 * @refs: the count of INODE_EXTREF
4470 * @mode: the st_mode of INODE_ITEM
4472 * Return 0 if no error occurred.
4474 static int check_inode_extref(struct btrfs_root *root,
4475 struct btrfs_key *ref_key,
4476 struct extent_buffer *node, int slot, u64 *refs,
4479 struct btrfs_key key;
4480 struct btrfs_inode_extref *extref;
4481 char namebuf[BTRFS_NAME_LEN] = {0};
4491 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4492 total = btrfs_item_size_nr(node, slot);
4495 /* update inode ref count */
4497 name_len = btrfs_inode_extref_name_len(node, extref);
4498 index = btrfs_inode_extref_index(node, extref);
4499 parent = btrfs_inode_extref_parent(node, extref);
4500 if (name_len <= BTRFS_NAME_LEN) {
4503 len = BTRFS_NAME_LEN;
4504 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4505 root->objectid, ref_key->objectid, ref_key->offset);
4507 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4509 /* Check root dir ref name */
4510 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4511 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4512 root->objectid, ref_key->objectid, ref_key->offset,
4514 err |= ROOT_DIR_ERROR;
4517 /* find related dir_index */
4518 key.objectid = parent;
4519 key.type = BTRFS_DIR_INDEX_KEY;
4521 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4524 /* find related dir_item */
4525 key.objectid = parent;
4526 key.type = BTRFS_DIR_ITEM_KEY;
4527 key.offset = btrfs_name_hash(namebuf, len);
4528 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4531 len = sizeof(*extref) + name_len;
4532 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4542 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4543 * DIR_ITEM/DIR_INDEX match.
4545 * @root: the root of the fs/file tree
4546 * @key: the key of the INODE_REF/INODE_EXTREF
4547 * @name: the name in the INODE_REF/INODE_EXTREF
4548 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4549 * @index: the index in the INODE_REF/INODE_EXTREF; pass (u64)-1 (as done for DIR_ITEM) to skip the index comparison
4551 * @ext_ref: the EXTENDED_IREF feature
4553 * Return 0 if no error occurred.
4554 * Return >0 for error bitmap
/*
 * Search @root for the INODE_REF item at @key and, in a second pass, for the
 * INODE_EXTREF item derived from it, looking for an entry whose name equals
 * @name (@namelen bytes) and, unless @index is (u64)-1, whose index equals
 * @index.
 *
 * Side effect: for the INODE_EXTREF pass @key itself is rewritten (type set
 * to BTRFS_INODE_EXTREF_KEY, offset set to the extref hash of the old offset
 * and the name), so the caller's key is modified when that pass runs.
 */
4556 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4557 char *name, int namelen, u64 index,
4558 unsigned int ext_ref)
4560 struct btrfs_path path;
4561 struct btrfs_inode_ref *ref;
4562 struct btrfs_inode_extref *extref;
4563 struct extent_buffer *node;
4564 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4575 btrfs_init_path(&path);
4576 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4578 ret = INODE_REF_MISSING;
4582 node = path.nodes[0];
4583 slot = path.slots[0];
4585 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4586 total = btrfs_item_size_nr(node, slot);
4588 /* Iterate all entry of INODE_REF */
4589 while (cur < total) {
/* Assume "missing" for the entry being examined */
4590 ret = INODE_REF_MISSING;
4592 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4593 ref_index = btrfs_inode_ref_index(node, ref);
/* (u64)-1 means the caller does not care about the index */
4594 if (index != (u64)-1 && index != ref_index)
/* A name that overruns the item or BTRFS_NAME_LEN is trimmed before comparing */
4597 if (cur + sizeof(*ref) + ref_namelen > total ||
4598 ref_namelen > BTRFS_NAME_LEN) {
4599 warning("root %llu INODE %s[%llu %llu] name too long",
4601 key->type == BTRFS_INODE_REF_KEY ?
4603 key->objectid, key->offset);
4605 if (cur + sizeof(*ref) > total)
4607 len = min_t(u32, total - cur - sizeof(*ref),
/* The name bytes start right after the fixed-size ref header */
4613 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4616 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Step over header and name to reach the next packed entry */
4622 len = sizeof(*ref) + ref_namelen;
4623 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4628 /* Skip if not support EXTENDED_IREF feature */
4632 btrfs_release_path(&path);
4633 btrfs_init_path(&path);
/* For an INODE_REF key the offset is the parent directory id; reuse it for the extref lookup */
4635 dir_id = key->offset;
4636 key->type = BTRFS_INODE_EXTREF_KEY;
4637 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4639 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4641 ret = INODE_REF_MISSING;
4645 node = path.nodes[0];
4646 slot = path.slots[0];
4648 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4650 total = btrfs_item_size_nr(node, slot);
4652 /* Iterate all entry of INODE_EXTREF */
4653 while (cur < total) {
4654 ret = INODE_REF_MISSING;
4656 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4657 ref_index = btrfs_inode_extref_index(node, extref);
4658 parent = btrfs_inode_extref_parent(node, extref);
4659 if (index != (u64)-1 && index != ref_index)
/* Entries sharing the hash but belonging to another directory are not candidates */
4662 if (parent != dir_id)
/*
 * NOTE(review): unlike the INODE_REF loop above, only BTRFS_NAME_LEN bounds
 * the length here; the remaining item size is not consulted -- confirm
 * whether that is intended.
 */
4665 if (ref_namelen <= BTRFS_NAME_LEN) {
4668 len = BTRFS_NAME_LEN;
4669 warning("root %llu INODE %s[%llu %llu] name too long",
4671 key->type == BTRFS_INODE_REF_KEY ?
4673 key->objectid, key->offset);
4675 read_extent_buffer(node, ref_namebuf,
4676 (unsigned long)(extref + 1), len);
4678 if (len != namelen || strncmp(ref_namebuf, name, len))
4685 len = sizeof(*extref) + ref_namelen;
4686 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4691 btrfs_release_path(&path);
4696 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4697 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4699 * @root: the root of the fs/file tree
4700 * @key: the key of the DIR_ITEM/DIR_INDEX
4701 * @size: accumulates the name length of every entry; the caller compares
4701 *        the total with the st_size of the directory INODE_ITEM
4702 * @ext_ref: the EXTENDED_IREF feature
4704 * Return 0 if no error occurred.
/*
 * Walk every entry packed into the DIR_ITEM/DIR_INDEX item at @node/@slot.
 *
 * For each entry the visible code: reports an unexpected data_len, adds the
 * name length to *@size, verifies the name hash against key->offset for
 * DIR_ITEM keys, looks up the INODE_ITEM the entry points at and compares its
 * file type, and finally asks find_inode_ref() for the matching
 * INODE_REF/INODE_EXTREF.  Entries pointing at a ROOT_ITEM are exempted from
 * the INODE_ITEM check.
 *
 * NOTE(review): namebuf is BTRFS_NAME_LEN bytes, can be filled completely and
 * is reused between entries, yet it is printed with "%s" in several messages
 * below -- confirm whether termination is guaranteed elsewhere.
 */
4706 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4707 struct extent_buffer *node, int slot, u64 *size,
4708 unsigned int ext_ref)
4710 struct btrfs_dir_item *di;
4711 struct btrfs_inode_item *ii;
4712 struct btrfs_path path;
4713 struct btrfs_key location;
4714 char namebuf[BTRFS_NAME_LEN] = {0};
4727 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4728 * ignore index check.
4730 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4732 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4733 total = btrfs_item_size_nr(node, slot);
4735 while (cur < total) {
4736 data_len = btrfs_dir_data_len(node, di);
4738 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4739 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4740 "DIR_ITEM" : "DIR_INDEX",
4741 key->objectid, key->offset, data_len);
4743 name_len = btrfs_dir_name_len(node, di);
/* A name that overruns the item or BTRFS_NAME_LEN is trimmed to what is available */
4744 if (cur + sizeof(*di) + name_len > total ||
4745 name_len > BTRFS_NAME_LEN) {
4746 warning("root %llu %s[%llu %llu] name too long",
4748 key->type == BTRFS_DIR_ITEM_KEY ?
4749 "DIR_ITEM" : "DIR_INDEX",
4750 key->objectid, key->offset);
4752 if (cur + sizeof(*di) > total)
4754 len = min_t(u32, total - cur - sizeof(*di),
/* The recorded (untrimmed) name length is what gets added to the running size */
4759 (*size) += name_len;
4761 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4762 filetype = btrfs_dir_type(node, di);
/* For DIR_ITEM keys the key offset must equal the hash of the name */
4764 if (key->type == BTRFS_DIR_ITEM_KEY &&
4765 key->offset != btrfs_name_hash(namebuf, len)) {
/*
 * NOTE(review): the message passes key->offset for "wanted" and the freshly
 * computed hash for "have"; confirm that this order is the intended one.
 */
4767 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4768 root->objectid, key->objectid, key->offset,
4769 namebuf, len, filetype, key->offset,
4770 btrfs_name_hash(namebuf, len));
4773 btrfs_init_path(&path);
4774 btrfs_dir_item_key_to_cpu(node, di, &location);
4776 /* Ignore related ROOT_ITEM check */
4777 if (location.type == BTRFS_ROOT_ITEM_KEY)
4780 /* Check relative INODE_ITEM(existence/filetype) */
4781 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4783 err |= INODE_ITEM_MISSING;
4784 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4785 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4786 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4787 key->offset, location.objectid, name_len,
4792 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4793 struct btrfs_inode_item);
4794 mode = btrfs_inode_mode(path.nodes[0], ii);
/* The file type stored in the dir entry must agree with the inode's mode */
4796 if (imode_to_type(mode) != filetype) {
4797 err |= INODE_ITEM_MISMATCH;
4798 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4799 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4800 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4801 key->offset, name_len, namebuf, filetype);
4804 /* Check relative INODE_REF/INODE_EXTREF */
/* Reuse location as the INODE_REF key: same objectid, offset = this directory */
4805 location.type = BTRFS_INODE_REF_KEY;
4806 location.offset = key->objectid;
4807 ret = find_inode_ref(root, &location, namebuf, len,
4810 if (ret & INODE_REF_MISSING)
4811 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4812 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4813 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4814 key->offset, name_len, namebuf, filetype);
4817 btrfs_release_path(&path);
/* Step over header, name and data to reach the next packed entry */
4818 len = sizeof(*di) + name_len + data_len;
4819 di = (struct btrfs_dir_item *)((char *)di + len);
/* Data left over in a DIR_INDEX item means it holds more than one entry */
4822 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4823 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4824 root->objectid, key->objectid, key->offset);
4833 * Check file extent datasum/hole, update the size of the file extents,
4834 * check and update the last offset of the file extent.
4836 * @root: the root of fs/file tree.
4837 * @fkey: the key of the file extent.
4838 * @nodatasum: INODE_NODATASUM feature.
4839 * @size: the sum of all EXTENT_DATA items size for this inode.
4840 * @end: the offset of the last extent.
4842 * Return 0 if no error occurred.
/*
 * Validate one EXTENT_DATA item and fold its length into *@size and *@end.
 *
 * Inline extents are checked for a zero length and, when uncompressed, for a
 * length that differs from the inline item length.  Regular and preallocated
 * extents are checked for an unknown type, for checksum coverage (via
 * count_csum_range()) and, unless no_holes is set, for a gap between *@end
 * and the key offset.
 */
4844 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4845 struct extent_buffer *node, int slot,
4846 unsigned int nodatasum, u64 *size, u64 *end)
4848 struct btrfs_file_extent_item *fi;
4851 u64 extent_num_bytes;
4853 u64 csum_found; /* In byte size, sectorsize aligned */
4854 u64 search_start; /* Logical range start we search for csum */
4855 u64 search_len; /* Logical range len we search for csum */
4856 unsigned int extent_type;
4857 unsigned int is_hole;
4862 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4864 /* Check inline extent */
4865 extent_type = btrfs_file_extent_type(node, fi);
4866 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4867 struct btrfs_item *e = btrfs_item_nr(slot);
4868 u32 item_inline_len;
4870 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4871 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4872 compressed = btrfs_file_extent_compression(node, fi);
4873 if (extent_num_bytes == 0) {
4875 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4876 root->objectid, fkey->objectid, fkey->offset);
4877 err |= FILE_EXTENT_ERROR;
/* Only uncompressed inline data can be compared with the item's inline length */
4879 if (!compressed && extent_num_bytes != item_inline_len) {
4881 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4882 root->objectid, fkey->objectid, fkey->offset,
4883 extent_num_bytes, item_inline_len);
4884 err |= FILE_EXTENT_ERROR;
4886 *end += extent_num_bytes;
4887 *size += extent_num_bytes;
4891 /* Check extent type */
4892 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4893 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4894 err |= FILE_EXTENT_ERROR;
4895 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4896 root->objectid, fkey->objectid, fkey->offset);
4900 /* Check REG_EXTENT/PREALLOC_EXTENT */
4901 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4902 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4903 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4904 extent_offset = btrfs_file_extent_offset(node, fi);
4905 compressed = btrfs_file_extent_compression(node, fi);
/* A hole is an extent with neither a disk address nor a disk length */
4906 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4909 * Check EXTENT_DATA csum
4911 * For plain (uncompressed) extent, we should only check the range
4912 * we're referring to, as it's possible that part of prealloc extent
4913 * has been written, and has csum:
4915 * |<--- Original large preallocated extent A ---->|
4916 * |<- Prealloc File Extent ->|<- Regular Extent ->|
4919 * For compressed extent, we should check the whole range.
4922 search_start = disk_bytenr + extent_offset;
4923 search_len = extent_num_bytes;
4925 search_start = disk_bytenr;
4926 search_len = disk_num_bytes;
/*
 * NOTE(review): csum_found is read below even when ret < 0; whether it is
 * always written by count_csum_range() cannot be seen from here -- confirm.
 */
4928 ret = count_csum_range(root, search_start, search_len, &csum_found);
/* Three mutually exclusive complaints: csum on a nodatasum inode, csum missing on a regular extent, csum present on a prealloc extent */
4929 if (csum_found > 0 && nodatasum) {
4930 err |= ODD_CSUM_ITEM;
4931 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4932 root->objectid, fkey->objectid, fkey->offset);
4933 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4934 !is_hole && (ret < 0 || csum_found < search_len)) {
4935 err |= CSUM_ITEM_MISSING;
4936 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4937 root->objectid, fkey->objectid, fkey->offset,
4938 csum_found, search_len);
4939 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4940 err |= ODD_CSUM_ITEM;
4941 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4942 root->objectid, fkey->objectid, fkey->offset, csum_found);
4945 /* Check EXTENT_DATA hole */
/* Without no_holes every extent must start exactly where the previous one ended */
4946 if (!no_holes && *end != fkey->offset) {
4947 err |= FILE_EXTENT_ERROR;
4948 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4949 root->objectid, fkey->objectid, fkey->offset);
4952 *end += extent_num_bytes;
4954 *size += extent_num_bytes;
4960 * Set inode item nbytes to @nbytes
4962 * Returns 0 on success
4963 * Returns != 0 on error
4965 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
4966 struct btrfs_path *path,
4967 u64 ino, u64 nbytes)
4969 struct btrfs_trans_handle *trans;
4970 struct btrfs_inode_item *ii;
4971 struct btrfs_key key;
4972 struct btrfs_key research_key;
4976 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
4979 key.type = BTRFS_INODE_ITEM_KEY;
4982 trans = btrfs_start_transaction(root, 1);
4983 if (IS_ERR(trans)) {
4984 ret = PTR_ERR(trans);
4989 btrfs_release_path(path);
4990 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
4998 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
4999 struct btrfs_inode_item);
5000 btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5001 btrfs_mark_buffer_dirty(path->nodes[0]);
5003 btrfs_commit_transaction(trans, root);
5006 error("failed to set nbytes in inode %llu root %llu",
5007 ino, root->root_key.objectid);
5009 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5010 root->root_key.objectid, nbytes);
5013 btrfs_release_path(path);
5014 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5021 * Set directory inode isize to @isize.
5023 * Returns 0 on success.
5024 * Returns != 0 on error.
/*
 * Rewrite the size field of a directory's INODE_ITEM inside a transaction.
 *
 * The key @path points at on entry is saved first; the path is then released,
 * reused for the INODE_ITEM update and finally searched again for the saved
 * key so the caller can continue from the same position.
 */
5026 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5027 struct btrfs_path *path,
5030 struct btrfs_trans_handle *trans;
5031 struct btrfs_inode_item *ii;
5032 struct btrfs_key key;
5033 struct btrfs_key research_key;
/* Remember where the caller's path points so it can be restored at the end */
5037 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5040 key.type = BTRFS_INODE_ITEM_KEY;
5043 trans = btrfs_start_transaction(root, 1);
5044 if (IS_ERR(trans)) {
5045 ret = PTR_ERR(trans);
5050 btrfs_release_path(path);
/* Last argument (cow) is 1 because the leaf is about to be modified */
5051 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5059 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5060 struct btrfs_inode_item);
5061 btrfs_set_inode_size(path->nodes[0], ii, isize);
5062 btrfs_mark_buffer_dirty(path->nodes[0]);
/* NOTE(review): the return value of the commit is not examined here */
5064 btrfs_commit_transaction(trans, root);
5067 error("failed to set isize in inode %llu root %llu",
5068 ino, root->root_key.objectid);
5070 printf("Set isize in inode %llu root %llu to %llu\n",
5071 ino, root->root_key.objectid, isize);
/* Put the caller's path back on the item it pointed at on entry */
5073 btrfs_release_path(path);
5074 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5081 * Check INODE_ITEM and related ITEMs (the same inode number)
5082 * 1. check link count
5083 * 2. check inode ref/extref
5084 * 3. check dir item/index
5086 * @ext_ref: the EXTENDED_IREF feature
5088 * Return 0 if no error occurred.
5089 * Return >0 for error or hit the traversal is done(by error bitmap)
/*
 * Check the INODE_ITEM @path points at together with the following items
 * that carry the same objectid.
 *
 * The inode's size, nbytes, mode, nlink and NODATASUM flag are read first.
 * Each following item is then dispatched by key type to check_inode_ref(),
 * check_inode_extref(), check_dir_item() or check_file_extent(), which
 * accumulate refs, size, extent_size and extent_end.  Afterwards the
 * accumulated values are compared with nlink/isize/nbytes; isize and nbytes
 * mismatches are passed to the repair helpers.
 */
5091 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5092 unsigned int ext_ref)
5094 struct extent_buffer *node;
5095 struct btrfs_inode_item *ii;
5096 struct btrfs_key key;
5105 u64 extent_size = 0;
5107 unsigned int nodatasum;
5112 node = path->nodes[0];
5113 slot = path->slots[0];
5115 btrfs_item_key_to_cpu(node, &key, slot);
5116 inode_id = key.objectid;
/* The orphan objectid gets no inode checks; just step to the next item */
5118 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5119 ret = btrfs_next_item(root, path);
5125 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5126 isize = btrfs_inode_size(node, ii);
5127 nbytes = btrfs_inode_nbytes(node, ii);
5128 mode = btrfs_inode_mode(node, ii);
5129 dir = imode_to_type(mode) == BTRFS_FT_DIR;
5130 nlink = btrfs_inode_nlink(node, ii);
5131 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5134 ret = btrfs_next_item(root, path);
5136 /* out will fill 'err' using current statistics */
5138 } else if (ret > 0) {
5143 node = path->nodes[0];
5144 slot = path->slots[0];
5145 btrfs_item_key_to_cpu(node, &key, slot);
/* An item with a different objectid belongs to another inode */
5146 if (key.objectid != inode_id)
5150 case BTRFS_INODE_REF_KEY:
5151 ret = check_inode_ref(root, &key, node, slot, &refs,
5155 case BTRFS_INODE_EXTREF_KEY:
/* The extref is still checked even when the feature is reported as unsupported */
5156 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5157 warning("root %llu EXTREF[%llu %llu] isn't supported",
5158 root->objectid, key.objectid,
5160 ret = check_inode_extref(root, &key, node, slot, &refs,
5164 case BTRFS_DIR_ITEM_KEY:
5165 case BTRFS_DIR_INDEX_KEY:
5167 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5168 root->objectid, inode_id,
5169 imode_to_type(mode), key.objectid,
5172 ret = check_dir_item(root, &key, node, slot, &size,
5176 case BTRFS_EXTENT_DATA_KEY:
5178 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5179 root->objectid, inode_id, key.objectid,
5182 ret = check_file_extent(root, &key, node, slot,
5183 nodatasum, &extent_size,
5187 case BTRFS_XATTR_ITEM_KEY:
5190 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5191 key.objectid, key.type, key.offset);
5196 /* verify INODE_ITEM nlink/isize/nbytes */
5199 err |= LINK_COUNT_ERROR;
5200 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5201 root->objectid, inode_id, nlink);
5205 * Just a warning, as dir inode nbytes is just an
5206 * instructive value.
5208 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5209 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5210 root->objectid, inode_id,
5211 root->fs_info->nodesize);
/* size was accumulated by check_dir_item() from the entry name lengths */
5214 if (isize != size) {
5216 ret = repair_dir_isize_lowmem(root, path,
/* Only report the mismatch when not repairing or when the repair failed */
5218 if (!repair || ret) {
5221 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5222 root->objectid, inode_id, isize, size);
/* refs was accumulated by check_inode_ref()/check_inode_extref() */
5226 if (nlink != refs) {
5227 err |= LINK_COUNT_ERROR;
5228 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5229 root->objectid, inode_id, nlink, refs);
5230 } else if (!nlink) {
5234 if (!nbytes && !no_holes && extent_end < isize) {
5235 err |= NBYTES_ERROR;
5236 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5237 root->objectid, inode_id, isize);
/* extent_size was accumulated by check_file_extent() */
5240 if (nbytes != extent_size) {
5242 ret = repair_inode_nbytes_lowmem(root, path,
5243 inode_id, extent_size);
/* Only report the mismatch when not repairing or when the repair failed */
5244 if (!repair || ret) {
5245 err |= NBYTES_ERROR;
5247 "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5248 root->objectid, inode_id, nbytes,
/*
 * Check the first inode item of @root: search directly for the INODE_ITEM
 * with objectid BTRFS_FIRST_FREE_OBJECTID and hand the resulting path to
 * check_inode_item().  The function can record INODE_ITEM_MISSING and report
 * that the first inode item is missing.
 *
 * @ext_ref is forwarded unchanged to check_inode_item().
 */
5257 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5259 struct btrfs_path path;
5260 struct btrfs_key key;
5264 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5265 key.type = BTRFS_INODE_ITEM_KEY;
5268 /* For root being dropped, we don't need to check first inode */
/* "Being dropped" is recognised by zero root refs plus a drop_progress objectid */
5269 if (btrfs_root_refs(&root->root_item) == 0 &&
5270 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5274 btrfs_init_path(&path);
5276 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5281 err |= INODE_ITEM_MISSING;
5282 error("first inode item of root %llu is missing",
/* The bitmap returned by check_inode_item() is merged into err */
5286 err |= check_inode_item(root, &path, ext_ref);
5291 btrfs_release_path(&path);
/*
 * Look up a tree backref of @rec in rec->backref_tree.
 *
 * A struct tree_backref on the stack serves as the search key; the visible
 * code sets its parent and the full_backref flag from @parent.  rb_search()
 * compares with compare_extent_backref(), and a node that is found is
 * converted back with to_tree_backref().  back starts out as NULL.
 */
5295 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5296 u64 parent, u64 root)
5298 struct rb_node *node;
5299 struct tree_backref *back = NULL;
5300 struct tree_backref match = {
5307 match.parent = parent;
5308 match.node.full_backref = 1;
/* The embedded extent_backref's rb node is what the tree is keyed on */
5313 node = rb_search(&rec->backref_tree, &match.node.node,
5314 (rb_compare_keys)compare_extent_backref, NULL);
5316 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Look up a data backref of @rec in rec->backref_tree.
 *
 * A struct data_backref on the stack serves as the search key; the visible
 * initialisers copy @found_ref and @disk_bytenr into it, and parent plus the
 * full_backref flag are set from @parent.  rb_search() compares with
 * compare_extent_backref(), and a node that is found is converted back with
 * to_data_backref().  back starts out as NULL.
 */
5321 static struct data_backref *find_data_backref(struct extent_record *rec,
5322 u64 parent, u64 root,
5323 u64 owner, u64 offset,
5325 u64 disk_bytenr, u64 bytes)
5327 struct rb_node *node;
5328 struct data_backref *back = NULL;
5329 struct data_backref match = {
5336 .found_ref = found_ref,
5337 .disk_bytenr = disk_bytenr,
5341 match.parent = parent;
5342 match.node.full_backref = 1;
/* The embedded extent_backref's rb node is what the tree is keyed on */
5347 node = rb_search(&rec->backref_tree, &match.node.node,
5348 (rb_compare_keys)compare_extent_backref, NULL);
5350 back = to_data_backref(rb_node_to_extent_backref(node));
5355 * Iterate all item on the tree and call check_inode_item() to check.
5357 * @root: the root of the tree to be checked.
5358 * @ext_ref: the EXTENDED_IREF feature
5360 * Return 0 if no error found.
5361 * Return <0 for error.
5363 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5365 struct btrfs_path path;
5366 struct node_refs nrefs;
5367 struct btrfs_root_item *root_item = &root->root_item;
5373 * We need to manually check the first inode item(256)
5374 * As the following traversal function will only start from
5375 * the first inode item in the leaf, if inode item(256) is missing
5376 * we will just skip it forever.
5378 ret = check_fs_first_inode(root, ext_ref);
5382 memset(&nrefs, 0, sizeof(nrefs));
5383 level = btrfs_header_level(root->node);
5384 btrfs_init_path(&path);
5386 if (btrfs_root_refs(root_item) > 0 ||
5387 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5388 path.nodes[level] = root->node;
5389 path.slots[level] = 0;
5390 extent_buffer_get(root->node);
5392 struct btrfs_key key;
5394 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5395 level = root_item->drop_level;
5396 path.lowest_level = level;
5397 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5404 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5407 /* if ret is negative, walk shall stop */
5413 ret = walk_up_tree_v2(root, &path, &level);
5415 /* Normal exit, reset ret to err */
5422 btrfs_release_path(&path);
5427 * Find the relative ref for root_ref and root_backref.
5429 * @root: the root of the root tree.
5430 * @ref_key: the key of the root ref.
5432 * Return 0 if no error occurred.
/*
 * Cross-check a ROOT_REF or ROOT_BACKREF item against its counterpart.
 *
 * The item at @node/@slot is decoded (dirid, sequence, name); the counterpart
 * is looked up in @root under the mirrored key; and the two are compared on
 * dirid, sequence, name length and name.  ROOT_REF_MISSING or
 * ROOT_REF_MISMATCH is recorded in err accordingly.
 */
5434 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5435 struct extent_buffer *node, int slot)
5437 struct btrfs_path path;
5438 struct btrfs_key key;
5439 struct btrfs_root_ref *ref;
5440 struct btrfs_root_ref *backref;
5441 char ref_name[BTRFS_NAME_LEN] = {0};
5442 char backref_name[BTRFS_NAME_LEN] = {0};
5448 u32 backref_namelen;
5453 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5454 ref_dirid = btrfs_root_ref_dirid(node, ref);
5455 ref_seq = btrfs_root_ref_sequence(node, ref);
5456 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* Names longer than BTRFS_NAME_LEN are read truncated, with a warning */
5458 if (ref_namelen <= BTRFS_NAME_LEN) {
5461 len = BTRFS_NAME_LEN;
5462 warning("%s[%llu %llu] ref_name too long",
5463 ref_key->type == BTRFS_ROOT_REF_KEY ?
5464 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
/* The name bytes start right after the fixed-size root_ref header */
5467 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5469 /* Find relative root_ref */
/*
 * Mirror the key: objectid and offset swap places, and the sum-minus-type
 * expression turns ROOT_REF into ROOT_BACKREF and vice versa (given that
 * ref_key->type is one of the two).
 */
5470 key.objectid = ref_key->offset;
5471 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5472 key.offset = ref_key->objectid;
5474 btrfs_init_path(&path);
5475 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5477 err |= ROOT_REF_MISSING;
5478 error("%s[%llu %llu] couldn't find relative ref",
5479 ref_key->type == BTRFS_ROOT_REF_KEY ?
5480 "ROOT_REF" : "ROOT_BACKREF",
5481 ref_key->objectid, ref_key->offset);
5485 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5486 struct btrfs_root_ref);
5487 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5488 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5489 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5491 if (backref_namelen <= BTRFS_NAME_LEN) {
5492 len = backref_namelen;
5494 len = BTRFS_NAME_LEN;
5495 warning("%s[%llu %llu] ref_name too long",
5496 key.type == BTRFS_ROOT_REF_KEY ?
5497 "ROOT_REF" : "ROOT_BACKREF",
5498 key.objectid, key.offset);
5500 read_extent_buffer(path.nodes[0], backref_name,
5501 (unsigned long)(backref + 1), len);
/* len is the counterpart's (possibly truncated) name length at this point */
5503 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5504 ref_namelen != backref_namelen ||
5505 strncmp(ref_name, backref_name, len)) {
5506 err |= ROOT_REF_MISMATCH;
5507 error("%s[%llu %llu] mismatch relative ref",
5508 ref_key->type == BTRFS_ROOT_REF_KEY ?
5509 "ROOT_REF" : "ROOT_BACKREF",
5510 ref_key->objectid, ref_key->offset);
5513 btrfs_release_path(&path);
5518 * Check all fs/file tree in low_memory mode.
5520 * 1. for fs tree root item, call check_fs_root_v2()
5521 * 2. for fs tree root ref/backref, call check_root_ref()
5523 * Return 0 if no error occurred.
/*
 * Iterate over the tree root and check every fs/subvolume tree and every
 * root ref/backref item (low-memory mode).
 *
 * The scan starts at a ROOT_ITEM key with objectid BTRFS_FS_TREE_OBJECTID and
 * advances with btrfs_next_item().  ROOT_ITEM keys accepted by
 * fs_root_objectid() are read and handed to check_fs_root_v2(); ROOT_REF and
 * ROOT_BACKREF keys are handed to check_root_ref().
 *
 * @fs_info: filesystem whose tree root is scanned
 */
5525 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5527 struct btrfs_root *tree_root = fs_info->tree_root;
5528 struct btrfs_root *cur_root = NULL;
5529 struct btrfs_path path;
5530 struct btrfs_key key;
5531 struct extent_buffer *node;
5532 unsigned int ext_ref;
5537 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5539 btrfs_init_path(&path);
5540 key.objectid = BTRFS_FS_TREE_OBJECTID;
5542 key.type = BTRFS_ROOT_ITEM_KEY;
5544 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5548 } else if (ret > 0) {
5554 node = path.nodes[0];
5555 slot = path.slots[0];
5556 btrfs_item_key_to_cpu(node, &key, slot);
/* NOTE(review): presumably ends the scan; the consequence is not visible. */
5557 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5559 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5560 fs_root_objectid(key.objectid)) {
/*
 * The tree-reloc root is read without the cache and is released again with
 * btrfs_free_fs_root() after the check (see below).
 */
5561 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5562 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5565 key.offset = (u64)-1;
5566 cur_root = btrfs_read_fs_root(fs_info, &key);
5569 if (IS_ERR(cur_root)) {
5570 error("Fail to read fs/subvol tree: %lld",
5576 ret = check_fs_root_v2(cur_root, ext_ref);
5579 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5580 btrfs_free_fs_root(cur_root);
5581 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5582 key.type == BTRFS_ROOT_BACKREF_KEY) {
5583 ret = check_root_ref(tree_root, &key, node, slot);
5587 ret = btrfs_next_item(tree_root, &path);
5597 btrfs_release_path(&path);
/*
 * Dispatch the fs-roots check according to the selected check mode.
 *
 * Prints "checking fs roots" to stderr when progress reporting is disabled,
 * then calls check_fs_roots_v2() for CHECK_MODE_LOWMEM and check_fs_roots()
 * otherwise.
 *
 * @fs_info:    filesystem to check
 * @root_cache: only passed to check_fs_roots()
 */
5601 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
5602 struct cache_tree *root_cache)
5606 if (!ctx.progress_enabled)
5607 fprintf(stderr, "checking fs roots\n");
5608 if (check_mode == CHECK_MODE_LOWMEM)
5609 ret = check_fs_roots_v2(fs_info);
5611 ret = check_fs_roots(fs_info, root_cache);
/*
 * Verify every backref recorded for an extent record.
 *
 * Walks rec->backref_tree and looks for:
 *  - backrefs not found in the extent tree,
 *  - tree backrefs that were never referenced back,
 *  - data backrefs whose found_ref count differs from num_refs,
 *  - data backrefs whose disk_bytenr differs from rec->start,
 *  - data backrefs whose bytes differ from rec->nr,
 * and finally compares the accumulated reference count with rec->refs.
 *
 * @rec:        extent record to verify
 * @print_errs: NOTE(review): presumably gates the fprintf() diagnostics; the
 *              tests on it are not visible in this listing - confirm
 *
 * The caller visible in this file treats a zero return as "all fine".
 */
5616 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5618 struct extent_backref *back, *tmp;
5619 struct tree_backref *tback;
5620 struct data_backref *dback;
5624 rbtree_postorder_for_each_entry_safe(back, tmp,
5625 &rec->backref_tree, node) {
/* Backref seen while walking trees but absent from the extent tree. */
5626 if (!back->found_extent_tree) {
5630 if (back->is_data) {
5631 dback = to_data_backref(back);
5632 fprintf(stderr, "Data backref %llu %s %llu"
5633 " owner %llu offset %llu num_refs %lu"
5634 " not found in extent tree\n",
5635 (unsigned long long)rec->start,
5636 back->full_backref ?
5638 back->full_backref ?
5639 (unsigned long long)dback->parent:
5640 (unsigned long long)dback->root,
5641 (unsigned long long)dback->owner,
5642 (unsigned long long)dback->offset,
5643 (unsigned long)dback->num_refs);
5645 tback = to_tree_backref(back);
5646 fprintf(stderr, "Tree backref %llu parent %llu"
5647 " root %llu not found in extent tree\n",
5648 (unsigned long long)rec->start,
5649 (unsigned long long)tback->parent,
5650 (unsigned long long)tback->root);
/* Tree backref present in the extent tree but never referenced back. */
5653 if (!back->is_data && !back->found_ref) {
5657 tback = to_tree_backref(back);
5658 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5659 (unsigned long long)rec->start,
5660 back->full_backref ? "parent" : "root",
5661 back->full_backref ?
5662 (unsigned long long)tback->parent :
5663 (unsigned long long)tback->root, back);
/* Per-backref consistency checks that only apply to data backrefs. */
5665 if (back->is_data) {
5666 dback = to_data_backref(back);
5667 if (dback->found_ref != dback->num_refs) {
5671 fprintf(stderr, "Incorrect local backref count"
5672 " on %llu %s %llu owner %llu"
5673 " offset %llu found %u wanted %u back %p\n",
5674 (unsigned long long)rec->start,
5675 back->full_backref ?
5677 back->full_backref ?
5678 (unsigned long long)dback->parent:
5679 (unsigned long long)dback->root,
5680 (unsigned long long)dback->owner,
5681 (unsigned long long)dback->offset,
5682 dback->found_ref, dback->num_refs, back);
5684 if (dback->disk_bytenr != rec->start) {
5688 fprintf(stderr, "Backref disk bytenr does not"
5689 " match extent record, bytenr=%llu, "
5690 "ref bytenr=%llu\n",
5691 (unsigned long long)rec->start,
5692 (unsigned long long)dback->disk_bytenr);
5695 if (dback->bytes != rec->nr) {
5699 fprintf(stderr, "Backref bytes do not match "
5700 "extent backref, bytenr=%llu, ref "
5701 "bytes=%llu, backref bytes=%llu\n",
5702 (unsigned long long)rec->start,
5703 (unsigned long long)rec->nr,
5704 (unsigned long long)dback->bytes);
/*
 * Accumulate the global reference count; data backrefs contribute their
 * found_ref value.  NOTE(review): the contribution of tree backrefs is not
 * visible in this listing.
 */
5707 if (!back->is_data) {
5710 dback = to_data_backref(back);
5711 found += dback->found_ref;
5714 if (found != rec->refs) {
5718 fprintf(stderr, "Incorrect global backref count "
5719 "on %llu found %llu wanted %llu\n",
5720 (unsigned long long)rec->start,
5721 (unsigned long long)found,
5722 (unsigned long long)rec->refs);
/*
 * Per-node callback handed to rb_free_nodes() by free_all_extent_backrefs().
 * Converts the rb_node back into its extent_backref.
 * NOTE(review): the statement that releases the backref is not visible in
 * this listing - confirm.
 */
5728 static void __free_one_backref(struct rb_node *node)
5730 struct extent_backref *back = rb_node_to_extent_backref(node);
/*
 * Release every backref of @rec by running __free_one_backref() over
 * rec->backref_tree via rb_free_nodes().
 */
5735 static void free_all_extent_backrefs(struct extent_record *rec)
5737 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Tear down an extent record cache: repeatedly take the first cache extent,
 * unlink it from @extent_cache and free the backrefs of the owning
 * extent_record.
 * NOTE(review): the loop construct and the release of the record itself are
 * not visible in this listing - confirm.
 */
5740 static void free_extent_record_cache(struct cache_tree *extent_cache)
5742 struct cache_extent *cache;
5743 struct extent_record *rec;
5746 cache = first_cache_extent(extent_cache);
5749 rec = container_of(cache, struct extent_record, cache);
5750 remove_cache_extent(extent_cache, cache);
5751 free_all_extent_backrefs(rec);
/*
 * Drop an extent record from the cache once nothing is left to verify.
 *
 * The record is removed only when its content and owner ref were checked,
 * extent_item_refs equals refs (and refs is non-zero), it has no duplicates,
 * all_backpointers_checked() returns 0, and none of the bad_full_backref,
 * crossing_stripes or wrong_chunk_type flags is set.
 *
 * @extent_cache: cache the record lives in
 * @rec:          candidate record
 */
5756 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5757 struct extent_record *rec)
5759 if (rec->content_checked && rec->owner_ref_checked &&
5760 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5761 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5762 !rec->bad_full_backref && !rec->crossing_stripes &&
5763 !rec->wrong_chunk_type) {
5764 remove_cache_extent(extent_cache, &rec->cache);
5765 free_all_extent_backrefs(rec);
5766 list_del_init(&rec->list);
/*
 * Check that the owner stored in a tree block header is backed by a
 * reference.
 *
 * First compares btrfs_header_owner(@buf) with the root of the tree backrefs
 * recorded in @rec.  If that does not settle it, reads the root named by the
 * header owner, searches it for the first key of @buf one level above the
 * block and checks that the parent slot points at buf->start.
 *
 * @root: any root of the filesystem; only root->fs_info is used
 * @rec:  extent record of the block
 * @buf:  the tree block
 *
 * The final return is 0 when the parent pointer was found and 1 otherwise;
 * earlier returns are not visible in this listing.
 */
5772 static int check_owner_ref(struct btrfs_root *root,
5773 struct extent_record *rec,
5774 struct extent_buffer *buf)
5776 struct extent_backref *node, *tmp;
5777 struct tree_backref *back;
5778 struct btrfs_root *ref_root;
5779 struct btrfs_key key;
5780 struct btrfs_path path;
5781 struct extent_buffer *parent;
5786 rbtree_postorder_for_each_entry_safe(node, tmp,
5787 &rec->backref_tree, node) {
/*
 * NOTE(review): backrefs failing the next two tests are presumably skipped;
 * the consequence lines are not visible.
 */
5790 if (!node->found_ref)
5792 if (node->full_backref)
5794 back = to_tree_backref(node);
5795 if (btrfs_header_owner(buf) == back->root)
5798 BUG_ON(rec->is_root);
5800 /* try to find the block by search corresponding fs tree */
5801 key.objectid = btrfs_header_owner(buf);
5802 key.type = BTRFS_ROOT_ITEM_KEY;
5803 key.offset = (u64)-1;
5805 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5806 if (IS_ERR(ref_root))
5809 level = btrfs_header_level(buf);
/* Use the block's first key (item key or node key) as the search key. */
5811 btrfs_item_key_to_cpu(buf, &key, 0);
5813 btrfs_node_key_to_cpu(buf, &key, 0);
5815 btrfs_init_path(&path);
/* Stop the search one level above the block so its parent ends up in path. */
5816 path.lowest_level = level + 1;
5817 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5821 parent = path.nodes[level + 1];
5822 if (parent && buf->start == btrfs_node_blockptr(parent,
5823 path.slots[level + 1]))
5826 btrfs_release_path(&path);
5827 return found ? 0 : 1;
/*
 * Walk the backrefs of @rec and test tree backrefs against
 * BTRFS_EXTENT_TREE_OBJECTID, i.e. decide whether the record belongs to the
 * extent tree.
 * NOTE(review): the return values and the handling of data/full backrefs are
 * not visible in this listing - confirm.
 */
5830 static int is_extent_tree_record(struct extent_record *rec)
5832 struct extent_backref *node, *tmp;
5833 struct tree_backref *back;
5836 rbtree_postorder_for_each_entry_safe(node, tmp,
5837 &rec->backref_tree, node) {
5840 back = to_tree_backref(node);
5841 if (node->full_backref)
5843 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Record a block that failed to read as a corrupt extent record.
 *
 * Looks up the range in @extent_cache, tests the record with
 * is_extent_tree_record() and registers the range together with the
 * record's parent key via btrfs_add_corrupt_extent_record().
 *
 * NOTE(review): the returns taken when the lookup fails or the
 * is_extent_tree_record() test triggers are not visible in this listing.
 */
5850 static int record_bad_block_io(struct btrfs_fs_info *info,
5851 struct cache_tree *extent_cache,
5854 struct extent_record *rec;
5855 struct cache_extent *cache;
5856 struct btrfs_key key;
5858 cache = lookup_cache_extent(extent_cache, start, len);
5862 rec = container_of(cache, struct extent_record, cache);
5863 if (!is_extent_tree_record(rec))
5866 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5867 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot + 1 of a tree block.
 *
 * For a node (non-zero header level) the two btrfs_key_ptr entries are
 * exchanged and btrfs_fixup_low_keys() is called with the node's first key.
 * For a leaf the item data, item offsets, item sizes and keys of the two
 * items are exchanged.
 *
 * @root: root owning the block
 * @path: path whose slots[0] is rewritten while the leaf keys are set
 * @buf:  the tree block
 * @slot: index of the first of the two adjacent entries
 */
5870 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5871 struct extent_buffer *buf, int slot)
5873 if (btrfs_header_level(buf)) {
5874 struct btrfs_key_ptr ptr1, ptr2;
5876 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5877 sizeof(struct btrfs_key_ptr));
5878 read_extent_buffer(buf, &ptr2,
5879 btrfs_node_key_ptr_offset(slot + 1),
5880 sizeof(struct btrfs_key_ptr));
5881 write_extent_buffer(buf, &ptr1,
5882 btrfs_node_key_ptr_offset(slot + 1),
5883 sizeof(struct btrfs_key_ptr));
5884 write_extent_buffer(buf, &ptr2,
5885 btrfs_node_key_ptr_offset(slot),
5886 sizeof(struct btrfs_key_ptr));
/* NOTE(review): the condition guarding this key fixup is not visible. */
5888 struct btrfs_disk_key key;
5889 btrfs_node_key(buf, &key, 0);
5890 btrfs_fixup_low_keys(root, path, &key,
5891 btrfs_header_level(buf) + 1);
5894 struct btrfs_item *item1, *item2;
5895 struct btrfs_key k1, k2;
5896 char *item1_data, *item2_data;
5897 u32 item1_offset, item2_offset, item1_size, item2_size;
5899 item1 = btrfs_item_nr(slot);
5900 item2 = btrfs_item_nr(slot + 1);
5901 btrfs_item_key_to_cpu(buf, &k1, slot);
5902 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5903 item1_offset = btrfs_item_offset(buf, item1);
5904 item2_offset = btrfs_item_offset(buf, item2);
5905 item1_size = btrfs_item_size(buf, item1);
5906 item2_size = btrfs_item_size(buf, item2);
5908 item1_data = malloc(item1_size);
5911 item2_data = malloc(item2_size);
5917 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5918 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): each write below copies the *other* item's size out of a
 * buffer that was allocated with its own item's size.  When the two sizes
 * differ, one of the writes reads past the end of the smaller allocation -
 * worth confirming and fixing.
 */
5920 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5921 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5925 btrfs_set_item_offset(buf, item1, item2_offset);
5926 btrfs_set_item_offset(buf, item2, item1_offset);
5927 btrfs_set_item_size(buf, item1, item2_size);
5928 btrfs_set_item_size(buf, item2, item1_size);
/* Point the path at each slot in turn so the keys can be rewritten. */
5930 path->slots[0] = slot;
5931 btrfs_set_item_key_unsafe(root, path, &k2);
5932 path->slots[0] = slot + 1;
5933 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair out-of-order keys in the block at path->nodes[path->lowest_level].
 *
 * Compares each pair of adjacent keys; a pair for which
 * btrfs_comp_cpu_keys() is not negative is exchanged with swap_values() and
 * the buffer is marked dirty.
 *
 * NOTE(review): if btrfs_header_nritems() returns an unsigned type, an empty
 * buffer makes "nritems - 1" wrap around in the loop bound - confirm.
 */
5938 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5940 struct extent_buffer *buf;
5941 struct btrfs_key k1, k2;
5943 int level = path->lowest_level;
5946 buf = path->nodes[level];
5947 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
/* Node keys and leaf item keys are read through different helpers. */
5949 btrfs_node_key_to_cpu(buf, &k1, i);
5950 btrfs_node_key_to_cpu(buf, &k2, i + 1);
5952 btrfs_item_key_to_cpu(buf, &k1, i);
5953 btrfs_item_key_to_cpu(buf, &k2, i + 1);
5955 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5957 ret = swap_values(root, path, buf, i);
5960 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * Only DIR_INDEX, EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF and
 * EXTENT_DATA_REF keys are accepted.  The following item headers are moved
 * down by one, nritems is decremented and the buffer is marked dirty.
 *
 * NOTE(review): the return taken for other key types and the condition
 * guarding the btrfs_fixup_low_keys() call are not visible in this listing.
 */
5966 static int delete_bogus_item(struct btrfs_root *root,
5967 struct btrfs_path *path,
5968 struct extent_buffer *buf, int slot)
5970 struct btrfs_key key;
5971 int nritems = btrfs_header_nritems(buf);
5973 btrfs_item_key_to_cpu(buf, &key, slot);
5975 /* These are all the keys we can deal with missing. */
5976 if (key.type != BTRFS_DIR_INDEX_KEY &&
5977 key.type != BTRFS_EXTENT_ITEM_KEY &&
5978 key.type != BTRFS_METADATA_ITEM_KEY &&
5979 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5980 key.type != BTRFS_EXTENT_DATA_REF_KEY)
5983 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5984 (unsigned long long)key.objectid, key.type,
5985 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array; only headers are moved here. */
5986 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5987 btrfs_item_nr_offset(slot + 1),
5988 sizeof(struct btrfs_item) *
5989 (nritems - slot - 1));
5990 btrfs_set_header_nritems(buf, nritems - 1);
5992 struct btrfs_disk_key disk_key;
5994 btrfs_item_key(buf, &disk_key, 0);
5995 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5997 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * The first item is expected to end at BTRFS_LEAF_DATA_SIZE(root) and every
 * later item is expected to end where the previous item starts.  An item
 * ending beyond that point is handed to delete_bogus_item(); an item ending
 * before it has its data shifted by the size of the gap.
 *
 * Must only be called with path->lowest_level == 0 (enforced by BUG_ON).
 */
6001 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6003 struct extent_buffer *buf;
6007 /* We should only get this for leaves */
6008 BUG_ON(path->lowest_level);
6009 buf = path->nodes[0];
6011 for (i = 0; i < btrfs_header_nritems(buf); i++) {
6012 unsigned int shift = 0, offset;
6014 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6015 BTRFS_LEAF_DATA_SIZE(root)) {
6016 if (btrfs_item_end_nr(buf, i) >
6017 BTRFS_LEAF_DATA_SIZE(root)) {
6018 ret = delete_bogus_item(root, path, buf, i);
6021 fprintf(stderr, "item is off the end of the "
6022 "leaf, can't fix\n");
6026 shift = BTRFS_LEAF_DATA_SIZE(root) -
6027 btrfs_item_end_nr(buf, i);
6028 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6029 btrfs_item_offset_nr(buf, i - 1)) {
6030 if (btrfs_item_end_nr(buf, i) >
6031 btrfs_item_offset_nr(buf, i - 1)) {
6032 ret = delete_bogus_item(root, path, buf, i);
6035 fprintf(stderr, "items overlap, can't fix\n");
6039 shift = btrfs_item_offset_nr(buf, i - 1) -
6040 btrfs_item_end_nr(buf, i);
/* Move the item data up by the gap and record the new offset. */
6045 printf("Shifting item nr %d by %u bytes in block %llu\n",
6046 i, shift, (unsigned long long)buf->start);
6047 offset = btrfs_item_offset_nr(buf, i);
6048 memmove_extent_buffer(buf,
6049 btrfs_leaf_data(buf) + offset + shift,
6050 btrfs_leaf_data(buf) + offset,
6051 btrfs_item_size_nr(buf, i));
6052 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6054 btrfs_mark_buffer_dirty(buf);
6058 * We may have moved things, in which case we want to exit so we don't
6059 * write those changes out. Once we have proper abort functionality in
6060 * progs this can be changed to something nicer.
6067 * Attempt to fix basic block failures. If we can't fix it for whatever reason
6068 * then just return -EIO.
/*
 * Try to repair a tree block that failed the basic checks.
 *
 * Only BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS
 * are handled.  For every root returned by btrfs_find_all_roots() for the
 * block, a transaction is started, the block is searched for with COW
 * enabled and block checking disabled, and fix_key_order() or
 * fix_item_offset() is run on the resulting path.
 *
 * @root:   any root of the filesystem; only root->fs_info is used
 * @buf:    the bad tree block
 * @status: the failure reported for @buf
 *
 * NOTE(review): the conditions under which each of the three
 * btrfs_commit_transaction() calls runs are not visible in this listing.
 */
6070 static int try_to_fix_bad_block(struct btrfs_root *root,
6071 struct extent_buffer *buf,
6072 enum btrfs_tree_block_status status)
6074 struct btrfs_trans_handle *trans;
6075 struct ulist *roots;
6076 struct ulist_node *node;
6077 struct btrfs_root *search_root;
6078 struct btrfs_path path;
6079 struct ulist_iterator iter;
6080 struct btrfs_key root_key, key;
6083 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6084 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6087 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6091 btrfs_init_path(&path);
6092 ULIST_ITER_INIT(&iter);
6093 while ((node = ulist_next(roots, &iter))) {
6094 root_key.objectid = node->val;
6095 root_key.type = BTRFS_ROOT_ITEM_KEY;
6096 root_key.offset = (u64)-1;
6098 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6105 trans = btrfs_start_transaction(search_root, 0);
6106 if (IS_ERR(trans)) {
6107 ret = PTR_ERR(trans);
/*
 * Search down to the level of the bad block; skip_check_block keeps the
 * search from rejecting the very block that is about to be repaired.
 */
6111 path.lowest_level = btrfs_header_level(buf);
6112 path.skip_check_block = 1;
6113 if (path.lowest_level)
6114 btrfs_node_key_to_cpu(buf, &key, 0);
6116 btrfs_item_key_to_cpu(buf, &key, 0);
6117 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6120 btrfs_commit_transaction(trans, search_root);
6123 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6124 ret = fix_key_order(search_root, &path);
6125 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6126 ret = fix_item_offset(search_root, &path);
6128 btrfs_commit_transaction(trans, search_root);
6131 btrfs_release_path(&path);
6132 btrfs_commit_transaction(trans, search_root);
6135 btrfs_release_path(&path);
/*
 * Run the basic structural checks on one tree block and update its extent
 * record.
 *
 * Stores generation, first key objectid and level in the record, validates
 * the block with btrfs_check_leaf() or btrfs_check_node(), and hands a bad
 * block to try_to_fix_bad_block().  A block that passes gets
 * content_checked set; owner_ref_checked is set directly for
 * BTRFS_BLOCK_FLAG_FULL_BACKREF blocks, the others go through
 * check_owner_ref().  Finally the record is offered to
 * maybe_free_extent_rec().
 *
 * @root:         root used for the checks
 * @extent_cache: cache holding the block's extent record
 * @buf:          the tree block
 * @flags:        extent flags of the block
 *
 * NOTE(review): the condition under which the repair attempt is made and
 * the return values are not visible in this listing.
 */
6139 static int check_block(struct btrfs_root *root,
6140 struct cache_tree *extent_cache,
6141 struct extent_buffer *buf, u64 flags)
6143 struct extent_record *rec;
6144 struct cache_extent *cache;
6145 struct btrfs_key key;
6146 enum btrfs_tree_block_status status;
6150 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6153 rec = container_of(cache, struct extent_record, cache);
6154 rec->generation = btrfs_header_generation(buf);
6156 level = btrfs_header_level(buf);
6157 if (btrfs_header_nritems(buf) > 0) {
6160 btrfs_item_key_to_cpu(buf, &key, 0);
6162 btrfs_node_key_to_cpu(buf, &key, 0);
6164 rec->info_objectid = key.objectid;
6166 rec->info_level = level;
6168 if (btrfs_is_leaf(buf))
6169 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6171 status = btrfs_check_node(root, &rec->parent_key, buf);
6173 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6175 status = try_to_fix_bad_block(root, buf, status);
6176 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6178 fprintf(stderr, "bad block %llu\n",
6179 (unsigned long long)buf->start);
6182 * Signal to callers we need to start the scan over
6183 * again since we'll have cowed blocks.
6188 rec->content_checked = 1;
6189 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6190 rec->owner_ref_checked = 1;
6192 ret = check_owner_ref(root, rec, buf);
6194 rec->owner_ref_checked = 1;
6198 maybe_free_extent_rec(extent_cache, rec);
/*
 * Look up an existing tree backref of @rec.
 *
 * Walks the rec->backrefs list; full backrefs are matched on @parent, the
 * others on @root.
 *
 * NOTE(review): this walks the rec->backrefs list, while the insertion
 * visible in add_tree_backref() goes into rec->backref_tree (rb tree).
 * Confirm that the list is populated somewhere, otherwise this lookup can
 * never match.  The statements advancing the cursor and the returns are not
 * visible in this listing.
 */
6203 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6204 u64 parent, u64 root)
6206 struct list_head *cur = rec->backrefs.next;
6207 struct extent_backref *node;
6208 struct tree_backref *back;
6210 while(cur != &rec->backrefs) {
6211 node = to_extent_backref(cur);
6215 back = to_tree_backref(node);
6217 if (!node->full_backref)
6219 if (parent == back->parent)
6222 if (node->full_backref)
6224 if (back->root == root)
/*
 * Allocate a tree backref and zero its embedded extent_backref node.
 *
 * One branch stores @parent and marks the backref as a full backref, the
 * other clears full_backref.
 * NOTE(review): the branch condition, the assignment of @root, the
 * allocation-failure handling and the return are not visible in this
 * listing - confirm.
 */
6232 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6233 u64 parent, u64 root)
6235 struct tree_backref *ref = malloc(sizeof(*ref));
6239 memset(&ref->node, 0, sizeof(ref->node));
6241 ref->parent = parent;
6242 ref->node.full_backref = 1;
6245 ref->node.full_backref = 0;
/*
 * Look up an existing data backref of @rec.
 *
 * Walks the rec->backrefs list; full backrefs are matched on @parent, the
 * others on (@root, @owner, @offset).  For a non-full match with found_ref
 * set on both sides, @bytes and @disk_bytenr are additionally compared with
 * the stored values.
 *
 * NOTE(review): as with find_tree_backref(), the list walked here is not
 * the rb tree that the visible insertion code fills - confirm.  The
 * statements advancing the cursor and the returns are not visible in this
 * listing.
 */
6252 static struct data_backref *find_data_backref(struct extent_record *rec,
6253 u64 parent, u64 root,
6254 u64 owner, u64 offset,
6256 u64 disk_bytenr, u64 bytes)
6258 struct list_head *cur = rec->backrefs.next;
6259 struct extent_backref *node;
6260 struct data_backref *back;
6262 while(cur != &rec->backrefs) {
6263 node = to_extent_backref(cur);
6267 back = to_data_backref(node);
6269 if (!node->full_backref)
6271 if (parent == back->parent)
6274 if (node->full_backref)
6276 if (back->root == root && back->owner == owner &&
6277 back->offset == offset) {
6278 if (found_ref && node->found_ref &&
6279 (back->bytes != bytes ||
6280 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data backref, zero its embedded node and mark it is_data.
 *
 * One branch stores @parent and sets full_backref, the other stores @offset
 * and clears it.  bytes is initialised from max_size, and rec->max_size is
 * raised to max_size when that is larger.
 * NOTE(review): the branch condition, the remaining field assignments, the
 * allocation-failure handling and the return are not visible in this
 * listing - confirm.
 */
6290 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6291 u64 parent, u64 root,
6292 u64 owner, u64 offset,
6295 struct data_backref *ref = malloc(sizeof(*ref));
6299 memset(&ref->node, 0, sizeof(ref->node));
6300 ref->node.is_data = 1;
6303 ref->parent = parent;
6306 ref->node.full_backref = 1;
6310 ref->offset = offset;
6311 ref->node.full_backref = 0;
6313 ref->bytes = max_size;
6316 if (max_size > rec->max_size)
6317 rec->max_size = max_size;
6321 /* Check if the type of extent matches with its chunk */
/*
 * Set rec->wrong_chunk_type when the extent lives in a block group whose
 * type does not fit the extent.
 *
 * Data extents need a DATA block group.  Metadata extents need SYSTEM or
 * METADATA; when backrefs exist, the first one decides which of the two is
 * required (SYSTEM for the chunk tree root, METADATA otherwise), and a data
 * backref on a metadata extent is flagged as well.
 *
 * Uses the file-level global_info to look up the block group.
 */
6322 static void check_extent_type(struct extent_record *rec)
6324 struct btrfs_block_group_cache *bg_cache;
6326 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6330 /* data extent, check chunk directly*/
6331 if (!rec->metadata) {
6332 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6333 rec->wrong_chunk_type = 1;
6337 /* metadata extent, check the obvious case first */
6338 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6339 BTRFS_BLOCK_GROUP_METADATA))) {
6340 rec->wrong_chunk_type = 1;
6345 * Check SYSTEM extent, as it's also marked as metadata, we can only
6346 * make sure it's a SYSTEM extent by its backref
6348 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
6349 struct extent_backref *node;
6350 struct tree_backref *tback;
/* Only the first backref in the tree is consulted. */
6353 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
6354 if (node->is_data) {
6355 /* tree block shouldn't have data backref */
6356 rec->wrong_chunk_type = 1;
6359 tback = container_of(node, struct tree_backref, node);
6361 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6362 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6364 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6365 if (!(bg_cache->flags & bg_type))
6366 rec->wrong_chunk_type = 1;
6371 * Allocate a new extent record, fill default values from @tmpl and insert
6372 * into @extent_cache. Caller is supposed to make sure the [start,nr) is not
6373 * in the cache, otherwise it fails.
/*
 * Create an extent record from @tmpl and insert it into @extent_cache
 * without looking for an existing record first.
 *
 * tmpl->max_size must be non-zero (BUG_ON).  rec->nr becomes the larger of
 * tmpl->nr and tmpl->max_size, while the cache extent is sized with
 * tmpl->nr.  After a successful insert the file-level bytes_used counter is
 * increased by rec->nr, and the crossing_stripes and chunk-type checks are
 * run on the new record.
 *
 * NOTE(review): the condition guarding the crossing_stripes check and the
 * handling of allocation/insert failures are not visible in this listing.
 */
6375 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6376 struct extent_record *tmpl)
6378 struct extent_record *rec;
6381 BUG_ON(tmpl->max_size == 0);
6382 rec = malloc(sizeof(*rec));
6385 rec->start = tmpl->start;
6386 rec->max_size = tmpl->max_size;
6387 rec->nr = max(tmpl->nr, tmpl->max_size);
6388 rec->found_rec = tmpl->found_rec;
6389 rec->content_checked = tmpl->content_checked;
6390 rec->owner_ref_checked = tmpl->owner_ref_checked;
6391 rec->num_duplicates = 0;
6392 rec->metadata = tmpl->metadata;
6393 rec->flag_block_full_backref = FLAG_UNSET;
6394 rec->bad_full_backref = 0;
6395 rec->crossing_stripes = 0;
6396 rec->wrong_chunk_type = 0;
6397 rec->is_root = tmpl->is_root;
6398 rec->refs = tmpl->refs;
6399 rec->extent_item_refs = tmpl->extent_item_refs;
6400 rec->parent_generation = tmpl->parent_generation;
6401 INIT_LIST_HEAD(&rec->backrefs);
6402 INIT_LIST_HEAD(&rec->dups);
6403 INIT_LIST_HEAD(&rec->list);
6404 rec->backref_tree = RB_ROOT;
6405 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6406 rec->cache.start = tmpl->start;
6407 rec->cache.size = tmpl->nr;
6408 ret = insert_cache_extent(extent_cache, &rec->cache);
6413 bytes_used += rec->nr;
6416 rec->crossing_stripes = check_crossing_stripes(global_info,
6417 rec->start, global_info->nodesize);
6418 check_extent_type(rec);
6423 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6425 * - refs - if found, increase refs
6426 * - is_root - if found, set
6427 * - content_checked - if found, set
6428 * - owner_ref_checked - if found, set
6430 * If not found, create a new one, initialize and insert.
/*
 * Merge @tmpl into the extent record covering [tmpl->start, tmpl->nr), or
 * create a new record through add_extent_rec_nolookup() when none exists.
 *
 * For an existing record: a template carrying found_rec that starts
 * elsewhere, or that hits a record which already has found_rec, is recorded
 * as a duplicate on rec->dups (and the record is queued on
 * duplicate_extents).  Otherwise extent_item_refs, the checked flags,
 * parent_key, parent_generation and max_size are folded into the record,
 * the stripe-crossing and chunk-type checks are re-run, and the record is
 * offered to maybe_free_extent_rec().
 *
 * NOTE(review): several conditions and all return statements are not
 * visible in this listing - confirm against the full file.
 */
6432 static int add_extent_rec(struct cache_tree *extent_cache,
6433 struct extent_record *tmpl)
6435 struct extent_record *rec;
6436 struct cache_extent *cache;
6440 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6442 rec = container_of(cache, struct extent_record, cache);
6446 rec->nr = max(tmpl->nr, tmpl->max_size);
6449 * We need to make sure to reset nr to whatever the extent
6450 * record says was the real size, this way we can compare it to
6453 if (tmpl->found_rec) {
6454 if (tmpl->start != rec->start || rec->found_rec) {
6455 struct extent_record *tmp;
6458 if (list_empty(&rec->list))
6459 list_add_tail(&rec->list,
6460 &duplicate_extents);
6463 * We have to do this song and dance in case we
6464 * find an extent record that falls inside of
6465 * our current extent record but does not have
6466 * the same objectid.
6468 tmp = malloc(sizeof(*tmp));
6471 tmp->start = tmpl->start;
6472 tmp->max_size = tmpl->max_size;
6475 tmp->metadata = tmpl->metadata;
6476 tmp->extent_item_refs = tmpl->extent_item_refs;
6477 INIT_LIST_HEAD(&tmp->list);
6478 list_add_tail(&tmp->list, &rec->dups);
6479 rec->num_duplicates++;
/* A second non-zero extent_item_refs for the same record is reported. */
6486 if (tmpl->extent_item_refs && !dup) {
6487 if (rec->extent_item_refs) {
6488 fprintf(stderr, "block %llu rec "
6489 "extent_item_refs %llu, passed %llu\n",
6490 (unsigned long long)tmpl->start,
6491 (unsigned long long)
6492 rec->extent_item_refs,
6493 (unsigned long long)tmpl->extent_item_refs);
6495 rec->extent_item_refs = tmpl->extent_item_refs;
6499 if (tmpl->content_checked)
6500 rec->content_checked = 1;
6501 if (tmpl->owner_ref_checked)
6502 rec->owner_ref_checked = 1;
6503 memcpy(&rec->parent_key, &tmpl->parent_key,
6504 sizeof(tmpl->parent_key));
6505 if (tmpl->parent_generation)
6506 rec->parent_generation = tmpl->parent_generation;
6507 if (rec->max_size < tmpl->max_size)
6508 rec->max_size = tmpl->max_size;
6511 * A metadata extent can't cross stripe_len boundary, otherwise
6512 * kernel scrub won't be able to handle it.
6513 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6517 rec->crossing_stripes = check_crossing_stripes(
6518 global_info, rec->start,
6519 global_info->nodesize);
6520 check_extent_type(rec);
6521 maybe_free_extent_rec(extent_cache, rec);
6525 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref for the extent at @bytenr.
 *
 * Creates the extent record from a zeroed template when none covers
 * @bytenr, finds or allocates the backref for (@parent, @root), and marks
 * it found_ref or found_extent_tree.  Setting a flag that is already set is
 * reported as "Extent back ref already exists".  The backref is inserted
 * into rec->backref_tree, then the record's chunk type is re-checked and
 * the record is offered to maybe_free_extent_rec().
 *
 * @found_ref: NOTE(review): presumably selects between the found_ref and
 *             found_extent_tree branches; the test itself is not visible in
 *             this listing - confirm
 */
6530 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6531 u64 parent, u64 root, int found_ref)
6533 struct extent_record *rec;
6534 struct tree_backref *back;
6535 struct cache_extent *cache;
6537 bool insert = false;
6539 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6541 struct extent_record tmpl;
6543 memset(&tmpl, 0, sizeof(tmpl));
6544 tmpl.start = bytenr;
6549 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6553 /* really a bug in cache_extent implement now */
6554 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6559 rec = container_of(cache, struct extent_record, cache);
6560 if (rec->start != bytenr) {
6562 * Several cause, from unaligned bytenr to over lapping extents
6567 back = find_tree_backref(rec, parent, root);
6569 back = alloc_tree_backref(rec, parent, root);
6576 if (back->node.found_ref) {
6577 fprintf(stderr, "Extent back ref already exists "
6578 "for %llu parent %llu root %llu \n",
6579 (unsigned long long)bytenr,
6580 (unsigned long long)parent,
6581 (unsigned long long)root);
6583 back->node.found_ref = 1;
6585 if (back->node.found_extent_tree) {
6586 fprintf(stderr, "Extent back ref already exists "
6587 "for %llu parent %llu root %llu \n",
6588 (unsigned long long)bytenr,
6589 (unsigned long long)parent,
6590 (unsigned long long)root);
6592 back->node.found_extent_tree = 1;
/* A non-zero rb_insert() result only triggers a warning. */
6595 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6596 compare_extent_backref));
6597 check_extent_type(rec);
6598 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * Creates the extent record when none covers @bytenr, raises rec->max_size
 * to @max_size if needed, and finds or allocates the matching backref.
 * In the found-ref branch num_refs must be 1 (BUG_ON); the backref's
 * found_ref counter is incremented, bytes/disk_bytenr are updated to
 * @max_size/@bytenr, and the record is marked content- and owner-checked.
 * In the extent-tree branch a repeated sighting is reported and num_refs
 * is stored.  Finally the backref is inserted into rec->backref_tree and
 * the record is offered to maybe_free_extent_rec().
 *
 * NOTE(review): the tests selecting between the branches and the return
 * statements are not visible in this listing - confirm.
 */
6602 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6603 u64 parent, u64 root, u64 owner, u64 offset,
6604 u32 num_refs, int found_ref, u64 max_size)
6606 struct extent_record *rec;
6607 struct data_backref *back;
6608 struct cache_extent *cache;
6610 bool insert = false;
6612 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6614 struct extent_record tmpl;
6616 memset(&tmpl, 0, sizeof(tmpl));
6617 tmpl.start = bytenr;
6619 tmpl.max_size = max_size;
6621 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6625 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6630 rec = container_of(cache, struct extent_record, cache);
6631 if (rec->max_size < max_size)
6632 rec->max_size = max_size;
6635 * If found_ref is set then max_size is the real size and must match the
6636 * existing refs. So if we have already found a ref then we need to
6637 * make sure that this ref matches the existing one, otherwise we need
6638 * to add a new backref so we can notice that the backrefs don't match
6639 * and we need to figure out who is telling the truth. This is to
6640 * account for that awful fsync bug I introduced where we'd end up with
6641 * a btrfs_file_extent_item that would have its length include multiple
6642 * prealloc extents or point inside of a prealloc extent.
6644 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6647 back = alloc_data_backref(rec, parent, root, owner, offset,
6654 BUG_ON(num_refs != 1);
6655 if (back->node.found_ref)
6656 BUG_ON(back->bytes != max_size);
6657 back->node.found_ref = 1;
6658 back->found_ref += 1;
6659 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
6660 back->bytes = max_size;
6661 back->disk_bytenr = bytenr;
6663 /* Need to reinsert if not already in the tree */
6665 rb_erase(&back->node.node, &rec->backref_tree);
6670 rec->content_checked = 1;
6671 rec->owner_ref_checked = 1;
6673 if (back->node.found_extent_tree) {
6674 fprintf(stderr, "Extent back ref already exists "
6675 "for %llu parent %llu root %llu "
6676 "owner %llu offset %llu num_refs %lu\n",
6677 (unsigned long long)bytenr,
6678 (unsigned long long)parent,
6679 (unsigned long long)root,
6680 (unsigned long long)owner,
6681 (unsigned long long)offset,
6682 (unsigned long)num_refs);
6684 back->num_refs = num_refs;
6685 back->node.found_extent_tree = 1;
6688 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6689 compare_extent_backref));
6691 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue the range [@bytenr, @bytenr + @size) for processing.
 *
 * The range is first added to @seen and then to @pending.
 * NOTE(review): the handling of a failed insert into @seen (presumably an
 * early return so a range is queued only once) is not visible in this
 * listing - confirm.
 */
6695 static int add_pending(struct cache_tree *pending,
6696 struct cache_tree *seen, u64 bytenr, u32 size)
6699 ret = add_cache_extent(seen, bytenr, size);
6702 add_cache_extent(pending, bytenr, size);
/*
 * Pick the next batch of block ranges to process and store them in @bits.
 *
 * Sources are consulted in this order: the first entry of @reada, then
 * @nodes searched from slightly before @last (falling back to its start),
 * then @pending.  Consecutive cache extents are copied into @bits up to
 * @bits_nr entries; when more than 8 slots remain free, further @pending
 * extents lying within 32768 bytes of the previous one are appended.
 *
 * NOTE(review): node_start is an unsigned long initialised from the u64
 * @last, which narrows the value where unsigned long is 32 bits wide -
 * confirm whether that is intended.  Return statements are not visible in
 * this listing.
 */
6706 static int pick_next_pending(struct cache_tree *pending,
6707 struct cache_tree *reada,
6708 struct cache_tree *nodes,
6709 u64 last, struct block_info *bits, int bits_nr,
6712 unsigned long node_start = last;
6713 struct cache_extent *cache;
6716 cache = search_cache_extent(reada, 0);
6718 bits[0].start = cache->start;
6719 bits[0].size = cache->size;
/* Back the search start up by 32768 bytes when possible. */
6724 if (node_start > 32768)
6725 node_start -= 32768;
6727 cache = search_cache_extent(nodes, node_start);
6729 cache = search_cache_extent(nodes, 0);
6732 cache = search_cache_extent(pending, 0);
6737 bits[ret].start = cache->start;
6738 bits[ret].size = cache->size;
6739 cache = next_cache_extent(cache);
6741 } while (cache && ret < bits_nr);
6747 bits[ret].start = cache->start;
6748 bits[ret].size = cache->size;
6749 cache = next_cache_extent(cache);
6751 } while (cache && ret < bits_nr);
6753 if (bits_nr - ret > 8) {
6754 u64 lookup = bits[0].start + bits[0].size;
6755 struct cache_extent *next;
6756 next = search_cache_extent(pending, lookup);
/* Stop extending the batch at the first gap larger than 32768 bytes. */
6758 if (next->start - lookup > 32768)
6760 bits[ret].start = next->start;
6761 bits[ret].size = next->size;
6762 lookup = next->start + next->size;
6766 next = next_cache_extent(next);
/*
 * cache_tree_free_extents() callback for chunk records: unlinks the record
 * from its list and dextents lists.
 * NOTE(review): the statement releasing the record is not visible in this
 * listing - confirm.
 */
6774 static void free_chunk_record(struct cache_extent *cache)
6776 struct chunk_record *rec;
6778 rec = container_of(cache, struct chunk_record, cache);
6779 list_del_init(&rec->list);
6780 list_del_init(&rec->dextents);
/*
 * Release every chunk record in @chunk_cache by running
 * free_chunk_record() over it via cache_tree_free_extents().
 */
6784 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6786 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node callback for device records: converts the rb_node back into its
 * device_record.
 * NOTE(review): the statement releasing the record is not visible in this
 * listing - confirm.
 */
6789 static void free_device_record(struct rb_node *node)
6791 struct device_record *rec;
6793 rec = container_of(node, struct device_record, node);
/*
 * Macro invocation pairing the name device_cache with free_device_record().
 * NOTE(review): presumably expands to the function that frees a device
 * cache tree node by node - confirm against the macro definition.
 */
6797 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert a block group record into @tree: add its cache extent to
 * tree->tree and append the record to tree->block_groups.
 * NOTE(review): the handling of a failed cache insert and the return
 * statement are not visible in this listing.
 */
6799 int insert_block_group_record(struct block_group_tree *tree,
6800 struct block_group_record *bg_rec)
6804 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6808 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * cache_tree_free_extents() callback for block group records: unlinks the
 * record from its list.
 * NOTE(review): the statement releasing the record is not visible in this
 * listing - confirm.
 */
6812 static void free_block_group_record(struct cache_extent *cache)
6814 struct block_group_record *rec;
6816 rec = container_of(cache, struct block_group_record, cache);
6817 list_del_init(&rec->list);
/*
 * Release every block group record in @tree by running
 * free_block_group_record() over tree->tree via cache_tree_free_extents().
 */
6821 void free_block_group_tree(struct block_group_tree *tree)
6823 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert a device extent record into @tree.
 *
 * Uses insert_cache_extent2() for the cache insert, then appends the record
 * to both orphan lists (no_chunk_orphans and no_device_orphans).
 * NOTE(review): the handling of a failed cache insert and the return
 * statement are not visible in this listing.
 */
6826 int insert_device_extent_record(struct device_extent_tree *tree,
6827 struct device_extent_record *de_rec)
6832 * Device extent is a bit different from the other extents, because
6833 * the extents which belong to the different devices may have the
6834 * same start and size, so we need use the special extent cache
6835 * search/insert functions.
6837 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6841 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6842 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Release callback for one device extent record (passed to
 * cache_tree_free_extents() by free_device_extent_tree()).
 * Recovers the enclosing device_extent_record and unlinks it from
 * chunk_list and device_list, each only when that list head is not empty.
 * NOTE(review): the statement that disposes of the record is elided in
 * this view - confirm in the full file.
 */
6846 static void free_device_extent_record(struct cache_extent *cache)
6848 struct device_extent_record *rec;
6850 rec = container_of(cache, struct device_extent_record, cache);
6851 if (!list_empty(&rec->chunk_list))
6852 list_del_init(&rec->chunk_list);
6853 if (!list_empty(&rec->device_list))
6854 list_del_init(&rec->device_list);
/*
 * Release the contents of a device extent tree.
 * Delegates to cache_tree_free_extents() on tree->tree with
 * free_device_extent_record() as the release callback.
 */
6858 void free_device_extent_tree(struct device_extent_tree *tree)
6860 cache_tree_free_extents(&tree->tree, free_device_extent_record);
/*
 * Only built when BTRFS_COMPAT_EXTENT_TREE_V0 is defined.
 * Turns the v0 extent ref item at slot of leaf into a backref in
 * extent_cache. A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is
 * recorded with add_tree_backref(); any other ref is recorded with
 * add_data_backref() using the count read by btrfs_ref_count_v0().
 * NOTE(review): the trailing arguments of the add_tree_backref() call, the
 * else line, the return and the closing preprocessor line are elided in
 * this view.
 */
6863 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6864 static int process_extent_ref_v0(struct cache_tree *extent_cache,
6865 struct extent_buffer *leaf, int slot)
6867 struct btrfs_extent_ref_v0 *ref0;
6868 struct btrfs_key key;
6871 btrfs_item_key_to_cpu(leaf, &key, slot);
6872 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
6873 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
6874 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
6877 ret = add_data_backref(extent_cache, key.objectid, key.offset,
6878 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk_record from the chunk item at slot of leaf.
 * The record is allocated zeroed with a size that depends on the number
 * of stripes (btrfs_chunk_record_size()). Its cache extent covers
 * [key->offset, key->offset + chunk length). The key fields, the chunk
 * attributes and every stripe (devid, offset, dev_uuid) are copied in.
 * NOTE(review): the third parameter (slot), the locals, the allocation
 * failure handling after the message and the return statement are elided
 * in this view - confirm in the full file.
 */
6884 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6885 struct btrfs_key *key,
6888 struct btrfs_chunk *ptr;
6889 struct chunk_record *rec;
6892 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6893 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
/* the stripe count is needed first because it determines the record size */
6895 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6897 fprintf(stderr, "memory allocation failed\n");
6901 INIT_LIST_HEAD(&rec->list);
6902 INIT_LIST_HEAD(&rec->dextents);
/* cache extent: logical start comes from the key, length from the item */
6905 rec->cache.start = key->offset;
6906 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6908 rec->generation = btrfs_header_generation(leaf);
6910 rec->objectid = key->objectid;
6911 rec->type = key->type;
6912 rec->offset = key->offset;
6914 rec->length = rec->cache.size;
6915 rec->owner = btrfs_chunk_owner(leaf, ptr);
6916 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6917 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6918 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6919 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6920 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6921 rec->num_stripes = num_stripes;
6922 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* copy each stripe; dev_uuid is read straight out of the extent buffer */
6924 for (i = 0; i < rec->num_stripes; ++i) {
6925 rec->stripes[i].devid =
6926 btrfs_stripe_devid_nr(leaf, ptr, i);
6927 rec->stripes[i].offset =
6928 btrfs_stripe_offset_nr(leaf, ptr, i);
6929 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6930 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at slot of eb and add it to chunk_cache.
 * The item is first checked with btrfs_check_chunk_valid(); an invalid
 * chunk is reported through error() with a message saying it is ignored.
 * A valid chunk is converted by btrfs_new_chunk_record() and inserted
 * with insert_cache_extent(); the existed message is printed on the
 * insert failure path.
 * NOTE(review): the slot parameter, the ret checks, any cleanup of rec on
 * the duplicate path and the return statements are elided in this view.
 * No check of rec is visible before it is dereferenced; it may be among
 * the elided lines or handled inside btrfs_new_chunk_record() - confirm.
 */
6937 static int process_chunk_item(struct cache_tree *chunk_cache,
6938 struct btrfs_key *key, struct extent_buffer *eb,
6941 struct chunk_record *rec;
6942 struct btrfs_chunk *chunk;
6945 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6947 * Do extra check for this chunk item,
6949 * It's still possible one can craft a leaf with CHUNK_ITEM, with
6950 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6951 * and owner<->key_type check.
6953 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
6956 error("chunk(%llu, %llu) is not valid, ignore it",
6957 key->offset, btrfs_chunk_length(eb, chunk));
6960 rec = btrfs_new_chunk_record(eb, key, slot);
6961 ret = insert_cache_extent(chunk_cache, &rec->cache);
6963 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6964 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at slot of eb and insert it
 * into the dev_cache rb-tree, ordered by device_record_compare.
 * The record comes from malloc(), so only the fields assigned below are
 * initialised in the visible code.
 * NOTE(review): the locals, the allocation failure handling after the
 * message, the check of the rb_insert() result, any cleanup on the
 * duplicate path and the return statement are elided in this view.
 */
6971 static int process_device_item(struct rb_root *dev_cache,
6972 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6974 struct btrfs_dev_item *ptr;
6975 struct device_record *rec;
6978 ptr = btrfs_item_ptr(eb,
6979 slot, struct btrfs_dev_item);
6981 rec = malloc(sizeof(*rec));
6983 fprintf(stderr, "memory allocation failed\n");
/*
 * NOTE(review): devid is set from the key here and then overwritten
 * further down with the value stored in the item (btrfs_device_id()).
 */
6987 rec->devid = key->offset;
6988 rec->generation = btrfs_header_generation(eb);
6990 rec->objectid = key->objectid;
6991 rec->type = key->type;
6992 rec->offset = key->offset;
6994 rec->devid = btrfs_device_id(eb, ptr);
6995 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6996 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6998 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7000 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build an in-memory block_group_record from the block group item at slot
 * of leaf. The record is allocated zeroed. Its cache extent starts at
 * key->objectid and has size key->offset; the key fields, the leaf
 * generation and the item flags are copied in and the list head is
 * initialised.
 * NOTE(review): the slot parameter, the allocation failure handling after
 * the message and the return statement are elided in this view.
 */
7007 struct block_group_record *
7008 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7011 struct btrfs_block_group_item *ptr;
7012 struct block_group_record *rec;
7014 rec = calloc(1, sizeof(*rec));
7016 fprintf(stderr, "memory allocation failed\n");
/* for block group keys objectid is the start and offset is the length */
7020 rec->cache.start = key->objectid;
7021 rec->cache.size = key->offset;
7023 rec->generation = btrfs_header_generation(leaf);
7025 rec->objectid = key->objectid;
7026 rec->type = key->type;
7027 rec->offset = key->offset;
7029 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7030 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7032 INIT_LIST_HEAD(&rec->list);
/*
 * Convert the block group item at slot of eb into a record and register
 * it in block_group_cache through insert_block_group_record(). The
 * existed message is printed on the insert failure path.
 * NOTE(review): the declaration of ret, the check on the insert result,
 * any cleanup of rec on the duplicate path and the return statement are
 * elided in this view.
 */
7037 static int process_block_group_item(struct block_group_tree *block_group_cache,
7038 struct btrfs_key *key,
7039 struct extent_buffer *eb, int slot)
7041 struct block_group_record *rec;
7044 rec = btrfs_new_block_group_record(eb, key, slot);
7045 ret = insert_block_group_record(block_group_cache, rec);
7047 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7048 rec->objectid, rec->offset);
/*
 * Build an in-memory device_extent_record from the dev extent item at
 * slot of leaf. The record is allocated zeroed. Its cache extent is keyed
 * by (key->objectid, key->offset) and sized by the extent length read from
 * the item. The chunk objectid and chunk offset stored in the item are
 * copied as well and both list heads are initialised.
 * NOTE(review): the allocation failure handling after the message, the
 * left-hand side of the chunk offset assignment and the return statement
 * are elided in this view.
 */
7055 struct device_extent_record *
7056 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7057 struct btrfs_key *key, int slot)
7059 struct device_extent_record *rec;
7060 struct btrfs_dev_extent *ptr;
7062 rec = calloc(1, sizeof(*rec));
7064 fprintf(stderr, "memory allocation failed\n");
/* the cache entry carries an objectid in addition to the start offset */
7068 rec->cache.objectid = key->objectid;
7069 rec->cache.start = key->offset;
7071 rec->generation = btrfs_header_generation(leaf);
7073 rec->objectid = key->objectid;
7074 rec->type = key->type;
7075 rec->offset = key->offset;
7077 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7078 rec->chunk_objecteid =
7079 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7081 btrfs_dev_extent_chunk_offset(leaf, ptr);
7082 rec->length = btrfs_dev_extent_length(leaf, ptr);
7083 rec->cache.size = rec->length;
7085 INIT_LIST_HEAD(&rec->chunk_list);
7086 INIT_LIST_HEAD(&rec->device_list);
/*
 * Convert the dev extent item at slot of eb into a record and register it
 * in dev_extent_cache through insert_device_extent_record(). The existed
 * message is printed on the insert failure path.
 * NOTE(review): the return type line, the slot parameter, the check on
 * the insert result, any cleanup of rec on the duplicate path and the
 * return statement are elided in this view.
 */
7092 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7093 struct btrfs_key *key, struct extent_buffer *eb,
7096 struct device_extent_record *rec;
7099 rec = btrfs_new_device_extent_record(eb, key, slot);
7100 ret = insert_device_extent_record(dev_extent_cache, rec);
7103 "Device extent[%llu, %llu, %llu] existed.\n",
7104 rec->objectid, rec->offset, rec->length);
/*
 * Record the extent described by the EXTENT_ITEM or METADATA_ITEM at slot
 * of eb in extent_cache, together with the inline backrefs it carries.
 * For a METADATA_ITEM key the length is fs_info->nodesize; in the other
 * visible assignment it is key.offset. An extent whose bytenr is not
 * sectorsize aligned, a metadata extent whose length differs from
 * nodesize, and a data extent whose length is not sectorsize aligned are
 * reported through error(); the messages say they are ignored.
 * NOTE(review): many short lines (locals, braces, returns, the loop and
 * switch headers, some call arguments) are elided in this view, so the
 * control flow between the visible statements must be confirmed against
 * the full file.
 */
7111 static int process_extent_item(struct btrfs_root *root,
7112 struct cache_tree *extent_cache,
7113 struct extent_buffer *eb, int slot)
7115 struct btrfs_extent_item *ei;
7116 struct btrfs_extent_inline_ref *iref;
7117 struct btrfs_extent_data_ref *dref;
7118 struct btrfs_shared_data_ref *sref;
7119 struct btrfs_key key;
7120 struct extent_record tmpl;
7125 u32 item_size = btrfs_item_size_nr(eb, slot);
7131 btrfs_item_key_to_cpu(eb, &key, slot);
7133 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7135 num_bytes = root->fs_info->nodesize;
7137 num_bytes = key.offset;
7140 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7141 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7142 key.objectid, root->fs_info->sectorsize);
/*
 * Item smaller than struct btrfs_extent_item: only the v0 compat code
 * below handles it, and it records the extent without inline refs.
 */
7145 if (item_size < sizeof(*ei)) {
7146 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7147 struct btrfs_extent_item_v0 *ei0;
7148 BUG_ON(item_size != sizeof(*ei0));
7149 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7150 refs = btrfs_extent_refs_v0(eb, ei0);
7154 memset(&tmpl, 0, sizeof(tmpl));
7155 tmpl.start = key.objectid;
7156 tmpl.nr = num_bytes;
7157 tmpl.extent_item_refs = refs;
7158 tmpl.metadata = metadata;
7160 tmpl.max_size = num_bytes;
7162 return add_extent_rec(extent_cache, &tmpl);
7165 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7166 refs = btrfs_extent_refs(eb, ei);
7167 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7171 if (metadata && num_bytes != root->fs_info->nodesize) {
7172 error("ignore invalid metadata extent, length %llu does not equal to %u",
7173 num_bytes, root->fs_info->nodesize);
7176 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7177 error("ignore invalid data extent, length %llu is not aligned to %u",
7178 num_bytes, root->fs_info->sectorsize);
/* the result of this add_extent_rec() call is not captured here */
7182 memset(&tmpl, 0, sizeof(tmpl));
7183 tmpl.start = key.objectid;
7184 tmpl.nr = num_bytes;
7185 tmpl.extent_item_refs = refs;
7186 tmpl.metadata = metadata;
7188 tmpl.max_size = num_bytes;
7189 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs start right after the extent item. A tree block stored
 * under an EXTENT_ITEM key additionally has a btrfs_tree_block_info in
 * front of them, which is skipped.
 */
7191 ptr = (unsigned long)(ei + 1);
7192 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7193 key.type == BTRFS_EXTENT_ITEM_KEY)
7194 ptr += sizeof(struct btrfs_tree_block_info);
/* end marks the first byte past the item; the loop header is elided */
7196 end = (unsigned long)ei + item_size;
7198 iref = (struct btrfs_extent_inline_ref *)ptr;
7199 type = btrfs_extent_inline_ref_type(eb, iref);
7200 offset = btrfs_extent_inline_ref_offset(eb, iref);
7202 case BTRFS_TREE_BLOCK_REF_KEY:
7203 ret = add_tree_backref(extent_cache, key.objectid,
7207 "add_tree_backref failed (extent items tree block): %s",
7210 case BTRFS_SHARED_BLOCK_REF_KEY:
7211 ret = add_tree_backref(extent_cache, key.objectid,
7215 "add_tree_backref failed (extent items shared block): %s",
/* for a data ref the payload overlays the offset field of the inline ref */
7218 case BTRFS_EXTENT_DATA_REF_KEY:
7219 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7220 add_data_backref(extent_cache, key.objectid, 0,
7221 btrfs_extent_data_ref_root(eb, dref),
7222 btrfs_extent_data_ref_objectid(eb,
7224 btrfs_extent_data_ref_offset(eb, dref),
7225 btrfs_extent_data_ref_count(eb, dref),
/* for a shared data ref the count structure follows the inline ref */
7228 case BTRFS_SHARED_DATA_REF_KEY:
7229 sref = (struct btrfs_shared_data_ref *)(iref + 1);
7230 add_data_backref(extent_cache, key.objectid, offset,
7232 btrfs_shared_data_ref_count(eb, sref),
7236 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7237 key.objectid, key.type, num_bytes);
/* step over the inline ref just handled; its size depends on its type */
7240 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the range [offset, offset + bytes) of block group cache is
 * described by exactly one free space entry, after carving out the areas
 * occupied by superblock mirrors.
 * For each of the BTRFS_SUPER_MIRROR_MAX superblock offsets the logical
 * addresses inside this block group are obtained with btrfs_rmap_block()
 * and the range is trimmed at the front, at the back, or split in two
 * (left side checked by a recursive call, right side continued here).
 * The remaining range must match an entry returned by
 * btrfs_find_free_space() in both offset and length; a matching entry is
 * unlinked from the free space ctl.
 * NOTE(review): the locals, the inner loop over the returned logical
 * addresses, several continue/return lines and the release of the logical
 * array are elided in this view - confirm in the full file.
 */
7247 static int check_cache_range(struct btrfs_root *root,
7248 struct btrfs_block_group_cache *cache,
7249 u64 offset, u64 bytes)
7251 struct btrfs_free_space *entry;
7257 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
7258 bytenr = btrfs_sb_offset(i);
7259 ret = btrfs_rmap_block(root->fs_info,
7260 cache->key.objectid, bytenr, 0,
7261 &logical, &nr, &stripe_len);
/* the two tests below detect a stripe entirely before or after the range */
7266 if (logical[nr] + stripe_len <= offset)
7268 if (offset + bytes <= logical[nr])
/* stripe begins exactly at the range start: cut it off the front */
7270 if (logical[nr] == offset) {
7271 if (stripe_len >= bytes) {
7275 bytes -= stripe_len;
7276 offset += stripe_len;
/* stripe begins before the range: keep only what lies past its end */
7277 } else if (logical[nr] < offset) {
7278 if (logical[nr] + stripe_len >=
7283 bytes = (offset + bytes) -
7284 (logical[nr] + stripe_len);
7285 offset = logical[nr] + stripe_len;
7288 * Could be tricky, the super may land in the
7289 * middle of the area we're checking. First
7290 * check the easiest case, it's at the end.
7292 if (logical[nr] + stripe_len >=
7294 bytes = logical[nr] - offset;
7298 /* Check the left side */
7299 ret = check_cache_range(root, cache,
7301 logical[nr] - offset);
7307 /* Now we continue with the right side */
7308 bytes = (offset + bytes) -
7309 (logical[nr] + stripe_len);
7310 offset = logical[nr] + stripe_len;
/* whatever is left must be backed by one exactly matching entry */
7317 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7319 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7320 offset, offset+bytes);
7324 if (entry->offset != offset) {
7325 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7330 if (entry->bytes != bytes) {
7331 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7332 bytes, entry->bytes, offset);
/* remove the verified entry so that leftovers can be detected afterwards */
7336 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Walk the extent tree across one block group and check that every gap
 * between allocated extents is described by the loaded free space
 * entries. last tracks the end of the previously seen extent; each gap
 * [last, key.objectid) and the tail up to the end of the block group is
 * handed to check_cache_range(). After the walk, entries still present in
 * free_space_ctl->free_space_offset are reported.
 * NOTE(review): the locals, the loop header, the ret checks, the break
 * and continue lines, the slot advance and the return statement are
 * elided in this view - confirm in the full file.
 */
7341 static int verify_space_cache(struct btrfs_root *root,
7342 struct btrfs_block_group_cache *cache)
7344 struct btrfs_path path;
7345 struct extent_buffer *leaf;
7346 struct btrfs_key key;
/* the root passed in is replaced: the walk always uses the extent root */
7350 root = root->fs_info->extent_root;
/* start at the block group start or at the superblock offset if larger */
7352 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7354 btrfs_init_path(&path);
7355 key.objectid = last;
7357 key.type = BTRFS_EXTENT_ITEM_KEY;
7358 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7363 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7364 ret = btrfs_next_leaf(root, &path);
7372 leaf = path.nodes[0];
7373 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* key at or past the end of this block group */
7374 if (key.objectid >= cache->key.offset + cache->key.objectid)
7376 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7377 key.type != BTRFS_METADATA_ITEM_KEY) {
/*
 * Extent starts exactly where the previous one ended: no gap, just move
 * last forward. EXTENT_ITEM keys carry the length in key.offset; for the
 * other accepted key type nodesize is used.
 */
7382 if (last == key.objectid) {
7383 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7384 last = key.objectid + key.offset;
7386 last = key.objectid + root->fs_info->nodesize;
/* gap between last and this extent: it must be recorded as free space */
7391 ret = check_cache_range(root, cache, last,
7392 key.objectid - last);
7395 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7396 last = key.objectid + key.offset;
7398 last = key.objectid + root->fs_info->nodesize;
/* free tail between the last extent and the end of the block group */
7402 if (last < cache->key.objectid + cache->key.offset)
7403 ret = check_cache_range(root, cache, last,
7404 cache->key.objectid +
7405 cache->key.offset - last);
7408 btrfs_release_path(&path);
7411 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7412 fprintf(stderr, "There are still entries left in the space "
/*
 * Validate the free space information of the block groups.
 * Block groups are looked up one after another, starting just past the
 * primary superblock. For each one the free space is loaded either from
 * the free space tree (when the FREE_SPACE_TREE compat_ro bit is set) or
 * through load_free_space_cache(), and then cross-checked by
 * verify_space_cache().
 * Returns -EINVAL when the local error value is non-zero, otherwise 0.
 * NOTE(review): the locals, the loop header, the ret checks, the else
 * lines and the break/continue lines are elided in this view, so the
 * exact flow between the visible calls must be confirmed in the full file.
 */
7420 static int check_space_cache(struct btrfs_root *root)
7422 struct btrfs_block_group_cache *cache;
7423 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
/*
 * A cache generation other than -1 that differs from the super
 * generation is announced as a cache that will be invalidated.
 */
7427 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7428 btrfs_super_generation(root->fs_info->super_copy) !=
7429 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7430 printf("cache and super generation don't match, space cache "
7431 "will be invalidated\n");
7435 if (ctx.progress_enabled) {
7436 ctx.tp = TASK_FREE_SPACE;
7437 task_start(ctx.info);
7441 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* the next lookup continues right behind this block group */
7445 start = cache->key.objectid + cache->key.offset;
7446 if (!cache->free_space_ctl) {
7447 if (btrfs_init_free_space_ctl(cache,
7448 root->fs_info->sectorsize)) {
7453 btrfs_remove_free_space_cache(cache);
/* free space tree variant: super stripes are excluded around the load */
7456 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7457 ret = exclude_super_stripes(root, cache);
7459 fprintf(stderr, "could not exclude super stripes: %s\n",
7464 ret = load_free_space_tree(root->fs_info, cache);
7465 free_excluded_extents(root, cache);
7467 fprintf(stderr, "could not load free space tree: %s\n",
7474 ret = load_free_space_cache(root->fs_info, cache);
7479 ret = verify_space_cache(root, cache);
7481 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7482 cache->key.objectid);
7487 task_stop(ctx.info);
7489 return error ? -EINVAL : 0;
/*
 * Read the data covered by one csum item and compare the checksum of each
 * sectorsize block with the value stored in the csum leaf.
 *   bytenr, num_bytes: logical range to read; num_bytes is first tested
 *                      for being a multiple of sectorsize
 *   leaf_offset:       offset inside eb at which the stored csums begin
 *   eb:                leaf holding the csum item
 * A buffer of num_bytes is allocated for the data. On a mismatch the
 * mirror, the bytenr and both csum values are printed and
 * btrfs_num_copies() is consulted before comparing against the current
 * mirror number.
 * NOTE(review): the locals, the ret checks, the retry jump, the release
 * of the data buffer and the return statements are elided in this view -
 * confirm in the full file.
 */
7492 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7493 u64 num_bytes, unsigned long leaf_offset,
7494 struct extent_buffer *eb) {
7496 struct btrfs_fs_info *fs_info = root->fs_info;
7498 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7500 unsigned long csum_offset;
7504 u64 data_checked = 0;
7510 if (num_bytes % fs_info->sectorsize)
7513 data = malloc(num_bytes);
7517 while (offset < num_bytes) {
7520 read_len = num_bytes - offset;
7521 /* read as much space once a time */
7522 ret = read_extent_data(fs_info, data + offset,
7523 bytenr + offset, &read_len, mirror);
7527 /* verify every 4k data's checksum */
7528 while (data_checked < read_len) {
/* tmp is the position of the current block relative to bytenr */
7530 tmp = offset + data_checked;
7532 csum = btrfs_csum_data((char *)data + tmp,
7533 csum, fs_info->sectorsize);
7534 btrfs_csum_final(csum, (u8 *)&csum);
/* one stored csum of csum_size bytes per sectorsize block */
7536 csum_offset = leaf_offset +
7537 tmp / fs_info->sectorsize * csum_size;
7538 read_extent_buffer(eb, (char *)&csum_expected,
7539 csum_offset, csum_size);
7540 /* try another mirror */
7541 if (csum != csum_expected) {
7542 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7543 mirror, bytenr + tmp,
7544 csum, csum_expected);
7545 num_copies = btrfs_num_copies(root->fs_info,
7547 if (mirror < num_copies - 1) {
7552 data_checked += fs_info->sectorsize;
/*
 * Check that the byte range [bytenr, bytenr + num_bytes) is covered by
 * EXTENT_ITEM records in the extent tree.
 * The search starts from the key (bytenr, EXTENT_ITEM, -1) and steps back
 * towards the closest preceding extent item. The range is then trimmed by
 * each overlapping extent found. When an extent lies strictly inside the
 * range, the part to its right is checked by a recursive call and the
 * part to its left continues in this invocation. If bytes remain
 * uncovered and no error occurred, a message naming the csum range is
 * printed.
 * NOTE(review): the num_bytes parameter line, the locals, the loop
 * header, the slot adjustments, the break/continue/goto lines and the
 * return statement are elided in this view - confirm in the full file.
 */
7561 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7564 struct btrfs_path path;
7565 struct extent_buffer *leaf;
7566 struct btrfs_key key;
7569 btrfs_init_path(&path);
/* offset -1 places the search position behind any item for this bytenr */
7570 key.objectid = bytenr;
7571 key.type = BTRFS_EXTENT_ITEM_KEY;
7572 key.offset = (u64)-1;
7575 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7578 fprintf(stderr, "Error looking up extent record %d\n", ret);
7579 btrfs_release_path(&path);
/* step back one item, moving to the previous leaf when at slot 0 */
7582 if (path.slots[0] > 0) {
7585 ret = btrfs_prev_leaf(root, &path);
7588 } else if (ret > 0) {
7595 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7598 * Block group items come before extent items if they have the same
7599 * bytenr, so walk back one more just in case. Dear future traveller,
7600 * first congrats on mastering time travel. Now if it's not too much
7601 * trouble could you go back to 2006 and tell Chris to make the
7602 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7603 * EXTENT_ITEM_KEY please?
7605 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7606 if (path.slots[0] > 0) {
7609 ret = btrfs_prev_leaf(root, &path);
7612 } else if (ret > 0) {
7617 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7621 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7622 ret = btrfs_next_leaf(root, &path);
7624 fprintf(stderr, "Error going to next leaf "
7626 btrfs_release_path(&path);
7632 leaf = path.nodes[0];
7633 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7634 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7638 if (key.objectid + key.offset < bytenr) {
7642 if (key.objectid > bytenr + num_bytes)
7645 if (key.objectid == bytenr) {
7646 if (key.offset >= num_bytes) {
7650 num_bytes -= key.offset;
7651 bytenr += key.offset;
7652 } else if (key.objectid < bytenr) {
7653 if (key.objectid + key.offset >= bytenr + num_bytes) {
7657 num_bytes = (bytenr + num_bytes) -
7658 (key.objectid + key.offset);
7659 bytenr = key.objectid + key.offset;
7661 if (key.objectid + key.offset < bytenr + num_bytes) {
7662 u64 new_start = key.objectid + key.offset;
7663 u64 new_bytes = bytenr + num_bytes - new_start;
7666 * Weird case, the extent is in the middle of
7667 * our range, we'll have to search one side
7668 * and then the other. Not sure if this happens
7669 * in real life, but no harm in coding it up
7670 * anyway just in case.
7672 btrfs_release_path(&path);
7673 ret = check_extent_exists(root, new_start,
7676 fprintf(stderr, "Right section didn't "
/* keep only the part of the range in front of this extent */
7680 num_bytes = key.objectid - bytenr;
7683 num_bytes = key.objectid - bytenr;
/* bytes left over here were not matched by any extent item */
7690 if (num_bytes && !ret) {
7691 fprintf(stderr, "There are no extents for csum range "
7692 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7696 btrfs_release_path(&path);
/*
 * Walk the EXTENT_CSUM items of the csum tree.
 * When check_data_csum is set, the data behind each item is verified with
 * check_extent_csums(). Independently of that, contiguous csum items are
 * merged into one run [offset, offset + num_bytes); whenever the next
 * item does not continue the run, check_extent_exists() is called for the
 * finished run and a message is printed on its failure path.
 * NOTE(review): several locals, the loop header, the ret checks, the
 * skip_csum_check label, the slot advance and the return statement are
 * elided in this view - confirm in the full file.
 */
7700 static int check_csums(struct btrfs_root *root)
7702 struct btrfs_path path;
7703 struct extent_buffer *leaf;
7704 struct btrfs_key key;
7705 u64 offset = 0, num_bytes = 0;
7706 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7710 unsigned long leaf_offset;
/* the root passed in is replaced by the csum root of the same fs_info */
7712 root = root->fs_info->csum_root;
7713 if (!extent_buffer_uptodate(root->node)) {
7714 fprintf(stderr, "No valid csum tree found\n");
7718 btrfs_init_path(&path);
7719 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7720 key.type = BTRFS_EXTENT_CSUM_KEY;
7722 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7724 fprintf(stderr, "Error searching csum tree %d\n", ret);
7725 btrfs_release_path(&path);
7729 if (ret > 0 && path.slots[0])
7734 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7735 ret = btrfs_next_leaf(root, &path);
7737 fprintf(stderr, "Error going to next leaf "
7744 leaf = path.nodes[0];
7746 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7747 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* each stored csum of csum_size bytes stands for sectorsize data bytes */
7752 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7753 csum_size) * root->fs_info->sectorsize;
7754 if (!check_data_csum)
7755 goto skip_csum_check;
7756 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7757 ret = check_extent_csums(root, key.offset, data_len,
7763 offset = key.offset;
/* this item does not continue the current run: check the finished run */
7764 } else if (key.offset != offset + num_bytes) {
7765 ret = check_extent_exists(root, offset, num_bytes);
7767 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7768 "there is no extent record\n",
7769 offset, offset+num_bytes);
7772 offset = key.offset;
7775 num_bytes += data_len;
7779 btrfs_release_path(&path);
/*
 * Compare key against drop_key field by field in the order objectid,
 * type, offset; a later field is only looked at when the earlier ones
 * are equal.
 * NOTE(review): every return statement is elided in this view;
 * presumably the result is non-zero when key sorts before drop_key and
 * zero otherwise - confirm in the full file.
 */
7783 static int is_dropped_key(struct btrfs_key *key,
7784 struct btrfs_key *drop_key) {
7785 if (key->objectid < drop_key->objectid)
7787 else if (key->objectid == drop_key->objectid) {
7788 if (key->type < drop_key->type)
7790 else if (key->type == drop_key->type) {
7791 if (key->offset < drop_key->offset)
7799 * Here are the rules for FULL_BACKREF.
7801 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7802 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7804 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7805 * if it happened after the relocation occurred since we'll have dropped the
7806 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7807 * have no real way to know for sure.
7809 * We process the blocks one root at a time, and we start from the lowest root
7810 * objectid and go to the highest. So we can just lookup the owner backref for
7811 * the record and if we don't find it then we know it doesn't exist and we have
7814 * FIXME: if we ever start reclaiming root objectids then we need to fix this
7815 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7816 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether BTRFS_BLOCK_FLAG_FULL_BACKREF applies to tree block
 * buf when it is reached from the root described by ri, and store the
 * result in *flags.
 * The extent record for buf->start is looked up in extent_cache. The
 * visible tests are, in order: root objectid below
 * BTRFS_FIRST_FREE_OBJECTID, buf being the root block of ri, the RELOC
 * header flag, the header owner equal to ri->objectid, and finally a
 * lookup of a tree backref for the header owner. On both outcomes the
 * result is compared with rec->flag_block_full_backref and
 * rec->bad_full_backref is set when a previously stored value disagrees.
 * NOTE(review): the flags parameter line, the jump targets taken by each
 * test, the labels and the return statements are elided in this view, so
 * which test leads to which outcome must be confirmed in the full file.
 */
7818 static int calc_extent_flag(struct cache_tree *extent_cache,
7819 struct extent_buffer *buf,
7820 struct root_item_record *ri,
7823 struct extent_record *rec;
7824 struct cache_extent *cache;
7825 struct tree_backref *tback;
7828 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7829 /* we have added this extent before */
7833 rec = container_of(cache, struct extent_record, cache);
7836 * Except file/reloc tree, we can not have
7839 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7844 if (buf->start == ri->bytenr)
7847 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7850 owner = btrfs_header_owner(buf);
7851 if (owner == ri->objectid)
7854 tback = find_tree_backref(rec, 0, owner);
/* outcome without the flag: a stored value other than 0 is a conflict */
7859 if (rec->flag_block_full_backref != FLAG_UNSET &&
7860 rec->flag_block_full_backref != 0)
7861 rec->bad_full_backref = 1;
/* outcome with the flag: a stored value other than 1 is a conflict */
7864 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7865 if (rec->flag_block_full_backref != FLAG_UNSET &&
7866 rec->flag_block_full_backref != 1)
7867 rec->bad_full_backref = 1;
/*
 * Print a one-line diagnostic to stderr naming a key type that was found
 * in a root where it is not expected. The key type and the root id are
 * rendered by print_key_type() and print_objectid().
 */
7871 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7873 fprintf(stderr, "Invalid key type(");
7874 print_key_type(stderr, 0, key_type);
7875 fprintf(stderr, ") found in root(");
7876 print_objectid(stderr, rootid, 0);
7877 fprintf(stderr, ")\n");
7881 * Check if the key is valid with its extent buffer.
7883 * This is an early check in case an invalid key exists in an extent buffer
7884 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check that key_type may appear in the tree identified by rootid.
 * Visible rules: DEV_ITEM and CHUNK_ITEM keys are tested against the
 * chunk tree; EXTENT_ITEM, METADATA_ITEM and BLOCK_GROUP_ITEM keys against
 * the extent tree; ROOT_ITEM keys against the root tree; DEV_EXTENT keys
 * against the dev tree. A mismatch is reported through
 * report_mismatch_key_root().
 * NOTE(review): the switch header, the jumps taken on mismatch, the break
 * lines, the default handling and the return values are elided in this
 * view - confirm in the full file.
 */
7887 static int check_type_with_root(u64 rootid, u8 key_type)
7890 /* Only valid in chunk tree */
7891 case BTRFS_DEV_ITEM_KEY:
7892 case BTRFS_CHUNK_ITEM_KEY:
7893 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7896 /* valid in csum and log tree */
/*
 * NOTE(review): every other case label in this function is a *_KEY
 * constant while this one is a tree OBJECTID constant. Going by the
 * comment above, a csum key type may have been intended, but the rest of
 * the condition is elided here, so verify before changing anything.
 */
7897 case BTRFS_CSUM_TREE_OBJECTID:
7898 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7902 case BTRFS_EXTENT_ITEM_KEY:
7903 case BTRFS_METADATA_ITEM_KEY:
7904 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7905 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7908 case BTRFS_ROOT_ITEM_KEY:
7909 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7912 case BTRFS_DEV_EXTENT_KEY:
7913 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7919 report_mismatch_key_root(key_type, rootid);
/*
 * Process one pending tree block of the root described by ri.
 * A batch of candidate blocks is chosen with pick_next_pending() and
 * readahead is issued for them; bits[0] is the block handled in this
 * call. The block is dropped from the pending, reada and nodes caches,
 * read with read_tree_block() and checked with check_block(). Its
 * FULL_BACKREF state is determined from the extent tree and/or
 * calc_extent_flag() and stored in the extent record.
 * For a leaf, every item is dispatched by key type to the matching
 * process_* helper or backref recorder, and file extent items feed the
 * data byte counters and data backrefs. For a node, every child pointer
 * gets an extent record and a tree backref and is queued through
 * add_pending(). Finally the global btree byte counters are updated and
 * the buffer is released.
 * NOTE(review): a large number of short lines (locals, two parameters,
 * braces, else lines, ret checks, continue/goto lines, labels and the
 * return statement) are elided in this view, so the control flow between
 * the visible statements must be confirmed against the full file.
 */
7923 static int run_next_block(struct btrfs_root *root,
7924 struct block_info *bits,
7927 struct cache_tree *pending,
7928 struct cache_tree *seen,
7929 struct cache_tree *reada,
7930 struct cache_tree *nodes,
7931 struct cache_tree *extent_cache,
7932 struct cache_tree *chunk_cache,
7933 struct rb_root *dev_cache,
7934 struct block_group_tree *block_group_cache,
7935 struct device_extent_tree *dev_extent_cache,
7936 struct root_item_record *ri)
7938 struct btrfs_fs_info *fs_info = root->fs_info;
7939 struct extent_buffer *buf;
7940 struct extent_record *rec = NULL;
7951 struct btrfs_key key;
7952 struct cache_extent *cache;
/* choose the next batch of blocks; bits[0] is the one processed below */
7955 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7956 bits_nr, &reada_bits);
/* track each picked block in reada and start readahead for it */
7961 for(i = 0; i < nritems; i++) {
7962 ret = add_cache_extent(reada, bits[i].start,
7967 /* fixme, get the parent transid */
7968 readahead_tree_block(fs_info, bits[i].start, 0);
7971 *last = bits[0].start;
7972 bytenr = bits[0].start;
7973 size = bits[0].size;
/* the block is being handled now: take it out of the three work caches */
7975 cache = lookup_cache_extent(pending, bytenr, size);
7977 remove_cache_extent(pending, cache);
7980 cache = lookup_cache_extent(reada, bytenr, size);
7982 remove_cache_extent(reada, cache);
7985 cache = lookup_cache_extent(nodes, bytenr, size);
7987 remove_cache_extent(nodes, cache);
/* an existing extent record supplies the generation passed to the read */
7990 cache = lookup_cache_extent(extent_cache, bytenr, size);
7992 rec = container_of(cache, struct extent_record, cache);
7993 gen = rec->parent_generation;
7996 /* fixme, get the real parent transid */
7997 buf = read_tree_block(root->fs_info, bytenr, gen);
7998 if (!extent_buffer_uptodate(buf)) {
7999 record_bad_block_io(root->fs_info,
8000 extent_cache, bytenr, size);
8004 nritems = btrfs_header_nritems(buf);
/*
 * Unless init_extent_tree is set, the flags are first looked up in the
 * extent tree; calc_extent_flag() appears on both paths. If it fails too,
 * FULL_BACKREF is assumed. The conditions in between are elided here.
 */
8007 if (!init_extent_tree) {
8008 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8009 btrfs_header_level(buf), 1, NULL,
8012 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8014 fprintf(stderr, "Couldn't calc extent flags\n");
8015 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8020 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8022 fprintf(stderr, "Couldn't calc extent flags\n");
8023 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8027 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8029 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8030 ri->objectid == btrfs_header_owner(buf)) {
8032 * Ok we got to this block from it's original owner and
8033 * we have FULL_BACKREF set. Relocation can leave
8034 * converted blocks over so this is altogether possible,
8035 * however it's not possible if the generation > the
8036 * last snapshot, so check for this case.
8038 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8039 btrfs_header_generation(buf) > ri->last_snapshot) {
8040 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8041 rec->bad_full_backref = 1;
8046 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8047 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8048 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8049 rec->bad_full_backref = 1;
/* remember the final decision in the extent record */
8053 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8054 rec->flag_block_full_backref = 1;
8058 rec->flag_block_full_backref = 0;
8060 owner = btrfs_header_owner(buf);
8063 ret = check_block(root, extent_cache, buf, flags);
/* leaf: account the unused space, then dispatch every item by key type */
8067 if (btrfs_is_leaf(buf)) {
8068 btree_space_waste += btrfs_leaf_free_space(root, buf);
8069 for (i = 0; i < nritems; i++) {
8070 struct btrfs_file_extent_item *fi;
8071 btrfs_item_key_to_cpu(buf, &key, i);
8073 * Check key type against the leaf owner.
8074 * Could filter quite a lot of early error if
8077 if (check_type_with_root(btrfs_header_owner(buf),
8079 fprintf(stderr, "ignoring invalid key\n");
8082 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8083 process_extent_item(root, extent_cache, buf,
8087 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8088 process_extent_item(root, extent_cache, buf,
8092 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8094 btrfs_item_size_nr(buf, i);
8097 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8098 process_chunk_item(chunk_cache, &key, buf, i);
8101 if (key.type == BTRFS_DEV_ITEM_KEY) {
8102 process_device_item(dev_cache, &key, buf, i);
8105 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8106 process_block_group_item(block_group_cache,
8110 if (key.type == BTRFS_DEV_EXTENT_KEY) {
8111 process_device_extent_item(dev_extent_cache,
8116 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8117 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8118 process_extent_ref_v0(extent_cache, buf, i);
/* keyed tree block ref: key.offset goes in as the root argument */
8125 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8126 ret = add_tree_backref(extent_cache,
8127 key.objectid, 0, key.offset, 0);
8130 "add_tree_backref failed (leaf tree block): %s",
/* keyed shared block ref: key.offset goes in as the parent argument */
8134 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8135 ret = add_tree_backref(extent_cache,
8136 key.objectid, key.offset, 0, 0);
8139 "add_tree_backref failed (leaf shared block): %s",
8143 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8144 struct btrfs_extent_data_ref *ref;
8145 ref = btrfs_item_ptr(buf, i,
8146 struct btrfs_extent_data_ref);
8147 add_data_backref(extent_cache,
8149 btrfs_extent_data_ref_root(buf, ref),
8150 btrfs_extent_data_ref_objectid(buf,
8152 btrfs_extent_data_ref_offset(buf, ref),
8153 btrfs_extent_data_ref_count(buf, ref),
8154 0, root->fs_info->sectorsize);
8157 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8158 struct btrfs_shared_data_ref *ref;
8159 ref = btrfs_item_ptr(buf, i,
8160 struct btrfs_shared_data_ref);
8161 add_data_backref(extent_cache,
8162 key.objectid, key.offset, 0, 0, 0,
8163 btrfs_shared_data_ref_count(buf, ref),
8164 0, root->fs_info->sectorsize);
/*
 * Orphan items: a bad_item holding the key and the owner root is queued
 * on delete_items. The line following the BTRFS_ORPHAN_OBJECTID test is
 * elided, so which objectids are exempt must be confirmed.
 */
8167 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8168 struct bad_item *bad;
8170 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8174 bad = malloc(sizeof(struct bad_item));
8177 INIT_LIST_HEAD(&bad->list);
8178 memcpy(&bad->key, &key,
8179 sizeof(struct btrfs_key));
8180 bad->root_id = owner;
8181 list_add_tail(&bad->list, &delete_items);
/* everything past this point in the item loop concerns file extents */
8184 if (key.type != BTRFS_EXTENT_DATA_KEY)
8186 fi = btrfs_item_ptr(buf, i,
8187 struct btrfs_file_extent_item);
8188 if (btrfs_file_extent_type(buf, fi) ==
8189 BTRFS_FILE_EXTENT_INLINE)
8191 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8194 data_bytes_allocated +=
8195 btrfs_file_extent_disk_num_bytes(buf, fi);
8196 if (data_bytes_allocated < root->fs_info->sectorsize) {
8199 data_bytes_referenced +=
8200 btrfs_file_extent_num_bytes(buf, fi);
8201 add_data_backref(extent_cache,
8202 btrfs_file_extent_disk_bytenr(buf, fi),
8203 parent, owner, key.objectid, key.offset -
8204 btrfs_file_extent_offset(buf, fi), 1, 1,
8205 btrfs_file_extent_disk_num_bytes(buf, fi));
/*
 * Non-leaf path (the else line is elided in this view): every child
 * pointer is recorded and queued.
 */
8209 struct btrfs_key first_key;
8211 first_key.objectid = 0;
8214 btrfs_item_key_to_cpu(buf, &first_key, 0);
8215 level = btrfs_header_level(buf);
8216 for (i = 0; i < nritems; i++) {
8217 struct extent_record tmpl;
8219 ptr = btrfs_node_blockptr(buf, i);
8220 size = root->fs_info->nodesize;
8221 btrfs_node_key_to_cpu(buf, &key, i);
/* test against the drop key at drop_level; the action taken is elided */
8223 if ((level == ri->drop_level)
8224 && is_dropped_key(&key, &ri->drop_key)) {
8229 memset(&tmpl, 0, sizeof(tmpl));
8230 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8231 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8236 tmpl.max_size = size;
8237 ret = add_extent_rec(extent_cache, &tmpl);
8241 ret = add_tree_backref(extent_cache, ptr, parent,
8245 "add_tree_backref failed (non-leaf block): %s",
/* the child goes to nodes or to pending; the selecting test is elided */
8251 add_pending(nodes, seen, ptr, size);
8253 add_pending(pending, seen, ptr, size);
8256 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
8257 nritems) * sizeof(struct btrfs_key_ptr);
/* global size statistics, split by the owner recorded in the header */
8259 total_btree_bytes += buf->len;
8260 if (fs_root_objectid(btrfs_header_owner(buf)))
8261 total_fs_tree_bytes += buf->len;
8262 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8263 total_extent_tree_bytes += buf->len;
8265 free_extent_buffer(buf);
/*
 * Seed the block walk with the root block buf of a tree.
 * The block is queued through add_pending() on nodes when its header
 * level is above 0 and otherwise on pending (the else line is elided in
 * this view). An extent record starting at buf->start with max_size
 * buf->len is added to extent_cache, followed by a tree backref. For the
 * reloc tree objectid, or a backref revision older than
 * BTRFS_MIXED_BACKREF_REV, the backref uses buf->start as parent;
 * otherwise it uses objectid as root.
 * NOTE(review): the objectid parameter line, the local ret, several
 * template field assignments, the trailing arguments of both
 * add_tree_backref() calls and the return statement are elided in this
 * view. The result of add_extent_rec() is not captured in the visible
 * code.
 */
8269 static int add_root_to_pending(struct extent_buffer *buf,
8270 struct cache_tree *extent_cache,
8271 struct cache_tree *pending,
8272 struct cache_tree *seen,
8273 struct cache_tree *nodes,
8276 struct extent_record tmpl;
8279 if (btrfs_header_level(buf) > 0)
8280 add_pending(nodes, seen, buf->start, buf->len);
8282 add_pending(pending, seen, buf->start, buf->len);
8284 memset(&tmpl, 0, sizeof(tmpl));
8285 tmpl.start = buf->start;
8290 tmpl.max_size = buf->len;
8291 add_extent_rec(extent_cache, &tmpl);
8293 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8294 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8295 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8298 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8303 /* as we fix the tree, we might be deleting blocks that
8304 * we're tracking for repair. This hook makes sure we
8305 * remove any backrefs for blocks as we are fixing them.
/*
 * Repair-time hook run when references to an extent are dropped.
 *
 * The range (bytenr, num_bytes) is looked up in
 * root->fs_info->fsck_extent_cache. For a cached record the matching backref
 * is located (a data backref when owner >= BTRFS_FIRST_FREE_OBJECTID, a tree
 * backref otherwise) and the bookkeeping kept for it is wound back:
 *   - data: found_ref and rec->refs, then num_refs and rec->extent_item_refs,
 *     are reduced by refs_to_drop; the found_ref / found_extent_tree flags
 *     are cleared once the corresponding counter reaches zero;
 *   - tree: the found_ref and found_extent_tree flags are cleared and
 *     rec->extent_item_refs is decremented by one when it is non-zero.
 * Finally maybe_free_extent_rec() is given the chance to release the record.
 *
 * NOTE(review): both rb_erase() sites test
 * (!found_extent_tree && found_ref). A backref that nothing refers to any
 * more would be expected to have found_ref clear as well -- confirm that the
 * condition is the intended one.
 * NOTE(review): the last parameter (used as refs_to_drop), the declaration of
 * is_data, the early exits and the return statement are not part of this
 * listing.
 */
8307 static int free_extent_hook(struct btrfs_trans_handle *trans,
8308 struct btrfs_root *root,
8309 u64 bytenr, u64 num_bytes, u64 parent,
8310 u64 root_objectid, u64 owner, u64 offset,
8313 struct extent_record *rec;
8314 struct cache_extent *cache;
8316 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* Owners at or above BTRFS_FIRST_FREE_OBJECTID are treated as data. */
8318 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8319 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8323 rec = container_of(cache, struct extent_record, cache);
8325 struct data_backref *back;
8326 back = find_data_backref(rec, parent, root_objectid, owner,
8327 offset, 1, bytenr, num_bytes);
8330 if (back->node.found_ref) {
8331 back->found_ref -= refs_to_drop;
8333 rec->refs -= refs_to_drop;
8335 if (back->node.found_extent_tree) {
8336 back->num_refs -= refs_to_drop;
8337 if (rec->extent_item_refs)
8338 rec->extent_item_refs -= refs_to_drop;
8340 if (back->found_ref == 0)
8341 back->node.found_ref = 0;
8342 if (back->num_refs == 0)
8343 back->node.found_extent_tree = 0;
8345 if (!back->node.found_extent_tree && back->node.found_ref) {
8346 rb_erase(&back->node.node, &rec->backref_tree);
8350 struct tree_backref *back;
8351 back = find_tree_backref(rec, parent, root_objectid);
8354 if (back->node.found_ref) {
8357 back->node.found_ref = 0;
8359 if (back->node.found_extent_tree) {
8360 if (rec->extent_item_refs)
8361 rec->extent_item_refs--;
8362 back->node.found_extent_tree = 0;
8364 if (!back->node.found_extent_tree && back->node.found_ref) {
8365 rb_erase(&back->node.node, &rec->backref_tree);
8369 maybe_free_extent_rec(extent_cache, rec);
/*
 * Remove the extent-tree items whose key objectid equals bytenr.
 *
 * The search key starts at (bytenr, <type>, (u64)-1) in
 * root->fs_info->extent_root. For each item found:
 *   - a key with a different objectid ends the interest in that item;
 *   - a key whose type is not one of the extent item / backref types listed
 *     below is skipped by releasing the path and moving the search key just
 *     below the found key (offset - 1 when the type is 0, otherwise
 *     type - 1 with offset (u64)-1);
 *   - any other item is reported on stderr and removed with btrfs_del_item().
 * After deleting an EXTENT_ITEM or METADATA_ITEM the block group accounting
 * is updated through btrfs_update_block_group(), using the key offset as the
 * size for extent items and fs_info->nodesize for metadata items.
 *
 * NOTE(review): the remaining parameters, the loop construct, the error
 * checks after each call and the return statement are not present in this
 * listing; the downward key stepping suggests the tree is walked from high
 * keys to low keys -- confirm.
 */
8374 static int delete_extent_records(struct btrfs_trans_handle *trans,
8375 struct btrfs_root *root,
8376 struct btrfs_path *path,
8379 struct btrfs_key key;
8380 struct btrfs_key found_key;
8381 struct extent_buffer *leaf;
8386 key.objectid = bytenr;
8388 key.offset = (u64)-1;
8391 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8398 if (path->slots[0] == 0)
8404 leaf = path->nodes[0];
8405 slot = path->slots[0];
8407 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8408 if (found_key.objectid != bytenr)
/* Only extent items and the backref item types are candidates for removal. */
8411 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8412 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8413 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8414 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8415 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8416 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8417 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8418 btrfs_release_path(path);
8419 if (found_key.type == 0) {
8420 if (found_key.offset == 0)
8422 key.offset = found_key.offset - 1;
8423 key.type = found_key.type;
8425 key.type = found_key.type - 1;
8426 key.offset = (u64)-1;
8430 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8431 found_key.objectid, found_key.type, found_key.offset);
8433 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8436 btrfs_release_path(path);
8438 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8439 found_key.type == BTRFS_METADATA_ITEM_KEY) {
/* Extent items carry their length in the key offset; metadata items do not. */
8440 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8441 found_key.offset : root->fs_info->nodesize;
8443 ret = btrfs_update_block_group(trans, root, bytenr,
8450 btrfs_release_path(path);
8455 * for a single backref, this will allocate a new extent
8456 * and add the backref to it.
/*
 * Recreate the extent-tree state for one backref of an extent record.
 *
 * Visible behaviour, in order:
 *   - rec->max_size is raised with max_t();
 *   - an EXTENT_ITEM keyed (rec->start, rec->max_size) is inserted into
 *     info->extent_root; its size is sizeof(*ei), extended by sizeof(*bi) on
 *     one path;
 *   - the new item gets a reference count of 0 and rec->generation; data
 *     backrefs get BTRFS_EXTENT_FLAG_DATA, otherwise a tree block info is
 *     zeroed and filled with rec->info_level and a key whose objectid is
 *     rec->info_objectid, and the flags are
 *     BTRFS_EXTENT_FLAG_TREE_BLOCK | flags;
 *   - the leaf is marked dirty and block group accounting is updated with
 *     btrfs_update_block_group();
 *   - references are then added through btrfs_inc_extent_ref(): once per
 *     dback->found_ref for a data backref, once for a tree backref, using the
 *     backref parent when full_backref is set. Each addition is logged to
 *     stderr.
 *
 * NOTE(review): the conditions guarding the insertion (presumably on the
 * allocated argument), the error checks, several arguments and the return
 * statement are missing from this listing.
 * NOTE(review): the identifier spelled with a copyright sign in front of
 * _key looks like a mis-decoded address-of copy_key -- verify the encoding of
 * this file before building.
 * NOTE(review): the declaration of copy_key ends in a doubled semicolon.
 */
8458 static int record_extent(struct btrfs_trans_handle *trans,
8459 struct btrfs_fs_info *info,
8460 struct btrfs_path *path,
8461 struct extent_record *rec,
8462 struct extent_backref *back,
8463 int allocated, u64 flags)
8466 struct btrfs_root *extent_root = info->extent_root;
8467 struct extent_buffer *leaf;
8468 struct btrfs_key ins_key;
8469 struct btrfs_extent_item *ei;
8470 struct data_backref *dback;
8471 struct btrfs_tree_block_info *bi;
8474 rec->max_size = max_t(u64, rec->max_size,
8478 u32 item_size = sizeof(*ei);
8481 item_size += sizeof(*bi);
8483 ins_key.objectid = rec->start;
8484 ins_key.offset = rec->max_size;
8485 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8487 ret = btrfs_insert_empty_item(trans, extent_root, path,
8488 &ins_key, item_size);
8492 leaf = path->nodes[0];
8493 ei = btrfs_item_ptr(leaf, path->slots[0],
8494 struct btrfs_extent_item);
/* The item starts with zero references; they are added further down. */
8496 btrfs_set_extent_refs(leaf, ei, 0);
8497 btrfs_set_extent_generation(leaf, ei, rec->generation);
8499 if (back->is_data) {
8500 btrfs_set_extent_flags(leaf, ei,
8501 BTRFS_EXTENT_FLAG_DATA);
8503 struct btrfs_disk_key copy_key;;
/* The tree block info sits directly behind the extent item. */
8505 bi = (struct btrfs_tree_block_info *)(ei + 1);
8506 memset_extent_buffer(leaf, 0, (unsigned long)bi,
8509 btrfs_set_disk_key_objectid(©_key,
8510 rec->info_objectid);
8511 btrfs_set_disk_key_type(©_key, 0);
8512 btrfs_set_disk_key_offset(©_key, 0);
8514 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8515 btrfs_set_tree_block_key(leaf, bi, ©_key);
8517 btrfs_set_extent_flags(leaf, ei,
8518 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8521 btrfs_mark_buffer_dirty(leaf);
8522 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8523 rec->max_size, 1, 0);
8526 btrfs_release_path(path);
8529 if (back->is_data) {
8533 dback = to_data_backref(back);
8534 if (back->full_backref)
8535 parent = dback->parent;
8539 for (i = 0; i < dback->found_ref; i++) {
8540 /* if parent != 0, we're doing a full backref
8541 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8542 * just makes the backref allocator create a data
8545 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8546 rec->start, rec->max_size,
8550 BTRFS_FIRST_FREE_OBJECTID :
8556 fprintf(stderr, "adding new data backref"
8557 " on %llu %s %llu owner %llu"
8558 " offset %llu found %d\n",
8559 (unsigned long long)rec->start,
8560 back->full_backref ?
8562 back->full_backref ?
8563 (unsigned long long)parent :
8564 (unsigned long long)dback->root,
8565 (unsigned long long)dback->owner,
8566 (unsigned long long)dback->offset,
8570 struct tree_backref *tback;
8572 tback = to_tree_backref(back);
8573 if (back->full_backref)
8574 parent = tback->parent;
8578 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8579 rec->start, rec->max_size,
8580 parent, tback->root, 0, 0);
8581 fprintf(stderr, "adding new tree backref on "
8582 "start %llu len %llu parent %llu root %llu\n",
8583 rec->start, rec->max_size, parent, tback->root);
8586 btrfs_release_path(path);
/*
 * Look for the extent_entry on the entries list whose bytenr and bytes both
 * equal the requested values.
 *
 * NOTE(review): the return statements are not part of this listing;
 * presumably the matching entry is returned and NULL signals no match --
 * confirm against the complete function.
 */
8590 static struct extent_entry *find_entry(struct list_head *entries,
8591 u64 bytenr, u64 bytes)
8593 struct extent_entry *entry = NULL;
8595 list_for_each_entry(entry, entries, list) {
8596 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Pick the entry on the list that the largest number of backrefs agree on.
 *
 * Visible rules while walking the list:
 *   - an entry whose broken count equals its total count is not trusted;
 *   - an entry with the same count as the current best means the current
 *     best is not conclusive;
 *   - a previous entry that is not broken and has the same count as the
 *     current one is likewise inconclusive;
 *   - otherwise the candidate with the higher count wins, both between prev
 *     and entry and between best and entry.
 *
 * NOTE(review): the branch bodies, the handling of the first and of exactly
 * two entries, and the return statement are missing from this listing, so
 * the tie-breaking outcome (presumably NULL when no clear winner exists)
 * must be confirmed against the complete function.
 */
8603 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8605 struct extent_entry *entry, *best = NULL, *prev = NULL;
8607 list_for_each_entry(entry, entries, list) {
8609 * If there are as many broken entries as entries then we know
8610 * not to trust this particular entry.
8612 if (entry->broken == entry->count)
8616 * Special case, when there are only two entries and 'best' is
8626 * If our current entry == best then we can't be sure our best
8627 * is really the best, so we need to keep searching.
8629 if (best && best->count == entry->count) {
8635 /* Prev == entry, not good enough, have to keep searching */
8636 if (!prev->broken && prev->count == entry->count)
8640 best = (prev->count > entry->count) ? prev : entry;
8641 else if (best->count < entry->count)
/*
 * Rewrite the file extent item behind a data backref so that it agrees with
 * the chosen extent entry.
 *
 * Visible steps:
 *   1. Read the fs root named by dback->root.
 *   2. Search read-only for (dback->owner, EXTENT_DATA, dback->offset) and
 *      walk forward, moving to the next leaf when the slot runs off the
 *      current one, until a file extent whose disk_bytenr and disk_num_bytes
 *      equal dback->disk_bytenr and dback->bytes is found. Leaving the items
 *      of the owner is reported as an error.
 *   3. Start a transaction on that root and search again with COW enabled
 *      for the key that was found.
 *   4. Refuse compressed extents whose disk_bytenr differs from
 *      entry->bytenr.
 *   5. Adjust the item: a broken backref simply takes entry->bytenr; a ref
 *      starting after the entry, or before it, has its disk_bytenr set to
 *      entry->bytenr and its offset rewritten, after checking that the
 *      referenced range stays inside the entry.
 *   6. Set disk_num_bytes to entry->bytes and, for uncompressed extents,
 *      ram_bytes as well; otherwise print a warning.
 *   7. Mark the leaf dirty and commit the transaction.
 * Returns ret when it is non-zero, else the result of the commit.
 *
 * NOTE(review): in the branch for a ref starting after the entry, off_diff
 * is computed but no visible line applies it to offset before the offset is
 * written back -- the statement may simply be absent from this listing;
 * verify.
 * NOTE(review): loop constructs, error checks, goto targets and several
 * local declarations are not part of this listing.
 */
8649 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8650 struct data_backref *dback, struct extent_entry *entry)
8652 struct btrfs_trans_handle *trans;
8653 struct btrfs_root *root;
8654 struct btrfs_file_extent_item *fi;
8655 struct extent_buffer *leaf;
8656 struct btrfs_key key;
/* Resolve the subvolume root that owns the reference. */
8660 key.objectid = dback->root;
8661 key.type = BTRFS_ROOT_ITEM_KEY;
8662 key.offset = (u64)-1;
8663 root = btrfs_read_fs_root(info, &key);
8665 fprintf(stderr, "Couldn't find root for our ref\n");
8670 * The backref points to the original offset of the extent if it was
8671 * split, so we need to search down to the offset we have and then walk
8672 * forward until we find the backref we're looking for.
8674 key.objectid = dback->owner;
8675 key.type = BTRFS_EXTENT_DATA_KEY;
8676 key.offset = dback->offset;
8677 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8679 fprintf(stderr, "Error looking up ref %d\n", ret);
8684 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8685 ret = btrfs_next_leaf(root, path);
8687 fprintf(stderr, "Couldn't find our ref, next\n");
8691 leaf = path->nodes[0];
8692 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8693 if (key.objectid != dback->owner ||
8694 key.type != BTRFS_EXTENT_DATA_KEY) {
8695 fprintf(stderr, "Couldn't find our ref, search\n");
8698 fi = btrfs_item_ptr(leaf, path->slots[0],
8699 struct btrfs_file_extent_item);
8700 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8701 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8703 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8708 btrfs_release_path(path);
8710 trans = btrfs_start_transaction(root, 1);
8712 return PTR_ERR(trans);
8715 * Ok we have the key of the file extent we want to fix, now we can cow
8716 * down to the thing and fix it.
8718 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8720 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8721 key.objectid, key.type, key.offset, ret);
8725 fprintf(stderr, "Well that's odd, we just found this key "
8726 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8731 leaf = path->nodes[0];
8732 fi = btrfs_item_ptr(leaf, path->slots[0],
8733 struct btrfs_file_extent_item);
8735 if (btrfs_file_extent_compression(leaf, fi) &&
8736 dback->disk_bytenr != entry->bytenr) {
8737 fprintf(stderr, "Ref doesn't match the record start and is "
8738 "compressed, please take a btrfs-image of this file "
8739 "system and send it to a btrfs developer so they can "
8740 "complete this functionality for bytenr %Lu\n",
8741 dback->disk_bytenr);
8746 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8747 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8748 } else if (dback->disk_bytenr > entry->bytenr) {
8749 u64 off_diff, offset;
8751 off_diff = dback->disk_bytenr - entry->bytenr;
8752 offset = btrfs_file_extent_offset(leaf, fi);
8753 if (dback->disk_bytenr + offset +
8754 btrfs_file_extent_num_bytes(leaf, fi) >
8755 entry->bytenr + entry->bytes) {
8756 fprintf(stderr, "Ref is past the entry end, please "
8757 "take a btrfs-image of this file system and "
8758 "send it to a btrfs developer, ref %Lu\n",
8759 dback->disk_bytenr);
8764 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8765 btrfs_set_file_extent_offset(leaf, fi, offset);
8766 } else if (dback->disk_bytenr < entry->bytenr) {
8769 offset = btrfs_file_extent_offset(leaf, fi);
8770 if (dback->disk_bytenr + offset < entry->bytenr) {
8771 fprintf(stderr, "Ref is before the entry start, please"
8772 " take a btrfs-image of this file system and "
8773 "send it to a btrfs developer, ref %Lu\n",
8774 dback->disk_bytenr);
8779 offset += dback->disk_bytenr;
8780 offset -= entry->bytenr;
8781 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8782 btrfs_set_file_extent_offset(leaf, fi, offset);
8785 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8788 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8789 * only do this if we aren't using compression, otherwise it's a
8792 if (!btrfs_file_extent_compression(leaf, fi))
8793 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8795 printf("ram bytes may be wrong?\n");
8796 btrfs_mark_buffer_dirty(leaf);
8798 err = btrfs_commit_transaction(trans, root);
8799 btrfs_release_path(path);
8800 return ret ? ret : err;
/*
 * Make the data backrefs of an extent record agree on one
 * (disk_bytenr, bytes) pair, repairing file extent items where needed.
 *
 * Visible steps:
 *   1. Walk rec->backref_tree, skipping full backrefs, non-data backrefs and
 *      data backrefs without a found ref. Every distinct
 *      (disk_bytenr, bytes) pair becomes an extent_entry on a local list; a
 *      backref that differs from (rec->start, rec->nr) or is marked broken
 *      is treated as a mismatch.
 *   2. With at most one entry and no mismatch there is nothing to do.
 *   3. Otherwise pick a winner with find_most_right_entry(). Without a clear
 *      winner the extent record itself, (rec->start, rec->nr), is consulted:
 *      when no entry matches it, the function gives up unless there are
 *      broken entries and the record was found, in which case a new entry
 *      for the record range is added and the selection is repeated.
 *   4. A winner whose bytenr differs from rec->start is refused.
 *   5. Every remaining data backref that does not match the winner is passed
 *      to repair_ref().
 *   6. The local entry list is emptied before leaving.
 *
 * NOTE(review): counters such as nr_entries and mismatch, the declaration of
 * the entries list, all error exits and the return statement are not part of
 * this listing. The existing comment further down says the cache has to be
 * rescanned after a repair; how that is signalled to the caller is not
 * visible here.
 */
8803 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8804 struct extent_record *rec)
8806 struct extent_backref *back, *tmp;
8807 struct data_backref *dback;
8808 struct extent_entry *entry, *best = NULL;
8811 int broken_entries = 0;
8816 * Metadata is easy and the backrefs should always agree on bytenr and
8817 * size, if not we've got bigger issues.
8822 rbtree_postorder_for_each_entry_safe(back, tmp,
8823 &rec->backref_tree, node) {
8824 if (back->full_backref || !back->is_data)
8827 dback = to_data_backref(back);
8830 * We only pay attention to backrefs that we found a real
8833 if (dback->found_ref == 0)
8837 * For now we only catch when the bytes don't match, not the
8838 * bytenr. We can easily do this at the same time, but I want
8839 * to have a fs image to test on before we just add repair
8840 * functionality willy-nilly so we know we won't screw up the
8844 entry = find_entry(&entries, dback->disk_bytenr,
8847 entry = malloc(sizeof(struct extent_entry));
8852 memset(entry, 0, sizeof(*entry));
8853 entry->bytenr = dback->disk_bytenr;
8854 entry->bytes = dback->bytes;
8855 list_add_tail(&entry->list, &entries);
8860 * If we only have on entry we may think the entries agree when
8861 * in reality they don't so we have to do some extra checking.
8863 if (dback->disk_bytenr != rec->start ||
8864 dback->bytes != rec->nr || back->broken)
8875 /* Yay all the backrefs agree, carry on good sir */
8876 if (nr_entries <= 1 && !mismatch)
8879 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8880 "%Lu\n", rec->start);
8883 * First we want to see if the backrefs can agree amongst themselves who
8884 * is right, so figure out which one of the entries has the highest
8887 best = find_most_right_entry(&entries);
8890 * Ok so we may have an even split between what the backrefs think, so
8891 * this is where we use the extent ref to see what it thinks.
8894 entry = find_entry(&entries, rec->start, rec->nr);
8895 if (!entry && (!broken_entries || !rec->found_rec)) {
8896 fprintf(stderr, "Backrefs don't agree with each other "
8897 "and extent record doesn't agree with anybody,"
8898 " so we can't fix bytenr %Lu bytes %Lu\n",
8899 rec->start, rec->nr);
8902 } else if (!entry) {
8904 * Ok our backrefs were broken, we'll assume this is the
8905 * correct value and add an entry for this range.
8907 entry = malloc(sizeof(struct extent_entry));
8912 memset(entry, 0, sizeof(*entry));
8913 entry->bytenr = rec->start;
8914 entry->bytes = rec->nr;
8915 list_add_tail(&entry->list, &entries);
8919 best = find_most_right_entry(&entries);
8921 fprintf(stderr, "Backrefs and extent record evenly "
8922 "split on who is right, this is going to "
8923 "require user input to fix bytenr %Lu bytes "
8924 "%Lu\n", rec->start, rec->nr);
8931 * I don't think this can happen currently as we'll abort() if we catch
8932 * this case higher up, but in case somebody removes that we still can't
8933 * deal with it properly here yet, so just bail out of that's the case.
8935 if (best->bytenr != rec->start) {
8936 fprintf(stderr, "Extent start and backref starts don't match, "
8937 "please use btrfs-image on this file system and send "
8938 "it to a btrfs developer so they can make fsck fix "
8939 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8940 rec->start, rec->nr);
8946 * Ok great we all agreed on an extent record, let's go find the real
8947 * references and fix up the ones that don't match.
8949 rbtree_postorder_for_each_entry_safe(back, tmp,
8950 &rec->backref_tree, node) {
8951 if (back->full_backref || !back->is_data)
8954 dback = to_data_backref(back);
8957 * Still ignoring backrefs that don't have a real ref attached
8960 if (dback->found_ref == 0)
8963 if (dback->bytes == best->bytes &&
8964 dback->disk_bytenr == best->bytenr)
8967 ret = repair_ref(info, path, dback, best);
8973 * Ok we messed with the actual refs, which means we need to drop our
8974 * entire cache and go back and rescan. I know this is a huge pain and
8975 * adds a lot of extra work, but it's the only way to be safe. Once all
8976 * the backrefs agree we may not need to do anything to the extent
8981 while (!list_empty(&entries)) {
8982 entry = list_entry(entries.next, struct extent_entry, list);
8983 list_del_init(&entry->list);
/*
 * Decide whether a record on the duplicate list really has several extent
 * items, and promote its single duplicate to be the authoritative record.
 *
 * Visible behaviour:
 *   - records that were found in the extent tree, or that have more than one
 *     duplicate, are left alone (first check);
 *   - otherwise rec is removed from the cache and its first duplicate (good)
 *     takes over: its list heads are reinitialised, its cache key is set to
 *     its own start / nr, the checked flags and duplicate count are reset,
 *     and the refs and the backrefs list of rec are moved across;
 *   - any cached record overlapping good is merged: one that was found in
 *     the extent tree, or has duplicates of its own, is chained onto
 *     good->dups and good is put on the global duplicate_extents list;
 *     another record without an extent item just donates its refs and
 *     backrefs;
 *   - good is inserted into the cache.
 * Returns 0 when good ended up with duplicates (deletion is needed),
 * otherwise 1.
 *
 * NOTE(review): this function moves a list named backrefs, while the other
 * functions in this file walk rec->backref_tree -- confirm that the list is
 * still the structure holding the backrefs.
 * NOTE(review): the loop around the overlap lookup, the release of rec and
 * tmp, and the early return values are not part of this listing.
 */
8989 static int process_duplicates(struct cache_tree *extent_cache,
8990 struct extent_record *rec)
8992 struct extent_record *good, *tmp;
8993 struct cache_extent *cache;
8997 * If we found a extent record for this extent then return, or if we
8998 * have more than one duplicate we are likely going to need to delete
9001 if (rec->found_rec || rec->num_duplicates > 1)
9004 /* Shouldn't happen but just in case */
9005 BUG_ON(!rec->num_duplicates);
9008 * So this happens if we end up with a backref that doesn't match the
9009 * actual extent entry. So either the backref is bad or the extent
9010 * entry is bad. Either way we want to have the extent_record actually
9011 * reflect what we found in the extent_tree, so we need to take the
9012 * duplicate out and use that as the extent_record since the only way we
9013 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9015 remove_cache_extent(extent_cache, &rec->cache);
9017 good = to_extent_record(rec->dups.next);
9018 list_del_init(&good->list);
9019 INIT_LIST_HEAD(&good->backrefs);
9020 INIT_LIST_HEAD(&good->dups);
9021 good->cache.start = good->start;
9022 good->cache.size = good->nr;
9023 good->content_checked = 0;
9024 good->owner_ref_checked = 0;
9025 good->num_duplicates = 0;
9026 good->refs = rec->refs;
9027 list_splice_init(&rec->backrefs, &good->backrefs);
9029 cache = lookup_cache_extent(extent_cache, good->start,
9033 tmp = container_of(cache, struct extent_record, cache);
9036 * If we find another overlapping extent and it's found_rec is
9037 * set then it's a duplicate and we need to try and delete
9040 if (tmp->found_rec || tmp->num_duplicates > 0) {
9041 if (list_empty(&good->list))
9042 list_add_tail(&good->list,
9043 &duplicate_extents);
9044 good->num_duplicates += tmp->num_duplicates + 1;
9045 list_splice_init(&tmp->dups, &good->dups);
9046 list_del_init(&tmp->list);
9047 list_add_tail(&tmp->list, &good->dups);
9048 remove_cache_extent(extent_cache, &tmp->cache);
9053 * Ok we have another non extent item backed extent rec, so lets
9054 * just add it to this extent and carry on like we did above.
9056 good->refs += tmp->refs;
9057 list_splice_init(&tmp->backrefs, &good->backrefs);
9058 remove_cache_extent(extent_cache, &tmp->cache);
9061 ret = insert_cache_extent(extent_cache, &good->cache);
9064 return good->num_duplicates ? 0 : 1;
/*
 * Delete the extent items of every duplicate of rec except the one record
 * that covers all the others.
 *
 * Visible steps:
 *   1. Scan rec->dups for the record (good) that covers all duplicates,
 *      comparing start and nr; a duplicate that ends before the current
 *      candidate is reported as partially overlapping.
 *   2. Collect the losers on a local delete_list.
 *   3. Inside one transaction on the extent root, look up the EXTENT_ITEM
 *      key (start, nr) of each collected record that was found in the
 *      extent tree and remove it with btrfs_del_item(). A metadata record
 *      in this position is reported as unexpected.
 *   4. Commit, then empty both delete_list and rec->dups.
 * Returns ret when it is non-zero, otherwise nr_del, the number of deleted
 * items. rec->num_duplicates is reset when ret and nr_del are both zero.
 *
 * NOTE(review): the initial choice of good, the filter that keeps good off
 * the delete list, error exits, the increment of nr_del and the release of
 * the collected records are not part of this listing.
 */
9067 static int delete_duplicate_records(struct btrfs_root *root,
9068 struct extent_record *rec)
9070 struct btrfs_trans_handle *trans;
9071 LIST_HEAD(delete_list);
9072 struct btrfs_path path;
9073 struct extent_record *tmp, *good, *n;
9076 struct btrfs_key key;
9078 btrfs_init_path(&path);
9081 /* Find the record that covers all of the duplicates. */
9082 list_for_each_entry(tmp, &rec->dups, list) {
9083 if (good->start < tmp->start)
9085 if (good->nr > tmp->nr)
9088 if (tmp->start + tmp->nr < good->start + good->nr) {
9089 fprintf(stderr, "Ok we have overlapping extents that "
9090 "aren't completely covered by each other, this "
9091 "is going to require more careful thought. "
9092 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9093 tmp->start, tmp->nr, good->start, good->nr);
9100 list_add_tail(&rec->list, &delete_list);
9102 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9105 list_move_tail(&tmp->list, &delete_list);
/* All deletions happen in the extent tree, whatever root was passed in. */
9108 root = root->fs_info->extent_root;
9109 trans = btrfs_start_transaction(root, 1);
9110 if (IS_ERR(trans)) {
9111 ret = PTR_ERR(trans);
9115 list_for_each_entry(tmp, &delete_list, list) {
/* Records never seen in the extent tree have no item to delete. */
9116 if (tmp->found_rec == 0)
9118 key.objectid = tmp->start;
9119 key.type = BTRFS_EXTENT_ITEM_KEY;
9120 key.offset = tmp->nr;
9122 /* Shouldn't happen but just in case */
9123 if (tmp->metadata) {
9124 fprintf(stderr, "Well this shouldn't happen, extent "
9125 "record overlaps but is metadata? "
9126 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9130 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9136 ret = btrfs_del_item(trans, root, &path);
9139 btrfs_release_path(&path);
9142 err = btrfs_commit_transaction(trans, root);
9146 while (!list_empty(&delete_list)) {
9147 tmp = to_extent_record(delete_list.next);
9148 list_del_init(&tmp->list);
9154 while (!list_empty(&rec->dups)) {
9155 tmp = to_extent_record(rec->dups.next);
9156 list_del_init(&tmp->list);
9160 btrfs_release_path(&path);
9162 if (!ret && !nr_del)
9163 rec->num_duplicates = 0;
9165 return ret ? ret : nr_del;
/*
 * Try to attach data backrefs that were never matched to a file extent.
 *
 * For every data backref of rec that is neither a full backref nor already
 * found, the owning fs root is read and the file extent keyed
 * (owner, EXTENT_DATA, offset) is looked up. When it exists, its disk_bytenr
 * and disk_num_bytes are read and the extent cache is probed at that bytenr.
 * The backref is then updated: found_ref is incremented and disk_bytenr /
 * bytes take the values read from the file extent, so that the backref
 * verification code can compare them against the record.
 *
 * A root that does not exist (-ENOENT) makes the backref be skipped; any
 * other error from reading the root is returned.
 *
 * NOTE(review): the test applied to the cached record found at that bytenr,
 * the statement that marks the backref as not trustworthy, the handling of
 * the search result and the return statement are not part of this listing.
 * NOTE(review): an inner declaration named tmp shadows the iteration cursor
 * of the same name -- confirm this is harmless with the macro in use.
 */
9168 static int find_possible_backrefs(struct btrfs_fs_info *info,
9169 struct btrfs_path *path,
9170 struct cache_tree *extent_cache,
9171 struct extent_record *rec)
9173 struct btrfs_root *root;
9174 struct extent_backref *back, *tmp;
9175 struct data_backref *dback;
9176 struct cache_extent *cache;
9177 struct btrfs_file_extent_item *fi;
9178 struct btrfs_key key;
9182 rbtree_postorder_for_each_entry_safe(back, tmp,
9183 &rec->backref_tree, node) {
9184 /* Don't care about full backrefs (poor unloved backrefs) */
9185 if (back->full_backref || !back->is_data)
9188 dback = to_data_backref(back);
9190 /* We found this one, we don't need to do a lookup */
9191 if (dback->found_ref)
9194 key.objectid = dback->root;
9195 key.type = BTRFS_ROOT_ITEM_KEY;
9196 key.offset = (u64)-1;
9198 root = btrfs_read_fs_root(info, &key);
9200 /* No root, definitely a bad ref, skip */
9201 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9203 /* Other err, exit */
9205 return PTR_ERR(root);
9207 key.objectid = dback->owner;
9208 key.type = BTRFS_EXTENT_DATA_KEY;
9209 key.offset = dback->offset;
9210 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9212 btrfs_release_path(path);
9215 /* Didn't find it, we can carry on */
9220 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9221 struct btrfs_file_extent_item);
9222 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9223 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9224 btrfs_release_path(path);
/* Probe a single byte at the start of the referenced extent. */
9225 cache = lookup_cache_extent(extent_cache, bytenr, 1);
9227 struct extent_record *tmp;
9228 tmp = container_of(cache, struct extent_record, cache);
9231 * If we found an extent record for the bytenr for this
9232 * particular backref then we can't add it to our
9233 * current extent record. We only want to add backrefs
9234 * that don't have a corresponding extent item in the
9235 * extent tree since they likely belong to this record
9236 * and we need to fix it if it doesn't match bytenrs.
9242 dback->found_ref += 1;
9243 dback->disk_bytenr = bytenr;
9244 dback->bytes = bytes;
9247 * Set this so the verify backref code knows not to trust the
9248 * values in this backref.
9257 * Record orphan data ref into corresponding root.
9259 * Return 0 if the extent item contains data ref and recorded.
9260 * Return 1 if the extent item contains no useful data ref
9261 * On that case, it may contains only shared_dataref or metadata backref
9262 * or the file extent exists(this should be handled by the extent bytenr
9264 * Return <0 if something goes wrong.
/*
 * Remember data extents that the extent tree references but no file extent
 * item points at, so that they can be handled per root later.
 *
 * For every data backref of rec that is present in the extent tree, is not a
 * full backref and has no found ref, the destination root is read (missing
 * or unreadable roots are skipped) and the file extent keyed
 * (owner, EXTENT_DATA, offset) is searched. Depending on that result an
 * orphan_data_extent is allocated, filled with the root, owner, offset and
 * the cached start / size of rec, and linked with the orphan_data_extents
 * list of the destination root.
 *
 * The visible return statement yields 0 when at least one orphan was
 * recorded and 1 otherwise.
 *
 * NOTE(review): list_add() is called with the list head of the root as its
 * first argument and the new orphan as its second. A kernel-style
 * list_add(new, head) takes the new element first; with the order used here
 * a second orphan for the same root would appear to unlink the first one --
 * confirm against the list implementation in use.
 * NOTE(review): the test on the search result that decides whether an orphan
 * is recorded, the allocation failure path and the negative return path are
 * not part of this listing.
 */
9266 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9267 struct extent_record *rec)
9269 struct btrfs_key key;
9270 struct btrfs_root *dest_root;
9271 struct extent_backref *back, *tmp;
9272 struct data_backref *dback;
9273 struct orphan_data_extent *orphan;
9274 struct btrfs_path path;
9275 int recorded_data_ref = 0;
9280 btrfs_init_path(&path);
9281 rbtree_postorder_for_each_entry_safe(back, tmp,
9282 &rec->backref_tree, node) {
9283 if (back->full_backref || !back->is_data ||
9284 !back->found_extent_tree)
9286 dback = to_data_backref(back);
9287 if (dback->found_ref)
9289 key.objectid = dback->root;
9290 key.type = BTRFS_ROOT_ITEM_KEY;
9291 key.offset = (u64)-1;
9293 dest_root = btrfs_read_fs_root(fs_info, &key);
9295 /* For non-exist root we just skip it */
9296 if (IS_ERR(dest_root) || !dest_root)
9299 key.objectid = dback->owner;
9300 key.type = BTRFS_EXTENT_DATA_KEY;
9301 key.offset = dback->offset;
9303 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9304 btrfs_release_path(&path);
9306 * For ret < 0, it's OK since the fs-tree may be corrupted,
9307 * we need to record it for inode/file extent rebuild.
9308 * For ret > 0, we record it only for file extent rebuild.
9309 * For ret == 0, the file extent exists but only bytenr
9310 * mismatch, let the original bytenr fix routine to handle,
9316 orphan = malloc(sizeof(*orphan));
9321 INIT_LIST_HEAD(&orphan->list);
9322 orphan->root = dback->root;
9323 orphan->objectid = dback->owner;
9324 orphan->offset = dback->offset;
9325 orphan->disk_bytenr = rec->cache.start;
9326 orphan->disk_len = rec->cache.size;
9327 list_add(&dest_root->orphan_data_extents, &orphan->list);
9328 recorded_data_ref = 1;
9331 btrfs_release_path(&path);
9333 return !recorded_data_ref;
9339 * when an incorrect extent item is found, this will delete
9340 * all of the existing entries for it and recreate them
9341 * based on what the tree scan found.
/*
 * Rebuild the extent-tree entries of one extent record from the backrefs
 * found during the tree scan.
 *
 * Visible steps:
 *   - BTRFS_BLOCK_FLAG_FULL_BACKREF is added to the flags when the record
 *     has flag_block_full_backref set;
 *   - for data records whose found and recorded reference counts differ,
 *     find_possible_backrefs() and then verify_backrefs() are run first;
 *   - a transaction is started on the extent root and the existing items
 *     are removed with delete_extent_records();
 *   - info->corrupt_blocks is checked for the record range (the existing
 *     comment says references are not re-added for corrupt blocks);
 *   - every backref with found_ref set is recreated through
 *     record_extent(), after clearing rec->bad_full_backref;
 *   - the transaction is committed and success is reported on stderr.
 *
 * NOTE(review): the error checks between the steps, the maintenance of the
 * allocated argument passed to record_extent(), the conditions around the
 * commit and the return statement are not part of this listing.
 */
9343 static int fixup_extent_refs(struct btrfs_fs_info *info,
9344 struct cache_tree *extent_cache,
9345 struct extent_record *rec)
9347 struct btrfs_trans_handle *trans = NULL;
9349 struct btrfs_path path;
9350 struct cache_extent *cache;
9351 struct extent_backref *back, *tmp;
9355 if (rec->flag_block_full_backref)
9356 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9358 btrfs_init_path(&path);
/* Only data extents with a reference count mismatch get the pre-pass. */
9359 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9361 * Sometimes the backrefs themselves are so broken they don't
9362 * get attached to any meaningful rec, so first go back and
9363 * check any of our backrefs that we couldn't find and throw
9364 * them into the list if we find the backref so that
9365 * verify_backrefs can figure out what to do.
9367 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9372 /* step one, make sure all of the backrefs agree */
9373 ret = verify_backrefs(info, &path, rec);
9377 trans = btrfs_start_transaction(info->extent_root, 1);
9378 if (IS_ERR(trans)) {
9379 ret = PTR_ERR(trans);
9383 /* step two, delete all the existing records */
9384 ret = delete_extent_records(trans, info->extent_root, &path,
9390 /* was this block corrupt? If so, don't add references to it */
9391 cache = lookup_cache_extent(info->corrupt_blocks,
9392 rec->start, rec->max_size);
9398 /* step three, recreate all the refs we did find */
9399 rbtree_postorder_for_each_entry_safe(back, tmp,
9400 &rec->backref_tree, node) {
9402 * if we didn't find any references, don't create a
9405 if (!back->found_ref)
9408 rec->bad_full_backref = 0;
9409 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9417 int err = btrfs_commit_transaction(trans, info->extent_root);
9423 fprintf(stderr, "Repaired extent references for %llu\n",
9424 (unsigned long long)rec->start);
9426 btrfs_release_path(&path);
/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the extent item of a record
 * so that it matches rec->flag_block_full_backref.
 *
 * The item is looked up in the extent root inside a transaction, its flags
 * are read, adjusted and written back, the leaf is marked dirty and the
 * transaction is committed. Both failure paths that are visible (search
 * error and item not found) release the path and commit the transaction
 * before leaving.
 *
 * NOTE(review): on those two failure paths the result of
 * btrfs_commit_transaction() is not stored -- confirm that dropping it is
 * intended.
 * NOTE(review): the conditions selecting the failure paths, the else
 * keywords and the return statements are not part of this listing.
 */
9430 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9431 struct extent_record *rec)
9433 struct btrfs_trans_handle *trans;
9434 struct btrfs_root *root = fs_info->extent_root;
9435 struct btrfs_path path;
9436 struct btrfs_extent_item *ei;
9437 struct btrfs_key key;
/*
 * Metadata records are looked up as METADATA_ITEM keyed by level, all other
 * records as EXTENT_ITEM keyed by size.
 */
9441 key.objectid = rec->start;
9442 if (rec->metadata) {
9443 key.type = BTRFS_METADATA_ITEM_KEY;
9444 key.offset = rec->info_level;
9446 key.type = BTRFS_EXTENT_ITEM_KEY;
9447 key.offset = rec->max_size;
9450 trans = btrfs_start_transaction(root, 0);
9452 return PTR_ERR(trans);
9454 btrfs_init_path(&path);
9455 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9457 btrfs_release_path(&path);
9458 btrfs_commit_transaction(trans, root);
9461 fprintf(stderr, "Didn't find extent for %llu\n",
9462 (unsigned long long)rec->start);
9463 btrfs_release_path(&path);
9464 btrfs_commit_transaction(trans, root);
9468 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9469 struct btrfs_extent_item);
9470 flags = btrfs_extent_flags(path.nodes[0], ei);
9471 if (rec->flag_block_full_backref) {
9472 fprintf(stderr, "setting full backref on %llu\n",
9473 (unsigned long long)key.objectid);
9474 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9476 fprintf(stderr, "clearing full backref on %llu\n",
9477 (unsigned long long)key.objectid);
9478 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9480 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9481 btrfs_mark_buffer_dirty(path.nodes[0]);
9482 btrfs_release_path(&path);
9483 ret = btrfs_commit_transaction(trans, root);
9485 fprintf(stderr, "Repaired extent flags for %llu\n",
9486 (unsigned long long)rec->start);
9491 /* right now we only prune from the extent allocation tree */
/*
 * Remove the pointer to one corrupt block from its parent node in the
 * extent root.
 *
 * The search for corrupt->key stops one level above the corrupt block
 * (lowest_level is corrupt->level + 1). The slot returned by the search is
 * tried first; when its block pointer is not corrupt->cache.start, every
 * slot of that node is scanned. A matching slot is removed with
 * btrfs_del_ptr() after a message naming the block.
 *
 * NOTE(review): what happens when no slot matches (including the special
 * case where the node is the root node of the extent tree), the error
 * checks, the labels and the return statement are not part of this listing.
 */
9492 static int prune_one_block(struct btrfs_trans_handle *trans,
9493 struct btrfs_fs_info *info,
9494 struct btrfs_corrupt_block *corrupt)
9497 struct btrfs_path path;
9498 struct extent_buffer *eb;
9502 int level = corrupt->level + 1;
9504 btrfs_init_path(&path);
9506 /* we want to stop at the parent to our busted block */
9507 path.lowest_level = level;
9509 ret = btrfs_search_slot(trans, info->extent_root,
9510 &corrupt->key, &path, -1, 1);
9515 eb = path.nodes[level];
9522 * hopefully the search gave us the block we want to prune,
9523 * lets try that first
9525 slot = path.slots[level];
9526 found = btrfs_node_blockptr(eb, slot);
9527 if (found == corrupt->cache.start)
9530 nritems = btrfs_header_nritems(eb);
9532 /* the search failed, lets scan this node and hope we find it */
9533 for (slot = 0; slot < nritems; slot++) {
9534 found = btrfs_node_blockptr(eb, slot);
9535 if (found == corrupt->cache.start)
9539 * we couldn't find the bad block. TODO, search all the nodes for pointers
9542 if (eb == info->extent_root->node) {
9547 btrfs_release_path(&path);
9552 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9553 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9556 btrfs_release_path(&path);
/*
 * Prune every block recorded in info->corrupt_blocks from the extent root.
 *
 * Entries are fetched from the start of the cache tree, handed to
 * prune_one_block() and then removed from the tree. A transaction on the
 * extent root is started for the work and committed at the end; the commit
 * result is the visible return value.
 *
 * NOTE(review): the return value of prune_one_block() is not stored on the
 * visible line, so a failed prune would go unnoticed -- confirm this is
 * deliberate best-effort behaviour.
 * NOTE(review): the loop construct and the conditions around starting and
 * committing the transaction are not part of this listing.
 */
9560 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9562 struct btrfs_trans_handle *trans = NULL;
9563 struct cache_extent *cache;
9564 struct btrfs_corrupt_block *corrupt;
9567 cache = search_cache_extent(info->corrupt_blocks, 0);
9571 trans = btrfs_start_transaction(info->extent_root, 1);
9573 return PTR_ERR(trans);
9575 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9576 prune_one_block(trans, info, corrupt);
9577 remove_cache_extent(info->corrupt_blocks, cache);
9580 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Reset cached free-space state of the block groups.
 *
 * Visible behaviour: ranges marked EXTENT_DIRTY in fs_info->free_space_cache
 * are located with find_first_extent_bit() and cleared with
 * clear_extent_dirty(); block groups are then looked up starting at a
 * running offset that is advanced to the end of each group found
 * (key.objectid + key.offset).
 *
 * NOTE(review): the loop constructs, their exit conditions and whatever is
 * done to each block group between the lookup and the advance are not part
 * of this listing -- confirm against the complete function.
 */
9584 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9586 struct btrfs_block_group_cache *cache;
9591 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9592 &start, &end, EXTENT_DIRTY);
9595 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9600 cache = btrfs_lookup_first_block_group(fs_info, start);
9605 start = cache->key.objectid + cache->key.offset;
/*
 * check_extent_refs - examine every extent_record held in @extent_cache,
 * report inconsistencies on stderr and, when the global repair flag is set,
 * call the fixup helpers.
 *
 * Visible phases, in order:
 *  - the range of every record, and of every entry in
 *    fs_info->corrupt_blocks, is marked dirty in fs_info->excluded_extents;
 *    then prune_corrupt_blocks() and reset_cached_block_groups() run;
 *  - while repair is set, the global duplicate_extents list is drained
 *    through process_duplicates() and delete_duplicate_records();
 *  - each record in @extent_cache is checked for: duplicate extent items,
 *    refs != extent_item_refs, backpointer mismatch, failed owner ref
 *    check, bad full backref, metadata crossing a stripe boundary, and
 *    chunk type mismatch; fixup_extent_refs() and fixup_extent_flags() are
 *    the repair calls;
 *  - each record is removed from the cache and its backrefs are freed;
 *  - a transaction on the extent root runs btrfs_fix_block_accounting()
 *    and is committed.
 *
 * A non-zero ret other than -EAGAIN triggers the
 * failed to repair damaged filesystem message.
 *
 * NOTE(review): most branch conditions, gotos and the return statements are
 * not visible in this listing; confirm the exact return contract against
 * the complete function.
 */
9609 static int check_extent_refs(struct btrfs_root *root,
9610 struct cache_tree *extent_cache)
9612 struct extent_record *rec;
9613 struct cache_extent *cache;
9619 * if we're doing a repair, we have to make sure
9620 * we don't allocate from the problem extents.
9621 * In the worst case, this will be all the
/* Exclude [start, start + max_size - 1] of every known extent record. */
9624 cache = search_cache_extent(extent_cache, 0);
9626 rec = container_of(cache, struct extent_record, cache);
9627 set_extent_dirty(root->fs_info->excluded_extents,
9629 rec->start + rec->max_size - 1);
9630 cache = next_cache_extent(cache);
9633 /* pin down all the corrupted blocks too */
9634 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9636 set_extent_dirty(root->fs_info->excluded_extents,
9638 cache->start + cache->size - 1);
9639 cache = next_cache_extent(cache);
9641 prune_corrupt_blocks(root->fs_info);
9642 reset_cached_block_groups(root->fs_info);
9645 reset_cached_block_groups(root->fs_info);
9648 * We need to delete any duplicate entries we find first otherwise we
9649 * could mess up the extent tree when we have backrefs that actually
9650 * belong to a different extent item and not the weird duplicate one.
9652 while (repair && !list_empty(&duplicate_extents)) {
9653 rec = to_extent_record(duplicate_extents.next);
9654 list_del_init(&rec->list);
9656 /* Sometimes we can find a backref before we find an actual
9657 * extent, so we need to process it a little bit to see if there
9658 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9659 * if this is a backref screwup. If we need to delete stuff
9660 * process_duplicates() will return 0, otherwise it will return
9663 if (process_duplicates(extent_cache, rec))
9665 ret = delete_duplicate_records(root, rec);
9669 * delete_duplicate_records will return the number of entries
9670 * deleted, so if it's greater than 0 then we know we actually
9671 * did something and we need to remove.
9684 cache = search_cache_extent(extent_cache, 0);
9687 rec = container_of(cache, struct extent_record, cache);
9688 if (rec->num_duplicates) {
9689 fprintf(stderr, "extent item %llu has multiple extent "
9690 "items\n", (unsigned long long)rec->start);
9694 if (rec->refs != rec->extent_item_refs) {
9695 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9696 (unsigned long long)rec->start,
9697 (unsigned long long)rec->nr);
9698 fprintf(stderr, "extent item %llu, found %llu\n",
9699 (unsigned long long)rec->extent_item_refs,
9700 (unsigned long long)rec->refs);
9701 ret = record_orphan_data_extents(root->fs_info, rec);
9707 if (all_backpointers_checked(rec, 1)) {
9708 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9709 (unsigned long long)rec->start,
9710 (unsigned long long)rec->nr);
9714 if (!rec->owner_ref_checked) {
9715 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9716 (unsigned long long)rec->start,
9717 (unsigned long long)rec->nr);
9722 if (repair && fix) {
9723 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9729 if (rec->bad_full_backref) {
9730 fprintf(stderr, "bad full backref, on [%llu]\n",
9731 (unsigned long long)rec->start);
9733 ret = fixup_extent_flags(root->fs_info, rec);
9741 * Although it's not a extent ref's problem, we reuse this
9742 * routine for error reporting.
9743 * No repair function yet.
9745 if (rec->crossing_stripes) {
9747 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9748 rec->start, rec->start + rec->max_size);
9752 if (rec->wrong_chunk_type) {
9754 "bad extent [%llu, %llu), type mismatch with chunk\n",
9755 rec->start, rec->start + rec->max_size);
9759 remove_cache_extent(extent_cache, cache);
9760 free_all_extent_backrefs(rec);
9761 if (!init_extent_tree && repair && (!cur_err || fix))
9762 clear_extent_dirty(root->fs_info->excluded_extents,
9764 rec->start + rec->max_size - 1);
9769 if (ret && ret != -EAGAIN) {
9770 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9773 struct btrfs_trans_handle *trans;
9775 root = root->fs_info->extent_root;
9776 trans = btrfs_start_transaction(root, 1);
9777 if (IS_ERR(trans)) {
9778 ret = PTR_ERR(trans);
9782 ret = btrfs_fix_block_accounting(trans, root);
9785 ret = btrfs_commit_transaction(trans, root);
9794 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9798 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9799 stripe_size = length;
9800 stripe_size /= num_stripes;
9801 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9802 stripe_size = length * 2;
9803 stripe_size /= num_stripes;
9804 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9805 stripe_size = length;
9806 stripe_size /= (num_stripes - 1);
9807 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9808 stripe_size = length;
9809 stripe_size /= (num_stripes - 2);
9811 stripe_size = length;
9817 * Check the chunk with its block group/dev list ref:
9818 * Return 0 if all refs seems valid.
9819 * Return 1 if part of refs seems valid, need later check for rebuild ref
9820 * like missing block group and needs to search extent tree to rebuild them.
9821 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * @chunk_rec:         chunk record being verified
 * @block_group_cache: block group records; the match is attached to
 *                     chunk_rec->bg_rec
 * @dev_extent_cache:  device extent records; matches are moved onto
 *                     chunk_rec->dextents
 *
 * NOTE(review): the last parameter, the branch structure and the return
 * statements are not visible in this listing.
 */
9823 static int check_chunk_refs(struct chunk_record *chunk_rec,
9824 struct block_group_tree *block_group_cache,
9825 struct device_extent_tree *dev_extent_cache,
9828 struct cache_extent *block_group_item;
9829 struct block_group_record *block_group_rec;
9830 struct cache_extent *dev_extent_item;
9831 struct device_extent_record *dev_extent_rec;
9835 int metadump_v2 = 0;
/* Step 1: look for the block group record belonging to this chunk. */
9839 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9842 if (block_group_item) {
9843 block_group_rec = container_of(block_group_item,
9844 struct block_group_record,
/*
 * The chunk length is compared with the block group offset field, the
 * chunk offset with the block group objectid field, and the type flags
 * with the block group flags.
 */
9846 if (chunk_rec->length != block_group_rec->offset ||
9847 chunk_rec->offset != block_group_rec->objectid ||
9849 chunk_rec->type_flags != block_group_rec->flags)) {
9852 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9853 chunk_rec->objectid,
9858 chunk_rec->type_flags,
9859 block_group_rec->objectid,
9860 block_group_rec->type,
9861 block_group_rec->offset,
9862 block_group_rec->offset,
9863 block_group_rec->objectid,
9864 block_group_rec->flags);
/* Unlink the block group record from its list and attach it to the chunk. */
9867 list_del_init(&block_group_rec->list);
9868 chunk_rec->bg_rec = block_group_rec;
9873 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9874 chunk_rec->objectid,
9879 chunk_rec->type_flags);
/* Step 2: every stripe must have a device extent of the expected length. */
9886 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9887 chunk_rec->num_stripes);
9888 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9889 devid = chunk_rec->stripes[i].devid;
9890 offset = chunk_rec->stripes[i].offset;
9891 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9892 devid, offset, length);
9893 if (dev_extent_item) {
9894 dev_extent_rec = container_of(dev_extent_item,
9895 struct device_extent_record,
/* The device extent must agree on devid, offset, owning chunk and length. */
9897 if (dev_extent_rec->objectid != devid ||
9898 dev_extent_rec->offset != offset ||
9899 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9900 dev_extent_rec->length != length) {
9903 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9904 chunk_rec->objectid,
9907 chunk_rec->stripes[i].devid,
9908 chunk_rec->stripes[i].offset,
9909 dev_extent_rec->objectid,
9910 dev_extent_rec->offset,
9911 dev_extent_rec->length);
/* Move the device extent onto the list of extents owned by this chunk. */
9914 list_move(&dev_extent_rec->chunk_list,
9915 &chunk_rec->dextents);
9920 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9921 chunk_rec->objectid,
9924 chunk_rec->stripes[i].devid,
9925 chunk_rec->stripes[i].offset);
9932 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Runs check_chunk_refs() on every chunk in @chunk_cache and sorts each
 * chunk record onto one of the optional lists according to the result:
 *   @good    - result == 0
 *   @rebuild - result  > 0
 *   @bad     - the remaining case (its condition is not visible here)
 * A NULL @good or @rebuild is tested for before the list is used.
 *
 * Afterwards the records still on block_group_cache->block_groups and on
 * dev_extent_cache->no_chunk_orphans are iterated, with a message for a
 * block group or device extent that did not find its chunk.
 *
 * @silent is forwarded to check_chunk_refs().
 *
 * NOTE(review): the return statements are not visible in this listing.
 */
9933 int check_chunks(struct cache_tree *chunk_cache,
9934 struct block_group_tree *block_group_cache,
9935 struct device_extent_tree *dev_extent_cache,
9936 struct list_head *good, struct list_head *bad,
9937 struct list_head *rebuild, int silent)
9939 struct cache_extent *chunk_item;
9940 struct chunk_record *chunk_rec;
9941 struct block_group_record *bg_rec;
9942 struct device_extent_record *dext_rec;
/* Pass 1: classify every chunk record. */
9946 chunk_item = first_cache_extent(chunk_cache);
9947 while (chunk_item) {
9948 chunk_rec = container_of(chunk_item, struct chunk_record,
9950 err = check_chunk_refs(chunk_rec, block_group_cache,
9951 dev_extent_cache, silent);
9954 if (err == 0 && good)
9955 list_add_tail(&chunk_rec->list, good);
9956 if (err > 0 && rebuild)
9957 list_add_tail(&chunk_rec->list, rebuild);
9959 list_add_tail(&chunk_rec->list, bad);
9960 chunk_item = next_cache_extent(chunk_item);
/* Pass 2: block groups still on this list were not claimed by any chunk. */
9963 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9966 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
/* Pass 3: device extents still on the orphan list have no chunk either. */
9974 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9978 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * check_device_used - compare the byte_used field of a device record with
 * the summed length of the device extents that belong to that device.
 *
 * @dev_rec:    device record to verify
 * @dext_cache: device extent tree, searched starting at (devid, 0)
 *
 * Every extent that is counted is also removed from its device_list.
 * A mismatch between the sum and dev_rec->byte_used is reported.
 *
 * NOTE(review): the return statements are not visible in this listing.
 */
9989 static int check_device_used(struct device_record *dev_rec,
9990 struct device_extent_tree *dext_cache)
9992 struct cache_extent *cache;
9993 struct device_extent_record *dev_extent_rec;
/* Start at the first extent of this device. */
9996 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9998 dev_extent_rec = container_of(cache,
9999 struct device_extent_record,
/* An extent with a different objectid belongs to another device. */
10001 if (dev_extent_rec->objectid != dev_rec->devid)
/* Claim the extent for this device and account for its length. */
10004 list_del_init(&dev_extent_rec->device_list);
10005 total_byte += dev_extent_rec->length;
10006 cache = next_cache_extent(cache);
10009 if (total_byte != dev_rec->byte_used) {
10011 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10012 total_byte, dev_rec->byte_used, dev_rec->objectid,
10013 dev_rec->type, dev_rec->offset);
10020 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Runs check_device_used() on every device record in the @dev_cache rbtree,
 * then reports every device extent left on
 * dev_extent_cache->no_device_orphans, i.e. extents that no device claimed.
 *
 * NOTE(review): the loop header, the error accumulation and the return
 * statement are not visible in this listing.
 */
10021 static int check_devices(struct rb_root *dev_cache,
10022 struct device_extent_tree *dev_extent_cache)
10024 struct rb_node *dev_node;
10025 struct device_record *dev_rec;
10026 struct device_extent_record *dext_rec;
/* In-order walk over all device records. */
10030 dev_node = rb_first(dev_cache);
10032 dev_rec = container_of(dev_node, struct device_record, node);
10033 err = check_device_used(dev_rec, dev_extent_cache);
10037 dev_node = rb_next(dev_node);
/* check_device_used() unlinks matched extents; the rest are orphans. */
10039 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10042 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10043 dext_rec->objectid, dext_rec->offset, dext_rec->length);
10050 static int add_root_item_to_list(struct list_head *head,
10051 u64 objectid, u64 bytenr, u64 last_snapshot,
10052 u8 level, u8 drop_level,
10053 struct btrfs_key *drop_key)
10056 struct root_item_record *ri_rec;
10057 ri_rec = malloc(sizeof(*ri_rec));
10060 ri_rec->bytenr = bytenr;
10061 ri_rec->objectid = objectid;
10062 ri_rec->level = level;
10063 ri_rec->drop_level = drop_level;
10064 ri_rec->last_snapshot = last_snapshot;
10066 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10067 list_add_tail(&ri_rec->list, head);
10072 static void free_root_item_list(struct list_head *list)
10074 struct root_item_record *ri_rec;
10076 while (!list_empty(list)) {
10077 ri_rec = list_first_entry(list, struct root_item_record,
10079 list_del_init(&ri_rec->list);
/*
 * deal_root_from_list - process the root_item_records queued on @list.
 *
 * For each record the root node is read with read_tree_block(), registered
 * through add_root_to_pending() and the record is unlinked from @list.
 * run_next_block() is called with the record inside the loop and once more
 * with a NULL record after it.
 *
 * All cache tree, rb_root, block group and device extent parameters are
 * passed through unchanged to add_root_to_pending() / run_next_block().
 *
 * NOTE(review): the conditions guarding the run_next_block() calls, the
 * handling of their results and the return statements are not visible in
 * this listing.
 */
10084 static int deal_root_from_list(struct list_head *list,
10085 struct btrfs_root *root,
10086 struct block_info *bits,
10088 struct cache_tree *pending,
10089 struct cache_tree *seen,
10090 struct cache_tree *reada,
10091 struct cache_tree *nodes,
10092 struct cache_tree *extent_cache,
10093 struct cache_tree *chunk_cache,
10094 struct rb_root *dev_cache,
10095 struct block_group_tree *block_group_cache,
10096 struct device_extent_tree *dev_extent_cache)
10101 while (!list_empty(list)) {
10102 struct root_item_record *rec;
10103 struct extent_buffer *buf;
/* Take the record at the head of the list. */
10104 rec = list_entry(list->next,
10105 struct root_item_record, list);
/* Read the root node of that tree; a buffer that is not uptodate is dropped. */
10107 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10108 if (!extent_buffer_uptodate(buf)) {
10109 free_extent_buffer(buf);
10113 ret = add_root_to_pending(buf, extent_cache, pending,
10114 seen, nodes, rec->objectid);
10118 * To rebuild extent tree, we need deal with snapshot
10119 * one by one, otherwise we deal with node firstly which
10120 * can maximize readahead.
10123 ret = run_next_block(root, bits, bits_nr, &last,
10124 pending, seen, reada, nodes,
10125 extent_cache, chunk_cache,
10126 dev_cache, block_group_cache,
10127 dev_extent_cache, rec);
/* Done with this root: release the buffer and unlink the record. */
10131 free_extent_buffer(buf);
10132 list_del(&rec->list);
/* Same call as above, but without a root_item_record. */
10138 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10139 reada, nodes, extent_cache, chunk_cache,
10140 dev_cache, block_group_cache,
10141 dev_extent_cache, NULL);
/*
 * check_chunks_and_extents - top-level driver for the chunk, extent and
 * device cross checks.
 *
 * Visible steps:
 *  1. initialise the local caches and lists and hook excluded_extents,
 *     fsck_extent_cache, free_extent_hook and corrupt_blocks into @fs_info;
 *  2. queue the tree root and the chunk root on normal_trees;
 *  3. walk the ROOT_ITEM entries of the tree root, queueing each root on
 *     normal_trees when its drop_progress objectid is 0, otherwise on
 *     dropping_trees together with its drop key and drop level;
 *  4. run deal_root_from_list() on both lists, then check_chunks(),
 *     check_extent_refs() and check_devices();
 *  5. unhook the fields from @fs_info and free all caches.
 *
 * NOTE(review): gotos, labels and return statements are not visible in this
 * listing.  Two cleanup sequences are visible; the second one is presumably
 * the path taken to restart after -EAGAIN - confirm.
 */
10151 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10153 struct rb_root dev_cache;
10154 struct cache_tree chunk_cache;
10155 struct block_group_tree block_group_cache;
10156 struct device_extent_tree dev_extent_cache;
10157 struct cache_tree extent_cache;
10158 struct cache_tree seen;
10159 struct cache_tree pending;
10160 struct cache_tree reada;
10161 struct cache_tree nodes;
10162 struct extent_io_tree excluded_extents;
10163 struct cache_tree corrupt_blocks;
10164 struct btrfs_path path;
10165 struct btrfs_key key;
10166 struct btrfs_key found_key;
10168 struct block_info *bits;
10170 struct extent_buffer *leaf;
10172 struct btrfs_root_item ri;
10173 struct list_head dropping_trees;
10174 struct list_head normal_trees;
10175 struct btrfs_root *root1;
10176 struct btrfs_root *root;
/* Step 1: set up every local cache and list. */
10180 root = fs_info->fs_root;
10181 dev_cache = RB_ROOT;
10182 cache_tree_init(&chunk_cache);
10183 block_group_tree_init(&block_group_cache);
10184 device_extent_tree_init(&dev_extent_cache);
10186 cache_tree_init(&extent_cache);
10187 cache_tree_init(&seen);
10188 cache_tree_init(&pending);
10189 cache_tree_init(&nodes);
10190 cache_tree_init(&reada);
10191 cache_tree_init(&corrupt_blocks);
10192 extent_io_tree_init(&excluded_extents);
10193 INIT_LIST_HEAD(&dropping_trees);
10194 INIT_LIST_HEAD(&normal_trees);
/* Point fs_info at the stack-local structures; undone during cleanup. */
10197 fs_info->excluded_extents = &excluded_extents;
10198 fs_info->fsck_extent_cache = &extent_cache;
10199 fs_info->free_extent_hook = free_extent_hook;
10200 fs_info->corrupt_blocks = &corrupt_blocks;
10204 bits = malloc(bits_nr * sizeof(struct block_info));
10210 if (ctx.progress_enabled) {
10211 ctx.tp = TASK_EXTENTS;
10212 task_start(ctx.info);
/* Step 2: the tree root and the chunk root are queued explicitly. */
10216 root1 = fs_info->tree_root;
10217 level = btrfs_header_level(root1->node);
10218 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10219 root1->node->start, 0, level, 0, NULL);
10222 root1 = fs_info->chunk_root;
10223 level = btrfs_header_level(root1->node);
10224 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10225 root1->node->start, 0, level, 0, NULL);
/* Step 3: iterate the ROOT_ITEM entries stored in the tree root. */
10228 btrfs_init_path(&path);
10231 key.type = BTRFS_ROOT_ITEM_KEY;
10232 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10236 leaf = path.nodes[0];
10237 slot = path.slots[0];
10238 if (slot >= btrfs_header_nritems(path.nodes[0])) {
/*
 * NOTE(review): root is fs_info->fs_root here although the path was built
 * by a search on fs_info->tree_root - confirm this is intended.
 */
10239 ret = btrfs_next_leaf(root, &path);
10242 leaf = path.nodes[0];
10243 slot = path.slots[0];
10245 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10246 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10247 unsigned long offset;
/* Copy the root item out of the leaf into the local ri. */
10250 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10251 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10252 last_snapshot = btrfs_root_last_snapshot(&ri);
/* A drop_progress objectid of 0 puts the root on normal_trees. */
10253 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10254 level = btrfs_root_level(&ri);
10255 ret = add_root_item_to_list(&normal_trees,
10256 found_key.objectid,
10257 btrfs_root_bytenr(&ri),
10258 last_snapshot, level,
/* Otherwise the root goes on dropping_trees with its drop key and level. */
10263 level = btrfs_root_level(&ri);
10264 objectid = found_key.objectid;
10265 btrfs_disk_key_to_cpu(&found_key,
10266 &ri.drop_progress);
10267 ret = add_root_item_to_list(&dropping_trees,
10269 btrfs_root_bytenr(&ri),
10270 last_snapshot, level,
10271 ri.drop_level, &found_key);
10278 btrfs_release_path(&path);
10281 * check_block can return -EAGAIN if it fixes something, please keep
10282 * this in mind when dealing with return values from these functions, if
10283 * we get -EAGAIN we want to fall through and restart the loop.
/* Step 4: scan the queued roots, then run the cross checks. */
10285 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
10286 &seen, &reada, &nodes, &extent_cache,
10287 &chunk_cache, &dev_cache, &block_group_cache,
10288 &dev_extent_cache);
10290 if (ret == -EAGAIN)
10294 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
10295 &pending, &seen, &reada, &nodes,
10296 &extent_cache, &chunk_cache, &dev_cache,
10297 &block_group_cache, &dev_extent_cache);
10299 if (ret == -EAGAIN)
/* No good/bad/rebuild lists are requested and silent is 0. */
10304 ret = check_chunks(&chunk_cache, &block_group_cache,
10305 &dev_extent_cache, NULL, NULL, NULL, 0);
10307 if (ret == -EAGAIN)
10312 ret = check_extent_refs(root, &extent_cache);
10314 if (ret == -EAGAIN)
10319 ret = check_devices(&dev_cache, &dev_extent_cache);
/* Step 5: first cleanup sequence - unhook fs_info and free the caches. */
10324 task_stop(ctx.info);
10326 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10327 extent_io_tree_cleanup(&excluded_extents);
10328 fs_info->fsck_extent_cache = NULL;
10329 fs_info->free_extent_hook = NULL;
10330 fs_info->corrupt_blocks = NULL;
10331 fs_info->excluded_extents = NULL;
10334 free_chunk_cache_tree(&chunk_cache);
10335 free_device_cache_tree(&dev_cache);
10336 free_block_group_tree(&block_group_cache);
10337 free_device_extent_tree(&dev_extent_cache);
10338 free_extent_cache_tree(&seen);
10339 free_extent_cache_tree(&pending);
10340 free_extent_cache_tree(&reada);
10341 free_extent_cache_tree(&nodes);
10342 free_root_item_list(&normal_trees);
10343 free_root_item_list(&dropping_trees);
/*
 * Second cleanup sequence: frees the same caches plus the extent record
 * cache, but leaves the fs_info hooks in place.
 */
10346 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10347 free_extent_cache_tree(&seen);
10348 free_extent_cache_tree(&pending);
10349 free_extent_cache_tree(&reada);
10350 free_extent_cache_tree(&nodes);
10351 free_chunk_cache_tree(&chunk_cache);
10352 free_block_group_tree(&block_group_cache);
10353 free_device_cache_tree(&dev_cache);
10354 free_device_extent_tree(&dev_extent_cache);
10355 free_extent_record_cache(&extent_cache);
10356 free_root_item_list(&normal_trees);
10357 free_root_item_list(&dropping_trees);
10358 extent_io_tree_cleanup(&excluded_extents);
10363 * Check backrefs of a tree block given by @bytenr or @eb.
10365 * @root: the root containing the @bytenr or @eb
10366 * @eb: tree block extent buffer, can be NULL
10367 * @bytenr: bytenr of the tree block to search
10368 * @level: tree level of the tree block
10369 * @owner: owner of the tree block
10371 * Return >0 for any error found and output error message
10372 * Return 0 for no error found
/*
 * Outline of the visible logic:
 *  - find the extent or metadata item for @bytenr in the extent tree
 *    (METADATA_ITEM key when the SKINNY_METADATA incompat flag is set);
 *  - compare the TREE_BLOCK flag, the generation, the level and, for
 *    owners that fail is_fstree(), the reference count of that item;
 *  - scan the inline refs for a TREE_BLOCK_REF whose offset equals
 *    root->objectid or @owner, or for a SHARED_BLOCK_REF whose parent
 *    passes a recursive check at @level + 1 with a NULL @eb;
 *  - as a fallback, search for a keyed TREE_BLOCK_REF item
 *    (bytenr, TREE_BLOCK_REF, root->objectid);
 *  - BACKREF_MISSING is only printed when @eb is non-NULL.
 *
 * NOTE(review): @eb may be NULL, yet btrfs_header_generation(eb) is called
 * below; the guard around it is not visible in this listing - confirm one
 * exists.
 */
10374 static int check_tree_block_ref(struct btrfs_root *root,
10375 struct extent_buffer *eb, u64 bytenr,
10376 int level, u64 owner)
10378 struct btrfs_key key;
10379 struct btrfs_root *extent_root = root->fs_info->extent_root;
10380 struct btrfs_path path;
10381 struct btrfs_extent_item *ei;
10382 struct btrfs_extent_inline_ref *iref;
10383 struct extent_buffer *leaf;
10389 u32 nodesize = root->fs_info->nodesize;
10392 int tree_reloc_root = 0;
/* The block being checked is the root node of a tree reloc root. */
10397 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10398 btrfs_header_bytenr(root->node) == bytenr)
10399 tree_reloc_root = 1;
10401 btrfs_init_path(&path);
10402 key.objectid = bytenr;
10403 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10404 key.type = BTRFS_METADATA_ITEM_KEY;
10406 key.type = BTRFS_EXTENT_ITEM_KEY;
/* Offset -1 places the search after every item for this bytenr. */
10407 key.offset = (u64)-1;
10409 /* Search for the backref in extent tree */
10410 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10412 err |= BACKREF_MISSING;
/* Step back to the extent item that covers @bytenr. */
10415 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10417 err |= BACKREF_MISSING;
10421 leaf = path.nodes[0];
10422 slot = path.slots[0];
10423 btrfs_item_key_to_cpu(leaf, &key, slot);
10425 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * METADATA_ITEM: the level is the key offset and inline refs follow the
 * extent item directly.  Otherwise a btrfs_tree_block_info sits between
 * the extent item and the inline refs and carries the level.
 */
10427 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10428 skinny_level = (int)key.offset;
10429 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10431 struct btrfs_tree_block_info *info;
10433 info = (struct btrfs_tree_block_info *)(ei + 1);
10434 skinny_level = btrfs_tree_block_level(leaf, info);
10435 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10442 if (!(btrfs_extent_flags(leaf, ei) &
10443 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10445 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10446 key.objectid, nodesize,
10447 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10448 err = BACKREF_MISMATCH;
10450 header_gen = btrfs_header_generation(eb);
10451 extent_gen = btrfs_extent_generation(leaf, ei);
10452 if (header_gen != extent_gen) {
10454 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10455 key.objectid, nodesize, header_gen,
10457 err = BACKREF_MISMATCH;
10459 if (level != skinny_level) {
10461 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10462 key.objectid, nodesize, level, skinny_level);
10463 err = BACKREF_MISMATCH;
10465 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10467 "extent[%llu %u] is referred by other roots than %llu",
10468 key.objectid, nodesize, root->objectid);
10469 err = BACKREF_MISMATCH;
10474 * Iterate the extent/metadata item to find the exact backref
10476 item_size = btrfs_item_size_nr(leaf, slot);
10477 ptr = (unsigned long)iref;
10478 end = (unsigned long)ei + item_size;
10479 while (ptr < end) {
10480 iref = (struct btrfs_extent_inline_ref *)ptr;
10481 type = btrfs_extent_inline_ref_type(leaf, iref);
10482 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10484 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10485 (offset == root->objectid || offset == owner)) {
10487 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10489 * Backref of tree reloc root points to itself, no need
10490 * to check backref any more.
10492 if (tree_reloc_root)
10495 /* Check if the backref points to valid referencer */
10496 found_ref = !check_tree_block_ref(root, NULL,
10497 offset, level + 1, owner);
10502 ptr += btrfs_extent_inline_ref_size(type);
10506 * Inlined extent item doesn't have what we need, check
10507 * TREE_BLOCK_REF_KEY
10510 btrfs_release_path(&path);
10511 key.objectid = bytenr;
10512 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10513 key.offset = root->objectid;
10515 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10520 err |= BACKREF_MISSING;
10522 btrfs_release_path(&path);
10523 if (eb && (err & BACKREF_MISSING))
10524 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10525 bytenr, nodesize, owner, level);
10530 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10532 * Return >0 any error found and output error message
10533 * Return 0 for no error found
/*
 * @root: root that owns the leaf
 * @eb:   leaf holding the file extent item
 * @slot: slot of the file extent item inside @eb
 *
 * Side effects: adds disk_num_bytes to the global data_bytes_allocated and
 * num_bytes to the global data_bytes_referenced.
 *
 * The data backref is searched for in three places, in this order:
 * inline refs of the extent item, a keyed EXTENT_DATA_REF item, and a
 * keyed SHARED_DATA_REF item whose offset is the start of @eb.
 *
 * NOTE(review): early returns, the error handling after each search and
 * the final return statement are not visible in this listing.
 */
10535 static int check_extent_data_item(struct btrfs_root *root,
10536 struct extent_buffer *eb, int slot)
10538 struct btrfs_file_extent_item *fi;
10539 struct btrfs_path path;
10540 struct btrfs_root *extent_root = root->fs_info->extent_root;
10541 struct btrfs_key fi_key;
10542 struct btrfs_key dbref_key;
10543 struct extent_buffer *leaf;
10544 struct btrfs_extent_item *ei;
10545 struct btrfs_extent_inline_ref *iref;
10546 struct btrfs_extent_data_ref *dref;
10549 u64 disk_num_bytes;
10550 u64 extent_num_bytes;
10557 int found_dbackref = 0;
10561 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10562 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10564 /* Nothing to check for hole and inline data extents */
10565 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10566 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10569 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10570 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10571 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10573 /* Check unaligned disk_num_bytes and num_bytes */
10574 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10576 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10577 fi_key.objectid, fi_key.offset, disk_num_bytes,
10578 root->fs_info->sectorsize);
10579 err |= BYTES_UNALIGNED;
10581 data_bytes_allocated += disk_num_bytes;
10583 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10585 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10586 fi_key.objectid, fi_key.offset, extent_num_bytes,
10587 root->fs_info->sectorsize);
10588 err |= BYTES_UNALIGNED;
10590 data_bytes_referenced += extent_num_bytes;
10592 owner = btrfs_header_owner(eb);
10594 /* Check the extent item of the file extent in extent tree */
10595 btrfs_init_path(&path);
/* Exact key of the data extent: (disk_bytenr, EXTENT_ITEM, disk_num_bytes). */
10596 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10597 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10598 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10600 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
/* From here on slot refers to the extent tree leaf, not to @eb. */
10604 leaf = path.nodes[0];
10605 slot = path.slots[0];
10606 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10608 extent_flags = btrfs_extent_flags(leaf, ei);
10610 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10612 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10613 disk_bytenr, disk_num_bytes,
10614 BTRFS_EXTENT_FLAG_DATA);
10615 err |= BACKREF_MISMATCH;
10618 /* Check data backref inside that extent item */
10619 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10620 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10621 ptr = (unsigned long)iref;
10622 end = (unsigned long)ei + item_size;
10623 while (ptr < end) {
10624 iref = (struct btrfs_extent_inline_ref *)ptr;
10625 type = btrfs_extent_inline_ref_type(leaf, iref);
/* The data ref is read from the address of the offset field of iref. */
10626 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10628 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10629 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10630 if (ref_root == owner || ref_root == root->objectid)
10631 found_dbackref = 1;
10632 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/* Shared ref: accepted when the parent tree block passes its own check. */
10633 found_dbackref = !check_tree_block_ref(root, NULL,
10634 btrfs_extent_inline_ref_offset(leaf, iref),
10638 if (found_dbackref)
10640 ptr += btrfs_extent_inline_ref_size(type);
10643 if (!found_dbackref) {
10644 btrfs_release_path(&path);
10646 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10647 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10648 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10649 dbref_key.offset = hash_extent_data_ref(root->objectid,
10650 fi_key.objectid, fi_key.offset);
10652 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10653 &dbref_key, &path, 0, 0);
10655 found_dbackref = 1;
10659 btrfs_release_path(&path);
10662 * Neither inlined nor EXTENT_DATA_REF found, try
10663 * SHARED_DATA_REF as last chance.
10665 dbref_key.objectid = disk_bytenr;
10666 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10667 dbref_key.offset = eb->start;
10669 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10670 &dbref_key, &path, 0, 0);
10672 found_dbackref = 1;
10678 if (!found_dbackref)
10679 err |= BACKREF_MISSING;
10680 btrfs_release_path(&path);
10681 if (err & BACKREF_MISSING) {
10682 error("data extent[%llu %llu] backref lost",
10683 disk_bytenr, disk_num_bytes);
10689 * Get real tree block level for the case like shared block
10690 * Return >= 0 as tree level
10691 * Return <0 for error
/*
 * The level is read from two sources: the extent tree item for @bytenr
 * (key offset of a METADATA_ITEM, otherwise the btrfs_tree_block_info that
 * follows the extent item) and the header of the tree block itself.  The
 * header level is what gets returned; the two values are compared first.
 *
 * NOTE(review): the error paths, including what happens when the two
 * levels differ, are not visible in this listing.
 */
10693 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10695 struct extent_buffer *eb;
10696 struct btrfs_path path;
10697 struct btrfs_key key;
10698 struct btrfs_extent_item *ei;
10705 /* Search extent tree for extent generation and level */
10706 key.objectid = bytenr;
10707 key.type = BTRFS_METADATA_ITEM_KEY;
10708 key.offset = (u64)-1;
10710 btrfs_init_path(&path);
10711 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
/* Step back to the extent item that covers @bytenr. */
10714 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10722 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10723 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10724 struct btrfs_extent_item);
10725 flags = btrfs_extent_flags(path.nodes[0], ei);
/* The extent must be flagged as a tree block. */
10726 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10731 /* Get transid for later read_tree_block() check */
10732 transid = btrfs_extent_generation(path.nodes[0], ei);
10734 /* Get backref level as one source */
10735 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10736 backref_level = key.offset;
10738 struct btrfs_tree_block_info *info;
10740 info = (struct btrfs_tree_block_info *)(ei + 1);
10741 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10743 btrfs_release_path(&path);
10745 /* Get level from tree block as an alternative source */
10746 eb = read_tree_block(fs_info, bytenr, transid);
10747 if (!extent_buffer_uptodate(eb)) {
10748 free_extent_buffer(eb);
10751 header_level = btrfs_header_level(eb);
10752 free_extent_buffer(eb);
10754 if (header_level != backref_level)
10756 return header_level;
10759 btrfs_release_path(&path);
10764 * Check if a tree block backref is valid (points to a valid tree block)
10765 * if level == -1, level will be resolved
10766 * Return >0 for any error found and print error message
/*
 * @fs_info: filesystem being checked
 * @root_id: objectid of the root named by the backref
 * @bytenr:  bytenr of the referenced tree block
 * @level:   expected level, or -1 to resolve it via
 *           query_tree_block_level()
 *
 * Method: read the block, take its first key, search @root_id for that key
 * with path.lowest_level set to @level, then verify that the node found at
 * that level has the expected bytenr and level.
 *
 * Error bits used: REFERENCER_MISSING and REFERENCER_MISMATCH.
 *
 * NOTE(review): gotos, several branch conditions and the return statements
 * are not visible in this listing.
 */
10768 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10769 u64 bytenr, int level)
10771 struct btrfs_root *root;
10772 struct btrfs_key key;
10773 struct btrfs_path path;
10774 struct extent_buffer *eb;
10775 struct extent_buffer *node;
10776 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10780 /* Query level for level == -1 special case */
10782 level = query_tree_block_level(fs_info, bytenr);
10784 err |= REFERENCER_MISSING;
/* Load the root named by the backref. */
10788 key.objectid = root_id;
10789 key.type = BTRFS_ROOT_ITEM_KEY;
10790 key.offset = (u64)-1;
10792 root = btrfs_read_fs_root(fs_info, &key);
10793 if (IS_ERR(root)) {
10794 err |= REFERENCER_MISSING;
10798 /* Read out the tree block to get item/node key */
10799 eb = read_tree_block(fs_info, bytenr, 0);
10800 if (!extent_buffer_uptodate(eb)) {
10801 err |= REFERENCER_MISSING;
10802 free_extent_buffer(eb);
10806 /* Empty tree, no need to check key */
10807 if (!btrfs_header_nritems(eb) && !level) {
10808 free_extent_buffer(eb);
/* First key of the block: node key or item key (selector not visible). */
10813 btrfs_node_key_to_cpu(eb, &key, 0);
10815 btrfs_item_key_to_cpu(eb, &key, 0);
10817 free_extent_buffer(eb);
10819 btrfs_init_path(&path);
/* Make the search stop at @level. */
10820 path.lowest_level = level;
10821 /* Search with the first key, to ensure we can reach it */
10822 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10824 err |= REFERENCER_MISSING;
/* The node found at @level must be the block the backref talks about. */
10828 node = path.nodes[level];
10829 if (btrfs_header_bytenr(node) != bytenr) {
10831 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10832 bytenr, nodesize, bytenr,
10833 btrfs_header_bytenr(node));
10834 err |= REFERENCER_MISMATCH;
10836 if (btrfs_header_level(node) != level) {
10838 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10839 bytenr, nodesize, level,
10840 btrfs_header_level(node));
10841 err |= REFERENCER_MISMATCH;
10845 btrfs_release_path(&path);
/* Two message variants: without and with the level. */
10847 if (err & REFERENCER_MISSING) {
10849 error("extent [%llu %d] lost referencer (owner: %llu)",
10850 bytenr, nodesize, root_id);
10853 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10854 bytenr, nodesize, root_id, level);
10861 * Check if tree block @eb is tree reloc root.
10862 * Return 0 if it's not or any problem happens
10863 * Return 1 if it's a tree reloc root
10865 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10866 struct extent_buffer *eb)
10868 struct btrfs_root *tree_reloc_root;
10869 struct btrfs_key key;
10870 u64 bytenr = btrfs_header_bytenr(eb);
10871 u64 owner = btrfs_header_owner(eb);
10874 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10875 key.offset = owner;
10876 key.type = BTRFS_ROOT_ITEM_KEY;
10878 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10879 if (IS_ERR(tree_reloc_root))
10882 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10884 btrfs_free_fs_root(tree_reloc_root);
10889 * Check referencer for shared block backref
10890 * If level == -1, this function will resolve the level.
/*
 * @fs_info: filesystem being checked
 * @parent:  bytenr of the tree block named as parent by the backref
 * @bytenr:  bytenr of the referenced (child) tree block
 * @level:   level of the child, or -1 to resolve it via
 *           query_tree_block_level()
 *
 * The parent block is read and must sit exactly one level above the child
 * and contain a block pointer equal to @bytenr.  The case parent == bytenr
 * is checked with is_tree_reloc_root().
 *
 * Returns REFERENCER_MISSING when no such parent pointer is found.
 *
 * NOTE(review): gotos and the success return are not visible in this
 * listing.
 */
10892 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10893 u64 parent, u64 bytenr, int level)
10895 struct extent_buffer *eb;
10897 int found_parent = 0;
10900 eb = read_tree_block(fs_info, parent, 0);
10901 if (!extent_buffer_uptodate(eb))
10905 level = query_tree_block_level(fs_info, bytenr);
10909 /* It's possible it's a tree reloc root */
10910 if (parent == bytenr) {
10911 if (is_tree_reloc_root(fs_info, eb))
/* A real parent is exactly one level above its child. */
10916 if (level + 1 != btrfs_header_level(eb))
/* Scan every block pointer of the parent for the child bytenr. */
10919 nr = btrfs_header_nritems(eb);
10920 for (i = 0; i < nr; i++) {
10921 if (bytenr == btrfs_node_blockptr(eb, i)) {
10927 free_extent_buffer(eb);
10928 if (!found_parent) {
10930 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10931 bytenr, fs_info->nodesize, parent, level);
10932 return REFERENCER_MISSING;
10938 * Check referencer for normal (inlined) data ref
10939 * If len == 0, it will be resolved by searching in extent tree
10941 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10942 u64 root_id, u64 objectid, u64 offset,
10943 u64 bytenr, u64 len, u32 count)
10945 struct btrfs_root *root;
10946 struct btrfs_root *extent_root = fs_info->extent_root;
10947 struct btrfs_key key;
10948 struct btrfs_path path;
10949 struct extent_buffer *leaf;
10950 struct btrfs_file_extent_item *fi;
10951 u32 found_count = 0;
10956 key.objectid = bytenr;
10957 key.type = BTRFS_EXTENT_ITEM_KEY;
10958 key.offset = (u64)-1;
10960 btrfs_init_path(&path);
10961 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10964 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10967 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10968 if (key.objectid != bytenr ||
10969 key.type != BTRFS_EXTENT_ITEM_KEY)
10972 btrfs_release_path(&path);
10974 key.objectid = root_id;
10975 key.type = BTRFS_ROOT_ITEM_KEY;
10976 key.offset = (u64)-1;
10977 btrfs_init_path(&path);
10979 root = btrfs_read_fs_root(fs_info, &key);
10983 key.objectid = objectid;
10984 key.type = BTRFS_EXTENT_DATA_KEY;
10986 * It can be nasty as data backref offset is
10987 * file offset - file extent offset, which is smaller or
10988 * equal to original backref offset. The only special case is
10989 * overflow. So we need to special check and do further search.
10991 key.offset = offset & (1ULL << 63) ? 0 : offset;
10993 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10998 * Search afterwards to get correct one
10999 * NOTE: As we must do a comprehensive check on the data backref to
11000 * make sure the dref count also matches, we must iterate all file
11001 * extents for that inode.
11004 leaf = path.nodes[0];
11005 slot = path.slots[0];
11007 if (slot >= btrfs_header_nritems(leaf))
11009 btrfs_item_key_to_cpu(leaf, &key, slot);
11010 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11012 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11014 * Except normal disk bytenr and disk num bytes, we still
11015 * need to do extra check on dbackref offset as
11016 * dbackref offset = file_offset - file_extent_offset
11018 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11019 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11020 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11025 ret = btrfs_next_item(root, &path);
11030 btrfs_release_path(&path);
11031 if (found_count != count) {
11033 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
11034 bytenr, len, root_id, objectid, offset, count, found_count);
11035 return REFERENCER_MISSING;
11041 * Check if the referencer of a shared data backref exists
11043 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
11044 u64 parent, u64 bytenr)
11046 struct extent_buffer *eb;
11047 struct btrfs_key key;
11048 struct btrfs_file_extent_item *fi;
11050 int found_parent = 0;
11053 eb = read_tree_block(fs_info, parent, 0);
11054 if (!extent_buffer_uptodate(eb))
11057 nr = btrfs_header_nritems(eb);
11058 for (i = 0; i < nr; i++) {
11059 btrfs_item_key_to_cpu(eb, &key, i);
11060 if (key.type != BTRFS_EXTENT_DATA_KEY)
11063 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
11064 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
11067 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
11074 free_extent_buffer(eb);
11075 if (!found_parent) {
11076 error("shared extent %llu referencer lost (parent: %llu)",
11078 return REFERENCER_MISSING;
11084 * This function will check a given extent item, including its backref and
11085 * itself (like crossing stripe boundary and type)
11087 * Since we don't use extent_record anymore, introduce new error bit
11089 static int check_extent_item(struct btrfs_fs_info *fs_info,
11090 struct extent_buffer *eb, int slot)
11092 struct btrfs_extent_item *ei;
11093 struct btrfs_extent_inline_ref *iref;
11094 struct btrfs_extent_data_ref *dref;
11098 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11099 u32 item_size = btrfs_item_size_nr(eb, slot);
11104 struct btrfs_key key;
11108 btrfs_item_key_to_cpu(eb, &key, slot);
11109 if (key.type == BTRFS_EXTENT_ITEM_KEY)
11110 bytes_used += key.offset;
11112 bytes_used += nodesize;
11114 if (item_size < sizeof(*ei)) {
11116 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
11117 * old thing when on disk format is still un-determined.
11118 * No need to care about it anymore
11120 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
11124 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
11125 flags = btrfs_extent_flags(eb, ei);
11127 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
11129 if (metadata && check_crossing_stripes(global_info, key.objectid,
11131 error("bad metadata [%llu, %llu) crossing stripe boundary",
11132 key.objectid, key.objectid + nodesize);
11133 err |= CROSSING_STRIPE_BOUNDARY;
11136 ptr = (unsigned long)(ei + 1);
11138 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
11139 /* Old EXTENT_ITEM metadata */
11140 struct btrfs_tree_block_info *info;
11142 info = (struct btrfs_tree_block_info *)ptr;
11143 level = btrfs_tree_block_level(eb, info);
11144 ptr += sizeof(struct btrfs_tree_block_info);
11146 /* New METADATA_ITEM */
11147 level = key.offset;
11149 end = (unsigned long)ei + item_size;
11152 /* Reached extent item end normally */
11156 /* Beyond extent item end, wrong item size */
11158 err |= ITEM_SIZE_MISMATCH;
11159 error("extent item at bytenr %llu slot %d has wrong size",
11164 /* Now check every backref in this extent item */
11165 iref = (struct btrfs_extent_inline_ref *)ptr;
11166 type = btrfs_extent_inline_ref_type(eb, iref);
11167 offset = btrfs_extent_inline_ref_offset(eb, iref);
11169 case BTRFS_TREE_BLOCK_REF_KEY:
11170 ret = check_tree_block_backref(fs_info, offset, key.objectid,
11174 case BTRFS_SHARED_BLOCK_REF_KEY:
11175 ret = check_shared_block_backref(fs_info, offset, key.objectid,
11179 case BTRFS_EXTENT_DATA_REF_KEY:
11180 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11181 ret = check_extent_data_backref(fs_info,
11182 btrfs_extent_data_ref_root(eb, dref),
11183 btrfs_extent_data_ref_objectid(eb, dref),
11184 btrfs_extent_data_ref_offset(eb, dref),
11185 key.objectid, key.offset,
11186 btrfs_extent_data_ref_count(eb, dref));
11189 case BTRFS_SHARED_DATA_REF_KEY:
11190 ret = check_shared_data_backref(fs_info, offset, key.objectid);
11194 error("extent[%llu %d %llu] has unknown ref type: %d",
11195 key.objectid, key.type, key.offset, type);
11196 err |= UNKNOWN_TYPE;
11200 ptr += btrfs_extent_inline_ref_size(type);
11208 * Check if a dev extent item is referred correctly by its chunk
11210 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
11211 struct extent_buffer *eb, int slot)
11213 struct btrfs_root *chunk_root = fs_info->chunk_root;
11214 struct btrfs_dev_extent *ptr;
11215 struct btrfs_path path;
11216 struct btrfs_key chunk_key;
11217 struct btrfs_key devext_key;
11218 struct btrfs_chunk *chunk;
11219 struct extent_buffer *l;
11223 int found_chunk = 0;
11226 btrfs_item_key_to_cpu(eb, &devext_key, slot);
11227 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
11228 length = btrfs_dev_extent_length(eb, ptr);
11230 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
11231 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11232 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
11234 btrfs_init_path(&path);
11235 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11240 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
11241 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
11246 if (btrfs_stripe_length(fs_info, l, chunk) != length)
11249 num_stripes = btrfs_chunk_num_stripes(l, chunk);
11250 for (i = 0; i < num_stripes; i++) {
11251 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
11252 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
11254 if (devid == devext_key.objectid &&
11255 offset == devext_key.offset) {
11261 btrfs_release_path(&path);
11262 if (!found_chunk) {
11264 "device extent[%llu, %llu, %llu] did not find the related chunk",
11265 devext_key.objectid, devext_key.offset, length);
11266 return REFERENCER_MISSING;
11272 * Check if the used space is correct with the dev item
11274 static int check_dev_item(struct btrfs_fs_info *fs_info,
11275 struct extent_buffer *eb, int slot)
11277 struct btrfs_root *dev_root = fs_info->dev_root;
11278 struct btrfs_dev_item *dev_item;
11279 struct btrfs_path path;
11280 struct btrfs_key key;
11281 struct btrfs_dev_extent *ptr;
11287 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
11288 dev_id = btrfs_device_id(eb, dev_item);
11289 used = btrfs_device_bytes_used(eb, dev_item);
11291 key.objectid = dev_id;
11292 key.type = BTRFS_DEV_EXTENT_KEY;
11295 btrfs_init_path(&path);
11296 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
11298 btrfs_item_key_to_cpu(eb, &key, slot);
11299 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
11300 key.objectid, key.type, key.offset);
11301 btrfs_release_path(&path);
11302 return REFERENCER_MISSING;
11305 /* Iterate dev_extents to calculate the used space of a device */
11307 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11310 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11311 if (key.objectid > dev_id)
11313 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11316 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11317 struct btrfs_dev_extent);
11318 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11320 ret = btrfs_next_item(dev_root, &path);
11324 btrfs_release_path(&path);
11326 if (used != total) {
11327 btrfs_item_key_to_cpu(eb, &key, slot);
11329 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11330 total, used, BTRFS_ROOT_TREE_OBJECTID,
11331 BTRFS_DEV_EXTENT_KEY, dev_id);
11332 return ACCOUNTING_MISMATCH;
11338 * Check a block group item with its referener (chunk) and its used space
11339 * with extent/metadata item
11341 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11342 struct extent_buffer *eb, int slot)
11344 struct btrfs_root *extent_root = fs_info->extent_root;
11345 struct btrfs_root *chunk_root = fs_info->chunk_root;
11346 struct btrfs_block_group_item *bi;
11347 struct btrfs_block_group_item bg_item;
11348 struct btrfs_path path;
11349 struct btrfs_key bg_key;
11350 struct btrfs_key chunk_key;
11351 struct btrfs_key extent_key;
11352 struct btrfs_chunk *chunk;
11353 struct extent_buffer *leaf;
11354 struct btrfs_extent_item *ei;
11355 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11363 btrfs_item_key_to_cpu(eb, &bg_key, slot);
11364 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11365 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11366 used = btrfs_block_group_used(&bg_item);
11367 bg_flags = btrfs_block_group_flags(&bg_item);
11369 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11370 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11371 chunk_key.offset = bg_key.objectid;
11373 btrfs_init_path(&path);
11374 /* Search for the referencer chunk */
11375 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11378 "block group[%llu %llu] did not find the related chunk item",
11379 bg_key.objectid, bg_key.offset);
11380 err |= REFERENCER_MISSING;
11382 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11383 struct btrfs_chunk);
11384 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11387 "block group[%llu %llu] related chunk item length does not match",
11388 bg_key.objectid, bg_key.offset);
11389 err |= REFERENCER_MISMATCH;
11392 btrfs_release_path(&path);
11394 /* Search from the block group bytenr */
11395 extent_key.objectid = bg_key.objectid;
11396 extent_key.type = 0;
11397 extent_key.offset = 0;
11399 btrfs_init_path(&path);
11400 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11404 /* Iterate extent tree to account used space */
11406 leaf = path.nodes[0];
11408 /* Search slot can point to the last item beyond leaf nritems */
11409 if (path.slots[0] >= btrfs_header_nritems(leaf))
11412 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11413 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11416 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11417 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11419 if (extent_key.objectid < bg_key.objectid)
11422 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11425 total += extent_key.offset;
11427 ei = btrfs_item_ptr(leaf, path.slots[0],
11428 struct btrfs_extent_item);
11429 flags = btrfs_extent_flags(leaf, ei);
11430 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11431 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11433 "bad extent[%llu, %llu) type mismatch with chunk",
11434 extent_key.objectid,
11435 extent_key.objectid + extent_key.offset);
11436 err |= CHUNK_TYPE_MISMATCH;
11438 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11439 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11440 BTRFS_BLOCK_GROUP_METADATA))) {
11442 "bad extent[%llu, %llu) type mismatch with chunk",
11443 extent_key.objectid,
11444 extent_key.objectid + nodesize);
11445 err |= CHUNK_TYPE_MISMATCH;
11449 ret = btrfs_next_item(extent_root, &path);
11455 btrfs_release_path(&path);
11457 if (total != used) {
11459 "block group[%llu %llu] used %llu but extent items used %llu",
11460 bg_key.objectid, bg_key.offset, used, total);
11461 err |= ACCOUNTING_MISMATCH;
11467 * Check a chunk item.
11468 * Including checking all referred dev_extents and block group
11470 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11471 struct extent_buffer *eb, int slot)
11473 struct btrfs_root *extent_root = fs_info->extent_root;
11474 struct btrfs_root *dev_root = fs_info->dev_root;
11475 struct btrfs_path path;
11476 struct btrfs_key chunk_key;
11477 struct btrfs_key bg_key;
11478 struct btrfs_key devext_key;
11479 struct btrfs_chunk *chunk;
11480 struct extent_buffer *leaf;
11481 struct btrfs_block_group_item *bi;
11482 struct btrfs_block_group_item bg_item;
11483 struct btrfs_dev_extent *ptr;
11495 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11496 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11497 length = btrfs_chunk_length(eb, chunk);
11498 chunk_end = chunk_key.offset + length;
11499 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11502 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11504 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11507 type = btrfs_chunk_type(eb, chunk);
11509 bg_key.objectid = chunk_key.offset;
11510 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11511 bg_key.offset = length;
11513 btrfs_init_path(&path);
11514 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11517 "chunk[%llu %llu) did not find the related block group item",
11518 chunk_key.offset, chunk_end);
11519 err |= REFERENCER_MISSING;
11521 leaf = path.nodes[0];
11522 bi = btrfs_item_ptr(leaf, path.slots[0],
11523 struct btrfs_block_group_item);
11524 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11526 if (btrfs_block_group_flags(&bg_item) != type) {
11528 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11529 chunk_key.offset, chunk_end, type,
11530 btrfs_block_group_flags(&bg_item));
11531 err |= REFERENCER_MISSING;
11535 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11536 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11537 for (i = 0; i < num_stripes; i++) {
11538 btrfs_release_path(&path);
11539 btrfs_init_path(&path);
11540 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11541 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11542 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11544 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11547 goto not_match_dev;
11549 leaf = path.nodes[0];
11550 ptr = btrfs_item_ptr(leaf, path.slots[0],
11551 struct btrfs_dev_extent);
11552 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11553 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11554 if (objectid != chunk_key.objectid ||
11555 offset != chunk_key.offset ||
11556 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11557 goto not_match_dev;
11560 err |= BACKREF_MISSING;
11562 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11563 chunk_key.objectid, chunk_end, i);
11566 btrfs_release_path(&path);
11572 * Main entry function to check known items and update related accounting info
11574 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11576 struct btrfs_fs_info *fs_info = root->fs_info;
11577 struct btrfs_key key;
11580 struct btrfs_extent_data_ref *dref;
11585 btrfs_item_key_to_cpu(eb, &key, slot);
11589 case BTRFS_EXTENT_DATA_KEY:
11590 ret = check_extent_data_item(root, eb, slot);
11593 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11594 ret = check_block_group_item(fs_info, eb, slot);
11597 case BTRFS_DEV_ITEM_KEY:
11598 ret = check_dev_item(fs_info, eb, slot);
11601 case BTRFS_CHUNK_ITEM_KEY:
11602 ret = check_chunk_item(fs_info, eb, slot);
11605 case BTRFS_DEV_EXTENT_KEY:
11606 ret = check_dev_extent_item(fs_info, eb, slot);
11609 case BTRFS_EXTENT_ITEM_KEY:
11610 case BTRFS_METADATA_ITEM_KEY:
11611 ret = check_extent_item(fs_info, eb, slot);
11614 case BTRFS_EXTENT_CSUM_KEY:
11615 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11617 case BTRFS_TREE_BLOCK_REF_KEY:
11618 ret = check_tree_block_backref(fs_info, key.offset,
11622 case BTRFS_EXTENT_DATA_REF_KEY:
11623 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11624 ret = check_extent_data_backref(fs_info,
11625 btrfs_extent_data_ref_root(eb, dref),
11626 btrfs_extent_data_ref_objectid(eb, dref),
11627 btrfs_extent_data_ref_offset(eb, dref),
11629 btrfs_extent_data_ref_count(eb, dref));
11632 case BTRFS_SHARED_BLOCK_REF_KEY:
11633 ret = check_shared_block_backref(fs_info, key.offset,
11637 case BTRFS_SHARED_DATA_REF_KEY:
11638 ret = check_shared_data_backref(fs_info, key.offset,
11646 if (++slot < btrfs_header_nritems(eb))
11653 * Helper function for later fs/subvol tree check. To determine if a tree
11654 * block should be checked.
11655 * This function will ensure only the direct referencer with lowest rootid to
11656 * check a fs/subvolume tree block.
11658 * Backref check at extent tree would detect errors like missing subvolume
11659 * tree, so we can do aggressive check to reduce duplicated checks.
11661 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11663 struct btrfs_root *extent_root = root->fs_info->extent_root;
11664 struct btrfs_key key;
11665 struct btrfs_path path;
11666 struct extent_buffer *leaf;
11668 struct btrfs_extent_item *ei;
11674 struct btrfs_extent_inline_ref *iref;
11677 btrfs_init_path(&path);
11678 key.objectid = btrfs_header_bytenr(eb);
11679 key.type = BTRFS_METADATA_ITEM_KEY;
11680 key.offset = (u64)-1;
11683 * Any failure in backref resolving means we can't determine
11684 * whom the tree block belongs to.
11685 * So in that case, we need to check that tree block
11687 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11691 ret = btrfs_previous_extent_item(extent_root, &path,
11692 btrfs_header_bytenr(eb));
11696 leaf = path.nodes[0];
11697 slot = path.slots[0];
11698 btrfs_item_key_to_cpu(leaf, &key, slot);
11699 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11701 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11702 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11704 struct btrfs_tree_block_info *info;
11706 info = (struct btrfs_tree_block_info *)(ei + 1);
11707 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11710 item_size = btrfs_item_size_nr(leaf, slot);
11711 ptr = (unsigned long)iref;
11712 end = (unsigned long)ei + item_size;
11713 while (ptr < end) {
11714 iref = (struct btrfs_extent_inline_ref *)ptr;
11715 type = btrfs_extent_inline_ref_type(leaf, iref);
11716 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11719 * We only check the tree block if current root is
11720 * the lowest referencer of it.
11722 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11723 offset < root->objectid) {
11724 btrfs_release_path(&path);
11728 ptr += btrfs_extent_inline_ref_size(type);
11731 * Normally we should also check keyed tree block ref, but that may be
11732 * very time consuming. Inlined ref should already make us skip a lot
11733 * of refs now. So skip search keyed tree block ref.
11737 btrfs_release_path(&path);
11742 * Traversal function for tree block. We will do:
11743 * 1) Skip shared fs/subvolume tree blocks
11744 * 2) Update related bytes accounting
11745 * 3) Pre-order traversal
11747 static int traverse_tree_block(struct btrfs_root *root,
11748 struct extent_buffer *node)
11750 struct extent_buffer *eb;
11751 struct btrfs_key key;
11752 struct btrfs_key drop_key;
11760 * Skip shared fs/subvolume tree block, in that case they will
11761 * be checked by referencer with lowest rootid
11763 if (is_fstree(root->objectid) && !should_check(root, node))
11766 /* Update bytes accounting */
11767 total_btree_bytes += node->len;
11768 if (fs_root_objectid(btrfs_header_owner(node)))
11769 total_fs_tree_bytes += node->len;
11770 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11771 total_extent_tree_bytes += node->len;
11773 /* pre-order tranversal, check itself first */
11774 level = btrfs_header_level(node);
11775 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11776 btrfs_header_level(node),
11777 btrfs_header_owner(node));
11781 "check %s failed root %llu bytenr %llu level %d, force continue check",
11782 level ? "node":"leaf", root->objectid,
11783 btrfs_header_bytenr(node), btrfs_header_level(node));
11786 btree_space_waste += btrfs_leaf_free_space(root, node);
11787 ret = check_leaf_items(root, node);
11792 nr = btrfs_header_nritems(node);
11793 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11794 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11795 sizeof(struct btrfs_key_ptr);
11797 /* Then check all its children */
11798 for (i = 0; i < nr; i++) {
11799 u64 blocknr = btrfs_node_blockptr(node, i);
11801 btrfs_node_key_to_cpu(node, &key, i);
11802 if (level == root->root_item.drop_level &&
11803 is_dropped_key(&key, &drop_key))
11807 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11808 * to call the function itself.
11810 eb = read_tree_block(root->fs_info, blocknr, 0);
11811 if (extent_buffer_uptodate(eb)) {
11812 ret = traverse_tree_block(root, eb);
11815 free_extent_buffer(eb);
11822 * Low memory usage version check_chunks_and_extents.
11824 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
11826 struct btrfs_path path;
11827 struct btrfs_key key;
11828 struct btrfs_root *root1;
11829 struct btrfs_root *root;
11830 struct btrfs_root *cur_root;
11834 root = fs_info->fs_root;
11836 root1 = root->fs_info->chunk_root;
11837 ret = traverse_tree_block(root1, root1->node);
11840 root1 = root->fs_info->tree_root;
11841 ret = traverse_tree_block(root1, root1->node);
11844 btrfs_init_path(&path);
11845 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11847 key.type = BTRFS_ROOT_ITEM_KEY;
11849 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11851 error("cannot find extent treet in tree_root");
11856 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11857 if (key.type != BTRFS_ROOT_ITEM_KEY)
11859 key.offset = (u64)-1;
11861 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11862 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11865 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11866 if (IS_ERR(cur_root) || !cur_root) {
11867 error("failed to read tree: %lld", key.objectid);
11871 ret = traverse_tree_block(cur_root, cur_root->node);
11874 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11875 btrfs_free_fs_root(cur_root);
11877 ret = btrfs_next_item(root1, &path);
11883 btrfs_release_path(&path);
11887 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11891 if (!ctx.progress_enabled)
11892 fprintf(stderr, "checking extents\n");
11893 if (check_mode == CHECK_MODE_LOWMEM)
11894 ret = check_chunks_and_extents_v2(fs_info);
11896 ret = check_chunks_and_extents(fs_info);
11901 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11902 struct btrfs_root *root, int overwrite)
11904 struct extent_buffer *c;
11905 struct extent_buffer *old = root->node;
11908 struct btrfs_disk_key disk_key = {0,0,0};
11914 extent_buffer_get(c);
11917 c = btrfs_alloc_free_block(trans, root,
11918 root->fs_info->nodesize,
11919 root->root_key.objectid,
11920 &disk_key, level, 0, 0);
11923 extent_buffer_get(c);
11927 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11928 btrfs_set_header_level(c, level);
11929 btrfs_set_header_bytenr(c, c->start);
11930 btrfs_set_header_generation(c, trans->transid);
11931 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11932 btrfs_set_header_owner(c, root->root_key.objectid);
11934 write_extent_buffer(c, root->fs_info->fsid,
11935 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11937 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11938 btrfs_header_chunk_tree_uuid(c),
11941 btrfs_mark_buffer_dirty(c);
11943 * this case can happen in the following case:
11945 * 1.overwrite previous root.
11947 * 2.reinit reloc data root, this is because we skip pin
11948 * down reloc data tree before which means we can allocate
11949 * same block bytenr here.
11951 if (old->start == c->start) {
11952 btrfs_set_root_generation(&root->root_item,
11954 root->root_item.level = btrfs_header_level(root->node);
11955 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11956 &root->root_key, &root->root_item);
11958 free_extent_buffer(c);
11962 free_extent_buffer(old);
11964 add_root_to_dirty_list(root);
11968 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11969 struct extent_buffer *eb, int tree_root)
11971 struct extent_buffer *tmp;
11972 struct btrfs_root_item *ri;
11973 struct btrfs_key key;
11975 int level = btrfs_header_level(eb);
11981 * If we have pinned this block before, don't pin it again.
11982 * This can not only avoid forever loop with broken filesystem
11983 * but also give us some speedups.
11985 if (test_range_bit(&fs_info->pinned_extents, eb->start,
11986 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11989 btrfs_pin_extent(fs_info, eb->start, eb->len);
11991 nritems = btrfs_header_nritems(eb);
11992 for (i = 0; i < nritems; i++) {
11994 btrfs_item_key_to_cpu(eb, &key, i);
11995 if (key.type != BTRFS_ROOT_ITEM_KEY)
11997 /* Skip the extent root and reloc roots */
11998 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11999 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
12000 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
12002 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
12003 bytenr = btrfs_disk_root_bytenr(eb, ri);
12006 * If at any point we start needing the real root we
12007 * will have to build a stump root for the root we are
12008 * in, but for now this doesn't actually use the root so
12009 * just pass in extent_root.
12011 tmp = read_tree_block(fs_info, bytenr, 0);
12012 if (!extent_buffer_uptodate(tmp)) {
12013 fprintf(stderr, "Error reading root block\n");
12016 ret = pin_down_tree_blocks(fs_info, tmp, 0);
12017 free_extent_buffer(tmp);
12021 bytenr = btrfs_node_blockptr(eb, i);
12023 /* If we aren't the tree root don't read the block */
12024 if (level == 1 && !tree_root) {
12025 btrfs_pin_extent(fs_info, bytenr,
12026 fs_info->nodesize);
12030 tmp = read_tree_block(fs_info, bytenr, 0);
12031 if (!extent_buffer_uptodate(tmp)) {
12032 fprintf(stderr, "Error reading tree block\n");
12035 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
12036 free_extent_buffer(tmp);
12045 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
12049 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
12053 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
12056 static int reset_block_groups(struct btrfs_fs_info *fs_info)
12058 struct btrfs_block_group_cache *cache;
12059 struct btrfs_path path;
12060 struct extent_buffer *leaf;
12061 struct btrfs_chunk *chunk;
12062 struct btrfs_key key;
12066 btrfs_init_path(&path);
12068 key.type = BTRFS_CHUNK_ITEM_KEY;
12070 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
12072 btrfs_release_path(&path);
12077 * We do this in case the block groups were screwed up and had alloc
12078 * bits that aren't actually set on the chunks. This happens with
12079 * restored images every time and could happen in real life I guess.
12081 fs_info->avail_data_alloc_bits = 0;
12082 fs_info->avail_metadata_alloc_bits = 0;
12083 fs_info->avail_system_alloc_bits = 0;
12085 /* First we need to create the in-memory block groups */
12087 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12088 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
12090 btrfs_release_path(&path);
12098 leaf = path.nodes[0];
12099 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12100 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
12105 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
12106 btrfs_add_block_group(fs_info, 0,
12107 btrfs_chunk_type(leaf, chunk),
12108 key.objectid, key.offset,
12109 btrfs_chunk_length(leaf, chunk));
12110 set_extent_dirty(&fs_info->free_space_cache, key.offset,
12111 key.offset + btrfs_chunk_length(leaf, chunk));
12116 cache = btrfs_lookup_first_block_group(fs_info, start);
12120 start = cache->key.objectid + cache->key.offset;
12123 btrfs_release_path(&path);
12127 static int reset_balance(struct btrfs_trans_handle *trans,
12128 struct btrfs_fs_info *fs_info)
12130 struct btrfs_root *root = fs_info->tree_root;
12131 struct btrfs_path path;
12132 struct extent_buffer *leaf;
12133 struct btrfs_key key;
12134 int del_slot, del_nr = 0;
12138 btrfs_init_path(&path);
12139 key.objectid = BTRFS_BALANCE_OBJECTID;
12140 key.type = BTRFS_BALANCE_ITEM_KEY;
12142 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12147 goto reinit_data_reloc;
12152 ret = btrfs_del_item(trans, root, &path);
12155 btrfs_release_path(&path);
12157 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12158 key.type = BTRFS_ROOT_ITEM_KEY;
12160 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12164 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12169 ret = btrfs_del_items(trans, root, &path,
12176 btrfs_release_path(&path);
12179 ret = btrfs_search_slot(trans, root, &key, &path,
12186 leaf = path.nodes[0];
12187 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12188 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
12190 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
12195 del_slot = path.slots[0];
12204 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
12208 btrfs_release_path(&path);
12211 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
12212 key.type = BTRFS_ROOT_ITEM_KEY;
12213 key.offset = (u64)-1;
12214 root = btrfs_read_fs_root(fs_info, &key);
12215 if (IS_ERR(root)) {
12216 fprintf(stderr, "Error reading data reloc tree\n");
12217 ret = PTR_ERR(root);
12220 record_root_in_trans(trans, root);
12221 ret = btrfs_fsck_reinit_root(trans, root, 0);
12224 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
12226 btrfs_release_path(&path);
/*
 * Rebuild the extent tree inside the caller's transaction @trans.
 *
 * Steps visible here, in order:
 *  - filesystems with the MIXED_GROUPS incompat flag are reported as
 *    unsupported on stderr;
 *  - pin_metadata_blocks() pins the metadata that is in use;
 *  - the in-memory block groups are freed and reset_block_groups() recreates
 *    them;
 *  - fs_info->extent_root is reinitialized with btrfs_fsck_reinit_root();
 *  - every block group returned by btrfs_lookup_first_block_group() has its
 *    item inserted into the new extent root, followed by
 *    btrfs_extent_post_op();
 *  - reset_balance() is called last.
 * Each failing step prints its own message to stderr.
 */
12230 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
12231 struct btrfs_fs_info *fs_info)
12237 * The only reason we don't do this is because right now we're just
12238 * walking the trees we find and pinning down their bytes, we don't look
12239 * at any of the leaves. In order to do mixed groups we'd have to check
12240 * the leaves of any fs roots and pin down the bytes for any file
12241 * extents we find. Not hard but why do it if we don't have to?
12243 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
12244 fprintf(stderr, "We don't support re-initing the extent tree "
12245 "for mixed block groups yet, please notify a btrfs "
12246 "developer you want to do this so they can add this "
12247 "functionality.\n");
12252 * first we need to walk all of the trees except the extent tree and pin
12253 * down the bytes that are in use so we don't overwrite any existing
12256 ret = pin_metadata_blocks(fs_info);
12258 fprintf(stderr, "error pinning down used bytes\n");
12263 * Need to drop all the block groups since we're going to recreate all
12266 btrfs_free_block_groups(fs_info);
12267 ret = reset_block_groups(fs_info);
12269 fprintf(stderr, "error resetting the block groups\n");
12273 /* Ok we can allocate now, reinit the extent root */
12274 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
12276 fprintf(stderr, "extent root initialization failed\n");
12278 * When the transaction code is updated we should end the
12279 * transaction, but for now progs only knows about commit so
12280 * just return an error.
12286 * Now we have all the in-memory block groups setup so we can make
12287 * allocations properly, and the metadata we care about is safe since we
12288 * pinned all of it above.
12291 struct btrfs_block_group_cache *cache;
12293 cache = btrfs_lookup_first_block_group(fs_info, start);
12296 start = cache->key.objectid + cache->key.offset;
12297 ret = btrfs_insert_item(trans, fs_info->extent_root,
12298 &cache->key, &cache->item,
12299 sizeof(cache->item));
12301 fprintf(stderr, "Error adding block group\n");
12304 btrfs_extent_post_op(trans, fs_info->extent_root);
12307 ret = reset_balance(trans, fs_info);
12309 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Re-COW the metadata block @eb through the root that owns it.
 *
 * The owner root is read using the owner id stored in the block header, a
 * transaction is started on that root, and btrfs_search_slot() is run with
 * cow=1 down to the level of @eb using the first key of @eb.
 * Returns PTR_ERR() of the root or transaction when either cannot be obtained.
 * NOTE(review): the return value of btrfs_commit_transaction() is not
 * captured on the line below - confirm whether a commit failure should be
 * reported to the caller.
 */
12314 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
12316 struct btrfs_path path;
12317 struct btrfs_trans_handle *trans;
12318 struct btrfs_key key;
12321 printf("Recowing metadata block %llu\n", eb->start);
12322 key.objectid = btrfs_header_owner(eb);
12323 key.type = BTRFS_ROOT_ITEM_KEY;
12324 key.offset = (u64)-1;
12326 root = btrfs_read_fs_root(root->fs_info, &key);
12327 if (IS_ERR(root)) {
12328 fprintf(stderr, "Couldn't find owner root %llu\n",
12330 return PTR_ERR(root);
12333 trans = btrfs_start_transaction(root, 1);
12335 return PTR_ERR(trans);
12337 btrfs_init_path(&path);
/* Limit the search to eb's own level; the search key is eb's first key. */
12338 path.lowest_level = btrfs_header_level(eb);
12339 if (path.lowest_level)
12340 btrfs_node_key_to_cpu(eb, &key, 0);
12342 btrfs_item_key_to_cpu(eb, &key, 0);
12344 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12345 btrfs_commit_transaction(trans, root);
12346 btrfs_release_path(&path);
/*
 * Delete the item described by @bad from the root identified by bad->root_id.
 *
 * The owning root is read from bad->root_id, a transaction is started on it,
 * the item is located with btrfs_search_slot() (ins_len -1, cow 1) using
 * bad->key, and btrfs_del_item() removes it.
 * Returns PTR_ERR() of the root or transaction when either cannot be obtained.
 * NOTE(review): the return value of btrfs_commit_transaction() is not
 * captured on the line below - confirm whether that is intended.
 */
12350 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12352 struct btrfs_path path;
12353 struct btrfs_trans_handle *trans;
12354 struct btrfs_key key;
12357 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12358 bad->key.type, bad->key.offset);
12359 key.objectid = bad->root_id;
12360 key.type = BTRFS_ROOT_ITEM_KEY;
12361 key.offset = (u64)-1;
12363 root = btrfs_read_fs_root(root->fs_info, &key);
12364 if (IS_ERR(root)) {
12365 fprintf(stderr, "Couldn't find owner root %llu\n",
12367 return PTR_ERR(root);
12370 trans = btrfs_start_transaction(root, 1);
12372 return PTR_ERR(trans);
12374 btrfs_init_path(&path);
12375 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12381 ret = btrfs_del_item(trans, root, &path);
12383 btrfs_commit_transaction(trans, root);
12384 btrfs_release_path(&path);
/*
 * Drop the log tree reference from the super block.
 *
 * Starts a transaction on @root, sets both the log root bytenr and the log
 * root level in fs_info->super_copy to 0, and commits. ret carries
 * PTR_ERR(trans) when the transaction cannot be started, otherwise the
 * result of btrfs_commit_transaction().
 */
12388 static int zero_log_tree(struct btrfs_root *root)
12390 struct btrfs_trans_handle *trans;
12393 trans = btrfs_start_transaction(root, 1);
12394 if (IS_ERR(trans)) {
12395 ret = PTR_ERR(trans);
12398 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12399 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12400 ret = btrfs_commit_transaction(trans, root);
/*
 * Checksum one data extent, sector by sector.
 *
 * For every offset in [0, len) stepping by fs_info->sectorsize, the data at
 * @start + offset is read into @buf with read_extent_data() and handed to
 * btrfs_csum_file_block() so that a checksum is recorded in @csum_root.
 * The callers in this file allocate @buf with fs_info->sectorsize bytes.
 */
12404 static int populate_csum(struct btrfs_trans_handle *trans,
12405 struct btrfs_root *csum_root, char *buf, u64 start,
12408 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12413 while (offset < len) {
12414 sectorsize = fs_info->sectorsize;
12415 ret = read_extent_data(fs_info, buf, start + offset,
/* Arguments: start + len is the end of the extent, start + offset is the sector being checksummed. */
12419 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12420 start + offset, buf, sectorsize);
12423 offset += sectorsize;
/*
 * Regenerate checksums for the file data referenced by one fs/subvolume tree.
 *
 * Walks the items of @cur_root with btrfs_next_item(). Only
 * BTRFS_EXTENT_DATA_KEY items whose file extent type is BTRFS_FILE_EXTENT_REG
 * are passed to populate_csum(), using the extent's on-disk bytenr and
 * on-disk length. A -EEXIST result from populate_csum() is tested for
 * separately (presumably tolerated for extents referenced more than once -
 * TODO confirm). A one-sector scratch buffer is allocated for the reads.
 */
12428 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12429 struct btrfs_root *csum_root,
12430 struct btrfs_root *cur_root)
12432 struct btrfs_path path;
12433 struct btrfs_key key;
12434 struct extent_buffer *node;
12435 struct btrfs_file_extent_item *fi;
12442 buf = malloc(cur_root->fs_info->sectorsize);
12446 btrfs_init_path(&path);
12450 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12453 /* Iterate all regular file extents and fill its csum */
12455 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12457 if (key.type != BTRFS_EXTENT_DATA_KEY)
12459 node = path.nodes[0];
12460 slot = path.slots[0];
12461 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12462 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* start/len describe the extent as stored on disk, not the file range. */
12464 start = btrfs_file_extent_disk_bytenr(node, fi);
12465 len = btrfs_file_extent_disk_num_bytes(node, fi);
12467 ret = populate_csum(trans, csum_root, buf, start, len);
12468 if (ret == -EEXIST)
12474 * TODO: if next leaf is corrupted, jump to nearest next valid
12477 ret = btrfs_next_item(cur_root, &path);
12487 btrfs_release_path(&path);
/*
 * Regenerate the checksum tree by visiting every fs/subvolume tree.
 *
 * Searches the tree root starting at BTRFS_FS_TREE_OBJECTID and iterates
 * with btrfs_next_item(). Keys are filtered on three conditions: objectid
 * above BTRFS_LAST_FREE_OBJECTID, type other than BTRFS_ROOT_ITEM_KEY, and
 * objectid rejected by is_fstree(). For the remaining keys the root is read
 * with btrfs_read_fs_root() (key.offset forced to (u64)-1) and handed to
 * fill_csum_tree_from_one_fs_root(). A root that cannot be read is reported
 * on stderr.
 */
12492 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12493 struct btrfs_root *csum_root)
12495 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12496 struct btrfs_path path;
12497 struct btrfs_root *tree_root = fs_info->tree_root;
12498 struct btrfs_root *cur_root;
12499 struct extent_buffer *node;
12500 struct btrfs_key key;
12504 btrfs_init_path(&path);
12505 key.objectid = BTRFS_FS_TREE_OBJECTID;
12507 key.type = BTRFS_ROOT_ITEM_KEY;
12508 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12517 node = path.nodes[0];
12518 slot = path.slots[0];
12519 btrfs_item_key_to_cpu(node, &key, slot);
12520 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12522 if (key.type != BTRFS_ROOT_ITEM_KEY)
12524 if (!is_fstree(key.objectid))
12526 key.offset = (u64)-1;
12528 cur_root = btrfs_read_fs_root(fs_info, &key);
12529 if (IS_ERR(cur_root) || !cur_root) {
12530 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12534 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12539 ret = btrfs_next_item(tree_root, &path);
12549 btrfs_release_path(&path);
/*
 * Regenerate the checksum tree from the data extents in the extent tree.
 *
 * Walks the extent tree leaf by leaf (btrfs_next_leaf() once the slot runs
 * past the leaf's item count). Only BTRFS_EXTENT_ITEM_KEY items whose extent
 * flags include BTRFS_EXTENT_FLAG_DATA are checksummed, by calling
 * populate_csum() with key.objectid as the extent start. A one-sector scratch
 * buffer is allocated for the reads.
 */
12553 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12554 struct btrfs_root *csum_root)
12556 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12557 struct btrfs_path path;
12558 struct btrfs_extent_item *ei;
12559 struct extent_buffer *leaf;
12561 struct btrfs_key key;
12564 btrfs_init_path(&path);
12566 key.type = BTRFS_EXTENT_ITEM_KEY;
12568 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12570 btrfs_release_path(&path);
12574 buf = malloc(csum_root->fs_info->sectorsize);
12576 btrfs_release_path(&path);
12581 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12582 ret = btrfs_next_leaf(extent_root, &path);
12590 leaf = path.nodes[0];
12592 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12593 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12598 ei = btrfs_item_ptr(leaf, path.slots[0],
12599 struct btrfs_extent_item);
/* Only extents flagged as data carry checksums. */
12600 if (!(btrfs_extent_flags(leaf, ei) &
12601 BTRFS_EXTENT_FLAG_DATA)) {
12606 ret = populate_csum(trans, csum_root, buf, key.objectid,
12613 btrfs_release_path(&path);
12619 * Recalculate the csum and put it into the csum tree.
12621 * Extent tree init will wipe out all the extent info, so in that case, we
12622 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
12623 * will use fs/subvol trees to init the csum tree.
12625 static int fill_csum_tree(struct btrfs_trans_handle *trans,
12626 struct btrfs_root *csum_root,
12627 int search_fs_tree)
/*
 * Dispatch only: a non-zero @search_fs_tree selects the fs/subvolume tree
 * walk, otherwise the extent tree is used as the source of data extents.
 */
12629 if (search_fs_tree)
12630 return fill_csum_tree_from_fs(trans, csum_root);
12632 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache.
 *
 * Does nothing when the cache was never allocated. Otherwise entries are
 * removed one at a time, starting from the first, until the tree is empty;
 * then the tree itself is freed and the global pointer reset to NULL so
 * that build_roots_info_cache() can allocate a fresh one.
 */
12635 static void free_roots_info_cache(void)
12637 if (!roots_info_cache)
12640 while (!cache_tree_empty(roots_info_cache)) {
12641 struct cache_extent *entry;
12642 struct root_item_info *rii;
12644 entry = first_cache_extent(roots_info_cache);
12647 remove_cache_extent(roots_info_cache, entry);
/* Each cache_extent is embedded in a root_item_info. */
12648 rii = container_of(entry, struct root_item_info, cache_extent);
12652 free(roots_info_cache);
12653 roots_info_cache = NULL;
/*
 * Populate roots_info_cache from the extent tree.
 *
 * The cache is allocated on first use. The extent tree is then scanned for
 * tree block extents (METADATA_ITEM keys, or EXTENT_ITEM keys flagged
 * BTRFS_EXTENT_FLAG_TREE_BLOCK) whose first inline ref is of type
 * BTRFS_TREE_BLOCK_REF_KEY. The ref's offset is used as the root id, and one
 * root_item_info per root id is kept in the cache (start = root id,
 * size = 1). Each entry tracks the candidate root node seen for that root:
 * its level, bytenr, generation and a node count.
 */
12656 static int build_roots_info_cache(struct btrfs_fs_info *info)
12659 struct btrfs_key key;
12660 struct extent_buffer *leaf;
12661 struct btrfs_path path;
12663 if (!roots_info_cache) {
12664 roots_info_cache = malloc(sizeof(*roots_info_cache));
12665 if (!roots_info_cache)
12667 cache_tree_init(roots_info_cache);
12670 btrfs_init_path(&path);
12672 key.type = BTRFS_EXTENT_ITEM_KEY;
12674 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12677 leaf = path.nodes[0];
12680 struct btrfs_key found_key;
12681 struct btrfs_extent_item *ei;
12682 struct btrfs_extent_inline_ref *iref;
12683 int slot = path.slots[0];
12688 struct cache_extent *entry;
12689 struct root_item_info *rii;
12691 if (slot >= btrfs_header_nritems(leaf)) {
12692 ret = btrfs_next_leaf(info->extent_root, &path);
12699 leaf = path.nodes[0];
12700 slot = path.slots[0];
12703 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12705 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12706 found_key.type != BTRFS_METADATA_ITEM_KEY)
12709 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12710 flags = btrfs_extent_flags(leaf, ei);
12712 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12713 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * METADATA_ITEM: the level is the key offset and the inline refs follow the
 * extent item directly. EXTENT_ITEM: a btrfs_tree_block_info sits between
 * the extent item and the inline refs and holds the level.
 */
12716 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12717 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12718 level = found_key.offset;
12720 struct btrfs_tree_block_info *binfo;
12722 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12723 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12724 level = btrfs_tree_block_level(leaf, binfo);
12728 * For a root extent, it must be of the following type and the
12729 * first (and only one) iref in the item.
12731 type = btrfs_extent_inline_ref_type(leaf, iref);
12732 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12735 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12736 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12738 rii = malloc(sizeof(struct root_item_info));
12743 rii->cache_extent.start = root_id;
12744 rii->cache_extent.size = 1;
12745 rii->level = (u8)-1;
12746 entry = &rii->cache_extent;
12747 ret = insert_cache_extent(roots_info_cache, entry);
12750 rii = container_of(entry, struct root_item_info,
12754 ASSERT(rii->cache_extent.start == root_id);
12755 ASSERT(rii->cache_extent.size == 1);
/*
 * (u8)-1 marks an entry that has no candidate yet. A block at a higher level
 * than the current candidate replaces it and resets node_count to 1; blocks
 * at the same level are handled by the else-if branch.
 */
12757 if (level > rii->level || rii->level == (u8)-1) {
12758 rii->level = level;
12759 rii->bytenr = found_key.objectid;
12760 rii->gen = btrfs_extent_generation(leaf, ei);
12761 rii->node_count = 1;
12762 } else if (level == rii->level) {
12770 btrfs_release_path(&path);
/*
 * Check one root item against roots_info_cache and optionally fix it.
 *
 * @path:           points at the root item in the tree root leaf
 * @root_key:       key of that root item; its objectid is the root id
 * @read_only_mode: when non-zero the item is only compared, never rewritten
 *
 * The cached root_item_info for the root id must exist and have a node_count
 * of exactly 1; otherwise an error message is produced. When the item's
 * bytenr, level or generation differs from the cached values the mismatch is
 * printed (except for the read-only probe done in repair mode). An item with
 * a newer generation than the found root node triggers a separate message.
 * Outside read-only mode the three fields are overwritten with the cached
 * values and written back into the leaf.
 */
12775 static int maybe_repair_root_item(struct btrfs_path *path,
12776 const struct btrfs_key *root_key,
12777 const int read_only_mode)
12779 const u64 root_id = root_key->objectid;
12780 struct cache_extent *entry;
12781 struct root_item_info *rii;
12782 struct btrfs_root_item ri;
12783 unsigned long offset;
12785 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12788 "Error: could not find extent items for root %llu\n",
12789 root_key->objectid);
12793 rii = container_of(entry, struct root_item_info, cache_extent);
12794 ASSERT(rii->cache_extent.start == root_id);
12795 ASSERT(rii->cache_extent.size == 1);
12797 if (rii->node_count != 1) {
12799 "Error: could not find btree root extent for root %llu\n",
/* Work on a local copy of the root item read out of the leaf. */
12804 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12805 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12807 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12808 btrfs_root_level(&ri) != rii->level ||
12809 btrfs_root_generation(&ri) != rii->gen) {
12812 * If we're in repair mode but our caller told us to not update
12813 * the root item, i.e. just check if it needs to be updated, don't
12814 * print this message, since the caller will call us again shortly
12815 * for the same root item without read only mode (the caller will
12816 * open a transaction first).
12818 if (!(read_only_mode && repair))
12820 "%sroot item for root %llu,"
12821 " current bytenr %llu, current gen %llu, current level %u,"
12822 " new bytenr %llu, new gen %llu, new level %u\n",
12823 (read_only_mode ? "" : "fixing "),
12825 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12826 btrfs_root_level(&ri),
12827 rii->bytenr, rii->gen, rii->level);
12829 if (btrfs_root_generation(&ri) > rii->gen) {
12831 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12832 root_id, btrfs_root_generation(&ri), rii->gen);
12836 if (!read_only_mode) {
12837 btrfs_set_root_bytenr(&ri, rii->bytenr);
12838 btrfs_set_root_level(&ri, rii->level);
12839 btrfs_set_root_generation(&ri, rii->gen);
12840 write_extent_buffer(path->nodes[0], &ri,
12841 offset, sizeof(ri));
12851 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12852 * caused read-only snapshots to be corrupted if they were created at a moment
12853 * when the source subvolume/snapshot had orphan items. The issue was that the
12854 * on-disk root items became incorrect, referring to the pre orphan cleanup root
12855 * node instead of the post orphan cleanup root node.
12856 * So this function, and its callees, just detects and fixes those cases. Even
12857 * though the regression was for read-only snapshots, this function applies to
12858 * any snapshot/subvolume root.
12859 * This must be run before any other repair code - not doing so makes other
12860 * repair code delete or modify backrefs in the extent tree for example, which
12861 * will result in an inconsistent fs after repairing the root items.
/*
 * Detect, and in repair mode fix, root items that point at a stale root node.
 *
 * build_roots_info_cache() is run first. The tree root is then walked from
 * BTRFS_FIRST_FREE_OBJECTID; every BTRFS_ROOT_ITEM_KEY item except
 * BTRFS_TREE_RELOC_OBJECTID is passed to maybe_repair_root_item(). That call
 * is read-only while no transaction is open (trans == NULL). When such a
 * probe is made in repair mode, the path is released and need_trans
 * presumably triggers a second pass with a transaction - TODO confirm.
 * The cache is freed before returning. cmd_check() prints a positive return
 * value as the number of roots fixed or found outdated.
 * NOTE(review): the result of the final btrfs_commit_transaction() call is
 * not captured on that line.
 */
12863 static int repair_root_items(struct btrfs_fs_info *info)
12865 struct btrfs_path path;
12866 struct btrfs_key key;
12867 struct extent_buffer *leaf;
12868 struct btrfs_trans_handle *trans = NULL;
12871 int need_trans = 0;
12873 btrfs_init_path(&path);
12875 ret = build_roots_info_cache(info);
12879 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12880 key.type = BTRFS_ROOT_ITEM_KEY;
12885 * Avoid opening and committing transactions if a leaf doesn't have
12886 * any root items that need to be fixed, so that we avoid rotating
12887 * backup roots unnecessarily.
12890 trans = btrfs_start_transaction(info->tree_root, 1);
12891 if (IS_ERR(trans)) {
12892 ret = PTR_ERR(trans);
12897 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12901 leaf = path.nodes[0];
12904 struct btrfs_key found_key;
12906 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12907 int no_more_keys = find_next_key(&path, &key);
12909 btrfs_release_path(&path);
12911 ret = btrfs_commit_transaction(trans,
12923 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12925 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12927 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12930 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12934 if (!trans && repair) {
12937 btrfs_release_path(&path);
12947 free_roots_info_cache();
12948 btrfs_release_path(&path);
12950 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the v1 free space cache of every block group.
 *
 * Block groups are visited in address order: each lookup starts at the end
 * of the previous group (objectid + offset) and the group is passed to
 * btrfs_clear_free_space_cache(). Afterwards a transaction is started on the
 * tree root and the super block's cache generation is set to (u64)-1.
 * Returns PTR_ERR(trans) when that transaction cannot be started.
 * NOTE(review): the return value of btrfs_commit_transaction() is not
 * captured on the last line - confirm whether a failed commit should
 * surface as an error.
 */
12957 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12959 struct btrfs_trans_handle *trans;
12960 struct btrfs_block_group_cache *bg_cache;
12964 /* Clear all free space cache inodes and its extent data */
12966 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12969 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12972 current = bg_cache->key.objectid + bg_cache->key.offset;
12975 /* Don't forget to set cache_generation to -1 */
12976 trans = btrfs_start_transaction(fs_info->tree_root, 0);
12977 if (IS_ERR(trans)) {
12978 error("failed to update super block cache generation");
12979 return PTR_ERR(trans);
12981 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12982 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Handle --clear-space-cache for the requested cache version.
 *
 * clear_version 1: when the filesystem has the FREE_SPACE_TREE compat_ro
 * flag, a message pointing the user at "v2" is produced; otherwise
 * clear_free_space_cache() is run.
 * clear_version 2: when the FREE_SPACE_TREE flag is absent, a message says
 * there is nothing to clear; otherwise btrfs_clear_free_space_tree() is run.
 * Progress and failures are reported with printf()/error().
 */
12987 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
12992 if (clear_version == 1) {
12993 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
12995 "free space cache v2 detected, use --clear-space-cache v2");
12999 printf("Clearing free space cache\n");
13000 ret = clear_free_space_cache(fs_info);
13002 error("failed to clear free space cache");
13005 printf("Free space cache cleared\n");
13007 } else if (clear_version == 2) {
13008 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13009 printf("no free space cache v2 to clear\n");
13013 printf("Clear free space cache v2\n");
13014 ret = btrfs_clear_free_space_tree(fs_info);
13016 error("failed to clear free space cache v2: %d", ret);
13019 printf("free space cache v2 cleared\n");
/*
 * Help text for "btrfs check", passed to usage() by cmd_check().
 * The first entries give the synopsis and description; the remaining entries
 * describe one option each and should stay in sync with long_options[] in
 * cmd_check().
 */
13026 const char * const cmd_check_usage[] = {
13027 "btrfs check [options] <device>",
13028 "Check structural integrity of a filesystem (unmounted).",
13029 "Check structural integrity of an unmounted filesystem. Verify internal",
13030 "trees' consistency and item connectivity. In the repair mode try to",
13031 "fix the problems found. ",
13032 "WARNING: the repair mode is considered dangerous",
13034 "-s|--super <superblock> use this superblock copy",
13035 "-b|--backup use the first valid backup root copy",
13036 "--force skip mount checks, repair is not possible",
13037 "--repair try to repair the filesystem",
13038 "--readonly run in read-only mode (default)",
13039 "--init-csum-tree create a new CRC tree",
13040 "--init-extent-tree create a new extent tree",
13041 "--mode <MODE> allows choice of memory/IO trade-offs",
13042 " where MODE is one of:",
13043 " original - read inodes and extents to memory (requires",
13044 " more memory, does less IO)",
13045 " lowmem - try to use less memory but read blocks again",
13047 "--check-data-csum verify checksums of data blocks",
13048 "-Q|--qgroup-report print a report on qgroup consistency",
13049 "-E|--subvol-extents <subvolid>",
13050 " print subvolume extents and sharing state",
13051 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
13052 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
13053 "-p|--progress indicate progress",
13054 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of "btrfs check".
 *
 * Parses the options described in cmd_check_usage[], requires exactly one
 * positional argument (the device), and opens the filesystem with the
 * accumulated ctree_flags. The stages visible below then run in order:
 * optional space cache clearing, log tree zeroing in repair mode, the
 * qgroup / subvolume extent reports, optional extent and csum tree
 * re-initialization, chunk and extent checks, root item repair, free space
 * cache/tree check, fs roots, csums, root refs, re-COW of blocks with
 * transid errors, deletion of queued bad items, and quota group
 * verification. A summary of the byte counters is printed at the end.
 */
13058 int cmd_check(int argc, char **argv)
13060 struct cache_tree root_cache;
13061 struct btrfs_root *root;
13062 struct btrfs_fs_info *info;
13065 u64 tree_root_bytenr = 0;
13066 u64 chunk_root_bytenr = 0;
13067 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
13071 int init_csum_tree = 0;
13073 int clear_space_cache = 0;
13074 int qgroup_report = 0;
13075 int qgroups_repaired = 0;
13076 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
13081 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
13082 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
13083 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
13084 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
13085 GETOPT_VAL_FORCE };
13086 static const struct option long_options[] = {
13087 { "super", required_argument, NULL, 's' },
13088 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
13089 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
13090 { "init-csum-tree", no_argument, NULL,
13091 GETOPT_VAL_INIT_CSUM },
13092 { "init-extent-tree", no_argument, NULL,
13093 GETOPT_VAL_INIT_EXTENT },
13094 { "check-data-csum", no_argument, NULL,
13095 GETOPT_VAL_CHECK_CSUM },
13096 { "backup", no_argument, NULL, 'b' },
13097 { "subvol-extents", required_argument, NULL, 'E' },
13098 { "qgroup-report", no_argument, NULL, 'Q' },
13099 { "tree-root", required_argument, NULL, 'r' },
13100 { "chunk-root", required_argument, NULL,
13101 GETOPT_VAL_CHUNK_TREE },
13102 { "progress", no_argument, NULL, 'p' },
13103 { "mode", required_argument, NULL,
13105 { "clear-space-cache", required_argument, NULL,
13106 GETOPT_VAL_CLEAR_SPACE_CACHE},
13107 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
13108 { NULL, 0, NULL, 0}
/*
 * 'E' takes an argument: the "subvol-extents" long option is
 * required_argument and the handler below passes optarg to arg_strtou64().
 * The short option string therefore needs "E:"; with a bare "E" getopt
 * leaves optarg NULL for -E.
 */
13111 c = getopt_long(argc, argv, "as:br:pE:Q", long_options, NULL);
13115 case 'a': /* ignored */ break;
13117 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
13120 num = arg_strtou64(optarg);
13121 if (num >= BTRFS_SUPER_MIRROR_MAX) {
13123 "super mirror should be less than %d",
13124 BTRFS_SUPER_MIRROR_MAX);
13127 bytenr = btrfs_sb_offset(((int)num));
13128 printf("using SB copy %llu, bytenr %llu\n", num,
13129 (unsigned long long)bytenr);
13135 subvolid = arg_strtou64(optarg);
13138 tree_root_bytenr = arg_strtou64(optarg);
13140 case GETOPT_VAL_CHUNK_TREE:
13141 chunk_root_bytenr = arg_strtou64(optarg);
13144 ctx.progress_enabled = true;
13148 usage(cmd_check_usage);
13149 case GETOPT_VAL_REPAIR:
13150 printf("enabling repair mode\n");
13152 ctree_flags |= OPEN_CTREE_WRITES;
13154 case GETOPT_VAL_READONLY:
13157 case GETOPT_VAL_INIT_CSUM:
13158 printf("Creating a new CRC tree\n");
13159 init_csum_tree = 1;
13161 ctree_flags |= OPEN_CTREE_WRITES;
13163 case GETOPT_VAL_INIT_EXTENT:
13164 init_extent_tree = 1;
13165 ctree_flags |= (OPEN_CTREE_WRITES |
13166 OPEN_CTREE_NO_BLOCK_GROUPS);
13169 case GETOPT_VAL_CHECK_CSUM:
13170 check_data_csum = 1;
13172 case GETOPT_VAL_MODE:
13173 check_mode = parse_check_mode(optarg);
13174 if (check_mode == CHECK_MODE_UNKNOWN) {
13175 error("unknown mode: %s", optarg);
13179 case GETOPT_VAL_CLEAR_SPACE_CACHE:
13180 if (strcmp(optarg, "v1") == 0) {
13181 clear_space_cache = 1;
13182 } else if (strcmp(optarg, "v2") == 0) {
13183 clear_space_cache = 2;
13184 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
13187 "invalid argument to --clear-space-cache, must be v1 or v2");
13190 ctree_flags |= OPEN_CTREE_WRITES;
13192 case GETOPT_VAL_FORCE:
13198 if (check_argc_exact(argc - optind, 1))
13199 usage(cmd_check_usage);
13201 if (ctx.progress_enabled) {
13202 ctx.tp = TASK_NOTHING;
13203 ctx.info = task_init(print_status_check, print_status_return, &ctx);
13206 /* This check is the only reason for --readonly to exist */
13207 if (readonly && repair) {
13208 error("repair options are not compatible with --readonly");
13213 * experimental and dangerous
13215 if (repair && check_mode == CHECK_MODE_LOWMEM)
13216 warning("low-memory mode repair support is only partial");
13219 cache_tree_init(&root_cache);
13221 ret = check_mounted(argv[optind]);
13224 error("could not check mount status: %s",
13230 "%s is currently mounted, use --force if you really intend to check the filesystem",
13238 error("repair and --force is not yet supported");
13245 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
13249 "filesystem mounted, continuing because of --force");
13251 /* A block device is mounted in exclusive mode by kernel */
13252 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
13255 /* only allow partial opening under repair mode */
13257 ctree_flags |= OPEN_CTREE_PARTIAL;
13259 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
13260 chunk_root_bytenr, ctree_flags);
13262 error("cannot open file system");
13268 global_info = info;
13269 root = info->fs_root;
13270 uuid_unparse(info->super_copy->fsid, uuidbuf);
13272 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
13275 * Check the bare minimum before starting anything else that could rely
13276 * on it, namely the tree roots, any local consistency checks
13278 if (!extent_buffer_uptodate(info->tree_root->node) ||
13279 !extent_buffer_uptodate(info->dev_root->node) ||
13280 !extent_buffer_uptodate(info->chunk_root->node)) {
13281 error("critical roots corrupted, unable to check the filesystem");
13287 if (clear_space_cache) {
13288 ret = do_clear_free_space_cache(info, clear_space_cache);
13294 * repair mode will force us to commit transaction which
13295 * will make us fail to load log tree when mounting.
13297 if (repair && btrfs_super_log_root(info->super_copy)) {
13298 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
13304 ret = zero_log_tree(root);
13307 error("failed to zero log tree: %d", ret);
13312 if (qgroup_report) {
13313 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
13315 ret = qgroup_verify_all(info);
13322 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
13323 subvolid, argv[optind], uuidbuf);
13324 ret = print_extent_state(info, subvolid);
13329 if (init_extent_tree || init_csum_tree) {
13330 struct btrfs_trans_handle *trans;
13332 trans = btrfs_start_transaction(info->extent_root, 0);
13333 if (IS_ERR(trans)) {
13334 error("error starting transaction");
13335 ret = PTR_ERR(trans);
13340 if (init_extent_tree) {
13341 printf("Creating a new extent tree\n");
13342 ret = reinit_extent_tree(trans, info);
13348 if (init_csum_tree) {
13349 printf("Reinitialize checksum tree\n");
13350 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
13352 error("checksum tree initialization failed: %d",
13359 ret = fill_csum_tree(trans, info->csum_root,
13363 error("checksum tree refilling failed: %d", ret);
13368 * Ok now we commit and run the normal fsck, which will add
13369 * extent entries for all of the items it finds.
13371 ret = btrfs_commit_transaction(trans, info->extent_root);
13376 if (!extent_buffer_uptodate(info->extent_root->node)) {
13377 error("critical: extent_root, unable to check the filesystem");
13382 if (!extent_buffer_uptodate(info->csum_root->node)) {
13383 error("critical: csum_root, unable to check the filesystem");
13389 ret = do_check_chunks_and_extents(info);
13393 "errors found in extent allocation tree or chunk allocation");
13395 ret = repair_root_items(info);
13398 error("failed to repair root items: %s", strerror(-ret));
13402 fprintf(stderr, "Fixed %d roots.\n", ret);
13404 } else if (ret > 0) {
13406 "Found %d roots with an outdated root item.\n",
13409 "Please run a filesystem check with the option --repair to fix them.\n");
13415 if (!ctx.progress_enabled) {
13416 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13417 fprintf(stderr, "checking free space tree\n");
13419 fprintf(stderr, "checking free space cache\n");
13421 ret = check_space_cache(root);
13424 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13425 error("errors found in free space tree");
13427 error("errors found in free space cache");
13432 * We used to have to have these hole extents in between our real
13433 * extents so if we don't have this flag set we need to make sure there
13434 * are no gaps in the file extents for inodes, otherwise we can just
13435 * ignore it when this happens.
13437 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13438 ret = do_check_fs_roots(info, &root_cache);
13441 error("errors found in fs roots");
13445 fprintf(stderr, "checking csums\n");
13446 ret = check_csums(root);
13449 error("errors found in csum tree");
13453 fprintf(stderr, "checking root refs\n");
13454 /* For low memory mode, check_fs_roots_v2 handles root refs */
13455 if (check_mode != CHECK_MODE_LOWMEM) {
13456 ret = check_root_refs(root, &root_cache);
13459 error("errors found in root refs");
13464 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13465 struct extent_buffer *eb;
13467 eb = list_first_entry(&root->fs_info->recow_ebs,
13468 struct extent_buffer, recow);
13469 list_del_init(&eb->recow);
13470 ret = recow_extent_buffer(root, eb);
13473 error("fails to fix transid errors");
13478 while (!list_empty(&delete_items)) {
13479 struct bad_item *bad;
13481 bad = list_first_entry(&delete_items, struct bad_item, list);
13482 list_del_init(&bad->list);
13484 ret = delete_bad_item(root, bad);
13490 if (info->quota_enabled) {
13491 fprintf(stderr, "checking quota groups\n");
13492 ret = qgroup_verify_all(info);
13495 error("failed to check quota groups");
13499 ret = repair_qgroups(info, &qgroups_repaired);
13502 error("failed to repair quota groups");
13508 if (!list_empty(&root->fs_info->recow_ebs)) {
13509 error("transid errors in file system");
13514 printf("found %llu bytes used, ",
13515 (unsigned long long)bytes_used);
13517 printf("error(s) found\n");
13519 printf("no error found\n");
13520 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13521 printf("total tree bytes: %llu\n",
13522 (unsigned long long)total_btree_bytes);
13523 printf("total fs tree bytes: %llu\n",
13524 (unsigned long long)total_fs_tree_bytes);
13525 printf("total extent tree bytes: %llu\n",
13526 (unsigned long long)total_extent_tree_bytes);
13527 printf("btree space waste bytes: %llu\n",
13528 (unsigned long long)btree_space_waste);
13529 printf("file data blocks allocated: %llu\n referenced %llu\n",
13530 (unsigned long long)data_bytes_allocated,
13531 (unsigned long long)data_bytes_referenced);
13533 free_qgroup_counts();
13534 free_root_recs_tree(&root_cache);
13538 if (ctx.progress_enabled)
13539 task_deinit(ctx.info);