2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
/*
 * NOTE(review): the lines that open these declarations are not visible in
 * this excerpt.  TASK_NOTHING is the enumerator print_status_check() compares
 * the tp member against; tp and info are both read there through a
 * struct task_ctx pointer.
 */
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
/*
 * File-scope state of the checker.  Everything here is static (private to
 * this file) and starts out zero, NULL or as an empty list.
 */
/* Running u64 counters. */
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
/* List heads, initially empty. */
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
/* When non-zero, maybe_free_inode_rec() skips its file extent gap check. */
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
/* Shared task context; struct task_ctx is what print_status_check() reads. */
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
/*
 * Selector for the check implementation.  Only the tail of the enum is
 * visible in this excerpt: CHECK_MODE_DEFAULT is an alias for
 * CHECK_MODE_ORIGINAL, and the file-scope check_mode variable starts out as
 * that default.
 */
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
/*
 * Common header of the backref records: struct data_backref and
 * struct tree_backref both embed it as their "node" member.  Only the one-bit
 * flags are visible in this excerpt.
 */
87 struct extent_backref {
/* Expected set by compare_data_backref() and clear by compare_tree_backref(). */
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
/* Second sort key (after is_data) in compare_extent_backref(). */
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
98 return rb_entry(node, struct extent_backref, node);
/*
 * Backref record for a data extent.  The embedded struct extent_backref has
 * to stay named "node": to_data_backref() recovers this struct from it with
 * container_of().  The remaining members are not visible in this excerpt.
 */
101 struct data_backref {
102 struct extent_backref node;
/*
 * Error bit flags, one bit per condition (bits 1 through 17 in the part
 * visible here).  The trailing comment on each line gives its meaning.
 */
116 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
117 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
118 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
119 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
120 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
121 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but not match */
122 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
123 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
124 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
125 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
126 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
127 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
128 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM no reference */
129 #define NO_INODE_ITEM (1<<14) /* no inode_item */
130 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
131 #define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
132 #define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but not match */
134 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 return container_of(back, struct data_backref, node);
/*
 * Ordering callback for two data backrefs, given the rb_nodes embedded in
 * their struct extent_backref headers.
 *
 * Keys are compared in this order: parent, then (for non-full backrefs)
 * owner and offset, and finally disk_bytenr and bytes when both sides have
 * found_ref set.
 *
 * NOTE(review): the return statements that follow each comparison are not
 * visible in this excerpt; presumably the usual 1 / -1 / 0 convention.
 */
139 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
141 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
142 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
143 struct data_backref *back1 = to_data_backref(ext1);
144 struct data_backref *back2 = to_data_backref(ext2);
/* Both nodes are expected to be data backrefs; warn otherwise. */
146 WARN_ON(!ext1->is_data);
147 WARN_ON(!ext2->is_data);
149 /* parent and root are a union, so this covers both */
150 if (back1->parent > back2->parent)
152 if (back1->parent < back2->parent)
155 /* This is a full backref and the parents match. */
156 if (back1->node.full_backref)
159 if (back1->owner > back2->owner)
161 if (back1->owner < back2->owner)
164 if (back1->offset > back2->offset)
166 if (back1->offset < back2->offset)
/* The on-disk location only takes part when both sides have found_ref set. */
169 if (back1->found_ref && back2->found_ref) {
170 if (back1->disk_bytenr > back2->disk_bytenr)
172 if (back1->disk_bytenr < back2->disk_bytenr)
175 if (back1->bytes > back2->bytes)
177 if (back1->bytes < back2->bytes)
185 * Much like data_backref, just removed the undetermined members
186 * and change it to use list_head.
187 * During extent scan, it is stored in root->orphan_data_extent.
188 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
/*
 * List-linked record of an orphan data extent.  print_orphan_data_extents()
 * reads its objectid, offset and disk_bytenr members; those declarations are
 * not visible in this excerpt.
 */
190 struct orphan_data_extent {
/* Link used by the list_for_each_entry()/list_entry() walks over these. */
191 struct list_head list;
/*
 * Backref record for a tree block.  The embedded struct extent_backref has to
 * stay named "node": to_tree_backref() recovers this struct from it with
 * container_of().  The remaining members are not visible in this excerpt.
 */
199 struct tree_backref {
200 struct extent_backref node;
207 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
209 return container_of(back, struct tree_backref, node);
/*
 * Ordering callback for two tree backrefs; the only key visible here is
 * the parent member.
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 */
212 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
214 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
215 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
216 struct tree_backref *back1 = to_tree_backref(ext1);
217 struct tree_backref *back2 = to_tree_backref(ext2);
/* Both nodes are expected to be tree (non-data) backrefs; warn otherwise. */
219 WARN_ON(ext1->is_data);
220 WARN_ON(ext2->is_data);
222 /* parent and root are a union, so this covers both */
223 if (back1->parent > back2->parent)
225 if (back1->parent < back2->parent)
/*
 * Ordering callback for any two backrefs: compare the is_data flag first,
 * then the full_backref flag, and hand the rest of the comparison to
 * compare_data_backref() or compare_tree_backref().
 *
 * NOTE(review): the condition choosing between the two helpers and the
 * return statements of the flag comparisons are not visible in this excerpt.
 */
231 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
233 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
234 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
236 if (ext1->is_data > ext2->is_data)
239 if (ext1->is_data < ext2->is_data)
242 if (ext1->full_backref > ext2->full_backref)
244 if (ext1->full_backref < ext2->full_backref)
248 return compare_data_backref(node1, node2);
250 return compare_tree_backref(node1, node2);
253 /* Explicit initialization for extent_record::flag_block_full_backref */
/* The value 2 needs the two-bit width that flag_block_full_backref has. */
254 enum { FLAG_UNSET = 2 };
/*
 * Per-extent record.  It carries several list links, an rb tree root for
 * its backrefs, a cache_extent, and a set of flag bits; the members whose
 * lines were dropped from this excerpt are not shown.
 */
256 struct extent_record {
257 struct list_head backrefs;
258 struct list_head dups;
259 struct rb_root backref_tree;
/* Link that to_extent_record() converts back into the record. */
260 struct list_head list;
261 struct cache_extent cache;
262 struct btrfs_disk_key parent_key;
267 u64 extent_item_refs;
269 u64 parent_generation;
/* Two bits wide so that it can also hold FLAG_UNSET. */
273 unsigned int flag_block_full_backref:2;
274 unsigned int found_rec:1;
275 unsigned int content_checked:1;
276 unsigned int owner_ref_checked:1;
277 unsigned int is_root:1;
278 unsigned int metadata:1;
279 unsigned int bad_full_backref:1;
280 unsigned int crossing_stripes:1;
281 unsigned int wrong_chunk_type:1;
284 static inline struct extent_record* to_extent_record(struct list_head *entry)
286 return container_of(entry, struct extent_record, list);
/*
 * One (directory, name) reference to an inode, linked on the inode record's
 * backrefs list.  The found_* bits are set by add_inode_backref() according
 * to the item type that was seen.  Further members (dir, index, namelen,
 * filetype, errors, ref_type and the trailing name) are used elsewhere in the
 * file but their declarations are not visible in this excerpt.
 */
289 struct inode_backref {
290 struct list_head list;
291 unsigned int found_dir_item:1;
292 unsigned int found_dir_index:1;
293 unsigned int found_inode_ref:1;
303 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
305 return list_entry(entry, struct inode_backref, list);
/*
 * List-linked record about a root item.  Only the list link and the drop_key
 * member are visible in this excerpt.
 */
308 struct root_item_record {
309 struct list_head list;
315 struct btrfs_key drop_key;
/*
 * Error bits kept in a backref's errors mask.  print_ref_error() prints one
 * message per set bit, in the order the bits are listed here.
 */
318 #define REF_ERR_NO_DIR_ITEM (1 << 0)
319 #define REF_ERR_NO_DIR_INDEX (1 << 1)
320 #define REF_ERR_NO_INODE_REF (1 << 2)
321 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
322 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
323 #define REF_ERR_DUP_INODE_REF (1 << 5)
324 #define REF_ERR_INDEX_UNMATCH (1 << 6)
325 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
326 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 0x100
327 #define REF_ERR_NO_ROOT_REF (1 << 9)
328 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
329 #define REF_ERR_DUP_ROOT_REF (1 << 11)
330 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
/*
 * One hole in a file's extent coverage, kept in an rb tree.  The hole helpers
 * in this file use its node, start and len members; their declarations are
 * not visible in this excerpt.
 */
332 struct file_extent_hole {
/*
 * Everything collected about one inode while walking a tree.  Besides the
 * members shown, the code in this file also uses ino, refs, errors, nlink,
 * found_link, imode, isize, nbytes, found_size, extent_start and extent_end;
 * those declarations are not visible in this excerpt.
 */
338 struct inode_record {
/* List of struct inode_backref entries (see get_inode_backref()). */
339 struct list_head backrefs;
340 unsigned int checked:1;
341 unsigned int merging:1;
/* Set by process_inode_item() once the inode item has been read. */
342 unsigned int found_inode_item:1;
343 unsigned int found_dir_item:1;
344 unsigned int found_file_extent:1;
345 unsigned int found_csum_item:1;
346 unsigned int some_csum_missing:1;
347 unsigned int nodatasum:1;
/* rb tree of struct file_extent_hole. */
360 struct rb_root holes;
/* List of struct orphan_data_extent entries. */
361 struct list_head orphan_extents;
/*
 * Error bits kept in inode_record::errors.  print_inode_error() prints one
 * message per set bit, in the order the bits are listed here.
 */
366 #define I_ERR_NO_INODE_ITEM (1 << 0)
367 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
368 #define I_ERR_DUP_INODE_ITEM (1 << 2)
369 #define I_ERR_DUP_DIR_INDEX (1 << 3)
370 #define I_ERR_ODD_DIR_ITEM (1 << 4)
371 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
372 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
373 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
374 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 0x100
375 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
376 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 0x400
377 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
378 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
379 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
380 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
/*
 * List-linked reference record for a root, with one-bit flags for what has
 * been found so far.  Members beyond the ones shown are not visible in this
 * excerpt.
 */
382 struct root_backref {
/* Link that to_root_backref() converts back into the record. */
383 struct list_head list;
384 unsigned int found_dir_item:1;
385 unsigned int found_dir_index:1;
386 unsigned int found_back_ref:1;
387 unsigned int found_forward_ref:1;
388 unsigned int reachable:1;
397 static inline struct root_backref* to_root_backref(struct list_head *entry)
399 return list_entry(entry, struct root_backref, list);
/*
 * NOTE(review): the struct header lines for the members below are missing
 * from this excerpt, so which struct each member belongs to cannot be read
 * off directly.  Later code uses container_of(..., struct ptr_node, cache)
 * and container_of(..., struct shared_node, cache), and accesses root_cache,
 * inode_cache and current through struct shared_node pointers, so the last
 * four members presumably belong to struct shared_node - confirm against the
 * full file.
 */
403 struct list_head backrefs;
404 struct cache_extent cache;
405 unsigned int found_root_item:1;
411 struct cache_extent cache;
416 struct cache_extent cache;
417 struct cache_tree root_cache;
418 struct cache_tree inode_cache;
419 struct inode_record *current;
/*
 * State of a tree walk.  enter_shared_node() looks shared nodes up in
 * "shared" and stores the node for a level in nodes[level]; it also uses
 * active_node and root_level members whose declarations are not visible in
 * this excerpt.
 */
428 struct walk_control {
429 struct cache_tree shared;
430 struct shared_node *nodes[BTRFS_MAX_LEVEL];
/*
 * NOTE(review): the next two members presumably belong to a different struct
 * whose header line was dropped from this excerpt - confirm.
 */
436 struct btrfs_key key;
438 struct list_head list;
/* List-linked extent entry; its other members are not visible in this excerpt. */
441 struct extent_entry {
446 struct list_head list;
/*
 * Cached information about a root item.  The member declarations that belong
 * to the two existing comments below are not visible in this excerpt.
 */
449 struct root_item_info {
450 /* level of the root */
452 /* number of nodes at this level, must be 1 for a root */
456 struct cache_extent cache_extent;
460 * Error bit for low memory mode check.
462 * Currently no caller cares about it yet. Just internal use for error
/*
 * Each condition owns its own bit so that several of them can be OR-ed into
 * one result and still be told apart afterwards.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
/*
 * This used to be (1 << 4) as well, which made it indistinguishable from
 * REFERENCER_MISMATCH; it now uses the first free bit.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
/*
 * Progress indicator routine.  @p is a struct task_ctx; the routine prints
 * the description of the current task position followed by a character that
 * cycles through work_indicator, ending the line with '\r' so that the next
 * print overwrites it.  It paces itself with task_period_start() and
 * task_period_wait().
 *
 * NOTE(review): the loop construct, the declaration and update of count and
 * the return statement are not visible in this excerpt.
 */
476 static void *print_status_check(void *p)
478 struct task_ctx *priv = p;
479 const char work_indicator[] = { '.', 'o', 'O', 'o' };
/* Indexed by priv->tp; only one of the entries is visible in this excerpt. */
481 static char *task_position_string[] = {
483 "checking free space cache",
487 task_period_start(priv->info, 1000 /* 1s */);
/* TASK_NOTHING is handled separately; the action taken is not visible here. */
489 if (priv->tp == TASK_NOTHING)
493 printf("%s [%c]\r", task_position_string[priv->tp],
494 work_indicator[count % 4]);
497 task_period_wait(priv->info);
/*
 * Companion of print_status_check().
 * NOTE(review): the body is not visible in this excerpt.
 */
502 static int print_status_return(void *p)
510 static enum btrfs_check_mode parse_check_mode(const char *str)
512 if (strcmp(str, "lowmem") == 0)
513 return CHECK_MODE_LOWMEM;
514 if (strcmp(str, "orig") == 0)
515 return CHECK_MODE_ORIGINAL;
516 if (strcmp(str, "original") == 0)
517 return CHECK_MODE_ORIGINAL;
519 return CHECK_MODE_UNKNOWN;
522 /* Compatible function to allow reuse of old codes */
/*
 * Looks at the first (leftmost) hole of the @holes tree.
 * NOTE(review): both return statements (for the empty tree and for the first
 * hole) are not visible in this excerpt; callers compare the result with
 * inode sizes, so presumably the hole's start is returned - confirm.
 */
523 static u64 first_extent_gap(struct rb_root *holes)
525 struct file_extent_hole *hole;
527 if (RB_EMPTY_ROOT(holes))
530 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * Ordering callback for the hole tree: order by start, and break ties on
 * equal starts by comparing len.  According to the comment in
 * add_file_extent_hole() this callback never returns 0.
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 */
534 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
536 struct file_extent_hole *hole1;
537 struct file_extent_hole *hole2;
539 hole1 = rb_entry(node1, struct file_extent_hole, node);
540 hole2 = rb_entry(node2, struct file_extent_hole, node);
542 if (hole1->start > hole2->start)
544 if (hole1->start < hole2->start)
546 /* Now hole1->start == hole2->start */
547 if (hole1->len >= hole2->len)
549 * Hole 1 will be merge center
550 * Same hole will be merged later
553 /* Hole 2 will be merge center */
558 * Add a hole to the record
560 * This will do hole merge for copy_file_extent_holes(),
561 * which will ensure there won't be continuous holes.
/*
 * Insert a new hole into @holes and merge it with its neighbours: once with
 * the previous hole, then with following holes for as long as they touch or
 * overlap the (growing) new hole.
 *
 * NOTE(review): the start/len parameters, the allocation failure check, the
 * initialisation of the new node, the free() calls for erased neighbours and
 * the return statements are not visible in this excerpt.
 */
563 static int add_file_extent_hole(struct rb_root *holes,
566 struct file_extent_hole *hole;
567 struct file_extent_hole *prev = NULL;
568 struct file_extent_hole *next = NULL;
570 hole = malloc(sizeof(*hole));
575 /* Since compare will not return 0, no -EEXIST will happen */
576 rb_insert(holes, &hole->node, compare_hole);
578 /* simple merge with previous hole */
579 if (rb_prev(&hole->node))
580 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
/*
 * NOTE(review): the merged length is computed from the new hole's end only.
 * If prev already reaches past that end, the merged hole comes out shorter
 * than prev was.  The merge with next below guards the equivalent case with
 * an explicit end comparison - confirm whether a guard is needed here too.
 */
582 if (prev && prev->start + prev->len >= hole->start) {
583 hole->len = hole->start + hole->len - prev->start;
584 hole->start = prev->start;
585 rb_erase(&prev->node, holes);
590 /* iterate merge with next holes */
592 if (!rb_next(&hole->node))
594 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
596 if (hole->start + hole->len >= next->start) {
/* Only grow when next ends at or after the current hole's end. */
597 if (hole->start + hole->len <= next->start + next->len)
598 hole->len = next->start + next->len -
600 rb_erase(&next->node, holes);
/*
 * Search callback for rb_search(): @data is a struct file_extent_hole used
 * as the search key and @node is the tree node being examined.  A key start
 * that lies inside [hole->start, hole->start + hole->len) of the examined
 * hole is tested separately from one that lies before it.
 *
 * NOTE(review): the assignment of the local start and all return statements
 * are not visible in this excerpt.
 */
609 static int compare_hole_range(struct rb_node *node, void *data)
611 struct file_extent_hole *hole;
614 hole = (struct file_extent_hole *)data;
617 hole = rb_entry(node, struct file_extent_hole, node);
618 if (start < hole->start)
620 if (start >= hole->start && start < hole->start + hole->len)
626 * Delete a hole in the record
628 * This will do the hole split and is much stricter than add.
/*
 * Remove the range [start, start + len) from the hole that contains it.
 * The hole is looked up with compare_hole_range(), erased from the tree, and
 * whatever is left of it before and/or after the removed range is added back
 * with add_file_extent_hole().
 *
 * NOTE(review): the start/len parameters, the setup of tmp, the error
 * returns and the conditions guarding the two re-add calls are not visible
 * in this excerpt.
 */
630 static int del_file_extent_hole(struct rb_root *holes,
633 struct file_extent_hole *hole;
634 struct file_extent_hole tmp;
639 struct rb_node *node;
646 node = rb_search(holes, &tmp, compare_hole_range, NULL);
649 hole = rb_entry(node, struct file_extent_hole, node);
/* The range to delete must not extend past the end of the hole found. */
650 if (start + len > hole->start + hole->len)
654 * Now there will be no overlap, delete the hole and re-add the
655 * split(s) if they exists.
/* Remainder in front of the deleted range. */
657 if (start > hole->start) {
658 prev_start = hole->start;
659 prev_len = start - hole->start;
/* Remainder behind the deleted range. */
662 if (hole->start + hole->len > start + len) {
663 next_start = start + len;
664 next_len = hole->start + hole->len - start - len;
667 rb_erase(node, holes);
670 ret = add_file_extent_hole(holes, prev_start, prev_len);
675 ret = add_file_extent_hole(holes, next_start, next_len);
/*
 * Add every hole of the source tree to @dst, walking the source in order
 * from rb_first() via rb_next().  Each hole goes through
 * add_file_extent_hole(), so it is merged with holes already in @dst.
 *
 * NOTE(review): the src parameter, the loop construct and the error handling
 * of ret are not visible in this excerpt.
 */
682 static int copy_file_extent_holes(struct rb_root *dst,
685 struct file_extent_hole *hole;
686 struct rb_node *node;
689 node = rb_first(src);
691 hole = rb_entry(node, struct file_extent_hole, node);
692 ret = add_file_extent_hole(dst, hole->start, hole->len);
695 node = rb_next(node);
/*
 * Empty the @holes tree: repeatedly take the first node and erase it.
 * NOTE(review): the loop construct and the free() of each hole are not
 * visible in this excerpt.
 */
700 static void free_file_extent_holes(struct rb_root *holes)
702 struct rb_node *node;
703 struct file_extent_hole *hole;
705 node = rb_first(holes);
707 hole = rb_entry(node, struct file_extent_hole, node);
708 rb_erase(node, holes);
/* Re-read the first node because the previous one was just erased. */
710 node = rb_first(holes);
714 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
716 static void record_root_in_trans(struct btrfs_trans_handle *trans,
717 struct btrfs_root *root)
719 if (root->last_trans != trans->transid) {
720 root->track_dirty = 1;
721 root->last_trans = trans->transid;
722 root->commit_root = root->node;
723 extent_buffer_get(root->node);
/*
 * Map the file type bits of an inode mode to the matching BTRFS_FT_* value
 * through a lookup table indexed by (imode & S_IFMT) >> S_SHIFT.  File types
 * without a table entry map to 0, as unlisted static array elements are
 * zero-initialised.
 *
 * NOTE(review): the definition of S_SHIFT is not visible in this excerpt.
 */
727 static u8 imode_to_type(u32 imode)
730 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
731 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
732 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
733 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
734 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
735 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
736 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
737 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
740 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
/*
 * Ordering callback for device records in an rb tree: compares devid.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
744 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
746 struct device_record *rec1;
747 struct device_record *rec2;
749 rec1 = rb_entry(node1, struct device_record, node);
750 rec2 = rb_entry(node2, struct device_record, node);
751 if (rec1->devid > rec2->devid)
753 else if (rec1->devid < rec2->devid)
/*
 * Make a deep copy of @orig_rec: the struct itself is copied with memcpy(),
 * then the backref list, the orphan extent list and the hole tree are
 * re-initialised and rebuilt entry by entry so that the copy shares no list
 * or tree nodes with the original.
 *
 * Returns ERR_PTR(-ENOMEM) when the record itself cannot be allocated.  The
 * tail of the function releases whatever had been copied into the new
 * record so far.
 *
 * NOTE(review): the allocation failure checks inside the loops, the gotos,
 * the refs handling, the free() calls and the final returns are not visible
 * in this excerpt.
 */
759 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
761 struct inode_record *rec;
762 struct inode_backref *backref;
763 struct inode_backref *orig;
764 struct inode_backref *tmp;
765 struct orphan_data_extent *src_orphan;
766 struct orphan_data_extent *dst_orphan;
771 rec = malloc(sizeof(*rec));
773 return ERR_PTR(-ENOMEM);
774 memcpy(rec, orig_rec, sizeof(*rec));
/* The memcpy() copied the original's list heads and tree root; reset them. */
776 INIT_LIST_HEAD(&rec->backrefs);
777 INIT_LIST_HEAD(&rec->orphan_extents);
778 rec->holes = RB_ROOT;
780 list_for_each_entry(orig, &orig_rec->backrefs, list) {
/* A backref carries its name after the struct, plus one terminating byte. */
781 size = sizeof(*orig) + orig->namelen + 1;
782 backref = malloc(size);
787 memcpy(backref, orig, size);
788 list_add_tail(&backref->list, &rec->backrefs);
790 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
791 dst_orphan = malloc(sizeof(*dst_orphan));
796 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
797 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
799 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
/* From here on: release the partially built copy. */
806 rb = rb_first(&rec->holes);
808 struct file_extent_hole *hole;
810 hole = rb_entry(rb, struct file_extent_hole, node);
816 if (!list_empty(&rec->backrefs))
817 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
818 list_del(&orig->list);
/*
 * NOTE(review): orig and tmp are struct inode_backref pointers, but the
 * entries on orphan_extents were allocated above as struct
 * orphan_data_extent.  The walk only touches the "list" member, so it
 * depends on that member sitting at the same offset in both structs; a
 * dedicated pair of orphan_data_extent cursors would be safer.
 */
822 if (!list_empty(&rec->orphan_extents))
823 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
824 list_del(&orig->list);
/*
 * Print every entry of an orphan data extent list to stdout, one line per
 * extent, after a header line naming the tree.  Prints nothing useful for an
 * empty list (the early exit itself is not visible in this excerpt).
 *
 * NOTE(review): the second parameter and the trailing printf arguments are
 * not visible in this excerpt.
 */
833 static void print_orphan_data_extents(struct list_head *orphan_extents,
836 struct orphan_data_extent *orphan;
838 if (list_empty(orphan_extents))
840 printf("The following data extent is lost in tree %llu:\n",
842 list_for_each_entry(orphan, orphan_extents, list) {
843 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
844 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Report the errors recorded for inode @rec of tree @root on stderr.
 *
 * The output is a single line: the root and inode numbers and the raw error
 * mask in hex, followed by one ", <description>" fragment for each I_ERR_*
 * bit that is set.  Afterwards the orphan extents and/or the file extent
 * holes are listed when the corresponding error bits are set.
 *
 * NOTE(review): the early exit for a record without errors, the loop
 * construct around the hole listing and parts of the last fprintf are not
 * visible in this excerpt.
 */
849 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
851 u64 root_objectid = root->root_key.objectid;
852 int errors = rec->errors;
856 /* reloc root errors, we print its corresponding fs root objectid*/
857 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
858 root_objectid = root->root_key.offset;
859 fprintf(stderr, "reloc");
861 fprintf(stderr, "root %llu inode %llu errors %x",
862 (unsigned long long) root_objectid,
863 (unsigned long long) rec->ino, rec->errors);
/* One fragment per error bit, in bit order. */
865 if (errors & I_ERR_NO_INODE_ITEM)
866 fprintf(stderr, ", no inode item");
867 if (errors & I_ERR_NO_ORPHAN_ITEM)
868 fprintf(stderr, ", no orphan item");
869 if (errors & I_ERR_DUP_INODE_ITEM)
870 fprintf(stderr, ", dup inode item");
871 if (errors & I_ERR_DUP_DIR_INDEX)
872 fprintf(stderr, ", dup dir index");
873 if (errors & I_ERR_ODD_DIR_ITEM)
874 fprintf(stderr, ", odd dir item");
875 if (errors & I_ERR_ODD_FILE_EXTENT)
876 fprintf(stderr, ", odd file extent");
877 if (errors & I_ERR_BAD_FILE_EXTENT)
878 fprintf(stderr, ", bad file extent");
879 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
880 fprintf(stderr, ", file extent overlap");
881 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
882 fprintf(stderr, ", file extent discount");
883 if (errors & I_ERR_DIR_ISIZE_WRONG)
884 fprintf(stderr, ", dir isize wrong");
885 if (errors & I_ERR_FILE_NBYTES_WRONG)
886 fprintf(stderr, ", nbytes wrong");
887 if (errors & I_ERR_ODD_CSUM_ITEM)
888 fprintf(stderr, ", odd csum item");
889 if (errors & I_ERR_SOME_CSUM_MISSING)
890 fprintf(stderr, ", some csum missing");
891 if (errors & I_ERR_LINK_COUNT_WRONG)
892 fprintf(stderr, ", link count wrong");
893 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
894 fprintf(stderr, ", orphan file extent");
895 fprintf(stderr, "\n");
896 /* Print the orphan extents if needed */
/*
 * NOTE(review): this passes root->objectid rather than the local
 * root_objectid that was adjusted for reloc roots above, and the helper
 * prints to stdout while everything else here goes to stderr - confirm both
 * are intended.
 */
897 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
898 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
900 /* Print the holes if needed */
901 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
902 struct file_extent_hole *hole;
903 struct rb_node *node;
906 node = rb_first(&rec->holes);
907 fprintf(stderr, "Found file extent holes:\n");
910 hole = rb_entry(node, struct file_extent_hole, node);
911 fprintf(stderr, "\tstart: %llu, len: %llu\n",
912 hole->start, hole->len);
913 node = rb_next(node);
916 fprintf(stderr, "\tstart: 0, len: %llu\n",
918 root->fs_info->sectorsize));
922 static void print_ref_error(int errors)
924 if (errors & REF_ERR_NO_DIR_ITEM)
925 fprintf(stderr, ", no dir item");
926 if (errors & REF_ERR_NO_DIR_INDEX)
927 fprintf(stderr, ", no dir index");
928 if (errors & REF_ERR_NO_INODE_REF)
929 fprintf(stderr, ", no inode ref");
930 if (errors & REF_ERR_DUP_DIR_ITEM)
931 fprintf(stderr, ", dup dir item");
932 if (errors & REF_ERR_DUP_DIR_INDEX)
933 fprintf(stderr, ", dup dir index");
934 if (errors & REF_ERR_DUP_INODE_REF)
935 fprintf(stderr, ", dup inode ref");
936 if (errors & REF_ERR_INDEX_UNMATCH)
937 fprintf(stderr, ", index mismatch");
938 if (errors & REF_ERR_FILETYPE_UNMATCH)
939 fprintf(stderr, ", filetype mismatch");
940 if (errors & REF_ERR_NAME_TOO_LONG)
941 fprintf(stderr, ", name too long");
942 if (errors & REF_ERR_NO_ROOT_REF)
943 fprintf(stderr, ", no root ref");
944 if (errors & REF_ERR_NO_ROOT_BACKREF)
945 fprintf(stderr, ", no root backref");
946 if (errors & REF_ERR_DUP_ROOT_REF)
947 fprintf(stderr, ", dup root ref");
948 if (errors & REF_ERR_DUP_ROOT_BACKREF)
949 fprintf(stderr, ", dup root backref");
950 fprintf(stderr, "\n");
/*
 * Look up the inode record for an inode number in @inode_cache, creating a
 * fresh one when none is cached.
 *
 * When a record is found and the caller intends to modify it (mod is set)
 * while it is shared (refs > 1), the cache entry is switched to a private
 * copy made with clone_inode_rec().  A new record starts zeroed, with
 * extent_start set to (u64)-1 and empty backref/orphan lists and hole tree,
 * and is inserted into the cache under a one-wide range starting at ino.
 *
 * Returns ERR_PTR(-ENOMEM) on allocation failure and ERR_PTR(-EEXIST) when
 * the cache insert fails.
 *
 * NOTE(review): the ino/mod parameters, the branch structure, the refs
 * bookkeeping and the successful returns are not visible in this excerpt.
 */
953 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
956 struct ptr_node *node;
957 struct cache_extent *cache;
958 struct inode_record *rec = NULL;
961 cache = lookup_cache_extent(inode_cache, ino, 1);
963 node = container_of(cache, struct ptr_node, cache);
/* Copy before modifying a record that is still referenced elsewhere. */
965 if (mod && rec->refs > 1) {
966 node->data = clone_inode_rec(rec);
967 if (IS_ERR(node->data))
973 rec = calloc(1, sizeof(*rec));
975 return ERR_PTR(-ENOMEM);
/* (u64)-1 is tested by merge_inode_recs() to mean "no extent start recorded". */
977 rec->extent_start = (u64)-1;
979 INIT_LIST_HEAD(&rec->backrefs);
980 INIT_LIST_HEAD(&rec->orphan_extents);
981 rec->holes = RB_ROOT;
983 node = malloc(sizeof(*node));
986 return ERR_PTR(-ENOMEM);
988 node->cache.start = ino;
989 node->cache.size = 1;
/* Special case for this objectid; the action taken is not visible here. */
992 if (ino == BTRFS_FREE_INO_OBJECTID)
995 ret = insert_cache_extent(inode_cache, &node->cache);
997 return ERR_PTR(-EEXIST);
/*
 * Empty an orphan data extent list by unlinking its first entry until the
 * list is empty.
 * NOTE(review): the free() of each unlinked entry is not visible in this
 * excerpt.
 */
1002 static void free_orphan_data_extents(struct list_head *orphan_extents)
1004 struct orphan_data_extent *orphan;
1006 while (!list_empty(orphan_extents)) {
1007 orphan = list_entry(orphan_extents->next,
1008 struct orphan_data_extent, list);
1009 list_del(&orphan->list);
/*
 * Drop one reference to @rec.  While references remain after the decrement
 * nothing else is torn down; otherwise the backref list, the orphan extent
 * list and the hole tree are emptied.
 *
 * NOTE(review): the early return and the free() calls for the backrefs and
 * for the record itself are not visible in this excerpt.
 */
1014 static void free_inode_rec(struct inode_record *rec)
1016 struct inode_backref *backref;
1018 if (--rec->refs > 0)
1021 while (!list_empty(&rec->backrefs)) {
1022 backref = to_inode_backref(rec->backrefs.next);
1023 list_del(&backref->list);
1026 free_orphan_data_extents(&rec->orphan_extents);
1027 free_file_extent_holes(&rec->holes);
/*
 * Test whether @rec has nothing left to report: no errors, fully checked,
 * inode item seen, link count matching the links found, and no backrefs
 * still pending.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
1031 static int can_free_inode_rec(struct inode_record *rec)
1033 if (!rec->errors && rec->checked && rec->found_inode_item &&
1034 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Finalise the checks on @rec and release it from @inode_cache if it turns
 * out to be clean.
 *
 * Steps visible here:
 *  - nothing is evaluated before the inode item has been seen;
 *  - backrefs with both a dir item and a dir index are checked against the
 *    inode's file type and unlinked once they are complete and error free;
 *  - once the record is checked and not being merged, type specific
 *    consistency errors are recorded in rec->errors;
 *  - a record that passes can_free_inode_rec() is removed from the cache and
 *    handed to free_inode_rec().
 *
 * NOTE(review): the early returns and the free() calls are not visible in
 * this excerpt.
 */
1039 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1040 struct inode_record *rec)
1042 struct cache_extent *cache;
1043 struct inode_backref *tmp, *backref;
1044 struct ptr_node *node;
1047 if (!rec->found_inode_item)
1050 filetype = imode_to_type(rec->imode);
1051 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1052 if (backref->found_dir_item && backref->found_dir_index) {
1053 if (backref->filetype != filetype)
1054 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
/* A complete, error free backref needs no further tracking. */
1055 if (!backref->errors && backref->found_inode_ref &&
1056 rec->nlink == rec->found_link) {
1057 list_del(&backref->list);
1063 if (!rec->checked || rec->merging)
/* Directories: size must match what was found, and no file extents. */
1066 if (S_ISDIR(rec->imode)) {
1067 if (rec->found_size != rec->isize)
1068 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1069 if (rec->found_file_extent)
1070 rec->errors |= I_ERR_ODD_FILE_EXTENT;
/* Regular files and symlinks: no dir items, nbytes and extent coverage. */
1071 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1072 if (rec->found_dir_item)
1073 rec->errors |= I_ERR_ODD_DIR_ITEM;
1074 if (rec->found_size != rec->nbytes)
1075 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
/* The gap check is skipped for unlinked inodes and when no_holes is set. */
1076 if (rec->nlink > 0 && !no_holes &&
1077 (rec->extent_end < rec->isize ||
1078 first_extent_gap(&rec->holes) < rec->isize))
1079 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
/* Checksum items must agree with the inode's nodatasum flag. */
1082 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1083 if (rec->found_csum_item && rec->nodatasum)
1084 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1085 if (rec->some_csum_missing && !rec->nodatasum)
1086 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1089 BUG_ON(rec->refs != 1);
1090 if (can_free_inode_rec(rec)) {
1091 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1092 node = container_of(cache, struct ptr_node, cache);
1093 BUG_ON(node->data != rec);
1094 remove_cache_extent(inode_cache, &node->cache);
1096 free_inode_rec(rec);
/*
 * Search @root for an orphan item key (objectid BTRFS_ORPHAN_OBJECTID, type
 * BTRFS_ORPHAN_ITEM_KEY) with a read-only btrfs_search_slot() call; the path
 * is released right after the search.
 *
 * NOTE(review): the assignment of key.offset (presumably @ino) and the
 * translation of ret into the return value are not visible in this excerpt.
 */
1100 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1102 struct btrfs_path path;
1103 struct btrfs_key key;
1106 key.objectid = BTRFS_ORPHAN_OBJECTID;
1107 key.type = BTRFS_ORPHAN_ITEM_KEY;
1110 btrfs_init_path(&path);
1111 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1112 btrfs_release_path(&path);
/*
 * Copy the fields of the inode item at @slot of @eb into the current inode
 * record of @active_node: nlink, size, nbytes and mode.  A second inode item
 * for the same record is flagged as I_ERR_DUP_INODE_ITEM, and an inode with
 * nlink == 0 gets I_ERR_NO_ORPHAN_ITEM set here.  Finally the record is
 * passed to maybe_free_inode_rec().
 *
 * NOTE(review): the statement executed for BTRFS_INODE_NODATASUM and the
 * return statements are not visible in this excerpt.
 */
1118 static int process_inode_item(struct extent_buffer *eb,
1119 int slot, struct btrfs_key *key,
1120 struct shared_node *active_node)
1122 struct inode_record *rec;
1123 struct btrfs_inode_item *item;
1125 rec = active_node->current;
/* The current record must be for this inode and must not be shared. */
1126 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1127 if (rec->found_inode_item) {
1128 rec->errors |= I_ERR_DUP_INODE_ITEM;
1131 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1132 rec->nlink = btrfs_inode_nlink(eb, item);
1133 rec->isize = btrfs_inode_size(eb, item);
1134 rec->nbytes = btrfs_inode_nbytes(eb, item);
1135 rec->imode = btrfs_inode_mode(eb, item);
1136 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1138 rec->found_inode_item = 1;
1139 if (rec->nlink == 0)
1140 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1141 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find the backref of @rec that matches (@dir, name, @namelen), or allocate
 * a new one, zero its header, store a NUL-terminated copy of the name after
 * it and append it to rec->backrefs.
 *
 * NOTE(review): the name parameter, the statements taken on a match or for
 * BTRFS_MULTIPLE_OBJECTIDS, the allocation failure check, the assignment of
 * dir and the returns are not visible in this excerpt.
 */
1145 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1147 int namelen, u64 dir)
1149 struct inode_backref *backref;
1151 list_for_each_entry(backref, &rec->backrefs, list) {
1152 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1154 if (backref->dir != dir || backref->namelen != namelen)
1156 if (memcmp(name, backref->name, namelen))
/* Room for the header, the name and its terminating NUL byte. */
1161 backref = malloc(sizeof(*backref) + namelen + 1);
1164 memset(backref, 0, sizeof(*backref));
1166 backref->namelen = namelen;
1167 memcpy(backref->name, name, namelen);
1168 backref->name[namelen] = '\0';
1169 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Record that an item of type @itemtype referring to inode @ino under
 * directory @dir with the given name was seen.
 *
 * The matching backref is fetched (or created) and updated according to the
 * item type: duplicates of the same item type and disagreements on index or
 * file type between the different item types are accumulated in
 * backref->errors, together with the caller supplied @errors.  Afterwards
 * the inode record is passed to maybe_free_inode_rec().
 *
 * NOTE(review): the handling of a failed get_inode_backref(), the found_link
 * accounting, the final else branch and the return are not visible in this
 * excerpt.
 */
1173 static int add_inode_backref(struct cache_tree *inode_cache,
1174 u64 ino, u64 dir, u64 index,
1175 const char *name, int namelen,
1176 u8 filetype, u8 itemtype, int errors)
1178 struct inode_record *rec;
1179 struct inode_backref *backref;
/* mod = 1: the record is about to be modified. */
1181 rec = get_inode_rec(inode_cache, ino, 1);
1182 BUG_ON(IS_ERR(rec));
1183 backref = get_inode_backref(rec, name, namelen, dir);
1186 backref->errors |= errors;
1187 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1188 if (backref->found_dir_index)
1189 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1190 if (backref->found_inode_ref && backref->index != index)
1191 backref->errors |= REF_ERR_INDEX_UNMATCH;
1192 if (backref->found_dir_item && backref->filetype != filetype)
1193 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1195 backref->index = index;
1196 backref->filetype = filetype;
1197 backref->found_dir_index = 1;
1198 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1200 if (backref->found_dir_item)
1201 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1202 if (backref->found_dir_index && backref->filetype != filetype)
1203 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1205 backref->filetype = filetype;
1206 backref->found_dir_item = 1;
1207 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1208 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1209 if (backref->found_inode_ref)
1210 backref->errors |= REF_ERR_DUP_INODE_REF;
1211 if (backref->found_dir_index && backref->index != index)
1212 backref->errors |= REF_ERR_INDEX_UNMATCH;
1214 backref->index = index;
/* Remembered so that merge_inode_recs() can replay the same item type. */
1216 backref->ref_type = itemtype;
1217 backref->found_inode_ref = 1;
1222 maybe_free_inode_rec(inode_cache, rec);
/*
 * Fold everything known about @src into @dst (both describe the same inode).
 *
 * Backrefs are replayed through add_inode_backref() against @dst_cache, once
 * per item type that was found.  Found-flags are OR-ed, link and size
 * counters are added up, the extent ranges are combined (recording an
 * overlap error or a hole between them), the error masks are merged and the
 * inode item fields are taken over when @dst has none yet; a second inode
 * item is flagged as I_ERR_DUP_INODE_ITEM.
 *
 * NOTE(review): the dir_count bookkeeping, the merging flag handling, the
 * error handling of ret and the returns are not visible in this excerpt.
 */
1226 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1227 struct cache_tree *dst_cache)
1229 struct inode_backref *backref;
1234 list_for_each_entry(backref, &src->backrefs, list) {
1235 if (backref->found_dir_index) {
1236 add_inode_backref(dst_cache, dst->ino, backref->dir,
1237 backref->index, backref->name,
1238 backref->namelen, backref->filetype,
1239 BTRFS_DIR_INDEX_KEY, backref->errors);
1241 if (backref->found_dir_item) {
1243 add_inode_backref(dst_cache, dst->ino,
1244 backref->dir, 0, backref->name,
1245 backref->namelen, backref->filetype,
1246 BTRFS_DIR_ITEM_KEY, backref->errors);
1248 if (backref->found_inode_ref) {
1249 add_inode_backref(dst_cache, dst->ino,
1250 backref->dir, backref->index,
1251 backref->name, backref->namelen, 0,
1252 backref->ref_type, backref->errors);
1256 if (src->found_dir_item)
1257 dst->found_dir_item = 1;
1258 if (src->found_file_extent)
1259 dst->found_file_extent = 1;
1260 if (src->found_csum_item)
1261 dst->found_csum_item = 1;
1262 if (src->some_csum_missing)
1263 dst->some_csum_missing = 1;
/* Holes are only copied when src's first gap comes before dst's. */
1264 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1265 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1270 BUG_ON(src->found_link < dir_count);
1271 dst->found_link += src->found_link - dir_count;
1272 dst->found_size += src->found_size;
/* (u64)-1 marks a record that has no extent start recorded. */
1273 if (src->extent_start != (u64)-1) {
1274 if (dst->extent_start == (u64)-1) {
1275 dst->extent_start = src->extent_start;
1276 dst->extent_end = src->extent_end;
1278 if (dst->extent_end > src->extent_start)
1279 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1280 else if (dst->extent_end < src->extent_start) {
1281 ret = add_file_extent_hole(&dst->holes,
1283 src->extent_start - dst->extent_end);
1285 if (dst->extent_end < src->extent_end)
1286 dst->extent_end = src->extent_end;
1290 dst->errors |= src->errors;
1291 if (src->found_inode_item) {
1292 if (!dst->found_inode_item) {
1293 dst->nlink = src->nlink;
1294 dst->isize = src->isize;
1295 dst->nbytes = src->nbytes;
1296 dst->imode = src->imode;
1297 dst->nodatasum = src->nodatasum;
1298 dst->found_inode_item = 1;
1300 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Move the inode records collected in @src_node into @dst_node.
 *
 * The root_cache of the source is processed first, then its inode_cache.
 * Every entry is inserted into the matching destination cache; when the
 * destination already has an entry for that inode (-EEXIST) the two records
 * are merged with merge_inode_recs() and the source record is released.
 * Finally dst_node->current is advanced to the source's current inode when
 * that one has a higher inode number.
 *
 * NOTE(review): the loop constructs, the splice/copy distinction that
 * depends on src_node->refs, the allocation failure handling and the return
 * are not visible in this excerpt.
 */
1308 static int splice_shared_node(struct shared_node *src_node,
1309 struct shared_node *dst_node)
1311 struct cache_extent *cache;
1312 struct ptr_node *node, *ins;
1313 struct cache_tree *src, *dst;
1314 struct inode_record *rec, *conflict;
1315 u64 current_ino = 0;
/* Drops one reference on the source node. */
1319 if (--src_node->refs == 0)
1321 if (src_node->current)
1322 current_ino = src_node->current->ino;
1324 src = &src_node->root_cache;
1325 dst = &dst_node->root_cache;
1327 cache = search_cache_extent(src, 0);
1329 node = container_of(cache, struct ptr_node, cache);
/* Advance before the current entry may be removed from the source tree. */
1331 cache = next_cache_extent(cache);
1334 remove_cache_extent(src, &node->cache);
1337 ins = malloc(sizeof(*ins));
1339 ins->cache.start = node->cache.start;
1340 ins->cache.size = node->cache.size;
1344 ret = insert_cache_extent(dst, &ins->cache);
1345 if (ret == -EEXIST) {
1346 conflict = get_inode_rec(dst, rec->ino, 1);
1347 BUG_ON(IS_ERR(conflict));
1348 merge_inode_recs(rec, conflict, dst);
1350 conflict->checked = 1;
1351 if (dst_node->current == conflict)
1352 dst_node->current = NULL;
1354 maybe_free_inode_rec(dst, conflict);
1355 free_inode_rec(rec);
/* Second pass: repeat the same work for the inode caches. */
1362 if (src == &src_node->root_cache) {
1363 src = &src_node->inode_cache;
1364 dst = &dst_node->inode_cache;
1368 if (current_ino > 0 && (!dst_node->current ||
1369 current_ino > dst_node->current->ino)) {
1370 if (dst_node->current) {
1371 dst_node->current->checked = 1;
1372 maybe_free_inode_rec(dst, dst_node->current);
1374 dst_node->current = get_inode_rec(dst, current_ino, 1);
1375 BUG_ON(IS_ERR(dst_node->current));
/*
 * Per-entry destructor for a cache tree of ptr_node entries: maps the
 * cache_extent back to its containing ptr_node and frees an inode record.
 * NOTE(review): how rec is obtained from node is not visible here.
 */
1380 static void free_inode_ptr(struct cache_extent *cache)
1382 struct ptr_node *node;
1383 struct inode_record *rec;
1385 node = container_of(cache, struct ptr_node, cache);
1387 free_inode_rec(rec);
/*
 * Presumably expands to the free_inode_recs_tree() helper that
 * enter_shared_node() calls, using free_inode_ptr for each entry - confirm
 * against the macro definition.
 */
1391 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the shared_node registered for tree block bytenr in the shared
 * cache tree.  The lookup uses a range of length 1 starting at bytenr.
 * NOTE(review): the not-found path and the return statement are not visible
 * here.
 */
1393 static struct shared_node *find_shared_node(struct cache_tree *shared,
1396 struct cache_extent *cache;
1397 struct shared_node *node;
1399 cache = lookup_cache_extent(shared, bytenr, 1);
1401 node = container_of(cache, struct shared_node, cache);
/*
 * Allocate a zeroed shared_node keyed by bytenr (cache range of size 1),
 * initialise its two record caches and insert it into the shared tree.
 * NOTE(review): the allocation-failure check and the use of refs are not
 * visible here.
 */
1407 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1410 struct shared_node *node;
1412 node = calloc(1, sizeof(*node));
1415 node->cache.start = bytenr;
1416 node->cache.size = 1;
1417 cache_tree_init(&node->root_cache);
1418 cache_tree_init(&node->inode_cache);
1421 ret = insert_cache_extent(shared, &node->cache);
/*
 * Called while walking down when the tree block at bytenr (at the given
 * level) is reached.  The block's shared_node is looked up in wc->shared;
 * the visible code also creates one with add_shared_node(), installs it as
 * wc->nodes[level] and makes that level the active one.  Further down, the
 * node's cached records are either freed (top-level block of a root whose
 * root item has zero refs) or spliced into the currently active node.
 * NOTE(review): the conditions selecting between these paths and the return
 * values are on lines not visible here.
 */
1426 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1427 struct walk_control *wc, int level)
1429 struct shared_node *node;
1430 struct shared_node *dest;
1433 if (level == wc->active_node)
1436 BUG_ON(wc->active_node <= level);
1437 node = find_shared_node(&wc->shared, bytenr);
1439 ret = add_shared_node(&wc->shared, bytenr, refs);
1441 node = find_shared_node(&wc->shared, bytenr);
1442 wc->nodes[level] = node;
1443 wc->active_node = level;
/*
 * Active node is the root level and the root item has no references left:
 * drop one reference from node and, at zero, free both of its record caches
 * and unlink it from wc->shared.
 */
1447 if (wc->root_level == wc->active_node &&
1448 btrfs_root_refs(&root->root_item) == 0) {
1449 if (--node->refs == 0) {
1450 free_inode_recs_tree(&node->root_cache);
1451 free_inode_recs_tree(&node->inode_cache);
1452 remove_cache_extent(&wc->shared, &node->cache);
/* Otherwise move node's records into the currently active node. */
1458 dest = wc->nodes[wc->active_node];
1459 splice_shared_node(node, dest);
1460 if (node->refs == 0) {
1461 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Called while walking up when the active shared node at the given level is
 * left.  A higher level i is chosen as the new active node (the selection
 * test inside the loop is not visible here), the old active slot in
 * wc->nodes is cleared, and the old node's records are spliced into the new
 * active node when that node is below the root level or the root item still
 * has references.
 */
1467 static int leave_shared_node(struct btrfs_root *root,
1468 struct walk_control *wc, int level)
1470 struct shared_node *node;
1471 struct shared_node *dest;
1474 if (level == wc->root_level)
1477 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1481 BUG_ON(i >= BTRFS_MAX_LEVEL);
1483 node = wc->nodes[wc->active_node];
1484 wc->nodes[wc->active_node] = NULL;
1485 wc->active_node = i;
1487 dest = wc->nodes[wc->active_node];
1488 if (wc->active_node < wc->root_level ||
1489 btrfs_root_refs(&root->root_item) > 0) {
1490 BUG_ON(node->refs <= 1);
1491 splice_shared_node(node, dest);
1493 BUG_ON(node->refs < 2);
1502 * 1 - if the root with id child_root_id is a child of root parent_root_id
1503 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1504 * has other root(s) as parent(s)
1505 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Works in two passes over the tree root: first a direct search for the
 * ROOT_REF key (parent_root_id, child_root_id), then a scan of the
 * ROOT_BACKREF items whose objectid is child_root_id, looking for one whose
 * offset equals parent_root_id.
 */
1507 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1510 struct btrfs_path path;
1511 struct btrfs_key key;
1512 struct extent_buffer *leaf;
1516 btrfs_init_path(&path);
1518 key.objectid = parent_root_id;
1519 key.type = BTRFS_ROOT_REF_KEY;
1520 key.offset = child_root_id;
1521 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1525 btrfs_release_path(&path);
/* Second pass: position on the child's ROOT_BACKREF items. */
1529 key.objectid = child_root_id;
1530 key.type = BTRFS_ROOT_BACKREF_KEY;
1532 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1538 leaf = path.nodes[0];
1539 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1540 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1543 leaf = path.nodes[0];
1546 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1547 if (key.objectid != child_root_id ||
1548 key.type != BTRFS_ROOT_BACKREF_KEY)
1553 if (key.offset == parent_root_id) {
1554 btrfs_release_path(&path);
1561 btrfs_release_path(&path);
/* No matching backref was found: report whether any other parent was seen. */
1564 return has_parent ? 0 : 2;
/*
 * Walk every btrfs_dir_item packed into item `slot` of leaf eb (a DIR_ITEM
 * or DIR_INDEX item, per the caller in process_one_leaf()) and record a
 * backref for each entry on the active node.
 *
 * Entries pointing at an INODE_ITEM go to the inode cache, entries pointing
 * at a ROOT_ITEM go to the root cache, and anything else is reported and
 * recorded under BTRFS_MULTIPLE_OBJECTIDS.  The current inode record is
 * flagged as having a dir item, and each entry's name length is added to its
 * found_size.
 */
1567 static int process_dir_item(struct extent_buffer *eb,
1568 int slot, struct btrfs_key *key,
1569 struct shared_node *active_node)
1579 struct btrfs_dir_item *di;
1580 struct inode_record *rec;
1581 struct cache_tree *root_cache;
1582 struct cache_tree *inode_cache;
1583 struct btrfs_key location;
1584 char namebuf[BTRFS_NAME_LEN];
1586 root_cache = &active_node->root_cache;
1587 inode_cache = &active_node->inode_cache;
1588 rec = active_node->current;
1589 rec->found_dir_item = 1;
1591 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1592 total = btrfs_item_size_nr(eb, slot);
1593 while (cur < total) {
1595 btrfs_dir_item_key_to_cpu(eb, di, &location);
1596 name_len = btrfs_dir_name_len(eb, di);
1597 data_len = btrfs_dir_data_len(eb, di);
1598 filetype = btrfs_dir_type(eb, di);
/* Note: found_size is bumped before name_len is validated below. */
1600 rec->found_size += name_len;
1601 if (cur + sizeof(*di) + name_len > total ||
1602 name_len > BTRFS_NAME_LEN) {
1603 error = REF_ERR_NAME_TOO_LONG;
1605 if (cur + sizeof(*di) > total)
1607 len = min_t(u32, total - cur - sizeof(*di),
1614 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
/* For DIR_ITEM keys the key offset must equal the hash of the name. */
1616 if (key->type == BTRFS_DIR_ITEM_KEY &&
1617 key->offset != btrfs_name_hash(namebuf, len)) {
1618 rec->errors |= I_ERR_ODD_DIR_ITEM;
/*
 * NOTE(review): namebuf is passed to a %s conversion below, but no NUL
 * terminator is written in the code visible here and the buffer is exactly
 * BTRFS_NAME_LEN bytes - confirm this cannot over-read.
 */
1619 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1620 key->objectid, key->offset, namebuf, len, filetype,
1621 key->offset, btrfs_name_hash(namebuf, len));
1624 if (location.type == BTRFS_INODE_ITEM_KEY) {
1625 add_inode_backref(inode_cache, location.objectid,
1626 key->objectid, key->offset, namebuf,
1627 len, filetype, key->type, error);
1628 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1629 add_inode_backref(root_cache, location.objectid,
1630 key->objectid, key->offset,
1631 namebuf, len, filetype,
1634 fprintf(stderr, "invalid location in dir item %u\n",
1636 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1637 key->objectid, key->offset, namebuf,
1638 len, filetype, key->type, error);
/* Step over this entry's header, name and data to reach the next entry. */
1641 len = sizeof(*di) + name_len + data_len;
1642 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item holding more than one entry is flagged as an error. */
1645 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1646 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Walk every btrfs_inode_ref packed into item `slot` of leaf eb and record a
 * backref for each one in the active node's inode cache.  The backref is
 * keyed by the item's objectid (the inode) and offset (the parent directory),
 * together with the index stored in the ref.
 *
 * A ref whose name would cross the item boundary or exceed BTRFS_NAME_LEN is
 * recorded with REF_ERR_NAME_TOO_LONG, using as much of the name as fits.
 */
1651 static int process_inode_ref(struct extent_buffer *eb,
1652 int slot, struct btrfs_key *key,
1653 struct shared_node *active_node)
1661 struct cache_tree *inode_cache;
1662 struct btrfs_inode_ref *ref;
1663 char namebuf[BTRFS_NAME_LEN];
1665 inode_cache = &active_node->inode_cache;
1667 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1668 total = btrfs_item_size_nr(eb, slot);
1669 while (cur < total) {
1670 name_len = btrfs_inode_ref_name_len(eb, ref);
1671 index = btrfs_inode_ref_index(eb, ref);
1673 /* inode_ref + namelen should not cross item boundary */
1674 if (cur + sizeof(*ref) + name_len > total ||
1675 name_len > BTRFS_NAME_LEN) {
1676 if (total < cur + sizeof(*ref))
1679 /* Still try to read out the remaining part */
1680 len = min_t(u32, total - cur - sizeof(*ref),
1682 error = REF_ERR_NAME_TOO_LONG;
1688 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1689 add_inode_backref(inode_cache, key->objectid, key->offset,
1690 index, namebuf, len, 0, key->type, error);
/* Step over this ref's header and name to reach the next ref. */
1692 len = sizeof(*ref) + name_len;
1693 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1699 static int process_inode_extref(struct extent_buffer *eb,
1700 int slot, struct btrfs_key *key,
1701 struct shared_node *active_node)
1710 struct cache_tree *inode_cache;
1711 struct btrfs_inode_extref *extref;
1712 char namebuf[BTRFS_NAME_LEN];
1714 inode_cache = &active_node->inode_cache;
1716 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1717 total = btrfs_item_size_nr(eb, slot);
1718 while (cur < total) {
1719 name_len = btrfs_inode_extref_name_len(eb, extref);
1720 index = btrfs_inode_extref_index(eb, extref);
1721 parent = btrfs_inode_extref_parent(eb, extref);
1722 if (name_len <= BTRFS_NAME_LEN) {
1726 len = BTRFS_NAME_LEN;
1727 error = REF_ERR_NAME_TOO_LONG;
1729 read_extent_buffer(eb, namebuf,
1730 (unsigned long)(extref + 1), len);
1731 add_inode_backref(inode_cache, key->objectid, parent,
1732 index, namebuf, len, 0, key->type, error);
1734 len = sizeof(*extref) + name_len;
1735 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Scan the csum tree for EXTENT_CSUM items overlapping the byte range
 * [start, start + len).  The bytes covered by one csum item are computed as
 * (item size / checksum size) * sectorsize.
 * NOTE(review): the accumulation into *found and the advance of start/len
 * happen on lines not visible here - presumably *found receives the number
 * of covered bytes; confirm.
 */
1742 static int count_csum_range(struct btrfs_root *root, u64 start,
1743 u64 len, u64 *found)
1745 struct btrfs_key key;
1746 struct btrfs_path path;
1747 struct extent_buffer *leaf;
1752 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1754 btrfs_init_path(&path);
1756 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1758 key.type = BTRFS_EXTENT_CSUM_KEY;
1760 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * No exact match: look at the previous slot, since a csum item that starts
 * before the searched key may still cover the start of the range.
 */
1764 if (ret > 0 && path.slots[0] > 0) {
1765 leaf = path.nodes[0];
1766 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1767 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1768 key.type == BTRFS_EXTENT_CSUM_KEY)
1773 leaf = path.nodes[0];
1774 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1775 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1780 leaf = path.nodes[0];
1783 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1784 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1785 key.type != BTRFS_EXTENT_CSUM_KEY)
1788 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1789 if (key.offset >= start + len)
1792 if (key.offset > start)
1795 size = btrfs_item_size_nr(leaf, path.slots[0]);
/* End of the disk range covered by this csum item. */
1796 csum_end = key.offset + (size / csum_size) *
1797 root->fs_info->sectorsize;
1798 if (csum_end > start) {
1799 size = min(csum_end - start, len);
1808 btrfs_release_path(&path);
/*
 * Account one EXTENT_DATA item (item `slot` of leaf eb) against the current
 * inode record of the active node.
 *
 * Visible effects on the record: found_file_extent is set; extent_start and
 * extent_end track the covered file range; overlaps set
 * I_ERR_FILE_EXTENT_OVERLAP and gaps are recorded as holes; malformed extent
 * fields set I_ERR_BAD_FILE_EXTENT; found_size grows by the extent length;
 * checksum coverage is recorded through found_csum_item, some_csum_missing
 * and I_ERR_ODD_CSUM_ITEM.
 */
1814 static int process_file_extent(struct btrfs_root *root,
1815 struct extent_buffer *eb,
1816 int slot, struct btrfs_key *key,
1817 struct shared_node *active_node)
1819 struct inode_record *rec;
1820 struct btrfs_file_extent_item *fi;
1822 u64 disk_bytenr = 0;
1823 u64 extent_offset = 0;
1824 u64 mask = root->fs_info->sectorsize - 1;
1828 rec = active_node->current;
1829 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1830 rec->found_file_extent = 1;
/* (u64)-1 marks a record that has not seen any file extent yet. */
1832 if (rec->extent_start == (u64)-1) {
1833 rec->extent_start = key->offset;
1834 rec->extent_end = key->offset;
1837 if (rec->extent_end > key->offset)
1838 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1839 else if (rec->extent_end < key->offset) {
1840 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1841 key->offset - rec->extent_end);
1846 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1847 extent_type = btrfs_file_extent_type(eb, fi);
1849 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1850 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1852 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1853 rec->found_size += num_bytes;
/* Round the inline length up to a sector multiple for extent_end. */
1854 num_bytes = (num_bytes + mask) & ~mask;
1855 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1856 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1857 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1858 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1859 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned. */
1860 if (num_bytes == 0 || (num_bytes & mask))
1861 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1862 if (num_bytes + extent_offset >
1863 btrfs_file_extent_ram_bytes(eb, fi))
1864 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* A preallocated extent must not carry any encoding. */
1865 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1866 (btrfs_file_extent_compression(eb, fi) ||
1867 btrfs_file_extent_encryption(eb, fi) ||
1868 btrfs_file_extent_other_encoding(eb, fi)))
1869 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents backed by disk space count towards found_size. */
1870 if (disk_bytenr > 0)
1871 rec->found_size += num_bytes;
1873 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1875 rec->extent_end = key->offset + num_bytes;
1878 * The data reloc tree will copy full extents into its inode and then
1879 * copy the corresponding csums. Because the extent it copied could be
1880 * a preallocated extent that hasn't been written to yet there may be no
1881 * csums to copy, ergo we won't have csums for our file extent. This is
1882 * ok so just don't bother checking csums if the inode belongs to the
1885 if (disk_bytenr > 0 &&
1886 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1888 if (btrfs_file_extent_compression(eb, fi))
1889 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1891 disk_bytenr += extent_offset;
1893 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1896 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1898 rec->found_csum_item = 1;
1899 if (found < num_bytes)
1900 rec->some_csum_missing = 1;
1901 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1903 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item of leaf eb for the original-mode fs tree check.  Items
 * are dispatched by key type to the process_*() helpers, which update the
 * inode records cached on the active shared node.  Whenever an item with a
 * larger objectid than the current inode is met, the current record is
 * marked checked and the record for the new objectid becomes current.
 */
1909 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1910 struct walk_control *wc)
1912 struct btrfs_key key;
1916 struct cache_tree *inode_cache;
1917 struct shared_node *active_node;
1919 if (wc->root_level == wc->active_node &&
1920 btrfs_root_refs(&root->root_item) == 0)
1923 active_node = wc->nodes[wc->active_node];
1924 inode_cache = &active_node->inode_cache;
1925 nritems = btrfs_header_nritems(eb);
1926 for (i = 0; i < nritems; i++) {
1927 btrfs_item_key_to_cpu(eb, &key, i);
1929 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1931 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1934 if (active_node->current == NULL ||
1935 active_node->current->ino < key.objectid) {
1936 if (active_node->current) {
1937 active_node->current->checked = 1;
1938 maybe_free_inode_rec(inode_cache,
1939 active_node->current);
1941 active_node->current = get_inode_rec(inode_cache,
1943 BUG_ON(IS_ERR(active_node->current));
1946 case BTRFS_DIR_ITEM_KEY:
1947 case BTRFS_DIR_INDEX_KEY:
1948 ret = process_dir_item(eb, i, &key, active_node);
1950 case BTRFS_INODE_REF_KEY:
1951 ret = process_inode_ref(eb, i, &key, active_node);
1953 case BTRFS_INODE_EXTREF_KEY:
1954 ret = process_inode_extref(eb, i, &key, active_node);
1956 case BTRFS_INODE_ITEM_KEY:
1957 ret = process_inode_item(eb, i, &key, active_node);
1959 case BTRFS_EXTENT_DATA_KEY:
1960 ret = process_file_extent(root, eb, i, &key,
/*
 * Per-level cache filled by update_nodes_refs(): the bytenr of the last tree
 * block looked up at each level, its reference count, and whether the block
 * has to be checked from the current root.
 */
1971 u64 bytenr[BTRFS_MAX_LEVEL];
1972 u64 refs[BTRFS_MAX_LEVEL];
1973 int need_check[BTRFS_MAX_LEVEL];
/* Forward declarations for helpers defined later in this file. */
1976 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1977 struct node_refs *nrefs, u64 level);
1978 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1979 unsigned int ext_ref);
1982 * Returns >0 Found error, not fatal, should continue
1983 * Returns <0 Fatal error, must exit the whole check
1984 * Returns 0 No errors found
/*
 * Lowmem-mode leaf processing: position on the first inode item (or first
 * inode number change) in the leaf at path->nodes[0], then check inodes with
 * check_inode_item(), OR-ing the results into err.  When the path has moved
 * to a different leaf, the cached per-level reference info in nrefs is
 * refreshed from the root level downwards so shared blocks that need no
 * check can be detected.
 */
1986 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1987 struct node_refs *nrefs, int *level, int ext_ref)
1989 struct extent_buffer *cur = path->nodes[0];
1990 struct btrfs_key key;
1994 int root_level = btrfs_header_level(root->node);
1996 int ret = 0; /* Final return value */
1997 int err = 0; /* Positive error bitmap */
/* Remember which leaf we started in to detect a leaf switch later. */
1999 cur_bytenr = cur->start;
2001 /* skip to first inode item or the first inode number change */
2002 nritems = btrfs_header_nritems(cur);
2003 for (i = 0; i < nritems; i++) {
2004 btrfs_item_key_to_cpu(cur, &key, i);
2006 first_ino = key.objectid;
2007 if (key.type == BTRFS_INODE_ITEM_KEY ||
2008 (first_ino && first_ino != key.objectid))
2012 path->slots[0] = nritems;
2018 err |= check_inode_item(root, path, ext_ref);
2020 /* modify cur since check_inode_item may change path */
2021 cur = path->nodes[0];
2023 if (err & LAST_ITEM)
2026 /* still have inode items in this leaf */
2027 if (cur->start == cur_bytenr)
2031 * we have switched to another leaf, above nodes may
2032 * have changed, here walk down the path, if a node
2033 * or leaf is shared, check whether we can skip this
2036 for (i = root_level; i >= 0; i--) {
2037 if (path->nodes[i]->start == nrefs->bytenr[i])
2040 ret = update_nodes_refs(root,
2041 path->nodes[i]->start,
2046 if (!nrefs->need_check[i]) {
2052 for (i = 0; i < *level; i++) {
2053 free_extent_buffer(path->nodes[i]);
2054 path->nodes[i] = NULL;
/*
 * Issue readahead for the children of `node`, starting at `slot` and running
 * to the last pointer in the node.
 * NOTE(review): a level-based early exit presumably follows the level lookup
 * but is not visible here.
 */
2063 static void reada_walk_down(struct btrfs_root *root,
2064 struct extent_buffer *node, int slot)
2066 struct btrfs_fs_info *fs_info = root->fs_info;
2073 level = btrfs_header_level(node);
2077 nritems = btrfs_header_nritems(node);
2078 for (i = slot; i < nritems; i++) {
2079 bytenr = btrfs_node_blockptr(node, i);
2080 ptr_gen = btrfs_node_ptr_generation(node, i);
2081 readahead_tree_block(fs_info, bytenr, ptr_gen);
2086 * Check the child node/leaf by the following condition:
2087 * 1. the first item key of the node/leaf should be the same with the one
2089 * 2. block in parent node should match the child node/leaf.
2090 * 3. generation of parent node and child's header should be consistent.
2092 * Or the child node/leaf pointed by the key in parent is not valid.
2094 * We hope to check leaf owner too, but since subvol may share leaves,
2095 * which makes leaf owner check not so strong, key check should be
2096 * sufficient enough for that case.
2098 static int check_child_node(struct extent_buffer *parent, int slot,
2099 struct extent_buffer *child)
2101 struct btrfs_key parent_key;
2102 struct btrfs_key child_key;
2105 btrfs_node_key_to_cpu(parent, &parent_key, slot);
2106 if (btrfs_header_level(child) == 0)
2107 btrfs_item_key_to_cpu(child, &child_key, 0);
2109 btrfs_node_key_to_cpu(child, &child_key, 0);
2111 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2114 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2115 parent_key.objectid, parent_key.type, parent_key.offset,
2116 child_key.objectid, child_key.type, child_key.offset);
2118 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2120 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2121 btrfs_node_blockptr(parent, slot),
2122 btrfs_header_bytenr(child));
2124 if (btrfs_node_ptr_generation(parent, slot) !=
2125 btrfs_header_generation(child)) {
2127 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2128 btrfs_header_generation(child),
2129 btrfs_node_ptr_generation(parent, slot));
2135 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2136 * in every fs or file tree check. Here we find all of its root ids, and only check
2137 * it in the fs or file tree which has the smallest root id.
/*
 * Decide whether a tree block referenced by the roots in `roots` should be
 * checked while walking `root`.  A block with a single referencing root is
 * handled by the nnodes == 1 test; otherwise root->objectid is compared
 * with the first entry of the ulist's rb-tree.
 * NOTE(review): the return statements are not visible here.
 */
2139 static int need_check(struct btrfs_root *root, struct ulist *roots)
2141 struct rb_node *node;
2142 struct ulist_node *u;
2144 if (roots->nnodes == 1)
2147 node = rb_first(&roots->root);
2148 u = rb_entry(node, struct ulist_node, rb_node);
2150 * current root id is not smallest, we skip it and let it be checked
2151 * in the fs or file tree who hash the smallest root id.
2153 if (root->objectid != u->val)
2160 * for a tree node or leaf, we record its reference count, so later if we still
2161 * process this node or leaf, don't need to compute its reference count again.
/*
 * Refresh the nrefs cache entry for `level` when it does not already
 * describe `bytenr`: look up the block's reference count, store bytenr and
 * refs, and compute need_check[level].  The visible code calls
 * btrfs_find_all_roots() and need_check() to compute it, and also has a
 * path that sets it to 1 directly.
 * NOTE(review): the condition choosing between those two paths is not
 * visible here - presumably it depends on refs; confirm.
 */
2163 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2164 struct node_refs *nrefs, u64 level)
2168 struct ulist *roots;
2170 if (nrefs->bytenr[level] != bytenr) {
2171 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2172 level, 1, &refs, NULL);
2176 nrefs->bytenr[level] = bytenr;
2177 nrefs->refs[level] = refs;
2179 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2184 check = need_check(root, roots);
2186 nrefs->need_check[level] = check;
2188 nrefs->need_check[level] = 1;
/*
 * Original-mode descent: starting from path->nodes[*level], follow the
 * current slot down one level per iteration, processing leaves with
 * process_one_leaf().  Reference counts of visited blocks are cached in
 * nrefs, and enter_shared_node() is called for blocks along the way.
 * Each child is read, validated against its parent pointer with
 * check_child_node() and sanity-checked with btrfs_check_leaf() or
 * btrfs_check_node() before it is installed in the path.
 * NOTE(review): the refs conditions guarding the enter_shared_node() calls
 * and the error exits are on lines not visible here.
 */
2195 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2196 struct walk_control *wc, int *level,
2197 struct node_refs *nrefs)
2199 enum btrfs_tree_block_status status;
2202 struct btrfs_fs_info *fs_info = root->fs_info;
2203 struct extent_buffer *next;
2204 struct extent_buffer *cur;
2208 WARN_ON(*level < 0);
2209 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached ref count when nrefs already describes this block. */
2211 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2212 refs = nrefs->refs[*level];
2215 ret = btrfs_lookup_extent_info(NULL, root,
2216 path->nodes[*level]->start,
2217 *level, 1, &refs, NULL);
2222 nrefs->bytenr[*level] = path->nodes[*level]->start;
2223 nrefs->refs[*level] = refs;
2227 ret = enter_shared_node(root, path->nodes[*level]->start,
2235 while (*level >= 0) {
2236 WARN_ON(*level < 0);
2237 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2238 cur = path->nodes[*level];
2240 if (btrfs_header_level(cur) != *level)
2243 if (path->slots[*level] >= btrfs_header_nritems(cur))
2246 ret = process_one_leaf(root, cur, wc);
2251 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2252 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2254 if (bytenr == nrefs->bytenr[*level - 1]) {
2255 refs = nrefs->refs[*level - 1];
2257 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2258 *level - 1, 1, &refs, NULL);
2262 nrefs->bytenr[*level - 1] = bytenr;
2263 nrefs->refs[*level - 1] = refs;
2268 ret = enter_shared_node(root, bytenr, refs,
2271 path->slots[*level]++;
/* Use the cached buffer if it is up to date, otherwise read from disk. */
2276 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2277 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2278 free_extent_buffer(next);
2279 reada_walk_down(root, cur, path->slots[*level]);
2280 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2281 if (!extent_buffer_uptodate(next)) {
2282 struct btrfs_key node_key;
2284 btrfs_node_key_to_cpu(path->nodes[*level],
2286 path->slots[*level]);
2287 btrfs_add_corrupt_extent_record(root->fs_info,
2289 path->nodes[*level]->start,
2290 root->fs_info->nodesize,
2297 ret = check_child_node(cur, path->slots[*level], next);
2299 free_extent_buffer(next);
2304 if (btrfs_is_leaf(next))
2305 status = btrfs_check_leaf(root, NULL, next);
2307 status = btrfs_check_node(root, NULL, next);
2308 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2309 free_extent_buffer(next);
/* Descend: the child replaces whatever the path held one level down. */
2314 *level = *level - 1;
2315 free_extent_buffer(path->nodes[*level]);
2316 path->nodes[*level] = next;
2317 path->slots[*level] = 0;
/* Mark the current level as fully consumed so the walk up moves on. */
2320 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Forward declaration of check_inode_item(); it repeats the identical
 * declaration that appears earlier in this file.
 */
2324 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2325 unsigned int ext_ref);
2328 * Returns >0 Found error, should continue
2329 * Returns <0 Fatal error, must exit the whole check
2330 * Returns 0 No errors found
/*
 * Lowmem-mode descent: starting from path->nodes[*level], validate each
 * block with btrfs_check_leaf() or btrfs_check_node(), process leaves with
 * process_one_leaf_v2(), and follow the current slot downwards.  Children
 * whose need_check flag in nrefs is clear are skipped by advancing the slot.
 * Each child that is followed is read, compared against its parent pointer
 * with check_child_node() and sanity-checked before it is installed in the
 * path.
 */
2332 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2333 int *level, struct node_refs *nrefs, int ext_ref)
2335 enum btrfs_tree_block_status status;
2338 struct btrfs_fs_info *fs_info = root->fs_info;
2339 struct extent_buffer *next;
2340 struct extent_buffer *cur;
2343 WARN_ON(*level < 0);
2344 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2346 ret = update_nodes_refs(root, path->nodes[*level]->start,
2351 while (*level >= 0) {
2352 WARN_ON(*level < 0);
2353 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2354 cur = path->nodes[*level];
2356 if (btrfs_header_level(cur) != *level)
2359 if (path->slots[*level] >= btrfs_header_nritems(cur))
2361 /* Don't forget to check leaf/node validation */
2363 ret = btrfs_check_leaf(root, NULL, cur);
2364 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2368 ret = process_one_leaf_v2(root, path, nrefs,
2370 cur = path->nodes[*level];
2373 ret = btrfs_check_node(root, NULL, cur);
2374 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2379 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2380 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2382 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
/* Shared child that another root is responsible for: skip it. */
2385 if (!nrefs->need_check[*level - 1]) {
2386 path->slots[*level]++;
2390 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2391 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2392 free_extent_buffer(next);
2393 reada_walk_down(root, cur, path->slots[*level]);
2394 next = read_tree_block(fs_info, bytenr, ptr_gen);
2395 if (!extent_buffer_uptodate(next)) {
2396 struct btrfs_key node_key;
2398 btrfs_node_key_to_cpu(path->nodes[*level],
2400 path->slots[*level]);
2401 btrfs_add_corrupt_extent_record(fs_info,
2403 path->nodes[*level]->start,
2411 ret = check_child_node(cur, path->slots[*level], next);
2415 if (btrfs_is_leaf(next))
2416 status = btrfs_check_leaf(root, NULL, next);
2418 status = btrfs_check_node(root, NULL, next);
2419 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2420 free_extent_buffer(next);
/* Descend: the child replaces whatever the path held one level down. */
2425 *level = *level - 1;
2426 free_extent_buffer(path->nodes[*level]);
2427 path->nodes[*level] = next;
2428 path->slots[*level] = 0;
/*
 * Original-mode ascent: climb from *level while path nodes exist, looking
 * for a level that still has an unvisited slot.  Exhausted levels have
 * their buffer released and their path entry cleared, and
 * leave_shared_node() is called when the exhausted level is the active
 * shared node.
 * NOTE(review): the slot advance and the return values are on lines not
 * visible here.
 */
2433 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2434 struct walk_control *wc, int *level)
2437 struct extent_buffer *leaf;
2439 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2440 leaf = path->nodes[i];
2441 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2446 free_extent_buffer(path->nodes[*level]);
2447 path->nodes[*level] = NULL;
2448 BUG_ON(*level > wc->active_node);
2449 if (*level == wc->active_node)
2450 leave_shared_node(root, wc, *level);
/*
 * Lowmem-mode ascent: same climbing loop as walk_up_tree(), but with no
 * shared-node bookkeeping in the visible code.  Exhausted levels have their
 * buffer released and their path entry cleared.
 */
2457 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2461 struct extent_buffer *leaf;
2463 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2464 leaf = path->nodes[i];
2465 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2470 free_extent_buffer(path->nodes[*level]);
2471 path->nodes[*level] = NULL;
/*
 * Validate the inode record of a root directory.  The visible tests cover:
 * an inode item was found and no errors are recorded; nlink is 1 and
 * found_link is 0; a backref exists and the first one has an inode ref with
 * index 0 and name ".."; that backref has neither a dir index nor a dir
 * item.
 * NOTE(review): what each failed test returns is not visible here.
 */
2478 static int check_root_dir(struct inode_record *rec)
2480 struct inode_backref *backref;
2483 if (!rec->found_inode_item || rec->errors)
2485 if (rec->nlink != 1 || rec->found_link != 0)
2487 if (list_empty(&rec->backrefs))
2489 backref = to_inode_backref(rec->backrefs.next);
2490 if (!backref->found_inode_ref)
2492 if (backref->index != 0 || backref->namelen != 2 ||
2493 memcmp(backref->name, "..", 2))
2495 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size field of the inode item of rec->ino to rec->found_size
 * and clear I_ERR_DIR_ISIZE_WRONG on the record.  The path is released
 * before returning.
 */
2502 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2503 struct btrfs_root *root, struct btrfs_path *path,
2504 struct inode_record *rec)
2506 struct btrfs_inode_item *ei;
2507 struct btrfs_key key;
/*
 * Search with the largest possible offset.
 * NOTE(review): the slot is presumably stepped back to the inode item on a
 * line not visible here - confirm.
 */
2510 key.objectid = rec->ino;
2511 key.type = BTRFS_INODE_ITEM_KEY;
2512 key.offset = (u64)-1;
2514 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2518 if (!path->slots[0]) {
2525 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2526 if (key.objectid != rec->ino) {
2531 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2532 struct btrfs_inode_item);
2533 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2534 btrfs_mark_buffer_dirty(path->nodes[0]);
2535 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2536 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2537 root->root_key.objectid);
2539 btrfs_release_path(path);
/*
 * Add an orphan item for rec->ino, release the path, and clear
 * I_ERR_NO_ORPHAN_ITEM on the record.
 * NOTE(review): whether the flag is cleared only on success is decided on a
 * line not visible here.
 */
2543 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2544 struct btrfs_root *root,
2545 struct btrfs_path *path,
2546 struct inode_record *rec)
2550 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2551 btrfs_release_path(path);
2553 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field of the inode item of rec->ino to rec->found_size
 * and clear I_ERR_FILE_NBYTES_WRONG on the record.  The path is released
 * before returning.
 */
2557 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2558 struct btrfs_root *root,
2559 struct btrfs_path *path,
2560 struct inode_record *rec)
2562 struct btrfs_inode_item *ei;
2563 struct btrfs_key key;
2566 key.objectid = rec->ino;
2567 key.type = BTRFS_INODE_ITEM_KEY;
2570 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2577 /* Since ret == 0, no need to check anything */
2578 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2579 struct btrfs_inode_item);
2580 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2581 btrfs_mark_buffer_dirty(path->nodes[0]);
2582 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2583 printf("reset nbytes for ino %llu root %llu\n",
2584 rec->ino, root->root_key.objectid);
2586 btrfs_release_path(path);
/*
 * Insert the DIR_INDEX item (backref->dir, DIR_INDEX, backref->index) that
 * is missing for inode rec, inside its own transaction.  The new item
 * points at the inode item of rec->ino, carries the backref's name and no
 * extra data.
 *
 * Afterwards the backref is marked as having a dir index, and the parent
 * directory's record has its found_size grown by the name length and its
 * I_ERR_DIR_ISIZE_WRONG flag re-evaluated against isize.
 */
2590 static int add_missing_dir_index(struct btrfs_root *root,
2591 struct cache_tree *inode_cache,
2592 struct inode_record *rec,
2593 struct inode_backref *backref)
2595 struct btrfs_path path;
2596 struct btrfs_trans_handle *trans;
2597 struct btrfs_dir_item *dir_item;
2598 struct extent_buffer *leaf;
2599 struct btrfs_key key;
2600 struct btrfs_disk_key disk_key;
2601 struct inode_record *dir_rec;
2602 unsigned long name_ptr;
/* Item payload: dir item header followed directly by the name. */
2603 u32 data_size = sizeof(*dir_item) + backref->namelen;
2606 trans = btrfs_start_transaction(root, 1);
2608 return PTR_ERR(trans);
2610 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2611 (unsigned long long)rec->ino);
2613 btrfs_init_path(&path);
2614 key.objectid = backref->dir;
2615 key.type = BTRFS_DIR_INDEX_KEY;
2616 key.offset = backref->index;
2617 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2620 leaf = path.nodes[0];
2621 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* Location key stored in the dir item: the inode item of rec->ino. */
2623 disk_key.objectid = cpu_to_le64(rec->ino);
2624 disk_key.type = BTRFS_INODE_ITEM_KEY;
2625 disk_key.offset = 0;
2627 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2628 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2629 btrfs_set_dir_data_len(leaf, dir_item, 0);
2630 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2631 name_ptr = (unsigned long)(dir_item + 1);
2632 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2633 btrfs_mark_buffer_dirty(leaf);
2634 btrfs_release_path(&path);
/* NOTE(review): the result of the commit is not examined here. */
2635 btrfs_commit_transaction(trans, root);
2637 backref->found_dir_index = 1;
2638 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2639 BUG_ON(IS_ERR(dir_rec));
/* The new index entry contributes its name length to the directory size. */
2642 dir_rec->found_size += backref->namelen;
2643 if (dir_rec->found_size == dir_rec->isize &&
2644 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2645 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2646 if (dir_rec->found_size != dir_rec->isize)
2647 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by backref (directory, name, index)
 * inside its own transaction.  The entry is located with
 * btrfs_lookup_dir_index(); the visible code then either removes the whole
 * item with btrfs_del_item() or only this name with
 * btrfs_delete_one_dir_name().
 * NOTE(review): the test choosing between those two calls, and the handling
 * of a failed lookup, are on lines not visible here.
 */
2652 static int delete_dir_index(struct btrfs_root *root,
2653 struct inode_backref *backref)
2655 struct btrfs_trans_handle *trans;
2656 struct btrfs_dir_item *di;
2657 struct btrfs_path path;
2660 trans = btrfs_start_transaction(root, 1);
2662 return PTR_ERR(trans);
2664 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2665 (unsigned long long)backref->dir,
2666 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2667 (unsigned long long)root->objectid);
2669 btrfs_init_path(&path);
2670 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2671 backref->name, backref->namelen,
2672 backref->index, -1);
2675 btrfs_release_path(&path);
2676 btrfs_commit_transaction(trans, root);
2683 ret = btrfs_del_item(trans, root, &path);
2685 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2687 btrfs_release_path(&path);
2688 btrfs_commit_transaction(trans, root);
/*
 * Recreate a missing inode item for rec->ino inside its own transaction.
 *
 * The item is built on the stack from what the check has collected:
 * generation is the transaction id, nbytes is rec->found_size, and
 * atime/ctime/mtime are set to the current time while otime is zero.  A
 * record with a dir item becomes a directory (S_IFDIR | 0755, size =
 * found_size); otherwise it becomes a regular file (S_IFREG | 0755, size =
 * extent_end).
 * NOTE(review): the condition choosing between nlink = 1 and nlink =
 * found_link is not visible here.
 */
2692 static int create_inode_item(struct btrfs_root *root,
2693 struct inode_record *rec,
2696 struct btrfs_trans_handle *trans;
2697 struct btrfs_inode_item inode_item;
2698 time_t now = time(NULL);
2701 trans = btrfs_start_transaction(root, 1);
2702 if (IS_ERR(trans)) {
2703 ret = PTR_ERR(trans);
2707 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2708 "be incomplete, please check permissions and content after "
2709 "the fsck completes.\n", (unsigned long long)root->objectid,
2710 (unsigned long long)rec->ino);
2712 memset(&inode_item, 0, sizeof(inode_item));
2713 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2715 btrfs_set_stack_inode_nlink(&inode_item, 1);
2717 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2718 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2719 if (rec->found_dir_item) {
2720 if (rec->found_file_extent)
2721 fprintf(stderr, "root %llu inode %llu has both a dir "
2722 "item and extents, unsure if it is a dir or a "
2723 "regular file so setting it as a directory\n",
2724 (unsigned long long)root->objectid,
2725 (unsigned long long)rec->ino);
2726 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2727 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/* This condition is always true here, given the preceding if. */
2728 } else if (!rec->found_dir_item) {
2729 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2730 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2732 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2733 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2734 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2735 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2736 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2737 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2738 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2739 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2741 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2743 btrfs_commit_transaction(trans, root);
/*
 * Walk every backref of @rec and repair inconsistent combinations of
 * dir item, dir index and inode ref.
 *
 * The branches guarded by !delete add what is missing: the inode item of the
 * root directory, a missing dir index, a missing dir index/item pair, or the
 * inode item of a fully referenced inode. The delete_dir_index() branch
 * removes a dir index that has no inode ref or whose index is flagged with
 * REF_ERR_INDEX_UNMATCH (presumably this is the delete pass - confirm).
 *
 * The list is walked with the _safe iterator because backrefs are removed
 * from rec->backrefs while iterating.
 *
 * Returns ret when it is non-zero, otherwise the value of repaired.
 */
2747 static int repair_inode_backrefs(struct btrfs_root *root,
2748 struct inode_record *rec,
2749 struct cache_tree *inode_cache,
2752 struct inode_backref *tmp, *backref;
2753 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2757 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* The root directory lost its inode item: recreate it first */
2758 if (!delete && rec->ino == root_dirid) {
2759 if (!rec->found_inode_item) {
2760 ret = create_inode_item(root, rec, 1);
2767 /* Index 0 for root dir's are special, don't mess with it */
2768 if (rec->ino == root_dirid && backref->index == 0)
2772 ((backref->found_dir_index && !backref->found_inode_ref) ||
2773 (backref->found_dir_index && backref->found_inode_ref &&
2774 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2775 ret = delete_dir_index(root, backref);
2779 list_del(&backref->list);
/* Dir item and inode ref exist but the dir index is missing: add it back */
2784 if (!delete && !backref->found_dir_index &&
2785 backref->found_dir_item && backref->found_inode_ref) {
2786 ret = add_missing_dir_index(root, inode_cache, rec,
/* A complete, error-free backref needs no more work: unlink it from the list */
2791 if (backref->found_dir_item &&
2792 backref->found_dir_index) {
2793 if (!backref->errors &&
2794 backref->found_inode_ref) {
2795 list_del(&backref->list);
/* Only the inode ref exists: recreate the dir index/item pair it points to */
2802 if (!delete && (!backref->found_dir_index &&
2803 !backref->found_dir_item &&
2804 backref->found_inode_ref)) {
2805 struct btrfs_trans_handle *trans;
2806 struct btrfs_key location;
2808 ret = check_dir_conflict(root, backref->name,
2814 * let nlink fixing routine to handle it,
2815 * which can do it better.
2820 location.objectid = rec->ino;
2821 location.type = BTRFS_INODE_ITEM_KEY;
2822 location.offset = 0;
2824 trans = btrfs_start_transaction(root, 1);
2825 if (IS_ERR(trans)) {
2826 ret = PTR_ERR(trans);
2829 fprintf(stderr, "adding missing dir index/item pair "
2831 (unsigned long long)rec->ino);
2832 ret = btrfs_insert_dir_item(trans, root, backref->name,
2834 backref->dir, &location,
2835 imode_to_type(rec->imode),
2838 btrfs_commit_transaction(trans, root);
/* All three references agree but the inode item itself is missing */
2842 if (!delete && (backref->found_inode_ref &&
2843 backref->found_dir_index &&
2844 backref->found_dir_item &&
2845 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2846 !rec->found_inode_item)) {
2847 ret = create_inode_item(root, rec, 0);
2854 return ret ? ret : repaired;
2858 * To determine the file type for nlink/inode_item repair
2860 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2861 * Return -ENOENT if file type is not found.
/*
 * Derive a BTRFS_FT_* value for @rec and store it in *type.
 *
 * When the inode item was found the type comes from rec->imode; otherwise it
 * is taken from a backref that has a dir index or a dir item, since those
 * carry the filetype.
 */
2863 static int find_file_type(struct inode_record *rec, u8 *type)
2865 struct inode_backref *backref;
2867 /* For inode item recovered case */
2868 if (rec->found_inode_item) {
2869 *type = imode_to_type(rec->imode);
2873 list_for_each_entry(backref, &rec->backrefs, list) {
2874 if (backref->found_dir_index || backref->found_dir_item) {
2875 *type = backref->filetype;
2883 * To determine the file name for nlink repair
2885 * Return 0 if file name is found, set name and namelen.
2886 * Return -ENOENT if file name is not found.
/*
 * Copy the name of a backref of @rec into @name and its length into *namelen.
 *
 * Any backref that has a dir index, a dir item or an inode ref qualifies.
 * The copy is a memcpy() of backref->namelen bytes: no NUL terminator is
 * written and the capacity of @name is not checked here, so the caller has
 * to provide a buffer that can hold the longest possible name.
 */
2888 static int find_file_name(struct inode_record *rec,
2889 char *name, int *namelen)
2891 struct inode_backref *backref;
2893 list_for_each_entry(backref, &rec->backrefs, list) {
2894 if (backref->found_dir_index || backref->found_dir_item ||
2895 backref->found_inode_ref) {
2896 memcpy(name, backref->name, backref->namelen);
2897 *namelen = backref->namelen;
2904 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of @rec from scratch, in three steps:
 *
 *   1. Zero rec->found_link and btrfs_unlink() every recorded backref.
 *      Backrefs that lack any of dir index, dir item or inode ref are also
 *      removed from rec->backrefs so that step 3 does not re-add them.
 *   2. Look up the inode item (search with cow enabled) and write nlink = 0.
 *   3. btrfs_add_link() every backref still on the list.
 *
 * @path is released before returning.
 */
2905 static int reset_nlink(struct btrfs_trans_handle *trans,
2906 struct btrfs_root *root,
2907 struct btrfs_path *path,
2908 struct inode_record *rec)
2910 struct inode_backref *backref;
2911 struct inode_backref *tmp;
2912 struct btrfs_key key;
2913 struct btrfs_inode_item *inode_item;
2916 /* We don't believe this either, reset it and iterate backref */
2917 rec->found_link = 0;
2919 /* Remove all backref including the valid ones */
2920 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2921 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2922 backref->index, backref->name,
2923 backref->namelen, 0);
2927 /* remove invalid backref, so it won't be added back */
2928 if (!(backref->found_dir_index &&
2929 backref->found_dir_item &&
2930 backref->found_inode_ref)) {
2931 list_del(&backref->list);
2938 /* Set nlink to 0 */
2939 key.objectid = rec->ino;
2940 key.type = BTRFS_INODE_ITEM_KEY;
2942 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2949 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2950 struct btrfs_inode_item);
2951 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2952 btrfs_mark_buffer_dirty(path->nodes[0]);
2953 btrfs_release_path(path);
2956 * Add back valid inode_ref/dir_item/dir_index,
2957 * add_link() will handle the nlink inc, so new nlink must be correct
2959 list_for_each_entry(backref, &rec->backrefs, list) {
2960 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2961 backref->name, backref->namelen,
2962 backref->filetype, &backref->index, 1);
2967 btrfs_release_path(path);
/*
 * Report the highest inode objectid present in @root through *highest_ino.
 *
 * The tree is searched for an INODE_ITEM key at BTRFS_LAST_FREE_OBJECTID and
 * the key stored in the slot just before the returned position is read; its
 * objectid is the result. The result is then compared against
 * BTRFS_LAST_FREE_OBJECTID (presumably to reject a full objectid space -
 * confirm). @path is initialised on entry and released before returning.
 */
2971 static int get_highest_inode(struct btrfs_trans_handle *trans,
2972 struct btrfs_root *root,
2973 struct btrfs_path *path,
2976 struct btrfs_key key, found_key;
2979 btrfs_init_path(path);
2980 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2982 key.type = BTRFS_INODE_ITEM_KEY;
2983 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2985 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2986 path->slots[0] - 1);
2987 *highest_ino = found_key.objectid;
2990 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2992 btrfs_release_path(path);
/*
 * Fix a wrong link count on @rec.
 *
 * The name and file type are recovered from the backrefs first, because
 * reset_nlink() drops the invalid ones. When no name is found the decimal
 * inode number is used; when no type is found BTRFS_FT_REG_FILE is used.
 *
 * After reset_nlink(), an inode left with rec->found_link == 0 has no valid
 * link at all. For it, a lost+found directory is created under
 * BTRFS_FIRST_FREE_OBJECTID with btrfs_mkdir() and the inode is linked into
 * it. On -EEXIST the name is extended with a suffix and the link is retried
 * until it fits no longer in BTRFS_NAME_LEN.
 *
 * I_ERR_LINK_COUNT_WRONG is cleared and @path released at the end.
 */
2996 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2997 struct btrfs_root *root,
2998 struct btrfs_path *path,
2999 struct inode_record *rec)
3001 char *dir_name = "lost+found";
3002 char namebuf[BTRFS_NAME_LEN] = {0};
3007 int name_recovered = 0;
3008 int type_recovered = 0;
3012 * Get file name and type first before these invalid inode ref
3013 * are deleted by remove_all_invalid_backref()
3015 name_recovered = !find_file_name(rec, namebuf, &namelen);
3016 type_recovered = !find_file_type(rec, &type);
/* No backref carried a name: fall back to the inode number as the name */
3018 if (!name_recovered) {
3019 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3020 rec->ino, rec->ino);
3021 namelen = count_digits(rec->ino);
3022 sprintf(namebuf, "%llu", rec->ino);
3025 if (!type_recovered) {
3026 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3028 type = BTRFS_FT_REG_FILE;
3032 ret = reset_nlink(trans, root, path, rec);
3035 "Failed to reset nlink for inode %llu: %s\n",
3036 rec->ino, strerror(-ret));
/* No valid link survived the reset: park the inode in lost+found */
3040 if (rec->found_link == 0) {
3041 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3045 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3046 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3049 fprintf(stderr, "Failed to create '%s' dir: %s\n",
3050 dir_name, strerror(-ret));
3053 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
3054 namebuf, namelen, type, NULL, 1);
3056 * Add ".INO" suffix several times to handle case where
3057 * "FILENAME.INO" is already taken by another file.
3059 while (ret == -EEXIST) {
3061 * Conflicting file name, add ".INO" as suffix * +1 for '.'
3063 if (namelen + count_digits(rec->ino) + 1 >
3068 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
3070 namelen += count_digits(rec->ino) + 1;
3071 ret = btrfs_add_link(trans, root, rec->ino,
3072 lost_found_ino, namebuf,
3073 namelen, type, NULL, 1);
3077 "Failed to link the inode %llu to %s dir: %s\n",
3078 rec->ino, dir_name, strerror(-ret));
3082 * Just increase the found_link, don't actually add the
3083 * backref. This will make things easier and this inode
3084 * record will be freed after the repair is done.
3085 * So fsck will not report problem about this inode.
3088 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3089 namelen, namebuf, dir_name);
3091 printf("Fixed the nlink of inode %llu\n", rec->ino);
3094 * Clear the flag anyway, or we will loop forever for the same inode
3095 * as it will not be removed from the bad inode list and the dead loop
3098 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3099 btrfs_release_path(path);
3104 * Check if there is any normal(reg or prealloc) file extent for given
3106 * This is used to determine the file type when neither its dir_index/item or
3107 * inode_item exists.
3109 * This will *NOT* report error, if any error happens, just consider it does
3110 * not have any normal file extent.
/*
 * Probe @root for a file extent of inode @ino that is not inline.
 *
 * The lookup is read-only (NULL transaction, no cow). It searches for an
 * EXTENT_DATA key, steps to the next leaf when the search lands past the
 * last item of the current one, verifies that the item found belongs to
 * @ino and is an EXTENT_DATA item, and finally tests its extent type against
 * BTRFS_FILE_EXTENT_INLINE. The local path is released before returning.
 *
 * The caller in this file uses the result as a boolean.
 */
3112 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3114 struct btrfs_path path;
3115 struct btrfs_key key;
3116 struct btrfs_key found_key;
3117 struct btrfs_file_extent_item *fi;
3121 btrfs_init_path(&path);
3123 key.type = BTRFS_EXTENT_DATA_KEY;
3126 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* The search may point one past the last item of the leaf: move on */
3131 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3132 ret = btrfs_next_leaf(root, &path);
3139 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3141 if (found_key.objectid != ino ||
3142 found_key.type != BTRFS_EXTENT_DATA_KEY)
3144 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3145 struct btrfs_file_extent_item);
3146 type = btrfs_file_extent_type(path.nodes[0], fi);
3147 if (type != BTRFS_FILE_EXTENT_INLINE) {
3153 btrfs_release_path(&path);
/*
 * Translate a BTRFS_FT_* directory entry type into the matching S_IF* file
 * type bits using a static lookup table.
 *
 * NOTE(review): @type indexes the table without a bounds check. The table
 * only has entries up to the largest BTRFS_FT_* designator listed below, so
 * any larger value reads past its end - confirm that every caller passes a
 * validated type.
 */
3157 static u32 btrfs_type_to_imode(u8 type)
3159 static u32 imode_by_btrfs_type[] = {
3160 [BTRFS_FT_REG_FILE] = S_IFREG,
3161 [BTRFS_FT_DIR] = S_IFDIR,
3162 [BTRFS_FT_CHRDEV] = S_IFCHR,
3163 [BTRFS_FT_BLKDEV] = S_IFBLK,
3164 [BTRFS_FT_FIFO] = S_IFIFO,
3165 [BTRFS_FT_SOCK] = S_IFSOCK,
3166 [BTRFS_FT_SYMLINK] = S_IFLNK,
3169 return imode_by_btrfs_type[(type)];
/*
 * Rebuild the missing inode item of @rec with btrfs_new_inode().
 *
 * The file type comes from find_file_type() when possible. Otherwise it is
 * guessed in this order: a non-inline file extent means regular file, a dir
 * item means directory, pending orphan extents mean regular file, and
 * regular file is also the last-resort fallback.
 *
 * Only the inode item is rebuilt here. On the way out the record is updated
 * (imode, found_dir_item), I_ERR_NO_INODE_ITEM is cleared and
 * I_ERR_LINK_COUNT_WRONG is set so that the nlink repair runs afterwards.
 *
 * NOTE(review): the record update sets found_dir_item, not found_inode_item,
 * even for non-directories - confirm that this is intended.
 */
3172 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3173 struct btrfs_root *root,
3174 struct btrfs_path *path,
3175 struct inode_record *rec)
3179 int type_recovered = 0;
3182 printf("Trying to rebuild inode:%llu\n", rec->ino);
3184 type_recovered = !find_file_type(rec, &filetype);
3187 * Try to determine inode type if type not found.
3189 * For found regular file extent, it must be FILE.
3190 * For found dir_item/index, it must be DIR.
3192 * For undetermined one, use FILE as fallback.
3195 * 1. If found backref(inode_index/item is already handled) to it,
3197 * Need new inode-inode ref structure to allow search for that.
3199 if (!type_recovered) {
3200 if (rec->found_file_extent &&
3201 find_normal_file_extent(root, rec->ino)) {
3203 filetype = BTRFS_FT_REG_FILE;
3204 } else if (rec->found_dir_item) {
3206 filetype = BTRFS_FT_DIR;
3207 } else if (!list_empty(&rec->orphan_extents)) {
3209 filetype = BTRFS_FT_REG_FILE;
3211 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3214 filetype = BTRFS_FT_REG_FILE;
3218 ret = btrfs_new_inode(trans, root, rec->ino,
3219 mode | btrfs_type_to_imode(filetype));
3224 * Here inode rebuild is done, we only rebuild the inode item,
3225 * don't repair the nlink(like move to lost+found).
3226 * That is the job of nlink repair.
3228 * We just fill the record and return
3230 rec->found_dir_item = 1;
3231 rec->imode = mode | btrfs_type_to_imode(filetype);
3233 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3234 /* Ensure the inode_nlinks repair function will be called */
3235 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach the orphan data extents recorded on @rec to their inode.
 *
 * For every orphan extent the file range [offset, offset + disk_len) is
 * probed with btrfs_get_extent(). A conflicting orphan is reported and its
 * extent released with btrfs_free_extent(). Otherwise a file extent item is
 * inserted with disk_len used for both length arguments, and the record is
 * brought up to date: found_size grows by disk_len, I_ERR_FILE_NBYTES_WRONG
 * is cleared once found_size equals nbytes, the covered range is removed
 * from rec->holes, and I_ERR_FILE_EXTENT_DISCOUNT is cleared once no hole is
 * left. Each orphan is unlinked from the list with list_del().
 *
 * I_ERR_FILE_EXTENT_ORPHAN is cleared at the end.
 */
3240 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3241 struct btrfs_root *root,
3242 struct btrfs_path *path,
3243 struct inode_record *rec)
3245 struct orphan_data_extent *orphan;
3246 struct orphan_data_extent *tmp;
3249 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3251 * Check for conflicting file extents
3253 * Here we don't know whether the extents is compressed or not,
3254 * so we can only assume it not compressed nor data offset,
3255 * and use its disk_len as extent length.
3257 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3258 orphan->offset, orphan->disk_len, 0);
3259 btrfs_release_path(path);
3264 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3265 orphan->disk_bytenr, orphan->disk_len);
3266 ret = btrfs_free_extent(trans,
3267 root->fs_info->extent_root,
3268 orphan->disk_bytenr, orphan->disk_len,
3269 0, root->objectid, orphan->objectid,
3274 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3275 orphan->offset, orphan->disk_bytenr,
3276 orphan->disk_len, orphan->disk_len);
3280 /* Update file size info */
3281 rec->found_size += orphan->disk_len;
3282 if (rec->found_size == rec->nbytes)
3283 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3285 /* Update the file extent hole info too */
3286 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3290 if (RB_EMPTY_ROOT(&rec->holes))
3291 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3293 list_del(&orphan->list);
3296 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps between the file extents of @rec with explicit holes.
 *
 * Every entry of the rec->holes rb-tree is punched with btrfs_punch_hole()
 * and then removed from the tree; the loop always restarts from rb_first()
 * because the tree changes on each round. I_ERR_FILE_EXTENT_DISCOUNT is
 * cleared once the tree is empty.
 *
 * A file that lost all of its extents is handled separately by punching one
 * hole from offset 0 up to isize rounded up to the sector size.
 */
3301 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3302 struct btrfs_root *root,
3303 struct btrfs_path *path,
3304 struct inode_record *rec)
3306 struct rb_node *node;
3307 struct file_extent_hole *hole;
3311 node = rb_first(&rec->holes);
3315 hole = rb_entry(node, struct file_extent_hole, node);
3316 ret = btrfs_punch_hole(trans, root, rec->ino,
3317 hole->start, hole->len);
3320 ret = del_file_extent_hole(&rec->holes, hole->start,
3324 if (RB_EMPTY_ROOT(&rec->holes))
3325 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3326 node = rb_first(&rec->holes);
3328 /* special case for a file losing all its file extent */
3330 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3331 round_up(rec->isize,
3332 root->fs_info->sectorsize));
3336 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3337 rec->ino, root->objectid);
/*
 * Run every applicable inode repair for @rec inside a single transaction.
 *
 * The function first tests whether any of the repairable error bits is set
 * in rec->errors. It then starts a transaction with 7 units reserved and
 * calls the repair helpers in a fixed order: missing inode item, orphan
 * extents, discounted extents, directory isize, orphan item, link count and
 * nbytes. Every helper after the first runs only while ret is still zero, so
 * the first failure stops the chain. The transaction is committed and the
 * local path released afterwards.
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started.
 */
3342 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3344 struct btrfs_trans_handle *trans;
3345 struct btrfs_path path;
3348 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3349 I_ERR_NO_ORPHAN_ITEM |
3350 I_ERR_LINK_COUNT_WRONG |
3351 I_ERR_NO_INODE_ITEM |
3352 I_ERR_FILE_EXTENT_ORPHAN |
3353 I_ERR_FILE_EXTENT_DISCOUNT|
3354 I_ERR_FILE_NBYTES_WRONG)))
3358 * For nlink repair, it may create a dir and add link, so
3359 * 2 for parent(256)'s dir_index and dir_item
3360 * 2 for lost+found dir's inode_item and inode_ref
3361 * 1 for the new inode_ref of the file
3362 * 2 for lost+found dir's dir_index and dir_item for the file
3364 trans = btrfs_start_transaction(root, 7);
3366 return PTR_ERR(trans);
3368 btrfs_init_path(&path);
3369 if (rec->errors & I_ERR_NO_INODE_ITEM)
3370 ret = repair_inode_no_item(trans, root, &path, rec);
3371 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3372 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3373 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3374 ret = repair_inode_discount_extent(trans, root, &path, rec);
3375 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3376 ret = repair_inode_isize(trans, root, &path, rec);
3377 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3378 ret = repair_inode_orphan_item(trans, root, &path, rec);
3379 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3380 ret = repair_inode_nlinks(trans, root, &path, rec);
3381 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3382 ret = repair_inode_nbytes(trans, root, &path, rec);
3383 btrfs_commit_transaction(trans, root);
3384 btrfs_release_path(&path);
/*
 * Verify, and in repair mode fix, every inode record collected for @root,
 * draining @inode_cache in the process.
 *
 * Outline of the visible steps:
 *   - A root whose root item has zero refs only gets a warning when its
 *     inode cache is not empty.
 *   - In repair mode the backrefs of all records are repaired first, in
 *     stages, through repair_inode_backrefs(); records without backrefs are
 *     tested with list_empty() before that call.
 *   - The root directory record is looked up and checked with
 *     check_root_dir(). A missing root directory can be recreated with
 *     btrfs_make_root_dir() in its own transaction, otherwise it is reported.
 *   - Every remaining record is removed from the cache. Records of the root
 *     directory and of BTRFS_ORPHAN_OBJECTID are freed right away. For the
 *     others I_ERR_NO_ORPHAN_ITEM is re-evaluated with check_orphan_item(),
 *     I_ERR_NO_INODE_ITEM and I_ERR_LINK_COUNT_WRONG are set where they
 *     apply, try_repair_inode() is called, and what is still wrong is
 *     printed together with each unresolved backref before the record is
 *     freed.
 *
 * Returns -1 when the error counter is positive, 0 otherwise.
 */
3388 static int check_inode_recs(struct btrfs_root *root,
3389 struct cache_tree *inode_cache)
3391 struct cache_extent *cache;
3392 struct ptr_node *node;
3393 struct inode_record *rec;
3394 struct inode_backref *backref;
3399 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* Root items with zero refs are not expected to own any inode records */
3401 if (btrfs_root_refs(&root->root_item) == 0) {
3402 if (!cache_tree_empty(inode_cache))
3403 fprintf(stderr, "warning line %d\n", __LINE__);
3408 * We need to repair backrefs first because we could change some of the
3409 * errors in the inode recs.
3411 * We also need to go through and delete invalid backrefs first and then
3412 * add the correct ones second. We do this because we may get EEXIST
3413 * when adding back the correct index because we hadn't yet deleted the
3416 * For example, if we were missing a dir index then the directories
3417 * isize would be wrong, so if we fixed the isize to what we thought it
3418 * would be and then fixed the backref we'd still have a invalid fs, so
3419 * we need to add back the dir index and then check to see if the isize
3424 if (stage == 3 && !err)
3427 cache = search_cache_extent(inode_cache, 0);
3428 while (repair && cache) {
3429 node = container_of(cache, struct ptr_node, cache);
3431 cache = next_cache_extent(cache);
3433 /* Need to free everything up and rescan */
3435 remove_cache_extent(inode_cache, &node->cache);
3437 free_inode_rec(rec);
3441 if (list_empty(&rec->backrefs))
3444 ret = repair_inode_backrefs(root, rec, inode_cache,
3458 rec = get_inode_rec(inode_cache, root_dirid, 0);
3459 BUG_ON(IS_ERR(rec));
3461 ret = check_root_dir(rec);
3463 fprintf(stderr, "root %llu root dir %llu error\n",
3464 (unsigned long long)root->root_key.objectid,
3465 (unsigned long long)root_dirid);
3466 print_inode_error(root, rec);
3471 struct btrfs_trans_handle *trans;
3473 trans = btrfs_start_transaction(root, 1);
3474 if (IS_ERR(trans)) {
3475 err = PTR_ERR(trans);
3480 "root %llu missing its root dir, recreating\n",
3481 (unsigned long long)root->objectid);
3483 ret = btrfs_make_root_dir(trans, root, root_dirid);
3486 btrfs_commit_transaction(trans, root);
3490 fprintf(stderr, "root %llu root dir %llu not found\n",
3491 (unsigned long long)root->root_key.objectid,
3492 (unsigned long long)root_dirid);
3496 cache = search_cache_extent(inode_cache, 0);
3499 node = container_of(cache, struct ptr_node, cache);
3501 remove_cache_extent(inode_cache, &node->cache);
3503 if (rec->ino == root_dirid ||
3504 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3505 free_inode_rec(rec);
3509 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3510 ret = check_orphan_item(root, rec->ino);
3512 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3513 if (can_free_inode_rec(rec)) {
3514 free_inode_rec(rec);
3519 if (!rec->found_inode_item)
3520 rec->errors |= I_ERR_NO_INODE_ITEM;
3521 if (rec->found_link != rec->nlink)
3522 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3524 ret = try_repair_inode(root, rec);
3525 if (ret == 0 && can_free_inode_rec(rec)) {
3526 free_inode_rec(rec);
3532 if (!(repair && ret == 0))
3534 print_inode_error(root, rec);
3535 list_for_each_entry(backref, &rec->backrefs, list) {
3536 if (!backref->found_dir_item)
3537 backref->errors |= REF_ERR_NO_DIR_ITEM;
3538 if (!backref->found_dir_index)
3539 backref->errors |= REF_ERR_NO_DIR_INDEX;
3540 if (!backref->found_inode_ref)
3541 backref->errors |= REF_ERR_NO_INODE_REF;
3542 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3543 " namelen %u name %s filetype %d errors %x",
3544 (unsigned long long)backref->dir,
3545 (unsigned long long)backref->index,
3546 backref->namelen, backref->name,
3547 backref->filetype, backref->errors);
3548 print_ref_error(backref->errors);
3550 free_inode_rec(rec);
3552 return (error > 0) ? -1 : 0;
/*
 * Return the root record for @objectid from @root_cache, creating it when
 * it does not exist yet.
 *
 * A new record is zero-allocated, gets an empty backref list and is inserted
 * into the cache as the one-unit range starting at @objectid.
 *
 * Errors are reported as ERR_PTR values: -ENOMEM for a failed allocation and
 * -EEXIST after insert_cache_extent() (presumably when the insert fails -
 * confirm). Callers in this file wrap the result in BUG_ON(IS_ERR()).
 */
3555 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3558 struct cache_extent *cache;
3559 struct root_record *rec = NULL;
3562 cache = lookup_cache_extent(root_cache, objectid, 1);
3564 rec = container_of(cache, struct root_record, cache);
3566 rec = calloc(1, sizeof(*rec));
3568 return ERR_PTR(-ENOMEM);
3569 rec->objectid = objectid;
3570 INIT_LIST_HEAD(&rec->backrefs);
3571 rec->cache.start = objectid;
3572 rec->cache.size = 1;
3574 ret = insert_cache_extent(root_cache, &rec->cache);
3576 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec identified by (@ref_root, @dir, @name), or create
 * it.
 *
 * Existing entries are compared on ref_root, dir and namelen first and on
 * the name bytes with memcmp() afterwards; @index is not part of the match.
 * A new entry is zero-allocated with room for the name plus a terminating
 * NUL, filled in from the arguments and appended to rec->backrefs.
 */
3581 static struct root_backref *get_root_backref(struct root_record *rec,
3582 u64 ref_root, u64 dir, u64 index,
3583 const char *name, int namelen)
3585 struct root_backref *backref;
3587 list_for_each_entry(backref, &rec->backrefs, list) {
3588 if (backref->ref_root != ref_root || backref->dir != dir ||
3589 backref->namelen != namelen)
3591 if (memcmp(name, backref->name, namelen))
/* The name is stored inline after the struct, hence the extra bytes */
3596 backref = calloc(1, sizeof(*backref) + namelen + 1);
3599 backref->ref_root = ref_root;
3601 backref->index = index;
3602 backref->namelen = namelen;
3603 memcpy(backref->name, name, namelen);
3604 backref->name[namelen] = '\0';
3605 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Release callback for one root record stored in a cache tree: detaches
 * every backref from rec->backrefs, one list head at a time.
 */
3609 static void free_root_record(struct cache_extent *cache)
3611 struct root_record *rec;
3612 struct root_backref *backref;
3614 rec = container_of(cache, struct root_record, cache);
3615 while (!list_empty(&rec->backrefs)) {
3616 backref = to_root_backref(rec->backrefs.next);
3617 list_del(&backref->list);
/*
 * NOTE(review): presumably expands to the free_root_recs_tree() helper that
 * is called elsewhere in this file - confirm against the macro definition.
 */
3624 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that an item of @item_type referencing subvolume @root_id was seen.
 *
 * The root record and the matching backref are looked up (or created), the
 * caller supplied @errors bits are merged in, and the found_* flag that
 * corresponds to @item_type is set:
 *   BTRFS_DIR_ITEM_KEY     -> found_dir_item
 *   BTRFS_DIR_INDEX_KEY    -> found_dir_index
 *   BTRFS_ROOT_REF_KEY     -> found_forward_ref
 *   BTRFS_ROOT_BACKREF_KEY -> found_back_ref
 *
 * Dir items carry no index, so the index is only compared for the other
 * item types: a mismatch with an index recorded earlier sets
 * REF_ERR_INDEX_UNMATCH. A second ROOT_REF or ROOT_BACKREF for the same
 * backref sets REF_ERR_DUP_ROOT_REF or REF_ERR_DUP_ROOT_BACKREF.
 *
 * A backref becomes reachable once both its forward ref and its dir item
 * have been seen.
 */
3626 static int add_root_backref(struct cache_tree *root_cache,
3627 u64 root_id, u64 ref_root, u64 dir, u64 index,
3628 const char *name, int namelen,
3629 int item_type, int errors)
3631 struct root_record *rec;
3632 struct root_backref *backref;
3634 rec = get_root_rec(root_cache, root_id);
3635 BUG_ON(IS_ERR(rec));
3636 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3639 backref->errors |= errors;
3641 if (item_type != BTRFS_DIR_ITEM_KEY) {
3642 if (backref->found_dir_index || backref->found_back_ref ||
3643 backref->found_forward_ref) {
3644 if (backref->index != index)
3645 backref->errors |= REF_ERR_INDEX_UNMATCH;
3647 backref->index = index;
3651 if (item_type == BTRFS_DIR_ITEM_KEY) {
3652 if (backref->found_forward_ref)
3654 backref->found_dir_item = 1;
3655 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3656 backref->found_dir_index = 1;
3657 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3658 if (backref->found_forward_ref)
3659 backref->errors |= REF_ERR_DUP_ROOT_REF;
3660 else if (backref->found_dir_item)
3662 backref->found_forward_ref = 1;
3663 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3664 if (backref->found_back_ref)
3665 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3666 backref->found_back_ref = 1;
3671 if (backref->found_forward_ref && backref->found_dir_item)
3672 backref->reachable = 1;
/*
 * Fold the subvolume references found while walking @root into the global
 * root cache @dst_cache, emptying @src_cache.
 *
 * Records of the tree reloc root are simply discarded. For every other
 * record the inode number is passed to is_child_root() together with the
 * objectid of @root, and each backref that has a dir item and/or a dir index
 * is replayed through add_root_backref() with @root as the referencing root.
 * An inode ref on such a backref is treated as a bug. Each record is freed
 * once processed.
 */
3676 static int merge_root_recs(struct btrfs_root *root,
3677 struct cache_tree *src_cache,
3678 struct cache_tree *dst_cache)
3680 struct cache_extent *cache;
3681 struct ptr_node *node;
3682 struct inode_record *rec;
3683 struct inode_backref *backref;
3686 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3687 free_inode_recs_tree(src_cache);
3692 cache = search_cache_extent(src_cache, 0);
3695 node = container_of(cache, struct ptr_node, cache);
3697 remove_cache_extent(src_cache, &node->cache);
3700 ret = is_child_root(root, root->objectid, rec->ino);
3706 list_for_each_entry(backref, &rec->backrefs, list) {
3707 BUG_ON(backref->found_inode_ref);
3708 if (backref->found_dir_item)
3709 add_root_backref(dst_cache, rec->ino,
3710 root->root_key.objectid, backref->dir,
3711 backref->index, backref->name,
3712 backref->namelen, BTRFS_DIR_ITEM_KEY,
3714 if (backref->found_dir_index)
3715 add_root_backref(dst_cache, rec->ino,
3716 root->root_key.objectid, backref->dir,
3717 backref->index, backref->name,
3718 backref->namelen, BTRFS_DIR_INDEX_KEY,
3722 free_inode_rec(rec);
/*
 * Check that every subvolume in @root_cache is properly referenced.
 *
 * The record of BTRFS_FS_TREE_OBJECTID is fetched first. A first pass over
 * the cache then revisits the reachable backrefs of each record and looks up
 * the record of the referencing root; backref->reachable is cleared in that
 * loop (presumably when the referencing root is itself unreferenced -
 * confirm).
 *
 * The second pass reports problems:
 *   - a root with found_ref == 0 whose objectid lies in the range
 *     BTRFS_FIRST_FREE_OBJECTID..BTRFS_LAST_FREE_OBJECTID is checked for an
 *     orphan item in the tree root and may be reported as not referenced;
 *   - a referenced root without a root item is tested separately;
 *   - each backref gets the REF_ERR_NO_* bit of every reference that was
 *     not seen, and reachable backrefs with errors are printed.
 *
 * Returns 1 when the error counter is positive, 0 otherwise.
 */
3729 static int check_root_refs(struct btrfs_root *root,
3730 struct cache_tree *root_cache)
3732 struct root_record *rec;
3733 struct root_record *ref_root;
3734 struct root_backref *backref;
3735 struct cache_extent *cache;
3741 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3742 BUG_ON(IS_ERR(rec));
3745 /* fixme: this can not detect circular references */
3748 cache = search_cache_extent(root_cache, 0);
3752 rec = container_of(cache, struct root_record, cache);
3753 cache = next_cache_extent(cache);
3755 if (rec->found_ref == 0)
3758 list_for_each_entry(backref, &rec->backrefs, list) {
3759 if (!backref->reachable)
3762 ref_root = get_root_rec(root_cache,
3764 BUG_ON(IS_ERR(ref_root));
3765 if (ref_root->found_ref > 0)
3768 backref->reachable = 0;
3770 if (rec->found_ref == 0)
3776 cache = search_cache_extent(root_cache, 0);
3780 rec = container_of(cache, struct root_record, cache);
3781 cache = next_cache_extent(cache);
3783 if (rec->found_ref == 0 &&
3784 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3785 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3786 ret = check_orphan_item(root->fs_info->tree_root,
3792 * If we don't have a root item then we likely just have
3793 * a dir item in a snapshot for this root but no actual
3794 * ref key or anything so it's meaningless.
3796 if (!rec->found_root_item)
3799 fprintf(stderr, "fs tree %llu not referenced\n",
3800 (unsigned long long)rec->objectid);
3804 if (rec->found_ref > 0 && !rec->found_root_item)
3806 list_for_each_entry(backref, &rec->backrefs, list) {
3807 if (!backref->found_dir_item)
3808 backref->errors |= REF_ERR_NO_DIR_ITEM;
3809 if (!backref->found_dir_index)
3810 backref->errors |= REF_ERR_NO_DIR_INDEX;
3811 if (!backref->found_back_ref)
3812 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3813 if (!backref->found_forward_ref)
3814 backref->errors |= REF_ERR_NO_ROOT_REF;
3815 if (backref->reachable && backref->errors)
3822 fprintf(stderr, "fs tree %llu refs %u %s\n",
3823 (unsigned long long)rec->objectid, rec->found_ref,
3824 rec->found_root_item ? "" : "not found");
3826 list_for_each_entry(backref, &rec->backrefs, list) {
3827 if (!backref->reachable)
3829 if (!backref->errors && rec->found_root_item)
3831 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3832 " index %llu namelen %u name %s errors %x\n",
3833 (unsigned long long)backref->ref_root,
3834 (unsigned long long)backref->dir,
3835 (unsigned long long)backref->index,
3836 backref->namelen, backref->name,
3838 print_ref_error(backref->errors);
3841 return errors > 0 ? 1 : 0;
/*
 * Decode the ROOT_REF or ROOT_BACKREF item at @slot of @eb and record it in
 * @root_cache through add_root_backref().
 *
 * The directory id, sequence (index) and name are read from the item. The
 * name bytes follow the btrfs_root_ref structure. A name longer than
 * BTRFS_NAME_LEN is truncated to BTRFS_NAME_LEN and flagged with
 * REF_ERR_NAME_TOO_LONG.
 *
 * The two key types store the relation in opposite directions: for
 * BTRFS_ROOT_REF_KEY the referenced root is key->offset and the referencing
 * root is key->objectid, for the other key type the roles are swapped.
 */
3844 static int process_root_ref(struct extent_buffer *eb, int slot,
3845 struct btrfs_key *key,
3846 struct cache_tree *root_cache)
3852 struct btrfs_root_ref *ref;
3853 char namebuf[BTRFS_NAME_LEN];
3856 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3858 dirid = btrfs_root_ref_dirid(eb, ref);
3859 index = btrfs_root_ref_sequence(eb, ref);
3860 name_len = btrfs_root_ref_name_len(eb, ref);
3862 if (name_len <= BTRFS_NAME_LEN) {
3866 len = BTRFS_NAME_LEN;
3867 error = REF_ERR_NAME_TOO_LONG;
/* The name is stored directly behind the fixed-size root ref structure */
3869 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3871 if (key->type == BTRFS_ROOT_REF_KEY) {
3872 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3873 index, namebuf, len, key->type, error);
3875 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3876 index, namebuf, len, key->type, error);
/*
 * Release callback for one entry of a corrupt-blocks cache tree: recovers
 * the enclosing btrfs_corrupt_block from its embedded cache extent.
 */
3881 static void free_corrupt_block(struct cache_extent *cache)
3883 struct btrfs_corrupt_block *corrupt;
3885 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/*
 * NOTE(review): presumably expands to the free_corrupt_blocks_tree() helper
 * that is called elsewhere in this file - confirm against the macro
 * definition.
 */
3889 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3892 * Repair the btree of the given root.
3894 * The fix is to remove the node key in corrupt_blocks cache_tree.
3895 * and rebalance the tree.
3896 * After the fix, the btree should be writeable.
/*
 * Remove the tree blocks listed in @corrupt_blocks from the btree of @root.
 *
 * Nothing is done for an empty list. Otherwise one transaction covers two
 * passes over the list:
 *
 *   Pass 1 - for each corrupt block, search for its first key with
 *            path.lowest_level set to the recorded level (ins_len 0, cow 1),
 *            read the block pointer out of path.nodes[level], delete that
 *            pointer with btrfs_del_ptr() and release the block with
 *            btrfs_free_extent().
 *   Pass 2 - search for every key again with ins_len -1 so that
 *            btrfs_search_slot() rebalances the tree.
 *
 * The transaction is committed and the path released at the end. A failure
 * to start the transaction is reported on stderr.
 */
3898 static int repair_btree(struct btrfs_root *root,
3899 struct cache_tree *corrupt_blocks)
3901 struct btrfs_trans_handle *trans;
3902 struct btrfs_path path;
3903 struct btrfs_corrupt_block *corrupt;
3904 struct cache_extent *cache;
3905 struct btrfs_key key;
3910 if (cache_tree_empty(corrupt_blocks))
3913 trans = btrfs_start_transaction(root, 1);
3914 if (IS_ERR(trans)) {
3915 ret = PTR_ERR(trans);
3916 fprintf(stderr, "Error starting transaction: %s\n",
3920 btrfs_init_path(&path);
3921 cache = first_cache_extent(corrupt_blocks);
3923 corrupt = container_of(cache, struct btrfs_corrupt_block,
3925 level = corrupt->level;
3926 path.lowest_level = level;
3927 key.objectid = corrupt->key.objectid;
3928 key.type = corrupt->key.type;
3929 key.offset = corrupt->key.offset;
3932 * Here we don't want to do any tree balance, since it may
3933 * cause a balance with corrupted brother leaf/node,
3934 * so ins_len set to 0 here.
3935 * Balance will be done after all corrupt node/leaf is deleted.
3937 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3940 offset = btrfs_node_blockptr(path.nodes[level],
3943 /* Remove the ptr */
3944 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3948 * Remove the corresponding extent
3949 * return value is not concerned.
3951 btrfs_release_path(&path);
3952 ret = btrfs_free_extent(trans, root, offset,
3953 root->fs_info->nodesize, 0,
3954 root->root_key.objectid, level - 1, 0);
3955 cache = next_cache_extent(cache);
3958 /* Balance the btree using btrfs_search_slot() */
3959 cache = first_cache_extent(corrupt_blocks);
3961 corrupt = container_of(cache, struct btrfs_corrupt_block,
3963 memcpy(&key, &corrupt->key, sizeof(key));
3964 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3967 /* return will always >0 since it won't find the item */
3969 btrfs_release_path(&path);
3970 cache = next_cache_extent(cache);
3973 btrfs_commit_transaction(trans, root);
3974 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree: walk it, collect inode and root records, and
 * validate them.
 *
 * Visible steps:
 *   1. Point fs_info->corrupt_blocks at a local cache tree so that corrupted
 *      tree blocks met during the walk are recorded per root.
 *   2. For every root except the tree reloc root, fetch its root record and
 *      mark found_root_item when the root item has refs.
 *   3. Move the orphan data extents of the root onto the inode records they
 *      belong to, flagging those records with I_ERR_FILE_EXTENT_ORPHAN.
 *   4. Validate the root block itself with btrfs_check_leaf() or
 *      btrfs_check_node().
 *   5. Pick the start of the walk: the root node itself when the root has
 *      refs or no drop progress was recorded, otherwise the position stored
 *      in drop_progress / drop_level (an out-of-range drop level is
 *      reported with error()).
 *   6. Walk with walk_down_tree() / walk_up_tree().
 *   7. List the corrupted tree blocks that were recorded and call
 *      repair_btree() on them.
 *   8. Merge the root records into @root_cache and run check_inode_recs()
 *      on the collected inode records.
 *   9. Free the corrupt block tree, reset fs_info->corrupt_blocks and free
 *      the remaining orphan data extents.
 */
3978 static int check_fs_root(struct btrfs_root *root,
3979 struct cache_tree *root_cache,
3980 struct walk_control *wc)
3986 struct btrfs_path path;
3987 struct shared_node root_node;
3988 struct root_record *rec;
3989 struct btrfs_root_item *root_item = &root->root_item;
3990 struct cache_tree corrupt_blocks;
3991 struct orphan_data_extent *orphan;
3992 struct orphan_data_extent *tmp;
3993 enum btrfs_tree_block_status status;
3994 struct node_refs nrefs;
3997 * Reuse the corrupt_block cache tree to record corrupted tree block
3999 * Unlike the usage in extent tree check, here we do it in a per
4000 * fs/subvol tree base.
4002 cache_tree_init(&corrupt_blocks);
4003 root->fs_info->corrupt_blocks = &corrupt_blocks;
4005 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4006 rec = get_root_rec(root_cache, root->root_key.objectid);
4007 BUG_ON(IS_ERR(rec));
4008 if (btrfs_root_refs(root_item) > 0)
4009 rec->found_root_item = 1;
4012 btrfs_init_path(&path);
4013 memset(&root_node, 0, sizeof(root_node));
4014 cache_tree_init(&root_node.root_cache);
4015 cache_tree_init(&root_node.inode_cache);
4016 memset(&nrefs, 0, sizeof(nrefs));
4018 /* Move the orphan extent record to corresponding inode_record */
4019 list_for_each_entry_safe(orphan, tmp,
4020 &root->orphan_data_extents, list) {
4021 struct inode_record *inode;
4023 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4025 BUG_ON(IS_ERR(inode));
4026 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4027 list_move(&orphan->list, &inode->orphan_extents);
/* Start the walk state at the level of the root node */
4030 level = btrfs_header_level(root->node);
4031 memset(wc->nodes, 0, sizeof(wc->nodes));
4032 wc->nodes[level] = &root_node;
4033 wc->active_node = level;
4034 wc->root_level = level;
4036 /* We may not have checked the root block, lets do that now */
4037 if (btrfs_is_leaf(root->node))
4038 status = btrfs_check_leaf(root, NULL, root->node);
4040 status = btrfs_check_node(root, NULL, root->node);
4041 if (status != BTRFS_TREE_BLOCK_CLEAN)
/* Live root, or no drop in progress: walk from the root node itself */
4044 if (btrfs_root_refs(root_item) > 0 ||
4045 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4046 path.nodes[level] = root->node;
4047 extent_buffer_get(root->node);
4048 path.slots[level] = 0;
4050 struct btrfs_key key;
4051 struct btrfs_disk_key found_key;
4053 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4054 level = root_item->drop_level;
4055 path.lowest_level = level;
4056 if (level > btrfs_header_level(root->node) ||
4057 level >= BTRFS_MAX_LEVEL) {
4058 error("ignoring invalid drop level: %u", level);
4061 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4064 btrfs_node_key(path.nodes[level], &found_key,
4066 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4067 sizeof(found_key)));
4071 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4077 wret = walk_up_tree(root, &path, wc, &level);
4084 btrfs_release_path(&path);
4086 if (!cache_tree_empty(&corrupt_blocks)) {
4087 struct cache_extent *cache;
4088 struct btrfs_corrupt_block *corrupt;
4090 printf("The following tree block(s) is corrupted in tree %llu:\n",
4091 root->root_key.objectid);
4092 cache = first_cache_extent(&corrupt_blocks);
4094 corrupt = container_of(cache,
4095 struct btrfs_corrupt_block,
4097 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4098 cache->start, corrupt->level,
4099 corrupt->key.objectid, corrupt->key.type,
4100 corrupt->key.offset);
4101 cache = next_cache_extent(cache);
4104 printf("Try to repair the btree for root %llu\n",
4105 root->root_key.objectid);
4106 ret = repair_btree(root, &corrupt_blocks);
4108 fprintf(stderr, "Failed to repair btree: %s\n",
4111 printf("Btree for root %llu is fixed\n",
4112 root->root_key.objectid);
4116 err = merge_root_recs(root, &root_node.root_cache, root_cache);
/* Close the inode record that was still being filled when the walk ended */
4120 if (root_node.current) {
4121 root_node.current->checked = 1;
4122 maybe_free_inode_rec(&root_node.inode_cache,
4126 err = check_inode_recs(root, &root_node.inode_cache);
4130 free_corrupt_blocks_tree(&corrupt_blocks);
4131 root->fs_info->corrupt_blocks = NULL;
4132 free_orphan_data_extents(&root->orphan_data_extents);
4136 static int fs_root_objectid(u64 objectid)
4138 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4139 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4141 return is_fstree(objectid);
/*
 * Scan the root tree (fs_info->tree_root) and check the trees it lists:
 * a ROOT_ITEM whose objectid passes fs_root_objectid() is read and handed to
 * check_fs_root(); ROOT_REF / ROOT_BACKREF items go to process_root_ref().
 *
 * NOTE(review): this listing has lost its short lines (braces, labels,
 * gotos, returns and some declarations), so the jumps between the visible
 * statements and the final return value are not shown here.
 */
4144 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4145 struct cache_tree *root_cache)
4147 struct btrfs_path path;
4148 struct btrfs_key key;
4149 struct walk_control wc;
4150 struct extent_buffer *leaf, *tree_node;
4151 struct btrfs_root *tmp_root;
4152 struct btrfs_root *tree_root = fs_info->tree_root;
/* Switch the progress reporter to the fs-roots phase when it is enabled. */
4156 if (ctx.progress_enabled) {
4157 ctx.tp = TASK_FS_ROOTS;
4158 task_start(ctx.info);
4162 * Just in case we made any changes to the extent tree that weren't
4163 * reflected into the free space cache yet.
4166 reset_cached_block_groups(fs_info);
4167 memset(&wc, 0, sizeof(wc));
4168 cache_tree_init(&wc.shared);
4169 btrfs_init_path(&path);
4174 key.type = BTRFS_ROOT_ITEM_KEY;
4175 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember which buffer is the root node of the root tree right now. */
4180 tree_node = tree_root->node;
/*
 * A different root node than the one remembered above: the collected root
 * records are discarded and the path released.
 * NOTE(review): presumably followed by a restart of the scan -- the jump
 * is not visible in this listing.
 */
4182 if (tree_node != tree_root->node) {
4183 free_root_recs_tree(root_cache);
4184 btrfs_release_path(&path);
4187 leaf = path.nodes[0];
/* Slot is past the last item of this leaf: step to the next leaf. */
4188 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4189 ret = btrfs_next_leaf(tree_root, &path);
4195 leaf = path.nodes[0];
4197 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4198 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4199 fs_root_objectid(key.objectid)) {
/* Tree reloc roots are read without the root cache; others via the cached reader. */
4200 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4201 tmp_root = btrfs_read_fs_root_no_cache(
4204 key.offset = (u64)-1;
4205 tmp_root = btrfs_read_fs_root(
4208 if (IS_ERR(tmp_root)) {
4212 ret = check_fs_root(tmp_root, root_cache, &wc);
/*
 * -EAGAIN from check_fs_root(): root records are dropped and the path
 * released. NOTE(review): presumably the scan is retried; not visible here.
 */
4213 if (ret == -EAGAIN) {
4214 free_root_recs_tree(root_cache);
4215 btrfs_release_path(&path);
/* Only the uncached (tree reloc) root is freed here by this function. */
4220 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4221 btrfs_free_fs_root(tmp_root);
4222 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4223 key.type == BTRFS_ROOT_BACKREF_KEY) {
4224 process_root_ref(leaf, path.slots[0], &key,
4231 btrfs_release_path(&path);
/* Tear down the shared-node cache; a non-empty tree afterwards is only warned about. */
4233 free_extent_cache_tree(&wc.shared);
4234 if (!cache_tree_empty(&wc.shared))
4235 fprintf(stderr, "warning line %d\n", __LINE__);
4237 task_stop(ctx.info);
4243 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4244 * INODE_REF/INODE_EXTREF match.
4246 * @root: the root of the fs/file tree
4247 * @ref_key: the key of the INODE_REF/INODE_EXTREF
4248 * @key: the key of the DIR_ITEM/DIR_INDEX
4249 * @index: the index in the INODE_REF/INODE_EXTREF, used to
4250 * distinguish root_dir from normal dir/file
4251 * @name: the name in the INODE_REF/INODE_EXTREF
4252 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4253 * @mode: the st_mode of INODE_ITEM
4255 * Return 0 if no error occurred.
4256 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4257 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4259 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4260 * do not match for normal dir/file.
/*
 * Look up the DIR_ITEM/DIR_INDEX at @key and compare each entry packed in
 * that item against the inode reference described by @ref_key, @name,
 * @namelen and @mode.
 *
 * NOTE(review): this listing has lost its short lines (braces, gotos,
 * conditions, some declarations); the branch conditions guarding several
 * of the visible statements are therefore not shown.
 */
4262 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4263 struct btrfs_key *key, u64 index, char *name,
4264 u32 namelen, u32 mode)
4266 struct btrfs_path path;
4267 struct extent_buffer *node;
4268 struct btrfs_dir_item *di;
4269 struct btrfs_key location;
4270 char namebuf[BTRFS_NAME_LEN] = {0};
4280 btrfs_init_path(&path);
4281 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4283 ret = DIR_ITEM_MISSING;
4287 /* Process root dir and goto out*/
4290 ret = ROOT_DIR_ERROR;
4292 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4294 ref_key->type == BTRFS_INODE_REF_KEY ?
4296 ref_key->objectid, ref_key->offset,
4297 key->type == BTRFS_DIR_ITEM_KEY ?
4298 "DIR_ITEM" : "DIR_INDEX");
4306 /* Process normal file/dir */
4308 ret = DIR_ITEM_MISSING;
4310 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4312 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4313 ref_key->objectid, ref_key->offset,
4314 key->type == BTRFS_DIR_ITEM_KEY ?
4315 "DIR_ITEM" : "DIR_INDEX",
4316 key->objectid, key->offset, namelen, name,
4317 imode_to_type(mode));
4321 /* Check whether inode_id/filetype/name match */
4322 node = path.nodes[0];
4323 slot = path.slots[0];
4324 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4325 total = btrfs_item_size_nr(node, slot);
/* One item may hold several dir entries back to back; walk them all. */
4326 while (cur < total) {
/*
 * Each entry starts out as a mismatch.
 * NOTE(review): presumably ret is cleared on a full match in a line that
 * is not visible here.
 */
4327 ret = DIR_ITEM_MISMATCH;
4328 name_len = btrfs_dir_name_len(node, di);
4329 data_len = btrfs_dir_data_len(node, di);
/* The entry must point at the INODE_ITEM (offset 0) of the referencing inode. */
4331 btrfs_dir_item_key_to_cpu(node, di, &location);
4332 if (location.objectid != ref_key->objectid ||
4333 location.type != BTRFS_INODE_ITEM_KEY ||
4334 location.offset != 0)
4337 filetype = btrfs_dir_type(node, di);
4338 if (imode_to_type(mode) != filetype)
/*
 * name_len comes from disk: when it overruns the item or exceeds
 * BTRFS_NAME_LEN, warn and clamp the number of bytes that will be read.
 */
4341 if (cur + sizeof(*di) + name_len > total ||
4342 name_len > BTRFS_NAME_LEN) {
4343 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4345 key->type == BTRFS_DIR_ITEM_KEY ?
4346 "DIR_ITEM" : "DIR_INDEX",
4347 key->objectid, key->offset, name_len);
4349 if (cur + sizeof(*di) > total)
4351 len = min_t(u32, total - cur - sizeof(*di),
/* The name bytes follow the fixed-size dir item header directly. */
4357 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4358 if (len != namelen || strncmp(namebuf, name, len))
/* Advance past header + name + data to the next packed entry. */
4364 len = sizeof(*di) + name_len + data_len;
4365 di = (struct btrfs_dir_item *)((char *)di + len);
4368 if (ret == DIR_ITEM_MISMATCH)
4370 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4372 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4373 ref_key->objectid, ref_key->offset,
4374 key->type == BTRFS_DIR_ITEM_KEY ?
4375 "DIR_ITEM" : "DIR_INDEX",
4376 key->objectid, key->offset, namelen, name,
4377 imode_to_type(mode));
4379 btrfs_release_path(&path);
4384 * Traverse the given INODE_REF and call find_dir_item() to find related
4385 * DIR_ITEM/DIR_INDEX.
4387 * @root: the root of the fs/file tree
4388 * @ref_key: the key of the INODE_REF
4389 * @refs: the count of INODE_REF
4390 * @mode: the st_mode of INODE_ITEM
4392 * Return 0 if no error occurred.
/*
 * Walk every entry packed in the INODE_REF item at @node/@slot and, for each
 * one, look up the matching DIR_INDEX and DIR_ITEM through find_dir_item().
 *
 * NOTE(review): this listing has lost its short lines (braces, labels,
 * gotos, some declarations and part of the signature), so loop structure
 * and the returned value are not shown here.
 */
4394 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4395 struct extent_buffer *node, int slot, u64 *refs,
4398 struct btrfs_key key;
4399 struct btrfs_inode_ref *ref;
4400 char namebuf[BTRFS_NAME_LEN] = {0};
4408 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4409 total = btrfs_item_size_nr(node, slot);
4412 /* Update inode ref count */
4415 index = btrfs_inode_ref_index(node, ref);
4416 name_len = btrfs_inode_ref_name_len(node, ref);
/*
 * name_len comes from disk: if it overruns the item or exceeds
 * BTRFS_NAME_LEN, warn and clamp the number of bytes read into namebuf.
 */
4417 if (cur + sizeof(*ref) + name_len > total ||
4418 name_len > BTRFS_NAME_LEN) {
4419 warning("root %llu INODE_REF[%llu %llu] name too long",
4420 root->objectid, ref_key->objectid, ref_key->offset);
4422 if (total < cur + sizeof(*ref))
4424 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
/*
 * NOTE(review): namebuf is BTRFS_NAME_LEN bytes and len may equal
 * BTRFS_NAME_LEN, so the buffer is not guaranteed to be NUL-terminated
 * when it is later formatted with %s; it is also not cleared between
 * entries.
 */
4429 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4431 /* Check root dir ref name */
/*
 * NOTE(review): the comparison length here is the on-disk name_len, not the
 * clamped len used for the read above.
 */
4432 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4433 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4434 root->objectid, ref_key->objectid, ref_key->offset,
4436 err |= ROOT_DIR_ERROR;
4439 /* Find related DIR_INDEX */
/* For an INODE_REF the key offset is used as the directory's objectid. */
4440 key.objectid = ref_key->offset;
4441 key.type = BTRFS_DIR_INDEX_KEY;
4443 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4446 /* Find related dir_item */
4447 key.objectid = ref_key->offset;
4448 key.type = BTRFS_DIR_ITEM_KEY;
/* DIR_ITEM keys are looked up by the hash of the name. */
4449 key.offset = btrfs_name_hash(namebuf, len);
4450 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Step over header + name to the next ref packed in the same item. */
4453 len = sizeof(*ref) + name_len;
4454 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4464 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4465 * DIR_ITEM/DIR_INDEX.
4467 * @root: the root of the fs/file tree
4468 * @ref_key: the key of the INODE_EXTREF
4469 * @refs: the count of INODE_EXTREF
4470 * @mode: the st_mode of INODE_ITEM
4472 * Return 0 if no error occurred.
/*
 * Walk every entry packed in the INODE_EXTREF item at @node/@slot and, for
 * each one, look up the matching DIR_INDEX and DIR_ITEM in the parent
 * directory through find_dir_item().
 *
 * NOTE(review): this listing has lost its short lines (braces, labels,
 * gotos, some declarations and part of the signature), so loop structure
 * and the returned value are not shown here.
 */
4474 static int check_inode_extref(struct btrfs_root *root,
4475 struct btrfs_key *ref_key,
4476 struct extent_buffer *node, int slot, u64 *refs,
4479 struct btrfs_key key;
4480 struct btrfs_inode_extref *extref;
4481 char namebuf[BTRFS_NAME_LEN] = {0};
4491 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4492 total = btrfs_item_size_nr(node, slot);
4495 /* update inode ref count */
4497 name_len = btrfs_inode_extref_name_len(node, extref);
4498 index = btrfs_inode_extref_index(node, extref);
/* Unlike INODE_REF, the parent directory id is stored inside the entry. */
4499 parent = btrfs_inode_extref_parent(node, extref);
/*
 * Names longer than BTRFS_NAME_LEN are warned about and read truncated.
 * NOTE(review): unlike check_inode_ref(), no check against the item size
 * is visible in this listing.
 */
4500 if (name_len <= BTRFS_NAME_LEN) {
4503 len = BTRFS_NAME_LEN;
4504 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4505 root->objectid, ref_key->objectid, ref_key->offset);
4507 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4509 /* Check root dir ref name */
/* NOTE(review): compares with the on-disk name_len, not the clamped len. */
4510 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4511 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4512 root->objectid, ref_key->objectid, ref_key->offset,
4514 err |= ROOT_DIR_ERROR;
4517 /* find related dir_index */
4518 key.objectid = parent;
4519 key.type = BTRFS_DIR_INDEX_KEY;
4521 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4524 /* find related dir_item */
4525 key.objectid = parent;
4526 key.type = BTRFS_DIR_ITEM_KEY;
4527 key.offset = btrfs_name_hash(namebuf, len);
4528 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Step over header + name to the next extref packed in the same item. */
4531 len = sizeof(*extref) + name_len;
4532 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4542 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4543 * DIR_ITEM/DIR_INDEX match.
4545 * @root: the root of the fs/file tree
4546 * @key: the key of the INODE_REF/INODE_EXTREF
4547 * @name: the name in the INODE_REF/INODE_EXTREF
4548 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4549 * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4551 * @ext_ref: the EXTENDED_IREF feature
4553 * Return 0 if no error occurred.
4554 * Return >0 for error bitmap
/*
 * Search for an inode reference matching @name/@namelen (and @index unless
 * it is (u64)-1): first among the entries of the INODE_REF item at @key,
 * then among the entries of the INODE_EXTREF item derived from it.
 *
 * Side effect: the extref stage overwrites key->type and key->offset of the
 * caller's @key.
 *
 * NOTE(review): this listing has lost its short lines (braces, labels,
 * gotos, some declarations), so the conditions that end each stage and the
 * use of @ext_ref are not shown here.
 */
4556 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4557 char *name, int namelen, u64 index,
4558 unsigned int ext_ref)
4560 struct btrfs_path path;
4561 struct btrfs_inode_ref *ref;
4562 struct btrfs_inode_extref *extref;
4563 struct extent_buffer *node;
4564 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4575 btrfs_init_path(&path);
4576 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4578 ret = INODE_REF_MISSING;
4582 node = path.nodes[0];
4583 slot = path.slots[0];
4585 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4586 total = btrfs_item_size_nr(node, slot);
4588 /* Iterate all entry of INODE_REF */
4589 while (cur < total) {
/* Every entry is presumed missing until it matches. */
4590 ret = INODE_REF_MISSING;
4592 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4593 ref_index = btrfs_inode_ref_index(node, ref);
/* (u64)-1 is the wildcard index: any entry index is acceptable. */
4594 if (index != (u64)-1 && index != ref_index)
/* On-disk name length overruns the item or the name limit: warn and clamp. */
4597 if (cur + sizeof(*ref) + ref_namelen > total ||
4598 ref_namelen > BTRFS_NAME_LEN) {
4599 warning("root %llu INODE %s[%llu %llu] name too long",
4601 key->type == BTRFS_INODE_REF_KEY ?
4603 key->objectid, key->offset);
4605 if (cur + sizeof(*ref) > total)
4607 len = min_t(u32, total - cur - sizeof(*ref),
4613 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4616 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Step over header + name to the next ref packed in the same item. */
4622 len = sizeof(*ref) + ref_namelen;
4623 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4628 /* Skip if not support EXTENDED_IREF feature */
4632 btrfs_release_path(&path);
4633 btrfs_init_path(&path);
/*
 * Turn the INODE_REF key into the INODE_EXTREF key: the old offset is kept
 * as dir_id and the new offset is the extref hash of (dir_id, name).
 * This modifies the key object owned by the caller.
 */
4635 dir_id = key->offset;
4636 key->type = BTRFS_INODE_EXTREF_KEY;
4637 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4639 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4641 ret = INODE_REF_MISSING;
4645 node = path.nodes[0];
4646 slot = path.slots[0];
4648 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4650 total = btrfs_item_size_nr(node, slot);
4652 /* Iterate all entry of INODE_EXTREF */
4653 while (cur < total) {
4654 ret = INODE_REF_MISSING;
4656 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4657 ref_index = btrfs_inode_extref_index(node, extref);
4658 parent = btrfs_inode_extref_parent(node, extref);
4659 if (index != (u64)-1 && index != ref_index)
/* Several entries can share one extref key; the parent id is compared too. */
4662 if (parent != dir_id)
4665 if (ref_namelen <= BTRFS_NAME_LEN) {
4668 len = BTRFS_NAME_LEN;
/*
 * NOTE(review): key->type was set to BTRFS_INODE_EXTREF_KEY above, so the
 * type test in this warning can only take its second arm.
 */
4669 warning("root %llu INODE %s[%llu %llu] name too long",
4671 key->type == BTRFS_INODE_REF_KEY ?
4673 key->objectid, key->offset);
4675 read_extent_buffer(node, ref_namebuf,
4676 (unsigned long)(extref + 1), len);
4678 if (len != namelen || strncmp(ref_namebuf, name, len))
4685 len = sizeof(*extref) + ref_namelen;
4686 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4691 btrfs_release_path(&path);
4696 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4697 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4699 * @root: the root of the fs/file tree
4700 * @key: the key of the DIR_ITEM/DIR_INDEX
4701 * @size: the st_size of the INODE_ITEM
4702 * @ext_ref: the EXTENDED_IREF feature
4704 * Return 0 if no error occurred.
/*
 * Walk every entry packed in the DIR_ITEM/DIR_INDEX item at @node/@slot.
 * For each entry: validate data_len and name length, verify the DIR_ITEM
 * name hash, look up the INODE_ITEM it points at and compare file types,
 * then look for the matching INODE_REF/INODE_EXTREF via find_inode_ref().
 * The name length of each entry is added to *@size.
 *
 * NOTE(review): this listing has lost its short lines (braces, labels,
 * gotos, conditions, some declarations), so several guards and the
 * returned value are not shown here.
 */
4706 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4707 struct extent_buffer *node, int slot, u64 *size,
4708 unsigned int ext_ref)
4710 struct btrfs_dir_item *di;
4711 struct btrfs_inode_item *ii;
4712 struct btrfs_path path;
4713 struct btrfs_key location;
4714 char namebuf[BTRFS_NAME_LEN] = {0};
4727 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4728 * ignore index check.
4730 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4732 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4733 total = btrfs_item_size_nr(node, slot);
4735 while (cur < total) {
4736 data_len = btrfs_dir_data_len(node, di);
4738 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4739 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4740 "DIR_ITEM" : "DIR_INDEX",
4741 key->objectid, key->offset, data_len);
4743 name_len = btrfs_dir_name_len(node, di);
/* On-disk name length overruns the item or the name limit: warn and clamp. */
4744 if (cur + sizeof(*di) + name_len > total ||
4745 name_len > BTRFS_NAME_LEN) {
4746 warning("root %llu %s[%llu %llu] name too long",
4748 key->type == BTRFS_DIR_ITEM_KEY ?
4749 "DIR_ITEM" : "DIR_INDEX",
4750 key->objectid, key->offset);
4752 if (cur + sizeof(*di) > total)
4754 len = min_t(u32, total - cur - sizeof(*di),
/* Accumulate the (unclamped) name length into the caller's running size. */
4759 (*size) += name_len;
/*
 * NOTE(review): namebuf is BTRFS_NAME_LEN bytes and is later formatted with
 * %s; when the name fills the buffer there is no terminating NUL, and the
 * buffer is not cleared between entries.
 */
4761 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4762 filetype = btrfs_dir_type(node, di);
/* A DIR_ITEM key offset must equal the hash of the name it stores. */
4764 if (key->type == BTRFS_DIR_ITEM_KEY &&
4765 key->offset != btrfs_name_hash(namebuf, len)) {
4767 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4768 root->objectid, key->objectid, key->offset,
4769 namebuf, len, filetype, key->offset,
4770 btrfs_name_hash(namebuf, len));
4773 btrfs_init_path(&path);
4774 btrfs_dir_item_key_to_cpu(node, di, &location);
4776 /* Ignore related ROOT_ITEM check */
4777 if (location.type == BTRFS_ROOT_ITEM_KEY)
4780 /* Check relative INODE_ITEM(existence/filetype) */
4781 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4783 err |= INODE_ITEM_MISSING;
4784 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4785 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4786 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4787 key->offset, location.objectid, name_len,
4792 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4793 struct btrfs_inode_item);
4794 mode = btrfs_inode_mode(path.nodes[0], ii);
/* The type recorded in the dir entry must agree with the inode's mode. */
4796 if (imode_to_type(mode) != filetype) {
4797 err |= INODE_ITEM_MISMATCH;
4798 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4799 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4800 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4801 key->offset, name_len, namebuf, filetype);
4804 /* Check relative INODE_REF/INODE_EXTREF */
/*
 * location is reused as the INODE_REF key: same objectid, with this
 * directory's objectid as the offset.
 */
4805 location.type = BTRFS_INODE_REF_KEY;
4806 location.offset = key->objectid;
4807 ret = find_inode_ref(root, &location, namebuf, len,
4810 if (ret & INODE_REF_MISSING)
4811 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4812 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4813 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4814 key->offset, name_len, namebuf, filetype);
4817 btrfs_release_path(&path);
/* Advance past header + name + data to the next packed entry. */
4818 len = sizeof(*di) + name_len + data_len;
4819 di = (struct btrfs_dir_item *)((char *)di + len);
/* Bytes left over after an entry of a DIR_INDEX item are reported as an error. */
4822 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4823 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4824 root->objectid, key->objectid, key->offset);
4833 * Check file extent datasum/hole, update the size of the file extents,
4834 * check and update the last offset of the file extent.
4836 * @root: the root of fs/file tree.
4837 * @fkey: the key of the file extent.
4838 * @nodatasum: INODE_NODATASUM feature.
4839 * @size: the sum of all EXTENT_DATA items size for this inode.
4840 * @end: the offset of the last extent.
4842 * Return 0 if no error occurred.
/*
 * Check one EXTENT_DATA item at @node/@slot: inline extents are validated
 * for size; regular and prealloc extents are checked for csum coverage and
 * for contiguity with the previous extent end. Both *@size and *@end are
 * advanced by the extent length.
 *
 * NOTE(review): this listing has lost its short lines (braces, returns,
 * some declarations and conditions), so early exits and the returned value
 * are not shown here.
 */
4844 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4845 struct extent_buffer *node, int slot,
4846 unsigned int nodatasum, u64 *size, u64 *end)
4848 struct btrfs_file_extent_item *fi;
4851 u64 extent_num_bytes;
4853 u64 csum_found; /* In byte size, sectorsize aligned */
4854 u64 search_start; /* Logical range start we search for csum */
4855 u64 search_len; /* Logical range len we search for csum */
4856 unsigned int extent_type;
4857 unsigned int is_hole;
4862 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4864 /* Check inline extent */
4865 extent_type = btrfs_file_extent_type(node, fi);
4866 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4867 struct btrfs_item *e = btrfs_item_nr(slot);
4868 u32 item_inline_len;
4870 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4871 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4872 compressed = btrfs_file_extent_compression(node, fi);
4873 if (extent_num_bytes == 0) {
4875 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4876 root->objectid, fkey->objectid, fkey->offset);
4877 err |= FILE_EXTENT_ERROR;
/* Only for uncompressed data must the recorded length equal the item payload. */
4879 if (!compressed && extent_num_bytes != item_inline_len) {
4881 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4882 root->objectid, fkey->objectid, fkey->offset,
4883 extent_num_bytes, item_inline_len);
4884 err |= FILE_EXTENT_ERROR;
4886 *end += extent_num_bytes;
4887 *size += extent_num_bytes;
4891 /* Check extent type */
4892 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4893 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4894 err |= FILE_EXTENT_ERROR;
4895 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4896 root->objectid, fkey->objectid, fkey->offset);
4900 /* Check REG_EXTENT/PREALLOC_EXTENT */
4901 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4902 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4903 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4904 extent_offset = btrfs_file_extent_offset(node, fi);
4905 compressed = btrfs_file_extent_compression(node, fi);
/* A hole is an extent with neither a disk address nor a disk length. */
4906 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4909 * Check EXTENT_DATA csum
4911 * For plain (uncompressed) extent, we should only check the range
4912 * we're referring to, as it's possible that part of prealloc extent
4913 * has been written, and has csum:
4915 * |<--- Original large preallocated extent A ---->|
4916 * |<- Prealloc File Extent ->|<- Regular Extent ->|
4919 * For compressed extent, we should check the whole range.
4922 search_start = disk_bytenr + extent_offset;
4923 search_len = extent_num_bytes;
4925 search_start = disk_bytenr;
4926 search_len = disk_num_bytes;
/*
 * NOTE(review): the condition choosing between the two range assignments
 * above is not visible in this listing; presumably the first pair is the
 * uncompressed case and the second the compressed one.
 */
4928 ret = count_csum_range(root, search_start, search_len, &csum_found);
/* Three mutually exclusive findings: unexpected csum, missing csum, csum on prealloc. */
4929 if (csum_found > 0 && nodatasum) {
4930 err |= ODD_CSUM_ITEM;
4931 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4932 root->objectid, fkey->objectid, fkey->offset);
4933 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4934 !is_hole && (ret < 0 || csum_found < search_len)) {
4935 err |= CSUM_ITEM_MISSING;
4936 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4937 root->objectid, fkey->objectid, fkey->offset,
4938 csum_found, search_len);
4939 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4940 err |= ODD_CSUM_ITEM;
4941 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4942 root->objectid, fkey->objectid, fkey->offset, csum_found);
4945 /* Check EXTENT_DATA hole */
/*
 * Unless the global no_holes flag is set, this extent must start exactly
 * where the previous one ended.
 */
4946 if (!no_holes && *end != fkey->offset) {
4947 err |= FILE_EXTENT_ERROR;
4948 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4949 root->objectid, fkey->objectid, fkey->offset);
4952 *end += extent_num_bytes;
4954 *size += extent_num_bytes;
4960 * Set inode item nbytes to @nbytes
4962 * Returns 0 on success
4963 * Returns != 0 on error
4965 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
4966 struct btrfs_path *path,
4967 u64 ino, u64 nbytes)
4969 struct btrfs_trans_handle *trans;
4970 struct btrfs_inode_item *ii;
4971 struct btrfs_key key;
4972 struct btrfs_key research_key;
4976 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
4979 key.type = BTRFS_INODE_ITEM_KEY;
4982 trans = btrfs_start_transaction(root, 1);
4983 if (IS_ERR(trans)) {
4984 ret = PTR_ERR(trans);
4989 btrfs_release_path(path);
4990 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
4998 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
4999 struct btrfs_inode_item);
5000 btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5001 btrfs_mark_buffer_dirty(path->nodes[0]);
5003 btrfs_commit_transaction(trans, root);
5006 error("failed to set nbytes in inode %llu root %llu",
5007 ino, root->root_key.objectid);
5009 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5010 root->root_key.objectid, nbytes);
5013 btrfs_release_path(path);
5014 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5021 * Check INODE_ITEM and related ITEMs (the same inode number)
5022 * 1. check link count
5023 * 2. check inode ref/extref
5024 * 3. check dir item/index
5026 * @ext_ref: the EXTENDED_IREF feature
5028 * Return 0 if no error occurred.
5029 * Return >0 (error bitmap) if an error was found or the traversal is done
/*
 * Check the INODE_ITEM that @path points at and then every following item
 * carrying the same inode number, dispatching on the item type; afterwards
 * compare the statistics gathered (refs, dir size, extent size/end) with the
 * nlink/isize/nbytes fields read from the INODE_ITEM.
 *
 * NOTE(review): this listing has lost its short lines (braces, labels,
 * gotos, case breaks, some declarations and conditions), so loop structure
 * and several guards are not shown here.
 */
5031 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5032 unsigned int ext_ref)
5034 struct extent_buffer *node;
5035 struct btrfs_inode_item *ii;
5036 struct btrfs_key key;
5045 u64 extent_size = 0;
5047 unsigned int nodatasum;
5052 node = path->nodes[0];
5053 slot = path->slots[0];
5055 btrfs_item_key_to_cpu(node, &key, slot);
5056 inode_id = key.objectid;
/* The orphan objectid is not checked as an inode; the path is just advanced. */
5058 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5059 ret = btrfs_next_item(root, path);
/* Snapshot the INODE_ITEM fields that are verified after the item walk. */
5065 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5066 isize = btrfs_inode_size(node, ii);
5067 nbytes = btrfs_inode_nbytes(node, ii);
5068 mode = btrfs_inode_mode(node, ii);
5069 dir = imode_to_type(mode) == BTRFS_FT_DIR;
5070 nlink = btrfs_inode_nlink(node, ii);
5071 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5074 ret = btrfs_next_item(root, path);
5076 /* out will fill 'err' using current statistics */
5078 } else if (ret > 0) {
5083 node = path->nodes[0];
5084 slot = path->slots[0];
5085 btrfs_item_key_to_cpu(node, &key, slot);
/* An item with a different objectid belongs to another inode. */
5086 if (key.objectid != inode_id)
5090 case BTRFS_INODE_REF_KEY:
5091 ret = check_inode_ref(root, &key, node, slot, &refs,
5095 case BTRFS_INODE_EXTREF_KEY:
/* NOTE(review): the key.type test below repeats what this case label already established. */
5096 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5097 warning("root %llu EXTREF[%llu %llu] isn't supported",
5098 root->objectid, key.objectid,
5100 ret = check_inode_extref(root, &key, node, slot, &refs,
5104 case BTRFS_DIR_ITEM_KEY:
5105 case BTRFS_DIR_INDEX_KEY:
5107 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5108 root->objectid, inode_id,
5109 imode_to_type(mode), key.objectid,
5112 ret = check_dir_item(root, &key, node, slot, &size,
5116 case BTRFS_EXTENT_DATA_KEY:
5118 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5119 root->objectid, inode_id, key.objectid,
5122 ret = check_file_extent(root, &key, node, slot,
5123 nodatasum, &extent_size,
5127 case BTRFS_XATTR_ITEM_KEY:
5130 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5131 key.objectid, key.type, key.offset);
5136 /* verify INODE_ITEM nlink/isize/nbytes */
5139 err |= LINK_COUNT_ERROR;
5140 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5141 root->objectid, inode_id, nlink);
5145 * Just a warning, as dir inode nbytes is just an
5146 * instructive value.
5148 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5149 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5150 root->objectid, inode_id,
5151 root->fs_info->nodesize);
/* size is the sum of name lengths accumulated by check_dir_item(). */
5154 if (isize != size) {
5156 error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
5157 root->objectid, inode_id, isize, size);
/* refs is the count accumulated by the INODE_REF/INODE_EXTREF checkers. */
5160 if (nlink != refs) {
5161 err |= LINK_COUNT_ERROR;
5162 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5163 root->objectid, inode_id, nlink, refs);
5164 } else if (!nlink) {
5168 if (!nbytes && !no_holes && extent_end < isize) {
5169 err |= NBYTES_ERROR;
5170 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5171 root->objectid, inode_id, isize);
/*
 * nbytes disagrees with the summed extent sizes: the repair helper may be
 * invoked, and the mismatch is reported when repair is off or the helper
 * failed. NOTE(review): the guard on the helper call is not visible here.
 */
5174 if (nbytes != extent_size) {
5176 ret = repair_inode_nbytes_lowmem(root, path,
5177 inode_id, extent_size);
5178 if (!repair || ret) {
5179 err |= NBYTES_ERROR;
5181 "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5182 root->objectid, inode_id, nbytes,
/*
 * Look up the INODE_ITEM of the first inode (BTRFS_FIRST_FREE_OBJECTID) in
 * @root and run check_inode_item() on it; a missing item is reported as
 * INODE_ITEM_MISSING.
 *
 * NOTE(review): this listing has lost its short lines (braces, gotos,
 * returns, some declarations and conditions), so the exact guards and the
 * returned value are not shown here.
 */
5191 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5193 struct btrfs_path path;
5194 struct btrfs_key key;
5198 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5199 key.type = BTRFS_INODE_ITEM_KEY;
5202 /* For root being dropped, we don't need to check first inode */
/* "Being dropped" here means zero root refs plus a recorded drop_progress key. */
5203 if (btrfs_root_refs(&root->root_item) == 0 &&
5204 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5208 btrfs_init_path(&path);
5210 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5215 err |= INODE_ITEM_MISSING;
5216 error("first inode item of root %llu is missing",
5220 err |= check_inode_item(root, &path, ext_ref);
5225 btrfs_release_path(&path);
/*
 * Look up a tree backref of @rec by building a stack "match" key and
 * searching rec->backref_tree with compare_extent_backref().
 * Starts from NULL; the found node is converted with to_tree_backref().
 *
 * NOTE(review): this listing has lost its short lines (braces, the match
 * initialiser, the condition selecting between parent and root matching and
 * the return), so only the full-backref (parent) arm is visible.
 */
5229 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5230 u64 parent, u64 root)
5232 struct rb_node *node;
5233 struct tree_backref *back = NULL;
5234 struct tree_backref match = {
/* Visible arm: match by parent and mark the key as a full backref. */
5241 match.parent = parent;
5242 match.node.full_backref = 1;
5247 node = rb_search(&rec->backref_tree, &match.node.node,
5248 (rb_compare_keys)compare_extent_backref, NULL);
5250 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Look up a data backref of @rec by building a stack "match" key from the
 * arguments and searching rec->backref_tree with compare_extent_backref().
 * Starts from NULL; the found node is converted with to_data_backref().
 *
 * NOTE(review): this listing has lost its short lines (braces, part of the
 * parameter list and of the match initialiser, the condition selecting
 * between parent and root matching and the return), so only the
 * full-backref (parent) arm is visible.
 */
5255 static struct data_backref *find_data_backref(struct extent_record *rec,
5256 u64 parent, u64 root,
5257 u64 owner, u64 offset,
5259 u64 disk_bytenr, u64 bytes)
5261 struct rb_node *node;
5262 struct data_backref *back = NULL;
5263 struct data_backref match = {
5270 .found_ref = found_ref,
5271 .disk_bytenr = disk_bytenr,
/* Visible arm: match by parent and mark the key as a full backref. */
5275 match.parent = parent;
5276 match.node.full_backref = 1;
5281 node = rb_search(&rec->backref_tree, &match.node.node,
5282 (rb_compare_keys)compare_extent_backref, NULL);
5284 back = to_data_backref(rb_node_to_extent_backref(node));
5289 * Iterate all item on the tree and call check_inode_item() to check.
5291 * @root: the root of the tree to be checked.
5292 * @ext_ref: the EXTENDED_IREF feature
5294 * Return 0 if no error found.
5295 * Return <0 for error.
5297 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5299 struct btrfs_path path;
5300 struct node_refs nrefs;
5301 struct btrfs_root_item *root_item = &root->root_item;
5307 * We need to manually check the first inode item(256)
5308 * As the following traversal function will only start from
5309 * the first inode item in the leaf, if inode item(256) is missing
5310 * we will just skip it forever.
5312 ret = check_fs_first_inode(root, ext_ref);
5316 memset(&nrefs, 0, sizeof(nrefs));
5317 level = btrfs_header_level(root->node);
5318 btrfs_init_path(&path);
5320 if (btrfs_root_refs(root_item) > 0 ||
5321 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5322 path.nodes[level] = root->node;
5323 path.slots[level] = 0;
5324 extent_buffer_get(root->node);
5326 struct btrfs_key key;
5328 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5329 level = root_item->drop_level;
5330 path.lowest_level = level;
5331 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5338 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5341 /* if ret is negative, walk shall stop */
5347 ret = walk_up_tree_v2(root, &path, &level);
5349 /* Normal exit, reset ret to err */
5356 btrfs_release_path(&path);
5361 * Find the relative ref for root_ref and root_backref.
5363 * @root: the root of the root tree.
5364 * @ref_key: the key of the root ref.
5366 * Return 0 if no error occurred.
/*
 * Given a ROOT_REF or ROOT_BACKREF item at @node/@slot, look up its
 * counterpart in @root (the opposite type, with objectid and offset swapped)
 * and compare dirid, sequence, name length and name of the two.
 *
 * NOTE(review): this listing has lost its short lines (braces, gotos, some
 * declarations and assignments), so the exact guards and the returned value
 * are not shown here.
 */
5368 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5369 struct extent_buffer *node, int slot)
5371 struct btrfs_path path;
5372 struct btrfs_key key;
5373 struct btrfs_root_ref *ref;
5374 struct btrfs_root_ref *backref;
5375 char ref_name[BTRFS_NAME_LEN] = {0};
5376 char backref_name[BTRFS_NAME_LEN] = {0};
5382 u32 backref_namelen;
5387 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5388 ref_dirid = btrfs_root_ref_dirid(node, ref);
5389 ref_seq = btrfs_root_ref_sequence(node, ref);
5390 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* Names longer than BTRFS_NAME_LEN are warned about and read truncated. */
5392 if (ref_namelen <= BTRFS_NAME_LEN) {
5395 len = BTRFS_NAME_LEN;
5396 warning("%s[%llu %llu] ref_name too long",
5397 ref_key->type == BTRFS_ROOT_REF_KEY ?
5398 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5401 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5403 /* Find relative root_ref */
/*
 * Build the counterpart key: objectid and offset trade places, and the
 * sum-minus-type expression maps ROOT_REF to ROOT_BACKREF and vice versa.
 */
5404 key.objectid = ref_key->offset;
5405 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5406 key.offset = ref_key->objectid;
5408 btrfs_init_path(&path);
5409 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5411 err |= ROOT_REF_MISSING;
5412 error("%s[%llu %llu] couldn't find relative ref",
5413 ref_key->type == BTRFS_ROOT_REF_KEY ?
5414 "ROOT_REF" : "ROOT_BACKREF",
5415 ref_key->objectid, ref_key->offset);
5419 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5420 struct btrfs_root_ref);
5421 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5422 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5423 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5425 if (backref_namelen <= BTRFS_NAME_LEN) {
5426 len = backref_namelen;
5428 len = BTRFS_NAME_LEN;
5429 warning("%s[%llu %llu] ref_name too long",
5430 key.type == BTRFS_ROOT_REF_KEY ?
5431 "ROOT_REF" : "ROOT_BACKREF",
5432 key.objectid, key.offset);
5434 read_extent_buffer(path.nodes[0], backref_name,
5435 (unsigned long)(backref + 1), len);
/*
 * Lengths are compared first; the name bytes are then compared over len,
 * the (possibly clamped) length of the counterpart's name.
 */
5437 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5438 ref_namelen != backref_namelen ||
5439 strncmp(ref_name, backref_name, len)) {
5440 err |= ROOT_REF_MISMATCH;
5441 error("%s[%llu %llu] mismatch relative ref",
5442 ref_key->type == BTRFS_ROOT_REF_KEY ?
5443 "ROOT_REF" : "ROOT_BACKREF",
5444 ref_key->objectid, ref_key->offset);
5447 btrfs_release_path(&path);
/*
 * check_fs_roots_v2(): walk the root tree beginning at the
 * BTRFS_FS_TREE_OBJECTID root item. Root items of fs/subvolume trees are
 * handed to check_fs_root_v2(); ROOT_REF and ROOT_BACKREF items are handed
 * to check_root_ref().
 */
5452 * Check all fs/file tree in low_memory mode.
5454 * 1. for fs tree root item, call check_fs_root_v2()
5455 * 2. for fs tree root ref/backref, call check_root_ref()
5457 * Return 0 if no error occurred.
5459 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5461 struct btrfs_root *tree_root = fs_info->tree_root;
5462 struct btrfs_root *cur_root = NULL;
5463 struct btrfs_path path;
5464 struct btrfs_key key;
5465 struct extent_buffer *node;
5466 unsigned int ext_ref;
/* ext_ref carries the EXTENDED_IREF incompat bit down to check_fs_root_v2() */
5471 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5473 btrfs_init_path(&path);
5474 key.objectid = BTRFS_FS_TREE_OBJECTID;
5476 key.type = BTRFS_ROOT_ITEM_KEY;
5478 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5482 } else if (ret > 0) {
/* Per-item loop body: decode the key at the current path position */
5488 node = path.nodes[0];
5489 slot = path.slots[0];
5490 btrfs_item_key_to_cpu(node, &key, slot);
/* Objectids above BTRFS_LAST_FREE_OBJECTID are outside the range handled here */
5491 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5493 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5494 fs_root_objectid(key.objectid)) {
/* The reloc tree is read without the root cache and freed again below */
5495 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5496 cur_root = btrfs_read_fs_root_no_cache(fs_info,
/* Other roots: offset (u64)-1 is set on the key before btrfs_read_fs_root() */
5499 key.offset = (u64)-1;
5500 cur_root = btrfs_read_fs_root(fs_info, &key);
5503 if (IS_ERR(cur_root)) {
5504 error("Fail to read fs/subvol tree: %lld",
5510 ret = check_fs_root_v2(cur_root, ext_ref);
5513 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5514 btrfs_free_fs_root(cur_root);
5515 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5516 key.type == BTRFS_ROOT_BACKREF_KEY) {
5517 ret = check_root_ref(tree_root, &key, node, slot);
/* Advance to the next item of the root tree */
5521 ret = btrfs_next_item(tree_root, &path);
5531 btrfs_release_path(&path);
5535 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
5536 struct cache_tree *root_cache)
5540 if (!ctx.progress_enabled)
5541 fprintf(stderr, "checking fs roots\n");
5542 if (check_mode == CHECK_MODE_LOWMEM)
5543 ret = check_fs_roots_v2(fs_info);
5545 ret = check_fs_roots(fs_info, root_cache);
/*
 * all_backpointers_checked(): walk every backref stored in
 * rec->backref_tree and compare it against the extent record:
 *  - each backref must have been found in the extent tree,
 *  - each tree backref must have been referenced back,
 *  - each data backref must have matching found/expected ref counts,
 *    disk bytenr and byte length,
 *  - the summed number of found refs must equal rec->refs.
 * Diagnostics are written to stderr.
 * NOTE(review): how @print_errs gates the messages and what is returned is
 * decided on lines not shown here -- TODO confirm.
 */
5550 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5552 struct extent_backref *back, *tmp;
5553 struct tree_backref *tback;
5554 struct data_backref *dback;
5558 rbtree_postorder_for_each_entry_safe(back, tmp,
5559 &rec->backref_tree, node) {
/* Backref seen while scanning trees but absent from the extent tree */
5560 if (!back->found_extent_tree) {
5564 if (back->is_data) {
5565 dback = to_data_backref(back);
5566 fprintf(stderr, "Data backref %llu %s %llu"
5567 " owner %llu offset %llu num_refs %lu"
5568 " not found in extent tree\n",
5569 (unsigned long long)rec->start,
5570 back->full_backref ?
5572 back->full_backref ?
5573 (unsigned long long)dback->parent:
5574 (unsigned long long)dback->root,
5575 (unsigned long long)dback->owner,
5576 (unsigned long long)dback->offset,
5577 (unsigned long)dback->num_refs);
5579 tback = to_tree_backref(back);
5580 fprintf(stderr, "Tree backref %llu parent %llu"
5581 " root %llu not found in extent tree\n",
5582 (unsigned long long)rec->start,
5583 (unsigned long long)tback->parent,
5584 (unsigned long long)tback->root);
/* Tree backref recorded in the extent tree but never referenced back */
5587 if (!back->is_data && !back->found_ref) {
5591 tback = to_tree_backref(back);
5592 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5593 (unsigned long long)rec->start,
5594 back->full_backref ? "parent" : "root",
5595 back->full_backref ?
5596 (unsigned long long)tback->parent :
5597 (unsigned long long)tback->root, back);
/* Data backref: compare the per-backref counters and geometry */
5599 if (back->is_data) {
5600 dback = to_data_backref(back);
5601 if (dback->found_ref != dback->num_refs) {
5605 fprintf(stderr, "Incorrect local backref count"
5606 " on %llu %s %llu owner %llu"
5607 " offset %llu found %u wanted %u back %p\n",
5608 (unsigned long long)rec->start,
5609 back->full_backref ?
5611 back->full_backref ?
5612 (unsigned long long)dback->parent:
5613 (unsigned long long)dback->root,
5614 (unsigned long long)dback->owner,
5615 (unsigned long long)dback->offset,
5616 dback->found_ref, dback->num_refs, back);
5618 if (dback->disk_bytenr != rec->start) {
5622 fprintf(stderr, "Backref disk bytenr does not"
5623 " match extent record, bytenr=%llu, "
5624 "ref bytenr=%llu\n",
5625 (unsigned long long)rec->start,
5626 (unsigned long long)dback->disk_bytenr);
5629 if (dback->bytes != rec->nr) {
5633 fprintf(stderr, "Backref bytes do not match "
5634 "extent backref, bytenr=%llu, ref "
5635 "bytes=%llu, backref bytes=%llu\n",
5636 (unsigned long long)rec->start,
5637 (unsigned long long)rec->nr,
5638 (unsigned long long)dback->bytes);
/* Accumulate the global count; data backrefs contribute found_ref */
5641 if (!back->is_data) {
5644 dback = to_data_backref(back);
5645 found += dback->found_ref;
/* The accumulated count must equal the ref count stored in the record */
5648 if (found != rec->refs) {
5652 fprintf(stderr, "Incorrect global backref count "
5653 "on %llu found %llu wanted %llu\n",
5654 (unsigned long long)rec->start,
5655 (unsigned long long)found,
5656 (unsigned long long)rec->refs);
/* rb-tree destructor callback: free the extent_backref that embeds @node. */
static void __free_one_backref(struct rb_node *node)
{
	free(rb_node_to_extent_backref(node));
}
5669 static void free_all_extent_backrefs(struct extent_record *rec)
5671 rb_free_nodes(&rec->backref_tree, __free_one_backref);
5674 static void free_extent_record_cache(struct cache_tree *extent_cache)
5676 struct cache_extent *cache;
5677 struct extent_record *rec;
5680 cache = first_cache_extent(extent_cache);
5683 rec = container_of(cache, struct extent_record, cache);
5684 remove_cache_extent(extent_cache, cache);
5685 free_all_extent_backrefs(rec);
5690 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5691 struct extent_record *rec)
5693 if (rec->content_checked && rec->owner_ref_checked &&
5694 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5695 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5696 !rec->bad_full_backref && !rec->crossing_stripes &&
5697 !rec->wrong_chunk_type) {
5698 remove_cache_extent(extent_cache, &rec->cache);
5699 free_all_extent_backrefs(rec);
5700 list_del_init(&rec->list);
5706 static int check_owner_ref(struct btrfs_root *root,
5707 struct extent_record *rec,
5708 struct extent_buffer *buf)
5710 struct extent_backref *node, *tmp;
5711 struct tree_backref *back;
5712 struct btrfs_root *ref_root;
5713 struct btrfs_key key;
5714 struct btrfs_path path;
5715 struct extent_buffer *parent;
5720 rbtree_postorder_for_each_entry_safe(node, tmp,
5721 &rec->backref_tree, node) {
5724 if (!node->found_ref)
5726 if (node->full_backref)
5728 back = to_tree_backref(node);
5729 if (btrfs_header_owner(buf) == back->root)
5732 BUG_ON(rec->is_root);
5734 /* try to find the block by search corresponding fs tree */
5735 key.objectid = btrfs_header_owner(buf);
5736 key.type = BTRFS_ROOT_ITEM_KEY;
5737 key.offset = (u64)-1;
5739 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5740 if (IS_ERR(ref_root))
5743 level = btrfs_header_level(buf);
5745 btrfs_item_key_to_cpu(buf, &key, 0);
5747 btrfs_node_key_to_cpu(buf, &key, 0);
5749 btrfs_init_path(&path);
5750 path.lowest_level = level + 1;
5751 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5755 parent = path.nodes[level + 1];
5756 if (parent && buf->start == btrfs_node_blockptr(parent,
5757 path.slots[level + 1]))
5760 btrfs_release_path(&path);
5761 return found ? 0 : 1;
5764 static int is_extent_tree_record(struct extent_record *rec)
5766 struct extent_backref *node, *tmp;
5767 struct tree_backref *back;
5770 rbtree_postorder_for_each_entry_safe(node, tmp,
5771 &rec->backref_tree, node) {
5774 back = to_tree_backref(node);
5775 if (node->full_backref)
5777 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5784 static int record_bad_block_io(struct btrfs_fs_info *info,
5785 struct cache_tree *extent_cache,
5788 struct extent_record *rec;
5789 struct cache_extent *cache;
5790 struct btrfs_key key;
5792 cache = lookup_cache_extent(extent_cache, start, len);
5796 rec = container_of(cache, struct extent_record, cache);
5797 if (!is_extent_tree_record(rec))
5800 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5801 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * swap_values(): exchange the entries at @slot and @slot + 1 of @buf.
 * In a node (header level != 0) the two btrfs_key_ptr entries are swapped;
 * in a leaf the item data, the item offsets/sizes and the two keys are
 * swapped.
 */
5804 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5805 struct extent_buffer *buf, int slot)
/* Node case: read both key pointers, then write them back crosswise */
5807 if (btrfs_header_level(buf)) {
5808 struct btrfs_key_ptr ptr1, ptr2;
5810 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5811 sizeof(struct btrfs_key_ptr));
5812 read_extent_buffer(buf, &ptr2,
5813 btrfs_node_key_ptr_offset(slot + 1),
5814 sizeof(struct btrfs_key_ptr));
5815 write_extent_buffer(buf, &ptr1,
5816 btrfs_node_key_ptr_offset(slot + 1),
5817 sizeof(struct btrfs_key_ptr));
5818 write_extent_buffer(buf, &ptr2,
5819 btrfs_node_key_ptr_offset(slot),
5820 sizeof(struct btrfs_key_ptr));
/* Push the node's first key up to its parents (presumably only when slot 0 moved -- TODO confirm) */
5822 struct btrfs_disk_key key;
5823 btrfs_node_key(buf, &key, 0);
5824 btrfs_fixup_low_keys(root, path, &key,
5825 btrfs_header_level(buf) + 1);
/* Leaf case: work on temporary heap copies of both items' data */
5828 struct btrfs_item *item1, *item2;
5829 struct btrfs_key k1, k2;
5830 char *item1_data, *item2_data;
5831 u32 item1_offset, item2_offset, item1_size, item2_size;
5833 item1 = btrfs_item_nr(slot);
5834 item2 = btrfs_item_nr(slot + 1);
5835 btrfs_item_key_to_cpu(buf, &k1, slot);
5836 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5837 item1_offset = btrfs_item_offset(buf, item1);
5838 item2_offset = btrfs_item_offset(buf, item2);
5839 item1_size = btrfs_item_size(buf, item1);
5840 item2_size = btrfs_item_size(buf, item2);
5842 item1_data = malloc(item1_size);
5845 item2_data = malloc(item2_size);
5851 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5852 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): item1_data holds item1_size bytes but is written with
 * item2_size (and item2_data with item1_size). When the sizes differ this
 * looks like a read past the smaller buffer -- confirm.
 */
5854 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5855 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
/* Exchange offset and size in the two item headers */
5859 btrfs_set_item_offset(buf, item1, item2_offset);
5860 btrfs_set_item_offset(buf, item2, item1_offset);
5861 btrfs_set_item_size(buf, item1, item2_size);
5862 btrfs_set_item_size(buf, item2, item1_size);
/* Exchange the keys; path->slots[0] selects the item whose key is set */
5864 path->slots[0] = slot;
5865 btrfs_set_item_key_unsafe(root, path, &k2);
5866 path->slots[0] = slot + 1;
5867 btrfs_set_item_key_unsafe(root, path, &k1);
5872 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5874 struct extent_buffer *buf;
5875 struct btrfs_key k1, k2;
5877 int level = path->lowest_level;
5880 buf = path->nodes[level];
5881 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5883 btrfs_node_key_to_cpu(buf, &k1, i);
5884 btrfs_node_key_to_cpu(buf, &k2, i + 1);
5886 btrfs_item_key_to_cpu(buf, &k1, i);
5887 btrfs_item_key_to_cpu(buf, &k2, i + 1);
5889 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5891 ret = swap_values(root, path, buf, i);
5894 btrfs_mark_buffer_dirty(buf);
5900 static int delete_bogus_item(struct btrfs_root *root,
5901 struct btrfs_path *path,
5902 struct extent_buffer *buf, int slot)
5904 struct btrfs_key key;
5905 int nritems = btrfs_header_nritems(buf);
5907 btrfs_item_key_to_cpu(buf, &key, slot);
5909 /* These are all the keys we can deal with missing. */
5910 if (key.type != BTRFS_DIR_INDEX_KEY &&
5911 key.type != BTRFS_EXTENT_ITEM_KEY &&
5912 key.type != BTRFS_METADATA_ITEM_KEY &&
5913 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5914 key.type != BTRFS_EXTENT_DATA_REF_KEY)
5917 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5918 (unsigned long long)key.objectid, key.type,
5919 (unsigned long long)key.offset, slot, buf->start);
5920 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5921 btrfs_item_nr_offset(slot + 1),
5922 sizeof(struct btrfs_item) *
5923 (nritems - slot - 1));
5924 btrfs_set_header_nritems(buf, nritems - 1);
5926 struct btrfs_disk_key disk_key;
5928 btrfs_item_key(buf, &disk_key, 0);
5929 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5931 btrfs_mark_buffer_dirty(buf);
/*
 * fix_item_offset(): repair item data offsets in the leaf at path->nodes[0].
 * Item 0 is expected to end at BTRFS_LEAF_DATA_SIZE(root); every later item
 * is expected to end where the previous item starts. An item ending beyond
 * that point is handed to delete_bogus_item(); an item ending short of it
 * is shifted by the size of the gap.
 */
5935 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5937 struct extent_buffer *buf;
5941 /* We should only get this for leaves */
5942 BUG_ON(path->lowest_level);
5943 buf = path->nodes[0];
5945 for (i = 0; i < btrfs_header_nritems(buf); i++) {
5946 unsigned int shift = 0, offset;
/* First item: its end must coincide with the end of the leaf data area */
5948 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5949 BTRFS_LEAF_DATA_SIZE(root)) {
5950 if (btrfs_item_end_nr(buf, i) >
5951 BTRFS_LEAF_DATA_SIZE(root)) {
5952 ret = delete_bogus_item(root, path, buf, i);
5955 fprintf(stderr, "item is off the end of the "
5956 "leaf, can't fix\n");
5960 shift = BTRFS_LEAF_DATA_SIZE(root) -
5961 btrfs_item_end_nr(buf, i);
/* Later items: the end must coincide with the previous item's offset */
5962 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5963 btrfs_item_offset_nr(buf, i - 1)) {
5964 if (btrfs_item_end_nr(buf, i) >
5965 btrfs_item_offset_nr(buf, i - 1)) {
5966 ret = delete_bogus_item(root, path, buf, i);
5969 fprintf(stderr, "items overlap, can't fix\n");
5973 shift = btrfs_item_offset_nr(buf, i - 1) -
5974 btrfs_item_end_nr(buf, i);
/* Move the item data up by 'shift' bytes; the offset update follows */
5979 printf("Shifting item nr %d by %u bytes in block %llu\n",
5980 i, shift, (unsigned long long)buf->start);
5981 offset = btrfs_item_offset_nr(buf, i);
5982 memmove_extent_buffer(buf,
5983 btrfs_leaf_data(buf) + offset + shift,
5984 btrfs_leaf_data(buf) + offset,
5985 btrfs_item_size_nr(buf, i));
5986 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5988 btrfs_mark_buffer_dirty(buf);
5992 * We may have moved things, in which case we want to exit so we don't
5993 * write those changes out. Once we have proper abort functionality in
5994 * progs this can be changed to something nicer.
/*
 * try_to_fix_bad_block(): only the BAD_KEY_ORDER and INVALID_OFFSETS
 * statuses are handled. For every root that references @buf (collected by
 * btrfs_find_all_roots()) the root is read, a transaction is started, the
 * block is searched for with cow enabled, and fix_key_order() or
 * fix_item_offset() is run on the resulting path.
 */
6001 * Attempt to fix basic block failures. If we can't fix it for whatever reason
6002 * then just return -EIO.
6004 static int try_to_fix_bad_block(struct btrfs_root *root,
6005 struct extent_buffer *buf,
6006 enum btrfs_tree_block_status status)
6008 struct btrfs_trans_handle *trans;
6009 struct ulist *roots;
6010 struct ulist_node *node;
6011 struct btrfs_root *search_root;
6012 struct btrfs_path path;
6013 struct ulist_iterator iter;
6014 struct btrfs_key root_key, key;
6017 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6018 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
/* Collect the ids of all roots that reference this block */
6021 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6025 btrfs_init_path(&path);
6026 ULIST_ITER_INIT(&iter);
6027 while ((node = ulist_next(roots, &iter))) {
/* node->val is used as the objectid of the root to read */
6028 root_key.objectid = node->val;
6029 root_key.type = BTRFS_ROOT_ITEM_KEY;
6030 root_key.offset = (u64)-1;
6032 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6039 trans = btrfs_start_transaction(search_root, 0);
6040 if (IS_ERR(trans)) {
6041 ret = PTR_ERR(trans);
/* Search down to the level of @buf with block checking disabled on the path */
6045 path.lowest_level = btrfs_header_level(buf);
6046 path.skip_check_block = 1;
6047 if (path.lowest_level)
6048 btrfs_node_key_to_cpu(buf, &key, 0);
6050 btrfs_item_key_to_cpu(buf, &key, 0);
6051 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6054 btrfs_commit_transaction(trans, search_root);
/* Dispatch to the repair routine matching the reported status */
6057 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6058 ret = fix_key_order(search_root, &path);
6059 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6060 ret = fix_item_offset(search_root, &path);
6062 btrfs_commit_transaction(trans, search_root);
6065 btrfs_release_path(&path);
6066 btrfs_commit_transaction(trans, search_root);
6069 btrfs_release_path(&path);
/*
 * check_block(): look up the extent record covering @buf, store the block's
 * generation, first-key objectid and level in it, validate the block with
 * btrfs_check_leaf()/btrfs_check_node() and, if it is not clean, attempt a
 * repair through try_to_fix_bad_block(). On a clean block the record is
 * marked content-checked, the owner ref is verified unless @flags carries
 * BTRFS_BLOCK_FLAG_FULL_BACKREF, and maybe_free_extent_rec() is called.
 */
6073 static int check_block(struct btrfs_root *root,
6074 struct cache_tree *extent_cache,
6075 struct extent_buffer *buf, u64 flags)
6077 struct extent_record *rec;
6078 struct cache_extent *cache;
6079 struct btrfs_key key;
6080 enum btrfs_tree_block_status status;
6084 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6087 rec = container_of(cache, struct extent_record, cache);
6088 rec->generation = btrfs_header_generation(buf);
6090 level = btrfs_header_level(buf);
/* The first key is only read when the block has at least one item */
6091 if (btrfs_header_nritems(buf) > 0) {
6094 btrfs_item_key_to_cpu(buf, &key, 0);
6096 btrfs_node_key_to_cpu(buf, &key, 0);
6098 rec->info_objectid = key.objectid;
6100 rec->info_level = level;
/* Structural validation against the parent key stored in the record */
6102 if (btrfs_is_leaf(buf))
6103 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6105 status = btrfs_check_node(root, &rec->parent_key, buf);
6107 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6109 status = try_to_fix_bad_block(root, buf, status);
6110 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6112 fprintf(stderr, "bad block %llu\n",
6113 (unsigned long long)buf->start);
6116 * Signal to callers we need to start the scan over
6117 * again since we'll have cowed blocks.
6122 rec->content_checked = 1;
/* Full-backref blocks are marked owner-checked without calling check_owner_ref() */
6123 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6124 rec->owner_ref_checked = 1;
6126 ret = check_owner_ref(root, rec, buf);
6128 rec->owner_ref_checked = 1;
6132 maybe_free_extent_rec(extent_cache, rec);
/*
 * find_tree_backref(): scan the rec->backrefs list for a tree backref and
 * compare it by @parent for full backrefs or by @root otherwise.
 * NOTE(review): this walks the rec->backrefs list, while add_tree_backref()
 * inserts new backrefs into rec->backref_tree -- confirm whether this
 * list-based variant is still compiled in.
 */
6137 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6138 u64 parent, u64 root)
6140 struct list_head *cur = rec->backrefs.next;
6141 struct extent_backref *node;
6142 struct tree_backref *back;
6144 while(cur != &rec->backrefs) {
6145 node = to_extent_backref(cur);
6149 back = to_tree_backref(node);
/* Match on the parent bytenr; entries that are not full backrefs are tested here */
6151 if (!node->full_backref)
6153 if (parent == back->parent)
/* Match on the root objectid; full backrefs are tested here */
6156 if (node->full_backref)
6158 if (back->root == root)
6166 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6167 u64 parent, u64 root)
6169 struct tree_backref *ref = malloc(sizeof(*ref));
6173 memset(&ref->node, 0, sizeof(ref->node));
6175 ref->parent = parent;
6176 ref->node.full_backref = 1;
6179 ref->node.full_backref = 0;
/*
 * find_data_backref(): scan the rec->backrefs list for a data backref and
 * compare it by @parent for full backrefs or by root/owner/offset otherwise.
 * When @found_ref is set and the candidate already has found_ref, it is
 * additionally required to agree on @bytes and @disk_bytenr.
 * NOTE(review): like find_tree_backref() this walks the rec->backrefs list
 * although new backrefs are inserted into rec->backref_tree -- confirm
 * whether this variant is still compiled in.
 */
6186 static struct data_backref *find_data_backref(struct extent_record *rec,
6187 u64 parent, u64 root,
6188 u64 owner, u64 offset,
6190 u64 disk_bytenr, u64 bytes)
6192 struct list_head *cur = rec->backrefs.next;
6193 struct extent_backref *node;
6194 struct data_backref *back;
6196 while(cur != &rec->backrefs) {
6197 node = to_extent_backref(cur);
6201 back = to_data_backref(node);
/* Match on the parent bytenr */
6203 if (!node->full_backref)
6205 if (parent == back->parent)
/* Match on root, owner and offset */
6208 if (node->full_backref)
6210 if (back->root == root && back->owner == owner &&
6211 back->offset == offset) {
/* Same root/owner/offset but a different extent geometry */
6212 if (found_ref && node->found_ref &&
6213 (back->bytes != bytes ||
6214 back->disk_bytenr != disk_bytenr))
6224 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6225 u64 parent, u64 root,
6226 u64 owner, u64 offset,
6229 struct data_backref *ref = malloc(sizeof(*ref));
6233 memset(&ref->node, 0, sizeof(ref->node));
6234 ref->node.is_data = 1;
6237 ref->parent = parent;
6240 ref->node.full_backref = 1;
6244 ref->offset = offset;
6245 ref->node.full_backref = 0;
6247 ref->bytes = max_size;
6250 if (max_size > rec->max_size)
6251 rec->max_size = max_size;
6255 /* Check if the type of extent matches with its chunk */
6256 static void check_extent_type(struct extent_record *rec)
6258 struct btrfs_block_group_cache *bg_cache;
6260 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6264 /* data extent, check chunk directly*/
6265 if (!rec->metadata) {
6266 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6267 rec->wrong_chunk_type = 1;
6271 /* metadata extent, check the obvious case first */
6272 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6273 BTRFS_BLOCK_GROUP_METADATA))) {
6274 rec->wrong_chunk_type = 1;
6279 * Check SYSTEM extent, as it's also marked as metadata, we can only
6280 * make sure it's a SYSTEM extent by its backref
6282 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
6283 struct extent_backref *node;
6284 struct tree_backref *tback;
6287 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
6288 if (node->is_data) {
6289 /* tree block shouldn't have data backref */
6290 rec->wrong_chunk_type = 1;
6293 tback = container_of(node, struct tree_backref, node);
6295 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6296 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6298 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6299 if (!(bg_cache->flags & bg_type))
6300 rec->wrong_chunk_type = 1;
6305 * Allocate a new extent record, fill default values from @tmpl and insert int
6306 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6307 * the cache, otherwise it fails.
6309 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6310 struct extent_record *tmpl)
6312 struct extent_record *rec;
6315 BUG_ON(tmpl->max_size == 0);
6316 rec = malloc(sizeof(*rec));
6319 rec->start = tmpl->start;
6320 rec->max_size = tmpl->max_size;
6321 rec->nr = max(tmpl->nr, tmpl->max_size);
6322 rec->found_rec = tmpl->found_rec;
6323 rec->content_checked = tmpl->content_checked;
6324 rec->owner_ref_checked = tmpl->owner_ref_checked;
6325 rec->num_duplicates = 0;
6326 rec->metadata = tmpl->metadata;
6327 rec->flag_block_full_backref = FLAG_UNSET;
6328 rec->bad_full_backref = 0;
6329 rec->crossing_stripes = 0;
6330 rec->wrong_chunk_type = 0;
6331 rec->is_root = tmpl->is_root;
6332 rec->refs = tmpl->refs;
6333 rec->extent_item_refs = tmpl->extent_item_refs;
6334 rec->parent_generation = tmpl->parent_generation;
6335 INIT_LIST_HEAD(&rec->backrefs);
6336 INIT_LIST_HEAD(&rec->dups);
6337 INIT_LIST_HEAD(&rec->list);
6338 rec->backref_tree = RB_ROOT;
6339 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6340 rec->cache.start = tmpl->start;
6341 rec->cache.size = tmpl->nr;
6342 ret = insert_cache_extent(extent_cache, &rec->cache);
6347 bytes_used += rec->nr;
6350 rec->crossing_stripes = check_crossing_stripes(global_info,
6351 rec->start, global_info->nodesize);
6352 check_extent_type(rec);
/*
 * add_extent_rec(): look up the record overlapping [tmpl->start, tmpl->nr).
 * If one exists it is updated from @tmpl; a template that carries found_rec
 * and either starts elsewhere or hits a record that already has found_rec is
 * queued on rec->dups as a duplicate. If none exists, a new record is
 * created by add_extent_rec_nolookup().
 */
6357 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6359 * - refs - if found, increase refs
6360 * - is_root - if found, set
6361 * - content_checked - if found, set
6362 * - owner_ref_checked - if found, set
6364 * If not found, create a new one, initialize and insert.
6366 static int add_extent_rec(struct cache_tree *extent_cache,
6367 struct extent_record *tmpl)
6369 struct extent_record *rec;
6370 struct cache_extent *cache;
6374 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6376 rec = container_of(cache, struct extent_record, cache);
6380 rec->nr = max(tmpl->nr, tmpl->max_size);
6383 * We need to make sure to reset nr to whatever the extent
6384 * record says was the real size, this way we can compare it to
6387 if (tmpl->found_rec) {
/* Different start, or an extent item was already seen: treat as duplicate */
6388 if (tmpl->start != rec->start || rec->found_rec) {
6389 struct extent_record *tmp;
/* Put the record on the global duplicate_extents list once */
6392 if (list_empty(&rec->list))
6393 list_add_tail(&rec->list,
6394 &duplicate_extents);
6397 * We have to do this song and dance in case we
6398 * find an extent record that falls inside of
6399 * our current extent record but does not have
6400 * the same objectid.
6402 tmp = malloc(sizeof(*tmp));
6405 tmp->start = tmpl->start;
6406 tmp->max_size = tmpl->max_size;
6409 tmp->metadata = tmpl->metadata;
6410 tmp->extent_item_refs = tmpl->extent_item_refs;
6411 INIT_LIST_HEAD(&tmp->list);
6412 list_add_tail(&tmp->list, &rec->dups);
6413 rec->num_duplicates++;
/* Take over the extent item ref count; report it if one was already set */
6420 if (tmpl->extent_item_refs && !dup) {
6421 if (rec->extent_item_refs) {
6422 fprintf(stderr, "block %llu rec "
6423 "extent_item_refs %llu, passed %llu\n",
6424 (unsigned long long)tmpl->start,
6425 (unsigned long long)
6426 rec->extent_item_refs,
6427 (unsigned long long)tmpl->extent_item_refs);
6429 rec->extent_item_refs = tmpl->extent_item_refs;
/* Flags are only ever set here, never cleared */
6433 if (tmpl->content_checked)
6434 rec->content_checked = 1;
6435 if (tmpl->owner_ref_checked)
6436 rec->owner_ref_checked = 1;
6437 memcpy(&rec->parent_key, &tmpl->parent_key,
6438 sizeof(tmpl->parent_key));
6439 if (tmpl->parent_generation)
6440 rec->parent_generation = tmpl->parent_generation;
6441 if (rec->max_size < tmpl->max_size)
6442 rec->max_size = tmpl->max_size;
6445 * A metadata extent can't cross stripe_len boundary, otherwise
6446 * kernel scrub won't be able to handle it.
6447 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6451 rec->crossing_stripes = check_crossing_stripes(
6452 global_info, rec->start,
6453 global_info->nodesize);
6454 check_extent_type(rec);
6455 maybe_free_extent_rec(extent_cache, rec);
/* No overlapping record: create and insert a new one */
6459 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * add_tree_backref(): attach a tree backref (@parent or @root) to the extent
 * record covering @bytenr, creating the record and/or the backref when they
 * do not exist yet. Depending on @found_ref either found_ref or
 * found_extent_tree is set on the backref; a message is printed when the
 * flag was already set. A newly allocated backref is inserted into
 * rec->backref_tree.
 */
6464 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6465 u64 parent, u64 root, int found_ref)
6467 struct extent_record *rec;
6468 struct tree_backref *back;
6469 struct cache_extent *cache;
6471 bool insert = false;
6473 cache = lookup_cache_extent(extent_cache, bytenr, 1);
/* Build a template record for @bytenr */
6475 struct extent_record tmpl;
6477 memset(&tmpl, 0, sizeof(tmpl));
6478 tmpl.start = bytenr;
6483 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6487 /* really a bug in cache_extent implement now */
6488 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6493 rec = container_of(cache, struct extent_record, cache);
6494 if (rec->start != bytenr) {
6496 * Several cause, from unaligned bytenr to over lapping extents
/* Reuse an existing backref if there is one, otherwise allocate */
6501 back = find_tree_backref(rec, parent, root);
6503 back = alloc_tree_backref(rec, parent, root);
6510 if (back->node.found_ref) {
6511 fprintf(stderr, "Extent back ref already exists "
6512 "for %llu parent %llu root %llu \n",
6513 (unsigned long long)bytenr,
6514 (unsigned long long)parent,
6515 (unsigned long long)root);
6517 back->node.found_ref = 1;
6519 if (back->node.found_extent_tree) {
6520 fprintf(stderr, "Extent back ref already exists "
6521 "for %llu parent %llu root %llu \n",
6522 (unsigned long long)bytenr,
6523 (unsigned long long)parent,
6524 (unsigned long long)root);
6526 back->node.found_extent_tree = 1;
/* rb_insert() returning non-zero is reported through WARN_ON */
6529 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6530 compare_extent_backref));
6531 check_extent_type(rec);
6532 maybe_free_extent_rec(extent_cache, rec);
/*
 * add_data_backref(): attach a data backref to the extent record covering
 * @bytenr, creating the record and/or the backref when they do not exist
 * yet, and raising rec->max_size to @max_size. With @found_ref set, the
 * backref's found_ref counter is bumped and its bytes/disk_bytenr are
 * updated; otherwise num_refs and found_extent_tree are recorded.
 */
6536 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6537 u64 parent, u64 root, u64 owner, u64 offset,
6538 u32 num_refs, int found_ref, u64 max_size)
6540 struct extent_record *rec;
6541 struct data_backref *back;
6542 struct cache_extent *cache;
6544 bool insert = false;
6546 cache = lookup_cache_extent(extent_cache, bytenr, 1);
/* Build a template record for @bytenr */
6548 struct extent_record tmpl;
6550 memset(&tmpl, 0, sizeof(tmpl));
6551 tmpl.start = bytenr;
6553 tmpl.max_size = max_size;
6555 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6559 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6564 rec = container_of(cache, struct extent_record, cache);
6565 if (rec->max_size < max_size)
6566 rec->max_size = max_size;
6569 * If found_ref is set then max_size is the real size and must match the
6570 * existing refs. So if we have already found a ref then we need to
6571 * make sure that this ref matches the existing one, otherwise we need
6572 * to add a new backref so we can notice that the backrefs don't match
6573 * and we need to figure out who is telling the truth. This is to
6574 * account for that awful fsync bug I introduced where we'd end up with
6575 * a btrfs_file_extent_item that would have its length include multiple
6576 * prealloc extents or point inside of a prealloc extent.
6578 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6581 back = alloc_data_backref(rec, parent, root, owner, offset,
/* Counts one reference; num_refs must be 1 on this path */
6588 BUG_ON(num_refs != 1);
6589 if (back->node.found_ref)
6590 BUG_ON(back->bytes != max_size);
6591 back->node.found_ref = 1;
6592 back->found_ref += 1;
/* bytes and disk_bytenr are updated to the values passed in */
6593 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
6594 back->bytes = max_size;
6595 back->disk_bytenr = bytenr;
6597 /* Need to reinsert if not already in the tree */
6599 rb_erase(&back->node.node, &rec->backref_tree);
6604 rec->content_checked = 1;
6605 rec->owner_ref_checked = 1;
6607 if (back->node.found_extent_tree) {
6608 fprintf(stderr, "Extent back ref already exists "
6609 "for %llu parent %llu root %llu "
6610 "owner %llu offset %llu num_refs %lu\n",
6611 (unsigned long long)bytenr,
6612 (unsigned long long)parent,
6613 (unsigned long long)root,
6614 (unsigned long long)owner,
6615 (unsigned long long)offset,
6616 (unsigned long)num_refs);
6618 back->num_refs = num_refs;
6619 back->node.found_extent_tree = 1;
/* rb_insert() returning non-zero is reported through WARN_ON */
6622 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6623 compare_extent_backref));
6625 maybe_free_extent_rec(extent_cache, rec);
6629 static int add_pending(struct cache_tree *pending,
6630 struct cache_tree *seen, u64 bytenr, u32 size)
6633 ret = add_cache_extent(seen, bytenr, size);
6636 add_cache_extent(pending, bytenr, size);
/*
 * pick_next_pending(): fill @bits (at most @bits_nr entries) with the next
 * block ranges to process. The @reada tree is consulted first, then @nodes
 * (searched from slightly before @last, falling back to the start of the
 * tree), then @pending.
 */
6640 static int pick_next_pending(struct cache_tree *pending,
6641 struct cache_tree *reada,
6642 struct cache_tree *nodes,
6643 u64 last, struct block_info *bits, int bits_nr,
6646 unsigned long node_start = last;
6647 struct cache_extent *cache;
/* Readahead candidates: the first entry of @reada is copied into bits[0] */
6650 cache = search_cache_extent(reada, 0);
6652 bits[0].start = cache->start;
6653 bits[0].size = cache->size;
/* Start the node search 32768 bytes before @last when possible */
6658 if (node_start > 32768)
6659 node_start -= 32768;
6661 cache = search_cache_extent(nodes, node_start);
6663 cache = search_cache_extent(nodes, 0);
6666 cache = search_cache_extent(pending, 0);
/* Copy consecutive cache entries until @bits is full or the tree ends */
6671 bits[ret].start = cache->start;
6672 bits[ret].size = cache->size;
6673 cache = next_cache_extent(cache);
6675 } while (cache && ret < bits_nr);
6681 bits[ret].start = cache->start;
6682 bits[ret].size = cache->size;
6683 cache = next_cache_extent(cache);
6685 } while (cache && ret < bits_nr);
/* With more than 8 free slots, also look at pending extents after bits[0] */
6687 if (bits_nr - ret > 8) {
6688 u64 lookup = bits[0].start + bits[0].size;
6689 struct cache_extent *next;
6690 next = search_cache_extent(pending, lookup);
/* Gap test: the next extent starts more than 32768 bytes past 'lookup' */
6692 if (next->start - lookup > 32768)
6694 bits[ret].start = next->start;
6695 bits[ret].size = next->size;
6696 lookup = next->start + next->size;
6700 next = next_cache_extent(next);
6708 static void free_chunk_record(struct cache_extent *cache)
6710 struct chunk_record *rec;
6712 rec = container_of(cache, struct chunk_record, cache);
6713 list_del_init(&rec->list);
6714 list_del_init(&rec->dextents);
6718 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6720 cache_tree_free_extents(chunk_cache, free_chunk_record);
6723 static void free_device_record(struct rb_node *node)
6725 struct device_record *rec;
6727 rec = container_of(node, struct device_record, node);
6731 FREE_RB_BASED_TREE(device_cache, free_device_record);
6733 int insert_block_group_record(struct block_group_tree *tree,
6734 struct block_group_record *bg_rec)
6738 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6742 list_add_tail(&bg_rec->list, &tree->block_groups);
6746 static void free_block_group_record(struct cache_extent *cache)
6748 struct block_group_record *rec;
6750 rec = container_of(cache, struct block_group_record, cache);
6751 list_del_init(&rec->list);
6755 void free_block_group_tree(struct block_group_tree *tree)
6757 cache_tree_free_extents(&tree->tree, free_block_group_record);
6760 int insert_device_extent_record(struct device_extent_tree *tree,
6761 struct device_extent_record *de_rec)
6766 * Device extent is a bit different from the other extents, because
6767 * the extents which belong to the different devices may have the
6768 * same start and size, so we need use the special extent cache
6769 * search/insert functions.
6771 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6775 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6776 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6780 static void free_device_extent_record(struct cache_extent *cache)
6782 struct device_extent_record *rec;
6784 rec = container_of(cache, struct device_extent_record, cache);
6785 if (!list_empty(&rec->chunk_list))
6786 list_del_init(&rec->chunk_list);
6787 if (!list_empty(&rec->device_list))
6788 list_del_init(&rec->device_list);
6792 void free_device_extent_tree(struct device_extent_tree *tree)
6794 cache_tree_free_extents(&tree->tree, free_device_extent_record);
/*
 * process_extent_ref_v0(): only built with BTRFS_COMPAT_EXTENT_TREE_V0.
 * Converts a v0 extent ref item at @slot of @leaf into a backref: objectids
 * below BTRFS_FIRST_FREE_OBJECTID are recorded as tree backrefs, all others
 * as data backrefs. The item key's objectid is used as bytenr and its offset
 * as parent.
 */
6797 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6798 static int process_extent_ref_v0(struct cache_tree *extent_cache,
6799 struct extent_buffer *leaf, int slot)
6801 struct btrfs_extent_ref_v0 *ref0;
6802 struct btrfs_key key;
6805 btrfs_item_key_to_cpu(leaf, &key, slot);
6806 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
6807 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
6808 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
/* Data ref: root, owner and offset are 0; the count comes from the v0 item */
6811 ret = add_data_backref(extent_cache, key.objectid, key.offset,
6812 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk_record from the chunk item at @slot of @leaf.
 *
 * The record is zero-allocated with calloc() and sized for the item's stripe
 * count via btrfs_chunk_record_size(). A "memory allocation failed" message
 * is written to stderr on the allocation-failure path.
 *
 * Filled in from @key: objectid, type, offset (also used as cache.start).
 * Filled in from the item: length (also cache.size), owner, stripe_len,
 * type_flags, io_width, io_align, sector_size, num_stripes, sub_stripes and,
 * for each stripe, devid, offset and dev_uuid.
 * generation is taken from the leaf header.
 */
6818 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6819 struct btrfs_key *key,
6822 struct btrfs_chunk *ptr;
6823 struct chunk_record *rec;
6826 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6827 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6829 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6831 fprintf(stderr, "memory allocation failed\n");
6835 INIT_LIST_HEAD(&rec->list);
6836 INIT_LIST_HEAD(&rec->dextents);
/* The cache extent covers [key->offset, key->offset + chunk length). */
6839 rec->cache.start = key->offset;
6840 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6842 rec->generation = btrfs_header_generation(leaf);
6844 rec->objectid = key->objectid;
6845 rec->type = key->type;
6846 rec->offset = key->offset;
6848 rec->length = rec->cache.size;
6849 rec->owner = btrfs_chunk_owner(leaf, ptr);
6850 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6851 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6852 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6853 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6854 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6855 rec->num_stripes = num_stripes;
6856 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* Copy the per-stripe device id, offset and device UUID. */
6858 for (i = 0; i < rec->num_stripes; ++i) {
6859 rec->stripes[i].devid =
6860 btrfs_stripe_devid_nr(leaf, ptr, i);
6861 rec->stripes[i].offset =
6862 btrfs_stripe_offset_nr(leaf, ptr, i);
6863 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6864 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb and cache it in @chunk_cache.
 *
 * The item is first passed to btrfs_check_chunk_valid(); the failure path
 * reports "chunk(offset, length) is not valid, ignore it". A chunk_record is
 * then built with btrfs_new_chunk_record() and inserted into @chunk_cache;
 * the "Chunk[...] existed." message belongs to the insert-failure path.
 */
6871 static int process_chunk_item(struct cache_tree *chunk_cache,
6872 struct btrfs_key *key, struct extent_buffer *eb,
6875 struct chunk_record *rec;
6876 struct btrfs_chunk *chunk;
6879 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6881 * Do extra check for this chunk item,
6883 * It's still possible one can craft a leaf with CHUNK_ITEM, with
6884 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6885 * and owner<->key_type check.
6887 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
6890 error("chunk(%llu, %llu) is not valid, ignore it",
6891 key->offset, btrfs_chunk_length(eb, chunk));
6894 rec = btrfs_new_chunk_record(eb, key, slot);
6895 ret = insert_cache_extent(chunk_cache, &rec->cache);
6897 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6898 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it into
 * the @dev_cache rb-tree, ordered by device_record_compare().
 *
 * The "Device[...] existed." message belongs to the rb_insert() failure path.
 *
 * NOTE(review): the record comes from malloc(), not calloc(), so only the
 * fields assigned below are initialized by the code visible here.
 */
6905 static int process_device_item(struct rb_root *dev_cache,
6906 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6908 struct btrfs_dev_item *ptr;
6909 struct device_record *rec;
6912 ptr = btrfs_item_ptr(eb,
6913 slot, struct btrfs_dev_item);
6915 rec = malloc(sizeof(*rec));
6917 fprintf(stderr, "memory allocation failed\n");
6921 rec->devid = key->offset;
6922 rec->generation = btrfs_header_generation(eb);
6924 rec->objectid = key->objectid;
6925 rec->type = key->type;
6926 rec->offset = key->offset;
/* Overwrites the devid taken from key->offset above with the item's own id. */
6928 rec->devid = btrfs_device_id(eb, ptr);
6929 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6930 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6932 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6934 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build an in-memory block_group_record from the block group item at @slot
 * of @leaf.
 *
 * The record is zero-allocated; a "memory allocation failed" message is
 * written to stderr on the allocation-failure path. The cache extent spans
 * [key->objectid, key->objectid + key->offset). The key fields, the leaf
 * generation and the item's flags are copied, and the list head is
 * initialized.
 */
6941 struct block_group_record *
6942 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6945 struct btrfs_block_group_item *ptr;
6946 struct block_group_record *rec;
6948 rec = calloc(1, sizeof(*rec));
6950 fprintf(stderr, "memory allocation failed\n");
/* For block groups the key carries the start (objectid) and size (offset). */
6954 rec->cache.start = key->objectid;
6955 rec->cache.size = key->offset;
6957 rec->generation = btrfs_header_generation(leaf);
6959 rec->objectid = key->objectid;
6960 rec->type = key->type;
6961 rec->offset = key->offset;
6963 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6964 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6966 INIT_LIST_HEAD(&rec->list);
/*
 * Build a block_group_record for the item at @slot of @eb and insert it into
 * @block_group_cache. The "Block Group[...] existed." message belongs to the
 * insert-failure path.
 */
6971 static int process_block_group_item(struct block_group_tree *block_group_cache,
6972 struct btrfs_key *key,
6973 struct extent_buffer *eb, int slot)
6975 struct block_group_record *rec;
6978 rec = btrfs_new_block_group_record(eb, key, slot);
6979 ret = insert_block_group_record(block_group_cache, rec);
6981 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6982 rec->objectid, rec->offset);
/*
 * Build an in-memory device_extent_record from the dev extent item at @slot
 * of @leaf.
 *
 * The record is zero-allocated; a "memory allocation failed" message is
 * written to stderr on the allocation-failure path. The cache extent is
 * identified by key->objectid (cache.objectid) plus key->offset
 * (cache.start), and its size is the dev extent length read from the item.
 * Both list heads are initialized.
 */
6989 struct device_extent_record *
6990 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6991 struct btrfs_key *key, int slot)
6993 struct device_extent_record *rec;
6994 struct btrfs_dev_extent *ptr;
6996 rec = calloc(1, sizeof(*rec));
6998 fprintf(stderr, "memory allocation failed\n");
7002 rec->cache.objectid = key->objectid;
7003 rec->cache.start = key->offset;
7005 rec->generation = btrfs_header_generation(leaf);
7007 rec->objectid = key->objectid;
7008 rec->type = key->type;
7009 rec->offset = key->offset;
7011 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7012 rec->chunk_objecteid =
7013 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7015 btrfs_dev_extent_chunk_offset(leaf, ptr);
7016 rec->length = btrfs_dev_extent_length(leaf, ptr);
7017 rec->cache.size = rec->length;
7019 INIT_LIST_HEAD(&rec->chunk_list);
7020 INIT_LIST_HEAD(&rec->device_list);
/*
 * Build a device_extent_record for the item at @slot of @eb and insert it
 * into @dev_extent_cache. The "Device extent[...] existed." message belongs
 * to the insert-failure path.
 */
7026 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7027 struct btrfs_key *key, struct extent_buffer *eb,
7030 struct device_extent_record *rec;
7033 rec = btrfs_new_device_extent_record(eb, key, slot);
7034 ret = insert_device_extent_record(dev_extent_cache, rec);
7037 "Device extent[%llu, %llu, %llu] existed.\n",
7038 rec->objectid, rec->offset, rec->length);
/*
 * Record the extent described by the EXTENT_ITEM / METADATA_ITEM at @slot of
 * @eb in @extent_cache, then add a backref for each inline reference stored
 * in the item.
 *
 * Extent length: nodesize for METADATA_ITEM keys, key.offset otherwise.
 * Sanity checks reported through error():
 *  - bytenr (key.objectid) not aligned to sectorsize;
 *  - metadata extent whose length differs from nodesize;
 *  - data extent whose length is not aligned to sectorsize.
 * Items smaller than struct btrfs_extent_item are treated as v0 extent items
 * when BTRFS_COMPAT_EXTENT_TREE_V0 is defined; that path records the extent
 * and returns the add_extent_rec() result without parsing inline refs.
 */
7045 static int process_extent_item(struct btrfs_root *root,
7046 struct cache_tree *extent_cache,
7047 struct extent_buffer *eb, int slot)
7049 struct btrfs_extent_item *ei;
7050 struct btrfs_extent_inline_ref *iref;
7051 struct btrfs_extent_data_ref *dref;
7052 struct btrfs_shared_data_ref *sref;
7053 struct btrfs_key key;
7054 struct extent_record tmpl;
7059 u32 item_size = btrfs_item_size_nr(eb, slot);
7065 btrfs_item_key_to_cpu(eb, &key, slot);
7067 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7069 num_bytes = root->fs_info->nodesize;
7071 num_bytes = key.offset;
7074 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7075 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7076 key.objectid, root->fs_info->sectorsize);
7079 if (item_size < sizeof(*ei)) {
7080 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7081 struct btrfs_extent_item_v0 *ei0;
7082 BUG_ON(item_size != sizeof(*ei0));
7083 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7084 refs = btrfs_extent_refs_v0(eb, ei0);
7088 memset(&tmpl, 0, sizeof(tmpl));
7089 tmpl.start = key.objectid;
7090 tmpl.nr = num_bytes;
7091 tmpl.extent_item_refs = refs;
7092 tmpl.metadata = metadata;
7094 tmpl.max_size = num_bytes;
7096 return add_extent_rec(extent_cache, &tmpl);
7099 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7100 refs = btrfs_extent_refs(eb, ei);
7101 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7105 if (metadata && num_bytes != root->fs_info->nodesize) {
7106 error("ignore invalid metadata extent, length %llu does not equal to %u",
7107 num_bytes, root->fs_info->nodesize);
7110 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7111 error("ignore invalid data extent, length %llu is not aligned to %u",
7112 num_bytes, root->fs_info->sectorsize);
/* Record the extent itself before looking at its inline references. */
7116 memset(&tmpl, 0, sizeof(tmpl));
7117 tmpl.start = key.objectid;
7118 tmpl.nr = num_bytes;
7119 tmpl.extent_item_refs = refs;
7120 tmpl.metadata = metadata;
7122 tmpl.max_size = num_bytes;
7123 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs start right after the extent item; a tree block stored under
 * an EXTENT_ITEM key additionally carries a btrfs_tree_block_info first.
 */
7125 ptr = (unsigned long)(ei + 1);
7126 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7127 key.type == BTRFS_EXTENT_ITEM_KEY)
7128 ptr += sizeof(struct btrfs_tree_block_info);
7130 end = (unsigned long)ei + item_size;
/* Decode one inline ref at ptr and dispatch on its type. */
7132 iref = (struct btrfs_extent_inline_ref *)ptr;
7133 type = btrfs_extent_inline_ref_type(eb, iref);
7134 offset = btrfs_extent_inline_ref_offset(eb, iref);
7136 case BTRFS_TREE_BLOCK_REF_KEY:
7137 ret = add_tree_backref(extent_cache, key.objectid,
7141 "add_tree_backref failed (extent items tree block): %s",
7144 case BTRFS_SHARED_BLOCK_REF_KEY:
7145 ret = add_tree_backref(extent_cache, key.objectid,
7149 "add_tree_backref failed (extent items shared block): %s",
7152 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref structure is read starting at the inline ref's offset field. */
7153 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7154 add_data_backref(extent_cache, key.objectid, 0,
7155 btrfs_extent_data_ref_root(eb, dref),
7156 btrfs_extent_data_ref_objectid(eb,
7158 btrfs_extent_data_ref_offset(eb, dref),
7159 btrfs_extent_data_ref_count(eb, dref),
7162 case BTRFS_SHARED_DATA_REF_KEY:
/* The shared data ref structure follows the inline ref header. */
7163 sref = (struct btrfs_shared_data_ref *)(iref + 1);
7164 add_data_backref(extent_cache, key.objectid, offset,
7166 btrfs_shared_data_ref_count(eb, sref),
7170 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7171 key.objectid, key.type, num_bytes);
/* Advance by the size of the ref type just handled. */
7174 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Verify that the free space cache of @cache holds a free-space entry that
 * exactly matches the range [@offset, @offset + @bytes).
 *
 * Before the lookup, the range is trimmed around the superblock mirror
 * copies: for each mirror, btrfs_rmap_block() maps the superblock offset to
 * logical addresses in this block group, and every stripe overlapping the
 * range is cut out of it. A stripe landing in the middle of the range splits
 * it; the left part is checked by a recursive call and the loop continues
 * with the right part.
 *
 * The remaining range is looked up with btrfs_find_free_space(). A missing
 * entry, or one whose offset or size differs, is reported on stderr. A
 * matching entry is unlinked from the free space ctl, so entries that were
 * never matched remain there for the caller to detect.
 */
7181 static int check_cache_range(struct btrfs_root *root,
7182 struct btrfs_block_group_cache *cache,
7183 u64 offset, u64 bytes)
7185 struct btrfs_free_space *entry;
7191 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
7192 bytenr = btrfs_sb_offset(i);
7193 ret = btrfs_rmap_block(root->fs_info,
7194 cache->key.objectid, bytenr, 0,
7195 &logical, &nr, &stripe_len);
/* Stripe entirely before the range. */
7200 if (logical[nr] + stripe_len <= offset)
/* Stripe entirely after the range. */
7202 if (offset + bytes <= logical[nr])
7204 if (logical[nr] == offset) {
7205 if (stripe_len >= bytes) {
7209 bytes -= stripe_len;
7210 offset += stripe_len;
7211 } else if (logical[nr] < offset) {
7212 if (logical[nr] + stripe_len >=
7217 bytes = (offset + bytes) -
7218 (logical[nr] + stripe_len);
7219 offset = logical[nr] + stripe_len;
7222 * Could be tricky, the super may land in the
7223 * middle of the area we're checking. First
7224 * check the easiest case, it's at the end.
7226 if (logical[nr] + stripe_len >=
7228 bytes = logical[nr] - offset;
7232 /* Check the left side */
7233 ret = check_cache_range(root, cache,
7235 logical[nr] - offset);
7241 /* Now we continue with the right side */
7242 bytes = (offset + bytes) -
7243 (logical[nr] + stripe_len);
7244 offset = logical[nr] + stripe_len;
7251 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7253 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7254 offset, offset+bytes);
7258 if (entry->offset != offset) {
7259 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7264 if (entry->bytes != bytes) {
7265 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7266 bytes, entry->bytes, offset);
/* Matched: drop the entry so that only unmatched ones remain afterwards. */
7270 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the loaded free space of block group @cache against the extent
 * tree.
 *
 * @root is replaced by the extent root. Starting at the larger of the block
 * group start and BTRFS_SUPER_INFO_OFFSET, the EXTENT_ITEM / METADATA_ITEM
 * keys inside the block group are walked in order. `last` tracks the end of
 * the previous extent (key.offset bytes for EXTENT_ITEM, nodesize for
 * METADATA_ITEM); each gap between `last` and the next extent start must be
 * present as free space, which check_cache_range() verifies. The tail between
 * the final extent and the end of the block group is checked the same way.
 *
 * Finally, if the free_space_offset tree still has entries, a message about
 * leftover entries is printed.
 */
7275 static int verify_space_cache(struct btrfs_root *root,
7276 struct btrfs_block_group_cache *cache)
7278 struct btrfs_path path;
7279 struct extent_buffer *leaf;
7280 struct btrfs_key key;
7284 root = root->fs_info->extent_root;
7286 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7288 btrfs_init_path(&path);
7289 key.objectid = last;
7291 key.type = BTRFS_EXTENT_ITEM_KEY;
7292 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7297 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7298 ret = btrfs_next_leaf(root, &path);
7306 leaf = path.nodes[0];
7307 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Past the end of this block group. */
7308 if (key.objectid >= cache->key.offset + cache->key.objectid)
7310 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7311 key.type != BTRFS_METADATA_ITEM_KEY) {
/* Extent starts exactly where the previous one ended: no gap to check. */
7316 if (last == key.objectid) {
7317 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7318 last = key.objectid + key.offset;
7320 last = key.objectid + root->fs_info->nodesize;
/* Gap [last, key.objectid) must be covered by the free space cache. */
7325 ret = check_cache_range(root, cache, last,
7326 key.objectid - last);
7329 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7330 last = key.objectid + key.offset;
7332 last = key.objectid + root->fs_info->nodesize;
/* Free space between the last extent and the end of the block group. */
7336 if (last < cache->key.objectid + cache->key.offset)
7337 ret = check_cache_range(root, cache, last,
7338 cache->key.objectid +
7339 cache->key.offset - last);
7342 btrfs_release_path(&path);
7345 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7346 fprintf(stderr, "There are still entries left in the space "
/*
 * Verify the free space information of every block group.
 *
 * When the superblock's cache generation is set (not -1ULL) and differs from
 * the superblock generation, a notice that the space cache will be
 * invalidated is printed.
 *
 * Block groups are walked with btrfs_lookup_first_block_group(), starting
 * just past the primary superblock. For each one, the free space ctl is
 * initialized if it does not exist yet and btrfs_remove_free_space_cache()
 * is called on the group. The free space is then loaded either from the free
 * space tree (when the FREE_SPACE_TREE compat_ro bit is set, with the super
 * stripes excluded around the load) or from the free space cache, and
 * verified with verify_space_cache().
 *
 * Progress reporting is started/stopped through ctx when enabled.
 * Returns -EINVAL if `error` is non-zero at the end, 0 otherwise.
 */
7354 static int check_space_cache(struct btrfs_root *root)
7356 struct btrfs_block_group_cache *cache;
7357 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7361 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7362 btrfs_super_generation(root->fs_info->super_copy) !=
7363 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7364 printf("cache and super generation don't match, space cache "
7365 "will be invalidated\n");
7369 if (ctx.progress_enabled) {
7370 ctx.tp = TASK_FREE_SPACE;
7371 task_start(ctx.info);
7375 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* Next lookup continues right after this block group. */
7379 start = cache->key.objectid + cache->key.offset;
7380 if (!cache->free_space_ctl) {
7381 if (btrfs_init_free_space_ctl(cache,
7382 root->fs_info->sectorsize)) {
7387 btrfs_remove_free_space_cache(cache);
7390 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7391 ret = exclude_super_stripes(root, cache);
7393 fprintf(stderr, "could not exclude super stripes: %s\n",
7398 ret = load_free_space_tree(root->fs_info, cache);
7399 free_excluded_extents(root, cache);
7401 fprintf(stderr, "could not load free space tree: %s\n",
7408 ret = load_free_space_cache(root->fs_info, cache);
7413 ret = verify_space_cache(root, cache);
7415 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7416 cache->key.objectid);
7421 task_stop(ctx.info);
7423 return error ? -EINVAL : 0;
/*
 * Read the data range [@bytenr, @bytenr + @num_bytes) and compare the
 * checksum of every sectorsize block with the expected checksum stored in
 * @eb starting at @leaf_offset.
 *
 * @num_bytes is tested for being a multiple of sectorsize. The data is read
 * into a buffer of @num_bytes bytes with read_extent_data() from the current
 * mirror. For each sector a checksum is computed with btrfs_csum_data() /
 * btrfs_csum_final() and compared with the csum_size bytes read from the
 * leaf at the matching index. A mismatch is reported on stderr together with
 * the mirror number, and btrfs_num_copies() is consulted to decide whether
 * another mirror can be tried.
 */
7426 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7427 u64 num_bytes, unsigned long leaf_offset,
7428 struct extent_buffer *eb) {
7430 struct btrfs_fs_info *fs_info = root->fs_info;
7432 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7434 unsigned long csum_offset;
7438 u64 data_checked = 0;
7444 if (num_bytes % fs_info->sectorsize)
7447 data = malloc(num_bytes);
7451 while (offset < num_bytes) {
7454 read_len = num_bytes - offset;
/* read as much as possible in one go */
7456 ret = read_extent_data(fs_info, data + offset,
7457 bytenr + offset, &read_len, mirror);
7461 /* verify every 4k data's checksum */
7462 while (data_checked < read_len) {
7464 tmp = offset + data_checked;
7466 csum = btrfs_csum_data((char *)data + tmp,
7467 csum, fs_info->sectorsize);
7468 btrfs_csum_final(csum, (u8 *)&csum);
/* Expected csum index = sector index within the range. */
7470 csum_offset = leaf_offset +
7471 tmp / fs_info->sectorsize * csum_size;
7472 read_extent_buffer(eb, (char *)&csum_expected,
7473 csum_offset, csum_size);
7474 /* try another mirror */
7475 if (csum != csum_expected) {
7476 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7477 mirror, bytenr + tmp,
7478 csum, csum_expected);
7479 num_copies = btrfs_num_copies(root->fs_info,
7481 if (mirror < num_copies - 1) {
7486 data_checked += fs_info->sectorsize;
/*
 * Check that the byte range [@bytenr, @bytenr + num_bytes) is covered by
 * EXTENT_ITEMs in the extent tree.
 *
 * The extent tree is searched for (bytenr, EXTENT_ITEM, -1) and the path is
 * stepped back until the key type is no longer greater than EXTENT_ITEM.
 * The items are then walked forward; each EXTENT_ITEM overlapping the range
 * shrinks the remaining [bytenr, bytenr + num_bytes) window. An extent lying
 * strictly inside the window splits it: the right part is checked by a
 * recursive call and the walk continues with the left part.
 *
 * If bytes remain uncovered and no error occurred, "There are no extents for
 * csum range ..." is printed.
 */
7495 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7498 struct btrfs_path path;
7499 struct extent_buffer *leaf;
7500 struct btrfs_key key;
7503 btrfs_init_path(&path);
7504 key.objectid = bytenr;
7505 key.type = BTRFS_EXTENT_ITEM_KEY;
7506 key.offset = (u64)-1;
7509 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7512 fprintf(stderr, "Error looking up extent record %d\n", ret);
7513 btrfs_release_path(&path);
7516 if (path.slots[0] > 0) {
7519 ret = btrfs_prev_leaf(root, &path);
7522 } else if (ret > 0) {
7529 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7532 * Block group items come before extent items if they have the same
7533 * bytenr, so walk back one more just in case. Dear future traveller,
7534 * first congrats on mastering time travel. Now if it's not too much
7535 * trouble could you go back to 2006 and tell Chris to make the
7536 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7537 * EXTENT_ITEM_KEY please?
7539 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7540 if (path.slots[0] > 0) {
7543 ret = btrfs_prev_leaf(root, &path);
7546 } else if (ret > 0) {
7551 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7555 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7556 ret = btrfs_next_leaf(root, &path);
7558 fprintf(stderr, "Error going to next leaf "
7560 btrfs_release_path(&path);
7566 leaf = path.nodes[0];
7567 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7568 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent ends before the window starts. */
7572 if (key.objectid + key.offset < bytenr) {
/* Extent starts after the window ends. */
7576 if (key.objectid > bytenr + num_bytes)
7579 if (key.objectid == bytenr) {
7580 if (key.offset >= num_bytes) {
7584 num_bytes -= key.offset;
7585 bytenr += key.offset;
7586 } else if (key.objectid < bytenr) {
7587 if (key.objectid + key.offset >= bytenr + num_bytes) {
7591 num_bytes = (bytenr + num_bytes) -
7592 (key.objectid + key.offset);
7593 bytenr = key.objectid + key.offset;
7595 if (key.objectid + key.offset < bytenr + num_bytes) {
7596 u64 new_start = key.objectid + key.offset;
7597 u64 new_bytes = bytenr + num_bytes - new_start;
7600 * Weird case, the extent is in the middle of
7601 * our range, we'll have to search one side
7602 * and then the other. Not sure if this happens
7603 * in real life, but no harm in coding it up
7604 * anyway just in case.
7606 btrfs_release_path(&path);
7607 ret = check_extent_exists(root, new_start,
7610 fprintf(stderr, "Right section didn't "
7614 num_bytes = key.objectid - bytenr;
7617 num_bytes = key.objectid - bytenr;
7624 if (num_bytes && !ret) {
7625 fprintf(stderr, "There are no extents for csum range "
7626 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7630 btrfs_release_path(&path);
/*
 * Walk the checksum tree and verify that every checksummed byte range is
 * backed by an extent record.
 *
 * @root is replaced by the csum root; "No valid csum tree found" is printed
 * when its node is not up to date. The EXTENT_CSUM items are then walked in
 * key order. Each item covers
 *     (item size / csum_size) * sectorsize
 * bytes starting at key.offset. When the global check_data_csum is set, the
 * data itself is verified with check_extent_csums(); otherwise that step is
 * skipped via the skip_csum_check label.
 *
 * Contiguous items are merged into one [offset, offset + num_bytes) run.
 * When an item does not start where the current run ends, the run is passed
 * to check_extent_exists() and a new run is started at key.offset.
 */
7634 static int check_csums(struct btrfs_root *root)
7636 struct btrfs_path path;
7637 struct extent_buffer *leaf;
7638 struct btrfs_key key;
7639 u64 offset = 0, num_bytes = 0;
7640 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7644 unsigned long leaf_offset;
7646 root = root->fs_info->csum_root;
7647 if (!extent_buffer_uptodate(root->node)) {
7648 fprintf(stderr, "No valid csum tree found\n");
7652 btrfs_init_path(&path);
7653 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7654 key.type = BTRFS_EXTENT_CSUM_KEY;
7656 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7658 fprintf(stderr, "Error searching csum tree %d\n", ret);
7659 btrfs_release_path(&path);
7663 if (ret > 0 && path.slots[0])
7668 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7669 ret = btrfs_next_leaf(root, &path);
7671 fprintf(stderr, "Error going to next leaf "
7678 leaf = path.nodes[0];
7680 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7681 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Bytes of data covered by the checksums stored in this item. */
7686 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7687 csum_size) * root->fs_info->sectorsize;
7688 if (!check_data_csum)
7689 goto skip_csum_check;
7690 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7691 ret = check_extent_csums(root, key.offset, data_len,
7697 offset = key.offset;
/* Not contiguous with the current run: verify the run, then restart. */
7698 } else if (key.offset != offset + num_bytes) {
7699 ret = check_extent_exists(root, offset, num_bytes);
7701 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7702 "there is no extent record\n",
7703 offset, offset+num_bytes);
7706 offset = key.offset;
7709 num_bytes += data_len;
7713 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field, in the order objectid, type,
 * offset. A later field is only examined when all earlier fields are equal,
 * and each level tests whether @key's field is smaller than @drop_key's.
 * run_next_block() uses the result to decide whether a node pointer at the
 * drop level is handled specially.
 */
7717 static int is_dropped_key(struct btrfs_key *key,
7718 struct btrfs_key *drop_key) {
7719 if (key->objectid < drop_key->objectid)
7721 else if (key->objectid == drop_key->objectid) {
7722 if (key->type < drop_key->type)
7724 else if (key->type == drop_key->type) {
7725 if (key->offset < drop_key->offset)
7733 * Here are the rules for FULL_BACKREF.
7735 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7736 * 2) If btrfs_header_owner(buf) no longer points to buf then we have FULL_BACKREF set.
7738 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7739 * if it happened after the relocation occurred since we'll have dropped the
7740 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7741 * have no real way to know for sure.
7743 * We process the blocks one root at a time, and we start from the lowest root
7744 * objectid and go to the highest. So we can just lookup the owner backref for
7745 * the record and if we don't find it then we know it doesn't exist and we have FULL_BACKREF set.
7748 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7749 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7750 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether tree block @buf should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF and report it through @flags.
 *
 * The extent record of @buf is looked up in @extent_cache. The checks
 * visible here, in order:
 *  - the root objectid in @ri is below BTRFS_FIRST_FREE_OBJECTID;
 *  - @buf is the root node recorded in @ri (buf->start == ri->bytenr);
 *  - the header has BTRFS_HEADER_FLAG_RELOC set;
 *  - the header owner equals the root objectid in @ri;
 *  - a tree backref for the header owner is looked up in the record.
 *
 * On either outcome the result is compared with the value previously stored
 * in rec->flag_block_full_backref (unless that is FLAG_UNSET); a disagreement
 * marks the record with bad_full_backref.
 */
7752 static int calc_extent_flag(struct cache_tree *extent_cache,
7753 struct extent_buffer *buf,
7754 struct root_item_record *ri,
7757 struct extent_record *rec;
7758 struct cache_extent *cache;
7759 struct tree_backref *tback;
7762 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7763 /* we have added this extent before */
7767 rec = container_of(cache, struct extent_record, cache);
7770 * Except file/reloc tree, we can not have
7773 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7778 if (buf->start == ri->bytenr)
7781 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7784 owner = btrfs_header_owner(buf);
7785 if (owner == ri->objectid)
7788 tback = find_tree_backref(rec, 0, owner);
/* Outcome "not full backref": a stored value other than 0 is a conflict. */
7793 if (rec->flag_block_full_backref != FLAG_UNSET &&
7794 rec->flag_block_full_backref != 0)
7795 rec->bad_full_backref = 1;
/* Outcome "full backref": a stored value other than 1 is a conflict. */
7798 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7799 if (rec->flag_block_full_backref != FLAG_UNSET &&
7800 rec->flag_block_full_backref != 1)
7801 rec->bad_full_backref = 1;
/*
 * Print "Invalid key type(<type>) found in root(<root>)" to stderr, using
 * print_key_type() and print_objectid() to format @key_type and @rootid.
 */
7805 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7807 fprintf(stderr, "Invalid key type(");
7808 print_key_type(stderr, 0, key_type);
7809 fprintf(stderr, ") found in root(");
7810 print_objectid(stderr, rootid, 0);
7811 fprintf(stderr, ")\n");
7815 * Check if the key is valid with its extent buffer.
7817 * This is an early check in case an invalid key exists in an extent buffer
7818 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check that an item of @key_type may appear in the tree identified by
 * @rootid. The pairs visible here:
 *  - DEV_ITEM, CHUNK_ITEM                      -> chunk tree only
 *  - EXTENT_ITEM, METADATA_ITEM, BLOCK_GROUP   -> extent tree only
 *  - ROOT_ITEM                                 -> root tree only
 *  - DEV_EXTENT                                -> dev tree only
 * A mismatch is reported through report_mismatch_key_root().
 */
7821 static int check_type_with_root(u64 rootid, u8 key_type)
7824 /* Only valid in chunk tree */
7825 case BTRFS_DEV_ITEM_KEY:
7826 case BTRFS_CHUNK_ITEM_KEY:
7827 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7830 /* valid in csum and log tree */
/*
 * NOTE(review): the label below is a tree objectid constant, while every
 * sibling label is a key type, and the value is matched against key_type.
 * Presumably BTRFS_EXTENT_CSUM_KEY was intended. The second half of the
 * condition is not visible here; check that it accepts the csum tree before
 * changing the label, otherwise valid csum items would be rejected.
 */
7831 case BTRFS_CSUM_TREE_OBJECTID:
7832 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7836 case BTRFS_EXTENT_ITEM_KEY:
7837 case BTRFS_METADATA_ITEM_KEY:
7838 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7839 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7842 case BTRFS_ROOT_ITEM_KEY:
7843 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7846 case BTRFS_DEV_EXTENT_KEY:
7847 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7853 report_mismatch_key_root(key_type, rootid);
/*
 * Process the next pending tree block.
 *
 * pick_next_pending() selects candidate blocks into @bits; a loop over them
 * adds each to @reada and issues readahead. bits[0] is then the block that
 * is read and processed:
 *  - it is removed from the @pending, @reada and @nodes caches;
 *  - its FULL_BACKREF state is looked up in the extent tree or computed with
 *    calc_extent_flag(), adjusted against the owner / reloc information, and
 *    stored in the matching extent record;
 *  - the block is validated with check_block();
 *  - for a leaf, every item is dispatched on key.type to the process_*()
 *    helpers or to add_tree_backref() / add_data_backref(); regular file
 *    extents also update the global data byte counters;
 *  - for a node, every child pointer gets an extent record and a tree
 *    backref and is queued through add_pending() on @nodes or @pending.
 * The global btree statistics are updated before the buffer is released.
 */
7857 static int run_next_block(struct btrfs_root *root,
7858 struct block_info *bits,
7861 struct cache_tree *pending,
7862 struct cache_tree *seen,
7863 struct cache_tree *reada,
7864 struct cache_tree *nodes,
7865 struct cache_tree *extent_cache,
7866 struct cache_tree *chunk_cache,
7867 struct rb_root *dev_cache,
7868 struct block_group_tree *block_group_cache,
7869 struct device_extent_tree *dev_extent_cache,
7870 struct root_item_record *ri)
7872 struct btrfs_fs_info *fs_info = root->fs_info;
7873 struct extent_buffer *buf;
7874 struct extent_record *rec = NULL;
7885 struct btrfs_key key;
7886 struct cache_extent *cache;
7889 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7890 bits_nr, &reada_bits);
7895 for(i = 0; i < nritems; i++) {
7896 ret = add_cache_extent(reada, bits[i].start,
7901 /* fixme, get the parent transid */
7902 readahead_tree_block(fs_info, bits[i].start, 0);
/* The first picked block is the one processed in this call. */
7905 *last = bits[0].start;
7906 bytenr = bits[0].start;
7907 size = bits[0].size;
7909 cache = lookup_cache_extent(pending, bytenr, size);
7911 remove_cache_extent(pending, cache);
7914 cache = lookup_cache_extent(reada, bytenr, size);
7916 remove_cache_extent(reada, cache);
7919 cache = lookup_cache_extent(nodes, bytenr, size);
7921 remove_cache_extent(nodes, cache);
7924 cache = lookup_cache_extent(extent_cache, bytenr, size);
7926 rec = container_of(cache, struct extent_record, cache);
7927 gen = rec->parent_generation;
7930 /* fixme, get the real parent transid */
7931 buf = read_tree_block(root->fs_info, bytenr, gen);
7932 if (!extent_buffer_uptodate(buf)) {
7933 record_bad_block_io(root->fs_info,
7934 extent_cache, bytenr, size);
7938 nritems = btrfs_header_nritems(buf);
/*
 * Unless the extent tree is being re-initialized, ask the extent tree for
 * the block's flags first; calc_extent_flag() appears on both paths.
 */
7941 if (!init_extent_tree) {
7942 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7943 btrfs_header_level(buf), 1, NULL,
7946 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7948 fprintf(stderr, "Couldn't calc extent flags\n");
7949 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7954 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7956 fprintf(stderr, "Couldn't calc extent flags\n");
7957 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7961 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7963 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7964 ri->objectid == btrfs_header_owner(buf)) {
7966 * Ok we got to this block from it's original owner and
7967 * we have FULL_BACKREF set. Relocation can leave
7968 * converted blocks over so this is altogether possible,
7969 * however it's not possible if the generation > the
7970 * last snapshot, so check for this case.
7972 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7973 btrfs_header_generation(buf) > ri->last_snapshot) {
7974 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7975 rec->bad_full_backref = 1;
7980 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7981 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7982 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7983 rec->bad_full_backref = 1;
/* Remember the final decision in the extent record. */
7987 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7988 rec->flag_block_full_backref = 1;
7992 rec->flag_block_full_backref = 0;
7994 owner = btrfs_header_owner(buf);
7997 ret = check_block(root, extent_cache, buf, flags);
8001 if (btrfs_is_leaf(buf)) {
8002 btree_space_waste += btrfs_leaf_free_space(root, buf);
8003 for (i = 0; i < nritems; i++) {
8004 struct btrfs_file_extent_item *fi;
8005 btrfs_item_key_to_cpu(buf, &key, i);
8007 * Check key type against the leaf owner.
8008 * Could filter quite a lot of early error if
8011 if (check_type_with_root(btrfs_header_owner(buf),
8013 fprintf(stderr, "ignoring invalid key\n");
8016 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8017 process_extent_item(root, extent_cache, buf,
8021 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8022 process_extent_item(root, extent_cache, buf,
8026 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8028 btrfs_item_size_nr(buf, i);
8031 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8032 process_chunk_item(chunk_cache, &key, buf, i);
8035 if (key.type == BTRFS_DEV_ITEM_KEY) {
8036 process_device_item(dev_cache, &key, buf, i);
8039 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8040 process_block_group_item(block_group_cache,
8044 if (key.type == BTRFS_DEV_EXTENT_KEY) {
8045 process_device_extent_item(dev_extent_cache,
8050 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8051 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8052 process_extent_ref_v0(extent_cache, buf, i);
8059 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8060 ret = add_tree_backref(extent_cache,
8061 key.objectid, 0, key.offset, 0);
8064 "add_tree_backref failed (leaf tree block): %s",
8068 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8069 ret = add_tree_backref(extent_cache,
8070 key.objectid, key.offset, 0, 0);
8073 "add_tree_backref failed (leaf shared block): %s",
8077 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8078 struct btrfs_extent_data_ref *ref;
8079 ref = btrfs_item_ptr(buf, i,
8080 struct btrfs_extent_data_ref);
8081 add_data_backref(extent_cache,
8083 btrfs_extent_data_ref_root(buf, ref),
8084 btrfs_extent_data_ref_objectid(buf,
8086 btrfs_extent_data_ref_offset(buf, ref),
8087 btrfs_extent_data_ref_count(buf, ref),
8088 0, root->fs_info->sectorsize);
8091 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8092 struct btrfs_shared_data_ref *ref;
8093 ref = btrfs_item_ptr(buf, i,
8094 struct btrfs_shared_data_ref);
8095 add_data_backref(extent_cache,
8096 key.objectid, key.offset, 0, 0, 0,
8097 btrfs_shared_data_ref_count(buf, ref),
8098 0, root->fs_info->sectorsize);
8101 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8102 struct bad_item *bad;
8104 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
/* Other orphan items are queued on the global delete_items list. */
8108 bad = malloc(sizeof(struct bad_item));
8111 INIT_LIST_HEAD(&bad->list);
8112 memcpy(&bad->key, &key,
8113 sizeof(struct btrfs_key));
8114 bad->root_id = owner;
8115 list_add_tail(&bad->list, &delete_items);
/* From here on only regular (non-inline, non-hole) file extents matter. */
8118 if (key.type != BTRFS_EXTENT_DATA_KEY)
8120 fi = btrfs_item_ptr(buf, i,
8121 struct btrfs_file_extent_item);
8122 if (btrfs_file_extent_type(buf, fi) ==
8123 BTRFS_FILE_EXTENT_INLINE)
8125 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8128 data_bytes_allocated +=
8129 btrfs_file_extent_disk_num_bytes(buf, fi);
/*
 * NOTE(review): this tests the accumulated global counter rather than the
 * size that was just added -- confirm that is the intent.
 */
8130 if (data_bytes_allocated < root->fs_info->sectorsize) {
8133 data_bytes_referenced +=
8134 btrfs_file_extent_num_bytes(buf, fi);
8135 add_data_backref(extent_cache,
8136 btrfs_file_extent_disk_bytenr(buf, fi),
8137 parent, owner, key.objectid, key.offset -
8138 btrfs_file_extent_offset(buf, fi), 1, 1,
8139 btrfs_file_extent_disk_num_bytes(buf, fi));
8143 struct btrfs_key first_key;
8145 first_key.objectid = 0;
8148 btrfs_item_key_to_cpu(buf, &first_key, 0);
8149 level = btrfs_header_level(buf);
8150 for (i = 0; i < nritems; i++) {
8151 struct extent_record tmpl;
8153 ptr = btrfs_node_blockptr(buf, i);
8154 size = root->fs_info->nodesize;
8155 btrfs_node_key_to_cpu(buf, &key, i);
8157 if ((level == ri->drop_level)
8158 && is_dropped_key(&key, &ri->drop_key)) {
/* Record the child block, remembering the parent's key and generation. */
8163 memset(&tmpl, 0, sizeof(tmpl));
8164 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8165 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8170 tmpl.max_size = size;
8171 ret = add_extent_rec(extent_cache, &tmpl);
8175 ret = add_tree_backref(extent_cache, ptr, parent,
8179 "add_tree_backref failed (non-leaf block): %s",
8185 add_pending(nodes, seen, ptr, size);
8187 add_pending(pending, seen, ptr, size);
/* Unused key-pointer slots in this node count as wasted btree space. */
8190 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
8191 nritems) * sizeof(struct btrfs_key_ptr);
8193 total_btree_bytes += buf->len;
8194 if (fs_root_objectid(btrfs_header_owner(buf)))
8195 total_fs_tree_bytes += buf->len;
8196 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8197 total_extent_tree_bytes += buf->len;
8199 free_extent_buffer(buf);
/*
 * Queue the root block @buf for processing and record it in @extent_cache.
 *
 * A block above level 0 is queued on @nodes, a level-0 block on @pending. An
 * extent record covering [buf->start, buf->start + buf->len) is added, then
 * a tree backref:
 *  - for the reloc tree objectid, or when the header's backref revision is
 *    older than BTRFS_MIXED_BACKREF_REV, the backref uses buf->start as its
 *    parent;
 *  - otherwise it uses parent 0 and @objectid as the root.
 */
8203 static int add_root_to_pending(struct extent_buffer *buf,
8204 struct cache_tree *extent_cache,
8205 struct cache_tree *pending,
8206 struct cache_tree *seen,
8207 struct cache_tree *nodes,
8210 struct extent_record tmpl;
8213 if (btrfs_header_level(buf) > 0)
8214 add_pending(nodes, seen, buf->start, buf->len);
8216 add_pending(pending, seen, buf->start, buf->len);
8218 memset(&tmpl, 0, sizeof(tmpl));
8219 tmpl.start = buf->start;
8224 tmpl.max_size = buf->len;
8225 add_extent_rec(extent_cache, &tmpl);
8227 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8228 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8229 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8232 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8237 /* as we fix the tree, we might be deleting blocks that
8238 * we're tracking for repair. This hook makes sure we
8239 * remove any backrefs for blocks as we are fixing them.
/*
 * Keep the fsck extent cache in sync when an extent reference is dropped.
 *
 * The extent record for [@bytenr, @bytenr + @num_bytes) is looked up in
 * root->fs_info->fsck_extent_cache. An @owner at or above
 * BTRFS_FIRST_FREE_OBJECTID marks the reference as a data reference.
 *
 * Data reference: the matching data_backref is found with
 * find_data_backref(); its found_ref / num_refs counters and the record's
 * refs / extent_item_refs are reduced by refs_to_drop, and the
 * node.found_ref / node.found_extent_tree bits are cleared once the
 * respective counter reaches 0.
 *
 * Tree reference: the matching tree_backref is found with
 * find_tree_backref(); node.found_ref and node.found_extent_tree are cleared
 * and extent_item_refs is decremented.
 *
 * Finally maybe_free_extent_rec() is called on the record.
 */
8241 static int free_extent_hook(struct btrfs_trans_handle *trans,
8242 struct btrfs_root *root,
8243 u64 bytenr, u64 num_bytes, u64 parent,
8244 u64 root_objectid, u64 owner, u64 offset,
8247 struct extent_record *rec;
8248 struct cache_extent *cache;
8250 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8252 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8253 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8257 rec = container_of(cache, struct extent_record, cache);
8259 struct data_backref *back;
8260 back = find_data_backref(rec, parent, root_objectid, owner,
8261 offset, 1, bytenr, num_bytes);
8264 if (back->node.found_ref) {
8265 back->found_ref -= refs_to_drop;
8267 rec->refs -= refs_to_drop;
8269 if (back->node.found_extent_tree) {
8270 back->num_refs -= refs_to_drop;
8271 if (rec->extent_item_refs)
8272 rec->extent_item_refs -= refs_to_drop;
8274 if (back->found_ref == 0)
8275 back->node.found_ref = 0;
8276 if (back->num_refs == 0)
8277 back->node.found_extent_tree = 0;
/*
 * NOTE(review): the backref is erased only while node.found_ref is still
 * set; confirm that "!back->node.found_ref" was not intended.
 */
8279 if (!back->node.found_extent_tree && back->node.found_ref) {
8280 rb_erase(&back->node.node, &rec->backref_tree);
8284 struct tree_backref *back;
8285 back = find_tree_backref(rec, parent, root_objectid);
8288 if (back->node.found_ref) {
8291 back->node.found_ref = 0;
8293 if (back->node.found_extent_tree) {
8294 if (rec->extent_item_refs)
8295 rec->extent_item_refs--;
8296 back->node.found_extent_tree = 0;
/*
 * NOTE(review): node.found_ref was cleared just above whenever it was set,
 * so this condition looks like it can never hold in the tree-backref branch
 * -- confirm whether "!back->node.found_ref" was intended.
 */
8298 if (!back->node.found_extent_tree && back->node.found_ref) {
8299 rb_erase(&back->node.node, &rec->backref_tree);
8303 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent-tree items that belong to 'bytenr'.
 *
 * Searches root->fs_info->extent_root with a key whose objectid is bytenr
 * and whose offset is (u64)-1, then inspects the item found.  Items of type
 * EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF, EXTENT_DATA_REF,
 * EXTENT_REF_V0, SHARED_BLOCK_REF or SHARED_DATA_REF are logged to stderr
 * and removed with btrfs_del_item().  For any other type the search key is
 * stepped down (offset - 1 when the type is 0, otherwise type - 1 with
 * offset (u64)-1).  After removing an EXTENT_ITEM or METADATA_ITEM,
 * btrfs_update_block_group() is called for the extent's byte count.
 *
 * NOTE(review): 'bytenr', 'ret' and 'slot' are declared on lines missing
 * from this listing, as are the loop construct, the initial key type and all
 * break/return statements; the loop shape described above is inferred.
 */
8308 static int delete_extent_records(struct btrfs_trans_handle *trans,
8309 struct btrfs_root *root,
8310 struct btrfs_path *path,
8313 struct btrfs_key key;
8314 struct btrfs_key found_key;
8315 struct extent_buffer *leaf;
8320 key.objectid = bytenr;
8322 key.offset = (u64)-1;
8325 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8332 if (path->slots[0] == 0)
8338 leaf = path->nodes[0];
8339 slot = path->slots[0];
8341 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8342 if (found_key.objectid != bytenr)
/* Anything that is not an extent item or backref item is stepped over. */
8345 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8346 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8347 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8348 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8349 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8350 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8351 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8352 btrfs_release_path(path);
8353 if (found_key.type == 0) {
8354 if (found_key.offset == 0)
8356 key.offset = found_key.offset - 1;
8357 key.type = found_key.type;
8359 key.type = found_key.type - 1;
8360 key.offset = (u64)-1;
8364 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8365 found_key.objectid, found_key.type, found_key.offset);
8367 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8370 btrfs_release_path(path);
8372 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8373 found_key.type == BTRFS_METADATA_ITEM_KEY) {
/* EXTENT_ITEM keys carry the length in offset; METADATA_ITEM uses nodesize. */
8374 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8375 found_key.offset : root->fs_info->nodesize;
8377 ret = btrfs_update_block_group(trans, root, bytenr,
8384 btrfs_release_path(path);
/*
 * For a single backref, allocate a new extent item (when one has not been
 * allocated yet) and add the backref to it.
 *
 * @trans:     running transaction
 * @info:      filesystem whose extent root is modified
 * @path:      scratch path; released before returning
 * @rec:       extent record being rebuilt
 * @back:      the backref to re-create
 * @allocated: tested on a line missing from this listing; presumably
 *             non-zero skips the extent item insertion -- confirm
 * @flags:     extra extent flags OR-ed into a tree block extent item
 *
 * The extent item is inserted at (rec->start, EXTENT_ITEM, rec->max_size)
 * with a ref count of 0 and rec->generation.  Data extents get
 * BTRFS_EXTENT_FLAG_DATA; tree blocks additionally get a
 * btrfs_tree_block_info holding rec->info_level and a disk key built from
 * rec->info_objectid.  References are then added with
 * btrfs_inc_extent_ref(): dback->found_ref times for data backrefs, once for
 * tree backrefs.
 *
 * Fixes relative to the damaged listing: "&copy_key" had been turned into a
 * copyright sign by HTML-entity decoding, a stray second ';' followed the
 * copy_key declaration, and one inline comment was never closed.
 *
 * NOTE(review): each line still starts with a stale line number and short
 * lines (braces, else, goto/return, some call arguments) are missing.
 */
8392 static int record_extent(struct btrfs_trans_handle *trans,
8393 struct btrfs_fs_info *info,
8394 struct btrfs_path *path,
8395 struct extent_record *rec,
8396 struct extent_backref *back,
8397 int allocated, u64 flags)
8400 struct btrfs_root *extent_root = info->extent_root;
8401 struct extent_buffer *leaf;
8402 struct btrfs_key ins_key;
8403 struct btrfs_extent_item *ei;
8404 struct data_backref *dback;
8405 struct btrfs_tree_block_info *bi;
8408 rec->max_size = max_t(u64, rec->max_size,
8412 u32 item_size = sizeof(*ei);
8415 item_size += sizeof(*bi);
8417 ins_key.objectid = rec->start;
8418 ins_key.offset = rec->max_size;
8419 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8421 ret = btrfs_insert_empty_item(trans, extent_root, path,
8422 &ins_key, item_size);
8426 leaf = path->nodes[0];
8427 ei = btrfs_item_ptr(leaf, path->slots[0],
8428 struct btrfs_extent_item);
/* Start at zero refs; btrfs_inc_extent_ref() below adds the real ones. */
8430 btrfs_set_extent_refs(leaf, ei, 0);
8431 btrfs_set_extent_generation(leaf, ei, rec->generation);
8433 if (back->is_data) {
8434 btrfs_set_extent_flags(leaf, ei,
8435 BTRFS_EXTENT_FLAG_DATA);
8437 struct btrfs_disk_key copy_key;
/* The tree block info sits directly behind the extent item. */
8439 bi = (struct btrfs_tree_block_info *)(ei + 1);
8440 memset_extent_buffer(leaf, 0, (unsigned long)bi,
8443 btrfs_set_disk_key_objectid(&copy_key,
8444 rec->info_objectid);
8445 btrfs_set_disk_key_type(&copy_key, 0);
8446 btrfs_set_disk_key_offset(&copy_key, 0);
8448 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8449 btrfs_set_tree_block_key(leaf, bi, &copy_key);
8451 btrfs_set_extent_flags(leaf, ei,
8452 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8455 btrfs_mark_buffer_dirty(leaf);
8456 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8457 rec->max_size, 1, 0);
8460 btrfs_release_path(path);
8463 if (back->is_data) {
8467 dback = to_data_backref(back);
8468 if (back->full_backref)
8469 parent = dback->parent;
8473 for (i = 0; i < dback->found_ref; i++) {
8474 /* if parent != 0, we're doing a full backref
8475 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8476 * just makes the backref allocator create a data
 * ... (the remainder of this comment is missing from the listing)
 */
8479 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8480 rec->start, rec->max_size,
8484 BTRFS_FIRST_FREE_OBJECTID :
8490 fprintf(stderr, "adding new data backref"
8491 " on %llu %s %llu owner %llu"
8492 " offset %llu found %d\n",
8493 (unsigned long long)rec->start,
8494 back->full_backref ?
8496 back->full_backref ?
8497 (unsigned long long)parent :
8498 (unsigned long long)dback->root,
8499 (unsigned long long)dback->owner,
8500 (unsigned long long)dback->offset,
8504 struct tree_backref *tback;
8506 tback = to_tree_backref(back);
8507 if (back->full_backref)
8508 parent = tback->parent;
8512 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8513 rec->start, rec->max_size,
8514 parent, tback->root, 0, 0);
8515 fprintf(stderr, "adding new tree backref on "
8516 "start %llu len %llu parent %llu root %llu\n",
8517 rec->start, rec->max_size, parent, tback->root);
8520 btrfs_release_path(path);
/*
 * Walk the 'entries' list looking for an extent_entry whose bytenr and bytes
 * both equal the arguments.
 *
 * NOTE(review): the return statements are on lines missing from this
 * listing; presumably the matching entry is returned and NULL otherwise --
 * confirm against the real source.
 */
8524 static struct extent_entry *find_entry(struct list_head *entries,
8525 u64 bytenr, u64 bytes)
8527 struct extent_entry *entry = NULL;
8529 list_for_each_entry(entry, entries, list) {
8530 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Pick the extent_entry on 'entries' that the backrefs agree on most.
 *
 * Entries whose 'broken' counter equals their 'count' are not trusted.  The
 * remaining entries are compared by 'count', tracking a current 'best' and
 * the previously visited entry 'prev'; equal counts mean no clear winner has
 * been established yet and the scan keeps going.
 *
 * NOTE(review): the continue/assignment/return lines are missing from this
 * listing, so the exact tie-breaking and the return value (presumably
 * 'best', which starts out NULL) must be confirmed against the real source.
 */
8537 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8539 struct extent_entry *entry, *best = NULL, *prev = NULL;
8541 list_for_each_entry(entry, entries, list) {
8543 * If there are as many broken entries as entries then we know
8544 * not to trust this particular entry.
8546 if (entry->broken == entry->count)
8550 * Special case, when there are only two entries and 'best' is
8560 * If our current entry == best then we can't be sure our best
8561 * is really the best, so we need to keep searching.
8563 if (best && best->count == entry->count) {
8569 /* Prev == entry, not good enough, have to keep searching */
8570 if (!prev->broken && prev->count == entry->count)
8574 best = (prev->count > entry->count) ? prev : entry;
8575 else if (best->count < entry->count)
/*
 * Rewrite one file extent item so that it agrees with the chosen extent
 * entry.
 *
 * @info:  filesystem being repaired
 * @path:  scratch path; released before the final return
 * @dback: data backref whose file extent item disagrees
 * @entry: the (bytenr, bytes) pair the item is converged on
 *
 * Outline of the visible code:
 *  1. Read the fs root named by dback->root.
 *  2. Search it read-only for (dback->owner, EXTENT_DATA, dback->offset) and
 *     examine items until one has disk_bytenr / disk_num_bytes equal to
 *     dback->disk_bytenr / dback->bytes.
 *  3. Start a transaction and search again for that key with cow set.
 *  4. Give up, with a message asking for a btrfs-image, when the extent is
 *     compressed and its bytenr differs from the entry, or when the ref
 *     falls outside the entry's range.
 *  5. Otherwise set disk_bytenr to entry->bytenr (adjusting the file extent
 *     offset when the old bytenr was above or below it), set disk_num_bytes
 *     to entry->bytes, and set ram_bytes as well for uncompressed extents.
 *  6. Mark the leaf dirty and commit.
 *
 * Returns ret if it is non-zero, otherwise the result of the commit.
 *
 * NOTE(review): loop constructs, error branches and some arithmetic (for
 * example how off_diff is applied) are on lines missing from this listing.
 */
8583 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8584 struct data_backref *dback, struct extent_entry *entry)
8586 struct btrfs_trans_handle *trans;
8587 struct btrfs_root *root;
8588 struct btrfs_file_extent_item *fi;
8589 struct extent_buffer *leaf;
8590 struct btrfs_key key;
8594 key.objectid = dback->root;
8595 key.type = BTRFS_ROOT_ITEM_KEY;
8596 key.offset = (u64)-1;
8597 root = btrfs_read_fs_root(info, &key);
8599 fprintf(stderr, "Couldn't find root for our ref\n");
8604 * The backref points to the original offset of the extent if it was
8605 * split, so we need to search down to the offset we have and then walk
8606 * forward until we find the backref we're looking for.
8608 key.objectid = dback->owner;
8609 key.type = BTRFS_EXTENT_DATA_KEY;
8610 key.offset = dback->offset;
8611 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8613 fprintf(stderr, "Error looking up ref %d\n", ret);
8618 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8619 ret = btrfs_next_leaf(root, path);
8621 fprintf(stderr, "Couldn't find our ref, next\n");
8625 leaf = path->nodes[0];
8626 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8627 if (key.objectid != dback->owner ||
8628 key.type != BTRFS_EXTENT_DATA_KEY) {
8629 fprintf(stderr, "Couldn't find our ref, search\n");
8632 fi = btrfs_item_ptr(leaf, path->slots[0],
8633 struct btrfs_file_extent_item);
8634 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8635 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8637 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8642 btrfs_release_path(path);
8644 trans = btrfs_start_transaction(root, 1);
8646 return PTR_ERR(trans);
8649 * Ok we have the key of the file extent we want to fix, now we can cow
8650 * down to the thing and fix it.
8652 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8654 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8655 key.objectid, key.type, key.offset, ret);
8659 fprintf(stderr, "Well that's odd, we just found this key "
8660 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8665 leaf = path->nodes[0];
8666 fi = btrfs_item_ptr(leaf, path->slots[0],
8667 struct btrfs_file_extent_item);
8669 if (btrfs_file_extent_compression(leaf, fi) &&
8670 dback->disk_bytenr != entry->bytenr) {
8671 fprintf(stderr, "Ref doesn't match the record start and is "
8672 "compressed, please take a btrfs-image of this file "
8673 "system and send it to a btrfs developer so they can "
8674 "complete this functionality for bytenr %Lu\n",
8675 dback->disk_bytenr);
8680 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8681 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8682 } else if (dback->disk_bytenr > entry->bytenr) {
8683 u64 off_diff, offset;
8685 off_diff = dback->disk_bytenr - entry->bytenr;
8686 offset = btrfs_file_extent_offset(leaf, fi);
8687 if (dback->disk_bytenr + offset +
8688 btrfs_file_extent_num_bytes(leaf, fi) >
8689 entry->bytenr + entry->bytes) {
8690 fprintf(stderr, "Ref is past the entry end, please "
8691 "take a btrfs-image of this file system and "
8692 "send it to a btrfs developer, ref %Lu\n",
8693 dback->disk_bytenr);
8698 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8699 btrfs_set_file_extent_offset(leaf, fi, offset);
8700 } else if (dback->disk_bytenr < entry->bytenr) {
8703 offset = btrfs_file_extent_offset(leaf, fi);
8704 if (dback->disk_bytenr + offset < entry->bytenr) {
8705 fprintf(stderr, "Ref is before the entry start, please"
8706 " take a btrfs-image of this file system and "
8707 "send it to a btrfs developer, ref %Lu\n",
8708 dback->disk_bytenr);
8713 offset += dback->disk_bytenr;
8714 offset -= entry->bytenr;
8715 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8716 btrfs_set_file_extent_offset(leaf, fi, offset);
8719 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8722 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8723 * only do this if we aren't using compression, otherwise it's a
8726 if (!btrfs_file_extent_compression(leaf, fi))
8727 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8729 printf("ram bytes may be wrong?\n");
8730 btrfs_mark_buffer_dirty(leaf);
8732 err = btrfs_commit_transaction(trans, root);
8733 btrfs_release_path(path);
8734 return ret ? ret : err;
/*
 * Make the data backrefs of an extent record agree on (bytenr, bytes) and
 * repair the file extent items that disagree.
 *
 * @info: filesystem being repaired
 * @path: scratch path handed on to repair_ref()
 * @rec:  extent record whose backref_tree is examined
 *
 * First pass: every backref that is data, is not a full backref and has
 * found_ref != 0 is bucketed into an extent_entry keyed by
 * (dback->disk_bytenr, dback->bytes) on a local list; a bucket is allocated
 * when none exists yet.  A backref that differs from rec->start / rec->nr,
 * or that is marked broken, flags a mismatch.
 *
 * If there is at most one bucket and no mismatch, the backrefs agree.
 * Otherwise find_most_right_entry() chooses the winner.  The extent record
 * itself (rec->start, rec->nr) is consulted as well -- presumably only when
 * no winner emerged -- and is added as a bucket when broken backrefs were
 * seen and rec->found_rec is set.  The function refuses to continue when the
 * winner does not start at rec->start.
 *
 * Second pass: repair_ref() is called for every qualifying backref whose
 * bytes or disk_bytenr differ from the winner.  Finally the local bucket
 * list is drained.
 *
 * NOTE(review): declarations of 'entries', 'nr_entries', 'mismatch' and
 * 'ret', the metadata early-out hinted at by the first comment, the counter
 * updates and every return/goto are on lines missing from this listing.
 */
8737 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8738 struct extent_record *rec)
8740 struct extent_backref *back, *tmp;
8741 struct data_backref *dback;
8742 struct extent_entry *entry, *best = NULL;
8745 int broken_entries = 0;
8750 * Metadata is easy and the backrefs should always agree on bytenr and
8751 * size, if not we've got bigger issues.
8756 rbtree_postorder_for_each_entry_safe(back, tmp,
8757 &rec->backref_tree, node) {
8758 if (back->full_backref || !back->is_data)
8761 dback = to_data_backref(back);
8764 * We only pay attention to backrefs that we found a real
8767 if (dback->found_ref == 0)
8771 * For now we only catch when the bytes don't match, not the
8772 * bytenr. We can easily do this at the same time, but I want
8773 * to have a fs image to test on before we just add repair
8774 * functionality willy-nilly so we know we won't screw up the
8778 entry = find_entry(&entries, dback->disk_bytenr,
8781 entry = malloc(sizeof(struct extent_entry));
8786 memset(entry, 0, sizeof(*entry));
8787 entry->bytenr = dback->disk_bytenr;
8788 entry->bytes = dback->bytes;
8789 list_add_tail(&entry->list, &entries);
8794 * If we only have on entry we may think the entries agree when
8795 * in reality they don't so we have to do some extra checking.
8797 if (dback->disk_bytenr != rec->start ||
8798 dback->bytes != rec->nr || back->broken)
8809 /* Yay all the backrefs agree, carry on good sir */
8810 if (nr_entries <= 1 && !mismatch)
8813 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8814 "%Lu\n", rec->start);
8817 * First we want to see if the backrefs can agree amongst themselves who
8818 * is right, so figure out which one of the entries has the highest
8821 best = find_most_right_entry(&entries);
8824 * Ok so we may have an even split between what the backrefs think, so
8825 * this is where we use the extent ref to see what it thinks.
8828 entry = find_entry(&entries, rec->start, rec->nr);
8829 if (!entry && (!broken_entries || !rec->found_rec)) {
8830 fprintf(stderr, "Backrefs don't agree with each other "
8831 "and extent record doesn't agree with anybody,"
8832 " so we can't fix bytenr %Lu bytes %Lu\n",
8833 rec->start, rec->nr);
8836 } else if (!entry) {
8838 * Ok our backrefs were broken, we'll assume this is the
8839 * correct value and add an entry for this range.
8841 entry = malloc(sizeof(struct extent_entry));
8846 memset(entry, 0, sizeof(*entry));
8847 entry->bytenr = rec->start;
8848 entry->bytes = rec->nr;
8849 list_add_tail(&entry->list, &entries);
8853 best = find_most_right_entry(&entries);
8855 fprintf(stderr, "Backrefs and extent record evenly "
8856 "split on who is right, this is going to "
8857 "require user input to fix bytenr %Lu bytes "
8858 "%Lu\n", rec->start, rec->nr);
8865 * I don't think this can happen currently as we'll abort() if we catch
8866 * this case higher up, but in case somebody removes that we still can't
8867 * deal with it properly here yet, so just bail out of that's the case.
8869 if (best->bytenr != rec->start) {
8870 fprintf(stderr, "Extent start and backref starts don't match, "
8871 "please use btrfs-image on this file system and send "
8872 "it to a btrfs developer so they can make fsck fix "
8873 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8874 rec->start, rec->nr);
8880 * Ok great we all agreed on an extent record, let's go find the real
8881 * references and fix up the ones that don't match.
8883 rbtree_postorder_for_each_entry_safe(back, tmp,
8884 &rec->backref_tree, node) {
8885 if (back->full_backref || !back->is_data)
8888 dback = to_data_backref(back);
8891 * Still ignoring backrefs that don't have a real ref attached
8894 if (dback->found_ref == 0)
8897 if (dback->bytes == best->bytes &&
8898 dback->disk_bytenr == best->bytenr)
8901 ret = repair_ref(info, path, dback, best);
8907 * Ok we messed with the actual refs, which means we need to drop our
8908 * entire cache and go back and rescan. I know this is a huge pain and
8909 * adds a lot of extra work, but it's the only way to be safe. Once all
8910 * the backrefs agree we may not need to do anything to the extent
8915 while (!list_empty(&entries)) {
8916 entry = list_entry(entries.next, struct extent_entry, list);
8917 list_del_init(&entry->list);
/*
 * Decide whether a record taken off the duplicate list really has duplicate
 * extent items, and normalise the cache so the record backed by a real
 * extent item is the one that stays.
 *
 * When rec->found_rec is set, or rec has more than one duplicate, the
 * function returns early (the value is on a line missing from this
 * listing).  Otherwise rec is removed from extent_cache and the first entry
 * of rec->dups ('good') takes its place: its list heads and check flags are
 * reset, its cache range is set from good->start / good->nr, and it takes
 * over rec->refs and rec->backrefs.  Cache extents overlapping 'good' are
 * then absorbed: ones that have found_rec set or carry duplicates become
 * duplicates of 'good' (which is put on duplicate_extents), the others are
 * merged by adding their refs and splicing their backrefs.  Finally 'good'
 * is inserted into the cache.
 *
 * Returns 0 when 'good' ends up with duplicates, 1 otherwise.
 *
 * NOTE(review): only the 'backrefs' list is carried over here, while other
 * functions in this file walk rec->backref_tree; confirm nothing stored in
 * the tree is lost.  What happens to the old 'rec' is not visible either.
 */
8923 static int process_duplicates(struct cache_tree *extent_cache,
8924 struct extent_record *rec)
8926 struct extent_record *good, *tmp;
8927 struct cache_extent *cache;
8931 * If we found a extent record for this extent then return, or if we
8932 * have more than one duplicate we are likely going to need to delete
8935 if (rec->found_rec || rec->num_duplicates > 1)
8938 /* Shouldn't happen but just in case */
8939 BUG_ON(!rec->num_duplicates);
8942 * So this happens if we end up with a backref that doesn't match the
8943 * actual extent entry. So either the backref is bad or the extent
8944 * entry is bad. Either way we want to have the extent_record actually
8945 * reflect what we found in the extent_tree, so we need to take the
8946 * duplicate out and use that as the extent_record since the only way we
8947 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8949 remove_cache_extent(extent_cache, &rec->cache);
8951 good = to_extent_record(rec->dups.next);
8952 list_del_init(&good->list);
8953 INIT_LIST_HEAD(&good->backrefs);
8954 INIT_LIST_HEAD(&good->dups);
8955 good->cache.start = good->start;
8956 good->cache.size = good->nr;
8957 good->content_checked = 0;
8958 good->owner_ref_checked = 0;
8959 good->num_duplicates = 0;
8960 good->refs = rec->refs;
8961 list_splice_init(&rec->backrefs, &good->backrefs);
8963 cache = lookup_cache_extent(extent_cache, good->start,
8967 tmp = container_of(cache, struct extent_record, cache);
8970 * If we find another overlapping extent and it's found_rec is
8971 * set then it's a duplicate and we need to try and delete
8974 if (tmp->found_rec || tmp->num_duplicates > 0) {
8975 if (list_empty(&good->list))
8976 list_add_tail(&good->list,
8977 &duplicate_extents);
8978 good->num_duplicates += tmp->num_duplicates + 1;
8979 list_splice_init(&tmp->dups, &good->dups);
8980 list_del_init(&tmp->list);
8981 list_add_tail(&tmp->list, &good->dups);
8982 remove_cache_extent(extent_cache, &tmp->cache);
8987 * Ok we have another non extent item backed extent rec, so lets
8988 * just add it to this extent and carry on like we did above.
8990 good->refs += tmp->refs;
8991 list_splice_init(&tmp->backrefs, &good->backrefs);
8992 remove_cache_extent(extent_cache, &tmp->cache);
8995 ret = insert_cache_extent(extent_cache, &good->cache);
8998 return good->num_duplicates ? 0 : 1;
/*
 * Remove duplicate extent items for 'rec' from the extent tree.
 *
 * Among rec and the entries on rec->dups, one record ('good') is chosen as
 * the one covering the others; an overlap that is not fully covered is
 * reported on stderr.  The records to drop are collected on a local
 * delete_list.  Inside one transaction on the extent root, every queued
 * record with found_rec set has its (start, EXTENT_ITEM, nr) key looked up
 * and deleted; a queued record flagged as metadata is reported as
 * unexpected.  Afterwards delete_list and rec->dups are drained.
 *
 * Returns ret when it is non-zero, otherwise nr_del.  rec->num_duplicates is
 * reset to 0 only when both ret and nr_del are zero.
 *
 * NOTE(review): the initial choice of 'good', the nr_del bookkeeping, the
 * frees and all goto/continue lines are missing from this listing.
 */
9001 static int delete_duplicate_records(struct btrfs_root *root,
9002 struct extent_record *rec)
9004 struct btrfs_trans_handle *trans;
9005 LIST_HEAD(delete_list);
9006 struct btrfs_path path;
9007 struct extent_record *tmp, *good, *n;
9010 struct btrfs_key key;
9012 btrfs_init_path(&path);
9015 /* Find the record that covers all of the duplicates. */
9016 list_for_each_entry(tmp, &rec->dups, list) {
9017 if (good->start < tmp->start)
9019 if (good->nr > tmp->nr)
9022 if (tmp->start + tmp->nr < good->start + good->nr) {
9023 fprintf(stderr, "Ok we have overlapping extents that "
9024 "aren't completely covered by each other, this "
9025 "is going to require more careful thought. "
9026 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9027 tmp->start, tmp->nr, good->start, good->nr);
9034 list_add_tail(&rec->list, &delete_list);
9036 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9039 list_move_tail(&tmp->list, &delete_list);
/* From here on 'root' refers to the extent root. */
9042 root = root->fs_info->extent_root;
9043 trans = btrfs_start_transaction(root, 1);
9044 if (IS_ERR(trans)) {
9045 ret = PTR_ERR(trans);
9049 list_for_each_entry(tmp, &delete_list, list) {
9050 if (tmp->found_rec == 0)
9052 key.objectid = tmp->start;
9053 key.type = BTRFS_EXTENT_ITEM_KEY;
9054 key.offset = tmp->nr;
9056 /* Shouldn't happen but just in case */
9057 if (tmp->metadata) {
9058 fprintf(stderr, "Well this shouldn't happen, extent "
9059 "record overlaps but is metadata? "
9060 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9064 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9070 ret = btrfs_del_item(trans, root, &path);
9073 btrfs_release_path(&path);
9076 err = btrfs_commit_transaction(trans, root);
9080 while (!list_empty(&delete_list)) {
9081 tmp = to_extent_record(delete_list.next);
9082 list_del_init(&tmp->list);
9088 while (!list_empty(&rec->dups)) {
9089 tmp = to_extent_record(rec->dups.next);
9090 list_del_init(&tmp->list);
9094 btrfs_release_path(&path);
9096 if (!ret && !nr_del)
9097 rec->num_duplicates = 0;
9099 return ret ? ret : nr_del;
/*
 * Look in the fs trees for file extents matching the data backrefs of 'rec'
 * that the scan did not find a reference for.
 *
 * For each backref that is data, is not a full backref and has found_ref
 * equal to 0, the root dback->root is read and searched for
 * (dback->owner, EXTENT_DATA, dback->offset).  When an item is found, its
 * disk_bytenr / disk_num_bytes are read and extent_cache is probed at that
 * bytenr.  Unless the probe rules the backref out (see the inline comment;
 * the test itself is on a missing line), the backref is updated in place:
 * found_ref is incremented and disk_bytenr / bytes take the values read from
 * the file extent item.
 *
 * A root lookup failing with -ENOENT skips the backref; any other root
 * lookup error is returned via PTR_ERR().
 *
 * NOTE(review): the inner 'tmp' (struct extent_record *) shadows the outer
 * iteration cursor of the same name.  The line that marks the backref as
 * untrusted, and the return statements, are missing from this listing.
 */
9102 static int find_possible_backrefs(struct btrfs_fs_info *info,
9103 struct btrfs_path *path,
9104 struct cache_tree *extent_cache,
9105 struct extent_record *rec)
9107 struct btrfs_root *root;
9108 struct extent_backref *back, *tmp;
9109 struct data_backref *dback;
9110 struct cache_extent *cache;
9111 struct btrfs_file_extent_item *fi;
9112 struct btrfs_key key;
9116 rbtree_postorder_for_each_entry_safe(back, tmp,
9117 &rec->backref_tree, node) {
9118 /* Don't care about full backrefs (poor unloved backrefs) */
9119 if (back->full_backref || !back->is_data)
9122 dback = to_data_backref(back);
9124 /* We found this one, we don't need to do a lookup */
9125 if (dback->found_ref)
9128 key.objectid = dback->root;
9129 key.type = BTRFS_ROOT_ITEM_KEY;
9130 key.offset = (u64)-1;
9132 root = btrfs_read_fs_root(info, &key);
9134 /* No root, definitely a bad ref, skip */
9135 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9137 /* Other err, exit */
9139 return PTR_ERR(root);
9141 key.objectid = dback->owner;
9142 key.type = BTRFS_EXTENT_DATA_KEY;
9143 key.offset = dback->offset;
9144 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9146 btrfs_release_path(path);
9149 /* Didn't find it, we can carry on */
9154 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9155 struct btrfs_file_extent_item);
9156 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9157 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9158 btrfs_release_path(path);
9159 cache = lookup_cache_extent(extent_cache, bytenr, 1);
9161 struct extent_record *tmp;
9162 tmp = container_of(cache, struct extent_record, cache);
9165 * If we found an extent record for the bytenr for this
9166 * particular backref then we can't add it to our
9167 * current extent record. We only want to add backrefs
9168 * that don't have a corresponding extent item in the
9169 * extent tree since they likely belong to this record
9170 * and we need to fix it if it doesn't match bytenrs.
9176 dback->found_ref += 1;
9177 dback->disk_bytenr = bytenr;
9178 dback->bytes = bytes;
9181 * Set this so the verify backref code knows not to trust the
9182 * values in this backref.
/*
 * Record orphan data refs into their corresponding roots.
 *
 * Return 0 if the extent item contains a data ref and it was recorded.
 * Return 1 if the extent item contains no useful data ref.  In that case it
 * may contain only shared data refs or metadata backrefs, or the file
 * extent exists (that case is left to the extent bytenr fix routine).
 * Return <0 if something goes wrong.
 *
 * For every backref of 'rec' that is data, is not a full backref, was seen
 * in the extent tree (found_extent_tree) and has no found_ref, the owning
 * root is read and searched for (owner, EXTENT_DATA, offset).  Depending on
 * that result (see the inline comment) an orphan_data_extent describing
 * (root, objectid, offset, disk_bytenr, disk_len) is allocated and queued on
 * dest_root->orphan_data_extents.
 *
 * Fix: list_add() takes (new, head).  The call used to pass the root's list
 * head as the new node, which re-linked the head on every insertion and
 * dropped the orphans recorded earlier.  The arguments are now in the
 * correct order.
 *
 * NOTE(review): each line still starts with a stale line number and short
 * lines (braces, continue/goto, the malloc failure path, the test on 'ret')
 * are missing from this listing.
 */
9200 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9201 struct extent_record *rec)
9203 struct btrfs_key key;
9204 struct btrfs_root *dest_root;
9205 struct extent_backref *back, *tmp;
9206 struct data_backref *dback;
9207 struct orphan_data_extent *orphan;
9208 struct btrfs_path path;
9209 int recorded_data_ref = 0;
9214 btrfs_init_path(&path);
9215 rbtree_postorder_for_each_entry_safe(back, tmp,
9216 &rec->backref_tree, node) {
9217 if (back->full_backref || !back->is_data ||
9218 !back->found_extent_tree)
9220 dback = to_data_backref(back);
9221 if (dback->found_ref)
9223 key.objectid = dback->root;
9224 key.type = BTRFS_ROOT_ITEM_KEY;
9225 key.offset = (u64)-1;
9227 dest_root = btrfs_read_fs_root(fs_info, &key);
9229 /* For non-exist root we just skip it */
9230 if (IS_ERR(dest_root) || !dest_root)
9233 key.objectid = dback->owner;
9234 key.type = BTRFS_EXTENT_DATA_KEY;
9235 key.offset = dback->offset;
9237 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9238 btrfs_release_path(&path);
9240 * For ret < 0, it's OK since the fs-tree may be corrupted,
9241 * we need to record it for inode/file extent rebuild.
9242 * For ret > 0, we record it only for file extent rebuild.
9243 * For ret == 0, the file extent exists but only bytenr
9244 * mismatch, let the original bytenr fix routine to handle,
9250 orphan = malloc(sizeof(*orphan));
9255 INIT_LIST_HEAD(&orphan->list);
9256 orphan->root = dback->root;
9257 orphan->objectid = dback->owner;
9258 orphan->offset = dback->offset;
9259 orphan->disk_bytenr = rec->cache.start;
9260 orphan->disk_len = rec->cache.size;
9261 list_add(&orphan->list, &dest_root->orphan_data_extents);
9262 recorded_data_ref = 1;
9265 btrfs_release_path(&path);
9267 return !recorded_data_ref;
/*
 * When an incorrect extent item is found, this will delete all of the
 * existing entries for it and recreate them based on what the tree scan
 * found.
 */
/*
 * Rebuild the extent-tree items of one extent record from the backrefs the
 * scan collected.
 *
 * @info:         filesystem being repaired
 * @extent_cache: cache of extent records, passed on to
 *                find_possible_backrefs()
 * @rec:          the record to rebuild
 *
 * Visible sequence:
 *  - BTRFS_BLOCK_FLAG_FULL_BACKREF is added to 'flags' when
 *    rec->flag_block_full_backref is set.
 *  - For non-metadata records whose refs differ from extent_item_refs,
 *    find_possible_backrefs() is run; verify_backrefs() follows.
 *  - A transaction is started on the extent root and
 *    delete_extent_records() removes the existing items.
 *  - info->corrupt_blocks is probed for [rec->start, rec->max_size).
 *  - Every backref with found_ref set is re-created via record_extent(),
 *    and rec->bad_full_backref is cleared.
 *  - The transaction is committed, a message is printed and the path is
 *    released.
 *
 * NOTE(review): declarations of 'ret', 'flags' and 'allocated', the error
 * gotos, the reaction to a corrupt-block hit and the return statement are
 * on lines missing from this listing.
 */
9277 static int fixup_extent_refs(struct btrfs_fs_info *info,
9278 struct cache_tree *extent_cache,
9279 struct extent_record *rec)
9281 struct btrfs_trans_handle *trans = NULL;
9283 struct btrfs_path path;
9284 struct cache_extent *cache;
9285 struct extent_backref *back, *tmp;
9289 if (rec->flag_block_full_backref)
9290 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9292 btrfs_init_path(&path);
9293 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9295 * Sometimes the backrefs themselves are so broken they don't
9296 * get attached to any meaningful rec, so first go back and
9297 * check any of our backrefs that we couldn't find and throw
9298 * them into the list if we find the backref so that
9299 * verify_backrefs can figure out what to do.
9301 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9306 /* step one, make sure all of the backrefs agree */
9307 ret = verify_backrefs(info, &path, rec);
9311 trans = btrfs_start_transaction(info->extent_root, 1);
9312 if (IS_ERR(trans)) {
9313 ret = PTR_ERR(trans);
9317 /* step two, delete all the existing records */
9318 ret = delete_extent_records(trans, info->extent_root, &path,
9324 /* was this block corrupt? If so, don't add references to it */
9325 cache = lookup_cache_extent(info->corrupt_blocks,
9326 rec->start, rec->max_size);
9332 /* step three, recreate all the refs we did find */
9333 rbtree_postorder_for_each_entry_safe(back, tmp,
9334 &rec->backref_tree, node) {
9336 * if we didn't find any references, don't create a
9339 if (!back->found_ref)
9342 rec->bad_full_backref = 0;
9343 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9351 int err = btrfs_commit_transaction(trans, info->extent_root);
9357 fprintf(stderr, "Repaired extent references for %llu\n",
9358 (unsigned long long)rec->start);
9360 btrfs_release_path(&path);
/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the extent item of 'rec' so
 * that it matches rec->flag_block_full_backref.
 *
 * The item is looked up in the extent root under
 * (rec->start, METADATA_ITEM, rec->info_level) for metadata records and
 * under (rec->start, EXTENT_ITEM, rec->max_size) otherwise.  The lookup runs
 * inside a transaction with cow enabled.  On the two failure paths visible
 * here (the second one prints "Didn't find extent") the path is released and
 * the transaction is still committed.  On success the flags are rewritten,
 * the leaf is marked dirty and the transaction is committed.
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started.
 *
 * NOTE(review): the declarations of 'ret' and 'flags', the tests on 'ret'
 * and the remaining return statements are missing from this listing.
 */
9364 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9365 struct extent_record *rec)
9367 struct btrfs_trans_handle *trans;
9368 struct btrfs_root *root = fs_info->extent_root;
9369 struct btrfs_path path;
9370 struct btrfs_extent_item *ei;
9371 struct btrfs_key key;
9375 key.objectid = rec->start;
/* Metadata records are keyed by level, other records by size. */
9376 if (rec->metadata) {
9377 key.type = BTRFS_METADATA_ITEM_KEY;
9378 key.offset = rec->info_level;
9380 key.type = BTRFS_EXTENT_ITEM_KEY;
9381 key.offset = rec->max_size;
9384 trans = btrfs_start_transaction(root, 0);
9386 return PTR_ERR(trans);
9388 btrfs_init_path(&path);
9389 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9391 btrfs_release_path(&path);
9392 btrfs_commit_transaction(trans, root);
9395 fprintf(stderr, "Didn't find extent for %llu\n",
9396 (unsigned long long)rec->start);
9397 btrfs_release_path(&path);
9398 btrfs_commit_transaction(trans, root);
9402 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9403 struct btrfs_extent_item);
9404 flags = btrfs_extent_flags(path.nodes[0], ei);
9405 if (rec->flag_block_full_backref) {
9406 fprintf(stderr, "setting full backref on %llu\n",
9407 (unsigned long long)key.objectid);
9408 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9410 fprintf(stderr, "clearing full backref on %llu\n",
9411 (unsigned long long)key.objectid);
9412 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9414 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9415 btrfs_mark_buffer_dirty(path.nodes[0]);
9416 btrfs_release_path(&path);
9417 ret = btrfs_commit_transaction(trans, root);
9419 fprintf(stderr, "Repaired extent flags for %llu\n",
9420 (unsigned long long)rec->start);
9425 /* right now we only prune from the extent allocation tree */
/*
 * Remove the pointer to one corrupt block from its parent node in the
 * extent tree.
 *
 * @trans:   running transaction
 * @info:    filesystem whose extent root is searched
 * @corrupt: describes the bad block (key, level, cache.start)
 *
 * The search for corrupt->key is made to stop one level above the corrupt
 * block (path.lowest_level = corrupt->level + 1).  The slot the search
 * landed on is checked first; if its block pointer is not
 * corrupt->cache.start, every slot of that node is scanned.  Once the
 * pointer is found it is removed with btrfs_del_ptr().
 *
 * NOTE(review): what happens when the pointer is not found in this node
 * (the code compares the node against the extent root node), the labels and
 * the return value are on lines missing from this listing.
 */
9426 static int prune_one_block(struct btrfs_trans_handle *trans,
9427 struct btrfs_fs_info *info,
9428 struct btrfs_corrupt_block *corrupt)
9431 struct btrfs_path path;
9432 struct extent_buffer *eb;
9436 int level = corrupt->level + 1;
9438 btrfs_init_path(&path);
9440 /* we want to stop at the parent to our busted block */
9441 path.lowest_level = level;
9443 ret = btrfs_search_slot(trans, info->extent_root,
9444 &corrupt->key, &path, -1, 1);
9449 eb = path.nodes[level];
9456 * hopefully the search gave us the block we want to prune,
9457 * lets try that first
9459 slot = path.slots[level];
9460 found = btrfs_node_blockptr(eb, slot);
9461 if (found == corrupt->cache.start)
9464 nritems = btrfs_header_nritems(eb);
9466 /* the search failed, lets scan this node and hope we find it */
9467 for (slot = 0; slot < nritems; slot++) {
9468 found = btrfs_node_blockptr(eb, slot);
9469 if (found == corrupt->cache.start)
9473 * we couldn't find the bad block. TODO, search all the nodes for pointers
9476 if (eb == info->extent_root->node) {
9481 btrfs_release_path(&path);
9486 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9487 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9490 btrfs_release_path(&path);
/*
 * Prune every block recorded in info->corrupt_blocks from the extent tree.
 *
 * Entries are fetched with search_cache_extent(..., 0), handed to
 * prune_one_block() and then removed from the cache.  A transaction on the
 * extent root is started along the way, and the function returns the result
 * of committing it, or PTR_ERR(trans) if it could not be started.
 *
 * NOTE(review): the return value of prune_one_block() is ignored; confirm
 * that best-effort pruning is intended.  The loop construct and the
 * no-corrupt-blocks return path are on lines missing from this listing.
 */
9494 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9496 struct btrfs_trans_handle *trans = NULL;
9497 struct cache_extent *cache;
9498 struct btrfs_corrupt_block *corrupt;
9501 cache = search_cache_extent(info->corrupt_blocks, 0);
9505 trans = btrfs_start_transaction(info->extent_root, 1);
9507 return PTR_ERR(trans);
9509 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9510 prune_one_block(trans, info, corrupt);
9511 remove_cache_extent(info->corrupt_blocks, cache);
9514 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Drop cached free-space state so it is rebuilt after repairs.
 *
 * Ranges flagged EXTENT_DIRTY in fs_info->free_space_cache are located with
 * find_first_extent_bit() and cleared with clear_extent_dirty().  The block
 * groups are then visited in address order through
 * btrfs_lookup_first_block_group(), advancing 'start' past each one.
 *
 * NOTE(review): the loop constructs, their exit conditions and whatever is
 * done to each block group are on lines missing from this listing.
 */
9518 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9520 struct btrfs_block_group_cache *cache;
9525 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9526 &start, &end, EXTENT_DIRTY);
9529 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9534 cache = btrfs_lookup_first_block_group(fs_info, start);
9539 start = cache->key.objectid + cache->key.offset;
/*
 * Walk the extent records in @extent_cache and report reference problems.
 *
 * Steps visible in this function:
 *  - the range [start, start + max_size - 1] of every record, and the range
 *    of every entry in fs_info->corrupt_blocks, is marked dirty in
 *    fs_info->excluded_extents; prune_corrupt_blocks() and
 *    reset_cached_block_groups() are then called;
 *  - while repair is set and duplicate_extents is not empty, entries are
 *    handed to process_duplicates() and delete_duplicate_records();
 *  - each record is then checked for: num_duplicates set, refs differing
 *    from extent_item_refs, a nonzero result of
 *    all_backpointers_checked(rec, 1), owner_ref_checked unset,
 *    bad_full_backref, crossing_stripes and wrong_chunk_type, each with a
 *    message printed to stderr;
 *  - fixup_extent_refs() is called when both repair and fix are set, and
 *    fixup_extent_flags() is called after the bad full backref message;
 *  - each record is removed from the cache and its backrefs are freed;
 *  - finally a transaction is started on fs_info->extent_root,
 *    btrfs_fix_block_accounting() is run and the transaction is committed.
 *
 * @root:         used to reach root->fs_info
 * @extent_cache: cache tree whose entries are embedded in struct extent_record
 *
 * NOTE(review): the embedded line numbers skip values, so braces, local
 * declarations, guarding conditions and return statements are presumably
 * not all shown here; confirm the exact conditions against the full function.
 */
9543 static int check_extent_refs(struct btrfs_root *root,
9544 struct cache_tree *extent_cache)
9546 struct extent_record *rec;
9547 struct cache_extent *cache;
9553 * if we're doing a repair, we have to make sure
9554 * we don't allocate from the problem extents.
9555 * In the worst case, this will be all the
9558 cache = search_cache_extent(extent_cache, 0);
9560 rec = container_of(cache, struct extent_record, cache);
9561 set_extent_dirty(root->fs_info->excluded_extents,
9563 rec->start + rec->max_size - 1);
9564 cache = next_cache_extent(cache);
9567 /* pin down all the corrupted blocks too */
9568 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9570 set_extent_dirty(root->fs_info->excluded_extents,
9572 cache->start + cache->size - 1);
9573 cache = next_cache_extent(cache);
9575 prune_corrupt_blocks(root->fs_info);
9576 reset_cached_block_groups(root->fs_info);
9579 reset_cached_block_groups(root->fs_info);
9582 * We need to delete any duplicate entries we find first otherwise we
9583 * could mess up the extent tree when we have backrefs that actually
9584 * belong to a different extent item and not the weird duplicate one.
9586 while (repair && !list_empty(&duplicate_extents)) {
9587 rec = to_extent_record(duplicate_extents.next);
9588 list_del_init(&rec->list);
9590 /* Sometimes we can find a backref before we find an actual
9591 * extent, so we need to process it a little bit to see if there
9592 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9593 * if this is a backref screwup. If we need to delete stuff
9594 * process_duplicates() will return 0, otherwise it will return
9597 if (process_duplicates(extent_cache, rec))
9599 ret = delete_duplicate_records(root, rec);
9603 * delete_duplicate_records will return the number of entries
9604 * deleted, so if it's greater than 0 then we know we actually
9605 * did something and we need to remove.
9618 cache = search_cache_extent(extent_cache, 0);
9621 rec = container_of(cache, struct extent_record, cache);
9622 if (rec->num_duplicates) {
9623 fprintf(stderr, "extent item %llu has multiple extent "
9624 "items\n", (unsigned long long)rec->start);
9628 if (rec->refs != rec->extent_item_refs) {
9629 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9630 (unsigned long long)rec->start,
9631 (unsigned long long)rec->nr);
9632 fprintf(stderr, "extent item %llu, found %llu\n",
9633 (unsigned long long)rec->extent_item_refs,
9634 (unsigned long long)rec->refs);
9635 ret = record_orphan_data_extents(root->fs_info, rec);
9641 if (all_backpointers_checked(rec, 1)) {
9642 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9643 (unsigned long long)rec->start,
9644 (unsigned long long)rec->nr);
9648 if (!rec->owner_ref_checked) {
9649 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9650 (unsigned long long)rec->start,
9651 (unsigned long long)rec->nr);
9656 if (repair && fix) {
9657 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9663 if (rec->bad_full_backref) {
9664 fprintf(stderr, "bad full backref, on [%llu]\n",
9665 (unsigned long long)rec->start);
9667 ret = fixup_extent_flags(root->fs_info, rec);
9675 * Although it's not a extent ref's problem, we reuse this
9676 * routine for error reporting.
9677 * No repair function yet.
9679 if (rec->crossing_stripes) {
9681 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9682 rec->start, rec->start + rec->max_size);
9686 if (rec->wrong_chunk_type) {
9688 "bad extent [%llu, %llu), type mismatch with chunk\n",
9689 rec->start, rec->start + rec->max_size);
9693 remove_cache_extent(extent_cache, cache);
9694 free_all_extent_backrefs(rec);
9695 if (!init_extent_tree && repair && (!cur_err || fix))
9696 clear_extent_dirty(root->fs_info->excluded_extents,
9698 rec->start + rec->max_size - 1);
9703 if (ret && ret != -EAGAIN) {
9704 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9707 struct btrfs_trans_handle *trans;
9709 root = root->fs_info->extent_root;
9710 trans = btrfs_start_transaction(root, 1);
9711 if (IS_ERR(trans)) {
9712 ret = PTR_ERR(trans);
9716 ret = btrfs_fix_block_accounting(trans, root);
9719 ret = btrfs_commit_transaction(trans, root);
/*
 * Derive a stripe size from a chunk length according to the profile bits
 * set in @type.
 *
 * @type:        block group type flags, tested against the
 *               BTRFS_BLOCK_GROUP_RAID* bits
 * @length:      chunk length
 * @num_stripes: number of stripes of the chunk
 *
 * The result is stored in stripe_size; the return statement is not visible
 * in this view (presumably stripe_size is returned - TODO confirm).
 *
 * NOTE(review): no guard is visible against a zero divisor, which occurs
 * for num_stripes == 0 (RAID0, RAID10), 1 (RAID5) or 2 (RAID6).
 */
9728 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
/* RAID0: length / num_stripes */
9732 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9733 stripe_size = length;
9734 stripe_size /= num_stripes;
/* RAID10: (length * 2) / num_stripes */
9735 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9736 stripe_size = length * 2;
9737 stripe_size /= num_stripes;
/* RAID5: length / (num_stripes - 1) */
9738 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9739 stripe_size = length;
9740 stripe_size /= (num_stripes - 1);
/* RAID6: length / (num_stripes - 2) */
9741 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9742 stripe_size = length;
9743 stripe_size /= (num_stripes - 2);
/* Any other profile: the stripe size equals the chunk length. */
9745 stripe_size = length;
9751 * Check the chunk with its block group/dev list ref:
9752 * Return 0 if all refs seem valid.
9753 * Return 1 if only part of the refs seem valid and a later check is needed to
9754 * rebuild refs, e.g. a missing block group that requires searching the extent tree.
9755 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * Cross-check one chunk record against its block group record and against
 * the device extent of each of its stripes.
 *
 * @chunk_rec:         chunk to verify
 * @block_group_cache: block group records, looked up through ->tree
 * @dev_extent_cache:  device extent records, looked up through ->tree
 * A further parameter is not visible in this view; check_chunks() passes
 * its silent flag in that position.
 *
 * Block group part: the chunk length, offset and type_flags are compared
 * with the block group offset, objectid and flags. The visible code also
 * unlinks the block group record from its list and stores it in
 * chunk_rec->bg_rec (presumably only when the comparison passes - TODO
 * confirm, the branch structure is not shown).
 *
 * Device extent part: for every stripe, a device extent is looked up by
 * (devid, offset, length), where length comes from calc_stripe_length().
 * Its objectid, offset, chunk_offset and length are compared with the
 * stripe, and the record is moved onto chunk_rec->dextents.
 *
 * The format strings below are the messages printed for a mismatch or a
 * missing record.
 */
9757 static int check_chunk_refs(struct chunk_record *chunk_rec,
9758 struct block_group_tree *block_group_cache,
9759 struct device_extent_tree *dev_extent_cache,
9762 struct cache_extent *block_group_item;
9763 struct block_group_record *block_group_rec;
9764 struct cache_extent *dev_extent_item;
9765 struct device_extent_record *dev_extent_rec;
9769 int metadump_v2 = 0;
/* Step 1: find the block group record that belongs to this chunk. */
9773 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9776 if (block_group_item) {
9777 block_group_rec = container_of(block_group_item,
9778 struct block_group_record,
/*
 * The block group key stores the start in objectid and the size in offset,
 * hence length is compared with offset and offset with objectid.
 */
9780 if (chunk_rec->length != block_group_rec->offset ||
9781 chunk_rec->offset != block_group_rec->objectid ||
9783 chunk_rec->type_flags != block_group_rec->flags)) {
9786 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9787 chunk_rec->objectid,
9792 chunk_rec->type_flags,
9793 block_group_rec->objectid,
9794 block_group_rec->type,
9795 block_group_rec->offset,
9796 block_group_rec->offset,
9797 block_group_rec->objectid,
9798 block_group_rec->flags);
/* Take the record off its list and attach it to the chunk. */
9801 list_del_init(&block_group_rec->list);
9802 chunk_rec->bg_rec = block_group_rec;
9807 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9808 chunk_rec->objectid,
9813 chunk_rec->type_flags);
/* Step 2: every stripe must be backed by a matching device extent. */
9820 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9821 chunk_rec->num_stripes);
9822 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9823 devid = chunk_rec->stripes[i].devid;
9824 offset = chunk_rec->stripes[i].offset;
9825 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9826 devid, offset, length);
9827 if (dev_extent_item) {
9828 dev_extent_rec = container_of(dev_extent_item,
9829 struct device_extent_record,
9831 if (dev_extent_rec->objectid != devid ||
9832 dev_extent_rec->offset != offset ||
9833 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9834 dev_extent_rec->length != length) {
9837 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9838 chunk_rec->objectid,
9841 chunk_rec->stripes[i].devid,
9842 chunk_rec->stripes[i].offset,
9843 dev_extent_rec->objectid,
9844 dev_extent_rec->offset,
9845 dev_extent_rec->length);
/* Move the device extent onto the list of extents owned by this chunk. */
9848 list_move(&dev_extent_rec->chunk_list,
9849 &chunk_rec->dextents);
9854 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9855 chunk_rec->objectid,
9858 chunk_rec->stripes[i].devid,
9859 chunk_rec->stripes[i].offset);
9866 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Run check_chunk_refs() on every chunk record in @chunk_cache and sort the
 * chunks onto the caller supplied lists.
 *
 * @chunk_cache:       cache tree of chunk records
 * @block_group_cache: block group records to match against
 * @dev_extent_cache:  device extent records to match against
 * @good:              receives chunks for which check_chunk_refs() returned 0;
 *                     may be NULL
 * @rebuild:           receives chunks for which it returned > 0; may be NULL
 * @bad:               receives chunks through the third list_add_tail() below;
 *                     its guarding condition is not visible in this view
 * @silent:            forwarded to check_chunk_refs()
 *
 * After the chunk loop, the entries still on block_group_cache->block_groups
 * and on dev_extent_cache->no_chunk_orphans are iterated (presumably those
 * that no chunk claimed - TODO confirm), with the messages given by the
 * format strings below.
 */
9867 int check_chunks(struct cache_tree *chunk_cache,
9868 struct block_group_tree *block_group_cache,
9869 struct device_extent_tree *dev_extent_cache,
9870 struct list_head *good, struct list_head *bad,
9871 struct list_head *rebuild, int silent)
9873 struct cache_extent *chunk_item;
9874 struct chunk_record *chunk_rec;
9875 struct block_group_record *bg_rec;
9876 struct device_extent_record *dext_rec;
/* Visit every chunk record in the cache tree. */
9880 chunk_item = first_cache_extent(chunk_cache);
9881 while (chunk_item) {
9882 chunk_rec = container_of(chunk_item, struct chunk_record,
9884 err = check_chunk_refs(chunk_rec, block_group_cache,
9885 dev_extent_cache, silent);
/* Classify the chunk by the check result; a NULL list is skipped. */
9888 if (err == 0 && good)
9889 list_add_tail(&chunk_rec->list, good);
9890 if (err > 0 && rebuild)
9891 list_add_tail(&chunk_rec->list, rebuild);
9893 list_add_tail(&chunk_rec->list, bad);
9894 chunk_item = next_cache_extent(chunk_item);
/* Block groups still linked on the global list. */
9897 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9900 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
/* Device extents on the no_chunk_orphans list. */
9908 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9912 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Sum the lengths of the device extents of one device and compare the total
 * with the byte_used value stored in the device record.
 *
 * @dev_rec:    device record providing devid and byte_used
 * @dext_cache: device extent records, searched through ->tree
 *
 * The walk starts at the first cache entry found for (devid, 0). Each
 * visited extent is unlinked from its device_list and its length is added
 * to total_byte. The objectid comparison below presumably ends the walk
 * once an extent of another device is reached (the statement under the
 * condition is not visible - TODO confirm).
 */
9923 static int check_device_used(struct device_record *dev_rec,
9924 struct device_extent_tree *dext_cache)
9926 struct cache_extent *cache;
9927 struct device_extent_record *dev_extent_rec;
9930 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9932 dev_extent_rec = container_of(cache,
9933 struct device_extent_record,
9935 if (dev_extent_rec->objectid != dev_rec->devid)
/* Unlink the extent from device_list and account for its length. */
9938 list_del_init(&dev_extent_rec->device_list);
9939 total_byte += dev_extent_rec->length;
9940 cache = next_cache_extent(cache);
/* The accumulated extent bytes must equal the recorded usage. */
9943 if (total_byte != dev_rec->byte_used) {
9945 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9946 total_byte, dev_rec->byte_used, dev_rec->objectid,
9947 dev_rec->type, dev_rec->offset);
9954 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Run check_device_used() for every device record in the @dev_cache rb-tree,
 * then iterate the device extents left on
 * dev_extent_cache->no_device_orphans; the format string below is the
 * message for such an extent.
 *
 * @dev_cache:        rb-tree of struct device_record, linked through ->node
 * @dev_extent_cache: device extent records
 *
 * How the per-device result in err is folded into the return value is not
 * visible in this view.
 */
9955 static int check_devices(struct rb_root *dev_cache,
9956 struct device_extent_tree *dev_extent_cache)
9958 struct rb_node *dev_node;
9959 struct device_record *dev_rec;
9960 struct device_extent_record *dext_rec;
/* In-order walk over all device records. */
9964 dev_node = rb_first(dev_cache);
9966 dev_rec = container_of(dev_node, struct device_record, node);
9967 err = check_device_used(dev_rec, dev_extent_cache);
9971 dev_node = rb_next(dev_node);
/* Device extents on the no_device_orphans list. */
9973 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9976 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9977 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record, fill it from the arguments and append it to
 * @head.
 *
 * @head:          list receiving the new record
 * @objectid:      stored in ->objectid
 * @bytenr:        stored in ->bytenr
 * @last_snapshot: stored in ->last_snapshot
 * @level:         stored in ->level
 * @drop_level:    stored in ->drop_level
 * @drop_key:      copied into ->drop_key
 *
 * NOTE(review): callers in this file pass NULL for @drop_key, so the memcpy
 * is presumably guarded by a check that is not visible here; likewise the
 * handling of a failed malloc is not shown - TODO confirm both.
 */
9984 static int add_root_item_to_list(struct list_head *head,
9985 u64 objectid, u64 bytenr, u64 last_snapshot,
9986 u8 level, u8 drop_level,
9987 struct btrfs_key *drop_key)
9990 struct root_item_record *ri_rec;
9991 ri_rec = malloc(sizeof(*ri_rec));
9994 ri_rec->bytenr = bytenr;
9995 ri_rec->objectid = objectid;
9996 ri_rec->level = level;
9997 ri_rec->drop_level = drop_level;
9998 ri_rec->last_snapshot = last_snapshot;
10000 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
/* Append at the tail so that records keep their insertion order. */
10001 list_add_tail(&ri_rec->list, head);
/*
 * Empty a list of root_item_record entries: repeatedly take the first entry
 * and unlink it until the list is empty.
 *
 * NOTE(review): the statement releasing the record memory is not visible in
 * this view (presumably a free of ri_rec follows the unlink - TODO confirm).
 */
10006 static void free_root_item_list(struct list_head *list)
10008 struct root_item_record *ri_rec;
10010 while (!list_empty(list)) {
10011 ri_rec = list_first_entry(list, struct root_item_record,
10013 list_del_init(&ri_rec->list);
/*
 * Process every root_item_record queued on @list.
 *
 * For each record the tree block at rec->bytenr is read, registered through
 * add_root_to_pending() under rec->objectid, and run_next_block() is called
 * with the record. The buffer is then released and the record unlinked from
 * the list. After the list loop, run_next_block() is called with a NULL
 * record (presumably repeatedly until the pending blocks are drained - the
 * enclosing loop is not visible, TODO confirm).
 *
 * @list:  root_item_record entries to process
 * @root:  used to reach root->fs_info for reading tree blocks
 * @bits:  scratch array passed through to run_next_block()
 * The remaining cache arguments are passed unchanged to
 * add_root_to_pending() and run_next_block(). A bits_nr parameter is used
 * below but its declaration line is not visible in this view.
 */
10018 static int deal_root_from_list(struct list_head *list,
10019 struct btrfs_root *root,
10020 struct block_info *bits,
10022 struct cache_tree *pending,
10023 struct cache_tree *seen,
10024 struct cache_tree *reada,
10025 struct cache_tree *nodes,
10026 struct cache_tree *extent_cache,
10027 struct cache_tree *chunk_cache,
10028 struct rb_root *dev_cache,
10029 struct block_group_tree *block_group_cache,
10030 struct device_extent_tree *dev_extent_cache)
10035 while (!list_empty(list)) {
10036 struct root_item_record *rec;
10037 struct extent_buffer *buf;
10038 rec = list_entry(list->next,
10039 struct root_item_record, list);
/* Read the root node of this tree; a buffer that is not uptodate is dropped. */
10041 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10042 if (!extent_buffer_uptodate(buf)) {
10043 free_extent_buffer(buf);
10047 ret = add_root_to_pending(buf, extent_cache, pending,
10048 seen, nodes, rec->objectid);
10052 * To rebuild extent tree, we need deal with snapshot
10053 * one by one, otherwise we deal with node firstly which
10054 * can maximize readahead.
10057 ret = run_next_block(root, bits, bits_nr, &last,
10058 pending, seen, reada, nodes,
10059 extent_cache, chunk_cache,
10060 dev_cache, block_group_cache,
10061 dev_extent_cache, rec);
/* Done with this root: release the buffer and unlink the record. */
10065 free_extent_buffer(buf);
10066 list_del(&rec->list);
/* Continue with blocks that are not tied to a specific root record. */
10072 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10073 reada, nodes, extent_cache, chunk_cache,
10074 dev_cache, block_group_cache,
10075 dev_extent_cache, NULL);
/*
 * Driver for the chunk and extent checks.
 *
 * Visible sequence:
 *  1. initialise the local caches, trees and lists and publish some of them
 *     through fs_info (excluded_extents, fsck_extent_cache,
 *     free_extent_hook, corrupt_blocks);
 *  2. queue the tree root and the chunk root on normal_trees;
 *  3. walk the ROOT_ITEM keys of the tree root and queue each root on
 *     normal_trees when its drop_progress objectid is 0, otherwise on
 *     dropping_trees together with the drop key;
 *  4. process both lists with deal_root_from_list();
 *  5. run check_chunks(), check_extent_refs() and check_devices();
 *  6. stop the progress task, detach the state from fs_info and free
 *     everything.
 *
 * Several steps test ret against -EAGAIN; per the comment inside the body,
 * -EAGAIN means something was fixed and the loop should restart.
 *
 * NOTE(review): two cleanup sequences are visible near the end; only the
 * second one calls free_extent_record_cache(). Presumably one is the final
 * exit path and the other the restart path - confirm against the labels,
 * which are not visible in this view.
 */
10085 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10087 struct rb_root dev_cache;
10088 struct cache_tree chunk_cache;
10089 struct block_group_tree block_group_cache;
10090 struct device_extent_tree dev_extent_cache;
10091 struct cache_tree extent_cache;
10092 struct cache_tree seen;
10093 struct cache_tree pending;
10094 struct cache_tree reada;
10095 struct cache_tree nodes;
10096 struct extent_io_tree excluded_extents;
10097 struct cache_tree corrupt_blocks;
10098 struct btrfs_path path;
10099 struct btrfs_key key;
10100 struct btrfs_key found_key;
10102 struct block_info *bits;
10104 struct extent_buffer *leaf;
10106 struct btrfs_root_item ri;
10107 struct list_head dropping_trees;
10108 struct list_head normal_trees;
10109 struct btrfs_root *root1;
10110 struct btrfs_root *root;
/* Initialise every local cache, tree and list before use. */
10114 root = fs_info->fs_root;
10115 dev_cache = RB_ROOT;
10116 cache_tree_init(&chunk_cache);
10117 block_group_tree_init(&block_group_cache);
10118 device_extent_tree_init(&dev_extent_cache);
10120 cache_tree_init(&extent_cache);
10121 cache_tree_init(&seen);
10122 cache_tree_init(&pending);
10123 cache_tree_init(&nodes);
10124 cache_tree_init(&reada);
10125 cache_tree_init(&corrupt_blocks);
10126 extent_io_tree_init(&excluded_extents);
10127 INIT_LIST_HEAD(&dropping_trees);
10128 INIT_LIST_HEAD(&normal_trees);
/* Make the check state reachable through fs_info while the check runs. */
10131 fs_info->excluded_extents = &excluded_extents;
10132 fs_info->fsck_extent_cache = &extent_cache;
10133 fs_info->free_extent_hook = free_extent_hook;
10134 fs_info->corrupt_blocks = &corrupt_blocks;
10138 bits = malloc(bits_nr * sizeof(struct block_info));
10144 if (ctx.progress_enabled) {
10145 ctx.tp = TASK_EXTENTS;
10146 task_start(ctx.info);
/* Queue the tree root, then the chunk root, as normal trees. */
10150 root1 = fs_info->tree_root;
10151 level = btrfs_header_level(root1->node);
10152 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10153 root1->node->start, 0, level, 0, NULL);
10156 root1 = fs_info->chunk_root;
10157 level = btrfs_header_level(root1->node);
10158 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10159 root1->node->start, 0, level, 0, NULL);
/* Walk the items of the tree root, looking for ROOT_ITEM keys. */
10162 btrfs_init_path(&path);
10165 key.type = BTRFS_ROOT_ITEM_KEY;
10166 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10170 leaf = path.nodes[0];
10171 slot = path.slots[0];
/*
 * NOTE(review): the search above runs on fs_info->tree_root, while the
 * next-leaf call below is given root, which was set to fs_info->fs_root.
 * Confirm that this is intended.
 */
10172 if (slot >= btrfs_header_nritems(path.nodes[0])) {
10173 ret = btrfs_next_leaf(root, &path);
10176 leaf = path.nodes[0];
10177 slot = path.slots[0];
10179 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10180 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10181 unsigned long offset;
/* Copy the root item out of the leaf to inspect it. */
10184 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10185 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10186 last_snapshot = btrfs_root_last_snapshot(&ri);
/* A drop_progress objectid of 0 selects the normal_trees list. */
10187 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10188 level = btrfs_root_level(&ri);
10189 ret = add_root_item_to_list(&normal_trees,
10190 found_key.objectid,
10191 btrfs_root_bytenr(&ri),
10192 last_snapshot, level,
/* Otherwise queue the root on dropping_trees along with its drop key. */
10197 level = btrfs_root_level(&ri);
10198 objectid = found_key.objectid;
10199 btrfs_disk_key_to_cpu(&found_key,
10200 &ri.drop_progress);
10201 ret = add_root_item_to_list(&dropping_trees,
10203 btrfs_root_bytenr(&ri),
10204 last_snapshot, level,
10205 ri.drop_level, &found_key);
10212 btrfs_release_path(&path);
10215 * check_block can return -EAGAIN if it fixes something, please keep
10216 * this in mind when dealing with return values from these functions, if
10217 * we get -EAGAIN we want to fall through and restart the loop.
10219 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
10220 &seen, &reada, &nodes, &extent_cache,
10221 &chunk_cache, &dev_cache, &block_group_cache,
10222 &dev_extent_cache);
10224 if (ret == -EAGAIN)
10228 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
10229 &pending, &seen, &reada, &nodes,
10230 &extent_cache, &chunk_cache, &dev_cache,
10231 &block_group_cache, &dev_extent_cache);
10233 if (ret == -EAGAIN)
/* Cross-reference checks on the collected records. */
10238 ret = check_chunks(&chunk_cache, &block_group_cache,
10239 &dev_extent_cache, NULL, NULL, NULL, 0);
10241 if (ret == -EAGAIN)
10246 ret = check_extent_refs(root, &extent_cache);
10248 if (ret == -EAGAIN)
10253 ret = check_devices(&dev_cache, &dev_extent_cache);
/* First cleanup sequence: detach the state from fs_info and free the caches. */
10258 task_stop(ctx.info);
10260 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10261 extent_io_tree_cleanup(&excluded_extents);
10262 fs_info->fsck_extent_cache = NULL;
10263 fs_info->free_extent_hook = NULL;
10264 fs_info->corrupt_blocks = NULL;
10265 fs_info->excluded_extents = NULL;
10268 free_chunk_cache_tree(&chunk_cache);
10269 free_device_cache_tree(&dev_cache);
10270 free_block_group_tree(&block_group_cache);
10271 free_device_extent_tree(&dev_extent_cache);
10272 free_extent_cache_tree(&seen);
10273 free_extent_cache_tree(&pending);
10274 free_extent_cache_tree(&reada);
10275 free_extent_cache_tree(&nodes);
10276 free_root_item_list(&normal_trees);
10277 free_root_item_list(&dropping_trees);
/* Second cleanup sequence: additionally frees the extent record cache. */
10280 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10281 free_extent_cache_tree(&seen);
10282 free_extent_cache_tree(&pending);
10283 free_extent_cache_tree(&reada);
10284 free_extent_cache_tree(&nodes);
10285 free_chunk_cache_tree(&chunk_cache);
10286 free_block_group_tree(&block_group_cache);
10287 free_device_cache_tree(&dev_cache);
10288 free_device_extent_tree(&dev_extent_cache);
10289 free_extent_record_cache(&extent_cache);
10290 free_root_item_list(&normal_trees);
10291 free_root_item_list(&dropping_trees);
10292 extent_io_tree_cleanup(&excluded_extents);
10297 * Check backrefs of a tree block given by @bytenr or @eb.
10299 * @root: the root containing the @bytenr or @eb
10300 * @eb: tree block extent buffer, can be NULL
10301 * @bytenr: bytenr of the tree block to search
10302 * @level: tree level of the tree block
10303 * @owner: owner of the tree block
10305 * Return >0 for any error found and output error message
10306 * Return 0 for no error found
/*
 * Verify that the extent tree holds a backref for one tree block.
 *
 * Visible flow:
 *  - search the extent tree for (bytenr, METADATA_ITEM or EXTENT_ITEM, -1)
 *    and step back with btrfs_previous_extent_item(); a failure on either
 *    step sets BACKREF_MISSING;
 *  - read the level either from the key offset (METADATA_ITEM) or from the
 *    btrfs_tree_block_info that follows the extent item;
 *  - report BACKREF_MISMATCH for a missing TREE_BLOCK flag, a generation
 *    differing from the header of @eb, a level differing from @level, or a
 *    ref count other than 1 on a block whose owner is not a fs tree;
 *  - scan the inline refs: a TREE_BLOCK_REF whose offset equals
 *    root->objectid or @owner matches directly, a SHARED_BLOCK_REF is
 *    followed by a recursive call on the parent block at level + 1;
 *  - otherwise search for a separate TREE_BLOCK_REF item keyed by
 *    root->objectid.
 *
 * tree_reloc_root is set when @root is a tree reloc root whose node is the
 * block being checked; the shared ref branch tests it.
 *
 * The "backref lost" error is printed only when @eb is not NULL.
 *
 * NOTE(review): guards, gotos and the return statement are not all visible
 * in this view.
 */
10308 static int check_tree_block_ref(struct btrfs_root *root,
10309 struct extent_buffer *eb, u64 bytenr,
10310 int level, u64 owner)
10312 struct btrfs_key key;
10313 struct btrfs_root *extent_root = root->fs_info->extent_root;
10314 struct btrfs_path path;
10315 struct btrfs_extent_item *ei;
10316 struct btrfs_extent_inline_ref *iref;
10317 struct extent_buffer *leaf;
10323 u32 nodesize = root->fs_info->nodesize;
10326 int tree_reloc_root = 0;
10331 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10332 btrfs_header_bytenr(root->node) == bytenr)
10333 tree_reloc_root = 1;
/* Pick the key type by the SKINNY_METADATA incompat flag. */
10335 btrfs_init_path(&path);
10336 key.objectid = bytenr;
10337 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10338 key.type = BTRFS_METADATA_ITEM_KEY;
10340 key.type = BTRFS_EXTENT_ITEM_KEY;
10341 key.offset = (u64)-1;
10343 /* Search for the backref in extent tree */
10344 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10346 err |= BACKREF_MISSING;
10349 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10351 err |= BACKREF_MISSING;
10355 leaf = path.nodes[0];
10356 slot = path.slots[0];
10357 btrfs_item_key_to_cpu(leaf, &key, slot);
10359 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* For METADATA_ITEM the level is the key offset and inline refs follow the item. */
10361 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10362 skinny_level = (int)key.offset;
10363 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10365 struct btrfs_tree_block_info *info;
/* Otherwise a tree_block_info sits between the item and the inline refs. */
10367 info = (struct btrfs_tree_block_info *)(ei + 1);
10368 skinny_level = btrfs_tree_block_level(leaf, info);
10369 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10376 if (!(btrfs_extent_flags(leaf, ei) &
10377 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10379 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10380 key.objectid, nodesize,
10381 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10382 err = BACKREF_MISMATCH;
10384 header_gen = btrfs_header_generation(eb);
10385 extent_gen = btrfs_extent_generation(leaf, ei);
10386 if (header_gen != extent_gen) {
10388 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10389 key.objectid, nodesize, header_gen,
10391 err = BACKREF_MISMATCH;
10393 if (level != skinny_level) {
10395 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10396 key.objectid, nodesize, level, skinny_level);
10397 err = BACKREF_MISMATCH;
10399 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10401 "extent[%llu %u] is referred by other roots than %llu",
10402 key.objectid, nodesize, root->objectid);
10403 err = BACKREF_MISMATCH;
10408 * Iterate the extent/metadata item to find the exact backref
10410 item_size = btrfs_item_size_nr(leaf, slot);
10411 ptr = (unsigned long)iref;
10412 end = (unsigned long)ei + item_size;
10413 while (ptr < end) {
10414 iref = (struct btrfs_extent_inline_ref *)ptr;
10415 type = btrfs_extent_inline_ref_type(leaf, iref);
10416 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10418 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10419 (offset == root->objectid || offset == owner)) {
10421 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10423 * Backref of tree reloc root points to itself, no need
10424 * to check backref any more.
10426 if (tree_reloc_root)
10429 /* Check if the backref points to valid referencer */
10430 found_ref = !check_tree_block_ref(root, NULL,
10431 offset, level + 1, owner);
10436 ptr += btrfs_extent_inline_ref_size(type);
10440 * Inlined extent item doesn't have what we need, check
10441 * TREE_BLOCK_REF_KEY
10444 btrfs_release_path(&path);
10445 key.objectid = bytenr;
10446 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10447 key.offset = root->objectid;
10449 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10454 err |= BACKREF_MISSING;
10456 btrfs_release_path(&path);
10457 if (eb && (err & BACKREF_MISSING))
10458 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10459 bytenr, nodesize, owner, level);
10464 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10466 * Return >0 any error found and output error message
10467 * Return 0 for no error found
/*
 * Check one EXTENT_DATA item of a leaf against the extent tree.
 *
 * @root: tree that owns the leaf
 * @eb:   leaf holding the file extent item
 * @slot: slot of the item in @eb (the variable is reused later for the slot
 *        in the extent tree leaf)
 *
 * Visible flow:
 *  - inline extents and extents with disk_bytenr 0 are tested first (per
 *    the comment there is nothing to check for them);
 *  - disk_num_bytes and num_bytes must be aligned to the sector size,
 *    otherwise BYTES_UNALIGNED is set; the values are added to the global
 *    data_bytes_allocated and data_bytes_referenced counters;
 *  - the EXTENT_ITEM for (disk_bytenr, disk_num_bytes) is looked up and must
 *    carry BTRFS_EXTENT_FLAG_DATA, otherwise BACKREF_MISMATCH is set;
 *  - the inline refs are scanned: an EXTENT_DATA_REF whose root equals the
 *    leaf owner or root->objectid matches, a SHARED_DATA_REF is validated
 *    through check_tree_block_ref();
 *  - failing that, a keyed EXTENT_DATA_REF item (hash of root objectid,
 *    inode objectid and file offset) and then a SHARED_DATA_REF item keyed
 *    by eb->start are searched;
 *  - when nothing matched, BACKREF_MISSING is set and an error is printed.
 */
10469 static int check_extent_data_item(struct btrfs_root *root,
10470 struct extent_buffer *eb, int slot)
10472 struct btrfs_file_extent_item *fi;
10473 struct btrfs_path path;
10474 struct btrfs_root *extent_root = root->fs_info->extent_root;
10475 struct btrfs_key fi_key;
10476 struct btrfs_key dbref_key;
10477 struct extent_buffer *leaf;
10478 struct btrfs_extent_item *ei;
10479 struct btrfs_extent_inline_ref *iref;
10480 struct btrfs_extent_data_ref *dref;
10483 u64 disk_num_bytes;
10484 u64 extent_num_bytes;
10491 int found_dbackref = 0;
10495 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10496 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10498 /* Nothing to check for hole and inline data extents */
10499 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10500 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10503 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10504 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10505 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10507 /* Check unaligned disk_num_bytes and num_bytes */
10508 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10510 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10511 fi_key.objectid, fi_key.offset, disk_num_bytes,
10512 root->fs_info->sectorsize);
10513 err |= BYTES_UNALIGNED;
10515 data_bytes_allocated += disk_num_bytes;
10517 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10519 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10520 fi_key.objectid, fi_key.offset, extent_num_bytes,
10521 root->fs_info->sectorsize);
10522 err |= BYTES_UNALIGNED;
10524 data_bytes_referenced += extent_num_bytes;
/* The owner recorded in the leaf header is one of the accepted ref roots. */
10526 owner = btrfs_header_owner(eb);
10528 /* Check the extent item of the file extent in extent tree */
10529 btrfs_init_path(&path);
10530 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10531 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10532 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10534 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
/* From here on, leaf and slot refer to the extent tree, not to eb. */
10538 leaf = path.nodes[0];
10539 slot = path.slots[0];
10540 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10542 extent_flags = btrfs_extent_flags(leaf, ei);
10544 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10546 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10547 disk_bytenr, disk_num_bytes,
10548 BTRFS_EXTENT_FLAG_DATA);
10549 err |= BACKREF_MISMATCH;
10552 /* Check data backref inside that extent item */
10553 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10554 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10555 ptr = (unsigned long)iref;
10556 end = (unsigned long)ei + item_size;
10557 while (ptr < end) {
10558 iref = (struct btrfs_extent_inline_ref *)ptr;
10559 type = btrfs_extent_inline_ref_type(leaf, iref);
/* For a data ref, the payload starts at the offset field of the inline ref. */
10560 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10562 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10563 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10564 if (ref_root == owner || ref_root == root->objectid)
10565 found_dbackref = 1;
10566 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10567 found_dbackref = !check_tree_block_ref(root, NULL,
10568 btrfs_extent_inline_ref_offset(leaf, iref),
10572 if (found_dbackref)
10574 ptr += btrfs_extent_inline_ref_size(type);
10577 if (!found_dbackref) {
10578 btrfs_release_path(&path);
10580 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10581 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10582 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10583 dbref_key.offset = hash_extent_data_ref(root->objectid,
10584 fi_key.objectid, fi_key.offset);
10586 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10587 &dbref_key, &path, 0, 0);
10589 found_dbackref = 1;
10593 btrfs_release_path(&path);
10596 * Neither inlined nor EXTENT_DATA_REF found, try
10597 * SHARED_DATA_REF as last chance.
10599 dbref_key.objectid = disk_bytenr;
10600 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10601 dbref_key.offset = eb->start;
10603 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10604 &dbref_key, &path, 0, 0);
10606 found_dbackref = 1;
10612 if (!found_dbackref)
10613 err |= BACKREF_MISSING;
10614 btrfs_release_path(&path);
10615 if (err & BACKREF_MISSING) {
10616 error("data extent[%llu %llu] backref lost",
10617 disk_bytenr, disk_num_bytes);
10623 * Get real tree block level for the case like shared block
10624 * Return >= 0 as tree level
10625 * Return <0 for error
/*
 * Determine the level of the tree block at @bytenr from two sources and
 * compare them.
 *
 * The extent tree is searched for (bytenr, METADATA_ITEM, -1) followed by
 * btrfs_previous_extent_item(). The item found must carry the TREE_BLOCK
 * flag. The backref level is the key offset for a METADATA_ITEM, otherwise
 * it is read from the btrfs_tree_block_info after the extent item. The block
 * itself is then read with the generation from the extent item, and its
 * header level is compared with the backref level.
 *
 * Returns header_level on the visible success path; the error paths are not
 * fully visible in this view.
 */
10627 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10629 struct extent_buffer *eb;
10630 struct btrfs_path path;
10631 struct btrfs_key key;
10632 struct btrfs_extent_item *ei;
10639 /* Search extent tree for extent generation and level */
10640 key.objectid = bytenr;
10641 key.type = BTRFS_METADATA_ITEM_KEY;
10642 key.offset = (u64)-1;
10644 btrfs_init_path(&path);
10645 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10648 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10656 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10657 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10658 struct btrfs_extent_item);
/* Only extents flagged as tree blocks have a level. */
10659 flags = btrfs_extent_flags(path.nodes[0], ei);
10660 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10665 /* Get transid for later read_tree_block() check */
10666 transid = btrfs_extent_generation(path.nodes[0], ei);
10668 /* Get backref level as one source */
10669 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10670 backref_level = key.offset;
10672 struct btrfs_tree_block_info *info;
10674 info = (struct btrfs_tree_block_info *)(ei + 1);
10675 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10677 btrfs_release_path(&path);
10679 /* Get level from tree block as an alternative source */
10680 eb = read_tree_block(fs_info, bytenr, transid);
10681 if (!extent_buffer_uptodate(eb)) {
10682 free_extent_buffer(eb);
10685 header_level = btrfs_header_level(eb);
10686 free_extent_buffer(eb);
/* Both sources must agree before the level is trusted. */
10688 if (header_level != backref_level)
10690 return header_level;
10693 btrfs_release_path(&path);
10698 * Check if a tree block backref is valid (points to a valid tree block)
10699 * if level == -1, level will be resolved
10700 * Return >0 for any error found and print error message
/*
 * Check that the tree block at @bytenr can be reached from the root
 * @root_id at the expected level.
 *
 * @fs_info: filesystem being checked
 * @root_id: objectid of the root named by the backref
 * @bytenr:  start of the tree block
 * @level:   expected level; resolved with query_tree_block_level() in the
 *           special case described by the comment in the body
 *
 * Visible flow: read the root, read the block to obtain its first key
 * (node key or item key), then search the root for that key with
 * path.lowest_level set to @level. The node found at that level must have
 * the same bytenr and level, otherwise REFERENCER_MISMATCH is set. Failures
 * to resolve the level, read the root, read the block or complete the
 * search set REFERENCER_MISSING.
 *
 * An empty leaf (no items and level 0) is released without a key check.
 */
10702 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10703 u64 bytenr, int level)
10705 struct btrfs_root *root;
10706 struct btrfs_key key;
10707 struct btrfs_path path;
10708 struct extent_buffer *eb;
10709 struct extent_buffer *node;
10710 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10714 /* Query level for level == -1 special case */
10716 level = query_tree_block_level(fs_info, bytenr);
10718 err |= REFERENCER_MISSING;
/* Load the root that the backref names. */
10722 key.objectid = root_id;
10723 key.type = BTRFS_ROOT_ITEM_KEY;
10724 key.offset = (u64)-1;
10726 root = btrfs_read_fs_root(fs_info, &key);
10727 if (IS_ERR(root)) {
10728 err |= REFERENCER_MISSING;
10732 /* Read out the tree block to get item/node key */
10733 eb = read_tree_block(fs_info, bytenr, 0);
10734 if (!extent_buffer_uptodate(eb)) {
10735 err |= REFERENCER_MISSING;
10736 free_extent_buffer(eb);
10740 /* Empty tree, no need to check key */
10741 if (!btrfs_header_nritems(eb) && !level) {
10742 free_extent_buffer(eb);
/* Take the first key of the block: a node key or an item key. */
10747 btrfs_node_key_to_cpu(eb, &key, 0);
10749 btrfs_item_key_to_cpu(eb, &key, 0);
10751 free_extent_buffer(eb);
10753 btrfs_init_path(&path);
10754 path.lowest_level = level;
10755 /* Search with the first key, to ensure we can reach it */
10756 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10758 err |= REFERENCER_MISSING;
/* The node the search reached at this level must be the block under test. */
10762 node = path.nodes[level];
10763 if (btrfs_header_bytenr(node) != bytenr) {
10765 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10766 bytenr, nodesize, bytenr,
10767 btrfs_header_bytenr(node));
10768 err |= REFERENCER_MISMATCH;
10770 if (btrfs_header_level(node) != level) {
10772 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10773 bytenr, nodesize, level,
10774 btrfs_header_level(node));
10775 err |= REFERENCER_MISMATCH;
10779 btrfs_release_path(&path);
/* Two message variants follow: one without and one with the level. */
10781 if (err & REFERENCER_MISSING) {
10783 error("extent [%llu %d] lost referencer (owner: %llu)",
10784 bytenr, nodesize, root_id);
10787 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10788 bytenr, nodesize, root_id, level);
10795 * Check if tree block @eb is tree reloc root.
10796 * Return 0 if it's not or any problem happens
10797 * Return 1 if it's a tree reloc root
/*
 * Test whether @eb is the root node of the tree reloc root that belongs to
 * the owner recorded in the header of @eb.
 *
 * The reloc root is looked up uncached with the key
 * (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM, owner). The bytenr of @eb is then
 * compared with the bytenr of the node of that root, and the root is freed
 * again with btrfs_free_fs_root().
 *
 * The statements that set the result are not visible in this view.
 */
10799 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10800 struct extent_buffer *eb)
10802 struct btrfs_root *tree_reloc_root;
10803 struct btrfs_key key;
10804 u64 bytenr = btrfs_header_bytenr(eb);
10805 u64 owner = btrfs_header_owner(eb);
/* The key offset carries the owner of the block. */
10808 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10809 key.offset = owner;
10810 key.type = BTRFS_ROOT_ITEM_KEY;
10812 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10813 if (IS_ERR(tree_reloc_root))
10816 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
/* The root was read uncached, so it is released here. */
10818 btrfs_free_fs_root(tree_reloc_root);
10823 * Check referencer for shared block backref
10824 * If level == -1, this function will resolve the level.
/*
 * Check that the block at @parent really references the tree block at
 * @bytenr.
 *
 * @fs_info: filesystem being checked
 * @parent:  bytenr of the parent block named by the shared backref
 * @bytenr:  start of the child tree block
 * @level:   level of the child; query_tree_block_level() is used to
 *           resolve it (the guarding condition is not visible here)
 *
 * The parent is read and must be uptodate. If @parent equals @bytenr, the
 * block is tested with is_tree_reloc_root(). Otherwise the parent level
 * must be @level + 1 and one of its block pointers must equal @bytenr.
 * REFERENCER_MISSING is returned when no parent was found.
 */
10826 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10827 u64 parent, u64 bytenr, int level)
10829 struct extent_buffer *eb;
10831 int found_parent = 0;
10834 eb = read_tree_block(fs_info, parent, 0);
10835 if (!extent_buffer_uptodate(eb))
10839 level = query_tree_block_level(fs_info, bytenr);
10843 /* It's possible it's a tree reloc root */
10844 if (parent == bytenr) {
10845 if (is_tree_reloc_root(fs_info, eb))
/* A real parent sits exactly one level above the child. */
10850 if (level + 1 != btrfs_header_level(eb))
/* Scan the block pointers of the parent for the child. */
10853 nr = btrfs_header_nritems(eb);
10854 for (i = 0; i < nr; i++) {
10855 if (bytenr == btrfs_node_blockptr(eb, i)) {
10861 free_extent_buffer(eb);
10862 if (!found_parent) {
10864 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10865 bytenr, fs_info->nodesize, parent, level);
10866 return REFERENCER_MISSING;
10872 * Check referencer for normal (inlined) data ref
10873 * If len == 0, it will be resolved by searching in extent tree
/*
 * Check that a data backref (root, inode, offset) is matched by exactly
 * @count file extent items.
 *
 * @fs_info:  filesystem being checked
 * @root_id:  objectid of the root named by the backref
 * @objectid: inode number named by the backref
 * @offset:   backref offset, i.e. file offset minus file extent offset
 * @bytenr:   start of the data extent
 * @len:      length of the data extent; the extent tree lookup at the top
 *            serves to resolve it (the guard and the assignment are not
 *            visible in this view)
 * @count:    reference count recorded in the backref
 *
 * All EXTENT_DATA items of the inode are walked with btrfs_next_item(). An
 * item is tested on disk bytenr, disk num bytes and on
 * (key.offset - file extent offset); the counting statement under that
 * test is not visible in this view. If found_count differs from @count, an
 * error is printed and REFERENCER_MISSING is returned.
 */
10875 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10876 u64 root_id, u64 objectid, u64 offset,
10877 u64 bytenr, u64 len, u32 count)
10879 struct btrfs_root *root;
10880 struct btrfs_root *extent_root = fs_info->extent_root;
10881 struct btrfs_key key;
10882 struct btrfs_path path;
10883 struct extent_buffer *leaf;
10884 struct btrfs_file_extent_item *fi;
10885 u32 found_count = 0;
/* Locate the EXTENT_ITEM of this bytenr in the extent tree. */
10890 key.objectid = bytenr;
10891 key.type = BTRFS_EXTENT_ITEM_KEY;
10892 key.offset = (u64)-1;
10894 btrfs_init_path(&path);
10895 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10898 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10901 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10902 if (key.objectid != bytenr ||
10903 key.type != BTRFS_EXTENT_ITEM_KEY)
10906 btrfs_release_path(&path);
/* Load the root that the backref names. */
10908 key.objectid = root_id;
10909 key.type = BTRFS_ROOT_ITEM_KEY;
10910 key.offset = (u64)-1;
10911 btrfs_init_path(&path);
10913 root = btrfs_read_fs_root(fs_info, &key);
10917 key.objectid = objectid;
10918 key.type = BTRFS_EXTENT_DATA_KEY;
10920 * It can be nasty as data backref offset is
10921 * file offset - file extent offset, which is smaller or
10922 * equal to original backref offset. The only special case is
10923 * overflow. So we need to special check and do further search.
/* When bit 63 of the offset is set, the search starts at file offset 0. */
10925 key.offset = offset & (1ULL << 63) ? 0 : offset;
10927 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10932 * Search afterwards to get correct one
10933 * NOTE: As we must do a comprehensive check on the data backref to
10934 * make sure the dref count also matches, we must iterate all file
10935 * extents for that inode.
10938 leaf = path.nodes[0];
10939 slot = path.slots[0];
10941 if (slot >= btrfs_header_nritems(leaf))
10943 btrfs_item_key_to_cpu(leaf, &key, slot);
10944 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10946 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10948 * Except normal disk bytenr and disk num bytes, we still
10949 * need to do extra check on dbackref offset as
10950 * dbackref offset = file_offset - file_extent_offset
10952 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10953 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10954 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10959 ret = btrfs_next_item(root, &path);
10964 btrfs_release_path(&path);
/* The number of matching file extents must equal the recorded count. */
10965 if (found_count != count) {
10967 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10968 bytenr, len, root_id, objectid, offset, count, found_count);
10969 return REFERENCER_MISSING;
10975 * Check if the referencer of a shared data backref exists
10977 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10978 u64 parent, u64 bytenr)
10980 struct extent_buffer *eb;
10981 struct btrfs_key key;
10982 struct btrfs_file_extent_item *fi;
10984 int found_parent = 0;
10987 eb = read_tree_block(fs_info, parent, 0);
10988 if (!extent_buffer_uptodate(eb))
10991 nr = btrfs_header_nritems(eb);
10992 for (i = 0; i < nr; i++) {
10993 btrfs_item_key_to_cpu(eb, &key, i);
10994 if (key.type != BTRFS_EXTENT_DATA_KEY)
10997 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10998 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
11001 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
11008 free_extent_buffer(eb);
11009 if (!found_parent) {
11010 error("shared extent %llu referencer lost (parent: %llu)",
11012 return REFERENCER_MISSING;
11018 * This function will check a given extent item, including its backref and
11019 * itself (like crossing stripe boundary and type)
11021 * Since we don't use extent_record anymore, introduce new error bit
11023 static int check_extent_item(struct btrfs_fs_info *fs_info,
11024 struct extent_buffer *eb, int slot)
11026 struct btrfs_extent_item *ei;
11027 struct btrfs_extent_inline_ref *iref;
11028 struct btrfs_extent_data_ref *dref;
11032 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11033 u32 item_size = btrfs_item_size_nr(eb, slot);
11038 struct btrfs_key key;
11042 btrfs_item_key_to_cpu(eb, &key, slot);
11043 if (key.type == BTRFS_EXTENT_ITEM_KEY)
11044 bytes_used += key.offset;
11046 bytes_used += nodesize;
11048 if (item_size < sizeof(*ei)) {
11050 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
11051 * old thing when on disk format is still un-determined.
11052 * No need to care about it anymore
11054 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
11058 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
11059 flags = btrfs_extent_flags(eb, ei);
11061 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
11063 if (metadata && check_crossing_stripes(global_info, key.objectid,
11065 error("bad metadata [%llu, %llu) crossing stripe boundary",
11066 key.objectid, key.objectid + nodesize);
11067 err |= CROSSING_STRIPE_BOUNDARY;
11070 ptr = (unsigned long)(ei + 1);
11072 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
11073 /* Old EXTENT_ITEM metadata */
11074 struct btrfs_tree_block_info *info;
11076 info = (struct btrfs_tree_block_info *)ptr;
11077 level = btrfs_tree_block_level(eb, info);
11078 ptr += sizeof(struct btrfs_tree_block_info);
11080 /* New METADATA_ITEM */
11081 level = key.offset;
11083 end = (unsigned long)ei + item_size;
11086 /* Reached extent item end normally */
11090 /* Beyond extent item end, wrong item size */
11092 err |= ITEM_SIZE_MISMATCH;
11093 error("extent item at bytenr %llu slot %d has wrong size",
11098 /* Now check every backref in this extent item */
11099 iref = (struct btrfs_extent_inline_ref *)ptr;
11100 type = btrfs_extent_inline_ref_type(eb, iref);
11101 offset = btrfs_extent_inline_ref_offset(eb, iref);
11103 case BTRFS_TREE_BLOCK_REF_KEY:
11104 ret = check_tree_block_backref(fs_info, offset, key.objectid,
11108 case BTRFS_SHARED_BLOCK_REF_KEY:
11109 ret = check_shared_block_backref(fs_info, offset, key.objectid,
11113 case BTRFS_EXTENT_DATA_REF_KEY:
11114 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11115 ret = check_extent_data_backref(fs_info,
11116 btrfs_extent_data_ref_root(eb, dref),
11117 btrfs_extent_data_ref_objectid(eb, dref),
11118 btrfs_extent_data_ref_offset(eb, dref),
11119 key.objectid, key.offset,
11120 btrfs_extent_data_ref_count(eb, dref));
11123 case BTRFS_SHARED_DATA_REF_KEY:
11124 ret = check_shared_data_backref(fs_info, offset, key.objectid);
11128 error("extent[%llu %d %llu] has unknown ref type: %d",
11129 key.objectid, key.type, key.offset, type);
11130 err |= UNKNOWN_TYPE;
11134 ptr += btrfs_extent_inline_ref_size(type);
11142 * Check if a dev extent item is referred correctly by its chunk
11144 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
11145 struct extent_buffer *eb, int slot)
11147 struct btrfs_root *chunk_root = fs_info->chunk_root;
11148 struct btrfs_dev_extent *ptr;
11149 struct btrfs_path path;
11150 struct btrfs_key chunk_key;
11151 struct btrfs_key devext_key;
11152 struct btrfs_chunk *chunk;
11153 struct extent_buffer *l;
11157 int found_chunk = 0;
11160 btrfs_item_key_to_cpu(eb, &devext_key, slot);
11161 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
11162 length = btrfs_dev_extent_length(eb, ptr);
11164 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
11165 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11166 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
11168 btrfs_init_path(&path);
11169 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11174 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
11175 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
11180 if (btrfs_stripe_length(fs_info, l, chunk) != length)
11183 num_stripes = btrfs_chunk_num_stripes(l, chunk);
11184 for (i = 0; i < num_stripes; i++) {
11185 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
11186 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
11188 if (devid == devext_key.objectid &&
11189 offset == devext_key.offset) {
11195 btrfs_release_path(&path);
11196 if (!found_chunk) {
11198 "device extent[%llu, %llu, %llu] did not find the related chunk",
11199 devext_key.objectid, devext_key.offset, length);
11200 return REFERENCER_MISSING;
11206 * Check if the used space is correct with the dev item
11208 static int check_dev_item(struct btrfs_fs_info *fs_info,
11209 struct extent_buffer *eb, int slot)
11211 struct btrfs_root *dev_root = fs_info->dev_root;
11212 struct btrfs_dev_item *dev_item;
11213 struct btrfs_path path;
11214 struct btrfs_key key;
11215 struct btrfs_dev_extent *ptr;
11221 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
11222 dev_id = btrfs_device_id(eb, dev_item);
11223 used = btrfs_device_bytes_used(eb, dev_item);
11225 key.objectid = dev_id;
11226 key.type = BTRFS_DEV_EXTENT_KEY;
11229 btrfs_init_path(&path);
11230 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
11232 btrfs_item_key_to_cpu(eb, &key, slot);
11233 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
11234 key.objectid, key.type, key.offset);
11235 btrfs_release_path(&path);
11236 return REFERENCER_MISSING;
11239 /* Iterate dev_extents to calculate the used space of a device */
11241 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11244 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11245 if (key.objectid > dev_id)
11247 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11250 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11251 struct btrfs_dev_extent);
11252 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11254 ret = btrfs_next_item(dev_root, &path);
11258 btrfs_release_path(&path);
11260 if (used != total) {
11261 btrfs_item_key_to_cpu(eb, &key, slot);
11263 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11264 total, used, BTRFS_ROOT_TREE_OBJECTID,
11265 BTRFS_DEV_EXTENT_KEY, dev_id);
11266 return ACCOUNTING_MISMATCH;
11272 * Check a block group item with its referener (chunk) and its used space
11273 * with extent/metadata item
11275 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11276 struct extent_buffer *eb, int slot)
11278 struct btrfs_root *extent_root = fs_info->extent_root;
11279 struct btrfs_root *chunk_root = fs_info->chunk_root;
11280 struct btrfs_block_group_item *bi;
11281 struct btrfs_block_group_item bg_item;
11282 struct btrfs_path path;
11283 struct btrfs_key bg_key;
11284 struct btrfs_key chunk_key;
11285 struct btrfs_key extent_key;
11286 struct btrfs_chunk *chunk;
11287 struct extent_buffer *leaf;
11288 struct btrfs_extent_item *ei;
11289 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11297 btrfs_item_key_to_cpu(eb, &bg_key, slot);
11298 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11299 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11300 used = btrfs_block_group_used(&bg_item);
11301 bg_flags = btrfs_block_group_flags(&bg_item);
11303 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11304 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11305 chunk_key.offset = bg_key.objectid;
11307 btrfs_init_path(&path);
11308 /* Search for the referencer chunk */
11309 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11312 "block group[%llu %llu] did not find the related chunk item",
11313 bg_key.objectid, bg_key.offset);
11314 err |= REFERENCER_MISSING;
11316 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11317 struct btrfs_chunk);
11318 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11321 "block group[%llu %llu] related chunk item length does not match",
11322 bg_key.objectid, bg_key.offset);
11323 err |= REFERENCER_MISMATCH;
11326 btrfs_release_path(&path);
11328 /* Search from the block group bytenr */
11329 extent_key.objectid = bg_key.objectid;
11330 extent_key.type = 0;
11331 extent_key.offset = 0;
11333 btrfs_init_path(&path);
11334 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11338 /* Iterate extent tree to account used space */
11340 leaf = path.nodes[0];
11342 /* Search slot can point to the last item beyond leaf nritems */
11343 if (path.slots[0] >= btrfs_header_nritems(leaf))
11346 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11347 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11350 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11351 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11353 if (extent_key.objectid < bg_key.objectid)
11356 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11359 total += extent_key.offset;
11361 ei = btrfs_item_ptr(leaf, path.slots[0],
11362 struct btrfs_extent_item);
11363 flags = btrfs_extent_flags(leaf, ei);
11364 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11365 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11367 "bad extent[%llu, %llu) type mismatch with chunk",
11368 extent_key.objectid,
11369 extent_key.objectid + extent_key.offset);
11370 err |= CHUNK_TYPE_MISMATCH;
11372 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11373 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11374 BTRFS_BLOCK_GROUP_METADATA))) {
11376 "bad extent[%llu, %llu) type mismatch with chunk",
11377 extent_key.objectid,
11378 extent_key.objectid + nodesize);
11379 err |= CHUNK_TYPE_MISMATCH;
11383 ret = btrfs_next_item(extent_root, &path);
11389 btrfs_release_path(&path);
11391 if (total != used) {
11393 "block group[%llu %llu] used %llu but extent items used %llu",
11394 bg_key.objectid, bg_key.offset, used, total);
11395 err |= ACCOUNTING_MISMATCH;
11401 * Check a chunk item.
11402 * Including checking all referred dev_extents and block group
11404 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11405 struct extent_buffer *eb, int slot)
11407 struct btrfs_root *extent_root = fs_info->extent_root;
11408 struct btrfs_root *dev_root = fs_info->dev_root;
11409 struct btrfs_path path;
11410 struct btrfs_key chunk_key;
11411 struct btrfs_key bg_key;
11412 struct btrfs_key devext_key;
11413 struct btrfs_chunk *chunk;
11414 struct extent_buffer *leaf;
11415 struct btrfs_block_group_item *bi;
11416 struct btrfs_block_group_item bg_item;
11417 struct btrfs_dev_extent *ptr;
11429 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11430 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11431 length = btrfs_chunk_length(eb, chunk);
11432 chunk_end = chunk_key.offset + length;
11433 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11436 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11438 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11441 type = btrfs_chunk_type(eb, chunk);
11443 bg_key.objectid = chunk_key.offset;
11444 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11445 bg_key.offset = length;
11447 btrfs_init_path(&path);
11448 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11451 "chunk[%llu %llu) did not find the related block group item",
11452 chunk_key.offset, chunk_end);
11453 err |= REFERENCER_MISSING;
11455 leaf = path.nodes[0];
11456 bi = btrfs_item_ptr(leaf, path.slots[0],
11457 struct btrfs_block_group_item);
11458 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11460 if (btrfs_block_group_flags(&bg_item) != type) {
11462 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11463 chunk_key.offset, chunk_end, type,
11464 btrfs_block_group_flags(&bg_item));
11465 err |= REFERENCER_MISSING;
11469 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11470 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11471 for (i = 0; i < num_stripes; i++) {
11472 btrfs_release_path(&path);
11473 btrfs_init_path(&path);
11474 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11475 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11476 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11478 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11481 goto not_match_dev;
11483 leaf = path.nodes[0];
11484 ptr = btrfs_item_ptr(leaf, path.slots[0],
11485 struct btrfs_dev_extent);
11486 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11487 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11488 if (objectid != chunk_key.objectid ||
11489 offset != chunk_key.offset ||
11490 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11491 goto not_match_dev;
11494 err |= BACKREF_MISSING;
11496 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11497 chunk_key.objectid, chunk_end, i);
11500 btrfs_release_path(&path);
11506 * Main entry function to check known items and update related accounting info
11508 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11510 struct btrfs_fs_info *fs_info = root->fs_info;
11511 struct btrfs_key key;
11514 struct btrfs_extent_data_ref *dref;
11519 btrfs_item_key_to_cpu(eb, &key, slot);
11523 case BTRFS_EXTENT_DATA_KEY:
11524 ret = check_extent_data_item(root, eb, slot);
11527 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11528 ret = check_block_group_item(fs_info, eb, slot);
11531 case BTRFS_DEV_ITEM_KEY:
11532 ret = check_dev_item(fs_info, eb, slot);
11535 case BTRFS_CHUNK_ITEM_KEY:
11536 ret = check_chunk_item(fs_info, eb, slot);
11539 case BTRFS_DEV_EXTENT_KEY:
11540 ret = check_dev_extent_item(fs_info, eb, slot);
11543 case BTRFS_EXTENT_ITEM_KEY:
11544 case BTRFS_METADATA_ITEM_KEY:
11545 ret = check_extent_item(fs_info, eb, slot);
11548 case BTRFS_EXTENT_CSUM_KEY:
11549 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11551 case BTRFS_TREE_BLOCK_REF_KEY:
11552 ret = check_tree_block_backref(fs_info, key.offset,
11556 case BTRFS_EXTENT_DATA_REF_KEY:
11557 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11558 ret = check_extent_data_backref(fs_info,
11559 btrfs_extent_data_ref_root(eb, dref),
11560 btrfs_extent_data_ref_objectid(eb, dref),
11561 btrfs_extent_data_ref_offset(eb, dref),
11563 btrfs_extent_data_ref_count(eb, dref));
11566 case BTRFS_SHARED_BLOCK_REF_KEY:
11567 ret = check_shared_block_backref(fs_info, key.offset,
11571 case BTRFS_SHARED_DATA_REF_KEY:
11572 ret = check_shared_data_backref(fs_info, key.offset,
11580 if (++slot < btrfs_header_nritems(eb))
11587 * Helper function for later fs/subvol tree check. To determine if a tree
11588 * block should be checked.
11589 * This function will ensure only the direct referencer with lowest rootid to
11590 * check a fs/subvolume tree block.
11592 * Backref check at extent tree would detect errors like missing subvolume
11593 * tree, so we can do aggressive check to reduce duplicated checks.
11595 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11597 struct btrfs_root *extent_root = root->fs_info->extent_root;
11598 struct btrfs_key key;
11599 struct btrfs_path path;
11600 struct extent_buffer *leaf;
11602 struct btrfs_extent_item *ei;
11608 struct btrfs_extent_inline_ref *iref;
11611 btrfs_init_path(&path);
11612 key.objectid = btrfs_header_bytenr(eb);
11613 key.type = BTRFS_METADATA_ITEM_KEY;
11614 key.offset = (u64)-1;
11617 * Any failure in backref resolving means we can't determine
11618 * whom the tree block belongs to.
11619 * So in that case, we need to check that tree block
11621 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11625 ret = btrfs_previous_extent_item(extent_root, &path,
11626 btrfs_header_bytenr(eb));
11630 leaf = path.nodes[0];
11631 slot = path.slots[0];
11632 btrfs_item_key_to_cpu(leaf, &key, slot);
11633 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11635 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11636 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11638 struct btrfs_tree_block_info *info;
11640 info = (struct btrfs_tree_block_info *)(ei + 1);
11641 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11644 item_size = btrfs_item_size_nr(leaf, slot);
11645 ptr = (unsigned long)iref;
11646 end = (unsigned long)ei + item_size;
11647 while (ptr < end) {
11648 iref = (struct btrfs_extent_inline_ref *)ptr;
11649 type = btrfs_extent_inline_ref_type(leaf, iref);
11650 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11653 * We only check the tree block if current root is
11654 * the lowest referencer of it.
11656 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11657 offset < root->objectid) {
11658 btrfs_release_path(&path);
11662 ptr += btrfs_extent_inline_ref_size(type);
11665 * Normally we should also check keyed tree block ref, but that may be
11666 * very time consuming. Inlined ref should already make us skip a lot
11667 * of refs now. So skip search keyed tree block ref.
11671 btrfs_release_path(&path);
11676 * Traversal function for tree block. We will do:
11677 * 1) Skip shared fs/subvolume tree blocks
11678 * 2) Update related bytes accounting
11679 * 3) Pre-order traversal
11681 static int traverse_tree_block(struct btrfs_root *root,
11682 struct extent_buffer *node)
11684 struct extent_buffer *eb;
11685 struct btrfs_key key;
11686 struct btrfs_key drop_key;
11694 * Skip shared fs/subvolume tree block, in that case they will
11695 * be checked by referencer with lowest rootid
11697 if (is_fstree(root->objectid) && !should_check(root, node))
11700 /* Update bytes accounting */
11701 total_btree_bytes += node->len;
11702 if (fs_root_objectid(btrfs_header_owner(node)))
11703 total_fs_tree_bytes += node->len;
11704 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11705 total_extent_tree_bytes += node->len;
11707 /* pre-order tranversal, check itself first */
11708 level = btrfs_header_level(node);
11709 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11710 btrfs_header_level(node),
11711 btrfs_header_owner(node));
11715 "check %s failed root %llu bytenr %llu level %d, force continue check",
11716 level ? "node":"leaf", root->objectid,
11717 btrfs_header_bytenr(node), btrfs_header_level(node));
11720 btree_space_waste += btrfs_leaf_free_space(root, node);
11721 ret = check_leaf_items(root, node);
11726 nr = btrfs_header_nritems(node);
11727 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11728 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11729 sizeof(struct btrfs_key_ptr);
11731 /* Then check all its children */
11732 for (i = 0; i < nr; i++) {
11733 u64 blocknr = btrfs_node_blockptr(node, i);
11735 btrfs_node_key_to_cpu(node, &key, i);
11736 if (level == root->root_item.drop_level &&
11737 is_dropped_key(&key, &drop_key))
11741 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11742 * to call the function itself.
11744 eb = read_tree_block(root->fs_info, blocknr, 0);
11745 if (extent_buffer_uptodate(eb)) {
11746 ret = traverse_tree_block(root, eb);
11749 free_extent_buffer(eb);
11756 * Low memory usage version check_chunks_and_extents.
11758 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
11760 struct btrfs_path path;
11761 struct btrfs_key key;
11762 struct btrfs_root *root1;
11763 struct btrfs_root *root;
11764 struct btrfs_root *cur_root;
11768 root = fs_info->fs_root;
11770 root1 = root->fs_info->chunk_root;
11771 ret = traverse_tree_block(root1, root1->node);
11774 root1 = root->fs_info->tree_root;
11775 ret = traverse_tree_block(root1, root1->node);
11778 btrfs_init_path(&path);
11779 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11781 key.type = BTRFS_ROOT_ITEM_KEY;
11783 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11785 error("cannot find extent treet in tree_root");
11790 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11791 if (key.type != BTRFS_ROOT_ITEM_KEY)
11793 key.offset = (u64)-1;
11795 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11796 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11799 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11800 if (IS_ERR(cur_root) || !cur_root) {
11801 error("failed to read tree: %lld", key.objectid);
11805 ret = traverse_tree_block(cur_root, cur_root->node);
11808 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11809 btrfs_free_fs_root(cur_root);
11811 ret = btrfs_next_item(root1, &path);
11817 btrfs_release_path(&path);
11821 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11825 if (!ctx.progress_enabled)
11826 fprintf(stderr, "checking extents\n");
11827 if (check_mode == CHECK_MODE_LOWMEM)
11828 ret = check_chunks_and_extents_v2(fs_info);
11830 ret = check_chunks_and_extents(fs_info);
11835 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11836 struct btrfs_root *root, int overwrite)
11838 struct extent_buffer *c;
11839 struct extent_buffer *old = root->node;
11842 struct btrfs_disk_key disk_key = {0,0,0};
11848 extent_buffer_get(c);
11851 c = btrfs_alloc_free_block(trans, root,
11852 root->fs_info->nodesize,
11853 root->root_key.objectid,
11854 &disk_key, level, 0, 0);
11857 extent_buffer_get(c);
11861 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11862 btrfs_set_header_level(c, level);
11863 btrfs_set_header_bytenr(c, c->start);
11864 btrfs_set_header_generation(c, trans->transid);
11865 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11866 btrfs_set_header_owner(c, root->root_key.objectid);
11868 write_extent_buffer(c, root->fs_info->fsid,
11869 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11871 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11872 btrfs_header_chunk_tree_uuid(c),
11875 btrfs_mark_buffer_dirty(c);
11877 * this case can happen in the following case:
11879 * 1.overwrite previous root.
11881 * 2.reinit reloc data root, this is because we skip pin
11882 * down reloc data tree before which means we can allocate
11883 * same block bytenr here.
11885 if (old->start == c->start) {
11886 btrfs_set_root_generation(&root->root_item,
11888 root->root_item.level = btrfs_header_level(root->node);
11889 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11890 &root->root_key, &root->root_item);
11892 free_extent_buffer(c);
11896 free_extent_buffer(old);
11898 add_root_to_dirty_list(root);
11902 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11903 struct extent_buffer *eb, int tree_root)
11905 struct extent_buffer *tmp;
11906 struct btrfs_root_item *ri;
11907 struct btrfs_key key;
11909 int level = btrfs_header_level(eb);
11915 * If we have pinned this block before, don't pin it again.
11916 * This can not only avoid forever loop with broken filesystem
11917 * but also give us some speedups.
11919 if (test_range_bit(&fs_info->pinned_extents, eb->start,
11920 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11923 btrfs_pin_extent(fs_info, eb->start, eb->len);
11925 nritems = btrfs_header_nritems(eb);
11926 for (i = 0; i < nritems; i++) {
11928 btrfs_item_key_to_cpu(eb, &key, i);
11929 if (key.type != BTRFS_ROOT_ITEM_KEY)
11931 /* Skip the extent root and reloc roots */
11932 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11933 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11934 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11936 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11937 bytenr = btrfs_disk_root_bytenr(eb, ri);
11940 * If at any point we start needing the real root we
11941 * will have to build a stump root for the root we are
11942 * in, but for now this doesn't actually use the root so
11943 * just pass in extent_root.
11945 tmp = read_tree_block(fs_info, bytenr, 0);
11946 if (!extent_buffer_uptodate(tmp)) {
11947 fprintf(stderr, "Error reading root block\n");
11950 ret = pin_down_tree_blocks(fs_info, tmp, 0);
11951 free_extent_buffer(tmp);
11955 bytenr = btrfs_node_blockptr(eb, i);
11957 /* If we aren't the tree root don't read the block */
11958 if (level == 1 && !tree_root) {
11959 btrfs_pin_extent(fs_info, bytenr,
11960 fs_info->nodesize);
11964 tmp = read_tree_block(fs_info, bytenr, 0);
11965 if (!extent_buffer_uptodate(tmp)) {
11966 fprintf(stderr, "Error reading tree block\n");
11969 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11970 free_extent_buffer(tmp);
11979 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11983 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11987 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11990 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11992 struct btrfs_block_group_cache *cache;
11993 struct btrfs_path path;
11994 struct extent_buffer *leaf;
11995 struct btrfs_chunk *chunk;
11996 struct btrfs_key key;
12000 btrfs_init_path(&path);
12002 key.type = BTRFS_CHUNK_ITEM_KEY;
12004 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
12006 btrfs_release_path(&path);
12011 * We do this in case the block groups were screwed up and had alloc
12012 * bits that aren't actually set on the chunks. This happens with
12013 * restored images every time and could happen in real life I guess.
12015 fs_info->avail_data_alloc_bits = 0;
12016 fs_info->avail_metadata_alloc_bits = 0;
12017 fs_info->avail_system_alloc_bits = 0;
12019 /* First we need to create the in-memory block groups */
12021 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12022 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
12024 btrfs_release_path(&path);
12032 leaf = path.nodes[0];
12033 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12034 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
12039 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
12040 btrfs_add_block_group(fs_info, 0,
12041 btrfs_chunk_type(leaf, chunk),
12042 key.objectid, key.offset,
12043 btrfs_chunk_length(leaf, chunk));
12044 set_extent_dirty(&fs_info->free_space_cache, key.offset,
12045 key.offset + btrfs_chunk_length(leaf, chunk));
12050 cache = btrfs_lookup_first_block_group(fs_info, start);
12054 start = cache->key.objectid + cache->key.offset;
12057 btrfs_release_path(&path);
12061 static int reset_balance(struct btrfs_trans_handle *trans,
12062 struct btrfs_fs_info *fs_info)
12064 struct btrfs_root *root = fs_info->tree_root;
12065 struct btrfs_path path;
12066 struct extent_buffer *leaf;
12067 struct btrfs_key key;
12068 int del_slot, del_nr = 0;
12072 btrfs_init_path(&path);
12073 key.objectid = BTRFS_BALANCE_OBJECTID;
12074 key.type = BTRFS_BALANCE_ITEM_KEY;
12076 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12081 goto reinit_data_reloc;
12086 ret = btrfs_del_item(trans, root, &path);
12089 btrfs_release_path(&path);
12091 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12092 key.type = BTRFS_ROOT_ITEM_KEY;
12094 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12098 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12103 ret = btrfs_del_items(trans, root, &path,
12110 btrfs_release_path(&path);
12113 ret = btrfs_search_slot(trans, root, &key, &path,
12120 leaf = path.nodes[0];
12121 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12122 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
12124 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
12129 del_slot = path.slots[0];
12138 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
12142 btrfs_release_path(&path);
12145 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
12146 key.type = BTRFS_ROOT_ITEM_KEY;
12147 key.offset = (u64)-1;
12148 root = btrfs_read_fs_root(fs_info, &key);
12149 if (IS_ERR(root)) {
12150 fprintf(stderr, "Error reading data reloc tree\n");
12151 ret = PTR_ERR(root);
12154 record_root_in_trans(trans, root);
12155 ret = btrfs_fsck_reinit_root(trans, root, 0);
12158 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
12160 btrfs_release_path(&path);
/*
 * reinit_extent_tree - rebuild the extent tree of @fs_info inside @trans.
 *
 * Steps visible in this function, in order:
 *  - print an unsupported message when the MIXED_GROUPS incompat flag is set
 *  - pin_metadata_blocks() so that in-use metadata is not overwritten
 *  - btrfs_free_block_groups() followed by reset_block_groups()
 *  - btrfs_fsck_reinit_root() on fs_info->extent_root
 *  - walk the in-memory block groups with btrfs_lookup_first_block_group(),
 *    inserting each cache->key / cache->item pair into the extent root and
 *    calling btrfs_extent_post_op() afterwards
 *  - reset_balance()
 * Each failing step reports its own message on stderr.
 *
 * @trans:   transaction handle used for the root reinit, the block group
 *           item insertion and the balance reset
 * @fs_info: filesystem whose extent root is being rebuilt
 *
 * NOTE(review): the return statements are not shown on the lines below;
 * presumably 0 on success and a negative errno otherwise - confirm.
 */
12164 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
12165 struct btrfs_fs_info *fs_info)
12171 * The only reason we don't do this is because right now we're just
12172 * walking the trees we find and pinning down their bytes, we don't look
12173 * at any of the leaves. In order to do mixed groups we'd have to check
12174 * the leaves of any fs roots and pin down the bytes for any file
12175 * extents we find. Not hard but why do it if we don't have to?
12177 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
12178 fprintf(stderr, "We don't support re-initing the extent tree "
12179 "for mixed block groups yet, please notify a btrfs "
12180 "developer you want to do this so they can add this "
12181 "functionality.\n");
12186 * first we need to walk all of the trees except the extent tree and pin
12187 * down the bytes that are in use so we don't overwrite any existing
12190 ret = pin_metadata_blocks(fs_info);
12192 fprintf(stderr, "error pinning down used bytes\n");
12197 * Need to drop all the block groups since we're going to recreate all
12200 btrfs_free_block_groups(fs_info);
12201 ret = reset_block_groups(fs_info);
12203 fprintf(stderr, "error resetting the block groups\n");
12207 /* Ok we can allocate now, reinit the extent root */
12208 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
12210 fprintf(stderr, "extent root initialization failed\n");
12212 * When the transaction code is updated we should end the
12213 * transaction, but for now progs only knows about commit so
12214 * just return an error.
12220 * Now we have all the in-memory block groups setup so we can make
12221 * allocations properly, and the metadata we care about is safe since we
12222 * pinned all of it above.
12225 struct btrfs_block_group_cache *cache;
12227 cache = btrfs_lookup_first_block_group(fs_info, start);
12230 start = cache->key.objectid + cache->key.offset;
12231 ret = btrfs_insert_item(trans, fs_info->extent_root,
12232 &cache->key, &cache->item,
12233 sizeof(cache->item));
12235 fprintf(stderr, "Error adding block group\n");
12238 btrfs_extent_post_op(trans, fs_info->extent_root);
12241 ret = reset_balance(trans, fs_info);
12243 fprintf(stderr, "error resetting the pending balance\n");
/*
 * recow_extent_buffer - re-search the tree that owns @eb down to the level
 * of @eb inside a fresh transaction.
 *
 * The owner root is looked up from btrfs_header_owner(eb) with a
 * ROOT_ITEM key whose offset is (u64)-1.  The search key is the first key
 * stored in @eb (node key for a non-zero level, item key otherwise) and
 * path.lowest_level is set to the level of @eb.
 *
 * @root: any root of the filesystem; only root->fs_info is read before the
 *        variable is replaced by the owner root
 * @eb:   metadata block to process
 *
 * Returns PTR_ERR() of the root lookup or of the transaction start when
 * either fails.
 *
 * NOTE(review): the trailing 1 passed to btrfs_search_slot() is presumably
 * the cow flag, which is what would rewrite the block - confirm.
 * NOTE(review): the result of btrfs_commit_transaction() is not assigned on
 * the visible line, so a failed commit looks like it goes unreported.
 */
12248 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
12250 struct btrfs_path path;
12251 struct btrfs_trans_handle *trans;
12252 struct btrfs_key key;
12255 printf("Recowing metadata block %llu\n", eb->start);
/* Build the key used to find the root that owns this block. */
12256 key.objectid = btrfs_header_owner(eb);
12257 key.type = BTRFS_ROOT_ITEM_KEY;
12258 key.offset = (u64)-1;
12260 root = btrfs_read_fs_root(root->fs_info, &key);
12261 if (IS_ERR(root)) {
12262 fprintf(stderr, "Couldn't find owner root %llu\n",
12264 return PTR_ERR(root);
12267 trans = btrfs_start_transaction(root, 1);
12269 return PTR_ERR(trans);
12271 btrfs_init_path(&path);
12272 path.lowest_level = btrfs_header_level(eb);
12273 if (path.lowest_level)
12274 btrfs_node_key_to_cpu(eb, &key, 0);
12276 btrfs_item_key_to_cpu(eb, &key, 0);
12278 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12279 btrfs_commit_transaction(trans, root);
12280 btrfs_release_path(&path);
/*
 * delete_bad_item - remove the item described by @bad from its owning root.
 *
 * The owning root is read using bad->root_id as the objectid of a
 * ROOT_ITEM key with offset (u64)-1.  Inside a new transaction the item is
 * located with btrfs_search_slot() using bad->key and removed with
 * btrfs_del_item(); the transaction is then committed and the path
 * released.
 *
 * @root: any root of the filesystem; only root->fs_info is read before the
 *        variable is replaced by the owning root
 * @bad:  record holding the root id and the key of the item to delete
 *
 * Returns PTR_ERR() of the root lookup or of the transaction start when
 * either fails.
 *
 * NOTE(review): the result of btrfs_commit_transaction() is not assigned on
 * the visible line - confirm whether a commit failure should be returned.
 */
12284 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12286 struct btrfs_path path;
12287 struct btrfs_trans_handle *trans;
12288 struct btrfs_key key;
12291 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12292 bad->key.type, bad->key.offset);
12293 key.objectid = bad->root_id;
12294 key.type = BTRFS_ROOT_ITEM_KEY;
12295 key.offset = (u64)-1;
12297 root = btrfs_read_fs_root(root->fs_info, &key);
12298 if (IS_ERR(root)) {
12299 fprintf(stderr, "Couldn't find owner root %llu\n",
12301 return PTR_ERR(root);
12304 trans = btrfs_start_transaction(root, 1);
12306 return PTR_ERR(trans);
12308 btrfs_init_path(&path);
12309 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12315 ret = btrfs_del_item(trans, root, &path);
12317 btrfs_commit_transaction(trans, root);
12318 btrfs_release_path(&path);
/*
 * zero_log_tree - clear the log root recorded in the super block.
 *
 * Starts a transaction on @root, sets both the log root and the log root
 * level in root->fs_info->super_copy to 0 and commits the transaction.
 *
 * @root: root used to start and commit the transaction
 *
 * ret holds PTR_ERR(trans) when the transaction cannot be started,
 * otherwise the result of btrfs_commit_transaction().
 */
12322 static int zero_log_tree(struct btrfs_root *root)
12324 struct btrfs_trans_handle *trans;
12327 trans = btrfs_start_transaction(root, 1);
12328 if (IS_ERR(trans)) {
12329 ret = PTR_ERR(trans);
12332 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12333 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12334 ret = btrfs_commit_transaction(trans, root);
/*
 * populate_csum - checksum one data extent, one sector at a time.
 *
 * For every offset in [0, len), stepping by fs_info->sectorsize, the data at
 * start + offset is read into @buf with read_extent_data() and handed to
 * btrfs_csum_file_block() together with start + len and start + offset.
 *
 * @trans:     transaction handle passed to btrfs_csum_file_block()
 * @csum_root: checksum tree root; its fs_info supplies the sector size
 * @buf:       scratch buffer; callers in this file allocate sectorsize bytes
 * @start:     logical start of the extent
 * (len, the extent length in bytes, is used in the body; its declaration is
 * not on the visible signature lines.)
 */
12338 static int populate_csum(struct btrfs_trans_handle *trans,
12339 struct btrfs_root *csum_root, char *buf, u64 start,
12342 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12347 while (offset < len) {
12348 sectorsize = fs_info->sectorsize;
12349 ret = read_extent_data(fs_info, buf, start + offset,
12353 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12354 start + offset, buf, sectorsize);
12357 offset += sectorsize;
/*
 * fill_csum_tree_from_one_fs_root - checksum the regular file extents
 * referenced by a single fs/subvolume tree.
 *
 * A sectorsize scratch buffer is allocated, @cur_root is searched read-only
 * (NULL transaction) and its items are walked with btrfs_next_item().  Only
 * EXTENT_DATA items whose file extent type is BTRFS_FILE_EXTENT_REG are
 * processed: their disk bytenr and disk num bytes are passed to
 * populate_csum().  A result of -EEXIST from populate_csum() is tested
 * explicitly.
 *
 * @trans:     transaction handle forwarded to populate_csum()
 * @csum_root: checksum tree receiving the new checksums
 * @cur_root:  fs/subvolume tree to scan
 *
 * NOTE(review): the handling of -EEXIST and the release of the scratch
 * buffer are not on the visible lines - confirm against the full function.
 */
12362 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12363 struct btrfs_root *csum_root,
12364 struct btrfs_root *cur_root)
12366 struct btrfs_path path;
12367 struct btrfs_key key;
12368 struct extent_buffer *node;
12369 struct btrfs_file_extent_item *fi;
12376 buf = malloc(cur_root->fs_info->sectorsize);
12380 btrfs_init_path(&path);
12384 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12387 /* Iterate all regular file extents and fill its csum */
12389 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12391 if (key.type != BTRFS_EXTENT_DATA_KEY)
12393 node = path.nodes[0];
12394 slot = path.slots[0];
12395 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12396 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12398 start = btrfs_file_extent_disk_bytenr(node, fi);
12399 len = btrfs_file_extent_disk_num_bytes(node, fi);
12401 ret = populate_csum(trans, csum_root, buf, start, len);
12402 if (ret == -EEXIST)
12408 * TODO: if next leaf is corrupted, jump to nearest next valid
12411 ret = btrfs_next_item(cur_root, &path);
12421 btrfs_release_path(&path);
/*
 * fill_csum_tree_from_fs - rebuild checksums by scanning every fs/subvolume
 * tree listed in the tree root.
 *
 * The tree root is searched read-only starting at a ROOT_ITEM key with
 * objectid BTRFS_FS_TREE_OBJECTID and walked with btrfs_next_item().  Keys
 * are tested against three filters: objectid above BTRFS_LAST_FREE_OBJECTID,
 * type other than ROOT_ITEM, and !is_fstree(objectid).  For the remaining
 * keys the offset is set to (u64)-1, the root is read with
 * btrfs_read_fs_root() and handed to fill_csum_tree_from_one_fs_root().
 *
 * @trans:     transaction handle forwarded to the per-root helper
 * @csum_root: checksum tree receiving the new checksums; its fs_info
 *             supplies the tree root
 */
12426 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12427 struct btrfs_root *csum_root)
12429 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12430 struct btrfs_path path;
12431 struct btrfs_root *tree_root = fs_info->tree_root;
12432 struct btrfs_root *cur_root;
12433 struct extent_buffer *node;
12434 struct btrfs_key key;
12438 btrfs_init_path(&path);
12439 key.objectid = BTRFS_FS_TREE_OBJECTID;
12441 key.type = BTRFS_ROOT_ITEM_KEY;
12442 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12451 node = path.nodes[0];
12452 slot = path.slots[0];
12453 btrfs_item_key_to_cpu(node, &key, slot);
12454 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12456 if (key.type != BTRFS_ROOT_ITEM_KEY)
12458 if (!is_fstree(key.objectid))
12460 key.offset = (u64)-1;
12462 cur_root = btrfs_read_fs_root(fs_info, &key);
12463 if (IS_ERR(cur_root) || !cur_root) {
12464 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12468 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12473 ret = btrfs_next_item(tree_root, &path);
12483 btrfs_release_path(&path);
/*
 * fill_csum_tree_from_extent - rebuild checksums by scanning the extent
 * tree for data extents.
 *
 * The extent root is searched read-only and walked leaf by leaf with
 * btrfs_next_leaf().  Items are tested for key type EXTENT_ITEM and for the
 * BTRFS_EXTENT_FLAG_DATA flag; for data extents populate_csum() is called
 * with key.objectid as the extent start.
 *
 * @trans:     transaction handle forwarded to populate_csum()
 * @csum_root: checksum tree receiving the new checksums; its fs_info
 *             supplies the extent root and the sector size
 */
12487 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12488 struct btrfs_root *csum_root)
12490 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12491 struct btrfs_path path;
12492 struct btrfs_extent_item *ei;
12493 struct extent_buffer *leaf;
12495 struct btrfs_key key;
12498 btrfs_init_path(&path);
12500 key.type = BTRFS_EXTENT_ITEM_KEY;
12502 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12504 btrfs_release_path(&path);
/* Scratch buffer sized for one sector, as expected by populate_csum(). */
12508 buf = malloc(csum_root->fs_info->sectorsize);
12510 btrfs_release_path(&path);
/* Past the last slot of this leaf: advance to the next leaf. */
12515 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12516 ret = btrfs_next_leaf(extent_root, &path);
12524 leaf = path.nodes[0];
12526 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12527 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12532 ei = btrfs_item_ptr(leaf, path.slots[0],
12533 struct btrfs_extent_item);
12534 if (!(btrfs_extent_flags(leaf, ei) &
12535 BTRFS_EXTENT_FLAG_DATA)) {
12540 ret = populate_csum(trans, csum_root, buf, key.objectid,
12547 btrfs_release_path(&path);
12553 * Recalculate the csum and put it into the csum tree.
12555 * Extent tree init will wipe out all the extent info, so in that case, we
12556 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
12557 * will use fs/subvol trees to init the csum tree.
/*
 * fill_csum_tree - dispatch to one of the two checksum rebuild strategies.
 *
 * @trans:          transaction handle forwarded to the chosen helper
 * @csum_root:      checksum tree to refill
 * @search_fs_tree: non-zero selects fill_csum_tree_from_fs(); zero selects
 *                  fill_csum_tree_from_extent()
 *
 * Returns whatever the chosen helper returns.
 */
12559 static int fill_csum_tree(struct btrfs_trans_handle *trans,
12560 struct btrfs_root *csum_root,
12561 int search_fs_tree)
12563 if (search_fs_tree)
12564 return fill_csum_tree_from_fs(trans, csum_root);
12566 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * free_roots_info_cache - tear down the file-level roots_info_cache.
 *
 * While the cache tree is not empty, the first cache extent is removed and
 * converted back to its enclosing struct root_item_info.  The tree itself is
 * then freed and the global pointer reset to NULL so that
 * build_roots_info_cache() allocates a fresh one next time.
 *
 * NOTE(review): the statement that releases each root_item_info is not on
 * the visible lines - confirm it follows the container_of() conversion.
 */
12569 static void free_roots_info_cache(void)
12571 if (!roots_info_cache)
12574 while (!cache_tree_empty(roots_info_cache)) {
12575 struct cache_extent *entry;
12576 struct root_item_info *rii;
12578 entry = first_cache_extent(roots_info_cache);
12581 remove_cache_extent(roots_info_cache, entry);
12582 rii = container_of(entry, struct root_item_info, cache_extent);
12586 free(roots_info_cache);
12587 roots_info_cache = NULL;
/*
 * build_roots_info_cache - scan the extent tree and record, per root id,
 * the highest-level tree block that references that root.
 *
 * roots_info_cache is allocated and initialised on first use.  The extent
 * root of @info is searched read-only and walked leaf by leaf.  Only
 * EXTENT_ITEM and METADATA_ITEM keys are considered, and EXTENT_ITEM keys
 * additionally need the BTRFS_EXTENT_FLAG_TREE_BLOCK flag.  The first inline
 * ref must be of type BTRFS_TREE_BLOCK_REF_KEY; its offset is used as the
 * root id.  Each root id maps to one struct root_item_info stored in the
 * cache with start == root id and size == 1.
 *
 * @info: filesystem whose extent root is scanned
 */
12590 static int build_roots_info_cache(struct btrfs_fs_info *info)
12593 struct btrfs_key key;
12594 struct extent_buffer *leaf;
12595 struct btrfs_path path;
12597 if (!roots_info_cache) {
12598 roots_info_cache = malloc(sizeof(*roots_info_cache));
12599 if (!roots_info_cache)
12601 cache_tree_init(roots_info_cache);
12604 btrfs_init_path(&path);
12606 key.type = BTRFS_EXTENT_ITEM_KEY;
12608 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12611 leaf = path.nodes[0];
12614 struct btrfs_key found_key;
12615 struct btrfs_extent_item *ei;
12616 struct btrfs_extent_inline_ref *iref;
12617 int slot = path.slots[0];
12622 struct cache_extent *entry;
12623 struct root_item_info *rii;
/* Ran off the end of this leaf: move to the next one and reload. */
12625 if (slot >= btrfs_header_nritems(leaf)) {
12626 ret = btrfs_next_leaf(info->extent_root, &path);
12633 leaf = path.nodes[0];
12634 slot = path.slots[0];
12637 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12639 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12640 found_key.type != BTRFS_METADATA_ITEM_KEY)
12643 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12644 flags = btrfs_extent_flags(leaf, ei);
12646 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12647 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * METADATA_ITEM: the inline ref directly follows the extent item and the
 * level is taken from the key offset.  Otherwise a btrfs_tree_block_info
 * sits between them and supplies the level.
 */
12650 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12651 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12652 level = found_key.offset;
12654 struct btrfs_tree_block_info *binfo;
12656 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12657 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12658 level = btrfs_tree_block_level(leaf, binfo);
12662 * For a root extent, it must be of the following type and the
12663 * first (and only one) iref in the item.
12665 type = btrfs_extent_inline_ref_type(leaf, iref);
12666 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12669 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12670 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12672 rii = malloc(sizeof(struct root_item_info));
12677 rii->cache_extent.start = root_id;
12678 rii->cache_extent.size = 1;
/* (u8)-1 marks an entry that has not recorded any tree block yet. */
12679 rii->level = (u8)-1;
12680 entry = &rii->cache_extent;
12681 ret = insert_cache_extent(roots_info_cache, entry);
12684 rii = container_of(entry, struct root_item_info,
12688 ASSERT(rii->cache_extent.start == root_id);
12689 ASSERT(rii->cache_extent.size == 1);
/*
 * A higher level than recorded (or a fresh entry) replaces the candidate
 * and resets node_count to 1.
 * NOTE(review): the body of the equal-level branch is not on the visible
 * lines; presumably it counts the additional node - confirm.
 */
12691 if (level > rii->level || rii->level == (u8)-1) {
12692 rii->level = level;
12693 rii->bytenr = found_key.objectid;
12694 rii->gen = btrfs_extent_generation(leaf, ei);
12695 rii->node_count = 1;
12696 } else if (level == rii->level) {
12704 btrfs_release_path(&path);
/*
 * maybe_repair_root_item - compare one on-disk root item with the data
 * collected in roots_info_cache and optionally rewrite it.
 *
 * The cache entry for root_key->objectid must exist and must have
 * node_count == 1; otherwise an error message is emitted.  The root item is
 * copied out of the leaf at path->nodes[0] / path->slots[0] and its bytenr,
 * level and generation are compared with the cached values.  On mismatch a
 * message is printed unless both @read_only_mode and the global repair flag
 * are set.  A root item generation newer than the cached one gets its own
 * message.  When @read_only_mode is zero the three fields are overwritten
 * with the cached values and written back into the leaf.
 *
 * @path:           path positioned on the root item to examine
 * @root_key:       key of that root item; objectid is the root id
 * @read_only_mode: non-zero to only check, zero to also update the leaf
 *
 * NOTE(review): the return statements are not on the visible lines; the
 * caller sums positive results and treats negative ones as errors - confirm.
 */
12709 static int maybe_repair_root_item(struct btrfs_path *path,
12710 const struct btrfs_key *root_key,
12711 const int read_only_mode)
12713 const u64 root_id = root_key->objectid;
12714 struct cache_extent *entry;
12715 struct root_item_info *rii;
12716 struct btrfs_root_item ri;
12717 unsigned long offset;
12719 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12722 "Error: could not find extent items for root %llu\n",
12723 root_key->objectid);
12727 rii = container_of(entry, struct root_item_info, cache_extent);
12728 ASSERT(rii->cache_extent.start == root_id);
12729 ASSERT(rii->cache_extent.size == 1);
12731 if (rii->node_count != 1) {
12733 "Error: could not find btree root extent for root %llu\n",
/* Copy the root item out of the leaf so it can be compared and edited. */
12738 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12739 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12741 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12742 btrfs_root_level(&ri) != rii->level ||
12743 btrfs_root_generation(&ri) != rii->gen) {
12746 * If we're in repair mode but our caller told us to not update
12747 * the root item, i.e. just check if it needs to be updated, don't
12748 * print this message, since the caller will call us again shortly
12749 * for the same root item without read only mode (the caller will
12750 * open a transaction first).
12752 if (!(read_only_mode && repair))
12754 "%sroot item for root %llu,"
12755 " current bytenr %llu, current gen %llu, current level %u,"
12756 " new bytenr %llu, new gen %llu, new level %u\n",
12757 (read_only_mode ? "" : "fixing "),
12759 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12760 btrfs_root_level(&ri),
12761 rii->bytenr, rii->gen, rii->level);
12763 if (btrfs_root_generation(&ri) > rii->gen) {
12765 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12766 root_id, btrfs_root_generation(&ri), rii->gen);
12770 if (!read_only_mode) {
12771 btrfs_set_root_bytenr(&ri, rii->bytenr);
12772 btrfs_set_root_level(&ri, rii->level);
12773 btrfs_set_root_generation(&ri, rii->gen);
12774 write_extent_buffer(path->nodes[0], &ri,
12775 offset, sizeof(ri));
12785 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12786 * caused read-only snapshots to be corrupted if they were created at a moment
12787 * when the source subvolume/snapshot had orphan items. The issue was that the
12788 * on-disk root items became incorrect, referring to the pre orphan cleanup root
12789 * node instead of the post orphan cleanup root node.
12790 * So this function, and its callees, just detects and fixes those cases. Even
12791 * though the regression was for read-only snapshots, this function applies to
12792 * any snapshot/subvolume root.
12793 * This must be run before any other repair code - not doing it so, makes other
12794 * repair code delete or modify backrefs in the extent tree for example, which
12795 * will result in an inconsistent fs after repairing the root items.
/*
 * repair_root_items - check every root item in the tree root against the
 * extent tree and fix outdated ones when repair is enabled.
 *
 * build_roots_info_cache() is run first.  The tree root is then searched
 * starting at a ROOT_ITEM key with objectid BTRFS_FIRST_FREE_OBJECTID.
 * Items that are not ROOT_ITEM keys, or whose objectid is
 * BTRFS_TREE_RELOC_OBJECTID, are filtered out.  Each remaining item is
 * passed to maybe_repair_root_item(), in read-only mode while no
 * transaction is open (trans is NULL).  When a mismatch is found without a
 * transaction and repair is set, the path is released, apparently so the
 * leaf can be revisited with a transaction open (see need_trans).  On leaf
 * boundaries find_next_key() picks the next search key and an open
 * transaction is committed.  The roots info cache is freed on the way out.
 *
 * @info: filesystem to check
 *
 * NOTE(review): the caller prints the positive result as a count of roots
 * and treats a negative one as an errno - the return statements themselves
 * are not on the visible lines.
 */
12797 static int repair_root_items(struct btrfs_fs_info *info)
12799 struct btrfs_path path;
12800 struct btrfs_key key;
12801 struct extent_buffer *leaf;
12802 struct btrfs_trans_handle *trans = NULL;
12805 int need_trans = 0;
12807 btrfs_init_path(&path);
12809 ret = build_roots_info_cache(info);
12813 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12814 key.type = BTRFS_ROOT_ITEM_KEY;
12819 * Avoid opening and committing transactions if a leaf doesn't have
12820 * any root items that need to be fixed, so that we avoid rotating
12821 * backup roots unnecessarily.
12824 trans = btrfs_start_transaction(info->tree_root, 1);
12825 if (IS_ERR(trans)) {
12826 ret = PTR_ERR(trans);
12831 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12835 leaf = path.nodes[0];
12838 struct btrfs_key found_key;
12840 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12841 int no_more_keys = find_next_key(&path, &key);
12843 btrfs_release_path(&path);
12845 ret = btrfs_commit_transaction(trans,
12857 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12859 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12861 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12864 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12868 if (!trans && repair) {
12871 btrfs_release_path(&path);
12881 free_roots_info_cache();
12882 btrfs_release_path(&path);
12884 btrfs_commit_transaction(trans, info->tree_root);
/*
 * clear_free_space_cache - drop the v1 free space cache of every block
 * group and invalidate the cache generation in the super block.
 *
 * Block groups are visited with btrfs_lookup_first_block_group(), advancing
 * past each one by objectid + offset, and cleared with
 * btrfs_clear_free_space_cache().  Afterwards a transaction is started on
 * the tree root, the super block cache generation is set to (u64)-1 and the
 * transaction is committed.
 *
 * @fs_info: filesystem whose free space cache is cleared
 *
 * Returns PTR_ERR(trans) when the final transaction cannot be started.
 *
 * NOTE(review): the result of btrfs_commit_transaction() is not assigned on
 * the visible line - confirm whether a commit failure should be returned.
 */
12891 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12893 struct btrfs_trans_handle *trans;
12894 struct btrfs_block_group_cache *bg_cache;
12898 /* Clear all free space cache inodes and its extent data */
12900 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12903 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12906 current = bg_cache->key.objectid + bg_cache->key.offset;
12909 /* Don't forget to set cache_generation to -1 */
12910 trans = btrfs_start_transaction(fs_info->tree_root, 0);
12911 if (IS_ERR(trans)) {
12912 error("failed to update super block cache generation");
12913 return PTR_ERR(trans);
12915 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12916 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * do_clear_free_space_cache - clear the free space cache of the requested
 * version and report progress on stdout.
 *
 * clear_version 1: an error message is emitted when the FREE_SPACE_TREE
 * compat_ro flag is set (the filesystem uses v2); otherwise
 * clear_free_space_cache() is run.
 * clear_version 2: a notice is printed when the FREE_SPACE_TREE flag is not
 * set; otherwise btrfs_clear_free_space_tree() is run.
 *
 * @fs_info: filesystem to operate on
 * (clear_version is used in the body; its declaration is not on the visible
 * signature line.)
 */
12921 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
12926 if (clear_version == 1) {
12927 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
12929 "free space cache v2 detected, use --clear-space-cache v2");
12933 printf("Clearing free space cache\n");
12934 ret = clear_free_space_cache(fs_info);
12936 error("failed to clear free space cache");
12939 printf("Free space cache cleared\n");
12941 } else if (clear_version == 2) {
12942 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
12943 printf("no free space cache v2 to clear\n");
12947 printf("Clear free space cache v2\n");
12948 ret = btrfs_clear_free_space_tree(fs_info);
12950 error("failed to clear free space cache v2: %d", ret);
12953 printf("free space cache v2 cleared\n");
/*
 * Usage text for the check command, one string per output line.  The first
 * entry is the synopsis and the second a one-line summary; the remaining
 * entries describe the command and its options.  cmd_check() passes this
 * array to usage().
 */
12960 const char * const cmd_check_usage[] = {
12961 "btrfs check [options] <device>",
12962 "Check structural integrity of a filesystem (unmounted).",
12963 "Check structural integrity of an unmounted filesystem. Verify internal",
12964 "trees' consistency and item connectivity. In the repair mode try to",
12965 "fix the problems found. ",
12966 "WARNING: the repair mode is considered dangerous",
12968 "-s|--super <superblock> use this superblock copy",
12969 "-b|--backup use the first valid backup root copy",
12970 "--force skip mount checks, repair is not possible",
12971 "--repair try to repair the filesystem",
12972 "--readonly run in read-only mode (default)",
12973 "--init-csum-tree create a new CRC tree",
12974 "--init-extent-tree create a new extent tree",
12975 "--mode <MODE> allows choice of memory/IO trade-offs",
12976 " where MODE is one of:",
12977 " original - read inodes and extents to memory (requires",
12978 " more memory, does less IO)",
12979 " lowmem - try to use less memory but read blocks again",
12981 "--check-data-csum verify checksums of data blocks",
12982 "-Q|--qgroup-report print a report on qgroup consistency",
12983 "-E|--subvol-extents <subvolid>",
12984 " print subvolume extents and sharing state",
12985 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
12986 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
12987 "-p|--progress indicate progress",
12988 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * cmd_check - entry point of the check command.
 *
 * Phases visible in this function, in order:
 *  1. parse options with getopt_long() and accumulate ctree_flags
 *  2. require exactly one positional argument (the device)
 *  3. reject --readonly combined with repair options and warn about lowmem
 *     repair
 *  4. check whether the device is mounted, honouring --force
 *  5. open the filesystem with open_ctree_fs_info()
 *  6. verify that the tree, dev and chunk roots are up to date
 *  7. handle the standalone actions: clearing the space cache, zeroing the
 *     log tree in repair mode, the qgroup report and the subvolume extent
 *     report
 *  8. optionally reinitialise the extent and/or checksum tree inside one
 *     transaction
 *  9. run the checks: chunks and extents, root items, free space
 *     cache/tree, fs roots, csums, root refs, then recow and delete queued
 *     items, then quota groups
 * 10. print the summary counters and release resources
 *
 * @argc, @argv: command line of the check subcommand
 */
12992 int cmd_check(int argc, char **argv)
12994 struct cache_tree root_cache;
12995 struct btrfs_root *root;
12996 struct btrfs_fs_info *info;
12999 u64 tree_root_bytenr = 0;
13000 u64 chunk_root_bytenr = 0;
13001 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
13005 int init_csum_tree = 0;
13007 int clear_space_cache = 0;
13008 int qgroup_report = 0;
13009 int qgroups_repaired = 0;
13010 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
13015 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
13016 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
13017 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
13018 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
13019 GETOPT_VAL_FORCE };
13020 static const struct option long_options[] = {
13021 { "super", required_argument, NULL, 's' },
13022 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
13023 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
13024 { "init-csum-tree", no_argument, NULL,
13025 GETOPT_VAL_INIT_CSUM },
13026 { "init-extent-tree", no_argument, NULL,
13027 GETOPT_VAL_INIT_EXTENT },
13028 { "check-data-csum", no_argument, NULL,
13029 GETOPT_VAL_CHECK_CSUM },
13030 { "backup", no_argument, NULL, 'b' },
13031 { "subvol-extents", required_argument, NULL, 'E' },
13032 { "qgroup-report", no_argument, NULL, 'Q' },
13033 { "tree-root", required_argument, NULL, 'r' },
13034 { "chunk-root", required_argument, NULL,
13035 GETOPT_VAL_CHUNK_TREE },
13036 { "progress", no_argument, NULL, 'p' },
13037 { "mode", required_argument, NULL,
13039 { "clear-space-cache", required_argument, NULL,
13040 GETOPT_VAL_CLEAR_SPACE_CACHE},
13041 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
13042 { NULL, 0, NULL, 0}
/*
 * NOTE(review): the short option string below lists E without a trailing
 * colon, while the long option table marks subvol-extents (value E) as
 * required_argument and the usage text shows -E taking a subvolid.  With
 * the short form getopt would not supply optarg - confirm and consider
 * adding the colon after E.
 */
13045 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
13049 case 'a': /* ignored */ break;
13051 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
13054 num = arg_strtou64(optarg);
13055 if (num >= BTRFS_SUPER_MIRROR_MAX) {
13057 "super mirror should be less than %d",
13058 BTRFS_SUPER_MIRROR_MAX);
13061 bytenr = btrfs_sb_offset(((int)num));
13062 printf("using SB copy %llu, bytenr %llu\n", num,
13063 (unsigned long long)bytenr);
13069 subvolid = arg_strtou64(optarg);
13072 tree_root_bytenr = arg_strtou64(optarg);
13074 case GETOPT_VAL_CHUNK_TREE:
13075 chunk_root_bytenr = arg_strtou64(optarg);
13078 ctx.progress_enabled = true;
13082 usage(cmd_check_usage);
13083 case GETOPT_VAL_REPAIR:
13084 printf("enabling repair mode\n");
13086 ctree_flags |= OPEN_CTREE_WRITES;
13088 case GETOPT_VAL_READONLY:
13091 case GETOPT_VAL_INIT_CSUM:
13092 printf("Creating a new CRC tree\n");
13093 init_csum_tree = 1;
13095 ctree_flags |= OPEN_CTREE_WRITES;
13097 case GETOPT_VAL_INIT_EXTENT:
13098 init_extent_tree = 1;
13099 ctree_flags |= (OPEN_CTREE_WRITES |
13100 OPEN_CTREE_NO_BLOCK_GROUPS);
13103 case GETOPT_VAL_CHECK_CSUM:
13104 check_data_csum = 1;
13106 case GETOPT_VAL_MODE:
13107 check_mode = parse_check_mode(optarg);
13108 if (check_mode == CHECK_MODE_UNKNOWN) {
13109 error("unknown mode: %s", optarg);
13113 case GETOPT_VAL_CLEAR_SPACE_CACHE:
13114 if (strcmp(optarg, "v1") == 0) {
13115 clear_space_cache = 1;
13116 } else if (strcmp(optarg, "v2") == 0) {
13117 clear_space_cache = 2;
13118 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
13121 "invalid argument to --clear-space-cache, must be v1 or v2");
13124 ctree_flags |= OPEN_CTREE_WRITES;
13126 case GETOPT_VAL_FORCE:
13132 if (check_argc_exact(argc - optind, 1))
13133 usage(cmd_check_usage);
13135 if (ctx.progress_enabled) {
13136 ctx.tp = TASK_NOTHING;
13137 ctx.info = task_init(print_status_check, print_status_return, &ctx);
13140 /* This check is the only reason for --readonly to exist */
13141 if (readonly && repair) {
13142 error("repair options are not compatible with --readonly");
13147 * experimental and dangerous
13149 if (repair && check_mode == CHECK_MODE_LOWMEM)
13150 warning("low-memory mode repair support is only partial");
13153 cache_tree_init(&root_cache);
13155 ret = check_mounted(argv[optind]);
13158 error("could not check mount status: %s",
13164 "%s is currently mounted, use --force if you really intend to check the filesystem",
13172 error("repair and --force is not yet supported");
13179 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
13183 "filesystem mounted, continuing because of --force");
13185 /* A block device is mounted in exclusive mode by kernel */
13186 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
13189 /* only allow partial opening under repair mode */
13191 ctree_flags |= OPEN_CTREE_PARTIAL;
/* Open the filesystem with the flags accumulated during option parsing. */
13193 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
13194 chunk_root_bytenr, ctree_flags);
13196 error("cannot open file system");
13202 global_info = info;
13203 root = info->fs_root;
13204 uuid_unparse(info->super_copy->fsid, uuidbuf);
13206 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
13209 * Check the bare minimum before starting anything else that could rely
13210 * on it, namely the tree roots, any local consistency checks
13212 if (!extent_buffer_uptodate(info->tree_root->node) ||
13213 !extent_buffer_uptodate(info->dev_root->node) ||
13214 !extent_buffer_uptodate(info->chunk_root->node)) {
13215 error("critical roots corrupted, unable to check the filesystem");
13221 if (clear_space_cache) {
13222 ret = do_clear_free_space_cache(info, clear_space_cache);
13228 * repair mode will force us to commit transaction which
13229 * will make us fail to load log tree when mounting.
13231 if (repair && btrfs_super_log_root(info->super_copy)) {
13232 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
13238 ret = zero_log_tree(root);
13241 error("failed to zero log tree: %d", ret);
13246 if (qgroup_report) {
13247 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
13249 ret = qgroup_verify_all(info);
13256 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
13257 subvolid, argv[optind], uuidbuf);
13258 ret = print_extent_state(info, subvolid);
/*
 * Optional reinitialisation of the extent and/or checksum tree, performed
 * inside a single transaction started on the extent root.
 */
13263 if (init_extent_tree || init_csum_tree) {
13264 struct btrfs_trans_handle *trans;
13266 trans = btrfs_start_transaction(info->extent_root, 0);
13267 if (IS_ERR(trans)) {
13268 error("error starting transaction");
13269 ret = PTR_ERR(trans);
13274 if (init_extent_tree) {
13275 printf("Creating a new extent tree\n");
13276 ret = reinit_extent_tree(trans, info);
13282 if (init_csum_tree) {
13283 printf("Reinitialize checksum tree\n");
13284 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
13286 error("checksum tree initialization failed: %d",
13293 ret = fill_csum_tree(trans, info->csum_root,
13297 error("checksum tree refilling failed: %d", ret);
13302 * Ok now we commit and run the normal fsck, which will add
13303 * extent entries for all of the items it finds.
13305 ret = btrfs_commit_transaction(trans, info->extent_root);
13310 if (!extent_buffer_uptodate(info->extent_root->node)) {
13311 error("critical: extent_root, unable to check the filesystem");
13316 if (!extent_buffer_uptodate(info->csum_root->node)) {
13317 error("critical: csum_root, unable to check the filesystem");
13323 ret = do_check_chunks_and_extents(info);
13327 "errors found in extent allocation tree or chunk allocation");
13329 ret = repair_root_items(info);
13332 error("failed to repair root items: %s", strerror(-ret));
13336 fprintf(stderr, "Fixed %d roots.\n", ret);
13338 } else if (ret > 0) {
13340 "Found %d roots with an outdated root item.\n",
13343 "Please run a filesystem check with the option --repair to fix them.\n");
13349 if (!ctx.progress_enabled) {
13350 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13351 fprintf(stderr, "checking free space tree\n");
13353 fprintf(stderr, "checking free space cache\n");
13355 ret = check_space_cache(root);
13358 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13359 error("errors found in free space tree");
13361 error("errors found in free space cache");
13366 * We used to have to have these hole extents in between our real
13367 * extents so if we don't have this flag set we need to make sure there
13368 * are no gaps in the file extents for inodes, otherwise we can just
13369 * ignore it when this happens.
13371 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13372 ret = do_check_fs_roots(info, &root_cache);
13375 error("errors found in fs roots");
13379 fprintf(stderr, "checking csums\n");
13380 ret = check_csums(root);
13383 error("errors found in csum tree");
13387 fprintf(stderr, "checking root refs\n");
13388 /* For low memory mode, check_fs_roots_v2 handles root refs */
13389 if (check_mode != CHECK_MODE_LOWMEM) {
13390 ret = check_root_refs(root, &root_cache);
13393 error("errors found in root refs");
13398 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13399 struct extent_buffer *eb;
13401 eb = list_first_entry(&root->fs_info->recow_ebs,
13402 struct extent_buffer, recow);
13403 list_del_init(&eb->recow);
13404 ret = recow_extent_buffer(root, eb);
13407 error("fails to fix transid errors");
13412 while (!list_empty(&delete_items)) {
13413 struct bad_item *bad;
13415 bad = list_first_entry(&delete_items, struct bad_item, list);
13416 list_del_init(&bad->list);
13418 ret = delete_bad_item(root, bad);
13424 if (info->quota_enabled) {
13425 fprintf(stderr, "checking quota groups\n");
13426 ret = qgroup_verify_all(info);
13429 error("failed to check quota groups");
13433 ret = repair_qgroups(info, &qgroups_repaired);
13436 error("failed to repair quota groups");
13442 if (!list_empty(&root->fs_info->recow_ebs)) {
13443 error("transid errors in file system");
13448 printf("found %llu bytes used, ",
13449 (unsigned long long)bytes_used);
13451 printf("error(s) found\n");
13453 printf("no error found\n");
13454 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13455 printf("total tree bytes: %llu\n",
13456 (unsigned long long)total_btree_bytes);
13457 printf("total fs tree bytes: %llu\n",
13458 (unsigned long long)total_fs_tree_bytes);
13459 printf("total extent tree bytes: %llu\n",
13460 (unsigned long long)total_extent_tree_bytes);
13461 printf("btree space waste bytes: %llu\n",
13462 (unsigned long long)btree_space_waste);
13463 printf("file data blocks allocated: %llu\n referenced %llu\n",
13464 (unsigned long long)data_bytes_allocated,
13465 (unsigned long long)data_bytes_referenced);
13467 free_qgroup_counts();
13468 free_root_recs_tree(&root_cache);
13472 if (ctx.progress_enabled)
13473 task_deinit(ctx.info);