2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87 struct extent_backref {
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
/* Map an rb_node embedded in a struct extent_backref back to that backref. */
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
98 return rb_entry(node, struct extent_backref, node);
101 struct data_backref {
102 struct extent_backref node;
116 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
117 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
118 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
119 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
120 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
121 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but not match */
122 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
123 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
124 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
125 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
126 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
127 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
128 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM no reference */
129 #define NO_INODE_ITEM (1<<14) /* no inode_item */
130 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
131 #define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
132 #define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but not match */
/* Get the struct data_backref that embeds @back as its 'node' member. */
134 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 return container_of(back, struct data_backref, node);
/*
 * Comparator for two data backrefs that are stored as rb-tree nodes.
 *
 * Warns if either node is not flagged is_data, then compares, in order:
 * parent (which shares storage with root), owner, offset and -- only when
 * both sides have found_ref set -- disk_bytenr and bytes.  A full backref
 * gets separate handling once the parents are known to match.
 * NOTE(review): the return statements are not part of this listing, so the
 * sign convention must be confirmed against the complete file.
 */
139 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
141 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
142 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
143 struct data_backref *back1 = to_data_backref(ext1);
144 struct data_backref *back2 = to_data_backref(ext2);
146 WARN_ON(!ext1->is_data);
147 WARN_ON(!ext2->is_data);
149 /* parent and root are a union, so this covers both */
150 if (back1->parent > back2->parent)
152 if (back1->parent < back2->parent)
155 /* This is a full backref and the parents match. */
156 if (back1->node.full_backref)
159 if (back1->owner > back2->owner)
161 if (back1->owner < back2->owner)
164 if (back1->offset > back2->offset)
166 if (back1->offset < back2->offset)
169 if (back1->found_ref && back2->found_ref) {
170 if (back1->disk_bytenr > back2->disk_bytenr)
172 if (back1->disk_bytenr < back2->disk_bytenr)
175 if (back1->bytes > back2->bytes)
177 if (back1->bytes < back2->bytes)
185 * Much like data_backref, but with the undetermined members removed
186 * and changed to use list_head.
187 * During extent scan, it is stored in root->orphan_data_extent.
188 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
190 struct orphan_data_extent {
191 struct list_head list;
199 struct tree_backref {
200 struct extent_backref node;
/* Get the struct tree_backref that embeds @back as its 'node' member. */
207 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
209 return container_of(back, struct tree_backref, node);
/*
 * Comparator for two tree backrefs that are stored as rb-tree nodes.
 * Warns if either node is flagged is_data and compares the parent field,
 * which shares storage with root.
 * NOTE(review): the return statements are not visible in this listing.
 */
212 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
214 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
215 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
216 struct tree_backref *back1 = to_tree_backref(ext1);
217 struct tree_backref *back2 = to_tree_backref(ext2);
219 WARN_ON(ext1->is_data);
220 WARN_ON(ext2->is_data);
222 /* parent and root are a union, so this covers both */
223 if (back1->parent > back2->parent)
225 if (back1->parent < back2->parent)
/*
 * Top-level comparator for extent backrefs: compares is_data first, then
 * full_backref, and finally defers to compare_data_backref() or
 * compare_tree_backref().
 * NOTE(review): the condition choosing between the two helpers and the
 * return values of the early comparisons are not visible in this listing.
 */
231 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
233 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
234 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
236 if (ext1->is_data > ext2->is_data)
239 if (ext1->is_data < ext2->is_data)
242 if (ext1->full_backref > ext2->full_backref)
244 if (ext1->full_backref < ext2->full_backref)
248 return compare_data_backref(node1, node2);
250 return compare_tree_backref(node1, node2);
253 /* Explicit initialization for extent_record::flag_block_full_backref */
254 enum { FLAG_UNSET = 2 };
256 struct extent_record {
257 struct list_head backrefs;
258 struct list_head dups;
259 struct rb_root backref_tree;
260 struct list_head list;
261 struct cache_extent cache;
262 struct btrfs_disk_key parent_key;
267 u64 extent_item_refs;
269 u64 parent_generation;
273 unsigned int flag_block_full_backref:2;
274 unsigned int found_rec:1;
275 unsigned int content_checked:1;
276 unsigned int owner_ref_checked:1;
277 unsigned int is_root:1;
278 unsigned int metadata:1;
279 unsigned int bad_full_backref:1;
280 unsigned int crossing_stripes:1;
281 unsigned int wrong_chunk_type:1;
/* Get the struct extent_record whose 'list' member is @entry. */
284 static inline struct extent_record* to_extent_record(struct list_head *entry)
286 return container_of(entry, struct extent_record, list);
289 struct inode_backref {
290 struct list_head list;
291 unsigned int found_dir_item:1;
292 unsigned int found_dir_index:1;
293 unsigned int found_inode_ref:1;
/* Get the struct inode_backref whose 'list' member is @entry. */
303 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
305 return list_entry(entry, struct inode_backref, list);
308 struct root_item_record {
309 struct list_head list;
315 struct btrfs_key drop_key;
318 #define REF_ERR_NO_DIR_ITEM (1 << 0)
319 #define REF_ERR_NO_DIR_INDEX (1 << 1)
320 #define REF_ERR_NO_INODE_REF (1 << 2)
321 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
322 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
323 #define REF_ERR_DUP_INODE_REF (1 << 5)
324 #define REF_ERR_INDEX_UNMATCH (1 << 6)
325 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
326 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
327 #define REF_ERR_NO_ROOT_REF (1 << 9)
328 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
329 #define REF_ERR_DUP_ROOT_REF (1 << 11)
330 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
332 struct file_extent_hole {
338 struct inode_record {
339 struct list_head backrefs;
340 unsigned int checked:1;
341 unsigned int merging:1;
342 unsigned int found_inode_item:1;
343 unsigned int found_dir_item:1;
344 unsigned int found_file_extent:1;
345 unsigned int found_csum_item:1;
346 unsigned int some_csum_missing:1;
347 unsigned int nodatasum:1;
360 struct rb_root holes;
361 struct list_head orphan_extents;
366 #define I_ERR_NO_INODE_ITEM (1 << 0)
367 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
368 #define I_ERR_DUP_INODE_ITEM (1 << 2)
369 #define I_ERR_DUP_DIR_INDEX (1 << 3)
370 #define I_ERR_ODD_DIR_ITEM (1 << 4)
371 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
372 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
373 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
374 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
375 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
376 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
377 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
378 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
379 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
380 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
382 struct root_backref {
383 struct list_head list;
384 unsigned int found_dir_item:1;
385 unsigned int found_dir_index:1;
386 unsigned int found_back_ref:1;
387 unsigned int found_forward_ref:1;
388 unsigned int reachable:1;
/* Get the struct root_backref whose 'list' member is @entry. */
397 static inline struct root_backref* to_root_backref(struct list_head *entry)
399 return list_entry(entry, struct root_backref, list);
403 struct list_head backrefs;
404 struct cache_extent cache;
405 unsigned int found_root_item:1;
411 struct cache_extent cache;
416 struct cache_extent cache;
417 struct cache_tree root_cache;
418 struct cache_tree inode_cache;
419 struct inode_record *current;
428 struct walk_control {
429 struct cache_tree shared;
430 struct shared_node *nodes[BTRFS_MAX_LEVEL];
436 struct btrfs_key key;
438 struct list_head list;
441 struct extent_entry {
446 struct list_head list;
449 struct root_item_info {
450 /* level of the root */
452 /* number of nodes at this level, must be 1 for a root */
456 struct cache_extent cache_extent;
460 * Error bit for low memory mode check.
462 * Currently no caller cares about it yet. Just internal use for error
/*
 * Low memory mode error flags.  Every flag owns a distinct bit so that
 * several problems can be OR-ed into one result and still be told apart.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
/*
 * CROSSING_STRIPE_BOUNDARY used to be (1 << 4) as well, which made it
 * indistinguishable from REFERENCER_MISMATCH; it now uses the first free bit.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
/*
 * Progress reporter; @p is used as a struct task_ctx.  It starts a 1000 ms
 * period on priv->info, prints the description for the current priv->tp
 * followed by a character picked from work_indicator (indexed by count % 4)
 * and a carriage return, then waits for the next period.
 * priv->tp == TASK_NOTHING is tested before printing.
 * NOTE(review): the loop construct, the 'count' bookkeeping and what happens
 * for TASK_NOTHING are not visible in this listing.
 */
476 static void *print_status_check(void *p)
478 struct task_ctx *priv = p;
479 const char work_indicator[] = { '.', 'o', 'O', 'o' };
481 static char *task_position_string[] = {
483 "checking free space cache",
487 task_period_start(priv->info, 1000 /* 1s */);
489 if (priv->tp == TASK_NOTHING)
493 printf("%s [%c]\r", task_position_string[priv->tp],
494 work_indicator[count % 4]);
497 task_period_wait(priv->info);
502 static int print_status_return(void *p)
510 static enum btrfs_check_mode parse_check_mode(const char *str)
512 if (strcmp(str, "lowmem") == 0)
513 return CHECK_MODE_LOWMEM;
514 if (strcmp(str, "orig") == 0)
515 return CHECK_MODE_ORIGINAL;
516 if (strcmp(str, "original") == 0)
517 return CHECK_MODE_ORIGINAL;
519 return CHECK_MODE_UNKNOWN;
522 /* Compatibility function to allow reuse of old code */
/*
 * Inspect the first hole (in tree order) recorded in @holes; an empty tree
 * is handled separately.
 * NOTE(review): both return statements are missing from this listing.
 * Callers compare the result against an inode size, so it is presumably the
 * start offset of the first hole -- confirm against the complete file.
 */
523 static u64 first_extent_gap(struct rb_root *holes)
525 struct file_extent_hole *hole;
527 if (RB_EMPTY_ROOT(holes))
530 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * rb-tree comparator for file_extent_hole nodes.  Holes are ordered by
 * their start; for equal starts the lengths are compared so that, per the
 * inline comments, the longer hole becomes the centre of a later merge.
 * NOTE(review): the return statements are not visible in this listing.
 */
534 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
536 struct file_extent_hole *hole1;
537 struct file_extent_hole *hole2;
539 hole1 = rb_entry(node1, struct file_extent_hole, node);
540 hole2 = rb_entry(node2, struct file_extent_hole, node);
542 if (hole1->start > hole2->start)
544 if (hole1->start < hole2->start)
546 /* Now hole1->start == hole2->start */
547 if (hole1->len >= hole2->len)
549 * Hole 1 will be merge center
550 * Same hole will be merged later
553 /* Hole 2 will be merge center */
558 * Add a hole to the record
560 * This will do hole merge for copy_file_extent_holes(),
561 * which will ensure there won't be continuous holes.
/*
 * Visible steps: allocate a new hole, insert it into @holes with
 * compare_hole(), fold the previous hole into it when that hole reaches or
 * overlaps the new start, then walk the following holes and absorb each one
 * the new hole reaches, extending the length when the next hole ends later.
 * NOTE(review): the rest of the signature, the allocation failure handling,
 * the frees of merged holes and the return value are missing from this
 * listing.
 */
563 static int add_file_extent_hole(struct rb_root *holes,
566 struct file_extent_hole *hole;
567 struct file_extent_hole *prev = NULL;
568 struct file_extent_hole *next = NULL;
570 hole = malloc(sizeof(*hole));
575 /* Since compare will not return 0, no -EEXIST will happen */
576 rb_insert(holes, &hole->node, compare_hole);
578 /* simple merge with previous hole */
579 if (rb_prev(&hole->node))
580 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
582 if (prev && prev->start + prev->len >= hole->start) {
583 hole->len = hole->start + hole->len - prev->start;
584 hole->start = prev->start;
585 rb_erase(&prev->node, holes);
590 /* iterate merge with next holes */
592 if (!rb_next(&hole->node))
594 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
596 if (hole->start + hole->len >= next->start) {
597 if (hole->start + hole->len <= next->start + next->len)
598 hole->len = next->start + next->len -
600 rb_erase(&next->node, holes);
/*
 * Search callback handed to rb_search(): @data carries the key as a
 * struct file_extent_hole and @node is the tree entry under test.  The
 * visible checks test whether the key start lies before the entry or inside
 * [hole->start, hole->start + hole->len).
 * NOTE(review): the line loading 'start' from the key and all return
 * statements are not visible in this listing.
 */
609 static int compare_hole_range(struct rb_node *node, void *data)
611 struct file_extent_hole *hole;
614 hole = (struct file_extent_hole *)data;
617 hole = rb_entry(node, struct file_extent_hole, node);
618 if (start < hole->start)
620 if (start >= hole->start && start < hole->start + hole->len)
626 * Delete a hole in the record
628 * This will do the hole split and is much more restrictive than add.
/*
 * Remove a range from the hole tree.  The hole containing the range start
 * is looked up with compare_hole_range() and the range end is checked
 * against the end of that hole.  The hole is then erased and whatever is
 * left of it before and/or after the removed range is re-added through
 * add_file_extent_hole().
 * NOTE(review): the rest of the signature, the error returns, the free of
 * the erased hole and the guards around the two re-add calls are missing
 * from this listing.
 */
630 static int del_file_extent_hole(struct rb_root *holes,
633 struct file_extent_hole *hole;
634 struct file_extent_hole tmp;
639 struct rb_node *node;
646 node = rb_search(holes, &tmp, compare_hole_range, NULL);
649 hole = rb_entry(node, struct file_extent_hole, node);
650 if (start + len > hole->start + hole->len)
654 * Now there will be no overlap, delete the hole and re-add the
655 * split(s) if they exists.
657 if (start > hole->start) {
658 prev_start = hole->start;
659 prev_len = start - hole->start;
662 if (hole->start + hole->len > start + len) {
663 next_start = start + len;
664 next_len = hole->start + hole->len - start - len;
667 rb_erase(node, holes);
670 ret = add_file_extent_hole(holes, prev_start, prev_len);
675 ret = add_file_extent_hole(holes, next_start, next_len);
/*
 * Add every hole of the source tree to @dst by walking the source with
 * rb_first()/rb_next() and calling add_file_extent_hole() per entry, so
 * the merging done by that function applies to the copy as well.
 * NOTE(review): the loop header and the handling of a failing add are not
 * visible in this listing.
 */
682 static int copy_file_extent_holes(struct rb_root *dst,
685 struct file_extent_hole *hole;
686 struct rb_node *node;
689 node = rb_first(src);
691 hole = rb_entry(node, struct file_extent_hole, node);
692 ret = add_file_extent_hole(dst, hole->start, hole->len);
695 node = rb_next(node);
/*
 * Empty the hole tree: repeatedly take the first node and erase it until
 * rb_first() has nothing left to return.
 * NOTE(review): the loop header and the release of each erased hole are
 * not visible in this listing.
 */
700 static void free_file_extent_holes(struct rb_root *holes)
702 struct rb_node *node;
703 struct file_extent_hole *hole;
705 node = rb_first(holes);
707 hole = rb_entry(node, struct file_extent_hole, node);
708 rb_erase(node, holes);
710 node = rb_first(holes);
714 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
716 static void record_root_in_trans(struct btrfs_trans_handle *trans,
717 struct btrfs_root *root)
719 if (root->last_trans != trans->transid) {
720 root->track_dirty = 1;
721 root->last_trans = trans->transid;
722 root->commit_root = root->node;
723 extent_buffer_get(root->node);
/*
 * Translate the file-type bits of an inode mode (imode & S_IFMT) into the
 * matching BTRFS_FT_* value through a static lookup table indexed by the
 * type bits shifted down by S_SHIFT.  Slots without an explicit
 * initializer are zero, as for any static array.
 * NOTE(review): the definition of S_SHIFT is not visible in this listing.
 */
727 static u8 imode_to_type(u32 imode)
730 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
731 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
732 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
733 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
734 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
735 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
736 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
737 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
740 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
/*
 * rb-tree comparator for device_record nodes, keyed on devid.
 * NOTE(review): the return statements are not visible in this listing.
 */
744 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
746 struct device_record *rec1;
747 struct device_record *rec2;
749 rec1 = rb_entry(node1, struct device_record, node);
750 rec2 = rb_entry(node2, struct device_record, node);
751 if (rec1->devid > rec2->devid)
753 else if (rec1->devid < rec2->devid)
/*
 * Make a private deep copy of @orig_rec.
 *
 * The record itself is copied with memcpy(), after which the list heads and
 * the hole tree are re-initialised and refilled with copies of every
 * backref (including its trailing name), every orphan data extent and every
 * file extent hole of the original.  Allocation failure of the record is
 * reported as ERR_PTR(-ENOMEM).
 *
 * The tail of the function tears a partially built copy down again: holes,
 * then backrefs, then orphan extents.
 * NOTE(review): the orphan_extents teardown loop iterates with 'orig' and
 * 'tmp', which are struct inode_backref pointers, although the entries put
 * on that list above are struct orphan_data_extent.  It should use
 * cursors of the matching type.
 * NOTE(review): error checks, gotos, frees and the return statements are
 * missing from this listing.
 */
759 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
761 struct inode_record *rec;
762 struct inode_backref *backref;
763 struct inode_backref *orig;
764 struct inode_backref *tmp;
765 struct orphan_data_extent *src_orphan;
766 struct orphan_data_extent *dst_orphan;
771 rec = malloc(sizeof(*rec));
773 return ERR_PTR(-ENOMEM);
774 memcpy(rec, orig_rec, sizeof(*rec));
776 INIT_LIST_HEAD(&rec->backrefs);
777 INIT_LIST_HEAD(&rec->orphan_extents);
778 rec->holes = RB_ROOT;
780 list_for_each_entry(orig, &orig_rec->backrefs, list) {
781 size = sizeof(*orig) + orig->namelen + 1;
782 backref = malloc(size);
787 memcpy(backref, orig, size);
788 list_add_tail(&backref->list, &rec->backrefs);
790 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
791 dst_orphan = malloc(sizeof(*dst_orphan));
796 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
797 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
799 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
806 rb = rb_first(&rec->holes);
808 struct file_extent_hole *hole;
810 hole = rb_entry(rb, struct file_extent_hole, node);
816 if (!list_empty(&rec->backrefs))
817 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
818 list_del(&orig->list);
822 if (!list_empty(&rec->orphan_extents))
823 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
824 list_del(&orig->list);
/*
 * Print the orphan data extents on @orphan_extents to stdout: a heading
 * naming the tree, then one line per extent with its inode, file offset,
 * disk bytenr and disk length.  An empty list is checked for first.
 * NOTE(review): the second parameter and the final printf arguments are
 * not visible in this listing.
 */
833 static void print_orphan_data_extents(struct list_head *orphan_extents,
836 struct orphan_data_extent *orphan;
838 if (list_empty(orphan_extents))
840 printf("The following data extent is lost in tree %llu:\n",
842 list_for_each_entry(orphan, orphan_extents, list) {
843 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
844 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Report every error recorded on @rec to stderr on one line:
 * "root <id> inode <ino> errors <hex>" followed by a ", <description>"
 * entry per I_ERR_* bit that is set.  For a reloc tree the line is prefixed
 * with "reloc" and the id printed is root_key.offset instead of
 * root_key.objectid.
 *
 * Afterwards the orphan data extents are listed when
 * I_ERR_FILE_EXTENT_ORPHAN is set, and the recorded holes when
 * I_ERR_FILE_EXTENT_DISCOUNT is set.
 * NOTE(review): the hole printing loop and its fallback message are only
 * partly visible in this listing.
 */
849 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
851 u64 root_objectid = root->root_key.objectid;
852 int errors = rec->errors;
856 /* reloc root errors, we print its corresponding fs root objectid*/
857 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
858 root_objectid = root->root_key.offset;
859 fprintf(stderr, "reloc");
861 fprintf(stderr, "root %llu inode %llu errors %x",
862 (unsigned long long) root_objectid,
863 (unsigned long long) rec->ino, rec->errors);
865 if (errors & I_ERR_NO_INODE_ITEM)
866 fprintf(stderr, ", no inode item");
867 if (errors & I_ERR_NO_ORPHAN_ITEM)
868 fprintf(stderr, ", no orphan item");
869 if (errors & I_ERR_DUP_INODE_ITEM)
870 fprintf(stderr, ", dup inode item");
871 if (errors & I_ERR_DUP_DIR_INDEX)
872 fprintf(stderr, ", dup dir index");
873 if (errors & I_ERR_ODD_DIR_ITEM)
874 fprintf(stderr, ", odd dir item");
875 if (errors & I_ERR_ODD_FILE_EXTENT)
876 fprintf(stderr, ", odd file extent");
877 if (errors & I_ERR_BAD_FILE_EXTENT)
878 fprintf(stderr, ", bad file extent");
879 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
880 fprintf(stderr, ", file extent overlap");
881 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
882 fprintf(stderr, ", file extent discount");
883 if (errors & I_ERR_DIR_ISIZE_WRONG)
884 fprintf(stderr, ", dir isize wrong");
885 if (errors & I_ERR_FILE_NBYTES_WRONG)
886 fprintf(stderr, ", nbytes wrong");
887 if (errors & I_ERR_ODD_CSUM_ITEM)
888 fprintf(stderr, ", odd csum item");
889 if (errors & I_ERR_SOME_CSUM_MISSING)
890 fprintf(stderr, ", some csum missing");
891 if (errors & I_ERR_LINK_COUNT_WRONG)
892 fprintf(stderr, ", link count wrong");
893 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
894 fprintf(stderr, ", orphan file extent");
895 fprintf(stderr, "\n");
896 /* Print the orphan extents if needed */
897 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
898 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
900 /* Print the holes if needed */
901 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
902 struct file_extent_hole *hole;
903 struct rb_node *node;
906 node = rb_first(&rec->holes);
907 fprintf(stderr, "Found file extent holes:\n");
910 hole = rb_entry(node, struct file_extent_hole, node);
911 fprintf(stderr, "\tstart: %llu, len: %llu\n",
912 hole->start, hole->len);
913 node = rb_next(node);
916 fprintf(stderr, "\tstart: 0, len: %llu\n",
918 root->fs_info->sectorsize));
922 static void print_ref_error(int errors)
924 if (errors & REF_ERR_NO_DIR_ITEM)
925 fprintf(stderr, ", no dir item");
926 if (errors & REF_ERR_NO_DIR_INDEX)
927 fprintf(stderr, ", no dir index");
928 if (errors & REF_ERR_NO_INODE_REF)
929 fprintf(stderr, ", no inode ref");
930 if (errors & REF_ERR_DUP_DIR_ITEM)
931 fprintf(stderr, ", dup dir item");
932 if (errors & REF_ERR_DUP_DIR_INDEX)
933 fprintf(stderr, ", dup dir index");
934 if (errors & REF_ERR_DUP_INODE_REF)
935 fprintf(stderr, ", dup inode ref");
936 if (errors & REF_ERR_INDEX_UNMATCH)
937 fprintf(stderr, ", index mismatch");
938 if (errors & REF_ERR_FILETYPE_UNMATCH)
939 fprintf(stderr, ", filetype mismatch");
940 if (errors & REF_ERR_NAME_TOO_LONG)
941 fprintf(stderr, ", name too long");
942 if (errors & REF_ERR_NO_ROOT_REF)
943 fprintf(stderr, ", no root ref");
944 if (errors & REF_ERR_NO_ROOT_BACKREF)
945 fprintf(stderr, ", no root backref");
946 if (errors & REF_ERR_DUP_ROOT_REF)
947 fprintf(stderr, ", dup root ref");
948 if (errors & REF_ERR_DUP_ROOT_BACKREF)
949 fprintf(stderr, ", dup root backref");
950 fprintf(stderr, "\n");
/*
 * Look up the inode_record for an inode number in @inode_cache, creating
 * it when it does not exist yet.
 *
 * When a record exists, is shared (refs > 1) and 'mod' is set, the cache
 * node is pointed at a private copy made by clone_inode_rec().  A new
 * record starts with extent_start = (u64)-1, empty backref and orphan
 * lists and an empty hole tree, and is inserted as a cache extent of size 1
 * starting at the inode number.  Failures are reported as
 * ERR_PTR(-ENOMEM) or ERR_PTR(-EEXIST).
 * NOTE(review): the remaining parameters, the reference counting, the
 * cleanup on the error paths and the BTRFS_FREE_INO_OBJECTID special case
 * are missing from this listing.
 */
953 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
956 struct ptr_node *node;
957 struct cache_extent *cache;
958 struct inode_record *rec = NULL;
961 cache = lookup_cache_extent(inode_cache, ino, 1);
963 node = container_of(cache, struct ptr_node, cache);
965 if (mod && rec->refs > 1) {
966 node->data = clone_inode_rec(rec);
967 if (IS_ERR(node->data))
973 rec = calloc(1, sizeof(*rec));
975 return ERR_PTR(-ENOMEM);
977 rec->extent_start = (u64)-1;
979 INIT_LIST_HEAD(&rec->backrefs);
980 INIT_LIST_HEAD(&rec->orphan_extents);
981 rec->holes = RB_ROOT;
983 node = malloc(sizeof(*node));
986 return ERR_PTR(-ENOMEM);
988 node->cache.start = ino;
989 node->cache.size = 1;
992 if (ino == BTRFS_FREE_INO_OBJECTID)
995 ret = insert_cache_extent(inode_cache, &node->cache);
997 return ERR_PTR(-EEXIST);
/*
 * Drain @orphan_extents: unlink the first entry over and over until the
 * list is empty.
 * NOTE(review): the release of each unlinked entry is not visible in this
 * listing.
 */
1002 static void free_orphan_data_extents(struct list_head *orphan_extents)
1004 struct orphan_data_extent *orphan;
1006 while (!list_empty(orphan_extents)) {
1007 orphan = list_entry(orphan_extents->next,
1008 struct orphan_data_extent, list);
1009 list_del(&orphan->list);
/*
 * Drop one reference on @rec.  The reference count is decremented first and
 * tested for remaining users; the teardown that follows unlinks every
 * backref and releases the orphan data extents and the hole tree.
 * NOTE(review): the early return for remaining references and the frees of
 * the backrefs and of the record itself are not visible in this listing.
 */
1014 static void free_inode_rec(struct inode_record *rec)
1016 struct inode_backref *backref;
1018 if (--rec->refs > 0)
1021 while (!list_empty(&rec->backrefs)) {
1022 backref = to_inode_backref(rec->backrefs.next);
1023 list_del(&backref->list);
1026 free_orphan_data_extents(&rec->orphan_extents);
1027 free_file_extent_holes(&rec->holes);
/*
 * Test whether @rec holds nothing worth keeping: no errors, already
 * checked, inode item found, nlink equal to the number of links found and
 * no backrefs left on the list.
 * NOTE(review): the return statements are not visible in this listing.
 */
1031 static int can_free_inode_rec(struct inode_record *rec)
1033 if (!rec->errors && rec->checked && rec->found_inode_item &&
1034 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Run the checks that are possible so far on @rec and release it from
 * @inode_cache when nothing is left to track.
 *
 * Visible steps:
 *  - records without an inode item are tested for up front;
 *  - backrefs that have both a dir item and a dir index get their filetype
 *    checked against the inode mode, and are unlinked when they are error
 *    free, have an inode ref and nlink matches found_link;
 *  - unchecked or merging records are tested for before going further;
 *  - directories are checked for size mismatch and stray file extents,
 *    regular files and symlinks for stray dir items, nbytes mismatch,
 *    missing extents (unless no_holes) and checksum inconsistencies;
 *  - if can_free_inode_rec() agrees, the cache node is removed and the
 *    record freed.
 * NOTE(review): the early returns and some frees are missing from this
 * listing.
 */
1039 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1040 struct inode_record *rec)
1042 struct cache_extent *cache;
1043 struct inode_backref *tmp, *backref;
1044 struct ptr_node *node;
1047 if (!rec->found_inode_item)
1050 filetype = imode_to_type(rec->imode);
1051 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1052 if (backref->found_dir_item && backref->found_dir_index) {
1053 if (backref->filetype != filetype)
1054 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1055 if (!backref->errors && backref->found_inode_ref &&
1056 rec->nlink == rec->found_link) {
1057 list_del(&backref->list);
1063 if (!rec->checked || rec->merging)
1066 if (S_ISDIR(rec->imode)) {
1067 if (rec->found_size != rec->isize)
1068 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1069 if (rec->found_file_extent)
1070 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1071 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1072 if (rec->found_dir_item)
1073 rec->errors |= I_ERR_ODD_DIR_ITEM;
1074 if (rec->found_size != rec->nbytes)
1075 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1076 if (rec->nlink > 0 && !no_holes &&
1077 (rec->extent_end < rec->isize ||
1078 first_extent_gap(&rec->holes) < rec->isize))
1079 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1082 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1083 if (rec->found_csum_item && rec->nodatasum)
1084 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1085 if (rec->some_csum_missing && !rec->nodatasum)
1086 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1089 BUG_ON(rec->refs != 1);
1090 if (can_free_inode_rec(rec)) {
1091 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1092 node = container_of(cache, struct ptr_node, cache);
1093 BUG_ON(node->data != rec);
1094 remove_cache_extent(inode_cache, &node->cache);
1096 free_inode_rec(rec);
/*
 * Search @root for an orphan item (objectid BTRFS_ORPHAN_OBJECTID, type
 * BTRFS_ORPHAN_ITEM_KEY) with a read-only btrfs_search_slot() and release
 * the path again.
 * NOTE(review): the key offset assignment (presumably @ino) and the
 * translation of the search result into the return value are not visible
 * in this listing.
 */
1100 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1102 struct btrfs_path path;
1103 struct btrfs_key key;
1106 key.objectid = BTRFS_ORPHAN_OBJECTID;
1107 key.type = BTRFS_ORPHAN_ITEM_KEY;
1110 btrfs_init_path(&path);
1111 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1112 btrfs_release_path(&path);
/*
 * Record the INODE_ITEM found at @slot of @eb in the current inode record
 * of @active_node.  The record must belong to key->objectid and must not
 * be shared.  An inode item seen a second time is flagged with
 * I_ERR_DUP_INODE_ITEM.  The visible code copies nlink, size, nbytes and
 * mode, tests the NODATASUM inode flag, marks the inode item as found,
 * flags a zero nlink with I_ERR_NO_ORPHAN_ITEM and hands the record to
 * maybe_free_inode_rec().
 * NOTE(review): what follows the duplicate check, the effect of the
 * NODATASUM test and the return value are not visible in this listing.
 */
1118 static int process_inode_item(struct extent_buffer *eb,
1119 int slot, struct btrfs_key *key,
1120 struct shared_node *active_node)
1122 struct inode_record *rec;
1123 struct btrfs_inode_item *item;
1125 rec = active_node->current;
1126 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1127 if (rec->found_inode_item) {
1128 rec->errors |= I_ERR_DUP_INODE_ITEM;
1131 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1132 rec->nlink = btrfs_inode_nlink(eb, item);
1133 rec->isize = btrfs_inode_size(eb, item);
1134 rec->nbytes = btrfs_inode_nbytes(eb, item);
1135 rec->imode = btrfs_inode_mode(eb, item);
1136 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1138 rec->found_inode_item = 1;
1139 if (rec->nlink == 0)
1140 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1141 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find the backref of @rec that matches (@dir, name, @namelen), or create
 * one.  Existing entries are matched on directory, name length and name
 * bytes; records for BTRFS_MULTIPLE_OBJECTIDS get special treatment in the
 * search loop.  A new backref is allocated with room for the name plus a
 * terminating NUL, has its fixed part zeroed, stores the name and is
 * appended to rec->backrefs.
 * NOTE(review): the 'name' parameter line, the allocation failure handling,
 * the assignment of the directory and the return statements are not
 * visible in this listing.
 */
1145 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1147 int namelen, u64 dir)
1149 struct inode_backref *backref;
1151 list_for_each_entry(backref, &rec->backrefs, list) {
1152 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1154 if (backref->dir != dir || backref->namelen != namelen)
1156 if (memcmp(name, backref->name, namelen))
1161 backref = malloc(sizeof(*backref) + namelen + 1);
1164 memset(backref, 0, sizeof(*backref));
1166 backref->namelen = namelen;
1167 memcpy(backref->name, name, namelen);
1168 backref->name[namelen] = '\0';
1169 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Record that inode @ino is referenced from directory @dir under @name by
 * an item of type @itemtype, OR-ing @errors into the backref.
 *
 * Depending on @itemtype (dir index, dir item, inode ref/extref) the
 * matching found_* flag is set.  A repeated item of the same kind is
 * flagged as a duplicate, and an index or filetype that disagrees with
 * what another item kind already recorded is flagged as a mismatch.
 * Finally maybe_free_inode_rec() gets the chance to drop the record.
 * A failing get_inode_rec() is fatal (BUG_ON).
 * NOTE(review): a few lines, including the handling of an unknown
 * @itemtype and the return value, are not visible in this listing.
 */
1173 static int add_inode_backref(struct cache_tree *inode_cache,
1174 u64 ino, u64 dir, u64 index,
1175 const char *name, int namelen,
1176 u8 filetype, u8 itemtype, int errors)
1178 struct inode_record *rec;
1179 struct inode_backref *backref;
1181 rec = get_inode_rec(inode_cache, ino, 1);
1182 BUG_ON(IS_ERR(rec));
1183 backref = get_inode_backref(rec, name, namelen, dir);
1186 backref->errors |= errors;
1187 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1188 if (backref->found_dir_index)
1189 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1190 if (backref->found_inode_ref && backref->index != index)
1191 backref->errors |= REF_ERR_INDEX_UNMATCH;
1192 if (backref->found_dir_item && backref->filetype != filetype)
1193 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1195 backref->index = index;
1196 backref->filetype = filetype;
1197 backref->found_dir_index = 1;
1198 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1200 if (backref->found_dir_item)
1201 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1202 if (backref->found_dir_index && backref->filetype != filetype)
1203 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1205 backref->filetype = filetype;
1206 backref->found_dir_item = 1;
1207 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1208 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1209 if (backref->found_inode_ref)
1210 backref->errors |= REF_ERR_DUP_INODE_REF;
1211 if (backref->found_dir_index && backref->index != index)
1212 backref->errors |= REF_ERR_INDEX_UNMATCH;
1214 backref->index = index;
1216 backref->ref_type = itemtype;
1217 backref->found_inode_ref = 1;
1222 maybe_free_inode_rec(inode_cache, rec);
/*
 * Fold everything known about @src into @dst, which lives in @dst_cache.
 *
 * Every backref of @src is replayed through add_inode_backref(), once per
 * item kind it was found as.  The found_* / some_csum_missing flags are
 * OR-ed in, the hole trees are merged when @src has the earlier first gap,
 * found_link and found_size are accumulated, and the extent ranges are
 * combined: overlapping ranges raise I_ERR_FILE_EXTENT_OVERLAP and a gap
 * between them is recorded as a hole.  Error bits are OR-ed, and the inode
 * item fields are taken from @src unless @dst already has an inode item, in
 * which case I_ERR_DUP_INODE_ITEM is raised.
 * NOTE(review): the 'dir_count' bookkeeping, the error handling of the
 * hole operations and the return value are not visible in this listing.
 */
1226 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1227 struct cache_tree *dst_cache)
1229 struct inode_backref *backref;
1234 list_for_each_entry(backref, &src->backrefs, list) {
1235 if (backref->found_dir_index) {
1236 add_inode_backref(dst_cache, dst->ino, backref->dir,
1237 backref->index, backref->name,
1238 backref->namelen, backref->filetype,
1239 BTRFS_DIR_INDEX_KEY, backref->errors);
1241 if (backref->found_dir_item) {
1243 add_inode_backref(dst_cache, dst->ino,
1244 backref->dir, 0, backref->name,
1245 backref->namelen, backref->filetype,
1246 BTRFS_DIR_ITEM_KEY, backref->errors);
1248 if (backref->found_inode_ref) {
1249 add_inode_backref(dst_cache, dst->ino,
1250 backref->dir, backref->index,
1251 backref->name, backref->namelen, 0,
1252 backref->ref_type, backref->errors);
1256 if (src->found_dir_item)
1257 dst->found_dir_item = 1;
1258 if (src->found_file_extent)
1259 dst->found_file_extent = 1;
1260 if (src->found_csum_item)
1261 dst->found_csum_item = 1;
1262 if (src->some_csum_missing)
1263 dst->some_csum_missing = 1;
1264 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1265 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1270 BUG_ON(src->found_link < dir_count);
1271 dst->found_link += src->found_link - dir_count;
1272 dst->found_size += src->found_size;
1273 if (src->extent_start != (u64)-1) {
1274 if (dst->extent_start == (u64)-1) {
1275 dst->extent_start = src->extent_start;
1276 dst->extent_end = src->extent_end;
1278 if (dst->extent_end > src->extent_start)
1279 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1280 else if (dst->extent_end < src->extent_start) {
1281 ret = add_file_extent_hole(&dst->holes,
1283 src->extent_start - dst->extent_end);
1285 if (dst->extent_end < src->extent_end)
1286 dst->extent_end = src->extent_end;
1290 dst->errors |= src->errors;
1291 if (src->found_inode_item) {
1292 if (!dst->found_inode_item) {
1293 dst->nlink = src->nlink;
1294 dst->isize = src->isize;
1295 dst->nbytes = src->nbytes;
1296 dst->imode = src->imode;
1297 dst->nodatasum = src->nodatasum;
1298 dst->found_inode_item = 1;
1300 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Move the inode records of @src_node into @dst_node.
 *
 * One reference on @src_node is dropped first.  The root_cache of the
 * source is walked, then its inode_cache.  Each entry is inserted into the
 * matching destination tree through a freshly allocated ptr_node; on
 * -EEXIST the record is merged into the one already present with
 * merge_inode_recs(), which is then marked checked and offered to
 * maybe_free_inode_rec().
 *
 * Afterwards the current inode of the destination advances to the source's
 * current inode when that one is newer; the previous current record is
 * marked checked and offered to maybe_free_inode_rec().
 * NOTE(review): the loop structure, the effect of the reference count
 * reaching zero, the allocation failure handling and the return value are
 * not visible in this listing.
 */
1308 static int splice_shared_node(struct shared_node *src_node,
1309 struct shared_node *dst_node)
1311 struct cache_extent *cache;
1312 struct ptr_node *node, *ins;
1313 struct cache_tree *src, *dst;
1314 struct inode_record *rec, *conflict;
1315 u64 current_ino = 0;
1319 if (--src_node->refs == 0)
1321 if (src_node->current)
1322 current_ino = src_node->current->ino;
1324 src = &src_node->root_cache;
1325 dst = &dst_node->root_cache;
1327 cache = search_cache_extent(src, 0);
1329 node = container_of(cache, struct ptr_node, cache);
1331 cache = next_cache_extent(cache);
1334 remove_cache_extent(src, &node->cache);
1337 ins = malloc(sizeof(*ins));
1339 ins->cache.start = node->cache.start;
1340 ins->cache.size = node->cache.size;
1344 ret = insert_cache_extent(dst, &ins->cache);
1345 if (ret == -EEXIST) {
1346 conflict = get_inode_rec(dst, rec->ino, 1);
1347 BUG_ON(IS_ERR(conflict));
1348 merge_inode_recs(rec, conflict, dst);
1350 conflict->checked = 1;
1351 if (dst_node->current == conflict)
1352 dst_node->current = NULL;
1354 maybe_free_inode_rec(dst, conflict);
1355 free_inode_rec(rec);
1362 if (src == &src_node->root_cache) {
1363 src = &src_node->inode_cache;
1364 dst = &dst_node->inode_cache;
1368 if (current_ino > 0 && (!dst_node->current ||
1369 current_ino > dst_node->current->ino)) {
1370 if (dst_node->current) {
1371 dst_node->current->checked = 1;
1372 maybe_free_inode_rec(dst, dst_node->current);
1374 dst_node->current = get_inode_rec(dst, current_ino, 1);
1375 BUG_ON(IS_ERR(dst_node->current));
/*
 * Per-entry release callback for a cache tree of inode records: recovers the
 * enclosing ptr_node from the cache_extent and frees the inode record.
 */
1380 static void free_inode_ptr(struct cache_extent *cache)
1382 struct ptr_node *node;
1383 struct inode_record *rec;
1385 node = container_of(cache, struct ptr_node, cache);
1387 free_inode_rec(rec);
/*
 * Instantiates the tree-freeing helper for inode record trees with
 * free_inode_ptr as the per-entry callback (presumably this is what defines
 * free_inode_recs_tree(), which enter_shared_node() calls).
 */
1391 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the shared_node registered in @shared for the tree block at bytenr.
 */
1393 static struct shared_node *find_shared_node(struct cache_tree *shared,
1396 struct cache_extent *cache;
1397 struct shared_node *node;
/* Shared nodes are inserted with size 1 (see add_shared_node), so query the same way. */
1399 cache = lookup_cache_extent(shared, bytenr, 1);
1401 node = container_of(cache, struct shared_node, cache);
/*
 * Allocate a zeroed shared_node for the tree block at bytenr, initialise its
 * two record caches and insert it into @shared, keyed by bytenr with size 1.
 */
1407 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1410 struct shared_node *node;
1412 node = calloc(1, sizeof(*node));
1415 node->cache.start = bytenr;
1416 node->cache.size = 1;
1417 cache_tree_init(&node->root_cache);
1418 cache_tree_init(&node->inode_cache);
1421 ret = insert_cache_extent(shared, &node->cache);
/*
 * Called when the tree walk reaches the block at bytenr on @level.
 *
 * If no shared_node is known for the block, one is created and becomes the
 * active node for @level.  If one is already known, its records are either
 * dropped (walk is at the root level of a root with zero references) or
 * spliced into the currently active node.
 */
1426 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1427 struct walk_control *wc, int level)
1429 struct shared_node *node;
1430 struct shared_node *dest;
1433 if (level == wc->active_node)
/* The active node must sit above the level being entered. */
1436 BUG_ON(wc->active_node <= level);
1437 node = find_shared_node(&wc->shared, bytenr);
/* Not seen before: register the block and make it the active node for this level. */
1439 ret = add_shared_node(&wc->shared, bytenr, refs);
1441 node = find_shared_node(&wc->shared, bytenr);
1442 wc->nodes[level] = node;
1443 wc->active_node = level;
/* Root with zero refs at its root level: release the node's records on the last reference. */
1447 if (wc->root_level == wc->active_node &&
1448 btrfs_root_refs(&root->root_item) == 0) {
1449 if (--node->refs == 0) {
1450 free_inode_recs_tree(&node->root_cache);
1451 free_inode_recs_tree(&node->inode_cache);
1452 remove_cache_extent(&wc->shared, &node->cache);
/* Otherwise merge what was collected for this block into the active node. */
1458 dest = wc->nodes[wc->active_node];
1459 splice_shared_node(node, dest);
/* splice_shared_node() dropped a reference; unlink the node once none remain. */
1460 if (node->refs == 0) {
1461 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Called when the walk moves back up out of the active shared node at @level.
 *
 * A level above @level becomes the active one, and the records gathered in
 * the node being left are spliced into it unless the new active level is the
 * root level of a root with zero references.
 */
1467 static int leave_shared_node(struct btrfs_root *root,
1468 struct walk_control *wc, int level)
1470 struct shared_node *node;
1471 struct shared_node *dest;
1474 if (level == wc->root_level)
/* Scan upwards from the next level for the level that becomes active. */
1477 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1481 BUG_ON(i >= BTRFS_MAX_LEVEL);
/* Detach the node being left from its level slot and activate level i. */
1483 node = wc->nodes[wc->active_node];
1484 wc->nodes[wc->active_node] = NULL;
1485 wc->active_node = i;
1487 dest = wc->nodes[wc->active_node];
1488 if (wc->active_node < wc->root_level ||
1489 btrfs_root_refs(&root->root_item) > 0) {
/* splice_shared_node() drops one reference; at least one more must remain. */
1490 BUG_ON(node->refs <= 1);
1491 splice_shared_node(node, dest);
1493 BUG_ON(node->refs < 2);
1502 * 1 - if the root with id child_root_id is a child of root parent_root_id
1503 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1504 * has other root(s) as parent(s)
1505 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Determine, from the tree of tree roots, how child_root_id relates to
 * parent_root_id.  Two lookups are done: a direct ROOT_REF search keyed
 * (parent_root_id, ROOT_REF, child_root_id), then a scan of the
 * ROOT_BACKREF items of child_root_id.
 */
1507 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1510 struct btrfs_path path;
1511 struct btrfs_key key;
1512 struct extent_buffer *leaf;
1516 btrfs_init_path(&path);
/* First lookup: the forward reference from parent to child. */
1518 key.objectid = parent_root_id;
1519 key.type = BTRFS_ROOT_REF_KEY;
1520 key.offset = child_root_id;
1521 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1525 btrfs_release_path(&path);
/* Second lookup: walk the back references stored under the child. */
1529 key.objectid = child_root_id;
1530 key.type = BTRFS_ROOT_BACKREF_KEY;
1532 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1538 leaf = path.nodes[0];
/* Past the last item of this leaf: continue in the next leaf. */
1539 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1540 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1543 leaf = path.nodes[0];
1546 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Left the child's ROOT_BACKREF items. */
1547 if (key.objectid != child_root_id ||
1548 key.type != BTRFS_ROOT_BACKREF_KEY)
/* A back reference whose offset is the parent id names that parent. */
1553 if (key.offset == parent_root_id) {
1554 btrfs_release_path(&path);
1561 btrfs_release_path(&path);
/* No match: 0 if some other parent was seen, 2 if the child has no parent at all. */
1564 return has_parent ? 0 : 2;
1567 static int process_dir_item(struct extent_buffer *eb,
1568 int slot, struct btrfs_key *key,
1569 struct shared_node *active_node)
1579 struct btrfs_dir_item *di;
1580 struct inode_record *rec;
1581 struct cache_tree *root_cache;
1582 struct cache_tree *inode_cache;
1583 struct btrfs_key location;
1584 char namebuf[BTRFS_NAME_LEN];
1586 root_cache = &active_node->root_cache;
1587 inode_cache = &active_node->inode_cache;
1588 rec = active_node->current;
1589 rec->found_dir_item = 1;
1591 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1592 total = btrfs_item_size_nr(eb, slot);
1593 while (cur < total) {
1595 btrfs_dir_item_key_to_cpu(eb, di, &location);
1596 name_len = btrfs_dir_name_len(eb, di);
1597 data_len = btrfs_dir_data_len(eb, di);
1598 filetype = btrfs_dir_type(eb, di);
1600 rec->found_size += name_len;
1601 if (cur + sizeof(*di) + name_len > total ||
1602 name_len > BTRFS_NAME_LEN) {
1603 error = REF_ERR_NAME_TOO_LONG;
1605 if (cur + sizeof(*di) > total)
1607 len = min_t(u32, total - cur - sizeof(*di),
1614 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1616 if (key->type == BTRFS_DIR_ITEM_KEY &&
1617 key->offset != btrfs_name_hash(namebuf, len)) {
1618 rec->errors |= I_ERR_ODD_DIR_ITEM;
1619 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1620 key->objectid, key->offset, namebuf, len, filetype,
1621 key->offset, btrfs_name_hash(namebuf, len));
1624 if (location.type == BTRFS_INODE_ITEM_KEY) {
1625 add_inode_backref(inode_cache, location.objectid,
1626 key->objectid, key->offset, namebuf,
1627 len, filetype, key->type, error);
1628 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1629 add_inode_backref(root_cache, location.objectid,
1630 key->objectid, key->offset,
1631 namebuf, len, filetype,
1634 fprintf(stderr, "invalid location in dir item %u\n",
1636 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1637 key->objectid, key->offset, namebuf,
1638 len, filetype, key->type, error);
1641 len = sizeof(*di) + name_len + data_len;
1642 di = (struct btrfs_dir_item *)((char *)di + len);
1645 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1646 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Process one INODE_REF item: walk the btrfs_inode_ref entries packed in the
 * item and add a backref for each to the active node's inode cache.
 * key->objectid is the inode, key->offset is passed as the directory.
 */
1651 static int process_inode_ref(struct extent_buffer *eb,
1652 int slot, struct btrfs_key *key,
1653 struct shared_node *active_node)
1661 struct cache_tree *inode_cache;
1662 struct btrfs_inode_ref *ref;
1663 char namebuf[BTRFS_NAME_LEN];
1665 inode_cache = &active_node->inode_cache;
1667 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1668 total = btrfs_item_size_nr(eb, slot);
1669 while (cur < total) {
1670 name_len = btrfs_inode_ref_name_len(eb, ref);
1671 index = btrfs_inode_ref_index(eb, ref);
1673 /* inode_ref + namelen should not cross item boundary */
1674 if (cur + sizeof(*ref) + name_len > total ||
1675 name_len > BTRFS_NAME_LEN) {
/* Not even the fixed-size ref header fits in what is left of the item. */
1676 if (total < cur + sizeof(*ref))
1679 /* Still try to read out the remaining part */
1680 len = min_t(u32, total - cur - sizeof(*ref),
1682 error = REF_ERR_NAME_TOO_LONG;
/* The name is stored directly after the ref structure. */
1688 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1689 add_inode_backref(inode_cache, key->objectid, key->offset,
1690 index, namebuf, len, 0, key->type, error);
/* Step to the next packed entry using the on-disk name length. */
1692 len = sizeof(*ref) + name_len;
1693 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Process one INODE_EXTREF item: walk the btrfs_inode_extref entries packed
 * in the item and add a backref for each to the active node's inode cache.
 * Unlike INODE_REF, the parent directory is stored inside each entry.
 */
1699 static int process_inode_extref(struct extent_buffer *eb,
1700 int slot, struct btrfs_key *key,
1701 struct shared_node *active_node)
1710 struct cache_tree *inode_cache;
1711 struct btrfs_inode_extref *extref;
1712 char namebuf[BTRFS_NAME_LEN];
1714 inode_cache = &active_node->inode_cache;
1716 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1717 total = btrfs_item_size_nr(eb, slot);
1718 while (cur < total) {
1719 name_len = btrfs_inode_extref_name_len(eb, extref);
1720 index = btrfs_inode_extref_index(eb, extref);
1721 parent = btrfs_inode_extref_parent(eb, extref);
1722 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: read only what fits in namebuf and flag the backref. */
1726 len = BTRFS_NAME_LEN;
1727 error = REF_ERR_NAME_TOO_LONG;
/* The name is stored directly after the extref structure. */
1729 read_extent_buffer(eb, namebuf,
1730 (unsigned long)(extref + 1), len);
1731 add_inode_backref(inode_cache, key->objectid, parent,
1732 index, namebuf, len, 0, key->type, error);
/* Step to the next packed entry using the on-disk name length. */
1734 len = sizeof(*extref) + name_len;
1735 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Walk the EXTENT_CSUM items of the csum tree that overlap the byte range
 * [start, start + len) and work out how much of the range they cover.
 * The result is presumably accumulated into *found -- TODO confirm.
 */
1742 static int count_csum_range(struct btrfs_root *root, u64 start,
1743 u64 len, u64 *found)
1745 struct btrfs_key key;
1746 struct btrfs_path path;
1747 struct extent_buffer *leaf;
1752 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1754 btrfs_init_path(&path);
1756 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1758 key.type = BTRFS_EXTENT_CSUM_KEY;
1760 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * No exact match: the preceding item may be a csum item that starts before
 * the range but still extends into it, so look at it as well.
 */
1764 if (ret > 0 && path.slots[0] > 0) {
1765 leaf = path.nodes[0];
1766 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1767 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1768 key.type == BTRFS_EXTENT_CSUM_KEY)
1773 leaf = path.nodes[0];
/* Ran off the end of this leaf: continue in the next one. */
1774 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1775 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1780 leaf = path.nodes[0];
1783 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Left the csum items. */
1784 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1785 key.type != BTRFS_EXTENT_CSUM_KEY)
1788 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* This item starts at or beyond the end of the range. */
1789 if (key.offset >= start + len)
1792 if (key.offset > start)
/* One checksum of csum_size bytes covers one sector of data. */
1795 size = btrfs_item_size_nr(leaf, path.slots[0]);
1796 csum_end = key.offset + (size / csum_size) *
1797 root->fs_info->sectorsize;
1798 if (csum_end > start) {
1799 size = min(csum_end - start, len);
1808 btrfs_release_path(&path);
/*
 * Process one EXTENT_DATA item of the inode currently tracked by
 * @active_node.
 *
 * Tracks the file range covered so far (extent_start/extent_end), records
 * holes and overlaps between consecutive extents, validates the item's
 * fields according to its extent type, and for extents that have a
 * disk_bytenr checks how much of the data is covered by checksums.
 */
1814 static int process_file_extent(struct btrfs_root *root,
1815 struct extent_buffer *eb,
1816 int slot, struct btrfs_key *key,
1817 struct shared_node *active_node)
1819 struct inode_record *rec;
1820 struct btrfs_file_extent_item *fi;
1822 u64 disk_bytenr = 0;
1823 u64 extent_offset = 0;
/* Mask of the bits below sector granularity, used for alignment checks/rounding. */
1824 u64 mask = root->fs_info->sectorsize - 1;
1828 rec = active_node->current;
1829 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1830 rec->found_file_extent = 1;
/* (u64)-1 marks "no extent seen yet" for this inode. */
1832 if (rec->extent_start == (u64)-1) {
1833 rec->extent_start = key->offset;
1834 rec->extent_end = key->offset;
/* This extent starts before the previous one ended: overlap. */
1837 if (rec->extent_end > key->offset)
1838 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
/* Gap between the previous extent and this one: record it as a hole. */
1839 else if (rec->extent_end < key->offset) {
1840 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1841 key->offset - rec->extent_end);
1846 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1847 extent_type = btrfs_file_extent_type(eb, fi);
1849 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1850 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1852 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1853 rec->found_size += num_bytes;
/* Round the inline length up to a sector multiple for extent_end tracking. */
1854 num_bytes = (num_bytes + mask) & ~mask;
1855 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1856 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1857 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1858 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1859 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned. */
1860 if (num_bytes == 0 || (num_bytes & mask))
1861 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced slice must lie within the extent's ram_bytes. */
1862 if (num_bytes + extent_offset >
1863 btrfs_file_extent_ram_bytes(eb, fi))
1864 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not carry compression/encryption/other encoding. */
1865 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1866 (btrfs_file_extent_compression(eb, fi) ||
1867 btrfs_file_extent_encryption(eb, fi) ||
1868 btrfs_file_extent_other_encoding(eb, fi)))
1869 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents with a disk_bytenr count towards found_size. */
1870 if (disk_bytenr > 0)
1871 rec->found_size += num_bytes;
1873 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1875 rec->extent_end = key->offset + num_bytes;
1878 * The data reloc tree will copy full extents into its inode and then
1879 * copy the corresponding csums. Because the extent it copied could be
1880 * a preallocated extent that hasn't been written to yet there may be no
1881 * csums to copy, ergo we won't have csums for our file extent. This is
1882 * ok so just don't bother checking csums if the inode belongs to the
1885 if (disk_bytenr > 0 &&
1886 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* For compressed extents, count checksums over the on-disk length instead. */
1888 if (btrfs_file_extent_compression(eb, fi))
1889 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1891 disk_bytenr += extent_offset;
1893 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
/* Regular extent: note csum presence and whether coverage is incomplete. */
1896 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1898 rec->found_csum_item = 1;
1899 if (found < num_bytes)
1900 rec->some_csum_missing = 1;
1901 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1903 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item of leaf @eb for the original-mode check: keep the
 * active node's "current" inode record in step with the item keys and
 * dispatch each item to the handler for its key type.
 */
1909 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1910 struct walk_control *wc)
1912 struct btrfs_key key;
1916 struct cache_tree *inode_cache;
1917 struct shared_node *active_node;
/* At the root level of a root with zero references. */
1919 if (wc->root_level == wc->active_node &&
1920 btrfs_root_refs(&root->root_item) == 0)
1923 active_node = wc->nodes[wc->active_node];
1924 inode_cache = &active_node->inode_cache;
1925 nritems = btrfs_header_nritems(eb);
1926 for (i = 0; i < nritems; i++) {
1927 btrfs_item_key_to_cpu(eb, &key, i);
/* Free space objects and orphan items get their own treatment. */
1929 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1931 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/*
 * Items of a new (larger) inode number: the previous record has seen all of
 * its items, so mark it checked and switch to the record of the new inode.
 */
1934 if (active_node->current == NULL ||
1935 active_node->current->ino < key.objectid) {
1936 if (active_node->current) {
1937 active_node->current->checked = 1;
1938 maybe_free_inode_rec(inode_cache,
1939 active_node->current);
1941 active_node->current = get_inode_rec(inode_cache,
1943 BUG_ON(IS_ERR(active_node->current));
/* Dispatch on the item type. */
1946 case BTRFS_DIR_ITEM_KEY:
1947 case BTRFS_DIR_INDEX_KEY:
1948 ret = process_dir_item(eb, i, &key, active_node);
1950 case BTRFS_INODE_REF_KEY:
1951 ret = process_inode_ref(eb, i, &key, active_node);
1953 case BTRFS_INODE_EXTREF_KEY:
1954 ret = process_inode_extref(eb, i, &key, active_node);
1956 case BTRFS_INODE_ITEM_KEY:
1957 ret = process_inode_item(eb, i, &key, active_node);
1959 case BTRFS_EXTENT_DATA_KEY:
1960 ret = process_file_extent(root, eb, i, &key,
/*
 * Per-level cache used while walking a tree (accessed through
 * struct node_refs pointers): for each level, the bytenr of the block last
 * looked up, its extent reference count, and whether it needs to be checked
 * in the current root.
 */
1971 u64 bytenr[BTRFS_MAX_LEVEL];
1972 u64 refs[BTRFS_MAX_LEVEL];
1973 int need_check[BTRFS_MAX_LEVEL];
/* Forward declarations for helpers defined further down in the file. */
1976 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1977 struct node_refs *nrefs, u64 level);
1978 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1979 unsigned int ext_ref);
1982 * Returns >0 Found error, not fatal, should continue
1983 * Returns <0 Fatal error, must exit the whole check
1984 * Returns 0 No errors found
/*
 * Check the inodes found in the leaf at path->nodes[0] by calling
 * check_inode_item() on them.  check_inode_item() may move the path into a
 * following leaf; when that happens the cached per-level reference
 * information in @nrefs is refreshed for the blocks that changed.
 */
1986 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1987 struct node_refs *nrefs, int *level, int ext_ref)
1989 struct extent_buffer *cur = path->nodes[0];
1990 struct btrfs_key key;
1994 int root_level = btrfs_header_level(root->node);
1996 int ret = 0; /* Final return value */
1997 int err = 0; /* Positive error bitmap */
/* Remember which leaf we started in so a leaf switch can be detected. */
1999 cur_bytenr = cur->start;
2001 /* skip to first inode item or the first inode number change */
2002 nritems = btrfs_header_nritems(cur);
2003 for (i = 0; i < nritems; i++) {
2004 btrfs_item_key_to_cpu(cur, &key, i);
2006 first_ino = key.objectid;
2007 if (key.type == BTRFS_INODE_ITEM_KEY ||
2008 (first_ino && first_ino != key.objectid))
2012 path->slots[0] = nritems;
2018 err |= check_inode_item(root, path, ext_ref);
2020 /* modify cur since check_inode_item may change path */
2021 cur = path->nodes[0];
2023 if (err & LAST_ITEM)
2026 /* still have inode items in this leaf */
2027 if (cur->start == cur_bytenr)
2031 * we have switched to another leaf, above nodes may
2032 * have changed, here walk down the path, if a node
2033 * or leaf is shared, check whether we can skip this
2036 for (i = root_level; i >= 0; i--) {
/* Block at this level is the one already cached in nrefs. */
2037 if (path->nodes[i]->start == nrefs->bytenr[i])
2040 ret = update_nodes_refs(root,
2041 path->nodes[i]->start,
2046 if (!nrefs->need_check[i]) {
/* Release the path buffers below *level. */
2052 for (i = 0; i < *level; i++) {
2053 free_extent_buffer(path->nodes[i]);
2054 path->nodes[i] = NULL;
/*
 * Issue readahead for the child blocks of @node, starting at @slot and
 * running to the last pointer in the node.
 */
2063 static void reada_walk_down(struct btrfs_root *root,
2064 struct extent_buffer *node, int slot)
2066 struct btrfs_fs_info *fs_info = root->fs_info;
2073 level = btrfs_header_level(node);
2077 nritems = btrfs_header_nritems(node);
2078 for (i = slot; i < nritems; i++) {
2079 bytenr = btrfs_node_blockptr(node, i);
2080 ptr_gen = btrfs_node_ptr_generation(node, i);
2081 readahead_tree_block(fs_info, bytenr, ptr_gen);
2086 * Check the child node/leaf against the following conditions:
2087 * 1. the first item key of the node/leaf should be the same as the one
2089 * 2. block in parent node should match the child node/leaf.
2090 * 3. generation of parent node and child's header should be consistent.
2092 * Otherwise the child node/leaf pointed to by the key in parent is not valid.
2094 * We hope to check leaf owner too, but since subvol may share leaves,
2095 * which makes leaf owner check not so strong, key check should be
2096 * sufficient enough for that case.
2098 static int check_child_node(struct extent_buffer *parent, int slot,
2099 struct extent_buffer *child)
2101 struct btrfs_key parent_key;
2102 struct btrfs_key child_key;
2105 btrfs_node_key_to_cpu(parent, &parent_key, slot);
2106 if (btrfs_header_level(child) == 0)
2107 btrfs_item_key_to_cpu(child, &child_key, 0);
2109 btrfs_node_key_to_cpu(child, &child_key, 0);
2111 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2114 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2115 parent_key.objectid, parent_key.type, parent_key.offset,
2116 child_key.objectid, child_key.type, child_key.offset);
2118 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2120 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2121 btrfs_node_blockptr(parent, slot),
2122 btrfs_header_bytenr(child));
2124 if (btrfs_node_ptr_generation(parent, slot) !=
2125 btrfs_header_generation(child)) {
2127 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2128 btrfs_header_generation(child),
2129 btrfs_node_ptr_generation(parent, slot));
2135 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2136 * in every fs or file tree check. Here we find all of its root ids, and only check
2137 * it in the fs or file tree which has the smallest root id.
/*
 * Decide whether a tree block referenced by the roots in @roots has to be
 * checked while walking @root.
 */
2139 static int need_check(struct btrfs_root *root, struct ulist *roots)
2141 struct rb_node *node;
2142 struct ulist_node *u;
/* Only one root references the block. */
2144 if (roots->nnodes == 1)
/* Take the first entry of the ulist's rb-tree. */
2147 node = rb_first(&roots->root);
2148 u = rb_entry(node, struct ulist_node, rb_node);
2150 * current root id is not smallest, we skip it and let it be checked
2151 * in the fs or file tree who hash the smallest root id.
2153 if (root->objectid != u->val)
2160 * for a tree node or leaf, we record its reference count, so that if we
2161 * process this node or leaf again later, we don't need to compute it again.
/*
 * Refresh the cached information in @nrefs for the block at bytenr on
 * @level.  Nothing is looked up when the cache entry for @level already
 * describes bytenr; otherwise the extent reference count is fetched and the
 * need_check flag for the level is recomputed.
 */
2163 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2164 struct node_refs *nrefs, u64 level)
2168 struct ulist *roots;
2170 if (nrefs->bytenr[level] != bytenr) {
2171 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2172 level, 1, &refs, NULL);
2176 nrefs->bytenr[level] = bytenr;
2177 nrefs->refs[level] = refs;
/* Find every root that references the block to decide who checks it. */
2179 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2184 check = need_check(root, roots);
2186 nrefs->need_check[level] = check;
2188 nrefs->need_check[level] = 1;
/*
 * Original-mode descent: starting at path->nodes[*level], walk down towards
 * the leaves.  Leaves are handed to process_one_leaf(); for each node
 * pointer the child's reference count is looked up (or taken from @nrefs),
 * the child is registered through enter_shared_node(), read, validated, and
 * made the new bottom of the path.
 */
2195 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2196 struct walk_control *wc, int *level,
2197 struct node_refs *nrefs)
2199 enum btrfs_tree_block_status status;
2202 struct btrfs_fs_info *fs_info = root->fs_info;
2203 struct extent_buffer *next;
2204 struct extent_buffer *cur;
2208 WARN_ON(*level < 0);
2209 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached reference count when @nrefs already describes this block. */
2211 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2212 refs = nrefs->refs[*level];
2215 ret = btrfs_lookup_extent_info(NULL, root,
2216 path->nodes[*level]->start,
2217 *level, 1, &refs, NULL);
2222 nrefs->bytenr[*level] = path->nodes[*level]->start;
2223 nrefs->refs[*level] = refs;
2227 ret = enter_shared_node(root, path->nodes[*level]->start,
2235 while (*level >= 0) {
2236 WARN_ON(*level < 0);
2237 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2238 cur = path->nodes[*level];
/* The block's own header must agree with the level we think we are on. */
2240 if (btrfs_header_level(cur) != *level)
/* All slots of this block have been visited. */
2243 if (path->slots[*level] >= btrfs_header_nritems(cur))
2246 ret = process_one_leaf(root, cur, wc);
/* Interior node: pick up the child pointer at the current slot. */
2251 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2252 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
/* Same caching scheme for the child's reference count, one level down. */
2254 if (bytenr == nrefs->bytenr[*level - 1]) {
2255 refs = nrefs->refs[*level - 1];
2257 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2258 *level - 1, 1, &refs, NULL);
2262 nrefs->bytenr[*level - 1] = bytenr;
2263 nrefs->refs[*level - 1] = refs;
2268 ret = enter_shared_node(root, bytenr, refs,
2271 path->slots[*level]++;
/* Use a cached, up-to-date buffer if there is one; otherwise read from disk. */
2276 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2277 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2278 free_extent_buffer(next);
2279 reada_walk_down(root, cur, path->slots[*level]);
2280 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
/* Read failed: record the parent block as a corrupt extent. */
2281 if (!extent_buffer_uptodate(next)) {
2282 struct btrfs_key node_key;
2284 btrfs_node_key_to_cpu(path->nodes[*level],
2286 path->slots[*level]);
2287 btrfs_add_corrupt_extent_record(root->fs_info,
2289 path->nodes[*level]->start,
2290 root->fs_info->nodesize,
/* Cross-check the child against what the parent slot says about it. */
2297 ret = check_child_node(cur, path->slots[*level], next);
2299 free_extent_buffer(next);
/* Structural sanity check of the child block itself. */
2304 if (btrfs_is_leaf(next))
2305 status = btrfs_check_leaf(root, NULL, next);
2307 status = btrfs_check_node(root, NULL, next);
2308 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2309 free_extent_buffer(next);
/* Step down: the child becomes the path entry one level lower. */
2314 *level = *level - 1;
2315 free_extent_buffer(path->nodes[*level]);
2316 path->nodes[*level] = next;
2317 path->slots[*level] = 0;
/* Mark the block as fully consumed so the walk-up moves past it. */
2320 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/* Forward declaration; check_inode_item() is used by the lowmem walk. */
2324 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2325 unsigned int ext_ref);
2328 * Returns >0 Found error, should continue
2329 * Returns <0 Fatal error, must exit the whole check
2330 * Returns 0 No errors found
/*
 * Descent used together with process_one_leaf_v2(): starting at
 * path->nodes[*level], validate each block with btrfs_check_leaf() /
 * btrfs_check_node(), process leaves, and follow node pointers downwards.
 * Children whose need_check flag in @nrefs is clear are skipped.
 */
2332 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2333 int *level, struct node_refs *nrefs, int ext_ref)
2335 enum btrfs_tree_block_status status;
2338 struct btrfs_fs_info *fs_info = root->fs_info;
2339 struct extent_buffer *next;
2340 struct extent_buffer *cur;
2343 WARN_ON(*level < 0);
2344 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Make sure @nrefs describes the block we start from. */
2346 ret = update_nodes_refs(root, path->nodes[*level]->start,
2351 while (*level >= 0) {
2352 WARN_ON(*level < 0);
2353 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2354 cur = path->nodes[*level];
/* The block's own header must agree with the level we think we are on. */
2356 if (btrfs_header_level(cur) != *level)
/* All slots of this block have been visited. */
2359 if (path->slots[*level] >= btrfs_header_nritems(cur))
2361 /* Don't forget to check leaf/node validation */
2363 ret = btrfs_check_leaf(root, NULL, cur);
2364 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2368 ret = process_one_leaf_v2(root, path, nrefs,
/* process_one_leaf_v2() may have changed the path; reload cur. */
2370 cur = path->nodes[*level];
2373 ret = btrfs_check_node(root, NULL, cur);
2374 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
/* Interior node: pick up the child pointer at the current slot. */
2379 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2380 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2382 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
/* Child does not need checking in this root: move on to the next slot. */
2385 if (!nrefs->need_check[*level - 1]) {
2386 path->slots[*level]++;
/* Use a cached, up-to-date buffer if there is one; otherwise read from disk. */
2390 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2391 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2392 free_extent_buffer(next);
2393 reada_walk_down(root, cur, path->slots[*level]);
2394 next = read_tree_block(fs_info, bytenr, ptr_gen);
/* Read failed: record the parent block as a corrupt extent. */
2395 if (!extent_buffer_uptodate(next)) {
2396 struct btrfs_key node_key;
2398 btrfs_node_key_to_cpu(path->nodes[*level],
2400 path->slots[*level]);
2401 btrfs_add_corrupt_extent_record(fs_info,
2403 path->nodes[*level]->start,
/* Cross-check the child against what the parent slot says about it. */
2411 ret = check_child_node(cur, path->slots[*level], next);
/* Structural sanity check of the child block itself. */
2415 if (btrfs_is_leaf(next))
2416 status = btrfs_check_leaf(root, NULL, next);
2418 status = btrfs_check_node(root, NULL, next);
2419 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2420 free_extent_buffer(next);
/* Step down: the child becomes the path entry one level lower. */
2425 *level = *level - 1;
2426 free_extent_buffer(path->nodes[*level]);
2427 path->nodes[*level] = next;
2428 path->slots[*level] = 0;
/*
 * Original-mode ascent: starting at *level, look for the lowest level that
 * still has an unvisited slot.  Levels that are exhausted have their path
 * buffer released, and the active shared node is left when the walk moves
 * above it.
 */
2433 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2434 struct walk_control *wc, int *level)
2437 struct extent_buffer *leaf;
2439 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2440 leaf = path->nodes[i];
/* There is another slot to visit in this block. */
2441 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* Block exhausted: drop it from the path. */
2446 free_extent_buffer(path->nodes[*level]);
2447 path->nodes[*level] = NULL;
2448 BUG_ON(*level > wc->active_node);
2449 if (*level == wc->active_node)
2450 leave_shared_node(root, wc, *level);
/*
 * Ascent counterpart of walk_down_tree_v2(): starting at *level, look for
 * the lowest level that still has an unvisited slot, releasing the path
 * buffers of exhausted levels on the way up.
 */
2457 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2461 struct extent_buffer *leaf;
2463 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2464 leaf = path->nodes[i];
/* There is another slot to visit in this block. */
2465 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* Block exhausted: drop it from the path. */
2470 free_extent_buffer(path->nodes[*level]);
2471 path->nodes[*level] = NULL;
/*
 * Sanity-check the inode record of a root directory.  The visible tests
 * look at: an inode item with no recorded errors, nlink == 1 with no found
 * links, a non-empty backref list, and a first backref that is an inode ref
 * named ".." at index 0 without any dir index/dir item.
 */
2478 static int check_root_dir(struct inode_record *rec)
2480 struct inode_backref *backref;
2483 if (!rec->found_inode_item || rec->errors)
2485 if (rec->nlink != 1 || rec->found_link != 0)
2487 if (list_empty(&rec->backrefs))
/* Only the first backref on the list is inspected. */
2489 backref = to_inode_backref(rec->backrefs.next);
2490 if (!backref->found_inode_ref)
2492 if (backref->index != 0 || backref->namelen != 2 ||
2493 memcmp(backref->name, "..", 2))
2495 if (backref->found_dir_index || backref->found_dir_item)
2502 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2503 struct btrfs_root *root, struct btrfs_path *path,
2504 struct inode_record *rec)
2506 struct btrfs_inode_item *ei;
2507 struct btrfs_key key;
2510 key.objectid = rec->ino;
2511 key.type = BTRFS_INODE_ITEM_KEY;
2512 key.offset = (u64)-1;
2514 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2518 if (!path->slots[0]) {
2525 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2526 if (key.objectid != rec->ino) {
2531 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2532 struct btrfs_inode_item);
2533 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2534 btrfs_mark_buffer_dirty(path->nodes[0]);
2535 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2536 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2537 root->root_key.objectid);
2539 btrfs_release_path(path);
/*
 * Add the missing orphan item for inode rec->ino and clear
 * I_ERR_NO_ORPHAN_ITEM on the record.  The path is released after the
 * insertion attempt.
 */
2543 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2544 struct btrfs_root *root,
2545 struct btrfs_path *path,
2546 struct inode_record *rec)
2550 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2551 btrfs_release_path(path);
2553 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Reset the on-disk nbytes of inode rec->ino to the value computed during
 * the check (rec->found_size) and clear I_ERR_FILE_NBYTES_WRONG.  The path
 * is released before returning.
 */
2557 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2558 struct btrfs_root *root,
2559 struct btrfs_path *path,
2560 struct inode_record *rec)
2562 struct btrfs_inode_item *ei;
2563 struct btrfs_key key;
2566 key.objectid = rec->ino;
2567 key.type = BTRFS_INODE_ITEM_KEY;
/* Search with cow=1 so the leaf can be modified in this transaction. */
2570 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2577 /* Since ret == 0, no need to check anything */
2578 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2579 struct btrfs_inode_item);
2580 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2581 btrfs_mark_buffer_dirty(path->nodes[0]);
2582 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2583 printf("reset nbytes for ino %llu root %llu\n",
2584 rec->ino, root->root_key.objectid);
2586 btrfs_release_path(path);
/*
 * Insert the DIR_INDEX item that @backref says is missing: an item keyed
 * (backref->dir, DIR_INDEX, backref->index) that points at inode rec->ino
 * with the backref's name.  Runs in its own transaction, then updates the
 * in-memory state of the backref and of the parent directory's record.
 */
2590 static int add_missing_dir_index(struct btrfs_root *root,
2591 struct cache_tree *inode_cache,
2592 struct inode_record *rec,
2593 struct inode_backref *backref)
2595 struct btrfs_path path;
2596 struct btrfs_trans_handle *trans;
2597 struct btrfs_dir_item *dir_item;
2598 struct extent_buffer *leaf;
2599 struct btrfs_key key;
2600 struct btrfs_disk_key disk_key;
2601 struct inode_record *dir_rec;
2602 unsigned long name_ptr;
/* Item payload: the dir_item header followed by the name (no extra data). */
2603 u32 data_size = sizeof(*dir_item) + backref->namelen;
2606 trans = btrfs_start_transaction(root, 1);
2608 return PTR_ERR(trans);
2610 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2611 (unsigned long long)rec->ino);
2613 btrfs_init_path(&path);
2614 key.objectid = backref->dir;
2615 key.type = BTRFS_DIR_INDEX_KEY;
2616 key.offset = backref->index;
2617 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2620 leaf = path.nodes[0];
2621 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* Location key stored in the dir item: the inode item of rec->ino. */
2623 disk_key.objectid = cpu_to_le64(rec->ino);
2624 disk_key.type = BTRFS_INODE_ITEM_KEY;
2625 disk_key.offset = 0;
2627 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2628 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2629 btrfs_set_dir_data_len(leaf, dir_item, 0);
2630 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is written directly after the dir_item header. */
2631 name_ptr = (unsigned long)(dir_item + 1);
2632 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2633 btrfs_mark_buffer_dirty(leaf);
2634 btrfs_release_path(&path);
2635 btrfs_commit_transaction(trans, root);
/* Reflect the repair in the in-memory records. */
2637 backref->found_dir_index = 1;
2638 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2639 BUG_ON(IS_ERR(dir_rec));
/* The new entry adds its name length to the directory's computed size. */
2642 dir_rec->found_size += backref->namelen;
2643 if (dir_rec->found_size == dir_rec->isize &&
2644 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2645 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2646 if (dir_rec->found_size != dir_rec->isize)
2647 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by @backref (directory, name and
 * index) from @root, in its own transaction.
 */
2652 static int delete_dir_index(struct btrfs_root *root,
2653 struct inode_backref *backref)
2655 struct btrfs_trans_handle *trans;
2656 struct btrfs_dir_item *di;
2657 struct btrfs_path path;
2660 trans = btrfs_start_transaction(root, 1);
2662 return PTR_ERR(trans);
2664 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2665 (unsigned long long)backref->dir,
2666 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2667 (unsigned long long)root->objectid);
2669 btrfs_init_path(&path);
/* Look the entry up for modification (last argument -1). */
2670 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2671 backref->name, backref->namelen,
2672 backref->index, -1);
/* Early-exit path: release and commit without deleting anything. */
2675 btrfs_release_path(&path);
2676 btrfs_commit_transaction(trans, root);
/* Either the whole item is removed, or only this name within the item. */
2683 ret = btrfs_del_item(trans, root, &path);
2685 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2687 btrfs_release_path(&path);
2688 btrfs_commit_transaction(trans, root);
/*
 * Recreate a missing INODE_ITEM for rec->ino from what the check observed.
 *
 * The item is built on the stack: generation from the transaction, nbytes
 * from rec->found_size, mode guessed as a directory when dir items were
 * seen and as a regular file otherwise, atime/ctime/mtime set to the
 * current time and otime to zero.  It is then inserted and the transaction
 * committed.
 */
2692 static int create_inode_item(struct btrfs_root *root,
2693 struct inode_record *rec,
2696 struct btrfs_trans_handle *trans;
2697 struct btrfs_inode_item inode_item;
2698 time_t now = time(NULL);
2701 trans = btrfs_start_transaction(root, 1);
2702 if (IS_ERR(trans)) {
2703 ret = PTR_ERR(trans);
2707 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2708 "be incomplete, please check permissions and content after "
2709 "the fsck completes.\n", (unsigned long long)root->objectid,
2710 (unsigned long long)rec->ino);
2712 memset(&inode_item, 0, sizeof(inode_item));
2713 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
/* nlink is either fixed at 1 or taken from the number of links found. */
2715 btrfs_set_stack_inode_nlink(&inode_item, 1);
2717 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2718 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2719 if (rec->found_dir_item) {
2720 if (rec->found_file_extent)
2721 fprintf(stderr, "root %llu inode %llu has both a dir "
2722 "item and extents, unsure if it is a dir or a "
2723 "regular file so setting it as a directory\n",
2724 (unsigned long long)root->objectid,
2725 (unsigned long long)rec->ino);
2726 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2727 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/* No dir items seen: treat it as a regular file that ends at extent_end. */
2728 } else if (!rec->found_dir_item) {
2729 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2730 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2732 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2733 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2734 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2735 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2736 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2737 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2738 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2739 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2741 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2743 btrfs_commit_transaction(trans, root);
/*
 * Walk rec->backrefs and repair inconsistent combinations of dir item,
 * dir index and inode ref for one inode record.
 *
 * The `delete` flag tested below (its declaration is not visible in this
 * listing) selects between a pass that removes bad dir indexes and a pass
 * that adds missing items.  The visible cases are:
 *   - root directory without an inode item: recreate the inode item;
 *   - dir index without inode ref, or with a mismatching index: delete the
 *     dir index and drop the backref from the list;
 *   - dir item + inode ref but no dir index: add the missing dir index;
 *   - only an inode ref: insert the missing dir item/index pair;
 *   - all three present, index matching, but no inode item: recreate it.
 *
 * Returns ret when it is non-zero, otherwise the `repaired` counter.
 */
2747 static int repair_inode_backrefs(struct btrfs_root *root,
2748 struct inode_record *rec,
2749 struct cache_tree *inode_cache,
2752 struct inode_backref *tmp, *backref;
2753 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2757 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2758 if (!delete && rec->ino == root_dirid) {
2759 if (!rec->found_inode_item) {
2760 ret = create_inode_item(root, rec, 1);
2767 /* Index 0 for root dir's are special, don't mess with it */
2768 if (rec->ino == root_dirid && backref->index == 0)
2772 ((backref->found_dir_index && !backref->found_inode_ref) ||
2773 (backref->found_dir_index && backref->found_inode_ref &&
2774 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2775 ret = delete_dir_index(root, backref);
2779 list_del(&backref->list);
/* Dir item and inode ref exist but the dir index is missing: re-add it. */
2784 if (!delete && !backref->found_dir_index &&
2785 backref->found_dir_item && backref->found_inode_ref) {
2786 ret = add_missing_dir_index(root, inode_cache, rec,
/*
 * A backref that has dir item, dir index and inode ref and carries no error
 * bits needs no repair; it is unlinked from the list.
 */
2791 if (backref->found_dir_item &&
2792 backref->found_dir_index) {
2793 if (!backref->errors &&
2794 backref->found_inode_ref) {
2795 list_del(&backref->list);
/*
 * Only the inode ref exists: after check_dir_conflict() the dir item/index
 * pair is inserted in a transaction of its own.  NOTE(review): how a reported
 * conflict is handled is on lines not visible here.
 */
2802 if (!delete && (!backref->found_dir_index &&
2803 !backref->found_dir_item &&
2804 backref->found_inode_ref)) {
2805 struct btrfs_trans_handle *trans;
2806 struct btrfs_key location;
2808 ret = check_dir_conflict(root, backref->name,
2814 * let nlink fixing routine to handle it,
2815 * which can do it better.
2820 location.objectid = rec->ino;
2821 location.type = BTRFS_INODE_ITEM_KEY;
2822 location.offset = 0;
2824 trans = btrfs_start_transaction(root, 1);
2825 if (IS_ERR(trans)) {
2826 ret = PTR_ERR(trans);
2829 fprintf(stderr, "adding missing dir index/item pair "
2831 (unsigned long long)rec->ino);
2832 ret = btrfs_insert_dir_item(trans, root, backref->name,
2834 backref->dir, &location,
2835 imode_to_type(rec->imode),
2838 btrfs_commit_transaction(trans, root);
/* Every ref is present and consistent, but the inode item is missing. */
2842 if (!delete && (backref->found_inode_ref &&
2843 backref->found_dir_index &&
2844 backref->found_dir_item &&
2845 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2846 !rec->found_inode_item)) {
2847 ret = create_inode_item(root, rec, 0);
2854 return ret ? ret : repaired;
2858 * To determine the file type for nlink/inode_item repair
2860 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2861 * Return -ENOENT if file type is not found.
/*
 * Store the BTRFS_FT_* type of the inode described by @rec into *type.
 *
 * If the inode item was found, the type is derived from rec->imode with
 * imode_to_type().  Otherwise the backrefs are scanned and the filetype of a
 * backref that has a dir index or dir item is used.
 *
 * NOTE(review): the return statements are not visible in this listing.
 */
2863 static int find_file_type(struct inode_record *rec, u8 *type)
2865 struct inode_backref *backref;
2867 /* For inode item recovered case */
2868 if (rec->found_inode_item) {
2869 *type = imode_to_type(rec->imode);
2873 list_for_each_entry(backref, &rec->backrefs, list) {
2874 if (backref->found_dir_index || backref->found_dir_item) {
2875 *type = backref->filetype;
2883 * To determine the file name for nlink repair
2885 * Return 0 if file name is found, set name and namelen.
2886 * Return -ENOENT if file name is not found.
/*
 * Copy a file name for the inode described by @rec into @name and store its
 * length in *namelen.
 *
 * The name is taken from a backref that has a dir index, a dir item or an
 * inode ref.  backref->namelen bytes are copied with memcpy(); the visible
 * code neither checks the size of @name nor appends a terminating NUL, so the
 * caller must provide a large enough buffer.
 *
 * NOTE(review): the return statements are not visible in this listing.
 */
2888 static int find_file_name(struct inode_record *rec,
2889 char *name, int *namelen)
2891 struct inode_backref *backref;
2893 list_for_each_entry(backref, &rec->backrefs, list) {
2894 if (backref->found_dir_index || backref->found_dir_item ||
2895 backref->found_inode_ref) {
2896 memcpy(name, backref->name, backref->namelen);
2897 *namelen = backref->namelen;
2904 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of rec->ino from scratch.
 *
 * Steps visible below:
 *   1. rec->found_link is reset to 0;
 *   2. btrfs_unlink() is called for every backref, and backrefs that lack a
 *      dir index, dir item or inode ref are dropped from the list;
 *   3. the inode item is looked up and its nlink is set to 0;
 *   4. btrfs_add_link() is called for each backref still on the list.
 *
 * @path is released before returning.  NOTE(review): error checks after the
 * individual calls are on lines not visible in this listing.
 */
2905 static int reset_nlink(struct btrfs_trans_handle *trans,
2906 struct btrfs_root *root,
2907 struct btrfs_path *path,
2908 struct inode_record *rec)
2910 struct inode_backref *backref;
2911 struct inode_backref *tmp;
2912 struct btrfs_key key;
2913 struct btrfs_inode_item *inode_item;
2916 /* We don't believe this either, reset it and iterate backref */
2917 rec->found_link = 0;
2919 /* Remove all backref including the valid ones */
2920 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2921 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2922 backref->index, backref->name,
2923 backref->namelen, 0);
2927 /* remove invalid backref, so it won't be added back */
2928 if (!(backref->found_dir_index &&
2929 backref->found_dir_item &&
2930 backref->found_inode_ref)) {
2931 list_del(&backref->list);
2938 /* Set nlink to 0 */
2939 key.objectid = rec->ino;
2940 key.type = BTRFS_INODE_ITEM_KEY;
2942 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2949 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2950 struct btrfs_inode_item);
2951 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2952 btrfs_mark_buffer_dirty(path->nodes[0]);
2953 btrfs_release_path(path);
2956 * Add back valid inode_ref/dir_item/dir_index,
2957 * add_link() will handle the nlink inc, so new nlink must be correct
2959 list_for_each_entry(backref, &rec->backrefs, list) {
2960 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2961 backref->name, backref->namelen,
2962 backref->filetype, &backref->index, 1);
2967 btrfs_release_path(path);
/*
 * Find the highest inode objectid currently present in @root and store it in
 * *highest_ino.
 *
 * The tree is searched for an INODE_ITEM key with objectid
 * BTRFS_LAST_FREE_OBJECTID; the key in the slot just before the returned
 * position supplies the objectid.  A result at or above
 * BTRFS_LAST_FREE_OBJECTID is tested separately.
 *
 * NOTE(review): the guard around the `slots[0] - 1` access and the action
 * taken for an out-of-range result are on lines not visible in this listing;
 * confirm that slot 0 cannot reach the subtraction.
 */
2971 static int get_highest_inode(struct btrfs_trans_handle *trans,
2972 struct btrfs_root *root,
2973 struct btrfs_path *path,
2976 struct btrfs_key key, found_key;
2979 btrfs_init_path(path);
2980 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2982 key.type = BTRFS_INODE_ITEM_KEY;
2983 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2985 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2986 path->slots[0] - 1);
2987 *highest_ino = found_key.objectid;
2990 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2992 btrfs_release_path(path);
/*
 * Repair a wrong link count for the inode described by @rec.
 *
 * The file name and type are recovered first, because reset_nlink() removes
 * the backrefs they come from.  Fallbacks are the decimal inode number as the
 * name and BTRFS_FT_REG_FILE as the type.  After reset_nlink(), an inode that
 * still has no link (rec->found_link == 0) is linked into a "lost+found"
 * directory created under BTRFS_FIRST_FREE_OBJECTID.  While btrfs_add_link()
 * reports -EEXIST the name is extended with a suffix and the link is retried.
 *
 * I_ERR_LINK_COUNT_WRONG is cleared on the way out and @path is released.
 *
 * NOTE(review): several declarations, error checks and the suffix format
 * string are on lines not visible in this listing.
 */
2996 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2997 struct btrfs_root *root,
2998 struct btrfs_path *path,
2999 struct inode_record *rec)
3001 char *dir_name = "lost+found";
3002 char namebuf[BTRFS_NAME_LEN] = {0};
3007 int name_recovered = 0;
3008 int type_recovered = 0;
3012 * Get file name and type first before these invalid inode ref
3013 * are deleted by remove_all_invalid_backref()
3015 name_recovered = !find_file_name(rec, namebuf, &namelen);
3016 type_recovered = !find_file_type(rec, &type);
3018 if (!name_recovered) {
3019 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3020 rec->ino, rec->ino);
3021 namelen = count_digits(rec->ino);
3022 sprintf(namebuf, "%llu", rec->ino);
3025 if (!type_recovered) {
3026 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3028 type = BTRFS_FT_REG_FILE;
3032 ret = reset_nlink(trans, root, path, rec);
3035 "Failed to reset nlink for inode %llu: %s\n",
3036 rec->ino, strerror(-ret));
/* No valid link survived the reset: move the inode into lost+found. */
3040 if (rec->found_link == 0) {
3041 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3045 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3046 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3049 fprintf(stderr, "Failed to create '%s' dir: %s\n",
3050 dir_name, strerror(-ret));
3053 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
3054 namebuf, namelen, type, NULL, 1);
3056 * Add ".INO" suffix several times to handle case where
3057 * "FILENAME.INO" is already taken by another file.
3059 while (ret == -EEXIST) {
3061 * Conflicting file name, add ".INO" as suffix * +1 for '.'
3063 if (namelen + count_digits(rec->ino) + 1 >
3068 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
3070 namelen += count_digits(rec->ino) + 1;
3071 ret = btrfs_add_link(trans, root, rec->ino,
3072 lost_found_ino, namebuf,
3073 namelen, type, NULL, 1);
3077 "Failed to link the inode %llu to %s dir: %s\n",
3078 rec->ino, dir_name, strerror(-ret));
3082 * Just increase the found_link, don't actually add the
3083 * backref. This will make things easier and this inode
3084 * record will be freed after the repair is done.
3085 * So fsck will not report problem about this inode.
3088 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3089 namelen, namebuf, dir_name);
3091 printf("Fixed the nlink of inode %llu\n", rec->ino);
3094 * Clear the flag anyway, or we will loop forever for the same inode
3095 * as it will not be removed from the bad inode list and the dead loop
3098 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3099 btrfs_release_path(path);
3104 * Check if there is any normal(reg or prealloc) file extent for given
3106 * This is used to determine the file type when neither its dir_index/item or
3107 * inode_item exists.
3109 * This will *NOT* report error, if any error happens, just consider it does
3110 * not have any normal file extent.
/*
 * Look at the file extent items of inode @ino in @root.
 *
 * A read-only search (no transaction, no COW) is positioned on an
 * EXTENT_DATA key; when the search lands past the last item of a leaf the
 * next leaf is loaded.  The item found must belong to @ino and be an
 * EXTENT_DATA item; its extent type is then compared against
 * BTRFS_FILE_EXTENT_INLINE.
 *
 * NOTE(review): the key's objectid/offset setup, the value assigned when a
 * non-inline extent is seen, and the return statement are on lines not
 * visible in this listing.
 */
3112 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3114 struct btrfs_path path;
3115 struct btrfs_key key;
3116 struct btrfs_key found_key;
3117 struct btrfs_file_extent_item *fi;
3121 btrfs_init_path(&path);
3123 key.type = BTRFS_EXTENT_DATA_KEY;
3126 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3131 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3132 ret = btrfs_next_leaf(root, &path);
3139 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3141 if (found_key.objectid != ino ||
3142 found_key.type != BTRFS_EXTENT_DATA_KEY)
3144 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3145 struct btrfs_file_extent_item);
3146 type = btrfs_file_extent_type(path.nodes[0], fi);
3147 if (type != BTRFS_FILE_EXTENT_INLINE) {
3153 btrfs_release_path(&path);
3157 static u32 btrfs_type_to_imode(u8 type)
3159 static u32 imode_by_btrfs_type[] = {
3160 [BTRFS_FT_REG_FILE] = S_IFREG,
3161 [BTRFS_FT_DIR] = S_IFDIR,
3162 [BTRFS_FT_CHRDEV] = S_IFCHR,
3163 [BTRFS_FT_BLKDEV] = S_IFBLK,
3164 [BTRFS_FT_FIFO] = S_IFIFO,
3165 [BTRFS_FT_SOCK] = S_IFSOCK,
3166 [BTRFS_FT_SYMLINK] = S_IFLNK,
3169 return imode_by_btrfs_type[(type)];
/*
 * Rebuild the missing inode item of the inode described by @rec.
 *
 * The file type comes from find_file_type() when possible.  Otherwise it is
 * guessed in this order: a normal (non-inline) file extent means a regular
 * file, a found dir item means a directory, orphan extents mean a regular
 * file, and anything else falls back to a regular file with a message.
 * btrfs_new_inode() then creates the item with `mode` OR-ed with the type
 * bits.
 *
 * Only the inode item is rebuilt here.  The record is updated,
 * I_ERR_NO_INODE_ITEM is cleared, and I_ERR_LINK_COUNT_WRONG is set so that
 * the nlink repair runs afterwards.
 *
 * NOTE(review): `mode`, `filetype` and `ret` are declared on lines not
 * visible in this listing.
 */
3172 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3173 struct btrfs_root *root,
3174 struct btrfs_path *path,
3175 struct inode_record *rec)
3179 int type_recovered = 0;
3182 printf("Trying to rebuild inode:%llu\n", rec->ino);
3184 type_recovered = !find_file_type(rec, &filetype);
3187 * Try to determine inode type if type not found.
3189 * For found regular file extent, it must be FILE.
3190 * For found dir_item/index, it must be DIR.
3192 * For undetermined one, use FILE as fallback.
3195 * 1. If found backref(inode_index/item is already handled) to it,
3197 * Need new inode-inode ref structure to allow search for that.
3199 if (!type_recovered) {
3200 if (rec->found_file_extent &&
3201 find_normal_file_extent(root, rec->ino)) {
3203 filetype = BTRFS_FT_REG_FILE;
3204 } else if (rec->found_dir_item) {
3206 filetype = BTRFS_FT_DIR;
3207 } else if (!list_empty(&rec->orphan_extents)) {
3209 filetype = BTRFS_FT_REG_FILE;
3211 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3214 filetype = BTRFS_FT_REG_FILE;
3218 ret = btrfs_new_inode(trans, root, rec->ino,
3219 mode | btrfs_type_to_imode(filetype));
3224 * Here inode rebuild is done, we only rebuild the inode item,
3225 * don't repair the nlink(like move to lost+found).
3226 * That is the job of nlink repair.
3228 * We just fill the record and return
/*
 * NOTE(review): the flag set here is found_dir_item, although what was just
 * rebuilt is the inode item -- confirm that found_inode_item was not meant.
 */
3230 rec->found_dir_item = 1;
3231 rec->imode = mode | btrfs_type_to_imode(filetype);
3233 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3234 /* Ensure the inode_nlinks repair function will be called */
3235 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach the orphan data extents recorded in rec->orphan_extents.
 *
 * For each orphan, btrfs_get_extent() is used to probe for a conflicting
 * file extent over [offset, offset + disk_len).  The visible code has two
 * outcomes: the orphan's disk extent is released with btrfs_free_extent()
 * after a "conflicts" message, or a file extent item is inserted with
 * disk_len used for both length arguments.  After an insertion the record's
 * found_size and hole tree are updated, the related error bits are cleared
 * once they no longer apply, and the orphan is removed from the list.
 * I_ERR_FILE_EXTENT_ORPHAN is cleared at the end.
 *
 * NOTE(review): the conditions separating the two outcomes and the error
 * exits are on lines not visible in this listing.
 */
3240 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3241 struct btrfs_root *root,
3242 struct btrfs_path *path,
3243 struct inode_record *rec)
3245 struct orphan_data_extent *orphan;
3246 struct orphan_data_extent *tmp;
3249 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3251 * Check for conflicting file extents
3253 * Here we don't know whether the extents is compressed or not,
3254 * so we can only assume it not compressed nor data offset,
3255 * and use its disk_len as extent length.
3257 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3258 orphan->offset, orphan->disk_len, 0);
3259 btrfs_release_path(path);
3264 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3265 orphan->disk_bytenr, orphan->disk_len);
3266 ret = btrfs_free_extent(trans,
3267 root->fs_info->extent_root,
3268 orphan->disk_bytenr, orphan->disk_len,
3269 0, root->objectid, orphan->objectid,
3274 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3275 orphan->offset, orphan->disk_bytenr,
3276 orphan->disk_len, orphan->disk_len);
3280 /* Update file size info */
3281 rec->found_size += orphan->disk_len;
3282 if (rec->found_size == rec->nbytes)
3283 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3285 /* Update the file extent hole info too */
3286 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3290 if (RB_EMPTY_ROOT(&rec->holes))
3291 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3293 list_del(&orphan->list);
3296 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps ("discount" file extents) recorded in rec->holes.
 *
 * The first node of the hole tree is taken repeatedly: btrfs_punch_hole() is
 * called for its range, the hole is removed from the tree, and
 * I_ERR_FILE_EXTENT_DISCOUNT is cleared once the tree is empty.  A separate
 * case punches one hole from offset 0 up to rec->isize rounded up to the
 * sector size, for a file that lost all of its file extents.
 *
 * NOTE(review): the loop header, the condition guarding the special case and
 * the error exits are on lines not visible in this listing.
 */
3301 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3302 struct btrfs_root *root,
3303 struct btrfs_path *path,
3304 struct inode_record *rec)
3306 struct rb_node *node;
3307 struct file_extent_hole *hole;
3311 node = rb_first(&rec->holes);
3315 hole = rb_entry(node, struct file_extent_hole, node);
3316 ret = btrfs_punch_hole(trans, root, rec->ino,
3317 hole->start, hole->len);
3320 ret = del_file_extent_hole(&rec->holes, hole->start,
3324 if (RB_EMPTY_ROOT(&rec->holes))
3325 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3326 node = rb_first(&rec->holes);
3328 /* special case for a file losing all its file extent */
3330 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3331 round_up(rec->isize,
3332 root->fs_info->sectorsize));
3336 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3337 rec->ino, root->objectid);
/*
 * Run every applicable inode repair for @rec inside one transaction.
 *
 * Only the error bits listed in the first test are handled here.  The repairs
 * run in a fixed order and each one runs only while ret is still 0:
 * missing inode item, orphan extents, discount extents, directory isize,
 * orphan item, link count, nbytes.  The order matters because earlier repairs
 * set or clear bits that later ones look at (rebuilding the inode item sets
 * I_ERR_LINK_COUNT_WRONG, for example).
 *
 * The transaction is committed and the path released regardless of the
 * outcome.  NOTE(review): the early return value and the final return are on
 * lines not visible in this listing.
 */
3342 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3344 struct btrfs_trans_handle *trans;
3345 struct btrfs_path path;
3348 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3349 I_ERR_NO_ORPHAN_ITEM |
3350 I_ERR_LINK_COUNT_WRONG |
3351 I_ERR_NO_INODE_ITEM |
3352 I_ERR_FILE_EXTENT_ORPHAN |
3353 I_ERR_FILE_EXTENT_DISCOUNT|
3354 I_ERR_FILE_NBYTES_WRONG)))
3358 * For nlink repair, it may create a dir and add link, so
3359 * 2 for parent(256)'s dir_index and dir_item
3360 * 2 for lost+found dir's inode_item and inode_ref
3361 * 1 for the new inode_ref of the file
3362 * 2 for lost+found dir's dir_index and dir_item for the file
3364 trans = btrfs_start_transaction(root, 7);
3366 return PTR_ERR(trans);
3368 btrfs_init_path(&path);
3369 if (rec->errors & I_ERR_NO_INODE_ITEM)
3370 ret = repair_inode_no_item(trans, root, &path, rec);
3371 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3372 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3373 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3374 ret = repair_inode_discount_extent(trans, root, &path, rec);
3375 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3376 ret = repair_inode_isize(trans, root, &path, rec);
3377 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3378 ret = repair_inode_orphan_item(trans, root, &path, rec);
3379 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3380 ret = repair_inode_nlinks(trans, root, &path, rec);
3381 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3382 ret = repair_inode_nbytes(trans, root, &path, rec);
3383 btrfs_commit_transaction(trans, root);
3384 btrfs_release_path(&path);
/*
 * Validate, and in repair mode fix, every inode record collected for @root
 * in @inode_cache.
 *
 * Phases visible below:
 *   1. in repair mode, walk the cache and call repair_inode_backrefs() for
 *      records that have backrefs (the `stage` variable drives repeated
 *      passes);
 *   2. check the root directory record, recreating the root dir in a
 *      transaction when it is missing;
 *   3. drain the cache: skip the root dir and the orphan objectid, settle
 *      I_ERR_NO_ORPHAN_ITEM via check_orphan_item(), flag a missing inode
 *      item and a wrong link count, attempt try_repair_inode(), and print
 *      the inode error plus every unresolved backref for whatever remains.
 *
 * Every record taken from the cache in phase 3 is freed.  Returns -1 when
 * `error` is positive, 0 otherwise.
 *
 * NOTE(review): many control-flow lines (loop headers, gotos, counters) are
 * not visible in this listing; the phase boundaries above are inferred from
 * the visible calls.
 */
3388 static int check_inode_recs(struct btrfs_root *root,
3389 struct cache_tree *inode_cache)
3391 struct cache_extent *cache;
3392 struct ptr_node *node;
3393 struct inode_record *rec;
3394 struct inode_backref *backref;
3399 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* A root whose item has zero refs: a non-empty inode cache only warns. */
3401 if (btrfs_root_refs(&root->root_item) == 0) {
3402 if (!cache_tree_empty(inode_cache))
3403 fprintf(stderr, "warning line %d\n", __LINE__);
3408 * We need to repair backrefs first because we could change some of the
3409 * errors in the inode recs.
3411 * We also need to go through and delete invalid backrefs first and then
3412 * add the correct ones second. We do this because we may get EEXIST
3413 * when adding back the correct index because we hadn't yet deleted the
3416 * For example, if we were missing a dir index then the directories
3417 * isize would be wrong, so if we fixed the isize to what we thought it
3418 * would be and then fixed the backref we'd still have a invalid fs, so
3419 * we need to add back the dir index and then check to see if the isize
3424 if (stage == 3 && !err)
3427 cache = search_cache_extent(inode_cache, 0);
3428 while (repair && cache) {
3429 node = container_of(cache, struct ptr_node, cache);
3431 cache = next_cache_extent(cache);
3433 /* Need to free everything up and rescan */
3435 remove_cache_extent(inode_cache, &node->cache);
3437 free_inode_rec(rec);
3441 if (list_empty(&rec->backrefs))
3444 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Root directory check; a missing root dir is recreated in repair mode. */
3458 rec = get_inode_rec(inode_cache, root_dirid, 0);
3459 BUG_ON(IS_ERR(rec));
3461 ret = check_root_dir(rec);
3463 fprintf(stderr, "root %llu root dir %llu error\n",
3464 (unsigned long long)root->root_key.objectid,
3465 (unsigned long long)root_dirid);
3466 print_inode_error(root, rec);
3471 struct btrfs_trans_handle *trans;
3473 trans = btrfs_start_transaction(root, 1);
3474 if (IS_ERR(trans)) {
3475 err = PTR_ERR(trans);
3480 "root %llu missing its root dir, recreating\n",
3481 (unsigned long long)root->objectid);
3483 ret = btrfs_make_root_dir(trans, root, root_dirid);
3486 btrfs_commit_transaction(trans, root);
3490 fprintf(stderr, "root %llu root dir %llu not found\n",
3491 (unsigned long long)root->root_key.objectid,
3492 (unsigned long long)root_dirid);
/* Drain the cache: each remaining record is checked, repaired or reported. */
3496 cache = search_cache_extent(inode_cache, 0);
3499 node = container_of(cache, struct ptr_node, cache);
3501 remove_cache_extent(inode_cache, &node->cache);
3503 if (rec->ino == root_dirid ||
3504 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3505 free_inode_rec(rec);
3509 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3510 ret = check_orphan_item(root, rec->ino);
3512 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3513 if (can_free_inode_rec(rec)) {
3514 free_inode_rec(rec);
3519 if (!rec->found_inode_item)
3520 rec->errors |= I_ERR_NO_INODE_ITEM;
3521 if (rec->found_link != rec->nlink)
3522 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3524 ret = try_repair_inode(root, rec);
3525 if (ret == 0 && can_free_inode_rec(rec)) {
3526 free_inode_rec(rec);
3532 if (!(repair && ret == 0))
3534 print_inode_error(root, rec);
/* Report each backref still on the record, tagging what it is missing. */
3535 list_for_each_entry(backref, &rec->backrefs, list) {
3536 if (!backref->found_dir_item)
3537 backref->errors |= REF_ERR_NO_DIR_ITEM;
3538 if (!backref->found_dir_index)
3539 backref->errors |= REF_ERR_NO_DIR_INDEX;
3540 if (!backref->found_inode_ref)
3541 backref->errors |= REF_ERR_NO_INODE_REF;
3542 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3543 " namelen %u name %s filetype %d errors %x",
3544 (unsigned long long)backref->dir,
3545 (unsigned long long)backref->index,
3546 backref->namelen, backref->name,
3547 backref->filetype, backref->errors);
3548 print_ref_error(backref->errors);
3550 free_inode_rec(rec);
3552 return (error > 0) ? -1 : 0;
/*
 * Look up the root_record for @objectid in @root_cache, creating it when it
 * does not exist yet.
 *
 * A new record is zero-allocated, gets an empty backref list and a cache
 * extent covering [objectid, objectid + 1), and is inserted into the cache.
 *
 * Returns the record, ERR_PTR(-ENOMEM) when allocation fails, or
 * ERR_PTR(-EEXIST) on the insertion failure path.
 * NOTE(review): the conditions guarding the two error returns are on lines
 * not visible in this listing, as is any cleanup of the new record.
 */
3555 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3558 struct cache_extent *cache;
3559 struct root_record *rec = NULL;
3562 cache = lookup_cache_extent(root_cache, objectid, 1);
3564 rec = container_of(cache, struct root_record, cache);
3566 rec = calloc(1, sizeof(*rec));
3568 return ERR_PTR(-ENOMEM);
3569 rec->objectid = objectid;
3570 INIT_LIST_HEAD(&rec->backrefs);
3571 rec->cache.start = objectid;
3572 rec->cache.size = 1;
3574 ret = insert_cache_extent(root_cache, &rec->cache);
3576 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec that matches (@ref_root, @dir, @name), creating
 * it when no match exists.
 *
 * Existing entries are compared on ref_root, dir, namelen and the name
 * bytes; @index does not take part in the match.  A new entry is
 * zero-allocated with namelen + 1 trailing bytes, so the copied name is
 * NUL-terminated, and is appended to rec->backrefs.
 *
 * NOTE(review): the return statements and the allocation-failure handling
 * are on lines not visible in this listing.
 */
3581 static struct root_backref *get_root_backref(struct root_record *rec,
3582 u64 ref_root, u64 dir, u64 index,
3583 const char *name, int namelen)
3585 struct root_backref *backref;
3587 list_for_each_entry(backref, &rec->backrefs, list) {
3588 if (backref->ref_root != ref_root || backref->dir != dir ||
3589 backref->namelen != namelen)
3591 if (memcmp(name, backref->name, namelen))
3596 backref = calloc(1, sizeof(*backref) + namelen + 1);
3599 backref->ref_root = ref_root;
3601 backref->index = index;
3602 backref->namelen = namelen;
3603 memcpy(backref->name, name, namelen);
3604 backref->name[namelen] = '\0';
3605 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Tear down the root_record that embeds @cache.
 *
 * Each backref is unlinked from rec->backrefs until the list is empty.
 * NOTE(review): the calls that release the backrefs and the record itself
 * are on lines not visible in this listing.
 */
3609 static void free_root_record(struct cache_extent *cache)
3611 struct root_record *rec;
3612 struct root_backref *backref;
3614 rec = container_of(cache, struct root_record, cache);
3615 while (!list_empty(&rec->backrefs)) {
3616 backref = to_root_backref(rec->backrefs.next);
3617 list_del(&backref->list);
/*
 * Instantiate the tree-freeing helper for root records, with
 * free_root_record() as the per-entry destructor.
 * NOTE(review): presumably this expands to free_root_recs_tree(), which is
 * called elsewhere in this file -- confirm against the macro definition.
 */
3624 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record in @root_cache that an item of kind @item_type referring to root
 * @root_id was seen from @ref_root.
 *
 * The matching root record and backref are looked up or created, @errors is
 * OR-ed into the backref, and then:
 *   - for anything but a DIR_ITEM, the index is compared with the one
 *     already stored (REF_ERR_INDEX_UNMATCH on mismatch) once another
 *     index-carrying item has been seen;
 *   - the found_* flag matching @item_type is set, and a second ROOT_REF or
 *     ROOT_BACKREF is flagged as a duplicate;
 *   - a backref with both a forward ref and a dir item is marked reachable.
 *
 * NOTE(review): the statements under some of the visible `if` lines (for
 * example what happens to rec->found_ref) are on lines not visible in this
 * listing.
 */
3626 static int add_root_backref(struct cache_tree *root_cache,
3627 u64 root_id, u64 ref_root, u64 dir, u64 index,
3628 const char *name, int namelen,
3629 int item_type, int errors)
3631 struct root_record *rec;
3632 struct root_backref *backref;
3634 rec = get_root_rec(root_cache, root_id);
3635 BUG_ON(IS_ERR(rec));
3636 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3639 backref->errors |= errors;
3641 if (item_type != BTRFS_DIR_ITEM_KEY) {
3642 if (backref->found_dir_index || backref->found_back_ref ||
3643 backref->found_forward_ref) {
3644 if (backref->index != index)
3645 backref->errors |= REF_ERR_INDEX_UNMATCH;
3647 backref->index = index;
3651 if (item_type == BTRFS_DIR_ITEM_KEY) {
3652 if (backref->found_forward_ref)
3654 backref->found_dir_item = 1;
3655 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3656 backref->found_dir_index = 1;
3657 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3658 if (backref->found_forward_ref)
3659 backref->errors |= REF_ERR_DUP_ROOT_REF;
3660 else if (backref->found_dir_item)
3662 backref->found_forward_ref = 1;
3663 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3664 if (backref->found_back_ref)
3665 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3666 backref->found_back_ref = 1;
3671 if (backref->found_forward_ref && backref->found_dir_item)
3672 backref->reachable = 1;
/*
 * Move the per-tree records in @src_cache into the global root cache
 * @dst_cache.
 *
 * For the tree-reloc root the source cache is simply freed.  Otherwise every
 * record is removed from @src_cache, is_child_root() is consulted for it,
 * and each of its backrefs is forwarded to add_root_backref() as a DIR_ITEM
 * and/or DIR_INDEX sighting of root rec->ino referenced from this root.
 * Backrefs here must not carry an inode ref (BUG_ON).  The record is freed
 * afterwards.
 *
 * NOTE(review): how the is_child_root() result is used and the return value
 * are on lines not visible in this listing.
 */
3676 static int merge_root_recs(struct btrfs_root *root,
3677 struct cache_tree *src_cache,
3678 struct cache_tree *dst_cache)
3680 struct cache_extent *cache;
3681 struct ptr_node *node;
3682 struct inode_record *rec;
3683 struct inode_backref *backref;
3686 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3687 free_inode_recs_tree(src_cache);
3692 cache = search_cache_extent(src_cache, 0);
3695 node = container_of(cache, struct ptr_node, cache);
3697 remove_cache_extent(src_cache, &node->cache);
3700 ret = is_child_root(root, root->objectid, rec->ino);
3706 list_for_each_entry(backref, &rec->backrefs, list) {
3707 BUG_ON(backref->found_inode_ref);
3708 if (backref->found_dir_item)
3709 add_root_backref(dst_cache, rec->ino,
3710 root->root_key.objectid, backref->dir,
3711 backref->index, backref->name,
3712 backref->namelen, BTRFS_DIR_ITEM_KEY,
3714 if (backref->found_dir_index)
3715 add_root_backref(dst_cache, rec->ino,
3716 root->root_key.objectid, backref->dir,
3717 backref->index, backref->name,
3718 backref->namelen, BTRFS_DIR_INDEX_KEY,
3722 free_inode_rec(rec);
/*
 * Verify the references between subvolume roots recorded in @root_cache.
 *
 * First pass: backrefs whose referencing root has no reference of its own
 * are marked unreachable (reachable = 0).  The existing fixme notes that
 * circular references are not detected.
 *
 * Second pass: for each record, an unreferenced root in the free objectid
 * range is checked for an orphan item in the tree root and reported as
 * "not referenced"; missing pieces of every backref are turned into
 * REF_ERR_* bits; and records with problems are printed together with their
 * unresolved backrefs.
 *
 * Returns 1 when `errors` is positive, 0 otherwise.
 *
 * NOTE(review): loop headers, counters and `continue`/`goto` lines are not
 * visible in this listing.
 */
3729 static int check_root_refs(struct btrfs_root *root,
3730 struct cache_tree *root_cache)
3732 struct root_record *rec;
3733 struct root_record *ref_root;
3734 struct root_backref *backref;
3735 struct cache_extent *cache;
3741 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3742 BUG_ON(IS_ERR(rec));
3745 /* fixme: this can not detect circular references */
3748 cache = search_cache_extent(root_cache, 0);
3752 rec = container_of(cache, struct root_record, cache);
3753 cache = next_cache_extent(cache);
3755 if (rec->found_ref == 0)
3758 list_for_each_entry(backref, &rec->backrefs, list) {
3759 if (!backref->reachable)
3762 ref_root = get_root_rec(root_cache,
3764 BUG_ON(IS_ERR(ref_root));
3765 if (ref_root->found_ref > 0)
3768 backref->reachable = 0;
3770 if (rec->found_ref == 0)
/* Second pass: report unreferenced roots and inconsistent backrefs. */
3776 cache = search_cache_extent(root_cache, 0);
3780 rec = container_of(cache, struct root_record, cache);
3781 cache = next_cache_extent(cache);
3783 if (rec->found_ref == 0 &&
3784 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3785 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3786 ret = check_orphan_item(root->fs_info->tree_root,
3792 * If we don't have a root item then we likely just have
3793 * a dir item in a snapshot for this root but no actual
3794 * ref key or anything so it's meaningless.
3796 if (!rec->found_root_item)
3799 fprintf(stderr, "fs tree %llu not referenced\n",
3800 (unsigned long long)rec->objectid);
3804 if (rec->found_ref > 0 && !rec->found_root_item)
3806 list_for_each_entry(backref, &rec->backrefs, list) {
3807 if (!backref->found_dir_item)
3808 backref->errors |= REF_ERR_NO_DIR_ITEM;
3809 if (!backref->found_dir_index)
3810 backref->errors |= REF_ERR_NO_DIR_INDEX;
3811 if (!backref->found_back_ref)
3812 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3813 if (!backref->found_forward_ref)
3814 backref->errors |= REF_ERR_NO_ROOT_REF;
3815 if (backref->reachable && backref->errors)
3822 fprintf(stderr, "fs tree %llu refs %u %s\n",
3823 (unsigned long long)rec->objectid, rec->found_ref,
3824 rec->found_root_item ? "" : "not found");
3826 list_for_each_entry(backref, &rec->backrefs, list) {
3827 if (!backref->reachable)
3829 if (!backref->errors && rec->found_root_item)
3831 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3832 " index %llu namelen %u name %s errors %x\n",
3833 (unsigned long long)backref->ref_root,
3834 (unsigned long long)backref->dir,
3835 (unsigned long long)backref->index,
3836 backref->namelen, backref->name,
3838 print_ref_error(backref->errors);
3841 return errors > 0 ? 1 : 0;
/*
 * Feed one ROOT_REF or ROOT_BACKREF item (leaf @eb, slot @slot, key @key)
 * into @root_cache.
 *
 * The directory id, sequence (index) and name are read from the item.  A
 * name longer than BTRFS_NAME_LEN is truncated to BTRFS_NAME_LEN and tagged
 * with REF_ERR_NAME_TOO_LONG.  For a ROOT_REF key the referenced root is
 * key->offset and the referencing root is key->objectid; the other call
 * passes the two the opposite way round.
 *
 * NOTE(review): the assignments in the "name fits" branch, the `else` lines
 * and the return value are on lines not visible in this listing.
 */
3844 static int process_root_ref(struct extent_buffer *eb, int slot,
3845 struct btrfs_key *key,
3846 struct cache_tree *root_cache)
3852 struct btrfs_root_ref *ref;
3853 char namebuf[BTRFS_NAME_LEN];
3856 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3858 dirid = btrfs_root_ref_dirid(eb, ref);
3859 index = btrfs_root_ref_sequence(eb, ref);
3860 name_len = btrfs_root_ref_name_len(eb, ref);
3862 if (name_len <= BTRFS_NAME_LEN) {
3866 len = BTRFS_NAME_LEN;
3867 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes are stored directly after the btrfs_root_ref structure. */
3869 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3871 if (key->type == BTRFS_ROOT_REF_KEY) {
3872 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3873 index, namebuf, len, key->type, error);
3875 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3876 index, namebuf, len, key->type, error);
/*
 * Destructor for one entry of a corrupt-blocks cache tree: resolves the
 * btrfs_corrupt_block that embeds @cache.
 * NOTE(review): the call that releases it is on a line not visible in this
 * listing.
 */
3881 static void free_corrupt_block(struct cache_extent *cache)
3883 struct btrfs_corrupt_block *corrupt;
3885 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/*
 * Instantiate the tree-freeing helper for corrupt-block records, with
 * free_corrupt_block() as the per-entry destructor.
 * NOTE(review): presumably this expands to free_corrupt_blocks_tree(), which
 * is called elsewhere in this file -- confirm against the macro definition.
 */
3889 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3892 * Repair the btree of the given root.
3894 * The fix is to remove the node key in corrupt_blocks cache_tree.
3895 * and rebalance the tree.
3896 * After the fix, the btree should be writeable.
/*
 * Remove every tree block listed in @corrupt_blocks from @root and let the
 * tree rebalance afterwards.
 *
 * Nothing is done for an empty list.  Otherwise, inside one transaction:
 *   1. for each corrupt block, the tree is searched down to the block's
 *      level with ins_len 0 (no balancing), the pointer found there is
 *      deleted with btrfs_del_ptr() and the block's extent is released with
 *      btrfs_free_extent();
 *   2. each recorded key is searched again with ins_len -1 so that
 *      btrfs_search_slot() balances the tree.
 * The transaction is committed and the path released at the end.
 *
 * NOTE(review): loop headers, error checks and the return value are on lines
 * not visible in this listing.
 */
3898 static int repair_btree(struct btrfs_root *root,
3899 struct cache_tree *corrupt_blocks)
3901 struct btrfs_trans_handle *trans;
3902 struct btrfs_path path;
3903 struct btrfs_corrupt_block *corrupt;
3904 struct cache_extent *cache;
3905 struct btrfs_key key;
3910 if (cache_tree_empty(corrupt_blocks))
3913 trans = btrfs_start_transaction(root, 1);
3914 if (IS_ERR(trans)) {
3915 ret = PTR_ERR(trans);
3916 fprintf(stderr, "Error starting transaction: %s\n",
3920 btrfs_init_path(&path);
3921 cache = first_cache_extent(corrupt_blocks);
3923 corrupt = container_of(cache, struct btrfs_corrupt_block,
3925 level = corrupt->level;
3926 path.lowest_level = level;
3927 key.objectid = corrupt->key.objectid;
3928 key.type = corrupt->key.type;
3929 key.offset = corrupt->key.offset;
3932 * Here we don't want to do any tree balance, since it may
3933 * cause a balance with corrupted brother leaf/node,
3934 * so ins_len set to 0 here.
3935 * Balance will be done after all corrupt node/leaf is deleted.
3937 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3940 offset = btrfs_node_blockptr(path.nodes[level],
3943 /* Remove the ptr */
3944 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3948 * Remove the corresponding extent
3949 * return value is not concerned.
3951 btrfs_release_path(&path);
3952 ret = btrfs_free_extent(trans, root, offset,
3953 root->fs_info->nodesize, 0,
3954 root->root_key.objectid, level - 1, 0);
3955 cache = next_cache_extent(cache);
3958 /* Balance the btree using btrfs_search_slot() */
3959 cache = first_cache_extent(corrupt_blocks);
3961 corrupt = container_of(cache, struct btrfs_corrupt_block,
3963 memcpy(&key, &corrupt->key, sizeof(key));
3964 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3967 /* return will always >0 since it won't find the item */
3969 btrfs_release_path(&path);
3970 cache = next_cache_extent(cache);
3973 btrfs_commit_transaction(trans, root);
3974 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree.
 *
 * Outline of the visible steps:
 *   - a local corrupt_blocks tree is published through
 *     fs_info->corrupt_blocks for the duration of the check;
 *   - unless this is the tree-reloc root, its root record is fetched and
 *     marked as having a root item when the item's refs are positive;
 *   - orphan data extents attached to the root are moved onto the matching
 *     inode records, which get I_ERR_FILE_EXTENT_ORPHAN;
 *   - the root block itself is checked as a leaf or a node;
 *   - the walk starts at the root node, or at the recorded drop_progress key
 *     and drop_level for a partially dropped root;
 *   - walk_down_tree() and walk_up_tree() traverse the tree;
 *   - corrupted tree blocks are listed and passed to repair_btree();
 *   - root records are merged into @root_cache and the inode records are
 *     verified with check_inode_recs();
 *   - the corrupt-block tree and the orphan extent list are freed.
 *
 * NOTE(review): loop headers, error accumulation and the return value are on
 * lines not visible in this listing.
 */
3978 static int check_fs_root(struct btrfs_root *root,
3979 struct cache_tree *root_cache,
3980 struct walk_control *wc)
3986 struct btrfs_path path;
3987 struct shared_node root_node;
3988 struct root_record *rec;
3989 struct btrfs_root_item *root_item = &root->root_item;
3990 struct cache_tree corrupt_blocks;
3991 struct orphan_data_extent *orphan;
3992 struct orphan_data_extent *tmp;
3993 enum btrfs_tree_block_status status;
3994 struct node_refs nrefs;
3997 * Reuse the corrupt_block cache tree to record corrupted tree block
3999 * Unlike the usage in extent tree check, here we do it in a per
4000 * fs/subvol tree base.
4002 cache_tree_init(&corrupt_blocks);
4003 root->fs_info->corrupt_blocks = &corrupt_blocks;
4005 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4006 rec = get_root_rec(root_cache, root->root_key.objectid);
4007 BUG_ON(IS_ERR(rec));
4008 if (btrfs_root_refs(root_item) > 0)
4009 rec->found_root_item = 1;
4012 btrfs_init_path(&path);
4013 memset(&root_node, 0, sizeof(root_node));
4014 cache_tree_init(&root_node.root_cache);
4015 cache_tree_init(&root_node.inode_cache);
4016 memset(&nrefs, 0, sizeof(nrefs));
4018 /* Move the orphan extent record to corresponding inode_record */
4019 list_for_each_entry_safe(orphan, tmp,
4020 &root->orphan_data_extents, list) {
4021 struct inode_record *inode;
4023 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4025 BUG_ON(IS_ERR(inode));
4026 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4027 list_move(&orphan->list, &inode->orphan_extents);
/* The walk state starts at the level of the root node. */
4030 level = btrfs_header_level(root->node);
4031 memset(wc->nodes, 0, sizeof(wc->nodes));
4032 wc->nodes[level] = &root_node;
4033 wc->active_node = level;
4034 wc->root_level = level;
4036 /* We may not have checked the root block, lets do that now */
4037 if (btrfs_is_leaf(root->node))
4038 status = btrfs_check_leaf(root, NULL, root->node);
4040 status = btrfs_check_node(root, NULL, root->node);
4041 if (status != BTRFS_TREE_BLOCK_CLEAN)
/*
 * Start from the root node when the root is still referenced or has no drop
 * progress; otherwise resume from the recorded drop_progress key and level.
 */
4044 if (btrfs_root_refs(root_item) > 0 ||
4045 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4046 path.nodes[level] = root->node;
4047 extent_buffer_get(root->node);
4048 path.slots[level] = 0;
4050 struct btrfs_key key;
4051 struct btrfs_disk_key found_key;
4053 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4054 level = root_item->drop_level;
4055 path.lowest_level = level;
4056 if (level > btrfs_header_level(root->node) ||
4057 level >= BTRFS_MAX_LEVEL) {
4058 error("ignoring invalid drop level: %u", level);
4061 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4064 btrfs_node_key(path.nodes[level], &found_key,
4066 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4067 sizeof(found_key)));
4071 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4077 wret = walk_up_tree(root, &path, wc, &level);
4084 btrfs_release_path(&path);
/* Report the tree blocks found corrupted during the walk, then repair. */
4086 if (!cache_tree_empty(&corrupt_blocks)) {
4087 struct cache_extent *cache;
4088 struct btrfs_corrupt_block *corrupt;
4090 printf("The following tree block(s) is corrupted in tree %llu:\n",
4091 root->root_key.objectid);
4092 cache = first_cache_extent(&corrupt_blocks);
4094 corrupt = container_of(cache,
4095 struct btrfs_corrupt_block,
4097 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4098 cache->start, corrupt->level,
4099 corrupt->key.objectid, corrupt->key.type,
4100 corrupt->key.offset);
4101 cache = next_cache_extent(cache);
4104 printf("Try to repair the btree for root %llu\n",
4105 root->root_key.objectid);
4106 ret = repair_btree(root, &corrupt_blocks);
4108 fprintf(stderr, "Failed to repair btree: %s\n",
4111 printf("Btree for root %llu is fixed\n",
4112 root->root_key.objectid);
4116 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4120 if (root_node.current) {
4121 root_node.current->checked = 1;
4122 maybe_free_inode_rec(&root_node.inode_cache,
4126 err = check_inode_recs(root, &root_node.inode_cache);
/* Unpublish and free the per-root state set up at the top. */
4130 free_corrupt_blocks_tree(&corrupt_blocks);
4131 root->fs_info->corrupt_blocks = NULL;
4132 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Tell whether @objectid identifies a root that check_fs_roots() should
 * examine as an fs tree.
 *
 * The two relocation tree ids are tested first (the body of that branch is
 * not visible in this excerpt); every other id is classified by is_fstree().
 */
4136 static int fs_root_objectid(u64 objectid)
4138 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4139 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4141 return is_fstree(objectid);
/*
 * Scan the tree root and check every fs root found there.
 *
 * ROOT_ITEM keys whose objectid passes fs_root_objectid() are read and handed
 * to check_fs_root(); ROOT_REF and ROOT_BACKREF keys go to process_root_ref().
 * @root_cache is passed to check_fs_root() and is emptied with
 * free_root_recs_tree() on the two paths below that abandon the current scan.
 */
4144 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4145 struct cache_tree *root_cache)
4147 struct btrfs_path path;
4148 struct btrfs_key key;
4149 struct walk_control wc;
4150 struct extent_buffer *leaf, *tree_node;
4151 struct btrfs_root *tmp_root;
4152 struct btrfs_root *tree_root = fs_info->tree_root;
/* Switch the progress task to the fs-roots phase when progress is enabled. */
4156 if (ctx.progress_enabled) {
4157 ctx.tp = TASK_FS_ROOTS;
4158 task_start(ctx.info);
4162 * Just in case we made any changes to the extent tree that weren't
4163 * reflected into the free space cache yet.
4166 reset_cached_block_groups(fs_info);
4167 memset(&wc, 0, sizeof(wc));
4168 cache_tree_init(&wc.shared);
4169 btrfs_init_path(&path);
/* Position the path in the tree root using a ROOT_ITEM key. */
4174 key.type = BTRFS_ROOT_ITEM_KEY;
4175 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember the tree root node so a later change of it can be detected. */
4180 tree_node = tree_root->node;
/*
 * The tree root node differs from the one sampled above: drop the root
 * records gathered so far and release the path.
 * NOTE(review): presumably the scan is restarted afterwards; the jump is not
 * visible in this excerpt.
 */
4182 if (tree_node != tree_root->node) {
4183 free_root_recs_tree(root_cache);
4184 btrfs_release_path(&path);
4187 leaf = path.nodes[0];
/* Past the last item of this leaf: move on to the next leaf. */
4188 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4189 ret = btrfs_next_leaf(tree_root, &path);
4195 leaf = path.nodes[0];
4197 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4198 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4199 fs_root_objectid(key.objectid)) {
/*
 * Reloc trees are read with btrfs_read_fs_root_no_cache() and freed again
 * below once checked; other roots go through btrfs_read_fs_root() after
 * key.offset has been set to (u64)-1.
 */
4200 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4201 tmp_root = btrfs_read_fs_root_no_cache(
4204 key.offset = (u64)-1;
4205 tmp_root = btrfs_read_fs_root(
4208 if (IS_ERR(tmp_root)) {
4212 ret = check_fs_root(tmp_root, root_cache, &wc);
/*
 * -EAGAIN from check_fs_root(): discard the collected root records and
 * release the path.
 * NOTE(review): presumably followed by a restart of the scan - confirm
 * against the full source.
 */
4213 if (ret == -EAGAIN) {
4214 free_root_recs_tree(root_cache);
4215 btrfs_release_path(&path);
4220 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4221 btrfs_free_fs_root(tmp_root);
4222 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4223 key.type == BTRFS_ROOT_BACKREF_KEY) {
4224 process_root_ref(leaf, path.slots[0], &key,
4231 btrfs_release_path(&path);
/* After freeing the shared-node cache, warn if it is still not empty. */
4233 free_extent_cache_tree(&wc.shared);
4234 if (!cache_tree_empty(&wc.shared))
4235 fprintf(stderr, "warning line %d\n", __LINE__);
4237 task_stop(ctx.info);
4243 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4244 * INODE_REF/INODE_EXTREF match.
4246 * @root: the root of the fs/file tree
4247 * @ref_key: the key of the INODE_REF/INODE_EXTREF
4248 * @key: the key of the DIR_ITEM/DIR_INDEX
4249 * @index: the index in the INODE_REF/INODE_EXTREF, used to
4250 * distinguish root_dir from normal dir/file
4251 * @name: the name in the INODE_REF/INODE_EXTREF
4252 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4253 * @mode: the st_mode of INODE_ITEM
4255 * Return 0 if no error occurred.
4256 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4257 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4259 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4260 * not match for normal dir/file.
4262 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4263 struct btrfs_key *key, u64 index, char *name,
4264 u32 namelen, u32 mode)
4266 struct btrfs_path path;
4267 struct extent_buffer *node;
4268 struct btrfs_dir_item *di;
4269 struct btrfs_key location;
/* Scratch copy of each candidate name; holds at most BTRFS_NAME_LEN bytes. */
4270 char namebuf[BTRFS_NAME_LEN] = {0};
/* Look up the exact DIR_ITEM/DIR_INDEX key with no transaction handle. */
4280 btrfs_init_path(&path);
4281 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4283 ret = DIR_ITEM_MISSING;
4287 /* Process root dir and goto out*/
4290 ret = ROOT_DIR_ERROR;
4292 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4294 ref_key->type == BTRFS_INODE_REF_KEY ?
4296 ref_key->objectid, ref_key->offset,
4297 key->type == BTRFS_DIR_ITEM_KEY ?
4298 "DIR_ITEM" : "DIR_INDEX");
4306 /* Process normal file/dir */
4308 ret = DIR_ITEM_MISSING;
4310 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4312 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4313 ref_key->objectid, ref_key->offset,
4314 key->type == BTRFS_DIR_ITEM_KEY ?
4315 "DIR_ITEM" : "DIR_INDEX",
4316 key->objectid, key->offset, namelen, name,
4317 imode_to_type(mode));
4321 /* Check whether inode_id/filetype/name match */
4322 node = path.nodes[0];
4323 slot = path.slots[0];
4324 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4325 total = btrfs_item_size_nr(node, slot);
/* Walk every dir entry packed into this item. */
4326 while (cur < total) {
/*
 * ret is reset to DIR_ITEM_MISMATCH for each entry; the statement that
 * clears it on a match is not visible in this excerpt.
 */
4327 ret = DIR_ITEM_MISMATCH;
4328 name_len = btrfs_dir_name_len(node, di);
4329 data_len = btrfs_dir_data_len(node, di);
/* The entry must point at the INODE_ITEM (offset 0) of the ref's inode. */
4331 btrfs_dir_item_key_to_cpu(node, di, &location);
4332 if (location.objectid != ref_key->objectid ||
4333 location.type != BTRFS_INODE_ITEM_KEY ||
4334 location.offset != 0)
/* The entry's file type must agree with the type derived from @mode. */
4337 filetype = btrfs_dir_type(node, di);
4338 if (imode_to_type(mode) != filetype)
/* Warn and clamp when the name overruns the item or BTRFS_NAME_LEN. */
4341 if (cur + sizeof(*di) + name_len > total ||
4342 name_len > BTRFS_NAME_LEN) {
4343 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4345 key->type == BTRFS_DIR_ITEM_KEY ?
4346 "DIR_ITEM" : "DIR_INDEX",
4347 key->objectid, key->offset, name_len);
4349 if (cur + sizeof(*di) > total)
4351 len = min_t(u32, total - cur - sizeof(*di),
/* Compare the stored name (length and bytes) with the name from the ref. */
4357 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4358 if (len != namelen || strncmp(namebuf, name, len))
/* Step to the next packed entry: header, name and data. */
4364 len = sizeof(*di) + name_len + data_len;
4365 di = (struct btrfs_dir_item *)((char *)di + len);
/* No entry matched: report the mismatch once for the whole item. */
4368 if (ret == DIR_ITEM_MISMATCH)
4370 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4372 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4373 ref_key->objectid, ref_key->offset,
4374 key->type == BTRFS_DIR_ITEM_KEY ?
4375 "DIR_ITEM" : "DIR_INDEX",
4376 key->objectid, key->offset, namelen, name,
4377 imode_to_type(mode));
4379 btrfs_release_path(&path);
4384 * Traverse the given INODE_REF and call find_dir_item() to find related
4385 * DIR_ITEM/DIR_INDEX.
4387 * @root: the root of the fs/file tree
4388 * @ref_key: the key of the INODE_REF
4389 * @refs: the count of INODE_REF
4390 * @mode: the st_mode of INODE_ITEM
4392 * Return 0 if no error occurred.
4394 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4395 struct extent_buffer *node, int slot, u64 *refs,
4398 struct btrfs_key key;
4399 struct btrfs_inode_ref *ref;
/* Scratch copy of the current ref name; at most BTRFS_NAME_LEN bytes. */
4400 char namebuf[BTRFS_NAME_LEN] = {0};
/* First packed ref of the item and the item's total size in bytes. */
4408 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4409 total = btrfs_item_size_nr(node, slot);
4412 /* Update inode ref count */
4415 index = btrfs_inode_ref_index(node, ref);
4416 name_len = btrfs_inode_ref_name_len(node, ref);
/* Warn and clamp when the name overruns the item or BTRFS_NAME_LEN. */
4417 if (cur + sizeof(*ref) + name_len > total ||
4418 name_len > BTRFS_NAME_LEN) {
4419 warning("root %llu INODE_REF[%llu %llu] name too long",
4420 root->objectid, ref_key->objectid, ref_key->offset);
4422 if (total < cur + sizeof(*ref))
4424 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4429 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4431 /* Check root dir ref name */
/*
 * NOTE(review): the comparison is bounded by name_len (the on-disk value),
 * not by the possibly clamped len.
 */
4432 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4433 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4434 root->objectid, ref_key->objectid, ref_key->offset,
4436 err |= ROOT_DIR_ERROR;
4439 /* Find related DIR_INDEX */
/* For an INODE_REF key, the key offset is used as the directory objectid. */
4440 key.objectid = ref_key->offset;
4441 key.type = BTRFS_DIR_INDEX_KEY;
4443 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4446 /* Find related dir_item */
/* DIR_ITEM keys are addressed by the hash of the name. */
4447 key.objectid = ref_key->offset;
4448 key.type = BTRFS_DIR_ITEM_KEY;
4449 key.offset = btrfs_name_hash(namebuf, len);
4450 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Step to the next packed ref: header plus name. */
4453 len = sizeof(*ref) + name_len;
4454 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4464 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4465 * DIR_ITEM/DIR_INDEX.
4467 * @root: the root of the fs/file tree
4468 * @ref_key: the key of the INODE_EXTREF
4469 * @refs: the count of INODE_EXTREF
4470 * @mode: the st_mode of INODE_ITEM
4472 * Return 0 if no error occurred.
4474 static int check_inode_extref(struct btrfs_root *root,
4475 struct btrfs_key *ref_key,
4476 struct extent_buffer *node, int slot, u64 *refs,
4479 struct btrfs_key key;
4480 struct btrfs_inode_extref *extref;
/* Scratch copy of the current extref name; at most BTRFS_NAME_LEN bytes. */
4481 char namebuf[BTRFS_NAME_LEN] = {0};
/* First packed extref of the item and the item's total size in bytes. */
4491 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4492 total = btrfs_item_size_nr(node, slot);
4495 /* update inode ref count */
4497 name_len = btrfs_inode_extref_name_len(node, extref);
4498 index = btrfs_inode_extref_index(node, extref);
4499 parent = btrfs_inode_extref_parent(node, extref);
/*
 * Names longer than BTRFS_NAME_LEN are clamped with a warning.
 * NOTE(review): unlike check_inode_ref(), the visible code does not compare
 * the name length against the item size - confirm against the full source.
 */
4500 if (name_len <= BTRFS_NAME_LEN) {
4503 len = BTRFS_NAME_LEN;
4504 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4505 root->objectid, ref_key->objectid, ref_key->offset);
4507 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4509 /* Check root dir ref name */
4510 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4511 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4512 root->objectid, ref_key->objectid, ref_key->offset,
4514 err |= ROOT_DIR_ERROR;
4517 /* find related dir_index */
/* The directory objectid comes from the extref's own parent field. */
4518 key.objectid = parent;
4519 key.type = BTRFS_DIR_INDEX_KEY;
4521 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4524 /* find related dir_item */
/* DIR_ITEM keys are addressed by the hash of the name. */
4525 key.objectid = parent;
4526 key.type = BTRFS_DIR_ITEM_KEY;
4527 key.offset = btrfs_name_hash(namebuf, len);
4528 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Step to the next packed extref: header plus name. */
4531 len = sizeof(*extref) + name_len;
4532 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4542 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4543 * DIR_ITEM/DIR_INDEX match.
4544 * Return with @index_ret.
4546 * @root: the root of the fs/file tree
4547 * @key: the key of the INODE_REF/INODE_EXTREF
4548 * @name: the name in the INODE_REF/INODE_EXTREF
4549 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4550 * @index_ret: the index in the INODE_REF/INODE_EXTREF,
4551 * value (u64)-1 means do not check index
4552 * @ext_ref: the EXTENDED_IREF feature
4554 * Return 0 if no error occurred.
4555 * Return >0 for error bitmap
4557 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4558 char *name, int namelen, u64 *index_ret,
4559 unsigned int ext_ref)
4561 struct btrfs_path path;
4562 struct btrfs_inode_ref *ref;
4563 struct btrfs_inode_extref *extref;
4564 struct extent_buffer *node;
/* Scratch copy of each candidate ref name; at most BTRFS_NAME_LEN bytes. */
4565 char ref_namebuf[BTRFS_NAME_LEN] = {0};
/* First pass: look up the key as given by the caller. */
4578 btrfs_init_path(&path);
4579 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4581 ret = INODE_REF_MISSING;
4585 node = path.nodes[0];
4586 slot = path.slots[0];
4588 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4589 total = btrfs_item_size_nr(node, slot);
4591 /* Iterate all entry of INODE_REF */
4592 while (cur < total) {
/*
 * ret is reset to INODE_REF_MISSING for each entry; the statement that
 * clears it on a match is not visible in this excerpt.
 */
4593 ret = INODE_REF_MISSING;
4595 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4596 ref_index = btrfs_inode_ref_index(node, ref);
/* An index of (u64)-1 in *index_ret disables the index comparison. */
4597 if (*index_ret != (u64)-1 && *index_ret != ref_index)
/* Warn and clamp when the name overruns the item or BTRFS_NAME_LEN. */
4600 if (cur + sizeof(*ref) + ref_namelen > total ||
4601 ref_namelen > BTRFS_NAME_LEN) {
4602 warning("root %llu INODE %s[%llu %llu] name too long",
4604 key->type == BTRFS_INODE_REF_KEY ?
4606 key->objectid, key->offset);
4608 if (cur + sizeof(*ref) > total)
4610 len = min_t(u32, total - cur - sizeof(*ref),
4616 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4619 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Report the index of the ref that was accepted back to the caller. */
4622 *index_ret = ref_index;
/* Step to the next packed ref: header plus name. */
4626 len = sizeof(*ref) + ref_namelen;
4627 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4632 /* Skip if not support EXTENDED_IREF feature */
4636 btrfs_release_path(&path);
4637 btrfs_init_path(&path);
/*
 * Second pass: turn the key into the matching INODE_EXTREF key.
 * Note that this rewrites the caller's @key in place (type and offset); the
 * old offset is kept in dir_id as the expected parent directory.
 */
4639 dir_id = key->offset;
4640 key->type = BTRFS_INODE_EXTREF_KEY;
4641 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4643 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4645 ret = INODE_REF_MISSING;
4649 node = path.nodes[0];
4650 slot = path.slots[0];
4652 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4654 total = btrfs_item_size_nr(node, slot);
4656 /* Iterate all entry of INODE_EXTREF */
4657 while (cur < total) {
4658 ret = INODE_REF_MISSING;
4660 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4661 ref_index = btrfs_inode_extref_index(node, extref);
4662 parent = btrfs_inode_extref_parent(node, extref);
4663 if (*index_ret != (u64)-1 && *index_ret != ref_index)
/* Entries sharing the hash but belonging to another directory are skipped. */
4666 if (parent != dir_id)
4669 if (ref_namelen <= BTRFS_NAME_LEN) {
4672 len = BTRFS_NAME_LEN;
/*
 * NOTE(review): key->type was set to BTRFS_INODE_EXTREF_KEY above, so the
 * REF/EXTREF test in this warning compares against the rewritten type.
 */
4673 warning("root %llu INODE %s[%llu %llu] name too long",
4675 key->type == BTRFS_INODE_REF_KEY ?
4677 key->objectid, key->offset);
4679 read_extent_buffer(node, ref_namebuf,
4680 (unsigned long)(extref + 1), len);
4682 if (len != namelen || strncmp(ref_namebuf, name, len))
4685 *index_ret = ref_index;
/* Step to the next packed extref: header plus name. */
4690 len = sizeof(*extref) + ref_namelen;
4691 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4696 btrfs_release_path(&path);
4701 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4702 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4704 * @root: the root of the fs/file tree
4705 * @key: the key of the DIR_ITEM/DIR_INDEX
4706 * @size: the st_size of the INODE_ITEM
4707 * @ext_ref: the EXTENDED_IREF feature
4709 * Return 0 if no error occurred.
4711 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4712 struct extent_buffer *node, int slot, u64 *size,
4713 unsigned int ext_ref)
4715 struct btrfs_dir_item *di;
4716 struct btrfs_inode_item *ii;
4717 struct btrfs_path path;
4718 struct btrfs_key location;
/* Scratch copy of the current entry name; at most BTRFS_NAME_LEN bytes. */
4719 char namebuf[BTRFS_NAME_LEN] = {0};
4732 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4733 * ignore index check.
4735 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4737 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4738 total = btrfs_item_size_nr(node, slot);
/* Walk every dir entry packed into this item. */
4740 while (cur < total) {
4741 data_len = btrfs_dir_data_len(node, di);
4743 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4744 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4745 "DIR_ITEM" : "DIR_INDEX",
4746 key->objectid, key->offset, data_len);
4748 name_len = btrfs_dir_name_len(node, di);
/* Warn and clamp when the name overruns the item or BTRFS_NAME_LEN. */
4749 if (cur + sizeof(*di) + name_len > total ||
4750 name_len > BTRFS_NAME_LEN) {
4751 warning("root %llu %s[%llu %llu] name too long",
4753 key->type == BTRFS_DIR_ITEM_KEY ?
4754 "DIR_ITEM" : "DIR_INDEX",
4755 key->objectid, key->offset);
4757 if (cur + sizeof(*di) > total)
4759 len = min_t(u32, total - cur - sizeof(*di),
/* Accumulate the on-disk name length into the caller's running size. */
4764 (*size) += name_len;
4766 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4767 filetype = btrfs_dir_type(node, di);
/* A DIR_ITEM key offset must equal the hash of the name it stores. */
4769 if (key->type == BTRFS_DIR_ITEM_KEY &&
4770 key->offset != btrfs_name_hash(namebuf, len)) {
4772 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4773 root->objectid, key->objectid, key->offset,
4774 namebuf, len, filetype, key->offset,
4775 btrfs_name_hash(namebuf, len));
4778 btrfs_init_path(&path);
4779 btrfs_dir_item_key_to_cpu(node, di, &location);
4781 /* Ignore related ROOT_ITEM check */
4782 if (location.type == BTRFS_ROOT_ITEM_KEY)
4785 /* Check relative INODE_ITEM(existence/filetype) */
4786 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4788 err |= INODE_ITEM_MISSING;
4789 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4790 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4791 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4792 key->offset, location.objectid, name_len,
4797 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4798 struct btrfs_inode_item);
4799 mode = btrfs_inode_mode(path.nodes[0], ii);
/* The entry's file type must agree with the type derived from the inode mode. */
4801 if (imode_to_type(mode) != filetype) {
4802 err |= INODE_ITEM_MISMATCH;
4803 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4804 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4805 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4806 key->offset, name_len, namebuf, filetype);
4809 /* Check relative INODE_REF/INODE_EXTREF */
/* Reuse location as the INODE_REF key: offset is this directory's objectid. */
4810 location.type = BTRFS_INODE_REF_KEY;
4811 location.offset = key->objectid;
4812 ret = find_inode_ref(root, &location, namebuf, len,
4815 if (ret & INODE_REF_MISSING)
4816 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4817 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4818 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4819 key->offset, name_len, namebuf, filetype);
/* Step to the next packed entry: header, name and data. */
4822 btrfs_release_path(&path);
4823 len = sizeof(*di) + name_len + data_len;
4824 di = (struct btrfs_dir_item *)((char *)di + len);
/* Bytes left after an entry of a DIR_INDEX item mean more than one entry. */
4827 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4828 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4829 root->objectid, key->objectid, key->offset);
4838 * Check file extent datasum/hole, update the size of the file extents,
4839 * check and update the last offset of the file extent.
4841 * @root: the root of fs/file tree.
4842 * @fkey: the key of the file extent.
4843 * @nodatasum: INODE_NODATASUM feature.
4844 * @size: the sum of all EXTENT_DATA items size for this inode.
4845 * @end: the offset of the last extent.
4847 * Return 0 if no error occurred.
4849 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4850 struct extent_buffer *node, int slot,
4851 unsigned int nodatasum, u64 *size, u64 *end)
4853 struct btrfs_file_extent_item *fi;
4856 u64 extent_num_bytes;
4858 u64 csum_found; /* In byte size, sectorsize aligned */
4859 u64 search_start; /* Logical range start we search for csum */
4860 u64 search_len; /* Logical range len we search for csum */
4861 unsigned int extent_type;
4862 unsigned int is_hole;
4867 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4869 /* Check inline extent */
4870 extent_type = btrfs_file_extent_type(node, fi);
4871 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4872 struct btrfs_item *e = btrfs_item_nr(slot);
4873 u32 item_inline_len;
4875 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4876 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4877 compressed = btrfs_file_extent_compression(node, fi);
/* An inline extent must not be empty. */
4878 if (extent_num_bytes == 0) {
4880 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4881 root->objectid, fkey->objectid, fkey->offset);
4882 err |= FILE_EXTENT_ERROR;
/* Uncompressed inline data must match the length derived from the item. */
4884 if (!compressed && extent_num_bytes != item_inline_len) {
4886 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4887 root->objectid, fkey->objectid, fkey->offset,
4888 extent_num_bytes, item_inline_len);
4889 err |= FILE_EXTENT_ERROR;
/* Inline bytes count toward both the running end offset and the size. */
4891 *end += extent_num_bytes;
4892 *size += extent_num_bytes;
4896 /* Check extent type */
4897 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4898 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4899 err |= FILE_EXTENT_ERROR;
4900 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4901 root->objectid, fkey->objectid, fkey->offset);
4905 /* Check REG_EXTENT/PREALLOC_EXTENT */
4906 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4907 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4908 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4909 extent_offset = btrfs_file_extent_offset(node, fi);
4910 compressed = btrfs_file_extent_compression(node, fi);
/* A hole is an extent whose disk bytenr and disk size are both zero. */
4911 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4914 * Check EXTENT_DATA csum
4916 * For plain (uncompressed) extent, we should only check the range
4917 * we're referring to, as it's possible that part of prealloc extent
4918 * has been written, and has csum:
4920 * |<--- Original large preallocated extent A ---->|
4921 * |<- Prealloc File Extent ->|<- Regular Extent ->|
4924 * For compressed extent, we should check the whole range.
4927 search_start = disk_bytenr + extent_offset;
4928 search_len = extent_num_bytes;
4930 search_start = disk_bytenr;
4931 search_len = disk_num_bytes;
/*
 * Count the csum bytes covering the chosen range, then classify:
 *  - any csum on a NODATASUM inode is unexpected;
 *  - a regular, non-hole extent with datasum needs csums for the whole range;
 *  - a prealloc extent must not have csums.
 */
4933 ret = count_csum_range(root, search_start, search_len, &csum_found);
4934 if (csum_found > 0 && nodatasum) {
4935 err |= ODD_CSUM_ITEM;
4936 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4937 root->objectid, fkey->objectid, fkey->offset);
4938 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4939 !is_hole && (ret < 0 || csum_found < search_len)) {
4940 err |= CSUM_ITEM_MISSING;
4941 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4942 root->objectid, fkey->objectid, fkey->offset,
4943 csum_found, search_len);
4944 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4945 err |= ODD_CSUM_ITEM;
4946 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4947 root->objectid, fkey->objectid, fkey->offset, csum_found);
4950 /* Check EXTENT_DATA hole */
/* Unless no_holes is set, this extent must start where the last one ended. */
4951 if (!no_holes && *end != fkey->offset) {
4952 err |= FILE_EXTENT_ERROR;
4953 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4954 root->objectid, fkey->objectid, fkey->offset);
4957 *end += extent_num_bytes;
4959 *size += extent_num_bytes;
4965 * Set inode item nbytes to @nbytes
4967 * Returns 0 on success
4968 * Returns != 0 on error
4970 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
4971 struct btrfs_path *path,
4972 u64 ino, u64 nbytes)
4974 struct btrfs_trans_handle *trans;
4975 struct btrfs_inode_item *ii;
4976 struct btrfs_key key;
4977 struct btrfs_key research_key;
/* Save the key @path currently points at so the path can be restored below. */
4981 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
4984 key.type = BTRFS_INODE_ITEM_KEY;
4987 trans = btrfs_start_transaction(root, 1);
4988 if (IS_ERR(trans)) {
4989 ret = PTR_ERR(trans);
/* Re-search the inode item inside the transaction, requesting COW. */
4994 btrfs_release_path(path);
4995 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Write the new nbytes into the inode item and mark its leaf dirty. */
5003 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5004 struct btrfs_inode_item);
5005 btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5006 btrfs_mark_buffer_dirty(path->nodes[0]);
5008 btrfs_commit_transaction(trans, root);
5011 error("failed to set nbytes in inode %llu root %llu",
5012 ino, root->root_key.objectid);
/*
 * NOTE(review): the "\n" sits in the middle of this message, so " to <nbytes>"
 * starts a new line and the message itself ends without a newline; compare
 * with the message printed by repair_dir_isize_lowmem().
 */
5014 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5015 root->root_key.objectid, nbytes);
/* Point @path back at the item it referenced on entry. */
5018 btrfs_release_path(path);
5019 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5026 * Set directory inode isize to @isize.
5028 * Returns 0 on success.
5029 * Returns != 0 on error.
5031 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5032 struct btrfs_path *path,
5035 struct btrfs_trans_handle *trans;
5036 struct btrfs_inode_item *ii;
5037 struct btrfs_key key;
5038 struct btrfs_key research_key;
/* Save the key @path currently points at so the path can be restored below. */
5042 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5045 key.type = BTRFS_INODE_ITEM_KEY;
5048 trans = btrfs_start_transaction(root, 1);
5049 if (IS_ERR(trans)) {
5050 ret = PTR_ERR(trans);
/* Re-search the inode item inside the transaction, requesting COW. */
5055 btrfs_release_path(path);
5056 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Write the new size into the inode item and mark its leaf dirty. */
5064 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5065 struct btrfs_inode_item);
5066 btrfs_set_inode_size(path->nodes[0], ii, isize);
5067 btrfs_mark_buffer_dirty(path->nodes[0]);
5069 btrfs_commit_transaction(trans, root);
5072 error("failed to set isize in inode %llu root %llu",
5073 ino, root->root_key.objectid);
5075 printf("Set isize in inode %llu root %llu to %llu\n",
5076 ino, root->root_key.objectid, isize);
/* Point @path back at the item it referenced on entry. */
5078 btrfs_release_path(path);
5079 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5086 * Wrapper function for btrfs_add_orphan_item().
5088 * Returns 0 on success.
5089 * Returns != 0 on error.
5091 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5092 struct btrfs_path *path, u64 ino)
5094 struct btrfs_trans_handle *trans;
5095 struct btrfs_key research_key;
/* Save the key @path currently points at so the path can be restored below. */
5099 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5101 trans = btrfs_start_transaction(root, 1);
5102 if (IS_ERR(trans)) {
5103 ret = PTR_ERR(trans);
/* The path is released first because btrfs_add_orphan_item() reuses it. */
5108 btrfs_release_path(path);
5109 ret = btrfs_add_orphan_item(trans, root, path, ino);
5111 btrfs_commit_transaction(trans, root);
5114 error("failed to add inode %llu as orphan item root %llu",
5115 ino, root->root_key.objectid);
5117 printf("Added inode %llu as orphan item root %llu\n",
5118 ino, root->root_key.objectid);
/* Point @path back at the item it referenced on entry. */
5120 btrfs_release_path(path);
5121 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5128 * Check INODE_ITEM and related ITEMs (the same inode number)
5129 * 1. check link count
5130 * 2. check inode ref/extref
5131 * 3. check dir item/index
5133 * @ext_ref: the EXTENDED_IREF feature
5135 * Return 0 if no error occurred.
5136 * Return >0 (an error bitmap) if an error was found or the traversal is done
5138 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5139 unsigned int ext_ref)
5141 struct extent_buffer *node;
5142 struct btrfs_inode_item *ii;
5143 struct btrfs_key key;
/* Sum of the EXTENT_DATA sizes reported by check_file_extent(). */
5152 u64 extent_size = 0;
5154 unsigned int nodatasum;
5159 node = path->nodes[0];
5160 slot = path->slots[0];
5162 btrfs_item_key_to_cpu(node, &key, slot);
5163 inode_id = key.objectid;
/* Items under the orphan objectid are not inodes: just step past them. */
5165 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5166 ret = btrfs_next_item(root, path);
/* Cache the INODE_ITEM fields that the checks below compare against. */
5172 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5173 isize = btrfs_inode_size(node, ii);
5174 nbytes = btrfs_inode_nbytes(node, ii);
5175 mode = btrfs_inode_mode(node, ii);
5176 dir = imode_to_type(mode) == BTRFS_FT_DIR;
5177 nlink = btrfs_inode_nlink(node, ii);
5178 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
/* Visit the items that follow while they still belong to this inode. */
5181 ret = btrfs_next_item(root, path);
5183 /* out will fill 'err' using current statistics */
5185 } else if (ret > 0) {
5190 node = path->nodes[0];
5191 slot = path->slots[0];
5192 btrfs_item_key_to_cpu(node, &key, slot);
5193 if (key.objectid != inode_id)
/* Dispatch on the item type; each checker updates the running statistics. */
5197 case BTRFS_INODE_REF_KEY:
5198 ret = check_inode_ref(root, &key, node, slot, &refs,
5202 case BTRFS_INODE_EXTREF_KEY:
5203 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5204 warning("root %llu EXTREF[%llu %llu] isn't supported",
5205 root->objectid, key.objectid,
5207 ret = check_inode_extref(root, &key, node, slot, &refs,
5211 case BTRFS_DIR_ITEM_KEY:
5212 case BTRFS_DIR_INDEX_KEY:
5214 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5215 root->objectid, inode_id,
5216 imode_to_type(mode), key.objectid,
5219 ret = check_dir_item(root, &key, node, slot, &size,
5223 case BTRFS_EXTENT_DATA_KEY:
5225 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5226 root->objectid, inode_id, key.objectid,
5229 ret = check_file_extent(root, &key, node, slot,
5230 nodatasum, &extent_size,
5234 case BTRFS_XATTR_ITEM_KEY:
5237 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5238 key.objectid, key.type, key.offset);
5243 /* verify INODE_ITEM nlink/isize/nbytes */
5246 err |= LINK_COUNT_ERROR;
5247 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5248 root->objectid, inode_id, nlink);
5252 * Just a warning, as dir inode nbytes is just an
5253 * instructive value.
5255 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5256 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5257 root->objectid, inode_id,
5258 root->fs_info->nodesize);
/* Directory size must equal the name-length total gathered by check_dir_item(). */
5261 if (isize != size) {
5263 ret = repair_dir_isize_lowmem(root, path,
/* Report the error when not repairing or when the repair failed. */
5265 if (!repair || ret) {
5268 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5269 root->objectid, inode_id, isize, size);
/* nlink must equal the number of refs counted while walking the inode. */
5273 if (nlink != refs) {
5274 err |= LINK_COUNT_ERROR;
5275 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5276 root->objectid, inode_id, nlink, refs);
5277 } else if (!nlink) {
5279 ret = repair_inode_orphan_item_lowmem(root,
5281 if (!repair || ret) {
5283 error("root %llu INODE[%llu] is orphan item",
5284 root->objectid, inode_id);
5288 if (!nbytes && !no_holes && extent_end < isize) {
5289 err |= NBYTES_ERROR;
5290 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5291 root->objectid, inode_id, isize);
/* nbytes must equal the extent size total gathered by check_file_extent(). */
5294 if (nbytes != extent_size) {
5296 ret = repair_inode_nbytes_lowmem(root, path,
5297 inode_id, extent_size);
5298 if (!repair || ret) {
5299 err |= NBYTES_ERROR;
5301 "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5302 root->objectid, inode_id, nbytes,
/*
 * Check the INODE_ITEM with objectid BTRFS_FIRST_FREE_OBJECTID of @root.
 *
 * The item is looked up directly and, when present, handed to
 * check_inode_item(); a missing item is recorded as INODE_ITEM_MISSING.
 * @ext_ref is forwarded to check_inode_item() unchanged.
 */
5311 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5313 struct btrfs_path path;
5314 struct btrfs_key key;
5318 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5319 key.type = BTRFS_INODE_ITEM_KEY;
5322 /* For root being dropped, we don't need to check first inode */
5323 if (btrfs_root_refs(&root->root_item) == 0 &&
5324 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5328 btrfs_init_path(&path);
5330 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5335 err |= INODE_ITEM_MISSING;
5336 error("first inode item of root %llu is missing",
/* Merge the error bitmap returned for the first inode into err. */
5340 err |= check_inode_item(root, &path, ext_ref);
5345 btrfs_release_path(&path);
/*
 * Look up a tree backref of @rec in its backref rb-tree.
 *
 * A stack-allocated template backref is filled in and searched for with
 * compare_extent_backref(); the result is converted with to_tree_backref().
 * back starts as NULL.
 * NOTE(review): the condition choosing between the parent-keyed (full
 * backref) form and the root-keyed form is not visible in this excerpt.
 */
5349 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5350 u64 parent, u64 root)
5352 struct rb_node *node;
5353 struct tree_backref *back = NULL;
5354 struct tree_backref match = {
5361 match.parent = parent;
5362 match.node.full_backref = 1;
5367 node = rb_search(&rec->backref_tree, &match.node.node,
5368 (rb_compare_keys)compare_extent_backref, NULL);
5370 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Look up a data backref of @rec in its backref rb-tree.
 *
 * A stack-allocated template backref is filled in (including found_ref and
 * disk_bytenr) and searched for with compare_extent_backref(); the result is
 * converted with to_data_backref(). back starts as NULL.
 * NOTE(review): the condition choosing between the parent-keyed (full
 * backref) form and the other form is not visible in this excerpt.
 */
5375 static struct data_backref *find_data_backref(struct extent_record *rec,
5376 u64 parent, u64 root,
5377 u64 owner, u64 offset,
5379 u64 disk_bytenr, u64 bytes)
5381 struct rb_node *node;
5382 struct data_backref *back = NULL;
5383 struct data_backref match = {
5390 .found_ref = found_ref,
5391 .disk_bytenr = disk_bytenr,
5395 match.parent = parent;
5396 match.node.full_backref = 1;
5401 node = rb_search(&rec->backref_tree, &match.node.node,
5402 (rb_compare_keys)compare_extent_backref, NULL);
5404 back = to_data_backref(rb_node_to_extent_backref(node));
5409 * Iterate all item on the tree and call check_inode_item() to check.
5411 * @root: the root of the tree to be checked.
5412 * @ext_ref: the EXTENDED_IREF feature
5414 * Return 0 if no error found.
5415 * Return <0 for error.
5417 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5419 struct btrfs_path path;
5420 struct node_refs nrefs;
5421 struct btrfs_root_item *root_item = &root->root_item;
5427 * We need to manually check the first inode item(256)
5428 * As the following traversal function will only start from
5429 * the first inode item in the leaf, if inode item(256) is missing
5430 * we will just skip it forever.
5432 ret = check_fs_first_inode(root, ext_ref);
5436 memset(&nrefs, 0, sizeof(nrefs));
5437 level = btrfs_header_level(root->node);
5438 btrfs_init_path(&path);
/*
 * Root still referenced, or no drop progress recorded: start the walk at the
 * root node, taking an extra reference on it for the path.
 */
5440 if (btrfs_root_refs(root_item) > 0 ||
5441 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5442 path.nodes[level] = root->node;
5443 path.slots[level] = 0;
5444 extent_buffer_get(root->node);
5446 struct btrfs_key key;
/* Otherwise start from the recorded drop_progress key at drop_level. */
5448 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5449 level = root_item->drop_level;
5450 path.lowest_level = level;
5451 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* Walk down and then back up the tree; the enclosing loop is not visible here. */
5458 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5461 /* if ret is negative, walk shall stop */
5467 ret = walk_up_tree_v2(root, &path, &level);
5469 /* Normal exit, reset ret to err */
5476 btrfs_release_path(&path);
5481 * Find the relative ref for root_ref and root_backref.
5483 * @root: the root of the root tree.
5484 * @ref_key: the key of the root ref.
5486 * Return 0 if no error occurred.
5488 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5489 struct extent_buffer *node, int slot)
/*
 * Flow: read dirid, sequence and name of the item at @node/@slot, look up
 * the counterpart item in @root, read the same fields from it and compare.
 * Problems are accumulated in err as ROOT_REF_MISSING / ROOT_REF_MISMATCH.
 */
5491 struct btrfs_path path;
5492 struct btrfs_key key;
5493 struct btrfs_root_ref *ref;
5494 struct btrfs_root_ref *backref;
5495 char ref_name[BTRFS_NAME_LEN] = {0};
5496 char backref_name[BTRFS_NAME_LEN] = {0};
5502 u32 backref_namelen;
5507 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5508 ref_dirid = btrfs_root_ref_dirid(node, ref);
5509 ref_seq = btrfs_root_ref_sequence(node, ref);
5510 ref_namelen = btrfs_root_ref_name_len(node, ref);
/*
 * Clamp the number of bytes to read to BTRFS_NAME_LEN, the size of
 * ref_name[]; an over-long name only triggers a warning.
 */
5512 if (ref_namelen <= BTRFS_NAME_LEN) {
5515 len = BTRFS_NAME_LEN;
5516 warning("%s[%llu %llu] ref_name too long",
5517 ref_key->type == BTRFS_ROOT_REF_KEY ?
5518 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
/* The name is read from directly behind the btrfs_root_ref struct. */
5521 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5523 /* Find relative root_ref */
/*
 * The counterpart key swaps objectid and offset; REF + BACKREF - type turns
 * BTRFS_ROOT_REF_KEY into BTRFS_ROOT_BACKREF_KEY and vice versa.
 */
5524 key.objectid = ref_key->offset;
5525 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5526 key.offset = ref_key->objectid;
5528 btrfs_init_path(&path);
5529 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5531 err |= ROOT_REF_MISSING;
5532 error("%s[%llu %llu] couldn't find relative ref",
5533 ref_key->type == BTRFS_ROOT_REF_KEY ?
5534 "ROOT_REF" : "ROOT_BACKREF",
5535 ref_key->objectid, ref_key->offset);
/* Read the same three fields plus the name from the counterpart item. */
5539 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5540 struct btrfs_root_ref);
5541 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5542 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5543 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
/* Same clamping as above, this time for backref_name[]. */
5545 if (backref_namelen <= BTRFS_NAME_LEN) {
5546 len = backref_namelen;
5548 len = BTRFS_NAME_LEN;
5549 warning("%s[%llu %llu] ref_name too long",
5550 key.type == BTRFS_ROOT_REF_KEY ?
5551 "ROOT_REF" : "ROOT_BACKREF",
5552 key.objectid, key.offset);
5554 read_extent_buffer(path.nodes[0], backref_name,
5555 (unsigned long)(backref + 1), len);
/*
 * Both items must agree on dirid, sequence, name length and name bytes.
 * The name comparison covers len bytes, i.e. the clamped backref length.
 */
5557 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5558 ref_namelen != backref_namelen ||
5559 strncmp(ref_name, backref_name, len)) {
5560 err |= ROOT_REF_MISMATCH;
5561 error("%s[%llu %llu] mismatch relative ref",
5562 ref_key->type == BTRFS_ROOT_REF_KEY ?
5563 "ROOT_REF" : "ROOT_BACKREF",
5564 ref_key->objectid, ref_key->offset);
5567 btrfs_release_path(&path);
5572 * Check all fs/file trees in low_memory mode.
5574 * 1. for fs tree root item, call check_fs_root_v2()
5575 * 2. for fs tree root ref/backref, call check_root_ref()
5577 * Return 0 if no error occurred.
5579 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
/*
 * Flow: search the tree root starting at the BTRFS_FS_TREE_OBJECTID root
 * item, then step through the items with btrfs_next_item(), dispatching
 * root items to check_fs_root_v2() and root ref/backref items to
 * check_root_ref().
 */
5581 struct btrfs_root *tree_root = fs_info->tree_root;
5582 struct btrfs_root *cur_root = NULL;
5583 struct btrfs_path path;
5584 struct btrfs_key key;
5585 struct extent_buffer *node;
5586 unsigned int ext_ref;
/* Whether the filesystem has the EXTENDED_IREF incompat feature set. */
5591 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5593 btrfs_init_path(&path);
5594 key.objectid = BTRFS_FS_TREE_OBJECTID;
5596 key.type = BTRFS_ROOT_ITEM_KEY;
5598 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5602 } else if (ret > 0) {
/* Decode the key of the item the path currently points at. */
5608 node = path.nodes[0];
5609 slot = path.slots[0];
5610 btrfs_item_key_to_cpu(node, &key, slot);
/* Object ids above BTRFS_LAST_FREE_OBJECTID are outside the range handled here. */
5611 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5613 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5614 fs_root_objectid(key.objectid)) {
/*
 * Roots with objectid BTRFS_TREE_RELOC_OBJECTID are read with the no-cache
 * variant and freed again after the check (see below); all others go through
 * btrfs_read_fs_root() with offset (u64)-1.
 */
5615 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5616 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5619 key.offset = (u64)-1;
5620 cur_root = btrfs_read_fs_root(fs_info, &key);
5623 if (IS_ERR(cur_root)) {
5624 error("Fail to read fs/subvol tree: %lld",
5630 ret = check_fs_root_v2(cur_root, ext_ref);
5633 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5634 btrfs_free_fs_root(cur_root);
5635 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5636 key.type == BTRFS_ROOT_BACKREF_KEY) {
5637 ret = check_root_ref(tree_root, &key, node, slot);
/* Advance to the next item of the tree root. */
5641 ret = btrfs_next_item(tree_root, &path);
5651 btrfs_release_path(&path);
/*
 * Entry point for the fs-roots check: dispatches to check_fs_roots_v2()
 * when check_mode is CHECK_MODE_LOWMEM and to check_fs_roots() otherwise.
 * @root_cache is only passed to check_fs_roots().
 */
5655 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
5656 struct cache_tree *root_cache)
/* Print the phase banner only when progress reporting is disabled. */
5660 if (!ctx.progress_enabled)
5661 fprintf(stderr, "checking fs roots\n");
5662 if (check_mode == CHECK_MODE_LOWMEM)
5663 ret = check_fs_roots_v2(fs_info);
5665 ret = check_fs_roots(fs_info, root_cache);
/*
 * Cross-check every backref stored in rec->backref_tree against @rec.
 *
 * For each backref this looks at: whether it was seen in the extent tree,
 * whether a tree backref was actually referenced, and for data backrefs the
 * local reference count, disk bytenr and byte length. Finally the summed
 * reference count is compared with rec->refs. Diagnostics go to stderr.
 *
 * maybe_free_extent_rec() treats a zero return as "nothing wrong".
 * NOTE(review): the lines that consult @print_errs and set the return value
 * are not visible here - presumably printing is skipped when it is 0.
 */
5670 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5672 struct extent_backref *back, *tmp;
5673 struct tree_backref *tback;
5674 struct data_backref *dback;
5678 rbtree_postorder_for_each_entry_safe(back, tmp,
5679 &rec->backref_tree, node) {
/* Backref was never matched by an entry in the extent tree. */
5680 if (!back->found_extent_tree) {
5684 if (back->is_data) {
5685 dback = to_data_backref(back);
5686 fprintf(stderr, "Data backref %llu %s %llu"
5687 " owner %llu offset %llu num_refs %lu"
5688 " not found in extent tree\n",
5689 (unsigned long long)rec->start,
5690 back->full_backref ?
5692 back->full_backref ?
5693 (unsigned long long)dback->parent:
5694 (unsigned long long)dback->root,
5695 (unsigned long long)dback->owner,
5696 (unsigned long long)dback->offset,
5697 (unsigned long)dback->num_refs);
5699 tback = to_tree_backref(back);
5700 fprintf(stderr, "Tree backref %llu parent %llu"
5701 " root %llu not found in extent tree\n",
5702 (unsigned long long)rec->start,
5703 (unsigned long long)tback->parent,
5704 (unsigned long long)tback->root);
/* Tree backref whose found_ref flag was never set. */
5707 if (!back->is_data && !back->found_ref) {
5711 tback = to_tree_backref(back);
5712 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5713 (unsigned long long)rec->start,
5714 back->full_backref ? "parent" : "root",
5715 back->full_backref ?
5716 (unsigned long long)tback->parent :
5717 (unsigned long long)tback->root, back);
/* Data backrefs: found count, disk bytenr and length must all match. */
5719 if (back->is_data) {
5720 dback = to_data_backref(back);
5721 if (dback->found_ref != dback->num_refs) {
5725 fprintf(stderr, "Incorrect local backref count"
5726 " on %llu %s %llu owner %llu"
5727 " offset %llu found %u wanted %u back %p\n",
5728 (unsigned long long)rec->start,
5729 back->full_backref ?
5731 back->full_backref ?
5732 (unsigned long long)dback->parent:
5733 (unsigned long long)dback->root,
5734 (unsigned long long)dback->owner,
5735 (unsigned long long)dback->offset,
5736 dback->found_ref, dback->num_refs, back);
5738 if (dback->disk_bytenr != rec->start) {
5742 fprintf(stderr, "Backref disk bytenr does not"
5743 " match extent record, bytenr=%llu, "
5744 "ref bytenr=%llu\n",
5745 (unsigned long long)rec->start,
5746 (unsigned long long)dback->disk_bytenr);
5749 if (dback->bytes != rec->nr) {
5753 fprintf(stderr, "Backref bytes do not match "
5754 "extent backref, bytenr=%llu, ref "
5755 "bytes=%llu, backref bytes=%llu\n",
5756 (unsigned long long)rec->start,
5757 (unsigned long long)rec->nr,
5758 (unsigned long long)dback->bytes);
/* Accumulate the global count; data backrefs contribute their found_ref. */
5761 if (!back->is_data) {
5764 dback = to_data_backref(back);
5765 found += dback->found_ref;
/* The accumulated total must equal the reference count of the record. */
5768 if (found != rec->refs) {
5772 fprintf(stderr, "Incorrect global backref count "
5773 "on %llu found %llu wanted %llu\n",
5774 (unsigned long long)rec->start,
5775 (unsigned long long)found,
5776 (unsigned long long)rec->refs);
/*
 * Per-node callback that free_all_extent_backrefs() passes to
 * rb_free_nodes(); recovers the extent_backref that embeds @node.
 */
5782 static void __free_one_backref(struct rb_node *node)
5784 struct extent_backref *back = rb_node_to_extent_backref(node);
/*
 * Tear down rec->backref_tree, invoking __free_one_backref() for each node
 * through rb_free_nodes().
 */
5789 static void free_all_extent_backrefs(struct extent_record *rec)
5791 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Empty @extent_cache: take the first cache extent, map it to its
 * extent_record, unlink it from the cache and free the record's backrefs.
 * NOTE(review): the enclosing loop and the release of the record itself are
 * presumably on lines not visible here.
 */
5794 static void free_extent_record_cache(struct cache_tree *extent_cache)
5796 struct cache_extent *cache;
5797 struct extent_record *rec;
5800 cache = first_cache_extent(extent_cache);
/* The cache_extent is embedded in the extent_record as member "cache". */
5803 rec = container_of(cache, struct extent_record, cache);
5804 remove_cache_extent(extent_cache, cache);
5805 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing is left to verify on it.
 *
 * The record is removed only if all of the following hold: content and
 * owner ref were checked, extent_item_refs equals refs and refs is
 * non-zero, there are no duplicates, all_backpointers_checked() returns 0,
 * and none of bad_full_backref / crossing_stripes / wrong_chunk_type is
 * set.
 */
5810 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5811 struct extent_record *rec)
5813 if (rec->content_checked && rec->owner_ref_checked &&
5814 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5815 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5816 !rec->bad_full_backref && !rec->crossing_stripes &&
5817 !rec->wrong_chunk_type) {
/* Unlink from the cache tree, release the backrefs, leave any list. */
5818 remove_cache_extent(extent_cache, &rec->cache);
5819 free_all_extent_backrefs(rec);
5820 list_del_init(&rec->list);
/*
 * Check that the owner recorded in the header of @buf is backed by a
 * reference.
 *
 * First the backrefs of @rec are scanned for a root-based tree backref
 * whose root equals btrfs_header_owner(buf). Failing that, the owner's fs
 * tree is read and searched one level above @buf, to see whether the
 * parent node found there points at buf->start.
 *
 * Returns 0 when such a reference was found and 1 otherwise (final return
 * statement); other return paths are not visible here.
 */
5826 static int check_owner_ref(struct btrfs_root *root,
5827 struct extent_record *rec,
5828 struct extent_buffer *buf)
5830 struct extent_backref *node, *tmp;
5831 struct tree_backref *back;
5832 struct btrfs_root *ref_root;
5833 struct btrfs_key key;
5834 struct btrfs_path path;
5835 struct extent_buffer *parent;
5840 rbtree_postorder_for_each_entry_safe(node, tmp,
5841 &rec->backref_tree, node) {
/* The checks below test found_ref and full_backref before comparing the root. */
5844 if (!node->found_ref)
5846 if (node->full_backref)
5848 back = to_tree_backref(node);
5849 if (btrfs_header_owner(buf) == back->root)
5852 BUG_ON(rec->is_root);
5854 /* try to find the block by search corresponding fs tree */
5855 key.objectid = btrfs_header_owner(buf);
5856 key.type = BTRFS_ROOT_ITEM_KEY;
5857 key.offset = (u64)-1;
5859 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5860 if (IS_ERR(ref_root))
/* Use the first key stored in @buf as the search target. */
5863 level = btrfs_header_level(buf);
5865 btrfs_item_key_to_cpu(buf, &key, 0);
5867 btrfs_node_key_to_cpu(buf, &key, 0);
/* Stop the search one level above @buf so the parent slot can be inspected. */
5869 btrfs_init_path(&path);
5870 path.lowest_level = level + 1;
5871 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5875 parent = path.nodes[level + 1];
5876 if (parent && buf->start == btrfs_node_blockptr(parent,
5877 path.slots[level + 1]))
5880 btrfs_release_path(&path);
5881 return found ? 0 : 1;
/*
 * Scan the backrefs of @rec for a tree backref whose root is
 * BTRFS_EXTENT_TREE_OBJECTID.
 * record_bad_block_io() treats a zero return as "not an extent tree
 * record".
 */
5884 static int is_extent_tree_record(struct extent_record *rec)
5886 struct extent_backref *node, *tmp;
5887 struct tree_backref *back;
5890 rbtree_postorder_for_each_entry_safe(node, tmp,
5891 &rec->backref_tree, node) {
5894 back = to_tree_backref(node);
/* full_backref is tested before the root comparison below. */
5895 if (node->full_backref)
5897 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Register a corrupt-extent record for the block at [start, start + len),
 * keyed by the parent key stored in its extent_record.
 * The record is looked up in @extent_cache and is_extent_tree_record() is
 * consulted before anything is recorded.
 */
5904 static int record_bad_block_io(struct btrfs_fs_info *info,
5905 struct cache_tree *extent_cache,
5908 struct extent_record *rec;
5909 struct cache_extent *cache;
5910 struct btrfs_key key;
5912 cache = lookup_cache_extent(extent_cache, start, len);
5916 rec = container_of(cache, struct extent_record, cache);
5917 if (!is_extent_tree_record(rec))
/* Convert the on-disk parent key to CPU form for the corrupt record. */
5920 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5921 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot + 1 of @buf.
 *
 * For a node (non-zero header level) the two btrfs_key_ptr entries are
 * exchanged. For a leaf, the item data, the item offsets/sizes and the
 * keys of the two items are exchanged.
 */
5924 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5925 struct extent_buffer *buf, int slot)
5927 if (btrfs_header_level(buf)) {
5928 struct btrfs_key_ptr ptr1, ptr2;
/* Read both key pointers, then write each into the other's slot. */
5930 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5931 sizeof(struct btrfs_key_ptr));
5932 read_extent_buffer(buf, &ptr2,
5933 btrfs_node_key_ptr_offset(slot + 1),
5934 sizeof(struct btrfs_key_ptr));
5935 write_extent_buffer(buf, &ptr1,
5936 btrfs_node_key_ptr_offset(slot + 1),
5937 sizeof(struct btrfs_key_ptr));
5938 write_extent_buffer(buf, &ptr2,
5939 btrfs_node_key_ptr_offset(slot),
5940 sizeof(struct btrfs_key_ptr));
/*
 * Re-read the key at index 0 of the node and hand it to
 * btrfs_fixup_low_keys() for the level above.
 */
5942 struct btrfs_disk_key key;
5943 btrfs_node_key(buf, &key, 0);
5944 btrfs_fixup_low_keys(root, path, &key,
5945 btrfs_header_level(buf) + 1);
5948 struct btrfs_item *item1, *item2;
5949 struct btrfs_key k1, k2;
5950 char *item1_data, *item2_data;
5951 u32 item1_offset, item2_offset, item1_size, item2_size;
/* Capture keys, offsets and sizes of both items before touching anything. */
5953 item1 = btrfs_item_nr(slot);
5954 item2 = btrfs_item_nr(slot + 1);
5955 btrfs_item_key_to_cpu(buf, &k1, slot);
5956 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5957 item1_offset = btrfs_item_offset(buf, item1);
5958 item2_offset = btrfs_item_offset(buf, item2);
5959 item1_size = btrfs_item_size(buf, item1);
5960 item2_size = btrfs_item_size(buf, item2);
/* Temporary heap copies of both payloads. */
5962 item1_data = malloc(item1_size);
5965 item2_data = malloc(item2_size);
5971 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5972 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): item1_data holds item1_size bytes but item2_size bytes are
 * written from it (and the reverse for item2_data). If the two sizes
 * differ this looks like a read past the end of the smaller buffer -
 * confirm whether equal sizes are guaranteed here.
 */
5974 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5975 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
/* Exchange the offset/size fields of the two item headers. */
5979 btrfs_set_item_offset(buf, item1, item2_offset);
5980 btrfs_set_item_offset(buf, item2, item1_offset);
5981 btrfs_set_item_size(buf, item1, item2_size);
5982 btrfs_set_item_size(buf, item2, item1_size);
/* Exchange the keys; path->slots[0] selects which item gets the new key. */
5984 path->slots[0] = slot;
5985 btrfs_set_item_key_unsafe(root, path, &k2);
5986 path->slots[0] = slot + 1;
5987 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair key ordering in the block at path->lowest_level: each adjacent
 * pair of keys is compared and, unless the first compares lower than the
 * second, the pair is passed to swap_values(). The buffer is marked dirty.
 */
5992 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5994 struct extent_buffer *buf;
5995 struct btrfs_key k1, k2;
5997 int level = path->lowest_level;
6000 buf = path->nodes[level];
/*
 * NOTE(review): "nritems - 1" would wrap if the header count is unsigned
 * and 0 - confirm an empty block cannot reach this loop.
 */
6001 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
/* Node keys and leaf item keys are read with different accessors. */
6003 btrfs_node_key_to_cpu(buf, &k1, i);
6004 btrfs_node_key_to_cpu(buf, &k2, i + 1);
6006 btrfs_item_key_to_cpu(buf, &k1, i);
6007 btrfs_item_key_to_cpu(buf, &k2, i + 1);
/* A negative comparison means this pair is already in order. */
6009 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6011 ret = swap_values(root, path, buf, i);
6014 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * Only a fixed set of key types is eligible (see the check below). The
 * item headers after @slot are shifted down by one with
 * memmove_extent_buffer() and the header item count is decremented; only
 * item headers are moved in the visible code.
 */
6020 static int delete_bogus_item(struct btrfs_root *root,
6021 struct btrfs_path *path,
6022 struct extent_buffer *buf, int slot)
6024 struct btrfs_key key;
6025 int nritems = btrfs_header_nritems(buf);
6027 btrfs_item_key_to_cpu(buf, &key, slot);
6029 /* These are all the keys we can deal with missing. */
6030 if (key.type != BTRFS_DIR_INDEX_KEY &&
6031 key.type != BTRFS_EXTENT_ITEM_KEY &&
6032 key.type != BTRFS_METADATA_ITEM_KEY &&
6033 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6034 key.type != BTRFS_EXTENT_DATA_REF_KEY)
6037 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6038 (unsigned long long)key.objectid, key.type,
6039 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array: move slot+1.. down to slot. */
6040 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6041 btrfs_item_nr_offset(slot + 1),
6042 sizeof(struct btrfs_item) *
6043 (nritems - slot - 1));
6044 btrfs_set_header_nritems(buf, nritems - 1);
/*
 * Pass the key now at index 0 to btrfs_fixup_low_keys() for level 1.
 * NOTE(review): presumably only done when slot 0 was the one removed.
 */
6046 struct btrfs_disk_key disk_key;
6048 btrfs_item_key(buf, &disk_key, 0);
6049 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6051 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * Item 0 must end at BTRFS_LEAF_DATA_SIZE(root) and every later item must
 * end where the previous item starts. An item ending beyond that point is
 * handed to delete_bogus_item(); an item ending short of it is shifted up
 * by the size of the gap.
 */
6055 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6057 struct extent_buffer *buf;
6061 /* We should only get this for leaves */
6062 BUG_ON(path->lowest_level);
6063 buf = path->nodes[0];
6065 for (i = 0; i < btrfs_header_nritems(buf); i++) {
6066 unsigned int shift = 0, offset;
/* First item: measured against the end of the leaf data area. */
6068 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6069 BTRFS_LEAF_DATA_SIZE(root)) {
6070 if (btrfs_item_end_nr(buf, i) >
6071 BTRFS_LEAF_DATA_SIZE(root)) {
6072 ret = delete_bogus_item(root, path, buf, i);
6075 fprintf(stderr, "item is off the end of the "
6076 "leaf, can't fix\n");
6080 shift = BTRFS_LEAF_DATA_SIZE(root) -
6081 btrfs_item_end_nr(buf, i);
/* Later items: measured against the start of the preceding item. */
6082 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6083 btrfs_item_offset_nr(buf, i - 1)) {
6084 if (btrfs_item_end_nr(buf, i) >
6085 btrfs_item_offset_nr(buf, i - 1)) {
6086 ret = delete_bogus_item(root, path, buf, i);
6089 fprintf(stderr, "items overlap, can't fix\n");
6093 shift = btrfs_item_offset_nr(buf, i - 1) -
6094 btrfs_item_end_nr(buf, i);
6099 printf("Shifting item nr %d by %u bytes in block %llu\n",
6100 i, shift, (unsigned long long)buf->start);
/* Move the payload up by @shift bytes, then update the item's offset. */
6101 offset = btrfs_item_offset_nr(buf, i);
6102 memmove_extent_buffer(buf,
6103 btrfs_leaf_data(buf) + offset + shift,
6104 btrfs_leaf_data(buf) + offset,
6105 btrfs_item_size_nr(buf, i));
6106 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6108 btrfs_mark_buffer_dirty(buf);
6112 * We may have moved things, in which case we want to exit so we don't
6113 * write those changes out. Once we have proper abort functionality in
6114 * progs this can be changed to something nicer.
6121 * Attempt to fix basic block failures. If we can't fix it for whatever reason
6122 * then just return -EIO.
6124 static int try_to_fix_bad_block(struct btrfs_root *root,
6125 struct extent_buffer *buf,
6126 enum btrfs_tree_block_status status)
/*
 * Flow: collect every root that references @buf with
 * btrfs_find_all_roots(), then for each root start a transaction, search
 * down to @buf with COW enabled and apply fix_key_order() or
 * fix_item_offset() depending on @status, committing the transaction
 * afterwards.
 */
6128 struct btrfs_trans_handle *trans;
6129 struct ulist *roots;
6130 struct ulist_node *node;
6131 struct btrfs_root *search_root;
6132 struct btrfs_path path;
6133 struct ulist_iterator iter;
6134 struct btrfs_key root_key, key;
/* Only these two failure classes are handled by the code below. */
6137 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6138 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6141 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6145 btrfs_init_path(&path);
6146 ULIST_ITER_INIT(&iter);
6147 while ((node = ulist_next(roots, &iter))) {
/* node->val is used as the objectid of the root to load. */
6148 root_key.objectid = node->val;
6149 root_key.type = BTRFS_ROOT_ITEM_KEY;
6150 root_key.offset = (u64)-1;
6152 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6159 trans = btrfs_start_transaction(search_root, 0);
6160 if (IS_ERR(trans)) {
6161 ret = PTR_ERR(trans);
/*
 * Search for the first key of @buf, stopping at @buf's level, with block
 * checking skipped (skip_check_block) and cow = 1.
 */
6165 path.lowest_level = btrfs_header_level(buf);
6166 path.skip_check_block = 1;
6167 if (path.lowest_level)
6168 btrfs_node_key_to_cpu(buf, &key, 0);
6170 btrfs_item_key_to_cpu(buf, &key, 0);
6171 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6174 btrfs_commit_transaction(trans, search_root);
/* Dispatch to the repair routine that matches the failure class. */
6177 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6178 ret = fix_key_order(search_root, &path);
6179 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6180 ret = fix_item_offset(search_root, &path);
6182 btrfs_commit_transaction(trans, search_root);
6185 btrfs_release_path(&path);
6186 btrfs_commit_transaction(trans, search_root);
6189 btrfs_release_path(&path);
/*
 * Validate tree block @buf and update its extent_record in @extent_cache.
 *
 * Records generation, first key objectid and level in the record, runs
 * btrfs_check_leaf() or btrfs_check_node(), and on failure calls
 * try_to_fix_bad_block(). A clean block is marked content_checked; the
 * owner ref is marked checked either because @flags carries
 * BTRFS_BLOCK_FLAG_FULL_BACKREF or based on check_owner_ref(). Finally the
 * record is offered to maybe_free_extent_rec().
 */
6193 static int check_block(struct btrfs_root *root,
6194 struct cache_tree *extent_cache,
6195 struct extent_buffer *buf, u64 flags)
6197 struct extent_record *rec;
6198 struct cache_extent *cache;
6199 struct btrfs_key key;
6200 enum btrfs_tree_block_status status;
6204 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6207 rec = container_of(cache, struct extent_record, cache);
6208 rec->generation = btrfs_header_generation(buf);
6210 level = btrfs_header_level(buf);
/* Remember the objectid of the first key when the block is not empty. */
6211 if (btrfs_header_nritems(buf) > 0) {
6214 btrfs_item_key_to_cpu(buf, &key, 0);
6216 btrfs_node_key_to_cpu(buf, &key, 0);
6218 rec->info_objectid = key.objectid;
6220 rec->info_level = level;
/* Structural check, given the key the parent recorded for this block. */
6222 if (btrfs_is_leaf(buf))
6223 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6225 status = btrfs_check_node(root, &rec->parent_key, buf);
6227 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6229 status = try_to_fix_bad_block(root, buf, status);
6230 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6232 fprintf(stderr, "bad block %llu\n",
6233 (unsigned long long)buf->start);
6236 * Signal to callers we need to start the scan over
6237 * again since we'll have cowed blocks.
6242 rec->content_checked = 1;
6243 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6244 rec->owner_ref_checked = 1;
6246 ret = check_owner_ref(root, rec, buf);
6248 rec->owner_ref_checked = 1;
6252 maybe_free_extent_rec(extent_cache, rec);
/*
 * List-based lookup of a tree backref: walks rec->backrefs and compares
 * either the parent (full backrefs) or the root (non-full backrefs).
 *
 * NOTE(review): a function with the same name and signature, searching
 * rec->backref_tree instead, appears earlier in this file. Presumably this
 * variant is compiled out - confirm.
 */
6257 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6258 u64 parent, u64 root)
6260 struct list_head *cur = rec->backrefs.next;
6261 struct extent_backref *node;
6262 struct tree_backref *back;
/* Iterate until the cursor wraps around to the list head. */
6264 while(cur != &rec->backrefs) {
6265 node = to_extent_backref(cur);
6269 back = to_tree_backref(node);
/* Comparison by parent; full_backref is tested first. */
6271 if (!node->full_backref)
6273 if (parent == back->parent)
/* Comparison by root; full_backref is tested first. */
6276 if (node->full_backref)
6278 if (back->root == root)
/*
 * Allocate a tree_backref with malloc() and zero its embedded node.
 * Two initialisations are visible: one stores @parent and sets
 * full_backref to 1, the other clears full_backref.
 * NOTE(review): presumably selected by whether @parent is non-zero, with
 * @root stored in the second case - the condition is not visible here.
 */
6286 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6287 u64 parent, u64 root)
6289 struct tree_backref *ref = malloc(sizeof(*ref));
/* Only the embedded extent_backref is cleared, not the whole struct. */
6293 memset(&ref->node, 0, sizeof(ref->node));
6295 ref->parent = parent;
6296 ref->node.full_backref = 1;
6299 ref->node.full_backref = 0;
/*
 * List-based lookup of a data backref: walks rec->backrefs and compares
 * either the parent (full backrefs) or root/owner/offset (non-full
 * backrefs). When @found_ref is set and the candidate already has
 * found_ref, its bytes and disk_bytenr are additionally compared with
 * @bytes and @disk_bytenr.
 *
 * NOTE(review): a function with the same name, searching rec->backref_tree
 * instead, appears earlier in this file. Presumably this variant is
 * compiled out - confirm.
 */
6306 static struct data_backref *find_data_backref(struct extent_record *rec,
6307 u64 parent, u64 root,
6308 u64 owner, u64 offset,
6310 u64 disk_bytenr, u64 bytes)
6312 struct list_head *cur = rec->backrefs.next;
6313 struct extent_backref *node;
6314 struct data_backref *back;
/* Iterate until the cursor wraps around to the list head. */
6316 while(cur != &rec->backrefs) {
6317 node = to_extent_backref(cur);
6321 back = to_data_backref(node);
/* Comparison by parent; full_backref is tested first. */
6323 if (!node->full_backref)
6325 if (parent == back->parent)
/* Comparison by (root, owner, offset); full_backref is tested first. */
6328 if (node->full_backref)
6330 if (back->root == root && back->owner == owner &&
6331 back->offset == offset) {
6332 if (found_ref && node->found_ref &&
6333 (back->bytes != bytes ||
6334 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data_backref with malloc(), zero its embedded node and mark
 * it as a data backref. Two initialisations are visible: one stores
 * @parent and sets full_backref to 1, the other stores @offset and clears
 * full_backref. ref->bytes is set to max_size, and rec->max_size is raised
 * to max_size if it was smaller.
 */
6344 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6345 u64 parent, u64 root,
6346 u64 owner, u64 offset,
6349 struct data_backref *ref = malloc(sizeof(*ref));
/* Only the embedded extent_backref is cleared, not the whole struct. */
6353 memset(&ref->node, 0, sizeof(ref->node));
6354 ref->node.is_data = 1;
6357 ref->parent = parent;
6360 ref->node.full_backref = 1;
6364 ref->offset = offset;
6365 ref->node.full_backref = 0;
6367 ref->bytes = max_size;
/* Keep the record's max_size as the largest size seen so far. */
6370 if (max_size > rec->max_size)
6371 rec->max_size = max_size;
6375 /* Check if the type of extent matches with its chunk */
6376 static void check_extent_type(struct extent_record *rec)
/*
 * Sets rec->wrong_chunk_type when the flags of the block group found for
 * rec->start do not fit the kind of extent @rec describes.
 */
6378 struct btrfs_block_group_cache *bg_cache;
6380 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6384 /* data extent, check chunk directly*/
6385 if (!rec->metadata) {
6386 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6387 rec->wrong_chunk_type = 1;
6391 /* metadata extent, check the obvious case first */
6392 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6393 BTRFS_BLOCK_GROUP_METADATA))) {
6394 rec->wrong_chunk_type = 1;
6399 * Check SYSTEM extent, as it's also marked as metadata, we can only
6400 * make sure it's a SYSTEM extent by its backref
6402 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
6403 struct extent_backref *node;
6404 struct tree_backref *tback;
/* Only the first backref in the tree is inspected. */
6407 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
6408 if (node->is_data) {
6409 /* tree block shouldn't have data backref */
6410 rec->wrong_chunk_type = 1;
6413 tback = container_of(node, struct tree_backref, node);
/* Chunk-tree blocks are expected in SYSTEM groups, all others in METADATA. */
6415 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6416 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6418 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6419 if (!(bg_cache->flags & bg_type))
6420 rec->wrong_chunk_type = 1;
6425 * Allocate a new extent record, fill default values from @tmpl and insert into
6426 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6427 * the cache, otherwise it fails.
6429 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6430 struct extent_record *tmpl)
/*
 * Flow: malloc() a record, copy selected fields from @tmpl, reset the
 * error flags and list heads, insert the record into @extent_cache, add
 * its size to the global bytes_used counter, then run the stripe-crossing
 * and chunk-type checks.
 */
6432 struct extent_record *rec;
/* A template without a max_size is treated as a programming error. */
6435 BUG_ON(tmpl->max_size == 0);
6436 rec = malloc(sizeof(*rec));
6439 rec->start = tmpl->start;
6440 rec->max_size = tmpl->max_size;
/* nr is the larger of the template's nr and max_size. */
6441 rec->nr = max(tmpl->nr, tmpl->max_size);
6442 rec->found_rec = tmpl->found_rec;
6443 rec->content_checked = tmpl->content_checked;
6444 rec->owner_ref_checked = tmpl->owner_ref_checked;
6445 rec->num_duplicates = 0;
6446 rec->metadata = tmpl->metadata;
6447 rec->flag_block_full_backref = FLAG_UNSET;
6448 rec->bad_full_backref = 0;
6449 rec->crossing_stripes = 0;
6450 rec->wrong_chunk_type = 0;
6451 rec->is_root = tmpl->is_root;
6452 rec->refs = tmpl->refs;
6453 rec->extent_item_refs = tmpl->extent_item_refs;
6454 rec->parent_generation = tmpl->parent_generation;
6455 INIT_LIST_HEAD(&rec->backrefs);
6456 INIT_LIST_HEAD(&rec->dups);
6457 INIT_LIST_HEAD(&rec->list);
6458 rec->backref_tree = RB_ROOT;
6459 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
/* The cache range uses tmpl->nr directly, not the max() stored in rec->nr. */
6460 rec->cache.start = tmpl->start;
6461 rec->cache.size = tmpl->nr;
6462 ret = insert_cache_extent(extent_cache, &rec->cache);
6467 bytes_used += rec->nr;
/* NOTE(review): presumably guarded by a metadata check that is not visible here. */
6470 rec->crossing_stripes = check_crossing_stripes(global_info,
6471 rec->start, global_info->nodesize);
6472 check_extent_type(rec);
6477 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6479 * - refs - if found, increase refs
6480 * - is_root - if found, set
6481 * - content_checked - if found, set
6482 * - owner_ref_checked - if found, set
6484 * If not found, create a new one, initialize and insert.
6486 static int add_extent_rec(struct cache_tree *extent_cache,
6487 struct extent_record *tmpl)
/*
 * Flow: look up [tmpl->start, tmpl->nr) in @extent_cache. If a record
 * exists it is updated from @tmpl, with duplicates tracked on rec->dups
 * and the global duplicate_extents list. Otherwise the work is delegated
 * to add_extent_rec_nolookup().
 */
6489 struct extent_record *rec;
6490 struct cache_extent *cache;
6494 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6496 rec = container_of(cache, struct extent_record, cache);
6500 rec->nr = max(tmpl->nr, tmpl->max_size);
6503 * We need to make sure to reset nr to whatever the extent
6504 * record says was the real size, this way we can compare it to
6507 if (tmpl->found_rec) {
/*
 * A second extent item for this range, or one starting at a different
 * bytenr, is recorded as a duplicate.
 */
6508 if (tmpl->start != rec->start || rec->found_rec) {
6509 struct extent_record *tmp;
/* Queue the record on duplicate_extents once. */
6512 if (list_empty(&rec->list))
6513 list_add_tail(&rec->list,
6514 &duplicate_extents);
6517 * We have to do this song and dance in case we
6518 * find an extent record that falls inside of
6519 * our current extent record but does not have
6520 * the same objectid.
6522 tmp = malloc(sizeof(*tmp));
6525 tmp->start = tmpl->start;
6526 tmp->max_size = tmpl->max_size;
6529 tmp->metadata = tmpl->metadata;
6530 tmp->extent_item_refs = tmpl->extent_item_refs;
6531 INIT_LIST_HEAD(&tmp->list);
6532 list_add_tail(&tmp->list, &rec->dups);
6533 rec->num_duplicates++;
/* Report when a non-duplicate template arrives and a count is already stored. */
6540 if (tmpl->extent_item_refs && !dup) {
6541 if (rec->extent_item_refs) {
6542 fprintf(stderr, "block %llu rec "
6543 "extent_item_refs %llu, passed %llu\n",
6544 (unsigned long long)tmpl->start,
6545 (unsigned long long)
6546 rec->extent_item_refs,
6547 (unsigned long long)tmpl->extent_item_refs);
6549 rec->extent_item_refs = tmpl->extent_item_refs;
/* Flags are only ever set here, never cleared. */
6553 if (tmpl->content_checked)
6554 rec->content_checked = 1;
6555 if (tmpl->owner_ref_checked)
6556 rec->owner_ref_checked = 1;
6557 memcpy(&rec->parent_key, &tmpl->parent_key,
6558 sizeof(tmpl->parent_key));
6559 if (tmpl->parent_generation)
6560 rec->parent_generation = tmpl->parent_generation;
6561 if (rec->max_size < tmpl->max_size)
6562 rec->max_size = tmpl->max_size;
6565 * A metadata extent can't cross stripe_len boundary, otherwise
6566 * kernel scrub won't be able to handle it.
6567 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6571 rec->crossing_stripes = check_crossing_stripes(
6572 global_info, rec->start,
6573 global_info->nodesize);
6574 check_extent_type(rec);
6575 maybe_free_extent_rec(extent_cache, rec);
/* No existing record: create and insert a fresh one. */
6579 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref (@parent / @root) for the extent at @bytenr.
 *
 * The extent record is looked up in @extent_cache and created from a
 * zeroed template if missing. The backref is then looked up with
 * find_tree_backref() or allocated with alloc_tree_backref(). Its
 * found_ref or found_extent_tree flag is set; if the flag was already set
 * a message is printed to stderr. A backref is inserted into
 * rec->backref_tree, and the record is re-checked and possibly freed.
 */
6584 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6585 u64 parent, u64 root, int found_ref)
6587 struct extent_record *rec;
6588 struct tree_backref *back;
6589 struct cache_extent *cache;
6591 bool insert = false;
6593 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6595 struct extent_record tmpl;
/* Create a placeholder record starting at @bytenr. */
6597 memset(&tmpl, 0, sizeof(tmpl));
6598 tmpl.start = bytenr;
6603 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6607 /* really a bug in cache_extent implement now */
6608 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6613 rec = container_of(cache, struct extent_record, cache);
6614 if (rec->start != bytenr) {
6616 * Several cause, from unaligned bytenr to over lapping extents
6621 back = find_tree_backref(rec, parent, root);
6623 back = alloc_tree_backref(rec, parent, root);
/* Flag set on the found_ref side; a message is printed if it was already set. */
6630 if (back->node.found_ref) {
6631 fprintf(stderr, "Extent back ref already exists "
6632 "for %llu parent %llu root %llu \n",
6633 (unsigned long long)bytenr,
6634 (unsigned long long)parent,
6635 (unsigned long long)root);
6637 back->node.found_ref = 1;
/* Same pattern for the found_extent_tree flag. */
6639 if (back->node.found_extent_tree) {
6640 fprintf(stderr, "Extent back ref already exists "
6641 "for %llu parent %llu root %llu \n",
6642 (unsigned long long)bytenr,
6643 (unsigned long long)parent,
6644 (unsigned long long)root);
6646 back->node.found_extent_tree = 1;
/* rb_insert() is expected to succeed; WARN_ON fires on a non-zero result. */
6649 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6650 compare_extent_backref));
6651 check_extent_type(rec);
6652 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * The extent record is looked up in @extent_cache and created from a
 * template carrying @max_size if missing. The backref is then looked up
 * with find_data_backref() or allocated with alloc_data_backref(). One
 * visible path counts a found reference and updates bytes/disk_bytenr; the
 * other stores @num_refs and sets found_extent_tree. A backref is inserted
 * into rec->backref_tree and the record is possibly freed.
 */
6656 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6657 u64 parent, u64 root, u64 owner, u64 offset,
6658 u32 num_refs, int found_ref, u64 max_size)
6660 struct extent_record *rec;
6661 struct data_backref *back;
6662 struct cache_extent *cache;
6664 bool insert = false;
6666 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6668 struct extent_record tmpl;
/* Create a placeholder record starting at @bytenr with @max_size. */
6670 memset(&tmpl, 0, sizeof(tmpl));
6671 tmpl.start = bytenr;
6673 tmpl.max_size = max_size;
6675 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6679 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6684 rec = container_of(cache, struct extent_record, cache);
6685 if (rec->max_size < max_size)
6686 rec->max_size = max_size;
6689 * If found_ref is set then max_size is the real size and must match the
6690 * existing refs. So if we have already found a ref then we need to
6691 * make sure that this ref matches the existing one, otherwise we need
6692 * to add a new backref so we can notice that the backrefs don't match
6693 * and we need to figure out who is telling the truth. This is to
6694 * account for that awful fsync bug I introduced where we'd end up with
6695 * a btrfs_file_extent_item that would have its length include multiple
6696 * prealloc extents or point inside of a prealloc extent.
6698 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6701 back = alloc_data_backref(rec, parent, root, owner, offset,
/* Found-reference path: one reference at a time, with a consistent size. */
6708 BUG_ON(num_refs != 1);
6709 if (back->node.found_ref)
6710 BUG_ON(back->bytes != max_size);
6711 back->node.found_ref = 1;
6712 back->found_ref += 1;
/* Size or bytenr changed: update the fields (see the reinsert note below). */
6713 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
6714 back->bytes = max_size;
6715 back->disk_bytenr = bytenr;
6717 /* Need to reinsert if not already in the tree */
6719 rb_erase(&back->node.node, &rec->backref_tree);
6724 rec->content_checked = 1;
6725 rec->owner_ref_checked = 1;
/* Extent-tree path: a message is printed if this flag was already set. */
6727 if (back->node.found_extent_tree) {
6728 fprintf(stderr, "Extent back ref already exists "
6729 "for %llu parent %llu root %llu "
6730 "owner %llu offset %llu num_refs %lu\n",
6731 (unsigned long long)bytenr,
6732 (unsigned long long)parent,
6733 (unsigned long long)root,
6734 (unsigned long long)owner,
6735 (unsigned long long)offset,
6736 (unsigned long)num_refs);
6738 back->num_refs = num_refs;
6739 back->node.found_extent_tree = 1;
/* rb_insert() is expected to succeed; WARN_ON fires on a non-zero result. */
6742 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6743 compare_extent_backref));
6745 maybe_free_extent_rec(extent_cache, rec);
/*
 * Add [bytenr, bytenr + size) to the @seen tree and to the @pending tree.
 * The result of the @seen insertion is kept in ret; the result of the
 * @pending insertion is ignored.
 */
6749 static int add_pending(struct cache_tree *pending,
6750 struct cache_tree *seen, u64 bytenr, u32 size)
6753 ret = add_cache_extent(seen, bytenr, size);
6756 add_cache_extent(pending, bytenr, size);
/*
 * Fill @bits (capacity @bits_nr) with the next blocks to process.
 *
 * Sources in order of appearance: the first extent of @reada; otherwise
 * extents of @nodes searched from slightly before @last, falling back to
 * the start of @nodes; otherwise extents of @pending. When more than 8
 * slots remain, further @pending extents that lie within 32768 bytes of
 * the previous pick are appended.
 */
6760 static int pick_next_pending(struct cache_tree *pending,
6761 struct cache_tree *reada,
6762 struct cache_tree *nodes,
6763 u64 last, struct block_info *bits, int bits_nr,
/* NOTE(review): u64 @last is narrowed where unsigned long is 32 bits wide. */
6766 unsigned long node_start = last;
6767 struct cache_extent *cache;
6770 cache = search_cache_extent(reada, 0);
6772 bits[0].start = cache->start;
6773 bits[0].size = cache->size;
/* Back up by 32768 bytes, but never below zero. */
6778 if (node_start > 32768)
6779 node_start -= 32768;
6781 cache = search_cache_extent(nodes, node_start);
6783 cache = search_cache_extent(nodes, 0);
6786 cache = search_cache_extent(pending, 0);
/* Copy consecutive extents until the tree or the array runs out. */
6791 bits[ret].start = cache->start;
6792 bits[ret].size = cache->size;
6793 cache = next_cache_extent(cache);
6795 } while (cache && ret < bits_nr);
6801 bits[ret].start = cache->start;
6802 bits[ret].size = cache->size;
6803 cache = next_cache_extent(cache);
6805 } while (cache && ret < bits_nr);
/* Room left: append nearby pending extents after the first pick. */
6807 if (bits_nr - ret > 8) {
6808 u64 lookup = bits[0].start + bits[0].size;
6809 struct cache_extent *next;
6810 next = search_cache_extent(pending, lookup);
/* Distance test: is the next extent more than 32768 bytes past @lookup? */
6812 if (next->start - lookup > 32768)
6814 bits[ret].start = next->start;
6815 bits[ret].size = next->size;
6816 lookup = next->start + next->size;
6820 next = next_cache_extent(next);
/*
 * Per-entry callback handed to cache_tree_free_extents() by
 * free_chunk_cache_tree().  Resolves the chunk_record that embeds cache and
 * unlinks it from both its list and dextents list heads.
 * NOTE(review): releasing the record memory presumably follows -- confirm.
 */
6828 static void free_chunk_record(struct cache_extent *cache)
6830 struct chunk_record *rec;
6832 rec = container_of(cache, struct chunk_record, cache);
6833 list_del_init(&rec->list);
6834 list_del_init(&rec->dextents);
/*
 * Tear down chunk_cache: every cache extent in the tree is passed to
 * free_chunk_record() through cache_tree_free_extents().
 */
6838 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6840 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node callback used when the device record rb-tree is torn down.
 * Resolves the device_record that embeds node.
 * NOTE(review): releasing the record memory presumably follows -- confirm.
 */
6843 static void free_device_record(struct rb_node *node)
6845 struct device_record *rec;
6847 rec = container_of(node, struct device_record, node);
/*
 * Instantiate the rb-tree teardown helper for device records, with
 * free_device_record() as the per-node callback.
 * NOTE(review): the name and signature of the generated function come from
 * the FREE_RB_BASED_TREE macro definition -- confirm there.
 */
6851 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Add bg_rec to tree: its cache extent is inserted into tree->tree and the
 * record is appended to the tree->block_groups list.
 * NOTE(review): presumably a failed cache insert returns ret before the
 * record is linked into the list -- confirm.
 */
6853 int insert_block_group_record(struct block_group_tree *tree,
6854 struct block_group_record *bg_rec)
6858 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6862 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Per-entry callback handed to cache_tree_free_extents() by
 * free_block_group_tree().  Resolves the block_group_record that embeds
 * cache and unlinks it from its list.
 * NOTE(review): releasing the record memory presumably follows -- confirm.
 */
6866 static void free_block_group_record(struct cache_extent *cache)
6868 struct block_group_record *rec;
6870 rec = container_of(cache, struct block_group_record, cache);
6871 list_del_init(&rec->list);
/*
 * Tear down a block group tree: every cache extent in tree->tree is passed
 * to free_block_group_record() through cache_tree_free_extents().
 */
6875 void free_block_group_tree(struct block_group_tree *tree)
6877 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Add de_rec to the device extent tree.  The cache extent is inserted with
 * insert_cache_extent2() and the record is appended to both orphan lists
 * (no_chunk_orphans and no_device_orphans) of the tree.
 * NOTE(review): presumably a failed cache insert returns ret before the
 * record is linked into the lists -- confirm.
 */
6880 int insert_device_extent_record(struct device_extent_tree *tree,
6881 struct device_extent_record *de_rec)
6886 * Device extent is a bit different from the other extents, because
6887 * the extents which belong to the different devices may have the
6888 * same start and size, so we need use the special extent cache
6889 * search/insert functions.
6891 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6895 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6896 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Per-entry callback handed to cache_tree_free_extents() by
 * free_device_extent_tree().  Resolves the device_extent_record that embeds
 * cache and unlinks it from chunk_list and device_list when it is linked.
 * NOTE(review): releasing the record memory presumably follows -- confirm.
 */
6900 static void free_device_extent_record(struct cache_extent *cache)
6902 struct device_extent_record *rec;
6904 rec = container_of(cache, struct device_extent_record, cache);
6905 if (!list_empty(&rec->chunk_list))
6906 list_del_init(&rec->chunk_list);
6907 if (!list_empty(&rec->device_list))
6908 list_del_init(&rec->device_list);
/*
 * Tear down a device extent tree: every cache extent in tree->tree is
 * passed to free_device_extent_record() through cache_tree_free_extents().
 */
6912 void free_device_extent_tree(struct device_extent_tree *tree)
6914 cache_tree_free_extents(&tree->tree, free_device_extent_record);
6917 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item (slot of leaf) into a backref in extent_cache.
 * Only built when BTRFS_COMPAT_EXTENT_TREE_V0 is defined.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded with
 * add_tree_backref(); any other ref is recorded with add_data_backref()
 * using the ref count stored in the item.  In both cases key.objectid is the
 * extent bytenr and key.offset is passed as the parent.
 */
6918 static int process_extent_ref_v0(struct cache_tree *extent_cache,
6919 struct extent_buffer *leaf, int slot)
6921 struct btrfs_extent_ref_v0 *ref0;
6922 struct btrfs_key key;
6925 btrfs_item_key_to_cpu(leaf, &key, slot);
6926 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
6927 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
6928 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
6931 ret = add_data_backref(extent_cache, key.objectid, key.offset,
6932 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk_record from the chunk item at slot of leaf.
 *
 * The record is allocated zeroed with room for num_stripes stripes; a
 * message is printed on stderr when the allocation fails.  The cache extent
 * covers [key->offset, key->offset + chunk length).  Key fields, the leaf
 * generation and the chunk item fields are copied, followed by the devid,
 * offset and device uuid of every stripe.
 * NOTE(review): what happens after the allocation failure message (exit or
 * NULL return) should be confirmed; process_chunk_item() uses the result
 * without a visible NULL check.
 */
6938 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6939 struct btrfs_key *key,
6942 struct btrfs_chunk *ptr;
6943 struct chunk_record *rec;
6946 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6947 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6949 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6951 fprintf(stderr, "memory allocation failed\n");
6955 INIT_LIST_HEAD(&rec->list);
6956 INIT_LIST_HEAD(&rec->dextents);
6959 rec->cache.start = key->offset;
6960 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6962 rec->generation = btrfs_header_generation(leaf);
6964 rec->objectid = key->objectid;
6965 rec->type = key->type;
6966 rec->offset = key->offset;
6968 rec->length = rec->cache.size;
6969 rec->owner = btrfs_chunk_owner(leaf, ptr);
6970 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6971 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6972 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6973 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6974 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6975 rec->num_stripes = num_stripes;
6976 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6978 for (i = 0; i < rec->num_stripes; ++i) {
6979 rec->stripes[i].devid =
6980 btrfs_stripe_devid_nr(leaf, ptr, i);
6981 rec->stripes[i].offset =
6982 btrfs_stripe_offset_nr(leaf, ptr, i);
6983 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6984 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Handle one CHUNK_ITEM found at slot of eb.
 *
 * The item is first validated with btrfs_check_chunk_valid(); an invalid
 * chunk is reported with its offset and length and ignored.  A valid chunk
 * is converted with btrfs_new_chunk_record() and inserted into chunk_cache;
 * a failed insert is reported on stderr as an already existing chunk.
 */
6991 static int process_chunk_item(struct cache_tree *chunk_cache,
6992 struct btrfs_key *key, struct extent_buffer *eb,
6995 struct chunk_record *rec;
6996 struct btrfs_chunk *chunk;
6999 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7001 * Do extra check for this chunk item,
7003 * It's still possible one can craft a leaf with CHUNK_ITEM, with
7004 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7005 * and owner<->key_type check.
7007 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7010 error("chunk(%llu, %llu) is not valid, ignore it",
7011 key->offset, btrfs_chunk_length(eb, chunk));
7014 rec = btrfs_new_chunk_record(eb, key, slot);
7015 ret = insert_cache_extent(chunk_cache, &rec->cache);
7017 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7018 rec->offset, rec->length);
/*
 * Handle one DEV_ITEM found at slot of eb.
 *
 * A device_record is allocated with malloc() (a message is printed on
 * stderr if that fails), filled from the key, the leaf generation and the
 * device item, and inserted into dev_cache ordered by
 * device_record_compare().  A failed insert is reported on stderr as an
 * already existing device.
 */
7025 static int process_device_item(struct rb_root *dev_cache,
7026 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7028 struct btrfs_dev_item *ptr;
7029 struct device_record *rec;
7032 ptr = btrfs_item_ptr(eb,
7033 slot, struct btrfs_dev_item);
7035 rec = malloc(sizeof(*rec));
7037 fprintf(stderr, "memory allocation failed\n");
7041 rec->devid = key->offset;
7042 rec->generation = btrfs_header_generation(eb);
7044 rec->objectid = key->objectid;
7045 rec->type = key->type;
7046 rec->offset = key->offset;
/* devid was seeded from key->offset above; the id stored in the item wins */
7048 rec->devid = btrfs_device_id(eb, ptr);
7049 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7050 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7052 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7054 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build an in-memory block_group_record from the block group item at slot
 * of leaf.
 *
 * The record is allocated zeroed (a message is printed on stderr if that
 * fails).  The cache extent starts at key->objectid and is key->offset
 * bytes long; key fields, the leaf generation and the block group flags are
 * copied and the list head is initialised.
 */
7061 struct block_group_record *
7062 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7065 struct btrfs_block_group_item *ptr;
7066 struct block_group_record *rec;
7068 rec = calloc(1, sizeof(*rec));
7070 fprintf(stderr, "memory allocation failed\n");
7074 rec->cache.start = key->objectid;
7075 rec->cache.size = key->offset;
7077 rec->generation = btrfs_header_generation(leaf);
7079 rec->objectid = key->objectid;
7080 rec->type = key->type;
7081 rec->offset = key->offset;
7083 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7084 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7086 INIT_LIST_HEAD(&rec->list);
/*
 * Handle one BLOCK_GROUP_ITEM found at slot of eb: convert it with
 * btrfs_new_block_group_record() and insert the record into
 * block_group_cache.  A failed insert is reported on stderr as an already
 * existing block group.
 */
7091 static int process_block_group_item(struct block_group_tree *block_group_cache,
7092 struct btrfs_key *key,
7093 struct extent_buffer *eb, int slot)
7095 struct block_group_record *rec;
7098 rec = btrfs_new_block_group_record(eb, key, slot);
7099 ret = insert_block_group_record(block_group_cache, rec);
7101 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7102 rec->objectid, rec->offset);
/*
 * Build an in-memory device_extent_record from the dev extent item at slot
 * of leaf.
 *
 * The record is allocated zeroed (a message is printed on stderr if that
 * fails).  The cache extent is keyed by key->objectid and starts at
 * key->offset; its size is the dev extent length.  Key fields, the leaf
 * generation and the owning chunk objectid are copied, and both list heads
 * (chunk_list, device_list) are initialised.  Note that the chunk objectid
 * field is spelled chunk_objecteid in the record type.
 */
7109 struct device_extent_record *
7110 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7111 struct btrfs_key *key, int slot)
7113 struct device_extent_record *rec;
7114 struct btrfs_dev_extent *ptr;
7116 rec = calloc(1, sizeof(*rec));
7118 fprintf(stderr, "memory allocation failed\n");
7122 rec->cache.objectid = key->objectid;
7123 rec->cache.start = key->offset;
7125 rec->generation = btrfs_header_generation(leaf);
7127 rec->objectid = key->objectid;
7128 rec->type = key->type;
7129 rec->offset = key->offset;
7131 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7132 rec->chunk_objecteid =
7133 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7135 btrfs_dev_extent_chunk_offset(leaf, ptr);
7136 rec->length = btrfs_dev_extent_length(leaf, ptr);
7137 rec->cache.size = rec->length;
7139 INIT_LIST_HEAD(&rec->chunk_list);
7140 INIT_LIST_HEAD(&rec->device_list);
/*
 * Handle one DEV_EXTENT item found at slot of eb: convert it with
 * btrfs_new_device_extent_record() and insert the record into
 * dev_extent_cache.  A failed insert is reported as an already existing
 * device extent.
 */
7146 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7147 struct btrfs_key *key, struct extent_buffer *eb,
7150 struct device_extent_record *rec;
7153 rec = btrfs_new_device_extent_record(eb, key, slot);
7154 ret = insert_device_extent_record(dev_extent_cache, rec);
7157 "Device extent[%llu, %llu, %llu] existed.\n",
7158 rec->objectid, rec->offset, rec->length);
/*
 * Feed one EXTENT_ITEM or METADATA_ITEM (slot of leaf eb) into extent_cache.
 *
 * The extent length is nodesize for METADATA_ITEM keys and key.offset
 * otherwise.  An extent whose bytenr is not sectorsize aligned, a metadata
 * extent whose length differs from nodesize, and a data extent whose length
 * is not sectorsize aligned are each reported with an ignore message.
 * Items smaller than struct btrfs_extent_item are treated as v0 extent items
 * when BTRFS_COMPAT_EXTENT_TREE_V0 is defined.
 *
 * Once the extent record has been added, the inline references stored after
 * the extent item (and after the tree block info, for tree blocks keyed as
 * EXTENT_ITEM) are walked up to the end of the item and recorded as tree or
 * data backrefs according to their type.  An inline ref of any other type
 * is reported as a corrupt extent record.
 */
7165 static int process_extent_item(struct btrfs_root *root,
7166 struct cache_tree *extent_cache,
7167 struct extent_buffer *eb, int slot)
7169 struct btrfs_extent_item *ei;
7170 struct btrfs_extent_inline_ref *iref;
7171 struct btrfs_extent_data_ref *dref;
7172 struct btrfs_shared_data_ref *sref;
7173 struct btrfs_key key;
7174 struct extent_record tmpl;
7179 u32 item_size = btrfs_item_size_nr(eb, slot);
7185 btrfs_item_key_to_cpu(eb, &key, slot);
7187 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7189 num_bytes = root->fs_info->nodesize;
7191 num_bytes = key.offset;
7194 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7195 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7196 key.objectid, root->fs_info->sectorsize);
7199 if (item_size < sizeof(*ei)) {
7200 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7201 struct btrfs_extent_item_v0 *ei0;
7202 BUG_ON(item_size != sizeof(*ei0));
7203 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7204 refs = btrfs_extent_refs_v0(eb, ei0);
7208 memset(&tmpl, 0, sizeof(tmpl));
7209 tmpl.start = key.objectid;
7210 tmpl.nr = num_bytes;
7211 tmpl.extent_item_refs = refs;
7212 tmpl.metadata = metadata;
7214 tmpl.max_size = num_bytes;
7216 return add_extent_rec(extent_cache, &tmpl);
7219 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7220 refs = btrfs_extent_refs(eb, ei);
7221 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7225 if (metadata && num_bytes != root->fs_info->nodesize) {
7226 error("ignore invalid metadata extent, length %llu does not equal to %u",
7227 num_bytes, root->fs_info->nodesize);
7230 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7231 error("ignore invalid data extent, length %llu is not aligned to %u",
7232 num_bytes, root->fs_info->sectorsize);
7236 memset(&tmpl, 0, sizeof(tmpl));
7237 tmpl.start = key.objectid;
7238 tmpl.nr = num_bytes;
7239 tmpl.extent_item_refs = refs;
7240 tmpl.metadata = metadata;
7242 tmpl.max_size = num_bytes;
7243 add_extent_rec(extent_cache, &tmpl);
7245 ptr = (unsigned long)(ei + 1);
7246 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7247 key.type == BTRFS_EXTENT_ITEM_KEY)
7248 ptr += sizeof(struct btrfs_tree_block_info);
7250 end = (unsigned long)ei + item_size;
7252 iref = (struct btrfs_extent_inline_ref *)ptr;
7253 type = btrfs_extent_inline_ref_type(eb, iref);
7254 offset = btrfs_extent_inline_ref_offset(eb, iref);
7256 case BTRFS_TREE_BLOCK_REF_KEY:
7257 ret = add_tree_backref(extent_cache, key.objectid,
7261 "add_tree_backref failed (extent items tree block): %s",
7264 case BTRFS_SHARED_BLOCK_REF_KEY:
7265 ret = add_tree_backref(extent_cache, key.objectid,
7269 "add_tree_backref failed (extent items shared block): %s",
7272 case BTRFS_EXTENT_DATA_REF_KEY:
7273 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7274 add_data_backref(extent_cache, key.objectid, 0,
7275 btrfs_extent_data_ref_root(eb, dref),
7276 btrfs_extent_data_ref_objectid(eb,
7278 btrfs_extent_data_ref_offset(eb, dref),
7279 btrfs_extent_data_ref_count(eb, dref),
7282 case BTRFS_SHARED_DATA_REF_KEY:
7283 sref = (struct btrfs_shared_data_ref *)(iref + 1);
7284 add_data_backref(extent_cache, key.objectid, offset,
7286 btrfs_shared_data_ref_count(eb, sref),
7290 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7291 key.objectid, key.type, num_bytes);
7294 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Verify that the range [offset, offset + bytes) of block group cache is
 * described by one free space entry, after carving out any superblock
 * mirror stripes that overlap the range.
 *
 * For every superblock mirror, btrfs_rmap_block() maps the mirror location
 * back to logical addresses inside this block group.  Each mapped stripe is
 * compared with the range: a stripe at the start or at the end trims the
 * range, and a stripe in the middle splits it, in which case the left part
 * is checked by a recursive call and the loop continues with the right
 * part.
 *
 * The remaining range is looked up with btrfs_find_free_space(); a missing
 * entry, a different entry offset or a different entry length is reported
 * on stderr.  A matching entry is unlinked from the free space ctl, which
 * lets verify_space_cache() detect entries that were never matched.
 */
7301 static int check_cache_range(struct btrfs_root *root,
7302 struct btrfs_block_group_cache *cache,
7303 u64 offset, u64 bytes)
7305 struct btrfs_free_space *entry;
7311 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
7312 bytenr = btrfs_sb_offset(i);
7313 ret = btrfs_rmap_block(root->fs_info,
7314 cache->key.objectid, bytenr, 0,
7315 &logical, &nr, &stripe_len);
7320 if (logical[nr] + stripe_len <= offset)
7322 if (offset + bytes <= logical[nr])
7324 if (logical[nr] == offset) {
7325 if (stripe_len >= bytes) {
7329 bytes -= stripe_len;
7330 offset += stripe_len;
7331 } else if (logical[nr] < offset) {
7332 if (logical[nr] + stripe_len >=
7337 bytes = (offset + bytes) -
7338 (logical[nr] + stripe_len);
7339 offset = logical[nr] + stripe_len;
7342 * Could be tricky, the super may land in the
7343 * middle of the area we're checking. First
7344 * check the easiest case, it's at the end.
7346 if (logical[nr] + stripe_len >=
7348 bytes = logical[nr] - offset;
7352 /* Check the left side */
7353 ret = check_cache_range(root, cache,
7355 logical[nr] - offset);
7361 /* Now we continue with the right side */
7362 bytes = (offset + bytes) -
7363 (logical[nr] + stripe_len);
7364 offset = logical[nr] + stripe_len;
7371 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7373 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7374 offset, offset+bytes);
7378 if (entry->offset != offset) {
7379 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7384 if (entry->bytes != bytes) {
7385 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7386 bytes, entry->bytes, offset);
7390 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the loaded free space entries of one block group against the
 * extent tree.
 *
 * The extent root is walked over EXTENT_ITEM and METADATA_ITEM keys,
 * starting at the larger of the block group start and
 * BTRFS_SUPER_INFO_OFFSET and stopping at the end of the block group.
 * Every gap between the end of the previous extent (last) and the start of
 * the next one is handed to check_cache_range(), as is the tail between the
 * final extent and the end of the block group.  An EXTENT_ITEM advances
 * last by key.offset bytes, a METADATA_ITEM by nodesize bytes.
 *
 * Free space entries still present in the ctl afterwards are reported on
 * stderr.
 */
7395 static int verify_space_cache(struct btrfs_root *root,
7396 struct btrfs_block_group_cache *cache)
7398 struct btrfs_path path;
7399 struct extent_buffer *leaf;
7400 struct btrfs_key key;
7404 root = root->fs_info->extent_root;
7406 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7408 btrfs_init_path(&path);
7409 key.objectid = last;
7411 key.type = BTRFS_EXTENT_ITEM_KEY;
7412 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7417 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7418 ret = btrfs_next_leaf(root, &path);
7426 leaf = path.nodes[0];
7427 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7428 if (key.objectid >= cache->key.offset + cache->key.objectid)
7430 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7431 key.type != BTRFS_METADATA_ITEM_KEY) {
7436 if (last == key.objectid) {
7437 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7438 last = key.objectid + key.offset;
7440 last = key.objectid + root->fs_info->nodesize;
7445 ret = check_cache_range(root, cache, last,
7446 key.objectid - last);
7449 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7450 last = key.objectid + key.offset;
7452 last = key.objectid + root->fs_info->nodesize;
7456 if (last < cache->key.objectid + cache->key.offset)
7457 ret = check_cache_range(root, cache, last,
7458 cache->key.objectid +
7459 cache->key.offset - last);
7462 btrfs_release_path(&path);
7465 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7466 fprintf(stderr, "There are still entries left in the space "
/*
 * Validate the free space information of every block group.
 *
 * When the superblock cache generation is set (not -1ULL) and differs from
 * the superblock generation, a message is printed that the space cache will
 * be invalidated.  Block groups are looked up in address order, starting
 * right after the primary superblock.  For each one the free space ctl is
 * created if missing (otherwise its cached entries are dropped), free space
 * is loaded with load_free_space_tree() when the FREE_SPACE_TREE compat_ro
 * feature is set (with super stripes excluded around the load) or with
 * load_free_space_cache() otherwise, and the result is checked with
 * verify_space_cache().
 *
 * Progress reporting is started when ctx.progress_enabled is set and
 * stopped before returning.  Returns -EINVAL when the local error counter
 * is non-zero, 0 otherwise.
 */
7474 static int check_space_cache(struct btrfs_root *root)
7476 struct btrfs_block_group_cache *cache;
7477 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7481 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7482 btrfs_super_generation(root->fs_info->super_copy) !=
7483 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7484 printf("cache and super generation don't match, space cache "
7485 "will be invalidated\n");
7489 if (ctx.progress_enabled) {
7490 ctx.tp = TASK_FREE_SPACE;
7491 task_start(ctx.info);
7495 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7499 start = cache->key.objectid + cache->key.offset;
7500 if (!cache->free_space_ctl) {
7501 if (btrfs_init_free_space_ctl(cache,
7502 root->fs_info->sectorsize)) {
7507 btrfs_remove_free_space_cache(cache);
7510 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7511 ret = exclude_super_stripes(root, cache);
7513 fprintf(stderr, "could not exclude super stripes: %s\n",
7518 ret = load_free_space_tree(root->fs_info, cache);
7519 free_excluded_extents(root, cache);
7521 fprintf(stderr, "could not load free space tree: %s\n",
7528 ret = load_free_space_cache(root->fs_info, cache);
7533 ret = verify_space_cache(root, cache);
7535 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7536 cache->key.objectid);
7541 task_stop(ctx.info);
7543 return error ? -EINVAL : 0;
/*
 * Read num_bytes of data starting at bytenr and compare the checksum of
 * every sectorsize block with the expected checksum stored in leaf eb,
 * beginning at leaf_offset.
 *
 * num_bytes is first tested for sectorsize alignment and a buffer of
 * num_bytes is allocated.  Data is read with read_extent_data() from the
 * current mirror.  On a checksum mismatch a message naming the mirror,
 * bytenr and both checksums is printed, and the number of copies is
 * compared with the mirror index to decide whether another mirror exists.
 * NOTE(review): csum and csum_expected are compared as plain integers while
 * csum_size bytes are read into csum_expected; this presumably assumes the
 * checksum fits in the type of csum -- confirm.
 */
7546 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7547 u64 num_bytes, unsigned long leaf_offset,
7548 struct extent_buffer *eb) {
7550 struct btrfs_fs_info *fs_info = root->fs_info;
7552 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7554 unsigned long csum_offset;
7558 u64 data_checked = 0;
7564 if (num_bytes % fs_info->sectorsize)
7567 data = malloc(num_bytes);
7571 while (offset < num_bytes) {
7574 read_len = num_bytes - offset;
7575 /* ask for everything that is left; read_len is passed by address */
7576 ret = read_extent_data(fs_info, data + offset,
7577 bytenr + offset, &read_len, mirror);
7581 /* verify every 4k data's checksum */
7582 while (data_checked < read_len) {
7584 tmp = offset + data_checked;
7586 csum = btrfs_csum_data((char *)data + tmp,
7587 csum, fs_info->sectorsize);
7588 btrfs_csum_final(csum, (u8 *)&csum);
7590 csum_offset = leaf_offset +
7591 tmp / fs_info->sectorsize * csum_size;
7592 read_extent_buffer(eb, (char *)&csum_expected,
7593 csum_offset, csum_size);
7594 /* try another mirror */
7595 if (csum != csum_expected) {
7596 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7597 mirror, bytenr + tmp,
7598 csum, csum_expected);
7599 num_copies = btrfs_num_copies(root->fs_info,
7601 if (mirror < num_copies - 1) {
7606 data_checked += fs_info->sectorsize;
/*
 * Check that the range [bytenr, bytenr + num_bytes) is covered by
 * EXTENT_ITEMs in the extent tree.
 *
 * The search key is (bytenr, EXTENT_ITEM, -1); the path is then stepped
 * back (to the previous slot or previous leaf) while the key type is above
 * EXTENT_ITEM, so that an extent starting at or before bytenr is not
 * missed.  Extents are walked forward from there and the covered part is
 * trimmed from the range.  An extent lying in the middle of the range
 * splits it: the right side is checked by a recursive call and the walk
 * continues with the left side.
 *
 * If bytes remain uncovered and ret is zero, a message naming the remaining
 * range is printed on stderr.
 */
7615 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7618 struct btrfs_path path;
7619 struct extent_buffer *leaf;
7620 struct btrfs_key key;
7623 btrfs_init_path(&path);
7624 key.objectid = bytenr;
7625 key.type = BTRFS_EXTENT_ITEM_KEY;
7626 key.offset = (u64)-1;
7629 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7632 fprintf(stderr, "Error looking up extent record %d\n", ret);
7633 btrfs_release_path(&path);
7636 if (path.slots[0] > 0) {
7639 ret = btrfs_prev_leaf(root, &path);
7642 } else if (ret > 0) {
7649 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7652 * Block group items come before extent items if they have the same
7653 * bytenr, so walk back one more just in case. Dear future traveller,
7654 * first congrats on mastering time travel. Now if it's not too much
7655 * trouble could you go back to 2006 and tell Chris to make the
7656 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7657 * EXTENT_ITEM_KEY please?
7659 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7660 if (path.slots[0] > 0) {
7663 ret = btrfs_prev_leaf(root, &path);
7666 } else if (ret > 0) {
7671 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7675 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7676 ret = btrfs_next_leaf(root, &path);
7678 fprintf(stderr, "Error going to next leaf "
7680 btrfs_release_path(&path);
7686 leaf = path.nodes[0];
7687 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7688 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7692 if (key.objectid + key.offset < bytenr) {
7696 if (key.objectid > bytenr + num_bytes)
7699 if (key.objectid == bytenr) {
7700 if (key.offset >= num_bytes) {
7704 num_bytes -= key.offset;
7705 bytenr += key.offset;
7706 } else if (key.objectid < bytenr) {
7707 if (key.objectid + key.offset >= bytenr + num_bytes) {
7711 num_bytes = (bytenr + num_bytes) -
7712 (key.objectid + key.offset);
7713 bytenr = key.objectid + key.offset;
7715 if (key.objectid + key.offset < bytenr + num_bytes) {
7716 u64 new_start = key.objectid + key.offset;
7717 u64 new_bytes = bytenr + num_bytes - new_start;
7720 * Weird case, the extent is in the middle of
7721 * our range, we'll have to search one side
7722 * and then the other. Not sure if this happens
7723 * in real life, but no harm in coding it up
7724 * anyway just in case.
7726 btrfs_release_path(&path);
7727 ret = check_extent_exists(root, new_start,
7730 fprintf(stderr, "Right section didn't "
7734 num_bytes = key.objectid - bytenr;
7737 num_bytes = key.objectid - bytenr;
7744 if (num_bytes && !ret) {
7745 fprintf(stderr, "There are no extents for csum range "
7746 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7750 btrfs_release_path(&path);
/*
 * Walk every EXTENT_CSUM item of the csum tree.
 *
 * The tree root node must be up to date, otherwise a message is printed.
 * For each csum item the covered data length is
 * (item size / csum_size) * sectorsize.  When check_data_csum is set the
 * data itself is verified with check_extent_csums(); otherwise that step is
 * skipped.  Consecutive items are merged into one run
 * [offset, offset + num_bytes).  Whenever an item does not start at the end
 * of the current run, the run is passed to check_extent_exists() and a
 * message is printed if no extent record backs it; a new run then starts at
 * that item.
 */
7754 static int check_csums(struct btrfs_root *root)
7756 struct btrfs_path path;
7757 struct extent_buffer *leaf;
7758 struct btrfs_key key;
7759 u64 offset = 0, num_bytes = 0;
7760 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7764 unsigned long leaf_offset;
7766 root = root->fs_info->csum_root;
7767 if (!extent_buffer_uptodate(root->node)) {
7768 fprintf(stderr, "No valid csum tree found\n");
7772 btrfs_init_path(&path);
7773 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7774 key.type = BTRFS_EXTENT_CSUM_KEY;
7776 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7778 fprintf(stderr, "Error searching csum tree %d\n", ret);
7779 btrfs_release_path(&path);
7783 if (ret > 0 && path.slots[0])
7788 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7789 ret = btrfs_next_leaf(root, &path);
7791 fprintf(stderr, "Error going to next leaf "
7798 leaf = path.nodes[0];
7800 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7801 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7806 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7807 csum_size) * root->fs_info->sectorsize;
7808 if (!check_data_csum)
7809 goto skip_csum_check;
7810 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7811 ret = check_extent_csums(root, key.offset, data_len,
7817 offset = key.offset;
7818 } else if (key.offset != offset + num_bytes) {
7819 ret = check_extent_exists(root, offset, num_bytes);
7821 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7822 "there is no extent record\n",
7823 offset, offset+num_bytes);
7826 offset = key.offset;
7829 num_bytes += data_len;
7833 btrfs_release_path(&path);
/*
 * Compare key with drop_key field by field in the order objectid, type,
 * offset; each later field is only examined when the earlier ones are
 * equal.
 * NOTE(review): presumably returns non-zero when key sorts strictly before
 * drop_key and zero otherwise -- confirm.
 */
7837 static int is_dropped_key(struct btrfs_key *key,
7838 struct btrfs_key *drop_key) {
7839 if (key->objectid < drop_key->objectid)
7841 else if (key->objectid == drop_key->objectid) {
7842 if (key->type < drop_key->type)
7844 else if (key->type == drop_key->type) {
7845 if (key->offset < drop_key->offset)
7853 * Here are the rules for FULL_BACKREF.
7855 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7856 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7858 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7859 * if it happened after the relocation occurred since we'll have dropped the
7860 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7861 * have no real way to know for sure.
7863 * We process the blocks one root at a time, and we start from the lowest root
7864 * objectid and go to the highest. So we can just lookup the owner backref for
7865 * the record and if we don't find it then we know it doesn't exist and we have
7868 * FIXME: if we ever start reclaiming root objectids then we need to fix this
7869 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7870 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether tree block buf should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF when it is reached through the root
 * described by ri, and report the result through *flags.
 *
 * The extent record for buf->start is looked up in extent_cache first.  The
 * decision then looks at, in order: the root objectid (below
 * BTRFS_FIRST_FREE_OBJECTID), whether buf is the root node itself
 * (buf->start == ri->bytenr), the RELOC header flag, whether the header
 * owner equals ri->objectid, and finally whether a tree backref for the
 * header owner exists on the record.
 *
 * If flag_block_full_backref on the record was already set to the opposite
 * value, bad_full_backref is set on the record.
 */
7872 static int calc_extent_flag(struct cache_tree *extent_cache,
7873 struct extent_buffer *buf,
7874 struct root_item_record *ri,
7877 struct extent_record *rec;
7878 struct cache_extent *cache;
7879 struct tree_backref *tback;
7882 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7883 /* we have added this extent before */
7887 rec = container_of(cache, struct extent_record, cache);
7890 * Except file/reloc tree, we can not have
7893 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7898 if (buf->start == ri->bytenr)
7901 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7904 owner = btrfs_header_owner(buf);
7905 if (owner == ri->objectid)
7908 tback = find_tree_backref(rec, 0, owner);
7913 if (rec->flag_block_full_backref != FLAG_UNSET &&
7914 rec->flag_block_full_backref != 0)
7915 rec->bad_full_backref = 1;
7918 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7919 if (rec->flag_block_full_backref != FLAG_UNSET &&
7920 rec->flag_block_full_backref != 1)
7921 rec->bad_full_backref = 1;
/*
 * Print a one-line diagnostic on stderr naming a key type that was found in
 * a root where it is not expected.  The key type and the root id are
 * rendered with print_key_type() and print_objectid().
 */
7925 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7927 fprintf(stderr, "Invalid key type(");
7928 print_key_type(stderr, 0, key_type);
7929 fprintf(stderr, ") found in root(");
7930 print_objectid(stderr, rootid, 0);
7931 fprintf(stderr, ")\n");
7935 * Check if the key is valid with its extent buffer.
7937 * This is an early check in case an invalid key exists in an extent buffer
7938 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check that an item of type key_type may live in the tree identified by
 * rootid.
 *
 * DEV_ITEM and CHUNK_ITEM are tied to the chunk tree; EXTENT_ITEM,
 * METADATA_ITEM and BLOCK_GROUP_ITEM to the extent tree; ROOT_ITEM to the
 * root tree; DEV_EXTENT to the dev tree.  A mismatch is reported through
 * report_mismatch_key_root().
 */
7941 static int check_type_with_root(u64 rootid, u8 key_type)
7944 /* Only valid in chunk tree */
7945 case BTRFS_DEV_ITEM_KEY:
7946 case BTRFS_CHUNK_ITEM_KEY:
7947 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7950 /* valid in csum and log tree */
/*
 * NOTE(review): every other case label here is a key type, but
 * BTRFS_CSUM_TREE_OBJECTID is named like a tree objectid.
 * BTRFS_EXTENT_CSUM_KEY may have been intended; the root condition below
 * would need reviewing together with any such change -- confirm.
 */
7951 case BTRFS_CSUM_TREE_OBJECTID:
7952 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7956 case BTRFS_EXTENT_ITEM_KEY:
7957 case BTRFS_METADATA_ITEM_KEY:
7958 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7959 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7962 case BTRFS_ROOT_ITEM_KEY:
7963 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7966 case BTRFS_DEV_EXTENT_KEY:
7967 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7973 report_mismatch_key_root(key_type, rootid);
/*
 * Process one tree block of the root described by ri.
 *
 * A batch of candidate blocks is chosen with pick_next_pending() and read
 * ahead; the first entry of the batch is the block handled here.  It is
 * removed from the pending, reada and nodes trees, read with
 * read_tree_block() (using the parent generation of its cached extent
 * record when there is one) and, once its flags are known, run through
 * check_block().  A block that cannot be read is recorded with
 * record_bad_block_io().
 *
 * Unless init_extent_tree is set, the block flags are looked up in the
 * extent tree with btrfs_lookup_extent_info(); calc_extent_flag() is the
 * other source of flags.  The FULL_BACKREF flag is then cross-checked
 * against the root objectid, the header owner, the RELOC header flag and
 * ri->last_snapshot, setting bad_full_backref on the extent record when
 * they disagree, and the final value is stored in flag_block_full_backref.
 *
 * Leaves: every item key is checked against the leaf owner with
 * check_type_with_root() and then dispatched by key type to the matching
 * process_*() or add_*_backref() helper.  Regular (non-inline, non-hole)
 * file extents update data_bytes_allocated and data_bytes_referenced and
 * add a data backref.
 * Nodes: for each child pointer an extent record and a tree backref are
 * added and the child is queued in either the nodes or the pending tree.
 * Children at ri->drop_level whose key is_dropped_key() are tested for
 * separately.
 *
 * The global byte counters (btree_space_waste, total_btree_bytes,
 * total_fs_tree_bytes, total_extent_tree_bytes) are updated before the
 * buffer is released.
 */
7977 static int run_next_block(struct btrfs_root *root,
7978 struct block_info *bits,
7981 struct cache_tree *pending,
7982 struct cache_tree *seen,
7983 struct cache_tree *reada,
7984 struct cache_tree *nodes,
7985 struct cache_tree *extent_cache,
7986 struct cache_tree *chunk_cache,
7987 struct rb_root *dev_cache,
7988 struct block_group_tree *block_group_cache,
7989 struct device_extent_tree *dev_extent_cache,
7990 struct root_item_record *ri)
7992 struct btrfs_fs_info *fs_info = root->fs_info;
7993 struct extent_buffer *buf;
7994 struct extent_record *rec = NULL;
8005 struct btrfs_key key;
8006 struct cache_extent *cache;
/* choose the next batch; bits[0] is the block handled by this call */
8009 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8010 bits_nr, &reada_bits);
8015 for(i = 0; i < nritems; i++) {
8016 ret = add_cache_extent(reada, bits[i].start,
8021 /* fixme, get the parent transid */
8022 readahead_tree_block(fs_info, bits[i].start, 0);
8025 *last = bits[0].start;
8026 bytenr = bits[0].start;
8027 size = bits[0].size;
/* take the block off every work queue it may still be on */
8029 cache = lookup_cache_extent(pending, bytenr, size);
8031 remove_cache_extent(pending, cache);
8034 cache = lookup_cache_extent(reada, bytenr, size);
8036 remove_cache_extent(reada, cache);
8039 cache = lookup_cache_extent(nodes, bytenr, size);
8041 remove_cache_extent(nodes, cache);
8044 cache = lookup_cache_extent(extent_cache, bytenr, size);
8046 rec = container_of(cache, struct extent_record, cache);
8047 gen = rec->parent_generation;
8050 /* fixme, get the real parent transid */
8051 buf = read_tree_block(root->fs_info, bytenr, gen);
8052 if (!extent_buffer_uptodate(buf)) {
8053 record_bad_block_io(root->fs_info,
8054 extent_cache, bytenr, size);
8058 nritems = btrfs_header_nritems(buf);
8061 if (!init_extent_tree) {
8062 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8063 btrfs_header_level(buf), 1, NULL,
8066 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8068 fprintf(stderr, "Couldn't calc extent flags\n");
8069 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8074 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8076 fprintf(stderr, "Couldn't calc extent flags\n");
8077 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8081 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8083 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8084 ri->objectid == btrfs_header_owner(buf)) {
8086 * Ok we got to this block from it's original owner and
8087 * we have FULL_BACKREF set. Relocation can leave
8088 * converted blocks over so this is altogether possible,
8089 * however it's not possible if the generation > the
8090 * last snapshot, so check for this case.
8092 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8093 btrfs_header_generation(buf) > ri->last_snapshot) {
8094 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8095 rec->bad_full_backref = 1;
8100 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8101 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8102 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8103 rec->bad_full_backref = 1;
8107 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8108 rec->flag_block_full_backref = 1;
8112 rec->flag_block_full_backref = 0;
8114 owner = btrfs_header_owner(buf);
8117 ret = check_block(root, extent_cache, buf, flags);
8121 if (btrfs_is_leaf(buf)) {
8122 btree_space_waste += btrfs_leaf_free_space(root, buf);
8123 for (i = 0; i < nritems; i++) {
8124 struct btrfs_file_extent_item *fi;
8125 btrfs_item_key_to_cpu(buf, &key, i);
8127 * Check key type against the leaf owner.
8128 * Could filter quite a lot of early error if
8131 if (check_type_with_root(btrfs_header_owner(buf),
8133 fprintf(stderr, "ignoring invalid key\n");
8136 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8137 process_extent_item(root, extent_cache, buf,
8141 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8142 process_extent_item(root, extent_cache, buf,
8146 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8148 btrfs_item_size_nr(buf, i);
8151 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8152 process_chunk_item(chunk_cache, &key, buf, i);
8155 if (key.type == BTRFS_DEV_ITEM_KEY) {
8156 process_device_item(dev_cache, &key, buf, i);
8159 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8160 process_block_group_item(block_group_cache,
8164 if (key.type == BTRFS_DEV_EXTENT_KEY) {
8165 process_device_extent_item(dev_extent_cache,
8170 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8171 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8172 process_extent_ref_v0(extent_cache, buf, i);
8179 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8180 ret = add_tree_backref(extent_cache,
8181 key.objectid, 0, key.offset, 0);
8184 "add_tree_backref failed (leaf tree block): %s",
8188 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8189 ret = add_tree_backref(extent_cache,
8190 key.objectid, key.offset, 0, 0);
8193 "add_tree_backref failed (leaf shared block): %s",
8197 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8198 struct btrfs_extent_data_ref *ref;
8199 ref = btrfs_item_ptr(buf, i,
8200 struct btrfs_extent_data_ref);
8201 add_data_backref(extent_cache,
8203 btrfs_extent_data_ref_root(buf, ref),
8204 btrfs_extent_data_ref_objectid(buf,
8206 btrfs_extent_data_ref_offset(buf, ref),
8207 btrfs_extent_data_ref_count(buf, ref),
8208 0, root->fs_info->sectorsize);
8211 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8212 struct btrfs_shared_data_ref *ref;
8213 ref = btrfs_item_ptr(buf, i,
8214 struct btrfs_shared_data_ref);
8215 add_data_backref(extent_cache,
8216 key.objectid, key.offset, 0, 0, 0,
8217 btrfs_shared_data_ref_count(buf, ref),
8218 0, root->fs_info->sectorsize);
8221 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8222 struct bad_item *bad;
8224 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8228 bad = malloc(sizeof(struct bad_item));
8231 INIT_LIST_HEAD(&bad->list);
8232 memcpy(&bad->key, &key,
8233 sizeof(struct btrfs_key));
8234 bad->root_id = owner;
8235 list_add_tail(&bad->list, &delete_items);
8238 if (key.type != BTRFS_EXTENT_DATA_KEY)
8240 fi = btrfs_item_ptr(buf, i,
8241 struct btrfs_file_extent_item);
8242 if (btrfs_file_extent_type(buf, fi) ==
8243 BTRFS_FILE_EXTENT_INLINE)
8245 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8248 data_bytes_allocated +=
8249 btrfs_file_extent_disk_num_bytes(buf, fi);
8250 if (data_bytes_allocated < root->fs_info->sectorsize) {
8253 data_bytes_referenced +=
8254 btrfs_file_extent_num_bytes(buf, fi);
8255 add_data_backref(extent_cache,
8256 btrfs_file_extent_disk_bytenr(buf, fi),
8257 parent, owner, key.objectid, key.offset -
8258 btrfs_file_extent_offset(buf, fi), 1, 1,
8259 btrfs_file_extent_disk_num_bytes(buf, fi));
8263 struct btrfs_key first_key;
8265 first_key.objectid = 0;
8268 btrfs_item_key_to_cpu(buf, &first_key, 0);
8269 level = btrfs_header_level(buf);
8270 for (i = 0; i < nritems; i++) {
8271 struct extent_record tmpl;
8273 ptr = btrfs_node_blockptr(buf, i);
8274 size = root->fs_info->nodesize;
8275 btrfs_node_key_to_cpu(buf, &key, i);
8277 if ((level == ri->drop_level)
8278 && is_dropped_key(&key, &ri->drop_key)) {
8283 memset(&tmpl, 0, sizeof(tmpl));
8284 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8285 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8290 tmpl.max_size = size;
8291 ret = add_extent_rec(extent_cache, &tmpl);
8295 ret = add_tree_backref(extent_cache, ptr, parent,
8299 "add_tree_backref failed (non-leaf block): %s",
8305 add_pending(nodes, seen, ptr, size);
8307 add_pending(pending, seen, ptr, size);
8310 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
8311 nritems) * sizeof(struct btrfs_key_ptr);
8313 total_btree_bytes += buf->len;
8314 if (fs_root_objectid(btrfs_header_owner(buf)))
8315 total_fs_tree_bytes += buf->len;
8316 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8317 total_extent_tree_bytes += buf->len;
8319 free_extent_buffer(buf);
/*
 * Queue the root block of a tree for scanning and seed the extent cache with
 * an extent record and a tree backref for that block.
 *
 * buf:          root tree block; its start, len, header level and backref
 *               revision are read here
 * extent_cache: receives the extent record and the tree backref
 * pending:      queue used by the second add_pending() call (presumably the
 *               level-0 case; the else line is not part of this listing)
 * seen:         forwarded to add_pending()
 * nodes:        queue used when the header level is above 0
 *
 * This listing has gaps: the last parameter line (objectid is used below),
 * the declaration of ret and the return statement are not shown.
 */
8323 static int add_root_to_pending(struct extent_buffer *buf,
8324 struct cache_tree *extent_cache,
8325 struct cache_tree *pending,
8326 struct cache_tree *seen,
8327 struct cache_tree *nodes,
8330 struct extent_record tmpl;
8333 if (btrfs_header_level(buf) > 0)
8334 add_pending(nodes, seen, buf->start, buf->len);
8336 add_pending(pending, seen, buf->start, buf->len);
/* Template record that covers exactly this block. */
8338 memset(&tmpl, 0, sizeof(tmpl));
8339 tmpl.start = buf->start;
8344 tmpl.max_size = buf->len;
/* NOTE(review): the result of add_extent_rec() is not checked here. */
8345 add_extent_rec(extent_cache, &tmpl);
/*
 * Reloc-tree roots and blocks older than the mixed backref revision get a
 * backref whose parent is the block itself; the other call passes parent 0
 * together with objectid.
 */
8347 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8348 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8349 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8352 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8357 /* as we fix the tree, we might be deleting blocks that
8358 * we're tracking for repair. This hook makes sure we
8359 * remove any backrefs for blocks as we are fixing them.
/*
 * Keep the in-memory extent records in step with an extent that is being
 * freed while the tree is repaired.
 *
 * The record covering [bytenr, bytenr + num_bytes) is looked up in
 * root->fs_info->fsck_extent_cache.  An owner at or above
 * BTRFS_FIRST_FREE_OBJECTID is treated as data.  For a data backref the
 * found_ref / num_refs counters, rec->refs and rec->extent_item_refs are
 * reduced by refs_to_drop; for a tree backref the found_ref and
 * found_extent_tree flags are cleared and rec->extent_item_refs is
 * decremented by one.  Finally maybe_free_extent_rec() is given the chance to
 * drop the record.
 *
 * refs_to_drop and is_data are used below but their declarations are on
 * lines that are not part of this listing, as are the NULL checks and the
 * return statement.
 *
 * NOTE(review): both erase conditions test
 * "!found_extent_tree && found_ref".  In the tree branch found_ref is
 * cleared just above, so that test cannot succeed as written here; confirm
 * whether "!found_ref" was intended.
 */
8361 static int free_extent_hook(struct btrfs_trans_handle *trans,
8362 struct btrfs_root *root,
8363 u64 bytenr, u64 num_bytes, u64 parent,
8364 u64 root_objectid, u64 owner, u64 offset,
8367 struct extent_record *rec;
8368 struct cache_extent *cache;
8370 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8372 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8373 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8377 rec = container_of(cache, struct extent_record, cache);
/* Data extent: adjust the per-backref and per-record reference counts. */
8379 struct data_backref *back;
8380 back = find_data_backref(rec, parent, root_objectid, owner,
8381 offset, 1, bytenr, num_bytes);
8384 if (back->node.found_ref) {
8385 back->found_ref -= refs_to_drop;
8387 rec->refs -= refs_to_drop;
8389 if (back->node.found_extent_tree) {
8390 back->num_refs -= refs_to_drop;
8391 if (rec->extent_item_refs)
8392 rec->extent_item_refs -= refs_to_drop;
/* Once a counter reaches zero the matching flag is cleared as well. */
8394 if (back->found_ref == 0)
8395 back->node.found_ref = 0;
8396 if (back->num_refs == 0)
8397 back->node.found_extent_tree = 0;
8399 if (!back->node.found_extent_tree && back->node.found_ref) {
8400 rb_erase(&back->node.node, &rec->backref_tree);
/* Tree block: backrefs are looked up by parent and root only. */
8404 struct tree_backref *back;
8405 back = find_tree_backref(rec, parent, root_objectid);
8408 if (back->node.found_ref) {
8411 back->node.found_ref = 0;
8413 if (back->node.found_extent_tree) {
8414 if (rec->extent_item_refs)
8415 rec->extent_item_refs--;
8416 back->node.found_extent_tree = 0;
8418 if (!back->node.found_extent_tree && back->node.found_ref) {
8419 rb_erase(&back->node.node, &rec->backref_tree);
8423 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent-tree items that describe one extent.
 *
 * The search starts at (bytenr, <type>, (u64)-1) in fs_info->extent_root and
 * looks at the item found for that position.  Items whose objectid differs
 * from bytenr end the interest in that slot.  Items of type EXTENT_ITEM,
 * METADATA_ITEM, TREE_BLOCK_REF, EXTENT_DATA_REF, EXTENT_REF_V0,
 * SHARED_BLOCK_REF and SHARED_DATA_REF are deleted and reported on stderr;
 * any other type only moves the search key below the item that was found.
 * When an EXTENT_ITEM or METADATA_ITEM is deleted the block group accounting
 * is updated with the extent length (key offset for EXTENT_ITEM, nodesize
 * for METADATA_ITEM).
 *
 * The bytenr parameter, the loop construct, the error checks and the return
 * statement are on lines that are not part of this listing.
 */
8428 static int delete_extent_records(struct btrfs_trans_handle *trans,
8429 struct btrfs_root *root,
8430 struct btrfs_path *path,
8433 struct btrfs_key key;
8434 struct btrfs_key found_key;
8435 struct extent_buffer *leaf;
8440 key.objectid = bytenr;
8442 key.offset = (u64)-1;
8445 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8452 if (path->slots[0] == 0)
8458 leaf = path->nodes[0];
8459 slot = path->slots[0];
8461 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8462 if (found_key.objectid != bytenr)
/* Anything that is not an extent item or one of its backref items is kept. */
8465 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8466 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8467 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8468 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8469 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8470 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8471 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8472 btrfs_release_path(path);
/* Move the search key just below the item that was skipped. */
8473 if (found_key.type == 0) {
8474 if (found_key.offset == 0)
8476 key.offset = found_key.offset - 1;
8477 key.type = found_key.type;
8479 key.type = found_key.type - 1;
8480 key.offset = (u64)-1;
8484 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8485 found_key.objectid, found_key.type, found_key.offset);
8487 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8490 btrfs_release_path(path);
/* Only the extent item itself carries the length used for accounting. */
8492 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8493 found_key.type == BTRFS_METADATA_ITEM_KEY) {
8494 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8495 found_key.offset : root->fs_info->nodesize;
8497 ret = btrfs_update_block_group(trans, root, bytenr,
8504 btrfs_release_path(path);
8509 * for a single backref, this will allocate a new extent
8510 * and add the backref to it.
/*
 * Recreate extent-tree state for one backref of an extent record.
 *
 * trans:     running transaction
 * info:      filesystem; all items go into info->extent_root
 * path:      scratch path, released after the extent item is written and
 *            again at the end
 * rec:       extent record being rebuilt (start, max_size, generation,
 *            info_objectid and info_level are read)
 * back:      the backref to add; is_data and full_backref select the branch
 * allocated: tested on a line not shown here; presumably tells whether the
 *            extent item was already inserted by an earlier call - confirm
 * flags:     extra extent flags OR-ed into BTRFS_EXTENT_FLAG_TREE_BLOCK
 *
 * An EXTENT_ITEM keyed (rec->start, EXTENT_ITEM, rec->max_size) is inserted
 * with a reference count of 0.  For a tree block the item is followed by a
 * btrfs_tree_block_info holding rec->info_level and a key made from
 * rec->info_objectid.  The reference itself is then added with
 * btrfs_inc_extent_ref(): once per found_ref for a data backref, once for a
 * tree backref.
 *
 * This listing has gaps (braces, else arms, error checks and the return are
 * missing).  Relative to the listing, this version restores the "&copy_key"
 * arguments that had been mis-decoded into a copyright sign, removes a
 * duplicated semicolon and closes the comment inside the data backref loop.
 */
8512 static int record_extent(struct btrfs_trans_handle *trans,
8513 struct btrfs_fs_info *info,
8514 struct btrfs_path *path,
8515 struct extent_record *rec,
8516 struct extent_backref *back,
8517 int allocated, u64 flags)
8520 struct btrfs_root *extent_root = info->extent_root;
8521 struct extent_buffer *leaf;
8522 struct btrfs_key ins_key;
8523 struct btrfs_extent_item *ei;
8524 struct data_backref *dback;
8525 struct btrfs_tree_block_info *bi;
8528 rec->max_size = max_t(u64, rec->max_size,
8532 u32 item_size = sizeof(*ei);
/* Tree blocks carry a btrfs_tree_block_info right after the extent item. */
8535 item_size += sizeof(*bi);
8537 ins_key.objectid = rec->start;
8538 ins_key.offset = rec->max_size;
8539 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8541 ret = btrfs_insert_empty_item(trans, extent_root, path,
8542 &ins_key, item_size);
8546 leaf = path->nodes[0];
8547 ei = btrfs_item_ptr(leaf, path->slots[0],
8548 struct btrfs_extent_item);
/* Start at zero references; btrfs_inc_extent_ref() below adds them. */
8550 btrfs_set_extent_refs(leaf, ei, 0);
8551 btrfs_set_extent_generation(leaf, ei, rec->generation);
8553 if (back->is_data) {
8554 btrfs_set_extent_flags(leaf, ei,
8555 BTRFS_EXTENT_FLAG_DATA);
8557 struct btrfs_disk_key copy_key;
8559 bi = (struct btrfs_tree_block_info *)(ei + 1);
8560 memset_extent_buffer(leaf, 0, (unsigned long)bi,
8563 btrfs_set_disk_key_objectid(&copy_key,
8564 rec->info_objectid);
8565 btrfs_set_disk_key_type(&copy_key, 0);
8566 btrfs_set_disk_key_offset(&copy_key, 0);
8568 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8569 btrfs_set_tree_block_key(leaf, bi, &copy_key);
8571 btrfs_set_extent_flags(leaf, ei,
8572 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8575 btrfs_mark_buffer_dirty(leaf);
8576 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8577 rec->max_size, 1, 0);
8580 btrfs_release_path(path);
8583 if (back->is_data) {
8587 dback = to_data_backref(back);
8588 if (back->full_backref)
8589 parent = dback->parent;
8593 for (i = 0; i < dback->found_ref; i++) {
8594 /* if parent != 0, we're doing a full backref
8595 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8596 * just makes the backref allocator create a data
 * (the remainder of this comment is missing from this listing)
 */
8599 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8600 rec->start, rec->max_size,
8604 BTRFS_FIRST_FREE_OBJECTID :
8610 fprintf(stderr, "adding new data backref"
8611 " on %llu %s %llu owner %llu"
8612 " offset %llu found %d\n",
8613 (unsigned long long)rec->start,
8614 back->full_backref ?
8616 back->full_backref ?
8617 (unsigned long long)parent :
8618 (unsigned long long)dback->root,
8619 (unsigned long long)dback->owner,
8620 (unsigned long long)dback->offset,
8624 struct tree_backref *tback;
8626 tback = to_tree_backref(back);
8627 if (back->full_backref)
8628 parent = tback->parent;
8632 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8633 rec->start, rec->max_size,
8634 parent, tback->root, 0, 0);
8635 fprintf(stderr, "adding new tree backref on "
8636 "start %llu len %llu parent %llu root %llu\n",
8637 rec->start, rec->max_size, parent, tback->root);
8640 btrfs_release_path(path);
/*
 * Walk the entries list looking for the extent_entry whose bytenr and bytes
 * both equal the requested values.
 *
 * The statements that follow a match and the final return are on lines that
 * are not part of this listing.
 */
8644 static struct extent_entry *find_entry(struct list_head *entries,
8645 u64 bytenr, u64 bytes)
8647 struct extent_entry *entry = NULL;
8649 list_for_each_entry(entry, entries, list) {
8650 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Choose, from the entries list, the extent_entry that is most likely to be
 * right, based on each entry's count and broken counters.
 *
 * Visible rules: an entry whose broken count equals its count is not
 * trusted; when two candidates have equal counts the search keeps going;
 * otherwise the candidate with the larger count is preferred.  Several
 * statements of this function (including the return) are on lines that are
 * not part of this listing, so the exact tie handling cannot be read here.
 */
8657 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8659 struct extent_entry *entry, *best = NULL, *prev = NULL;
8661 list_for_each_entry(entry, entries, list) {
8663 * If there are as many broken entries as entries then we know
8664 * not to trust this particular entry.
8666 if (entry->broken == entry->count)
8670 * Special case, when there are only two entries and 'best' is
8680 * If our current entry == best then we can't be sure our best
8681 * is really the best, so we need to keep searching.
8683 if (best && best->count == entry->count) {
8689 /* Prev == entry, not good enough, have to keep searching */
8690 if (!prev->broken && prev->count == entry->count)
8694 best = (prev->count > entry->count) ? prev : entry;
8695 else if (best->count < entry->count)
/*
 * Rewrite one file extent item so that its disk bytenr and disk length agree
 * with the extent entry chosen by the caller.
 *
 * info:  filesystem in which the owning root is looked up
 * path:  scratch path; released before the transaction starts and again
 *        after the commit
 * dback: data backref naming the root, owner, file offset and the
 *        (disk_bytenr, bytes) pair currently stored in the file extent
 * entry: the (bytenr, bytes) pair the file extent should describe
 *
 * The item is first located with a read-only search that starts at
 * (owner, EXTENT_DATA, offset) and compares each file extent with
 * dback->disk_bytenr / dback->bytes.  It is then looked up again inside a
 * transaction with cow enabled and modified in place: disk_bytenr and offset
 * are adjusted, disk_num_bytes is set to entry->bytes, and ram_bytes is set
 * too when the extent is not compressed.  Compressed extents whose bytenr
 * differs from the entry, and refs that reach outside the entry, produce a
 * message on stderr (what follows those messages is not shown here).
 *
 * Returns ret when it is non-zero, otherwise the result of the commit.
 *
 * NOTE(review): off_diff is computed below but no visible line uses it; the
 * use is presumably on a line missing from this listing - confirm.
 */
8703 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8704 struct data_backref *dback, struct extent_entry *entry)
8706 struct btrfs_trans_handle *trans;
8707 struct btrfs_root *root;
8708 struct btrfs_file_extent_item *fi;
8709 struct extent_buffer *leaf;
8710 struct btrfs_key key;
8714 key.objectid = dback->root;
8715 key.type = BTRFS_ROOT_ITEM_KEY;
8716 key.offset = (u64)-1;
8717 root = btrfs_read_fs_root(info, &key);
8719 fprintf(stderr, "Couldn't find root for our ref\n");
8724 * The backref points to the original offset of the extent if it was
8725 * split, so we need to search down to the offset we have and then walk
8726 * forward until we find the backref we're looking for.
8728 key.objectid = dback->owner;
8729 key.type = BTRFS_EXTENT_DATA_KEY;
8730 key.offset = dback->offset;
8731 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8733 fprintf(stderr, "Error looking up ref %d\n", ret);
8738 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8739 ret = btrfs_next_leaf(root, path);
8741 fprintf(stderr, "Couldn't find our ref, next\n");
8745 leaf = path->nodes[0];
8746 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8747 if (key.objectid != dback->owner ||
8748 key.type != BTRFS_EXTENT_DATA_KEY) {
8749 fprintf(stderr, "Couldn't find our ref, search\n");
8752 fi = btrfs_item_ptr(leaf, path->slots[0],
8753 struct btrfs_file_extent_item);
8754 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8755 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8757 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8762 btrfs_release_path(path);
8764 trans = btrfs_start_transaction(root, 1);
8766 return PTR_ERR(trans);
8769 * Ok we have the key of the file extent we want to fix, now we can cow
8770 * down to the thing and fix it.
8772 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8774 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8775 key.objectid, key.type, key.offset, ret);
8779 fprintf(stderr, "Well that's odd, we just found this key "
8780 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8785 leaf = path->nodes[0];
8786 fi = btrfs_item_ptr(leaf, path->slots[0],
8787 struct btrfs_file_extent_item);
8789 if (btrfs_file_extent_compression(leaf, fi) &&
8790 dback->disk_bytenr != entry->bytenr) {
8791 fprintf(stderr, "Ref doesn't match the record start and is "
8792 "compressed, please take a btrfs-image of this file "
8793 "system and send it to a btrfs developer so they can "
8794 "complete this functionality for bytenr %Lu\n",
8795 dback->disk_bytenr);
8800 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8801 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8802 } else if (dback->disk_bytenr > entry->bytenr) {
8803 u64 off_diff, offset;
8805 off_diff = dback->disk_bytenr - entry->bytenr;
8806 offset = btrfs_file_extent_offset(leaf, fi);
8807 if (dback->disk_bytenr + offset +
8808 btrfs_file_extent_num_bytes(leaf, fi) >
8809 entry->bytenr + entry->bytes) {
8810 fprintf(stderr, "Ref is past the entry end, please "
8811 "take a btrfs-image of this file system and "
8812 "send it to a btrfs developer, ref %Lu\n",
8813 dback->disk_bytenr);
8818 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8819 btrfs_set_file_extent_offset(leaf, fi, offset);
8820 } else if (dback->disk_bytenr < entry->bytenr) {
8823 offset = btrfs_file_extent_offset(leaf, fi);
8824 if (dback->disk_bytenr + offset < entry->bytenr) {
8825 fprintf(stderr, "Ref is before the entry start, please"
8826 " take a btrfs-image of this file system and "
8827 "send it to a btrfs developer, ref %Lu\n",
8828 dback->disk_bytenr);
8833 offset += dback->disk_bytenr;
8834 offset -= entry->bytenr;
8835 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8836 btrfs_set_file_extent_offset(leaf, fi, offset);
8839 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8842 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8843 * only do this if we aren't using compression, otherwise it's a
8846 if (!btrfs_file_extent_compression(leaf, fi))
8847 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8849 printf("ram bytes may be wrong?\n");
8850 btrfs_mark_buffer_dirty(leaf);
8852 err = btrfs_commit_transaction(trans, root);
8853 btrfs_release_path(path);
8854 return ret ? ret : err;
/*
 * Make the data backrefs of one extent record agree on where the extent
 * starts and how long it is.
 *
 * info: filesystem, forwarded to repair_ref()
 * path: scratch path, forwarded to repair_ref()
 * rec:  extent record whose backref_tree is examined
 *
 * Pass 1 walks rec->backref_tree, ignores full backrefs, non-data backrefs
 * and data backrefs with found_ref == 0, and groups the rest into a local
 * list of extent_entry keyed by (disk_bytenr, bytes).  A backref that
 * disagrees with (rec->start, rec->nr) or is marked broken is compared
 * separately.  With at most one entry and no mismatch nothing needs fixing.
 *
 * Otherwise find_most_right_entry() picks the candidate; if the backrefs
 * cannot decide, the extent record itself (rec->start, rec->nr) is looked up
 * or added as an entry and the choice is repeated.  A candidate whose bytenr
 * differs from rec->start is reported and not repaired.
 *
 * Pass 2 calls repair_ref() for every considered backref whose
 * (disk_bytenr, bytes) differs from the chosen entry.  The entries list is
 * emptied before returning.
 *
 * Counters, branch bodies and the return statement are on lines that are
 * not part of this listing.
 */
8857 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8858 struct extent_record *rec)
8860 struct extent_backref *back, *tmp;
8861 struct data_backref *dback;
8862 struct extent_entry *entry, *best = NULL;
8865 int broken_entries = 0;
8870 * Metadata is easy and the backrefs should always agree on bytenr and
8871 * size, if not we've got bigger issues.
8876 rbtree_postorder_for_each_entry_safe(back, tmp,
8877 &rec->backref_tree, node) {
8878 if (back->full_backref || !back->is_data)
8881 dback = to_data_backref(back);
8884 * We only pay attention to backrefs that we found a real
8887 if (dback->found_ref == 0)
8891 * For now we only catch when the bytes don't match, not the
8892 * bytenr. We can easily do this at the same time, but I want
8893 * to have a fs image to test on before we just add repair
8894 * functionality willy-nilly so we know we won't screw up the
8898 entry = find_entry(&entries, dback->disk_bytenr,
8901 entry = malloc(sizeof(struct extent_entry));
8906 memset(entry, 0, sizeof(*entry));
8907 entry->bytenr = dback->disk_bytenr;
8908 entry->bytes = dback->bytes;
8909 list_add_tail(&entry->list, &entries);
8914 * If we only have on entry we may think the entries agree when
8915 * in reality they don't so we have to do some extra checking.
8917 if (dback->disk_bytenr != rec->start ||
8918 dback->bytes != rec->nr || back->broken)
8929 /* Yay all the backrefs agree, carry on good sir */
8930 if (nr_entries <= 1 && !mismatch)
8933 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8934 "%Lu\n", rec->start);
8937 * First we want to see if the backrefs can agree amongst themselves who
8938 * is right, so figure out which one of the entries has the highest
8941 best = find_most_right_entry(&entries);
8944 * Ok so we may have an even split between what the backrefs think, so
8945 * this is where we use the extent ref to see what it thinks.
8948 entry = find_entry(&entries, rec->start, rec->nr);
8949 if (!entry && (!broken_entries || !rec->found_rec)) {
8950 fprintf(stderr, "Backrefs don't agree with each other "
8951 "and extent record doesn't agree with anybody,"
8952 " so we can't fix bytenr %Lu bytes %Lu\n",
8953 rec->start, rec->nr);
8956 } else if (!entry) {
8958 * Ok our backrefs were broken, we'll assume this is the
8959 * correct value and add an entry for this range.
8961 entry = malloc(sizeof(struct extent_entry));
8966 memset(entry, 0, sizeof(*entry));
8967 entry->bytenr = rec->start;
8968 entry->bytes = rec->nr;
8969 list_add_tail(&entry->list, &entries);
8973 best = find_most_right_entry(&entries);
8975 fprintf(stderr, "Backrefs and extent record evenly "
8976 "split on who is right, this is going to "
8977 "require user input to fix bytenr %Lu bytes "
8978 "%Lu\n", rec->start, rec->nr);
8985 * I don't think this can happen currently as we'll abort() if we catch
8986 * this case higher up, but in case somebody removes that we still can't
8987 * deal with it properly here yet, so just bail out of that's the case.
8989 if (best->bytenr != rec->start) {
8990 fprintf(stderr, "Extent start and backref starts don't match, "
8991 "please use btrfs-image on this file system and send "
8992 "it to a btrfs developer so they can make fsck fix "
8993 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8994 rec->start, rec->nr);
9000 * Ok great we all agreed on an extent record, let's go find the real
9001 * references and fix up the ones that don't match.
9003 rbtree_postorder_for_each_entry_safe(back, tmp,
9004 &rec->backref_tree, node) {
9005 if (back->full_backref || !back->is_data)
9008 dback = to_data_backref(back);
9011 * Still ignoring backrefs that don't have a real ref attached
9014 if (dback->found_ref == 0)
9017 if (dback->bytes == best->bytes &&
9018 dback->disk_bytenr == best->bytenr)
9021 ret = repair_ref(info, path, dback, best);
9027 * Ok we messed with the actual refs, which means we need to drop our
9028 * entire cache and go back and rescan. I know this is a huge pain and
9029 * adds a lot of extra work, but it's the only way to be safe. Once all
9030 * the backrefs agree we may not need to do anything to the extent
9035 while (!list_empty(&entries)) {
9036 entry = list_entry(entries.next, struct extent_entry, list);
9037 list_del_init(&entry->list);
/*
 * Swap an extent record that has no extent item of its own for the single
 * duplicate that was found, so the cache reflects the extent tree.
 *
 * extent_cache: cache holding the records
 * rec:          record with exactly one duplicate and found_rec unset;
 *               other cases are filtered out by the first check
 *
 * rec is removed from the cache and its first duplicate ("good") is
 * re-initialised to take its place, inheriting rec->refs and rec's backrefs
 * list.  Every record already in the cache that overlaps good is folded in:
 * records that have an extent item or duplicates become duplicates of good
 * (and good is put on duplicate_extents), the others only hand over their
 * refs and backrefs.  good is finally inserted into the cache.
 *
 * Returns 0 when good ended up with duplicates and 1 otherwise; the other
 * return paths are on lines that are not part of this listing.
 *
 * NOTE(review): this function moves rec->backrefs (a list) while other
 * functions in this file walk rec->backref_tree; confirm that both
 * containers are really maintained.
 */
9043 static int process_duplicates(struct cache_tree *extent_cache,
9044 struct extent_record *rec)
9046 struct extent_record *good, *tmp;
9047 struct cache_extent *cache;
9051 * If we found a extent record for this extent then return, or if we
9052 * have more than one duplicate we are likely going to need to delete
9055 if (rec->found_rec || rec->num_duplicates > 1)
9058 /* Shouldn't happen but just in case */
9059 BUG_ON(!rec->num_duplicates);
9062 * So this happens if we end up with a backref that doesn't match the
9063 * actual extent entry. So either the backref is bad or the extent
9064 * entry is bad. Either way we want to have the extent_record actually
9065 * reflect what we found in the extent_tree, so we need to take the
9066 * duplicate out and use that as the extent_record since the only way we
9067 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9069 remove_cache_extent(extent_cache, &rec->cache);
9071 good = to_extent_record(rec->dups.next);
9072 list_del_init(&good->list);
9073 INIT_LIST_HEAD(&good->backrefs);
9074 INIT_LIST_HEAD(&good->dups);
9075 good->cache.start = good->start;
9076 good->cache.size = good->nr;
9077 good->content_checked = 0;
9078 good->owner_ref_checked = 0;
9079 good->num_duplicates = 0;
9080 good->refs = rec->refs;
9081 list_splice_init(&rec->backrefs, &good->backrefs);
9083 cache = lookup_cache_extent(extent_cache, good->start,
9087 tmp = container_of(cache, struct extent_record, cache);
9090 * If we find another overlapping extent and it's found_rec is
9091 * set then it's a duplicate and we need to try and delete
9094 if (tmp->found_rec || tmp->num_duplicates > 0) {
9095 if (list_empty(&good->list))
9096 list_add_tail(&good->list,
9097 &duplicate_extents);
9098 good->num_duplicates += tmp->num_duplicates + 1;
9099 list_splice_init(&tmp->dups, &good->dups);
9100 list_del_init(&tmp->list);
9101 list_add_tail(&tmp->list, &good->dups);
9102 remove_cache_extent(extent_cache, &tmp->cache);
9107 * Ok we have another non extent item backed extent rec, so lets
9108 * just add it to this extent and carry on like we did above.
9110 good->refs += tmp->refs;
9111 list_splice_init(&tmp->backrefs, &good->backrefs);
9112 remove_cache_extent(extent_cache, &tmp->cache);
9115 ret = insert_cache_extent(extent_cache, &good->cache);
9118 return good->num_duplicates ? 0 : 1;
/*
 * Delete the duplicate EXTENT_ITEMs of one extent record from the extent
 * tree, keeping the record that covers the others.
 *
 * root: any root of the filesystem; it is replaced by fs_info->extent_root
 *       before the transaction starts
 * rec:  record whose dups list holds the duplicates
 *
 * The dups list is scanned for the covering record ("good").  Overlaps that
 * are not fully covered are reported and not handled.  The remaining
 * records are moved to a local delete_list and, inside one transaction,
 * every entry with found_rec set has its (start, EXTENT_ITEM, nr) item
 * searched and deleted; metadata records are reported instead.  Both lists
 * are emptied afterwards.
 *
 * Returns ret when it is non-zero, otherwise nr_del.  rec->num_duplicates is
 * reset to 0 only when both ret and nr_del are zero.
 *
 * NOTE(review): the overlap message prints start and nr inside "[%Lu-%Lu]",
 * which reads like start-end; confirm which was meant.
 */
9121 static int delete_duplicate_records(struct btrfs_root *root,
9122 struct extent_record *rec)
9124 struct btrfs_trans_handle *trans;
9125 LIST_HEAD(delete_list);
9126 struct btrfs_path path;
9127 struct extent_record *tmp, *good, *n;
9130 struct btrfs_key key;
9132 btrfs_init_path(&path);
9135 /* Find the record that covers all of the duplicates. */
9136 list_for_each_entry(tmp, &rec->dups, list) {
9137 if (good->start < tmp->start)
9139 if (good->nr > tmp->nr)
9142 if (tmp->start + tmp->nr < good->start + good->nr) {
9143 fprintf(stderr, "Ok we have overlapping extents that "
9144 "aren't completely covered by each other, this "
9145 "is going to require more careful thought. "
9146 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9147 tmp->start, tmp->nr, good->start, good->nr);
9154 list_add_tail(&rec->list, &delete_list);
9156 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9159 list_move_tail(&tmp->list, &delete_list);
9162 root = root->fs_info->extent_root;
9163 trans = btrfs_start_transaction(root, 1);
9164 if (IS_ERR(trans)) {
9165 ret = PTR_ERR(trans);
9169 list_for_each_entry(tmp, &delete_list, list) {
9170 if (tmp->found_rec == 0)
9172 key.objectid = tmp->start;
9173 key.type = BTRFS_EXTENT_ITEM_KEY;
9174 key.offset = tmp->nr;
9176 /* Shouldn't happen but just in case */
9177 if (tmp->metadata) {
9178 fprintf(stderr, "Well this shouldn't happen, extent "
9179 "record overlaps but is metadata? "
9180 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9184 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9190 ret = btrfs_del_item(trans, root, &path);
9193 btrfs_release_path(&path);
9196 err = btrfs_commit_transaction(trans, root);
9200 while (!list_empty(&delete_list)) {
9201 tmp = to_extent_record(delete_list.next);
9202 list_del_init(&tmp->list);
9208 while (!list_empty(&rec->dups)) {
9209 tmp = to_extent_record(rec->dups.next);
9210 list_del_init(&tmp->list);
9214 btrfs_release_path(&path);
9216 if (!ret && !nr_del)
9217 rec->num_duplicates = 0;
9219 return ret ? ret : nr_del;
/*
 * Try to attach a real file extent to every data backref of rec that has
 * none yet.
 *
 * info:         filesystem used to read the owning fs root
 * path:         scratch path, released after each lookup
 * extent_cache: used to check whether the bytenr found already has a record
 * rec:          extent record whose backref_tree is walked
 *
 * Full backrefs, non-data backrefs and backrefs that already have found_ref
 * set are ignored.  For the others the file extent at
 * (owner, EXTENT_DATA, offset) in root dback->root is looked up; a missing
 * root (-ENOENT) is treated as a bad ref, any other root error is returned.
 * When a file extent is found, its disk bytenr and disk length are copied
 * into the backref and found_ref is incremented, unless the cache check that
 * precedes it decides otherwise (that check is on lines not shown here).
 */
9222 static int find_possible_backrefs(struct btrfs_fs_info *info,
9223 struct btrfs_path *path,
9224 struct cache_tree *extent_cache,
9225 struct extent_record *rec)
9227 struct btrfs_root *root;
9228 struct extent_backref *back, *tmp;
9229 struct data_backref *dback;
9230 struct cache_extent *cache;
9231 struct btrfs_file_extent_item *fi;
9232 struct btrfs_key key;
9236 rbtree_postorder_for_each_entry_safe(back, tmp,
9237 &rec->backref_tree, node) {
9238 /* Don't care about full backrefs (poor unloved backrefs) */
9239 if (back->full_backref || !back->is_data)
9242 dback = to_data_backref(back);
9244 /* We found this one, we don't need to do a lookup */
9245 if (dback->found_ref)
9248 key.objectid = dback->root;
9249 key.type = BTRFS_ROOT_ITEM_KEY;
9250 key.offset = (u64)-1;
9252 root = btrfs_read_fs_root(info, &key);
9254 /* No root, definitely a bad ref, skip */
9255 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9257 /* Other err, exit */
9259 return PTR_ERR(root);
9261 key.objectid = dback->owner;
9262 key.type = BTRFS_EXTENT_DATA_KEY;
9263 key.offset = dback->offset;
9264 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9266 btrfs_release_path(path);
9269 /* Didn't find it, we can carry on */
9274 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9275 struct btrfs_file_extent_item);
9276 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9277 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9278 btrfs_release_path(path);
9279 cache = lookup_cache_extent(extent_cache, bytenr, 1);
9281 struct extent_record *tmp;
9282 tmp = container_of(cache, struct extent_record, cache);
9285 * If we found an extent record for the bytenr for this
9286 * particular backref then we can't add it to our
9287 * current extent record. We only want to add backrefs
9288 * that don't have a corresponding extent item in the
9289 * extent tree since they likely belong to this record
9290 * and we need to fix it if it doesn't match bytenrs.
9296 dback->found_ref += 1;
9297 dback->disk_bytenr = bytenr;
9298 dback->bytes = bytes;
9301 * Set this so the verify backref code knows not to trust the
9302 * values in this backref.
9311 * Record orphan data ref into corresponding root.
9313 * Return 0 if the extent item contains data ref and recorded.
9314 * Return 1 if the extent item contains no useful data ref
9315 * In that case, it may contain only shared_dataref or metadata backref
9316 * or the file extent exists(this should be handled by the extent bytenr
9318 * Return <0 if something goes wrong.
/*
 * Record, on the owning fs root, every data backref of rec that exists in
 * the extent tree but has no matching file extent.
 *
 * fs_info: filesystem used to read the destination roots
 * rec:     extent record whose backref_tree is walked
 *
 * Only non-full data backrefs with found_extent_tree set and found_ref
 * unset are considered.  Roots that cannot be read are skipped.  The file
 * extent at (owner, EXTENT_DATA, offset) is searched in the destination
 * root; depending on the result (see the comment in the loop) an
 * orphan_data_extent describing (root, owner, offset, rec->cache.start,
 * rec->cache.size) is allocated and linked onto
 * dest_root->orphan_data_extents.
 *
 * Returns !recorded_data_ref on the path shown here, i.e. 0 when at least
 * one orphan was recorded and 1 when none was.  The error paths are on
 * lines that are not part of this listing.
 *
 * Fix relative to the listing: list_add() takes the new node first and the
 * list head second.  The arguments were reversed, which re-linked the root's
 * list head onto each freshly initialised orphan and thereby dropped all
 * orphans recorded earlier for the same root.
 */
9320 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9321 struct extent_record *rec)
9323 struct btrfs_key key;
9324 struct btrfs_root *dest_root;
9325 struct extent_backref *back, *tmp;
9326 struct data_backref *dback;
9327 struct orphan_data_extent *orphan;
9328 struct btrfs_path path;
9329 int recorded_data_ref = 0;
9334 btrfs_init_path(&path);
9335 rbtree_postorder_for_each_entry_safe(back, tmp,
9336 &rec->backref_tree, node) {
9337 if (back->full_backref || !back->is_data ||
9338 !back->found_extent_tree)
9340 dback = to_data_backref(back);
9341 if (dback->found_ref)
9343 key.objectid = dback->root;
9344 key.type = BTRFS_ROOT_ITEM_KEY;
9345 key.offset = (u64)-1;
9347 dest_root = btrfs_read_fs_root(fs_info, &key);
9349 /* For non-exist root we just skip it */
9350 if (IS_ERR(dest_root) || !dest_root)
9353 key.objectid = dback->owner;
9354 key.type = BTRFS_EXTENT_DATA_KEY;
9355 key.offset = dback->offset;
9357 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9358 btrfs_release_path(&path);
9360 * For ret < 0, it's OK since the fs-tree may be corrupted,
9361 * we need to record it for inode/file extent rebuild.
9362 * For ret > 0, we record it only for file extent rebuild.
9363 * For ret == 0, the file extent exists but only bytenr
9364 * mismatch, let the original bytenr fix routine to handle,
9370 orphan = malloc(sizeof(*orphan));
9375 INIT_LIST_HEAD(&orphan->list);
9376 orphan->root = dback->root;
9377 orphan->objectid = dback->owner;
9378 orphan->offset = dback->offset;
9379 orphan->disk_bytenr = rec->cache.start;
9380 orphan->disk_len = rec->cache.size;
9381 list_add(&orphan->list, &dest_root->orphan_data_extents);
9382 recorded_data_ref = 1;
9385 btrfs_release_path(&path);
9387 return !recorded_data_ref;
9393 * when an incorrect extent item is found, this will delete
9394 * all of the existing entries for it and recreate them
9395 * based on what the tree scan found.
/*
 * Rebuild the extent-tree items of one extent record from the backrefs that
 * the tree scan found.
 *
 * info:         filesystem being repaired
 * extent_cache: forwarded to find_possible_backrefs()
 * rec:          the extent record to fix
 *
 * For data extents whose refs and extent_item_refs differ, the backrefs are
 * first completed (find_possible_backrefs) and reconciled (verify_backrefs).
 * Then, inside one transaction on info->extent_root, the existing items are
 * deleted with delete_extent_records() and, after a lookup of the range in
 * info->corrupt_blocks, record_extent() is called for every backref with
 * found_ref set.  The transaction is committed and a message is printed.
 *
 * flags carries BTRFS_BLOCK_FLAG_FULL_BACKREF when
 * rec->flag_block_full_backref is set.  The error checks, the handling of
 * the corrupt-block lookup and the return statement are on lines that are
 * not part of this listing.
 */
9397 static int fixup_extent_refs(struct btrfs_fs_info *info,
9398 struct cache_tree *extent_cache,
9399 struct extent_record *rec)
9401 struct btrfs_trans_handle *trans = NULL;
9403 struct btrfs_path path;
9404 struct cache_extent *cache;
9405 struct extent_backref *back, *tmp;
9409 if (rec->flag_block_full_backref)
9410 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9412 btrfs_init_path(&path);
9413 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9415 * Sometimes the backrefs themselves are so broken they don't
9416 * get attached to any meaningful rec, so first go back and
9417 * check any of our backrefs that we couldn't find and throw
9418 * them into the list if we find the backref so that
9419 * verify_backrefs can figure out what to do.
9421 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9426 /* step one, make sure all of the backrefs agree */
9427 ret = verify_backrefs(info, &path, rec);
9431 trans = btrfs_start_transaction(info->extent_root, 1);
9432 if (IS_ERR(trans)) {
9433 ret = PTR_ERR(trans);
9437 /* step two, delete all the existing records */
9438 ret = delete_extent_records(trans, info->extent_root, &path,
9444 /* was this block corrupt? If so, don't add references to it */
9445 cache = lookup_cache_extent(info->corrupt_blocks,
9446 rec->start, rec->max_size);
9452 /* step three, recreate all the refs we did find */
9453 rbtree_postorder_for_each_entry_safe(back, tmp,
9454 &rec->backref_tree, node) {
9456 * if we didn't find any references, don't create a
9459 if (!back->found_ref)
9462 rec->bad_full_backref = 0;
9463 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9471 int err = btrfs_commit_transaction(trans, info->extent_root);
9477 fprintf(stderr, "Repaired extent references for %llu\n",
9478 (unsigned long long)rec->start);
9480 btrfs_release_path(&path);
/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the extent item of rec so
 * that it matches rec->flag_block_full_backref.
 *
 * fs_info: filesystem; the item lives in fs_info->extent_root
 * rec:     extent record describing the item
 *
 * The search key is (rec->start, METADATA_ITEM, rec->info_level) for
 * metadata records; the second pair of assignments
 * (EXTENT_ITEM, rec->max_size) is presumably the else arm, which is not
 * visible in this listing.  The item is looked up with cow enabled inside a
 * transaction, its flags are rewritten, the leaf is marked dirty and the
 * transaction is committed.  When the lookup fails or finds nothing, the
 * path is released and the transaction is still committed.
 *
 * The return statements other than PTR_ERR(trans) are on lines that are not
 * part of this listing.
 */
9484 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9485 struct extent_record *rec)
9487 struct btrfs_trans_handle *trans;
9488 struct btrfs_root *root = fs_info->extent_root;
9489 struct btrfs_path path;
9490 struct btrfs_extent_item *ei;
9491 struct btrfs_key key;
9495 key.objectid = rec->start;
9496 if (rec->metadata) {
9497 key.type = BTRFS_METADATA_ITEM_KEY;
9498 key.offset = rec->info_level;
9500 key.type = BTRFS_EXTENT_ITEM_KEY;
9501 key.offset = rec->max_size;
9504 trans = btrfs_start_transaction(root, 0);
9506 return PTR_ERR(trans);
9508 btrfs_init_path(&path);
9509 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
/* NOTE(review): the results of these two early commits are not checked. */
9511 btrfs_release_path(&path);
9512 btrfs_commit_transaction(trans, root);
9515 fprintf(stderr, "Didn't find extent for %llu\n",
9516 (unsigned long long)rec->start);
9517 btrfs_release_path(&path);
9518 btrfs_commit_transaction(trans, root);
9522 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9523 struct btrfs_extent_item);
9524 flags = btrfs_extent_flags(path.nodes[0], ei);
9525 if (rec->flag_block_full_backref) {
9526 fprintf(stderr, "setting full backref on %llu\n",
9527 (unsigned long long)key.objectid);
9528 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9530 fprintf(stderr, "clearing full backref on %llu\n",
9531 (unsigned long long)key.objectid);
9532 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9534 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9535 btrfs_mark_buffer_dirty(path.nodes[0]);
9536 btrfs_release_path(&path);
9537 ret = btrfs_commit_transaction(trans, root);
9539 fprintf(stderr, "Repaired extent flags for %llu\n",
9540 (unsigned long long)rec->start);
9545 /* right now we only prune from the extent allocation tree */
/*
 * Remove the pointer to one corrupt block from its parent node in the
 * extent tree.
 *
 * trans:   running transaction
 * info:    filesystem; only info->extent_root is searched
 * corrupt: describes the bad block (key, level and cache.start are read)
 *
 * The search stops one level above the corrupt block
 * (lowest_level = corrupt->level + 1) with cow enabled.  The slot returned
 * by the search is checked first; if it does not point at
 * corrupt->cache.start, every slot of that node is scanned.  When the
 * pointer is found it is removed with btrfs_del_ptr() and a message is
 * printed.
 *
 * The control flow between these steps (labels, gotos, the handling when
 * the node is the tree root, and the return) is on lines that are not part
 * of this listing.
 */
9546 static int prune_one_block(struct btrfs_trans_handle *trans,
9547 struct btrfs_fs_info *info,
9548 struct btrfs_corrupt_block *corrupt)
9551 struct btrfs_path path;
9552 struct extent_buffer *eb;
9556 int level = corrupt->level + 1;
9558 btrfs_init_path(&path);
9560 /* we want to stop at the parent to our busted block */
9561 path.lowest_level = level;
9563 ret = btrfs_search_slot(trans, info->extent_root,
9564 &corrupt->key, &path, -1, 1);
9569 eb = path.nodes[level];
9576 * hopefully the search gave us the block we want to prune,
9577 * lets try that first
9579 slot = path.slots[level];
9580 found = btrfs_node_blockptr(eb, slot);
9581 if (found == corrupt->cache.start)
9584 nritems = btrfs_header_nritems(eb);
9586 /* the search failed, lets scan this node and hope we find it */
9587 for (slot = 0; slot < nritems; slot++) {
9588 found = btrfs_node_blockptr(eb, slot);
9589 if (found == corrupt->cache.start)
9593 * we couldn't find the bad block. TODO, search all the nodes for pointers
9596 if (eb == info->extent_root->node) {
9601 btrfs_release_path(&path);
9606 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9607 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9610 btrfs_release_path(&path);
/*
 * Prune every block recorded in info->corrupt_blocks from the extent tree.
 *
 * Each cache entry is handed to prune_one_block() and then removed from
 * info->corrupt_blocks.  A transaction on info->extent_root is started
 * (presumably lazily, when the first entry is found; the guarding lines are
 * not shown here) and committed at the end; the commit result is returned.
 *
 * NOTE(review): the return value of prune_one_block() is not checked.
 */
9614 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9616 struct btrfs_trans_handle *trans = NULL;
9617 struct cache_extent *cache;
9618 struct btrfs_corrupt_block *corrupt;
9621 cache = search_cache_extent(info->corrupt_blocks, 0);
9625 trans = btrfs_start_transaction(info->extent_root, 1);
9627 return PTR_ERR(trans);
9629 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9630 prune_one_block(trans, info, corrupt);
9631 remove_cache_extent(info->corrupt_blocks, cache);
9634 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Reset state kept for cached block groups.
 *
 * The visible code does two things: it clears each EXTENT_DIRTY range found
 * in fs_info->free_space_cache, and it steps through the block groups with
 * btrfs_lookup_first_block_group(), moving start to the end of each group
 * (key.objectid + key.offset).
 *
 * NOTE(review): what is changed on each block group, and the loop and exit
 * conditions, are on lines missing from this listing.
 */
9638 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9640 struct btrfs_block_group_cache *cache;
/* find the next dirty range in the free space cache and clear it */
9645 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9646 &start, &end, EXTENT_DIRTY);
9649 clear_extent_dirty(&fs_info->free_space_cache, start, end);
/* walk block groups in address order */
9654 cache = btrfs_lookup_first_block_group(fs_info, start);
9659 start = cache->key.objectid + cache->key.offset;
/*
 * Verify, and when repairing try to fix, every extent record collected in
 * extent_cache.
 *
 * @root:         any root of the filesystem; root->fs_info is what is used
 * @extent_cache: cache tree of struct extent_record entries
 *
 * Visible steps, in order:
 *  - The range of every record in extent_cache and of every entry in
 *    fs_info->corrupt_blocks is marked dirty in fs_info->excluded_extents,
 *    then prune_corrupt_blocks() and reset_cached_block_groups() are called.
 *  - While repairing, records on the duplicate_extents list are passed to
 *    process_duplicates() and delete_duplicate_records().
 *  - Each record is then checked and a message is printed for: multiple
 *    extent items, ref count mismatch, backpointer mismatch, failed owner
 *    ref check, bad full backref, metadata crossing a stripe boundary, and
 *    chunk type mismatch. fixup_extent_refs() and fixup_extent_flags() are
 *    called on the repair paths.
 *  - The record is removed from the cache and its backrefs are freed; its
 *    range is cleared from excluded_extents when init_extent_tree is not
 *    set, repair is set, and the record had no error or was fixed.
 *  - At the end a transaction on the extent root runs
 *    btrfs_fix_block_accounting() and is committed.
 *
 * A result other than 0 and -EAGAIN prints the abort message.
 *
 * NOTE(review): the result of prune_corrupt_blocks() is not used at its call
 * site here.
 * NOTE(review): this listing omits short lines (braces, declarations, error
 * checks, returns); the conditions guarding several steps above cannot be
 * confirmed from here.
 */
9663 static int check_extent_refs(struct btrfs_root *root,
9664 struct cache_tree *extent_cache)
9666 struct extent_record *rec;
9667 struct cache_extent *cache;
9673 * if we're doing a repair, we have to make sure
9674 * we don't allocate from the problem extents.
9675 * In the worst case, this will be all the
9678 cache = search_cache_extent(extent_cache, 0);
9680 rec = container_of(cache, struct extent_record, cache);
9681 set_extent_dirty(root->fs_info->excluded_extents,
9683 rec->start + rec->max_size - 1);
9684 cache = next_cache_extent(cache);
9687 /* pin down all the corrupted blocks too */
9688 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9690 set_extent_dirty(root->fs_info->excluded_extents,
9692 cache->start + cache->size - 1);
9693 cache = next_cache_extent(cache);
9695 prune_corrupt_blocks(root->fs_info);
9696 reset_cached_block_groups(root->fs_info);
9699 reset_cached_block_groups(root->fs_info);
9702 * We need to delete any duplicate entries we find first otherwise we
9703 * could mess up the extent tree when we have backrefs that actually
9704 * belong to a different extent item and not the weird duplicate one.
9706 while (repair && !list_empty(&duplicate_extents)) {
9707 rec = to_extent_record(duplicate_extents.next);
9708 list_del_init(&rec->list);
9710 /* Sometimes we can find a backref before we find an actual
9711 * extent, so we need to process it a little bit to see if there
9712 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9713 * if this is a backref screwup. If we need to delete stuff
9714 * process_duplicates() will return 0, otherwise it will return
9717 if (process_duplicates(extent_cache, rec))
9719 ret = delete_duplicate_records(root, rec);
9723 * delete_duplicate_records will return the number of entries
9724 * deleted, so if it's greater than 0 then we know we actually
9725 * did something and we need to remove.
9738 cache = search_cache_extent(extent_cache, 0);
9741 rec = container_of(cache, struct extent_record, cache);
9742 if (rec->num_duplicates) {
9743 fprintf(stderr, "extent item %llu has multiple extent "
9744 "items\n", (unsigned long long)rec->start);
9748 if (rec->refs != rec->extent_item_refs) {
9749 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9750 (unsigned long long)rec->start,
9751 (unsigned long long)rec->nr);
9752 fprintf(stderr, "extent item %llu, found %llu\n",
9753 (unsigned long long)rec->extent_item_refs,
9754 (unsigned long long)rec->refs);
9755 ret = record_orphan_data_extents(root->fs_info, rec);
9761 if (all_backpointers_checked(rec, 1)) {
9762 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9763 (unsigned long long)rec->start,
9764 (unsigned long long)rec->nr);
9768 if (!rec->owner_ref_checked) {
9769 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9770 (unsigned long long)rec->start,
9771 (unsigned long long)rec->nr);
9776 if (repair && fix) {
9777 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9783 if (rec->bad_full_backref) {
9784 fprintf(stderr, "bad full backref, on [%llu]\n",
9785 (unsigned long long)rec->start);
9787 ret = fixup_extent_flags(root->fs_info, rec);
9795 * Although it's not a extent ref's problem, we reuse this
9796 * routine for error reporting.
9797 * No repair function yet.
9799 if (rec->crossing_stripes) {
9801 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9802 rec->start, rec->start + rec->max_size);
9806 if (rec->wrong_chunk_type) {
9808 "bad extent [%llu, %llu), type mismatch with chunk\n",
9809 rec->start, rec->start + rec->max_size);
9813 remove_cache_extent(extent_cache, cache);
9814 free_all_extent_backrefs(rec);
9815 if (!init_extent_tree && repair && (!cur_err || fix))
9816 clear_extent_dirty(root->fs_info->excluded_extents,
9818 rec->start + rec->max_size - 1);
9823 if (ret && ret != -EAGAIN) {
9824 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9827 struct btrfs_trans_handle *trans;
9829 root = root->fs_info->extent_root;
9830 trans = btrfs_start_transaction(root, 1);
9831 if (IS_ERR(trans)) {
9832 ret = PTR_ERR(trans);
9836 ret = btrfs_fix_block_accounting(trans, root);
9839 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute the stripe length of a chunk from its profile.
 *
 * @type:        chunk type flags; the BTRFS_BLOCK_GROUP_RAID* bits are tested
 * @length:      chunk length
 * @num_stripes: number of stripes in the chunk
 *
 * RAID0 divides length by num_stripes, RAID10 divides twice the length by
 * num_stripes, RAID5 divides by num_stripes - 1 and RAID6 by
 * num_stripes - 2. The last assignment (presumably the else branch, whose
 * keyword is missing from this listing) uses length unchanged. The value is
 * presumably returned as stripe_size; the return line is not visible.
 *
 * NOTE(review): no guard against a zero divisor is visible (num_stripes of 0
 * for RAID0 or RAID10, 1 for RAID5, 2 for RAID6) - confirm that callers
 * validate num_stripes before calling.
 */
9848 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9852 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9853 stripe_size = length;
9854 stripe_size /= num_stripes;
9855 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
/* RAID10 stores two copies, so the striped amount is twice the length */
9856 stripe_size = length * 2;
9857 stripe_size /= num_stripes;
9858 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
/* one stripe is excluded from the divisor for RAID5 */
9859 stripe_size = length;
9860 stripe_size /= (num_stripes - 1);
9861 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
/* two stripes are excluded from the divisor for RAID6 */
9862 stripe_size = length;
9863 stripe_size /= (num_stripes - 2);
9865 stripe_size = length;
9871 * Check the chunk with its block group/dev list ref:
9872 * Return 0 if all refs seem valid.
9873 * Return 1 if only some refs seem valid and a later pass must rebuild the
9874 * rest, e.g. a missing block group that requires searching the extent tree.
9875 * Return -1 if essential refs are missing and cannot be rebuilt.
/*
 * Cross-check one chunk record against its block group and device extents.
 *
 * @chunk_rec:         chunk being checked
 * @block_group_cache: tree of block group records
 * @dev_extent_cache:  tree of device extent records
 * (a further parameter is declared on a line missing from this listing;
 * the caller in this file passes a silent flag in that position)
 *
 * Block group: the record is looked up in block_group_cache->tree and its
 * offset, objectid and flags are compared with the chunk length, offset and
 * type_flags. A match unlinks the block group from its list and stores it in
 * chunk_rec->bg_rec; a mismatch or a missing record prints a message.
 *
 * Device extents: the expected stripe length comes from
 * calc_stripe_length(). For each stripe the device extent is looked up by
 * (devid, offset, length) and compared field by field, including
 * chunk_offset against the chunk offset. A match moves the device extent
 * onto chunk_rec->dextents; a mismatch or a missing extent prints a message.
 *
 * NOTE(review): the stripe mismatch message spells the word as dismatch;
 * it is a runtime string and is left untouched here.
 * NOTE(review): the return statements and the use of metadump_v2 are on
 * lines missing from this listing.
 */
9877 static int check_chunk_refs(struct chunk_record *chunk_rec,
9878 struct block_group_tree *block_group_cache,
9879 struct device_extent_tree *dev_extent_cache,
9882 struct cache_extent *block_group_item;
9883 struct block_group_record *block_group_rec;
9884 struct cache_extent *dev_extent_item;
9885 struct device_extent_record *dev_extent_rec;
9889 int metadump_v2 = 0;
9893 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9896 if (block_group_item) {
9897 block_group_rec = container_of(block_group_item,
9898 struct block_group_record,
9900 if (chunk_rec->length != block_group_rec->offset ||
9901 chunk_rec->offset != block_group_rec->objectid ||
9903 chunk_rec->type_flags != block_group_rec->flags)) {
9906 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9907 chunk_rec->objectid,
9912 chunk_rec->type_flags,
9913 block_group_rec->objectid,
9914 block_group_rec->type,
9915 block_group_rec->offset,
9916 block_group_rec->offset,
9917 block_group_rec->objectid,
9918 block_group_rec->flags);
/* matched: take the block group off its list and attach it to the chunk */
9921 list_del_init(&block_group_rec->list);
9922 chunk_rec->bg_rec = block_group_rec;
9927 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9928 chunk_rec->objectid,
9933 chunk_rec->type_flags);
/* every stripe of the chunk must have a device extent of this length */
9940 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9941 chunk_rec->num_stripes);
9942 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9943 devid = chunk_rec->stripes[i].devid;
9944 offset = chunk_rec->stripes[i].offset;
9945 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9946 devid, offset, length);
9947 if (dev_extent_item) {
9948 dev_extent_rec = container_of(dev_extent_item,
9949 struct device_extent_record,
9951 if (dev_extent_rec->objectid != devid ||
9952 dev_extent_rec->offset != offset ||
9953 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9954 dev_extent_rec->length != length) {
9957 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9958 chunk_rec->objectid,
9961 chunk_rec->stripes[i].devid,
9962 chunk_rec->stripes[i].offset,
9963 dev_extent_rec->objectid,
9964 dev_extent_rec->offset,
9965 dev_extent_rec->length);
/* matched: move the device extent onto the list owned by this chunk */
9968 list_move(&dev_extent_rec->chunk_list,
9969 &chunk_rec->dextents);
9974 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9975 chunk_rec->objectid,
9978 chunk_rec->stripes[i].devid,
9979 chunk_rec->stripes[i].offset);
9986 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Check every chunk in chunk_cache and sort the chunks by result.
 *
 * @chunk_cache:       cache tree of chunk records
 * @block_group_cache: block group records, consumed by check_chunk_refs()
 * @dev_extent_cache:  device extent records, consumed by check_chunk_refs()
 * @good:    optional list that receives chunks whose check returned 0
 * @bad:     optional list that receives chunks whose check returned < 0
 * @rebuild: optional list that receives chunks whose check returned > 0
 * @silent:  passed through to check_chunk_refs()
 *
 * After all chunks are processed, the block groups still linked on
 * block_group_cache->block_groups and the device extents still linked on
 * dev_extent_cache->no_chunk_orphans are reported as having no chunk.
 * (check_chunk_refs() unlinks the ones it matched.)
 *
 * NOTE(review): the return value is computed on lines missing from this
 * listing.
 */
9987 int check_chunks(struct cache_tree *chunk_cache,
9988 struct block_group_tree *block_group_cache,
9989 struct device_extent_tree *dev_extent_cache,
9990 struct list_head *good, struct list_head *bad,
9991 struct list_head *rebuild, int silent)
9993 struct cache_extent *chunk_item;
9994 struct chunk_record *chunk_rec;
9995 struct block_group_record *bg_rec;
9996 struct device_extent_record *dext_rec;
10000 chunk_item = first_cache_extent(chunk_cache);
10001 while (chunk_item) {
10002 chunk_rec = container_of(chunk_item, struct chunk_record,
10004 err = check_chunk_refs(chunk_rec, block_group_cache,
10005 dev_extent_cache, silent);
/* classify the chunk; each output list may be NULL */
10008 if (err == 0 && good)
10009 list_add_tail(&chunk_rec->list, good);
10010 if (err > 0 && rebuild)
10011 list_add_tail(&chunk_rec->list, rebuild);
10012 if (err < 0 && bad)
10013 list_add_tail(&chunk_rec->list, bad);
10014 chunk_item = next_cache_extent(chunk_item);
/* anything still on these lists was not claimed by any chunk */
10017 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10020 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10028 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10032 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10033 dext_rec->objectid,
/*
 * Check that the device extents of one device add up to its byte_used.
 *
 * @dev_rec:    device record being checked
 * @dext_cache: tree of device extent records
 *
 * Starting from the first device extent found for dev_rec->devid, each
 * extent is unlinked from its device_list and its length is added to a
 * running total. An extent whose objectid differs from the devid is tested
 * for (presumably to stop the walk; the action is on a missing line). A
 * message is printed when the total differs from dev_rec->byte_used.
 *
 * NOTE(review): the return values are on lines missing from this listing.
 */
10043 static int check_device_used(struct device_record *dev_rec,
10044 struct device_extent_tree *dext_cache)
10046 struct cache_extent *cache;
10047 struct device_extent_record *dev_extent_rec;
10048 u64 total_byte = 0;
10050 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10052 dev_extent_rec = container_of(cache,
10053 struct device_extent_record,
10055 if (dev_extent_rec->objectid != dev_rec->devid)
/* this extent belongs to the device: unlink it and count its length */
10058 list_del_init(&dev_extent_rec->device_list);
10059 total_byte += dev_extent_rec->length;
10060 cache = next_cache_extent(cache);
10063 if (total_byte != dev_rec->byte_used) {
10065 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10066 total_byte, dev_rec->byte_used, dev_rec->objectid,
10067 dev_rec->type, dev_rec->offset);
10074 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Run check_device_used() on every device record and report device extents
 * that belong to no device.
 *
 * @dev_cache:        rb-tree of device records
 * @dev_extent_cache: tree of device extent records
 *
 * Device extents still linked on dev_extent_cache->no_device_orphans after
 * the walk are reported; check_device_used() unlinks the extents it counted
 * from their device_list.
 *
 * NOTE(review): how err is folded into the return value is on lines missing
 * from this listing.
 */
10075 static int check_devices(struct rb_root *dev_cache,
10076 struct device_extent_tree *dev_extent_cache)
10078 struct rb_node *dev_node;
10079 struct device_record *dev_rec;
10080 struct device_extent_record *dext_rec;
/* visit the device records in tree order */
10084 dev_node = rb_first(dev_cache);
10086 dev_rec = container_of(dev_node, struct device_record, node);
10087 err = check_device_used(dev_rec, dev_extent_cache);
10091 dev_node = rb_next(dev_node);
10093 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10096 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10097 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record, fill it in and append it to a list.
 *
 * @head:          list that receives the new record
 * @objectid:      root objectid
 * @bytenr:        bytenr of the root node
 * @last_snapshot: last snapshot value of the root
 * @level:         level of the root node
 * @drop_level:    drop level of the root
 * @drop_key:      drop progress key, copied into the record
 *
 * NOTE(review): callers in this file pass NULL for drop_key, so the memcpy()
 * is presumably guarded on a line missing from this listing - confirm.
 * The allocation failure check and the return values are likewise not
 * visible here.
 */
10104 static int add_root_item_to_list(struct list_head *head,
10105 u64 objectid, u64 bytenr, u64 last_snapshot,
10106 u8 level, u8 drop_level,
10107 struct btrfs_key *drop_key)
10110 struct root_item_record *ri_rec;
10111 ri_rec = malloc(sizeof(*ri_rec));
10114 ri_rec->bytenr = bytenr;
10115 ri_rec->objectid = objectid;
10116 ri_rec->level = level;
10117 ri_rec->drop_level = drop_level;
10118 ri_rec->last_snapshot = last_snapshot;
10120 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
/* keep insertion order: new records go to the tail */
10121 list_add_tail(&ri_rec->list, head);
/*
 * Empty a list of root_item_record entries.
 *
 * @list: list head to drain
 *
 * Each iteration takes the first entry and unlinks it.
 * NOTE(review): the free() of the entry is presumably on a line missing
 * from this listing - confirm.
 */
10126 static void free_root_item_list(struct list_head *list)
10128 struct root_item_record *ri_rec;
10130 while (!list_empty(list)) {
10131 ri_rec = list_first_entry(list, struct root_item_record,
10133 list_del_init(&ri_rec->list);
/*
 * Scan the trees whose roots are queued on a list.
 *
 * @list: list of root_item_record entries, consumed by this function
 * @root: root used for fs_info access and passed to run_next_block()
 * @bits: scratch array of block_info handed to run_next_block()
 * (a bits_nr parameter is declared on a line missing from this listing)
 * The remaining parameters are the caches passed through unchanged to
 * add_root_to_pending() and run_next_block().
 *
 * For each record the root node is read with read_tree_block(), added to
 * the pending set with add_root_to_pending(), and run_next_block() is
 * called with the record. The buffer is released and the record unlinked
 * afterwards. After the list is empty run_next_block() is called again
 * with a NULL record.
 *
 * NOTE(review): the loop structure around the run_next_block() calls, the
 * error handling and the return value are on lines missing from this
 * listing.
 */
10138 static int deal_root_from_list(struct list_head *list,
10139 struct btrfs_root *root,
10140 struct block_info *bits,
10142 struct cache_tree *pending,
10143 struct cache_tree *seen,
10144 struct cache_tree *reada,
10145 struct cache_tree *nodes,
10146 struct cache_tree *extent_cache,
10147 struct cache_tree *chunk_cache,
10148 struct rb_root *dev_cache,
10149 struct block_group_tree *block_group_cache,
10150 struct device_extent_tree *dev_extent_cache)
10155 while (!list_empty(list)) {
10156 struct root_item_record *rec;
10157 struct extent_buffer *buf;
10158 rec = list_entry(list->next,
10159 struct root_item_record, list);
/* read the root node of this tree; an unreadable node drops the buffer */
10161 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10162 if (!extent_buffer_uptodate(buf)) {
10163 free_extent_buffer(buf);
10167 ret = add_root_to_pending(buf, extent_cache, pending,
10168 seen, nodes, rec->objectid);
10172 * To rebuild extent tree, we need deal with snapshot
10173 * one by one, otherwise we deal with node firstly which
10174 * can maximize readahead.
10177 ret = run_next_block(root, bits, bits_nr, &last,
10178 pending, seen, reada, nodes,
10179 extent_cache, chunk_cache,
10180 dev_cache, block_group_cache,
10181 dev_extent_cache, rec);
10185 free_extent_buffer(buf);
10186 list_del(&rec->list);
/* no record is attached to the blocks processed from here on */
10192 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10193 reada, nodes, extent_cache, chunk_cache,
10194 dev_cache, block_group_cache,
10195 dev_extent_cache, NULL);
/*
 * Top-level check of chunks, extents and devices for one filesystem.
 *
 * @fs_info: filesystem to check
 *
 * Visible steps, in order:
 *  - Initialise the local caches and lists, and publish excluded_extents,
 *    the extent cache, free_extent_hook and corrupt_blocks through fs_info.
 *  - Allocate the bits scratch array and, when progress reporting is
 *    enabled, start the progress task.
 *  - Queue the tree root and the chunk root on normal_trees.
 *  - Walk the ROOT_ITEM entries of the tree root. A root whose
 *    drop_progress objectid is 0 goes on normal_trees; any other root goes
 *    on dropping_trees together with its drop key and drop level.
 *  - Process normal_trees, then dropping_trees, with deal_root_from_list().
 *  - Run check_chunks(), check_extent_refs() and check_devices().
 *  - Stop the task, unhook everything from fs_info and free all caches.
 *
 * Several results are compared with -EAGAIN, and a second cleanup sequence
 * near the end frees every cache again. Going by the comment inside the
 * function, that sequence is presumably the restart path; the jump itself
 * is on lines missing from this listing.
 *
 * NOTE(review): btrfs_next_leaf() is called with root (fs_info->fs_root)
 * although the path comes from a search in fs_info->tree_root - confirm
 * this is intended.
 * NOTE(review): the allocation check for bits, the error handling and the
 * return value are on lines missing from this listing.
 */
10205 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10207 struct rb_root dev_cache;
10208 struct cache_tree chunk_cache;
10209 struct block_group_tree block_group_cache;
10210 struct device_extent_tree dev_extent_cache;
10211 struct cache_tree extent_cache;
10212 struct cache_tree seen;
10213 struct cache_tree pending;
10214 struct cache_tree reada;
10215 struct cache_tree nodes;
10216 struct extent_io_tree excluded_extents;
10217 struct cache_tree corrupt_blocks;
10218 struct btrfs_path path;
10219 struct btrfs_key key;
10220 struct btrfs_key found_key;
10222 struct block_info *bits;
10224 struct extent_buffer *leaf;
10226 struct btrfs_root_item ri;
10227 struct list_head dropping_trees;
10228 struct list_head normal_trees;
10229 struct btrfs_root *root1;
10230 struct btrfs_root *root;
10234 root = fs_info->fs_root;
10235 dev_cache = RB_ROOT;
10236 cache_tree_init(&chunk_cache);
10237 block_group_tree_init(&block_group_cache);
10238 device_extent_tree_init(&dev_extent_cache);
10240 cache_tree_init(&extent_cache);
10241 cache_tree_init(&seen);
10242 cache_tree_init(&pending);
10243 cache_tree_init(&nodes);
10244 cache_tree_init(&reada);
10245 cache_tree_init(&corrupt_blocks);
10246 extent_io_tree_init(&excluded_extents);
10247 INIT_LIST_HEAD(&dropping_trees);
10248 INIT_LIST_HEAD(&normal_trees);
/* publish the local trees through fs_info; they are unhooked before return */
10251 fs_info->excluded_extents = &excluded_extents;
10252 fs_info->fsck_extent_cache = &extent_cache;
10253 fs_info->free_extent_hook = free_extent_hook;
10254 fs_info->corrupt_blocks = &corrupt_blocks;
10258 bits = malloc(bits_nr * sizeof(struct block_info));
10264 if (ctx.progress_enabled) {
10265 ctx.tp = TASK_EXTENTS;
10266 task_start(ctx.info);
/* the tree root and the chunk root are queued directly, without a drop key */
10270 root1 = fs_info->tree_root;
10271 level = btrfs_header_level(root1->node);
10272 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10273 root1->node->start, 0, level, 0, NULL);
10276 root1 = fs_info->chunk_root;
10277 level = btrfs_header_level(root1->node);
10278 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10279 root1->node->start, 0, level, 0, NULL);
/* walk the items of the tree root and queue every ROOT_ITEM found */
10282 btrfs_init_path(&path);
10285 key.type = BTRFS_ROOT_ITEM_KEY;
10286 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10290 leaf = path.nodes[0];
10291 slot = path.slots[0];
10292 if (slot >= btrfs_header_nritems(path.nodes[0])) {
10293 ret = btrfs_next_leaf(root, &path);
10296 leaf = path.nodes[0];
10297 slot = path.slots[0];
10299 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10300 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10301 unsigned long offset;
10304 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10305 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10306 last_snapshot = btrfs_root_last_snapshot(&ri);
/* a drop_progress objectid of 0 selects the normal list */
10307 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10308 level = btrfs_root_level(&ri);
10309 ret = add_root_item_to_list(&normal_trees,
10310 found_key.objectid,
10311 btrfs_root_bytenr(&ri),
10312 last_snapshot, level,
10317 level = btrfs_root_level(&ri);
10318 objectid = found_key.objectid;
/* found_key is reused to carry the drop progress key of this root */
10319 btrfs_disk_key_to_cpu(&found_key,
10320 &ri.drop_progress);
10321 ret = add_root_item_to_list(&dropping_trees,
10323 btrfs_root_bytenr(&ri),
10324 last_snapshot, level,
10325 ri.drop_level, &found_key);
10332 btrfs_release_path(&path);
10335 * check_block can return -EAGAIN if it fixes something, please keep
10336 * this in mind when dealing with return values from these functions, if
10337 * we get -EAGAIN we want to fall through and restart the loop.
10339 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
10340 &seen, &reada, &nodes, &extent_cache,
10341 &chunk_cache, &dev_cache, &block_group_cache,
10342 &dev_extent_cache);
10344 if (ret == -EAGAIN)
10348 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
10349 &pending, &seen, &reada, &nodes,
10350 &extent_cache, &chunk_cache, &dev_cache,
10351 &block_group_cache, &dev_extent_cache);
10353 if (ret == -EAGAIN)
/* no output lists are requested and silent is 0 */
10358 ret = check_chunks(&chunk_cache, &block_group_cache,
10359 &dev_extent_cache, NULL, NULL, NULL, 0);
10361 if (ret == -EAGAIN)
10366 ret = check_extent_refs(root, &extent_cache);
10368 if (ret == -EAGAIN)
10373 ret = check_devices(&dev_cache, &dev_extent_cache);
/* first cleanup sequence: unhook from fs_info and free the caches */
10378 task_stop(ctx.info);
10380 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10381 extent_io_tree_cleanup(&excluded_extents);
10382 fs_info->fsck_extent_cache = NULL;
10383 fs_info->free_extent_hook = NULL;
10384 fs_info->corrupt_blocks = NULL;
10385 fs_info->excluded_extents = NULL;
10388 free_chunk_cache_tree(&chunk_cache);
10389 free_device_cache_tree(&dev_cache);
10390 free_block_group_tree(&block_group_cache);
10391 free_device_extent_tree(&dev_extent_cache);
10392 free_extent_cache_tree(&seen);
10393 free_extent_cache_tree(&pending);
10394 free_extent_cache_tree(&reada);
10395 free_extent_cache_tree(&nodes);
10396 free_root_item_list(&normal_trees);
10397 free_root_item_list(&dropping_trees);
/* second cleanup sequence: also frees the extent records; fs_info stays hooked */
10400 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10401 free_extent_cache_tree(&seen);
10402 free_extent_cache_tree(&pending);
10403 free_extent_cache_tree(&reada);
10404 free_extent_cache_tree(&nodes);
10405 free_chunk_cache_tree(&chunk_cache);
10406 free_block_group_tree(&block_group_cache);
10407 free_device_cache_tree(&dev_cache);
10408 free_device_extent_tree(&dev_extent_cache);
10409 free_extent_record_cache(&extent_cache);
10410 free_root_item_list(&normal_trees);
10411 free_root_item_list(&dropping_trees);
10412 extent_io_tree_cleanup(&excluded_extents);
10417 * Check backrefs of a tree block given by @bytenr or @eb.
10419 * @root: the root containing the @bytenr or @eb
10420 * @eb: tree block extent buffer, can be NULL
10421 * @bytenr: bytenr of the tree block to search
10422 * @level: tree level of the tree block
10423 * @owner: owner of the tree block
10425 * Return >0 for any error found and output error message
10426 * Return 0 for no error found
/*
 * Look up the extent-tree backref of one tree block and validate it.
 *
 * The extent or metadata item for bytenr is located in the extent tree
 * (METADATA_ITEM when the SKINNY_METADATA incompat flag is set, otherwise
 * EXTENT_ITEM). The item is checked for the TREE_BLOCK flag, for a
 * generation equal to the header generation of eb, for a level equal to
 * level, and for a ref count of 1 when owner is not an fs tree.
 *
 * The inline refs are then scanned. A TREE_BLOCK_REF whose offset equals
 * root->objectid or owner is accepted. A SHARED_BLOCK_REF is accepted for a
 * tree reloc root, otherwise it is validated by a recursive call on the
 * parent block at level + 1. When no inline ref matches, a keyed
 * TREE_BLOCK_REF item for (bytenr, root->objectid) is searched.
 *
 * BACKREF_MISSING and BACKREF_MISMATCH bits are collected in err; the
 * backref lost message is printed only when eb is not NULL.
 *
 * NOTE(review): eb may be NULL (the recursive call passes NULL) while
 * btrfs_header_generation(eb) is called below; the guard is presumably on a
 * line missing from this listing - confirm.
 * NOTE(review): the return statement is not visible in this listing.
 */
10428 static int check_tree_block_ref(struct btrfs_root *root,
10429 struct extent_buffer *eb, u64 bytenr,
10430 int level, u64 owner)
10432 struct btrfs_key key;
10433 struct btrfs_root *extent_root = root->fs_info->extent_root;
10434 struct btrfs_path path;
10435 struct btrfs_extent_item *ei;
10436 struct btrfs_extent_inline_ref *iref;
10437 struct extent_buffer *leaf;
10443 u32 nodesize = root->fs_info->nodesize;
10446 int tree_reloc_root = 0;
/* the block is the root node of a tree reloc root */
10451 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10452 btrfs_header_bytenr(root->node) == bytenr)
10453 tree_reloc_root = 1;
10455 btrfs_init_path(&path);
10456 key.objectid = bytenr;
10457 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10458 key.type = BTRFS_METADATA_ITEM_KEY;
10460 key.type = BTRFS_EXTENT_ITEM_KEY;
/* largest possible offset: the search lands after any item for bytenr */
10461 key.offset = (u64)-1;
10463 /* Search for the backref in extent tree */
10464 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10466 err |= BACKREF_MISSING;
10469 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10471 err |= BACKREF_MISSING;
10475 leaf = path.nodes[0];
10476 slot = path.slots[0];
10477 btrfs_item_key_to_cpu(leaf, &key, slot);
10479 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* a METADATA_ITEM carries the level in key.offset; otherwise it is read from the tree block info */
10481 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10482 skinny_level = (int)key.offset;
10483 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10485 struct btrfs_tree_block_info *info;
10487 info = (struct btrfs_tree_block_info *)(ei + 1);
10488 skinny_level = btrfs_tree_block_level(leaf, info);
10489 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10496 if (!(btrfs_extent_flags(leaf, ei) &
10497 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10499 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10500 key.objectid, nodesize,
10501 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10502 err = BACKREF_MISMATCH;
10504 header_gen = btrfs_header_generation(eb);
10505 extent_gen = btrfs_extent_generation(leaf, ei);
10506 if (header_gen != extent_gen) {
10508 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10509 key.objectid, nodesize, header_gen,
10511 err = BACKREF_MISMATCH;
10513 if (level != skinny_level) {
10515 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10516 key.objectid, nodesize, level, skinny_level);
10517 err = BACKREF_MISMATCH;
10519 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10521 "extent[%llu %u] is referred by other roots than %llu",
10522 key.objectid, nodesize, root->objectid);
10523 err = BACKREF_MISMATCH;
10528 * Iterate the extent/metadata item to find the exact backref
10530 item_size = btrfs_item_size_nr(leaf, slot);
10531 ptr = (unsigned long)iref;
10532 end = (unsigned long)ei + item_size;
10533 while (ptr < end) {
10534 iref = (struct btrfs_extent_inline_ref *)ptr;
10535 type = btrfs_extent_inline_ref_type(leaf, iref);
10536 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10538 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10539 (offset == root->objectid || offset == owner)) {
10541 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10543 * Backref of tree reloc root points to itself, no need
10544 * to check backref any more.
10546 if (tree_reloc_root)
10549 /* Check if the backref points to valid referencer */
10550 found_ref = !check_tree_block_ref(root, NULL,
10551 offset, level + 1, owner);
10556 ptr += btrfs_extent_inline_ref_size(type);
10560 * Inlined extent item doesn't have what we need, check
10561 * TREE_BLOCK_REF_KEY
10564 btrfs_release_path(&path);
10565 key.objectid = bytenr;
10566 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10567 key.offset = root->objectid;
10569 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10574 err |= BACKREF_MISSING;
10576 btrfs_release_path(&path);
10577 if (eb && (err & BACKREF_MISSING))
10578 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10579 bytenr, nodesize, owner, level);
10584 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10586 * Return >0 any error found and output error message
10587 * Return 0 for no error found
/*
 * Check one EXTENT_DATA item against the extent tree.
 *
 * @root: root that owns the leaf
 * @eb:   leaf holding the file extent item
 * @slot: slot of the file extent item in eb
 *
 * Inline extents and holes (disk_bytenr of 0) are tested for first and need
 * no further check. For regular extents the function:
 *  - reports BYTES_UNALIGNED when disk_num_bytes or num_bytes is not a
 *    multiple of the sector size, and accumulates both values into the
 *    global data_bytes_allocated and data_bytes_referenced counters;
 *  - looks up the EXTENT_ITEM keyed by (disk_bytenr, disk_num_bytes) and
 *    reports BACKREF_MISMATCH when it lacks the DATA flag;
 *  - scans the inline refs for an EXTENT_DATA_REF whose root equals the
 *    leaf owner or root->objectid, or a SHARED_DATA_REF validated through
 *    check_tree_block_ref();
 *  - falls back to a keyed EXTENT_DATA_REF item, then to a keyed
 *    SHARED_DATA_REF item with eb->start as offset;
 *  - sets BACKREF_MISSING and prints the backref lost message when none of
 *    these is found.
 *
 * NOTE(review): the branch structure around the counters and the return
 * statement are on lines missing from this listing.
 */
10589 static int check_extent_data_item(struct btrfs_root *root,
10590 struct extent_buffer *eb, int slot)
10592 struct btrfs_file_extent_item *fi;
10593 struct btrfs_path path;
10594 struct btrfs_root *extent_root = root->fs_info->extent_root;
10595 struct btrfs_key fi_key;
10596 struct btrfs_key dbref_key;
10597 struct extent_buffer *leaf;
10598 struct btrfs_extent_item *ei;
10599 struct btrfs_extent_inline_ref *iref;
10600 struct btrfs_extent_data_ref *dref;
10603 u64 disk_num_bytes;
10604 u64 extent_num_bytes;
10611 int found_dbackref = 0;
10615 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10616 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10618 /* Nothing to check for hole and inline data extents */
10619 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10620 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10623 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10624 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10625 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10627 /* Check unaligned disk_num_bytes and num_bytes */
10628 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10630 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10631 fi_key.objectid, fi_key.offset, disk_num_bytes,
10632 root->fs_info->sectorsize);
10633 err |= BYTES_UNALIGNED;
10635 data_bytes_allocated += disk_num_bytes;
10637 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10639 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10640 fi_key.objectid, fi_key.offset, extent_num_bytes,
10641 root->fs_info->sectorsize);
10642 err |= BYTES_UNALIGNED;
10644 data_bytes_referenced += extent_num_bytes;
/* owner recorded in the leaf header; compared with the ref root below */
10646 owner = btrfs_header_owner(eb);
10648 /* Check the extent item of the file extent in extent tree */
10649 btrfs_init_path(&path);
10650 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10651 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10652 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10654 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
/* from here on slot refers to the extent tree leaf, not to eb */
10658 leaf = path.nodes[0];
10659 slot = path.slots[0];
10660 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10662 extent_flags = btrfs_extent_flags(leaf, ei);
10664 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10666 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10667 disk_bytenr, disk_num_bytes,
10668 BTRFS_EXTENT_FLAG_DATA);
10669 err |= BACKREF_MISMATCH;
10672 /* Check data backref inside that extent item */
10673 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10674 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10675 ptr = (unsigned long)iref;
10676 end = (unsigned long)ei + item_size;
10677 while (ptr < end) {
10678 iref = (struct btrfs_extent_inline_ref *)ptr;
10679 type = btrfs_extent_inline_ref_type(leaf, iref);
10680 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10682 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10683 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10684 if (ref_root == owner || ref_root == root->objectid)
10685 found_dbackref = 1;
10686 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10687 found_dbackref = !check_tree_block_ref(root, NULL,
10688 btrfs_extent_inline_ref_offset(leaf, iref),
10692 if (found_dbackref)
10694 ptr += btrfs_extent_inline_ref_size(type);
10697 if (!found_dbackref) {
10698 btrfs_release_path(&path);
10700 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10701 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10702 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10703 dbref_key.offset = hash_extent_data_ref(root->objectid,
10704 fi_key.objectid, fi_key.offset);
10706 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10707 &dbref_key, &path, 0, 0);
10709 found_dbackref = 1;
10713 btrfs_release_path(&path);
10716 * Neither inlined nor EXTENT_DATA_REF found, try
10717 * SHARED_DATA_REF as last chance.
10719 dbref_key.objectid = disk_bytenr;
10720 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10721 dbref_key.offset = eb->start;
10723 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10724 &dbref_key, &path, 0, 0);
10726 found_dbackref = 1;
10732 if (!found_dbackref)
10733 err |= BACKREF_MISSING;
10734 btrfs_release_path(&path);
10735 if (err & BACKREF_MISSING) {
10736 error("data extent[%llu %llu] backref lost",
10737 disk_bytenr, disk_num_bytes);
10743 * Get the real level of a tree block, e.g. for a shared block backref.
10744 * Return >= 0 as the tree level.
10745 * Return < 0 on error.
/*
 * Resolve the level of the tree block at bytenr from two sources.
 *
 * @fs_info: filesystem info; fs_info->extent_root is searched
 * @bytenr:  bytenr of the tree block
 *
 * Source one is the extent tree: the item for bytenr is located, it must
 * carry the TREE_BLOCK flag, and the level is taken from key.offset for a
 * METADATA_ITEM or from the tree block info otherwise. Source two is the
 * block itself, read with the generation stored in the extent item; its
 * header level is used.
 *
 * The header level is returned on the visible return path. The two levels
 * are compared first; the action on a mismatch and the error return values
 * are on lines missing from this listing.
 */
10747 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10749 struct extent_buffer *eb;
10750 struct btrfs_path path;
10751 struct btrfs_key key;
10752 struct btrfs_extent_item *ei;
10759 /* Search extent tree for extent generation and level */
10760 key.objectid = bytenr;
10761 key.type = BTRFS_METADATA_ITEM_KEY;
10762 key.offset = (u64)-1;
10764 btrfs_init_path(&path);
10765 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
/* step back to the extent item that covers bytenr */
10768 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10776 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10777 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10778 struct btrfs_extent_item);
10779 flags = btrfs_extent_flags(path.nodes[0], ei);
10780 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10785 /* Get transid for later read_tree_block() check */
10786 transid = btrfs_extent_generation(path.nodes[0], ei);
10788 /* Get backref level as one source */
10789 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10790 backref_level = key.offset;
10792 struct btrfs_tree_block_info *info;
10794 info = (struct btrfs_tree_block_info *)(ei + 1);
10795 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10797 btrfs_release_path(&path);
10799 /* Get level from tree block as an alternative source */
10800 eb = read_tree_block(fs_info, bytenr, transid);
10801 if (!extent_buffer_uptodate(eb)) {
10802 free_extent_buffer(eb);
10805 header_level = btrfs_header_level(eb);
10806 free_extent_buffer(eb);
10808 if (header_level != backref_level)
10810 return header_level;
10813 btrfs_release_path(&path);
10818 * Check if a tree block backref is valid (points to a valid tree block)
10819 * if level == -1, level will be resolved
10820 * Return >0 for any error found and print error message
/*
 * Check that a tree block backref points at a block reachable from the
 * root it names.
 *
 * @fs_info: filesystem info
 * @root_id: objectid of the root named by the backref
 * @bytenr:  bytenr of the tree block
 * @level:   level of the tree block; resolved with query_tree_block_level()
 *           for the special value described in the comment below
 *
 * The root is read with btrfs_read_fs_root() and the block with
 * read_tree_block(). The first key of the block is then searched in that
 * root with the path limited to level, and the node found at that level
 * must have the same bytenr and level. An empty leaf is tested for before
 * the key is read.
 *
 * REFERENCER_MISSING and REFERENCER_MISMATCH bits are collected in err and
 * the matching messages are printed.
 *
 * NOTE(review): nodesize is declared u32 but printed with %d in the
 * messages below.
 * NOTE(review): the return statement and several branch conditions are on
 * lines missing from this listing.
 */
10822 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10823 u64 bytenr, int level)
10825 struct btrfs_root *root;
10826 struct btrfs_key key;
10827 struct btrfs_path path;
10828 struct extent_buffer *eb;
10829 struct extent_buffer *node;
10830 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10834 /* Query level for level == -1 special case */
10836 level = query_tree_block_level(fs_info, bytenr);
10838 err |= REFERENCER_MISSING;
/* load the root named by the backref */
10842 key.objectid = root_id;
10843 key.type = BTRFS_ROOT_ITEM_KEY;
10844 key.offset = (u64)-1;
10846 root = btrfs_read_fs_root(fs_info, &key);
10847 if (IS_ERR(root)) {
10848 err |= REFERENCER_MISSING;
10852 /* Read out the tree block to get item/node key */
10853 eb = read_tree_block(fs_info, bytenr, 0);
10854 if (!extent_buffer_uptodate(eb)) {
10855 err |= REFERENCER_MISSING;
10856 free_extent_buffer(eb);
10860 /* Empty tree, no need to check key */
10861 if (!btrfs_header_nritems(eb) && !level) {
10862 free_extent_buffer(eb);
/* key is reused: it now holds the first key stored in the block */
10867 btrfs_node_key_to_cpu(eb, &key, 0);
10869 btrfs_item_key_to_cpu(eb, &key, 0);
10871 free_extent_buffer(eb);
10873 btrfs_init_path(&path);
10874 path.lowest_level = level;
10875 /* Search with the first key, to ensure we can reach it */
10876 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10878 err |= REFERENCER_MISSING;
10882 node = path.nodes[level];
10883 if (btrfs_header_bytenr(node) != bytenr) {
10885 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10886 bytenr, nodesize, bytenr,
10887 btrfs_header_bytenr(node));
10888 err |= REFERENCER_MISMATCH;
10890 if (btrfs_header_level(node) != level) {
10892 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10893 bytenr, nodesize, level,
10894 btrfs_header_level(node));
10895 err |= REFERENCER_MISMATCH;
10899 btrfs_release_path(&path);
10901 if (err & REFERENCER_MISSING) {
10903 error("extent [%llu %d] lost referencer (owner: %llu)",
10904 bytenr, nodesize, root_id);
10907 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10908 bytenr, nodesize, root_id, level);
10915 * Check if tree block @eb is tree reloc root.
10916 * Return 0 if it's not or any problem happens
10917 * Return 1 if it's a tree reloc root
/*
 * Test whether tree block eb is the root node of a tree reloc root.
 *
 * @fs_info: filesystem info
 * @eb:      tree block to test
 *
 * The tree reloc root keyed by (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM, owner
 * of eb) is read without caching, the bytenr of its root node is compared
 * with the bytenr of eb, and the root is freed again.
 *
 * NOTE(review): the return statements are on lines missing from this
 * listing.
 */
10919 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10920 struct extent_buffer *eb)
10922 struct btrfs_root *tree_reloc_root;
10923 struct btrfs_key key;
10924 u64 bytenr = btrfs_header_bytenr(eb);
10925 u64 owner = btrfs_header_owner(eb);
/* the reloc root is keyed by the owner recorded in the header of eb */
10928 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10929 key.offset = owner;
10930 key.type = BTRFS_ROOT_ITEM_KEY;
10932 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10933 if (IS_ERR(tree_reloc_root))
10936 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
/* read with the no_cache variant above, so it is freed here */
10938 btrfs_free_fs_root(tree_reloc_root);
10943 * Check referencer for shared block backref
10944 * If level == -1, this function will resolve the level.
/*
 * Verify a shared block backref: the tree block at @parent must hold a node
 * pointer to @bytenr.
 *
 * @fs_info: filesystem being checked
 * @parent:  bytenr of the tree block recorded as parent
 * @bytenr:  bytenr of the referenced tree block
 * @level:   level of @bytenr; it can be resolved below through
 *           query_tree_block_level()
 *
 * Returns REFERENCER_MISSING when no matching pointer was found.
 * NOTE(review): the other return paths are not visible here, confirm them.
 */
10946 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10947 u64 parent, u64 bytenr, int level)
10949 struct extent_buffer *eb;
10951 int found_parent = 0;
/* Read the claimed parent block before inspecting it */
10954 eb = read_tree_block(fs_info, parent, 0);
10955 if (!extent_buffer_uptodate(eb))
10959 level = query_tree_block_level(fs_info, bytenr);
10963 /* It's possible it's a tree reloc root */
10964 if (parent == bytenr) {
10965 if (is_tree_reloc_root(fs_info, eb))
/* The parent is expected exactly one level above the referenced block */
10970 if (level + 1 != btrfs_header_level(eb))
/* Scan every node pointer of the parent for @bytenr */
10973 nr = btrfs_header_nritems(eb);
10974 for (i = 0; i < nr; i++) {
10975 if (bytenr == btrfs_node_blockptr(eb, i)) {
10981 free_extent_buffer(eb);
10982 if (!found_parent) {
10984 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10985 bytenr, fs_info->nodesize, parent, level);
10986 return REFERENCER_MISSING;
10992 * Check referencer for normal (inlined) data ref
10993 * If len == 0, it will be resolved by searching in extent tree
/*
 * Verify a keyed or inlined data backref by counting the file extent items
 * that really reference the data extent.
 *
 * @fs_info:  filesystem being checked
 * @root_id:  objectid of the root that should hold the referencing inode
 * @objectid: inode number of the referencer
 * @offset:   offset stored in the data backref
 * @bytenr:   start of the data extent
 * @len:      length of the data extent (the function header says 0 means it
 *            is resolved from the extent tree)
 * @count:    reference count recorded in the backref
 *
 * Returns REFERENCER_MISSING when the number of matching file extents
 * differs from @count.
 * NOTE(review): the other return paths are not visible here, confirm them.
 */
10995 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10996 u64 root_id, u64 objectid, u64 offset,
10997 u64 bytenr, u64 len, u32 count)
10999 struct btrfs_root *root;
11000 struct btrfs_root *extent_root = fs_info->extent_root;
11001 struct btrfs_key key;
11002 struct btrfs_path path;
11003 struct extent_buffer *leaf;
11004 struct btrfs_file_extent_item *fi;
11005 u32 found_count = 0;
/*
 * Look up the EXTENT_ITEM of @bytenr: search with the largest possible
 * offset, then step back to the previous extent item.
 * NOTE(review): presumably guarded by a len == 0 test that is not visible.
 */
11010 key.objectid = bytenr;
11011 key.type = BTRFS_EXTENT_ITEM_KEY;
11012 key.offset = (u64)-1;
11014 btrfs_init_path(&path);
11015 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11018 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11021 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11022 if (key.objectid != bytenr ||
11023 key.type != BTRFS_EXTENT_ITEM_KEY)
11026 btrfs_release_path(&path);
/* Read the root that is recorded as the owner of the referencer */
11028 key.objectid = root_id;
11029 key.type = BTRFS_ROOT_ITEM_KEY;
11030 key.offset = (u64)-1;
11031 btrfs_init_path(&path);
11033 root = btrfs_read_fs_root(fs_info, &key);
11037 key.objectid = objectid;
11038 key.type = BTRFS_EXTENT_DATA_KEY;
11040 * It can be nasty as data backref offset is
11041 * file offset - file extent offset, which is smaller or
11042 * equal to original backref offset. The only special case is
11043 * overflow. So we need to special check and do further search.
/* Bit 63 set is treated as the overflow case: start from file offset 0 */
11045 key.offset = offset & (1ULL << 63) ? 0 : offset;
11047 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11052 * Search afterwards to get correct one
11053 * NOTE: As we must do a comprehensive check on the data backref to
11054 * make sure the dref count also matches, we must iterate all file
11055 * extents for that inode.
11058 leaf = path.nodes[0];
11059 slot = path.slots[0];
11061 if (slot >= btrfs_header_nritems(leaf))
11063 btrfs_item_key_to_cpu(leaf, &key, slot);
/* Only EXTENT_DATA items of the wanted inode are of interest */
11064 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11066 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11068 * Except normal disk bytenr and disk num bytes, we still
11069 * need to do extra check on dbackref offset as
11070 * dbackref offset = file_offset - file_extent_offset
11072 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11073 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11074 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11079 ret = btrfs_next_item(root, &path);
11084 btrfs_release_path(&path);
/* The backref is only valid when every recorded reference was found */
11085 if (found_count != count) {
11087 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
11088 bytenr, len, root_id, objectid, offset, count, found_count);
11089 return REFERENCER_MISSING;
11095 * Check if the referencer of a shared data backref exists
/*
 * Verify a shared data backref: the block at @parent must contain a regular
 * (non inline) file extent item whose disk bytenr is @bytenr.
 *
 * @fs_info: filesystem being checked
 * @parent:  bytenr of the tree block recorded as referencer
 * @bytenr:  start of the data extent
 *
 * Returns REFERENCER_MISSING when no such file extent item was found.
 * NOTE(review): the other return paths are not visible here, confirm them.
 */
11097 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
11098 u64 parent, u64 bytenr)
11100 struct extent_buffer *eb;
11101 struct btrfs_key key;
11102 struct btrfs_file_extent_item *fi;
11104 int found_parent = 0;
11107 eb = read_tree_block(fs_info, parent, 0);
11108 if (!extent_buffer_uptodate(eb))
/* Inspect every item of the parent block */
11111 nr = btrfs_header_nritems(eb);
11112 for (i = 0; i < nr; i++) {
11113 btrfs_item_key_to_cpu(eb, &key, i);
11114 if (key.type != BTRFS_EXTENT_DATA_KEY)
11117 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
/* Inline extents are filtered out before the disk bytenr comparison */
11118 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
11121 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
11128 free_extent_buffer(eb);
11129 if (!found_parent) {
11130 error("shared extent %llu referencer lost (parent: %llu)",
11132 return REFERENCER_MISSING;
11138 * This function will check a given extent item, including its backref and
11139 * itself (like crossing stripe boundary and type)
11141 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Check one EXTENT_ITEM or METADATA_ITEM: account its size in bytes_used,
 * test metadata for stripe crossing and verify every inline backref through
 * the matching check_*_backref() helper.
 *
 * @fs_info: filesystem being checked
 * @eb:      extent tree leaf holding the item
 * @slot:    slot of the item inside @eb
 *
 * Error bits collected in err include CROSSING_STRIPE_BOUNDARY,
 * ITEM_SIZE_MISMATCH and UNKNOWN_TYPE.
 * NOTE(review): the return statements are not visible here, confirm that
 * err is what gets returned.
 */
11143 static int check_extent_item(struct btrfs_fs_info *fs_info,
11144 struct extent_buffer *eb, int slot)
11146 struct btrfs_extent_item *ei;
11147 struct btrfs_extent_inline_ref *iref;
11148 struct btrfs_extent_data_ref *dref;
11152 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11153 u32 item_size = btrfs_item_size_nr(eb, slot);
11158 struct btrfs_key key;
11162 btrfs_item_key_to_cpu(eb, &key, slot);
/* EXTENT_ITEM keys carry the length in the offset, otherwise use nodesize */
11163 if (key.type == BTRFS_EXTENT_ITEM_KEY)
11164 bytes_used += key.offset;
11166 bytes_used += nodesize;
11168 if (item_size < sizeof(*ei)) {
11170 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
11171 * old thing when on disk format is still un-determined.
11172 * No need to care about it anymore
11174 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
11178 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
11179 flags = btrfs_extent_flags(eb, ei);
11181 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
/* NOTE(review): uses global_info although fs_info is passed in, confirm */
11183 if (metadata && check_crossing_stripes(global_info, key.objectid,
11185 error("bad metadata [%llu, %llu) crossing stripe boundary",
11186 key.objectid, key.objectid + nodesize);
11187 err |= CROSSING_STRIPE_BOUNDARY;
/* Inline refs start right behind the fixed size extent item */
11190 ptr = (unsigned long)(ei + 1);
11192 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
11193 /* Old EXTENT_ITEM metadata */
11194 struct btrfs_tree_block_info *info;
11196 info = (struct btrfs_tree_block_info *)ptr;
11197 level = btrfs_tree_block_level(eb, info);
11198 ptr += sizeof(struct btrfs_tree_block_info);
11200 /* New METADATA_ITEM */
11201 level = key.offset;
11203 end = (unsigned long)ei + item_size;
11206 /* Reached extent item end normally */
11210 /* Beyond extent item end, wrong item size */
11212 err |= ITEM_SIZE_MISMATCH;
11213 error("extent item at bytenr %llu slot %d has wrong size",
11218 /* Now check every backref in this extent item */
11219 iref = (struct btrfs_extent_inline_ref *)ptr;
11220 type = btrfs_extent_inline_ref_type(eb, iref);
11221 offset = btrfs_extent_inline_ref_offset(eb, iref);
11223 case BTRFS_TREE_BLOCK_REF_KEY:
11224 ret = check_tree_block_backref(fs_info, offset, key.objectid,
11228 case BTRFS_SHARED_BLOCK_REF_KEY:
11229 ret = check_shared_block_backref(fs_info, offset, key.objectid,
11233 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref structure overlays the offset field of the inline ref */
11234 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11235 ret = check_extent_data_backref(fs_info,
11236 btrfs_extent_data_ref_root(eb, dref),
11237 btrfs_extent_data_ref_objectid(eb, dref),
11238 btrfs_extent_data_ref_offset(eb, dref),
11239 key.objectid, key.offset,
11240 btrfs_extent_data_ref_count(eb, dref));
11243 case BTRFS_SHARED_DATA_REF_KEY:
11244 ret = check_shared_data_backref(fs_info, offset, key.objectid);
11248 error("extent[%llu %d %llu] has unknown ref type: %d",
11249 key.objectid, key.type, key.offset, type);
11250 err |= UNKNOWN_TYPE;
/* Advance to the next inline ref; its size depends on the ref type */
11254 ptr += btrfs_extent_inline_ref_size(type);
11262 * Check if a dev extent item is referred correctly by its chunk
/*
 * Check that a dev extent is referenced by the chunk it names: the chunk
 * must exist in the chunk tree, pass btrfs_check_chunk_valid(), have a
 * matching stripe length and contain a stripe with the devid and offset of
 * this dev extent.
 *
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the dev extent item
 * @slot:    slot of the dev extent inside @eb
 *
 * Returns REFERENCER_MISSING when no matching chunk stripe was found.
 * NOTE(review): the success return is not visible here, confirm it.
 */
11264 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
11265 struct extent_buffer *eb, int slot)
11267 struct btrfs_root *chunk_root = fs_info->chunk_root;
11268 struct btrfs_dev_extent *ptr;
11269 struct btrfs_path path;
11270 struct btrfs_key chunk_key;
11271 struct btrfs_key devext_key;
11272 struct btrfs_chunk *chunk;
11273 struct extent_buffer *l;
11277 int found_chunk = 0;
11280 btrfs_item_key_to_cpu(eb, &devext_key, slot);
11281 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
11282 length = btrfs_dev_extent_length(eb, ptr);
/* Build the key of the chunk this dev extent claims to belong to */
11284 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
11285 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11286 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
11288 btrfs_init_path(&path);
11289 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11294 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
11295 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
/* The per device stripe length of the chunk must equal the extent length */
11300 if (btrfs_stripe_length(fs_info, l, chunk) != length)
/* Look for a stripe that sits on this device at this physical offset */
11303 num_stripes = btrfs_chunk_num_stripes(l, chunk);
11304 for (i = 0; i < num_stripes; i++) {
11305 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
11306 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
11308 if (devid == devext_key.objectid &&
11309 offset == devext_key.offset) {
11315 btrfs_release_path(&path);
11316 if (!found_chunk) {
11318 "device extent[%llu, %llu, %llu] did not find the related chunk",
11319 devext_key.objectid, devext_key.offset, length);
11320 return REFERENCER_MISSING;
11326 * Check if the used space is correct with the dev item
/*
 * Check that the bytes_used value of a dev item equals the summed length of
 * the dev extents found for that device in the device tree.
 *
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the dev item
 * @slot:    slot of the dev item inside @eb
 *
 * Returns REFERENCER_MISSING when the dev extent search fails and
 * ACCOUNTING_MISMATCH when the totals differ.
 * NOTE(review): the success return is not visible here, confirm it.
 */
11328 static int check_dev_item(struct btrfs_fs_info *fs_info,
11329 struct extent_buffer *eb, int slot)
11331 struct btrfs_root *dev_root = fs_info->dev_root;
11332 struct btrfs_dev_item *dev_item;
11333 struct btrfs_path path;
11334 struct btrfs_key key;
11335 struct btrfs_dev_extent *ptr;
11341 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
11342 dev_id = btrfs_device_id(eb, dev_item);
11343 used = btrfs_device_bytes_used(eb, dev_item);
/* Dev extents are keyed by device id in the key objectid */
11345 key.objectid = dev_id;
11346 key.type = BTRFS_DEV_EXTENT_KEY;
11349 btrfs_init_path(&path);
11350 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* key was reused for the search: reload the dev item key for the report */
11352 btrfs_item_key_to_cpu(eb, &key, slot);
11353 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
11354 key.objectid, key.type, key.offset);
11355 btrfs_release_path(&path);
11356 return REFERENCER_MISSING;
11359 /* Iterate dev_extents to calculate the used space of a device */
11361 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11364 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11365 if (key.objectid > dev_id)
11367 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11370 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11371 struct btrfs_dev_extent);
11372 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11374 ret = btrfs_next_item(dev_root, &path);
11378 btrfs_release_path(&path);
11380 if (used != total) {
/*
 * Report the key of the dev item itself, exactly as the search failure
 * message above does, instead of unrelated hard-coded constants.
 */
11381 btrfs_item_key_to_cpu(eb, &key, slot);
11383 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11384 total, used, key.objectid,
11385 key.type, key.offset);
11386 return ACCOUNTING_MISMATCH;
11392 * Check a block group item with its referencer (chunk) and its used space
11393 * with extent/metadata item
/*
 * Check one block group item against its chunk and against the extent
 * items that live inside the block group.
 *
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the block group item
 * @slot:    slot of the block group item inside @eb
 *
 * Error bits collected in err: REFERENCER_MISSING (no chunk item),
 * REFERENCER_MISMATCH (chunk length differs), CHUNK_TYPE_MISMATCH (extent
 * type does not fit the block group flags) and ACCOUNTING_MISMATCH (used
 * bytes differ from the summed extents).
 * NOTE(review): the return statements are not visible here, confirm that
 * err is what gets returned.
 */
11395 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11396 struct extent_buffer *eb, int slot)
11398 struct btrfs_root *extent_root = fs_info->extent_root;
11399 struct btrfs_root *chunk_root = fs_info->chunk_root;
11400 struct btrfs_block_group_item *bi;
11401 struct btrfs_block_group_item bg_item;
11402 struct btrfs_path path;
11403 struct btrfs_key bg_key;
11404 struct btrfs_key chunk_key;
11405 struct btrfs_key extent_key;
11406 struct btrfs_chunk *chunk;
11407 struct extent_buffer *leaf;
11408 struct btrfs_extent_item *ei;
11409 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11417 btrfs_item_key_to_cpu(eb, &bg_key, slot);
11418 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* Copy the item out of the extent buffer before reading its fields */
11419 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11420 used = btrfs_block_group_used(&bg_item);
11421 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk is keyed by the logical start of the block group */
11423 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11424 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11425 chunk_key.offset = bg_key.objectid;
11427 btrfs_init_path(&path);
11428 /* Search for the referencer chunk */
11429 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11432 "block group[%llu %llu] did not find the related chunk item",
11433 bg_key.objectid, bg_key.offset);
11434 err |= REFERENCER_MISSING;
11436 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11437 struct btrfs_chunk);
11438 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11441 "block group[%llu %llu] related chunk item length does not match",
11442 bg_key.objectid, bg_key.offset);
11443 err |= REFERENCER_MISMATCH;
11446 btrfs_release_path(&path);
11448 /* Search from the block group bytenr */
11449 extent_key.objectid = bg_key.objectid;
11450 extent_key.type = 0;
11451 extent_key.offset = 0;
11453 btrfs_init_path(&path);
11454 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11458 /* Iterate extent tree to account used space */
11460 leaf = path.nodes[0];
11462 /* Search slot can point to the last item beyond leaf nritems */
11463 if (path.slots[0] >= btrfs_header_nritems(leaf))
11466 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* bg_key.objectid + bg_key.offset is the first byte behind the group */
11467 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11470 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11471 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11473 if (extent_key.objectid < bg_key.objectid)
/*
 * NOTE(review): the METADATA_ITEM branch body is not visible, presumably it
 * adds nodesize; the other branch adds the length from the key offset.
 */
11476 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11479 total += extent_key.offset;
11481 ei = btrfs_item_ptr(leaf, path.slots[0],
11482 struct btrfs_extent_item);
11483 flags = btrfs_extent_flags(leaf, ei);
/* Data extents need a DATA group, tree blocks a SYSTEM or METADATA group */
11484 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11485 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11487 "bad extent[%llu, %llu) type mismatch with chunk",
11488 extent_key.objectid,
11489 extent_key.objectid + extent_key.offset);
11490 err |= CHUNK_TYPE_MISMATCH;
11492 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11493 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11494 BTRFS_BLOCK_GROUP_METADATA))) {
11496 "bad extent[%llu, %llu) type mismatch with chunk",
11497 extent_key.objectid,
11498 extent_key.objectid + nodesize);
11499 err |= CHUNK_TYPE_MISMATCH;
11503 ret = btrfs_next_item(extent_root, &path);
11509 btrfs_release_path(&path);
11511 if (total != used) {
11513 "block group[%llu %llu] used %llu but extent items used %llu",
11514 bg_key.objectid, bg_key.offset, used, total);
11515 err |= ACCOUNTING_MISMATCH;
11521 * Check a chunk item.
11522 * Including checking all referred dev_extents and block group
/*
 * Check one chunk item: validate the chunk itself, look up the block group
 * item that mirrors it and verify that every stripe has a dev extent
 * pointing back at this chunk with the expected stripe length.
 *
 * @fs_info: filesystem being checked
 * @eb:      chunk tree leaf holding the chunk item
 * @slot:    slot of the chunk item inside @eb
 *
 * Error bits collected in err: BYTES_UNALIGNED | UNKNOWN_TYPE (invalid
 * chunk), REFERENCER_MISSING (block group missing or flags differ) and
 * BACKREF_MISSING (stripe without matching dev extent).
 * NOTE(review): the return statements are not visible here, confirm that
 * err is what gets returned.
 */
11524 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11525 struct extent_buffer *eb, int slot)
11527 struct btrfs_root *extent_root = fs_info->extent_root;
11528 struct btrfs_root *dev_root = fs_info->dev_root;
11529 struct btrfs_path path;
11530 struct btrfs_key chunk_key;
11531 struct btrfs_key bg_key;
11532 struct btrfs_key devext_key;
11533 struct btrfs_chunk *chunk;
11534 struct extent_buffer *leaf;
11535 struct btrfs_block_group_item *bi;
11536 struct btrfs_block_group_item bg_item;
11537 struct btrfs_dev_extent *ptr;
11549 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11550 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11551 length = btrfs_chunk_length(eb, chunk);
/* The logical range of the chunk is [chunk_key.offset, chunk_end) */
11552 chunk_end = chunk_key.offset + length;
11553 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11556 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11558 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11561 type = btrfs_chunk_type(eb, chunk);
/* The block group item is keyed by chunk start and chunk length */
11563 bg_key.objectid = chunk_key.offset;
11564 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11565 bg_key.offset = length;
11567 btrfs_init_path(&path);
11568 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11571 "chunk[%llu %llu) did not find the related block group item",
11572 chunk_key.offset, chunk_end);
11573 err |= REFERENCER_MISSING;
11575 leaf = path.nodes[0];
11576 bi = btrfs_item_ptr(leaf, path.slots[0],
11577 struct btrfs_block_group_item);
11578 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11580 if (btrfs_block_group_flags(&bg_item) != type) {
11582 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11583 chunk_key.offset, chunk_end, type,
11584 btrfs_block_group_flags(&bg_item));
/* NOTE(review): a mismatch is reported as REFERENCER_MISSING, confirm */
11585 err |= REFERENCER_MISSING;
11589 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11590 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11591 for (i = 0; i < num_stripes; i++) {
/* Drop the path of the previous lookup before it is reused */
11592 btrfs_release_path(&path);
11593 btrfs_init_path(&path);
11594 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11595 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11596 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11598 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11601 goto not_match_dev;
11603 leaf = path.nodes[0];
11604 ptr = btrfs_item_ptr(leaf, path.slots[0],
11605 struct btrfs_dev_extent);
11606 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11607 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
/* The dev extent must point back at this chunk with the same length */
11608 if (objectid != chunk_key.objectid ||
11609 offset != chunk_key.offset ||
11610 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11611 goto not_match_dev;
11614 err |= BACKREF_MISSING;
11616 "chunk[%llu %llu) stripe %d did not find the related dev extent",
/* Print the chunk start (key offset) like every other message above */
11617 chunk_key.offset, chunk_end, i);
11620 btrfs_release_path(&path);
11626 * Main entry function to check known items and update related accounting info
/*
 * Walk the items of leaf @eb and dispatch each known key type to its
 * checker; csum items are only accounted in total_csum_bytes.
 *
 * @root: root the leaf belongs to; its fs_info is handed to the checkers
 * @eb:   leaf whose items are checked
 *
 * NOTE(review): how the checker results are combined and returned is not
 * visible here, confirm it.
 */
11628 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11630 struct btrfs_fs_info *fs_info = root->fs_info;
11631 struct btrfs_key key;
11634 struct btrfs_extent_data_ref *dref;
11639 btrfs_item_key_to_cpu(eb, &key, slot);
/* File extent items are the only ones checked with the root itself */
11643 case BTRFS_EXTENT_DATA_KEY:
11644 ret = check_extent_data_item(root, eb, slot);
11647 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11648 ret = check_block_group_item(fs_info, eb, slot);
11651 case BTRFS_DEV_ITEM_KEY:
11652 ret = check_dev_item(fs_info, eb, slot);
11655 case BTRFS_CHUNK_ITEM_KEY:
11656 ret = check_chunk_item(fs_info, eb, slot);
11659 case BTRFS_DEV_EXTENT_KEY:
11660 ret = check_dev_extent_item(fs_info, eb, slot);
/* Both extent item flavours share one checker */
11663 case BTRFS_EXTENT_ITEM_KEY:
11664 case BTRFS_METADATA_ITEM_KEY:
11665 ret = check_extent_item(fs_info, eb, slot);
11668 case BTRFS_EXTENT_CSUM_KEY:
11669 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* Keyed (non inline) backrefs go to the same helpers as inline ones */
11671 case BTRFS_TREE_BLOCK_REF_KEY:
11672 ret = check_tree_block_backref(fs_info, key.offset,
11676 case BTRFS_EXTENT_DATA_REF_KEY:
11677 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11678 ret = check_extent_data_backref(fs_info,
11679 btrfs_extent_data_ref_root(eb, dref),
11680 btrfs_extent_data_ref_objectid(eb, dref),
11681 btrfs_extent_data_ref_offset(eb, dref),
11683 btrfs_extent_data_ref_count(eb, dref));
11686 case BTRFS_SHARED_BLOCK_REF_KEY:
11687 ret = check_shared_block_backref(fs_info, key.offset,
11691 case BTRFS_SHARED_DATA_REF_KEY:
11692 ret = check_shared_data_backref(fs_info, key.offset,
/* Continue with the next slot while items remain in the leaf */
11700 if (++slot < btrfs_header_nritems(eb))
11707 * Helper function for later fs/subvol tree check. To determine if a tree
11708 * block should be checked.
11709 * This function ensures that only the direct referencer with the lowest
11710 * rootid checks a fs/subvolume tree block.
11712 * Backref check at extent tree would detect errors like missing subvolume
11713 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * Decide whether @root should check tree block @eb by inspecting the
 * inline TREE_BLOCK_REF entries of the extent item of @eb.
 *
 * @root: root that is currently being traversed
 * @eb:   tree block in question
 *
 * NOTE(review): the return statements are not visible here; going by the
 * comments below, a lookup failure or the absence of a lower root id should
 * mean "check it", while a TREE_BLOCK_REF with a lower root id means skip.
 */
11715 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11717 struct btrfs_root *extent_root = root->fs_info->extent_root;
11718 struct btrfs_key key;
11719 struct btrfs_path path;
11720 struct extent_buffer *leaf;
11722 struct btrfs_extent_item *ei;
11728 struct btrfs_extent_inline_ref *iref;
11731 btrfs_init_path(&path);
/* Search with the largest offset, then step back to the extent item */
11732 key.objectid = btrfs_header_bytenr(eb);
11733 key.type = BTRFS_METADATA_ITEM_KEY;
11734 key.offset = (u64)-1;
11737 * Any failure in backref resolving means we can't determine
11738 * whom the tree block belongs to.
11739 * So in that case, we need to check that tree block
11741 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11745 ret = btrfs_previous_extent_item(extent_root, &path,
11746 btrfs_header_bytenr(eb));
11750 leaf = path.nodes[0];
11751 slot = path.slots[0];
11752 btrfs_item_key_to_cpu(leaf, &key, slot);
11753 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* With an EXTENT_ITEM key a tree block info precedes the inline refs */
11755 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11756 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11758 struct btrfs_tree_block_info *info;
11760 info = (struct btrfs_tree_block_info *)(ei + 1);
11761 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11764 item_size = btrfs_item_size_nr(leaf, slot);
11765 ptr = (unsigned long)iref;
11766 end = (unsigned long)ei + item_size;
/* Walk all inline refs up to the end of the item */
11767 while (ptr < end) {
11768 iref = (struct btrfs_extent_inline_ref *)ptr;
11769 type = btrfs_extent_inline_ref_type(leaf, iref);
11770 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11773 * We only check the tree block if current root is
11774 * the lowest referencer of it.
11776 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11777 offset < root->objectid) {
11778 btrfs_release_path(&path);
11782 ptr += btrfs_extent_inline_ref_size(type);
11785 * Normally we should also check keyed tree block ref, but that may be
11786 * very time consuming. Inlined ref should already make us skip a lot
11787 * of refs now. So skip search keyed tree block ref.
11791 btrfs_release_path(&path);
11796 * Traversal function for tree block. We will do:
11797 * 1) Skip shared fs/subvolume tree blocks
11798 * 2) Update related bytes accounting
11799 * 3) Pre-order traversal
/*
 * Recursively check tree block @node of @root: account its size, verify its
 * own backref, check its items when it is a leaf and descend into its
 * children otherwise.
 *
 * @root: root the traversal belongs to
 * @node: tree block to check
 *
 * NOTE(review): the return statements are not visible here, confirm how
 * the results of the individual checks are combined.
 */
11801 static int traverse_tree_block(struct btrfs_root *root,
11802 struct extent_buffer *node)
11804 struct extent_buffer *eb;
11805 struct btrfs_key key;
11806 struct btrfs_key drop_key;
11814 * Skip shared fs/subvolume tree block, in that case they will
11815 * be checked by referencer with lowest rootid
11817 if (is_fstree(root->objectid) && !should_check(root, node))
11820 /* Update bytes accounting */
11821 total_btree_bytes += node->len;
11822 if (fs_root_objectid(btrfs_header_owner(node)))
11823 total_fs_tree_bytes += node->len;
11824 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11825 total_extent_tree_bytes += node->len;
11827 /* pre-order traversal, check itself first */
11828 level = btrfs_header_level(node);
11829 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11830 btrfs_header_level(node),
11831 btrfs_header_owner(node));
11835 "check %s failed root %llu bytenr %llu level %d, force continue check",
11836 level ? "node":"leaf", root->objectid,
11837 btrfs_header_bytenr(node), btrfs_header_level(node));
/* Leaf case: account the unused space and check every item */
11840 btree_space_waste += btrfs_leaf_free_space(root, node);
11841 ret = check_leaf_items(root, node);
/* Node case: account the unused key pointer slots as wasted space */
11846 nr = btrfs_header_nritems(node);
11847 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11848 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11849 sizeof(struct btrfs_key_ptr);
11851 /* Then check all its children */
11852 for (i = 0; i < nr; i++) {
11853 u64 blocknr = btrfs_node_blockptr(node, i);
11855 btrfs_node_key_to_cpu(node, &key, i);
/* Keys covered by the drop progress of the root are treated specially */
11856 if (level == root->root_item.drop_level &&
11857 is_dropped_key(&key, &drop_key))
11861 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11862 * to call the function itself.
11864 eb = read_tree_block(root->fs_info, blocknr, 0);
11865 if (extent_buffer_uptodate(eb)) {
11866 ret = traverse_tree_block(root, eb);
11869 free_extent_buffer(eb);
11876 * Low memory usage version check_chunks_and_extents.
11878 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
11880 struct btrfs_path path;
11881 struct btrfs_key key;
11882 struct btrfs_root *root1;
11883 struct btrfs_root *root;
11884 struct btrfs_root *cur_root;
11888 root = fs_info->fs_root;
11890 root1 = root->fs_info->chunk_root;
11891 ret = traverse_tree_block(root1, root1->node);
11894 root1 = root->fs_info->tree_root;
11895 ret = traverse_tree_block(root1, root1->node);
11898 btrfs_init_path(&path);
11899 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11901 key.type = BTRFS_ROOT_ITEM_KEY;
11903 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11905 error("cannot find extent treet in tree_root");
11910 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11911 if (key.type != BTRFS_ROOT_ITEM_KEY)
11913 key.offset = (u64)-1;
11915 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11916 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11919 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11920 if (IS_ERR(cur_root) || !cur_root) {
11921 error("failed to read tree: %lld", key.objectid);
11925 ret = traverse_tree_block(cur_root, cur_root->node);
11928 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11929 btrfs_free_fs_root(cur_root);
11931 ret = btrfs_next_item(root1, &path);
11937 btrfs_release_path(&path);
/*
 * Run the chunk and extent check in the implementation selected by
 * check_mode: the low memory variant for CHECK_MODE_LOWMEM, the original
 * one otherwise.
 *
 * @fs_info: filesystem being checked
 */
11941 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
/* Without progress reporting, print a plain status line instead */
11945 if (!ctx.progress_enabled)
11946 fprintf(stderr, "checking extents\n");
11947 if (check_mode == CHECK_MODE_LOWMEM)
11948 ret = check_chunks_and_extents_v2(fs_info);
11950 ret = check_chunks_and_extents(fs_info);
/*
 * Reinitialize the root node of @root as an empty tree block, either in a
 * freshly allocated block or, apparently when @overwrite is set, in the
 * existing root node.
 *
 * @trans:     running transaction; its transid becomes the new generation
 * @root:      root whose node is reinitialized
 * @overwrite: NOTE(review): the branch testing this flag is not visible
 *             here, confirm that it selects reuse of the old node
 *
 * NOTE(review): the return statements are not visible here.
 */
11955 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11956 struct btrfs_root *root, int overwrite)
11958 struct extent_buffer *c;
11959 struct extent_buffer *old = root->node;
11962 struct btrfs_disk_key disk_key = {0,0,0};
11968 extent_buffer_get(c);
11971 c = btrfs_alloc_free_block(trans, root,
11972 root->fs_info->nodesize,
11973 root->root_key.objectid,
11974 &disk_key, level, 0, 0);
11977 extent_buffer_get(c);
/* Wipe the header and rebuild it for an empty block owned by this root */
11981 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11982 btrfs_set_header_level(c, level);
11983 btrfs_set_header_bytenr(c, c->start);
11984 btrfs_set_header_generation(c, trans->transid);
11985 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11986 btrfs_set_header_owner(c, root->root_key.objectid);
11988 write_extent_buffer(c, root->fs_info->fsid,
11989 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11991 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11992 btrfs_header_chunk_tree_uuid(c),
11995 btrfs_mark_buffer_dirty(c);
11997 * this case can happen in the following case:
11999 * 1.overwrite previous root.
12001 * 2.reinit reloc data root, this is because we skip pin
12002 * down reloc data tree before which means we can allocate
12003 * same block bytenr here.
/* Same bytenr as before: update the root item in the tree root directly */
12005 if (old->start == c->start) {
12006 btrfs_set_root_generation(&root->root_item,
12008 root->root_item.level = btrfs_header_level(root->node);
12009 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12010 &root->root_key, &root->root_item);
12012 free_extent_buffer(c);
12016 free_extent_buffer(old);
12018 add_root_to_dirty_list(root);
/*
 * Recursively pin the extent of tree block @eb and of the blocks reachable
 * from it. For the tree root, the roots referenced by its ROOT_ITEMs are
 * followed as well, except the extent tree and the reloc trees.
 *
 * @fs_info:   filesystem whose pinned_extents tree is updated
 * @eb:        tree block to start from
 * @tree_root: non-zero while walking the tree root itself
 *
 * NOTE(review): the return statements are not visible here.
 */
12022 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12023 struct extent_buffer *eb, int tree_root)
12025 struct extent_buffer *tmp;
12026 struct btrfs_root_item *ri;
12027 struct btrfs_key key;
12029 int level = btrfs_header_level(eb);
12035 * If we have pinned this block before, don't pin it again.
12036 * This can not only avoid forever loop with broken filesystem
12037 * but also give us some speedups.
12039 if (test_range_bit(&fs_info->pinned_extents, eb->start,
12040 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
12043 btrfs_pin_extent(fs_info, eb->start, eb->len);
12045 nritems = btrfs_header_nritems(eb);
12046 for (i = 0; i < nritems; i++) {
/* Item keys are read here; only ROOT_ITEMs lead to another tree */
12048 btrfs_item_key_to_cpu(eb, &key, i);
12049 if (key.type != BTRFS_ROOT_ITEM_KEY)
12051 /* Skip the extent root and reloc roots */
12052 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
12053 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
12054 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
12056 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
12057 bytenr = btrfs_disk_root_bytenr(eb, ri);
12060 * If at any point we start needing the real root we
12061 * will have to build a stump root for the root we are
12062 * in, but for now this doesn't actually use the root so
12063 * just pass in extent_root.
12065 tmp = read_tree_block(fs_info, bytenr, 0);
12066 if (!extent_buffer_uptodate(tmp)) {
12067 fprintf(stderr, "Error reading root block\n");
/* Referenced trees are walked with tree_root cleared */
12070 ret = pin_down_tree_blocks(fs_info, tmp, 0);
12071 free_extent_buffer(tmp);
/* Node pointers are followed here instead of item keys */
12075 bytenr = btrfs_node_blockptr(eb, i);
12077 /* If we aren't the tree root don't read the block */
12078 if (level == 1 && !tree_root) {
12079 btrfs_pin_extent(fs_info, bytenr,
12080 fs_info->nodesize);
12084 tmp = read_tree_block(fs_info, bytenr, 0);
12085 if (!extent_buffer_uptodate(tmp)) {
12086 fprintf(stderr, "Error reading tree block\n");
12089 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
12090 free_extent_buffer(tmp);
/*
 * Pin the tree blocks reachable from the chunk root and then those
 * reachable from the tree root.
 *
 * @fs_info: filesystem whose metadata is pinned
 *
 * Returns the result of the tree root walk when that walk is reached.
 * NOTE(review): the handling of a failed chunk root walk is not visible.
 */
12099 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
12103 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
/* Only this walk passes tree_root = 1, so ROOT_ITEMs are followed */
12107 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk items of the chunk
 * tree and mark each chunk range dirty in the free space cache tree.
 *
 * @fs_info: filesystem whose block groups are recreated
 *
 * NOTE(review): the return statements and the body of the final block
 * group loop are not fully visible here.
 */
12110 static int reset_block_groups(struct btrfs_fs_info *fs_info)
12112 struct btrfs_block_group_cache *cache;
12113 struct btrfs_path path;
12114 struct extent_buffer *leaf;
12115 struct btrfs_chunk *chunk;
12116 struct btrfs_key key;
12120 btrfs_init_path(&path);
12122 key.type = BTRFS_CHUNK_ITEM_KEY;
12124 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
12126 btrfs_release_path(&path);
12131 * We do this in case the block groups were screwed up and had alloc
12132 * bits that aren't actually set on the chunks. This happens with
12133 * restored images every time and could happen in real life I guess.
12135 fs_info->avail_data_alloc_bits = 0;
12136 fs_info->avail_metadata_alloc_bits = 0;
12137 fs_info->avail_system_alloc_bits = 0;
12139 /* First we need to create the in-memory block groups */
/* Move on to the next leaf once the current one is exhausted */
12141 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12142 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
12144 btrfs_release_path(&path);
12152 leaf = path.nodes[0];
12153 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12154 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
/* One block group per chunk: type, start and length come from the chunk */
12159 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
12160 btrfs_add_block_group(fs_info, 0,
12161 btrfs_chunk_type(leaf, chunk),
12162 key.objectid, key.offset,
12163 btrfs_chunk_length(leaf, chunk));
/*
 * NOTE(review): the end passed here is start + length, while
 * test_range_bit() in pin_down_tree_blocks() is called with
 * start + len - 1; confirm which end convention set_extent_dirty() expects.
 */
12164 set_extent_dirty(&fs_info->free_space_cache, key.offset,
12165 key.offset + btrfs_chunk_length(leaf, chunk));
/* Step through the block groups, each lookup starting behind the last one */
12170 cache = btrfs_lookup_first_block_group(fs_info, start);
12174 start = cache->key.objectid + cache->key.offset;
12177 btrfs_release_path(&path);
/*
 * Remove balance state from the tree root: delete the balance item, delete
 * the TREE_RELOC root items and reinitialize the data reloc tree with a
 * fresh root directory.
 *
 * @trans:   running transaction used for the deletions and the reinit
 * @fs_info: filesystem being repaired
 *
 * NOTE(review): the error handling and the return statements are not
 * visible here.
 */
12181 static int reset_balance(struct btrfs_trans_handle *trans,
12182 struct btrfs_fs_info *fs_info)
12184 struct btrfs_root *root = fs_info->tree_root;
12185 struct btrfs_path path;
12186 struct extent_buffer *leaf;
12187 struct btrfs_key key;
12188 int del_slot, del_nr = 0;
12192 btrfs_init_path(&path);
/* Step 1: look up the balance item for deletion (ins_len -1, cow 1) */
12193 key.objectid = BTRFS_BALANCE_OBJECTID;
12194 key.type = BTRFS_BALANCE_ITEM_KEY;
12196 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12201 goto reinit_data_reloc;
12206 ret = btrfs_del_item(trans, root, &path);
12209 btrfs_release_path(&path);
/* Step 2: delete the root items of the tree reloc roots */
12211 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12212 key.type = BTRFS_ROOT_ITEM_KEY;
12214 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12218 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12223 ret = btrfs_del_items(trans, root, &path,
12230 btrfs_release_path(&path);
12233 ret = btrfs_search_slot(trans, root, &key, &path,
12240 leaf = path.nodes[0];
12241 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12242 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
12244 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
/* del_slot and del_nr describe a run of slots removed in one call */
12249 del_slot = path.slots[0];
12258 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
12262 btrfs_release_path(&path);
/* Step 3: read the data reloc tree and reinitialize it as an empty tree */
12265 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
12266 key.type = BTRFS_ROOT_ITEM_KEY;
12267 key.offset = (u64)-1;
12268 root = btrfs_read_fs_root(fs_info, &key);
12269 if (IS_ERR(root)) {
12270 fprintf(stderr, "Error reading data reloc tree\n");
12271 ret = PTR_ERR(root);
12274 record_root_in_trans(trans, root);
12275 ret = btrfs_fsck_reinit_root(trans, root, 0);
12278 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
12280 btrfs_release_path(&path);
/*
 * reinit_extent_tree - rebuild the extent tree inside transaction @trans.
 *
 * Visible steps, in order:
 *   - refuse filesystems that have the MIXED_GROUPS incompat flag set;
 *   - pin the metadata blocks currently in use (pin_metadata_blocks);
 *   - drop the in-memory block groups and recreate them
 *     (btrfs_free_block_groups + reset_block_groups);
 *   - re-initialize fs_info->extent_root (btrfs_fsck_reinit_root);
 *   - walk the in-memory block groups and insert one block group item per
 *     group into the extent root;
 *   - reset any pending balance via reset_balance().
 *
 * Each failing step prints a message to stderr.
 *
 * NOTE(review): this extract is missing lines (braces, error checks, loop
 * headers and return statements); the return values on each path cannot be
 * confirmed from here.
 */
12284 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
12285 struct btrfs_fs_info *fs_info)
12291 * The only reason we don't do this is because right now we're just
12292 * walking the trees we find and pinning down their bytes, we don't look
12293 * at any of the leaves. In order to do mixed groups we'd have to check
12294 * the leaves of any fs roots and pin down the bytes for any file
12295 * extents we find. Not hard but why do it if we don't have to?
12297 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
12298 fprintf(stderr, "We don't support re-initing the extent tree "
12299 "for mixed block groups yet, please notify a btrfs "
12300 "developer you want to do this so they can add this "
12301 "functionality.\n");
12306 * first we need to walk all of the trees except the extent tree and pin
12307 * down the bytes that are in use so we don't overwrite any existing
12310 ret = pin_metadata_blocks(fs_info);
12312 fprintf(stderr, "error pinning down used bytes\n");
12317 * Need to drop all the block groups since we're going to recreate all
12320 btrfs_free_block_groups(fs_info);
12321 ret = reset_block_groups(fs_info);
12323 fprintf(stderr, "error resetting the block groups\n");
12327 /* Ok we can allocate now, reinit the extent root */
12328 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
12330 fprintf(stderr, "extent root initialization failed\n");
12332 * When the transaction code is updated we should end the
12333 * transaction, but for now progs only knows about commit so
12334 * just return an error.
12340 * Now we have all the in-memory block groups setup so we can make
12341 * allocations properly, and the metadata we care about is safe since we
12342 * pinned all of it above.
12345 struct btrfs_block_group_cache *cache;
12347 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Advance the cursor to the first byte after this block group. */
12350 start = cache->key.objectid + cache->key.offset;
12351 ret = btrfs_insert_item(trans, fs_info->extent_root,
12352 &cache->key, &cache->item,
12353 sizeof(cache->item));
12355 fprintf(stderr, "Error adding block group\n");
12358 btrfs_extent_post_op(trans, fs_info->extent_root);
12361 ret = reset_balance(trans, fs_info);
12363 fprintf(stderr, "error resetting the pending balance\n");
/*
 * recow_extent_buffer - re-COW a single metadata block.
 *
 * Reads the root that owns @eb (taken from the block header), starts a
 * transaction on it, and searches for the first key stored in @eb with
 * path.lowest_level set to the level of @eb and the last argument of
 * btrfs_search_slot() set to 1. The transaction is then committed.
 *
 * Returns PTR_ERR() of the failing lookup when the owner root cannot be read
 * or the transaction cannot be started.
 *
 * NOTE(review): the result of btrfs_commit_transaction() is not stored on the
 * visible line. The final return statement is missing from this extract, so
 * whether the search result is what gets returned should be confirmed.
 */
12368 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
12370 struct btrfs_path path;
12371 struct btrfs_trans_handle *trans;
12372 struct btrfs_key key;
12375 printf("Recowing metadata block %llu\n", eb->start);
/* Build the key used to look up the root that owns this block. */
12376 key.objectid = btrfs_header_owner(eb);
12377 key.type = BTRFS_ROOT_ITEM_KEY;
12378 key.offset = (u64)-1;
12380 root = btrfs_read_fs_root(root->fs_info, &key);
12381 if (IS_ERR(root)) {
12382 fprintf(stderr, "Couldn't find owner root %llu\n",
12384 return PTR_ERR(root);
12387 trans = btrfs_start_transaction(root, 1);
12389 return PTR_ERR(trans);
12391 btrfs_init_path(&path);
/* Stop the search at the level of @eb; nodes and leaves store keys differently. */
12392 path.lowest_level = btrfs_header_level(eb);
12393 if (path.lowest_level)
12394 btrfs_node_key_to_cpu(eb, &key, 0);
12396 btrfs_item_key_to_cpu(eb, &key, 0);
12398 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12399 btrfs_commit_transaction(trans, root);
12400 btrfs_release_path(&path);
/*
 * delete_bad_item - remove one item previously recorded as bad.
 *
 * Reads the root identified by bad->root_id, starts a transaction on it,
 * searches for bad->key and deletes the item found, then commits the
 * transaction.
 *
 * Returns PTR_ERR() of the failing lookup when the root cannot be read or the
 * transaction cannot be started.
 *
 * NOTE(review): the result of btrfs_commit_transaction() is not stored on the
 * visible line. The checks between the search and the delete, and the final
 * return statement, are missing from this extract.
 */
12404 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12406 struct btrfs_path path;
12407 struct btrfs_trans_handle *trans;
12408 struct btrfs_key key;
12411 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12412 bad->key.type, bad->key.offset);
/* Build the key used to look up the root that contains the bad item. */
12413 key.objectid = bad->root_id;
12414 key.type = BTRFS_ROOT_ITEM_KEY;
12415 key.offset = (u64)-1;
12417 root = btrfs_read_fs_root(root->fs_info, &key);
12418 if (IS_ERR(root)) {
12419 fprintf(stderr, "Couldn't find owner root %llu\n",
12421 return PTR_ERR(root);
12424 trans = btrfs_start_transaction(root, 1);
12426 return PTR_ERR(trans);
12428 btrfs_init_path(&path);
12429 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12435 ret = btrfs_del_item(trans, root, &path);
12437 btrfs_commit_transaction(trans, root);
12438 btrfs_release_path(&path);
/*
 * zero_log_tree - clear the log tree pointer recorded in the super block.
 *
 * Starts a transaction on @root, sets both the log root and the log root
 * level in fs_info->super_copy to 0, and commits the transaction. ret holds
 * PTR_ERR(trans) if the transaction cannot be started, otherwise the result
 * of the commit.
 *
 * NOTE(review): the return statement is missing from this extract.
 */
12442 static int zero_log_tree(struct btrfs_root *root)
12444 struct btrfs_trans_handle *trans;
12447 trans = btrfs_start_transaction(root, 1);
12448 if (IS_ERR(trans)) {
12449 ret = PTR_ERR(trans);
12452 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12453 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12454 ret = btrfs_commit_transaction(trans, root);
/*
 * populate_csum - checksum a data range one sector at a time.
 *
 * Walks the range beginning at @start in steps of fs_info->sectorsize. For
 * each step it reads the data at start + offset into @buf via
 * read_extent_data() and passes that buffer to btrfs_csum_file_block() on
 * @csum_root, with start + len as the third argument.
 *
 * NOTE(review): the rest of the parameter list (presumably a length "len")
 * and the error checks / return statement are missing from this extract.
 * @buf presumably must hold at least one sector - confirm against callers.
 */
12458 static int populate_csum(struct btrfs_trans_handle *trans,
12459 struct btrfs_root *csum_root, char *buf, u64 start,
12462 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12467 while (offset < len) {
12468 sectorsize = fs_info->sectorsize;
12469 ret = read_extent_data(fs_info, buf, start + offset,
12473 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12474 start + offset, buf, sectorsize);
12477 offset += sectorsize;
/*
 * fill_csum_tree_from_one_fs_root - add checksums for the file extents of one
 * fs/subvolume tree.
 *
 * Allocates a one-sector buffer, searches @cur_root read-only (no transaction
 * handle is passed to the search) and iterates its items with
 * btrfs_next_item(). Items that are BTRFS_EXTENT_DATA_KEY and of type
 * BTRFS_FILE_EXTENT_REG have their on-disk range (disk_bytenr,
 * disk_num_bytes) passed to populate_csum().
 *
 * NOTE(review): the key initialization, the statements guarded by the checks
 * below (including the -EEXIST case), the buffer release and the return
 * statement are missing from this extract.
 */
12482 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12483 struct btrfs_root *csum_root,
12484 struct btrfs_root *cur_root)
12486 struct btrfs_path path;
12487 struct btrfs_key key;
12488 struct extent_buffer *node;
12489 struct btrfs_file_extent_item *fi;
/* Scratch buffer sized for exactly one sector, handed to populate_csum(). */
12496 buf = malloc(cur_root->fs_info->sectorsize);
12500 btrfs_init_path(&path);
12504 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12507 /* Iterate all regular file extents and fill its csum */
12509 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12511 if (key.type != BTRFS_EXTENT_DATA_KEY)
12513 node = path.nodes[0];
12514 slot = path.slots[0];
12515 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12516 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* Checksums cover the on-disk extent, not the file offset range. */
12518 start = btrfs_file_extent_disk_bytenr(node, fi);
12519 len = btrfs_file_extent_disk_num_bytes(node, fi);
12521 ret = populate_csum(trans, csum_root, buf, start, len);
12522 if (ret == -EEXIST)
12528 * TODO: if next leaf is corrupted, jump to nearest next valid
12531 ret = btrfs_next_item(cur_root, &path);
12541 btrfs_release_path(&path);
/*
 * fill_csum_tree_from_fs - rebuild checksums by walking every fs/subvolume
 * tree.
 *
 * Searches the tree root starting at BTRFS_FS_TREE_OBJECTID and iterates its
 * items with btrfs_next_item(). The visible checks compare the key against
 * BTRFS_LAST_FREE_OBJECTID, BTRFS_ROOT_ITEM_KEY and is_fstree(); each root
 * that is read successfully is handed to fill_csum_tree_from_one_fs_root().
 *
 * NOTE(review): the statements guarded by those checks, the error handling
 * after the root read and the return statement are missing from this extract.
 */
12546 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12547 struct btrfs_root *csum_root)
12549 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12550 struct btrfs_path path;
12551 struct btrfs_root *tree_root = fs_info->tree_root;
12552 struct btrfs_root *cur_root;
12553 struct extent_buffer *node;
12554 struct btrfs_key key;
12558 btrfs_init_path(&path);
12559 key.objectid = BTRFS_FS_TREE_OBJECTID;
12561 key.type = BTRFS_ROOT_ITEM_KEY;
12562 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12571 node = path.nodes[0];
12572 slot = path.slots[0];
12573 btrfs_item_key_to_cpu(node, &key, slot);
12574 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12576 if (key.type != BTRFS_ROOT_ITEM_KEY)
12578 if (!is_fstree(key.objectid))
/* The offset is overwritten with (u64)-1 before the root is read. */
12580 key.offset = (u64)-1;
12582 cur_root = btrfs_read_fs_root(fs_info, &key);
12583 if (IS_ERR(cur_root) || !cur_root) {
12584 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12588 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12593 ret = btrfs_next_item(tree_root, &path);
12603 btrfs_release_path(&path);
/*
 * fill_csum_tree_from_extent - rebuild checksums by walking the extent tree.
 *
 * Searches fs_info->extent_root read-only, allocates a one-sector buffer and
 * walks the leaves (btrfs_next_leaf() when the slot runs past the last item).
 * The visible checks test the key type against BTRFS_EXTENT_ITEM_KEY and the
 * extent flags against BTRFS_EXTENT_FLAG_DATA; populate_csum() is called with
 * key.objectid as the start of the range.
 *
 * NOTE(review): the statements guarded by those checks, the remaining
 * populate_csum() arguments, the buffer release and the return statement are
 * missing from this extract.
 */
12607 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12608 struct btrfs_root *csum_root)
12610 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12611 struct btrfs_path path;
12612 struct btrfs_extent_item *ei;
12613 struct extent_buffer *leaf;
12615 struct btrfs_key key;
12618 btrfs_init_path(&path);
12620 key.type = BTRFS_EXTENT_ITEM_KEY;
12622 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12624 btrfs_release_path(&path);
/* Scratch buffer sized for exactly one sector, handed to populate_csum(). */
12628 buf = malloc(csum_root->fs_info->sectorsize);
12630 btrfs_release_path(&path);
12635 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12636 ret = btrfs_next_leaf(extent_root, &path);
12644 leaf = path.nodes[0];
12646 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12647 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12652 ei = btrfs_item_ptr(leaf, path.slots[0],
12653 struct btrfs_extent_item);
12654 if (!(btrfs_extent_flags(leaf, ei) &
12655 BTRFS_EXTENT_FLAG_DATA)) {
12660 ret = populate_csum(trans, csum_root, buf, key.objectid,
12667 btrfs_release_path(&path);
12673 * Recalculate the csum and put it into the csum tree.
12675 * Extent tree init will wipe out all the extent info, so in that case, we
12676 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
12677 * will use fs/subvol trees to init the csum tree.
/*
 * Dispatcher: a non-zero @search_fs_tree selects fill_csum_tree_from_fs(),
 * otherwise fill_csum_tree_from_extent() is used. The helper's return value
 * is passed straight back to the caller.
 */
12679 static int fill_csum_tree(struct btrfs_trans_handle *trans,
12680 struct btrfs_root *csum_root,
12681 int search_fs_tree)
12683 if (search_fs_tree)
12684 return fill_csum_tree_from_fs(trans, csum_root);
12686 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * free_roots_info_cache - tear down the global roots_info_cache.
 *
 * Does nothing more when the cache pointer is NULL. Otherwise it removes
 * entries from the cache tree one at a time until it is empty, then frees the
 * tree itself and resets the global pointer to NULL so the cache can be built
 * again later.
 *
 * NOTE(review): the line releasing each root_item_info is missing from this
 * extract; presumably rii is freed after being unlinked - confirm.
 */
12689 static void free_roots_info_cache(void)
12691 if (!roots_info_cache)
12694 while (!cache_tree_empty(roots_info_cache)) {
12695 struct cache_extent *entry;
12696 struct root_item_info *rii;
12698 entry = first_cache_extent(roots_info_cache);
12701 remove_cache_extent(roots_info_cache, entry);
/* Recover the enclosing root_item_info from its embedded cache_extent. */
12702 rii = container_of(entry, struct root_item_info, cache_extent);
12706 free(roots_info_cache);
12707 roots_info_cache = NULL;
/*
 * build_roots_info_cache - collect, per root id, the tree block recorded in
 * the extent tree with the highest level.
 *
 * Allocates and initializes the global roots_info_cache on first use, then
 * walks info->extent_root. The visible checks test for
 * BTRFS_EXTENT_ITEM_KEY / BTRFS_METADATA_ITEM_KEY, the
 * BTRFS_EXTENT_FLAG_TREE_BLOCK flag, and a first inline ref of type
 * BTRFS_TREE_BLOCK_REF_KEY. The ref's offset is used as the root id to look
 * up (or create) a root_item_info entry. The entry's level, bytenr, gen and
 * node_count are overwritten when a block with a higher level than the one
 * recorded (or the first block for that root) is seen.
 *
 * NOTE(review): this extract is missing lines (loop header, the statements
 * guarded by the type/flag checks, allocation-failure handling, the body of
 * the equal-level branch and the return statement). Presumably the
 * equal-level branch counts additional blocks at that level - confirm.
 */
12710 static int build_roots_info_cache(struct btrfs_fs_info *info)
12713 struct btrfs_key key;
12714 struct extent_buffer *leaf;
12715 struct btrfs_path path;
12717 if (!roots_info_cache) {
12718 roots_info_cache = malloc(sizeof(*roots_info_cache));
12719 if (!roots_info_cache)
12721 cache_tree_init(roots_info_cache);
12724 btrfs_init_path(&path);
12726 key.type = BTRFS_EXTENT_ITEM_KEY;
12728 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12731 leaf = path.nodes[0];
12734 struct btrfs_key found_key;
12735 struct btrfs_extent_item *ei;
12736 struct btrfs_extent_inline_ref *iref;
12737 int slot = path.slots[0];
12742 struct cache_extent *entry;
12743 struct root_item_info *rii;
/* Ran off the end of this leaf: move on to the next one. */
12745 if (slot >= btrfs_header_nritems(leaf)) {
12746 ret = btrfs_next_leaf(info->extent_root, &path);
12753 leaf = path.nodes[0];
12754 slot = path.slots[0];
12757 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12759 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12760 found_key.type != BTRFS_METADATA_ITEM_KEY)
12763 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12764 flags = btrfs_extent_flags(leaf, ei);
12766 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12767 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * Locate the first inline ref and the block level. For METADATA_ITEM keys
 * the ref directly follows the extent item and the level is the key offset;
 * otherwise a btrfs_tree_block_info sits in between and carries the level.
 */
12770 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12771 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12772 level = found_key.offset;
12774 struct btrfs_tree_block_info *binfo;
12776 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12777 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12778 level = btrfs_tree_block_level(leaf, binfo);
12782 * For a root extent, it must be of the following type and the
12783 * first (and only one) iref in the item.
12785 type = btrfs_extent_inline_ref_type(leaf, iref);
12786 if (type != BTRFS_TREE_BLOCK_REF_KEY)
/* The inline ref offset is used as the id of the owning root. */
12789 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12790 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12792 rii = malloc(sizeof(struct root_item_info));
12797 rii->cache_extent.start = root_id;
12798 rii->cache_extent.size = 1;
/* (u8)-1 marks "no level recorded yet"; see the comparison below. */
12799 rii->level = (u8)-1;
12800 entry = &rii->cache_extent;
12801 ret = insert_cache_extent(roots_info_cache, entry);
12804 rii = container_of(entry, struct root_item_info,
12808 ASSERT(rii->cache_extent.start == root_id);
12809 ASSERT(rii->cache_extent.size == 1);
/* A higher level (or the first block seen) replaces the recorded candidate. */
12811 if (level > rii->level || rii->level == (u8)-1) {
12812 rii->level = level;
12813 rii->bytenr = found_key.objectid;
12814 rii->gen = btrfs_extent_generation(leaf, ei);
12815 rii->node_count = 1;
12816 } else if (level == rii->level) {
12824 btrfs_release_path(&path);
/*
 * maybe_repair_root_item - compare one root item against roots_info_cache
 * and optionally rewrite it.
 *
 * @path:           positioned at the root item (path->nodes[0] / slots[0]).
 * @root_key:       key of that root item; its objectid is the root id.
 * @read_only_mode: non-zero means only detect the mismatch, do not modify.
 *
 * Looks up the cached root_item_info for the root id. The visible error
 * messages cover a missing entry and a node_count other than 1. The root item
 * is copied out of the leaf and its bytenr, level and generation are compared
 * with the cached values. On a mismatch a message is printed (suppressed when
 * both read_only_mode and the global repair flag are set). The visible code
 * also tests for a root item generation newer than the cached one and prints
 * an error message for that case. When not in read-only mode the three
 * fields are updated and the item is written back into the leaf.
 *
 * NOTE(review): the print calls' opening lines, the statements following the
 * error messages and all return statements are missing from this extract, so
 * the return values cannot be confirmed from here.
 */
12829 static int maybe_repair_root_item(struct btrfs_path *path,
12830 const struct btrfs_key *root_key,
12831 const int read_only_mode)
12833 const u64 root_id = root_key->objectid;
12834 struct cache_extent *entry;
12835 struct root_item_info *rii;
12836 struct btrfs_root_item ri;
12837 unsigned long offset;
12839 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12842 "Error: could not find extent items for root %llu\n",
12843 root_key->objectid);
12847 rii = container_of(entry, struct root_item_info, cache_extent);
12848 ASSERT(rii->cache_extent.start == root_id);
12849 ASSERT(rii->cache_extent.size == 1);
12851 if (rii->node_count != 1) {
12853 "Error: could not find btree root extent for root %llu\n",
/* Copy the on-disk root item into a local struct for inspection. */
12858 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12859 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12861 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12862 btrfs_root_level(&ri) != rii->level ||
12863 btrfs_root_generation(&ri) != rii->gen) {
12866 * If we're in repair mode but our caller told us to not update
12867 * the root item, i.e. just check if it needs to be updated, don't
12868 * print this message, since the caller will call us again shortly
12869 * for the same root item without read only mode (the caller will
12870 * open a transaction first).
12872 if (!(read_only_mode && repair))
12874 "%sroot item for root %llu,"
12875 " current bytenr %llu, current gen %llu, current level %u,"
12876 " new bytenr %llu, new gen %llu, new level %u\n",
12877 (read_only_mode ? "" : "fixing "),
12879 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12880 btrfs_root_level(&ri),
12881 rii->bytenr, rii->gen, rii->level);
12883 if (btrfs_root_generation(&ri) > rii->gen) {
12885 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12886 root_id, btrfs_root_generation(&ri), rii->gen);
/* Write the cached values into the local copy, then back into the leaf. */
12890 if (!read_only_mode) {
12891 btrfs_set_root_bytenr(&ri, rii->bytenr);
12892 btrfs_set_root_level(&ri, rii->level);
12893 btrfs_set_root_generation(&ri, rii->gen);
12894 write_extent_buffer(path->nodes[0], &ri,
12895 offset, sizeof(ri));
12905 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12906 * caused read-only snapshots to be corrupted if they were created at a moment
12907 * when the source subvolume/snapshot had orphan items. The issue was that the
12908 * on-disk root items became incorrect, referring to the pre orphan cleanup root
12909 * node instead of the post orphan cleanup root node.
12910 * So this function, and its callees, just detects and fixes those cases. Even
12911 * though the regression was for read-only snapshots, this function applies to
12912 * any snapshot/subvolume root.
12913 * This must be run before any other repair code - not doing so makes other
12914 * repair code delete or modify backrefs in the extent tree for example, which
12915 * will result in an inconsistent fs after repairing the root items.
/*
 * repair_root_items - check (and, with a transaction open, fix) root items in
 * the tree root.
 *
 * Builds the roots info cache, then scans info->tree_root starting at
 * objectid BTRFS_FIRST_FREE_OBJECTID. The visible checks compare each key
 * against BTRFS_ROOT_ITEM_KEY and BTRFS_TREE_RELOC_OBJECTID.
 * maybe_repair_root_item() is called in read-only mode while no transaction
 * is open and in modifying mode once one is. On exit the roots info cache is
 * freed and the path released.
 *
 * NOTE(review): this extract is missing lines (labels, loop header, the
 * statements guarded by the key checks, the need_trans handling, counters and
 * return statements). Callers in this file print the result as a number of
 * roots ("Fixed %d roots."), so a positive return is presumably a count -
 * confirm against the full source.
 */
12917 static int repair_root_items(struct btrfs_fs_info *info)
12919 struct btrfs_path path;
12920 struct btrfs_key key;
12921 struct extent_buffer *leaf;
12922 struct btrfs_trans_handle *trans = NULL;
12925 int need_trans = 0;
12927 btrfs_init_path(&path);
12929 ret = build_roots_info_cache(info);
12933 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12934 key.type = BTRFS_ROOT_ITEM_KEY;
12939 * Avoid opening and committing transactions if a leaf doesn't have
12940 * any root items that need to be fixed, so that we avoid rotating
12941 * backup roots unnecessarily.
12944 trans = btrfs_start_transaction(info->tree_root, 1);
12945 if (IS_ERR(trans)) {
12946 ret = PTR_ERR(trans);
12951 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12955 leaf = path.nodes[0];
12958 struct btrfs_key found_key;
/* End of the current leaf: find the key at which the scan resumes. */
12960 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12961 int no_more_keys = find_next_key(&path, &key);
12963 btrfs_release_path(&path);
12965 ret = btrfs_commit_transaction(trans,
12977 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12979 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12981 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* Without an open transaction the helper runs in read-only (detect) mode. */
12984 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12988 if (!trans && repair) {
12991 btrfs_release_path(&path);
13001 free_roots_info_cache();
13002 btrfs_release_path(&path);
13004 btrfs_commit_transaction(trans, info->tree_root);
/*
 * clear_free_space_cache - drop the v1 free space cache of every block group.
 *
 * Iterates the block groups by repeatedly looking up the first group at or
 * after "current", calling btrfs_clear_free_space_cache() on it and advancing
 * "current" past the end of that group. Afterwards a transaction is started
 * on the tree root, the super block cache generation is set to (u64)-1 and
 * the transaction is committed.
 *
 * Returns PTR_ERR(trans) if the final transaction cannot be started.
 *
 * NOTE(review): the result of btrfs_commit_transaction() is not stored on the
 * visible line. The loop header, the per-group error handling and the final
 * return statement are missing from this extract.
 */
13011 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13013 struct btrfs_trans_handle *trans;
13014 struct btrfs_block_group_cache *bg_cache;
13018 /* Clear all free space cache inodes and its extent data */
13020 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13023 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
/* Continue from the first byte after this block group. */
13026 current = bg_cache->key.objectid + bg_cache->key.offset;
13029 /* Don't forget to set cache_generation to -1 */
13030 trans = btrfs_start_transaction(fs_info->tree_root, 0);
13031 if (IS_ERR(trans)) {
13032 error("failed to update super block cache generation");
13033 return PTR_ERR(trans);
13035 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
13036 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * do_clear_free_space_cache - clear the free space cache of the requested
 * version and report progress on stdout.
 *
 * clear_version == 1: the visible code checks the FREE_SPACE_TREE compat_ro
 * flag (with a message pointing at "--clear-space-cache v2") and calls
 * clear_free_space_cache().
 * clear_version == 2: prints "no free space cache v2 to clear" when the
 * FREE_SPACE_TREE flag is not set, and calls btrfs_clear_free_space_tree().
 *
 * NOTE(review): the second parameter's declaration (used here as
 * clear_version), the statements following each message and the return
 * statements are missing from this extract.
 */
13041 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
13046 if (clear_version == 1) {
13047 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13049 "free space cache v2 detected, use --clear-space-cache v2");
13053 printf("Clearing free space cache\n");
13054 ret = clear_free_space_cache(fs_info);
13056 error("failed to clear free space cache");
13059 printf("Free space cache cleared\n");
13061 } else if (clear_version == 2) {
13062 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13063 printf("no free space cache v2 to clear\n");
13067 printf("Clear free space cache v2\n");
13068 ret = btrfs_clear_free_space_tree(fs_info);
13070 error("failed to clear free space cache v2: %d", ret);
13073 printf("free space cache v2 cleared\n");
/*
 * Usage/help text for "btrfs check", passed to usage() by cmd_check().
 * The first entry is the synopsis line, followed by a short and a longer
 * description and then one entry per option.
 *
 * NOTE(review): some entries and the array terminator are missing from this
 * extract.
 */
13080 const char * const cmd_check_usage[] = {
13081 "btrfs check [options] <device>",
13082 "Check structural integrity of a filesystem (unmounted).",
13083 "Check structural integrity of an unmounted filesystem. Verify internal",
13084 "trees' consistency and item connectivity. In the repair mode try to",
13085 "fix the problems found. ",
13086 "WARNING: the repair mode is considered dangerous",
13088 "-s|--super <superblock> use this superblock copy",
13089 "-b|--backup use the first valid backup root copy",
13090 "--force skip mount checks, repair is not possible",
13091 "--repair try to repair the filesystem",
13092 "--readonly run in read-only mode (default)",
13093 "--init-csum-tree create a new CRC tree",
13094 "--init-extent-tree create a new extent tree",
13095 "--mode <MODE> allows choice of memory/IO trade-offs",
13096 " where MODE is one of:",
13097 " original - read inodes and extents to memory (requires",
13098 " more memory, does less IO)",
13099 " lowmem - try to use less memory but read blocks again",
13101 "--check-data-csum verify checksums of data blocks",
13102 "-Q|--qgroup-report print a report on qgroup consistency",
13103 "-E|--subvol-extents <subvolid>",
13104 " print subvolume extents and sharing state",
13105 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
13106 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
13107 "-p|--progress indicate progress",
13108 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
13112 int cmd_check(int argc, char **argv)
13114 struct cache_tree root_cache;
13115 struct btrfs_root *root;
13116 struct btrfs_fs_info *info;
13119 u64 tree_root_bytenr = 0;
13120 u64 chunk_root_bytenr = 0;
13121 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
13125 int init_csum_tree = 0;
13127 int clear_space_cache = 0;
13128 int qgroup_report = 0;
13129 int qgroups_repaired = 0;
13130 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
13135 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
13136 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
13137 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
13138 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
13139 GETOPT_VAL_FORCE };
13140 static const struct option long_options[] = {
13141 { "super", required_argument, NULL, 's' },
13142 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
13143 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
13144 { "init-csum-tree", no_argument, NULL,
13145 GETOPT_VAL_INIT_CSUM },
13146 { "init-extent-tree", no_argument, NULL,
13147 GETOPT_VAL_INIT_EXTENT },
13148 { "check-data-csum", no_argument, NULL,
13149 GETOPT_VAL_CHECK_CSUM },
13150 { "backup", no_argument, NULL, 'b' },
13151 { "subvol-extents", required_argument, NULL, 'E' },
13152 { "qgroup-report", no_argument, NULL, 'Q' },
13153 { "tree-root", required_argument, NULL, 'r' },
13154 { "chunk-root", required_argument, NULL,
13155 GETOPT_VAL_CHUNK_TREE },
13156 { "progress", no_argument, NULL, 'p' },
13157 { "mode", required_argument, NULL,
13159 { "clear-space-cache", required_argument, NULL,
13160 GETOPT_VAL_CLEAR_SPACE_CACHE},
13161 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
13162 { NULL, 0, NULL, 0}
13165 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
13169 case 'a': /* ignored */ break;
13171 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
13174 num = arg_strtou64(optarg);
13175 if (num >= BTRFS_SUPER_MIRROR_MAX) {
13177 "super mirror should be less than %d",
13178 BTRFS_SUPER_MIRROR_MAX);
13181 bytenr = btrfs_sb_offset(((int)num));
13182 printf("using SB copy %llu, bytenr %llu\n", num,
13183 (unsigned long long)bytenr);
13189 subvolid = arg_strtou64(optarg);
13192 tree_root_bytenr = arg_strtou64(optarg);
13194 case GETOPT_VAL_CHUNK_TREE:
13195 chunk_root_bytenr = arg_strtou64(optarg);
13198 ctx.progress_enabled = true;
13202 usage(cmd_check_usage);
13203 case GETOPT_VAL_REPAIR:
13204 printf("enabling repair mode\n");
13206 ctree_flags |= OPEN_CTREE_WRITES;
13208 case GETOPT_VAL_READONLY:
13211 case GETOPT_VAL_INIT_CSUM:
13212 printf("Creating a new CRC tree\n");
13213 init_csum_tree = 1;
13215 ctree_flags |= OPEN_CTREE_WRITES;
13217 case GETOPT_VAL_INIT_EXTENT:
13218 init_extent_tree = 1;
13219 ctree_flags |= (OPEN_CTREE_WRITES |
13220 OPEN_CTREE_NO_BLOCK_GROUPS);
13223 case GETOPT_VAL_CHECK_CSUM:
13224 check_data_csum = 1;
13226 case GETOPT_VAL_MODE:
13227 check_mode = parse_check_mode(optarg);
13228 if (check_mode == CHECK_MODE_UNKNOWN) {
13229 error("unknown mode: %s", optarg);
13233 case GETOPT_VAL_CLEAR_SPACE_CACHE:
13234 if (strcmp(optarg, "v1") == 0) {
13235 clear_space_cache = 1;
13236 } else if (strcmp(optarg, "v2") == 0) {
13237 clear_space_cache = 2;
13238 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
13241 "invalid argument to --clear-space-cache, must be v1 or v2");
13244 ctree_flags |= OPEN_CTREE_WRITES;
13246 case GETOPT_VAL_FORCE:
13252 if (check_argc_exact(argc - optind, 1))
13253 usage(cmd_check_usage);
13255 if (ctx.progress_enabled) {
13256 ctx.tp = TASK_NOTHING;
13257 ctx.info = task_init(print_status_check, print_status_return, &ctx);
13260 /* This check is the only reason for --readonly to exist */
13261 if (readonly && repair) {
13262 error("repair options are not compatible with --readonly");
13267 * experimental and dangerous
13269 if (repair && check_mode == CHECK_MODE_LOWMEM)
13270 warning("low-memory mode repair support is only partial");
13273 cache_tree_init(&root_cache);
13275 ret = check_mounted(argv[optind]);
13278 error("could not check mount status: %s",
13284 "%s is currently mounted, use --force if you really intend to check the filesystem",
13292 error("repair and --force is not yet supported");
13299 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
13303 "filesystem mounted, continuing because of --force");
13305 /* A block device is mounted in exclusive mode by kernel */
13306 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
13309 /* only allow partial opening under repair mode */
13311 ctree_flags |= OPEN_CTREE_PARTIAL;
13313 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
13314 chunk_root_bytenr, ctree_flags);
13316 error("cannot open file system");
13322 global_info = info;
13323 root = info->fs_root;
13324 uuid_unparse(info->super_copy->fsid, uuidbuf);
13326 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
13329 * Check the bare minimum before starting anything else that could rely
13330 * on it, namely the tree roots, any local consistency checks
13332 if (!extent_buffer_uptodate(info->tree_root->node) ||
13333 !extent_buffer_uptodate(info->dev_root->node) ||
13334 !extent_buffer_uptodate(info->chunk_root->node)) {
13335 error("critical roots corrupted, unable to check the filesystem");
13341 if (clear_space_cache) {
13342 ret = do_clear_free_space_cache(info, clear_space_cache);
13348 * repair mode will force us to commit transaction which
13349 * will make us fail to load log tree when mounting.
13351 if (repair && btrfs_super_log_root(info->super_copy)) {
13352 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
13358 ret = zero_log_tree(root);
13361 error("failed to zero log tree: %d", ret);
13366 if (qgroup_report) {
13367 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
13369 ret = qgroup_verify_all(info);
13376 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
13377 subvolid, argv[optind], uuidbuf);
13378 ret = print_extent_state(info, subvolid);
13383 if (init_extent_tree || init_csum_tree) {
13384 struct btrfs_trans_handle *trans;
13386 trans = btrfs_start_transaction(info->extent_root, 0);
13387 if (IS_ERR(trans)) {
13388 error("error starting transaction");
13389 ret = PTR_ERR(trans);
13394 if (init_extent_tree) {
13395 printf("Creating a new extent tree\n");
13396 ret = reinit_extent_tree(trans, info);
13402 if (init_csum_tree) {
13403 printf("Reinitialize checksum tree\n");
13404 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
13406 error("checksum tree initialization failed: %d",
13413 ret = fill_csum_tree(trans, info->csum_root,
13417 error("checksum tree refilling failed: %d", ret);
13422 * Ok now we commit and run the normal fsck, which will add
13423 * extent entries for all of the items it finds.
13425 ret = btrfs_commit_transaction(trans, info->extent_root);
13430 if (!extent_buffer_uptodate(info->extent_root->node)) {
13431 error("critical: extent_root, unable to check the filesystem");
13436 if (!extent_buffer_uptodate(info->csum_root->node)) {
13437 error("critical: csum_root, unable to check the filesystem");
13443 ret = do_check_chunks_and_extents(info);
13447 "errors found in extent allocation tree or chunk allocation");
13449 ret = repair_root_items(info);
13452 error("failed to repair root items: %s", strerror(-ret));
13456 fprintf(stderr, "Fixed %d roots.\n", ret);
13458 } else if (ret > 0) {
13460 "Found %d roots with an outdated root item.\n",
13463 "Please run a filesystem check with the option --repair to fix them.\n");
13469 if (!ctx.progress_enabled) {
13470 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13471 fprintf(stderr, "checking free space tree\n");
13473 fprintf(stderr, "checking free space cache\n");
13475 ret = check_space_cache(root);
13478 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13479 error("errors found in free space tree");
13481 error("errors found in free space cache");
13486 * We used to have to have these hole extents in between our real
13487 * extents so if we don't have this flag set we need to make sure there
13488 * are no gaps in the file extents for inodes, otherwise we can just
13489 * ignore it when this happens.
13491 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13492 ret = do_check_fs_roots(info, &root_cache);
13495 error("errors found in fs roots");
13499 fprintf(stderr, "checking csums\n");
13500 ret = check_csums(root);
13503 error("errors found in csum tree");
13507 fprintf(stderr, "checking root refs\n");
13508 /* For low memory mode, check_fs_roots_v2 handles root refs */
13509 if (check_mode != CHECK_MODE_LOWMEM) {
13510 ret = check_root_refs(root, &root_cache);
13513 error("errors found in root refs");
13518 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13519 struct extent_buffer *eb;
13521 eb = list_first_entry(&root->fs_info->recow_ebs,
13522 struct extent_buffer, recow);
13523 list_del_init(&eb->recow);
13524 ret = recow_extent_buffer(root, eb);
13527 error("fails to fix transid errors");
13532 while (!list_empty(&delete_items)) {
13533 struct bad_item *bad;
13535 bad = list_first_entry(&delete_items, struct bad_item, list);
13536 list_del_init(&bad->list);
13538 ret = delete_bad_item(root, bad);
13544 if (info->quota_enabled) {
13545 fprintf(stderr, "checking quota groups\n");
13546 ret = qgroup_verify_all(info);
13549 error("failed to check quota groups");
13553 ret = repair_qgroups(info, &qgroups_repaired);
13556 error("failed to repair quota groups");
13562 if (!list_empty(&root->fs_info->recow_ebs)) {
13563 error("transid errors in file system");
13568 printf("found %llu bytes used, ",
13569 (unsigned long long)bytes_used);
13571 printf("error(s) found\n");
13573 printf("no error found\n");
13574 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13575 printf("total tree bytes: %llu\n",
13576 (unsigned long long)total_btree_bytes);
13577 printf("total fs tree bytes: %llu\n",
13578 (unsigned long long)total_fs_tree_bytes);
13579 printf("total extent tree bytes: %llu\n",
13580 (unsigned long long)total_extent_tree_bytes);
13581 printf("btree space waste bytes: %llu\n",
13582 (unsigned long long)btree_space_waste);
13583 printf("file data blocks allocated: %llu\n referenced %llu\n",
13584 (unsigned long long)data_bytes_allocated,
13585 (unsigned long long)data_bytes_referenced);
13587 free_qgroup_counts();
13588 free_root_recs_tree(&root_cache);
13592 if (ctx.progress_enabled)
13593 task_deinit(ctx.info);