2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87 struct extent_backref {
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
98 return rb_entry(node, struct extent_backref, node);
101 struct data_backref {
102 struct extent_backref node;
116 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
117 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
118 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
119 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
120 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
121 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but not match */
122 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
123 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
124 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
125 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
126 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
127 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
128 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM no reference */
129 #define NO_INODE_ITEM (1<<14) /* no inode_item */
130 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
131 #define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
132 #define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but not match */
134 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 return container_of(back, struct data_backref, node);
/*
 * rb-tree comparator for two data backrefs.
 *
 * Keys are compared in this order: parent (shares storage with root, see
 * the note below), then owner, then offset and, only when both entries
 * have found_ref set, disk_bytenr and bytes. A full backref is decided by
 * the parent comparison alone. Both nodes are expected to be data
 * backrefs; WARN_ON fires otherwise.
 *
 * NOTE(review): the statements that produce the result after each
 * comparison are not visible in this excerpt - confirm the sign
 * convention against the full file.
 */
139 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
141 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
142 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
143 struct data_backref *back1 = to_data_backref(ext1);
144 struct data_backref *back2 = to_data_backref(ext2);
146 WARN_ON(!ext1->is_data);
147 WARN_ON(!ext2->is_data);
149 /* parent and root are a union, so this covers both */
150 if (back1->parent > back2->parent)
152 if (back1->parent < back2->parent)
155 /* This is a full backref and the parents match. */
156 if (back1->node.full_backref)
159 if (back1->owner > back2->owner)
161 if (back1->owner < back2->owner)
164 if (back1->offset > back2->offset)
166 if (back1->offset < back2->offset)
/* Physical location only takes part once both sides have found_ref set. */
169 if (back1->found_ref && back2->found_ref) {
170 if (back1->disk_bytenr > back2->disk_bytenr)
172 if (back1->disk_bytenr < back2->disk_bytenr)
175 if (back1->bytes > back2->bytes)
177 if (back1->bytes < back2->bytes)
185 * Much like data_backref, just removed the undetermined members
186 * and change it to use list_head.
187 * During extent scan, it is stored in root->orphan_data_extent.
188 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
190 struct orphan_data_extent {
191 struct list_head list;
199 struct tree_backref {
200 struct extent_backref node;
207 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
209 return container_of(back, struct tree_backref, node);
/*
 * rb-tree comparator for two tree (metadata) backrefs.
 *
 * Only the parent field is compared; per the note below it shares storage
 * with root, so one comparison covers both kinds. Both nodes are expected
 * NOT to be data backrefs; WARN_ON fires otherwise.
 *
 * NOTE(review): the result statements are not visible in this excerpt.
 */
212 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
214 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
215 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
216 struct tree_backref *back1 = to_tree_backref(ext1);
217 struct tree_backref *back2 = to_tree_backref(ext2);
219 WARN_ON(ext1->is_data);
220 WARN_ON(ext2->is_data);
222 /* parent and root are a union, so this covers both */
223 if (back1->parent > back2->parent)
225 if (back1->parent < back2->parent)
/*
 * Top-level rb-tree comparator for extent backrefs of either kind.
 *
 * Entries are ordered first by the is_data flag, then by the full_backref
 * flag; the final ordering is delegated to compare_data_backref() or
 * compare_tree_backref().
 *
 * NOTE(review): the condition selecting between the two delegates and the
 * result statements are not visible in this excerpt.
 */
231 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
233 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
234 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
236 if (ext1->is_data > ext2->is_data)
239 if (ext1->is_data < ext2->is_data)
242 if (ext1->full_backref > ext2->full_backref)
244 if (ext1->full_backref < ext2->full_backref)
248 return compare_data_backref(node1, node2);
250 return compare_tree_backref(node1, node2);
253 /* Explicit initialization for extent_record::flag_block_full_backref */
254 enum { FLAG_UNSET = 2 };
256 struct extent_record {
257 struct list_head backrefs;
258 struct list_head dups;
259 struct rb_root backref_tree;
260 struct list_head list;
261 struct cache_extent cache;
262 struct btrfs_disk_key parent_key;
267 u64 extent_item_refs;
269 u64 parent_generation;
273 unsigned int flag_block_full_backref:2;
274 unsigned int found_rec:1;
275 unsigned int content_checked:1;
276 unsigned int owner_ref_checked:1;
277 unsigned int is_root:1;
278 unsigned int metadata:1;
279 unsigned int bad_full_backref:1;
280 unsigned int crossing_stripes:1;
281 unsigned int wrong_chunk_type:1;
284 static inline struct extent_record* to_extent_record(struct list_head *entry)
286 return container_of(entry, struct extent_record, list);
289 struct inode_backref {
290 struct list_head list;
291 unsigned int found_dir_item:1;
292 unsigned int found_dir_index:1;
293 unsigned int found_inode_ref:1;
303 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
305 return list_entry(entry, struct inode_backref, list);
308 struct root_item_record {
309 struct list_head list;
315 struct btrfs_key drop_key;
/*
 * Error bits accumulated in a backref's "errors" field while directory
 * items, directory indexes, inode refs and root refs are cross-checked.
 * print_ref_error() turns a mask of these bits into readable text.
 */
318 #define REF_ERR_NO_DIR_ITEM (1 << 0)
319 #define REF_ERR_NO_DIR_INDEX (1 << 1)
320 #define REF_ERR_NO_INODE_REF (1 << 2)
321 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
322 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
323 #define REF_ERR_DUP_INODE_REF (1 << 5)
324 #define REF_ERR_INDEX_UNMATCH (1 << 6)
325 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
326 #define REF_ERR_NAME_TOO_LONG (1 << 8) /* 0x100 */
327 #define REF_ERR_NO_ROOT_REF (1 << 9)
328 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
329 #define REF_ERR_DUP_ROOT_REF (1 << 11)
330 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
332 struct file_extent_hole {
338 struct inode_record {
339 struct list_head backrefs;
340 unsigned int checked:1;
341 unsigned int merging:1;
342 unsigned int found_inode_item:1;
343 unsigned int found_dir_item:1;
344 unsigned int found_file_extent:1;
345 unsigned int found_csum_item:1;
346 unsigned int some_csum_missing:1;
347 unsigned int nodatasum:1;
360 struct rb_root holes;
361 struct list_head orphan_extents;
/*
 * Error bits accumulated in inode_record::errors while an inode is
 * checked. print_inode_error() turns a mask of these bits into readable
 * text.
 */
366 #define I_ERR_NO_INODE_ITEM (1 << 0)
367 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
368 #define I_ERR_DUP_INODE_ITEM (1 << 2)
369 #define I_ERR_DUP_DIR_INDEX (1 << 3)
370 #define I_ERR_ODD_DIR_ITEM (1 << 4)
371 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
372 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
373 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
374 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) /* 0x100 */
375 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
376 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) /* 0x400 */
377 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
378 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
379 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
380 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
382 struct root_backref {
383 struct list_head list;
384 unsigned int found_dir_item:1;
385 unsigned int found_dir_index:1;
386 unsigned int found_back_ref:1;
387 unsigned int found_forward_ref:1;
388 unsigned int reachable:1;
397 static inline struct root_backref* to_root_backref(struct list_head *entry)
399 return list_entry(entry, struct root_backref, list);
403 struct list_head backrefs;
404 struct cache_extent cache;
405 unsigned int found_root_item:1;
411 struct cache_extent cache;
416 struct cache_extent cache;
417 struct cache_tree root_cache;
418 struct cache_tree inode_cache;
419 struct inode_record *current;
428 struct walk_control {
429 struct cache_tree shared;
430 struct shared_node *nodes[BTRFS_MAX_LEVEL];
436 struct btrfs_key key;
438 struct list_head list;
441 struct extent_entry {
446 struct list_head list;
449 struct root_item_info {
450 /* level of the root */
452 /* number of nodes at this level, must be 1 for a root */
456 struct cache_extent cache_extent;
460 * Error bit for low memory mode check.
462 * Currently no caller cares about it yet. Just internal use for error
/*
 * Low-memory mode error bits. Every flag owns a distinct bit so that a
 * combined error mask can be decoded unambiguously.
 *
 * CROSSING_STRIPE_BOUNDARY used to share (1 << 4) with
 * REFERENCER_MISMATCH, which made the two conditions indistinguishable in
 * a mask; it now uses the first free bit.
 */
#define BACKREF_MISSING			(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH		(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED			(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING		(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH		(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH		(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE			(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH		(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH		(1 << 8) /* Chunk type mismatch */
#define CROSSING_STRIPE_BOUNDARY	(1 << 9) /* For kernel scrub workaround */
/*
 * Progress-indicator routine. The opaque argument is a struct task_ctx.
 *
 * Starts a periodic timer with a 1000 ms period, prints the description
 * of the current task position (priv->tp) followed by one character of a
 * four-step work indicator and a carriage return, then waits for the next
 * period.
 *
 * NOTE(review): the loop structure, the handling of TASK_NOTHING and the
 * return value are not visible in this excerpt.
 */
476 static void *print_status_check(void *p)
478 struct task_ctx *priv = p;
479 const char work_indicator[] = { '.', 'o', 'O', 'o' };
481 static char *task_position_string[] = {
483 "checking free space cache",
487 task_period_start(priv->info, 1000 /* 1s */);
489 if (priv->tp == TASK_NOTHING)
/* "\r" keeps the status on one terminal line, overwritten each period. */
493 printf("%s [%c]\r", task_position_string[priv->tp],
494 work_indicator[count % 4]);
497 task_period_wait(priv->info);
502 static int print_status_return(void *p)
510 static enum btrfs_check_mode parse_check_mode(const char *str)
512 if (strcmp(str, "lowmem") == 0)
513 return CHECK_MODE_LOWMEM;
514 if (strcmp(str, "orig") == 0)
515 return CHECK_MODE_ORIGINAL;
516 if (strcmp(str, "original") == 0)
517 return CHECK_MODE_ORIGINAL;
519 return CHECK_MODE_UNKNOWN;
522 /* Compatible function to allow reuse of old codes */
/*
 * Look at the first (leftmost) hole recorded in the holes rb-tree.
 *
 * An empty tree is handled separately from the non-empty case, in which
 * the leftmost node is decoded as a file_extent_hole.
 *
 * NOTE(review): both return statements are missing from this excerpt.
 * Callers compare the result against an inode size, so it is presumably
 * the start offset of the first hole, with a large sentinel for "no
 * hole" - confirm against the full file.
 */
523 static u64 first_extent_gap(struct rb_root *holes)
525 struct file_extent_hole *hole;
527 if (RB_EMPTY_ROOT(holes))
530 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * rb-tree comparator used when inserting file extent holes.
 *
 * Holes are ordered by start offset. For equal starts the length decides
 * which of the two becomes the "merge center" (see the notes below), so
 * per the caller's comment the comparator never reports equality.
 *
 * NOTE(review): the result statements are not visible in this excerpt.
 */
534 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
536 struct file_extent_hole *hole1;
537 struct file_extent_hole *hole2;
539 hole1 = rb_entry(node1, struct file_extent_hole, node);
540 hole2 = rb_entry(node2, struct file_extent_hole, node);
542 if (hole1->start > hole2->start)
544 if (hole1->start < hole2->start)
546 /* Now hole1->start == hole2->start */
547 if (hole1->len >= hole2->len)
549 * Hole 1 will be merge center
550 * Same hole will be merged later
553 /* Hole 2 will be merge center */
558 * Add a hole to the record
560 * This will do hole merge for copy_file_extent_holes(),
561 * which will ensure there won't be continuous holes.
/*
 * Insert a hole into the holes rb-tree and merge it with its neighbours.
 *
 * A new file_extent_hole is allocated and inserted with compare_hole().
 * If the previous hole touches or overlaps the new one, the new hole is
 * extended backwards to the previous hole's start and the previous node
 * is erased. Following holes are then absorbed while they touch or
 * overlap the (growing) new hole.
 *
 * NOTE(review): allocation-failure handling, the freeing of erased nodes
 * and the return value are not visible in this excerpt.
 */
563 static int add_file_extent_hole(struct rb_root *holes,
566 struct file_extent_hole *hole;
567 struct file_extent_hole *prev = NULL;
568 struct file_extent_hole *next = NULL;
570 hole = malloc(sizeof(*hole));
575 /* Since compare will not return 0, no -EEXIST will happen */
576 rb_insert(holes, &hole->node, compare_hole);
578 /* simple merge with previous hole */
579 if (rb_prev(&hole->node))
580 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
/*
 * NOTE(review): the merged length is derived from the NEW hole's end. If
 * the previous hole ends after the new hole does, the merged hole ends
 * earlier than the previous one did - confirm callers never add a hole
 * fully contained in an existing one.
 */
582 if (prev && prev->start + prev->len >= hole->start) {
583 hole->len = hole->start + hole->len - prev->start;
584 hole->start = prev->start;
585 rb_erase(&prev->node, holes);
590 /* iterate merge with next holes */
592 if (!rb_next(&hole->node))
594 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
596 if (hole->start + hole->len >= next->start) {
/* Only grow when the next hole reaches further than the current end. */
597 if (hole->start + hole->len <= next->start + next->len)
598 hole->len = next->start + next->len -
600 rb_erase(&next->node, holes);
/*
 * Search callback for rb_search() over the holes tree.
 *
 * @data is a struct file_extent_hole used as the search key. The node
 * under inspection is decoded as a hole, and the key's start offset is
 * tested against that hole's [start, start + len) range.
 *
 * NOTE(review): the line assigning "start" and the result statements are
 * not visible in this excerpt.
 */
609 static int compare_hole_range(struct rb_node *node, void *data)
611 struct file_extent_hole *hole;
614 hole = (struct file_extent_hole *)data;
617 hole = rb_entry(node, struct file_extent_hole, node);
618 if (start < hole->start)
620 if (start >= hole->start && start < hole->start + hole->len)
626 * Delete a hole in the record
628 * This will do the hole split and is much restrict than add.
/*
 * Remove the range [start, start + len) from the holes rb-tree.
 *
 * The hole containing "start" is located with compare_hole_range(). The
 * range to delete must not extend past the end of that hole. The hole is
 * erased, and whatever remains before and/or after the deleted range is
 * re-added through add_file_extent_hole().
 *
 * NOTE(review): the not-found and out-of-range results, the freeing of
 * the erased node and the final return value are not visible in this
 * excerpt.
 */
630 static int del_file_extent_hole(struct rb_root *holes,
633 struct file_extent_hole *hole;
634 struct file_extent_hole tmp;
639 struct rb_node *node;
646 node = rb_search(holes, &tmp, compare_hole_range, NULL);
649 hole = rb_entry(node, struct file_extent_hole, node);
650 if (start + len > hole->start + hole->len)
654 * Now there will be no overlap, delete the hole and re-add the
655 * split(s) if they exists.
/* Remainder in front of the deleted range. */
657 if (start > hole->start) {
658 prev_start = hole->start;
659 prev_len = start - hole->start;
/* Remainder behind the deleted range. */
662 if (hole->start + hole->len > start + len) {
663 next_start = start + len;
664 next_len = hole->start + hole->len - start - len;
667 rb_erase(node, holes);
670 ret = add_file_extent_hole(holes, prev_start, prev_len);
675 ret = add_file_extent_hole(holes, next_start, next_len);
/*
 * Add every hole recorded in one holes rb-tree to "dst".
 *
 * The source tree is walked in order with rb_first()/rb_next() and each
 * hole is added through add_file_extent_hole(), so holes are merged with
 * whatever "dst" already contains. The source tree is only read.
 *
 * NOTE(review): the loop header, the error check on "ret" and the return
 * value are not visible in this excerpt.
 */
682 static int copy_file_extent_holes(struct rb_root *dst,
685 struct file_extent_hole *hole;
686 struct rb_node *node;
689 node = rb_first(src);
691 hole = rb_entry(node, struct file_extent_hole, node);
692 ret = add_file_extent_hole(dst, hole->start, hole->len);
695 node = rb_next(node);
/*
 * Empty a holes rb-tree by repeatedly taking the leftmost node and
 * erasing it from the tree.
 *
 * NOTE(review): the loop header and the release of each hole's memory
 * are not visible in this excerpt.
 */
700 static void free_file_extent_holes(struct rb_root *holes)
702 struct rb_node *node;
703 struct file_extent_hole *hole;
705 node = rb_first(holes);
707 hole = rb_entry(node, struct file_extent_hole, node);
708 rb_erase(node, holes);
/* Re-read the leftmost node: the tree changed after the erase. */
710 node = rb_first(holes);
714 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
716 static void record_root_in_trans(struct btrfs_trans_handle *trans,
717 struct btrfs_root *root)
719 if (root->last_trans != trans->transid) {
720 root->track_dirty = 1;
721 root->last_trans = trans->transid;
722 root->commit_root = root->node;
723 extent_buffer_get(root->node);
727 static u8 imode_to_type(u32 imode)
730 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
731 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
732 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
733 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
734 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
735 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
736 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
737 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
740 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
/*
 * rb-tree comparator for device records: orders two records by devid.
 *
 * NOTE(review): the result statements are not visible in this excerpt.
 */
744 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
746 struct device_record *rec1;
747 struct device_record *rec2;
749 rec1 = rb_entry(node1, struct device_record, node);
750 rec2 = rb_entry(node2, struct device_record, node);
751 if (rec1->devid > rec2->devid)
753 else if (rec1->devid < rec2->devid)
/*
 * Create a private deep copy of an inode record.
 *
 * The record itself is copied bytewise, then its containers are rebuilt
 * so the copy shares nothing with the original: each backref (including
 * its trailing name, namelen + 1 bytes) and each orphan data extent is
 * duplicated, and the holes tree is copied with copy_file_extent_holes().
 *
 * Returns the new record, or ERR_PTR(-ENOMEM) when the record itself
 * cannot be allocated.
 *
 * NOTE(review): the error checks after the inner allocations, the jumps
 * to the cleanup code, the free() calls and the final return are not
 * visible in this excerpt.
 */
759 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
761 struct inode_record *rec;
762 struct inode_backref *backref;
763 struct inode_backref *orig;
764 struct inode_backref *tmp;
765 struct orphan_data_extent *src_orphan;
766 struct orphan_data_extent *dst_orphan;
771 rec = malloc(sizeof(*rec));
773 return ERR_PTR(-ENOMEM);
774 memcpy(rec, orig_rec, sizeof(*rec));
/* The bytewise copy left the list heads and tree root aliasing the original. */
776 INIT_LIST_HEAD(&rec->backrefs);
777 INIT_LIST_HEAD(&rec->orphan_extents);
778 rec->holes = RB_ROOT;
780 list_for_each_entry(orig, &orig_rec->backrefs, list) {
781 size = sizeof(*orig) + orig->namelen + 1;
782 backref = malloc(size);
787 memcpy(backref, orig, size);
788 list_add_tail(&backref->list, &rec->backrefs);
790 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
791 dst_orphan = malloc(sizeof(*dst_orphan));
796 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
797 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
799 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
/* Cleanup path: walk the partially copied holes tree. */
806 rb = rb_first(&rec->holes);
808 struct file_extent_hole *hole;
810 hole = rb_entry(rb, struct file_extent_hole, node);
816 if (!list_empty(&rec->backrefs))
817 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
818 list_del(&orig->list);
/*
 * NOTE(review): this loop walks the orphan_extents list, whose entries
 * are struct orphan_data_extent, using struct inode_backref cursors. It
 * only works as long as "list" sits at the same offset in both
 * structures; dedicated orphan_data_extent cursors would be type-correct.
 */
822 if (!list_empty(&rec->orphan_extents))
823 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
824 list_del(&orig->list);
/*
 * Print the orphan data extents on the given list to stdout: a heading
 * naming the tree they were lost in, then one line per extent with its
 * inode, file offset, disk bytenr and disk length. Nothing is printed
 * when the list is empty.
 */
833 static void print_orphan_data_extents(struct list_head *orphan_extents,
836 struct orphan_data_extent *orphan;
838 if (list_empty(orphan_extents))
840 printf("The following data extent is lost in tree %llu:\n",
842 list_for_each_entry(orphan, orphan_extents, list) {
843 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
844 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Report the errors recorded for an inode to stderr.
 *
 * Output is one line "root <id> inode <ino> errors <hex>" followed by a
 * ", <description>" fragment for every I_ERR_* bit set in rec->errors.
 * For a relocation tree the line is prefixed with "reloc" and the id
 * printed is the key offset, i.e. the corresponding fs root. Orphan
 * extents and file extent holes are listed afterwards when the matching
 * error bits are set.
 *
 * NOTE(review): some lines of the hole-printing section are not visible
 * in this excerpt.
 */
849 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
851 u64 root_objectid = root->root_key.objectid;
852 int errors = rec->errors;
856 /* reloc root errors, we print its corresponding fs root objectid*/
857 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
858 root_objectid = root->root_key.offset;
859 fprintf(stderr, "reloc");
861 fprintf(stderr, "root %llu inode %llu errors %x",
862 (unsigned long long) root_objectid,
863 (unsigned long long) rec->ino, rec->errors);
865 if (errors & I_ERR_NO_INODE_ITEM)
866 fprintf(stderr, ", no inode item");
867 if (errors & I_ERR_NO_ORPHAN_ITEM)
868 fprintf(stderr, ", no orphan item");
869 if (errors & I_ERR_DUP_INODE_ITEM)
870 fprintf(stderr, ", dup inode item");
871 if (errors & I_ERR_DUP_DIR_INDEX)
872 fprintf(stderr, ", dup dir index");
873 if (errors & I_ERR_ODD_DIR_ITEM)
874 fprintf(stderr, ", odd dir item");
875 if (errors & I_ERR_ODD_FILE_EXTENT)
876 fprintf(stderr, ", odd file extent");
877 if (errors & I_ERR_BAD_FILE_EXTENT)
878 fprintf(stderr, ", bad file extent");
879 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
880 fprintf(stderr, ", file extent overlap");
881 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
882 fprintf(stderr, ", file extent discount");
883 if (errors & I_ERR_DIR_ISIZE_WRONG)
884 fprintf(stderr, ", dir isize wrong");
885 if (errors & I_ERR_FILE_NBYTES_WRONG)
886 fprintf(stderr, ", nbytes wrong");
887 if (errors & I_ERR_ODD_CSUM_ITEM)
888 fprintf(stderr, ", odd csum item");
889 if (errors & I_ERR_SOME_CSUM_MISSING)
890 fprintf(stderr, ", some csum missing");
891 if (errors & I_ERR_LINK_COUNT_WRONG)
892 fprintf(stderr, ", link count wrong");
893 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
894 fprintf(stderr, ", orphan file extent");
895 fprintf(stderr, "\n");
896 /* Print the orphan extents if needed */
/*
 * NOTE(review): the tree id passed here is root->objectid, while the
 * header line above uses root->root_key.objectid (or the key offset for
 * reloc roots) - confirm the two are meant to differ.
 */
897 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
898 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
900 /* Print the holes if needed */
901 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
902 struct file_extent_hole *hole;
903 struct rb_node *node;
906 node = rb_first(&rec->holes);
907 fprintf(stderr, "Found file extent holes:\n");
910 hole = rb_entry(node, struct file_extent_hole, node);
911 fprintf(stderr, "\tstart: %llu, len: %llu\n",
912 hole->start, hole->len);
913 node = rb_next(node);
916 fprintf(stderr, "\tstart: 0, len: %llu\n",
918 root->fs_info->sectorsize));
922 static void print_ref_error(int errors)
924 if (errors & REF_ERR_NO_DIR_ITEM)
925 fprintf(stderr, ", no dir item");
926 if (errors & REF_ERR_NO_DIR_INDEX)
927 fprintf(stderr, ", no dir index");
928 if (errors & REF_ERR_NO_INODE_REF)
929 fprintf(stderr, ", no inode ref");
930 if (errors & REF_ERR_DUP_DIR_ITEM)
931 fprintf(stderr, ", dup dir item");
932 if (errors & REF_ERR_DUP_DIR_INDEX)
933 fprintf(stderr, ", dup dir index");
934 if (errors & REF_ERR_DUP_INODE_REF)
935 fprintf(stderr, ", dup inode ref");
936 if (errors & REF_ERR_INDEX_UNMATCH)
937 fprintf(stderr, ", index mismatch");
938 if (errors & REF_ERR_FILETYPE_UNMATCH)
939 fprintf(stderr, ", filetype mismatch");
940 if (errors & REF_ERR_NAME_TOO_LONG)
941 fprintf(stderr, ", name too long");
942 if (errors & REF_ERR_NO_ROOT_REF)
943 fprintf(stderr, ", no root ref");
944 if (errors & REF_ERR_NO_ROOT_BACKREF)
945 fprintf(stderr, ", no root backref");
946 if (errors & REF_ERR_DUP_ROOT_REF)
947 fprintf(stderr, ", dup root ref");
948 if (errors & REF_ERR_DUP_ROOT_BACKREF)
949 fprintf(stderr, ", dup root backref");
950 fprintf(stderr, "\n");
/*
 * Look up the inode record for "ino" in the cache, optionally creating
 * it or making it private.
 *
 * When the record exists, is shared (refs > 1) and "mod" is set, it is
 * replaced in the cache node by a private clone from clone_inode_rec().
 * When no record exists, a zeroed record is allocated, initialised with
 * extent_start = (u64)-1 and empty backref/orphan/hole containers, and
 * inserted into the cache under the key [ino, ino + 1).
 *
 * Returns ERR_PTR(-ENOMEM) on allocation failure and ERR_PTR(-EEXIST)
 * when the cache insertion fails.
 *
 * NOTE(review): the branch conditions, the reference-count handling, the
 * special case for BTRFS_FREE_INO_OBJECTID and the success return are
 * not visible in this excerpt.
 */
953 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
956 struct ptr_node *node;
957 struct cache_extent *cache;
958 struct inode_record *rec = NULL;
961 cache = lookup_cache_extent(inode_cache, ino, 1);
963 node = container_of(cache, struct ptr_node, cache);
/* A shared record must not be modified in place: work on a private copy. */
965 if (mod && rec->refs > 1) {
966 node->data = clone_inode_rec(rec);
967 if (IS_ERR(node->data))
973 rec = calloc(1, sizeof(*rec));
975 return ERR_PTR(-ENOMEM);
977 rec->extent_start = (u64)-1;
979 INIT_LIST_HEAD(&rec->backrefs);
980 INIT_LIST_HEAD(&rec->orphan_extents);
981 rec->holes = RB_ROOT;
983 node = malloc(sizeof(*node));
986 return ERR_PTR(-ENOMEM);
988 node->cache.start = ino;
989 node->cache.size = 1;
992 if (ino == BTRFS_FREE_INO_OBJECTID)
995 ret = insert_cache_extent(inode_cache, &node->cache);
997 return ERR_PTR(-EEXIST);
/*
 * Drain a list of orphan data extents: the first entry is unlinked over
 * and over until the list is empty.
 *
 * NOTE(review): the release of each entry's memory is not visible in
 * this excerpt.
 */
1002 static void free_orphan_data_extents(struct list_head *orphan_extents)
1004 struct orphan_data_extent *orphan;
1006 while (!list_empty(orphan_extents)) {
1007 orphan = list_entry(orphan_extents->next,
1008 struct orphan_data_extent, list);
1009 list_del(&orphan->list);
/*
 * Drop one reference on an inode record and tear it down when that was
 * the last one.
 *
 * While other references remain (the decremented count is still above
 * zero) the teardown is skipped. Otherwise every backref is unlinked and
 * the orphan extent list and the holes tree are released.
 *
 * NOTE(review): the early return and the free() calls for the backrefs
 * and the record itself are not visible in this excerpt.
 */
1014 static void free_inode_rec(struct inode_record *rec)
1016 struct inode_backref *backref;
1018 if (--rec->refs > 0)
1021 while (!list_empty(&rec->backrefs)) {
1022 backref = to_inode_backref(rec->backrefs.next);
1023 list_del(&backref->list);
1026 free_orphan_data_extents(&rec->orphan_extents);
1027 free_file_extent_holes(&rec->holes);
/*
 * Test whether an inode record holds nothing left to report: no errors,
 * fully checked, inode item found, link count equal to the number of
 * links found, and no backrefs pending.
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 */
1031 static int can_free_inode_rec(struct inode_record *rec)
1033 if (!rec->errors && rec->checked && rec->found_inode_item &&
1034 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Finish validating an inode record and release it if it is clean.
 *
 * Steps, once the inode item has been found:
 *  - every backref with both dir item and dir index is checked against
 *    the file type derived from the inode mode; fully consistent
 *    backrefs are unlinked from the record;
 *  - once the record is checked and not being merged, size/extent rules
 *    are applied (directories: size and absence of file extents; regular
 *    files and symlinks: nbytes, absence of dir items, extent coverage of
 *    isize unless no_holes is set) as well as checksum consistency;
 *  - if can_free_inode_rec() agrees, the record is removed from the cache
 *    and freed.
 *
 * The record must be unshared at that point (BUG_ON(refs != 1)).
 *
 * NOTE(review): the early returns and the free() calls are not visible in
 * this excerpt.
 */
1039 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1040 struct inode_record *rec)
1042 struct cache_extent *cache;
1043 struct inode_backref *tmp, *backref;
1044 struct ptr_node *node;
1047 if (!rec->found_inode_item)
1050 filetype = imode_to_type(rec->imode);
1051 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1052 if (backref->found_dir_item && backref->found_dir_index) {
1053 if (backref->filetype != filetype)
1054 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
/* A backref that is complete and error free needs no further tracking. */
1055 if (!backref->errors && backref->found_inode_ref &&
1056 rec->nlink == rec->found_link) {
1057 list_del(&backref->list);
1063 if (!rec->checked || rec->merging)
1066 if (S_ISDIR(rec->imode)) {
1067 if (rec->found_size != rec->isize)
1068 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1069 if (rec->found_file_extent)
1070 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1071 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1072 if (rec->found_dir_item)
1073 rec->errors |= I_ERR_ODD_DIR_ITEM;
1074 if (rec->found_size != rec->nbytes)
1075 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
/* Extents must cover the whole file unless holes are allowed (no_holes). */
1076 if (rec->nlink > 0 && !no_holes &&
1077 (rec->extent_end < rec->isize ||
1078 first_extent_gap(&rec->holes) < rec->isize))
1079 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1082 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1083 if (rec->found_csum_item && rec->nodatasum)
1084 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1085 if (rec->some_csum_missing && !rec->nodatasum)
1086 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1089 BUG_ON(rec->refs != 1);
1090 if (can_free_inode_rec(rec)) {
1091 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1092 node = container_of(cache, struct ptr_node, cache);
1093 BUG_ON(node->data != rec);
1094 remove_cache_extent(inode_cache, &node->cache);
1096 free_inode_rec(rec);
/*
 * Search the given root for the orphan item belonging to inode "ino".
 *
 * The key is built from BTRFS_ORPHAN_OBJECTID and BTRFS_ORPHAN_ITEM_KEY
 * and looked up read-only (no transaction, no insert/cow); the path is
 * released before returning.
 *
 * NOTE(review): the key offset assignment and the mapping of the search
 * result to the return value are not visible in this excerpt.
 */
1100 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1102 struct btrfs_path path;
1103 struct btrfs_key key;
1106 key.objectid = BTRFS_ORPHAN_OBJECTID;
1107 key.type = BTRFS_ORPHAN_ITEM_KEY;
1110 btrfs_init_path(&path);
1111 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1112 btrfs_release_path(&path);
/*
 * Record the contents of an INODE_ITEM in the currently active inode
 * record.
 *
 * The active record must belong to the key's objectid and must not be
 * shared. A second inode item for the same inode is flagged with
 * I_ERR_DUP_INODE_ITEM. Otherwise nlink, size, nbytes and mode are copied
 * from the item, the NODATASUM inode flag is evaluated, and an inode with
 * nlink == 0 is provisionally flagged with I_ERR_NO_ORPHAN_ITEM. Finally
 * maybe_free_inode_rec() gets a chance to release the record.
 *
 * NOTE(review): the statement executed for NODATASUM and the return
 * values are not visible in this excerpt.
 */
1118 static int process_inode_item(struct extent_buffer *eb,
1119 int slot, struct btrfs_key *key,
1120 struct shared_node *active_node)
1122 struct inode_record *rec;
1123 struct btrfs_inode_item *item;
1125 rec = active_node->current;
1126 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1127 if (rec->found_inode_item) {
1128 rec->errors |= I_ERR_DUP_INODE_ITEM;
1131 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1132 rec->nlink = btrfs_inode_nlink(eb, item);
1133 rec->isize = btrfs_inode_size(eb, item);
1134 rec->nbytes = btrfs_inode_nbytes(eb, item);
1135 rec->imode = btrfs_inode_mode(eb, item);
1136 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1138 rec->found_inode_item = 1;
1139 if (rec->nlink == 0)
1140 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1141 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find the backref of an inode record that matches (dir, name), creating
 * it when none exists.
 *
 * Existing backrefs are matched on parent directory, name length and name
 * bytes. A new backref is allocated with room for the name plus a
 * terminating NUL, zeroed, filled in and appended to the record's backref
 * list.
 *
 * NOTE(review): the loop exits, the allocation-failure handling, the
 * assignment of the dir field and the return statements are not visible
 * in this excerpt.
 */
1145 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1147 int namelen, u64 dir)
1149 struct inode_backref *backref;
1151 list_for_each_entry(backref, &rec->backrefs, list) {
1152 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1154 if (backref->dir != dir || backref->namelen != namelen)
1156 if (memcmp(name, backref->name, namelen))
1161 backref = malloc(sizeof(*backref) + namelen + 1);
/* Only the fixed part is zeroed; the name bytes are filled in below. */
1164 memset(backref, 0, sizeof(*backref));
1166 backref->namelen = namelen;
1167 memcpy(backref->name, name, namelen);
1168 backref->name[namelen] = '\0';
1169 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Record one reference to inode "ino" coming from directory "dir".
 *
 * The inode record is fetched (or created) in modifiable form and the
 * backref for (dir, name) is located or created. Depending on itemtype
 * the matching found_* flag is set, index/filetype are stored, and
 * duplicates or disagreements with previously seen items are flagged:
 *  - BTRFS_DIR_INDEX_KEY: duplicate index, index mismatch against an
 *    inode ref, filetype mismatch against a dir item;
 *  - BTRFS_DIR_ITEM_KEY: duplicate dir item, filetype mismatch against a
 *    dir index;
 *  - BTRFS_INODE_REF_KEY / BTRFS_INODE_EXTREF_KEY: duplicate inode ref,
 *    index mismatch against a dir index.
 * Caller supplied error bits are OR-ed into the backref, and
 * maybe_free_inode_rec() runs at the end.
 *
 * NOTE(review): link counting, the handling of a failed backref lookup
 * and the return value are not visible in this excerpt.
 */
1173 static int add_inode_backref(struct cache_tree *inode_cache,
1174 u64 ino, u64 dir, u64 index,
1175 const char *name, int namelen,
1176 u8 filetype, u8 itemtype, int errors)
1178 struct inode_record *rec;
1179 struct inode_backref *backref;
1181 rec = get_inode_rec(inode_cache, ino, 1);
1182 BUG_ON(IS_ERR(rec));
1183 backref = get_inode_backref(rec, name, namelen, dir);
1186 backref->errors |= errors;
1187 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1188 if (backref->found_dir_index)
1189 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1190 if (backref->found_inode_ref && backref->index != index)
1191 backref->errors |= REF_ERR_INDEX_UNMATCH;
1192 if (backref->found_dir_item && backref->filetype != filetype)
1193 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1195 backref->index = index;
1196 backref->filetype = filetype;
1197 backref->found_dir_index = 1;
1198 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1200 if (backref->found_dir_item)
1201 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1202 if (backref->found_dir_index && backref->filetype != filetype)
1203 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1205 backref->filetype = filetype;
1206 backref->found_dir_item = 1;
1207 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1208 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1209 if (backref->found_inode_ref)
1210 backref->errors |= REF_ERR_DUP_INODE_REF;
1211 if (backref->found_dir_index && backref->index != index)
1212 backref->errors |= REF_ERR_INDEX_UNMATCH;
1214 backref->index = index;
/* Remember which key type carried the ref so a merge can replay it. */
1216 backref->ref_type = itemtype;
1217 backref->found_inode_ref = 1;
1222 maybe_free_inode_rec(inode_cache, rec);
/*
 * Fold everything known about inode record "src" into "dst".
 *
 * Each source backref is replayed into dst through add_inode_backref(),
 * once per kind of item that was found for it (dir index, dir item,
 * inode ref). Found-flags are OR-ed, holes are copied when src's first
 * gap lies before dst's, link and size counters are added up, the extent
 * range is merged (flagging I_ERR_FILE_EXTENT_OVERLAP or recording the
 * gap as a hole), error bits are OR-ed, and the inode item fields are
 * taken over unless dst already has them, in which case
 * I_ERR_DUP_INODE_ITEM is set.
 *
 * NOTE(review): the results of the add_inode_backref() calls are not
 * used in the visible lines. The dir_count bookkeeping, the error checks
 * on "ret" and the return value are not visible in this excerpt.
 */
1226 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1227 struct cache_tree *dst_cache)
1229 struct inode_backref *backref;
1234 list_for_each_entry(backref, &src->backrefs, list) {
1235 if (backref->found_dir_index) {
1236 add_inode_backref(dst_cache, dst->ino, backref->dir,
1237 backref->index, backref->name,
1238 backref->namelen, backref->filetype,
1239 BTRFS_DIR_INDEX_KEY, backref->errors);
1241 if (backref->found_dir_item) {
1243 add_inode_backref(dst_cache, dst->ino,
1244 backref->dir, 0, backref->name,
1245 backref->namelen, backref->filetype,
1246 BTRFS_DIR_ITEM_KEY, backref->errors);
1248 if (backref->found_inode_ref) {
1249 add_inode_backref(dst_cache, dst->ino,
1250 backref->dir, backref->index,
1251 backref->name, backref->namelen, 0,
1252 backref->ref_type, backref->errors);
1256 if (src->found_dir_item)
1257 dst->found_dir_item = 1;
1258 if (src->found_file_extent)
1259 dst->found_file_extent = 1;
1260 if (src->found_csum_item)
1261 dst->found_csum_item = 1;
1262 if (src->some_csum_missing)
1263 dst->some_csum_missing = 1;
1264 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1265 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1270 BUG_ON(src->found_link < dir_count);
1271 dst->found_link += src->found_link - dir_count;
1272 dst->found_size += src->found_size;
/* (u64)-1 marks "no extent seen yet" (set when a record is created). */
1273 if (src->extent_start != (u64)-1) {
1274 if (dst->extent_start == (u64)-1) {
1275 dst->extent_start = src->extent_start;
1276 dst->extent_end = src->extent_end;
1278 if (dst->extent_end > src->extent_start)
1279 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1280 else if (dst->extent_end < src->extent_start) {
1281 ret = add_file_extent_hole(&dst->holes,
1283 src->extent_start - dst->extent_end);
1285 if (dst->extent_end < src->extent_end)
1286 dst->extent_end = src->extent_end;
1290 dst->errors |= src->errors;
1291 if (src->found_inode_item) {
1292 if (!dst->found_inode_item) {
1293 dst->nlink = src->nlink;
1294 dst->isize = src->isize;
1295 dst->nbytes = src->nbytes;
1296 dst->imode = src->imode;
1297 dst->nodatasum = src->nodatasum;
1298 dst->found_inode_item = 1;
1300 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Move or merge the inode records collected in src_node into dst_node.
 *
 * One reference on src_node is dropped first. The root_cache of src_node
 * is walked, then its inode_cache. For every entry a new ptr_node with
 * the same key is inserted into the corresponding destination cache; on
 * -EEXIST the record is merged into the existing one with
 * merge_inode_recs(), which is then marked checked, given to
 * maybe_free_inode_rec(), and the source record is released. Finally
 * dst_node->current is advanced to src_node's current inode when that
 * inode number is larger (or dst has no current record), after closing
 * out the previous current record.
 *
 * NOTE(review): the effect of the reference count reaching zero, the
 * loop structure, the handling of a failed ptr_node allocation and the
 * return value are not visible in this excerpt.
 */
1308 static int splice_shared_node(struct shared_node *src_node,
1309 struct shared_node *dst_node)
1311 struct cache_extent *cache;
1312 struct ptr_node *node, *ins;
1313 struct cache_tree *src, *dst;
1314 struct inode_record *rec, *conflict;
1315 u64 current_ino = 0;
1319 if (--src_node->refs == 0)
1321 if (src_node->current)
1322 current_ino = src_node->current->ino;
1324 src = &src_node->root_cache;
1325 dst = &dst_node->root_cache;
1327 cache = search_cache_extent(src, 0);
1329 node = container_of(cache, struct ptr_node, cache);
/* Step to the next entry before this one may be removed from src. */
1331 cache = next_cache_extent(cache);
1334 remove_cache_extent(src, &node->cache);
1337 ins = malloc(sizeof(*ins));
1339 ins->cache.start = node->cache.start;
1340 ins->cache.size = node->cache.size;
1344 ret = insert_cache_extent(dst, &ins->cache);
1345 if (ret == -EEXIST) {
1346 conflict = get_inode_rec(dst, rec->ino, 1);
1347 BUG_ON(IS_ERR(conflict));
1348 merge_inode_recs(rec, conflict, dst);
1350 conflict->checked = 1;
1351 if (dst_node->current == conflict)
1352 dst_node->current = NULL;
1354 maybe_free_inode_rec(dst, conflict);
1355 free_inode_rec(rec);
/* Second pass: repeat the walk for the inode caches. */
1362 if (src == &src_node->root_cache) {
1363 src = &src_node->inode_cache;
1364 dst = &dst_node->inode_cache;
1368 if (current_ino > 0 && (!dst_node->current ||
1369 current_ino > dst_node->current->ino)) {
1370 if (dst_node->current) {
1371 dst_node->current->checked = 1;
1372 maybe_free_inode_rec(dst, dst_node->current);
1374 dst_node->current = get_inode_rec(dst, current_ino, 1);
1375 BUG_ON(IS_ERR(dst_node->current));
/*
 * Release callback for one cache entry of an inode-record tree: recovers the
 * enclosing ptr_node from the cache_extent and frees an inode record.
 * NOTE(review): the line that loads `rec` is not visible in this view.
 */
1380 static void free_inode_ptr(struct cache_extent *cache)
1382 struct ptr_node *node;
1383 struct inode_record *rec;
1385 node = container_of(cache, struct ptr_node, cache);
1387 free_inode_rec(rec);
/*
 * Instantiates a tree-freeing helper from free_inode_ptr; presumably this is
 * what defines free_inode_recs_tree(), which is called elsewhere in this file
 * -- TODO confirm against the macro definition.
 */
1391 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the shared_node registered in `shared` for the one-byte range
 * starting at bytenr.
 * NOTE(review): the not-found path and the return statement are not visible
 * in this view.
 */
1393 static struct shared_node *find_shared_node(struct cache_tree *shared,
1396 struct cache_extent *cache;
1397 struct shared_node *node;
1399 cache = lookup_cache_extent(shared, bytenr, 1);
/* Map the embedded cache_extent back to its shared_node. */
1401 node = container_of(cache, struct shared_node, cache);
/*
 * Allocate a zero-initialised shared_node keyed by bytenr (size 1), set up its
 * two empty record caches and insert it into `shared`.
 * NOTE(review): the allocation-failure check, the use of `refs` and the return
 * statement are not visible in this view.
 */
1407 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1410 struct shared_node *node;
1412 node = calloc(1, sizeof(*node));
1415 node->cache.start = bytenr;
1416 node->cache.size = 1;
1417 cache_tree_init(&node->root_cache);
1418 cache_tree_init(&node->inode_cache);
1421 ret = insert_cache_extent(shared, &node->cache);
/*
 * Called when the tree walk reaches the tree block at bytenr on `level`.
 *
 * If no shared_node exists for the block yet, one is created and made the
 * active node for this level. Otherwise the records already collected for the
 * block are either discarded (walking from the root level of a root whose
 * root_item has zero refs) or spliced into the currently active node.
 *
 * NOTE(review): the branch structure and return values are only partly visible
 * in this view.
 */
1426 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1427 struct walk_control *wc, int level)
1429 struct shared_node *node;
1430 struct shared_node *dest;
1433 if (level == wc->active_node)
/* The active node must sit above the level being entered. */
1436 BUG_ON(wc->active_node <= level);
1437 node = find_shared_node(&wc->shared, bytenr);
/* First visit of this block: register it and make it the active node. */
1439 ret = add_shared_node(&wc->shared, bytenr, refs);
1441 node = find_shared_node(&wc->shared, bytenr);
1442 wc->nodes[level] = node;
1443 wc->active_node = level;
/* Root with zero refs: drop a reference and free the records on the last one. */
1447 if (wc->root_level == wc->active_node &&
1448 btrfs_root_refs(&root->root_item) == 0) {
1449 if (--node->refs == 0) {
1450 free_inode_recs_tree(&node->root_cache);
1451 free_inode_recs_tree(&node->inode_cache);
1452 remove_cache_extent(&wc->shared, &node->cache);
/* Otherwise hand the cached records to the currently active node. */
1458 dest = wc->nodes[wc->active_node];
1459 splice_shared_node(node, dest);
1460 if (node->refs == 0) {
1461 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Called when the walk leaves the active shared node at `level`.
 *
 * Searches the levels above for the next entry to become the active node,
 * clears the slot of the node being left and, unless the walk is at the root
 * level of a root with zero refs, splices the left node's records into the
 * new active node.
 *
 * NOTE(review): the loop body, the else branch and the return statements are
 * not visible in this view.
 */
1467 static int leave_shared_node(struct btrfs_root *root,
1468 struct walk_control *wc, int level)
1470 struct shared_node *node;
1471 struct shared_node *dest;
/* Nothing above the root level to hand the records to. */
1474 if (level == wc->root_level)
1477 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
/* A higher level must have been found by the loop above. */
1481 BUG_ON(i >= BTRFS_MAX_LEVEL);
1483 node = wc->nodes[wc->active_node];
1484 wc->nodes[wc->active_node] = NULL;
1485 wc->active_node = i;
1487 dest = wc->nodes[wc->active_node];
1488 if (wc->active_node < wc->root_level ||
1489 btrfs_root_refs(&root->root_item) > 0) {
/* splice_shared_node() drops a reference; at least one must remain. */
1490 BUG_ON(node->refs <= 1);
1491 splice_shared_node(node, dest);
1493 BUG_ON(node->refs < 2);
1502 * 1 - if the root with id child_root_id is a child of root parent_root_id
1503 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1504 * has other root(s) as parent(s)
1505 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Determine the relation between child_root_id and parent_root_id by looking
 * at the tree root: first a ROOT_REF item (parent_root_id, child_root_id) is
 * searched, then the ROOT_BACKREF items of child_root_id are scanned for one
 * whose offset equals parent_root_id. The final return distinguishes "has
 * some other parent" (0) from "has no parent at all" (2).
 *
 * NOTE(review): the early-return lines and loop construct are not visible in
 * this view.
 */
1507 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1510 struct btrfs_path path;
1511 struct btrfs_key key;
1512 struct extent_buffer *leaf;
1516 btrfs_init_path(&path);
/* Step 1: forward reference parent -> child. */
1518 key.objectid = parent_root_id;
1519 key.type = BTRFS_ROOT_REF_KEY;
1520 key.offset = child_root_id;
1521 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1525 btrfs_release_path(&path);
/* Step 2: scan the back references stored under the child. */
1529 key.objectid = child_root_id;
1530 key.type = BTRFS_ROOT_BACKREF_KEY;
1532 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1538 leaf = path.nodes[0];
/* Ran off the end of this leaf: continue in the next one. */
1539 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1540 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1543 leaf = path.nodes[0];
1546 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Left the range of back references belonging to the child. */
1547 if (key.objectid != child_root_id ||
1548 key.type != BTRFS_ROOT_BACKREF_KEY)
1553 if (key.offset == parent_root_id) {
1554 btrfs_release_path(&path);
1561 btrfs_release_path(&path);
1564 return has_parent ? 0 : 2;
/*
 * Record every directory entry packed into the DIR_ITEM/DIR_INDEX item at
 * `slot` as an inode backref.
 *
 * Each entry's name is copied into namebuf (truncated and flagged with
 * REF_ERR_NAME_TOO_LONG when it crosses the item boundary or exceeds
 * BTRFS_NAME_LEN). For DIR_ITEM keys the key offset is compared with the
 * name hash and a mismatch is flagged with I_ERR_ODD_DIR_ITEM. The backref
 * goes to the inode cache or the root cache depending on the type of the
 * entry's location key; any other location type is reported and recorded
 * under BTRFS_MULTIPLE_OBJECTIDS.
 *
 * namebuf is a raw byte buffer with no terminating NUL, so it is printed with
 * a length-bounded "%.*s" rather than "%s".
 *
 * NOTE(review): local declarations, some branch lines and the return
 * statement are not visible in this view.
 */
1567 static int process_dir_item(struct extent_buffer *eb,
1568 int slot, struct btrfs_key *key,
1569 struct shared_node *active_node)
1579 struct btrfs_dir_item *di;
1580 struct inode_record *rec;
1581 struct cache_tree *root_cache;
1582 struct cache_tree *inode_cache;
1583 struct btrfs_key location;
1584 char namebuf[BTRFS_NAME_LEN];
1586 root_cache = &active_node->root_cache;
1587 inode_cache = &active_node->inode_cache;
1588 rec = active_node->current;
1589 rec->found_dir_item = 1;
1591 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1592 total = btrfs_item_size_nr(eb, slot);
1593 while (cur < total) {
1595 btrfs_dir_item_key_to_cpu(eb, di, &location);
1596 name_len = btrfs_dir_name_len(eb, di);
1597 data_len = btrfs_dir_data_len(eb, di);
1598 filetype = btrfs_dir_type(eb, di);
1600 rec->found_size += name_len;
/* The name must fit both the item and the maximum name length. */
1601 if (cur + sizeof(*di) + name_len > total ||
1602 name_len > BTRFS_NAME_LEN) {
1603 error = REF_ERR_NAME_TOO_LONG;
/* Not even the fixed-size header fits: nothing left to read. */
1605 if (cur + sizeof(*di) > total)
1607 len = min_t(u32, total - cur - sizeof(*di),
1614 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
/* DIR_ITEM keys are indexed by the hash of the name. */
1616 if (key->type == BTRFS_DIR_ITEM_KEY &&
1617 key->offset != btrfs_name_hash(namebuf, len)) {
1618 rec->errors |= I_ERR_ODD_DIR_ITEM;
1619 error("DIR_ITEM[%llu %llu] name %.*s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1620 key->objectid, key->offset, (int)len, namebuf, len, filetype,
1621 key->offset, btrfs_name_hash(namebuf, len));
1624 if (location.type == BTRFS_INODE_ITEM_KEY) {
1625 add_inode_backref(inode_cache, location.objectid,
1626 key->objectid, key->offset, namebuf,
1627 len, filetype, key->type, error);
1628 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1629 add_inode_backref(root_cache, location.objectid,
1630 key->objectid, key->offset,
1631 namebuf, len, filetype,
1634 fprintf(stderr, "invalid location in dir item %u\n",
1636 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1637 key->objectid, key->offset, namebuf,
1638 len, filetype, key->type, error);
/* Step over header, name and trailing data to the next packed entry. */
1641 len = sizeof(*di) + name_len + data_len;
1642 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item is expected to hold exactly one entry. */
1645 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1646 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record every btrfs_inode_ref packed into the INODE_REF item at `slot` as an
 * inode backref in the active node's inode cache.
 *
 * A name that would cross the item boundary or exceed BTRFS_NAME_LEN is
 * truncated and flagged with REF_ERR_NAME_TOO_LONG.
 *
 * NOTE(review): local declarations, the non-error branch and the return
 * statement are not visible in this view.
 */
1651 static int process_inode_ref(struct extent_buffer *eb,
1652 int slot, struct btrfs_key *key,
1653 struct shared_node *active_node)
1661 struct cache_tree *inode_cache;
1662 struct btrfs_inode_ref *ref;
1663 char namebuf[BTRFS_NAME_LEN];
1665 inode_cache = &active_node->inode_cache;
1667 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1668 total = btrfs_item_size_nr(eb, slot);
1669 while (cur < total) {
1670 name_len = btrfs_inode_ref_name_len(eb, ref);
1671 index = btrfs_inode_ref_index(eb, ref);
1673 /* inode_ref + namelen should not cross item boundary */
1674 if (cur + sizeof(*ref) + name_len > total ||
1675 name_len > BTRFS_NAME_LEN) {
/* Not even the fixed-size header fits into what is left of the item. */
1676 if (total < cur + sizeof(*ref))
1679 /* Still try to read out the remaining part */
1680 len = min_t(u32, total - cur - sizeof(*ref),
1682 error = REF_ERR_NAME_TOO_LONG;
1688 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1689 add_inode_backref(inode_cache, key->objectid, key->offset,
1690 index, namebuf, len, 0, key->type, error);
/* Step to the next packed reference using the on-disk name length. */
1692 len = sizeof(*ref) + name_len;
1693 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Record every btrfs_inode_extref packed into the INODE_EXTREF item at `slot`
 * as an inode backref in the active node's inode cache. The parent directory
 * is taken from the extref itself rather than from the key offset.
 *
 * Names longer than BTRFS_NAME_LEN are truncated and flagged with
 * REF_ERR_NAME_TOO_LONG.
 *
 * NOTE(review): unlike process_inode_ref(), no check that the name stays
 * inside the item is visible here -- confirm against the complete function.
 */
1699 static int process_inode_extref(struct extent_buffer *eb,
1700 int slot, struct btrfs_key *key,
1701 struct shared_node *active_node)
1710 struct cache_tree *inode_cache;
1711 struct btrfs_inode_extref *extref;
1712 char namebuf[BTRFS_NAME_LEN];
1714 inode_cache = &active_node->inode_cache;
1716 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1717 total = btrfs_item_size_nr(eb, slot);
1718 while (cur < total) {
1719 name_len = btrfs_inode_extref_name_len(eb, extref);
1720 index = btrfs_inode_extref_index(eb, extref);
1721 parent = btrfs_inode_extref_parent(eb, extref);
1722 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: keep only what fits into namebuf. */
1726 len = BTRFS_NAME_LEN;
1727 error = REF_ERR_NAME_TOO_LONG;
1729 read_extent_buffer(eb, namebuf,
1730 (unsigned long)(extref + 1), len);
1731 add_inode_backref(inode_cache, key->objectid, parent,
1732 index, namebuf, len, 0, key->type, error);
/* Step to the next packed extref using the on-disk name length. */
1734 len = sizeof(*extref) + name_len;
1735 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Walk the EXTENT_CSUM items of the csum root that overlap the byte range
 * [start, start + len) and report through `found`.
 *
 * Each csum item covers (item size / csum size) sectors starting at its key
 * offset; the overlap with the remaining range is computed per item.
 *
 * NOTE(review): the lines that update `start`, `len` and `*found`, as well as
 * the loop construct and the return statement, are not visible in this view;
 * presumably *found accumulates the number of bytes covered -- TODO confirm.
 */
1742 static int count_csum_range(struct btrfs_root *root, u64 start,
1743 u64 len, u64 *found)
1745 struct btrfs_key key;
1746 struct btrfs_path path;
1747 struct extent_buffer *leaf;
1752 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1754 btrfs_init_path(&path);
1756 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1758 key.type = BTRFS_EXTENT_CSUM_KEY;
1760 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* No exact match: the preceding csum item may still cover `start`. */
1764 if (ret > 0 && path.slots[0] > 0) {
1765 leaf = path.nodes[0];
1766 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1767 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1768 key.type == BTRFS_EXTENT_CSUM_KEY)
1773 leaf = path.nodes[0];
/* Ran off the end of this leaf: continue in the next one. */
1774 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1775 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1780 leaf = path.nodes[0];
1783 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1784 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1785 key.type != BTRFS_EXTENT_CSUM_KEY)
1788 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* This item starts past the end of the requested range. */
1789 if (key.offset >= start + len)
1792 if (key.offset > start)
1795 size = btrfs_item_size_nr(leaf, path.slots[0]);
/* End of the byte range this csum item covers. */
1796 csum_end = key.offset + (size / csum_size) *
1797 root->fs_info->sectorsize;
1798 if (csum_end > start) {
1799 size = min(csum_end - start, len);
1808 btrfs_release_path(&path);
/*
 * Account one EXTENT_DATA item of the inode currently being processed.
 *
 * Tracks the covered file range (extent_start/extent_end), flags overlaps and
 * records holes between consecutive extents, validates the extent fields per
 * extent type, adds the extent's size to found_size and, for extents with a
 * disk bytenr outside the data reloc tree, counts the checksums that exist
 * for the on-disk range.
 *
 * NOTE(review): several short lines (conditions, else branches, returns) are
 * not visible in this view.
 */
1814 static int process_file_extent(struct btrfs_root *root,
1815 struct extent_buffer *eb,
1816 int slot, struct btrfs_key *key,
1817 struct shared_node *active_node)
1819 struct inode_record *rec;
1820 struct btrfs_file_extent_item *fi;
1822 u64 disk_bytenr = 0;
1823 u64 extent_offset = 0;
/* Low bits that must be clear for a sector-aligned value. */
1824 u64 mask = root->fs_info->sectorsize - 1;
1828 rec = active_node->current;
1829 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1830 rec->found_file_extent = 1;
/* First extent seen for this inode: start tracking its range here. */
1832 if (rec->extent_start == (u64)-1) {
1833 rec->extent_start = key->offset;
1834 rec->extent_end = key->offset;
/* Compare this extent's start with the end of the previous one. */
1837 if (rec->extent_end > key->offset)
1838 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1839 else if (rec->extent_end < key->offset) {
1840 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1841 key->offset - rec->extent_end);
1846 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1847 extent_type = btrfs_file_extent_type(eb, fi);
1849 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1850 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1852 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1853 rec->found_size += num_bytes;
/* Round the inline length up to a sector boundary. */
1854 num_bytes = (num_bytes + mask) & ~mask;
1855 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1856 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1857 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1858 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1859 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned. */
1860 if (num_bytes == 0 || (num_bytes & mask))
1861 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced slice must lie inside the extent's ram_bytes. */
1862 if (num_bytes + extent_offset >
1863 btrfs_file_extent_ram_bytes(eb, fi))
1864 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not carry any encoding. */
1865 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1866 (btrfs_file_extent_compression(eb, fi) ||
1867 btrfs_file_extent_encryption(eb, fi) ||
1868 btrfs_file_extent_other_encoding(eb, fi)))
1869 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents with a disk bytenr contribute to found_size. */
1870 if (disk_bytenr > 0)
1871 rec->found_size += num_bytes;
1873 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1875 rec->extent_end = key->offset + num_bytes;
1878 * The data reloc tree will copy full extents into its inode and then
1879 * copy the corresponding csums. Because the extent it copied could be
1880 * a preallocated extent that hasn't been written to yet there may be no
1881 * csums to copy, ergo we won't have csums for our file extent. This is
1882 * ok so just don't bother checking csums if the inode belongs to the
1885 if (disk_bytenr > 0 &&
1886 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* Compressed extents are checksummed over their on-disk size. */
1888 if (btrfs_file_extent_compression(eb, fi))
1889 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1891 disk_bytenr += extent_offset;
1893 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1896 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1898 rec->found_csum_item = 1;
1899 if (found < num_bytes)
1900 rec->some_csum_missing = 1;
1901 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1903 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Feed every item of the leaf `eb` to the matching per-item-type handler.
 *
 * Items are attributed to the active shared node. Whenever an item belongs to
 * a higher inode number than the one currently tracked, the previous record
 * is marked checked (and possibly freed) and the record for the new inode
 * becomes current.
 *
 * NOTE(review): the switch statement, the skip/continue lines and the return
 * statement are not visible in this view.
 */
1909 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1910 struct walk_control *wc)
1912 struct btrfs_key key;
1916 struct cache_tree *inode_cache;
1917 struct shared_node *active_node;
/* Walking from the root level of a root whose root_item has zero refs. */
1919 if (wc->root_level == wc->active_node &&
1920 btrfs_root_refs(&root->root_item) == 0)
1923 active_node = wc->nodes[wc->active_node];
1924 inode_cache = &active_node->inode_cache;
1925 nritems = btrfs_header_nritems(eb);
1926 for (i = 0; i < nritems; i++) {
1927 btrfs_item_key_to_cpu(eb, &key, i);
1929 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1931 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Moved on to a new inode: retire the previous record, fetch the new one. */
1934 if (active_node->current == NULL ||
1935 active_node->current->ino < key.objectid) {
1936 if (active_node->current) {
1937 active_node->current->checked = 1;
1938 maybe_free_inode_rec(inode_cache,
1939 active_node->current);
1941 active_node->current = get_inode_rec(inode_cache,
1943 BUG_ON(IS_ERR(active_node->current));
/* Dispatch on the item type. */
1946 case BTRFS_DIR_ITEM_KEY:
1947 case BTRFS_DIR_INDEX_KEY:
1948 ret = process_dir_item(eb, i, &key, active_node);
1950 case BTRFS_INODE_REF_KEY:
1951 ret = process_inode_ref(eb, i, &key, active_node);
1953 case BTRFS_INODE_EXTREF_KEY:
1954 ret = process_inode_extref(eb, i, &key, active_node);
1956 case BTRFS_INODE_ITEM_KEY:
1957 ret = process_inode_item(eb, i, &key, active_node);
1959 case BTRFS_EXTENT_DATA_KEY:
1960 ret = process_file_extent(root, eb, i, &key,
/*
 * Per-level cache used through `struct node_refs *nrefs` in the tree walkers:
 * the block address last looked up on each level, its reference count, and
 * whether that block has to be checked from the current root.
 * NOTE(review): the line opening the struct is not visible in this view.
 */
1971 u64 bytenr[BTRFS_MAX_LEVEL];
1972 u64 refs[BTRFS_MAX_LEVEL];
1973 int need_check[BTRFS_MAX_LEVEL];
/* Forward declarations for helpers defined further down in the file. */
1976 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1977 struct node_refs *nrefs, u64 level);
1978 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1979 unsigned int ext_ref);
1982 * Returns >0 Found error, not fatal, should continue
1983 * Returns <0 Fatal error, must exit the whole check
1984 * Returns 0 No errors found
/*
 * Check the inodes stored in the leaf at path->nodes[0].
 *
 * The leaf is first scanned for the first inode item (or the first change of
 * inode number); from there check_inode_item() is called repeatedly and its
 * results are OR-ed into `err`. Once the path has moved to a different leaf,
 * the reference information of the changed levels is refreshed via
 * update_nodes_refs() and levels that need no check are handled.
 *
 * NOTE(review): loop constructs, goto/break lines and the return statement
 * are not visible in this view.
 */
1986 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1987 struct node_refs *nrefs, int *level, int ext_ref)
1989 struct extent_buffer *cur = path->nodes[0];
1990 struct btrfs_key key;
1994 int root_level = btrfs_header_level(root->node);
1996 int ret = 0; /* Final return value */
1997 int err = 0; /* Positive error bitmap */
/* Remember which leaf we started on, to detect a leaf switch later. */
1999 cur_bytenr = cur->start;
2001 /* skip to first inode item or the first inode number change */
2002 nritems = btrfs_header_nritems(cur);
2003 for (i = 0; i < nritems; i++) {
2004 btrfs_item_key_to_cpu(cur, &key, i);
2006 first_ino = key.objectid;
2007 if (key.type == BTRFS_INODE_ITEM_KEY ||
2008 (first_ino && first_ino != key.objectid))
/* Position the slot past the last item of the leaf. */
2012 path->slots[0] = nritems;
2018 err |= check_inode_item(root, path, ext_ref);
2020 /* modify cur since check_inode_item may change path */
2021 cur = path->nodes[0];
2023 if (err & LAST_ITEM)
2026 /* still have inode items in this leaf */
2027 if (cur->start == cur_bytenr)
2031 * we have switched to another leaf, above nodes may
2032 * have changed, here walk down the path, if a node
2033 * or leaf is shared, check whether we can skip this
2036 for (i = root_level; i >= 0; i--) {
/* This level still points at the block already recorded in nrefs. */
2037 if (path->nodes[i]->start == nrefs->bytenr[i])
2040 ret = update_nodes_refs(root,
2041 path->nodes[i]->start,
2046 if (!nrefs->need_check[i]) {
/* Release the buffers held for the levels below *level. */
2052 for (i = 0; i < *level; i++) {
2053 free_extent_buffer(path->nodes[i]);
2054 path->nodes[i] = NULL;
/*
 * Issue readahead for the child blocks of `node`, starting at `slot` and
 * running to the node's last pointer.
 * NOTE(review): the level check between reading the header level and the loop
 * is not visible in this view.
 */
2063 static void reada_walk_down(struct btrfs_root *root,
2064 struct extent_buffer *node, int slot)
2066 struct btrfs_fs_info *fs_info = root->fs_info;
2073 level = btrfs_header_level(node);
2077 nritems = btrfs_header_nritems(node);
2078 for (i = slot; i < nritems; i++) {
2079 bytenr = btrfs_node_blockptr(node, i);
2080 ptr_gen = btrfs_node_ptr_generation(node, i);
2081 readahead_tree_block(fs_info, bytenr, ptr_gen);
2086 * Check the child node/leaf by the following condition:
2087 * 1. the first item key of the node/leaf should be the same with the one
2089 * 2. block in parent node should match the child node/leaf.
2090 * 3. generation of parent node and child's header should be consistent.
2092 * Or the child node/leaf pointed by the key in parent is not valid.
2094 * We hope to check leaf owner too, but since subvol may share leaves,
2095 * which makes leaf owner check not so strong, key check should be
2096 * sufficient enough for that case.
/*
 * Cross-check the pointer at `slot` of `parent` against the block `child` it
 * refers to: first key, block address and generation must all agree. Every
 * mismatch is reported on stderr with the value the parent expects as
 * "wanted" and the value found in the child as "have".
 *
 * NOTE(review): the lines that set and return the result are not visible in
 * this view.
 */
2098 static int check_child_node(struct extent_buffer *parent, int slot,
2099 struct extent_buffer *child)
2101 struct btrfs_key parent_key;
2102 struct btrfs_key child_key;
2105 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* A leaf stores item keys, an internal node stores node keys. */
2106 if (btrfs_header_level(child) == 0)
2107 btrfs_item_key_to_cpu(child, &child_key, 0);
2109 btrfs_node_key_to_cpu(child, &child_key, 0);
2111 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2114 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2115 parent_key.objectid, parent_key.type, parent_key.offset,
2116 child_key.objectid, child_key.type, child_key.offset);
2118 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2120 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2121 btrfs_node_blockptr(parent, slot),
2122 btrfs_header_bytenr(child));
2124 if (btrfs_node_ptr_generation(parent, slot) !=
2125 btrfs_header_generation(child)) {
/* "wanted" is the generation in the parent pointer, "have" the child's own. */
2127 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2128 btrfs_node_ptr_generation(parent, slot),
2129 btrfs_header_generation(child));
2135 * For a tree node or leaf that is shared, we don't need to iterate it
2136 * in every fs or file tree check. Here we find all of its root ids, and only
2137 * check it in the fs or file tree which has the smallest root id.
/*
 * Decide whether a tree block referenced by the roots listed in `roots` has
 * to be checked while walking `root`: compares root->objectid with the first
 * entry of the ulist's rb-tree.
 * NOTE(review): the return statements are not visible in this view.
 */
2139 static int need_check(struct btrfs_root *root, struct ulist *roots)
2141 struct rb_node *node;
2142 struct ulist_node *u;
/* Only one root references the block. */
2144 if (roots->nnodes == 1)
/* First node of the rb-tree. */
2147 node = rb_first(&roots->root);
2148 u = rb_entry(node, struct ulist_node, rb_node);
2150 * current root id is not smallest, we skip it and let it be checked
2151 * in the fs or file tree who hash the smallest root id.
2153 if (root->objectid != u->val)
2160 * for a tree node or leaf, we record its reference count, so later if we still
2161 * process this node or leaf, don't need to compute its reference count again.
/*
 * Refresh the cached reference information for `level` in nrefs when the
 * block at that level changed to bytenr: looks up the extent's reference
 * count, stores bytenr/refs, and determines need_check[level] via
 * btrfs_find_all_roots() and need_check().
 *
 * NOTE(review): the error handling, the condition selecting between the two
 * need_check assignments and the return statement are not visible in this
 * view.
 */
2163 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2164 struct node_refs *nrefs, u64 level)
2168 struct ulist *roots;
/* Only redo the lookups when a different block is now at this level. */
2170 if (nrefs->bytenr[level] != bytenr) {
2171 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2172 level, 1, &refs, NULL);
2176 nrefs->bytenr[level] = bytenr;
2177 nrefs->refs[level] = refs;
2179 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2184 check = need_check(root, roots);
2186 nrefs->need_check[level] = check;
2188 nrefs->need_check[level] = 1;
/*
 * Descend from path->nodes[*level] towards the leaves.
 *
 * Reference counts are taken from nrefs when the block is already cached
 * there, otherwise looked up and cached. Leaves are handed to
 * process_one_leaf(); for internal nodes the child under the current slot is
 * read (with readahead for its siblings), validated against the parent
 * pointer and the generic leaf/node checks, and installed one level down in
 * the path. Unreadable children are recorded as corrupt extents.
 *
 * NOTE(review): branch conditions on the reference count, the goto/break
 * lines and the return statement are not visible in this view.
 */
2195 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2196 struct walk_control *wc, int *level,
2197 struct node_refs *nrefs)
2199 enum btrfs_tree_block_status status;
2202 struct btrfs_fs_info *fs_info = root->fs_info;
2203 struct extent_buffer *next;
2204 struct extent_buffer *cur;
2208 WARN_ON(*level < 0);
2209 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached reference count of the starting block when possible. */
2211 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2212 refs = nrefs->refs[*level];
2215 ret = btrfs_lookup_extent_info(NULL, root,
2216 path->nodes[*level]->start,
2217 *level, 1, &refs, NULL);
2222 nrefs->bytenr[*level] = path->nodes[*level]->start;
2223 nrefs->refs[*level] = refs;
2227 ret = enter_shared_node(root, path->nodes[*level]->start,
2235 while (*level >= 0) {
2236 WARN_ON(*level < 0);
2237 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2238 cur = path->nodes[*level];
/* The block's own header must agree with the level we think we are on. */
2240 if (btrfs_header_level(cur) != *level)
/* All slots of this block have been consumed. */
2243 if (path->slots[*level] >= btrfs_header_nritems(cur))
2246 ret = process_one_leaf(root, cur, wc);
/* Internal node: look at the child under the current slot. */
2251 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2252 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2254 if (bytenr == nrefs->bytenr[*level - 1]) {
2255 refs = nrefs->refs[*level - 1];
2257 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2258 *level - 1, 1, &refs, NULL);
2262 nrefs->bytenr[*level - 1] = bytenr;
2263 nrefs->refs[*level - 1] = refs;
2268 ret = enter_shared_node(root, bytenr, refs,
2271 path->slots[*level]++;
/* Use the cached buffer if it is up to date, otherwise read from disk. */
2276 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2277 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2278 free_extent_buffer(next);
2279 reada_walk_down(root, cur, path->slots[*level]);
2280 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2281 if (!extent_buffer_uptodate(next)) {
2282 struct btrfs_key node_key;
/* Unreadable child: remember it as a corrupt extent. */
2284 btrfs_node_key_to_cpu(path->nodes[*level],
2286 path->slots[*level]);
2287 btrfs_add_corrupt_extent_record(root->fs_info,
2289 path->nodes[*level]->start,
2290 root->fs_info->nodesize,
2297 ret = check_child_node(cur, path->slots[*level], next);
2299 free_extent_buffer(next);
2304 if (btrfs_is_leaf(next))
2305 status = btrfs_check_leaf(root, NULL, next);
2307 status = btrfs_check_node(root, NULL, next);
2308 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2309 free_extent_buffer(next);
/* Step down: install the child one level below in the path. */
2314 *level = *level - 1;
2315 free_extent_buffer(path->nodes[*level]);
2316 path->nodes[*level] = next;
2317 path->slots[*level] = 0;
/* Mark the current block as fully consumed. */
2320 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Forward declaration of check_inode_item(); an identical prototype already
 * appears earlier in this file.
 */
2324 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2325 unsigned int ext_ref);
2328 * Returns >0 Found error, should continue
2329 * Returns <0 Fatal error, must exit the whole check
2330 * Returns 0 No errors found
/*
 * Descend from path->nodes[*level] towards the leaves, refreshing the
 * reference cache in nrefs via update_nodes_refs() on the way.
 *
 * Leaves are validated with btrfs_check_leaf() and handed to
 * process_one_leaf_v2(); internal nodes are validated with
 * btrfs_check_node(). Children for which need_check is clear are skipped by
 * advancing the slot. Other children are read (with readahead for their
 * siblings), validated against the parent pointer and installed one level
 * down in the path. Unreadable children are recorded as corrupt extents.
 *
 * NOTE(review): error handling, goto/break lines and the return statement
 * are not visible in this view.
 */
2332 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2333 int *level, struct node_refs *nrefs, int ext_ref)
2335 enum btrfs_tree_block_status status;
2338 struct btrfs_fs_info *fs_info = root->fs_info;
2339 struct extent_buffer *next;
2340 struct extent_buffer *cur;
2343 WARN_ON(*level < 0);
2344 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2346 ret = update_nodes_refs(root, path->nodes[*level]->start,
2351 while (*level >= 0) {
2352 WARN_ON(*level < 0);
2353 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2354 cur = path->nodes[*level];
/* The block's own header must agree with the level we think we are on. */
2356 if (btrfs_header_level(cur) != *level)
/* All slots of this block have been consumed. */
2359 if (path->slots[*level] >= btrfs_header_nritems(cur))
2361 /* Don't forget to check leaf/node validity */
2363 ret = btrfs_check_leaf(root, NULL, cur);
2364 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2368 ret = process_one_leaf_v2(root, path, nrefs,
/* process_one_leaf_v2() may have moved the path; reload the block. */
2370 cur = path->nodes[*level];
2373 ret = btrfs_check_node(root, NULL, cur);
2374 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
/* Internal node: look at the child under the current slot. */
2379 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2380 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2382 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
/* Child does not need checking from this root: skip it. */
2385 if (!nrefs->need_check[*level - 1]) {
2386 path->slots[*level]++;
/* Use the cached buffer if it is up to date, otherwise read from disk. */
2390 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2391 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2392 free_extent_buffer(next);
2393 reada_walk_down(root, cur, path->slots[*level]);
2394 next = read_tree_block(fs_info, bytenr, ptr_gen);
2395 if (!extent_buffer_uptodate(next)) {
2396 struct btrfs_key node_key;
/* Unreadable child: remember it as a corrupt extent. */
2398 btrfs_node_key_to_cpu(path->nodes[*level],
2400 path->slots[*level]);
2401 btrfs_add_corrupt_extent_record(fs_info,
2403 path->nodes[*level]->start,
2411 ret = check_child_node(cur, path->slots[*level], next);
2415 if (btrfs_is_leaf(next))
2416 status = btrfs_check_leaf(root, NULL, next);
2418 status = btrfs_check_node(root, NULL, next);
2419 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2420 free_extent_buffer(next);
/* Step down: install the child one level below in the path. */
2425 *level = *level - 1;
2426 free_extent_buffer(path->nodes[*level]);
2427 path->nodes[*level] = next;
2428 path->slots[*level] = 0;
/*
 * Climb from *level towards the root looking for a block that still has an
 * unvisited slot. Exhausted blocks are released from the path, and the
 * shared-node bookkeeping is updated through leave_shared_node() when the
 * level being left is the active node.
 *
 * NOTE(review): the body of the "slot available" branch, the level update and
 * the return statements are not visible in this view.
 */
2433 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2434 struct walk_control *wc, int *level)
2437 struct extent_buffer *leaf;
2439 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite its name, `leaf` is the block at level i (node or leaf). */
2440 leaf = path->nodes[i];
/* Another slot remains in this block. */
2441 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* Block exhausted: drop it from the path. */
2446 free_extent_buffer(path->nodes[*level]);
2447 path->nodes[*level] = NULL;
2448 BUG_ON(*level > wc->active_node);
2449 if (*level == wc->active_node)
2450 leave_shared_node(root, wc, *level);
/*
 * Climb from *level towards the root looking for a block that still has an
 * unvisited slot; exhausted blocks are released from the path.
 *
 * NOTE(review): the remaining parameters, the body of the "slot available"
 * branch, the level update and the return statements are not visible in this
 * view.
 */
2457 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2461 struct extent_buffer *leaf;
2463 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite its name, `leaf` is the block at level i (node or leaf). */
2464 leaf = path->nodes[i];
/* Another slot remains in this block. */
2465 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* Block exhausted: drop it from the path. */
2470 free_extent_buffer(path->nodes[*level]);
2471 path->nodes[*level] = NULL;
/*
 * Validate the inode record of a root directory. The visible conditions test
 * for: a missing inode item or recorded errors, nlink other than 1 or a
 * non-zero found_link, an empty backref list, a first backref without an
 * inode ref, a first backref that is not index 0 named "..", and a first
 * backref that has a dir index or dir item.
 *
 * NOTE(review): the statements executed when a condition holds and the
 * return statement are not visible in this view.
 */
2478 static int check_root_dir(struct inode_record *rec)
2480 struct inode_backref *backref;
2483 if (!rec->found_inode_item || rec->errors)
2485 if (rec->nlink != 1 || rec->found_link != 0)
2487 if (list_empty(&rec->backrefs))
/* Only the first backref on the list is inspected. */
2489 backref = to_inode_backref(rec->backrefs.next);
2490 if (!backref->found_inode_ref)
2492 if (backref->index != 0 || backref->namelen != 2 ||
2493 memcmp(backref->name, "..", 2))
2495 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size field of the inode item of rec->ino with rec->found_size
 * and clear I_ERR_DIR_ISIZE_WRONG on the record.
 *
 * The search uses offset (u64)-1 for the inode item key, so the wanted item
 * is expected just before the returned slot; the key at the slot finally used
 * is verified to belong to rec->ino.
 *
 * NOTE(review): the slot adjustment, the error paths and the return statement
 * are not visible in this view.
 */
2502 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2503 struct btrfs_root *root, struct btrfs_path *path,
2504 struct inode_record *rec)
2506 struct btrfs_inode_item *ei;
2507 struct btrfs_key key;
2510 key.objectid = rec->ino;
2511 key.type = BTRFS_INODE_ITEM_KEY;
2512 key.offset = (u64)-1;
2514 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Slot 0 leaves no preceding item to step back to. */
2518 if (!path->slots[0]) {
2525 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2526 if (key.objectid != rec->ino) {
2531 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2532 struct btrfs_inode_item);
2533 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2534 btrfs_mark_buffer_dirty(path->nodes[0]);
2535 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2536 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2537 root->root_key.objectid);
2539 btrfs_release_path(path);
/*
 * Add an orphan item for rec->ino and clear I_ERR_NO_ORPHAN_ITEM on the
 * record.
 * NOTE(review): the check of `ret` guarding the flag update and the return
 * statement are not visible in this view.
 */
2543 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2544 struct btrfs_root *root,
2545 struct btrfs_path *path,
2546 struct inode_record *rec)
2550 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2551 btrfs_release_path(path);
2553 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field of the inode item of rec->ino with
 * rec->found_size and clear I_ERR_FILE_NBYTES_WRONG on the record.
 *
 * NOTE(review): the key offset assignment, the handling of the search result
 * and the return statement are not visible in this view.
 */
2557 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2558 struct btrfs_root *root,
2559 struct btrfs_path *path,
2560 struct inode_record *rec)
2562 struct btrfs_inode_item *ei;
2563 struct btrfs_key key;
2566 key.objectid = rec->ino;
2567 key.type = BTRFS_INODE_ITEM_KEY;
2570 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2577 /* Since ret == 0, no need to check anything */
2578 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2579 struct btrfs_inode_item);
2580 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2581 btrfs_mark_buffer_dirty(path->nodes[0]);
2582 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2583 printf("reset nbytes for ino %llu root %llu\n",
2584 rec->ino, root->root_key.objectid);
2586 btrfs_release_path(path);
/*
 * Insert the DIR_INDEX item described by `backref` (directory, index, name)
 * pointing at inode rec->ino, inside its own transaction.
 *
 * Afterwards the backref is marked as having a dir index and the in-memory
 * record of the parent directory is updated: found_size grows by the name
 * length and I_ERR_DIR_ISIZE_WRONG is cleared or set depending on whether
 * found_size now matches isize.
 *
 * NOTE(review): the return value of btrfs_commit_transaction() is not checked
 * on the visible line. Error checks after the transaction start and item
 * insertion, and the return statement, are not visible in this view.
 */
2590 static int add_missing_dir_index(struct btrfs_root *root,
2591 struct cache_tree *inode_cache,
2592 struct inode_record *rec,
2593 struct inode_backref *backref)
2595 struct btrfs_path path;
2596 struct btrfs_trans_handle *trans;
2597 struct btrfs_dir_item *dir_item;
2598 struct extent_buffer *leaf;
2599 struct btrfs_key key;
2600 struct btrfs_disk_key disk_key;
2601 struct inode_record *dir_rec;
2602 unsigned long name_ptr;
/* Item payload: dir item header immediately followed by the name. */
2603 u32 data_size = sizeof(*dir_item) + backref->namelen;
2606 trans = btrfs_start_transaction(root, 1);
2608 return PTR_ERR(trans);
2610 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2611 (unsigned long long)rec->ino);
2613 btrfs_init_path(&path);
2614 key.objectid = backref->dir;
2615 key.type = BTRFS_DIR_INDEX_KEY;
2616 key.offset = backref->index;
2617 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2620 leaf = path.nodes[0];
2621 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* Location key of the entry: the inode item of rec->ino. */
2623 disk_key.objectid = cpu_to_le64(rec->ino);
2624 disk_key.type = BTRFS_INODE_ITEM_KEY;
2625 disk_key.offset = 0;
2627 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2628 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2629 btrfs_set_dir_data_len(leaf, dir_item, 0);
2630 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2631 name_ptr = (unsigned long)(dir_item + 1);
2632 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2633 btrfs_mark_buffer_dirty(leaf);
2634 btrfs_release_path(&path);
2635 btrfs_commit_transaction(trans, root);
/* Bring the in-memory records in line with what was just written. */
2637 backref->found_dir_index = 1;
2638 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2639 BUG_ON(IS_ERR(dir_rec));
2642 dir_rec->found_size += backref->namelen;
2643 if (dir_rec->found_size == dir_rec->isize &&
2644 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2645 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2646 if (dir_rec->found_size != dir_rec->isize)
2647 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by `backref` from `root`, inside its
 * own transaction. The entry is looked up by directory, name and index; the
 * visible code then either deletes the whole item or only the one name.
 *
 * NOTE(review): the return value of btrfs_commit_transaction() is not checked
 * on the visible lines. The lookup-failure handling, the condition choosing
 * between the two delete calls and the return statements are not visible in
 * this view.
 */
2652 static int delete_dir_index(struct btrfs_root *root,
2653 struct inode_backref *backref)
2655 struct btrfs_trans_handle *trans;
2656 struct btrfs_dir_item *di;
2657 struct btrfs_path path;
2660 trans = btrfs_start_transaction(root, 1);
2662 return PTR_ERR(trans);
2664 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2665 (unsigned long long)backref->dir,
2666 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2667 (unsigned long long)root->objectid);
2669 btrfs_init_path(&path);
2670 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2671 backref->name, backref->namelen,
2672 backref->index, -1);
/* Early-exit path: nothing to delete, finish the transaction. */
2675 btrfs_release_path(&path);
2676 btrfs_commit_transaction(trans, root);
2683 ret = btrfs_del_item(trans, root, &path);
2685 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2687 btrfs_release_path(&path);
2688 btrfs_commit_transaction(trans, root);
/*
 * Recreate a missing inode item for rec->ino from what the check has learnt
 * about the inode, inside its own transaction.
 *
 * The new item gets the transaction's generation, nbytes = found_size and
 * the current time for atime/ctime/mtime (otime is zero). An inode that has
 * dir items becomes a directory (S_IFDIR | 0755, size = found_size); every
 * other inode becomes a regular file (S_IFREG | 0755, size = extent_end).
 *
 * NOTE(review): the third parameter, the condition selecting the nlink value
 * and the return statement are not visible in this view.
 */
2692 static int create_inode_item(struct btrfs_root *root,
2693 struct inode_record *rec,
2696 struct btrfs_trans_handle *trans;
2697 struct btrfs_inode_item inode_item;
2698 time_t now = time(NULL);
2701 trans = btrfs_start_transaction(root, 1);
2702 if (IS_ERR(trans)) {
2703 ret = PTR_ERR(trans);
2707 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2708 "be incomplete, please check permissions and content after "
2709 "the fsck completes.\n", (unsigned long long)root->objectid,
2710 (unsigned long long)rec->ino);
2712 memset(&inode_item, 0, sizeof(inode_item));
2713 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2715 btrfs_set_stack_inode_nlink(&inode_item, 1);
2717 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2718 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2719 if (rec->found_dir_item) {
/* Ambiguous evidence: warn, then prefer the directory interpretation. */
2720 if (rec->found_file_extent)
2721 fprintf(stderr, "root %llu inode %llu has both a dir "
2722 "item and extents, unsure if it is a dir or a "
2723 "regular file so setting it as a directory\n",
2724 (unsigned long long)root->objectid,
2725 (unsigned long long)rec->ino);
2726 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2727 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/* No dir item seen: treat the inode as a regular file. */
2728 } else {
2729 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2730 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2732 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2733 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2734 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2735 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2736 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2737 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2738 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2739 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2741 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2743 btrfs_commit_transaction(trans, root);
/*
 * Walk every backref of @rec and repair inconsistent combinations of
 * dir index / dir item / inode ref.
 *
 * The body tests a `delete` flag to decide which repairs run: deleting bad
 * dir indexes, or (with !delete) adding the missing pieces back.
 * Backrefs that are removed on disk or found fully consistent are dropped
 * from rec->backrefs.
 *
 * Returns ret when it is non-zero, otherwise the value of `repaired`.
 */
2747 static int repair_inode_backrefs(struct btrfs_root *root,
2748 struct inode_record *rec,
2749 struct cache_tree *inode_cache,
2752 struct inode_backref *tmp, *backref;
2753 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2757 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* The root directory itself may be missing its inode item: recreate it */
2758 if (!delete && rec->ino == root_dirid) {
2759 if (!rec->found_inode_item) {
2760 ret = create_inode_item(root, rec, 1);
2767 /* Index 0 for root dir's are special, don't mess with it */
2768 if (rec->ino == root_dirid && backref->index == 0)
2772 ((backref->found_dir_index && !backref->found_inode_ref) ||
2773 (backref->found_dir_index && backref->found_inode_ref &&
2774 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
/* Dir index with no inode ref, or whose index disagrees with the ref */
2775 ret = delete_dir_index(root, backref);
2779 list_del(&backref->list);
/* Dir item and inode ref agree but the dir index is missing: add it */
2784 if (!delete && !backref->found_dir_index &&
2785 backref->found_dir_item && backref->found_inode_ref) {
2786 ret = add_missing_dir_index(root, inode_cache, rec,
2791 if (backref->found_dir_item &&
2792 backref->found_dir_index) {
2793 if (!backref->errors &&
2794 backref->found_inode_ref) {
2795 list_del(&backref->list);
/* Only the inode ref exists: recreate the dir index/item pair from it */
2802 if (!delete && (!backref->found_dir_index &&
2803 !backref->found_dir_item &&
2804 backref->found_inode_ref)) {
2805 struct btrfs_trans_handle *trans;
2806 struct btrfs_key location;
2808 ret = check_dir_conflict(root, backref->name,
2814 * let nlink fixing routine to handle it,
2815 * which can do it better.
/* The new dir entry points at this inode's INODE_ITEM key */
2820 location.objectid = rec->ino;
2821 location.type = BTRFS_INODE_ITEM_KEY;
2822 location.offset = 0;
2824 trans = btrfs_start_transaction(root, 1);
2825 if (IS_ERR(trans)) {
2826 ret = PTR_ERR(trans);
2829 fprintf(stderr, "adding missing dir index/item pair "
2831 (unsigned long long)rec->ino);
2832 ret = btrfs_insert_dir_item(trans, root, backref->name,
2834 backref->dir, &location,
2835 imode_to_type(rec->imode),
2838 btrfs_commit_transaction(trans, root);
/* All three references agree but the inode item itself is gone: rebuild it */
2842 if (!delete && (backref->found_inode_ref &&
2843 backref->found_dir_index &&
2844 backref->found_dir_item &&
2845 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2846 !rec->found_inode_item)) {
2847 ret = create_inode_item(root, rec, 0);
2854 return ret ? ret : repaired;
2858 * To determine the file type for nlink/inode_item repair
2860 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2861 * Return -ENOENT if file type is not found.
/*
 * Work out the BTRFS_FT_* type of @rec and store it in *type.
 *
 * The inode mode is used when the inode item is present; otherwise the type
 * comes from the first backref that has a dir index or a dir item.
 */
2863 static int find_file_type(struct inode_record *rec, u8 *type)
2865 struct inode_backref *backref;
2867 /* For inode item recovered case */
2868 if (rec->found_inode_item) {
2869 *type = imode_to_type(rec->imode);
2873 list_for_each_entry(backref, &rec->backrefs, list) {
/* A backref with only an inode ref is not consulted for the type */
2874 if (backref->found_dir_index || backref->found_dir_item) {
2875 *type = backref->filetype;
2883 * To determine the file name for nlink repair
2885 * Return 0 if file name is found, set name and namelen.
2886 * Return -ENOENT if file name is not found.
/*
 * Copy the name of @rec from the first backref that has any of dir index,
 * dir item or inode ref into @name and store its length in *namelen.
 *
 * The copy is a plain memcpy of backref->namelen bytes: no terminating NUL
 * is written and no bound is applied here, so @name must be large enough
 * (the caller in this file passes a BTRFS_NAME_LEN buffer).
 */
2888 static int find_file_name(struct inode_record *rec,
2889 char *name, int *namelen)
2891 struct inode_backref *backref;
2893 list_for_each_entry(backref, &rec->backrefs, list) {
2894 if (backref->found_dir_index || backref->found_dir_item ||
2895 backref->found_inode_ref) {
2896 memcpy(name, backref->name, backref->namelen);
2897 *namelen = backref->namelen;
2904 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of rec->ino from scratch, inside @trans:
 *   1. unlink every recorded backref and drop the incomplete ones (those
 *      lacking dir index, dir item or inode ref) from rec->backrefs;
 *   2. force the nlink stored in the inode item to 0;
 *   3. re-add each remaining backref with btrfs_add_link().
 *
 * NOTE(review): rec->found_link is cleared on entry and later compared with 0
 * by repair_inode_nlinks(); it is presumably incremented once per re-added
 * link in step 3 - confirm.
 */
2905 static int reset_nlink(struct btrfs_trans_handle *trans,
2906 struct btrfs_root *root,
2907 struct btrfs_path *path,
2908 struct inode_record *rec)
2910 struct inode_backref *backref;
2911 struct inode_backref *tmp;
2912 struct btrfs_key key;
2913 struct btrfs_inode_item *inode_item;
2916 /* We don't believe this either, reset it and iterate backref */
2917 rec->found_link = 0;
2919 /* Remove all backref including the valid ones */
2920 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2921 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2922 backref->index, backref->name,
2923 backref->namelen, 0);
2927 /* remove invalid backref, so it won't be added back */
2928 if (!(backref->found_dir_index &&
2929 backref->found_dir_item &&
2930 backref->found_inode_ref)) {
2931 list_del(&backref->list);
2938 /* Set nlink to 0 */
2939 key.objectid = rec->ino;
2940 key.type = BTRFS_INODE_ITEM_KEY;
/* Look the inode item up for in-place modification (cow = 1, no insert) */
2942 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2949 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2950 struct btrfs_inode_item);
2951 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2952 btrfs_mark_buffer_dirty(path->nodes[0]);
2953 btrfs_release_path(path);
2956 * Add back valid inode_ref/dir_item/dir_index,
2957 * add_link() will handle the nlink inc, so new nlink must be correct
2959 list_for_each_entry(backref, &rec->backrefs, list) {
2960 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2961 backref->name, backref->namelen,
2962 backref->filetype, &backref->index, 1);
2967 btrfs_release_path(path);
/*
 * Find the highest inode number currently present in @root and store it in
 * *highest_ino.
 *
 * The tree is searched for an INODE_ITEM key with objectid
 * BTRFS_LAST_FREE_OBJECTID; the objectid of the item in the slot just before
 * the returned position is taken as the highest one in use.  @path is
 * (re)initialised on entry and released before returning.
 */
2971 static int get_highest_inode(struct btrfs_trans_handle *trans,
2972 struct btrfs_root *root,
2973 struct btrfs_path *path,
2976 struct btrfs_key key, found_key;
2979 btrfs_init_path(path);
2980 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2982 key.type = BTRFS_INODE_ITEM_KEY;
2983 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
/* NOTE(review): slots[0] - 1 assumes the search did not land on slot 0 - confirm */
2985 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2986 path->slots[0] - 1);
2987 *highest_ino = found_key.objectid;
/* An objectid at or past the last free one leaves no room for a new inode */
2990 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2992 btrfs_release_path(path);
/*
 * Repair a wrong link count for rec->ino.
 *
 * The name and type of the inode are salvaged from its backrefs first (with
 * the inode number / regular file as fallbacks), then reset_nlink() rebuilds
 * the links.  If no link is left afterwards (found_link == 0) the inode is
 * linked into a "lost+found" directory created under
 * BTRFS_FIRST_FREE_OBJECTID, appending ".INO" suffixes while the name is
 * already taken.
 *
 * I_ERR_LINK_COUNT_WRONG is cleared from rec->errors before returning and
 * @path is released.
 */
2996 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2997 struct btrfs_root *root,
2998 struct btrfs_path *path,
2999 struct inode_record *rec)
3001 char *dir_name = "lost+found";
3002 char namebuf[BTRFS_NAME_LEN] = {0};
3007 int name_recovered = 0;
3008 int type_recovered = 0;
3012 * Get file name and type first before these invalid inode ref
3013 * are deleted by remove_all_invalid_backref()
/* Both helpers return 0 on success, hence the negation */
3015 name_recovered = !find_file_name(rec, namebuf, &namelen);
3016 type_recovered = !find_file_type(rec, &type);
3018 if (!name_recovered) {
3019 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3020 rec->ino, rec->ino);
/* Fallback name is the decimal inode number */
3021 namelen = count_digits(rec->ino);
3022 sprintf(namebuf, "%llu", rec->ino);
3025 if (!type_recovered) {
3026 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3028 type = BTRFS_FT_REG_FILE;
3032 ret = reset_nlink(trans, root, path, rec);
3035 "Failed to reset nlink for inode %llu: %s\n",
3036 rec->ino, strerror(-ret));
/* No valid link survived the reset: the inode must go to lost+found */
3040 if (rec->found_link == 0) {
3041 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3045 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3046 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3049 fprintf(stderr, "Failed to create '%s' dir: %s\n",
3050 dir_name, strerror(-ret));
3053 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
3054 namebuf, namelen, type, NULL, 1);
3056 * Add ".INO" suffix several times to handle case where
3057 * "FILENAME.INO" is already taken by another file.
3059 while (ret == -EEXIST) {
3061 * Conflicting file name, add ".INO" as suffix * +1 for '.'
3063 if (namelen + count_digits(rec->ino) + 1 >
/* Append the suffix in place, bounded by the space left in namebuf */
3068 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
3070 namelen += count_digits(rec->ino) + 1;
3071 ret = btrfs_add_link(trans, root, rec->ino,
3072 lost_found_ino, namebuf,
3073 namelen, type, NULL, 1);
3077 "Failed to link the inode %llu to %s dir: %s\n",
3078 rec->ino, dir_name, strerror(-ret));
3082 * Just increase the found_link, don't actually add the
3083 * backref. This will make things easier and this inode
3084 * record will be freed after the repair is done.
3085 * So fsck will not report problem about this inode.
/* namebuf is not necessarily NUL-terminated, so print with an explicit length */
3088 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3089 namelen, namebuf, dir_name);
3091 printf("Fixed the nlink of inode %llu\n", rec->ino);
3094 * Clear the flag anyway, or we will loop forever for the same inode
3095 * as it will not be removed from the bad inode list and the dead loop
3098 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3099 btrfs_release_path(path);
3104 * Check if there is any normal(reg or prealloc) file extent for given
3106 * This is used to determine the file type when neither its dir_index/item or
3107 * inode_item exists.
3109 * This will *NOT* report error, if any error happens, just consider it does
3110 * not have any normal file extent.
/*
 * Look at the EXTENT_DATA items of inode @ino in @root and tell whether any
 * of them is not an inline extent.
 *
 * The search is read-only (no transaction, no cow) and uses a local path that
 * is released before returning.  Iteration stops at the first item that no
 * longer belongs to @ino or is not an EXTENT_DATA item.
 */
3112 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3114 struct btrfs_path path;
3115 struct btrfs_key key;
3116 struct btrfs_key found_key;
3117 struct btrfs_file_extent_item *fi;
3121 btrfs_init_path(&path);
3123 key.type = BTRFS_EXTENT_DATA_KEY;
3126 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* No exact match and the slot is past the end of this leaf: move to the next leaf */
3131 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3132 ret = btrfs_next_leaf(root, &path);
3139 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3141 if (found_key.objectid != ino ||
3142 found_key.type != BTRFS_EXTENT_DATA_KEY)
3144 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3145 struct btrfs_file_extent_item);
3146 type = btrfs_file_extent_type(path.nodes[0], fi);
/* Anything other than an inline extent counts as a normal file extent */
3147 if (type != BTRFS_FILE_EXTENT_INLINE) {
3153 btrfs_release_path(&path);
3157 static u32 btrfs_type_to_imode(u8 type)
3159 static u32 imode_by_btrfs_type[] = {
3160 [BTRFS_FT_REG_FILE] = S_IFREG,
3161 [BTRFS_FT_DIR] = S_IFDIR,
3162 [BTRFS_FT_CHRDEV] = S_IFCHR,
3163 [BTRFS_FT_BLKDEV] = S_IFBLK,
3164 [BTRFS_FT_FIFO] = S_IFIFO,
3165 [BTRFS_FT_SOCK] = S_IFSOCK,
3166 [BTRFS_FT_SYMLINK] = S_IFLNK,
3169 return imode_by_btrfs_type[(type)];
/*
 * Rebuild the inode item of rec->ino when it is missing.
 *
 * The file type is taken from find_file_type() when possible; otherwise it is
 * guessed from what was found for the inode (a non-inline file extent or an
 * orphan extent => regular file, a dir item => directory, else regular file).
 * A new inode is then created with btrfs_new_inode() and @rec is updated to
 * match.
 *
 * Only the inode item is rebuilt here.  I_ERR_NO_INODE_ITEM is cleared and
 * I_ERR_LINK_COUNT_WRONG is set so that the nlink repair runs afterwards.
 */
3172 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3173 struct btrfs_root *root,
3174 struct btrfs_path *path,
3175 struct inode_record *rec)
3179 int type_recovered = 0;
3182 printf("Trying to rebuild inode:%llu\n", rec->ino);
/* find_file_type() returns 0 on success, hence the negation */
3184 type_recovered = !find_file_type(rec, &filetype);
3187 * Try to determine inode type if type not found.
3189 * For found regular file extent, it must be FILE.
3190 * For found dir_item/index, it must be DIR.
3192 * For undetermined one, use FILE as fallback.
3195 * 1. If found backref(inode_index/item is already handled) to it,
3197 * Need new inode-inode ref structure to allow search for that.
3199 if (!type_recovered) {
3200 if (rec->found_file_extent &&
3201 find_normal_file_extent(root, rec->ino)) {
3203 filetype = BTRFS_FT_REG_FILE;
3204 } else if (rec->found_dir_item) {
3206 filetype = BTRFS_FT_DIR;
3207 } else if (!list_empty(&rec->orphan_extents)) {
3209 filetype = BTRFS_FT_REG_FILE;
3211 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3214 filetype = BTRFS_FT_REG_FILE;
/* Permission bits in `mode` are combined with the type bits for the filetype */
3218 ret = btrfs_new_inode(trans, root, rec->ino,
3219 mode | btrfs_type_to_imode(filetype));
3224 * Here inode rebuild is done, we only rebuild the inode item,
3225 * don't repair the nlink(like move to lost+found).
3226 * That is the job of nlink repair.
3228 * We just fill the record and return
3230 rec->found_dir_item = 1;
3231 rec->imode = mode | btrfs_type_to_imode(filetype);
3233 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3234 /* Ensure the inode_nlinks repair function will be called */
3235 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach the orphan data extents recorded in rec->orphan_extents to their
 * inode.
 *
 * For each orphan the target file range is probed with btrfs_get_extent().
 * A conflicting orphan has its extent freed; otherwise a file extent item is
 * inserted that covers disk_len bytes at orphan->offset, and the size and
 * hole bookkeeping in @rec is updated.  Each processed orphan is removed
 * from the list, and I_ERR_FILE_EXTENT_ORPHAN is cleared at the end.
 */
3240 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3241 struct btrfs_root *root,
3242 struct btrfs_path *path,
3243 struct inode_record *rec)
3245 struct orphan_data_extent *orphan;
3246 struct orphan_data_extent *tmp;
3249 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3251 * Check for conflicting file extents
3253 * Here we don't know whether the extents is compressed or not,
3254 * so we can only assume it not compressed nor data offset,
3255 * and use its disk_len as extent length.
3257 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3258 orphan->offset, orphan->disk_len, 0);
3259 btrfs_release_path(path);
3264 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3265 orphan->disk_bytenr, orphan->disk_len);
/* Drop the reference this inode held on the conflicting extent */
3266 ret = btrfs_free_extent(trans,
3267 root->fs_info->extent_root,
3268 orphan->disk_bytenr, orphan->disk_len,
3269 0, root->objectid, orphan->objectid,
/* disk_len is used for both length arguments (extent assumed uncompressed) */
3274 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3275 orphan->offset, orphan->disk_bytenr,
3276 orphan->disk_len, orphan->disk_len);
3280 /* Update file size info */
3281 rec->found_size += orphan->disk_len;
3282 if (rec->found_size == rec->nbytes)
3283 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3285 /* Update the file extent hole info too */
3286 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3290 if (RB_EMPTY_ROOT(&rec->holes))
3291 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3293 list_del(&orphan->list);
3296 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps recorded in rec->holes by punching an explicit hole for each
 * of them, removing every entry from the tree as it is handled.
 *
 * I_ERR_FILE_EXTENT_DISCOUNT is cleared once the hole tree is empty.  A file
 * that lost all of its file extents is handled separately by punching one
 * hole from offset 0 to isize rounded up to the sector size.
 */
3301 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3302 struct btrfs_root *root,
3303 struct btrfs_path *path,
3304 struct inode_record *rec)
3306 struct rb_node *node;
3307 struct file_extent_hole *hole;
3311 node = rb_first(&rec->holes);
3315 hole = rb_entry(node, struct file_extent_hole, node);
3316 ret = btrfs_punch_hole(trans, root, rec->ino,
3317 hole->start, hole->len);
3320 ret = del_file_extent_hole(&rec->holes, hole->start,
3324 if (RB_EMPTY_ROOT(&rec->holes))
3325 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* The tree was just modified, so restart from its first node */
3326 node = rb_first(&rec->holes);
3328 /* special case for a file losing all its file extent */
3330 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3331 round_up(rec->isize,
3332 root->fs_info->sectorsize));
3336 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3337 rec->ino, root->objectid);
/*
 * Run every applicable inode repair for @rec inside a single transaction.
 *
 * Only the error bits listed in the first test are repairable here.  The
 * individual repairs run in a fixed order and each one is attempted only if
 * all previous ones succeeded.  repair_inode_no_item() runs first because it
 * sets I_ERR_LINK_COUNT_WRONG, which the later nlink step then picks up.
 * The transaction is committed whatever the outcome of the repairs.
 */
3342 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3344 struct btrfs_trans_handle *trans;
3345 struct btrfs_path path;
3348 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3349 I_ERR_NO_ORPHAN_ITEM |
3350 I_ERR_LINK_COUNT_WRONG |
3351 I_ERR_NO_INODE_ITEM |
3352 I_ERR_FILE_EXTENT_ORPHAN |
3353 I_ERR_FILE_EXTENT_DISCOUNT|
3354 I_ERR_FILE_NBYTES_WRONG)))
3358 * For nlink repair, it may create a dir and add link, so
3359 * 2 for parent(256)'s dir_index and dir_item
3360 * 2 for lost+found dir's inode_item and inode_ref
3361 * 1 for the new inode_ref of the file
3362 * 2 for lost+found dir's dir_index and dir_item for the file
3364 trans = btrfs_start_transaction(root, 7);
3366 return PTR_ERR(trans);
3368 btrfs_init_path(&path);
3369 if (rec->errors & I_ERR_NO_INODE_ITEM)
3370 ret = repair_inode_no_item(trans, root, &path, rec);
3371 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3372 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3373 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3374 ret = repair_inode_discount_extent(trans, root, &path, rec);
3375 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3376 ret = repair_inode_isize(trans, root, &path, rec);
3377 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3378 ret = repair_inode_orphan_item(trans, root, &path, rec);
3379 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3380 ret = repair_inode_nlinks(trans, root, &path, rec);
3381 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3382 ret = repair_inode_nbytes(trans, root, &path, rec);
3383 btrfs_commit_transaction(trans, root);
3384 btrfs_release_path(&path);
/*
 * Final pass over the inode records collected for @root.
 *
 * Visible phases:
 *   - in repair mode, repeated passes over the cache that call
 *     repair_inode_backrefs() on records that still have backrefs;
 *   - a check of the root directory record, recreating the root directory in
 *     repair mode when it is missing;
 *   - a drain of the whole cache: each record is removed, optionally repaired
 *     with try_repair_inode(), and whatever cannot be resolved is printed
 *     together with its unresolved backrefs before the record is freed.
 *
 * Returns -1 if any error was counted, 0 otherwise.
 */
3388 static int check_inode_recs(struct btrfs_root *root,
3389 struct cache_tree *inode_cache)
3391 struct cache_extent *cache;
3392 struct ptr_node *node;
3393 struct inode_record *rec;
3394 struct inode_backref *backref;
3399 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* A root with zero refs is not expected to have left any inode records */
3401 if (btrfs_root_refs(&root->root_item) == 0) {
3402 if (!cache_tree_empty(inode_cache))
3403 fprintf(stderr, "warning line %d\n", __LINE__);
3408 * We need to repair backrefs first because we could change some of the
3409 * errors in the inode recs.
3411 * We also need to go through and delete invalid backrefs first and then
3412 * add the correct ones second. We do this because we may get EEXIST
3413 * when adding back the correct index because we hadn't yet deleted the
3416 * For example, if we were missing a dir index then the directories
3417 * isize would be wrong, so if we fixed the isize to what we thought it
3418 * would be and then fixed the backref we'd still have a invalid fs, so
3419 * we need to add back the dir index and then check to see if the isize
3424 if (stage == 3 && !err)
3427 cache = search_cache_extent(inode_cache, 0);
3428 while (repair && cache) {
3429 node = container_of(cache, struct ptr_node, cache);
/* Advance before the current entry can be removed from the tree */
3431 cache = next_cache_extent(cache);
3433 /* Need to free everything up and rescan */
3435 remove_cache_extent(inode_cache, &node->cache);
3437 free_inode_rec(rec);
3441 if (list_empty(&rec->backrefs))
3444 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Third argument 0: look the root dir record up without modifying it (presumably - confirm) */
3458 rec = get_inode_rec(inode_cache, root_dirid, 0);
3459 BUG_ON(IS_ERR(rec));
3461 ret = check_root_dir(rec);
3463 fprintf(stderr, "root %llu root dir %llu error\n",
3464 (unsigned long long)root->root_key.objectid,
3465 (unsigned long long)root_dirid);
3466 print_inode_error(root, rec);
3471 struct btrfs_trans_handle *trans;
3473 trans = btrfs_start_transaction(root, 1);
3474 if (IS_ERR(trans)) {
3475 err = PTR_ERR(trans);
3480 "root %llu missing its root dir, recreating\n",
3481 (unsigned long long)root->objectid);
3483 ret = btrfs_make_root_dir(trans, root, root_dirid);
3486 btrfs_commit_transaction(trans, root);
3490 fprintf(stderr, "root %llu root dir %llu not found\n",
3491 (unsigned long long)root->root_key.objectid,
3492 (unsigned long long)root_dirid);
/* Drain the cache: every record is removed and eventually freed below */
3496 cache = search_cache_extent(inode_cache, 0);
3499 node = container_of(cache, struct ptr_node, cache);
3501 remove_cache_extent(inode_cache, &node->cache);
/* The root dir was handled above; the orphan objectid is not a real inode */
3503 if (rec->ino == root_dirid ||
3504 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3505 free_inode_rec(rec);
3509 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3510 ret = check_orphan_item(root, rec->ino);
3512 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3513 if (can_free_inode_rec(rec)) {
3514 free_inode_rec(rec);
/* Derive the remaining error bits from what was (not) found */
3519 if (!rec->found_inode_item)
3520 rec->errors |= I_ERR_NO_INODE_ITEM;
3521 if (rec->found_link != rec->nlink)
3522 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3524 ret = try_repair_inode(root, rec);
3525 if (ret == 0 && can_free_inode_rec(rec)) {
3526 free_inode_rec(rec);
3532 if (!(repair && ret == 0))
3534 print_inode_error(root, rec);
/* Flag what is missing on each leftover backref, then report it */
3535 list_for_each_entry(backref, &rec->backrefs, list) {
3536 if (!backref->found_dir_item)
3537 backref->errors |= REF_ERR_NO_DIR_ITEM;
3538 if (!backref->found_dir_index)
3539 backref->errors |= REF_ERR_NO_DIR_INDEX;
3540 if (!backref->found_inode_ref)
3541 backref->errors |= REF_ERR_NO_INODE_REF;
3542 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3543 " namelen %u name %s filetype %d errors %x",
3544 (unsigned long long)backref->dir,
3545 (unsigned long long)backref->index,
3546 backref->namelen, backref->name,
3547 backref->filetype, backref->errors);
3548 print_ref_error(backref->errors);
3550 free_inode_rec(rec);
3552 return (error > 0) ? -1 : 0;
/*
 * Look up the root_record for @objectid in @root_cache, creating and
 * inserting a zeroed one (cache range [objectid, objectid + 1)) when none
 * exists yet.
 *
 * Returns the record, ERR_PTR(-ENOMEM) when allocation fails, or
 * ERR_PTR(-EEXIST) on the insertion failure path.
 */
3555 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3558 struct cache_extent *cache;
3559 struct root_record *rec = NULL;
3562 cache = lookup_cache_extent(root_cache, objectid, 1);
3564 rec = container_of(cache, struct root_record, cache);
3566 rec = calloc(1, sizeof(*rec));
3568 return ERR_PTR(-ENOMEM);
3569 rec->objectid = objectid;
3570 INIT_LIST_HEAD(&rec->backrefs);
3571 rec->cache.start = objectid;
3572 rec->cache.size = 1;
3574 ret = insert_cache_extent(root_cache, &rec->cache);
3576 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec that matches (@ref_root, @dir, @name), or create
 * one and append it to rec->backrefs.
 *
 * @index is not part of the match; it is only stored in a newly created
 * backref.  The name is copied into storage allocated together with the
 * backref and NUL-terminated there, so @name itself need not be terminated.
 */
3581 static struct root_backref *get_root_backref(struct root_record *rec,
3582 u64 ref_root, u64 dir, u64 index,
3583 const char *name, int namelen)
3585 struct root_backref *backref;
3587 list_for_each_entry(backref, &rec->backrefs, list) {
3588 if (backref->ref_root != ref_root || backref->dir != dir ||
3589 backref->namelen != namelen)
3591 if (memcmp(name, backref->name, namelen))
/* +1 leaves room for the terminating NUL written below */
3596 backref = calloc(1, sizeof(*backref) + namelen + 1);
3599 backref->ref_root = ref_root;
3601 backref->index = index;
3602 backref->namelen = namelen;
3603 memcpy(backref->name, name, namelen);
3604 backref->name[namelen] = '\0';
3605 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Destructor for one root_record cache entry: detaches every backref from
 * the record's list, one at a time from the head.
 */
3609 static void free_root_record(struct cache_extent *cache)
3611 struct root_record *rec;
3612 struct root_backref *backref;
3614 rec = container_of(cache, struct root_record, cache);
3615 while (!list_empty(&rec->backrefs)) {
3616 backref = to_root_backref(rec->backrefs.next);
3617 list_del(&backref->list);
/*
 * NOTE(review): presumably generates free_root_recs_tree() (called elsewhere
 * in this file) with free_root_record as the per-entry destructor - confirm
 * against the macro definition.
 */
3624 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that an item of type @item_type describing the link
 * (@ref_root, @dir, @name) -> @root_id was seen, on the matching backref of
 * the root_record for @root_id (both are created on demand).
 *
 * @errors is OR-ed into the backref.  For every item type except
 * BTRFS_DIR_ITEM_KEY the index is cross-checked against the one already
 * recorded by a dir index / root ref / root backref, and a mismatch is
 * flagged as REF_ERR_INDEX_UNMATCH.  Seeing the same root ref or root backref
 * twice is flagged as a duplicate.
 *
 * The backref becomes reachable once both the forward root ref and the dir
 * item have been found.
 */
3626 static int add_root_backref(struct cache_tree *root_cache,
3627 u64 root_id, u64 ref_root, u64 dir, u64 index,
3628 const char *name, int namelen,
3629 int item_type, int errors)
3631 struct root_record *rec;
3632 struct root_backref *backref;
3634 rec = get_root_rec(root_cache, root_id);
3635 BUG_ON(IS_ERR(rec));
3636 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3639 backref->errors |= errors;
/* The index is only cross-checked for item types other than DIR_ITEM */
3641 if (item_type != BTRFS_DIR_ITEM_KEY) {
3642 if (backref->found_dir_index || backref->found_back_ref ||
3643 backref->found_forward_ref) {
3644 if (backref->index != index)
3645 backref->errors |= REF_ERR_INDEX_UNMATCH;
3647 backref->index = index;
3651 if (item_type == BTRFS_DIR_ITEM_KEY) {
3652 if (backref->found_forward_ref)
3654 backref->found_dir_item = 1;
3655 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3656 backref->found_dir_index = 1;
3657 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3658 if (backref->found_forward_ref)
3659 backref->errors |= REF_ERR_DUP_ROOT_REF;
3660 else if (backref->found_dir_item)
3662 backref->found_forward_ref = 1;
3663 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3664 if (backref->found_back_ref)
3665 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3666 backref->found_back_ref = 1;
3671 if (backref->found_forward_ref && backref->found_dir_item)
3672 backref->reachable = 1;
/*
 * Drain the per-tree record cache @src_cache built while walking @root and
 * turn the dir item / dir index backrefs of records that name child roots
 * (checked with is_child_root()) into root backrefs in @dst_cache.
 *
 * Records found while walking the tree-reloc tree are simply freed.  Every
 * record taken out of @src_cache is freed after processing.
 */
3676 static int merge_root_recs(struct btrfs_root *root,
3677 struct cache_tree *src_cache,
3678 struct cache_tree *dst_cache)
3680 struct cache_extent *cache;
3681 struct ptr_node *node;
3682 struct inode_record *rec;
3683 struct inode_backref *backref;
3686 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3687 free_inode_recs_tree(src_cache);
3692 cache = search_cache_extent(src_cache, 0);
3695 node = container_of(cache, struct ptr_node, cache);
3697 remove_cache_extent(src_cache, &node->cache);
3700 ret = is_child_root(root, root->objectid, rec->ino);
3706 list_for_each_entry(backref, &rec->backrefs, list) {
/* Records in this cache must never carry an inode ref */
3707 BUG_ON(backref->found_inode_ref);
3708 if (backref->found_dir_item)
3709 add_root_backref(dst_cache, rec->ino,
3710 root->root_key.objectid, backref->dir,
3711 backref->index, backref->name,
3712 backref->namelen, BTRFS_DIR_ITEM_KEY,
3714 if (backref->found_dir_index)
3715 add_root_backref(dst_cache, rec->ino,
3716 root->root_key.objectid, backref->dir,
3717 backref->index, backref->name,
3718 backref->namelen, BTRFS_DIR_INDEX_KEY,
3722 free_inode_rec(rec);
/*
 * Verify the references between fs trees recorded in @root_cache.
 *
 * Two passes over the cache are visible:
 *   1. a propagation pass (starting from the record of
 *      BTRFS_FS_TREE_OBJECTID) that clears `reachable` on backrefs whose
 *      referencing root is itself not referenced;
 *   2. a reporting pass that prints fs trees which are not referenced and
 *      backrefs that are reachable but carry errors, after flagging the
 *      missing dir item / dir index / root backref / root ref on each.
 *
 * Returns 1 if any error was counted, 0 otherwise.
 */
3729 static int check_root_refs(struct btrfs_root *root,
3730 struct cache_tree *root_cache)
3732 struct root_record *rec;
3733 struct root_record *ref_root;
3734 struct root_backref *backref;
3735 struct cache_extent *cache;
3741 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3742 BUG_ON(IS_ERR(rec));
3745 /* fixme: this can not detect circular references */
3748 cache = search_cache_extent(root_cache, 0);
3752 rec = container_of(cache, struct root_record, cache);
3753 cache = next_cache_extent(cache);
3755 if (rec->found_ref == 0)
3758 list_for_each_entry(backref, &rec->backrefs, list) {
3759 if (!backref->reachable)
3762 ref_root = get_root_rec(root_cache,
3764 BUG_ON(IS_ERR(ref_root));
3765 if (ref_root->found_ref > 0)
/* The referencing root is not referenced itself, so this link does not count */
3768 backref->reachable = 0;
3770 if (rec->found_ref == 0)
/* Reporting pass */
3776 cache = search_cache_extent(root_cache, 0);
3780 rec = container_of(cache, struct root_record, cache);
3781 cache = next_cache_extent(cache);
/* An unreferenced root in the normal objectid range may still be a pending orphan */
3783 if (rec->found_ref == 0 &&
3784 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3785 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3786 ret = check_orphan_item(root->fs_info->tree_root,
3792 * If we don't have a root item then we likely just have
3793 * a dir item in a snapshot for this root but no actual
3794 * ref key or anything so it's meaningless.
3796 if (!rec->found_root_item)
3799 fprintf(stderr, "fs tree %llu not referenced\n",
3800 (unsigned long long)rec->objectid);
3804 if (rec->found_ref > 0 && !rec->found_root_item)
3806 list_for_each_entry(backref, &rec->backrefs, list) {
3807 if (!backref->found_dir_item)
3808 backref->errors |= REF_ERR_NO_DIR_ITEM;
3809 if (!backref->found_dir_index)
3810 backref->errors |= REF_ERR_NO_DIR_INDEX;
3811 if (!backref->found_back_ref)
3812 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3813 if (!backref->found_forward_ref)
3814 backref->errors |= REF_ERR_NO_ROOT_REF;
3815 if (backref->reachable && backref->errors)
3822 fprintf(stderr, "fs tree %llu refs %u %s\n",
3823 (unsigned long long)rec->objectid, rec->found_ref,
3824 rec->found_root_item ? "" : "not found");
3826 list_for_each_entry(backref, &rec->backrefs, list) {
3827 if (!backref->reachable)
3829 if (!backref->errors && rec->found_root_item)
3831 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3832 " index %llu namelen %u name %s errors %x\n",
3833 (unsigned long long)backref->ref_root,
3834 (unsigned long long)backref->dir,
3835 (unsigned long long)backref->index,
3836 backref->namelen, backref->name,
3838 print_ref_error(backref->errors);
3841 return errors > 0 ? 1 : 0;
/*
 * Decode the root ref item at @slot of @eb and record it in @root_cache via
 * add_root_backref().
 *
 * The name is stored directly after the btrfs_root_ref structure; it is
 * copied into a local BTRFS_NAME_LEN buffer (not NUL-terminated), and an
 * over-long on-disk name is truncated to BTRFS_NAME_LEN and flagged with
 * REF_ERR_NAME_TOO_LONG.
 *
 * For a BTRFS_ROOT_REF_KEY item key->offset is passed as the referenced root
 * and key->objectid as the referencing root; in the other branch the two are
 * swapped.
 */
3844 static int process_root_ref(struct extent_buffer *eb, int slot,
3845 struct btrfs_key *key,
3846 struct cache_tree *root_cache)
3852 struct btrfs_root_ref *ref;
3853 char namebuf[BTRFS_NAME_LEN];
3856 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3858 dirid = btrfs_root_ref_dirid(eb, ref);
3859 index = btrfs_root_ref_sequence(eb, ref);
3860 name_len = btrfs_root_ref_name_len(eb, ref);
3862 if (name_len <= BTRFS_NAME_LEN) {
3866 len = BTRFS_NAME_LEN;
3867 error = REF_ERR_NAME_TOO_LONG;
/* (ref + 1) is the offset of the first byte after the fixed-size ref */
3869 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3871 if (key->type == BTRFS_ROOT_REF_KEY) {
3872 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3873 index, namebuf, len, key->type, error);
3875 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3876 index, namebuf, len, key->type, error);
/*
 * Destructor for one btrfs_corrupt_block cache entry: recovers the containing
 * structure from its embedded cache_extent.
 */
3881 static void free_corrupt_block(struct cache_extent *cache)
3883 struct btrfs_corrupt_block *corrupt;
3885 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/*
 * NOTE(review): presumably generates free_corrupt_blocks_tree() (called
 * elsewhere in this file) with free_corrupt_block as the per-entry
 * destructor - confirm against the macro definition.
 */
3889 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3892 * Repair the btree of the given root.
3894 * The fix is to remove the node key in corrupt_blocks cache_tree.
3895 * and rebalance the tree.
3896 * After the fix, the btree should be writeable.
/*
 * Remove every tree block listed in @corrupt_blocks from the btree of @root,
 * inside one transaction.
 *
 * Pass 1: for each corrupt block, search down to its level with ins_len 0
 * (so no balancing touches possibly corrupted siblings), delete the node
 * pointer and free the extent it pointed to.
 * Pass 2: search for each recorded key again with ins_len -1 so that
 * btrfs_search_slot() rebalances the tree.
 *
 * Does nothing when @corrupt_blocks is empty.
 */
3898 static int repair_btree(struct btrfs_root *root,
3899 struct cache_tree *corrupt_blocks)
3901 struct btrfs_trans_handle *trans;
3902 struct btrfs_path path;
3903 struct btrfs_corrupt_block *corrupt;
3904 struct cache_extent *cache;
3905 struct btrfs_key key;
3910 if (cache_tree_empty(corrupt_blocks))
3913 trans = btrfs_start_transaction(root, 1);
3914 if (IS_ERR(trans)) {
3915 ret = PTR_ERR(trans);
3916 fprintf(stderr, "Error starting transaction: %s\n",
3920 btrfs_init_path(&path);
3921 cache = first_cache_extent(corrupt_blocks);
3923 corrupt = container_of(cache, struct btrfs_corrupt_block,
3925 level = corrupt->level;
/* Stop the search at the level recorded for the corrupt block */
3926 path.lowest_level = level;
3927 key.objectid = corrupt->key.objectid;
3928 key.type = corrupt->key.type;
3929 key.offset = corrupt->key.offset;
3932 * Here we don't want to do any tree balance, since it may
3933 * cause a balance with corrupted brother leaf/node,
3934 * so ins_len set to 0 here.
3935 * Balance will be done after all corrupt node/leaf is deleted.
3937 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
/* Remember the block address before the pointer to it is deleted */
3940 offset = btrfs_node_blockptr(path.nodes[level],
3943 /* Remove the ptr */
3944 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3948 * Remove the corresponding extent
3949 * return value is not concerned.
3951 btrfs_release_path(&path);
3952 ret = btrfs_free_extent(trans, root, offset,
3953 root->fs_info->nodesize, 0,
3954 root->root_key.objectid, level - 1, 0);
3955 cache = next_cache_extent(cache);
3958 /* Balance the btree using btrfs_search_slot() */
3959 cache = first_cache_extent(corrupt_blocks);
3961 corrupt = container_of(cache, struct btrfs_corrupt_block,
3963 memcpy(&key, &corrupt->key, sizeof(key));
3964 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3967 /* return will always >0 since it won't find the item */
3969 btrfs_release_path(&path);
3970 cache = next_cache_extent(cache);
3973 btrfs_commit_transaction(trans, root);
3974 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree.
 *
 * Visible steps:
 *   - install a local corrupt-block cache in fs_info for the duration of the
 *     walk;
 *   - register the root in @root_cache (except for the tree-reloc tree) and
 *     mark its root item as found when it has refs;
 *   - move the root's orphan data extents onto their inode records;
 *   - check the root block itself, then walk the tree with
 *     walk_down_tree()/walk_up_tree(), starting either at the root node or,
 *     for a root with no refs and a non-zero drop_progress, at the
 *     drop_progress key;
 *   - report corrupted tree blocks and try repair_btree() on them;
 *   - merge the root records and check the inode records collected by the
 *     walk;
 *   - free the corrupt-block cache and the orphan extent list.
 */
3978 static int check_fs_root(struct btrfs_root *root,
3979 struct cache_tree *root_cache,
3980 struct walk_control *wc)
3986 struct btrfs_path path;
3987 struct shared_node root_node;
3988 struct root_record *rec;
3989 struct btrfs_root_item *root_item = &root->root_item;
3990 struct cache_tree corrupt_blocks;
3991 struct orphan_data_extent *orphan;
3992 struct orphan_data_extent *tmp;
3993 enum btrfs_tree_block_status status;
3994 struct node_refs nrefs;
3997 * Reuse the corrupt_block cache tree to record corrupted tree block
3999 * Unlike the usage in extent tree check, here we do it in a per
4000 * fs/subvol tree base.
4002 cache_tree_init(&corrupt_blocks);
4003 root->fs_info->corrupt_blocks = &corrupt_blocks;
4005 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4006 rec = get_root_rec(root_cache, root->root_key.objectid);
4007 BUG_ON(IS_ERR(rec));
4008 if (btrfs_root_refs(root_item) > 0)
4009 rec->found_root_item = 1;
4012 btrfs_init_path(&path);
4013 memset(&root_node, 0, sizeof(root_node));
4014 cache_tree_init(&root_node.root_cache);
4015 cache_tree_init(&root_node.inode_cache);
4016 memset(&nrefs, 0, sizeof(nrefs));
4018 /* Move the orphan extent record to corresponding inode_record */
4019 list_for_each_entry_safe(orphan, tmp,
4020 &root->orphan_data_extents, list) {
4021 struct inode_record *inode;
4023 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4025 BUG_ON(IS_ERR(inode));
4026 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4027 list_move(&orphan->list, &inode->orphan_extents);
/* Prime the walk control so the walk starts at the root's level */
4030 level = btrfs_header_level(root->node);
4031 memset(wc->nodes, 0, sizeof(wc->nodes));
4032 wc->nodes[level] = &root_node;
4033 wc->active_node = level;
4034 wc->root_level = level;
4036 /* We may not have checked the root block, lets do that now */
4037 if (btrfs_is_leaf(root->node))
4038 status = btrfs_check_leaf(root, NULL, root->node);
4040 status = btrfs_check_node(root, NULL, root->node);
4041 if (status != BTRFS_TREE_BLOCK_CLEAN)
4044 if (btrfs_root_refs(root_item) > 0 ||
4045 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
/* Start at the root node; the path takes its own reference on it */
4046 path.nodes[level] = root->node;
4047 extent_buffer_get(root->node);
4048 path.slots[level] = 0;
4050 struct btrfs_key key;
4051 struct btrfs_disk_key found_key;
/* Resume from the key and level recorded in the root item's drop progress */
4053 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4054 level = root_item->drop_level;
4055 path.lowest_level = level;
4056 if (level > btrfs_header_level(root->node) ||
4057 level >= BTRFS_MAX_LEVEL) {
4058 error("ignoring invalid drop level: %u", level);
4061 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4064 btrfs_node_key(path.nodes[level], &found_key,
4066 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4067 sizeof(found_key)));
4071 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4077 wret = walk_up_tree(root, &path, wc, &level);
4084 btrfs_release_path(&path);
4086 if (!cache_tree_empty(&corrupt_blocks)) {
4087 struct cache_extent *cache;
4088 struct btrfs_corrupt_block *corrupt;
4090 printf("The following tree block(s) is corrupted in tree %llu:\n",
4091 root->root_key.objectid);
4092 cache = first_cache_extent(&corrupt_blocks);
4094 corrupt = container_of(cache,
4095 struct btrfs_corrupt_block,
4097 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4098 cache->start, corrupt->level,
4099 corrupt->key.objectid, corrupt->key.type,
4100 corrupt->key.offset);
4101 cache = next_cache_extent(cache);
4104 printf("Try to repair the btree for root %llu\n",
4105 root->root_key.objectid);
4106 ret = repair_btree(root, &corrupt_blocks);
4108 fprintf(stderr, "Failed to repair btree: %s\n",
4111 printf("Btree for root %llu is fixed\n",
4112 root->root_key.objectid);
4116 err = merge_root_recs(root, &root_node.root_cache, root_cache);
/* Close out the inode record the walk was still working on */
4120 if (root_node.current) {
4121 root_node.current->checked = 1;
4122 maybe_free_inode_rec(&root_node.inode_cache,
4126 err = check_inode_recs(root, &root_node.inode_cache);
/* The local cache goes out of scope on return, so detach it from fs_info */
4130 free_corrupt_blocks_tree(&corrupt_blocks);
4131 root->fs_info->corrupt_blocks = NULL;
4132 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Tell whether @objectid names a root that check_fs_roots() should read and
 * check.
 *
 * The tree-reloc and data-reloc objectids are tested explicitly; any other
 * objectid is decided by is_fstree().
 * NOTE(review): the statement executed when the explicit test matches is not
 * visible in this listing - presumably it returns non-zero; confirm.
 */
4136 static int fs_root_objectid(u64 objectid)
4138 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4139 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4141 return is_fstree(objectid);
/*
 * Walk the items of the tree root and run check_fs_root() on every ROOT_ITEM
 * whose objectid passes fs_root_objectid(); ROOT_REF and ROOT_BACKREF items
 * are handed to process_root_ref().
 *
 * @fs_info:    filesystem being checked; its tree_root is the tree searched
 * @root_cache: root records, passed on to check_fs_root() and emptied with
 *              free_root_recs_tree() when the tree root node changes or
 *              check_fs_root() returns -EAGAIN
 *
 * NOTE(review): the return statements are not visible in this listing, so
 * the return contract is not documented here.
 */
4144 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4145 struct cache_tree *root_cache)
4147 struct btrfs_path path;
4148 struct btrfs_key key;
4149 struct walk_control wc;
4150 struct extent_buffer *leaf, *tree_node;
4151 struct btrfs_root *tmp_root;
4152 struct btrfs_root *tree_root = fs_info->tree_root;
/* Record TASK_FS_ROOTS as the task position and start the progress task, if enabled. */
4156 if (ctx.progress_enabled) {
4157 ctx.tp = TASK_FS_ROOTS;
4158 task_start(ctx.info);
4162 * Just in case we made any changes to the extent tree that weren't
4163 * reflected into the free space cache yet.
4166 reset_cached_block_groups(fs_info);
4167 memset(&wc, 0, sizeof(wc));
4168 cache_tree_init(&wc.shared);
4169 btrfs_init_path(&path);
4174 key.type = BTRFS_ROOT_ITEM_KEY;
4175 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember which node the tree root had when the scan started. */
4180 tree_node = tree_root->node;
/*
 * The tree root node is no longer the one remembered above: drop the root
 * records collected so far and release the path.
 * NOTE(review): what follows (presumably a restart) is not visible here.
 */
4182 if (tree_node != tree_root->node) {
4183 free_root_recs_tree(root_cache);
4184 btrfs_release_path(&path);
4187 leaf = path.nodes[0];
4188 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4189 ret = btrfs_next_leaf(tree_root, &path);
4195 leaf = path.nodes[0];
4197 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4198 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4199 fs_root_objectid(key.objectid)) {
/*
 * Tree-reloc roots are read with btrfs_read_fs_root_no_cache() and freed
 * explicitly once checked; every other root goes through
 * btrfs_read_fs_root().
 */
4200 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4201 tmp_root = btrfs_read_fs_root_no_cache(
4204 key.offset = (u64)-1;
4205 tmp_root = btrfs_read_fs_root(
4208 if (IS_ERR(tmp_root)) {
4212 ret = check_fs_root(tmp_root, root_cache, &wc);
/* -EAGAIN: same cleanup as when the tree root node changed. */
4213 if (ret == -EAGAIN) {
4214 free_root_recs_tree(root_cache);
4215 btrfs_release_path(&path);
4220 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4221 btrfs_free_fs_root(tmp_root);
4222 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4223 key.type == BTRFS_ROOT_BACKREF_KEY) {
4224 process_root_ref(leaf, path.slots[0], &key,
4231 btrfs_release_path(&path);
/* The shared cache is expected to be empty after freeing; leftovers are only reported. */
4233 free_extent_cache_tree(&wc.shared);
4234 if (!cache_tree_empty(&wc.shared))
4235 fprintf(stderr, "warning line %d\n", __LINE__);
4237 task_stop(ctx.info);
4243 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4244 * INODE_REF/INODE_EXTREF match.
4246 * @root: the root of the fs/file tree
4247 * @ref_key: the key of the INODE_REF/INODE_EXTREF
4248 * @key: the key of the DIR_ITEM/DIR_INDEX
4249 * @index: the index in the INODE_REF/INODE_EXTREF, used to
4250 * distinguish root_dir from normal dir/file
4251 * @name: the name in the INODE_REF/INODE_EXTREF
4252 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4253 * @mode: the st_mode of INODE_ITEM
4255 * Return 0 if no error occurred.
4256 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4257 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4259 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4260 * do not match for normal dir/file.
4262 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4263 struct btrfs_key *key, u64 index, char *name,
4264 u32 namelen, u32 mode)
4266 struct btrfs_path path;
4267 struct extent_buffer *node;
4268 struct btrfs_dir_item *di;
4269 struct btrfs_key location;
4270 char namebuf[BTRFS_NAME_LEN] = {0};
/* Plain lookup of @key: NULL transaction, last two arguments 0. */
4280 btrfs_init_path(&path);
4281 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4283 ret = DIR_ITEM_MISSING;
4287 /* Process root dir and goto out*/
4290 ret = ROOT_DIR_ERROR;
4292 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4294 ref_key->type == BTRFS_INODE_REF_KEY ?
4296 ref_key->objectid, ref_key->offset,
4297 key->type == BTRFS_DIR_ITEM_KEY ?
4298 "DIR_ITEM" : "DIR_INDEX");
4306 /* Process normal file/dir */
4308 ret = DIR_ITEM_MISSING;
4310 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4312 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4313 ref_key->objectid, ref_key->offset,
4314 key->type == BTRFS_DIR_ITEM_KEY ?
4315 "DIR_ITEM" : "DIR_INDEX",
4316 key->objectid, key->offset, namelen, name,
4317 imode_to_type(mode));
4321 /* Check whether inode_id/filetype/name match */
4322 node = path.nodes[0];
4323 slot = path.slots[0];
4324 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4325 total = btrfs_item_size_nr(node, slot);
/*
 * The item is walked entry by entry up to its size; every pass starts out
 * assuming a mismatch.
 * NOTE(review): the statement that records a match is not visible here.
 */
4326 while (cur < total) {
4327 ret = DIR_ITEM_MISMATCH;
4328 name_len = btrfs_dir_name_len(node, di);
4329 data_len = btrfs_dir_data_len(node, di);
/* The entry must point at the INODE_ITEM (offset 0) of the inode owning the ref. */
4331 btrfs_dir_item_key_to_cpu(node, di, &location);
4332 if (location.objectid != ref_key->objectid ||
4333 location.type != BTRFS_INODE_ITEM_KEY ||
4334 location.offset != 0)
4337 filetype = btrfs_dir_type(node, di);
4338 if (imode_to_type(mode) != filetype)
/*
 * The stored name length overruns the item or BTRFS_NAME_LEN: warn and
 * clamp the length that will be copied into namebuf.
 */
4341 if (cur + sizeof(*di) + name_len > total ||
4342 name_len > BTRFS_NAME_LEN) {
4343 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4345 key->type == BTRFS_DIR_ITEM_KEY ?
4346 "DIR_ITEM" : "DIR_INDEX",
4347 key->objectid, key->offset, name_len);
4349 if (cur + sizeof(*di) > total)
4351 len = min_t(u32, total - cur - sizeof(*di),
/* The name bytes start right after the dir item header. */
4357 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4358 if (len != namelen || strncmp(namebuf, name, len))
/* Step to the next entry: header, then name, then data. */
4364 len = sizeof(*di) + name_len + data_len;
4365 di = (struct btrfs_dir_item *)((char *)di + len);
/* Only the mismatch case is reported here; the missing case was reported above. */
4368 if (ret == DIR_ITEM_MISMATCH)
4370 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4372 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4373 ref_key->objectid, ref_key->offset,
4374 key->type == BTRFS_DIR_ITEM_KEY ?
4375 "DIR_ITEM" : "DIR_INDEX",
4376 key->objectid, key->offset, namelen, name,
4377 imode_to_type(mode));
4379 btrfs_release_path(&path);
4384 * Traverse the given INODE_REF and call find_dir_item() to find related
4385 * DIR_ITEM/DIR_INDEX.
4387 * @root: the root of the fs/file tree
4388 * @ref_key: the key of the INODE_REF
4389 * @refs: the count of INODE_REF
4390 * @mode: the st_mode of INODE_ITEM
4392 * Return 0 if no error occurred.
4394 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4395 struct extent_buffer *node, int slot, u64 *refs,
4398 struct btrfs_key key;
4399 struct btrfs_inode_ref *ref;
4400 char namebuf[BTRFS_NAME_LEN] = {0};
/* Start at the first ref stored in the item; 'total' is the item size. */
4408 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4409 total = btrfs_item_size_nr(node, slot);
4412 /* Update inode ref count */
4415 index = btrfs_inode_ref_index(node, ref);
4416 name_len = btrfs_inode_ref_name_len(node, ref);
/*
 * The stored name length overruns the item or BTRFS_NAME_LEN: warn, then
 * copy at most what is left in the item, capped at BTRFS_NAME_LEN.
 */
4417 if (cur + sizeof(*ref) + name_len > total ||
4418 name_len > BTRFS_NAME_LEN) {
4419 warning("root %llu INODE_REF[%llu %llu] name too long",
4420 root->objectid, ref_key->objectid, ref_key->offset);
4422 if (total < cur + sizeof(*ref))
4424 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
/* The name bytes start right after the ref header. */
4429 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4431 /* Check root dir ref name */
4432 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4433 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4434 root->objectid, ref_key->objectid, ref_key->offset,
4436 err |= ROOT_DIR_ERROR;
4439 /* Find related DIR_INDEX */
/* The offset of the INODE_REF key is used as the objectid of the directory entries. */
4440 key.objectid = ref_key->offset;
4441 key.type = BTRFS_DIR_INDEX_KEY;
4443 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4446 /* Find related dir_item */
4447 key.objectid = ref_key->offset;
4448 key.type = BTRFS_DIR_ITEM_KEY;
4449 key.offset = btrfs_name_hash(namebuf, len);
4450 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Advance by the stored name_len (not the clamped len) to reach the next ref. */
4453 len = sizeof(*ref) + name_len;
4454 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4464 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4465 * DIR_ITEM/DIR_INDEX.
4467 * @root: the root of the fs/file tree
4468 * @ref_key: the key of the INODE_EXTREF
4469 * @refs: the count of INODE_EXTREF
4470 * @mode: the st_mode of INODE_ITEM
4472 * Return 0 if no error occurred.
4474 static int check_inode_extref(struct btrfs_root *root,
4475 struct btrfs_key *ref_key,
4476 struct extent_buffer *node, int slot, u64 *refs,
4479 struct btrfs_key key;
4480 struct btrfs_inode_extref *extref;
4481 char namebuf[BTRFS_NAME_LEN] = {0};
/* Start at the first extref stored in the item; 'total' is the item size. */
4491 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4492 total = btrfs_item_size_nr(node, slot);
4495 /* update inode ref count */
4497 name_len = btrfs_inode_extref_name_len(node, extref);
4498 index = btrfs_inode_extref_index(node, extref);
4499 parent = btrfs_inode_extref_parent(node, extref);
/*
 * NOTE(review): in the visible code the name length is only compared with
 * BTRFS_NAME_LEN; unlike check_inode_ref(), no bound against the item size
 * ('total') is visible here - confirm whether one is needed.
 */
4500 if (name_len <= BTRFS_NAME_LEN) {
4503 len = BTRFS_NAME_LEN;
4504 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4505 root->objectid, ref_key->objectid, ref_key->offset);
/* The name bytes start right after the extref header. */
4507 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4509 /* Check root dir ref name */
4510 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4511 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4512 root->objectid, ref_key->objectid, ref_key->offset,
4514 err |= ROOT_DIR_ERROR;
4517 /* find related dir_index */
/* The directory objectid comes from the extref's own parent field. */
4518 key.objectid = parent;
4519 key.type = BTRFS_DIR_INDEX_KEY;
4521 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4524 /* find related dir_item */
4525 key.objectid = parent;
4526 key.type = BTRFS_DIR_ITEM_KEY;
4527 key.offset = btrfs_name_hash(namebuf, len);
4528 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Advance by the stored name_len (not the clamped len) to reach the next extref. */
4531 len = sizeof(*extref) + name_len;
4532 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4542 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4543 * DIR_ITEM/DIR_INDEX match.
4545 * @root: the root of the fs/file tree
4546 * @key: the key of the INODE_REF/INODE_EXTREF
4547 * @name: the name in the INODE_REF/INODE_EXTREF
4548 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4549 * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4551 * @ext_ref: the EXTENDED_IREF feature
4553 * Return 0 if no error occurred.
4554 * Return >0 for error bitmap
4556 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4557 char *name, int namelen, u64 index,
4558 unsigned int ext_ref)
4560 struct btrfs_path path;
4561 struct btrfs_inode_ref *ref;
4562 struct btrfs_inode_extref *extref;
4563 struct extent_buffer *node;
4564 char ref_namebuf[BTRFS_NAME_LEN] = {0};
/* First pass: look @key up as given (the caller passes an INODE_REF key). */
4575 btrfs_init_path(&path);
4576 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4578 ret = INODE_REF_MISSING;
4582 node = path.nodes[0];
4583 slot = path.slots[0];
4585 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4586 total = btrfs_item_size_nr(node, slot);
4588 /* Iterate over all entries of INODE_REF */
4589 while (cur < total) {
4590 ret = INODE_REF_MISSING;
4592 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4593 ref_index = btrfs_inode_ref_index(node, ref);
/* An @index of (u64)-1 disables the index comparison. */
4594 if (index != (u64)-1 && index != ref_index)
/* Over-long or overflowing name: warn and clamp the length that is copied. */
4597 if (cur + sizeof(*ref) + ref_namelen > total ||
4598 ref_namelen > BTRFS_NAME_LEN) {
4599 warning("root %llu INODE %s[%llu %llu] name too long",
4601 key->type == BTRFS_INODE_REF_KEY ?
4603 key->objectid, key->offset);
4605 if (cur + sizeof(*ref) > total)
4607 len = min_t(u32, total - cur - sizeof(*ref),
4613 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4616 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Advance by the stored name length to reach the next ref. */
4622 len = sizeof(*ref) + ref_namelen;
4623 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4628 /* Skip if not support EXTENDED_IREF feature */
/*
 * Second pass: search again with an INODE_EXTREF key. Note that the
 * caller's @key is rewritten in place: its type becomes
 * BTRFS_INODE_EXTREF_KEY and its offset the extref hash of the old offset
 * (kept in dir_id) and the name.
 */
4632 btrfs_release_path(&path);
4633 btrfs_init_path(&path);
4635 dir_id = key->offset;
4636 key->type = BTRFS_INODE_EXTREF_KEY;
4637 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4639 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4641 ret = INODE_REF_MISSING;
4645 node = path.nodes[0];
4646 slot = path.slots[0];
4648 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4650 total = btrfs_item_size_nr(node, slot);
4652 /* Iterate over all entries of INODE_EXTREF */
4653 while (cur < total) {
4654 ret = INODE_REF_MISSING;
4656 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4657 ref_index = btrfs_inode_extref_index(node, extref);
4658 parent = btrfs_inode_extref_parent(node, extref);
4659 if (index != (u64)-1 && index != ref_index)
/* The extref must belong to the directory taken from the original key offset. */
4662 if (parent != dir_id)
4665 if (ref_namelen <= BTRFS_NAME_LEN) {
4668 len = BTRFS_NAME_LEN;
4669 warning("root %llu INODE %s[%llu %llu] name too long",
4671 key->type == BTRFS_INODE_REF_KEY ?
4673 key->objectid, key->offset);
4675 read_extent_buffer(node, ref_namebuf,
4676 (unsigned long)(extref + 1), len);
4678 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Advance by the stored name length to reach the next extref. */
4685 len = sizeof(*extref) + ref_namelen;
4686 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4691 btrfs_release_path(&path);
4696 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4697 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4699 * @root: the root of the fs/file tree
4700 * @key: the key of the DIR_ITEM/DIR_INDEX
4701 * @size: the st_size of the INODE_ITEM
4702 * @ext_ref: the EXTENDED_IREF feature
4704 * Return 0 if no error occurred.
4706 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4707 struct extent_buffer *node, int slot, u64 *size,
4708 unsigned int ext_ref)
4710 struct btrfs_dir_item *di;
4711 struct btrfs_inode_item *ii;
4712 struct btrfs_path path;
4713 struct btrfs_key location;
4714 char namebuf[BTRFS_NAME_LEN] = {0};
4727 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4728 * ignore index check.
4730 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4732 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4733 total = btrfs_item_size_nr(node, slot);
/* Walk the entries stored in this item, bounded by the item size. */
4735 while (cur < total) {
4736 data_len = btrfs_dir_data_len(node, di);
4738 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4739 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4740 "DIR_ITEM" : "DIR_INDEX",
4741 key->objectid, key->offset, data_len);
4743 name_len = btrfs_dir_name_len(node, di);
/* Over-long or overflowing name: warn and clamp the length that is copied. */
4744 if (cur + sizeof(*di) + name_len > total ||
4745 name_len > BTRFS_NAME_LEN) {
4746 warning("root %llu %s[%llu %llu] name too long",
4748 key->type == BTRFS_DIR_ITEM_KEY ?
4749 "DIR_ITEM" : "DIR_INDEX",
4750 key->objectid, key->offset);
4752 if (cur + sizeof(*di) > total)
4754 len = min_t(u32, total - cur - sizeof(*di),
/* The caller's running size grows by the stored name length, not the clamped one. */
4759 (*size) += name_len;
4761 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4762 filetype = btrfs_dir_type(node, di);
/* For a DIR_ITEM the key offset must equal the hash of the name. */
4764 if (key->type == BTRFS_DIR_ITEM_KEY &&
4765 key->offset != btrfs_name_hash(namebuf, len)) {
4767 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4768 root->objectid, key->objectid, key->offset,
4769 namebuf, len, filetype, key->offset,
4770 btrfs_name_hash(namebuf, len));
4773 btrfs_init_path(&path);
4774 btrfs_dir_item_key_to_cpu(node, di, &location);
4776 /* Ignore related ROOT_ITEM check */
4777 if (location.type == BTRFS_ROOT_ITEM_KEY)
4780 /* Check relative INODE_ITEM(existence/filetype) */
4781 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4783 err |= INODE_ITEM_MISSING;
4784 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4785 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4786 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4787 key->offset, location.objectid, name_len,
4792 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4793 struct btrfs_inode_item);
4794 mode = btrfs_inode_mode(path.nodes[0], ii);
4796 if (imode_to_type(mode) != filetype) {
4797 err |= INODE_ITEM_MISMATCH;
4798 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4799 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4800 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4801 key->offset, name_len, namebuf, filetype);
4804 /* Check relative INODE_REF/INODE_EXTREF */
/*
 * 'location' is reused as the INODE_REF key: its objectid is left as read
 * from the dir entry, its offset becomes the objectid of this dir item key.
 */
4805 location.type = BTRFS_INODE_REF_KEY;
4806 location.offset = key->objectid;
4807 ret = find_inode_ref(root, &location, namebuf, len,
4810 if (ret & INODE_REF_MISSING)
4811 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4812 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4813 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4814 key->offset, name_len, namebuf, filetype);
/* Release the per-entry path, then step to the next entry: header, name, data. */
4817 btrfs_release_path(&path);
4818 len = sizeof(*di) + name_len + data_len;
4819 di = (struct btrfs_dir_item *)((char *)di + len);
/* Bytes left in a DIR_INDEX item after an entry was processed are reported as an error. */
4822 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4823 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4824 root->objectid, key->objectid, key->offset);
4833 * Check file extent datasum/hole, update the size of the file extents,
4834 * check and update the last offset of the file extent.
4836 * @root: the root of fs/file tree.
4837 * @fkey: the key of the file extent.
4838 * @nodatasum: INODE_NODATASUM feature.
4839 * @size: the sum of all EXTENT_DATA items size for this inode.
4840 * @end: the offset of the last extent.
4842 * Return 0 if no error occurred.
4844 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4845 struct extent_buffer *node, int slot,
4846 unsigned int nodatasum, u64 *size, u64 *end)
4848 struct btrfs_file_extent_item *fi;
4851 u64 extent_num_bytes;
4853 u64 csum_found; /* In byte size, sectorsize aligned */
4854 u64 search_start; /* Logical range start we search for csum */
4855 u64 search_len; /* Logical range len we search for csum */
4856 unsigned int extent_type;
4857 unsigned int is_hole;
4862 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4864 /* Check inline extent */
4865 extent_type = btrfs_file_extent_type(node, fi);
4866 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4867 struct btrfs_item *e = btrfs_item_nr(slot);
4868 u32 item_inline_len;
4870 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4871 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4872 compressed = btrfs_file_extent_compression(node, fi);
4873 if (extent_num_bytes == 0) {
4875 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4876 root->objectid, fkey->objectid, fkey->offset);
4877 err |= FILE_EXTENT_ERROR;
/* Only uncompressed inline extents are required to match the item's inline length. */
4879 if (!compressed && extent_num_bytes != item_inline_len) {
4881 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4882 root->objectid, fkey->objectid, fkey->offset,
4883 extent_num_bytes, item_inline_len);
4884 err |= FILE_EXTENT_ERROR;
/* Inline extents advance both the running end offset and the size total. */
4886 *end += extent_num_bytes;
4887 *size += extent_num_bytes;
4891 /* Check extent type */
4892 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4893 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4894 err |= FILE_EXTENT_ERROR;
4895 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4896 root->objectid, fkey->objectid, fkey->offset);
4900 /* Check REG_EXTENT/PREALLOC_EXTENT */
4901 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4902 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4903 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4904 extent_offset = btrfs_file_extent_offset(node, fi);
4905 compressed = btrfs_file_extent_compression(node, fi);
/* A hole is an extent whose disk_bytenr and disk_num_bytes are both zero. */
4906 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4909 * Check EXTENT_DATA csum
4911 * For plain (uncompressed) extent, we should only check the range
4912 * we're referring to, as it's possible that part of prealloc extent
4913 * has been written, and has csum:
4915 * |<--- Original large preallocated extent A ---->|
4916 * |<- Prealloc File Extent ->|<- Regular Extent ->|
4919 * For compressed extent, we should check the whole range.
4922 search_start = disk_bytenr + extent_offset;
4923 search_len = extent_num_bytes;
4925 search_start = disk_bytenr;
4926 search_len = disk_num_bytes;
/*
 * A failing count (ret < 0) is not handled on its own; it only feeds the
 * "csum missing" branch below.
 */
4928 ret = count_csum_range(root, search_start, search_len, &csum_found);
4929 if (csum_found > 0 && nodatasum) {
4930 err |= ODD_CSUM_ITEM;
4931 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4932 root->objectid, fkey->objectid, fkey->offset);
4933 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4934 !is_hole && (ret < 0 || csum_found < search_len)) {
4935 err |= CSUM_ITEM_MISSING;
4936 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4937 root->objectid, fkey->objectid, fkey->offset,
4938 csum_found, search_len);
4939 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4940 err |= ODD_CSUM_ITEM;
4941 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4942 root->objectid, fkey->objectid, fkey->offset, csum_found);
4945 /* Check EXTENT_DATA hole */
4946 if (!no_holes && *end != fkey->offset) {
4947 err |= FILE_EXTENT_ERROR;
4948 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4949 root->objectid, fkey->objectid, fkey->offset);
4952 *end += extent_num_bytes;
4954 *size += extent_num_bytes;
4960 * Set inode item nbytes to @nbytes
4962 * Returns 0 on success
4963 * Returns != 0 on error
/*
 * Store @nbytes in the nbytes field of an INODE_ITEM of @root.
 *
 * @path is the caller's position: it is released and re-searched here, and
 * at the end it is searched again for the key it referenced on entry.
 * NOTE(review): the assignment of key.objectid is not visible in this
 * listing - presumably it is set from @ino; confirm.
 */
4965 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
4966 struct btrfs_path *path,
4967 u64 ino, u64 nbytes)
4969 struct btrfs_trans_handle *trans;
4970 struct btrfs_inode_item *ii;
4971 struct btrfs_key key;
4972 struct btrfs_key research_key;
/* Remember the key the caller's path points at so it can be searched again at the end. */
4976 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
4979 key.type = BTRFS_INODE_ITEM_KEY;
4982 trans = btrfs_start_transaction(root, 1);
4983 if (IS_ERR(trans)) {
4984 ret = PTR_ERR(trans);
/*
 * Drop the caller's position and search for the inode item inside the
 * transaction, with the last argument set to 1 (presumably requesting a
 * modifiable path - confirm against btrfs_search_slot()).
 */
4989 btrfs_release_path(path);
4990 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
4998 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
4999 struct btrfs_inode_item);
5000 btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5001 btrfs_mark_buffer_dirty(path->nodes[0]);
5003 btrfs_commit_transaction(trans, root);
5006 error("failed to set nbytes in inode %llu root %llu",
5007 ino, root->root_key.objectid);
5009 printf("Set nbytes in inode item %llu root %llu to %llu\n", ino,
5010 root->root_key.objectid, nbytes);
/* Search again for the key the path referenced on entry. */
5013 btrfs_release_path(path);
5014 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5021 * Set directory inode isize to @isize.
5023 * Returns 0 on success.
5024 * Returns != 0 on error.
5026 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5027 struct btrfs_path *path,
5030 struct btrfs_trans_handle *trans;
5031 struct btrfs_inode_item *ii;
5032 struct btrfs_key key;
5033 struct btrfs_key research_key;
/* Remember the key the caller's path points at so it can be searched again at the end. */
5037 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5040 key.type = BTRFS_INODE_ITEM_KEY;
5043 trans = btrfs_start_transaction(root, 1);
5044 if (IS_ERR(trans)) {
5045 ret = PTR_ERR(trans);
/*
 * Drop the caller's position and search for the inode item inside the
 * transaction, with the last argument set to 1 (presumably requesting a
 * modifiable path - confirm against btrfs_search_slot()).
 */
5050 btrfs_release_path(path);
5051 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5059 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5060 struct btrfs_inode_item);
5061 btrfs_set_inode_size(path->nodes[0], ii, isize);
5062 btrfs_mark_buffer_dirty(path->nodes[0]);
5064 btrfs_commit_transaction(trans, root);
5067 error("failed to set isize in inode %llu root %llu",
5068 ino, root->root_key.objectid);
5070 printf("Set isize in inode %llu root %llu to %llu\n",
5071 ino, root->root_key.objectid, isize);
/* Search again for the key the path referenced on entry. */
5073 btrfs_release_path(path);
5074 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5081 * Wrapper function for btrfs_add_orphan_item().
5083 * Returns 0 on success.
5084 * Returns != 0 on error.
5086 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5087 struct btrfs_path *path, u64 ino)
5089 struct btrfs_trans_handle *trans;
5090 struct btrfs_key research_key;
/* Remember the key the caller's path points at so it can be searched again at the end. */
5094 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5096 trans = btrfs_start_transaction(root, 1);
5097 if (IS_ERR(trans)) {
5098 ret = PTR_ERR(trans);
/* The caller's path is released first and then handed to btrfs_add_orphan_item(). */
5103 btrfs_release_path(path);
5104 ret = btrfs_add_orphan_item(trans, root, path, ino);
5106 btrfs_commit_transaction(trans, root);
5109 error("failed to add inode %llu as orphan item root %llu",
5110 ino, root->root_key.objectid);
5112 printf("Added inode %llu as orphan item root %llu\n",
5113 ino, root->root_key.objectid);
/* Search again for the key the path referenced on entry. */
5115 btrfs_release_path(path);
5116 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5123 * Check INODE_ITEM and related ITEMs (the same inode number)
5124 * 1. check link count
5125 * 2. check inode ref/extref
5126 * 3. check dir item/index
5128 * @ext_ref: the EXTENDED_IREF feature
5130 * Return 0 if no error occurred.
5131 * Return >0 (error bitmap) on error or when the traversal is done
5133 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5134 unsigned int ext_ref)
5136 struct extent_buffer *node;
5137 struct btrfs_inode_item *ii;
5138 struct btrfs_key key;
5147 u64 extent_size = 0;
5149 unsigned int nodatasum;
5154 node = path->nodes[0];
5155 slot = path->slots[0];
5157 btrfs_item_key_to_cpu(node, &key, slot);
5158 inode_id = key.objectid;
/* Items keyed by BTRFS_ORPHAN_OBJECTID are not inode items: just step past them. */
5160 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5161 ret = btrfs_next_item(root, path);
/* Cache the inode fields used by the per-item checks and the final verification. */
5167 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5168 isize = btrfs_inode_size(node, ii);
5169 nbytes = btrfs_inode_nbytes(node, ii);
5170 mode = btrfs_inode_mode(node, ii);
5171 dir = imode_to_type(mode) == BTRFS_FT_DIR;
5172 nlink = btrfs_inode_nlink(node, ii);
5173 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5176 ret = btrfs_next_item(root, path);
5178 /* out will fill 'err' using current statistics */
5180 } else if (ret > 0) {
/* Refresh node/slot from the path after btrfs_next_item(). */
5185 node = path->nodes[0];
5186 slot = path->slots[0];
5187 btrfs_item_key_to_cpu(node, &key, slot);
/*
 * An item with a different objectid belongs to another inode.
 * NOTE(review): the statement taken here is not visible in this listing.
 */
5188 if (key.objectid != inode_id)
5192 case BTRFS_INODE_REF_KEY:
5193 ret = check_inode_ref(root, &key, node, slot, &refs,
5197 case BTRFS_INODE_EXTREF_KEY:
5198 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5199 warning("root %llu EXTREF[%llu %llu] isn't supported",
5200 root->objectid, key.objectid,
5202 ret = check_inode_extref(root, &key, node, slot, &refs,
5206 case BTRFS_DIR_ITEM_KEY:
5207 case BTRFS_DIR_INDEX_KEY:
5209 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5210 root->objectid, inode_id,
5211 imode_to_type(mode), key.objectid,
5214 ret = check_dir_item(root, &key, node, slot, &size,
5218 case BTRFS_EXTENT_DATA_KEY:
5220 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5221 root->objectid, inode_id, key.objectid,
5224 ret = check_file_extent(root, &key, node, slot,
5225 nodatasum, &extent_size,
5229 case BTRFS_XATTR_ITEM_KEY:
5232 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5233 key.objectid, key.type, key.offset);
5238 /* verify INODE_ITEM nlink/isize/nbytes */
5241 err |= LINK_COUNT_ERROR;
5242 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5243 root->objectid, inode_id, nlink);
5247 * Just a warning, as dir inode nbytes is just an
5248 * instructive value.
5250 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5251 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5252 root->objectid, inode_id,
5253 root->fs_info->nodesize);
/* 'size' was accumulated by check_dir_item() from the entry name lengths. */
5256 if (isize != size) {
5258 ret = repair_dir_isize_lowmem(root, path,
5260 if (!repair || ret) {
5263 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5264 root->objectid, inode_id, isize, size);
/* 'refs' was accumulated by check_inode_ref() / check_inode_extref(). */
5268 if (nlink != refs) {
5269 err |= LINK_COUNT_ERROR;
5270 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5271 root->objectid, inode_id, nlink, refs);
5272 } else if (!nlink) {
5274 ret = repair_inode_orphan_item_lowmem(root,
5276 if (!repair || ret) {
5278 error("root %llu INODE[%llu] is orphan item",
5279 root->objectid, inode_id);
5283 if (!nbytes && !no_holes && extent_end < isize) {
5284 err |= NBYTES_ERROR;
5285 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5286 root->objectid, inode_id, isize);
/* 'extent_size' was accumulated by check_file_extent(). */
5289 if (nbytes != extent_size) {
5291 ret = repair_inode_nbytes_lowmem(root, path,
5292 inode_id, extent_size);
5293 if (!repair || ret) {
5294 err |= NBYTES_ERROR;
5296 "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5297 root->objectid, inode_id, nbytes,
/*
 * Check the INODE_ITEM keyed by BTRFS_FIRST_FREE_OBJECTID in @root.
 *
 * @root:    tree to search
 * @ext_ref: passed through to check_inode_item()
 *
 * INODE_ITEM_MISSING and the result of check_inode_item() are ORed into the
 * error bitmap 'err'.
 * NOTE(review): the conditions selecting between those two outcomes and the
 * return statement are not visible in this listing.
 */
5306 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5308 struct btrfs_path path;
5309 struct btrfs_key key;
5313 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5314 key.type = BTRFS_INODE_ITEM_KEY;
5317 /* For root being dropped, we don't need to check first inode */
5318 if (btrfs_root_refs(&root->root_item) == 0 &&
5319 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5323 btrfs_init_path(&path);
5325 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5330 err |= INODE_ITEM_MISSING;
5331 error("first inode item of root %llu is missing",
5335 err |= check_inode_item(root, &path, ext_ref);
5340 btrfs_release_path(&path);
/*
 * Look up a tree backref of @rec in rec->backref_tree.
 *
 * A stack-allocated 'match' backref is filled in and used as the search key
 * for rb_search() with compare_extent_backref() as comparator.
 * NOTE(review): the initializer of 'match', the condition choosing between
 * the @parent (full backref) form and the @root form, and the return
 * statement are not visible in this listing; 'back' starts out NULL.
 */
5344 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5345 u64 parent, u64 root)
5347 struct rb_node *node;
5348 struct tree_backref *back = NULL;
5349 struct tree_backref match = {
5356 match.parent = parent;
5357 match.node.full_backref = 1;
5362 node = rb_search(&rec->backref_tree, &match.node.node,
5363 (rb_compare_keys)compare_extent_backref, NULL);
/* Convert the rb-tree node that was found back into its tree_backref. */
5365 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Look up a data backref of @rec in rec->backref_tree.
 *
 * A stack-allocated 'match' backref is filled in from the arguments and used
 * as the search key for rb_search() with compare_extent_backref() as
 * comparator.
 * NOTE(review): part of the parameter list, most of the initializer of
 * 'match', the condition choosing the @parent (full backref) form, and the
 * return statement are not visible in this listing; 'back' starts out NULL.
 */
5370 static struct data_backref *find_data_backref(struct extent_record *rec,
5371 u64 parent, u64 root,
5372 u64 owner, u64 offset,
5374 u64 disk_bytenr, u64 bytes)
5376 struct rb_node *node;
5377 struct data_backref *back = NULL;
5378 struct data_backref match = {
5385 .found_ref = found_ref,
5386 .disk_bytenr = disk_bytenr,
5390 match.parent = parent;
5391 match.node.full_backref = 1;
5396 node = rb_search(&rec->backref_tree, &match.node.node,
5397 (rb_compare_keys)compare_extent_backref, NULL);
/* Convert the rb-tree node that was found back into its data_backref. */
5399 back = to_data_backref(rb_node_to_extent_backref(node));
5404 * Iterate all item on the tree and call check_inode_item() to check.
5406 * @root: the root of the tree to be checked.
5407 * @ext_ref: the EXTENDED_IREF feature
5409 * Return 0 if no error found.
5410 * Return <0 for error.
5412 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5414 struct btrfs_path path;
5415 struct node_refs nrefs;
5416 struct btrfs_root_item *root_item = &root->root_item;
5422 * We need to manually check the first inode item(256)
5423 * As the following traversal function will only start from
5424 * the first inode item in the leaf, if inode item(256) is missing
5425 * we will just skip it forever.
5427 ret = check_fs_first_inode(root, ext_ref);
5431 memset(&nrefs, 0, sizeof(nrefs));
5432 level = btrfs_header_level(root->node);
5433 btrfs_init_path(&path);
/*
 * A root that still has references, or whose drop_progress objectid is 0,
 * is walked from slot 0 of its root node. Otherwise the walk starts at the
 * recorded drop_progress key, searched down to drop_level.
 */
5435 if (btrfs_root_refs(root_item) > 0 ||
5436 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5437 path.nodes[level] = root->node;
5438 path.slots[level] = 0;
5439 extent_buffer_get(root->node);
5441 struct btrfs_key key;
5443 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5444 level = root_item->drop_level;
5445 path.lowest_level = level;
5446 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/*
 * Walk down, then back up.
 * NOTE(review): the enclosing loop and its exit conditions are not visible
 * in this listing.
 */
5453 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5456 /* if ret is negative, walk shall stop */
5462 ret = walk_up_tree_v2(root, &path, &level);
5464 /* Normal exit, reset ret to err */
5471 btrfs_release_path(&path);
5476 * Find the relative ref for root_ref and root_backref.
5478 * @root: the root of the root tree.
5479 * @ref_key: the key of the root ref.
5481 * Return 0 if no error occurred.
/*
 * Cross-check a ROOT_REF / ROOT_BACKREF item against its counterpart.
 *
 * @root:    tree that is searched for the counterpart item
 * @ref_key: key of the item being checked
 * @node:    extent buffer holding the item
 * @slot:    slot of the item inside @node
 *
 * The item's dirid, sequence and name are read from @node.  The counterpart
 * key is built by swapping objectid and offset and flipping the key type,
 * then looked up with btrfs_search_slot().  A failed lookup sets
 * ROOT_REF_MISSING in err; differing dirid, sequence, name length or name
 * sets ROOT_REF_MISMATCH.  Names longer than BTRFS_NAME_LEN are clamped
 * and a warning is printed.
 */
5483 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5484 struct extent_buffer *node, int slot)
5486 struct btrfs_path path;
5487 struct btrfs_key key;
5488 struct btrfs_root_ref *ref;
5489 struct btrfs_root_ref *backref;
5490 char ref_name[BTRFS_NAME_LEN] = {0};
5491 char backref_name[BTRFS_NAME_LEN] = {0};
5497 u32 backref_namelen;
5502 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5503 ref_dirid = btrfs_root_ref_dirid(node, ref);
5504 ref_seq = btrfs_root_ref_sequence(node, ref);
5505 ref_namelen = btrfs_root_ref_name_len(node, ref);
5507 if (ref_namelen <= BTRFS_NAME_LEN) {
5510 len = BTRFS_NAME_LEN;
5511 warning("%s[%llu %llu] ref_name too long",
5512 ref_key->type == BTRFS_ROOT_REF_KEY ?
5513 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
/* The name bytes are stored directly after the btrfs_root_ref struct. */
5516 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5518 /* Find relative root_ref */
5519 key.objectid = ref_key->offset;
/* REF + BACKREF - type yields the opposite of the two key types. */
5520 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5521 key.offset = ref_key->objectid;
5523 btrfs_init_path(&path);
5524 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5526 err |= ROOT_REF_MISSING;
5527 error("%s[%llu %llu] couldn't find relative ref",
5528 ref_key->type == BTRFS_ROOT_REF_KEY ?
5529 "ROOT_REF" : "ROOT_BACKREF",
5530 ref_key->objectid, ref_key->offset);
5534 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5535 struct btrfs_root_ref);
5536 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5537 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5538 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5540 if (backref_namelen <= BTRFS_NAME_LEN) {
5541 len = backref_namelen;
5543 len = BTRFS_NAME_LEN;
5544 warning("%s[%llu %llu] ref_name too long",
5545 key.type == BTRFS_ROOT_REF_KEY ?
5546 "ROOT_REF" : "ROOT_BACKREF",
5547 key.objectid, key.offset);
5549 read_extent_buffer(path.nodes[0], backref_name,
5550 (unsigned long)(backref + 1), len);
5552 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5553 ref_namelen != backref_namelen ||
5554 strncmp(ref_name, backref_name, len)) {
5555 err |= ROOT_REF_MISMATCH;
5556 error("%s[%llu %llu] mismatch relative ref",
5557 ref_key->type == BTRFS_ROOT_REF_KEY ?
5558 "ROOT_REF" : "ROOT_BACKREF",
5559 ref_key->objectid, ref_key->offset);
5562 btrfs_release_path(&path);
5567 * Check all fs/file tree in low_memory mode.
5569 * 1. for fs tree root item, call check_fs_root_v2()
5570 * 2. for fs tree root ref/backref, call check_root_ref()
5572 * Return 0 if no error occurred.
/*
 * Scan fs_info->tree_root starting at the ROOT_ITEM key of
 * BTRFS_FS_TREE_OBJECTID and dispatch each item:
 *  - ROOT_ITEM of an fs tree: read the root and run check_fs_root_v2() on
 *    it; BTRFS_TREE_RELOC_OBJECTID roots are read with
 *    btrfs_read_fs_root_no_cache() and released with btrfs_free_fs_root()
 *  - ROOT_REF / ROOT_BACKREF: run check_root_ref()
 * Iteration advances with btrfs_next_item().  Objectids above
 * BTRFS_LAST_FREE_OBJECTID are tested for -- presumably to end the scan,
 * confirm against the complete definition.
 */
5574 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5576 struct btrfs_root *tree_root = fs_info->tree_root;
5577 struct btrfs_root *cur_root = NULL;
5578 struct btrfs_path path;
5579 struct btrfs_key key;
5580 struct extent_buffer *node;
5581 unsigned int ext_ref;
5586 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5588 btrfs_init_path(&path);
5589 key.objectid = BTRFS_FS_TREE_OBJECTID;
5591 key.type = BTRFS_ROOT_ITEM_KEY;
5593 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5597 } else if (ret > 0) {
5603 node = path.nodes[0];
5604 slot = path.slots[0];
5605 btrfs_item_key_to_cpu(node, &key, slot);
5606 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5608 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5609 fs_root_objectid(key.objectid)) {
5610 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5611 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5614 key.offset = (u64)-1;
5615 cur_root = btrfs_read_fs_root(fs_info, &key);
5618 if (IS_ERR(cur_root)) {
5619 error("Fail to read fs/subvol tree: %lld",
5625 ret = check_fs_root_v2(cur_root, ext_ref);
/* Reloc roots were read without the cache, so they are freed here. */
5628 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5629 btrfs_free_fs_root(cur_root);
5630 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5631 key.type == BTRFS_ROOT_BACKREF_KEY) {
5632 ret = check_root_ref(tree_root, &key, node, slot);
5636 ret = btrfs_next_item(tree_root, &path);
5646 btrfs_release_path(&path);
/*
 * Entry point for the fs-roots check.  Prints "checking fs roots" to stderr
 * unless ctx.progress_enabled is set, then runs check_fs_roots_v2() when
 * check_mode is CHECK_MODE_LOWMEM; check_fs_roots() with @root_cache is the
 * other visible call.
 */
5650 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
5651 struct cache_tree *root_cache)
5655 if (!ctx.progress_enabled)
5656 fprintf(stderr, "checking fs roots\n");
5657 if (check_mode == CHECK_MODE_LOWMEM)
5658 ret = check_fs_roots_v2(fs_info);
5660 ret = check_fs_roots(fs_info, root_cache);
/*
 * Validate every backref attached to @rec (rec->backref_tree).
 *
 * Conditions the visible code tests for and reports on stderr:
 *  - backref without found_extent_tree set
 *  - tree backref without found_ref set
 *  - data backref whose found_ref differs from num_refs
 *  - data backref whose disk_bytenr differs from rec->start
 *  - data backref whose bytes differs from rec->nr
 *  - accumulated 'found' count differing from rec->refs
 *
 * maybe_free_extent_rec() treats a zero return as "everything matched".
 * NOTE(review): @print_errs presumably gates the fprintf() calls -- the
 * guarding statements are not visible here, confirm.
 */
5665 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5667 struct extent_backref *back, *tmp;
5668 struct tree_backref *tback;
5669 struct data_backref *dback;
5673 rbtree_postorder_for_each_entry_safe(back, tmp,
5674 &rec->backref_tree, node) {
5675 if (!back->found_extent_tree) {
5679 if (back->is_data) {
5680 dback = to_data_backref(back);
5681 fprintf(stderr, "Data backref %llu %s %llu"
5682 " owner %llu offset %llu num_refs %lu"
5683 " not found in extent tree\n",
5684 (unsigned long long)rec->start,
5685 back->full_backref ?
5687 back->full_backref ?
5688 (unsigned long long)dback->parent:
5689 (unsigned long long)dback->root,
5690 (unsigned long long)dback->owner,
5691 (unsigned long long)dback->offset,
5692 (unsigned long)dback->num_refs);
5694 tback = to_tree_backref(back);
5695 fprintf(stderr, "Tree backref %llu parent %llu"
5696 " root %llu not found in extent tree\n",
5697 (unsigned long long)rec->start,
5698 (unsigned long long)tback->parent,
5699 (unsigned long long)tback->root);
5702 if (!back->is_data && !back->found_ref) {
5706 tback = to_tree_backref(back);
5707 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5708 (unsigned long long)rec->start,
5709 back->full_backref ? "parent" : "root",
5710 back->full_backref ?
5711 (unsigned long long)tback->parent :
5712 (unsigned long long)tback->root, back);
5714 if (back->is_data) {
5715 dback = to_data_backref(back);
5716 if (dback->found_ref != dback->num_refs) {
5720 fprintf(stderr, "Incorrect local backref count"
5721 " on %llu %s %llu owner %llu"
5722 " offset %llu found %u wanted %u back %p\n",
5723 (unsigned long long)rec->start,
5724 back->full_backref ?
5726 back->full_backref ?
5727 (unsigned long long)dback->parent:
5728 (unsigned long long)dback->root,
5729 (unsigned long long)dback->owner,
5730 (unsigned long long)dback->offset,
5731 dback->found_ref, dback->num_refs, back);
5733 if (dback->disk_bytenr != rec->start) {
5737 fprintf(stderr, "Backref disk bytenr does not"
5738 " match extent record, bytenr=%llu, "
5739 "ref bytenr=%llu\n",
5740 (unsigned long long)rec->start,
5741 (unsigned long long)dback->disk_bytenr);
5744 if (dback->bytes != rec->nr) {
5748 fprintf(stderr, "Backref bytes do not match "
5749 "extent backref, bytenr=%llu, ref "
5750 "bytes=%llu, backref bytes=%llu\n",
5751 (unsigned long long)rec->start,
5752 (unsigned long long)rec->nr,
5753 (unsigned long long)dback->bytes);
5756 if (!back->is_data) {
5759 dback = to_data_backref(back);
/* Data backrefs contribute their per-backref reference count. */
5760 found += dback->found_ref;
5763 if (found != rec->refs) {
5767 fprintf(stderr, "Incorrect global backref count "
5768 "on %llu found %llu wanted %llu\n",
5769 (unsigned long long)rec->start,
5770 (unsigned long long)found,
5771 (unsigned long long)rec->refs);
/*
 * Per-node callback handed to rb_free_nodes() by free_all_extent_backrefs().
 * Resolves @node to its containing extent_backref; the release of that
 * object is presumably done here -- the statement is not visible, confirm.
 */
5777 static void __free_one_backref(struct rb_node *node)
5779 struct extent_backref *back = rb_node_to_extent_backref(node);
/*
 * Tear down rec->backref_tree, invoking __free_one_backref() on every node
 * through rb_free_nodes().
 */
5784 static void free_all_extent_backrefs(struct extent_record *rec)
5786 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Empty @extent_cache: take the first cache extent, map it to its
 * extent_record, remove it from the cache and free the record's backrefs.
 * NOTE(review): the enclosing loop and the release of the record itself are
 * not visible here -- confirm against the complete definition.
 */
5789 static void free_extent_record_cache(struct cache_tree *extent_cache)
5791 struct cache_extent *cache;
5792 struct extent_record *rec;
5795 cache = first_cache_extent(extent_cache);
5798 rec = container_of(cache, struct extent_record, cache);
5799 remove_cache_extent(extent_cache, cache);
5800 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing is left to verify on it.
 *
 * The record is released only when all of the following hold: content and
 * owner ref are checked, extent_item_refs equals refs and refs is non-zero,
 * there are no duplicates, all_backpointers_checked() returns 0, and none
 * of bad_full_backref / crossing_stripes / wrong_chunk_type is set.
 * In that case it is removed from the cache, its backrefs are freed and it
 * is unlinked from rec->list.
 */
5805 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5806 struct extent_record *rec)
5808 if (rec->content_checked && rec->owner_ref_checked &&
5809 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5810 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5811 !rec->bad_full_backref && !rec->crossing_stripes &&
5812 !rec->wrong_chunk_type) {
5813 remove_cache_extent(extent_cache, &rec->cache);
5814 free_all_extent_backrefs(rec);
5815 list_del_init(&rec->list);
/*
 * Check that tree block @buf is really referenced by the tree named in its
 * header owner field.
 *
 * First pass: scan @rec's backrefs, testing found_ref / full_backref and
 * comparing btrfs_header_owner(buf) with the backref root.
 * Second pass: read the owner's fs root, search for the first key of @buf
 * with path.lowest_level = level + 1 and compare the block pointer in the
 * parent slot with buf->start.
 *
 * Returns 0 when the owner reference was found, 1 otherwise.
 */
5821 static int check_owner_ref(struct btrfs_root *root,
5822 struct extent_record *rec,
5823 struct extent_buffer *buf)
5825 struct extent_backref *node, *tmp;
5826 struct tree_backref *back;
5827 struct btrfs_root *ref_root;
5828 struct btrfs_key key;
5829 struct btrfs_path path;
5830 struct extent_buffer *parent;
5835 rbtree_postorder_for_each_entry_safe(node, tmp,
5836 &rec->backref_tree, node) {
5839 if (!node->found_ref)
5841 if (node->full_backref)
5843 back = to_tree_backref(node);
5844 if (btrfs_header_owner(buf) == back->root)
5847 BUG_ON(rec->is_root);
5849 /* try to find the block by search corresponding fs tree */
5850 key.objectid = btrfs_header_owner(buf);
5851 key.type = BTRFS_ROOT_ITEM_KEY;
5852 key.offset = (u64)-1;
5854 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5855 if (IS_ERR(ref_root))
5858 level = btrfs_header_level(buf);
5860 btrfs_item_key_to_cpu(buf, &key, 0);
5862 btrfs_node_key_to_cpu(buf, &key, 0);
5864 btrfs_init_path(&path);
/* Stop the search one level above @buf so the parent slot is available. */
5865 path.lowest_level = level + 1;
5866 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5870 parent = path.nodes[level + 1];
5871 if (parent && buf->start == btrfs_node_blockptr(parent,
5872 path.slots[level + 1]))
5875 btrfs_release_path(&path);
5876 return found ? 0 : 1;
/*
 * Scan @rec's backrefs for a tree backref whose root is
 * BTRFS_EXTENT_TREE_OBJECTID.  full_backref entries are tested for
 * separately -- presumably to decide the result early; the return
 * statements are not visible here, confirm.
 */
5879 static int is_extent_tree_record(struct extent_record *rec)
5881 struct extent_backref *node, *tmp;
5882 struct tree_backref *back;
5885 rbtree_postorder_for_each_entry_safe(node, tmp,
5886 &rec->backref_tree, node) {
5889 back = to_tree_backref(node);
5890 if (node->full_backref)
5892 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Record an unreadable block as a corrupt extent.
 *
 * Looks up the extent record covering [start, start + len) in
 * @extent_cache, tests it with is_extent_tree_record(), and registers it
 * through btrfs_add_corrupt_extent_record() keyed by the record's
 * parent_key.  Returns that function's result on this path.
 */
5899 static int record_bad_block_io(struct btrfs_fs_info *info,
5900 struct cache_tree *extent_cache,
5903 struct extent_record *rec;
5904 struct cache_extent *cache;
5905 struct btrfs_key key;
5907 cache = lookup_cache_extent(extent_cache, start, len);
5911 rec = container_of(cache, struct extent_record, cache);
5912 if (!is_extent_tree_record(rec))
5915 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5916 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot + 1 of tree block @buf.
 *
 * Node (header level != 0): the two btrfs_key_ptr entries are exchanged;
 * the visible code then reads node key 0 and propagates it upwards with
 * btrfs_fixup_low_keys() (presumably only when @slot is 0 -- confirm).
 *
 * Leaf: both item payloads are copied into malloc()ed buffers and written
 * back crosswise, the item offsets and sizes are exchanged, and the keys are
 * rewritten with btrfs_set_item_key_unsafe() via path->slots[0].
 *
 * NOTE(review): item1_data is written back using item2_size and item2_data
 * using item1_size; if the two sizes differ this reads past the shorter
 * buffer -- verify whether that is intended.
 */
5919 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5920 struct extent_buffer *buf, int slot)
5922 if (btrfs_header_level(buf)) {
5923 struct btrfs_key_ptr ptr1, ptr2;
5925 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5926 sizeof(struct btrfs_key_ptr));
5927 read_extent_buffer(buf, &ptr2,
5928 btrfs_node_key_ptr_offset(slot + 1),
5929 sizeof(struct btrfs_key_ptr));
5930 write_extent_buffer(buf, &ptr1,
5931 btrfs_node_key_ptr_offset(slot + 1),
5932 sizeof(struct btrfs_key_ptr));
5933 write_extent_buffer(buf, &ptr2,
5934 btrfs_node_key_ptr_offset(slot),
5935 sizeof(struct btrfs_key_ptr));
5937 struct btrfs_disk_key key;
5938 btrfs_node_key(buf, &key, 0);
5939 btrfs_fixup_low_keys(root, path, &key,
5940 btrfs_header_level(buf) + 1);
5943 struct btrfs_item *item1, *item2;
5944 struct btrfs_key k1, k2;
5945 char *item1_data, *item2_data;
5946 u32 item1_offset, item2_offset, item1_size, item2_size;
5948 item1 = btrfs_item_nr(slot);
5949 item2 = btrfs_item_nr(slot + 1);
5950 btrfs_item_key_to_cpu(buf, &k1, slot);
5951 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5952 item1_offset = btrfs_item_offset(buf, item1);
5953 item2_offset = btrfs_item_offset(buf, item2);
5954 item1_size = btrfs_item_size(buf, item1);
5955 item2_size = btrfs_item_size(buf, item2);
5957 item1_data = malloc(item1_size);
5960 item2_data = malloc(item2_size);
5966 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5967 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5969 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5970 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5974 btrfs_set_item_offset(buf, item1, item2_offset);
5975 btrfs_set_item_offset(buf, item2, item1_offset);
5976 btrfs_set_item_size(buf, item1, item2_size);
5977 btrfs_set_item_size(buf, item2, item1_size);
/* The key setter works on path->slots[0], so point it at each slot in turn. */
5979 path->slots[0] = slot;
5980 btrfs_set_item_key_unsafe(root, path, &k2);
5981 path->slots[0] = slot + 1;
5982 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair key ordering in the block at path->nodes[path->lowest_level].
 *
 * Adjacent keys (node keys or item keys, depending on the block type) are
 * compared with btrfs_comp_cpu_keys(); the visible code calls swap_values()
 * on a pair and marks the buffer dirty.  Pairs already in ascending order
 * (comparison < 0) are presumably skipped -- confirm.
 */
5987 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5989 struct extent_buffer *buf;
5990 struct btrfs_key k1, k2;
5992 int level = path->lowest_level;
5995 buf = path->nodes[level];
5996 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5998 btrfs_node_key_to_cpu(buf, &k1, i);
5999 btrfs_node_key_to_cpu(buf, &k2, i + 1);
6001 btrfs_item_key_to_cpu(buf, &k1, i);
6002 btrfs_item_key_to_cpu(buf, &k2, i + 1);
6004 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6006 ret = swap_values(root, path, buf, i);
6009 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * Only DIR_INDEX, EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF and
 * EXTENT_DATA_REF keys are accepted for deletion.  The item header array is
 * closed up with memmove_extent_buffer(), nritems is decremented and the
 * buffer is marked dirty.  The visible code also propagates item key 0
 * with btrfs_fixup_low_keys() (presumably only when @slot is 0 -- confirm).
 * Only the btrfs_item headers are moved here; the item data area is not
 * touched by the visible statements.
 */
6015 static int delete_bogus_item(struct btrfs_root *root,
6016 struct btrfs_path *path,
6017 struct extent_buffer *buf, int slot)
6019 struct btrfs_key key;
6020 int nritems = btrfs_header_nritems(buf);
6022 btrfs_item_key_to_cpu(buf, &key, slot);
6024 /* These are all the keys we can deal with missing. */
6025 if (key.type != BTRFS_DIR_INDEX_KEY &&
6026 key.type != BTRFS_EXTENT_ITEM_KEY &&
6027 key.type != BTRFS_METADATA_ITEM_KEY &&
6028 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6029 key.type != BTRFS_EXTENT_DATA_REF_KEY)
6032 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6033 (unsigned long long)key.objectid, key.type,
6034 (unsigned long long)key.offset, slot, buf->start);
6035 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6036 btrfs_item_nr_offset(slot + 1),
6037 sizeof(struct btrfs_item) *
6038 (nritems - slot - 1));
6039 btrfs_set_header_nritems(buf, nritems - 1);
6041 struct btrfs_disk_key disk_key;
6043 btrfs_item_key(buf, &disk_key, 0);
6044 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6046 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * Expected layout: item 0 ends at BTRFS_LEAF_DATA_SIZE(root) and every
 * later item ends where its predecessor begins.  When an item ends beyond
 * the expected position, delete_bogus_item() is tried and a "can't fix"
 * message is the visible fallback.  When it ends short, the gap is computed
 * as 'shift', the item data is moved up by that amount and the item offset
 * is rewritten; the buffer is then marked dirty.
 */
6050 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6052 struct extent_buffer *buf;
6056 /* We should only get this for leaves */
6057 BUG_ON(path->lowest_level);
6058 buf = path->nodes[0];
6060 for (i = 0; i < btrfs_header_nritems(buf); i++) {
6061 unsigned int shift = 0, offset;
6063 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6064 BTRFS_LEAF_DATA_SIZE(root)) {
6065 if (btrfs_item_end_nr(buf, i) >
6066 BTRFS_LEAF_DATA_SIZE(root)) {
6067 ret = delete_bogus_item(root, path, buf, i);
6070 fprintf(stderr, "item is off the end of the "
6071 "leaf, can't fix\n");
6075 shift = BTRFS_LEAF_DATA_SIZE(root) -
6076 btrfs_item_end_nr(buf, i);
6077 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6078 btrfs_item_offset_nr(buf, i - 1)) {
6079 if (btrfs_item_end_nr(buf, i) >
6080 btrfs_item_offset_nr(buf, i - 1)) {
6081 ret = delete_bogus_item(root, path, buf, i);
6084 fprintf(stderr, "items overlap, can't fix\n");
6088 shift = btrfs_item_offset_nr(buf, i - 1) -
6089 btrfs_item_end_nr(buf, i);
6094 printf("Shifting item nr %d by %u bytes in block %llu\n",
6095 i, shift, (unsigned long long)buf->start);
6096 offset = btrfs_item_offset_nr(buf, i);
/* Source and destination may overlap, hence the memmove variant. */
6097 memmove_extent_buffer(buf,
6098 btrfs_leaf_data(buf) + offset + shift,
6099 btrfs_leaf_data(buf) + offset,
6100 btrfs_item_size_nr(buf, i));
6101 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6103 btrfs_mark_buffer_dirty(buf);
6107 * We may have moved things, in which case we want to exit so we don't
6108 * write those changes out. Once we have proper abort functionality in
6109 * progs this can be changed to something nicer.
6116 * Attempt to fix basic block failures. If we can't fix it for whatever reason
6117 * then just return -EIO.
/*
 * Try to repair tree block @buf for the two supported failure kinds,
 * BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS.
 *
 * All roots referencing buf->start are collected with
 * btrfs_find_all_roots().  For each of them the fs root is read, a
 * transaction is started, and the block is located with btrfs_search_slot()
 * at the block's own level with path.skip_check_block set.  Depending on
 * @status, fix_key_order() or fix_item_offset() is then run and the
 * transaction is committed.
 *
 * NOTE(review): the last btrfs_search_slot() argument is 1 here, presumably
 * requesting COW of the path -- confirm against its prototype.
 */
6119 static int try_to_fix_bad_block(struct btrfs_root *root,
6120 struct extent_buffer *buf,
6121 enum btrfs_tree_block_status status)
6123 struct btrfs_trans_handle *trans;
6124 struct ulist *roots;
6125 struct ulist_node *node;
6126 struct btrfs_root *search_root;
6127 struct btrfs_path path;
6128 struct ulist_iterator iter;
6129 struct btrfs_key root_key, key;
6132 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6133 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6136 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6140 btrfs_init_path(&path);
6141 ULIST_ITER_INIT(&iter);
6142 while ((node = ulist_next(roots, &iter))) {
6143 root_key.objectid = node->val;
6144 root_key.type = BTRFS_ROOT_ITEM_KEY;
6145 root_key.offset = (u64)-1;
6147 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6154 trans = btrfs_start_transaction(search_root, 0);
6155 if (IS_ERR(trans)) {
6156 ret = PTR_ERR(trans);
6160 path.lowest_level = btrfs_header_level(buf);
/* The block is known to be bad, so block checking is skipped during the search. */
6161 path.skip_check_block = 1;
6162 if (path.lowest_level)
6163 btrfs_node_key_to_cpu(buf, &key, 0);
6165 btrfs_item_key_to_cpu(buf, &key, 0);
6166 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6169 btrfs_commit_transaction(trans, search_root);
6172 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6173 ret = fix_key_order(search_root, &path);
6174 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6175 ret = fix_item_offset(search_root, &path);
6177 btrfs_commit_transaction(trans, search_root);
6180 btrfs_release_path(&path);
6181 btrfs_commit_transaction(trans, search_root);
6184 btrfs_release_path(&path);
/*
 * Validate tree block @buf and update its extent record.
 *
 * The record covering [buf->start, buf->start + buf->len) is looked up in
 * @extent_cache and receives the block's generation, the objectid of its
 * first key (when the block has items) and its level.  The block is
 * checked with btrfs_check_leaf() or btrfs_check_node() against
 * rec->parent_key; a non-clean status leads to try_to_fix_bad_block() and,
 * failing that, a "bad block" message.
 *
 * On the visible success path content_checked is set, and
 * owner_ref_checked is set either directly when @flags carries
 * BTRFS_BLOCK_FLAG_FULL_BACKREF or after check_owner_ref().  Finally the
 * record is offered to maybe_free_extent_rec().
 */
6188 static int check_block(struct btrfs_root *root,
6189 struct cache_tree *extent_cache,
6190 struct extent_buffer *buf, u64 flags)
6192 struct extent_record *rec;
6193 struct cache_extent *cache;
6194 struct btrfs_key key;
6195 enum btrfs_tree_block_status status;
6199 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6202 rec = container_of(cache, struct extent_record, cache);
6203 rec->generation = btrfs_header_generation(buf);
6205 level = btrfs_header_level(buf);
6206 if (btrfs_header_nritems(buf) > 0) {
6209 btrfs_item_key_to_cpu(buf, &key, 0);
6211 btrfs_node_key_to_cpu(buf, &key, 0);
6213 rec->info_objectid = key.objectid;
6215 rec->info_level = level;
6217 if (btrfs_is_leaf(buf))
6218 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6220 status = btrfs_check_node(root, &rec->parent_key, buf);
6222 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6224 status = try_to_fix_bad_block(root, buf, status);
6225 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6227 fprintf(stderr, "bad block %llu\n",
6228 (unsigned long long)buf->start);
6231 * Signal to callers we need to start the scan over
6232 * again since we'll have cowed blocks.
6237 rec->content_checked = 1;
6238 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6239 rec->owner_ref_checked = 1;
6241 ret = check_owner_ref(root, rec, buf);
6243 rec->owner_ref_checked = 1;
6247 maybe_free_extent_rec(extent_cache, rec);
/*
 * List-based lookup of a tree backref on rec->backrefs.
 *
 * Walks the list and compares each tree backref either by parent (the
 * checks involving !node->full_backref and parent == back->parent) or by
 * root (the checks involving node->full_backref and back->root == root).
 * NOTE(review): which comparison applies is presumably selected by whether
 * @parent is non-zero -- the selecting statements are not visible, confirm.
 */
6252 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6253 u64 parent, u64 root)
6255 struct list_head *cur = rec->backrefs.next;
6256 struct extent_backref *node;
6257 struct tree_backref *back;
6259 while(cur != &rec->backrefs) {
6260 node = to_extent_backref(cur);
6264 back = to_tree_backref(node);
6266 if (!node->full_backref)
6268 if (parent == back->parent)
6271 if (node->full_backref)
6273 if (back->root == root)
/*
 * Allocate a tree_backref with malloc() and zero its embedded
 * extent_backref node.  Two initialisations are visible: one stores
 * @parent and sets full_backref = 1, the other sets full_backref = 0
 * (presumably the root-keyed case -- confirm).
 */
6281 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6282 u64 parent, u64 root)
6284 struct tree_backref *ref = malloc(sizeof(*ref));
6288 memset(&ref->node, 0, sizeof(ref->node));
6290 ref->parent = parent;
6291 ref->node.full_backref = 1;
6294 ref->node.full_backref = 0;
/*
 * List-based lookup of a data backref on rec->backrefs.
 *
 * Entries are matched either by parent or by the (root, owner, offset)
 * triple.  For a triple match, when both @found_ref and the entry's
 * found_ref are set, the entry is additionally tested for a differing
 * bytes or disk_bytenr value (presumably such entries are skipped so a
 * separate backref gets created -- confirm).
 */
6301 static struct data_backref *find_data_backref(struct extent_record *rec,
6302 u64 parent, u64 root,
6303 u64 owner, u64 offset,
6305 u64 disk_bytenr, u64 bytes)
6307 struct list_head *cur = rec->backrefs.next;
6308 struct extent_backref *node;
6309 struct data_backref *back;
6311 while(cur != &rec->backrefs) {
6312 node = to_extent_backref(cur);
6316 back = to_data_backref(node);
6318 if (!node->full_backref)
6320 if (parent == back->parent)
6323 if (node->full_backref)
6325 if (back->root == root && back->owner == owner &&
6326 back->offset == offset) {
6327 if (found_ref && node->found_ref &&
6328 (back->bytes != bytes ||
6329 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data_backref with malloc(), zero its embedded node and mark it
 * as data (is_data = 1).  Two initialisations are visible: one stores
 * @parent with full_backref = 1, the other stores @offset with
 * full_backref = 0.  ref->bytes is set from max_size, and rec->max_size is
 * raised to max_size when that is larger.
 */
6339 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6340 u64 parent, u64 root,
6341 u64 owner, u64 offset,
6344 struct data_backref *ref = malloc(sizeof(*ref));
6348 memset(&ref->node, 0, sizeof(ref->node));
6349 ref->node.is_data = 1;
6352 ref->parent = parent;
6355 ref->node.full_backref = 1;
6359 ref->offset = offset;
6360 ref->node.full_backref = 0;
6362 ref->bytes = max_size;
6365 if (max_size > rec->max_size)
6366 rec->max_size = max_size;
6370 /* Check if the type of extent matches with its chunk */
/*
 * Set rec->wrong_chunk_type when the extent's kind does not agree with the
 * flags of the block group found for rec->start.
 *
 *  - data extent: block group must carry BTRFS_BLOCK_GROUP_DATA
 *  - metadata extent: block group must carry SYSTEM or METADATA; when the
 *    record has backrefs, the first one decides the exact expected type
 *    (root == BTRFS_CHUNK_TREE_OBJECTID means SYSTEM, the other visible
 *    assignment is METADATA), and a data backref on a metadata extent is
 *    itself flagged
 */
6371 static void check_extent_type(struct extent_record *rec)
6373 struct btrfs_block_group_cache *bg_cache;
6375 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6379 /* data extent, check chunk directly*/
6380 if (!rec->metadata) {
6381 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6382 rec->wrong_chunk_type = 1;
6386 /* metadata extent, check the obvious case first */
6387 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6388 BTRFS_BLOCK_GROUP_METADATA))) {
6389 rec->wrong_chunk_type = 1;
6394 * Check SYSTEM extent, as it's also marked as metadata, we can only
6395 * make sure it's a SYSTEM extent by its backref
6397 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
6398 struct extent_backref *node;
6399 struct tree_backref *tback;
6402 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
6403 if (node->is_data) {
6404 /* tree block shouldn't have data backref */
6405 rec->wrong_chunk_type = 1;
6408 tback = container_of(node, struct tree_backref, node);
6410 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6411 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6413 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6414 if (!(bg_cache->flags & bg_type))
6415 rec->wrong_chunk_type = 1;
6420 * Allocate a new extent record, fill default values from @tmpl and insert into
6421 * @extent_cache. The caller is supposed to make sure that [start,nr) is not in
6422 * the cache, otherwise it fails.
/*
 * Create an extent_record from @tmpl and insert it into @extent_cache
 * without checking for an existing entry.
 *
 * Fields copied from @tmpl: start, max_size, found_rec, content_checked,
 * owner_ref_checked, metadata, is_root, refs, extent_item_refs,
 * parent_generation and parent_key.  rec->nr becomes the larger of
 * tmpl->nr and tmpl->max_size, while the cache entry size is tmpl->nr.
 * The error flags start cleared and the list heads / rb-tree root empty.
 * After insertion bytes_used grows by rec->nr, and the visible code runs
 * check_crossing_stripes() (with global_info->nodesize) and
 * check_extent_type().  tmpl->max_size must be non-zero (BUG_ON).
 */
6424 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6425 struct extent_record *tmpl)
6427 struct extent_record *rec;
6430 BUG_ON(tmpl->max_size == 0);
6431 rec = malloc(sizeof(*rec));
6434 rec->start = tmpl->start;
6435 rec->max_size = tmpl->max_size;
6436 rec->nr = max(tmpl->nr, tmpl->max_size);
6437 rec->found_rec = tmpl->found_rec;
6438 rec->content_checked = tmpl->content_checked;
6439 rec->owner_ref_checked = tmpl->owner_ref_checked;
6440 rec->num_duplicates = 0;
6441 rec->metadata = tmpl->metadata;
6442 rec->flag_block_full_backref = FLAG_UNSET;
6443 rec->bad_full_backref = 0;
6444 rec->crossing_stripes = 0;
6445 rec->wrong_chunk_type = 0;
6446 rec->is_root = tmpl->is_root;
6447 rec->refs = tmpl->refs;
6448 rec->extent_item_refs = tmpl->extent_item_refs;
6449 rec->parent_generation = tmpl->parent_generation;
6450 INIT_LIST_HEAD(&rec->backrefs);
6451 INIT_LIST_HEAD(&rec->dups);
6452 INIT_LIST_HEAD(&rec->list);
6453 rec->backref_tree = RB_ROOT;
6454 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6455 rec->cache.start = tmpl->start;
6456 rec->cache.size = tmpl->nr;
6457 ret = insert_cache_extent(extent_cache, &rec->cache);
6462 bytes_used += rec->nr;
6465 rec->crossing_stripes = check_crossing_stripes(global_info,
6466 rec->start, global_info->nodesize);
6467 check_extent_type(rec);
6472 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6474 * - refs - if found, increase refs
6475 * - is_root - if found, set
6476 * - content_checked - if found, set
6477 * - owner_ref_checked - if found, set
6479 * If not found, create a new one, initialize and insert.
/*
 * Merge @tmpl into the extent record covering [tmpl->start, +tmpl->nr),
 * or create a new record through add_extent_rec_nolookup().
 *
 * On an existing record the visible code:
 *  - updates rec->nr to max(tmpl->nr, tmpl->max_size)
 *  - when tmpl->found_rec is set and either the start differs or the record
 *    was already found, queues the record on duplicate_extents and appends a
 *    freshly allocated duplicate entry to rec->dups
 *  - takes over extent_item_refs (reporting on stderr when one was already
 *    recorded), content_checked, owner_ref_checked, parent_key,
 *    parent_generation and a larger max_size
 *  - re-evaluates crossing_stripes and the chunk type, then offers the
 *    record to maybe_free_extent_rec()
 */
6481 static int add_extent_rec(struct cache_tree *extent_cache,
6482 struct extent_record *tmpl)
6484 struct extent_record *rec;
6485 struct cache_extent *cache;
6489 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6491 rec = container_of(cache, struct extent_record, cache);
6495 rec->nr = max(tmpl->nr, tmpl->max_size);
6498 * We need to make sure to reset nr to whatever the extent
6499 * record says was the real size, this way we can compare it to
6502 if (tmpl->found_rec) {
6503 if (tmpl->start != rec->start || rec->found_rec) {
6504 struct extent_record *tmp;
6507 if (list_empty(&rec->list))
6508 list_add_tail(&rec->list,
6509 &duplicate_extents);
6512 * We have to do this song and dance in case we
6513 * find an extent record that falls inside of
6514 * our current extent record but does not have
6515 * the same objectid.
6517 tmp = malloc(sizeof(*tmp));
6520 tmp->start = tmpl->start;
6521 tmp->max_size = tmpl->max_size;
6524 tmp->metadata = tmpl->metadata;
6525 tmp->extent_item_refs = tmpl->extent_item_refs;
6526 INIT_LIST_HEAD(&tmp->list);
6527 list_add_tail(&tmp->list, &rec->dups);
6528 rec->num_duplicates++;
6535 if (tmpl->extent_item_refs && !dup) {
6536 if (rec->extent_item_refs) {
6537 fprintf(stderr, "block %llu rec "
6538 "extent_item_refs %llu, passed %llu\n",
6539 (unsigned long long)tmpl->start,
6540 (unsigned long long)
6541 rec->extent_item_refs,
6542 (unsigned long long)tmpl->extent_item_refs);
6544 rec->extent_item_refs = tmpl->extent_item_refs;
6548 if (tmpl->content_checked)
6549 rec->content_checked = 1;
6550 if (tmpl->owner_ref_checked)
6551 rec->owner_ref_checked = 1;
6552 memcpy(&rec->parent_key, &tmpl->parent_key,
6553 sizeof(tmpl->parent_key));
6554 if (tmpl->parent_generation)
6555 rec->parent_generation = tmpl->parent_generation;
6556 if (rec->max_size < tmpl->max_size)
6557 rec->max_size = tmpl->max_size;
6560 * A metadata extent can't cross stripe_len boundary, otherwise
6561 * kernel scrub won't be able to handle it.
6562 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6566 rec->crossing_stripes = check_crossing_stripes(
6567 global_info, rec->start,
6568 global_info->nodesize);
6569 check_extent_type(rec);
6570 maybe_free_extent_rec(extent_cache, rec);
6574 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref (@parent or @root) for the extent at @bytenr.
 *
 * The extent record is looked up in @extent_cache; a template-based record
 * is created through add_extent_rec_nolookup() when the first lookup is
 * handled as a miss.  The backref is located with find_tree_backref() or
 * allocated with alloc_tree_backref().  Two flags are then maintained, each
 * with an "already exists" message on stderr when it was set before:
 * found_ref and found_extent_tree (presumably selected by @found_ref --
 * confirm).  The visible code inserts the backref into rec->backref_tree
 * (WARN_ON on failure), re-checks the chunk type and offers the record to
 * maybe_free_extent_rec().
 */
6579 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6580 u64 parent, u64 root, int found_ref)
6582 struct extent_record *rec;
6583 struct tree_backref *back;
6584 struct cache_extent *cache;
6586 bool insert = false;
6588 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6590 struct extent_record tmpl;
6592 memset(&tmpl, 0, sizeof(tmpl));
6593 tmpl.start = bytenr;
6598 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6602 /* really a bug in cache_extent implement now */
6603 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6608 rec = container_of(cache, struct extent_record, cache);
6609 if (rec->start != bytenr) {
6611 * Several cause, from unaligned bytenr to over lapping extents
6616 back = find_tree_backref(rec, parent, root);
6618 back = alloc_tree_backref(rec, parent, root);
6625 if (back->node.found_ref) {
6626 fprintf(stderr, "Extent back ref already exists "
6627 "for %llu parent %llu root %llu \n",
6628 (unsigned long long)bytenr,
6629 (unsigned long long)parent,
6630 (unsigned long long)root);
6632 back->node.found_ref = 1;
6634 if (back->node.found_extent_tree) {
6635 fprintf(stderr, "Extent back ref already exists "
6636 "for %llu parent %llu root %llu \n",
6637 (unsigned long long)bytenr,
6638 (unsigned long long)parent,
6639 (unsigned long long)root);
6641 back->node.found_extent_tree = 1;
6644 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6645 compare_extent_backref));
6646 check_extent_type(rec);
6647 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * The extent record is looked up in @extent_cache (a template-based one is
 * created via add_extent_rec_nolookup() when the first lookup is handled as
 * a miss) and its max_size is raised to @max_size if needed.  The backref
 * is located with find_data_backref() or allocated with
 * alloc_data_backref().
 *
 * Two update paths are visible:
 *  - reference found in a file tree: @num_refs must be 1 (BUG_ON), the
 *    backref's found_ref counter is incremented, bytes / disk_bytenr are
 *    refreshed (with an rb_erase() when they changed) and the record is
 *    marked content- and owner-checked
 *  - reference found in the extent tree: num_refs and found_extent_tree are
 *    stored, with an "already exists" message when it was seen before
 * The visible code inserts the backref into rec->backref_tree (WARN_ON on
 * failure) and offers the record to maybe_free_extent_rec().
 */
6651 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6652 u64 parent, u64 root, u64 owner, u64 offset,
6653 u32 num_refs, int found_ref, u64 max_size)
6655 struct extent_record *rec;
6656 struct data_backref *back;
6657 struct cache_extent *cache;
6659 bool insert = false;
6661 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6663 struct extent_record tmpl;
6665 memset(&tmpl, 0, sizeof(tmpl));
6666 tmpl.start = bytenr;
6668 tmpl.max_size = max_size;
6670 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6674 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6679 rec = container_of(cache, struct extent_record, cache);
6680 if (rec->max_size < max_size)
6681 rec->max_size = max_size;
6684 * If found_ref is set then max_size is the real size and must match the
6685 * existing refs. So if we have already found a ref then we need to
6686 * make sure that this ref matches the existing one, otherwise we need
6687 * to add a new backref so we can notice that the backrefs don't match
6688 * and we need to figure out who is telling the truth. This is to
6689 * account for that awful fsync bug I introduced where we'd end up with
6690 * a btrfs_file_extent_item that would have its length include multiple
6691 * prealloc extents or point inside of a prealloc extent.
6693 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6696 back = alloc_data_backref(rec, parent, root, owner, offset,
6703 BUG_ON(num_refs != 1);
6704 if (back->node.found_ref)
6705 BUG_ON(back->bytes != max_size);
6706 back->node.found_ref = 1;
6707 back->found_ref += 1;
6708 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
6709 back->bytes = max_size;
6710 back->disk_bytenr = bytenr;
6712 /* Need to reinsert if not already in the tree */
6714 rb_erase(&back->node.node, &rec->backref_tree);
6719 rec->content_checked = 1;
6720 rec->owner_ref_checked = 1;
6722 if (back->node.found_extent_tree) {
6723 fprintf(stderr, "Extent back ref already exists "
6724 "for %llu parent %llu root %llu "
6725 "owner %llu offset %llu num_refs %lu\n",
6726 (unsigned long long)bytenr,
6727 (unsigned long long)parent,
6728 (unsigned long long)root,
6729 (unsigned long long)owner,
6730 (unsigned long long)offset,
6731 (unsigned long)num_refs);
6733 back->num_refs = num_refs;
6734 back->node.found_extent_tree = 1;
6737 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6738 compare_extent_backref));
6740 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue the range [bytenr, bytenr + size) for processing.  The range is
 * first added to @seen and then to @pending; the result of the second
 * add_cache_extent() call is not checked.
 * NOTE(review): a failed insert into @seen presumably returns early so that
 * already-seen ranges are not queued twice -- confirm.
 */
6744 static int add_pending(struct cache_tree *pending,
6745 struct cache_tree *seen, u64 bytenr, u32 size)
6748 ret = add_cache_extent(seen, bytenr, size);
6751 add_cache_extent(pending, bytenr, size);
/*
 * Fill @bits (at most @bits_nr entries) with the next block ranges to
 * process.
 *
 * Sources are consulted in the order they appear below: the first entry of
 * @reada; then @nodes, searched from a position 32768 bytes before @last
 * with a fallback search from 0; then @pending from 0.  Consecutive cache
 * extents are copied until the array is full.  When more than 8 slots
 * remain, @pending is additionally scanned starting right after bits[0],
 * with a test for gaps larger than 32768 bytes (presumably ending the scan
 * -- confirm).
 *
 * NOTE(review): node_start is declared unsigned long but initialised from
 * the u64 @last; where unsigned long is 32 bits wide this narrows the
 * value -- verify.
 */
6755 static int pick_next_pending(struct cache_tree *pending,
6756 struct cache_tree *reada,
6757 struct cache_tree *nodes,
6758 u64 last, struct block_info *bits, int bits_nr,
6761 unsigned long node_start = last;
6762 struct cache_extent *cache;
6765 cache = search_cache_extent(reada, 0);
6767 bits[0].start = cache->start;
6768 bits[0].size = cache->size;
6773 if (node_start > 32768)
6774 node_start -= 32768;
6776 cache = search_cache_extent(nodes, node_start);
6778 cache = search_cache_extent(nodes, 0);
6781 cache = search_cache_extent(pending, 0);
6786 bits[ret].start = cache->start;
6787 bits[ret].size = cache->size;
6788 cache = next_cache_extent(cache);
6790 } while (cache && ret < bits_nr);
6796 bits[ret].start = cache->start;
6797 bits[ret].size = cache->size;
6798 cache = next_cache_extent(cache);
6800 } while (cache && ret < bits_nr);
6802 if (bits_nr - ret > 8) {
6803 u64 lookup = bits[0].start + bits[0].size;
6804 struct cache_extent *next;
6805 next = search_cache_extent(pending, lookup);
6807 if (next->start - lookup > 32768)
6809 bits[ret].start = next->start;
6810 bits[ret].size = next->size;
6811 lookup = next->start + next->size;
6815 next = next_cache_extent(next);
/*
 * Callback used by free_chunk_cache_tree(): maps @cache to its chunk_record
 * and unlinks the record's 'list' and 'dextents' list heads.  The release
 * of the record itself is presumably done here as well -- not visible,
 * confirm.
 */
6823 static void free_chunk_record(struct cache_extent *cache)
6825 struct chunk_record *rec;
6827 rec = container_of(cache, struct chunk_record, cache);
6828 list_del_init(&rec->list);
6829 list_del_init(&rec->dextents);
/*
 * Tear down a chunk cache tree by handing every extent in it to
 * free_chunk_record().
 */
6833 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6835 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node callback for the device rb-tree: recovers the device_record
 * that embeds `node`.
 * NOTE(review): the statement releasing the record is not visible in this
 * listing.
 */
6838 static void free_device_record(struct rb_node *node)
6840 struct device_record *rec;
6842 rec = container_of(node, struct device_record, node);
/*
 * NOTE(review): FREE_RB_BASED_TREE is defined outside this listing;
 * presumably it generates the routine that tears down the device_cache
 * rb-tree by calling free_device_record() on each node -- confirm.
 */
6846 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert bg_rec into the block group tree's extent cache and append it to
 * tree->block_groups.
 * NOTE(review): the error check between the two calls is hidden here;
 * presumably the list append is skipped when insert_cache_extent() fails --
 * confirm.
 */
6848 int insert_block_group_record(struct block_group_tree *tree,
6849 struct block_group_record *bg_rec)
6853 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6857 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Per-extent callback used by free_block_group_tree(): recovers the
 * block_group_record embedding `cache` and unlinks it from its list.
 * NOTE(review): the statement releasing the record is not visible in this
 * listing.
 */
6861 static void free_block_group_record(struct cache_extent *cache)
6863 struct block_group_record *rec;
6865 rec = container_of(cache, struct block_group_record, cache);
6866 list_del_init(&rec->list);
/*
 * Tear down a block group tree by handing every cached extent to
 * free_block_group_record().
 */
6870 void free_block_group_tree(struct block_group_tree *tree)
6872 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert de_rec into the device extent tree (via insert_cache_extent2())
 * and append it to both orphan lists, no_chunk_orphans and
 * no_device_orphans.
 * NOTE(review): the error check after the insertion is hidden in this
 * listing; presumably the list appends are skipped on failure -- confirm.
 */
6875 int insert_device_extent_record(struct device_extent_tree *tree,
6876 struct device_extent_record *de_rec)
6881 * Device extent is a bit different from the other extents, because
6882 * the extents which belong to the different devices may have the
6883 * same start and size, so we need use the special extent cache
6884 * search/insert functions.
6886 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6890 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6891 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Per-extent callback used by free_device_extent_tree(): recovers the
 * device_extent_record embedding `cache` and unlinks it from chunk_list
 * and device_list when it is still linked on them.
 * NOTE(review): the statement releasing the record is not visible in this
 * listing.
 */
6895 static void free_device_extent_record(struct cache_extent *cache)
6897 struct device_extent_record *rec;
6899 rec = container_of(cache, struct device_extent_record, cache);
6900 if (!list_empty(&rec->chunk_list))
6901 list_del_init(&rec->chunk_list);
6902 if (!list_empty(&rec->device_list))
6903 list_del_init(&rec->device_list);
/*
 * Tear down a device extent tree by handing every cached extent to
 * free_device_extent_record().
 */
6907 void free_device_extent_tree(struct device_extent_tree *tree)
6909 cache_tree_free_extents(&tree->tree, free_device_extent_record);
6912 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent reference item (leaf, slot) into a backref record.
 * Only built when BTRFS_COMPAT_EXTENT_TREE_V0 is defined.
 *
 * References whose objectid is below BTRFS_FIRST_FREE_OBJECTID are added
 * as tree backrefs; the add_data_backref() call with the v0 ref count is
 * presumably the else branch (the branch keyword is hidden in this
 * listing -- confirm).
 */
6913 static int process_extent_ref_v0(struct cache_tree *extent_cache,
6914 struct extent_buffer *leaf, int slot)
6916 struct btrfs_extent_ref_v0 *ref0;
6917 struct btrfs_key key;
6920 btrfs_item_key_to_cpu(leaf, &key, slot);
6921 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
6922 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
6923 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
6926 ret = add_data_backref(extent_cache, key.objectid, key.offset,
6927 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk_record from the chunk item at `slot` of `leaf`.
 *
 * The record is zero-allocated with room for num_stripes stripes
 * (btrfs_chunk_record_size()). Its cache range is [key->offset,
 * key->offset + chunk length), and the key, the leaf generation, the chunk
 * attributes and every stripe's devid/offset/dev_uuid are copied in.
 * NOTE(review): what happens after "memory allocation failed" is printed,
 * and the return statement, are hidden in this listing.
 */
6933 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6934 struct btrfs_key *key,
6937 struct btrfs_chunk *ptr;
6938 struct chunk_record *rec;
6941 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6942 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6944 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6946 fprintf(stderr, "memory allocation failed\n");
6950 INIT_LIST_HEAD(&rec->list);
6951 INIT_LIST_HEAD(&rec->dextents);
6954 rec->cache.start = key->offset;
6955 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6957 rec->generation = btrfs_header_generation(leaf);
6959 rec->objectid = key->objectid;
6960 rec->type = key->type;
6961 rec->offset = key->offset;
6963 rec->length = rec->cache.size;
6964 rec->owner = btrfs_chunk_owner(leaf, ptr);
6965 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6966 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6967 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6968 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6969 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6970 rec->num_stripes = num_stripes;
6971 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* Copy the per-stripe device id, offset and device UUID. */
6973 for (i = 0; i < rec->num_stripes; ++i) {
6974 rec->stripes[i].devid =
6975 btrfs_stripe_devid_nr(leaf, ptr, i);
6976 rec->stripes[i].offset =
6977 btrfs_stripe_offset_nr(leaf, ptr, i);
6978 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6979 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at `slot` of `eb` and, if it passes, add a
 * chunk_record for it to chunk_cache.
 *
 * Items rejected by btrfs_check_chunk_valid() are reported with
 * "chunk(...) is not valid, ignore it". A failed insertion is reported as
 * "Chunk[...] existed.".
 * NOTE(review): the return values on these paths and the cleanup of `rec`
 * after a failed insertion are hidden in this listing.
 */
6986 static int process_chunk_item(struct cache_tree *chunk_cache,
6987 struct btrfs_key *key, struct extent_buffer *eb,
6990 struct chunk_record *rec;
6991 struct btrfs_chunk *chunk;
6994 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6996 * Do extra check for this chunk item,
6998 * It's still possible one can craft a leaf with CHUNK_ITEM, with
6999 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7000 * and owner<->key_type check.
7002 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7005 error("chunk(%llu, %llu) is not valid, ignore it",
7006 key->offset, btrfs_chunk_length(eb, chunk));
7009 rec = btrfs_new_chunk_record(eb, key, slot);
7010 ret = insert_cache_extent(chunk_cache, &rec->cache);
7012 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7013 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at `slot` of `eb` and insert it
 * into the dev_cache rb-tree, ordered by device_record_compare. A failed
 * insertion is reported as "Device[...] existed.".
 *
 * NOTE(review): rec->devid is assigned twice -- first from key->offset,
 * then overwritten with btrfs_device_id(); the first store looks
 * redundant.
 * NOTE(review): the record comes from malloc(), so any field not assigned
 * here is uninitialised unless hidden lines set it -- confirm.
 */
7020 static int process_device_item(struct rb_root *dev_cache,
7021 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7023 struct btrfs_dev_item *ptr;
7024 struct device_record *rec;
7027 ptr = btrfs_item_ptr(eb,
7028 slot, struct btrfs_dev_item);
7030 rec = malloc(sizeof(*rec));
7032 fprintf(stderr, "memory allocation failed\n");
7036 rec->devid = key->offset;
7037 rec->generation = btrfs_header_generation(eb);
7039 rec->objectid = key->objectid;
7040 rec->type = key->type;
7041 rec->offset = key->offset;
7043 rec->devid = btrfs_device_id(eb, ptr);
7044 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7045 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7047 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7049 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a zero-allocated block_group_record from the block group item at
 * `slot` of `leaf`. The cache range starts at key->objectid with size
 * key->offset; the key, the leaf generation and the on-disk block group
 * flags are copied in.
 * NOTE(review): what happens after "memory allocation failed" is printed,
 * and the return statement, are hidden in this listing.
 */
7056 struct block_group_record *
7057 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7060 struct btrfs_block_group_item *ptr;
7061 struct block_group_record *rec;
7063 rec = calloc(1, sizeof(*rec));
7065 fprintf(stderr, "memory allocation failed\n");
7069 rec->cache.start = key->objectid;
7070 rec->cache.size = key->offset;
7072 rec->generation = btrfs_header_generation(leaf);
7074 rec->objectid = key->objectid;
7075 rec->type = key->type;
7076 rec->offset = key->offset;
7078 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7079 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7081 INIT_LIST_HEAD(&rec->list);
/*
 * Create a block_group_record for the item at `slot` of `eb` and insert it
 * into block_group_cache. A failed insertion is reported as
 * "Block Group[...] existed.".
 * NOTE(review): cleanup of `rec` after a failed insertion and the return
 * value are hidden in this listing.
 */
7086 static int process_block_group_item(struct block_group_tree *block_group_cache,
7087 struct btrfs_key *key,
7088 struct extent_buffer *eb, int slot)
7090 struct block_group_record *rec;
7093 rec = btrfs_new_block_group_record(eb, key, slot);
7094 ret = insert_block_group_record(block_group_cache, rec);
7096 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7097 rec->objectid, rec->offset);
/*
 * Build a zero-allocated device_extent_record from the dev extent item at
 * `slot` of `leaf`. The cache entry is keyed by key->objectid and starts
 * at key->offset, with the dev extent length as its size. The key, the
 * leaf generation and the owning chunk's objectid are copied in.
 * NOTE(review): the target of the chunk-offset assignment, the handling
 * after "memory allocation failed" and the return statement are hidden in
 * this listing.
 */
7104 struct device_extent_record *
7105 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7106 struct btrfs_key *key, int slot)
7108 struct device_extent_record *rec;
7109 struct btrfs_dev_extent *ptr;
7111 rec = calloc(1, sizeof(*rec));
7113 fprintf(stderr, "memory allocation failed\n");
7117 rec->cache.objectid = key->objectid;
7118 rec->cache.start = key->offset;
7120 rec->generation = btrfs_header_generation(leaf);
7122 rec->objectid = key->objectid;
7123 rec->type = key->type;
7124 rec->offset = key->offset;
7126 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7127 rec->chunk_objecteid =
7128 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7130 btrfs_dev_extent_chunk_offset(leaf, ptr);
7131 rec->length = btrfs_dev_extent_length(leaf, ptr);
7132 rec->cache.size = rec->length;
7134 INIT_LIST_HEAD(&rec->chunk_list);
7135 INIT_LIST_HEAD(&rec->device_list);
/*
 * Create a device_extent_record for the item at `slot` of `eb` and insert
 * it into dev_extent_cache. A failed insertion is reported as
 * "Device extent[...] existed.".
 * NOTE(review): cleanup of `rec` after a failed insertion and the return
 * value are hidden in this listing.
 */
7141 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7142 struct btrfs_key *key, struct extent_buffer *eb,
7145 struct device_extent_record *rec;
7148 rec = btrfs_new_device_extent_record(eb, key, slot);
7149 ret = insert_device_extent_record(dev_extent_cache, rec);
7152 "Device extent[%llu, %llu, %llu] existed.\n",
7153 rec->objectid, rec->offset, rec->length);
/*
 * Record the EXTENT_ITEM / METADATA_ITEM at `slot` of `eb` in extent_cache
 * together with all of its inline backrefs.
 *
 * Extents are ignored (with an error message) when the bytenr is not
 * sector-aligned, when a metadata extent's length differs from nodesize,
 * or when a data extent's length is not sector-aligned.
 * NOTE(review): return codes on those paths, the loop header of the inline
 * ref walk and the assignments to `metadata` are hidden in this listing.
 * NOTE(review): in the inline-ref path the result of add_extent_rec() and
 * of both add_data_backref() calls is not captured, unlike the
 * add_tree_backref() calls.
 */
7160 static int process_extent_item(struct btrfs_root *root,
7161 struct cache_tree *extent_cache,
7162 struct extent_buffer *eb, int slot)
7164 struct btrfs_extent_item *ei;
7165 struct btrfs_extent_inline_ref *iref;
7166 struct btrfs_extent_data_ref *dref;
7167 struct btrfs_shared_data_ref *sref;
7168 struct btrfs_key key;
7169 struct extent_record tmpl;
7174 u32 item_size = btrfs_item_size_nr(eb, slot);
7180 btrfs_item_key_to_cpu(eb, &key, slot);
/*
 * For METADATA_ITEM keys the length is taken from nodesize; otherwise
 * key.offset is used as the length.
 */
7182 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7184 num_bytes = root->fs_info->nodesize;
7186 num_bytes = key.offset;
7189 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7190 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7191 key.objectid, root->fs_info->sectorsize);
/*
 * Item smaller than struct btrfs_extent_item: treated as a v0 extent item
 * when BTRFS_COMPAT_EXTENT_TREE_V0 is compiled in.
 */
7194 if (item_size < sizeof(*ei)) {
7195 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7196 struct btrfs_extent_item_v0 *ei0;
7197 BUG_ON(item_size != sizeof(*ei0));
7198 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7199 refs = btrfs_extent_refs_v0(eb, ei0);
7203 memset(&tmpl, 0, sizeof(tmpl));
7204 tmpl.start = key.objectid;
7205 tmpl.nr = num_bytes;
7206 tmpl.extent_item_refs = refs;
7207 tmpl.metadata = metadata;
7209 tmpl.max_size = num_bytes;
7211 return add_extent_rec(extent_cache, &tmpl);
7214 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7215 refs = btrfs_extent_refs(eb, ei);
7216 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7220 if (metadata && num_bytes != root->fs_info->nodesize) {
7221 error("ignore invalid metadata extent, length %llu does not equal to %u",
7222 num_bytes, root->fs_info->nodesize);
7225 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7226 error("ignore invalid data extent, length %llu is not aligned to %u",
7227 num_bytes, root->fs_info->sectorsize);
7231 memset(&tmpl, 0, sizeof(tmpl));
7232 tmpl.start = key.objectid;
7233 tmpl.nr = num_bytes;
7234 tmpl.extent_item_refs = refs;
7235 tmpl.metadata = metadata;
7237 tmpl.max_size = num_bytes;
7238 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs start right after the extent item; for tree blocks stored
 * under an EXTENT_ITEM key a btrfs_tree_block_info is skipped first. `end`
 * marks the end of the item.
 */
7240 ptr = (unsigned long)(ei + 1);
7241 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7242 key.type == BTRFS_EXTENT_ITEM_KEY)
7243 ptr += sizeof(struct btrfs_tree_block_info);
7245 end = (unsigned long)ei + item_size;
7247 iref = (struct btrfs_extent_inline_ref *)ptr;
7248 type = btrfs_extent_inline_ref_type(eb, iref);
7249 offset = btrfs_extent_inline_ref_offset(eb, iref);
7251 case BTRFS_TREE_BLOCK_REF_KEY:
7252 ret = add_tree_backref(extent_cache, key.objectid,
7256 "add_tree_backref failed (extent items tree block): %s",
7259 case BTRFS_SHARED_BLOCK_REF_KEY:
7260 ret = add_tree_backref(extent_cache, key.objectid,
7264 "add_tree_backref failed (extent items shared block): %s",
7267 case BTRFS_EXTENT_DATA_REF_KEY:
7268 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7269 add_data_backref(extent_cache, key.objectid, 0,
7270 btrfs_extent_data_ref_root(eb, dref),
7271 btrfs_extent_data_ref_objectid(eb,
7273 btrfs_extent_data_ref_offset(eb, dref),
7274 btrfs_extent_data_ref_count(eb, dref),
7277 case BTRFS_SHARED_DATA_REF_KEY:
7278 sref = (struct btrfs_shared_data_ref *)(iref + 1);
7279 add_data_backref(extent_cache, key.objectid, offset,
7281 btrfs_shared_data_ref_count(eb, sref),
7285 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7286 key.objectid, key.type, num_bytes);
/* Advance past this inline ref; its size depends on the ref type. */
7289 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Verify that the range [offset, offset + bytes) of block group `cache` is
 * covered by exactly one matching free-space entry, and unlink that entry.
 *
 * Before the lookup, the range is adjusted around the logical addresses
 * that btrfs_rmap_block() reports for each superblock mirror
 * (btrfs_sb_offset(i)): overlapping parts are trimmed from the front or
 * the back, and a stripe in the middle splits the range, with the left
 * part checked recursively.
 * Mismatches are reported on stderr ("There is no free space entry ...",
 * "Wanted offset ...", "Wanted bytes ...").
 * NOTE(review): the loop over `nr`, the early exits and the return values
 * are hidden in this listing.
 */
7296 static int check_cache_range(struct btrfs_root *root,
7297 struct btrfs_block_group_cache *cache,
7298 u64 offset, u64 bytes)
7300 struct btrfs_free_space *entry;
7306 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
7307 bytenr = btrfs_sb_offset(i);
7308 ret = btrfs_rmap_block(root->fs_info,
7309 cache->key.objectid, bytenr, 0,
7310 &logical, &nr, &stripe_len);
7315 if (logical[nr] + stripe_len <= offset)
7317 if (offset + bytes <= logical[nr])
7319 if (logical[nr] == offset) {
7320 if (stripe_len >= bytes) {
7324 bytes -= stripe_len;
7325 offset += stripe_len;
7326 } else if (logical[nr] < offset) {
7327 if (logical[nr] + stripe_len >=
7332 bytes = (offset + bytes) -
7333 (logical[nr] + stripe_len);
7334 offset = logical[nr] + stripe_len;
7337 * Could be tricky, the super may land in the
7338 * middle of the area we're checking. First
7339 * check the easiest case, it's at the end.
7341 if (logical[nr] + stripe_len >=
7343 bytes = logical[nr] - offset;
7347 /* Check the left side */
7348 ret = check_cache_range(root, cache,
7350 logical[nr] - offset);
7356 /* Now we continue with the right side */
7357 bytes = (offset + bytes) -
7358 (logical[nr] + stripe_len);
7359 offset = logical[nr] + stripe_len;
/*
 * The remaining range must match a free-space entry exactly, in both
 * offset and length.
 */
7366 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7368 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7369 offset, offset+bytes);
7373 if (entry->offset != offset) {
7374 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7379 if (entry->bytes != bytes) {
7380 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7381 bytes, entry->bytes, offset);
/* Matched: take the entry out so leftovers can be detected by the caller. */
7385 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the loaded free-space entries of block group `cache` against
 * the extent tree.
 *
 * Walks EXTENT_ITEM / METADATA_ITEM keys of the extent root inside the
 * block group, keeping `last` at the end of the previously seen extent
 * (key.offset for extent items, nodesize for metadata items). Each gap
 * between `last` and the next extent, and the tail up to the end of the
 * block group, is checked with check_cache_range(). Afterwards any entry
 * still left in free_space_offset is reported.
 * NOTE(review): loop structure, error exits and the return value are
 * hidden in this listing.
 */
7390 static int verify_space_cache(struct btrfs_root *root,
7391 struct btrfs_block_group_cache *cache)
7393 struct btrfs_path path;
7394 struct extent_buffer *leaf;
7395 struct btrfs_key key;
7399 root = root->fs_info->extent_root;
/* Never start below the primary superblock offset. */
7401 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7403 btrfs_init_path(&path);
7404 key.objectid = last;
7406 key.type = BTRFS_EXTENT_ITEM_KEY;
7407 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7412 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7413 ret = btrfs_next_leaf(root, &path);
7421 leaf = path.nodes[0];
7422 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7423 if (key.objectid >= cache->key.offset + cache->key.objectid)
7425 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7426 key.type != BTRFS_METADATA_ITEM_KEY) {
/* Extent begins exactly at `last`: no gap, just advance `last`. */
7431 if (last == key.objectid) {
7432 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7433 last = key.objectid + key.offset;
7435 last = key.objectid + root->fs_info->nodesize;
/* Gap between `last` and this extent must be present as free space. */
7440 ret = check_cache_range(root, cache, last,
7441 key.objectid - last);
7444 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7445 last = key.objectid + key.offset;
7447 last = key.objectid + root->fs_info->nodesize;
/* Space after the final extent up to the end of the block group. */
7451 if (last < cache->key.objectid + cache->key.offset)
7452 ret = check_cache_range(root, cache, last,
7453 cache->key.objectid +
7454 cache->key.offset - last);
7457 btrfs_release_path(&path);
7460 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7461 fprintf(stderr, "There are still entries left in the space "
/*
 * Verify the free-space information of every block group.
 *
 * If the superblock's cache generation is set (not -1ULL) and differs from
 * the super generation, a message is printed that the space cache will be
 * invalidated. Otherwise each block group, found via
 * btrfs_lookup_first_block_group() from `start` onwards, has its free
 * space loaded -- from the free space tree when the FREE_SPACE_TREE
 * compat_ro bit is set, presumably from the space cache otherwise -- and
 * is checked with verify_space_cache().
 *
 * Returns -EINVAL when the local `error` counter is non-zero, else 0.
 * NOTE(review): loop structure and several branches (including what
 * follows the "will be invalidated" message) are hidden in this listing.
 */
7469 static int check_space_cache(struct btrfs_root *root)
7471 struct btrfs_block_group_cache *cache;
7472 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7476 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7477 btrfs_super_generation(root->fs_info->super_copy) !=
7478 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7479 printf("cache and super generation don't match, space cache "
7480 "will be invalidated\n");
7484 if (ctx.progress_enabled) {
7485 ctx.tp = TASK_FREE_SPACE;
7486 task_start(ctx.info);
7490 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* Continue the next lookup right after this block group. */
7494 start = cache->key.objectid + cache->key.offset;
7495 if (!cache->free_space_ctl) {
7496 if (btrfs_init_free_space_ctl(cache,
7497 root->fs_info->sectorsize)) {
7502 btrfs_remove_free_space_cache(cache);
7505 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7506 ret = exclude_super_stripes(root, cache);
7508 fprintf(stderr, "could not exclude super stripes: %s\n",
7513 ret = load_free_space_tree(root->fs_info, cache);
7514 free_excluded_extents(root, cache);
7516 fprintf(stderr, "could not load free space tree: %s\n",
7523 ret = load_free_space_cache(root->fs_info, cache);
7528 ret = verify_space_cache(root, cache);
7530 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7531 cache->key.objectid);
7536 task_stop(ctx.info);
7538 return error ? -EINVAL : 0;
/*
 * Read the data range [bytenr, bytenr + num_bytes) and compare the
 * checksum of every sectorsize block against the expected values stored
 * in `eb` starting at leaf_offset.
 *
 * The expected checksum for the block at byte offset `tmp` within the
 * range is read from leaf_offset + (tmp / sectorsize) * csum_size. On a
 * mismatch a message is printed, and the number of copies is queried so
 * another mirror can be tried.
 * NOTE(review): the retry and exit logic, the buffer release and the
 * return value are hidden in this listing. A length that is not a multiple
 * of sectorsize is presumably rejected up front -- confirm.
 */
7541 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7542 u64 num_bytes, unsigned long leaf_offset,
7543 struct extent_buffer *eb) {
7545 struct btrfs_fs_info *fs_info = root->fs_info;
7547 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7549 unsigned long csum_offset;
7553 u64 data_checked = 0;
7559 if (num_bytes % fs_info->sectorsize)
7562 data = malloc(num_bytes);
7566 while (offset < num_bytes) {
7569 read_len = num_bytes - offset;
7570 /* read as much space once a time */
7571 ret = read_extent_data(fs_info, data + offset,
7572 bytenr + offset, &read_len, mirror);
7576 /* verify every 4k data's checksum */
7577 while (data_checked < read_len) {
7579 tmp = offset + data_checked;
7581 csum = btrfs_csum_data((char *)data + tmp,
7582 csum, fs_info->sectorsize);
7583 btrfs_csum_final(csum, (u8 *)&csum);
7585 csum_offset = leaf_offset +
7586 tmp / fs_info->sectorsize * csum_size;
7587 read_extent_buffer(eb, (char *)&csum_expected,
7588 csum_offset, csum_size);
7589 /* try another mirror */
7590 if (csum != csum_expected) {
7591 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7592 mirror, bytenr + tmp,
7593 csum, csum_expected);
7594 num_copies = btrfs_num_copies(root->fs_info,
7596 if (mirror < num_copies - 1) {
7601 data_checked += fs_info->sectorsize;
/*
 * Check that the range [bytenr, bytenr + num_bytes) is covered by
 * EXTENT_ITEMs in the extent tree.
 *
 * The search starts at (bytenr, EXTENT_ITEM, -1) and steps back so that an
 * extent starting before bytenr is found as well. While walking forward,
 * every overlapping extent shrinks the remaining range. An extent in the
 * middle of the range splits it, with the right part checked recursively.
 * If bytes remain uncovered and no error occurred, "There are no extents
 * for csum range ..." is printed.
 * NOTE(review): jumps, loop structure and return values are hidden in this
 * listing.
 * NOTE(review): the search runs on fs_info->extent_root, but
 * btrfs_prev_leaf()/btrfs_next_leaf() are passed `root` -- confirm that
 * callers always pass a root for which this is equivalent.
 */
7610 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7613 struct btrfs_path path;
7614 struct extent_buffer *leaf;
7615 struct btrfs_key key;
7618 btrfs_init_path(&path);
7619 key.objectid = bytenr;
7620 key.type = BTRFS_EXTENT_ITEM_KEY;
7621 key.offset = (u64)-1;
7624 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7627 fprintf(stderr, "Error looking up extent record %d\n", ret);
7628 btrfs_release_path(&path);
7631 if (path.slots[0] > 0) {
7634 ret = btrfs_prev_leaf(root, &path);
7637 } else if (ret > 0) {
7644 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7647 * Block group items come before extent items if they have the same
7648 * bytenr, so walk back one more just in case. Dear future traveller,
7649 * first congrats on mastering time travel. Now if it's not too much
7650 * trouble could you go back to 2006 and tell Chris to make the
7651 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7652 * EXTENT_ITEM_KEY please?
7654 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7655 if (path.slots[0] > 0) {
7658 ret = btrfs_prev_leaf(root, &path);
7661 } else if (ret > 0) {
7666 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7670 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7671 ret = btrfs_next_leaf(root, &path);
7673 fprintf(stderr, "Error going to next leaf "
7675 btrfs_release_path(&path);
7681 leaf = path.nodes[0];
7682 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7683 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7687 if (key.objectid + key.offset < bytenr) {
7691 if (key.objectid > bytenr + num_bytes)
7694 if (key.objectid == bytenr) {
7695 if (key.offset >= num_bytes) {
7699 num_bytes -= key.offset;
7700 bytenr += key.offset;
7701 } else if (key.objectid < bytenr) {
7702 if (key.objectid + key.offset >= bytenr + num_bytes) {
7706 num_bytes = (bytenr + num_bytes) -
7707 (key.objectid + key.offset);
7708 bytenr = key.objectid + key.offset;
7710 if (key.objectid + key.offset < bytenr + num_bytes) {
7711 u64 new_start = key.objectid + key.offset;
7712 u64 new_bytes = bytenr + num_bytes - new_start;
7715 * Weird case, the extent is in the middle of
7716 * our range, we'll have to search one side
7717 * and then the other. Not sure if this happens
7718 * in real life, but no harm in coding it up
7719 * anyway just in case.
7721 btrfs_release_path(&path);
7722 ret = check_extent_exists(root, new_start,
7725 fprintf(stderr, "Right section didn't "
7729 num_bytes = key.objectid - bytenr;
7732 num_bytes = key.objectid - bytenr;
7739 if (num_bytes && !ret) {
7740 fprintf(stderr, "There are no extents for csum range "
7741 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7745 btrfs_release_path(&path);
/*
 * Walk the checksum tree and verify that every checksummed byte range is
 * backed by an extent record.
 *
 * Consecutive csum items are merged into one range (`offset`,
 * `num_bytes`). When an item does not continue the current range, the
 * accumulated range is checked with check_extent_exists() and a new range
 * starts at the item's key.offset. The data length an item covers is
 * (item size / csum_size) * sectorsize. Unless check_data_csum is zero,
 * the data itself is also verified with check_extent_csums().
 * NOTE(review): loop structure, the skip_csum_check label, the handling of
 * the final range and the return value are hidden in this listing.
 */
7749 static int check_csums(struct btrfs_root *root)
7751 struct btrfs_path path;
7752 struct extent_buffer *leaf;
7753 struct btrfs_key key;
7754 u64 offset = 0, num_bytes = 0;
7755 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7759 unsigned long leaf_offset;
7761 root = root->fs_info->csum_root;
7762 if (!extent_buffer_uptodate(root->node)) {
7763 fprintf(stderr, "No valid csum tree found\n");
7767 btrfs_init_path(&path);
7768 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7769 key.type = BTRFS_EXTENT_CSUM_KEY;
7771 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7773 fprintf(stderr, "Error searching csum tree %d\n", ret);
7774 btrfs_release_path(&path);
7778 if (ret > 0 && path.slots[0])
7783 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7784 ret = btrfs_next_leaf(root, &path);
7786 fprintf(stderr, "Error going to next leaf "
7793 leaf = path.nodes[0];
7795 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7796 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Bytes of file data covered by the checksums stored in this item. */
7801 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7802 csum_size) * root->fs_info->sectorsize;
7803 if (!check_data_csum)
7804 goto skip_csum_check;
7805 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7806 ret = check_extent_csums(root, key.offset, data_len,
7812 offset = key.offset;
/* Item does not continue the current range: check it, then restart. */
7813 } else if (key.offset != offset + num_bytes) {
7814 ret = check_extent_exists(root, offset, num_bytes);
7816 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7817 "there is no extent record\n",
7818 offset, offset+num_bytes);
7821 offset = key.offset;
7824 num_bytes += data_len;
7828 btrfs_release_path(&path);
/*
 * Compare `key` against `drop_key` field by field, in the order objectid,
 * type, offset.
 * NOTE(review): the return statements are hidden in this listing;
 * presumably non-zero is returned when `key` sorts strictly before
 * `drop_key` and zero otherwise -- confirm.
 */
7832 static int is_dropped_key(struct btrfs_key *key,
7833 struct btrfs_key *drop_key) {
7834 if (key->objectid < drop_key->objectid)
7836 else if (key->objectid == drop_key->objectid) {
7837 if (key->type < drop_key->type)
7839 else if (key->type == drop_key->type) {
7840 if (key->offset < drop_key->offset)
7848 * Here are the rules for FULL_BACKREF.
7850 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7851 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7853 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7854 * if it happened after the relocation occurred since we'll have dropped the
7855 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7856 * have no real way to know for sure.
7858 * We process the blocks one root at a time, and we start from the lowest root
7859 * objectid and go to the highest. So we can just lookup the owner backref for
7860 * the record and if we don't find it then we know it doesn't exist and we have
7863 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7864 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7865 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether tree block `buf` should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF and cross-check the result against what is
 * already recorded for the extent.
 *
 * The extent record for buf->start is looked up in extent_cache. The
 * checks then look at, in order: whether ri->objectid is below
 * BTRFS_FIRST_FREE_OBJECTID, whether buf is the root block
 * (buf->start == ri->bytenr), whether the RELOC header flag is set,
 * whether the header owner equals ri->objectid, and whether a tree backref
 * for the owner exists.
 * If rec->flag_block_full_backref was already set to the opposite of the
 * computed value, rec->bad_full_backref is set.
 * NOTE(review): the jump targets of those checks, the handling of a failed
 * lookup and the return value are hidden in this listing.
 */
7867 static int calc_extent_flag(struct cache_tree *extent_cache,
7868 struct extent_buffer *buf,
7869 struct root_item_record *ri,
7872 struct extent_record *rec;
7873 struct cache_extent *cache;
7874 struct tree_backref *tback;
7877 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7878 /* we have added this extent before */
7882 rec = container_of(cache, struct extent_record, cache);
7885 * Except file/reloc tree, we can not have
7888 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7893 if (buf->start == ri->bytenr)
7896 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7899 owner = btrfs_header_owner(buf);
7900 if (owner == ri->objectid)
7903 tback = find_tree_backref(rec, 0, owner);
7908 if (rec->flag_block_full_backref != FLAG_UNSET &&
7909 rec->flag_block_full_backref != 0)
7910 rec->bad_full_backref = 1;
7913 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7914 if (rec->flag_block_full_backref != FLAG_UNSET &&
7915 rec->flag_block_full_backref != 1)
7916 rec->bad_full_backref = 1;
/*
 * Print "Invalid key type(<type>) found in root(<root>)" to stderr, using
 * print_key_type() and print_objectid() to render the two values.
 */
7920 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7922 fprintf(stderr, "Invalid key type(");
7923 print_key_type(stderr, 0, key_type);
7924 fprintf(stderr, ") found in root(");
7925 print_objectid(stderr, rootid, 0);
7926 fprintf(stderr, ")\n");
7930 * Check if the key is valid with its extent buffer.
7932 * This is an early check in case an invalid key exists in an extent buffer
7933 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check that an item of type key_type may appear in the tree `rootid`.
 *
 * Visible rules: DEV_ITEM and CHUNK_ITEM only in the chunk tree;
 * EXTENT_ITEM, METADATA_ITEM and BLOCK_GROUP_ITEM only in the extent tree;
 * ROOT_ITEM only in the root tree; DEV_EXTENT only in the dev tree.
 * Violations are reported through report_mismatch_key_root().
 * NOTE(review): the switch header, the jumps and the return values are
 * hidden in this listing.
 * NOTE(review): `case BTRFS_CSUM_TREE_OBJECTID` is an objectid constant
 * among labels that are otherwise all key types. Given the "valid in csum
 * and log tree" comment, the csum item key type was probably intended.
 * Confirm before changing, because the hidden remainder of that condition
 * decides which roots would be accepted.
 */
7936 static int check_type_with_root(u64 rootid, u8 key_type)
7939 /* Only valid in chunk tree */
7940 case BTRFS_DEV_ITEM_KEY:
7941 case BTRFS_CHUNK_ITEM_KEY:
7942 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7945 /* valid in csum and log tree */
7946 case BTRFS_CSUM_TREE_OBJECTID:
7947 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7951 case BTRFS_EXTENT_ITEM_KEY:
7952 case BTRFS_METADATA_ITEM_KEY:
7953 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7954 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7957 case BTRFS_ROOT_ITEM_KEY:
7958 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7961 case BTRFS_DEV_EXTENT_KEY:
7962 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7968 report_mismatch_key_root(key_type, rootid);
/*
 * Process one tree block from the work queues.
 *
 * Steps visible in this listing:
 *  - pick a batch of blocks with pick_next_pending() and start readahead
 *    for them;
 *  - take bits[0], remove it from `pending`, `reada` and `nodes`, and read
 *    the block (using the parent generation from its extent record when
 *    one is found);
 *  - determine the FULL_BACKREF flag and record it in the extent record;
 *  - run check_block();
 *  - for a leaf, dispatch every item by key type into the matching cache
 *    (extents, chunks, devices, block groups, device extents, backrefs,
 *    orphan items, file extents);
 *  - for a node, add an extent record and a tree backref for every child
 *    pointer and queue the child;
 *  - update the global byte counters and release the buffer.
 *
 * NOTE(review): many branch conditions, jumps and the return value are
 * hidden in this listing.
 * NOTE(review): `rec` starts as NULL and is only set from the extent_cache
 * lookup, yet it is dereferenced later (bad_full_backref,
 * flag_block_full_backref). The guards are not visible here -- confirm the
 * lookup cannot miss on those paths.
 */
7972 static int run_next_block(struct btrfs_root *root,
7973 struct block_info *bits,
7976 struct cache_tree *pending,
7977 struct cache_tree *seen,
7978 struct cache_tree *reada,
7979 struct cache_tree *nodes,
7980 struct cache_tree *extent_cache,
7981 struct cache_tree *chunk_cache,
7982 struct rb_root *dev_cache,
7983 struct block_group_tree *block_group_cache,
7984 struct device_extent_tree *dev_extent_cache,
7985 struct root_item_record *ri)
7987 struct btrfs_fs_info *fs_info = root->fs_info;
7988 struct extent_buffer *buf;
7989 struct extent_record *rec = NULL;
8000 struct btrfs_key key;
8001 struct cache_extent *cache;
8004 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8005 bits_nr, &reada_bits);
8010 for(i = 0; i < nritems; i++) {
8011 ret = add_cache_extent(reada, bits[i].start,
8016 /* fixme, get the parent transid */
8017 readahead_tree_block(fs_info, bits[i].start, 0);
/* Only the first picked block is processed by this call. */
8020 *last = bits[0].start;
8021 bytenr = bits[0].start;
8022 size = bits[0].size;
/* The block is handled now, so take it out of the work queues. */
8024 cache = lookup_cache_extent(pending, bytenr, size);
8026 remove_cache_extent(pending, cache);
8029 cache = lookup_cache_extent(reada, bytenr, size);
8031 remove_cache_extent(reada, cache);
8034 cache = lookup_cache_extent(nodes, bytenr, size);
8036 remove_cache_extent(nodes, cache);
8039 cache = lookup_cache_extent(extent_cache, bytenr, size);
8041 rec = container_of(cache, struct extent_record, cache);
8042 gen = rec->parent_generation;
8045 /* fixme, get the real parent transid */
8046 buf = read_tree_block(root->fs_info, bytenr, gen);
8047 if (!extent_buffer_uptodate(buf)) {
8048 record_bad_block_io(root->fs_info,
8049 extent_cache, bytenr, size);
8053 nritems = btrfs_header_nritems(buf);
/*
 * Obtain the block flags: from the extent tree unless init_extent_tree is
 * set, otherwise (and apparently also as a fallback) computed by
 * calc_extent_flag(). When that computation fails, FULL_BACKREF is assumed.
 */
8056 if (!init_extent_tree) {
8057 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8058 btrfs_header_level(buf), 1, NULL,
8061 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8063 fprintf(stderr, "Couldn't calc extent flags\n");
8064 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8069 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8071 fprintf(stderr, "Couldn't calc extent flags\n");
8072 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8076 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8078 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8079 ri->objectid == btrfs_header_owner(buf)) {
8081 * Ok we got to this block from it's original owner and
8082 * we have FULL_BACKREF set. Relocation can leave
8083 * converted blocks over so this is altogether possible,
8084 * however it's not possible if the generation > the
8085 * last snapshot, so check for this case.
8087 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8088 btrfs_header_generation(buf) > ri->last_snapshot) {
8089 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8090 rec->bad_full_backref = 1;
8095 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8096 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8097 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8098 rec->bad_full_backref = 1;
8102 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8103 rec->flag_block_full_backref = 1;
8107 rec->flag_block_full_backref = 0;
8109 owner = btrfs_header_owner(buf);
8112 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: account unused space and dispatch every item by key type. */
8116 if (btrfs_is_leaf(buf)) {
8117 btree_space_waste += btrfs_leaf_free_space(root, buf);
8118 for (i = 0; i < nritems; i++) {
8119 struct btrfs_file_extent_item *fi;
8120 btrfs_item_key_to_cpu(buf, &key, i);
8122 * Check key type against the leaf owner.
8123 * Could filter quite a lot of early error if
8126 if (check_type_with_root(btrfs_header_owner(buf),
8128 fprintf(stderr, "ignoring invalid key\n");
8131 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8132 process_extent_item(root, extent_cache, buf,
8136 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8137 process_extent_item(root, extent_cache, buf,
8141 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8143 btrfs_item_size_nr(buf, i);
8146 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8147 process_chunk_item(chunk_cache, &key, buf, i);
8150 if (key.type == BTRFS_DEV_ITEM_KEY) {
8151 process_device_item(dev_cache, &key, buf, i);
8154 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8155 process_block_group_item(block_group_cache,
8159 if (key.type == BTRFS_DEV_EXTENT_KEY) {
8160 process_device_extent_item(dev_extent_cache,
8165 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8166 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8167 process_extent_ref_v0(extent_cache, buf, i);
8174 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8175 ret = add_tree_backref(extent_cache,
8176 key.objectid, 0, key.offset, 0);
8179 "add_tree_backref failed (leaf tree block): %s",
8183 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8184 ret = add_tree_backref(extent_cache,
8185 key.objectid, key.offset, 0, 0);
8188 "add_tree_backref failed (leaf shared block): %s",
8192 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8193 struct btrfs_extent_data_ref *ref;
8194 ref = btrfs_item_ptr(buf, i,
8195 struct btrfs_extent_data_ref);
8196 add_data_backref(extent_cache,
8198 btrfs_extent_data_ref_root(buf, ref),
8199 btrfs_extent_data_ref_objectid(buf,
8201 btrfs_extent_data_ref_offset(buf, ref),
8202 btrfs_extent_data_ref_count(buf, ref),
8203 0, root->fs_info->sectorsize);
8206 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8207 struct btrfs_shared_data_ref *ref;
8208 ref = btrfs_item_ptr(buf, i,
8209 struct btrfs_shared_data_ref);
8210 add_data_backref(extent_cache,
8211 key.objectid, key.offset, 0, 0, 0,
8212 btrfs_shared_data_ref_count(buf, ref),
8213 0, root->fs_info->sectorsize);
/* Orphan items may be queued on delete_items as bad_item entries. */
8216 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8217 struct bad_item *bad;
8219 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8223 bad = malloc(sizeof(struct bad_item));
8226 INIT_LIST_HEAD(&bad->list);
8227 memcpy(&bad->key, &key,
8228 sizeof(struct btrfs_key));
8229 bad->root_id = owner;
8230 list_add_tail(&bad->list, &delete_items);
/*
 * What remains are file extent items: the checks below look at the inline
 * type and a zero disk bytenr before the data backref is added.
 */
8233 if (key.type != BTRFS_EXTENT_DATA_KEY)
8235 fi = btrfs_item_ptr(buf, i,
8236 struct btrfs_file_extent_item);
8237 if (btrfs_file_extent_type(buf, fi) ==
8238 BTRFS_FILE_EXTENT_INLINE)
8240 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8243 data_bytes_allocated +=
8244 btrfs_file_extent_disk_num_bytes(buf, fi);
8245 if (data_bytes_allocated < root->fs_info->sectorsize) {
8248 data_bytes_referenced +=
8249 btrfs_file_extent_num_bytes(buf, fi);
8250 add_data_backref(extent_cache,
8251 btrfs_file_extent_disk_bytenr(buf, fi),
8252 parent, owner, key.objectid, key.offset -
8253 btrfs_file_extent_offset(buf, fi), 1, 1,
8254 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Node: register every child pointer and queue the child block. */
8258 struct btrfs_key first_key;
8260 first_key.objectid = 0;
8263 btrfs_item_key_to_cpu(buf, &first_key, 0);
8264 level = btrfs_header_level(buf);
8265 for (i = 0; i < nritems; i++) {
8266 struct extent_record tmpl;
8268 ptr = btrfs_node_blockptr(buf, i);
8269 size = root->fs_info->nodesize;
8270 btrfs_node_key_to_cpu(buf, &key, i);
8272 if ((level == ri->drop_level)
8273 && is_dropped_key(&key, &ri->drop_key)) {
8278 memset(&tmpl, 0, sizeof(tmpl));
8279 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8280 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8285 tmpl.max_size = size;
8286 ret = add_extent_rec(extent_cache, &tmpl);
8290 ret = add_tree_backref(extent_cache, ptr, parent,
8294 "add_tree_backref failed (non-leaf block): %s",
8300 add_pending(nodes, seen, ptr, size);
8302 add_pending(pending, seen, ptr, size);
8305 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
8306 nritems) * sizeof(struct btrfs_key_ptr);
/* Global accounting by owner of the block. */
8308 total_btree_bytes += buf->len;
8309 if (fs_root_objectid(btrfs_header_owner(buf)))
8310 total_fs_tree_bytes += buf->len;
8311 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8312 total_extent_tree_bytes += buf->len;
8314 free_extent_buffer(buf);
/*
 * Queue a tree root block for scanning and seed its extent record.
 *
 * The block is handed to add_pending() together with 'seen': on the 'nodes'
 * tree when its header level is above 0, and on the 'pending' tree by the
 * second visible call (presumably the level-0 case; the line separating the
 * two calls is not visible in this listing).
 *
 * An extent_record template with start = buf->start and max_size = buf->len
 * is then passed to add_extent_rec(); that call's return value is not
 * checked.  Finally a tree backref is added for buf->start:
 *  - with buf->start itself as the parent when 'objectid' is
 *    BTRFS_TREE_RELOC_OBJECTID or the header's backref revision is older
 *    than BTRFS_MIXED_BACKREF_REV;
 *  - with parent 0 and 'objectid' otherwise.
 *
 * NOTE(review): 'objectid' and 'ret' are used below but their declaration
 * lines, and the return statement, are not visible here; presumably the
 * add_tree_backref() result is what gets returned -- confirm.
 */
8318 static int add_root_to_pending(struct extent_buffer *buf,
8319 struct cache_tree *extent_cache,
8320 struct cache_tree *pending,
8321 struct cache_tree *seen,
8322 struct cache_tree *nodes,
8325 struct extent_record tmpl;
8328 if (btrfs_header_level(buf) > 0)
8329 add_pending(nodes, seen, buf->start, buf->len);
8331 add_pending(pending, seen, buf->start, buf->len);
8333 memset(&tmpl, 0, sizeof(tmpl));
8334 tmpl.start = buf->start;
8339 tmpl.max_size = buf->len;
8340 add_extent_rec(extent_cache, &tmpl);
8342 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8343 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8344 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8347 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8352 /* as we fix the tree, we might be deleting blocks that
8353 * we're tracking for repair. This hook makes sure we
8354 * remove any backrefs for blocks as we are fixing them.
/*
 * Keep the fsck extent cache in step with an extent reference being dropped.
 *
 * The range (bytenr, num_bytes) is looked up in
 * root->fs_info->fsck_extent_cache and the extent_record found there is
 * updated through the backref matching (parent, root_objectid, ...):
 *
 *  - owner >= BTRFS_FIRST_FREE_OBJECTID marks the reference as data.  The
 *    data backref comes from find_data_backref().  When its node.found_ref
 *    flag is set, back->found_ref and rec->refs are reduced by
 *    refs_to_drop; when node.found_extent_tree is set, back->num_refs and
 *    (if non-zero) rec->extent_item_refs are reduced likewise.  A counter
 *    that reaches 0 clears the matching flag.
 *  - otherwise the tree backref from find_tree_backref() has its found_ref
 *    and found_extent_tree flags cleared, and rec->extent_item_refs is
 *    decremented when it is non-zero.
 *
 * maybe_free_extent_rec() is then called on the record.
 *
 * NOTE(review): refs_to_drop is used below but its parameter line is not
 * visible in this listing; neither are the handling of a failed lookup and
 * the return statement.
 * NOTE(review): both paths rb_erase() the backref when found_extent_tree is
 * clear while found_ref is still set -- confirm this is the intended
 * condition rather than "both flags clear".
 */
8356 static int free_extent_hook(struct btrfs_trans_handle *trans,
8357 struct btrfs_root *root,
8358 u64 bytenr, u64 num_bytes, u64 parent,
8359 u64 root_objectid, u64 owner, u64 offset,
8362 struct extent_record *rec;
8363 struct cache_extent *cache;
8365 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8367 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8368 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8372 rec = container_of(cache, struct extent_record, cache);
8374 struct data_backref *back;
8375 back = find_data_backref(rec, parent, root_objectid, owner,
8376 offset, 1, bytenr, num_bytes);
8379 if (back->node.found_ref) {
8380 back->found_ref -= refs_to_drop;
8382 rec->refs -= refs_to_drop;
8384 if (back->node.found_extent_tree) {
8385 back->num_refs -= refs_to_drop;
8386 if (rec->extent_item_refs)
8387 rec->extent_item_refs -= refs_to_drop;
8389 if (back->found_ref == 0)
8390 back->node.found_ref = 0;
8391 if (back->num_refs == 0)
8392 back->node.found_extent_tree = 0;
8394 if (!back->node.found_extent_tree && back->node.found_ref) {
8395 rb_erase(&back->node.node, &rec->backref_tree);
8399 struct tree_backref *back;
8400 back = find_tree_backref(rec, parent, root_objectid);
8403 if (back->node.found_ref) {
8406 back->node.found_ref = 0;
8408 if (back->node.found_extent_tree) {
8409 if (rec->extent_item_refs)
8410 rec->extent_item_refs--;
8411 back->node.found_extent_tree = 0;
8413 if (!back->node.found_extent_tree && back->node.found_ref) {
8414 rb_erase(&back->node.node, &rec->backref_tree);
8418 maybe_free_extent_rec(extent_cache, rec);
/*
 * Remove from the extent tree the items keyed on 'bytenr' that describe an
 * extent or one of its backrefs.
 *
 * The search key starts at objectid 'bytenr' with offset (u64)-1 and is
 * looked up in root->fs_info->extent_root.  For the item that is examined:
 *  - its objectid is compared with 'bytenr' (the statement taken on a
 *    mismatch is not visible in this listing);
 *  - an item whose type is none of EXTENT_ITEM, METADATA_ITEM,
 *    TREE_BLOCK_REF, EXTENT_DATA_REF, EXTENT_REF_V0, SHARED_BLOCK_REF or
 *    SHARED_DATA_REF is not deleted: the path is released and the search
 *    key is moved below it (same type with offset - 1 when the type is 0,
 *    otherwise type - 1 with offset (u64)-1);
 *  - any other item is reported on stderr and removed with
 *    btrfs_del_item().
 *
 * After an EXTENT_ITEM or METADATA_ITEM has been deleted,
 * btrfs_update_block_group() is called for 'bytenr'; the byte count is the
 * key offset for an EXTENT_ITEM and fs_info->nodesize for a METADATA_ITEM.
 *
 * NOTE(review): the loop construct, the error checks after the search and
 * delete calls, and the return statement are not visible here.
 */
8423 static int delete_extent_records(struct btrfs_trans_handle *trans,
8424 struct btrfs_root *root,
8425 struct btrfs_path *path,
8428 struct btrfs_key key;
8429 struct btrfs_key found_key;
8430 struct extent_buffer *leaf;
8435 key.objectid = bytenr;
8437 key.offset = (u64)-1;
8440 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8447 if (path->slots[0] == 0)
8453 leaf = path->nodes[0];
8454 slot = path->slots[0];
8456 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8457 if (found_key.objectid != bytenr)
8460 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8461 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8462 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8463 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8464 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8465 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8466 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8467 btrfs_release_path(path);
8468 if (found_key.type == 0) {
8469 if (found_key.offset == 0)
8471 key.offset = found_key.offset - 1;
8472 key.type = found_key.type;
8474 key.type = found_key.type - 1;
8475 key.offset = (u64)-1;
8479 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8480 found_key.objectid, found_key.type, found_key.offset);
8482 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8485 btrfs_release_path(path);
8487 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8488 found_key.type == BTRFS_METADATA_ITEM_KEY) {
8489 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8490 found_key.offset : root->fs_info->nodesize;
8492 ret = btrfs_update_block_group(trans, root, bytenr,
8499 btrfs_release_path(path);
8504 * for a single backref, this will allocate a new extent
8505 * and add the backref to it.
/*
 * Recreate in the extent tree what one backref of 'rec' describes.
 *
 * Item creation: an EXTENT_ITEM keyed (rec->start, rec->max_size) is
 * inserted into info->extent_root with btrfs_insert_empty_item().  Its
 * reference count is set to 0 and its generation to rec->generation.  For a
 * data backref the item is flagged BTRFS_EXTENT_FLAG_DATA.  Otherwise the
 * item is sized to also hold a btrfs_tree_block_info, which is zeroed and
 * filled with level rec->info_level and a key whose objectid is
 * rec->info_objectid, and the item is flagged
 * BTRFS_EXTENT_FLAG_TREE_BLOCK | flags.  btrfs_update_block_group() is then
 * called for the range.  Presumably this whole part is skipped when
 * 'allocated' is set; the guarding line is not visible in this listing.
 *
 * Reference creation: for a data backref btrfs_inc_extent_ref() is called
 * dback->found_ref times; for a tree backref it is called once with
 * tback->root.  'parent' is taken from the backref when full_backref is
 * set.  Each branch prints what it added to stderr.  The path is released
 * before returning.
 *
 * NOTE(review): "©_key" in the tree-block branch looks like a mis-encoded
 * "&copy_key" (the local is declared as copy_key, and the setters are
 * handed its address); the declaration line also carries a doubled
 * semicolon.
 * NOTE(review): the tree-backref fprintf passes rec->start, rec->max_size,
 * parent and tback->root to %llu without the (unsigned long long) casts
 * used by the data-backref fprintf.
 */
8507 static int record_extent(struct btrfs_trans_handle *trans,
8508 struct btrfs_fs_info *info,
8509 struct btrfs_path *path,
8510 struct extent_record *rec,
8511 struct extent_backref *back,
8512 int allocated, u64 flags)
8515 struct btrfs_root *extent_root = info->extent_root;
8516 struct extent_buffer *leaf;
8517 struct btrfs_key ins_key;
8518 struct btrfs_extent_item *ei;
8519 struct data_backref *dback;
8520 struct btrfs_tree_block_info *bi;
8523 rec->max_size = max_t(u64, rec->max_size,
8527 u32 item_size = sizeof(*ei);
8530 item_size += sizeof(*bi);
8532 ins_key.objectid = rec->start;
8533 ins_key.offset = rec->max_size;
8534 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8536 ret = btrfs_insert_empty_item(trans, extent_root, path,
8537 &ins_key, item_size);
8541 leaf = path->nodes[0];
8542 ei = btrfs_item_ptr(leaf, path->slots[0],
8543 struct btrfs_extent_item);
8545 btrfs_set_extent_refs(leaf, ei, 0);
8546 btrfs_set_extent_generation(leaf, ei, rec->generation);
8548 if (back->is_data) {
8549 btrfs_set_extent_flags(leaf, ei,
8550 BTRFS_EXTENT_FLAG_DATA);
8552 struct btrfs_disk_key copy_key;;
8554 bi = (struct btrfs_tree_block_info *)(ei + 1);
8555 memset_extent_buffer(leaf, 0, (unsigned long)bi,
8558 btrfs_set_disk_key_objectid(©_key,
8559 rec->info_objectid);
8560 btrfs_set_disk_key_type(©_key, 0);
8561 btrfs_set_disk_key_offset(©_key, 0);
8563 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8564 btrfs_set_tree_block_key(leaf, bi, ©_key);
8566 btrfs_set_extent_flags(leaf, ei,
8567 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8570 btrfs_mark_buffer_dirty(leaf);
8571 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8572 rec->max_size, 1, 0);
8575 btrfs_release_path(path);
8578 if (back->is_data) {
8582 dback = to_data_backref(back);
8583 if (back->full_backref)
8584 parent = dback->parent;
8588 for (i = 0; i < dback->found_ref; i++) {
8589 /* if parent != 0, we're doing a full backref
8590 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8591 * just makes the backref allocator create a data
8594 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8595 rec->start, rec->max_size,
8599 BTRFS_FIRST_FREE_OBJECTID :
8605 fprintf(stderr, "adding new data backref"
8606 " on %llu %s %llu owner %llu"
8607 " offset %llu found %d\n",
8608 (unsigned long long)rec->start,
8609 back->full_backref ?
8611 back->full_backref ?
8612 (unsigned long long)parent :
8613 (unsigned long long)dback->root,
8614 (unsigned long long)dback->owner,
8615 (unsigned long long)dback->offset,
8619 struct tree_backref *tback;
8621 tback = to_tree_backref(back);
8622 if (back->full_backref)
8623 parent = tback->parent;
8627 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8628 rec->start, rec->max_size,
8629 parent, tback->root, 0, 0);
8630 fprintf(stderr, "adding new tree backref on "
8631 "start %llu len %llu parent %llu root %llu\n",
8632 rec->start, rec->max_size, parent, tback->root);
8635 btrfs_release_path(path);
/*
 * Look through the 'entries' list for the extent_entry whose bytenr and
 * bytes both equal the given values.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * presumably the matching entry is returned, and NULL when nothing
 * matches -- confirm.
 */
8639 static struct extent_entry *find_entry(struct list_head *entries,
8640 u64 bytenr, u64 bytes)
8642 struct extent_entry *entry = NULL;
8644 list_for_each_entry(entry, entries, list) {
8645 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Choose, from the 'entries' list, the extent_entry that is most likely to
 * be correct, judged by how many backrefs voted for it (entry->count).
 *
 * What the visible lines establish:
 *  - an entry whose 'broken' count equals its 'count' is not trusted;
 *  - when the current 'best' has the same count as the entry being looked
 *    at, the choice is treated as undecided and the search goes on;
 *  - 'prev' is compared with the current entry: an unbroken 'prev' with an
 *    equal count is "not good enough", otherwise the one with the larger
 *    count becomes 'best';
 *  - an entry with a larger count than 'best' is handled by the final
 *    "else if" (the statement under it is not visible).
 *
 * NOTE(review): several statements of the loop body and the return are not
 * visible in this listing; presumably 'best' is returned, and it may be
 * NULL when no clear winner exists -- callers in this file test for that.
 */
8652 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8654 struct extent_entry *entry, *best = NULL, *prev = NULL;
8656 list_for_each_entry(entry, entries, list) {
8658 * If there are as many broken entries as entries then we know
8659 * not to trust this particular entry.
8661 if (entry->broken == entry->count)
8665 * Special case, when there are only two entries and 'best' is
8675 * If our current entry == best then we can't be sure our best
8676 * is really the best, so we need to keep searching.
8678 if (best && best->count == entry->count) {
8684 /* Prev == entry, not good enough, have to keep searching */
8685 if (!prev->broken && prev->count == entry->count)
8689 best = (prev->count > entry->count) ? prev : entry;
8690 else if (best->count < entry->count)
/*
 * Rewrite one file extent item so that it agrees with 'entry'.
 *
 * Locate (read only): the fs root dback->root is read, then the key
 * (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset) is searched.  Items
 * are examined from there, moving on with btrfs_next_leaf() when the slot
 * is past the end of the leaf, until a file extent is found whose
 * disk_bytenr and disk_num_bytes equal dback->disk_bytenr and
 * dback->bytes.  Leaving the owner's EXTENT_DATA items first is reported
 * as "Couldn't find our ref".
 *
 * Fix: a transaction is started on that root (PTR_ERR(trans) is returned
 * when that fails) and the key is searched again, this time to COW down to
 * the item.  The item is then patched:
 *  - a compressed extent whose bytenr differs from entry->bytenr is not
 *    supported and only produces a message;
 *  - if dback is marked broken and the bytenr differs, disk_bytenr is set
 *    to entry->bytenr;
 *  - if dback->disk_bytenr > entry->bytenr, the referenced range must end
 *    within the entry (else a message is printed); disk_bytenr is set to
 *    entry->bytenr and the file extent offset is rewritten.  off_diff is
 *    computed here, but the line applying it to 'offset' is not visible in
 *    this listing;
 *  - if dback->disk_bytenr < entry->bytenr, disk_bytenr + offset must not
 *    lie before the entry start (else a message is printed); the offset
 *    becomes offset + dback->disk_bytenr - entry->bytenr.
 * disk_num_bytes is set to entry->bytes.  ram_bytes is set to entry->bytes
 * only when the extent is not compressed; "ram bytes may be wrong?" is
 * printed (presumably in the compressed case).  The leaf is marked dirty.
 *
 * The transaction is committed and the path released.  Returns 'ret' when
 * it is non-zero, otherwise the commit result.
 */
8698 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8699 struct data_backref *dback, struct extent_entry *entry)
8701 struct btrfs_trans_handle *trans;
8702 struct btrfs_root *root;
8703 struct btrfs_file_extent_item *fi;
8704 struct extent_buffer *leaf;
8705 struct btrfs_key key;
8709 key.objectid = dback->root;
8710 key.type = BTRFS_ROOT_ITEM_KEY;
8711 key.offset = (u64)-1;
8712 root = btrfs_read_fs_root(info, &key);
8714 fprintf(stderr, "Couldn't find root for our ref\n");
8719 * The backref points to the original offset of the extent if it was
8720 * split, so we need to search down to the offset we have and then walk
8721 * forward until we find the backref we're looking for.
8723 key.objectid = dback->owner;
8724 key.type = BTRFS_EXTENT_DATA_KEY;
8725 key.offset = dback->offset;
8726 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8728 fprintf(stderr, "Error looking up ref %d\n", ret);
8733 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8734 ret = btrfs_next_leaf(root, path);
8736 fprintf(stderr, "Couldn't find our ref, next\n");
8740 leaf = path->nodes[0];
8741 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8742 if (key.objectid != dback->owner ||
8743 key.type != BTRFS_EXTENT_DATA_KEY) {
8744 fprintf(stderr, "Couldn't find our ref, search\n");
8747 fi = btrfs_item_ptr(leaf, path->slots[0],
8748 struct btrfs_file_extent_item);
8749 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8750 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8752 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8757 btrfs_release_path(path);
8759 trans = btrfs_start_transaction(root, 1);
8761 return PTR_ERR(trans);
8764 * Ok we have the key of the file extent we want to fix, now we can cow
8765 * down to the thing and fix it.
8767 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8769 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8770 key.objectid, key.type, key.offset, ret);
8774 fprintf(stderr, "Well that's odd, we just found this key "
8775 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8780 leaf = path->nodes[0];
8781 fi = btrfs_item_ptr(leaf, path->slots[0],
8782 struct btrfs_file_extent_item);
8784 if (btrfs_file_extent_compression(leaf, fi) &&
8785 dback->disk_bytenr != entry->bytenr) {
8786 fprintf(stderr, "Ref doesn't match the record start and is "
8787 "compressed, please take a btrfs-image of this file "
8788 "system and send it to a btrfs developer so they can "
8789 "complete this functionality for bytenr %Lu\n",
8790 dback->disk_bytenr);
8795 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8796 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8797 } else if (dback->disk_bytenr > entry->bytenr) {
8798 u64 off_diff, offset;
8800 off_diff = dback->disk_bytenr - entry->bytenr;
8801 offset = btrfs_file_extent_offset(leaf, fi);
8802 if (dback->disk_bytenr + offset +
8803 btrfs_file_extent_num_bytes(leaf, fi) >
8804 entry->bytenr + entry->bytes) {
8805 fprintf(stderr, "Ref is past the entry end, please "
8806 "take a btrfs-image of this file system and "
8807 "send it to a btrfs developer, ref %Lu\n",
8808 dback->disk_bytenr);
8813 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8814 btrfs_set_file_extent_offset(leaf, fi, offset);
8815 } else if (dback->disk_bytenr < entry->bytenr) {
8818 offset = btrfs_file_extent_offset(leaf, fi);
8819 if (dback->disk_bytenr + offset < entry->bytenr) {
8820 fprintf(stderr, "Ref is before the entry start, please"
8821 " take a btrfs-image of this file system and "
8822 "send it to a btrfs developer, ref %Lu\n",
8823 dback->disk_bytenr);
8828 offset += dback->disk_bytenr;
8829 offset -= entry->bytenr;
8830 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8831 btrfs_set_file_extent_offset(leaf, fi, offset);
8834 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8837 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8838 * only do this if we aren't using compression, otherwise it's a
8841 if (!btrfs_file_extent_compression(leaf, fi))
8842 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8844 printf("ram bytes may be wrong?\n");
8845 btrfs_mark_buffer_dirty(leaf);
8847 err = btrfs_commit_transaction(trans, root);
8848 btrfs_release_path(path);
8849 return ret ? ret : err;
/*
 * Make the data backrefs of 'rec' agree on a single (bytenr, bytes) pair
 * and repair the file extents that disagree with it.
 *
 * Pass 1 walks rec->backref_tree.  Full backrefs, non-data backrefs and
 * data backrefs with found_ref == 0 are tested for first (the statements
 * under those tests are not visible in this listing; presumably they skip
 * the backref).  Each distinct (disk_bytenr, bytes) pair is kept as an
 * extent_entry on a local 'entries' list, located with find_entry() and
 * allocated when missing.  A backref that differs from
 * (rec->start, rec->nr) or is marked broken is singled out (the statements
 * under that test are not visible either).
 *
 * If there is at most one entry and no mismatch, the backrefs agree.
 * Otherwise find_most_right_entry() is asked for a winner.  If the extent
 * record's own (rec->start, rec->nr) is not among the entries, then either
 * the function gives up with a message (when there are no broken entries
 * or rec->found_rec is unset) or an entry for that range is added and the
 * selection repeated; an even split is reported as needing user input.  A
 * winner whose bytenr differs from rec->start is refused with a message.
 *
 * Pass 2 walks the backrefs again with the same filters and calls
 * repair_ref() for every one whose (disk_bytenr, bytes) differs from the
 * winner.  At the end the 'entries' list is emptied.
 *
 * NOTE(review): the declarations of 'entries', 'nr_entries', 'mismatch'
 * and 'ret', the error handling for malloc()/repair_ref(), and the return
 * statement are not visible here.
 */
8852 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8853 struct extent_record *rec)
8855 struct extent_backref *back, *tmp;
8856 struct data_backref *dback;
8857 struct extent_entry *entry, *best = NULL;
8860 int broken_entries = 0;
8865 * Metadata is easy and the backrefs should always agree on bytenr and
8866 * size, if not we've got bigger issues.
8871 rbtree_postorder_for_each_entry_safe(back, tmp,
8872 &rec->backref_tree, node) {
8873 if (back->full_backref || !back->is_data)
8876 dback = to_data_backref(back);
8879 * We only pay attention to backrefs that we found a real
8882 if (dback->found_ref == 0)
8886 * For now we only catch when the bytes don't match, not the
8887 * bytenr. We can easily do this at the same time, but I want
8888 * to have a fs image to test on before we just add repair
8889 * functionality willy-nilly so we know we won't screw up the
8893 entry = find_entry(&entries, dback->disk_bytenr,
8896 entry = malloc(sizeof(struct extent_entry));
8901 memset(entry, 0, sizeof(*entry));
8902 entry->bytenr = dback->disk_bytenr;
8903 entry->bytes = dback->bytes;
8904 list_add_tail(&entry->list, &entries);
8909 * If we only have on entry we may think the entries agree when
8910 * in reality they don't so we have to do some extra checking.
8912 if (dback->disk_bytenr != rec->start ||
8913 dback->bytes != rec->nr || back->broken)
8924 /* Yay all the backrefs agree, carry on good sir */
8925 if (nr_entries <= 1 && !mismatch)
8928 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8929 "%Lu\n", rec->start);
8932 * First we want to see if the backrefs can agree amongst themselves who
8933 * is right, so figure out which one of the entries has the highest
8936 best = find_most_right_entry(&entries);
8939 * Ok so we may have an even split between what the backrefs think, so
8940 * this is where we use the extent ref to see what it thinks.
8943 entry = find_entry(&entries, rec->start, rec->nr);
8944 if (!entry && (!broken_entries || !rec->found_rec)) {
8945 fprintf(stderr, "Backrefs don't agree with each other "
8946 "and extent record doesn't agree with anybody,"
8947 " so we can't fix bytenr %Lu bytes %Lu\n",
8948 rec->start, rec->nr);
8951 } else if (!entry) {
8953 * Ok our backrefs were broken, we'll assume this is the
8954 * correct value and add an entry for this range.
8956 entry = malloc(sizeof(struct extent_entry));
8961 memset(entry, 0, sizeof(*entry));
8962 entry->bytenr = rec->start;
8963 entry->bytes = rec->nr;
8964 list_add_tail(&entry->list, &entries);
8968 best = find_most_right_entry(&entries);
8970 fprintf(stderr, "Backrefs and extent record evenly "
8971 "split on who is right, this is going to "
8972 "require user input to fix bytenr %Lu bytes "
8973 "%Lu\n", rec->start, rec->nr);
8980 * I don't think this can happen currently as we'll abort() if we catch
8981 * this case higher up, but in case somebody removes that we still can't
8982 * deal with it properly here yet, so just bail out of that's the case.
8984 if (best->bytenr != rec->start) {
8985 fprintf(stderr, "Extent start and backref starts don't match, "
8986 "please use btrfs-image on this file system and send "
8987 "it to a btrfs developer so they can make fsck fix "
8988 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8989 rec->start, rec->nr);
8995 * Ok great we all agreed on an extent record, let's go find the real
8996 * references and fix up the ones that don't match.
8998 rbtree_postorder_for_each_entry_safe(back, tmp,
8999 &rec->backref_tree, node) {
9000 if (back->full_backref || !back->is_data)
9003 dback = to_data_backref(back);
9006 * Still ignoring backrefs that don't have a real ref attached
9009 if (dback->found_ref == 0)
9012 if (dback->bytes == best->bytes &&
9013 dback->disk_bytenr == best->bytenr)
9016 ret = repair_ref(info, path, dback, best);
9022 * Ok we messed with the actual refs, which means we need to drop our
9023 * entire cache and go back and rescan. I know this is a huge pain and
9024 * adds a lot of extra work, but it's the only way to be safe. Once all
9025 * the backrefs agree we may not need to do anything to the extent
9030 while (!list_empty(&entries)) {
9031 entry = list_entry(entries.next, struct extent_entry, list);
9032 list_del_init(&entry->list);
/*
 * Replace a backref-only extent_record with the duplicate that came from a
 * real extent item, then fold in whatever else overlaps it.
 *
 * The first test covers rec->found_rec being set or more than one
 * duplicate existing (the statement under it is not visible in this
 * listing); BUG_ON() fires when there is no duplicate at all.  Otherwise:
 *  1. 'rec' is removed from extent_cache;
 *  2. the first record on rec->dups becomes 'good'.  Its list heads are
 *     re-initialised, its cache range is set from its own start/nr, its
 *     content_checked / owner_ref_checked / num_duplicates fields are
 *     zeroed, and it takes over rec->refs and rec's 'backrefs' list;
 *  3. records in extent_cache overlapping 'good' are looked up.  One with
 *     found_rec set or with duplicates of its own becomes a duplicate of
 *     'good' (and 'good' is queued on the global duplicate_extents list if
 *     not already queued); any other one just donates its refs and
 *     backrefs.  Each is removed from extent_cache;
 *  4. 'good' is inserted into extent_cache.
 *
 * The final return yields 0 when 'good' ended up with duplicates, 1
 * otherwise.  Other return paths are not visible here.
 *
 * NOTE(review): only the 'backrefs' list is moved, while other functions
 * in this file walk rec->backref_tree -- confirm whether that tree must be
 * transferred too.
 */
9038 static int process_duplicates(struct cache_tree *extent_cache,
9039 struct extent_record *rec)
9041 struct extent_record *good, *tmp;
9042 struct cache_extent *cache;
9046 * If we found a extent record for this extent then return, or if we
9047 * have more than one duplicate we are likely going to need to delete
9050 if (rec->found_rec || rec->num_duplicates > 1)
9053 /* Shouldn't happen but just in case */
9054 BUG_ON(!rec->num_duplicates);
9057 * So this happens if we end up with a backref that doesn't match the
9058 * actual extent entry. So either the backref is bad or the extent
9059 * entry is bad. Either way we want to have the extent_record actually
9060 * reflect what we found in the extent_tree, so we need to take the
9061 * duplicate out and use that as the extent_record since the only way we
9062 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9064 remove_cache_extent(extent_cache, &rec->cache);
9066 good = to_extent_record(rec->dups.next);
9067 list_del_init(&good->list);
9068 INIT_LIST_HEAD(&good->backrefs);
9069 INIT_LIST_HEAD(&good->dups);
9070 good->cache.start = good->start;
9071 good->cache.size = good->nr;
9072 good->content_checked = 0;
9073 good->owner_ref_checked = 0;
9074 good->num_duplicates = 0;
9075 good->refs = rec->refs;
9076 list_splice_init(&rec->backrefs, &good->backrefs);
9078 cache = lookup_cache_extent(extent_cache, good->start,
9082 tmp = container_of(cache, struct extent_record, cache);
9085 * If we find another overlapping extent and it's found_rec is
9086 * set then it's a duplicate and we need to try and delete
9089 if (tmp->found_rec || tmp->num_duplicates > 0) {
9090 if (list_empty(&good->list))
9091 list_add_tail(&good->list,
9092 &duplicate_extents);
9093 good->num_duplicates += tmp->num_duplicates + 1;
9094 list_splice_init(&tmp->dups, &good->dups);
9095 list_del_init(&tmp->list);
9096 list_add_tail(&tmp->list, &good->dups);
9097 remove_cache_extent(extent_cache, &tmp->cache);
9102 * Ok we have another non extent item backed extent rec, so lets
9103 * just add it to this extent and carry on like we did above.
9105 good->refs += tmp->refs;
9106 list_splice_init(&tmp->backrefs, &good->backrefs);
9107 remove_cache_extent(extent_cache, &tmp->cache);
9110 ret = insert_cache_extent(extent_cache, &good->cache);
9113 return good->num_duplicates ? 0 : 1;
/*
 * Delete from the extent tree the duplicate extent items attached to
 * 'rec'.
 *
 * A 'good' record is chosen among 'rec' and the records on rec->dups (per
 * the in-code comment, the one that covers all of the duplicates).  If a
 * duplicate extends past the end of 'good', a message is printed saying
 * the overlap needs more careful thought.  'rec' and entries of rec->dups
 * are moved onto a local delete_list; the tests that keep 'good' off that
 * list are not visible in this listing.
 *
 * Within one transaction on fs_info->extent_root, every record on
 * delete_list that has found_rec set is looked up as
 * (start, BTRFS_EXTENT_ITEM_KEY, nr) and removed with btrfs_del_item(); a
 * metadata record there is reported as unexpected.  The transaction is
 * committed, both lists are emptied and the path is released.
 *
 * rec->num_duplicates is reset to 0 when both 'ret' and 'nr_del' are zero.
 * Returns 'ret' when it is non-zero, otherwise 'nr_del'.
 *
 * NOTE(review): the overlap message formats its ranges as "[%Lu-%Lu]" but
 * is given start and nr (a length, judging by "start + nr" just above),
 * not an end offset.
 */
9116 static int delete_duplicate_records(struct btrfs_root *root,
9117 struct extent_record *rec)
9119 struct btrfs_trans_handle *trans;
9120 LIST_HEAD(delete_list);
9121 struct btrfs_path path;
9122 struct extent_record *tmp, *good, *n;
9125 struct btrfs_key key;
9127 btrfs_init_path(&path);
9130 /* Find the record that covers all of the duplicates. */
9131 list_for_each_entry(tmp, &rec->dups, list) {
9132 if (good->start < tmp->start)
9134 if (good->nr > tmp->nr)
9137 if (tmp->start + tmp->nr < good->start + good->nr) {
9138 fprintf(stderr, "Ok we have overlapping extents that "
9139 "aren't completely covered by each other, this "
9140 "is going to require more careful thought. "
9141 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9142 tmp->start, tmp->nr, good->start, good->nr);
9149 list_add_tail(&rec->list, &delete_list);
9151 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9154 list_move_tail(&tmp->list, &delete_list);
9157 root = root->fs_info->extent_root;
9158 trans = btrfs_start_transaction(root, 1);
9159 if (IS_ERR(trans)) {
9160 ret = PTR_ERR(trans);
9164 list_for_each_entry(tmp, &delete_list, list) {
9165 if (tmp->found_rec == 0)
9167 key.objectid = tmp->start;
9168 key.type = BTRFS_EXTENT_ITEM_KEY;
9169 key.offset = tmp->nr;
9171 /* Shouldn't happen but just in case */
9172 if (tmp->metadata) {
9173 fprintf(stderr, "Well this shouldn't happen, extent "
9174 "record overlaps but is metadata? "
9175 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9179 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9185 ret = btrfs_del_item(trans, root, &path);
9188 btrfs_release_path(&path);
9191 err = btrfs_commit_transaction(trans, root);
9195 while (!list_empty(&delete_list)) {
9196 tmp = to_extent_record(delete_list.next);
9197 list_del_init(&tmp->list);
9203 while (!list_empty(&rec->dups)) {
9204 tmp = to_extent_record(rec->dups.next);
9205 list_del_init(&tmp->list);
9209 btrfs_release_path(&path);
9211 if (!ret && !nr_del)
9212 rec->num_duplicates = 0;
9214 return ret ? ret : nr_del;
/*
 * Try to locate the file extent behind each data backref of 'rec' that the
 * scan never matched to a real reference.
 *
 * rec->backref_tree is walked.  Full backrefs, non-data backrefs and data
 * backrefs that already have found_ref set are tested for first (the
 * statements under those tests are not visible in this listing;
 * presumably they skip the backref).  For the remaining ones:
 *  - the fs root dback->root is read; -ENOENT is treated as "definitely a
 *    bad ref", any other error is returned as PTR_ERR(root);
 *  - (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset) is searched read
 *    only in that root;
 *  - the disk_bytenr and disk_num_bytes of the file extent item at the
 *    resulting slot are read and the path released;
 *  - that bytenr is looked up in extent_cache; per the in-code comment, a
 *    backref whose bytenr already has its own extent record is not adopted
 *    (the test itself is not visible);
 *  - otherwise dback->found_ref is incremented and dback->disk_bytenr /
 *    dback->bytes are overwritten with the file extent's values.  The
 *    trailing comment says the backref is then flagged so that the
 *    verify-backref code does not trust those values; the flagging line is
 *    not visible.
 *
 * NOTE(review): an inner 'tmp' (struct extent_record *) shadows the outer
 * 'tmp' used by the rb-tree iteration macro -- confirm the macro does not
 * touch 'tmp' between the inner declaration and the end of that scope.
 */
9217 static int find_possible_backrefs(struct btrfs_fs_info *info,
9218 struct btrfs_path *path,
9219 struct cache_tree *extent_cache,
9220 struct extent_record *rec)
9222 struct btrfs_root *root;
9223 struct extent_backref *back, *tmp;
9224 struct data_backref *dback;
9225 struct cache_extent *cache;
9226 struct btrfs_file_extent_item *fi;
9227 struct btrfs_key key;
9231 rbtree_postorder_for_each_entry_safe(back, tmp,
9232 &rec->backref_tree, node) {
9233 /* Don't care about full backrefs (poor unloved backrefs) */
9234 if (back->full_backref || !back->is_data)
9237 dback = to_data_backref(back);
9239 /* We found this one, we don't need to do a lookup */
9240 if (dback->found_ref)
9243 key.objectid = dback->root;
9244 key.type = BTRFS_ROOT_ITEM_KEY;
9245 key.offset = (u64)-1;
9247 root = btrfs_read_fs_root(info, &key);
9249 /* No root, definitely a bad ref, skip */
9250 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9252 /* Other err, exit */
9254 return PTR_ERR(root);
9256 key.objectid = dback->owner;
9257 key.type = BTRFS_EXTENT_DATA_KEY;
9258 key.offset = dback->offset;
9259 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9261 btrfs_release_path(path);
9264 /* Didn't find it, we can carry on */
9269 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9270 struct btrfs_file_extent_item);
9271 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9272 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9273 btrfs_release_path(path);
9274 cache = lookup_cache_extent(extent_cache, bytenr, 1);
9276 struct extent_record *tmp;
9277 tmp = container_of(cache, struct extent_record, cache);
9280 * If we found an extent record for the bytenr for this
9281 * particular backref then we can't add it to our
9282 * current extent record. We only want to add backrefs
9283 * that don't have a corresponding extent item in the
9284 * extent tree since they likely belong to this record
9285 * and we need to fix it if it doesn't match bytenrs.
9291 dback->found_ref += 1;
9292 dback->disk_bytenr = bytenr;
9293 dback->bytes = bytes;
9296 * Set this so the verify backref code knows not to trust the
9297 * values in this backref.
9306 * Record orphan data ref into corresponding root.
9308 * Return 0 if the extent item contains a data ref and it was recorded.
9309 * Return 1 if the extent item contains no useful data ref.
9310 * In that case, it may contain only shared_dataref or metadata backref,
9311 * or the file extent exists (this should be handled by the extent bytenr
9313 * Return <0 if something goes wrong.
/*
 * Remember, on the owning fs root, the data extents whose extent-tree
 * backref has no matching file extent.
 *
 * rec->backref_tree is walked.  A backref is tested for being a full
 * backref, not being data, or not having been found in the extent tree,
 * and then for already having found_ref set (the statements under both
 * tests are not visible in this listing; presumably they skip the
 * backref).  For the others:
 *  - the root dback->root is read with btrfs_read_fs_root(); a missing or
 *    unreadable root is skipped;
 *  - (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset) is searched read
 *    only in that root.  Per the in-code comment, ret == 0 (the file
 *    extent exists) is left to the bytenr repair, while ret < 0 and
 *    ret > 0 are both recorded;
 *  - an orphan_data_extent is allocated, filled with the backref's
 *    root / owner / offset and the record's cache start / size, and linked
 *    with dest_root->orphan_data_extents.
 *
 * The final return yields !recorded_data_ref: 0 when at least one orphan
 * was recorded, 1 when none was.
 *
 * NOTE(review): list_add() is called with &dest_root->orphan_data_extents
 * first and &orphan->list second.  With the kernel-style
 * list_add(new, head) signature this inserts the root's list head after
 * the freshly initialised orphan node rather than the reverse, which would
 * drop any entries already queued on that root -- confirm against the
 * list.h used by this file and swap the arguments if so.
 */
9315 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9316 struct extent_record *rec)
9318 struct btrfs_key key;
9319 struct btrfs_root *dest_root;
9320 struct extent_backref *back, *tmp;
9321 struct data_backref *dback;
9322 struct orphan_data_extent *orphan;
9323 struct btrfs_path path;
9324 int recorded_data_ref = 0;
9329 btrfs_init_path(&path);
9330 rbtree_postorder_for_each_entry_safe(back, tmp,
9331 &rec->backref_tree, node) {
9332 if (back->full_backref || !back->is_data ||
9333 !back->found_extent_tree)
9335 dback = to_data_backref(back);
9336 if (dback->found_ref)
9338 key.objectid = dback->root;
9339 key.type = BTRFS_ROOT_ITEM_KEY;
9340 key.offset = (u64)-1;
9342 dest_root = btrfs_read_fs_root(fs_info, &key);
9344 /* For non-exist root we just skip it */
9345 if (IS_ERR(dest_root) || !dest_root)
9348 key.objectid = dback->owner;
9349 key.type = BTRFS_EXTENT_DATA_KEY;
9350 key.offset = dback->offset;
9352 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9353 btrfs_release_path(&path);
9355 * For ret < 0, it's OK since the fs-tree may be corrupted,
9356 * we need to record it for inode/file extent rebuild.
9357 * For ret > 0, we record it only for file extent rebuild.
9358 * For ret == 0, the file extent exists but only bytenr
9359 * mismatch, let the original bytenr fix routine to handle,
9365 orphan = malloc(sizeof(*orphan));
9370 INIT_LIST_HEAD(&orphan->list);
9371 orphan->root = dback->root;
9372 orphan->objectid = dback->owner;
9373 orphan->offset = dback->offset;
9374 orphan->disk_bytenr = rec->cache.start;
9375 orphan->disk_len = rec->cache.size;
9376 list_add(&dest_root->orphan_data_extents, &orphan->list);
9377 recorded_data_ref = 1;
9380 btrfs_release_path(&path);
9382 return !recorded_data_ref;
9388 * when an incorrect extent item is found, this will delete
9389 * all of the existing entries for it and recreate them
9390 * based on what the tree scan found.
/*
 * Rebuild the extent-tree entries of 'rec' from the backrefs the scan
 * found.
 *
 * Steps, as far as the visible lines show:
 *  - 'flags' gains BTRFS_BLOCK_FLAG_FULL_BACKREF when
 *    rec->flag_block_full_backref is set;
 *  - for a non-metadata record whose refs differ from its
 *    extent_item_refs, find_possible_backrefs() is run first to pick up
 *    stray backrefs, then verify_backrefs() ("step one") makes the
 *    backrefs agree;
 *  - a transaction is started on info->extent_root;
 *  - "step two": delete_extent_records() removes the existing items;
 *  - the range (rec->start, rec->max_size) is looked up in
 *    info->corrupt_blocks; per the in-code comment no references are added
 *    to a corrupt block (the test itself is not visible in this listing);
 *  - "step three": record_extent() is called for every backref in
 *    rec->backref_tree whose found_ref flag is set, with
 *    rec->bad_full_backref cleared beforehand;
 *  - the transaction is committed, "Repaired extent references" is printed
 *    and the path is released.
 *
 * NOTE(review): the declarations of 'ret', 'allocated' and 'flags', the
 * error checks between the steps and the return statement are not visible
 * here.
 */
9392 static int fixup_extent_refs(struct btrfs_fs_info *info,
9393 struct cache_tree *extent_cache,
9394 struct extent_record *rec)
9396 struct btrfs_trans_handle *trans = NULL;
9398 struct btrfs_path path;
9399 struct cache_extent *cache;
9400 struct extent_backref *back, *tmp;
9404 if (rec->flag_block_full_backref)
9405 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9407 btrfs_init_path(&path);
9408 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9410 * Sometimes the backrefs themselves are so broken they don't
9411 * get attached to any meaningful rec, so first go back and
9412 * check any of our backrefs that we couldn't find and throw
9413 * them into the list if we find the backref so that
9414 * verify_backrefs can figure out what to do.
9416 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9421 /* step one, make sure all of the backrefs agree */
9422 ret = verify_backrefs(info, &path, rec);
9426 trans = btrfs_start_transaction(info->extent_root, 1);
9427 if (IS_ERR(trans)) {
9428 ret = PTR_ERR(trans);
9432 /* step two, delete all the existing records */
9433 ret = delete_extent_records(trans, info->extent_root, &path,
9439 /* was this block corrupt? If so, don't add references to it */
9440 cache = lookup_cache_extent(info->corrupt_blocks,
9441 rec->start, rec->max_size);
9447 /* step three, recreate all the refs we did find */
9448 rbtree_postorder_for_each_entry_safe(back, tmp,
9449 &rec->backref_tree, node) {
9451 * if we didn't find any references, don't create a
9454 if (!back->found_ref)
9457 rec->bad_full_backref = 0;
9458 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9466 int err = btrfs_commit_transaction(trans, info->extent_root);
9472 fprintf(stderr, "Repaired extent references for %llu\n",
9473 (unsigned long long)rec->start);
9475 btrfs_release_path(&path);
/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the extent item of 'rec',
 * following rec->flag_block_full_backref.
 *
 * The item is searched in fs_info->extent_root under the key
 * (rec->start, BTRFS_METADATA_ITEM_KEY, rec->info_level) for a metadata
 * record, or (rec->start, BTRFS_EXTENT_ITEM_KEY, rec->max_size) otherwise.
 * A transaction is started first; PTR_ERR(trans) is returned when that
 * fails.  On the two visible failure paths (the second prints "Didn't find
 * extent") the path is released and the transaction committed.  Otherwise
 * the item's flags are read, the full-backref bit is set or cleared with a
 * message on stderr, the new flags are written back, the leaf is marked
 * dirty, and the transaction is committed before "Repaired extent flags"
 * is printed.
 *
 * NOTE(review): on both failure paths the result of
 * btrfs_commit_transaction() is discarded.
 * NOTE(review): the conditions guarding the failure paths and the return
 * statements are not visible in this listing.
 */
9479 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9480 struct extent_record *rec)
9482 struct btrfs_trans_handle *trans;
9483 struct btrfs_root *root = fs_info->extent_root;
9484 struct btrfs_path path;
9485 struct btrfs_extent_item *ei;
9486 struct btrfs_key key;
9490 key.objectid = rec->start;
9491 if (rec->metadata) {
9492 key.type = BTRFS_METADATA_ITEM_KEY;
9493 key.offset = rec->info_level;
9495 key.type = BTRFS_EXTENT_ITEM_KEY;
9496 key.offset = rec->max_size;
9499 trans = btrfs_start_transaction(root, 0);
9501 return PTR_ERR(trans);
9503 btrfs_init_path(&path);
9504 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9506 btrfs_release_path(&path);
9507 btrfs_commit_transaction(trans, root);
9510 fprintf(stderr, "Didn't find extent for %llu\n",
9511 (unsigned long long)rec->start);
9512 btrfs_release_path(&path);
9513 btrfs_commit_transaction(trans, root);
9517 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9518 struct btrfs_extent_item);
9519 flags = btrfs_extent_flags(path.nodes[0], ei);
9520 if (rec->flag_block_full_backref) {
9521 fprintf(stderr, "setting full backref on %llu\n",
9522 (unsigned long long)key.objectid);
9523 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9525 fprintf(stderr, "clearing full backref on %llu\n",
9526 (unsigned long long)key.objectid);
9527 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9529 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9530 btrfs_mark_buffer_dirty(path.nodes[0]);
9531 btrfs_release_path(&path);
9532 ret = btrfs_commit_transaction(trans, root);
9534 fprintf(stderr, "Repaired extent flags for %llu\n",
9535 (unsigned long long)rec->start);
9540 /* right now we only prune from the extent allocation tree */
/*
 * Remove the pointer to one corrupt block from its parent node in the
 * extent tree.
 *
 * info->extent_root is searched for corrupt->key with path.lowest_level
 * set to corrupt->level + 1, so the search stops at the parent of the bad
 * block.  The slot the search landed on is tried first: if its block
 * pointer equals corrupt->cache.start it is the one to delete.  Otherwise
 * every slot of that node is scanned for the same block pointer.  When no
 * slot matches, the node is compared with the extent root's own node (what
 * follows that test is not visible in this listing) and the path is
 * released.
 *
 * Once the slot is known, a "deleting pointer to block" message is printed
 * and btrfs_del_ptr() removes the pointer; the path is released
 * afterwards.
 *
 * NOTE(review): the error handling after the search, the labels the scan
 * jumps to, and the return statement are not visible here.
 */
9541 static int prune_one_block(struct btrfs_trans_handle *trans,
9542 struct btrfs_fs_info *info,
9543 struct btrfs_corrupt_block *corrupt)
9546 struct btrfs_path path;
9547 struct extent_buffer *eb;
9551 int level = corrupt->level + 1;
9553 btrfs_init_path(&path);
9555 /* we want to stop at the parent to our busted block */
9556 path.lowest_level = level;
9558 ret = btrfs_search_slot(trans, info->extent_root,
9559 &corrupt->key, &path, -1, 1);
9564 eb = path.nodes[level];
9571 * hopefully the search gave us the block we want to prune,
9572 * lets try that first
9574 slot = path.slots[level];
9575 found = btrfs_node_blockptr(eb, slot);
9576 if (found == corrupt->cache.start)
9579 nritems = btrfs_header_nritems(eb);
9581 /* the search failed, lets scan this node and hope we find it */
9582 for (slot = 0; slot < nritems; slot++) {
9583 found = btrfs_node_blockptr(eb, slot);
9584 if (found == corrupt->cache.start)
9588 * we couldn't find the bad block. TODO, search all the nodes for pointers
9591 if (eb == info->extent_root->node) {
9596 btrfs_release_path(&path);
9601 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9602 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9605 btrfs_release_path(&path);
/*
 * Prune the blocks recorded in info->corrupt_blocks from the extent tree.
 *
 * An entry is fetched with search_cache_extent(info->corrupt_blocks, 0).
 * A transaction on info->extent_root is started (PTR_ERR(trans) is
 * returned when that fails), prune_one_block() is called for the entry,
 * and the entry is removed from the cache.  The last visible statement
 * returns the result of committing the transaction.
 *
 * NOTE(review): the return value of prune_one_block() is ignored and the
 * entry is removed from the cache regardless of the outcome.
 * NOTE(review): the loop construct and the handling of an empty cache are
 * not visible in this listing.
 */
9609 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9611 struct btrfs_trans_handle *trans = NULL;
9612 struct cache_extent *cache;
9613 struct btrfs_corrupt_block *corrupt;
9616 cache = search_cache_extent(info->corrupt_blocks, 0);
9620 trans = btrfs_start_transaction(info->extent_root, 1);
9622 return PTR_ERR(trans);
9624 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9625 prune_one_block(trans, info, corrupt);
9626 remove_cache_extent(info->corrupt_blocks, cache);
9629 return btrfs_commit_transaction(trans, info->extent_root);
9633 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9635 struct btrfs_block_group_cache *cache;
9640 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9641 &start, &end, EXTENT_DIRTY);
9644 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9649 cache = btrfs_lookup_first_block_group(fs_info, start);
9654 start = cache->key.objectid + cache->key.offset;
9658 static int check_extent_refs(struct btrfs_root *root,
9659 struct cache_tree *extent_cache)
9661 struct extent_record *rec;
9662 struct cache_extent *cache;
9668 * if we're doing a repair, we have to make sure
9669 * we don't allocate from the problem extents.
9670 * In the worst case, this will be all the
9673 cache = search_cache_extent(extent_cache, 0);
9675 rec = container_of(cache, struct extent_record, cache);
9676 set_extent_dirty(root->fs_info->excluded_extents,
9678 rec->start + rec->max_size - 1);
9679 cache = next_cache_extent(cache);
9682 /* pin down all the corrupted blocks too */
9683 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9685 set_extent_dirty(root->fs_info->excluded_extents,
9687 cache->start + cache->size - 1);
9688 cache = next_cache_extent(cache);
9690 prune_corrupt_blocks(root->fs_info);
9691 reset_cached_block_groups(root->fs_info);
9694 reset_cached_block_groups(root->fs_info);
9697 * We need to delete any duplicate entries we find first otherwise we
9698 * could mess up the extent tree when we have backrefs that actually
9699 * belong to a different extent item and not the weird duplicate one.
9701 while (repair && !list_empty(&duplicate_extents)) {
9702 rec = to_extent_record(duplicate_extents.next);
9703 list_del_init(&rec->list);
9705 /* Sometimes we can find a backref before we find an actual
9706 * extent, so we need to process it a little bit to see if there
9707 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9708 * if this is a backref screwup. If we need to delete stuff
9709 * process_duplicates() will return 0, otherwise it will return
9712 if (process_duplicates(extent_cache, rec))
9714 ret = delete_duplicate_records(root, rec);
9718 * delete_duplicate_records will return the number of entries
9719 * deleted, so if it's greater than 0 then we know we actually
9720 * did something and we need to remove.
9733 cache = search_cache_extent(extent_cache, 0);
9736 rec = container_of(cache, struct extent_record, cache);
9737 if (rec->num_duplicates) {
9738 fprintf(stderr, "extent item %llu has multiple extent "
9739 "items\n", (unsigned long long)rec->start);
9743 if (rec->refs != rec->extent_item_refs) {
9744 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9745 (unsigned long long)rec->start,
9746 (unsigned long long)rec->nr);
9747 fprintf(stderr, "extent item %llu, found %llu\n",
9748 (unsigned long long)rec->extent_item_refs,
9749 (unsigned long long)rec->refs);
9750 ret = record_orphan_data_extents(root->fs_info, rec);
9756 if (all_backpointers_checked(rec, 1)) {
9757 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9758 (unsigned long long)rec->start,
9759 (unsigned long long)rec->nr);
9763 if (!rec->owner_ref_checked) {
9764 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9765 (unsigned long long)rec->start,
9766 (unsigned long long)rec->nr);
9771 if (repair && fix) {
9772 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9778 if (rec->bad_full_backref) {
9779 fprintf(stderr, "bad full backref, on [%llu]\n",
9780 (unsigned long long)rec->start);
9782 ret = fixup_extent_flags(root->fs_info, rec);
9790 * Although it's not a extent ref's problem, we reuse this
9791 * routine for error reporting.
9792 * No repair function yet.
9794 if (rec->crossing_stripes) {
9796 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9797 rec->start, rec->start + rec->max_size);
9801 if (rec->wrong_chunk_type) {
9803 "bad extent [%llu, %llu), type mismatch with chunk\n",
9804 rec->start, rec->start + rec->max_size);
9808 remove_cache_extent(extent_cache, cache);
9809 free_all_extent_backrefs(rec);
9810 if (!init_extent_tree && repair && (!cur_err || fix))
9811 clear_extent_dirty(root->fs_info->excluded_extents,
9813 rec->start + rec->max_size - 1);
9818 if (ret && ret != -EAGAIN) {
9819 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9822 struct btrfs_trans_handle *trans;
9824 root = root->fs_info->extent_root;
9825 trans = btrfs_start_transaction(root, 1);
9826 if (IS_ERR(trans)) {
9827 ret = PTR_ERR(trans);
9831 ret = btrfs_fix_block_accounting(trans, root);
9834 ret = btrfs_commit_transaction(trans, root);
9843 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9847 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9848 stripe_size = length;
9849 stripe_size /= num_stripes;
9850 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9851 stripe_size = length * 2;
9852 stripe_size /= num_stripes;
9853 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9854 stripe_size = length;
9855 stripe_size /= (num_stripes - 1);
9856 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9857 stripe_size = length;
9858 stripe_size /= (num_stripes - 2);
9860 stripe_size = length;
 * Check a chunk against its block group and device extent references:
 * Return 0 if all refs seem valid.
 * Return 1 if only some refs seem valid and the rest need a later rebuild
 * pass, e.g. a missing block group that requires searching the extent tree.
 * Return -1 if essential refs are missing and cannot be rebuilt.
9872 static int check_chunk_refs(struct chunk_record *chunk_rec,
9873 struct block_group_tree *block_group_cache,
9874 struct device_extent_tree *dev_extent_cache,
9877 struct cache_extent *block_group_item;
9878 struct block_group_record *block_group_rec;
9879 struct cache_extent *dev_extent_item;
9880 struct device_extent_record *dev_extent_rec;
9884 int metadump_v2 = 0;
9888 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9891 if (block_group_item) {
9892 block_group_rec = container_of(block_group_item,
9893 struct block_group_record,
9895 if (chunk_rec->length != block_group_rec->offset ||
9896 chunk_rec->offset != block_group_rec->objectid ||
9898 chunk_rec->type_flags != block_group_rec->flags)) {
9901 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9902 chunk_rec->objectid,
9907 chunk_rec->type_flags,
9908 block_group_rec->objectid,
9909 block_group_rec->type,
9910 block_group_rec->offset,
9911 block_group_rec->offset,
9912 block_group_rec->objectid,
9913 block_group_rec->flags);
9916 list_del_init(&block_group_rec->list);
9917 chunk_rec->bg_rec = block_group_rec;
9922 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9923 chunk_rec->objectid,
9928 chunk_rec->type_flags);
9935 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9936 chunk_rec->num_stripes);
9937 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9938 devid = chunk_rec->stripes[i].devid;
9939 offset = chunk_rec->stripes[i].offset;
9940 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9941 devid, offset, length);
9942 if (dev_extent_item) {
9943 dev_extent_rec = container_of(dev_extent_item,
9944 struct device_extent_record,
9946 if (dev_extent_rec->objectid != devid ||
9947 dev_extent_rec->offset != offset ||
9948 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9949 dev_extent_rec->length != length) {
9952 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9953 chunk_rec->objectid,
9956 chunk_rec->stripes[i].devid,
9957 chunk_rec->stripes[i].offset,
9958 dev_extent_rec->objectid,
9959 dev_extent_rec->offset,
9960 dev_extent_rec->length);
9963 list_move(&dev_extent_rec->chunk_list,
9964 &chunk_rec->dextents);
9969 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9970 chunk_rec->objectid,
9973 chunk_rec->stripes[i].devid,
9974 chunk_rec->stripes[i].offset);
9981 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9982 int check_chunks(struct cache_tree *chunk_cache,
9983 struct block_group_tree *block_group_cache,
9984 struct device_extent_tree *dev_extent_cache,
9985 struct list_head *good, struct list_head *bad,
9986 struct list_head *rebuild, int silent)
9988 struct cache_extent *chunk_item;
9989 struct chunk_record *chunk_rec;
9990 struct block_group_record *bg_rec;
9991 struct device_extent_record *dext_rec;
9995 chunk_item = first_cache_extent(chunk_cache);
9996 while (chunk_item) {
9997 chunk_rec = container_of(chunk_item, struct chunk_record,
9999 err = check_chunk_refs(chunk_rec, block_group_cache,
10000 dev_extent_cache, silent);
10003 if (err == 0 && good)
10004 list_add_tail(&chunk_rec->list, good);
10005 if (err > 0 && rebuild)
10006 list_add_tail(&chunk_rec->list, rebuild);
10007 if (err < 0 && bad)
10008 list_add_tail(&chunk_rec->list, bad);
10009 chunk_item = next_cache_extent(chunk_item);
10012 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10015 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10023 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10027 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10028 dext_rec->objectid,
10038 static int check_device_used(struct device_record *dev_rec,
10039 struct device_extent_tree *dext_cache)
10041 struct cache_extent *cache;
10042 struct device_extent_record *dev_extent_rec;
10043 u64 total_byte = 0;
10045 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10047 dev_extent_rec = container_of(cache,
10048 struct device_extent_record,
10050 if (dev_extent_rec->objectid != dev_rec->devid)
10053 list_del_init(&dev_extent_rec->device_list);
10054 total_byte += dev_extent_rec->length;
10055 cache = next_cache_extent(cache);
10058 if (total_byte != dev_rec->byte_used) {
10060 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10061 total_byte, dev_rec->byte_used, dev_rec->objectid,
10062 dev_rec->type, dev_rec->offset);
10069 /* check btrfs_dev_item -> btrfs_dev_extent */
10070 static int check_devices(struct rb_root *dev_cache,
10071 struct device_extent_tree *dev_extent_cache)
10073 struct rb_node *dev_node;
10074 struct device_record *dev_rec;
10075 struct device_extent_record *dext_rec;
10079 dev_node = rb_first(dev_cache);
10081 dev_rec = container_of(dev_node, struct device_record, node);
10082 err = check_device_used(dev_rec, dev_extent_cache);
10086 dev_node = rb_next(dev_node);
10088 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10091 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10092 dext_rec->objectid, dext_rec->offset, dext_rec->length);
10099 static int add_root_item_to_list(struct list_head *head,
10100 u64 objectid, u64 bytenr, u64 last_snapshot,
10101 u8 level, u8 drop_level,
10102 struct btrfs_key *drop_key)
10105 struct root_item_record *ri_rec;
10106 ri_rec = malloc(sizeof(*ri_rec));
10109 ri_rec->bytenr = bytenr;
10110 ri_rec->objectid = objectid;
10111 ri_rec->level = level;
10112 ri_rec->drop_level = drop_level;
10113 ri_rec->last_snapshot = last_snapshot;
10115 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10116 list_add_tail(&ri_rec->list, head);
10121 static void free_root_item_list(struct list_head *list)
10123 struct root_item_record *ri_rec;
10125 while (!list_empty(list)) {
10126 ri_rec = list_first_entry(list, struct root_item_record,
10128 list_del_init(&ri_rec->list);
10133 static int deal_root_from_list(struct list_head *list,
10134 struct btrfs_root *root,
10135 struct block_info *bits,
10137 struct cache_tree *pending,
10138 struct cache_tree *seen,
10139 struct cache_tree *reada,
10140 struct cache_tree *nodes,
10141 struct cache_tree *extent_cache,
10142 struct cache_tree *chunk_cache,
10143 struct rb_root *dev_cache,
10144 struct block_group_tree *block_group_cache,
10145 struct device_extent_tree *dev_extent_cache)
10150 while (!list_empty(list)) {
10151 struct root_item_record *rec;
10152 struct extent_buffer *buf;
10153 rec = list_entry(list->next,
10154 struct root_item_record, list);
10156 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10157 if (!extent_buffer_uptodate(buf)) {
10158 free_extent_buffer(buf);
10162 ret = add_root_to_pending(buf, extent_cache, pending,
10163 seen, nodes, rec->objectid);
10167 * To rebuild extent tree, we need deal with snapshot
10168 * one by one, otherwise we deal with node firstly which
10169 * can maximize readahead.
10172 ret = run_next_block(root, bits, bits_nr, &last,
10173 pending, seen, reada, nodes,
10174 extent_cache, chunk_cache,
10175 dev_cache, block_group_cache,
10176 dev_extent_cache, rec);
10180 free_extent_buffer(buf);
10181 list_del(&rec->list);
10187 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10188 reada, nodes, extent_cache, chunk_cache,
10189 dev_cache, block_group_cache,
10190 dev_extent_cache, NULL);
10200 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10202 struct rb_root dev_cache;
10203 struct cache_tree chunk_cache;
10204 struct block_group_tree block_group_cache;
10205 struct device_extent_tree dev_extent_cache;
10206 struct cache_tree extent_cache;
10207 struct cache_tree seen;
10208 struct cache_tree pending;
10209 struct cache_tree reada;
10210 struct cache_tree nodes;
10211 struct extent_io_tree excluded_extents;
10212 struct cache_tree corrupt_blocks;
10213 struct btrfs_path path;
10214 struct btrfs_key key;
10215 struct btrfs_key found_key;
10217 struct block_info *bits;
10219 struct extent_buffer *leaf;
10221 struct btrfs_root_item ri;
10222 struct list_head dropping_trees;
10223 struct list_head normal_trees;
10224 struct btrfs_root *root1;
10225 struct btrfs_root *root;
10229 root = fs_info->fs_root;
10230 dev_cache = RB_ROOT;
10231 cache_tree_init(&chunk_cache);
10232 block_group_tree_init(&block_group_cache);
10233 device_extent_tree_init(&dev_extent_cache);
10235 cache_tree_init(&extent_cache);
10236 cache_tree_init(&seen);
10237 cache_tree_init(&pending);
10238 cache_tree_init(&nodes);
10239 cache_tree_init(&reada);
10240 cache_tree_init(&corrupt_blocks);
10241 extent_io_tree_init(&excluded_extents);
10242 INIT_LIST_HEAD(&dropping_trees);
10243 INIT_LIST_HEAD(&normal_trees);
10246 fs_info->excluded_extents = &excluded_extents;
10247 fs_info->fsck_extent_cache = &extent_cache;
10248 fs_info->free_extent_hook = free_extent_hook;
10249 fs_info->corrupt_blocks = &corrupt_blocks;
10253 bits = malloc(bits_nr * sizeof(struct block_info));
10259 if (ctx.progress_enabled) {
10260 ctx.tp = TASK_EXTENTS;
10261 task_start(ctx.info);
10265 root1 = fs_info->tree_root;
10266 level = btrfs_header_level(root1->node);
10267 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10268 root1->node->start, 0, level, 0, NULL);
10271 root1 = fs_info->chunk_root;
10272 level = btrfs_header_level(root1->node);
10273 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10274 root1->node->start, 0, level, 0, NULL);
10277 btrfs_init_path(&path);
10280 key.type = BTRFS_ROOT_ITEM_KEY;
10281 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10285 leaf = path.nodes[0];
10286 slot = path.slots[0];
10287 if (slot >= btrfs_header_nritems(path.nodes[0])) {
10288 ret = btrfs_next_leaf(root, &path);
10291 leaf = path.nodes[0];
10292 slot = path.slots[0];
10294 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10295 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10296 unsigned long offset;
10299 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10300 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10301 last_snapshot = btrfs_root_last_snapshot(&ri);
10302 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10303 level = btrfs_root_level(&ri);
10304 ret = add_root_item_to_list(&normal_trees,
10305 found_key.objectid,
10306 btrfs_root_bytenr(&ri),
10307 last_snapshot, level,
10312 level = btrfs_root_level(&ri);
10313 objectid = found_key.objectid;
10314 btrfs_disk_key_to_cpu(&found_key,
10315 &ri.drop_progress);
10316 ret = add_root_item_to_list(&dropping_trees,
10318 btrfs_root_bytenr(&ri),
10319 last_snapshot, level,
10320 ri.drop_level, &found_key);
10327 btrfs_release_path(&path);
10330 * check_block can return -EAGAIN if it fixes something, please keep
10331 * this in mind when dealing with return values from these functions, if
10332 * we get -EAGAIN we want to fall through and restart the loop.
10334 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
10335 &seen, &reada, &nodes, &extent_cache,
10336 &chunk_cache, &dev_cache, &block_group_cache,
10337 &dev_extent_cache);
10339 if (ret == -EAGAIN)
10343 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
10344 &pending, &seen, &reada, &nodes,
10345 &extent_cache, &chunk_cache, &dev_cache,
10346 &block_group_cache, &dev_extent_cache);
10348 if (ret == -EAGAIN)
10353 ret = check_chunks(&chunk_cache, &block_group_cache,
10354 &dev_extent_cache, NULL, NULL, NULL, 0);
10356 if (ret == -EAGAIN)
10361 ret = check_extent_refs(root, &extent_cache);
10363 if (ret == -EAGAIN)
10368 ret = check_devices(&dev_cache, &dev_extent_cache);
10373 task_stop(ctx.info);
10375 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10376 extent_io_tree_cleanup(&excluded_extents);
10377 fs_info->fsck_extent_cache = NULL;
10378 fs_info->free_extent_hook = NULL;
10379 fs_info->corrupt_blocks = NULL;
10380 fs_info->excluded_extents = NULL;
10383 free_chunk_cache_tree(&chunk_cache);
10384 free_device_cache_tree(&dev_cache);
10385 free_block_group_tree(&block_group_cache);
10386 free_device_extent_tree(&dev_extent_cache);
10387 free_extent_cache_tree(&seen);
10388 free_extent_cache_tree(&pending);
10389 free_extent_cache_tree(&reada);
10390 free_extent_cache_tree(&nodes);
10391 free_root_item_list(&normal_trees);
10392 free_root_item_list(&dropping_trees);
10395 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10396 free_extent_cache_tree(&seen);
10397 free_extent_cache_tree(&pending);
10398 free_extent_cache_tree(&reada);
10399 free_extent_cache_tree(&nodes);
10400 free_chunk_cache_tree(&chunk_cache);
10401 free_block_group_tree(&block_group_cache);
10402 free_device_cache_tree(&dev_cache);
10403 free_device_extent_tree(&dev_extent_cache);
10404 free_extent_record_cache(&extent_cache);
10405 free_root_item_list(&normal_trees);
10406 free_root_item_list(&dropping_trees);
10407 extent_io_tree_cleanup(&excluded_extents);
10412 * Check backrefs of a tree block given by @bytenr or @eb.
10414 * @root: the root containing the @bytenr or @eb
10415 * @eb: tree block extent buffer, can be NULL
10416 * @bytenr: bytenr of the tree block to search
10417 * @level: tree level of the tree block
10418 * @owner: owner of the tree block
10420 * Return >0 for any error found and output error message
10421 * Return 0 for no error found
10423 static int check_tree_block_ref(struct btrfs_root *root,
10424 struct extent_buffer *eb, u64 bytenr,
10425 int level, u64 owner)
10427 struct btrfs_key key;
10428 struct btrfs_root *extent_root = root->fs_info->extent_root;
10429 struct btrfs_path path;
10430 struct btrfs_extent_item *ei;
10431 struct btrfs_extent_inline_ref *iref;
10432 struct extent_buffer *leaf;
10438 u32 nodesize = root->fs_info->nodesize;
10441 int tree_reloc_root = 0;
10446 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10447 btrfs_header_bytenr(root->node) == bytenr)
10448 tree_reloc_root = 1;
10450 btrfs_init_path(&path);
10451 key.objectid = bytenr;
10452 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10453 key.type = BTRFS_METADATA_ITEM_KEY;
10455 key.type = BTRFS_EXTENT_ITEM_KEY;
10456 key.offset = (u64)-1;
10458 /* Search for the backref in extent tree */
10459 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10461 err |= BACKREF_MISSING;
10464 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10466 err |= BACKREF_MISSING;
10470 leaf = path.nodes[0];
10471 slot = path.slots[0];
10472 btrfs_item_key_to_cpu(leaf, &key, slot);
10474 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10476 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10477 skinny_level = (int)key.offset;
10478 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10480 struct btrfs_tree_block_info *info;
10482 info = (struct btrfs_tree_block_info *)(ei + 1);
10483 skinny_level = btrfs_tree_block_level(leaf, info);
10484 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10491 if (!(btrfs_extent_flags(leaf, ei) &
10492 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10494 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10495 key.objectid, nodesize,
10496 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10497 err = BACKREF_MISMATCH;
10499 header_gen = btrfs_header_generation(eb);
10500 extent_gen = btrfs_extent_generation(leaf, ei);
10501 if (header_gen != extent_gen) {
10503 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10504 key.objectid, nodesize, header_gen,
10506 err = BACKREF_MISMATCH;
10508 if (level != skinny_level) {
10510 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10511 key.objectid, nodesize, level, skinny_level);
10512 err = BACKREF_MISMATCH;
10514 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10516 "extent[%llu %u] is referred by other roots than %llu",
10517 key.objectid, nodesize, root->objectid);
10518 err = BACKREF_MISMATCH;
10523 * Iterate the extent/metadata item to find the exact backref
10525 item_size = btrfs_item_size_nr(leaf, slot);
10526 ptr = (unsigned long)iref;
10527 end = (unsigned long)ei + item_size;
10528 while (ptr < end) {
10529 iref = (struct btrfs_extent_inline_ref *)ptr;
10530 type = btrfs_extent_inline_ref_type(leaf, iref);
10531 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10533 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10534 (offset == root->objectid || offset == owner)) {
10536 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10538 * Backref of tree reloc root points to itself, no need
10539 * to check backref any more.
10541 if (tree_reloc_root)
10544 /* Check if the backref points to valid referencer */
10545 found_ref = !check_tree_block_ref(root, NULL,
10546 offset, level + 1, owner);
10551 ptr += btrfs_extent_inline_ref_size(type);
10555 * Inlined extent item doesn't have what we need, check
10556 * TREE_BLOCK_REF_KEY
10559 btrfs_release_path(&path);
10560 key.objectid = bytenr;
10561 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10562 key.offset = root->objectid;
10564 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10569 err |= BACKREF_MISSING;
10571 btrfs_release_path(&path);
10572 if (eb && (err & BACKREF_MISSING))
10573 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10574 bytenr, nodesize, owner, level);
10579 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10581 * Return >0 any error found and output error message
10582 * Return 0 for no error found
10584 static int check_extent_data_item(struct btrfs_root *root,
10585 struct extent_buffer *eb, int slot)
10587 struct btrfs_file_extent_item *fi;
10588 struct btrfs_path path;
10589 struct btrfs_root *extent_root = root->fs_info->extent_root;
10590 struct btrfs_key fi_key;
10591 struct btrfs_key dbref_key;
10592 struct extent_buffer *leaf;
10593 struct btrfs_extent_item *ei;
10594 struct btrfs_extent_inline_ref *iref;
10595 struct btrfs_extent_data_ref *dref;
10598 u64 disk_num_bytes;
10599 u64 extent_num_bytes;
10606 int found_dbackref = 0;
10610 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10611 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10613 /* Nothing to check for hole and inline data extents */
10614 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10615 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10618 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10619 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10620 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10622 /* Check unaligned disk_num_bytes and num_bytes */
10623 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10625 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10626 fi_key.objectid, fi_key.offset, disk_num_bytes,
10627 root->fs_info->sectorsize);
10628 err |= BYTES_UNALIGNED;
10630 data_bytes_allocated += disk_num_bytes;
10632 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10634 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10635 fi_key.objectid, fi_key.offset, extent_num_bytes,
10636 root->fs_info->sectorsize);
10637 err |= BYTES_UNALIGNED;
10639 data_bytes_referenced += extent_num_bytes;
10641 owner = btrfs_header_owner(eb);
10643 /* Check the extent item of the file extent in extent tree */
10644 btrfs_init_path(&path);
10645 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10646 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10647 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10649 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10653 leaf = path.nodes[0];
10654 slot = path.slots[0];
10655 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10657 extent_flags = btrfs_extent_flags(leaf, ei);
10659 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10661 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10662 disk_bytenr, disk_num_bytes,
10663 BTRFS_EXTENT_FLAG_DATA);
10664 err |= BACKREF_MISMATCH;
10667 /* Check data backref inside that extent item */
10668 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10669 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10670 ptr = (unsigned long)iref;
10671 end = (unsigned long)ei + item_size;
10672 while (ptr < end) {
10673 iref = (struct btrfs_extent_inline_ref *)ptr;
10674 type = btrfs_extent_inline_ref_type(leaf, iref);
10675 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10677 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10678 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10679 if (ref_root == owner || ref_root == root->objectid)
10680 found_dbackref = 1;
10681 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10682 found_dbackref = !check_tree_block_ref(root, NULL,
10683 btrfs_extent_inline_ref_offset(leaf, iref),
10687 if (found_dbackref)
10689 ptr += btrfs_extent_inline_ref_size(type);
10692 if (!found_dbackref) {
10693 btrfs_release_path(&path);
10695 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10696 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10697 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10698 dbref_key.offset = hash_extent_data_ref(root->objectid,
10699 fi_key.objectid, fi_key.offset);
10701 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10702 &dbref_key, &path, 0, 0);
10704 found_dbackref = 1;
10708 btrfs_release_path(&path);
10711 * Neither inlined nor EXTENT_DATA_REF found, try
10712 * SHARED_DATA_REF as last chance.
10714 dbref_key.objectid = disk_bytenr;
10715 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10716 dbref_key.offset = eb->start;
10718 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10719 &dbref_key, &path, 0, 0);
10721 found_dbackref = 1;
10727 if (!found_dbackref)
10728 err |= BACKREF_MISSING;
10729 btrfs_release_path(&path);
10730 if (err & BACKREF_MISSING) {
10731 error("data extent[%llu %llu] backref lost",
10732 disk_bytenr, disk_num_bytes);
 * Get the real tree block level, for cases such as a shared block
 * Return >= 0 as the tree level
 * Return < 0 on error
10742 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10744 struct extent_buffer *eb;
10745 struct btrfs_path path;
10746 struct btrfs_key key;
10747 struct btrfs_extent_item *ei;
10754 /* Search extent tree for extent generation and level */
10755 key.objectid = bytenr;
10756 key.type = BTRFS_METADATA_ITEM_KEY;
10757 key.offset = (u64)-1;
10759 btrfs_init_path(&path);
10760 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10763 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10771 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10772 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10773 struct btrfs_extent_item);
10774 flags = btrfs_extent_flags(path.nodes[0], ei);
10775 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10780 /* Get transid for later read_tree_block() check */
10781 transid = btrfs_extent_generation(path.nodes[0], ei);
10783 /* Get backref level as one source */
10784 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10785 backref_level = key.offset;
10787 struct btrfs_tree_block_info *info;
10789 info = (struct btrfs_tree_block_info *)(ei + 1);
10790 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10792 btrfs_release_path(&path);
10794 /* Get level from tree block as an alternative source */
10795 eb = read_tree_block(fs_info, bytenr, transid);
10796 if (!extent_buffer_uptodate(eb)) {
10797 free_extent_buffer(eb);
10800 header_level = btrfs_header_level(eb);
10801 free_extent_buffer(eb);
10803 if (header_level != backref_level)
10805 return header_level;
10808 btrfs_release_path(&path);
10813 * Check if a tree block backref is valid (points to a valid tree block)
10814 * if level == -1, level will be resolved
10815 * Return >0 for any error found and print error message
10817 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10818 u64 bytenr, int level)
10820 struct btrfs_root *root;
10821 struct btrfs_key key;
10822 struct btrfs_path path;
10823 struct extent_buffer *eb;
10824 struct extent_buffer *node;
10825 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10829 /* Query level for level == -1 special case */
10831 level = query_tree_block_level(fs_info, bytenr);
10833 err |= REFERENCER_MISSING;
10837 key.objectid = root_id;
10838 key.type = BTRFS_ROOT_ITEM_KEY;
10839 key.offset = (u64)-1;
10841 root = btrfs_read_fs_root(fs_info, &key);
10842 if (IS_ERR(root)) {
10843 err |= REFERENCER_MISSING;
10847 /* Read out the tree block to get item/node key */
10848 eb = read_tree_block(fs_info, bytenr, 0);
10849 if (!extent_buffer_uptodate(eb)) {
10850 err |= REFERENCER_MISSING;
10851 free_extent_buffer(eb);
10855 /* Empty tree, no need to check key */
10856 if (!btrfs_header_nritems(eb) && !level) {
10857 free_extent_buffer(eb);
10862 btrfs_node_key_to_cpu(eb, &key, 0);
10864 btrfs_item_key_to_cpu(eb, &key, 0);
10866 free_extent_buffer(eb);
10868 btrfs_init_path(&path);
10869 path.lowest_level = level;
10870 /* Search with the first key, to ensure we can reach it */
10871 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10873 err |= REFERENCER_MISSING;
10877 node = path.nodes[level];
10878 if (btrfs_header_bytenr(node) != bytenr) {
10880 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10881 bytenr, nodesize, bytenr,
10882 btrfs_header_bytenr(node));
10883 err |= REFERENCER_MISMATCH;
10885 if (btrfs_header_level(node) != level) {
10887 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10888 bytenr, nodesize, level,
10889 btrfs_header_level(node));
10890 err |= REFERENCER_MISMATCH;
10894 btrfs_release_path(&path);
10896 if (err & REFERENCER_MISSING) {
10898 error("extent [%llu %d] lost referencer (owner: %llu)",
10899 bytenr, nodesize, root_id);
10902 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10903 bytenr, nodesize, root_id, level);
 * Check if tree block @eb is a tree reloc root.
 * Return 0 if it is not, or if any problem occurs
 * Return 1 if it is a tree reloc root
/*
 * is_tree_reloc_root() - compare @eb against the root node of a tree reloc
 * root.
 *
 * The lookup key is (BTRFS_TREE_RELOC_OBJECTID, BTRFS_ROOT_ITEM_KEY, owner),
 * where owner is read from @eb's header.
 *
 * NOTE(review): this listing omits some short lines (braces, locals, return
 * statements), so the exact return paths are not visible here.
 */
10914 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10915 struct extent_buffer *eb)
10917 struct btrfs_root *tree_reloc_root;
10918 struct btrfs_key key;
10919 u64 bytenr = btrfs_header_bytenr(eb);
10920 u64 owner = btrfs_header_owner(eb);
/* Key of the reloc root item whose offset equals @eb's header owner. */
10923 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10924 key.offset = owner;
10925 key.type = BTRFS_ROOT_ITEM_KEY;
/*
 * The root is read with btrfs_read_fs_root_no_cache() and released below
 * with btrfs_free_fs_root().
 */
10927 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10928 if (IS_ERR(tree_reloc_root))
/* A match means @eb sits at the same bytenr as the reloc root's node. */
10931 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10933 btrfs_free_fs_root(tree_reloc_root);
10938 * Check referencer for shared block backref
10939 * If level == -1, this function will resolve the level.
/*
 * check_shared_block_backref() - verify that tree block @parent references
 * tree block @bytenr.
 *
 * @parent is read from disk; the level of @bytenr may be looked up with
 * query_tree_block_level(). The parent's header level is compared against
 * level + 1 and its block pointers are scanned for @bytenr. When
 * @parent == @bytenr, is_tree_reloc_root() is consulted instead.
 * If no referencer was found, an error is printed and REFERENCER_MISSING is
 * returned.
 *
 * NOTE(review): this listing omits some short lines (braces, gotos, the
 * "level == -1" guard, the success return), so those paths are not visible.
 */
10941 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10942 u64 parent, u64 bytenr, int level)
10944 struct extent_buffer *eb;
10946 int found_parent = 0;
10949 eb = read_tree_block(fs_info, parent, 0);
10950 if (!extent_buffer_uptodate(eb))
10954 level = query_tree_block_level(fs_info, bytenr);
10958 /* It's possible it's a tree reloc root */
10959 if (parent == bytenr) {
10960 if (is_tree_reloc_root(fs_info, eb))
/* A real parent must sit exactly one level above the referenced block. */
10965 if (level + 1 != btrfs_header_level(eb))
/* Look for a block pointer in the parent that equals @bytenr. */
10968 nr = btrfs_header_nritems(eb);
10969 for (i = 0; i < nr; i++) {
10970 if (bytenr == btrfs_node_blockptr(eb, i)) {
10976 free_extent_buffer(eb);
10977 if (!found_parent) {
10979 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10980 bytenr, fs_info->nodesize, parent, level);
10981 return REFERENCER_MISSING;
10987 * Check referencer for normal (inlined) data ref
10988 * If len == 0, it will be resolved by searching in extent tree
/*
 * check_extent_data_backref() - verify a normal (non-shared) data backref.
 *
 * Walks the EXTENT_DATA items of inode @objectid in the root identified by
 * @root_id and looks for file extents whose disk bytenr is @bytenr, whose
 * disk_num_bytes is @len, and which pass a comparison on
 * (key.offset - file extent offset); the right-hand side of that comparison
 * is on a line not shown in this listing. The number of matches
 * (found_count) is compared with @count; on mismatch an error is printed and
 * REFERENCER_MISSING is returned.
 *
 * NOTE(review): this listing omits some short lines (braces, gotos, locals,
 * the "len == 0" guard, the found_count update), so those are not visible.
 */
10990 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10991 u64 root_id, u64 objectid, u64 offset,
10992 u64 bytenr, u64 len, u32 count)
10994 struct btrfs_root *root;
10995 struct btrfs_root *extent_root = fs_info->extent_root;
10996 struct btrfs_key key;
10997 struct btrfs_path path;
10998 struct extent_buffer *leaf;
10999 struct btrfs_file_extent_item *fi;
11000 u32 found_count = 0;
/*
 * Extent tree lookup for @bytenr: search with the largest possible offset,
 * then step back to the previous extent item and check that it really is the
 * EXTENT_ITEM for @bytenr.
 */
11005 key.objectid = bytenr;
11006 key.type = BTRFS_EXTENT_ITEM_KEY;
11007 key.offset = (u64)-1;
11009 btrfs_init_path(&path);
11010 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11013 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11016 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11017 if (key.objectid != bytenr ||
11018 key.type != BTRFS_EXTENT_ITEM_KEY)
11021 btrfs_release_path(&path);
/* Read the root that is supposed to own the referencing file extent. */
11023 key.objectid = root_id;
11024 key.type = BTRFS_ROOT_ITEM_KEY;
11025 key.offset = (u64)-1;
11026 btrfs_init_path(&path);
11028 root = btrfs_read_fs_root(fs_info, &key);
11032 key.objectid = objectid;
11033 key.type = BTRFS_EXTENT_DATA_KEY;
11035 * It can be nasty as data backref offset is
11036 * file offset - file extent offset, which is smaller or
11037 * equal to original backref offset. The only special case is
11038 * overflow. So we need to special check and do further search.
/* Start from file offset 0 when the top bit of @offset is set. */
11040 key.offset = offset & (1ULL << 63) ? 0 : offset;
11042 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11047 * Search afterwards to get correct one
11048 * NOTE: As we must do a comprehensive check on the data backref to
11049 * make sure the dref count also matches, we must iterate all file
11050 * extents for that inode.
11053 leaf = path.nodes[0];
11054 slot = path.slots[0];
11056 if (slot >= btrfs_header_nritems(leaf))
11058 btrfs_item_key_to_cpu(leaf, &key, slot);
11059 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11061 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11063 * Except normal disk bytenr and disk num bytes, we still
11064 * need to do extra check on dbackref offset as
11065 * dbackref offset = file_offset - file_extent_offset
11067 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11068 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11069 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11074 ret = btrfs_next_item(root, &path);
11079 btrfs_release_path(&path);
/* The number of matching file extents must equal the backref's count. */
11080 if (found_count != count) {
11082 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
11083 bytenr, len, root_id, objectid, offset, count, found_count);
11084 return REFERENCER_MISSING;
11090 * Check if the referencer of a shared data backref exists
/*
 * check_shared_data_backref() - verify that block @parent holds a file
 * extent item pointing at data extent @bytenr.
 *
 * Reads @parent and scans its items: entries that are not EXTENT_DATA and
 * inline file extents are tested for and (presumably) skipped; a regular
 * file extent whose disk bytenr equals @bytenr counts as the referencer.
 * If none was found, an error is printed and REFERENCER_MISSING is returned.
 *
 * NOTE(review): this listing omits some short lines (braces, continue/break,
 * gotos, the success return), so those paths are not visible here.
 */
11092 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
11093 u64 parent, u64 bytenr)
11095 struct extent_buffer *eb;
11096 struct btrfs_key key;
11097 struct btrfs_file_extent_item *fi;
11099 int found_parent = 0;
11102 eb = read_tree_block(fs_info, parent, 0);
11103 if (!extent_buffer_uptodate(eb))
11106 nr = btrfs_header_nritems(eb);
11107 for (i = 0; i < nr; i++) {
11108 btrfs_item_key_to_cpu(eb, &key, i);
11109 if (key.type != BTRFS_EXTENT_DATA_KEY)
11112 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
/* The inline type is tested before the disk bytenr is read. */
11113 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
11116 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
11123 free_extent_buffer(eb);
11124 if (!found_parent) {
11125 error("shared extent %llu referencer lost (parent: %llu)",
11127 return REFERENCER_MISSING;
11133 * This function will check a given extent item, including its backref and
11134 * itself (like crossing stripe boundary and type)
11136 * Since we don't use extent_record anymore, introduce new error bit
/*
 * check_extent_item() - check one EXTENT_ITEM / METADATA_ITEM at @slot of
 * @eb, including every inline backref stored in it.
 *
 * Visible steps:
 *  - add the extent size to the global bytes_used counter (key.offset for
 *    EXTENT_ITEM keys, nodesize on the other visible branch);
 *  - report items smaller than struct btrfs_extent_item as unsupported
 *    COMPAT_EXTENT_TREE_V0;
 *  - for metadata, check the stripe boundary via check_crossing_stripes();
 *  - walk the inline refs up to the item end, dispatching on the ref type to
 *    the matching check_*_backref() helper; unknown types set UNKNOWN_TYPE,
 *    and walking past the item end sets ITEM_SIZE_MISMATCH.
 *
 * NOTE(review): this listing omits some short lines (braces, else, labels,
 * gotos, locals, returns), so loop structure and return value are not fully
 * visible.
 * NOTE(review): check_crossing_stripes() is passed the file-level global_info
 * rather than the @fs_info parameter - confirm the two are always the same.
 */
11138 static int check_extent_item(struct btrfs_fs_info *fs_info,
11139 struct extent_buffer *eb, int slot)
11141 struct btrfs_extent_item *ei;
11142 struct btrfs_extent_inline_ref *iref;
11143 struct btrfs_extent_data_ref *dref;
11147 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11148 u32 item_size = btrfs_item_size_nr(eb, slot);
11153 struct btrfs_key key;
11157 btrfs_item_key_to_cpu(eb, &key, slot);
/* For EXTENT_ITEM keys the key offset is added as the extent length. */
11158 if (key.type == BTRFS_EXTENT_ITEM_KEY)
11159 bytes_used += key.offset;
11161 bytes_used += nodesize;
11163 if (item_size < sizeof(*ei)) {
11165 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
11166 * old thing when on disk format is still un-determined.
11167 * No need to care about it anymore
11169 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
11173 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
11174 flags = btrfs_extent_flags(eb, ei);
11176 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
11178 if (metadata && check_crossing_stripes(global_info, key.objectid,
11180 error("bad metadata [%llu, %llu) crossing stripe boundary",
11181 key.objectid, key.objectid + nodesize);
11182 err |= CROSSING_STRIPE_BOUNDARY;
/* Inline data starts right after the fixed-size extent item. */
11185 ptr = (unsigned long)(ei + 1);
11187 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
11188 /* Old EXTENT_ITEM metadata */
11189 struct btrfs_tree_block_info *info;
11191 info = (struct btrfs_tree_block_info *)ptr;
11192 level = btrfs_tree_block_level(eb, info);
11193 ptr += sizeof(struct btrfs_tree_block_info);
11195 /* New METADATA_ITEM */
11196 level = key.offset;
11198 end = (unsigned long)ei + item_size;
11201 /* Reached extent item end normally */
11205 /* Beyond extent item end, wrong item size */
11207 err |= ITEM_SIZE_MISMATCH;
11208 error("extent item at bytenr %llu slot %d has wrong size",
11213 /* Now check every backref in this extent item */
11214 iref = (struct btrfs_extent_inline_ref *)ptr;
11215 type = btrfs_extent_inline_ref_type(eb, iref);
11216 offset = btrfs_extent_inline_ref_offset(eb, iref);
11218 case BTRFS_TREE_BLOCK_REF_KEY:
11219 ret = check_tree_block_backref(fs_info, offset, key.objectid,
11223 case BTRFS_SHARED_BLOCK_REF_KEY:
11224 ret = check_shared_block_backref(fs_info, offset, key.objectid,
11228 case BTRFS_EXTENT_DATA_REF_KEY:
/* For data refs the payload is read starting at the iref's offset field. */
11229 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11230 ret = check_extent_data_backref(fs_info,
11231 btrfs_extent_data_ref_root(eb, dref),
11232 btrfs_extent_data_ref_objectid(eb, dref),
11233 btrfs_extent_data_ref_offset(eb, dref),
11234 key.objectid, key.offset,
11235 btrfs_extent_data_ref_count(eb, dref));
11238 case BTRFS_SHARED_DATA_REF_KEY:
11239 ret = check_shared_data_backref(fs_info, offset, key.objectid);
11243 error("extent[%llu %d %llu] has unknown ref type: %d",
11244 key.objectid, key.type, key.offset, type);
11245 err |= UNKNOWN_TYPE;
/* Advance to the next inline ref; its size depends on the ref type. */
11249 ptr += btrfs_extent_inline_ref_size(type);
11257 * Check if a dev extent item is referred correctly by its chunk
/*
 * check_dev_extent_item() - verify that the DEV_EXTENT at @slot of @eb is
 * referenced by a stripe of its chunk.
 *
 * The chunk key is rebuilt from the dev extent's chunk_objectid and
 * chunk_offset and searched in the chunk tree. The chunk is validated with
 * btrfs_check_chunk_valid(), its stripe length is compared with the dev
 * extent length, and its stripes are scanned for one whose (devid, offset)
 * equals the dev extent key's (objectid, offset). If no such chunk stripe
 * was found, an error is printed and REFERENCER_MISSING is returned.
 *
 * NOTE(review): this listing omits some short lines (braces, gotos, locals,
 * the success return), so those paths are not visible here.
 */
11259 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
11260 struct extent_buffer *eb, int slot)
11262 struct btrfs_root *chunk_root = fs_info->chunk_root;
11263 struct btrfs_dev_extent *ptr;
11264 struct btrfs_path path;
11265 struct btrfs_key chunk_key;
11266 struct btrfs_key devext_key;
11267 struct btrfs_chunk *chunk;
11268 struct extent_buffer *l;
11272 int found_chunk = 0;
11275 btrfs_item_key_to_cpu(eb, &devext_key, slot);
11276 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
11277 length = btrfs_dev_extent_length(eb, ptr);
/* The dev extent records which chunk it belongs to. */
11279 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
11280 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11281 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
11283 btrfs_init_path(&path);
11284 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11289 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
11290 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
11295 if (btrfs_stripe_length(fs_info, l, chunk) != length)
/* Look for the stripe that maps to this device extent. */
11298 num_stripes = btrfs_chunk_num_stripes(l, chunk);
11299 for (i = 0; i < num_stripes; i++) {
11300 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
11301 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
11303 if (devid == devext_key.objectid &&
11304 offset == devext_key.offset) {
11310 btrfs_release_path(&path);
11311 if (!found_chunk) {
11313 "device extent[%llu, %llu, %llu] did not find the related chunk",
11314 devext_key.objectid, devext_key.offset, length);
11315 return REFERENCER_MISSING;
11321 * Check if the used space is correct with the dev item
/*
 * check_dev_item() - compare a DEV_ITEM's bytes_used with the sum of the
 * lengths of the device's DEV_EXTENT items.
 *
 * The dev tree is searched from (dev_id, BTRFS_DEV_EXTENT_KEY, ...) and
 * iterated with btrfs_next_item(), adding up btrfs_dev_extent_length() for
 * every DEV_EXTENT of this device. If the initial search fails, an error is
 * printed and REFERENCER_MISSING is returned; if the total differs from the
 * dev item's bytes_used, ACCOUNTING_MISMATCH is returned.
 *
 * Both error messages identify the device by the DEV_ITEM key that is
 * re-read from @eb just before printing. The accounting message previously
 * printed BTRFS_ROOT_TREE_OBJECTID / BTRFS_DEV_EXTENT_KEY instead of the key
 * it had just read.
 *
 * NOTE(review): this listing omits some short lines (braces, break/continue,
 * locals, the key.offset setup, the success return), so those are not
 * visible here.
 */
11323 static int check_dev_item(struct btrfs_fs_info *fs_info,
11324 struct extent_buffer *eb, int slot)
11326 struct btrfs_root *dev_root = fs_info->dev_root;
11327 struct btrfs_dev_item *dev_item;
11328 struct btrfs_path path;
11329 struct btrfs_key key;
11330 struct btrfs_dev_extent *ptr;
11336 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
11337 dev_id = btrfs_device_id(eb, dev_item);
11338 used = btrfs_device_bytes_used(eb, dev_item);
/* Dev extents of a device are keyed by the device id. */
11340 key.objectid = dev_id;
11341 key.type = BTRFS_DEV_EXTENT_KEY;
11344 btrfs_init_path(&path);
11345 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* Re-read the DEV_ITEM key so the message names the item being checked. */
11347 btrfs_item_key_to_cpu(eb, &key, slot);
11348 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
11349 key.objectid, key.type, key.offset);
11350 btrfs_release_path(&path);
11351 return REFERENCER_MISSING;
11354 /* Iterate dev_extents to calculate the used space of a device */
11356 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11359 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11360 if (key.objectid > dev_id)
11362 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11365 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11366 struct btrfs_dev_extent);
11367 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11369 ret = btrfs_next_item(dev_root, &path);
11373 btrfs_release_path(&path);
11375 if (used != total) {
/* Same as above: report the DEV_ITEM key that was just read from @eb. */
11376 btrfs_item_key_to_cpu(eb, &key, slot);
11378 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11379 total, used, key.objectid,
11380 key.type, key.offset);
11381 return ACCOUNTING_MISMATCH;
11387 * Check a block group item with its referencer (chunk) and its used space
11388 * with extent/metadata item
/*
 * check_block_group_item() - check the BLOCK_GROUP_ITEM at @slot of @eb.
 *
 * Two checks are visible:
 *  1. The chunk item keyed (BTRFS_FIRST_CHUNK_TREE_OBJECTID, CHUNK_ITEM,
 *     block group start) is searched in the chunk tree; a failed lookup sets
 *     REFERENCER_MISSING, a chunk length mismatch sets REFERENCER_MISMATCH.
 *  2. The extent tree is walked from the block group start; the sizes of
 *     the EXTENT_ITEM / METADATA_ITEM entries inside the block group are
 *     summed into total, and each extent's DATA / TREE_BLOCK flag is checked
 *     against the block group flags (CHUNK_TYPE_MISMATCH otherwise). A
 *     total different from the block group's used value sets
 *     ACCOUNTING_MISMATCH.
 *
 * NOTE(review): this listing omits some short lines (braces, else,
 * break/continue, gotos, locals, the return), so those are not visible here.
 */
11390 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11391 struct extent_buffer *eb, int slot)
11393 struct btrfs_root *extent_root = fs_info->extent_root;
11394 struct btrfs_root *chunk_root = fs_info->chunk_root;
11395 struct btrfs_block_group_item *bi;
11396 struct btrfs_block_group_item bg_item;
11397 struct btrfs_path path;
11398 struct btrfs_key bg_key;
11399 struct btrfs_key chunk_key;
11400 struct btrfs_key extent_key;
11401 struct btrfs_chunk *chunk;
11402 struct extent_buffer *leaf;
11403 struct btrfs_extent_item *ei;
11404 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11412 btrfs_item_key_to_cpu(eb, &bg_key, slot);
11413 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* Copy the item out of the extent buffer before reading its fields. */
11414 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11415 used = btrfs_block_group_used(&bg_item);
11416 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk key offset is the block group's start bytenr. */
11418 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11419 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11420 chunk_key.offset = bg_key.objectid;
11422 btrfs_init_path(&path);
11423 /* Search for the referencer chunk */
11424 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11427 "block group[%llu %llu] did not find the related chunk item",
11428 bg_key.objectid, bg_key.offset);
11429 err |= REFERENCER_MISSING;
11431 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11432 struct btrfs_chunk);
11433 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11436 "block group[%llu %llu] related chunk item length does not match",
11437 bg_key.objectid, bg_key.offset);
11438 err |= REFERENCER_MISMATCH;
11441 btrfs_release_path(&path);
11443 /* Search from the block group bytenr */
11444 extent_key.objectid = bg_key.objectid;
11445 extent_key.type = 0;
11446 extent_key.offset = 0;
11448 btrfs_init_path(&path);
11449 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11453 /* Iterate extent tree to account used space */
11455 leaf = path.nodes[0];
11457 /* Search slot can point to the last item beyond leaf nritems */
11458 if (path.slots[0] >= btrfs_header_nritems(leaf))
/* Stop condition: the extent starts at or past the block group end. */
11461 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11462 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11465 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11466 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11468 if (extent_key.objectid < bg_key.objectid)
11471 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11474 total += extent_key.offset;
11476 ei = btrfs_item_ptr(leaf, path.slots[0],
11477 struct btrfs_extent_item);
11478 flags = btrfs_extent_flags(leaf, ei);
/* Data extents need a DATA block group, tree blocks SYSTEM or METADATA. */
11479 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11480 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11482 "bad extent[%llu, %llu) type mismatch with chunk",
11483 extent_key.objectid,
11484 extent_key.objectid + extent_key.offset);
11485 err |= CHUNK_TYPE_MISMATCH;
11487 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11488 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11489 BTRFS_BLOCK_GROUP_METADATA))) {
11491 "bad extent[%llu, %llu) type mismatch with chunk",
11492 extent_key.objectid,
11493 extent_key.objectid + nodesize);
11494 err |= CHUNK_TYPE_MISMATCH;
11498 ret = btrfs_next_item(extent_root, &path);
11504 btrfs_release_path(&path);
11506 if (total != used) {
11508 "block group[%llu %llu] used %llu but extent items used %llu",
11509 bg_key.objectid, bg_key.offset, used, total);
11510 err |= ACCOUNTING_MISMATCH;
11516 * Check a chunk item.
11517 * Including checking all referred dev_extents and block group
/*
 * check_chunk_item() - check the CHUNK_ITEM at @slot of @eb together with
 * the block group item and dev extents it refers to.
 *
 * Visible steps:
 *  - validate the chunk with btrfs_check_chunk_valid(); an invalid chunk
 *    sets BYTES_UNALIGNED | UNKNOWN_TYPE;
 *  - search the extent tree for the block group item keyed
 *    (chunk start, BLOCK_GROUP_ITEM, chunk length); a failed lookup or a
 *    flags mismatch with the chunk type sets REFERENCER_MISSING;
 *  - for every stripe, search the dev tree for the DEV_EXTENT keyed
 *    (stripe devid, DEV_EXTENT, stripe offset) and compare its chunk
 *    objectid, chunk offset and length with the chunk; any failure sets
 *    BACKREF_MISSING.
 *
 * All messages identify the chunk as [chunk_key.offset, chunk_end). The
 * per-stripe message previously printed chunk_key.objectid as the start,
 * which is not the chunk's logical start.
 *
 * NOTE(review): a flags mismatch sets REFERENCER_MISSING, while the
 * comparable length mismatch in check_block_group_item() sets
 * REFERENCER_MISMATCH - confirm which bit is intended before changing it.
 * NOTE(review): this listing omits some short lines (braces, else, labels,
 * continue, locals, the return), so those are not visible here.
 */
11519 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11520 struct extent_buffer *eb, int slot)
11522 struct btrfs_root *extent_root = fs_info->extent_root;
11523 struct btrfs_root *dev_root = fs_info->dev_root;
11524 struct btrfs_path path;
11525 struct btrfs_key chunk_key;
11526 struct btrfs_key bg_key;
11527 struct btrfs_key devext_key;
11528 struct btrfs_chunk *chunk;
11529 struct extent_buffer *leaf;
11530 struct btrfs_block_group_item *bi;
11531 struct btrfs_block_group_item bg_item;
11532 struct btrfs_dev_extent *ptr;
11544 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11545 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11546 length = btrfs_chunk_length(eb, chunk);
/* The chunk covers the logical range [chunk_key.offset, chunk_end). */
11547 chunk_end = chunk_key.offset + length;
11548 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11551 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11553 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11556 type = btrfs_chunk_type(eb, chunk);
/* The matching block group item is keyed by chunk start and length. */
11558 bg_key.objectid = chunk_key.offset;
11559 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11560 bg_key.offset = length;
11562 btrfs_init_path(&path);
11563 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11566 "chunk[%llu %llu) did not find the related block group item",
11567 chunk_key.offset, chunk_end);
11568 err |= REFERENCER_MISSING;
11570 leaf = path.nodes[0];
11571 bi = btrfs_item_ptr(leaf, path.slots[0],
11572 struct btrfs_block_group_item);
11573 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11575 if (btrfs_block_group_flags(&bg_item) != type) {
11577 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11578 chunk_key.offset, chunk_end, type,
11579 btrfs_block_group_flags(&bg_item));
11580 err |= REFERENCER_MISSING;
11584 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11585 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11586 for (i = 0; i < num_stripes; i++) {
/* Drop whatever the previous lookup left in the path. */
11587 btrfs_release_path(&path);
11588 btrfs_init_path(&path);
11589 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11590 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11591 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11593 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11596 goto not_match_dev;
11598 leaf = path.nodes[0];
11599 ptr = btrfs_item_ptr(leaf, path.slots[0],
11600 struct btrfs_dev_extent);
11601 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11602 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11603 if (objectid != chunk_key.objectid ||
11604 offset != chunk_key.offset ||
11605 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11606 goto not_match_dev;
11609 err |= BACKREF_MISSING;
11611 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11612 chunk_key.offset, chunk_end, i);
11615 btrfs_release_path(&path);
11621 * Main entry function to check known items and update related accounting info
/*
 * check_leaf_items() - dispatch every item of leaf @eb to the checker that
 * matches its key type.
 *
 * Visible dispatch table:
 *   EXTENT_DATA            -> check_extent_data_item()
 *   BLOCK_GROUP_ITEM       -> check_block_group_item()
 *   DEV_ITEM               -> check_dev_item()
 *   CHUNK_ITEM             -> check_chunk_item()
 *   DEV_EXTENT             -> check_dev_extent_item()
 *   EXTENT_ITEM/METADATA   -> check_extent_item()
 *   EXTENT_CSUM            -> only adds the item size to total_csum_bytes
 *   TREE_BLOCK_REF         -> check_tree_block_backref()
 *   EXTENT_DATA_REF        -> check_extent_data_backref()
 *   SHARED_BLOCK_REF       -> check_shared_block_backref()
 *   SHARED_DATA_REF        -> check_shared_data_backref()
 *
 * NOTE(review): this listing omits some short lines (the switch statement,
 * break, error accumulation, loop label/goto, return), so how the individual
 * results are combined is not visible here.
 */
11623 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11625 struct btrfs_fs_info *fs_info = root->fs_info;
11626 struct btrfs_key key;
11629 struct btrfs_extent_data_ref *dref;
11634 btrfs_item_key_to_cpu(eb, &key, slot);
11638 case BTRFS_EXTENT_DATA_KEY:
11639 ret = check_extent_data_item(root, eb, slot);
11642 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11643 ret = check_block_group_item(fs_info, eb, slot);
11646 case BTRFS_DEV_ITEM_KEY:
11647 ret = check_dev_item(fs_info, eb, slot);
11650 case BTRFS_CHUNK_ITEM_KEY:
11651 ret = check_chunk_item(fs_info, eb, slot);
11654 case BTRFS_DEV_EXTENT_KEY:
11655 ret = check_dev_extent_item(fs_info, eb, slot);
11658 case BTRFS_EXTENT_ITEM_KEY:
11659 case BTRFS_METADATA_ITEM_KEY:
11660 ret = check_extent_item(fs_info, eb, slot);
11663 case BTRFS_EXTENT_CSUM_KEY:
11664 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* Keyed (non-inline) backrefs go through the same helpers as inline ones. */
11666 case BTRFS_TREE_BLOCK_REF_KEY:
11667 ret = check_tree_block_backref(fs_info, key.offset,
11671 case BTRFS_EXTENT_DATA_REF_KEY:
11672 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11673 ret = check_extent_data_backref(fs_info,
11674 btrfs_extent_data_ref_root(eb, dref),
11675 btrfs_extent_data_ref_objectid(eb, dref),
11676 btrfs_extent_data_ref_offset(eb, dref),
11678 btrfs_extent_data_ref_count(eb, dref));
11681 case BTRFS_SHARED_BLOCK_REF_KEY:
11682 ret = check_shared_block_backref(fs_info, key.offset,
11686 case BTRFS_SHARED_DATA_REF_KEY:
11687 ret = check_shared_data_backref(fs_info, key.offset,
/* Move on to the next slot while there are items left in the leaf. */
11695 if (++slot < btrfs_header_nritems(eb))
11702 * Helper function for later fs/subvol tree check. To determine if a tree
11703 * block should be checked.
11704 * This function ensures that only the direct referencer with the lowest
11705 * rootid checks a fs/subvolume tree block.
11707 * Backref check at extent tree would detect errors like missing subvolume
11708 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * should_check() - decide whether @root should check tree block @eb.
 *
 * Looks up the extent item of @eb in the extent tree (search with offset -1,
 * then btrfs_previous_extent_item()) and walks its inline refs. If a
 * TREE_BLOCK_REF with an offset lower than root->objectid is found, the path
 * is released early, i.e. a lower root id also references the block.
 *
 * NOTE(review): this listing omits some short lines (braces, else, gotos,
 * labels, locals, all return statements), so the concrete return values for
 * each path are not visible here.
 */
11710 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11712 struct btrfs_root *extent_root = root->fs_info->extent_root;
11713 struct btrfs_key key;
11714 struct btrfs_path path;
11715 struct extent_buffer *leaf;
11717 struct btrfs_extent_item *ei;
11723 struct btrfs_extent_inline_ref *iref;
11726 btrfs_init_path(&path);
11727 key.objectid = btrfs_header_bytenr(eb);
11728 key.type = BTRFS_METADATA_ITEM_KEY;
11729 key.offset = (u64)-1;
11732 * Any failure in backref resolving means we can't determine
11733 * whom the tree block belongs to.
11734 * So in that case, we need to check that tree block
11736 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11740 ret = btrfs_previous_extent_item(extent_root, &path,
11741 btrfs_header_bytenr(eb));
11745 leaf = path.nodes[0];
11746 slot = path.slots[0];
11747 btrfs_item_key_to_cpu(leaf, &key, slot);
11748 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * METADATA_ITEM: inline refs follow the extent item directly. On the other
 * visible path a btrfs_tree_block_info sits in between.
 */
11750 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11751 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11753 struct btrfs_tree_block_info *info;
11755 info = (struct btrfs_tree_block_info *)(ei + 1);
11756 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11759 item_size = btrfs_item_size_nr(leaf, slot);
11760 ptr = (unsigned long)iref;
11761 end = (unsigned long)ei + item_size;
11762 while (ptr < end) {
11763 iref = (struct btrfs_extent_inline_ref *)ptr;
11764 type = btrfs_extent_inline_ref_type(leaf, iref);
11765 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11768 * We only check the tree block if current root is
11769 * the lowest referencer of it.
11771 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11772 offset < root->objectid) {
11773 btrfs_release_path(&path);
11777 ptr += btrfs_extent_inline_ref_size(type);
11780 * Normally we should also check keyed tree block ref, but that may be
11781 * very time consuming. Inlined ref should already make us skip a lot
11782 * of refs now. So skip search keyed tree block ref.
11786 btrfs_release_path(&path);
11791 * Traversal function for tree block. We will do:
11792 * 1) Skip shared fs/subvolume tree blocks
11793 * 2) Update related bytes accounting
11794 * 3) Pre-order traversal
/*
 * traverse_tree_block() - check tree block @node of @root and recurse into
 * its children.
 *
 * Visible steps:
 *  - for fs trees, consult should_check() first;
 *  - add node->len to total_btree_bytes and, depending on the header owner,
 *    to total_fs_tree_bytes / total_extent_tree_bytes;
 *  - check the block itself with check_tree_block_ref();
 *  - for leaves: account free space and run check_leaf_items();
 *  - for nodes: account unused key pointers, then read every child with
 *    read_tree_block() and recurse. A test against the root's drop_progress
 *    key and drop_level precedes the read.
 *
 * NOTE(review): this listing omits some short lines (braces, continue,
 * locals, error accumulation, returns), so the return value and the exact
 * leaf/node branching are not visible here.
 */
11796 static int traverse_tree_block(struct btrfs_root *root,
11797 struct extent_buffer *node)
11799 struct extent_buffer *eb;
11800 struct btrfs_key key;
11801 struct btrfs_key drop_key;
11809 * Skip shared fs/subvolume tree block, in that case they will
11810 * be checked by referencer with lowest rootid
11812 if (is_fstree(root->objectid) && !should_check(root, node))
11815 /* Update bytes accounting */
11816 total_btree_bytes += node->len;
11817 if (fs_root_objectid(btrfs_header_owner(node)))
11818 total_fs_tree_bytes += node->len;
11819 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11820 total_extent_tree_bytes += node->len;
11822 /* pre-order traversal, check itself first */
11823 level = btrfs_header_level(node);
11824 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11825 btrfs_header_level(node),
11826 btrfs_header_owner(node));
11830 "check %s failed root %llu bytenr %llu level %d, force continue check",
11831 level ? "node":"leaf", root->objectid,
11832 btrfs_header_bytenr(node), btrfs_header_level(node));
11835 btree_space_waste += btrfs_leaf_free_space(root, node);
11836 ret = check_leaf_items(root, node);
11841 nr = btrfs_header_nritems(node);
11842 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
/* Unused key pointer slots in a node are counted as wasted space. */
11843 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11844 sizeof(struct btrfs_key_ptr);
11846 /* Then check all its children */
11847 for (i = 0; i < nr; i++) {
11848 u64 blocknr = btrfs_node_blockptr(node, i);
11850 btrfs_node_key_to_cpu(node, &key, i);
11851 if (level == root->root_item.drop_level &&
11852 is_dropped_key(&key, &drop_key))
11856 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11857 * to call the function itself.
11859 eb = read_tree_block(root->fs_info, blocknr, 0);
11860 if (extent_buffer_uptodate(eb)) {
11861 ret = traverse_tree_block(root, eb);
11864 free_extent_buffer(eb);
11871 * Low memory usage version check_chunks_and_extents.
/*
 * check_chunks_and_extents_v2() - low-memory driver for the chunk/extent
 * check.
 *
 * Traverses the chunk root and the tree root with traverse_tree_block(),
 * then iterates the tree root starting at a key with objectid
 * BTRFS_EXTENT_TREE_OBJECTID and type ROOT_ITEM, reading the root behind
 * each item and traversing it. Roots with objectid BTRFS_TREE_RELOC_OBJECTID
 * are read with btrfs_read_fs_root_no_cache() and freed with
 * btrfs_free_fs_root() after the traversal; other roots are read with
 * btrfs_read_fs_root().
 *
 * NOTE(review): this listing omits some short lines (braces, else, gotos,
 * labels, locals, error accumulation, the return), so those are not visible.
 */
11873 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
11875 struct btrfs_path path;
11876 struct btrfs_key key;
11877 struct btrfs_root *root1;
11878 struct btrfs_root *root;
11879 struct btrfs_root *cur_root;
11883 root = fs_info->fs_root;
11885 root1 = root->fs_info->chunk_root;
11886 ret = traverse_tree_block(root1, root1->node);
11889 root1 = root->fs_info->tree_root;
11890 ret = traverse_tree_block(root1, root1->node);
/* From here on root1 is the tree root; look for ROOT_ITEMs in it. */
11893 btrfs_init_path(&path);
11894 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11896 key.type = BTRFS_ROOT_ITEM_KEY;
11898 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11900 error("cannot find extent treet in tree_root");
11905 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11906 if (key.type != BTRFS_ROOT_ITEM_KEY)
11908 key.offset = (u64)-1;
11910 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11911 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11914 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11915 if (IS_ERR(cur_root) || !cur_root) {
11916 error("failed to read tree: %lld", key.objectid);
11920 ret = traverse_tree_block(cur_root, cur_root->node);
/* Only the uncached reloc roots are freed here. */
11923 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11924 btrfs_free_fs_root(cur_root);
11926 ret = btrfs_next_item(root1, &path);
11932 btrfs_release_path(&path);
/*
 * do_check_chunks_and_extents() - run the chunk/extent check in the
 * configured mode.
 *
 * Prints "checking extents" to stderr unless ctx.progress_enabled is set,
 * then calls check_chunks_and_extents_v2() when check_mode is
 * CHECK_MODE_LOWMEM; check_chunks_and_extents() is the other visible call.
 *
 * NOTE(review): the else line, locals and the return are absent from this
 * listing.
 */
11936 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11940 if (!ctx.progress_enabled)
11941 fprintf(stderr, "checking extents\n");
11942 if (check_mode == CHECK_MODE_LOWMEM)
11943 ret = check_chunks_and_extents_v2(fs_info);
11945 ret = check_chunks_and_extents(fs_info);
/*
 * btrfs_fsck_reinit_root() - give @root a freshly initialised root block.
 *
 * Block c is either reused (an extra reference is taken with
 * extent_buffer_get()) or allocated with btrfs_alloc_free_block(). Its header
 * is zeroed and filled in: level, bytenr (c->start), generation
 * (trans->transid), backref revision BTRFS_MIXED_BACKREF_REV, owner, fsid and
 * chunk tree uuid. The buffer is then marked dirty. When the old root node
 * and c start at the same bytenr, the root item's generation and level are
 * updated and written to the tree root with btrfs_update_root(). Finally the
 * old node is released and the root is added to the dirty list.
 *
 * NOTE(review): this listing omits some short lines (braces, else, the
 * condition that selects reuse vs. allocation - presumably @overwrite -,
 * error checks, the root->node assignment, returns), so those are not
 * visible here.
 */
11950 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11951 struct btrfs_root *root, int overwrite)
11953 struct extent_buffer *c;
11954 struct extent_buffer *old = root->node;
11957 struct btrfs_disk_key disk_key = {0,0,0};
11963 extent_buffer_get(c);
11966 c = btrfs_alloc_free_block(trans, root,
11967 root->fs_info->nodesize,
11968 root->root_key.objectid,
11969 &disk_key, level, 0, 0);
11972 extent_buffer_get(c);
/* Wipe the header, then rebuild it field by field. */
11976 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11977 btrfs_set_header_level(c, level);
11978 btrfs_set_header_bytenr(c, c->start);
11979 btrfs_set_header_generation(c, trans->transid);
11980 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11981 btrfs_set_header_owner(c, root->root_key.objectid);
11983 write_extent_buffer(c, root->fs_info->fsid,
11984 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11986 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11987 btrfs_header_chunk_tree_uuid(c),
11990 btrfs_mark_buffer_dirty(c);
11992 * this case can happen in the following case:
11994 * 1.overwrite previous root.
11996 * 2.reinit reloc data root, this is because we skip pin
11997 * down reloc data tree before which means we can allocate
11998 * same block bytenr here.
12000 if (old->start == c->start) {
12001 btrfs_set_root_generation(&root->root_item,
12003 root->root_item.level = btrfs_header_level(root->node);
12004 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12005 &root->root_key, &root->root_item);
12007 free_extent_buffer(c);
12011 free_extent_buffer(old);
12013 add_root_to_dirty_list(root);
/*
 * pin_down_tree_blocks() - pin @eb and, recursively, the tree blocks
 * reachable from it.
 *
 * A block whose range already has EXTENT_DIRTY set in
 * fs_info->pinned_extents is detected first (see the comment below);
 * otherwise it is pinned with btrfs_pin_extent(). Then its entries are
 * walked:
 *  - ROOT_ITEM entries, except the extent root, tree reloc roots and the
 *    data reloc root: the root block is read and pinned recursively with
 *    tree_root = 0;
 *  - node pointers: at level 1 of a tree that is not the tree root the child
 *    is pinned without being read; otherwise it is read and pinned
 *    recursively with the same @tree_root.
 *
 * NOTE(review): this listing omits some short lines (braces, else, continue,
 * the leaf/node branch, locals, error returns), so those are not visible.
 */
12017 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12018 struct extent_buffer *eb, int tree_root)
12020 struct extent_buffer *tmp;
12021 struct btrfs_root_item *ri;
12022 struct btrfs_key key;
12024 int level = btrfs_header_level(eb);
12030 * If we have pinned this block before, don't pin it again.
12031 * This can not only avoid forever loop with broken filesystem
12032 * but also give us some speedups.
12034 if (test_range_bit(&fs_info->pinned_extents, eb->start,
12035 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
12038 btrfs_pin_extent(fs_info, eb->start, eb->len);
12040 nritems = btrfs_header_nritems(eb);
12041 for (i = 0; i < nritems; i++) {
12043 btrfs_item_key_to_cpu(eb, &key, i);
12044 if (key.type != BTRFS_ROOT_ITEM_KEY)
12046 /* Skip the extent root and reloc roots */
12047 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
12048 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
12049 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
12051 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
12052 bytenr = btrfs_disk_root_bytenr(eb, ri);
12055 * If at any point we start needing the real root we
12056 * will have to build a stump root for the root we are
12057 * in, but for now this doesn't actually use the root so
12058 * just pass in extent_root.
12060 tmp = read_tree_block(fs_info, bytenr, 0);
12061 if (!extent_buffer_uptodate(tmp)) {
12062 fprintf(stderr, "Error reading root block\n");
/* Trees found through a root item are never treated as the tree root. */
12065 ret = pin_down_tree_blocks(fs_info, tmp, 0);
12066 free_extent_buffer(tmp);
12070 bytenr = btrfs_node_blockptr(eb, i);
12072 /* If we aren't the tree root don't read the block */
12073 if (level == 1 && !tree_root) {
12074 btrfs_pin_extent(fs_info, bytenr,
12075 fs_info->nodesize);
12079 tmp = read_tree_block(fs_info, bytenr, 0);
12080 if (!extent_buffer_uptodate(tmp)) {
12081 fprintf(stderr, "Error reading tree block\n");
12084 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
12085 free_extent_buffer(tmp);
/*
 * pin_metadata_blocks() - pin the blocks of the chunk tree (tree_root = 0)
 * and then those reachable from the tree root (tree_root = 1) via
 * pin_down_tree_blocks().
 *
 * NOTE(review): the check of the first call's result is not visible in this
 * listing.
 */
12094 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
12098 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
12102 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * reset_block_groups() - rebuild the in-memory block groups from the chunk
 * tree.
 *
 * Clears the avail_{data,metadata,system}_alloc_bits, then walks the
 * CHUNK_ITEMs of the chunk tree. For each chunk it calls
 * btrfs_add_block_group() with the chunk type, key and length, and marks the
 * chunk's range dirty in fs_info->free_space_cache. Afterwards the block
 * groups are walked with btrfs_lookup_first_block_group().
 *
 * NOTE(review): this listing omits some short lines (braces, loops, locals,
 * the key setup, slot advance, what is done with each looked-up block group,
 * returns), so those are not visible here.
 */
12105 static int reset_block_groups(struct btrfs_fs_info *fs_info)
12107 struct btrfs_block_group_cache *cache;
12108 struct btrfs_path path;
12109 struct extent_buffer *leaf;
12110 struct btrfs_chunk *chunk;
12111 struct btrfs_key key;
12115 btrfs_init_path(&path);
12117 key.type = BTRFS_CHUNK_ITEM_KEY;
12119 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
12121 btrfs_release_path(&path);
12126 * We do this in case the block groups were screwed up and had alloc
12127 * bits that aren't actually set on the chunks. This happens with
12128 * restored images every time and could happen in real life I guess.
12130 fs_info->avail_data_alloc_bits = 0;
12131 fs_info->avail_metadata_alloc_bits = 0;
12132 fs_info->avail_system_alloc_bits = 0;
12134 /* First we need to create the in-memory block groups */
/* Past the last slot of this leaf: move on to the next leaf. */
12136 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12137 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
12139 btrfs_release_path(&path);
12147 leaf = path.nodes[0];
12148 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12149 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
12154 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
12155 btrfs_add_block_group(fs_info, 0,
12156 btrfs_chunk_type(leaf, chunk),
12157 key.objectid, key.offset,
12158 btrfs_chunk_length(leaf, chunk));
/*
 * NOTE(review): the end passed here is key.offset + length, not
 * key.offset + length - 1 - confirm whether set_extent_dirty() expects an
 * inclusive end.
 */
12159 set_extent_dirty(&fs_info->free_space_cache, key.offset,
12160 key.offset + btrfs_chunk_length(leaf, chunk));
12165 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Continue the lookup right after the block group just found. */
12169 start = cache->key.objectid + cache->key.offset;
12172 btrfs_release_path(&path);
/*
 * reset_balance() - remove balance/relocation state from the tree root and
 * re-create the data reloc tree.
 *
 * Visible steps:
 *  1. search the tree root for the balance item (BTRFS_BALANCE_OBJECTID,
 *     BTRFS_BALANCE_ITEM_KEY) and delete it with btrfs_del_item();
 *  2. search for ROOT_ITEMs with objectid BTRFS_TREE_RELOC_OBJECTID and
 *     delete them in batches (del_slot/del_nr) with btrfs_del_items();
 *  3. read the data reloc root (BTRFS_DATA_RELOC_TREE_OBJECTID), record it
 *     in the transaction, reinitialise it with btrfs_fsck_reinit_root() and
 *     create its root directory with btrfs_make_root_dir().
 *
 * NOTE(review): this listing omits some short lines (braces, loops, labels
 * such as the reinit_data_reloc target, gotos, error checks, the return), so
 * the exact control flow is not visible here.
 */
12176 static int reset_balance(struct btrfs_trans_handle *trans,
12177 struct btrfs_fs_info *fs_info)
12179 struct btrfs_root *root = fs_info->tree_root;
12180 struct btrfs_path path;
12181 struct extent_buffer *leaf;
12182 struct btrfs_key key;
12183 int del_slot, del_nr = 0;
12187 btrfs_init_path(&path);
12188 key.objectid = BTRFS_BALANCE_OBJECTID;
12189 key.type = BTRFS_BALANCE_ITEM_KEY;
12191 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12196 goto reinit_data_reloc;
12201 ret = btrfs_del_item(trans, root, &path);
12204 btrfs_release_path(&path);
/* Next: the root items of the tree reloc roots. */
12206 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12207 key.type = BTRFS_ROOT_ITEM_KEY;
12209 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12213 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12218 ret = btrfs_del_items(trans, root, &path,
12225 btrfs_release_path(&path);
12228 ret = btrfs_search_slot(trans, root, &key, &path,
12235 leaf = path.nodes[0];
12236 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12237 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
12239 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
12244 del_slot = path.slots[0];
12253 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
12257 btrfs_release_path(&path);
/* From here on "root" refers to the data reloc tree, not the tree root. */
12260 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
12261 key.type = BTRFS_ROOT_ITEM_KEY;
12262 key.offset = (u64)-1;
12263 root = btrfs_read_fs_root(fs_info, &key);
12264 if (IS_ERR(root)) {
12265 fprintf(stderr, "Error reading data reloc tree\n");
12266 ret = PTR_ERR(root);
12269 record_root_in_trans(trans, root);
12270 ret = btrfs_fsck_reinit_root(trans, root, 0);
12273 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
12275 btrfs_release_path(&path);
/*
 * Rebuild the extent tree inside the given transaction.
 *
 * Sequence of the calls made here: reject filesystems that carry the
 * MIXED_GROUPS incompat flag, pin in-use metadata via pin_metadata_blocks(),
 * release and recreate the in-memory block groups
 * (btrfs_free_block_groups() followed by reset_block_groups()), re-initialize
 * fs_info->extent_root via btrfs_fsck_reinit_root(), insert an item into the
 * extent root for each block group returned by
 * btrfs_lookup_first_block_group(), and finally call reset_balance().
 *
 * @trans:   running transaction
 * @fs_info: filesystem whose extent tree is rebuilt
 *
 * Each failing step reports its own message on stderr.
 */
12279 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
12280 struct btrfs_fs_info *fs_info)
12286 * The only reason we don't do this is because right now we're just
12287 * walking the trees we find and pinning down their bytes, we don't look
12288 * at any of the leaves. In order to do mixed groups we'd have to check
12289 * the leaves of any fs roots and pin down the bytes for any file
12290 * extents we find. Not hard but why do it if we don't have to?
12292 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
12293 fprintf(stderr, "We don't support re-initing the extent tree "
12294 "for mixed block groups yet, please notify a btrfs "
12295 "developer you want to do this so they can add this "
12296 "functionality.\n");
12301 * first we need to walk all of the trees except the extent tree and pin
12302 * down the bytes that are in use so we don't overwrite any existing
12305 ret = pin_metadata_blocks(fs_info);
12307 fprintf(stderr, "error pinning down used bytes\n");
12312 * Need to drop all the block groups since we're going to recreate all
12315 btrfs_free_block_groups(fs_info);
12316 ret = reset_block_groups(fs_info);
12318 fprintf(stderr, "error resetting the block groups\n");
12322 /* Ok we can allocate now, reinit the extent root */
12323 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
12325 fprintf(stderr, "extent root initialization failed\n");
12327 * When the transaction code is updated we should end the
12328 * transaction, but for now progs only knows about commit so
12329 * just return an error.
12335 * Now we have all the in-memory block groups setup so we can make
12336 * allocations properly, and the metadata we care about is safe since we
12337 * pinned all of it above.
12340 struct btrfs_block_group_cache *cache;
12342 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Advance the cursor past this block group before inserting its item */
12345 start = cache->key.objectid + cache->key.offset;
12346 ret = btrfs_insert_item(trans, fs_info->extent_root,
12347 &cache->key, &cache->item,
12348 sizeof(cache->item));
12350 fprintf(stderr, "Error adding block group\n");
12353 btrfs_extent_post_op(trans, fs_info->extent_root);
12356 ret = reset_balance(trans, fs_info);
12358 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Re-COW a single metadata block in its own transaction.
 *
 * The owner root is looked up from btrfs_header_owner(eb), a transaction is
 * started on it, and btrfs_search_slot() is run for the first key of eb with
 * path.lowest_level set to the level of eb. The transaction is then
 * committed and the path released.
 *
 * @root: any root of the filesystem; only root->fs_info is read before the
 *        variable is reused for the owner root
 * @eb:   metadata block to re-COW
 *
 * Returns PTR_ERR() of the failing lookup when the owner root cannot be
 * read or the transaction cannot be started.
 *
 * NOTE(review): presumably the final argument 1 of btrfs_search_slot()
 * requests COW - confirm. The return value of btrfs_commit_transaction()
 * is not captured here.
 */
12363 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
12365 struct btrfs_path path;
12366 struct btrfs_trans_handle *trans;
12367 struct btrfs_key key;
12370 printf("Recowing metadata block %llu\n", eb->start);
/* Build the key of the root that owns this block */
12371 key.objectid = btrfs_header_owner(eb);
12372 key.type = BTRFS_ROOT_ITEM_KEY;
12373 key.offset = (u64)-1;
12375 root = btrfs_read_fs_root(root->fs_info, &key);
12376 if (IS_ERR(root)) {
12377 fprintf(stderr, "Couldn't find owner root %llu\n",
12379 return PTR_ERR(root);
12382 trans = btrfs_start_transaction(root, 1);
12384 return PTR_ERR(trans);
12386 btrfs_init_path(&path);
/* Stop the search at the level of eb; take the first key of eb as target */
12387 path.lowest_level = btrfs_header_level(eb);
12388 if (path.lowest_level)
12389 btrfs_node_key_to_cpu(eb, &key, 0);
12391 btrfs_item_key_to_cpu(eb, &key, 0);
12393 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12394 btrfs_commit_transaction(trans, root);
12395 btrfs_release_path(&path);
/*
 * Delete one previously recorded bad item from the tree it lives in.
 *
 * The root identified by bad->root_id is read, a transaction is started on
 * it, bad->key is searched for and the item at the resulting path is removed
 * with btrfs_del_item(). The transaction is then committed.
 *
 * @root: any root of the filesystem; only root->fs_info is read before the
 *        variable is reused for the owning root
 * @bad:  record holding the root id and key of the item to delete
 *
 * Returns PTR_ERR() of the failing lookup when the root cannot be read or
 * the transaction cannot be started.
 *
 * NOTE(review): the return value of btrfs_commit_transaction() is not
 * captured here.
 */
12399 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12401 struct btrfs_path path;
12402 struct btrfs_trans_handle *trans;
12403 struct btrfs_key key;
12406 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12407 bad->key.type, bad->key.offset);
/* Key of the root item for the tree that contains the bad item */
12408 key.objectid = bad->root_id;
12409 key.type = BTRFS_ROOT_ITEM_KEY;
12410 key.offset = (u64)-1;
12412 root = btrfs_read_fs_root(root->fs_info, &key);
12413 if (IS_ERR(root)) {
12414 fprintf(stderr, "Couldn't find owner root %llu\n",
12416 return PTR_ERR(root);
12419 trans = btrfs_start_transaction(root, 1);
12421 return PTR_ERR(trans);
12423 btrfs_init_path(&path);
12424 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12430 ret = btrfs_del_item(trans, root, &path);
12432 btrfs_commit_transaction(trans, root);
12433 btrfs_release_path(&path);
/*
 * Clear the log tree pointer stored in the super block.
 *
 * Starts a transaction on @root, sets both the log root and the log root
 * level in root->fs_info->super_copy to 0, and commits the transaction.
 * ret carries PTR_ERR(trans) when the transaction cannot be started, or
 * otherwise the result of btrfs_commit_transaction().
 */
12437 static int zero_log_tree(struct btrfs_root *root)
12439 struct btrfs_trans_handle *trans;
12442 trans = btrfs_start_transaction(root, 1);
12443 if (IS_ERR(trans)) {
12444 ret = PTR_ERR(trans);
12447 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12448 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12449 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and store checksums for one on-disk data range.
 *
 * Walks the range beginning at @start in steps of fs_info->sectorsize: each
 * step reads data at start + offset into @buf with read_extent_data() and
 * passes it to btrfs_csum_file_block() together with start + len and
 * start + offset.
 *
 * @trans:     transaction used for the checksum insertions
 * @csum_root: root passed to btrfs_csum_file_block(); also supplies fs_info
 * @buf:       scratch buffer; callers in this file allocate sectorsize bytes
 * @start:     first byte of the range
 */
12453 static int populate_csum(struct btrfs_trans_handle *trans,
12454 struct btrfs_root *csum_root, char *buf, u64 start,
12457 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12462 while (offset < len) {
12463 sectorsize = fs_info->sectorsize;
12464 ret = read_extent_data(fs_info, buf, start + offset,
12468 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12469 start + offset, buf, sectorsize);
12472 offset += sectorsize;
/*
 * Generate checksums for the regular file extents of a single fs/subvolume
 * tree.
 *
 * Iterates the items of @cur_root with btrfs_next_item(). For each item of
 * type BTRFS_EXTENT_DATA_KEY whose file extent type is
 * BTRFS_FILE_EXTENT_REG, the disk bytenr and disk length are read and handed
 * to populate_csum().
 *
 * @trans:     transaction used for the checksum insertions
 * @csum_root: destination checksum root
 * @cur_root:  fs/subvolume tree to scan (searched read-only: NULL trans)
 *
 * A sectorsize-sized scratch buffer is allocated here with malloc().
 */
12477 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12478 struct btrfs_root *csum_root,
12479 struct btrfs_root *cur_root)
12481 struct btrfs_path path;
12482 struct btrfs_key key;
12483 struct extent_buffer *node;
12484 struct btrfs_file_extent_item *fi;
12491 buf = malloc(cur_root->fs_info->sectorsize);
12495 btrfs_init_path(&path);
12499 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12502 /* Iterate all regular file extents and fill its csum */
12504 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12506 if (key.type != BTRFS_EXTENT_DATA_KEY)
12508 node = path.nodes[0];
12509 slot = path.slots[0];
12510 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12511 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* The checksums cover the on-disk extent, not the file range */
12513 start = btrfs_file_extent_disk_bytenr(node, fi);
12514 len = btrfs_file_extent_disk_num_bytes(node, fi);
12516 ret = populate_csum(trans, csum_root, buf, start, len);
/* NOTE(review): -EEXIST is special-cased; presumably tolerated - confirm */
12517 if (ret == -EEXIST)
12523 * TODO: if next leaf is corrupted, jump to nearest next valid
12526 ret = btrfs_next_item(cur_root, &path);
12536 btrfs_release_path(&path);
/*
 * Generate checksums by scanning every fs/subvolume tree.
 *
 * Walks fs_info->tree_root starting at BTRFS_FS_TREE_OBJECTID. Items are
 * filtered on objectid (compared against BTRFS_LAST_FREE_OBJECTID), on type
 * BTRFS_ROOT_ITEM_KEY and on is_fstree(); for a matching item the root is
 * read with btrfs_read_fs_root() and passed to
 * fill_csum_tree_from_one_fs_root().
 *
 * @trans:     transaction used for the checksum insertions
 * @csum_root: destination checksum root; also supplies fs_info
 */
12541 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12542 struct btrfs_root *csum_root)
12544 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12545 struct btrfs_path path;
12546 struct btrfs_root *tree_root = fs_info->tree_root;
12547 struct btrfs_root *cur_root;
12548 struct extent_buffer *node;
12549 struct btrfs_key key;
12553 btrfs_init_path(&path);
12554 key.objectid = BTRFS_FS_TREE_OBJECTID;
12556 key.type = BTRFS_ROOT_ITEM_KEY;
12557 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12566 node = path.nodes[0];
12567 slot = path.slots[0];
12568 btrfs_item_key_to_cpu(node, &key, slot);
12569 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12571 if (key.type != BTRFS_ROOT_ITEM_KEY)
12573 if (!is_fstree(key.objectid))
/* Same (u64)-1 offset the other btrfs_read_fs_root() callers here use */
12575 key.offset = (u64)-1;
12577 cur_root = btrfs_read_fs_root(fs_info, &key);
12578 if (IS_ERR(cur_root) || !cur_root) {
12579 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12583 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12588 ret = btrfs_next_item(tree_root, &path);
12598 btrfs_release_path(&path);
/*
 * Generate checksums by scanning the extent tree.
 *
 * Walks the leaves of csum_root->fs_info->extent_root with
 * btrfs_next_leaf(). Items are checked for type BTRFS_EXTENT_ITEM_KEY and
 * for the BTRFS_EXTENT_FLAG_DATA flag, and populate_csum() is called with
 * key.objectid as the start of the range.
 *
 * @trans:     transaction used for the checksum insertions
 * @csum_root: destination checksum root
 *
 * A sectorsize-sized scratch buffer is allocated here with malloc().
 */
12602 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12603 struct btrfs_root *csum_root)
12605 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12606 struct btrfs_path path;
12607 struct btrfs_extent_item *ei;
12608 struct extent_buffer *leaf;
12610 struct btrfs_key key;
12613 btrfs_init_path(&path);
12615 key.type = BTRFS_EXTENT_ITEM_KEY;
12617 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12619 btrfs_release_path(&path);
12623 buf = malloc(csum_root->fs_info->sectorsize);
12625 btrfs_release_path(&path);
/* Move to the next leaf once the current one is exhausted */
12630 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12631 ret = btrfs_next_leaf(extent_root, &path);
12639 leaf = path.nodes[0];
12641 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12642 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12647 ei = btrfs_item_ptr(leaf, path.slots[0],
12648 struct btrfs_extent_item);
12649 if (!(btrfs_extent_flags(leaf, ei) &
12650 BTRFS_EXTENT_FLAG_DATA)) {
12655 ret = populate_csum(trans, csum_root, buf, key.objectid,
12662 btrfs_release_path(&path);
12668 * Recalculate the csum and put it into the csum tree.
12670 * Extent tree init will wipe out all the extent info, so in that case, we
12671 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
12672 * will use fs/subvol trees to init the csum tree.
12674 static int fill_csum_tree(struct btrfs_trans_handle *trans,
12675 struct btrfs_root *csum_root,
12676 int search_fs_tree)
/*
 * Pure dispatcher: a non-zero search_fs_tree selects the fs/subvolume tree
 * scan, zero selects the extent tree scan. The chosen helper's return value
 * is passed straight back to the caller.
 */
12678 if (search_fs_tree)
12679 return fill_csum_tree_from_fs(trans, csum_root);
12681 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the file-scope roots_info_cache.
 *
 * Repeatedly takes the first entry of the cache tree, removes it and maps it
 * back to its enclosing struct root_item_info, until the tree is empty.
 * The tree itself is then freed and the global pointer reset to NULL, so
 * the function can be called again safely.
 */
12684 static void free_roots_info_cache(void)
/* Nothing to do when the cache was never built */
12686 if (!roots_info_cache)
12689 while (!cache_tree_empty(roots_info_cache)) {
12690 struct cache_extent *entry;
12691 struct root_item_info *rii;
12693 entry = first_cache_extent(roots_info_cache);
12696 remove_cache_extent(roots_info_cache, entry);
12697 rii = container_of(entry, struct root_item_info, cache_extent);
12701 free(roots_info_cache);
12702 roots_info_cache = NULL;
/*
 * Populate roots_info_cache from the extent tree.
 *
 * Scans info->extent_root for tree block extents (EXTENT_ITEM entries with
 * BTRFS_EXTENT_FLAG_TREE_BLOCK set, or METADATA_ITEM entries). For those
 * whose first inline ref has type BTRFS_TREE_BLOCK_REF_KEY, the ref offset
 * is taken as the root id and a struct root_item_info keyed by that id is
 * looked up or created. The record keeps the highest level seen for the
 * root together with the bytenr and generation of the extent at that level.
 *
 * @info: filesystem whose extent tree is scanned (read-only search)
 *
 * The cache tree is allocated on first use and stays allocated until
 * free_roots_info_cache() is called.
 */
12705 static int build_roots_info_cache(struct btrfs_fs_info *info)
12708 struct btrfs_key key;
12709 struct extent_buffer *leaf;
12710 struct btrfs_path path;
12712 if (!roots_info_cache) {
12713 roots_info_cache = malloc(sizeof(*roots_info_cache));
12714 if (!roots_info_cache)
12716 cache_tree_init(roots_info_cache);
12719 btrfs_init_path(&path);
12721 key.type = BTRFS_EXTENT_ITEM_KEY;
12723 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12726 leaf = path.nodes[0];
12729 struct btrfs_key found_key;
12730 struct btrfs_extent_item *ei;
12731 struct btrfs_extent_inline_ref *iref;
12732 int slot = path.slots[0];
12737 struct cache_extent *entry;
12738 struct root_item_info *rii;
/* Current leaf exhausted: step to the next one and refresh leaf/slot */
12740 if (slot >= btrfs_header_nritems(leaf)) {
12741 ret = btrfs_next_leaf(info->extent_root, &path);
12748 leaf = path.nodes[0];
12749 slot = path.slots[0];
12752 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12754 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12755 found_key.type != BTRFS_METADATA_ITEM_KEY)
12758 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12759 flags = btrfs_extent_flags(leaf, ei);
12761 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12762 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * METADATA_ITEM: the level comes from the key offset and the inline ref
 * follows the extent item directly. Otherwise a btrfs_tree_block_info sits
 * in between and carries the level.
 */
12765 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12766 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12767 level = found_key.offset;
12769 struct btrfs_tree_block_info *binfo;
12771 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12772 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12773 level = btrfs_tree_block_level(leaf, binfo);
12777 * For a root extent, it must be of the following type and the
12778 * first (and only one) iref in the item.
12780 type = btrfs_extent_inline_ref_type(leaf, iref);
12781 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12784 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12785 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12787 rii = malloc(sizeof(struct root_item_info));
12792 rii->cache_extent.start = root_id;
12793 rii->cache_extent.size = 1;
/* (u8)-1 marks a record that has no level assigned yet */
12794 rii->level = (u8)-1;
12795 entry = &rii->cache_extent;
12796 ret = insert_cache_extent(roots_info_cache, entry);
12799 rii = container_of(entry, struct root_item_info,
12803 ASSERT(rii->cache_extent.start == root_id);
12804 ASSERT(rii->cache_extent.size == 1);
/* A higher level (or the first hit) replaces the stored candidate */
12806 if (level > rii->level || rii->level == (u8)-1) {
12807 rii->level = level;
12808 rii->bytenr = found_key.objectid;
12809 rii->gen = btrfs_extent_generation(leaf, ei);
12810 rii->node_count = 1;
12811 } else if (level == rii->level) {
12819 btrfs_release_path(&path);
/*
 * Compare one on-disk root item against roots_info_cache and optionally
 * rewrite it.
 *
 * The root item at path->nodes[0] / path->slots[0] is copied out and its
 * bytenr, level and generation are compared with the cached record for
 * root_key->objectid. On a mismatch a diagnostic is printed (suppressed when
 * both read_only_mode and repair are set) and, when read_only_mode is 0, the
 * three fields are overwritten from the cache and written back into the
 * leaf.
 *
 * @path:           path positioned on the root item to examine
 * @root_key:       key of that root item; objectid is the root id
 * @read_only_mode: non-zero means report only, do not modify the leaf
 *
 * Error messages are emitted when no cache entry exists for the root, when
 * rii->node_count is not 1, and when the root item generation is newer than
 * the cached one.
 */
12824 static int maybe_repair_root_item(struct btrfs_path *path,
12825 const struct btrfs_key *root_key,
12826 const int read_only_mode)
12828 const u64 root_id = root_key->objectid;
12829 struct cache_extent *entry;
12830 struct root_item_info *rii;
12831 struct btrfs_root_item ri;
12832 unsigned long offset;
12834 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12837 "Error: could not find extent items for root %llu\n",
12838 root_key->objectid);
12842 rii = container_of(entry, struct root_item_info, cache_extent);
12843 ASSERT(rii->cache_extent.start == root_id);
12844 ASSERT(rii->cache_extent.size == 1);
12846 if (rii->node_count != 1) {
12848 "Error: could not find btree root extent for root %llu\n",
/* Work on a local copy of the root item; offset is reused for write-back */
12853 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12854 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12856 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12857 btrfs_root_level(&ri) != rii->level ||
12858 btrfs_root_generation(&ri) != rii->gen) {
12861 * If we're in repair mode but our caller told us to not update
12862 * the root item, i.e. just check if it needs to be updated, don't
12863 * print this message, since the caller will call us again shortly
12864 * for the same root item without read only mode (the caller will
12865 * open a transaction first).
12867 if (!(read_only_mode && repair))
12869 "%sroot item for root %llu,"
12870 " current bytenr %llu, current gen %llu, current level %u,"
12871 " new bytenr %llu, new gen %llu, new level %u\n",
12872 (read_only_mode ? "" : "fixing "),
12874 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12875 btrfs_root_level(&ri),
12876 rii->bytenr, rii->gen, rii->level);
12878 if (btrfs_root_generation(&ri) > rii->gen) {
12880 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12881 root_id, btrfs_root_generation(&ri), rii->gen);
12885 if (!read_only_mode) {
12886 btrfs_set_root_bytenr(&ri, rii->bytenr);
12887 btrfs_set_root_level(&ri, rii->level);
12888 btrfs_set_root_generation(&ri, rii->gen);
12889 write_extent_buffer(path->nodes[0], &ri,
12890 offset, sizeof(ri));
12900 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12901 * caused read-only snapshots to be corrupted if they were created at a moment
12902 * when the source subvolume/snapshot had orphan items. The issue was that the
12903 * on-disk root items became incorrect, referring to the pre orphan cleanup root
12904 * node instead of the post orphan cleanup root node.
12905 * So this function, and its callees, just detect and fix those cases. Even
12906 * though the regression was for read-only snapshots, this function applies to
12907 * any snapshot/subvolume root.
12908 * This must be run before any other repair code - not doing so makes other
12909 * repair code delete or modify backrefs in the extent tree for example, which
12910 * will result in an inconsistent fs after repairing the root items.
/*
 * Check, and in repair mode fix, the root items stored in the tree root.
 *
 * Builds roots_info_cache, then walks info->tree_root from
 * BTRFS_FIRST_FREE_OBJECTID. Each BTRFS_ROOT_ITEM_KEY item is handed to
 * maybe_repair_root_item(); BTRFS_TREE_RELOC_OBJECTID is tested for
 * separately. The helper runs in read-only mode while no transaction is
 * open (trans == NULL) and in write mode otherwise.
 *
 * @info: filesystem to process
 *
 * The cache is released with free_roots_info_cache() before returning.
 *
 * NOTE(review): the return value of the final btrfs_commit_transaction()
 * is not captured on the visible line.
 */
12912 static int repair_root_items(struct btrfs_fs_info *info)
12914 struct btrfs_path path;
12915 struct btrfs_key key;
12916 struct extent_buffer *leaf;
12917 struct btrfs_trans_handle *trans = NULL;
12920 int need_trans = 0;
12922 btrfs_init_path(&path);
12924 ret = build_roots_info_cache(info);
12928 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12929 key.type = BTRFS_ROOT_ITEM_KEY;
12934 * Avoid opening and committing transactions if a leaf doesn't have
12935 * any root items that need to be fixed, so that we avoid rotating
12936 * backup roots unnecessarily.
12939 trans = btrfs_start_transaction(info->tree_root, 1);
12940 if (IS_ERR(trans)) {
12941 ret = PTR_ERR(trans);
12946 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12950 leaf = path.nodes[0];
12953 struct btrfs_key found_key;
/* End of leaf: find where to resume, drop the path, close any transaction */
12955 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12956 int no_more_keys = find_next_key(&path, &key);
12958 btrfs_release_path(&path);
12960 ret = btrfs_commit_transaction(trans,
12972 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12974 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12976 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* No open transaction means "check only" for the helper */
12979 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12983 if (!trans && repair) {
12986 btrfs_release_path(&path);
12996 free_roots_info_cache();
12997 btrfs_release_path(&path);
12999 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the free space cache of every block group and invalidate the cache
 * generation in the super block.
 *
 * Block groups are visited through btrfs_lookup_first_block_group(), with
 * the cursor advanced to the end of each group after
 * btrfs_clear_free_space_cache() is called on it. A transaction is then
 * started on the tree root, the super block cache generation is set to
 * (u64)-1 and the transaction is committed.
 *
 * @fs_info: filesystem to operate on
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started.
 *
 * NOTE(review): the return value of btrfs_commit_transaction() is not
 * captured here.
 */
13006 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13008 struct btrfs_trans_handle *trans;
13009 struct btrfs_block_group_cache *bg_cache;
13013 /* Clear all free space cache inodes and its extent data */
13015 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13018 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
13021 current = bg_cache->key.objectid + bg_cache->key.offset;
13024 /* Don't forget to set cache_generation to -1 */
13025 trans = btrfs_start_transaction(fs_info->tree_root, 0);
13026 if (IS_ERR(trans)) {
13027 error("failed to update super block cache generation");
13028 return PTR_ERR(trans);
13030 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
13031 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Clear the free space cache in the requested format and report progress.
 *
 * clear_version 1: tests the FREE_SPACE_TREE compat_ro flag (the visible
 *                  message directs the user to "--clear-space-cache v2"),
 *                  and calls clear_free_space_cache().
 * clear_version 2: tests the same flag, printing "no free space cache v2 to
 *                  clear" when it is not set, and calls
 *                  btrfs_clear_free_space_tree().
 *
 * @fs_info: filesystem to operate on
 */
13036 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
13041 if (clear_version == 1) {
13042 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13044 "free space cache v2 detected, use --clear-space-cache v2");
13048 printf("Clearing free space cache\n");
13049 ret = clear_free_space_cache(fs_info);
13051 error("failed to clear free space cache");
13054 printf("Free space cache cleared\n");
13056 } else if (clear_version == 2) {
13057 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13058 printf("no free space cache v2 to clear\n");
13062 printf("Clear free space cache v2\n");
13063 ret = btrfs_clear_free_space_tree(fs_info);
13065 error("failed to clear free space cache v2: %d", ret);
13068 printf("free space cache v2 cleared\n");
/*
 * Usage text for "btrfs check": synopsis, description, and one line per
 * command line option. Passed to usage() by cmd_check() when argument
 * parsing fails. Keep the option list in sync with the long_options table
 * and the getopt string in cmd_check().
 */
13075 const char * const cmd_check_usage[] = {
13076 "btrfs check [options] <device>",
13077 "Check structural integrity of a filesystem (unmounted).",
13078 "Check structural integrity of an unmounted filesystem. Verify internal",
13079 "trees' consistency and item connectivity. In the repair mode try to",
13080 "fix the problems found. ",
13081 "WARNING: the repair mode is considered dangerous",
13083 "-s|--super <superblock> use this superblock copy",
13084 "-b|--backup use the first valid backup root copy",
13085 "--force skip mount checks, repair is not possible",
13086 "--repair try to repair the filesystem",
13087 "--readonly run in read-only mode (default)",
13088 "--init-csum-tree create a new CRC tree",
13089 "--init-extent-tree create a new extent tree",
13090 "--mode <MODE> allows choice of memory/IO trade-offs",
13091 " where MODE is one of:",
13092 " original - read inodes and extents to memory (requires",
13093 " more memory, does less IO)",
13094 " lowmem - try to use less memory but read blocks again",
13096 "--check-data-csum verify checksums of data blocks",
13097 "-Q|--qgroup-report print a report on qgroup consistency",
13098 "-E|--subvol-extents <subvolid>",
13099 " print subvolume extents and sharing state",
13100 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
13101 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
13102 "-p|--progress indicate progress",
13103 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
13107 int cmd_check(int argc, char **argv)
13109 struct cache_tree root_cache;
13110 struct btrfs_root *root;
13111 struct btrfs_fs_info *info;
13114 u64 tree_root_bytenr = 0;
13115 u64 chunk_root_bytenr = 0;
13116 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
13120 int init_csum_tree = 0;
13122 int clear_space_cache = 0;
13123 int qgroup_report = 0;
13124 int qgroups_repaired = 0;
13125 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
13130 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
13131 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
13132 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
13133 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
13134 GETOPT_VAL_FORCE };
13135 static const struct option long_options[] = {
13136 { "super", required_argument, NULL, 's' },
13137 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
13138 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
13139 { "init-csum-tree", no_argument, NULL,
13140 GETOPT_VAL_INIT_CSUM },
13141 { "init-extent-tree", no_argument, NULL,
13142 GETOPT_VAL_INIT_EXTENT },
13143 { "check-data-csum", no_argument, NULL,
13144 GETOPT_VAL_CHECK_CSUM },
13145 { "backup", no_argument, NULL, 'b' },
13146 { "subvol-extents", required_argument, NULL, 'E' },
13147 { "qgroup-report", no_argument, NULL, 'Q' },
13148 { "tree-root", required_argument, NULL, 'r' },
13149 { "chunk-root", required_argument, NULL,
13150 GETOPT_VAL_CHUNK_TREE },
13151 { "progress", no_argument, NULL, 'p' },
13152 { "mode", required_argument, NULL,
13154 { "clear-space-cache", required_argument, NULL,
13155 GETOPT_VAL_CLEAR_SPACE_CACHE},
13156 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
13157 { NULL, 0, NULL, 0}
13160 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
13164 case 'a': /* ignored */ break;
13166 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
13169 num = arg_strtou64(optarg);
13170 if (num >= BTRFS_SUPER_MIRROR_MAX) {
13172 "super mirror should be less than %d",
13173 BTRFS_SUPER_MIRROR_MAX);
13176 bytenr = btrfs_sb_offset(((int)num));
13177 printf("using SB copy %llu, bytenr %llu\n", num,
13178 (unsigned long long)bytenr);
13184 subvolid = arg_strtou64(optarg);
13187 tree_root_bytenr = arg_strtou64(optarg);
13189 case GETOPT_VAL_CHUNK_TREE:
13190 chunk_root_bytenr = arg_strtou64(optarg);
13193 ctx.progress_enabled = true;
13197 usage(cmd_check_usage);
13198 case GETOPT_VAL_REPAIR:
13199 printf("enabling repair mode\n");
13201 ctree_flags |= OPEN_CTREE_WRITES;
13203 case GETOPT_VAL_READONLY:
13206 case GETOPT_VAL_INIT_CSUM:
13207 printf("Creating a new CRC tree\n");
13208 init_csum_tree = 1;
13210 ctree_flags |= OPEN_CTREE_WRITES;
13212 case GETOPT_VAL_INIT_EXTENT:
13213 init_extent_tree = 1;
13214 ctree_flags |= (OPEN_CTREE_WRITES |
13215 OPEN_CTREE_NO_BLOCK_GROUPS);
13218 case GETOPT_VAL_CHECK_CSUM:
13219 check_data_csum = 1;
13221 case GETOPT_VAL_MODE:
13222 check_mode = parse_check_mode(optarg);
13223 if (check_mode == CHECK_MODE_UNKNOWN) {
13224 error("unknown mode: %s", optarg);
13228 case GETOPT_VAL_CLEAR_SPACE_CACHE:
13229 if (strcmp(optarg, "v1") == 0) {
13230 clear_space_cache = 1;
13231 } else if (strcmp(optarg, "v2") == 0) {
13232 clear_space_cache = 2;
13233 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
13236 "invalid argument to --clear-space-cache, must be v1 or v2");
13239 ctree_flags |= OPEN_CTREE_WRITES;
13241 case GETOPT_VAL_FORCE:
13247 if (check_argc_exact(argc - optind, 1))
13248 usage(cmd_check_usage);
13250 if (ctx.progress_enabled) {
13251 ctx.tp = TASK_NOTHING;
13252 ctx.info = task_init(print_status_check, print_status_return, &ctx);
13255 /* This check is the only reason for --readonly to exist */
13256 if (readonly && repair) {
13257 error("repair options are not compatible with --readonly");
13262 * experimental and dangerous
13264 if (repair && check_mode == CHECK_MODE_LOWMEM)
13265 warning("low-memory mode repair support is only partial");
13268 cache_tree_init(&root_cache);
13270 ret = check_mounted(argv[optind]);
13273 error("could not check mount status: %s",
13279 "%s is currently mounted, use --force if you really intend to check the filesystem",
13287 error("repair and --force is not yet supported");
13294 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
13298 "filesystem mounted, continuing because of --force");
13300 /* A block device is mounted in exclusive mode by kernel */
13301 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
13304 /* only allow partial opening under repair mode */
13306 ctree_flags |= OPEN_CTREE_PARTIAL;
13308 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
13309 chunk_root_bytenr, ctree_flags);
13311 error("cannot open file system");
13317 global_info = info;
13318 root = info->fs_root;
13319 uuid_unparse(info->super_copy->fsid, uuidbuf);
13321 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
13324 * Check the bare minimum before starting anything else that could rely
13325 * on it, namely the tree roots, any local consistency checks
13327 if (!extent_buffer_uptodate(info->tree_root->node) ||
13328 !extent_buffer_uptodate(info->dev_root->node) ||
13329 !extent_buffer_uptodate(info->chunk_root->node)) {
13330 error("critical roots corrupted, unable to check the filesystem");
13336 if (clear_space_cache) {
13337 ret = do_clear_free_space_cache(info, clear_space_cache);
13343 * repair mode will force us to commit transaction which
13344 * will make us fail to load log tree when mounting.
13346 if (repair && btrfs_super_log_root(info->super_copy)) {
13347 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
13353 ret = zero_log_tree(root);
13356 error("failed to zero log tree: %d", ret);
13361 if (qgroup_report) {
13362 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
13364 ret = qgroup_verify_all(info);
13371 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
13372 subvolid, argv[optind], uuidbuf);
13373 ret = print_extent_state(info, subvolid);
13378 if (init_extent_tree || init_csum_tree) {
13379 struct btrfs_trans_handle *trans;
13381 trans = btrfs_start_transaction(info->extent_root, 0);
13382 if (IS_ERR(trans)) {
13383 error("error starting transaction");
13384 ret = PTR_ERR(trans);
13389 if (init_extent_tree) {
13390 printf("Creating a new extent tree\n");
13391 ret = reinit_extent_tree(trans, info);
13397 if (init_csum_tree) {
13398 printf("Reinitialize checksum tree\n");
13399 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
13401 error("checksum tree initialization failed: %d",
13408 ret = fill_csum_tree(trans, info->csum_root,
13412 error("checksum tree refilling failed: %d", ret);
13417 * Ok now we commit and run the normal fsck, which will add
13418 * extent entries for all of the items it finds.
13420 ret = btrfs_commit_transaction(trans, info->extent_root);
13425 if (!extent_buffer_uptodate(info->extent_root->node)) {
13426 error("critical: extent_root, unable to check the filesystem");
13431 if (!extent_buffer_uptodate(info->csum_root->node)) {
13432 error("critical: csum_root, unable to check the filesystem");
13438 ret = do_check_chunks_and_extents(info);
13442 "errors found in extent allocation tree or chunk allocation");
13444 ret = repair_root_items(info);
13447 error("failed to repair root items: %s", strerror(-ret));
13451 fprintf(stderr, "Fixed %d roots.\n", ret);
13453 } else if (ret > 0) {
13455 "Found %d roots with an outdated root item.\n",
13458 "Please run a filesystem check with the option --repair to fix them.\n");
13464 if (!ctx.progress_enabled) {
13465 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13466 fprintf(stderr, "checking free space tree\n");
13468 fprintf(stderr, "checking free space cache\n");
13470 ret = check_space_cache(root);
13473 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13474 error("errors found in free space tree");
13476 error("errors found in free space cache");
13481 * We used to have to have these hole extents in between our real
13482 * extents so if we don't have this flag set we need to make sure there
13483 * are no gaps in the file extents for inodes, otherwise we can just
13484 * ignore it when this happens.
13486 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13487 ret = do_check_fs_roots(info, &root_cache);
13490 error("errors found in fs roots");
13494 fprintf(stderr, "checking csums\n");
13495 ret = check_csums(root);
13498 error("errors found in csum tree");
13502 fprintf(stderr, "checking root refs\n");
13503 /* For low memory mode, check_fs_roots_v2 handles root refs */
13504 if (check_mode != CHECK_MODE_LOWMEM) {
13505 ret = check_root_refs(root, &root_cache);
13508 error("errors found in root refs");
13513 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13514 struct extent_buffer *eb;
13516 eb = list_first_entry(&root->fs_info->recow_ebs,
13517 struct extent_buffer, recow);
13518 list_del_init(&eb->recow);
13519 ret = recow_extent_buffer(root, eb);
13522 error("fails to fix transid errors");
13527 while (!list_empty(&delete_items)) {
13528 struct bad_item *bad;
13530 bad = list_first_entry(&delete_items, struct bad_item, list);
13531 list_del_init(&bad->list);
13533 ret = delete_bad_item(root, bad);
13539 if (info->quota_enabled) {
13540 fprintf(stderr, "checking quota groups\n");
13541 ret = qgroup_verify_all(info);
13544 error("failed to check quota groups");
13548 ret = repair_qgroups(info, &qgroups_repaired);
13551 error("failed to repair quota groups");
13557 if (!list_empty(&root->fs_info->recow_ebs)) {
13558 error("transid errors in file system");
13563 printf("found %llu bytes used, ",
13564 (unsigned long long)bytes_used);
13566 printf("error(s) found\n");
13568 printf("no error found\n");
13569 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13570 printf("total tree bytes: %llu\n",
13571 (unsigned long long)total_btree_bytes);
13572 printf("total fs tree bytes: %llu\n",
13573 (unsigned long long)total_fs_tree_bytes);
13574 printf("total extent tree bytes: %llu\n",
13575 (unsigned long long)total_extent_tree_bytes);
13576 printf("btree space waste bytes: %llu\n",
13577 (unsigned long long)btree_space_waste);
13578 printf("file data blocks allocated: %llu\n referenced %llu\n",
13579 (unsigned long long)data_bytes_allocated,
13580 (unsigned long long)data_bytes_referenced);
13582 free_qgroup_counts();
13583 free_root_recs_tree(&root_cache);
13587 if (ctx.progress_enabled)
13588 task_deinit(ctx.info);