2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87 struct extent_backref {
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
98 return rb_entry(node, struct extent_backref, node);
101 struct data_backref {
102 struct extent_backref node;
/*
 * Error bit flags. Each condition has its own bit (bits 1 through 17; no
 * flag in this list uses bit 0), so several flags can be combined in one
 * integer.
 */
116 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
117 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
118 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
119 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
120 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
121 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but not match */
122 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
123 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
124 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
125 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
126 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
127 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
128 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM no reference */
129 #define NO_INODE_ITEM (1<<14) /* no inode_item */
130 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
131 #define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
132 #define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but not match */
134 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 return container_of(back, struct data_backref, node);
139 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
141 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
142 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
143 struct data_backref *back1 = to_data_backref(ext1);
144 struct data_backref *back2 = to_data_backref(ext2);
146 WARN_ON(!ext1->is_data);
147 WARN_ON(!ext2->is_data);
149 /* parent and root are a union, so this covers both */
150 if (back1->parent > back2->parent)
152 if (back1->parent < back2->parent)
155 /* This is a full backref and the parents match. */
156 if (back1->node.full_backref)
159 if (back1->owner > back2->owner)
161 if (back1->owner < back2->owner)
164 if (back1->offset > back2->offset)
166 if (back1->offset < back2->offset)
169 if (back1->found_ref && back2->found_ref) {
170 if (back1->disk_bytenr > back2->disk_bytenr)
172 if (back1->disk_bytenr < back2->disk_bytenr)
175 if (back1->bytes > back2->bytes)
177 if (back1->bytes < back2->bytes)
185 * Much like data_backref, just removed the undetermined members
186 * and change it to use list_head.
187 * During extent scan, it is stored in root->orphan_data_extent.
188 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
190 struct orphan_data_extent {
191 struct list_head list;
199 struct tree_backref {
200 struct extent_backref node;
207 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
209 return container_of(back, struct tree_backref, node);
212 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
214 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
215 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
216 struct tree_backref *back1 = to_tree_backref(ext1);
217 struct tree_backref *back2 = to_tree_backref(ext2);
219 WARN_ON(ext1->is_data);
220 WARN_ON(ext2->is_data);
222 /* parent and root are a union, so this covers both */
223 if (back1->parent > back2->parent)
225 if (back1->parent < back2->parent)
231 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
233 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
234 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
236 if (ext1->is_data > ext2->is_data)
239 if (ext1->is_data < ext2->is_data)
242 if (ext1->full_backref > ext2->full_backref)
244 if (ext1->full_backref < ext2->full_backref)
248 return compare_data_backref(node1, node2);
250 return compare_tree_backref(node1, node2);
253 /* Explicit initialization for extent_record::flag_block_full_backref */
/* That member is declared as a 2-bit bitfield, which is wide enough to hold the value 2. */
254 enum { FLAG_UNSET = 2 };
256 struct extent_record {
257 struct list_head backrefs;
258 struct list_head dups;
259 struct rb_root backref_tree;
260 struct list_head list;
261 struct cache_extent cache;
262 struct btrfs_disk_key parent_key;
267 u64 extent_item_refs;
269 u64 parent_generation;
273 unsigned int flag_block_full_backref:2;
274 unsigned int found_rec:1;
275 unsigned int content_checked:1;
276 unsigned int owner_ref_checked:1;
277 unsigned int is_root:1;
278 unsigned int metadata:1;
279 unsigned int bad_full_backref:1;
280 unsigned int crossing_stripes:1;
281 unsigned int wrong_chunk_type:1;
284 static inline struct extent_record* to_extent_record(struct list_head *entry)
286 return container_of(entry, struct extent_record, list);
289 struct inode_backref {
290 struct list_head list;
291 unsigned int found_dir_item:1;
292 unsigned int found_dir_index:1;
293 unsigned int found_inode_ref:1;
303 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
305 return list_entry(entry, struct inode_backref, list);
308 struct root_item_record {
309 struct list_head list;
315 struct btrfs_key drop_key;
/*
 * Error bits for a single name reference. In this file they are OR-ed into
 * the "errors" field of a backref and turned into text by print_ref_error().
 */
318 #define REF_ERR_NO_DIR_ITEM (1 << 0)
319 #define REF_ERR_NO_DIR_INDEX (1 << 1)
320 #define REF_ERR_NO_INODE_REF (1 << 2)
321 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
322 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
323 #define REF_ERR_DUP_INODE_REF (1 << 5)
324 #define REF_ERR_INDEX_UNMATCH (1 << 6)
325 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
326 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 0x100
327 #define REF_ERR_NO_ROOT_REF (1 << 9)
328 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
329 #define REF_ERR_DUP_ROOT_REF (1 << 11)
330 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
332 struct file_extent_hole {
338 struct inode_record {
339 struct list_head backrefs;
340 unsigned int checked:1;
341 unsigned int merging:1;
342 unsigned int found_inode_item:1;
343 unsigned int found_dir_item:1;
344 unsigned int found_file_extent:1;
345 unsigned int found_csum_item:1;
346 unsigned int some_csum_missing:1;
347 unsigned int nodatasum:1;
360 struct rb_root holes;
361 struct list_head orphan_extents;
/*
 * Error bits for an inode record. In this file they are OR-ed into the
 * record's "errors" field and decoded into messages by print_inode_error().
 */
366 #define I_ERR_NO_INODE_ITEM (1 << 0)
367 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
368 #define I_ERR_DUP_INODE_ITEM (1 << 2)
369 #define I_ERR_DUP_DIR_INDEX (1 << 3)
370 #define I_ERR_ODD_DIR_ITEM (1 << 4)
371 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
372 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
373 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
374 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 0x100
375 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
376 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 0x400
377 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
378 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
379 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
380 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
382 struct root_backref {
383 struct list_head list;
384 unsigned int found_dir_item:1;
385 unsigned int found_dir_index:1;
386 unsigned int found_back_ref:1;
387 unsigned int found_forward_ref:1;
388 unsigned int reachable:1;
397 static inline struct root_backref* to_root_backref(struct list_head *entry)
399 return list_entry(entry, struct root_backref, list);
403 struct list_head backrefs;
404 struct cache_extent cache;
405 unsigned int found_root_item:1;
411 struct cache_extent cache;
416 struct cache_extent cache;
417 struct cache_tree root_cache;
418 struct cache_tree inode_cache;
419 struct inode_record *current;
428 struct walk_control {
429 struct cache_tree shared;
430 struct shared_node *nodes[BTRFS_MAX_LEVEL];
436 struct btrfs_key key;
438 struct list_head list;
441 struct extent_entry {
446 struct list_head list;
449 struct root_item_info {
450 /* level of the root */
452 /* number of nodes at this level, must be 1 for a root */
456 struct cache_extent cache_extent;
460 * Error bit for low memory mode check.
462 * Currently no caller cares about it yet. Just internal use for error
/*
 * Every flag below occupies its own bit so that any combination of them can
 * be stored in, and tested from, a single integer.
 *
 * CROSSING_STRIPE_BOUNDARY used to share bit 4 with REFERENCER_MISMATCH,
 * which made the two conditions indistinguishable once OR-ed into an error
 * value. It now uses the first free bit (9); all other values are unchanged.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
476 static void *print_status_check(void *p)
478 struct task_ctx *priv = p;
479 const char work_indicator[] = { '.', 'o', 'O', 'o' };
481 static char *task_position_string[] = {
483 "checking free space cache",
487 task_period_start(priv->info, 1000 /* 1s */);
489 if (priv->tp == TASK_NOTHING)
493 printf("%s [%c]\r", task_position_string[priv->tp],
494 work_indicator[count % 4]);
497 task_period_wait(priv->info);
502 static int print_status_return(void *p)
510 static enum btrfs_check_mode parse_check_mode(const char *str)
512 if (strcmp(str, "lowmem") == 0)
513 return CHECK_MODE_LOWMEM;
514 if (strcmp(str, "orig") == 0)
515 return CHECK_MODE_ORIGINAL;
516 if (strcmp(str, "original") == 0)
517 return CHECK_MODE_ORIGINAL;
519 return CHECK_MODE_UNKNOWN;
522 /* Compatibility helper to allow reuse of old code */
523 static u64 first_extent_gap(struct rb_root *holes)
525 struct file_extent_hole *hole;
527 if (RB_EMPTY_ROOT(holes))
530 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
534 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
536 struct file_extent_hole *hole1;
537 struct file_extent_hole *hole2;
539 hole1 = rb_entry(node1, struct file_extent_hole, node);
540 hole2 = rb_entry(node2, struct file_extent_hole, node);
542 if (hole1->start > hole2->start)
544 if (hole1->start < hole2->start)
546 /* Now hole1->start == hole2->start */
547 if (hole1->len >= hole2->len)
549 * Hole 1 will be merge center
550 * Same hole will be merged later
553 /* Hole 2 will be merge center */
558 * Add a hole to the record
560 * This will do hole merge for copy_file_extent_holes(),
561 * which will ensure there won't be contiguous holes.
563 static int add_file_extent_hole(struct rb_root *holes,
566 struct file_extent_hole *hole;
567 struct file_extent_hole *prev = NULL;
568 struct file_extent_hole *next = NULL;
570 hole = malloc(sizeof(*hole));
575 /* Since compare will not return 0, no -EEXIST will happen */
576 rb_insert(holes, &hole->node, compare_hole);
578 /* simple merge with previous hole */
579 if (rb_prev(&hole->node))
580 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
582 if (prev && prev->start + prev->len >= hole->start) {
583 hole->len = hole->start + hole->len - prev->start;
584 hole->start = prev->start;
585 rb_erase(&prev->node, holes);
590 /* iterate merge with next holes */
592 if (!rb_next(&hole->node))
594 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
596 if (hole->start + hole->len >= next->start) {
597 if (hole->start + hole->len <= next->start + next->len)
598 hole->len = next->start + next->len -
600 rb_erase(&next->node, holes);
609 static int compare_hole_range(struct rb_node *node, void *data)
611 struct file_extent_hole *hole;
614 hole = (struct file_extent_hole *)data;
617 hole = rb_entry(node, struct file_extent_hole, node);
618 if (start < hole->start)
620 if (start >= hole->start && start < hole->start + hole->len)
626 * Delete a hole in the record
628 * This will do the hole split and is much more restrictive than add.
630 static int del_file_extent_hole(struct rb_root *holes,
633 struct file_extent_hole *hole;
634 struct file_extent_hole tmp;
639 struct rb_node *node;
646 node = rb_search(holes, &tmp, compare_hole_range, NULL);
649 hole = rb_entry(node, struct file_extent_hole, node);
650 if (start + len > hole->start + hole->len)
654 * Now there will be no overlap, delete the hole and re-add the
655 * split(s) if they exists.
657 if (start > hole->start) {
658 prev_start = hole->start;
659 prev_len = start - hole->start;
662 if (hole->start + hole->len > start + len) {
663 next_start = start + len;
664 next_len = hole->start + hole->len - start - len;
667 rb_erase(node, holes);
670 ret = add_file_extent_hole(holes, prev_start, prev_len);
675 ret = add_file_extent_hole(holes, next_start, next_len);
682 static int copy_file_extent_holes(struct rb_root *dst,
685 struct file_extent_hole *hole;
686 struct rb_node *node;
689 node = rb_first(src);
691 hole = rb_entry(node, struct file_extent_hole, node);
692 ret = add_file_extent_hole(dst, hole->start, hole->len);
695 node = rb_next(node);
700 static void free_file_extent_holes(struct rb_root *holes)
702 struct rb_node *node;
703 struct file_extent_hole *hole;
705 node = rb_first(holes);
707 hole = rb_entry(node, struct file_extent_hole, node);
708 rb_erase(node, holes);
710 node = rb_first(holes);
714 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
716 static void record_root_in_trans(struct btrfs_trans_handle *trans,
717 struct btrfs_root *root)
719 if (root->last_trans != trans->transid) {
720 root->track_dirty = 1;
721 root->last_trans = trans->transid;
722 root->commit_root = root->node;
723 extent_buffer_get(root->node);
727 static u8 imode_to_type(u32 imode)
730 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
731 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
732 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
733 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
734 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
735 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
736 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
737 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
740 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
744 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
746 struct device_record *rec1;
747 struct device_record *rec2;
749 rec1 = rb_entry(node1, struct device_record, node);
750 rec2 = rb_entry(node2, struct device_record, node);
751 if (rec1->devid > rec2->devid)
753 else if (rec1->devid < rec2->devid)
/*
 * Duplicate an inode record. The struct itself is copied with memcpy(),
 * then the backrefs list, the orphan_extents list and the holes tree are
 * reset and refilled with newly allocated copies of the source's entries.
 * Failure is reported as an ERR_PTR value (-ENOMEM is visible below).
 */
759 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
761 struct inode_record *rec;
762 struct inode_backref *backref;
763 struct inode_backref *orig;
764 struct inode_backref *tmp;
765 struct orphan_data_extent *src_orphan;
766 struct orphan_data_extent *dst_orphan;
771 rec = malloc(sizeof(*rec));
773 return ERR_PTR(-ENOMEM);
774 memcpy(rec, orig_rec, sizeof(*rec));
/*
 * The memcpy() above also copied the source's list heads and tree root;
 * reset them so the clone gets containers of its own.
 */
776 INIT_LIST_HEAD(&rec->backrefs);
777 INIT_LIST_HEAD(&rec->orphan_extents);
778 rec->holes = RB_ROOT;
/* Each backref is copied together with its name: namelen bytes plus one. */
780 list_for_each_entry(orig, &orig_rec->backrefs, list) {
781 size = sizeof(*orig) + orig->namelen + 1;
782 backref = malloc(size);
787 memcpy(backref, orig, size);
788 list_add_tail(&backref->list, &rec->backrefs);
790 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
791 dst_orphan = malloc(sizeof(*dst_orphan));
796 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
797 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
799 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
/* From here on: presumably the error path that tears the partial clone down again (TODO confirm). */
806 rb = rb_first(&rec->holes);
808 struct file_extent_hole *hole;
810 hole = rb_entry(rb, struct file_extent_hole, node);
816 if (!list_empty(&rec->backrefs))
817 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
818 list_del(&orig->list);
/*
 * NOTE(review): this loop walks rec->orphan_extents with "orig"/"tmp",
 * which are struct inode_backref pointers, while the list is filled with
 * struct orphan_data_extent entries above. It only works if "list" sits at
 * the same offset in both structs; dedicated orphan_data_extent cursors
 * would be the correct type.
 */
822 if (!list_empty(&rec->orphan_extents))
823 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
824 list_del(&orig->list);
833 static void print_orphan_data_extents(struct list_head *orphan_extents,
836 struct orphan_data_extent *orphan;
838 if (list_empty(orphan_extents))
840 printf("The following data extent is lost in tree %llu:\n",
842 list_for_each_entry(orphan, orphan_extents, list) {
843 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
844 orphan->objectid, orphan->offset, orphan->disk_bytenr,
849 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
851 u64 root_objectid = root->root_key.objectid;
852 int errors = rec->errors;
856 /* reloc root errors, we print its corresponding fs root objectid*/
857 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
858 root_objectid = root->root_key.offset;
859 fprintf(stderr, "reloc");
861 fprintf(stderr, "root %llu inode %llu errors %x",
862 (unsigned long long) root_objectid,
863 (unsigned long long) rec->ino, rec->errors);
865 if (errors & I_ERR_NO_INODE_ITEM)
866 fprintf(stderr, ", no inode item");
867 if (errors & I_ERR_NO_ORPHAN_ITEM)
868 fprintf(stderr, ", no orphan item");
869 if (errors & I_ERR_DUP_INODE_ITEM)
870 fprintf(stderr, ", dup inode item");
871 if (errors & I_ERR_DUP_DIR_INDEX)
872 fprintf(stderr, ", dup dir index");
873 if (errors & I_ERR_ODD_DIR_ITEM)
874 fprintf(stderr, ", odd dir item");
875 if (errors & I_ERR_ODD_FILE_EXTENT)
876 fprintf(stderr, ", odd file extent");
877 if (errors & I_ERR_BAD_FILE_EXTENT)
878 fprintf(stderr, ", bad file extent");
879 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
880 fprintf(stderr, ", file extent overlap");
881 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
882 fprintf(stderr, ", file extent discount");
883 if (errors & I_ERR_DIR_ISIZE_WRONG)
884 fprintf(stderr, ", dir isize wrong");
885 if (errors & I_ERR_FILE_NBYTES_WRONG)
886 fprintf(stderr, ", nbytes wrong");
887 if (errors & I_ERR_ODD_CSUM_ITEM)
888 fprintf(stderr, ", odd csum item");
889 if (errors & I_ERR_SOME_CSUM_MISSING)
890 fprintf(stderr, ", some csum missing");
891 if (errors & I_ERR_LINK_COUNT_WRONG)
892 fprintf(stderr, ", link count wrong");
893 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
894 fprintf(stderr, ", orphan file extent");
895 fprintf(stderr, "\n");
896 /* Print the orphan extents if needed */
897 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
898 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
900 /* Print the holes if needed */
901 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
902 struct file_extent_hole *hole;
903 struct rb_node *node;
906 node = rb_first(&rec->holes);
907 fprintf(stderr, "Found file extent holes:\n");
910 hole = rb_entry(node, struct file_extent_hole, node);
911 fprintf(stderr, "\tstart: %llu, len: %llu\n",
912 hole->start, hole->len);
913 node = rb_next(node);
916 fprintf(stderr, "\tstart: 0, len: %llu\n",
918 root->fs_info->sectorsize));
922 static void print_ref_error(int errors)
924 if (errors & REF_ERR_NO_DIR_ITEM)
925 fprintf(stderr, ", no dir item");
926 if (errors & REF_ERR_NO_DIR_INDEX)
927 fprintf(stderr, ", no dir index");
928 if (errors & REF_ERR_NO_INODE_REF)
929 fprintf(stderr, ", no inode ref");
930 if (errors & REF_ERR_DUP_DIR_ITEM)
931 fprintf(stderr, ", dup dir item");
932 if (errors & REF_ERR_DUP_DIR_INDEX)
933 fprintf(stderr, ", dup dir index");
934 if (errors & REF_ERR_DUP_INODE_REF)
935 fprintf(stderr, ", dup inode ref");
936 if (errors & REF_ERR_INDEX_UNMATCH)
937 fprintf(stderr, ", index mismatch");
938 if (errors & REF_ERR_FILETYPE_UNMATCH)
939 fprintf(stderr, ", filetype mismatch");
940 if (errors & REF_ERR_NAME_TOO_LONG)
941 fprintf(stderr, ", name too long");
942 if (errors & REF_ERR_NO_ROOT_REF)
943 fprintf(stderr, ", no root ref");
944 if (errors & REF_ERR_NO_ROOT_BACKREF)
945 fprintf(stderr, ", no root backref");
946 if (errors & REF_ERR_DUP_ROOT_REF)
947 fprintf(stderr, ", dup root ref");
948 if (errors & REF_ERR_DUP_ROOT_BACKREF)
949 fprintf(stderr, ", dup root backref");
950 fprintf(stderr, "\n");
/*
 * Return the inode record cached in @inode_cache for an inode number. The
 * visible lines show two paths: reuse of a record found by
 * lookup_cache_extent(), and creation of a zeroed record that is then
 * inserted into the cache. Errors are reported as ERR_PTR values
 * (-ENOMEM and -EEXIST appear below).
 */
953 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
956 struct ptr_node *node;
957 struct cache_extent *cache;
958 struct inode_record *rec = NULL;
961 cache = lookup_cache_extent(inode_cache, ino, 1);
963 node = container_of(cache, struct ptr_node, cache);
/*
 * A record with more than one reference is replaced by a private clone
 * when "mod" is set (presumably "the caller will modify it" - TODO confirm).
 */
965 if (mod && rec->refs > 1) {
966 node->data = clone_inode_rec(rec);
967 if (IS_ERR(node->data))
973 rec = calloc(1, sizeof(*rec));
975 return ERR_PTR(-ENOMEM);
/* (u64)-1 appears to mean "no extent seen yet"; merge_inode_recs() tests for it. */
977 rec->extent_start = (u64)-1;
979 INIT_LIST_HEAD(&rec->backrefs);
980 INIT_LIST_HEAD(&rec->orphan_extents);
981 rec->holes = RB_ROOT;
983 node = malloc(sizeof(*node));
986 return ERR_PTR(-ENOMEM);
/* The cache entry covers exactly one inode number. */
988 node->cache.start = ino;
989 node->cache.size = 1;
992 if (ino == BTRFS_FREE_INO_OBJECTID)
995 ret = insert_cache_extent(inode_cache, &node->cache);
/*
 * NOTE(review): the lines visible here show no release of "rec" or "node"
 * before this return, so both allocations look leaked when the insert
 * fails - confirm against the full function.
 */
997 return ERR_PTR(-EEXIST);
1002 static void free_orphan_data_extents(struct list_head *orphan_extents)
1004 struct orphan_data_extent *orphan;
1006 while (!list_empty(orphan_extents)) {
1007 orphan = list_entry(orphan_extents->next,
1008 struct orphan_data_extent, list);
1009 list_del(&orphan->list);
1014 static void free_inode_rec(struct inode_record *rec)
1016 struct inode_backref *backref;
1018 if (--rec->refs > 0)
1021 while (!list_empty(&rec->backrefs)) {
1022 backref = to_inode_backref(rec->backrefs.next);
1023 list_del(&backref->list);
1026 free_orphan_data_extents(&rec->orphan_extents);
1027 free_file_extent_holes(&rec->holes);
1031 static int can_free_inode_rec(struct inode_record *rec)
1033 if (!rec->errors && rec->checked && rec->found_inode_item &&
1034 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1039 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1040 struct inode_record *rec)
1042 struct cache_extent *cache;
1043 struct inode_backref *tmp, *backref;
1044 struct ptr_node *node;
1047 if (!rec->found_inode_item)
1050 filetype = imode_to_type(rec->imode);
1051 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1052 if (backref->found_dir_item && backref->found_dir_index) {
1053 if (backref->filetype != filetype)
1054 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1055 if (!backref->errors && backref->found_inode_ref &&
1056 rec->nlink == rec->found_link) {
1057 list_del(&backref->list);
1063 if (!rec->checked || rec->merging)
1066 if (S_ISDIR(rec->imode)) {
1067 if (rec->found_size != rec->isize)
1068 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1069 if (rec->found_file_extent)
1070 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1071 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1072 if (rec->found_dir_item)
1073 rec->errors |= I_ERR_ODD_DIR_ITEM;
1074 if (rec->found_size != rec->nbytes)
1075 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1076 if (rec->nlink > 0 && !no_holes &&
1077 (rec->extent_end < rec->isize ||
1078 first_extent_gap(&rec->holes) < rec->isize))
1079 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1082 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1083 if (rec->found_csum_item && rec->nodatasum)
1084 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1085 if (rec->some_csum_missing && !rec->nodatasum)
1086 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1089 BUG_ON(rec->refs != 1);
1090 if (can_free_inode_rec(rec)) {
1091 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1092 node = container_of(cache, struct ptr_node, cache);
1093 BUG_ON(node->data != rec);
1094 remove_cache_extent(inode_cache, &node->cache);
1096 free_inode_rec(rec);
1100 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1102 struct btrfs_path path;
1103 struct btrfs_key key;
1106 key.objectid = BTRFS_ORPHAN_OBJECTID;
1107 key.type = BTRFS_ORPHAN_ITEM_KEY;
1110 btrfs_init_path(&path);
1111 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1112 btrfs_release_path(&path);
1118 static int process_inode_item(struct extent_buffer *eb,
1119 int slot, struct btrfs_key *key,
1120 struct shared_node *active_node)
1122 struct inode_record *rec;
1123 struct btrfs_inode_item *item;
1125 rec = active_node->current;
1126 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1127 if (rec->found_inode_item) {
1128 rec->errors |= I_ERR_DUP_INODE_ITEM;
1131 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1132 rec->nlink = btrfs_inode_nlink(eb, item);
1133 rec->isize = btrfs_inode_size(eb, item);
1134 rec->nbytes = btrfs_inode_nbytes(eb, item);
1135 rec->imode = btrfs_inode_mode(eb, item);
1136 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1138 rec->found_inode_item = 1;
1139 if (rec->nlink == 0)
1140 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1141 maybe_free_inode_rec(&active_node->inode_cache, rec);
1145 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1147 int namelen, u64 dir)
1149 struct inode_backref *backref;
1151 list_for_each_entry(backref, &rec->backrefs, list) {
1152 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1154 if (backref->dir != dir || backref->namelen != namelen)
1156 if (memcmp(name, backref->name, namelen))
1161 backref = malloc(sizeof(*backref) + namelen + 1);
1164 memset(backref, 0, sizeof(*backref));
1166 backref->namelen = namelen;
1167 memcpy(backref->name, name, namelen);
1168 backref->name[namelen] = '\0';
1169 list_add_tail(&backref->list, &rec->backrefs);
1173 static int add_inode_backref(struct cache_tree *inode_cache,
1174 u64 ino, u64 dir, u64 index,
1175 const char *name, int namelen,
1176 u8 filetype, u8 itemtype, int errors)
1178 struct inode_record *rec;
1179 struct inode_backref *backref;
1181 rec = get_inode_rec(inode_cache, ino, 1);
1182 BUG_ON(IS_ERR(rec));
1183 backref = get_inode_backref(rec, name, namelen, dir);
1186 backref->errors |= errors;
1187 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1188 if (backref->found_dir_index)
1189 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1190 if (backref->found_inode_ref && backref->index != index)
1191 backref->errors |= REF_ERR_INDEX_UNMATCH;
1192 if (backref->found_dir_item && backref->filetype != filetype)
1193 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1195 backref->index = index;
1196 backref->filetype = filetype;
1197 backref->found_dir_index = 1;
1198 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1200 if (backref->found_dir_item)
1201 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1202 if (backref->found_dir_index && backref->filetype != filetype)
1203 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1205 backref->filetype = filetype;
1206 backref->found_dir_item = 1;
1207 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1208 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1209 if (backref->found_inode_ref)
1210 backref->errors |= REF_ERR_DUP_INODE_REF;
1211 if (backref->found_dir_index && backref->index != index)
1212 backref->errors |= REF_ERR_INDEX_UNMATCH;
1214 backref->index = index;
1216 backref->ref_type = itemtype;
1217 backref->found_inode_ref = 1;
1222 maybe_free_inode_rec(inode_cache, rec);
1226 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1227 struct cache_tree *dst_cache)
1229 struct inode_backref *backref;
1234 list_for_each_entry(backref, &src->backrefs, list) {
1235 if (backref->found_dir_index) {
1236 add_inode_backref(dst_cache, dst->ino, backref->dir,
1237 backref->index, backref->name,
1238 backref->namelen, backref->filetype,
1239 BTRFS_DIR_INDEX_KEY, backref->errors);
1241 if (backref->found_dir_item) {
1243 add_inode_backref(dst_cache, dst->ino,
1244 backref->dir, 0, backref->name,
1245 backref->namelen, backref->filetype,
1246 BTRFS_DIR_ITEM_KEY, backref->errors);
1248 if (backref->found_inode_ref) {
1249 add_inode_backref(dst_cache, dst->ino,
1250 backref->dir, backref->index,
1251 backref->name, backref->namelen, 0,
1252 backref->ref_type, backref->errors);
1256 if (src->found_dir_item)
1257 dst->found_dir_item = 1;
1258 if (src->found_file_extent)
1259 dst->found_file_extent = 1;
1260 if (src->found_csum_item)
1261 dst->found_csum_item = 1;
1262 if (src->some_csum_missing)
1263 dst->some_csum_missing = 1;
1264 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1265 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1270 BUG_ON(src->found_link < dir_count);
1271 dst->found_link += src->found_link - dir_count;
1272 dst->found_size += src->found_size;
1273 if (src->extent_start != (u64)-1) {
1274 if (dst->extent_start == (u64)-1) {
1275 dst->extent_start = src->extent_start;
1276 dst->extent_end = src->extent_end;
1278 if (dst->extent_end > src->extent_start)
1279 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1280 else if (dst->extent_end < src->extent_start) {
1281 ret = add_file_extent_hole(&dst->holes,
1283 src->extent_start - dst->extent_end);
1285 if (dst->extent_end < src->extent_end)
1286 dst->extent_end = src->extent_end;
1290 dst->errors |= src->errors;
1291 if (src->found_inode_item) {
1292 if (!dst->found_inode_item) {
1293 dst->nlink = src->nlink;
1294 dst->isize = src->isize;
1295 dst->nbytes = src->nbytes;
1296 dst->imode = src->imode;
1297 dst->nodatasum = src->nodatasum;
1298 dst->found_inode_item = 1;
1300 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Move the cached inode records of src_node into dst_node.
 *
 * The walk starts on the root_cache trees of both nodes and is later switched
 * to the inode_cache trees. Each ptr_node is removed from the source tree and
 * a freshly allocated ptr_node with the same start/size is inserted into the
 * destination tree. If the destination already holds an entry for that range
 * (-EEXIST), the record is merged into the existing one and then freed.
 *
 * Afterwards, if src_node had a current inode whose number is larger than
 * dst_node's current one (or dst_node has none), dst_node->current is moved
 * to the record for that inode number.
 *
 * NOTE(review): several lines of this function are not visible in this
 * excerpt (e.g. what happens when src_node->refs drops to zero and how rec is
 * obtained from node), so only the visible statements are described.
 */
1308 static int splice_shared_node(struct shared_node *src_node,
1309 struct shared_node *dst_node)
1311 struct cache_extent *cache;
1312 struct ptr_node *node, *ins;
1313 struct cache_tree *src, *dst;
1314 struct inode_record *rec, *conflict;
1315 u64 current_ino = 0;
/* drop one reference on the source node */
1319 if (--src_node->refs == 0)
/* remember which inode the source was working on */
1321 if (src_node->current)
1322 current_ino = src_node->current->ino;
1324 src = &src_node->root_cache;
1325 dst = &dst_node->root_cache;
1327 cache = search_cache_extent(src, 0);
1329 node = container_of(cache, struct ptr_node, cache);
/* fetch the successor before the entry is unlinked from src */
1331 cache = next_cache_extent(cache);
1334 remove_cache_extent(src, &node->cache);
1337 ins = malloc(sizeof(*ins));
1339 ins->cache.start = node->cache.start;
1340 ins->cache.size = node->cache.size;
1344 ret = insert_cache_extent(dst, &ins->cache);
/* dst already has a record for this range: merge rec into it and free rec */
1345 if (ret == -EEXIST) {
1346 conflict = get_inode_rec(dst, rec->ino, 1);
1347 BUG_ON(IS_ERR(conflict));
1348 merge_inode_recs(rec, conflict, dst);
1350 conflict->checked = 1;
1351 if (dst_node->current == conflict)
1352 dst_node->current = NULL;
1354 maybe_free_inode_rec(dst, conflict);
1355 free_inode_rec(rec);
/* after the root_cache pass, continue with the inode_cache trees */
1362 if (src == &src_node->root_cache) {
1363 src = &src_node->inode_cache;
1364 dst = &dst_node->inode_cache;
/* carry the source's current inode over when it is ahead of dst's */
1368 if (current_ino > 0 && (!dst_node->current ||
1369 current_ino > dst_node->current->ino)) {
1370 if (dst_node->current) {
1371 dst_node->current->checked = 1;
1372 maybe_free_inode_rec(dst, dst_node->current);
1374 dst_node->current = get_inode_rec(dst, current_ino, 1);
1375 BUG_ON(IS_ERR(dst_node->current));
/*
 * Per-entry destructor for an inode record cache tree: converts the
 * cache_extent back into its containing ptr_node and releases the inode
 * record through free_inode_rec().
 */
1380 static void free_inode_ptr(struct cache_extent *cache)
1382 struct ptr_node *node;
1383 struct inode_record *rec;
1385 node = container_of(cache, struct ptr_node, cache);
1387 free_inode_rec(rec);
/*
 * Instantiate the tree-wide free helper with free_inode_ptr as the per-entry
 * callback. NOTE(review): presumably this is what defines
 * free_inode_recs_tree(), which is called elsewhere in this file - confirm
 * against the macro definition.
 */
1391 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the shared_node registered for bytenr in the shared cache tree.
 * The lookup uses a size of 1 and a hit is converted from the embedded
 * cache_extent to its containing shared_node.
 * NOTE(review): the miss path is on lines not visible in this excerpt.
 */
1393 static struct shared_node *find_shared_node(struct cache_tree *shared,
1396 struct cache_extent *cache;
1397 struct shared_node *node;
1399 cache = lookup_cache_extent(shared, bytenr, 1);
1401 node = container_of(cache, struct shared_node, cache);
/*
 * Allocate a zeroed shared_node for the tree block at bytenr, initialise its
 * root_cache and inode_cache trees, and insert it into the shared cache tree
 * keyed by bytenr with a size of 1.
 * NOTE(review): how refs is stored and what is returned is on lines not
 * visible in this excerpt.
 */
1407 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1410 struct shared_node *node;
1412 node = calloc(1, sizeof(*node));
1415 node->cache.start = bytenr;
1416 node->cache.size = 1;
1417 cache_tree_init(&node->root_cache);
1418 cache_tree_init(&node->inode_cache);
1421 ret = insert_cache_extent(shared, &node->cache);
/*
 * Bookkeeping for reaching the tree block at bytenr on the given level.
 *
 * The block is looked up in wc->shared. On the path that calls
 * add_shared_node(), the new node is stored in wc->nodes[level] and level
 * becomes the active node. On the other path the node loses a reference:
 * when the walk is at the root level of a root whose root_item has zero
 * refs, its record trees are freed once the count reaches zero; otherwise
 * its records are spliced into the currently active node. In both cases the
 * node is removed from wc->shared when its refs reach zero.
 *
 * NOTE(review): the branch conditions between the two paths and all return
 * statements are on lines not visible in this excerpt.
 */
1426 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1427 struct walk_control *wc, int level)
1429 struct shared_node *node;
1430 struct shared_node *dest;
1433 if (level == wc->active_node)
/* the walk only descends, so the active node must be above this level */
1436 BUG_ON(wc->active_node <= level);
1437 node = find_shared_node(&wc->shared, bytenr);
1439 ret = add_shared_node(&wc->shared, bytenr, refs);
1441 node = find_shared_node(&wc->shared, bytenr);
1442 wc->nodes[level] = node;
1443 wc->active_node = level;
/* root with zero refs: drop the collected records instead of splicing */
1447 if (wc->root_level == wc->active_node &&
1448 btrfs_root_refs(&root->root_item) == 0) {
1449 if (--node->refs == 0) {
1450 free_inode_recs_tree(&node->root_cache);
1451 free_inode_recs_tree(&node->inode_cache);
1452 remove_cache_extent(&wc->shared, &node->cache);
1458 dest = wc->nodes[wc->active_node];
1459 splice_shared_node(node, dest);
1460 if (node->refs == 0) {
1461 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Bookkeeping for leaving the active shared node at the given level.
 *
 * A level i above `level` is chosen as the new active node; the old active
 * node is detached from wc->nodes. Unless the walk is now at the root level
 * of a root whose root_item has zero refs, the old node's records are spliced
 * into the new active node.
 *
 * NOTE(review): the early return at root level, the body of the search loop
 * and the body of the final else branch are on lines not visible here.
 */
1467 static int leave_shared_node(struct btrfs_root *root,
1468 struct walk_control *wc, int level)
1470 struct shared_node *node;
1471 struct shared_node *dest;
1474 if (level == wc->root_level)
1477 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
/* a higher level must have been found */
1481 BUG_ON(i >= BTRFS_MAX_LEVEL);
1483 node = wc->nodes[wc->active_node];
1484 wc->nodes[wc->active_node] = NULL;
1485 wc->active_node = i;
1487 dest = wc->nodes[wc->active_node];
1488 if (wc->active_node < wc->root_level ||
1489 btrfs_root_refs(&root->root_item) > 0) {
1490 BUG_ON(node->refs <= 1);
1491 splice_shared_node(node, dest);
1493 BUG_ON(node->refs < 2);
1502 * 1 - if the root with id child_root_id is a child of root parent_root_id
1503 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1504 * has other root(s) as parent(s)
1505 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Classify the relationship between child_root_id and parent_root_id using
 * the tree root.
 *
 * First a ROOT_REF item keyed (parent_root_id, ROOT_REF, child_root_id) is
 * searched. Then the ROOT_BACKREF items of child_root_id are scanned, moving
 * to the next leaf when the slot runs past the current one, until the key no
 * longer belongs to child_root_id's backrefs. A backref whose offset equals
 * parent_root_id ends the scan early. The final result distinguishes "has
 * some other parent" (0) from "has no parent at all" (2).
 *
 * NOTE(review): the handling of each search result and the loop construct
 * itself are on lines not visible in this excerpt.
 */
1507 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1510 struct btrfs_path path;
1511 struct btrfs_key key;
1512 struct extent_buffer *leaf;
1516 btrfs_init_path(&path);
/* forward reference: parent -> child */
1518 key.objectid = parent_root_id;
1519 key.type = BTRFS_ROOT_REF_KEY;
1520 key.offset = child_root_id;
1521 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1525 btrfs_release_path(&path);
/* back references: child -> any parent */
1529 key.objectid = child_root_id;
1530 key.type = BTRFS_ROOT_BACKREF_KEY;
1532 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1538 leaf = path.nodes[0];
1539 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1540 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1543 leaf = path.nodes[0];
1546 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* stop once the items no longer are backrefs of child_root_id */
1547 if (key.objectid != child_root_id ||
1548 key.type != BTRFS_ROOT_BACKREF_KEY)
1553 if (key.offset == parent_root_id) {
1554 btrfs_release_path(&path);
1561 btrfs_release_path(&path);
1564 return has_parent ? 0 : 2;
1567 static int process_dir_item(struct extent_buffer *eb,
1568 int slot, struct btrfs_key *key,
1569 struct shared_node *active_node)
1579 struct btrfs_dir_item *di;
1580 struct inode_record *rec;
1581 struct cache_tree *root_cache;
1582 struct cache_tree *inode_cache;
1583 struct btrfs_key location;
1584 char namebuf[BTRFS_NAME_LEN];
1586 root_cache = &active_node->root_cache;
1587 inode_cache = &active_node->inode_cache;
1588 rec = active_node->current;
1589 rec->found_dir_item = 1;
1591 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1592 total = btrfs_item_size_nr(eb, slot);
1593 while (cur < total) {
1595 btrfs_dir_item_key_to_cpu(eb, di, &location);
1596 name_len = btrfs_dir_name_len(eb, di);
1597 data_len = btrfs_dir_data_len(eb, di);
1598 filetype = btrfs_dir_type(eb, di);
1600 rec->found_size += name_len;
1601 if (cur + sizeof(*di) + name_len > total ||
1602 name_len > BTRFS_NAME_LEN) {
1603 error = REF_ERR_NAME_TOO_LONG;
1605 if (cur + sizeof(*di) > total)
1607 len = min_t(u32, total - cur - sizeof(*di),
1614 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1616 if (key->type == BTRFS_DIR_ITEM_KEY &&
1617 key->offset != btrfs_name_hash(namebuf, len)) {
1618 rec->errors |= I_ERR_ODD_DIR_ITEM;
1619 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1620 key->objectid, key->offset, namebuf, len, filetype,
1621 key->offset, btrfs_name_hash(namebuf, len));
1624 if (location.type == BTRFS_INODE_ITEM_KEY) {
1625 add_inode_backref(inode_cache, location.objectid,
1626 key->objectid, key->offset, namebuf,
1627 len, filetype, key->type, error);
1628 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1629 add_inode_backref(root_cache, location.objectid,
1630 key->objectid, key->offset,
1631 namebuf, len, filetype,
1634 fprintf(stderr, "invalid location in dir item %u\n",
1636 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1637 key->objectid, key->offset, namebuf,
1638 len, filetype, key->type, error);
1641 len = sizeof(*di) + name_len + data_len;
1642 di = (struct btrfs_dir_item *)((char *)di + len);
1645 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1646 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Walk every btrfs_inode_ref packed into the item at slot of leaf eb and
 * hand each one to add_inode_backref() together with key->objectid,
 * key->offset and the ref's index.
 *
 * A name that would cross the item boundary or exceeds BTRFS_NAME_LEN is
 * reported with REF_ERR_NAME_TOO_LONG, and the number of bytes read into
 * namebuf is clamped with min_t().
 *
 * NOTE(review): the non-error branch, the loop counter update and the return
 * value are on lines not visible in this excerpt.
 */
1651 static int process_inode_ref(struct extent_buffer *eb,
1652 int slot, struct btrfs_key *key,
1653 struct shared_node *active_node)
1661 struct cache_tree *inode_cache;
1662 struct btrfs_inode_ref *ref;
1663 char namebuf[BTRFS_NAME_LEN];
1665 inode_cache = &active_node->inode_cache;
1667 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1668 total = btrfs_item_size_nr(eb, slot);
1669 while (cur < total) {
1670 name_len = btrfs_inode_ref_name_len(eb, ref);
1671 index = btrfs_inode_ref_index(eb, ref);
1673 /* inode_ref + namelen should not cross item boundary */
1674 if (cur + sizeof(*ref) + name_len > total ||
1675 name_len > BTRFS_NAME_LEN) {
/* not even the fixed-size ref header fits in what is left */
1676 if (total < cur + sizeof(*ref))
1679 /* Still try to read out the remaining part */
1680 len = min_t(u32, total - cur - sizeof(*ref),
1682 error = REF_ERR_NAME_TOO_LONG;
/* the name bytes directly follow the ref structure */
1688 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1689 add_inode_backref(inode_cache, key->objectid, key->offset,
1690 index, namebuf, len, 0, key->type, error);
/* step over this ref using the on-disk name length, not the clamped one */
1692 len = sizeof(*ref) + name_len;
1693 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Walk every btrfs_inode_extref packed into the item at slot of leaf eb and
 * hand each one to add_inode_backref() together with key->objectid, the
 * extref's parent and its index.
 *
 * A name longer than BTRFS_NAME_LEN is truncated to BTRFS_NAME_LEN bytes and
 * reported with REF_ERR_NAME_TOO_LONG.
 *
 * NOTE(review): in the visible lines there is no check that the extref header
 * plus name stays inside the item, unlike the one in process_inode_ref() -
 * confirm whether one is needed here.
 */
1699 static int process_inode_extref(struct extent_buffer *eb,
1700 int slot, struct btrfs_key *key,
1701 struct shared_node *active_node)
1710 struct cache_tree *inode_cache;
1711 struct btrfs_inode_extref *extref;
1712 char namebuf[BTRFS_NAME_LEN];
1714 inode_cache = &active_node->inode_cache;
1716 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1717 total = btrfs_item_size_nr(eb, slot);
1718 while (cur < total) {
1719 name_len = btrfs_inode_extref_name_len(eb, extref);
1720 index = btrfs_inode_extref_index(eb, extref);
1721 parent = btrfs_inode_extref_parent(eb, extref);
1722 if (name_len <= BTRFS_NAME_LEN) {
1726 len = BTRFS_NAME_LEN;
1727 error = REF_ERR_NAME_TOO_LONG;
/* the name bytes directly follow the extref structure */
1729 read_extent_buffer(eb, namebuf,
1730 (unsigned long)(extref + 1), len);
1731 add_inode_backref(inode_cache, key->objectid, parent,
1732 index, namebuf, len, 0, key->type, error);
/* step over this extref using the on-disk name length */
1734 len = sizeof(*extref) + name_len;
1735 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Scan the csum tree for EXTENT_CSUM items overlapping the byte range
 * [start, start + len).
 *
 * After the initial search, the previous slot is inspected when it also holds
 * an EXTENT_CSUM item, since that item may still cover start. Items are then
 * visited in key order, moving to the next leaf when needed, until the key is
 * no longer an EXTENT_CSUM item or its offset is at or past the end of the
 * range. For each item, the end of the range it covers is computed from the
 * number of checksums it holds times the sector size.
 *
 * NOTE(review): the accumulation into *found, the range advance and the
 * return value are on lines not visible in this excerpt.
 */
1742 static int count_csum_range(struct btrfs_root *root, u64 start,
1743 u64 len, u64 *found)
1745 struct btrfs_key key;
1746 struct btrfs_path path;
1747 struct extent_buffer *leaf;
1752 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1754 btrfs_init_path(&path);
1756 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1758 key.type = BTRFS_EXTENT_CSUM_KEY;
1760 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* no exact match: the preceding csum item may still cover start */
1764 if (ret > 0 && path.slots[0] > 0) {
1765 leaf = path.nodes[0];
1766 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1767 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1768 key.type == BTRFS_EXTENT_CSUM_KEY)
1773 leaf = path.nodes[0];
1774 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1775 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1780 leaf = path.nodes[0];
1783 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1784 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1785 key.type != BTRFS_EXTENT_CSUM_KEY)
1788 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* this csum item starts at or after the end of the requested range */
1789 if (key.offset >= start + len)
1792 if (key.offset > start)
/* bytes covered by this item = number of checksums * sector size */
1795 size = btrfs_item_size_nr(leaf, path.slots[0]);
1796 csum_end = key.offset + (size / csum_size) *
1797 root->fs_info->sectorsize;
1798 if (csum_end > start) {
1799 size = min(csum_end - start, len);
1808 btrfs_release_path(&path);
/*
 * Validate one EXTENT_DATA item of the current inode record and account it.
 *
 * - Tracks the covered file range in rec->extent_start / rec->extent_end:
 *   an item starting before the previous end sets I_ERR_FILE_EXTENT_OVERLAP,
 *   an item starting after it records a hole for the gap.
 * - Sanity-checks inline, regular and prealloc extents, setting
 *   I_ERR_BAD_FILE_EXTENT on inconsistencies, and adds the extent length to
 *   rec->found_size (for regular/prealloc extents only when disk_bytenr > 0).
 * - For extents with a disk bytenr whose leaf is not owned by the data reloc
 *   tree, queries csum coverage with count_csum_range() and updates
 *   found_csum_item / some_csum_missing for regular extents, or
 *   I_ERR_ODD_CSUM_ITEM for prealloc extents.
 *
 * NOTE(review): some conditions and the return statements are on lines not
 * visible in this excerpt.
 */
1814 static int process_file_extent(struct btrfs_root *root,
1815 struct extent_buffer *eb,
1816 int slot, struct btrfs_key *key,
1817 struct shared_node *active_node)
1819 struct inode_record *rec;
1820 struct btrfs_file_extent_item *fi;
1822 u64 disk_bytenr = 0;
1823 u64 extent_offset = 0;
/* mask of the bits below sector alignment */
1824 u64 mask = root->fs_info->sectorsize - 1;
1828 rec = active_node->current;
1829 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1830 rec->found_file_extent = 1;
/* (u64)-1 marks "no extent seen yet" for this inode */
1832 if (rec->extent_start == (u64)-1) {
1833 rec->extent_start = key->offset;
1834 rec->extent_end = key->offset;
1837 if (rec->extent_end > key->offset)
1838 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1839 else if (rec->extent_end < key->offset) {
1840 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1841 key->offset - rec->extent_end);
1846 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1847 extent_type = btrfs_file_extent_type(eb, fi);
1849 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1850 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1852 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1853 rec->found_size += num_bytes;
/* round the inline length up to the next sector boundary */
1854 num_bytes = (num_bytes + mask) & ~mask;
1855 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1856 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1857 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1858 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1859 extent_offset = btrfs_file_extent_offset(eb, fi);
/* the length must be non-zero and sector aligned */
1860 if (num_bytes == 0 || (num_bytes & mask))
1861 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* the referenced part must lie inside the extent's ram_bytes */
1862 if (num_bytes + extent_offset >
1863 btrfs_file_extent_ram_bytes(eb, fi))
1864 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* a prealloc extent must not carry any encoding */
1865 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1866 (btrfs_file_extent_compression(eb, fi) ||
1867 btrfs_file_extent_encryption(eb, fi) ||
1868 btrfs_file_extent_other_encoding(eb, fi)))
1869 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1870 if (disk_bytenr > 0)
1871 rec->found_size += num_bytes;
1873 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1875 rec->extent_end = key->offset + num_bytes;
1878 * The data reloc tree will copy full extents into its inode and then
1879 * copy the corresponding csums. Because the extent it copied could be
1880 * a preallocated extent that hasn't been written to yet there may be no
1881 * csums to copy, ergo we won't have csums for our file extent. This is
1882 * ok so just don't bother checking csums if the inode belongs to the
1885 if (disk_bytenr > 0 &&
1886 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* for compressed extents the on-disk length is used for the csum lookup */
1888 if (btrfs_file_extent_compression(eb, fi))
1889 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1891 disk_bytenr += extent_offset;
1893 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1896 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1898 rec->found_csum_item = 1;
1899 if (found < num_bytes)
1900 rec->some_csum_missing = 1;
1901 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1903 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item of leaf eb on behalf of the active shared node.
 *
 * Each item is attributed to the inode record for key.objectid in the active
 * node's inode_cache. When the objectid moves past the current record, the
 * previous record is marked checked and passed to maybe_free_inode_rec()
 * before the new one is looked up. Items are then dispatched by key type to
 * the dir item, inode ref, inode extref, inode item and file extent handlers.
 *
 * NOTE(review): what is done for the zero-refs root case, for free space
 * objectids and for orphan items, plus the return value, is on lines not
 * visible in this excerpt.
 */
1909 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1910 struct walk_control *wc)
1912 struct btrfs_key key;
1916 struct cache_tree *inode_cache;
1917 struct shared_node *active_node;
1919 if (wc->root_level == wc->active_node &&
1920 btrfs_root_refs(&root->root_item) == 0)
1923 active_node = wc->nodes[wc->active_node];
1924 inode_cache = &active_node->inode_cache;
1925 nritems = btrfs_header_nritems(eb);
1926 for (i = 0; i < nritems; i++) {
1927 btrfs_item_key_to_cpu(eb, &key, i);
1929 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1931 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* first item, or the items have moved on to a higher inode number */
1934 if (active_node->current == NULL ||
1935 active_node->current->ino < key.objectid) {
1936 if (active_node->current) {
1937 active_node->current->checked = 1;
1938 maybe_free_inode_rec(inode_cache,
1939 active_node->current);
1941 active_node->current = get_inode_rec(inode_cache,
1943 BUG_ON(IS_ERR(active_node->current));
1946 case BTRFS_DIR_ITEM_KEY:
1947 case BTRFS_DIR_INDEX_KEY:
1948 ret = process_dir_item(eb, i, &key, active_node);
1950 case BTRFS_INODE_REF_KEY:
1951 ret = process_inode_ref(eb, i, &key, active_node);
1953 case BTRFS_INODE_EXTREF_KEY:
1954 ret = process_inode_extref(eb, i, &key, active_node);
1956 case BTRFS_INODE_ITEM_KEY:
1957 ret = process_inode_item(eb, i, &key, active_node);
1959 case BTRFS_EXTENT_DATA_KEY:
1960 ret = process_file_extent(root, eb, i, &key,
/* per level: bytenr of the tree block whose extent info was looked up last */
1971 u64 bytenr[BTRFS_MAX_LEVEL];
/* per level: extent reference count recorded for bytenr[level] */
1972 u64 refs[BTRFS_MAX_LEVEL];
/* per level: non-zero when the block at bytenr[level] has to be checked */
1973 int need_check[BTRFS_MAX_LEVEL];
1976 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1977 struct node_refs *nrefs, u64 level);
1978 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1979 unsigned int ext_ref);
1982 * Returns >0 Found error, not fatal, should continue
1983 * Returns <0 Fatal error, must exit the whole check
1984 * Returns 0 No errors found
/*
 * Check the inode items of the leaf at path->nodes[0] with
 * check_inode_item().
 *
 * The slot is first advanced to the first INODE_ITEM or to the first change
 * of inode number in the leaf. check_inode_item() may move the path to
 * another leaf; when that happens the cached per-level reference info in
 * nrefs is refreshed from the root level downwards via update_nodes_refs(),
 * and path nodes below *level are released.
 *
 * NOTE(review): the loop around check_inode_item(), the handling of
 * need_check == 0 and the return paths are on lines not visible here.
 */
1986 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1987 struct node_refs *nrefs, int *level, int ext_ref)
1989 struct extent_buffer *cur = path->nodes[0];
1990 struct btrfs_key key;
1994 int root_level = btrfs_header_level(root->node);
1996 int ret = 0; /* Final return value */
1997 int err = 0; /* Positive error bitmap */
/* remember the starting leaf to detect a later switch to another leaf */
1999 cur_bytenr = cur->start;
2001 /* skip to first inode item or the first inode number change */
2002 nritems = btrfs_header_nritems(cur);
2003 for (i = 0; i < nritems; i++) {
2004 btrfs_item_key_to_cpu(cur, &key, i);
2006 first_ino = key.objectid;
2007 if (key.type == BTRFS_INODE_ITEM_KEY ||
2008 (first_ino && first_ino != key.objectid))
2012 path->slots[0] = nritems;
2018 err |= check_inode_item(root, path, ext_ref);
2020 /* modify cur since check_inode_item may change path */
2021 cur = path->nodes[0];
2023 if (err & LAST_ITEM)
2026 /* still have inode items in this leaf */
2027 if (cur->start == cur_bytenr)
2031 * we have switched to another leaf, above nodes may
2032 * have changed, here walk down the path, if a node
2033 * or leaf is shared, check whether we can skip this
2036 for (i = root_level; i >= 0; i--) {
/* this level still refers to the block already recorded in nrefs */
2037 if (path->nodes[i]->start == nrefs->bytenr[i])
2040 ret = update_nodes_refs(root,
2041 path->nodes[i]->start,
2046 if (!nrefs->need_check[i]) {
/* release the path nodes below the current level */
2052 for (i = 0; i < *level; i++) {
2053 free_extent_buffer(path->nodes[i]);
2054 path->nodes[i] = NULL;
/*
 * Issue readahead for the children of node, starting at slot: for every
 * pointer from slot to the last item, the child's bytenr and generation are
 * read and passed to readahead_tree_block().
 * NOTE(review): a level-based early return sits on lines not visible here.
 */
2063 static void reada_walk_down(struct btrfs_root *root,
2064 struct extent_buffer *node, int slot)
2066 struct btrfs_fs_info *fs_info = root->fs_info;
2073 level = btrfs_header_level(node);
2077 nritems = btrfs_header_nritems(node);
2078 for (i = slot; i < nritems; i++) {
2079 bytenr = btrfs_node_blockptr(node, i);
2080 ptr_gen = btrfs_node_ptr_generation(node, i);
2081 readahead_tree_block(fs_info, bytenr, ptr_gen);
2086 * Check the child node/leaf by the following condition:
2087 * 1. the first item key of the node/leaf should be the same with the one
2089 * 2. block in parent node should match the child node/leaf.
2090 * 3. generation of parent node and child's header should be consistent.
2092 * Or the child node/leaf pointed by the key in parent is not valid.
2094 * We hope to check leaf owner too, but since subvol may share leaves,
2095 * which makes leaf owner check not so strong, key check should be
2096 * sufficient enough for that case.
2098 static int check_child_node(struct extent_buffer *parent, int slot,
2099 struct extent_buffer *child)
2101 struct btrfs_key parent_key;
2102 struct btrfs_key child_key;
2105 btrfs_node_key_to_cpu(parent, &parent_key, slot);
2106 if (btrfs_header_level(child) == 0)
2107 btrfs_item_key_to_cpu(child, &child_key, 0);
2109 btrfs_node_key_to_cpu(child, &child_key, 0);
2111 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2114 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2115 parent_key.objectid, parent_key.type, parent_key.offset,
2116 child_key.objectid, child_key.type, child_key.offset);
2118 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2120 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2121 btrfs_node_blockptr(parent, slot),
2122 btrfs_header_bytenr(child));
2124 if (btrfs_node_ptr_generation(parent, slot) !=
2125 btrfs_header_generation(child)) {
2127 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2128 btrfs_header_generation(child),
2129 btrfs_node_ptr_generation(parent, slot));
2135 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2136 * in every fs or file tree check. Here we find its all root ids, and only check
2137 * it in the fs or file tree which has the smallest root id.
2139 static int need_check(struct btrfs_root *root, struct ulist *roots)
2141 struct rb_node *node;
2142 struct ulist_node *u;
2144 if (roots->nnodes == 1)
2147 node = rb_first(&roots->root);
2148 u = rb_entry(node, struct ulist_node, rb_node);
2150 * current root id is not smallest, we skip it and let it be checked
2151 * in the fs or file tree who hash the smallest root id.
2153 if (root->objectid != u->val)
2160 * For a tree node or leaf, record its reference count so that if the same
2161 * node or leaf is processed again later, the count need not be recomputed.
/*
 * Refresh the cached reference info in nrefs for the block at bytenr on the
 * given level.
 *
 * Nothing is looked up when nrefs already describes bytenr at that level.
 * Otherwise the extent reference count is fetched with
 * btrfs_lookup_extent_info() and stored together with bytenr. All roots
 * referencing the block are then collected with btrfs_find_all_roots() and
 * need_check() decides the stored need_check flag; on another path the flag
 * is simply set to 1.
 *
 * NOTE(review): the conditions selecting between those two paths and the
 * error handling are on lines not visible in this excerpt.
 */
2163 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2164 struct node_refs *nrefs, u64 level)
2168 struct ulist *roots;
2170 if (nrefs->bytenr[level] != bytenr) {
2171 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2172 level, 1, &refs, NULL);
2176 nrefs->bytenr[level] = bytenr;
2177 nrefs->refs[level] = refs;
2179 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2184 check = need_check(root, roots);
2186 nrefs->need_check[level] = check;
2188 nrefs->need_check[level] = 1;
/*
 * Descend from path->nodes[*level] towards the leaves, handing each leaf to
 * process_one_leaf().
 *
 * For the starting block and for every child pointer, the extent reference
 * count is taken from nrefs when the bytenr matches the cached entry and is
 * otherwise looked up with btrfs_lookup_extent_info() and cached.
 * enter_shared_node() is called with that count. A child block is taken from
 * the cache or read from disk (with readahead for its siblings), validated
 * with check_child_node() and btrfs_check_leaf()/btrfs_check_node(), and then
 * installed in the path one level down. An unreadable child is recorded with
 * btrfs_add_corrupt_extent_record().
 *
 * NOTE(review): the conditions guarding enter_shared_node(), the error
 * handling and the return value are on lines not visible in this excerpt.
 */
2195 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2196 struct walk_control *wc, int *level,
2197 struct node_refs *nrefs)
2199 enum btrfs_tree_block_status status;
2202 struct btrfs_fs_info *fs_info = root->fs_info;
2203 struct extent_buffer *next;
2204 struct extent_buffer *cur;
2208 WARN_ON(*level < 0);
2209 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* reuse the cached ref count when it belongs to this block */
2211 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2212 refs = nrefs->refs[*level];
2215 ret = btrfs_lookup_extent_info(NULL, root,
2216 path->nodes[*level]->start,
2217 *level, 1, &refs, NULL);
2222 nrefs->bytenr[*level] = path->nodes[*level]->start;
2223 nrefs->refs[*level] = refs;
2227 ret = enter_shared_node(root, path->nodes[*level]->start,
2235 while (*level >= 0) {
2236 WARN_ON(*level < 0);
2237 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2238 cur = path->nodes[*level];
2240 if (btrfs_header_level(cur) != *level)
2243 if (path->slots[*level] >= btrfs_header_nritems(cur))
2246 ret = process_one_leaf(root, cur, wc);
2251 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2252 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
/* same caching scheme for the child, one level down */
2254 if (bytenr == nrefs->bytenr[*level - 1]) {
2255 refs = nrefs->refs[*level - 1];
2257 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2258 *level - 1, 1, &refs, NULL);
2262 nrefs->bytenr[*level - 1] = bytenr;
2263 nrefs->refs[*level - 1] = refs;
2268 ret = enter_shared_node(root, bytenr, refs,
2271 path->slots[*level]++;
/* use the cached buffer if it is up to date, otherwise read it from disk */
2276 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2277 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2278 free_extent_buffer(next);
2279 reada_walk_down(root, cur, path->slots[*level]);
2280 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2281 if (!extent_buffer_uptodate(next)) {
2282 struct btrfs_key node_key;
2284 btrfs_node_key_to_cpu(path->nodes[*level],
2286 path->slots[*level]);
2287 btrfs_add_corrupt_extent_record(root->fs_info,
2289 path->nodes[*level]->start,
2290 root->fs_info->nodesize,
2297 ret = check_child_node(cur, path->slots[*level], next);
2299 free_extent_buffer(next);
2304 if (btrfs_is_leaf(next))
2305 status = btrfs_check_leaf(root, NULL, next);
2307 status = btrfs_check_node(root, NULL, next);
2308 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2309 free_extent_buffer(next);
/* step down: the child replaces whatever was in the path at the lower level */
2314 *level = *level - 1;
2315 free_extent_buffer(path->nodes[*level]);
2316 path->nodes[*level] = next;
2317 path->slots[*level] = 0;
/* mark the current level as fully consumed */
2320 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2324 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2325 unsigned int ext_ref);
2328 * Returns >0 Found error, should continue
2329 * Returns <0 Fatal error, must exit the whole check
2330 * Returns 0 No errors found
/*
 * Descend from path->nodes[*level] towards the leaves for the v2 check.
 *
 * The cached reference info for the starting block is refreshed with
 * update_nodes_refs(). Leaves are validated with btrfs_check_leaf() and
 * handed to process_one_leaf_v2(); nodes are validated with
 * btrfs_check_node(). For each child pointer the reference info is refreshed
 * and the child is skipped when its need_check flag is clear. Otherwise the
 * child is taken from the cache or read from disk, validated with
 * check_child_node() and btrfs_check_leaf()/btrfs_check_node(), and installed
 * in the path one level down. An unreadable child is recorded with
 * btrfs_add_corrupt_extent_record().
 *
 * NOTE(review): error handling and return statements are on lines not visible
 * in this excerpt.
 */
2332 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2333 int *level, struct node_refs *nrefs, int ext_ref)
2335 enum btrfs_tree_block_status status;
2338 struct btrfs_fs_info *fs_info = root->fs_info;
2339 struct extent_buffer *next;
2340 struct extent_buffer *cur;
2343 WARN_ON(*level < 0);
2344 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2346 ret = update_nodes_refs(root, path->nodes[*level]->start,
2351 while (*level >= 0) {
2352 WARN_ON(*level < 0);
2353 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2354 cur = path->nodes[*level];
2356 if (btrfs_header_level(cur) != *level)
2359 if (path->slots[*level] >= btrfs_header_nritems(cur))
2361 /* Don't forget to check leaf/node validity */
2363 ret = btrfs_check_leaf(root, NULL, cur);
2364 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2368 ret = process_one_leaf_v2(root, path, nrefs,
/* process_one_leaf_v2 may have moved the path, so reload the current block */
2370 cur = path->nodes[*level];
2373 ret = btrfs_check_node(root, NULL, cur);
2374 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2379 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2380 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2382 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
/* the child does not need checking from this root: skip it */
2385 if (!nrefs->need_check[*level - 1]) {
2386 path->slots[*level]++;
/* use the cached buffer if it is up to date, otherwise read it from disk */
2390 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2391 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2392 free_extent_buffer(next);
2393 reada_walk_down(root, cur, path->slots[*level]);
2394 next = read_tree_block(fs_info, bytenr, ptr_gen);
2395 if (!extent_buffer_uptodate(next)) {
2396 struct btrfs_key node_key;
2398 btrfs_node_key_to_cpu(path->nodes[*level],
2400 path->slots[*level]);
2401 btrfs_add_corrupt_extent_record(fs_info,
2403 path->nodes[*level]->start,
2411 ret = check_child_node(cur, path->slots[*level], next);
2415 if (btrfs_is_leaf(next))
2416 status = btrfs_check_leaf(root, NULL, next);
2418 status = btrfs_check_node(root, NULL, next);
2419 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2420 free_extent_buffer(next);
/* step down: the child replaces whatever was in the path at the lower level */
2425 *level = *level - 1;
2426 free_extent_buffer(path->nodes[*level]);
2427 path->nodes[*level] = next;
2428 path->slots[*level] = 0;
/*
 * Climb the path upwards starting at *level.
 *
 * Each populated level is tested for a further slot beyond the current one.
 * On the other path the buffer at *level is released and cleared from the
 * path, and leave_shared_node() is called when that level is the active
 * shared node.
 *
 * NOTE(review): what is done when a further slot exists, how *level is
 * advanced and the return values are on lines not visible in this excerpt.
 */
2433 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2434 struct walk_control *wc, int *level)
2437 struct extent_buffer *leaf;
2439 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2440 leaf = path->nodes[i];
2441 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2446 free_extent_buffer(path->nodes[*level]);
2447 path->nodes[*level] = NULL;
/* the level being left can never be above the active shared node */
2448 BUG_ON(*level > wc->active_node);
2449 if (*level == wc->active_node)
2450 leave_shared_node(root, wc, *level);
/*
 * v2 counterpart of walk_up_tree() without shared-node bookkeeping: each
 * populated level from *level upwards is tested for a further slot beyond the
 * current one; on the other path the buffer at *level is released and
 * cleared from the path.
 *
 * NOTE(review): what is done when a further slot exists, how *level is
 * advanced and the return values are on lines not visible in this excerpt.
 */
2457 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2461 struct extent_buffer *leaf;
2463 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2464 leaf = path->nodes[i];
2465 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2470 free_extent_buffer(path->nodes[*level]);
2471 path->nodes[*level] = NULL;
/*
 * Sanity-check the inode record of a root directory. The visible tests are:
 *  - an inode item was found and no errors are recorded;
 *  - nlink is 1 and no links were found;
 *  - there is at least one backref, and the first one has an inode ref,
 *    index 0 and the two-byte name "..";
 *  - that backref has neither a dir index nor a dir item.
 *
 * NOTE(review): the action taken when a test fails and the return values are
 * on lines not visible in this excerpt.
 */
2478 static int check_root_dir(struct inode_record *rec)
2480 struct inode_backref *backref;
2483 if (!rec->found_inode_item || rec->errors)
2485 if (rec->nlink != 1 || rec->found_link != 0)
2487 if (list_empty(&rec->backrefs))
2489 backref = to_inode_backref(rec->backrefs.next);
2490 if (!backref->found_inode_ref)
2492 if (backref->index != 0 || backref->namelen != 2 ||
2493 memcmp(backref->name, "..", 2))
2495 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size field of the inode item of rec->ino with rec->found_size.
 *
 * The search key uses offset (u64)-1 and the key actually found is verified
 * to belong to rec->ino before the item is modified. On success the leaf is
 * marked dirty, I_ERR_DIR_ISIZE_WRONG is cleared from rec->errors and a
 * message is printed. The path is released before returning.
 *
 * NOTE(review): the error paths and any slot adjustment between the search
 * and the key check are on lines not visible in this excerpt.
 */
2502 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2503 struct btrfs_root *root, struct btrfs_path *path,
2504 struct inode_record *rec)
2506 struct btrfs_inode_item *ei;
2507 struct btrfs_key key;
2510 key.objectid = rec->ino;
2511 key.type = BTRFS_INODE_ITEM_KEY;
2512 key.offset = (u64)-1;
2514 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2518 if (!path->slots[0]) {
2525 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2526 if (key.objectid != rec->ino) {
2531 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2532 struct btrfs_inode_item);
2533 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2534 btrfs_mark_buffer_dirty(path->nodes[0]);
2535 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2536 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2537 root->root_key.objectid);
2539 btrfs_release_path(path);
/*
 * Add an orphan item for rec->ino with btrfs_add_orphan_item(), release the
 * path, and clear I_ERR_NO_ORPHAN_ITEM from rec->errors.
 * NOTE(review): the condition guarding the flag clear and the return value
 * are on lines not visible in this excerpt.
 */
2543 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2544 struct btrfs_root *root,
2545 struct btrfs_path *path,
2546 struct inode_record *rec)
2550 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2551 btrfs_release_path(path);
2553 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field of the inode item of rec->ino with
 * rec->found_size.
 *
 * After the inode item is found, the leaf is updated and marked dirty,
 * I_ERR_FILE_NBYTES_WRONG is cleared from rec->errors and a message is
 * printed. The path is released before returning.
 *
 * NOTE(review): the key offset assignment and the handling of a failed or
 * inexact search are on lines not visible in this excerpt.
 */
2557 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2558 struct btrfs_root *root,
2559 struct btrfs_path *path,
2560 struct inode_record *rec)
2562 struct btrfs_inode_item *ei;
2563 struct btrfs_key key;
2566 key.objectid = rec->ino;
2567 key.type = BTRFS_INODE_ITEM_KEY;
2570 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2577 /* Since ret == 0, no need to check anything */
2578 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2579 struct btrfs_inode_item);
2580 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2581 btrfs_mark_buffer_dirty(path->nodes[0]);
2582 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2583 printf("reset nbytes for ino %llu root %llu\n",
2584 rec->ino, root->root_key.objectid);
2586 btrfs_release_path(path);
/*
 * Re-create a missing DIR_INDEX item for backref in its own transaction.
 *
 * An empty item keyed (backref->dir, DIR_INDEX, backref->index) and sized for
 * a dir item header plus the name is inserted. It is then filled with a
 * location pointing at the inode item of rec->ino, the file type derived from
 * rec->imode, a zero data length and the backref's name. After the commit,
 * the backref is marked as having a dir index and the directory's record is
 * updated: the name length is added to found_size and I_ERR_DIR_ISIZE_WRONG
 * is cleared or set depending on whether found_size now equals isize.
 *
 * NOTE(review): the return value of btrfs_commit_transaction() is not used in
 * the visible lines; handling of a failed insert and of a missing dir_rec is
 * on lines not visible in this excerpt.
 */
2590 static int add_missing_dir_index(struct btrfs_root *root,
2591 struct cache_tree *inode_cache,
2592 struct inode_record *rec,
2593 struct inode_backref *backref)
2595 struct btrfs_path path;
2596 struct btrfs_trans_handle *trans;
2597 struct btrfs_dir_item *dir_item;
2598 struct extent_buffer *leaf;
2599 struct btrfs_key key;
2600 struct btrfs_disk_key disk_key;
2601 struct inode_record *dir_rec;
2602 unsigned long name_ptr;
/* item payload: dir item header immediately followed by the name */
2603 u32 data_size = sizeof(*dir_item) + backref->namelen;
2606 trans = btrfs_start_transaction(root, 1);
2608 return PTR_ERR(trans);
2610 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2611 (unsigned long long)rec->ino);
2613 btrfs_init_path(&path);
2614 key.objectid = backref->dir;
2615 key.type = BTRFS_DIR_INDEX_KEY;
2616 key.offset = backref->index;
2617 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2620 leaf = path.nodes[0];
2621 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* location of the dir entry: the inode item of rec->ino */
2623 disk_key.objectid = cpu_to_le64(rec->ino);
2624 disk_key.type = BTRFS_INODE_ITEM_KEY;
2625 disk_key.offset = 0;
2627 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2628 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2629 btrfs_set_dir_data_len(leaf, dir_item, 0);
2630 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* the name is stored right after the dir item header */
2631 name_ptr = (unsigned long)(dir_item + 1);
2632 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2633 btrfs_mark_buffer_dirty(leaf);
2634 btrfs_release_path(&path);
2635 btrfs_commit_transaction(trans, root);
2637 backref->found_dir_index = 1;
2638 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2639 BUG_ON(IS_ERR(dir_rec));
/* account the new entry in the directory and re-evaluate its isize error */
2642 dir_rec->found_size += backref->namelen;
2643 if (dir_rec->found_size == dir_rec->isize &&
2644 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2645 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2646 if (dir_rec->found_size != dir_rec->isize)
2647 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by backref in its own transaction.
 *
 * The entry is looked up by directory, name and index with
 * btrfs_lookup_dir_index(). One visible path releases the path and commits
 * without deleting anything. The other path removes either the whole item
 * (btrfs_del_item) or just the one name inside it
 * (btrfs_delete_one_dir_name), then releases the path and commits.
 *
 * NOTE(review): the conditions choosing between those paths and the return
 * values are on lines not visible in this excerpt; the return value of
 * btrfs_commit_transaction() is not used in the visible lines.
 */
2652 static int delete_dir_index(struct btrfs_root *root,
2653 struct inode_backref *backref)
2655 struct btrfs_trans_handle *trans;
2656 struct btrfs_dir_item *di;
2657 struct btrfs_path path;
2660 trans = btrfs_start_transaction(root, 1);
2662 return PTR_ERR(trans);
2664 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2665 (unsigned long long)backref->dir,
2666 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2667 (unsigned long long)root->objectid);
2669 btrfs_init_path(&path);
2670 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2671 backref->name, backref->namelen,
2672 backref->index, -1);
2675 btrfs_release_path(&path);
2676 btrfs_commit_transaction(trans, root);
2683 ret = btrfs_del_item(trans, root, &path);
2685 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2687 btrfs_release_path(&path);
2688 btrfs_commit_transaction(trans, root);
/*
 * Re-create a missing inode item for rec->ino in its own transaction.
 *
 * The item is built on the stack from what the check has collected:
 * generation is the transaction id, nbytes is rec->found_size, and nlink is
 * either 1 or rec->found_link. When dir items were found the inode becomes a
 * directory (S_IFDIR | 0755) with size rec->found_size; otherwise it becomes
 * a regular file (S_IFREG | 0755) with size rec->extent_end. atime, ctime and
 * mtime are set to the current time and otime to zero. A warning is printed
 * because permissions and content cannot be fully recovered.
 *
 * NOTE(review): the condition selecting the nlink value and the return value
 * are on lines not visible in this excerpt; the return value of
 * btrfs_commit_transaction() is not used in the visible lines.
 */
2692 static int create_inode_item(struct btrfs_root *root,
2693 struct inode_record *rec,
2696 struct btrfs_trans_handle *trans;
2697 struct btrfs_inode_item inode_item;
2698 time_t now = time(NULL);
2701 trans = btrfs_start_transaction(root, 1);
2702 if (IS_ERR(trans)) {
2703 ret = PTR_ERR(trans);
2707 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2708 "be incomplete, please check permissions and content after "
2709 "the fsck completes.\n", (unsigned long long)root->objectid,
2710 (unsigned long long)rec->ino);
2712 memset(&inode_item, 0, sizeof(inode_item));
2713 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2715 btrfs_set_stack_inode_nlink(&inode_item, 1);
2717 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2718 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2719 if (rec->found_dir_item) {
/* both kinds of items exist: warn, then treat the inode as a directory */
2720 if (rec->found_file_extent)
2721 fprintf(stderr, "root %llu inode %llu has both a dir "
2722 "item and extents, unsure if it is a dir or a "
2723 "regular file so setting it as a directory\n",
2724 (unsigned long long)root->objectid,
2725 (unsigned long long)rec->ino);
2726 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2727 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2728 } else if (!rec->found_dir_item) {
/* no dir items: regular file sized by the end of its last extent */
2729 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2730 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2732 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2733 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2734 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2735 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2736 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2737 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2738 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2739 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2741 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2743 btrfs_commit_transaction(trans, root);
/*
 * Walk every backref of @rec and repair the dir item / dir index /
 * inode ref / inode item combination it describes.
 *
 * Repairs visible in this listing:
 *  - root directory without an inode item: create_inode_item(root, rec, 1);
 *  - index 0 of the root directory is left untouched;
 *  - a dir index that has no inode ref, or whose index does not match
 *    (REF_ERR_INDEX_UNMATCH), is removed with delete_dir_index() and the
 *    backref is dropped from the list;
 *  - when not deleting: a backref with dir item + inode ref but no dir
 *    index goes to add_missing_dir_index(), and an error-free backref with
 *    all three items is dropped from the list;
 *  - when not deleting: a backref with only an inode ref gets a new dir
 *    item/index pair via btrfs_insert_dir_item() in its own transaction,
 *    after check_dir_conflict();
 *  - when not deleting: a fully matched backref whose inode item is missing
 *    triggers create_inode_item(root, rec, 0).
 *
 * Returns ret when it is non-zero, otherwise the `repaired` counter.
 *
 * NOTE(review): the `delete` parameter, several `if` heads and all
 * error-path statements are on lines not visible in this listing; confirm
 * the exact conditions against the full source.
 */
2747 static int repair_inode_backrefs(struct btrfs_root *root,
2748 struct inode_record *rec,
2749 struct cache_tree *inode_cache,
2752 struct inode_backref *tmp, *backref;
2753 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2757 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2758 if (!delete && rec->ino == root_dirid) {
2759 if (!rec->found_inode_item) {
2760 ret = create_inode_item(root, rec, 1);
2767 /* Index 0 for root dir's are special, don't mess with it */
2768 if (rec->ino == root_dirid && backref->index == 0)
2772 ((backref->found_dir_index && !backref->found_inode_ref) ||
2773 (backref->found_dir_index && backref->found_inode_ref &&
2774 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2775 ret = delete_dir_index(root, backref);
2779 list_del(&backref->list);
2784 if (!delete && !backref->found_dir_index &&
2785 backref->found_dir_item && backref->found_inode_ref) {
2786 ret = add_missing_dir_index(root, inode_cache, rec,
2791 if (backref->found_dir_item &&
2792 backref->found_dir_index) {
2793 if (!backref->errors &&
2794 backref->found_inode_ref) {
2795 list_del(&backref->list);
2802 if (!delete && (!backref->found_dir_index &&
2803 !backref->found_dir_item &&
2804 backref->found_inode_ref)) {
2805 struct btrfs_trans_handle *trans;
2806 struct btrfs_key location;
2808 ret = check_dir_conflict(root, backref->name,
2814 * let nlink fixing routine to handle it,
2815 * which can do it better.
2820 location.objectid = rec->ino;
2821 location.type = BTRFS_INODE_ITEM_KEY;
2822 location.offset = 0;
2824 trans = btrfs_start_transaction(root, 1);
2825 if (IS_ERR(trans)) {
2826 ret = PTR_ERR(trans);
2829 fprintf(stderr, "adding missing dir index/item pair "
2831 (unsigned long long)rec->ino);
2832 ret = btrfs_insert_dir_item(trans, root, backref->name,
2834 backref->dir, &location,
2835 imode_to_type(rec->imode),
2838 btrfs_commit_transaction(trans, root);
2842 if (!delete && (backref->found_inode_ref &&
2843 backref->found_dir_index &&
2844 backref->found_dir_item &&
2845 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2846 !rec->found_inode_item)) {
2847 ret = create_inode_item(root, rec, 0);
2854 return ret ? ret : repaired;
2858 * To determine the file type for nlink/inode_item repair
2860 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2861 * Return -ENOENT if file type is not found.
2863 static int find_file_type(struct inode_record *rec, u8 *type)
2865 struct inode_backref *backref;
2867 /* For inode item recovered case */
2868 if (rec->found_inode_item) {
2869 *type = imode_to_type(rec->imode);
2873 list_for_each_entry(backref, &rec->backrefs, list) {
2874 if (backref->found_dir_index || backref->found_dir_item) {
2875 *type = backref->filetype;
2883 * To determine the file name for nlink repair
2885 * Return 0 if file name is found, set name and namelen.
2886 * Return -ENOENT if file name is not found.
2888 static int find_file_name(struct inode_record *rec,
2889 char *name, int *namelen)
2891 struct inode_backref *backref;
2893 list_for_each_entry(backref, &rec->backrefs, list) {
2894 if (backref->found_dir_index || backref->found_dir_item ||
2895 backref->found_inode_ref) {
2896 memcpy(name, backref->name, backref->namelen);
2897 *namelen = backref->namelen;
/*
 * Rebuild the link count of rec->ino from the backrefs in the record.
 *
 * Steps visible in this listing:
 *  1. rec->found_link is reset to 0;
 *  2. btrfs_unlink() is called for every backref, and backrefs that lack
 *     any of dir index / dir item / inode ref are dropped from the list;
 *  3. the inode item is looked up (cow search) and its nlink is written
 *     as 0, then the leaf is marked dirty;
 *  4. btrfs_add_link() is called for each backref still on the list, which
 *     per the in-line comment is what increments nlink again.
 *
 * @path is released before returning.
 *
 * NOTE(review): the error checks between the steps and the return
 * statement are on lines not visible here -- confirm.
 */
2904 /* Reset the nlink of the inode to the correct one */
2905 static int reset_nlink(struct btrfs_trans_handle *trans,
2906 struct btrfs_root *root,
2907 struct btrfs_path *path,
2908 struct inode_record *rec)
2910 struct inode_backref *backref;
2911 struct inode_backref *tmp;
2912 struct btrfs_key key;
2913 struct btrfs_inode_item *inode_item;
2916 /* We don't believe this either, reset it and iterate backref */
2917 rec->found_link = 0;
2919 /* Remove all backref including the valid ones */
2920 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2921 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2922 backref->index, backref->name,
2923 backref->namelen, 0);
2927 /* remove invalid backref, so it won't be added back */
2928 if (!(backref->found_dir_index &&
2929 backref->found_dir_item &&
2930 backref->found_inode_ref)) {
2931 list_del(&backref->list);
2938 /* Set nlink to 0 */
2939 key.objectid = rec->ino;
2940 key.type = BTRFS_INODE_ITEM_KEY;
2942 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2949 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2950 struct btrfs_inode_item);
2951 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2952 btrfs_mark_buffer_dirty(path->nodes[0]);
2953 btrfs_release_path(path);
2956 * Add back valid inode_ref/dir_item/dir_index,
2957 * add_link() will handle the nlink inc, so new nlink must be correct
2959 list_for_each_entry(backref, &rec->backrefs, list) {
2960 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2961 backref->name, backref->namelen,
2962 backref->filetype, &backref->index, 1);
2967 btrfs_release_path(path);
/*
 * Look up the highest inode number currently present in @root and store it
 * in *highest_ino.
 *
 * The tree is searched for an INODE_ITEM key with objectid
 * BTRFS_LAST_FREE_OBJECTID; the key of the slot just before the returned
 * position (slots[0] - 1) is read and its objectid is the result.  A result
 * that is >= BTRFS_LAST_FREE_OBJECTID is checked for separately.  @path is
 * (re)initialised on entry and released before returning.
 *
 * NOTE(review): the condition guarding the key read, the handling of the
 * ">= BTRFS_LAST_FREE_OBJECTID" case and the return value are on lines not
 * visible in this listing -- confirm.
 */
2971 static int get_highest_inode(struct btrfs_trans_handle *trans,
2972 struct btrfs_root *root,
2973 struct btrfs_path *path,
2976 struct btrfs_key key, found_key;
2979 btrfs_init_path(path);
2980 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2982 key.type = BTRFS_INODE_ITEM_KEY;
2983 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2985 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2986 path->slots[0] - 1);
2987 *highest_ino = found_key.objectid;
2990 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2992 btrfs_release_path(path);
/*
 * Fix a wrong link count on rec->ino and, if the inode ends up with no
 * valid link at all, move it into a "lost+found" directory.
 *
 * Flow visible in this listing:
 *  - the file name and type are recovered first (find_file_name() /
 *    find_file_type()), because reset_nlink() removes the invalid backrefs
 *    they are read from; the fallbacks are the decimal inode number as the
 *    name and BTRFS_FT_REG_FILE as the type;
 *  - reset_nlink() rebuilds the link count;
 *  - when rec->found_link is 0 afterwards, "lost+found" is created with
 *    btrfs_mkdir() under BTRFS_FIRST_FREE_OBJECTID (using the number from
 *    get_highest_inode()), and the inode is linked into it; on -EEXIST a
 *    suffix is appended to the name and the link is retried while the
 *    name still fits;
 *  - I_ERR_LINK_COUNT_WRONG is always cleared at the end so the caller
 *    does not loop forever on the same inode, and @path is released.
 *
 * NOTE(review): error branches, the suffix format string and the return
 * statement are on lines not visible here -- confirm.
 */
2996 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2997 struct btrfs_root *root,
2998 struct btrfs_path *path,
2999 struct inode_record *rec)
3001 char *dir_name = "lost+found";
3002 char namebuf[BTRFS_NAME_LEN] = {0};
3007 int name_recovered = 0;
3008 int type_recovered = 0;
3012 * Get file name and type first before these invalid inode ref
3013 * are deleted by remove_all_invalid_backref()
3015 name_recovered = !find_file_name(rec, namebuf, &namelen);
3016 type_recovered = !find_file_type(rec, &type);
3018 if (!name_recovered) {
3019 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3020 rec->ino, rec->ino);
3021 namelen = count_digits(rec->ino);
3022 sprintf(namebuf, "%llu", rec->ino);
3025 if (!type_recovered) {
3026 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3028 type = BTRFS_FT_REG_FILE;
3032 ret = reset_nlink(trans, root, path, rec);
3035 "Failed to reset nlink for inode %llu: %s\n",
3036 rec->ino, strerror(-ret));
3040 if (rec->found_link == 0) {
3041 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3045 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3046 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3049 fprintf(stderr, "Failed to create '%s' dir: %s\n",
3050 dir_name, strerror(-ret));
3053 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
3054 namebuf, namelen, type, NULL, 1);
3056 * Add ".INO" suffix several times to handle case where
3057 * "FILENAME.INO" is already taken by another file.
3059 while (ret == -EEXIST) {
3061 * Conflicting file name, add ".INO" as suffix * +1 for '.'
3063 if (namelen + count_digits(rec->ino) + 1 >
3068 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
3070 namelen += count_digits(rec->ino) + 1;
3071 ret = btrfs_add_link(trans, root, rec->ino,
3072 lost_found_ino, namebuf,
3073 namelen, type, NULL, 1);
3077 "Failed to link the inode %llu to %s dir: %s\n",
3078 rec->ino, dir_name, strerror(-ret));
3082 * Just increase the found_link, don't actually add the
3083 * backref. This will make things easier and this inode
3084 * record will be freed after the repair is done.
3085 * So fsck will not report problem about this inode.
3088 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3089 namelen, namebuf, dir_name);
3091 printf("Fixed the nlink of inode %llu\n", rec->ino);
3094 * Clear the flag anyway, or we will loop forever for the same inode
3095 * as it will not be removed from the bad inode list and the dead loop
3098 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3099 btrfs_release_path(path);
/*
 * Read-only probe for a non-inline file extent belonging to inode @ino.
 *
 * The tree is searched (no transaction, no cow) for an EXTENT_DATA key;
 * items are examined while their key still has objectid == ino and type
 * EXTENT_DATA, and an item whose extent type is not
 * BTRFS_FILE_EXTENT_INLINE is treated as a hit.  The path is released
 * before returning.
 *
 * NOTE(review): the loop construct, the remaining key fields and the
 * returned value are on lines not visible in this listing; presumably the
 * result is non-zero when such an extent exists -- confirm.
 */
3104 * Check if there is any normal(reg or prealloc) file extent for given
3106 * This is used to determine the file type when neither its dir_index/item or
3107 * inode_item exists.
3109 * This will *NOT* report error, if any error happens, just consider it does
3110 * not have any normal file extent.
3112 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3114 struct btrfs_path path;
3115 struct btrfs_key key;
3116 struct btrfs_key found_key;
3117 struct btrfs_file_extent_item *fi;
3121 btrfs_init_path(&path);
3123 key.type = BTRFS_EXTENT_DATA_KEY;
3126 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3131 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3132 ret = btrfs_next_leaf(root, &path);
3139 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3141 if (found_key.objectid != ino ||
3142 found_key.type != BTRFS_EXTENT_DATA_KEY)
3144 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3145 struct btrfs_file_extent_item);
3146 type = btrfs_file_extent_type(path.nodes[0], fi);
3147 if (type != BTRFS_FILE_EXTENT_INLINE) {
3153 btrfs_release_path(&path);
3157 static u32 btrfs_type_to_imode(u8 type)
3159 static u32 imode_by_btrfs_type[] = {
3160 [BTRFS_FT_REG_FILE] = S_IFREG,
3161 [BTRFS_FT_DIR] = S_IFDIR,
3162 [BTRFS_FT_CHRDEV] = S_IFCHR,
3163 [BTRFS_FT_BLKDEV] = S_IFBLK,
3164 [BTRFS_FT_FIFO] = S_IFIFO,
3165 [BTRFS_FT_SOCK] = S_IFSOCK,
3166 [BTRFS_FT_SYMLINK] = S_IFLNK,
3169 return imode_by_btrfs_type[(type)];
/*
 * Rebuild the missing inode item of rec->ino.
 *
 * The file type is taken from find_file_type() when possible.  Otherwise
 * it is guessed, in this order: a non-inline file extent means a regular
 * file, a dir item means a directory, pending orphan extents mean a
 * regular file, and a regular file is the final fallback (with a message).
 * The inode is then created with btrfs_new_inode() using
 * mode | btrfs_type_to_imode(filetype).
 *
 * On the record, found_dir_item is set to 1, imode is updated,
 * I_ERR_NO_INODE_ITEM is cleared and I_ERR_LINK_COUNT_WRONG is set so that
 * the nlink repair runs afterwards; this function does not fix links
 * itself.
 *
 * NOTE(review): the initial value of `mode`, the error check after
 * btrfs_new_inode() and the return statement are on lines not visible in
 * this listing -- confirm.
 */
3172 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3173 struct btrfs_root *root,
3174 struct btrfs_path *path,
3175 struct inode_record *rec)
3179 int type_recovered = 0;
3182 printf("Trying to rebuild inode:%llu\n", rec->ino);
3184 type_recovered = !find_file_type(rec, &filetype);
3187 * Try to determine inode type if type not found.
3189 * For found regular file extent, it must be FILE.
3190 * For found dir_item/index, it must be DIR.
3192 * For undetermined one, use FILE as fallback.
3195 * 1. If found backref(inode_index/item is already handled) to it,
3197 * Need new inode-inode ref structure to allow search for that.
3199 if (!type_recovered) {
3200 if (rec->found_file_extent &&
3201 find_normal_file_extent(root, rec->ino)) {
3203 filetype = BTRFS_FT_REG_FILE;
3204 } else if (rec->found_dir_item) {
3206 filetype = BTRFS_FT_DIR;
3207 } else if (!list_empty(&rec->orphan_extents)) {
3209 filetype = BTRFS_FT_REG_FILE;
3211 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3214 filetype = BTRFS_FT_REG_FILE;
3218 ret = btrfs_new_inode(trans, root, rec->ino,
3219 mode | btrfs_type_to_imode(filetype));
3224 * Here inode rebuild is done, we only rebuild the inode item,
3225 * don't repair the nlink(like move to lost+found).
3226 * That is the job of nlink repair.
3228 * We just fill the record and return
3230 rec->found_dir_item = 1;
3231 rec->imode = mode | btrfs_type_to_imode(filetype);
3233 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3234 /* Ensure the inode_nlinks repair function will be called */
3235 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach (or discard) the orphan data extents recorded for @rec.
 *
 * For each entry on rec->orphan_extents:
 *  - btrfs_get_extent() probes the range [offset, offset + disk_len) of the
 *    owning inode for an existing file extent; disk_len is used as the
 *    length because compression and data offset are unknown here;
 *  - on a conflict the orphan's disk extent is released with
 *    btrfs_free_extent(); otherwise a file extent item pointing at it is
 *    inserted with btrfs_insert_file_extent();
 *  - the record is updated: found_size grows by disk_len,
 *    I_ERR_FILE_NBYTES_WRONG is cleared once found_size equals nbytes, the
 *    covered hole is removed, and I_ERR_FILE_EXTENT_DISCOUNT is cleared
 *    when no holes remain;
 *  - the entry is unlinked from the list.
 * I_ERR_FILE_EXTENT_ORPHAN is cleared at the end.
 *
 * NOTE(review): the branch heads, error exits, freeing of the orphan entry
 * and the return statement are on lines not visible in this listing --
 * confirm which of the updates above apply on the conflict path.
 */
3240 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3241 struct btrfs_root *root,
3242 struct btrfs_path *path,
3243 struct inode_record *rec)
3245 struct orphan_data_extent *orphan;
3246 struct orphan_data_extent *tmp;
3249 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3251 * Check for conflicting file extents
3253 * Here we don't know whether the extents is compressed or not,
3254 * so we can only assume it not compressed nor data offset,
3255 * and use its disk_len as extent length.
3257 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3258 orphan->offset, orphan->disk_len, 0);
3259 btrfs_release_path(path);
3264 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3265 orphan->disk_bytenr, orphan->disk_len);
3266 ret = btrfs_free_extent(trans,
3267 root->fs_info->extent_root,
3268 orphan->disk_bytenr, orphan->disk_len,
3269 0, root->objectid, orphan->objectid,
3274 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3275 orphan->offset, orphan->disk_bytenr,
3276 orphan->disk_len, orphan->disk_len);
3280 /* Update file size info */
3281 rec->found_size += orphan->disk_len;
3282 if (rec->found_size == rec->nbytes)
3283 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3285 /* Update the file extent hole info too */
3286 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3290 if (RB_EMPTY_ROOT(&rec->holes))
3291 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3293 list_del(&orphan->list);
3296 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps ("discount" file extents) recorded in rec->holes.
 *
 * Starting from rb_first(&rec->holes), each hole is punched into the file
 * with btrfs_punch_hole() and then removed from the tree with
 * del_file_extent_hole(); I_ERR_FILE_EXTENT_DISCOUNT is cleared once the
 * tree is empty.  A separate special case punches a single hole covering
 * [0, round_up(rec->isize, sectorsize)) for a file that lost all of its
 * file extents.  A confirmation message is printed at the end.
 *
 * NOTE(review): the loop head, the condition selecting the special case,
 * error exits and the return statement are on lines not visible in this
 * listing -- confirm.
 */
3301 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3302 struct btrfs_root *root,
3303 struct btrfs_path *path,
3304 struct inode_record *rec)
3306 struct rb_node *node;
3307 struct file_extent_hole *hole;
3311 node = rb_first(&rec->holes);
3315 hole = rb_entry(node, struct file_extent_hole, node);
3316 ret = btrfs_punch_hole(trans, root, rec->ino,
3317 hole->start, hole->len);
3320 ret = del_file_extent_hole(&rec->holes, hole->start,
3324 if (RB_EMPTY_ROOT(&rec->holes))
3325 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3326 node = rb_first(&rec->holes);
3328 /* special case for a file losing all its file extent */
3330 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3331 round_up(rec->isize,
3332 root->fs_info->sectorsize));
3336 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3337 rec->ino, root->objectid);
/*
 * Run every applicable inode repair for @rec inside one transaction.
 *
 * Only records with at least one of the repairable error bits
 * (DIR_ISIZE_WRONG, NO_ORPHAN_ITEM, LINK_COUNT_WRONG, NO_INODE_ITEM,
 * FILE_EXTENT_ORPHAN, FILE_EXTENT_DISCOUNT, FILE_NBYTES_WRONG) are
 * processed.  The transaction reserves 7 items, sized for the nlink repair
 * as itemised in the comment below.
 *
 * The repairs are chained in a fixed order -- inode item, orphan extents,
 * discount extents, dir isize, orphan item, nlinks, nbytes -- and each one
 * after the first runs only if everything before it returned 0 and its
 * error bit is set at that point (earlier repairs may set or clear bits).
 * The transaction is committed and the path released regardless of the
 * outcome; the commit's own return value is not captured.
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started.
 * NOTE(review): the remaining return statements are on lines not visible
 * in this listing -- confirm.
 */
3342 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3344 struct btrfs_trans_handle *trans;
3345 struct btrfs_path path;
3348 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3349 I_ERR_NO_ORPHAN_ITEM |
3350 I_ERR_LINK_COUNT_WRONG |
3351 I_ERR_NO_INODE_ITEM |
3352 I_ERR_FILE_EXTENT_ORPHAN |
3353 I_ERR_FILE_EXTENT_DISCOUNT|
3354 I_ERR_FILE_NBYTES_WRONG)))
3358 * For nlink repair, it may create a dir and add link, so
3359 * 2 for parent(256)'s dir_index and dir_item
3360 * 2 for lost+found dir's inode_item and inode_ref
3361 * 1 for the new inode_ref of the file
3362 * 2 for lost+found dir's dir_index and dir_item for the file
3364 trans = btrfs_start_transaction(root, 7);
3366 return PTR_ERR(trans);
3368 btrfs_init_path(&path);
3369 if (rec->errors & I_ERR_NO_INODE_ITEM)
3370 ret = repair_inode_no_item(trans, root, &path, rec);
3371 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3372 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3373 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3374 ret = repair_inode_discount_extent(trans, root, &path, rec);
3375 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3376 ret = repair_inode_isize(trans, root, &path, rec);
3377 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3378 ret = repair_inode_orphan_item(trans, root, &path, rec);
3379 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3380 ret = repair_inode_nlinks(trans, root, &path, rec);
3381 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3382 ret = repair_inode_nbytes(trans, root, &path, rec);
3383 btrfs_commit_transaction(trans, root);
3384 btrfs_release_path(&path);
/*
 * Verify (and, in repair mode, fix) every inode record collected for
 * @root, consuming @inode_cache in the process.
 *
 * Outline of what is visible in this listing:
 *  - a root with zero refs only gets a warning if its cache is not empty;
 *  - in repair mode the cache is walked in stages and
 *    repair_inode_backrefs() is called for every record that has backrefs
 *    (the comment below explains why invalid backrefs are deleted before
 *    correct ones are added);
 *  - the root directory record is looked up and checked with
 *    check_root_dir(); a missing root directory is recreated with
 *    btrfs_make_root_dir() in its own transaction, or reported;
 *  - every remaining record is removed from the cache; the root directory
 *    and BTRFS_ORPHAN_OBJECTID records are simply freed; a missing orphan
 *    item is re-checked with check_orphan_item(); NO_INODE_ITEM and
 *    LINK_COUNT_WRONG are derived from the record; try_repair_inode() is
 *    attempted; anything still wrong is printed together with its
 *    unresolved backrefs, and the record is freed.
 *
 * Returns -1 when `error` is positive, 0 otherwise.
 *
 * NOTE(review): many control-flow lines (loop heads, stage handling, early
 * returns, the conditions around the root-dir recreation) are not visible
 * in this listing; the outline above should be confirmed against the full
 * source.
 */
3388 static int check_inode_recs(struct btrfs_root *root,
3389 struct cache_tree *inode_cache)
3391 struct cache_extent *cache;
3392 struct ptr_node *node;
3393 struct inode_record *rec;
3394 struct inode_backref *backref;
3399 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3401 if (btrfs_root_refs(&root->root_item) == 0) {
3402 if (!cache_tree_empty(inode_cache))
3403 fprintf(stderr, "warning line %d\n", __LINE__);
3408 * We need to repair backrefs first because we could change some of the
3409 * errors in the inode recs.
3411 * We also need to go through and delete invalid backrefs first and then
3412 * add the correct ones second. We do this because we may get EEXIST
3413 * when adding back the correct index because we hadn't yet deleted the
3416 * For example, if we were missing a dir index then the directories
3417 * isize would be wrong, so if we fixed the isize to what we thought it
3418 * would be and then fixed the backref we'd still have a invalid fs, so
3419 * we need to add back the dir index and then check to see if the isize
3424 if (stage == 3 && !err)
3427 cache = search_cache_extent(inode_cache, 0);
3428 while (repair && cache) {
3429 node = container_of(cache, struct ptr_node, cache);
3431 cache = next_cache_extent(cache);
3433 /* Need to free everything up and rescan */
3435 remove_cache_extent(inode_cache, &node->cache);
3437 free_inode_rec(rec);
3441 if (list_empty(&rec->backrefs))
3444 ret = repair_inode_backrefs(root, rec, inode_cache,
3458 rec = get_inode_rec(inode_cache, root_dirid, 0);
3459 BUG_ON(IS_ERR(rec));
3461 ret = check_root_dir(rec);
3463 fprintf(stderr, "root %llu root dir %llu error\n",
3464 (unsigned long long)root->root_key.objectid,
3465 (unsigned long long)root_dirid);
3466 print_inode_error(root, rec);
3471 struct btrfs_trans_handle *trans;
3473 trans = btrfs_start_transaction(root, 1);
3474 if (IS_ERR(trans)) {
3475 err = PTR_ERR(trans);
3480 "root %llu missing its root dir, recreating\n",
3481 (unsigned long long)root->objectid);
3483 ret = btrfs_make_root_dir(trans, root, root_dirid);
3486 btrfs_commit_transaction(trans, root);
3490 fprintf(stderr, "root %llu root dir %llu not found\n",
3491 (unsigned long long)root->root_key.objectid,
3492 (unsigned long long)root_dirid);
3496 cache = search_cache_extent(inode_cache, 0);
3499 node = container_of(cache, struct ptr_node, cache);
3501 remove_cache_extent(inode_cache, &node->cache);
3503 if (rec->ino == root_dirid ||
3504 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3505 free_inode_rec(rec);
3509 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3510 ret = check_orphan_item(root, rec->ino);
3512 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3513 if (can_free_inode_rec(rec)) {
3514 free_inode_rec(rec);
3519 if (!rec->found_inode_item)
3520 rec->errors |= I_ERR_NO_INODE_ITEM;
3521 if (rec->found_link != rec->nlink)
3522 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3524 ret = try_repair_inode(root, rec);
3525 if (ret == 0 && can_free_inode_rec(rec)) {
3526 free_inode_rec(rec);
3532 if (!(repair && ret == 0))
3534 print_inode_error(root, rec);
3535 list_for_each_entry(backref, &rec->backrefs, list) {
3536 if (!backref->found_dir_item)
3537 backref->errors |= REF_ERR_NO_DIR_ITEM;
3538 if (!backref->found_dir_index)
3539 backref->errors |= REF_ERR_NO_DIR_INDEX;
3540 if (!backref->found_inode_ref)
3541 backref->errors |= REF_ERR_NO_INODE_REF;
3542 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3543 " namelen %u name %s filetype %d errors %x",
3544 (unsigned long long)backref->dir,
3545 (unsigned long long)backref->index,
3546 backref->namelen, backref->name,
3547 backref->filetype, backref->errors);
3548 print_ref_error(backref->errors);
3550 free_inode_rec(rec);
3552 return (error > 0) ? -1 : 0;
3555 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3558 struct cache_extent *cache;
3559 struct root_record *rec = NULL;
3562 cache = lookup_cache_extent(root_cache, objectid, 1);
3564 rec = container_of(cache, struct root_record, cache);
3566 rec = calloc(1, sizeof(*rec));
3568 return ERR_PTR(-ENOMEM);
3569 rec->objectid = objectid;
3570 INIT_LIST_HEAD(&rec->backrefs);
3571 rec->cache.start = objectid;
3572 rec->cache.size = 1;
3574 ret = insert_cache_extent(root_cache, &rec->cache);
3576 return ERR_PTR(-EEXIST);
3581 static struct root_backref *get_root_backref(struct root_record *rec,
3582 u64 ref_root, u64 dir, u64 index,
3583 const char *name, int namelen)
3585 struct root_backref *backref;
3587 list_for_each_entry(backref, &rec->backrefs, list) {
3588 if (backref->ref_root != ref_root || backref->dir != dir ||
3589 backref->namelen != namelen)
3591 if (memcmp(name, backref->name, namelen))
3596 backref = calloc(1, sizeof(*backref) + namelen + 1);
3599 backref->ref_root = ref_root;
3601 backref->index = index;
3602 backref->namelen = namelen;
3603 memcpy(backref->name, name, namelen);
3604 backref->name[namelen] = '\0';
3605 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Tear down one root record stored in a cache tree.
 *
 * @cache is the cache_extent embedded in a struct root_record.  The
 * record's backref list is drained from the front: each backref is
 * unlinked with list_del() until the list is empty.
 *
 * NOTE(review): the statements that release the backrefs and the record
 * itself are on lines not visible in this listing -- confirm.
 *
 * The macro invocation that follows is given root_recs and this function
 * as the per-entry callback.
 */
3609 static void free_root_record(struct cache_extent *cache)
3611 struct root_record *rec;
3612 struct root_backref *backref;
3614 rec = container_of(cache, struct root_record, cache);
3615 while (!list_empty(&rec->backrefs)) {
3616 backref = to_root_backref(rec->backrefs.next);
3617 list_del(&backref->list);
3624 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that an item of type @item_type referencing subvolume @root_id
 * was found, updating (or creating) the matching root backref.
 *
 * The backref is keyed by (ref_root, dir, name) on the record of @root_id;
 * @errors is OR-ed into it.  For every item type except DIR_ITEM the index
 * is compared with the one already recorded by a dir index / back ref /
 * forward ref, flagging REF_ERR_INDEX_UNMATCH on a mismatch, or stored
 * when it is the first to supply it.
 *
 * The found_* flag matching @item_type is then set; a second ROOT_REF or
 * ROOT_BACKREF for the same backref is flagged as REF_ERR_DUP_ROOT_REF /
 * REF_ERR_DUP_ROOT_BACKREF.  A backref with both a forward ref and a dir
 * item is marked reachable.
 *
 * Failure to obtain the root record aborts via BUG_ON().
 *
 * NOTE(review): the statements executed when a dir item meets an existing
 * forward ref (and vice versa), the handling of other item types, the
 * check of the get_root_backref() result and the return value are on
 * lines not visible in this listing -- confirm.
 */
3626 static int add_root_backref(struct cache_tree *root_cache,
3627 u64 root_id, u64 ref_root, u64 dir, u64 index,
3628 const char *name, int namelen,
3629 int item_type, int errors)
3631 struct root_record *rec;
3632 struct root_backref *backref;
3634 rec = get_root_rec(root_cache, root_id);
3635 BUG_ON(IS_ERR(rec));
3636 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3639 backref->errors |= errors;
3641 if (item_type != BTRFS_DIR_ITEM_KEY) {
3642 if (backref->found_dir_index || backref->found_back_ref ||
3643 backref->found_forward_ref) {
3644 if (backref->index != index)
3645 backref->errors |= REF_ERR_INDEX_UNMATCH;
3647 backref->index = index;
3651 if (item_type == BTRFS_DIR_ITEM_KEY) {
3652 if (backref->found_forward_ref)
3654 backref->found_dir_item = 1;
3655 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3656 backref->found_dir_index = 1;
3657 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3658 if (backref->found_forward_ref)
3659 backref->errors |= REF_ERR_DUP_ROOT_REF;
3660 else if (backref->found_dir_item)
3662 backref->found_forward_ref = 1;
3663 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3664 if (backref->found_back_ref)
3665 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3666 backref->found_back_ref = 1;
3671 if (backref->found_forward_ref && backref->found_dir_item)
3672 backref->reachable = 1;
/*
 * Fold the per-tree records in @src_cache into the global root cache
 * @dst_cache, emptying @src_cache.
 *
 * For the tree-reloc root the source records are simply freed.  Otherwise
 * each record is removed from @src_cache and tested with is_child_root();
 * its backrefs (which must not carry an inode ref -- enforced with
 * BUG_ON) are forwarded to add_root_backref() as DIR_ITEM and/or DIR_INDEX
 * references from root->root_key.objectid to the subvolume rec->ino.  The
 * record is freed afterwards.
 *
 * NOTE(review): the loop head, how the is_child_root() result is acted on,
 * the trailing argument of the add_root_backref() calls and the return
 * value are on lines not visible in this listing -- confirm.
 */
3676 static int merge_root_recs(struct btrfs_root *root,
3677 struct cache_tree *src_cache,
3678 struct cache_tree *dst_cache)
3680 struct cache_extent *cache;
3681 struct ptr_node *node;
3682 struct inode_record *rec;
3683 struct inode_backref *backref;
3686 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3687 free_inode_recs_tree(src_cache);
3692 cache = search_cache_extent(src_cache, 0);
3695 node = container_of(cache, struct ptr_node, cache);
3697 remove_cache_extent(src_cache, &node->cache);
3700 ret = is_child_root(root, root->objectid, rec->ino);
3706 list_for_each_entry(backref, &rec->backrefs, list) {
3707 BUG_ON(backref->found_inode_ref);
3708 if (backref->found_dir_item)
3709 add_root_backref(dst_cache, rec->ino,
3710 root->root_key.objectid, backref->dir,
3711 backref->index, backref->name,
3712 backref->namelen, BTRFS_DIR_ITEM_KEY,
3714 if (backref->found_dir_index)
3715 add_root_backref(dst_cache, rec->ino,
3716 root->root_key.objectid, backref->dir,
3717 backref->index, backref->name,
3718 backref->namelen, BTRFS_DIR_INDEX_KEY,
3722 free_inode_rec(rec);
/*
 * Check that every subvolume recorded in @root_cache is properly
 * referenced, reporting problems on stderr.
 *
 * Two passes over the cache are visible in this listing:
 *  1. a reachability pass starting from the record of
 *     BTRFS_FS_TREE_OBJECTID: backrefs whose referencing root is itself
 *     unreferenced are marked unreachable (per the fixme below, circular
 *     references are not detected);
 *  2. a reporting pass: an unreferenced root in the
 *     [BTRFS_FIRST_FREE_OBJECTID, BTRFS_LAST_FREE_OBJECTID] range is
 *     checked for an orphan item in the tree root and reported as
 *     "not referenced" when it has a root item; each backref gets
 *     REF_ERR_NO_* flags for its missing pieces, and roots/backrefs with
 *     problems are printed.
 *
 * Returns 1 when `errors` is positive, 0 otherwise.
 *
 * NOTE(review): the loop heads, counters and most branch bodies are on
 * lines not visible in this listing; confirm the details against the full
 * source.
 */
3729 static int check_root_refs(struct btrfs_root *root,
3730 struct cache_tree *root_cache)
3732 struct root_record *rec;
3733 struct root_record *ref_root;
3734 struct root_backref *backref;
3735 struct cache_extent *cache;
3741 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3742 BUG_ON(IS_ERR(rec));
3745 /* fixme: this can not detect circular references */
3748 cache = search_cache_extent(root_cache, 0);
3752 rec = container_of(cache, struct root_record, cache);
3753 cache = next_cache_extent(cache);
3755 if (rec->found_ref == 0)
3758 list_for_each_entry(backref, &rec->backrefs, list) {
3759 if (!backref->reachable)
3762 ref_root = get_root_rec(root_cache,
3764 BUG_ON(IS_ERR(ref_root));
3765 if (ref_root->found_ref > 0)
3768 backref->reachable = 0;
3770 if (rec->found_ref == 0)
3776 cache = search_cache_extent(root_cache, 0);
3780 rec = container_of(cache, struct root_record, cache);
3781 cache = next_cache_extent(cache);
3783 if (rec->found_ref == 0 &&
3784 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3785 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3786 ret = check_orphan_item(root->fs_info->tree_root,
3792 * If we don't have a root item then we likely just have
3793 * a dir item in a snapshot for this root but no actual
3794 * ref key or anything so it's meaningless.
3796 if (!rec->found_root_item)
3799 fprintf(stderr, "fs tree %llu not referenced\n",
3800 (unsigned long long)rec->objectid);
3804 if (rec->found_ref > 0 && !rec->found_root_item)
3806 list_for_each_entry(backref, &rec->backrefs, list) {
3807 if (!backref->found_dir_item)
3808 backref->errors |= REF_ERR_NO_DIR_ITEM;
3809 if (!backref->found_dir_index)
3810 backref->errors |= REF_ERR_NO_DIR_INDEX;
3811 if (!backref->found_back_ref)
3812 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3813 if (!backref->found_forward_ref)
3814 backref->errors |= REF_ERR_NO_ROOT_REF;
3815 if (backref->reachable && backref->errors)
3822 fprintf(stderr, "fs tree %llu refs %u %s\n",
3823 (unsigned long long)rec->objectid, rec->found_ref,
3824 rec->found_root_item ? "" : "not found");
3826 list_for_each_entry(backref, &rec->backrefs, list) {
3827 if (!backref->reachable)
3829 if (!backref->errors && rec->found_root_item)
3831 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3832 " index %llu namelen %u name %s errors %x\n",
3833 (unsigned long long)backref->ref_root,
3834 (unsigned long long)backref->dir,
3835 (unsigned long long)backref->index,
3836 backref->namelen, backref->name,
3838 print_ref_error(backref->errors);
3841 return errors > 0 ? 1 : 0;
/*
 * Decode one ROOT_REF / ROOT_BACKREF item at @slot of leaf @eb and feed it
 * to add_root_backref().
 *
 * The directory id, sequence (used as the index) and name length are read
 * from the item; the name bytes follow the fixed-size btrfs_root_ref
 * header.  A name longer than BTRFS_NAME_LEN is truncated to
 * BTRFS_NAME_LEN and reported with REF_ERR_NAME_TOO_LONG.
 *
 * For a ROOT_REF key the referenced root is key->offset and the
 * referencing root is key->objectid; for the other key type handled here
 * the two are swapped.
 *
 * NOTE(review): the assignments for the in-range name length and the
 * return value are on lines not visible in this listing -- confirm.
 */
3844 static int process_root_ref(struct extent_buffer *eb, int slot,
3845 struct btrfs_key *key,
3846 struct cache_tree *root_cache)
3852 struct btrfs_root_ref *ref;
3853 char namebuf[BTRFS_NAME_LEN];
3856 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3858 dirid = btrfs_root_ref_dirid(eb, ref);
3859 index = btrfs_root_ref_sequence(eb, ref);
3860 name_len = btrfs_root_ref_name_len(eb, ref);
3862 if (name_len <= BTRFS_NAME_LEN) {
3866 len = BTRFS_NAME_LEN;
3867 error = REF_ERR_NAME_TOO_LONG;
3869 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3871 if (key->type == BTRFS_ROOT_REF_KEY) {
3872 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3873 index, namebuf, len, key->type, error);
3875 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3876 index, namebuf, len, key->type, error);
/*
 * Per-entry destructor for a corrupt-block cache tree.
 *
 * @cache is the cache_extent embedded in a struct btrfs_corrupt_block; the
 * containing structure is recovered with container_of().
 *
 * NOTE(review): the statement that releases the structure is on a line
 * not visible in this listing -- confirm.
 *
 * The macro invocation that follows is given corrupt_blocks and this
 * function as the per-entry callback.
 */
3881 static void free_corrupt_block(struct cache_extent *cache)
3883 struct btrfs_corrupt_block *corrupt;
3885 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3889 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
/*
 * Implementation notes for repair_btree() (see also the description that
 * follows):
 *
 * Nothing is done when @corrupt_blocks is empty.  Otherwise one
 * transaction covers two passes over the recorded corrupt blocks:
 *  1. removal -- for each entry the tree is searched down to the entry's
 *     level (path.lowest_level) with ins_len 0 so that no balancing
 *     touches a possibly corrupted sibling; the block pointer found there
 *     is read, the pointer is removed with btrfs_del_ptr(), and the extent
 *     of one nodesize at that address is released with
 *     btrfs_free_extent();
 *  2. rebalance -- each recorded key is searched again with ins_len -1,
 *     relying on btrfs_search_slot() to balance the tree; the key itself
 *     is not expected to be found.
 * The transaction is then committed and the path released.
 *
 * NOTE(review): loop heads, error exits and the return value are on lines
 * not visible in this listing -- confirm.
 */
3892 * Repair the btree of the given root.
3894 * The fix is to remove the node key in corrupt_blocks cache_tree.
3895 * and rebalance the tree.
3896 * After the fix, the btree should be writeable.
3898 static int repair_btree(struct btrfs_root *root,
3899 struct cache_tree *corrupt_blocks)
3901 struct btrfs_trans_handle *trans;
3902 struct btrfs_path path;
3903 struct btrfs_corrupt_block *corrupt;
3904 struct cache_extent *cache;
3905 struct btrfs_key key;
3910 if (cache_tree_empty(corrupt_blocks))
3913 trans = btrfs_start_transaction(root, 1);
3914 if (IS_ERR(trans)) {
3915 ret = PTR_ERR(trans);
3916 fprintf(stderr, "Error starting transaction: %s\n",
3920 btrfs_init_path(&path);
3921 cache = first_cache_extent(corrupt_blocks);
3923 corrupt = container_of(cache, struct btrfs_corrupt_block,
3925 level = corrupt->level;
3926 path.lowest_level = level;
3927 key.objectid = corrupt->key.objectid;
3928 key.type = corrupt->key.type;
3929 key.offset = corrupt->key.offset;
3932 * Here we don't want to do any tree balance, since it may
3933 * cause a balance with corrupted brother leaf/node,
3934 * so ins_len set to 0 here.
3935 * Balance will be done after all corrupt node/leaf is deleted.
3937 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3940 offset = btrfs_node_blockptr(path.nodes[level],
3943 /* Remove the ptr */
3944 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3948 * Remove the corresponding extent
3949 * return value is not concerned.
3951 btrfs_release_path(&path);
3952 ret = btrfs_free_extent(trans, root, offset,
3953 root->fs_info->nodesize, 0,
3954 root->root_key.objectid, level - 1, 0);
3955 cache = next_cache_extent(cache);
3958 /* Balance the btree using btrfs_search_slot() */
3959 cache = first_cache_extent(corrupt_blocks);
3961 corrupt = container_of(cache, struct btrfs_corrupt_block,
3963 memcpy(&key, &corrupt->key, sizeof(key));
3964 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3967 /* return will always >0 since it won't find the item */
3969 btrfs_release_path(&path);
3970 cache = next_cache_extent(cache);
3973 btrfs_commit_transaction(trans, root);
3974 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree: walk it, collect inode and root records,
 * optionally repair corrupted tree blocks, then verify the inode records.
 *
 * Outline of what is visible in this listing:
 *  - a local corrupt_blocks cache tree is installed in
 *    fs_info->corrupt_blocks for the duration of the call and torn down at
 *    the end;
 *  - unless this is the tree-reloc root, its root record is fetched and
 *    found_root_item is set when the root item has refs;
 *  - orphan data extents queued on the root are moved onto the matching
 *    inode records, which get I_ERR_FILE_EXTENT_ORPHAN;
 *  - the walk control is reset for the root's level and the root block is
 *    checked with btrfs_check_leaf() / btrfs_check_node();
 *  - the walk starts at the root node, or -- for a partially dropped
 *    root -- at the position described by drop_progress / drop_level
 *    (an out-of-range drop level is reported);
 *  - walk_down_tree() / walk_up_tree() traverse the tree;
 *  - recorded corrupt tree blocks are listed and handed to repair_btree();
 *  - merge_root_recs() folds child-root records into @root_cache and
 *    check_inode_recs() verifies the inode records.
 *
 * NOTE(review): loop heads, the conditions gating the repair, error
 * accumulation and the return value are on lines not visible in this
 * listing -- confirm against the full source.
 */
3978 static int check_fs_root(struct btrfs_root *root,
3979 struct cache_tree *root_cache,
3980 struct walk_control *wc)
3986 struct btrfs_path path;
3987 struct shared_node root_node;
3988 struct root_record *rec;
3989 struct btrfs_root_item *root_item = &root->root_item;
3990 struct cache_tree corrupt_blocks;
3991 struct orphan_data_extent *orphan;
3992 struct orphan_data_extent *tmp;
3993 enum btrfs_tree_block_status status;
3994 struct node_refs nrefs;
3997 * Reuse the corrupt_block cache tree to record corrupted tree block
3999 * Unlike the usage in extent tree check, here we do it in a per
4000 * fs/subvol tree base.
4002 cache_tree_init(&corrupt_blocks);
4003 root->fs_info->corrupt_blocks = &corrupt_blocks;
4005 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4006 rec = get_root_rec(root_cache, root->root_key.objectid);
4007 BUG_ON(IS_ERR(rec));
4008 if (btrfs_root_refs(root_item) > 0)
4009 rec->found_root_item = 1;
4012 btrfs_init_path(&path);
4013 memset(&root_node, 0, sizeof(root_node));
4014 cache_tree_init(&root_node.root_cache);
4015 cache_tree_init(&root_node.inode_cache);
4016 memset(&nrefs, 0, sizeof(nrefs));
4018 /* Move the orphan extent record to corresponding inode_record */
4019 list_for_each_entry_safe(orphan, tmp,
4020 &root->orphan_data_extents, list) {
4021 struct inode_record *inode;
4023 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4025 BUG_ON(IS_ERR(inode));
4026 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4027 list_move(&orphan->list, &inode->orphan_extents);
4030 level = btrfs_header_level(root->node);
4031 memset(wc->nodes, 0, sizeof(wc->nodes));
4032 wc->nodes[level] = &root_node;
4033 wc->active_node = level;
4034 wc->root_level = level;
4036 /* We may not have checked the root block, lets do that now */
4037 if (btrfs_is_leaf(root->node))
4038 status = btrfs_check_leaf(root, NULL, root->node);
4040 status = btrfs_check_node(root, NULL, root->node);
4041 if (status != BTRFS_TREE_BLOCK_CLEAN)
4044 if (btrfs_root_refs(root_item) > 0 ||
4045 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4046 path.nodes[level] = root->node;
4047 extent_buffer_get(root->node);
4048 path.slots[level] = 0;
4050 struct btrfs_key key;
4051 struct btrfs_disk_key found_key;
4053 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4054 level = root_item->drop_level;
4055 path.lowest_level = level;
4056 if (level > btrfs_header_level(root->node) ||
4057 level >= BTRFS_MAX_LEVEL) {
4058 error("ignoring invalid drop level: %u", level);
4061 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4064 btrfs_node_key(path.nodes[level], &found_key,
4066 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4067 sizeof(found_key)));
4071 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4077 wret = walk_up_tree(root, &path, wc, &level);
4084 btrfs_release_path(&path);
4086 if (!cache_tree_empty(&corrupt_blocks)) {
4087 struct cache_extent *cache;
4088 struct btrfs_corrupt_block *corrupt;
4090 printf("The following tree block(s) is corrupted in tree %llu:\n",
4091 root->root_key.objectid);
4092 cache = first_cache_extent(&corrupt_blocks);
4094 corrupt = container_of(cache,
4095 struct btrfs_corrupt_block,
4097 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4098 cache->start, corrupt->level,
4099 corrupt->key.objectid, corrupt->key.type,
4100 corrupt->key.offset);
4101 cache = next_cache_extent(cache);
4104 printf("Try to repair the btree for root %llu\n",
4105 root->root_key.objectid);
4106 ret = repair_btree(root, &corrupt_blocks);
4108 fprintf(stderr, "Failed to repair btree: %s\n",
4111 printf("Btree for root %llu is fixed\n",
4112 root->root_key.objectid);
4116 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4120 if (root_node.current) {
4121 root_node.current->checked = 1;
4122 maybe_free_inode_rec(&root_node.inode_cache,
4126 err = check_inode_recs(root, &root_node.inode_cache);
4130 free_corrupt_blocks_tree(&corrupt_blocks);
4131 root->fs_info->corrupt_blocks = NULL;
4132 free_orphan_data_extents(&root->orphan_data_extents);
4136 static int fs_root_objectid(u64 objectid)
4138 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4139 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4141 return is_fstree(objectid);
4144 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4145 struct cache_tree *root_cache)
4147 struct btrfs_path path;
4148 struct btrfs_key key;
4149 struct walk_control wc;
4150 struct extent_buffer *leaf, *tree_node;
4151 struct btrfs_root *tmp_root;
4152 struct btrfs_root *tree_root = fs_info->tree_root;
4156 if (ctx.progress_enabled) {
4157 ctx.tp = TASK_FS_ROOTS;
4158 task_start(ctx.info);
4162 * Just in case we made any changes to the extent tree that weren't
4163 * reflected into the free space cache yet.
4166 reset_cached_block_groups(fs_info);
4167 memset(&wc, 0, sizeof(wc));
4168 cache_tree_init(&wc.shared);
4169 btrfs_init_path(&path);
4174 key.type = BTRFS_ROOT_ITEM_KEY;
4175 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4180 tree_node = tree_root->node;
4182 if (tree_node != tree_root->node) {
4183 free_root_recs_tree(root_cache);
4184 btrfs_release_path(&path);
4187 leaf = path.nodes[0];
4188 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4189 ret = btrfs_next_leaf(tree_root, &path);
4195 leaf = path.nodes[0];
4197 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4198 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4199 fs_root_objectid(key.objectid)) {
4200 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4201 tmp_root = btrfs_read_fs_root_no_cache(
4204 key.offset = (u64)-1;
4205 tmp_root = btrfs_read_fs_root(
4208 if (IS_ERR(tmp_root)) {
4212 ret = check_fs_root(tmp_root, root_cache, &wc);
4213 if (ret == -EAGAIN) {
4214 free_root_recs_tree(root_cache);
4215 btrfs_release_path(&path);
4220 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4221 btrfs_free_fs_root(tmp_root);
4222 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4223 key.type == BTRFS_ROOT_BACKREF_KEY) {
4224 process_root_ref(leaf, path.slots[0], &key,
4231 btrfs_release_path(&path);
4233 free_extent_cache_tree(&wc.shared);
4234 if (!cache_tree_empty(&wc.shared))
4235 fprintf(stderr, "warning line %d\n", __LINE__);
4237 task_stop(ctx.info);
4243 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4244 * INODE_REF/INODE_EXTREF match.
4246 * @root: the root of the fs/file tree
4247 * @ref_key: the key of the INODE_REF/INODE_EXTREF
4248 * @key: the key of the DIR_ITEM/DIR_INDEX
4249 * @index: the index in the INODE_REF/INODE_EXTREF, be used to
4250 * distinguish root_dir between normal dir/file
4251 * @name: the name in the INODE_REF/INODE_EXTREF
4252 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4253 * @mode: the st_mode of INODE_ITEM
4255 * Return 0 if no error occurred.
4256 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4257 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4259 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4260 * not match for normal dir/file.
4262 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4263 struct btrfs_key *key, u64 index, char *name,
4264 u32 namelen, u32 mode)
4266 struct btrfs_path path;
4267 struct extent_buffer *node;
4268 struct btrfs_dir_item *di;
4269 struct btrfs_key location;
4270 char namebuf[BTRFS_NAME_LEN] = {0};
4280 btrfs_init_path(&path);
4281 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4283 ret = DIR_ITEM_MISSING;
4287 /* Process root dir and goto out*/
4290 ret = ROOT_DIR_ERROR;
4292 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4294 ref_key->type == BTRFS_INODE_REF_KEY ?
4296 ref_key->objectid, ref_key->offset,
4297 key->type == BTRFS_DIR_ITEM_KEY ?
4298 "DIR_ITEM" : "DIR_INDEX");
4306 /* Process normal file/dir */
4308 ret = DIR_ITEM_MISSING;
4310 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4312 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4313 ref_key->objectid, ref_key->offset,
4314 key->type == BTRFS_DIR_ITEM_KEY ?
4315 "DIR_ITEM" : "DIR_INDEX",
4316 key->objectid, key->offset, namelen, name,
4317 imode_to_type(mode));
4321 /* Check whether inode_id/filetype/name match */
4322 node = path.nodes[0];
4323 slot = path.slots[0];
4324 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4325 total = btrfs_item_size_nr(node, slot);
4326 while (cur < total) {
4327 ret = DIR_ITEM_MISMATCH;
4328 name_len = btrfs_dir_name_len(node, di);
4329 data_len = btrfs_dir_data_len(node, di);
4331 btrfs_dir_item_key_to_cpu(node, di, &location);
4332 if (location.objectid != ref_key->objectid ||
4333 location.type != BTRFS_INODE_ITEM_KEY ||
4334 location.offset != 0)
4337 filetype = btrfs_dir_type(node, di);
4338 if (imode_to_type(mode) != filetype)
4341 if (cur + sizeof(*di) + name_len > total ||
4342 name_len > BTRFS_NAME_LEN) {
4343 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4345 key->type == BTRFS_DIR_ITEM_KEY ?
4346 "DIR_ITEM" : "DIR_INDEX",
4347 key->objectid, key->offset, name_len);
4349 if (cur + sizeof(*di) > total)
4351 len = min_t(u32, total - cur - sizeof(*di),
4357 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4358 if (len != namelen || strncmp(namebuf, name, len))
4364 len = sizeof(*di) + name_len + data_len;
4365 di = (struct btrfs_dir_item *)((char *)di + len);
4368 if (ret == DIR_ITEM_MISMATCH)
4370 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4372 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4373 ref_key->objectid, ref_key->offset,
4374 key->type == BTRFS_DIR_ITEM_KEY ?
4375 "DIR_ITEM" : "DIR_INDEX",
4376 key->objectid, key->offset, namelen, name,
4377 imode_to_type(mode));
4379 btrfs_release_path(&path);
4384 * Traverse the given INODE_REF and call find_dir_item() to find related
4385 * DIR_ITEM/DIR_INDEX.
4387 * @root: the root of the fs/file tree
4388 * @ref_key: the key of the INODE_REF
4389 * @refs: the count of INODE_REF
4390 * @mode: the st_mode of INODE_ITEM
4392 * Return 0 if no error occurred.
4394 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4395 struct extent_buffer *node, int slot, u64 *refs,
4398 struct btrfs_key key;
4399 struct btrfs_inode_ref *ref;
4400 char namebuf[BTRFS_NAME_LEN] = {0};
4408 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4409 total = btrfs_item_size_nr(node, slot);
4412 /* Update inode ref count */
4415 index = btrfs_inode_ref_index(node, ref);
4416 name_len = btrfs_inode_ref_name_len(node, ref);
4417 if (cur + sizeof(*ref) + name_len > total ||
4418 name_len > BTRFS_NAME_LEN) {
4419 warning("root %llu INODE_REF[%llu %llu] name too long",
4420 root->objectid, ref_key->objectid, ref_key->offset);
4422 if (total < cur + sizeof(*ref))
4424 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4429 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4431 /* Check root dir ref name */
4432 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4433 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4434 root->objectid, ref_key->objectid, ref_key->offset,
4436 err |= ROOT_DIR_ERROR;
4439 /* Find related DIR_INDEX */
4440 key.objectid = ref_key->offset;
4441 key.type = BTRFS_DIR_INDEX_KEY;
4443 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4446 /* Find related dir_item */
4447 key.objectid = ref_key->offset;
4448 key.type = BTRFS_DIR_ITEM_KEY;
4449 key.offset = btrfs_name_hash(namebuf, len);
4450 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4453 len = sizeof(*ref) + name_len;
4454 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4464 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4465 * DIR_ITEM/DIR_INDEX.
4467 * @root: the root of the fs/file tree
4468 * @ref_key: the key of the INODE_EXTREF
4469 * @refs: the count of INODE_EXTREF
4470 * @mode: the st_mode of INODE_ITEM
4472 * Return 0 if no error occurred.
4474 static int check_inode_extref(struct btrfs_root *root,
4475 struct btrfs_key *ref_key,
4476 struct extent_buffer *node, int slot, u64 *refs,
4479 struct btrfs_key key;
4480 struct btrfs_inode_extref *extref;
4481 char namebuf[BTRFS_NAME_LEN] = {0};
4491 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4492 total = btrfs_item_size_nr(node, slot);
4495 /* update inode ref count */
4497 name_len = btrfs_inode_extref_name_len(node, extref);
4498 index = btrfs_inode_extref_index(node, extref);
4499 parent = btrfs_inode_extref_parent(node, extref);
4500 if (name_len <= BTRFS_NAME_LEN) {
4503 len = BTRFS_NAME_LEN;
4504 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4505 root->objectid, ref_key->objectid, ref_key->offset);
4507 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4509 /* Check root dir ref name */
4510 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4511 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4512 root->objectid, ref_key->objectid, ref_key->offset,
4514 err |= ROOT_DIR_ERROR;
4517 /* find related dir_index */
4518 key.objectid = parent;
4519 key.type = BTRFS_DIR_INDEX_KEY;
4521 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4524 /* find related dir_item */
4525 key.objectid = parent;
4526 key.type = BTRFS_DIR_ITEM_KEY;
4527 key.offset = btrfs_name_hash(namebuf, len);
4528 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4531 len = sizeof(*extref) + name_len;
4532 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4542 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4543 * DIR_ITEM/DIR_INDEX match.
4544 * Return with @index_ret.
4546 * @root: the root of the fs/file tree
4547 * @key: the key of the INODE_REF/INODE_EXTREF
4548 * @name: the name in the INODE_REF/INODE_EXTREF
4549 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4550 * @index_ret: the index in the INODE_REF/INODE_EXTREF,
4551 * value (u64)-1 means do not check index
4552 * @ext_ref: the EXTENDED_IREF feature
4554 * Return 0 if no error occurred.
4555 * Return >0 for error bitmap
4557 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4558 char *name, int namelen, u64 *index_ret,
4559 unsigned int ext_ref)
4561 struct btrfs_path path;
4562 struct btrfs_inode_ref *ref;
4563 struct btrfs_inode_extref *extref;
4564 struct extent_buffer *node;
4565 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4578 btrfs_init_path(&path);
4579 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4581 ret = INODE_REF_MISSING;
4585 node = path.nodes[0];
4586 slot = path.slots[0];
4588 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4589 total = btrfs_item_size_nr(node, slot);
4591 /* Iterate all entry of INODE_REF */
4592 while (cur < total) {
4593 ret = INODE_REF_MISSING;
4595 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4596 ref_index = btrfs_inode_ref_index(node, ref);
4597 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4600 if (cur + sizeof(*ref) + ref_namelen > total ||
4601 ref_namelen > BTRFS_NAME_LEN) {
4602 warning("root %llu INODE %s[%llu %llu] name too long",
4604 key->type == BTRFS_INODE_REF_KEY ?
4606 key->objectid, key->offset);
4608 if (cur + sizeof(*ref) > total)
4610 len = min_t(u32, total - cur - sizeof(*ref),
4616 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4619 if (len != namelen || strncmp(ref_namebuf, name, len))
4622 *index_ret = ref_index;
4626 len = sizeof(*ref) + ref_namelen;
4627 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4632 /* Skip if not support EXTENDED_IREF feature */
4636 btrfs_release_path(&path);
4637 btrfs_init_path(&path);
4639 dir_id = key->offset;
4640 key->type = BTRFS_INODE_EXTREF_KEY;
4641 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4643 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4645 ret = INODE_REF_MISSING;
4649 node = path.nodes[0];
4650 slot = path.slots[0];
4652 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4654 total = btrfs_item_size_nr(node, slot);
4656 /* Iterate all entry of INODE_EXTREF */
4657 while (cur < total) {
4658 ret = INODE_REF_MISSING;
4660 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4661 ref_index = btrfs_inode_extref_index(node, extref);
4662 parent = btrfs_inode_extref_parent(node, extref);
4663 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4666 if (parent != dir_id)
4669 if (ref_namelen <= BTRFS_NAME_LEN) {
4672 len = BTRFS_NAME_LEN;
4673 warning("root %llu INODE %s[%llu %llu] name too long",
4675 key->type == BTRFS_INODE_REF_KEY ?
4677 key->objectid, key->offset);
4679 read_extent_buffer(node, ref_namebuf,
4680 (unsigned long)(extref + 1), len);
4682 if (len != namelen || strncmp(ref_namebuf, name, len))
4685 *index_ret = ref_index;
4690 len = sizeof(*extref) + ref_namelen;
4691 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4696 btrfs_release_path(&path);
4701 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4702 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4704 * @root: the root of the fs/file tree
4705 * @key: the key of the DIR_ITEM/DIR_INDEX
4706 * @size: the st_size of the INODE_ITEM
4707 * @ext_ref: the EXTENDED_IREF feature
4709 * Return 0 if no error occurred.
4711 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4712 struct extent_buffer *node, int slot, u64 *size,
4713 unsigned int ext_ref)
4715 struct btrfs_dir_item *di;
4716 struct btrfs_inode_item *ii;
4717 struct btrfs_path path;
4718 struct btrfs_key location;
4719 char namebuf[BTRFS_NAME_LEN] = {0};
4732 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4733 * ignore index check.
4735 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4737 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4738 total = btrfs_item_size_nr(node, slot);
4740 while (cur < total) {
4741 data_len = btrfs_dir_data_len(node, di);
4743 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4744 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4745 "DIR_ITEM" : "DIR_INDEX",
4746 key->objectid, key->offset, data_len);
4748 name_len = btrfs_dir_name_len(node, di);
4749 if (cur + sizeof(*di) + name_len > total ||
4750 name_len > BTRFS_NAME_LEN) {
4751 warning("root %llu %s[%llu %llu] name too long",
4753 key->type == BTRFS_DIR_ITEM_KEY ?
4754 "DIR_ITEM" : "DIR_INDEX",
4755 key->objectid, key->offset);
4757 if (cur + sizeof(*di) > total)
4759 len = min_t(u32, total - cur - sizeof(*di),
4764 (*size) += name_len;
4766 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4767 filetype = btrfs_dir_type(node, di);
4769 if (key->type == BTRFS_DIR_ITEM_KEY &&
4770 key->offset != btrfs_name_hash(namebuf, len)) {
4772 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4773 root->objectid, key->objectid, key->offset,
4774 namebuf, len, filetype, key->offset,
4775 btrfs_name_hash(namebuf, len));
4778 btrfs_init_path(&path);
4779 btrfs_dir_item_key_to_cpu(node, di, &location);
4781 /* Ignore related ROOT_ITEM check */
4782 if (location.type == BTRFS_ROOT_ITEM_KEY)
4785 /* Check relative INODE_ITEM(existence/filetype) */
4786 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4788 err |= INODE_ITEM_MISSING;
4789 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4790 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4791 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4792 key->offset, location.objectid, name_len,
4797 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4798 struct btrfs_inode_item);
4799 mode = btrfs_inode_mode(path.nodes[0], ii);
4801 if (imode_to_type(mode) != filetype) {
4802 err |= INODE_ITEM_MISMATCH;
4803 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4804 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4805 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4806 key->offset, name_len, namebuf, filetype);
4809 /* Check relative INODE_REF/INODE_EXTREF */
4810 location.type = BTRFS_INODE_REF_KEY;
4811 location.offset = key->objectid;
4812 ret = find_inode_ref(root, &location, namebuf, len,
4815 if (ret & INODE_REF_MISSING)
4816 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4817 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4818 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4819 key->offset, name_len, namebuf, filetype);
4822 btrfs_release_path(&path);
4823 len = sizeof(*di) + name_len + data_len;
4824 di = (struct btrfs_dir_item *)((char *)di + len);
4827 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4828 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4829 root->objectid, key->objectid, key->offset);
4838 * Check file extent datasum/hole, update the size of the file extents,
4839 * check and update the last offset of the file extent.
4841 * @root: the root of fs/file tree.
4842 * @fkey: the key of the file extent.
4843 * @nodatasum: INODE_NODATASUM feature.
4844 * @size: the sum of all EXTENT_DATA items size for this inode.
4845 * @end: the offset of the last extent.
4847 * Return 0 if no error occurred.
4849 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4850 struct extent_buffer *node, int slot,
4851 unsigned int nodatasum, u64 *size, u64 *end)
4853 struct btrfs_file_extent_item *fi;
4856 u64 extent_num_bytes;
4858 u64 csum_found; /* In byte size, sectorsize aligned */
4859 u64 search_start; /* Logical range start we search for csum */
4860 u64 search_len; /* Logical range len we search for csum */
4861 unsigned int extent_type;
4862 unsigned int is_hole;
4867 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4869 /* Check inline extent */
4870 extent_type = btrfs_file_extent_type(node, fi);
4871 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4872 struct btrfs_item *e = btrfs_item_nr(slot);
4873 u32 item_inline_len;
4875 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4876 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4877 compressed = btrfs_file_extent_compression(node, fi);
4878 if (extent_num_bytes == 0) {
4880 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4881 root->objectid, fkey->objectid, fkey->offset);
4882 err |= FILE_EXTENT_ERROR;
4884 if (!compressed && extent_num_bytes != item_inline_len) {
4886 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4887 root->objectid, fkey->objectid, fkey->offset,
4888 extent_num_bytes, item_inline_len);
4889 err |= FILE_EXTENT_ERROR;
4891 *end += extent_num_bytes;
4892 *size += extent_num_bytes;
4896 /* Check extent type */
4897 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4898 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4899 err |= FILE_EXTENT_ERROR;
4900 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4901 root->objectid, fkey->objectid, fkey->offset);
4905 /* Check REG_EXTENT/PREALLOC_EXTENT */
4906 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4907 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4908 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4909 extent_offset = btrfs_file_extent_offset(node, fi);
4910 compressed = btrfs_file_extent_compression(node, fi);
4911 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4914 * Check EXTENT_DATA csum
4916 * For plain (uncompressed) extent, we should only check the range
4917 * we're referring to, as it's possible that part of prealloc extent
4918 * has been written, and has csum:
4920 * |<--- Original large preallocated extent A ---->|
4921 * |<- Prealloc File Extent ->|<- Regular Extent ->|
4924 * For compressed extent, we should check the whole range.
4927 search_start = disk_bytenr + extent_offset;
4928 search_len = extent_num_bytes;
4930 search_start = disk_bytenr;
4931 search_len = disk_num_bytes;
4933 ret = count_csum_range(root, search_start, search_len, &csum_found);
4934 if (csum_found > 0 && nodatasum) {
4935 err |= ODD_CSUM_ITEM;
4936 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4937 root->objectid, fkey->objectid, fkey->offset);
4938 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4939 !is_hole && (ret < 0 || csum_found < search_len)) {
4940 err |= CSUM_ITEM_MISSING;
4941 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4942 root->objectid, fkey->objectid, fkey->offset,
4943 csum_found, search_len);
4944 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4945 err |= ODD_CSUM_ITEM;
4946 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4947 root->objectid, fkey->objectid, fkey->offset, csum_found);
4950 /* Check EXTENT_DATA hole */
4951 if (!no_holes && *end != fkey->offset) {
4952 err |= FILE_EXTENT_ERROR;
4953 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4954 root->objectid, fkey->objectid, fkey->offset);
4957 *end += extent_num_bytes;
4959 *size += extent_num_bytes;
4965 * Set inode item nbytes to @nbytes
4967 * Returns 0 on success
4968 * Returns != 0 on error
4970 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
4971 struct btrfs_path *path,
4972 u64 ino, u64 nbytes)
4974 struct btrfs_trans_handle *trans;
4975 struct btrfs_inode_item *ii;
4976 struct btrfs_key key;
4977 struct btrfs_key research_key;
4981 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
4984 key.type = BTRFS_INODE_ITEM_KEY;
4987 trans = btrfs_start_transaction(root, 1);
4988 if (IS_ERR(trans)) {
4989 ret = PTR_ERR(trans);
4994 btrfs_release_path(path);
4995 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5003 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5004 struct btrfs_inode_item);
5005 btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5006 btrfs_mark_buffer_dirty(path->nodes[0]);
5008 btrfs_commit_transaction(trans, root);
5011 error("failed to set nbytes in inode %llu root %llu",
5012 ino, root->root_key.objectid);
5014 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5015 root->root_key.objectid, nbytes);
5018 btrfs_release_path(path);
5019 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5026 * Set directory inode isize to @isize.
5028 * Returns 0 on success.
5029 * Returns != 0 on error.
5031 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5032 struct btrfs_path *path,
5035 struct btrfs_trans_handle *trans;
5036 struct btrfs_inode_item *ii;
5037 struct btrfs_key key;
5038 struct btrfs_key research_key;
5042 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5045 key.type = BTRFS_INODE_ITEM_KEY;
5048 trans = btrfs_start_transaction(root, 1);
5049 if (IS_ERR(trans)) {
5050 ret = PTR_ERR(trans);
5055 btrfs_release_path(path);
5056 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5064 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5065 struct btrfs_inode_item);
5066 btrfs_set_inode_size(path->nodes[0], ii, isize);
5067 btrfs_mark_buffer_dirty(path->nodes[0]);
5069 btrfs_commit_transaction(trans, root);
5072 error("failed to set isize in inode %llu root %llu",
5073 ino, root->root_key.objectid);
5075 printf("Set isize in inode %llu root %llu to %llu\n",
5076 ino, root->root_key.objectid, isize);
5078 btrfs_release_path(path);
5079 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5086 * Wrapper function for btrfs_add_orphan_item().
5088 * Returns 0 on success.
5089 * Returns != 0 on error.
5091 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5092 struct btrfs_path *path, u64 ino)
5094 struct btrfs_trans_handle *trans;
5095 struct btrfs_key research_key;
5099 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5101 trans = btrfs_start_transaction(root, 1);
5102 if (IS_ERR(trans)) {
5103 ret = PTR_ERR(trans);
5108 btrfs_release_path(path);
5109 ret = btrfs_add_orphan_item(trans, root, path, ino);
5111 btrfs_commit_transaction(trans, root);
5114 error("failed to add inode %llu as orphan item root %llu",
5115 ino, root->root_key.objectid);
5117 printf("Added inode %llu as orphan item root %llu\n",
5118 ino, root->root_key.objectid);
5120 btrfs_release_path(path);
5121 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5128 * Check INODE_ITEM and related ITEMs (the same inode number)
5129 * 1. check link count
5130 * 2. check inode ref/extref
5131 * 3. check dir item/index
5133 * @ext_ref: the EXTENDED_IREF feature
5135 * Return 0 if no error occurred.
5136 * Return >0 for error or hit the traversal is done(by error bitmap)
5138 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5139 unsigned int ext_ref)
5141 struct extent_buffer *node;
5142 struct btrfs_inode_item *ii;
5143 struct btrfs_key key;
5152 u64 extent_size = 0;
5154 unsigned int nodatasum;
5159 node = path->nodes[0];
5160 slot = path->slots[0];
5162 btrfs_item_key_to_cpu(node, &key, slot);
5163 inode_id = key.objectid;
5165 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5166 ret = btrfs_next_item(root, path);
5172 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5173 isize = btrfs_inode_size(node, ii);
5174 nbytes = btrfs_inode_nbytes(node, ii);
5175 mode = btrfs_inode_mode(node, ii);
5176 dir = imode_to_type(mode) == BTRFS_FT_DIR;
5177 nlink = btrfs_inode_nlink(node, ii);
5178 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5181 ret = btrfs_next_item(root, path);
5183 /* out will fill 'err' using current statistics */
5185 } else if (ret > 0) {
5190 node = path->nodes[0];
5191 slot = path->slots[0];
5192 btrfs_item_key_to_cpu(node, &key, slot);
5193 if (key.objectid != inode_id)
5197 case BTRFS_INODE_REF_KEY:
5198 ret = check_inode_ref(root, &key, node, slot, &refs,
5202 case BTRFS_INODE_EXTREF_KEY:
5203 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5204 warning("root %llu EXTREF[%llu %llu] isn't supported",
5205 root->objectid, key.objectid,
5207 ret = check_inode_extref(root, &key, node, slot, &refs,
5211 case BTRFS_DIR_ITEM_KEY:
5212 case BTRFS_DIR_INDEX_KEY:
5214 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5215 root->objectid, inode_id,
5216 imode_to_type(mode), key.objectid,
5219 ret = check_dir_item(root, &key, node, slot, &size,
5223 case BTRFS_EXTENT_DATA_KEY:
5225 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5226 root->objectid, inode_id, key.objectid,
5229 ret = check_file_extent(root, &key, node, slot,
5230 nodatasum, &extent_size,
5234 case BTRFS_XATTR_ITEM_KEY:
5237 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5238 key.objectid, key.type, key.offset);
5243 /* verify INODE_ITEM nlink/isize/nbytes */
5246 err |= LINK_COUNT_ERROR;
5247 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5248 root->objectid, inode_id, nlink);
5252 * Just a warning, as dir inode nbytes is just an
5253 * instructive value.
5255 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5256 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5257 root->objectid, inode_id,
5258 root->fs_info->nodesize);
5261 if (isize != size) {
5263 ret = repair_dir_isize_lowmem(root, path,
5265 if (!repair || ret) {
5268 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5269 root->objectid, inode_id, isize, size);
5273 if (nlink != refs) {
5274 err |= LINK_COUNT_ERROR;
5275 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5276 root->objectid, inode_id, nlink, refs);
5277 } else if (!nlink) {
5279 ret = repair_inode_orphan_item_lowmem(root,
5281 if (!repair || ret) {
5283 error("root %llu INODE[%llu] is orphan item",
5284 root->objectid, inode_id);
5288 if (!nbytes && !no_holes && extent_end < isize) {
5289 err |= NBYTES_ERROR;
5290 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5291 root->objectid, inode_id, isize);
5294 if (nbytes != extent_size) {
5296 ret = repair_inode_nbytes_lowmem(root, path,
5297 inode_id, extent_size);
5298 if (!repair || ret) {
5299 err |= NBYTES_ERROR;
5301 "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5302 root->objectid, inode_id, nbytes,
5312 * check first root dir's inode_item and inode_ref
5314 * returns 0 means no error
5315 * returns >0 means error
5316 * returns <0 means fatal error
5318 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5320 struct btrfs_path path;
5321 struct btrfs_key key;
5322 struct btrfs_inode_item *ii;
5328 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5329 key.type = BTRFS_INODE_ITEM_KEY;
5332 /* For root being dropped, we don't need to check first inode */
5333 if (btrfs_root_refs(&root->root_item) == 0 &&
5334 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5335 BTRFS_FIRST_FREE_OBJECTID)
5338 btrfs_init_path(&path);
5339 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5344 err |= INODE_ITEM_MISSING;
5346 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
5347 struct btrfs_inode_item);
5348 mode = btrfs_inode_mode(path.nodes[0], ii);
5349 if (imode_to_type(mode) != BTRFS_FT_DIR)
5350 err |= INODE_ITEM_MISMATCH;
5353 /* lookup first inode ref */
5354 key.offset = BTRFS_FIRST_FREE_OBJECTID;
5355 key.type = BTRFS_INODE_REF_KEY;
5356 /* special index value */
5359 ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
5365 btrfs_release_path(&path);
5366 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
5367 error("root dir INODE_ITEM is %s",
5368 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
5369 if (err & INODE_REF_MISSING)
5370 error("root dir INODE_REF is missing");
5372 return ret < 0 ? ret : err;
5375 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5376 u64 parent, u64 root)
5378 struct rb_node *node;
5379 struct tree_backref *back = NULL;
5380 struct tree_backref match = {
5387 match.parent = parent;
5388 match.node.full_backref = 1;
5393 node = rb_search(&rec->backref_tree, &match.node.node,
5394 (rb_compare_keys)compare_extent_backref, NULL);
5396 back = to_tree_backref(rb_node_to_extent_backref(node));
5401 static struct data_backref *find_data_backref(struct extent_record *rec,
5402 u64 parent, u64 root,
5403 u64 owner, u64 offset,
5405 u64 disk_bytenr, u64 bytes)
5407 struct rb_node *node;
5408 struct data_backref *back = NULL;
5409 struct data_backref match = {
5416 .found_ref = found_ref,
5417 .disk_bytenr = disk_bytenr,
5421 match.parent = parent;
5422 match.node.full_backref = 1;
5427 node = rb_search(&rec->backref_tree, &match.node.node,
5428 (rb_compare_keys)compare_extent_backref, NULL);
5430 back = to_data_backref(rb_node_to_extent_backref(node));
5435 * Iterate all item on the tree and call check_inode_item() to check.
5437 * @root: the root of the tree to be checked.
5438 * @ext_ref: the EXTENDED_IREF feature
5440 * Return 0 if no error found.
5441 * Return <0 for error.
/*
 * Check one fs/subvolume tree: first the root directory inode via
 * check_fs_first_inode(), then the whole tree by alternating
 * walk_down_tree_v2() and walk_up_tree_v2() on a local path.
 */
5443 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5445 struct btrfs_path path;
5446 struct node_refs nrefs;
5447 struct btrfs_root_item *root_item = &root->root_item;
5453 * We need to manually check the first inode item(256)
5454 * As the following traversal function will only start from
5455 * the first inode item in the leaf, if inode item(256) is missing
5456 * we will just skip it forever.
5458 ret = check_fs_first_inode(root, ext_ref);
5463 memset(&nrefs, 0, sizeof(nrefs));
5464 level = btrfs_header_level(root->node);
5465 btrfs_init_path(&path);
/*
 * Root still referenced, or no drop progress recorded: start the walk at
 * slot 0 of the root node and take an extra reference on that buffer.
 */
5467 if (btrfs_root_refs(root_item) > 0 ||
5468 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5469 path.nodes[level] = root->node;
5470 path.slots[level] = 0;
5471 extent_buffer_get(root->node);
/*
 * Other case: position the path at the drop_progress key, searching down
 * only to the level stored in drop_level.
 */
5473 struct btrfs_key key;
5475 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5476 level = root_item->drop_level;
5477 path.lowest_level = level;
5478 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5485 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5488 /* if ret is negative, walk shall stop */
5494 ret = walk_up_tree_v2(root, &path, &level);
5496 /* Normal exit, reset ret to err */
5503 btrfs_release_path(&path);
5508 * Find the relative ref for root_ref and root_backref.
5510 * @root: the root of the root tree.
5511 * @ref_key: the key of the root ref.
5513 * Return 0 if no error occurred.
/*
 * Cross-check one ROOT_REF/ROOT_BACKREF item against its counterpart.
 *
 * The item at @node/@slot is decoded, the key of the opposite item type is
 * built from @ref_key and searched for in @root, and dirid, sequence, name
 * length and name of both items are compared. ROOT_REF_MISSING and
 * ROOT_REF_MISMATCH are the error bits set by the visible code.
 */
5515 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5516 struct extent_buffer *node, int slot)
5518 struct btrfs_path path;
5519 struct btrfs_key key;
5520 struct btrfs_root_ref *ref;
5521 struct btrfs_root_ref *backref;
5522 char ref_name[BTRFS_NAME_LEN] = {0};
5523 char backref_name[BTRFS_NAME_LEN] = {0};
5529 u32 backref_namelen;
/* Decode the fixed fields of the item being checked */
5534 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5535 ref_dirid = btrfs_root_ref_dirid(node, ref);
5536 ref_seq = btrfs_root_ref_sequence(node, ref);
5537 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* The local name buffers hold at most BTRFS_NAME_LEN bytes */
5539 if (ref_namelen <= BTRFS_NAME_LEN) {
5542 len = BTRFS_NAME_LEN;
5543 warning("%s[%llu %llu] ref_name too long",
5544 ref_key->type == BTRFS_ROOT_REF_KEY ?
5545 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
/* The name bytes are read from directly behind the btrfs_root_ref struct */
5548 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5550 /* Find relative root_ref */
/*
 * The counterpart swaps objectid and offset; the type expression maps
 * BTRFS_ROOT_REF_KEY to BTRFS_ROOT_BACKREF_KEY and vice versa.
 */
5551 key.objectid = ref_key->offset;
5552 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5553 key.offset = ref_key->objectid;
5555 btrfs_init_path(&path);
5556 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5558 err |= ROOT_REF_MISSING;
5559 error("%s[%llu %llu] couldn't find relative ref",
5560 ref_key->type == BTRFS_ROOT_REF_KEY ?
5561 "ROOT_REF" : "ROOT_BACKREF",
5562 ref_key->objectid, ref_key->offset);
/* Decode the counterpart item the same way */
5566 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5567 struct btrfs_root_ref);
5568 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5569 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5570 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5572 if (backref_namelen <= BTRFS_NAME_LEN) {
5573 len = backref_namelen;
5575 len = BTRFS_NAME_LEN;
5576 warning("%s[%llu %llu] ref_name too long",
5577 key.type == BTRFS_ROOT_REF_KEY ?
5578 "ROOT_REF" : "ROOT_BACKREF",
5579 key.objectid, key.offset);
5581 read_extent_buffer(path.nodes[0], backref_name,
5582 (unsigned long)(backref + 1), len);
/* len holds the counterpart's (clamped) name length at this point */
5584 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5585 ref_namelen != backref_namelen ||
5586 strncmp(ref_name, backref_name, len)) {
5587 err |= ROOT_REF_MISMATCH;
5588 error("%s[%llu %llu] mismatch relative ref",
5589 ref_key->type == BTRFS_ROOT_REF_KEY ?
5590 "ROOT_REF" : "ROOT_BACKREF",
5591 ref_key->objectid, ref_key->offset);
5594 btrfs_release_path(&path);
5599 * Check all fs/file tree in low_memory mode.
5601 * 1. for fs tree root item, call check_fs_root_v2()
5602 * 2. for fs tree root ref/backref, call check_root_ref()
5604 * Return 0 if no error occurred.
/*
 * Walk the tree root starting at the BTRFS_FS_TREE_OBJECTID root item and
 * dispatch every item: fs tree root items go to check_fs_root_v2(),
 * ROOT_REF and ROOT_BACKREF items go to check_root_ref().
 */
5606 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5608 struct btrfs_root *tree_root = fs_info->tree_root;
5609 struct btrfs_root *cur_root = NULL;
5610 struct btrfs_path path;
5611 struct btrfs_key key;
5612 struct extent_buffer *node;
5613 unsigned int ext_ref;
/* EXTENDED_IREF incompat flag, handed down to the per-root check */
5618 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5620 btrfs_init_path(&path);
5621 key.objectid = BTRFS_FS_TREE_OBJECTID;
5623 key.type = BTRFS_ROOT_ITEM_KEY;
5625 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5629 } else if (ret > 0) {
/* Decode the key at the current path position */
5635 node = path.nodes[0];
5636 slot = path.slots[0];
5637 btrfs_item_key_to_cpu(node, &key, slot);
5638 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5640 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5641 fs_root_objectid(key.objectid)) {
/* The reloc tree is read with the uncached reader; other trees are not */
5642 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5643 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5646 key.offset = (u64)-1;
5647 cur_root = btrfs_read_fs_root(fs_info, &key);
5650 if (IS_ERR(cur_root)) {
5651 error("Fail to read fs/subvol tree: %lld",
5657 ret = check_fs_root_v2(cur_root, ext_ref);
/* Only the uncached reloc root is freed here */
5660 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5661 btrfs_free_fs_root(cur_root);
5662 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5663 key.type == BTRFS_ROOT_BACKREF_KEY) {
5664 ret = check_root_ref(tree_root, &key, node, slot);
5668 ret = btrfs_next_item(tree_root, &path);
5678 btrfs_release_path(&path);
/*
 * Run the fs roots check, choosing the implementation by the global
 * check_mode: check_fs_roots_v2() for CHECK_MODE_LOWMEM, check_fs_roots()
 * otherwise. The "checking fs roots" banner is printed only when progress
 * reporting is disabled.
 */
5682 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
5683 struct cache_tree *root_cache)
5687 if (!ctx.progress_enabled)
5688 fprintf(stderr, "checking fs roots\n");
5689 if (check_mode == CHECK_MODE_LOWMEM)
5690 ret = check_fs_roots_v2(fs_info);
5692 ret = check_fs_roots(fs_info, root_cache);
/*
 * Verify every backref attached to @rec and the record's total ref count.
 *
 * Each backref in rec->backref_tree is tested for: presence in the extent
 * tree, being referenced back (tree backrefs), matching local ref counts,
 * disk bytenr and byte length (data backrefs). The references counted along
 * the way are finally compared with rec->refs. Problems are reported on
 * stderr. maybe_free_extent_rec() treats a zero return as "nothing wrong".
 */
5697 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5699 struct extent_backref *back, *tmp;
5700 struct tree_backref *tback;
5701 struct data_backref *dback;
5705 rbtree_postorder_for_each_entry_safe(back, tmp,
5706 &rec->backref_tree, node) {
/* Backref that was never seen in the extent tree */
5707 if (!back->found_extent_tree) {
5711 if (back->is_data) {
5712 dback = to_data_backref(back);
5713 fprintf(stderr, "Data backref %llu %s %llu"
5714 " owner %llu offset %llu num_refs %lu"
5715 " not found in extent tree\n",
5716 (unsigned long long)rec->start,
5717 back->full_backref ?
5719 back->full_backref ?
5720 (unsigned long long)dback->parent:
5721 (unsigned long long)dback->root,
5722 (unsigned long long)dback->owner,
5723 (unsigned long long)dback->offset,
5724 (unsigned long)dback->num_refs);
5726 tback = to_tree_backref(back);
5727 fprintf(stderr, "Tree backref %llu parent %llu"
5728 " root %llu not found in extent tree\n",
5729 (unsigned long long)rec->start,
5730 (unsigned long long)tback->parent,
5731 (unsigned long long)tback->root);
/* Tree backref without a matching reference */
5734 if (!back->is_data && !back->found_ref) {
5738 tback = to_tree_backref(back);
5739 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5740 (unsigned long long)rec->start,
5741 back->full_backref ? "parent" : "root",
5742 back->full_backref ?
5743 (unsigned long long)tback->parent :
5744 (unsigned long long)tback->root, back);
/* Data backref: found count must equal the expected count */
5746 if (back->is_data) {
5747 dback = to_data_backref(back);
5748 if (dback->found_ref != dback->num_refs) {
5752 fprintf(stderr, "Incorrect local backref count"
5753 " on %llu %s %llu owner %llu"
5754 " offset %llu found %u wanted %u back %p\n",
5755 (unsigned long long)rec->start,
5756 back->full_backref ?
5758 back->full_backref ?
5759 (unsigned long long)dback->parent:
5760 (unsigned long long)dback->root,
5761 (unsigned long long)dback->owner,
5762 (unsigned long long)dback->offset,
5763 dback->found_ref, dback->num_refs, back);
/* Data backref must point at the start of this record */
5765 if (dback->disk_bytenr != rec->start) {
5769 fprintf(stderr, "Backref disk bytenr does not"
5770 " match extent record, bytenr=%llu, "
5771 "ref bytenr=%llu\n",
5772 (unsigned long long)rec->start,
5773 (unsigned long long)dback->disk_bytenr);
/* Data backref length must equal the record length */
5776 if (dback->bytes != rec->nr) {
5780 fprintf(stderr, "Backref bytes do not match "
5781 "extent backref, bytenr=%llu, ref "
5782 "bytes=%llu, backref bytes=%llu\n",
5783 (unsigned long long)rec->start,
5784 (unsigned long long)rec->nr,
5785 (unsigned long long)dback->bytes);
/* Accumulate the number of references found for the global comparison */
5788 if (!back->is_data) {
5791 dback = to_data_backref(back);
5792 found += dback->found_ref;
/* Total found references versus the count stored in the record */
5795 if (found != rec->refs) {
5799 fprintf(stderr, "Incorrect global backref count "
5800 "on %llu found %llu wanted %llu\n",
5801 (unsigned long long)rec->start,
5802 (unsigned long long)found,
5803 (unsigned long long)rec->refs);
/*
 * Per-node callback handed to rb_free_nodes() by free_all_extent_backrefs();
 * maps the rb node back to the extent_backref that embeds it.
 */
5809 static void __free_one_backref(struct rb_node *node)
5811 struct extent_backref *back = rb_node_to_extent_backref(node);
/* Release every backref in rec->backref_tree via __free_one_backref(). */
5816 static void free_all_extent_backrefs(struct extent_record *rec)
5818 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Tear down @extent_cache: each cache entry is mapped to its extent_record,
 * removed from the cache, and its backrefs are released.
 */
5821 static void free_extent_record_cache(struct cache_tree *extent_cache)
5823 struct cache_extent *cache;
5824 struct extent_record *rec;
5827 cache = first_cache_extent(extent_cache);
/* The cache_extent is embedded in the extent_record */
5830 rec = container_of(cache, struct extent_record, cache);
5831 remove_cache_extent(extent_cache, cache);
5832 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing is left to verify on it.
 *
 * The record is released only when content and owner ref were checked, the
 * extent item ref count equals the non-zero found ref count, there are no
 * duplicates, all_backpointers_checked() returns 0, and none of the
 * bad_full_backref / crossing_stripes / wrong_chunk_type flags is set.
 */
5837 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5838 struct extent_record *rec)
5840 if (rec->content_checked && rec->owner_ref_checked &&
5841 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5842 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5843 !rec->bad_full_backref && !rec->crossing_stripes &&
5844 !rec->wrong_chunk_type) {
/* Unlink from the cache, free the backrefs, and leave any list it is on */
5845 remove_cache_extent(extent_cache, &rec->cache);
5846 free_all_extent_backrefs(rec);
5847 list_del_init(&rec->list);
/*
 * Check that the owner recorded in the header of @buf is backed by a
 * reference.
 *
 * First the backrefs of @rec are scanned for a root matching the header
 * owner. After that the owner's fs tree is read and searched for the first
 * key of @buf, one level above the block, to see whether the parent node
 * points at buf->start.
 *
 * Returns 0 when the owner reference was found, 1 otherwise (as computed by
 * the final return statement).
 */
5853 static int check_owner_ref(struct btrfs_root *root,
5854 struct extent_record *rec,
5855 struct extent_buffer *buf)
5857 struct extent_backref *node, *tmp;
5858 struct tree_backref *back;
5859 struct btrfs_root *ref_root;
5860 struct btrfs_key key;
5861 struct btrfs_path path;
5862 struct extent_buffer *parent;
5867 rbtree_postorder_for_each_entry_safe(node, tmp,
5868 &rec->backref_tree, node) {
5871 if (!node->found_ref)
5873 if (node->full_backref)
5875 back = to_tree_backref(node);
5876 if (btrfs_header_owner(buf) == back->root)
5879 BUG_ON(rec->is_root);
5881 /* try to find the block by search corresponding fs tree */
5882 key.objectid = btrfs_header_owner(buf);
5883 key.type = BTRFS_ROOT_ITEM_KEY;
5884 key.offset = (u64)-1;
5886 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5887 if (IS_ERR(ref_root))
/* Use the first key stored in the block as the search key */
5890 level = btrfs_header_level(buf);
5892 btrfs_item_key_to_cpu(buf, &key, 0);
5894 btrfs_node_key_to_cpu(buf, &key, 0);
/* Stop the search one level above the block so the parent can be inspected */
5896 btrfs_init_path(&path);
5897 path.lowest_level = level + 1;
5898 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5902 parent = path.nodes[level + 1];
5903 if (parent && buf->start == btrfs_node_blockptr(parent,
5904 path.slots[level + 1]))
5907 btrfs_release_path(&path);
5908 return found ? 0 : 1;
/*
 * Scan the backrefs of @rec for a tree backref whose root is
 * BTRFS_EXTENT_TREE_OBJECTID.
 */
5911 static int is_extent_tree_record(struct extent_record *rec)
5913 struct extent_backref *node, *tmp;
5914 struct tree_backref *back;
5917 rbtree_postorder_for_each_entry_safe(node, tmp,
5918 &rec->backref_tree, node) {
5921 back = to_tree_backref(node);
5922 if (node->full_backref)
5924 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Register a corrupt-extent record for the block at [start, len).
 *
 * The block's extent_record is looked up in @extent_cache and tested with
 * is_extent_tree_record(); the record's parent key is then handed to
 * btrfs_add_corrupt_extent_record().
 */
5931 static int record_bad_block_io(struct btrfs_fs_info *info,
5932 struct cache_tree *extent_cache,
5935 struct extent_record *rec;
5936 struct cache_extent *cache;
5937 struct btrfs_key key;
5939 cache = lookup_cache_extent(extent_cache, start, len);
5943 rec = container_of(cache, struct extent_record, cache);
5944 if (!is_extent_tree_record(rec))
/* parent_key is kept in disk format; convert it before use */
5947 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5948 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot + 1 of @buf.
 *
 * For a node (non-zero header level) the two btrfs_key_ptr entries are
 * exchanged. For a leaf the item data, the item offsets and sizes, and the
 * two keys are exchanged.
 */
5951 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5952 struct extent_buffer *buf, int slot)
5954 if (btrfs_header_level(buf)) {
5955 struct btrfs_key_ptr ptr1, ptr2;
/* Read both key pointers, then write each one into the other's place */
5957 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5958 sizeof(struct btrfs_key_ptr));
5959 read_extent_buffer(buf, &ptr2,
5960 btrfs_node_key_ptr_offset(slot + 1),
5961 sizeof(struct btrfs_key_ptr));
5962 write_extent_buffer(buf, &ptr1,
5963 btrfs_node_key_ptr_offset(slot + 1),
5964 sizeof(struct btrfs_key_ptr));
5965 write_extent_buffer(buf, &ptr2,
5966 btrfs_node_key_ptr_offset(slot),
5967 sizeof(struct btrfs_key_ptr));
/* Propagate the node's first key to the levels above it */
5969 struct btrfs_disk_key key;
5970 btrfs_node_key(buf, &key, 0);
5971 btrfs_fixup_low_keys(root, path, &key,
5972 btrfs_header_level(buf) + 1);
5975 struct btrfs_item *item1, *item2;
5976 struct btrfs_key k1, k2;
5977 char *item1_data, *item2_data;
5978 u32 item1_offset, item2_offset, item1_size, item2_size;
5980 item1 = btrfs_item_nr(slot);
5981 item2 = btrfs_item_nr(slot + 1);
5982 btrfs_item_key_to_cpu(buf, &k1, slot);
5983 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5984 item1_offset = btrfs_item_offset(buf, item1);
5985 item2_offset = btrfs_item_offset(buf, item2);
5986 item1_size = btrfs_item_size(buf, item1);
5987 item2_size = btrfs_item_size(buf, item2);
/* Temporary copies of both items' payloads */
5989 item1_data = malloc(item1_size);
5992 item2_data = malloc(item2_size);
5998 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5999 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): item1_data (item1_size bytes) is written with item2_size as
 * length and item2_data with item1_size; when the two sizes differ this
 * reads past the shorter buffer -- confirm whether equal sizes are assumed.
 */
6001 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6002 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
/* Exchange the offset/size fields of the two item headers */
6006 btrfs_set_item_offset(buf, item1, item2_offset);
6007 btrfs_set_item_offset(buf, item2, item1_offset);
6008 btrfs_set_item_size(buf, item1, item2_size);
6009 btrfs_set_item_size(buf, item2, item1_size);
/* Exchange the keys; path->slots[0] selects which item gets the new key */
6011 path->slots[0] = slot;
6012 btrfs_set_item_key_unsafe(root, path, &k2);
6013 path->slots[0] = slot + 1;
6014 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair key ordering in the block at path->nodes[path->lowest_level].
 *
 * Adjacent keys are compared pairwise with btrfs_comp_cpu_keys(); a pair is
 * handed to swap_values() and the buffer is marked dirty.
 */
6019 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6021 struct extent_buffer *buf;
6022 struct btrfs_key k1, k2;
6024 int level = path->lowest_level;
6027 buf = path->nodes[level];
/*
 * NOTE(review): if btrfs_header_nritems() returns an unsigned type, an
 * empty block makes "nritems - 1" wrap around -- confirm callers never pass
 * a block without items.
 */
6028 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6030 btrfs_node_key_to_cpu(buf, &k1, i);
6031 btrfs_node_key_to_cpu(buf, &k2, i + 1);
6033 btrfs_item_key_to_cpu(buf, &k1, i);
6034 btrfs_item_key_to_cpu(buf, &k2, i + 1);
/* k1 < k2 means this pair is already in order */
6036 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6038 ret = swap_values(root, path, buf, i);
6041 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * Only the key types listed in the condition below are handled. The item
 * headers after @slot are moved down by one, the header item count is
 * decremented and the buffer is marked dirty.
 */
6047 static int delete_bogus_item(struct btrfs_root *root,
6048 struct btrfs_path *path,
6049 struct extent_buffer *buf, int slot)
6051 struct btrfs_key key;
6052 int nritems = btrfs_header_nritems(buf);
6054 btrfs_item_key_to_cpu(buf, &key, slot);
6056 /* These are all the keys we can deal with missing. */
6057 if (key.type != BTRFS_DIR_INDEX_KEY &&
6058 key.type != BTRFS_EXTENT_ITEM_KEY &&
6059 key.type != BTRFS_METADATA_ITEM_KEY &&
6060 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6061 key.type != BTRFS_EXTENT_DATA_REF_KEY)
6064 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6065 (unsigned long long)key.objectid, key.type,
6066 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap: shift the (nritems - slot - 1) following item headers */
6067 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6068 btrfs_item_nr_offset(slot + 1),
6069 sizeof(struct btrfs_item) *
6070 (nritems - slot - 1));
6071 btrfs_set_header_nritems(buf, nritems - 1);
/* Propagate the leaf's (new) first key to level 1 and above */
6073 struct btrfs_disk_key disk_key;
6075 btrfs_item_key(buf, &disk_key, 0);
6076 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6078 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * Item 0 is expected to end at BTRFS_LEAF_DATA_SIZE(root); every later item
 * is expected to end where the previous item's data starts. An item ending
 * beyond its expected position is handed to delete_bogus_item(); an item
 * ending short of it gets its data moved by the size of the gap.
 */
6082 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6084 struct extent_buffer *buf;
6088 /* We should only get this for leaves */
6089 BUG_ON(path->lowest_level);
6090 buf = path->nodes[0];
6092 for (i = 0; i < btrfs_header_nritems(buf); i++) {
6093 unsigned int shift = 0, offset;
/* First item: its end must coincide with the end of the leaf data area */
6095 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6096 BTRFS_LEAF_DATA_SIZE(root)) {
6097 if (btrfs_item_end_nr(buf, i) >
6098 BTRFS_LEAF_DATA_SIZE(root)) {
6099 ret = delete_bogus_item(root, path, buf, i);
6102 fprintf(stderr, "item is off the end of the "
6103 "leaf, can't fix\n");
6107 shift = BTRFS_LEAF_DATA_SIZE(root) -
6108 btrfs_item_end_nr(buf, i);
/* Later items: the end must coincide with the previous item's offset */
6109 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6110 btrfs_item_offset_nr(buf, i - 1)) {
6111 if (btrfs_item_end_nr(buf, i) >
6112 btrfs_item_offset_nr(buf, i - 1)) {
6113 ret = delete_bogus_item(root, path, buf, i);
6116 fprintf(stderr, "items overlap, can't fix\n");
6120 shift = btrfs_item_offset_nr(buf, i - 1) -
6121 btrfs_item_end_nr(buf, i);
/* Move the item's data up by "shift" bytes and record the new offset */
6126 printf("Shifting item nr %d by %u bytes in block %llu\n",
6127 i, shift, (unsigned long long)buf->start);
6128 offset = btrfs_item_offset_nr(buf, i);
6129 memmove_extent_buffer(buf,
6130 btrfs_leaf_data(buf) + offset + shift,
6131 btrfs_leaf_data(buf) + offset,
6132 btrfs_item_size_nr(buf, i));
6133 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6135 btrfs_mark_buffer_dirty(buf);
6139 * We may have moved things, in which case we want to exit so we don't
6140 * write those changes out. Once we have proper abort functionality in
6141 * progs this can be changed to something nicer.
6148 * Attempt to fix basic block failures. If we can't fix it for whatever reason
6149 * then just return -EIO.
/*
 * Try to repair @buf for the two statuses handled here:
 * BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS.
 *
 * All roots referencing the block are collected with btrfs_find_all_roots().
 * For each of them a transaction is started, the block is looked up with a
 * COW search using its first key, and fix_key_order() or fix_item_offset()
 * is applied.
 */
6151 static int try_to_fix_bad_block(struct btrfs_root *root,
6152 struct extent_buffer *buf,
6153 enum btrfs_tree_block_status status)
6155 struct btrfs_trans_handle *trans;
6156 struct ulist *roots;
6157 struct ulist_node *node;
6158 struct btrfs_root *search_root;
6159 struct btrfs_path path;
6160 struct ulist_iterator iter;
6161 struct btrfs_key root_key, key;
6164 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6165 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6168 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6172 btrfs_init_path(&path);
6173 ULIST_ITER_INIT(&iter);
/* node->val is used as the objectid of the root to read */
6174 while ((node = ulist_next(roots, &iter))) {
6175 root_key.objectid = node->val;
6176 root_key.type = BTRFS_ROOT_ITEM_KEY;
6177 root_key.offset = (u64)-1;
6179 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6186 trans = btrfs_start_transaction(search_root, 0);
6187 if (IS_ERR(trans)) {
6188 ret = PTR_ERR(trans);
/*
 * Search down to the block's own level with block checking disabled on the
 * path, using the first key stored in the block.
 */
6192 path.lowest_level = btrfs_header_level(buf);
6193 path.skip_check_block = 1;
6194 if (path.lowest_level)
6195 btrfs_node_key_to_cpu(buf, &key, 0);
6197 btrfs_item_key_to_cpu(buf, &key, 0);
6198 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6201 btrfs_commit_transaction(trans, search_root);
/* Apply the repair that matches the reported status */
6204 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6205 ret = fix_key_order(search_root, &path);
6206 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6207 ret = fix_item_offset(search_root, &path);
6209 btrfs_commit_transaction(trans, search_root);
6212 btrfs_release_path(&path);
6213 btrfs_commit_transaction(trans, search_root);
6216 btrfs_release_path(&path);
/*
 * Validate one tree block and update its extent record.
 *
 * The record for @buf is looked up in @extent_cache and filled with the
 * header generation, the objectid of the first key and the level. The block
 * is verified with btrfs_check_leaf() or btrfs_check_node(); a block that is
 * not clean is passed to try_to_fix_bad_block(). The content_checked and
 * owner_ref_checked flags are then set and the record is offered to
 * maybe_free_extent_rec().
 */
6220 static int check_block(struct btrfs_root *root,
6221 struct cache_tree *extent_cache,
6222 struct extent_buffer *buf, u64 flags)
6224 struct extent_record *rec;
6225 struct cache_extent *cache;
6226 struct btrfs_key key;
6227 enum btrfs_tree_block_status status;
6231 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6234 rec = container_of(cache, struct extent_record, cache);
6235 rec->generation = btrfs_header_generation(buf);
6237 level = btrfs_header_level(buf);
/* Remember the objectid of the block's first key, if it has any items */
6238 if (btrfs_header_nritems(buf) > 0) {
6241 btrfs_item_key_to_cpu(buf, &key, 0);
6243 btrfs_node_key_to_cpu(buf, &key, 0);
6245 rec->info_objectid = key.objectid;
6247 rec->info_level = level;
/* Structural check, given the key the parent holds for this block */
6249 if (btrfs_is_leaf(buf))
6250 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6252 status = btrfs_check_node(root, &rec->parent_key, buf);
6254 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6256 status = try_to_fix_bad_block(root, buf, status);
6257 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6259 fprintf(stderr, "bad block %llu\n",
6260 (unsigned long long)buf->start);
6263 * Signal to callers we need to start the scan over
6264 * again since we'll have cowed blocks.
6269 rec->content_checked = 1;
/* With the FULL_BACKREF flag the owner ref is marked checked directly */
6270 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6271 rec->owner_ref_checked = 1;
6273 ret = check_owner_ref(root, rec, buf);
6275 rec->owner_ref_checked = 1;
6279 maybe_free_extent_rec(extent_cache, rec);
/*
 * List-based lookup of a tree backref: walks the rec->backrefs list and
 * compares either the parent (full backrefs) or the root of each entry.
 *
 * NOTE(review): an rb-tree based function with the same name is defined
 * earlier in this file; presumably only one of the two is compiled --
 * confirm.
 */
6284 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6285 u64 parent, u64 root)
6287 struct list_head *cur = rec->backrefs.next;
6288 struct extent_backref *node;
6289 struct tree_backref *back;
6291 while(cur != &rec->backrefs) {
6292 node = to_extent_backref(cur);
6296 back = to_tree_backref(node);
/* Parent comparison */
6298 if (!node->full_backref)
6300 if (parent == back->parent)
/* Root comparison */
6303 if (node->full_backref)
6305 if (back->root == root)
/*
 * Allocate a tree_backref with malloc() and zero its embedded
 * extent_backref node. The visible code stores @parent and sets
 * full_backref to 1 in one case and clears full_backref in the other.
 */
6313 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6314 u64 parent, u64 root)
6316 struct tree_backref *ref = malloc(sizeof(*ref));
/* Only the embedded node is cleared, not the whole structure */
6320 memset(&ref->node, 0, sizeof(ref->node));
6322 ref->parent = parent;
6323 ref->node.full_backref = 1;
6326 ref->node.full_backref = 0;
/*
 * List-based lookup of a data backref: walks the rec->backrefs list and
 * compares either the parent (full backrefs) or root/owner/offset of each
 * entry, with an additional bytes/disk_bytenr test when both the caller and
 * the entry have found_ref set.
 *
 * NOTE(review): an rb-tree based function with the same name is defined
 * earlier in this file; presumably only one of the two is compiled --
 * confirm.
 */
6333 static struct data_backref *find_data_backref(struct extent_record *rec,
6334 u64 parent, u64 root,
6335 u64 owner, u64 offset,
6337 u64 disk_bytenr, u64 bytes)
6339 struct list_head *cur = rec->backrefs.next;
6340 struct extent_backref *node;
6341 struct data_backref *back;
6343 while(cur != &rec->backrefs) {
6344 node = to_extent_backref(cur);
6348 back = to_data_backref(node);
/* Parent comparison */
6350 if (!node->full_backref)
6352 if (parent == back->parent)
/* Root/owner/offset comparison */
6355 if (node->full_backref)
6357 if (back->root == root && back->owner == owner &&
6358 back->offset == offset) {
/* Size or disk location differs from an already found reference */
6359 if (found_ref && node->found_ref &&
6360 (back->bytes != bytes ||
6361 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data_backref with malloc(), zero its embedded extent_backref
 * node and mark it as a data backref. bytes is set to max_size, and
 * rec->max_size is raised when max_size is larger.
 */
6371 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6372 u64 parent, u64 root,
6373 u64 owner, u64 offset,
6376 struct data_backref *ref = malloc(sizeof(*ref));
/* Only the embedded node is cleared, not the whole structure */
6380 memset(&ref->node, 0, sizeof(ref->node));
6381 ref->node.is_data = 1;
6384 ref->parent = parent;
6387 ref->node.full_backref = 1;
6391 ref->offset = offset;
6392 ref->node.full_backref = 0;
6394 ref->bytes = max_size;
/* Keep the record's max_size as the largest size seen */
6397 if (max_size > rec->max_size)
6398 rec->max_size = max_size;
6402 /* Check if the type of extent matches with its chunk */
/*
 * Set rec->wrong_chunk_type when the block group found for rec->start has
 * flags that do not fit the extent:
 *  - a data extent needs BTRFS_BLOCK_GROUP_DATA;
 *  - a metadata extent needs SYSTEM or METADATA, and when a backref is
 *    available its root decides which one (chunk tree root means SYSTEM).
 */
6403 static void check_extent_type(struct extent_record *rec)
6405 struct btrfs_block_group_cache *bg_cache;
6407 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6411 /* data extent, check chunk directly*/
6412 if (!rec->metadata) {
6413 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6414 rec->wrong_chunk_type = 1;
6418 /* metadata extent, check the obvious case first */
6419 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6420 BTRFS_BLOCK_GROUP_METADATA))) {
6421 rec->wrong_chunk_type = 1;
6426 * Check SYSTEM extent, as it's also marked as metadata, we can only
6427 * make sure it's a SYSTEM extent by its backref
6429 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
6430 struct extent_backref *node;
6431 struct tree_backref *tback;
/* Only the first backref in the tree is inspected */
6434 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
6435 if (node->is_data) {
6436 /* tree block shouldn't have data backref */
6437 rec->wrong_chunk_type = 1;
6440 tback = container_of(node, struct tree_backref, node);
6442 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6443 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6445 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6446 if (!(bg_cache->flags & bg_type))
6447 rec->wrong_chunk_type = 1;
6452 * Allocate a new extent record, fill default values from @tmpl and insert into
6453 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6454 * the cache, otherwise it fails.
/*
 * Create an extent_record from @tmpl and insert it into @extent_cache.
 *
 * Fields are copied from the template or set to the fixed defaults shown
 * below. The global bytes_used counter is increased by rec->nr, and
 * check_crossing_stripes() and check_extent_type() are run on the record.
 */
6456 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6457 struct extent_record *tmpl)
6459 struct extent_record *rec;
6462 BUG_ON(tmpl->max_size == 0);
6463 rec = malloc(sizeof(*rec));
6466 rec->start = tmpl->start;
6467 rec->max_size = tmpl->max_size;
/* nr is the larger of the template's nr and max_size */
6468 rec->nr = max(tmpl->nr, tmpl->max_size);
6469 rec->found_rec = tmpl->found_rec;
6470 rec->content_checked = tmpl->content_checked;
6471 rec->owner_ref_checked = tmpl->owner_ref_checked;
6472 rec->num_duplicates = 0;
6473 rec->metadata = tmpl->metadata;
6474 rec->flag_block_full_backref = FLAG_UNSET;
6475 rec->bad_full_backref = 0;
6476 rec->crossing_stripes = 0;
6477 rec->wrong_chunk_type = 0;
6478 rec->is_root = tmpl->is_root;
6479 rec->refs = tmpl->refs;
6480 rec->extent_item_refs = tmpl->extent_item_refs;
6481 rec->parent_generation = tmpl->parent_generation;
6482 INIT_LIST_HEAD(&rec->backrefs);
6483 INIT_LIST_HEAD(&rec->dups);
6484 INIT_LIST_HEAD(&rec->list);
6485 rec->backref_tree = RB_ROOT;
6486 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
/* The cache entry covers [tmpl->start, tmpl->start + tmpl->nr) */
6487 rec->cache.start = tmpl->start;
6488 rec->cache.size = tmpl->nr;
6489 ret = insert_cache_extent(extent_cache, &rec->cache);
6494 bytes_used += rec->nr;
/* The stripe-crossing test uses the filesystem nodesize as length */
6497 rec->crossing_stripes = check_crossing_stripes(global_info,
6498 rec->start, global_info->nodesize);
6499 check_extent_type(rec);
6504 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6506 * - refs - if found, increase refs
6507 * - is_root - if found, set
6508 * - content_checked - if found, set
6509 * - owner_ref_checked - if found, set
6511 * If not found, create a new one, initialize and insert.
/*
 * Merge @tmpl into an existing extent record covering
 * [tmpl->start, tmpl->nr), or create a new record through
 * add_extent_rec_nolookup().
 *
 * For an existing record the code below updates nr, records duplicate
 * extent items on rec->dups (queueing the record on the global
 * duplicate_extents list), merges extent_item_refs, the checked flags, the
 * parent key/generation and max_size, and finally re-runs the stripe and
 * chunk-type checks before calling maybe_free_extent_rec().
 */
6513 static int add_extent_rec(struct cache_tree *extent_cache,
6514 struct extent_record *tmpl)
6516 struct extent_record *rec;
6517 struct cache_extent *cache;
6521 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6523 rec = container_of(cache, struct extent_record, cache);
6527 rec->nr = max(tmpl->nr, tmpl->max_size);
6530 * We need to make sure to reset nr to whatever the extent
6531 * record says was the real size, this way we can compare it to
6534 if (tmpl->found_rec) {
/* Different start, or an extent item was already seen: a duplicate */
6535 if (tmpl->start != rec->start || rec->found_rec) {
6536 struct extent_record *tmp;
6539 if (list_empty(&rec->list))
6540 list_add_tail(&rec->list,
6541 &duplicate_extents);
6544 * We have to do this song and dance in case we
6545 * find an extent record that falls inside of
6546 * our current extent record but does not have
6547 * the same objectid.
6549 tmp = malloc(sizeof(*tmp));
6552 tmp->start = tmpl->start;
6553 tmp->max_size = tmpl->max_size;
6556 tmp->metadata = tmpl->metadata;
6557 tmp->extent_item_refs = tmpl->extent_item_refs;
6558 INIT_LIST_HEAD(&tmp->list);
6559 list_add_tail(&tmp->list, &rec->dups);
6560 rec->num_duplicates++;
/* A second extent_item_refs value for the same record is reported */
6567 if (tmpl->extent_item_refs && !dup) {
6568 if (rec->extent_item_refs) {
6569 fprintf(stderr, "block %llu rec "
6570 "extent_item_refs %llu, passed %llu\n",
6571 (unsigned long long)tmpl->start,
6572 (unsigned long long)
6573 rec->extent_item_refs,
6574 (unsigned long long)tmpl->extent_item_refs);
6576 rec->extent_item_refs = tmpl->extent_item_refs;
/* The checked flags are only ever set here, never cleared */
6580 if (tmpl->content_checked)
6581 rec->content_checked = 1;
6582 if (tmpl->owner_ref_checked)
6583 rec->owner_ref_checked = 1;
6584 memcpy(&rec->parent_key, &tmpl->parent_key,
6585 sizeof(tmpl->parent_key));
6586 if (tmpl->parent_generation)
6587 rec->parent_generation = tmpl->parent_generation;
6588 if (rec->max_size < tmpl->max_size)
6589 rec->max_size = tmpl->max_size;
6592 * A metadata extent can't cross stripe_len boundary, otherwise
6593 * kernel scrub won't be able to handle it.
6594 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6598 rec->crossing_stripes = check_crossing_stripes(
6599 global_info, rec->start,
6600 global_info->nodesize);
6601 check_extent_type(rec);
6602 maybe_free_extent_rec(extent_cache, rec);
/* No existing record: create one from the template */
6606 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref (@parent or @root) for the extent at @bytenr.
 *
 * The extent record is looked up in @extent_cache and created from a zeroed
 * template when needed. The backref is looked up with find_tree_backref()
 * and allocated when needed; found_ref or found_extent_tree is then set on
 * it, with a message on stderr when the flag was already set. The backref
 * is inserted into rec->backref_tree, and check_extent_type() and
 * maybe_free_extent_rec() are run.
 */
6611 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6612 u64 parent, u64 root, int found_ref)
6614 struct extent_record *rec;
6615 struct tree_backref *back;
6616 struct cache_extent *cache;
6618 bool insert = false;
6620 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6622 struct extent_record tmpl;
6624 memset(&tmpl, 0, sizeof(tmpl));
6625 tmpl.start = bytenr;
6630 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6634 /* really a bug in cache_extent implement now */
6635 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6640 rec = container_of(cache, struct extent_record, cache);
6641 if (rec->start != bytenr) {
6643 * Several cause, from unaligned bytenr to over lapping extents
6648 back = find_tree_backref(rec, parent, root);
6650 back = alloc_tree_backref(rec, parent, root);
/* A reference that was already marked as found is reported */
6657 if (back->node.found_ref) {
6658 fprintf(stderr, "Extent back ref already exists "
6659 "for %llu parent %llu root %llu \n",
6660 (unsigned long long)bytenr,
6661 (unsigned long long)parent,
6662 (unsigned long long)root);
6664 back->node.found_ref = 1;
/* Same report for a backref already marked as seen in the extent tree */
6666 if (back->node.found_extent_tree) {
6667 fprintf(stderr, "Extent back ref already exists "
6668 "for %llu parent %llu root %llu \n",
6669 (unsigned long long)bytenr,
6670 (unsigned long long)parent,
6671 (unsigned long long)root);
6673 back->node.found_extent_tree = 1;
/* A non-zero rb_insert() result only triggers a warning */
6676 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6677 compare_extent_backref));
6678 check_extent_type(rec);
6679 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * The extent record is looked up in @extent_cache and created from a
 * template (start = bytenr, max_size = max_size) when needed; rec->max_size
 * is raised to @max_size if smaller. The backref is looked up with
 * find_data_backref() and allocated when needed, then updated with either
 * the found-reference accounting or the extent-tree information shown
 * below, inserted into rec->backref_tree, and the record is offered to
 * maybe_free_extent_rec().
 */
6683 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6684 u64 parent, u64 root, u64 owner, u64 offset,
6685 u32 num_refs, int found_ref, u64 max_size)
6687 struct extent_record *rec;
6688 struct data_backref *back;
6689 struct cache_extent *cache;
6691 bool insert = false;
6693 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6695 struct extent_record tmpl;
6697 memset(&tmpl, 0, sizeof(tmpl));
6698 tmpl.start = bytenr;
6700 tmpl.max_size = max_size;
6702 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6706 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6711 rec = container_of(cache, struct extent_record, cache);
6712 if (rec->max_size < max_size)
6713 rec->max_size = max_size;
6716 * If found_ref is set then max_size is the real size and must match the
6717 * existing refs. So if we have already found a ref then we need to
6718 * make sure that this ref matches the existing one, otherwise we need
6719 * to add a new backref so we can notice that the backrefs don't match
6720 * and we need to figure out who is telling the truth. This is to
6721 * account for that awful fsync bug I introduced where we'd end up with
6722 * a btrfs_file_extent_item that would have its length include multiple
6723 * prealloc extents or point inside of a prealloc extent.
6725 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6728 back = alloc_data_backref(rec, parent, root, owner, offset,
/* Found-reference accounting: exactly one reference per call is expected */
6735 BUG_ON(num_refs != 1);
6736 if (back->node.found_ref)
6737 BUG_ON(back->bytes != max_size);
6738 back->node.found_ref = 1;
6739 back->found_ref += 1;
/* Record size and disk bytenr of this reference when they differ */
6740 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
6741 back->bytes = max_size;
6742 back->disk_bytenr = bytenr;
6744 /* Need to reinsert if not already in the tree */
6746 rb_erase(&back->node.node, &rec->backref_tree);
6751 rec->content_checked = 1;
6752 rec->owner_ref_checked = 1;
/* A backref already marked as seen in the extent tree is reported */
6754 if (back->node.found_extent_tree) {
6755 fprintf(stderr, "Extent back ref already exists "
6756 "for %llu parent %llu root %llu "
6757 "owner %llu offset %llu num_refs %lu\n",
6758 (unsigned long long)bytenr,
6759 (unsigned long long)parent,
6760 (unsigned long long)root,
6761 (unsigned long long)owner,
6762 (unsigned long long)offset,
6763 (unsigned long)num_refs);
6765 back->num_refs = num_refs;
6766 back->node.found_extent_tree = 1;
/* A non-zero rb_insert() result only triggers a warning */
6769 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6770 compare_extent_backref));
6772 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue the range [bytenr, bytenr + size) for processing: it is added to
 * @seen first (result kept in ret) and then to @pending, whose return value
 * is not examined.
 */
6776 static int add_pending(struct cache_tree *pending,
6777 struct cache_tree *seen, u64 bytenr, u32 size)
6780 ret = add_cache_extent(seen, bytenr, size);
6783 add_cache_extent(pending, bytenr, size);
/*
 * Choose the next blocks to process and store them in @bits (at most
 * @bits_nr entries).
 *
 * @reada is consulted first, then @nodes (searching from shortly before
 * @last, then from 0), then @pending. Consecutive cache entries are copied
 * into @bits; when more than 8 slots remain free, entries from @pending are
 * appended as well.
 */
6787 static int pick_next_pending(struct cache_tree *pending,
6788 struct cache_tree *reada,
6789 struct cache_tree *nodes,
6790 u64 last, struct block_info *bits, int bits_nr,
/*
 * NOTE(review): @last is u64 but is stored in an unsigned long; where
 * unsigned long is 32 bits wide the value is truncated -- confirm this is
 * acceptable.
 */
6793 unsigned long node_start = last;
6794 struct cache_extent *cache;
/* First choice: the lowest entry in @reada */
6797 cache = search_cache_extent(reada, 0);
6799 bits[0].start = cache->start;
6800 bits[0].size = cache->size;
/* Start the @nodes search 32768 bytes before @last when possible */
6805 if (node_start > 32768)
6806 node_start -= 32768;
6808 cache = search_cache_extent(nodes, node_start);
6810 cache = search_cache_extent(nodes, 0);
6813 cache = search_cache_extent(pending, 0);
/* Copy consecutive entries while there is room in bits[] */
6818 bits[ret].start = cache->start;
6819 bits[ret].size = cache->size;
6820 cache = next_cache_extent(cache);
6822 } while (cache && ret < bits_nr);
6828 bits[ret].start = cache->start;
6829 bits[ret].size = cache->size;
6830 cache = next_cache_extent(cache);
6832 } while (cache && ret < bits_nr);
/*
 * With more than 8 free slots, look in @pending past the end of bits[0];
 * the gap to each candidate is compared against 32768 bytes.
 */
6834 if (bits_nr - ret > 8) {
6835 u64 lookup = bits[0].start + bits[0].size;
6836 struct cache_extent *next;
6837 next = search_cache_extent(pending, lookup);
6839 if (next->start - lookup > 32768)
6841 bits[ret].start = next->start;
6842 bits[ret].size = next->size;
6843 lookup = next->start + next->size;
6847 next = next_cache_extent(next);
/*
 * Callback handed to cache_tree_free_extents() by free_chunk_cache_tree().
 * Recovers the chunk_record that embeds @cache and unlinks it from both of
 * its lists, rec->list and rec->dextents.
 * NOTE(review): the release of the record itself is not visible in this
 * excerpt.
 */
6855 static void free_chunk_record(struct cache_extent *cache)
6857 struct chunk_record *rec;
6859 rec = container_of(cache, struct chunk_record, cache);
6860 list_del_init(&rec->list);
6861 list_del_init(&rec->dextents);
/*
 * Tear down @chunk_cache by passing it to cache_tree_free_extents() with
 * free_chunk_record() as the callback.
 */
6865 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6867 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node helper for the device record rb-tree: recovers the device_record
 * that embeds @node.  It is the callback named in the FREE_RB_BASED_TREE()
 * instantiation for device_cache.
 * NOTE(review): the statement that releases the record is not visible in
 * this excerpt.
 */
6870 static void free_device_record(struct rb_node *node)
6872 struct device_record *rec;
6874 rec = container_of(node, struct device_record, node);
/*
 * Macro instantiation for the device cache, parameterised with
 * free_device_record().
 * NOTE(review): presumably this expands to the function that frees the
 * whole device_cache rb-tree - confirm against the FREE_RB_BASED_TREE
 * definition.
 */
6878 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Add @bg_rec to @tree: its embedded cache extent is inserted into
 * tree->tree with insert_cache_extent() and the record is appended to the
 * tree->block_groups list.
 * NOTE(review): the handling of a failed insert_cache_extent() and the
 * return statements are not visible in this excerpt.
 */
6880 int insert_block_group_record(struct block_group_tree *tree,
6881 struct block_group_record *bg_rec)
6885 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6889 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Callback handed to cache_tree_free_extents() by free_block_group_tree().
 * Recovers the block_group_record that embeds @cache and unlinks it from
 * rec->list.
 * NOTE(review): the release of the record itself is not visible in this
 * excerpt.
 */
6893 static void free_block_group_record(struct cache_extent *cache)
6895 struct block_group_record *rec;
6897 rec = container_of(cache, struct block_group_record, cache);
6898 list_del_init(&rec->list);
/*
 * Tear down the block group tree by passing tree->tree to
 * cache_tree_free_extents() with free_block_group_record() as the callback.
 */
6902 void free_block_group_tree(struct block_group_tree *tree)
6904 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Add @de_rec to the device extent tree.
 *
 * The record's cache extent is inserted with insert_cache_extent2() (the
 * existing comment in the body explains why the plain insert is not used),
 * and the record is appended to both orphan lists of @tree:
 * no_chunk_orphans through chunk_list and no_device_orphans through
 * device_list.
 * NOTE(review): the handling of a failed insert and the return statements
 * are not visible in this excerpt.
 */
6907 int insert_device_extent_record(struct device_extent_tree *tree,
6908 struct device_extent_record *de_rec)
6913 * Device extent is a bit different from the other extents, because
6914 * the extents which belong to the different devices may have the
6915 * same start and size, so we need use the special extent cache
6916 * search/insert functions.
6918 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6922 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6923 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Callback handed to cache_tree_free_extents() by
 * free_device_extent_tree().  Recovers the device_extent_record that embeds
 * @cache and unlinks it from chunk_list and device_list, each only when
 * that list head is not empty.
 * NOTE(review): the release of the record itself is not visible in this
 * excerpt.
 */
6927 static void free_device_extent_record(struct cache_extent *cache)
6929 struct device_extent_record *rec;
6931 rec = container_of(cache, struct device_extent_record, cache);
6932 if (!list_empty(&rec->chunk_list))
6933 list_del_init(&rec->chunk_list);
6934 if (!list_empty(&rec->device_list))
6935 list_del_init(&rec->device_list);
/*
 * Tear down the device extent tree by passing tree->tree to
 * cache_tree_free_extents() with free_device_extent_record() as the
 * callback.
 */
6939 void free_device_extent_tree(struct device_extent_tree *tree)
6941 cache_tree_free_extents(&tree->tree, free_device_extent_record);
/*
 * Handle a v0 extent ref item; guarded by BTRFS_COMPAT_EXTENT_TREE_V0.
 *
 * Reads the item key at @slot of @leaf and the btrfs_extent_ref_v0 payload.
 * The ref's objectid is compared with BTRFS_FIRST_FREE_OBJECTID to choose
 * between recording a tree backref and recording a data backref (the latter
 * with the ref count taken from the item) in @extent_cache.
 * NOTE(review): the remaining arguments of add_tree_backref(), the else
 * branch and the return statement are not visible in this excerpt.
 */
6944 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6945 static int process_extent_ref_v0(struct cache_tree *extent_cache,
6946 struct extent_buffer *leaf, int slot)
6948 struct btrfs_extent_ref_v0 *ref0;
6949 struct btrfs_key key;
6952 btrfs_item_key_to_cpu(leaf, &key, slot);
6953 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
6954 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
6955 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
6958 ret = add_data_backref(extent_cache, key.objectid, key.offset,
6959 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk_record from the chunk item at @slot of @leaf.
 *
 * The record is allocated zeroed with calloc(), sized by
 * btrfs_chunk_record_size() for the item's stripe count.  Its cache extent
 * covers key->offset for the chunk length, the key fields and the chunk
 * attributes are copied over, and every stripe's devid, offset and device
 * uuid are read from the leaf.
 * NOTE(review): only the "memory allocation failed" message is visible for
 * the allocation-failure path; what follows it and the return statement
 * are not shown in this excerpt.
 */
6965 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6966 struct btrfs_key *key,
6969 struct btrfs_chunk *ptr;
6970 struct chunk_record *rec;
6973 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6974 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6976 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6978 fprintf(stderr, "memory allocation failed\n");
6982 INIT_LIST_HEAD(&rec->list);
6983 INIT_LIST_HEAD(&rec->dextents);
/* The cache extent is keyed by the chunk's start (key offset) and length. */
6986 rec->cache.start = key->offset;
6987 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6989 rec->generation = btrfs_header_generation(leaf);
6991 rec->objectid = key->objectid;
6992 rec->type = key->type;
6993 rec->offset = key->offset;
6995 rec->length = rec->cache.size;
6996 rec->owner = btrfs_chunk_owner(leaf, ptr);
6997 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6998 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6999 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7000 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7001 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7002 rec->num_stripes = num_stripes;
7003 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* Copy the per-stripe device information. */
7005 for (i = 0; i < rec->num_stripes; ++i) {
7006 rec->stripes[i].devid =
7007 btrfs_stripe_devid_nr(leaf, ptr, i);
7008 rec->stripes[i].offset =
7009 btrfs_stripe_offset_nr(leaf, ptr, i);
7010 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7011 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Record the chunk item at @slot of @eb in @chunk_cache.
 *
 * The item is first validated with btrfs_check_chunk_valid(); an invalid
 * chunk is reported with "is not valid, ignore it".  A chunk_record is then
 * built with btrfs_new_chunk_record() and inserted into @chunk_cache; when
 * the insertion fails, "Chunk[...] existed." is printed.
 * NOTE(review): the last parameter, the branch conditions, any cleanup of
 * the record on a failed insert and the return statements are not visible
 * in this excerpt.
 */
7018 static int process_chunk_item(struct cache_tree *chunk_cache,
7019 struct btrfs_key *key, struct extent_buffer *eb,
7022 struct chunk_record *rec;
7023 struct btrfs_chunk *chunk;
7026 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7028 * Do extra check for this chunk item,
7030 * It's still possible one can craft a leaf with CHUNK_ITEM, with
7031 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7032 * and owner<->key_type check.
7034 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7037 error("chunk(%llu, %llu) is not valid, ignore it",
7038 key->offset, btrfs_chunk_length(eb, chunk));
7041 rec = btrfs_new_chunk_record(eb, key, slot);
7042 ret = insert_cache_extent(chunk_cache, &rec->cache);
7044 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7045 rec->offset, rec->length);
/*
 * Record the device item at @slot of @eb in the @dev_cache rb-tree.
 *
 * A device_record is allocated with malloc() and filled from the key, the
 * leaf header generation and the dev item (device id, total bytes, bytes
 * used), then inserted with rb_insert() using device_record_compare.  When
 * the insertion fails, "Device[...] existed." is printed.
 *
 * Note that rec->devid is assigned twice: first from key->offset and later
 * from btrfs_device_id(); the later value is the one in effect at insert
 * time.
 * NOTE(review): the branch conditions, the error paths after the two
 * messages and the return statements are not visible in this excerpt.
 */
7052 static int process_device_item(struct rb_root *dev_cache,
7053 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7055 struct btrfs_dev_item *ptr;
7056 struct device_record *rec;
7059 ptr = btrfs_item_ptr(eb,
7060 slot, struct btrfs_dev_item);
7062 rec = malloc(sizeof(*rec));
7064 fprintf(stderr, "memory allocation failed\n");
7068 rec->devid = key->offset;
7069 rec->generation = btrfs_header_generation(eb);
7071 rec->objectid = key->objectid;
7072 rec->type = key->type;
7073 rec->offset = key->offset;
/* Overrides the devid taken from the key above. */
7075 rec->devid = btrfs_device_id(eb, ptr);
7076 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7077 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7079 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7081 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build an in-memory block_group_record from the block group item at @slot
 * of @leaf.
 *
 * The record is allocated zeroed with calloc().  Its cache extent starts at
 * key->objectid with size key->offset; the key fields, the leaf header
 * generation and the block group flags are copied, and rec->list is
 * initialised as an empty list head.
 * NOTE(review): only the "memory allocation failed" message is visible for
 * the allocation-failure path; what follows it and the return statement
 * are not shown in this excerpt.
 */
7088 struct block_group_record *
7089 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7092 struct btrfs_block_group_item *ptr;
7093 struct block_group_record *rec;
7095 rec = calloc(1, sizeof(*rec));
7097 fprintf(stderr, "memory allocation failed\n");
7101 rec->cache.start = key->objectid;
7102 rec->cache.size = key->offset;
7104 rec->generation = btrfs_header_generation(leaf);
7106 rec->objectid = key->objectid;
7107 rec->type = key->type;
7108 rec->offset = key->offset;
7110 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7111 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7113 INIT_LIST_HEAD(&rec->list);
/*
 * Record the block group item at @slot of @eb in @block_group_cache.
 *
 * A record is built with btrfs_new_block_group_record() and added with
 * insert_block_group_record(); when that fails, "Block Group[...] existed."
 * is printed.
 * NOTE(review): the branch condition, any cleanup of the record on failure
 * and the return statement are not visible in this excerpt.
 */
7118 static int process_block_group_item(struct block_group_tree *block_group_cache,
7119 struct btrfs_key *key,
7120 struct extent_buffer *eb, int slot)
7122 struct block_group_record *rec;
7125 rec = btrfs_new_block_group_record(eb, key, slot);
7126 ret = insert_block_group_record(block_group_cache, rec);
7128 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7129 rec->objectid, rec->offset);
/*
 * Build an in-memory device_extent_record from the dev extent item at @slot
 * of @leaf.
 *
 * The record is allocated zeroed with calloc().  Its cache extent is keyed
 * by key->objectid (cache.objectid) and key->offset (cache.start), with the
 * dev extent length as size.  The key fields, the leaf header generation
 * and the owning chunk's objectid/offset are copied, and both list heads
 * (chunk_list, device_list) are initialised empty.
 * NOTE(review): only the "memory allocation failed" message is visible for
 * the allocation-failure path; what follows it and the return statement
 * are not shown in this excerpt.
 */
7136 struct device_extent_record *
7137 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7138 struct btrfs_key *key, int slot)
7140 struct device_extent_record *rec;
7141 struct btrfs_dev_extent *ptr;
7143 rec = calloc(1, sizeof(*rec));
7145 fprintf(stderr, "memory allocation failed\n");
7149 rec->cache.objectid = key->objectid;
7150 rec->cache.start = key->offset;
7152 rec->generation = btrfs_header_generation(leaf);
7154 rec->objectid = key->objectid;
7155 rec->type = key->type;
7156 rec->offset = key->offset;
7158 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7159 rec->chunk_objecteid =
7160 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7162 btrfs_dev_extent_chunk_offset(leaf, ptr);
7163 rec->length = btrfs_dev_extent_length(leaf, ptr);
7164 rec->cache.size = rec->length;
7166 INIT_LIST_HEAD(&rec->chunk_list);
7167 INIT_LIST_HEAD(&rec->device_list);
/*
 * Record the dev extent item at @slot of @eb in @dev_extent_cache.
 *
 * A record is built with btrfs_new_device_extent_record() and added with
 * insert_device_extent_record(); when that fails, "Device extent[...]
 * existed." is printed.
 * NOTE(review): the return type line, the last parameter, the branch
 * condition, any cleanup on failure and the return statement are not
 * visible in this excerpt.
 */
7173 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7174 struct btrfs_key *key, struct extent_buffer *eb,
7177 struct device_extent_record *rec;
7180 rec = btrfs_new_device_extent_record(eb, key, slot);
7181 ret = insert_device_extent_record(dev_extent_cache, rec);
7184 "Device extent[%llu, %llu, %llu] existed.\n",
7185 rec->objectid, rec->offset, rec->length);
/*
 * Record the extent described by the extent/metadata item at @slot of @eb
 * in @extent_cache, together with its inline backrefs.
 *
 * Visible steps:
 *  - the extent length is the nodesize for BTRFS_METADATA_ITEM_KEY items
 *    and key.offset otherwise;
 *  - an extent whose bytenr (key.objectid) is not sectorsize aligned is
 *    reported as invalid;
 *  - an item smaller than struct btrfs_extent_item is treated as a v0
 *    extent item when BTRFS_COMPAT_EXTENT_TREE_V0 is defined, and recorded
 *    through add_extent_rec() right away;
 *  - metadata extents whose length differs from the nodesize, and data
 *    extents whose length is not sectorsize aligned, are reported as
 *    invalid;
 *  - the extent is recorded with add_extent_rec() and the inline refs
 *    between the end of the extent item (plus a btrfs_tree_block_info for
 *    tree blocks keyed by EXTENT_ITEM) and the end of the item are walked,
 *    adding a tree or data backref per ref type.  Unknown ref types print
 *    "corrupt extent record".
 *
 * NOTE(review): the return values of the individual paths and several
 * branch/loop headers are not visible in this excerpt.
 */
7192 static int process_extent_item(struct btrfs_root *root,
7193 struct cache_tree *extent_cache,
7194 struct extent_buffer *eb, int slot)
7196 struct btrfs_extent_item *ei;
7197 struct btrfs_extent_inline_ref *iref;
7198 struct btrfs_extent_data_ref *dref;
7199 struct btrfs_shared_data_ref *sref;
7200 struct btrfs_key key;
7201 struct extent_record tmpl;
7206 u32 item_size = btrfs_item_size_nr(eb, slot);
7212 btrfs_item_key_to_cpu(eb, &key, slot);
7214 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7216 num_bytes = root->fs_info->nodesize;
7218 num_bytes = key.offset;
7221 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7222 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7223 key.objectid, root->fs_info->sectorsize);
/* Items too small for the current layout are handled as v0 extent items. */
7226 if (item_size < sizeof(*ei)) {
7227 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7228 struct btrfs_extent_item_v0 *ei0;
7229 BUG_ON(item_size != sizeof(*ei0));
7230 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7231 refs = btrfs_extent_refs_v0(eb, ei0);
7235 memset(&tmpl, 0, sizeof(tmpl));
7236 tmpl.start = key.objectid;
7237 tmpl.nr = num_bytes;
7238 tmpl.extent_item_refs = refs;
7239 tmpl.metadata = metadata;
7241 tmpl.max_size = num_bytes;
7243 return add_extent_rec(extent_cache, &tmpl);
7246 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7247 refs = btrfs_extent_refs(eb, ei);
7248 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7252 if (metadata && num_bytes != root->fs_info->nodesize) {
7253 error("ignore invalid metadata extent, length %llu does not equal to %u",
7254 num_bytes, root->fs_info->nodesize);
7257 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7258 error("ignore invalid data extent, length %llu is not aligned to %u",
7259 num_bytes, root->fs_info->sectorsize);
7263 memset(&tmpl, 0, sizeof(tmpl));
7264 tmpl.start = key.objectid;
7265 tmpl.nr = num_bytes;
7266 tmpl.extent_item_refs = refs;
7267 tmpl.metadata = metadata;
7269 tmpl.max_size = num_bytes;
7270 add_extent_rec(extent_cache, &tmpl);
/* Inline refs start right after the extent item (and tree block info). */
7272 ptr = (unsigned long)(ei + 1);
7273 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7274 key.type == BTRFS_EXTENT_ITEM_KEY)
7275 ptr += sizeof(struct btrfs_tree_block_info);
7277 end = (unsigned long)ei + item_size;
7279 iref = (struct btrfs_extent_inline_ref *)ptr;
7280 type = btrfs_extent_inline_ref_type(eb, iref);
7281 offset = btrfs_extent_inline_ref_offset(eb, iref);
7283 case BTRFS_TREE_BLOCK_REF_KEY:
7284 ret = add_tree_backref(extent_cache, key.objectid,
7288 "add_tree_backref failed (extent items tree block): %s",
7291 case BTRFS_SHARED_BLOCK_REF_KEY:
7292 ret = add_tree_backref(extent_cache, key.objectid,
7296 "add_tree_backref failed (extent items shared block): %s",
7299 case BTRFS_EXTENT_DATA_REF_KEY:
7300 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7301 add_data_backref(extent_cache, key.objectid, 0,
7302 btrfs_extent_data_ref_root(eb, dref),
7303 btrfs_extent_data_ref_objectid(eb,
7305 btrfs_extent_data_ref_offset(eb, dref),
7306 btrfs_extent_data_ref_count(eb, dref),
7309 case BTRFS_SHARED_DATA_REF_KEY:
7310 sref = (struct btrfs_shared_data_ref *)(iref + 1);
7311 add_data_backref(extent_cache, key.objectid, offset,
7313 btrfs_shared_data_ref_count(eb, sref),
7317 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7318 key.objectid, key.type, num_bytes);
7321 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the free space cache of @cache contains an entry matching the
 * range that starts at @offset and is @bytes long.
 *
 * Before the lookup, the range is adjusted against each super block mirror
 * (up to BTRFS_SUPER_MIRROR_MAX): btrfs_rmap_block() maps the mirror's
 * bytenr to logical addresses within the block group, and for each stripe
 * that touches the range, @offset/@bytes are trimmed.  When a stripe lands
 * in the middle of the range, the left side is checked by a recursive call
 * and the loop continues with the right side.
 *
 * The remaining range is then looked up with btrfs_find_free_space(); a
 * missing entry, a different offset or a different length is reported on
 * stderr.  A matching entry is removed with unlink_free_space().
 * NOTE(review): the return values, the loop over the mapped addresses and
 * the release of the entry and of the logical array are not visible in
 * this excerpt.
 */
7328 static int check_cache_range(struct btrfs_root *root,
7329 struct btrfs_block_group_cache *cache,
7330 u64 offset, u64 bytes)
7332 struct btrfs_free_space *entry;
7338 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
7339 bytenr = btrfs_sb_offset(i);
7340 ret = btrfs_rmap_block(root->fs_info,
7341 cache->key.objectid, bytenr, 0,
7342 &logical, &nr, &stripe_len);
7347 if (logical[nr] + stripe_len <= offset)
7349 if (offset + bytes <= logical[nr])
7351 if (logical[nr] == offset) {
7352 if (stripe_len >= bytes) {
7356 bytes -= stripe_len;
7357 offset += stripe_len;
7358 } else if (logical[nr] < offset) {
7359 if (logical[nr] + stripe_len >=
7364 bytes = (offset + bytes) -
7365 (logical[nr] + stripe_len);
7366 offset = logical[nr] + stripe_len;
7369 * Could be tricky, the super may land in the
7370 * middle of the area we're checking. First
7371 * check the easiest case, it's at the end.
7373 if (logical[nr] + stripe_len >=
7375 bytes = logical[nr] - offset;
7379 /* Check the left side */
7380 ret = check_cache_range(root, cache,
7382 logical[nr] - offset);
7388 /* Now we continue with the right side */
7389 bytes = (offset + bytes) -
7390 (logical[nr] + stripe_len);
7391 offset = logical[nr] + stripe_len;
/* Look the (possibly trimmed) range up in the free space cache. */
7398 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7400 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7401 offset, offset+bytes);
7405 if (entry->offset != offset) {
7406 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7411 if (entry->bytes != bytes) {
7412 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7413 bytes, entry->bytes, offset);
7417 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the loaded free space of block group @cache against the
 * extent tree.
 *
 * The extent root is searched starting at the larger of the block group
 * start and BTRFS_SUPER_INFO_OFFSET.  For every EXTENT_ITEM/METADATA_ITEM
 * below the end of the block group, the gap between the end of the previous
 * extent (last) and the item's bytenr is verified with check_cache_range(),
 * and last advances past the item (key.offset bytes for extent items, the
 * nodesize for metadata items).  After the walk the tail of the block group
 * is verified the same way.  Finally, entries still present in the free
 * space rb-tree are reported.
 * NOTE(review): loop headers, error handling and the return statements are
 * not visible in this excerpt.
 */
7422 static int verify_space_cache(struct btrfs_root *root,
7423 struct btrfs_block_group_cache *cache)
7425 struct btrfs_path path;
7426 struct extent_buffer *leaf;
7427 struct btrfs_key key;
7431 root = root->fs_info->extent_root;
7433 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7435 btrfs_init_path(&path);
7436 key.objectid = last;
7438 key.type = BTRFS_EXTENT_ITEM_KEY;
7439 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7444 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7445 ret = btrfs_next_leaf(root, &path);
7453 leaf = path.nodes[0];
7454 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7455 if (key.objectid >= cache->key.offset + cache->key.objectid)
7457 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7458 key.type != BTRFS_METADATA_ITEM_KEY) {
/* No gap before this extent: just advance past it. */
7463 if (last == key.objectid) {
7464 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7465 last = key.objectid + key.offset;
7467 last = key.objectid + root->fs_info->nodesize;
7472 ret = check_cache_range(root, cache, last,
7473 key.objectid - last);
7476 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7477 last = key.objectid + key.offset;
7479 last = key.objectid + root->fs_info->nodesize;
/* Verify the space between the last extent and the block group end. */
7483 if (last < cache->key.objectid + cache->key.offset)
7484 ret = check_cache_range(root, cache, last,
7485 cache->key.objectid +
7486 cache->key.offset - last);
7489 btrfs_release_path(&path);
7492 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7493 fprintf(stderr, "There are still entries left in the space "
/*
 * Verify the free space information of every block group.
 *
 * When the super block's cache generation is set (not -1ULL) and differs
 * from the super generation, a message says the space cache will be
 * invalidated.  Otherwise block groups are looked up one after another,
 * starting past the primary super block; for each one the free space
 * control is initialised if missing, free space is loaded from the free
 * space tree (when the FREE_SPACE_TREE compat_ro bit is set, with super
 * stripes excluded around the load) or from the free space cache, and the
 * result is checked with verify_space_cache().
 *
 * Returns -EINVAL when error is non-zero, 0 otherwise.
 * NOTE(review): how error is accumulated, the loop header and the early
 * exits are not visible in this excerpt.
 */
7501 static int check_space_cache(struct btrfs_root *root)
7503 struct btrfs_block_group_cache *cache;
7504 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7508 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7509 btrfs_super_generation(root->fs_info->super_copy) !=
7510 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7511 printf("cache and super generation don't match, space cache "
7512 "will be invalidated\n");
7516 if (ctx.progress_enabled) {
7517 ctx.tp = TASK_FREE_SPACE;
7518 task_start(ctx.info);
7522 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* The next lookup continues after the end of this block group. */
7526 start = cache->key.objectid + cache->key.offset;
7527 if (!cache->free_space_ctl) {
7528 if (btrfs_init_free_space_ctl(cache,
7529 root->fs_info->sectorsize)) {
7534 btrfs_remove_free_space_cache(cache);
7537 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7538 ret = exclude_super_stripes(root, cache);
7540 fprintf(stderr, "could not exclude super stripes: %s\n",
7545 ret = load_free_space_tree(root->fs_info, cache);
7546 free_excluded_extents(root, cache);
7548 fprintf(stderr, "could not load free space tree: %s\n",
7555 ret = load_free_space_cache(root->fs_info, cache);
7560 ret = verify_space_cache(root, cache);
7562 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7563 cache->key.objectid);
7568 task_stop(ctx.info);
7570 return error ? -EINVAL : 0;
/*
 * Verify the data checksums of the range that starts at @bytenr and is
 * @num_bytes long against the checksums stored in @eb at @leaf_offset.
 *
 * A buffer of @num_bytes is allocated and filled with read_extent_data().
 * The data is checksummed one sectorsize block at a time
 * (btrfs_csum_data() + btrfs_csum_final()) and compared with the expected
 * value read from @eb at
 * leaf_offset + (block index) * csum_size.  A mismatch is printed together
 * with the mirror number, and the number of copies is queried so another
 * mirror can be tried.
 * NOTE(review): the handling of a @num_bytes that is not a multiple of the
 * sectorsize, the retry logic, the release of the buffer and the return
 * statements are not visible in this excerpt.
 */
7573 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7574 u64 num_bytes, unsigned long leaf_offset,
7575 struct extent_buffer *eb) {
7577 struct btrfs_fs_info *fs_info = root->fs_info;
7579 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7581 unsigned long csum_offset;
7585 u64 data_checked = 0;
7591 if (num_bytes % fs_info->sectorsize)
7594 data = malloc(num_bytes);
7598 while (offset < num_bytes) {
7601 read_len = num_bytes - offset;
7602 /* read as much space once a time */
7603 ret = read_extent_data(fs_info, data + offset,
7604 bytenr + offset, &read_len, mirror);
7608 /* verify every 4k data's checksum */
7609 while (data_checked < read_len) {
7611 tmp = offset + data_checked;
7613 csum = btrfs_csum_data((char *)data + tmp,
7614 csum, fs_info->sectorsize);
7615 btrfs_csum_final(csum, (u8 *)&csum);
/* One stored checksum of csum_size bytes per sectorsize block. */
7617 csum_offset = leaf_offset +
7618 tmp / fs_info->sectorsize * csum_size;
7619 read_extent_buffer(eb, (char *)&csum_expected,
7620 csum_offset, csum_size);
7621 /* try another mirror */
7622 if (csum != csum_expected) {
7623 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7624 mirror, bytenr + tmp,
7625 csum, csum_expected);
7626 num_copies = btrfs_num_copies(root->fs_info,
7628 if (mirror < num_copies - 1) {
7633 data_checked += fs_info->sectorsize;
/*
 * Check that the range starting at @bytenr and @num_bytes long is covered
 * by extent items in the extent tree.
 *
 * The extent root is searched for (bytenr, EXTENT_ITEM, -1) and the path is
 * stepped back to the preceding item, skipping backwards over items whose
 * key type sorts after EXTENT_ITEM (see the existing comment in the body).
 * Extent items are then walked forward; each one overlapping the range
 * trims @bytenr/@num_bytes.  An extent in the middle of the range causes a
 * recursive call for the right-hand part, after which the left-hand part is
 * kept.  If bytes remain uncovered and no error occurred, "There are no
 * extents for csum range" is printed.
 * NOTE(review): loop headers, the return values and several branch bodies
 * are not visible in this excerpt.
 */
7642 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7645 struct btrfs_path path;
7646 struct extent_buffer *leaf;
7647 struct btrfs_key key;
7650 btrfs_init_path(&path);
7651 key.objectid = bytenr;
7652 key.type = BTRFS_EXTENT_ITEM_KEY;
7653 key.offset = (u64)-1;
7656 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7659 fprintf(stderr, "Error looking up extent record %d\n", ret);
7660 btrfs_release_path(&path);
7663 if (path.slots[0] > 0) {
7666 ret = btrfs_prev_leaf(root, &path);
7669 } else if (ret > 0) {
7676 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7679 * Block group items come before extent items if they have the same
7680 * bytenr, so walk back one more just in case. Dear future traveller,
7681 * first congrats on mastering time travel. Now if it's not too much
7682 * trouble could you go back to 2006 and tell Chris to make the
7683 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7684 * EXTENT_ITEM_KEY please?
7686 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7687 if (path.slots[0] > 0) {
7690 ret = btrfs_prev_leaf(root, &path);
7693 } else if (ret > 0) {
7698 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7702 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7703 ret = btrfs_next_leaf(root, &path);
7705 fprintf(stderr, "Error going to next leaf "
7707 btrfs_release_path(&path);
7713 leaf = path.nodes[0];
7714 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7715 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7719 if (key.objectid + key.offset < bytenr) {
7723 if (key.objectid > bytenr + num_bytes)
/* Trim the part of the range that this extent item covers. */
7726 if (key.objectid == bytenr) {
7727 if (key.offset >= num_bytes) {
7731 num_bytes -= key.offset;
7732 bytenr += key.offset;
7733 } else if (key.objectid < bytenr) {
7734 if (key.objectid + key.offset >= bytenr + num_bytes) {
7738 num_bytes = (bytenr + num_bytes) -
7739 (key.objectid + key.offset);
7740 bytenr = key.objectid + key.offset;
7742 if (key.objectid + key.offset < bytenr + num_bytes) {
7743 u64 new_start = key.objectid + key.offset;
7744 u64 new_bytes = bytenr + num_bytes - new_start;
7747 * Weird case, the extent is in the middle of
7748 * our range, we'll have to search one side
7749 * and then the other. Not sure if this happens
7750 * in real life, but no harm in coding it up
7751 * anyway just in case.
7753 btrfs_release_path(&path);
7754 ret = check_extent_exists(root, new_start,
7757 fprintf(stderr, "Right section didn't "
7761 num_bytes = key.objectid - bytenr;
7764 num_bytes = key.objectid - bytenr;
7771 if (num_bytes && !ret) {
7772 fprintf(stderr, "There are no extents for csum range "
7773 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7777 btrfs_release_path(&path);
/*
 * Walk the checksum tree and check that every checksummed range is backed
 * by an extent record.
 *
 * The csum root must be up to date, otherwise "No valid csum tree found" is
 * printed.  EXTENT_CSUM items are visited in key order; the data length
 * covered by an item is (item size / csum_size) * sectorsize.  Unless
 * check_data_csum is zero, the data itself is verified with
 * check_extent_csums().  Contiguous items are accumulated in
 * (offset, num_bytes); when an item does not start at offset + num_bytes,
 * the accumulated range is verified with check_extent_exists() and a new
 * range begins at the item's key.offset.
 * NOTE(review): the loop header, error accumulation, the check of the last
 * accumulated range and the return statements are not visible in this
 * excerpt.
 */
7781 static int check_csums(struct btrfs_root *root)
7783 struct btrfs_path path;
7784 struct extent_buffer *leaf;
7785 struct btrfs_key key;
7786 u64 offset = 0, num_bytes = 0;
7787 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7791 unsigned long leaf_offset;
7793 root = root->fs_info->csum_root;
7794 if (!extent_buffer_uptodate(root->node)) {
7795 fprintf(stderr, "No valid csum tree found\n");
7799 btrfs_init_path(&path);
7800 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7801 key.type = BTRFS_EXTENT_CSUM_KEY;
7803 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7805 fprintf(stderr, "Error searching csum tree %d\n", ret);
7806 btrfs_release_path(&path);
7810 if (ret > 0 && path.slots[0])
7815 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7816 ret = btrfs_next_leaf(root, &path);
7818 fprintf(stderr, "Error going to next leaf "
7825 leaf = path.nodes[0];
7827 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7828 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Bytes of data covered by the checksums stored in this item. */
7833 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7834 csum_size) * root->fs_info->sectorsize;
7835 if (!check_data_csum)
7836 goto skip_csum_check;
7837 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7838 ret = check_extent_csums(root, key.offset, data_len,
7844 offset = key.offset;
7845 } else if (key.offset != offset + num_bytes) {
7846 ret = check_extent_exists(root, offset, num_bytes);
7848 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7849 "there is no extent record\n",
7850 offset, offset+num_bytes);
7853 offset = key.offset;
7856 num_bytes += data_len;
7860 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field, in the order objectid, type,
 * offset: a later field is only looked at when all earlier fields are
 * equal, and each test is a strict "less than".
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably a non-zero result means @key sorts before @drop_key - confirm.
 */
7864 static int is_dropped_key(struct btrfs_key *key,
7865 struct btrfs_key *drop_key) {
7866 if (key->objectid < drop_key->objectid)
7868 else if (key->objectid == drop_key->objectid) {
7869 if (key->type < drop_key->type)
7871 else if (key->type == drop_key->type) {
7872 if (key->offset < drop_key->offset)
7880 * Here are the rules for FULL_BACKREF.
7882 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7883 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7885 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7886 * if it happened after the relocation occurred since we'll have dropped the
7887 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7888 * have no real way to know for sure.
7890 * We process the blocks one root at a time, and we start from the lowest root
7891 * objectid and go to the highest. So we can just lookup the owner backref for
7892 * the record and if we don't find it then we know it doesn't exist and we have
7895 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7896 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7897 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether @buf should be treated as having
 * BTRFS_BLOCK_FLAG_FULL_BACKREF, without consulting the extent tree.
 *
 * The extent record for buf->start is looked up in @extent_cache.  The
 * decision then looks at: whether ri->objectid is below
 * BTRFS_FIRST_FREE_OBJECTID, whether @buf is the root block of @ri, whether
 * the header carries BTRFS_HEADER_FLAG_RELOC, whether the header owner
 * equals ri->objectid, and whether a tree backref for the header owner is
 * recorded.  Two outcomes are visible: one leaves *flags untouched, the
 * other ORs BTRFS_BLOCK_FLAG_FULL_BACKREF into *flags.  In both, if the
 * record already holds a different decision (flag_block_full_backref is
 * neither FLAG_UNSET nor the value just computed), bad_full_backref is set.
 * NOTE(review): which test leads to which outcome, the last parameter and
 * the return values are not visible in this excerpt.
 */
7899 static int calc_extent_flag(struct cache_tree *extent_cache,
7900 struct extent_buffer *buf,
7901 struct root_item_record *ri,
7904 struct extent_record *rec;
7905 struct cache_extent *cache;
7906 struct tree_backref *tback;
7909 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7910 /* we have added this extent before */
7914 rec = container_of(cache, struct extent_record, cache);
7917 * Except file/reloc tree, we can not have
7920 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7925 if (buf->start == ri->bytenr)
7928 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7931 owner = btrfs_header_owner(buf);
7932 if (owner == ri->objectid)
7935 tback = find_tree_backref(rec, 0, owner);
/* Outcome without FULL_BACKREF: flag a conflicting earlier decision. */
7940 if (rec->flag_block_full_backref != FLAG_UNSET &&
7941 rec->flag_block_full_backref != 0)
7942 rec->bad_full_backref = 1;
/* Outcome with FULL_BACKREF: set the flag and flag a conflict likewise. */
7945 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7946 if (rec->flag_block_full_backref != FLAG_UNSET &&
7947 rec->flag_block_full_backref != 1)
7948 rec->bad_full_backref = 1;
7952 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7954 fprintf(stderr, "Invalid key type(");
7955 print_key_type(stderr, 0, key_type);
7956 fprintf(stderr, ") found in root(");
7957 print_objectid(stderr, rootid, 0);
7958 fprintf(stderr, ")\n");
7962 * Check if the key is valid with its extent buffer.
7964 * This is an early check in case an invalid key exists in an extent buffer.
7965 * This is not comprehensive yet, but should prevent wrong key/item passed
7968 static int check_type_with_root(u64 rootid, u8 key_type)
7971 /* Only valid in chunk tree */
7972 case BTRFS_DEV_ITEM_KEY:
7973 case BTRFS_CHUNK_ITEM_KEY:
7974 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7977 /* valid in csum and log tree */
7978 case BTRFS_CSUM_TREE_OBJECTID:
7979 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7983 case BTRFS_EXTENT_ITEM_KEY:
7984 case BTRFS_METADATA_ITEM_KEY:
7985 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7986 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7989 case BTRFS_ROOT_ITEM_KEY:
7990 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7993 case BTRFS_DEV_EXTENT_KEY:
7994 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8000 report_mismatch_key_root(key_type, rootid);
/*
 * Process the next tree block of the traversal and queue its children.
 *
 * Visible steps:
 *  - pick_next_pending() selects a batch into @bits; readahead is issued
 *    for the batch and the blocks are noted in @reada;
 *  - bits[0] becomes the current block: it is looked up in @pending,
 *    @reada and @nodes for removal, *last is updated, and the expected
 *    parent generation is taken from its extent record when one exists;
 *  - the block is read with read_tree_block(); a block that is not up to
 *    date is recorded with record_bad_block_io();
 *  - the FULL_BACKREF flag is obtained from btrfs_lookup_extent_info()
 *    (unless init_extent_tree is set) or from calc_extent_flag(), sanity
 *    checked against the owner, the RELOC header flag and
 *    ri->last_snapshot, and stored in the extent record;
 *  - check_block() validates the block;
 *  - for a leaf, every item is first filtered by check_type_with_root()
 *    and then dispatched on its key type: extent/metadata items, csum
 *    items, chunk, device, block group and dev extent items, the various
 *    backref items, orphan items (queued on delete_items) and file extent
 *    items (accounted and recorded as data backrefs);
 *  - for a node, every child pointer that is not below the drop key gets
 *    an extent record and a tree backref and is queued with add_pending();
 *  - global byte counters are updated and the buffer is released.
 *
 * NOTE(review): many branch headers, the error paths and the return
 * statements are not visible in this excerpt.
 */
8004 static int run_next_block(struct btrfs_root *root,
8005 struct block_info *bits,
8008 struct cache_tree *pending,
8009 struct cache_tree *seen,
8010 struct cache_tree *reada,
8011 struct cache_tree *nodes,
8012 struct cache_tree *extent_cache,
8013 struct cache_tree *chunk_cache,
8014 struct rb_root *dev_cache,
8015 struct block_group_tree *block_group_cache,
8016 struct device_extent_tree *dev_extent_cache,
8017 struct root_item_record *ri)
8019 struct btrfs_fs_info *fs_info = root->fs_info;
8020 struct extent_buffer *buf;
8021 struct extent_record *rec = NULL;
8032 struct btrfs_key key;
8033 struct cache_extent *cache;
8036 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8037 bits_nr, &reada_bits);
/* Note the picked blocks in the reada tree and start reading them ahead. */
8042 for(i = 0; i < nritems; i++) {
8043 ret = add_cache_extent(reada, bits[i].start,
8048 /* fixme, get the parent transid */
8049 readahead_tree_block(fs_info, bits[i].start, 0);
8052 *last = bits[0].start;
8053 bytenr = bits[0].start;
8054 size = bits[0].size;
/* The current block leaves whichever queue it was waiting in. */
8056 cache = lookup_cache_extent(pending, bytenr, size);
8058 remove_cache_extent(pending, cache);
8061 cache = lookup_cache_extent(reada, bytenr, size);
8063 remove_cache_extent(reada, cache);
8066 cache = lookup_cache_extent(nodes, bytenr, size);
8068 remove_cache_extent(nodes, cache);
8071 cache = lookup_cache_extent(extent_cache, bytenr, size);
8073 rec = container_of(cache, struct extent_record, cache);
8074 gen = rec->parent_generation;
8077 /* fixme, get the real parent transid */
8078 buf = read_tree_block(root->fs_info, bytenr, gen);
8079 if (!extent_buffer_uptodate(buf)) {
8080 record_bad_block_io(root->fs_info,
8081 extent_cache, bytenr, size);
8085 nritems = btrfs_header_nritems(buf);
8088 if (!init_extent_tree) {
8089 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8090 btrfs_header_level(buf), 1, NULL,
8093 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8095 fprintf(stderr, "Couldn't calc extent flags\n");
8096 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8101 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8103 fprintf(stderr, "Couldn't calc extent flags\n");
8104 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8108 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8110 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8111 ri->objectid == btrfs_header_owner(buf)) {
8113 * Ok we got to this block from it's original owner and
8114 * we have FULL_BACKREF set. Relocation can leave
8115 * converted blocks over so this is altogether possible,
8116 * however it's not possible if the generation > the
8117 * last snapshot, so check for this case.
8119 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8120 btrfs_header_generation(buf) > ri->last_snapshot) {
8121 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8122 rec->bad_full_backref = 1;
8127 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8128 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8129 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8130 rec->bad_full_backref = 1;
8134 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8135 rec->flag_block_full_backref = 1;
8139 rec->flag_block_full_backref = 0;
8141 owner = btrfs_header_owner(buf);
8144 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: dispatch every item on its key type. */
8148 if (btrfs_is_leaf(buf)) {
8149 btree_space_waste += btrfs_leaf_free_space(root, buf);
8150 for (i = 0; i < nritems; i++) {
8151 struct btrfs_file_extent_item *fi;
8152 btrfs_item_key_to_cpu(buf, &key, i);
8154 * Check key type against the leaf owner.
8155 * Could filter quite a lot of early error if
8158 if (check_type_with_root(btrfs_header_owner(buf),
8160 fprintf(stderr, "ignoring invalid key\n");
8163 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8164 process_extent_item(root, extent_cache, buf,
8168 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8169 process_extent_item(root, extent_cache, buf,
8173 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8175 btrfs_item_size_nr(buf, i);
8178 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8179 process_chunk_item(chunk_cache, &key, buf, i);
8182 if (key.type == BTRFS_DEV_ITEM_KEY) {
8183 process_device_item(dev_cache, &key, buf, i);
8186 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8187 process_block_group_item(block_group_cache,
8191 if (key.type == BTRFS_DEV_EXTENT_KEY) {
8192 process_device_extent_item(dev_extent_cache,
8197 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8198 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8199 process_extent_ref_v0(extent_cache, buf, i);
8206 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8207 ret = add_tree_backref(extent_cache,
8208 key.objectid, 0, key.offset, 0);
8211 "add_tree_backref failed (leaf tree block): %s",
8215 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8216 ret = add_tree_backref(extent_cache,
8217 key.objectid, key.offset, 0, 0);
8220 "add_tree_backref failed (leaf shared block): %s",
8224 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8225 struct btrfs_extent_data_ref *ref;
8226 ref = btrfs_item_ptr(buf, i,
8227 struct btrfs_extent_data_ref);
8228 add_data_backref(extent_cache,
8230 btrfs_extent_data_ref_root(buf, ref),
8231 btrfs_extent_data_ref_objectid(buf,
8233 btrfs_extent_data_ref_offset(buf, ref),
8234 btrfs_extent_data_ref_count(buf, ref),
8235 0, root->fs_info->sectorsize);
8238 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8239 struct btrfs_shared_data_ref *ref;
8240 ref = btrfs_item_ptr(buf, i,
8241 struct btrfs_shared_data_ref);
8242 add_data_backref(extent_cache,
8243 key.objectid, key.offset, 0, 0, 0,
8244 btrfs_shared_data_ref_count(buf, ref),
8245 0, root->fs_info->sectorsize);
8248 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8249 struct bad_item *bad;
8251 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8255 bad = malloc(sizeof(struct bad_item));
8258 INIT_LIST_HEAD(&bad->list);
8259 memcpy(&bad->key, &key,
8260 sizeof(struct btrfs_key));
8261 bad->root_id = owner;
8262 list_add_tail(&bad->list, &delete_items);
/* Everything below only concerns regular (non-inline) file extents. */
8265 if (key.type != BTRFS_EXTENT_DATA_KEY)
8267 fi = btrfs_item_ptr(buf, i,
8268 struct btrfs_file_extent_item);
8269 if (btrfs_file_extent_type(buf, fi) ==
8270 BTRFS_FILE_EXTENT_INLINE)
8272 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8275 data_bytes_allocated +=
8276 btrfs_file_extent_disk_num_bytes(buf, fi);
8277 if (data_bytes_allocated < root->fs_info->sectorsize) {
8280 data_bytes_referenced +=
8281 btrfs_file_extent_num_bytes(buf, fi);
8282 add_data_backref(extent_cache,
8283 btrfs_file_extent_disk_bytenr(buf, fi),
8284 parent, owner, key.objectid, key.offset -
8285 btrfs_file_extent_offset(buf, fi), 1, 1,
8286 btrfs_file_extent_disk_num_bytes(buf, fi));
8290 struct btrfs_key first_key;
8292 first_key.objectid = 0;
8295 btrfs_item_key_to_cpu(buf, &first_key, 0);
8296 level = btrfs_header_level(buf);
/* Node: record and queue every child block pointer. */
8297 for (i = 0; i < nritems; i++) {
8298 struct extent_record tmpl;
8300 ptr = btrfs_node_blockptr(buf, i);
8301 size = root->fs_info->nodesize;
8302 btrfs_node_key_to_cpu(buf, &key, i);
8304 if ((level == ri->drop_level)
8305 && is_dropped_key(&key, &ri->drop_key)) {
8310 memset(&tmpl, 0, sizeof(tmpl));
8311 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8312 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8317 tmpl.max_size = size;
8318 ret = add_extent_rec(extent_cache, &tmpl);
8322 ret = add_tree_backref(extent_cache, ptr, parent,
8326 "add_tree_backref failed (non-leaf block): %s",
8332 add_pending(nodes, seen, ptr, size);
8334 add_pending(pending, seen, ptr, size);
8337 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
8338 nritems) * sizeof(struct btrfs_key_ptr);
/* Global statistics, split by the tree that owns the block. */
8340 total_btree_bytes += buf->len;
8341 if (fs_root_objectid(btrfs_header_owner(buf)))
8342 total_fs_tree_bytes += buf->len;
8343 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8344 total_extent_tree_bytes += buf->len;
8346 free_extent_buffer(buf);
8350 static int add_root_to_pending(struct extent_buffer *buf,
8351 struct cache_tree *extent_cache,
8352 struct cache_tree *pending,
8353 struct cache_tree *seen,
8354 struct cache_tree *nodes,
8357 struct extent_record tmpl;
8360 if (btrfs_header_level(buf) > 0)
8361 add_pending(nodes, seen, buf->start, buf->len);
8363 add_pending(pending, seen, buf->start, buf->len);
8365 memset(&tmpl, 0, sizeof(tmpl));
8366 tmpl.start = buf->start;
8371 tmpl.max_size = buf->len;
8372 add_extent_rec(extent_cache, &tmpl);
8374 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8375 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8376 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8379 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8384 /* as we fix the tree, we might be deleting blocks that
8385 * we're tracking for repair. This hook makes sure we
8386 * remove any backrefs for blocks as we are fixing them.
/*
 * Keep the fsck extent cache in sync while repair code frees extents.
 *
 * The extent record covering [bytenr, bytenr + num_bytes) is looked up in
 * root->fs_info->fsck_extent_cache.  The matching backref (data backref when
 * owner >= BTRFS_FIRST_FREE_OBJECTID, tree backref otherwise) has its
 * counters reduced, the record totals (refs, extent_item_refs) are reduced
 * with it, and finally maybe_free_extent_rec() is given the chance to drop
 * the record.
 */
8388 static int free_extent_hook(struct btrfs_trans_handle *trans,
8389 struct btrfs_root *root,
8390 u64 bytenr, u64 num_bytes, u64 parent,
8391 u64 root_objectid, u64 owner, u64 offset,
8394 struct extent_record *rec;
8395 struct cache_extent *cache;
8397 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* objectids from BTRFS_FIRST_FREE_OBJECTID upwards are treated as data owners */
8399 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8400 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8404 rec = container_of(cache, struct extent_record, cache);
/* data extents: a backref can stand for several references */
8406 struct data_backref *back;
8407 back = find_data_backref(rec, parent, root_objectid, owner,
8408 offset, 1, bytenr, num_bytes);
/* references seen while walking the trees */
8411 if (back->node.found_ref) {
8412 back->found_ref -= refs_to_drop;
8414 rec->refs -= refs_to_drop;
/* references recorded in the extent tree */
8416 if (back->node.found_extent_tree) {
8417 back->num_refs -= refs_to_drop;
8418 if (rec->extent_item_refs)
8419 rec->extent_item_refs -= refs_to_drop;
/* clear the flag bits once the matching counter reaches zero */
8421 if (back->found_ref == 0)
8422 back->node.found_ref = 0;
8423 if (back->num_refs == 0)
8424 back->node.found_extent_tree = 0;
/*
 * NOTE(review): the backref is unlinked while found_ref is still set and
 * found_extent_tree is clear; confirm that !found_ref was not intended.
 */
8426 if (!back->node.found_extent_tree && back->node.found_ref) {
8427 rb_erase(&back->node.node, &rec->backref_tree);
/* tree blocks: a backref stands for exactly one reference */
8431 struct tree_backref *back;
8432 back = find_tree_backref(rec, parent, root_objectid);
8435 if (back->node.found_ref) {
8438 back->node.found_ref = 0;
8440 if (back->node.found_extent_tree) {
8441 if (rec->extent_item_refs)
8442 rec->extent_item_refs--;
8443 back->node.found_extent_tree = 0;
/* NOTE(review): same condition as in the data branch above; confirm */
8445 if (!back->node.found_extent_tree && back->node.found_ref) {
8446 rb_erase(&back->node.node, &rec->backref_tree);
8450 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent tree items that describe the extent at bytenr.
 *
 * The extent tree is searched with a key of (bytenr, <type>, (u64)-1) and
 * the item found at the resulting slot is examined.  Items whose objectid is
 * bytenr and whose type is one of EXTENT_ITEM, METADATA_ITEM,
 * TREE_BLOCK_REF, EXTENT_DATA_REF, EXTENT_REF_V0, SHARED_BLOCK_REF or
 * SHARED_DATA_REF are reported on stderr and removed with btrfs_del_item().
 * For any other type the search key is moved down to just below the found
 * key and the search is repeated.
 *
 * When an EXTENT_ITEM or METADATA_ITEM is removed, a byte count is computed
 * for the block group update: the key offset for an extent item, the
 * filesystem nodesize for a metadata item.
 */
8455 static int delete_extent_records(struct btrfs_trans_handle *trans,
8456 struct btrfs_root *root,
8457 struct btrfs_path *path,
8460 struct btrfs_key key;
8461 struct btrfs_key found_key;
8462 struct extent_buffer *leaf;
/* start from the highest possible offset for this bytenr */
8467 key.objectid = bytenr;
8469 key.offset = (u64)-1;
8472 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8479 if (path->slots[0] == 0)
8485 leaf = path->nodes[0];
8486 slot = path->slots[0];
8488 btrfs_item_key_to_cpu(leaf, &found_key, slot);
/* an item for a different bytenr is not ours */
8489 if (found_key.objectid != bytenr)
/* only extent items and the backref item types are candidates for deletion */
8492 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8493 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8494 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8495 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8496 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8497 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8498 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8499 btrfs_release_path(path);
/* step the search key to just below the key that was found */
8500 if (found_key.type == 0) {
8501 if (found_key.offset == 0)
8503 key.offset = found_key.offset - 1;
8504 key.type = found_key.type;
8506 key.type = found_key.type - 1;
8507 key.offset = (u64)-1;
8511 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8512 found_key.objectid, found_key.type, found_key.offset);
8514 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8517 btrfs_release_path(path);
/* removing the extent item itself also changes block group accounting */
8519 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8520 found_key.type == BTRFS_METADATA_ITEM_KEY) {
/* extent items keep the length in the key offset, metadata items do not */
8521 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8522 found_key.offset : root->fs_info->nodesize;
8524 ret = btrfs_update_block_group(trans, root, bytenr,
8531 btrfs_release_path(path);
8536 * for a single backref, this will allocate a new extent
8537 * and add the backref to it.
8539 static int record_extent(struct btrfs_trans_handle *trans,
8540 struct btrfs_fs_info *info,
8541 struct btrfs_path *path,
8542 struct extent_record *rec,
8543 struct extent_backref *back,
8544 int allocated, u64 flags)
8547 struct btrfs_root *extent_root = info->extent_root;
8548 struct extent_buffer *leaf;
8549 struct btrfs_key ins_key;
8550 struct btrfs_extent_item *ei;
8551 struct data_backref *dback;
8552 struct btrfs_tree_block_info *bi;
8555 rec->max_size = max_t(u64, rec->max_size,
8559 u32 item_size = sizeof(*ei);
8562 item_size += sizeof(*bi);
8564 ins_key.objectid = rec->start;
8565 ins_key.offset = rec->max_size;
8566 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8568 ret = btrfs_insert_empty_item(trans, extent_root, path,
8569 &ins_key, item_size);
8573 leaf = path->nodes[0];
8574 ei = btrfs_item_ptr(leaf, path->slots[0],
8575 struct btrfs_extent_item);
8577 btrfs_set_extent_refs(leaf, ei, 0);
8578 btrfs_set_extent_generation(leaf, ei, rec->generation);
8580 if (back->is_data) {
8581 btrfs_set_extent_flags(leaf, ei,
8582 BTRFS_EXTENT_FLAG_DATA);
8584 struct btrfs_disk_key copy_key;;
8586 bi = (struct btrfs_tree_block_info *)(ei + 1);
8587 memset_extent_buffer(leaf, 0, (unsigned long)bi,
8590 btrfs_set_disk_key_objectid(©_key,
8591 rec->info_objectid);
8592 btrfs_set_disk_key_type(©_key, 0);
8593 btrfs_set_disk_key_offset(©_key, 0);
8595 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8596 btrfs_set_tree_block_key(leaf, bi, ©_key);
8598 btrfs_set_extent_flags(leaf, ei,
8599 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8602 btrfs_mark_buffer_dirty(leaf);
8603 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8604 rec->max_size, 1, 0);
8607 btrfs_release_path(path);
8610 if (back->is_data) {
8614 dback = to_data_backref(back);
8615 if (back->full_backref)
8616 parent = dback->parent;
8620 for (i = 0; i < dback->found_ref; i++) {
8621 /* if parent != 0, we're doing a full backref
8622 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8623 * just makes the backref allocator create a data
8626 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8627 rec->start, rec->max_size,
8631 BTRFS_FIRST_FREE_OBJECTID :
8637 fprintf(stderr, "adding new data backref"
8638 " on %llu %s %llu owner %llu"
8639 " offset %llu found %d\n",
8640 (unsigned long long)rec->start,
8641 back->full_backref ?
8643 back->full_backref ?
8644 (unsigned long long)parent :
8645 (unsigned long long)dback->root,
8646 (unsigned long long)dback->owner,
8647 (unsigned long long)dback->offset,
8651 struct tree_backref *tback;
8653 tback = to_tree_backref(back);
8654 if (back->full_backref)
8655 parent = tback->parent;
8659 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8660 rec->start, rec->max_size,
8661 parent, tback->root, 0, 0);
8662 fprintf(stderr, "adding new tree backref on "
8663 "start %llu len %llu parent %llu root %llu\n",
8664 rec->start, rec->max_size, parent, tback->root);
8667 btrfs_release_path(path);
8671 static struct extent_entry *find_entry(struct list_head *entries,
8672 u64 bytenr, u64 bytes)
8674 struct extent_entry *entry = NULL;
8676 list_for_each_entry(entry, entries, list) {
8677 if (entry->bytenr == bytenr && entry->bytes == bytes)
8684 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8686 struct extent_entry *entry, *best = NULL, *prev = NULL;
8688 list_for_each_entry(entry, entries, list) {
8690 * If there are as many broken entries as entries then we know
8691 * not to trust this particular entry.
8693 if (entry->broken == entry->count)
8697 * Special case, when there are only two entries and 'best' is
8707 * If our current entry == best then we can't be sure our best
8708 * is really the best, so we need to keep searching.
8710 if (best && best->count == entry->count) {
8716 /* Prev == entry, not good enough, have to keep searching */
8717 if (!prev->broken && prev->count == entry->count)
8721 best = (prev->count > entry->count) ? prev : entry;
8722 else if (best->count < entry->count)
/*
 * Rewrite one file extent item so that it agrees with the chosen extent
 * entry.
 *
 * The fs root named by dback->root is read, the EXTENT_DATA item of inode
 * dback->owner whose disk_bytenr and disk_num_bytes equal dback->disk_bytenr
 * and dback->bytes is located by a read-only search starting at
 * dback->offset, and then the same key is searched again inside a
 * transaction with cow enabled.  The item then gets its disk_bytenr, offset
 * and disk_num_bytes (and ram_bytes for uncompressed extents) updated from
 * entry.
 *
 * Compressed extents whose bytenr differs from entry->bytenr, and refs that
 * reach outside the entry range, are reported on stderr and not rewritten.
 *
 * The transaction is committed at the end and the function returns
 * "ret ? ret : err", that is the repair status first and the commit status
 * second.
 */
8730 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8731 struct data_backref *dback, struct extent_entry *entry)
8733 struct btrfs_trans_handle *trans;
8734 struct btrfs_root *root;
8735 struct btrfs_file_extent_item *fi;
8736 struct extent_buffer *leaf;
8737 struct btrfs_key key;
/* look up the fs root that owns the reference */
8741 key.objectid = dback->root;
8742 key.type = BTRFS_ROOT_ITEM_KEY;
8743 key.offset = (u64)-1;
8744 root = btrfs_read_fs_root(info, &key);
8746 fprintf(stderr, "Couldn't find root for our ref\n");
8751 * The backref points to the original offset of the extent if it was
8752 * split, so we need to search down to the offset we have and then walk
8753 * forward until we find the backref we're looking for.
8755 key.objectid = dback->owner;
8756 key.type = BTRFS_EXTENT_DATA_KEY;
8757 key.offset = dback->offset;
8758 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8760 fprintf(stderr, "Error looking up ref %d\n", ret);
8765 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8766 ret = btrfs_next_leaf(root, path);
8768 fprintf(stderr, "Couldn't find our ref, next\n");
8772 leaf = path->nodes[0];
8773 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8774 if (key.objectid != dback->owner ||
8775 key.type != BTRFS_EXTENT_DATA_KEY) {
8776 fprintf(stderr, "Couldn't find our ref, search\n");
8779 fi = btrfs_item_ptr(leaf, path->slots[0],
8780 struct btrfs_file_extent_item);
8781 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8782 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8784 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8789 btrfs_release_path(path);
8791 trans = btrfs_start_transaction(root, 1);
8793 return PTR_ERR(trans);
8796 * Ok we have the key of the file extent we want to fix, now we can cow
8797 * down to the thing and fix it.
8799 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8801 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8802 key.objectid, key.type, key.offset, ret);
8806 fprintf(stderr, "Well that's odd, we just found this key "
8807 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8812 leaf = path->nodes[0];
8813 fi = btrfs_item_ptr(leaf, path->slots[0],
8814 struct btrfs_file_extent_item);
8816 if (btrfs_file_extent_compression(leaf, fi) &&
8817 dback->disk_bytenr != entry->bytenr) {
8818 fprintf(stderr, "Ref doesn't match the record start and is "
8819 "compressed, please take a btrfs-image of this file "
8820 "system and send it to a btrfs developer so they can "
8821 "complete this functionality for bytenr %Lu\n",
8822 dback->disk_bytenr);
8827 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8828 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8829 } else if (dback->disk_bytenr > entry->bytenr) {
8830 u64 off_diff, offset;
8832 off_diff = dback->disk_bytenr - entry->bytenr;
8833 offset = btrfs_file_extent_offset(leaf, fi);
8834 if (dback->disk_bytenr + offset +
8835 btrfs_file_extent_num_bytes(leaf, fi) >
8836 entry->bytenr + entry->bytes) {
8837 fprintf(stderr, "Ref is past the entry end, please "
8838 "take a btrfs-image of this file system and "
8839 "send it to a btrfs developer, ref %Lu\n",
8840 dback->disk_bytenr);
8845 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8846 btrfs_set_file_extent_offset(leaf, fi, offset);
8847 } else if (dback->disk_bytenr < entry->bytenr) {
8850 offset = btrfs_file_extent_offset(leaf, fi);
8851 if (dback->disk_bytenr + offset < entry->bytenr) {
8852 fprintf(stderr, "Ref is before the entry start, please"
8853 " take a btrfs-image of this file system and "
8854 "send it to a btrfs developer, ref %Lu\n",
8855 dback->disk_bytenr);
8860 offset += dback->disk_bytenr;
8861 offset -= entry->bytenr;
8862 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8863 btrfs_set_file_extent_offset(leaf, fi, offset);
8866 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8869 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8870 * only do this if we aren't using compression, otherwise it's a
8873 if (!btrfs_file_extent_compression(leaf, fi))
8874 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8876 printf("ram bytes may be wrong?\n");
8877 btrfs_mark_buffer_dirty(leaf);
8879 err = btrfs_commit_transaction(trans, root);
8880 btrfs_release_path(path);
8881 return ret ? ret : err;
/*
 * Make all data backrefs of rec agree on the extent start and length.
 *
 * Pass one walks rec->backref_tree and, for every data backref that is not
 * a full backref and has found_ref set, files its (disk_bytenr, bytes) pair
 * in a local list of extent_entry, allocating a new entry when
 * find_entry() has none.  A backref that differs from rec->start / rec->nr
 * or is marked broken takes the mismatch branch.
 *
 * If the backrefs do not already agree, find_most_right_entry() picks the
 * value most of them vote for.  When it cannot decide, the extent record
 * itself (rec->start, rec->nr) is consulted and, if it has no entry yet but
 * broken entries exist and rec->found_rec is set, added to the list before
 * voting again.  A winner whose bytenr differs from rec->start is reported
 * and not repaired.
 *
 * Pass two calls repair_ref() for every eligible data backref that does not
 * match the winner.  The entry list is emptied before returning.
 */
8884 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8885 struct extent_record *rec)
8887 struct extent_backref *back, *tmp;
8888 struct data_backref *dback;
8889 struct extent_entry *entry, *best = NULL;
8892 int broken_entries = 0;
8897 * Metadata is easy and the backrefs should always agree on bytenr and
8898 * size, if not we've got bigger issues.
8903 rbtree_postorder_for_each_entry_safe(back, tmp,
8904 &rec->backref_tree, node) {
8905 if (back->full_backref || !back->is_data)
8908 dback = to_data_backref(back);
8911 * We only pay attention to backrefs that we found a real
8914 if (dback->found_ref == 0)
8918 * For now we only catch when the bytes don't match, not the
8919 * bytenr. We can easily do this at the same time, but I want
8920 * to have a fs image to test on before we just add repair
8921 * functionality willy-nilly so we know we won't screw up the
8925 entry = find_entry(&entries, dback->disk_bytenr,
8928 entry = malloc(sizeof(struct extent_entry));
8933 memset(entry, 0, sizeof(*entry));
8934 entry->bytenr = dback->disk_bytenr;
8935 entry->bytes = dback->bytes;
8936 list_add_tail(&entry->list, &entries);
8941 * If we only have on entry we may think the entries agree when
8942 * in reality they don't so we have to do some extra checking.
8944 if (dback->disk_bytenr != rec->start ||
8945 dback->bytes != rec->nr || back->broken)
8956 /* Yay all the backrefs agree, carry on good sir */
8957 if (nr_entries <= 1 && !mismatch)
8960 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8961 "%Lu\n", rec->start);
8964 * First we want to see if the backrefs can agree amongst themselves who
8965 * is right, so figure out which one of the entries has the highest
8968 best = find_most_right_entry(&entries);
8971 * Ok so we may have an even split between what the backrefs think, so
8972 * this is where we use the extent ref to see what it thinks.
8975 entry = find_entry(&entries, rec->start, rec->nr);
8976 if (!entry && (!broken_entries || !rec->found_rec)) {
8977 fprintf(stderr, "Backrefs don't agree with each other "
8978 "and extent record doesn't agree with anybody,"
8979 " so we can't fix bytenr %Lu bytes %Lu\n",
8980 rec->start, rec->nr);
8983 } else if (!entry) {
8985 * Ok our backrefs were broken, we'll assume this is the
8986 * correct value and add an entry for this range.
8988 entry = malloc(sizeof(struct extent_entry));
8993 memset(entry, 0, sizeof(*entry));
8994 entry->bytenr = rec->start;
8995 entry->bytes = rec->nr;
8996 list_add_tail(&entry->list, &entries);
9000 best = find_most_right_entry(&entries);
9002 fprintf(stderr, "Backrefs and extent record evenly "
9003 "split on who is right, this is going to "
9004 "require user input to fix bytenr %Lu bytes "
9005 "%Lu\n", rec->start, rec->nr);
9012 * I don't think this can happen currently as we'll abort() if we catch
9013 * this case higher up, but in case somebody removes that we still can't
9014 * deal with it properly here yet, so just bail out of that's the case.
9016 if (best->bytenr != rec->start) {
9017 fprintf(stderr, "Extent start and backref starts don't match, "
9018 "please use btrfs-image on this file system and send "
9019 "it to a btrfs developer so they can make fsck fix "
9020 "this particular case. bytenr is %Lu, bytes is %Lu\n",
9021 rec->start, rec->nr);
9027 * Ok great we all agreed on an extent record, let's go find the real
9028 * references and fix up the ones that don't match.
9030 rbtree_postorder_for_each_entry_safe(back, tmp,
9031 &rec->backref_tree, node) {
9032 if (back->full_backref || !back->is_data)
9035 dback = to_data_backref(back);
9038 * Still ignoring backrefs that don't have a real ref attached
9041 if (dback->found_ref == 0)
9044 if (dback->bytes == best->bytes &&
9045 dback->disk_bytenr == best->bytenr)
9048 ret = repair_ref(info, path, dback, best);
9054 * Ok we messed with the actual refs, which means we need to drop our
9055 * entire cache and go back and rescan. I know this is a huge pain and
9056 * adds a lot of extra work, but it's the only way to be safe. Once all
9057 * the backrefs agree we may not need to do anything to the extent
9062 while (!list_empty(&entries)) {
9063 entry = list_entry(entries.next, struct extent_entry, list);
9064 list_del_init(&entry->list);
/*
 * Replace an extent record that has no extent item of its own by its single
 * duplicate, which was built from a real extent item.
 *
 * Nothing is done when rec->found_rec is set or when rec has more than one
 * duplicate.  Otherwise rec is taken out of extent_cache, the first record
 * on rec->dups ("good") is reset, inherits the refs and backref list of rec,
 * and absorbs every cached record that overlaps its range: records with
 * found_rec set or with duplicates of their own are chained onto good->dups
 * (and good is put on the global duplicate_extents list), the others just
 * have their refs and backrefs merged in.  Finally good is inserted into
 * extent_cache.
 *
 * The final return statement yields 0 when good ended up with duplicates
 * and 1 when it did not.
 */
9070 static int process_duplicates(struct cache_tree *extent_cache,
9071 struct extent_record *rec)
9073 struct extent_record *good, *tmp;
9074 struct cache_extent *cache;
9078 * If we found a extent record for this extent then return, or if we
9079 * have more than one duplicate we are likely going to need to delete
9082 if (rec->found_rec || rec->num_duplicates > 1)
9085 /* Shouldn't happen but just in case */
9086 BUG_ON(!rec->num_duplicates);
9089 * So this happens if we end up with a backref that doesn't match the
9090 * actual extent entry. So either the backref is bad or the extent
9091 * entry is bad. Either way we want to have the extent_record actually
9092 * reflect what we found in the extent_tree, so we need to take the
9093 * duplicate out and use that as the extent_record since the only way we
9094 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9096 remove_cache_extent(extent_cache, &rec->cache);
9098 good = to_extent_record(rec->dups.next);
9099 list_del_init(&good->list);
9100 INIT_LIST_HEAD(&good->backrefs);
9101 INIT_LIST_HEAD(&good->dups);
9102 good->cache.start = good->start;
9103 good->cache.size = good->nr;
9104 good->content_checked = 0;
9105 good->owner_ref_checked = 0;
9106 good->num_duplicates = 0;
9107 good->refs = rec->refs;
9108 list_splice_init(&rec->backrefs, &good->backrefs);
9110 cache = lookup_cache_extent(extent_cache, good->start,
9114 tmp = container_of(cache, struct extent_record, cache);
9117 * If we find another overlapping extent and it's found_rec is
9118 * set then it's a duplicate and we need to try and delete
9121 if (tmp->found_rec || tmp->num_duplicates > 0) {
9122 if (list_empty(&good->list))
9123 list_add_tail(&good->list,
9124 &duplicate_extents);
9125 good->num_duplicates += tmp->num_duplicates + 1;
9126 list_splice_init(&tmp->dups, &good->dups);
9127 list_del_init(&tmp->list);
9128 list_add_tail(&tmp->list, &good->dups);
9129 remove_cache_extent(extent_cache, &tmp->cache);
9134 * Ok we have another non extent item backed extent rec, so lets
9135 * just add it to this extent and carry on like we did above.
9137 good->refs += tmp->refs;
9138 list_splice_init(&tmp->backrefs, &good->backrefs);
9139 remove_cache_extent(extent_cache, &tmp->cache);
9142 ret = insert_cache_extent(extent_cache, &good->cache);
9145 return good->num_duplicates ? 0 : 1;
/*
 * Delete the extent items of the duplicates of rec from the extent tree.
 *
 * The duplicates on rec->dups are compared by start and length to find the
 * one record that covers the others; a pair that only partially overlaps is
 * reported on stderr.  Records chosen for deletion are collected on a local
 * delete_list.  Inside one transaction on the extent root, every listed
 * record with found_rec set has its (start, EXTENT_ITEM, nr) key searched
 * and removed with btrfs_del_item(); a metadata record on the list is
 * reported as unexpected.
 *
 * Afterwards both delete_list and rec->dups are emptied, and
 * rec->num_duplicates is reset when there was no error and nothing was
 * deleted.  Returns ret when it is non-zero, otherwise nr_del.
 *
 * NOTE(review): the overlap message prints start and nr (a length) inside
 * "[%Lu-%Lu]", which reads like a start-end range; confirm the intent.
 */
9148 static int delete_duplicate_records(struct btrfs_root *root,
9149 struct extent_record *rec)
9151 struct btrfs_trans_handle *trans;
9152 LIST_HEAD(delete_list);
9153 struct btrfs_path path;
9154 struct extent_record *tmp, *good, *n;
9157 struct btrfs_key key;
9159 btrfs_init_path(&path);
9162 /* Find the record that covers all of the duplicates. */
9163 list_for_each_entry(tmp, &rec->dups, list) {
9164 if (good->start < tmp->start)
9166 if (good->nr > tmp->nr)
9169 if (tmp->start + tmp->nr < good->start + good->nr) {
9170 fprintf(stderr, "Ok we have overlapping extents that "
9171 "aren't completely covered by each other, this "
9172 "is going to require more careful thought. "
9173 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9174 tmp->start, tmp->nr, good->start, good->nr);
9181 list_add_tail(&rec->list, &delete_list);
9183 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9186 list_move_tail(&tmp->list, &delete_list);
/* all deletions go through the extent root */
9189 root = root->fs_info->extent_root;
9190 trans = btrfs_start_transaction(root, 1);
9191 if (IS_ERR(trans)) {
9192 ret = PTR_ERR(trans);
9196 list_for_each_entry(tmp, &delete_list, list) {
9197 if (tmp->found_rec == 0)
9199 key.objectid = tmp->start;
9200 key.type = BTRFS_EXTENT_ITEM_KEY;
9201 key.offset = tmp->nr;
9203 /* Shouldn't happen but just in case */
9204 if (tmp->metadata) {
9205 fprintf(stderr, "Well this shouldn't happen, extent "
9206 "record overlaps but is metadata? "
9207 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9211 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9217 ret = btrfs_del_item(trans, root, &path);
9220 btrfs_release_path(&path);
9223 err = btrfs_commit_transaction(trans, root);
/* empty delete_list */
9227 while (!list_empty(&delete_list)) {
9228 tmp = to_extent_record(delete_list.next);
9229 list_del_init(&tmp->list);
/* empty whatever is left on rec->dups */
9235 while (!list_empty(&rec->dups)) {
9236 tmp = to_extent_record(rec->dups.next);
9237 list_del_init(&tmp->list);
9241 btrfs_release_path(&path);
9243 if (!ret && !nr_del)
9244 rec->num_duplicates = 0;
9246 return ret ? ret : nr_del;
/*
 * Try to attach file extents to data backrefs of rec that were never
 * matched during the tree scan.
 *
 * For every data backref of rec that is not a full backref and has
 * found_ref == 0, the fs root dback->root is read and the key
 * (dback->owner, EXTENT_DATA, dback->offset) is searched read-only.  When a
 * file extent item is read, its disk_bytenr and disk_num_bytes are taken
 * and extent_cache is probed at that bytenr.  The backref then gets
 * found_ref incremented and its disk_bytenr / bytes overwritten with the
 * values from the file extent item, so that verify_backrefs can judge it.
 *
 * A root that fails to load with -ENOENT is skipped; any other root read
 * error is returned to the caller.
 */
9249 static int find_possible_backrefs(struct btrfs_fs_info *info,
9250 struct btrfs_path *path,
9251 struct cache_tree *extent_cache,
9252 struct extent_record *rec)
9254 struct btrfs_root *root;
9255 struct extent_backref *back, *tmp;
9256 struct data_backref *dback;
9257 struct cache_extent *cache;
9258 struct btrfs_file_extent_item *fi;
9259 struct btrfs_key key;
9263 rbtree_postorder_for_each_entry_safe(back, tmp,
9264 &rec->backref_tree, node) {
9265 /* Don't care about full backrefs (poor unloved backrefs) */
9266 if (back->full_backref || !back->is_data)
9269 dback = to_data_backref(back);
9271 /* We found this one, we don't need to do a lookup */
9272 if (dback->found_ref)
9275 key.objectid = dback->root;
9276 key.type = BTRFS_ROOT_ITEM_KEY;
9277 key.offset = (u64)-1;
9279 root = btrfs_read_fs_root(info, &key);
9281 /* No root, definitely a bad ref, skip */
9282 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9284 /* Other err, exit */
9286 return PTR_ERR(root);
9288 key.objectid = dback->owner;
9289 key.type = BTRFS_EXTENT_DATA_KEY;
9290 key.offset = dback->offset;
9291 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9293 btrfs_release_path(path);
9296 /* Didn't find it, we can carry on */
9301 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9302 struct btrfs_file_extent_item);
9303 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9304 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9305 btrfs_release_path(path);
/* a one byte probe at bytenr is enough to find a covering record */
9306 cache = lookup_cache_extent(extent_cache, bytenr, 1);
/* NOTE(review): this local tmp shadows the loop cursor tmp declared above */
9308 struct extent_record *tmp;
9309 tmp = container_of(cache, struct extent_record, cache);
9312 * If we found an extent record for the bytenr for this
9313 * particular backref then we can't add it to our
9314 * current extent record. We only want to add backrefs
9315 * that don't have a corresponding extent item in the
9316 * extent tree since they likely belong to this record
9317 * and we need to fix it if it doesn't match bytenrs.
9323 dback->found_ref += 1;
9324 dback->disk_bytenr = bytenr;
9325 dback->bytes = bytes;
9328 * Set this so the verify backref code knows not to trust the
9329 * values in this backref.
9338 * Record orphan data ref into corresponding root.
9340 * Return 0 if the extent item contains data ref and recorded.
9341 * Return 1 if the extent item contains no useful data ref
9342 * On that case, it may contains only shared_dataref or metadata backref
9343 * or the file extent exists(this should be handled by the extent bytenr
9345 * Return <0 if something goes wrong.
9347 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9348 struct extent_record *rec)
9350 struct btrfs_key key;
9351 struct btrfs_root *dest_root;
9352 struct extent_backref *back, *tmp;
9353 struct data_backref *dback;
9354 struct orphan_data_extent *orphan;
9355 struct btrfs_path path;
9356 int recorded_data_ref = 0;
9361 btrfs_init_path(&path);
9362 rbtree_postorder_for_each_entry_safe(back, tmp,
9363 &rec->backref_tree, node) {
9364 if (back->full_backref || !back->is_data ||
9365 !back->found_extent_tree)
9367 dback = to_data_backref(back);
9368 if (dback->found_ref)
9370 key.objectid = dback->root;
9371 key.type = BTRFS_ROOT_ITEM_KEY;
9372 key.offset = (u64)-1;
9374 dest_root = btrfs_read_fs_root(fs_info, &key);
9376 /* For non-exist root we just skip it */
9377 if (IS_ERR(dest_root) || !dest_root)
9380 key.objectid = dback->owner;
9381 key.type = BTRFS_EXTENT_DATA_KEY;
9382 key.offset = dback->offset;
9384 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9385 btrfs_release_path(&path);
9387 * For ret < 0, it's OK since the fs-tree may be corrupted,
9388 * we need to record it for inode/file extent rebuild.
9389 * For ret > 0, we record it only for file extent rebuild.
9390 * For ret == 0, the file extent exists but only bytenr
9391 * mismatch, let the original bytenr fix routine to handle,
9397 orphan = malloc(sizeof(*orphan));
9402 INIT_LIST_HEAD(&orphan->list);
9403 orphan->root = dback->root;
9404 orphan->objectid = dback->owner;
9405 orphan->offset = dback->offset;
9406 orphan->disk_bytenr = rec->cache.start;
9407 orphan->disk_len = rec->cache.size;
9408 list_add(&dest_root->orphan_data_extents, &orphan->list);
9409 recorded_data_ref = 1;
9412 btrfs_release_path(&path);
9414 return !recorded_data_ref;
9420 * when an incorrect extent item is found, this will delete
9421 * all of the existing entries for it and recreate them
9422 * based on what the tree scan found.
/*
 * Rebuild the extent tree items of one extent record.
 *
 * For data extents whose counted refs differ from extent_item_refs, the
 * backrefs are first completed with find_possible_backrefs() and checked
 * with verify_backrefs().  Then, inside a transaction on the extent root,
 * delete_extent_records() removes the existing items, the corrupt block
 * cache is consulted for the range of the record, and record_extent() is
 * called for every backref that has found_ref set.  The transaction is
 * committed and a "Repaired extent references" line is printed; the path is
 * released at the end.
 *
 * BTRFS_BLOCK_FLAG_FULL_BACKREF is added to the flags passed to
 * record_extent() when rec->flag_block_full_backref is set.
 */
9424 static int fixup_extent_refs(struct btrfs_fs_info *info,
9425 struct cache_tree *extent_cache,
9426 struct extent_record *rec)
9428 struct btrfs_trans_handle *trans = NULL;
9430 struct btrfs_path path;
9431 struct cache_extent *cache;
9432 struct extent_backref *back, *tmp;
9436 if (rec->flag_block_full_backref)
9437 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9439 btrfs_init_path(&path);
/* only data extents with a ref count mismatch need the backref checks */
9440 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9442 * Sometimes the backrefs themselves are so broken they don't
9443 * get attached to any meaningful rec, so first go back and
9444 * check any of our backrefs that we couldn't find and throw
9445 * them into the list if we find the backref so that
9446 * verify_backrefs can figure out what to do.
9448 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9453 /* step one, make sure all of the backrefs agree */
9454 ret = verify_backrefs(info, &path, rec);
9458 trans = btrfs_start_transaction(info->extent_root, 1);
9459 if (IS_ERR(trans)) {
9460 ret = PTR_ERR(trans);
9464 /* step two, delete all the existing records */
9465 ret = delete_extent_records(trans, info->extent_root, &path,
9471 /* was this block corrupt? If so, don't add references to it */
9472 cache = lookup_cache_extent(info->corrupt_blocks,
9473 rec->start, rec->max_size);
9479 /* step three, recreate all the refs we did find */
9480 rbtree_postorder_for_each_entry_safe(back, tmp,
9481 &rec->backref_tree, node) {
9483 * if we didn't find any references, don't create a
9486 if (!back->found_ref)
9489 rec->bad_full_backref = 0;
9490 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9498 int err = btrfs_commit_transaction(trans, info->extent_root);
9504 fprintf(stderr, "Repaired extent references for %llu\n",
9505 (unsigned long long)rec->start);
9507 btrfs_release_path(&path);
/*
 * Make the FULL_BACKREF flag of an extent item match the extent record.
 *
 * The item is looked up in the extent root as (rec->start, METADATA_ITEM,
 * rec->info_level) for metadata records and as (rec->start, EXTENT_ITEM,
 * rec->max_size) otherwise.  BTRFS_BLOCK_FLAG_FULL_BACKREF is set in the
 * item flags when rec->flag_block_full_backref is set and cleared when it
 * is not; the leaf is marked dirty and the transaction is committed.
 *
 * The two early exit paths after the search (one of them printing "Didn't
 * find extent") release the path and still commit the transaction.
 */
9511 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9512 struct extent_record *rec)
9514 struct btrfs_trans_handle *trans;
9515 struct btrfs_root *root = fs_info->extent_root;
9516 struct btrfs_path path;
9517 struct btrfs_extent_item *ei;
9518 struct btrfs_key key;
/* metadata items are keyed by level, extent items by length */
9522 key.objectid = rec->start;
9523 if (rec->metadata) {
9524 key.type = BTRFS_METADATA_ITEM_KEY;
9525 key.offset = rec->info_level;
9527 key.type = BTRFS_EXTENT_ITEM_KEY;
9528 key.offset = rec->max_size;
9531 trans = btrfs_start_transaction(root, 0);
9533 return PTR_ERR(trans);
9535 btrfs_init_path(&path);
/* last argument 1: search with cow so the leaf can be modified */
9536 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9538 btrfs_release_path(&path);
9539 btrfs_commit_transaction(trans, root);
9542 fprintf(stderr, "Didn't find extent for %llu\n",
9543 (unsigned long long)rec->start);
9544 btrfs_release_path(&path);
9545 btrfs_commit_transaction(trans, root);
9549 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9550 struct btrfs_extent_item);
9551 flags = btrfs_extent_flags(path.nodes[0], ei);
9552 if (rec->flag_block_full_backref) {
9553 fprintf(stderr, "setting full backref on %llu\n",
9554 (unsigned long long)key.objectid);
9555 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9557 fprintf(stderr, "clearing full backref on %llu\n",
9558 (unsigned long long)key.objectid);
9559 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* write the updated flags back into the item */
9561 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9562 btrfs_mark_buffer_dirty(path.nodes[0]);
9563 btrfs_release_path(&path);
9564 ret = btrfs_commit_transaction(trans, root);
9566 fprintf(stderr, "Repaired extent flags for %llu\n",
9567 (unsigned long long)rec->start);
9572 /* right now we only prune from the extent allocation tree */
/*
 * Remove the pointer to one corrupt block from its parent node in the
 * extent tree.
 *
 * The extent root is searched for corrupt->key with path.lowest_level set
 * to corrupt->level + 1, so the search ends at the parent level.  The slot
 * returned by the search is checked first; if its block pointer is not
 * corrupt->cache.start, every slot of that node is scanned.  When the
 * pointer is located it is removed with btrfs_del_ptr() after printing a
 * "deleting pointer to block" line.  The path is released on the way out.
 */
9573 static int prune_one_block(struct btrfs_trans_handle *trans,
9574 struct btrfs_fs_info *info,
9575 struct btrfs_corrupt_block *corrupt)
9578 struct btrfs_path path;
9579 struct extent_buffer *eb;
/* the parent of the corrupt block sits one level above it */
9583 int level = corrupt->level + 1;
9585 btrfs_init_path(&path);
9587 /* we want to stop at the parent to our busted block */
9588 path.lowest_level = level;
9590 ret = btrfs_search_slot(trans, info->extent_root,
9591 &corrupt->key, &path, -1, 1);
9596 eb = path.nodes[level];
9603 * hopefully the search gave us the block we want to prune,
9604 * lets try that first
9606 slot = path.slots[level];
9607 found = btrfs_node_blockptr(eb, slot);
9608 if (found == corrupt->cache.start)
9611 nritems = btrfs_header_nritems(eb);
9613 /* the search failed, lets scan this node and hope we find it */
9614 for (slot = 0; slot < nritems; slot++) {
9615 found = btrfs_node_blockptr(eb, slot);
9616 if (found == corrupt->cache.start)
9620 * we couldn't find the bad block. TODO, search all the nodes for pointers
9623 if (eb == info->extent_root->node) {
9628 btrfs_release_path(&path);
9633 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9634 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9637 btrfs_release_path(&path);
/*
 * Prune the extent tree pointers to the blocks recorded in
 * info->corrupt_blocks.
 *
 * A transaction is started on info->extent_root, each corrupt block record
 * is passed to prune_one_block() and then removed from the cache.
 *
 * Return values visible here are PTR_ERR(trans) and the result of
 * btrfs_commit_transaction().
 * NOTE(review): the return value of prune_one_block() is discarded.
 */
9641 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9643 struct btrfs_trans_handle *trans = NULL;
9644 struct cache_extent *cache;
9645 struct btrfs_corrupt_block *corrupt;
9648 cache = search_cache_extent(info->corrupt_blocks, 0);
9652 trans = btrfs_start_transaction(info->extent_root, 1);
9654 return PTR_ERR(trans);
9656 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9657 prune_one_block(trans, info, corrupt);
9658 remove_cache_extent(info->corrupt_blocks, cache);
9661 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Clear the EXTENT_DIRTY ranges recorded in fs_info->free_space_cache and
 * walk the block groups through btrfs_lookup_first_block_group().
 *
 * The walk advances to the end of each block group that is found
 * (key.objectid + key.offset) before the next lookup.
 */
9665 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9667 struct btrfs_block_group_cache *cache;
9672 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9673 &start, &end, EXTENT_DIRTY);
9676 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9681 cache = btrfs_lookup_first_block_group(fs_info, start);
9686 start = cache->key.objectid + cache->key.offset;
/*
 * Verify the extent records in @extent_cache and report problems.
 *
 * @root:         a root of the filesystem; root->fs_info supplies the
 *                excluded_extents and corrupt_blocks trees used here
 * @extent_cache: cache of struct extent_record entries; each record is
 *                removed from the cache and its backrefs are freed once it
 *                has been examined
 *
 * Per record checks visible in the body: num_duplicates, refs versus
 * extent_item_refs, all_backpointers_checked(), owner_ref_checked,
 * bad_full_backref, crossing_stripes and wrong_chunk_type.
 *
 * In repair mode the problem ranges and corrupt blocks are first marked in
 * excluded_extents, duplicate extent records are deleted, and
 * fixup_extent_refs() / fixup_extent_flags() are called for the records
 * that need them.  Block accounting is fixed in a final transaction.
 */
9690 static int check_extent_refs(struct btrfs_root *root,
9691 struct cache_tree *extent_cache)
9693 struct extent_record *rec;
9694 struct cache_extent *cache;
9700 * if we're doing a repair, we have to make sure
9701 * we don't allocate from the problem extents.
9702 * In the worst case, this will be all the
9705 cache = search_cache_extent(extent_cache, 0);
9707 rec = container_of(cache, struct extent_record, cache);
9708 set_extent_dirty(root->fs_info->excluded_extents,
9710 rec->start + rec->max_size - 1);
9711 cache = next_cache_extent(cache);
9714 /* pin down all the corrupted blocks too */
9715 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9717 set_extent_dirty(root->fs_info->excluded_extents,
9719 cache->start + cache->size - 1);
9720 cache = next_cache_extent(cache);
9722 prune_corrupt_blocks(root->fs_info);
9723 reset_cached_block_groups(root->fs_info);
9726 reset_cached_block_groups(root->fs_info);
9729 * We need to delete any duplicate entries we find first otherwise we
9730 * could mess up the extent tree when we have backrefs that actually
9731 * belong to a different extent item and not the weird duplicate one.
9733 while (repair && !list_empty(&duplicate_extents)) {
9734 rec = to_extent_record(duplicate_extents.next);
9735 list_del_init(&rec->list);
9737 /* Sometimes we can find a backref before we find an actual
9738 * extent, so we need to process it a little bit to see if there
9739 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9740 * if this is a backref screwup. If we need to delete stuff
9741 * process_duplicates() will return 0, otherwise it will return
9744 if (process_duplicates(extent_cache, rec))
9746 ret = delete_duplicate_records(root, rec);
9750 * delete_duplicate_records will return the number of entries
9751 * deleted, so if it's greater than 0 then we know we actually
9752 * did something and we need to remove.
/* per record checks start here */
9765 cache = search_cache_extent(extent_cache, 0);
9768 rec = container_of(cache, struct extent_record, cache);
9769 if (rec->num_duplicates) {
9770 fprintf(stderr, "extent item %llu has multiple extent "
9771 "items\n", (unsigned long long)rec->start);
9775 if (rec->refs != rec->extent_item_refs) {
9776 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9777 (unsigned long long)rec->start,
9778 (unsigned long long)rec->nr);
9779 fprintf(stderr, "extent item %llu, found %llu\n",
9780 (unsigned long long)rec->extent_item_refs,
9781 (unsigned long long)rec->refs);
9782 ret = record_orphan_data_extents(root->fs_info, rec);
9788 if (all_backpointers_checked(rec, 1)) {
9789 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9790 (unsigned long long)rec->start,
9791 (unsigned long long)rec->nr);
9795 if (!rec->owner_ref_checked) {
9796 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9797 (unsigned long long)rec->start,
9798 (unsigned long long)rec->nr);
9803 if (repair && fix) {
9804 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9810 if (rec->bad_full_backref) {
9811 fprintf(stderr, "bad full backref, on [%llu]\n",
9812 (unsigned long long)rec->start);
9814 ret = fixup_extent_flags(root->fs_info, rec);
9822 * Although it's not a extent ref's problem, we reuse this
9823 * routine for error reporting.
9824 * No repair function yet.
9826 if (rec->crossing_stripes) {
9828 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9829 rec->start, rec->start + rec->max_size);
9833 if (rec->wrong_chunk_type) {
9835 "bad extent [%llu, %llu), type mismatch with chunk\n",
9836 rec->start, rec->start + rec->max_size);
9840 remove_cache_extent(extent_cache, cache);
9841 free_all_extent_backrefs(rec);
9842 if (!init_extent_tree && repair && (!cur_err || fix))
9843 clear_extent_dirty(root->fs_info->excluded_extents,
9845 rec->start + rec->max_size - 1);
9850 if (ret && ret != -EAGAIN) {
9851 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
/* block accounting is fixed in a transaction on the extent root */
9854 struct btrfs_trans_handle *trans;
9856 root = root->fs_info->extent_root;
9857 trans = btrfs_start_transaction(root, 1);
9858 if (IS_ERR(trans)) {
9859 ret = PTR_ERR(trans);
9863 ret = btrfs_fix_block_accounting(trans, root);
9866 ret = btrfs_commit_transaction(trans, root);
9875 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9879 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9880 stripe_size = length;
9881 stripe_size /= num_stripes;
9882 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9883 stripe_size = length * 2;
9884 stripe_size /= num_stripes;
9885 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9886 stripe_size = length;
9887 stripe_size /= (num_stripes - 1);
9888 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9889 stripe_size = length;
9890 stripe_size /= (num_stripes - 2);
9892 stripe_size = length;
9898 * Check the chunk with its block group/dev list ref:
9899 * Return 0 if all refs seem valid.
9900 * Return 1 if only part of the refs seem valid and a later pass must rebuild
9901 * the rest, e.g. a missing block group that is rebuilt by searching the extent tree.
9902 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * @chunk_rec:         chunk to verify; when a block group is found it is
 *                     stored in chunk_rec->bg_rec, and dev extent records
 *                     are moved onto chunk_rec->dextents
 * @block_group_cache: block group records, looked up through its tree member
 * @dev_extent_cache:  device extent records, looked up once per stripe by
 *                     devid, offset and expected length
 *
 * The expected length of each device extent comes from
 * calc_stripe_length().
 */
9904 static int check_chunk_refs(struct chunk_record *chunk_rec,
9905 struct block_group_tree *block_group_cache,
9906 struct device_extent_tree *dev_extent_cache,
9909 struct cache_extent *block_group_item;
9910 struct block_group_record *block_group_rec;
9911 struct cache_extent *dev_extent_item;
9912 struct device_extent_record *dev_extent_rec;
9916 int metadump_v2 = 0;
/* part 1: compare the chunk with its block group record */
9920 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9923 if (block_group_item) {
9924 block_group_rec = container_of(block_group_item,
9925 struct block_group_record,
9927 if (chunk_rec->length != block_group_rec->offset ||
9928 chunk_rec->offset != block_group_rec->objectid ||
9930 chunk_rec->type_flags != block_group_rec->flags)) {
9933 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9934 chunk_rec->objectid,
9939 chunk_rec->type_flags,
9940 block_group_rec->objectid,
9941 block_group_rec->type,
9942 block_group_rec->offset,
9943 block_group_rec->offset,
9944 block_group_rec->objectid,
9945 block_group_rec->flags);
9948 list_del_init(&block_group_rec->list);
9949 chunk_rec->bg_rec = block_group_rec;
9954 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9955 chunk_rec->objectid,
9960 chunk_rec->type_flags);
/* part 2: compare every stripe with its device extent record */
9967 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9968 chunk_rec->num_stripes);
9969 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9970 devid = chunk_rec->stripes[i].devid;
9971 offset = chunk_rec->stripes[i].offset;
9972 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9973 devid, offset, length);
9974 if (dev_extent_item) {
9975 dev_extent_rec = container_of(dev_extent_item,
9976 struct device_extent_record,
9978 if (dev_extent_rec->objectid != devid ||
9979 dev_extent_rec->offset != offset ||
9980 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9981 dev_extent_rec->length != length) {
9984 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9985 chunk_rec->objectid,
9988 chunk_rec->stripes[i].devid,
9989 chunk_rec->stripes[i].offset,
9990 dev_extent_rec->objectid,
9991 dev_extent_rec->offset,
9992 dev_extent_rec->length);
9995 list_move(&dev_extent_rec->chunk_list,
9996 &chunk_rec->dextents);
10001 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10002 chunk_rec->objectid,
10005 chunk_rec->stripes[i].devid,
10006 chunk_rec->stripes[i].offset);
10013 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * @chunk_cache:       cache of chunk records; every entry is checked with
 *                     check_chunk_refs()
 * @block_group_cache: block group records; the entries on its block_groups
 *                     list are reported after the chunk pass
 * @dev_extent_cache:  device extent records; the entries on its
 *                     no_chunk_orphans list are reported after the chunk pass
 * @good:              optional list (may be NULL) receiving chunk records
 *                     whose check returned 0
 * @bad:               optional list (may be NULL) receiving chunk records
 *                     whose check returned a negative value
 * @rebuild:           optional list (may be NULL) receiving chunk records
 *                     whose check returned a positive value
 * @silent:            passed through to check_chunk_refs()
 */
10014 int check_chunks(struct cache_tree *chunk_cache,
10015 struct block_group_tree *block_group_cache,
10016 struct device_extent_tree *dev_extent_cache,
10017 struct list_head *good, struct list_head *bad,
10018 struct list_head *rebuild, int silent)
10020 struct cache_extent *chunk_item;
10021 struct chunk_record *chunk_rec;
10022 struct block_group_record *bg_rec;
10023 struct device_extent_record *dext_rec;
10027 chunk_item = first_cache_extent(chunk_cache);
10028 while (chunk_item) {
10029 chunk_rec = container_of(chunk_item, struct chunk_record,
10031 err = check_chunk_refs(chunk_rec, block_group_cache,
10032 dev_extent_cache, silent);
/* sort the chunk record onto the caller supplied list that fits err */
10035 if (err == 0 && good)
10036 list_add_tail(&chunk_rec->list, good);
10037 if (err > 0 && rebuild)
10038 list_add_tail(&chunk_rec->list, rebuild);
10039 if (err < 0 && bad)
10040 list_add_tail(&chunk_rec->list, bad);
10041 chunk_item = next_cache_extent(chunk_item);
10044 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10047 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10055 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10059 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10060 dext_rec->objectid,
/*
 * Compare the space covered by the device extents of one device with the
 * byte_used value stored in its device record.
 *
 * @dev_rec:    device record to verify
 * @dext_cache: device extent cache; the lookup starts at
 *              (dev_rec->devid, 0)
 *
 * Each visited extent is unlinked from its device_list and its length is
 * added to a running total.  The objectid of every extent is compared with
 * dev_rec->devid.  A total that differs from dev_rec->byte_used produces a
 * diagnostic message.
 */
10070 static int check_device_used(struct device_record *dev_rec,
10071 struct device_extent_tree *dext_cache)
10073 struct cache_extent *cache;
10074 struct device_extent_record *dev_extent_rec;
10075 u64 total_byte = 0;
10077 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10079 dev_extent_rec = container_of(cache,
10080 struct device_extent_record,
10082 if (dev_extent_rec->objectid != dev_rec->devid)
10085 list_del_init(&dev_extent_rec->device_list);
10086 total_byte += dev_extent_rec->length;
10087 cache = next_cache_extent(cache);
10090 if (total_byte != dev_rec->byte_used) {
10092 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10093 total_byte, dev_rec->byte_used, dev_rec->objectid,
10094 dev_rec->type, dev_rec->offset);
10101 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Run check_device_used() on the device records stored in @dev_cache, then
 * report each device extent found on dev_extent_cache->no_device_orphans
 * as an extent without a device.
 *
 * @dev_cache:        rb tree of struct device_record, linked by their node
 *                    member
 * @dev_extent_cache: device extent records used for both steps
 */
10102 static int check_devices(struct rb_root *dev_cache,
10103 struct device_extent_tree *dev_extent_cache)
10105 struct rb_node *dev_node;
10106 struct device_record *dev_rec;
10107 struct device_extent_record *dext_rec;
10111 dev_node = rb_first(dev_cache);
10113 dev_rec = container_of(dev_node, struct device_record, node);
10114 err = check_device_used(dev_rec, dev_extent_cache);
10118 dev_node = rb_next(dev_node);
10120 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10123 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10124 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record with malloc(), fill it from the arguments and
 * append it to @head.
 *
 * @head:          list that receives the new record
 * @objectid:      stored in ri_rec->objectid
 * @bytenr:        stored in ri_rec->bytenr
 * @last_snapshot: stored in ri_rec->last_snapshot
 * @level:         stored in ri_rec->level
 * @drop_level:    stored in ri_rec->drop_level
 * @drop_key:      copied into ri_rec->drop_key with memcpy()
 *
 * NOTE(review): callers in this file pass NULL for @drop_key; confirm the
 * memcpy() is guarded against that.
 */
10131 static int add_root_item_to_list(struct list_head *head,
10132 u64 objectid, u64 bytenr, u64 last_snapshot,
10133 u8 level, u8 drop_level,
10134 struct btrfs_key *drop_key)
10137 struct root_item_record *ri_rec;
10138 ri_rec = malloc(sizeof(*ri_rec));
10141 ri_rec->bytenr = bytenr;
10142 ri_rec->objectid = objectid;
10143 ri_rec->level = level;
10144 ri_rec->drop_level = drop_level;
10145 ri_rec->last_snapshot = last_snapshot;
10147 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10148 list_add_tail(&ri_rec->list, head);
/*
 * Empty @list by unlinking its root_item_record entries one at a time,
 * always taking the first entry, until list_empty() reports true.
 */
10153 static void free_root_item_list(struct list_head *list)
10155 struct root_item_record *ri_rec;
10157 while (!list_empty(list)) {
10158 ri_rec = list_first_entry(list, struct root_item_record,
10160 list_del_init(&ri_rec->list);
/*
 * Process the tree roots queued on @list.
 *
 * For each root_item_record the block at rec->bytenr is read with
 * read_tree_block() and queued through add_root_to_pending().  The body
 * also calls run_next_block() with the record, and the record is unlinked
 * from @list.  A run_next_block() call with a NULL record follows the per
 * root work.
 *
 * @list: list of struct root_item_record to consume
 * @root: root whose fs_info is used to read tree blocks
 * @bits: block_info array handed to run_next_block()
 *
 * The cache tree, rb_root, block group and device extent parameters are
 * passed through unchanged to add_root_to_pending() and run_next_block().
 */
10165 static int deal_root_from_list(struct list_head *list,
10166 struct btrfs_root *root,
10167 struct block_info *bits,
10169 struct cache_tree *pending,
10170 struct cache_tree *seen,
10171 struct cache_tree *reada,
10172 struct cache_tree *nodes,
10173 struct cache_tree *extent_cache,
10174 struct cache_tree *chunk_cache,
10175 struct rb_root *dev_cache,
10176 struct block_group_tree *block_group_cache,
10177 struct device_extent_tree *dev_extent_cache)
10182 while (!list_empty(list)) {
10183 struct root_item_record *rec;
10184 struct extent_buffer *buf;
10185 rec = list_entry(list->next,
10186 struct root_item_record, list);
/* read the root block of this tree; 0 is passed as the last argument */
10188 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10189 if (!extent_buffer_uptodate(buf)) {
10190 free_extent_buffer(buf);
10194 ret = add_root_to_pending(buf, extent_cache, pending,
10195 seen, nodes, rec->objectid);
10199 * To rebuild extent tree, we need deal with snapshot
10200 * one by one, otherwise we deal with node firstly which
10201 * can maximize readahead.
10204 ret = run_next_block(root, bits, bits_nr, &last,
10205 pending, seen, reada, nodes,
10206 extent_cache, chunk_cache,
10207 dev_cache, block_group_cache,
10208 dev_extent_cache, rec);
10212 free_extent_buffer(buf);
10213 list_del(&rec->list);
10219 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10220 reada, nodes, extent_cache, chunk_cache,
10221 dev_cache, block_group_cache,
10222 dev_extent_cache, NULL);
/*
 * Top level pass that cross-checks chunks, block groups, device extents,
 * devices and extent references of a filesystem.
 *
 * @fs_info: filesystem to check.  While the check runs, its
 *           excluded_extents, fsck_extent_cache, free_extent_hook and
 *           corrupt_blocks members point at state local to this function;
 *           the first cleanup sequence resets them to NULL.
 *
 * Steps visible in the body:
 *  - queue the tree root and the chunk root on the normal list
 *  - read ROOT_ITEM entries from the tree root and queue each one on the
 *    normal list or the dropping list, depending on whether the objectid of
 *    its drop_progress key is zero
 *  - deal_root_from_list() for the normal list, then the dropping list
 *  - check_chunks(), check_extent_refs() and check_devices()
 *
 * The results of the deal_root_from_list(), check_chunks() and
 * check_extent_refs() calls are each tested for -EAGAIN.
 */
10232 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10234 struct rb_root dev_cache;
10235 struct cache_tree chunk_cache;
10236 struct block_group_tree block_group_cache;
10237 struct device_extent_tree dev_extent_cache;
10238 struct cache_tree extent_cache;
10239 struct cache_tree seen;
10240 struct cache_tree pending;
10241 struct cache_tree reada;
10242 struct cache_tree nodes;
10243 struct extent_io_tree excluded_extents;
10244 struct cache_tree corrupt_blocks;
10245 struct btrfs_path path;
10246 struct btrfs_key key;
10247 struct btrfs_key found_key;
10249 struct block_info *bits;
10251 struct extent_buffer *leaf;
10253 struct btrfs_root_item ri;
10254 struct list_head dropping_trees;
10255 struct list_head normal_trees;
10256 struct btrfs_root *root1;
10257 struct btrfs_root *root;
10261 root = fs_info->fs_root;
10262 dev_cache = RB_ROOT;
10263 cache_tree_init(&chunk_cache);
10264 block_group_tree_init(&block_group_cache);
10265 device_extent_tree_init(&dev_extent_cache);
10267 cache_tree_init(&extent_cache);
10268 cache_tree_init(&seen);
10269 cache_tree_init(&pending);
10270 cache_tree_init(&nodes);
10271 cache_tree_init(&reada);
10272 cache_tree_init(&corrupt_blocks);
10273 extent_io_tree_init(&excluded_extents);
10274 INIT_LIST_HEAD(&dropping_trees);
10275 INIT_LIST_HEAD(&normal_trees);
/* expose the local caches and the free extent hook through fs_info */
10278 fs_info->excluded_extents = &excluded_extents;
10279 fs_info->fsck_extent_cache = &extent_cache;
10280 fs_info->free_extent_hook = free_extent_hook;
10281 fs_info->corrupt_blocks = &corrupt_blocks;
10285 bits = malloc(bits_nr * sizeof(struct block_info));
10291 if (ctx.progress_enabled) {
10292 ctx.tp = TASK_EXTENTS;
10293 task_start(ctx.info);
/* queue the tree root and the chunk root first */
10297 root1 = fs_info->tree_root;
10298 level = btrfs_header_level(root1->node);
10299 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10300 root1->node->start, 0, level, 0, NULL);
10303 root1 = fs_info->chunk_root;
10304 level = btrfs_header_level(root1->node);
10305 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10306 root1->node->start, 0, level, 0, NULL);
/* then look for ROOT_ITEM entries in the tree root */
10309 btrfs_init_path(&path);
10312 key.type = BTRFS_ROOT_ITEM_KEY;
10313 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10317 leaf = path.nodes[0];
10318 slot = path.slots[0];
10319 if (slot >= btrfs_header_nritems(path.nodes[0])) {
10320 ret = btrfs_next_leaf(root, &path);
10323 leaf = path.nodes[0];
10324 slot = path.slots[0];
10326 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10327 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10328 unsigned long offset;
10331 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10332 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10333 last_snapshot = btrfs_root_last_snapshot(&ri);
/* a zero drop_progress objectid selects the normal list */
10334 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10335 level = btrfs_root_level(&ri);
10336 ret = add_root_item_to_list(&normal_trees,
10337 found_key.objectid,
10338 btrfs_root_bytenr(&ri),
10339 last_snapshot, level,
10344 level = btrfs_root_level(&ri);
10345 objectid = found_key.objectid;
10346 btrfs_disk_key_to_cpu(&found_key,
10347 &ri.drop_progress);
10348 ret = add_root_item_to_list(&dropping_trees,
10350 btrfs_root_bytenr(&ri),
10351 last_snapshot, level,
10352 ri.drop_level, &found_key);
10359 btrfs_release_path(&path);
10362 * check_block can return -EAGAIN if it fixes something, please keep
10363 * this in mind when dealing with return values from these functions, if
10364 * we get -EAGAIN we want to fall through and restart the loop.
10366 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
10367 &seen, &reada, &nodes, &extent_cache,
10368 &chunk_cache, &dev_cache, &block_group_cache,
10369 &dev_extent_cache);
10371 if (ret == -EAGAIN)
10375 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
10376 &pending, &seen, &reada, &nodes,
10377 &extent_cache, &chunk_cache, &dev_cache,
10378 &block_group_cache, &dev_extent_cache);
10380 if (ret == -EAGAIN)
10385 ret = check_chunks(&chunk_cache, &block_group_cache,
10386 &dev_extent_cache, NULL, NULL, NULL, 0);
10388 if (ret == -EAGAIN)
10393 ret = check_extent_refs(root, &extent_cache);
10395 if (ret == -EAGAIN)
10400 ret = check_devices(&dev_cache, &dev_extent_cache);
10405 task_stop(ctx.info);
/* first cleanup sequence: the fs_info members set above are reset to NULL */
10407 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10408 extent_io_tree_cleanup(&excluded_extents);
10409 fs_info->fsck_extent_cache = NULL;
10410 fs_info->free_extent_hook = NULL;
10411 fs_info->corrupt_blocks = NULL;
10412 fs_info->excluded_extents = NULL;
10415 free_chunk_cache_tree(&chunk_cache);
10416 free_device_cache_tree(&dev_cache);
10417 free_block_group_tree(&block_group_cache);
10418 free_device_extent_tree(&dev_extent_cache);
10419 free_extent_cache_tree(&seen);
10420 free_extent_cache_tree(&pending);
10421 free_extent_cache_tree(&reada);
10422 free_extent_cache_tree(&nodes);
10423 free_root_item_list(&normal_trees);
10424 free_root_item_list(&dropping_trees);
/* second cleanup sequence: this one also frees the extent record cache */
10427 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10428 free_extent_cache_tree(&seen);
10429 free_extent_cache_tree(&pending);
10430 free_extent_cache_tree(&reada);
10431 free_extent_cache_tree(&nodes);
10432 free_chunk_cache_tree(&chunk_cache);
10433 free_block_group_tree(&block_group_cache);
10434 free_device_cache_tree(&dev_cache);
10435 free_device_extent_tree(&dev_extent_cache);
10436 free_extent_record_cache(&extent_cache);
10437 free_root_item_list(&normal_trees);
10438 free_root_item_list(&dropping_trees);
10439 extent_io_tree_cleanup(&excluded_extents);
10444 * Check backrefs of a tree block given by @bytenr or @eb.
10446 * @root: the root containing the @bytenr or @eb
10447 * @eb: tree block extent buffer, can be NULL
10448 * @bytenr: bytenr of the tree block to search
10449 * @level: tree level of the tree block
10450 * @owner: owner of the tree block
10452 * Return >0 for any error found and output error message
10453 * Return 0 for no error found
/*
 * Implementation notes:
 *
 * The extent tree lookup uses a METADATA_ITEM key when the SKINNY_METADATA
 * incompat flag is set and an EXTENT_ITEM key otherwise, with an offset of
 * (u64)-1, followed by btrfs_previous_extent_item().
 *
 * The item found is checked for the TREE_BLOCK flag, for a generation that
 * equals the header generation of @eb, for a level that equals @level and,
 * for owners that are not fs trees, for a reference count of 1.
 *
 * Inline refs are scanned for a TREE_BLOCK_REF whose offset equals
 * root->objectid or @owner.  For a SHARED_BLOCK_REF the function calls
 * itself on the parent bytenr with level + 1.  A keyed TREE_BLOCK_REF item
 * with offset root->objectid is searched as well.
 */
10455 static int check_tree_block_ref(struct btrfs_root *root,
10456 struct extent_buffer *eb, u64 bytenr,
10457 int level, u64 owner)
10459 struct btrfs_key key;
10460 struct btrfs_root *extent_root = root->fs_info->extent_root;
10461 struct btrfs_path path;
10462 struct btrfs_extent_item *ei;
10463 struct btrfs_extent_inline_ref *iref;
10464 struct extent_buffer *leaf;
10470 u32 nodesize = root->fs_info->nodesize;
10473 int tree_reloc_root = 0;
/* remember whether @bytenr is the root node of a tree reloc root */
10478 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10479 btrfs_header_bytenr(root->node) == bytenr)
10480 tree_reloc_root = 1;
10482 btrfs_init_path(&path);
10483 key.objectid = bytenr;
10484 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10485 key.type = BTRFS_METADATA_ITEM_KEY;
10487 key.type = BTRFS_EXTENT_ITEM_KEY;
10488 key.offset = (u64)-1;
10490 /* Search for the backref in extent tree */
10491 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10493 err |= BACKREF_MISSING;
10496 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10498 err |= BACKREF_MISSING;
10502 leaf = path.nodes[0];
10503 slot = path.slots[0];
10504 btrfs_item_key_to_cpu(leaf, &key, slot);
10506 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* the level comes from the key offset or from the tree block info */
10508 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10509 skinny_level = (int)key.offset;
10510 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10512 struct btrfs_tree_block_info *info;
10514 info = (struct btrfs_tree_block_info *)(ei + 1);
10515 skinny_level = btrfs_tree_block_level(leaf, info);
10516 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10523 if (!(btrfs_extent_flags(leaf, ei) &
10524 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10526 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10527 key.objectid, nodesize,
10528 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10529 err = BACKREF_MISMATCH;
10531 header_gen = btrfs_header_generation(eb);
10532 extent_gen = btrfs_extent_generation(leaf, ei);
10533 if (header_gen != extent_gen) {
10535 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10536 key.objectid, nodesize, header_gen,
10538 err = BACKREF_MISMATCH;
10540 if (level != skinny_level) {
10542 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10543 key.objectid, nodesize, level, skinny_level);
10544 err = BACKREF_MISMATCH;
10546 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10548 "extent[%llu %u] is referred by other roots than %llu",
10549 key.objectid, nodesize, root->objectid);
10550 err = BACKREF_MISMATCH;
10555 * Iterate the extent/metadata item to find the exact backref
10557 item_size = btrfs_item_size_nr(leaf, slot);
10558 ptr = (unsigned long)iref;
10559 end = (unsigned long)ei + item_size;
10560 while (ptr < end) {
10561 iref = (struct btrfs_extent_inline_ref *)ptr;
10562 type = btrfs_extent_inline_ref_type(leaf, iref);
10563 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10565 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10566 (offset == root->objectid || offset == owner)) {
10568 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10570 * Backref of tree reloc root points to itself, no need
10571 * to check backref any more.
10573 if (tree_reloc_root)
10576 /* Check if the backref points to valid referencer */
10577 found_ref = !check_tree_block_ref(root, NULL,
10578 offset, level + 1, owner);
10583 ptr += btrfs_extent_inline_ref_size(type);
10587 * Inlined extent item doesn't have what we need, check
10588 * TREE_BLOCK_REF_KEY
10591 btrfs_release_path(&path);
10592 key.objectid = bytenr;
10593 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10594 key.offset = root->objectid;
10596 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10601 err |= BACKREF_MISSING;
10603 btrfs_release_path(&path);
10604 if (eb && (err & BACKREF_MISSING))
10605 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10606 bytenr, nodesize, owner, level);
10611 * Check an EXTENT_DATA item, mainly for its data backref in the extent tree
10613 * Return >0 for any error found and output error message
10614 * Return 0 for no error found
/*
 * @root: root being checked; root->objectid is compared with backref roots
 * @eb:   leaf that contains the EXTENT_DATA item
 * @slot: slot of the item inside @eb
 *
 * Inline extents and extents with a disk_bytenr of 0 are tested for first.
 * disk_num_bytes and num_bytes are tested for sectorsize alignment and are
 * added to the global data_bytes_allocated / data_bytes_referenced counters.
 *
 * The data backref is then looked for in three places: the inline refs of
 * the extent item, a keyed EXTENT_DATA_REF item whose offset is the hash of
 * (root objectid, inode objectid, file offset), and a keyed SHARED_DATA_REF
 * item whose offset is eb->start.
 */
10616 static int check_extent_data_item(struct btrfs_root *root,
10617 struct extent_buffer *eb, int slot)
10619 struct btrfs_file_extent_item *fi;
10620 struct btrfs_path path;
10621 struct btrfs_root *extent_root = root->fs_info->extent_root;
10622 struct btrfs_key fi_key;
10623 struct btrfs_key dbref_key;
10624 struct extent_buffer *leaf;
10625 struct btrfs_extent_item *ei;
10626 struct btrfs_extent_inline_ref *iref;
10627 struct btrfs_extent_data_ref *dref;
10630 u64 disk_num_bytes;
10631 u64 extent_num_bytes;
10638 int found_dbackref = 0;
10642 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10643 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10645 /* Nothing to check for hole and inline data extents */
10646 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10647 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10650 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10651 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10652 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10654 /* Check unaligned disk_num_bytes and num_bytes */
10655 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10657 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10658 fi_key.objectid, fi_key.offset, disk_num_bytes,
10659 root->fs_info->sectorsize);
10660 err |= BYTES_UNALIGNED;
10662 data_bytes_allocated += disk_num_bytes;
10664 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10666 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10667 fi_key.objectid, fi_key.offset, extent_num_bytes,
10668 root->fs_info->sectorsize);
10669 err |= BYTES_UNALIGNED;
10671 data_bytes_referenced += extent_num_bytes;
10673 owner = btrfs_header_owner(eb);
10675 /* Check the extent item of the file extent in extent tree */
10676 btrfs_init_path(&path);
10677 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10678 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10679 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10681 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10685 leaf = path.nodes[0];
10686 slot = path.slots[0];
10687 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10689 extent_flags = btrfs_extent_flags(leaf, ei);
10691 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10693 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10694 disk_bytenr, disk_num_bytes,
10695 BTRFS_EXTENT_FLAG_DATA);
10696 err |= BACKREF_MISMATCH;
10699 /* Check data backref inside that extent item */
10700 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10701 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10702 ptr = (unsigned long)iref;
10703 end = (unsigned long)ei + item_size;
10704 while (ptr < end) {
10705 iref = (struct btrfs_extent_inline_ref *)ptr;
10706 type = btrfs_extent_inline_ref_type(leaf, iref);
10707 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
/* a data ref matches on its root; a shared ref is validated via its parent */
10709 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10710 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10711 if (ref_root == owner || ref_root == root->objectid)
10712 found_dbackref = 1;
10713 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10714 found_dbackref = !check_tree_block_ref(root, NULL,
10715 btrfs_extent_inline_ref_offset(leaf, iref),
10719 if (found_dbackref)
10721 ptr += btrfs_extent_inline_ref_size(type);
10724 if (!found_dbackref) {
10725 btrfs_release_path(&path);
10727 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10728 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10729 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10730 dbref_key.offset = hash_extent_data_ref(root->objectid,
10731 fi_key.objectid, fi_key.offset);
10733 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10734 &dbref_key, &path, 0, 0);
10736 found_dbackref = 1;
10740 btrfs_release_path(&path);
10743 * Neither inlined nor EXTENT_DATA_REF found, try
10744 * SHARED_DATA_REF as last chance.
10746 dbref_key.objectid = disk_bytenr;
10747 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10748 dbref_key.offset = eb->start;
10750 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10751 &dbref_key, &path, 0, 0);
10753 found_dbackref = 1;
10759 if (!found_dbackref)
10760 err |= BACKREF_MISSING;
10761 btrfs_release_path(&path);
10762 if (err & BACKREF_MISSING) {
10763 error("data extent[%llu %llu] backref lost",
10764 disk_bytenr, disk_num_bytes);
10770 * Get the real tree block level, for cases such as a shared block
10771 * Return >= 0 as tree level
10772 * Return <0 for error
/*
 * @fs_info: filesystem whose extent root is searched
 * @bytenr:  logical address of the tree block
 *
 * Two sources are consulted for the level.  The first is the backref in the
 * extent tree: the key offset of a METADATA_ITEM, or the
 * btrfs_tree_block_info stored after the extent item for other key types.
 * The second is the header of the block itself, read with the generation
 * taken from the extent item.  The two values are compared before
 * header_level is returned.
 */
10774 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10776 struct extent_buffer *eb;
10777 struct btrfs_path path;
10778 struct btrfs_key key;
10779 struct btrfs_extent_item *ei;
10786 /* Search extent tree for extent generation and level */
10787 key.objectid = bytenr;
10788 key.type = BTRFS_METADATA_ITEM_KEY;
10789 key.offset = (u64)-1;
10791 btrfs_init_path(&path);
10792 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10795 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10803 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10804 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10805 struct btrfs_extent_item);
10806 flags = btrfs_extent_flags(path.nodes[0], ei);
10807 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10812 /* Get transid for later read_tree_block() check */
10813 transid = btrfs_extent_generation(path.nodes[0], ei);
10815 /* Get backref level as one source */
10816 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10817 backref_level = key.offset;
10819 struct btrfs_tree_block_info *info;
10821 info = (struct btrfs_tree_block_info *)(ei + 1);
10822 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10824 btrfs_release_path(&path);
10826 /* Get level from tree block as an alternative source */
10827 eb = read_tree_block(fs_info, bytenr, transid);
10828 if (!extent_buffer_uptodate(eb)) {
10829 free_extent_buffer(eb);
10832 header_level = btrfs_header_level(eb);
10833 free_extent_buffer(eb);
10835 if (header_level != backref_level)
10837 return header_level;
10840 btrfs_release_path(&path);
10845 * Check if a tree block backref is valid (points to a valid tree block)
10846 * if level == -1, level will be resolved
10847 * Return >0 for any error found and print error message
/*
 * @fs_info: filesystem being checked
 * @root_id: objectid of the root named by the backref
 * @bytenr:  logical address of the referenced tree block
 * @level:   expected level of that block; query_tree_block_level() is used
 *           to resolve it in the special case described above
 *
 * The root is loaded with btrfs_read_fs_root() and the first key of the
 * block is read from disk.  The root is then searched for that key with
 * path.lowest_level set to the level.  The node found at that level is
 * compared with @bytenr and the level; a difference records
 * REFERENCER_MISMATCH.  The failure paths of the level query, the root
 * read, the block read and the search record REFERENCER_MISSING.
 */
10849 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10850 u64 bytenr, int level)
10852 struct btrfs_root *root;
10853 struct btrfs_key key;
10854 struct btrfs_path path;
10855 struct extent_buffer *eb;
10856 struct extent_buffer *node;
10857 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10861 /* Query level for level == -1 special case */
10863 level = query_tree_block_level(fs_info, bytenr);
10865 err |= REFERENCER_MISSING;
/* load the root named by the backref */
10869 key.objectid = root_id;
10870 key.type = BTRFS_ROOT_ITEM_KEY;
10871 key.offset = (u64)-1;
10873 root = btrfs_read_fs_root(fs_info, &key);
10874 if (IS_ERR(root)) {
10875 err |= REFERENCER_MISSING;
10879 /* Read out the tree block to get item/node key */
10880 eb = read_tree_block(fs_info, bytenr, 0);
10881 if (!extent_buffer_uptodate(eb)) {
10882 err |= REFERENCER_MISSING;
10883 free_extent_buffer(eb);
10887 /* Empty tree, no need to check key */
10888 if (!btrfs_header_nritems(eb) && !level) {
10889 free_extent_buffer(eb);
10894 btrfs_node_key_to_cpu(eb, &key, 0);
10896 btrfs_item_key_to_cpu(eb, &key, 0);
10898 free_extent_buffer(eb);
10900 btrfs_init_path(&path);
10901 path.lowest_level = level;
10902 /* Search with the first key, to ensure we can reach it */
10903 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10905 err |= REFERENCER_MISSING;
/* the node reached at this level must be the block named by the backref */
10909 node = path.nodes[level];
10910 if (btrfs_header_bytenr(node) != bytenr) {
10912 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10913 bytenr, nodesize, bytenr,
10914 btrfs_header_bytenr(node));
10915 err |= REFERENCER_MISMATCH;
10917 if (btrfs_header_level(node) != level) {
10919 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10920 bytenr, nodesize, level,
10921 btrfs_header_level(node));
10922 err |= REFERENCER_MISMATCH;
10926 btrfs_release_path(&path);
10928 if (err & REFERENCER_MISSING) {
10930 error("extent [%llu %d] lost referencer (owner: %llu)",
10931 bytenr, nodesize, root_id);
10934 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10935 bytenr, nodesize, root_id, level);
10942 * Check if tree block @eb is tree reloc root.
10943 * Return 0 if it's not or any problem happens
10944 * Return 1 if it's a tree reloc root
/*
 * Tell whether tree block @eb is the root node of a tree reloc root.
 *
 * The reloc root is looked up (uncached) with key
 * (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM, owner of @eb) and the bytenr of its
 * node is compared with the bytenr of @eb. The temporary root is released
 * with btrfs_free_fs_root().
 *
 * NOTE(review): the return statements are not visible in this listing.
 */
10946 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10947 struct extent_buffer *eb)
10949 struct btrfs_root *tree_reloc_root;
10950 struct btrfs_key key;
10951 u64 bytenr = btrfs_header_bytenr(eb);
10952 u64 owner = btrfs_header_owner(eb);
/* The key offset carries the owner objectid read from the block header */
10955 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10956 key.offset = owner;
10957 key.type = BTRFS_ROOT_ITEM_KEY;
10959 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10960 if (IS_ERR(tree_reloc_root))
10963 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10965 btrfs_free_fs_root(tree_reloc_root);
10970 * Check referencer for shared block backref
10971 * If level == -1, this function will resolve the level.
/*
 * Verify a shared block backref: the tree block at @parent must contain a
 * node pointer to @bytenr.
 *
 * Visible steps: read the parent block, resolve @level through
 * query_tree_block_level(), handle the parent == bytenr case through
 * is_tree_reloc_root(), require that the parent sits exactly one level above
 * @level, then scan every node pointer of the parent for @bytenr.
 * Returns REFERENCER_MISSING after printing an error when no pointer matched.
 *
 * NOTE(review): short lines (gotos, assignments to found_parent, the
 * success return) are absent from this listing.
 */
10973 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10974 u64 parent, u64 bytenr, int level)
10976 struct extent_buffer *eb;
10978 int found_parent = 0;
10981 eb = read_tree_block(fs_info, parent, 0);
10982 if (!extent_buffer_uptodate(eb))
10986 level = query_tree_block_level(fs_info, bytenr);
10990 /* It's possible it's a tree reloc root */
10991 if (parent == bytenr) {
10992 if (is_tree_reloc_root(fs_info, eb))
/* A real parent must be exactly one level above the child */
10997 if (level + 1 != btrfs_header_level(eb))
/* Linear scan over all node pointers of the parent */
11000 nr = btrfs_header_nritems(eb);
11001 for (i = 0; i < nr; i++) {
11002 if (bytenr == btrfs_node_blockptr(eb, i)) {
11008 free_extent_buffer(eb);
11009 if (!found_parent) {
11011 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
11012 bytenr, fs_info->nodesize, parent, level);
11013 return REFERENCER_MISSING;
11019 * Check referencer for normal (inlined) data ref
11020 * If len == 0, it will be resolved by searching in extent tree
/*
 * Verify a keyed/inline data backref: count the file extent items of inode
 * @objectid in root @root_id that point at the data extent
 * [@bytenr, @len) with backref offset @offset, and compare the number found
 * with @count.
 *
 * Returns REFERENCER_MISSING after printing an error when the counts differ.
 *
 * NOTE(review): short lines (the len == 0 test, error checks, gotos, the
 * found_count increment, the success return) are absent from this listing.
 */
11022 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
11023 u64 root_id, u64 objectid, u64 offset,
11024 u64 bytenr, u64 len, u32 count)
11026 struct btrfs_root *root;
11027 struct btrfs_root *extent_root = fs_info->extent_root;
11028 struct btrfs_key key;
11029 struct btrfs_path path;
11030 struct extent_buffer *leaf;
11031 struct btrfs_file_extent_item *fi;
11032 u32 found_count = 0;
/*
 * Extent tree lookup for the extent item of @bytenr; presumably this is the
 * len == 0 resolution described in the function header - TODO confirm, the
 * guarding condition and the assignment to len are not visible.
 */
11037 key.objectid = bytenr;
11038 key.type = BTRFS_EXTENT_ITEM_KEY;
11039 key.offset = (u64)-1;
11041 btrfs_init_path(&path);
11042 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11045 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11048 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11049 if (key.objectid != bytenr ||
11050 key.type != BTRFS_EXTENT_ITEM_KEY)
11053 btrfs_release_path(&path);
/* Read the referencing root by (root_id, ROOT_ITEM, -1) */
11055 key.objectid = root_id;
11056 key.type = BTRFS_ROOT_ITEM_KEY;
11057 key.offset = (u64)-1;
11058 btrfs_init_path(&path);
11060 root = btrfs_read_fs_root(fs_info, &key);
11064 key.objectid = objectid;
11065 key.type = BTRFS_EXTENT_DATA_KEY;
11067 * It can be nasty as data backref offset is
11068 * file offset - file extent offset, which is smaller or
11069 * equal to original backref offset. The only special case is
11070 * overflow. So we need to special check and do further search.
/* An offset with bit 63 set is treated as overflowed: start the search at file offset 0 */
11072 key.offset = offset & (1ULL << 63) ? 0 : offset;
11074 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11079 * Search afterwards to get correct one
11080 * NOTE: As we must do a comprehensive check on the data backref to
11081 * make sure the dref count also matches, we must iterate all file
11082 * extents for that inode.
11085 leaf = path.nodes[0];
11086 slot = path.slots[0];
11088 if (slot >= btrfs_header_nritems(leaf))
11090 btrfs_item_key_to_cpu(leaf, &key, slot);
/* Items of another inode or another type are not candidates */
11091 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11093 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11095 * Except normal disk bytenr and disk num bytes, we still
11096 * need to do extra check on dbackref offset as
11097 * dbackref offset = file_offset - file_extent_offset
11099 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11100 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11101 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11106 ret = btrfs_next_item(root, &path);
11111 btrfs_release_path(&path);
/* The number of matching file extents must equal the count stored in the backref */
11112 if (found_count != count) {
11114 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
11115 bytenr, len, root_id, objectid, offset, count, found_count);
11116 return REFERENCER_MISSING;
11122 * Check if the referencer of a shared data backref exists
/*
 * Verify a shared data backref: the leaf at @parent must hold a non-inline
 * file extent item whose disk bytenr equals @bytenr.
 *
 * Returns REFERENCER_MISSING after printing an error when no such item was
 * found.
 *
 * NOTE(review): short lines (continue/break/goto, the found_parent
 * assignment, the success return) are absent from this listing.
 */
11124 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
11125 u64 parent, u64 bytenr)
11127 struct extent_buffer *eb;
11128 struct btrfs_key key;
11129 struct btrfs_file_extent_item *fi;
11131 int found_parent = 0;
11134 eb = read_tree_block(fs_info, parent, 0);
11135 if (!extent_buffer_uptodate(eb))
/* Walk every item of the parent leaf */
11138 nr = btrfs_header_nritems(eb);
11139 for (i = 0; i < nr; i++) {
11140 btrfs_item_key_to_cpu(eb, &key, i);
11141 if (key.type != BTRFS_EXTENT_DATA_KEY)
11144 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
/* Inline extents are tested for here, before the disk bytenr comparison */
11145 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
11148 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
11155 free_extent_buffer(eb);
11156 if (!found_parent) {
11157 error("shared extent %llu referencer lost (parent: %llu)",
11159 return REFERENCER_MISSING;
11165 * This function will check a given extent item, including its backref and
11166 * itself (like crossing stripe boundary and type)
11168 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Check one EXTENT_ITEM / METADATA_ITEM found at @slot of leaf @eb.
 *
 * Visible work: account the extent in the global bytes_used counter, reject
 * items smaller than struct btrfs_extent_item, check metadata extents for
 * crossing a stripe boundary, then walk the inline refs that follow the
 * extent item and hand each one to the matching backref checker.
 * Error bits seen here: CROSSING_STRIPE_BOUNDARY, ITEM_SIZE_MISMATCH,
 * UNKNOWN_TYPE, plus whatever the backref checkers return.
 *
 * NOTE(review): short lines (local declarations, the loop head, gotos,
 * breaks, the return) are absent from this listing.
 */
11170 static int check_extent_item(struct btrfs_fs_info *fs_info,
11171 struct extent_buffer *eb, int slot)
11173 struct btrfs_extent_item *ei;
11174 struct btrfs_extent_inline_ref *iref;
11175 struct btrfs_extent_data_ref *dref;
11179 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11180 u32 item_size = btrfs_item_size_nr(eb, slot);
11185 struct btrfs_key key;
/* EXTENT_ITEM keys carry the length in key.offset; otherwise one nodesize is accounted */
11189 btrfs_item_key_to_cpu(eb, &key, slot);
11190 if (key.type == BTRFS_EXTENT_ITEM_KEY)
11191 bytes_used += key.offset;
11193 bytes_used += nodesize;
11195 if (item_size < sizeof(*ei)) {
11197 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
11198 * old thing when on disk format is still un-determined.
11199 * No need to care about it anymore
11201 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
11205 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
11206 flags = btrfs_extent_flags(eb, ei);
11208 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
/* NOTE(review): this call passes the file-scope global_info rather than the fs_info parameter - confirm they are the same object */
11210 if (metadata && check_crossing_stripes(global_info, key.objectid,
11212 error("bad metadata [%llu, %llu) crossing stripe boundary",
11213 key.objectid, key.objectid + nodesize);
11214 err |= CROSSING_STRIPE_BOUNDARY;
/* ptr is an offset inside the leaf, starting right after the extent item */
11217 ptr = (unsigned long)(ei + 1);
11219 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
11220 /* Old EXTENT_ITEM metadata */
11221 struct btrfs_tree_block_info *info;
11223 info = (struct btrfs_tree_block_info *)ptr;
11224 level = btrfs_tree_block_level(eb, info);
11225 ptr += sizeof(struct btrfs_tree_block_info);
11227 /* New METADATA_ITEM */
11228 level = key.offset;
11230 end = (unsigned long)ei + item_size;
11233 /* Reached extent item end normally */
11237 /* Beyond extent item end, wrong item size */
11239 err |= ITEM_SIZE_MISMATCH;
11240 error("extent item at bytenr %llu slot %d has wrong size",
11245 /* Now check every backref in this extent item */
11246 iref = (struct btrfs_extent_inline_ref *)ptr;
11247 type = btrfs_extent_inline_ref_type(eb, iref);
11248 offset = btrfs_extent_inline_ref_offset(eb, iref);
/* Dispatch on the inline ref type; offset is passed as root id or parent bytenr depending on the type */
11250 case BTRFS_TREE_BLOCK_REF_KEY:
11251 ret = check_tree_block_backref(fs_info, offset, key.objectid,
11255 case BTRFS_SHARED_BLOCK_REF_KEY:
11256 ret = check_shared_block_backref(fs_info, offset, key.objectid,
11260 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref structure is read from the position of the inline ref offset field */
11261 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11262 ret = check_extent_data_backref(fs_info,
11263 btrfs_extent_data_ref_root(eb, dref),
11264 btrfs_extent_data_ref_objectid(eb, dref),
11265 btrfs_extent_data_ref_offset(eb, dref),
11266 key.objectid, key.offset,
11267 btrfs_extent_data_ref_count(eb, dref));
11270 case BTRFS_SHARED_DATA_REF_KEY:
11271 ret = check_shared_data_backref(fs_info, offset, key.objectid);
11275 error("extent[%llu %d %llu] has unknown ref type: %d",
11276 key.objectid, key.type, key.offset, type);
11277 err |= UNKNOWN_TYPE;
/* Advance past this inline ref; its size depends on the ref type */
11281 ptr += btrfs_extent_inline_ref_size(type);
11289 * Check if a dev extent item is referred correctly by its chunk
/*
 * Check that the dev extent at @slot of leaf @eb is referenced by a chunk.
 *
 * The chunk named by the dev extent (chunk objectid / chunk offset) is looked
 * up in the chunk tree, validated with btrfs_check_chunk_valid(), its stripe
 * length is compared with the dev extent length, and its stripes are scanned
 * for one whose devid and offset equal the dev extent key.
 * Returns REFERENCER_MISSING after printing an error when no chunk stripe
 * matched.
 *
 * NOTE(review): short lines (error checks, gotos, the found_chunk
 * assignment, the success return) are absent from this listing.
 */
11291 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
11292 struct extent_buffer *eb, int slot)
11294 struct btrfs_root *chunk_root = fs_info->chunk_root;
11295 struct btrfs_dev_extent *ptr;
11296 struct btrfs_path path;
11297 struct btrfs_key chunk_key;
11298 struct btrfs_key devext_key;
11299 struct btrfs_chunk *chunk;
11300 struct extent_buffer *l;
11304 int found_chunk = 0;
11307 btrfs_item_key_to_cpu(eb, &devext_key, slot);
11308 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
11309 length = btrfs_dev_extent_length(eb, ptr);
/* Build the chunk key from the back pointers stored in the dev extent */
11311 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
11312 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11313 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
11315 btrfs_init_path(&path);
11316 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11321 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
11322 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
/* The per-device stripe length of the chunk must equal the dev extent length */
11327 if (btrfs_stripe_length(fs_info, l, chunk) != length)
11330 num_stripes = btrfs_chunk_num_stripes(l, chunk);
11331 for (i = 0; i < num_stripes; i++) {
11332 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
11333 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
/* devext_key.objectid is compared with the stripe devid, devext_key.offset with the stripe offset */
11335 if (devid == devext_key.objectid &&
11336 offset == devext_key.offset) {
11342 btrfs_release_path(&path);
11343 if (!found_chunk) {
11345 "device extent[%llu, %llu, %llu] did not find the related chunk",
11346 devext_key.objectid, devext_key.offset, length);
11347 return REFERENCER_MISSING;
11353 * Check if the used space is correct with the dev item
/*
 * Check the bytes-used field of the dev item at @slot of leaf @eb against
 * the sum of the lengths of all dev extents of that device.
 *
 * Returns REFERENCER_MISSING when the initial dev tree search fails and
 * ACCOUNTING_MISMATCH when the summed length differs from bytes-used.
 *
 * NOTE(review): short lines (local declarations, the key.offset assignment,
 * the loop head, breaks, the success return) are absent from this listing.
 */
11355 static int check_dev_item(struct btrfs_fs_info *fs_info,
11356 struct extent_buffer *eb, int slot)
11358 struct btrfs_root *dev_root = fs_info->dev_root;
11359 struct btrfs_dev_item *dev_item;
11360 struct btrfs_path path;
11361 struct btrfs_key key;
11362 struct btrfs_dev_extent *ptr;
11368 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
11369 dev_id = btrfs_device_id(eb, dev_item);
11370 used = btrfs_device_bytes_used(eb, dev_item);
/* Dev extents are keyed by the device id in key.objectid */
11372 key.objectid = dev_id;
11373 key.type = BTRFS_DEV_EXTENT_KEY;
11376 btrfs_init_path(&path);
11377 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* On this error path the key is re-read from the dev item so the message names it */
11379 btrfs_item_key_to_cpu(eb, &key, slot);
11380 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
11381 key.objectid, key.type, key.offset);
11382 btrfs_release_path(&path);
11383 return REFERENCER_MISSING;
11386 /* Iterate dev_extents to calculate the used space of a device */
11388 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11391 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11392 if (key.objectid > dev_id)
11394 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11397 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11398 struct btrfs_dev_extent);
11399 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11401 ret = btrfs_next_item(dev_root, &path);
11405 btrfs_release_path(&path);
/*
 * NOTE(review): the key decoded just below is not used by the visible
 * message arguments, which print BTRFS_ROOT_TREE_OBJECTID,
 * BTRFS_DEV_EXTENT_KEY and dev_id instead - looks unintended, confirm.
 */
11407 if (used != total) {
11408 btrfs_item_key_to_cpu(eb, &key, slot);
11410 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11411 total, used, BTRFS_ROOT_TREE_OBJECTID,
11412 BTRFS_DEV_EXTENT_KEY, dev_id);
11413 return ACCOUNTING_MISMATCH;
11419 * Check a block group item with its referencer (chunk) and its used space
11420 * with extent/metadata item
/*
 * Check the block group item at @slot of leaf @eb.
 *
 * Two checks are visible:
 *  1) a chunk item keyed (FIRST_CHUNK_TREE, CHUNK_ITEM, block group start)
 *     must exist and its length is compared (REFERENCER_MISSING /
 *     REFERENCER_MISMATCH);
 *  2) the extent tree is walked over the block group range, summing the
 *     space of EXTENT_ITEM / METADATA_ITEM entries and checking each extent
 *     type against the block group flags (CHUNK_TYPE_MISMATCH); the sum is
 *     compared with the used field of the item (ACCOUNTING_MISMATCH).
 *
 * NOTE(review): short lines (local declarations, the loop head, gotos, the
 * METADATA_ITEM accounting statement, the return) are absent from this
 * listing.
 */
11422 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11423 struct extent_buffer *eb, int slot)
11425 struct btrfs_root *extent_root = fs_info->extent_root;
11426 struct btrfs_root *chunk_root = fs_info->chunk_root;
11427 struct btrfs_block_group_item *bi;
11428 struct btrfs_block_group_item bg_item;
11429 struct btrfs_path path;
11430 struct btrfs_key bg_key;
11431 struct btrfs_key chunk_key;
11432 struct btrfs_key extent_key;
11433 struct btrfs_chunk *chunk;
11434 struct extent_buffer *leaf;
11435 struct btrfs_extent_item *ei;
11436 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
/* The item is copied out of the extent buffer into bg_item before its fields are read */
11444 btrfs_item_key_to_cpu(eb, &bg_key, slot);
11445 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11446 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11447 used = btrfs_block_group_used(&bg_item);
11448 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk is keyed by the block group start (bg_key.objectid) in key.offset */
11450 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11451 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11452 chunk_key.offset = bg_key.objectid;
11454 btrfs_init_path(&path);
11455 /* Search for the referencer chunk */
11456 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11459 "block group[%llu %llu] did not find the related chunk item",
11460 bg_key.objectid, bg_key.offset);
11461 err |= REFERENCER_MISSING;
11463 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11464 struct btrfs_chunk);
11465 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11468 "block group[%llu %llu] related chunk item length does not match",
11469 bg_key.objectid, bg_key.offset);
11470 err |= REFERENCER_MISMATCH;
11473 btrfs_release_path(&path);
11475 /* Search from the block group bytenr */
11476 extent_key.objectid = bg_key.objectid;
11477 extent_key.type = 0;
11478 extent_key.offset = 0;
11480 btrfs_init_path(&path);
11481 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11485 /* Iterate extent tree to account used space */
11487 leaf = path.nodes[0];
11489 /* Search slot can point to the last item beyond leaf nritems */
11490 if (path.slots[0] >= btrfs_header_nritems(leaf))
11493 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* Upper bound of the block group range is bg_key.objectid + bg_key.offset (exclusive) */
11494 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11497 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11498 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11500 if (extent_key.objectid < bg_key.objectid)
11503 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11506 total += extent_key.offset;
/* Extent type (data vs tree block) must be allowed by the block group flags */
11508 ei = btrfs_item_ptr(leaf, path.slots[0],
11509 struct btrfs_extent_item);
11510 flags = btrfs_extent_flags(leaf, ei);
11511 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11512 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11514 "bad extent[%llu, %llu) type mismatch with chunk",
11515 extent_key.objectid,
11516 extent_key.objectid + extent_key.offset);
11517 err |= CHUNK_TYPE_MISMATCH;
11519 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11520 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11521 BTRFS_BLOCK_GROUP_METADATA))) {
11523 "bad extent[%llu, %llu) type mismatch with chunk",
11524 extent_key.objectid,
11525 extent_key.objectid + nodesize);
11526 err |= CHUNK_TYPE_MISMATCH;
11530 ret = btrfs_next_item(extent_root, &path);
11536 btrfs_release_path(&path);
11538 if (total != used) {
11540 "block group[%llu %llu] used %llu but extent items used %llu",
11541 bg_key.objectid, bg_key.offset, used, total);
11542 err |= ACCOUNTING_MISMATCH;
11548 * Check a chunk item.
11549 * Including checking all referred dev_extents and block group
/*
 * Check the chunk item at @slot of leaf @eb.
 *
 * Visible work: validate the chunk with btrfs_check_chunk_valid(), look up
 * the block group item keyed (chunk start, BLOCK_GROUP_ITEM, chunk length)
 * in the extent tree and compare its flags with the chunk type, then for
 * every stripe look up the dev extent keyed (devid, DEV_EXTENT, stripe
 * offset) and compare its chunk back pointers and length.
 * Error bits seen here: BYTES_UNALIGNED | UNKNOWN_TYPE, REFERENCER_MISSING,
 * BACKREF_MISSING.
 *
 * NOTE(review): short lines (local declarations, error checks, continue,
 * the not_match_dev label, the return) are absent from this listing.
 */
11551 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11552 struct extent_buffer *eb, int slot)
11554 struct btrfs_root *extent_root = fs_info->extent_root;
11555 struct btrfs_root *dev_root = fs_info->dev_root;
11556 struct btrfs_path path;
11557 struct btrfs_key chunk_key;
11558 struct btrfs_key bg_key;
11559 struct btrfs_key devext_key;
11560 struct btrfs_chunk *chunk;
11561 struct extent_buffer *leaf;
11562 struct btrfs_block_group_item *bi;
11563 struct btrfs_block_group_item bg_item;
11564 struct btrfs_dev_extent *ptr;
/* The chunk covers the logical range [chunk_key.offset, chunk_end) */
11576 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11577 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11578 length = btrfs_chunk_length(eb, chunk);
11579 chunk_end = chunk_key.offset + length;
11580 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11583 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11585 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11588 type = btrfs_chunk_type(eb, chunk);
/* The matching block group is keyed by chunk start and chunk length */
11590 bg_key.objectid = chunk_key.offset;
11591 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11592 bg_key.offset = length;
11594 btrfs_init_path(&path);
11595 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11598 "chunk[%llu %llu) did not find the related block group item",
11599 chunk_key.offset, chunk_end);
11600 err |= REFERENCER_MISSING;
11602 leaf = path.nodes[0];
11603 bi = btrfs_item_ptr(leaf, path.slots[0],
11604 struct btrfs_block_group_item);
11605 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
/* NOTE(review): a flags mismatch is reported with REFERENCER_MISSING although the item was found - confirm whether REFERENCER_MISMATCH was intended */
11607 if (btrfs_block_group_flags(&bg_item) != type) {
11609 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11610 chunk_key.offset, chunk_end, type,
11611 btrfs_block_group_flags(&bg_item));
11612 err |= REFERENCER_MISSING;
11616 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11617 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11618 for (i = 0; i < num_stripes; i++) {
/* The path from the previous lookup is released and re-initialised on every iteration */
11619 btrfs_release_path(&path);
11620 btrfs_init_path(&path);
11621 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11622 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11623 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11625 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11628 goto not_match_dev;
11630 leaf = path.nodes[0];
11631 ptr = btrfs_item_ptr(leaf, path.slots[0],
11632 struct btrfs_dev_extent);
11633 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11634 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
/* The dev extent must point back at this chunk and have the expected stripe length */
11635 if (objectid != chunk_key.objectid ||
11636 offset != chunk_key.offset ||
11637 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11638 goto not_match_dev;
/* NOTE(review): this message prints chunk_key.objectid as the range start while the other messages print chunk_key.offset - confirm */
11641 err |= BACKREF_MISSING;
11643 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11644 chunk_key.objectid, chunk_end, i);
11647 btrfs_release_path(&path);
11653 * Main entry function to check known items and update related accounting info
/*
 * Walk the items of leaf @eb and dispatch each one to the checker that
 * matches its key type. EXTENT_CSUM items only add their item size to the
 * global total_csum_bytes counter.
 *
 * NOTE(review): short lines (local declarations, the switch head, breaks,
 * the error accumulation, the loop jump, the return) and the trailing
 * arguments of several calls are absent from this listing.
 */
11655 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11657 struct btrfs_fs_info *fs_info = root->fs_info;
11658 struct btrfs_key key;
11661 struct btrfs_extent_data_ref *dref;
11666 btrfs_item_key_to_cpu(eb, &key, slot);
/* File extent items take the root; the other item checkers take fs_info */
11670 case BTRFS_EXTENT_DATA_KEY:
11671 ret = check_extent_data_item(root, eb, slot);
11674 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11675 ret = check_block_group_item(fs_info, eb, slot);
11678 case BTRFS_DEV_ITEM_KEY:
11679 ret = check_dev_item(fs_info, eb, slot);
11682 case BTRFS_CHUNK_ITEM_KEY:
11683 ret = check_chunk_item(fs_info, eb, slot);
11686 case BTRFS_DEV_EXTENT_KEY:
11687 ret = check_dev_extent_item(fs_info, eb, slot);
11690 case BTRFS_EXTENT_ITEM_KEY:
11691 case BTRFS_METADATA_ITEM_KEY:
11692 ret = check_extent_item(fs_info, eb, slot);
11695 case BTRFS_EXTENT_CSUM_KEY:
11696 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* Keyed (non-inline) backref items reuse the same backref checkers */
11698 case BTRFS_TREE_BLOCK_REF_KEY:
11699 ret = check_tree_block_backref(fs_info, key.offset,
11703 case BTRFS_EXTENT_DATA_REF_KEY:
11704 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11705 ret = check_extent_data_backref(fs_info,
11706 btrfs_extent_data_ref_root(eb, dref),
11707 btrfs_extent_data_ref_objectid(eb, dref),
11708 btrfs_extent_data_ref_offset(eb, dref),
11710 btrfs_extent_data_ref_count(eb, dref));
11713 case BTRFS_SHARED_BLOCK_REF_KEY:
11714 ret = check_shared_block_backref(fs_info, key.offset,
11718 case BTRFS_SHARED_DATA_REF_KEY:
11719 ret = check_shared_data_backref(fs_info, key.offset,
/* Advance to the next slot while items remain in the leaf */
11727 if (++slot < btrfs_header_nritems(eb))
11734 * Helper function for later fs/subvol tree check. To determine if a tree
11735 * block should be checked.
11736 * This function ensures that only the direct referencer with the lowest
11737 * rootid checks a fs/subvolume tree block.
11739 * Backref check at extent tree would detect errors like missing subvolume
11740 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * Decide whether @root should check tree block @eb.
 *
 * The extent item of the block is looked up in the extent tree and its
 * inline refs are scanned; when a TREE_BLOCK_REF with an owner objectid
 * lower than root->objectid is found, the path is released inside the loop.
 * Presumably that is the do-not-check result - TODO confirm, the return
 * statements are not visible.
 *
 * NOTE(review): short lines (local declarations, error checks, gotos,
 * returns) are absent from this listing.
 */
11742 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11744 struct btrfs_root *extent_root = root->fs_info->extent_root;
11745 struct btrfs_key key;
11746 struct btrfs_path path;
11747 struct extent_buffer *leaf;
11749 struct btrfs_extent_item *ei;
11755 struct btrfs_extent_inline_ref *iref;
/* Search with offset -1, then step back to the extent item of this bytenr */
11758 btrfs_init_path(&path);
11759 key.objectid = btrfs_header_bytenr(eb);
11760 key.type = BTRFS_METADATA_ITEM_KEY;
11761 key.offset = (u64)-1;
11764 * Any failure in backref resolving means we can't determine
11765 * whom the tree block belongs to.
11766 * So in that case, we need to check that tree block
11768 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11772 ret = btrfs_previous_extent_item(extent_root, &path,
11773 btrfs_header_bytenr(eb));
11777 leaf = path.nodes[0];
11778 slot = path.slots[0];
11779 btrfs_item_key_to_cpu(leaf, &key, slot);
11780 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* For METADATA_ITEM the inline refs follow the extent item directly; otherwise a tree_block_info sits in between */
11782 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11783 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11785 struct btrfs_tree_block_info *info;
11787 info = (struct btrfs_tree_block_info *)(ei + 1);
11788 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11791 item_size = btrfs_item_size_nr(leaf, slot);
11792 ptr = (unsigned long)iref;
11793 end = (unsigned long)ei + item_size;
11794 while (ptr < end) {
11795 iref = (struct btrfs_extent_inline_ref *)ptr;
11796 type = btrfs_extent_inline_ref_type(leaf, iref);
11797 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11800 * We only check the tree block if current root is
11801 * the lowest referencer of it.
11803 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11804 offset < root->objectid) {
11805 btrfs_release_path(&path);
11809 ptr += btrfs_extent_inline_ref_size(type);
11812 * Normally we should also check keyed tree block ref, but that may be
11813 * very time consuming. Inlined ref should already make us skip a lot
11814 * of refs now. So skip search keyed tree block ref.
11818 btrfs_release_path(&path);
11823 * Traversal function for tree block. We will do:
11824 * 1) Skip shared fs/subvolume tree blocks
11825 * 2) Update related bytes accounting
11826 * 3) Pre-order traversal
/*
 * Recursively check tree block @node of @root and everything below it.
 *
 * Visible work: skip fs-tree blocks that should_check() rejects, update the
 * global btree byte counters, check the block itself through
 * check_tree_block_ref(), run check_leaf_items() and account leaf free space
 * for leaves, and for nodes read each child with read_tree_block() and
 * recurse into it. Children matching is_dropped_key() at the drop level of
 * the root are tested for before the read.
 *
 * NOTE(review): short lines (local declarations, the leaf/node branch,
 * error accumulation, continue, returns) are absent from this listing.
 */
11828 static int traverse_tree_block(struct btrfs_root *root,
11829 struct extent_buffer *node)
11831 struct extent_buffer *eb;
11832 struct btrfs_key key;
11833 struct btrfs_key drop_key;
11841 * Skip shared fs/subvolume tree block, in that case they will
11842 * be checked by referencer with lowest rootid
11844 if (is_fstree(root->objectid) && !should_check(root, node))
11847 /* Update bytes accounting */
11848 total_btree_bytes += node->len;
11849 if (fs_root_objectid(btrfs_header_owner(node)))
11850 total_fs_tree_bytes += node->len;
11851 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11852 total_extent_tree_bytes += node->len;
11854 /* pre-order traversal, check itself first */
11855 level = btrfs_header_level(node);
11856 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11857 btrfs_header_level(node),
11858 btrfs_header_owner(node));
11862 "check %s failed root %llu bytenr %llu level %d, force continue check",
11863 level ? "node":"leaf", root->objectid,
11864 btrfs_header_bytenr(node), btrfs_header_level(node));
/* Leaf handling: unused leaf space is added to the waste counter, then every item is checked */
11867 btree_space_waste += btrfs_leaf_free_space(root, node);
11868 ret = check_leaf_items(root, node);
/* Node handling: unused key pointer slots are added to the waste counter */
11873 nr = btrfs_header_nritems(node);
11874 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11875 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11876 sizeof(struct btrfs_key_ptr);
11878 /* Then check all its children */
11879 for (i = 0; i < nr; i++) {
11880 u64 blocknr = btrfs_node_blockptr(node, i);
11882 btrfs_node_key_to_cpu(node, &key, i);
11883 if (level == root->root_item.drop_level &&
11884 is_dropped_key(&key, &drop_key))
11888 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11889 * to call the function itself.
11891 eb = read_tree_block(root->fs_info, blocknr, 0);
11892 if (extent_buffer_uptodate(eb)) {
11893 ret = traverse_tree_block(root, eb);
11896 free_extent_buffer(eb);
11903 * Low memory usage version check_chunks_and_extents.
/*
 * Low-memory extent check entry point: traverse the chunk tree and the tree
 * root, then iterate the ROOT_ITEM entries of the tree root starting at
 * BTRFS_EXTENT_TREE_OBJECTID and traverse every tree found that way.
 *
 * Trees with objectid BTRFS_TREE_RELOC_OBJECTID are read uncached and freed
 * again after traversal; all other trees go through btrfs_read_fs_root().
 *
 * NOTE(review): short lines (local declarations, error accumulation, the
 * loop head, gotos, the return) are absent from this listing.
 */
11905 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
11907 struct btrfs_path path;
11908 struct btrfs_key key;
11909 struct btrfs_root *root1;
11910 struct btrfs_root *root;
11911 struct btrfs_root *cur_root;
11915 root = fs_info->fs_root;
11917 root1 = root->fs_info->chunk_root;
11918 ret = traverse_tree_block(root1, root1->node);
11921 root1 = root->fs_info->tree_root;
11922 ret = traverse_tree_block(root1, root1->node);
/* From here root1 is the tree root, which is searched for root items */
11925 btrfs_init_path(&path);
11926 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11928 key.type = BTRFS_ROOT_ITEM_KEY;
11930 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11932 error("cannot find extent treet in tree_root");
11937 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11938 if (key.type != BTRFS_ROOT_ITEM_KEY)
11940 key.offset = (u64)-1;
11942 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11943 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11946 cur_root = btrfs_read_fs_root(root->fs_info, &key);
/* NOTE(review): key.objectid is a u64 printed with %lld - confirm the intended specifier */
11947 if (IS_ERR(cur_root) || !cur_root) {
11948 error("failed to read tree: %lld", key.objectid);
11952 ret = traverse_tree_block(cur_root, cur_root->node);
/* Only the uncached reloc root is freed here */
11955 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11956 btrfs_free_fs_root(cur_root);
11958 ret = btrfs_next_item(root1, &path);
11964 btrfs_release_path(&path);
/*
 * Run the chunk/extent check in the implementation selected by the
 * file-scope check_mode: the low-memory variant for CHECK_MODE_LOWMEM,
 * check_chunks_and_extents() otherwise. When progress reporting is not
 * enabled a status line is written to stderr first.
 *
 * NOTE(review): the else line and the return are absent from this listing.
 */
11968 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11972 if (!ctx.progress_enabled)
11973 fprintf(stderr, "checking extents\n");
11974 if (check_mode == CHECK_MODE_LOWMEM)
11975 ret = check_chunks_and_extents_v2(fs_info);
11977 ret = check_chunks_and_extents(fs_info);
/*
 * Re-initialise @root with an empty root block inside transaction @trans.
 *
 * Two ways of obtaining the block c are visible: an extra reference taken
 * with extent_buffer_get(), and a fresh block from btrfs_alloc_free_block().
 * Presumably @overwrite selects between them - TODO confirm, the branch
 * lines are not visible. The header of c is then zeroed and refilled
 * (level, bytenr, generation, backref revision, owner, fsid, chunk tree
 * uuid), the buffer is marked dirty, and the root item is updated in the
 * tree root when the old and new block share the same start.
 *
 * NOTE(review): short lines (local declarations, branches, error checks,
 * the root->node assignment, the return) are absent from this listing.
 */
11982 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11983 struct btrfs_root *root, int overwrite)
11985 struct extent_buffer *c;
11986 struct extent_buffer *old = root->node;
11989 struct btrfs_disk_key disk_key = {0,0,0};
11995 extent_buffer_get(c);
11998 c = btrfs_alloc_free_block(trans, root,
11999 root->fs_info->nodesize,
12000 root->root_key.objectid,
12001 &disk_key, level, 0, 0);
12004 extent_buffer_get(c);
/* Wipe the whole header, then set each header field explicitly */
12008 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
12009 btrfs_set_header_level(c, level);
12010 btrfs_set_header_bytenr(c, c->start);
12011 btrfs_set_header_generation(c, trans->transid);
12012 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
12013 btrfs_set_header_owner(c, root->root_key.objectid);
12015 write_extent_buffer(c, root->fs_info->fsid,
12016 btrfs_header_fsid(), BTRFS_FSID_SIZE);
12018 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
12019 btrfs_header_chunk_tree_uuid(c),
12022 btrfs_mark_buffer_dirty(c);
12024 * this case can happen in the following case:
12026 * 1.overwrite previous root.
12028 * 2.reinit reloc data root, this is because we skip pin
12029 * down reloc data tree before which means we can allocate
12030 * same block bytenr here.
12032 if (old->start == c->start) {
12033 btrfs_set_root_generation(&root->root_item,
12035 root->root_item.level = btrfs_header_level(root->node);
12036 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12037 &root->root_key, &root->root_item);
12039 free_extent_buffer(c);
12043 free_extent_buffer(old);
12045 add_root_to_dirty_list(root);
/*
 * Pin the extent of tree block @eb and, recursively, the blocks below it.
 *
 * Blocks whose range already has EXTENT_DIRTY set in
 * fs_info->pinned_extents are tested for up front. In the ROOT_ITEM branch
 * the root block named by each root item is read and pinned recursively,
 * except for the extent root and the reloc roots. In the node-pointer
 * branch each child is either pinned directly without being read (level 1
 * and not the tree root) or read and pinned recursively.
 *
 * NOTE(review): short lines (local declarations, the leaf/node branch,
 * continue, error returns) are absent from this listing.
 */
12049 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12050 struct extent_buffer *eb, int tree_root)
12052 struct extent_buffer *tmp;
12053 struct btrfs_root_item *ri;
12054 struct btrfs_key key;
12056 int level = btrfs_header_level(eb);
12062 * If we have pinned this block before, don't pin it again.
12063 * This can not only avoid forever loop with broken filesystem
12064 * but also give us some speedups.
12066 if (test_range_bit(&fs_info->pinned_extents, eb->start,
12067 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
12070 btrfs_pin_extent(fs_info, eb->start, eb->len);
12072 nritems = btrfs_header_nritems(eb);
12073 for (i = 0; i < nritems; i++) {
12075 btrfs_item_key_to_cpu(eb, &key, i);
12076 if (key.type != BTRFS_ROOT_ITEM_KEY)
12078 /* Skip the extent root and reloc roots */
12079 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
12080 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
12081 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
12083 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
12084 bytenr = btrfs_disk_root_bytenr(eb, ri);
12087 * If at any point we start needing the real root we
12088 * will have to build a stump root for the root we are
12089 * in, but for now this doesn't actually use the root so
12090 * just pass in extent_root.
12092 tmp = read_tree_block(fs_info, bytenr, 0);
12093 if (!extent_buffer_uptodate(tmp)) {
12094 fprintf(stderr, "Error reading root block\n");
/* Trees referenced from root items are walked with tree_root == 0 */
12097 ret = pin_down_tree_blocks(fs_info, tmp, 0);
12098 free_extent_buffer(tmp);
12102 bytenr = btrfs_node_blockptr(eb, i);
12104 /* If we aren't the tree root don't read the block */
12105 if (level == 1 && !tree_root) {
12106 btrfs_pin_extent(fs_info, bytenr,
12107 fs_info->nodesize);
12111 tmp = read_tree_block(fs_info, bytenr, 0);
12112 if (!extent_buffer_uptodate(tmp)) {
12113 fprintf(stderr, "Error reading tree block\n");
12116 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
12117 free_extent_buffer(tmp);
/*
 * Pin the blocks of the chunk tree (tree_root flag 0) and then those
 * reachable from the tree root (tree_root flag 1). The result of the second
 * walk is returned directly.
 *
 * NOTE(review): the handling of the first return value is not visible in
 * this listing.
 */
12126 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
12130 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
12134 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk tree.
 *
 * Visible work: clear the three avail_*_alloc_bits masks, iterate the chunk
 * tree and, for every CHUNK_ITEM, call btrfs_add_block_group() with the
 * chunk type and length and mark the chunk range dirty in
 * fs_info->free_space_cache; afterwards walk the block groups through
 * btrfs_lookup_first_block_group().
 *
 * NOTE(review): short lines (local declarations, the initial key fields,
 * loop heads, breaks, slot increments, the per-block-group statements of
 * the second loop, the return) are absent from this listing.
 */
12137 static int reset_block_groups(struct btrfs_fs_info *fs_info)
12139 struct btrfs_block_group_cache *cache;
12140 struct btrfs_path path;
12141 struct extent_buffer *leaf;
12142 struct btrfs_chunk *chunk;
12143 struct btrfs_key key;
12147 btrfs_init_path(&path);
12149 key.type = BTRFS_CHUNK_ITEM_KEY;
12151 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
12153 btrfs_release_path(&path);
12158 * We do this in case the block groups were screwed up and had alloc
12159 * bits that aren't actually set on the chunks. This happens with
12160 * restored images every time and could happen in real life I guess.
12162 fs_info->avail_data_alloc_bits = 0;
12163 fs_info->avail_metadata_alloc_bits = 0;
12164 fs_info->avail_system_alloc_bits = 0;
12166 /* First we need to create the in-memory block groups */
/* When the slot runs past the current leaf, move on to the next leaf */
12168 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12169 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
12171 btrfs_release_path(&path);
12179 leaf = path.nodes[0];
12180 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12181 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
12186 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
12187 btrfs_add_block_group(fs_info, 0,
12188 btrfs_chunk_type(leaf, chunk),
12189 key.objectid, key.offset,
12190 btrfs_chunk_length(leaf, chunk));
/* The dirty range starts at the chunk key offset and ends at offset + chunk length */
12191 set_extent_dirty(&fs_info->free_space_cache, key.offset,
12192 key.offset + btrfs_chunk_length(leaf, chunk));
/* Second pass: step through the block groups; start advances to the end of each one */
12197 cache = btrfs_lookup_first_block_group(fs_info, start);
12201 start = cache->key.objectid + cache->key.offset;
12204 btrfs_release_path(&path);
/*
 * Remove balance state from the tree root inside transaction @trans and
 * re-initialise the data reloc tree.
 *
 * Visible work: search for the (BTRFS_BALANCE_OBJECTID, BALANCE_ITEM) item
 * and delete it, delete the items found for BTRFS_TREE_RELOC_OBJECTID in
 * batches through btrfs_del_items(), then read the data reloc root, record
 * it in the transaction, reinitialise it with btrfs_fsck_reinit_root() and
 * recreate its root directory with btrfs_make_root_dir().
 *
 * NOTE(review): short lines (local declarations, key.offset assignments,
 * error checks, loop heads, gotos, labels, the del_nr bookkeeping, the
 * return) are absent from this listing.
 */
12208 static int reset_balance(struct btrfs_trans_handle *trans,
12209 struct btrfs_fs_info *fs_info)
12211 struct btrfs_root *root = fs_info->tree_root;
12212 struct btrfs_path path;
12213 struct extent_buffer *leaf;
12214 struct btrfs_key key;
12215 int del_slot, del_nr = 0;
12219 btrfs_init_path(&path);
12220 key.objectid = BTRFS_BALANCE_OBJECTID;
12221 key.type = BTRFS_BALANCE_ITEM_KEY;
/* Searched with ins_len -1 and cow 1 since the item is about to be deleted */
12223 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12228 goto reinit_data_reloc;
12233 ret = btrfs_del_item(trans, root, &path);
12236 btrfs_release_path(&path);
/* Next: the root items of the tree reloc roots */
12238 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12239 key.type = BTRFS_ROOT_ITEM_KEY;
12241 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12245 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12250 ret = btrfs_del_items(trans, root, &path,
12257 btrfs_release_path(&path);
12260 ret = btrfs_search_slot(trans, root, &key, &path,
12267 leaf = path.nodes[0];
12268 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12269 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
12271 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
12276 del_slot = path.slots[0];
12285 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
12289 btrfs_release_path(&path);
/* From here on root refers to the data reloc tree, no longer the tree root */
12292 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
12293 key.type = BTRFS_ROOT_ITEM_KEY;
12294 key.offset = (u64)-1;
12295 root = btrfs_read_fs_root(fs_info, &key);
12296 if (IS_ERR(root)) {
12297 fprintf(stderr, "Error reading data reloc tree\n");
12298 ret = PTR_ERR(root);
12301 record_root_in_trans(trans, root);
12302 ret = btrfs_fsck_reinit_root(trans, root, 0);
12305 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
12307 btrfs_release_path(&path);
/*
 * Recreate the extent tree inside the given transaction.
 *
 * Sequence visible here:
 *  - filesystems with the MIXED_GROUPS incompat flag get an explanatory
 *    message on stderr (the code that follows the message is not shown);
 *  - pin_metadata_blocks() pins the metadata currently in use;
 *  - btrfs_free_block_groups() drops the existing block groups and
 *    reset_block_groups() recreates them in memory;
 *  - btrfs_fsck_reinit_root() reinitializes fs_info->extent_root;
 *  - every in-memory block group, found via
 *    btrfs_lookup_first_block_group(), has its item inserted into the extent
 *    root, followed by btrfs_extent_post_op();
 *  - reset_balance() clears any pending balance.
 *
 * Each failing step prints a message to stderr; the values returned are not
 * visible in this excerpt.
 */
12311 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
12312 struct btrfs_fs_info *fs_info)
12318 * The only reason we don't do this is because right now we're just
12319 * walking the trees we find and pinning down their bytes, we don't look
12320 * at any of the leaves. In order to do mixed groups we'd have to check
12321 * the leaves of any fs roots and pin down the bytes for any file
12322 * extents we find. Not hard but why do it if we don't have to?
12324 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
12325 fprintf(stderr, "We don't support re-initing the extent tree "
12326 "for mixed block groups yet, please notify a btrfs "
12327 "developer you want to do this so they can add this "
12328 "functionality.\n");
12333 * first we need to walk all of the trees except the extent tree and pin
12334 * down the bytes that are in use so we don't overwrite any existing
12337 ret = pin_metadata_blocks(fs_info);
12339 fprintf(stderr, "error pinning down used bytes\n");
12344 * Need to drop all the block groups since we're going to recreate all
12347 btrfs_free_block_groups(fs_info);
12348 ret = reset_block_groups(fs_info);
12350 fprintf(stderr, "error resetting the block groups\n");
12354 /* Ok we can allocate now, reinit the extent root */
12355 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
12357 fprintf(stderr, "extent root initialization failed\n");
12359 * When the transaction code is updated we should end the
12360 * transaction, but for now progs only knows about commit so
12361 * just return an error.
12367 * Now we have all the in-memory block groups setup so we can make
12368 * allocations properly, and the metadata we care about is safe since we
12369 * pinned all of it above.
12372 struct btrfs_block_group_cache *cache;
12374 cache = btrfs_lookup_first_block_group(fs_info, start);
12377 start = cache->key.objectid + cache->key.offset;
12378 ret = btrfs_insert_item(trans, fs_info->extent_root,
12379 &cache->key, &cache->item,
12380 sizeof(cache->item));
12382 fprintf(stderr, "Error adding block group\n");
12385 btrfs_extent_post_op(trans, fs_info->extent_root);
12388 ret = reset_balance(trans, fs_info);
12390 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Recow (the term used in the message printed below) one metadata block.
 *
 * @root: any root of the filesystem; only root->fs_info is used before the
 *        variable is reassigned to the root that owns the block
 * @eb:   the metadata block to process
 *
 * The owner root is looked up from btrfs_header_owner(eb).  A transaction is
 * started on it, the path is limited to the level of eb, and the tree is
 * searched for the first key of eb (node key for levels above 0, item key
 * for leaves) with 1 as the last argument of btrfs_search_slot(),
 * presumably the cow flag - confirm against its declaration.  The
 * transaction is then committed and the path released.
 *
 * Returns PTR_ERR() of the failing call when the root lookup or the
 * transaction start fails; the final return value is not visible here.
 * NOTE(review): the result of btrfs_commit_transaction() is not checked.
 */
12395 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
12397 struct btrfs_path path;
12398 struct btrfs_trans_handle *trans;
12399 struct btrfs_key key;
12402 printf("Recowing metadata block %llu\n", eb->start);
/* Build the key used to read the owning root. */
12403 key.objectid = btrfs_header_owner(eb);
12404 key.type = BTRFS_ROOT_ITEM_KEY;
12405 key.offset = (u64)-1;
12407 root = btrfs_read_fs_root(root->fs_info, &key);
12408 if (IS_ERR(root)) {
12409 fprintf(stderr, "Couldn't find owner root %llu\n",
12411 return PTR_ERR(root);
12414 trans = btrfs_start_transaction(root, 1);
12416 return PTR_ERR(trans);
12418 btrfs_init_path(&path);
/* Stop the search at the level of eb; key is reused for the first key of eb. */
12419 path.lowest_level = btrfs_header_level(eb);
12420 if (path.lowest_level)
12421 btrfs_node_key_to_cpu(eb, &key, 0);
12423 btrfs_item_key_to_cpu(eb, &key, 0);
12425 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12426 btrfs_commit_transaction(trans, root);
12427 btrfs_release_path(&path);
/*
 * Delete one item that was recorded as bad.
 *
 * @root: any root of the filesystem; only root->fs_info is used before the
 *        variable is reassigned to the root identified by bad->root_id
 * @bad:  describes the item: the id of the root that holds it and its key
 *
 * Reads the root named by bad->root_id, starts a transaction, searches for
 * bad->key, deletes the item found with btrfs_del_item(), commits and
 * releases the path.
 *
 * Returns PTR_ERR() of the failing call when the root lookup or the
 * transaction start fails; the handling of the search result and the final
 * return value are not visible here.
 * NOTE(review): the result of btrfs_commit_transaction() is not checked.
 */
12431 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12433 struct btrfs_path path;
12434 struct btrfs_trans_handle *trans;
12435 struct btrfs_key key;
12438 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12439 bad->key.type, bad->key.offset);
/* Build the key used to read the root that holds the bad item. */
12440 key.objectid = bad->root_id;
12441 key.type = BTRFS_ROOT_ITEM_KEY;
12442 key.offset = (u64)-1;
12444 root = btrfs_read_fs_root(root->fs_info, &key);
12445 if (IS_ERR(root)) {
12446 fprintf(stderr, "Couldn't find owner root %llu\n",
12448 return PTR_ERR(root);
12451 trans = btrfs_start_transaction(root, 1);
12453 return PTR_ERR(trans);
12455 btrfs_init_path(&path);
12456 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12462 ret = btrfs_del_item(trans, root, &path);
12464 btrfs_commit_transaction(trans, root);
12465 btrfs_release_path(&path);
/*
 * Clear the log tree reference in the super block.
 *
 * Starts a transaction on root, sets both the log root pointer and the log
 * root level in root->fs_info->super_copy to 0, and commits.  ret receives
 * PTR_ERR(trans) if the transaction cannot be started, otherwise the result
 * of btrfs_commit_transaction(); the return statement itself is not visible
 * in this excerpt.
 */
12469 static int zero_log_tree(struct btrfs_root *root)
12471 struct btrfs_trans_handle *trans;
12474 trans = btrfs_start_transaction(root, 1);
12475 if (IS_ERR(trans)) {
12476 ret = PTR_ERR(trans);
12479 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12480 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12481 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert checksums for a data range, one sector at a time.
 *
 * @trans:     transaction used for the checksum insertions
 * @csum_root: checksum tree to insert into; its fs_info supplies sectorsize
 * @buf:       scratch buffer for one sector; the callers in this file
 *             allocate fs_info->sectorsize bytes for it
 * @start:     first byte of the range
 *
 * The loop runs while offset is below len (a value declared on a line not
 * shown here).  Each pass reads data at start + offset with
 * read_extent_data() and passes the same position and buffer to
 * btrfs_csum_file_block(), whose third argument is always start + len.
 * NOTE(review): the error checks after both calls and the return value are
 * not visible in this excerpt.
 */
12485 static int populate_csum(struct btrfs_trans_handle *trans,
12486 struct btrfs_root *csum_root, char *buf, u64 start,
12489 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12494 while (offset < len) {
12495 sectorsize = fs_info->sectorsize;
12496 ret = read_extent_data(fs_info, buf, start + offset,
12500 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12501 start + offset, buf, sectorsize);
12504 offset += sectorsize;
/*
 * Fill the checksum tree from the file extents of a single fs/subvolume
 * root.
 *
 * @trans:     transaction passed through to populate_csum()
 * @csum_root: checksum tree being filled
 * @cur_root:  fs/subvolume tree whose items are scanned
 *
 * Allocates a one-sector buffer, searches cur_root without a transaction
 * handle and then advances with btrfs_next_item().  For items of type
 * BTRFS_EXTENT_DATA_KEY whose extent type is BTRFS_FILE_EXTENT_REG, the
 * on-disk start and length are read and handed to populate_csum().
 * A result of -EEXIST from populate_csum() is special-cased.
 * NOTE(review): the start key, what the skip/-EEXIST branches do, the
 * release of buf and the return value are not visible in this excerpt.
 */
12509 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12510 struct btrfs_root *csum_root,
12511 struct btrfs_root *cur_root)
12513 struct btrfs_path path;
12514 struct btrfs_key key;
12515 struct extent_buffer *node;
12516 struct btrfs_file_extent_item *fi;
/* Scratch buffer sized for exactly one sector. */
12523 buf = malloc(cur_root->fs_info->sectorsize);
12527 btrfs_init_path(&path);
12531 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12534 /* Iterate over all regular file extents and fill in their csums */
12536 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12538 if (key.type != BTRFS_EXTENT_DATA_KEY)
12540 node = path.nodes[0];
12541 slot = path.slots[0];
12542 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12543 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* Checksums cover the on-disk extent, not the referenced file range. */
12545 start = btrfs_file_extent_disk_bytenr(node, fi);
12546 len = btrfs_file_extent_disk_num_bytes(node, fi);
12548 ret = populate_csum(trans, csum_root, buf, start, len);
12549 if (ret == -EEXIST)
12555 * TODO: if next leaf is corrupted, jump to nearest next valid
12558 ret = btrfs_next_item(cur_root, &path);
12568 btrfs_release_path(&path);
/*
 * Fill the checksum tree by scanning every fs/subvolume tree.
 *
 * @trans:     transaction passed through to the per-root helper
 * @csum_root: checksum tree being filled
 *
 * Searches the tree root from (BTRFS_FS_TREE_OBJECTID, BTRFS_ROOT_ITEM_KEY)
 * and advances with btrfs_next_item().  Keys are tested against
 * BTRFS_LAST_FREE_OBJECTID, BTRFS_ROOT_ITEM_KEY and is_fstree(); for the
 * keys that get through, the root is read with key.offset set to (u64)-1
 * and passed to fill_csum_tree_from_one_fs_root().  A failed root read
 * prints a message to stderr.
 * NOTE(review): what each test branch does (skip or stop) and the return
 * value are not visible in this excerpt.
 */
12573 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12574 struct btrfs_root *csum_root)
12576 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12577 struct btrfs_path path;
12578 struct btrfs_root *tree_root = fs_info->tree_root;
12579 struct btrfs_root *cur_root;
12580 struct extent_buffer *node;
12581 struct btrfs_key key;
12585 btrfs_init_path(&path);
12586 key.objectid = BTRFS_FS_TREE_OBJECTID;
12588 key.type = BTRFS_ROOT_ITEM_KEY;
12589 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12598 node = path.nodes[0];
12599 slot = path.slots[0];
12600 btrfs_item_key_to_cpu(node, &key, slot);
12601 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12603 if (key.type != BTRFS_ROOT_ITEM_KEY)
12605 if (!is_fstree(key.objectid))
/* The found key is reused, with offset (u64)-1, to read the root. */
12607 key.offset = (u64)-1;
12609 cur_root = btrfs_read_fs_root(fs_info, &key);
12610 if (IS_ERR(cur_root) || !cur_root) {
12611 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12615 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12620 ret = btrfs_next_item(tree_root, &path);
12630 btrfs_release_path(&path);
/*
 * Fill the checksum tree by scanning the extent tree.
 *
 * @trans:     transaction passed through to populate_csum()
 * @csum_root: checksum tree being filled; its fs_info supplies the extent
 *             root and the sector size
 *
 * Searches the extent root from a BTRFS_EXTENT_ITEM_KEY key, allocates a
 * one-sector buffer and walks the leaves with btrfs_next_leaf().  Items of
 * type BTRFS_EXTENT_ITEM_KEY are tested for BTRFS_EXTENT_FLAG_DATA, and
 * populate_csum() is called with key.objectid as the start of the range.
 * NOTE(review): what the two test branches do, the length passed to
 * populate_csum(), the release of buf and the return value are not visible
 * in this excerpt; the two early btrfs_release_path() calls presumably sit
 * on the search-failure and allocation-failure paths.
 */
12634 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12635 struct btrfs_root *csum_root)
12637 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12638 struct btrfs_path path;
12639 struct btrfs_extent_item *ei;
12640 struct extent_buffer *leaf;
12642 struct btrfs_key key;
12645 btrfs_init_path(&path);
12647 key.type = BTRFS_EXTENT_ITEM_KEY;
12649 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12651 btrfs_release_path(&path);
/* Scratch buffer sized for exactly one sector. */
12655 buf = malloc(csum_root->fs_info->sectorsize);
12657 btrfs_release_path(&path);
12662 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12663 ret = btrfs_next_leaf(extent_root, &path);
12671 leaf = path.nodes[0];
12673 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12674 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12679 ei = btrfs_item_ptr(leaf, path.slots[0],
12680 struct btrfs_extent_item);
12681 if (!(btrfs_extent_flags(leaf, ei) &
12682 BTRFS_EXTENT_FLAG_DATA)) {
12687 ret = populate_csum(trans, csum_root, buf, key.objectid,
12694 btrfs_release_path(&path);
12700 * Recalculate the csum and put it into the csum tree.
12702 * Extent tree init wipes out all the extent info, so in that case we can't
12703 * rely on the extent tree and must use the fs trees instead. If search_fs_tree
12704 * is set, we will use the fs/subvol trees to init the csum tree.
/*
 * Dispatch to one of the two checksum tree fillers.
 *
 * @trans:          transaction passed through unchanged
 * @csum_root:      checksum tree to fill
 * @search_fs_tree: when non-zero, use fill_csum_tree_from_fs(); the other
 *                  return shown uses fill_csum_tree_from_extent()
 *
 * Returns whatever the chosen helper returns.
 */
12706 static int fill_csum_tree(struct btrfs_trans_handle *trans,
12707 struct btrfs_root *csum_root,
12708 int search_fs_tree)
12710 if (search_fs_tree)
12711 return fill_csum_tree_from_fs(trans, csum_root);
12713 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache.
 *
 * Does nothing special when the cache pointer is NULL (that branch is not
 * shown).  Otherwise removes entries one by one until the tree is empty,
 * then frees the tree and resets the global pointer to NULL.
 */
12716 static void free_roots_info_cache(void)
12718 if (!roots_info_cache)
12721 while (!cache_tree_empty(roots_info_cache)) {
12722 struct cache_extent *entry;
12723 struct root_item_info *rii;
12725 entry = first_cache_extent(roots_info_cache);
12728 remove_cache_extent(roots_info_cache, entry);
/*
 * Recover the enclosing root_item_info, presumably so that it can be freed;
 * the line that uses rii is not visible in this excerpt.
 */
12729 rii = container_of(entry, struct root_item_info, cache_extent);
12733 free(roots_info_cache);
12734 roots_info_cache = NULL;
/*
 * Populate the global roots_info_cache from the extent tree.
 *
 * The cache tree is allocated and initialized on first use.  The extent
 * tree is then scanned from a BTRFS_EXTENT_ITEM_KEY key.  Only items of type
 * BTRFS_EXTENT_ITEM_KEY or BTRFS_METADATA_ITEM_KEY are considered, and
 * extent items are additionally tested for BTRFS_EXTENT_FLAG_TREE_BLOCK.
 * The first inline ref of the item must be of type
 * BTRFS_TREE_BLOCK_REF_KEY; its offset is taken as the root id.
 *
 * One root_item_info is kept per root id (cache key: start = root id,
 * size = 1).  When an item has a higher level than the one recorded, or
 * nothing has been recorded yet (level == (u8)-1), the entry is overwritten
 * with this block: level, bytenr, generation, and node_count reset to 1.
 * NOTE(review): the branch for an equal level is present but its body is
 * not visible here, nor are the error paths and the return value.
 */
12737 static int build_roots_info_cache(struct btrfs_fs_info *info)
12740 struct btrfs_key key;
12741 struct extent_buffer *leaf;
12742 struct btrfs_path path;
12744 if (!roots_info_cache) {
12745 roots_info_cache = malloc(sizeof(*roots_info_cache));
12746 if (!roots_info_cache)
12748 cache_tree_init(roots_info_cache);
12751 btrfs_init_path(&path);
12753 key.type = BTRFS_EXTENT_ITEM_KEY;
12755 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12758 leaf = path.nodes[0];
12761 struct btrfs_key found_key;
12762 struct btrfs_extent_item *ei;
12763 struct btrfs_extent_inline_ref *iref;
12764 int slot = path.slots[0];
12769 struct cache_extent *entry;
12770 struct root_item_info *rii;
/* Past the last item of this leaf: continue in the next leaf. */
12772 if (slot >= btrfs_header_nritems(leaf)) {
12773 ret = btrfs_next_leaf(info->extent_root, &path);
12780 leaf = path.nodes[0];
12781 slot = path.slots[0];
12784 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12786 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12787 found_key.type != BTRFS_METADATA_ITEM_KEY)
12790 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12791 flags = btrfs_extent_flags(leaf, ei);
12793 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12794 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * Metadata items: the inline ref follows the extent item directly and the
 * level is the key offset.  Otherwise a btrfs_tree_block_info sits between
 * the extent item and the inline ref and carries the level.
 */
12797 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12798 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12799 level = found_key.offset;
12801 struct btrfs_tree_block_info *binfo;
12803 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12804 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12805 level = btrfs_tree_block_level(leaf, binfo);
12809 * For a root extent, it must be of the following type and the
12810 * first (and only one) iref in the item.
12812 type = btrfs_extent_inline_ref_type(leaf, iref);
12813 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12816 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12817 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12819 rii = malloc(sizeof(struct root_item_info));
12824 rii->cache_extent.start = root_id;
12825 rii->cache_extent.size = 1;
/* (u8)-1 marks an entry for which no level has been recorded yet. */
12826 rii->level = (u8)-1;
12827 entry = &rii->cache_extent;
12828 ret = insert_cache_extent(roots_info_cache, entry);
12831 rii = container_of(entry, struct root_item_info,
12835 ASSERT(rii->cache_extent.start == root_id);
12836 ASSERT(rii->cache_extent.size == 1);
12838 if (level > rii->level || rii->level == (u8)-1) {
12839 rii->level = level;
12840 rii->bytenr = found_key.objectid;
12841 rii->gen = btrfs_extent_generation(leaf, ei);
12842 rii->node_count = 1;
12843 } else if (level == rii->level) {
12851 btrfs_release_path(&path);
/*
 * Compare one root item with the information gathered in roots_info_cache
 * and optionally rewrite it.
 *
 * @path:           points at the root item (nodes[0] / slots[0])
 * @root_key:       key of that root item; its objectid is the root id
 * @read_only_mode: when non-zero, only report; never modify the leaf
 *
 * The cache entry for the root id is looked up; a missing entry or a
 * node_count other than 1 leads to an error message.  The root item is
 * copied out of the leaf and its bytenr, level and generation are compared
 * with the cached values.  On a mismatch a message is printed unless both
 * read_only_mode and the repair flag are set, and a root item generation
 * newer than the cached one is reported separately.  When read_only_mode is
 * zero the three fields are overwritten with the cached values and the item
 * is written back into the leaf.
 * NOTE(review): the return values are not visible in this excerpt.
 */
12856 static int maybe_repair_root_item(struct btrfs_path *path,
12857 const struct btrfs_key *root_key,
12858 const int read_only_mode)
12860 const u64 root_id = root_key->objectid;
12861 struct cache_extent *entry;
12862 struct root_item_info *rii;
12863 struct btrfs_root_item ri;
12864 unsigned long offset;
12866 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12869 "Error: could not find extent items for root %llu\n",
12870 root_key->objectid);
12874 rii = container_of(entry, struct root_item_info, cache_extent);
12875 ASSERT(rii->cache_extent.start == root_id);
12876 ASSERT(rii->cache_extent.size == 1);
12878 if (rii->node_count != 1) {
12880 "Error: could not find btree root extent for root %llu\n",
12885 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12886 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12888 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12889 btrfs_root_level(&ri) != rii->level ||
12890 btrfs_root_generation(&ri) != rii->gen) {
12893 * If we're in repair mode but our caller told us to not update
12894 * the root item, i.e. just check if it needs to be updated, don't
12895 * print this message, since the caller will call us again shortly
12896 * for the same root item without read only mode (the caller will
12897 * open a transaction first).
12899 if (!(read_only_mode && repair))
12901 "%sroot item for root %llu,"
12902 " current bytenr %llu, current gen %llu, current level %u,"
12903 " new bytenr %llu, new gen %llu, new level %u\n",
12904 (read_only_mode ? "" : "fixing "),
12906 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12907 btrfs_root_level(&ri),
12908 rii->bytenr, rii->gen, rii->level);
12910 if (btrfs_root_generation(&ri) > rii->gen) {
12912 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12913 root_id, btrfs_root_generation(&ri), rii->gen);
12917 if (!read_only_mode) {
12918 btrfs_set_root_bytenr(&ri, rii->bytenr);
12919 btrfs_set_root_level(&ri, rii->level);
12920 btrfs_set_root_generation(&ri, rii->gen);
12921 write_extent_buffer(path->nodes[0], &ri,
12922 offset, sizeof(ri));
12932 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12933 * caused read-only snapshots to be corrupted if they were created at a moment
12934 * when the source subvolume/snapshot had orphan items. The issue was that the
12935 * on-disk root items became incorrect, referring to the pre orphan cleanup root
12936 * node instead of the post orphan cleanup root node.
12937 * So this function and its callees just detect and fix those cases. Even
12938 * though the regression was for read-only snapshots, this function applies to
12939 * any snapshot/subvolume root.
12940 * This must be run before any other repair code - otherwise that other repair
12941 * code may, for example, delete or modify backrefs in the extent tree, which
12942 * will result in an inconsistent fs after repairing the root items.
/*
 * Check the root items stored in the tree root and, in repair mode, fix the
 * ones that disagree with the extent tree.
 *
 * Builds roots_info_cache with build_roots_info_cache(), then scans the
 * tree root from (BTRFS_FIRST_FREE_OBJECTID, BTRFS_ROOT_ITEM_KEY).  Items
 * that are not root items, and the BTRFS_TREE_RELOC_OBJECTID item, are
 * tested for and handled by branches not shown here.  Every other root item
 * goes to maybe_repair_root_item(), in read-only mode when no transaction
 * is open (trans ? 0 : 1).  If that happens while the repair flag is set and
 * no transaction is open, the path is released; judging by the need_trans
 * variable the scan is presumably restarted with a transaction - confirm.
 * At the end of a leaf the path is released and an open transaction is
 * committed.  On exit the cache is freed, the path is released and
 * btrfs_commit_transaction() is called.
 *
 * The caller in this file prints a positive return value as a number of
 * roots and treats a negative one as an errno.
 * NOTE(review): the result of the final btrfs_commit_transaction() is not
 * checked; the conditions around it are not visible in this excerpt.
 */
12944 static int repair_root_items(struct btrfs_fs_info *info)
12946 struct btrfs_path path;
12947 struct btrfs_key key;
12948 struct extent_buffer *leaf;
12949 struct btrfs_trans_handle *trans = NULL;
12952 int need_trans = 0;
12954 btrfs_init_path(&path);
12956 ret = build_roots_info_cache(info);
/* Start the scan at the first free objectid, root item type. */
12960 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12961 key.type = BTRFS_ROOT_ITEM_KEY;
12966 * Avoid opening and committing transactions if a leaf doesn't have
12967 * any root items that need to be fixed, so that we avoid rotating
12968 * backup roots unnecessarily.
12971 trans = btrfs_start_transaction(info->tree_root, 1);
12972 if (IS_ERR(trans)) {
12973 ret = PTR_ERR(trans);
12978 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12982 leaf = path.nodes[0];
12985 struct btrfs_key found_key;
12987 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12988 int no_more_keys = find_next_key(&path, &key);
12990 btrfs_release_path(&path);
12992 ret = btrfs_commit_transaction(trans,
13004 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13006 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
13008 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13011 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
13015 if (!trans && repair) {
13018 btrfs_release_path(&path);
13028 free_roots_info_cache();
13029 btrfs_release_path(&path);
13031 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the free space cache of every block group and mark the cache as
 * invalid in the super block.
 *
 * Walks the block groups with btrfs_lookup_first_block_group(), calling
 * btrfs_clear_free_space_cache() on each and moving the cursor past it.
 * Afterwards a transaction is started on the tree root, the cache
 * generation in fs_info->super_copy is set to (u64)-1 and the transaction
 * is committed.
 *
 * Returns PTR_ERR(trans) if the transaction cannot be started; the other
 * return paths are not visible in this excerpt.
 * NOTE(review): the result of btrfs_commit_transaction() is not checked.
 */
13038 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13040 struct btrfs_trans_handle *trans;
13041 struct btrfs_block_group_cache *bg_cache;
13045 /* Clear all free space cache inodes and their extent data */
13047 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13050 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
/* Continue the lookup right after the block group just handled. */
13053 current = bg_cache->key.objectid + bg_cache->key.offset;
13056 /* Don't forget to set cache_generation to -1 */
13057 trans = btrfs_start_transaction(fs_info->tree_root, 0);
13058 if (IS_ERR(trans)) {
13059 error("failed to update super block cache generation");
13060 return PTR_ERR(trans);
13062 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
13063 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Clear the free space cache, version 1 or 2, and report progress.
 *
 * clear_version (declared on a line not shown here) selects the variant:
 *  - 1: if the filesystem has the FREE_SPACE_TREE compat_ro flag, a message
 *       pointing to the v2 option is emitted; otherwise
 *       clear_free_space_cache() is called;
 *  - 2: if the FREE_SPACE_TREE flag is absent there is nothing to clear;
 *       otherwise btrfs_clear_free_space_tree() is called.
 * Failures of either helper are reported with error().
 * NOTE(review): the return values are not visible in this excerpt.
 */
13068 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
13073 if (clear_version == 1) {
13074 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13076 "free space cache v2 detected, use --clear-space-cache v2");
13080 printf("Clearing free space cache\n");
13081 ret = clear_free_space_cache(fs_info);
13083 error("failed to clear free space cache");
13086 printf("Free space cache cleared\n");
13088 } else if (clear_version == 2) {
13089 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13090 printf("no free space cache v2 to clear\n");
13094 printf("Clear free space cache v2\n");
13095 ret = btrfs_clear_free_space_tree(fs_info);
13097 error("failed to clear free space cache v2: %d", ret);
13100 printf("free space cache v2 cleared\n");
/*
 * Help text for the check command, one string per output line: synopsis,
 * short and long description, then the option list.  cmd_check() passes
 * this array to usage().
 */
13107 const char * const cmd_check_usage[] = {
13108 "btrfs check [options] <device>",
13109 "Check structural integrity of a filesystem (unmounted).",
13110 "Check structural integrity of an unmounted filesystem. Verify internal",
13111 "trees' consistency and item connectivity. In the repair mode try to",
13112 "fix the problems found. ",
13113 "WARNING: the repair mode is considered dangerous",
13115 "-s|--super <superblock> use this superblock copy",
13116 "-b|--backup use the first valid backup root copy",
13117 "--force skip mount checks, repair is not possible",
13118 "--repair try to repair the filesystem",
13119 "--readonly run in read-only mode (default)",
13120 "--init-csum-tree create a new CRC tree",
13121 "--init-extent-tree create a new extent tree",
13122 "--mode <MODE> allows choice of memory/IO trade-offs",
13123 " where MODE is one of:",
13124 " original - read inodes and extents to memory (requires",
13125 " more memory, does less IO)",
13126 " lowmem - try to use less memory but read blocks again",
13128 "--check-data-csum verify checksums of data blocks",
13129 "-Q|--qgroup-report print a report on qgroup consistency",
13130 "-E|--subvol-extents <subvolid>",
13131 " print subvolume extents and sharing state",
13132 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
13133 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
13134 "-p|--progress indicate progress",
13135 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
13139 int cmd_check(int argc, char **argv)
13141 struct cache_tree root_cache;
13142 struct btrfs_root *root;
13143 struct btrfs_fs_info *info;
13146 u64 tree_root_bytenr = 0;
13147 u64 chunk_root_bytenr = 0;
13148 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
13152 int init_csum_tree = 0;
13154 int clear_space_cache = 0;
13155 int qgroup_report = 0;
13156 int qgroups_repaired = 0;
13157 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
13162 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
13163 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
13164 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
13165 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
13166 GETOPT_VAL_FORCE };
13167 static const struct option long_options[] = {
13168 { "super", required_argument, NULL, 's' },
13169 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
13170 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
13171 { "init-csum-tree", no_argument, NULL,
13172 GETOPT_VAL_INIT_CSUM },
13173 { "init-extent-tree", no_argument, NULL,
13174 GETOPT_VAL_INIT_EXTENT },
13175 { "check-data-csum", no_argument, NULL,
13176 GETOPT_VAL_CHECK_CSUM },
13177 { "backup", no_argument, NULL, 'b' },
13178 { "subvol-extents", required_argument, NULL, 'E' },
13179 { "qgroup-report", no_argument, NULL, 'Q' },
13180 { "tree-root", required_argument, NULL, 'r' },
13181 { "chunk-root", required_argument, NULL,
13182 GETOPT_VAL_CHUNK_TREE },
13183 { "progress", no_argument, NULL, 'p' },
13184 { "mode", required_argument, NULL,
13186 { "clear-space-cache", required_argument, NULL,
13187 GETOPT_VAL_CLEAR_SPACE_CACHE},
13188 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
13189 { NULL, 0, NULL, 0}
13192 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
13196 case 'a': /* ignored */ break;
13198 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
13201 num = arg_strtou64(optarg);
13202 if (num >= BTRFS_SUPER_MIRROR_MAX) {
13204 "super mirror should be less than %d",
13205 BTRFS_SUPER_MIRROR_MAX);
13208 bytenr = btrfs_sb_offset(((int)num));
13209 printf("using SB copy %llu, bytenr %llu\n", num,
13210 (unsigned long long)bytenr);
13216 subvolid = arg_strtou64(optarg);
13219 tree_root_bytenr = arg_strtou64(optarg);
13221 case GETOPT_VAL_CHUNK_TREE:
13222 chunk_root_bytenr = arg_strtou64(optarg);
13225 ctx.progress_enabled = true;
13229 usage(cmd_check_usage);
13230 case GETOPT_VAL_REPAIR:
13231 printf("enabling repair mode\n");
13233 ctree_flags |= OPEN_CTREE_WRITES;
13235 case GETOPT_VAL_READONLY:
13238 case GETOPT_VAL_INIT_CSUM:
13239 printf("Creating a new CRC tree\n");
13240 init_csum_tree = 1;
13242 ctree_flags |= OPEN_CTREE_WRITES;
13244 case GETOPT_VAL_INIT_EXTENT:
13245 init_extent_tree = 1;
13246 ctree_flags |= (OPEN_CTREE_WRITES |
13247 OPEN_CTREE_NO_BLOCK_GROUPS);
13250 case GETOPT_VAL_CHECK_CSUM:
13251 check_data_csum = 1;
13253 case GETOPT_VAL_MODE:
13254 check_mode = parse_check_mode(optarg);
13255 if (check_mode == CHECK_MODE_UNKNOWN) {
13256 error("unknown mode: %s", optarg);
13260 case GETOPT_VAL_CLEAR_SPACE_CACHE:
13261 if (strcmp(optarg, "v1") == 0) {
13262 clear_space_cache = 1;
13263 } else if (strcmp(optarg, "v2") == 0) {
13264 clear_space_cache = 2;
13265 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
13268 "invalid argument to --clear-space-cache, must be v1 or v2");
13271 ctree_flags |= OPEN_CTREE_WRITES;
13273 case GETOPT_VAL_FORCE:
13279 if (check_argc_exact(argc - optind, 1))
13280 usage(cmd_check_usage);
13282 if (ctx.progress_enabled) {
13283 ctx.tp = TASK_NOTHING;
13284 ctx.info = task_init(print_status_check, print_status_return, &ctx);
13287 /* This check is the only reason for --readonly to exist */
13288 if (readonly && repair) {
13289 error("repair options are not compatible with --readonly");
13294 * experimental and dangerous
13296 if (repair && check_mode == CHECK_MODE_LOWMEM)
13297 warning("low-memory mode repair support is only partial");
13300 cache_tree_init(&root_cache);
13302 ret = check_mounted(argv[optind]);
13305 error("could not check mount status: %s",
13311 "%s is currently mounted, use --force if you really intend to check the filesystem",
13319 error("repair and --force is not yet supported");
13326 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
13330 "filesystem mounted, continuing because of --force");
13332 /* A block device is mounted in exclusive mode by kernel */
13333 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
13336 /* only allow partial opening under repair mode */
13338 ctree_flags |= OPEN_CTREE_PARTIAL;
13340 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
13341 chunk_root_bytenr, ctree_flags);
13343 error("cannot open file system");
13349 global_info = info;
13350 root = info->fs_root;
13351 uuid_unparse(info->super_copy->fsid, uuidbuf);
13353 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
13356 * Check the bare minimum before starting anything else that could rely
13357 * on it, namely the tree roots, any local consistency checks
13359 if (!extent_buffer_uptodate(info->tree_root->node) ||
13360 !extent_buffer_uptodate(info->dev_root->node) ||
13361 !extent_buffer_uptodate(info->chunk_root->node)) {
13362 error("critical roots corrupted, unable to check the filesystem");
13368 if (clear_space_cache) {
13369 ret = do_clear_free_space_cache(info, clear_space_cache);
13375 * repair mode will force us to commit transaction which
13376 * will make us fail to load log tree when mounting.
13378 if (repair && btrfs_super_log_root(info->super_copy)) {
13379 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
13385 ret = zero_log_tree(root);
13388 error("failed to zero log tree: %d", ret);
13393 if (qgroup_report) {
13394 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
13396 ret = qgroup_verify_all(info);
13403 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
13404 subvolid, argv[optind], uuidbuf);
13405 ret = print_extent_state(info, subvolid);
13410 if (init_extent_tree || init_csum_tree) {
13411 struct btrfs_trans_handle *trans;
13413 trans = btrfs_start_transaction(info->extent_root, 0);
13414 if (IS_ERR(trans)) {
13415 error("error starting transaction");
13416 ret = PTR_ERR(trans);
13421 if (init_extent_tree) {
13422 printf("Creating a new extent tree\n");
13423 ret = reinit_extent_tree(trans, info);
13429 if (init_csum_tree) {
13430 printf("Reinitialize checksum tree\n");
13431 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
13433 error("checksum tree initialization failed: %d",
13440 ret = fill_csum_tree(trans, info->csum_root,
13444 error("checksum tree refilling failed: %d", ret);
13449 * Ok now we commit and run the normal fsck, which will add
13450 * extent entries for all of the items it finds.
13452 ret = btrfs_commit_transaction(trans, info->extent_root);
13457 if (!extent_buffer_uptodate(info->extent_root->node)) {
13458 error("critical: extent_root, unable to check the filesystem");
13463 if (!extent_buffer_uptodate(info->csum_root->node)) {
13464 error("critical: csum_root, unable to check the filesystem");
13470 ret = do_check_chunks_and_extents(info);
13474 "errors found in extent allocation tree or chunk allocation");
13476 ret = repair_root_items(info);
13479 error("failed to repair root items: %s", strerror(-ret));
13483 fprintf(stderr, "Fixed %d roots.\n", ret);
13485 } else if (ret > 0) {
13487 "Found %d roots with an outdated root item.\n",
13490 "Please run a filesystem check with the option --repair to fix them.\n");
13496 if (!ctx.progress_enabled) {
13497 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13498 fprintf(stderr, "checking free space tree\n");
13500 fprintf(stderr, "checking free space cache\n");
13502 ret = check_space_cache(root);
13505 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13506 error("errors found in free space tree");
13508 error("errors found in free space cache");
13513 * We used to have to have these hole extents in between our real
13514 * extents so if we don't have this flag set we need to make sure there
13515 * are no gaps in the file extents for inodes, otherwise we can just
13516 * ignore it when this happens.
13518 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13519 ret = do_check_fs_roots(info, &root_cache);
13522 error("errors found in fs roots");
13526 fprintf(stderr, "checking csums\n");
13527 ret = check_csums(root);
13530 error("errors found in csum tree");
13534 fprintf(stderr, "checking root refs\n");
13535 /* For low memory mode, check_fs_roots_v2 handles root refs */
13536 if (check_mode != CHECK_MODE_LOWMEM) {
13537 ret = check_root_refs(root, &root_cache);
13540 error("errors found in root refs");
13545 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13546 struct extent_buffer *eb;
13548 eb = list_first_entry(&root->fs_info->recow_ebs,
13549 struct extent_buffer, recow);
13550 list_del_init(&eb->recow);
13551 ret = recow_extent_buffer(root, eb);
13554 error("fails to fix transid errors");
13559 while (!list_empty(&delete_items)) {
13560 struct bad_item *bad;
13562 bad = list_first_entry(&delete_items, struct bad_item, list);
13563 list_del_init(&bad->list);
13565 ret = delete_bad_item(root, bad);
13571 if (info->quota_enabled) {
13572 fprintf(stderr, "checking quota groups\n");
13573 ret = qgroup_verify_all(info);
13576 error("failed to check quota groups");
13580 ret = repair_qgroups(info, &qgroups_repaired);
13583 error("failed to repair quota groups");
13589 if (!list_empty(&root->fs_info->recow_ebs)) {
13590 error("transid errors in file system");
13595 printf("found %llu bytes used, ",
13596 (unsigned long long)bytes_used);
13598 printf("error(s) found\n");
13600 printf("no error found\n");
13601 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13602 printf("total tree bytes: %llu\n",
13603 (unsigned long long)total_btree_bytes);
13604 printf("total fs tree bytes: %llu\n",
13605 (unsigned long long)total_fs_tree_bytes);
13606 printf("total extent tree bytes: %llu\n",
13607 (unsigned long long)total_extent_tree_bytes);
13608 printf("btree space waste bytes: %llu\n",
13609 (unsigned long long)btree_space_waste);
13610 printf("file data blocks allocated: %llu\n referenced %llu\n",
13611 (unsigned long long)data_bytes_allocated,
13612 (unsigned long long)data_bytes_referenced);
13614 free_qgroup_counts();
13615 free_root_recs_tree(&root_cache);
13619 if (ctx.progress_enabled)
13620 task_deinit(ctx.info);