/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87 struct extent_backref {
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
98 return rb_entry(node, struct extent_backref, node);
101 struct data_backref {
102 struct extent_backref node;
/*
 * Error bits for tree checking.  Each flag occupies its own bit so several
 * can be OR-ed into one value; bit 0 is not assigned in this set.
 */
#define ROOT_DIR_ERROR		(1 << 1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but does not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but does not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1 << 14)	/* no inode_item */
#define LAST_ITEM		(1 << 15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but does not match */
#define DIR_INDEX_MISSING	(1 << 18)	/* DIR_INDEX not found */
#define DIR_INDEX_MISMATCH	(1 << 19)	/* DIR_INDEX found but does not match */
#define DIR_COUNT_AGAIN		(1 << 20)	/* DIR isize should be recalculated */
137 static inline struct data_backref* to_data_backref(struct extent_backref *back)
139 return container_of(back, struct data_backref, node);
142 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
144 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
145 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
146 struct data_backref *back1 = to_data_backref(ext1);
147 struct data_backref *back2 = to_data_backref(ext2);
149 WARN_ON(!ext1->is_data);
150 WARN_ON(!ext2->is_data);
152 /* parent and root are a union, so this covers both */
153 if (back1->parent > back2->parent)
155 if (back1->parent < back2->parent)
158 /* This is a full backref and the parents match. */
159 if (back1->node.full_backref)
162 if (back1->owner > back2->owner)
164 if (back1->owner < back2->owner)
167 if (back1->offset > back2->offset)
169 if (back1->offset < back2->offset)
172 if (back1->found_ref && back2->found_ref) {
173 if (back1->disk_bytenr > back2->disk_bytenr)
175 if (back1->disk_bytenr < back2->disk_bytenr)
178 if (back1->bytes > back2->bytes)
180 if (back1->bytes < back2->bytes)
/*
 * Much like data_backref, but with the undetermined members removed and
 * a list_head used for linkage.
 * During extent scan, it is stored in root->orphan_data_extent.
 * During fs tree scan, it is then moved to inode_rec->orphan_extents.
 */
193 struct orphan_data_extent {
194 struct list_head list;
202 struct tree_backref {
203 struct extent_backref node;
210 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
212 return container_of(back, struct tree_backref, node);
215 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
217 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
218 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
219 struct tree_backref *back1 = to_tree_backref(ext1);
220 struct tree_backref *back2 = to_tree_backref(ext2);
222 WARN_ON(ext1->is_data);
223 WARN_ON(ext2->is_data);
225 /* parent and root are a union, so this covers both */
226 if (back1->parent > back2->parent)
228 if (back1->parent < back2->parent)
234 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
236 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
237 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
239 if (ext1->is_data > ext2->is_data)
242 if (ext1->is_data < ext2->is_data)
245 if (ext1->full_backref > ext2->full_backref)
247 if (ext1->full_backref < ext2->full_backref)
251 return compare_data_backref(node1, node2);
253 return compare_tree_backref(node1, node2);
/*
 * Explicit "not set yet" value for extent_record::flag_block_full_backref.
 * That member is a 2-bit bitfield, so it can hold 0, 1 and this value.
 */
enum { FLAG_UNSET = 2 };
259 struct extent_record {
260 struct list_head backrefs;
261 struct list_head dups;
262 struct rb_root backref_tree;
263 struct list_head list;
264 struct cache_extent cache;
265 struct btrfs_disk_key parent_key;
270 u64 extent_item_refs;
272 u64 parent_generation;
276 unsigned int flag_block_full_backref:2;
277 unsigned int found_rec:1;
278 unsigned int content_checked:1;
279 unsigned int owner_ref_checked:1;
280 unsigned int is_root:1;
281 unsigned int metadata:1;
282 unsigned int bad_full_backref:1;
283 unsigned int crossing_stripes:1;
284 unsigned int wrong_chunk_type:1;
287 static inline struct extent_record* to_extent_record(struct list_head *entry)
289 return container_of(entry, struct extent_record, list);
292 struct inode_backref {
293 struct list_head list;
294 unsigned int found_dir_item:1;
295 unsigned int found_dir_index:1;
296 unsigned int found_inode_ref:1;
306 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
308 return list_entry(entry, struct inode_backref, list);
311 struct root_item_record {
312 struct list_head list;
318 struct btrfs_key drop_key;
/*
 * Per-backref error bits.  The comments give the text that
 * print_ref_error() prints for each bit.
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)	/* no dir item */
#define REF_ERR_NO_DIR_INDEX		(1 << 1)	/* no dir index */
#define REF_ERR_NO_INODE_REF		(1 << 2)	/* no inode ref */
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)	/* dup dir item */
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)	/* dup dir index */
#define REF_ERR_DUP_INODE_REF		(1 << 5)	/* dup inode ref */
#define REF_ERR_INDEX_UNMATCH		(1 << 6)	/* index mismatch */
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)	/* filetype mismatch */
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* name too long (0x100) */
#define REF_ERR_NO_ROOT_REF		(1 << 9)	/* no root ref */
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)	/* no root backref */
#define REF_ERR_DUP_ROOT_REF		(1 << 11)	/* dup root ref */
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)	/* dup root backref */
335 struct file_extent_hole {
341 struct inode_record {
342 struct list_head backrefs;
343 unsigned int checked:1;
344 unsigned int merging:1;
345 unsigned int found_inode_item:1;
346 unsigned int found_dir_item:1;
347 unsigned int found_file_extent:1;
348 unsigned int found_csum_item:1;
349 unsigned int some_csum_missing:1;
350 unsigned int nodatasum:1;
363 struct rb_root holes;
364 struct list_head orphan_extents;
/*
 * Per-inode error bits, OR-ed into inode_record::errors.  The comments
 * give the text that print_inode_error() prints for each bit.
 */
#define I_ERR_NO_INODE_ITEM		(1 << 0)	/* no inode item */
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)	/* no orphan item */
#define I_ERR_DUP_INODE_ITEM		(1 << 2)	/* dup inode item */
#define I_ERR_DUP_DIR_INDEX		(1 << 3)	/* dup dir index */
#define I_ERR_ODD_DIR_ITEM		(1 << 4)	/* odd dir item */
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)	/* odd file extent */
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)	/* bad file extent */
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)	/* file extent overlap */
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* file extent discount (0x100) */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)	/* dir isize wrong */
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* nbytes wrong (0x400) */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)	/* odd csum item */
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)	/* some csum missing */
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)	/* link count wrong */
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)	/* orphan file extent */
385 struct root_backref {
386 struct list_head list;
387 unsigned int found_dir_item:1;
388 unsigned int found_dir_index:1;
389 unsigned int found_back_ref:1;
390 unsigned int found_forward_ref:1;
391 unsigned int reachable:1;
400 static inline struct root_backref* to_root_backref(struct list_head *entry)
402 return list_entry(entry, struct root_backref, list);
406 struct list_head backrefs;
407 struct cache_extent cache;
408 unsigned int found_root_item:1;
414 struct cache_extent cache;
419 struct cache_extent cache;
420 struct cache_tree root_cache;
421 struct cache_tree inode_cache;
422 struct inode_record *current;
431 struct walk_control {
432 struct cache_tree shared;
433 struct shared_node *nodes[BTRFS_MAX_LEVEL];
439 struct btrfs_key key;
441 struct list_head list;
444 struct extent_entry {
449 struct list_head list;
452 struct root_item_info {
453 /* level of the root */
455 /* number of nodes at this level, must be 1 for a root */
459 struct cache_extent cache_extent;
/*
 * Error bits for low memory mode check.
 *
 * Currently no caller cares about the individual bits yet; they are for
 * internal use only.  Each flag has a bit of its own so that flags can
 * be OR-ed together and still be tested individually.
 */
#define BACKREF_MISSING		(1 << 0)	/* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1)	/* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2)	/* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3)	/* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4)	/* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5)	/* Bad item size */
#define UNKNOWN_TYPE		(1 << 6)	/* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7)	/* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
/*
 * This used to be (1 << 4), the same bit as REFERENCER_MISMATCH, which
 * made the two conditions indistinguishable.  It now has the next free bit.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9)	/* For kernel scrub workaround */
479 static void *print_status_check(void *p)
481 struct task_ctx *priv = p;
482 const char work_indicator[] = { '.', 'o', 'O', 'o' };
484 static char *task_position_string[] = {
486 "checking free space cache",
490 task_period_start(priv->info, 1000 /* 1s */);
492 if (priv->tp == TASK_NOTHING)
496 printf("%s [%c]\r", task_position_string[priv->tp],
497 work_indicator[count % 4]);
500 task_period_wait(priv->info);
505 static int print_status_return(void *p)
513 static enum btrfs_check_mode parse_check_mode(const char *str)
515 if (strcmp(str, "lowmem") == 0)
516 return CHECK_MODE_LOWMEM;
517 if (strcmp(str, "orig") == 0)
518 return CHECK_MODE_ORIGINAL;
519 if (strcmp(str, "original") == 0)
520 return CHECK_MODE_ORIGINAL;
522 return CHECK_MODE_UNKNOWN;
/* Compatibility function to allow reuse of old code */
526 static u64 first_extent_gap(struct rb_root *holes)
528 struct file_extent_hole *hole;
530 if (RB_EMPTY_ROOT(holes))
533 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
537 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
539 struct file_extent_hole *hole1;
540 struct file_extent_hole *hole2;
542 hole1 = rb_entry(node1, struct file_extent_hole, node);
543 hole2 = rb_entry(node2, struct file_extent_hole, node);
545 if (hole1->start > hole2->start)
547 if (hole1->start < hole2->start)
549 /* Now hole1->start == hole2->start */
550 if (hole1->len >= hole2->len)
552 * Hole 1 will be merge center
553 * Same hole will be merged later
556 /* Hole 2 will be merge center */
/*
 * Add a hole to the record
 *
 * This will do hole merge for copy_file_extent_holes(),
 * which will ensure there won't be contiguous holes.
 */
566 static int add_file_extent_hole(struct rb_root *holes,
569 struct file_extent_hole *hole;
570 struct file_extent_hole *prev = NULL;
571 struct file_extent_hole *next = NULL;
573 hole = malloc(sizeof(*hole));
578 /* Since compare will not return 0, no -EEXIST will happen */
579 rb_insert(holes, &hole->node, compare_hole);
581 /* simple merge with previous hole */
582 if (rb_prev(&hole->node))
583 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
585 if (prev && prev->start + prev->len >= hole->start) {
586 hole->len = hole->start + hole->len - prev->start;
587 hole->start = prev->start;
588 rb_erase(&prev->node, holes);
593 /* iterate merge with next holes */
595 if (!rb_next(&hole->node))
597 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
599 if (hole->start + hole->len >= next->start) {
600 if (hole->start + hole->len <= next->start + next->len)
601 hole->len = next->start + next->len -
603 rb_erase(&next->node, holes);
612 static int compare_hole_range(struct rb_node *node, void *data)
614 struct file_extent_hole *hole;
617 hole = (struct file_extent_hole *)data;
620 hole = rb_entry(node, struct file_extent_hole, node);
621 if (start < hole->start)
623 if (start >= hole->start && start < hole->start + hole->len)
/*
 * Delete a hole in the record
 *
 * This will do the hole split and is much more restrictive than add.
 */
633 static int del_file_extent_hole(struct rb_root *holes,
636 struct file_extent_hole *hole;
637 struct file_extent_hole tmp;
642 struct rb_node *node;
649 node = rb_search(holes, &tmp, compare_hole_range, NULL);
652 hole = rb_entry(node, struct file_extent_hole, node);
653 if (start + len > hole->start + hole->len)
657 * Now there will be no overlap, delete the hole and re-add the
658 * split(s) if they exists.
660 if (start > hole->start) {
661 prev_start = hole->start;
662 prev_len = start - hole->start;
665 if (hole->start + hole->len > start + len) {
666 next_start = start + len;
667 next_len = hole->start + hole->len - start - len;
670 rb_erase(node, holes);
673 ret = add_file_extent_hole(holes, prev_start, prev_len);
678 ret = add_file_extent_hole(holes, next_start, next_len);
685 static int copy_file_extent_holes(struct rb_root *dst,
688 struct file_extent_hole *hole;
689 struct rb_node *node;
692 node = rb_first(src);
694 hole = rb_entry(node, struct file_extent_hole, node);
695 ret = add_file_extent_hole(dst, hole->start, hole->len);
698 node = rb_next(node);
703 static void free_file_extent_holes(struct rb_root *holes)
705 struct rb_node *node;
706 struct file_extent_hole *hole;
708 node = rb_first(holes);
710 hole = rb_entry(node, struct file_extent_hole, node);
711 rb_erase(node, holes);
713 node = rb_first(holes);
717 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
719 static void record_root_in_trans(struct btrfs_trans_handle *trans,
720 struct btrfs_root *root)
722 if (root->last_trans != trans->transid) {
723 root->track_dirty = 1;
724 root->last_trans = trans->transid;
725 root->commit_root = root->node;
726 extent_buffer_get(root->node);
730 static u8 imode_to_type(u32 imode)
733 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
734 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
735 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
736 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
737 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
738 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
739 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
740 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
743 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
747 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
749 struct device_record *rec1;
750 struct device_record *rec2;
752 rec1 = rb_entry(node1, struct device_record, node);
753 rec2 = rb_entry(node2, struct device_record, node);
754 if (rec1->devid > rec2->devid)
756 else if (rec1->devid < rec2->devid)
762 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
764 struct inode_record *rec;
765 struct inode_backref *backref;
766 struct inode_backref *orig;
767 struct inode_backref *tmp;
768 struct orphan_data_extent *src_orphan;
769 struct orphan_data_extent *dst_orphan;
774 rec = malloc(sizeof(*rec));
776 return ERR_PTR(-ENOMEM);
777 memcpy(rec, orig_rec, sizeof(*rec));
779 INIT_LIST_HEAD(&rec->backrefs);
780 INIT_LIST_HEAD(&rec->orphan_extents);
781 rec->holes = RB_ROOT;
783 list_for_each_entry(orig, &orig_rec->backrefs, list) {
784 size = sizeof(*orig) + orig->namelen + 1;
785 backref = malloc(size);
790 memcpy(backref, orig, size);
791 list_add_tail(&backref->list, &rec->backrefs);
793 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
794 dst_orphan = malloc(sizeof(*dst_orphan));
799 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
800 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
802 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
809 rb = rb_first(&rec->holes);
811 struct file_extent_hole *hole;
813 hole = rb_entry(rb, struct file_extent_hole, node);
819 if (!list_empty(&rec->backrefs))
820 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
821 list_del(&orig->list);
825 if (!list_empty(&rec->orphan_extents))
826 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
827 list_del(&orig->list);
836 static void print_orphan_data_extents(struct list_head *orphan_extents,
839 struct orphan_data_extent *orphan;
841 if (list_empty(orphan_extents))
843 printf("The following data extent is lost in tree %llu:\n",
845 list_for_each_entry(orphan, orphan_extents, list) {
846 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
847 orphan->objectid, orphan->offset, orphan->disk_bytenr,
852 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
854 u64 root_objectid = root->root_key.objectid;
855 int errors = rec->errors;
859 /* reloc root errors, we print its corresponding fs root objectid*/
860 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
861 root_objectid = root->root_key.offset;
862 fprintf(stderr, "reloc");
864 fprintf(stderr, "root %llu inode %llu errors %x",
865 (unsigned long long) root_objectid,
866 (unsigned long long) rec->ino, rec->errors);
868 if (errors & I_ERR_NO_INODE_ITEM)
869 fprintf(stderr, ", no inode item");
870 if (errors & I_ERR_NO_ORPHAN_ITEM)
871 fprintf(stderr, ", no orphan item");
872 if (errors & I_ERR_DUP_INODE_ITEM)
873 fprintf(stderr, ", dup inode item");
874 if (errors & I_ERR_DUP_DIR_INDEX)
875 fprintf(stderr, ", dup dir index");
876 if (errors & I_ERR_ODD_DIR_ITEM)
877 fprintf(stderr, ", odd dir item");
878 if (errors & I_ERR_ODD_FILE_EXTENT)
879 fprintf(stderr, ", odd file extent");
880 if (errors & I_ERR_BAD_FILE_EXTENT)
881 fprintf(stderr, ", bad file extent");
882 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
883 fprintf(stderr, ", file extent overlap");
884 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
885 fprintf(stderr, ", file extent discount");
886 if (errors & I_ERR_DIR_ISIZE_WRONG)
887 fprintf(stderr, ", dir isize wrong");
888 if (errors & I_ERR_FILE_NBYTES_WRONG)
889 fprintf(stderr, ", nbytes wrong");
890 if (errors & I_ERR_ODD_CSUM_ITEM)
891 fprintf(stderr, ", odd csum item");
892 if (errors & I_ERR_SOME_CSUM_MISSING)
893 fprintf(stderr, ", some csum missing");
894 if (errors & I_ERR_LINK_COUNT_WRONG)
895 fprintf(stderr, ", link count wrong");
896 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
897 fprintf(stderr, ", orphan file extent");
898 fprintf(stderr, "\n");
899 /* Print the orphan extents if needed */
900 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
901 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
903 /* Print the holes if needed */
904 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
905 struct file_extent_hole *hole;
906 struct rb_node *node;
909 node = rb_first(&rec->holes);
910 fprintf(stderr, "Found file extent holes:\n");
913 hole = rb_entry(node, struct file_extent_hole, node);
914 fprintf(stderr, "\tstart: %llu, len: %llu\n",
915 hole->start, hole->len);
916 node = rb_next(node);
919 fprintf(stderr, "\tstart: 0, len: %llu\n",
921 root->fs_info->sectorsize));
925 static void print_ref_error(int errors)
927 if (errors & REF_ERR_NO_DIR_ITEM)
928 fprintf(stderr, ", no dir item");
929 if (errors & REF_ERR_NO_DIR_INDEX)
930 fprintf(stderr, ", no dir index");
931 if (errors & REF_ERR_NO_INODE_REF)
932 fprintf(stderr, ", no inode ref");
933 if (errors & REF_ERR_DUP_DIR_ITEM)
934 fprintf(stderr, ", dup dir item");
935 if (errors & REF_ERR_DUP_DIR_INDEX)
936 fprintf(stderr, ", dup dir index");
937 if (errors & REF_ERR_DUP_INODE_REF)
938 fprintf(stderr, ", dup inode ref");
939 if (errors & REF_ERR_INDEX_UNMATCH)
940 fprintf(stderr, ", index mismatch");
941 if (errors & REF_ERR_FILETYPE_UNMATCH)
942 fprintf(stderr, ", filetype mismatch");
943 if (errors & REF_ERR_NAME_TOO_LONG)
944 fprintf(stderr, ", name too long");
945 if (errors & REF_ERR_NO_ROOT_REF)
946 fprintf(stderr, ", no root ref");
947 if (errors & REF_ERR_NO_ROOT_BACKREF)
948 fprintf(stderr, ", no root backref");
949 if (errors & REF_ERR_DUP_ROOT_REF)
950 fprintf(stderr, ", dup root ref");
951 if (errors & REF_ERR_DUP_ROOT_BACKREF)
952 fprintf(stderr, ", dup root backref");
953 fprintf(stderr, "\n");
956 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
959 struct ptr_node *node;
960 struct cache_extent *cache;
961 struct inode_record *rec = NULL;
964 cache = lookup_cache_extent(inode_cache, ino, 1);
966 node = container_of(cache, struct ptr_node, cache);
968 if (mod && rec->refs > 1) {
969 node->data = clone_inode_rec(rec);
970 if (IS_ERR(node->data))
976 rec = calloc(1, sizeof(*rec));
978 return ERR_PTR(-ENOMEM);
980 rec->extent_start = (u64)-1;
982 INIT_LIST_HEAD(&rec->backrefs);
983 INIT_LIST_HEAD(&rec->orphan_extents);
984 rec->holes = RB_ROOT;
986 node = malloc(sizeof(*node));
989 return ERR_PTR(-ENOMEM);
991 node->cache.start = ino;
992 node->cache.size = 1;
995 if (ino == BTRFS_FREE_INO_OBJECTID)
998 ret = insert_cache_extent(inode_cache, &node->cache);
1000 return ERR_PTR(-EEXIST);
1005 static void free_orphan_data_extents(struct list_head *orphan_extents)
1007 struct orphan_data_extent *orphan;
1009 while (!list_empty(orphan_extents)) {
1010 orphan = list_entry(orphan_extents->next,
1011 struct orphan_data_extent, list);
1012 list_del(&orphan->list);
1017 static void free_inode_rec(struct inode_record *rec)
1019 struct inode_backref *backref;
1021 if (--rec->refs > 0)
1024 while (!list_empty(&rec->backrefs)) {
1025 backref = to_inode_backref(rec->backrefs.next);
1026 list_del(&backref->list);
1029 free_orphan_data_extents(&rec->orphan_extents);
1030 free_file_extent_holes(&rec->holes);
1034 static int can_free_inode_rec(struct inode_record *rec)
1036 if (!rec->errors && rec->checked && rec->found_inode_item &&
1037 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1042 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1043 struct inode_record *rec)
1045 struct cache_extent *cache;
1046 struct inode_backref *tmp, *backref;
1047 struct ptr_node *node;
1050 if (!rec->found_inode_item)
1053 filetype = imode_to_type(rec->imode);
1054 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1055 if (backref->found_dir_item && backref->found_dir_index) {
1056 if (backref->filetype != filetype)
1057 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1058 if (!backref->errors && backref->found_inode_ref &&
1059 rec->nlink == rec->found_link) {
1060 list_del(&backref->list);
1066 if (!rec->checked || rec->merging)
1069 if (S_ISDIR(rec->imode)) {
1070 if (rec->found_size != rec->isize)
1071 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1072 if (rec->found_file_extent)
1073 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1074 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1075 if (rec->found_dir_item)
1076 rec->errors |= I_ERR_ODD_DIR_ITEM;
1077 if (rec->found_size != rec->nbytes)
1078 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1079 if (rec->nlink > 0 && !no_holes &&
1080 (rec->extent_end < rec->isize ||
1081 first_extent_gap(&rec->holes) < rec->isize))
1082 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1085 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1086 if (rec->found_csum_item && rec->nodatasum)
1087 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1088 if (rec->some_csum_missing && !rec->nodatasum)
1089 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1092 BUG_ON(rec->refs != 1);
1093 if (can_free_inode_rec(rec)) {
1094 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1095 node = container_of(cache, struct ptr_node, cache);
1096 BUG_ON(node->data != rec);
1097 remove_cache_extent(inode_cache, &node->cache);
1099 free_inode_rec(rec);
1103 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1105 struct btrfs_path path;
1106 struct btrfs_key key;
1109 key.objectid = BTRFS_ORPHAN_OBJECTID;
1110 key.type = BTRFS_ORPHAN_ITEM_KEY;
1113 btrfs_init_path(&path);
1114 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1115 btrfs_release_path(&path);
1121 static int process_inode_item(struct extent_buffer *eb,
1122 int slot, struct btrfs_key *key,
1123 struct shared_node *active_node)
1125 struct inode_record *rec;
1126 struct btrfs_inode_item *item;
1128 rec = active_node->current;
1129 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1130 if (rec->found_inode_item) {
1131 rec->errors |= I_ERR_DUP_INODE_ITEM;
1134 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1135 rec->nlink = btrfs_inode_nlink(eb, item);
1136 rec->isize = btrfs_inode_size(eb, item);
1137 rec->nbytes = btrfs_inode_nbytes(eb, item);
1138 rec->imode = btrfs_inode_mode(eb, item);
1139 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1141 rec->found_inode_item = 1;
1142 if (rec->nlink == 0)
1143 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1144 maybe_free_inode_rec(&active_node->inode_cache, rec);
1148 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1150 int namelen, u64 dir)
1152 struct inode_backref *backref;
1154 list_for_each_entry(backref, &rec->backrefs, list) {
1155 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1157 if (backref->dir != dir || backref->namelen != namelen)
1159 if (memcmp(name, backref->name, namelen))
1164 backref = malloc(sizeof(*backref) + namelen + 1);
1167 memset(backref, 0, sizeof(*backref));
1169 backref->namelen = namelen;
1170 memcpy(backref->name, name, namelen);
1171 backref->name[namelen] = '\0';
1172 list_add_tail(&backref->list, &rec->backrefs);
1176 static int add_inode_backref(struct cache_tree *inode_cache,
1177 u64 ino, u64 dir, u64 index,
1178 const char *name, int namelen,
1179 u8 filetype, u8 itemtype, int errors)
1181 struct inode_record *rec;
1182 struct inode_backref *backref;
1184 rec = get_inode_rec(inode_cache, ino, 1);
1185 BUG_ON(IS_ERR(rec));
1186 backref = get_inode_backref(rec, name, namelen, dir);
1189 backref->errors |= errors;
1190 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1191 if (backref->found_dir_index)
1192 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1193 if (backref->found_inode_ref && backref->index != index)
1194 backref->errors |= REF_ERR_INDEX_UNMATCH;
1195 if (backref->found_dir_item && backref->filetype != filetype)
1196 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1198 backref->index = index;
1199 backref->filetype = filetype;
1200 backref->found_dir_index = 1;
1201 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1203 if (backref->found_dir_item)
1204 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1205 if (backref->found_dir_index && backref->filetype != filetype)
1206 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1208 backref->filetype = filetype;
1209 backref->found_dir_item = 1;
1210 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1211 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1212 if (backref->found_inode_ref)
1213 backref->errors |= REF_ERR_DUP_INODE_REF;
1214 if (backref->found_dir_index && backref->index != index)
1215 backref->errors |= REF_ERR_INDEX_UNMATCH;
1217 backref->index = index;
1219 backref->ref_type = itemtype;
1220 backref->found_inode_ref = 1;
1225 maybe_free_inode_rec(inode_cache, rec);
1229 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1230 struct cache_tree *dst_cache)
1232 struct inode_backref *backref;
1237 list_for_each_entry(backref, &src->backrefs, list) {
1238 if (backref->found_dir_index) {
1239 add_inode_backref(dst_cache, dst->ino, backref->dir,
1240 backref->index, backref->name,
1241 backref->namelen, backref->filetype,
1242 BTRFS_DIR_INDEX_KEY, backref->errors);
1244 if (backref->found_dir_item) {
1246 add_inode_backref(dst_cache, dst->ino,
1247 backref->dir, 0, backref->name,
1248 backref->namelen, backref->filetype,
1249 BTRFS_DIR_ITEM_KEY, backref->errors);
1251 if (backref->found_inode_ref) {
1252 add_inode_backref(dst_cache, dst->ino,
1253 backref->dir, backref->index,
1254 backref->name, backref->namelen, 0,
1255 backref->ref_type, backref->errors);
1259 if (src->found_dir_item)
1260 dst->found_dir_item = 1;
1261 if (src->found_file_extent)
1262 dst->found_file_extent = 1;
1263 if (src->found_csum_item)
1264 dst->found_csum_item = 1;
1265 if (src->some_csum_missing)
1266 dst->some_csum_missing = 1;
1267 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1268 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1273 BUG_ON(src->found_link < dir_count);
1274 dst->found_link += src->found_link - dir_count;
1275 dst->found_size += src->found_size;
1276 if (src->extent_start != (u64)-1) {
1277 if (dst->extent_start == (u64)-1) {
1278 dst->extent_start = src->extent_start;
1279 dst->extent_end = src->extent_end;
1281 if (dst->extent_end > src->extent_start)
1282 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1283 else if (dst->extent_end < src->extent_start) {
1284 ret = add_file_extent_hole(&dst->holes,
1286 src->extent_start - dst->extent_end);
1288 if (dst->extent_end < src->extent_end)
1289 dst->extent_end = src->extent_end;
1293 dst->errors |= src->errors;
1294 if (src->found_inode_item) {
1295 if (!dst->found_inode_item) {
1296 dst->nlink = src->nlink;
1297 dst->isize = src->isize;
1298 dst->nbytes = src->nbytes;
1299 dst->imode = src->imode;
1300 dst->nodatasum = src->nodatasum;
1301 dst->found_inode_item = 1;
1303 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Move the inode records cached on @src_node over to @dst_node.
 *
 * Two per-node trees are handled, root_cache first and inode_cache second.
 * Each entry is re-inserted into the matching destination tree; when the
 * destination already holds an entry for it (-EEXIST) the two records are
 * combined with merge_inode_recs() and the source record is freed.
 * Afterwards dst_node->current is moved to the source's current inode if
 * that inode number is larger than the destination's.
 *
 * NOTE(review): the loop framing, the effect of the refcount reaching zero
 * and the return value are not visible in this listing.
 */
1311 static int splice_shared_node(struct shared_node *src_node,
1312 struct shared_node *dst_node)
1314 struct cache_extent *cache;
1315 struct ptr_node *node, *ins;
1316 struct cache_tree *src, *dst;
1317 struct inode_record *rec, *conflict;
1318 u64 current_ino = 0;
/* Drop one reference from the source node. */
1322 if (--src_node->refs == 0)
1324 if (src_node->current)
1325 current_ino = src_node->current->ino;
/* First pass works on the root record cache. */
1327 src = &src_node->root_cache;
1328 dst = &dst_node->root_cache;
1330 cache = search_cache_extent(src, 0);
1332 node = container_of(cache, struct ptr_node, cache);
1334 cache = next_cache_extent(cache);
1337 remove_cache_extent(src, &node->cache);
1340 ins = malloc(sizeof(*ins));
1342 ins->cache.start = node->cache.start;
1343 ins->cache.size = node->cache.size;
1347 ret = insert_cache_extent(dst, &ins->cache);
/* The destination already has an entry at this key: merge into it. */
1348 if (ret == -EEXIST) {
1349 conflict = get_inode_rec(dst, rec->ino, 1);
1350 BUG_ON(IS_ERR(conflict));
1351 merge_inode_recs(rec, conflict, dst);
1353 conflict->checked = 1;
1354 if (dst_node->current == conflict)
1355 dst_node->current = NULL;
1357 maybe_free_inode_rec(dst, conflict);
1358 free_inode_rec(rec);
/* Second pass: switch both cursors to the inode record cache. */
1365 if (src == &src_node->root_cache) {
1366 src = &src_node->inode_cache;
1367 dst = &dst_node->inode_cache;
/* Carry the source's in-progress inode over when it is ahead of dst's. */
1371 if (current_ino > 0 && (!dst_node->current ||
1372 current_ino > dst_node->current->ino)) {
1373 if (dst_node->current) {
1374 dst_node->current->checked = 1;
1375 maybe_free_inode_rec(dst, dst_node->current);
1377 dst_node->current = get_inode_rec(dst, current_ino, 1);
1378 BUG_ON(IS_ERR(dst_node->current));
/*
 * Per-entry destructor: recover the ptr_node that embeds @cache and release
 * an inode record with free_inode_rec().
 * NOTE(review): the assignment of rec is not visible in this listing.
 */
1383 static void free_inode_ptr(struct cache_extent *cache)
1385 struct ptr_node *node;
1386 struct inode_record *rec;
1388 node = container_of(cache, struct ptr_node, cache);
1390 free_inode_rec(rec);
/*
 * Instantiates a tree-freeing helper for inode records that uses
 * free_inode_ptr as the per-entry callback (presumably the
 * free_inode_recs_tree() called elsewhere in this file -- confirm against
 * the macro definition).
 */
1394 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the shared_node stored in @shared for the block at bytenr, using a
 * one-byte lookup range, and convert the cache extent back to its
 * containing shared_node.
 * NOTE(review): the not-found handling and the return are not visible here.
 */
1396 static struct shared_node *find_shared_node(struct cache_tree *shared,
1399 struct cache_extent *cache;
1400 struct shared_node *node;
1402 cache = lookup_cache_extent(shared, bytenr, 1);
1404 node = container_of(cache, struct shared_node, cache);
/*
 * Allocate a zeroed shared_node keyed by @bytenr (extent size 1), initialise
 * its root and inode record caches and insert it into @shared.
 * NOTE(review): how @refs is stored and the error paths are not visible in
 * this listing.
 */
1410 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1413 struct shared_node *node;
1415 node = calloc(1, sizeof(*node));
1418 node->cache.start = bytenr;
1419 node->cache.size = 1;
1420 cache_tree_init(&node->root_cache);
1421 cache_tree_init(&node->inode_cache);
1424 ret = insert_cache_extent(shared, &node->cache);
/*
 * Bookkeeping for the tree walk arriving at block @bytenr on @level.
 *
 * A shared_node is looked up in wc->shared; one is created with @refs and
 * made the active node for @level (presumably only when the lookup found
 * nothing -- the guarding condition is not visible here).  For an existing
 * node the collected records are either dropped once its refcount reaches
 * zero (active node at the root level of a root whose root item has zero
 * refs) or spliced into the currently active node, after which a node with
 * no remaining refs is removed from wc->shared.
 *
 * NOTE(review): return statements and some branch framing are not visible
 * in this listing.
 */
1429 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1430 struct walk_control *wc, int level)
1432 struct shared_node *node;
1433 struct shared_node *dest;
1436 if (level == wc->active_node)
1439 BUG_ON(wc->active_node <= level);
1440 node = find_shared_node(&wc->shared, bytenr);
1442 ret = add_shared_node(&wc->shared, bytenr, refs);
1444 node = find_shared_node(&wc->shared, bytenr);
1445 wc->nodes[level] = node;
1446 wc->active_node = level;
/* Root level of a root with zero refs: discard instead of splicing. */
1450 if (wc->root_level == wc->active_node &&
1451 btrfs_root_refs(&root->root_item) == 0) {
1452 if (--node->refs == 0) {
1453 free_inode_recs_tree(&node->root_cache);
1454 free_inode_recs_tree(&node->inode_cache);
1455 remove_cache_extent(&wc->shared, &node->cache);
/* Hand the records gathered for this block to the active node. */
1461 dest = wc->nodes[wc->active_node];
1462 splice_shared_node(node, dest);
1463 if (node->refs == 0) {
1464 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Bookkeeping for the tree walk moving back up past the active node.
 *
 * Nothing is done at the root level.  Otherwise a level above @level is
 * selected as the new active node (the selection test inside the loop is
 * not visible here), the old active slot is cleared, and the departing
 * node's records are spliced into the new active node unless the new active
 * node is at the root level of a root whose root item has zero refs.
 */
1470 static int leave_shared_node(struct btrfs_root *root,
1471 struct walk_control *wc, int level)
1473 struct shared_node *node;
1474 struct shared_node *dest;
1477 if (level == wc->root_level)
1480 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1484 BUG_ON(i >= BTRFS_MAX_LEVEL);
/* Detach the old active node and make level i the active one. */
1486 node = wc->nodes[wc->active_node];
1487 wc->nodes[wc->active_node] = NULL;
1488 wc->active_node = i;
1490 dest = wc->nodes[wc->active_node];
1491 if (wc->active_node < wc->root_level ||
1492 btrfs_root_refs(&root->root_item) > 0) {
1493 BUG_ON(node->refs <= 1);
1494 splice_shared_node(node, dest);
1496 BUG_ON(node->refs < 2);
1505 * 1 - if the root with id child_root_id is a child of root parent_root_id
1506 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1507 * has other root(s) as parent(s)
1508 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Determine the parent/child relationship of two roots by searching the
 * tree root: first for the exact ROOT_REF key (parent, child), then by
 * scanning the ROOT_BACKREF items of the child for one whose offset is the
 * parent id.
 */
1510 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1513 struct btrfs_path path;
1514 struct btrfs_key key;
1515 struct extent_buffer *leaf;
1519 btrfs_init_path(&path);
/* Direct lookup: ROOT_REF item keyed (parent, ROOT_REF, child). */
1521 key.objectid = parent_root_id;
1522 key.type = BTRFS_ROOT_REF_KEY;
1523 key.offset = child_root_id;
1524 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1528 btrfs_release_path(&path);
/* Fallback: walk the child's ROOT_BACKREF items. */
1532 key.objectid = child_root_id;
1533 key.type = BTRFS_ROOT_BACKREF_KEY;
1535 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1541 leaf = path.nodes[0];
1542 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1543 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1546 leaf = path.nodes[0];
1549 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1550 if (key.objectid != child_root_id ||
1551 key.type != BTRFS_ROOT_BACKREF_KEY)
1556 if (key.offset == parent_root_id) {
1557 btrfs_release_path(&path);
1564 btrfs_release_path(&path);
/* No backref named the parent: 0 if some other parent exists, else 2. */
1567 return has_parent ? 0 : 2;
1570 static int process_dir_item(struct extent_buffer *eb,
1571 int slot, struct btrfs_key *key,
1572 struct shared_node *active_node)
1582 struct btrfs_dir_item *di;
1583 struct inode_record *rec;
1584 struct cache_tree *root_cache;
1585 struct cache_tree *inode_cache;
1586 struct btrfs_key location;
1587 char namebuf[BTRFS_NAME_LEN];
1589 root_cache = &active_node->root_cache;
1590 inode_cache = &active_node->inode_cache;
1591 rec = active_node->current;
1592 rec->found_dir_item = 1;
1594 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1595 total = btrfs_item_size_nr(eb, slot);
1596 while (cur < total) {
1598 btrfs_dir_item_key_to_cpu(eb, di, &location);
1599 name_len = btrfs_dir_name_len(eb, di);
1600 data_len = btrfs_dir_data_len(eb, di);
1601 filetype = btrfs_dir_type(eb, di);
1603 rec->found_size += name_len;
1604 if (cur + sizeof(*di) + name_len > total ||
1605 name_len > BTRFS_NAME_LEN) {
1606 error = REF_ERR_NAME_TOO_LONG;
1608 if (cur + sizeof(*di) > total)
1610 len = min_t(u32, total - cur - sizeof(*di),
1617 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1619 if (key->type == BTRFS_DIR_ITEM_KEY &&
1620 key->offset != btrfs_name_hash(namebuf, len)) {
1621 rec->errors |= I_ERR_ODD_DIR_ITEM;
1622 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1623 key->objectid, key->offset, namebuf, len, filetype,
1624 key->offset, btrfs_name_hash(namebuf, len));
1627 if (location.type == BTRFS_INODE_ITEM_KEY) {
1628 add_inode_backref(inode_cache, location.objectid,
1629 key->objectid, key->offset, namebuf,
1630 len, filetype, key->type, error);
1631 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1632 add_inode_backref(root_cache, location.objectid,
1633 key->objectid, key->offset,
1634 namebuf, len, filetype,
1637 fprintf(stderr, "invalid location in dir item %u\n",
1639 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1640 key->objectid, key->offset, namebuf,
1641 len, filetype, key->type, error);
1644 len = sizeof(*di) + name_len + data_len;
1645 di = (struct btrfs_dir_item *)((char *)di + len);
1648 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1649 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Walk every btrfs_inode_ref packed in the INODE_REF item at @slot and
 * register each name as a backref in the active node's inode cache, using
 * key->objectid as the inode and key->offset as the directory.  A name that
 * is longer than BTRFS_NAME_LEN or crosses the item boundary is truncated
 * and tagged REF_ERR_NAME_TOO_LONG.
 */
1654 static int process_inode_ref(struct extent_buffer *eb,
1655 int slot, struct btrfs_key *key,
1656 struct shared_node *active_node)
1664 struct cache_tree *inode_cache;
1665 struct btrfs_inode_ref *ref;
1666 char namebuf[BTRFS_NAME_LEN];
1668 inode_cache = &active_node->inode_cache;
1670 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1671 total = btrfs_item_size_nr(eb, slot);
1672 while (cur < total) {
1673 name_len = btrfs_inode_ref_name_len(eb, ref);
1674 index = btrfs_inode_ref_index(eb, ref);
1676 /* inode_ref + namelen should not cross item boundary */
1677 if (cur + sizeof(*ref) + name_len > total ||
1678 name_len > BTRFS_NAME_LEN) {
1679 if (total < cur + sizeof(*ref))
1682 /* Still try to read out the remaining part */
1683 len = min_t(u32, total - cur - sizeof(*ref),
1685 error = REF_ERR_NAME_TOO_LONG;
1691 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1692 add_inode_backref(inode_cache, key->objectid, key->offset,
1693 index, namebuf, len, 0, key->type, error);
/* Step to the next packed ref: fixed header plus the name bytes. */
1695 len = sizeof(*ref) + name_len;
1696 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1702 static int process_inode_extref(struct extent_buffer *eb,
1703 int slot, struct btrfs_key *key,
1704 struct shared_node *active_node)
1713 struct cache_tree *inode_cache;
1714 struct btrfs_inode_extref *extref;
1715 char namebuf[BTRFS_NAME_LEN];
1717 inode_cache = &active_node->inode_cache;
1719 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1720 total = btrfs_item_size_nr(eb, slot);
1721 while (cur < total) {
1722 name_len = btrfs_inode_extref_name_len(eb, extref);
1723 index = btrfs_inode_extref_index(eb, extref);
1724 parent = btrfs_inode_extref_parent(eb, extref);
1725 if (name_len <= BTRFS_NAME_LEN) {
1729 len = BTRFS_NAME_LEN;
1730 error = REF_ERR_NAME_TOO_LONG;
1732 read_extent_buffer(eb, namebuf,
1733 (unsigned long)(extref + 1), len);
1734 add_inode_backref(inode_cache, key->objectid, parent,
1735 index, namebuf, len, 0, key->type, error);
1737 len = sizeof(*extref) + name_len;
1738 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Measure how much of the byte range [start, start + len) is covered by
 * EXTENT_CSUM items in the csum root.
 *
 * The search starts at the csum key for @start, stepping back one slot when
 * the preceding item is also a csum item, then walks forward item by item.
 * Each item covers (item size / csum size) sectors starting at its key
 * offset.
 *
 * NOTE(review): the result is presumably reported through @found; the stores
 * to it and the loop framing are not visible in this listing.
 */
1745 static int count_csum_range(struct btrfs_root *root, u64 start,
1746 u64 len, u64 *found)
1748 struct btrfs_key key;
1749 struct btrfs_path path;
1750 struct extent_buffer *leaf;
1755 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1757 btrfs_init_path(&path);
1759 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1761 key.type = BTRFS_EXTENT_CSUM_KEY;
1763 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1767 if (ret > 0 && path.slots[0] > 0) {
1768 leaf = path.nodes[0];
1769 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1770 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1771 key.type == BTRFS_EXTENT_CSUM_KEY)
1776 leaf = path.nodes[0];
1777 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1778 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1783 leaf = path.nodes[0];
1786 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1787 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1788 key.type != BTRFS_EXTENT_CSUM_KEY)
1791 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1792 if (key.offset >= start + len)
1795 if (key.offset > start)
1798 size = btrfs_item_size_nr(leaf, path.slots[0]);
1799 csum_end = key.offset + (size / csum_size) *
1800 root->fs_info->sectorsize;
1801 if (csum_end > start) {
1802 size = min(csum_end - start, len);
1811 btrfs_release_path(&path);
/*
 * Account one EXTENT_DATA item for the inode record currently being built.
 *
 * The covered file range is tracked in rec->extent_start/extent_end, with
 * overlaps flagged and gaps recorded as holes.  The item's fields are
 * validated according to the extent type, rec->found_size is increased, and
 * for extents with a disk address in blocks not owned by the data reloc
 * tree the checksummed bytes backing the extent are counted to set the
 * csum-related flags on the record.
 */
1817 static int process_file_extent(struct btrfs_root *root,
1818 struct extent_buffer *eb,
1819 int slot, struct btrfs_key *key,
1820 struct shared_node *active_node)
1822 struct inode_record *rec;
1823 struct btrfs_file_extent_item *fi;
1825 u64 disk_bytenr = 0;
1826 u64 extent_offset = 0;
1827 u64 mask = root->fs_info->sectorsize - 1;
1831 rec = active_node->current;
1832 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1833 rec->found_file_extent = 1;
/* extent_start == (u64)-1: presumably no extent seen yet for this inode. */
1835 if (rec->extent_start == (u64)-1) {
1836 rec->extent_start = key->offset;
1837 rec->extent_end = key->offset;
/* Compare with the end of the range seen so far: overlap or hole. */
1840 if (rec->extent_end > key->offset)
1841 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1842 else if (rec->extent_end < key->offset) {
1843 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1844 key->offset - rec->extent_end);
1849 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1850 extent_type = btrfs_file_extent_type(eb, fi);
1852 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1853 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1855 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1856 rec->found_size += num_bytes;
1857 num_bytes = (num_bytes + mask) & ~mask; /* round up to sector size */
1858 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1859 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1860 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1861 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1862 extent_offset = btrfs_file_extent_offset(eb, fi);
1863 if (num_bytes == 0 || (num_bytes & mask))
1864 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1865 if (num_bytes + extent_offset >
1866 btrfs_file_extent_ram_bytes(eb, fi))
1867 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1868 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1869 (btrfs_file_extent_compression(eb, fi) ||
1870 btrfs_file_extent_encryption(eb, fi) ||
1871 btrfs_file_extent_other_encoding(eb, fi)))
1872 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1873 if (disk_bytenr > 0)
1874 rec->found_size += num_bytes;
1876 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The covered range now ends after this extent. */
1878 rec->extent_end = key->offset + num_bytes;
1881 * The data reloc tree will copy full extents into its inode and then
1882 * copy the corresponding csums. Because the extent it copied could be
1883 * a preallocated extent that hasn't been written to yet there may be no
1884 * csums to copy, ergo we won't have csums for our file extent. This is
1885 * ok so just don't bother checking csums if the inode belongs to the
1888 if (disk_bytenr > 0 &&
1889 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1891 if (btrfs_file_extent_compression(eb, fi))
1892 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1894 disk_bytenr += extent_offset;
1896 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1899 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1901 rec->found_csum_item = 1;
1902 if (found < num_bytes)
1903 rec->some_csum_missing = 1;
1904 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1906 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Feed every item of leaf @eb into the inode records of the active shared
 * node.
 *
 * Items are visited in slot order.  Whenever the key's objectid is larger
 * than the inode currently being built, the previous record is marked
 * checked and handed to maybe_free_inode_rec(), and a record for the new
 * inode is fetched from the inode cache.  The item is then dispatched by key
 * type to the matching process_*() helper.
 *
 * NOTE(review): the handling of free-space objects, orphan items and of a
 * zero-ref root at the root level is not visible in this listing.
 */
1912 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1913 struct walk_control *wc)
1915 struct btrfs_key key;
1919 struct cache_tree *inode_cache;
1920 struct shared_node *active_node;
1922 if (wc->root_level == wc->active_node &&
1923 btrfs_root_refs(&root->root_item) == 0)
1926 active_node = wc->nodes[wc->active_node];
1927 inode_cache = &active_node->inode_cache;
1928 nritems = btrfs_header_nritems(eb);
1929 for (i = 0; i < nritems; i++) {
1930 btrfs_item_key_to_cpu(eb, &key, i);
1932 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1934 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1937 if (active_node->current == NULL ||
1938 active_node->current->ino < key.objectid) {
1939 if (active_node->current) {
1940 active_node->current->checked = 1;
1941 maybe_free_inode_rec(inode_cache,
1942 active_node->current);
1944 active_node->current = get_inode_rec(inode_cache,
1946 BUG_ON(IS_ERR(active_node->current));
1949 case BTRFS_DIR_ITEM_KEY:
1950 case BTRFS_DIR_INDEX_KEY:
1951 ret = process_dir_item(eb, i, &key, active_node);
1953 case BTRFS_INODE_REF_KEY:
1954 ret = process_inode_ref(eb, i, &key, active_node);
1956 case BTRFS_INODE_EXTREF_KEY:
1957 ret = process_inode_extref(eb, i, &key, active_node);
1959 case BTRFS_INODE_ITEM_KEY:
1960 ret = process_inode_item(eb, i, &key, active_node);
1962 case BTRFS_EXTENT_DATA_KEY:
1963 ret = process_file_extent(root, eb, i, &key,
/* Per-level cache, indexed by tree level, filled by update_nodes_refs(). */
1974 u64 bytenr[BTRFS_MAX_LEVEL]; /* block address last looked up at this level */
1975 u64 refs[BTRFS_MAX_LEVEL]; /* extent reference count found for that block */
1976 int need_check[BTRFS_MAX_LEVEL]; /* non-zero if this root should check the block */
/* Forward declarations: both functions are defined further down in this file. */
1979 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1980 struct node_refs *nrefs, u64 level);
1981 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1982 unsigned int ext_ref);
1985 * Returns >0 Found error, not fatal, should continue
1986 * Returns <0 Fatal error, must exit the whole check
1987 * Returns 0 No errors found
/*
 * Check the inodes stored in the leaf at path->nodes[0].
 *
 * The slot is first advanced to the first INODE_ITEM or to the first change
 * of inode number, then check_inode_item() is run with its results OR-ed
 * into a positive error bitmap.  check_inode_item() may move @path to a
 * different leaf; when that happens the cached per-level reference
 * information in @nrefs is refreshed from the root level downwards.
 */
1989 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1990 struct node_refs *nrefs, int *level, int ext_ref)
1992 struct extent_buffer *cur = path->nodes[0];
1993 struct btrfs_key key;
1997 int root_level = btrfs_header_level(root->node);
1999 int ret = 0; /* Final return value */
2000 int err = 0; /* Positive error bitmap */
/* Remember which leaf we started on to detect a leaf switch later. */
2002 cur_bytenr = cur->start;
2004 /* skip to first inode item or the first inode number change */
2005 nritems = btrfs_header_nritems(cur);
2006 for (i = 0; i < nritems; i++) {
2007 btrfs_item_key_to_cpu(cur, &key, i);
2009 first_ino = key.objectid;
2010 if (key.type == BTRFS_INODE_ITEM_KEY ||
2011 (first_ino && first_ino != key.objectid))
2015 path->slots[0] = nritems;
2021 err |= check_inode_item(root, path, ext_ref);
2023 /* modify cur since check_inode_item may change path */
2024 cur = path->nodes[0];
2026 if (err & LAST_ITEM)
2029 /* still have inode items in this leaf */
2030 if (cur->start == cur_bytenr)
2034 * we have switched to another leaf, above nodes may
2035 * have changed, here walk down the path, if a node
2036 * or leaf is shared, check whether we can skip this
2039 for (i = root_level; i >= 0; i--) {
2040 if (path->nodes[i]->start == nrefs->bytenr[i])
2043 ret = update_nodes_refs(root,
2044 path->nodes[i]->start,
2049 if (!nrefs->need_check[i]) {
2055 for (i = 0; i < *level; i++) {
2056 free_extent_buffer(path->nodes[i]);
2057 path->nodes[i] = NULL;
/*
 * Issue readahead for the child blocks referenced by @node, starting at
 * @slot and continuing to the last pointer in the node.
 * NOTE(review): any restriction on the node's level is not visible here.
 */
2066 static void reada_walk_down(struct btrfs_root *root,
2067 struct extent_buffer *node, int slot)
2069 struct btrfs_fs_info *fs_info = root->fs_info;
2076 level = btrfs_header_level(node);
2080 nritems = btrfs_header_nritems(node);
2081 for (i = slot; i < nritems; i++) {
2082 bytenr = btrfs_node_blockptr(node, i);
2083 ptr_gen = btrfs_node_ptr_generation(node, i);
2084 readahead_tree_block(fs_info, bytenr, ptr_gen);
2089 * Check the child node/leaf by the following condition:
2090 * 1. the first item key of the node/leaf should be the same with the one
2092 * 2. block in parent node should match the child node/leaf.
2093 * 3. generation of parent node and child's header should be consistent.
2095 * Or the child node/leaf pointed by the key in parent is not valid.
2097 * We hope to check leaf owner too, but since subvol may share leaves,
2098 * which makes leaf owner check not so strong, key check should be
2099 * sufficient enough for that case.
2101 static int check_child_node(struct extent_buffer *parent, int slot,
2102 struct extent_buffer *child)
2104 struct btrfs_key parent_key;
2105 struct btrfs_key child_key;
2108 btrfs_node_key_to_cpu(parent, &parent_key, slot);
2109 if (btrfs_header_level(child) == 0)
2110 btrfs_item_key_to_cpu(child, &child_key, 0);
2112 btrfs_node_key_to_cpu(child, &child_key, 0);
2114 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2117 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2118 parent_key.objectid, parent_key.type, parent_key.offset,
2119 child_key.objectid, child_key.type, child_key.offset);
2121 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2123 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2124 btrfs_node_blockptr(parent, slot),
2125 btrfs_header_bytenr(child));
2127 if (btrfs_node_ptr_generation(parent, slot) !=
2128 btrfs_header_generation(child)) {
2130 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2131 btrfs_header_generation(child),
2132 btrfs_node_ptr_generation(parent, slot));
2138 * For a tree node or leaf that is shared, we don't need to iterate it in
2139 * every fs or file tree check. Here we find all of its root ids, and only
2140 * check it in the fs or file tree which has the smallest root id.
/*
 * Decide whether @root should check a block, given @roots, the list of
 * roots that reference it.  A block referenced by a single root is handled
 * separately from one referenced by several; for the latter the id of
 * @root is compared with the first entry of the rb-tree backing @roots.
 * NOTE(review): the return statements are not visible in this listing.
 */
2142 static int need_check(struct btrfs_root *root, struct ulist *roots)
2144 struct rb_node *node;
2145 struct ulist_node *u;
2147 if (roots->nnodes == 1)
2150 node = rb_first(&roots->root);
2151 u = rb_entry(node, struct ulist_node, rb_node);
2153 * current root id is not smallest, we skip it and let it be checked
2154 * in the fs or file tree which has the smallest root id.
2156 if (root->objectid != u->val)
2163 * For a tree node or leaf, we record its reference count, so that if we
2164 * process this node or leaf again later we don't need to compute it again.
/*
 * Refresh the cached information in @nrefs for the block @bytenr at @level.
 *
 * Work is only done when the cached bytenr for that level differs: the
 * extent reference count is looked up and stored, and need_check[level] is
 * set either from need_check() on the list of roots referencing the block
 * or to 1.
 * NOTE(review): the condition choosing between those two cases and the
 * error handling are not visible in this listing.
 */
2166 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2167 struct node_refs *nrefs, u64 level)
2171 struct ulist *roots;
2173 if (nrefs->bytenr[level] != bytenr) {
2174 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2175 level, 1, &refs, NULL);
2179 nrefs->bytenr[level] = bytenr;
2180 nrefs->refs[level] = refs;
2182 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2187 check = need_check(root, roots);
2189 nrefs->need_check[level] = check;
2191 nrefs->need_check[level] = 1;
/*
 * Descend from path->nodes[*level] towards the leaves, processing each leaf
 * with process_one_leaf().
 *
 * Extent reference counts are taken from @nrefs when the block address
 * matches the cached one and looked up (and cached) otherwise.  Before a
 * child is entered it is found in cache or read from disk, validated against
 * its parent slot with check_child_node() and with btrfs_check_leaf() /
 * btrfs_check_node(); an unreadable child is recorded as a corrupt extent.
 * On the way out the slot at *level is set to the item count so the caller
 * sees the node as exhausted.
 *
 * NOTE(review): the conditions guarding enter_shared_node() and most error
 * paths are not visible in this listing.
 */
2198 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2199 struct walk_control *wc, int *level,
2200 struct node_refs *nrefs)
2202 enum btrfs_tree_block_status status;
2205 struct btrfs_fs_info *fs_info = root->fs_info;
2206 struct extent_buffer *next;
2207 struct extent_buffer *cur;
2211 WARN_ON(*level < 0);
2212 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reference count of the starting block: cached value or fresh lookup. */
2214 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2215 refs = nrefs->refs[*level];
2218 ret = btrfs_lookup_extent_info(NULL, root,
2219 path->nodes[*level]->start,
2220 *level, 1, &refs, NULL);
2225 nrefs->bytenr[*level] = path->nodes[*level]->start;
2226 nrefs->refs[*level] = refs;
2230 ret = enter_shared_node(root, path->nodes[*level]->start,
2238 while (*level >= 0) {
2239 WARN_ON(*level < 0);
2240 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2241 cur = path->nodes[*level];
2243 if (btrfs_header_level(cur) != *level)
2246 if (path->slots[*level] >= btrfs_header_nritems(cur))
2249 ret = process_one_leaf(root, cur, wc);
/* Interior node: pick up the child pointer at the current slot. */
2254 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2255 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2257 if (bytenr == nrefs->bytenr[*level - 1]) {
2258 refs = nrefs->refs[*level - 1];
2260 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2261 *level - 1, 1, &refs, NULL);
2265 nrefs->bytenr[*level - 1] = bytenr;
2266 nrefs->refs[*level - 1] = refs;
2271 ret = enter_shared_node(root, bytenr, refs,
2274 path->slots[*level]++;
/* Use the cached buffer if it is up to date, otherwise read the block. */
2279 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2280 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2281 free_extent_buffer(next);
2282 reada_walk_down(root, cur, path->slots[*level]);
2283 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2284 if (!extent_buffer_uptodate(next)) {
2285 struct btrfs_key node_key;
2287 btrfs_node_key_to_cpu(path->nodes[*level],
2289 path->slots[*level]);
2290 btrfs_add_corrupt_extent_record(root->fs_info,
2292 path->nodes[*level]->start,
2293 root->fs_info->nodesize,
2300 ret = check_child_node(cur, path->slots[*level], next);
2302 free_extent_buffer(next);
2307 if (btrfs_is_leaf(next))
2308 status = btrfs_check_leaf(root, NULL, next);
2310 status = btrfs_check_node(root, NULL, next);
2311 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2312 free_extent_buffer(next);
/* Step down one level and start at the first slot of the child. */
2317 *level = *level - 1;
2318 free_extent_buffer(path->nodes[*level]);
2319 path->nodes[*level] = next;
2320 path->slots[*level] = 0;
2323 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/* Forward declaration; repeats the identical prototype declared earlier in this file. */
2327 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2328 unsigned int ext_ref);
2331 * Returns >0 Found error, should continue
2332 * Returns <0 Fatal error, must exit the whole check
2333 * Returns 0 No errors found
/*
 * Variant of the downward tree walk that uses process_one_leaf_v2().
 *
 * Starting at path->nodes[*level], each block is validated with
 * btrfs_check_leaf() or btrfs_check_node(); leaves are handed to
 * process_one_leaf_v2().  For interior nodes the per-level reference cache
 * in @nrefs is refreshed for the child, children whose need_check flag is
 * clear are skipped by advancing the slot, and the remaining children are
 * found in cache or read, verified with check_child_node() and entered.
 *
 * NOTE(review): branch framing and error paths are not visible in this
 * listing.
 */
2335 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2336 int *level, struct node_refs *nrefs, int ext_ref)
2338 enum btrfs_tree_block_status status;
2341 struct btrfs_fs_info *fs_info = root->fs_info;
2342 struct extent_buffer *next;
2343 struct extent_buffer *cur;
2346 WARN_ON(*level < 0);
2347 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2349 ret = update_nodes_refs(root, path->nodes[*level]->start,
2354 while (*level >= 0) {
2355 WARN_ON(*level < 0);
2356 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2357 cur = path->nodes[*level];
2359 if (btrfs_header_level(cur) != *level)
2362 if (path->slots[*level] >= btrfs_header_nritems(cur))
2364 /* Don't forget to check leaf/node validity */
2366 ret = btrfs_check_leaf(root, NULL, cur);
2367 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2371 ret = process_one_leaf_v2(root, path, nrefs,
2373 cur = path->nodes[*level];
2376 ret = btrfs_check_node(root, NULL, cur);
2377 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
/* Child pointer at the current slot of this interior node. */
2382 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2383 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2385 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2388 if (!nrefs->need_check[*level - 1]) {
2389 path->slots[*level]++;
/* Use the cached buffer if it is up to date, otherwise read the block. */
2393 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2394 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2395 free_extent_buffer(next);
2396 reada_walk_down(root, cur, path->slots[*level]);
2397 next = read_tree_block(fs_info, bytenr, ptr_gen);
2398 if (!extent_buffer_uptodate(next)) {
2399 struct btrfs_key node_key;
2401 btrfs_node_key_to_cpu(path->nodes[*level],
2403 path->slots[*level]);
2404 btrfs_add_corrupt_extent_record(fs_info,
2406 path->nodes[*level]->start,
2414 ret = check_child_node(cur, path->slots[*level], next);
2418 if (btrfs_is_leaf(next))
2419 status = btrfs_check_leaf(root, NULL, next);
2421 status = btrfs_check_node(root, NULL, next);
2422 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2423 free_extent_buffer(next);
/* Step down one level and start at the first slot of the child. */
2428 *level = *level - 1;
2429 free_extent_buffer(path->nodes[*level]);
2430 path->nodes[*level] = next;
2431 path->slots[*level] = 0;
/*
 * Climb @path from *level, looking for a level that still has a slot after
 * the current one.  A level with none left has its buffer released and its
 * path entry cleared, and leave_shared_node() is called when that level is
 * the walk's active node.
 * NOTE(review): the slot/level updates and return values are not visible in
 * this listing.
 */
2436 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2437 struct walk_control *wc, int *level)
2440 struct extent_buffer *leaf;
2442 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2443 leaf = path->nodes[i];
2444 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2449 free_extent_buffer(path->nodes[*level]);
2450 path->nodes[*level] = NULL;
2451 BUG_ON(*level > wc->active_node);
2452 if (*level == wc->active_node)
2453 leave_shared_node(root, wc, *level);
/*
 * Climb @path from *level, looking for a level that still has a slot after
 * the current one; a level with none left has its buffer released and its
 * path entry cleared.  Unlike walk_up_tree() no shared-node bookkeeping is
 * visible here.
 * NOTE(review): the slot/level updates and return values are not visible in
 * this listing.
 */
2460 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2464 struct extent_buffer *leaf;
2466 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2467 leaf = path->nodes[i];
2468 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2473 free_extent_buffer(path->nodes[*level]);
2474 path->nodes[*level] = NULL;
/*
 * Sanity-check the inode record of a root directory.  The tests cover: an
 * inode item was found and no errors are set, nlink is 1 with no found
 * links, a backref exists, and the first backref is an inode ref named ".."
 * with index 0 that has no dir item or dir index.
 * NOTE(review): the outcome of each test is not visible in this listing.
 */
2481 static int check_root_dir(struct inode_record *rec)
2483 struct inode_backref *backref;
2486 if (!rec->found_inode_item || rec->errors)
2488 if (rec->nlink != 1 || rec->found_link != 0)
2490 if (list_empty(&rec->backrefs))
2492 backref = to_inode_backref(rec->backrefs.next);
2493 if (!backref->found_inode_ref)
2495 if (backref->index != 0 || backref->namelen != 2 ||
2496 memcmp(backref->name, "..", 2))
2498 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size field of the inode item of rec->ino with rec->found_size
 * and clear I_ERR_DIR_ISIZE_WRONG on the record.
 *
 * The search key uses offset (u64)-1, and the key read back from the path is
 * compared against rec->ino before the item is modified.
 * NOTE(review): the slot adjustment after the search and the error paths are
 * not visible in this listing.
 */
2505 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2506 struct btrfs_root *root, struct btrfs_path *path,
2507 struct inode_record *rec)
2509 struct btrfs_inode_item *ei;
2510 struct btrfs_key key;
2513 key.objectid = rec->ino;
2514 key.type = BTRFS_INODE_ITEM_KEY;
2515 key.offset = (u64)-1;
2517 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2521 if (!path->slots[0]) {
2528 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2529 if (key.objectid != rec->ino) {
2534 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2535 struct btrfs_inode_item);
2536 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2537 btrfs_mark_buffer_dirty(path->nodes[0]);
2538 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2539 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2540 root->root_key.objectid);
2542 btrfs_release_path(path);
/*
 * Add an orphan item for rec->ino in @root, release @path, and clear
 * I_ERR_NO_ORPHAN_ITEM on the record.
 * NOTE(review): whether the flag is cleared only on success is not visible
 * in this listing.
 */
2546 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2547 struct btrfs_root *root,
2548 struct btrfs_path *path,
2549 struct inode_record *rec)
2553 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2554 btrfs_release_path(path);
2556 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field of the inode item of rec->ino with
 * rec->found_size and clear I_ERR_FILE_NBYTES_WRONG on the record.
 * NOTE(review): the key offset used for the search and the handling of a
 * failed search are not visible in this listing.
 */
2560 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2561 struct btrfs_root *root,
2562 struct btrfs_path *path,
2563 struct inode_record *rec)
2565 struct btrfs_inode_item *ei;
2566 struct btrfs_key key;
2569 key.objectid = rec->ino;
2570 key.type = BTRFS_INODE_ITEM_KEY;
2573 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2580 /* Since ret == 0, no need to check anything */
2581 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2582 struct btrfs_inode_item);
2583 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2584 btrfs_mark_buffer_dirty(path->nodes[0]);
2585 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2586 printf("reset nbytes for ino %llu root %llu\n",
2587 rec->ino, root->root_key.objectid);
2589 btrfs_release_path(path);
/*
 * Re-create the DIR_INDEX item described by @backref.
 *
 * Within its own transaction a new item keyed (backref->dir, DIR_INDEX,
 * backref->index) is inserted; it points at the INODE_ITEM of rec->ino,
 * carries the type derived from rec->imode, and stores the backref's name
 * with no extra data.  After the commit the backref is marked as having a
 * dir index and the cached record of the parent directory has its
 * found_size and I_ERR_DIR_ISIZE_WRONG flag brought up to date.
 * NOTE(review): error handling is not visible in this listing.
 */
2593 static int add_missing_dir_index(struct btrfs_root *root,
2594 struct cache_tree *inode_cache,
2595 struct inode_record *rec,
2596 struct inode_backref *backref)
2598 struct btrfs_path path;
2599 struct btrfs_trans_handle *trans;
2600 struct btrfs_dir_item *dir_item;
2601 struct extent_buffer *leaf;
2602 struct btrfs_key key;
2603 struct btrfs_disk_key disk_key;
2604 struct inode_record *dir_rec;
2605 unsigned long name_ptr;
2606 u32 data_size = sizeof(*dir_item) + backref->namelen;
2609 trans = btrfs_start_transaction(root, 1);
2611 return PTR_ERR(trans);
2613 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2614 (unsigned long long)rec->ino);
2616 btrfs_init_path(&path);
2617 key.objectid = backref->dir;
2618 key.type = BTRFS_DIR_INDEX_KEY;
2619 key.offset = backref->index;
2620 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2623 leaf = path.nodes[0];
2624 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* Location stored in the entry: the inode item of rec->ino. */
2626 disk_key.objectid = cpu_to_le64(rec->ino);
2627 disk_key.type = BTRFS_INODE_ITEM_KEY;
2628 disk_key.offset = 0;
2630 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2631 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2632 btrfs_set_dir_data_len(leaf, dir_item, 0);
2633 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name bytes follow the fixed dir item header. */
2634 name_ptr = (unsigned long)(dir_item + 1);
2635 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2636 btrfs_mark_buffer_dirty(leaf);
2637 btrfs_release_path(&path);
2638 btrfs_commit_transaction(trans, root);
2640 backref->found_dir_index = 1;
2641 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2642 BUG_ON(IS_ERR(dir_rec));
/* The new entry adds its name length to the directory's size. */
2645 dir_rec->found_size += backref->namelen;
2646 if (dir_rec->found_size == dir_rec->isize &&
2647 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2648 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2649 if (dir_rec->found_size != dir_rec->isize)
2650 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by @backref, inside its own
 * transaction.  The entry is looked up by directory, name and index; it is
 * then removed either with btrfs_del_item() or with
 * btrfs_delete_one_dir_name(), and the transaction is committed.
 * NOTE(review): the conditions selecting between the two removal calls and
 * the early-exit path are not visible in this listing.
 */
2655 static int delete_dir_index(struct btrfs_root *root,
2656 struct inode_backref *backref)
2658 struct btrfs_trans_handle *trans;
2659 struct btrfs_dir_item *di;
2660 struct btrfs_path path;
2663 trans = btrfs_start_transaction(root, 1);
2665 return PTR_ERR(trans);
2667 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2668 (unsigned long long)backref->dir,
2669 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2670 (unsigned long long)root->objectid);
2672 btrfs_init_path(&path);
2673 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2674 backref->name, backref->namelen,
2675 backref->index, -1);
2678 btrfs_release_path(&path);
2679 btrfs_commit_transaction(trans, root);
2686 ret = btrfs_del_item(trans, root, &path);
2688 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2690 btrfs_release_path(&path);
2691 btrfs_commit_transaction(trans, root);
2695 static int __create_inode_item(struct btrfs_trans_handle *trans,
2696 struct btrfs_root *root, u64 ino, u64 size,
2697 u64 nbytes, u64 nlink, u32 mode)
2699 struct btrfs_inode_item ii;
2700 time_t now = time(NULL);
2703 btrfs_set_stack_inode_size(&ii, size);
2704 btrfs_set_stack_inode_nbytes(&ii, nbytes);
2705 btrfs_set_stack_inode_nlink(&ii, nlink);
2706 btrfs_set_stack_inode_mode(&ii, mode);
2707 btrfs_set_stack_inode_generation(&ii, trans->transid);
2708 btrfs_set_stack_timespec_nsec(&ii.atime, 0);
2709 btrfs_set_stack_timespec_sec(&ii.ctime, now);
2710 btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
2711 btrfs_set_stack_timespec_sec(&ii.mtime, now);
2712 btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
2713 btrfs_set_stack_timespec_sec(&ii.otime, 0);
2714 btrfs_set_stack_timespec_nsec(&ii.otime, 0);
2716 ret = btrfs_insert_inode(trans, root, ino, &ii);
2719 warning("root %llu inode %llu recreating inode item, this may "
2720 "be incomplete, please check permissions and content after "
2721 "the fsck completes.\n", (unsigned long long)root->objectid,
2722 (unsigned long long)ino);
/*
 * Create an inode item for @ino with size, nbytes and nlink all zero.  The
 * mode is a directory when filetype is BTRFS_FT_DIR and a regular file
 * otherwise, with permission bits 0755 in both cases.
 */
2727 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2728 struct btrfs_root *root, u64 ino,
2731 u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2733 return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
/*
 * Re-create the missing inode item of @rec inside its own transaction.
 *
 * The link count is 1 for a root directory and rec->found_link otherwise.
 * A record that has dir items becomes a directory sized rec->found_size
 * (with a message if it also has file extents); any other record becomes a
 * regular file sized rec->extent_end.  Permission bits are 0755 either way.
 */
2736 static int create_inode_item(struct btrfs_root *root,
2737 struct inode_record *rec, int root_dir)
2739 struct btrfs_trans_handle *trans;
2745 trans = btrfs_start_transaction(root, 1);
2746 if (IS_ERR(trans)) {
2747 ret = PTR_ERR(trans);
2751 nlink = root_dir ? 1 : rec->found_link;
2752 if (rec->found_dir_item) {
2753 if (rec->found_file_extent)
2754 fprintf(stderr, "root %llu inode %llu has both a dir "
2755 "item and extents, unsure if it is a dir or a "
2756 "regular file so setting it as a directory\n",
2757 (unsigned long long)root->objectid,
2758 (unsigned long long)rec->ino);
2759 mode = S_IFDIR | 0755;
2760 size = rec->found_size;
2761 } else if (!rec->found_dir_item) {
2762 size = rec->extent_end;
2763 mode = S_IFREG | 0755;
2766 ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
2768 btrfs_commit_transaction(trans, root);
/*
 * Walk every backref of @rec and try to repair it.
 *
 * The 'delete' flag selects the pass: the non-delete cases below are all
 * guarded by "!delete". Cases handled, in order:
 *  - root directory without an inode item: recreate the inode item
 *  - dir index without inode ref, or with an index mismatch
 *    (REF_ERR_INDEX_UNMATCH): delete the dir index and drop the backref
 *    NOTE(review): presumably only done when 'delete' is set - confirm
 *  - dir item and inode ref but no dir index: add the missing dir index
 *  - dir item, dir index and inode ref all present without errors: the
 *    backref is fine and is removed from the list
 *  - only the inode ref present: after check_dir_conflict(), insert the
 *    missing dir index/item pair in a new transaction
 *  - all three references present but no inode item: recreate the inode
 *    item
 *
 * Returns 'ret' when it is non-zero, otherwise 'repaired'.
 */
2772 static int repair_inode_backrefs(struct btrfs_root *root,
2773 struct inode_record *rec,
2774 struct cache_tree *inode_cache,
2777 struct inode_backref *tmp, *backref;
2778 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2782 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2783 if (!delete && rec->ino == root_dirid) {
2784 if (!rec->found_inode_item) {
2785 ret = create_inode_item(root, rec, 1);
2792 /* Index 0 for root dir's are special, don't mess with it */
2793 if (rec->ino == root_dirid && backref->index == 0)
2797 ((backref->found_dir_index && !backref->found_inode_ref) ||
2798 (backref->found_dir_index && backref->found_inode_ref &&
2799 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2800 ret = delete_dir_index(root, backref);
2804 list_del(&backref->list);
2809 if (!delete && !backref->found_dir_index &&
2810 backref->found_dir_item && backref->found_inode_ref) {
2811 ret = add_missing_dir_index(root, inode_cache, rec,
2816 if (backref->found_dir_item &&
2817 backref->found_dir_index) {
2818 if (!backref->errors &&
2819 backref->found_inode_ref) {
2820 list_del(&backref->list);
2827 if (!delete && (!backref->found_dir_index &&
2828 !backref->found_dir_item &&
2829 backref->found_inode_ref)) {
2830 struct btrfs_trans_handle *trans;
2831 struct btrfs_key location;
2833 ret = check_dir_conflict(root, backref->name,
2839 * let nlink fixing routine to handle it,
2840 * which can do it better.
2845 location.objectid = rec->ino;
2846 location.type = BTRFS_INODE_ITEM_KEY;
2847 location.offset = 0;
2849 trans = btrfs_start_transaction(root, 1);
2850 if (IS_ERR(trans)) {
2851 ret = PTR_ERR(trans);
2854 fprintf(stderr, "adding missing dir index/item pair "
2856 (unsigned long long)rec->ino);
2857 ret = btrfs_insert_dir_item(trans, root, backref->name,
2859 backref->dir, &location,
2860 imode_to_type(rec->imode),
2863 btrfs_commit_transaction(trans, root);
2867 if (!delete && (backref->found_inode_ref &&
2868 backref->found_dir_index &&
2869 backref->found_dir_item &&
2870 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2871 !rec->found_inode_item)) {
2872 ret = create_inode_item(root, rec, 0);
2879 return ret ? ret : repaired;
2883 * To determine the file type for nlink/inode_item repair
2885 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2886 * Return -ENOENT if file type is not found.
/*
 * Work out the BTRFS_FT_* type of @rec and store it in *type.
 *
 * When the inode item was found the type is derived from rec->imode.
 * Otherwise the filetype recorded in a backref that has a dir index or a
 * dir item is used.
 */
2888 static int find_file_type(struct inode_record *rec, u8 *type)
2890 struct inode_backref *backref;
2892 /* For inode item recovered case */
2893 if (rec->found_inode_item) {
2894 *type = imode_to_type(rec->imode);
2898 list_for_each_entry(backref, &rec->backrefs, list) {
2899 if (backref->found_dir_index || backref->found_dir_item) {
2900 *type = backref->filetype;
2908 * To determine the file name for nlink repair
2910 * Return 0 if file name is found, set name and namelen.
2911 * Return -ENOENT if file name is not found.
/*
 * Copy a file name for @rec into @name and its length into *namelen.
 *
 * The name is taken from a backref that has at least one of dir index,
 * dir item or inode ref.
 *
 * NOTE(review): the name is copied with memcpy() for exactly
 * backref->namelen bytes, so no NUL terminator is written here; @name
 * must be large enough for the longest name - confirm against callers.
 */
2913 static int find_file_name(struct inode_record *rec,
2914 char *name, int *namelen)
2916 struct inode_backref *backref;
2918 list_for_each_entry(backref, &rec->backrefs, list) {
2919 if (backref->found_dir_index || backref->found_dir_item ||
2920 backref->found_inode_ref) {
2921 memcpy(name, backref->name, backref->namelen);
2922 *namelen = backref->namelen;
2929 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of @rec from scratch.
 *
 * Steps:
 *  1. rec->found_link is reset to 0.
 *  2. Every backref is unlinked with btrfs_unlink(); backrefs that do not
 *     have dir index, dir item and inode ref all present are also removed
 *     from rec->backrefs so they are not re-added.
 *  3. The inode item is looked up (with COW) and its nlink is set to 0.
 *  4. The remaining backrefs are added back with btrfs_add_link().
 *
 * NOTE(review): how rec->found_link is brought back up after step 1 is
 * not visible in these statements - confirm.
 */
2930 static int reset_nlink(struct btrfs_trans_handle *trans,
2931 struct btrfs_root *root,
2932 struct btrfs_path *path,
2933 struct inode_record *rec)
2935 struct inode_backref *backref;
2936 struct inode_backref *tmp;
2937 struct btrfs_key key;
2938 struct btrfs_inode_item *inode_item;
2941 /* We don't believe this either, reset it and iterate backref */
2942 rec->found_link = 0;
2944 /* Remove all backref including the valid ones */
2945 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2946 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2947 backref->index, backref->name,
2948 backref->namelen, 0);
2952 /* remove invalid backref, so it won't be added back */
2953 if (!(backref->found_dir_index &&
2954 backref->found_dir_item &&
2955 backref->found_inode_ref)) {
2956 list_del(&backref->list);
2963 /* Set nlink to 0 */
2964 key.objectid = rec->ino;
2965 key.type = BTRFS_INODE_ITEM_KEY;
2967 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2974 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2975 struct btrfs_inode_item);
2976 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2977 btrfs_mark_buffer_dirty(path->nodes[0]);
2978 btrfs_release_path(path);
2981 * Add back valid inode_ref/dir_item/dir_index,
2982 * add_link() will handle the nlink inc, so new nlink must be correct
2984 list_for_each_entry(backref, &rec->backrefs, list) {
2985 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2986 backref->name, backref->namelen,
2987 backref->filetype, &backref->index, 1, 0);
2992 btrfs_release_path(path);
/*
 * Find the highest inode number currently used in @root and store it in
 * *highest_ino.
 *
 * The tree is searched for an INODE_ITEM key with objectid
 * BTRFS_LAST_FREE_OBJECTID; the key of the slot just before the returned
 * position gives the highest existing objectid. A result at or above
 * BTRFS_LAST_FREE_OBJECTID is treated specially.
 *
 * NOTE(review): "path->slots[0] - 1" assumes the search did not stop at
 * slot 0 - confirm the guarding condition.
 */
2996 static int get_highest_inode(struct btrfs_trans_handle *trans,
2997 struct btrfs_root *root,
2998 struct btrfs_path *path,
3001 struct btrfs_key key, found_key;
3004 btrfs_init_path(path);
3005 key.objectid = BTRFS_LAST_FREE_OBJECTID;
3007 key.type = BTRFS_INODE_ITEM_KEY;
3008 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3010 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3011 path->slots[0] - 1);
3012 *highest_ino = found_key.objectid;
3015 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3017 btrfs_release_path(path);
3022 * Link inode to dir 'lost+found'. Increase @ref_count.
3024 * Returns 0 means success.
3025 * Returns <0 means failure.
/*
 * Link inode @ino into the "lost+found" directory of @root.
 *
 * get_highest_inode() supplies the starting value of lost_found_ino that
 * is handed to btrfs_mkdir(), which is called with
 * BTRFS_FIRST_FREE_OBJECTID as the parent; the inode is then linked into
 * that directory under @namebuf.
 * While btrfs_add_link() reports -EEXIST, a suffix for the inode number is
 * appended to the name and the link is retried, as long as the result
 * still fits in BTRFS_NAME_LEN.
 *
 * @namebuf is modified in place when a suffix is appended, so it must be
 * a writable buffer of at least BTRFS_NAME_LEN bytes.
 *
 * NOTE(review): the update of @ref_count is not visible in these
 * statements - confirm where it is incremented.
 */
3027 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
3028 struct btrfs_root *root,
3029 struct btrfs_path *path,
3030 u64 ino, char *namebuf, u32 name_len,
3031 u8 filetype, u64 *ref_count)
3033 char *dir_name = "lost+found";
3038 btrfs_release_path(path);
3039 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3044 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3045 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3048 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3051 ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3052 namebuf, name_len, filetype, NULL, 1, 0);
3054 * Add ".INO" suffix several times to handle case where
3055 * "FILENAME.INO" is already taken by another file.
3057 while (ret == -EEXIST) {
3059 * Conflicting file name, add ".INO" as suffix * +1 for '.'
3061 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
3065 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3067 name_len += count_digits(ino) + 1;
3068 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3069 name_len, filetype, NULL, 1, 0);
3072 error("failed to link the inode %llu to %s dir: %s",
3073 ino, dir_name, strerror(-ret));
3078 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3079 name_len, namebuf, dir_name);
3081 btrfs_release_path(path);
3083 error("failed to move file '%.*s' to '%s' dir", name_len,
/*
 * Repair a wrong link count for @rec.
 *
 * The file name and type are recovered first, because reset_nlink() may
 * drop the backrefs they come from. Fallbacks: the inode number printed
 * as decimal for the name, BTRFS_FT_REG_FILE for the type.
 * After reset_nlink(), an inode that is left with found_link == 0 is
 * linked into lost+found.
 *
 * I_ERR_LINK_COUNT_WRONG is cleared regardless of the outcome so the same
 * inode is not retried forever.
 *
 * NOTE(review): &rec->found_link is cast to (u64 *); confirm that
 * found_link really is 64 bits wide, otherwise the callee writes past it.
 */
3088 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3089 struct btrfs_root *root,
3090 struct btrfs_path *path,
3091 struct inode_record *rec)
3093 char namebuf[BTRFS_NAME_LEN] = {0};
3096 int name_recovered = 0;
3097 int type_recovered = 0;
3101 * Get file name and type first before these invalid inode ref
3102 * are deleted by remove_all_invalid_backref()
3104 name_recovered = !find_file_name(rec, namebuf, &namelen);
3105 type_recovered = !find_file_type(rec, &type);
3107 if (!name_recovered) {
3108 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3109 rec->ino, rec->ino);
3110 namelen = count_digits(rec->ino);
3111 sprintf(namebuf, "%llu", rec->ino);
3114 if (!type_recovered) {
3115 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3117 type = BTRFS_FT_REG_FILE;
3121 ret = reset_nlink(trans, root, path, rec);
3124 "Failed to reset nlink for inode %llu: %s\n",
3125 rec->ino, strerror(-ret));
/* No valid link survived the reset: park the inode in lost+found */
3129 if (rec->found_link == 0) {
3130 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3131 namebuf, namelen, type,
3132 (u64 *)&rec->found_link);
3136 printf("Fixed the nlink of inode %llu\n", rec->ino);
3139 * Clear the flag anyway, or we will loop forever for the same inode
3140 * as it will not be removed from the bad inode list and the dead loop
3143 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3144 btrfs_release_path(path);
3149 * Check if there is any normal(reg or prealloc) file extent for given
3151 * This is used to determine the file type when neither its dir_index/item or
3152 * inode_item exists.
3154 * This will *NOT* report error, if any error happens, just consider it does
3155 * not have any normal file extent.
/*
 * Look for a non-inline file extent item belonging to inode @ino in @root.
 *
 * A read-only search (no transaction, no COW) is done for an EXTENT_DATA
 * key; when the search lands past the last item of a leaf the next leaf
 * is used. Items whose objectid is not @ino or whose type is not
 * BTRFS_EXTENT_DATA_KEY end the scan, and any extent whose type differs
 * from BTRFS_FILE_EXTENT_INLINE counts as a normal extent.
 */
3157 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3159 struct btrfs_path path;
3160 struct btrfs_key key;
3161 struct btrfs_key found_key;
3162 struct btrfs_file_extent_item *fi;
3166 btrfs_init_path(&path);
3168 key.type = BTRFS_EXTENT_DATA_KEY;
3171 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3176 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3177 ret = btrfs_next_leaf(root, &path);
3184 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3186 if (found_key.objectid != ino ||
3187 found_key.type != BTRFS_EXTENT_DATA_KEY)
3189 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3190 struct btrfs_file_extent_item);
3191 type = btrfs_file_extent_type(path.nodes[0], fi);
3192 if (type != BTRFS_FILE_EXTENT_INLINE) {
3198 btrfs_release_path(&path);
/*
 * Translate a BTRFS_FT_* directory entry type into the matching S_IF*
 * file type bits by table lookup.
 *
 * NOTE(review): @type is used as the array index without a range check;
 * a value beyond the last initialized entry would read outside the
 * table - confirm callers only pass valid BTRFS_FT_* values.
 */
3202 static u32 btrfs_type_to_imode(u8 type)
3204 static u32 imode_by_btrfs_type[] = {
3205 [BTRFS_FT_REG_FILE] = S_IFREG,
3206 [BTRFS_FT_DIR] = S_IFDIR,
3207 [BTRFS_FT_CHRDEV] = S_IFCHR,
3208 [BTRFS_FT_BLKDEV] = S_IFBLK,
3209 [BTRFS_FT_FIFO] = S_IFIFO,
3210 [BTRFS_FT_SOCK] = S_IFSOCK,
3211 [BTRFS_FT_SYMLINK] = S_IFLNK,
3214 return imode_by_btrfs_type[(type)];
/*
 * Rebuild the missing inode item of @rec.
 *
 * The file type comes from find_file_type() when possible. Otherwise it
 * is guessed: a normal (non-inline) file extent or any orphan extent
 * means regular file, a found dir item means directory, and regular file
 * is the final fallback (with a message).
 *
 * Only the inode item is recreated with btrfs_new_inode(); the link count
 * is left to the nlink repair, which is forced to run by setting
 * I_ERR_LINK_COUNT_WRONG after clearing I_ERR_NO_INODE_ITEM.
 */
3217 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3218 struct btrfs_root *root,
3219 struct btrfs_path *path,
3220 struct inode_record *rec)
3224 int type_recovered = 0;
3227 printf("Trying to rebuild inode:%llu\n", rec->ino);
3229 type_recovered = !find_file_type(rec, &filetype);
3232 * Try to determine inode type if type not found.
3234 * For found regular file extent, it must be FILE.
3235 * For found dir_item/index, it must be DIR.
3237 * For undetermined one, use FILE as fallback.
3240 * 1. If found backref(inode_index/item is already handled) to it,
3242 * Need new inode-inode ref structure to allow search for that.
3244 if (!type_recovered) {
3245 if (rec->found_file_extent &&
3246 find_normal_file_extent(root, rec->ino)) {
3248 filetype = BTRFS_FT_REG_FILE;
3249 } else if (rec->found_dir_item) {
3251 filetype = BTRFS_FT_DIR;
3252 } else if (!list_empty(&rec->orphan_extents)) {
3254 filetype = BTRFS_FT_REG_FILE;
3256 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3259 filetype = BTRFS_FT_REG_FILE;
3263 ret = btrfs_new_inode(trans, root, rec->ino,
3264 mode | btrfs_type_to_imode(filetype));
3269 * Here inode rebuild is done, we only rebuild the inode item,
3270 * don't repair the nlink(like move to lost+found).
3271 * That is the job of nlink repair.
3273 * We just fill the record and return
3275 rec->found_dir_item = 1;
3276 rec->imode = mode | btrfs_type_to_imode(filetype);
3278 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3279 /* Ensure the inode_nlinks repair function will be called */
3280 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach the orphan data extents recorded for @rec.
 *
 * For each entry of rec->orphan_extents, btrfs_get_extent() is used to
 * look for an existing file extent covering the same range (disk_len is
 * used as the length). A conflicting orphan is released with
 * btrfs_free_extent(); otherwise a file extent item pointing at the
 * orphan is inserted.
 *
 * The record is updated along the way: found_size grows by disk_len, the
 * covered range is removed from rec->holes, and the NBYTES_WRONG /
 * EXTENT_DISCOUNT flags are cleared once their condition no longer
 * holds. I_ERR_FILE_EXTENT_ORPHAN is cleared at the end.
 */
3285 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3286 struct btrfs_root *root,
3287 struct btrfs_path *path,
3288 struct inode_record *rec)
3290 struct orphan_data_extent *orphan;
3291 struct orphan_data_extent *tmp;
3294 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3296 * Check for conflicting file extents
3298 * Here we don't know whether the extents is compressed or not,
3299 * so we can only assume it not compressed nor data offset,
3300 * and use its disk_len as extent length.
3302 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3303 orphan->offset, orphan->disk_len, 0);
3304 btrfs_release_path(path);
3309 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3310 orphan->disk_bytenr, orphan->disk_len);
3311 ret = btrfs_free_extent(trans,
3312 root->fs_info->extent_root,
3313 orphan->disk_bytenr, orphan->disk_len,
3314 0, root->objectid, orphan->objectid,
3319 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3320 orphan->offset, orphan->disk_bytenr,
3321 orphan->disk_len, orphan->disk_len);
3325 /* Update file size info */
3326 rec->found_size += orphan->disk_len;
3327 if (rec->found_size == rec->nbytes)
3328 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3330 /* Update the file extent hole info too */
3331 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3335 if (RB_EMPTY_ROOT(&rec->holes))
3336 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3338 list_del(&orphan->list);
3341 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps between file extents of @rec with hole extents.
 *
 * Each entry of the rec->holes rb-tree is punched with btrfs_punch_hole()
 * and then removed from the tree; I_ERR_FILE_EXTENT_DISCOUNT is cleared
 * once the tree is empty. The tree is re-read from rb_first() after each
 * removal.
 *
 * A file that lost all of its extents is handled separately by punching
 * one hole from offset 0 up to isize rounded up to the sector size.
 */
3346 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3347 struct btrfs_root *root,
3348 struct btrfs_path *path,
3349 struct inode_record *rec)
3351 struct rb_node *node;
3352 struct file_extent_hole *hole;
3356 node = rb_first(&rec->holes);
3360 hole = rb_entry(node, struct file_extent_hole, node);
3361 ret = btrfs_punch_hole(trans, root, rec->ino,
3362 hole->start, hole->len);
3365 ret = del_file_extent_hole(&rec->holes, hole->start,
3369 if (RB_EMPTY_ROOT(&rec->holes))
3370 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3371 node = rb_first(&rec->holes);
3373 /* special case for a file losing all its file extent */
3375 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3376 round_up(rec->isize,
3377 root->fs_info->sectorsize));
3381 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3382 rec->ino, root->objectid);
/*
 * Run every applicable inode repair for @rec inside one transaction.
 *
 * Nothing is attempted unless rec->errors contains at least one of the
 * repairable flags tested first. The repairs then run in a fixed order
 * (missing inode item, orphan extents, extent holes, dir isize, orphan
 * item, nlink, nbytes); each later step is skipped as soon as an earlier
 * one returned non-zero. Earlier steps may set flags consumed by later
 * ones, e.g. the inode item repair requests the nlink repair.
 *
 * NOTE(review): the result of btrfs_commit_transaction() is not captured
 * here - confirm that ignoring a failed commit is intended.
 */
3387 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3389 struct btrfs_trans_handle *trans;
3390 struct btrfs_path path;
3393 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3394 I_ERR_NO_ORPHAN_ITEM |
3395 I_ERR_LINK_COUNT_WRONG |
3396 I_ERR_NO_INODE_ITEM |
3397 I_ERR_FILE_EXTENT_ORPHAN |
3398 I_ERR_FILE_EXTENT_DISCOUNT|
3399 I_ERR_FILE_NBYTES_WRONG)))
3403 * For nlink repair, it may create a dir and add link, so
3404 * 2 for parent(256)'s dir_index and dir_item
3405 * 2 for lost+found dir's inode_item and inode_ref
3406 * 1 for the new inode_ref of the file
3407 * 2 for lost+found dir's dir_index and dir_item for the file
3409 trans = btrfs_start_transaction(root, 7);
3411 return PTR_ERR(trans);
3413 btrfs_init_path(&path);
3414 if (rec->errors & I_ERR_NO_INODE_ITEM)
3415 ret = repair_inode_no_item(trans, root, &path, rec);
3416 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3417 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3418 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3419 ret = repair_inode_discount_extent(trans, root, &path, rec);
3420 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3421 ret = repair_inode_isize(trans, root, &path, rec);
3422 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3423 ret = repair_inode_orphan_item(trans, root, &path, rec);
3424 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3425 ret = repair_inode_nlinks(trans, root, &path, rec);
3426 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3427 ret = repair_inode_nbytes(trans, root, &path, rec);
3428 btrfs_commit_transaction(trans, root);
3429 btrfs_release_path(&path);
/*
 * Check (and, when 'repair' is set, try to fix) every inode record that
 * was collected for @root in @inode_cache.
 *
 * Outline:
 *  - A root with zero refs is only checked for an unexpectedly non-empty
 *    cache.
 *  - In repair mode the backrefs of every record are repaired first, in
 *    stages, via repair_inode_backrefs(); some outcomes require the
 *    records to be freed so the tree can be rescanned.
 *  - The root directory record is validated with check_root_dir(); a
 *    missing root directory is recreated with btrfs_make_root_dir() or
 *    reported.
 *  - Every remaining record is removed from the cache. The root dir and
 *    the orphan objectid are skipped, an existing orphan item clears
 *    I_ERR_NO_ORPHAN_ITEM, and the missing-inode-item / wrong-link-count
 *    flags are derived from the record before try_repair_inode() runs.
 *    Records that still have problems are printed together with their
 *    unresolved backrefs.
 *
 * Returns -1 when 'error' is positive, 0 otherwise.
 */
3433 static int check_inode_recs(struct btrfs_root *root,
3434 struct cache_tree *inode_cache)
3436 struct cache_extent *cache;
3437 struct ptr_node *node;
3438 struct inode_record *rec;
3439 struct inode_backref *backref;
3444 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3446 if (btrfs_root_refs(&root->root_item) == 0) {
3447 if (!cache_tree_empty(inode_cache))
3448 fprintf(stderr, "warning line %d\n", __LINE__);
3453 * We need to repair backrefs first because we could change some of the
3454 * errors in the inode recs.
3456 * We also need to go through and delete invalid backrefs first and then
3457 * add the correct ones second. We do this because we may get EEXIST
3458 * when adding back the correct index because we hadn't yet deleted the
3461 * For example, if we were missing a dir index then the directories
3462 * isize would be wrong, so if we fixed the isize to what we thought it
3463 * would be and then fixed the backref we'd still have a invalid fs, so
3464 * we need to add back the dir index and then check to see if the isize
3469 if (stage == 3 && !err)
3472 cache = search_cache_extent(inode_cache, 0);
3473 while (repair && cache) {
3474 node = container_of(cache, struct ptr_node, cache);
3476 cache = next_cache_extent(cache);
3478 /* Need to free everything up and rescan */
3480 remove_cache_extent(inode_cache, &node->cache);
3482 free_inode_rec(rec);
3486 if (list_empty(&rec->backrefs))
3489 ret = repair_inode_backrefs(root, rec, inode_cache,
3503 rec = get_inode_rec(inode_cache, root_dirid, 0);
3504 BUG_ON(IS_ERR(rec));
3506 ret = check_root_dir(rec);
3508 fprintf(stderr, "root %llu root dir %llu error\n",
3509 (unsigned long long)root->root_key.objectid,
3510 (unsigned long long)root_dirid);
3511 print_inode_error(root, rec);
3516 struct btrfs_trans_handle *trans;
3518 trans = btrfs_start_transaction(root, 1);
3519 if (IS_ERR(trans)) {
3520 err = PTR_ERR(trans);
3525 "root %llu missing its root dir, recreating\n",
3526 (unsigned long long)root->objectid);
3528 ret = btrfs_make_root_dir(trans, root, root_dirid);
3531 btrfs_commit_transaction(trans, root);
3535 fprintf(stderr, "root %llu root dir %llu not found\n",
3536 (unsigned long long)root->root_key.objectid,
3537 (unsigned long long)root_dirid);
3541 cache = search_cache_extent(inode_cache, 0);
3544 node = container_of(cache, struct ptr_node, cache);
3546 remove_cache_extent(inode_cache, &node->cache);
3548 if (rec->ino == root_dirid ||
3549 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3550 free_inode_rec(rec);
3554 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3555 ret = check_orphan_item(root, rec->ino);
3557 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3558 if (can_free_inode_rec(rec)) {
3559 free_inode_rec(rec);
3564 if (!rec->found_inode_item)
3565 rec->errors |= I_ERR_NO_INODE_ITEM;
3566 if (rec->found_link != rec->nlink)
3567 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3569 ret = try_repair_inode(root, rec);
3570 if (ret == 0 && can_free_inode_rec(rec)) {
3571 free_inode_rec(rec);
3577 if (!(repair && ret == 0))
3579 print_inode_error(root, rec);
3580 list_for_each_entry(backref, &rec->backrefs, list) {
3581 if (!backref->found_dir_item)
3582 backref->errors |= REF_ERR_NO_DIR_ITEM;
3583 if (!backref->found_dir_index)
3584 backref->errors |= REF_ERR_NO_DIR_INDEX;
3585 if (!backref->found_inode_ref)
3586 backref->errors |= REF_ERR_NO_INODE_REF;
3587 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3588 " namelen %u name %s filetype %d errors %x",
3589 (unsigned long long)backref->dir,
3590 (unsigned long long)backref->index,
3591 backref->namelen, backref->name,
3592 backref->filetype, backref->errors);
3593 print_ref_error(backref->errors);
3595 free_inode_rec(rec);
3597 return (error > 0) ? -1 : 0;
/*
 * Look up the root_record for 'objectid' in @root_cache, creating it on
 * demand.
 *
 * An existing record is found through lookup_cache_extent() with a
 * range of size 1. Otherwise a zeroed record is allocated, its backref
 * list is initialized and it is inserted into the cache keyed by the
 * objectid.
 *
 * Returns ERR_PTR(-ENOMEM) or ERR_PTR(-EEXIST) on the failure paths
 * visible here.
 */
3600 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3603 struct cache_extent *cache;
3604 struct root_record *rec = NULL;
3607 cache = lookup_cache_extent(root_cache, objectid, 1);
3609 rec = container_of(cache, struct root_record, cache);
3611 rec = calloc(1, sizeof(*rec));
3613 return ERR_PTR(-ENOMEM);
3614 rec->objectid = objectid;
3615 INIT_LIST_HEAD(&rec->backrefs);
3616 rec->cache.start = objectid;
3617 rec->cache.size = 1;
3619 ret = insert_cache_extent(root_cache, &rec->cache);
3621 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec that matches (@ref_root, @dir, @name), or
 * create a new one.
 *
 * Existing entries are compared on ref_root, dir, namelen and the name
 * bytes; @index is not part of the comparison. A new backref is allocated
 * zeroed with room for the name plus a NUL terminator, filled in
 * (including @index) and appended to rec->backrefs.
 */
3626 static struct root_backref *get_root_backref(struct root_record *rec,
3627 u64 ref_root, u64 dir, u64 index,
3628 const char *name, int namelen)
3630 struct root_backref *backref;
3632 list_for_each_entry(backref, &rec->backrefs, list) {
3633 if (backref->ref_root != ref_root || backref->dir != dir ||
3634 backref->namelen != namelen)
3636 if (memcmp(name, backref->name, namelen))
3641 backref = calloc(1, sizeof(*backref) + namelen + 1);
3644 backref->ref_root = ref_root;
3646 backref->index = index;
3647 backref->namelen = namelen;
3648 memcpy(backref->name, name, namelen);
3649 backref->name[namelen] = '\0';
3650 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Release callback for one root_record stored in a cache tree: every
 * backref is unlinked from rec->backrefs until the list is empty.
 *
 * NOTE(review): the FREE_EXTENT_CACHE_BASED_TREE() line below presumably
 * generates a tree-wide free helper for root records that uses this
 * callback - confirm against the macro definition.
 */
3654 static void free_root_record(struct cache_extent *cache)
3656 struct root_record *rec;
3657 struct root_backref *backref;
3659 rec = container_of(cache, struct root_record, cache);
3660 while (!list_empty(&rec->backrefs)) {
3661 backref = to_root_backref(rec->backrefs.next);
3662 list_del(&backref->list);
3669 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that an item of type @item_type referencing root @root_id was
 * seen in @ref_root, directory @dir, under @name.
 *
 * The matching root_record and root_backref are looked up or created and
 * @errors is OR-ed into the backref. For every item type except
 * BTRFS_DIR_ITEM_KEY the index is cross-checked: if another index-bearing
 * item was already seen and the indexes differ, REF_ERR_INDEX_UNMATCH is
 * set.
 *
 * The found_* flag for the item type is then set; a second ROOT_REF or
 * ROOT_BACKREF item for the same backref is flagged as a duplicate. A
 * backref becomes reachable once both the forward ref and the dir item
 * have been seen.
 */
3671 static int add_root_backref(struct cache_tree *root_cache,
3672 u64 root_id, u64 ref_root, u64 dir, u64 index,
3673 const char *name, int namelen,
3674 int item_type, int errors)
3676 struct root_record *rec;
3677 struct root_backref *backref;
3679 rec = get_root_rec(root_cache, root_id);
3680 BUG_ON(IS_ERR(rec));
3681 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3684 backref->errors |= errors;
3686 if (item_type != BTRFS_DIR_ITEM_KEY) {
3687 if (backref->found_dir_index || backref->found_back_ref ||
3688 backref->found_forward_ref) {
3689 if (backref->index != index)
3690 backref->errors |= REF_ERR_INDEX_UNMATCH;
3692 backref->index = index;
3696 if (item_type == BTRFS_DIR_ITEM_KEY) {
3697 if (backref->found_forward_ref)
3699 backref->found_dir_item = 1;
3700 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3701 backref->found_dir_index = 1;
3702 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3703 if (backref->found_forward_ref)
3704 backref->errors |= REF_ERR_DUP_ROOT_REF;
3705 else if (backref->found_dir_item)
3707 backref->found_forward_ref = 1;
3708 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3709 if (backref->found_back_ref)
3710 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3711 backref->found_back_ref = 1;
3716 if (backref->found_forward_ref && backref->found_dir_item)
3717 backref->reachable = 1;
/*
 * Drain the per-root inode records in @src_cache and turn them into root
 * backrefs in @dst_cache.
 *
 * For the tree-reloc root the source records are simply freed. Otherwise
 * each record is removed from @src_cache, checked with is_child_root(),
 * and its dir item / dir index backrefs are registered through
 * add_root_backref() with @root as the referencing root. Each record is
 * freed after it has been processed.
 *
 * Records here must not carry an inode ref (enforced with BUG_ON).
 */
3721 static int merge_root_recs(struct btrfs_root *root,
3722 struct cache_tree *src_cache,
3723 struct cache_tree *dst_cache)
3725 struct cache_extent *cache;
3726 struct ptr_node *node;
3727 struct inode_record *rec;
3728 struct inode_backref *backref;
3731 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3732 free_inode_recs_tree(src_cache);
3737 cache = search_cache_extent(src_cache, 0);
3740 node = container_of(cache, struct ptr_node, cache);
3742 remove_cache_extent(src_cache, &node->cache);
3745 ret = is_child_root(root, root->objectid, rec->ino);
3751 list_for_each_entry(backref, &rec->backrefs, list) {
3752 BUG_ON(backref->found_inode_ref);
3753 if (backref->found_dir_item)
3754 add_root_backref(dst_cache, rec->ino,
3755 root->root_key.objectid, backref->dir,
3756 backref->index, backref->name,
3757 backref->namelen, BTRFS_DIR_ITEM_KEY,
3759 if (backref->found_dir_index)
3760 add_root_backref(dst_cache, rec->ino,
3761 root->root_key.objectid, backref->dir,
3762 backref->index, backref->name,
3763 backref->namelen, BTRFS_DIR_INDEX_KEY,
3767 free_inode_rec(rec);
/*
 * Verify the references between subvolume roots recorded in @root_cache.
 *
 * Phase 1: starting from the record of BTRFS_FS_TREE_OBJECTID, backrefs
 * whose referencing root has no reference itself are marked unreachable.
 * As the fixme notes, circular references are not detected.
 *
 * Phase 2: every record is checked. An unreferenced root in the
 * FIRST_FREE..LAST_FREE objectid range is looked up in the orphan items
 * of the tree root and ignored when it has no root item; otherwise it is
 * reported. Missing dir item / dir index / root backref / root ref are
 * OR-ed into each backref's errors, and roots with problems are printed
 * together with their unresolved, still reachable backrefs.
 *
 * Returns 1 when 'errors' is positive, 0 otherwise.
 */
3774 static int check_root_refs(struct btrfs_root *root,
3775 struct cache_tree *root_cache)
3777 struct root_record *rec;
3778 struct root_record *ref_root;
3779 struct root_backref *backref;
3780 struct cache_extent *cache;
3786 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3787 BUG_ON(IS_ERR(rec));
3790 /* fixme: this can not detect circular references */
3793 cache = search_cache_extent(root_cache, 0);
3797 rec = container_of(cache, struct root_record, cache);
3798 cache = next_cache_extent(cache);
3800 if (rec->found_ref == 0)
3803 list_for_each_entry(backref, &rec->backrefs, list) {
3804 if (!backref->reachable)
3807 ref_root = get_root_rec(root_cache,
3809 BUG_ON(IS_ERR(ref_root));
3810 if (ref_root->found_ref > 0)
3813 backref->reachable = 0;
3815 if (rec->found_ref == 0)
3821 cache = search_cache_extent(root_cache, 0);
3825 rec = container_of(cache, struct root_record, cache);
3826 cache = next_cache_extent(cache);
3828 if (rec->found_ref == 0 &&
3829 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3830 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3831 ret = check_orphan_item(root->fs_info->tree_root,
3837 * If we don't have a root item then we likely just have
3838 * a dir item in a snapshot for this root but no actual
3839 * ref key or anything so it's meaningless.
3841 if (!rec->found_root_item)
3844 fprintf(stderr, "fs tree %llu not referenced\n",
3845 (unsigned long long)rec->objectid);
3849 if (rec->found_ref > 0 && !rec->found_root_item)
3851 list_for_each_entry(backref, &rec->backrefs, list) {
3852 if (!backref->found_dir_item)
3853 backref->errors |= REF_ERR_NO_DIR_ITEM;
3854 if (!backref->found_dir_index)
3855 backref->errors |= REF_ERR_NO_DIR_INDEX;
3856 if (!backref->found_back_ref)
3857 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3858 if (!backref->found_forward_ref)
3859 backref->errors |= REF_ERR_NO_ROOT_REF;
3860 if (backref->reachable && backref->errors)
3867 fprintf(stderr, "fs tree %llu refs %u %s\n",
3868 (unsigned long long)rec->objectid, rec->found_ref,
3869 rec->found_root_item ? "" : "not found");
3871 list_for_each_entry(backref, &rec->backrefs, list) {
3872 if (!backref->reachable)
3874 if (!backref->errors && rec->found_root_item)
3876 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3877 " index %llu namelen %u name %s errors %x\n",
3878 (unsigned long long)backref->ref_root,
3879 (unsigned long long)backref->dir,
3880 (unsigned long long)backref->index,
3881 backref->namelen, backref->name,
3883 print_ref_error(backref->errors);
3886 return errors > 0 ? 1 : 0;
/*
 * Parse the root ref item at @slot of @eb and record it in @root_cache.
 *
 * dirid, sequence (used as the index) and name length are read from the
 * item; the name bytes follow the btrfs_root_ref structure. A name longer
 * than BTRFS_NAME_LEN is truncated to BTRFS_NAME_LEN and flagged with
 * REF_ERR_NAME_TOO_LONG.
 *
 * For a BTRFS_ROOT_REF_KEY the referenced root is key->offset and the
 * referencing root is key->objectid; for the other key type handled here
 * the two are swapped.
 */
3889 static int process_root_ref(struct extent_buffer *eb, int slot,
3890 struct btrfs_key *key,
3891 struct cache_tree *root_cache)
3897 struct btrfs_root_ref *ref;
3898 char namebuf[BTRFS_NAME_LEN];
3901 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3903 dirid = btrfs_root_ref_dirid(eb, ref);
3904 index = btrfs_root_ref_sequence(eb, ref);
3905 name_len = btrfs_root_ref_name_len(eb, ref);
3907 if (name_len <= BTRFS_NAME_LEN) {
3911 len = BTRFS_NAME_LEN;
3912 error = REF_ERR_NAME_TOO_LONG;
3914 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3916 if (key->type == BTRFS_ROOT_REF_KEY) {
3917 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3918 index, namebuf, len, key->type, error);
3920 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3921 index, namebuf, len, key->type, error);
/*
 * Release callback for one btrfs_corrupt_block stored in a cache tree;
 * the containing structure is recovered from the embedded cache_extent.
 *
 * NOTE(review): the FREE_EXTENT_CACHE_BASED_TREE() line below presumably
 * generates free_corrupt_blocks_tree(), which is called from
 * check_fs_root() - confirm against the macro definition.
 */
3926 static void free_corrupt_block(struct cache_extent *cache)
3928 struct btrfs_corrupt_block *corrupt;
3930 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3934 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3937 * Repair the btree of the given root.
3939 * The fix is to remove the node key in corrupt_blocks cache_tree.
3940 * and rebalance the tree.
3941 * After the fix, the btree should be writeable.
/*
 * Remove the tree blocks recorded in @corrupt_blocks from @root.
 *
 * Nothing is done for an empty tree. Otherwise, inside one transaction:
 *
 * Pass 1: for each corrupt block the parent slot is located by searching
 * for the block's key with path.lowest_level set to the block's level and
 * ins_len 0 (so no balancing touches possibly corrupted siblings). The
 * pointer is deleted with btrfs_del_ptr() and the extent behind it is
 * released with btrfs_free_extent().
 *
 * Pass 2: each key is searched again with ins_len -1 so that
 * btrfs_search_slot() rebalances the tree; the keys themselves are not
 * expected to be found any more.
 */
3943 static int repair_btree(struct btrfs_root *root,
3944 struct cache_tree *corrupt_blocks)
3946 struct btrfs_trans_handle *trans;
3947 struct btrfs_path path;
3948 struct btrfs_corrupt_block *corrupt;
3949 struct cache_extent *cache;
3950 struct btrfs_key key;
3955 if (cache_tree_empty(corrupt_blocks))
3958 trans = btrfs_start_transaction(root, 1);
3959 if (IS_ERR(trans)) {
3960 ret = PTR_ERR(trans);
3961 fprintf(stderr, "Error starting transaction: %s\n",
3965 btrfs_init_path(&path);
3966 cache = first_cache_extent(corrupt_blocks);
3968 corrupt = container_of(cache, struct btrfs_corrupt_block,
3970 level = corrupt->level;
3971 path.lowest_level = level;
3972 key.objectid = corrupt->key.objectid;
3973 key.type = corrupt->key.type;
3974 key.offset = corrupt->key.offset;
3977 * Here we don't want to do any tree balance, since it may
3978 * cause a balance with corrupted brother leaf/node,
3979 * so ins_len set to 0 here.
3980 * Balance will be done after all corrupt node/leaf is deleted.
3982 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3985 offset = btrfs_node_blockptr(path.nodes[level],
3988 /* Remove the ptr */
3989 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3993 * Remove the corresponding extent
3994 * return value is not concerned.
3996 btrfs_release_path(&path);
3997 ret = btrfs_free_extent(trans, root, offset,
3998 root->fs_info->nodesize, 0,
3999 root->root_key.objectid, level - 1, 0);
4000 cache = next_cache_extent(cache);
4003 /* Balance the btree using btrfs_search_slot() */
4004 cache = first_cache_extent(corrupt_blocks);
4006 corrupt = container_of(cache, struct btrfs_corrupt_block,
4008 memcpy(&key, &corrupt->key, sizeof(key));
4009 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
4012 /* return will always >0 since it won't find the item */
4014 btrfs_release_path(&path);
4015 cache = next_cache_extent(cache);
4018 btrfs_commit_transaction(trans, root);
4019 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree.
 *
 * Outline:
 *  - A local corrupt_blocks cache tree is installed in
 *    root->fs_info->corrupt_blocks for the duration of the check.
 *  - Unless this is the tree-reloc root, its root_record is fetched and
 *    marked as having a root item when the root has refs.
 *  - Orphan data extents queued on the root are moved to the matching
 *    inode records, which get I_ERR_FILE_EXTENT_ORPHAN.
 *  - The root block itself is validated with btrfs_check_leaf() or
 *    btrfs_check_node().
 *  - The walk starts at the root node, or, for a root without refs that
 *    has drop progress recorded, at the drop_progress key and drop_level
 *    (an out-of-range drop level is reported).
 *  - walk_down_tree()/walk_up_tree() traverse the tree.
 *  - Corrupted blocks found during the walk are listed and handed to
 *    repair_btree().
 *  - Root records are merged into @root_cache and the collected inode
 *    records are checked with check_inode_recs().
 *  - Finally the corrupt block tree and the orphan extent list are freed
 *    and the fs_info pointer is reset to NULL.
 */
4023 static int check_fs_root(struct btrfs_root *root,
4024 struct cache_tree *root_cache,
4025 struct walk_control *wc)
4031 struct btrfs_path path;
4032 struct shared_node root_node;
4033 struct root_record *rec;
4034 struct btrfs_root_item *root_item = &root->root_item;
4035 struct cache_tree corrupt_blocks;
4036 struct orphan_data_extent *orphan;
4037 struct orphan_data_extent *tmp;
4038 enum btrfs_tree_block_status status;
4039 struct node_refs nrefs;
4042 * Reuse the corrupt_block cache tree to record corrupted tree block
4044 * Unlike the usage in extent tree check, here we do it in a per
4045 * fs/subvol tree base.
4047 cache_tree_init(&corrupt_blocks);
4048 root->fs_info->corrupt_blocks = &corrupt_blocks;
4050 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4051 rec = get_root_rec(root_cache, root->root_key.objectid);
4052 BUG_ON(IS_ERR(rec));
4053 if (btrfs_root_refs(root_item) > 0)
4054 rec->found_root_item = 1;
4057 btrfs_init_path(&path);
4058 memset(&root_node, 0, sizeof(root_node));
4059 cache_tree_init(&root_node.root_cache);
4060 cache_tree_init(&root_node.inode_cache);
4061 memset(&nrefs, 0, sizeof(nrefs));
4063 /* Move the orphan extent record to corresponding inode_record */
4064 list_for_each_entry_safe(orphan, tmp,
4065 &root->orphan_data_extents, list) {
4066 struct inode_record *inode;
4068 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4070 BUG_ON(IS_ERR(inode));
4071 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4072 list_move(&orphan->list, &inode->orphan_extents);
4075 level = btrfs_header_level(root->node);
4076 memset(wc->nodes, 0, sizeof(wc->nodes));
4077 wc->nodes[level] = &root_node;
4078 wc->active_node = level;
4079 wc->root_level = level;
4081 /* We may not have checked the root block, lets do that now */
4082 if (btrfs_is_leaf(root->node))
4083 status = btrfs_check_leaf(root, NULL, root->node);
4085 status = btrfs_check_node(root, NULL, root->node);
4086 if (status != BTRFS_TREE_BLOCK_CLEAN)
4089 if (btrfs_root_refs(root_item) > 0 ||
4090 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4091 path.nodes[level] = root->node;
4092 extent_buffer_get(root->node);
4093 path.slots[level] = 0;
4095 struct btrfs_key key;
4096 struct btrfs_disk_key found_key;
4098 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4099 level = root_item->drop_level;
4100 path.lowest_level = level;
4101 if (level > btrfs_header_level(root->node) ||
4102 level >= BTRFS_MAX_LEVEL) {
4103 error("ignoring invalid drop level: %u", level);
4106 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4109 btrfs_node_key(path.nodes[level], &found_key,
4111 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4112 sizeof(found_key)));
4116 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4122 wret = walk_up_tree(root, &path, wc, &level);
4129 btrfs_release_path(&path);
4131 if (!cache_tree_empty(&corrupt_blocks)) {
4132 struct cache_extent *cache;
4133 struct btrfs_corrupt_block *corrupt;
4135 printf("The following tree block(s) is corrupted in tree %llu:\n",
4136 root->root_key.objectid);
4137 cache = first_cache_extent(&corrupt_blocks);
4139 corrupt = container_of(cache,
4140 struct btrfs_corrupt_block,
4142 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4143 cache->start, corrupt->level,
4144 corrupt->key.objectid, corrupt->key.type,
4145 corrupt->key.offset);
4146 cache = next_cache_extent(cache);
4149 printf("Try to repair the btree for root %llu\n",
4150 root->root_key.objectid);
4151 ret = repair_btree(root, &corrupt_blocks);
4153 fprintf(stderr, "Failed to repair btree: %s\n",
4156 printf("Btree for root %llu is fixed\n",
4157 root->root_key.objectid);
4161 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4165 if (root_node.current) {
4166 root_node.current->checked = 1;
4167 maybe_free_inode_rec(&root_node.inode_cache,
4171 err = check_inode_recs(root, &root_node.inode_cache);
4175 free_corrupt_blocks_tree(&corrupt_blocks);
4176 root->fs_info->corrupt_blocks = NULL;
4177 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Tell whether @objectid names a root that check_fs_roots() should walk.
 * The tree-reloc and data-reloc-tree objectids are tested explicitly before
 * falling back to the generic is_fstree() answer.
 */
4181 static int fs_root_objectid(u64 objectid)
4183 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4184 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
/* NOTE(review): the statement taken on a match is not visible here; presumably it accepts the objectid - confirm. */
4186 return is_fstree(objectid);
/*
 * Scan the tree root and run check_fs_root() on every ROOT_ITEM whose
 * objectid passes fs_root_objectid(); ROOT_REF and ROOT_BACKREF items met
 * during the scan are handed to process_root_ref().
 */
4189 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4190 struct cache_tree *root_cache)
4192 struct btrfs_path path;
4193 struct btrfs_key key;
4194 struct walk_control wc;
4195 struct extent_buffer *leaf, *tree_node;
4196 struct btrfs_root *tmp_root;
4197 struct btrfs_root *tree_root = fs_info->tree_root;
/* Switch the progress task to the fs-roots phase when progress output is enabled. */
4201 if (ctx.progress_enabled) {
4202 ctx.tp = TASK_FS_ROOTS;
4203 task_start(ctx.info);
4207 * Just in case we made any changes to the extent tree that weren't
4208 * reflected into the free space cache yet.
4211 reset_cached_block_groups(fs_info);
4212 memset(&wc, 0, sizeof(wc));
4213 cache_tree_init(&wc.shared);
4214 btrfs_init_path(&path);
/* Position @path in the tree root for the ROOT_ITEM scan. */
4219 key.type = BTRFS_ROOT_ITEM_KEY;
4220 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember the tree-root node the scan started on so a replacement can be detected below. */
4225 tree_node = tree_root->node;
/*
 * The tree root node is no longer the one recorded above: the root records
 * gathered so far are dropped and the path released.
 * NOTE(review): presumably the scan is restarted afterwards - confirm.
 */
4227 if (tree_node != tree_root->node) {
4228 free_root_recs_tree(root_cache);
4229 btrfs_release_path(&path);
4232 leaf = path.nodes[0];
/* Slot ran past the last item of this leaf: move on to the next leaf. */
4233 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4234 ret = btrfs_next_leaf(tree_root, &path);
4240 leaf = path.nodes[0];
4242 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4243 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4244 fs_root_objectid(key.objectid)) {
/*
 * Tree-reloc roots are read with the no-cache variant and freed explicitly
 * once checked (see btrfs_free_fs_root() below); every other root is read
 * through btrfs_read_fs_root() with key.offset forced to (u64)-1.
 */
4245 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4246 tmp_root = btrfs_read_fs_root_no_cache(
4249 key.offset = (u64)-1;
4250 tmp_root = btrfs_read_fs_root(
4253 if (IS_ERR(tmp_root)) {
4257 ret = check_fs_root(tmp_root, root_cache, &wc);
/*
 * -EAGAIN from check_fs_root(): throw away the root records and release the
 * path. NOTE(review): presumably followed by a restart of the scan - confirm.
 */
4258 if (ret == -EAGAIN) {
4259 free_root_recs_tree(root_cache);
4260 btrfs_release_path(&path);
4265 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4266 btrfs_free_fs_root(tmp_root);
4267 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4268 key.type == BTRFS_ROOT_BACKREF_KEY) {
4269 process_root_ref(leaf, path.slots[0], &key,
4276 btrfs_release_path(&path);
/*
 * NOTE(review): the emptiness test runs after free_extent_cache_tree(), so
 * the warning can only fire for entries the free left behind.
 */
4278 free_extent_cache_tree(&wc.shared);
4279 if (!cache_tree_empty(&wc.shared))
4280 fprintf(stderr, "warning line %d\n", __LINE__);
4282 task_stop(ctx.info);
4288 * Find the @index according to @ino and name.
4289 * Notice:time efficiency is O(N)
4291 * @root: the root of the fs/file tree
4292 * @index_ret: the index as return value
4293 * @namebuf: the name to match
4294 * @name_len: the length of name to match
4295 * @file_type: the file_type of INODE_ITEM to match
4297 * Returns 0 if found and *@index_ret will be modified with right value
4298 * Returns <0 if not found, and *@index_ret will be (u64)-1
4300 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4301 u64 *index_ret, char *namebuf, u32 name_len,
4304 struct btrfs_path path;
4305 struct extent_buffer *node;
4306 struct btrfs_dir_item *di;
4307 struct btrfs_key key;
4308 struct btrfs_key location;
4309 char name[BTRFS_NAME_LEN] = {0};
4321 /* search from the last index */
4322 key.objectid = dirid;
4323 key.offset = (u64)-1;
4324 key.type = BTRFS_DIR_INDEX_KEY;
4326 btrfs_init_path(&path);
4327 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4332 ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4335 *index_ret = (64)-1;
4338 /* Check whether inode_id/filetype/name match */
4339 node = path.nodes[0];
4340 slot = path.slots[0];
4341 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4342 total = btrfs_item_size_nr(node, slot);
4343 while (cur < total) {
4345 len = btrfs_dir_name_len(node, di);
4346 data_len = btrfs_dir_data_len(node, di);
4348 btrfs_dir_item_key_to_cpu(node, di, &location);
4349 if (location.objectid != location_id ||
4350 location.type != BTRFS_INODE_ITEM_KEY ||
4351 location.offset != 0)
4354 filetype = btrfs_dir_type(node, di);
4355 if (file_type != filetype)
4358 if (len > BTRFS_NAME_LEN)
4359 len = BTRFS_NAME_LEN;
4361 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4362 if (len != name_len || strncmp(namebuf, name, len))
4365 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4366 *index_ret = key.offset;
4370 len += sizeof(*di) + data_len;
4371 di = (struct btrfs_dir_item *)((char *)di + len);
4377 btrfs_release_path(&path);
4382 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4383 * INODE_REF/INODE_EXTREF match.
4385 * @root: the root of the fs/file tree
4386 * @key: the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4387 * value while find index
4388 * @location_key: location key of the struct btrfs_dir_item to match
4389 * @name: the name to match
4390 * @namelen: the length of name
4391 * @file_type: the type of file to match
4393 * Return 0 if no error occurred.
4394 * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4395 * DIR_ITEM/DIR_INDEX
4396 * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4397 * and DIR_ITEM/DIR_INDEX mismatch
4399 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4400 struct btrfs_key *location_key, char *name,
4401 u32 namelen, u8 file_type)
4403 struct btrfs_path path;
4404 struct extent_buffer *node;
4405 struct btrfs_dir_item *di;
4406 struct btrfs_key location;
4407 char namebuf[BTRFS_NAME_LEN] = {0};
/*
 * A DIR_INDEX key whose offset is (u64)-1 means the caller does not know the
 * index yet: find_dir_index() searches for it and writes the result straight
 * into key->offset, so @key is modified for the caller.
 */
4416 /* get the index by traversing all index */
4417 if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4418 ret = find_dir_index(root, key->objectid,
4419 location_key->objectid, &key->offset,
4420 name, namelen, file_type);
4422 ret = DIR_INDEX_MISSING;
/* Exact lookup of @key; the MISSING code reported depends on the key type. */
4426 btrfs_init_path(&path);
4427 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4429 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4434 /* Check whether inode_id/filetype/name match */
4435 node = path.nodes[0];
4436 slot = path.slots[0];
4437 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4438 total = btrfs_item_size_nr(node, slot);
/*
 * Walk every btrfs_dir_item packed into this item. @ret is preset to the
 * MISMATCH code for the key type at the top of each pass, so that is what
 * remains when an entry fails one of the comparisons below.
 */
4439 while (cur < total) {
4440 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4441 DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4443 len = btrfs_dir_name_len(node, di);
4444 data_len = btrfs_dir_data_len(node, di);
4446 btrfs_dir_item_key_to_cpu(node, di, &location);
4447 if (location.objectid != location_key->objectid ||
4448 location.type != location_key->type ||
4449 location.offset != location_key->offset)
4452 filetype = btrfs_dir_type(node, di);
4453 if (file_type != filetype)
/*
 * Clamp over-long names so the copy into namebuf stays in bounds.
 * NOTE(review): the warning prints @len after it was clamped, so it always
 * reports BTRFS_NAME_LEN rather than the length stored on disk.
 */
4456 if (len > BTRFS_NAME_LEN) {
4457 len = BTRFS_NAME_LEN;
4458 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4460 key->type == BTRFS_DIR_ITEM_KEY ?
4461 "DIR_ITEM" : "DIR_INDEX",
4462 key->objectid, key->offset, len);
4464 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4466 if (len != namelen || strncmp(namebuf, name, len))
/* Advance past header + name + data to the next packed entry. */
4472 len += sizeof(*di) + data_len;
4473 di = (struct btrfs_dir_item *)((char *)di + len);
4478 btrfs_release_path(&path);
4483 * Prints inode ref error message
4485 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4486 u64 index, const char *namebuf, int name_len,
4487 u8 filetype, int err)
4492 /* root dir error */
4493 if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4495 "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4496 root->objectid, key->objectid, key->offset, namebuf);
4501 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4502 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4503 root->objectid, key->offset,
4504 btrfs_name_hash(namebuf, name_len),
4505 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4507 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4508 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4509 root->objectid, key->offset, index,
4510 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4515 * Insert the missing inode item.
4517 * Returns 0 means success.
4518 * Returns <0 means error.
4520 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4523 struct btrfs_key key;
4524 struct btrfs_trans_handle *trans;
4525 struct btrfs_path path;
4529 key.type = BTRFS_INODE_ITEM_KEY;
4532 btrfs_init_path(&path);
4533 trans = btrfs_start_transaction(root, 1);
4534 if (IS_ERR(trans)) {
/*
 * Look the INODE_ITEM key up inside the transaction. A search error, or an
 * exact hit (ret == 0, i.e. the item already exists), takes the branch below.
 * NOTE(review): that branch presumably skips the insert - confirm.
 */
4539 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4540 if (ret < 0 || !ret)
/* NOTE(review): the result of create_inode_item_lowmem() is not captured on this line. */
4543 /* insert inode item */
4544 create_inode_item_lowmem(trans, root, ino, filetype);
4547 btrfs_commit_transaction(trans, root);
4550 error("failed to repair root %llu INODE ITEM[%llu] missing",
4551 root->objectid, ino);
4552 btrfs_release_path(&path);
4557 * The ternary means dir item, dir index and relative inode ref.
4558 * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4559 * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4561 * If two of three is missing or mismatched, delete the existing one.
4562 * If one of three is missing or mismatched, add the missing one.
4564 * Returns 0 on success.
4565 * Returns non-zero on error.
4567 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4568 u64 index, char *name, int name_len, u8 filetype,
4571 struct btrfs_trans_handle *trans;
4576 * stage shall be one of following valild values:
4577 * 0: Fine, nothing to do.
4578 * 1: One of three is wrong, so add missing one.
4579 * 2: Two of three is wrong, so delete existed one.
/*
 * One test per member of the ternary (dir index, dir item, inode ref).
 * NOTE(review): each hit presumably bumps @stage by one - confirm.
 */
4581 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4583 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4585 if (err & (INODE_REF_MISSING))
4588 /* stage must be smaller than 3 */
4591 trans = btrfs_start_transaction(root, 1);
/*
 * Two repair actions: btrfs_unlink() removes what is left of the link and
 * btrfs_add_link() recreates the missing member. The final message prints
 * "Delete" for stage == 2 and "Add" otherwise.
 */
4593 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4598 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4599 filetype, &index, 1, 1);
4603 btrfs_commit_transaction(trans, root);
4606 error("fail to repair inode %llu name %s filetype %u",
4607 ino, name, filetype);
4609 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
4610 stage == 2 ? "Delete" : "Add",
4611 ino, name, filetype);
4617 * Traverse the given INODE_REF and call find_dir_item() to find related
4618 * DIR_ITEM/DIR_INDEX.
4620 * @root: the root of the fs/file tree
4621 * @ref_key: the key of the INODE_REF
4622 * @path: the path provides node and slot
4623 * @refs_ret: the count of INODE_REF
4624 * @mode: the st_mode of INODE_ITEM
4625 * @name_ret: returns with the first ref's name
4626 * @namelen_ret: len of the name_ret
4628 * Return 0 if no error occurred.
4630 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4631 struct btrfs_path *path, char *name_ret,
4632 u32 *namelen_ret, u64 *refs_ret, int mode)
4634 struct btrfs_key key;
4635 struct btrfs_key location;
4636 struct btrfs_inode_ref *ref;
4637 struct extent_buffer *node;
4638 char namebuf[BTRFS_NAME_LEN] = {0};
4648 int need_research = 0;
/*
 * A repair may have changed the tree, so @path is released and @ref_key is
 * looked up again before the item is parsed.
 */
4656 /* since after repair, path and the dir item may be changed */
4657 if (need_research) {
4659 btrfs_release_path(path);
4660 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
4661 /* the item was deleted, let path point to the last checked item */
4663 if (path->slots[0] == 0)
4664 btrfs_prev_leaf(root, path);
/* Every name in an INODE_REF must be backed by a dir entry pointing at this inode item. */
4672 location.objectid = ref_key->objectid;
4673 location.type = BTRFS_INODE_ITEM_KEY;
4674 location.offset = 0;
4675 node = path->nodes[0];
4676 slot = path->slots[0];
/* namebuf is a char array, so sizeof(namebuf) / sizeof(*namebuf) clears the whole buffer. */
4678 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4679 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4680 total = btrfs_item_size_nr(node, slot);
4683 /* Update inode ref count */
4686 index = btrfs_inode_ref_index(node, ref);
4687 name_len = btrfs_inode_ref_name_len(node, ref);
/* Names longer than BTRFS_NAME_LEN are copied truncated (with a warning) so namebuf cannot overflow. */
4689 if (name_len <= BTRFS_NAME_LEN) {
4692 len = BTRFS_NAME_LEN;
4693 warning("root %llu INODE_REF[%llu %llu] name too long",
4694 root->objectid, ref_key->objectid, ref_key->offset);
4697 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4699 /* copy the first name found to name_ret */
4700 if (refs == 1 && name_ret) {
4701 memcpy(name_ret, namebuf, len);
/*
 * The root directory may only carry the ref (index 0, name "..", parent ==
 * itself); anything else is flagged as both dir index and dir item missing.
 */
4705 /* Check root dir ref */
4706 if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4707 if (index != 0 || len != strlen("..") ||
4708 strncmp("..", namebuf, len) ||
4709 ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4710 /* set err bits then repair will delete the ref */
4711 err |= DIR_INDEX_MISSING;
4712 err |= DIR_ITEM_MISSING;
/* ref_key->offset is used as the objectid of both dir keys. */
4717 /* Find related DIR_INDEX */
4718 key.objectid = ref_key->offset;
4719 key.type = BTRFS_DIR_INDEX_KEY;
4721 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4722 imode_to_type(mode));
4724 /* Find related dir_item */
4725 key.objectid = ref_key->offset;
4726 key.type = BTRFS_DIR_ITEM_KEY;
4727 key.offset = btrfs_name_hash(namebuf, len);
4728 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4729 imode_to_type(mode));
/* NOTE(review): the repair call receives the unclamped name_len while the lookups above used the clamped len. */
4731 if (tmp_err && repair) {
4732 ret = repair_ternary_lowmem(root, ref_key->offset,
4733 ref_key->objectid, index, namebuf,
4734 name_len, imode_to_type(mode),
4741 print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4742 imode_to_type(mode), tmp_err);
/* Step over this ref (header + on-disk name length) to the next one in the item. */
4744 len = sizeof(*ref) + name_len;
4745 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4756 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4757 * DIR_ITEM/DIR_INDEX.
4759 * @root: the root of the fs/file tree
4760 * @ref_key: the key of the INODE_EXTREF
4761 * @refs: the count of INODE_EXTREF
4762 * @mode: the st_mode of INODE_ITEM
4764 * Return 0 if no error occurred.
4766 static int check_inode_extref(struct btrfs_root *root,
4767 struct btrfs_key *ref_key,
4768 struct extent_buffer *node, int slot, u64 *refs,
4771 struct btrfs_key key;
4772 struct btrfs_key location;
4773 struct btrfs_inode_extref *extref;
4774 char namebuf[BTRFS_NAME_LEN] = {0};
/* Dir entries for this inode are matched against the location key built here. */
4784 location.objectid = ref_key->objectid;
4785 location.type = BTRFS_INODE_ITEM_KEY;
4786 location.offset = 0;
4788 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4789 total = btrfs_item_size_nr(node, slot);
4792 /* update inode ref count */
4794 name_len = btrfs_inode_extref_name_len(node, extref);
4795 index = btrfs_inode_extref_index(node, extref);
4796 parent = btrfs_inode_extref_parent(node, extref);
/* Over-long names are copied truncated to BTRFS_NAME_LEN, with a warning. */
4797 if (name_len <= BTRFS_NAME_LEN) {
4800 len = BTRFS_NAME_LEN;
4801 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4802 root->objectid, ref_key->objectid, ref_key->offset);
4804 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
/*
 * NOTE(review): the comparison length is the on-disk name_len, not the
 * clamped len used for the copy above.
 */
4806 /* Check root dir ref name */
4807 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4808 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4809 root->objectid, ref_key->objectid, ref_key->offset,
4811 err |= ROOT_DIR_ERROR;
/*
 * NOTE(review): @mode is passed straight to find_dir_item(), whose last
 * parameter is a u8 file type; check_inode_ref() converts with
 * imode_to_type(mode) first - confirm what the caller supplies here.
 */
4814 /* find related dir_index */
4815 key.objectid = parent;
4816 key.type = BTRFS_DIR_INDEX_KEY;
4818 ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4821 /* find related dir_item */
4822 key.objectid = parent;
4823 key.type = BTRFS_DIR_ITEM_KEY;
4824 key.offset = btrfs_name_hash(namebuf, len);
4825 ret = find_dir_item(root, &key, &location, namebuf, len, mode);
/* Step over this extref (header + on-disk name length) to the next one. */
4828 len = sizeof(*extref) + name_len;
4829 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4839 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4840 * DIR_ITEM/DIR_INDEX match.
4841 * Return with @index_ret.
4843 * @root: the root of the fs/file tree
4844 * @key: the key of the INODE_REF/INODE_EXTREF
4845 * @name: the name in the INODE_REF/INODE_EXTREF
4846 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4847 * @index_ret: the index in the INODE_REF/INODE_EXTREF,
4848 * value (u64)-1 means do not check index
4849 * @ext_ref: the EXTENDED_IREF feature
4851 * Return 0 if no error occurred.
4852 * Return >0 for error bitmap
4854 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4855 char *name, int namelen, u64 *index_ret,
4856 unsigned int ext_ref)
4858 struct btrfs_path path;
4859 struct btrfs_inode_ref *ref;
4860 struct btrfs_inode_extref *extref;
4861 struct extent_buffer *node;
4862 char ref_namebuf[BTRFS_NAME_LEN] = {0};
/* First pass: look for the name in the INODE_REF item addressed by @key. */
4875 btrfs_init_path(&path);
4876 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4878 ret = INODE_REF_MISSING;
4882 node = path.nodes[0];
4883 slot = path.slots[0];
4885 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4886 total = btrfs_item_size_nr(node, slot);
4888 /* Iterate all entry of INODE_REF */
4889 while (cur < total) {
4890 ret = INODE_REF_MISSING;
4892 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4893 ref_index = btrfs_inode_ref_index(node, ref);
/* An index of (u64)-1 from the caller disables the index comparison. */
4894 if (*index_ret != (u64)-1 && *index_ret != ref_index)
/*
 * Guard against a name that would run past the end of the item or exceed
 * BTRFS_NAME_LEN: warn, and copy at most what fits.
 */
4897 if (cur + sizeof(*ref) + ref_namelen > total ||
4898 ref_namelen > BTRFS_NAME_LEN) {
4899 warning("root %llu INODE %s[%llu %llu] name too long",
4901 key->type == BTRFS_INODE_REF_KEY ?
4903 key->objectid, key->offset);
4905 if (cur + sizeof(*ref) > total)
4907 len = min_t(u32, total - cur - sizeof(*ref),
4913 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4916 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Match found: hand the ref's index back to the caller. */
4919 *index_ret = ref_index;
4923 len = sizeof(*ref) + ref_namelen;
4924 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4929 /* Skip if not support EXTENDED_IREF feature */
4933 btrfs_release_path(&path);
4934 btrfs_init_path(&path);
/*
 * Second pass: retry with the INODE_EXTREF key. This writes through @key,
 * so the caller's key type and offset are overwritten.
 */
4936 dir_id = key->offset;
4937 key->type = BTRFS_INODE_EXTREF_KEY;
4938 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4940 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4942 ret = INODE_REF_MISSING;
4946 node = path.nodes[0];
4947 slot = path.slots[0];
4949 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4951 total = btrfs_item_size_nr(node, slot);
4953 /* Iterate all entry of INODE_EXTREF */
4954 while (cur < total) {
4955 ret = INODE_REF_MISSING;
4957 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4958 ref_index = btrfs_inode_extref_index(node, extref);
4959 parent = btrfs_inode_extref_parent(node, extref);
4960 if (*index_ret != (u64)-1 && *index_ret != ref_index)
/* Extrefs sharing one hash can belong to different parents, so the parent is checked too. */
4963 if (parent != dir_id)
/*
 * NOTE(review): key->type was set to BTRFS_INODE_EXTREF_KEY above, so the
 * REF_KEY test in the warning below can never be true at this point.
 */
4966 if (ref_namelen <= BTRFS_NAME_LEN) {
4969 len = BTRFS_NAME_LEN;
4970 warning("root %llu INODE %s[%llu %llu] name too long",
4972 key->type == BTRFS_INODE_REF_KEY ?
4974 key->objectid, key->offset);
4976 read_extent_buffer(node, ref_namebuf,
4977 (unsigned long)(extref + 1), len);
4979 if (len != namelen || strncmp(ref_namebuf, name, len))
4982 *index_ret = ref_index;
4987 len = sizeof(*extref) + ref_namelen;
4988 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4993 btrfs_release_path(&path);
4997 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
4998 u64 ino, u64 index, const char *namebuf,
4999 int name_len, u8 filetype, int err)
5001 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
5002 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
5003 root->objectid, key->objectid, key->offset, namebuf,
5005 err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5008 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
5009 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
5010 root->objectid, key->objectid, index, namebuf, filetype,
5011 err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5014 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
5016 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
5017 root->objectid, ino, index, namebuf, filetype,
5018 err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
5021 if (err & INODE_REF_MISSING)
5023 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5024 root->objectid, ino, key->objectid, namebuf, filetype);
5029 * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5031 * Returns error after repair
5033 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5034 u64 index, u8 filetype, char *namebuf, u32 name_len,
/*
 * Step 1: a missing inode item is recreated first; the INODE_ITEM bits are
 * cleared from @err afterwards.
 * NOTE(review): presumably only when the repair returned success - confirm.
 */
5039 if (err & INODE_ITEM_MISSING) {
5040 ret = repair_inode_item_missing(root, ino, filetype);
5042 err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
/*
 * Step 2: any remaining bit outside the INODE_ITEM group is a dir index /
 * dir item / inode ref problem and goes to repair_ternary_lowmem(); the
 * matching bits are then cleared.
 */
5045 if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5046 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5047 name_len, filetype, err);
5049 err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5050 err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5051 err &= ~(INODE_REF_MISSING);
/*
 * Helper for count_dir_isize(): visits the items of key type @type
 * (DIR_ITEM or DIR_INDEX, as passed by the caller) that belong to inode
 * @ino, walking backwards from the highest possible offset, and processes
 * the name length of each packed entry.
 */
5057 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5060 struct btrfs_key key;
5061 struct btrfs_path path;
5063 struct btrfs_dir_item *di;
/* Start from the largest offset so btrfs_previous_item() can walk down from the end. */
5073 key.offset = (u64)-1;
5075 btrfs_init_path(&path);
5076 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5081 /* if found, go to special case */
5086 ret = btrfs_previous_item(root, &path, ino, type);
5094 di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5096 total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
/* Walk every entry packed into the item; name lengths are clamped to BTRFS_NAME_LEN. */
5098 while (cur < total) {
5099 len = btrfs_dir_name_len(path.nodes[0], di);
5100 if (len > BTRFS_NAME_LEN)
5101 len = BTRFS_NAME_LEN;
5104 len += btrfs_dir_data_len(path.nodes[0], di);
5106 di = (struct btrfs_dir_item *)((char *)di + len);
5112 btrfs_release_path(&path);
/*
 * Compute the size a directory inode should report: the DIR_ITEM total plus
 * the DIR_INDEX total, each obtained from __count_dir_isize(), is stored in
 * *@size.
 */
5116 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5123 ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5127 ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5131 *size = item_size + index_size;
5135 error("failed to count root %llu INODE[%llu] root size",
5136 root->objectid, ino);
5141 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5142 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5144 * @root: the root of the fs/file tree
5145 * @di_key: the key of the DIR_ITEM/DIR_INDEX
5147 * @size: the st_size of the INODE_ITEM
5148 * @ext_ref: the EXTENDED_IREF feature
5150 * Return 0 if no error occurred.
5151 * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
5153 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5154 struct btrfs_path *path, u64 *size,
5155 unsigned int ext_ref)
5157 struct btrfs_dir_item *di;
5158 struct btrfs_inode_item *ii;
5159 struct btrfs_key key;
5160 struct btrfs_key location;
5161 struct extent_buffer *node;
5163 char namebuf[BTRFS_NAME_LEN] = {0};
5175 int need_research = 0;
5178 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5179 * ignore index check.
5181 if (di_key->type == BTRFS_DIR_INDEX_KEY)
5182 index = di_key->offset;
/*
 * After a repair the tree may have changed: flag DIR_COUNT_AGAIN so the
 * caller recomputes the directory size, then re-search @di_key.
 */
5189 /* since after repair, path and the dir item may be changed */
5190 if (need_research) {
5192 err |= DIR_COUNT_AGAIN;
5193 btrfs_release_path(path);
5194 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5195 /* the item was deleted, let path point the last checked item */
5197 if (path->slots[0] == 0)
5198 btrfs_prev_leaf(root, path);
5206 node = path->nodes[0];
5207 slot = path->slots[0];
5209 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5210 total = btrfs_item_size_nr(node, slot);
/* namebuf is a char array, so the divided sizeof still clears the whole buffer. */
5211 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
/* Check each btrfs_dir_item packed into this DIR_ITEM/DIR_INDEX. */
5213 while (cur < total) {
5214 data_len = btrfs_dir_data_len(node, di);
5217 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5219 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5220 di_key->objectid, di_key->offset, data_len);
5222 name_len = btrfs_dir_name_len(node, di);
/* Over-long names are copied truncated to BTRFS_NAME_LEN, with a warning. */
5223 if (name_len <= BTRFS_NAME_LEN) {
5226 len = BTRFS_NAME_LEN;
5227 warning("root %llu %s[%llu %llu] name too long",
5229 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5230 di_key->objectid, di_key->offset);
/* The running directory size counts the on-disk name length, not the clamped one. */
5232 (*size) += name_len;
5233 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5235 filetype = btrfs_dir_type(node, di);
/* A DIR_ITEM key's offset must equal the hash of the name it stores. */
5237 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5238 di_key->offset != btrfs_name_hash(namebuf, len)) {
5240 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5241 root->objectid, di_key->objectid, di_key->offset,
5242 namebuf, len, filetype, di_key->offset,
5243 btrfs_name_hash(namebuf, len));
5246 btrfs_dir_item_key_to_cpu(node, di, &location);
5247 /* Ignore related ROOT_ITEM check */
5248 if (location.type == BTRFS_ROOT_ITEM_KEY)
/* @path is reused for the inode lookup, so the dir item position is given up here. */
5251 btrfs_release_path(path);
5252 /* Check relative INODE_ITEM(existence/filetype) */
5253 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5255 tmp_err |= INODE_ITEM_MISSING;
5259 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5260 struct btrfs_inode_item);
5261 mode = btrfs_inode_mode(path->nodes[0], ii);
5262 if (imode_to_type(mode) != filetype) {
5263 tmp_err |= INODE_ITEM_MISMATCH;
/* The back reference is keyed (inode, INODE_REF, parent dir). */
5267 /* Check relative INODE_REF/INODE_EXTREF */
5268 key.objectid = location.objectid;
5269 key.type = BTRFS_INODE_REF_KEY;
5270 key.offset = di_key->objectid;
5271 tmp_err |= find_inode_ref(root, &key, namebuf, len,
/*
 * Cross-check the sibling entry: a DIR_ITEM needs its DIR_INDEX and the
 * other way round.
 * NOTE(review): the branch tests key.type, which was last assigned for the
 * INODE_REF lookup above and may have been changed by find_inode_ref() -
 * confirm it is not meant to test di_key->type.
 * NOTE(review): the hash and the find_dir_item() call use the on-disk
 * name_len, while namebuf holds at most the clamped len bytes.
 */
5274 /* check relative INDEX/ITEM */
5275 key.objectid = di_key->objectid;
5276 if (key.type == BTRFS_DIR_ITEM_KEY) {
5277 key.type = BTRFS_DIR_INDEX_KEY;
5280 key.type = BTRFS_DIR_ITEM_KEY;
5281 key.offset = btrfs_name_hash(namebuf, name_len);
5284 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5285 name_len, filetype);
5286 /* find_dir_item may find index */
5287 if (key.type == BTRFS_DIR_INDEX_KEY)
/* repair_dir_item() returns the error bits that remain; a different value means something was changed. */
5291 if (tmp_err && repair) {
5292 ret = repair_dir_item(root, di_key->objectid,
5293 location.objectid, index,
5294 imode_to_type(mode), namebuf,
5296 if (ret != tmp_err) {
5301 btrfs_release_path(path);
5302 print_dir_item_err(root, di_key, location.objectid, index,
5303 namebuf, name_len, filetype, tmp_err);
/* Advance past header + name + data to the next packed entry. */
5305 len = sizeof(*di) + name_len + data_len;
5306 di = (struct btrfs_dir_item *)((char *)di + len);
5309 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5310 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5311 root->objectid, di_key->objectid,
/*
 * Put @path back on @di_key for the caller.
 * NOTE(review): a negative errno is OR-ed into the @err bitmask below, which
 * sets many unrelated flag bits at once.
 */
5318 btrfs_release_path(path);
5319 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5321 err |= ret > 0 ? -ENOENT : ret;
5326 * Wrapper function of btrfs_punch_hole.
5328 * Returns 0 means success.
5329 * Returns not 0 means error.
5331 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5334 struct btrfs_trans_handle *trans;
/* The hole is punched inside its own transaction; a failure to start one is returned as-is. */
5337 trans = btrfs_start_transaction(root, 1);
5339 return PTR_ERR(trans);
5341 ret = btrfs_punch_hole(trans, root, ino, start, len);
5343 error("failed to add hole [%llu, %llu] in inode [%llu]",
5346 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
/* NOTE(review): the commit result is not captured on this line. */
5349 btrfs_commit_transaction(trans, root);
5354 * Check file extent datasum/hole, update the size of the file extents,
5355 * check and update the last offset of the file extent.
5357 * @root: the root of fs/file tree.
5358 * @fkey: the key of the file extent.
5359 * @nodatasum: INODE_NODATASUM feature.
5360 * @size: the sum of all EXTENT_DATA items size for this inode.
5361 * @end: the offset of the last extent.
5363 * Return 0 if no error occurred.
5365 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5366 struct extent_buffer *node, int slot,
5367 unsigned int nodatasum, u64 *size, u64 *end)
5369 struct btrfs_file_extent_item *fi;
5372 u64 extent_num_bytes;
5374 u64 csum_found; /* In byte size, sectorsize aligned */
5375 u64 search_start; /* Logical range start we search for csum */
5376 u64 search_len; /* Logical range len we search for csum */
5377 unsigned int extent_type;
5378 unsigned int is_hole;
5383 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
/*
 * Inline extents carry their data in the item: they must be non-empty and,
 * when uncompressed, their recorded length must equal the inline item length.
 */
5385 /* Check inline extent */
5386 extent_type = btrfs_file_extent_type(node, fi);
5387 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5388 struct btrfs_item *e = btrfs_item_nr(slot);
5389 u32 item_inline_len;
5391 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5392 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5393 compressed = btrfs_file_extent_compression(node, fi);
5394 if (extent_num_bytes == 0) {
5396 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5397 root->objectid, fkey->objectid, fkey->offset);
5398 err |= FILE_EXTENT_ERROR;
5400 if (!compressed && extent_num_bytes != item_inline_len) {
5402 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5403 root->objectid, fkey->objectid, fkey->offset,
5404 extent_num_bytes, item_inline_len);
5405 err |= FILE_EXTENT_ERROR;
/* Inline data still advances both the running end offset and the byte total. */
5407 *end += extent_num_bytes;
5408 *size += extent_num_bytes;
5412 /* Check extent type */
5413 if (extent_type != BTRFS_FILE_EXTENT_REG &&
5414 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5415 err |= FILE_EXTENT_ERROR;
5416 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5417 root->objectid, fkey->objectid, fkey->offset);
5421 /* Check REG_EXTENT/PREALLOC_EXTENT */
5422 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5423 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5424 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5425 extent_offset = btrfs_file_extent_offset(node, fi);
5426 compressed = btrfs_file_extent_compression(node, fi);
/* A file extent with both disk_bytenr and disk_num_bytes zero is treated as a hole. */
5427 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5430 * Check EXTENT_DATA csum
5432 * For plain (uncompressed) extent, we should only check the range
5433 * we're referring to, as it's possible that part of prealloc extent
5434 * has been written, and has csum:
5436 * |<--- Original large preallocated extent A ---->|
5437 * |<- Prealloc File Extent ->|<- Regular Extent ->|
5440 * For compressed extent, we should check the whole range.
5443 search_start = disk_bytenr + extent_offset;
5444 search_len = extent_num_bytes;
5446 search_start = disk_bytenr;
5447 search_len = disk_num_bytes;
/*
 * Three mutually exclusive csum complaints: csums present on a nodatasum
 * inode; a regular non-hole extent with too few csums (or a lookup error);
 * a prealloc extent that has csums.
 */
5449 ret = count_csum_range(root, search_start, search_len, &csum_found);
5450 if (csum_found > 0 && nodatasum) {
5451 err |= ODD_CSUM_ITEM;
5452 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5453 root->objectid, fkey->objectid, fkey->offset);
5454 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5455 !is_hole && (ret < 0 || csum_found < search_len)) {
5456 err |= CSUM_ITEM_MISSING;
5457 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5458 root->objectid, fkey->objectid, fkey->offset,
5459 csum_found, search_len);
5460 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5461 err |= ODD_CSUM_ITEM;
5462 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5463 root->objectid, fkey->objectid, fkey->offset, csum_found);
/*
 * Without the NO_HOLES feature, extents must be contiguous: this extent has
 * to start exactly where the previous one ended (*end). The gap
 * [*end, fkey->offset) is what punch_extent_hole() fills.
 */
5466 /* Check EXTENT_DATA hole */
5467 if (!no_holes && *end != fkey->offset) {
5469 ret = punch_extent_hole(root, fkey->objectid,
5470 *end, fkey->offset - *end);
5471 if (!repair || ret) {
5472 err |= FILE_EXTENT_ERROR;
5473 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
5474 root->objectid, fkey->objectid, fkey->offset);
5478 *end += extent_num_bytes;
5480 *size += extent_num_bytes;
5486 * Set inode item nbytes to @nbytes
5488 * Returns 0 on success
5489 * Returns != 0 on error
/*
 * Set the nbytes field of inode @ino's INODE_ITEM to @nbytes.
 *
 * The update runs in its own transaction.  @path is released and reused for
 * the INODE_ITEM lookup, then re-searched for the key it referenced on entry.
 */
5491 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5492 struct btrfs_path *path,
5493 u64 ino, u64 nbytes)
5495 struct btrfs_trans_handle *trans;
5496 struct btrfs_inode_item *ii;
5497 struct btrfs_key key;
5498 struct btrfs_key research_key;
/* Remember the key @path currently points at so it can be looked up again. */
5502 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5505 key.type = BTRFS_INODE_ITEM_KEY;
5508 trans = btrfs_start_transaction(root, 1);
5509 if (IS_ERR(trans)) {
5510 ret = PTR_ERR(trans);
5515 btrfs_release_path(path);
5516 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5524 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5525 struct btrfs_inode_item);
5526 btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5527 btrfs_mark_buffer_dirty(path->nodes[0]);
5529 btrfs_commit_transaction(trans, root);
5532 error("failed to set nbytes in inode %llu root %llu",
5533 ino, root->root_key.objectid);
/* The whole message stays on one line and ends with the newline. */
5535 printf("Set nbytes in inode item %llu root %llu to %llu\n", ino,
5536 root->root_key.objectid, nbytes);
/* Re-search the key saved on entry. */
5539 btrfs_release_path(path);
5540 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5547 * Set directory inode isize to @isize.
5549 * Returns 0 on success.
5550 * Returns != 0 on error.
/*
 * Set the size field of directory inode @ino's INODE_ITEM to @isize inside a
 * transaction, then re-search @path for the key it referenced on entry.
 * NOTE(review): the @ino/@isize parameter line is not visible here; they are
 * inferred from their uses below.
 */
5552 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5553 struct btrfs_path *path,
5556 struct btrfs_trans_handle *trans;
5557 struct btrfs_inode_item *ii;
5558 struct btrfs_key key;
5559 struct btrfs_key research_key;
/* Remember the key @path currently points at so it can be looked up again. */
5563 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5566 key.type = BTRFS_INODE_ITEM_KEY;
5569 trans = btrfs_start_transaction(root, 1);
5570 if (IS_ERR(trans)) {
5571 ret = PTR_ERR(trans);
5576 btrfs_release_path(path);
5577 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5585 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5586 struct btrfs_inode_item);
5587 btrfs_set_inode_size(path->nodes[0], ii, isize);
5588 btrfs_mark_buffer_dirty(path->nodes[0]);
5590 btrfs_commit_transaction(trans, root);
5593 error("failed to set isize in inode %llu root %llu",
5594 ino, root->root_key.objectid);
5596 printf("Set isize in inode %llu root %llu to %llu\n",
5597 ino, root->root_key.objectid, isize);
/* Re-search the key saved on entry. */
5599 btrfs_release_path(path);
5600 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5607 * Wrapper function for btrfs_add_orphan_item().
5609 * Returns 0 on success.
5610 * Returns != 0 on error.
/*
 * Add an orphan item for inode @ino via btrfs_add_orphan_item() inside a
 * transaction, then re-search @path for the key it referenced on entry.
 */
5612 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5613 struct btrfs_path *path, u64 ino)
5615 struct btrfs_trans_handle *trans;
5616 struct btrfs_key research_key;
/* Remember the key @path currently points at so it can be looked up again. */
5620 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5622 trans = btrfs_start_transaction(root, 1);
5623 if (IS_ERR(trans)) {
5624 ret = PTR_ERR(trans);
5629 btrfs_release_path(path);
5630 ret = btrfs_add_orphan_item(trans, root, path, ino);
5632 btrfs_commit_transaction(trans, root);
5635 error("failed to add inode %llu as orphan item root %llu",
5636 ino, root->root_key.objectid);
5638 printf("Added inode %llu as orphan item root %llu\n",
5639 ino, root->root_key.objectid);
/* Re-search the key saved on entry. */
5641 btrfs_release_path(path);
5642 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5648 /* Set inode_item nlink to @ref_count.
5649 * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
5651 * Returns 0 on success
/*
 * Reset the nlink field of inode @ino's INODE_ITEM to @ref_count.
 *
 * When @ref_count is 0 the inode is first linked into lost+found through
 * link_inode_to_lostfound(), which receives &ref_count.  If no usable @name
 * is supplied, the decimal inode number is used as the file name.  @path is
 * re-searched for the key it referenced on entry before returning.
 */
5653 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
5654 struct btrfs_path *path, u64 ino,
5655 const char *name, u32 namelen,
5656 u64 ref_count, u8 filetype, u64 *nlink)
5658 struct btrfs_trans_handle *trans;
5659 struct btrfs_inode_item *ii;
5660 struct btrfs_key key;
5661 struct btrfs_key old_key;
5662 char namebuf[BTRFS_NAME_LEN] = {0};
/* Remember the key @path currently points at so it can be looked up again. */
5668 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
5670 if (name && namelen) {
5671 ASSERT(namelen <= BTRFS_NAME_LEN);
5672 memcpy(namebuf, name, namelen);
/* No name known: fall back to the inode number printed in decimal. */
5675 sprintf(namebuf, "%llu", ino);
5676 name_len = count_digits(ino);
5677 printf("Can't find file name for inode %llu, use %s instead\n",
5681 trans = btrfs_start_transaction(root, 1);
5682 if (IS_ERR(trans)) {
5683 ret = PTR_ERR(trans);
5687 btrfs_release_path(path);
5688 /* if refs is 0, put it into lostfound */
5689 if (ref_count == 0) {
5690 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
5691 name_len, filetype, &ref_count);
5696 /* reset inode_item's nlink to ref_count */
5698 key.type = BTRFS_INODE_ITEM_KEY;
5701 btrfs_release_path(path);
5702 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5708 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5709 struct btrfs_inode_item);
5710 btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
5711 btrfs_mark_buffer_dirty(path->nodes[0]);
5716 btrfs_commit_transaction(trans, root);
/* Argument order follows the format: inode number first, then root id. */
5720 "fail to repair nlink of inode %llu root %llu name %s filetype %u",
5721 ino, root->objectid, namebuf, filetype);
5723 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
5724 ino, root->objectid, namebuf, filetype);
/* Re-search the key saved on entry. */
5727 btrfs_release_path(path);
5728 ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
5735 * Check INODE_ITEM and related ITEMs (the same inode number)
5736 * 1. check link count
5737 * 2. check inode ref/extref
5738 * 3. check dir item/index
5740 * @ext_ref: the EXTENDED_IREF feature
5742 * Return 0 if no error occurred.
5743 * Return >0 (an error bitmap) on error or when the traversal is done
/*
 * Starting at the INODE_ITEM that @path points to, read its
 * isize/nbytes/mode/nlink/flags, then step through the following items with
 * the same objectid and dispatch on key type (INODE_REF, INODE_EXTREF,
 * DIR_ITEM/DIR_INDEX, EXTENT_DATA, XATTR).  Afterwards the values collected
 * from those items are compared against the inode item and, when the global
 * repair flag is set, the *_lowmem repair helpers are invoked.
 */
5745 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5746 unsigned int ext_ref)
5748 struct extent_buffer *node;
5749 struct btrfs_inode_item *ii;
5750 struct btrfs_key key;
5751 struct btrfs_key last_key;
5760 u64 extent_size = 0;
5762 unsigned int nodatasum;
5766 char namebuf[BTRFS_NAME_LEN] = {0};
5769 node = path->nodes[0];
5770 slot = path->slots[0];
5772 btrfs_item_key_to_cpu(node, &key, slot);
5773 inode_id = key.objectid;
/* Orphan-objectid items are not inodes: just step to the next item. */
5775 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5776 ret = btrfs_next_item(root, path);
5782 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5783 isize = btrfs_inode_size(node, ii);
5784 nbytes = btrfs_inode_nbytes(node, ii);
5785 mode = btrfs_inode_mode(node, ii);
5786 dir = imode_to_type(mode) == BTRFS_FT_DIR;
5787 nlink = btrfs_inode_nlink(node, ii);
5788 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
/* Record the current key before advancing; it is re-searched on LAST_ITEM. */
5791 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
5792 ret = btrfs_next_item(root, path);
5794 /* out will fill 'err' using current statistics */
5796 } else if (ret > 0) {
5801 node = path->nodes[0];
5802 slot = path->slots[0];
5803 btrfs_item_key_to_cpu(node, &key, slot);
/* A different objectid means the items of this inode are exhausted. */
5804 if (key.objectid != inode_id)
5808 case BTRFS_INODE_REF_KEY:
5809 ret = check_inode_ref(root, &key, path, namebuf,
5810 &name_len, &refs, mode);
5813 case BTRFS_INODE_EXTREF_KEY:
5814 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5815 warning("root %llu EXTREF[%llu %llu] isn't supported",
5816 root->objectid, key.objectid,
5818 ret = check_inode_extref(root, &key, node, slot, &refs,
5822 case BTRFS_DIR_ITEM_KEY:
5823 case BTRFS_DIR_INDEX_KEY:
5825 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5826 root->objectid, inode_id,
5827 imode_to_type(mode), key.objectid,
5830 ret = check_dir_item(root, &key, path, &size, ext_ref);
5833 case BTRFS_EXTENT_DATA_KEY:
5835 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5836 root->objectid, inode_id, key.objectid,
5839 ret = check_file_extent(root, &key, node, slot,
5840 nodatasum, &extent_size,
5844 case BTRFS_XATTR_ITEM_KEY:
5847 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5848 key.objectid, key.type, key.offset);
/* On LAST_ITEM, position @path back on the last key seen. */
5853 if (err & LAST_ITEM) {
5854 btrfs_release_path(path);
5855 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
5860 /* verify INODE_ITEM nlink/isize/nbytes */
5862 if (repair && (err & DIR_COUNT_AGAIN)) {
5863 err &= ~DIR_COUNT_AGAIN;
5864 count_dir_isize(root, inode_id, &size);
5867 if ((nlink != 1 || refs != 1) && repair) {
5868 ret = repair_inode_nlinks_lowmem(root, path, inode_id,
5869 namebuf, name_len, refs, imode_to_type(mode),
5874 err |= LINK_COUNT_ERROR;
5875 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5876 root->objectid, inode_id, nlink);
5880 * Just a warning, as dir inode nbytes is just an
5881 * instructive value.
5883 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5884 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5885 root->objectid, inode_id,
5886 root->fs_info->nodesize);
5889 if (isize != size) {
5891 ret = repair_dir_isize_lowmem(root, path,
5893 if (!repair || ret) {
5896 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5897 root->objectid, inode_id, isize, size);
5901 if (nlink != refs) {
5903 ret = repair_inode_nlinks_lowmem(root, path,
5904 inode_id, namebuf, name_len, refs,
5905 imode_to_type(mode), &nlink);
5906 if (!repair || ret) {
5907 err |= LINK_COUNT_ERROR;
5909 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5910 root->objectid, inode_id, nlink, refs);
5912 } else if (!nlink) {
5914 ret = repair_inode_orphan_item_lowmem(root,
5916 if (!repair || ret) {
5918 error("root %llu INODE[%llu] is orphan item",
5919 root->objectid, inode_id);
5923 if (!nbytes && !no_holes && extent_end < isize) {
5925 ret = punch_extent_hole(root, inode_id,
5926 extent_end, isize - extent_end);
5927 if (!repair || ret) {
5928 err |= NBYTES_ERROR;
5930 "root %llu INODE[%llu] size %llu should have a file extent hole",
5931 root->objectid, inode_id, isize);
5935 if (nbytes != extent_size) {
5937 ret = repair_inode_nbytes_lowmem(root, path,
5938 inode_id, extent_size);
5939 if (!repair || ret) {
5940 err |= NBYTES_ERROR;
5942 "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5943 root->objectid, inode_id, nbytes,
5949 if (err & LAST_ITEM)
5950 btrfs_next_item(root, path);
5955 * Insert the missing inode item and inode ref.
5957 * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref dir.
5958 * Root dir should be handled specially because root dir is the root of fs.
5960 * returns err (>0 or 0) after repair
/*
 * Repair the root directory inode (objectid BTRFS_FIRST_FREE_OBJECTID)
 * according to the bits set in @err: INODE_REF_MISSING inserts a ".." inode
 * ref pointing at itself, and INODE_ITEM_MISSING calls
 * repair_inode_item_missing().  Each bit is cleared from @err on the path
 * that reports the fix.
 */
5962 static int repair_fs_first_inode(struct btrfs_root *root, int err)
5964 struct btrfs_trans_handle *trans;
5965 struct btrfs_key key;
5966 struct btrfs_path path;
5967 int filetype = BTRFS_FT_DIR;
5970 btrfs_init_path(&path);
5972 if (err & INODE_REF_MISSING) {
5973 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5974 key.type = BTRFS_INODE_REF_KEY;
5975 key.offset = BTRFS_FIRST_FREE_OBJECTID;
5977 trans = btrfs_start_transaction(root, 1);
5978 if (IS_ERR(trans)) {
5979 ret = PTR_ERR(trans);
5983 btrfs_release_path(&path);
5984 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
/* The root dir references itself: inode and parent are both FIRST_FREE. */
5988 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
5989 BTRFS_FIRST_FREE_OBJECTID,
5990 BTRFS_FIRST_FREE_OBJECTID, 0);
5994 printf("Add INODE_REF[%llu %llu] name %s\n",
5995 BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
5997 err &= ~INODE_REF_MISSING;
6000 error("fail to insert first inode's ref");
6001 btrfs_commit_transaction(trans, root);
6004 if (err & INODE_ITEM_MISSING) {
6005 ret = repair_inode_item_missing(root,
6006 BTRFS_FIRST_FREE_OBJECTID, filetype);
6009 err &= ~INODE_ITEM_MISSING;
6013 error("fail to repair first inode");
6014 btrfs_release_path(&path);
6019 * check first root dir's inode_item and inode_ref
6021 * returns 0 means no error
6022 * returns >0 means error
6023 * returns <0 means fatal error
/*
 * Verify that the root directory inode (objectid BTRFS_FIRST_FREE_OBJECTID)
 * has an INODE_ITEM of directory type and a ".." INODE_REF, calling
 * repair_fs_first_inode() with the collected error bits.  A negative search
 * result takes precedence over the error bitmap in the return value.
 */
6025 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
6027 struct btrfs_path path;
6028 struct btrfs_key key;
6029 struct btrfs_inode_item *ii;
6035 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6036 key.type = BTRFS_INODE_ITEM_KEY;
6039 /* For root being dropped, we don't need to check first inode */
6040 if (btrfs_root_refs(&root->root_item) == 0 &&
6041 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
6042 BTRFS_FIRST_FREE_OBJECTID)
6045 btrfs_init_path(&path);
6046 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6051 err |= INODE_ITEM_MISSING;
6053 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6054 struct btrfs_inode_item);
6055 mode = btrfs_inode_mode(path.nodes[0], ii);
/* The first inode of a tree must be a directory. */
6056 if (imode_to_type(mode) != BTRFS_FT_DIR)
6057 err |= INODE_ITEM_MISMATCH;
6060 /* lookup first inode ref */
6061 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6062 key.type = BTRFS_INODE_REF_KEY;
6063 /* special index value */
6066 ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6072 btrfs_release_path(&path);
6075 err = repair_fs_first_inode(root, err);
6077 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6078 error("root dir INODE_ITEM is %s",
6079 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6080 if (err & INODE_REF_MISSING)
6081 error("root dir INODE_REF is missing");
/* Fatal (negative) errors win over the error bitmap. */
6083 return ret < 0 ? ret : err;
/*
 * Look up a tree backref of @rec in its backref rb-tree by building a
 * temporary match key and searching with compare_extent_backref().
 * NOTE(review): the condition choosing between the parent (full_backref)
 * and root forms of the key is not visible here; presumably a non-zero
 * @parent selects full_backref. Confirm against the full file.
 */
6086 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6087 u64 parent, u64 root)
6089 struct rb_node *node;
6090 struct tree_backref *back = NULL;
6091 struct tree_backref match = {
6098 match.parent = parent;
6099 match.node.full_backref = 1;
6104 node = rb_search(&rec->backref_tree, &match.node.node,
6105 (rb_compare_keys)compare_extent_backref, NULL);
6107 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Look up a data backref of @rec in its backref rb-tree.  The match key
 * carries found_ref and disk_bytenr in addition to the fields set below,
 * and is compared with compare_extent_backref().
 * NOTE(review): the branch that selects the parent/full_backref form is not
 * visible here. Confirm against the full file.
 */
6112 static struct data_backref *find_data_backref(struct extent_record *rec,
6113 u64 parent, u64 root,
6114 u64 owner, u64 offset,
6116 u64 disk_bytenr, u64 bytes)
6118 struct rb_node *node;
6119 struct data_backref *back = NULL;
6120 struct data_backref match = {
6127 .found_ref = found_ref,
6128 .disk_bytenr = disk_bytenr,
6132 match.parent = parent;
6133 match.node.full_backref = 1;
6138 node = rb_search(&rec->backref_tree, &match.node.node,
6139 (rb_compare_keys)compare_extent_backref, NULL);
6141 back = to_data_backref(rb_node_to_extent_backref(node));
6146 * Iterate over all items in the tree and call check_inode_item() on them.
6148 * @root: the root of the tree to be checked.
6149 * @ext_ref: the EXTENDED_IREF feature
6151 * Return 0 if no error found.
6152 * Return <0 for error.
/*
 * Check one fs/subvolume tree: first the root directory inode via
 * check_fs_first_inode(), then the whole tree by alternating
 * walk_down_tree_v2() and walk_up_tree_v2().  The walk starts at the tree
 * root, or at the recorded drop_progress key for a root being dropped.
 */
6154 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6156 struct btrfs_path path;
6157 struct node_refs nrefs;
6158 struct btrfs_root_item *root_item = &root->root_item;
6164 * We need to manually check the first inode item(256)
6165 * As the following traversal function will only start from
6166 * the first inode item in the leaf, if inode item(256) is missing
6167 * we will just skip it forever.
6169 ret = check_fs_first_inode(root, ext_ref);
6174 memset(&nrefs, 0, sizeof(nrefs));
6175 level = btrfs_header_level(root->node);
6176 btrfs_init_path(&path);
/* Root still referenced, or no drop in progress: start from the root node. */
6178 if (btrfs_root_refs(root_item) > 0 ||
6179 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6180 path.nodes[level] = root->node;
6181 path.slots[level] = 0;
6182 extent_buffer_get(root->node);
6184 struct btrfs_key key;
/* Otherwise start from the drop_progress key at drop_level. */
6186 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6187 level = root_item->drop_level;
6188 path.lowest_level = level;
6189 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6196 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
6199 /* if ret is negative, walk shall stop */
6205 ret = walk_up_tree_v2(root, &path, &level);
6207 /* Normal exit, reset ret to err */
6214 btrfs_release_path(&path);
6219 * Find the relative ref for root_ref and root_backref.
6221 * @root: the root of the root tree.
6222 * @ref_key: the key of the root ref.
6224 * Return 0 if no error occurred.
/*
 * Given a ROOT_REF or ROOT_BACKREF item at @node/@slot, look up its
 * counterpart in @root and verify that dirid, sequence, name length and name
 * agree.  Sets ROOT_REF_MISSING or ROOT_REF_MISMATCH in the error bitmap.
 * Names longer than BTRFS_NAME_LEN are truncated for comparison, with a
 * warning.
 */
6226 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6227 struct extent_buffer *node, int slot)
6229 struct btrfs_path path;
6230 struct btrfs_key key;
6231 struct btrfs_root_ref *ref;
6232 struct btrfs_root_ref *backref;
6233 char ref_name[BTRFS_NAME_LEN] = {0};
6234 char backref_name[BTRFS_NAME_LEN] = {0};
6240 u32 backref_namelen;
6245 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6246 ref_dirid = btrfs_root_ref_dirid(node, ref);
6247 ref_seq = btrfs_root_ref_sequence(node, ref);
6248 ref_namelen = btrfs_root_ref_name_len(node, ref);
6250 if (ref_namelen <= BTRFS_NAME_LEN) {
6253 len = BTRFS_NAME_LEN;
6254 warning("%s[%llu %llu] ref_name too long",
6255 ref_key->type == BTRFS_ROOT_REF_KEY ?
6256 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
/* The name bytes are read from directly after the btrfs_root_ref struct. */
6259 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6261 /* Find relative root_ref */
6262 key.objectid = ref_key->offset;
/* Flips the type: ROOT_REF becomes ROOT_BACKREF and vice versa. */
6263 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6264 key.offset = ref_key->objectid;
6266 btrfs_init_path(&path);
6267 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6269 err |= ROOT_REF_MISSING;
6270 error("%s[%llu %llu] couldn't find relative ref",
6271 ref_key->type == BTRFS_ROOT_REF_KEY ?
6272 "ROOT_REF" : "ROOT_BACKREF",
6273 ref_key->objectid, ref_key->offset);
6277 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6278 struct btrfs_root_ref);
6279 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6280 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6281 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6283 if (backref_namelen <= BTRFS_NAME_LEN) {
6284 len = backref_namelen;
6286 len = BTRFS_NAME_LEN;
6287 warning("%s[%llu %llu] ref_name too long",
6288 key.type == BTRFS_ROOT_REF_KEY ?
6289 "ROOT_REF" : "ROOT_BACKREF",
6290 key.objectid, key.offset);
6292 read_extent_buffer(path.nodes[0], backref_name,
6293 (unsigned long)(backref + 1), len);
/* Here len is the (possibly truncated) length of the counterpart's name. */
6295 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6296 ref_namelen != backref_namelen ||
6297 strncmp(ref_name, backref_name, len)) {
6298 err |= ROOT_REF_MISMATCH;
6299 error("%s[%llu %llu] mismatch relative ref",
6300 ref_key->type == BTRFS_ROOT_REF_KEY ?
6301 "ROOT_REF" : "ROOT_BACKREF",
6302 ref_key->objectid, ref_key->offset);
6305 btrfs_release_path(&path);
6310 * Check all fs/file trees in low-memory mode.
6312 * 1. for fs tree root item, call check_fs_root_v2()
6313 * 2. for fs tree root ref/backref, call check_root_ref()
6315 * Return 0 if no error occurred.
/*
 * Iterate the root tree starting at BTRFS_FS_TREE_OBJECTID.  For every
 * ROOT_ITEM whose objectid satisfies fs_root_objectid(), the tree is read and
 * checked with check_fs_root_v2(); ROOT_REF/ROOT_BACKREF items are checked
 * with check_root_ref().  Iteration stops once the objectid exceeds
 * BTRFS_LAST_FREE_OBJECTID.
 */
6317 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6319 struct btrfs_root *tree_root = fs_info->tree_root;
6320 struct btrfs_root *cur_root = NULL;
6321 struct btrfs_path path;
6322 struct btrfs_key key;
6323 struct extent_buffer *node;
6324 unsigned int ext_ref;
6329 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6331 btrfs_init_path(&path);
6332 key.objectid = BTRFS_FS_TREE_OBJECTID;
6334 key.type = BTRFS_ROOT_ITEM_KEY;
6336 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6340 } else if (ret > 0) {
6346 node = path.nodes[0];
6347 slot = path.slots[0];
6348 btrfs_item_key_to_cpu(node, &key, slot);
6349 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6351 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6352 fs_root_objectid(key.objectid)) {
/* Reloc trees are read uncached and freed again after the check below. */
6353 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6354 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6357 key.offset = (u64)-1;
6358 cur_root = btrfs_read_fs_root(fs_info, &key);
6361 if (IS_ERR(cur_root)) {
6362 error("Fail to read fs/subvol tree: %lld",
6368 ret = check_fs_root_v2(cur_root, ext_ref);
6371 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6372 btrfs_free_fs_root(cur_root);
6373 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6374 key.type == BTRFS_ROOT_BACKREF_KEY) {
6375 ret = check_root_ref(tree_root, &key, node, slot);
6379 ret = btrfs_next_item(tree_root, &path);
6389 btrfs_release_path(&path);
/*
 * Dispatch the fs-roots check according to the global check_mode:
 * check_fs_roots_v2() for lowmem mode, check_fs_roots() otherwise.  A status
 * line is printed to stderr unless progress reporting is enabled.
 */
6393 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6394 struct cache_tree *root_cache)
6398 if (!ctx.progress_enabled)
6399 fprintf(stderr, "checking fs roots\n");
6400 if (check_mode == CHECK_MODE_LOWMEM)
6401 ret = check_fs_roots_v2(fs_info);
6403 ret = check_fs_roots(fs_info, root_cache);
/*
 * Walk every backref of @rec and detect inconsistencies: backrefs missing
 * from the extent tree, tree backrefs never referenced back, data backrefs
 * whose found/wanted counts, disk bytenr or byte length disagree with the
 * record, and a global ref count that differs from rec->refs.
 * NOTE(review): the statements gating the fprintf calls on @print_errs and
 * the return value are not visible here. Presumably messages are printed
 * only when @print_errs is set; the caller below treats 0 as "all fine".
 */
6408 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6410 struct extent_backref *back, *tmp;
6411 struct tree_backref *tback;
6412 struct data_backref *dback;
6416 rbtree_postorder_for_each_entry_safe(back, tmp,
6417 &rec->backref_tree, node) {
6418 if (!back->found_extent_tree) {
6422 if (back->is_data) {
6423 dback = to_data_backref(back);
6424 fprintf(stderr, "Data backref %llu %s %llu"
6425 " owner %llu offset %llu num_refs %lu"
6426 " not found in extent tree\n",
6427 (unsigned long long)rec->start,
6428 back->full_backref ?
6430 back->full_backref ?
6431 (unsigned long long)dback->parent:
6432 (unsigned long long)dback->root,
6433 (unsigned long long)dback->owner,
6434 (unsigned long long)dback->offset,
6435 (unsigned long)dback->num_refs);
6437 tback = to_tree_backref(back);
6438 fprintf(stderr, "Tree backref %llu parent %llu"
6439 " root %llu not found in extent tree\n",
6440 (unsigned long long)rec->start,
6441 (unsigned long long)tback->parent,
6442 (unsigned long long)tback->root);
/* Tree backref present in the extent tree but never referenced back. */
6445 if (!back->is_data && !back->found_ref) {
6449 tback = to_tree_backref(back);
6450 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6451 (unsigned long long)rec->start,
6452 back->full_backref ? "parent" : "root",
6453 back->full_backref ?
6454 (unsigned long long)tback->parent :
6455 (unsigned long long)tback->root, back);
6457 if (back->is_data) {
6458 dback = to_data_backref(back);
6459 if (dback->found_ref != dback->num_refs) {
6463 fprintf(stderr, "Incorrect local backref count"
6464 " on %llu %s %llu owner %llu"
6465 " offset %llu found %u wanted %u back %p\n",
6466 (unsigned long long)rec->start,
6467 back->full_backref ?
6469 back->full_backref ?
6470 (unsigned long long)dback->parent:
6471 (unsigned long long)dback->root,
6472 (unsigned long long)dback->owner,
6473 (unsigned long long)dback->offset,
6474 dback->found_ref, dback->num_refs, back);
6476 if (dback->disk_bytenr != rec->start) {
6480 fprintf(stderr, "Backref disk bytenr does not"
6481 " match extent record, bytenr=%llu, "
6482 "ref bytenr=%llu\n",
6483 (unsigned long long)rec->start,
6484 (unsigned long long)dback->disk_bytenr);
6487 if (dback->bytes != rec->nr) {
6491 fprintf(stderr, "Backref bytes do not match "
6492 "extent backref, bytenr=%llu, ref "
6493 "bytes=%llu, backref bytes=%llu\n",
6494 (unsigned long long)rec->start,
6495 (unsigned long long)rec->nr,
6496 (unsigned long long)dback->bytes);
/* Accumulate the global count: data backrefs add their found_ref. */
6499 if (!back->is_data) {
6502 dback = to_data_backref(back);
6503 found += dback->found_ref;
6506 if (found != rec->refs) {
6510 fprintf(stderr, "Incorrect global backref count "
6511 "on %llu found %llu wanted %llu\n",
6512 (unsigned long long)rec->start,
6513 (unsigned long long)found,
6514 (unsigned long long)rec->refs);
/*
 * rb_free_nodes() callback: converts the rb-tree @node back to its
 * containing extent_backref (the release itself is on a line not shown here).
 */
6520 static void __free_one_backref(struct rb_node *node)
6522 struct extent_backref *back = rb_node_to_extent_backref(node);
/* Release every backref in @rec's backref tree via __free_one_backref(). */
6527 static void free_all_extent_backrefs(struct extent_record *rec)
6529 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Empty @extent_cache: take the first cache extent, remove it from the tree
 * and free the backrefs of its extent record.
 * NOTE(review): the loop construct and the release of the record itself are
 * on lines not shown here.
 */
6532 static void free_extent_record_cache(struct cache_tree *extent_cache)
6534 struct cache_extent *cache;
6535 struct extent_record *rec;
6538 cache = first_cache_extent(extent_cache);
6541 rec = container_of(cache, struct extent_record, cache);
6542 remove_cache_extent(extent_cache, cache);
6543 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once it is fully verified: content and owner
 * ref checked, extent item refs equal to the non-zero found refs, no
 * duplicates, all_backpointers_checked() returning 0, and none of the
 * bad_full_backref / crossing_stripes / wrong_chunk_type flags set.
 */
6548 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6549 struct extent_record *rec)
6551 if (rec->content_checked && rec->owner_ref_checked &&
6552 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6553 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6554 !rec->bad_full_backref && !rec->crossing_stripes &&
6555 !rec->wrong_chunk_type) {
6556 remove_cache_extent(extent_cache, &rec->cache);
6557 free_all_extent_backrefs(rec);
6558 list_del_init(&rec->list);
/*
 * Verify that tree block @buf is referenced by the root named in its header
 * owner field.  First the backrefs of @rec are scanned for a found,
 * non-full backref whose root equals the header owner.  Failing that, the
 * owner's fs tree is searched down to level + 1 and the parent's block
 * pointer is compared with buf->start.
 *
 * Returns 0 when an owner reference was found, 1 otherwise.
 */
6564 static int check_owner_ref(struct btrfs_root *root,
6565 struct extent_record *rec,
6566 struct extent_buffer *buf)
6568 struct extent_backref *node, *tmp;
6569 struct tree_backref *back;
6570 struct btrfs_root *ref_root;
6571 struct btrfs_key key;
6572 struct btrfs_path path;
6573 struct extent_buffer *parent;
6578 rbtree_postorder_for_each_entry_safe(node, tmp,
6579 &rec->backref_tree, node) {
6582 if (!node->found_ref)
6584 if (node->full_backref)
6586 back = to_tree_backref(node);
6587 if (btrfs_header_owner(buf) == back->root)
6590 BUG_ON(rec->is_root);
6592 /* try to find the block by search corresponding fs tree */
6593 key.objectid = btrfs_header_owner(buf);
6594 key.type = BTRFS_ROOT_ITEM_KEY;
6595 key.offset = (u64)-1;
6597 ref_root = btrfs_read_fs_root(root->fs_info, &key);
6598 if (IS_ERR(ref_root))
6601 level = btrfs_header_level(buf);
/* The first key of @buf is used as the search key (item key or node key). */
6603 btrfs_item_key_to_cpu(buf, &key, 0);
6605 btrfs_node_key_to_cpu(buf, &key, 0);
6607 btrfs_init_path(&path);
/* Stop the search one level above @buf so its parent is available. */
6608 path.lowest_level = level + 1;
6609 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6613 parent = path.nodes[level + 1];
6614 if (parent && buf->start == btrfs_node_blockptr(parent,
6615 path.slots[level + 1]))
6618 btrfs_release_path(&path);
6619 return found ? 0 : 1;
/*
 * Scan the backrefs of @rec for a tree backref whose root is
 * BTRFS_EXTENT_TREE_OBJECTID.
 * NOTE(review): the return statements and the handling of data/full
 * backrefs are on lines not shown here.
 */
6622 static int is_extent_tree_record(struct extent_record *rec)
6624 struct extent_backref *node, *tmp;
6625 struct tree_backref *back;
6628 rbtree_postorder_for_each_entry_safe(node, tmp,
6629 &rec->backref_tree, node) {
6632 back = to_tree_backref(node);
6633 if (node->full_backref)
6635 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Look up the extent record covering [start, start + len) in @extent_cache.
 * If is_extent_tree_record() accepts it, register a corrupt extent record
 * keyed by the record's parent key.
 * NOTE(review): the @start/@len parameter line and the early-return values
 * are on lines not shown here.
 */
6642 static int record_bad_block_io(struct btrfs_fs_info *info,
6643 struct cache_tree *extent_cache,
6646 struct extent_record *rec;
6647 struct cache_extent *cache;
6648 struct btrfs_key key;
6650 cache = lookup_cache_extent(extent_cache, start, len);
6654 rec = container_of(cache, struct extent_record, cache);
6655 if (!is_extent_tree_record(rec))
6658 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6659 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot + 1 of @buf.
 *
 * For a node (header level != 0) the two btrfs_key_ptr entries are exchanged
 * and btrfs_fixup_low_keys() is called with the node's first key.  For a leaf
 * the item data, item offsets, item sizes and keys are exchanged.
 */
6662 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6663 struct extent_buffer *buf, int slot)
6665 if (btrfs_header_level(buf)) {
6666 struct btrfs_key_ptr ptr1, ptr2;
6668 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6669 sizeof(struct btrfs_key_ptr));
6670 read_extent_buffer(buf, &ptr2,
6671 btrfs_node_key_ptr_offset(slot + 1),
6672 sizeof(struct btrfs_key_ptr));
6673 write_extent_buffer(buf, &ptr1,
6674 btrfs_node_key_ptr_offset(slot + 1),
6675 sizeof(struct btrfs_key_ptr));
6676 write_extent_buffer(buf, &ptr2,
6677 btrfs_node_key_ptr_offset(slot),
6678 sizeof(struct btrfs_key_ptr));
6680 struct btrfs_disk_key key;
6681 btrfs_node_key(buf, &key, 0);
6682 btrfs_fixup_low_keys(root, path, &key,
6683 btrfs_header_level(buf) + 1);
6686 struct btrfs_item *item1, *item2;
6687 struct btrfs_key k1, k2;
6688 char *item1_data, *item2_data;
6689 u32 item1_offset, item2_offset, item1_size, item2_size;
6691 item1 = btrfs_item_nr(slot);
6692 item2 = btrfs_item_nr(slot + 1);
6693 btrfs_item_key_to_cpu(buf, &k1, slot);
6694 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6695 item1_offset = btrfs_item_offset(buf, item1);
6696 item2_offset = btrfs_item_offset(buf, item2);
6697 item1_size = btrfs_item_size(buf, item1);
6698 item2_size = btrfs_item_size(buf, item2);
6700 item1_data = malloc(item1_size);
6703 item2_data = malloc(item2_size);
6709 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6710 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): item1_data holds item1_size bytes but is written with length
 * item2_size (and item2_data with item1_size). Confirm this is intended
 * when the two item sizes differ.
 */
6712 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6713 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6717 btrfs_set_item_offset(buf, item1, item2_offset);
6718 btrfs_set_item_offset(buf, item2, item1_offset);
6719 btrfs_set_item_size(buf, item1, item2_size);
6720 btrfs_set_item_size(buf, item2, item1_size);
/* Exchange the keys; path->slots[0] selects which item's key is rewritten. */
6722 path->slots[0] = slot;
6723 btrfs_set_item_key_unsafe(root, path, &k2);
6724 path->slots[0] = slot + 1;
6725 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair key ordering in the block at path->lowest_level: each adjacent key
 * pair is compared, and pairs that are not strictly ascending are exchanged
 * with swap_values(), after which the buffer is marked dirty.
 */
6730 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6732 struct extent_buffer *buf;
6733 struct btrfs_key k1, k2;
6735 int level = path->lowest_level;
6738 buf = path->nodes[level];
6739 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6741 btrfs_node_key_to_cpu(buf, &k1, i);
6742 btrfs_node_key_to_cpu(buf, &k2, i + 1);
6744 btrfs_item_key_to_cpu(buf, &k1, i);
6745 btrfs_item_key_to_cpu(buf, &k2, i + 1);
/* Pair already in ascending order. */
6747 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6749 ret = swap_values(root, path, buf, i);
6752 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf by shifting the following
 * item headers down and decrementing nritems.  Only DIR_INDEX, EXTENT_ITEM,
 * METADATA_ITEM, TREE_BLOCK_REF and EXTENT_DATA_REF keys are eligible.
 * btrfs_fixup_low_keys() is called with the leaf's first key (the condition
 * guarding that call is on a line not shown here).
 */
6758 static int delete_bogus_item(struct btrfs_root *root,
6759 struct btrfs_path *path,
6760 struct extent_buffer *buf, int slot)
6762 struct btrfs_key key;
6763 int nritems = btrfs_header_nritems(buf);
6765 btrfs_item_key_to_cpu(buf, &key, slot);
6767 /* These are all the keys we can deal with missing. */
6768 if (key.type != BTRFS_DIR_INDEX_KEY &&
6769 key.type != BTRFS_EXTENT_ITEM_KEY &&
6770 key.type != BTRFS_METADATA_ITEM_KEY &&
6771 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6772 key.type != BTRFS_EXTENT_DATA_REF_KEY)
6775 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6776 (unsigned long long)key.objectid, key.type,
6777 (unsigned long long)key.offset, slot, buf->start);
/* Only the item header array is compacted; item data is left in place. */
6778 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6779 btrfs_item_nr_offset(slot + 1),
6780 sizeof(struct btrfs_item) *
6781 (nritems - slot - 1));
6782 btrfs_set_header_nritems(buf, nritems - 1);
6784 struct btrfs_disk_key disk_key;
6786 btrfs_item_key(buf, &disk_key, 0);
6787 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6789 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].  Item 0 must end
 * at BTRFS_LEAF_DATA_SIZE(root) and every later item must end where the
 * previous item starts.  An item ending beyond that limit is handed to
 * delete_bogus_item(); an item ending short of it has its data shifted up by
 * the gap and its offset updated.
 */
6793 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6795 struct extent_buffer *buf;
6799 /* We should only get this for leaves */
6800 BUG_ON(path->lowest_level);
6801 buf = path->nodes[0];
6803 for (i = 0; i < btrfs_header_nritems(buf); i++) {
6804 unsigned int shift = 0, offset;
6806 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6807 BTRFS_LEAF_DATA_SIZE(root)) {
6808 if (btrfs_item_end_nr(buf, i) >
6809 BTRFS_LEAF_DATA_SIZE(root)) {
6810 ret = delete_bogus_item(root, path, buf, i);
6813 fprintf(stderr, "item is off the end of the "
6814 "leaf, can't fix\n");
6818 shift = BTRFS_LEAF_DATA_SIZE(root) -
6819 btrfs_item_end_nr(buf, i);
6820 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6821 btrfs_item_offset_nr(buf, i - 1)) {
6822 if (btrfs_item_end_nr(buf, i) >
6823 btrfs_item_offset_nr(buf, i - 1)) {
6824 ret = delete_bogus_item(root, path, buf, i);
6827 fprintf(stderr, "items overlap, can't fix\n");
6831 shift = btrfs_item_offset_nr(buf, i - 1) -
6832 btrfs_item_end_nr(buf, i);
6837 printf("Shifting item nr %d by %u bytes in block %llu\n",
6838 i, shift, (unsigned long long)buf->start);
6839 offset = btrfs_item_offset_nr(buf, i);
/* Move the item's data up by @shift bytes to close the gap. */
6840 memmove_extent_buffer(buf,
6841 btrfs_leaf_data(buf) + offset + shift,
6842 btrfs_leaf_data(buf) + offset,
6843 btrfs_item_size_nr(buf, i));
6844 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6846 btrfs_mark_buffer_dirty(buf);
6850 * We may have moved things, in which case we want to exit so we don't
6851 * write those changes out. Once we have proper abort functionality in
6852 * progs this can be changed to something nicer.
6859 * Attempt to fix basic block failures. If we can't fix it for whatever reason
6860 * then just return -EIO.
/*
 * Try to repair tree block @buf for the two supported statuses, bad key
 * order and invalid item offsets.  Every root found by btrfs_find_all_roots()
 * for buf->start is read, a transaction is started on it, and a COW search
 * down to @buf's level (with block checking skipped) positions the path
 * before fix_key_order() or fix_item_offset() is run.
 */
6862 static int try_to_fix_bad_block(struct btrfs_root *root,
6863 struct extent_buffer *buf,
6864 enum btrfs_tree_block_status status)
6866 struct btrfs_trans_handle *trans;
6867 struct ulist *roots;
6868 struct ulist_node *node;
6869 struct btrfs_root *search_root;
6870 struct btrfs_path path;
6871 struct ulist_iterator iter;
6872 struct btrfs_key root_key, key;
/* Any other status cannot be repaired here. */
6875 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6876 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6879 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6883 btrfs_init_path(&path);
6884 ULIST_ITER_INIT(&iter);
6885 while ((node = ulist_next(roots, &iter))) {
6886 root_key.objectid = node->val;
6887 root_key.type = BTRFS_ROOT_ITEM_KEY;
6888 root_key.offset = (u64)-1;
6890 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6897 trans = btrfs_start_transaction(search_root, 0);
6898 if (IS_ERR(trans)) {
6899 ret = PTR_ERR(trans);
/* Search down to @buf's own level without validating the (bad) block. */
6903 path.lowest_level = btrfs_header_level(buf);
6904 path.skip_check_block = 1;
6905 if (path.lowest_level)
6906 btrfs_node_key_to_cpu(buf, &key, 0);
6908 btrfs_item_key_to_cpu(buf, &key, 0);
6909 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6912 btrfs_commit_transaction(trans, search_root);
6915 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6916 ret = fix_key_order(search_root, &path);
6917 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6918 ret = fix_item_offset(search_root, &path);
6920 btrfs_commit_transaction(trans, search_root);
6923 btrfs_release_path(&path);
6924 btrfs_commit_transaction(trans, search_root);
6927 btrfs_release_path(&path);
/*
 * Run the structural check on tree block @buf and update its extent record.
 *
 * The record covering buf->start is looked up in @extent_cache and tagged
 * with the block's generation, its level and, when the block has items, the
 * objectid of its first key.  The block is then validated with
 * btrfs_check_leaf() or btrfs_check_node() against rec->parent_key.  A block
 * that is not clean can be passed to try_to_fix_bad_block() (any guarding
 * condition is not visible here); "bad block" is printed if it is still not
 * clean afterwards.  On the visible tail the record is marked
 * content_checked, its owner reference is settled (directly when @flags has
 * BTRFS_BLOCK_FLAG_FULL_BACKREF, through check_owner_ref() otherwise) and
 * maybe_free_extent_rec() is called.
 *
 * NOTE(review): return statements and some conditions are missing from this
 * listing; confirm return values against the full source.
 */
6931 static int check_block(struct btrfs_root *root,
6932 struct cache_tree *extent_cache,
6933 struct extent_buffer *buf, u64 flags)
6935 struct extent_record *rec;
6936 struct cache_extent *cache;
6937 struct btrfs_key key;
6938 enum btrfs_tree_block_status status;
6942 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6945 rec = container_of(cache, struct extent_record, cache);
6946 rec->generation = btrfs_header_generation(buf);
6948 level = btrfs_header_level(buf);
6949 if (btrfs_header_nritems(buf) > 0) {
6952 btrfs_item_key_to_cpu(buf, &key, 0);
6954 btrfs_node_key_to_cpu(buf, &key, 0);
6956 rec->info_objectid = key.objectid;
6958 rec->info_level = level;
6960 if (btrfs_is_leaf(buf))
6961 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6963 status = btrfs_check_node(root, &rec->parent_key, buf);
6965 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6967 status = try_to_fix_bad_block(root, buf, status);
6968 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6970 fprintf(stderr, "bad block %llu\n",
6971 (unsigned long long)buf->start);
6974 * Signal to callers we need to start the scan over
6975 * again since we'll have cowed blocks.
6980 rec->content_checked = 1;
6981 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6982 rec->owner_ref_checked = 1;
6984 ret = check_owner_ref(root, rec, buf);
6986 rec->owner_ref_checked = 1;
6990 maybe_free_extent_rec(extent_cache, rec);
/*
 * Search the rec->backrefs list for a tree backref matching @parent or
 * @root.
 *
 * Each list entry is converted with to_extent_backref() and
 * to_tree_backref().  The visible tests inspect node->full_backref and
 * compare @parent with back->parent and @root with back->root.
 *
 * NOTE(review): the loop-advance, skip and return statements are missing
 * from this listing, so the precise match rules and the not-found result
 * should be confirmed against the full source.
 */
6995 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6996 u64 parent, u64 root)
6998 struct list_head *cur = rec->backrefs.next;
6999 struct extent_backref *node;
7000 struct tree_backref *back;
7002 while(cur != &rec->backrefs) {
7003 node = to_extent_backref(cur);
7007 back = to_tree_backref(node);
7009 if (!node->full_backref)
7011 if (parent == back->parent)
7014 if (node->full_backref)
7016 if (back->root == root)
/*
 * Allocate a tree backref for @rec.
 *
 * The embedded extent_backref node is zeroed.  The backref is then set up
 * either as a full backref recording @parent (full_backref = 1) or as a
 * regular one (full_backref = 0).
 *
 * NOTE(review): the branch condition, any allocation-failure handling, the
 * use of @root and the return statement are not visible in this listing.
 */
7024 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
7025 u64 parent, u64 root)
7027 struct tree_backref *ref = malloc(sizeof(*ref));
7031 memset(&ref->node, 0, sizeof(ref->node));
7033 ref->parent = parent;
7034 ref->node.full_backref = 1;
7037 ref->node.full_backref = 0;
/*
 * Search the rec->backrefs list for a data backref matching the given
 * reference.
 *
 * The visible tests compare @parent with back->parent, and
 * @root/@owner/@offset with the corresponding fields of the backref.  For a
 * root/owner/offset match, a candidate whose found_ref bit is already set
 * is additionally compared against @bytes and @disk_bytenr when the
 * caller's found_ref argument is non-zero.
 *
 * NOTE(review): one parameter line and the loop-advance, skip and return
 * statements are missing from this listing; confirm the exact match rules
 * against the full source.
 */
7044 static struct data_backref *find_data_backref(struct extent_record *rec,
7045 u64 parent, u64 root,
7046 u64 owner, u64 offset,
7048 u64 disk_bytenr, u64 bytes)
7050 struct list_head *cur = rec->backrefs.next;
7051 struct extent_backref *node;
7052 struct data_backref *back;
7054 while(cur != &rec->backrefs) {
7055 node = to_extent_backref(cur);
7059 back = to_data_backref(node);
7061 if (!node->full_backref)
7063 if (parent == back->parent)
7066 if (node->full_backref)
7068 if (back->root == root && back->owner == owner &&
7069 back->offset == offset) {
7070 if (found_ref && node->found_ref &&
7071 (back->bytes != bytes ||
7072 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data backref for @rec.
 *
 * The embedded extent_backref node is zeroed and flagged is_data.  The
 * backref is then filled either as a full backref recording @parent
 * (full_backref = 1) or as a regular one recording @offset
 * (full_backref = 0).  ref->bytes takes max_size, and rec->max_size is
 * raised to max_size when that is larger.
 *
 * NOTE(review): part of the parameter list, the branch condition, any
 * allocation-failure handling and the return statement are not visible in
 * this listing.
 */
7082 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7083 u64 parent, u64 root,
7084 u64 owner, u64 offset,
7087 struct data_backref *ref = malloc(sizeof(*ref));
7091 memset(&ref->node, 0, sizeof(ref->node));
7092 ref->node.is_data = 1;
7095 ref->parent = parent;
7098 ref->node.full_backref = 1;
7102 ref->offset = offset;
7103 ref->node.full_backref = 0;
7105 ref->bytes = max_size;
7108 if (max_size > rec->max_size)
7109 rec->max_size = max_size;
7113 /* Check if the type of extent matches with its chunk */
/*
 * Set rec->wrong_chunk_type when the kind of @rec does not match the block
 * group that btrfs_lookup_first_block_group() returns for rec->start:
 *  - a non-metadata record must live in a BTRFS_BLOCK_GROUP_DATA group;
 *  - a metadata record must live in a SYSTEM or METADATA group.  When the
 *    record has backrefs, the first node of rec->backref_tree decides which
 *    one: a tree backref whose root is BTRFS_CHUNK_TREE_OBJECTID expects
 *    SYSTEM, any other root expects METADATA.  A data backref on a metadata
 *    record is flagged as well.
 *
 * NOTE(review): the handling of a failed block group lookup and the early
 * returns are not visible in this listing.
 */
7114 static void check_extent_type(struct extent_record *rec)
7116 struct btrfs_block_group_cache *bg_cache;
7118 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7122 /* data extent, check chunk directly*/
7123 if (!rec->metadata) {
7124 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7125 rec->wrong_chunk_type = 1;
7129 /* metadata extent, check the obvious case first */
7130 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7131 BTRFS_BLOCK_GROUP_METADATA))) {
7132 rec->wrong_chunk_type = 1;
7137 * Check SYSTEM extent, as it's also marked as metadata, we can only
7138 * make sure it's a SYSTEM extent by its backref
7140 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7141 struct extent_backref *node;
7142 struct tree_backref *tback;
7145 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7146 if (node->is_data) {
7147 /* tree block shouldn't have data backref */
7148 rec->wrong_chunk_type = 1;
7151 tback = container_of(node, struct tree_backref, node);
7153 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7154 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7156 bg_type = BTRFS_BLOCK_GROUP_METADATA;
7157 if (!(bg_cache->flags & bg_type))
7158 rec->wrong_chunk_type = 1;
7163 * Allocate a new extent record, fill default values from @tmpl and insert into
7164 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7165 * the cache, otherwise it fails.
/*
 * Build a new extent_record from @tmpl and insert it into @extent_cache
 * without looking for an existing record first.
 *
 * tmpl->max_size must be non-zero (BUG_ON).  rec->nr becomes the larger of
 * tmpl->nr and tmpl->max_size, while the cache entry itself is keyed on
 * tmpl->start with size tmpl->nr.  State that is not copied from the
 * template (num_duplicates, bad_full_backref, crossing_stripes,
 * wrong_chunk_type) starts at zero and flag_block_full_backref at
 * FLAG_UNSET.  On the visible path rec->nr is added to the global
 * bytes_used, check_crossing_stripes() is evaluated for a nodesize-long
 * range at rec->start, and check_extent_type() is run.
 *
 * NOTE(review): the allocation-failure and insert-failure handling, the
 * condition guarding the stripe check and the return statement are not
 * visible in this listing.
 */
7167 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7168 struct extent_record *tmpl)
7170 struct extent_record *rec;
7173 BUG_ON(tmpl->max_size == 0);
7174 rec = malloc(sizeof(*rec));
7177 rec->start = tmpl->start;
7178 rec->max_size = tmpl->max_size;
7179 rec->nr = max(tmpl->nr, tmpl->max_size);
7180 rec->found_rec = tmpl->found_rec;
7181 rec->content_checked = tmpl->content_checked;
7182 rec->owner_ref_checked = tmpl->owner_ref_checked;
7183 rec->num_duplicates = 0;
7184 rec->metadata = tmpl->metadata;
7185 rec->flag_block_full_backref = FLAG_UNSET;
7186 rec->bad_full_backref = 0;
7187 rec->crossing_stripes = 0;
7188 rec->wrong_chunk_type = 0;
7189 rec->is_root = tmpl->is_root;
7190 rec->refs = tmpl->refs;
7191 rec->extent_item_refs = tmpl->extent_item_refs;
7192 rec->parent_generation = tmpl->parent_generation;
7193 INIT_LIST_HEAD(&rec->backrefs);
7194 INIT_LIST_HEAD(&rec->dups);
7195 INIT_LIST_HEAD(&rec->list);
7196 rec->backref_tree = RB_ROOT;
7197 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7198 rec->cache.start = tmpl->start;
7199 rec->cache.size = tmpl->nr;
7200 ret = insert_cache_extent(extent_cache, &rec->cache);
7205 bytes_used += rec->nr;
7208 rec->crossing_stripes = check_crossing_stripes(global_info,
7209 rec->start, global_info->nodesize);
7210 check_extent_type(rec);
7215 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7217 * - refs - if found, increase refs
7218 * - is_root - if found, set
7219 * - content_checked - if found, set
7220 * - owner_ref_checked - if found, set
7222 * If not found, create a new one, initialize and insert.
/*
 * Merge @tmpl into the extent record overlapping [tmpl->start, +tmpl->nr),
 * or create a new record through add_extent_rec_nolookup() when the lookup
 * finds nothing.
 *
 * For an existing record the visible code:
 *  - may reset rec->nr to max(tmpl->nr, tmpl->max_size);
 *  - when tmpl->found_rec is set and either the start differs or the record
 *    was already found, queues the record on the global duplicate_extents
 *    list (if not already queued) and appends a freshly allocated duplicate
 *    entry carrying start, max_size, metadata and extent_item_refs to
 *    rec->dups, bumping rec->num_duplicates;
 *  - takes over tmpl->extent_item_refs for non-duplicates, printing a
 *    message when the record already carried a count;
 *  - propagates content_checked, owner_ref_checked, parent_key,
 *    parent_generation (when non-zero) and a larger max_size;
 *  - re-runs check_crossing_stripes() and check_extent_type(), then calls
 *    maybe_free_extent_rec().
 *
 * NOTE(review): several conditions, the handling of refs/is_root, the
 * allocation-failure path and the return statements are missing from this
 * listing.
 */
7224 static int add_extent_rec(struct cache_tree *extent_cache,
7225 struct extent_record *tmpl)
7227 struct extent_record *rec;
7228 struct cache_extent *cache;
7232 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7234 rec = container_of(cache, struct extent_record, cache);
7238 rec->nr = max(tmpl->nr, tmpl->max_size);
7241 * We need to make sure to reset nr to whatever the extent
7242 * record says was the real size, this way we can compare it to
7245 if (tmpl->found_rec) {
7246 if (tmpl->start != rec->start || rec->found_rec) {
7247 struct extent_record *tmp;
7250 if (list_empty(&rec->list))
7251 list_add_tail(&rec->list,
7252 &duplicate_extents);
7255 * We have to do this song and dance in case we
7256 * find an extent record that falls inside of
7257 * our current extent record but does not have
7258 * the same objectid.
7260 tmp = malloc(sizeof(*tmp));
7263 tmp->start = tmpl->start;
7264 tmp->max_size = tmpl->max_size;
7267 tmp->metadata = tmpl->metadata;
7268 tmp->extent_item_refs = tmpl->extent_item_refs;
7269 INIT_LIST_HEAD(&tmp->list);
7270 list_add_tail(&tmp->list, &rec->dups);
7271 rec->num_duplicates++;
7278 if (tmpl->extent_item_refs && !dup) {
7279 if (rec->extent_item_refs) {
7280 fprintf(stderr, "block %llu rec "
7281 "extent_item_refs %llu, passed %llu\n",
7282 (unsigned long long)tmpl->start,
7283 (unsigned long long)
7284 rec->extent_item_refs,
7285 (unsigned long long)tmpl->extent_item_refs);
7287 rec->extent_item_refs = tmpl->extent_item_refs;
7291 if (tmpl->content_checked)
7292 rec->content_checked = 1;
7293 if (tmpl->owner_ref_checked)
7294 rec->owner_ref_checked = 1;
7295 memcpy(&rec->parent_key, &tmpl->parent_key,
7296 sizeof(tmpl->parent_key));
7297 if (tmpl->parent_generation)
7298 rec->parent_generation = tmpl->parent_generation;
7299 if (rec->max_size < tmpl->max_size)
7300 rec->max_size = tmpl->max_size;
7303 * A metadata extent can't cross stripe_len boundary, otherwise
7304 * kernel scrub won't be able to handle it.
7305 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7309 rec->crossing_stripes = check_crossing_stripes(
7310 global_info, rec->start,
7311 global_info->nodesize);
7312 check_extent_type(rec);
7313 maybe_free_extent_rec(extent_cache, rec);
7317 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree-block backref (@parent or @root) for the extent at @bytenr.
 *
 * The extent record is looked up in @extent_cache with a 1-byte range; the
 * visible code can create a placeholder record from a zeroed template via
 * add_extent_rec_nolookup() and look it up again.  A record whose start
 * differs from @bytenr is treated specially (see the in-line note).  The
 * backref is taken from find_tree_backref() or allocated with
 * alloc_tree_backref().  Depending on @found_ref, either node.found_ref or
 * node.found_extent_tree is set, with a message on stderr when the bit was
 * already set.  The visible tail inserts the backref into rec->backref_tree
 * (warning if rb_insert() reports failure), then runs check_extent_type()
 * and maybe_free_extent_rec().
 *
 * NOTE(review): the conditions around record creation and rb_insert(), the
 * remaining template fields and all return statements are missing from this
 * listing.
 */
7322 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7323 u64 parent, u64 root, int found_ref)
7325 struct extent_record *rec;
7326 struct tree_backref *back;
7327 struct cache_extent *cache;
7329 bool insert = false;
7331 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7333 struct extent_record tmpl;
7335 memset(&tmpl, 0, sizeof(tmpl));
7336 tmpl.start = bytenr;
7341 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7345 /* really a bug in cache_extent implement now */
7346 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7351 rec = container_of(cache, struct extent_record, cache);
7352 if (rec->start != bytenr) {
7354 * Several cause, from unaligned bytenr to over lapping extents
7359 back = find_tree_backref(rec, parent, root);
7361 back = alloc_tree_backref(rec, parent, root);
7368 if (back->node.found_ref) {
7369 fprintf(stderr, "Extent back ref already exists "
7370 "for %llu parent %llu root %llu \n",
7371 (unsigned long long)bytenr,
7372 (unsigned long long)parent,
7373 (unsigned long long)root);
7375 back->node.found_ref = 1;
7377 if (back->node.found_extent_tree) {
7378 fprintf(stderr, "Extent back ref already exists "
7379 "for %llu parent %llu root %llu \n",
7380 (unsigned long long)bytenr,
7381 (unsigned long long)parent,
7382 (unsigned long long)root);
7384 back->node.found_extent_tree = 1;
7387 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7388 compare_extent_backref));
7389 check_extent_type(rec);
7390 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref (@parent, or @root/@owner/@offset) for the extent
 * at @bytenr.
 *
 * The extent record is looked up in @extent_cache with a 1-byte range; the
 * visible code can create a placeholder record (start = @bytenr,
 * max_size = @max_size) through add_extent_rec_nolookup() and look it up
 * again.  rec->max_size is raised to @max_size when smaller.  The backref
 * comes from find_data_backref() or alloc_data_backref().
 *
 * On the found_ref side the visible code requires num_refs == 1 (BUG_ON),
 * requires back->bytes == @max_size when the backref was already found
 * (BUG_ON), bumps back->found_ref, refreshes bytes/disk_bytenr when they
 * differ (erasing the node from rec->backref_tree so it can be
 * re-inserted), and marks the record content_checked and
 * owner_ref_checked.  On the extent-tree side it stores @num_refs and sets
 * found_extent_tree, printing a message if that bit was already set.
 * The visible tail inserts the backref into rec->backref_tree and calls
 * maybe_free_extent_rec().
 *
 * NOTE(review): the branch conditions, the remaining template fields and
 * all return statements are missing from this listing.
 */
7394 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7395 u64 parent, u64 root, u64 owner, u64 offset,
7396 u32 num_refs, int found_ref, u64 max_size)
7398 struct extent_record *rec;
7399 struct data_backref *back;
7400 struct cache_extent *cache;
7402 bool insert = false;
7404 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7406 struct extent_record tmpl;
7408 memset(&tmpl, 0, sizeof(tmpl));
7409 tmpl.start = bytenr;
7411 tmpl.max_size = max_size;
7413 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7417 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7422 rec = container_of(cache, struct extent_record, cache);
7423 if (rec->max_size < max_size)
7424 rec->max_size = max_size;
7427 * If found_ref is set then max_size is the real size and must match the
7428 * existing refs. So if we have already found a ref then we need to
7429 * make sure that this ref matches the existing one, otherwise we need
7430 * to add a new backref so we can notice that the backrefs don't match
7431 * and we need to figure out who is telling the truth. This is to
7432 * account for that awful fsync bug I introduced where we'd end up with
7433 * a btrfs_file_extent_item that would have its length include multiple
7434 * prealloc extents or point inside of a prealloc extent.
7436 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7439 back = alloc_data_backref(rec, parent, root, owner, offset,
7446 BUG_ON(num_refs != 1);
7447 if (back->node.found_ref)
7448 BUG_ON(back->bytes != max_size);
7449 back->node.found_ref = 1;
7450 back->found_ref += 1;
7451 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7452 back->bytes = max_size;
7453 back->disk_bytenr = bytenr;
7455 /* Need to reinsert if not already in the tree */
7457 rb_erase(&back->node.node, &rec->backref_tree);
7462 rec->content_checked = 1;
7463 rec->owner_ref_checked = 1;
7465 if (back->node.found_extent_tree) {
7466 fprintf(stderr, "Extent back ref already exists "
7467 "for %llu parent %llu root %llu "
7468 "owner %llu offset %llu num_refs %lu\n",
7469 (unsigned long long)bytenr,
7470 (unsigned long long)parent,
7471 (unsigned long long)root,
7472 (unsigned long long)owner,
7473 (unsigned long long)offset,
7474 (unsigned long)num_refs);
7476 back->num_refs = num_refs;
7477 back->node.found_extent_tree = 1;
7480 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7481 compare_extent_backref));
7483 maybe_free_extent_rec(extent_cache, rec);
/*
 * Register the range [@bytenr, @bytenr + @size) in the @seen tree and queue
 * it in the @pending tree.
 *
 * NOTE(review): the check on the result of the first add_cache_extent()
 * call and the return statements are not visible in this listing; the
 * result of the second call is not stored.
 */
7487 static int add_pending(struct cache_tree *pending,
7488 struct cache_tree *seen, u64 bytenr, u32 size)
7491 ret = add_cache_extent(seen, bytenr, size);
7494 add_cache_extent(pending, bytenr, size);
/*
 * Choose the next block ranges to process and store them in @bits (at most
 * @bits_nr entries).
 *
 * Visible selection order:
 *  - the first entry of @reada, if any, is copied to bits[0];
 *  - @nodes is searched starting 32768 bytes before @last (when @last is
 *    larger than that), falling back to a search from 0;
 *  - @pending is searched from 0;
 *  - consecutive cache entries are copied into @bits until the tree ends or
 *    @bits_nr is reached;
 *  - when more than 8 slots remain free, further @pending entries following
 *    bits[0] are appended as long as each starts within 32768 bytes of the
 *    end of the previous one.
 *
 * NOTE(review): `last` is a u64 but is stored in an `unsigned long`
 * (node_start); on targets where unsigned long is 32 bits this would
 * truncate large byte numbers -- confirm whether that matters here.
 * NOTE(review): the conditions linking these steps, the trailing
 * parameter(s) and the return statements are missing from this listing.
 */
7498 static int pick_next_pending(struct cache_tree *pending,
7499 struct cache_tree *reada,
7500 struct cache_tree *nodes,
7501 u64 last, struct block_info *bits, int bits_nr,
7504 unsigned long node_start = last;
7505 struct cache_extent *cache;
7508 cache = search_cache_extent(reada, 0);
7510 bits[0].start = cache->start;
7511 bits[0].size = cache->size;
7516 if (node_start > 32768)
7517 node_start -= 32768;
7519 cache = search_cache_extent(nodes, node_start);
7521 cache = search_cache_extent(nodes, 0);
7524 cache = search_cache_extent(pending, 0);
7529 bits[ret].start = cache->start;
7530 bits[ret].size = cache->size;
7531 cache = next_cache_extent(cache);
7533 } while (cache && ret < bits_nr);
7539 bits[ret].start = cache->start;
7540 bits[ret].size = cache->size;
7541 cache = next_cache_extent(cache);
7543 } while (cache && ret < bits_nr);
7545 if (bits_nr - ret > 8) {
7546 u64 lookup = bits[0].start + bits[0].size;
7547 struct cache_extent *next;
7548 next = search_cache_extent(pending, lookup);
7550 if (next->start - lookup > 32768)
7552 bits[ret].start = next->start;
7553 bits[ret].size = next->size;
7554 lookup = next->start + next->size;
7558 next = next_cache_extent(next);
/*
 * Per-entry callback for tearing down a chunk cache tree: recovers the
 * chunk_record embedding @cache and unlinks it from its `list` and
 * `dextents` lists.
 *
 * NOTE(review): the statement releasing the record itself is not visible in
 * this listing.
 */
7566 static void free_chunk_record(struct cache_extent *cache)
7568 struct chunk_record *rec;
7570 rec = container_of(cache, struct chunk_record, cache);
7571 list_del_init(&rec->list);
7572 list_del_init(&rec->dextents);
/*
 * Release every entry of @chunk_cache by handing free_chunk_record() to
 * cache_tree_free_extents().
 */
7576 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
7578 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node callback for tearing down the device rb-tree: recovers the
 * device_record embedding @node.
 *
 * NOTE(review): the statement releasing the record is not visible in this
 * listing.
 */
7581 static void free_device_record(struct rb_node *node)
7583 struct device_record *rec;
7585 rec = container_of(node, struct device_record, node);
/*
 * Macro invocation pairing the name `device_cache` with the
 * free_device_record() callback.  Presumably it expands to the function that
 * frees the whole device rb-tree -- confirm against the macro definition
 * (likely in rbtree-utils.h).
 */
7589 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the cache tree of @tree and append it to the
 * tree->block_groups list.
 *
 * NOTE(review): the check on the insert_cache_extent() result and the
 * return statements are not visible in this listing.
 */
7591 int insert_block_group_record(struct block_group_tree *tree,
7592 struct block_group_record *bg_rec)
7596 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7600 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Per-entry callback for tearing down a block group tree: recovers the
 * block_group_record embedding @cache and unlinks it from its list.
 *
 * NOTE(review): the statement releasing the record itself is not visible in
 * this listing.
 */
7604 static void free_block_group_record(struct cache_extent *cache)
7606 struct block_group_record *rec;
7608 rec = container_of(cache, struct block_group_record, cache);
7609 list_del_init(&rec->list);
/*
 * Release every entry of @tree by handing free_block_group_record() to
 * cache_tree_free_extents().
 */
7613 void free_block_group_tree(struct block_group_tree *tree)
7615 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the device extent tree @tree.
 *
 * The record is inserted with insert_cache_extent2() and then appended to
 * both orphan lists of the tree (no_chunk_orphans via chunk_list,
 * no_device_orphans via device_list).
 *
 * NOTE(review): the check on the insert result and the return statements
 * are not visible in this listing.
 */
7618 int insert_device_extent_record(struct device_extent_tree *tree,
7619 struct device_extent_record *de_rec)
7624 * Device extent is a bit different from the other extents, because
7625 * the extents which belong to the different devices may have the
7626 * same start and size, so we need use the special extent cache
7627 * search/insert functions.
7629 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7633 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7634 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Per-entry callback for tearing down a device extent tree: recovers the
 * device_extent_record embedding @cache and unlinks it from chunk_list and
 * device_list when those links are non-empty.
 *
 * NOTE(review): the statement releasing the record itself is not visible in
 * this listing.
 */
7638 static void free_device_extent_record(struct cache_extent *cache)
7640 struct device_extent_record *rec;
7642 rec = container_of(cache, struct device_extent_record, cache);
7643 if (!list_empty(&rec->chunk_list))
7644 list_del_init(&rec->chunk_list);
7645 if (!list_empty(&rec->device_list))
7646 list_del_init(&rec->device_list);
/*
 * Release every entry of @tree by handing free_device_extent_record() to
 * cache_tree_free_extents().
 */
7650 void free_device_extent_tree(struct device_extent_tree *tree)
7652 cache_tree_free_extents(&tree->tree, free_device_extent_record);
7655 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref in
 * @extent_cache (only built with BTRFS_COMPAT_EXTENT_TREE_V0).
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is added with
 * add_tree_backref(); the other visible call adds a data backref that
 * carries the v0 ref count.  Both use key.objectid as the extent bytenr and
 * key.offset as the parent.
 *
 * NOTE(review): the `else`, the trailing arguments of the first call and
 * the return statement are not visible in this listing.
 */
7656 static int process_extent_ref_v0(struct cache_tree *extent_cache,
7657 struct extent_buffer *leaf, int slot)
7659 struct btrfs_extent_ref_v0 *ref0;
7660 struct btrfs_key key;
7663 btrfs_item_key_to_cpu(leaf, &key, slot);
7664 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
7665 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
7666 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
7669 ret = add_data_backref(extent_cache, key.objectid, key.offset,
7670 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Allocate a chunk_record describing the chunk item at @slot of @leaf.
 *
 * The record is zero-allocated with room for the item's stripes
 * (btrfs_chunk_record_size(num_stripes)); "memory allocation failed" is
 * printed when the allocation fails.  The cache extent is keyed on
 * key->offset with the chunk length as size, the item key and leaf
 * generation are copied, and the chunk fields (owner, stripe_len, type,
 * io_width, io_align, sector_size, num_stripes, sub_stripes) plus the
 * devid, offset and device uuid of every stripe are read from the leaf.
 *
 * NOTE(review): what happens after the allocation-failure message, and the
 * return statement, are not visible in this listing.
 */
7676 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7677 struct btrfs_key *key,
7680 struct btrfs_chunk *ptr;
7681 struct chunk_record *rec;
7684 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7685 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7687 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7689 fprintf(stderr, "memory allocation failed\n");
7693 INIT_LIST_HEAD(&rec->list);
7694 INIT_LIST_HEAD(&rec->dextents);
7697 rec->cache.start = key->offset;
7698 rec->cache.size = btrfs_chunk_length(leaf, ptr);
7700 rec->generation = btrfs_header_generation(leaf);
7702 rec->objectid = key->objectid;
7703 rec->type = key->type;
7704 rec->offset = key->offset;
7706 rec->length = rec->cache.size;
7707 rec->owner = btrfs_chunk_owner(leaf, ptr);
7708 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7709 rec->type_flags = btrfs_chunk_type(leaf, ptr);
7710 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7711 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7712 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7713 rec->num_stripes = num_stripes;
7714 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
7716 for (i = 0; i < rec->num_stripes; ++i) {
7717 rec->stripes[i].devid =
7718 btrfs_stripe_devid_nr(leaf, ptr, i);
7719 rec->stripes[i].offset =
7720 btrfs_stripe_offset_nr(leaf, ptr, i);
7721 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7722 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb and add it to @chunk_cache.
 *
 * The item is first checked with btrfs_check_chunk_valid(); an invalid
 * chunk is reported with its offset and length.  A chunk_record is then
 * built with btrfs_new_chunk_record() and inserted into @chunk_cache; the
 * "Chunk[...] existed." message is printed on the insert-failure side.
 *
 * NOTE(review): the conditions around both messages, any cleanup of the
 * record and the return statements are not visible in this listing.
 */
7729 static int process_chunk_item(struct cache_tree *chunk_cache,
7730 struct btrfs_key *key, struct extent_buffer *eb,
7733 struct chunk_record *rec;
7734 struct btrfs_chunk *chunk;
7737 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7739 * Do extra check for this chunk item,
7741 * It's still possible one can craft a leaf with CHUNK_ITEM, with
7742 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7743 * and owner<->key_type check.
7745 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7748 error("chunk(%llu, %llu) is not valid, ignore it",
7749 key->offset, btrfs_chunk_length(eb, chunk));
7752 rec = btrfs_new_chunk_record(eb, key, slot);
7753 ret = insert_cache_extent(chunk_cache, &rec->cache);
7755 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7756 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it
 * into the @dev_cache rb-tree using device_record_compare.
 *
 * The record copies the item key, the leaf generation and the device id,
 * total bytes and bytes used stored in the item.  Note that rec->devid is
 * first set from key->offset and later overwritten with
 * btrfs_device_id().  "memory allocation failed" is printed when malloc()
 * fails and "Device[...] existed." on the insert-failure side.
 *
 * NOTE(review): the conditions around both messages, any cleanup of the
 * record and the return statements are not visible in this listing.
 */
7763 static int process_device_item(struct rb_root *dev_cache,
7764 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7766 struct btrfs_dev_item *ptr;
7767 struct device_record *rec;
7770 ptr = btrfs_item_ptr(eb,
7771 slot, struct btrfs_dev_item);
7773 rec = malloc(sizeof(*rec));
7775 fprintf(stderr, "memory allocation failed\n");
7779 rec->devid = key->offset;
7780 rec->generation = btrfs_header_generation(eb);
7782 rec->objectid = key->objectid;
7783 rec->type = key->type;
7784 rec->offset = key->offset;
7786 rec->devid = btrfs_device_id(eb, ptr);
7787 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7788 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7790 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7792 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Allocate a zeroed block_group_record describing the block group item at
 * @slot of @leaf.
 *
 * The cache extent spans [key->objectid, +key->offset); the item key, the
 * leaf generation and the on-disk block group flags are copied, and the
 * list link is initialised.  "memory allocation failed" is printed when
 * calloc() fails.
 *
 * NOTE(review): what happens after the allocation-failure message, and the
 * return statement, are not visible in this listing.
 */
7799 struct block_group_record *
7800 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7803 struct btrfs_block_group_item *ptr;
7804 struct block_group_record *rec;
7806 rec = calloc(1, sizeof(*rec));
7808 fprintf(stderr, "memory allocation failed\n");
7812 rec->cache.start = key->objectid;
7813 rec->cache.size = key->offset;
7815 rec->generation = btrfs_header_generation(leaf);
7817 rec->objectid = key->objectid;
7818 rec->type = key->type;
7819 rec->offset = key->offset;
7821 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7822 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7824 INIT_LIST_HEAD(&rec->list);
/*
 * Build a block_group_record for the item at @slot of @eb and insert it
 * into @block_group_cache; "Block Group[...] existed." is printed on the
 * insert-failure side.
 *
 * NOTE(review): the condition around the message, any cleanup of the record
 * and the return statement are not visible in this listing.
 */
7829 static int process_block_group_item(struct block_group_tree *block_group_cache,
7830 struct btrfs_key *key,
7831 struct extent_buffer *eb, int slot)
7833 struct block_group_record *rec;
7836 rec = btrfs_new_block_group_record(eb, key, slot);
7837 ret = insert_block_group_record(block_group_cache, rec);
7839 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7840 rec->objectid, rec->offset);
/*
 * Allocate a zeroed device_extent_record describing the dev extent item at
 * @slot of @leaf.
 *
 * The cache entry is keyed on (key->objectid, key->offset) and sized with
 * the dev extent length.  The item key, the leaf generation and the owning
 * chunk's objectid and offset are copied from the leaf, and both list
 * links are initialised.  "memory allocation failed" is printed when
 * calloc() fails.
 *
 * NOTE(review): what happens after the allocation-failure message, the
 * left-hand side of the chunk offset assignment and the return statement
 * are not visible in this listing.
 */
7847 struct device_extent_record *
7848 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7849 struct btrfs_key *key, int slot)
7851 struct device_extent_record *rec;
7852 struct btrfs_dev_extent *ptr;
7854 rec = calloc(1, sizeof(*rec));
7856 fprintf(stderr, "memory allocation failed\n");
7860 rec->cache.objectid = key->objectid;
7861 rec->cache.start = key->offset;
7863 rec->generation = btrfs_header_generation(leaf);
7865 rec->objectid = key->objectid;
7866 rec->type = key->type;
7867 rec->offset = key->offset;
7869 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7870 rec->chunk_objecteid =
7871 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7873 btrfs_dev_extent_chunk_offset(leaf, ptr);
7874 rec->length = btrfs_dev_extent_length(leaf, ptr);
7875 rec->cache.size = rec->length;
7877 INIT_LIST_HEAD(&rec->chunk_list);
7878 INIT_LIST_HEAD(&rec->device_list);
/*
 * Build a device_extent_record for the item at @slot of @eb and insert it
 * into @dev_extent_cache; "Device extent[...] existed." is printed on the
 * insert-failure side.
 *
 * NOTE(review): the return type line, the condition around the message, any
 * cleanup of the record and the return statement are not visible in this
 * listing.
 */
7884 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7885 struct btrfs_key *key, struct extent_buffer *eb,
7888 struct device_extent_record *rec;
7891 rec = btrfs_new_device_extent_record(eb, key, slot);
7892 ret = insert_device_extent_record(dev_extent_cache, rec);
7895 "Device extent[%llu, %llu, %llu] existed.\n",
7896 rec->objectid, rec->offset, rec->length);
/*
 * Parse the EXTENT_ITEM / METADATA_ITEM at @slot of @eb and feed it, along
 * with its inline backrefs, into @extent_cache.
 *
 * Visible steps:
 *  - the extent length is nodesize for BTRFS_METADATA_ITEM_KEY and
 *    key.offset otherwise;
 *  - a bytenr not aligned to sectorsize is reported and ignored;
 *  - an item smaller than struct btrfs_extent_item is handled as a v0
 *    extent item when BTRFS_COMPAT_EXTENT_TREE_V0 is defined: only the ref
 *    count is read and the record is added directly;
 *  - otherwise the ref count and the TREE_BLOCK flag are read, a metadata
 *    extent whose length differs from nodesize or a data extent whose
 *    length is not sectorsize-aligned is reported, and the record is added
 *    with add_extent_rec();
 *  - the inline refs that follow the extent item (after the
 *    btrfs_tree_block_info for tree blocks keyed as EXTENT_ITEM) are walked
 *    and dispatched by type to add_tree_backref() or add_data_backref();
 *    an unknown type is reported as a corrupt extent record.
 *
 * NOTE(review): on the visible lines the results of add_extent_rec() in the
 * non-v0 path and of both add_data_backref() calls are not stored.
 * NOTE(review): loop headers, several conditions, call arguments and return
 * statements are missing from this listing.
 */
7903 static int process_extent_item(struct btrfs_root *root,
7904 struct cache_tree *extent_cache,
7905 struct extent_buffer *eb, int slot)
7907 struct btrfs_extent_item *ei;
7908 struct btrfs_extent_inline_ref *iref;
7909 struct btrfs_extent_data_ref *dref;
7910 struct btrfs_shared_data_ref *sref;
7911 struct btrfs_key key;
7912 struct extent_record tmpl;
7917 u32 item_size = btrfs_item_size_nr(eb, slot);
7923 btrfs_item_key_to_cpu(eb, &key, slot);
7925 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7927 num_bytes = root->fs_info->nodesize;
7929 num_bytes = key.offset;
7932 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7933 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7934 key.objectid, root->fs_info->sectorsize);
7937 if (item_size < sizeof(*ei)) {
7938 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7939 struct btrfs_extent_item_v0 *ei0;
7940 BUG_ON(item_size != sizeof(*ei0));
7941 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7942 refs = btrfs_extent_refs_v0(eb, ei0);
7946 memset(&tmpl, 0, sizeof(tmpl));
7947 tmpl.start = key.objectid;
7948 tmpl.nr = num_bytes;
7949 tmpl.extent_item_refs = refs;
7950 tmpl.metadata = metadata;
7952 tmpl.max_size = num_bytes;
7954 return add_extent_rec(extent_cache, &tmpl);
7957 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7958 refs = btrfs_extent_refs(eb, ei);
7959 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7963 if (metadata && num_bytes != root->fs_info->nodesize) {
7964 error("ignore invalid metadata extent, length %llu does not equal to %u",
7965 num_bytes, root->fs_info->nodesize);
7968 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7969 error("ignore invalid data extent, length %llu is not aligned to %u",
7970 num_bytes, root->fs_info->sectorsize);
7974 memset(&tmpl, 0, sizeof(tmpl));
7975 tmpl.start = key.objectid;
7976 tmpl.nr = num_bytes;
7977 tmpl.extent_item_refs = refs;
7978 tmpl.metadata = metadata;
7980 tmpl.max_size = num_bytes;
7981 add_extent_rec(extent_cache, &tmpl);
/* Inline refs start right after the extent item header. */
7983 ptr = (unsigned long)(ei + 1);
7984 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7985 key.type == BTRFS_EXTENT_ITEM_KEY)
7986 ptr += sizeof(struct btrfs_tree_block_info);
7988 end = (unsigned long)ei + item_size;
7990 iref = (struct btrfs_extent_inline_ref *)ptr;
7991 type = btrfs_extent_inline_ref_type(eb, iref);
7992 offset = btrfs_extent_inline_ref_offset(eb, iref);
7994 case BTRFS_TREE_BLOCK_REF_KEY:
7995 ret = add_tree_backref(extent_cache, key.objectid,
7999 "add_tree_backref failed (extent items tree block): %s",
8002 case BTRFS_SHARED_BLOCK_REF_KEY:
8003 ret = add_tree_backref(extent_cache, key.objectid,
8007 "add_tree_backref failed (extent items shared block): %s",
8010 case BTRFS_EXTENT_DATA_REF_KEY:
8011 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8012 add_data_backref(extent_cache, key.objectid, 0,
8013 btrfs_extent_data_ref_root(eb, dref),
8014 btrfs_extent_data_ref_objectid(eb,
8016 btrfs_extent_data_ref_offset(eb, dref),
8017 btrfs_extent_data_ref_count(eb, dref),
8020 case BTRFS_SHARED_DATA_REF_KEY:
8021 sref = (struct btrfs_shared_data_ref *)(iref + 1);
8022 add_data_backref(extent_cache, key.objectid, offset,
8024 btrfs_shared_data_ref_count(eb, sref),
8028 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
8029 key.objectid, key.type, num_bytes);
8032 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Verify that the free space cache of @cache holds an entry that exactly
 * covers [@offset, @offset + @bytes), then remove that entry.
 *
 * For every superblock mirror, btrfs_rmap_block() is asked for the logical
 * addresses inside this block group that map to the mirror.  The visible
 * branches trim the checked range around each such stripe: from the front
 * when the stripe starts at or before @offset, from the back when it
 * reaches the end of the range, and by recursing on the left part when it
 * lies in the middle.  The remaining range is looked up with
 * btrfs_find_free_space(); a missing entry, or a mismatch in offset or
 * length, is reported on stderr.  A matching entry is unlinked from the
 * free space ctl.
 *
 * NOTE(review): the inner loop header, several conditions, the cleanup of
 * the `logical` array and all return statements are missing from this
 * listing.
 */
8039 static int check_cache_range(struct btrfs_root *root,
8040 struct btrfs_block_group_cache *cache,
8041 u64 offset, u64 bytes)
8043 struct btrfs_free_space *entry;
8049 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
8050 bytenr = btrfs_sb_offset(i);
8051 ret = btrfs_rmap_block(root->fs_info,
8052 cache->key.objectid, bytenr, 0,
8053 &logical, &nr, &stripe_len);
8058 if (logical[nr] + stripe_len <= offset)
8060 if (offset + bytes <= logical[nr])
8062 if (logical[nr] == offset) {
8063 if (stripe_len >= bytes) {
8067 bytes -= stripe_len;
8068 offset += stripe_len;
8069 } else if (logical[nr] < offset) {
8070 if (logical[nr] + stripe_len >=
8075 bytes = (offset + bytes) -
8076 (logical[nr] + stripe_len);
8077 offset = logical[nr] + stripe_len;
8080 * Could be tricky, the super may land in the
8081 * middle of the area we're checking. First
8082 * check the easiest case, it's at the end.
8084 if (logical[nr] + stripe_len >=
8086 bytes = logical[nr] - offset;
8090 /* Check the left side */
8091 ret = check_cache_range(root, cache,
8093 logical[nr] - offset);
8099 /* Now we continue with the right side */
8100 bytes = (offset + bytes) -
8101 (logical[nr] + stripe_len);
8102 offset = logical[nr] + stripe_len;
8109 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8111 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8112 offset, offset+bytes);
8116 if (entry->offset != offset) {
8117 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8122 if (entry->bytes != bytes) {
8123 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8124 bytes, entry->bytes, offset);
8128 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the loaded free space of block group @cache against the
 * extent tree.
 *
 * @root is replaced by the extent root.  Starting at the block group start
 * (but not below BTRFS_SUPER_INFO_OFFSET), the EXTENT_ITEM and
 * METADATA_ITEM keys inside the block group are walked while `last` tracks
 * the end of the previous extent (key.offset bytes for EXTENT_ITEM,
 * nodesize for METADATA_ITEM).  Every gap between `last` and the next
 * extent, and the tail up to the end of the block group, is passed to
 * check_cache_range().  After the path is released, entries still present
 * in free_space_ctl->free_space_offset are reported.
 *
 * NOTE(review): the loop header, the slot advance, error checks and return
 * statements are missing from this listing.
 */
8133 static int verify_space_cache(struct btrfs_root *root,
8134 struct btrfs_block_group_cache *cache)
8136 struct btrfs_path path;
8137 struct extent_buffer *leaf;
8138 struct btrfs_key key;
8142 root = root->fs_info->extent_root;
8144 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8146 btrfs_init_path(&path);
8147 key.objectid = last;
8149 key.type = BTRFS_EXTENT_ITEM_KEY;
8150 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8155 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8156 ret = btrfs_next_leaf(root, &path);
8164 leaf = path.nodes[0];
8165 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8166 if (key.objectid >= cache->key.offset + cache->key.objectid)
8168 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8169 key.type != BTRFS_METADATA_ITEM_KEY) {
8174 if (last == key.objectid) {
8175 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8176 last = key.objectid + key.offset;
8178 last = key.objectid + root->fs_info->nodesize;
8183 ret = check_cache_range(root, cache, last,
8184 key.objectid - last);
8187 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8188 last = key.objectid + key.offset;
8190 last = key.objectid + root->fs_info->nodesize;
8194 if (last < cache->key.objectid + cache->key.offset)
8195 ret = check_cache_range(root, cache, last,
8196 cache->key.objectid +
8197 cache->key.offset - last);
8200 btrfs_release_path(&path);
8203 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8204 fprintf(stderr, "There are still entries left in the space "
/*
 * Check the free space cache (or free space tree) of every block group.
 *
 * When the superblock's cache generation is neither -1ULL nor equal to the
 * superblock generation, a message says the cache will be invalidated.
 * With progress reporting enabled, the TASK_FREE_SPACE task is started.
 * Block groups are then visited in address order, starting just past the
 * primary superblock: the free space ctl is initialised when absent (the
 * visible code also calls btrfs_remove_free_space_cache()), the free space
 * is loaded either from the free space tree (with super stripes excluded
 * while loading) when the FREE_SPACE_TREE compat_ro bit is set, or from the
 * free space cache otherwise, and verify_space_cache() is run on it.
 *
 * Returns -EINVAL when the local `error` counter is non-zero, 0 otherwise.
 *
 * NOTE(review): the loop header, several conditions, the updates of `error`
 * and the early returns are missing from this listing.
 */
8212 static int check_space_cache(struct btrfs_root *root)
8214 struct btrfs_block_group_cache *cache;
8215 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8219 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8220 btrfs_super_generation(root->fs_info->super_copy) !=
8221 btrfs_super_cache_generation(root->fs_info->super_copy)) {
8222 printf("cache and super generation don't match, space cache "
8223 "will be invalidated\n");
8227 if (ctx.progress_enabled) {
8228 ctx.tp = TASK_FREE_SPACE;
8229 task_start(ctx.info);
8233 cache = btrfs_lookup_first_block_group(root->fs_info, start);
8237 start = cache->key.objectid + cache->key.offset;
8238 if (!cache->free_space_ctl) {
8239 if (btrfs_init_free_space_ctl(cache,
8240 root->fs_info->sectorsize)) {
8245 btrfs_remove_free_space_cache(cache);
8248 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8249 ret = exclude_super_stripes(root, cache);
8251 fprintf(stderr, "could not exclude super stripes: %s\n",
8256 ret = load_free_space_tree(root->fs_info, cache);
8257 free_excluded_extents(root, cache);
8259 fprintf(stderr, "could not load free space tree: %s\n",
8266 ret = load_free_space_cache(root->fs_info, cache);
8271 ret = verify_space_cache(root, cache);
8273 fprintf(stderr, "cache appears valid but isn't %Lu\n",
8274 cache->key.objectid);
8279 task_stop(ctx.info);
8281 return error ? -EINVAL : 0;
8284 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8285 u64 num_bytes, unsigned long leaf_offset,
8286 struct extent_buffer *eb) {
8288 struct btrfs_fs_info *fs_info = root->fs_info;
8290 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8292 unsigned long csum_offset;
8296 u64 data_checked = 0;
8302 if (num_bytes % fs_info->sectorsize)
8305 data = malloc(num_bytes);
8309 while (offset < num_bytes) {
8312 read_len = num_bytes - offset;
8313 /* read as much data as possible in one call */
8314 ret = read_extent_data(fs_info, data + offset,
8315 bytenr + offset, &read_len, mirror);
8319 /* verify the checksum of every sector of data */
8320 while (data_checked < read_len) {
8322 tmp = offset + data_checked;
8324 csum = btrfs_csum_data((char *)data + tmp,
8325 csum, fs_info->sectorsize);
8326 btrfs_csum_final(csum, (u8 *)&csum);
8328 csum_offset = leaf_offset +
8329 tmp / fs_info->sectorsize * csum_size;
8330 read_extent_buffer(eb, (char *)&csum_expected,
8331 csum_offset, csum_size);
8332 /* try another mirror */
8333 if (csum != csum_expected) {
8334 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8335 mirror, bytenr + tmp,
8336 csum, csum_expected);
8337 num_copies = btrfs_num_copies(root->fs_info,
8339 if (mirror < num_copies - 1) {
8344 data_checked += fs_info->sectorsize;
8353 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8356 struct btrfs_path path;
8357 struct extent_buffer *leaf;
8358 struct btrfs_key key;
8361 btrfs_init_path(&path);
8362 key.objectid = bytenr;
8363 key.type = BTRFS_EXTENT_ITEM_KEY;
8364 key.offset = (u64)-1;
8367 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8370 fprintf(stderr, "Error looking up extent record %d\n", ret);
8371 btrfs_release_path(&path);
8374 if (path.slots[0] > 0) {
8377 ret = btrfs_prev_leaf(root, &path);
8380 } else if (ret > 0) {
8387 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8390 * Block group items come before extent items if they have the same
8391 * bytenr, so walk back one more just in case. Dear future traveller,
8392 * first congrats on mastering time travel. Now if it's not too much
8393 * trouble could you go back to 2006 and tell Chris to make the
8394 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8395 * EXTENT_ITEM_KEY please?
8397 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8398 if (path.slots[0] > 0) {
8401 ret = btrfs_prev_leaf(root, &path);
8404 } else if (ret > 0) {
8409 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8413 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8414 ret = btrfs_next_leaf(root, &path);
8416 fprintf(stderr, "Error going to next leaf "
8418 btrfs_release_path(&path);
8424 leaf = path.nodes[0];
8425 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8426 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8430 if (key.objectid + key.offset < bytenr) {
8434 if (key.objectid > bytenr + num_bytes)
8437 if (key.objectid == bytenr) {
8438 if (key.offset >= num_bytes) {
8442 num_bytes -= key.offset;
8443 bytenr += key.offset;
8444 } else if (key.objectid < bytenr) {
8445 if (key.objectid + key.offset >= bytenr + num_bytes) {
8449 num_bytes = (bytenr + num_bytes) -
8450 (key.objectid + key.offset);
8451 bytenr = key.objectid + key.offset;
8453 if (key.objectid + key.offset < bytenr + num_bytes) {
8454 u64 new_start = key.objectid + key.offset;
8455 u64 new_bytes = bytenr + num_bytes - new_start;
8458 * Weird case, the extent is in the middle of
8459 * our range, we'll have to search one side
8460 * and then the other. Not sure if this happens
8461 * in real life, but no harm in coding it up
8462 * anyway just in case.
8464 btrfs_release_path(&path);
8465 ret = check_extent_exists(root, new_start,
8468 fprintf(stderr, "Right section didn't "
8472 num_bytes = key.objectid - bytenr;
8475 num_bytes = key.objectid - bytenr;
8482 if (num_bytes && !ret) {
8483 fprintf(stderr, "There are no extents for csum range "
8484 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8488 btrfs_release_path(&path);
8492 static int check_csums(struct btrfs_root *root)
8494 struct btrfs_path path;
8495 struct extent_buffer *leaf;
8496 struct btrfs_key key;
8497 u64 offset = 0, num_bytes = 0;
8498 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8502 unsigned long leaf_offset;
8504 root = root->fs_info->csum_root;
8505 if (!extent_buffer_uptodate(root->node)) {
8506 fprintf(stderr, "No valid csum tree found\n");
8510 btrfs_init_path(&path);
8511 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8512 key.type = BTRFS_EXTENT_CSUM_KEY;
8514 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8516 fprintf(stderr, "Error searching csum tree %d\n", ret);
8517 btrfs_release_path(&path);
8521 if (ret > 0 && path.slots[0])
8526 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8527 ret = btrfs_next_leaf(root, &path);
8529 fprintf(stderr, "Error going to next leaf "
8536 leaf = path.nodes[0];
8538 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8539 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
8544 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8545 csum_size) * root->fs_info->sectorsize;
8546 if (!check_data_csum)
8547 goto skip_csum_check;
8548 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8549 ret = check_extent_csums(root, key.offset, data_len,
8555 offset = key.offset;
8556 } else if (key.offset != offset + num_bytes) {
8557 ret = check_extent_exists(root, offset, num_bytes);
8559 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8560 "there is no extent record\n",
8561 offset, offset+num_bytes);
8564 offset = key.offset;
8567 num_bytes += data_len;
8571 btrfs_release_path(&path);
8575 static int is_dropped_key(struct btrfs_key *key,
8576 struct btrfs_key *drop_key) {
8577 if (key->objectid < drop_key->objectid)
8579 else if (key->objectid == drop_key->objectid) {
8580 if (key->type < drop_key->type)
8582 else if (key->type == drop_key->type) {
8583 if (key->offset < drop_key->offset)
8591 * Here are the rules for FULL_BACKREF.
8593 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8594 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8596 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
8597 * if it happened after the relocation occurred since we'll have dropped the
8598 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8599 * have no real way to know for sure.
8601 * We process the blocks one root at a time, and we start from the lowest root
8602 * objectid and go to the highest. So we can just lookup the owner backref for
8603 * the record and if we don't find it then we know it doesn't exist and we have
8606 * FIXME: if we ever start reclaiming root objectids then we need to fix this
8607 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8608 * be set or not and then we can check later once we've gathered all the refs.
8610 static int calc_extent_flag(struct cache_tree *extent_cache,
8611 struct extent_buffer *buf,
8612 struct root_item_record *ri,
8615 struct extent_record *rec;
8616 struct cache_extent *cache;
8617 struct tree_backref *tback;
8620 cache = lookup_cache_extent(extent_cache, buf->start, 1);
8621 /* we have added this extent before */
8625 rec = container_of(cache, struct extent_record, cache);
8628 * Except file/reloc tree, we can not have
8631 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8636 if (buf->start == ri->bytenr)
8639 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8642 owner = btrfs_header_owner(buf);
8643 if (owner == ri->objectid)
8646 tback = find_tree_backref(rec, 0, owner);
8651 if (rec->flag_block_full_backref != FLAG_UNSET &&
8652 rec->flag_block_full_backref != 0)
8653 rec->bad_full_backref = 1;
8656 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8657 if (rec->flag_block_full_backref != FLAG_UNSET &&
8658 rec->flag_block_full_backref != 1)
8659 rec->bad_full_backref = 1;
8663 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8665 fprintf(stderr, "Invalid key type(");
8666 print_key_type(stderr, 0, key_type);
8667 fprintf(stderr, ") found in root(");
8668 print_objectid(stderr, rootid, 0);
8669 fprintf(stderr, ")\n");
8673 * Check if the key is valid with its extent buffer.
8675 * This is an early check in case an invalid key exists in an extent buffer
8676 * This is not comprehensive yet, but should prevent wrong key/item passed
8679 static int check_type_with_root(u64 rootid, u8 key_type)
8682 /* Only valid in chunk tree */
8683 case BTRFS_DEV_ITEM_KEY:
8684 case BTRFS_CHUNK_ITEM_KEY:
8685 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8688 /* valid in csum and log tree */
8689 case BTRFS_CSUM_TREE_OBJECTID:
8690 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8694 case BTRFS_EXTENT_ITEM_KEY:
8695 case BTRFS_METADATA_ITEM_KEY:
8696 case BTRFS_BLOCK_GROUP_ITEM_KEY:
8697 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8700 case BTRFS_ROOT_ITEM_KEY:
8701 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8704 case BTRFS_DEV_EXTENT_KEY:
8705 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8711 report_mismatch_key_root(key_type, rootid);
8715 static int run_next_block(struct btrfs_root *root,
8716 struct block_info *bits,
8719 struct cache_tree *pending,
8720 struct cache_tree *seen,
8721 struct cache_tree *reada,
8722 struct cache_tree *nodes,
8723 struct cache_tree *extent_cache,
8724 struct cache_tree *chunk_cache,
8725 struct rb_root *dev_cache,
8726 struct block_group_tree *block_group_cache,
8727 struct device_extent_tree *dev_extent_cache,
8728 struct root_item_record *ri)
8730 struct btrfs_fs_info *fs_info = root->fs_info;
8731 struct extent_buffer *buf;
8732 struct extent_record *rec = NULL;
8743 struct btrfs_key key;
8744 struct cache_extent *cache;
8747 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8748 bits_nr, &reada_bits);
8753 for(i = 0; i < nritems; i++) {
8754 ret = add_cache_extent(reada, bits[i].start,
8759 /* fixme, get the parent transid */
8760 readahead_tree_block(fs_info, bits[i].start, 0);
8763 *last = bits[0].start;
8764 bytenr = bits[0].start;
8765 size = bits[0].size;
8767 cache = lookup_cache_extent(pending, bytenr, size);
8769 remove_cache_extent(pending, cache);
8772 cache = lookup_cache_extent(reada, bytenr, size);
8774 remove_cache_extent(reada, cache);
8777 cache = lookup_cache_extent(nodes, bytenr, size);
8779 remove_cache_extent(nodes, cache);
8782 cache = lookup_cache_extent(extent_cache, bytenr, size);
8784 rec = container_of(cache, struct extent_record, cache);
8785 gen = rec->parent_generation;
8788 /* fixme, get the real parent transid */
8789 buf = read_tree_block(root->fs_info, bytenr, gen);
8790 if (!extent_buffer_uptodate(buf)) {
8791 record_bad_block_io(root->fs_info,
8792 extent_cache, bytenr, size);
8796 nritems = btrfs_header_nritems(buf);
8799 if (!init_extent_tree) {
8800 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8801 btrfs_header_level(buf), 1, NULL,
8804 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8806 fprintf(stderr, "Couldn't calc extent flags\n");
8807 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8812 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8814 fprintf(stderr, "Couldn't calc extent flags\n");
8815 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8819 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8821 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8822 ri->objectid == btrfs_header_owner(buf)) {
8824 * Ok we got to this block from its original owner and
8825 * we have FULL_BACKREF set. Relocation can leave
8826 * converted blocks over so this is altogether possible,
8827 * however it's not possible if the generation > the
8828 * last snapshot, so check for this case.
8830 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8831 btrfs_header_generation(buf) > ri->last_snapshot) {
8832 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8833 rec->bad_full_backref = 1;
8838 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8839 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8840 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8841 rec->bad_full_backref = 1;
8845 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8846 rec->flag_block_full_backref = 1;
8850 rec->flag_block_full_backref = 0;
8852 owner = btrfs_header_owner(buf);
8855 ret = check_block(root, extent_cache, buf, flags);
8859 if (btrfs_is_leaf(buf)) {
8860 btree_space_waste += btrfs_leaf_free_space(root, buf);
8861 for (i = 0; i < nritems; i++) {
8862 struct btrfs_file_extent_item *fi;
8863 btrfs_item_key_to_cpu(buf, &key, i);
8865 * Check key type against the leaf owner.
8866 * Could filter quite a lot of early error if
8869 if (check_type_with_root(btrfs_header_owner(buf),
8871 fprintf(stderr, "ignoring invalid key\n");
8874 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8875 process_extent_item(root, extent_cache, buf,
8879 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8880 process_extent_item(root, extent_cache, buf,
8884 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8886 btrfs_item_size_nr(buf, i);
8889 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8890 process_chunk_item(chunk_cache, &key, buf, i);
8893 if (key.type == BTRFS_DEV_ITEM_KEY) {
8894 process_device_item(dev_cache, &key, buf, i);
8897 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8898 process_block_group_item(block_group_cache,
8902 if (key.type == BTRFS_DEV_EXTENT_KEY) {
8903 process_device_extent_item(dev_extent_cache,
8908 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8909 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8910 process_extent_ref_v0(extent_cache, buf, i);
8917 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8918 ret = add_tree_backref(extent_cache,
8919 key.objectid, 0, key.offset, 0);
8922 "add_tree_backref failed (leaf tree block): %s",
8926 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8927 ret = add_tree_backref(extent_cache,
8928 key.objectid, key.offset, 0, 0);
8931 "add_tree_backref failed (leaf shared block): %s",
8935 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8936 struct btrfs_extent_data_ref *ref;
8937 ref = btrfs_item_ptr(buf, i,
8938 struct btrfs_extent_data_ref);
8939 add_data_backref(extent_cache,
8941 btrfs_extent_data_ref_root(buf, ref),
8942 btrfs_extent_data_ref_objectid(buf,
8944 btrfs_extent_data_ref_offset(buf, ref),
8945 btrfs_extent_data_ref_count(buf, ref),
8946 0, root->fs_info->sectorsize);
8949 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8950 struct btrfs_shared_data_ref *ref;
8951 ref = btrfs_item_ptr(buf, i,
8952 struct btrfs_shared_data_ref);
8953 add_data_backref(extent_cache,
8954 key.objectid, key.offset, 0, 0, 0,
8955 btrfs_shared_data_ref_count(buf, ref),
8956 0, root->fs_info->sectorsize);
8959 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8960 struct bad_item *bad;
8962 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8966 bad = malloc(sizeof(struct bad_item));
8969 INIT_LIST_HEAD(&bad->list);
8970 memcpy(&bad->key, &key,
8971 sizeof(struct btrfs_key));
8972 bad->root_id = owner;
8973 list_add_tail(&bad->list, &delete_items);
8976 if (key.type != BTRFS_EXTENT_DATA_KEY)
8978 fi = btrfs_item_ptr(buf, i,
8979 struct btrfs_file_extent_item);
8980 if (btrfs_file_extent_type(buf, fi) ==
8981 BTRFS_FILE_EXTENT_INLINE)
8983 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8986 data_bytes_allocated +=
8987 btrfs_file_extent_disk_num_bytes(buf, fi);
8988 if (data_bytes_allocated < root->fs_info->sectorsize) {
8991 data_bytes_referenced +=
8992 btrfs_file_extent_num_bytes(buf, fi);
8993 add_data_backref(extent_cache,
8994 btrfs_file_extent_disk_bytenr(buf, fi),
8995 parent, owner, key.objectid, key.offset -
8996 btrfs_file_extent_offset(buf, fi), 1, 1,
8997 btrfs_file_extent_disk_num_bytes(buf, fi));
9001 struct btrfs_key first_key;
9003 first_key.objectid = 0;
9006 btrfs_item_key_to_cpu(buf, &first_key, 0);
9007 level = btrfs_header_level(buf);
9008 for (i = 0; i < nritems; i++) {
9009 struct extent_record tmpl;
9011 ptr = btrfs_node_blockptr(buf, i);
9012 size = root->fs_info->nodesize;
9013 btrfs_node_key_to_cpu(buf, &key, i);
9015 if ((level == ri->drop_level)
9016 && is_dropped_key(&key, &ri->drop_key)) {
9021 memset(&tmpl, 0, sizeof(tmpl));
9022 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
9023 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
9028 tmpl.max_size = size;
9029 ret = add_extent_rec(extent_cache, &tmpl);
9033 ret = add_tree_backref(extent_cache, ptr, parent,
9037 "add_tree_backref failed (non-leaf block): %s",
9043 add_pending(nodes, seen, ptr, size);
9045 add_pending(pending, seen, ptr, size);
9048 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
9049 nritems) * sizeof(struct btrfs_key_ptr);
9051 total_btree_bytes += buf->len;
9052 if (fs_root_objectid(btrfs_header_owner(buf)))
9053 total_fs_tree_bytes += buf->len;
9054 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9055 total_extent_tree_bytes += buf->len;
9057 free_extent_buffer(buf);
9061 static int add_root_to_pending(struct extent_buffer *buf,
9062 struct cache_tree *extent_cache,
9063 struct cache_tree *pending,
9064 struct cache_tree *seen,
9065 struct cache_tree *nodes,
9068 struct extent_record tmpl;
9071 if (btrfs_header_level(buf) > 0)
9072 add_pending(nodes, seen, buf->start, buf->len);
9074 add_pending(pending, seen, buf->start, buf->len);
9076 memset(&tmpl, 0, sizeof(tmpl));
9077 tmpl.start = buf->start;
9082 tmpl.max_size = buf->len;
9083 add_extent_rec(extent_cache, &tmpl);
9085 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9086 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9087 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9090 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9095 /* as we fix the tree, we might be deleting blocks that
9096 * we're tracking for repair. This hook makes sure we
9097 * remove any backrefs for blocks as we are fixing them.
9099 static int free_extent_hook(struct btrfs_trans_handle *trans,
9100 struct btrfs_root *root,
9101 u64 bytenr, u64 num_bytes, u64 parent,
9102 u64 root_objectid, u64 owner, u64 offset,
9105 struct extent_record *rec;
9106 struct cache_extent *cache;
9108 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
9110 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9111 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9115 rec = container_of(cache, struct extent_record, cache);
9117 struct data_backref *back;
9118 back = find_data_backref(rec, parent, root_objectid, owner,
9119 offset, 1, bytenr, num_bytes);
9122 if (back->node.found_ref) {
9123 back->found_ref -= refs_to_drop;
9125 rec->refs -= refs_to_drop;
9127 if (back->node.found_extent_tree) {
9128 back->num_refs -= refs_to_drop;
9129 if (rec->extent_item_refs)
9130 rec->extent_item_refs -= refs_to_drop;
9132 if (back->found_ref == 0)
9133 back->node.found_ref = 0;
9134 if (back->num_refs == 0)
9135 back->node.found_extent_tree = 0;
9137 if (!back->node.found_extent_tree && back->node.found_ref) {
9138 rb_erase(&back->node.node, &rec->backref_tree);
9142 struct tree_backref *back;
9143 back = find_tree_backref(rec, parent, root_objectid);
9146 if (back->node.found_ref) {
9149 back->node.found_ref = 0;
9151 if (back->node.found_extent_tree) {
9152 if (rec->extent_item_refs)
9153 rec->extent_item_refs--;
9154 back->node.found_extent_tree = 0;
9156 if (!back->node.found_extent_tree && back->node.found_ref) {
9157 rb_erase(&back->node.node, &rec->backref_tree);
9161 maybe_free_extent_rec(extent_cache, rec);
9166 static int delete_extent_records(struct btrfs_trans_handle *trans,
9167 struct btrfs_root *root,
9168 struct btrfs_path *path,
9171 struct btrfs_key key;
9172 struct btrfs_key found_key;
9173 struct extent_buffer *leaf;
9178 key.objectid = bytenr;
9180 key.offset = (u64)-1;
9183 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9190 if (path->slots[0] == 0)
9196 leaf = path->nodes[0];
9197 slot = path->slots[0];
9199 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9200 if (found_key.objectid != bytenr)
9203 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9204 found_key.type != BTRFS_METADATA_ITEM_KEY &&
9205 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9206 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9207 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9208 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9209 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9210 btrfs_release_path(path);
9211 if (found_key.type == 0) {
9212 if (found_key.offset == 0)
9214 key.offset = found_key.offset - 1;
9215 key.type = found_key.type;
9217 key.type = found_key.type - 1;
9218 key.offset = (u64)-1;
9222 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9223 found_key.objectid, found_key.type, found_key.offset);
9225 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9228 btrfs_release_path(path);
9230 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9231 found_key.type == BTRFS_METADATA_ITEM_KEY) {
9232 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9233 found_key.offset : root->fs_info->nodesize;
9235 ret = btrfs_update_block_group(trans, root, bytenr,
9242 btrfs_release_path(path);
9247 * for a single backref, this will allocate a new extent
9248 * and add the backref to it.
9250 static int record_extent(struct btrfs_trans_handle *trans,
9251 struct btrfs_fs_info *info,
9252 struct btrfs_path *path,
9253 struct extent_record *rec,
9254 struct extent_backref *back,
9255 int allocated, u64 flags)
9258 struct btrfs_root *extent_root = info->extent_root;
9259 struct extent_buffer *leaf;
9260 struct btrfs_key ins_key;
9261 struct btrfs_extent_item *ei;
9262 struct data_backref *dback;
9263 struct btrfs_tree_block_info *bi;
9266 rec->max_size = max_t(u64, rec->max_size,
9270 u32 item_size = sizeof(*ei);
9273 item_size += sizeof(*bi);
9275 ins_key.objectid = rec->start;
9276 ins_key.offset = rec->max_size;
9277 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9279 ret = btrfs_insert_empty_item(trans, extent_root, path,
9280 &ins_key, item_size);
9284 leaf = path->nodes[0];
9285 ei = btrfs_item_ptr(leaf, path->slots[0],
9286 struct btrfs_extent_item);
9288 btrfs_set_extent_refs(leaf, ei, 0);
9289 btrfs_set_extent_generation(leaf, ei, rec->generation);
9291 if (back->is_data) {
9292 btrfs_set_extent_flags(leaf, ei,
9293 BTRFS_EXTENT_FLAG_DATA);
9295 struct btrfs_disk_key copy_key;;
9297 bi = (struct btrfs_tree_block_info *)(ei + 1);
9298 memset_extent_buffer(leaf, 0, (unsigned long)bi,
9301 btrfs_set_disk_key_objectid(&copy_key,
9302 rec->info_objectid);
9303 btrfs_set_disk_key_type(&copy_key, 0);
9304 btrfs_set_disk_key_offset(&copy_key, 0);
9306 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9307 btrfs_set_tree_block_key(leaf, bi, &copy_key);
9309 btrfs_set_extent_flags(leaf, ei,
9310 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9313 btrfs_mark_buffer_dirty(leaf);
9314 ret = btrfs_update_block_group(trans, extent_root, rec->start,
9315 rec->max_size, 1, 0);
9318 btrfs_release_path(path);
9321 if (back->is_data) {
9325 dback = to_data_backref(back);
9326 if (back->full_backref)
9327 parent = dback->parent;
9331 for (i = 0; i < dback->found_ref; i++) {
9332 /* if parent != 0, we're doing a full backref
9333 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9334 * just makes the backref allocator create a data
9337 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9338 rec->start, rec->max_size,
9342 BTRFS_FIRST_FREE_OBJECTID :
9348 fprintf(stderr, "adding new data backref"
9349 " on %llu %s %llu owner %llu"
9350 " offset %llu found %d\n",
9351 (unsigned long long)rec->start,
9352 back->full_backref ?
9354 back->full_backref ?
9355 (unsigned long long)parent :
9356 (unsigned long long)dback->root,
9357 (unsigned long long)dback->owner,
9358 (unsigned long long)dback->offset,
9362 struct tree_backref *tback;
9364 tback = to_tree_backref(back);
9365 if (back->full_backref)
9366 parent = tback->parent;
9370 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9371 rec->start, rec->max_size,
9372 parent, tback->root, 0, 0);
9373 fprintf(stderr, "adding new tree backref on "
9374 "start %llu len %llu parent %llu root %llu\n",
9375 rec->start, rec->max_size, parent, tback->root);
9378 btrfs_release_path(path);
9382 static struct extent_entry *find_entry(struct list_head *entries,
9383 u64 bytenr, u64 bytes)
9385 struct extent_entry *entry = NULL;
9387 list_for_each_entry(entry, entries, list) {
9388 if (entry->bytenr == bytenr && entry->bytes == bytes)
9395 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9397 struct extent_entry *entry, *best = NULL, *prev = NULL;
9399 list_for_each_entry(entry, entries, list) {
9401 * If there are as many broken entries as entries then we know
9402 * not to trust this particular entry.
9404 if (entry->broken == entry->count)
9408 * Special case, when there are only two entries and 'best' is
9418 * If our current entry == best then we can't be sure our best
9419 * is really the best, so we need to keep searching.
9421 if (best && best->count == entry->count) {
9427 /* Prev == entry, not good enough, have to keep searching */
9428 if (!prev->broken && prev->count == entry->count)
9432 best = (prev->count > entry->count) ? prev : entry;
9433 else if (best->count < entry->count)
9441 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9442 struct data_backref *dback, struct extent_entry *entry)
9444 struct btrfs_trans_handle *trans;
9445 struct btrfs_root *root;
9446 struct btrfs_file_extent_item *fi;
9447 struct extent_buffer *leaf;
9448 struct btrfs_key key;
9452 key.objectid = dback->root;
9453 key.type = BTRFS_ROOT_ITEM_KEY;
9454 key.offset = (u64)-1;
9455 root = btrfs_read_fs_root(info, &key);
9457 fprintf(stderr, "Couldn't find root for our ref\n");
9462 * The backref points to the original offset of the extent if it was
9463 * split, so we need to search down to the offset we have and then walk
9464 * forward until we find the backref we're looking for.
9466 key.objectid = dback->owner;
9467 key.type = BTRFS_EXTENT_DATA_KEY;
9468 key.offset = dback->offset;
9469 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9471 fprintf(stderr, "Error looking up ref %d\n", ret);
9476 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9477 ret = btrfs_next_leaf(root, path);
9479 fprintf(stderr, "Couldn't find our ref, next\n");
9483 leaf = path->nodes[0];
9484 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9485 if (key.objectid != dback->owner ||
9486 key.type != BTRFS_EXTENT_DATA_KEY) {
9487 fprintf(stderr, "Couldn't find our ref, search\n");
9490 fi = btrfs_item_ptr(leaf, path->slots[0],
9491 struct btrfs_file_extent_item);
9492 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9493 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9495 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9500 btrfs_release_path(path);
9502 trans = btrfs_start_transaction(root, 1);
9504 return PTR_ERR(trans);
9507 * Ok we have the key of the file extent we want to fix, now we can cow
9508 * down to the thing and fix it.
9510 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9512 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9513 key.objectid, key.type, key.offset, ret);
9517 fprintf(stderr, "Well that's odd, we just found this key "
9518 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9523 leaf = path->nodes[0];
9524 fi = btrfs_item_ptr(leaf, path->slots[0],
9525 struct btrfs_file_extent_item);
9527 if (btrfs_file_extent_compression(leaf, fi) &&
9528 dback->disk_bytenr != entry->bytenr) {
9529 fprintf(stderr, "Ref doesn't match the record start and is "
9530 "compressed, please take a btrfs-image of this file "
9531 "system and send it to a btrfs developer so they can "
9532 "complete this functionality for bytenr %Lu\n",
9533 dback->disk_bytenr);
9538 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9539 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9540 } else if (dback->disk_bytenr > entry->bytenr) {
9541 u64 off_diff, offset;
9543 off_diff = dback->disk_bytenr - entry->bytenr;
9544 offset = btrfs_file_extent_offset(leaf, fi);
9545 if (dback->disk_bytenr + offset +
9546 btrfs_file_extent_num_bytes(leaf, fi) >
9547 entry->bytenr + entry->bytes) {
9548 fprintf(stderr, "Ref is past the entry end, please "
9549 "take a btrfs-image of this file system and "
9550 "send it to a btrfs developer, ref %Lu\n",
9551 dback->disk_bytenr);
9556 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9557 btrfs_set_file_extent_offset(leaf, fi, offset);
9558 } else if (dback->disk_bytenr < entry->bytenr) {
9561 offset = btrfs_file_extent_offset(leaf, fi);
9562 if (dback->disk_bytenr + offset < entry->bytenr) {
9563 fprintf(stderr, "Ref is before the entry start, please"
9564 " take a btrfs-image of this file system and "
9565 "send it to a btrfs developer, ref %Lu\n",
9566 dback->disk_bytenr);
9571 offset += dback->disk_bytenr;
9572 offset -= entry->bytenr;
9573 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9574 btrfs_set_file_extent_offset(leaf, fi, offset);
9577 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9580 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9581 * only do this if we aren't using compression, otherwise it's a
9584 if (!btrfs_file_extent_compression(leaf, fi))
9585 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9587 printf("ram bytes may be wrong?\n");
9588 btrfs_mark_buffer_dirty(leaf);
9590 err = btrfs_commit_transaction(trans, root);
9591 btrfs_release_path(path);
9592 return ret ? ret : err;
9595 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9596 struct extent_record *rec)
9598 struct extent_backref *back, *tmp;
9599 struct data_backref *dback;
9600 struct extent_entry *entry, *best = NULL;
9603 int broken_entries = 0;
9608 * Metadata is easy and the backrefs should always agree on bytenr and
9609 * size, if not we've got bigger issues.
9614 rbtree_postorder_for_each_entry_safe(back, tmp,
9615 &rec->backref_tree, node) {
9616 if (back->full_backref || !back->is_data)
9619 dback = to_data_backref(back);
9622 * We only pay attention to backrefs that we found a real
9625 if (dback->found_ref == 0)
9629 * For now we only catch when the bytes don't match, not the
9630 * bytenr. We can easily do this at the same time, but I want
9631 * to have a fs image to test on before we just add repair
9632 * functionality willy-nilly so we know we won't screw up the
9636 entry = find_entry(&entries, dback->disk_bytenr,
9639 entry = malloc(sizeof(struct extent_entry));
9644 memset(entry, 0, sizeof(*entry));
9645 entry->bytenr = dback->disk_bytenr;
9646 entry->bytes = dback->bytes;
9647 list_add_tail(&entry->list, &entries);
9652 * If we only have one entry we may think the entries agree when
9653 * in reality they don't so we have to do some extra checking.
9655 if (dback->disk_bytenr != rec->start ||
9656 dback->bytes != rec->nr || back->broken)
9667 /* Yay all the backrefs agree, carry on good sir */
9668 if (nr_entries <= 1 && !mismatch)
9671 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9672 "%Lu\n", rec->start);
9675 * First we want to see if the backrefs can agree amongst themselves who
9676 * is right, so figure out which one of the entries has the highest
9679 best = find_most_right_entry(&entries);
9682 * Ok so we may have an even split between what the backrefs think, so
9683 * this is where we use the extent ref to see what it thinks.
9686 entry = find_entry(&entries, rec->start, rec->nr);
9687 if (!entry && (!broken_entries || !rec->found_rec)) {
9688 fprintf(stderr, "Backrefs don't agree with each other "
9689 "and extent record doesn't agree with anybody,"
9690 " so we can't fix bytenr %Lu bytes %Lu\n",
9691 rec->start, rec->nr);
9694 } else if (!entry) {
9696 * Ok our backrefs were broken, we'll assume this is the
9697 * correct value and add an entry for this range.
9699 entry = malloc(sizeof(struct extent_entry));
9704 memset(entry, 0, sizeof(*entry));
9705 entry->bytenr = rec->start;
9706 entry->bytes = rec->nr;
9707 list_add_tail(&entry->list, &entries);
9711 best = find_most_right_entry(&entries);
9713 fprintf(stderr, "Backrefs and extent record evenly "
9714 "split on who is right, this is going to "
9715 "require user input to fix bytenr %Lu bytes "
9716 "%Lu\n", rec->start, rec->nr);
9723 * I don't think this can happen currently as we'll abort() if we catch
9724 * this case higher up, but in case somebody removes that we still can't
9725 * deal with it properly here yet, so just bail out if that's the case.
9727 if (best->bytenr != rec->start) {
9728 fprintf(stderr, "Extent start and backref starts don't match, "
9729 "please use btrfs-image on this file system and send "
9730 "it to a btrfs developer so they can make fsck fix "
9731 "this particular case. bytenr is %Lu, bytes is %Lu\n",
9732 rec->start, rec->nr);
9738 * Ok great we all agreed on an extent record, let's go find the real
9739 * references and fix up the ones that don't match.
9741 rbtree_postorder_for_each_entry_safe(back, tmp,
9742 &rec->backref_tree, node) {
9743 if (back->full_backref || !back->is_data)
9746 dback = to_data_backref(back);
9749 * Still ignoring backrefs that don't have a real ref attached
9752 if (dback->found_ref == 0)
9755 if (dback->bytes == best->bytes &&
9756 dback->disk_bytenr == best->bytenr)
9759 ret = repair_ref(info, path, dback, best);
9765 * Ok we messed with the actual refs, which means we need to drop our
9766 * entire cache and go back and rescan. I know this is a huge pain and
9767 * adds a lot of extra work, but it's the only way to be safe. Once all
9768 * the backrefs agree we may not need to do anything to the extent
9773 while (!list_empty(&entries)) {
9774 entry = list_entry(entries.next, struct extent_entry, list);
9775 list_del_init(&entry->list);
9781 static int process_duplicates(struct cache_tree *extent_cache,
9782 struct extent_record *rec)
9784 struct extent_record *good, *tmp;
9785 struct cache_extent *cache;
9789 * If we found a extent record for this extent then return, or if we
9790 * have more than one duplicate we are likely going to need to delete
9793 if (rec->found_rec || rec->num_duplicates > 1)
9796 /* Shouldn't happen but just in case */
9797 BUG_ON(!rec->num_duplicates);
9800 * So this happens if we end up with a backref that doesn't match the
9801 * actual extent entry. So either the backref is bad or the extent
9802 * entry is bad. Either way we want to have the extent_record actually
9803 * reflect what we found in the extent_tree, so we need to take the
9804 * duplicate out and use that as the extent_record since the only way we
9805 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9807 remove_cache_extent(extent_cache, &rec->cache);
9809 good = to_extent_record(rec->dups.next);
9810 list_del_init(&good->list);
9811 INIT_LIST_HEAD(&good->backrefs);
9812 INIT_LIST_HEAD(&good->dups);
9813 good->cache.start = good->start;
9814 good->cache.size = good->nr;
9815 good->content_checked = 0;
9816 good->owner_ref_checked = 0;
9817 good->num_duplicates = 0;
9818 good->refs = rec->refs;
9819 list_splice_init(&rec->backrefs, &good->backrefs);
9821 cache = lookup_cache_extent(extent_cache, good->start,
9825 tmp = container_of(cache, struct extent_record, cache);
9828 * If we find another overlapping extent and it's found_rec is
9829 * set then it's a duplicate and we need to try and delete
9832 if (tmp->found_rec || tmp->num_duplicates > 0) {
9833 if (list_empty(&good->list))
9834 list_add_tail(&good->list,
9835 &duplicate_extents);
9836 good->num_duplicates += tmp->num_duplicates + 1;
9837 list_splice_init(&tmp->dups, &good->dups);
9838 list_del_init(&tmp->list);
9839 list_add_tail(&tmp->list, &good->dups);
9840 remove_cache_extent(extent_cache, &tmp->cache);
9845 * Ok we have another non extent item backed extent rec, so lets
9846 * just add it to this extent and carry on like we did above.
9848 good->refs += tmp->refs;
9849 list_splice_init(&tmp->backrefs, &good->backrefs);
9850 remove_cache_extent(extent_cache, &tmp->cache);
9853 ret = insert_cache_extent(extent_cache, &good->cache);
9856 return good->num_duplicates ? 0 : 1;
9859 static int delete_duplicate_records(struct btrfs_root *root,
9860 struct extent_record *rec)
9862 struct btrfs_trans_handle *trans;
9863 LIST_HEAD(delete_list);
9864 struct btrfs_path path;
9865 struct extent_record *tmp, *good, *n;
9868 struct btrfs_key key;
9870 btrfs_init_path(&path);
9873 /* Find the record that covers all of the duplicates. */
9874 list_for_each_entry(tmp, &rec->dups, list) {
9875 if (good->start < tmp->start)
9877 if (good->nr > tmp->nr)
9880 if (tmp->start + tmp->nr < good->start + good->nr) {
9881 fprintf(stderr, "Ok we have overlapping extents that "
9882 "aren't completely covered by each other, this "
9883 "is going to require more careful thought. "
9884 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9885 tmp->start, tmp->nr, good->start, good->nr);
9892 list_add_tail(&rec->list, &delete_list);
9894 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9897 list_move_tail(&tmp->list, &delete_list);
9900 root = root->fs_info->extent_root;
9901 trans = btrfs_start_transaction(root, 1);
9902 if (IS_ERR(trans)) {
9903 ret = PTR_ERR(trans);
9907 list_for_each_entry(tmp, &delete_list, list) {
9908 if (tmp->found_rec == 0)
9910 key.objectid = tmp->start;
9911 key.type = BTRFS_EXTENT_ITEM_KEY;
9912 key.offset = tmp->nr;
9914 /* Shouldn't happen but just in case */
9915 if (tmp->metadata) {
9916 fprintf(stderr, "Well this shouldn't happen, extent "
9917 "record overlaps but is metadata? "
9918 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9922 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9928 ret = btrfs_del_item(trans, root, &path);
9931 btrfs_release_path(&path);
9934 err = btrfs_commit_transaction(trans, root);
9938 while (!list_empty(&delete_list)) {
9939 tmp = to_extent_record(delete_list.next);
9940 list_del_init(&tmp->list);
9946 while (!list_empty(&rec->dups)) {
9947 tmp = to_extent_record(rec->dups.next);
9948 list_del_init(&tmp->list);
9952 btrfs_release_path(&path);
9954 if (!ret && !nr_del)
9955 rec->num_duplicates = 0;
9957 return ret ? ret : nr_del;
9960 static int find_possible_backrefs(struct btrfs_fs_info *info,
9961 struct btrfs_path *path,
9962 struct cache_tree *extent_cache,
9963 struct extent_record *rec)
9965 struct btrfs_root *root;
9966 struct extent_backref *back, *tmp;
9967 struct data_backref *dback;
9968 struct cache_extent *cache;
9969 struct btrfs_file_extent_item *fi;
9970 struct btrfs_key key;
9974 rbtree_postorder_for_each_entry_safe(back, tmp,
9975 &rec->backref_tree, node) {
9976 /* Don't care about full backrefs (poor unloved backrefs) */
9977 if (back->full_backref || !back->is_data)
9980 dback = to_data_backref(back);
9982 /* We found this one, we don't need to do a lookup */
9983 if (dback->found_ref)
9986 key.objectid = dback->root;
9987 key.type = BTRFS_ROOT_ITEM_KEY;
9988 key.offset = (u64)-1;
9990 root = btrfs_read_fs_root(info, &key);
9992 /* No root, definitely a bad ref, skip */
9993 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9995 /* Other err, exit */
9997 return PTR_ERR(root);
9999 key.objectid = dback->owner;
10000 key.type = BTRFS_EXTENT_DATA_KEY;
10001 key.offset = dback->offset;
10002 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
10004 btrfs_release_path(path);
10007 /* Didn't find it, we can carry on */
10012 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
10013 struct btrfs_file_extent_item);
10014 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
10015 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
10016 btrfs_release_path(path);
10017 cache = lookup_cache_extent(extent_cache, bytenr, 1);
10019 struct extent_record *tmp;
10020 tmp = container_of(cache, struct extent_record, cache);
10023 * If we found an extent record for the bytenr for this
10024 * particular backref then we can't add it to our
10025 * current extent record. We only want to add backrefs
10026 * that don't have a corresponding extent item in the
10027 * extent tree since they likely belong to this record
10028 * and we need to fix it if it doesn't match bytenrs.
10030 if (tmp->found_rec)
10034 dback->found_ref += 1;
10035 dback->disk_bytenr = bytenr;
10036 dback->bytes = bytes;
10039 * Set this so the verify backref code knows not to trust the
10040 * values in this backref.
10049 * Record orphan data ref into corresponding root.
10051 * Return 0 if the extent item contains data ref and recorded.
10052 * Return 1 if the extent item contains no useful data ref
10053 * On that case, it may contains only shared_dataref or metadata backref
10054 * or the file extent exists(this should be handled by the extent bytenr
10055 * recovery routine)
10056 * Return <0 if something goes wrong.
10058 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10059 struct extent_record *rec)
10061 struct btrfs_key key;
10062 struct btrfs_root *dest_root;
10063 struct extent_backref *back, *tmp;
10064 struct data_backref *dback;
10065 struct orphan_data_extent *orphan;
10066 struct btrfs_path path;
10067 int recorded_data_ref = 0;
10072 btrfs_init_path(&path);
10073 rbtree_postorder_for_each_entry_safe(back, tmp,
10074 &rec->backref_tree, node) {
10075 if (back->full_backref || !back->is_data ||
10076 !back->found_extent_tree)
10078 dback = to_data_backref(back);
10079 if (dback->found_ref)
10081 key.objectid = dback->root;
10082 key.type = BTRFS_ROOT_ITEM_KEY;
10083 key.offset = (u64)-1;
10085 dest_root = btrfs_read_fs_root(fs_info, &key);
10087 /* For non-exist root we just skip it */
10088 if (IS_ERR(dest_root) || !dest_root)
10091 key.objectid = dback->owner;
10092 key.type = BTRFS_EXTENT_DATA_KEY;
10093 key.offset = dback->offset;
10095 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10096 btrfs_release_path(&path);
10098 * For ret < 0, it's OK since the fs-tree may be corrupted,
10099 * we need to record it for inode/file extent rebuild.
10100 * For ret > 0, we record it only for file extent rebuild.
10101 * For ret == 0, the file extent exists but only bytenr
10102 * mismatch, let the original bytenr fix routine to handle,
10108 orphan = malloc(sizeof(*orphan));
10113 INIT_LIST_HEAD(&orphan->list);
10114 orphan->root = dback->root;
10115 orphan->objectid = dback->owner;
10116 orphan->offset = dback->offset;
10117 orphan->disk_bytenr = rec->cache.start;
10118 orphan->disk_len = rec->cache.size;
10119 list_add(&dest_root->orphan_data_extents, &orphan->list);
10120 recorded_data_ref = 1;
10123 btrfs_release_path(&path);
10125 return !recorded_data_ref;
10131 * when an incorrect extent item is found, this will delete
10132 * all of the existing entries for it and recreate them
10133 * based on what the tree scan found.
10135 static int fixup_extent_refs(struct btrfs_fs_info *info,
10136 struct cache_tree *extent_cache,
10137 struct extent_record *rec)
10139 struct btrfs_trans_handle *trans = NULL;
10141 struct btrfs_path path;
10142 struct cache_extent *cache;
10143 struct extent_backref *back, *tmp;
10147 if (rec->flag_block_full_backref)
10148 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10150 btrfs_init_path(&path);
10151 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10153 * Sometimes the backrefs themselves are so broken they don't
10154 * get attached to any meaningful rec, so first go back and
10155 * check any of our backrefs that we couldn't find and throw
10156 * them into the list if we find the backref so that
10157 * verify_backrefs can figure out what to do.
10159 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10164 /* step one, make sure all of the backrefs agree */
10165 ret = verify_backrefs(info, &path, rec);
10169 trans = btrfs_start_transaction(info->extent_root, 1);
10170 if (IS_ERR(trans)) {
10171 ret = PTR_ERR(trans);
10175 /* step two, delete all the existing records */
10176 ret = delete_extent_records(trans, info->extent_root, &path,
10182 /* was this block corrupt? If so, don't add references to it */
10183 cache = lookup_cache_extent(info->corrupt_blocks,
10184 rec->start, rec->max_size);
10190 /* step three, recreate all the refs we did find */
10191 rbtree_postorder_for_each_entry_safe(back, tmp,
10192 &rec->backref_tree, node) {
10194 * if we didn't find any references, don't create a
10195 * new extent record
10197 if (!back->found_ref)
10200 rec->bad_full_backref = 0;
10201 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10209 int err = btrfs_commit_transaction(trans, info->extent_root);
10215 fprintf(stderr, "Repaired extent references for %llu\n",
10216 (unsigned long long)rec->start);
10218 btrfs_release_path(&path);
10222 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10223 struct extent_record *rec)
10225 struct btrfs_trans_handle *trans;
10226 struct btrfs_root *root = fs_info->extent_root;
10227 struct btrfs_path path;
10228 struct btrfs_extent_item *ei;
10229 struct btrfs_key key;
10233 key.objectid = rec->start;
10234 if (rec->metadata) {
10235 key.type = BTRFS_METADATA_ITEM_KEY;
10236 key.offset = rec->info_level;
10238 key.type = BTRFS_EXTENT_ITEM_KEY;
10239 key.offset = rec->max_size;
10242 trans = btrfs_start_transaction(root, 0);
10244 return PTR_ERR(trans);
10246 btrfs_init_path(&path);
10247 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10249 btrfs_release_path(&path);
10250 btrfs_commit_transaction(trans, root);
10253 fprintf(stderr, "Didn't find extent for %llu\n",
10254 (unsigned long long)rec->start);
10255 btrfs_release_path(&path);
10256 btrfs_commit_transaction(trans, root);
10260 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10261 struct btrfs_extent_item);
10262 flags = btrfs_extent_flags(path.nodes[0], ei);
10263 if (rec->flag_block_full_backref) {
10264 fprintf(stderr, "setting full backref on %llu\n",
10265 (unsigned long long)key.objectid);
10266 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10268 fprintf(stderr, "clearing full backref on %llu\n",
10269 (unsigned long long)key.objectid);
10270 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10272 btrfs_set_extent_flags(path.nodes[0], ei, flags);
10273 btrfs_mark_buffer_dirty(path.nodes[0]);
10274 btrfs_release_path(&path);
10275 ret = btrfs_commit_transaction(trans, root);
10277 fprintf(stderr, "Repaired extent flags for %llu\n",
10278 (unsigned long long)rec->start);
10283 /* right now we only prune from the extent allocation tree */
10284 static int prune_one_block(struct btrfs_trans_handle *trans,
10285 struct btrfs_fs_info *info,
10286 struct btrfs_corrupt_block *corrupt)
10289 struct btrfs_path path;
10290 struct extent_buffer *eb;
10294 int level = corrupt->level + 1;
10296 btrfs_init_path(&path);
10298 /* we want to stop at the parent to our busted block */
10299 path.lowest_level = level;
10301 ret = btrfs_search_slot(trans, info->extent_root,
10302 &corrupt->key, &path, -1, 1);
10307 eb = path.nodes[level];
10314 * hopefully the search gave us the block we want to prune,
10315 * lets try that first
10317 slot = path.slots[level];
10318 found = btrfs_node_blockptr(eb, slot);
10319 if (found == corrupt->cache.start)
10322 nritems = btrfs_header_nritems(eb);
10324 /* the search failed, lets scan this node and hope we find it */
10325 for (slot = 0; slot < nritems; slot++) {
10326 found = btrfs_node_blockptr(eb, slot);
10327 if (found == corrupt->cache.start)
10331 * we couldn't find the bad block. TODO, search all the nodes for pointers
10334 if (eb == info->extent_root->node) {
10339 btrfs_release_path(&path);
10344 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10345 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10348 btrfs_release_path(&path);
10352 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10354 struct btrfs_trans_handle *trans = NULL;
10355 struct cache_extent *cache;
10356 struct btrfs_corrupt_block *corrupt;
10359 cache = search_cache_extent(info->corrupt_blocks, 0);
10363 trans = btrfs_start_transaction(info->extent_root, 1);
10365 return PTR_ERR(trans);
10367 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10368 prune_one_block(trans, info, corrupt);
10369 remove_cache_extent(info->corrupt_blocks, cache);
10372 return btrfs_commit_transaction(trans, info->extent_root);
10376 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10378 struct btrfs_block_group_cache *cache;
10383 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10384 &start, &end, EXTENT_DIRTY);
10387 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10392 cache = btrfs_lookup_first_block_group(fs_info, start);
10397 start = cache->key.objectid + cache->key.offset;
/*
 * Report problems found on every extent record in @extent_cache and, when
 * the global 'repair' flag is set, try to fix them.
 *
 * Visible steps: exclude problem extents and corrupt blocks from
 * allocation, delete duplicate extent items, then check each record for
 * ref-count mismatches, unchecked backpointers, failed owner checks, bad
 * full-backref flags, stripe-crossing metadata and chunk type mismatches.
 * Each record is removed from the cache after it has been handled.
 *
 * NOTE(review): this listing is missing lines (braces, short statements
 * and the final return), so the comments below only describe statements
 * that are actually visible.
 */
10401 static int check_extent_refs(struct btrfs_root *root,
10402 struct cache_tree *extent_cache)
10404 struct extent_record *rec;
10405 struct cache_extent *cache;
10411 * if we're doing a repair, we have to make sure
10412 * we don't allocate from the problem extents.
10413 * In the worst case, this will be all the
10414 * extents in the FS
10416 cache = search_cache_extent(extent_cache, 0);
10418 rec = container_of(cache, struct extent_record, cache);
10419 set_extent_dirty(root->fs_info->excluded_extents,
10421 rec->start + rec->max_size - 1);
10422 cache = next_cache_extent(cache);
10425 /* pin down all the corrupted blocks too */
10426 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10428 set_extent_dirty(root->fs_info->excluded_extents,
10430 cache->start + cache->size - 1);
10431 cache = next_cache_extent(cache);
10433 prune_corrupt_blocks(root->fs_info);
10434 reset_cached_block_groups(root->fs_info);
10437 reset_cached_block_groups(root->fs_info);
10440 * We need to delete any duplicate entries we find first otherwise we
10441 * could mess up the extent tree when we have backrefs that actually
10442 * belong to a different extent item and not the weird duplicate one.
10444 while (repair && !list_empty(&duplicate_extents)) {
10445 rec = to_extent_record(duplicate_extents.next);
10446 list_del_init(&rec->list);
10448 /* Sometimes we can find a backref before we find an actual
10449 * extent, so we need to process it a little bit to see if there
10450 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10451 * if this is a backref screwup. If we need to delete stuff
10452 * process_duplicates() will return 0, otherwise it will return
10455 if (process_duplicates(extent_cache, rec))
10457 ret = delete_duplicate_records(root, rec);
10461 * delete_duplicate_records will return the number of entries
10462 * deleted, so if it's greater than 0 then we know we actually
10463 * did something and we need to remove.
/* Main pass: always take the first record left in the cache. */
10476 cache = search_cache_extent(extent_cache, 0);
10479 rec = container_of(cache, struct extent_record, cache);
10480 if (rec->num_duplicates) {
10481 fprintf(stderr, "extent item %llu has multiple extent "
10482 "items\n", (unsigned long long)rec->start);
/* Refs counted during the scan differ from the extent item's count. */
10486 if (rec->refs != rec->extent_item_refs) {
10487 fprintf(stderr, "ref mismatch on [%llu %llu] ",
10488 (unsigned long long)rec->start,
10489 (unsigned long long)rec->nr);
10490 fprintf(stderr, "extent item %llu, found %llu\n",
10491 (unsigned long long)rec->extent_item_refs,
10492 (unsigned long long)rec->refs);
10493 ret = record_orphan_data_extents(root->fs_info, rec);
10499 if (all_backpointers_checked(rec, 1)) {
10500 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10501 (unsigned long long)rec->start,
10502 (unsigned long long)rec->nr);
10506 if (!rec->owner_ref_checked) {
10507 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10508 (unsigned long long)rec->start,
10509 (unsigned long long)rec->nr);
/* Rebuild the extent item and its refs only when repairing. */
10514 if (repair && fix) {
10515 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10521 if (rec->bad_full_backref) {
10522 fprintf(stderr, "bad full backref, on [%llu]\n",
10523 (unsigned long long)rec->start);
10525 ret = fixup_extent_flags(root->fs_info, rec);
10533 * Although it's not a extent ref's problem, we reuse this
10534 * routine for error reporting.
10535 * No repair function yet.
10537 if (rec->crossing_stripes) {
10539 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10540 rec->start, rec->start + rec->max_size);
10544 if (rec->wrong_chunk_type) {
10546 "bad extent [%llu, %llu), type mismatch with chunk\n",
10547 rec->start, rec->start + rec->max_size);
/* Done with this record: drop it from the cache and free its backrefs. */
10551 remove_cache_extent(extent_cache, cache);
10552 free_all_extent_backrefs(rec);
/* Stop excluding the range if the record was clean or a fix was attempted. */
10553 if (!init_extent_tree && repair && (!cur_err || fix))
10554 clear_extent_dirty(root->fs_info->excluded_extents,
10556 rec->start + rec->max_size - 1);
/* Any failure other than -EAGAIN is reported as fatal. */
10561 if (ret && ret != -EAGAIN) {
10562 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10565 struct btrfs_trans_handle *trans;
/* Fix up block accounting on the extent root in its own transaction. */
10567 root = root->fs_info->extent_root;
10568 trans = btrfs_start_transaction(root, 1);
10569 if (IS_ERR(trans)) {
10570 ret = PTR_ERR(trans);
10574 ret = btrfs_fix_block_accounting(trans, root);
10577 ret = btrfs_commit_transaction(trans, root);
10586 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10590 if (type & BTRFS_BLOCK_GROUP_RAID0) {
10591 stripe_size = length;
10592 stripe_size /= num_stripes;
10593 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10594 stripe_size = length * 2;
10595 stripe_size /= num_stripes;
10596 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10597 stripe_size = length;
10598 stripe_size /= (num_stripes - 1);
10599 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10600 stripe_size = length;
10601 stripe_size /= (num_stripes - 2);
10603 stripe_size = length;
10605 return stripe_size;
10609 * Check the chunk with its block group/dev list ref:
10610 * Return 0 if all refs seems valid.
10611 * Return 1 if part of refs seems valid, need later check for rebuild ref
10612 * like missing block group and needs to search extent tree to rebuild them.
10613 * Return -1 if essential refs are missing and unable to rebuild.
10615 static int check_chunk_refs(struct chunk_record *chunk_rec,
10616 struct block_group_tree *block_group_cache,
10617 struct device_extent_tree *dev_extent_cache,
10620 struct cache_extent *block_group_item;
10621 struct block_group_record *block_group_rec;
10622 struct cache_extent *dev_extent_item;
10623 struct device_extent_record *dev_extent_rec;
10627 int metadump_v2 = 0;
10631 block_group_item = lookup_cache_extent(&block_group_cache->tree,
10633 chunk_rec->length);
10634 if (block_group_item) {
10635 block_group_rec = container_of(block_group_item,
10636 struct block_group_record,
10638 if (chunk_rec->length != block_group_rec->offset ||
10639 chunk_rec->offset != block_group_rec->objectid ||
10641 chunk_rec->type_flags != block_group_rec->flags)) {
10644 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10645 chunk_rec->objectid,
10650 chunk_rec->type_flags,
10651 block_group_rec->objectid,
10652 block_group_rec->type,
10653 block_group_rec->offset,
10654 block_group_rec->offset,
10655 block_group_rec->objectid,
10656 block_group_rec->flags);
10659 list_del_init(&block_group_rec->list);
10660 chunk_rec->bg_rec = block_group_rec;
10665 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10666 chunk_rec->objectid,
10671 chunk_rec->type_flags);
10678 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10679 chunk_rec->num_stripes);
10680 for (i = 0; i < chunk_rec->num_stripes; ++i) {
10681 devid = chunk_rec->stripes[i].devid;
10682 offset = chunk_rec->stripes[i].offset;
10683 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10684 devid, offset, length);
10685 if (dev_extent_item) {
10686 dev_extent_rec = container_of(dev_extent_item,
10687 struct device_extent_record,
10689 if (dev_extent_rec->objectid != devid ||
10690 dev_extent_rec->offset != offset ||
10691 dev_extent_rec->chunk_offset != chunk_rec->offset ||
10692 dev_extent_rec->length != length) {
10695 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10696 chunk_rec->objectid,
10699 chunk_rec->stripes[i].devid,
10700 chunk_rec->stripes[i].offset,
10701 dev_extent_rec->objectid,
10702 dev_extent_rec->offset,
10703 dev_extent_rec->length);
10706 list_move(&dev_extent_rec->chunk_list,
10707 &chunk_rec->dextents);
10712 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10713 chunk_rec->objectid,
10716 chunk_rec->stripes[i].devid,
10717 chunk_rec->stripes[i].offset);
10724 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
10725 int check_chunks(struct cache_tree *chunk_cache,
10726 struct block_group_tree *block_group_cache,
10727 struct device_extent_tree *dev_extent_cache,
10728 struct list_head *good, struct list_head *bad,
10729 struct list_head *rebuild, int silent)
10731 struct cache_extent *chunk_item;
10732 struct chunk_record *chunk_rec;
10733 struct block_group_record *bg_rec;
10734 struct device_extent_record *dext_rec;
10738 chunk_item = first_cache_extent(chunk_cache);
10739 while (chunk_item) {
10740 chunk_rec = container_of(chunk_item, struct chunk_record,
10742 err = check_chunk_refs(chunk_rec, block_group_cache,
10743 dev_extent_cache, silent);
10746 if (err == 0 && good)
10747 list_add_tail(&chunk_rec->list, good);
10748 if (err > 0 && rebuild)
10749 list_add_tail(&chunk_rec->list, rebuild);
10750 if (err < 0 && bad)
10751 list_add_tail(&chunk_rec->list, bad);
10752 chunk_item = next_cache_extent(chunk_item);
10755 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10758 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10766 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10770 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10771 dext_rec->objectid,
10781 static int check_device_used(struct device_record *dev_rec,
10782 struct device_extent_tree *dext_cache)
10784 struct cache_extent *cache;
10785 struct device_extent_record *dev_extent_rec;
10786 u64 total_byte = 0;
10788 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10790 dev_extent_rec = container_of(cache,
10791 struct device_extent_record,
10793 if (dev_extent_rec->objectid != dev_rec->devid)
10796 list_del_init(&dev_extent_rec->device_list);
10797 total_byte += dev_extent_rec->length;
10798 cache = next_cache_extent(cache);
10801 if (total_byte != dev_rec->byte_used) {
10803 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10804 total_byte, dev_rec->byte_used, dev_rec->objectid,
10805 dev_rec->type, dev_rec->offset);
10812 /* check btrfs_dev_item -> btrfs_dev_extent */
10813 static int check_devices(struct rb_root *dev_cache,
10814 struct device_extent_tree *dev_extent_cache)
10816 struct rb_node *dev_node;
10817 struct device_record *dev_rec;
10818 struct device_extent_record *dext_rec;
10822 dev_node = rb_first(dev_cache);
10824 dev_rec = container_of(dev_node, struct device_record, node);
10825 err = check_device_used(dev_rec, dev_extent_cache);
10829 dev_node = rb_next(dev_node);
10831 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10834 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10835 dext_rec->objectid, dext_rec->offset, dext_rec->length);
10842 static int add_root_item_to_list(struct list_head *head,
10843 u64 objectid, u64 bytenr, u64 last_snapshot,
10844 u8 level, u8 drop_level,
10845 struct btrfs_key *drop_key)
10848 struct root_item_record *ri_rec;
10849 ri_rec = malloc(sizeof(*ri_rec));
10852 ri_rec->bytenr = bytenr;
10853 ri_rec->objectid = objectid;
10854 ri_rec->level = level;
10855 ri_rec->drop_level = drop_level;
10856 ri_rec->last_snapshot = last_snapshot;
10858 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10859 list_add_tail(&ri_rec->list, head);
10864 static void free_root_item_list(struct list_head *list)
10866 struct root_item_record *ri_rec;
10868 while (!list_empty(list)) {
10869 ri_rec = list_first_entry(list, struct root_item_record,
10871 list_del_init(&ri_rec->list);
10876 static int deal_root_from_list(struct list_head *list,
10877 struct btrfs_root *root,
10878 struct block_info *bits,
10880 struct cache_tree *pending,
10881 struct cache_tree *seen,
10882 struct cache_tree *reada,
10883 struct cache_tree *nodes,
10884 struct cache_tree *extent_cache,
10885 struct cache_tree *chunk_cache,
10886 struct rb_root *dev_cache,
10887 struct block_group_tree *block_group_cache,
10888 struct device_extent_tree *dev_extent_cache)
10893 while (!list_empty(list)) {
10894 struct root_item_record *rec;
10895 struct extent_buffer *buf;
10896 rec = list_entry(list->next,
10897 struct root_item_record, list);
10899 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10900 if (!extent_buffer_uptodate(buf)) {
10901 free_extent_buffer(buf);
10905 ret = add_root_to_pending(buf, extent_cache, pending,
10906 seen, nodes, rec->objectid);
10910 * To rebuild extent tree, we need deal with snapshot
10911 * one by one, otherwise we deal with node firstly which
10912 * can maximize readahead.
10915 ret = run_next_block(root, bits, bits_nr, &last,
10916 pending, seen, reada, nodes,
10917 extent_cache, chunk_cache,
10918 dev_cache, block_group_cache,
10919 dev_extent_cache, rec);
10923 free_extent_buffer(buf);
10924 list_del(&rec->list);
10930 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10931 reada, nodes, extent_cache, chunk_cache,
10932 dev_cache, block_group_cache,
10933 dev_extent_cache, NULL);
10943 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10945 struct rb_root dev_cache;
10946 struct cache_tree chunk_cache;
10947 struct block_group_tree block_group_cache;
10948 struct device_extent_tree dev_extent_cache;
10949 struct cache_tree extent_cache;
10950 struct cache_tree seen;
10951 struct cache_tree pending;
10952 struct cache_tree reada;
10953 struct cache_tree nodes;
10954 struct extent_io_tree excluded_extents;
10955 struct cache_tree corrupt_blocks;
10956 struct btrfs_path path;
10957 struct btrfs_key key;
10958 struct btrfs_key found_key;
10960 struct block_info *bits;
10962 struct extent_buffer *leaf;
10964 struct btrfs_root_item ri;
10965 struct list_head dropping_trees;
10966 struct list_head normal_trees;
10967 struct btrfs_root *root1;
10968 struct btrfs_root *root;
10972 root = fs_info->fs_root;
10973 dev_cache = RB_ROOT;
10974 cache_tree_init(&chunk_cache);
10975 block_group_tree_init(&block_group_cache);
10976 device_extent_tree_init(&dev_extent_cache);
10978 cache_tree_init(&extent_cache);
10979 cache_tree_init(&seen);
10980 cache_tree_init(&pending);
10981 cache_tree_init(&nodes);
10982 cache_tree_init(&reada);
10983 cache_tree_init(&corrupt_blocks);
10984 extent_io_tree_init(&excluded_extents);
10985 INIT_LIST_HEAD(&dropping_trees);
10986 INIT_LIST_HEAD(&normal_trees);
10989 fs_info->excluded_extents = &excluded_extents;
10990 fs_info->fsck_extent_cache = &extent_cache;
10991 fs_info->free_extent_hook = free_extent_hook;
10992 fs_info->corrupt_blocks = &corrupt_blocks;
10996 bits = malloc(bits_nr * sizeof(struct block_info));
11002 if (ctx.progress_enabled) {
11003 ctx.tp = TASK_EXTENTS;
11004 task_start(ctx.info);
11008 root1 = fs_info->tree_root;
11009 level = btrfs_header_level(root1->node);
11010 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11011 root1->node->start, 0, level, 0, NULL);
11014 root1 = fs_info->chunk_root;
11015 level = btrfs_header_level(root1->node);
11016 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11017 root1->node->start, 0, level, 0, NULL);
11020 btrfs_init_path(&path);
11023 key.type = BTRFS_ROOT_ITEM_KEY;
11024 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
11028 leaf = path.nodes[0];
11029 slot = path.slots[0];
11030 if (slot >= btrfs_header_nritems(path.nodes[0])) {
11031 ret = btrfs_next_leaf(root, &path);
11034 leaf = path.nodes[0];
11035 slot = path.slots[0];
11037 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11038 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
11039 unsigned long offset;
11042 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
11043 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
11044 last_snapshot = btrfs_root_last_snapshot(&ri);
11045 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
11046 level = btrfs_root_level(&ri);
11047 ret = add_root_item_to_list(&normal_trees,
11048 found_key.objectid,
11049 btrfs_root_bytenr(&ri),
11050 last_snapshot, level,
11055 level = btrfs_root_level(&ri);
11056 objectid = found_key.objectid;
11057 btrfs_disk_key_to_cpu(&found_key,
11058 &ri.drop_progress);
11059 ret = add_root_item_to_list(&dropping_trees,
11061 btrfs_root_bytenr(&ri),
11062 last_snapshot, level,
11063 ri.drop_level, &found_key);
11070 btrfs_release_path(&path);
11073 * check_block can return -EAGAIN if it fixes something, please keep
11074 * this in mind when dealing with return values from these functions, if
11075 * we get -EAGAIN we want to fall through and restart the loop.
11077 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11078 &seen, &reada, &nodes, &extent_cache,
11079 &chunk_cache, &dev_cache, &block_group_cache,
11080 &dev_extent_cache);
11082 if (ret == -EAGAIN)
11086 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11087 &pending, &seen, &reada, &nodes,
11088 &extent_cache, &chunk_cache, &dev_cache,
11089 &block_group_cache, &dev_extent_cache);
11091 if (ret == -EAGAIN)
11096 ret = check_chunks(&chunk_cache, &block_group_cache,
11097 &dev_extent_cache, NULL, NULL, NULL, 0);
11099 if (ret == -EAGAIN)
11104 ret = check_extent_refs(root, &extent_cache);
11106 if (ret == -EAGAIN)
11111 ret = check_devices(&dev_cache, &dev_extent_cache);
11116 task_stop(ctx.info);
11118 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11119 extent_io_tree_cleanup(&excluded_extents);
11120 fs_info->fsck_extent_cache = NULL;
11121 fs_info->free_extent_hook = NULL;
11122 fs_info->corrupt_blocks = NULL;
11123 fs_info->excluded_extents = NULL;
11126 free_chunk_cache_tree(&chunk_cache);
11127 free_device_cache_tree(&dev_cache);
11128 free_block_group_tree(&block_group_cache);
11129 free_device_extent_tree(&dev_extent_cache);
11130 free_extent_cache_tree(&seen);
11131 free_extent_cache_tree(&pending);
11132 free_extent_cache_tree(&reada);
11133 free_extent_cache_tree(&nodes);
11134 free_root_item_list(&normal_trees);
11135 free_root_item_list(&dropping_trees);
11138 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11139 free_extent_cache_tree(&seen);
11140 free_extent_cache_tree(&pending);
11141 free_extent_cache_tree(&reada);
11142 free_extent_cache_tree(&nodes);
11143 free_chunk_cache_tree(&chunk_cache);
11144 free_block_group_tree(&block_group_cache);
11145 free_device_cache_tree(&dev_cache);
11146 free_device_extent_tree(&dev_extent_cache);
11147 free_extent_record_cache(&extent_cache);
11148 free_root_item_list(&normal_trees);
11149 free_root_item_list(&dropping_trees);
11150 extent_io_tree_cleanup(&excluded_extents);
11155 * Check backrefs of a tree block given by @bytenr or @eb.
11157 * @root: the root containing the @bytenr or @eb
11158 * @eb: tree block extent buffer, can be NULL
11159 * @bytenr: bytenr of the tree block to search
11160 * @level: tree level of the tree block
11161 * @owner: owner of the tree block
11163 * Return >0 for any error found and output error message
11164 * Return 0 for no error found
/*
 * check_tree_block_ref() - look up the extent-tree backref of one tree block.
 *
 * NOTE(review): this listing appears to omit short source lines (the embedded
 * line numbers have gaps: bare braces, some local declarations and bare
 * if/else/goto/return lines are not shown).  The comments below describe only
 * the statements that are visible; the exact branch structure must be
 * confirmed against the complete file.
 *
 * Visible flow:
 *  - search root->fs_info->extent_root for the item keyed by @bytenr
 *    (METADATA_ITEM when the SKINNY_METADATA incompat flag is set, otherwise
 *    EXTENT_ITEM),
 *  - compare the item's flags, generation, level and ref count against @eb,
 *    @level and @owner,
 *  - walk the inline refs of the item,
 *  - search for a keyed TREE_BLOCK_REF item (bytenr, root->objectid).
 * Problems are recorded in `err` as BACKREF_MISSING / BACKREF_MISMATCH.
 * @eb may be NULL: the function calls itself with eb == NULL below and the
 * final message is only printed when @eb is set.
 */
11166 static int check_tree_block_ref(struct btrfs_root *root,
11167 struct extent_buffer *eb, u64 bytenr,
11168 int level, u64 owner)
11170 struct btrfs_key key;
11171 struct btrfs_root *extent_root = root->fs_info->extent_root;
11172 struct btrfs_path path;
11173 struct btrfs_extent_item *ei;
11174 struct btrfs_extent_inline_ref *iref;
11175 struct extent_buffer *leaf;
11181 u32 nodesize = root->fs_info->nodesize;
11184 int tree_reloc_root = 0;
/*
 * Flag the case where @root is a tree-reloc root and @bytenr is its own root
 * node; the flag is consulted in the SHARED_BLOCK_REF branch further down.
 */
11189 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
11190 btrfs_header_bytenr(root->node) == bytenr)
11191 tree_reloc_root = 1;
11193 btrfs_init_path(&path);
11194 key.objectid = bytenr;
11195 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11196 key.type = BTRFS_METADATA_ITEM_KEY;
11198 key.type = BTRFS_EXTENT_ITEM_KEY;
/*
 * Search with the maximum offset, then call btrfs_previous_extent_item();
 * presumably that helper steps back to the extent item of @bytenr - confirm
 * against its definition.
 */
11199 key.offset = (u64)-1;
11201 /* Search for the backref in extent tree */
11202 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11204 err |= BACKREF_MISSING;
11207 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11209 err |= BACKREF_MISSING;
11213 leaf = path.nodes[0];
11214 slot = path.slots[0];
11215 btrfs_item_key_to_cpu(leaf, &key, slot);
11217 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * METADATA_ITEM: the level is stored in the key offset and the inline refs
 * start directly after the extent item.  In the other visible branch a
 * btrfs_tree_block_info follows the extent item, carries the level, and the
 * inline refs start after it.
 */
11219 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11220 skinny_level = (int)key.offset;
11221 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11223 struct btrfs_tree_block_info *info;
11225 info = (struct btrfs_tree_block_info *)(ei + 1);
11226 skinny_level = btrfs_tree_block_level(leaf, info);
11227 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/*
 * Consistency checks of the extent item.
 * NOTE(review): these checks use plain assignment (err = BACKREF_MISMATCH)
 * while the rest of the function uses |=; confirm no earlier bit can be lost.
 */
11234 if (!(btrfs_extent_flags(leaf, ei) &
11235 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11237 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11238 key.objectid, nodesize,
11239 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11240 err = BACKREF_MISMATCH;
/* Generation recorded in the block header vs. the one in the extent item. */
11242 header_gen = btrfs_header_generation(eb);
11243 extent_gen = btrfs_extent_generation(leaf, ei);
11244 if (header_gen != extent_gen) {
11246 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11247 key.objectid, nodesize, header_gen,
11249 err = BACKREF_MISMATCH;
11251 if (level != skinny_level) {
11253 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11254 key.objectid, nodesize, level, skinny_level);
11255 err = BACKREF_MISMATCH;
/* Blocks whose @owner is not an fs tree are expected to have exactly 1 ref. */
11257 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11259 "extent[%llu %u] is referred by other roots than %llu",
11260 key.objectid, nodesize, root->objectid);
11261 err = BACKREF_MISMATCH;
11266 * Iterate the extent/metadata item to find the exact backref
/* Walk from the first inline ref up to the end of the item (ei + item_size). */
11268 item_size = btrfs_item_size_nr(leaf, slot);
11269 ptr = (unsigned long)iref;
11270 end = (unsigned long)ei + item_size;
11271 while (ptr < end) {
11272 iref = (struct btrfs_extent_inline_ref *)ptr;
11273 type = btrfs_extent_inline_ref_type(leaf, iref);
11274 offset = btrfs_extent_inline_ref_offset(leaf, iref);
/* A TREE_BLOCK_REF matches when its offset is root->objectid or @owner. */
11276 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11277 (offset == root->objectid || offset == owner)) {
11279 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11281 * Backref of tree reloc root points to itself, no need
11282 * to check backref any more.
11284 if (tree_reloc_root)
11287 /* Check if the backref points to valid referencer */
/*
 * Recursive call on the bytenr stored in the ref offset, with eb == NULL and
 * level + 1; a zero (no error) return counts as a found ref.
 */
11288 found_ref = !check_tree_block_ref(root, NULL,
11289 offset, level + 1, owner);
11294 ptr += btrfs_extent_inline_ref_size(type);
11298 * Inlined extent item doesn't have what we need, check
11299 * TREE_BLOCK_REF_KEY
/* Keyed lookup: (bytenr, TREE_BLOCK_REF, root->objectid). */
11302 btrfs_release_path(&path);
11303 key.objectid = bytenr;
11304 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11305 key.offset = root->objectid;
11307 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11312 err |= BACKREF_MISSING;
11314 btrfs_release_path(&path);
/* The "backref lost" message is only printed when a real @eb was passed. */
11315 if (eb && (err & BACKREF_MISSING))
11316 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
11317 bytenr, nodesize, owner, level);
11322 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11324 * Return >0 for any error found and output error message
11325 * Return 0 for no error found
/*
 * check_extent_data_item() - check the EXTENT_DATA item at @slot of leaf @eb
 * against the extent tree.
 *
 * NOTE(review): this listing appears to omit short source lines (gaps in the
 * embedded line numbers), so only the visible statements are described.
 *
 * Visible flow:
 *  - read the file extent item and its key,
 *  - report unaligned disk_num_bytes / num_bytes (BYTES_UNALIGNED) and add the
 *    values to the file-scope counters data_bytes_allocated /
 *    data_bytes_referenced,
 *  - look up the EXTENT_ITEM (disk_bytenr, disk_num_bytes) in the extent tree
 *    and require BTRFS_EXTENT_FLAG_DATA,
 *  - search the inline refs, then a keyed EXTENT_DATA_REF, then a keyed
 *    SHARED_DATA_REF, for a backref to this file extent,
 *  - set BACKREF_MISSING and print a message when none is found.
 */
11327 static int check_extent_data_item(struct btrfs_root *root,
11328 struct extent_buffer *eb, int slot)
11330 struct btrfs_file_extent_item *fi;
11331 struct btrfs_path path;
11332 struct btrfs_root *extent_root = root->fs_info->extent_root;
11333 struct btrfs_key fi_key;
11334 struct btrfs_key dbref_key;
11335 struct extent_buffer *leaf;
11336 struct btrfs_extent_item *ei;
11337 struct btrfs_extent_inline_ref *iref;
11338 struct btrfs_extent_data_ref *dref;
11341 u64 disk_num_bytes;
11342 u64 extent_num_bytes;
11349 int found_dbackref = 0;
11353 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11354 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11356 /* Nothing to check for hole and inline data extents */
11357 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11358 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11361 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11362 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11363 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11365 /* Check unaligned disk_num_bytes and num_bytes */
11366 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11368 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11369 fi_key.objectid, fi_key.offset, disk_num_bytes,
11370 root->fs_info->sectorsize);
11371 err |= BYTES_UNALIGNED;
/*
 * NOTE(review): one source line is missing between the error branch and this
 * accumulation (possibly an else) - confirm whether unaligned sizes are
 * counted.  The same applies to data_bytes_referenced below.
 */
11373 data_bytes_allocated += disk_num_bytes;
11375 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11377 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11378 fi_key.objectid, fi_key.offset, extent_num_bytes,
11379 root->fs_info->sectorsize);
11380 err |= BYTES_UNALIGNED;
11382 data_bytes_referenced += extent_num_bytes;
/* Owner recorded in the header of the leaf holding the file extent. */
11384 owner = btrfs_header_owner(eb);
11386 /* Check the extent item of the file extent in extent tree */
11387 btrfs_init_path(&path);
/* Exact key: (disk_bytenr, EXTENT_ITEM, disk_num_bytes). */
11388 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11389 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11390 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11392 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11396 leaf = path.nodes[0];
/*
 * The @slot parameter is reused here for the slot in the extent-tree leaf;
 * fi_key was already decoded from the original slot above.
 */
11397 slot = path.slots[0];
11398 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11400 extent_flags = btrfs_extent_flags(leaf, ei);
11402 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11404 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11405 disk_bytenr, disk_num_bytes,
11406 BTRFS_EXTENT_FLAG_DATA);
11407 err |= BACKREF_MISMATCH;
11410 /* Check data backref inside that extent item */
11411 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11412 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11413 ptr = (unsigned long)iref;
11414 end = (unsigned long)ei + item_size;
11415 while (ptr < end) {
11416 iref = (struct btrfs_extent_inline_ref *)ptr;
11417 type = btrfs_extent_inline_ref_type(leaf, iref);
/* The data ref is read starting at the inline ref's offset field. */
11418 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11420 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11421 ref_root = btrfs_extent_data_ref_root(leaf, dref);
/* Accept a data ref whose root is the leaf's header owner or root->objectid. */
11422 if (ref_root == owner || ref_root == root->objectid)
11423 found_dbackref = 1;
11424 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/*
 * Shared data ref: the ref offset is passed as the bytenr to
 * check_tree_block_ref() with eb == NULL; a zero return counts as found.
 * The remaining arguments are on lines not shown in this listing.
 */
11425 found_dbackref = !check_tree_block_ref(root, NULL,
11426 btrfs_extent_inline_ref_offset(leaf, iref),
11430 if (found_dbackref)
11432 ptr += btrfs_extent_inline_ref_size(type);
11435 if (!found_dbackref) {
11436 btrfs_release_path(&path);
11438 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
11439 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11440 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
/*
 * NOTE(review): the hash is computed from fi_key.offset (the file offset),
 * whereas the comments in check_extent_data_backref() describe the backref
 * offset as file_offset - file_extent_offset.  Confirm whether the file
 * extent offset should be subtracted here.
 */
11441 dbref_key.offset = hash_extent_data_ref(root->objectid,
11442 fi_key.objectid, fi_key.offset);
11444 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11445 &dbref_key, &path, 0, 0);
11447 found_dbackref = 1;
11451 btrfs_release_path(&path);
11454 * Neither inlined nor EXTENT_DATA_REF found, try
11455 * SHARED_DATA_REF as last chance.
/* Keyed lookup: (disk_bytenr, SHARED_DATA_REF, start of the leaf @eb). */
11457 dbref_key.objectid = disk_bytenr;
11458 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
11459 dbref_key.offset = eb->start;
11461 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11462 &dbref_key, &path, 0, 0);
11464 found_dbackref = 1;
11470 if (!found_dbackref)
11471 err |= BACKREF_MISSING;
11472 btrfs_release_path(&path);
11473 if (err & BACKREF_MISSING) {
11474 error("data extent[%llu %llu] backref lost",
11475 disk_bytenr, disk_num_bytes);
11481 * Get real tree block level for the case like shared block
11482 * Return >= 0 as tree level
11483 * Return <0 for error
/*
 * query_tree_block_level() - determine the level of the tree block at @bytenr.
 *
 * NOTE(review): this listing appears to omit short source lines (gaps in the
 * embedded line numbers); error branches are therefore only partly visible.
 *
 * Two sources are consulted and compared:
 *  - the extent tree item for @bytenr (key offset for a METADATA_ITEM,
 *    btrfs_tree_block_info otherwise),
 *  - the header of the tree block itself, read with the generation taken
 *    from the extent item.
 * The visible success path returns the header level.
 */
11485 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11487 struct extent_buffer *eb;
11488 struct btrfs_path path;
11489 struct btrfs_key key;
11490 struct btrfs_extent_item *ei;
11497 /* Search extent tree for extent generation and level */
11498 key.objectid = bytenr;
11499 key.type = BTRFS_METADATA_ITEM_KEY;
11500 key.offset = (u64)-1;
11502 btrfs_init_path(&path);
11503 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
11506 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11514 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11515 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11516 struct btrfs_extent_item);
/* The extent item must be flagged as a tree block. */
11517 flags = btrfs_extent_flags(path.nodes[0], ei);
11518 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11523 /* Get transid for later read_tree_block() check */
11524 transid = btrfs_extent_generation(path.nodes[0], ei);
11526 /* Get backref level as one source */
11527 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11528 backref_level = key.offset;
11530 struct btrfs_tree_block_info *info;
11532 info = (struct btrfs_tree_block_info *)(ei + 1);
11533 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11535 btrfs_release_path(&path);
11537 /* Get level from tree block as an alternative source */
11538 eb = read_tree_block(fs_info, bytenr, transid);
11539 if (!extent_buffer_uptodate(eb)) {
11540 free_extent_buffer(eb);
11543 header_level = btrfs_header_level(eb);
11544 free_extent_buffer(eb);
/* The action taken on a level mismatch is on a line not shown here. */
11546 if (header_level != backref_level)
11548 return header_level;
11551 btrfs_release_path(&path);
11556 * Check if a tree block backref is valid (points to a valid tree block)
11557 * if level == -1, level will be resolved
11558 * Return >0 for any error found and print error message
/*
 * check_tree_block_backref() - check that tree @root_id really contains the
 * tree block at @bytenr on @level.
 *
 * NOTE(review): this listing appears to omit short source lines (gaps in the
 * embedded line numbers); several conditions are therefore not visible.
 *
 * Visible flow:
 *  - resolve @level through query_tree_block_level() (the header comment says
 *    this is for level == -1),
 *  - read the fs root keyed (root_id, ROOT_ITEM, -1),
 *  - read the block at @bytenr and take its first key,
 *  - search the root for that key with path.lowest_level = level and compare
 *    bytenr and level of path.nodes[level] with the expected values.
 * Errors are reported as REFERENCER_MISSING / REFERENCER_MISMATCH bits.
 *
 * NOTE(review): nodesize is declared u32 but printed with %d in the messages.
 */
11560 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11561 u64 bytenr, int level)
11563 struct btrfs_root *root;
11564 struct btrfs_key key;
11565 struct btrfs_path path;
11566 struct extent_buffer *eb;
11567 struct extent_buffer *node;
11568 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11572 /* Query level for level == -1 special case */
11574 level = query_tree_block_level(fs_info, bytenr);
11576 err |= REFERENCER_MISSING;
11580 key.objectid = root_id;
11581 key.type = BTRFS_ROOT_ITEM_KEY;
11582 key.offset = (u64)-1;
11584 root = btrfs_read_fs_root(fs_info, &key);
11585 if (IS_ERR(root)) {
11586 err |= REFERENCER_MISSING;
11590 /* Read out the tree block to get item/node key */
11591 eb = read_tree_block(fs_info, bytenr, 0);
11592 if (!extent_buffer_uptodate(eb)) {
11593 err |= REFERENCER_MISSING;
11594 free_extent_buffer(eb);
11598 /* Empty tree, no need to check key */
11599 if (!btrfs_header_nritems(eb) && !level) {
11600 free_extent_buffer(eb);
/*
 * First key of the block: node key or item key.  The condition selecting
 * between the two calls is on a line not shown (presumably the level).
 */
11605 btrfs_node_key_to_cpu(eb, &key, 0);
11607 btrfs_item_key_to_cpu(eb, &key, 0);
11609 free_extent_buffer(eb);
11611 btrfs_init_path(&path);
/* Stop the search at @level so path.nodes[level] is the block to compare. */
11612 path.lowest_level = level;
11613 /* Search with the first key, to ensure we can reach it */
11614 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11616 err |= REFERENCER_MISSING;
11620 node = path.nodes[level];
11621 if (btrfs_header_bytenr(node) != bytenr) {
11623 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
11624 bytenr, nodesize, bytenr,
11625 btrfs_header_bytenr(node));
11626 err |= REFERENCER_MISMATCH;
11628 if (btrfs_header_level(node) != level) {
11630 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
11631 bytenr, nodesize, level,
11632 btrfs_header_level(node));
11633 err |= REFERENCER_MISMATCH;
11637 btrfs_release_path(&path);
/*
 * Two message variants exist, with and without the level; the condition that
 * picks one is on a line not shown.
 */
11639 if (err & REFERENCER_MISSING) {
11641 error("extent [%llu %d] lost referencer (owner: %llu)",
11642 bytenr, nodesize, root_id);
11645 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11646 bytenr, nodesize, root_id, level);
11653 * Check if tree block @eb is tree reloc root.
11654 * Return 0 if it's not or any problem happens
11655 * Return 1 if it's a tree reloc root
/*
 * is_tree_reloc_root() - test whether @eb is the root node of the tree-reloc
 * root that belongs to @eb's header owner.
 *
 * NOTE(review): this listing appears to omit short source lines (gaps in the
 * embedded line numbers); the return statements are not visible.
 *
 * The reloc root is read uncached with the key
 * (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM, owner), its node bytenr is compared
 * with the bytenr in @eb's header, and the root is freed again.
 */
11657 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11658 struct extent_buffer *eb)
11660 struct btrfs_root *tree_reloc_root;
11661 struct btrfs_key key;
11662 u64 bytenr = btrfs_header_bytenr(eb);
11663 u64 owner = btrfs_header_owner(eb);
11666 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11667 key.offset = owner;
11668 key.type = BTRFS_ROOT_ITEM_KEY;
11670 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11671 if (IS_ERR(tree_reloc_root))
11674 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
/* Read without the cache above, so the root is released here. */
11676 btrfs_free_fs_root(tree_reloc_root);
11681 * Check referencer for shared block backref
11682 * If level == -1, this function will resolve the level.
/*
 * check_shared_block_backref() - check that the tree block at @parent really
 * points to the tree block at @bytenr.
 *
 * NOTE(review): this listing appears to omit short source lines (gaps in the
 * embedded line numbers); goto/assignment lines inside the branches are not
 * visible.
 *
 * Visible flow: read the parent block, resolve @level through
 * query_tree_block_level(), handle parent == bytenr via is_tree_reloc_root(),
 * require the parent's header level to be level + 1, then scan the parent's
 * block pointers for @bytenr.  When no parent is found, a message is printed
 * and REFERENCER_MISSING is returned.
 */
11684 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11685 u64 parent, u64 bytenr, int level)
11687 struct extent_buffer *eb;
11689 int found_parent = 0;
11692 eb = read_tree_block(fs_info, parent, 0);
11693 if (!extent_buffer_uptodate(eb))
11697 level = query_tree_block_level(fs_info, bytenr);
11701 /* It's possible it's a tree reloc root */
11702 if (parent == bytenr) {
11703 if (is_tree_reloc_root(fs_info, eb))
/* A direct parent sits exactly one level above the child. */
11708 if (level + 1 != btrfs_header_level(eb))
11711 nr = btrfs_header_nritems(eb);
11712 for (i = 0; i < nr; i++) {
11713 if (bytenr == btrfs_node_blockptr(eb, i)) {
11719 free_extent_buffer(eb);
11720 if (!found_parent) {
11722 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
11723 bytenr, fs_info->nodesize, parent, level);
11724 return REFERENCER_MISSING;
11730 * Check referencer for normal (inlined) data ref
11731 * If len == 0, it will be resolved by searching in extent tree
/*
 * check_extent_data_backref() - count the file extents of inode @objectid in
 * tree @root_id that reference the data extent (@bytenr, @len) with backref
 * offset @offset, and compare the result with @count.
 *
 * NOTE(review): this listing appears to omit short source lines (gaps in the
 * embedded line numbers); loop and branch structure is only partly visible.
 *
 * Returns REFERENCER_MISSING (after printing a message) when the number of
 * matching file extents differs from @count.
 */
11733 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
11734 u64 root_id, u64 objectid, u64 offset,
11735 u64 bytenr, u64 len, u32 count)
11737 struct btrfs_root *root;
11738 struct btrfs_root *extent_root = fs_info->extent_root;
11739 struct btrfs_key key;
11740 struct btrfs_path path;
11741 struct extent_buffer *leaf;
11742 struct btrfs_file_extent_item *fi;
11743 u32 found_count = 0;
/*
 * Extent-tree lookup of (bytenr, EXTENT_ITEM, -1) followed by a step back.
 * Per the header comment this resolves len == 0; the guarding condition and
 * the assignment of len are on lines not shown here.
 */
11748 key.objectid = bytenr;
11749 key.type = BTRFS_EXTENT_ITEM_KEY;
11750 key.offset = (u64)-1;
11752 btrfs_init_path(&path);
11753 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11756 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11759 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11760 if (key.objectid != bytenr ||
11761 key.type != BTRFS_EXTENT_ITEM_KEY)
11764 btrfs_release_path(&path);
11766 key.objectid = root_id;
11767 key.type = BTRFS_ROOT_ITEM_KEY;
11768 key.offset = (u64)-1;
11769 btrfs_init_path(&path);
11771 root = btrfs_read_fs_root(fs_info, &key);
11775 key.objectid = objectid;
11776 key.type = BTRFS_EXTENT_DATA_KEY;
11778 * It can be nasty as data backref offset is
11779 * file offset - file extent offset, which is smaller or
11780 * equal to original backref offset. The only special case is
11781 * overflow. So we need to special check and do further search.
/*
 * Parsed as (offset & (1ULL << 63)) ? 0 : offset - when the top bit of
 * @offset is set, the search starts from file offset 0.
 */
11783 key.offset = offset & (1ULL << 63) ? 0 : offset;
11785 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11790 * Search afterwards to get correct one
11791 * NOTE: As we must do a comprehensive check on the data backref to
11792 * make sure the dref count also matches, we must iterate all file
11793 * extents for that inode.
11796 leaf = path.nodes[0];
11797 slot = path.slots[0];
11799 if (slot >= btrfs_header_nritems(leaf))
11801 btrfs_item_key_to_cpu(leaf, &key, slot);
/* Only EXTENT_DATA items of inode @objectid are considered. */
11802 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11804 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11806 * Except normal disk bytenr and disk num bytes, we still
11807 * need to do extra check on dbackref offset as
11808 * dbackref offset = file_offset - file_extent_offset
/*
 * The right-hand side of the last comparison and the action on a match are
 * on lines not shown here.
 */
11810 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11811 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11812 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11817 ret = btrfs_next_item(root, &path);
11822 btrfs_release_path(&path);
11823 if (found_count != count) {
11825 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
11826 bytenr, len, root_id, objectid, offset, count, found_count);
11827 return REFERENCER_MISSING;
11833 * Check if the referencer of a shared data backref exists
/*
 * check_shared_data_backref() - check that the leaf at @parent contains a
 * file extent pointing to the data extent at @bytenr.
 *
 * NOTE(review): this listing appears to omit short source lines (gaps in the
 * embedded line numbers); continue/break/goto lines are not visible.
 *
 * Visible flow: read the block at @parent, scan its items for a non-inline
 * EXTENT_DATA item whose disk bytenr equals @bytenr, free the buffer, and
 * return REFERENCER_MISSING with a message when nothing was found.
 */
11835 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
11836 u64 parent, u64 bytenr)
11838 struct extent_buffer *eb;
11839 struct btrfs_key key;
11840 struct btrfs_file_extent_item *fi;
11842 int found_parent = 0;
11845 eb = read_tree_block(fs_info, parent, 0);
11846 if (!extent_buffer_uptodate(eb))
11849 nr = btrfs_header_nritems(eb);
11850 for (i = 0; i < nr; i++) {
11851 btrfs_item_key_to_cpu(eb, &key, i);
11852 if (key.type != BTRFS_EXTENT_DATA_KEY)
11855 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
/* Inline file extents are tested separately before the disk bytenr check. */
11856 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
11859 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
11866 free_extent_buffer(eb);
11867 if (!found_parent) {
11868 error("shared extent %llu referencer lost (parent: %llu)",
11870 return REFERENCER_MISSING;
11876 * This function will check a given extent item, including its backref and
11877 * itself (like crossing stripe boundary and type)
11879 * Since we don't use extent_record anymore, introduce new error bit
/*
 * check_extent_item() - check the EXTENT_ITEM / METADATA_ITEM at @slot of
 * leaf @eb, including every inline backref it carries.
 *
 * NOTE(review): this listing appears to omit short source lines (gaps in the
 * embedded line numbers); the loop head, else lines, break/goto lines and
 * some call arguments are not visible.
 *
 * Side effect: adds to the file-scope counter bytes_used.
 * Error bits visible here: CROSSING_STRIPE_BOUNDARY, ITEM_SIZE_MISMATCH,
 * UNKNOWN_TYPE, plus whatever the per-backref helpers return.
 */
11881 static int check_extent_item(struct btrfs_fs_info *fs_info,
11882 struct extent_buffer *eb, int slot)
11884 struct btrfs_extent_item *ei;
11885 struct btrfs_extent_inline_ref *iref;
11886 struct btrfs_extent_data_ref *dref;
11890 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11891 u32 item_size = btrfs_item_size_nr(eb, slot);
11896 struct btrfs_key key;
11900 btrfs_item_key_to_cpu(eb, &key, slot);
/*
 * An EXTENT_ITEM adds its key offset; the other visible statement adds
 * nodesize (the separating else line is not shown).
 */
11901 if (key.type == BTRFS_EXTENT_ITEM_KEY)
11902 bytes_used += key.offset;
11904 bytes_used += nodesize;
11906 if (item_size < sizeof(*ei)) {
11908 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
11909 * old thing when on disk format is still un-determined.
11910 * No need to care about it anymore
11912 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
11916 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
11917 flags = btrfs_extent_flags(eb, ei);
11919 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
/*
 * NOTE(review): the file-scope global_info is passed here although the
 * function receives fs_info as a parameter - confirm they are always equal.
 */
11921 if (metadata && check_crossing_stripes(global_info, key.objectid,
11923 error("bad metadata [%llu, %llu) crossing stripe boundary",
11924 key.objectid, key.objectid + nodesize);
11925 err |= CROSSING_STRIPE_BOUNDARY;
/* Inline refs start right after the extent item ... */
11928 ptr = (unsigned long)(ei + 1);
11930 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
11931 /* Old EXTENT_ITEM metadata */
11932 struct btrfs_tree_block_info *info;
/* ... unless a btrfs_tree_block_info sits in between; it carries the level. */
11934 info = (struct btrfs_tree_block_info *)ptr;
11935 level = btrfs_tree_block_level(eb, info);
11936 ptr += sizeof(struct btrfs_tree_block_info);
11938 /* New METADATA_ITEM */
11939 level = key.offset;
11941 end = (unsigned long)ei + item_size;
11944 /* Reached extent item end normally */
11948 /* Beyond extent item end, wrong item size */
11950 err |= ITEM_SIZE_MISMATCH;
11951 error("extent item at bytenr %llu slot %d has wrong size",
11956 /* Now check every backref in this extent item */
11957 iref = (struct btrfs_extent_inline_ref *)ptr;
11958 type = btrfs_extent_inline_ref_type(eb, iref);
11959 offset = btrfs_extent_inline_ref_offset(eb, iref);
/* Dispatch on the inline ref type to the matching referencer check. */
11961 case BTRFS_TREE_BLOCK_REF_KEY:
11962 ret = check_tree_block_backref(fs_info, offset, key.objectid,
11966 case BTRFS_SHARED_BLOCK_REF_KEY:
11967 ret = check_shared_block_backref(fs_info, offset, key.objectid,
11971 case BTRFS_EXTENT_DATA_REF_KEY:
11972 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11973 ret = check_extent_data_backref(fs_info,
11974 btrfs_extent_data_ref_root(eb, dref),
11975 btrfs_extent_data_ref_objectid(eb, dref),
11976 btrfs_extent_data_ref_offset(eb, dref),
11977 key.objectid, key.offset,
11978 btrfs_extent_data_ref_count(eb, dref));
11981 case BTRFS_SHARED_DATA_REF_KEY:
11982 ret = check_shared_data_backref(fs_info, offset, key.objectid);
11986 error("extent[%llu %d %llu] has unknown ref type: %d",
11987 key.objectid, key.type, key.offset, type);
11988 err |= UNKNOWN_TYPE;
/* Advance by the size of this ref type to reach the next inline ref. */
11992 ptr += btrfs_extent_inline_ref_size(type);
12000 * Check if a dev extent item is referred correctly by its chunk
/*
 * check_dev_extent_item() - check that the DEV_EXTENT item at @slot of @eb is
 * referenced by a stripe of the chunk it names.
 *
 * NOTE(review): this listing appears to omit short source lines (gaps in the
 * embedded line numbers); among others the assignment of `l` and the
 * goto/break lines are not visible.
 *
 * Visible flow: look up the chunk keyed (chunk_objectid, CHUNK_ITEM,
 * chunk_offset) in fs_info->chunk_root, validate it with
 * btrfs_check_chunk_valid(), compare its stripe length with the dev extent
 * length, then scan the stripes for one whose devid/offset equal the dev
 * extent key.  Returns REFERENCER_MISSING with a message when no chunk
 * matches.
 */
12002 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12003 struct extent_buffer *eb, int slot)
12005 struct btrfs_root *chunk_root = fs_info->chunk_root;
12006 struct btrfs_dev_extent *ptr;
12007 struct btrfs_path path;
12008 struct btrfs_key chunk_key;
12009 struct btrfs_key devext_key;
12010 struct btrfs_chunk *chunk;
12011 struct extent_buffer *l;
12015 int found_chunk = 0;
12018 btrfs_item_key_to_cpu(eb, &devext_key, slot);
12019 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12020 length = btrfs_dev_extent_length(eb, ptr);
12022 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12023 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12024 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12026 btrfs_init_path(&path);
12027 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12032 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12033 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
/* The per-device stripe length must equal the dev extent length. */
12038 if (btrfs_stripe_length(fs_info, l, chunk) != length)
12041 num_stripes = btrfs_chunk_num_stripes(l, chunk);
12042 for (i = 0; i < num_stripes; i++) {
12043 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12044 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
/* devext_key is (devid, DEV_EXTENT, physical offset) as compared here. */
12046 if (devid == devext_key.objectid &&
12047 offset == devext_key.offset) {
12053 btrfs_release_path(&path);
12054 if (!found_chunk) {
12056 "device extent[%llu, %llu, %llu] did not find the related chunk",
12057 devext_key.objectid, devext_key.offset, length);
12058 return REFERENCER_MISSING;
12064 * Check if the used space is correct with the dev item
/*
 * check_dev_item() - compare the bytes_used field of the DEV_ITEM at @slot of
 * @eb with the summed length of that device's DEV_EXTENT items.
 *
 * NOTE(review): this listing appears to omit short source lines (gaps in the
 * embedded line numbers); the loop head, break lines and the assignment of
 * key.offset are not visible.
 *
 * Returns REFERENCER_MISSING when the initial dev-extent search fails (the
 * exact condition is not shown) and ACCOUNTING_MISMATCH when the totals
 * differ.
 */
12066 static int check_dev_item(struct btrfs_fs_info *fs_info,
12067 struct extent_buffer *eb, int slot)
12069 struct btrfs_root *dev_root = fs_info->dev_root;
12070 struct btrfs_dev_item *dev_item;
12071 struct btrfs_path path;
12072 struct btrfs_key key;
12073 struct btrfs_dev_extent *ptr;
12079 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12080 dev_id = btrfs_device_id(eb, dev_item);
12081 used = btrfs_device_bytes_used(eb, dev_item);
12083 key.objectid = dev_id;
12084 key.type = BTRFS_DEV_EXTENT_KEY;
12087 btrfs_init_path(&path);
12088 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* On failure the dev item's own key is decoded and printed. */
12090 btrfs_item_key_to_cpu(eb, &key, slot);
12091 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12092 key.objectid, key.type, key.offset);
12093 btrfs_release_path(&path);
12094 return REFERENCER_MISSING;
12097 /* Iterate dev_extents to calculate the used space of a device */
12099 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12102 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12103 if (key.objectid > dev_id)
12105 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12108 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12109 struct btrfs_dev_extent);
12110 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12112 ret = btrfs_next_item(dev_root, &path);
12116 btrfs_release_path(&path);
12118 if (used != total) {
/*
 * NOTE(review): the key is decoded into `key` here but the message below
 * prints BTRFS_ROOT_TREE_OBJECTID, BTRFS_DEV_EXTENT_KEY and dev_id instead of
 * key.objectid / key.type / key.offset (as the earlier message in this
 * function does) - looks unintended, confirm.
 */
12119 btrfs_item_key_to_cpu(eb, &key, slot);
12121 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12122 total, used, BTRFS_ROOT_TREE_OBJECTID,
12123 BTRFS_DEV_EXTENT_KEY, dev_id);
12124 return ACCOUNTING_MISMATCH;
12130 * Check a block group item with its referencer (chunk) and its used space
12131 * with extent/metadata item
/*
 * check_block_group_item() - check the BLOCK_GROUP_ITEM at @slot of @eb
 * against its chunk and against the extent items inside its range.
 *
 * NOTE(review): this listing appears to omit short source lines (gaps in the
 * embedded line numbers); the loop head, else/goto/break lines and some
 * operands are not visible.
 *
 * Visible flow:
 *  - look up the chunk keyed (FIRST_CHUNK_TREE_OBJECTID, CHUNK_ITEM,
 *    bg_key.objectid) and compare its length,
 *  - iterate the extent tree from (bg_key.objectid, 0, 0) up to
 *    bg_key.objectid + bg_key.offset, summing used space into `total` and
 *    checking that DATA / TREE_BLOCK extents live in a block group with
 *    matching type flags,
 *  - compare `total` with the `used` field of the block group item.
 * Error bits: REFERENCER_MISSING, REFERENCER_MISMATCH, CHUNK_TYPE_MISMATCH,
 * ACCOUNTING_MISMATCH.
 */
12133 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12134 struct extent_buffer *eb, int slot)
12136 struct btrfs_root *extent_root = fs_info->extent_root;
12137 struct btrfs_root *chunk_root = fs_info->chunk_root;
12138 struct btrfs_block_group_item *bi;
12139 struct btrfs_block_group_item bg_item;
12140 struct btrfs_path path;
12141 struct btrfs_key bg_key;
12142 struct btrfs_key chunk_key;
12143 struct btrfs_key extent_key;
12144 struct btrfs_chunk *chunk;
12145 struct extent_buffer *leaf;
12146 struct btrfs_extent_item *ei;
12147 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12155 btrfs_item_key_to_cpu(eb, &bg_key, slot);
12156 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* Copy the item out of the extent buffer into a local struct. */
12157 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12158 used = btrfs_block_group_used(&bg_item);
12159 bg_flags = btrfs_block_group_flags(&bg_item);
12161 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12162 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12163 chunk_key.offset = bg_key.objectid;
12165 btrfs_init_path(&path);
12166 /* Search for the referencer chunk */
12167 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12170 "block group[%llu %llu] did not find the related chunk item",
12171 bg_key.objectid, bg_key.offset);
12172 err |= REFERENCER_MISSING;
12174 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12175 struct btrfs_chunk);
/* The right-hand side of this comparison is on a line not shown. */
12176 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12179 "block group[%llu %llu] related chunk item length does not match",
12180 bg_key.objectid, bg_key.offset);
12181 err |= REFERENCER_MISMATCH;
12184 btrfs_release_path(&path);
12186 /* Search from the block group bytenr */
12187 extent_key.objectid = bg_key.objectid;
12188 extent_key.type = 0;
12189 extent_key.offset = 0;
12191 btrfs_init_path(&path);
12192 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12196 /* Iterate extent tree to account used space */
12198 leaf = path.nodes[0];
12200 /* Search slot can point to the last item beyond leaf nritems */
12201 if (path.slots[0] >= btrfs_header_nritems(leaf))
12204 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* First item at or past the end of the block group range. */
12205 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12208 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12209 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12211 if (extent_key.objectid < bg_key.objectid)
/*
 * The amount added for a METADATA_ITEM is on a line not shown (presumably
 * nodesize); the visible statement adds the key offset.
 */
12214 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12217 total += extent_key.offset;
12219 ei = btrfs_item_ptr(leaf, path.slots[0],
12220 struct btrfs_extent_item);
12221 flags = btrfs_extent_flags(leaf, ei);
12222 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12223 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12225 "bad extent[%llu, %llu) type mismatch with chunk",
12226 extent_key.objectid,
12227 extent_key.objectid + extent_key.offset);
12228 err |= CHUNK_TYPE_MISMATCH;
12230 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
/* Tree blocks are accepted in SYSTEM or METADATA block groups. */
12231 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12232 BTRFS_BLOCK_GROUP_METADATA))) {
12234 "bad extent[%llu, %llu) type mismatch with chunk",
12235 extent_key.objectid,
12236 extent_key.objectid + nodesize);
12237 err |= CHUNK_TYPE_MISMATCH;
12241 ret = btrfs_next_item(extent_root, &path);
12247 btrfs_release_path(&path);
12249 if (total != used) {
12251 "block group[%llu %llu] used %llu but extent items used %llu",
12252 bg_key.objectid, bg_key.offset, used, total);
12253 err |= ACCOUNTING_MISMATCH;
12259 * Check a chunk item.
12260 * Including checking all referred dev_extents and block group
/*
 * check_chunk_item() - check the CHUNK_ITEM at @slot of @eb against its block
 * group item and the dev extents of each of its stripes.
 *
 * NOTE(review): this listing appears to omit short source lines (gaps in the
 * embedded line numbers); labels, else/continue lines and some call
 * arguments are not visible.
 *
 * Visible flow:
 *  - validate the chunk with btrfs_check_chunk_valid(),
 *  - look up the block group keyed (chunk_key.offset, BLOCK_GROUP_ITEM,
 *    length) in the extent root and compare its flags with the chunk type,
 *  - for every stripe, look up the DEV_EXTENT keyed (devid, DEV_EXTENT,
 *    stripe offset) in the dev root and compare its chunk objectid, chunk
 *    offset and length.
 * Error bits: BYTES_UNALIGNED | UNKNOWN_TYPE, REFERENCER_MISSING,
 * BACKREF_MISSING.
 */
12262 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12263 struct extent_buffer *eb, int slot)
12265 struct btrfs_root *extent_root = fs_info->extent_root;
12266 struct btrfs_root *dev_root = fs_info->dev_root;
12267 struct btrfs_path path;
12268 struct btrfs_key chunk_key;
12269 struct btrfs_key bg_key;
12270 struct btrfs_key devext_key;
12271 struct btrfs_chunk *chunk;
12272 struct extent_buffer *leaf;
12273 struct btrfs_block_group_item *bi;
12274 struct btrfs_block_group_item bg_item;
12275 struct btrfs_dev_extent *ptr;
12287 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12288 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12289 length = btrfs_chunk_length(eb, chunk);
/* chunk_key.offset is used as the chunk's start; end is start + length. */
12290 chunk_end = chunk_key.offset + length;
12291 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12294 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12296 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12299 type = btrfs_chunk_type(eb, chunk);
12301 bg_key.objectid = chunk_key.offset;
12302 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12303 bg_key.offset = length;
12305 btrfs_init_path(&path);
12306 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
12309 "chunk[%llu %llu) did not find the related block group item",
12310 chunk_key.offset, chunk_end);
12311 err |= REFERENCER_MISSING;
12313 leaf = path.nodes[0];
12314 bi = btrfs_item_ptr(leaf, path.slots[0],
12315 struct btrfs_block_group_item);
12316 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
12318 if (btrfs_block_group_flags(&bg_item) != type) {
12320 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
12321 chunk_key.offset, chunk_end, type,
12322 btrfs_block_group_flags(&bg_item));
/*
 * NOTE(review): the message reports a flags mismatch but the bit set is
 * REFERENCER_MISSING; check_block_group_item() uses REFERENCER_MISMATCH for
 * its comparable length check - confirm which bit is intended.
 */
12323 err |= REFERENCER_MISSING;
12327 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
12328 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
12329 for (i = 0; i < num_stripes; i++) {
/* Drop the previous lookup's path before starting the next search. */
12330 btrfs_release_path(&path);
12331 btrfs_init_path(&path);
12332 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
12333 devext_key.type = BTRFS_DEV_EXTENT_KEY;
12334 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
12336 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
12339 goto not_match_dev;
12341 leaf = path.nodes[0];
12342 ptr = btrfs_item_ptr(leaf, path.slots[0],
12343 struct btrfs_dev_extent);
12344 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
12345 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
/* The dev extent must point back at this chunk and have the stripe length. */
12346 if (objectid != chunk_key.objectid ||
12347 offset != chunk_key.offset ||
12348 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
12349 goto not_match_dev;
12352 err |= BACKREF_MISSING;
/*
 * NOTE(review): this message prints chunk_key.objectid as the range start,
 * while the other messages in this function print chunk_key.offset with
 * chunk_end - looks inconsistent, confirm.
 */
12354 "chunk[%llu %llu) stripe %d did not find the related dev extent",
12355 chunk_key.objectid, chunk_end, i);
12358 btrfs_release_path(&path);
12364 * Main entry function to check known items and update related accounting info
/*
 * Walk the items of leaf @eb and dispatch each one to the checker matching
 * its key type (extent data, block group, dev item, chunk, dev extent,
 * extent/metadata item and the four backref key types).
 *
 * @root is passed to check_extent_data_item(); the other checkers receive
 * root->fs_info.
 * NOTE(review): the switch statement, the handling of ret and the return
 * statement are on lines not visible here.
 */
12366 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
12368 struct btrfs_fs_info *fs_info = root->fs_info;
12369 struct btrfs_key key;
12372 struct btrfs_extent_data_ref *dref;
12377 btrfs_item_key_to_cpu(eb, &key, slot);
12381 case BTRFS_EXTENT_DATA_KEY:
12382 ret = check_extent_data_item(root, eb, slot);
12385 case BTRFS_BLOCK_GROUP_ITEM_KEY:
12386 ret = check_block_group_item(fs_info, eb, slot);
12389 case BTRFS_DEV_ITEM_KEY:
12390 ret = check_dev_item(fs_info, eb, slot);
12393 case BTRFS_CHUNK_ITEM_KEY:
12394 ret = check_chunk_item(fs_info, eb, slot);
12397 case BTRFS_DEV_EXTENT_KEY:
12398 ret = check_dev_extent_item(fs_info, eb, slot);
12401 case BTRFS_EXTENT_ITEM_KEY:
12402 case BTRFS_METADATA_ITEM_KEY:
12403 ret = check_extent_item(fs_info, eb, slot);
/* For csum items only the item size is added to the global csum byte count. */
12406 case BTRFS_EXTENT_CSUM_KEY:
12407 total_csum_bytes += btrfs_item_size_nr(eb, slot);
12409 case BTRFS_TREE_BLOCK_REF_KEY:
12410 ret = check_tree_block_backref(fs_info, key.offset,
/* Keyed data backref: the root, objectid, offset and count come from the item. */
12414 case BTRFS_EXTENT_DATA_REF_KEY:
12415 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
12416 ret = check_extent_data_backref(fs_info,
12417 btrfs_extent_data_ref_root(eb, dref),
12418 btrfs_extent_data_ref_objectid(eb, dref),
12419 btrfs_extent_data_ref_offset(eb, dref),
12421 btrfs_extent_data_ref_count(eb, dref));
12424 case BTRFS_SHARED_BLOCK_REF_KEY:
12425 ret = check_shared_block_backref(fs_info, key.offset,
12429 case BTRFS_SHARED_DATA_REF_KEY:
12430 ret = check_shared_data_backref(fs_info, key.offset,
/* Advance to the next slot while there are items left in the leaf. */
12438 if (++slot < btrfs_header_nritems(eb))
12445 * Helper function for later fs/subvol tree check. To determine if a tree
12446 * block should be checked.
12447 * This function ensures that only the direct referencer with the lowest
12448 * rootid checks a fs/subvolume tree block.
12450 * Backref check at extent tree would detect errors like missing subvolume
12451 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * Decide whether @root should check tree block @eb.
 *
 * The extent tree is searched for the extent/metadata item of eb's bytenr
 * and its inline refs are walked. When an inline BTRFS_TREE_BLOCK_REF_KEY
 * ref has an offset lower than root->objectid, the path is released early.
 * NOTE(review): the return statements are on lines not visible here;
 * presumably that early exit means "do not check" and every other path
 * means "check" - confirm.
 */
12453 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
12455 struct btrfs_root *extent_root = root->fs_info->extent_root;
12456 struct btrfs_key key;
12457 struct btrfs_path path;
12458 struct extent_buffer *leaf;
12460 struct btrfs_extent_item *ei;
12466 struct btrfs_extent_inline_ref *iref;
/* Search with the largest offset so the item for this bytenr is at or before the slot. */
12469 btrfs_init_path(&path);
12470 key.objectid = btrfs_header_bytenr(eb);
12471 key.type = BTRFS_METADATA_ITEM_KEY;
12472 key.offset = (u64)-1;
12475 * Any failure in backref resolving means we can't determine
12476 * whom the tree block belongs to.
12477 * So in that case, we need to check that tree block
12479 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12483 ret = btrfs_previous_extent_item(extent_root, &path,
12484 btrfs_header_bytenr(eb));
12488 leaf = path.nodes[0];
12489 slot = path.slots[0];
12490 btrfs_item_key_to_cpu(leaf, &key, slot);
12491 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * Inline refs follow the extent item directly for METADATA_ITEM keys;
 * otherwise a btrfs_tree_block_info sits between the two.
 */
12493 if (key.type == BTRFS_METADATA_ITEM_KEY) {
12494 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12496 struct btrfs_tree_block_info *info;
12498 info = (struct btrfs_tree_block_info *)(ei + 1);
12499 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* Walk every inline ref from the first one up to the end of the item. */
12502 item_size = btrfs_item_size_nr(leaf, slot);
12503 ptr = (unsigned long)iref;
12504 end = (unsigned long)ei + item_size;
12505 while (ptr < end) {
12506 iref = (struct btrfs_extent_inline_ref *)ptr;
12507 type = btrfs_extent_inline_ref_type(leaf, iref);
12508 offset = btrfs_extent_inline_ref_offset(leaf, iref);
12511 * We only check the tree block if current root is
12512 * the lowest referencer of it.
12514 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
12515 offset < root->objectid) {
12516 btrfs_release_path(&path);
12520 ptr += btrfs_extent_inline_ref_size(type);
12523 * Normally we should also check keyed tree block ref, but that may be
12524 * very time consuming. Inlined ref should already make us skip a lot
12525 * of refs now. So skip search keyed tree block ref.
12529 btrfs_release_path(&path);
12534 * Traversal function for tree block. We will do:
12535 * 1) Skip shared fs/subvolume tree blocks
12536 * 2) Update related bytes accounting
12537 * 3) Pre-order traversal
/*
 * Recursively check tree block @node on behalf of @root.
 *
 * For fs trees, should_check() is consulted first. The block's length is
 * then added to the global byte counters, its own backref is verified with
 * check_tree_block_ref(), and either its items are checked (leaf path,
 * check_leaf_items()) or each child block is read and traversed in turn.
 * Children whose key is reported as dropped by is_dropped_key() at the
 * root item's drop_level are subject to a separate branch.
 * NOTE(review): the statements taken on those conditions, the level test
 * separating the leaf and node paths, and the return value are on lines not
 * visible here.
 */
12539 static int traverse_tree_block(struct btrfs_root *root,
12540 struct extent_buffer *node)
12542 struct extent_buffer *eb;
12543 struct btrfs_key key;
12544 struct btrfs_key drop_key;
12552 * Skip shared fs/subvolume tree block, in that case they will
12553 * be checked by referencer with lowest rootid
12555 if (is_fstree(root->objectid) && !should_check(root, node))
12558 /* Update bytes accounting */
12559 total_btree_bytes += node->len;
12560 if (fs_root_objectid(btrfs_header_owner(node)))
12561 total_fs_tree_bytes += node->len;
12562 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
12563 total_extent_tree_bytes += node->len;
12565 /* pre-order traversal, check itself first */
12566 level = btrfs_header_level(node);
12567 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
12568 btrfs_header_level(node),
12569 btrfs_header_owner(node));
12573 "check %s failed root %llu bytenr %llu level %d, force continue check",
12574 level ? "node":"leaf", root->objectid,
12575 btrfs_header_bytenr(node), btrfs_header_level(node));
/* Leaf path: account the unused space of the leaf and check its items. */
12578 btree_space_waste += btrfs_leaf_free_space(root, node);
12579 ret = check_leaf_items(root, node);
/* Node path: unused key pointer slots are counted as wasted space. */
12584 nr = btrfs_header_nritems(node);
12585 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
12586 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
12587 sizeof(struct btrfs_key_ptr);
12589 /* Then check all its children */
12590 for (i = 0; i < nr; i++) {
12591 u64 blocknr = btrfs_node_blockptr(node, i);
12593 btrfs_node_key_to_cpu(node, &key, i);
12594 if (level == root->root_item.drop_level &&
12595 is_dropped_key(&key, &drop_key))
12599 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
12600 * to call the function itself.
/* Only children that were read successfully are traversed. */
12602 eb = read_tree_block(root->fs_info, blocknr, 0);
12603 if (extent_buffer_uptodate(eb)) {
12604 ret = traverse_tree_block(root, eb);
12607 free_extent_buffer(eb);
12614 * Low memory usage version check_chunks_and_extents.
/*
 * Low memory variant of the chunk/extent check.
 *
 * Traverses the chunk root and the tree root first, then iterates the items
 * of the tree root starting at the key for BTRFS_EXTENT_TREE_OBJECTID and
 * traverses every tree whose ROOT_ITEM is found. Reloc trees are read with
 * btrfs_read_fs_root_no_cache() and freed again after traversal; all other
 * trees are read with btrfs_read_fs_root().
 * NOTE(review): error accumulation and the return statement are on lines
 * not visible here.
 */
12616 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
12618 struct btrfs_path path;
12619 struct btrfs_key key;
12620 struct btrfs_root *root1;
12621 struct btrfs_root *root;
12622 struct btrfs_root *cur_root;
12626 root = fs_info->fs_root;
12628 root1 = root->fs_info->chunk_root;
12629 ret = traverse_tree_block(root1, root1->node);
12632 root1 = root->fs_info->tree_root;
12633 ret = traverse_tree_block(root1, root1->node);
/* root1 still points at the tree root; start iterating at the extent tree's root item. */
12636 btrfs_init_path(&path);
12637 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
12639 key.type = BTRFS_ROOT_ITEM_KEY;
12641 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
/* NOTE(review): "treet" in the message below looks like a typo for "tree". */
12643 error("cannot find extent treet in tree_root");
12648 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12649 if (key.type != BTRFS_ROOT_ITEM_KEY)
12651 key.offset = (u64)-1;
12653 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12654 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
12657 cur_root = btrfs_read_fs_root(root->fs_info, &key);
/*
 * NOTE(review): key.objectid is printed with %lld here, whereas other
 * messages in this file print objectids with %llu - confirm the specifier.
 */
12658 if (IS_ERR(cur_root) || !cur_root) {
12659 error("failed to read tree: %lld", key.objectid);
12663 ret = traverse_tree_block(cur_root, cur_root->node);
/* Reloc roots were read without the cache, so they are freed explicitly. */
12666 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12667 btrfs_free_fs_root(cur_root);
12669 ret = btrfs_next_item(root1, &path);
12675 btrfs_release_path(&path);
/*
 * Entry point for the extent check: prints "checking extents" to stderr
 * unless progress reporting is enabled, then runs the low memory
 * implementation when check_mode is CHECK_MODE_LOWMEM and the original
 * implementation otherwise.
 */
12679 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
12683 if (!ctx.progress_enabled)
12684 fprintf(stderr, "checking extents\n");
12685 if (check_mode == CHECK_MODE_LOWMEM)
12686 ret = check_chunks_and_extents_v2(fs_info);
12688 ret = check_chunks_and_extents(fs_info);
/*
 * Re-initialise the root node of @root inside transaction @trans.
 *
 * Buffer c is given a zeroed header with level, bytenr, generation, backref
 * revision and owner set, plus the filesystem fsid and chunk tree uuid, and
 * is marked dirty. When the resulting block starts at the same address as
 * the old root node, the root item is updated in the tree root.
 * NOTE(review): how @overwrite selects between the extent_buffer_get() and
 * btrfs_alloc_free_block() paths, and where root->node is replaced, is on
 * lines not visible here.
 */
12693 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
12694 struct btrfs_root *root, int overwrite)
12696 struct extent_buffer *c;
12697 struct extent_buffer *old = root->node;
12700 struct btrfs_disk_key disk_key = {0,0,0};
12706 extent_buffer_get(c);
12709 c = btrfs_alloc_free_block(trans, root,
12710 root->fs_info->nodesize,
12711 root->root_key.objectid,
12712 &disk_key, level, 0, 0);
12715 extent_buffer_get(c);
/* Wipe the header and fill in the fields of an empty block owned by this root. */
12719 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
12720 btrfs_set_header_level(c, level);
12721 btrfs_set_header_bytenr(c, c->start);
12722 btrfs_set_header_generation(c, trans->transid);
12723 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
12724 btrfs_set_header_owner(c, root->root_key.objectid);
12726 write_extent_buffer(c, root->fs_info->fsid,
12727 btrfs_header_fsid(), BTRFS_FSID_SIZE);
12729 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
12730 btrfs_header_chunk_tree_uuid(c),
12733 btrfs_mark_buffer_dirty(c);
12735 * this case can happen in the following case:
12737 * 1.overwrite previous root.
12739 * 2.reinit reloc data root, this is because we skip pin
12740 * down reloc data tree before which means we can allocate
12741 * same block bytenr here.
12743 if (old->start == c->start) {
12744 btrfs_set_root_generation(&root->root_item,
12746 root->root_item.level = btrfs_header_level(root->node);
12747 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12748 &root->root_key, &root->root_item);
12750 free_extent_buffer(c);
12754 free_extent_buffer(old);
12756 add_root_to_dirty_list(root);
/*
 * Recursively pin @eb and the tree blocks reachable from it.
 *
 * A block whose range already has EXTENT_DIRTY set in pinned_extents is not
 * pinned a second time. For ROOT_ITEMs found in @eb (other than the extent
 * root and the reloc roots) the referenced root block is read and pinned
 * recursively with tree_root = 0. For node pointers the child is read and
 * pinned recursively, except that at level 1 outside the tree root the
 * children are pinned by address without being read.
 * NOTE(review): the leaf/node split, the loop "continue"s and the return
 * statements are on lines not visible here.
 */
12760 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12761 struct extent_buffer *eb, int tree_root)
12763 struct extent_buffer *tmp;
12764 struct btrfs_root_item *ri;
12765 struct btrfs_key key;
12767 int level = btrfs_header_level(eb);
12773 * If we have pinned this block before, don't pin it again.
12774 * This can not only avoid forever loop with broken filesystem
12775 * but also give us some speedups.
12777 if (test_range_bit(&fs_info->pinned_extents, eb->start,
12778 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
12781 btrfs_pin_extent(fs_info, eb->start, eb->len);
12783 nritems = btrfs_header_nritems(eb);
12784 for (i = 0; i < nritems; i++) {
12786 btrfs_item_key_to_cpu(eb, &key, i);
12787 if (key.type != BTRFS_ROOT_ITEM_KEY)
12789 /* Skip the extent root and reloc roots */
12790 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
12791 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
12792 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
/* Follow the root item to the root block of the tree it describes. */
12794 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
12795 bytenr = btrfs_disk_root_bytenr(eb, ri);
12798 * If at any point we start needing the real root we
12799 * will have to build a stump root for the root we are
12800 * in, but for now this doesn't actually use the root so
12801 * just pass in extent_root.
12803 tmp = read_tree_block(fs_info, bytenr, 0);
12804 if (!extent_buffer_uptodate(tmp)) {
12805 fprintf(stderr, "Error reading root block\n");
12808 ret = pin_down_tree_blocks(fs_info, tmp, 0);
12809 free_extent_buffer(tmp);
12813 bytenr = btrfs_node_blockptr(eb, i);
12815 /* If we aren't the tree root don't read the block */
12816 if (level == 1 && !tree_root) {
12817 btrfs_pin_extent(fs_info, bytenr,
12818 fs_info->nodesize);
12822 tmp = read_tree_block(fs_info, bytenr, 0);
12823 if (!extent_buffer_uptodate(tmp)) {
12824 fprintf(stderr, "Error reading tree block\n");
12827 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
12828 free_extent_buffer(tmp);
/*
 * Pin the blocks reachable from the chunk root (tree_root = 0) and then
 * those reachable from the tree root (tree_root = 1); the result of the
 * second walk is returned.
 */
12837 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
12841 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
12845 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk tree.
 *
 * The avail_*_alloc_bits are cleared, then every CHUNK_ITEM found in the
 * chunk root is turned into a block group with btrfs_add_block_group() and
 * its range is marked dirty in fs_info->free_space_cache. Afterwards the
 * block groups are walked with btrfs_lookup_first_block_group().
 * NOTE(review): loop control, what is done with each cache entry in the
 * final walk, and the return statements are on lines not visible here.
 */
12848 static int reset_block_groups(struct btrfs_fs_info *fs_info)
12850 struct btrfs_block_group_cache *cache;
12851 struct btrfs_path path;
12852 struct extent_buffer *leaf;
12853 struct btrfs_chunk *chunk;
12854 struct btrfs_key key;
12858 btrfs_init_path(&path);
12860 key.type = BTRFS_CHUNK_ITEM_KEY;
12862 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
12864 btrfs_release_path(&path);
12869 * We do this in case the block groups were screwed up and had alloc
12870 * bits that aren't actually set on the chunks. This happens with
12871 * restored images every time and could happen in real life I guess.
12873 fs_info->avail_data_alloc_bits = 0;
12874 fs_info->avail_metadata_alloc_bits = 0;
12875 fs_info->avail_system_alloc_bits = 0;
12877 /* First we need to create the in-memory block groups */
12879 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12880 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
12882 btrfs_release_path(&path);
12890 leaf = path.nodes[0];
12891 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12892 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
/* One block group per chunk: type and length come from the chunk item. */
12897 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
12898 btrfs_add_block_group(fs_info, 0,
12899 btrfs_chunk_type(leaf, chunk),
12900 key.objectid, key.offset,
12901 btrfs_chunk_length(leaf, chunk));
12902 set_extent_dirty(&fs_info->free_space_cache, key.offset,
12903 key.offset + btrfs_chunk_length(leaf, chunk));
/* Step through the block groups in order of start address. */
12908 cache = btrfs_lookup_first_block_group(fs_info, start);
12912 start = cache->key.objectid + cache->key.offset;
12915 btrfs_release_path(&path);
/*
 * Remove the state of a pending balance from the tree root.
 *
 * Visible steps: search for the (BTRFS_BALANCE_OBJECTID, BALANCE_ITEM) key
 * and delete the item, delete the ROOT_ITEMs of BTRFS_TREE_RELOC_OBJECTID in
 * batches with btrfs_del_items(), then read the data reloc tree,
 * re-initialise its root with btrfs_fsck_reinit_root() and recreate its
 * root directory with btrfs_make_root_dir().
 * NOTE(review): the loop structure, the labels (including the
 * reinit_data_reloc target) and the error handling are on lines not visible
 * here.
 */
12919 static int reset_balance(struct btrfs_trans_handle *trans,
12920 struct btrfs_fs_info *fs_info)
12922 struct btrfs_root *root = fs_info->tree_root;
12923 struct btrfs_path path;
12924 struct extent_buffer *leaf;
12925 struct btrfs_key key;
12926 int del_slot, del_nr = 0;
12930 btrfs_init_path(&path);
12931 key.objectid = BTRFS_BALANCE_OBJECTID;
12932 key.type = BTRFS_BALANCE_ITEM_KEY;
12934 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12939 goto reinit_data_reloc;
12944 ret = btrfs_del_item(trans, root, &path);
12947 btrfs_release_path(&path);
/* Next, remove the root items of the reloc trees. */
12949 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12950 key.type = BTRFS_ROOT_ITEM_KEY;
12952 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12956 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12961 ret = btrfs_del_items(trans, root, &path,
12968 btrfs_release_path(&path);
12971 ret = btrfs_search_slot(trans, root, &key, &path,
12978 leaf = path.nodes[0];
12979 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12980 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
12982 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
12987 del_slot = path.slots[0];
12996 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13000 btrfs_release_path(&path);
/* From here on "root" refers to the data reloc tree, not the tree root. */
13003 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13004 key.type = BTRFS_ROOT_ITEM_KEY;
13005 key.offset = (u64)-1;
13006 root = btrfs_read_fs_root(fs_info, &key);
13007 if (IS_ERR(root)) {
13008 fprintf(stderr, "Error reading data reloc tree\n");
13009 ret = PTR_ERR(root);
13012 record_root_in_trans(trans, root);
13013 ret = btrfs_fsck_reinit_root(trans, root, 0);
13016 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13018 btrfs_release_path(&path);
/*
 * Re-initialise the extent tree inside transaction @trans.
 *
 * Sequence visible here: reject filesystems with the MIXED_GROUPS incompat
 * flag, pin all metadata blocks, free and rebuild the in-memory block
 * groups, re-initialise the extent root, insert one block group item per
 * in-memory block group into the new extent root, and finally reset any
 * pending balance.
 * NOTE(review): the error checks after each step and the return statements
 * are on lines not visible here.
 */
13022 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13023 struct btrfs_fs_info *fs_info)
13029 * The only reason we don't do this is because right now we're just
13030 * walking the trees we find and pinning down their bytes, we don't look
13031 * at any of the leaves. In order to do mixed groups we'd have to check
13032 * the leaves of any fs roots and pin down the bytes for any file
13033 * extents we find. Not hard but why do it if we don't have to?
13035 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13036 fprintf(stderr, "We don't support re-initing the extent tree "
13037 "for mixed block groups yet, please notify a btrfs "
13038 "developer you want to do this so they can add this "
13039 "functionality.\n");
13044 * first we need to walk all of the trees except the extent tree and pin
13045 * down the bytes that are in use so we don't overwrite any existing
13048 ret = pin_metadata_blocks(fs_info);
13050 fprintf(stderr, "error pinning down used bytes\n");
13055 * Need to drop all the block groups since we're going to recreate all
13058 btrfs_free_block_groups(fs_info);
13059 ret = reset_block_groups(fs_info);
13061 fprintf(stderr, "error resetting the block groups\n");
13065 /* Ok we can allocate now, reinit the extent root */
13066 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13068 fprintf(stderr, "extent root initialization failed\n");
13070 * When the transaction code is updated we should end the
13071 * transaction, but for now progs only knows about commit so
13072 * just return an error.
13078 * Now we have all the in-memory block groups setup so we can make
13079 * allocations properly, and the metadata we care about is safe since we
13080 * pinned all of it above.
13083 struct btrfs_block_group_cache *cache;
/* Insert each in-memory block group, in address order, into the extent root. */
13085 cache = btrfs_lookup_first_block_group(fs_info, start);
13088 start = cache->key.objectid + cache->key.offset;
13089 ret = btrfs_insert_item(trans, fs_info->extent_root,
13090 &cache->key, &cache->item,
13091 sizeof(cache->item));
13093 fprintf(stderr, "Error adding block group\n");
13096 btrfs_extent_post_op(trans, fs_info->extent_root);
13099 ret = reset_balance(trans, fs_info);
13101 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Re-COW metadata block @eb.
 *
 * The owner root of @eb is read (the @root argument is only used to reach
 * fs_info and is then overwritten), a transaction is started on it, and the
 * first key of @eb is searched with path.lowest_level set to eb's level.
 * The transaction is then committed and the path released.
 * Returns PTR_ERR() of the root or of the transaction handle when either
 * cannot be obtained.
 * NOTE(review): the final return statement is on a line not visible here.
 */
13106 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13108 struct btrfs_path path;
13109 struct btrfs_trans_handle *trans;
13110 struct btrfs_key key;
13113 printf("Recowing metadata block %llu\n", eb->start);
13114 key.objectid = btrfs_header_owner(eb);
13115 key.type = BTRFS_ROOT_ITEM_KEY;
13116 key.offset = (u64)-1;
13118 root = btrfs_read_fs_root(root->fs_info, &key);
13119 if (IS_ERR(root)) {
13120 fprintf(stderr, "Couldn't find owner root %llu\n",
13122 return PTR_ERR(root);
13125 trans = btrfs_start_transaction(root, 1);
13127 return PTR_ERR(trans);
/* Stop the search at eb's own level; the key is taken from eb's first slot. */
13129 btrfs_init_path(&path);
13130 path.lowest_level = btrfs_header_level(eb);
13131 if (path.lowest_level)
13132 btrfs_node_key_to_cpu(eb, &key, 0);
13134 btrfs_item_key_to_cpu(eb, &key, 0);
/*
 * NOTE(review): the result of btrfs_commit_transaction() is not captured on
 * the visible line - confirm whether a commit failure should be reported.
 */
13136 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13137 btrfs_commit_transaction(trans, root);
13138 btrfs_release_path(&path);
/*
 * Delete the item described by @bad from the root identified by
 * bad->root_id.
 *
 * The @root argument is only used to reach fs_info and is then overwritten
 * with the root that was read. The item is searched for by bad->key inside
 * a new transaction, deleted with btrfs_del_item(), and the transaction is
 * committed.
 * Returns PTR_ERR() of the root or of the transaction handle when either
 * cannot be obtained.
 * NOTE(review): the checks on the search result and the final return are on
 * lines not visible here.
 */
13142 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13144 struct btrfs_path path;
13145 struct btrfs_trans_handle *trans;
13146 struct btrfs_key key;
13149 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13150 bad->key.type, bad->key.offset);
13151 key.objectid = bad->root_id;
13152 key.type = BTRFS_ROOT_ITEM_KEY;
13153 key.offset = (u64)-1;
13155 root = btrfs_read_fs_root(root->fs_info, &key);
13156 if (IS_ERR(root)) {
13157 fprintf(stderr, "Couldn't find owner root %llu\n",
13159 return PTR_ERR(root);
13162 trans = btrfs_start_transaction(root, 1);
13164 return PTR_ERR(trans);
13166 btrfs_init_path(&path);
13167 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13173 ret = btrfs_del_item(trans, root, &path);
/*
 * NOTE(review): the result of btrfs_commit_transaction() is not captured on
 * the visible line - confirm whether a commit failure should be reported.
 */
13175 btrfs_commit_transaction(trans, root);
13176 btrfs_release_path(&path);
/*
 * Clear the log tree reference in the super block: inside a transaction on
 * @root, set the log root and the log root level of super_copy to 0, then
 * commit. ret holds PTR_ERR() of the handle when the transaction cannot be
 * started, otherwise the result of the commit.
 */
13180 static int zero_log_tree(struct btrfs_root *root)
13182 struct btrfs_trans_handle *trans;
13185 trans = btrfs_start_transaction(root, 1);
13186 if (IS_ERR(trans)) {
13187 ret = PTR_ERR(trans);
13190 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13191 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13192 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and store checksums for the data range starting at @start.
 *
 * The range is processed one sector (fs_info->sectorsize) at a time: the
 * data at start + offset is read into @buf with read_extent_data() and
 * passed to btrfs_csum_file_block() together with the end of the range
 * (start + len).
 * NOTE(review): the last parameter, the error checks inside the loop and
 * the return statement are on lines not visible here; @buf is presumably
 * expected to hold at least one sector - confirm against callers.
 */
13196 static int populate_csum(struct btrfs_trans_handle *trans,
13197 struct btrfs_root *csum_root, char *buf, u64 start,
13200 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13205 while (offset < len) {
13206 sectorsize = fs_info->sectorsize;
13207 ret = read_extent_data(fs_info, buf, start + offset,
13211 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13212 start + offset, buf, sectorsize);
13215 offset += sectorsize;
/*
 * Fill @csum_root with checksums for the regular file extents referenced by
 * the fs/subvolume tree @cur_root.
 *
 * Items of @cur_root are iterated with btrfs_next_item(). Only
 * EXTENT_DATA items of type BTRFS_FILE_EXTENT_REG are processed: their disk
 * bytenr and disk length are handed to populate_csum(), using a scratch
 * buffer of one sector.
 * NOTE(review): the initial search key, the handling of -EEXIST from
 * populate_csum(), the release of buf and the return statement are on lines
 * not visible here.
 */
13220 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13221 struct btrfs_root *csum_root,
13222 struct btrfs_root *cur_root)
13224 struct btrfs_path path;
13225 struct btrfs_key key;
13226 struct extent_buffer *node;
13227 struct btrfs_file_extent_item *fi;
13234 buf = malloc(cur_root->fs_info->sectorsize);
13238 btrfs_init_path(&path);
13242 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13245 /* Iterate all regular file extents and fill its csum */
13247 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13249 if (key.type != BTRFS_EXTENT_DATA_KEY)
13251 node = path.nodes[0];
13252 slot = path.slots[0];
13253 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13254 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* Checksums cover the on-disk extent, hence the disk bytenr and disk length. */
13256 start = btrfs_file_extent_disk_bytenr(node, fi);
13257 len = btrfs_file_extent_disk_num_bytes(node, fi);
13259 ret = populate_csum(trans, csum_root, buf, start, len);
13260 if (ret == -EEXIST)
13266 * TODO: if next leaf is corrupted, jump to nearest next valid
13269 ret = btrfs_next_item(cur_root, &path);
13279 btrfs_release_path(&path);
/*
 * Fill @csum_root by walking every fs/subvolume tree.
 *
 * ROOT_ITEMs in the tree root are iterated starting at
 * BTRFS_FS_TREE_OBJECTID. Each item whose objectid satisfies is_fstree() is
 * read with btrfs_read_fs_root() and passed to
 * fill_csum_tree_from_one_fs_root(). Objectids above
 * BTRFS_LAST_FREE_OBJECTID are treated separately.
 * NOTE(review): the statements taken on each condition, the error handling
 * and the return statement are on lines not visible here.
 */
13284 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13285 struct btrfs_root *csum_root)
13287 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13288 struct btrfs_path path;
13289 struct btrfs_root *tree_root = fs_info->tree_root;
13290 struct btrfs_root *cur_root;
13291 struct extent_buffer *node;
13292 struct btrfs_key key;
13296 btrfs_init_path(&path);
13297 key.objectid = BTRFS_FS_TREE_OBJECTID;
13299 key.type = BTRFS_ROOT_ITEM_KEY;
13300 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13309 node = path.nodes[0];
13310 slot = path.slots[0];
13311 btrfs_item_key_to_cpu(node, &key, slot);
13312 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13314 if (key.type != BTRFS_ROOT_ITEM_KEY)
13316 if (!is_fstree(key.objectid))
13318 key.offset = (u64)-1;
13320 cur_root = btrfs_read_fs_root(fs_info, &key);
13321 if (IS_ERR(cur_root) || !cur_root) {
13322 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13326 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13331 ret = btrfs_next_item(tree_root, &path);
13341 btrfs_release_path(&path);
/*
 * Fill @csum_root by walking the extent tree.
 *
 * EXTENT_ITEMs of the extent root are iterated leaf by leaf; items whose
 * flags include BTRFS_EXTENT_FLAG_DATA are passed to populate_csum() with
 * the key objectid as the start of the range, using a scratch buffer of one
 * sector.
 * NOTE(review): the slot advance, the length passed to populate_csum(), the
 * release of buf and the return statements are on lines not visible here.
 */
13345 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13346 struct btrfs_root *csum_root)
13348 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13349 struct btrfs_path path;
13350 struct btrfs_extent_item *ei;
13351 struct extent_buffer *leaf;
13353 struct btrfs_key key;
13356 btrfs_init_path(&path);
13358 key.type = BTRFS_EXTENT_ITEM_KEY;
13360 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
13362 btrfs_release_path(&path);
13366 buf = malloc(csum_root->fs_info->sectorsize);
13368 btrfs_release_path(&path);
/* Move on to the next leaf once the current one is exhausted. */
13373 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13374 ret = btrfs_next_leaf(extent_root, &path);
13382 leaf = path.nodes[0];
13384 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13385 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
13390 ei = btrfs_item_ptr(leaf, path.slots[0],
13391 struct btrfs_extent_item);
13392 if (!(btrfs_extent_flags(leaf, ei) &
13393 BTRFS_EXTENT_FLAG_DATA)) {
13398 ret = populate_csum(trans, csum_root, buf, key.objectid,
13405 btrfs_release_path(&path);
13411 * Recalculate the csum and put it into the csum tree.
13413 * Extent tree init will wipe out all the extent info, so in that case, we
13414 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
13415 * will use fs/subvol trees to init the csum tree.
/*
 * Rebuild the contents of @csum_root within @trans.
 * When @search_fs_tree is non-zero the fs/subvolume trees are used as the
 * source of data extents; the other visible return uses the extent tree.
 */
13417 static int fill_csum_tree(struct btrfs_trans_handle *trans,
13418 struct btrfs_root *csum_root,
13419 int search_fs_tree)
13421 if (search_fs_tree)
13422 return fill_csum_tree_from_fs(trans, csum_root);
13424 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache: remove every cache extent from the
 * tree, then free the tree itself and reset the global pointer to NULL.
 * NOTE(review): the release of each root_item_info entry is on a line not
 * visible here.
 */
13427 static void free_roots_info_cache(void)
13429 if (!roots_info_cache)
13432 while (!cache_tree_empty(roots_info_cache)) {
13433 struct cache_extent *entry;
13434 struct root_item_info *rii;
13436 entry = first_cache_extent(roots_info_cache);
13439 remove_cache_extent(roots_info_cache, entry);
13440 rii = container_of(entry, struct root_item_info, cache_extent);
13444 free(roots_info_cache);
13445 roots_info_cache = NULL;
/*
 * Build roots_info_cache from the extent tree.
 *
 * The cache is allocated on first use. Every tree block extent (an
 * EXTENT_ITEM with BTRFS_EXTENT_FLAG_TREE_BLOCK, or a METADATA_ITEM) whose
 * first inline ref is a BTRFS_TREE_BLOCK_REF_KEY is attributed to the root
 * id stored in that ref. Per root id, the cache entry records the bytenr,
 * generation and level of the highest-level block seen so far.
 * NOTE(review): the "continue"/"goto" statements, the handling of blocks
 * whose level equals the recorded level (presumably counted in node_count)
 * and the return statement are on lines not visible here.
 */
13448 static int build_roots_info_cache(struct btrfs_fs_info *info)
13451 struct btrfs_key key;
13452 struct extent_buffer *leaf;
13453 struct btrfs_path path;
13455 if (!roots_info_cache) {
13456 roots_info_cache = malloc(sizeof(*roots_info_cache));
13457 if (!roots_info_cache)
13459 cache_tree_init(roots_info_cache);
13462 btrfs_init_path(&path);
13464 key.type = BTRFS_EXTENT_ITEM_KEY;
13466 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
13469 leaf = path.nodes[0];
13472 struct btrfs_key found_key;
13473 struct btrfs_extent_item *ei;
13474 struct btrfs_extent_inline_ref *iref;
13475 int slot = path.slots[0];
13480 struct cache_extent *entry;
13481 struct root_item_info *rii;
/* Move on to the next leaf once the current one is exhausted. */
13483 if (slot >= btrfs_header_nritems(leaf)) {
13484 ret = btrfs_next_leaf(info->extent_root, &path);
13491 leaf = path.nodes[0];
13492 slot = path.slots[0];
13495 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13497 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
13498 found_key.type != BTRFS_METADATA_ITEM_KEY)
13501 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
13502 flags = btrfs_extent_flags(leaf, ei);
13504 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
13505 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * METADATA_ITEM keys carry the level in the key offset; otherwise the level
 * is read from the btrfs_tree_block_info that precedes the inline refs.
 */
13508 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
13509 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
13510 level = found_key.offset;
13512 struct btrfs_tree_block_info *binfo;
13514 binfo = (struct btrfs_tree_block_info *)(ei + 1);
13515 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
13516 level = btrfs_tree_block_level(leaf, binfo);
13520 * For a root extent, it must be of the following type and the
13521 * first (and only one) iref in the item.
13523 type = btrfs_extent_inline_ref_type(leaf, iref);
13524 if (type != BTRFS_TREE_BLOCK_REF_KEY)
/* Cache entries are keyed by root id, each covering a range of size 1. */
13527 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
13528 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13530 rii = malloc(sizeof(struct root_item_info));
13535 rii->cache_extent.start = root_id;
13536 rii->cache_extent.size = 1;
13537 rii->level = (u8)-1;
13538 entry = &rii->cache_extent;
13539 ret = insert_cache_extent(roots_info_cache, entry);
13542 rii = container_of(entry, struct root_item_info,
13546 ASSERT(rii->cache_extent.start == root_id);
13547 ASSERT(rii->cache_extent.size == 1);
/* (u8)-1 marks a freshly created entry that has no level recorded yet. */
13549 if (level > rii->level || rii->level == (u8)-1) {
13550 rii->level = level;
13551 rii->bytenr = found_key.objectid;
13552 rii->gen = btrfs_extent_generation(leaf, ei);
13553 rii->node_count = 1;
13554 } else if (level == rii->level) {
13562 btrfs_release_path(&path);
/*
 * Compare the root item that @path points at with the information cached
 * for @root_key->objectid in roots_info_cache, and optionally fix it.
 *
 * The bytenr, level and generation of the on-disk root item are compared
 * with the cached values. On a mismatch a message is printed, unless the
 * call is read-only while the global repair flag is set. When
 * @read_only_mode is zero the three fields are overwritten with the cached
 * values and written back into the leaf.
 * A missing cache entry, a node_count other than 1, and a root item whose
 * generation is newer than the cached one each print a message.
 * NOTE(review): the return values for those cases and for the "needs
 * repair"/"nothing to do" outcomes are on lines not visible here.
 */
13567 static int maybe_repair_root_item(struct btrfs_path *path,
13568 const struct btrfs_key *root_key,
13569 const int read_only_mode)
13571 const u64 root_id = root_key->objectid;
13572 struct cache_extent *entry;
13573 struct root_item_info *rii;
13574 struct btrfs_root_item ri;
13575 unsigned long offset;
13577 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13580 "Error: could not find extent items for root %llu\n",
13581 root_key->objectid);
13585 rii = container_of(entry, struct root_item_info, cache_extent);
13586 ASSERT(rii->cache_extent.start == root_id);
13587 ASSERT(rii->cache_extent.size == 1);
13589 if (rii->node_count != 1) {
13591 "Error: could not find btree root extent for root %llu\n",
/* Work on a local copy of the root item read out of the leaf. */
13596 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
13597 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
13599 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
13600 btrfs_root_level(&ri) != rii->level ||
13601 btrfs_root_generation(&ri) != rii->gen) {
13604 * If we're in repair mode but our caller told us to not update
13605 * the root item, i.e. just check if it needs to be updated, don't
13606 * print this message, since the caller will call us again shortly
13607 * for the same root item without read only mode (the caller will
13608 * open a transaction first).
13610 if (!(read_only_mode && repair))
13612 "%sroot item for root %llu,"
13613 " current bytenr %llu, current gen %llu, current level %u,"
13614 " new bytenr %llu, new gen %llu, new level %u\n",
13615 (read_only_mode ? "" : "fixing "),
13617 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
13618 btrfs_root_level(&ri),
13619 rii->bytenr, rii->gen, rii->level);
13621 if (btrfs_root_generation(&ri) > rii->gen) {
13623 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
13624 root_id, btrfs_root_generation(&ri), rii->gen);
/* Patch the local copy and write it back over the item in the leaf. */
13628 if (!read_only_mode) {
13629 btrfs_set_root_bytenr(&ri, rii->bytenr);
13630 btrfs_set_root_level(&ri, rii->level);
13631 btrfs_set_root_generation(&ri, rii->gen);
13632 write_extent_buffer(path->nodes[0], &ri,
13633 offset, sizeof(ri));
13643 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
13644 * caused read-only snapshots to be corrupted if they were created at a moment
13645 * when the source subvolume/snapshot had orphan items. The issue was that the
13646 * on-disk root items became incorrect, referring to the pre orphan cleanup root
13647 * node instead of the post orphan cleanup root node.
13648 * So this function, and its callees, just detects and fixes those cases. Even
13649 * though the regression was for read-only snapshots, this function applies to
13650 * any snapshot/subvolume root.
13651 * This must be run before any other repair code - otherwise other repair
13652 * code may delete or modify backrefs in the extent tree, for example, which
13653 * will result in an inconsistent fs after repairing the root items.
/*
 * Detect, and in repair mode fix, root items that disagree with the extent
 * tree.
 *
 * The roots info cache is built first. ROOT_ITEMs of the tree root are then
 * visited starting at BTRFS_FIRST_FREE_OBJECTID; reloc tree items are
 * handled separately. Each visited item is passed to
 * maybe_repair_root_item(), in read-only mode while no transaction is open.
 * The cache is freed and the path released before returning.
 * NOTE(review): the restart logic around need_trans, the commit/cleanup
 * conditions and the return value are on lines not visible here.
 */
13655 static int repair_root_items(struct btrfs_fs_info *info)
13657 struct btrfs_path path;
13658 struct btrfs_key key;
13659 struct extent_buffer *leaf;
13660 struct btrfs_trans_handle *trans = NULL;
13663 int need_trans = 0;
13665 btrfs_init_path(&path);
13667 ret = build_roots_info_cache(info);
13671 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
13672 key.type = BTRFS_ROOT_ITEM_KEY;
13677 * Avoid opening and committing transactions if a leaf doesn't have
13678 * any root items that need to be fixed, so that we avoid rotating
13679 * backup roots unnecessarily.
13682 trans = btrfs_start_transaction(info->tree_root, 1);
13683 if (IS_ERR(trans)) {
13684 ret = PTR_ERR(trans);
13689 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
13693 leaf = path.nodes[0];
13696 struct btrfs_key found_key;
/* End of leaf: find the key to resume from before the path is released. */
13698 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
13699 int no_more_keys = find_next_key(&path, &key);
13701 btrfs_release_path(&path);
13703 ret = btrfs_commit_transaction(trans,
13715 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13717 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
13719 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* Without an open transaction the item is only checked, not modified. */
13722 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
13726 if (!trans && repair) {
13729 btrfs_release_path(&path);
13739 free_roots_info_cache();
13740 btrfs_release_path(&path);
13742 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the free space cache of block groups via
 * btrfs_clear_free_space_cache(), then set the super block's cache generation
 * to (u64)-1 and commit.
 */
13749 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13751 struct btrfs_trans_handle *trans;
13752 struct btrfs_block_group_cache *bg_cache;
13756 /* Clear all free space cache inodes and its extent data */
/* Look up the next block group to handle, searching from `current`. */
13758 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13761 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
/* Continue the search from key.objectid + key.offset of the block group just handled. */
13764 current = bg_cache->key.objectid + bg_cache->key.offset;
13767 /* Don't forget to set cache_generation to -1 */
13768 trans = btrfs_start_transaction(fs_info->tree_root, 0);
13769 if (IS_ERR(trans)) {
13770 error("failed to update super block cache generation");
13771 return PTR_ERR(trans);
13773 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
/* NOTE(review): the return value of this commit is not captured on this line - confirm. */
13774 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Dispatch a clear-space-cache request by version: clear_version 1 goes to
 * clear_free_space_cache(), clear_version 2 goes to
 * btrfs_clear_free_space_tree(). Progress and errors are reported with
 * printf()/error().
 */
13779 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
13784 if (clear_version == 1) {
/* The FREE_SPACE_TREE compat_ro flag is set: report it and point the user at v2. */
13785 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13787 "free space cache v2 detected, use --clear-space-cache v2");
13791 printf("Clearing free space cache\n");
13792 ret = clear_free_space_cache(fs_info);
13794 error("failed to clear free space cache");
13797 printf("Free space cache cleared\n");
13799 } else if (clear_version == 2) {
/* Without the FREE_SPACE_TREE compat_ro flag there is no v2 cache to clear. */
13800 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13801 printf("no free space cache v2 to clear\n");
13805 printf("Clear free space cache v2\n");
13806 ret = btrfs_clear_free_space_tree(fs_info);
13808 error("failed to clear free space cache v2: %d", ret);
13811 printf("free space cache v2 cleared\n");
/*
 * Help text for "btrfs check": synopsis, short and long descriptions, then
 * one entry per option. cmd_check() passes this array to usage() when the
 * command line is not acceptable.
 */
13818 const char * const cmd_check_usage[] = {
13819 "btrfs check [options] <device>",
13820 "Check structural integrity of a filesystem (unmounted).",
13821 "Check structural integrity of an unmounted filesystem. Verify internal",
13822 "trees' consistency and item connectivity. In the repair mode try to",
13823 "fix the problems found. ",
13824 "WARNING: the repair mode is considered dangerous",
13826 "-s|--super <superblock> use this superblock copy",
13827 "-b|--backup use the first valid backup root copy",
13828 "--force skip mount checks, repair is not possible",
13829 "--repair try to repair the filesystem",
13830 "--readonly run in read-only mode (default)",
13831 "--init-csum-tree create a new CRC tree",
13832 "--init-extent-tree create a new extent tree",
13833 "--mode <MODE> allows choice of memory/IO trade-offs",
13834 " where MODE is one of:",
13835 " original - read inodes and extents to memory (requires",
13836 " more memory, does less IO)",
13837 " lowmem - try to use less memory but read blocks again",
13839 "--check-data-csum verify checksums of data blocks",
13840 "-Q|--qgroup-report print a report on qgroup consistency",
13841 "-E|--subvol-extents <subvolid>",
13842 " print subvolume extents and sharing state",
13843 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
13844 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
13845 "-p|--progress indicate progress",
13846 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of "btrfs check". Parses the command line, opens the filesystem
 * named by the single positional argument, then runs the individual checks
 * (extents and chunks, root items, free space, fs roots, csums, root refs,
 * quota groups) and prints summary counters.
 */
13850 int cmd_check(int argc, char **argv)
13852 struct cache_tree root_cache;
13853 struct btrfs_root *root;
13854 struct btrfs_fs_info *info;
13857 u64 tree_root_bytenr = 0;
13858 u64 chunk_root_bytenr = 0;
13859 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
13863 int init_csum_tree = 0;
13865 int clear_space_cache = 0;
13866 int qgroup_report = 0;
13867 int qgroups_repaired = 0;
13868 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* Return codes for options that only have a long form; numbering starts at 257. */
13873 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
13874 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
13875 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
13876 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
13877 GETOPT_VAL_FORCE };
13878 static const struct option long_options[] = {
13879 { "super", required_argument, NULL, 's' },
13880 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
13881 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
13882 { "init-csum-tree", no_argument, NULL,
13883 GETOPT_VAL_INIT_CSUM },
13884 { "init-extent-tree", no_argument, NULL,
13885 GETOPT_VAL_INIT_EXTENT },
13886 { "check-data-csum", no_argument, NULL,
13887 GETOPT_VAL_CHECK_CSUM },
13888 { "backup", no_argument, NULL, 'b' },
13889 { "subvol-extents", required_argument, NULL, 'E' },
13890 { "qgroup-report", no_argument, NULL, 'Q' },
13891 { "tree-root", required_argument, NULL, 'r' },
13892 { "chunk-root", required_argument, NULL,
13893 GETOPT_VAL_CHUNK_TREE },
13894 { "progress", no_argument, NULL, 'p' },
13895 { "mode", required_argument, NULL,
13897 { "clear-space-cache", required_argument, NULL,
13898 GETOPT_VAL_CLEAR_SPACE_CACHE},
13899 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
13900 { NULL, 0, NULL, 0}
/*
 * NOTE(review): 'E' has no ':' in the short option string below, yet
 * --subvol-extents is declared required_argument above and its value is read
 * through arg_strtou64(optarg). With the short form "-E <id>" optarg would
 * presumably be NULL - confirm, and consider "E:".
 */
13903 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
13907 case 'a': /* ignored */ break;
13909 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
13912 num = arg_strtou64(optarg);
13913 if (num >= BTRFS_SUPER_MIRROR_MAX) {
13915 "super mirror should be less than %d",
13916 BTRFS_SUPER_MIRROR_MAX);
13919 bytenr = btrfs_sb_offset(((int)num));
13920 printf("using SB copy %llu, bytenr %llu\n", num,
13921 (unsigned long long)bytenr);
13927 subvolid = arg_strtou64(optarg);
13930 tree_root_bytenr = arg_strtou64(optarg);
13932 case GETOPT_VAL_CHUNK_TREE:
13933 chunk_root_bytenr = arg_strtou64(optarg);
13936 ctx.progress_enabled = true;
13940 usage(cmd_check_usage);
13941 case GETOPT_VAL_REPAIR:
13942 printf("enabling repair mode\n");
13944 ctree_flags |= OPEN_CTREE_WRITES;
13946 case GETOPT_VAL_READONLY:
13949 case GETOPT_VAL_INIT_CSUM:
13950 printf("Creating a new CRC tree\n");
13951 init_csum_tree = 1;
13953 ctree_flags |= OPEN_CTREE_WRITES;
13955 case GETOPT_VAL_INIT_EXTENT:
13956 init_extent_tree = 1;
13957 ctree_flags |= (OPEN_CTREE_WRITES |
13958 OPEN_CTREE_NO_BLOCK_GROUPS);
13961 case GETOPT_VAL_CHECK_CSUM:
13962 check_data_csum = 1;
13964 case GETOPT_VAL_MODE:
13965 check_mode = parse_check_mode(optarg);
13966 if (check_mode == CHECK_MODE_UNKNOWN) {
13967 error("unknown mode: %s", optarg);
13971 case GETOPT_VAL_CLEAR_SPACE_CACHE:
13972 if (strcmp(optarg, "v1") == 0) {
13973 clear_space_cache = 1;
13974 } else if (strcmp(optarg, "v2") == 0) {
13975 clear_space_cache = 2;
13976 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
13979 "invalid argument to --clear-space-cache, must be v1 or v2");
13982 ctree_flags |= OPEN_CTREE_WRITES;
13984 case GETOPT_VAL_FORCE:
/* Exactly one positional argument (the device) must remain after the options. */
13990 if (check_argc_exact(argc - optind, 1))
13991 usage(cmd_check_usage);
/* Set up the progress reporting task only when progress output was enabled. */
13993 if (ctx.progress_enabled) {
13994 ctx.tp = TASK_NOTHING;
13995 ctx.info = task_init(print_status_check, print_status_return, &ctx);
13998 /* This check is the only reason for --readonly to exist */
13999 if (readonly && repair) {
14000 error("repair options are not compatible with --readonly");
14005 * experimental and dangerous
14007 if (repair && check_mode == CHECK_MODE_LOWMEM)
14008 warning("low-memory mode repair support is only partial");
14011 cache_tree_init(&root_cache);
/* Find out whether the target is mounted; the messages below cover each outcome. */
14013 ret = check_mounted(argv[optind]);
14016 error("could not check mount status: %s",
14022 "%s is currently mounted, use --force if you really intend to check the filesystem",
14030 error("repair and --force is not yet supported");
14037 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14041 "filesystem mounted, continuing because of --force");
14043 /* A block device is mounted in exclusive mode by kernel */
14044 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14047 /* only allow partial opening under repair mode */
14049 ctree_flags |= OPEN_CTREE_PARTIAL;
/* Open the filesystem with the superblock/root overrides and flags collected above. */
14051 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14052 chunk_root_bytenr, ctree_flags);
14054 error("cannot open file system");
14060 global_info = info;
14061 root = info->fs_root;
14062 uuid_unparse(info->super_copy->fsid, uuidbuf);
14064 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14067 * Check the bare minimum before starting anything else that could rely
14068 * on it, namely the tree roots, any local consistency checks
14070 if (!extent_buffer_uptodate(info->tree_root->node) ||
14071 !extent_buffer_uptodate(info->dev_root->node) ||
14072 !extent_buffer_uptodate(info->chunk_root->node)) {
14073 error("critical roots corrupted, unable to check the filesystem");
/* A clear-space-cache request (1 = v1, 2 = v2) is handed to do_clear_free_space_cache(). */
14079 if (clear_space_cache) {
14080 ret = do_clear_free_space_cache(info, clear_space_cache);
14086 * repair mode will force us to commit transaction which
14087 * will make us fail to load log tree when mounting.
14089 if (repair && btrfs_super_log_root(info->super_copy)) {
14090 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14096 ret = zero_log_tree(root);
14099 error("failed to zero log tree: %d", ret);
14104 if (qgroup_report) {
14105 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14107 ret = qgroup_verify_all(info);
14114 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14115 subvolid, argv[optind], uuidbuf);
14116 ret = print_extent_state(info, subvolid);
/* Re-create the extent tree and/or the csum tree inside a single transaction. */
14121 if (init_extent_tree || init_csum_tree) {
14122 struct btrfs_trans_handle *trans;
14124 trans = btrfs_start_transaction(info->extent_root, 0);
14125 if (IS_ERR(trans)) {
14126 error("error starting transaction");
14127 ret = PTR_ERR(trans);
14132 if (init_extent_tree) {
14133 printf("Creating a new extent tree\n");
14134 ret = reinit_extent_tree(trans, info);
14140 if (init_csum_tree) {
14141 printf("Reinitialize checksum tree\n");
14142 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14144 error("checksum tree initialization failed: %d",
14151 ret = fill_csum_tree(trans, info->csum_root,
14155 error("checksum tree refilling failed: %d", ret);
14160 * Ok now we commit and run the normal fsck, which will add
14161 * extent entries for all of the items it finds.
14163 ret = btrfs_commit_transaction(trans, info->extent_root);
14168 if (!extent_buffer_uptodate(info->extent_root->node)) {
14169 error("critical: extent_root, unable to check the filesystem");
14174 if (!extent_buffer_uptodate(info->csum_root->node)) {
14175 error("critical: csum_root, unable to check the filesystem");
/* Check the extent allocation tree and chunk allocation. */
14181 ret = do_check_chunks_and_extents(info);
14185 "errors found in extent allocation tree or chunk allocation");
/* A positive return value is printed below as a count of roots. */
14187 ret = repair_root_items(info);
14190 error("failed to repair root items: %s", strerror(-ret));
14194 fprintf(stderr, "Fixed %d roots.\n", ret);
14196 } else if (ret > 0) {
14198 "Found %d roots with an outdated root item.\n",
14201 "Please run a filesystem check with the option --repair to fix them.\n");
/* The announcement names the free space tree or the free space cache, depending on FREE_SPACE_TREE. */
14207 if (!ctx.progress_enabled) {
14208 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14209 fprintf(stderr, "checking free space tree\n");
14211 fprintf(stderr, "checking free space cache\n");
14213 ret = check_space_cache(root);
14216 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14217 error("errors found in free space tree");
14219 error("errors found in free space cache");
14224 * We used to have to have these hole extents in between our real
14225 * extents so if we don't have this flag set we need to make sure there
14226 * are no gaps in the file extents for inodes, otherwise we can just
14227 * ignore it when this happens.
14229 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14230 ret = do_check_fs_roots(info, &root_cache);
14233 error("errors found in fs roots");
14237 fprintf(stderr, "checking csums\n");
14238 ret = check_csums(root);
14241 error("errors found in csum tree");
14245 fprintf(stderr, "checking root refs\n");
14246 /* For low memory mode, check_fs_roots_v2 handles root refs */
14247 if (check_mode != CHECK_MODE_LOWMEM) {
14248 ret = check_root_refs(root, &root_cache);
14251 error("errors found in root refs");
/* In repair mode, drain the recow_ebs list through recow_extent_buffer(). */
14256 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14257 struct extent_buffer *eb;
14259 eb = list_first_entry(&root->fs_info->recow_ebs,
14260 struct extent_buffer, recow);
14261 list_del_init(&eb->recow);
14262 ret = recow_extent_buffer(root, eb);
14265 error("fails to fix transid errors");
/* Drain the file-scope delete_items list through delete_bad_item(). */
14270 while (!list_empty(&delete_items)) {
14271 struct bad_item *bad;
14273 bad = list_first_entry(&delete_items, struct bad_item, list);
14274 list_del_init(&bad->list);
14276 ret = delete_bad_item(root, bad);
14282 if (info->quota_enabled) {
14283 fprintf(stderr, "checking quota groups\n");
14284 ret = qgroup_verify_all(info);
14287 error("failed to check quota groups");
14291 ret = repair_qgroups(info, &qgroups_repaired);
14294 error("failed to repair quota groups");
/* Entries still on recow_ebs at this point are reported as transid errors. */
14300 if (!list_empty(&root->fs_info->recow_ebs)) {
14301 error("transid errors in file system");
/* Print the file-scope summary counters. */
14306 printf("found %llu bytes used, ",
14307 (unsigned long long)bytes_used);
14309 printf("error(s) found\n");
14311 printf("no error found\n");
14312 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14313 printf("total tree bytes: %llu\n",
14314 (unsigned long long)total_btree_bytes);
14315 printf("total fs tree bytes: %llu\n",
14316 (unsigned long long)total_fs_tree_bytes);
14317 printf("total extent tree bytes: %llu\n",
14318 (unsigned long long)total_extent_tree_bytes);
14319 printf("btree space waste bytes: %llu\n",
14320 (unsigned long long)btree_space_waste);
14321 printf("file data blocks allocated: %llu\n referenced %llu\n",
14322 (unsigned long long)data_bytes_allocated,
14323 (unsigned long long)data_bytes_referenced);
14325 free_qgroup_counts();
14326 free_root_recs_tree(&root_cache);
/* Tear down the progress task created by task_init() earlier in this function. */
14330 if (ctx.progress_enabled)
14331 task_deinit(ctx.info);