2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
/*
 * File-scope state of the checker.  The u64 counters all start at zero and
 * the two lists start empty; the code that updates them lies outside this
 * excerpt.  no_holes is consulted by maybe_free_inode_rec() to decide whether
 * gaps between file extents count as errors.
 */
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
/* Context handed to the progress task; see print_status_check(). */
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
/*
 * Check implementation selector.  parse_check_mode() yields
 * CHECK_MODE_LOWMEM, CHECK_MODE_ORIGINAL or CHECK_MODE_UNKNOWN; the default
 * is an alias of CHECK_MODE_ORIGINAL.  The other enumerators are declared on
 * lines not visible in this excerpt.
 */
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
/* Mode in effect for this run; starts as the default mode. */
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
/*
 * Common header embedded as the 'node' member of both struct data_backref
 * and struct tree_backref.  rb_node_to_extent_backref() shows that it also
 * contains an rb_node member named 'node' (declaration not visible here).
 * is_data tells the two backref kinds apart: the data comparator warns when
 * it is clear, the tree comparator warns when it is set.
 */
87 struct extent_backref {
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
/*
 * Convert a pointer to the rb_node embedded in an extent_backref (member
 * 'node') back into a pointer to the containing extent_backref.
 */
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
98 return rb_entry(node, struct extent_backref, node);
/*
 * Backref of a data extent.  Embeds the common extent_backref header as
 * 'node'.  compare_data_backref() additionally reads the members parent,
 * owner, offset, found_ref, disk_bytenr and bytes, whose declarations are
 * not visible in this excerpt.
 */
101 struct data_backref {
102 struct extent_backref node;
/*
 * Error bits, one distinct bit per condition (bits 1 through 20 are
 * defined in the visible lines).
 */
116 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
117 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
118 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
119 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
120 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
121 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but not match */
122 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
123 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
124 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
125 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
126 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
127 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
128 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM no reference */
129 #define NO_INODE_ITEM (1<<14) /* no inode_item */
130 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
131 #define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
132 #define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but not match */
133 #define DIR_INDEX_MISSING (1<<18) /* DIR_INDEX not found */
134 #define DIR_INDEX_MISMATCH (1<<19) /* DIR_INDEX found but does not match */
135 #define DIR_COUNT_AGAIN (1<<20) /* DIR isize should be recalculated */
/*
 * Convert a pointer to the extent_backref header embedded in a
 * data_backref (member 'node') into a pointer to the data_backref.
 */
137 static inline struct data_backref* to_data_backref(struct extent_backref *back)
139 return container_of(back, struct data_backref, node);
/*
 * Ordering function for two data backrefs stored in an rb-tree.
 *
 * Keys are compared in this order: parent (which shares storage with root),
 * then - unless back1 is a full backref - owner and offset, and finally,
 * only when both sides have found_ref set, disk_bytenr and bytes.
 * The values returned for each outcome are on lines not visible in this
 * excerpt.  Both nodes are expected to be data backrefs; a warning is
 * raised otherwise.
 */
142 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
144 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
145 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
146 struct data_backref *back1 = to_data_backref(ext1);
147 struct data_backref *back2 = to_data_backref(ext2);
149 WARN_ON(!ext1->is_data);
150 WARN_ON(!ext2->is_data);
152 /* parent and root are a union, so this covers both */
153 if (back1->parent > back2->parent)
155 if (back1->parent < back2->parent)
158 /* This is a full backref and the parents match. */
159 if (back1->node.full_backref)
162 if (back1->owner > back2->owner)
164 if (back1->owner < back2->owner)
167 if (back1->offset > back2->offset)
169 if (back1->offset < back2->offset)
/* Location and size only take part once both refs have been found. */
172 if (back1->found_ref && back2->found_ref) {
173 if (back1->disk_bytenr > back2->disk_bytenr)
175 if (back1->disk_bytenr < back2->disk_bytenr)
178 if (back1->bytes > back2->bytes)
180 if (back1->bytes < back2->bytes)
188 * Much like data_backref, but with the undetermined members removed
189 * and linked through a list_head instead.
190 * During the extent scan, it is stored in root->orphan_data_extent.
191 * During the fs tree scan, it is then moved to inode_rec->orphan_data_extents.
/*
 * List-linked record of a data extent without an owner.  Besides the list
 * link, print_orphan_data_extents() reads objectid, offset and disk_bytenr
 * (declarations not visible in this excerpt).
 */
193 struct orphan_data_extent {
194 struct list_head list;
/*
 * Backref of a tree block.  Embeds the common extent_backref header as
 * 'node'; compare_tree_backref() orders entries by the 'parent' member,
 * which according to the comment there shares storage with 'root'.
 */
202 struct tree_backref {
203 struct extent_backref node;
/*
 * Convert a pointer to the extent_backref header embedded in a
 * tree_backref (member 'node') into a pointer to the tree_backref.
 */
210 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
212 return container_of(back, struct tree_backref, node);
/*
 * Ordering function for two tree backrefs stored in an rb-tree.  Only the
 * parent/root value is compared; the returned values are on lines not
 * visible in this excerpt.  Both nodes are expected to be tree backrefs;
 * a warning is raised if either has is_data set.
 */
215 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
217 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
218 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
219 struct tree_backref *back1 = to_tree_backref(ext1);
220 struct tree_backref *back2 = to_tree_backref(ext2);
222 WARN_ON(ext1->is_data);
223 WARN_ON(ext2->is_data);
225 /* parent and root are a union, so this covers both */
226 if (back1->parent > back2->parent)
228 if (back1->parent < back2->parent)
/*
 * Top-level ordering function for mixed backrefs.  Entries are first
 * separated by is_data, then by full_backref; entries equal in both flags
 * are handed to compare_data_backref() or compare_tree_backref().  The
 * condition selecting between the two delegates is on a line not visible
 * here (presumably the is_data flag - confirm against the full source).
 */
234 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
236 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
237 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
239 if (ext1->is_data > ext2->is_data)
242 if (ext1->is_data < ext2->is_data)
245 if (ext1->full_backref > ext2->full_backref)
247 if (ext1->full_backref < ext2->full_backref)
251 return compare_data_backref(node1, node2);
253 return compare_tree_backref(node1, node2);
/*
 * extent_record::flag_block_full_backref is a 2-bit field, so besides 0 and
 * 1 it can hold this third value.
 */
256 /* Explicit initialization for extent_record::flag_block_full_backref */
257 enum { FLAG_UNSET = 2 };
/*
 * Per-extent bookkeeping record.  It carries backrefs both as a list and
 * as an rb-tree, a list of duplicates, a generic list link (see
 * to_extent_record()) and a cache_extent for cache-tree insertion.
 * Several members are declared on lines not visible in this excerpt.
 */
259 struct extent_record {
260 struct list_head backrefs;
261 struct list_head dups;
262 struct rb_root backref_tree;
263 struct list_head list;
264 struct cache_extent cache;
265 struct btrfs_disk_key parent_key;
270 u64 extent_item_refs;
272 u64 parent_generation;
/* Two bits wide so that it can also hold FLAG_UNSET (2). */
276 unsigned int flag_block_full_backref:2;
277 unsigned int found_rec:1;
278 unsigned int content_checked:1;
279 unsigned int owner_ref_checked:1;
280 unsigned int is_root:1;
281 unsigned int metadata:1;
282 unsigned int bad_full_backref:1;
283 unsigned int crossing_stripes:1;
284 unsigned int wrong_chunk_type:1;
/*
 * Convert a pointer to the 'list' link embedded in an extent_record into a
 * pointer to the extent_record.
 */
287 static inline struct extent_record* to_extent_record(struct list_head *entry)
289 return container_of(entry, struct extent_record, list);
/*
 * One (directory, name) reference to an inode, linked into
 * inode_record::backrefs.  The found_* bits record which item kinds have
 * been seen for this reference.  get_inode_backref() allocates the entry
 * with namelen + 1 extra bytes and stores a NUL-terminated name in the
 * 'name' member; dir, namelen, index, filetype, ref_type and errors are
 * further members used in this file but declared on lines not visible here.
 */
292 struct inode_backref {
293 struct list_head list;
294 unsigned int found_dir_item:1;
295 unsigned int found_dir_index:1;
296 unsigned int found_inode_ref:1;
/*
 * Convert a pointer to the 'list' link embedded in an inode_backref into a
 * pointer to the inode_backref.
 */
306 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
308 return list_entry(entry, struct inode_backref, list);
/*
 * List-linked record describing a root item; only the list link and the
 * drop key are visible in this excerpt.
 */
311 struct root_item_record {
312 struct list_head list;
318 struct btrfs_key drop_key;
/*
 * Error bits stored in the 'errors' member of a backref and decoded by
 * print_ref_error().  One distinct bit per condition.
 */
321 #define REF_ERR_NO_DIR_ITEM (1 << 0)
322 #define REF_ERR_NO_DIR_INDEX (1 << 1)
323 #define REF_ERR_NO_INODE_REF (1 << 2)
324 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
325 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
326 #define REF_ERR_DUP_INODE_REF (1 << 5)
327 #define REF_ERR_INDEX_UNMATCH (1 << 6)
328 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
329 #define REF_ERR_NAME_TOO_LONG (1 << 8) /* 0x100 */
330 #define REF_ERR_NO_ROOT_REF (1 << 9)
331 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
332 #define REF_ERR_DUP_ROOT_REF (1 << 11)
333 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
/*
 * A gap in a file's extents, kept in an rb-tree (inode_record::holes).
 * The hole helpers in this file use the members node, start and len;
 * their declarations are not visible in this excerpt.
 */
335 struct file_extent_hole {
/*
 * Everything collected about one inode while walking a tree: the list of
 * name backrefs, status bits, the rb-tree of file extent holes and the list
 * of orphan data extents.  Numeric members used elsewhere in this file
 * (ino, refs, nlink, isize, nbytes, imode, found_link, found_size,
 * extent_start, extent_end, errors) are declared on lines not visible here.
 */
341 struct inode_record {
342 struct list_head backrefs;
343 unsigned int checked:1;
344 unsigned int merging:1;
345 unsigned int found_inode_item:1;
346 unsigned int found_dir_item:1;
347 unsigned int found_file_extent:1;
348 unsigned int found_csum_item:1;
349 unsigned int some_csum_missing:1;
350 unsigned int nodatasum:1;
363 struct rb_root holes;
364 struct list_head orphan_extents;
/*
 * Error bits stored in inode_record::errors and decoded by
 * print_inode_error().  One distinct bit per condition.
 */
369 #define I_ERR_NO_INODE_ITEM (1 << 0)
370 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
371 #define I_ERR_DUP_INODE_ITEM (1 << 2)
372 #define I_ERR_DUP_DIR_INDEX (1 << 3)
373 #define I_ERR_ODD_DIR_ITEM (1 << 4)
374 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
375 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
376 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
377 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) /* 0x100 */
378 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
379 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) /* 0x400 */
380 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
381 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
382 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
383 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
/*
 * List-linked reference to a root.  The single-bit members record which
 * item kinds were found for the reference and whether it is reachable;
 * remaining members are declared on lines not visible in this excerpt.
 */
385 struct root_backref {
386 struct list_head list;
387 unsigned int found_dir_item:1;
388 unsigned int found_dir_index:1;
389 unsigned int found_back_ref:1;
390 unsigned int found_forward_ref:1;
391 unsigned int reachable:1;
/*
 * Convert a pointer to the 'list' link embedded in a root_backref into a
 * pointer to the root_backref.
 */
400 static inline struct root_backref* to_root_backref(struct list_head *entry)
402 return list_entry(entry, struct root_backref, list);
406 struct list_head backrefs;
407 struct cache_extent cache;
408 unsigned int found_root_item:1;
414 struct cache_extent cache;
419 struct cache_extent cache;
420 struct cache_tree root_cache;
421 struct cache_tree inode_cache;
422 struct inode_record *current;
/*
 * State of a tree walk: a cache tree of shared nodes plus one shared_node
 * slot per tree level.  enter_shared_node() also uses the members
 * active_node and root_level, declared on lines not visible here.
 */
431 struct walk_control {
432 struct cache_tree shared;
433 struct shared_node *nodes[BTRFS_MAX_LEVEL];
439 struct btrfs_key key;
441 struct list_head list;
/* List-linked extent entry; only the list link is visible in this excerpt. */
444 struct extent_entry {
449 struct list_head list;
/*
 * Cached information about a root item, insertable into a cache tree
 * through cache_extent.  The members that the two inner comments describe
 * are declared on lines not visible in this excerpt.
 */
452 struct root_item_info {
453 /* level of the root */
455 /* number of nodes at this level, must be 1 for a root */
459 struct cache_extent cache_extent;
463 * Error bit for low memory mode check.
465 * Currently no caller cares about it yet. Just internal use for error
/*
 * Error bits for the low memory mode extent checks.
 *
 * Every flag owns a distinct bit so that a combined error mask can be decoded
 * without ambiguity.  CROSSING_STRIPE_BOUNDARY used to share bit 4 with
 * REFERENCER_MISMATCH, which made the two conditions indistinguishable once
 * OR-ed into a mask; it now uses the first free bit.  All users must refer to
 * these flags by name, never by numeric value.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
/*
 * Progress-reporting task body.
 *
 * @p: pointer to a struct task_ctx.
 *
 * Starts a periodic timer of 1000 (annotated as one second) and, unless the
 * current position is TASK_NOTHING, prints the label for priv->tp followed
 * by one of four indicator characters chosen by count % 4.  The line ends
 * with a carriage return so that the next print overwrites it.  Loop
 * structure, the counter and the return value are on lines not visible in
 * this excerpt.
 */
479 static void *print_status_check(void *p)
481 struct task_ctx *priv = p;
482 const char work_indicator[] = { '.', 'o', 'O', 'o' };
/* Labels are indexed by enum task_position. */
484 static char *task_position_string[] = {
486 "checking free space cache",
490 task_period_start(priv->info, 1000 /* 1s */);
492 if (priv->tp == TASK_NOTHING)
496 printf("%s [%c]\r", task_position_string[priv->tp],
497 work_indicator[count % 4]);
500 task_period_wait(priv->info);
505 static int print_status_return(void *p)
/*
 * Translate a mode name into an enum btrfs_check_mode value.
 *
 * @str: NUL-terminated mode name; compared exactly (case-sensitive).
 *
 * Returns CHECK_MODE_LOWMEM for "lowmem", CHECK_MODE_ORIGINAL for "orig" or
 * "original", and CHECK_MODE_UNKNOWN for anything else.
 */
513 static enum btrfs_check_mode parse_check_mode(const char *str)
515 if (strcmp(str, "lowmem") == 0)
516 return CHECK_MODE_LOWMEM;
517 if (strcmp(str, "orig") == 0)
518 return CHECK_MODE_ORIGINAL;
519 if (strcmp(str, "original") == 0)
520 return CHECK_MODE_ORIGINAL;
522 return CHECK_MODE_UNKNOWN;
/*
 * Look at the first (lowest-ordered) hole of an rb-tree of file extent
 * holes.  The empty-tree case is handled separately.  The return statements
 * are not visible in this excerpt; callers compare the result against file
 * sizes and against another tree's result, so it is presumably the start
 * offset of that first hole - confirm against the full source.
 */
525 /* Compatibility helper that lets older code keep working on top of the hole tree */
526 static u64 first_extent_gap(struct rb_root *holes)
528 struct file_extent_hole *hole;
530 if (RB_EMPTY_ROOT(holes))
533 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * Ordering function used when inserting a file_extent_hole into the hole
 * rb-tree.  Holes are ordered by start; for equal starts the lengths are
 * compared so that one of the two becomes the "merge center".  According
 * to the comment at the insertion site in add_file_extent_hole(), this
 * function never reports equality.  The returned values are on lines not
 * visible in this excerpt.
 */
537 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
539 struct file_extent_hole *hole1;
540 struct file_extent_hole *hole2;
542 hole1 = rb_entry(node1, struct file_extent_hole, node);
543 hole2 = rb_entry(node2, struct file_extent_hole, node);
545 if (hole1->start > hole2->start)
547 if (hole1->start < hole2->start)
549 /* Now hole1->start == hole2->start */
550 if (hole1->len >= hole2->len)
552 * Hole 1 will be merge center
553 * Same hole will be merged later
556 /* Hole 2 will be merge center */
561 * Add a hole to the record.
563 * This also merges holes on behalf of copy_file_extent_holes(),
564 * which ensures that no two holes in the tree are contiguous.
/*
 * Insert a new hole into @holes and merge it with its neighbours.
 *
 * A file_extent_hole is allocated and inserted using compare_hole().  If
 * the previous hole in tree order touches or overlaps the new one, the new
 * hole is extended backwards to the previous hole's start and the previous
 * hole is erased from the tree.  Afterwards, following holes are merged one
 * by one while they touch or overlap the (growing) hole.
 *
 * The remaining parameters (callers in this file pass a start and a
 * length), the allocation-failure handling and the return value are on
 * lines not visible in this excerpt.
 *
 * NOTE(review): when merging with the previous hole the new length is
 * derived from the new hole's end only.  Unlike the forward merge below, it
 * does not check whether the previous hole ends beyond the new hole, in
 * which case the merged hole would be shorter than the one it replaces -
 * confirm whether callers can ever add a hole fully inside an existing one.
 */
566 static int add_file_extent_hole(struct rb_root *holes,
569 struct file_extent_hole *hole;
570 struct file_extent_hole *prev = NULL;
571 struct file_extent_hole *next = NULL;
573 hole = malloc(sizeof(*hole));
578 /* Since compare will not return 0, no -EEXIST will happen */
579 rb_insert(holes, &hole->node, compare_hole);
581 /* simple merge with previous hole */
582 if (rb_prev(&hole->node))
583 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
585 if (prev && prev->start + prev->len >= hole->start) {
586 hole->len = hole->start + hole->len - prev->start;
587 hole->start = prev->start;
588 rb_erase(&prev->node, holes);
593 /* iterate merge with next holes */
595 if (!rb_next(&hole->node))
597 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
599 if (hole->start + hole->len >= next->start) {
/* Only grow when the next hole ends at or beyond the current end. */
600 if (hole->start + hole->len <= next->start + next->len)
601 hole->len = next->start + next->len -
603 rb_erase(&next->node, holes);
/*
 * Search callback passed to rb_search() by del_file_extent_hole().
 *
 * @node: tree node being examined (a file_extent_hole).
 * @data: a file_extent_hole carrying the searched position.
 *
 * The searched 'start' is compared against the node's range
 * [start, start + len); how 'start' is taken from @data and which values
 * are returned are on lines not visible in this excerpt.
 */
612 static int compare_hole_range(struct rb_node *node, void *data)
614 struct file_extent_hole *hole;
617 hole = (struct file_extent_hole *)data;
/* From here on 'hole' refers to the tree node, not the search key. */
620 hole = rb_entry(node, struct file_extent_hole, node);
621 if (start < hole->start)
623 if (start >= hole->start && start < hole->start + hole->len)
629 * Delete a hole from the record.
631 * This splits the containing hole if needed and is much stricter than add.
/*
 * Remove a range from the hole tree.
 *
 * The hole containing the range's start is looked up with
 * compare_hole_range(); the range must not extend past that hole's end
 * (the outcome of a violation is on a line not visible here).  The found
 * hole is erased and the parts of it before and after the removed range,
 * if any, are added back with add_file_extent_hole().
 *
 * Remaining parameters (used below as 'start' and 'len'), error handling
 * and the return value are on lines not visible in this excerpt.
 */
633 static int del_file_extent_hole(struct rb_root *holes,
636 struct file_extent_hole *hole;
637 struct file_extent_hole tmp;
642 struct rb_node *node;
649 node = rb_search(holes, &tmp, compare_hole_range, NULL);
652 hole = rb_entry(node, struct file_extent_hole, node);
653 if (start + len > hole->start + hole->len)
657 * Now there will be no overlap, delete the hole and re-add the
658 * split(s) if they exists.
/* Leading remainder: [hole->start, start). */
660 if (start > hole->start) {
661 prev_start = hole->start;
662 prev_len = start - hole->start;
/* Trailing remainder: [start + len, end of hole). */
665 if (hole->start + hole->len > start + len) {
666 next_start = start + len;
667 next_len = hole->start + hole->len - start - len;
670 rb_erase(node, holes);
673 ret = add_file_extent_hole(holes, prev_start, prev_len);
678 ret = add_file_extent_hole(holes, next_start, next_len);
/*
 * Add every hole of a source tree to @dst by walking the source in order
 * and calling add_file_extent_hole() for each entry, so holes are merged
 * on the way in.  The source is only read.  The second parameter (used
 * below as 'src'), error handling and the return value are on lines not
 * visible in this excerpt.
 */
685 static int copy_file_extent_holes(struct rb_root *dst,
688 struct file_extent_hole *hole;
689 struct rb_node *node;
692 node = rb_first(src);
694 hole = rb_entry(node, struct file_extent_hole, node);
695 ret = add_file_extent_hole(dst, hole->start, hole->len);
698 node = rb_next(node);
/*
 * Empty a hole tree: repeatedly take the first node and erase it from the
 * tree until none is left.  Releasing the memory of each hole presumably
 * happens on a line not visible in this excerpt.
 */
703 static void free_file_extent_holes(struct rb_root *holes)
705 struct rb_node *node;
706 struct file_extent_hole *hole;
708 node = rb_first(holes);
710 hole = rb_entry(node, struct file_extent_hole, node);
711 rb_erase(node, holes);
/* Restart from the new first node; the erased one must not be followed. */
713 node = rb_first(holes);
717 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
/*
 * Mark @root as participating in transaction @trans.
 *
 * Does nothing if the root was already recorded for this transaction id.
 * Otherwise the root is flagged track_dirty, stamped with the transaction
 * id, and its current node becomes the commit root, with an extra
 * reference taken on that node.
 */
719 static void record_root_in_trans(struct btrfs_trans_handle *trans,
720 struct btrfs_root *root)
722 if (root->last_trans != trans->transid) {
723 root->track_dirty = 1;
724 root->last_trans = trans->transid;
725 root->commit_root = root->node;
726 extent_buffer_get(root->node);
/*
 * Map the file-type bits of an inode mode to the matching BTRFS_FT_*
 * directory entry type.
 *
 * @imode: inode mode; only the S_IFMT bits are used.
 *
 * The lookup table is indexed by the type bits shifted right by S_SHIFT
 * (defined on a line not visible in this excerpt).  Type values without an
 * initializer yield 0.
 */
730 static u8 imode_to_type(u32 imode)
733 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
734 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
735 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
736 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
737 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
738 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
739 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
740 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
743 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
/*
 * Ordering function for device_record entries in an rb-tree: records are
 * ordered by devid.  The returned values are on lines not visible in this
 * excerpt.
 */
747 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
749 struct device_record *rec1;
750 struct device_record *rec2;
752 rec1 = rb_entry(node1, struct device_record, node);
753 rec2 = rb_entry(node2, struct device_record, node);
754 if (rec1->devid > rec2->devid)
756 else if (rec1->devid < rec2->devid)
/*
 * Create a private deep copy of an inode record.
 *
 * @orig_rec: record to copy; it is only read.
 *
 * The scalar members are copied with memcpy(), then the three containers
 * are re-initialised and filled with newly allocated copies: each backref
 * (including its trailing name bytes), each orphan data extent, and each
 * file extent hole.
 *
 * Returns the new record, or ERR_PTR(-ENOMEM) when allocating the record
 * itself fails.  The tail of the function undoes a partial copy (holes,
 * backrefs, orphan extents); the jumps into it and its final return are on
 * lines not visible in this excerpt.
 *
 * NOTE(review): the cleanup loop over rec->orphan_extents reuses the
 * 'orig'/'tmp' cursors, which are struct inode_backref pointers, although
 * that list holds struct orphan_data_extent entries.  Both structures show
 * 'list' as their first visible member, which would make the offsets
 * agree, but a cursor of the correct type would be safer - confirm.
 */
762 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
764 struct inode_record *rec;
765 struct inode_backref *backref;
766 struct inode_backref *orig;
767 struct inode_backref *tmp;
768 struct orphan_data_extent *src_orphan;
769 struct orphan_data_extent *dst_orphan;
774 rec = malloc(sizeof(*rec));
776 return ERR_PTR(-ENOMEM);
777 memcpy(rec, orig_rec, sizeof(*rec));
/* The memcpy duplicated the container heads; give the clone empty ones. */
779 INIT_LIST_HEAD(&rec->backrefs);
780 INIT_LIST_HEAD(&rec->orphan_extents);
781 rec->holes = RB_ROOT;
783 list_for_each_entry(orig, &orig_rec->backrefs, list) {
/* Structure plus name bytes plus the terminating NUL. */
784 size = sizeof(*orig) + orig->namelen + 1;
785 backref = malloc(size);
790 memcpy(backref, orig, size);
791 list_add_tail(&backref->list, &rec->backrefs);
793 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
794 dst_orphan = malloc(sizeof(*dst_orphan));
799 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
800 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
802 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
809 rb = rb_first(&rec->holes);
811 struct file_extent_hole *hole;
813 hole = rb_entry(rb, struct file_extent_hole, node);
819 if (!list_empty(&rec->backrefs))
820 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
821 list_del(&orig->list);
825 if (!list_empty(&rec->orphan_extents))
826 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
827 list_del(&orig->list);
/*
 * Print the orphan data extents of a tree to stdout.
 *
 * @orphan_extents: list of struct orphan_data_extent; nothing is printed
 *                  when it is empty.
 *
 * A heading naming the tree is followed by one line per extent with its
 * inode, offset, disk bytenr and disk length.  The second parameter (the
 * tree id printed in the heading) is declared on a line not visible in
 * this excerpt.
 */
836 static void print_orphan_data_extents(struct list_head *orphan_extents,
839 struct orphan_data_extent *orphan;
841 if (list_empty(orphan_extents))
843 printf("The following data extent is lost in tree %llu:\n",
845 list_for_each_entry(orphan, orphan_extents, list) {
846 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
847 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Report the errors recorded for one inode on stderr.
 *
 * @root: tree the inode belongs to.
 * @rec:  inode record whose 'errors' mask is decoded.
 *
 * Output is a single line "root <id> inode <ino> errors <hex>" followed by
 * one ", <description>" fragment per set I_ERR_* bit.  For a relocation
 * tree the line is prefixed with "reloc" and the id printed is the key
 * offset (the corresponding fs root) instead of the key objectid.
 * Orphan extents and file extent holes are listed afterwards when the
 * respective error bits are set.
 *
 * NOTE(review): the orphan extent listing is passed root->objectid, not the
 * possibly adjusted local root_objectid used for the headline - confirm
 * that this difference is intended.
 */
852 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
854 u64 root_objectid = root->root_key.objectid;
855 int errors = rec->errors;
859 /* reloc root errors, we print its corresponding fs root objectid*/
860 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
861 root_objectid = root->root_key.offset;
862 fprintf(stderr, "reloc");
864 fprintf(stderr, "root %llu inode %llu errors %x",
865 (unsigned long long) root_objectid,
866 (unsigned long long) rec->ino, rec->errors);
868 if (errors & I_ERR_NO_INODE_ITEM)
869 fprintf(stderr, ", no inode item");
870 if (errors & I_ERR_NO_ORPHAN_ITEM)
871 fprintf(stderr, ", no orphan item");
872 if (errors & I_ERR_DUP_INODE_ITEM)
873 fprintf(stderr, ", dup inode item");
874 if (errors & I_ERR_DUP_DIR_INDEX)
875 fprintf(stderr, ", dup dir index");
876 if (errors & I_ERR_ODD_DIR_ITEM)
877 fprintf(stderr, ", odd dir item");
878 if (errors & I_ERR_ODD_FILE_EXTENT)
879 fprintf(stderr, ", odd file extent");
880 if (errors & I_ERR_BAD_FILE_EXTENT)
881 fprintf(stderr, ", bad file extent");
882 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
883 fprintf(stderr, ", file extent overlap");
884 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
885 fprintf(stderr, ", file extent discount");
886 if (errors & I_ERR_DIR_ISIZE_WRONG)
887 fprintf(stderr, ", dir isize wrong");
888 if (errors & I_ERR_FILE_NBYTES_WRONG)
889 fprintf(stderr, ", nbytes wrong");
890 if (errors & I_ERR_ODD_CSUM_ITEM)
891 fprintf(stderr, ", odd csum item");
892 if (errors & I_ERR_SOME_CSUM_MISSING)
893 fprintf(stderr, ", some csum missing");
894 if (errors & I_ERR_LINK_COUNT_WRONG)
895 fprintf(stderr, ", link count wrong");
896 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
897 fprintf(stderr, ", orphan file extent");
898 fprintf(stderr, "\n");
899 /* Print the orphan extents if needed */
900 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
901 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
903 /* Print the holes if needed */
904 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
905 struct file_extent_hole *hole;
906 struct rb_node *node;
909 node = rb_first(&rec->holes);
910 fprintf(stderr, "Found file extent holes:\n");
913 hole = rb_entry(node, struct file_extent_hole, node);
914 fprintf(stderr, "\tstart: %llu, len: %llu\n",
915 hole->start, hole->len);
916 node = rb_next(node);
/*
 * Fallback line for a hole starting at offset 0; the condition that
 * selects it and the start of the length expression are not visible here.
 */
919 fprintf(stderr, "\tstart: 0, len: %llu\n",
921 root->fs_info->sectorsize));
/*
 * Append a human-readable description of backref errors to stderr.
 *
 * @errors: mask of REF_ERR_* bits.
 *
 * Writes one ", <description>" fragment per set bit, in ascending bit
 * order, and always ends the line with a newline.  The caller is expected
 * to have printed the start of the line.
 */
925 static void print_ref_error(int errors)
927 if (errors & REF_ERR_NO_DIR_ITEM)
928 fprintf(stderr, ", no dir item");
929 if (errors & REF_ERR_NO_DIR_INDEX)
930 fprintf(stderr, ", no dir index");
931 if (errors & REF_ERR_NO_INODE_REF)
932 fprintf(stderr, ", no inode ref");
933 if (errors & REF_ERR_DUP_DIR_ITEM)
934 fprintf(stderr, ", dup dir item");
935 if (errors & REF_ERR_DUP_DIR_INDEX)
936 fprintf(stderr, ", dup dir index");
937 if (errors & REF_ERR_DUP_INODE_REF)
938 fprintf(stderr, ", dup inode ref");
939 if (errors & REF_ERR_INDEX_UNMATCH)
940 fprintf(stderr, ", index mismatch");
941 if (errors & REF_ERR_FILETYPE_UNMATCH)
942 fprintf(stderr, ", filetype mismatch");
943 if (errors & REF_ERR_NAME_TOO_LONG)
944 fprintf(stderr, ", name too long");
945 if (errors & REF_ERR_NO_ROOT_REF)
946 fprintf(stderr, ", no root ref");
947 if (errors & REF_ERR_NO_ROOT_BACKREF)
948 fprintf(stderr, ", no root backref");
949 if (errors & REF_ERR_DUP_ROOT_REF)
950 fprintf(stderr, ", dup root ref");
951 if (errors & REF_ERR_DUP_ROOT_BACKREF)
952 fprintf(stderr, ", dup root backref");
953 fprintf(stderr, "\n");
/*
 * Look up the inode record for an inode number in @inode_cache, optionally
 * creating it or making it private.
 *
 * When an entry exists and the caller asked for modification ('mod') while
 * the record is shared (refs > 1), the cached pointer is replaced by a
 * clone made with clone_inode_rec().  When no entry exists a zeroed record
 * is allocated, its extent_start set to (u64)-1 and its containers
 * initialised, and it is inserted into the cache keyed by the inode
 * number with size 1.
 *
 * Returns the record, or an ERR_PTR: -ENOMEM on allocation failure and
 * -EEXIST after the cache insertion.  The remaining parameters (callers
 * pass an inode number and a modify flag), the handling of
 * BTRFS_FREE_INO_OBJECTID and the conditions guarding the error returns
 * are on lines not visible in this excerpt.
 */
956 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
959 struct ptr_node *node;
960 struct cache_extent *cache;
961 struct inode_record *rec = NULL;
964 cache = lookup_cache_extent(inode_cache, ino, 1);
966 node = container_of(cache, struct ptr_node, cache);
/* Shared record about to be modified: switch this cache to its own copy. */
968 if (mod && rec->refs > 1) {
969 node->data = clone_inode_rec(rec);
970 if (IS_ERR(node->data))
976 rec = calloc(1, sizeof(*rec));
978 return ERR_PTR(-ENOMEM);
/* (u64)-1 marks "no extent seen yet"; merge_inode_recs() tests for it. */
980 rec->extent_start = (u64)-1;
982 INIT_LIST_HEAD(&rec->backrefs);
983 INIT_LIST_HEAD(&rec->orphan_extents);
984 rec->holes = RB_ROOT;
986 node = malloc(sizeof(*node));
989 return ERR_PTR(-ENOMEM);
991 node->cache.start = ino;
992 node->cache.size = 1;
995 if (ino == BTRFS_FREE_INO_OBJECTID)
998 ret = insert_cache_extent(inode_cache, &node->cache);
1000 return ERR_PTR(-EEXIST);
/*
 * Drain a list of struct orphan_data_extent: entries are unlinked from the
 * head one by one until the list is empty.  Releasing each entry's memory
 * presumably happens on a line not visible in this excerpt.
 */
1005 static void free_orphan_data_extents(struct list_head *orphan_extents)
1007 struct orphan_data_extent *orphan;
1009 while (!list_empty(orphan_extents)) {
1010 orphan = list_entry(orphan_extents->next,
1011 struct orphan_data_extent, list);
1012 list_del(&orphan->list);
/*
 * Drop one reference to an inode record.
 *
 * The reference count is decremented first; while references remain
 * nothing else happens (the early exit itself is on a line not visible
 * here).  Otherwise all backrefs are unlinked and the orphan extent list
 * and the hole tree are released.  Freeing the individual backrefs and the
 * record itself presumably happens on lines not visible in this excerpt.
 */
1017 static void free_inode_rec(struct inode_record *rec)
1019 struct inode_backref *backref;
1021 if (--rec->refs > 0)
1024 while (!list_empty(&rec->backrefs)) {
1025 backref = to_inode_backref(rec->backrefs.next);
1026 list_del(&backref->list);
1029 free_orphan_data_extents(&rec->orphan_extents);
1030 free_file_extent_holes(&rec->holes);
/*
 * Test whether an inode record is fully resolved: no errors, marked
 * checked, inode item found, link count equal to the links found, and no
 * backrefs left pending.  The returned values are on lines not visible in
 * this excerpt.
 */
1034 static int can_free_inode_rec(struct inode_record *rec)
1036 if (!rec->errors && rec->checked && rec->found_inode_item &&
1037 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Re-evaluate an inode record after new information arrived and release it
 * once nothing about it remains unresolved.
 *
 * @inode_cache: cache tree the record is stored in.
 * @rec:         the record; it must not be shared (refs == 1) by the time
 *               the final check is reached.
 *
 * Steps, in order:
 *  1. Nothing is evaluated before the inode item has been found.
 *  2. Backrefs with both dir item and dir index get their file type
 *     verified against the inode mode; those that are error free, have an
 *     inode ref and belong to an inode with a matching link count are
 *     unlinked from the list.
 *  3. Nothing further is evaluated until the record is marked checked and
 *     is not being merged.
 *  4. Size, extent and checksum consistency errors are recorded depending
 *     on whether the inode is a directory or a regular file/symlink.
 *  5. If can_free_inode_rec() agrees, the record is removed from the
 *     cache and released.
 * The early exits and the freeing of unlinked backrefs and of the cache
 * node are on lines not visible in this excerpt.
 */
1042 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1043 struct inode_record *rec)
1045 struct cache_extent *cache;
1046 struct inode_backref *tmp, *backref;
1047 struct ptr_node *node;
1050 if (!rec->found_inode_item)
1053 filetype = imode_to_type(rec->imode);
1054 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1055 if (backref->found_dir_item && backref->found_dir_index) {
1056 if (backref->filetype != filetype)
1057 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1058 if (!backref->errors && backref->found_inode_ref &&
1059 rec->nlink == rec->found_link) {
1060 list_del(&backref->list);
1066 if (!rec->checked || rec->merging)
1069 if (S_ISDIR(rec->imode)) {
1070 if (rec->found_size != rec->isize)
1071 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1072 if (rec->found_file_extent)
1073 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1074 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1075 if (rec->found_dir_item)
1076 rec->errors |= I_ERR_ODD_DIR_ITEM;
1077 if (rec->found_size != rec->nbytes)
1078 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
/* Gaps only count for linked inodes and when no_holes is not set. */
1079 if (rec->nlink > 0 && !no_holes &&
1080 (rec->extent_end < rec->isize ||
1081 first_extent_gap(&rec->holes) < rec->isize))
1082 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1085 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1086 if (rec->found_csum_item && rec->nodatasum)
1087 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1088 if (rec->some_csum_missing && !rec->nodatasum)
1089 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1092 BUG_ON(rec->refs != 1);
1093 if (can_free_inode_rec(rec)) {
1094 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1095 node = container_of(cache, struct ptr_node, cache);
1096 BUG_ON(node->data != rec);
1097 remove_cache_extent(inode_cache, &node->cache);
1099 free_inode_rec(rec);
/*
 * Search @root for an orphan item (objectid BTRFS_ORPHAN_OBJECTID, type
 * BTRFS_ORPHAN_ITEM_KEY).  The search is read-only: no transaction is
 * passed and the two trailing arguments of btrfs_search_slot() are 0.  The
 * path is released before returning.
 *
 * How @ino enters the key and how the search result maps to the return
 * value are on lines not visible in this excerpt.
 */
1103 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1105 struct btrfs_path path;
1106 struct btrfs_key key;
1109 key.objectid = BTRFS_ORPHAN_OBJECTID;
1110 key.type = BTRFS_ORPHAN_ITEM_KEY;
1113 btrfs_init_path(&path);
1114 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1115 btrfs_release_path(&path);
/*
 * Record the contents of an INODE_ITEM in the currently active inode
 * record.
 *
 * @eb:          leaf containing the item.
 * @slot:        slot of the item within @eb.
 * @key:         key of the item; its objectid must equal the record's ino.
 * @active_node: shared node whose 'current' record is updated; the record
 *               must not be shared.
 *
 * A second inode item for the same record is flagged as
 * I_ERR_DUP_INODE_ITEM.  Otherwise nlink, size, nbytes and mode are copied
 * from the item and the NODATASUM flag is examined.  An inode with zero
 * links is provisionally flagged I_ERR_NO_ORPHAN_ITEM.  Finally the record
 * is re-evaluated with maybe_free_inode_rec().  Return statements are on
 * lines not visible in this excerpt.
 */
1121 static int process_inode_item(struct extent_buffer *eb,
1122 int slot, struct btrfs_key *key,
1123 struct shared_node *active_node)
1125 struct inode_record *rec;
1126 struct btrfs_inode_item *item;
1128 rec = active_node->current;
1129 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1130 if (rec->found_inode_item) {
1131 rec->errors |= I_ERR_DUP_INODE_ITEM;
1134 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1135 rec->nlink = btrfs_inode_nlink(eb, item);
1136 rec->isize = btrfs_inode_size(eb, item);
1137 rec->nbytes = btrfs_inode_nbytes(eb, item);
1138 rec->imode = btrfs_inode_mode(eb, item);
1139 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1141 rec->found_inode_item = 1;
1142 if (rec->nlink == 0)
1143 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1144 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find the backref of @rec that matches (dir, name), creating it when
 * there is none.
 *
 * @rec:     inode record whose backref list is searched.
 * @namelen: length of the name in bytes.
 * @dir:     inode number of the containing directory.
 * The name pointer is declared on a line not visible in this excerpt.
 *
 * An existing entry matches when dir, namelen and the name bytes are all
 * equal.  The search is cut short for the special inode
 * BTRFS_MULTIPLE_OBJECTIDS.  A new entry is allocated with room for the
 * name plus a terminating NUL, its fixed part is zeroed, and it is
 * appended to the list.  Return statements and the allocation-failure
 * path are on lines not visible in this excerpt.
 */
1148 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1150 int namelen, u64 dir)
1152 struct inode_backref *backref;
1154 list_for_each_entry(backref, &rec->backrefs, list) {
1155 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1157 if (backref->dir != dir || backref->namelen != namelen)
1159 if (memcmp(name, backref->name, namelen))
1164 backref = malloc(sizeof(*backref) + namelen + 1);
1167 memset(backref, 0, sizeof(*backref));
1169 backref->namelen = namelen;
1170 memcpy(backref->name, name, namelen);
1171 backref->name[namelen] = '\0';
1172 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Record that an item referencing inode @ino under (@dir, @name) was seen.
 *
 * @inode_cache: cache the inode record lives in (created on demand).
 * @ino:         referenced inode.
 * @dir:         directory containing the name.
 * @index:       directory index carried by the item.
 * @name:        entry name, @namelen bytes.
 * @filetype:    file type carried by the item.
 * @itemtype:    key type of the item: DIR_INDEX, DIR_ITEM, INODE_REF or
 *               INODE_EXTREF.
 * @errors:      REF_ERR_* bits to OR into the backref.
 *
 * Depending on @itemtype the matching found_* bit is set and the carried
 * values are stored.  Before storing, duplicates of the same item kind and
 * disagreements with values recorded from the other kinds (index, file
 * type) are flagged.  The record is then re-evaluated with
 * maybe_free_inode_rec().  Failure to obtain the record triggers BUG_ON.
 * Return statements are on lines not visible in this excerpt.
 */
1176 static int add_inode_backref(struct cache_tree *inode_cache,
1177 u64 ino, u64 dir, u64 index,
1178 const char *name, int namelen,
1179 u8 filetype, u8 itemtype, int errors)
1181 struct inode_record *rec;
1182 struct inode_backref *backref;
1184 rec = get_inode_rec(inode_cache, ino, 1);
1185 BUG_ON(IS_ERR(rec));
1186 backref = get_inode_backref(rec, name, namelen, dir);
1189 backref->errors |= errors;
1190 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1191 if (backref->found_dir_index)
1192 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1193 if (backref->found_inode_ref && backref->index != index)
1194 backref->errors |= REF_ERR_INDEX_UNMATCH;
1195 if (backref->found_dir_item && backref->filetype != filetype)
1196 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1198 backref->index = index;
1199 backref->filetype = filetype;
1200 backref->found_dir_index = 1;
1201 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1203 if (backref->found_dir_item)
1204 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1205 if (backref->found_dir_index && backref->filetype != filetype)
1206 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1208 backref->filetype = filetype;
1209 backref->found_dir_item = 1;
1210 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1211 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1212 if (backref->found_inode_ref)
1213 backref->errors |= REF_ERR_DUP_INODE_REF;
1214 if (backref->found_dir_index && backref->index != index)
1215 backref->errors |= REF_ERR_INDEX_UNMATCH;
1217 backref->index = index;
/* Remember which of the two ref key types supplied the reference. */
1219 backref->ref_type = itemtype;
1220 backref->found_inode_ref = 1;
1225 maybe_free_inode_rec(inode_cache, rec);
/*
 * Fold everything known about an inode in @src into @dst.
 *
 * @src:       record to merge from; it is only read.
 * @dst:       record to merge into.
 * @dst_cache: cache containing @dst, needed to replay the backrefs.
 *
 * Each backref of @src is replayed through add_inode_backref() once per
 * item kind it was found as.  The found_* and csum bits are OR-ed, holes
 * are copied when @dst's first gap lies beyond @src's, link and size
 * counters are added, the extent ranges are combined (flagging an overlap,
 * or adding a hole for a gap between them), error masks are OR-ed, and the
 * inode item fields are taken from @src unless @dst already has an inode
 * item, in which case a duplicate is flagged.
 *
 * The counter 'dir_count' subtracted from the link count, part of the
 * hole arguments, error handling and the return value are on lines not
 * visible in this excerpt.
 */
1229 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1230 struct cache_tree *dst_cache)
1232 struct inode_backref *backref;
1237 list_for_each_entry(backref, &src->backrefs, list) {
1238 if (backref->found_dir_index) {
1239 add_inode_backref(dst_cache, dst->ino, backref->dir,
1240 backref->index, backref->name,
1241 backref->namelen, backref->filetype,
1242 BTRFS_DIR_INDEX_KEY, backref->errors);
1244 if (backref->found_dir_item) {
1246 add_inode_backref(dst_cache, dst->ino,
1247 backref->dir, 0, backref->name,
1248 backref->namelen, backref->filetype,
1249 BTRFS_DIR_ITEM_KEY, backref->errors);
1251 if (backref->found_inode_ref) {
1252 add_inode_backref(dst_cache, dst->ino,
1253 backref->dir, backref->index,
1254 backref->name, backref->namelen, 0,
1255 backref->ref_type, backref->errors);
1259 if (src->found_dir_item)
1260 dst->found_dir_item = 1;
1261 if (src->found_file_extent)
1262 dst->found_file_extent = 1;
1263 if (src->found_csum_item)
1264 dst->found_csum_item = 1;
1265 if (src->some_csum_missing)
1266 dst->some_csum_missing = 1;
1267 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1268 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1273 BUG_ON(src->found_link < dir_count);
1274 dst->found_link += src->found_link - dir_count;
1275 dst->found_size += src->found_size;
/* (u64)-1 means the record has not seen any extent yet. */
1276 if (src->extent_start != (u64)-1) {
1277 if (dst->extent_start == (u64)-1) {
1278 dst->extent_start = src->extent_start;
1279 dst->extent_end = src->extent_end;
1281 if (dst->extent_end > src->extent_start)
1282 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1283 else if (dst->extent_end < src->extent_start) {
1284 ret = add_file_extent_hole(&dst->holes,
1286 src->extent_start - dst->extent_end);
1288 if (dst->extent_end < src->extent_end)
1289 dst->extent_end = src->extent_end;
1293 dst->errors |= src->errors;
1294 if (src->found_inode_item) {
1295 if (!dst->found_inode_item) {
1296 dst->nlink = src->nlink;
1297 dst->isize = src->isize;
1298 dst->nbytes = src->nbytes;
1299 dst->imode = src->imode;
1300 dst->nodatasum = src->nodatasum;
1301 dst->found_inode_item = 1;
1303 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Move or merge the inode records collected under @src_node into
 * @dst_node.
 *
 * One reference is dropped from @src_node first.  Both of its caches are
 * processed, root_cache before inode_cache: for every entry a new ptr_node
 * is inserted into the matching cache of @dst_node; if the destination
 * already has a record for that inode (-EEXIST) the two records are merged
 * with merge_inode_recs(), the merged record is marked checked and
 * re-evaluated, and the source record's reference is dropped.
 *
 * Finally, if @src_node had a current inode with a larger inode number
 * than the destination's current one (or the destination has none), the
 * destination's previous current record is marked checked and the
 * destination's current pointer is moved to the record for that inode.
 *
 * What the reference drop decides, the loop conditions, the handling of
 * the source entries and the return value are on lines not visible in this
 * excerpt.
 */
1311 static int splice_shared_node(struct shared_node *src_node,
1312 struct shared_node *dst_node)
1314 struct cache_extent *cache;
1315 struct ptr_node *node, *ins;
1316 struct cache_tree *src, *dst;
1317 struct inode_record *rec, *conflict;
1318 u64 current_ino = 0;
1322 if (--src_node->refs == 0)
1324 if (src_node->current)
1325 current_ino = src_node->current->ino;
1327 src = &src_node->root_cache;
1328 dst = &dst_node->root_cache;
1330 cache = search_cache_extent(src, 0);
1332 node = container_of(cache, struct ptr_node, cache);
/* Advance before the current entry may be removed from the tree. */
1334 cache = next_cache_extent(cache);
1337 remove_cache_extent(src, &node->cache);
1340 ins = malloc(sizeof(*ins));
1342 ins->cache.start = node->cache.start;
1343 ins->cache.size = node->cache.size;
1347 ret = insert_cache_extent(dst, &ins->cache);
1348 if (ret == -EEXIST) {
1349 conflict = get_inode_rec(dst, rec->ino, 1);
1350 BUG_ON(IS_ERR(conflict));
1351 merge_inode_recs(rec, conflict, dst);
1353 conflict->checked = 1;
/* The merged record may be freed below; do not keep it as current. */
1354 if (dst_node->current == conflict)
1355 dst_node->current = NULL;
1357 maybe_free_inode_rec(dst, conflict);
1358 free_inode_rec(rec);
/* Second pass: repeat the same procedure for the inode caches. */
1365 if (src == &src_node->root_cache) {
1366 src = &src_node->inode_cache;
1367 dst = &dst_node->inode_cache;
1371 if (current_ino > 0 && (!dst_node->current ||
1372 current_ino > dst_node->current->ino)) {
1373 if (dst_node->current) {
1374 dst_node->current->checked = 1;
1375 maybe_free_inode_rec(dst, dst_node->current);
1377 dst_node->current = get_inode_rec(dst, current_ino, 1);
1378 BUG_ON(IS_ERR(dst_node->current));
/*
 * Per-entry destructor for a cache tree of ptr_node entries: recovers the
 * ptr_node that embeds 'cache' and releases an inode record with
 * free_inode_rec().
 *
 * NOTE(review): the line assigning 'rec' is not visible in this listing.
 */
1383 static void free_inode_ptr(struct cache_extent *cache)
1385 struct ptr_node *node;
1386 struct inode_record *rec;
1388 node = container_of(cache, struct ptr_node, cache);
1390 free_inode_rec(rec);
/*
 * Presumably generates free_inode_recs_tree() (called from
 * enter_shared_node()) with free_inode_ptr() as the per-entry destructor --
 * confirm against the macro definition.
 */
1394 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the shared_node registered for 'bytenr' in the 'shared' cache tree.
 *
 * The lookup uses a range of size 1 starting at bytenr; a hit is converted
 * from the embedded cache_extent back to its shared_node.  The return
 * statements are not visible in this listing.
 */
1396 static struct shared_node *find_shared_node(struct cache_tree *shared,
1399 struct cache_extent *cache;
1400 struct shared_node *node;
1402 cache = lookup_cache_extent(shared, bytenr, 1);
1404 node = container_of(cache, struct shared_node, cache);
/*
 * Allocate a zero-initialised shared_node for 'bytenr' and insert it into the
 * 'shared' cache tree.
 *
 * The node is keyed as a one-byte range starting at bytenr and gets two empty
 * caches (root_cache and inode_cache).
 *
 * NOTE(review): the handling of 'refs', of a failed calloc() and of a failed
 * insert is not visible in this listing.
 */
1410 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1413 struct shared_node *node;
1415 node = calloc(1, sizeof(*node));
1418 node->cache.start = bytenr;
1419 node->cache.size = 1;
1420 cache_tree_init(&node->root_cache);
1421 cache_tree_init(&node->inode_cache);
1424 ret = insert_cache_extent(shared, &node->cache);
/*
 * Called when the tree walk reaches the block at 'bytenr' on 'level'.
 *
 * Looks up the shared_node for the block; when one has to be created it
 * becomes wc->nodes[level] and 'level' becomes the active node.  Otherwise
 * the records of the existing node are either dropped (walk is at the root
 * level of a root whose root_refs is 0) or spliced into the currently active
 * node.
 *
 * NOTE(review): return statements and several branch lines are missing from
 * this listing.
 */
1429 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1430 struct walk_control *wc, int level)
1432 struct shared_node *node;
1433 struct shared_node *dest;
1436 if (level == wc->active_node)
/* The active node must sit above the level being entered. */
1439 BUG_ON(wc->active_node <= level);
1440 node = find_shared_node(&wc->shared, bytenr);
/* Register the block, then make it the active node for this level. */
1442 ret = add_shared_node(&wc->shared, bytenr, refs);
1444 node = find_shared_node(&wc->shared, bytenr);
1445 wc->nodes[level] = node;
1446 wc->active_node = level;
/* Root with zero root_refs: drop a reference and free the caches on the last one. */
1450 if (wc->root_level == wc->active_node &&
1451 btrfs_root_refs(&root->root_item) == 0) {
1452 if (--node->refs == 0) {
1453 free_inode_recs_tree(&node->root_cache);
1454 free_inode_recs_tree(&node->inode_cache);
1455 remove_cache_extent(&wc->shared, &node->cache);
/* Otherwise fold the node's records into the currently active node. */
1461 dest = wc->nodes[wc->active_node];
1462 splice_shared_node(node, dest);
/* splice_shared_node() decrements node->refs; unlink the node once it hits zero. */
1463 if (node->refs == 0) {
1464 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Called when the walk leaves the active shared node at 'level'.
 *
 * Searches the levels above 'level' for the next slot to activate (the loop
 * body is not visible here), clears the old active slot in wc->nodes[] and
 * switches wc->active_node to the level found.  Unless the new active node is
 * the root level of a root whose root_refs is 0, the records of the node
 * being left are spliced into the new active node.
 */
1470 static int leave_shared_node(struct btrfs_root *root,
1471 struct walk_control *wc, int level)
1473 struct shared_node *node;
1474 struct shared_node *dest;
1477 if (level == wc->root_level)
1480 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
/* The search above must have stopped on a valid level. */
1484 BUG_ON(i >= BTRFS_MAX_LEVEL);
1486 node = wc->nodes[wc->active_node];
1487 wc->nodes[wc->active_node] = NULL;
1488 wc->active_node = i;
1490 dest = wc->nodes[wc->active_node];
1491 if (wc->active_node < wc->root_level ||
1492 btrfs_root_refs(&root->root_item) > 0) {
/* The node must still hold a reference after the splice drops one. */
1493 BUG_ON(node->refs <= 1);
1494 splice_shared_node(node, dest);
1496 BUG_ON(node->refs < 2);
1505 * 1 - if the root with id child_root_id is a child of root parent_root_id
1506 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1507 * has other root(s) as parent(s)
1508 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Determine how root 'child_root_id' relates to root 'parent_root_id' using
 * items in the tree root.
 *
 * Step 1 searches for the exact key (parent_root_id, ROOT_REF, child_root_id).
 * Step 2 walks the (child_root_id, ROOT_BACKREF, *) items and compares each
 * key offset with parent_root_id.
 *
 * The final return yields 0 when has_parent is set and 2 otherwise; the
 * earlier return paths are not visible in this listing.
 */
1510 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1513 struct btrfs_path path;
1514 struct btrfs_key key;
1515 struct extent_buffer *leaf;
1519 btrfs_init_path(&path);
/* Step 1: direct forward reference from parent to child. */
1521 key.objectid = parent_root_id;
1522 key.type = BTRFS_ROOT_REF_KEY;
1523 key.offset = child_root_id;
1524 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1528 btrfs_release_path(&path);
/* Step 2: scan the child's back references. */
1532 key.objectid = child_root_id;
1533 key.type = BTRFS_ROOT_BACKREF_KEY;
1535 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1541 leaf = path.nodes[0];
/* Ran off the end of this leaf: continue in the next one. */
1542 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1543 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1546 leaf = path.nodes[0];
1549 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Stop condition: the item is no longer a back reference of the child. */
1550 if (key.objectid != child_root_id ||
1551 key.type != BTRFS_ROOT_BACKREF_KEY)
/* A back reference whose offset is the parent id names our parent. */
1556 if (key.offset == parent_root_id) {
1557 btrfs_release_path(&path);
1564 btrfs_release_path(&path);
1567 return has_parent ? 0 : 2;
/*
 * Process one DIR_ITEM or DIR_INDEX item found at 'slot' of leaf 'eb'.
 *
 * An item may hold several btrfs_dir_item entries back to back; each one is
 * decoded and recorded as a backref with add_inode_backref():
 *   - location type INODE_ITEM -> the active node's inode_cache,
 *   - location type ROOT_ITEM  -> the active node's root_cache,
 *   - anything else            -> inode_cache under BTRFS_MULTIPLE_OBJECTIDS.
 * The name length of every entry is added to the directory record's
 * found_size.
 *
 * NOTE(review): several declarations and short lines are missing from this
 * listing.
 */
1570 static int process_dir_item(struct extent_buffer *eb,
1571 int slot, struct btrfs_key *key,
1572 struct shared_node *active_node)
1582 struct btrfs_dir_item *di;
1583 struct inode_record *rec;
1584 struct cache_tree *root_cache;
1585 struct cache_tree *inode_cache;
1586 struct btrfs_key location;
1587 char namebuf[BTRFS_NAME_LEN];
1589 root_cache = &active_node->root_cache;
1590 inode_cache = &active_node->inode_cache;
/* The record being updated is the directory currently being walked. */
1591 rec = active_node->current;
1592 rec->found_dir_item = 1;
1594 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1595 total = btrfs_item_size_nr(eb, slot);
1596 while (cur < total) {
1598 btrfs_dir_item_key_to_cpu(eb, di, &location);
1599 name_len = btrfs_dir_name_len(eb, di);
1600 data_len = btrfs_dir_data_len(eb, di);
1601 filetype = btrfs_dir_type(eb, di);
1603 rec->found_size += name_len;
/* The name must fit both in the item and in BTRFS_NAME_LEN. */
1604 if (cur + sizeof(*di) + name_len > total ||
1605 name_len > BTRFS_NAME_LEN) {
1606 error = REF_ERR_NAME_TOO_LONG;
/* Not even the fixed header fits: nothing more can be decoded. */
1608 if (cur + sizeof(*di) > total)
1610 len = min_t(u32, total - cur - sizeof(*di),
/* The name bytes follow the fixed-size dir item header. */
1617 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
/* For DIR_ITEM keys the key offset must equal the hash of the name. */
1619 if (key->type == BTRFS_DIR_ITEM_KEY &&
1620 key->offset != btrfs_name_hash(namebuf, len)) {
1621 rec->errors |= I_ERR_ODD_DIR_ITEM;
/*
 * NOTE(review): namebuf is printed with %s but no NUL terminator is written
 * in the visible lines -- confirm this cannot read past the buffer.
 */
1622 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1623 key->objectid, key->offset, namebuf, len, filetype,
1624 key->offset, btrfs_name_hash(namebuf, len));
1627 if (location.type == BTRFS_INODE_ITEM_KEY) {
1628 add_inode_backref(inode_cache, location.objectid,
1629 key->objectid, key->offset, namebuf,
1630 len, filetype, key->type, error);
1631 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1632 add_inode_backref(root_cache, location.objectid,
1633 key->objectid, key->offset,
1634 namebuf, len, filetype,
1637 fprintf(stderr, "invalid location in dir item %u\n",
1639 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1640 key->objectid, key->offset, namebuf,
1641 len, filetype, key->type, error);
/* Step over header, name and data to reach the next entry. */
1644 len = sizeof(*di) + name_len + data_len;
1645 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item holding more than one entry is flagged as a duplicate index. */
1648 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1649 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Process one INODE_REF item at 'slot' of leaf 'eb'.
 *
 * The item may pack several btrfs_inode_ref entries; each is recorded with
 * add_inode_backref() in the active node's inode_cache, using key->objectid
 * as the inode and key->offset as the parent directory.  Entries whose name
 * crosses the item boundary or exceeds BTRFS_NAME_LEN are truncated and
 * tagged REF_ERR_NAME_TOO_LONG.
 */
1654 static int process_inode_ref(struct extent_buffer *eb,
1655 int slot, struct btrfs_key *key,
1656 struct shared_node *active_node)
1664 struct cache_tree *inode_cache;
1665 struct btrfs_inode_ref *ref;
1666 char namebuf[BTRFS_NAME_LEN];
1668 inode_cache = &active_node->inode_cache;
1670 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1671 total = btrfs_item_size_nr(eb, slot);
1672 while (cur < total) {
1673 name_len = btrfs_inode_ref_name_len(eb, ref);
1674 index = btrfs_inode_ref_index(eb, ref);
1676 /* inode_ref + namelen should not cross item boundary */
1677 if (cur + sizeof(*ref) + name_len > total ||
1678 name_len > BTRFS_NAME_LEN) {
/* Not even the fixed header fits in what is left of the item. */
1679 if (total < cur + sizeof(*ref))
1682 /* Still try to read out the remaining part */
1683 len = min_t(u32, total - cur - sizeof(*ref),
1685 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the fixed-size ref header. */
1691 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1692 add_inode_backref(inode_cache, key->objectid, key->offset,
1693 index, namebuf, len, 0, key->type, error);
/* Advance to the next packed entry. */
1695 len = sizeof(*ref) + name_len;
1696 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Process one INODE_EXTREF item at 'slot' of leaf 'eb'.
 *
 * Works like the INODE_REF case, except that the parent directory is stored
 * inside each btrfs_inode_extref entry rather than in the key offset.  Names
 * longer than BTRFS_NAME_LEN are truncated to BTRFS_NAME_LEN and tagged
 * REF_ERR_NAME_TOO_LONG.
 *
 * NOTE(review): unlike process_inode_ref(), no check against the item
 * boundary ('total') is visible before the name is read -- confirm against
 * the complete file.
 */
1702 static int process_inode_extref(struct extent_buffer *eb,
1703 int slot, struct btrfs_key *key,
1704 struct shared_node *active_node)
1713 struct cache_tree *inode_cache;
1714 struct btrfs_inode_extref *extref;
1715 char namebuf[BTRFS_NAME_LEN];
1717 inode_cache = &active_node->inode_cache;
1719 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1720 total = btrfs_item_size_nr(eb, slot);
1721 while (cur < total) {
1722 name_len = btrfs_inode_extref_name_len(eb, extref);
1723 index = btrfs_inode_extref_index(eb, extref);
1724 parent = btrfs_inode_extref_parent(eb, extref);
1725 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: clamp the copy length and remember the error. */
1729 len = BTRFS_NAME_LEN;
1730 error = REF_ERR_NAME_TOO_LONG;
1732 read_extent_buffer(eb, namebuf,
1733 (unsigned long)(extref + 1), len);
1734 add_inode_backref(inode_cache, key->objectid, parent,
1735 index, namebuf, len, 0, key->type, error);
/* Advance to the next packed entry. */
1737 len = sizeof(*extref) + name_len;
1738 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Walk the checksum tree over the byte range [start, start + len) and report
 * the result through '*found'.
 *
 * Each checksum item covers (item size / csum_size) sectors starting at its
 * key offset.
 *
 * NOTE(review): the lines that update 'start', 'len' and '*found' are not
 * visible in this listing.
 */
1745 static int count_csum_range(struct btrfs_root *root, u64 start,
1746 u64 len, u64 *found)
1748 struct btrfs_key key;
1749 struct btrfs_path path;
1750 struct extent_buffer *leaf;
1755 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1757 btrfs_init_path(&path);
1759 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1761 key.type = BTRFS_EXTENT_CSUM_KEY;
1763 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * No exact match: the previous item may still be a csum item (presumably one
 * that covers 'start'); the action taken is not visible here.
 */
1767 if (ret > 0 && path.slots[0] > 0) {
1768 leaf = path.nodes[0];
1769 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1770 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1771 key.type == BTRFS_EXTENT_CSUM_KEY)
1776 leaf = path.nodes[0];
/* Past the last item of this leaf: move on to the next leaf. */
1777 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1778 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1783 leaf = path.nodes[0];
1786 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1787 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1788 key.type != BTRFS_EXTENT_CSUM_KEY)
/* NOTE(review): the key of this same slot was already decoded just above. */
1791 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Item starts at or beyond the end of the requested range. */
1792 if (key.offset >= start + len)
1795 if (key.offset > start)
/* End of the data range covered by this checksum item. */
1798 size = btrfs_item_size_nr(leaf, path.slots[0]);
1799 csum_end = key.offset + (size / csum_size) *
1800 root->fs_info->sectorsize;
1801 if (csum_end > start) {
1802 size = min(csum_end - start, len);
1811 btrfs_release_path(&path);
/*
 * Process one EXTENT_DATA item at 'slot' of leaf 'eb' for the inode that is
 * current in 'active_node'.
 *
 * Tracks the contiguous range covered so far (extent_start/extent_end),
 * flagging overlaps and recording holes, validates the extent item fields,
 * accumulates found_size, and for extents with a non-zero disk_bytenr outside
 * the data reloc tree counts the checksums covering the extent.
 *
 * NOTE(review): return statements and some conditions are missing from this
 * listing.
 */
1817 static int process_file_extent(struct btrfs_root *root,
1818 struct extent_buffer *eb,
1819 int slot, struct btrfs_key *key,
1820 struct shared_node *active_node)
1822 struct inode_record *rec;
1823 struct btrfs_file_extent_item *fi;
1825 u64 disk_bytenr = 0;
1826 u64 extent_offset = 0;
/* sectorsize - 1, used as an alignment mask below. */
1827 u64 mask = root->fs_info->sectorsize - 1;
1831 rec = active_node->current;
1832 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1833 rec->found_file_extent = 1;
/* (u64)-1 marks "no extent seen yet": start tracking at this key. */
1835 if (rec->extent_start == (u64)-1) {
1836 rec->extent_start = key->offset;
1837 rec->extent_end = key->offset;
/* The new extent must start exactly where the previous one ended. */
1840 if (rec->extent_end > key->offset)
1841 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1842 else if (rec->extent_end < key->offset) {
1843 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1844 key->offset - rec->extent_end);
1849 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1850 extent_type = btrfs_file_extent_type(eb, fi);
1852 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1853 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1855 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1856 rec->found_size += num_bytes;
/* Round the inline length up to a multiple of the sector size. */
1857 num_bytes = (num_bytes + mask) & ~mask;
1858 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1859 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1860 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1861 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1862 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned. */
1863 if (num_bytes == 0 || (num_bytes & mask))
1864 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced part must lie within the extent's ram_bytes. */
1865 if (num_bytes + extent_offset >
1866 btrfs_file_extent_ram_bytes(eb, fi))
1867 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not carry any encoding. */
1868 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1869 (btrfs_file_extent_compression(eb, fi) ||
1870 btrfs_file_extent_encryption(eb, fi) ||
1871 btrfs_file_extent_other_encoding(eb, fi)))
1872 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents with a non-zero disk_bytenr count towards found_size. */
1873 if (disk_bytenr > 0)
1874 rec->found_size += num_bytes;
1876 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1878 rec->extent_end = key->offset + num_bytes;
1881 * The data reloc tree will copy full extents into its inode and then
1882 * copy the corresponding csums. Because the extent it copied could be
1883 * a preallocated extent that hasn't been written to yet there may be no
1884 * csums to copy, ergo we won't have csums for our file extent. This is
1885 * ok so just don't bother checking csums if the inode belongs to the
1888 if (disk_bytenr > 0 &&
1889 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* For compressed extents the length is taken from disk_num_bytes instead. */
1891 if (btrfs_file_extent_compression(eb, fi))
1892 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1894 disk_bytenr += extent_offset;
1896 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1899 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1901 rec->found_csum_item = 1;
/* Fewer checksummed bytes than extent bytes. */
1902 if (found < num_bytes)
1903 rec->some_csum_missing = 1;
1904 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1906 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item of leaf 'eb' for the original-mode check.
 *
 * Items are visited in key order.  When the objectid moves past the inode
 * that is current in the active shared node, the old record is marked checked
 * and a record for the new objectid becomes current.  Each item is then
 * dispatched by key type to the matching process_*() helper.
 *
 * NOTE(review): the switch head, break statements and return are missing from
 * this listing.
 */
1912 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1913 struct walk_control *wc)
1915 struct btrfs_key key;
1919 struct cache_tree *inode_cache;
1920 struct shared_node *active_node;
/* Special case: walk is at the root level of a root whose root_refs is 0. */
1922 if (wc->root_level == wc->active_node &&
1923 btrfs_root_refs(&root->root_item) == 0)
1926 active_node = wc->nodes[wc->active_node];
1927 inode_cache = &active_node->inode_cache;
1928 nritems = btrfs_header_nritems(eb);
1929 for (i = 0; i < nritems; i++) {
1930 btrfs_item_key_to_cpu(eb, &key, i);
/* Free-space objects and orphan items get special treatment (not visible here). */
1932 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1934 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* First item of a new (higher) inode number: retire the previous record. */
1937 if (active_node->current == NULL ||
1938 active_node->current->ino < key.objectid) {
1939 if (active_node->current) {
1940 active_node->current->checked = 1;
1941 maybe_free_inode_rec(inode_cache,
1942 active_node->current);
1944 active_node->current = get_inode_rec(inode_cache,
1946 BUG_ON(IS_ERR(active_node->current));
1949 case BTRFS_DIR_ITEM_KEY:
1950 case BTRFS_DIR_INDEX_KEY:
1951 ret = process_dir_item(eb, i, &key, active_node);
1953 case BTRFS_INODE_REF_KEY:
1954 ret = process_inode_ref(eb, i, &key, active_node);
1956 case BTRFS_INODE_EXTREF_KEY:
1957 ret = process_inode_extref(eb, i, &key, active_node);
1959 case BTRFS_INODE_ITEM_KEY:
1960 ret = process_inode_item(eb, i, &key, active_node);
1962 case BTRFS_EXTENT_DATA_KEY:
1963 ret = process_file_extent(root, eb, i, &key,
/*
 * Per-level cache fields; update_nodes_refs() accesses them through a
 * 'struct node_refs *'.  The struct header line is not visible in this
 * listing.
 */
/* Start offset of the tree block last looked up at each level. */
1974 u64 bytenr[BTRFS_MAX_LEVEL];
/* Reference count recorded for that block. */
1975 u64 refs[BTRFS_MAX_LEVEL];
/* Non-zero when the block at this level has to be checked in the current root. */
1976 int need_check[BTRFS_MAX_LEVEL];
/* Forward declarations; both functions are called by process_one_leaf_v2(). */
1979 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1980 struct node_refs *nrefs, u64 level);
1981 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1982 unsigned int ext_ref);
1985 * Returns >0 Found error, not fatal, should continue
1986 * Returns <0 Fatal error, must exit the whole check
1987 * Returns 0 No errors found
/*
 * Check the inodes stored in the leaf at path->nodes[0].
 *
 * Skips forward to the first INODE_ITEM (or to the first change of inode
 * number), then calls check_inode_item() repeatedly, accumulating its
 * positive error bits in 'err'.  check_inode_item() may move the path to
 * another leaf; when that happens the node reference cache 'nrefs' is
 * refreshed for the levels that changed, and path nodes below '*level' are
 * released.
 *
 * NOTE(review): loop heads, gotos and the return are missing from this
 * listing.
 */
1989 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1990 struct node_refs *nrefs, int *level, int ext_ref)
1992 struct extent_buffer *cur = path->nodes[0];
1993 struct btrfs_key key;
1997 int root_level = btrfs_header_level(root->node);
1999 int ret = 0; /* Final return value */
2000 int err = 0; /* Positive error bitmap */
/* Remember which leaf we started in, to detect a later leaf switch. */
2002 cur_bytenr = cur->start;
2004 /* skip to first inode item or the first inode number change */
2005 nritems = btrfs_header_nritems(cur);
2006 for (i = 0; i < nritems; i++) {
2007 btrfs_item_key_to_cpu(cur, &key, i);
2009 first_ino = key.objectid;
2010 if (key.type == BTRFS_INODE_ITEM_KEY ||
2011 (first_ino && first_ino != key.objectid))
2015 path->slots[0] = nritems;
2021 err |= check_inode_item(root, path, ext_ref);
2023 /* modify cur since check_inode_item may change path */
2024 cur = path->nodes[0];
2026 if (err & LAST_ITEM)
2029 /* still have inode items in this leaf */
2030 if (cur->start == cur_bytenr)
2034 * we have switched to another leaf, above nodes may
2035 * have changed, here walk down the path, if a node
2036 * or leaf is shared, check whether we can skip this
2039 for (i = root_level; i >= 0; i--) {
/* This level still points at the block already cached in nrefs. */
2040 if (path->nodes[i]->start == nrefs->bytenr[i])
2043 ret = update_nodes_refs(root,
2044 path->nodes[i]->start,
2049 if (!nrefs->need_check[i]) {
/* Release the path nodes below *level. */
2055 for (i = 0; i < *level; i++) {
2056 free_extent_buffer(path->nodes[i]);
2057 path->nodes[i] = NULL;
/*
 * Issue readahead for the children of 'node', starting at 'slot' and running
 * to the node's last pointer.
 *
 * NOTE(review): the early-return condition based on 'level' is not visible in
 * this listing.
 */
2066 static void reada_walk_down(struct btrfs_root *root,
2067 struct extent_buffer *node, int slot)
2069 struct btrfs_fs_info *fs_info = root->fs_info;
2076 level = btrfs_header_level(node);
2080 nritems = btrfs_header_nritems(node);
2081 for (i = slot; i < nritems; i++) {
2082 bytenr = btrfs_node_blockptr(node, i);
2083 ptr_gen = btrfs_node_ptr_generation(node, i);
2084 readahead_tree_block(fs_info, bytenr, ptr_gen);
2089 * Check the child node/leaf by the following condition:
2090 * 1. the first item key of the node/leaf should be the same as the one
2092 * 2. block in parent node should match the child node/leaf.
2093 * 3. generation of parent node and child's header should be consistent.
2095 * Or the child node/leaf pointed by the key in parent is not valid.
2097 * We would like to check the leaf owner too, but since subvolumes may
2098 * share leaves the owner check is not reliable; the key check should be
2099 * sufficient for that case.
/*
 * Cross-check 'child' against the pointer at 'slot' of 'parent'.
 *
 * Three properties are compared and a message is printed for each mismatch:
 *   - the key stored in the parent slot vs. the first key of the child,
 *   - the block pointer in the parent vs. the bytenr in the child header,
 *   - the pointer generation in the parent vs. the child header generation.
 *
 * NOTE(review): the lines setting the return value are missing from this
 * listing.
 */
2101 static int check_child_node(struct extent_buffer *parent, int slot,
2102 struct extent_buffer *child)
2104 struct btrfs_key parent_key;
2105 struct btrfs_key child_key;
2108 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* Leaves store item keys, internal nodes store node keys. */
2109 if (btrfs_header_level(child) == 0)
2110 btrfs_item_key_to_cpu(child, &child_key, 0);
2112 btrfs_node_key_to_cpu(child, &child_key, 0);
2114 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2117 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2118 parent_key.objectid, parent_key.type, parent_key.offset,
2119 child_key.objectid, child_key.type, child_key.offset);
2121 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2123 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2124 btrfs_node_blockptr(parent, slot),
2125 btrfs_header_bytenr(child));
2127 if (btrfs_node_ptr_generation(parent, slot) !=
2128 btrfs_header_generation(child)) {
/*
 * NOTE(review): here "wanted" is the child's value and "have" the parent's,
 * the opposite of the key and block messages above -- the two arguments look
 * swapped.
 */
2130 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2131 btrfs_header_generation(child),
2132 btrfs_node_ptr_generation(parent, slot));
2138 * For a tree node or leaf that is shared, we don't need to iterate it
2139 * in every fs or file tree check. Here we find all its root ids, and only
2140 * check it in the fs or file tree which has the smallest root id.
/*
 * Decide whether 'root' should check a block whose referencing roots are
 * listed in 'roots'.
 *
 * A block referenced by exactly one root is handled by the first test.
 * Otherwise the first entry of the ulist's rb-tree is compared with
 * root->objectid.  The return values are not visible in this listing.
 */
2142 static int need_check(struct btrfs_root *root, struct ulist *roots)
2144 struct rb_node *node;
2145 struct ulist_node *u;
2147 if (roots->nnodes == 1)
2150 node = rb_first(&roots->root);
2151 u = rb_entry(node, struct ulist_node, rb_node);
2153 * current root id is not smallest, we skip it and let it be checked
2154 * in the fs or file tree who hash the smallest root id.
2156 if (root->objectid != u->val)
2163 * For a tree node or leaf, we record its reference count, so that if we
2164 * process the same node or leaf again later we don't need to compute it again.
/*
 * Refresh the cached information in 'nrefs' for the tree block at 'bytenr'
 * on 'level'.
 *
 * Nothing is looked up when the cache already holds this bytenr for the
 * level.  Otherwise the reference count is fetched with
 * btrfs_lookup_extent_info() and stored; need_check[level] is then set either
 * from need_check() on the roots found by btrfs_find_all_roots(), or to 1.
 * The condition selecting between those two is not visible in this listing.
 */
2166 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2167 struct node_refs *nrefs, u64 level)
2171 struct ulist *roots;
2173 if (nrefs->bytenr[level] != bytenr) {
2174 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2175 level, 1, &refs, NULL);
2179 nrefs->bytenr[level] = bytenr;
2180 nrefs->refs[level] = refs;
2182 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2187 check = need_check(root, roots);
2189 nrefs->need_check[level] = check;
2191 nrefs->need_check[level] = 1;
/*
 * Descend from path->nodes[*level] towards the leaves (original mode).
 *
 * Reference counts of visited blocks are cached in 'nrefs'.  Leaves are
 * handed to process_one_leaf().  For internal nodes the child at the current
 * slot is read (with readahead of its siblings), validated against the parent
 * with check_child_node() and with btrfs_check_leaf()/btrfs_check_node(), and
 * then pushed onto the path one level down.  Unreadable children are recorded
 * with btrfs_add_corrupt_extent_record().
 *
 * NOTE(review): many short lines (conditions, gotos, the return) are missing
 * from this listing.
 */
2198 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2199 struct walk_control *wc, int *level,
2200 struct node_refs *nrefs)
2202 enum btrfs_tree_block_status status;
2205 struct btrfs_fs_info *fs_info = root->fs_info;
2206 struct extent_buffer *next;
2207 struct extent_buffer *cur;
2211 WARN_ON(*level < 0);
2212 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached ref count when this block was already looked up. */
2214 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2215 refs = nrefs->refs[*level];
2218 ret = btrfs_lookup_extent_info(NULL, root,
2219 path->nodes[*level]->start,
2220 *level, 1, &refs, NULL);
2225 nrefs->bytenr[*level] = path->nodes[*level]->start;
2226 nrefs->refs[*level] = refs;
2230 ret = enter_shared_node(root, path->nodes[*level]->start,
2238 while (*level >= 0) {
2239 WARN_ON(*level < 0);
2240 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2241 cur = path->nodes[*level];
/* The header level must agree with the level we believe we are on. */
2243 if (btrfs_header_level(cur) != *level)
/* All slots of this block have been consumed. */
2246 if (path->slots[*level] >= btrfs_header_nritems(cur))
2249 ret = process_one_leaf(root, cur, wc);
2254 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2255 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
/* Same caching scheme for the child one level down. */
2257 if (bytenr == nrefs->bytenr[*level - 1]) {
2258 refs = nrefs->refs[*level - 1];
2260 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2261 *level - 1, 1, &refs, NULL);
2265 nrefs->bytenr[*level - 1] = bytenr;
2266 nrefs->refs[*level - 1] = refs;
2271 ret = enter_shared_node(root, bytenr, refs,
2274 path->slots[*level]++;
/* Try the cached buffer first; fall back to reading from disk. */
2279 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2280 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2281 free_extent_buffer(next);
2282 reada_walk_down(root, cur, path->slots[*level]);
2283 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2284 if (!extent_buffer_uptodate(next)) {
2285 struct btrfs_key node_key;
2287 btrfs_node_key_to_cpu(path->nodes[*level],
2289 path->slots[*level]);
2290 btrfs_add_corrupt_extent_record(root->fs_info,
2292 path->nodes[*level]->start,
2293 root->fs_info->nodesize,
2300 ret = check_child_node(cur, path->slots[*level], next);
2302 free_extent_buffer(next);
2307 if (btrfs_is_leaf(next))
2308 status = btrfs_check_leaf(root, NULL, next);
2310 status = btrfs_check_node(root, NULL, next);
2311 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2312 free_extent_buffer(next);
/* Descend: the child replaces whatever the path held one level down. */
2317 *level = *level - 1;
2318 free_extent_buffer(path->nodes[*level]);
2319 path->nodes[*level] = next;
2320 path->slots[*level] = 0;
/* Mark the current block as fully consumed. */
2323 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/* Forward declaration. NOTE(review): an identical declaration appears earlier in this file. */
2327 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2328 unsigned int ext_ref);
2331 * Returns >0 Found error, should continue
2332 * Returns <0 Fatal error, must exit the whole check
2333 * Returns 0 No errors found
/*
 * Descend from path->nodes[*level] towards the leaves, using 'nrefs' to skip
 * blocks that do not need checking in this root.
 *
 * Leaves are validated with btrfs_check_leaf() and handed to
 * process_one_leaf_v2(); internal nodes are validated with
 * btrfs_check_node().  A child whose need_check flag is clear is skipped by
 * advancing the slot.  Otherwise the child is read (with readahead),
 * validated against its parent and pushed onto the path one level down.
 *
 * NOTE(review): many short lines (conditions, gotos, the return) are missing
 * from this listing.
 */
2335 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2336 int *level, struct node_refs *nrefs, int ext_ref)
2338 enum btrfs_tree_block_status status;
2341 struct btrfs_fs_info *fs_info = root->fs_info;
2342 struct extent_buffer *next;
2343 struct extent_buffer *cur;
2346 WARN_ON(*level < 0);
2347 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2349 ret = update_nodes_refs(root, path->nodes[*level]->start,
2354 while (*level >= 0) {
2355 WARN_ON(*level < 0);
2356 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2357 cur = path->nodes[*level];
/* The header level must agree with the level we believe we are on. */
2359 if (btrfs_header_level(cur) != *level)
/* All slots of this block have been consumed. */
2362 if (path->slots[*level] >= btrfs_header_nritems(cur))
2364 /* Don't forget to check leaf/node validity */
2366 ret = btrfs_check_leaf(root, NULL, cur);
2367 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2371 ret = process_one_leaf_v2(root, path, nrefs,
/* process_one_leaf_v2() may have moved the path; reload the current block. */
2373 cur = path->nodes[*level];
2376 ret = btrfs_check_node(root, NULL, cur);
2377 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2382 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2383 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2385 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
/* Child does not need checking in this root: skip to the next slot. */
2388 if (!nrefs->need_check[*level - 1]) {
2389 path->slots[*level]++;
/* Try the cached buffer first; fall back to reading from disk. */
2393 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2394 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2395 free_extent_buffer(next);
2396 reada_walk_down(root, cur, path->slots[*level]);
2397 next = read_tree_block(fs_info, bytenr, ptr_gen);
2398 if (!extent_buffer_uptodate(next)) {
2399 struct btrfs_key node_key;
2401 btrfs_node_key_to_cpu(path->nodes[*level],
2403 path->slots[*level]);
2404 btrfs_add_corrupt_extent_record(fs_info,
2406 path->nodes[*level]->start,
2414 ret = check_child_node(cur, path->slots[*level], next);
2418 if (btrfs_is_leaf(next))
2419 status = btrfs_check_leaf(root, NULL, next);
2421 status = btrfs_check_node(root, NULL, next);
2422 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2423 free_extent_buffer(next);
/* Descend: the child replaces whatever the path held one level down. */
2428 *level = *level - 1;
2429 free_extent_buffer(path->nodes[*level]);
2430 path->nodes[*level] = next;
2431 path->slots[*level] = 0;
/*
 * Climb the path from '*level' looking for a block that still has an
 * unvisited slot (original mode).
 *
 * A level whose slots are exhausted has its buffer released, and
 * leave_shared_node() is called when that level is the active shared node.
 * What happens when an unvisited slot is found is not visible in this
 * listing.
 */
2436 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2437 struct walk_control *wc, int *level)
2440 struct extent_buffer *leaf;
2442 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite the name, 'leaf' is the block at level i. */
2443 leaf = path->nodes[i];
/* There is still a slot after the current one in this block. */
2444 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2449 free_extent_buffer(path->nodes[*level]);
2450 path->nodes[*level] = NULL;
2451 BUG_ON(*level > wc->active_node);
2452 if (*level == wc->active_node)
2453 leave_shared_node(root, wc, *level);
/*
 * Variant of walk_up_tree() without shared-node bookkeeping: climbs the path
 * from '*level' looking for a block that still has an unvisited slot, and
 * releases the buffer of an exhausted level.
 *
 * NOTE(review): part of the signature and the branch bodies are missing from
 * this listing.
 */
2460 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2464 struct extent_buffer *leaf;
2466 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2467 leaf = path->nodes[i];
/* There is still a slot after the current one in this block. */
2468 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2473 free_extent_buffer(path->nodes[*level]);
2474 path->nodes[*level] = NULL;
/*
 * Sanity-check the inode record of a root directory.
 *
 * The tests cover: inode item present and no errors recorded; nlink and
 * found_link values; presence of a backref; that the first backref is an
 * inode ref with index 0 and name ".."; and whether that backref has a dir
 * item or dir index.  The outcome of each test (goto/return) is not visible
 * in this listing.
 */
2481 static int check_root_dir(struct inode_record *rec)
2483 struct inode_backref *backref;
2486 if (!rec->found_inode_item || rec->errors)
2488 if (rec->nlink != 1 || rec->found_link != 0)
2490 if (list_empty(&rec->backrefs))
/* Only the first backref on the list is examined. */
2492 backref = to_inode_backref(rec->backrefs.next);
2493 if (!backref->found_inode_ref)
2495 if (backref->index != 0 || backref->namelen != 2 ||
2496 memcmp(backref->name, "..", 2))
2498 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size field of a directory's inode item to rec->found_size and
 * clear I_ERR_DIR_ISIZE_WRONG on the record.
 *
 * The search key uses offset (u64)-1, so the search is not expected to match
 * exactly; the code then inspects path->slots[0] (presumably stepping back
 * one slot -- that line is not visible here) and verifies the objectid.
 */
2505 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2506 struct btrfs_root *root, struct btrfs_path *path,
2507 struct inode_record *rec)
2509 struct btrfs_inode_item *ei;
2510 struct btrfs_key key;
2513 key.objectid = rec->ino;
2514 key.type = BTRFS_INODE_ITEM_KEY;
2515 key.offset = (u64)-1;
2517 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Slot 0 leaves no previous item to inspect. */
2521 if (!path->slots[0]) {
2528 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2529 if (key.objectid != rec->ino) {
2534 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2535 struct btrfs_inode_item);
2536 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2537 btrfs_mark_buffer_dirty(path->nodes[0]);
2538 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2539 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2540 root->root_key.objectid);
2542 btrfs_release_path(path);
/*
 * Add an orphan item for rec->ino with btrfs_add_orphan_item() and clear
 * I_ERR_NO_ORPHAN_ITEM on the record.
 *
 * NOTE(review): the condition guarding the flag clear is not visible in this
 * listing.
 */
2546 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2547 struct btrfs_root *root,
2548 struct btrfs_path *path,
2549 struct inode_record *rec)
2553 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2554 btrfs_release_path(path);
2556 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field of the inode item of rec->ino to rec->found_size
 * and clear I_ERR_FILE_NBYTES_WRONG on the record.
 *
 * NOTE(review): the assignment of key.offset and the handling of the search
 * result are not visible in this listing.
 */
2560 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2561 struct btrfs_root *root,
2562 struct btrfs_path *path,
2563 struct inode_record *rec)
2565 struct btrfs_inode_item *ei;
2566 struct btrfs_key key;
2569 key.objectid = rec->ino;
2570 key.type = BTRFS_INODE_ITEM_KEY;
2573 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2580 /* Since ret == 0, no need to check anything */
2581 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2582 struct btrfs_inode_item);
2583 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2584 btrfs_mark_buffer_dirty(path->nodes[0]);
2585 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2586 printf("reset nbytes for ino %llu root %llu\n",
2587 rec->ino, root->root_key.objectid);
2589 btrfs_release_path(path);
/*
 * Recreate a missing DIR_INDEX item for 'backref' of inode 'rec'.
 *
 * Inserts an item keyed (backref->dir, DIR_INDEX, backref->index) in its own
 * transaction.  The item points at (rec->ino, INODE_ITEM, 0) and carries the
 * backref's name with no extra data.  Afterwards the backref is marked as
 * having a dir index, and the parent directory's record gets its found_size
 * and I_ERR_DIR_ISIZE_WRONG flag updated.
 *
 * NOTE(review): the result of btrfs_commit_transaction() is not visibly
 * checked.
 */
2593 static int add_missing_dir_index(struct btrfs_root *root,
2594 struct cache_tree *inode_cache,
2595 struct inode_record *rec,
2596 struct inode_backref *backref)
2598 struct btrfs_path path;
2599 struct btrfs_trans_handle *trans;
2600 struct btrfs_dir_item *dir_item;
2601 struct extent_buffer *leaf;
2602 struct btrfs_key key;
2603 struct btrfs_disk_key disk_key;
2604 struct inode_record *dir_rec;
2605 unsigned long name_ptr;
/* Item payload: fixed header followed by the name. */
2606 u32 data_size = sizeof(*dir_item) + backref->namelen;
2609 trans = btrfs_start_transaction(root, 1);
2611 return PTR_ERR(trans);
2613 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2614 (unsigned long long)rec->ino);
2616 btrfs_init_path(&path);
2617 key.objectid = backref->dir;
2618 key.type = BTRFS_DIR_INDEX_KEY;
2619 key.offset = backref->index;
2620 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2623 leaf = path.nodes[0];
2624 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* Location key of the new entry, built directly in on-disk byte order. */
2626 disk_key.objectid = cpu_to_le64(rec->ino);
2627 disk_key.type = BTRFS_INODE_ITEM_KEY;
2628 disk_key.offset = 0;
2630 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2631 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2632 btrfs_set_dir_data_len(leaf, dir_item, 0);
2633 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is stored right after the fixed header. */
2634 name_ptr = (unsigned long)(dir_item + 1);
2635 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2636 btrfs_mark_buffer_dirty(leaf);
2637 btrfs_release_path(&path);
2638 btrfs_commit_transaction(trans, root);
/* Reflect the repair in the in-memory records. */
2640 backref->found_dir_index = 1;
2641 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2642 BUG_ON(IS_ERR(dir_rec));
2645 dir_rec->found_size += backref->namelen;
2646 if (dir_rec->found_size == dir_rec->isize &&
2647 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2648 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2649 if (dir_rec->found_size != dir_rec->isize)
2650 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by 'backref' in its own transaction.
 *
 * The entry is located with btrfs_lookup_dir_index().  Two removal calls are
 * visible, btrfs_del_item() and btrfs_delete_one_dir_name(); the condition
 * choosing between them is not visible in this listing.
 */
2655 static int delete_dir_index(struct btrfs_root *root,
2656 struct inode_backref *backref)
2658 struct btrfs_trans_handle *trans;
2659 struct btrfs_dir_item *di;
2660 struct btrfs_path path;
2663 trans = btrfs_start_transaction(root, 1);
2665 return PTR_ERR(trans);
2667 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2668 (unsigned long long)backref->dir,
2669 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2670 (unsigned long long)root->objectid);
2672 btrfs_init_path(&path);
2673 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2674 backref->name, backref->namelen,
2675 backref->index, -1);
/* Early-exit path: release the path and close the transaction. */
2678 btrfs_release_path(&path);
2679 btrfs_commit_transaction(trans, root);
2686 ret = btrfs_del_item(trans, root, &path);
2688 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2690 btrfs_release_path(&path);
2691 btrfs_commit_transaction(trans, root);
/*
 * Build an inode item on the stack and insert it for 'ino' with
 * btrfs_insert_inode().
 *
 * size, nbytes, nlink and mode come from the caller; generation is the
 * transaction id; ctime and mtime are set to the current time; otime is
 * zero.  A warning tells the user the recreated inode may be incomplete.
 *
 * NOTE(review): only the nanoseconds of atime are set here, its seconds field
 * is never assigned, unlike ctime and mtime -- confirm whether intentional.
 */
2695 static int __create_inode_item(struct btrfs_trans_handle *trans,
2696 struct btrfs_root *root, u64 ino, u64 size,
2697 u64 nbytes, u64 nlink, u32 mode)
2699 struct btrfs_inode_item ii;
2700 time_t now = time(NULL);
2703 btrfs_set_stack_inode_size(&ii, size);
2704 btrfs_set_stack_inode_nbytes(&ii, nbytes);
2705 btrfs_set_stack_inode_nlink(&ii, nlink);
2706 btrfs_set_stack_inode_mode(&ii, mode);
2707 btrfs_set_stack_inode_generation(&ii, trans->transid);
2708 btrfs_set_stack_timespec_nsec(&ii.atime, 0);
2709 btrfs_set_stack_timespec_sec(&ii.ctime, now);
2710 btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
2711 btrfs_set_stack_timespec_sec(&ii.mtime, now);
2712 btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
2713 btrfs_set_stack_timespec_sec(&ii.otime, 0);
2714 btrfs_set_stack_timespec_nsec(&ii.otime, 0);
2716 ret = btrfs_insert_inode(trans, root, ino, &ii);
2719 warning("root %llu inode %llu recreating inode item, this may "
2720 "be incomplete, please check permissions and content after "
2721 "the fsck completes.\n", (unsigned long long)root->objectid,
2722 (unsigned long long)ino);
/*
 * Create an empty inode item for 'ino': size, nbytes and nlink are all 0.
 * The mode is a directory for BTRFS_FT_DIR and a regular file otherwise, with
 * permissions 0755.
 */
2727 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2728 struct btrfs_root *root, u64 ino,
2731 u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2733 return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
/*
 * Recreate the inode item for 'rec' in its own transaction.
 *
 * nlink is 1 for a root directory, otherwise the number of links found.  A
 * record that has a dir item is recreated as a directory whose size is
 * found_size; any other record becomes a regular file whose size is
 * extent_end.  Both get permissions 0755.
 */
2736 static int create_inode_item(struct btrfs_root *root,
2737 struct inode_record *rec, int root_dir)
2739 struct btrfs_trans_handle *trans;
2745 trans = btrfs_start_transaction(root, 1);
2746 if (IS_ERR(trans)) {
2747 ret = PTR_ERR(trans);
2751 nlink = root_dir ? 1 : rec->found_link;
2752 if (rec->found_dir_item) {
/* Ambiguous record: both directory entries and file extents were found. */
2753 if (rec->found_file_extent)
2754 fprintf(stderr, "root %llu inode %llu has both a dir "
2755 "item and extents, unsure if it is a dir or a "
2756 "regular file so setting it as a directory\n",
2757 (unsigned long long)root->objectid,
2758 (unsigned long long)rec->ino);
2759 mode = S_IFDIR | 0755;
2760 size = rec->found_size;
/* NOTE(review): this condition is always true here; a plain else would be equivalent. */
2761 } else if (!rec->found_dir_item) {
2762 size = rec->extent_end;
2763 mode = S_IFREG | 0755;
2766 ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
2768 btrfs_commit_transaction(trans, root);
/*
 * Walk the backrefs of @rec and repair inconsistent combinations of dir
 * index, dir item, inode ref and inode item.
 *
 * Cases visible in this listing:
 *  - root directory without an inode item (non-delete pass): recreate the
 *    inode item with create_inode_item(root, rec, 1);
 *  - index 0 of the root directory is treated specially;
 *  - a dir index without an inode ref, or with REF_ERR_INDEX_UNMATCH set:
 *    delete_dir_index() and remove the backref from the list;
 *  - dir item and inode ref present but no dir index (non-delete pass):
 *    add_missing_dir_index();
 *  - dir item, dir index and inode ref all present with no errors: remove
 *    the backref from the list;
 *  - only an inode ref present (non-delete pass): check_dir_conflict(), then
 *    btrfs_insert_dir_item() inside a new transaction;
 *  - everything present except the inode item (non-delete pass):
 *    create_inode_item(root, rec, 0).
 *
 * Returns ret when it is non-zero, otherwise the value of repaired.
 */
2772 static int repair_inode_backrefs(struct btrfs_root *root,
2773 struct inode_record *rec,
2774 struct cache_tree *inode_cache,
2777 struct inode_backref *tmp, *backref;
2778 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* The _safe iterator is used because entries are removed while walking. */
2782 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2783 if (!delete && rec->ino == root_dirid) {
2784 if (!rec->found_inode_item) {
2785 ret = create_inode_item(root, rec, 1);
2792 /* Index 0 for root dir's are special, don't mess with it */
2793 if (rec->ino == root_dirid && backref->index == 0)
2797 ((backref->found_dir_index && !backref->found_inode_ref) ||
2798 (backref->found_dir_index && backref->found_inode_ref &&
2799 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2800 ret = delete_dir_index(root, backref);
2804 list_del(&backref->list);
2809 if (!delete && !backref->found_dir_index &&
2810 backref->found_dir_item && backref->found_inode_ref) {
2811 ret = add_missing_dir_index(root, inode_cache, rec,
2816 if (backref->found_dir_item &&
2817 backref->found_dir_index) {
2818 if (!backref->errors &&
2819 backref->found_inode_ref) {
2820 list_del(&backref->list);
2827 if (!delete && (!backref->found_dir_index &&
2828 !backref->found_dir_item &&
2829 backref->found_inode_ref)) {
2830 struct btrfs_trans_handle *trans;
2831 struct btrfs_key location;
2833 ret = check_dir_conflict(root, backref->name,
2839 * let nlink fixing routine to handle it,
2840 * which can do it better.
2845 location.objectid = rec->ino;
2846 location.type = BTRFS_INODE_ITEM_KEY;
2847 location.offset = 0;
2849 trans = btrfs_start_transaction(root, 1);
2850 if (IS_ERR(trans)) {
2851 ret = PTR_ERR(trans);
2854 fprintf(stderr, "adding missing dir index/item pair "
2856 (unsigned long long)rec->ino);
2857 ret = btrfs_insert_dir_item(trans, root, backref->name,
2859 backref->dir, &location,
2860 imode_to_type(rec->imode),
2863 btrfs_commit_transaction(trans, root);
2867 if (!delete && (backref->found_inode_ref &&
2868 backref->found_dir_index &&
2869 backref->found_dir_item &&
2870 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2871 !rec->found_inode_item)) {
2872 ret = create_inode_item(root, rec, 0);
2879 return ret ? ret : repaired;
/*
 * Visible logic: when the inode item was found, the type is derived from
 * rec->imode through imode_to_type(); otherwise the backref list is scanned
 * and the filetype of a backref that has a dir index or a dir item is stored
 * into @type.
 */
2883 * To determine the file type for nlink/inode_item repair
2885 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2886 * Return -ENOENT if file type is not found.
2888 static int find_file_type(struct inode_record *rec, u8 *type)
2890 struct inode_backref *backref;
2892 /* For inode item recovered case */
2893 if (rec->found_inode_item) {
2894 *type = imode_to_type(rec->imode);
2898 list_for_each_entry(backref, &rec->backrefs, list) {
2899 if (backref->found_dir_index || backref->found_dir_item) {
2900 *type = backref->filetype;
/*
 * Visible logic: scan the backrefs of @rec and, for one that has a dir
 * index, a dir item or an inode ref, copy backref->namelen bytes of its name
 * into @name and store that length in @namelen.
 *
 * NOTE(review): the memcpy writes no terminating NUL and no check of the
 * capacity of @name is visible here; the caller presumably supplies a buffer
 * of at least BTRFS_NAME_LEN bytes -- confirm.
 */
2908 * To determine the file name for nlink repair
2910 * Return 0 if file name is found, set name and namelen.
2911 * Return -ENOENT if file name is not found.
2913 static int find_file_name(struct inode_record *rec,
2914 char *name, int *namelen)
2916 struct inode_backref *backref;
2918 list_for_each_entry(backref, &rec->backrefs, list) {
2919 if (backref->found_dir_index || backref->found_dir_item ||
2920 backref->found_inode_ref) {
2921 memcpy(name, backref->name, backref->namelen);
2922 *namelen = backref->namelen;
/*
 * Rebuild the link count of @rec from scratch.
 *
 * Visible sequence:
 *  1. rec->found_link is cleared.
 *  2. Every backref is passed to btrfs_unlink(); backrefs that do not have
 *     all of dir index, dir item and inode ref are removed from the list.
 *  3. The inode item is looked up (with cow enabled) and its nlink is set
 *     to 0.
 *  4. Each remaining backref is re-added with btrfs_add_link(), which per the
 *     existing comment below takes care of incrementing nlink.
 *  5. The path is released.
 */
2929 /* Reset the nlink of the inode to the correct one */
2930 static int reset_nlink(struct btrfs_trans_handle *trans,
2931 struct btrfs_root *root,
2932 struct btrfs_path *path,
2933 struct inode_record *rec)
2935 struct inode_backref *backref;
2936 struct inode_backref *tmp;
2937 struct btrfs_key key;
2938 struct btrfs_inode_item *inode_item;
2941 /* We don't believe this either, reset it and iterate backref */
2942 rec->found_link = 0;
2944 /* Remove all backref including the valid ones */
2945 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2946 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2947 backref->index, backref->name,
2948 backref->namelen, 0);
2952 /* remove invalid backref, so it won't be added back */
2953 if (!(backref->found_dir_index &&
2954 backref->found_dir_item &&
2955 backref->found_inode_ref)) {
2956 list_del(&backref->list);
2963 /* Set nlink to 0 */
2964 key.objectid = rec->ino;
2965 key.type = BTRFS_INODE_ITEM_KEY;
2967 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2974 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2975 struct btrfs_inode_item);
2976 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2977 btrfs_mark_buffer_dirty(path->nodes[0]);
2978 btrfs_release_path(path);
2981 * Add back valid inode_ref/dir_item/dir_index,
2982 * add_link() will handle the nlink inc, so new nlink must be correct
2984 list_for_each_entry(backref, &rec->backrefs, list) {
2985 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2986 backref->name, backref->namelen,
2987 backref->filetype, &backref->index, 1, 0);
2992 btrfs_release_path(path);
/*
 * Look up the highest inode number present in @root and store it through
 * the highest_ino pointer.
 *
 * The search key uses objectid BTRFS_LAST_FREE_OBJECTID and type
 * BTRFS_INODE_ITEM_KEY; the key of the slot just before the position
 * returned by btrfs_search_slot() supplies the result.  The result is then
 * compared against BTRFS_LAST_FREE_OBJECTID and the path is released.
 *
 * NOTE(review): the read uses path->slots[0] - 1; no guard for a slot value
 * of 0 is visible in this listing -- confirm.
 * NOTE(review): the search is issued with ins_len -1 and cow 1 although only
 * a read of the key is visible -- confirm this is intended.
 */
2996 static int get_highest_inode(struct btrfs_trans_handle *trans,
2997 struct btrfs_root *root,
2998 struct btrfs_path *path,
3001 struct btrfs_key key, found_key;
3004 btrfs_init_path(path);
3005 key.objectid = BTRFS_LAST_FREE_OBJECTID;
3007 key.type = BTRFS_INODE_ITEM_KEY;
3008 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3010 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3011 path->slots[0] - 1);
3012 *highest_ino = found_key.objectid;
3015 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3017 btrfs_release_path(path);
/*
 * Repair the link count of @rec.
 *
 * Steps visible in this listing:
 *  1. Recover a file name and a file type from the existing backrefs.  The
 *     fallbacks are the decimal inode number as the name and
 *     BTRFS_FT_REG_FILE as the type.
 *  2. Call reset_nlink() to rebuild nlink from the backrefs.
 *  3. If rec->found_link is 0 afterwards, create the lost+found directory
 *     under BTRFS_FIRST_FREE_OBJECTID with btrfs_mkdir() and link the inode
 *     into it, retrying with an added suffix while btrfs_add_link() returns
 *     -EEXIST and the name still fits.
 *  4. Clear I_ERR_LINK_COUNT_WRONG and release the path.
 */
3021 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3022 struct btrfs_root *root,
3023 struct btrfs_path *path,
3024 struct inode_record *rec)
3026 char *dir_name = "lost+found";
3027 char namebuf[BTRFS_NAME_LEN] = {0};
3032 int name_recovered = 0;
3033 int type_recovered = 0;
3037 * Get file name and type first before these invalid inode ref
3038 * are deleted by remove_all_invalid_backref()
3040 name_recovered = !find_file_name(rec, namebuf, &namelen);
3041 type_recovered = !find_file_type(rec, &type);
3043 if (!name_recovered) {
3044 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3045 rec->ino, rec->ino);
3046 namelen = count_digits(rec->ino);
3047 sprintf(namebuf, "%llu", rec->ino);
3050 if (!type_recovered) {
3051 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3053 type = BTRFS_FT_REG_FILE;
3057 ret = reset_nlink(trans, root, path, rec);
3060 "Failed to reset nlink for inode %llu: %s\n",
3061 rec->ino, strerror(-ret));
3065 if (rec->found_link == 0) {
3066 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3070 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3071 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3074 fprintf(stderr, "Failed to create '%s' dir: %s\n",
3075 dir_name, strerror(-ret));
3078 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
3079 namebuf, namelen, type, NULL, 1, 0);
3081 * Add ".INO" suffix several times to handle case where
3082 * "FILENAME.INO" is already taken by another file.
3084 while (ret == -EEXIST) {
3086 * Conflicting file name, add ".INO" as suffix * +1 for '.'
3088 if (namelen + count_digits(rec->ino) + 1 >
3093 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
3095 namelen += count_digits(rec->ino) + 1;
3096 ret = btrfs_add_link(trans, root, rec->ino,
3097 lost_found_ino, namebuf,
3098 namelen, type, NULL, 1, 0);
3102 "Failed to link the inode %llu to %s dir: %s\n",
3103 rec->ino, dir_name, strerror(-ret));
3107 * Just increase the found_link, don't actually add the
3108 * backref. This will make things easier and this inode
3109 * record will be freed after the repair is done.
3110 * So fsck will not report problem about this inode.
3113 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3114 namelen, namebuf, dir_name);
3116 printf("Fixed the nlink of inode %llu\n", rec->ino);
3119 * Clear the flag anyway, or we will loop forever for the same inode
3120 * as it will not be removed from the bad inode list and the dead loop
3123 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3124 btrfs_release_path(path);
/*
 * Visible logic: search @root read-only (no transaction, no cow) for an
 * EXTENT_DATA key, advance to the next leaf when the returned slot is past
 * the last item, and then inspect items whose key still has objectid @ino
 * and type BTRFS_EXTENT_DATA_KEY.  Each such item has its file extent type
 * read, and any type other than BTRFS_FILE_EXTENT_INLINE is singled out.
 * The path is released before returning.
 */
3129 * Check if there is any normal(reg or prealloc) file extent for given
3131 * This is used to determine the file type when neither its dir_index/item or
3132 * inode_item exists.
3134 * This will *NOT* report error, if any error happens, just consider it does
3135 * not have any normal file extent.
3137 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3139 struct btrfs_path path;
3140 struct btrfs_key key;
3141 struct btrfs_key found_key;
3142 struct btrfs_file_extent_item *fi;
3146 btrfs_init_path(&path);
3148 key.type = BTRFS_EXTENT_DATA_KEY;
3151 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3156 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3157 ret = btrfs_next_leaf(root, &path);
3164 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3166 if (found_key.objectid != ino ||
3167 found_key.type != BTRFS_EXTENT_DATA_KEY)
3169 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3170 struct btrfs_file_extent_item);
3171 type = btrfs_file_extent_type(path.nodes[0], fi);
3172 if (type != BTRFS_FILE_EXTENT_INLINE) {
3178 btrfs_release_path(&path);
/*
 * Translate a BTRFS_FT_* directory entry type into the matching S_IF* file
 * type bits by indexing a static lookup table.
 *
 * NOTE(review): @type is used as the array index without a visible range
 * check; a value beyond the last initialized entry would read outside the
 * table -- confirm callers only pass valid BTRFS_FT_* values.
 */
3182 static u32 btrfs_type_to_imode(u8 type)
3184 static u32 imode_by_btrfs_type[] = {
3185 [BTRFS_FT_REG_FILE] = S_IFREG,
3186 [BTRFS_FT_DIR] = S_IFDIR,
3187 [BTRFS_FT_CHRDEV] = S_IFCHR,
3188 [BTRFS_FT_BLKDEV] = S_IFBLK,
3189 [BTRFS_FT_FIFO] = S_IFIFO,
3190 [BTRFS_FT_SOCK] = S_IFSOCK,
3191 [BTRFS_FT_SYMLINK] = S_IFLNK,
3194 return imode_by_btrfs_type[(type)];
/*
 * Rebuild a missing inode item for @rec.
 *
 * The file type is first taken from find_file_type().  When that fails the
 * visible fallbacks are, in order: a regular file if a non-inline file
 * extent exists, a directory if a dir item was found, a regular file if the
 * record has orphan extents, and finally a regular file with a message that
 * the type could not be determined.
 *
 * The item is created with btrfs_new_inode() using mode combined with the
 * S_IF* bits for the chosen type.  Afterwards the record is updated:
 * found_dir_item is set, imode is filled in, I_ERR_NO_INODE_ITEM is cleared
 * and I_ERR_LINK_COUNT_WRONG is set so that the nlink repair runs later.
 */
3197 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3198 struct btrfs_root *root,
3199 struct btrfs_path *path,
3200 struct inode_record *rec)
3204 int type_recovered = 0;
3207 printf("Trying to rebuild inode:%llu\n", rec->ino);
3209 type_recovered = !find_file_type(rec, &filetype);
3212 * Try to determine inode type if type not found.
3214 * For found regular file extent, it must be FILE.
3215 * For found dir_item/index, it must be DIR.
3217 * For undetermined one, use FILE as fallback.
3220 * 1. If found backref(inode_index/item is already handled) to it,
3222 * Need new inode-inode ref structure to allow search for that.
3224 if (!type_recovered) {
3225 if (rec->found_file_extent &&
3226 find_normal_file_extent(root, rec->ino)) {
3228 filetype = BTRFS_FT_REG_FILE;
3229 } else if (rec->found_dir_item) {
3231 filetype = BTRFS_FT_DIR;
3232 } else if (!list_empty(&rec->orphan_extents)) {
3234 filetype = BTRFS_FT_REG_FILE;
3236 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3239 filetype = BTRFS_FT_REG_FILE;
3243 ret = btrfs_new_inode(trans, root, rec->ino,
3244 mode | btrfs_type_to_imode(filetype));
3249 * Here inode rebuild is done, we only rebuild the inode item,
3250 * don't repair the nlink(like move to lost+found).
3251 * That is the job of nlink repair.
3253 * We just fill the record and return
3255 rec->found_dir_item = 1;
3256 rec->imode = mode | btrfs_type_to_imode(filetype);
3258 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3259 /* Ensure the inode_nlinks repair function will be called */
3260 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach the orphan data extents recorded for @rec.
 *
 * For every entry on rec->orphan_extents the visible code:
 *  - probes for an existing file extent at the same offset with
 *    btrfs_get_extent(), using disk_len as the length;
 *  - on a conflict, reports it and frees the orphan extent with
 *    btrfs_free_extent();
 *  - otherwise inserts a file extent with btrfs_insert_file_extent(), adds
 *    disk_len to rec->found_size, and removes the covered range from
 *    rec->holes;
 *  - clears I_ERR_FILE_NBYTES_WRONG once found_size equals nbytes and
 *    I_ERR_FILE_EXTENT_DISCOUNT once the hole tree is empty;
 *  - unlinks the entry from the list.
 * I_ERR_FILE_EXTENT_ORPHAN is cleared at the end.
 */
3265 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3266 struct btrfs_root *root,
3267 struct btrfs_path *path,
3268 struct inode_record *rec)
3270 struct orphan_data_extent *orphan;
3271 struct orphan_data_extent *tmp;
3274 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3276 * Check for conflicting file extents
3278 * Here we don't know whether the extents is compressed or not,
3279 * so we can only assume it not compressed nor data offset,
3280 * and use its disk_len as extent length.
3282 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3283 orphan->offset, orphan->disk_len, 0);
3284 btrfs_release_path(path);
3289 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3290 orphan->disk_bytenr, orphan->disk_len);
3291 ret = btrfs_free_extent(trans,
3292 root->fs_info->extent_root,
3293 orphan->disk_bytenr, orphan->disk_len,
3294 0, root->objectid, orphan->objectid,
3299 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3300 orphan->offset, orphan->disk_bytenr,
3301 orphan->disk_len, orphan->disk_len);
3305 /* Update file size info */
3306 rec->found_size += orphan->disk_len;
3307 if (rec->found_size == rec->nbytes)
3308 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3310 /* Update the file extent hole info too */
3311 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3315 if (RB_EMPTY_ROOT(&rec->holes))
3316 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3318 list_del(&orphan->list);
3321 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps recorded in rec->holes for inode @rec.
 *
 * The visible loop takes the first node of the hole tree, punches a hole of
 * the same start and length with btrfs_punch_hole(), removes that range from
 * the tree with del_file_extent_hole(), clears I_ERR_FILE_EXTENT_DISCOUNT
 * once the tree is empty, and then fetches the next first node.
 *
 * A separate path, described by the existing comment as the case of a file
 * that lost all of its file extents, punches one hole from offset 0 covering
 * rec->isize rounded up to the sector size.  A message naming the inode and
 * root is printed afterwards.
 */
3326 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3327 struct btrfs_root *root,
3328 struct btrfs_path *path,
3329 struct inode_record *rec)
3331 struct rb_node *node;
3332 struct file_extent_hole *hole;
3336 node = rb_first(&rec->holes);
3340 hole = rb_entry(node, struct file_extent_hole, node);
3341 ret = btrfs_punch_hole(trans, root, rec->ino,
3342 hole->start, hole->len);
3345 ret = del_file_extent_hole(&rec->holes, hole->start,
3349 if (RB_EMPTY_ROOT(&rec->holes))
3350 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3351 node = rb_first(&rec->holes);
3353 /* special case for a file losing all its file extent */
3355 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3356 round_up(rec->isize,
3357 root->fs_info->sectorsize));
3361 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3362 rec->ino, root->objectid);
/*
 * Run the individual inode repair routines for @rec inside one transaction.
 *
 * The function first tests whether any of the repairable error bits listed
 * in the mask below is set.  It then starts a transaction reserving 7 items
 * (the existing comment itemizes them) and dispatches, in this order and
 * each only while ret is still 0: no inode item, orphan extents, discount
 * extents, directory isize, orphan item, link count, nbytes.  The
 * transaction is committed and the path released afterwards.
 *
 * NOTE(review): ret is read by the second dispatch test even when the first
 * repair is skipped; its initial value is not visible in this listing --
 * confirm it is initialized to 0.
 */
3367 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3369 struct btrfs_trans_handle *trans;
3370 struct btrfs_path path;
3373 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3374 I_ERR_NO_ORPHAN_ITEM |
3375 I_ERR_LINK_COUNT_WRONG |
3376 I_ERR_NO_INODE_ITEM |
3377 I_ERR_FILE_EXTENT_ORPHAN |
3378 I_ERR_FILE_EXTENT_DISCOUNT|
3379 I_ERR_FILE_NBYTES_WRONG)))
3383 * For nlink repair, it may create a dir and add link, so
3384 * 2 for parent(256)'s dir_index and dir_item
3385 * 2 for lost+found dir's inode_item and inode_ref
3386 * 1 for the new inode_ref of the file
3387 * 2 for lost+found dir's dir_index and dir_item for the file
3389 trans = btrfs_start_transaction(root, 7);
3391 return PTR_ERR(trans);
3393 btrfs_init_path(&path);
3394 if (rec->errors & I_ERR_NO_INODE_ITEM)
3395 ret = repair_inode_no_item(trans, root, &path, rec);
3396 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3397 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3398 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3399 ret = repair_inode_discount_extent(trans, root, &path, rec);
3400 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3401 ret = repair_inode_isize(trans, root, &path, rec);
3402 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3403 ret = repair_inode_orphan_item(trans, root, &path, rec);
3404 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3405 ret = repair_inode_nlinks(trans, root, &path, rec);
3406 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3407 ret = repair_inode_nbytes(trans, root, &path, rec);
3408 btrfs_commit_transaction(trans, root);
3409 btrfs_release_path(&path);
/*
 * Verify, and in repair mode fix, every inode record collected for @root.
 *
 * Visible phases:
 *  1. A root whose root item has zero refs only triggers a warning when the
 *     inode cache is not empty.
 *  2. In repair mode the cache is walked and repair_inode_backrefs() is
 *     called for records that still have backrefs; the existing comment
 *     explains why backrefs are repaired before anything else and why
 *     deletions come before additions.
 *  3. The root directory record is looked up and checked with
 *     check_root_dir().  When it is missing, a repair path recreates it with
 *     btrfs_make_root_dir() inside a transaction; otherwise an error is
 *     printed.
 *  4. Every remaining record is removed from the cache.  The root directory
 *     and the BTRFS_ORPHAN_OBJECTID record are freed directly.  For other
 *     records the orphan item is checked, missing inode items and link count
 *     mismatches are flagged, try_repair_inode() is attempted, and any
 *     record that is still bad is printed together with its unresolved
 *     backrefs.
 *
 * Returns -1 when error is greater than 0, otherwise 0.
 */
3413 static int check_inode_recs(struct btrfs_root *root,
3414 struct cache_tree *inode_cache)
3416 struct cache_extent *cache;
3417 struct ptr_node *node;
3418 struct inode_record *rec;
3419 struct inode_backref *backref;
3424 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3426 if (btrfs_root_refs(&root->root_item) == 0) {
3427 if (!cache_tree_empty(inode_cache))
3428 fprintf(stderr, "warning line %d\n", __LINE__);
3433 * We need to repair backrefs first because we could change some of the
3434 * errors in the inode recs.
3436 * We also need to go through and delete invalid backrefs first and then
3437 * add the correct ones second. We do this because we may get EEXIST
3438 * when adding back the correct index because we hadn't yet deleted the
3441 * For example, if we were missing a dir index then the directories
3442 * isize would be wrong, so if we fixed the isize to what we thought it
3443 * would be and then fixed the backref we'd still have a invalid fs, so
3444 * we need to add back the dir index and then check to see if the isize
3449 if (stage == 3 && !err)
3452 cache = search_cache_extent(inode_cache, 0);
3453 while (repair && cache) {
3454 node = container_of(cache, struct ptr_node, cache);
3456 cache = next_cache_extent(cache);
3458 /* Need to free everything up and rescan */
3460 remove_cache_extent(inode_cache, &node->cache);
3462 free_inode_rec(rec);
3466 if (list_empty(&rec->backrefs))
3469 ret = repair_inode_backrefs(root, rec, inode_cache,
3483 rec = get_inode_rec(inode_cache, root_dirid, 0);
3484 BUG_ON(IS_ERR(rec));
3486 ret = check_root_dir(rec);
3488 fprintf(stderr, "root %llu root dir %llu error\n",
3489 (unsigned long long)root->root_key.objectid,
3490 (unsigned long long)root_dirid);
3491 print_inode_error(root, rec);
3496 struct btrfs_trans_handle *trans;
3498 trans = btrfs_start_transaction(root, 1);
3499 if (IS_ERR(trans)) {
3500 err = PTR_ERR(trans);
3505 "root %llu missing its root dir, recreating\n",
3506 (unsigned long long)root->objectid);
3508 ret = btrfs_make_root_dir(trans, root, root_dirid);
3511 btrfs_commit_transaction(trans, root);
3515 fprintf(stderr, "root %llu root dir %llu not found\n",
3516 (unsigned long long)root->root_key.objectid,
3517 (unsigned long long)root_dirid);
3521 cache = search_cache_extent(inode_cache, 0);
3524 node = container_of(cache, struct ptr_node, cache);
3526 remove_cache_extent(inode_cache, &node->cache);
3528 if (rec->ino == root_dirid ||
3529 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3530 free_inode_rec(rec);
3534 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3535 ret = check_orphan_item(root, rec->ino);
3537 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3538 if (can_free_inode_rec(rec)) {
3539 free_inode_rec(rec);
3544 if (!rec->found_inode_item)
3545 rec->errors |= I_ERR_NO_INODE_ITEM;
3546 if (rec->found_link != rec->nlink)
3547 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3549 ret = try_repair_inode(root, rec);
3550 if (ret == 0 && can_free_inode_rec(rec)) {
3551 free_inode_rec(rec);
3557 if (!(repair && ret == 0))
3559 print_inode_error(root, rec);
3560 list_for_each_entry(backref, &rec->backrefs, list) {
3561 if (!backref->found_dir_item)
3562 backref->errors |= REF_ERR_NO_DIR_ITEM;
3563 if (!backref->found_dir_index)
3564 backref->errors |= REF_ERR_NO_DIR_INDEX;
3565 if (!backref->found_inode_ref)
3566 backref->errors |= REF_ERR_NO_INODE_REF;
3567 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3568 " namelen %u name %s filetype %d errors %x",
3569 (unsigned long long)backref->dir,
3570 (unsigned long long)backref->index,
3571 backref->namelen, backref->name,
3572 backref->filetype, backref->errors);
3573 print_ref_error(backref->errors);
3575 free_inode_rec(rec);
3577 return (error > 0) ? -1 : 0;
/*
 * Look up the root_record for objectid in @root_cache, creating it when it
 * does not exist yet.
 *
 * An existing entry is found through lookup_cache_extent() with size 1.  A
 * new record is zero-allocated, gets its objectid and an empty backref list,
 * covers the cache range [objectid, objectid + 1) and is inserted into the
 * cache.
 *
 * Returns ERR_PTR(-ENOMEM) when the allocation fails and ERR_PTR(-EEXIST)
 * on the insert failure path.
 *
 * NOTE(review): no release of the freshly allocated record is visible
 * before ERR_PTR(-EEXIST) is returned -- confirm whether it leaks.
 */
3580 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3583 struct cache_extent *cache;
3584 struct root_record *rec = NULL;
3587 cache = lookup_cache_extent(root_cache, objectid, 1);
3589 rec = container_of(cache, struct root_record, cache);
3591 rec = calloc(1, sizeof(*rec));
3593 return ERR_PTR(-ENOMEM);
3594 rec->objectid = objectid;
3595 INIT_LIST_HEAD(&rec->backrefs);
3596 rec->cache.start = objectid;
3597 rec->cache.size = 1;
3599 ret = insert_cache_extent(root_cache, &rec->cache);
3601 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec that matches (@ref_root, @dir, @name), or create
 * one.
 *
 * Existing entries are compared on ref_root, dir, namelen and the name
 * bytes; @index does not take part in the comparison.  A new entry is
 * zero-allocated with room for the name plus a terminating NUL, filled in
 * from the arguments and appended to the tail of rec->backrefs.
 */
3606 static struct root_backref *get_root_backref(struct root_record *rec,
3607 u64 ref_root, u64 dir, u64 index,
3608 const char *name, int namelen)
3610 struct root_backref *backref;
3612 list_for_each_entry(backref, &rec->backrefs, list) {
3613 if (backref->ref_root != ref_root || backref->dir != dir ||
3614 backref->namelen != namelen)
3616 if (memcmp(name, backref->name, namelen))
/* The name is stored inline after the struct, hence the extra bytes. */
3621 backref = calloc(1, sizeof(*backref) + namelen + 1);
3624 backref->ref_root = ref_root;
3626 backref->index = index;
3627 backref->namelen = namelen;
3628 memcpy(backref->name, name, namelen);
3629 backref->name[namelen] = '\0';
3630 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Destructor for one root_record stored in a cache tree: converts the cache
 * extent back to its root_record and unlinks every entry of the backref
 * list.  The macro invocation that follows passes this function together
 * with the name root_recs to FREE_EXTENT_CACHE_BASED_TREE; presumably that
 * generates the tree-wide free helper -- confirm against the macro
 * definition.
 */
3634 static void free_root_record(struct cache_extent *cache)
3636 struct root_record *rec;
3637 struct root_backref *backref;
3639 rec = container_of(cache, struct root_record, cache);
3640 while (!list_empty(&rec->backrefs)) {
3641 backref = to_root_backref(rec->backrefs.next);
3642 list_del(&backref->list);
3649 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that an item of type @item_type referring to root @root_id was
 * seen in @ref_root.
 *
 * The root record and the matching backref are looked up or created, and
 * @errors is ORed into the backref.  For every item type except
 * BTRFS_DIR_ITEM_KEY the index is cross-checked: if an index-carrying item
 * was already seen and its index differs, REF_ERR_INDEX_UNMATCH is set;
 * the index is stored from the new item.
 *
 * The per-type flag is then set (found_dir_item, found_dir_index,
 * found_forward_ref or found_back_ref), with REF_ERR_DUP_ROOT_REF and
 * REF_ERR_DUP_ROOT_BACKREF flagged when the same root ref or root backref is
 * seen twice.  A backref that has both a forward ref and a dir item is
 * marked reachable.
 */
3651 static int add_root_backref(struct cache_tree *root_cache,
3652 u64 root_id, u64 ref_root, u64 dir, u64 index,
3653 const char *name, int namelen,
3654 int item_type, int errors)
3656 struct root_record *rec;
3657 struct root_backref *backref;
3659 rec = get_root_rec(root_cache, root_id);
3660 BUG_ON(IS_ERR(rec));
3661 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3664 backref->errors |= errors;
3666 if (item_type != BTRFS_DIR_ITEM_KEY) {
3667 if (backref->found_dir_index || backref->found_back_ref ||
3668 backref->found_forward_ref) {
3669 if (backref->index != index)
3670 backref->errors |= REF_ERR_INDEX_UNMATCH;
3672 backref->index = index;
3676 if (item_type == BTRFS_DIR_ITEM_KEY) {
3677 if (backref->found_forward_ref)
3679 backref->found_dir_item = 1;
3680 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3681 backref->found_dir_index = 1;
3682 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3683 if (backref->found_forward_ref)
3684 backref->errors |= REF_ERR_DUP_ROOT_REF;
3685 else if (backref->found_dir_item)
3687 backref->found_forward_ref = 1;
3688 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3689 if (backref->found_back_ref)
3690 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3691 backref->found_back_ref = 1;
3696 if (backref->found_forward_ref && backref->found_dir_item)
3697 backref->reachable = 1;
/*
 * Move the per-tree records in @src_cache into the global root cache
 * @dst_cache.
 *
 * For the tree with objectid BTRFS_TREE_RELOC_OBJECTID the source cache is
 * simply freed.  Otherwise each inode record is removed from @src_cache,
 * is_child_root() is consulted for (root->objectid, rec->ino), and every
 * backref of the record is forwarded to add_root_backref() as a
 * BTRFS_DIR_ITEM_KEY and/or BTRFS_DIR_INDEX_KEY sighting, depending on which
 * of the two was found.  A backref with found_inode_ref set trips BUG_ON.
 * The inode record is freed afterwards.
 */
3701 static int merge_root_recs(struct btrfs_root *root,
3702 struct cache_tree *src_cache,
3703 struct cache_tree *dst_cache)
3705 struct cache_extent *cache;
3706 struct ptr_node *node;
3707 struct inode_record *rec;
3708 struct inode_backref *backref;
3711 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3712 free_inode_recs_tree(src_cache);
3717 cache = search_cache_extent(src_cache, 0);
3720 node = container_of(cache, struct ptr_node, cache);
3722 remove_cache_extent(src_cache, &node->cache);
3725 ret = is_child_root(root, root->objectid, rec->ino);
3731 list_for_each_entry(backref, &rec->backrefs, list) {
3732 BUG_ON(backref->found_inode_ref);
3733 if (backref->found_dir_item)
3734 add_root_backref(dst_cache, rec->ino,
3735 root->root_key.objectid, backref->dir,
3736 backref->index, backref->name,
3737 backref->namelen, BTRFS_DIR_ITEM_KEY,
3739 if (backref->found_dir_index)
3740 add_root_backref(dst_cache, rec->ino,
3741 root->root_key.objectid, backref->dir,
3742 backref->index, backref->name,
3743 backref->namelen, BTRFS_DIR_INDEX_KEY,
3747 free_inode_rec(rec);
/*
 * Check that every subvolume root recorded in @root_cache is properly
 * referenced.
 *
 * Visible phases:
 *  1. The record for BTRFS_FS_TREE_OBJECTID is fetched first.
 *  2. A pass over the cache looks at records whose found_ref is 0 and at
 *     their reachable backrefs; a backref whose referencing root has no
 *     references itself is marked unreachable.  The existing fixme notes
 *     that circular references are not detected.
 *  3. A second pass reports problems: roots in the range
 *     BTRFS_FIRST_FREE_OBJECTID..BTRFS_LAST_FREE_OBJECTID with no reference
 *     (after consulting check_orphan_item() on the tree root and skipping
 *     roots without a root item), referenced roots without a root item, and
 *     backrefs lacking a dir item, dir index, root backref or root ref.
 *     Offending roots and their unresolved backrefs are printed to stderr.
 *
 * Returns 1 when errors is greater than 0, otherwise 0.
 */
3754 static int check_root_refs(struct btrfs_root *root,
3755 struct cache_tree *root_cache)
3757 struct root_record *rec;
3758 struct root_record *ref_root;
3759 struct root_backref *backref;
3760 struct cache_extent *cache;
3766 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3767 BUG_ON(IS_ERR(rec));
3770 /* fixme: this can not detect circular references */
3773 cache = search_cache_extent(root_cache, 0);
3777 rec = container_of(cache, struct root_record, cache);
3778 cache = next_cache_extent(cache);
3780 if (rec->found_ref == 0)
3783 list_for_each_entry(backref, &rec->backrefs, list) {
3784 if (!backref->reachable)
3787 ref_root = get_root_rec(root_cache,
3789 BUG_ON(IS_ERR(ref_root));
3790 if (ref_root->found_ref > 0)
3793 backref->reachable = 0;
3795 if (rec->found_ref == 0)
3801 cache = search_cache_extent(root_cache, 0);
3805 rec = container_of(cache, struct root_record, cache);
3806 cache = next_cache_extent(cache);
3808 if (rec->found_ref == 0 &&
3809 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3810 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3811 ret = check_orphan_item(root->fs_info->tree_root,
3817 * If we don't have a root item then we likely just have
3818 * a dir item in a snapshot for this root but no actual
3819 * ref key or anything so it's meaningless.
3821 if (!rec->found_root_item)
3824 fprintf(stderr, "fs tree %llu not referenced\n",
3825 (unsigned long long)rec->objectid);
3829 if (rec->found_ref > 0 && !rec->found_root_item)
3831 list_for_each_entry(backref, &rec->backrefs, list) {
3832 if (!backref->found_dir_item)
3833 backref->errors |= REF_ERR_NO_DIR_ITEM;
3834 if (!backref->found_dir_index)
3835 backref->errors |= REF_ERR_NO_DIR_INDEX;
3836 if (!backref->found_back_ref)
3837 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3838 if (!backref->found_forward_ref)
3839 backref->errors |= REF_ERR_NO_ROOT_REF;
3840 if (backref->reachable && backref->errors)
3847 fprintf(stderr, "fs tree %llu refs %u %s\n",
3848 (unsigned long long)rec->objectid, rec->found_ref,
3849 rec->found_root_item ? "" : "not found");
3851 list_for_each_entry(backref, &rec->backrefs, list) {
3852 if (!backref->reachable)
3854 if (!backref->errors && rec->found_root_item)
3856 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3857 " index %llu namelen %u name %s errors %x\n",
3858 (unsigned long long)backref->ref_root,
3859 (unsigned long long)backref->dir,
3860 (unsigned long long)backref->index,
3861 backref->namelen, backref->name,
3863 print_ref_error(backref->errors);
3866 return errors > 0 ? 1 : 0;
/*
 * Decode the btrfs_root_ref item in @slot of @eb and register it in
 * @root_cache.
 *
 * The directory id, sequence (used as the index) and name length are read
 * from the item.  A name longer than BTRFS_NAME_LEN is clamped to
 * BTRFS_NAME_LEN and flagged with REF_ERR_NAME_TOO_LONG.  The name bytes are
 * read from directly behind the fixed-size ref structure.
 *
 * For a BTRFS_ROOT_REF_KEY item the referenced root is key->offset and the
 * referencing root is key->objectid; for the other key type handled here the
 * two roles are swapped.
 */
3869 static int process_root_ref(struct extent_buffer *eb, int slot,
3870 struct btrfs_key *key,
3871 struct cache_tree *root_cache)
3877 struct btrfs_root_ref *ref;
3878 char namebuf[BTRFS_NAME_LEN];
3881 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3883 dirid = btrfs_root_ref_dirid(eb, ref);
3884 index = btrfs_root_ref_sequence(eb, ref);
3885 name_len = btrfs_root_ref_name_len(eb, ref);
3887 if (name_len <= BTRFS_NAME_LEN) {
3891 len = BTRFS_NAME_LEN;
3892 error = REF_ERR_NAME_TOO_LONG;
/* ref + 1 is the offset of the first byte after the fixed-size item. */
3894 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3896 if (key->type == BTRFS_ROOT_REF_KEY) {
3897 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3898 index, namebuf, len, key->type, error);
3900 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3901 index, namebuf, len, key->type, error);
/*
 * Destructor for one btrfs_corrupt_block stored in a cache tree; the cache
 * extent is converted back to its containing structure.  The macro
 * invocation that follows passes this function together with the name
 * corrupt_blocks to FREE_EXTENT_CACHE_BASED_TREE; check_fs_root() in this
 * file calls free_corrupt_blocks_tree(), presumably the helper generated
 * here -- confirm against the macro definition.
 */
3906 static void free_corrupt_block(struct cache_extent *cache)
3908 struct btrfs_corrupt_block *corrupt;
3910 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3914 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
/*
 * Visible structure: nothing is done when @corrupt_blocks is empty.
 * Otherwise one transaction is started and two passes run over the cache:
 *
 *  Pass 1, per corrupt block: search down to the recorded level with
 *  ins_len 0 (so no balancing happens next to corrupted siblings), read the
 *  block pointer in the parent slot, remove that pointer with
 *  btrfs_del_ptr(), release the path and free the extent of nodesize bytes
 *  at that address.
 *
 *  Pass 2, per corrupt block: search for the same key again with ins_len -1
 *  so that btrfs_search_slot() rebalances the tree, then release the path.
 *
 * The transaction is committed and the path released at the end.
 */
3917 * Repair the btree of the given root.
3919 * The fix is to remove the node key in corrupt_blocks cache_tree.
3920 * and rebalance the tree.
3921 * After the fix, the btree should be writeable.
3923 static int repair_btree(struct btrfs_root *root,
3924 struct cache_tree *corrupt_blocks)
3926 struct btrfs_trans_handle *trans;
3927 struct btrfs_path path;
3928 struct btrfs_corrupt_block *corrupt;
3929 struct cache_extent *cache;
3930 struct btrfs_key key;
3935 if (cache_tree_empty(corrupt_blocks))
3938 trans = btrfs_start_transaction(root, 1);
3939 if (IS_ERR(trans)) {
3940 ret = PTR_ERR(trans);
3941 fprintf(stderr, "Error starting transaction: %s\n",
3945 btrfs_init_path(&path);
3946 cache = first_cache_extent(corrupt_blocks);
3948 corrupt = container_of(cache, struct btrfs_corrupt_block,
3950 level = corrupt->level;
3951 path.lowest_level = level;
3952 key.objectid = corrupt->key.objectid;
3953 key.type = corrupt->key.type;
3954 key.offset = corrupt->key.offset;
3957 * Here we don't want to do any tree balance, since it may
3958 * cause a balance with corrupted brother leaf/node,
3959 * so ins_len set to 0 here.
3960 * Balance will be done after all corrupt node/leaf is deleted.
3962 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3965 offset = btrfs_node_blockptr(path.nodes[level],
3968 /* Remove the ptr */
3969 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3973 * Remove the corresponding extent
3974 * return value is not concerned.
3976 btrfs_release_path(&path);
3977 ret = btrfs_free_extent(trans, root, offset,
3978 root->fs_info->nodesize, 0,
3979 root->root_key.objectid, level - 1, 0);
3980 cache = next_cache_extent(cache);
3983 /* Balance the btree using btrfs_search_slot() */
3984 cache = first_cache_extent(corrupt_blocks);
3986 corrupt = container_of(cache, struct btrfs_corrupt_block,
3988 memcpy(&key, &corrupt->key, sizeof(key));
3989 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3992 /* return will always >0 since it won't find the item */
3994 btrfs_release_path(&path);
3995 cache = next_cache_extent(cache);
3998 btrfs_commit_transaction(trans, root);
3999 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree.
 *
 * Visible sequence:
 *  1. A local corrupt_blocks cache tree is initialized and published through
 *     root->fs_info->corrupt_blocks for the duration of the check.
 *  2. Except for the tree with objectid BTRFS_TREE_RELOC_OBJECTID, the root
 *     record is fetched and found_root_item is set when the root item has
 *     refs.
 *  3. Orphan data extents queued on the root are moved to the matching inode
 *     records, which are flagged I_ERR_FILE_EXTENT_ORPHAN.
 *  4. The walk control is reset for the level of the root node and the root
 *     block itself is validated with btrfs_check_leaf() or
 *     btrfs_check_node().
 *  5. The walk starts at the root node when the root has refs or its
 *     drop_progress objectid is 0.  Otherwise it resumes from the
 *     drop_progress key at drop_level, after rejecting a drop level that is
 *     above the level of the root node or not below BTRFS_MAX_LEVEL.
 *  6. The tree is traversed with walk_down_tree() and walk_up_tree().
 *  7. Any tree blocks recorded as corrupt are listed, and repair_btree() is
 *     invoked on them.
 *  8. Root records are merged into @root_cache, the current inode record is
 *     marked checked, and check_inode_recs() runs on the inode cache.
 *  9. The corrupt block tree and the orphan extent list are freed and the
 *     fs_info pointer is reset to NULL.
 */
4003 static int check_fs_root(struct btrfs_root *root,
4004 struct cache_tree *root_cache,
4005 struct walk_control *wc)
4011 struct btrfs_path path;
4012 struct shared_node root_node;
4013 struct root_record *rec;
4014 struct btrfs_root_item *root_item = &root->root_item;
4015 struct cache_tree corrupt_blocks;
4016 struct orphan_data_extent *orphan;
4017 struct orphan_data_extent *tmp;
4018 enum btrfs_tree_block_status status;
4019 struct node_refs nrefs;
4022 * Reuse the corrupt_block cache tree to record corrupted tree block
4024 * Unlike the usage in extent tree check, here we do it in a per
4025 * fs/subvol tree base.
4027 cache_tree_init(&corrupt_blocks);
4028 root->fs_info->corrupt_blocks = &corrupt_blocks;
4030 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4031 rec = get_root_rec(root_cache, root->root_key.objectid);
4032 BUG_ON(IS_ERR(rec));
4033 if (btrfs_root_refs(root_item) > 0)
4034 rec->found_root_item = 1;
4037 btrfs_init_path(&path);
4038 memset(&root_node, 0, sizeof(root_node));
4039 cache_tree_init(&root_node.root_cache);
4040 cache_tree_init(&root_node.inode_cache);
4041 memset(&nrefs, 0, sizeof(nrefs));
4043 /* Move the orphan extent record to corresponding inode_record */
4044 list_for_each_entry_safe(orphan, tmp,
4045 &root->orphan_data_extents, list) {
4046 struct inode_record *inode;
4048 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4050 BUG_ON(IS_ERR(inode));
4051 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4052 list_move(&orphan->list, &inode->orphan_extents);
4055 level = btrfs_header_level(root->node);
4056 memset(wc->nodes, 0, sizeof(wc->nodes));
4057 wc->nodes[level] = &root_node;
4058 wc->active_node = level;
4059 wc->root_level = level;
4061 /* We may not have checked the root block, lets do that now */
4062 if (btrfs_is_leaf(root->node))
4063 status = btrfs_check_leaf(root, NULL, root->node);
4065 status = btrfs_check_node(root, NULL, root->node);
4066 if (status != BTRFS_TREE_BLOCK_CLEAN)
4069 if (btrfs_root_refs(root_item) > 0 ||
4070 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
/* Start from the root node; an extra reference is taken for the path. */
4071 path.nodes[level] = root->node;
4072 extent_buffer_get(root->node);
4073 path.slots[level] = 0;
4075 struct btrfs_key key;
4076 struct btrfs_disk_key found_key;
4078 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4079 level = root_item->drop_level;
4080 path.lowest_level = level;
4081 if (level > btrfs_header_level(root->node) ||
4082 level >= BTRFS_MAX_LEVEL) {
4083 error("ignoring invalid drop level: %u", level);
4086 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4089 btrfs_node_key(path.nodes[level], &found_key,
4091 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4092 sizeof(found_key)));
4096 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4102 wret = walk_up_tree(root, &path, wc, &level);
4109 btrfs_release_path(&path);
4111 if (!cache_tree_empty(&corrupt_blocks)) {
4112 struct cache_extent *cache;
4113 struct btrfs_corrupt_block *corrupt;
4115 printf("The following tree block(s) is corrupted in tree %llu:\n",
4116 root->root_key.objectid);
4117 cache = first_cache_extent(&corrupt_blocks);
4119 corrupt = container_of(cache,
4120 struct btrfs_corrupt_block,
4122 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4123 cache->start, corrupt->level,
4124 corrupt->key.objectid, corrupt->key.type,
4125 corrupt->key.offset);
4126 cache = next_cache_extent(cache);
4129 printf("Try to repair the btree for root %llu\n",
4130 root->root_key.objectid);
4131 ret = repair_btree(root, &corrupt_blocks);
4133 fprintf(stderr, "Failed to repair btree: %s\n",
4136 printf("Btree for root %llu is fixed\n",
4137 root->root_key.objectid);
4141 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4145 if (root_node.current) {
4146 root_node.current->checked = 1;
4147 maybe_free_inode_rec(&root_node.inode_cache,
4151 err = check_inode_recs(root, &root_node.inode_cache);
4155 free_corrupt_blocks_tree(&corrupt_blocks);
4156 root->fs_info->corrupt_blocks = NULL;
4157 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Filter used by check_fs_roots() to decide whether a ROOT_ITEM should be
 * handed to check_fs_root().  The two relocation tree ids are special-cased;
 * every other id is decided by is_fstree().
 */
4161 static int fs_root_objectid(u64 objectid)
4163 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4164 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4166 return is_fstree(objectid);
/*
 * Walk the items of the tree root.  Every ROOT_ITEM accepted by
 * fs_root_objectid() is read and passed to check_fs_root(); ROOT_REF and
 * ROOT_BACKREF items are passed to process_root_ref().  Root records are
 * kept in @root_cache.
 */
4169 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4170 struct cache_tree *root_cache)
4172 struct btrfs_path path;
4173 struct btrfs_key key;
4174 struct walk_control wc;
4175 struct extent_buffer *leaf, *tree_node;
4176 struct btrfs_root *tmp_root;
4177 struct btrfs_root *tree_root = fs_info->tree_root;
/* Switch the progress reporter to the fs-roots phase. */
4181 if (ctx.progress_enabled) {
4182 ctx.tp = TASK_FS_ROOTS;
4183 task_start(ctx.info);
4187 * Just in case we made any changes to the extent tree that weren't
4188 * reflected into the free space cache yet.
4191 reset_cached_block_groups(fs_info);
4192 memset(&wc, 0, sizeof(wc));
4193 cache_tree_init(&wc.shared);
4194 btrfs_init_path(&path);
4199 key.type = BTRFS_ROOT_ITEM_KEY;
4200 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember which node was the tree root when the scan started. */
4205 tree_node = tree_root->node;
/*
 * The tree root node was replaced while scanning: the records collected so
 * far are dropped and the path released.
 * NOTE(review): presumably the scan is then restarted - confirm.
 */
4207 if (tree_node != tree_root->node) {
4208 free_root_recs_tree(root_cache);
4209 btrfs_release_path(&path);
4212 leaf = path.nodes[0];
/* Slot ran off the end of this leaf: move on to the next one. */
4213 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4214 ret = btrfs_next_leaf(tree_root, &path);
4220 leaf = path.nodes[0];
4222 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4223 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4224 fs_root_objectid(key.objectid)) {
/*
 * Reloc trees are read with the no-cache variant and freed again below;
 * all other roots go through btrfs_read_fs_root() with offset (u64)-1.
 */
4225 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4226 tmp_root = btrfs_read_fs_root_no_cache(
4229 key.offset = (u64)-1;
4230 tmp_root = btrfs_read_fs_root(
4233 if (IS_ERR(tmp_root)) {
4237 ret = check_fs_root(tmp_root, root_cache, &wc);
/* -EAGAIN from check_fs_root(): discard the root records and the path. */
4238 if (ret == -EAGAIN) {
4239 free_root_recs_tree(root_cache);
4240 btrfs_release_path(&path);
4245 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4246 btrfs_free_fs_root(tmp_root);
4247 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4248 key.type == BTRFS_ROOT_BACKREF_KEY) {
4249 process_root_ref(leaf, path.slots[0], &key,
4256 btrfs_release_path(&path);
/* The shared-node cache is expected to be empty once it has been freed. */
4258 free_extent_cache_tree(&wc.shared);
4259 if (!cache_tree_empty(&wc.shared))
4260 fprintf(stderr, "warning line %d\n", __LINE__);
4262 task_stop(ctx.info);
4268 * Find the @index according to @ino and name.
4269 * Notice: time efficiency is O(N)
4271 * @root: the root of the fs/file tree
4272 * @index_ret: the index as return value
4273 * @namebuf: the name to match
4274 * @name_len: the length of name to match
4275 * @file_type: the file_type of INODE_ITEM to match
4277 * Returns 0 if found and *@index_ret will be modified with right value
4278 * Returns <0 if not found and *@index_ret will be (u64)-1
/*
 * Searches the DIR_INDEX items of @dirid starting from the highest possible
 * offset and stepping backwards with btrfs_previous_item().  Each entry is
 * compared by location (@location_id, INODE_ITEM, offset 0), by @file_type
 * and by name (@namebuf / @name_len).  On a match the key offset of the
 * item is stored in *@index_ret.
 */
4280 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4281 u64 *index_ret, char *namebuf, u32 name_len,
4284 struct btrfs_path path;
4285 struct extent_buffer *node;
4286 struct btrfs_dir_item *di;
4287 struct btrfs_key key;
4288 struct btrfs_key location;
4289 char name[BTRFS_NAME_LEN] = {0};
4301 /* search from the last index */
4302 key.objectid = dirid;
4303 key.offset = (u64)-1;
4304 key.type = BTRFS_DIR_INDEX_KEY;
4306 btrfs_init_path(&path);
4307 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4312 ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
/*
 * Store the "no index" sentinel.  It must be the all-ones 64-bit value that
 * the rest of this file compares against as (u64)-1; the previous
 * expression (64)-1 was plain int arithmetic and stored 63.
 */
4315 *index_ret = (u64)-1;
4318 /* Check whether inode_id/filetype/name match */
4319 node = path.nodes[0];
4320 slot = path.slots[0];
4321 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4322 total = btrfs_item_size_nr(node, slot);
4323 while (cur < total) {
4325 len = btrfs_dir_name_len(node, di);
4326 data_len = btrfs_dir_data_len(node, di);
4328 btrfs_dir_item_key_to_cpu(node, di, &location);
4329 if (location.objectid != location_id ||
4330 location.type != BTRFS_INODE_ITEM_KEY ||
4331 location.offset != 0)
4334 filetype = btrfs_dir_type(node, di);
4335 if (file_type != filetype)
/* Clamp so the copy below cannot overrun name[BTRFS_NAME_LEN]. */
4338 if (len > BTRFS_NAME_LEN)
4339 len = BTRFS_NAME_LEN;
4341 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4342 if (len != name_len || strncmp(namebuf, name, len))
/* Match: the DIR_INDEX key offset is the index being looked for. */
4345 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4346 *index_ret = key.offset;
/* Step to the next entry: header + name + data. */
4350 len += sizeof(*di) + data_len;
4351 di = (struct btrfs_dir_item *)((char *)di + len);
4357 btrfs_release_path(&path);
4362 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4363 * INODE_REF/INODE_EXTREF match.
4365 * @root: the root of the fs/file tree
4366 * @key: the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4367 * value while find index
4368 * @location_key: location key of the struct btrfs_dir_item to match
4369 * @name: the name to match
4370 * @namelen: the length of name
4371 * @file_type: the type of file to match
4373 * Return 0 if no error occurred.
4374 * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4375 * DIR_ITEM/DIR_INDEX
4376 * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4377 * and DIR_ITEM/DIR_INDEX mismatch
/*
 * Looks up the DIR_ITEM or DIR_INDEX at @key and compares each entry in it
 * with @location_key, @file_type and @name / @namelen.
 */
4379 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4380 struct btrfs_key *location_key, char *name,
4381 u32 namelen, u8 file_type)
4383 struct btrfs_path path;
4384 struct extent_buffer *node;
4385 struct btrfs_dir_item *di;
4386 struct btrfs_key location;
4387 char namebuf[BTRFS_NAME_LEN] = {0};
4396 /* get the index by traversing all index */
/*
 * A DIR_INDEX key with offset (u64)-1 means the index is unknown:
 * find_dir_index() resolves it by name and writes it into key->offset.
 */
4397 if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4398 ret = find_dir_index(root, key->objectid,
4399 location_key->objectid, &key->offset,
4400 name, namelen, file_type);
4402 ret = DIR_INDEX_MISSING;
4406 btrfs_init_path(&path);
4407 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
/* The *_MISSING code is chosen from the key type that was searched. */
4409 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4414 /* Check whether inode_id/filetype/name match */
4415 node = path.nodes[0];
4416 slot = path.slots[0];
4417 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4418 total = btrfs_item_size_nr(node, slot);
4419 while (cur < total) {
/* Preset the result to "mismatch" before examining each entry. */
4420 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4421 DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4423 len = btrfs_dir_name_len(node, di);
4424 data_len = btrfs_dir_data_len(node, di);
4426 btrfs_dir_item_key_to_cpu(node, di, &location);
4427 if (location.objectid != location_key->objectid ||
4428 location.type != location_key->type ||
4429 location.offset != location_key->offset)
4432 filetype = btrfs_dir_type(node, di);
4433 if (file_type != filetype)
/*
 * Clamp len to the size of namebuf.
 * NOTE(review): len is clamped before the warning is printed, so the
 * message reports BTRFS_NAME_LEN rather than the on-disk name length.
 */
4436 if (len > BTRFS_NAME_LEN) {
4437 len = BTRFS_NAME_LEN;
4438 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4440 key->type == BTRFS_DIR_ITEM_KEY ?
4441 "DIR_ITEM" : "DIR_INDEX",
4442 key->objectid, key->offset, len);
4444 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4446 if (len != namelen || strncmp(namebuf, name, len))
/* Step to the next entry: header + name + data. */
4452 len += sizeof(*di) + data_len;
4453 di = (struct btrfs_dir_item *)((char *)di + len);
4458 btrfs_release_path(&path);
4463 * Prints inode ref error message
/*
 * Reports the problems encoded in @err for the INODE_REF at @key.  A ref on
 * the root directory (objectid BTRFS_FIRST_FREE_OBJECTID) gets its own
 * message; otherwise one line is printed per affected DIR ITEM / DIR INDEX.
 */
4465 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4466 u64 index, const char *namebuf, int name_len,
4467 u8 filetype, int err)
4472 /* root dir error */
4473 if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4475 "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4476 root->objectid, key->objectid, key->offset, namebuf);
/* The DIR ITEM is identified by parent dir (key->offset) and name hash. */
4481 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4482 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4483 root->objectid, key->offset,
4484 btrfs_name_hash(namebuf, name_len),
4485 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
/*
 * The wording for the DIR INDEX line must be chosen from the DIR_INDEX
 * bit.  It previously tested DIR_ITEM_MISMATCH, so a mismatching index
 * was reported as "missing" unless the DIR ITEM happened to mismatch too.
 */
4487 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4488 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4489 root->objectid, key->offset, index,
4490 err & DIR_INDEX_MISMATCH ? "mismatch" : "missing",
4495 * Insert the missing inode item.
4497 * Returns 0 means success.
4498 * Returns <0 means error.
/*
 * Repairs a missing INODE_ITEM for @ino inside its own transaction: the key
 * is searched in insert mode, the item is created with
 * create_inode_item_lowmem(), and the transaction is committed.
 */
4500 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4503 struct btrfs_key key;
4504 struct btrfs_trans_handle *trans;
4505 struct btrfs_path path;
4509 key.type = BTRFS_INODE_ITEM_KEY;
4512 btrfs_init_path(&path);
4513 trans = btrfs_start_transaction(root, 1);
4514 if (IS_ERR(trans)) {
/* ins_len = 1, cow = 1: position the path for an insertion. */
4519 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
/* Either the search failed (ret < 0) or the item already exists (ret == 0). */
4520 if (ret < 0 || !ret)
4523 /* insert inode item */
/* NOTE(review): the result of create_inode_item_lowmem() is not captured here. */
4524 create_inode_item_lowmem(trans, root, ino, filetype);
4527 btrfs_commit_transaction(trans, root);
4530 error("failed to repair root %llu INODE ITEM[%llu] missing",
4531 root->objectid, ino);
4532 btrfs_release_path(&path);
4537 * The ternary means dir item, dir index and relative inode ref.
4538 * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4539 * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4541 * If two of three is missing or mismatched, delete the existing one.
4542 * If one of three is missing or mismatched, add the missing one.
4544 * returns 0 means success.
4545 * returns not 0 means error.
/*
 * Repairs the (DIR_ITEM, DIR_INDEX, INODE_REF) triple of one name.  The
 * error bits in @err are grouped into three classes; depending on how many
 * classes are affected the link is either removed with btrfs_unlink() or
 * (re)created with btrfs_add_link(), inside a single transaction.
 */
4547 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4548 u64 index, char *name, int name_len, u8 filetype,
4551 struct btrfs_trans_handle *trans;
4556 * stage shall be one of following valild values:
4557 * 0: Fine, nothing to do.
4558 * 1: One of three is wrong, so add missing one.
4559 * 2: Two of three is wrong, so delete existed one.
/* One test per member of the triple. */
4561 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4563 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4565 if (err & (INODE_REF_MISSING))
4568 /* stage must be smaller than 3 */
/* NOTE(review): no IS_ERR() check of trans is visible here - confirm. */
4571 trans = btrfs_start_transaction(root, 1);
4573 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4578 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4579 filetype, &index, 1, 1);
4583 btrfs_commit_transaction(trans, root);
4586 error("fail to repair inode %llu name %s filetype %u",
4587 ino, name, filetype);
/* stage == 2 is the delete case; anything else is reported as an add. */
4589 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
4590 stage == 2 ? "Delete" : "Add",
4591 ino, name, filetype);
4597 * Traverse the given INODE_REF and call find_dir_item() to find related
4598 * DIR_ITEM/DIR_INDEX.
4600 * @root: the root of the fs/file tree
4601 * @ref_key: the key of the INODE_REF
4602 * @path: the path provides node and slot
4603 * @refs_ret: the count of INODE_REF
4604 * @mode: the st_mode of INODE_ITEM
4605 * @name_ret: returns with the first ref's name
4606 * @namelen_ret: len of the name_ret
4608 * Return 0 if no error occurred.
/*
 * Walks every entry packed into the INODE_REF item that @path points at and
 * verifies, via find_dir_item(), that the parent directory holds a matching
 * DIR_INDEX and DIR_ITEM.  With repair enabled, inconsistencies are handed
 * to repair_ternary_lowmem().
 */
4610 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4611 struct btrfs_path *path, char *name_ret,
4612 u32 *namelen_ret, u64 *refs_ret, int mode)
4614 struct btrfs_key key;
4615 struct btrfs_key location;
4616 struct btrfs_inode_ref *ref;
4617 struct extent_buffer *node;
4618 char namebuf[BTRFS_NAME_LEN] = {0};
4628 int need_research = 0;
4636 /* since after repair, path and the dir item may be changed */
4637 if (need_research) {
4639 btrfs_release_path(path);
4640 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
4641 /* the item was deleted, let path point to the last checked item */
4643 if (path->slots[0] == 0)
4644 btrfs_prev_leaf(root, path);
/* Every DIR_ITEM/DIR_INDEX entry must point at this inode's INODE_ITEM. */
4652 location.objectid = ref_key->objectid;
4653 location.type = BTRFS_INODE_ITEM_KEY;
4654 location.offset = 0;
4655 node = path->nodes[0];
4656 slot = path->slots[0];
/* namebuf is a char array, so this division yields its full byte size. */
4658 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4659 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4660 total = btrfs_item_size_nr(node, slot);
4663 /* Update inode ref count */
4666 index = btrfs_inode_ref_index(node, ref);
4667 name_len = btrfs_inode_ref_name_len(node, ref);
/* Names longer than BTRFS_NAME_LEN are truncated to fit namebuf. */
4669 if (name_len <= BTRFS_NAME_LEN) {
4672 len = BTRFS_NAME_LEN;
4673 warning("root %llu INODE_REF[%llu %llu] name too long",
4674 root->objectid, ref_key->objectid, ref_key->offset);
4677 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4679 /* copy the first name found to name_ret */
4680 if (refs == 1 && name_ret) {
4681 memcpy(name_ret, namebuf, len);
4685 /* Check root dir ref */
/*
 * The root directory may only carry the self-reference "..": index 0,
 * parent equal to BTRFS_FIRST_FREE_OBJECTID.
 */
4686 if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4687 if (index != 0 || len != strlen("..") ||
4688 strncmp("..", namebuf, len) ||
4689 ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4690 /* set err bits then repair will delete the ref */
4691 err |= DIR_INDEX_MISSING;
4692 err |= DIR_ITEM_MISSING;
4697 /* Find related DIR_INDEX */
/* ref_key->offset is the parent directory inode. */
4698 key.objectid = ref_key->offset;
4699 key.type = BTRFS_DIR_INDEX_KEY;
4701 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4702 imode_to_type(mode));
4704 /* Find related dir_item */
4705 key.objectid = ref_key->offset;
4706 key.type = BTRFS_DIR_ITEM_KEY;
4707 key.offset = btrfs_name_hash(namebuf, len);
4708 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4709 imode_to_type(mode));
/*
 * NOTE(review): the repair and report calls below pass name_len (the
 * on-disk length), not the clamped len - confirm this is intended when
 * the name was truncated.
 */
4711 if (tmp_err && repair) {
4712 ret = repair_ternary_lowmem(root, ref_key->offset,
4713 ref_key->objectid, index, namebuf,
4714 name_len, imode_to_type(mode),
4721 print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4722 imode_to_type(mode), tmp_err);
/* Advance by the on-disk size of this entry: header + full name. */
4724 len = sizeof(*ref) + name_len;
4725 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4736 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4737 * DIR_ITEM/DIR_INDEX.
4739 * @root: the root of the fs/file tree
4740 * @ref_key: the key of the INODE_EXTREF
4741 * @refs: the count of INODE_EXTREF
4742 * @mode: the st_mode of INODE_ITEM
4744 * Return 0 if no error occurred.
/*
 * Walks every entry of the INODE_EXTREF item at @node/@slot and verifies,
 * via find_dir_item(), that the parent directory recorded in each entry
 * holds a matching DIR_INDEX and DIR_ITEM.
 */
4746 static int check_inode_extref(struct btrfs_root *root,
4747 struct btrfs_key *ref_key,
4748 struct extent_buffer *node, int slot, u64 *refs,
4751 struct btrfs_key key;
4752 struct btrfs_key location;
4753 struct btrfs_inode_extref *extref;
4754 char namebuf[BTRFS_NAME_LEN] = {0};
/* Every DIR_ITEM/DIR_INDEX entry must point at this inode's INODE_ITEM. */
4764 location.objectid = ref_key->objectid;
4765 location.type = BTRFS_INODE_ITEM_KEY;
4766 location.offset = 0;
4768 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4769 total = btrfs_item_size_nr(node, slot);
4772 /* update inode ref count */
4774 name_len = btrfs_inode_extref_name_len(node, extref);
4775 index = btrfs_inode_extref_index(node, extref);
4776 parent = btrfs_inode_extref_parent(node, extref);
/* Names longer than BTRFS_NAME_LEN are truncated to fit namebuf. */
4777 if (name_len <= BTRFS_NAME_LEN) {
4780 len = BTRFS_NAME_LEN;
4781 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4782 root->objectid, ref_key->objectid, ref_key->offset);
4784 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4786 /* Check root dir ref name */
4787 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4788 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4789 root->objectid, ref_key->objectid, ref_key->offset,
4791 err |= ROOT_DIR_ERROR;
4794 /* find related dir_index */
/*
 * find_dir_item() compares its last argument with the u8 file type stored
 * in the dir entry.  @mode is the inode's st_mode, so it has to go through
 * imode_to_type() first (as check_inode_ref() does); passing the raw mode
 * would be truncated to u8 and never match.
 */
4795 key.objectid = parent;
4796 key.type = BTRFS_DIR_INDEX_KEY;
4798 ret = find_dir_item(root, &key, &location, namebuf, len, imode_to_type(mode));
4801 /* find related dir_item */
4802 key.objectid = parent;
4803 key.type = BTRFS_DIR_ITEM_KEY;
4804 key.offset = btrfs_name_hash(namebuf, len);
4805 ret = find_dir_item(root, &key, &location, namebuf, len, imode_to_type(mode));
/* Advance by the on-disk size of this entry: header + full name. */
4808 len = sizeof(*extref) + name_len;
4809 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4819 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4820 * DIR_ITEM/DIR_INDEX match.
4821 * Return with @index_ret.
4823 * @root: the root of the fs/file tree
4824 * @key: the key of the INODE_REF/INODE_EXTREF
4825 * @name: the name in the INODE_REF/INODE_EXTREF
4826 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4827 * @index_ret: the index in the INODE_REF/INODE_EXTREF,
4828 * value (u64)-1 means do not check index
4829 * @ext_ref: the EXTENDED_IREF feature
4831 * Return 0 if no error occurred.
4832 * Return >0 for error bitmap
/*
 * Looks for a reference with @name / @namelen, first in the INODE_REF item
 * at @key and then in the INODE_EXTREF item for the same parent.  On a
 * match the entry's index is stored in *@index_ret.
 *
 * Note that @key is modified in place for the second lookup: its type
 * becomes BTRFS_INODE_EXTREF_KEY and its offset the extref hash.
 */
4834 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4835 char *name, int namelen, u64 *index_ret,
4836 unsigned int ext_ref)
4838 struct btrfs_path path;
4839 struct btrfs_inode_ref *ref;
4840 struct btrfs_inode_extref *extref;
4841 struct extent_buffer *node;
4842 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4855 btrfs_init_path(&path);
4856 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4858 ret = INODE_REF_MISSING;
4862 node = path.nodes[0];
4863 slot = path.slots[0];
4865 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4866 total = btrfs_item_size_nr(node, slot);
4868 /* Iterate all entry of INODE_REF */
4869 while (cur < total) {
4870 ret = INODE_REF_MISSING;
4872 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4873 ref_index = btrfs_inode_ref_index(node, ref);
/* (u64)-1 in *index_ret acts as a wildcard: any index is accepted. */
4874 if (*index_ret != (u64)-1 && *index_ret != ref_index)
/* The entry would run past the end of the item, or its name is too long. */
4877 if (cur + sizeof(*ref) + ref_namelen > total ||
4878 ref_namelen > BTRFS_NAME_LEN) {
4879 warning("root %llu INODE %s[%llu %llu] name too long",
4881 key->type == BTRFS_INODE_REF_KEY ?
4883 key->objectid, key->offset);
/* Not even the fixed-size header fits in what is left of the item. */
4885 if (cur + sizeof(*ref) > total)
4887 len = min_t(u32, total - cur - sizeof(*ref),
4893 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4896 if (len != namelen || strncmp(ref_namebuf, name, len))
4899 *index_ret = ref_index;
/* Advance by the on-disk size of this entry: header + full name. */
4903 len = sizeof(*ref) + ref_namelen;
4904 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4909 /* Skip if not support EXTENDED_IREF feature */
4913 btrfs_release_path(&path);
4914 btrfs_init_path(&path);
/*
 * Second attempt: the parent dir id moves out of key->offset and the
 * caller's key is rewritten into the matching INODE_EXTREF key.
 */
4916 dir_id = key->offset;
4917 key->type = BTRFS_INODE_EXTREF_KEY;
4918 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4920 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4922 ret = INODE_REF_MISSING;
4926 node = path.nodes[0];
4927 slot = path.slots[0];
4929 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4931 total = btrfs_item_size_nr(node, slot);
4933 /* Iterate all entry of INODE_EXTREF */
4934 while (cur < total) {
4935 ret = INODE_REF_MISSING;
4937 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4938 ref_index = btrfs_inode_extref_index(node, extref);
4939 parent = btrfs_inode_extref_parent(node, extref);
4940 if (*index_ret != (u64)-1 && *index_ret != ref_index)
/* Entries for other parents can share this item when their hashes collide. */
4943 if (parent != dir_id)
4946 if (ref_namelen <= BTRFS_NAME_LEN) {
4949 len = BTRFS_NAME_LEN;
4950 warning("root %llu INODE %s[%llu %llu] name too long",
4952 key->type == BTRFS_INODE_REF_KEY ?
4954 key->objectid, key->offset);
4956 read_extent_buffer(node, ref_namebuf,
4957 (unsigned long)(extref + 1), len);
4959 if (len != namelen || strncmp(ref_namebuf, name, len))
4962 *index_ret = ref_index;
4967 len = sizeof(*extref) + ref_namelen;
4968 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4973 btrfs_release_path(&path);
/*
 * Reports the problems encoded in @err for one directory entry: one message
 * per affected DIR ITEM, DIR INDEX, INODE_ITEM and INODE REF.
 */
4977 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
4978 u64 ino, u64 index, const char *namebuf,
4979 int name_len, u8 filetype, int err)
4981 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
4982 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
4983 root->objectid, key->objectid, key->offset, namebuf,
4985 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing");
/*
 * The wording for the DIR INDEX line is chosen from the DIR_INDEX bit.  It
 * previously tested DIR_ITEM_MISMATCH, so a mismatching index could be
 * reported as "missing".  The misspelt "mismath" is corrected in all three
 * messages.
 */
4988 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
4989 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
4990 root->objectid, key->objectid, index, namebuf, filetype,
4991 err & DIR_INDEX_MISMATCH ? "mismatch" : "missing");
4994 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
4996 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
4997 root->objectid, ino, index, namebuf, filetype,
4998 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
5001 if (err & INODE_REF_MISSING)
5003 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5004 root->objectid, ino, key->objectid, namebuf, filetype);
5009 * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5011 * Returns error after repair
/*
 * Attempts the repairs needed for one directory entry and clears the
 * corresponding bits in @err: first a missing INODE_ITEM, then the
 * DIR_ITEM/DIR_INDEX/INODE_REF triple.
 */
5013 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5014 u64 index, u8 filetype, char *namebuf, u32 name_len,
5019 if (err & INODE_ITEM_MISSING) {
5020 ret = repair_inode_item_missing(root, ino, filetype);
/* Both INODE_ITEM bits are cleared together. */
5022 err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
/* Anything left besides the INODE_ITEM bits concerns the triple. */
5025 if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5026 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5027 name_len, filetype, err);
5029 err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5030 err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5031 err &= ~(INODE_REF_MISSING);
/*
 * Helper for count_dir_isize(): visits the items of key type @type
 * belonging to directory @ino, starting from offset (u64)-1 and stepping
 * backwards with btrfs_previous_item(), and walks the entries of each item.
 */
5037 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5040 struct btrfs_key key;
5041 struct btrfs_path path;
5043 struct btrfs_dir_item *di;
5053 key.offset = (u64)-1;
5055 btrfs_init_path(&path);
5056 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5061 /* if found, go to special case */
5066 ret = btrfs_previous_item(root, &path, ino, type);
5074 di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5076 total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5078 while (cur < total) {
/* Name lengths are capped at BTRFS_NAME_LEN. */
5079 len = btrfs_dir_name_len(path.nodes[0], di);
5080 if (len > BTRFS_NAME_LEN)
5081 len = BTRFS_NAME_LEN;
/* The stride to the next entry also includes the data length. */
5084 len += btrfs_dir_data_len(path.nodes[0], di);
5086 di = (struct btrfs_dir_item *)((char *)di + len);
5092 btrfs_release_path(&path);
/*
 * Computes the size of directory @ino as the sum of the values that
 * __count_dir_isize() reports for its DIR_ITEM and DIR_INDEX items, and
 * stores it in *@size.
 */
5096 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5103 ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5107 ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5111 *size = item_size + index_size;
5115 error("failed to count root %llu INODE[%llu] root size",
5116 root->objectid, ino);
5121 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5122 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5124 * @root: the root of the fs/file tree
5125 * @di_key: the key of the DIR_ITEM/DIR_INDEX
5127 * @size: the st_size of the INODE_ITEM
5128 * @ext_ref: the EXTENDED_IREF feature
5130 * Return 0 if no error occurred.
5131 * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
/*
 * Walks every entry of the DIR_ITEM/DIR_INDEX item at @di_key.  For each
 * entry it checks the name hash (DIR_ITEM only), the target INODE_ITEM and
 * its file type, the INODE_REF/INODE_EXTREF, and the sibling
 * DIR_INDEX/DIR_ITEM.  The name lengths seen are added to *@size.  With
 * repair enabled, inconsistencies are handed to repair_dir_item().
 */
5133 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5134 struct btrfs_path *path, u64 *size,
5135 unsigned int ext_ref)
5137 struct btrfs_dir_item *di;
5138 struct btrfs_inode_item *ii;
5139 struct btrfs_key key;
5140 struct btrfs_key location;
5141 struct extent_buffer *node;
5143 char namebuf[BTRFS_NAME_LEN] = {0};
5155 int need_research = 0;
5158 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5159 * ignore index check.
5161 if (di_key->type == BTRFS_DIR_INDEX_KEY)
5162 index = di_key->offset;
5169 /* since after repair, path and the dir item may be changed */
5170 if (need_research) {
5172 err |= DIR_COUNT_AGAIN;
5173 btrfs_release_path(path);
5174 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5175 /* the item was deleted, let path point the last checked item */
5177 if (path->slots[0] == 0)
5178 btrfs_prev_leaf(root, path);
5186 node = path->nodes[0];
5187 slot = path->slots[0];
5189 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5190 total = btrfs_item_size_nr(node, slot);
/* namebuf is a char array, so this division yields its full byte size. */
5191 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5193 while (cur < total) {
5194 data_len = btrfs_dir_data_len(node, di);
5197 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5199 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5200 di_key->objectid, di_key->offset, data_len);
5202 name_len = btrfs_dir_name_len(node, di);
/* Names longer than BTRFS_NAME_LEN are truncated to fit namebuf. */
5203 if (name_len <= BTRFS_NAME_LEN) {
5206 len = BTRFS_NAME_LEN;
5207 warning("root %llu %s[%llu %llu] name too long",
5209 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5210 di_key->objectid, di_key->offset);
5212 (*size) += name_len;
5213 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5215 filetype = btrfs_dir_type(node, di);
/* A DIR_ITEM key offset must equal the hash of the name it stores. */
5217 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5218 di_key->offset != btrfs_name_hash(namebuf, len)) {
5220 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5221 root->objectid, di_key->objectid, di_key->offset,
5222 namebuf, len, filetype, di_key->offset,
5223 btrfs_name_hash(namebuf, len));
5226 btrfs_dir_item_key_to_cpu(node, di, &location);
5227 /* Ignore related ROOT_ITEM check */
5228 if (location.type == BTRFS_ROOT_ITEM_KEY)
5231 btrfs_release_path(path);
5232 /* Check relative INODE_ITEM(existence/filetype) */
5233 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5235 tmp_err |= INODE_ITEM_MISSING;
5239 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5240 struct btrfs_inode_item);
5241 mode = btrfs_inode_mode(path->nodes[0], ii);
5242 if (imode_to_type(mode) != filetype) {
5243 tmp_err |= INODE_ITEM_MISMATCH;
5247 /* Check relative INODE_REF/INODE_EXTREF */
5248 key.objectid = location.objectid;
5249 key.type = BTRFS_INODE_REF_KEY;
5250 key.offset = di_key->objectid;
5251 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5254 /* check relative INDEX/ITEM */
/*
 * Cross-check the sibling item: a DIR_ITEM needs its DIR_INDEX and vice
 * versa.  The choice has to be made from di_key->type.  key.type was set
 * to BTRFS_INODE_REF_KEY above (and find_inode_ref() may have changed it
 * to BTRFS_INODE_EXTREF_KEY), so testing key.type could never select the
 * DIR_INDEX lookup.
 */
5255 key.objectid = di_key->objectid;
5256 if (di_key->type == BTRFS_DIR_ITEM_KEY) {
5257 key.type = BTRFS_DIR_INDEX_KEY;
5260 key.type = BTRFS_DIR_ITEM_KEY;
5261 key.offset = btrfs_name_hash(namebuf, name_len);
5264 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5265 name_len, filetype);
5266 /* find_dir_item may find index */
5267 if (key.type == BTRFS_DIR_INDEX_KEY)
5271 if (tmp_err && repair) {
5272 ret = repair_dir_item(root, di_key->objectid,
5273 location.objectid, index,
5274 imode_to_type(mode), namebuf,
/* repair_dir_item() returns the error bits still set after the repair. */
5276 if (ret != tmp_err) {
5281 btrfs_release_path(path);
5282 print_dir_item_err(root, di_key, location.objectid, index,
5283 namebuf, name_len, filetype, tmp_err);
/* Advance by the on-disk size of this entry: header + name + data. */
5285 len = sizeof(*di) + name_len + data_len;
5286 di = (struct btrfs_dir_item *)((char *)di + len);
5289 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5290 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5291 root->objectid, di_key->objectid,
/* Point @path back at the item for @di_key before returning. */
5298 btrfs_release_path(path);
5299 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5301 err |= ret > 0 ? -ENOENT : ret;
5306 * Check file extent datasum/hole, update the size of the file extents,
5307 * check and update the last offset of the file extent.
5309 * @root: the root of fs/file tree.
5310 * @fkey: the key of the file extent.
5311 * @nodatasum: INODE_NODATASUM feature.
5312 * @size: the sum of all EXTENT_DATA items size for this inode.
5313 * @end: the offset of the last extent.
5315 * Return 0 if no error occurred.
/*
 * Validates one EXTENT_DATA item.  Inline extents are checked for a
 * non-empty and consistent length.  Regular and preallocated extents are
 * checked for a valid type, for checksum coverage and for continuity with
 * the previous extent.  *@size and *@end are advanced by the extent length.
 */
5317 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5318 struct extent_buffer *node, int slot,
5319 unsigned int nodatasum, u64 *size, u64 *end)
5321 struct btrfs_file_extent_item *fi;
5324 u64 extent_num_bytes;
5326 u64 csum_found; /* In byte size, sectorsize aligned */
5327 u64 search_start; /* Logical range start we search for csum */
5328 u64 search_len; /* Logical range len we search for csum */
5329 unsigned int extent_type;
5330 unsigned int is_hole;
5335 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5337 /* Check inline extent */
5338 extent_type = btrfs_file_extent_type(node, fi);
5339 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5340 struct btrfs_item *e = btrfs_item_nr(slot);
5341 u32 item_inline_len;
5343 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5344 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5345 compressed = btrfs_file_extent_compression(node, fi);
5346 if (extent_num_bytes == 0) {
5348 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5349 root->objectid, fkey->objectid, fkey->offset);
5350 err |= FILE_EXTENT_ERROR;
/* The two inline lengths are only compared for uncompressed extents. */
5352 if (!compressed && extent_num_bytes != item_inline_len) {
5354 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5355 root->objectid, fkey->objectid, fkey->offset,
5356 extent_num_bytes, item_inline_len);
5357 err |= FILE_EXTENT_ERROR;
5359 *end += extent_num_bytes;
5360 *size += extent_num_bytes;
5364 /* Check extent type */
5365 if (extent_type != BTRFS_FILE_EXTENT_REG &&
5366 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5367 err |= FILE_EXTENT_ERROR;
5368 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5369 root->objectid, fkey->objectid, fkey->offset);
5373 /* Check REG_EXTENT/PREALLOC_EXTENT */
5374 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5375 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5376 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5377 extent_offset = btrfs_file_extent_offset(node, fi);
5378 compressed = btrfs_file_extent_compression(node, fi);
/* A hole has neither a disk address nor a disk length. */
5379 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5382 * Check EXTENT_DATA csum
5384 * For plain (uncompressed) extent, we should only check the range
5385 * we're referring to, as it's possible that part of prealloc extent
5386 * has been written, and has csum:
5388 * |<--- Original large preallocated extent A ---->|
5389 * |<- Prealloc File Extent ->|<- Regular Extent ->|
5392 * For compressed extent, we should check the whole range.
5395 search_start = disk_bytenr + extent_offset;
5396 search_len = extent_num_bytes;
5398 search_start = disk_bytenr;
5399 search_len = disk_num_bytes;
5401 ret = count_csum_range(root, search_start, search_len, &csum_found);
/*
 * Three mutually exclusive findings: checksums on a nodatasum inode,
 * missing checksums on a regular non-hole extent, or checksums on a
 * preallocated extent.
 */
5402 if (csum_found > 0 && nodatasum) {
5403 err |= ODD_CSUM_ITEM;
5404 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5405 root->objectid, fkey->objectid, fkey->offset);
5406 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5407 !is_hole && (ret < 0 || csum_found < search_len)) {
5408 err |= CSUM_ITEM_MISSING;
5409 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5410 root->objectid, fkey->objectid, fkey->offset,
5411 csum_found, search_len);
5412 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5413 err |= ODD_CSUM_ITEM;
5414 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5415 root->objectid, fkey->objectid, fkey->offset, csum_found);
5418 /* Check EXTENT_DATA hole */
/*
 * Unless the global no_holes flag is set, each extent must start exactly
 * where the previous one ended.
 */
5419 if (!no_holes && *end != fkey->offset) {
5420 err |= FILE_EXTENT_ERROR;
5421 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
5422 root->objectid, fkey->objectid, fkey->offset);
5425 *end += extent_num_bytes;
5427 *size += extent_num_bytes;
5433 * Set inode item nbytes to @nbytes
5435 * Returns 0 on success
5436 * Returns != 0 on error
/*
 * Rewrites the nbytes field of the INODE_ITEM of @ino inside its own
 * transaction.  The key that @path pointed at on entry is remembered and
 * searched again afterwards, so the caller gets a path for the same item
 * back.
 */
5438 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5439 struct btrfs_path *path,
5440 u64 ino, u64 nbytes)
5442 struct btrfs_trans_handle *trans;
5443 struct btrfs_inode_item *ii;
5444 struct btrfs_key key;
5445 struct btrfs_key research_key;
/* Remember where the caller's path was pointing. */
5449 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5452 key.type = BTRFS_INODE_ITEM_KEY;
5455 trans = btrfs_start_transaction(root, 1);
5456 if (IS_ERR(trans)) {
5457 ret = PTR_ERR(trans);
5462 btrfs_release_path(path);
/* ins_len = 0, cow = 1: the item is modified in place. */
5463 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5471 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5472 struct btrfs_inode_item);
5473 btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5474 btrfs_mark_buffer_dirty(path->nodes[0]);
5476 btrfs_commit_transaction(trans, root);
5479 error("failed to set nbytes in inode %llu root %llu",
5480 ino, root->root_key.objectid);
/*
 * The newline belongs at the end of the message, as in the sibling
 * repair_dir_isize_lowmem(); it used to split the sentence before " to"
 * and leave the line unterminated.
 */
5482 printf("Set nbytes in inode item %llu root %llu to %llu\n", ino,
5483 root->root_key.objectid, nbytes);
/* Restore the caller's path position. */
5486 btrfs_release_path(path);
5487 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5494 * Set directory inode isize to @isize.
5496 * Returns 0 on success.
5497 * Returns != 0 on error.
/*
 * Rewrite the size field of an inode's INODE_ITEM in @root to the new isize.
 *
 * The key @path points at on entry is saved and searched for again after the
 * update, so @path is re-positioned on that key afterwards.
 */
5499 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5500 struct btrfs_path *path,
5503 struct btrfs_trans_handle *trans;
5504 struct btrfs_inode_item *ii;
5505 struct btrfs_key key;
5506 struct btrfs_key research_key;
/* Remember the caller's position so it can be looked up again at the end. */
5510 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5513 key.type = BTRFS_INODE_ITEM_KEY;
5516 trans = btrfs_start_transaction(root, 1);
5517 if (IS_ERR(trans)) {
5518 ret = PTR_ERR(trans);
/* Reuse @path for the lookup of the inode item inside the transaction. */
5523 btrfs_release_path(path);
5524 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Store the new size in the leaf and mark the buffer dirty. */
5532 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5533 struct btrfs_inode_item);
5534 btrfs_set_inode_size(path->nodes[0], ii, isize);
5535 btrfs_mark_buffer_dirty(path->nodes[0]);
5537 btrfs_commit_transaction(trans, root);
5540 error("failed to set isize in inode %llu root %llu",
5541 ino, root->root_key.objectid);
5543 printf("Set isize in inode %llu root %llu to %llu\n",
5544 ino, root->root_key.objectid, isize);
/* Put @path back on the key it pointed at when the function was entered. */
5546 btrfs_release_path(path);
5547 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5554 * Wrapper function for btrfs_add_orphan_item().
5556 * Returns 0 on success.
5557 * Returns != 0 on error.
/*
 * Add an orphan item for inode @ino to @root by calling
 * btrfs_add_orphan_item() inside a transaction.
 *
 * The key @path points at on entry is saved and searched for again after the
 * insertion, so @path is re-positioned on that key afterwards.
 */
5559 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5560 struct btrfs_path *path, u64 ino)
5562 struct btrfs_trans_handle *trans;
5563 struct btrfs_key research_key;
/* Remember the caller's position so it can be looked up again at the end. */
5567 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5569 trans = btrfs_start_transaction(root, 1);
5570 if (IS_ERR(trans)) {
5571 ret = PTR_ERR(trans);
/* @path is released first because it is handed to the insert helper. */
5576 btrfs_release_path(path);
5577 ret = btrfs_add_orphan_item(trans, root, path, ino);
5579 btrfs_commit_transaction(trans, root);
5582 error("failed to add inode %llu as orphan item root %llu",
5583 ino, root->root_key.objectid);
5585 printf("Added inode %llu as orphan item root %llu\n",
5586 ino, root->root_key.objectid);
/* Put @path back on the key it pointed at when the function was entered. */
5588 btrfs_release_path(path);
5589 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5596 * Check INODE_ITEM and related ITEMs (the same inode number)
5597 * 1. check link count
5598 * 2. check inode ref/extref
5599 * 3. check dir item/index
5601 * @ext_ref: the EXTENDED_IREF feature
5603 * Return 0 if no error occurred.
5604 * Return >0 on error or when the traversal is done (by error bitmap)
/*
 * Check the INODE_ITEM @path points at together with the following items
 * that carry the same objectid.
 *
 * Each following item is dispatched on its key type to the matching checker
 * (inode ref, extref, dir item/index, file extent). Afterwards the nlink,
 * isize and nbytes values read from the inode item are compared with the
 * values collected while walking those items. Some mismatches are handed to
 * the *_lowmem repair helpers.
 */
5606 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5607 unsigned int ext_ref)
5609 struct extent_buffer *node;
5610 struct btrfs_inode_item *ii;
5611 struct btrfs_key key;
5620 u64 extent_size = 0;
5622 unsigned int nodatasum;
5626 char namebuf[BTRFS_NAME_LEN] = {0};
5629 node = path->nodes[0];
5630 slot = path->slots[0];
5632 btrfs_item_key_to_cpu(node, &key, slot);
5633 inode_id = key.objectid;
/* For the orphan objectid the path is advanced with btrfs_next_item(). */
5635 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5636 ret = btrfs_next_item(root, path);
/* Cache the inode item fields that are verified after the item walk. */
5642 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5643 isize = btrfs_inode_size(node, ii);
5644 nbytes = btrfs_inode_nbytes(node, ii);
5645 mode = btrfs_inode_mode(node, ii);
5646 dir = imode_to_type(mode) == BTRFS_FT_DIR;
5647 nlink = btrfs_inode_nlink(node, ii);
5648 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5651 ret = btrfs_next_item(root, path);
5653 /* out will fill 'err' using current statistics */
5655 } else if (ret > 0) {
/* Re-read the position: btrfs_next_item() may have moved to another leaf. */
5660 node = path->nodes[0];
5661 slot = path->slots[0];
5662 btrfs_item_key_to_cpu(node, &key, slot);
/* Only items with the inode's own objectid belong to this inode. */
5663 if (key.objectid != inode_id)
5667 case BTRFS_INODE_REF_KEY:
5668 ret = check_inode_ref(root, &key, path, namebuf,
5669 &name_len, &refs, mode);
5672 case BTRFS_INODE_EXTREF_KEY:
5673 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5674 warning("root %llu EXTREF[%llu %llu] isn't supported",
5675 root->objectid, key.objectid,
5677 ret = check_inode_extref(root, &key, node, slot, &refs,
5681 case BTRFS_DIR_ITEM_KEY:
5682 case BTRFS_DIR_INDEX_KEY:
5684 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5685 root->objectid, inode_id,
5686 imode_to_type(mode), key.objectid,
5689 ret = check_dir_item(root, &key, path, &size, ext_ref);
5692 case BTRFS_EXTENT_DATA_KEY:
5694 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5695 root->objectid, inode_id, key.objectid,
5698 ret = check_file_extent(root, &key, node, slot,
5699 nodatasum, &extent_size,
5703 case BTRFS_XATTR_ITEM_KEY:
5706 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5707 key.objectid, key.type, key.offset);
5712 /* verify INODE_ITEM nlink/isize/nbytes */
/* In repair mode, refresh @size via count_dir_isize() when it was requested. */
5714 if (repair && (err & DIR_COUNT_AGAIN)) {
5715 err &= ~DIR_COUNT_AGAIN;
5716 count_dir_isize(root, inode_id, &size);
5719 err |= LINK_COUNT_ERROR;
5720 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5721 root->objectid, inode_id, nlink);
5725 * Just a warning, as dir inode nbytes is just an
5726 * instructive value.
5728 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5729 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5730 root->objectid, inode_id,
5731 root->fs_info->nodesize);
/* The stored isize must equal the size accumulated from the dir items. */
5734 if (isize != size) {
5736 ret = repair_dir_isize_lowmem(root, path,
5738 if (!repair || ret) {
5741 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5742 root->objectid, inode_id, isize, size);
/* The stored nlink must equal the number of refs that were counted. */
5746 if (nlink != refs) {
5747 err |= LINK_COUNT_ERROR;
5748 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5749 root->objectid, inode_id, nlink, refs);
5750 } else if (!nlink) {
5752 ret = repair_inode_orphan_item_lowmem(root,
5754 if (!repair || ret) {
5756 error("root %llu INODE[%llu] is orphan item",
5757 root->objectid, inode_id);
5761 if (!nbytes && !no_holes && extent_end < isize) {
5762 err |= NBYTES_ERROR;
5763 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5764 root->objectid, inode_id, isize);
/* The stored nbytes must equal the extent size summed by check_file_extent(). */
5767 if (nbytes != extent_size) {
5769 ret = repair_inode_nbytes_lowmem(root, path,
5770 inode_id, extent_size);
5771 if (!repair || ret) {
5772 err |= NBYTES_ERROR;
5774 "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5775 root->objectid, inode_id, nbytes,
5785 * Insert the missing inode item and inode ref.
5787 * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
5788 * Root dir should be handled specially because root dir is the root of fs.
5790 * returns err (>0 or 0) after repair
/*
 * Repair the first inode (BTRFS_FIRST_FREE_OBJECTID) of @root according to
 * the bits set in @err.
 *
 * INODE_REF_MISSING is handled by inserting a ".." inode ref whose inode and
 * parent are both BTRFS_FIRST_FREE_OBJECTID. INODE_ITEM_MISSING is handed to
 * repair_inode_item_missing() with file type BTRFS_FT_DIR. The two bits are
 * the ones this function clears from @err.
 */
5792 static int repair_fs_first_inode(struct btrfs_root *root, int err)
5794 struct btrfs_trans_handle *trans;
5795 struct btrfs_key key;
5796 struct btrfs_path path;
5797 int filetype = BTRFS_FT_DIR;
5800 btrfs_init_path(&path);
/* Missing ref: the key uses the first inode as both objectid and offset. */
5802 if (err & INODE_REF_MISSING) {
5803 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5804 key.type = BTRFS_INODE_REF_KEY;
5805 key.offset = BTRFS_FIRST_FREE_OBJECTID;
5807 trans = btrfs_start_transaction(root, 1);
5808 if (IS_ERR(trans)) {
5809 ret = PTR_ERR(trans);
5813 btrfs_release_path(&path);
5814 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
/* Insert the ".." name (length 2) with index 0. */
5818 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
5819 BTRFS_FIRST_FREE_OBJECTID,
5820 BTRFS_FIRST_FREE_OBJECTID, 0);
5824 printf("Add INODE_REF[%llu %llu] name %s\n",
5825 BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
5827 err &= ~INODE_REF_MISSING;
5830 error("fail to insert first inode's ref");
5831 btrfs_commit_transaction(trans, root);
/* Missing item: recreated by the generic helper as a directory inode. */
5834 if (err & INODE_ITEM_MISSING) {
5835 ret = repair_inode_item_missing(root,
5836 BTRFS_FIRST_FREE_OBJECTID, filetype);
5839 err &= ~INODE_ITEM_MISSING;
5843 error("fail to repair first inode");
5844 btrfs_release_path(&path);
5849 * check first root dir's inode_item and inode_ref
5851 * returns 0 means no error
5852 * returns >0 means error
5853 * returns <0 means fatal error
/*
 * Check that @root has an INODE_ITEM for BTRFS_FIRST_FREE_OBJECTID whose
 * mode is a directory, and look up its ".." INODE_REF.
 *
 * Problems are recorded in an error bitmap (INODE_ITEM_MISSING,
 * INODE_ITEM_MISMATCH, INODE_REF_MISSING), which is passed through
 * repair_fs_first_inode() before the remaining bits are reported. The result
 * is the negative @ret if one occurred, otherwise the bitmap.
 */
5855 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5857 struct btrfs_path path;
5858 struct btrfs_key key;
5859 struct btrfs_inode_item *ii;
5865 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5866 key.type = BTRFS_INODE_ITEM_KEY;
5869 /* For root being dropped, we don't need to check first inode */
5870 if (btrfs_root_refs(&root->root_item) == 0 &&
5871 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5872 BTRFS_FIRST_FREE_OBJECTID)
/* Read-only lookup of the first inode item. */
5875 btrfs_init_path(&path);
5876 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5881 err |= INODE_ITEM_MISSING;
/* The first inode must be a directory. */
5883 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
5884 struct btrfs_inode_item);
5885 mode = btrfs_inode_mode(path.nodes[0], ii);
5886 if (imode_to_type(mode) != BTRFS_FT_DIR)
5887 err |= INODE_ITEM_MISMATCH;
5890 /* lookup first inode ref */
5891 key.offset = BTRFS_FIRST_FREE_OBJECTID;
5892 key.type = BTRFS_INODE_REF_KEY;
5893 /* special index value */
5896 ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
5902 btrfs_release_path(&path);
/* The repair helper returns the bitmap with the fixed bits cleared. */
5905 err = repair_fs_first_inode(root, err);
5907 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
5908 error("root dir INODE_ITEM is %s",
5909 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
5910 if (err & INODE_REF_MISSING)
5911 error("root dir INODE_REF is missing");
/* A negative @ret takes precedence over the error bitmap. */
5913 return ret < 0 ? ret : err;
/*
 * Look up a tree backref of @rec in rec->backref_tree.
 *
 * A stack-allocated tree_backref is filled in as the search key and compared
 * against the tree nodes with compare_extent_backref(). The node that is
 * found is converted back to a tree_backref.
 */
5916 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5917 u64 parent, u64 root)
5919 struct rb_node *node;
5920 struct tree_backref *back = NULL;
5921 struct tree_backref match = {
/* Parent-keyed form of the search key, flagged as a full backref. */
5928 match.parent = parent;
5929 match.node.full_backref = 1;
5934 node = rb_search(&rec->backref_tree, &match.node.node,
5935 (rb_compare_keys)compare_extent_backref, NULL);
5937 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Look up a data backref of @rec in rec->backref_tree.
 *
 * A stack-allocated data_backref is filled in as the search key (it carries
 * found_ref and disk_bytenr, among other fields) and compared against the
 * tree nodes with compare_extent_backref(). The node that is found is
 * converted back to a data_backref.
 */
5942 static struct data_backref *find_data_backref(struct extent_record *rec,
5943 u64 parent, u64 root,
5944 u64 owner, u64 offset,
5946 u64 disk_bytenr, u64 bytes)
5948 struct rb_node *node;
5949 struct data_backref *back = NULL;
5950 struct data_backref match = {
5957 .found_ref = found_ref,
5958 .disk_bytenr = disk_bytenr,
/* Parent-keyed form of the search key, flagged as a full backref. */
5962 match.parent = parent;
5963 match.node.full_backref = 1;
5968 node = rb_search(&rec->backref_tree, &match.node.node,
5969 (rb_compare_keys)compare_extent_backref, NULL);
5971 back = to_data_backref(rb_node_to_extent_backref(node));
5976 * Iterate over all items in the tree and call check_inode_item() to check.
5978 * @root: the root of the tree to be checked.
5979 * @ext_ref: the EXTENDED_IREF feature
5981 * Return 0 if no error found.
5982 * Return <0 for error.
/*
 * Check one fs/subvolume tree.
 *
 * The first inode is checked separately with check_fs_first_inode(). Then
 * the tree is traversed by alternating walk_down_tree_v2() and
 * walk_up_tree_v2(). The traversal starts at the root node, or at the
 * drop_progress key stored in the root item.
 */
5984 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5986 struct btrfs_path path;
5987 struct node_refs nrefs;
5988 struct btrfs_root_item *root_item = &root->root_item;
5994 * We need to manually check the first inode item(256)
5995 * As the following traversal function will only start from
5996 * the first inode item in the leaf, if inode item(256) is missing
5997 * we will just skip it forever.
5999 ret = check_fs_first_inode(root, ext_ref);
6004 memset(&nrefs, 0, sizeof(nrefs));
6005 level = btrfs_header_level(root->node);
6006 btrfs_init_path(&path);
/*
 * Root with references, or with a zero drop_progress objectid: start at
 * slot 0 of the root node and take an extra reference on it for the path.
 */
6008 if (btrfs_root_refs(root_item) > 0 ||
6009 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6010 path.nodes[level] = root->node;
6011 path.slots[level] = 0;
6012 extent_buffer_get(root->node);
6014 struct btrfs_key key;
/* Otherwise search for the drop_progress key at the recorded drop_level. */
6016 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6017 level = root_item->drop_level;
6018 path.lowest_level = level;
6019 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6026 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
6029 /* if ret is negative, walk shall stop */
6035 ret = walk_up_tree_v2(root, &path, &level);
6037 /* Normal exit, reset ret to err */
6044 btrfs_release_path(&path);
6049 * Find the relative ref for root_ref and root_backref.
6051 * @root: the root of the root tree.
6052 * @ref_key: the key of the root ref.
6054 * Return 0 if no error occurred.
/*
 * Check that the ROOT_REF or ROOT_BACKREF item at @node/@slot has a matching
 * counterpart item in @root.
 *
 * The counterpart key swaps objectid and offset and flips the key type
 * between ROOT_REF and ROOT_BACKREF. ROOT_REF_MISSING is recorded when the
 * counterpart cannot be found. ROOT_REF_MISMATCH is recorded when dirid,
 * sequence, name length or name differ.
 */
6056 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6057 struct extent_buffer *node, int slot)
6059 struct btrfs_path path;
6060 struct btrfs_key key;
6061 struct btrfs_root_ref *ref;
6062 struct btrfs_root_ref *backref;
6063 char ref_name[BTRFS_NAME_LEN] = {0};
6064 char backref_name[BTRFS_NAME_LEN] = {0};
6070 u32 backref_namelen;
/* Read the fixed fields of the item being checked. */
6075 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6076 ref_dirid = btrfs_root_ref_dirid(node, ref);
6077 ref_seq = btrfs_root_ref_sequence(node, ref);
6078 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* The copy length is capped at BTRFS_NAME_LEN, the size of ref_name. */
6080 if (ref_namelen <= BTRFS_NAME_LEN) {
6083 len = BTRFS_NAME_LEN;
6084 warning("%s[%llu %llu] ref_name too long",
6085 ref_key->type == BTRFS_ROOT_REF_KEY ?
6086 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
/* The name bytes are read from directly behind the btrfs_root_ref struct. */
6089 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6091 /* Find relative root_ref */
6092 key.objectid = ref_key->offset;
/* BACKREF + REF - type yields the other one of the two key types. */
6093 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6094 key.offset = ref_key->objectid;
6096 btrfs_init_path(&path);
6097 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6099 err |= ROOT_REF_MISSING;
6100 error("%s[%llu %llu] couldn't find relative ref",
6101 ref_key->type == BTRFS_ROOT_REF_KEY ?
6102 "ROOT_REF" : "ROOT_BACKREF",
6103 ref_key->objectid, ref_key->offset);
/* Read the same fields from the counterpart item. */
6107 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6108 struct btrfs_root_ref);
6109 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6110 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6111 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6113 if (backref_namelen <= BTRFS_NAME_LEN) {
6114 len = backref_namelen;
6116 len = BTRFS_NAME_LEN;
6117 warning("%s[%llu %llu] ref_name too long",
6118 key.type == BTRFS_ROOT_REF_KEY ?
6119 "ROOT_REF" : "ROOT_BACKREF",
6120 key.objectid, key.offset);
6122 read_extent_buffer(path.nodes[0], backref_name,
6123 (unsigned long)(backref + 1), len);
/* Both items must agree on every field and on the first @len name bytes. */
6125 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6126 ref_namelen != backref_namelen ||
6127 strncmp(ref_name, backref_name, len)) {
6128 err |= ROOT_REF_MISMATCH;
6129 error("%s[%llu %llu] mismatch relative ref",
6130 ref_key->type == BTRFS_ROOT_REF_KEY ?
6131 "ROOT_REF" : "ROOT_BACKREF",
6132 ref_key->objectid, ref_key->offset);
6135 btrfs_release_path(&path);
6140 * Check all fs/file trees in low_memory mode.
6142 * 1. for fs tree root item, call check_fs_root_v2()
6143 * 2. for fs tree root ref/backref, call check_root_ref()
6145 * Return 0 if no error occurred.
/*
 * Walk the items of the tree root starting at BTRFS_FS_TREE_OBJECTID.
 *
 * ROOT_ITEMs whose objectid passes fs_root_objectid() are read and checked
 * with check_fs_root_v2(). ROOT_REF and ROOT_BACKREF items are checked with
 * check_root_ref(). The key.objectid > BTRFS_LAST_FREE_OBJECTID test marks
 * the upper bound of the walk.
 */
6147 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6149 struct btrfs_root *tree_root = fs_info->tree_root;
6150 struct btrfs_root *cur_root = NULL;
6151 struct btrfs_path path;
6152 struct btrfs_key key;
6153 struct extent_buffer *node;
6154 unsigned int ext_ref;
6159 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6161 btrfs_init_path(&path);
6162 key.objectid = BTRFS_FS_TREE_OBJECTID;
6164 key.type = BTRFS_ROOT_ITEM_KEY;
6166 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6170 } else if (ret > 0) {
6176 node = path.nodes[0];
6177 slot = path.slots[0];
6178 btrfs_item_key_to_cpu(node, &key, slot);
6179 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6181 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6182 fs_root_objectid(key.objectid)) {
/* Reloc trees are read with the no-cache variant and freed again below. */
6183 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6184 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6187 key.offset = (u64)-1;
6188 cur_root = btrfs_read_fs_root(fs_info, &key);
6191 if (IS_ERR(cur_root)) {
6192 error("Fail to read fs/subvol tree: %lld",
6198 ret = check_fs_root_v2(cur_root, ext_ref);
6201 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6202 btrfs_free_fs_root(cur_root);
6203 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6204 key.type == BTRFS_ROOT_BACKREF_KEY) {
6205 ret = check_root_ref(tree_root, &key, node, slot);
6209 ret = btrfs_next_item(tree_root, &path);
6219 btrfs_release_path(&path);
/*
 * Entry point for the fs roots check. It prints a heading unless progress
 * reporting is enabled, then runs check_fs_roots_v2() in CHECK_MODE_LOWMEM
 * or check_fs_roots() with @root_cache.
 */
6223 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6224 struct cache_tree *root_cache)
6228 if (!ctx.progress_enabled)
6229 fprintf(stderr, "checking fs roots\n");
6230 if (check_mode == CHECK_MODE_LOWMEM)
6231 ret = check_fs_roots_v2(fs_info);
6233 ret = check_fs_roots(fs_info, root_cache);
/*
 * Verify every backref stored in rec->backref_tree and the total reference
 * count of @rec.
 *
 * Each backref is tested for presence in the extent tree and for being
 * referenced back. Data backrefs are also tested for their local count, disk
 * bytenr and byte length against @rec. The accumulated count is finally
 * compared with rec->refs. Each failed test has a diagnostic that is written
 * to stderr.
 */
6238 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6240 struct extent_backref *back, *tmp;
6241 struct tree_backref *tback;
6242 struct data_backref *dback;
6246 rbtree_postorder_for_each_entry_safe(back, tmp,
6247 &rec->backref_tree, node) {
/* Backref that was never seen in the extent tree. */
6248 if (!back->found_extent_tree) {
6252 if (back->is_data) {
6253 dback = to_data_backref(back);
6254 fprintf(stderr, "Data backref %llu %s %llu"
6255 " owner %llu offset %llu num_refs %lu"
6256 " not found in extent tree\n",
6257 (unsigned long long)rec->start,
6258 back->full_backref ?
6260 back->full_backref ?
6261 (unsigned long long)dback->parent:
6262 (unsigned long long)dback->root,
6263 (unsigned long long)dback->owner,
6264 (unsigned long long)dback->offset,
6265 (unsigned long)dback->num_refs);
6267 tback = to_tree_backref(back);
6268 fprintf(stderr, "Tree backref %llu parent %llu"
6269 " root %llu not found in extent tree\n",
6270 (unsigned long long)rec->start,
6271 (unsigned long long)tback->parent,
6272 (unsigned long long)tback->root);
/* Tree backref without a reference pointing back at it. */
6275 if (!back->is_data && !back->found_ref) {
6279 tback = to_tree_backref(back);
6280 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6281 (unsigned long long)rec->start,
6282 back->full_backref ? "parent" : "root",
6283 back->full_backref ?
6284 (unsigned long long)tback->parent :
6285 (unsigned long long)tback->root, back);
/* Data backref: the number of refs found must match the recorded num_refs. */
6287 if (back->is_data) {
6288 dback = to_data_backref(back);
6289 if (dback->found_ref != dback->num_refs) {
6293 fprintf(stderr, "Incorrect local backref count"
6294 " on %llu %s %llu owner %llu"
6295 " offset %llu found %u wanted %u back %p\n",
6296 (unsigned long long)rec->start,
6297 back->full_backref ?
6299 back->full_backref ?
6300 (unsigned long long)dback->parent:
6301 (unsigned long long)dback->root,
6302 (unsigned long long)dback->owner,
6303 (unsigned long long)dback->offset,
6304 dback->found_ref, dback->num_refs, back);
/* Data backref: its disk bytenr must be the start of the extent record. */
6306 if (dback->disk_bytenr != rec->start) {
6310 fprintf(stderr, "Backref disk bytenr does not"
6311 " match extent record, bytenr=%llu, "
6312 "ref bytenr=%llu\n",
6313 (unsigned long long)rec->start,
6314 (unsigned long long)dback->disk_bytenr);
/* Data backref: its byte length must equal rec->nr. */
6317 if (dback->bytes != rec->nr) {
6321 fprintf(stderr, "Backref bytes do not match "
6322 "extent backref, bytenr=%llu, ref "
6323 "bytes=%llu, backref bytes=%llu\n",
6324 (unsigned long long)rec->start,
6325 (unsigned long long)rec->nr,
6326 (unsigned long long)dback->bytes);
/* Accumulate into @found; data backrefs contribute their found_ref count. */
6329 if (!back->is_data) {
6332 dback = to_data_backref(back);
6333 found += dback->found_ref;
/* The accumulated count must equal the reference count stored in @rec. */
6336 if (found != rec->refs) {
6340 fprintf(stderr, "Incorrect global backref count "
6341 "on %llu found %llu wanted %llu\n",
6342 (unsigned long long)rec->start,
6343 (unsigned long long)found,
6344 (unsigned long long)rec->refs);
/*
 * Per-node callback handed to rb_free_nodes(). It maps the rb_node back to
 * its containing extent_backref. The rest of the body is not visible in this
 * extract.
 */
6350 static void __free_one_backref(struct rb_node *node)
6352 struct extent_backref *back = rb_node_to_extent_backref(node);
/*
 * Release every backref of @rec by running rb_free_nodes() over
 * rec->backref_tree with __free_one_backref() as the per-node callback.
 */
6357 static void free_all_extent_backrefs(struct extent_record *rec)
6359 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Drain @extent_cache: take the first cache extent, unlink it from the cache
 * and free the backrefs of the extent_record that contains it.
 */
6362 static void free_extent_record_cache(struct cache_tree *extent_cache)
6364 struct cache_extent *cache;
6365 struct extent_record *rec;
6368 cache = first_cache_extent(extent_cache);
/* The cache_extent is embedded in the extent_record as member "cache". */
6371 rec = container_of(cache, struct extent_record, cache);
6372 remove_cache_extent(extent_cache, cache);
6373 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing is left to verify.
 *
 * The record is removed only when content and owner ref are checked, the
 * extent item ref count equals the non-zero ref count, there are no
 * duplicates, all_backpointers_checked() returns 0, and none of the
 * bad_full_backref, crossing_stripes or wrong_chunk_type flags is set.
 */
6378 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6379 struct extent_record *rec)
6381 if (rec->content_checked && rec->owner_ref_checked &&
6382 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6383 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6384 !rec->bad_full_backref && !rec->crossing_stripes &&
6385 !rec->wrong_chunk_type) {
/* Unlink from the cache, free the backrefs and leave any list it is on. */
6386 remove_cache_extent(extent_cache, &rec->cache);
6387 free_all_extent_backrefs(rec);
6388 list_del_init(&rec->list);
/*
 * Check that tree block @buf is referenced by the root named in its header.
 *
 * First the backrefs of @rec are scanned for a tree backref whose root equals
 * btrfs_header_owner(buf). If that is not conclusive, the owner's fs tree is
 * read and searched for the first key of @buf one level above it. The block
 * counts as found when the parent's block pointer equals buf->start.
 * Returns 0 when found, 1 otherwise.
 */
6394 static int check_owner_ref(struct btrfs_root *root,
6395 struct extent_record *rec,
6396 struct extent_buffer *buf)
6398 struct extent_backref *node, *tmp;
6399 struct tree_backref *back;
6400 struct btrfs_root *ref_root;
6401 struct btrfs_key key;
6402 struct btrfs_path path;
6403 struct extent_buffer *parent;
6408 rbtree_postorder_for_each_entry_safe(node, tmp,
6409 &rec->backref_tree, node) {
6412 if (!node->found_ref)
6414 if (node->full_backref)
6416 back = to_tree_backref(node);
6417 if (btrfs_header_owner(buf) == back->root)
6420 BUG_ON(rec->is_root);
6422 /* try to find the block by search corresponding fs tree */
6423 key.objectid = btrfs_header_owner(buf);
6424 key.type = BTRFS_ROOT_ITEM_KEY;
6425 key.offset = (u64)-1;
6427 ref_root = btrfs_read_fs_root(root->fs_info, &key);
6428 if (IS_ERR(ref_root))
/* The first key of @buf is used as the search key. */
6431 level = btrfs_header_level(buf);
6433 btrfs_item_key_to_cpu(buf, &key, 0);
6435 btrfs_node_key_to_cpu(buf, &key, 0);
/* Stop the search one level above @buf so path.nodes[level + 1] is its parent. */
6437 btrfs_init_path(&path);
6438 path.lowest_level = level + 1;
6439 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6443 parent = path.nodes[level + 1];
6444 if (parent && buf->start == btrfs_node_blockptr(parent,
6445 path.slots[level + 1]))
6448 btrfs_release_path(&path);
6449 return found ? 0 : 1;
/*
 * Scan the backrefs of @rec for a tree backref whose root is
 * BTRFS_EXTENT_TREE_OBJECTID. Full backrefs get separate handling that is
 * not visible in this extract.
 */
6452 static int is_extent_tree_record(struct extent_record *rec)
6454 struct extent_backref *node, *tmp;
6455 struct tree_backref *back;
6458 rbtree_postorder_for_each_entry_safe(node, tmp,
6459 &rec->backref_tree, node) {
6462 back = to_tree_backref(node);
6463 if (node->full_backref)
6465 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Record a block at [start, start + len) as a corrupt extent.
 *
 * The range is looked up in @extent_cache and the containing extent_record
 * is tested with is_extent_tree_record(). The record's parent key is then
 * passed to btrfs_add_corrupt_extent_record().
 */
6472 static int record_bad_block_io(struct btrfs_fs_info *info,
6473 struct cache_tree *extent_cache,
6476 struct extent_record *rec;
6477 struct cache_extent *cache;
6478 struct btrfs_key key;
6480 cache = lookup_cache_extent(extent_cache, start, len);
6484 rec = container_of(cache, struct extent_record, cache);
6485 if (!is_extent_tree_record(rec))
/* The corrupt record is keyed by the parent key stored in @rec. */
6488 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6489 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot + 1 of @buf.
 *
 * For a node (non-zero header level) the two btrfs_key_ptr structures are
 * exchanged and the low keys are fixed up with the node's first key. For a
 * leaf the item data, item offsets, item sizes and item keys of the two
 * slots are exchanged.
 */
6492 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6493 struct extent_buffer *buf, int slot)
6495 if (btrfs_header_level(buf)) {
6496 struct btrfs_key_ptr ptr1, ptr2;
/* Read both key pointers, then write each one back into the other slot. */
6498 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6499 sizeof(struct btrfs_key_ptr));
6500 read_extent_buffer(buf, &ptr2,
6501 btrfs_node_key_ptr_offset(slot + 1),
6502 sizeof(struct btrfs_key_ptr));
6503 write_extent_buffer(buf, &ptr1,
6504 btrfs_node_key_ptr_offset(slot + 1),
6505 sizeof(struct btrfs_key_ptr));
6506 write_extent_buffer(buf, &ptr2,
6507 btrfs_node_key_ptr_offset(slot),
6508 sizeof(struct btrfs_key_ptr));
6510 struct btrfs_disk_key key;
6511 btrfs_node_key(buf, &key, 0);
6512 btrfs_fixup_low_keys(root, path, &key,
6513 btrfs_header_level(buf) + 1);
6516 struct btrfs_item *item1, *item2;
6517 struct btrfs_key k1, k2;
6518 char *item1_data, *item2_data;
6519 u32 item1_offset, item2_offset, item1_size, item2_size;
/* Snapshot keys, offsets and sizes of both items before changing anything. */
6521 item1 = btrfs_item_nr(slot);
6522 item2 = btrfs_item_nr(slot + 1);
6523 btrfs_item_key_to_cpu(buf, &k1, slot);
6524 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6525 item1_offset = btrfs_item_offset(buf, item1);
6526 item2_offset = btrfs_item_offset(buf, item2);
6527 item1_size = btrfs_item_size(buf, item1);
6528 item2_size = btrfs_item_size(buf, item2);
6530 item1_data = malloc(item1_size);
6533 item2_data = malloc(item2_size);
6539 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6540 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): each write copies with the size of the *other* item, e.g.
 * item2_size bytes out of item1_data, which was allocated with item1_size.
 * Confirm this is safe when the two item sizes differ.
 */
6542 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6543 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6547 btrfs_set_item_offset(buf, item1, item2_offset);
6548 btrfs_set_item_offset(buf, item2, item1_offset);
6549 btrfs_set_item_size(buf, item1, item2_size);
6550 btrfs_set_item_size(buf, item2, item1_size);
/* The key setter works on path->slots[0], so point it at each slot in turn. */
6552 path->slots[0] = slot;
6553 btrfs_set_item_key_unsafe(root, path, &k2);
6554 path->slots[0] = slot + 1;
6555 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Walk adjacent key pairs of the block at path->lowest_level and call
 * swap_values() on a pair that fails the btrfs_comp_cpu_keys(&k1, &k2) < 0
 * ordering test. The buffer is marked dirty afterwards.
 */
6560 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6562 struct extent_buffer *buf;
6563 struct btrfs_key k1, k2;
6565 int level = path->lowest_level;
6568 buf = path->nodes[level];
6569 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
/* Keys are read with the node or the item accessor, depending on the block. */
6571 btrfs_node_key_to_cpu(buf, &k1, i);
6572 btrfs_node_key_to_cpu(buf, &k2, i + 1);
6574 btrfs_item_key_to_cpu(buf, &k1, i);
6575 btrfs_item_key_to_cpu(buf, &k2, i + 1);
6577 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6579 ret = swap_values(root, path, buf, i);
6582 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * Only the key types listed below are accepted. The item headers after
 * @slot are moved down by one position, the item count is decremented and
 * the buffer is marked dirty. The low keys are fixed up with the leaf's
 * first key.
 */
6588 static int delete_bogus_item(struct btrfs_root *root,
6589 struct btrfs_path *path,
6590 struct extent_buffer *buf, int slot)
6592 struct btrfs_key key;
6593 int nritems = btrfs_header_nritems(buf);
6595 btrfs_item_key_to_cpu(buf, &key, slot);
6597 /* These are all the keys we can deal with missing. */
6598 if (key.type != BTRFS_DIR_INDEX_KEY &&
6599 key.type != BTRFS_EXTENT_ITEM_KEY &&
6600 key.type != BTRFS_METADATA_ITEM_KEY &&
6601 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6602 key.type != BTRFS_EXTENT_DATA_REF_KEY)
6605 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6606 (unsigned long long)key.objectid, key.type,
6607 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array. */
6608 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6609 btrfs_item_nr_offset(slot + 1),
6610 sizeof(struct btrfs_item) *
6611 (nritems - slot - 1));
6612 btrfs_set_header_nritems(buf, nritems - 1);
6614 struct btrfs_disk_key disk_key;
6616 btrfs_item_key(buf, &disk_key, 0);
6617 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6619 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * The end of item 0 is compared with BTRFS_LEAF_DATA_SIZE(root), and the end
 * of every later item with the offset of its predecessor. An item that ends
 * beyond that limit is passed to delete_bogus_item(). An item that ends
 * short of it has its data moved up by the size of the gap.
 */
6623 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6625 struct extent_buffer *buf;
6629 /* We should only get this for leaves */
6630 BUG_ON(path->lowest_level);
6631 buf = path->nodes[0];
6633 for (i = 0; i < btrfs_header_nritems(buf); i++) {
6634 unsigned int shift = 0, offset;
/* First item: its data is expected to end at the end of the leaf data area. */
6636 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6637 BTRFS_LEAF_DATA_SIZE(root)) {
6638 if (btrfs_item_end_nr(buf, i) >
6639 BTRFS_LEAF_DATA_SIZE(root)) {
6640 ret = delete_bogus_item(root, path, buf, i);
6643 fprintf(stderr, "item is off the end of the "
6644 "leaf, can't fix\n");
6648 shift = BTRFS_LEAF_DATA_SIZE(root) -
6649 btrfs_item_end_nr(buf, i);
/* Later items: data is expected to end where the previous item's data starts. */
6650 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6651 btrfs_item_offset_nr(buf, i - 1)) {
6652 if (btrfs_item_end_nr(buf, i) >
6653 btrfs_item_offset_nr(buf, i - 1)) {
6654 ret = delete_bogus_item(root, path, buf, i);
6657 fprintf(stderr, "items overlap, can't fix\n");
6661 shift = btrfs_item_offset_nr(buf, i - 1) -
6662 btrfs_item_end_nr(buf, i);
/* Move the item data up by @shift bytes and mark the buffer dirty. */
6667 printf("Shifting item nr %d by %u bytes in block %llu\n",
6668 i, shift, (unsigned long long)buf->start);
6669 offset = btrfs_item_offset_nr(buf, i);
6670 memmove_extent_buffer(buf,
6671 btrfs_leaf_data(buf) + offset + shift,
6672 btrfs_leaf_data(buf) + offset,
6673 btrfs_item_size_nr(buf, i));
6674 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6676 btrfs_mark_buffer_dirty(buf);
6680 * We may have moved things, in which case we want to exit so we don't
6681 * write those changes out. Once we have proper abort functionality in
6682 * progs this can be changed to something nicer.
6689 * Attempt to fix basic block failures. If we can't fix it for whatever reason
6690 * then just return -EIO.
/*
 * Try to repair tree block @buf.
 *
 * Only BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS
 * are handled. Every root returned by btrfs_find_all_roots() for buf->start
 * is read, the block is searched for inside a transaction with block checks
 * skipped, and fix_key_order() or fix_item_offset() is run on the result.
 */
6692 static int try_to_fix_bad_block(struct btrfs_root *root,
6693 struct extent_buffer *buf,
6694 enum btrfs_tree_block_status status)
6696 struct btrfs_trans_handle *trans;
6697 struct ulist *roots;
6698 struct ulist_node *node;
6699 struct btrfs_root *search_root;
6700 struct btrfs_path path;
6701 struct ulist_iterator iter;
6702 struct btrfs_key root_key, key;
6705 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6706 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
/* Collect the roots that refer to this block. */
6709 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6713 btrfs_init_path(&path);
6714 ULIST_ITER_INIT(&iter);
6715 while ((node = ulist_next(roots, &iter))) {
/* Each ulist value is used as the objectid of a root to read. */
6716 root_key.objectid = node->val;
6717 root_key.type = BTRFS_ROOT_ITEM_KEY;
6718 root_key.offset = (u64)-1;
6720 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6727 trans = btrfs_start_transaction(search_root, 0);
6728 if (IS_ERR(trans)) {
6729 ret = PTR_ERR(trans);
/*
 * Search down to the level of @buf using its first key. skip_check_block is
 * set on the path before the search.
 */
6733 path.lowest_level = btrfs_header_level(buf);
6734 path.skip_check_block = 1;
6735 if (path.lowest_level)
6736 btrfs_node_key_to_cpu(buf, &key, 0);
6738 btrfs_item_key_to_cpu(buf, &key, 0);
6739 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6742 btrfs_commit_transaction(trans, search_root);
/* Dispatch to the fixer that matches the reported block status. */
6745 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6746 ret = fix_key_order(search_root, &path);
6747 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6748 ret = fix_item_offset(search_root, &path);
6750 btrfs_commit_transaction(trans, search_root);
6753 btrfs_release_path(&path);
6754 btrfs_commit_transaction(trans, search_root);
6757 btrfs_release_path(&path);
/*
 * Check one tree block @buf against its extent record in @extent_cache.
 *
 * The record is updated with the block's generation, level and first-key
 * objectid. The block is validated with btrfs_check_leaf() or
 * btrfs_check_node(); a block that is not clean is handed to
 * try_to_fix_bad_block(). The record is then marked content-checked, its
 * owner ref is checked, and maybe_free_extent_rec() is called on it.
 */
6761 static int check_block(struct btrfs_root *root,
6762 struct cache_tree *extent_cache,
6763 struct extent_buffer *buf, u64 flags)
6765 struct extent_record *rec;
6766 struct cache_extent *cache;
6767 struct btrfs_key key;
6768 enum btrfs_tree_block_status status;
6772 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6775 rec = container_of(cache, struct extent_record, cache);
6776 rec->generation = btrfs_header_generation(buf);
6778 level = btrfs_header_level(buf);
/* A non-empty block contributes its first key's objectid to the record. */
6779 if (btrfs_header_nritems(buf) > 0) {
6782 btrfs_item_key_to_cpu(buf, &key, 0);
6784 btrfs_node_key_to_cpu(buf, &key, 0);
6786 rec->info_objectid = key.objectid;
6788 rec->info_level = level;
/* Validate against the parent key stored in the record. */
6790 if (btrfs_is_leaf(buf))
6791 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6793 status = btrfs_check_node(root, &rec->parent_key, buf);
6795 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6797 status = try_to_fix_bad_block(root, buf, status);
6798 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6800 fprintf(stderr, "bad block %llu\n",
6801 (unsigned long long)buf->start);
6804 * Signal to callers we need to start the scan over
6805 * again since we'll have cowed blocks.
6810 rec->content_checked = 1;
/* With the FULL_BACKREF flag the owner ref is marked checked directly. */
6811 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6812 rec->owner_ref_checked = 1;
6814 ret = check_owner_ref(root, rec, buf);
6816 rec->owner_ref_checked = 1;
6820 maybe_free_extent_rec(extent_cache, rec);
/*
 * List-based lookup of a tree backref: walks the rec->backrefs list and
 * compares each tree backref's parent or root with the requested value,
 * depending on the node's full_backref flag.
 *
 * NOTE(review): this extract contains a second function with the same name
 * and signature that uses rec->backref_tree. Any preprocessor guard that
 * separates the two is not visible here. Confirm which one is compiled.
 */
6825 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6826 u64 parent, u64 root)
6828 struct list_head *cur = rec->backrefs.next;
6829 struct extent_backref *node;
6830 struct tree_backref *back;
6832 while(cur != &rec->backrefs) {
6833 node = to_extent_backref(cur);
6837 back = to_tree_backref(node);
6839 if (!node->full_backref)
6841 if (parent == back->parent)
6844 if (node->full_backref)
6846 if (back->root == root)
/*
 * Allocate a tree_backref with malloc() and zero its embedded extent_backref
 * node. The node is flagged as a full backref when the ref is keyed by
 * @parent, and as a non-full backref otherwise.
 */
6854 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6855 u64 parent, u64 root)
6857 struct tree_backref *ref = malloc(sizeof(*ref));
/* Only the embedded node is zeroed, not the whole structure. */
6861 memset(&ref->node, 0, sizeof(ref->node));
6863 ref->parent = parent;
6864 ref->node.full_backref = 1;
6867 ref->node.full_backref = 0;
/*
 * List-based lookup of a data backref: walks the rec->backrefs list and
 * compares each data backref with the requested parent, or with the
 * requested root/owner/offset triple, depending on the node's full_backref
 * flag. For a triple match there is an additional test on bytes and
 * disk_bytenr when both @found_ref and node->found_ref are set.
 *
 * NOTE(review): this extract contains a second function with the same name
 * and signature that uses rec->backref_tree. Any preprocessor guard that
 * separates the two is not visible here. Confirm which one is compiled.
 */
6874 static struct data_backref *find_data_backref(struct extent_record *rec,
6875 u64 parent, u64 root,
6876 u64 owner, u64 offset,
6878 u64 disk_bytenr, u64 bytes)
6880 struct list_head *cur = rec->backrefs.next;
6881 struct extent_backref *node;
6882 struct data_backref *back;
6884 while(cur != &rec->backrefs) {
6885 node = to_extent_backref(cur);
6889 back = to_data_backref(node);
6891 if (!node->full_backref)
6893 if (parent == back->parent)
6896 if (node->full_backref)
6898 if (back->root == root && back->owner == owner &&
6899 back->offset == offset) {
6900 if (found_ref && node->found_ref &&
6901 (back->bytes != bytes ||
6902 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data_backref with malloc(), zero its embedded node and flag it
 * as a data backref (is_data = 1).
 *
 * One path stores @parent and sets full_backref; the other stores @offset
 * and clears full_backref.  ref->bytes is set to max_size, and
 * rec->max_size is raised to max_size when that is larger.
 *
 * NOTE(review): the max_size parameter declaration, the allocation failure
 * check, the branch condition and the return are not visible here.
 */
6912 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6913 u64 parent, u64 root,
6914 u64 owner, u64 offset,
6917 struct data_backref *ref = malloc(sizeof(*ref));
6921 memset(&ref->node, 0, sizeof(ref->node));
6922 ref->node.is_data = 1;
6925 ref->parent = parent;
6928 ref->node.full_backref = 1;
6932 ref->offset = offset;
6933 ref->node.full_backref = 0;
6935 ref->bytes = max_size;
6938 if (max_size > rec->max_size)
6939 rec->max_size = max_size;
6943 /* Check if the type of extent matches with its chunk */
/*
 * The block group is looked up from global_info at rec->start and its flags
 * are tested; a mismatch is recorded by setting rec->wrong_chunk_type.
 *
 * - Records with rec->metadata clear need BTRFS_BLOCK_GROUP_DATA.
 * - Metadata records need SYSTEM or METADATA.  When the record has entries
 *   in backref_tree, the first one decides the expected type: a data
 *   backref is always wrong, and a tree backref rooted in
 *   BTRFS_CHUNK_TREE_OBJECTID expects SYSTEM, with METADATA as the other
 *   assignment.
 *
 * NOTE(review): handling of a failed block group lookup and the early
 * returns are not visible in this excerpt.
 */
6944 static void check_extent_type(struct extent_record *rec)
6946 struct btrfs_block_group_cache *bg_cache;
6948 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6952 /* data extent, check chunk directly*/
6953 if (!rec->metadata) {
6954 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6955 rec->wrong_chunk_type = 1;
6959 /* metadata extent, check the obvious case first */
6960 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6961 BTRFS_BLOCK_GROUP_METADATA))) {
6962 rec->wrong_chunk_type = 1;
6967 * Check SYSTEM extent, as it's also marked as metadata, we can only
6968 * make sure it's a SYSTEM extent by its backref
6970 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
6971 struct extent_backref *node;
6972 struct tree_backref *tback;
6975 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
6976 if (node->is_data) {
6977 /* tree block shouldn't have data backref */
6978 rec->wrong_chunk_type = 1;
6981 tback = container_of(node, struct tree_backref, node);
6983 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6984 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6986 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6987 if (!(bg_cache->flags & bg_type))
6988 rec->wrong_chunk_type = 1;
6993 * Allocate a new extent record, fill default values from @tmpl and insert into
6994 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6995 * the cache, otherwise it fails.
/*
 * Build a fresh extent_record from @tmpl and insert it into @extent_cache.
 *
 * A zero tmpl->max_size trips BUG_ON.  Fields are either copied from @tmpl
 * or reset to fixed defaults, the list heads and backref_tree are
 * initialised, and the embedded cache entry is inserted with
 * insert_cache_extent().  The global bytes_used counter grows by rec->nr,
 * crossing_stripes is computed with check_crossing_stripes() and the chunk
 * type is validated with check_extent_type().
 *
 * NOTE(review): the malloc failure check, the handling of a failed insert,
 * the condition guarding the crossing_stripes check and the return value
 * are not visible in this excerpt.
 */
6997 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6998 struct extent_record *tmpl)
7000 struct extent_record *rec;
7003 BUG_ON(tmpl->max_size == 0);
7004 rec = malloc(sizeof(*rec));
7007 rec->start = tmpl->start;
7008 rec->max_size = tmpl->max_size;
/* rec->nr takes the larger of nr and max_size; cache.size below uses tmpl->nr */
7009 rec->nr = max(tmpl->nr, tmpl->max_size);
7010 rec->found_rec = tmpl->found_rec;
7011 rec->content_checked = tmpl->content_checked;
7012 rec->owner_ref_checked = tmpl->owner_ref_checked;
7013 rec->num_duplicates = 0;
7014 rec->metadata = tmpl->metadata;
7015 rec->flag_block_full_backref = FLAG_UNSET;
7016 rec->bad_full_backref = 0;
7017 rec->crossing_stripes = 0;
7018 rec->wrong_chunk_type = 0;
7019 rec->is_root = tmpl->is_root;
7020 rec->refs = tmpl->refs;
7021 rec->extent_item_refs = tmpl->extent_item_refs;
7022 rec->parent_generation = tmpl->parent_generation;
7023 INIT_LIST_HEAD(&rec->backrefs);
7024 INIT_LIST_HEAD(&rec->dups);
7025 INIT_LIST_HEAD(&rec->list);
7026 rec->backref_tree = RB_ROOT;
7027 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7028 rec->cache.start = tmpl->start;
7029 rec->cache.size = tmpl->nr;
7030 ret = insert_cache_extent(extent_cache, &rec->cache);
7035 bytes_used += rec->nr;
7038 rec->crossing_stripes = check_crossing_stripes(global_info,
7039 rec->start, global_info->nodesize);
7040 check_extent_type(rec);
7045 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7047 * - refs - if found, increase refs
7048 * - is_root - if found, set
7049 * - content_checked - if found, set
7050 * - owner_ref_checked - if found, set
7052 * If not found, create a new one, initialize and insert.
/*
 * Update the extent record covering [tmpl->start, tmpl->nr) or create one.
 *
 * The record is looked up with lookup_cache_extent().  On the update path:
 * - rec->nr may be reset to max(tmpl->nr, tmpl->max_size);
 * - when tmpl->found_rec is set and either the start differs or the record
 *   was already found, the record is queued on duplicate_extents (if it is
 *   not on a list yet) and a new record filled from @tmpl is appended to
 *   rec->dups, bumping rec->num_duplicates;
 * - extent_item_refs is taken from @tmpl unless this was a duplicate, with a
 *   message on stderr when the record already had a value;
 * - content_checked, owner_ref_checked, parent_key, parent_generation and
 *   max_size are merged in;
 * - crossing_stripes and the chunk type are re-evaluated and the record is
 *   passed to maybe_free_extent_rec().
 * Otherwise the work is delegated to add_extent_rec_nolookup().
 *
 * NOTE(review): several conditions, the malloc failure check for the
 * duplicate record and the return statements are not visible here.
 */
7054 static int add_extent_rec(struct cache_tree *extent_cache,
7055 struct extent_record *tmpl)
7057 struct extent_record *rec;
7058 struct cache_extent *cache;
7062 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7064 rec = container_of(cache, struct extent_record, cache);
7068 rec->nr = max(tmpl->nr, tmpl->max_size);
7071 * We need to make sure to reset nr to whatever the extent
7072 * record says was the real size, this way we can compare it to
7075 if (tmpl->found_rec) {
7076 if (tmpl->start != rec->start || rec->found_rec) {
7077 struct extent_record *tmp;
7080 if (list_empty(&rec->list))
7081 list_add_tail(&rec->list,
7082 &duplicate_extents);
7085 * We have to do this song and dance in case we
7086 * find an extent record that falls inside of
7087 * our current extent record but does not have
7088 * the same objectid.
7090 tmp = malloc(sizeof(*tmp));
7093 tmp->start = tmpl->start;
7094 tmp->max_size = tmpl->max_size;
7097 tmp->metadata = tmpl->metadata;
7098 tmp->extent_item_refs = tmpl->extent_item_refs;
7099 INIT_LIST_HEAD(&tmp->list);
7100 list_add_tail(&tmp->list, &rec->dups);
7101 rec->num_duplicates++;
7108 if (tmpl->extent_item_refs && !dup) {
7109 if (rec->extent_item_refs) {
7110 fprintf(stderr, "block %llu rec "
7111 "extent_item_refs %llu, passed %llu\n",
7112 (unsigned long long)tmpl->start,
7113 (unsigned long long)
7114 rec->extent_item_refs,
7115 (unsigned long long)tmpl->extent_item_refs);
7117 rec->extent_item_refs = tmpl->extent_item_refs;
7121 if (tmpl->content_checked)
7122 rec->content_checked = 1;
7123 if (tmpl->owner_ref_checked)
7124 rec->owner_ref_checked = 1;
7125 memcpy(&rec->parent_key, &tmpl->parent_key,
7126 sizeof(tmpl->parent_key));
7127 if (tmpl->parent_generation)
7128 rec->parent_generation = tmpl->parent_generation;
7129 if (rec->max_size < tmpl->max_size)
7130 rec->max_size = tmpl->max_size;
7133 * A metadata extent can't cross stripe_len boundary, otherwise
7134 * kernel scrub won't be able to handle it.
7135 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7139 rec->crossing_stripes = check_crossing_stripes(
7140 global_info, rec->start,
7141 global_info->nodesize);
7142 check_extent_type(rec);
7143 maybe_free_extent_rec(extent_cache, rec);
7147 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref (@parent / @root) for the extent at @bytenr.
 *
 * The extent record is looked up in @extent_cache; a template starting at
 * @bytenr can be inserted with add_extent_rec_nolookup() followed by a
 * second lookup.  The backref is located with find_tree_backref() or
 * created with alloc_tree_backref().  Either found_ref or found_extent_tree
 * is set on the backref, and a message is printed to stderr when that flag
 * was already set.  The backref can be inserted into rec->backref_tree
 * (WARN_ON wraps the rb_insert result), after which the chunk type is
 * re-checked and the record is handed to maybe_free_extent_rec().
 *
 * NOTE(review): the conditions selecting these paths (presumably a failed
 * lookup, the found_ref argument and the insert flag), the handling of
 * rec->start != bytenr and the return values are not visible here.
 */
7152 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7153 u64 parent, u64 root, int found_ref)
7155 struct extent_record *rec;
7156 struct tree_backref *back;
7157 struct cache_extent *cache;
7159 bool insert = false;
7161 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7163 struct extent_record tmpl;
7165 memset(&tmpl, 0, sizeof(tmpl));
7166 tmpl.start = bytenr;
7171 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7175 /* really a bug in cache_extent implement now */
7176 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7181 rec = container_of(cache, struct extent_record, cache);
7182 if (rec->start != bytenr) {
7184 * Several cause, from unaligned bytenr to over lapping extents
7189 back = find_tree_backref(rec, parent, root);
7191 back = alloc_tree_backref(rec, parent, root);
7198 if (back->node.found_ref) {
7199 fprintf(stderr, "Extent back ref already exists "
7200 "for %llu parent %llu root %llu \n",
7201 (unsigned long long)bytenr,
7202 (unsigned long long)parent,
7203 (unsigned long long)root);
7205 back->node.found_ref = 1;
7207 if (back->node.found_extent_tree) {
7208 fprintf(stderr, "Extent back ref already exists "
7209 "for %llu parent %llu root %llu \n",
7210 (unsigned long long)bytenr,
7211 (unsigned long long)parent,
7212 (unsigned long long)root);
7214 back->node.found_extent_tree = 1;
7217 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7218 compare_extent_backref));
7219 check_extent_type(rec);
7220 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * The extent record is looked up in @extent_cache; a template with
 * start = @bytenr and max_size = @max_size can be inserted with
 * add_extent_rec_nolookup() followed by a second lookup.  rec->max_size is
 * raised to @max_size when that is larger.  The backref is located with
 * find_data_backref() or created with alloc_data_backref().
 *
 * One path requires num_refs == 1 (BUG_ON), asserts that an already found
 * backref has bytes == max_size, sets node.found_ref, increments
 * back->found_ref, refreshes bytes/disk_bytenr when they differ (erasing
 * the node from backref_tree so it can be reinserted) and marks the record
 * content_checked and owner_ref_checked.  The other path stores num_refs,
 * sets found_extent_tree and prints a message when it was already set.
 * The backref can be inserted into rec->backref_tree (WARN_ON wraps the
 * rb_insert result) and the record is passed to maybe_free_extent_rec().
 *
 * NOTE(review): the conditions selecting these paths, the remaining
 * arguments of the find/alloc calls and the return values are not visible
 * in this excerpt.
 */
7224 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7225 u64 parent, u64 root, u64 owner, u64 offset,
7226 u32 num_refs, int found_ref, u64 max_size)
7228 struct extent_record *rec;
7229 struct data_backref *back;
7230 struct cache_extent *cache;
7232 bool insert = false;
7234 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7236 struct extent_record tmpl;
7238 memset(&tmpl, 0, sizeof(tmpl));
7239 tmpl.start = bytenr;
7241 tmpl.max_size = max_size;
7243 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7247 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7252 rec = container_of(cache, struct extent_record, cache);
7253 if (rec->max_size < max_size)
7254 rec->max_size = max_size;
7257 * If found_ref is set then max_size is the real size and must match the
7258 * existing refs. So if we have already found a ref then we need to
7259 * make sure that this ref matches the existing one, otherwise we need
7260 * to add a new backref so we can notice that the backrefs don't match
7261 * and we need to figure out who is telling the truth. This is to
7262 * account for that awful fsync bug I introduced where we'd end up with
7263 * a btrfs_file_extent_item that would have its length include multiple
7264 * prealloc extents or point inside of a prealloc extent.
7266 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7269 back = alloc_data_backref(rec, parent, root, owner, offset,
7276 BUG_ON(num_refs != 1);
7277 if (back->node.found_ref)
7278 BUG_ON(back->bytes != max_size);
7279 back->node.found_ref = 1;
7280 back->found_ref += 1;
7281 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7282 back->bytes = max_size;
7283 back->disk_bytenr = bytenr;
7285 /* Need to reinsert if not already in the tree */
7287 rb_erase(&back->node.node, &rec->backref_tree);
7292 rec->content_checked = 1;
7293 rec->owner_ref_checked = 1;
7295 if (back->node.found_extent_tree) {
7296 fprintf(stderr, "Extent back ref already exists "
7297 "for %llu parent %llu root %llu "
7298 "owner %llu offset %llu num_refs %lu\n",
7299 (unsigned long long)bytenr,
7300 (unsigned long long)parent,
7301 (unsigned long long)root,
7302 (unsigned long long)owner,
7303 (unsigned long long)offset,
7304 (unsigned long)num_refs);
7306 back->num_refs = num_refs;
7307 back->node.found_extent_tree = 1;
7310 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7311 compare_extent_backref));
7313 maybe_free_extent_rec(extent_cache, rec);
/*
 * Register the range [bytenr, bytenr + size) in the @seen tree and queue it
 * in the @pending tree.
 *
 * NOTE(review): presumably the pending insert is skipped when the insert
 * into @seen fails; the check and the return are not visible here.  The
 * result of the second add_cache_extent() call is not captured.
 */
7317 static int add_pending(struct cache_tree *pending,
7318 struct cache_tree *seen, u64 bytenr, u32 size)
7321 ret = add_cache_extent(seen, bytenr, size);
7324 add_cache_extent(pending, bytenr, size);
/*
 * Fill @bits with up to @bits_nr ranges to process next.
 *
 * Sources are consulted in this order: the first entry of @reada (stored
 * in bits[0]); entries of @nodes starting from @last minus 32768 (when
 * @last is larger than 32768), falling back to the start of @nodes; and
 * entries of @pending.  When more than 8 slots remain free, further
 * entries of @pending that begin within 32768 bytes of the end of the
 * previous range are appended.
 *
 * NOTE(review): the last parameter of the signature, the conditions
 * between the stages and the return statements are not visible here.
 */
7328 static int pick_next_pending(struct cache_tree *pending,
7329 struct cache_tree *reada,
7330 struct cache_tree *nodes,
7331 u64 last, struct block_info *bits, int bits_nr,
/*
 * NOTE(review): last is u64 but node_start is unsigned long, so the value
 * is narrowed where unsigned long is 32 bits wide.
 */
7334 unsigned long node_start = last;
7335 struct cache_extent *cache;
7338 cache = search_cache_extent(reada, 0);
7340 bits[0].start = cache->start;
7341 bits[0].size = cache->size;
7346 if (node_start > 32768)
7347 node_start -= 32768;
7349 cache = search_cache_extent(nodes, node_start);
7351 cache = search_cache_extent(nodes, 0);
7354 cache = search_cache_extent(pending, 0);
7359 bits[ret].start = cache->start;
7360 bits[ret].size = cache->size;
7361 cache = next_cache_extent(cache);
7363 } while (cache && ret < bits_nr);
7369 bits[ret].start = cache->start;
7370 bits[ret].size = cache->size;
7371 cache = next_cache_extent(cache);
7373 } while (cache && ret < bits_nr);
7375 if (bits_nr - ret > 8) {
7376 u64 lookup = bits[0].start + bits[0].size;
7377 struct cache_extent *next;
7378 next = search_cache_extent(pending, lookup);
7380 if (next->start - lookup > 32768)
7382 bits[ret].start = next->start;
7383 bits[ret].size = next->size;
7384 lookup = next->start + next->size;
7388 next = next_cache_extent(next);
/*
 * Release callback for a chunk_record embedded in a cache tree: the record
 * is recovered from @cache and unlinked from its list and dextents lists.
 *
 * NOTE(review): the statement that frees the record is not visible here.
 */
7396 static void free_chunk_record(struct cache_extent *cache)
7398 struct chunk_record *rec;
7400 rec = container_of(cache, struct chunk_record, cache);
7401 list_del_init(&rec->list);
7402 list_del_init(&rec->dextents);
/* Release every entry of @chunk_cache, using free_chunk_record() per entry. */
7406 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
7408 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Release callback for a device_record stored in an rb-tree: the record is
 * recovered from @node.
 *
 * NOTE(review): the statement that frees the record is not visible here.
 */
7411 static void free_device_record(struct rb_node *node)
7413 struct device_record *rec;
7415 rec = container_of(node, struct device_record, node);
/*
 * Macro expansion parameterised with device_cache and free_device_record.
 * NOTE(review): presumably this generates the function that frees the device
 * cache rb-tree -- confirm against the macro definition.
 */
7419 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the cache tree of @tree and append it to
 * tree->block_groups.
 *
 * NOTE(review): presumably the list append is skipped when the cache insert
 * fails; the check and the return are not visible in this excerpt.
 */
7421 int insert_block_group_record(struct block_group_tree *tree,
7422 struct block_group_record *bg_rec)
7426 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7430 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Release callback for a block_group_record embedded in a cache tree: the
 * record is recovered from @cache and unlinked from its list.
 *
 * NOTE(review): the statement that frees the record is not visible here.
 */
7434 static void free_block_group_record(struct cache_extent *cache)
7436 struct block_group_record *rec;
7438 rec = container_of(cache, struct block_group_record, cache);
7439 list_del_init(&rec->list);
/* Release every entry of tree->tree, using free_block_group_record() per entry. */
7443 void free_block_group_tree(struct block_group_tree *tree)
7445 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the device extent cache with insert_cache_extent2()
 * and append it to both orphan lists of @tree (no_chunk_orphans and
 * no_device_orphans).
 *
 * NOTE(review): presumably the list appends are skipped when the insert
 * fails; the check and the return are not visible in this excerpt.
 */
7448 int insert_device_extent_record(struct device_extent_tree *tree,
7449 struct device_extent_record *de_rec)
7454 * Device extent is a bit different from the other extents, because
7455 * the extents which belong to the different devices may have the
7456 * same start and size, so we need use the special extent cache
7457 * search/insert functions.
7459 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7463 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7464 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Release callback for a device_extent_record embedded in a cache tree: the
 * record is recovered from @cache and removed from chunk_list and
 * device_list when it is linked on them.
 *
 * NOTE(review): the statement that frees the record is not visible here.
 */
7468 static void free_device_extent_record(struct cache_extent *cache)
7470 struct device_extent_record *rec;
7472 rec = container_of(cache, struct device_extent_record, cache);
7473 if (!list_empty(&rec->chunk_list))
7474 list_del_init(&rec->chunk_list);
7475 if (!list_empty(&rec->device_list))
7476 list_del_init(&rec->device_list);
/* Release every entry of tree->tree, using free_device_extent_record() per entry. */
7480 void free_device_extent_tree(struct device_extent_tree *tree)
7482 cache_tree_free_extents(&tree->tree, free_device_extent_record);
7485 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Translate a v0 extent ref item at @slot of @leaf into a backref.
 *
 * The item key supplies the extent bytenr (key.objectid) and the parent
 * (key.offset).  Refs whose objectid is below BTRFS_FIRST_FREE_OBJECTID are
 * added with add_tree_backref(); the other call is add_data_backref(),
 * which receives the v0 ref count as num_refs.
 *
 * NOTE(review): the remaining add_tree_backref() arguments, the else line
 * and the return are not visible in this excerpt.
 */
7486 static int process_extent_ref_v0(struct cache_tree *extent_cache,
7487 struct extent_buffer *leaf, int slot)
7489 struct btrfs_extent_ref_v0 *ref0;
7490 struct btrfs_key key;
7493 btrfs_item_key_to_cpu(leaf, &key, slot);
7494 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
7495 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
7496 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
7499 ret = add_data_backref(extent_cache, key.objectid, key.offset,
7500 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the chunk item at @slot of @leaf.
 *
 * The record is allocated zeroed with calloc(), sized by
 * btrfs_chunk_record_size(num_stripes).  The cache range is
 * [key->offset, chunk length), the key fields and the leaf generation are
 * copied, and the chunk attributes plus the devid, offset and device uuid
 * of every stripe are read from the item.
 *
 * NOTE(review): the slot parameter declaration, what happens after the
 * allocation failure message and the return are not visible here.
 */
7506 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7507 struct btrfs_key *key,
7510 struct btrfs_chunk *ptr;
7511 struct chunk_record *rec;
7514 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7515 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7517 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7519 fprintf(stderr, "memory allocation failed\n");
7523 INIT_LIST_HEAD(&rec->list);
7524 INIT_LIST_HEAD(&rec->dextents);
7527 rec->cache.start = key->offset;
7528 rec->cache.size = btrfs_chunk_length(leaf, ptr);
7530 rec->generation = btrfs_header_generation(leaf);
7532 rec->objectid = key->objectid;
7533 rec->type = key->type;
7534 rec->offset = key->offset;
7536 rec->length = rec->cache.size;
7537 rec->owner = btrfs_chunk_owner(leaf, ptr);
7538 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7539 rec->type_flags = btrfs_chunk_type(leaf, ptr);
7540 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7541 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7542 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7543 rec->num_stripes = num_stripes;
7544 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
7546 for (i = 0; i < rec->num_stripes; ++i) {
7547 rec->stripes[i].devid =
7548 btrfs_stripe_devid_nr(leaf, ptr, i);
7549 rec->stripes[i].offset =
7550 btrfs_stripe_offset_nr(leaf, ptr, i);
7551 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7552 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb and add it to @chunk_cache.
 *
 * The item is checked with btrfs_check_chunk_valid(); an error naming the
 * chunk offset and length is reported for an invalid chunk.  A record
 * built by btrfs_new_chunk_record() is inserted with insert_cache_extent(),
 * and a message is printed to stderr when the range already existed.
 *
 * NOTE(review): the slot parameter declaration, the conditions guarding the
 * messages, cleanup of a rejected record and the return are not visible.
 */
7559 static int process_chunk_item(struct cache_tree *chunk_cache,
7560 struct btrfs_key *key, struct extent_buffer *eb,
7563 struct chunk_record *rec;
7564 struct btrfs_chunk *chunk;
7567 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7569 * Do extra check for this chunk item,
7571 * It's still possible one can craft a leaf with CHUNK_ITEM, with
7572 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7573 * and owner<->key_type check.
7575 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7578 error("chunk(%llu, %llu) is not valid, ignore it",
7579 key->offset, btrfs_chunk_length(eb, chunk));
7582 rec = btrfs_new_chunk_record(eb, key, slot);
7583 ret = insert_cache_extent(chunk_cache, &rec->cache);
7585 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7586 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it
 * into the @dev_cache rb-tree, ordered by device_record_compare.
 *
 * The record is allocated with malloc() and filled from the key, the leaf
 * generation and the device id, total bytes and bytes used of the item.
 * A message is printed to stderr when the device already existed.
 *
 * NOTE(review): what happens after the allocation failure message, cleanup
 * of a rejected record and the return are not visible in this excerpt.
 */
7593 static int process_device_item(struct rb_root *dev_cache,
7594 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7596 struct btrfs_dev_item *ptr;
7597 struct device_record *rec;
7600 ptr = btrfs_item_ptr(eb,
7601 slot, struct btrfs_dev_item);
7603 rec = malloc(sizeof(*rec));
7605 fprintf(stderr, "memory allocation failed\n");
7609 rec->devid = key->offset;
7610 rec->generation = btrfs_header_generation(eb);
7612 rec->objectid = key->objectid;
7613 rec->type = key->type;
7614 rec->offset = key->offset;
/* NOTE(review): this overwrites the devid taken from key->offset above */
7616 rec->devid = btrfs_device_id(eb, ptr);
7617 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7618 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7620 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7622 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a block_group_record from the block group item at @slot of @leaf.
 *
 * The record is allocated zeroed with calloc().  The cache range is
 * [key->objectid, key->offset), the key fields and leaf generation are
 * copied, the flags are read from the item and the list head is
 * initialised.
 *
 * NOTE(review): the slot parameter declaration, what happens after the
 * allocation failure message and the return are not visible here.
 */
7629 struct block_group_record *
7630 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7633 struct btrfs_block_group_item *ptr;
7634 struct block_group_record *rec;
7636 rec = calloc(1, sizeof(*rec));
7638 fprintf(stderr, "memory allocation failed\n");
7642 rec->cache.start = key->objectid;
7643 rec->cache.size = key->offset;
7645 rec->generation = btrfs_header_generation(leaf);
7647 rec->objectid = key->objectid;
7648 rec->type = key->type;
7649 rec->offset = key->offset;
7651 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7652 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7654 INIT_LIST_HEAD(&rec->list);
/*
 * Build a block_group_record for the item at @slot of @eb and insert it
 * into @block_group_cache; a message is printed to stderr when the block
 * group already existed.
 *
 * NOTE(review): the condition guarding the message, cleanup of a rejected
 * record and the return are not visible in this excerpt.
 */
7659 static int process_block_group_item(struct block_group_tree *block_group_cache,
7660 struct btrfs_key *key,
7661 struct extent_buffer *eb, int slot)
7663 struct block_group_record *rec;
7666 rec = btrfs_new_block_group_record(eb, key, slot);
7667 ret = insert_block_group_record(block_group_cache, rec);
7669 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7670 rec->objectid, rec->offset);
/*
 * Build a device_extent_record from the dev extent item at @slot of @leaf.
 *
 * The record is allocated zeroed with calloc().  The cache entry is keyed
 * by key->objectid (cache.objectid) and key->offset (cache.start), with
 * the extent length as its size.  The key fields, the leaf generation and
 * the chunk objectid, chunk offset and length of the item are stored, and
 * both list heads are initialised.
 *
 * NOTE(review): what happens after the allocation failure message, the
 * field receiving the chunk offset and the return are not visible here.
 */
7677 struct device_extent_record *
7678 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7679 struct btrfs_key *key, int slot)
7681 struct device_extent_record *rec;
7682 struct btrfs_dev_extent *ptr;
7684 rec = calloc(1, sizeof(*rec));
7686 fprintf(stderr, "memory allocation failed\n");
7690 rec->cache.objectid = key->objectid;
7691 rec->cache.start = key->offset;
7693 rec->generation = btrfs_header_generation(leaf);
7695 rec->objectid = key->objectid;
7696 rec->type = key->type;
7697 rec->offset = key->offset;
7699 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7700 rec->chunk_objecteid =
7701 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7703 btrfs_dev_extent_chunk_offset(leaf, ptr);
7704 rec->length = btrfs_dev_extent_length(leaf, ptr);
7705 rec->cache.size = rec->length;
7707 INIT_LIST_HEAD(&rec->chunk_list);
7708 INIT_LIST_HEAD(&rec->device_list);
/*
 * Build a device_extent_record for the item at @slot of @eb and insert it
 * into @dev_extent_cache; a message naming objectid, offset and length is
 * emitted when the device extent already existed.
 *
 * NOTE(review): the return type line, the slot parameter declaration, the
 * condition guarding the message and the return are not visible here.
 */
7714 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7715 struct btrfs_key *key, struct extent_buffer *eb,
7718 struct device_extent_record *rec;
7721 rec = btrfs_new_device_extent_record(eb, key, slot);
7722 ret = insert_device_extent_record(dev_extent_cache, rec);
7725 "Device extent[%llu, %llu, %llu] existed.\n",
7726 rec->objectid, rec->offset, rec->length);
/*
 * Parse the extent item at @slot of @eb into @extent_cache.
 *
 * The extent length is the nodesize for METADATA_ITEM keys and key.offset
 * in the other assignment.  Extents whose bytenr is not sector aligned are
 * reported as invalid.  Items smaller than struct btrfs_extent_item are
 * treated as v0 items (only with BTRFS_COMPAT_EXTENT_TREE_V0) and are
 * added through add_extent_rec() directly.
 *
 * For regular items the ref count and the TREE_BLOCK flag are read,
 * metadata extents must be exactly nodesize long and data extents sector
 * aligned, and a template record is passed to add_extent_rec().  The
 * inline refs that follow the item (after the tree block info for
 * EXTENT_ITEM keyed tree blocks) are then walked up to the item end and
 * dispatched by type to add_tree_backref() or add_data_backref(); an
 * unknown type is reported as a corrupt extent record.
 *
 * NOTE(review): the return values of the add_extent_rec() call for regular
 * items and of both add_data_backref() calls are not captured.  Several
 * declarations, loop and branch lines and the returns are not visible.
 */
7733 static int process_extent_item(struct btrfs_root *root,
7734 struct cache_tree *extent_cache,
7735 struct extent_buffer *eb, int slot)
7737 struct btrfs_extent_item *ei;
7738 struct btrfs_extent_inline_ref *iref;
7739 struct btrfs_extent_data_ref *dref;
7740 struct btrfs_shared_data_ref *sref;
7741 struct btrfs_key key;
7742 struct extent_record tmpl;
7747 u32 item_size = btrfs_item_size_nr(eb, slot);
7753 btrfs_item_key_to_cpu(eb, &key, slot);
7755 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7757 num_bytes = root->fs_info->nodesize;
7759 num_bytes = key.offset;
7762 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7763 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7764 key.objectid, root->fs_info->sectorsize);
7767 if (item_size < sizeof(*ei)) {
7768 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7769 struct btrfs_extent_item_v0 *ei0;
7770 BUG_ON(item_size != sizeof(*ei0));
7771 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7772 refs = btrfs_extent_refs_v0(eb, ei0);
7776 memset(&tmpl, 0, sizeof(tmpl));
7777 tmpl.start = key.objectid;
7778 tmpl.nr = num_bytes;
7779 tmpl.extent_item_refs = refs;
7780 tmpl.metadata = metadata;
7782 tmpl.max_size = num_bytes;
7784 return add_extent_rec(extent_cache, &tmpl);
7787 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7788 refs = btrfs_extent_refs(eb, ei);
7789 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7793 if (metadata && num_bytes != root->fs_info->nodesize) {
7794 error("ignore invalid metadata extent, length %llu does not equal to %u",
7795 num_bytes, root->fs_info->nodesize);
7798 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7799 error("ignore invalid data extent, length %llu is not aligned to %u",
7800 num_bytes, root->fs_info->sectorsize);
7804 memset(&tmpl, 0, sizeof(tmpl));
7805 tmpl.start = key.objectid;
7806 tmpl.nr = num_bytes;
7807 tmpl.extent_item_refs = refs;
7808 tmpl.metadata = metadata;
7810 tmpl.max_size = num_bytes;
7811 add_extent_rec(extent_cache, &tmpl);
7813 ptr = (unsigned long)(ei + 1);
7814 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7815 key.type == BTRFS_EXTENT_ITEM_KEY)
7816 ptr += sizeof(struct btrfs_tree_block_info);
7818 end = (unsigned long)ei + item_size;
7820 iref = (struct btrfs_extent_inline_ref *)ptr;
7821 type = btrfs_extent_inline_ref_type(eb, iref);
7822 offset = btrfs_extent_inline_ref_offset(eb, iref);
7824 case BTRFS_TREE_BLOCK_REF_KEY:
7825 ret = add_tree_backref(extent_cache, key.objectid,
7829 "add_tree_backref failed (extent items tree block): %s",
7832 case BTRFS_SHARED_BLOCK_REF_KEY:
7833 ret = add_tree_backref(extent_cache, key.objectid,
7837 "add_tree_backref failed (extent items shared block): %s",
7840 case BTRFS_EXTENT_DATA_REF_KEY:
7841 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7842 add_data_backref(extent_cache, key.objectid, 0,
7843 btrfs_extent_data_ref_root(eb, dref),
7844 btrfs_extent_data_ref_objectid(eb,
7846 btrfs_extent_data_ref_offset(eb, dref),
7847 btrfs_extent_data_ref_count(eb, dref),
7850 case BTRFS_SHARED_DATA_REF_KEY:
7851 sref = (struct btrfs_shared_data_ref *)(iref + 1);
7852 add_data_backref(extent_cache, key.objectid, offset,
7854 btrfs_shared_data_ref_count(eb, sref),
7858 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7859 key.objectid, key.type, num_bytes);
7862 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Verify that the free space cache of @cache holds an entry that exactly
 * matches the range [offset, offset + bytes).
 *
 * For every superblock mirror, btrfs_rmap_block() maps the superblock
 * offset into logical addresses within the block group.  Stripes that
 * overlap the range are carved out: the range is trimmed at the front or
 * at the back, or split, with the left part checked by a recursive call
 * and the right part continued in place.  The remaining range is looked
 * up with btrfs_find_free_space(); a missing entry or a mismatch in offset
 * or length is reported on stderr.  A matching entry is removed with
 * unlink_free_space().
 *
 * NOTE(review): the inner loop header, several conditions, cleanup of the
 * logical array and the returns are not visible in this excerpt.
 */
7869 static int check_cache_range(struct btrfs_root *root,
7870 struct btrfs_block_group_cache *cache,
7871 u64 offset, u64 bytes)
7873 struct btrfs_free_space *entry;
7879 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
7880 bytenr = btrfs_sb_offset(i);
7881 ret = btrfs_rmap_block(root->fs_info,
7882 cache->key.objectid, bytenr, 0,
7883 &logical, &nr, &stripe_len);
7888 if (logical[nr] + stripe_len <= offset)
7890 if (offset + bytes <= logical[nr])
7892 if (logical[nr] == offset) {
7893 if (stripe_len >= bytes) {
7897 bytes -= stripe_len;
7898 offset += stripe_len;
7899 } else if (logical[nr] < offset) {
7900 if (logical[nr] + stripe_len >=
7905 bytes = (offset + bytes) -
7906 (logical[nr] + stripe_len);
7907 offset = logical[nr] + stripe_len;
7910 * Could be tricky, the super may land in the
7911 * middle of the area we're checking. First
7912 * check the easiest case, it's at the end.
7914 if (logical[nr] + stripe_len >=
7916 bytes = logical[nr] - offset;
7920 /* Check the left side */
7921 ret = check_cache_range(root, cache,
7923 logical[nr] - offset);
7929 /* Now we continue with the right side */
7930 bytes = (offset + bytes) -
7931 (logical[nr] + stripe_len);
7932 offset = logical[nr] + stripe_len;
7939 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7941 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7942 offset, offset+bytes);
7946 if (entry->offset != offset) {
7947 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7952 if (entry->bytes != bytes) {
7953 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7954 bytes, entry->bytes, offset);
7958 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the loaded free space entries of block group @cache against
 * the extent tree.
 *
 * The extent root is searched from the larger of the block group start and
 * BTRFS_SUPER_INFO_OFFSET.  Extent and metadata items inside the block
 * group are walked in key order; every gap between the end of the previous
 * extent (last) and the next extent start is verified with
 * check_cache_range(), as is the tail up to the block group end.  Metadata
 * items advance last by the nodesize, extent items by key.offset.  Entries
 * still present in free_space_offset afterwards are reported on stderr.
 *
 * NOTE(review): the loop header, slot advance, error handling and the
 * returns are not visible in this excerpt.
 */
7963 static int verify_space_cache(struct btrfs_root *root,
7964 struct btrfs_block_group_cache *cache)
7966 struct btrfs_path path;
7967 struct extent_buffer *leaf;
7968 struct btrfs_key key;
7972 root = root->fs_info->extent_root;
7974 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7976 btrfs_init_path(&path);
7977 key.objectid = last;
7979 key.type = BTRFS_EXTENT_ITEM_KEY;
7980 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7985 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7986 ret = btrfs_next_leaf(root, &path);
7994 leaf = path.nodes[0];
7995 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7996 if (key.objectid >= cache->key.offset + cache->key.objectid)
7998 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7999 key.type != BTRFS_METADATA_ITEM_KEY) {
8004 if (last == key.objectid) {
8005 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8006 last = key.objectid + key.offset;
8008 last = key.objectid + root->fs_info->nodesize;
8013 ret = check_cache_range(root, cache, last,
8014 key.objectid - last);
8017 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8018 last = key.objectid + key.offset;
8020 last = key.objectid + root->fs_info->nodesize;
8024 if (last < cache->key.objectid + cache->key.offset)
8025 ret = check_cache_range(root, cache, last,
8026 cache->key.objectid +
8027 cache->key.offset - last);
8030 btrfs_release_path(&path);
8033 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8034 fprintf(stderr, "There are still entries left in the space "
/*
 * Validate the free space information of every block group.
 *
 * When the superblock cache generation is set (not -1ULL) and differs from
 * the superblock generation, a notice that the space cache will be
 * invalidated is printed.  With progress reporting enabled, the
 * TASK_FREE_SPACE task is started.  Block groups are visited in address
 * order, starting right after the primary superblock.  For each one the
 * free space ctl is initialised when absent, and
 * btrfs_remove_free_space_cache() is also called on a path whose condition
 * is not visible.  The free space is loaded from the free space tree (with
 * super stripes excluded while loading) when the FREE_SPACE_TREE compat_ro
 * bit is set; load_free_space_cache() is the other loader.  The result is
 * checked with verify_space_cache().  The return value is -EINVAL when an
 * error was recorded, 0 otherwise.
 *
 * NOTE(review): the loop header, several conditions and the handling of
 * individual failures are not visible in this excerpt.
 */
8042 static int check_space_cache(struct btrfs_root *root)
8044 struct btrfs_block_group_cache *cache;
8045 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8049 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8050 btrfs_super_generation(root->fs_info->super_copy) !=
8051 btrfs_super_cache_generation(root->fs_info->super_copy)) {
8052 printf("cache and super generation don't match, space cache "
8053 "will be invalidated\n");
8057 if (ctx.progress_enabled) {
8058 ctx.tp = TASK_FREE_SPACE;
8059 task_start(ctx.info);
8063 cache = btrfs_lookup_first_block_group(root->fs_info, start);
8067 start = cache->key.objectid + cache->key.offset;
8068 if (!cache->free_space_ctl) {
8069 if (btrfs_init_free_space_ctl(cache,
8070 root->fs_info->sectorsize)) {
8075 btrfs_remove_free_space_cache(cache);
8078 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8079 ret = exclude_super_stripes(root, cache);
8081 fprintf(stderr, "could not exclude super stripes: %s\n",
8086 ret = load_free_space_tree(root->fs_info, cache);
8087 free_excluded_extents(root, cache);
8089 fprintf(stderr, "could not load free space tree: %s\n",
8096 ret = load_free_space_cache(root->fs_info, cache);
8101 ret = verify_space_cache(root, cache);
8103 fprintf(stderr, "cache appears valid but isn't %Lu\n",
8104 cache->key.objectid);
8109 task_stop(ctx.info);
8111 return error ? -EINVAL : 0;
/*
 * Read the data range [bytenr, bytenr + num_bytes) and compare the checksum
 * of every sector with the value stored in the csum item.
 *
 * A buffer of num_bytes is allocated and filled with read_extent_data()
 * from the current mirror.  For each sector the checksum is computed with
 * btrfs_csum_data() and btrfs_csum_final(), and the expected value is read
 * from @eb at leaf_offset plus the sector index times the checksum size.
 * A mismatch is reported on stderr together with the mirror number, and
 * the number of copies is queried so that another mirror can be tried
 * while mirror < num_copies - 1.
 *
 * NOTE(review): the handling of an unaligned num_bytes, allocation and
 * read failures, the mirror retry statements, the buffer release and the
 * return are not visible in this excerpt.
 */
8114 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8115 u64 num_bytes, unsigned long leaf_offset,
8116 struct extent_buffer *eb) {
8118 struct btrfs_fs_info *fs_info = root->fs_info;
8120 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8122 unsigned long csum_offset;
8126 u64 data_checked = 0;
8132 if (num_bytes % fs_info->sectorsize)
8135 data = malloc(num_bytes);
8139 while (offset < num_bytes) {
8142 read_len = num_bytes - offset;
8143 /* read as much space once a time */
8144 ret = read_extent_data(fs_info, data + offset,
8145 bytenr + offset, &read_len, mirror);
8149 /* verify every 4k data's checksum */
8150 while (data_checked < read_len) {
8152 tmp = offset + data_checked;
8154 csum = btrfs_csum_data((char *)data + tmp,
8155 csum, fs_info->sectorsize);
8156 btrfs_csum_final(csum, (u8 *)&csum);
8158 csum_offset = leaf_offset +
8159 tmp / fs_info->sectorsize * csum_size;
8160 read_extent_buffer(eb, (char *)&csum_expected,
8161 csum_offset, csum_size);
8162 /* try another mirror */
8163 if (csum != csum_expected) {
8164 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8165 mirror, bytenr + tmp,
8166 csum, csum_expected);
8167 num_copies = btrfs_num_copies(root->fs_info,
8169 if (mirror < num_copies - 1) {
8174 data_checked += fs_info->sectorsize;
/*
 * Check that the range [bytenr, bytenr + num_bytes) is covered by extent
 * items in the extent tree.
 *
 * The extent root is searched with the key (bytenr, EXTENT_ITEM, -1), and
 * the path is stepped back (by slot or with btrfs_prev_leaf()) while the
 * key type is greater than EXTENT_ITEM.  EXTENT_ITEM keys are then walked
 * forward and the range is shrunk by every overlapping extent: trimmed at
 * the front, trimmed at the back, or, when an extent sits in the middle,
 * split so that the right part is checked by a recursive call and the left
 * part continues in place.  When bytes remain and no error occurred, a
 * message about the missing extent is printed to stderr.
 *
 * NOTE(review): the num_bytes parameter declaration, the loop header, the
 * slot manipulation, several branch bodies and the returns are not visible
 * in this excerpt.
 */
8183 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8186 struct btrfs_path path;
8187 struct extent_buffer *leaf;
8188 struct btrfs_key key;
8191 btrfs_init_path(&path);
8192 key.objectid = bytenr;
8193 key.type = BTRFS_EXTENT_ITEM_KEY;
8194 key.offset = (u64)-1;
8197 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8200 fprintf(stderr, "Error looking up extent record %d\n", ret);
8201 btrfs_release_path(&path);
8204 if (path.slots[0] > 0) {
8207 ret = btrfs_prev_leaf(root, &path);
8210 } else if (ret > 0) {
8217 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8220 * Block group items come before extent items if they have the same
8221 * bytenr, so walk back one more just in case. Dear future traveller,
8222 * first congrats on mastering time travel. Now if it's not too much
8223 * trouble could you go back to 2006 and tell Chris to make the
8224 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8225 * EXTENT_ITEM_KEY please?
8227 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8228 if (path.slots[0] > 0) {
8231 ret = btrfs_prev_leaf(root, &path);
8234 } else if (ret > 0) {
8239 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8243 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8244 ret = btrfs_next_leaf(root, &path);
8246 fprintf(stderr, "Error going to next leaf "
8248 btrfs_release_path(&path);
8254 leaf = path.nodes[0];
8255 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8256 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8260 if (key.objectid + key.offset < bytenr) {
8264 if (key.objectid > bytenr + num_bytes)
8267 if (key.objectid == bytenr) {
8268 if (key.offset >= num_bytes) {
8272 num_bytes -= key.offset;
8273 bytenr += key.offset;
8274 } else if (key.objectid < bytenr) {
8275 if (key.objectid + key.offset >= bytenr + num_bytes) {
8279 num_bytes = (bytenr + num_bytes) -
8280 (key.objectid + key.offset);
8281 bytenr = key.objectid + key.offset;
8283 if (key.objectid + key.offset < bytenr + num_bytes) {
8284 u64 new_start = key.objectid + key.offset;
8285 u64 new_bytes = bytenr + num_bytes - new_start;
8288 * Weird case, the extent is in the middle of
8289 * our range, we'll have to search one side
8290 * and then the other. Not sure if this happens
8291 * in real life, but no harm in coding it up
8292 * anyway just in case.
8294 btrfs_release_path(&path);
8295 ret = check_extent_exists(root, new_start,
8298 fprintf(stderr, "Right section didn't "
8302 num_bytes = key.objectid - bytenr;
8305 num_bytes = key.objectid - bytenr;
8312 if (num_bytes && !ret) {
8313 fprintf(stderr, "There are no extents for csum range "
8314 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8318 btrfs_release_path(&path);
/*
 * Walk the checksum tree and cross-check it against the extent tree.
 *
 * The root argument is only used to reach fs_info; the walk itself runs on
 * fs_info->csum_root, starting from a key with objectid
 * BTRFS_EXTENT_CSUM_OBJECTID and type BTRFS_EXTENT_CSUM_KEY.
 *
 * Consecutive csum items are merged into one run [offset, offset + num_bytes).
 * Whenever an item does not start where the current run ends, the finished
 * run is handed to check_extent_exists() and a message is printed if no
 * extent record backs it. When the global check_data_csum flag is set, each
 * item is also passed to check_extent_csums().
 *
 * NOTE(review): several lines of this function are not visible here (the
 * embedded line numbers skip), including the return statements, so the
 * return value convention could not be confirmed.
 */
8322 static int check_csums(struct btrfs_root *root)
8324 struct btrfs_path path;
8325 struct extent_buffer *leaf;
8326 struct btrfs_key key;
8327 u64 offset = 0, num_bytes = 0;
8328 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8332 unsigned long leaf_offset;
/* From here on, root refers to the csum tree, not the root passed in. */
8334 root = root->fs_info->csum_root;
8335 if (!extent_buffer_uptodate(root->node)) {
8336 fprintf(stderr, "No valid csum tree found\n");
8340 btrfs_init_path(&path);
8341 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8342 key.type = BTRFS_EXTENT_CSUM_KEY;
8344 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8346 fprintf(stderr, "Error searching csum tree %d\n", ret);
8347 btrfs_release_path(&path);
8351 if (ret > 0 && path.slots[0])
8356 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8357 ret = btrfs_next_leaf(root, &path);
8359 fprintf(stderr, "Error going to next leaf "
8366 leaf = path.nodes[0];
8368 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8369 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Data bytes covered by this item: one sector per stored checksum. */
8374 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8375 csum_size) * root->fs_info->sectorsize;
8376 if (!check_data_csum)
8377 goto skip_csum_check;
8378 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8379 ret = check_extent_csums(root, key.offset, data_len,
8385 offset = key.offset;
8386 } else if (key.offset != offset + num_bytes) {
8387 ret = check_extent_exists(root, offset, num_bytes);
8389 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8390 "there is no extent record\n",
8391 offset, offset+num_bytes);
8394 offset = key.offset;
8397 num_bytes += data_len;
8401 btrfs_release_path(&path);
/*
 * Compare key with drop_key field by field, in the order objectid, type,
 * offset.
 *
 * NOTE(review): the return statements are not visible in this view.
 * Presumably the result is nonzero when key sorts strictly before drop_key
 * and zero otherwise (the caller uses the result as a boolean) -- confirm
 * against the full file.
 */
8405 static int is_dropped_key(struct btrfs_key *key,
8406 struct btrfs_key *drop_key) {
8407 if (key->objectid < drop_key->objectid)
8409 else if (key->objectid == drop_key->objectid) {
8410 if (key->type < drop_key->type)
8412 else if (key->type == drop_key->type) {
8413 if (key->offset < drop_key->offset)
8421 * Here are the rules for FULL_BACKREF.
8423 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8424 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8426 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
8427 * if it happened after the relocation occurred since we'll have dropped the
8428 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8429 * have no real way to know for sure.
8431 * We process the blocks one root at a time, and we start from the lowest root
8432 * objectid and go to the highest. So we can just lookup the owner backref for
8433 * the record and if we don't find it then we know it doesn't exist and we have
8436 * FIXME: if we ever start reclaiming root objectids then we need to fix this
8437 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8438 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether BTRFS_BLOCK_FLAG_FULL_BACKREF applies to tree block buf
 * while it is being walked from the root described by ri, and OR the flag
 * into *flags when it does.
 *
 * The decision uses the extent record cached for buf->start together with
 * ri->objectid, ri->bytenr, the RELOC header flag, the header owner and the
 * owner tree backref returned by find_tree_backref(). If the outcome
 * contradicts a value already stored in rec->flag_block_full_backref
 * (anything other than FLAG_UNSET), rec->bad_full_backref is set.
 *
 * NOTE(review): the branch targets and return statements are missing from
 * this view, so which of the visible tests select "full backref" and which
 * select "normal" cannot be confirmed here.
 */
8440 static int calc_extent_flag(struct cache_tree *extent_cache,
8441 struct extent_buffer *buf,
8442 struct root_item_record *ri,
8445 struct extent_record *rec;
8446 struct cache_extent *cache;
8447 struct tree_backref *tback;
8450 cache = lookup_cache_extent(extent_cache, buf->start, 1);
8451 /* we have added this extent before */
8455 rec = container_of(cache, struct extent_record, cache);
8458 * Except file/reloc tree, we can not have
8461 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8466 if (buf->start == ri->bytenr)
8469 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8472 owner = btrfs_header_owner(buf);
8473 if (owner == ri->objectid)
8476 tback = find_tree_backref(rec, 0, owner);
8481 if (rec->flag_block_full_backref != FLAG_UNSET &&
8482 rec->flag_block_full_backref != 0)
8483 rec->bad_full_backref = 1;
8486 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8487 if (rec->flag_block_full_backref != FLAG_UNSET &&
8488 rec->flag_block_full_backref != 1)
8489 rec->bad_full_backref = 1;
/*
 * Print a one-line diagnostic to stderr naming a key type that was found in
 * a tree where it is not expected. The key type is rendered by
 * print_key_type() and the tree by print_objectid().
 */
8493 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8495 fprintf(stderr, "Invalid key type(");
8496 print_key_type(stderr, 0, key_type);
8497 fprintf(stderr, ") found in root(");
8498 print_objectid(stderr, rootid, 0);
8499 fprintf(stderr, ")\n");
8503 * Check if the key is valid with its extent buffer.
8505 * This is an early check in case an invalid key exists in an extent buffer
8506 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check that key_type is allowed in the tree identified by rootid.
 *
 * Each visible case pairs one or more key types with the only tree they may
 * appear in (chunk, extent, root or dev tree). A mismatch is reported
 * through report_mismatch_key_root().
 *
 * NOTE(review): BTRFS_CSUM_TREE_OBJECTID is a tree objectid, yet it is used
 * as a case label next to key types, under the comment "valid in csum and
 * log tree". BTRFS_EXTENT_CSUM_KEY looks like what was intended -- confirm.
 * NOTE(review): the switch header and the return statements are not visible
 * in this view; the caller treats a nonzero result as an invalid key.
 */
8509 static int check_type_with_root(u64 rootid, u8 key_type)
8512 /* Only valid in chunk tree */
8513 case BTRFS_DEV_ITEM_KEY:
8514 case BTRFS_CHUNK_ITEM_KEY:
8515 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8518 /* valid in csum and log tree */
8519 case BTRFS_CSUM_TREE_OBJECTID:
8520 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8524 case BTRFS_EXTENT_ITEM_KEY:
8525 case BTRFS_METADATA_ITEM_KEY:
8526 case BTRFS_BLOCK_GROUP_ITEM_KEY:
8527 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8530 case BTRFS_ROOT_ITEM_KEY:
8531 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8534 case BTRFS_DEV_EXTENT_KEY:
8535 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8541 report_mismatch_key_root(key_type, rootid);
/*
 * Process one tree block from the work queues and record everything it
 * references.
 *
 * Steps, as far as they are visible here:
 *  1. pick_next_pending() fills bits[] with candidate blocks; each one is
 *     added to the reada cache and passed to readahead_tree_block().
 *  2. bits[0] becomes the block to process. It is removed from the pending,
 *     reada and nodes caches and read with read_tree_block(). A buffer that
 *     is not uptodate is recorded through record_bad_block_io().
 *  3. The block flags come from btrfs_lookup_extent_info() unless the global
 *     init_extent_tree is set; calc_extent_flag() is the other source.
 *     FULL_BACKREF is then reconciled against the header owner, the RELOC
 *     header flag and ri->last_snapshot, and the result is stored in
 *     rec->flag_block_full_backref.
 *  4. check_block() validates the buffer.
 *  5. Leaf: every item is dispatched on key.type to the matching process_*()
 *     or add_*_backref() helper; file extent items also update the globals
 *     data_bytes_allocated and data_bytes_referenced.
 *     Node: every child pointer gets an extent record and a tree backref and
 *     is queued on nodes or pending.
 *  6. The global btree statistics are updated and the buffer is released.
 *
 * NOTE(review): many lines (declarations, braces, gotos, returns) are absent
 * from this view, so error paths and the return value are not documented.
 */
8545 static int run_next_block(struct btrfs_root *root,
8546 struct block_info *bits,
8549 struct cache_tree *pending,
8550 struct cache_tree *seen,
8551 struct cache_tree *reada,
8552 struct cache_tree *nodes,
8553 struct cache_tree *extent_cache,
8554 struct cache_tree *chunk_cache,
8555 struct rb_root *dev_cache,
8556 struct block_group_tree *block_group_cache,
8557 struct device_extent_tree *dev_extent_cache,
8558 struct root_item_record *ri)
8560 struct btrfs_fs_info *fs_info = root->fs_info;
8561 struct extent_buffer *buf;
8562 struct extent_record *rec = NULL;
8573 struct btrfs_key key;
8574 struct cache_extent *cache;
8577 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8578 bits_nr, &reada_bits);
8583 for(i = 0; i < nritems; i++) {
8584 ret = add_cache_extent(reada, bits[i].start,
8589 /* fixme, get the parent transid */
8590 readahead_tree_block(fs_info, bits[i].start, 0);
/* The first candidate is the block handled by this call. */
8593 *last = bits[0].start;
8594 bytenr = bits[0].start;
8595 size = bits[0].size;
8597 cache = lookup_cache_extent(pending, bytenr, size);
8599 remove_cache_extent(pending, cache);
8602 cache = lookup_cache_extent(reada, bytenr, size);
8604 remove_cache_extent(reada, cache);
8607 cache = lookup_cache_extent(nodes, bytenr, size);
8609 remove_cache_extent(nodes, cache);
8612 cache = lookup_cache_extent(extent_cache, bytenr, size);
8614 rec = container_of(cache, struct extent_record, cache);
8615 gen = rec->parent_generation;
8618 /* fixme, get the real parent transid */
8619 buf = read_tree_block(root->fs_info, bytenr, gen);
8620 if (!extent_buffer_uptodate(buf)) {
8621 record_bad_block_io(root->fs_info,
8622 extent_cache, bytenr, size);
8626 nritems = btrfs_header_nritems(buf);
8629 if (!init_extent_tree) {
8630 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8631 btrfs_header_level(buf), 1, NULL,
8634 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8636 fprintf(stderr, "Couldn't calc extent flags\n");
8637 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8642 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8644 fprintf(stderr, "Couldn't calc extent flags\n");
8645 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8649 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8651 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8652 ri->objectid == btrfs_header_owner(buf)) {
8654 * Ok we got to this block from it's original owner and
8655 * we have FULL_BACKREF set. Relocation can leave
8656 * converted blocks over so this is altogether possible,
8657 * however it's not possible if the generation > the
8658 * last snapshot, so check for this case.
8660 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8661 btrfs_header_generation(buf) > ri->last_snapshot) {
8662 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8663 rec->bad_full_backref = 1;
8668 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8669 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8670 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8671 rec->bad_full_backref = 1;
8675 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8676 rec->flag_block_full_backref = 1;
8680 rec->flag_block_full_backref = 0;
8682 owner = btrfs_header_owner(buf);
8685 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: dispatch every item on its key type. */
8689 if (btrfs_is_leaf(buf)) {
8690 btree_space_waste += btrfs_leaf_free_space(root, buf);
8691 for (i = 0; i < nritems; i++) {
8692 struct btrfs_file_extent_item *fi;
8693 btrfs_item_key_to_cpu(buf, &key, i);
8695 * Check key type against the leaf owner.
8696 * Could filter quite a lot of early error if
8699 if (check_type_with_root(btrfs_header_owner(buf),
8701 fprintf(stderr, "ignoring invalid key\n");
8704 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8705 process_extent_item(root, extent_cache, buf,
8709 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8710 process_extent_item(root, extent_cache, buf,
8714 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8716 btrfs_item_size_nr(buf, i);
8719 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8720 process_chunk_item(chunk_cache, &key, buf, i);
8723 if (key.type == BTRFS_DEV_ITEM_KEY) {
8724 process_device_item(dev_cache, &key, buf, i);
8727 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8728 process_block_group_item(block_group_cache,
8732 if (key.type == BTRFS_DEV_EXTENT_KEY) {
8733 process_device_extent_item(dev_extent_cache,
8738 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8739 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8740 process_extent_ref_v0(extent_cache, buf, i);
8747 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8748 ret = add_tree_backref(extent_cache,
8749 key.objectid, 0, key.offset, 0);
8752 "add_tree_backref failed (leaf tree block): %s",
8756 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8757 ret = add_tree_backref(extent_cache,
8758 key.objectid, key.offset, 0, 0);
8761 "add_tree_backref failed (leaf shared block): %s",
8765 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8766 struct btrfs_extent_data_ref *ref;
8767 ref = btrfs_item_ptr(buf, i,
8768 struct btrfs_extent_data_ref);
8769 add_data_backref(extent_cache,
8771 btrfs_extent_data_ref_root(buf, ref),
8772 btrfs_extent_data_ref_objectid(buf,
8774 btrfs_extent_data_ref_offset(buf, ref),
8775 btrfs_extent_data_ref_count(buf, ref),
8776 0, root->fs_info->sectorsize);
8779 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8780 struct btrfs_shared_data_ref *ref;
8781 ref = btrfs_item_ptr(buf, i,
8782 struct btrfs_shared_data_ref);
8783 add_data_backref(extent_cache,
8784 key.objectid, key.offset, 0, 0, 0,
8785 btrfs_shared_data_ref_count(buf, ref),
8786 0, root->fs_info->sectorsize);
8789 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8790 struct bad_item *bad;
8792 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8796 bad = malloc(sizeof(struct bad_item));
8799 INIT_LIST_HEAD(&bad->list);
8800 memcpy(&bad->key, &key,
8801 sizeof(struct btrfs_key));
8802 bad->root_id = owner;
8803 list_add_tail(&bad->list, &delete_items);
/*
 * Presumably only BTRFS_EXTENT_DATA_KEY items reach the file extent
 * accounting below; the statement guarded by this test is not visible here.
 */
8806 if (key.type != BTRFS_EXTENT_DATA_KEY)
8808 fi = btrfs_item_ptr(buf, i,
8809 struct btrfs_file_extent_item);
8810 if (btrfs_file_extent_type(buf, fi) ==
8811 BTRFS_FILE_EXTENT_INLINE)
8813 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8816 data_bytes_allocated +=
8817 btrfs_file_extent_disk_num_bytes(buf, fi);
8818 if (data_bytes_allocated < root->fs_info->sectorsize) {
8821 data_bytes_referenced +=
8822 btrfs_file_extent_num_bytes(buf, fi);
8823 add_data_backref(extent_cache,
8824 btrfs_file_extent_disk_bytenr(buf, fi),
8825 parent, owner, key.objectid, key.offset -
8826 btrfs_file_extent_offset(buf, fi), 1, 1,
8827 btrfs_file_extent_disk_num_bytes(buf, fi));
8831 struct btrfs_key first_key;
8833 first_key.objectid = 0;
8836 btrfs_item_key_to_cpu(buf, &first_key, 0);
8837 level = btrfs_header_level(buf);
/* One extent record, tree backref and queue entry per child pointer. */
8838 for (i = 0; i < nritems; i++) {
8839 struct extent_record tmpl;
8841 ptr = btrfs_node_blockptr(buf, i);
8842 size = root->fs_info->nodesize;
8843 btrfs_node_key_to_cpu(buf, &key, i);
8845 if ((level == ri->drop_level)
8846 && is_dropped_key(&key, &ri->drop_key)) {
8851 memset(&tmpl, 0, sizeof(tmpl));
8852 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8853 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8858 tmpl.max_size = size;
8859 ret = add_extent_rec(extent_cache, &tmpl);
8863 ret = add_tree_backref(extent_cache, ptr, parent,
8867 "add_tree_backref failed (non-leaf block): %s",
8873 add_pending(nodes, seen, ptr, size);
8875 add_pending(pending, seen, ptr, size);
8878 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
8879 nritems) * sizeof(struct btrfs_key_ptr);
8881 total_btree_bytes += buf->len;
8882 if (fs_root_objectid(btrfs_header_owner(buf)))
8883 total_fs_tree_bytes += buf->len;
8884 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8885 total_extent_tree_bytes += buf->len;
8887 free_extent_buffer(buf);
/*
 * Seed the scan with the root block of a tree.
 *
 * buf is queued on nodes when its header level is above zero and on pending
 * otherwise. An extent record template with start = buf->start and
 * max_size = buf->len is passed to add_extent_rec(), and a tree backref is
 * added for the block: for the reloc tree, or when the header backref
 * revision is older than BTRFS_MIXED_BACKREF_REV, buf->start is passed in
 * the argument position that follows the block address; otherwise that
 * position is 0 and objectid is passed after it.
 *
 * NOTE(review): the result of add_extent_rec() is not checked in the visible
 * code. Some template fields and the return statement are not visible in
 * this view.
 */
8891 static int add_root_to_pending(struct extent_buffer *buf,
8892 struct cache_tree *extent_cache,
8893 struct cache_tree *pending,
8894 struct cache_tree *seen,
8895 struct cache_tree *nodes,
8898 struct extent_record tmpl;
8901 if (btrfs_header_level(buf) > 0)
8902 add_pending(nodes, seen, buf->start, buf->len);
8904 add_pending(pending, seen, buf->start, buf->len);
8906 memset(&tmpl, 0, sizeof(tmpl));
8907 tmpl.start = buf->start;
8912 tmpl.max_size = buf->len;
8913 add_extent_rec(extent_cache, &tmpl);
8915 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8916 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8917 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8920 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8925 /* as we fix the tree, we might be deleting blocks that
8926 * we're tracking for repair. This hook makes sure we
8927 * remove any backrefs for blocks as we are fixing them.
/*
 * Keep the in-memory extent cache in step with an extent reference that is
 * being dropped.
 *
 * The record covering bytenr/num_bytes is looked up in
 * root->fs_info->fsck_extent_cache. is_data is computed as
 * owner >= BTRFS_FIRST_FREE_OBJECTID; a data backref is located with
 * find_data_backref() and a tree backref with find_tree_backref(). The
 * matching backref has its counters reduced by refs_to_drop (data) or its
 * found bits cleared (tree), with rec->refs and rec->extent_item_refs
 * adjusted alongside. Finally maybe_free_extent_rec() is called on the
 * record.
 *
 * NOTE(review): both rb_erase() calls are guarded by
 * "!found_extent_tree && found_ref". A backref with nothing left would have
 * found_ref == 0, so "!found_ref" may have been intended -- confirm.
 * NOTE(review): the refs_to_drop parameter declaration, the lookup failure
 * handling and the return statement are not visible in this view.
 */
8929 static int free_extent_hook(struct btrfs_trans_handle *trans,
8930 struct btrfs_root *root,
8931 u64 bytenr, u64 num_bytes, u64 parent,
8932 u64 root_objectid, u64 owner, u64 offset,
8935 struct extent_record *rec;
8936 struct cache_extent *cache;
8938 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8940 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8941 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8945 rec = container_of(cache, struct extent_record, cache);
8947 struct data_backref *back;
8948 back = find_data_backref(rec, parent, root_objectid, owner,
8949 offset, 1, bytenr, num_bytes);
8952 if (back->node.found_ref) {
8953 back->found_ref -= refs_to_drop;
8955 rec->refs -= refs_to_drop;
8957 if (back->node.found_extent_tree) {
8958 back->num_refs -= refs_to_drop;
8959 if (rec->extent_item_refs)
8960 rec->extent_item_refs -= refs_to_drop;
8962 if (back->found_ref == 0)
8963 back->node.found_ref = 0;
8964 if (back->num_refs == 0)
8965 back->node.found_extent_tree = 0;
8967 if (!back->node.found_extent_tree && back->node.found_ref) {
8968 rb_erase(&back->node.node, &rec->backref_tree);
8972 struct tree_backref *back;
8973 back = find_tree_backref(rec, parent, root_objectid);
8976 if (back->node.found_ref) {
8979 back->node.found_ref = 0;
8981 if (back->node.found_extent_tree) {
8982 if (rec->extent_item_refs)
8983 rec->extent_item_refs--;
8984 back->node.found_extent_tree = 0;
8986 if (!back->node.found_extent_tree && back->node.found_ref) {
8987 rb_erase(&back->node.node, &rec->backref_tree);
8991 maybe_free_extent_rec(extent_cache, rec);
/*
 * Remove the extent-tree items that describe the extent at bytenr.
 *
 * The search runs on root->fs_info->extent_root and starts from a key with
 * objectid bytenr and offset (u64)-1 (the initial type is not visible here).
 * Items whose objectid is bytenr and whose type is one of EXTENT_ITEM,
 * METADATA_ITEM, TREE_BLOCK_REF, EXTENT_DATA_REF, EXTENT_REF_V0,
 * SHARED_BLOCK_REF or SHARED_DATA_REF are deleted with btrfs_del_item();
 * each deletion is logged to stderr. Any other item type is stepped over by
 * lowering the search key: type - 1 with offset (u64)-1, or offset - 1 when
 * the type is already 0. After an EXTENT_ITEM or METADATA_ITEM is removed,
 * btrfs_update_block_group() is called for found_key.offset bytes (extent
 * item) or fs_info->nodesize bytes (metadata item).
 *
 * NOTE(review): loop structure, error handling and the return statement are
 * not visible in this view.
 */
8996 static int delete_extent_records(struct btrfs_trans_handle *trans,
8997 struct btrfs_root *root,
8998 struct btrfs_path *path,
9001 struct btrfs_key key;
9002 struct btrfs_key found_key;
9003 struct extent_buffer *leaf;
9008 key.objectid = bytenr;
9010 key.offset = (u64)-1;
9013 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9020 if (path->slots[0] == 0)
9026 leaf = path->nodes[0];
9027 slot = path->slots[0];
9029 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9030 if (found_key.objectid != bytenr)
/* Anything that is not an extent item or one of its backref items. */
9033 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9034 found_key.type != BTRFS_METADATA_ITEM_KEY &&
9035 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9036 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9037 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9038 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9039 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9040 btrfs_release_path(path);
9041 if (found_key.type == 0) {
9042 if (found_key.offset == 0)
9044 key.offset = found_key.offset - 1;
9045 key.type = found_key.type;
9047 key.type = found_key.type - 1;
9048 key.offset = (u64)-1;
9052 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9053 found_key.objectid, found_key.type, found_key.offset);
9055 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9058 btrfs_release_path(path);
9060 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9061 found_key.type == BTRFS_METADATA_ITEM_KEY) {
9062 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9063 found_key.offset : root->fs_info->nodesize;
9065 ret = btrfs_update_block_group(trans, root, bytenr,
9072 btrfs_release_path(path);
9077 * for a single backref, this will allocate a new extent
9078 * and add the backref to it.
/*
 * Write an extent item for rec into the extent tree and add the reference
 * described by back.
 *
 * Visible behaviour:
 *  - rec->max_size is raised with max_t() before use.
 *  - An empty item keyed (rec->start, BTRFS_EXTENT_ITEM_KEY, rec->max_size)
 *    is inserted into info->extent_root. Its size is sizeof(*ei), grown by
 *    sizeof(*bi) in a case whose test is not visible here.
 *  - The item starts with 0 refs and rec->generation. Data backrefs get
 *    BTRFS_EXTENT_FLAG_DATA; otherwise a tree block info is filled in with
 *    rec->info_level and a key of (rec->info_objectid, 0, 0), and the flags
 *    are BTRFS_EXTENT_FLAG_TREE_BLOCK | flags.
 *  - btrfs_update_block_group() is called for rec->start / rec->max_size.
 *  - Data backref: btrfs_inc_extent_ref() is called dback->found_ref times.
 *    Tree backref: btrfs_inc_extent_ref() is called once with tback->root.
 *    In both cases parent is taken from the backref when
 *    back->full_backref is set, and a message is printed to stderr.
 *
 * NOTE(review): how the allocated argument gates the item insertion is not
 * visible in this view -- confirm against the full file.
 * NOTE(review): "copy_key;;" carries a stray semicolon, and the "(c)_key"
 * tokens in the btrfs_set_disk_key_*() and btrfs_set_tree_block_key() calls
 * look like a mis-encoded "&copy_key" -- confirm against the upstream file.
 */
9080 static int record_extent(struct btrfs_trans_handle *trans,
9081 struct btrfs_fs_info *info,
9082 struct btrfs_path *path,
9083 struct extent_record *rec,
9084 struct extent_backref *back,
9085 int allocated, u64 flags)
9088 struct btrfs_root *extent_root = info->extent_root;
9089 struct extent_buffer *leaf;
9090 struct btrfs_key ins_key;
9091 struct btrfs_extent_item *ei;
9092 struct data_backref *dback;
9093 struct btrfs_tree_block_info *bi;
9096 rec->max_size = max_t(u64, rec->max_size,
9100 u32 item_size = sizeof(*ei);
9103 item_size += sizeof(*bi);
9105 ins_key.objectid = rec->start;
9106 ins_key.offset = rec->max_size;
9107 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9109 ret = btrfs_insert_empty_item(trans, extent_root, path,
9110 &ins_key, item_size);
9114 leaf = path->nodes[0];
9115 ei = btrfs_item_ptr(leaf, path->slots[0],
9116 struct btrfs_extent_item);
9118 btrfs_set_extent_refs(leaf, ei, 0);
9119 btrfs_set_extent_generation(leaf, ei, rec->generation);
9121 if (back->is_data) {
9122 btrfs_set_extent_flags(leaf, ei,
9123 BTRFS_EXTENT_FLAG_DATA);
9125 struct btrfs_disk_key copy_key;;
9127 bi = (struct btrfs_tree_block_info *)(ei + 1);
9128 memset_extent_buffer(leaf, 0, (unsigned long)bi,
9131 btrfs_set_disk_key_objectid(©_key,
9132 rec->info_objectid);
9133 btrfs_set_disk_key_type(©_key, 0);
9134 btrfs_set_disk_key_offset(©_key, 0);
9136 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9137 btrfs_set_tree_block_key(leaf, bi, ©_key);
9139 btrfs_set_extent_flags(leaf, ei,
9140 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9143 btrfs_mark_buffer_dirty(leaf);
9144 ret = btrfs_update_block_group(trans, extent_root, rec->start,
9145 rec->max_size, 1, 0);
9148 btrfs_release_path(path);
9151 if (back->is_data) {
9155 dback = to_data_backref(back);
9156 if (back->full_backref)
9157 parent = dback->parent;
9161 for (i = 0; i < dback->found_ref; i++) {
9162 /* if parent != 0, we're doing a full backref
9163 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9164 * just makes the backref allocator create a data
9167 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9168 rec->start, rec->max_size,
9172 BTRFS_FIRST_FREE_OBJECTID :
9178 fprintf(stderr, "adding new data backref"
9179 " on %llu %s %llu owner %llu"
9180 " offset %llu found %d\n",
9181 (unsigned long long)rec->start,
9182 back->full_backref ?
9184 back->full_backref ?
9185 (unsigned long long)parent :
9186 (unsigned long long)dback->root,
9187 (unsigned long long)dback->owner,
9188 (unsigned long long)dback->offset,
9192 struct tree_backref *tback;
9194 tback = to_tree_backref(back);
9195 if (back->full_backref)
9196 parent = tback->parent;
9200 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9201 rec->start, rec->max_size,
9202 parent, tback->root, 0, 0);
9203 fprintf(stderr, "adding new tree backref on "
9204 "start %llu len %llu parent %llu root %llu\n",
9205 rec->start, rec->max_size, parent, tback->root);
9208 btrfs_release_path(path);
/*
 * Linear search of the entries list for the element whose bytenr and bytes
 * both equal the given values.
 *
 * NOTE(review): the return statements are not visible in this view;
 * presumably the matching entry is returned, or NULL when none matches
 * (callers test the result with "!entry") -- confirm.
 */
9212 static struct extent_entry *find_entry(struct list_head *entries,
9213 u64 bytenr, u64 bytes)
9215 struct extent_entry *entry = NULL;
9217 list_for_each_entry(entry, entries, list) {
9218 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Choose the entry that the largest number of backrefs agree on.
 *
 * Entries are compared by their count field. An entry whose broken count
 * equals its count is not trusted. When the current entry ties with the
 * current best (or with the previous entry while no best exists yet), the
 * winner is still undecided and the scan goes on.
 *
 * NOTE(review): most statement bodies and the return statement are missing
 * from this view. Presumably NULL is returned when no single entry wins --
 * confirm against the full file.
 */
9225 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9227 struct extent_entry *entry, *best = NULL, *prev = NULL;
9229 list_for_each_entry(entry, entries, list) {
9231 * If there are as many broken entries as entries then we know
9232 * not to trust this particular entry.
9234 if (entry->broken == entry->count)
9238 * Special case, when there are only two entries and 'best' is
9248 * If our current entry == best then we can't be sure our best
9249 * is really the best, so we need to keep searching.
9251 if (best && best->count == entry->count) {
9257 /* Prev == entry, not good enough, have to keep searching */
9258 if (!prev->broken && prev->count == entry->count)
9262 best = (prev->count > entry->count) ? prev : entry;
9263 else if (best->count < entry->count)
/*
 * Rewrite one file extent item so that it agrees with the extent described
 * by entry.
 *
 * Visible steps:
 *  1. Read the fs root dback->root with btrfs_read_fs_root().
 *  2. Search from (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset) and
 *     walk forward until a file extent item is found whose disk bytenr and
 *     disk num bytes equal dback->disk_bytenr and dback->bytes.
 *  3. Start a transaction and search again for that key with cow enabled.
 *  4. Refuse compressed extents whose disk bytenr differs from
 *     entry->bytenr, refs that end past the entry and refs that start before
 *     it; each refusal prints a request for a btrfs-image.
 *  5. Otherwise set the disk bytenr to entry->bytenr, adjust the file extent
 *     offset, set disk num bytes to entry->bytes and, for uncompressed
 *     extents only, set ram bytes to entry->bytes.
 *  6. Mark the leaf dirty, commit the transaction and release the path.
 *
 * Returns ret when it is nonzero, otherwise the result of
 * btrfs_commit_transaction().
 *
 * NOTE(review): off_diff is computed in the "disk_bytenr > entry->bytenr"
 * branch but its use is not visible in this view; error paths are likewise
 * not visible.
 */
9271 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9272 struct data_backref *dback, struct extent_entry *entry)
9274 struct btrfs_trans_handle *trans;
9275 struct btrfs_root *root;
9276 struct btrfs_file_extent_item *fi;
9277 struct extent_buffer *leaf;
9278 struct btrfs_key key;
9282 key.objectid = dback->root;
9283 key.type = BTRFS_ROOT_ITEM_KEY;
9284 key.offset = (u64)-1;
9285 root = btrfs_read_fs_root(info, &key);
9287 fprintf(stderr, "Couldn't find root for our ref\n");
9292 * The backref points to the original offset of the extent if it was
9293 * split, so we need to search down to the offset we have and then walk
9294 * forward until we find the backref we're looking for.
9296 key.objectid = dback->owner;
9297 key.type = BTRFS_EXTENT_DATA_KEY;
9298 key.offset = dback->offset;
9299 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9301 fprintf(stderr, "Error looking up ref %d\n", ret);
9306 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9307 ret = btrfs_next_leaf(root, path);
9309 fprintf(stderr, "Couldn't find our ref, next\n");
9313 leaf = path->nodes[0];
9314 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9315 if (key.objectid != dback->owner ||
9316 key.type != BTRFS_EXTENT_DATA_KEY) {
9317 fprintf(stderr, "Couldn't find our ref, search\n");
9320 fi = btrfs_item_ptr(leaf, path->slots[0],
9321 struct btrfs_file_extent_item);
9322 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9323 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9325 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9330 btrfs_release_path(path);
9332 trans = btrfs_start_transaction(root, 1);
9334 return PTR_ERR(trans);
9337 * Ok we have the key of the file extent we want to fix, now we can cow
9338 * down to the thing and fix it.
9340 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9342 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9343 key.objectid, key.type, key.offset, ret);
9347 fprintf(stderr, "Well that's odd, we just found this key "
9348 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9353 leaf = path->nodes[0];
9354 fi = btrfs_item_ptr(leaf, path->slots[0],
9355 struct btrfs_file_extent_item);
9357 if (btrfs_file_extent_compression(leaf, fi) &&
9358 dback->disk_bytenr != entry->bytenr) {
9359 fprintf(stderr, "Ref doesn't match the record start and is "
9360 "compressed, please take a btrfs-image of this file "
9361 "system and send it to a btrfs developer so they can "
9362 "complete this functionality for bytenr %Lu\n",
9363 dback->disk_bytenr);
9368 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9369 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9370 } else if (dback->disk_bytenr > entry->bytenr) {
9371 u64 off_diff, offset;
9373 off_diff = dback->disk_bytenr - entry->bytenr;
9374 offset = btrfs_file_extent_offset(leaf, fi);
9375 if (dback->disk_bytenr + offset +
9376 btrfs_file_extent_num_bytes(leaf, fi) >
9377 entry->bytenr + entry->bytes) {
9378 fprintf(stderr, "Ref is past the entry end, please "
9379 "take a btrfs-image of this file system and "
9380 "send it to a btrfs developer, ref %Lu\n",
9381 dback->disk_bytenr);
9386 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9387 btrfs_set_file_extent_offset(leaf, fi, offset);
9388 } else if (dback->disk_bytenr < entry->bytenr) {
9391 offset = btrfs_file_extent_offset(leaf, fi);
9392 if (dback->disk_bytenr + offset < entry->bytenr) {
9393 fprintf(stderr, "Ref is before the entry start, please"
9394 " take a btrfs-image of this file system and "
9395 "send it to a btrfs developer, ref %Lu\n",
9396 dback->disk_bytenr);
9401 offset += dback->disk_bytenr;
9402 offset -= entry->bytenr;
9403 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9404 btrfs_set_file_extent_offset(leaf, fi, offset);
9407 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9410 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9411 * only do this if we aren't using compression, otherwise it's a
9414 if (!btrfs_file_extent_compression(leaf, fi))
9415 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9417 printf("ram bytes may be wrong?\n");
9418 btrfs_mark_buffer_dirty(leaf);
9420 err = btrfs_commit_transaction(trans, root);
9421 btrfs_release_path(path);
9422 return ret ? ret : err;
/*
 * Check that the data backrefs of rec agree on the extent start and length,
 * and try to repair the file extent items that disagree.
 *
 * First pass: every data backref that is not a full backref and has
 * found_ref != 0 is grouped by (disk_bytenr, bytes) into a local entries
 * list. A backref that differs from rec->start / rec->nr, or that is marked
 * broken, is noted as a mismatch. With at most one entry and no mismatch
 * there is nothing to repair.
 *
 * Otherwise find_most_right_entry() picks the entry most backrefs agree on.
 * If there is no winner, the extent record itself (rec->start, rec->nr) is
 * looked up among the entries, or added as a new entry in the branch taken
 * when it is missing but broken_entries and rec->found_rec are both nonzero,
 * and the vote is repeated. Cases that cannot be decided, and a winner whose
 * bytenr differs from rec->start, are reported on stderr.
 *
 * Second pass: repair_ref() is called for every considered backref whose
 * bytes or disk_bytenr differ from the winning entry. The local entries list
 * is emptied before returning.
 *
 * NOTE(review): counters, gotos and return statements are not visible in
 * this view, so the return value convention is not documented here.
 */
9425 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9426 struct extent_record *rec)
9428 struct extent_backref *back, *tmp;
9429 struct data_backref *dback;
9430 struct extent_entry *entry, *best = NULL;
9433 int broken_entries = 0;
9438 * Metadata is easy and the backrefs should always agree on bytenr and
9439 * size, if not we've got bigger issues.
9444 rbtree_postorder_for_each_entry_safe(back, tmp,
9445 &rec->backref_tree, node) {
9446 if (back->full_backref || !back->is_data)
9449 dback = to_data_backref(back);
9452 * We only pay attention to backrefs that we found a real
9455 if (dback->found_ref == 0)
9459 * For now we only catch when the bytes don't match, not the
9460 * bytenr. We can easily do this at the same time, but I want
9461 * to have a fs image to test on before we just add repair
9462 * functionality willy-nilly so we know we won't screw up the
9466 entry = find_entry(&entries, dback->disk_bytenr,
9469 entry = malloc(sizeof(struct extent_entry));
9474 memset(entry, 0, sizeof(*entry));
9475 entry->bytenr = dback->disk_bytenr;
9476 entry->bytes = dback->bytes;
9477 list_add_tail(&entry->list, &entries);
9482 * If we only have on entry we may think the entries agree when
9483 * in reality they don't so we have to do some extra checking.
9485 if (dback->disk_bytenr != rec->start ||
9486 dback->bytes != rec->nr || back->broken)
9497 /* Yay all the backrefs agree, carry on good sir */
9498 if (nr_entries <= 1 && !mismatch)
9501 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9502 "%Lu\n", rec->start);
9505 * First we want to see if the backrefs can agree amongst themselves who
9506 * is right, so figure out which one of the entries has the highest
9509 best = find_most_right_entry(&entries);
9512 * Ok so we may have an even split between what the backrefs think, so
9513 * this is where we use the extent ref to see what it thinks.
9516 entry = find_entry(&entries, rec->start, rec->nr);
9517 if (!entry && (!broken_entries || !rec->found_rec)) {
9518 fprintf(stderr, "Backrefs don't agree with each other "
9519 "and extent record doesn't agree with anybody,"
9520 " so we can't fix bytenr %Lu bytes %Lu\n",
9521 rec->start, rec->nr);
9524 } else if (!entry) {
9526 * Ok our backrefs were broken, we'll assume this is the
9527 * correct value and add an entry for this range.
9529 entry = malloc(sizeof(struct extent_entry));
9534 memset(entry, 0, sizeof(*entry));
9535 entry->bytenr = rec->start;
9536 entry->bytes = rec->nr;
9537 list_add_tail(&entry->list, &entries);
9541 best = find_most_right_entry(&entries);
9543 fprintf(stderr, "Backrefs and extent record evenly "
9544 "split on who is right, this is going to "
9545 "require user input to fix bytenr %Lu bytes "
9546 "%Lu\n", rec->start, rec->nr);
9553 * I don't think this can happen currently as we'll abort() if we catch
9554 * this case higher up, but in case somebody removes that we still can't
9555 * deal with it properly here yet, so just bail out of that's the case.
9557 if (best->bytenr != rec->start) {
9558 fprintf(stderr, "Extent start and backref starts don't match, "
9559 "please use btrfs-image on this file system and send "
9560 "it to a btrfs developer so they can make fsck fix "
9561 "this particular case. bytenr is %Lu, bytes is %Lu\n",
9562 rec->start, rec->nr);
9568 * Ok great we all agreed on an extent record, let's go find the real
9569 * references and fix up the ones that don't match.
9571 rbtree_postorder_for_each_entry_safe(back, tmp,
9572 &rec->backref_tree, node) {
9573 if (back->full_backref || !back->is_data)
9576 dback = to_data_backref(back);
9579 * Still ignoring backrefs that don't have a real ref attached
9582 if (dback->found_ref == 0)
9585 if (dback->bytes == best->bytes &&
9586 dback->disk_bytenr == best->bytenr)
9589 ret = repair_ref(info, path, dback, best);
9595 * Ok we messed with the actual refs, which means we need to drop our
9596 * entire cache and go back and rescan. I know this is a huge pain and
9597 * adds a lot of extra work, but it's the only way to be safe. Once all
9598 * the backrefs agree we may not need to do anything to the extent
9603 while (!list_empty(&entries)) {
9604 entry = list_entry(entries.next, struct extent_entry, list);
9605 list_del_init(&entry->list);
/*
 * Replace an extent record that was built only from backrefs with the
 * duplicate record that came from a real extent item.
 *
 * Nothing is done in this way when rec->found_rec is set or when rec has
 * more than one duplicate; rec->num_duplicates == 0 trips BUG_ON().
 * Otherwise rec is removed from extent_cache and the first element of
 * rec->dups ("good") takes its place: its list heads and check flags are
 * reset, it inherits rec->refs and the backrefs list of rec, and its cache
 * range is set to good->start / good->nr.
 *
 * Records in extent_cache that overlap good are then absorbed. One that has
 * found_rec set or carries duplicates is moved onto good->dups, and good is
 * queued on the global duplicate_extents list; any other record just
 * contributes its refs and backrefs. Finally good is inserted into
 * extent_cache.
 *
 * Returns 0 when good ends up with duplicates of its own and 1 otherwise.
 *
 * NOTE(review): early returns, the lookup loop structure and the freeing of
 * absorbed records are not visible in this view.
 */
9611 static int process_duplicates(struct cache_tree *extent_cache,
9612 struct extent_record *rec)
9614 struct extent_record *good, *tmp;
9615 struct cache_extent *cache;
9619 * If we found a extent record for this extent then return, or if we
9620 * have more than one duplicate we are likely going to need to delete
9623 if (rec->found_rec || rec->num_duplicates > 1)
9626 /* Shouldn't happen but just in case */
9627 BUG_ON(!rec->num_duplicates);
9630 * So this happens if we end up with a backref that doesn't match the
9631 * actual extent entry. So either the backref is bad or the extent
9632 * entry is bad. Either way we want to have the extent_record actually
9633 * reflect what we found in the extent_tree, so we need to take the
9634 * duplicate out and use that as the extent_record since the only way we
9635 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9637 remove_cache_extent(extent_cache, &rec->cache);
9639 good = to_extent_record(rec->dups.next);
9640 list_del_init(&good->list);
9641 INIT_LIST_HEAD(&good->backrefs);
9642 INIT_LIST_HEAD(&good->dups);
9643 good->cache.start = good->start;
9644 good->cache.size = good->nr;
9645 good->content_checked = 0;
9646 good->owner_ref_checked = 0;
9647 good->num_duplicates = 0;
9648 good->refs = rec->refs;
9649 list_splice_init(&rec->backrefs, &good->backrefs);
9651 cache = lookup_cache_extent(extent_cache, good->start,
9655 tmp = container_of(cache, struct extent_record, cache);
9658 * If we find another overlapping extent and it's found_rec is
9659 * set then it's a duplicate and we need to try and delete
9662 if (tmp->found_rec || tmp->num_duplicates > 0) {
9663 if (list_empty(&good->list))
9664 list_add_tail(&good->list,
9665 &duplicate_extents);
9666 good->num_duplicates += tmp->num_duplicates + 1;
9667 list_splice_init(&tmp->dups, &good->dups);
9668 list_del_init(&tmp->list);
9669 list_add_tail(&tmp->list, &good->dups);
9670 remove_cache_extent(extent_cache, &tmp->cache);
9675 * Ok we have another non extent item backed extent rec, so lets
9676 * just add it to this extent and carry on like we did above.
9678 good->refs += tmp->refs;
9679 list_splice_init(&tmp->backrefs, &good->backrefs);
9680 remove_cache_extent(extent_cache, &tmp->cache);
9683 ret = insert_cache_extent(extent_cache, &good->cache);
9686 return good->num_duplicates ? 0 : 1;
/*
 * Delete redundant EXTENT_ITEM entries for an extent record that carries
 * duplicates.
 *
 * The entries on rec->dups are compared by start and nr against a candidate
 * kept in "good" in order to find the record that covers all the others; a
 * pair that overlaps without one covering the other is reported on stderr.
 * @rec and entries of rec->dups are then collected on a local delete_list.
 * Inside one transaction on fs_info->extent_root each collected record is
 * looked up with the key (start, BTRFS_EXTENT_ITEM_KEY, nr) and removed with
 * btrfs_del_item(); records with found_rec == 0 and metadata records are
 * tested for before the lookup. Both delete_list and rec->dups are drained
 * before returning.
 *
 * @root: used only to reach root->fs_info->extent_root
 * @rec:  extent record whose duplicates are resolved
 *
 * Returns ret when it is non-zero, otherwise nr_del (the caller documents
 * this as the number of entries deleted). When both are zero,
 * rec->num_duplicates is reset to 0.
 */
9689 static int delete_duplicate_records(struct btrfs_root *root,
9690 struct extent_record *rec)
9692 struct btrfs_trans_handle *trans;
9693 LIST_HEAD(delete_list);
9694 struct btrfs_path path;
9695 struct extent_record *tmp, *good, *n;
9698 struct btrfs_key key;
9700 btrfs_init_path(&path);
9703 /* Find the record that covers all of the duplicates. */
9704 list_for_each_entry(tmp, &rec->dups, list) {
9705 if (good->start < tmp->start)
9707 if (good->nr > tmp->nr)
9710 if (tmp->start + tmp->nr < good->start + good->nr) {
9711 fprintf(stderr, "Ok we have overlapping extents that "
9712 "aren't completely covered by each other, this "
9713 "is going to require more careful thought. "
9714 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9715 tmp->start, tmp->nr, good->start, good->nr);
9722 list_add_tail(&rec->list, &delete_list);
9724 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9727 list_move_tail(&tmp->list, &delete_list);
9730 root = root->fs_info->extent_root;
9731 trans = btrfs_start_transaction(root, 1);
9732 if (IS_ERR(trans)) {
9733 ret = PTR_ERR(trans);
9737 list_for_each_entry(tmp, &delete_list, list) {
9738 if (tmp->found_rec == 0)
9740 key.objectid = tmp->start;
9741 key.type = BTRFS_EXTENT_ITEM_KEY;
9742 key.offset = tmp->nr;
9744 /* Shouldn't happen but just in case */
9745 if (tmp->metadata) {
9746 fprintf(stderr, "Well this shouldn't happen, extent "
9747 "record overlaps but is metadata? "
9748 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9752 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9758 ret = btrfs_del_item(trans, root, &path);
9761 btrfs_release_path(&path);
9764 err = btrfs_commit_transaction(trans, root);
9768 while (!list_empty(&delete_list)) {
9769 tmp = to_extent_record(delete_list.next);
9770 list_del_init(&tmp->list);
9776 while (!list_empty(&rec->dups)) {
9777 tmp = to_extent_record(rec->dups.next);
9778 list_del_init(&tmp->list);
9782 btrfs_release_path(&path);
9784 if (!ret && !nr_del)
9785 rec->num_duplicates = 0;
9787 return ret ? ret : nr_del;
/*
 * Try to locate the file extent items behind the not-yet-matched data
 * backrefs of @rec.
 *
 * Walks rec->backref_tree. Full backrefs, non-data backrefs and data backrefs
 * that already have found_ref set need no lookup. For the others the owning
 * fs root is read with the key (dback->root, BTRFS_ROOT_ITEM_KEY, -1) and the
 * key (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset) is searched in it.
 * When the file extent item exists, its disk bytenr and disk num bytes are
 * read, @extent_cache is probed at that bytenr, and the backref is updated:
 * found_ref is incremented and disk_bytenr / bytes take the values read from
 * the file extent item.
 *
 * @info:         filesystem the fs roots are read from
 * @path:         caller-provided path used for the searches; released after
 *                each lookup
 * @extent_cache: cache of extent records, probed with the discovered bytenr
 * @rec:          extent record whose backrefs are examined
 *
 * A root that does not exist (-ENOENT) is treated as a bad ref; any other
 * error from btrfs_read_fs_root() is returned as PTR_ERR(root).
 */
9790 static int find_possible_backrefs(struct btrfs_fs_info *info,
9791 struct btrfs_path *path,
9792 struct cache_tree *extent_cache,
9793 struct extent_record *rec)
9795 struct btrfs_root *root;
9796 struct extent_backref *back, *tmp;
9797 struct data_backref *dback;
9798 struct cache_extent *cache;
9799 struct btrfs_file_extent_item *fi;
9800 struct btrfs_key key;
9804 rbtree_postorder_for_each_entry_safe(back, tmp,
9805 &rec->backref_tree, node) {
9806 /* Don't care about full backrefs (poor unloved backrefs) */
9807 if (back->full_backref || !back->is_data)
9810 dback = to_data_backref(back);
9812 /* We found this one, we don't need to do a lookup */
9813 if (dback->found_ref)
9816 key.objectid = dback->root;
9817 key.type = BTRFS_ROOT_ITEM_KEY;
9818 key.offset = (u64)-1;
9820 root = btrfs_read_fs_root(info, &key);
9822 /* No root, definitely a bad ref, skip */
9823 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9825 /* Other err, exit */
9827 return PTR_ERR(root);
9829 key.objectid = dback->owner;
9830 key.type = BTRFS_EXTENT_DATA_KEY;
9831 key.offset = dback->offset;
9832 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9834 btrfs_release_path(path);
9837 /* Didn't find it, we can carry on */
9842 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9843 struct btrfs_file_extent_item);
9844 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9845 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9846 btrfs_release_path(path);
9847 cache = lookup_cache_extent(extent_cache, bytenr, 1);
9849 struct extent_record *tmp;
9850 tmp = container_of(cache, struct extent_record, cache);
9853 * If we found an extent record for the bytenr for this
9854 * particular backref then we can't add it to our
9855 * current extent record. We only want to add backrefs
9856 * that don't have a corresponding extent item in the
9857 * extent tree since they likely belong to this record
9858 * and we need to fix it if it doesn't match bytenrs.
9864 dback->found_ref += 1;
9865 dback->disk_bytenr = bytenr;
9866 dback->bytes = bytes;
9869 * Set this so the verify backref code knows not to trust the
9870 * values in this backref.
9879 * Record orphan data ref into corresponding root.
9881 * Return 0 if the extent item contains data ref and recorded.
9882 * Return 1 if the extent item contains no useful data ref
9883 * In that case, it may contain only shared_dataref or metadata backref
9884 * or the file extent exists(this should be handled by the extent bytenr
9886 * Return <0 if something goes wrong.
/*
 * Queue an orphan_data_extent on the owning root for every data backref of
 * @rec that exists in the extent tree but has no matching file extent.
 *
 * Walks rec->backref_tree and considers only keyed data backrefs that were
 * found in the extent tree (not full_backref, is_data, found_extent_tree)
 * and whose found_ref is not set. The destination root is read with the key
 * (dback->root, BTRFS_ROOT_ITEM_KEY, -1); roots that cannot be read are
 * skipped. The file extent key (dback->owner, BTRFS_EXTENT_DATA_KEY,
 * dback->offset) is then searched in that root, and depending on the result
 * an orphan entry describing (root, objectid, offset, disk_bytenr, disk_len)
 * is allocated and linked onto dest_root->orphan_data_extents.
 *
 * @fs_info: filesystem the destination roots are read from
 * @rec:     extent record whose backrefs are examined; rec->cache.start and
 *           rec->cache.size become the disk_bytenr and disk_len of the orphan
 *
 * Returns 0 when at least one orphan was recorded and 1 when none was
 * (the value is !recorded_data_ref).
 */
9888 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9889 struct extent_record *rec)
9891 struct btrfs_key key;
9892 struct btrfs_root *dest_root;
9893 struct extent_backref *back, *tmp;
9894 struct data_backref *dback;
9895 struct orphan_data_extent *orphan;
9896 struct btrfs_path path;
9897 int recorded_data_ref = 0;
9902 btrfs_init_path(&path);
9903 rbtree_postorder_for_each_entry_safe(back, tmp,
9904 &rec->backref_tree, node) {
9905 if (back->full_backref || !back->is_data ||
9906 !back->found_extent_tree)
9908 dback = to_data_backref(back);
9909 if (dback->found_ref)
9911 key.objectid = dback->root;
9912 key.type = BTRFS_ROOT_ITEM_KEY;
9913 key.offset = (u64)-1;
9915 dest_root = btrfs_read_fs_root(fs_info, &key);
9917 /* For non-exist root we just skip it */
9918 if (IS_ERR(dest_root) || !dest_root)
9921 key.objectid = dback->owner;
9922 key.type = BTRFS_EXTENT_DATA_KEY;
9923 key.offset = dback->offset;
9925 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9926 btrfs_release_path(&path);
9928 * For ret < 0, it's OK since the fs-tree may be corrupted,
9929 * we need to record it for inode/file extent rebuild.
9930 * For ret > 0, we record it only for file extent rebuild.
9931 * For ret == 0, the file extent exists but only bytenr
9932 * mismatch, let the original bytenr fix routine to handle,
9938 orphan = malloc(sizeof(*orphan));
9943 INIT_LIST_HEAD(&orphan->list);
9944 orphan->root = dback->root;
9945 orphan->objectid = dback->owner;
9946 orphan->offset = dback->offset;
9947 orphan->disk_bytenr = rec->cache.start;
9948 orphan->disk_len = rec->cache.size;
/*
 * list_add() takes (new entry, list head). The arguments used to be swapped,
 * which re-linked the root's list head around the new entry and dropped any
 * orphans already queued on this root.
 */
9949 list_add(&orphan->list, &dest_root->orphan_data_extents);
9950 recorded_data_ref = 1;
9953 btrfs_release_path(&path);
9955 return !recorded_data_ref;
9961 * when an incorrect extent item is found, this will delete
9962 * all of the existing entries for it and recreate them
9963 * based on what the tree scan found.
/*
 * Rebuild the extent tree entries of @rec from the backrefs collected during
 * the tree scan.
 *
 * Order of operations:
 *   - for non-metadata records whose refs differ from extent_item_refs,
 *     find_possible_backrefs() is run to pick up stray backrefs
 *   - verify_backrefs() checks that all backrefs agree
 *   - a transaction is started on info->extent_root
 *   - delete_extent_records() removes the existing records
 *   - info->corrupt_blocks is probed for the range (rec->start, rec->max_size)
 *   - record_extent() is called while walking rec->backref_tree; backrefs
 *     with found_ref == 0 are tested for first, and rec->bad_full_backref is
 *     cleared in this loop
 *   - the transaction is committed and "Repaired extent references for
 *     <start>" is printed to stderr
 *
 * BTRFS_BLOCK_FLAG_FULL_BACKREF is or-ed into the flags passed to
 * record_extent() when rec->flag_block_full_backref is set.
 *
 * @info:         filesystem being repaired
 * @extent_cache: extent record cache, handed to find_possible_backrefs()
 * @rec:          the extent record to repair
 *
 * A failure to start the transaction yields PTR_ERR(trans) in ret. The local
 * path is released before returning.
 */
9965 static int fixup_extent_refs(struct btrfs_fs_info *info,
9966 struct cache_tree *extent_cache,
9967 struct extent_record *rec)
9969 struct btrfs_trans_handle *trans = NULL;
9971 struct btrfs_path path;
9972 struct cache_extent *cache;
9973 struct extent_backref *back, *tmp;
9977 if (rec->flag_block_full_backref)
9978 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9980 btrfs_init_path(&path);
9981 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9983 * Sometimes the backrefs themselves are so broken they don't
9984 * get attached to any meaningful rec, so first go back and
9985 * check any of our backrefs that we couldn't find and throw
9986 * them into the list if we find the backref so that
9987 * verify_backrefs can figure out what to do.
9989 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9994 /* step one, make sure all of the backrefs agree */
9995 ret = verify_backrefs(info, &path, rec);
9999 trans = btrfs_start_transaction(info->extent_root, 1);
10000 if (IS_ERR(trans)) {
10001 ret = PTR_ERR(trans);
10005 /* step two, delete all the existing records */
10006 ret = delete_extent_records(trans, info->extent_root, &path,
10012 /* was this block corrupt? If so, don't add references to it */
10013 cache = lookup_cache_extent(info->corrupt_blocks,
10014 rec->start, rec->max_size);
10020 /* step three, recreate all the refs we did find */
10021 rbtree_postorder_for_each_entry_safe(back, tmp,
10022 &rec->backref_tree, node) {
10024 * if we didn't find any references, don't create a
10025 * new extent record
10027 if (!back->found_ref)
10030 rec->bad_full_backref = 0;
10031 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10039 int err = btrfs_commit_transaction(trans, info->extent_root);
10045 fprintf(stderr, "Repaired extent references for %llu\n",
10046 (unsigned long long)rec->start);
10048 btrfs_release_path(&path);
/*
 * Make the BTRFS_BLOCK_FLAG_FULL_BACKREF bit of the on-disk extent item of
 * @rec agree with rec->flag_block_full_backref.
 *
 * The item is searched in fs_info->extent_root with objectid rec->start;
 * metadata records use (BTRFS_METADATA_ITEM_KEY, rec->info_level) and other
 * records use (BTRFS_EXTENT_ITEM_KEY, rec->max_size) as type and offset.
 * The flags are read from the leaf, the full backref bit is set or cleared
 * (each case is announced on stderr), written back with
 * btrfs_set_extent_flags(), and the leaf is marked dirty before the
 * transaction is committed. "Repaired extent flags for <start>" is printed
 * afterwards.
 *
 * @fs_info: filesystem being repaired
 * @rec:     extent record whose flags are corrected
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started. On the
 * early exits (search failure, item not found) the path is released and the
 * transaction is still committed.
 * NOTE(review): the result of btrfs_commit_transaction() on those two early
 * exits is not checked - confirm that this is intended.
 */
10052 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10053 struct extent_record *rec)
10055 struct btrfs_trans_handle *trans;
10056 struct btrfs_root *root = fs_info->extent_root;
10057 struct btrfs_path path;
10058 struct btrfs_extent_item *ei;
10059 struct btrfs_key key;
10063 key.objectid = rec->start;
10064 if (rec->metadata) {
10065 key.type = BTRFS_METADATA_ITEM_KEY;
10066 key.offset = rec->info_level;
10068 key.type = BTRFS_EXTENT_ITEM_KEY;
10069 key.offset = rec->max_size;
10072 trans = btrfs_start_transaction(root, 0);
10074 return PTR_ERR(trans);
10076 btrfs_init_path(&path);
10077 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10079 btrfs_release_path(&path);
10080 btrfs_commit_transaction(trans, root);
10083 fprintf(stderr, "Didn't find extent for %llu\n",
10084 (unsigned long long)rec->start);
10085 btrfs_release_path(&path);
10086 btrfs_commit_transaction(trans, root);
10090 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10091 struct btrfs_extent_item);
10092 flags = btrfs_extent_flags(path.nodes[0], ei);
10093 if (rec->flag_block_full_backref) {
10094 fprintf(stderr, "setting full backref on %llu\n",
10095 (unsigned long long)key.objectid);
10096 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10098 fprintf(stderr, "clearing full backref on %llu\n",
10099 (unsigned long long)key.objectid);
10100 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10102 btrfs_set_extent_flags(path.nodes[0], ei, flags);
10103 btrfs_mark_buffer_dirty(path.nodes[0]);
10104 btrfs_release_path(&path);
10105 ret = btrfs_commit_transaction(trans, root);
10107 fprintf(stderr, "Repaired extent flags for %llu\n",
10108 (unsigned long long)rec->start);
10113 /* right now we only prune from the extent allocation tree */
/*
 * Remove the parent pointer that leads to one corrupt block of the extent
 * tree.
 *
 * info->extent_root is searched for corrupt->key with path.lowest_level set
 * to corrupt->level + 1, so the search stops at the parent of the bad block.
 * The slot returned by the search is examined first; if its block pointer is
 * not corrupt->cache.start, every slot of that node is scanned for it. Once
 * the pointer is located, a "deleting pointer to block" message is printed
 * and the pointer is removed with btrfs_del_ptr().
 *
 * @trans:   running transaction, passed to btrfs_search_slot()
 * @info:    filesystem whose extent root is pruned
 * @corrupt: description of the bad block: key and level drive the search,
 *           cache.start is the block address compared with the node pointers
 *
 * The local path is released before returning.
 */
10114 static int prune_one_block(struct btrfs_trans_handle *trans,
10115 struct btrfs_fs_info *info,
10116 struct btrfs_corrupt_block *corrupt)
10119 struct btrfs_path path;
10120 struct extent_buffer *eb;
10124 int level = corrupt->level + 1;
10126 btrfs_init_path(&path);
10128 /* we want to stop at the parent to our busted block */
10129 path.lowest_level = level;
10131 ret = btrfs_search_slot(trans, info->extent_root,
10132 &corrupt->key, &path, -1, 1);
10137 eb = path.nodes[level];
10144 * hopefully the search gave us the block we want to prune,
10145 * lets try that first
10147 slot = path.slots[level];
10148 found = btrfs_node_blockptr(eb, slot);
10149 if (found == corrupt->cache.start)
10152 nritems = btrfs_header_nritems(eb);
10154 /* the search failed, lets scan this node and hope we find it */
10155 for (slot = 0; slot < nritems; slot++) {
10156 found = btrfs_node_blockptr(eb, slot);
10157 if (found == corrupt->cache.start)
10161 * we couldn't find the bad block. TODO, search all the nodes for pointers
10164 if (eb == info->extent_root->node) {
10169 btrfs_release_path(&path);
10174 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10175 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10178 btrfs_release_path(&path);
/*
 * Run prune_one_block() on entries of info->corrupt_blocks and remove each
 * processed entry from that cache.
 *
 * The work happens in a transaction on info->extent_root. If the transaction
 * cannot be started, PTR_ERR(trans) is returned; otherwise the result of
 * btrfs_commit_transaction() is returned after pruning.
 *
 * NOTE(review): the return value of prune_one_block() is ignored - confirm
 * that pruning is meant to be best-effort.
 */
10182 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10184 struct btrfs_trans_handle *trans = NULL;
10185 struct cache_extent *cache;
10186 struct btrfs_corrupt_block *corrupt;
10189 cache = search_cache_extent(info->corrupt_blocks, 0);
10193 trans = btrfs_start_transaction(info->extent_root, 1);
10195 return PTR_ERR(trans);
10197 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10198 prune_one_block(trans, info, corrupt);
10199 remove_cache_extent(info->corrupt_blocks, cache);
10202 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Drop the EXTENT_DIRTY ranges recorded in fs_info->free_space_cache and walk
 * the block groups of @fs_info.
 *
 * Dirty ranges are located with find_first_extent_bit() and cleared with
 * clear_extent_dirty(). Block groups are then visited through
 * btrfs_lookup_first_block_group(), moving the cursor to the end of each
 * group (key.objectid + key.offset).
 * NOTE(review): presumably per-group cached state is reset during that walk;
 * confirm against the full function body.
 */
10206 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10208 struct btrfs_block_group_cache *cache;
10213 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10214 &start, &end, EXTENT_DIRTY);
10217 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10222 cache = btrfs_lookup_first_block_group(fs_info, start);
10227 start = cache->key.objectid + cache->key.offset;
/*
 * Verify every record of @extent_cache against what the extent tree claims
 * and, when repairing, try to fix the inconsistencies.
 *
 * Repair preparation: the range of every extent record and of every entry of
 * fs_info->corrupt_blocks is marked dirty in fs_info->excluded_extents so
 * that nothing is allocated from problem extents; prune_corrupt_blocks() and
 * reset_cached_block_groups() are then run.
 *
 * Duplicates: while repairing, each record on the global duplicate_extents
 * list is passed to process_duplicates() and, when deletion is required, to
 * delete_duplicate_records().
 *
 * Main loop over @extent_cache, with reports on stderr for:
 *   - num_duplicates set            ("has multiple extent items")
 *   - refs != extent_item_refs      ("ref mismatch"), followed by a call to
 *                                   record_orphan_data_extents()
 *   - all_backpointers_checked()    ("backpointer mismatch")
 *   - owner_ref_checked not set     ("owner ref check failed")
 *   - bad_full_backref, crossing_stripes, wrong_chunk_type
 * With repair and the local fix flag set, fixup_extent_refs() is called;
 * fixup_extent_flags() handles the bad full backref case. Each record is
 * removed from the cache, its backrefs are freed, and under the condition
 * (!init_extent_tree && repair && (!cur_err || fix)) its range is cleared
 * from excluded_extents again.
 *
 * After the loop a ret that is neither 0 nor -EAGAIN prints "failed to
 * repair damaged filesystem, aborting"; the other branch starts a transaction
 * on the extent root, runs btrfs_fix_block_accounting() and commits.
 *
 * @root:         used to reach root->fs_info (and its extent root)
 * @extent_cache: cache tree of struct extent_record to verify
 */
10231 static int check_extent_refs(struct btrfs_root *root,
10232 struct cache_tree *extent_cache)
10234 struct extent_record *rec;
10235 struct cache_extent *cache;
10241 * if we're doing a repair, we have to make sure
10242 * we don't allocate from the problem extents.
10243 * In the worst case, this will be all the
10244 * extents in the FS
10246 cache = search_cache_extent(extent_cache, 0);
10248 rec = container_of(cache, struct extent_record, cache);
10249 set_extent_dirty(root->fs_info->excluded_extents,
10251 rec->start + rec->max_size - 1);
10252 cache = next_cache_extent(cache);
10255 /* pin down all the corrupted blocks too */
10256 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10258 set_extent_dirty(root->fs_info->excluded_extents,
10260 cache->start + cache->size - 1);
10261 cache = next_cache_extent(cache);
10263 prune_corrupt_blocks(root->fs_info);
10264 reset_cached_block_groups(root->fs_info);
10267 reset_cached_block_groups(root->fs_info);
10270 * We need to delete any duplicate entries we find first otherwise we
10271 * could mess up the extent tree when we have backrefs that actually
10272 * belong to a different extent item and not the weird duplicate one.
10274 while (repair && !list_empty(&duplicate_extents)) {
10275 rec = to_extent_record(duplicate_extents.next);
10276 list_del_init(&rec->list);
10278 /* Sometimes we can find a backref before we find an actual
10279 * extent, so we need to process it a little bit to see if there
10280 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10281 * if this is a backref screwup. If we need to delete stuff
10282 * process_duplicates() will return 0, otherwise it will return
10285 if (process_duplicates(extent_cache, rec))
10287 ret = delete_duplicate_records(root, rec);
10291 * delete_duplicate_records will return the number of entries
10292 * deleted, so if it's greater than 0 then we know we actually
10293 * did something and we need to remove.
10306 cache = search_cache_extent(extent_cache, 0);
10309 rec = container_of(cache, struct extent_record, cache);
10310 if (rec->num_duplicates) {
10311 fprintf(stderr, "extent item %llu has multiple extent "
10312 "items\n", (unsigned long long)rec->start);
10316 if (rec->refs != rec->extent_item_refs) {
10317 fprintf(stderr, "ref mismatch on [%llu %llu] ",
10318 (unsigned long long)rec->start,
10319 (unsigned long long)rec->nr);
10320 fprintf(stderr, "extent item %llu, found %llu\n",
10321 (unsigned long long)rec->extent_item_refs,
10322 (unsigned long long)rec->refs);
10323 ret = record_orphan_data_extents(root->fs_info, rec);
10329 if (all_backpointers_checked(rec, 1)) {
10330 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10331 (unsigned long long)rec->start,
10332 (unsigned long long)rec->nr);
10336 if (!rec->owner_ref_checked) {
10337 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10338 (unsigned long long)rec->start,
10339 (unsigned long long)rec->nr);
10344 if (repair && fix) {
10345 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10351 if (rec->bad_full_backref) {
10352 fprintf(stderr, "bad full backref, on [%llu]\n",
10353 (unsigned long long)rec->start);
10355 ret = fixup_extent_flags(root->fs_info, rec);
10363 * Although it's not a extent ref's problem, we reuse this
10364 * routine for error reporting.
10365 * No repair function yet.
10367 if (rec->crossing_stripes) {
10369 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10370 rec->start, rec->start + rec->max_size);
10374 if (rec->wrong_chunk_type) {
10376 "bad extent [%llu, %llu), type mismatch with chunk\n",
10377 rec->start, rec->start + rec->max_size);
10381 remove_cache_extent(extent_cache, cache);
10382 free_all_extent_backrefs(rec);
10383 if (!init_extent_tree && repair && (!cur_err || fix))
10384 clear_extent_dirty(root->fs_info->excluded_extents,
10386 rec->start + rec->max_size - 1);
10391 if (ret && ret != -EAGAIN) {
10392 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10395 struct btrfs_trans_handle *trans;
10397 root = root->fs_info->extent_root;
10398 trans = btrfs_start_transaction(root, 1);
10399 if (IS_ERR(trans)) {
10400 ret = PTR_ERR(trans);
10404 ret = btrfs_fix_block_accounting(trans, root);
10407 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute the length of a single stripe of a chunk.
 *
 * @type:        block group flags; the BTRFS_BLOCK_GROUP_RAID* bits select
 *               the formula
 * @length:      total length of the chunk
 * @num_stripes: number of stripes in the chunk
 *
 * Formulas, tested in this order:
 *   RAID0:  length / num_stripes
 *   RAID10: (length * 2) / num_stripes
 *   RAID5:  length / (num_stripes - 1)
 *   RAID6:  length / (num_stripes - 2)
 *   other:  length
 *
 * NOTE(review): the divisor is not checked here; a num_stripes of 0 (RAID0,
 * RAID10), 1 (RAID5) or 2 (RAID6) would divide by zero. Presumably callers
 * validate the chunk first - confirm.
 */
10416 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10420 if (type & BTRFS_BLOCK_GROUP_RAID0) {
10421 stripe_size = length;
10422 stripe_size /= num_stripes;
10423 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10424 stripe_size = length * 2;
10425 stripe_size /= num_stripes;
10426 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10427 stripe_size = length;
10428 stripe_size /= (num_stripes - 1);
10429 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10430 stripe_size = length;
10431 stripe_size /= (num_stripes - 2);
10433 stripe_size = length;
10435 return stripe_size;
10439 * Check the chunk with its block group/dev list ref:
10440 * Return 0 if all refs seem valid.
10441 * Return 1 if part of refs seems valid, need later check for rebuild ref
10442 * like missing block group and needs to search extent tree to rebuild them.
10443 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * Cross-check one chunk record against its block group item and its device
 * extents.
 *
 * Block group: @block_group_cache is probed with the chunk offset and length.
 * A hit is compared field by field (chunk length vs. block group offset,
 * chunk offset vs. block group objectid, type_flags vs. flags); a mismatch is
 * reported, while a match unlinks the block group record from its list and
 * stores it in chunk_rec->bg_rec. A miss is reported as "is not found in
 * block group".
 *
 * Device extents: the expected extent length comes from
 * calc_stripe_length(). For each stripe, @dev_extent_cache is probed with
 * (devid, offset, length). A hit must agree on objectid, offset, chunk_offset
 * and length, otherwise a mismatch is reported; a matching record is moved
 * onto chunk_rec->dextents. A miss is reported as "is not found in dev
 * extent".
 *
 * @chunk_rec:         chunk to verify
 * @block_group_cache: cache of block group records
 * @dev_extent_cache:  cache of device extent records
 */
10445 static int check_chunk_refs(struct chunk_record *chunk_rec,
10446 struct block_group_tree *block_group_cache,
10447 struct device_extent_tree *dev_extent_cache,
10450 struct cache_extent *block_group_item;
10451 struct block_group_record *block_group_rec;
10452 struct cache_extent *dev_extent_item;
10453 struct device_extent_record *dev_extent_rec;
10457 int metadump_v2 = 0;
10461 block_group_item = lookup_cache_extent(&block_group_cache->tree,
10463 chunk_rec->length);
10464 if (block_group_item) {
10465 block_group_rec = container_of(block_group_item,
10466 struct block_group_record,
10468 if (chunk_rec->length != block_group_rec->offset ||
10469 chunk_rec->offset != block_group_rec->objectid ||
10471 chunk_rec->type_flags != block_group_rec->flags)) {
10474 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10475 chunk_rec->objectid,
10480 chunk_rec->type_flags,
10481 block_group_rec->objectid,
10482 block_group_rec->type,
10483 block_group_rec->offset,
10484 block_group_rec->offset,
10485 block_group_rec->objectid,
10486 block_group_rec->flags);
10489 list_del_init(&block_group_rec->list);
10490 chunk_rec->bg_rec = block_group_rec;
10495 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10496 chunk_rec->objectid,
10501 chunk_rec->type_flags);
10508 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10509 chunk_rec->num_stripes);
10510 for (i = 0; i < chunk_rec->num_stripes; ++i) {
10511 devid = chunk_rec->stripes[i].devid;
10512 offset = chunk_rec->stripes[i].offset;
10513 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10514 devid, offset, length);
10515 if (dev_extent_item) {
10516 dev_extent_rec = container_of(dev_extent_item,
10517 struct device_extent_record,
10519 if (dev_extent_rec->objectid != devid ||
10520 dev_extent_rec->offset != offset ||
10521 dev_extent_rec->chunk_offset != chunk_rec->offset ||
10522 dev_extent_rec->length != length) {
10525 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10526 chunk_rec->objectid,
10529 chunk_rec->stripes[i].devid,
10530 chunk_rec->stripes[i].offset,
10531 dev_extent_rec->objectid,
10532 dev_extent_rec->offset,
10533 dev_extent_rec->length);
10536 list_move(&dev_extent_rec->chunk_list,
10537 &chunk_rec->dextents);
10542 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10543 chunk_rec->objectid,
10546 chunk_rec->stripes[i].devid,
10547 chunk_rec->stripes[i].offset);
10554 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Run check_chunk_refs() on every chunk of @chunk_cache and sort the chunks
 * by result, then report block groups and device extents that no chunk
 * claimed.
 *
 * @chunk_cache:       cache tree of chunk records to verify
 * @block_group_cache: block group records; whatever is still linked on
 *                     block_group_cache->block_groups after the chunk pass
 *                     is reported as "didn't find the relative chunk"
 * @dev_extent_cache:  device extent records; entries still on
 *                     no_chunk_orphans are reported the same way
 * @good:              optional list; receives chunks whose check returned 0
 * @bad:               optional list; receives chunks whose check returned < 0
 * @rebuild:           optional list; receives chunks whose check returned > 0
 * @silent:            forwarded to check_chunk_refs()
 *
 * Any of @good, @bad and @rebuild may be NULL, in which case chunks of that
 * class are not collected.
 */
10555 int check_chunks(struct cache_tree *chunk_cache,
10556 struct block_group_tree *block_group_cache,
10557 struct device_extent_tree *dev_extent_cache,
10558 struct list_head *good, struct list_head *bad,
10559 struct list_head *rebuild, int silent)
10561 struct cache_extent *chunk_item;
10562 struct chunk_record *chunk_rec;
10563 struct block_group_record *bg_rec;
10564 struct device_extent_record *dext_rec;
10568 chunk_item = first_cache_extent(chunk_cache);
10569 while (chunk_item) {
10570 chunk_rec = container_of(chunk_item, struct chunk_record,
10572 err = check_chunk_refs(chunk_rec, block_group_cache,
10573 dev_extent_cache, silent);
10576 if (err == 0 && good)
10577 list_add_tail(&chunk_rec->list, good);
10578 if (err > 0 && rebuild)
10579 list_add_tail(&chunk_rec->list, rebuild);
10580 if (err < 0 && bad)
10581 list_add_tail(&chunk_rec->list, bad);
10582 chunk_item = next_cache_extent(chunk_item);
10585 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10588 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10596 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10600 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10601 dext_rec->objectid,
/*
 * Check that the device extents of one device add up to the byte_used value
 * of its device item.
 *
 * Starting from the first cache entry at (dev_rec->devid, 0), the device
 * extent records are walked in cache order. Each record visited is unlinked
 * from its device_list and its length is added to a running total; the
 * objectid of every record is compared with dev_rec->devid first. When the
 * total differs from dev_rec->byte_used, a message with both values is
 * printed.
 *
 * @dev_rec:    device record to verify
 * @dext_cache: device extent cache to sum from
 */
10611 static int check_device_used(struct device_record *dev_rec,
10612 struct device_extent_tree *dext_cache)
10614 struct cache_extent *cache;
10615 struct device_extent_record *dev_extent_rec;
10616 u64 total_byte = 0;
10618 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10620 dev_extent_rec = container_of(cache,
10621 struct device_extent_record,
10623 if (dev_extent_rec->objectid != dev_rec->devid)
10626 list_del_init(&dev_extent_rec->device_list);
10627 total_byte += dev_extent_rec->length;
10628 cache = next_cache_extent(cache);
10631 if (total_byte != dev_rec->byte_used) {
10633 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10634 total_byte, dev_rec->byte_used, dev_rec->objectid,
10635 dev_rec->type, dev_rec->offset);
10642 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Run check_device_used() for every device record in @dev_cache, then report
 * the device extents that belong to no device.
 *
 * @dev_cache:        rb-tree of struct device_record, walked with
 *                    rb_first() / rb_next()
 * @dev_extent_cache: device extent cache; entries left on no_device_orphans
 *                    are reported as "didn't find its device"
 */
10643 static int check_devices(struct rb_root *dev_cache,
10644 struct device_extent_tree *dev_extent_cache)
10646 struct rb_node *dev_node;
10647 struct device_record *dev_rec;
10648 struct device_extent_record *dext_rec;
10652 dev_node = rb_first(dev_cache);
10654 dev_rec = container_of(dev_node, struct device_record, node);
10655 err = check_device_used(dev_rec, dev_extent_cache);
10659 dev_node = rb_next(dev_node);
10661 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10664 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10665 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record describing one tree root and append it to
 * @head.
 *
 * @head:          list the new record is added to (at the tail)
 * @objectid:      objectid of the root
 * @bytenr:        address of the root node
 * @last_snapshot: last_snapshot value of the root item
 * @level:         level of the root node
 * @drop_level:    drop level of a root that is being dropped
 * @drop_key:      drop progress key, copied into the record; callers in this
 *                 file pass NULL for roots that are not being dropped
 *
 * The record is allocated with malloc() and is owned by the list afterwards.
 */
10672 static int add_root_item_to_list(struct list_head *head,
10673 u64 objectid, u64 bytenr, u64 last_snapshot,
10674 u8 level, u8 drop_level,
10675 struct btrfs_key *drop_key)
10678 struct root_item_record *ri_rec;
10679 ri_rec = malloc(sizeof(*ri_rec));
10682 ri_rec->bytenr = bytenr;
10683 ri_rec->objectid = objectid;
10684 ri_rec->level = level;
10685 ri_rec->drop_level = drop_level;
10686 ri_rec->last_snapshot = last_snapshot;
10688 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10689 list_add_tail(&ri_rec->list, head);
/*
 * Empty a list of root_item_record entries: the first entry is unlinked
 * repeatedly until @list is empty.
 */
10694 static void free_root_item_list(struct list_head *list)
10696 struct root_item_record *ri_rec;
10698 while (!list_empty(list)) {
10699 ri_rec = list_first_entry(list, struct root_item_record,
10701 list_del_init(&ri_rec->list);
/*
 * Consume a list of root_item_record entries and scan the trees they
 * describe.
 *
 * For each record the root node is read with read_tree_block(); a buffer
 * that is not up to date is released. The root is registered through
 * add_root_to_pending(), and run_next_block() is invoked with the record so
 * that a tree can be processed on its own (the in-body comment explains that
 * this is needed when rebuilding the extent tree, while processing nodes
 * first maximises readahead otherwise). The buffer is released and the
 * record is unlinked from @list afterwards.
 *
 * Once the list has been handled, run_next_block() is called with a NULL
 * record to work through what is still pending.
 *
 * @list: records to process; entries are removed as they are handled
 * @root: any root of the filesystem, used for root->fs_info and forwarded
 * @bits: block_info scratch array forwarded to run_next_block() together
 *        with its size bits_nr
 * The remaining cache parameters are forwarded unchanged to
 * add_root_to_pending() and run_next_block().
 */
10706 static int deal_root_from_list(struct list_head *list,
10707 struct btrfs_root *root,
10708 struct block_info *bits,
10710 struct cache_tree *pending,
10711 struct cache_tree *seen,
10712 struct cache_tree *reada,
10713 struct cache_tree *nodes,
10714 struct cache_tree *extent_cache,
10715 struct cache_tree *chunk_cache,
10716 struct rb_root *dev_cache,
10717 struct block_group_tree *block_group_cache,
10718 struct device_extent_tree *dev_extent_cache)
10723 while (!list_empty(list)) {
10724 struct root_item_record *rec;
10725 struct extent_buffer *buf;
10726 rec = list_entry(list->next,
10727 struct root_item_record, list);
10729 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10730 if (!extent_buffer_uptodate(buf)) {
10731 free_extent_buffer(buf);
10735 ret = add_root_to_pending(buf, extent_cache, pending,
10736 seen, nodes, rec->objectid);
10740 * To rebuild extent tree, we need deal with snapshot
10741 * one by one, otherwise we deal with node firstly which
10742 * can maximize readahead.
10745 ret = run_next_block(root, bits, bits_nr, &last,
10746 pending, seen, reada, nodes,
10747 extent_cache, chunk_cache,
10748 dev_cache, block_group_cache,
10749 dev_extent_cache, rec);
10753 free_extent_buffer(buf);
10754 list_del(&rec->list);
10760 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10761 reada, nodes, extent_cache, chunk_cache,
10762 dev_cache, block_group_cache,
10763 dev_extent_cache, NULL);
/*
 * Top-level pass that scans the trees of the filesystem and cross-checks
 * chunks, block groups, device extents, devices and extent references.
 *
 * Setup: all local caches and lists are initialised, and excluded_extents,
 * extent_cache, free_extent_hook and corrupt_blocks are published through
 * @fs_info for the duration of the check. A block_info array of bits_nr
 * entries is allocated, and the progress task is started when
 * ctx.progress_enabled is set.
 *
 * Root collection: the tree root and the chunk root are queued on
 * normal_trees. The tree root is then iterated; every BTRFS_ROOT_ITEM_KEY
 * item is queued on normal_trees when the objectid of its drop_progress key
 * is 0, and on dropping_trees (with drop key and drop level) otherwise.
 *
 * Checks, in order: deal_root_from_list() for normal_trees and then for
 * dropping_trees, check_chunks(), check_extent_refs(), check_devices().
 * -EAGAIN from these steps is tested for explicitly; see the in-body comment
 * about falling through and restarting the loop.
 *
 * Cleanup: the progress task is stopped, the fs_info hooks set above are
 * reset to NULL, and the caches, lists and the excluded extent tree are
 * freed. A second, similar cleanup sequence exists that also frees the
 * extent record cache.
 *
 * NOTE(review): btrfs_next_leaf() is called with fs_info->fs_root although
 * the path was obtained by searching fs_info->tree_root - confirm that this
 * is intended.
 */
10773 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10775 struct rb_root dev_cache;
10776 struct cache_tree chunk_cache;
10777 struct block_group_tree block_group_cache;
10778 struct device_extent_tree dev_extent_cache;
10779 struct cache_tree extent_cache;
10780 struct cache_tree seen;
10781 struct cache_tree pending;
10782 struct cache_tree reada;
10783 struct cache_tree nodes;
10784 struct extent_io_tree excluded_extents;
10785 struct cache_tree corrupt_blocks;
10786 struct btrfs_path path;
10787 struct btrfs_key key;
10788 struct btrfs_key found_key;
10790 struct block_info *bits;
10792 struct extent_buffer *leaf;
10794 struct btrfs_root_item ri;
10795 struct list_head dropping_trees;
10796 struct list_head normal_trees;
10797 struct btrfs_root *root1;
10798 struct btrfs_root *root;
10802 root = fs_info->fs_root;
10803 dev_cache = RB_ROOT;
10804 cache_tree_init(&chunk_cache);
10805 block_group_tree_init(&block_group_cache);
10806 device_extent_tree_init(&dev_extent_cache);
10808 cache_tree_init(&extent_cache);
10809 cache_tree_init(&seen);
10810 cache_tree_init(&pending);
10811 cache_tree_init(&nodes);
10812 cache_tree_init(&reada);
10813 cache_tree_init(&corrupt_blocks);
10814 extent_io_tree_init(&excluded_extents);
10815 INIT_LIST_HEAD(&dropping_trees);
10816 INIT_LIST_HEAD(&normal_trees);
10819 fs_info->excluded_extents = &excluded_extents;
10820 fs_info->fsck_extent_cache = &extent_cache;
10821 fs_info->free_extent_hook = free_extent_hook;
10822 fs_info->corrupt_blocks = &corrupt_blocks;
10826 bits = malloc(bits_nr * sizeof(struct block_info));
10832 if (ctx.progress_enabled) {
10833 ctx.tp = TASK_EXTENTS;
10834 task_start(ctx.info);
10838 root1 = fs_info->tree_root;
10839 level = btrfs_header_level(root1->node);
10840 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10841 root1->node->start, 0, level, 0, NULL);
10844 root1 = fs_info->chunk_root;
10845 level = btrfs_header_level(root1->node);
10846 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10847 root1->node->start, 0, level, 0, NULL);
10850 btrfs_init_path(&path);
10853 key.type = BTRFS_ROOT_ITEM_KEY;
10854 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10858 leaf = path.nodes[0];
10859 slot = path.slots[0];
10860 if (slot >= btrfs_header_nritems(path.nodes[0])) {
10861 ret = btrfs_next_leaf(root, &path);
10864 leaf = path.nodes[0];
10865 slot = path.slots[0];
10867 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10868 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10869 unsigned long offset;
10872 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10873 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10874 last_snapshot = btrfs_root_last_snapshot(&ri);
10875 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10876 level = btrfs_root_level(&ri);
10877 ret = add_root_item_to_list(&normal_trees,
10878 found_key.objectid,
10879 btrfs_root_bytenr(&ri),
10880 last_snapshot, level,
10885 level = btrfs_root_level(&ri);
10886 objectid = found_key.objectid;
10887 btrfs_disk_key_to_cpu(&found_key,
10888 &ri.drop_progress);
10889 ret = add_root_item_to_list(&dropping_trees,
10891 btrfs_root_bytenr(&ri),
10892 last_snapshot, level,
10893 ri.drop_level, &found_key);
10900 btrfs_release_path(&path);
10903 * check_block can return -EAGAIN if it fixes something, please keep
10904 * this in mind when dealing with return values from these functions, if
10905 * we get -EAGAIN we want to fall through and restart the loop.
10907 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
10908 &seen, &reada, &nodes, &extent_cache,
10909 &chunk_cache, &dev_cache, &block_group_cache,
10910 &dev_extent_cache);
10912 if (ret == -EAGAIN)
10916 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
10917 &pending, &seen, &reada, &nodes,
10918 &extent_cache, &chunk_cache, &dev_cache,
10919 &block_group_cache, &dev_extent_cache);
10921 if (ret == -EAGAIN)
10926 ret = check_chunks(&chunk_cache, &block_group_cache,
10927 &dev_extent_cache, NULL, NULL, NULL, 0);
10929 if (ret == -EAGAIN)
10934 ret = check_extent_refs(root, &extent_cache);
10936 if (ret == -EAGAIN)
10941 ret = check_devices(&dev_cache, &dev_extent_cache);
10946 task_stop(ctx.info);
10948 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10949 extent_io_tree_cleanup(&excluded_extents);
10950 fs_info->fsck_extent_cache = NULL;
10951 fs_info->free_extent_hook = NULL;
10952 fs_info->corrupt_blocks = NULL;
10953 fs_info->excluded_extents = NULL;
10956 free_chunk_cache_tree(&chunk_cache);
10957 free_device_cache_tree(&dev_cache);
10958 free_block_group_tree(&block_group_cache);
10959 free_device_extent_tree(&dev_extent_cache);
10960 free_extent_cache_tree(&seen);
10961 free_extent_cache_tree(&pending);
10962 free_extent_cache_tree(&reada);
10963 free_extent_cache_tree(&nodes);
10964 free_root_item_list(&normal_trees);
10965 free_root_item_list(&dropping_trees);
10968 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10969 free_extent_cache_tree(&seen);
10970 free_extent_cache_tree(&pending);
10971 free_extent_cache_tree(&reada);
10972 free_extent_cache_tree(&nodes);
10973 free_chunk_cache_tree(&chunk_cache);
10974 free_block_group_tree(&block_group_cache);
10975 free_device_cache_tree(&dev_cache);
10976 free_device_extent_tree(&dev_extent_cache);
10977 free_extent_record_cache(&extent_cache);
10978 free_root_item_list(&normal_trees);
10979 free_root_item_list(&dropping_trees);
10980 extent_io_tree_cleanup(&excluded_extents);
10985 * Check backrefs of a tree block given by @bytenr or @eb.
10987 * @root: the root containing the @bytenr or @eb
10988 * @eb: tree block extent buffer, can be NULL
10989 * @bytenr: bytenr of the tree block to search
10990 * @level: tree level of the tree block
10991 * @owner: owner of the tree block
10993 * Return >0 for any error found and output error message
10994 * Return 0 for no error found
/*
 * Looks up the extent-tree item covering @bytenr, checks its flags,
 * generation, level and reference count, then scans the inline refs (falling
 * back to a keyed TREE_BLOCK_REF item) for a backref matching this root or
 * @owner.  Problems are accumulated in err as BACKREF_MISSING /
 * BACKREF_MISMATCH bits.
 */
10996 static int check_tree_block_ref(struct btrfs_root *root,
10997 struct extent_buffer *eb, u64 bytenr,
10998 int level, u64 owner)
11000 struct btrfs_key key;
11001 struct btrfs_root *extent_root = root->fs_info->extent_root;
11002 struct btrfs_path path;
11003 struct btrfs_extent_item *ei;
11004 struct btrfs_extent_inline_ref *iref;
11005 struct extent_buffer *leaf;
11011 u32 nodesize = root->fs_info->nodesize;
11014 int tree_reloc_root = 0;
/*
 * Remember when @bytenr is the root node of a tree reloc root, so that its
 * shared backref can be accepted in the inline-ref loop below.
 */
11019 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
11020 btrfs_header_bytenr(root->node) == bytenr)
11021 tree_reloc_root = 1;
11023 btrfs_init_path(&path);
11024 key.objectid = bytenr;
/* The key type depends on whether the fs uses skinny metadata items */
11025 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11026 key.type = BTRFS_METADATA_ITEM_KEY;
11028 key.type = BTRFS_EXTENT_ITEM_KEY;
/*
 * Search with the largest possible offset, then step back with
 * btrfs_previous_extent_item() -- presumably to land on the extent item
 * for @bytenr whatever its real key offset is.
 */
11029 key.offset = (u64)-1;
11031 /* Search for the backref in extent tree */
11032 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11034 err |= BACKREF_MISSING;
11037 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11039 err |= BACKREF_MISSING;
11043 leaf = path.nodes[0];
11044 slot = path.slots[0];
11045 btrfs_item_key_to_cpu(leaf, &key, slot);
11047 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * A METADATA_ITEM carries the level in key.offset and its inline refs start
 * right after the extent item; an EXTENT_ITEM stores the level in a
 * btrfs_tree_block_info that sits between the extent item and the refs.
 */
11049 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11050 skinny_level = (int)key.offset;
11051 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11053 struct btrfs_tree_block_info *info;
11055 info = (struct btrfs_tree_block_info *)(ei + 1);
11056 skinny_level = btrfs_tree_block_level(leaf, info);
11057 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11064 if (!(btrfs_extent_flags(leaf, ei) &
11065 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11067 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11068 key.objectid, nodesize,
11069 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11070 err = BACKREF_MISMATCH;
/*
 * NOTE(review): @eb may be NULL (the recursive call below passes NULL);
 * confirm that reading the header generation is guarded accordingly.
 */
11072 header_gen = btrfs_header_generation(eb);
11073 extent_gen = btrfs_extent_generation(leaf, ei);
11074 if (header_gen != extent_gen) {
11076 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11077 key.objectid, nodesize, header_gen,
11079 err = BACKREF_MISMATCH;
11081 if (level != skinny_level) {
11083 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11084 key.objectid, nodesize, level, skinny_level);
11085 err = BACKREF_MISMATCH;
/* A block owned by a non-fs tree is expected to have exactly one reference */
11087 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11089 "extent[%llu %u] is referred by other roots than %llu",
11090 key.objectid, nodesize, root->objectid);
11091 err = BACKREF_MISMATCH;
11096 * Iterate the extent/metadata item to find the exact backref
11098 item_size = btrfs_item_size_nr(leaf, slot);
11099 ptr = (unsigned long)iref;
11100 end = (unsigned long)ei + item_size;
11101 while (ptr < end) {
11102 iref = (struct btrfs_extent_inline_ref *)ptr;
11103 type = btrfs_extent_inline_ref_type(leaf, iref);
11104 offset = btrfs_extent_inline_ref_offset(leaf, iref);
/* For TREE_BLOCK_REF the inline offset is compared against root ids */
11106 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11107 (offset == root->objectid || offset == owner)) {
11109 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11111 * Backref of tree reloc root points to itself, no need
11112 * to check backref any more.
11114 if (tree_reloc_root)
11117 /* Check if the backref points to valid referencer */
/*
 * The shared ref's offset is used as the bytenr of the parent block; the
 * parent is checked recursively, one level up and without an extent buffer.
 */
11118 found_ref = !check_tree_block_ref(root, NULL,
11119 offset, level + 1, owner);
/* Inline refs are variable-sized; advance by the size of this ref type */
11124 ptr += btrfs_extent_inline_ref_size(type);
11128 * Inlined extent item doesn't have what we need, check
11129 * TREE_BLOCK_REF_KEY
11132 btrfs_release_path(&path);
11133 key.objectid = bytenr;
11134 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11135 key.offset = root->objectid;
11137 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11142 err |= BACKREF_MISSING;
11144 btrfs_release_path(&path);
/* The "backref lost" message is only printed when @eb was supplied */
11145 if (eb && (err & BACKREF_MISSING))
11146 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
11147 bytenr, nodesize, owner, level);
11152 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11154 * Return >0 for any error found and output error message
11155 * Return 0 for no error found
/*
 * Validates one EXTENT_DATA item found in leaf @eb at @slot:
 *  - disk_num_bytes and num_bytes must be aligned to the sector size
 *    (BYTES_UNALIGNED otherwise);
 *  - the extent tree must hold the matching EXTENT_ITEM carrying the DATA
 *    flag (BACKREF_MISMATCH otherwise);
 *  - a data backref must exist, either inline in that extent item, or as a
 *    keyed EXTENT_DATA_REF, or as a keyed SHARED_DATA_REF whose parent is
 *    this leaf (BACKREF_MISSING otherwise).
 * It also adds to the file-scope data_bytes_allocated / data_bytes_referenced
 * counters.
 *
 * Fix: the keyed EXTENT_DATA_REF lookup now hashes the data backref offset
 * (file offset minus the file extent's offset into the extent) instead of
 * the raw file offset.  The raw offset only matches when the extent offset
 * is zero, so other extents could miss their keyed backref.
 */
11157 static int check_extent_data_item(struct btrfs_root *root,
11158 struct extent_buffer *eb, int slot)
11160 struct btrfs_file_extent_item *fi;
11161 struct btrfs_path path;
11162 struct btrfs_root *extent_root = root->fs_info->extent_root;
11163 struct btrfs_key fi_key;
11164 struct btrfs_key dbref_key;
11165 struct extent_buffer *leaf;
11166 struct btrfs_extent_item *ei;
11167 struct btrfs_extent_inline_ref *iref;
11168 struct btrfs_extent_data_ref *dref;
11171 u64 disk_num_bytes;
11172 u64 extent_num_bytes;
11179 int found_dbackref = 0;
11183 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11184 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11186 /* Nothing to check for hole and inline data extents */
11187 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11188 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11191 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11192 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11193 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11195 /* Check unaligned disk_num_bytes and num_bytes */
11196 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11198 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11199 fi_key.objectid, fi_key.offset, disk_num_bytes,
11200 root->fs_info->sectorsize);
11201 err |= BYTES_UNALIGNED;
11203 data_bytes_allocated += disk_num_bytes;
11205 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11207 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11208 fi_key.objectid, fi_key.offset, extent_num_bytes,
11209 root->fs_info->sectorsize);
11210 err |= BYTES_UNALIGNED;
11212 data_bytes_referenced += extent_num_bytes;
11214 owner = btrfs_header_owner(eb);
11216 /* Check the extent item of the file extent in extent tree */
/* The extent item is looked up by (disk_bytenr, EXTENT_ITEM, disk_num_bytes) */
11217 btrfs_init_path(&path);
11218 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11219 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11220 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11222 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11226 leaf = path.nodes[0];
11227 slot = path.slots[0];
11228 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11230 extent_flags = btrfs_extent_flags(leaf, ei);
11232 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11234 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11235 disk_bytenr, disk_num_bytes,
11236 BTRFS_EXTENT_FLAG_DATA);
11237 err |= BACKREF_MISMATCH;
11240 /* Check data backref inside that extent item */
11241 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11242 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11243 ptr = (unsigned long)iref;
11244 end = (unsigned long)ei + item_size;
11245 while (ptr < end) {
11246 iref = (struct btrfs_extent_inline_ref *)ptr;
11247 type = btrfs_extent_inline_ref_type(leaf, iref);
/* The extent data ref is read from the position of the inline ref's offset field */
11248 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11250 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11251 ref_root = btrfs_extent_data_ref_root(leaf, dref);
11252 if (ref_root == owner || ref_root == root->objectid)
11253 found_dbackref = 1;
11254 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/* Shared data ref: accept it when the block at the ref's offset passes the tree block backref check */
11255 found_dbackref = !check_tree_block_ref(root, NULL,
11256 btrfs_extent_inline_ref_offset(leaf, iref),
11260 if (found_dbackref)
11262 ptr += btrfs_extent_inline_ref_size(type);
11265 if (!found_dbackref) {
11266 btrfs_release_path(&path);
11268 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
11269 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11270 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
/*
 * The keyed data ref is hashed from (root, inode, backref offset), where
 * backref offset = file offset - file extent offset.
 */
11271 dbref_key.offset = hash_extent_data_ref(root->objectid,
11272 fi_key.objectid, fi_key.offset - btrfs_file_extent_offset(eb, fi));
11274 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11275 &dbref_key, &path, 0, 0);
11277 found_dbackref = 1;
11281 btrfs_release_path(&path);
11284 * Neither inlined nor EXTENT_DATA_REF found, try
11285 * SHARED_DATA_REF as last chance.
/* A keyed shared data ref uses the referencing leaf's start as key offset */
11287 dbref_key.objectid = disk_bytenr;
11288 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
11289 dbref_key.offset = eb->start;
11291 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11292 &dbref_key, &path, 0, 0);
11294 found_dbackref = 1;
11300 if (!found_dbackref)
11301 err |= BACKREF_MISSING;
11302 btrfs_release_path(&path);
11303 if (err & BACKREF_MISSING) {
11304 error("data extent[%llu %llu] backref lost",
11305 disk_bytenr, disk_num_bytes);
11311 * Get real tree block level for the case like shared block
11312 * Return >= 0 as tree level
11313 * Return <0 for error
/*
 * Resolves the level of the tree block at @bytenr from two sources: the
 * level recorded by its extent-tree item and the level in the block's own
 * header.  The two are compared; the header level is what gets returned.
 */
11315 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11317 struct extent_buffer *eb;
11318 struct btrfs_path path;
11319 struct btrfs_key key;
11320 struct btrfs_extent_item *ei;
11327 /* Search extent tree for extent generation and level */
/*
 * The search key always uses METADATA_ITEM with the largest offset;
 * btrfs_previous_extent_item() then steps back, and both METADATA_ITEM and
 * EXTENT_ITEM results are handled further down.
 */
11328 key.objectid = bytenr;
11329 key.type = BTRFS_METADATA_ITEM_KEY;
11330 key.offset = (u64)-1;
11332 btrfs_init_path(&path);
11333 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
11336 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11344 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11345 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11346 struct btrfs_extent_item);
11347 flags = btrfs_extent_flags(path.nodes[0], ei);
/* The extent found must be flagged as a tree block */
11348 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11353 /* Get transid for later read_tree_block() check */
11354 transid = btrfs_extent_generation(path.nodes[0], ei);
11356 /* Get backref level as one source */
/* METADATA_ITEM keeps the level in key.offset; EXTENT_ITEM in a tree_block_info after the extent item */
11357 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11358 backref_level = key.offset;
11360 struct btrfs_tree_block_info *info;
11362 info = (struct btrfs_tree_block_info *)(ei + 1);
11363 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11365 btrfs_release_path(&path);
11367 /* Get level from tree block as an alternative source */
11368 eb = read_tree_block(fs_info, bytenr, transid);
11369 if (!extent_buffer_uptodate(eb)) {
11370 free_extent_buffer(eb);
11373 header_level = btrfs_header_level(eb);
11374 free_extent_buffer(eb);
/* The two sources are cross-checked before the header level is returned */
11376 if (header_level != backref_level)
11378 return header_level;
11381 btrfs_release_path(&path);
11386 * Check if a tree block backref is valid (points to a valid tree block)
11387 * if level == -1, level will be resolved
11388 * Return >0 for any error found and print error message
/*
 * Confirms that tree @root_id really reaches the tree block at @bytenr: the
 * block's first key is read, the root is searched for that key with
 * path.lowest_level set to @level, and the node found at that level is
 * compared against @bytenr and @level.  REFERENCER_MISSING and
 * REFERENCER_MISMATCH bits are accumulated in err.
 */
11390 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11391 u64 bytenr, int level)
11393 struct btrfs_root *root;
11394 struct btrfs_key key;
11395 struct btrfs_path path;
11396 struct extent_buffer *eb;
11397 struct extent_buffer *node;
11398 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11402 /* Query level for level == -1 special case */
11404 level = query_tree_block_level(fs_info, bytenr);
11406 err |= REFERENCER_MISSING;
/* Load the referencing root by its ROOT_ITEM key */
11410 key.objectid = root_id;
11411 key.type = BTRFS_ROOT_ITEM_KEY;
11412 key.offset = (u64)-1;
11414 root = btrfs_read_fs_root(fs_info, &key);
11415 if (IS_ERR(root)) {
11416 err |= REFERENCER_MISSING;
11420 /* Read out the tree block to get item/node key */
11421 eb = read_tree_block(fs_info, bytenr, 0);
11422 if (!extent_buffer_uptodate(eb)) {
11423 err |= REFERENCER_MISSING;
11424 free_extent_buffer(eb);
11428 /* Empty tree, no need to check key */
11429 if (!btrfs_header_nritems(eb) && !level) {
11430 free_extent_buffer(eb);
/* First key of the block: node-key or item-key accessor (presumably chosen by level -- confirm) */
11435 btrfs_node_key_to_cpu(eb, &key, 0);
11437 btrfs_item_key_to_cpu(eb, &key, 0);
11439 free_extent_buffer(eb);
11441 btrfs_init_path(&path);
/* lowest_level is set so that path.nodes[level] can be inspected after the search */
11442 path.lowest_level = level;
11443 /* Search with the first key, to ensure we can reach it */
11444 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11446 err |= REFERENCER_MISSING;
/* The node reached at @level must be the very block being checked */
11450 node = path.nodes[level];
11451 if (btrfs_header_bytenr(node) != bytenr) {
11453 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
11454 bytenr, nodesize, bytenr,
11455 btrfs_header_bytenr(node));
11456 err |= REFERENCER_MISMATCH;
11458 if (btrfs_header_level(node) != level) {
11460 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
11461 bytenr, nodesize, level,
11462 btrfs_header_level(node));
11463 err |= REFERENCER_MISMATCH;
11467 btrfs_release_path(&path);
/* Two message forms follow: one without and one with the level */
11469 if (err & REFERENCER_MISSING) {
11471 error("extent [%llu %d] lost referencer (owner: %llu)",
11472 bytenr, nodesize, root_id);
11475 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11476 bytenr, nodesize, root_id, level);
11483 * Check if tree block @eb is tree reloc root.
11484 * Return 0 if it's not or any problem happens
11485 * Return 1 if it's a tree reloc root
/*
 * Reads (without the root cache) the tree reloc root keyed
 * (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM, owner of @eb) and compares the
 * bytenr of its root node with the bytenr of @eb.
 */
11487 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11488 struct extent_buffer *eb)
11490 struct btrfs_root *tree_reloc_root;
11491 struct btrfs_key key;
11492 u64 bytenr = btrfs_header_bytenr(eb);
11493 u64 owner = btrfs_header_owner(eb);
/* The reloc root's key offset is the owner recorded in @eb's header */
11496 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11497 key.offset = owner;
11498 key.type = BTRFS_ROOT_ITEM_KEY;
11500 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11501 if (IS_ERR(tree_reloc_root))
11504 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
/* The root was read with the no-cache variant, so it is freed here */
11506 btrfs_free_fs_root(tree_reloc_root);
11511 * Check referencer for shared block backref
11512 * If level == -1, this function will resolve the level.
/*
 * Reads the block at @parent and looks for a block pointer equal to @bytenr
 * among its items; the parent must be exactly one level above the child.
 * Reports and returns REFERENCER_MISSING when no such parent is found.
 */
11514 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11515 u64 parent, u64 bytenr, int level)
11517 struct extent_buffer *eb;
11519 int found_parent = 0;
11522 eb = read_tree_block(fs_info, parent, 0);
11523 if (!extent_buffer_uptodate(eb))
/* Level resolution for the level == -1 case described in the header comment */
11527 level = query_tree_block_level(fs_info, bytenr);
11531 /* It's possible it's a tree reloc root */
11532 if (parent == bytenr) {
11533 if (is_tree_reloc_root(fs_info, eb))
/* A real parent sits exactly one level above the block it points to */
11538 if (level + 1 != btrfs_header_level(eb))
/* Scan every block pointer in the parent for @bytenr */
11541 nr = btrfs_header_nritems(eb);
11542 for (i = 0; i < nr; i++) {
11543 if (bytenr == btrfs_node_blockptr(eb, i)) {
11549 free_extent_buffer(eb);
11550 if (!found_parent) {
11552 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
11553 bytenr, fs_info->nodesize, parent, level);
11554 return REFERENCER_MISSING;
11560 * Check referencer for normal (inlined) data ref
11561 * If len == 0, it will be resolved by searching in extent tree
/*
 * Counts the EXTENT_DATA items of inode @objectid in tree @root_id that
 * point at data extent (@bytenr, @len) with a backref offset equal to
 * @offset, and compares that number with @count.  A difference is reported
 * and returned as REFERENCER_MISSING.
 */
11563 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
11564 u64 root_id, u64 objectid, u64 offset,
11565 u64 bytenr, u64 len, u32 count)
11567 struct btrfs_root *root;
11568 struct btrfs_root *extent_root = fs_info->extent_root;
11569 struct btrfs_key key;
11570 struct btrfs_path path;
11571 struct extent_buffer *leaf;
11572 struct btrfs_file_extent_item *fi;
11573 u32 found_count = 0;
/*
 * Extent-tree lookup of the EXTENT_ITEM for @bytenr (presumably the
 * len == 0 resolution mentioned in the header comment -- confirm).
 */
11578 key.objectid = bytenr;
11579 key.type = BTRFS_EXTENT_ITEM_KEY;
11580 key.offset = (u64)-1;
11582 btrfs_init_path(&path);
11583 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11586 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11589 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11590 if (key.objectid != bytenr ||
11591 key.type != BTRFS_EXTENT_ITEM_KEY)
11594 btrfs_release_path(&path);
/* Load the referencing root by its ROOT_ITEM key */
11596 key.objectid = root_id;
11597 key.type = BTRFS_ROOT_ITEM_KEY;
11598 key.offset = (u64)-1;
11599 btrfs_init_path(&path);
11601 root = btrfs_read_fs_root(fs_info, &key);
11605 key.objectid = objectid;
11606 key.type = BTRFS_EXTENT_DATA_KEY;
11608 * It can be nasty as data backref offset is
11609 * file offset - file extent offset, which is smaller or
11610 * equal to original backref offset. The only special case is
11611 * overflow. So we need to special check and do further search.
/* When bit 63 of @offset is set, the search starts from file offset 0 instead */
11613 key.offset = offset & (1ULL << 63) ? 0 : offset;
11615 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11620 * Search afterwards to get correct one
11621 * NOTE: As we must do a comprehensive check on the data backref to
11622 * make sure the dref count also matches, we must iterate all file
11623 * extents for that inode.
11626 leaf = path.nodes[0];
11627 slot = path.slots[0];
/* The slot may be past the last item of the leaf */
11629 if (slot >= btrfs_header_nritems(leaf))
11631 btrfs_item_key_to_cpu(leaf, &key, slot);
11632 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11634 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11636 * Except normal disk bytenr and disk num bytes, we still
11637 * need to do extra check on dbackref offset as
11638 * dbackref offset = file_offset - file_extent_offset
11640 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11641 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11642 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11647 ret = btrfs_next_item(root, &path);
11652 btrfs_release_path(&path);
/* The number of matching file extents must equal the count stored in the backref */
11653 if (found_count != count) {
11655 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
11656 bytenr, len, root_id, objectid, offset, count, found_count);
11657 return REFERENCER_MISSING;
11663 * Check if the referencer of a shared data backref exists
/*
 * Reads the block at @parent and scans its items for a non-inline
 * EXTENT_DATA item whose disk bytenr equals @bytenr.  Reports and returns
 * REFERENCER_MISSING when no such parent is found.
 */
11665 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
11666 u64 parent, u64 bytenr)
11668 struct extent_buffer *eb;
11669 struct btrfs_key key;
11670 struct btrfs_file_extent_item *fi;
11672 int found_parent = 0;
11675 eb = read_tree_block(fs_info, parent, 0);
11676 if (!extent_buffer_uptodate(eb))
11679 nr = btrfs_header_nritems(eb);
11680 for (i = 0; i < nr; i++) {
11681 btrfs_item_key_to_cpu(eb, &key, i);
/* Only EXTENT_DATA items are inspected */
11682 if (key.type != BTRFS_EXTENT_DATA_KEY)
11685 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
/* Inline file extents are tested separately, before the disk bytenr comparison */
11686 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
11689 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
11696 free_extent_buffer(eb);
11697 if (!found_parent) {
11698 error("shared extent %llu referencer lost (parent: %llu)",
11700 return REFERENCER_MISSING;
11706 * This function will check a given extent item, including its backref and
11707 * itself (like crossing stripe boundary and type)
11709 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Checks one EXTENT_ITEM / METADATA_ITEM in leaf @eb at @slot: accounts its
 * size in the file-scope bytes_used counter, checks metadata extents for
 * crossing a stripe boundary, then walks the inline refs and dispatches each
 * one to the checker for its ref type.
 */
11711 static int check_extent_item(struct btrfs_fs_info *fs_info,
11712 struct extent_buffer *eb, int slot)
11714 struct btrfs_extent_item *ei;
11715 struct btrfs_extent_inline_ref *iref;
11716 struct btrfs_extent_data_ref *dref;
11720 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11721 u32 item_size = btrfs_item_size_nr(eb, slot);
11726 struct btrfs_key key;
11730 btrfs_item_key_to_cpu(eb, &key, slot);
/* An EXTENT_ITEM key carries its length in key.offset; otherwise nodesize is accounted */
11731 if (key.type == BTRFS_EXTENT_ITEM_KEY)
11732 bytes_used += key.offset;
11734 bytes_used += nodesize;
11736 if (item_size < sizeof(*ei)) {
11738 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
11739 * old thing when on disk format is still un-determined.
11740 * No need to care about it anymore
11742 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
11746 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
11747 flags = btrfs_extent_flags(eb, ei);
11749 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
/* NOTE(review): this call uses the file-scope global_info rather than the @fs_info parameter */
11751 if (metadata && check_crossing_stripes(global_info, key.objectid,
11753 error("bad metadata [%llu, %llu) crossing stripe boundary",
11754 key.objectid, key.objectid + nodesize);
11755 err |= CROSSING_STRIPE_BOUNDARY;
11758 ptr = (unsigned long)(ei + 1);
11760 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
11761 /* Old EXTENT_ITEM metadata */
11762 struct btrfs_tree_block_info *info;
11764 info = (struct btrfs_tree_block_info *)ptr;
11765 level = btrfs_tree_block_level(eb, info);
/* Skip the tree_block_info so ptr lands on the first inline ref */
11766 ptr += sizeof(struct btrfs_tree_block_info);
11768 /* New METADATA_ITEM */
11769 level = key.offset;
11771 end = (unsigned long)ei + item_size;
11774 /* Reached extent item end normally */
11778 /* Beyond extent item end, wrong item size */
11780 err |= ITEM_SIZE_MISMATCH;
11781 error("extent item at bytenr %llu slot %d has wrong size",
11786 /* Now check every backref in this extent item */
11787 iref = (struct btrfs_extent_inline_ref *)ptr;
11788 type = btrfs_extent_inline_ref_type(eb, iref);
11789 offset = btrfs_extent_inline_ref_offset(eb, iref);
/* For block refs, offset is passed as the root id (TREE_BLOCK_REF) or as the parent bytenr (SHARED_*) */
11791 case BTRFS_TREE_BLOCK_REF_KEY:
11792 ret = check_tree_block_backref(fs_info, offset, key.objectid,
11796 case BTRFS_SHARED_BLOCK_REF_KEY:
11797 ret = check_shared_block_backref(fs_info, offset, key.objectid,
11801 case BTRFS_EXTENT_DATA_REF_KEY:
/* The extent data ref is read from the position of the inline ref's offset field */
11802 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11803 ret = check_extent_data_backref(fs_info,
11804 btrfs_extent_data_ref_root(eb, dref),
11805 btrfs_extent_data_ref_objectid(eb, dref),
11806 btrfs_extent_data_ref_offset(eb, dref),
11807 key.objectid, key.offset,
11808 btrfs_extent_data_ref_count(eb, dref));
11811 case BTRFS_SHARED_DATA_REF_KEY:
11812 ret = check_shared_data_backref(fs_info, offset, key.objectid);
11816 error("extent[%llu %d %llu] has unknown ref type: %d",
11817 key.objectid, key.type, key.offset, type);
11818 err |= UNKNOWN_TYPE;
/* Inline refs are variable-sized; advance by the size of this ref type */
11822 ptr += btrfs_extent_inline_ref_size(type);
11830 * Check if a dev extent item is referred correctly by its chunk
/*
 * Checks one DEV_EXTENT item in leaf @eb at @slot against the chunk it names:
 * the chunk is looked up in the chunk tree, validated, its stripe length is
 * compared with the dev extent length, and its stripes are scanned for one
 * whose (devid, offset) equals the dev extent key's (objectid, offset).
 * Reports and returns REFERENCER_MISSING when no matching chunk is found.
 */
11832 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
11833 struct extent_buffer *eb, int slot)
11835 struct btrfs_root *chunk_root = fs_info->chunk_root;
11836 struct btrfs_dev_extent *ptr;
11837 struct btrfs_path path;
11838 struct btrfs_key chunk_key;
11839 struct btrfs_key devext_key;
11840 struct btrfs_chunk *chunk;
11841 struct extent_buffer *l;
11845 int found_chunk = 0;
11848 btrfs_item_key_to_cpu(eb, &devext_key, slot);
11849 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
11850 length = btrfs_dev_extent_length(eb, ptr);
/* The chunk key is built from the chunk objectid/offset stored in the dev extent */
11852 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
11853 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11854 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
11856 btrfs_init_path(&path);
11857 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11862 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
11863 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
/* The value from btrfs_stripe_length() must equal the dev extent length */
11868 if (btrfs_stripe_length(fs_info, l, chunk) != length)
11871 num_stripes = btrfs_chunk_num_stripes(l, chunk);
11872 for (i = 0; i < num_stripes; i++) {
11873 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
11874 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
/* A dev extent key is (devid, DEV_EXTENT, physical offset) */
11876 if (devid == devext_key.objectid &&
11877 offset == devext_key.offset) {
11883 btrfs_release_path(&path);
11884 if (!found_chunk) {
11886 "device extent[%llu, %llu, %llu] did not find the related chunk",
11887 devext_key.objectid, devext_key.offset, length);
11888 return REFERENCER_MISSING;
11894 * Check if the used space is correct with the dev item
/*
 * Checks one DEV_ITEM in leaf @eb at @slot: the lengths of the device's
 * DEV_EXTENT items in the dev tree are added up and the total is compared
 * with the bytes_used field of the dev item.
 *
 * Returns REFERENCER_MISSING when the dev extent search fails and
 * ACCOUNTING_MISMATCH when the totals differ; each is reported with error().
 *
 * Fix: the accounting-mismatch message now prints the dev item key that is
 * loaded immediately before it.  It used to print the constants
 * BTRFS_ROOT_TREE_OBJECTID and BTRFS_DEV_EXTENT_KEY, which reported the wrong
 * key type and left the freshly loaded key unused.
 */
11896 static int check_dev_item(struct btrfs_fs_info *fs_info,
11897 struct extent_buffer *eb, int slot)
11899 struct btrfs_root *dev_root = fs_info->dev_root;
11900 struct btrfs_dev_item *dev_item;
11901 struct btrfs_path path;
11902 struct btrfs_key key;
11903 struct btrfs_dev_extent *ptr;
11909 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
11910 dev_id = btrfs_device_id(eb, dev_item);
11911 used = btrfs_device_bytes_used(eb, dev_item);
/* Dev extents of this device are keyed (dev_id, DEV_EXTENT, ...) in the dev tree */
11913 key.objectid = dev_id;
11914 key.type = BTRFS_DEV_EXTENT_KEY;
11917 btrfs_init_path(&path);
11918 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* Reload the dev item's own key so the message identifies the item checked */
11920 btrfs_item_key_to_cpu(eb, &key, slot);
11921 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
11922 key.objectid, key.type, key.offset);
11923 btrfs_release_path(&path);
11924 return REFERENCER_MISSING;
11927 /* Iterate dev_extents to calculate the used space of a device */
/* The slot may be past the last item of the leaf */
11929 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11932 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11933 if (key.objectid > dev_id)
11935 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11938 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11939 struct btrfs_dev_extent);
11940 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11942 ret = btrfs_next_item(dev_root, &path);
11946 btrfs_release_path(&path);
11948 if (used != total) {
/* key was reused while iterating; reload the dev item key for the message */
11949 btrfs_item_key_to_cpu(eb, &key, slot);
11951 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11952 total, used, key.objectid,
11953 key.type, key.offset);
11954 return ACCOUNTING_MISMATCH;
11960 * Check a block group item with its referencer (chunk) and its used space
11961 * with extent/metadata item
/*
 * Checks one BLOCK_GROUP_ITEM in leaf @eb at @slot in two steps: the chunk
 * item for the block group is looked up in the chunk tree and its length is
 * compared; then the extent tree is walked across the block group's range,
 * adding up extent sizes and checking each extent's DATA / TREE_BLOCK flag
 * against the block group flags.  The total is compared with the block
 * group's used field.
 */
11963 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11964 struct extent_buffer *eb, int slot)
11966 struct btrfs_root *extent_root = fs_info->extent_root;
11967 struct btrfs_root *chunk_root = fs_info->chunk_root;
11968 struct btrfs_block_group_item *bi;
11969 struct btrfs_block_group_item bg_item;
11970 struct btrfs_path path;
11971 struct btrfs_key bg_key;
11972 struct btrfs_key chunk_key;
11973 struct btrfs_key extent_key;
11974 struct btrfs_chunk *chunk;
11975 struct extent_buffer *leaf;
11976 struct btrfs_extent_item *ei;
11977 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11985 btrfs_item_key_to_cpu(eb, &bg_key, slot);
11986 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* Copy the item out of the extent buffer so its fields can be read from a local struct */
11987 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11988 used = btrfs_block_group_used(&bg_item);
11989 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk is keyed by the block group's start bytenr (bg_key.objectid) */
11991 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11992 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11993 chunk_key.offset = bg_key.objectid;
11995 btrfs_init_path(&path);
11996 /* Search for the referencer chunk */
11997 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12000 "block group[%llu %llu] did not find the related chunk item",
12001 bg_key.objectid, bg_key.offset);
12002 err |= REFERENCER_MISSING;
12004 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12005 struct btrfs_chunk);
12006 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12009 "block group[%llu %llu] related chunk item length does not match",
12010 bg_key.objectid, bg_key.offset);
12011 err |= REFERENCER_MISMATCH;
12014 btrfs_release_path(&path);
12016 /* Search from the block group bytenr */
/* Type 0 / offset 0 is the smallest key for that objectid */
12017 extent_key.objectid = bg_key.objectid;
12018 extent_key.type = 0;
12019 extent_key.offset = 0;
12021 btrfs_init_path(&path);
12022 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12026 /* Iterate extent tree to account used space */
12028 leaf = path.nodes[0];
12030 /* Search slot can point to the last item beyond leaf nritems */
12031 if (path.slots[0] >= btrfs_header_nritems(leaf))
12034 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* bg_key.objectid + bg_key.offset is the first bytenr past the block group */
12035 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12038 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12039 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12041 if (extent_key.objectid < bg_key.objectid)
/* METADATA_ITEM keys are tested separately; for the other case key.offset is added to total */
12044 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12047 total += extent_key.offset;
12049 ei = btrfs_item_ptr(leaf, path.slots[0],
12050 struct btrfs_extent_item);
12051 flags = btrfs_extent_flags(leaf, ei);
/* Data extents must live in a DATA block group */
12052 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12053 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12055 "bad extent[%llu, %llu) type mismatch with chunk",
12056 extent_key.objectid,
12057 extent_key.objectid + extent_key.offset);
12058 err |= CHUNK_TYPE_MISMATCH;
/* Tree blocks must live in a SYSTEM or METADATA block group */
12060 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12061 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12062 BTRFS_BLOCK_GROUP_METADATA))) {
12064 "bad extent[%llu, %llu) type mismatch with chunk",
12065 extent_key.objectid,
12066 extent_key.objectid + nodesize);
12067 err |= CHUNK_TYPE_MISMATCH;
12071 ret = btrfs_next_item(extent_root, &path);
12077 btrfs_release_path(&path);
12079 if (total != used) {
12081 "block group[%llu %llu] used %llu but extent items used %llu",
12082 bg_key.objectid, bg_key.offset, used, total);
12083 err |= ACCOUNTING_MISMATCH;
12089 * Check a chunk item.
12090 * Including checking all referred dev_extents and block group
/*
 * Checks one CHUNK_ITEM in leaf @eb at @slot:
 *  - the chunk is validated with btrfs_check_chunk_valid();
 *  - the extent tree must hold a BLOCK_GROUP_ITEM keyed
 *    (chunk start, BLOCK_GROUP_ITEM, chunk length) whose flags equal the
 *    chunk type;
 *  - for every stripe, the dev tree must hold a DEV_EXTENT keyed
 *    (devid, DEV_EXTENT, stripe offset) that points back to this chunk and
 *    has the expected stripe length.
 * Problems are reported with error() and accumulated in err.
 *
 * Fix: the "stripe did not find the related dev extent" message printed
 * chunk_key.objectid as the start of the chunk range.  It now prints
 * chunk_key.offset, the chunk's start, like every other message here.
 */
12092 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12093 struct extent_buffer *eb, int slot)
12095 struct btrfs_root *extent_root = fs_info->extent_root;
12096 struct btrfs_root *dev_root = fs_info->dev_root;
12097 struct btrfs_path path;
12098 struct btrfs_key chunk_key;
12099 struct btrfs_key bg_key;
12100 struct btrfs_key devext_key;
12101 struct btrfs_chunk *chunk;
12102 struct extent_buffer *leaf;
12103 struct btrfs_block_group_item *bi;
12104 struct btrfs_block_group_item bg_item;
12105 struct btrfs_dev_extent *ptr;
12117 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12118 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12119 length = btrfs_chunk_length(eb, chunk);
/* chunk_key.offset is the chunk's start; chunk_end is the first byte past it */
12120 chunk_end = chunk_key.offset + length;
12121 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12124 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12126 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12129 type = btrfs_chunk_type(eb, chunk);
/* The block group shares the chunk's start and length */
12131 bg_key.objectid = chunk_key.offset;
12132 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12133 bg_key.offset = length;
12135 btrfs_init_path(&path);
12136 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
12139 "chunk[%llu %llu) did not find the related block group item",
12140 chunk_key.offset, chunk_end);
12141 err |= REFERENCER_MISSING;
12143 leaf = path.nodes[0];
12144 bi = btrfs_item_ptr(leaf, path.slots[0],
12145 struct btrfs_block_group_item);
/* Copy the item out of the extent buffer so its flags can be read from a local struct */
12146 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
12148 if (btrfs_block_group_flags(&bg_item) != type) {
12150 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
12151 chunk_key.offset, chunk_end, type,
12152 btrfs_block_group_flags(&bg_item));
12153 err |= REFERENCER_MISSING;
12157 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
12158 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
12159 for (i = 0; i < num_stripes; i++) {
/* Drop whatever the previous lookup left in path before reusing it */
12160 btrfs_release_path(&path);
12161 btrfs_init_path(&path);
12162 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
12163 devext_key.type = BTRFS_DEV_EXTENT_KEY;
12164 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
12166 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
12169 goto not_match_dev;
12171 leaf = path.nodes[0];
12172 ptr = btrfs_item_ptr(leaf, path.slots[0],
12173 struct btrfs_dev_extent);
12174 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
12175 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
/* The dev extent must point back at this chunk and have the stripe length */
12176 if (objectid != chunk_key.objectid ||
12177 offset != chunk_key.offset ||
12178 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
12179 goto not_match_dev;
12182 err |= BACKREF_MISSING;
12184 "chunk[%llu %llu) stripe %d did not find the related dev extent",
12185 chunk_key.offset, chunk_end, i);
12188 btrfs_release_path(&path);
12194 * Main entry function to check known items and update related accounting info
/*
 * Walks the items of leaf @eb and dispatches each one, by key type, to the
 * matching per-item checker above; EXTENT_CSUM items only add their size to
 * the file-scope total_csum_bytes counter.
 */
12196 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
12198 struct btrfs_fs_info *fs_info = root->fs_info;
12199 struct btrfs_key key;
12202 struct btrfs_extent_data_ref *dref;
12207 btrfs_item_key_to_cpu(eb, &key, slot);
/* File extent items are checked against @root; the other checkers only take fs_info */
12211 case BTRFS_EXTENT_DATA_KEY:
12212 ret = check_extent_data_item(root, eb, slot);
12215 case BTRFS_BLOCK_GROUP_ITEM_KEY:
12216 ret = check_block_group_item(fs_info, eb, slot);
12219 case BTRFS_DEV_ITEM_KEY:
12220 ret = check_dev_item(fs_info, eb, slot);
12223 case BTRFS_CHUNK_ITEM_KEY:
12224 ret = check_chunk_item(fs_info, eb, slot);
12227 case BTRFS_DEV_EXTENT_KEY:
12228 ret = check_dev_extent_item(fs_info, eb, slot);
/* Both extent item flavours share one checker */
12231 case BTRFS_EXTENT_ITEM_KEY:
12232 case BTRFS_METADATA_ITEM_KEY:
12233 ret = check_extent_item(fs_info, eb, slot);
12236 case BTRFS_EXTENT_CSUM_KEY:
12237 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* Keyed backref items go to the same checkers used for inline refs in check_extent_item() */
12239 case BTRFS_TREE_BLOCK_REF_KEY:
12240 ret = check_tree_block_backref(fs_info, key.offset,
12244 case BTRFS_EXTENT_DATA_REF_KEY:
12245 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
12246 ret = check_extent_data_backref(fs_info,
12247 btrfs_extent_data_ref_root(eb, dref),
12248 btrfs_extent_data_ref_objectid(eb, dref),
12249 btrfs_extent_data_ref_offset(eb, dref),
12251 btrfs_extent_data_ref_count(eb, dref));
12254 case BTRFS_SHARED_BLOCK_REF_KEY:
12255 ret = check_shared_block_backref(fs_info, key.offset,
12259 case BTRFS_SHARED_DATA_REF_KEY:
12260 ret = check_shared_data_backref(fs_info, key.offset,
/* Advance to the next slot while items remain in the leaf */
12268 if (++slot < btrfs_header_nritems(eb))
12275 * Helper function for later fs/subvol tree check. To determine if a tree
12276 * block should be checked.
12277 * This function ensures that only the direct referencer with the lowest
12278 * root id checks a fs/subvolume tree block.
12280 * Backref check at extent tree would detect errors like missing subvolume
12281 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * should_check() - decide whether @root is the one that checks tree block @eb.
 * @root: fs/subvolume tree currently being traversed
 * @eb:   tree block under consideration
 *
 * Looks up the extent item of @eb in the extent tree and walks its inline
 * references.  A BTRFS_TREE_BLOCK_REF_KEY inline ref whose offset is lower
 * than root->objectid ends the walk early after releasing the path.
 * NOTE(review): the return statements are not visible in this listing.
 * Presumably the early exit means another root checks the block and every
 * other path means @root checks it; confirm against the complete source.
 */
12283 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
12285 struct btrfs_root *extent_root = root->fs_info->extent_root;
12286 struct btrfs_key key;
12287 struct btrfs_path path;
12288 struct extent_buffer *leaf;
12290 struct btrfs_extent_item *ei;
12296 struct btrfs_extent_inline_ref *iref;
12299 btrfs_init_path(&path);
/*
 * Search with the largest possible offset for this bytenr, then step back
 * to the extent item that covers it.
 */
12300 key.objectid = btrfs_header_bytenr(eb);
12301 key.type = BTRFS_METADATA_ITEM_KEY;
12302 key.offset = (u64)-1;
12305 * Any failure in backref resolving means we can't determine
12306 * whom the tree block belongs to.
12307 * So in that case, we need to check that tree block
12309 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12313 ret = btrfs_previous_extent_item(extent_root, &path,
12314 btrfs_header_bytenr(eb));
12318 leaf = path.nodes[0];
12319 slot = path.slots[0];
12320 btrfs_item_key_to_cpu(leaf, &key, slot);
12321 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * For a METADATA_ITEM the inline refs start right after the extent item;
 * otherwise a btrfs_tree_block_info sits in between and is stepped over.
 */
12323 if (key.type == BTRFS_METADATA_ITEM_KEY) {
12324 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12326 struct btrfs_tree_block_info *info;
12328 info = (struct btrfs_tree_block_info *)(ei + 1);
12329 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* Walk the inline refs from the first one up to the end of the item. */
12332 item_size = btrfs_item_size_nr(leaf, slot);
12333 ptr = (unsigned long)iref;
12334 end = (unsigned long)ei + item_size;
12335 while (ptr < end) {
12336 iref = (struct btrfs_extent_inline_ref *)ptr;
12337 type = btrfs_extent_inline_ref_type(leaf, iref);
12338 offset = btrfs_extent_inline_ref_offset(leaf, iref);
12341 * We only check the tree block if current root is
12342 * the lowest referencer of it.
12344 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
12345 offset < root->objectid) {
12346 btrfs_release_path(&path);
/* The size of an inline ref depends on its type. */
12350 ptr += btrfs_extent_inline_ref_size(type);
12353 * Normally we should also check keyed tree block ref, but that may be
12354 * very time consuming. Inlined ref should already make us skip a lot
12355 * of refs now. So skip search keyed tree block ref.
12359 btrfs_release_path(&path);
12364 * Traversal function for tree block. We will do:
12365 * 1) Skip shared fs/subvolume tree blocks
12366 * 2) Update related bytes accounting
12367 * 3) Pre-order traversal
/*
 * traverse_tree_block() - check one tree block, then descend into its children.
 * @root: tree being walked
 * @node: tree block to check
 *
 * The block itself is checked first with check_tree_block_ref().  Judging by
 * the visible calls, a leaf then goes through check_leaf_items(), while for
 * an internal node every child pointer is read and passed back to this
 * function.  Global byte counters are updated along the way.
 * NOTE(review): the branch conditions and the return statements are not
 * visible in this listing; confirm how errors are accumulated and returned.
 */
12369 static int traverse_tree_block(struct btrfs_root *root,
12370 struct extent_buffer *node)
12372 struct extent_buffer *eb;
12373 struct btrfs_key key;
12374 struct btrfs_key drop_key;
12382 * Skip shared fs/subvolume tree block, in that case they will
12383 * be checked by referencer with lowest rootid
12385 if (is_fstree(root->objectid) && !should_check(root, node))
12388 /* Update bytes accounting */
12389 total_btree_bytes += node->len;
12390 if (fs_root_objectid(btrfs_header_owner(node)))
12391 total_fs_tree_bytes += node->len;
12392 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
12393 total_extent_tree_bytes += node->len;
12395 /* pre-order tranversal, check itself first */
12396 level = btrfs_header_level(node);
12397 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
12398 btrfs_header_level(node),
12399 btrfs_header_owner(node));
/* The message text says the traversal continues even when this check fails. */
12403 "check %s failed root %llu bytenr %llu level %d, force continue check",
12404 level ? "node":"leaf", root->objectid,
12405 btrfs_header_bytenr(node), btrfs_header_level(node));
/* The unused space of a leaf is added to btree_space_waste. */
12408 btree_space_waste += btrfs_leaf_free_space(root, node);
12409 ret = check_leaf_items(root, node);
/* For a node, every unused key pointer slot is added to btree_space_waste. */
12414 nr = btrfs_header_nritems(node);
12415 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
12416 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
12417 sizeof(struct btrfs_key_ptr);
12419 /* Then check all its children */
12420 for (i = 0; i < nr; i++) {
12421 u64 blocknr = btrfs_node_blockptr(node, i);
12423 btrfs_node_key_to_cpu(node, &key, i);
/* Keys tested against the drop_progress key of the root at its drop_level. */
12424 if (level == root->root_item.drop_level &&
12425 is_dropped_key(&key, &drop_key))
12429 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
12430 * to call the function itself.
12432 eb = read_tree_block(root->fs_info, blocknr, 0);
12433 if (extent_buffer_uptodate(eb)) {
12434 ret = traverse_tree_block(root, eb);
12437 free_extent_buffer(eb);
12444 * Low memory usage version of check_chunks_and_extents.
12446 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
12448 struct btrfs_path path;
12449 struct btrfs_key key;
12450 struct btrfs_root *root1;
12451 struct btrfs_root *root;
12452 struct btrfs_root *cur_root;
12456 root = fs_info->fs_root;
12458 root1 = root->fs_info->chunk_root;
12459 ret = traverse_tree_block(root1, root1->node);
12462 root1 = root->fs_info->tree_root;
12463 ret = traverse_tree_block(root1, root1->node);
12466 btrfs_init_path(&path);
12467 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
12469 key.type = BTRFS_ROOT_ITEM_KEY;
12471 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
12473 error("cannot find extent treet in tree_root");
12478 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12479 if (key.type != BTRFS_ROOT_ITEM_KEY)
12481 key.offset = (u64)-1;
12483 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12484 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
12487 cur_root = btrfs_read_fs_root(root->fs_info, &key);
12488 if (IS_ERR(cur_root) || !cur_root) {
12489 error("failed to read tree: %lld", key.objectid);
12493 ret = traverse_tree_block(cur_root, cur_root->node);
12496 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12497 btrfs_free_fs_root(cur_root);
12499 ret = btrfs_next_item(root1, &path);
12505 btrfs_release_path(&path);
12509 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
12513 if (!ctx.progress_enabled)
12514 fprintf(stderr, "checking extents\n");
12515 if (check_mode == CHECK_MODE_LOWMEM)
12516 ret = check_chunks_and_extents_v2(fs_info);
12518 ret = check_chunks_and_extents(fs_info);
/*
 * btrfs_fsck_reinit_root() - give @root a freshly initialised root block.
 * @trans:     running transaction
 * @root:      root to reinitialise
 * @overwrite: NOTE(review): presumably selects reuse of the current root
 *             block instead of allocating a new one; the test on this
 *             argument is not visible in this listing
 *
 * The block header is zeroed and refilled (level, bytenr, generation,
 * backref revision, owner, fsid and chunk tree uuid) and the buffer is
 * marked dirty.  NOTE(review): error handling and the return statements are
 * not visible here; confirm against the complete source.
 */
12523 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
12524 struct btrfs_root *root, int overwrite)
12526 struct extent_buffer *c;
12527 struct extent_buffer *old = root->node;
12530 struct btrfs_disk_key disk_key = {0,0,0};
12536 extent_buffer_get(c);
/* A new block is allocated on behalf of this root and referenced once more. */
12539 c = btrfs_alloc_free_block(trans, root,
12540 root->fs_info->nodesize,
12541 root->root_key.objectid,
12542 &disk_key, level, 0, 0);
12545 extent_buffer_get(c);
/* Wipe the header and rebuild it field by field. */
12549 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
12550 btrfs_set_header_level(c, level);
12551 btrfs_set_header_bytenr(c, c->start);
12552 btrfs_set_header_generation(c, trans->transid);
12553 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
12554 btrfs_set_header_owner(c, root->root_key.objectid);
12556 write_extent_buffer(c, root->fs_info->fsid,
12557 btrfs_header_fsid(), BTRFS_FSID_SIZE);
12559 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
12560 btrfs_header_chunk_tree_uuid(c),
12563 btrfs_mark_buffer_dirty(c);
12565 * this case can happen in the following case:
12567 * 1.overwrite previous root.
12569 * 2.reinit reloc data root, this is because we skip pin
12570 * down reloc data tree before which means we can allocate
12571 * same block bytenr here.
/* Same bytenr as before: the root item is updated in the tree root here. */
12573 if (old->start == c->start) {
12574 btrfs_set_root_generation(&root->root_item,
12576 root->root_item.level = btrfs_header_level(root->node);
12577 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12578 &root->root_key, &root->root_item);
12580 free_extent_buffer(c);
12584 free_extent_buffer(old);
12586 add_root_to_dirty_list(root);
/*
 * pin_down_tree_blocks() - pin @eb and the tree blocks reachable from it.
 * @fs_info:   filesystem the blocks belong to
 * @eb:        tree block to start from
 * @tree_root: non-zero while walking the tree root, whose ROOT_ITEMs are
 *             followed into the roots they describe
 *
 * A block that is already marked in fs_info->pinned_extents is not processed
 * again.  ROOT_ITEMs of the extent root and of the reloc roots are skipped.
 * NOTE(review): the leaf versus node branch and the return statements are
 * not visible in this listing; confirm against the complete source.
 */
12590 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12591 struct extent_buffer *eb, int tree_root)
12593 struct extent_buffer *tmp;
12594 struct btrfs_root_item *ri;
12595 struct btrfs_key key;
12597 int level = btrfs_header_level(eb);
12603 * If we have pinned this block before, don't pin it again.
12604 * This can not only avoid forever loop with broken filesystem
12605 * but also give us some speedups.
12607 if (test_range_bit(&fs_info->pinned_extents, eb->start,
12608 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
12611 btrfs_pin_extent(fs_info, eb->start, eb->len);
12613 nritems = btrfs_header_nritems(eb);
12614 for (i = 0; i < nritems; i++) {
12616 btrfs_item_key_to_cpu(eb, &key, i);
12617 if (key.type != BTRFS_ROOT_ITEM_KEY)
12619 /* Skip the extent root and reloc roots */
12620 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
12621 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
12622 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
12624 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
12625 bytenr = btrfs_disk_root_bytenr(eb, ri);
12628 * If at any point we start needing the real root we
12629 * will have to build a stump root for the root we are
12630 * in, but for now this doesn't actually use the root so
12631 * just pass in extent_root.
12633 tmp = read_tree_block(fs_info, bytenr, 0);
12634 if (!extent_buffer_uptodate(tmp)) {
12635 fprintf(stderr, "Error reading root block\n");
/* The referenced root is walked with tree_root == 0. */
12638 ret = pin_down_tree_blocks(fs_info, tmp, 0);
12639 free_extent_buffer(tmp);
/* Child pointer of an internal node. */
12643 bytenr = btrfs_node_blockptr(eb, i);
12645 /* If we aren't the tree root don't read the block */
12646 if (level == 1 && !tree_root) {
12647 btrfs_pin_extent(fs_info, bytenr,
12648 fs_info->nodesize);
12652 tmp = read_tree_block(fs_info, bytenr, 0);
12653 if (!extent_buffer_uptodate(tmp)) {
12654 fprintf(stderr, "Error reading tree block\n");
12657 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
12658 free_extent_buffer(tmp);
12667 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
12671 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
12675 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * reset_block_groups() - rebuild the in-memory block groups from the chunk tree.
 * @fs_info: filesystem whose block groups are recreated
 *
 * Clears the cached allocation profile bits, then adds one block group for
 * every CHUNK_ITEM found in the chunk tree and marks the matching range in
 * fs_info->free_space_cache.  A second pass iterates over the block groups
 * just created.
 * NOTE(review): the body of that second pass, the loop constructs and the
 * return statements are not visible in this listing.
 */
12678 static int reset_block_groups(struct btrfs_fs_info *fs_info)
12680 struct btrfs_block_group_cache *cache;
12681 struct btrfs_path path;
12682 struct extent_buffer *leaf;
12683 struct btrfs_chunk *chunk;
12684 struct btrfs_key key;
12688 btrfs_init_path(&path);
12690 key.type = BTRFS_CHUNK_ITEM_KEY;
12692 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
12694 btrfs_release_path(&path);
12699 * We do this in case the block groups were screwed up and had alloc
12700 * bits that aren't actually set on the chunks. This happens with
12701 * restored images every time and could happen in real life I guess.
12703 fs_info->avail_data_alloc_bits = 0;
12704 fs_info->avail_metadata_alloc_bits = 0;
12705 fs_info->avail_system_alloc_bits = 0;
12707 /* First we need to create the in-memory block groups */
/* Step to the next leaf once the current one is exhausted. */
12709 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12710 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
12712 btrfs_release_path(&path);
12720 leaf = path.nodes[0];
12721 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12722 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
/* Type and length are taken from the chunk item. */
12727 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
12728 btrfs_add_block_group(fs_info, 0,
12729 btrfs_chunk_type(leaf, chunk),
12730 key.objectid, key.offset,
12731 btrfs_chunk_length(leaf, chunk));
12732 set_extent_dirty(&fs_info->free_space_cache, key.offset,
12733 key.offset + btrfs_chunk_length(leaf, chunk));
/* Second pass: visit each block group in turn, advancing past its end. */
12738 cache = btrfs_lookup_first_block_group(fs_info, start);
12742 start = cache->key.objectid + cache->key.offset;
12745 btrfs_release_path(&path);
/*
 * reset_balance() - drop a pending balance and the relocation trees.
 * @trans:   running transaction
 * @fs_info: filesystem being repaired
 *
 * Three visible steps, all on the tree root: delete the balance item, delete
 * the BTRFS_TREE_RELOC_OBJECTID root items in batches, then reinitialise the
 * data reloc tree and recreate its root directory.
 * NOTE(review): the error checks, loop constructs and return statements are
 * not visible in this listing; confirm against the complete source.
 */
12749 static int reset_balance(struct btrfs_trans_handle *trans,
12750 struct btrfs_fs_info *fs_info)
12752 struct btrfs_root *root = fs_info->tree_root;
12753 struct btrfs_path path;
12754 struct extent_buffer *leaf;
12755 struct btrfs_key key;
12756 int del_slot, del_nr = 0;
12760 btrfs_init_path(&path);
/* Step 1: look up the balance item for deletion. */
12761 key.objectid = BTRFS_BALANCE_OBJECTID;
12762 key.type = BTRFS_BALANCE_ITEM_KEY;
12764 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12769 goto reinit_data_reloc;
12774 ret = btrfs_del_item(trans, root, &path);
12777 btrfs_release_path(&path);
/* Step 2: collect runs of reloc tree root items and delete them together. */
12779 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12780 key.type = BTRFS_ROOT_ITEM_KEY;
12782 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12786 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12791 ret = btrfs_del_items(trans, root, &path,
12798 btrfs_release_path(&path);
12801 ret = btrfs_search_slot(trans, root, &key, &path,
12808 leaf = path.nodes[0];
12809 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12810 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
12812 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
12817 del_slot = path.slots[0];
12826 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
12830 btrfs_release_path(&path);
/* Step 3: from here on root refers to the data reloc tree, not the tree root. */
12833 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
12834 key.type = BTRFS_ROOT_ITEM_KEY;
12835 key.offset = (u64)-1;
12836 root = btrfs_read_fs_root(fs_info, &key);
12837 if (IS_ERR(root)) {
12838 fprintf(stderr, "Error reading data reloc tree\n");
12839 ret = PTR_ERR(root);
12842 record_root_in_trans(trans, root);
12843 ret = btrfs_fsck_reinit_root(trans, root, 0);
12846 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
12848 btrfs_release_path(&path);
/*
 * reinit_extent_tree() - recreate the extent tree from scratch.
 * @trans:   running transaction
 * @fs_info: filesystem being repaired
 *
 * Visible sequence: refuse filesystems with the MIXED_GROUPS incompat flag,
 * pin all metadata blocks, free and rebuild the in-memory block groups,
 * reinitialise the extent root, insert one block group item per in-memory
 * block group into the new extent root, and finally reset any pending
 * balance.  Each failing step prints a message to stderr.
 * NOTE(review): the error checks, the block group loop construct and the
 * return statements are not visible in this listing; confirm the returned
 * values against the complete source.
 */
12852 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
12853 struct btrfs_fs_info *fs_info)
12859 * The only reason we don't do this is because right now we're just
12860 * walking the trees we find and pinning down their bytes, we don't look
12861 * at any of the leaves. In order to do mixed groups we'd have to check
12862 * the leaves of any fs roots and pin down the bytes for any file
12863 * extents we find. Not hard but why do it if we don't have to?
12865 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
12866 fprintf(stderr, "We don't support re-initing the extent tree "
12867 "for mixed block groups yet, please notify a btrfs "
12868 "developer you want to do this so they can add this "
12869 "functionality.\n");
12874 * first we need to walk all of the trees except the extent tree and pin
12875 * down the bytes that are in use so we don't overwrite any existing
12878 ret = pin_metadata_blocks(fs_info);
12880 fprintf(stderr, "error pinning down used bytes\n");
12885 * Need to drop all the block groups since we're going to recreate all
12888 btrfs_free_block_groups(fs_info);
12889 ret = reset_block_groups(fs_info);
12891 fprintf(stderr, "error resetting the block groups\n");
12895 /* Ok we can allocate now, reinit the extent root */
12896 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
12898 fprintf(stderr, "extent root initialization failed\n");
12900 * When the transaction code is updated we should end the
12901 * transaction, but for now progs only knows about commit so
12902 * just return an error.
12908 * Now we have all the in-memory block groups setup so we can make
12909 * allocations properly, and the metadata we care about is safe since we
12910 * pinned all of it above.
12913 struct btrfs_block_group_cache *cache;
12915 cache = btrfs_lookup_first_block_group(fs_info, start);
12918 start = cache->key.objectid + cache->key.offset;
12919 ret = btrfs_insert_item(trans, fs_info->extent_root,
12920 &cache->key, &cache->item,
12921 sizeof(cache->item));
12923 fprintf(stderr, "Error adding block group\n");
12926 btrfs_extent_post_op(trans, fs_info->extent_root);
12929 ret = reset_balance(trans, fs_info);
12931 fprintf(stderr, "error resetting the pending balance\n");
12936 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
12938 struct btrfs_path path;
12939 struct btrfs_trans_handle *trans;
12940 struct btrfs_key key;
12943 printf("Recowing metadata block %llu\n", eb->start);
12944 key.objectid = btrfs_header_owner(eb);
12945 key.type = BTRFS_ROOT_ITEM_KEY;
12946 key.offset = (u64)-1;
12948 root = btrfs_read_fs_root(root->fs_info, &key);
12949 if (IS_ERR(root)) {
12950 fprintf(stderr, "Couldn't find owner root %llu\n",
12952 return PTR_ERR(root);
12955 trans = btrfs_start_transaction(root, 1);
12957 return PTR_ERR(trans);
12959 btrfs_init_path(&path);
12960 path.lowest_level = btrfs_header_level(eb);
12961 if (path.lowest_level)
12962 btrfs_node_key_to_cpu(eb, &key, 0);
12964 btrfs_item_key_to_cpu(eb, &key, 0);
12966 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12967 btrfs_commit_transaction(trans, root);
12968 btrfs_release_path(&path);
12972 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12974 struct btrfs_path path;
12975 struct btrfs_trans_handle *trans;
12976 struct btrfs_key key;
12979 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12980 bad->key.type, bad->key.offset);
12981 key.objectid = bad->root_id;
12982 key.type = BTRFS_ROOT_ITEM_KEY;
12983 key.offset = (u64)-1;
12985 root = btrfs_read_fs_root(root->fs_info, &key);
12986 if (IS_ERR(root)) {
12987 fprintf(stderr, "Couldn't find owner root %llu\n",
12989 return PTR_ERR(root);
12992 trans = btrfs_start_transaction(root, 1);
12994 return PTR_ERR(trans);
12996 btrfs_init_path(&path);
12997 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13003 ret = btrfs_del_item(trans, root, &path);
13005 btrfs_commit_transaction(trans, root);
13006 btrfs_release_path(&path);
13010 static int zero_log_tree(struct btrfs_root *root)
13012 struct btrfs_trans_handle *trans;
13015 trans = btrfs_start_transaction(root, 1);
13016 if (IS_ERR(trans)) {
13017 ret = PTR_ERR(trans);
13020 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13021 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13022 ret = btrfs_commit_transaction(trans, root);
13026 static int populate_csum(struct btrfs_trans_handle *trans,
13027 struct btrfs_root *csum_root, char *buf, u64 start,
13030 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13035 while (offset < len) {
13036 sectorsize = fs_info->sectorsize;
13037 ret = read_extent_data(fs_info, buf, start + offset,
13041 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13042 start + offset, buf, sectorsize);
13045 offset += sectorsize;
/*
 * fill_csum_tree_from_one_fs_root() - checksum the regular extents of one tree.
 * @trans:     running transaction
 * @csum_root: checksum tree that receives the new items
 * @cur_root:  fs/subvolume tree whose file extents are walked
 *
 * Iterates over the items of @cur_root and calls populate_csum() for the
 * on-disk range of every EXTENT_DATA item of type BTRFS_FILE_EXTENT_REG.
 * NOTE(review): the handling of -EEXIST, the loop construct, the release of
 * the scratch buffer and the return statements are not visible in this
 * listing; confirm against the complete source.
 */
13050 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13051 struct btrfs_root *csum_root,
13052 struct btrfs_root *cur_root)
13054 struct btrfs_path path;
13055 struct btrfs_key key;
13056 struct extent_buffer *node;
13057 struct btrfs_file_extent_item *fi;
/* One sector of scratch space, passed to populate_csum() as its buffer. */
13064 buf = malloc(cur_root->fs_info->sectorsize);
13068 btrfs_init_path(&path);
13072 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13075 /* Iterate all regular file extents and fill its csum */
13077 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13079 if (key.type != BTRFS_EXTENT_DATA_KEY)
13081 node = path.nodes[0];
13082 slot = path.slots[0];
13083 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13084 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* The on-disk extent range is checksummed, not the file range. */
13086 start = btrfs_file_extent_disk_bytenr(node, fi);
13087 len = btrfs_file_extent_disk_num_bytes(node, fi);
13089 ret = populate_csum(trans, csum_root, buf, start, len);
13090 if (ret == -EEXIST)
13096 * TODO: if next leaf is corrupted, jump to nearest next valid
13099 ret = btrfs_next_item(cur_root, &path);
13109 btrfs_release_path(&path);
/*
 * fill_csum_tree_from_fs() - rebuild checksums by walking every fs tree.
 * @trans:     running transaction
 * @csum_root: checksum tree that receives the new items
 *
 * Iterates the ROOT_ITEMs of the tree root starting at
 * BTRFS_FS_TREE_OBJECTID, reads each fs/subvolume root and passes it to
 * fill_csum_tree_from_one_fs_root().
 * NOTE(review): the loop construct, the error handling and the return
 * statements are not visible in this listing.
 */
13114 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13115 struct btrfs_root *csum_root)
13117 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13118 struct btrfs_path path;
13119 struct btrfs_root *tree_root = fs_info->tree_root;
13120 struct btrfs_root *cur_root;
13121 struct extent_buffer *node;
13122 struct btrfs_key key;
13126 btrfs_init_path(&path);
13127 key.objectid = BTRFS_FS_TREE_OBJECTID;
13129 key.type = BTRFS_ROOT_ITEM_KEY;
13130 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13139 node = path.nodes[0];
13140 slot = path.slots[0];
13141 btrfs_item_key_to_cpu(node, &key, slot);
/* Only ROOT_ITEMs of fs trees up to BTRFS_LAST_FREE_OBJECTID are of interest. */
13142 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13144 if (key.type != BTRFS_ROOT_ITEM_KEY)
13146 if (!is_fstree(key.objectid))
13148 key.offset = (u64)-1;
13150 cur_root = btrfs_read_fs_root(fs_info, &key);
13151 if (IS_ERR(cur_root) || !cur_root) {
13152 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13156 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13161 ret = btrfs_next_item(tree_root, &path);
13171 btrfs_release_path(&path);
/*
 * fill_csum_tree_from_extent() - rebuild checksums from the extent tree.
 * @trans:     running transaction
 * @csum_root: checksum tree that receives the new items
 *
 * Walks the EXTENT_ITEMs of the extent tree and calls populate_csum() for
 * those that have BTRFS_EXTENT_FLAG_DATA set.
 * NOTE(review): the loop construct, the release of the scratch buffer and
 * the return statements are not visible in this listing.
 */
13175 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13176 struct btrfs_root *csum_root)
13178 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13179 struct btrfs_path path;
13180 struct btrfs_extent_item *ei;
13181 struct extent_buffer *leaf;
13183 struct btrfs_key key;
13186 btrfs_init_path(&path);
13188 key.type = BTRFS_EXTENT_ITEM_KEY;
13190 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
13192 btrfs_release_path(&path);
/* One sector of scratch space, passed to populate_csum() as its buffer. */
13196 buf = malloc(csum_root->fs_info->sectorsize);
13198 btrfs_release_path(&path);
/* Step to the next leaf once the current one is exhausted. */
13203 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13204 ret = btrfs_next_leaf(extent_root, &path);
13212 leaf = path.nodes[0];
13214 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13215 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extents without the DATA flag get no populate_csum() call. */
13220 ei = btrfs_item_ptr(leaf, path.slots[0],
13221 struct btrfs_extent_item);
13222 if (!(btrfs_extent_flags(leaf, ei) &
13223 BTRFS_EXTENT_FLAG_DATA)) {
13228 ret = populate_csum(trans, csum_root, buf, key.objectid,
13235 btrfs_release_path(&path);
13241 * Recalculate the csum and put it into the csum tree.
13243 * Extent tree init will wipe out all the extent info, so in that case, we
13244 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
13245 * will use fs/subvol trees to init the csum tree.
/*
 * fill_csum_tree() - recompute data checksums into @csum_root.
 * @trans:          running transaction
 * @csum_root:      checksum tree that receives the new items
 * @search_fs_tree: non-zero to discover data extents through the
 *                  fs/subvolume trees, zero to use the extent tree
 *
 * Returns the result of the selected implementation.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ?
		fill_csum_tree_from_fs(trans, csum_root) :
		fill_csum_tree_from_extent(trans, csum_root);
}
13257 static void free_roots_info_cache(void)
13259 if (!roots_info_cache)
13262 while (!cache_tree_empty(roots_info_cache)) {
13263 struct cache_extent *entry;
13264 struct root_item_info *rii;
13266 entry = first_cache_extent(roots_info_cache);
13269 remove_cache_extent(roots_info_cache, entry);
13270 rii = container_of(entry, struct root_item_info, cache_extent);
13274 free(roots_info_cache);
13275 roots_info_cache = NULL;
/*
 * build_roots_info_cache() - collect, per root id, the highest tree block seen.
 * @info: filesystem whose extent tree is scanned
 *
 * Allocates the global roots_info_cache on first use, then walks the extent
 * tree.  For every tree block extent whose first inline ref is a
 * BTRFS_TREE_BLOCK_REF_KEY, the entry keyed by that root id is created if
 * needed and updated: a block at a higher level than the recorded one
 * replaces bytenr, generation and level and resets node_count to 1.
 * NOTE(review): the branch taken when the level equals the recorded level,
 * the loop construct and the return statements are not visible in this
 * listing; confirm against the complete source.
 */
13278 static int build_roots_info_cache(struct btrfs_fs_info *info)
13281 struct btrfs_key key;
13282 struct extent_buffer *leaf;
13283 struct btrfs_path path;
13285 if (!roots_info_cache) {
13286 roots_info_cache = malloc(sizeof(*roots_info_cache));
13287 if (!roots_info_cache)
13289 cache_tree_init(roots_info_cache);
13292 btrfs_init_path(&path);
13294 key.type = BTRFS_EXTENT_ITEM_KEY;
13296 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
13299 leaf = path.nodes[0];
13302 struct btrfs_key found_key;
13303 struct btrfs_extent_item *ei;
13304 struct btrfs_extent_inline_ref *iref;
13305 int slot = path.slots[0];
13310 struct cache_extent *entry;
13311 struct root_item_info *rii;
/* Step to the next leaf once the current one is exhausted. */
13313 if (slot >= btrfs_header_nritems(leaf)) {
13314 ret = btrfs_next_leaf(info->extent_root, &path);
13321 leaf = path.nodes[0];
13322 slot = path.slots[0];
13325 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13327 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
13328 found_key.type != BTRFS_METADATA_ITEM_KEY)
13331 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
13332 flags = btrfs_extent_flags(leaf, ei);
/* An EXTENT_ITEM without the TREE_BLOCK flag is not a tree block. */
13334 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
13335 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * A METADATA_ITEM stores the level in the key offset; an EXTENT_ITEM stores
 * it in the btrfs_tree_block_info that precedes the inline refs.
 */
13338 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
13339 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
13340 level = found_key.offset;
13342 struct btrfs_tree_block_info *binfo;
13344 binfo = (struct btrfs_tree_block_info *)(ei + 1);
13345 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
13346 level = btrfs_tree_block_level(leaf, binfo);
13350 * For a root extent, it must be of the following type and the
13351 * first (and only one) iref in the item.
13353 type = btrfs_extent_inline_ref_type(leaf, iref);
13354 if (type != BTRFS_TREE_BLOCK_REF_KEY)
/* Cache entries are keyed by root id with a size of 1. */
13357 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
13358 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13360 rii = malloc(sizeof(struct root_item_info));
13365 rii->cache_extent.start = root_id;
13366 rii->cache_extent.size = 1;
/* (u8)-1 marks an entry for which no level has been recorded yet. */
13367 rii->level = (u8)-1;
13368 entry = &rii->cache_extent;
13369 ret = insert_cache_extent(roots_info_cache, entry);
13372 rii = container_of(entry, struct root_item_info,
13376 ASSERT(rii->cache_extent.start == root_id);
13377 ASSERT(rii->cache_extent.size == 1);
13379 if (level > rii->level || rii->level == (u8)-1) {
13380 rii->level = level;
13381 rii->bytenr = found_key.objectid;
13382 rii->gen = btrfs_extent_generation(leaf, ei);
13383 rii->node_count = 1;
13384 } else if (level == rii->level) {
13392 btrfs_release_path(&path);
/*
 * maybe_repair_root_item() - compare a root item with the cached root info.
 * @path:           path positioned on the root item in the tree root
 * @root_key:       key of that root item; its objectid is the root id
 * @read_only_mode: non-zero to only detect a mismatch, zero to also rewrite
 *                  the root item in the leaf
 *
 * The bytenr, level and generation stored in the root item are compared with
 * the values recorded in roots_info_cache.  When they differ and
 * @read_only_mode is zero, the three fields are overwritten in a local copy
 * that is written back into the leaf.
 * NOTE(review): the return statements are not visible in this listing;
 * confirm the meaning of the return value against the complete source.
 */
13397 static int maybe_repair_root_item(struct btrfs_path *path,
13398 const struct btrfs_key *root_key,
13399 const int read_only_mode)
13401 const u64 root_id = root_key->objectid;
13402 struct cache_extent *entry;
13403 struct root_item_info *rii;
13404 struct btrfs_root_item ri;
13405 unsigned long offset;
13407 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13410 "Error: could not find extent items for root %llu\n",
13411 root_key->objectid);
13415 rii = container_of(entry, struct root_item_info, cache_extent);
13416 ASSERT(rii->cache_extent.start == root_id);
13417 ASSERT(rii->cache_extent.size == 1);
/* Any node_count other than 1 is reported as a missing btree root extent. */
13419 if (rii->node_count != 1) {
13421 "Error: could not find btree root extent for root %llu\n",
13426 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
13427 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
13429 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
13430 btrfs_root_level(&ri) != rii->level ||
13431 btrfs_root_generation(&ri) != rii->gen) {
13434 * If we're in repair mode but our caller told us to not update
13435 * the root item, i.e. just check if it needs to be updated, don't
13436 * print this message, since the caller will call us again shortly
13437 * for the same root item without read only mode (the caller will
13438 * open a transaction first).
13440 if (!(read_only_mode && repair))
13442 "%sroot item for root %llu,"
13443 " current bytenr %llu, current gen %llu, current level %u,"
13444 " new bytenr %llu, new gen %llu, new level %u\n",
13445 (read_only_mode ? "" : "fixing "),
13447 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
13448 btrfs_root_level(&ri),
13449 rii->bytenr, rii->gen, rii->level);
/* A root item newer than the root node that was found gets its own message. */
13451 if (btrfs_root_generation(&ri) > rii->gen) {
13453 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
13454 root_id, btrfs_root_generation(&ri), rii->gen);
/* Patch the local copy and write it back over the item in the leaf. */
13458 if (!read_only_mode) {
13459 btrfs_set_root_bytenr(&ri, rii->bytenr);
13460 btrfs_set_root_level(&ri, rii->level);
13461 btrfs_set_root_generation(&ri, rii->gen);
13462 write_extent_buffer(path->nodes[0], &ri,
13463 offset, sizeof(ri));
13473 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
13474 * caused read-only snapshots to be corrupted if they were created at a moment
13475 * when the source subvolume/snapshot had orphan items. The issue was that the
13476 * on-disk root items became incorrect, referring to the pre orphan cleanup root
13477 * node instead of the post orphan cleanup root node.
13478 * So this function, and its callees, just detects and fixes those cases. Even
13479 * though the regression was for read-only snapshots, this function applies to
13480 * any snapshot/subvolume root.
13481 * This must be run before any other repair code - not doing so makes other
13482 * repair code delete or modify backrefs in the extent tree for example, which
13483 * will result in an inconsistent fs after repairing the root items.
/*
 * repair_root_items() - check, and in repair mode fix, the root items.
 * @info: filesystem whose tree root is scanned
 *
 * Builds the roots info cache, then walks the ROOT_ITEMs of the tree root
 * starting at BTRFS_FIRST_FREE_OBJECTID and hands each one, except reloc
 * tree roots, to maybe_repair_root_item().  That helper runs in read-only
 * mode while no transaction is open.
 * NOTE(review): the loop and retry constructs, the use of need_trans and the
 * return statements are not visible in this listing; confirm the meaning of
 * the return value against the complete source.
 */
13485 static int repair_root_items(struct btrfs_fs_info *info)
13487 struct btrfs_path path;
13488 struct btrfs_key key;
13489 struct extent_buffer *leaf;
13490 struct btrfs_trans_handle *trans = NULL;
13493 int need_trans = 0;
13495 btrfs_init_path(&path);
13497 ret = build_roots_info_cache(info);
13501 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
13502 key.type = BTRFS_ROOT_ITEM_KEY;
13507 * Avoid opening and committing transactions if a leaf doesn't have
13508 * any root items that need to be fixed, so that we avoid rotating
13509 * backup roots unnecessarily.
13512 trans = btrfs_start_transaction(info->tree_root, 1);
13513 if (IS_ERR(trans)) {
13514 ret = PTR_ERR(trans);
13519 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
13523 leaf = path.nodes[0];
13526 struct btrfs_key found_key;
/* End of leaf: find the key to resume from before the path is released. */
13528 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
13529 int no_more_keys = find_next_key(&path, &key);
13531 btrfs_release_path(&path);
13533 ret = btrfs_commit_transaction(trans,
13545 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13547 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
13549 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* Without an open transaction the helper only checks and does not write. */
13552 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
13556 if (!trans && repair) {
13559 btrfs_release_path(&path);
13569 free_roots_info_cache();
13570 btrfs_release_path(&path);
13572 btrfs_commit_transaction(trans, info->tree_root);
13579 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13581 struct btrfs_trans_handle *trans;
13582 struct btrfs_block_group_cache *bg_cache;
13586 /* Clear all free space cache inodes and its extent data */
13588 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13591 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
13594 current = bg_cache->key.objectid + bg_cache->key.offset;
13597 /* Don't forget to set cache_generation to -1 */
13598 trans = btrfs_start_transaction(fs_info->tree_root, 0);
13599 if (IS_ERR(trans)) {
13600 error("failed to update super block cache generation");
13601 return PTR_ERR(trans);
13603 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
13604 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * do_clear_free_space_cache() - clear the free space cache of one version.
 * @fs_info: filesystem to operate on
 *
 * The visible body switches on clear_version: 1 clears the v1 cache unless
 * the FREE_SPACE_TREE compat_ro flag is set, 2 clears the free space tree
 * when that flag is set.
 * NOTE(review): the declaration of clear_version (presumably the second
 * parameter) and the return statements are not visible in this listing;
 * confirm the return values against the complete source.
 */
13609 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
13614 if (clear_version == 1) {
/* A filesystem using the free space tree is refused for v1 clearing. */
13615 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13617 "free space cache v2 detected, use --clear-space-cache v2");
13621 printf("Clearing free space cache\n");
13622 ret = clear_free_space_cache(fs_info);
13624 error("failed to clear free space cache");
13627 printf("Free space cache cleared\n");
13629 } else if (clear_version == 2) {
/* Without the compat_ro flag only a notice is printed. */
13630 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13631 printf("no free space cache v2 to clear\n");
13635 printf("Clear free space cache v2\n");
13636 ret = btrfs_clear_free_space_tree(fs_info);
13638 error("failed to clear free space cache v2: %d", ret);
13641 printf("free space cache v2 cleared\n");
/*
 * Usage text of the check command, one string per array element.  In the
 * visible entries the synopsis comes first, then a one line summary, a
 * longer description and the option list.
 * NOTE(review): a few entries and the array terminator are not visible in
 * this listing.
 */
13648 const char * const cmd_check_usage[] = {
13649 "btrfs check [options] <device>",
13650 "Check structural integrity of a filesystem (unmounted).",
13651 "Check structural integrity of an unmounted filesystem. Verify internal",
13652 "trees' consistency and item connectivity. In the repair mode try to",
13653 "fix the problems found. ",
13654 "WARNING: the repair mode is considered dangerous",
13656 "-s|--super <superblock> use this superblock copy",
13657 "-b|--backup use the first valid backup root copy",
13658 "--force skip mount checks, repair is not possible",
13659 "--repair try to repair the filesystem",
13660 "--readonly run in read-only mode (default)",
13661 "--init-csum-tree create a new CRC tree",
13662 "--init-extent-tree create a new extent tree",
13663 "--mode <MODE> allows choice of memory/IO trade-offs",
13664 " where MODE is one of:",
13665 " original - read inodes and extents to memory (requires",
13666 " more memory, does less IO)",
13667 " lowmem - try to use less memory but read blocks again",
13669 "--check-data-csum verify checksums of data blocks",
13670 "-Q|--qgroup-report print a report on qgroup consistency",
13671 "-E|--subvol-extents <subvolid>",
13672 " print subvolume extents and sharing state",
13673 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
13674 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
13675 "-p|--progress indicate progress",
13676 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
13680 int cmd_check(int argc, char **argv)
13682 struct cache_tree root_cache;
13683 struct btrfs_root *root;
13684 struct btrfs_fs_info *info;
13687 u64 tree_root_bytenr = 0;
13688 u64 chunk_root_bytenr = 0;
13689 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
13693 int init_csum_tree = 0;
13695 int clear_space_cache = 0;
13696 int qgroup_report = 0;
13697 int qgroups_repaired = 0;
13698 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
13703 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
13704 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
13705 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
13706 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
13707 GETOPT_VAL_FORCE };
13708 static const struct option long_options[] = {
13709 { "super", required_argument, NULL, 's' },
13710 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
13711 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
13712 { "init-csum-tree", no_argument, NULL,
13713 GETOPT_VAL_INIT_CSUM },
13714 { "init-extent-tree", no_argument, NULL,
13715 GETOPT_VAL_INIT_EXTENT },
13716 { "check-data-csum", no_argument, NULL,
13717 GETOPT_VAL_CHECK_CSUM },
13718 { "backup", no_argument, NULL, 'b' },
13719 { "subvol-extents", required_argument, NULL, 'E' },
13720 { "qgroup-report", no_argument, NULL, 'Q' },
13721 { "tree-root", required_argument, NULL, 'r' },
13722 { "chunk-root", required_argument, NULL,
13723 GETOPT_VAL_CHUNK_TREE },
13724 { "progress", no_argument, NULL, 'p' },
13725 { "mode", required_argument, NULL,
13727 { "clear-space-cache", required_argument, NULL,
13728 GETOPT_VAL_CLEAR_SPACE_CACHE},
13729 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
13730 { NULL, 0, NULL, 0}
13733 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
13737 case 'a': /* ignored */ break;
13739 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
13742 num = arg_strtou64(optarg);
13743 if (num >= BTRFS_SUPER_MIRROR_MAX) {
13745 "super mirror should be less than %d",
13746 BTRFS_SUPER_MIRROR_MAX);
13749 bytenr = btrfs_sb_offset(((int)num));
13750 printf("using SB copy %llu, bytenr %llu\n", num,
13751 (unsigned long long)bytenr);
13757 subvolid = arg_strtou64(optarg);
13760 tree_root_bytenr = arg_strtou64(optarg);
13762 case GETOPT_VAL_CHUNK_TREE:
13763 chunk_root_bytenr = arg_strtou64(optarg);
13766 ctx.progress_enabled = true;
13770 usage(cmd_check_usage);
13771 case GETOPT_VAL_REPAIR:
13772 printf("enabling repair mode\n");
13774 ctree_flags |= OPEN_CTREE_WRITES;
13776 case GETOPT_VAL_READONLY:
13779 case GETOPT_VAL_INIT_CSUM:
13780 printf("Creating a new CRC tree\n");
13781 init_csum_tree = 1;
13783 ctree_flags |= OPEN_CTREE_WRITES;
13785 case GETOPT_VAL_INIT_EXTENT:
13786 init_extent_tree = 1;
13787 ctree_flags |= (OPEN_CTREE_WRITES |
13788 OPEN_CTREE_NO_BLOCK_GROUPS);
13791 case GETOPT_VAL_CHECK_CSUM:
13792 check_data_csum = 1;
13794 case GETOPT_VAL_MODE:
13795 check_mode = parse_check_mode(optarg);
13796 if (check_mode == CHECK_MODE_UNKNOWN) {
13797 error("unknown mode: %s", optarg);
13801 case GETOPT_VAL_CLEAR_SPACE_CACHE:
13802 if (strcmp(optarg, "v1") == 0) {
13803 clear_space_cache = 1;
13804 } else if (strcmp(optarg, "v2") == 0) {
13805 clear_space_cache = 2;
13806 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
13809 "invalid argument to --clear-space-cache, must be v1 or v2");
13812 ctree_flags |= OPEN_CTREE_WRITES;
13814 case GETOPT_VAL_FORCE:
13820 if (check_argc_exact(argc - optind, 1))
13821 usage(cmd_check_usage);
13823 if (ctx.progress_enabled) {
13824 ctx.tp = TASK_NOTHING;
13825 ctx.info = task_init(print_status_check, print_status_return, &ctx);
13828 /* This check is the only reason for --readonly to exist */
13829 if (readonly && repair) {
13830 error("repair options are not compatible with --readonly");
13835 * experimental and dangerous
13837 if (repair && check_mode == CHECK_MODE_LOWMEM)
13838 warning("low-memory mode repair support is only partial");
13841 cache_tree_init(&root_cache);
13843 ret = check_mounted(argv[optind]);
13846 error("could not check mount status: %s",
13852 "%s is currently mounted, use --force if you really intend to check the filesystem",
13860 error("repair and --force is not yet supported");
13867 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
13871 "filesystem mounted, continuing because of --force");
13873 /* A block device is mounted in exclusive mode by kernel */
13874 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
13877 /* only allow partial opening under repair mode */
13879 ctree_flags |= OPEN_CTREE_PARTIAL;
13881 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
13882 chunk_root_bytenr, ctree_flags);
13884 error("cannot open file system");
13890 global_info = info;
13891 root = info->fs_root;
13892 uuid_unparse(info->super_copy->fsid, uuidbuf);
13894 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
13897 * Check the bare minimum before starting anything else that could rely
13898 * on it, namely the tree roots, any local consistency checks
13900 if (!extent_buffer_uptodate(info->tree_root->node) ||
13901 !extent_buffer_uptodate(info->dev_root->node) ||
13902 !extent_buffer_uptodate(info->chunk_root->node)) {
13903 error("critical roots corrupted, unable to check the filesystem");
13909 if (clear_space_cache) {
13910 ret = do_clear_free_space_cache(info, clear_space_cache);
13916 * repair mode will force us to commit transaction which
13917 * will make us fail to load log tree when mounting.
13919 if (repair && btrfs_super_log_root(info->super_copy)) {
13920 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
13926 ret = zero_log_tree(root);
13929 error("failed to zero log tree: %d", ret);
13934 if (qgroup_report) {
13935 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
13937 ret = qgroup_verify_all(info);
13944 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
13945 subvolid, argv[optind], uuidbuf);
13946 ret = print_extent_state(info, subvolid);
13951 if (init_extent_tree || init_csum_tree) {
13952 struct btrfs_trans_handle *trans;
13954 trans = btrfs_start_transaction(info->extent_root, 0);
13955 if (IS_ERR(trans)) {
13956 error("error starting transaction");
13957 ret = PTR_ERR(trans);
13962 if (init_extent_tree) {
13963 printf("Creating a new extent tree\n");
13964 ret = reinit_extent_tree(trans, info);
13970 if (init_csum_tree) {
13971 printf("Reinitialize checksum tree\n");
13972 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
13974 error("checksum tree initialization failed: %d",
13981 ret = fill_csum_tree(trans, info->csum_root,
13985 error("checksum tree refilling failed: %d", ret);
13990 * Ok now we commit and run the normal fsck, which will add
13991 * extent entries for all of the items it finds.
13993 ret = btrfs_commit_transaction(trans, info->extent_root);
13998 if (!extent_buffer_uptodate(info->extent_root->node)) {
13999 error("critical: extent_root, unable to check the filesystem");
14004 if (!extent_buffer_uptodate(info->csum_root->node)) {
14005 error("critical: csum_root, unable to check the filesystem");
14011 ret = do_check_chunks_and_extents(info);
14015 "errors found in extent allocation tree or chunk allocation");
14017 ret = repair_root_items(info);
14020 error("failed to repair root items: %s", strerror(-ret));
14024 fprintf(stderr, "Fixed %d roots.\n", ret);
14026 } else if (ret > 0) {
14028 "Found %d roots with an outdated root item.\n",
14031 "Please run a filesystem check with the option --repair to fix them.\n");
14037 if (!ctx.progress_enabled) {
14038 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14039 fprintf(stderr, "checking free space tree\n");
14041 fprintf(stderr, "checking free space cache\n");
14043 ret = check_space_cache(root);
14046 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14047 error("errors found in free space tree");
14049 error("errors found in free space cache");
14054 * We used to have to have these hole extents in between our real
14055 * extents so if we don't have this flag set we need to make sure there
14056 * are no gaps in the file extents for inodes, otherwise we can just
14057 * ignore it when this happens.
14059 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14060 ret = do_check_fs_roots(info, &root_cache);
14063 error("errors found in fs roots");
14067 fprintf(stderr, "checking csums\n");
14068 ret = check_csums(root);
14071 error("errors found in csum tree");
14075 fprintf(stderr, "checking root refs\n");
14076 /* For low memory mode, check_fs_roots_v2 handles root refs */
14077 if (check_mode != CHECK_MODE_LOWMEM) {
14078 ret = check_root_refs(root, &root_cache);
14081 error("errors found in root refs");
14086 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14087 struct extent_buffer *eb;
14089 eb = list_first_entry(&root->fs_info->recow_ebs,
14090 struct extent_buffer, recow);
14091 list_del_init(&eb->recow);
14092 ret = recow_extent_buffer(root, eb);
14095 error("fails to fix transid errors");
14100 while (!list_empty(&delete_items)) {
14101 struct bad_item *bad;
14103 bad = list_first_entry(&delete_items, struct bad_item, list);
14104 list_del_init(&bad->list);
14106 ret = delete_bad_item(root, bad);
14112 if (info->quota_enabled) {
14113 fprintf(stderr, "checking quota groups\n");
14114 ret = qgroup_verify_all(info);
14117 error("failed to check quota groups");
14121 ret = repair_qgroups(info, &qgroups_repaired);
14124 error("failed to repair quota groups");
14130 if (!list_empty(&root->fs_info->recow_ebs)) {
14131 error("transid errors in file system");
14136 printf("found %llu bytes used, ",
14137 (unsigned long long)bytes_used);
14139 printf("error(s) found\n");
14141 printf("no error found\n");
14142 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14143 printf("total tree bytes: %llu\n",
14144 (unsigned long long)total_btree_bytes);
14145 printf("total fs tree bytes: %llu\n",
14146 (unsigned long long)total_fs_tree_bytes);
14147 printf("total extent tree bytes: %llu\n",
14148 (unsigned long long)total_extent_tree_bytes);
14149 printf("btree space waste bytes: %llu\n",
14150 (unsigned long long)btree_space_waste);
14151 printf("file data blocks allocated: %llu\n referenced %llu\n",
14152 (unsigned long long)data_bytes_allocated,
14153 (unsigned long long)data_bytes_referenced);
14155 free_qgroup_counts();
14156 free_root_recs_tree(&root_cache);
14160 if (ctx.progress_enabled)
14161 task_deinit(ctx.info);