2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
/*
 * File-scope state for this translation unit. All u64 totals start at zero;
 * the code that updates them is not part of this view.
 */
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
/* When non-zero, maybe_free_inode_rec() never raises I_ERR_FILE_EXTENT_DISCOUNT. */
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
/* Context handed to the status callback (print_status_check() receives a struct task_ctx). */
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
/*
 * Check mode selector. parse_check_mode() yields CHECK_MODE_LOWMEM,
 * CHECK_MODE_ORIGINAL or CHECK_MODE_UNKNOWN; CHECK_MODE_DEFAULT is an alias
 * of CHECK_MODE_ORIGINAL.
 */
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
/* Currently selected mode; initialised to the default. */
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
/*
 * State bits shared by both backref flavours. This struct is embedded as the
 * `node` member of struct data_backref and struct tree_backref, and is the
 * type rb_node_to_extent_backref() recovers from an rb-tree node.
 */
87 struct extent_backref {
/* Set for data backrefs, clear for tree backrefs (see the WARN_ONs in the comparators). */
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
/* Compared by compare_extent_backref(); also short-circuits compare_data_backref(). */
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
/* Map an rb-tree node back to the struct extent_backref that embeds it as `node`. */
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
98 return rb_entry(node, struct extent_backref, node);
/*
 * Backref for a data extent. Only the embedded common header is visible in
 * this view; compare_data_backref() also reads parent, owner, offset,
 * found_ref, disk_bytenr and bytes from this struct.
 */
101 struct data_backref {
/* Common header; to_data_backref() uses it to get back to this struct. */
102 struct extent_backref node;
/*
 * Single-bit error flags (bits 1 through 20). Bit 0 is not assigned in this
 * list. Each flag owns a distinct bit, so flags can be OR-ed together.
 */
116 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
117 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
118 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but does not match */
119 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
120 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
121 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but does not match */
122 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
123 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
124 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
125 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
126 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
127 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
128 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM has no reference */
129 #define NO_INODE_ITEM (1<<14) /* no inode_item */
130 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
131 #define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
132 #define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but does not match */
133 #define DIR_INDEX_MISSING (1<<18) /* DIR_INDEX not found */
134 #define DIR_INDEX_MISMATCH (1<<19) /* DIR_INDEX found but does not match */
135 #define DIR_COUNT_AGAIN (1<<20) /* DIR isize should be recalculated */
/* Recover the enclosing struct data_backref from its embedded `node` header. */
137 static inline struct data_backref* to_data_backref(struct extent_backref *back)
139 return container_of(back, struct data_backref, node);
/*
 * rb-tree comparator for two data backrefs.
 *
 * Both nodes are expected to be data backrefs (a WARN_ON fires otherwise).
 * Keys are examined in this order: parent (which shares storage with root),
 * then owner, then offset, and - only when both entries have found_ref set -
 * disk_bytenr and bytes. A full backref is handled right after the parent
 * comparison, before any of the later keys.
 *
 * NOTE(review): the return statements are not visible in this view, so the
 * sign convention is deliberately not restated here.
 */
142 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
144 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
145 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
146 struct data_backref *back1 = to_data_backref(ext1);
147 struct data_backref *back2 = to_data_backref(ext2);
149 WARN_ON(!ext1->is_data);
150 WARN_ON(!ext2->is_data);
152 /* parent and root are a union, so this covers both */
153 if (back1->parent > back2->parent)
155 if (back1->parent < back2->parent)
158 /* This is a full backref and the parents match. */
159 if (back1->node.full_backref)
162 if (back1->owner > back2->owner)
164 if (back1->owner < back2->owner)
167 if (back1->offset > back2->offset)
169 if (back1->offset < back2->offset)
/* The on-disk location and size only take part once both sides have been seen. */
172 if (back1->found_ref && back2->found_ref) {
173 if (back1->disk_bytenr > back2->disk_bytenr)
175 if (back1->disk_bytenr < back2->disk_bytenr)
178 if (back1->bytes > back2->bytes)
180 if (back1->bytes < back2->bytes)
188 * Much like data_backref, but with the undetermined members removed
189 * and list_head used for linkage.
190 * During extent scan, it is stored in root->orphan_data_extent.
191 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
/*
 * clone_inode_rec() copies these entry by entry, and
 * print_orphan_data_extents() prints objectid, offset and disk_bytenr.
 */
193 struct orphan_data_extent {
/* Linkage into an orphan-extent list (e.g. inode_record::orphan_extents). */
194 struct list_head list;
/*
 * Backref for a tree block. Only the embedded common header is visible in
 * this view; compare_tree_backref() additionally reads `parent` from it.
 */
202 struct tree_backref {
/* Common header; to_tree_backref() uses it to get back to this struct. */
203 struct extent_backref node;
/* Recover the enclosing struct tree_backref from its embedded `node` header. */
210 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
212 return container_of(back, struct tree_backref, node);
/*
 * rb-tree comparator for two tree backrefs; the only key visible here is
 * parent (which shares storage with root). Both nodes are expected to be
 * tree backrefs - a WARN_ON fires if either has is_data set.
 *
 * NOTE(review): the return statements are not visible in this view.
 */
215 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
217 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
218 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
219 struct tree_backref *back1 = to_tree_backref(ext1);
220 struct tree_backref *back2 = to_tree_backref(ext2);
222 WARN_ON(ext1->is_data);
223 WARN_ON(ext2->is_data);
225 /* parent and root are a union, so this covers both */
226 if (back1->parent > back2->parent)
228 if (back1->parent < back2->parent)
/*
 * Top-level rb-tree comparator for backrefs of either flavour.
 *
 * Entries are first separated by is_data, then by full_backref; entries that
 * agree on both are handed to compare_data_backref() or
 * compare_tree_backref().
 *
 * NOTE(review): the condition that selects between the two helpers and the
 * early return values are not visible in this view.
 */
234 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
236 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
237 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
239 if (ext1->is_data > ext2->is_data)
242 if (ext1->is_data < ext2->is_data)
245 if (ext1->full_backref > ext2->full_backref)
247 if (ext1->full_backref < ext2->full_backref)
251 return compare_data_backref(node1, node2);
253 return compare_tree_backref(node1, node2);
256 /* Explicit initial value for extent_record::flag_block_full_backref; the value 2 needs that field's second bit */
257 enum { FLAG_UNSET = 2 };
/*
 * Per-extent bookkeeping record. It carries list and rb-tree heads for its
 * backrefs, a cache_extent for indexing, and a set of status bits. Some
 * members are not visible in this view.
 */
259 struct extent_record {
260 struct list_head backrefs;
261 struct list_head dups;
262 struct rb_root backref_tree;
/* Linkage used by to_extent_record() to get from a list entry back to the record. */
263 struct list_head list;
264 struct cache_extent cache;
265 struct btrfs_disk_key parent_key;
270 u64 extent_item_refs;
272 u64 parent_generation;
/* Two bits wide so that it can hold FLAG_UNSET (2) in addition to 0 and 1. */
276 unsigned int flag_block_full_backref:2;
277 unsigned int found_rec:1;
278 unsigned int content_checked:1;
279 unsigned int owner_ref_checked:1;
280 unsigned int is_root:1;
281 unsigned int metadata:1;
282 unsigned int bad_full_backref:1;
283 unsigned int crossing_stripes:1;
284 unsigned int wrong_chunk_type:1;
/* Recover the struct extent_record that embeds `entry` as its `list` member. */
287 static inline struct extent_record* to_extent_record(struct list_head *entry)
289 return container_of(entry, struct extent_record, list);
/*
 * One (directory, name) reference to an inode, linked into
 * inode_record::backrefs. add_inode_backref() sets one found_* bit per item
 * type it has seen for this name. Other members (dir, index, namelen, name,
 * filetype, ref_type, errors) are used elsewhere in the file but are not
 * visible in this view.
 */
292 struct inode_backref {
293 struct list_head list;
294 unsigned int found_dir_item:1;
295 unsigned int found_dir_index:1;
296 unsigned int found_inode_ref:1;
/* Recover the struct inode_backref that embeds `entry` as its `list` member. */
306 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
308 return list_entry(entry, struct inode_backref, list);
/*
 * List-linkable record that carries a drop_key.
 * NOTE(review): the remaining members are not visible in this view.
 */
311 struct root_item_record {
312 struct list_head list;
318 struct btrfs_key drop_key;
/*
 * Per-backref error bits. They are accumulated in a backref's `errors`
 * field (see add_inode_backref()) and decoded by print_ref_error().
 */
321 #define REF_ERR_NO_DIR_ITEM (1 << 0)
322 #define REF_ERR_NO_DIR_INDEX (1 << 1)
323 #define REF_ERR_NO_INODE_REF (1 << 2)
324 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
325 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
326 #define REF_ERR_DUP_INODE_REF (1 << 5)
327 #define REF_ERR_INDEX_UNMATCH (1 << 6)
328 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
329 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 0x100
330 #define REF_ERR_NO_ROOT_REF (1 << 9)
331 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
332 #define REF_ERR_DUP_ROOT_REF (1 << 11)
333 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
/*
 * One hole in a file's extent coverage, kept in an rb-tree through its
 * `node` member and described by `start` and `len` (see compare_hole()).
 * NOTE(review): the member declarations are not visible in this view.
 */
335 struct file_extent_hole {
/*
 * Everything collected about one inode while scanning. The bitfields record
 * which item kinds have been seen; numeric members (ino, nlink, isize,
 * nbytes, imode, found_link, found_size, extent_start, extent_end, errors,
 * refs) are used elsewhere in the file but are not visible in this view.
 */
341 struct inode_record {
/* List of struct inode_backref entries (see get_inode_backref()). */
342 struct list_head backrefs;
343 unsigned int checked:1;
344 unsigned int merging:1;
345 unsigned int found_inode_item:1;
346 unsigned int found_dir_item:1;
347 unsigned int found_file_extent:1;
348 unsigned int found_csum_item:1;
349 unsigned int some_csum_missing:1;
350 unsigned int nodatasum:1;
/* rb-tree of struct file_extent_hole. */
363 struct rb_root holes;
/* List of struct orphan_data_extent entries. */
364 struct list_head orphan_extents;
/*
 * Per-inode error bits. They are accumulated in inode_record::errors and
 * decoded by print_inode_error().
 */
369 #define I_ERR_NO_INODE_ITEM (1 << 0)
370 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
371 #define I_ERR_DUP_INODE_ITEM (1 << 2)
372 #define I_ERR_DUP_DIR_INDEX (1 << 3)
373 #define I_ERR_ODD_DIR_ITEM (1 << 4)
374 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
375 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
376 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
377 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 0x100
378 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
379 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 0x400
380 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
381 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
382 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
383 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
/*
 * List-linked record of one reference to a root, with one bit per piece of
 * evidence found for it.
 * NOTE(review): the remaining members are not visible in this view.
 */
385 struct root_backref {
386 struct list_head list;
387 unsigned int found_dir_item:1;
388 unsigned int found_dir_index:1;
389 unsigned int found_back_ref:1;
390 unsigned int found_forward_ref:1;
391 unsigned int reachable:1;
/* Recover the struct root_backref that embeds `entry` as its `list` member. */
400 static inline struct root_backref* to_root_backref(struct list_head *entry)
402 return list_entry(entry, struct root_backref, list);
406 struct list_head backrefs;
407 struct cache_extent cache;
408 unsigned int found_root_item:1;
414 struct cache_extent cache;
419 struct cache_extent cache;
420 struct cache_tree root_cache;
421 struct cache_tree inode_cache;
422 struct inode_record *current;
/*
 * Tree-walk state. enter_shared_node() also reads active_node and root_level
 * from it; those members are not visible in this view.
 */
431 struct walk_control {
/* Cache of struct shared_node, looked up by bytenr (see find_shared_node()). */
432 struct cache_tree shared;
/* Shared node recorded for each tree level. */
433 struct shared_node *nodes[BTRFS_MAX_LEVEL];
439 struct btrfs_key key;
441 struct list_head list;
/* List-linked entry. NOTE(review): the other members are not visible in this view. */
444 struct extent_entry {
449 struct list_head list;
/*
 * Information about a root item, indexed through a cache_extent.
 * NOTE(review): the members the two inner comments describe are not visible
 * in this view.
 */
452 struct root_item_info {
453 /* level of the root */
455 /* number of nodes at this level, must be 1 for a root */
459 struct cache_extent cache_extent;
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual bits; they are used
 * internally to classify errors. Every flag owns its own bit so that several
 * errors can be OR-ed into one result and still be told apart.
 */
#define BACKREF_MISSING (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH (1 << 4) /* Referencer found but does not match */
/*
 * Uses the first free bit: bit 4 already belongs to REFERENCER_MISMATCH, and
 * sharing it would make the two conditions indistinguishable.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH (1 << 8)
/*
 * Progress-indicator routine; `p` is a struct task_ctx.
 *
 * It starts a 1000 ms period on priv->info and prints the label for the
 * current task position followed by one of four spinner characters. The
 * output ends in '\r' rather than '\n', so each update overwrites the
 * previous one. Nothing is printed for TASK_NOTHING.
 *
 * NOTE(review): the loop structure, the `count` declaration and the return
 * statement are not visible in this view.
 */
479 static void *print_status_check(void *p)
481 struct task_ctx *priv = p;
482 const char work_indicator[] = { '.', 'o', 'O', 'o' };
/* Labels indexed by priv->tp; only one entry is visible in this view. */
484 static char *task_position_string[] = {
486 "checking free space cache",
490 task_period_start(priv->info, 1000 /* 1s */);
492 if (priv->tp == TASK_NOTHING)
496 printf("%s [%c]\r", task_position_string[priv->tp],
/* count % 4 cycles through the four spinner glyphs. */
497 work_indicator[count % 4]);
500 task_period_wait(priv->info);
505 static int print_status_return(void *p)
/*
 * Translate a mode name into an enum btrfs_check_mode.
 *
 * "lowmem" selects CHECK_MODE_LOWMEM; "orig" and "original" both select
 * CHECK_MODE_ORIGINAL; any other string yields CHECK_MODE_UNKNOWN. The match
 * is exact and case-sensitive (strcmp). `str` must not be NULL.
 */
513 static enum btrfs_check_mode parse_check_mode(const char *str)
515 if (strcmp(str, "lowmem") == 0)
516 return CHECK_MODE_LOWMEM;
517 if (strcmp(str, "orig") == 0)
518 return CHECK_MODE_ORIGINAL;
519 if (strcmp(str, "original") == 0)
520 return CHECK_MODE_ORIGINAL;
522 return CHECK_MODE_UNKNOWN;
525 /* Compatibility helper so that older code can keep working on top of the hole tree */
/*
 * Inspects the left-most hole (rb_first) of `holes`; an empty tree is handled
 * separately. Callers compare the result against an inode size or against
 * another tree's result.
 * NOTE(review): both return statements are not visible in this view.
 */
526 static u64 first_extent_gap(struct rb_root *holes)
528 struct file_extent_hole *hole;
530 if (RB_EMPTY_ROOT(holes))
533 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * Ordering used when inserting into a hole tree: primarily by start; for
 * equal starts the lengths are compared to decide which hole becomes the
 * merge centre. add_file_extent_hole() relies on this never reporting
 * equality.
 *
 * NOTE(review): the return statements are not visible in this view.
 */
537 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
539 struct file_extent_hole *hole1;
540 struct file_extent_hole *hole2;
542 hole1 = rb_entry(node1, struct file_extent_hole, node);
543 hole2 = rb_entry(node2, struct file_extent_hole, node);
545 if (hole1->start > hole2->start)
547 if (hole1->start < hole2->start)
549 /* Now hole1->start == hole2->start */
550 if (hole1->len >= hole2->len)
552 * Hole 1 will be merge center
553 * Same hole will be merged later
556 /* Hole 2 will be merge center */
561 * Add a hole to the record
563 * This will do hole merge for copy_file_extent_holes(),
564 * which will ensure there won't be contiguous holes.
/*
 * Insert a hole into `holes` and coalesce it with its neighbours.
 *
 * A new node is allocated and inserted with compare_hole(). If the previous
 * hole reaches or overlaps the new one, the new node is widened to begin at
 * the previous hole's start and the previous node is erased. Following holes
 * that begin at or before the current end are then absorbed, extending the
 * current hole when the absorbed one ends later.
 *
 * NOTE(review): the second signature line, the allocation check, the loop
 * header, the frees of erased nodes and the return value are not visible in
 * this view.
 */
566 static int add_file_extent_hole(struct rb_root *holes,
569 struct file_extent_hole *hole;
570 struct file_extent_hole *prev = NULL;
571 struct file_extent_hole *next = NULL;
573 hole = malloc(sizeof(*hole));
578 /* Since compare will not return 0, no -EEXIST will happen */
579 rb_insert(holes, &hole->node, compare_hole);
581 /* simple merge with previous hole */
582 if (rb_prev(&hole->node))
583 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
/* ">=" also merges holes that merely touch. */
585 if (prev && prev->start + prev->len >= hole->start) {
/* Keep the new hole's end, move its start back to the previous hole's start. */
586 hole->len = hole->start + hole->len - prev->start;
587 hole->start = prev->start;
588 rb_erase(&prev->node, holes);
593 /* iterate merge with next holes */
595 if (!rb_next(&hole->node))
597 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
599 if (hole->start + hole->len >= next->start) {
/* Only grow when the next hole ends at or beyond the current end. */
600 if (hole->start + hole->len <= next->start + next->len)
601 hole->len = next->start + next->len -
603 rb_erase(&next->node, holes);
/*
 * Search callback for rb_search(): `data` is a struct file_extent_hole
 * acting as the key, `node` is the tree node under test. A key whose start
 * lies inside [hole->start, hole->start + hole->len) hits that node.
 *
 * NOTE(review): the `start` assignment and the return statements are not
 * visible in this view.
 */
612 static int compare_hole_range(struct rb_node *node, void *data)
614 struct file_extent_hole *hole;
/* First use of `hole`: the search key passed in through `data`. */
617 hole = (struct file_extent_hole *)data;
/* Second use of `hole`: the tree node being compared. */
620 hole = rb_entry(node, struct file_extent_hole, node);
621 if (start < hole->start)
623 if (start >= hole->start && start < hole->start + hole->len)
629 * Delete a hole in the record
631 * This will do the hole split and is much stricter than add.
/*
 * Remove a range from the hole tree.
 *
 * The range is located with rb_search()/compare_hole_range() and must fit
 * inside a single existing hole (a range running past the hole's end is
 * treated specially). The hole is erased, and whatever is left of it before
 * and/or after the removed range is re-added via add_file_extent_hole().
 *
 * NOTE(review): the second signature line, the setup of `tmp`, the
 * not-found and overflow branches, the guards around the two re-adds and the
 * return value are not visible in this view.
 */
633 static int del_file_extent_hole(struct rb_root *holes,
636 struct file_extent_hole *hole;
637 struct file_extent_hole tmp;
642 struct rb_node *node;
649 node = rb_search(holes, &tmp, compare_hole_range, NULL);
652 hole = rb_entry(node, struct file_extent_hole, node);
653 if (start + len > hole->start + hole->len)
657 * Now there will be no overlap, delete the hole and re-add the
658 * split(s) if they exists.
/* Left remainder: the part of the hole in front of the removed range. */
660 if (start > hole->start) {
661 prev_start = hole->start;
662 prev_len = start - hole->start;
/* Right remainder: the part of the hole behind the removed range. */
665 if (hole->start + hole->len > start + len) {
666 next_start = start + len;
667 next_len = hole->start + hole->len - start - len;
670 rb_erase(node, holes);
673 ret = add_file_extent_hole(holes, prev_start, prev_len);
678 ret = add_file_extent_hole(holes, next_start, next_len);
/*
 * Add every hole of the source tree to `dst`, walking the source in order
 * from rb_first() with rb_next(). Each hole goes through
 * add_file_extent_hole(), so merging applies.
 *
 * NOTE(review): the second signature line, the loop header, the error check
 * on `ret` and the return value are not visible in this view.
 */
685 static int copy_file_extent_holes(struct rb_root *dst,
688 struct file_extent_hole *hole;
689 struct rb_node *node;
692 node = rb_first(src);
694 hole = rb_entry(node, struct file_extent_hole, node);
695 ret = add_file_extent_hole(dst, hole->start, hole->len);
698 node = rb_next(node);
/*
 * Empty the hole tree: repeatedly take the left-most node and erase it.
 * NOTE(review): the loop header and the release of each erased hole are not
 * visible in this view.
 */
703 static void free_file_extent_holes(struct rb_root *holes)
705 struct rb_node *node;
706 struct file_extent_hole *hole;
708 node = rb_first(holes);
710 hole = rb_entry(node, struct file_extent_hole, node);
711 rb_erase(node, holes);
/* Restart from the new left-most node, since the tree changed. */
713 node = rb_first(holes);
/* Forward declaration; the definition is outside this view. */
717 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
/*
 * Note that `root` takes part in transaction `trans`.
 *
 * Acts only the first time a root is seen in a given transaction, i.e. when
 * last_trans differs from trans->transid. It marks the root track_dirty,
 * stamps last_trans, sets commit_root to the current root node and calls
 * extent_buffer_get() on that node (presumably taking a reference).
 */
719 static void record_root_in_trans(struct btrfs_trans_handle *trans,
720 struct btrfs_root *root)
722 if (root->last_trans != trans->transid) {
723 root->track_dirty = 1;
724 root->last_trans = trans->transid;
725 root->commit_root = root->node;
726 extent_buffer_get(root->node);
/*
 * Map the file-type bits of an inode mode to the matching BTRFS_FT_* value
 * through a lookup table indexed by (imode & S_IFMT) >> S_SHIFT. Table slots
 * without an initialiser are zero.
 *
 * NOTE(review): the table has S_IFMT >> S_SHIFT elements, but the index
 * reaches S_IFMT >> S_SHIFT itself when every S_IFMT bit is set in `imode`,
 * which is one past the end. Confirm callers never pass such a mode, or size
 * the table one larger. The S_SHIFT definition is not visible in this view.
 */
730 static u8 imode_to_type(u32 imode)
733 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
734 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
735 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
736 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
737 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
738 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
739 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
740 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
743 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
/*
 * rb-tree comparator for struct device_record, keyed on devid.
 * NOTE(review): the return statements are not visible in this view.
 */
747 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
749 struct device_record *rec1;
750 struct device_record *rec2;
752 rec1 = rb_entry(node1, struct device_record, node);
753 rec2 = rb_entry(node2, struct device_record, node);
754 if (rec1->devid > rec2->devid)
756 else if (rec1->devid < rec2->devid)
/*
 * Make a deep copy of an inode record.
 *
 * The struct itself is copied bytewise, then the three owned containers are
 * re-initialised and rebuilt so the clone does not alias the original. Each
 * backref is copied together with its trailing name bytes, each orphan
 * extent is copied, and the hole tree is copied through
 * copy_file_extent_holes().
 *
 * Returns ERR_PTR(-ENOMEM) when the record itself cannot be allocated. The
 * tail of the function undoes a partially built clone.
 *
 * NOTE(review): the allocation checks inside the loops, the labels and gotos
 * of the cleanup path, the frees and the final returns are not visible in
 * this view.
 */
762 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
764 struct inode_record *rec;
765 struct inode_backref *backref;
766 struct inode_backref *orig;
767 struct inode_backref *tmp;
768 struct orphan_data_extent *src_orphan;
769 struct orphan_data_extent *dst_orphan;
774 rec = malloc(sizeof(*rec));
776 return ERR_PTR(-ENOMEM);
777 memcpy(rec, orig_rec, sizeof(*rec));
/* The memcpy copied the original's list heads and tree root; reset them before rebuilding. */
779 INIT_LIST_HEAD(&rec->backrefs);
780 INIT_LIST_HEAD(&rec->orphan_extents);
781 rec->holes = RB_ROOT;
783 list_for_each_entry(orig, &orig_rec->backrefs, list) {
/* A backref carries its name inline after the struct, plus a terminating byte. */
784 size = sizeof(*orig) + orig->namelen + 1;
785 backref = malloc(size);
790 memcpy(backref, orig, size);
791 list_add_tail(&backref->list, &rec->backrefs);
793 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
794 dst_orphan = malloc(sizeof(*dst_orphan));
799 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
800 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
802 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
/* Cleanup of a partially built clone starts here. */
809 rb = rb_first(&rec->holes);
811 struct file_extent_hole *hole;
813 hole = rb_entry(rb, struct file_extent_hole, node);
819 if (!list_empty(&rec->backrefs))
820 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
821 list_del(&orig->list);
/*
 * NOTE(review): `orig` and `tmp` are struct inode_backref pointers, but the
 * entries on rec->orphan_extents are struct orphan_data_extent (see the copy
 * loop above). Dedicated struct orphan_data_extent cursors should be used
 * here; confirm and fix.
 */
825 if (!list_empty(&rec->orphan_extents))
826 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
827 list_del(&orig->list);
/*
 * Print, on stdout, a heading followed by one line per orphan data extent in
 * the list. Does nothing visible for an empty list.
 * NOTE(review): the second signature line and the trailing printf arguments
 * are not visible in this view.
 */
836 static void print_orphan_data_extents(struct list_head *orphan_extents,
839 struct orphan_data_extent *orphan;
841 if (list_empty(orphan_extents))
843 printf("The following data extent is lost in tree %llu:\n",
845 list_for_each_entry(orphan, orphan_extents, list) {
846 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
847 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Write a one-line, human-readable summary of an inode record's error bits
 * to stderr: the root and inode numbers, the raw error mask in hex, then one
 * ", <text>" fragment per I_ERR_* bit that is set. Orphan extents and file
 * extent holes are listed afterwards when the corresponding bits are set.
 *
 * NOTE(review): some control-flow lines (early exit, loop headers, else
 * branches) are not visible in this view.
 */
852 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
854 u64 root_objectid = root->root_key.objectid;
855 int errors = rec->errors;
859 /* For a reloc root, report the objectid of its corresponding fs root */
860 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
861 root_objectid = root->root_key.offset;
/* Emitted directly in front of "root", so the line starts with "relocroot". */
862 fprintf(stderr, "reloc");
864 fprintf(stderr, "root %llu inode %llu errors %x",
865 (unsigned long long) root_objectid,
866 (unsigned long long) rec->ino, rec->errors);
868 if (errors & I_ERR_NO_INODE_ITEM)
869 fprintf(stderr, ", no inode item");
870 if (errors & I_ERR_NO_ORPHAN_ITEM)
871 fprintf(stderr, ", no orphan item");
872 if (errors & I_ERR_DUP_INODE_ITEM)
873 fprintf(stderr, ", dup inode item");
874 if (errors & I_ERR_DUP_DIR_INDEX)
875 fprintf(stderr, ", dup dir index");
876 if (errors & I_ERR_ODD_DIR_ITEM)
877 fprintf(stderr, ", odd dir item");
878 if (errors & I_ERR_ODD_FILE_EXTENT)
879 fprintf(stderr, ", odd file extent");
880 if (errors & I_ERR_BAD_FILE_EXTENT)
881 fprintf(stderr, ", bad file extent");
882 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
883 fprintf(stderr, ", file extent overlap");
884 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
885 fprintf(stderr, ", file extent discount");
886 if (errors & I_ERR_DIR_ISIZE_WRONG)
887 fprintf(stderr, ", dir isize wrong");
888 if (errors & I_ERR_FILE_NBYTES_WRONG)
889 fprintf(stderr, ", nbytes wrong");
890 if (errors & I_ERR_ODD_CSUM_ITEM)
891 fprintf(stderr, ", odd csum item");
892 if (errors & I_ERR_SOME_CSUM_MISSING)
893 fprintf(stderr, ", some csum missing");
894 if (errors & I_ERR_LINK_COUNT_WRONG)
895 fprintf(stderr, ", link count wrong");
896 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
897 fprintf(stderr, ", orphan file extent");
898 fprintf(stderr, "\n");
899 /* Print the orphan extents if needed */
900 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
/*
 * NOTE(review): this passes root->objectid, not the root_objectid local that
 * was remapped for reloc roots above - confirm which one is intended.
 */
901 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
903 /* Print the holes if needed */
904 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
905 struct file_extent_hole *hole;
906 struct rb_node *node;
909 node = rb_first(&rec->holes);
910 fprintf(stderr, "Found file extent holes:\n");
913 hole = rb_entry(node, struct file_extent_hole, node);
914 fprintf(stderr, "\tstart: %llu, len: %llu\n",
915 hole->start, hole->len);
916 node = rb_next(node);
/* Alternative output: a single hole starting at offset 0. */
919 fprintf(stderr, "\tstart: 0, len: %llu\n",
921 root->fs_info->sectorsize));
/*
 * Append one ", <text>" fragment to stderr for every REF_ERR_* bit set in
 * `errors`, in ascending bit order, then end the line with a newline. With
 * no bits set only the newline is written. The caller is expected to have
 * printed the start of the line already.
 */
925 static void print_ref_error(int errors)
927 if (errors & REF_ERR_NO_DIR_ITEM)
928 fprintf(stderr, ", no dir item");
929 if (errors & REF_ERR_NO_DIR_INDEX)
930 fprintf(stderr, ", no dir index");
931 if (errors & REF_ERR_NO_INODE_REF)
932 fprintf(stderr, ", no inode ref");
933 if (errors & REF_ERR_DUP_DIR_ITEM)
934 fprintf(stderr, ", dup dir item");
935 if (errors & REF_ERR_DUP_DIR_INDEX)
936 fprintf(stderr, ", dup dir index");
937 if (errors & REF_ERR_DUP_INODE_REF)
938 fprintf(stderr, ", dup inode ref");
939 if (errors & REF_ERR_INDEX_UNMATCH)
940 fprintf(stderr, ", index mismatch");
941 if (errors & REF_ERR_FILETYPE_UNMATCH)
942 fprintf(stderr, ", filetype mismatch");
943 if (errors & REF_ERR_NAME_TOO_LONG)
944 fprintf(stderr, ", name too long");
945 if (errors & REF_ERR_NO_ROOT_REF)
946 fprintf(stderr, ", no root ref");
947 if (errors & REF_ERR_NO_ROOT_BACKREF)
948 fprintf(stderr, ", no root backref");
949 if (errors & REF_ERR_DUP_ROOT_REF)
950 fprintf(stderr, ", dup root ref");
951 if (errors & REF_ERR_DUP_ROOT_BACKREF)
952 fprintf(stderr, ", dup root backref");
953 fprintf(stderr, "\n");
/*
 * Fetch the inode record for `ino` from `inode_cache`.
 *
 * When `mod` is set and the cached record is shared (refs > 1), the cache
 * entry is redirected to a private clone made by clone_inode_rec(). The
 * remaining code builds a fresh zeroed record (extent_start = (u64)-1, empty
 * lists, empty hole tree), wraps it in a ptr_node covering [ino, ino + 1)
 * and inserts it into the cache.
 *
 * Failures are reported as ERR_PTR values (-ENOMEM and -EEXIST are visible),
 * so callers must test the result with IS_ERR().
 *
 * NOTE(review): the second signature line, the hit/miss branching, the
 * reference-count updates and the success return are not visible in this
 * view.
 */
956 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
959 struct ptr_node *node;
960 struct cache_extent *cache;
961 struct inode_record *rec = NULL;
964 cache = lookup_cache_extent(inode_cache, ino, 1);
966 node = container_of(cache, struct ptr_node, cache);
/* Never modify a record that others still reference: clone it first. */
968 if (mod && rec->refs > 1) {
969 node->data = clone_inode_rec(rec);
970 if (IS_ERR(node->data))
976 rec = calloc(1, sizeof(*rec));
978 return ERR_PTR(-ENOMEM);
/* (u64)-1 is the "no extent seen yet" value that merge_inode_recs() tests for. */
980 rec->extent_start = (u64)-1;
982 INIT_LIST_HEAD(&rec->backrefs);
983 INIT_LIST_HEAD(&rec->orphan_extents);
984 rec->holes = RB_ROOT;
986 node = malloc(sizeof(*node));
989 return ERR_PTR(-ENOMEM);
991 node->cache.start = ino;
992 node->cache.size = 1;
995 if (ino == BTRFS_FREE_INO_OBJECTID)
998 ret = insert_cache_extent(inode_cache, &node->cache);
1000 return ERR_PTR(-EEXIST);
/*
 * Drain an orphan-extent list by unlinking the first entry until the list is
 * empty.
 * NOTE(review): the release of each unlinked entry is not visible in this
 * view.
 */
1005 static void free_orphan_data_extents(struct list_head *orphan_extents)
1007 struct orphan_data_extent *orphan;
1009 while (!list_empty(orphan_extents)) {
1010 orphan = list_entry(orphan_extents->next,
1011 struct orphan_data_extent, list);
1012 list_del(&orphan->list);
/*
 * Drop one reference on `rec`. While other references remain (the
 * decremented count is still positive) the teardown is skipped. Otherwise
 * the backref list, the orphan-extent list and the hole tree are emptied.
 * NOTE(review): the frees of the backrefs and of `rec` itself are not
 * visible in this view.
 */
1017 static void free_inode_rec(struct inode_record *rec)
1019 struct inode_backref *backref;
1021 if (--rec->refs > 0)
1024 while (!list_empty(&rec->backrefs)) {
1025 backref = to_inode_backref(rec->backrefs.next);
1026 list_del(&backref->list);
1029 free_orphan_data_extents(&rec->orphan_extents);
1030 free_file_extent_holes(&rec->holes);
/*
 * Test whether a record is fully resolved: no errors, checked, inode item
 * found, nlink equal to the number of links found, and no backrefs left.
 * NOTE(review): the return statements are not visible in this view.
 */
1034 static int can_free_inode_rec(struct inode_record *rec)
1036 if (!rec->errors && rec->checked && rec->found_inode_item &&
1037 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Re-evaluate an inode record and release it if nothing is left to report.
 *
 * Steps, in order:
 *  1. Nothing can be decided before the inode item has been seen.
 *  2. Backrefs with both a dir item and a dir index get their file type
 *     checked against the inode mode. Those that are error-free, have an
 *     inode ref, and belong to an inode whose link count matches are
 *     unlinked from the list.
 *  3. Once the record is checked and not being merged, consistency errors
 *     are derived by file type (directory size, unexpected extents or dir
 *     items, nbytes, extent coverage, checksum presence).
 *  4. If can_free_inode_rec() agrees, the record is removed from
 *     `inode_cache` and released.
 *
 * NOTE(review): the early returns and the frees of removed backrefs and of
 * the ptr_node are not visible in this view.
 */
1042 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1043 struct inode_record *rec)
1045 struct cache_extent *cache;
1046 struct inode_backref *tmp, *backref;
1047 struct ptr_node *node;
1050 if (!rec->found_inode_item)
1053 filetype = imode_to_type(rec->imode);
1054 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1055 if (backref->found_dir_item && backref->found_dir_index) {
1056 if (backref->filetype != filetype)
1057 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1058 if (!backref->errors && backref->found_inode_ref &&
1059 rec->nlink == rec->found_link) {
1060 list_del(&backref->list);
1066 if (!rec->checked || rec->merging)
1069 if (S_ISDIR(rec->imode)) {
1070 if (rec->found_size != rec->isize)
1071 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1072 if (rec->found_file_extent)
1073 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1074 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1075 if (rec->found_dir_item)
1076 rec->errors |= I_ERR_ODD_DIR_ITEM;
1077 if (rec->found_size != rec->nbytes)
1078 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
/* Coverage gaps only count for linked inodes, and only when no_holes is off. */
1079 if (rec->nlink > 0 && !no_holes &&
1080 (rec->extent_end < rec->isize ||
1081 first_extent_gap(&rec->holes) < rec->isize))
1082 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1085 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1086 if (rec->found_csum_item && rec->nodatasum)
1087 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1088 if (rec->some_csum_missing && !rec->nodatasum)
1089 rec->errors |= I_ERR_SOME_CSUM_MISSING;
/* A shared record must have been cloned before reaching this point. */
1092 BUG_ON(rec->refs != 1);
1093 if (can_free_inode_rec(rec)) {
1094 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1095 node = container_of(cache, struct ptr_node, cache);
1096 BUG_ON(node->data != rec);
1097 remove_cache_extent(inode_cache, &node->cache);
1099 free_inode_rec(rec);
/*
 * Search `root` for the orphan item of inode `ino` (objectid
 * BTRFS_ORPHAN_OBJECTID, type BTRFS_ORPHAN_ITEM_KEY). The search is
 * read-only: no transaction, and both trailing arguments are 0. The path is
 * released before returning.
 * NOTE(review): the key.offset assignment and the mapping of `ret` to the
 * return value are not visible in this view.
 */
1103 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1105 struct btrfs_path path;
1106 struct btrfs_key key;
1109 key.objectid = BTRFS_ORPHAN_OBJECTID;
1110 key.type = BTRFS_ORPHAN_ITEM_KEY;
1113 btrfs_init_path(&path);
1114 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1115 btrfs_release_path(&path);
/*
 * Fold an INODE_ITEM found at `slot` of leaf `eb` into the record currently
 * tracked by `active_node`.
 *
 * The current record must match key->objectid and must not be shared. A
 * second inode item for the same inode is flagged with
 * I_ERR_DUP_INODE_ITEM. Otherwise nlink, size, nbytes and mode are copied
 * from the item, and an inode with nlink == 0 is provisionally flagged
 * I_ERR_NO_ORPHAN_ITEM. The record is then offered to
 * maybe_free_inode_rec().
 *
 * NOTE(review): the return statements and the nodatasum assignment are not
 * visible in this view.
 */
1121 static int process_inode_item(struct extent_buffer *eb,
1122 int slot, struct btrfs_key *key,
1123 struct shared_node *active_node)
1125 struct inode_record *rec;
1126 struct btrfs_inode_item *item;
1128 rec = active_node->current;
1129 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1130 if (rec->found_inode_item) {
1131 rec->errors |= I_ERR_DUP_INODE_ITEM;
1134 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1135 rec->nlink = btrfs_inode_nlink(eb, item);
1136 rec->isize = btrfs_inode_size(eb, item);
1137 rec->nbytes = btrfs_inode_nbytes(eb, item);
1138 rec->imode = btrfs_inode_mode(eb, item);
1139 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1141 rec->found_inode_item = 1;
1142 if (rec->nlink == 0)
1143 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1144 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find the backref of `rec` for (dir, name), creating it if needed.
 *
 * Existing entries are matched on directory, name length and name bytes.
 * A new entry is allocated with room for the name plus a terminating NUL,
 * its fixed part is zeroed, the name is copied in and terminated, and the
 * entry is appended to rec->backrefs.
 *
 * NOTE(review): the second signature line, the loop's break/continue/return
 * statements, the allocation check, the `dir` assignment and the final
 * return are not visible in this view.
 */
1148 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1150 int namelen, u64 dir)
1152 struct inode_backref *backref;
1154 list_for_each_entry(backref, &rec->backrefs, list) {
/* BTRFS_MULTIPLE_OBJECTIDS records get special treatment during the lookup. */
1155 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1157 if (backref->dir != dir || backref->namelen != namelen)
1159 if (memcmp(name, backref->name, namelen))
1164 backref = malloc(sizeof(*backref) + namelen + 1);
/* Only the fixed part is zeroed; the name bytes are filled in just below. */
1167 memset(backref, 0, sizeof(*backref));
1169 backref->namelen = namelen;
1170 memcpy(backref->name, name, namelen);
1171 backref->name[namelen] = '\0';
1172 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Record that an item of type `itemtype` naming inode `ino` as
 * (dir, name, index) was seen.
 *
 * The inode record is obtained for modification and the matching backref is
 * found or created. `errors` is OR-ed into the backref, then, by item type:
 *  - DIR_INDEX: flags a duplicate, an index that disagrees with a previously
 *    seen inode ref, or a file type that disagrees with a previously seen
 *    dir item; stores index and filetype.
 *  - DIR_ITEM: flags a duplicate or a file type that disagrees with a
 *    previously seen dir index; stores filetype.
 *  - INODE_REF / INODE_EXTREF: flags a duplicate or an index that disagrees
 *    with a previously seen dir index; stores index and ref_type.
 * Finally the record is offered to maybe_free_inode_rec().
 *
 * Failure to obtain the record is fatal (BUG_ON).
 *
 * NOTE(review): link-count bookkeeping, the else branches and the return
 * value are not visible in this view.
 */
1176 static int add_inode_backref(struct cache_tree *inode_cache,
1177 u64 ino, u64 dir, u64 index,
1178 const char *name, int namelen,
1179 u8 filetype, u8 itemtype, int errors)
1181 struct inode_record *rec;
1182 struct inode_backref *backref;
1184 rec = get_inode_rec(inode_cache, ino, 1);
1185 BUG_ON(IS_ERR(rec));
1186 backref = get_inode_backref(rec, name, namelen, dir);
1189 backref->errors |= errors;
1190 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1191 if (backref->found_dir_index)
1192 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1193 if (backref->found_inode_ref && backref->index != index)
1194 backref->errors |= REF_ERR_INDEX_UNMATCH;
1195 if (backref->found_dir_item && backref->filetype != filetype)
1196 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1198 backref->index = index;
1199 backref->filetype = filetype;
1200 backref->found_dir_index = 1;
1201 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1203 if (backref->found_dir_item)
1204 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1205 if (backref->found_dir_index && backref->filetype != filetype)
1206 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1208 backref->filetype = filetype;
1209 backref->found_dir_item = 1;
1210 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1211 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1212 if (backref->found_inode_ref)
1213 backref->errors |= REF_ERR_DUP_INODE_REF;
1214 if (backref->found_dir_index && backref->index != index)
1215 backref->errors |= REF_ERR_INDEX_UNMATCH;
1217 backref->index = index;
/* Remember whether this was a plain ref or an extref; merge_inode_recs() replays it. */
1219 backref->ref_type = itemtype;
1220 backref->found_inode_ref = 1;
1225 maybe_free_inode_rec(inode_cache, rec);
/*
 * Fold everything known about `src` into `dst` (both describe the same
 * inode); `dst_cache` is the cache that holds `dst`.
 *
 * Each backref of `src` is replayed through add_inode_backref(), once per
 * item kind it has seen. Found-flags are OR-ed across, hole trees are
 * merged, link and size totals are added, the covered extent range is
 * extended (flagging an overlap, or recording a hole for a gap), and error
 * bits are combined. The inode-item fields are taken from `src` only if
 * `dst` has none yet; two inode items raise I_ERR_DUP_INODE_ITEM.
 *
 * NOTE(review): the dir_count bookkeeping, the `merging` flag handling, the
 * error checks on `ret`, the else keywords and the return value are not
 * visible in this view.
 */
1229 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1230 struct cache_tree *dst_cache)
1232 struct inode_backref *backref;
1237 list_for_each_entry(backref, &src->backrefs, list) {
1238 if (backref->found_dir_index) {
1239 add_inode_backref(dst_cache, dst->ino, backref->dir,
1240 backref->index, backref->name,
1241 backref->namelen, backref->filetype,
1242 BTRFS_DIR_INDEX_KEY, backref->errors);
1244 if (backref->found_dir_item) {
1246 add_inode_backref(dst_cache, dst->ino,
1247 backref->dir, 0, backref->name,
1248 backref->namelen, backref->filetype,
1249 BTRFS_DIR_ITEM_KEY, backref->errors);
1251 if (backref->found_inode_ref) {
1252 add_inode_backref(dst_cache, dst->ino,
1253 backref->dir, backref->index,
1254 backref->name, backref->namelen, 0,
1255 backref->ref_type, backref->errors);
1259 if (src->found_dir_item)
1260 dst->found_dir_item = 1;
1261 if (src->found_file_extent)
1262 dst->found_file_extent = 1;
1263 if (src->found_csum_item)
1264 dst->found_csum_item = 1;
1265 if (src->some_csum_missing)
1266 dst->some_csum_missing = 1;
1267 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1268 ret = copy_file_extent_holes(&dst->holes, &src->holes);
/* dir_count is subtracted from src's link count before it is added to dst. */
1273 BUG_ON(src->found_link < dir_count);
1274 dst->found_link += src->found_link - dir_count;
1275 dst->found_size += src->found_size;
/* (u64)-1 in extent_start means "no extent seen" (see get_inode_rec()). */
1276 if (src->extent_start != (u64)-1) {
1277 if (dst->extent_start == (u64)-1) {
1278 dst->extent_start = src->extent_start;
1279 dst->extent_end = src->extent_end;
1281 if (dst->extent_end > src->extent_start)
1282 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1283 else if (dst->extent_end < src->extent_start) {
1284 ret = add_file_extent_hole(&dst->holes,
1286 src->extent_start - dst->extent_end);
1288 if (dst->extent_end < src->extent_end)
1289 dst->extent_end = src->extent_end;
1293 dst->errors |= src->errors;
1294 if (src->found_inode_item) {
1295 if (!dst->found_inode_item) {
1296 dst->nlink = src->nlink;
1297 dst->isize = src->isize;
1298 dst->nbytes = src->nbytes;
1299 dst->imode = src->imode;
1300 dst->nodatasum = src->nodatasum;
1301 dst->found_inode_item = 1;
1303 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Move the inode records accumulated in `src_node` into `dst_node`.
 *
 * One reference on `src_node` is dropped first. The root_cache entries are
 * processed, then the same is done for inode_cache. Each entry is
 * re-inserted into the destination cache. When an entry for that inode
 * already exists (-EEXIST), the two records are merged with
 * merge_inode_recs(), the merged record may be marked checked, and the
 * source record is released. Finally the destination's `current` record is
 * advanced to the source's current inode when that inode number is larger
 * (or when the destination has none).
 *
 * Allocation or lookup failures for the conflicting record are fatal
 * (BUG_ON).
 *
 * NOTE(review): the loop headers and gotos, the branch taken when the source
 * refcount reaches zero, the handling of `ins`, the frees and the return
 * value are not visible in this view.
 */
1311 static int splice_shared_node(struct shared_node *src_node,
1312 struct shared_node *dst_node)
1314 struct cache_extent *cache;
1315 struct ptr_node *node, *ins;
1316 struct cache_tree *src, *dst;
1317 struct inode_record *rec, *conflict;
1318 u64 current_ino = 0;
1322 if (--src_node->refs == 0)
1324 if (src_node->current)
1325 current_ino = src_node->current->ino;
/* First pass works on the root caches; the inode caches follow further down. */
1327 src = &src_node->root_cache;
1328 dst = &dst_node->root_cache;
1330 cache = search_cache_extent(src, 0);
1332 node = container_of(cache, struct ptr_node, cache);
/* Step to the next entry before the current one may be unlinked. */
1334 cache = next_cache_extent(cache);
1337 remove_cache_extent(src, &node->cache);
1340 ins = malloc(sizeof(*ins));
1342 ins->cache.start = node->cache.start;
1343 ins->cache.size = node->cache.size;
1347 ret = insert_cache_extent(dst, &ins->cache);
1348 if (ret == -EEXIST) {
1349 conflict = get_inode_rec(dst, rec->ino, 1);
1350 BUG_ON(IS_ERR(conflict));
1351 merge_inode_recs(rec, conflict, dst);
1353 conflict->checked = 1;
/* The merged record may be freed below, so do not keep it as `current`. */
1354 if (dst_node->current == conflict)
1355 dst_node->current = NULL;
1357 maybe_free_inode_rec(dst, conflict);
1358 free_inode_rec(rec);
1365 if (src == &src_node->root_cache) {
1366 src = &src_node->inode_cache;
1367 dst = &dst_node->inode_cache;
1371 if (current_ino > 0 && (!dst_node->current ||
1372 current_ino > dst_node->current->ino)) {
1373 if (dst_node->current) {
1374 dst_node->current->checked = 1;
1375 maybe_free_inode_rec(dst, dst_node->current);
1377 dst_node->current = get_inode_rec(dst, current_ino, 1);
1378 BUG_ON(IS_ERR(dst_node->current));
/*
 * Per-entry destructor for cache trees that hold inode records: maps the
 * cache_extent back to its enclosing ptr_node and releases an inode record
 * with free_inode_rec().
 */
1383 static void free_inode_ptr(struct cache_extent *cache)
1385 struct ptr_node *node;
1386 struct inode_record *rec;
1388 node = container_of(cache, struct ptr_node, cache);
1390 free_inode_rec(rec);
/*
 * Instantiate the tree-freeing helper for inode record caches, with
 * free_inode_ptr() as the per-entry callback.
 * NOTE(review): presumably this expands to free_inode_recs_tree(), which
 * enter_shared_node() calls - confirm against the macro definition.
 */
1394 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the shared_node registered for bytenr in the 'shared' cache tree.
 * The lookup uses a one byte range starting at bytenr, and the hit is
 * converted from its embedded cache_extent back to the shared_node.
 */
1396 static struct shared_node *find_shared_node(struct cache_tree *shared,
1399 struct cache_extent *cache;
1400 struct shared_node *node;
1402 cache = lookup_cache_extent(shared, bytenr, 1);
1404 node = container_of(cache, struct shared_node, cache);
/*
 * Allocate a zeroed shared_node keyed by bytenr (range size 1), initialise
 * its root and inode cache trees, and insert it into the 'shared' tree.
 * NOTE(review): refs is presumably stored as the node's reference count -
 * confirm, the assignment is not among the lines shown here.
 */
1410 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1413 struct shared_node *node;
1415 node = calloc(1, sizeof(*node));
1418 node->cache.start = bytenr;
1419 node->cache.size = 1;
1420 cache_tree_init(&node->root_cache);
1421 cache_tree_init(&node->inode_cache);
1424 ret = insert_cache_extent(shared, &node->cache);
/*
 * Called on the way down the tree for the block at bytenr on 'level'.
 *
 * The block is looked up in wc->shared. A node created through
 * add_shared_node() becomes the new active node: it is stored in
 * wc->nodes[level] and wc->active_node is set to level (presumably only
 * when the first lookup found nothing - TODO confirm).
 *
 * For a node that is already tracked there are two cases:
 *  - the active node is the root level and the root item has zero refs:
 *    one reference is dropped and, once it reaches zero, both record caches
 *    are freed and the node is removed from wc->shared;
 *  - otherwise the node's records are spliced into the active node and the
 *    node is removed from wc->shared when its reference count is zero.
 */
1429 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1430 struct walk_control *wc, int level)
1432 struct shared_node *node;
1433 struct shared_node *dest;
1436 if (level == wc->active_node)
/* A shared block must be below the level that is currently active. */
1439 BUG_ON(wc->active_node <= level);
1440 node = find_shared_node(&wc->shared, bytenr);
1442 ret = add_shared_node(&wc->shared, bytenr, refs);
1444 node = find_shared_node(&wc->shared, bytenr);
1445 wc->nodes[level] = node;
1446 wc->active_node = level;
1450 if (wc->root_level == wc->active_node &&
1451 btrfs_root_refs(&root->root_item) == 0) {
1452 if (--node->refs == 0) {
1453 free_inode_recs_tree(&node->root_cache);
1454 free_inode_recs_tree(&node->inode_cache);
1455 remove_cache_extent(&wc->shared, &node->cache);
/* Hand the records collected so far over to the active node. */
1461 dest = wc->nodes[wc->active_node];
1462 splice_shared_node(node, dest);
1463 if (node->refs == 0) {
1464 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Called on the way back up when the walk leaves the active shared node at
 * 'level'.
 *
 * Levels above 'level' are scanned to choose the next active node. The
 * departing node is detached from wc->nodes[] and, if the new active level
 * is below the root level or the root item still has references, its
 * records are spliced into the new active node.
 */
1470 static int leave_shared_node(struct btrfs_root *root,
1471 struct walk_control *wc, int level)
1473 struct shared_node *node;
1474 struct shared_node *dest;
1477 if (level == wc->root_level)
1480 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
/* The scan above must have stopped on a valid level. */
1484 BUG_ON(i >= BTRFS_MAX_LEVEL);
1486 node = wc->nodes[wc->active_node];
1487 wc->nodes[wc->active_node] = NULL;
1488 wc->active_node = i;
1490 dest = wc->nodes[wc->active_node];
1491 if (wc->active_node < wc->root_level ||
1492 btrfs_root_refs(&root->root_item) > 0) {
1493 BUG_ON(node->refs <= 1);
1494 splice_shared_node(node, dest);
1496 BUG_ON(node->refs < 2);
1505 * 1 - if the root with id child_root_id is a child of root parent_root_id
1506 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1507 * has other root(s) as parent(s)
1508 * 2 - if the root child_root_id doesn't have any parent roots
1510 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1513 struct btrfs_path path;
1514 struct btrfs_key key;
1515 struct extent_buffer *leaf;
1519 btrfs_init_path(&path);
/* First try: a ROOT_REF item keyed (parent_root_id, child_root_id). */
1521 key.objectid = parent_root_id;
1522 key.type = BTRFS_ROOT_REF_KEY;
1523 key.offset = child_root_id;
1524 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1528 btrfs_release_path(&path);
/* Second try: walk the ROOT_BACKREF items that belong to child_root_id. */
1532 key.objectid = child_root_id;
1533 key.type = BTRFS_ROOT_BACKREF_KEY;
1535 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1541 leaf = path.nodes[0];
/* Slot ran past this leaf: continue in the next leaf of the tree root. */
1542 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1543 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1546 leaf = path.nodes[0];
1549 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Test for leaving the run of backref items of child_root_id. */
1550 if (key.objectid != child_root_id ||
1551 key.type != BTRFS_ROOT_BACKREF_KEY)
/* For a ROOT_BACKREF item the key offset is compared with the parent id. */
1556 if (key.offset == parent_root_id) {
1557 btrfs_release_path(&path);
1564 btrfs_release_path(&path);
/* No match: 0 if another parent was seen, 2 if there was none. */
1567 return has_parent ? 0 : 2;
1570 static int process_dir_item(struct extent_buffer *eb,
1571 int slot, struct btrfs_key *key,
1572 struct shared_node *active_node)
1582 struct btrfs_dir_item *di;
1583 struct inode_record *rec;
1584 struct cache_tree *root_cache;
1585 struct cache_tree *inode_cache;
1586 struct btrfs_key location;
1587 char namebuf[BTRFS_NAME_LEN];
1589 root_cache = &active_node->root_cache;
1590 inode_cache = &active_node->inode_cache;
1591 rec = active_node->current;
1592 rec->found_dir_item = 1;
1594 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1595 total = btrfs_item_size_nr(eb, slot);
1596 while (cur < total) {
1598 btrfs_dir_item_key_to_cpu(eb, di, &location);
1599 name_len = btrfs_dir_name_len(eb, di);
1600 data_len = btrfs_dir_data_len(eb, di);
1601 filetype = btrfs_dir_type(eb, di);
1603 rec->found_size += name_len;
1604 if (cur + sizeof(*di) + name_len > total ||
1605 name_len > BTRFS_NAME_LEN) {
1606 error = REF_ERR_NAME_TOO_LONG;
1608 if (cur + sizeof(*di) > total)
1610 len = min_t(u32, total - cur - sizeof(*di),
1617 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1619 if (key->type == BTRFS_DIR_ITEM_KEY &&
1620 key->offset != btrfs_name_hash(namebuf, len)) {
1621 rec->errors |= I_ERR_ODD_DIR_ITEM;
1622 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1623 key->objectid, key->offset, namebuf, len, filetype,
1624 key->offset, btrfs_name_hash(namebuf, len));
1627 if (location.type == BTRFS_INODE_ITEM_KEY) {
1628 add_inode_backref(inode_cache, location.objectid,
1629 key->objectid, key->offset, namebuf,
1630 len, filetype, key->type, error);
1631 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1632 add_inode_backref(root_cache, location.objectid,
1633 key->objectid, key->offset,
1634 namebuf, len, filetype,
1637 fprintf(stderr, "invalid location in dir item %u\n",
1639 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1640 key->objectid, key->offset, namebuf,
1641 len, filetype, key->type, error);
1644 len = sizeof(*di) + name_len + data_len;
1645 di = (struct btrfs_dir_item *)((char *)di + len);
1648 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1649 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Walk all btrfs_inode_ref entries packed into one INODE_REF item and add
 * an inode backref for each of them. key->objectid and key->offset are
 * passed straight through to add_inode_backref() together with the index
 * stored in the ref.
 *
 * A name that is longer than BTRFS_NAME_LEN or that would cross the item
 * boundary is flagged with REF_ERR_NAME_TOO_LONG.
 */
1654 static int process_inode_ref(struct extent_buffer *eb,
1655 int slot, struct btrfs_key *key,
1656 struct shared_node *active_node)
1664 struct cache_tree *inode_cache;
1665 struct btrfs_inode_ref *ref;
1666 char namebuf[BTRFS_NAME_LEN];
1668 inode_cache = &active_node->inode_cache;
1670 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1671 total = btrfs_item_size_nr(eb, slot);
1672 while (cur < total) {
1673 name_len = btrfs_inode_ref_name_len(eb, ref);
1674 index = btrfs_inode_ref_index(eb, ref);
1676 /* inode_ref + namelen should not cross item boundary */
1677 if (cur + sizeof(*ref) + name_len > total ||
1678 name_len > BTRFS_NAME_LEN) {
1679 if (total < cur + sizeof(*ref))
1682 /* Still try to read out the remaining part */
1683 len = min_t(u32, total - cur - sizeof(*ref),
1685 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the fixed-size ref header directly. */
1691 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1692 add_inode_backref(inode_cache, key->objectid, key->offset,
1693 index, namebuf, len, 0, key->type, error);
/* Step over header plus the on-disk name length to the next ref. */
1695 len = sizeof(*ref) + name_len;
1696 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Walk all btrfs_inode_extref entries packed into one INODE_EXTREF item and
 * add an inode backref for each of them. Unlike a plain inode ref, the
 * parent directory is read from the extref itself, not from the key.
 *
 * A name longer than BTRFS_NAME_LEN is cut to BTRFS_NAME_LEN bytes and
 * flagged with REF_ERR_NAME_TOO_LONG.
 *
 * NOTE(review): in contrast to process_inode_ref(), no check against the
 * item boundary ('total') is visible here before the name is read - confirm
 * whether a crafted name_len can make the read run past the item.
 */
1702 static int process_inode_extref(struct extent_buffer *eb,
1703 int slot, struct btrfs_key *key,
1704 struct shared_node *active_node)
1713 struct cache_tree *inode_cache;
1714 struct btrfs_inode_extref *extref;
1715 char namebuf[BTRFS_NAME_LEN];
1717 inode_cache = &active_node->inode_cache;
1719 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1720 total = btrfs_item_size_nr(eb, slot);
1721 while (cur < total) {
1722 name_len = btrfs_inode_extref_name_len(eb, extref);
1723 index = btrfs_inode_extref_index(eb, extref);
1724 parent = btrfs_inode_extref_parent(eb, extref);
1725 if (name_len <= BTRFS_NAME_LEN) {
1729 len = BTRFS_NAME_LEN;
1730 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the fixed-size extref header directly. */
1732 read_extent_buffer(eb, namebuf,
1733 (unsigned long)(extref + 1), len);
1734 add_inode_backref(inode_cache, key->objectid, parent,
1735 index, namebuf, len, 0, key->type, error);
/* Step over header plus the on-disk name length to the next extref. */
1737 len = sizeof(*extref) + name_len;
1738 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Determine how much of the disk byte range [start, start + len) is covered
 * by checksum items in the csum tree; the result is reported through
 * 'found'.
 *
 * The covered length of one csum item is derived from its item size: every
 * csum_size bytes of item data stand for one sector of checksummed data.
 */
1745 static int count_csum_range(struct btrfs_root *root, u64 start,
1746 u64 len, u64 *found)
1748 struct btrfs_key key;
1749 struct btrfs_path path;
1750 struct extent_buffer *leaf;
1755 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1757 btrfs_init_path(&path);
1759 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1761 key.type = BTRFS_EXTENT_CSUM_KEY;
1763 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * No exact match: inspect the previous item, which is a csum item that
 * may still cover 'start' if it has the csum objectid and key type.
 */
1767 if (ret > 0 && path.slots[0] > 0) {
1768 leaf = path.nodes[0];
1769 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1770 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1771 key.type == BTRFS_EXTENT_CSUM_KEY)
1776 leaf = path.nodes[0];
1777 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1778 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1783 leaf = path.nodes[0];
1786 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1787 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1788 key.type != BTRFS_EXTENT_CSUM_KEY)
1791 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Test for a csum item that starts at or after the end of the range. */
1792 if (key.offset >= start + len)
1795 if (key.offset > start)
1798 size = btrfs_item_size_nr(leaf, path.slots[0]);
/* End of the data range described by this csum item. */
1799 csum_end = key.offset + (size / csum_size) *
1800 root->fs_info->sectorsize;
1801 if (csum_end > start) {
1802 size = min(csum_end - start, len);
1811 btrfs_release_path(&path);
/*
 * Account one EXTENT_DATA item against the inode record that is currently
 * being built (active_node->current).
 *
 * The function
 *  - tracks the covered file range in rec->extent_start / rec->extent_end,
 *    flagging I_ERR_FILE_EXTENT_OVERLAP when the item starts before the
 *    previous end and recording a hole when it starts after it;
 *  - validates inline, regular and preallocated extents and sets
 *    I_ERR_BAD_FILE_EXTENT on inconsistent fields or an unknown type;
 *  - adds the extent length to rec->found_size (for regular/prealloc
 *    extents only if disk_bytenr is non-zero);
 *  - for extents with a disk_bytenr that do not live in the data reloc
 *    tree, counts the checksummed bytes with count_csum_range() and updates
 *    the csum related flags of the record.
 */
1817 static int process_file_extent(struct btrfs_root *root,
1818 struct extent_buffer *eb,
1819 int slot, struct btrfs_key *key,
1820 struct shared_node *active_node)
1822 struct inode_record *rec;
1823 struct btrfs_file_extent_item *fi;
1825 u64 disk_bytenr = 0;
1826 u64 extent_offset = 0;
/* Low bits that must be clear for a sector aligned value. */
1827 u64 mask = root->fs_info->sectorsize - 1;
1831 rec = active_node->current;
1832 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1833 rec->found_file_extent = 1;
/* (u64)-1 marks "no file extent seen yet" for this record. */
1835 if (rec->extent_start == (u64)-1) {
1836 rec->extent_start = key->offset;
1837 rec->extent_end = key->offset;
1840 if (rec->extent_end > key->offset)
1841 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1842 else if (rec->extent_end < key->offset) {
1843 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1844 key->offset - rec->extent_end);
1849 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1850 extent_type = btrfs_file_extent_type(eb, fi);
1852 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1853 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1855 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1856 rec->found_size += num_bytes;
/* Round the inline length up to a full sector. */
1857 num_bytes = (num_bytes + mask) & ~mask;
1858 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1859 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1860 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1861 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1862 extent_offset = btrfs_file_extent_offset(eb, fi);
1863 if (num_bytes == 0 || (num_bytes & mask))
1864 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1865 if (num_bytes + extent_offset >
1866 btrfs_file_extent_ram_bytes(eb, fi))
1867 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not carry any encoding. */
1868 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1869 (btrfs_file_extent_compression(eb, fi) ||
1870 btrfs_file_extent_encryption(eb, fi) ||
1871 btrfs_file_extent_other_encoding(eb, fi)))
1872 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1873 if (disk_bytenr > 0)
1874 rec->found_size += num_bytes;
1876 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1878 rec->extent_end = key->offset + num_bytes;
1881 * The data reloc tree will copy full extents into its inode and then
1882 * copy the corresponding csums. Because the extent it copied could be
1883 * a preallocated extent that hasn't been written to yet there may be no
1884 * csums to copy, ergo we won't have csums for our file extent. This is
1885 * ok so just don't bother checking csums if the inode belongs to the
1888 if (disk_bytenr > 0 &&
1889 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1891 if (btrfs_file_extent_compression(eb, fi))
1892 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1894 disk_bytenr += extent_offset;
1896 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1899 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1901 rec->found_csum_item = 1;
1902 if (found < num_bytes)
1903 rec->some_csum_missing = 1;
1904 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1906 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Feed every item of the leaf 'eb' into the inode records of the currently
 * active shared node.
 *
 * Whenever the key objectid moves past the inode that is currently being
 * collected, the old record is marked as checked (and possibly freed) and a
 * record for the new inode becomes current. Each item is then dispatched by
 * key type to the matching process_*() helper.
 */
1912 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1913 struct walk_control *wc)
1915 struct btrfs_key key;
1919 struct cache_tree *inode_cache;
1920 struct shared_node *active_node;
/* Special case: the walk is at root level and the root has zero refs. */
1922 if (wc->root_level == wc->active_node &&
1923 btrfs_root_refs(&root->root_item) == 0)
1926 active_node = wc->nodes[wc->active_node];
1927 inode_cache = &active_node->inode_cache;
1928 nritems = btrfs_header_nritems(eb);
1929 for (i = 0; i < nritems; i++) {
1930 btrfs_item_key_to_cpu(eb, &key, i);
/* Free space objects and orphan items get separate treatment. */
1932 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1934 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Moved on to a higher inode number: close the previous record. */
1937 if (active_node->current == NULL ||
1938 active_node->current->ino < key.objectid) {
1939 if (active_node->current) {
1940 active_node->current->checked = 1;
1941 maybe_free_inode_rec(inode_cache,
1942 active_node->current);
1944 active_node->current = get_inode_rec(inode_cache,
1946 BUG_ON(IS_ERR(active_node->current));
1949 case BTRFS_DIR_ITEM_KEY:
1950 case BTRFS_DIR_INDEX_KEY:
1951 ret = process_dir_item(eb, i, &key, active_node);
1953 case BTRFS_INODE_REF_KEY:
1954 ret = process_inode_ref(eb, i, &key, active_node);
1956 case BTRFS_INODE_EXTREF_KEY:
1957 ret = process_inode_extref(eb, i, &key, active_node);
1959 case BTRFS_INODE_ITEM_KEY:
1960 ret = process_inode_item(eb, i, &key, active_node);
1962 case BTRFS_EXTENT_DATA_KEY:
1963 ret = process_file_extent(root, eb, i, &key,
/* Per level: bytenr of the tree block whose info is cached below. */
1974 u64 bytenr[BTRFS_MAX_LEVEL];
/* Per level: reference count returned by btrfs_lookup_extent_info(). */
1975 u64 refs[BTRFS_MAX_LEVEL];
/* Per level: non-zero if the block has to be checked in the current root. */
1976 int need_check[BTRFS_MAX_LEVEL];
/*
 * Forward declarations needed by process_one_leaf_v2(), which calls both
 * functions before their definitions.
 */
1979 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1980 struct node_refs *nrefs, u64 level);
1981 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1982 unsigned int ext_ref);
1985 * Returns >0 Found error, not fatal, should continue
1986 * Returns <0 Fatal error, must exit the whole check
1987 * Returns 0 No errors found
1989 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1990 struct node_refs *nrefs, int *level, int ext_ref)
1992 struct extent_buffer *cur = path->nodes[0];
1993 struct btrfs_key key;
1997 int root_level = btrfs_header_level(root->node);
1999 int ret = 0; /* Final return value */
2000 int err = 0; /* Positive error bitmap */
/* Remember which leaf we started on to detect a leaf switch later. */
2002 cur_bytenr = cur->start;
2004 /* skip to first inode item or the first inode number change */
2005 nritems = btrfs_header_nritems(cur);
2006 for (i = 0; i < nritems; i++) {
2007 btrfs_item_key_to_cpu(cur, &key, i);
2009 first_ino = key.objectid;
2010 if (key.type == BTRFS_INODE_ITEM_KEY ||
2011 (first_ino && first_ino != key.objectid))
2015 path->slots[0] = nritems;
/* Error bits reported by check_inode_item() accumulate in err. */
2021 err |= check_inode_item(root, path, ext_ref);
2023 /* modify cur since check_inode_item may change path */
2024 cur = path->nodes[0];
2026 if (err & LAST_ITEM)
2029 /* still have inode items in this leaf */
2030 if (cur->start == cur_bytenr)
2034 * we have switched to another leaf, above nodes may
2035 * have changed, here walk down the path, if a node
2036 * or leaf is shared, check whether we can skip this
2039 for (i = root_level; i >= 0; i--) {
2040 if (path->nodes[i]->start == nrefs->bytenr[i])
2043 ret = update_nodes_refs(root,
2044 path->nodes[i]->start,
2049 if (!nrefs->need_check[i]) {
2055 for (i = 0; i < *level; i++) {
2056 free_extent_buffer(path->nodes[i]);
2057 path->nodes[i] = NULL;
/*
 * Issue readahead for the children of 'node', starting at 'slot' and
 * running to the last pointer of the node. Each child is requested by its
 * block pointer and the generation recorded in the parent.
 */
2066 static void reada_walk_down(struct btrfs_root *root,
2067 struct extent_buffer *node, int slot)
2069 struct btrfs_fs_info *fs_info = root->fs_info;
2076 level = btrfs_header_level(node);
2080 nritems = btrfs_header_nritems(node);
2081 for (i = slot; i < nritems; i++) {
2082 bytenr = btrfs_node_blockptr(node, i);
2083 ptr_gen = btrfs_node_ptr_generation(node, i);
2084 readahead_tree_block(fs_info, bytenr, ptr_gen);
2089 * Check the child node/leaf by the following condition:
2090 * 1. the first item key of the node/leaf should be the same with the one
2092 * 2. block in parent node should match the child node/leaf.
2093 * 3. generation of parent node and child's header should be consistent.
2095 * Or the child node/leaf pointed by the key in parent is not valid.
2097 * We hope to check leaf owner too, but since subvol may share leaves,
2098 * which makes leaf owner check not so strong, key check should be
2099 * sufficient enough for that case.
2101 static int check_child_node(struct extent_buffer *parent, int slot,
2102 struct extent_buffer *child)
2104 struct btrfs_key parent_key;
2105 struct btrfs_key child_key;
2108 btrfs_node_key_to_cpu(parent, &parent_key, slot);
2109 if (btrfs_header_level(child) == 0)
2110 btrfs_item_key_to_cpu(child, &child_key, 0);
2112 btrfs_node_key_to_cpu(child, &child_key, 0);
2114 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2117 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2118 parent_key.objectid, parent_key.type, parent_key.offset,
2119 child_key.objectid, child_key.type, child_key.offset);
2121 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2123 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2124 btrfs_node_blockptr(parent, slot),
2125 btrfs_header_bytenr(child));
2127 if (btrfs_node_ptr_generation(parent, slot) !=
2128 btrfs_header_generation(child)) {
2130 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2131 btrfs_header_generation(child),
2132 btrfs_node_ptr_generation(parent, slot));
2138 * For a tree node or leaf that is shared, we don't need to iterate it
2139 * in every fs or file tree check. Here we find all of its root ids, and only
2140 * check it in the fs or file tree which has the smallest root id.
2142 static int need_check(struct btrfs_root *root, struct ulist *roots)
2144 struct rb_node *node;
2145 struct ulist_node *u;
/* Test for a block that is referenced by exactly one root. */
2147 if (roots->nnodes == 1)
/* rb_first() yields the first node of the ulist's rb-tree. */
2150 node = rb_first(&roots->root);
2151 u = rb_entry(node, struct ulist_node, rb_node);
2153 * current root id is not smallest, we skip it and let it be checked
2154 * in the fs or file tree who hash the smallest root id.
2156 if (root->objectid != u->val)
2163 * For a tree node or leaf, we record its reference count, so if we process
2164 * this node or leaf again later, we don't need to compute its reference count again.
2166 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2167 struct node_refs *nrefs, u64 level)
2171 struct ulist *roots;
/* Only query the extent tree if the cached entry is for another block. */
2173 if (nrefs->bytenr[level] != bytenr) {
2174 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2175 level, 1, &refs, NULL);
2179 nrefs->bytenr[level] = bytenr;
2180 nrefs->refs[level] = refs;
/* Collect the roots referencing the block to decide who checks it. */
2182 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2187 check = need_check(root, roots);
2189 nrefs->need_check[level] = check;
2191 nrefs->need_check[level] = 1;
/*
 * Descend from path->nodes[*level] towards the leaves (original mode).
 *
 * The reference count of every visited block is taken from nrefs when the
 * cached bytenr matches, otherwise it is looked up in the extent tree and
 * cached. Blocks are passed to enter_shared_node() so that their inode
 * records are collected only once. Leaves are handed to process_one_leaf();
 * for interior nodes the child block is read (with readahead), verified
 * against the parent pointer with check_child_node() and with
 * btrfs_check_leaf()/btrfs_check_node(), and then becomes the new current
 * level.
 *
 * A child that cannot be read is recorded with
 * btrfs_add_corrupt_extent_record().
 */
2198 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2199 struct walk_control *wc, int *level,
2200 struct node_refs *nrefs)
2202 enum btrfs_tree_block_status status;
2205 struct btrfs_fs_info *fs_info = root->fs_info;
2206 struct extent_buffer *next;
2207 struct extent_buffer *cur;
2211 WARN_ON(*level < 0);
2212 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached ref count if it belongs to this very block. */
2214 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2215 refs = nrefs->refs[*level];
2218 ret = btrfs_lookup_extent_info(NULL, root,
2219 path->nodes[*level]->start,
2220 *level, 1, &refs, NULL);
2225 nrefs->bytenr[*level] = path->nodes[*level]->start;
2226 nrefs->refs[*level] = refs;
2230 ret = enter_shared_node(root, path->nodes[*level]->start,
2238 while (*level >= 0) {
2239 WARN_ON(*level < 0);
2240 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2241 cur = path->nodes[*level];
/* The level stored in the block header must match the walk level. */
2243 if (btrfs_header_level(cur) != *level)
2246 if (path->slots[*level] >= btrfs_header_nritems(cur))
2249 ret = process_one_leaf(root, cur, wc);
2254 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2255 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
/* Same caching scheme as above, this time for the child block. */
2257 if (bytenr == nrefs->bytenr[*level - 1]) {
2258 refs = nrefs->refs[*level - 1];
2260 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2261 *level - 1, 1, &refs, NULL);
2265 nrefs->bytenr[*level - 1] = bytenr;
2266 nrefs->refs[*level - 1] = refs;
2271 ret = enter_shared_node(root, bytenr, refs,
2274 path->slots[*level]++;
/* Use a cached, up to date buffer if there is one, else read it. */
2279 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2280 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2281 free_extent_buffer(next);
2282 reada_walk_down(root, cur, path->slots[*level]);
2283 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2284 if (!extent_buffer_uptodate(next)) {
2285 struct btrfs_key node_key;
2287 btrfs_node_key_to_cpu(path->nodes[*level],
2289 path->slots[*level]);
2290 btrfs_add_corrupt_extent_record(root->fs_info,
2292 path->nodes[*level]->start,
2293 root->fs_info->nodesize,
2300 ret = check_child_node(cur, path->slots[*level], next);
2302 free_extent_buffer(next);
2307 if (btrfs_is_leaf(next))
2308 status = btrfs_check_leaf(root, NULL, next);
2310 status = btrfs_check_node(root, NULL, next);
2311 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2312 free_extent_buffer(next);
/* Step down: 'next' replaces whatever the path held one level below. */
2317 *level = *level - 1;
2318 free_extent_buffer(path->nodes[*level]);
2319 path->nodes[*level] = next;
2320 path->slots[*level] = 0;
/* Mark the current level as exhausted so the caller walks back up. */
2323 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Forward declaration of check_inode_item().
 * NOTE(review): this repeats the prototype that already appears ahead of
 * process_one_leaf_v2(); it is redundant but harmless.
 */
2327 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2328 unsigned int ext_ref);
2331 * Returns >0 Found error, should continue
2332 * Returns <0 Fatal error, must exit the whole check
2333 * Returns 0 No errors found
2335 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2336 int *level, struct node_refs *nrefs, int ext_ref)
2338 enum btrfs_tree_block_status status;
2341 struct btrfs_fs_info *fs_info = root->fs_info;
2342 struct extent_buffer *next;
2343 struct extent_buffer *cur;
2346 WARN_ON(*level < 0);
2347 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Refresh the cached ref count / need_check state of the start block. */
2349 ret = update_nodes_refs(root, path->nodes[*level]->start,
2354 while (*level >= 0) {
2355 WARN_ON(*level < 0);
2356 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2357 cur = path->nodes[*level];
/* The level stored in the block header must match the walk level. */
2359 if (btrfs_header_level(cur) != *level)
2362 if (path->slots[*level] >= btrfs_header_nritems(cur))
2364 /* Don't forget to check leaf/node validity */
2366 ret = btrfs_check_leaf(root, NULL, cur);
2367 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2371 ret = process_one_leaf_v2(root, path, nrefs,
/* process_one_leaf_v2() may have changed the path: reload cur. */
2373 cur = path->nodes[*level];
2376 ret = btrfs_check_node(root, NULL, cur);
2377 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2382 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2383 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2385 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
/* Child does not need checking in this root: skip its slot. */
2388 if (!nrefs->need_check[*level - 1]) {
2389 path->slots[*level]++;
/* Use a cached, up to date buffer if there is one, else read it. */
2393 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2394 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2395 free_extent_buffer(next);
2396 reada_walk_down(root, cur, path->slots[*level]);
2397 next = read_tree_block(fs_info, bytenr, ptr_gen);
2398 if (!extent_buffer_uptodate(next)) {
2399 struct btrfs_key node_key;
2401 btrfs_node_key_to_cpu(path->nodes[*level],
2403 path->slots[*level]);
2404 btrfs_add_corrupt_extent_record(fs_info,
2406 path->nodes[*level]->start,
/*
 * NOTE(review): walk_down_tree() releases 'next' when check_child_node()
 * fails; no matching free_extent_buffer(next) is visible on this path -
 * confirm that the buffer is not leaked here.
 */
2414 ret = check_child_node(cur, path->slots[*level], next);
2418 if (btrfs_is_leaf(next))
2419 status = btrfs_check_leaf(root, NULL, next);
2421 status = btrfs_check_node(root, NULL, next);
2422 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2423 free_extent_buffer(next);
/* Step down: 'next' replaces whatever the path held one level below. */
2428 *level = *level - 1;
2429 free_extent_buffer(path->nodes[*level]);
2430 path->nodes[*level] = next;
2431 path->slots[*level] = 0;
/*
 * Climb from *level towards the root (original mode).
 *
 * Each populated level of the path is tested for a further slot after the
 * current one. Exhausted levels are released: their extent buffer is freed,
 * the path entry is cleared, and if the level is the active shared node,
 * leave_shared_node() is called for it.
 */
2436 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2437 struct walk_control *wc, int *level)
2440 struct extent_buffer *leaf;
2442 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite its name, 'leaf' is the block at level i, leaf or node. */
2443 leaf = path->nodes[i];
2444 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2449 free_extent_buffer(path->nodes[*level]);
2450 path->nodes[*level] = NULL;
2451 BUG_ON(*level > wc->active_node);
2452 if (*level == wc->active_node)
2453 leave_shared_node(root, wc, *level);
/*
 * Variant of walk_up_tree() without walk_control: each populated level of
 * the path is tested for a further slot after the current one, and
 * exhausted levels are freed and cleared from the path.
 */
2460 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2464 struct extent_buffer *leaf;
2466 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite its name, 'leaf' is the block at level i, leaf or node. */
2467 leaf = path->nodes[i];
2468 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2473 free_extent_buffer(path->nodes[*level]);
2474 path->nodes[*level] = NULL;
/*
 * Run a series of sanity tests on the inode record of a root directory.
 *
 * The tests cover: presence of the inode item and absence of recorded
 * errors, nlink and found_link values, a non-empty backref list, and the
 * first backref, which is tested for being an inode ref named ".." with
 * index 0 that has neither a dir index nor a dir item.
 */
2481 static int check_root_dir(struct inode_record *rec)
2483 struct inode_backref *backref;
2486 if (!rec->found_inode_item || rec->errors)
2488 if (rec->nlink != 1 || rec->found_link != 0)
2490 if (list_empty(&rec->backrefs))
/* Only the first backref on the list is examined. */
2492 backref = to_inode_backref(rec->backrefs.next);
2493 if (!backref->found_inode_ref)
2495 if (backref->index != 0 || backref->namelen != 2 ||
2496 memcmp(backref->name, "..", 2))
2498 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size field of the inode item of rec->ino with the size that
 * was accumulated during the scan (rec->found_size), then clear
 * I_ERR_DIR_ISIZE_WRONG on the record.
 *
 * The search uses the key (ino, INODE_ITEM, (u64)-1), so the item of
 * interest is located relative to the slot returned by the search.
 * The path is released before returning.
 */
2505 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2506 struct btrfs_root *root, struct btrfs_path *path,
2507 struct inode_record *rec)
2509 struct btrfs_inode_item *ei;
2510 struct btrfs_key key;
2513 key.objectid = rec->ino;
2514 key.type = BTRFS_INODE_ITEM_KEY;
2515 key.offset = (u64)-1;
2517 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Slot 0 means there is no preceding item in this leaf. */
2521 if (!path->slots[0]) {
2528 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2529 if (key.objectid != rec->ino) {
2534 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2535 struct btrfs_inode_item);
2536 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2537 btrfs_mark_buffer_dirty(path->nodes[0]);
2538 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2539 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2540 root->root_key.objectid);
2542 btrfs_release_path(path);
/*
 * Add the missing orphan item for rec->ino to 'root', release the path and
 * clear I_ERR_NO_ORPHAN_ITEM on the record.
 */
2546 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2547 struct btrfs_root *root,
2548 struct btrfs_path *path,
2549 struct inode_record *rec)
2553 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2554 btrfs_release_path(path);
2556 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field of the inode item of rec->ino with the byte
 * count accumulated during the scan (rec->found_size), then clear
 * I_ERR_FILE_NBYTES_WRONG on the record. The path is released before
 * returning.
 */
2560 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2561 struct btrfs_root *root,
2562 struct btrfs_path *path,
2563 struct inode_record *rec)
2565 struct btrfs_inode_item *ei;
2566 struct btrfs_key key;
2569 key.objectid = rec->ino;
2570 key.type = BTRFS_INODE_ITEM_KEY;
2573 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2580 /* Since ret == 0, no need to check anything */
2581 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2582 struct btrfs_inode_item);
2583 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2584 btrfs_mark_buffer_dirty(path->nodes[0]);
2585 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2586 printf("reset nbytes for ino %llu root %llu\n",
2587 rec->ino, root->root_key.objectid);
2589 btrfs_release_path(path);
/*
 * Re-create a DIR_INDEX item for 'backref' in its own transaction.
 *
 * The new item is keyed (backref->dir, DIR_INDEX, backref->index), points
 * at the inode item of rec->ino, carries the file type derived from
 * rec->imode and stores backref->name with no extra data.
 *
 * After the commit the in-memory state is brought in line: the backref is
 * marked as having a dir index, and the directory record's found_size grows
 * by the name length, with I_ERR_DIR_ISIZE_WRONG re-evaluated against the
 * directory's isize.
 *
 * NOTE(review): the return value of btrfs_commit_transaction() is not used
 * in the visible code - confirm that a failed commit is acceptable here.
 */
2593 static int add_missing_dir_index(struct btrfs_root *root,
2594 struct cache_tree *inode_cache,
2595 struct inode_record *rec,
2596 struct inode_backref *backref)
2598 struct btrfs_path path;
2599 struct btrfs_trans_handle *trans;
2600 struct btrfs_dir_item *dir_item;
2601 struct extent_buffer *leaf;
2602 struct btrfs_key key;
2603 struct btrfs_disk_key disk_key;
2604 struct inode_record *dir_rec;
2605 unsigned long name_ptr;
/* Item payload: fixed dir item header followed by the name. */
2606 u32 data_size = sizeof(*dir_item) + backref->namelen;
2609 trans = btrfs_start_transaction(root, 1);
2611 return PTR_ERR(trans);
2613 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2614 (unsigned long long)rec->ino);
2616 btrfs_init_path(&path);
2617 key.objectid = backref->dir;
2618 key.type = BTRFS_DIR_INDEX_KEY;
2619 key.offset = backref->index;
2620 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2623 leaf = path.nodes[0];
2624 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* Location key of the entry: the inode item of rec->ino. */
2626 disk_key.objectid = cpu_to_le64(rec->ino);
2627 disk_key.type = BTRFS_INODE_ITEM_KEY;
2628 disk_key.offset = 0;
2630 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2631 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2632 btrfs_set_dir_data_len(leaf, dir_item, 0);
2633 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is stored right behind the dir item header. */
2634 name_ptr = (unsigned long)(dir_item + 1);
2635 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2636 btrfs_mark_buffer_dirty(leaf);
2637 btrfs_release_path(&path);
2638 btrfs_commit_transaction(trans, root);
2640 backref->found_dir_index = 1;
2641 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2642 BUG_ON(IS_ERR(dir_rec));
/* The new index entry counts towards the directory size. */
2645 dir_rec->found_size += backref->namelen;
2646 if (dir_rec->found_size == dir_rec->isize &&
2647 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2648 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2649 if (dir_rec->found_size != dir_rec->isize)
2650 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Remove the DIR_INDEX entry described by 'backref' from 'root' in its own
 * transaction.
 *
 * The entry is looked up by directory, name and index. Two deletion calls
 * are visible: btrfs_del_item() for the whole item and
 * btrfs_delete_one_dir_name() for a single name inside it. The transaction
 * is committed on both the lookup-failure path and the normal path.
 */
2655 static int delete_dir_index(struct btrfs_root *root,
2656 struct inode_backref *backref)
2658 struct btrfs_trans_handle *trans;
2659 struct btrfs_dir_item *di;
2660 struct btrfs_path path;
2663 trans = btrfs_start_transaction(root, 1);
2665 return PTR_ERR(trans);
2667 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2668 (unsigned long long)backref->dir,
2669 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2670 (unsigned long long)root->objectid);
2672 btrfs_init_path(&path);
/* The final argument -1 is passed through to the lookup helper. */
2673 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2674 backref->name, backref->namelen,
2675 backref->index, -1);
2678 btrfs_release_path(&path);
2679 btrfs_commit_transaction(trans, root);
2686 ret = btrfs_del_item(trans, root, &path);
2688 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2690 btrfs_release_path(&path);
2691 btrfs_commit_transaction(trans, root);
2695 static int __create_inode_item(struct btrfs_trans_handle *trans,
2696 struct btrfs_root *root, u64 ino, u64 size,
2697 u64 nbytes, u64 nlink, u32 mode)
2699 struct btrfs_inode_item ii;
2700 time_t now = time(NULL);
2703 btrfs_set_stack_inode_size(&ii, size);
2704 btrfs_set_stack_inode_nbytes(&ii, nbytes);
2705 btrfs_set_stack_inode_nlink(&ii, nlink);
2706 btrfs_set_stack_inode_mode(&ii, mode);
2707 btrfs_set_stack_inode_generation(&ii, trans->transid);
2708 btrfs_set_stack_timespec_nsec(&ii.atime, 0);
2709 btrfs_set_stack_timespec_sec(&ii.ctime, now);
2710 btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
2711 btrfs_set_stack_timespec_sec(&ii.mtime, now);
2712 btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
2713 btrfs_set_stack_timespec_sec(&ii.otime, 0);
2714 btrfs_set_stack_timespec_nsec(&ii.otime, 0);
2716 ret = btrfs_insert_inode(trans, root, ino, &ii);
2719 warning("root %llu inode %llu recreating inode item, this may "
2720 "be incomplete, please check permissions and content after "
2721 "the fsck completes.\n", (unsigned long long)root->objectid,
2722 (unsigned long long)ino);
/*
 * Low-memory mode wrapper around __create_inode_item(): creates an inode
 * item with size, nbytes and nlink all zero. The mode is a directory for
 * BTRFS_FT_DIR and a regular file for every other file type, always with
 * permission bits 0755.
 */
2727 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2728 struct btrfs_root *root, u64 ino,
2731 u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2733 return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
/*
 * Re-create the missing inode item of 'rec' in its own transaction.
 *
 * The link count is 1 for a root directory and rec->found_link otherwise.
 * If any dir item was seen for the inode it is recreated as a directory
 * (size = rec->found_size), even when file extents were found as well;
 * otherwise it becomes a regular file whose size is rec->extent_end.
 * Permissions are always 0755.
 */
2736 static int create_inode_item(struct btrfs_root *root,
2737 struct inode_record *rec, int root_dir)
2739 struct btrfs_trans_handle *trans;
2745 trans = btrfs_start_transaction(root, 1);
2746 if (IS_ERR(trans)) {
2747 ret = PTR_ERR(trans);
2751 nlink = root_dir ? 1 : rec->found_link;
2752 if (rec->found_dir_item) {
2753 if (rec->found_file_extent)
2754 fprintf(stderr, "root %llu inode %llu has both a dir "
2755 "item and extents, unsure if it is a dir or a "
2756 "regular file so setting it as a directory\n",
2757 (unsigned long long)root->objectid,
2758 (unsigned long long)rec->ino);
2759 mode = S_IFDIR | 0755;
2760 size = rec->found_size;
/* NOTE(review): this condition is always true here; a plain else would do. */
2761 } else if (!rec->found_dir_item) {
2762 size = rec->extent_end;
2763 mode = S_IFREG | 0755;
2766 ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
2768 btrfs_commit_transaction(trans, root);
/*
 * Walk every backref of @rec and fix the dir item / dir index / inode ref
 * triple where the pieces disagree.
 *
 * The function works in two phases selected by `delete`: when it is set only
 * bad dir indexes are removed, when it is clear missing items are added back.
 * Backrefs that are fully consistent, or whose bad dir index was deleted, are
 * removed from rec->backrefs.
 *
 * Returns ret when it is non-zero, otherwise the value of `repaired`
 * (presumably the number of repairs performed - confirm).
 */
2772 static int repair_inode_backrefs(struct btrfs_root *root,
2773 struct inode_record *rec,
2774 struct cache_tree *inode_cache,
2777 struct inode_backref *tmp, *backref;
2778 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2782 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* Add phase: the root directory lost its inode item, recreate it with nlink 1. */
2783 if (!delete && rec->ino == root_dirid) {
2784 if (!rec->found_inode_item) {
2785 ret = create_inode_item(root, rec, 1);
2792 /* Index 0 for root dir's are special, don't mess with it */
2793 if (rec->ino == root_dirid && backref->index == 0)
2797 ((backref->found_dir_index && !backref->found_inode_ref) ||
2798 (backref->found_dir_index && backref->found_inode_ref &&
2799 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2800 ret = delete_dir_index(root, backref);
2804 list_del(&backref->list);
/* Add phase: dir item and inode ref exist but the dir index is missing. */
2809 if (!delete && !backref->found_dir_index &&
2810 backref->found_dir_item && backref->found_inode_ref) {
2811 ret = add_missing_dir_index(root, inode_cache, rec,
/* A backref with all three pieces and no error needs no further work. */
2816 if (backref->found_dir_item &&
2817 backref->found_dir_index) {
2818 if (!backref->errors &&
2819 backref->found_inode_ref) {
2820 list_del(&backref->list);
/* Add phase: only the inode ref exists, so re-insert the dir index/item pair. */
2827 if (!delete && (!backref->found_dir_index &&
2828 !backref->found_dir_item &&
2829 backref->found_inode_ref)) {
2830 struct btrfs_trans_handle *trans;
2831 struct btrfs_key location;
2833 ret = check_dir_conflict(root, backref->name,
2839 * let nlink fixing routine to handle it,
2840 * which can do it better.
2845 location.objectid = rec->ino;
2846 location.type = BTRFS_INODE_ITEM_KEY;
2847 location.offset = 0;
2849 trans = btrfs_start_transaction(root, 1);
2850 if (IS_ERR(trans)) {
2851 ret = PTR_ERR(trans);
2854 fprintf(stderr, "adding missing dir index/item pair "
2856 (unsigned long long)rec->ino);
2857 ret = btrfs_insert_dir_item(trans, root, backref->name,
2859 backref->dir, &location,
2860 imode_to_type(rec->imode),
2863 btrfs_commit_transaction(trans, root);
/* Add phase: all three refs agree but the inode item itself is missing. */
2867 if (!delete && (backref->found_inode_ref &&
2868 backref->found_dir_index &&
2869 backref->found_dir_item &&
2870 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2871 !rec->found_inode_item)) {
2872 ret = create_inode_item(root, rec, 0);
2879 return ret ? ret : repaired;
2883 * To determine the file type for nlink/inode_item repair
2885 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2886 * Return -ENOENT if file type is not found.
/*
 * Look up the BTRFS_FT_* type of the inode described by @rec and store it in
 * *@type.
 *
 * The inode item's mode is used when the inode item was found.  Otherwise the
 * filetype recorded in the first backref that has a dir index or a dir item is
 * used.
 */
2888 static int find_file_type(struct inode_record *rec, u8 *type)
2890 struct inode_backref *backref;
2892 /* For inode item recovered case */
2893 if (rec->found_inode_item) {
2894 *type = imode_to_type(rec->imode);
/* No inode item: fall back to the type stored in a directory entry. */
2898 list_for_each_entry(backref, &rec->backrefs, list) {
2899 if (backref->found_dir_index || backref->found_dir_item) {
2900 *type = backref->filetype;
2908 * To determine the file name for nlink repair
2910 * Return 0 if file name is found, set name and namelen.
2911 * Return -ENOENT if file name is not found.
/*
 * Copy the name of the first backref of @rec that has a dir index, a dir item
 * or an inode ref into @name and store its length in *@namelen.
 *
 * Exactly backref->namelen bytes are copied with memcpy(); no terminating NUL
 * is written here, so @name must be at least that large and callers must use
 * *@namelen rather than strlen().
 */
2913 static int find_file_name(struct inode_record *rec,
2914 char *name, int *namelen)
2916 struct inode_backref *backref;
2918 list_for_each_entry(backref, &rec->backrefs, list) {
2919 if (backref->found_dir_index || backref->found_dir_item ||
2920 backref->found_inode_ref) {
2921 memcpy(name, backref->name, backref->namelen);
2922 *namelen = backref->namelen;
2929 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of the inode described by @rec from scratch.
 *
 * Steps, all inside @trans:
 *  1. rec->found_link is cleared and every backref is unlinked with
 *     btrfs_unlink(); backrefs that do not have all of dir index, dir item
 *     and inode ref are dropped from rec->backrefs.
 *  2. the on-disk inode item's nlink is set to 0.
 *  3. each remaining backref is added back with btrfs_add_link().
 *
 * @path is used for the inode item lookup and is released before returning.
 */
2930 static int reset_nlink(struct btrfs_trans_handle *trans,
2931 struct btrfs_root *root,
2932 struct btrfs_path *path,
2933 struct inode_record *rec)
2935 struct inode_backref *backref;
2936 struct inode_backref *tmp;
2937 struct btrfs_key key;
2938 struct btrfs_inode_item *inode_item;
2941 /* We don't believe this either, reset it and iterate backref */
2942 rec->found_link = 0;
2944 /* Remove all backref including the valid ones */
2945 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2946 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2947 backref->index, backref->name,
2948 backref->namelen, 0);
2952 /* remove invalid backref, so it won't be added back */
2953 if (!(backref->found_dir_index &&
2954 backref->found_dir_item &&
2955 backref->found_inode_ref)) {
2956 list_del(&backref->list);
2963 /* Set nlink to 0 */
2964 key.objectid = rec->ino;
2965 key.type = BTRFS_INODE_ITEM_KEY;
2967 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2974 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2975 struct btrfs_inode_item);
2976 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2977 btrfs_mark_buffer_dirty(path->nodes[0]);
2978 btrfs_release_path(path);
2981 * Add back valid inode_ref/dir_item/dir_index,
2982 * add_link() will handle the nlink inc, so new nlink must be correct
2984 list_for_each_entry(backref, &rec->backrefs, list) {
2985 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2986 backref->name, backref->namelen,
2987 backref->filetype, &backref->index, 1, 0);
2992 btrfs_release_path(path);
/*
 * Find the highest objectid currently present in @root and store it in
 * *highest_ino.
 *
 * The tree is searched for an INODE_ITEM key with objectid
 * BTRFS_LAST_FREE_OBJECTID; the objectid of the item in the slot just before
 * the returned position is reported.  @path is initialised on entry and
 * released before returning.
 *
 * NOTE(review): path->slots[0] - 1 assumes the search never lands on slot 0.
 * NOTE(review): *highest_ino is compared against BTRFS_LAST_FREE_OBJECTID
 * below - confirm it is always written before that comparison.
 */
2996 static int get_highest_inode(struct btrfs_trans_handle *trans,
2997 struct btrfs_root *root,
2998 struct btrfs_path *path,
3001 struct btrfs_key key, found_key;
3004 btrfs_init_path(path);
3005 key.objectid = BTRFS_LAST_FREE_OBJECTID;
3007 key.type = BTRFS_INODE_ITEM_KEY;
3008 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3010 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3011 path->slots[0] - 1);
3012 *highest_ino = found_key.objectid;
/* No objectid is left above the highest one in use. */
3015 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3017 btrfs_release_path(path);
3022 * Link inode to dir 'lost+found'. Increase @ref_count.
3024 * Returns 0 means success.
3025 * Returns <0 means failure.
/*
 * @trans:     running transaction
 * @root:      tree that holds the inode
 * @path:      scratch path, released on entry and again before returning
 * @ino:       inode to link
 * @namebuf:   name to use for the link; a ".INO" suffix may be appended in
 *             place, so the buffer must be able to hold BTRFS_NAME_LEN bytes
 * @name_len:  current length of the name in @namebuf
 * @filetype:  BTRFS_FT_* type passed on to btrfs_add_link()
 * @ref_count: counter mentioned in the description above
 *
 * The directory is created with btrfs_mkdir() below BTRFS_FIRST_FREE_OBJECTID
 * using an inode number derived from get_highest_inode().  While
 * btrfs_add_link() reports -EEXIST another ".INO" suffix is appended and the
 * link is retried, as long as the name still fits into BTRFS_NAME_LEN.
 */
3027 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
3028 struct btrfs_root *root,
3029 struct btrfs_path *path,
3030 u64 ino, char *namebuf, u32 name_len,
3031 u8 filetype, u64 *ref_count)
3033 char *dir_name = "lost+found";
3038 btrfs_release_path(path);
3039 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3044 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3045 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3048 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3051 ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3052 namebuf, name_len, filetype, NULL, 1, 0);
3054 * Add ".INO" suffix several times to handle case where
3055 * "FILENAME.INO" is already taken by another file.
3057 while (ret == -EEXIST) {
3059 * Conflicting file name, add ".INO" as suffix * +1 for '.'
3061 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
3065 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3067 name_len += count_digits(ino) + 1;
3068 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3069 name_len, filetype, NULL, 1, 0);
3072 error("failed to link the inode %llu to %s dir: %s",
3073 ino, dir_name, strerror(-ret));
3078 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3079 name_len, namebuf, dir_name);
3081 btrfs_release_path(path);
3083 error("failed to move file '%.*s' to '%s' dir", name_len,
/*
 * Repair a wrong link count for the inode described by @rec.
 *
 * The file name and type are looked up first because reset_nlink() removes
 * invalid backrefs.  When no name is found the decimal inode number is used
 * instead; when no type is found BTRFS_FT_REG_FILE is used.  After the link
 * count is rebuilt, an inode that ends up with no link at all is linked into
 * the lost+found directory.
 *
 * I_ERR_LINK_COUNT_WRONG is cleared from rec->errors and @path is released
 * before returning.
 *
 * NOTE(review): &rec->found_link is cast to u64 * for
 * link_inode_to_lostfound(); confirm the field really is 64 bits wide,
 * otherwise the callee accesses memory beyond the field.
 */
3088 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3089 struct btrfs_root *root,
3090 struct btrfs_path *path,
3091 struct inode_record *rec)
3093 char namebuf[BTRFS_NAME_LEN] = {0};
3096 int name_recovered = 0;
3097 int type_recovered = 0;
3101 * Get file name and type first before these invalid inode ref
3102 * are deleted by remove_all_invalid_backref()
3104 name_recovered = !find_file_name(rec, namebuf, &namelen);
3105 type_recovered = !find_file_type(rec, &type);
3107 if (!name_recovered) {
3108 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3109 rec->ino, rec->ino);
3110 namelen = count_digits(rec->ino);
3111 sprintf(namebuf, "%llu", rec->ino);
3114 if (!type_recovered) {
3115 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3117 type = BTRFS_FT_REG_FILE;
3121 ret = reset_nlink(trans, root, path, rec);
3124 "Failed to reset nlink for inode %llu: %s\n",
3125 rec->ino, strerror(-ret));
/* No valid link survived the reset: park the inode in lost+found. */
3129 if (rec->found_link == 0) {
3130 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3131 namebuf, namelen, type,
3132 (u64 *)&rec->found_link);
3136 printf("Fixed the nlink of inode %llu\n", rec->ino);
3139 * Clear the flag anyway, or we will loop forever for the same inode
3140 * as it will not be removed from the bad inode list and the dead loop
3143 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3144 btrfs_release_path(path);
3149 * Check if there is any normal(reg or prealloc) file extent for given
3151 * This is used to determine the file type when neither its dir_index/item or
3152 * inode_item exists.
3154 * This will *NOT* report error, if any error happens, just consider it does
3155 * not have any normal file extent.
/*
 * @root: tree to search (read-only search, no transaction is used)
 * @ino:  inode number whose EXTENT_DATA items are inspected
 *
 * Only the first EXTENT_DATA item found for @ino is examined; it counts as a
 * normal file extent when its type is not BTRFS_FILE_EXTENT_INLINE.
 */
3157 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3159 struct btrfs_path path;
3160 struct btrfs_key key;
3161 struct btrfs_key found_key;
3162 struct btrfs_file_extent_item *fi;
3166 btrfs_init_path(&path);
3168 key.type = BTRFS_EXTENT_DATA_KEY;
3171 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* The search may stop past the last item of a leaf; move on to the next leaf. */
3176 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3177 ret = btrfs_next_leaf(root, &path);
3184 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3186 if (found_key.objectid != ino ||
3187 found_key.type != BTRFS_EXTENT_DATA_KEY)
3189 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3190 struct btrfs_file_extent_item);
3191 type = btrfs_file_extent_type(path.nodes[0], fi);
3192 if (type != BTRFS_FILE_EXTENT_INLINE) {
3198 btrfs_release_path(&path);
3202 static u32 btrfs_type_to_imode(u8 type)
3204 static u32 imode_by_btrfs_type[] = {
3205 [BTRFS_FT_REG_FILE] = S_IFREG,
3206 [BTRFS_FT_DIR] = S_IFDIR,
3207 [BTRFS_FT_CHRDEV] = S_IFCHR,
3208 [BTRFS_FT_BLKDEV] = S_IFBLK,
3209 [BTRFS_FT_FIFO] = S_IFIFO,
3210 [BTRFS_FT_SOCK] = S_IFSOCK,
3211 [BTRFS_FT_SYMLINK] = S_IFLNK,
3214 return imode_by_btrfs_type[(type)];
/*
 * Rebuild the inode item of an inode that has none.
 *
 * The file type comes from find_file_type() when possible.  Otherwise it is
 * guessed: a regular file when a normal file extent exists, a directory when
 * rec->found_dir_item is set, a regular file when orphan extents are
 * attached, and a regular file as the last resort.
 *
 * Only the inode item is created (btrfs_new_inode()).  On completion the
 * record is updated: I_ERR_NO_INODE_ITEM is cleared and
 * I_ERR_LINK_COUNT_WRONG is set so that the nlink repair runs afterwards.
 *
 * NOTE(review): rec->found_dir_item is set to 1 regardless of the file type
 * chosen above - confirm this is intended.
 */
3217 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3218 struct btrfs_root *root,
3219 struct btrfs_path *path,
3220 struct inode_record *rec)
3224 int type_recovered = 0;
3227 printf("Trying to rebuild inode:%llu\n", rec->ino);
3229 type_recovered = !find_file_type(rec, &filetype);
3232 * Try to determine inode type if type not found.
3234 * For found regular file extent, it must be FILE.
3235 * For found dir_item/index, it must be DIR.
3237 * For undetermined one, use FILE as fallback.
3240 * 1. If found backref(inode_index/item is already handled) to it,
3242 * Need new inode-inode ref structure to allow search for that.
3244 if (!type_recovered) {
3245 if (rec->found_file_extent &&
3246 find_normal_file_extent(root, rec->ino)) {
3248 filetype = BTRFS_FT_REG_FILE;
3249 } else if (rec->found_dir_item) {
3251 filetype = BTRFS_FT_DIR;
3252 } else if (!list_empty(&rec->orphan_extents)) {
3254 filetype = BTRFS_FT_REG_FILE;
3256 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3259 filetype = BTRFS_FT_REG_FILE;
3263 ret = btrfs_new_inode(trans, root, rec->ino,
3264 mode | btrfs_type_to_imode(filetype));
3269 * Here inode rebuild is done, we only rebuild the inode item,
3270 * don't repair the nlink(like move to lost+found).
3271 * That is the job of nlink repair.
3273 * We just fill the record and return
3275 rec->found_dir_item = 1;
3276 rec->imode = mode | btrfs_type_to_imode(filetype);
3278 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3279 /* Ensure the inode_nlinks repair function will be called */
3280 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach the orphan data extents recorded in rec->orphan_extents.
 *
 * For each orphan extent btrfs_get_extent() is used to look for a conflicting
 * file extent at the same file offset.  On a conflict the orphan's disk extent
 * is freed through the extent root; otherwise a file extent item pointing at
 * it is inserted and the record's size and hole bookkeeping is updated.
 * Processed entries are removed from the list and
 * I_ERR_FILE_EXTENT_ORPHAN is cleared at the end.
 */
3285 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3286 struct btrfs_root *root,
3287 struct btrfs_path *path,
3288 struct inode_record *rec)
3290 struct orphan_data_extent *orphan;
3291 struct orphan_data_extent *tmp;
3294 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3296 * Check for conflicting file extents
3298 * Here we don't know whether the extents is compressed or not,
3299 * so we can only assume it not compressed nor data offset,
3300 * and use its disk_len as extent length.
3302 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3303 orphan->offset, orphan->disk_len, 0);
3304 btrfs_release_path(path);
3309 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3310 orphan->disk_bytenr, orphan->disk_len);
3311 ret = btrfs_free_extent(trans,
3312 root->fs_info->extent_root,
3313 orphan->disk_bytenr, orphan->disk_len,
3314 0, root->objectid, orphan->objectid,
3319 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3320 orphan->offset, orphan->disk_bytenr,
3321 orphan->disk_len, orphan->disk_len);
3325 /* Update file size info */
3326 rec->found_size += orphan->disk_len;
3327 if (rec->found_size == rec->nbytes)
3328 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3330 /* Update the file extent hole info too */
3331 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3335 if (RB_EMPTY_ROOT(&rec->holes))
3336 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3338 list_del(&orphan->list);
3341 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps between file extents of the inode described by @rec.
 *
 * Every hole tracked in the rec->holes rb-tree is punched with
 * btrfs_punch_hole() and then removed from the tree;
 * I_ERR_FILE_EXTENT_DISCOUNT is cleared once the tree is empty.  A separate
 * path punches one hole covering [0, isize rounded up to the sector size) for
 * a file that lost all of its file extents.
 */
3346 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3347 struct btrfs_root *root,
3348 struct btrfs_path *path,
3349 struct inode_record *rec)
3351 struct rb_node *node;
3352 struct file_extent_hole *hole;
3356 node = rb_first(&rec->holes);
3360 hole = rb_entry(node, struct file_extent_hole, node);
3361 ret = btrfs_punch_hole(trans, root, rec->ino,
3362 hole->start, hole->len);
3365 ret = del_file_extent_hole(&rec->holes, hole->start,
3369 if (RB_EMPTY_ROOT(&rec->holes))
3370 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* The tree changed, so restart from its first node. */
3371 node = rb_first(&rec->holes);
3373 /* special case for a file losing all its file extent */
3375 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3376 round_up(rec->isize,
3377 root->fs_info->sectorsize));
3381 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3382 rec->ino, root->objectid);
/*
 * Run every applicable repair for the inode described by @rec inside a single
 * transaction.
 *
 * Nothing is attempted unless rec->errors contains at least one of the
 * repairable flags tested first.  The repairs run in a fixed order and each
 * one is only started while ret is still 0.  The order matters:
 * repair_inode_no_item() sets I_ERR_LINK_COUNT_WRONG, which is handled by the
 * later repair_inode_nlinks() step.
 *
 * The transaction is committed and the path released even when a repair step
 * failed.
 */
3387 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3389 struct btrfs_trans_handle *trans;
3390 struct btrfs_path path;
3393 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3394 I_ERR_NO_ORPHAN_ITEM |
3395 I_ERR_LINK_COUNT_WRONG |
3396 I_ERR_NO_INODE_ITEM |
3397 I_ERR_FILE_EXTENT_ORPHAN |
3398 I_ERR_FILE_EXTENT_DISCOUNT|
3399 I_ERR_FILE_NBYTES_WRONG)))
3403 * For nlink repair, it may create a dir and add link, so
3404 * 2 for parent(256)'s dir_index and dir_item
3405 * 2 for lost+found dir's inode_item and inode_ref
3406 * 1 for the new inode_ref of the file
3407 * 2 for lost+found dir's dir_index and dir_item for the file
3409 trans = btrfs_start_transaction(root, 7);
3411 return PTR_ERR(trans);
3413 btrfs_init_path(&path);
/* Each step below is skipped as soon as an earlier one has failed. */
3414 if (rec->errors & I_ERR_NO_INODE_ITEM)
3415 ret = repair_inode_no_item(trans, root, &path, rec);
3416 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3417 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3418 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3419 ret = repair_inode_discount_extent(trans, root, &path, rec);
3420 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3421 ret = repair_inode_isize(trans, root, &path, rec);
3422 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3423 ret = repair_inode_orphan_item(trans, root, &path, rec);
3424 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3425 ret = repair_inode_nlinks(trans, root, &path, rec);
3426 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3427 ret = repair_inode_nbytes(trans, root, &path, rec);
3428 btrfs_commit_transaction(trans, root);
3429 btrfs_release_path(&path);
/*
 * Evaluate all inode records collected for @root, repairing them when the
 * global repair mode is on, and report whatever stays broken.
 *
 * Outline:
 *  - a root with zero refs is only checked for leftover records;
 *  - in repair mode the backrefs of every record are fixed first, in several
 *    stages, through repair_inode_backrefs();
 *  - the root directory record is validated, and recreated with
 *    btrfs_make_root_dir() when it is missing and repair is enabled;
 *  - every record is then removed from @inode_cache: the root dir and the
 *    orphan objectid are simply freed, the others get their error flags
 *    finalised, are passed to try_repair_inode() and, if still bad, are
 *    printed together with their unresolved backrefs.
 *
 * Returns -1 when at least one error was counted, 0 otherwise.
 */
3433 static int check_inode_recs(struct btrfs_root *root,
3434 struct cache_tree *inode_cache)
3436 struct cache_extent *cache;
3437 struct ptr_node *node;
3438 struct inode_record *rec;
3439 struct inode_backref *backref;
3444 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3446 if (btrfs_root_refs(&root->root_item) == 0) {
3447 if (!cache_tree_empty(inode_cache))
3448 fprintf(stderr, "warning line %d\n", __LINE__);
3453 * We need to repair backrefs first because we could change some of the
3454 * errors in the inode recs.
3456 * We also need to go through and delete invalid backrefs first and then
3457 * add the correct ones second. We do this because we may get EEXIST
3458 * when adding back the correct index because we hadn't yet deleted the
3461 * For example, if we were missing a dir index then the directories
3462 * isize would be wrong, so if we fixed the isize to what we thought it
3463 * would be and then fixed the backref we'd still have a invalid fs, so
3464 * we need to add back the dir index and then check to see if the isize
3469 if (stage == 3 && !err)
3472 cache = search_cache_extent(inode_cache, 0);
3473 while (repair && cache) {
3474 node = container_of(cache, struct ptr_node, cache);
3476 cache = next_cache_extent(cache);
3478 /* Need to free everything up and rescan */
3480 remove_cache_extent(inode_cache, &node->cache);
3482 free_inode_rec(rec);
3486 if (list_empty(&rec->backrefs))
3489 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Validate the record of the root directory itself. */
3503 rec = get_inode_rec(inode_cache, root_dirid, 0);
3504 BUG_ON(IS_ERR(rec));
3506 ret = check_root_dir(rec);
3508 fprintf(stderr, "root %llu root dir %llu error\n",
3509 (unsigned long long)root->root_key.objectid,
3510 (unsigned long long)root_dirid);
3511 print_inode_error(root, rec);
3516 struct btrfs_trans_handle *trans;
3518 trans = btrfs_start_transaction(root, 1);
3519 if (IS_ERR(trans)) {
3520 err = PTR_ERR(trans);
3525 "root %llu missing its root dir, recreating\n",
3526 (unsigned long long)root->objectid);
3528 ret = btrfs_make_root_dir(trans, root, root_dirid);
3531 btrfs_commit_transaction(trans, root);
3535 fprintf(stderr, "root %llu root dir %llu not found\n",
3536 (unsigned long long)root->root_key.objectid,
3537 (unsigned long long)root_dirid);
/* Drain the cache: each record is freed, repaired or reported below. */
3541 cache = search_cache_extent(inode_cache, 0);
3544 node = container_of(cache, struct ptr_node, cache);
3546 remove_cache_extent(inode_cache, &node->cache);
3548 if (rec->ino == root_dirid ||
3549 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3550 free_inode_rec(rec);
3554 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3555 ret = check_orphan_item(root, rec->ino);
3557 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3558 if (can_free_inode_rec(rec)) {
3559 free_inode_rec(rec);
3564 if (!rec->found_inode_item)
3565 rec->errors |= I_ERR_NO_INODE_ITEM;
3566 if (rec->found_link != rec->nlink)
3567 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3569 ret = try_repair_inode(root, rec);
3570 if (ret == 0 && can_free_inode_rec(rec)) {
3571 free_inode_rec(rec);
3577 if (!(repair && ret == 0))
3579 print_inode_error(root, rec);
3580 list_for_each_entry(backref, &rec->backrefs, list) {
3581 if (!backref->found_dir_item)
3582 backref->errors |= REF_ERR_NO_DIR_ITEM;
3583 if (!backref->found_dir_index)
3584 backref->errors |= REF_ERR_NO_DIR_INDEX;
3585 if (!backref->found_inode_ref)
3586 backref->errors |= REF_ERR_NO_INODE_REF;
3587 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3588 " namelen %u name %s filetype %d errors %x",
3589 (unsigned long long)backref->dir,
3590 (unsigned long long)backref->index,
3591 backref->namelen, backref->name,
3592 backref->filetype, backref->errors);
3593 print_ref_error(backref->errors);
3595 free_inode_rec(rec);
3597 return (error > 0) ? -1 : 0;
3600 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3603 struct cache_extent *cache;
3604 struct root_record *rec = NULL;
3607 cache = lookup_cache_extent(root_cache, objectid, 1);
3609 rec = container_of(cache, struct root_record, cache);
3611 rec = calloc(1, sizeof(*rec));
3613 return ERR_PTR(-ENOMEM);
3614 rec->objectid = objectid;
3615 INIT_LIST_HEAD(&rec->backrefs);
3616 rec->cache.start = objectid;
3617 rec->cache.size = 1;
3619 ret = insert_cache_extent(root_cache, &rec->cache);
3621 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec that matches (@ref_root, @dir, @name), or append a
 * new one.
 *
 * An existing entry is compared by ref_root, dir, name length and name bytes;
 * @index is not part of the comparison.  A new entry is allocated together
 * with room for the name and a terminating NUL, filled in from the arguments
 * and added to the tail of rec->backrefs.
 */
3626 static struct root_backref *get_root_backref(struct root_record *rec,
3627 u64 ref_root, u64 dir, u64 index,
3628 const char *name, int namelen)
3630 struct root_backref *backref;
3632 list_for_each_entry(backref, &rec->backrefs, list) {
3633 if (backref->ref_root != ref_root || backref->dir != dir ||
3634 backref->namelen != namelen)
3636 if (memcmp(name, backref->name, namelen))
/* No match: allocate the backref with the name stored inline after it. */
3641 backref = calloc(1, sizeof(*backref) + namelen + 1);
3644 backref->ref_root = ref_root;
3646 backref->index = index;
3647 backref->namelen = namelen;
3648 memcpy(backref->name, name, namelen);
3649 backref->name[namelen] = '\0';
3650 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Tear down the root_record that embeds @cache.
 *
 * Every backref is unlinked from rec->backrefs in turn; presumably each
 * backref and finally the record itself are released afterwards - confirm.
 */
3654 static void free_root_record(struct cache_extent *cache)
3656 struct root_record *rec;
3657 struct root_backref *backref;
3659 rec = container_of(cache, struct root_record, cache);
3660 while (!list_empty(&rec->backrefs)) {
3661 backref = to_root_backref(rec->backrefs.next);
3662 list_del(&backref->list);
/*
 * Instantiates the tree-wide cleanup helper for root records on top of
 * free_root_record() (presumably named free_root_recs_tree() - confirm
 * against the macro definition).
 */
3669 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that an item of type @item_type referencing root @root_id was seen.
 *
 * @root_cache: cache of root records
 * @root_id:    root being referenced
 * @ref_root:   root that contains the reference
 * @dir:        directory inode holding the entry
 * @index:      directory index of the entry
 * @name:       entry name, @namelen bytes
 * @item_type:  BTRFS_DIR_ITEM_KEY, BTRFS_DIR_INDEX_KEY, BTRFS_ROOT_REF_KEY
 *              or BTRFS_ROOT_BACKREF_KEY
 * @errors:     REF_ERR_* bits to merge into the backref
 *
 * For every type except DIR_ITEM the index is compared with the one already
 * recorded by an earlier dir index / root ref / root backref and
 * REF_ERR_INDEX_UNMATCH is flagged on a mismatch.  Duplicate root refs and
 * root backrefs are flagged as well.  The backref is marked reachable once
 * both the forward ref and the dir item have been seen.
 */
3671 static int add_root_backref(struct cache_tree *root_cache,
3672 u64 root_id, u64 ref_root, u64 dir, u64 index,
3673 const char *name, int namelen,
3674 int item_type, int errors)
3676 struct root_record *rec;
3677 struct root_backref *backref;
3679 rec = get_root_rec(root_cache, root_id);
3680 BUG_ON(IS_ERR(rec));
3681 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3684 backref->errors |= errors;
/* A dir item carries no index, so it takes no part in the index check. */
3686 if (item_type != BTRFS_DIR_ITEM_KEY) {
3687 if (backref->found_dir_index || backref->found_back_ref ||
3688 backref->found_forward_ref) {
3689 if (backref->index != index)
3690 backref->errors |= REF_ERR_INDEX_UNMATCH;
3692 backref->index = index;
3696 if (item_type == BTRFS_DIR_ITEM_KEY) {
3697 if (backref->found_forward_ref)
3699 backref->found_dir_item = 1;
3700 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3701 backref->found_dir_index = 1;
3702 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3703 if (backref->found_forward_ref)
3704 backref->errors |= REF_ERR_DUP_ROOT_REF;
3705 else if (backref->found_dir_item)
3707 backref->found_forward_ref = 1;
3708 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3709 if (backref->found_back_ref)
3710 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3711 backref->found_back_ref = 1;
3716 if (backref->found_forward_ref && backref->found_dir_item)
3717 backref->reachable = 1;
/*
 * Move the directory entries that point at child roots from the per-tree
 * cache @src_cache into the global root cache @dst_cache.
 *
 * For the tree reloc root the source records are simply freed.  Otherwise
 * every record is removed from @src_cache, checked with is_child_root() and
 * its dir item / dir index backrefs are turned into root backrefs through
 * add_root_backref() before the record is freed.  A backref with an inode ref
 * is not expected here and triggers BUG_ON().
 */
3721 static int merge_root_recs(struct btrfs_root *root,
3722 struct cache_tree *src_cache,
3723 struct cache_tree *dst_cache)
3725 struct cache_extent *cache;
3726 struct ptr_node *node;
3727 struct inode_record *rec;
3728 struct inode_backref *backref;
3731 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3732 free_inode_recs_tree(src_cache);
3737 cache = search_cache_extent(src_cache, 0);
3740 node = container_of(cache, struct ptr_node, cache);
3742 remove_cache_extent(src_cache, &node->cache);
3745 ret = is_child_root(root, root->objectid, rec->ino);
3751 list_for_each_entry(backref, &rec->backrefs, list) {
3752 BUG_ON(backref->found_inode_ref);
3753 if (backref->found_dir_item)
3754 add_root_backref(dst_cache, rec->ino,
3755 root->root_key.objectid, backref->dir,
3756 backref->index, backref->name,
3757 backref->namelen, BTRFS_DIR_ITEM_KEY,
3759 if (backref->found_dir_index)
3760 add_root_backref(dst_cache, rec->ino,
3761 root->root_key.objectid, backref->dir,
3762 backref->index, backref->name,
3763 backref->namelen, BTRFS_DIR_INDEX_KEY,
3767 free_inode_rec(rec);
/*
 * Verify that every fs tree recorded in @root_cache is referenced correctly.
 *
 * The first pass over the cache clears the `reachable` flag of backrefs whose
 * referencing root has no found_ref itself (circular references are not
 * detected, see the fixme below).  The second pass reports:
 *  - fs trees in the free objectid range that have no reference, unless
 *    check_orphan_item() or a missing root item excuses them;
 *  - roots that are referenced but have no root item;
 *  - reachable backrefs that lack a dir item, dir index, root ref or root
 *    backref, or that carry other error bits.
 *
 * Returns 1 when any error was counted, 0 otherwise.
 */
3774 static int check_root_refs(struct btrfs_root *root,
3775 struct cache_tree *root_cache)
3777 struct root_record *rec;
3778 struct root_record *ref_root;
3779 struct root_backref *backref;
3780 struct cache_extent *cache;
3786 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3787 BUG_ON(IS_ERR(rec));
3790 /* fixme: this can not detect circular references */
3793 cache = search_cache_extent(root_cache, 0);
3797 rec = container_of(cache, struct root_record, cache);
3798 cache = next_cache_extent(cache);
3800 if (rec->found_ref == 0)
3803 list_for_each_entry(backref, &rec->backrefs, list) {
3804 if (!backref->reachable)
3807 ref_root = get_root_rec(root_cache,
3809 BUG_ON(IS_ERR(ref_root));
3810 if (ref_root->found_ref > 0)
3813 backref->reachable = 0;
3815 if (rec->found_ref == 0)
/* Second pass: report everything that is still inconsistent. */
3821 cache = search_cache_extent(root_cache, 0);
3825 rec = container_of(cache, struct root_record, cache);
3826 cache = next_cache_extent(cache);
3828 if (rec->found_ref == 0 &&
3829 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3830 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3831 ret = check_orphan_item(root->fs_info->tree_root,
3837 * If we don't have a root item then we likely just have
3838 * a dir item in a snapshot for this root but no actual
3839 * ref key or anything so it's meaningless.
3841 if (!rec->found_root_item)
3844 fprintf(stderr, "fs tree %llu not referenced\n",
3845 (unsigned long long)rec->objectid);
3849 if (rec->found_ref > 0 && !rec->found_root_item)
3851 list_for_each_entry(backref, &rec->backrefs, list) {
3852 if (!backref->found_dir_item)
3853 backref->errors |= REF_ERR_NO_DIR_ITEM;
3854 if (!backref->found_dir_index)
3855 backref->errors |= REF_ERR_NO_DIR_INDEX;
3856 if (!backref->found_back_ref)
3857 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3858 if (!backref->found_forward_ref)
3859 backref->errors |= REF_ERR_NO_ROOT_REF;
3860 if (backref->reachable && backref->errors)
3867 fprintf(stderr, "fs tree %llu refs %u %s\n",
3868 (unsigned long long)rec->objectid, rec->found_ref,
3869 rec->found_root_item ? "" : "not found");
3871 list_for_each_entry(backref, &rec->backrefs, list) {
3872 if (!backref->reachable)
3874 if (!backref->errors && rec->found_root_item)
3876 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3877 " index %llu namelen %u name %s errors %x\n",
3878 (unsigned long long)backref->ref_root,
3879 (unsigned long long)backref->dir,
3880 (unsigned long long)backref->index,
3881 backref->namelen, backref->name,
3883 print_ref_error(backref->errors);
3886 return errors > 0 ? 1 : 0;
/*
 * Parse the root ref item at @slot of @eb and record it in @root_cache.
 *
 * @key is the item's key and decides the direction: for BTRFS_ROOT_REF_KEY
 * the referenced root is key->offset and the referencing root is
 * key->objectid; for any other key type the two are swapped.
 *
 * A name longer than BTRFS_NAME_LEN is truncated to BTRFS_NAME_LEN bytes and
 * flagged with REF_ERR_NAME_TOO_LONG.  The name bytes are read from directly
 * behind the btrfs_root_ref structure; namebuf is not NUL terminated here and
 * is passed on together with its length.
 */
3889 static int process_root_ref(struct extent_buffer *eb, int slot,
3890 struct btrfs_key *key,
3891 struct cache_tree *root_cache)
3897 struct btrfs_root_ref *ref;
3898 char namebuf[BTRFS_NAME_LEN];
3901 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3903 dirid = btrfs_root_ref_dirid(eb, ref);
3904 index = btrfs_root_ref_sequence(eb, ref);
3905 name_len = btrfs_root_ref_name_len(eb, ref);
3907 if (name_len <= BTRFS_NAME_LEN) {
3911 len = BTRFS_NAME_LEN;
3912 error = REF_ERR_NAME_TOO_LONG;
3914 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3916 if (key->type == BTRFS_ROOT_REF_KEY) {
3917 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3918 index, namebuf, len, key->type, error);
3920 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3921 index, namebuf, len, key->type, error);
/*
 * Tear down the btrfs_corrupt_block that embeds @cache (presumably by freeing
 * it - confirm).
 */
3926 static void free_corrupt_block(struct cache_extent *cache)
3928 struct btrfs_corrupt_block *corrupt;
3930 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/*
 * Instantiates the tree-wide cleanup helper for corrupt block records on top
 * of free_corrupt_block(); check_fs_root() calls free_corrupt_blocks_tree().
 */
3934 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3937 * Repair the btree of the given root.
3939 * The fix is to remove the node key in corrupt_blocks cache_tree.
3940 * and rebalance the tree.
3941 * After the fix, the btree should be writeable.
/*
 * @root:           tree to repair
 * @corrupt_blocks: cache tree of btrfs_corrupt_block records for @root
 *
 * Nothing is done when @corrupt_blocks is empty.  Otherwise one transaction
 * is used for two passes over the records:
 *  1. for each record the parent slot is located with btrfs_search_slot() at
 *     the record's level (ins_len 0, so no balancing), the pointer is removed
 *     with btrfs_del_ptr() and the block's extent is freed;
 *  2. each recorded key is searched again with ins_len -1 so that
 *     btrfs_search_slot() rebalances the tree.
 *
 * The transaction is committed and the path released before returning.
 */
3943 static int repair_btree(struct btrfs_root *root,
3944 struct cache_tree *corrupt_blocks)
3946 struct btrfs_trans_handle *trans;
3947 struct btrfs_path path;
3948 struct btrfs_corrupt_block *corrupt;
3949 struct cache_extent *cache;
3950 struct btrfs_key key;
3955 if (cache_tree_empty(corrupt_blocks))
3958 trans = btrfs_start_transaction(root, 1);
3959 if (IS_ERR(trans)) {
3960 ret = PTR_ERR(trans);
3961 fprintf(stderr, "Error starting transaction: %s\n",
3965 btrfs_init_path(&path);
3966 cache = first_cache_extent(corrupt_blocks);
3968 corrupt = container_of(cache, struct btrfs_corrupt_block,
3970 level = corrupt->level;
3971 path.lowest_level = level;
3972 key.objectid = corrupt->key.objectid;
3973 key.type = corrupt->key.type;
3974 key.offset = corrupt->key.offset;
3977 * Here we don't want to do any tree balance, since it may
3978 * cause a balance with corrupted brother leaf/node,
3979 * so ins_len set to 0 here.
3980 * Balance will be done after all corrupt node/leaf is deleted.
3982 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3985 offset = btrfs_node_blockptr(path.nodes[level],
3988 /* Remove the ptr */
3989 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3993 * Remove the corresponding extent
3994 * return value is not concerned.
3996 btrfs_release_path(&path);
3997 ret = btrfs_free_extent(trans, root, offset,
3998 root->fs_info->nodesize, 0,
3999 root->root_key.objectid, level - 1, 0);
4000 cache = next_cache_extent(cache);
4003 /* Balance the btree using btrfs_search_slot() */
4004 cache = first_cache_extent(corrupt_blocks);
4006 corrupt = container_of(cache, struct btrfs_corrupt_block,
4008 memcpy(&key, &corrupt->key, sizeof(key));
4009 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
4012 /* return will always >0 since it won't find the item */
4014 btrfs_release_path(&path);
4015 cache = next_cache_extent(cache);
4018 btrfs_commit_transaction(trans, root);
4019 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree.
 *
 * @root:       tree to check
 * @root_cache: global cache of root records, updated with what is found
 * @wc:         walk control state reused across trees; its node array is
 *              reset here
 *
 * Outline:
 *  - a local corrupt_blocks tree is installed in fs_info for the duration of
 *    the check;
 *  - the root record is looked up (not for the tree reloc root) and marked as
 *    having a root item when the root has refs;
 *  - orphan data extents attached to @root are moved onto the matching inode
 *    records;
 *  - the root block is validated, then the tree is walked with
 *    walk_down_tree()/walk_up_tree(), starting either at the root node or at
 *    the recorded drop_progress key;
 *  - corrupted tree blocks are listed and handed to repair_btree();
 *  - child root references are merged into @root_cache and the collected
 *    inode records are checked with check_inode_recs().
 *
 * The corrupt block tree and the orphan data extents are freed before
 * returning.
 */
4023 static int check_fs_root(struct btrfs_root *root,
4024 struct cache_tree *root_cache,
4025 struct walk_control *wc)
4031 struct btrfs_path path;
4032 struct shared_node root_node;
4033 struct root_record *rec;
4034 struct btrfs_root_item *root_item = &root->root_item;
4035 struct cache_tree corrupt_blocks;
4036 struct orphan_data_extent *orphan;
4037 struct orphan_data_extent *tmp;
4038 enum btrfs_tree_block_status status;
4039 struct node_refs nrefs;
4042 * Reuse the corrupt_block cache tree to record corrupted tree block
4044 * Unlike the usage in extent tree check, here we do it in a per
4045 * fs/subvol tree base.
4047 cache_tree_init(&corrupt_blocks);
4048 root->fs_info->corrupt_blocks = &corrupt_blocks;
4050 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4051 rec = get_root_rec(root_cache, root->root_key.objectid);
4052 BUG_ON(IS_ERR(rec));
4053 if (btrfs_root_refs(root_item) > 0)
4054 rec->found_root_item = 1;
4057 btrfs_init_path(&path);
4058 memset(&root_node, 0, sizeof(root_node));
4059 cache_tree_init(&root_node.root_cache);
4060 cache_tree_init(&root_node.inode_cache);
4061 memset(&nrefs, 0, sizeof(nrefs));
4063 /* Move the orphan extent record to corresponding inode_record */
4064 list_for_each_entry_safe(orphan, tmp,
4065 &root->orphan_data_extents, list) {
4066 struct inode_record *inode;
4068 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4070 BUG_ON(IS_ERR(inode));
4071 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4072 list_move(&orphan->list, &inode->orphan_extents);
/* Start the walk with root_node as the active shared node at the root level. */
4075 level = btrfs_header_level(root->node);
4076 memset(wc->nodes, 0, sizeof(wc->nodes));
4077 wc->nodes[level] = &root_node;
4078 wc->active_node = level;
4079 wc->root_level = level;
4081 /* We may not have checked the root block, lets do that now */
4082 if (btrfs_is_leaf(root->node))
4083 status = btrfs_check_leaf(root, NULL, root->node);
4085 status = btrfs_check_node(root, NULL, root->node);
4086 if (status != BTRFS_TREE_BLOCK_CLEAN)
/* Walk from the root node unless a partially dropped tree records drop progress. */
4089 if (btrfs_root_refs(root_item) > 0 ||
4090 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4091 path.nodes[level] = root->node;
4092 extent_buffer_get(root->node);
4093 path.slots[level] = 0;
4095 struct btrfs_key key;
4096 struct btrfs_disk_key found_key;
4098 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4099 level = root_item->drop_level;
4100 path.lowest_level = level;
4101 if (level > btrfs_header_level(root->node) ||
4102 level >= BTRFS_MAX_LEVEL) {
4103 error("ignoring invalid drop level: %u", level);
4106 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4109 btrfs_node_key(path.nodes[level], &found_key,
4111 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4112 sizeof(found_key)));
4116 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4122 wret = walk_up_tree(root, &path, wc, &level);
4129 btrfs_release_path(&path);
4131 if (!cache_tree_empty(&corrupt_blocks)) {
4132 struct cache_extent *cache;
4133 struct btrfs_corrupt_block *corrupt;
4135 printf("The following tree block(s) is corrupted in tree %llu:\n",
4136 root->root_key.objectid);
4137 cache = first_cache_extent(&corrupt_blocks);
4139 corrupt = container_of(cache,
4140 struct btrfs_corrupt_block,
4142 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4143 cache->start, corrupt->level,
4144 corrupt->key.objectid, corrupt->key.type,
4145 corrupt->key.offset);
4146 cache = next_cache_extent(cache);
4149 printf("Try to repair the btree for root %llu\n",
4150 root->root_key.objectid);
4151 ret = repair_btree(root, &corrupt_blocks);
4153 fprintf(stderr, "Failed to repair btree: %s\n",
4156 printf("Btree for root %llu is fixed\n",
4157 root->root_key.objectid);
4161 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4165 if (root_node.current) {
4166 root_node.current->checked = 1;
4167 maybe_free_inode_rec(&root_node.inode_cache,
4171 err = check_inode_recs(root, &root_node.inode_cache);
/* The local corrupt block tree goes out of scope, detach it from fs_info. */
4175 free_corrupt_blocks_tree(&corrupt_blocks);
4176 root->fs_info->corrupt_blocks = NULL;
4177 free_orphan_data_extents(&root->orphan_data_extents);
4181 static int fs_root_objectid(u64 objectid)
4183 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4184 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4186 return is_fstree(objectid);
/*
 * Walk the root tree and check every fs/subvolume root referenced from it.
 *
 * ROOT_ITEM entries accepted by fs_root_objectid() are read and passed to
 * check_fs_root(); ROOT_REF and ROOT_BACKREF entries are passed to
 * process_root_ref().
 *
 * @fs_info:	filesystem whose tree_root is scanned
 * @root_cache:	root record cache handed on to check_fs_root()
 *
 * NOTE(review): the return statements are not part of the lines shown here,
 * confirm the exact return contract before relying on it.
 */
4189 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4190 struct cache_tree *root_cache)
4192 struct btrfs_path path;
4193 struct btrfs_key key;
4194 struct walk_control wc;
4195 struct extent_buffer *leaf, *tree_node;
4196 struct btrfs_root *tmp_root;
4197 struct btrfs_root *tree_root = fs_info->tree_root;
/* Publish the current phase to the progress task before starting it */
4201 if (ctx.progress_enabled) {
4202 ctx.tp = TASK_FS_ROOTS;
4203 task_start(ctx.info);
4207 * Just in case we made any changes to the extent tree that weren't
4208 * reflected into the free space cache yet.
4211 reset_cached_block_groups(fs_info);
4212 memset(&wc, 0, sizeof(wc));
4213 cache_tree_init(&wc.shared);
4214 btrfs_init_path(&path);
4219 key.type = BTRFS_ROOT_ITEM_KEY;
4220 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember the root node so a change of it during the walk can be noticed */
4225 tree_node = tree_root->node;
/* The root node was replaced: drop the collected records and the path */
4227 if (tree_node != tree_root->node) {
4228 free_root_recs_tree(root_cache);
4229 btrfs_release_path(&path);
4232 leaf = path.nodes[0];
/* Past the last item of this leaf, move on to the next leaf */
4233 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4234 ret = btrfs_next_leaf(tree_root, &path);
4240 leaf = path.nodes[0];
4242 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4243 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4244 fs_root_objectid(key.objectid)) {
/* Reloc trees are read without the cache and freed again further down */
4245 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4246 tmp_root = btrfs_read_fs_root_no_cache(
4249 key.offset = (u64)-1;
4250 tmp_root = btrfs_read_fs_root(
4253 if (IS_ERR(tmp_root)) {
4257 ret = check_fs_root(tmp_root, root_cache, &wc);
/* -EAGAIN: discard the records gathered so far together with the path */
4258 if (ret == -EAGAIN) {
4259 free_root_recs_tree(root_cache);
4260 btrfs_release_path(&path);
4265 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4266 btrfs_free_fs_root(tmp_root);
4267 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4268 key.type == BTRFS_ROOT_BACKREF_KEY) {
4269 process_root_ref(leaf, path.slots[0], &key,
4276 btrfs_release_path(&path);
/* The shared cache is expected to be empty once it has been freed */
4278 free_extent_cache_tree(&wc.shared);
4279 if (!cache_tree_empty(&wc.shared))
4280 fprintf(stderr, "warning line %d\n", __LINE__);
4282 task_stop(ctx.info);
4288 * Find the @index according by @ino and name.
4289 * Notice:time efficiency is O(N)
4291 * @root: the root of the fs/file tree
4292 * @index_ret: the index as return value
4293 * @namebuf: the name to match
4294 * @name_len: the length of name to match
4295 * @file_type: the file_type of INODE_ITEM to match
4297 * Returns 0 if found and *@index_ret will be modified with right value
4298 * Returns< 0 not found and *@index_ret will be (u64)-1
4300 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4301 u64 *index_ret, char *namebuf, u32 name_len,
4304 struct btrfs_path path;
4305 struct extent_buffer *node;
4306 struct btrfs_dir_item *di;
4307 struct btrfs_key key;
4308 struct btrfs_key location;
4309 char name[BTRFS_NAME_LEN] = {0};
4321 /* search from the last index */
4322 key.objectid = dirid;
4323 key.offset = (u64)-1;
4324 key.type = BTRFS_DIR_INDEX_KEY;
4326 btrfs_init_path(&path);
4327 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4332 ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4335 *index_ret = (64)-1;
4338 /* Check whether inode_id/filetype/name match */
4339 node = path.nodes[0];
4340 slot = path.slots[0];
4341 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4342 total = btrfs_item_size_nr(node, slot);
4343 while (cur < total) {
4345 len = btrfs_dir_name_len(node, di);
4346 data_len = btrfs_dir_data_len(node, di);
4348 btrfs_dir_item_key_to_cpu(node, di, &location);
4349 if (location.objectid != location_id ||
4350 location.type != BTRFS_INODE_ITEM_KEY ||
4351 location.offset != 0)
4354 filetype = btrfs_dir_type(node, di);
4355 if (file_type != filetype)
4358 if (len > BTRFS_NAME_LEN)
4359 len = BTRFS_NAME_LEN;
4361 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4362 if (len != name_len || strncmp(namebuf, name, len))
4365 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4366 *index_ret = key.offset;
4370 len += sizeof(*di) + data_len;
4371 di = (struct btrfs_dir_item *)((char *)di + len);
4377 btrfs_release_path(&path);
4382 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4383 * INODE_REF/INODE_EXTREF match.
4385 * @root: the root of the fs/file tree
4386 * @key: the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4387 * value while find index
4388 * @location_key: location key of the struct btrfs_dir_item to match
4389 * @name: the name to match
4390 * @namelen: the length of name
4391 * @file_type: the type of file to match
4393 * Return 0 if no error occurred.
4394 * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4395 * DIR_ITEM/DIR_INDEX
4396 * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4397 * and DIR_ITEM/DIR_INDEX mismatch
/*
 * Look up the DIR_ITEM or DIR_INDEX addressed by @key and compare its entries
 * with @location_key, @name/@namelen and @file_type.
 *
 * A DIR_INDEX key whose offset is (u64)-1 means the index is not known yet:
 * find_dir_index() is asked to locate it and writes it into key->offset.
 */
4399 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4400 struct btrfs_key *location_key, char *name,
4401 u32 namelen, u8 file_type)
4403 struct btrfs_path path;
4404 struct extent_buffer *node;
4405 struct btrfs_dir_item *di;
4406 struct btrfs_key location;
4407 char namebuf[BTRFS_NAME_LEN] = {0};
4416 /* get the index by traversing all index */
4417 if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4418 ret = find_dir_index(root, key->objectid,
4419 location_key->objectid, &key->offset,
4420 name, namelen, file_type);
4422 ret = DIR_INDEX_MISSING;
/* Exact lookup of the requested key */
4426 btrfs_init_path(&path);
4427 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
/* The MISSING flag is chosen according to the kind of key searched for */
4429 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4434 /* Check whether inode_id/filetype/name match */
4435 node = path.nodes[0];
4436 slot = path.slots[0];
4437 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4438 total = btrfs_item_size_nr(node, slot);
4439 while (cur < total) {
/* Assume a mismatch until an entry passes the comparisons below */
4440 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4441 DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4443 len = btrfs_dir_name_len(node, di);
4444 data_len = btrfs_dir_data_len(node, di);
4446 btrfs_dir_item_key_to_cpu(node, di, &location);
4447 if (location.objectid != location_key->objectid ||
4448 location.type != location_key->type ||
4449 location.offset != location_key->offset)
4452 filetype = btrfs_dir_type(node, di);
4453 if (file_type != filetype)
/*
 * NOTE(review): len is clamped before it is printed, so the warning shows
 * BTRFS_NAME_LEN and not the length stored on disk.
 */
4456 if (len > BTRFS_NAME_LEN) {
4457 len = BTRFS_NAME_LEN;
4458 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4460 key->type == BTRFS_DIR_ITEM_KEY ?
4461 "DIR_ITEM" : "DIR_INDEX",
4462 key->objectid, key->offset, len);
4464 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4466 if (len != namelen || strncmp(namebuf, name, len))
/* Step over this entry: header, name and trailing data */
4472 len += sizeof(*di) + data_len;
4473 di = (struct btrfs_dir_item *)((char *)di + len);
4478 btrfs_release_path(&path);
4483 * Prints inode ref error message
4485 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4486 u64 index, const char *namebuf, int name_len,
4487 u8 filetype, int err)
4492 /* root dir error */
4493 if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4495 "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4496 root->objectid, key->objectid, key->offset, namebuf);
4501 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4502 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4503 root->objectid, key->offset,
4504 btrfs_name_hash(namebuf, name_len),
4505 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4507 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4508 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4509 root->objectid, key->offset, index,
4510 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4515 * Insert the missing inode item.
4517 * Returns 0 means success.
4518 * Returns <0 means error.
/*
 * Create the INODE_ITEM of inode @ino with create_inode_item_lowmem(),
 * inside a transaction started and committed here.
 *
 * @root:	the root of the fs/file tree that lacks the item
 * @ino:	inode number whose INODE_ITEM is created
 * NOTE(review): the third parameter is on a line not shown here; the body
 * passes a value named filetype on to create_inode_item_lowmem().
 */
4520 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4523 struct btrfs_key key;
4524 struct btrfs_trans_handle *trans;
4525 struct btrfs_path path;
4529 key.type = BTRFS_INODE_ITEM_KEY;
4532 btrfs_init_path(&path);
4533 trans = btrfs_start_transaction(root, 1);
4534 if (IS_ERR(trans)) {
4539 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
/*
 * A negative result is a search error; zero presumably means the key was
 * found, i.e. the item already exists (TODO confirm). Neither case is
 * expected to reach the insertion below.
 */
4540 if (ret < 0 || !ret)
4543 /* insert inode item */
4544 create_inode_item_lowmem(trans, root, ino, filetype);
/* NOTE(review): the result of the commit is not looked at */
4547 btrfs_commit_transaction(trans, root);
4550 error("failed to repair root %llu INODE ITEM[%llu] missing",
4551 root->objectid, ino);
4552 btrfs_release_path(&path);
4557 * The ternary means dir item, dir index and relative inode ref.
4558 * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4559 * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4561 * If two of three is missing or mismatched, delete the existing one.
4562 * If one of three is missing or mismatched, add the missing one.
4564 * returns 0 means success.
4565 * returns not 0 means on error;
4567 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4568 u64 index, char *name, int name_len, u8 filetype,
4571 struct btrfs_trans_handle *trans;
4576 * stage shall be one of following valild values:
4577 * 0: Fine, nothing to do.
4578 * 1: One of three is wrong, so add missing one.
4579 * 2: Two of three is wrong, so delete existed one.
4581 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4583 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4585 if (err & (INODE_REF_MISSING))
4588 /* stage must be smllarer than 3 */
4591 trans = btrfs_start_transaction(root, 1);
4593 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4598 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4599 filetype, &index, 1, 1);
4603 btrfs_commit_transaction(trans, root);
4606 error("fail to repair inode %llu name %s filetype %u",
4607 ino, name, filetype);
4609 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
4610 stage == 2 ? "Delete" : "Add",
4611 ino, name, filetype);
4617 * Traverse the given INODE_REF and call find_dir_item() to find related
4618 * DIR_ITEM/DIR_INDEX.
4620 * @root: the root of the fs/file tree
4621 * @ref_key: the key of the INODE_REF
4622 * @path the path provides node and slot
4623 * @refs: the count of INODE_REF
4624 * @mode: the st_mode of INODE_ITEM
4625 * @name_ret: returns with the first ref's name
4626 * @name_len_ret: len of the name_ret
4628 * Return 0 if no error occurred.
/*
 * Walk every name stored in the INODE_REF item that @path points to and
 * look for the matching DIR_INDEX and DIR_ITEM with find_dir_item().
 *
 * @root:	the root of the fs/file tree
 * @ref_key:	key of the INODE_REF; objectid is the inode, offset the
 *		parent directory
 * @path:	path positioned on the INODE_REF item
 * @name_ret:	if not NULL, receives the first name found
 * @mode:	st_mode of the inode, converted with imode_to_type()
 *
 * When the global repair flag is set, problems found are handed to
 * repair_ternary_lowmem().
 */
4630 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4631 struct btrfs_path *path, char *name_ret,
4632 u32 *namelen_ret, u64 *refs_ret, int mode)
4634 struct btrfs_key key;
4635 struct btrfs_key location;
4636 struct btrfs_inode_ref *ref;
4637 struct extent_buffer *node;
4638 char namebuf[BTRFS_NAME_LEN] = {0};
4648 int need_research = 0;
4656 /* since after repair, path and the dir item may be changed */
4657 if (need_research) {
4659 btrfs_release_path(path);
4660 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
4661 /* the item was deleted, let path point to the last checked item */
4663 if (path->slots[0] == 0)
4664 btrfs_prev_leaf(root, path);
/* Every directory entry of this inode must point at its INODE_ITEM */
4672 location.objectid = ref_key->objectid;
4673 location.type = BTRFS_INODE_ITEM_KEY;
4674 location.offset = 0;
4675 node = path->nodes[0];
4676 slot = path->slots[0];
/* namebuf is a char array, so the element count equals its byte size */
4678 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4679 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4680 total = btrfs_item_size_nr(node, slot);
4683 /* Update inode ref count */
4686 index = btrfs_inode_ref_index(node, ref);
4687 name_len = btrfs_inode_ref_name_len(node, ref);
/* Names longer than BTRFS_NAME_LEN are read only up to that limit */
4689 if (name_len <= BTRFS_NAME_LEN) {
4692 len = BTRFS_NAME_LEN;
4693 warning("root %llu INODE_REF[%llu %llu] name too long",
4694 root->objectid, ref_key->objectid, ref_key->offset);
4697 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4699 /* copy the first name found to name_ret */
4700 if (refs == 1 && name_ret) {
4701 memcpy(name_ret, namebuf, len);
4705 /* Check root dir ref */
4706 if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4707 if (index != 0 || len != strlen("..") ||
4708 strncmp("..", namebuf, len) ||
4709 ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4710 /* set err bits then repair will delete the ref */
4711 err |= DIR_INDEX_MISSING;
4712 err |= DIR_ITEM_MISSING;
4717 /* Find related DIR_INDEX */
4718 key.objectid = ref_key->offset;
4719 key.type = BTRFS_DIR_INDEX_KEY;
4721 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4722 imode_to_type(mode));
4724 /* Find related dir_item */
4725 key.objectid = ref_key->offset;
4726 key.type = BTRFS_DIR_ITEM_KEY;
4727 key.offset = btrfs_name_hash(namebuf, len);
4728 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4729 imode_to_type(mode));
/*
 * NOTE(review): the repair and the error report receive name_len, which may
 * exceed the clamped len actually held in namebuf.
 */
4731 if (tmp_err && repair) {
4732 ret = repair_ternary_lowmem(root, ref_key->offset,
4733 ref_key->objectid, index, namebuf,
4734 name_len, imode_to_type(mode),
4741 print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4742 imode_to_type(mode), tmp_err);
/* Advance to the next name packed into the same item */
4744 len = sizeof(*ref) + name_len;
4745 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4756 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4757 * DIR_ITEM/DIR_INDEX.
4759 * @root: the root of the fs/file tree
4760 * @ref_key: the key of the INODE_EXTREF
4761 * @refs: the count of INODE_EXTREF
4762 * @mode: the st_mode of INODE_ITEM
4764 * Return 0 if no error occurred.
/*
 * Walk every entry of the INODE_EXTREF item at @slot of @node and look for
 * the matching DIR_INDEX and DIR_ITEM with find_dir_item().
 *
 * @root:	the root of the fs/file tree
 * @ref_key:	key of the INODE_EXTREF; objectid is the inode number
 * @node:	leaf holding the item
 * @slot:	slot of the item inside @node
 * @refs:	inode ref counter (the update itself is on lines not shown)
 */
4766 static int check_inode_extref(struct btrfs_root *root,
4767 struct btrfs_key *ref_key,
4768 struct extent_buffer *node, int slot, u64 *refs,
4771 struct btrfs_key key;
4772 struct btrfs_key location;
4773 struct btrfs_inode_extref *extref;
4774 char namebuf[BTRFS_NAME_LEN] = {0};
/* Every directory entry of this inode must point at its INODE_ITEM */
4784 location.objectid = ref_key->objectid;
4785 location.type = BTRFS_INODE_ITEM_KEY;
4786 location.offset = 0;
4788 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4789 total = btrfs_item_size_nr(node, slot);
4792 /* update inode ref count */
4794 name_len = btrfs_inode_extref_name_len(node, extref);
4795 index = btrfs_inode_extref_index(node, extref);
4796 parent = btrfs_inode_extref_parent(node, extref);
/* Names longer than BTRFS_NAME_LEN are read only up to that limit */
4797 if (name_len <= BTRFS_NAME_LEN) {
4800 len = BTRFS_NAME_LEN;
4801 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4802 root->objectid, ref_key->objectid, ref_key->offset);
4804 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
/*
 * NOTE(review): the comparison below uses name_len, not the clamped len
 * that was actually read into namebuf.
 */
4806 /* Check root dir ref name */
4807 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4808 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4809 root->objectid, ref_key->objectid, ref_key->offset,
4811 err |= ROOT_DIR_ERROR;
/*
 * NOTE(review): find_dir_item() takes a u8 file type as last argument and
 * check_inode_ref() passes imode_to_type(mode); here mode is passed as is.
 * Confirm whether a conversion is missing.
 */
4814 /* find related dir_index */
4815 key.objectid = parent;
4816 key.type = BTRFS_DIR_INDEX_KEY;
4818 ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4821 /* find related dir_item */
4822 key.objectid = parent;
4823 key.type = BTRFS_DIR_ITEM_KEY;
4824 key.offset = btrfs_name_hash(namebuf, len);
4825 ret = find_dir_item(root, &key, &location, namebuf, len, mode);
/* Advance to the next entry packed into the same item */
4828 len = sizeof(*extref) + name_len;
4829 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4839 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4840 * DIR_ITEM/DIR_INDEX match.
4841 * Return with @index_ret.
4843 * @root: the root of the fs/file tree
4844 * @key: the key of the INODE_REF/INODE_EXTREF
4845 * @name: the name in the INODE_REF/INODE_EXTREF
4846 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4847 * @index_ret: the index in the INODE_REF/INODE_EXTREF,
4848 * value (u64)-1 means do not check index
4849 * @ext_ref: the EXTENDED_IREF feature
4851 * Return 0 if no error occurred.
4852 * Return >0 for error bitmap
/*
 * Search the INODE_REF addressed by @key for an entry whose name equals
 * @name/@namelen and, unless *@index_ret is (u64)-1, whose index equals
 * *@index_ret. A second pass does the same on the INODE_EXTREF item.
 *
 * On a match the index found is stored in *@index_ret.
 *
 * Side effect: for the INODE_EXTREF pass @key itself is rewritten (type and
 * offset), so the caller's key is changed once that pass has run.
 */
4854 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4855 char *name, int namelen, u64 *index_ret,
4856 unsigned int ext_ref)
4858 struct btrfs_path path;
4859 struct btrfs_inode_ref *ref;
4860 struct btrfs_inode_extref *extref;
4861 struct extent_buffer *node;
4862 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4875 btrfs_init_path(&path);
4876 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4878 ret = INODE_REF_MISSING;
4882 node = path.nodes[0];
4883 slot = path.slots[0];
4885 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4886 total = btrfs_item_size_nr(node, slot);
4888 /* Iterate all entry of INODE_REF */
4889 while (cur < total) {
/* Stay at "missing" until an entry passes the comparisons below */
4890 ret = INODE_REF_MISSING;
4892 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4893 ref_index = btrfs_inode_ref_index(node, ref);
4894 if (*index_ret != (u64)-1 && *index_ret != ref_index)
/* The entry must fit into the item and the name into the name buffer */
4897 if (cur + sizeof(*ref) + ref_namelen > total ||
4898 ref_namelen > BTRFS_NAME_LEN) {
4899 warning("root %llu INODE %s[%llu %llu] name too long",
4901 key->type == BTRFS_INODE_REF_KEY ?
4903 key->objectid, key->offset);
4905 if (cur + sizeof(*ref) > total)
4907 len = min_t(u32, total - cur - sizeof(*ref),
4913 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4916 if (len != namelen || strncmp(ref_namebuf, name, len))
4919 *index_ret = ref_index;
/* Advance to the next name packed into the same item */
4923 len = sizeof(*ref) + ref_namelen;
4924 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4929 /* Skip if not support EXTENDED_IREF feature */
4933 btrfs_release_path(&path);
4934 btrfs_init_path(&path);
/* Build the INODE_EXTREF key in place; key->offset held the directory id */
4936 dir_id = key->offset;
4937 key->type = BTRFS_INODE_EXTREF_KEY;
4938 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4940 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4942 ret = INODE_REF_MISSING;
4946 node = path.nodes[0];
4947 slot = path.slots[0];
4949 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4951 total = btrfs_item_size_nr(node, slot);
4953 /* Iterate all entry of INODE_EXTREF */
4954 while (cur < total) {
4955 ret = INODE_REF_MISSING;
4957 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4958 ref_index = btrfs_inode_extref_index(node, extref);
4959 parent = btrfs_inode_extref_parent(node, extref);
4960 if (*index_ret != (u64)-1 && *index_ret != ref_index)
/* Entries that belong to another parent directory are not candidates */
4963 if (parent != dir_id)
4966 if (ref_namelen <= BTRFS_NAME_LEN) {
4969 len = BTRFS_NAME_LEN;
4970 warning("root %llu INODE %s[%llu %llu] name too long",
4972 key->type == BTRFS_INODE_REF_KEY ?
4974 key->objectid, key->offset);
4976 read_extent_buffer(node, ref_namebuf,
4977 (unsigned long)(extref + 1), len);
4979 if (len != namelen || strncmp(ref_namebuf, name, len))
4982 *index_ret = ref_index;
4987 len = sizeof(*extref) + ref_namelen;
4988 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4993 btrfs_release_path(&path);
4997 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
4998 u64 ino, u64 index, const char *namebuf,
4999 int name_len, u8 filetype, int err)
5001 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
5002 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
5003 root->objectid, key->objectid, key->offset, namebuf,
5005 err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5008 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
5009 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
5010 root->objectid, key->objectid, index, namebuf, filetype,
5011 err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5014 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
5016 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
5017 root->objectid, ino, index, namebuf, filetype,
5018 err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
5021 if (err & INODE_REF_MISSING)
5023 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5024 root->objectid, ino, key->objectid, namebuf, filetype);
5029 * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5031 * Returns error after repair
5033 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5034 u64 index, u8 filetype, char *namebuf, u32 name_len,
5039 if (err & INODE_ITEM_MISSING) {
5040 ret = repair_inode_item_missing(root, ino, filetype);
5042 err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
5045 if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5046 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5047 name_len, filetype, err);
5049 err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5050 err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5051 err &= ~(INODE_REF_MISSING);
/*
 * Walk the directory items of key type @type that belong to inode @ino,
 * starting the search at offset (u64)-1 and stepping backwards with
 * btrfs_previous_item().
 *
 * NOTE(review): the output parameter and the statement that accumulates the
 * (clamped) name lengths are on lines not shown here; presumably the sum of
 * the name lengths is returned through it - confirm against the full file.
 */
5057 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5060 struct btrfs_key key;
5061 struct btrfs_path path;
5063 struct btrfs_dir_item *di;
5073 key.offset = (u64)-1;
5075 btrfs_init_path(&path);
5076 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5081 /* if found, go to special case */
5086 ret = btrfs_previous_item(root, &path, ino, type);
5094 di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5096 total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5098 while (cur < total) {
5099 len = btrfs_dir_name_len(path.nodes[0], di);
/* Names are counted with at most BTRFS_NAME_LEN bytes */
5100 if (len > BTRFS_NAME_LEN)
5101 len = BTRFS_NAME_LEN;
/* From here on len is reused as the distance to the next entry */
5104 len += btrfs_dir_data_len(path.nodes[0], di);
5106 di = (struct btrfs_dir_item *)((char *)di + len);
5112 btrfs_release_path(&path);
5116 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5123 ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5127 ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5131 *size = item_size + index_size;
5135 error("failed to count root %llu INODE[%llu] root size",
5136 root->objectid, ino);
5141 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5142 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5144 * @root: the root of the fs/file tree
5145 * @di_key: the key of the DIR_ITEM/DIR_INDEX being checked
5147 * @size: the st_size of the INODE_ITEM
5148 * @ext_ref: the EXTENDED_IREF feature
5150 * Return 0 if no error occurred.
5151 * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
/*
 * Check every entry of the DIR_ITEM or DIR_INDEX that @path points to:
 * the INODE_ITEM it refers to must exist with a matching file type, a
 * matching INODE_REF/INODE_EXTREF must exist (find_inode_ref()) and the
 * sibling DIR_INDEX or DIR_ITEM must exist (find_dir_item()).
 *
 * @root:	the root of the fs/file tree
 * @di_key:	key of the DIR_ITEM/DIR_INDEX being checked
 * @path:	path positioned on that item; it is released and searched
 *		again while the function runs
 * @size:	the name length of every entry is added to *@size
 * @ext_ref:	the EXTENDED_IREF feature
 *
 * When the global repair flag is set, problems found are handed to
 * repair_dir_item().
 */
5153 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5154 struct btrfs_path *path, u64 *size,
5155 unsigned int ext_ref)
5157 struct btrfs_dir_item *di;
5158 struct btrfs_inode_item *ii;
5159 struct btrfs_key key;
5160 struct btrfs_key location;
5161 struct extent_buffer *node;
5163 char namebuf[BTRFS_NAME_LEN] = {0};
5175 int need_research = 0;
5178 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5179 * ignore index check.
5181 if (di_key->type == BTRFS_DIR_INDEX_KEY)
5182 index = di_key->offset;
5189 /* since after repair, path and the dir item may be changed */
5190 if (need_research) {
/* Tell the caller that the directory size has to be counted again */
5192 err |= DIR_COUNT_AGAIN;
5193 btrfs_release_path(path);
5194 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5195 /* the item was deleted, let path point the last checked item */
5197 if (path->slots[0] == 0)
5198 btrfs_prev_leaf(root, path);
5206 node = path->nodes[0];
5207 slot = path->slots[0];
5209 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5210 total = btrfs_item_size_nr(node, slot);
/* namebuf is a char array, so the element count equals its byte size */
5211 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5213 while (cur < total) {
5214 data_len = btrfs_dir_data_len(node, di);
5217 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5219 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5220 di_key->objectid, di_key->offset, data_len);
5222 name_len = btrfs_dir_name_len(node, di);
/* Names longer than BTRFS_NAME_LEN are read only up to that limit */
5223 if (name_len <= BTRFS_NAME_LEN) {
5226 len = BTRFS_NAME_LEN;
5227 warning("root %llu %s[%llu %llu] name too long",
5229 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5230 di_key->objectid, di_key->offset);
/* The size is accounted with the on-disk name_len, not the clamped len */
5232 (*size) += name_len;
5233 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5235 filetype = btrfs_dir_type(node, di);
/* The key offset of a DIR_ITEM must be the hash of the name it holds */
5237 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5238 di_key->offset != btrfs_name_hash(namebuf, len)) {
5240 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5241 root->objectid, di_key->objectid, di_key->offset,
5242 namebuf, len, filetype, di_key->offset,
5243 btrfs_name_hash(namebuf, len));
5246 btrfs_dir_item_key_to_cpu(node, di, &location);
5247 /* Ignore related ROOT_ITEM check */
5248 if (location.type == BTRFS_ROOT_ITEM_KEY)
5251 btrfs_release_path(path);
5252 /* Check relative INODE_ITEM(existence/filetype) */
5253 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5255 tmp_err |= INODE_ITEM_MISSING;
5259 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5260 struct btrfs_inode_item);
5261 mode = btrfs_inode_mode(path->nodes[0], ii);
5262 if (imode_to_type(mode) != filetype) {
5263 tmp_err |= INODE_ITEM_MISMATCH;
5267 /* Check relative INODE_REF/INODE_EXTREF */
5268 key.objectid = location.objectid;
5269 key.type = BTRFS_INODE_REF_KEY;
5270 key.offset = di_key->objectid;
5271 tmp_err |= find_inode_ref(root, &key, namebuf, len,
/*
 * NOTE(review): the test below reads key.type right after find_inode_ref()
 * worked on key; that helper may rewrite key->type, and di_key->type looks
 * like the intended operand - confirm.
 * NOTE(review): name_len is used below although namebuf holds at most the
 * clamped len bytes.
 */
5274 /* check relative INDEX/ITEM */
5275 key.objectid = di_key->objectid;
5276 if (key.type == BTRFS_DIR_ITEM_KEY) {
5277 key.type = BTRFS_DIR_INDEX_KEY;
5280 key.type = BTRFS_DIR_ITEM_KEY;
5281 key.offset = btrfs_name_hash(namebuf, name_len);
5284 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5285 name_len, filetype);
5286 /* find_dir_item may find index */
5287 if (key.type == BTRFS_DIR_INDEX_KEY)
/*
 * NOTE(review): mode is assigned above only after a successful INODE_ITEM
 * lookup; confirm which value it has here when that item is missing.
 */
5291 if (tmp_err && repair) {
5292 ret = repair_dir_item(root, di_key->objectid,
5293 location.objectid, index,
5294 imode_to_type(mode), namebuf,
5296 if (ret != tmp_err) {
5301 btrfs_release_path(path);
5302 print_dir_item_err(root, di_key, location.objectid, index,
5303 namebuf, name_len, filetype, tmp_err);
/* Advance to the next entry packed into the same item */
5305 len = sizeof(*di) + name_len + data_len;
5306 di = (struct btrfs_dir_item *)((char *)di + len);
5309 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5310 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5311 root->objectid, di_key->objectid,
/* Put the path back onto the item it pointed to on entry */
5318 btrfs_release_path(path);
5319 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
/* NOTE(review): a negative errno is OR-ed into the error bitmap here */
5321 err |= ret > 0 ? -ENOENT : ret;
5326 * Wrapper function of btrfs_punch_hole.
5328 * Returns 0 means success.
5329 * Returns not 0 means error.
5331 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5334 struct btrfs_trans_handle *trans;
5337 trans = btrfs_start_transaction(root, 1);
5339 return PTR_ERR(trans);
5341 ret = btrfs_punch_hole(trans, root, ino, start, len);
5343 error("failed to add hole [%llu, %llu] in inode [%llu]",
5346 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
5349 btrfs_commit_transaction(trans, root);
5354 * Check file extent datasum/hole, update the size of the file extents,
5355 * check and update the last offset of the file extent.
5357 * @root: the root of fs/file tree.
5358 * @fkey: the key of the file extent.
5359 * @nodatasum: INODE_NODATASUM feature.
5360 * @size: the sum of all EXTENT_DATA items size for this inode.
5361 * @end: the offset of the last extent.
5363 * Return 0 if no error occurred.
/*
 * Check one EXTENT_DATA item: inline extents are checked for size sanity,
 * regular and prealloc extents for their checksum coverage, and the file
 * offset is checked against the end of the previous extent (hole check).
 *
 * @root:	the root of fs/file tree
 * @fkey:	the key of the file extent
 * @node:	leaf holding the item
 * @slot:	slot of the item inside @node
 * @nodatasum:	INODE_NODATASUM feature
 * @size:	advanced by the number of bytes of this extent
 * @end:	expected start offset on entry, advanced past this extent
 */
5365 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5366 struct extent_buffer *node, int slot,
5367 unsigned int nodatasum, u64 *size, u64 *end)
5369 struct btrfs_file_extent_item *fi;
5372 u64 extent_num_bytes;
5374 u64 csum_found; /* In byte size, sectorsize aligned */
5375 u64 search_start; /* Logical range start we search for csum */
5376 u64 search_len; /* Logical range len we search for csum */
5377 unsigned int extent_type;
5378 unsigned int is_hole;
5383 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5385 /* Check inline extent */
5386 extent_type = btrfs_file_extent_type(node, fi);
5387 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5388 struct btrfs_item *e = btrfs_item_nr(slot);
5389 u32 item_inline_len;
5391 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5392 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5393 compressed = btrfs_file_extent_compression(node, fi);
5394 if (extent_num_bytes == 0) {
5396 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5397 root->objectid, fkey->objectid, fkey->offset);
5398 err |= FILE_EXTENT_ERROR;
/* The two lengths are only compared for uncompressed inline data */
5400 if (!compressed && extent_num_bytes != item_inline_len) {
5402 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5403 root->objectid, fkey->objectid, fkey->offset,
5404 extent_num_bytes, item_inline_len);
5405 err |= FILE_EXTENT_ERROR;
5407 *end += extent_num_bytes;
5408 *size += extent_num_bytes;
5412 /* Check extent type */
5413 if (extent_type != BTRFS_FILE_EXTENT_REG &&
5414 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5415 err |= FILE_EXTENT_ERROR;
5416 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5417 root->objectid, fkey->objectid, fkey->offset);
5421 /* Check REG_EXTENT/PREALLOC_EXTENT */
5422 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5423 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5424 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5425 extent_offset = btrfs_file_extent_offset(node, fi);
5426 compressed = btrfs_file_extent_compression(node, fi);
/* An extent with neither disk address nor disk length is a hole */
5427 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5430 * Check EXTENT_DATA csum
5432 * For plain (uncompressed) extent, we should only check the range
5433 * we're referring to, as it's possible that part of prealloc extent
5434 * has been written, and has csum:
5436 * |<--- Original large preallocated extent A ---->|
5437 * |<- Prealloc File Extent ->|<- Regular Extent ->|
5440 * For compressed extent, we should check the whole range.
5443 search_start = disk_bytenr + extent_offset;
5444 search_len = extent_num_bytes;
5446 search_start = disk_bytenr;
5447 search_len = disk_num_bytes;
5449 ret = count_csum_range(root, search_start, search_len, &csum_found);
5450 if (csum_found > 0 && nodatasum) {
5451 err |= ODD_CSUM_ITEM;
5452 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5453 root->objectid, fkey->objectid, fkey->offset);
5454 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5455 !is_hole && (ret < 0 || csum_found < search_len)) {
5456 err |= CSUM_ITEM_MISSING;
5457 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5458 root->objectid, fkey->objectid, fkey->offset,
5459 csum_found, search_len);
5460 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5461 err |= ODD_CSUM_ITEM;
5462 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5463 root->objectid, fkey->objectid, fkey->offset, csum_found);
/*
 * NOTE(review): the guard around the punch_extent_hole() call is on a line
 * not shown here; confirm which value ret holds in the test that follows
 * when no repair was attempted.
 */
5466 /* Check EXTENT_DATA hole */
5467 if (!no_holes && *end != fkey->offset) {
5469 ret = punch_extent_hole(root, fkey->objectid,
5470 *end, fkey->offset - *end);
5471 if (!repair || ret) {
5472 err |= FILE_EXTENT_ERROR;
5473 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
5474 root->objectid, fkey->objectid, fkey->offset);
5478 *end += extent_num_bytes;
5480 *size += extent_num_bytes;
5486 * Set inode item nbytes to @nbytes
5488 * Returns 0 on success
5489 * Returns != 0 on error
/*
 * Rewrite the nbytes field of an INODE_ITEM to @nbytes inside a dedicated
 * transaction; @ino is the inode number reported in the messages.
 *
 * The key @path points at on entry is saved and searched for again before
 * returning, so the caller's path is re-positioned after the update.
 */
5491 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5492 struct btrfs_path *path,
5493 u64 ino, u64 nbytes)
5495 struct btrfs_trans_handle *trans;
5496 struct btrfs_inode_item *ii;
5497 struct btrfs_key key;
5498 struct btrfs_key research_key;
/* Remember where the caller's path was before it gets released below. */
5502 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5505 key.type = BTRFS_INODE_ITEM_KEY;
5508 trans = btrfs_start_transaction(root, 1);
5509 if (IS_ERR(trans)) {
5510 ret = PTR_ERR(trans);
/* Drop the caller's position and look the inode item up in the transaction. */
5515 btrfs_release_path(path);
5516 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Overwrite nbytes in place and mark the leaf dirty so it gets written. */
5524 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5525 struct btrfs_inode_item);
5526 btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5527 btrfs_mark_buffer_dirty(path->nodes[0]);
5529 btrfs_commit_transaction(trans, root);
5532 error("failed to set nbytes in inode %llu root %llu",
5533 ino, root->root_key.objectid);
/* The newline terminates the message instead of splitting it in the middle. */
5535 printf("Set nbytes in inode item %llu root %llu to %llu\n", ino,
5536 root->root_key.objectid, nbytes);
/* Point @path back at the key it referenced on entry. */
5539 btrfs_release_path(path);
5540 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5547 * Set directory inode isize to @isize.
5549 * Returns 0 on success.
5550 * Returns != 0 on error.
/*
 * Rewrite the size field of an INODE_ITEM to isize inside a dedicated
 * transaction; ino is the inode number reported in the messages.
 *
 * The key @path points at on entry is saved and searched for again before
 * returning, so the caller's path is re-positioned after the update.
 */
5552 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5553 struct btrfs_path *path,
5556 struct btrfs_trans_handle *trans;
5557 struct btrfs_inode_item *ii;
5558 struct btrfs_key key;
5559 struct btrfs_key research_key;
/* Remember where the caller's path was before it gets released below. */
5563 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5566 key.type = BTRFS_INODE_ITEM_KEY;
5569 trans = btrfs_start_transaction(root, 1);
5570 if (IS_ERR(trans)) {
5571 ret = PTR_ERR(trans);
/* Drop the caller's position and look the inode item up in the transaction. */
5576 btrfs_release_path(path);
5577 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Overwrite the size in place and mark the leaf dirty so it gets written. */
5585 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5586 struct btrfs_inode_item);
5587 btrfs_set_inode_size(path->nodes[0], ii, isize);
5588 btrfs_mark_buffer_dirty(path->nodes[0]);
5590 btrfs_commit_transaction(trans, root);
5593 error("failed to set isize in inode %llu root %llu",
5594 ino, root->root_key.objectid);
5596 printf("Set isize in inode %llu root %llu to %llu\n",
5597 ino, root->root_key.objectid, isize);
/* Point @path back at the key it referenced on entry. */
5599 btrfs_release_path(path);
5600 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5607 * Wrapper function for btrfs_add_orphan_item().
5609 * Returns 0 on success.
5610 * Returns != 0 on error.
/*
 * Add an orphan item for inode @ino through btrfs_add_orphan_item(), in a
 * dedicated transaction, then re-position @path on the key it pointed at
 * on entry.
 */
5612 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5613 struct btrfs_path *path, u64 ino)
5615 struct btrfs_trans_handle *trans;
5616 struct btrfs_key research_key;
/* Remember where the caller's path was before it gets released below. */
5620 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5622 trans = btrfs_start_transaction(root, 1);
5623 if (IS_ERR(trans)) {
5624 ret = PTR_ERR(trans);
/* The path is handed to btrfs_add_orphan_item(), so release it first. */
5629 btrfs_release_path(path);
5630 ret = btrfs_add_orphan_item(trans, root, path, ino);
5632 btrfs_commit_transaction(trans, root);
5635 error("failed to add inode %llu as orphan item root %llu",
5636 ino, root->root_key.objectid);
5638 printf("Added inode %llu as orphan item root %llu\n",
5639 ino, root->root_key.objectid);
/* Point @path back at the key it referenced on entry. */
5641 btrfs_release_path(path);
5642 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5648 /* Set inode_item nlink to @ref_count.
5649 * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
5651 * Returns 0 on success
/*
 * Reset the nlink field of inode @ino's INODE_ITEM to @ref_count.
 *
 * When @ref_count is 0 the inode is first linked into lost+found via
 * link_inode_to_lostfound(), which receives &ref_count and may update it.
 * If no usable @name/@namelen is supplied, the decimal inode number is used
 * as the file name.  The key @path points at on entry is searched for again
 * before returning.
 */
5653 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
5654 struct btrfs_path *path, u64 ino,
5655 const char *name, u32 namelen,
5656 u64 ref_count, u8 filetype, u64 *nlink)
5658 struct btrfs_trans_handle *trans;
5659 struct btrfs_inode_item *ii;
5660 struct btrfs_key key;
5661 struct btrfs_key old_key;
5662 char namebuf[BTRFS_NAME_LEN] = {0};
/* Remember where the caller's path was before it gets released below. */
5668 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
/* Use the caller's name when there is one, otherwise fall back to the inode number. */
5670 if (name && namelen) {
5671 ASSERT(namelen <= BTRFS_NAME_LEN);
5672 memcpy(namebuf, name, namelen);
5675 sprintf(namebuf, "%llu", ino);
5676 name_len = count_digits(ino);
5677 printf("Can't find file name for inode %llu, use %s instead\n",
5681 trans = btrfs_start_transaction(root, 1);
5682 if (IS_ERR(trans)) {
5683 ret = PTR_ERR(trans);
5687 btrfs_release_path(path);
5688 /* if refs is 0, put it into lostfound */
5689 if (ref_count == 0) {
5690 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
5691 name_len, filetype, &ref_count);
5696 /* reset inode_item's nlink to ref_count */
5698 key.type = BTRFS_INODE_ITEM_KEY;
5701 btrfs_release_path(path);
5702 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5708 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5709 struct btrfs_inode_item);
5710 btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
5711 btrfs_mark_buffer_dirty(path->nodes[0]);
5716 btrfs_commit_transaction(trans, root);
/* Argument order follows the format: inode number first, then root id. */
5720 "fail to repair nlink of inode %llu root %llu name %s filetype %u",
5721 ino, root->objectid, namebuf, filetype);
5723 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
5724 ino, root->objectid, namebuf, filetype);
/* Point @path back at the key it referenced on entry. */
5727 btrfs_release_path(path);
5728 ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
5735 * Check INODE_ITEM and related ITEMs (the same inode number)
5736 * 1. check link count
5737 * 2. check inode ref/extref
5738 * 3. check dir item/index
5740 * @ext_ref: the EXTENDED_IREF feature
5742 * Return 0 if no error occurred.
5743 * Return >0 on error or when the traversal is done (as an error bitmap)
/*
 * Check the inode item @path points at together with the following items
 * that share its objectid, dispatching each item type to its own checker
 * and then validating nlink, isize and nbytes (repairing when enabled).
 */
5745 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5746 unsigned int ext_ref)
5748 struct extent_buffer *node;
5749 struct btrfs_inode_item *ii;
5750 struct btrfs_key key;
5759 u64 extent_size = 0;
5761 unsigned int nodatasum;
5765 char namebuf[BTRFS_NAME_LEN] = {0};
/* The objectid of the current item's key is the inode number being checked. */
5768 node = path->nodes[0];
5769 slot = path->slots[0];
5771 btrfs_item_key_to_cpu(node, &key, slot);
5772 inode_id = key.objectid;
/* Items keyed by BTRFS_ORPHAN_OBJECTID are stepped over with btrfs_next_item(). */
5774 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5775 ret = btrfs_next_item(root, path);
/* Cache the inode item fields that the later checks compare against. */
5781 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5782 isize = btrfs_inode_size(node, ii);
5783 nbytes = btrfs_inode_nbytes(node, ii);
5784 mode = btrfs_inode_mode(node, ii);
5785 dir = imode_to_type(mode) == BTRFS_FT_DIR;
5786 nlink = btrfs_inode_nlink(node, ii);
5787 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
/* Advance to the next item and re-read its key before dispatching on it. */
5790 ret = btrfs_next_item(root, path);
5792 /* out will fill 'err' using current statistics */
5794 } else if (ret > 0) {
5799 node = path->nodes[0];
5800 slot = path->slots[0];
5801 btrfs_item_key_to_cpu(node, &key, slot);
5802 if (key.objectid != inode_id)
/* Per-item-type checks; refs, size and extent_size are accumulated here. */
5806 case BTRFS_INODE_REF_KEY:
5807 ret = check_inode_ref(root, &key, path, namebuf,
5808 &name_len, &refs, mode);
5811 case BTRFS_INODE_EXTREF_KEY:
5812 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5813 warning("root %llu EXTREF[%llu %llu] isn't supported",
5814 root->objectid, key.objectid,
5816 ret = check_inode_extref(root, &key, node, slot, &refs,
5820 case BTRFS_DIR_ITEM_KEY:
5821 case BTRFS_DIR_INDEX_KEY:
5823 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5824 root->objectid, inode_id,
5825 imode_to_type(mode), key.objectid,
5828 ret = check_dir_item(root, &key, path, &size, ext_ref);
5831 case BTRFS_EXTENT_DATA_KEY:
5833 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5834 root->objectid, inode_id, key.objectid,
5837 ret = check_file_extent(root, &key, node, slot,
5838 nodatasum, &extent_size,
5842 case BTRFS_XATTR_ITEM_KEY:
5845 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5846 key.objectid, key.type, key.offset);
5851 /* verify INODE_ITEM nlink/isize/nbytes */
/* DIR_COUNT_AGAIN is cleared and the directory size recounted when repairing. */
5853 if (repair && (err & DIR_COUNT_AGAIN)) {
5854 err &= ~DIR_COUNT_AGAIN;
5855 count_dir_isize(root, inode_id, &size);
/* When repairing, an nlink or ref count other than 1 goes to the nlink repair helper. */
5858 if ((nlink != 1 || refs != 1) && repair) {
5859 ret = repair_inode_nlinks_lowmem(root, path, inode_id,
5860 namebuf, name_len, refs, imode_to_type(mode),
5865 err |= LINK_COUNT_ERROR;
5866 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5867 root->objectid, inode_id, nlink);
5871 * Just a warning, as dir inode nbytes is just an
5872 * instructive value.
5874 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5875 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5876 root->objectid, inode_id,
5877 root->fs_info->nodesize);
/* isize is compared against the size accumulated by check_dir_item(). */
5880 if (isize != size) {
5882 ret = repair_dir_isize_lowmem(root, path,
5884 if (!repair || ret) {
5887 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5888 root->objectid, inode_id, isize, size);
/* nlink is compared against the number of inode refs that were found. */
5892 if (nlink != refs) {
5894 ret = repair_inode_nlinks_lowmem(root, path,
5895 inode_id, namebuf, name_len, refs,
5896 imode_to_type(mode), &nlink);
5897 if (!repair || ret) {
5898 err |= LINK_COUNT_ERROR;
5900 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5901 root->objectid, inode_id, nlink, refs);
/* nlink matches refs but is zero: the inode is handled as an orphan. */
5903 } else if (!nlink) {
5905 ret = repair_inode_orphan_item_lowmem(root,
5907 if (!repair || ret) {
5909 error("root %llu INODE[%llu] is orphan item",
5910 root->objectid, inode_id);
/* Without NO_HOLES, a file whose extents end before isize needs a hole extent. */
5914 if (!nbytes && !no_holes && extent_end < isize) {
5916 ret = punch_extent_hole(root, inode_id,
5917 extent_end, isize - extent_end);
5918 if (!repair || ret) {
5919 err |= NBYTES_ERROR;
5921 "root %llu INODE[%llu] size %llu should have a file extent hole",
5922 root->objectid, inode_id, isize);
/* nbytes is compared against the extent size accumulated by check_file_extent(). */
5926 if (nbytes != extent_size) {
5928 ret = repair_inode_nbytes_lowmem(root, path,
5929 inode_id, extent_size);
5930 if (!repair || ret) {
5931 err |= NBYTES_ERROR;
5933 "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5934 root->objectid, inode_id, nbytes,
5944 * Insert the missing inode item and inode ref.
5946 * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
5947 * Root dir should be handled specially because root dir is the root of fs.
5949 * returns err (>0 or 0) after repair
/*
 * Repair the first inode (BTRFS_FIRST_FREE_OBJECTID) of @root according to
 * the bits set in @err: re-insert its ".." INODE_REF and/or its INODE_ITEM.
 * Bits for problems that were repaired are cleared from @err.
 */
5951 static int repair_fs_first_inode(struct btrfs_root *root, int err)
5953 struct btrfs_trans_handle *trans;
5954 struct btrfs_key key;
5955 struct btrfs_path path;
5956 int filetype = BTRFS_FT_DIR;
5959 btrfs_init_path(&path);
/* Missing INODE_REF: the first inode references itself as parent. */
5961 if (err & INODE_REF_MISSING) {
5962 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5963 key.type = BTRFS_INODE_REF_KEY;
5964 key.offset = BTRFS_FIRST_FREE_OBJECTID;
5966 trans = btrfs_start_transaction(root, 1);
5967 if (IS_ERR(trans)) {
5968 ret = PTR_ERR(trans);
5972 btrfs_release_path(&path);
5973 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
5977 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
5978 BTRFS_FIRST_FREE_OBJECTID,
5979 BTRFS_FIRST_FREE_OBJECTID, 0);
5983 printf("Add INODE_REF[%llu %llu] name %s\n",
5984 BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
5986 err &= ~INODE_REF_MISSING;
5989 error("fail to insert first inode's ref");
5990 btrfs_commit_transaction(trans, root);
/* Missing INODE_ITEM: recreate it as a directory (filetype is BTRFS_FT_DIR). */
5993 if (err & INODE_ITEM_MISSING) {
5994 ret = repair_inode_item_missing(root,
5995 BTRFS_FIRST_FREE_OBJECTID, filetype);
5998 err &= ~INODE_ITEM_MISSING;
6002 error("fail to repair first inode");
6003 btrfs_release_path(&path);
6008 * check first root dir's inode_item and inode_ref
6010 * returns 0 means no error
6011 * returns >0 means error
6012 * returns <0 means fatal error
/*
 * Check that @root has an INODE_ITEM for BTRFS_FIRST_FREE_OBJECTID whose
 * mode is a directory, and a ".." inode ref pointing back at itself.
 * Problems are collected in an error bitmap and handed to
 * repair_fs_first_inode().
 */
6014 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
6016 struct btrfs_path path;
6017 struct btrfs_key key;
6018 struct btrfs_inode_item *ii;
6024 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6025 key.type = BTRFS_INODE_ITEM_KEY;
6028 /* For root being dropped, we don't need to check first inode */
6029 if (btrfs_root_refs(&root->root_item) == 0 &&
6030 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
6031 BTRFS_FIRST_FREE_OBJECTID)
6034 btrfs_init_path(&path);
6035 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6040 err |= INODE_ITEM_MISSING;
/* The item exists: its mode must describe a directory. */
6042 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6043 struct btrfs_inode_item);
6044 mode = btrfs_inode_mode(path.nodes[0], ii);
6045 if (imode_to_type(mode) != BTRFS_FT_DIR)
6046 err |= INODE_ITEM_MISMATCH;
6049 /* lookup first inode ref */
6050 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6051 key.type = BTRFS_INODE_REF_KEY;
6052 /* special index value */
6055 ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6061 btrfs_release_path(&path);
6064 err = repair_fs_first_inode(root, err);
/* Report whatever is still set in the error bitmap. */
6066 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6067 error("root dir INODE_ITEM is %s",
6068 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6069 if (err & INODE_REF_MISSING)
6070 error("root dir INODE_REF is missing");
/* A negative ret takes precedence over the error bitmap. */
6072 return ret < 0 ? ret : err;
/*
 * Look up a tree backref of @rec by building a template backref from
 * @parent/@root and searching rec->backref_tree with
 * compare_extent_backref().
 */
6075 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6076 u64 parent, u64 root)
6078 struct rb_node *node;
6079 struct tree_backref *back = NULL;
6080 struct tree_backref match = {
/* Template for a full backref, identified by its parent. */
6087 match.parent = parent;
6088 match.node.full_backref = 1;
6093 node = rb_search(&rec->backref_tree, &match.node.node,
6094 (rb_compare_keys)compare_extent_backref, NULL);
/* Convert the rb-tree node back to its containing tree_backref. */
6096 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Look up a data backref of @rec by building a template backref from the
 * given identifiers and searching rec->backref_tree with
 * compare_extent_backref().
 */
6101 static struct data_backref *find_data_backref(struct extent_record *rec,
6102 u64 parent, u64 root,
6103 u64 owner, u64 offset,
6105 u64 disk_bytenr, u64 bytes)
6107 struct rb_node *node;
6108 struct data_backref *back = NULL;
6109 struct data_backref match = {
6116 .found_ref = found_ref,
6117 .disk_bytenr = disk_bytenr,
/* Template for a full backref, identified by its parent. */
6121 match.parent = parent;
6122 match.node.full_backref = 1;
6127 node = rb_search(&rec->backref_tree, &match.node.node,
6128 (rb_compare_keys)compare_extent_backref, NULL);
/* Convert the rb-tree node back to its containing data_backref. */
6130 back = to_data_backref(rb_node_to_extent_backref(node));
6135 * Iterate all item on the tree and call check_inode_item() to check.
6137 * @root: the root of the tree to be checked.
6138 * @ext_ref: the EXTENDED_IREF feature
6140 * Return 0 if no error found.
6141 * Return <0 for error.
/*
 * Check one fs/subvolume tree: check its first inode, then traverse the
 * tree with walk_down_tree_v2()/walk_up_tree_v2().
 */
6143 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6145 struct btrfs_path path;
6146 struct node_refs nrefs;
6147 struct btrfs_root_item *root_item = &root->root_item;
6153 * We need to manually check the first inode item(256)
6154 * As the following traversal function will only start from
6155 * the first inode item in the leaf, if inode item(256) is missing
6156 * we will just skip it forever.
6158 ret = check_fs_first_inode(root, ext_ref);
6163 memset(&nrefs, 0, sizeof(nrefs));
6164 level = btrfs_header_level(root->node);
6165 btrfs_init_path(&path);
/*
 * Root still referenced, or no drop progress recorded: start the walk at
 * the root node, taking a reference on it for the path.
 */
6167 if (btrfs_root_refs(root_item) > 0 ||
6168 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6169 path.nodes[level] = root->node;
6170 path.slots[level] = 0;
6171 extent_buffer_get(root->node);
6173 struct btrfs_key key;
/* Otherwise start from the recorded drop_progress key at drop_level. */
6175 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6176 level = root_item->drop_level;
6177 path.lowest_level = level;
6178 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6185 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
6188 /* if ret is negative, walk shall stop */
6194 ret = walk_up_tree_v2(root, &path, &level);
6196 /* Normal exit, reset ret to err */
6203 btrfs_release_path(&path);
6208 * Find the relative ref for root_ref and root_backref.
6210 * @root: the root of the root tree.
6211 * @ref_key: the key of the root ref.
6213 * Return 0 if no error occurred.
/*
 * For the ROOT_REF or ROOT_BACKREF item at @node/@slot, look up the
 * counterpart item (backref for a ref, ref for a backref) and verify that
 * dirid, sequence, name length and name agree.
 */
6215 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6216 struct extent_buffer *node, int slot)
6218 struct btrfs_path path;
6219 struct btrfs_key key;
6220 struct btrfs_root_ref *ref;
6221 struct btrfs_root_ref *backref;
6222 char ref_name[BTRFS_NAME_LEN] = {0};
6223 char backref_name[BTRFS_NAME_LEN] = {0};
6229 u32 backref_namelen;
6234 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6235 ref_dirid = btrfs_root_ref_dirid(node, ref);
6236 ref_seq = btrfs_root_ref_sequence(node, ref);
6237 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* Names longer than BTRFS_NAME_LEN are truncated to the buffer size, with a warning. */
6239 if (ref_namelen <= BTRFS_NAME_LEN) {
6242 len = BTRFS_NAME_LEN;
6243 warning("%s[%llu %llu] ref_name too long",
6244 ref_key->type == BTRFS_ROOT_REF_KEY ?
6245 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
/* The name is stored directly after the btrfs_root_ref structure. */
6248 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6250 /* Find relative root_ref */
/*
 * The counterpart key swaps objectid and offset, and flips the type:
 * REF + BACKREF - type yields BACKREF for a REF and REF for a BACKREF.
 */
6251 key.objectid = ref_key->offset;
6252 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6253 key.offset = ref_key->objectid;
6255 btrfs_init_path(&path);
6256 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6258 err |= ROOT_REF_MISSING;
6259 error("%s[%llu %llu] couldn't find relative ref",
6260 ref_key->type == BTRFS_ROOT_REF_KEY ?
6261 "ROOT_REF" : "ROOT_BACKREF",
6262 ref_key->objectid, ref_key->offset);
6266 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6267 struct btrfs_root_ref);
6268 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6269 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6270 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
/* Same truncation rule for the counterpart's name. */
6272 if (backref_namelen <= BTRFS_NAME_LEN) {
6273 len = backref_namelen;
6275 len = BTRFS_NAME_LEN;
6276 warning("%s[%llu %llu] ref_name too long",
6277 key.type == BTRFS_ROOT_REF_KEY ?
6278 "ROOT_REF" : "ROOT_BACKREF",
6279 key.objectid, key.offset);
6281 read_extent_buffer(path.nodes[0], backref_name,
6282 (unsigned long)(backref + 1), len);
/* Any difference in dirid, sequence, name length or name is a mismatch. */
6284 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6285 ref_namelen != backref_namelen ||
6286 strncmp(ref_name, backref_name, len)) {
6287 err |= ROOT_REF_MISMATCH;
6288 error("%s[%llu %llu] mismatch relative ref",
6289 ref_key->type == BTRFS_ROOT_REF_KEY ?
6290 "ROOT_REF" : "ROOT_BACKREF",
6291 ref_key->objectid, ref_key->offset);
6294 btrfs_release_path(&path);
6299 * Check all fs/file tree in low_memory mode.
6301 * 1. for fs tree root item, call check_fs_root_v2()
6302 * 2. for fs tree root ref/backref, call check_root_ref()
6304 * Return 0 if no error occurred.
/*
 * Iterate the items of the tree root starting at BTRFS_FS_TREE_OBJECTID:
 * ROOT_ITEMs of fs trees are read and checked with check_fs_root_v2(),
 * ROOT_REF/ROOT_BACKREF items are checked with check_root_ref().
 */
6306 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6308 struct btrfs_root *tree_root = fs_info->tree_root;
6309 struct btrfs_root *cur_root = NULL;
6310 struct btrfs_path path;
6311 struct btrfs_key key;
6312 struct extent_buffer *node;
6313 unsigned int ext_ref;
6318 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6320 btrfs_init_path(&path);
6321 key.objectid = BTRFS_FS_TREE_OBJECTID;
6323 key.type = BTRFS_ROOT_ITEM_KEY;
6325 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6329 } else if (ret > 0) {
6335 node = path.nodes[0];
6336 slot = path.slots[0];
6337 btrfs_item_key_to_cpu(node, &key, slot);
6338 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6340 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6341 fs_root_objectid(key.objectid)) {
/* Reloc trees are read without the root cache and freed again after the check. */
6342 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6343 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6346 key.offset = (u64)-1;
6347 cur_root = btrfs_read_fs_root(fs_info, &key);
6350 if (IS_ERR(cur_root)) {
6351 error("Fail to read fs/subvol tree: %lld",
6357 ret = check_fs_root_v2(cur_root, ext_ref);
6360 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6361 btrfs_free_fs_root(cur_root);
6362 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6363 key.type == BTRFS_ROOT_BACKREF_KEY) {
6364 ret = check_root_ref(tree_root, &key, node, slot);
6368 ret = btrfs_next_item(tree_root, &path);
6378 btrfs_release_path(&path);
/*
 * Entry point for the fs roots check: announces the step on stderr when
 * progress reporting is off, then runs the lowmem checker
 * (check_fs_roots_v2) or the original one (check_fs_roots) depending on
 * check_mode.
 */
6382 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6383 struct cache_tree *root_cache)
6387 if (!ctx.progress_enabled)
6388 fprintf(stderr, "checking fs roots\n");
6389 if (check_mode == CHECK_MODE_LOWMEM)
6390 ret = check_fs_roots_v2(fs_info);
6392 ret = check_fs_roots(fs_info, root_cache);
/*
 * Walk every backref of @rec and verify it against the extent tree and
 * the references that were actually found, then compare the accumulated
 * reference count with rec->refs.  Each failed check has a diagnostic
 * printed to stderr (NOTE(review): presumably only when @print_errs is
 * set; the guarding lines are not visible here).
 */
6397 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6399 struct extent_backref *back, *tmp;
6400 struct tree_backref *tback;
6401 struct data_backref *dback;
6405 rbtree_postorder_for_each_entry_safe(back, tmp,
6406 &rec->backref_tree, node) {
/* Backref was seen in a tree but has no counterpart in the extent tree. */
6407 if (!back->found_extent_tree) {
6411 if (back->is_data) {
6412 dback = to_data_backref(back);
6413 fprintf(stderr, "Data backref %llu %s %llu"
6414 " owner %llu offset %llu num_refs %lu"
6415 " not found in extent tree\n",
6416 (unsigned long long)rec->start,
6417 back->full_backref ?
6419 back->full_backref ?
6420 (unsigned long long)dback->parent:
6421 (unsigned long long)dback->root,
6422 (unsigned long long)dback->owner,
6423 (unsigned long long)dback->offset,
6424 (unsigned long)dback->num_refs);
6426 tback = to_tree_backref(back);
6427 fprintf(stderr, "Tree backref %llu parent %llu"
6428 " root %llu not found in extent tree\n",
6429 (unsigned long long)rec->start,
6430 (unsigned long long)tback->parent,
6431 (unsigned long long)tback->root);
/* Tree backref recorded in the extent tree that nothing references back. */
6434 if (!back->is_data && !back->found_ref) {
6438 tback = to_tree_backref(back);
6439 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6440 (unsigned long long)rec->start,
6441 back->full_backref ? "parent" : "root",
6442 back->full_backref ?
6443 (unsigned long long)tback->parent :
6444 (unsigned long long)tback->root, back);
/* Data backrefs: found count, disk bytenr and byte length must match the record. */
6446 if (back->is_data) {
6447 dback = to_data_backref(back);
6448 if (dback->found_ref != dback->num_refs) {
6452 fprintf(stderr, "Incorrect local backref count"
6453 " on %llu %s %llu owner %llu"
6454 " offset %llu found %u wanted %u back %p\n",
6455 (unsigned long long)rec->start,
6456 back->full_backref ?
6458 back->full_backref ?
6459 (unsigned long long)dback->parent:
6460 (unsigned long long)dback->root,
6461 (unsigned long long)dback->owner,
6462 (unsigned long long)dback->offset,
6463 dback->found_ref, dback->num_refs, back);
6465 if (dback->disk_bytenr != rec->start) {
6469 fprintf(stderr, "Backref disk bytenr does not"
6470 " match extent record, bytenr=%llu, "
6471 "ref bytenr=%llu\n",
6472 (unsigned long long)rec->start,
6473 (unsigned long long)dback->disk_bytenr);
6476 if (dback->bytes != rec->nr) {
6480 fprintf(stderr, "Backref bytes do not match "
6481 "extent backref, bytenr=%llu, ref "
6482 "bytes=%llu, backref bytes=%llu\n",
6483 (unsigned long long)rec->start,
6484 (unsigned long long)rec->nr,
6485 (unsigned long long)dback->bytes);
/* Accumulate the global reference count; data backrefs add found_ref. */
6488 if (!back->is_data) {
6491 dback = to_data_backref(back);
6492 found += dback->found_ref;
/* The accumulated count must equal the record's reference count. */
6495 if (found != rec->refs) {
6499 fprintf(stderr, "Incorrect global backref count "
6500 "on %llu found %llu wanted %llu\n",
6501 (unsigned long long)rec->start,
6502 (unsigned long long)found,
6503 (unsigned long long)rec->refs);
/*
 * Per-node callback passed to rb_free_nodes(): maps the rb-tree node to
 * its containing extent_backref.
 */
6509 static void __free_one_backref(struct rb_node *node)
6511 struct extent_backref *back = rb_node_to_extent_backref(node);
/* Release every backref in rec->backref_tree using __free_one_backref(). */
6516 static void free_all_extent_backrefs(struct extent_record *rec)
6518 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Empty @extent_cache: take the first cache extent, remove it from the
 * tree and free the backrefs of the extent_record that embeds it.
 */
6521 static void free_extent_record_cache(struct cache_tree *extent_cache)
6523 struct cache_extent *cache;
6524 struct extent_record *rec;
6527 cache = first_cache_extent(extent_cache);
/* The cache_extent is embedded in the extent_record. */
6530 rec = container_of(cache, struct extent_record, cache);
6531 remove_cache_extent(extent_cache, cache);
6532 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once it is known to be consistent: content
 * and owner ref checked, extent item ref count equal to a non-zero refs,
 * no duplicates, all backpointers verified, and none of the
 * bad_full_backref / crossing_stripes / wrong_chunk_type flags set.
 */
6537 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6538 struct extent_record *rec)
/* all_backpointers_checked() is called with print_errs == 0, i.e. quietly. */
6540 if (rec->content_checked && rec->owner_ref_checked &&
6541 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6542 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6543 !rec->bad_full_backref && !rec->crossing_stripes &&
6544 !rec->wrong_chunk_type) {
6545 remove_cache_extent(extent_cache, &rec->cache);
6546 free_all_extent_backrefs(rec);
6547 list_del_init(&rec->list);
/*
 * Verify that tree block @buf is referenced by the root named in its
 * header: first by looking for a matching backref in @rec, then by
 * searching the owner's tree for a parent node that points at @buf.
 *
 * Returns 0 when a reference was found, 1 otherwise.
 */
6553 static int check_owner_ref(struct btrfs_root *root,
6554 struct extent_record *rec,
6555 struct extent_buffer *buf)
6557 struct extent_backref *node, *tmp;
6558 struct tree_backref *back;
6559 struct btrfs_root *ref_root;
6560 struct btrfs_key key;
6561 struct btrfs_path path;
6562 struct extent_buffer *parent;
/* Compare the header owner with the root recorded in each tree backref. */
6567 rbtree_postorder_for_each_entry_safe(node, tmp,
6568 &rec->backref_tree, node) {
6571 if (!node->found_ref)
6573 if (node->full_backref)
6575 back = to_tree_backref(node);
6576 if (btrfs_header_owner(buf) == back->root)
6579 BUG_ON(rec->is_root);
6581 /* try to find the block by search corresponding fs tree */
6582 key.objectid = btrfs_header_owner(buf);
6583 key.type = BTRFS_ROOT_ITEM_KEY;
6584 key.offset = (u64)-1;
6586 ref_root = btrfs_read_fs_root(root->fs_info, &key);
6587 if (IS_ERR(ref_root))
/* Use the first key stored in buf as the search key. */
6590 level = btrfs_header_level(buf);
6592 btrfs_item_key_to_cpu(buf, &key, 0);
6594 btrfs_node_key_to_cpu(buf, &key, 0);
/* lowest_level is one above buf, so path.nodes[level + 1] is the candidate parent. */
6596 btrfs_init_path(&path);
6597 path.lowest_level = level + 1;
6598 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
/* The parent's block pointer at the found slot must be buf's start address. */
6602 parent = path.nodes[level + 1];
6603 if (parent && buf->start == btrfs_node_blockptr(parent,
6604 path.slots[level + 1]))
6607 btrfs_release_path(&path);
6608 return found ? 0 : 1;
/*
 * Inspect the backrefs of @rec, testing for full backrefs and for a tree
 * backref whose root is BTRFS_EXTENT_TREE_OBJECTID.
 */
6611 static int is_extent_tree_record(struct extent_record *rec)
6613 struct extent_backref *node, *tmp;
6614 struct tree_backref *back;
6617 rbtree_postorder_for_each_entry_safe(node, tmp,
6618 &rec->backref_tree, node) {
6621 back = to_tree_backref(node);
6622 if (node->full_backref)
6624 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Look up the extent record covering [start, start + len) in
 * @extent_cache, test it with is_extent_tree_record(), and register a
 * corrupt extent record keyed by the record's parent key.
 */
6631 static int record_bad_block_io(struct btrfs_fs_info *info,
6632 struct cache_tree *extent_cache,
6635 struct extent_record *rec;
6636 struct cache_extent *cache;
6637 struct btrfs_key key;
6639 cache = lookup_cache_extent(extent_cache, start, len);
6643 rec = container_of(cache, struct extent_record, cache);
6644 if (!is_extent_tree_record(rec))
/* The corrupt record is keyed by the parent key stored in the extent record. */
6647 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6648 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot + 1 of tree block @buf.
 *
 * For a node (non-zero header level) the two key pointers are exchanged.
 * For a leaf the item data, item offsets, item sizes and keys are
 * exchanged.
 */
6651 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6652 struct extent_buffer *buf, int slot)
6654 if (btrfs_header_level(buf)) {
6655 struct btrfs_key_ptr ptr1, ptr2;
/* Read both key pointers, then write each back into the other's slot. */
6657 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6658 sizeof(struct btrfs_key_ptr));
6659 read_extent_buffer(buf, &ptr2,
6660 btrfs_node_key_ptr_offset(slot + 1),
6661 sizeof(struct btrfs_key_ptr));
6662 write_extent_buffer(buf, &ptr1,
6663 btrfs_node_key_ptr_offset(slot + 1),
6664 sizeof(struct btrfs_key_ptr));
6665 write_extent_buffer(buf, &ptr2,
6666 btrfs_node_key_ptr_offset(slot),
6667 sizeof(struct btrfs_key_ptr));
6669 struct btrfs_disk_key key;
/* Propagate the block's first key to the levels above. */
6670 btrfs_node_key(buf, &key, 0);
6671 btrfs_fixup_low_keys(root, path, &key,
6672 btrfs_header_level(buf) + 1);
6675 struct btrfs_item *item1, *item2;
6676 struct btrfs_key k1, k2;
6677 char *item1_data, *item2_data;
6678 u32 item1_offset, item2_offset, item1_size, item2_size;
6680 item1 = btrfs_item_nr(slot);
6681 item2 = btrfs_item_nr(slot + 1);
6682 btrfs_item_key_to_cpu(buf, &k1, slot);
6683 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6684 item1_offset = btrfs_item_offset(buf, item1);
6685 item2_offset = btrfs_item_offset(buf, item2);
6686 item1_size = btrfs_item_size(buf, item1);
6687 item2_size = btrfs_item_size(buf, item2);
/* Temporary copies of both items' data. */
6689 item1_data = malloc(item1_size);
6692 item2_data = malloc(item2_size);
6698 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6699 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): item1_data holds item1_size bytes but is written with
 * item2_size (and item2_data with item1_size).  If the two sizes differ
 * this reads past one of the buffers - confirm this is intended.
 */
6701 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6702 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
/* Exchange the offset and size recorded in the two item headers. */
6706 btrfs_set_item_offset(buf, item1, item2_offset);
6707 btrfs_set_item_offset(buf, item2, item1_offset);
6708 btrfs_set_item_size(buf, item1, item2_size);
6709 btrfs_set_item_size(buf, item2, item1_size);
/* Exchange the keys; path->slots[0] selects which item each call updates. */
6711 path->slots[0] = slot;
6712 btrfs_set_item_key_unsafe(root, path, &k2);
6713 path->slots[0] = slot + 1;
6714 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Scan adjacent key pairs of the block at path->lowest_level, comparing
 * them with btrfs_comp_cpu_keys() and exchanging pairs with swap_values().
 */
6719 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6721 struct extent_buffer *buf;
6722 struct btrfs_key k1, k2;
6724 int level = path->lowest_level;
6727 buf = path->nodes[level];
6728 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
/* Node keys and leaf item keys are read with different accessors. */
6730 btrfs_node_key_to_cpu(buf, &k1, i);
6731 btrfs_node_key_to_cpu(buf, &k2, i + 1);
6733 btrfs_item_key_to_cpu(buf, &k1, i);
6734 btrfs_item_key_to_cpu(buf, &k2, i + 1);
6736 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6738 ret = swap_values(root, path, buf, i);
6741 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf by shifting the following
 * item headers down by one and decrementing nritems.  Only the key types
 * tested below are handled.
 */
6747 static int delete_bogus_item(struct btrfs_root *root,
6748 struct btrfs_path *path,
6749 struct extent_buffer *buf, int slot)
6751 struct btrfs_key key;
6752 int nritems = btrfs_header_nritems(buf);
6754 btrfs_item_key_to_cpu(buf, &key, slot);
6756 /* These are all the keys we can deal with missing. */
6757 if (key.type != BTRFS_DIR_INDEX_KEY &&
6758 key.type != BTRFS_EXTENT_ITEM_KEY &&
6759 key.type != BTRFS_METADATA_ITEM_KEY &&
6760 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6761 key.type != BTRFS_EXTENT_DATA_REF_KEY)
6764 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6765 (unsigned long long)key.objectid, key.type,
6766 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array left by the removed slot. */
6767 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6768 btrfs_item_nr_offset(slot + 1),
6769 sizeof(struct btrfs_item) *
6770 (nritems - slot - 1));
6771 btrfs_set_header_nritems(buf, nritems - 1);
6773 struct btrfs_disk_key disk_key;
/* Propagate the leaf's (new) first key to the parent levels. */
6775 btrfs_item_key(buf, &disk_key, 0);
6776 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6778 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].  The first item
 * is expected to end at BTRFS_LEAF_DATA_SIZE(root) and every later item
 * at the offset of its predecessor.  Items that end too far are handed to
 * delete_bogus_item(); items that leave a gap are shifted up by the size
 * of the gap.
 */
6782 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6784 struct extent_buffer *buf;
6788 /* We should only get this for leaves */
6789 BUG_ON(path->lowest_level);
6790 buf = path->nodes[0];
6792 for (i = 0; i < btrfs_header_nritems(buf); i++) {
6793 unsigned int shift = 0, offset;
/* First item: compare its end with the end of the leaf data area. */
6795 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6796 BTRFS_LEAF_DATA_SIZE(root)) {
6797 if (btrfs_item_end_nr(buf, i) >
6798 BTRFS_LEAF_DATA_SIZE(root)) {
6799 ret = delete_bogus_item(root, path, buf, i);
6802 fprintf(stderr, "item is off the end of the "
6803 "leaf, can't fix\n");
6807 shift = BTRFS_LEAF_DATA_SIZE(root) -
6808 btrfs_item_end_nr(buf, i);
/* Later items: compare the end with the previous item's offset. */
6809 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6810 btrfs_item_offset_nr(buf, i - 1)) {
6811 if (btrfs_item_end_nr(buf, i) >
6812 btrfs_item_offset_nr(buf, i - 1)) {
6813 ret = delete_bogus_item(root, path, buf, i);
6816 fprintf(stderr, "items overlap, can't fix\n");
6820 shift = btrfs_item_offset_nr(buf, i - 1) -
6821 btrfs_item_end_nr(buf, i);
/* Move the item's data up by shift bytes and record the new offset. */
6826 printf("Shifting item nr %d by %u bytes in block %llu\n",
6827 i, shift, (unsigned long long)buf->start);
6828 offset = btrfs_item_offset_nr(buf, i);
6829 memmove_extent_buffer(buf,
6830 btrfs_leaf_data(buf) + offset + shift,
6831 btrfs_leaf_data(buf) + offset,
6832 btrfs_item_size_nr(buf, i));
6833 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6835 btrfs_mark_buffer_dirty(buf);
6839 * We may have moved things, in which case we want to exit so we don't
6840 * write those changes out. Once we have proper abort functionality in
6841 * progs this can be changed to something nicer.
6848 * Attempt to fix basic block failures. If we can't fix it for whatever reason
6849 * then just return -EIO.
/*
 * Try to repair tree block @buf for the two statuses handled here:
 * BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS.
 * Every root that references the block (per btrfs_find_all_roots()) is
 * searched down to the block inside a transaction, and the matching fixer
 * is run on the resulting path.
 */
6851 static int try_to_fix_bad_block(struct btrfs_root *root,
6852 struct extent_buffer *buf,
6853 enum btrfs_tree_block_status status)
6855 struct btrfs_trans_handle *trans;
6856 struct ulist *roots;
6857 struct ulist_node *node;
6858 struct btrfs_root *search_root;
6859 struct btrfs_path path;
6860 struct ulist_iterator iter;
6861 struct btrfs_key root_key, key;
6864 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6865 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
/* Collect the roots that reference this block. */
6868 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6872 btrfs_init_path(&path);
6873 ULIST_ITER_INIT(&iter);
6874 while ((node = ulist_next(roots, &iter))) {
/* Each ulist value is used as the objectid of a root to read. */
6875 root_key.objectid = node->val;
6876 root_key.type = BTRFS_ROOT_ITEM_KEY;
6877 root_key.offset = (u64)-1;
6879 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6886 trans = btrfs_start_transaction(search_root, 0);
6887 if (IS_ERR(trans)) {
6888 ret = PTR_ERR(trans);
/*
 * Search down to buf's level using buf's first key; skip_check_block is
 * set on the path for this search of the known-bad block.
 */
6892 path.lowest_level = btrfs_header_level(buf);
6893 path.skip_check_block = 1;
6894 if (path.lowest_level)
6895 btrfs_node_key_to_cpu(buf, &key, 0);
6897 btrfs_item_key_to_cpu(buf, &key, 0);
6898 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6901 btrfs_commit_transaction(trans, search_root);
/* Run the fixer that matches the reported status. */
6904 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6905 ret = fix_key_order(search_root, &path);
6906 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6907 ret = fix_item_offset(search_root, &path);
6909 btrfs_commit_transaction(trans, search_root);
6912 btrfs_release_path(&path);
6913 btrfs_commit_transaction(trans, search_root);
6916 btrfs_release_path(&path);
/*
 * check_block() - validate one tree block and update its extent record.
 * @root:         root handed to the leaf/node checkers, the fixer and
 *                check_owner_ref()
 * @extent_cache: cache searched for the record covering buf->start/buf->len
 * @buf:          the tree block being checked
 * @flags:        block flags; BTRFS_BLOCK_FLAG_FULL_BACKREF is tested before
 *                the owner ref check
 *
 * Stores the header generation, the level and (for non-empty blocks) the
 * objectid of the first key in the extent record, then runs
 * btrfs_check_leaf() or btrfs_check_node() against rec->parent_key.  A
 * non-clean status is passed to try_to_fix_bad_block(); if the result is still
 * not BTRFS_TREE_BLOCK_CLEAN a "bad block" message is printed.
 */
6920 static int check_block(struct btrfs_root *root,
6921 struct cache_tree *extent_cache,
6922 struct extent_buffer *buf, u64 flags)
6924 struct extent_record *rec;
6925 struct cache_extent *cache;
6926 struct btrfs_key key;
6927 enum btrfs_tree_block_status status;
6931 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6934 rec = container_of(cache, struct extent_record, cache);
6935 rec->generation = btrfs_header_generation(buf);
6937 level = btrfs_header_level(buf);
/* Remember the objectid of the block's first key (item key or node key). */
6938 if (btrfs_header_nritems(buf) > 0) {
6941 btrfs_item_key_to_cpu(buf, &key, 0);
6943 btrfs_node_key_to_cpu(buf, &key, 0);
6945 rec->info_objectid = key.objectid;
6947 rec->info_level = level;
6949 if (btrfs_is_leaf(buf))
6950 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6952 status = btrfs_check_node(root, &rec->parent_key, buf);
6954 if (status != BTRFS_TREE_BLOCK_CLEAN) {
/*
 * NOTE(review): try_to_fix_bad_block() returns int, yet the result is
 * stored in the enum and compared with BTRFS_TREE_BLOCK_CLEAN - this
 * relies on the clean value matching the success return; confirm.
 */
6956 status = try_to_fix_bad_block(root, buf, status);
6957 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6959 fprintf(stderr, "bad block %llu\n",
6960 (unsigned long long)buf->start);
6963 * Signal to callers we need to start the scan over
6964 * again since we'll have cowed blocks.
6969 rec->content_checked = 1;
/* Full-backref blocks are marked owner-checked without check_owner_ref(). */
6970 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6971 rec->owner_ref_checked = 1;
6973 ret = check_owner_ref(root, rec, buf);
6975 rec->owner_ref_checked = 1;
6979 maybe_free_extent_rec(extent_cache, rec);
/*
 * find_tree_backref() - look for an existing tree backref on @rec.
 * @rec:    extent record whose backrefs list is walked
 * @parent: parent bytenr, compared with back->parent
 * @root:   root objectid, compared with back->root
 *
 * Walks rec->backrefs from its first entry.  Two kinds of match are tested:
 * back->parent against @parent and back->root against @root, each paired with
 * a test of node->full_backref.
 * NOTE(review): presumably @parent > 0 selects the full-backref comparison -
 * confirm.
 */
6984 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6985 u64 parent, u64 root)
6987 struct list_head *cur = rec->backrefs.next;
6988 struct extent_backref *node;
6989 struct tree_backref *back;
6991 while(cur != &rec->backrefs) {
6992 node = to_extent_backref(cur);
6996 back = to_tree_backref(node);
6998 if (!node->full_backref)
7000 if (parent == back->parent)
7003 if (node->full_backref)
7005 if (back->root == root)
/*
 * alloc_tree_backref() - allocate a tree backref for @rec.
 * @rec:    extent record the backref belongs to
 * @parent: parent bytenr stored in ref->parent
 * @root:   root objectid of the referencing tree
 *
 * The node embedded in the new tree_backref is zeroed with memset().
 * node.full_backref is set to 1 next to the ref->parent assignment and to 0
 * on the other path.
 */
7013 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
7014 u64 parent, u64 root)
7016 struct tree_backref *ref = malloc(sizeof(*ref));
7020 memset(&ref->node, 0, sizeof(ref->node));
7022 ref->parent = parent;
7023 ref->node.full_backref = 1;
7026 ref->node.full_backref = 0;
/*
 * find_data_backref() - look for an existing data backref on @rec.
 * @rec:         extent record whose backrefs list is walked
 * @parent:      parent bytenr, compared with back->parent
 * @root:        root objectid, compared with back->root
 * @owner:       owner, compared with back->owner
 * @offset:      offset, compared with back->offset
 * @disk_bytenr: disk bytenr the caller found, compared with back->disk_bytenr
 * @bytes:       byte count the caller found, compared with back->bytes
 *
 * A root/owner/offset match is additionally tested against found_ref: when
 * both found_ref and node->found_ref are set, a differing bytes or disk_bytenr
 * is singled out by the innermost condition.
 */
7033 static struct data_backref *find_data_backref(struct extent_record *rec,
7034 u64 parent, u64 root,
7035 u64 owner, u64 offset,
7037 u64 disk_bytenr, u64 bytes)
7039 struct list_head *cur = rec->backrefs.next;
7040 struct extent_backref *node;
7041 struct data_backref *back;
7043 while(cur != &rec->backrefs) {
7044 node = to_extent_backref(cur);
7048 back = to_data_backref(node);
7050 if (!node->full_backref)
7052 if (parent == back->parent)
7055 if (node->full_backref)
7057 if (back->root == root && back->owner == owner &&
7058 back->offset == offset) {
7059 if (found_ref && node->found_ref &&
7060 (back->bytes != bytes ||
7061 back->disk_bytenr != disk_bytenr))
/*
 * alloc_data_backref() - allocate a data backref for @rec.
 * @rec:    extent record the backref belongs to; its max_size is raised when
 *          max_size is larger
 * @parent: parent bytenr stored in ref->parent
 * @root:   root objectid of the referencing tree
 * @owner:  owner of the reference
 * @offset: offset stored in ref->offset
 *
 * The embedded node is zeroed and flagged is_data.  node.full_backref is set
 * to 1 next to the ref->parent assignment and to 0 next to the ref->offset
 * assignment; ref->bytes is initialised from max_size.
 */
7071 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7072 u64 parent, u64 root,
7073 u64 owner, u64 offset,
7076 struct data_backref *ref = malloc(sizeof(*ref));
7080 memset(&ref->node, 0, sizeof(ref->node));
7081 ref->node.is_data = 1;
7084 ref->parent = parent;
7087 ref->node.full_backref = 1;
7091 ref->offset = offset;
7092 ref->node.full_backref = 0;
7094 ref->bytes = max_size;
/* Keep the record's max_size at the largest size seen so far. */
7097 if (max_size > rec->max_size)
7098 rec->max_size = max_size;
7102 /* Check if the type of extent matches with its chunk */
/*
 * @rec: extent record to classify; rec->wrong_chunk_type is set to 1 when the
 *       flags of the block group found for rec->start do not fit the extent.
 *
 * Data extents need BTRFS_BLOCK_GROUP_DATA.  Metadata extents need SYSTEM or
 * METADATA; when the record has backrefs, the first one in backref_tree
 * decides which of the two is required (root == BTRFS_CHUNK_TREE_OBJECTID
 * means SYSTEM, anything else METADATA).
 */
7103 static void check_extent_type(struct extent_record *rec)
7105 struct btrfs_block_group_cache *bg_cache;
7107 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7111 /* data extent, check chunk directly*/
7112 if (!rec->metadata) {
7113 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7114 rec->wrong_chunk_type = 1;
7118 /* metadata extent, check the obvious case first */
7119 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7120 BTRFS_BLOCK_GROUP_METADATA))) {
7121 rec->wrong_chunk_type = 1;
7126 * Check SYSTEM extent, as it's also marked as metadata, we can only
7127 * make sure it's a SYSTEM extent by its backref
7129 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7130 struct extent_backref *node;
7131 struct tree_backref *tback;
/* Only the first backref in the tree is consulted. */
7134 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7135 if (node->is_data) {
7136 /* tree block shouldn't have data backref */
7137 rec->wrong_chunk_type = 1;
7140 tback = container_of(node, struct tree_backref, node);
7142 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7143 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7145 bg_type = BTRFS_BLOCK_GROUP_METADATA;
7146 if (!(bg_cache->flags & bg_type))
7147 rec->wrong_chunk_type = 1;
7152 * Allocate a new extent record, fill default values from @tmpl and insert into
7153 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7154 * the cache, otherwise it fails.
/*
 * @extent_cache: cache tree that receives the new record
 * @tmpl:         template; start, max_size, nr, found_rec, content_checked,
 *                owner_ref_checked, metadata, is_root, refs,
 *                extent_item_refs, parent_generation and parent_key are
 *                copied from it
 *
 * tmpl->max_size must be non-zero (BUG_ON).  rec->nr becomes the larger of
 * tmpl->nr and tmpl->max_size, while the cache entry is sized with tmpl->nr.
 * Following the insert_cache_extent() call rec->nr is added to the global
 * bytes_used counter and the record goes through check_extent_type().
 */
7156 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7157 struct extent_record *tmpl)
7159 struct extent_record *rec;
7162 BUG_ON(tmpl->max_size == 0);
7163 rec = malloc(sizeof(*rec));
7166 rec->start = tmpl->start;
7167 rec->max_size = tmpl->max_size;
7168 rec->nr = max(tmpl->nr, tmpl->max_size);
7169 rec->found_rec = tmpl->found_rec;
7170 rec->content_checked = tmpl->content_checked;
7171 rec->owner_ref_checked = tmpl->owner_ref_checked;
/* Fields below that are not taken from @tmpl start from fixed defaults. */
7172 rec->num_duplicates = 0;
7173 rec->metadata = tmpl->metadata;
7174 rec->flag_block_full_backref = FLAG_UNSET;
7175 rec->bad_full_backref = 0;
7176 rec->crossing_stripes = 0;
7177 rec->wrong_chunk_type = 0;
7178 rec->is_root = tmpl->is_root;
7179 rec->refs = tmpl->refs;
7180 rec->extent_item_refs = tmpl->extent_item_refs;
7181 rec->parent_generation = tmpl->parent_generation;
7182 INIT_LIST_HEAD(&rec->backrefs);
7183 INIT_LIST_HEAD(&rec->dups);
7184 INIT_LIST_HEAD(&rec->list);
7185 rec->backref_tree = RB_ROOT;
7186 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
/* The cache entry uses tmpl->nr, not the max() stored in rec->nr above. */
7187 rec->cache.start = tmpl->start;
7188 rec->cache.size = tmpl->nr;
7189 ret = insert_cache_extent(extent_cache, &rec->cache);
7194 bytes_used += rec->nr;
/* The stripe-crossing test is done over nodesize bytes from rec->start. */
7197 rec->crossing_stripes = check_crossing_stripes(global_info,
7198 rec->start, global_info->nodesize);
7199 check_extent_type(rec);
7204 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7206 * - refs - if found, increase refs
7207 * - is_root - if found, set
7208 * - content_checked - if found, set
7209 * - owner_ref_checked - if found, set
7211 * If not found, create a new one, initialize and insert.
/*
 * @extent_cache: cache tree searched for a record overlapping
 *                tmpl->start/tmpl->nr
 * @tmpl:         values to merge into the record that was found, or to create
 *                a new record from via add_extent_rec_nolookup()
 *
 * When an existing record is hit and tmpl->found_rec is set, a mismatching
 * start or an already-found record is treated as a duplicate: the record is
 * queued on the global duplicate_extents list (once) and a freshly allocated
 * copy of the template's start/max_size/metadata/extent_item_refs is appended
 * to rec->dups.
 */
7213 static int add_extent_rec(struct cache_tree *extent_cache,
7214 struct extent_record *tmpl)
7216 struct extent_record *rec;
7217 struct cache_extent *cache;
7221 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7223 rec = container_of(cache, struct extent_record, cache);
7227 rec->nr = max(tmpl->nr, tmpl->max_size);
7230 * We need to make sure to reset nr to whatever the extent
7231 * record says was the real size, this way we can compare it to
7234 if (tmpl->found_rec) {
7235 if (tmpl->start != rec->start || rec->found_rec) {
7236 struct extent_record *tmp;
/* list_empty() guards against queueing the same record twice. */
7239 if (list_empty(&rec->list))
7240 list_add_tail(&rec->list,
7241 &duplicate_extents);
7244 * We have to do this song and dance in case we
7245 * find an extent record that falls inside of
7246 * our current extent record but does not have
7247 * the same objectid.
7249 tmp = malloc(sizeof(*tmp));
7252 tmp->start = tmpl->start;
7253 tmp->max_size = tmpl->max_size;
7256 tmp->metadata = tmpl->metadata;
7257 tmp->extent_item_refs = tmpl->extent_item_refs;
7258 INIT_LIST_HEAD(&tmp->list);
7259 list_add_tail(&tmp->list, &rec->dups);
7260 rec->num_duplicates++;
/*
 * For non-duplicates the template's extent_item_refs is stored; a message
 * is printed when the record already carried a value.
 */
7267 if (tmpl->extent_item_refs && !dup) {
7268 if (rec->extent_item_refs) {
7269 fprintf(stderr, "block %llu rec "
7270 "extent_item_refs %llu, passed %llu\n",
7271 (unsigned long long)tmpl->start,
7272 (unsigned long long)
7273 rec->extent_item_refs,
7274 (unsigned long long)tmpl->extent_item_refs);
7276 rec->extent_item_refs = tmpl->extent_item_refs;
/* The checked flags are only ever raised here, never cleared. */
7280 if (tmpl->content_checked)
7281 rec->content_checked = 1;
7282 if (tmpl->owner_ref_checked)
7283 rec->owner_ref_checked = 1;
7284 memcpy(&rec->parent_key, &tmpl->parent_key,
7285 sizeof(tmpl->parent_key));
7286 if (tmpl->parent_generation)
7287 rec->parent_generation = tmpl->parent_generation;
7288 if (rec->max_size < tmpl->max_size)
7289 rec->max_size = tmpl->max_size;
7292 * A metadata extent can't cross stripe_len boundary, otherwise
7293 * kernel scrub won't be able to handle it.
7294 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7298 rec->crossing_stripes = check_crossing_stripes(
7299 global_info, rec->start,
7300 global_info->nodesize);
7301 check_extent_type(rec);
7302 maybe_free_extent_rec(extent_cache, rec);
/* No existing record: create one from the template. */
7306 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * add_tree_backref() - record a tree block backref for the extent at @bytenr.
 * @extent_cache: cache tree holding the extent records
 * @bytenr:       start of the referenced tree block
 * @parent:       parent bytenr of the reference
 * @root:         root objectid of the reference
 * @found_ref:    selects which "seen" flag is raised on the backref:
 *                node.found_ref or node.found_extent_tree
 *
 * If no record covers @bytenr a minimal one is created first with
 * add_extent_rec_nolookup() and looked up again.  An existing backref is
 * reused, otherwise a new one is allocated.  When the flag to be raised is
 * already set, an "Extent back ref already exists" message is printed.
 */
7311 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7312 u64 parent, u64 root, int found_ref)
7314 struct extent_record *rec;
7315 struct tree_backref *back;
7316 struct cache_extent *cache;
7318 bool insert = false;
7320 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7322 struct extent_record tmpl;
7324 memset(&tmpl, 0, sizeof(tmpl));
7325 tmpl.start = bytenr;
7330 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7334 /* really a bug in cache_extent implement now */
7335 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7340 rec = container_of(cache, struct extent_record, cache);
/* The record found must start exactly at @bytenr. */
7341 if (rec->start != bytenr) {
7343 * Several cause, from unaligned bytenr to over lapping extents
7348 back = find_tree_backref(rec, parent, root);
7350 back = alloc_tree_backref(rec, parent, root);
7357 if (back->node.found_ref) {
7358 fprintf(stderr, "Extent back ref already exists "
7359 "for %llu parent %llu root %llu \n",
7360 (unsigned long long)bytenr,
7361 (unsigned long long)parent,
7362 (unsigned long long)root);
7364 back->node.found_ref = 1;
7366 if (back->node.found_extent_tree) {
7367 fprintf(stderr, "Extent back ref already exists "
7368 "for %llu parent %llu root %llu \n",
7369 (unsigned long long)bytenr,
7370 (unsigned long long)parent,
7371 (unsigned long long)root);
7373 back->node.found_extent_tree = 1;
/* rb_insert() returning non-zero triggers the WARN_ON. */
7376 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7377 compare_extent_backref));
7378 check_extent_type(rec);
7379 maybe_free_extent_rec(extent_cache, rec);
/*
 * add_data_backref() - record a data backref for the extent at @bytenr.
 * @extent_cache: cache tree holding the extent records
 * @bytenr:       start of the referenced data extent
 * @parent:       parent bytenr of the reference
 * @root:         root objectid of the reference
 * @owner:        owner of the reference
 * @offset:       offset of the reference
 * @num_refs:     reference count; must be 1 on the path that sets
 *                node.found_ref (BUG_ON), stored in back->num_refs on the
 *                path that sets node.found_extent_tree
 * @found_ref:    forwarded to find_data_backref() to steer the match
 * @max_size:     size for this reference; raises rec->max_size if larger
 *
 * If no record covers @bytenr one is created with add_extent_rec_nolookup()
 * and looked up again.  An existing backref is reused, otherwise a new one is
 * allocated.
 */
7383 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7384 u64 parent, u64 root, u64 owner, u64 offset,
7385 u32 num_refs, int found_ref, u64 max_size)
7387 struct extent_record *rec;
7388 struct data_backref *back;
7389 struct cache_extent *cache;
7391 bool insert = false;
7393 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7395 struct extent_record tmpl;
7397 memset(&tmpl, 0, sizeof(tmpl));
7398 tmpl.start = bytenr;
7400 tmpl.max_size = max_size;
7402 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7406 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7411 rec = container_of(cache, struct extent_record, cache);
7412 if (rec->max_size < max_size)
7413 rec->max_size = max_size;
7416 * If found_ref is set then max_size is the real size and must match the
7417 * existing refs. So if we have already found a ref then we need to
7418 * make sure that this ref matches the existing one, otherwise we need
7419 * to add a new backref so we can notice that the backrefs don't match
7420 * and we need to figure out who is telling the truth. This is to
7421 * account for that awful fsync bug I introduced where we'd end up with
7422 * a btrfs_file_extent_item that would have its length include multiple
7423 * prealloc extents or point inside of a prealloc extent.
7425 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7428 back = alloc_data_backref(rec, parent, root, owner, offset,
/* A backref that was already found must agree on the size. */
7435 BUG_ON(num_refs != 1);
7436 if (back->node.found_ref)
7437 BUG_ON(back->bytes != max_size);
7438 back->node.found_ref = 1;
7439 back->found_ref += 1;
/*
 * bytes and disk_bytenr are updated on the backref; the rb_erase() below
 * takes it out of backref_tree.
 * NOTE(review): presumably because these fields feed
 * compare_extent_backref() - confirm.
 */
7440 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7441 back->bytes = max_size;
7442 back->disk_bytenr = bytenr;
7444 /* Need to reinsert if not already in the tree */
7446 rb_erase(&back->node.node, &rec->backref_tree);
7451 rec->content_checked = 1;
7452 rec->owner_ref_checked = 1;
7454 if (back->node.found_extent_tree) {
7455 fprintf(stderr, "Extent back ref already exists "
7456 "for %llu parent %llu root %llu "
7457 "owner %llu offset %llu num_refs %lu\n",
7458 (unsigned long long)bytenr,
7459 (unsigned long long)parent,
7460 (unsigned long long)root,
7461 (unsigned long long)owner,
7462 (unsigned long long)offset,
7463 (unsigned long)num_refs);
7465 back->num_refs = num_refs;
7466 back->node.found_extent_tree = 1;
7469 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7470 compare_extent_backref));
7472 maybe_free_extent_rec(extent_cache, rec);
/*
 * add_pending() - queue a block range for later processing.
 * @pending: cache tree of ranges still to be processed
 * @seen:    cache tree of ranges already encountered
 * @bytenr:  start of the range
 * @size:    length of the range
 *
 * The range is added to @seen first and then to @pending.
 * NOTE(review): presumably a failing insert into @seen (range already known)
 * skips the @pending insert - confirm.
 */
7476 static int add_pending(struct cache_tree *pending,
7477 struct cache_tree *seen, u64 bytenr, u32 size)
7480 ret = add_cache_extent(seen, bytenr, size);
7483 add_cache_extent(pending, bytenr, size);
/*
 * pick_next_pending() - choose the next block ranges to work on.
 * @pending: cache tree of pending ranges
 * @reada:   cache tree of readahead ranges, consulted first
 * @nodes:   cache tree of node ranges, searched starting near @last
 * @last:    position of the previous pick; the node search starts up to
 *           32768 bytes before it
 * @bits:    output array receiving start/size pairs
 * @bits_nr: capacity of @bits
 *
 * Sources are tried in this order: the first entry of @reada, then @nodes
 * from the adjusted start (falling back to offset 0), then @pending from
 * offset 0.  When more than 8 slots of @bits remain free, further entries are
 * taken from @pending as long as the gap to the previously taken range is at
 * most 32768 bytes.
 */
7487 static int pick_next_pending(struct cache_tree *pending,
7488 struct cache_tree *reada,
7489 struct cache_tree *nodes,
7490 u64 last, struct block_info *bits, int bits_nr,
7493 unsigned long node_start = last;
7494 struct cache_extent *cache;
7497 cache = search_cache_extent(reada, 0);
7499 bits[0].start = cache->start;
7500 bits[0].size = cache->size;
/* Back the search start up by 32768 bytes when that does not underflow. */
7505 if (node_start > 32768)
7506 node_start -= 32768;
7508 cache = search_cache_extent(nodes, node_start);
7510 cache = search_cache_extent(nodes, 0);
7513 cache = search_cache_extent(pending, 0);
7518 bits[ret].start = cache->start;
7519 bits[ret].size = cache->size;
7520 cache = next_cache_extent(cache);
7522 } while (cache && ret < bits_nr);
7528 bits[ret].start = cache->start;
7529 bits[ret].size = cache->size;
7530 cache = next_cache_extent(cache);
7532 } while (cache && ret < bits_nr);
/* Top up with nearby pending ranges while plenty of slots remain. */
7534 if (bits_nr - ret > 8) {
7535 u64 lookup = bits[0].start + bits[0].size;
7536 struct cache_extent *next;
7537 next = search_cache_extent(pending, lookup);
7539 if (next->start - lookup > 32768)
7541 bits[ret].start = next->start;
7542 bits[ret].size = next->size;
7543 lookup = next->start + next->size;
7547 next = next_cache_extent(next);
/*
 * free_chunk_record() - per-entry callback used when a chunk cache is freed.
 * @cache: cache_extent embedded in a struct chunk_record
 *
 * Unlinks the record from both its list and its dextents list head.
 */
7555 static void free_chunk_record(struct cache_extent *cache)
7557 struct chunk_record *rec;
7559 rec = container_of(cache, struct chunk_record, cache);
7560 list_del_init(&rec->list);
7561 list_del_init(&rec->dextents);
/*
 * free_chunk_cache_tree() - release every entry of a chunk cache tree.
 * @chunk_cache: tree to empty; free_chunk_record() is passed as the per-entry
 *               callback to cache_tree_free_extents()
 */
7565 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
7567 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * free_device_record() - per-node callback for the device record rb-tree.
 * @node: rb_node embedded in a struct device_record
 */
7570 static void free_device_record(struct rb_node *node)
7572 struct device_record *rec;
7574 rec = container_of(node, struct device_record, node);
/*
 * Instantiates FREE_RB_BASED_TREE for "device_cache" with
 * free_device_record() as the per-node callback.
 * NOTE(review): presumably this expands to the routine that frees the device
 * cache rb-tree - confirm against the macro definition.
 */
7578 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * insert_block_group_record() - add a block group record to @tree.
 * @tree:   block group tree; the record goes into tree->tree and is appended
 *          to the tree->block_groups list
 * @bg_rec: record to insert, keyed by its embedded cache extent
 */
7580 int insert_block_group_record(struct block_group_tree *tree,
7581 struct block_group_record *bg_rec)
7585 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7589 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * free_block_group_record() - per-entry callback used when a block group tree
 * is freed.
 * @cache: cache_extent embedded in a struct block_group_record
 *
 * Unlinks the record from the list it is on.
 */
7593 static void free_block_group_record(struct cache_extent *cache)
7595 struct block_group_record *rec;
7597 rec = container_of(cache, struct block_group_record, cache);
7598 list_del_init(&rec->list);
/*
 * free_block_group_tree() - release every record of a block group tree.
 * @tree: tree whose cache extents are handed to free_block_group_record()
 */
7602 void free_block_group_tree(struct block_group_tree *tree)
7604 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * insert_device_extent_record() - add a device extent record to @tree.
 * @tree:   device extent tree; the record goes into tree->tree via
 *          insert_cache_extent2()
 * @de_rec: record to insert
 *
 * The record is also appended to both orphan lists of the tree
 * (no_chunk_orphans and no_device_orphans).
 */
7607 int insert_device_extent_record(struct device_extent_tree *tree,
7608 struct device_extent_record *de_rec)
7613 * Device extent is a bit different from the other extents, because
7614 * the extents which belong to the different devices may have the
7615 * same start and size, so we need use the special extent cache
7616 * search/insert functions.
7618 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7622 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7623 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * free_device_extent_record() - per-entry callback used when a device extent
 * tree is freed.
 * @cache: cache_extent embedded in a struct device_extent_record
 *
 * Unlinks the record from its chunk list and device list when it is still on
 * them.
 */
7627 static void free_device_extent_record(struct cache_extent *cache)
7629 struct device_extent_record *rec;
7631 rec = container_of(cache, struct device_extent_record, cache);
7632 if (!list_empty(&rec->chunk_list))
7633 list_del_init(&rec->chunk_list);
7634 if (!list_empty(&rec->device_list))
7635 list_del_init(&rec->device_list);
/*
 * free_device_extent_tree() - release every record of a device extent tree.
 * @tree: tree whose cache extents are handed to free_device_extent_record()
 */
7639 void free_device_extent_tree(struct device_extent_tree *tree)
7641 cache_tree_free_extents(&tree->tree, free_device_extent_record);
7644 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * process_extent_ref_v0() - turn a v0 extent ref item into a backref.
 * @extent_cache: cache tree holding the extent records
 * @leaf:         leaf containing the item
 * @slot:         slot of the btrfs_extent_ref_v0 item in @leaf
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is added as a tree
 * backref, anything else as a data backref.  In both cases key.objectid is
 * the extent bytenr and key.offset the parent.
 */
7645 static int process_extent_ref_v0(struct cache_tree *extent_cache,
7646 struct extent_buffer *leaf, int slot)
7648 struct btrfs_extent_ref_v0 *ref0;
7649 struct btrfs_key key;
7652 btrfs_item_key_to_cpu(leaf, &key, slot);
7653 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
7654 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
7655 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
7658 ret = add_data_backref(extent_cache, key.objectid, key.offset,
7659 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * btrfs_new_chunk_record() - build a chunk_record from a chunk item.
 * @leaf: leaf containing the chunk item
 * @key:  key of the item; key->offset becomes the cache start and rec->offset
 *
 * The record is allocated zeroed with room for the item's num_stripes
 * stripes.  Its fields are filled from @key, the leaf header generation and
 * the chunk item, including devid, offset and dev_uuid of every stripe.
 * cache.size and rec->length both hold the chunk length.
 */
7665 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7666 struct btrfs_key *key,
7669 struct btrfs_chunk *ptr;
7670 struct chunk_record *rec;
7673 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7674 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
/* Allocation size depends on the stripe count read from the item. */
7676 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7678 fprintf(stderr, "memory allocation failed\n");
7682 INIT_LIST_HEAD(&rec->list);
7683 INIT_LIST_HEAD(&rec->dextents);
7686 rec->cache.start = key->offset;
7687 rec->cache.size = btrfs_chunk_length(leaf, ptr);
7689 rec->generation = btrfs_header_generation(leaf);
7691 rec->objectid = key->objectid;
7692 rec->type = key->type;
7693 rec->offset = key->offset;
7695 rec->length = rec->cache.size;
7696 rec->owner = btrfs_chunk_owner(leaf, ptr);
7697 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7698 rec->type_flags = btrfs_chunk_type(leaf, ptr);
7699 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7700 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7701 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7702 rec->num_stripes = num_stripes;
7703 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* Copy the per-stripe device id, offset and device uuid. */
7705 for (i = 0; i < rec->num_stripes; ++i) {
7706 rec->stripes[i].devid =
7707 btrfs_stripe_devid_nr(leaf, ptr, i);
7708 rec->stripes[i].offset =
7709 btrfs_stripe_offset_nr(leaf, ptr, i);
7710 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7711 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * process_chunk_item() - validate a chunk item and cache a record for it.
 * @chunk_cache: cache tree receiving the chunk record
 * @key:         key of the chunk item
 * @eb:          leaf containing the item
 *
 * The item is first checked with btrfs_check_chunk_valid(); an invalid chunk
 * is reported with error().  Otherwise a record is built with
 * btrfs_new_chunk_record() and inserted; a failing insert is reported as
 * "Chunk[...] existed.".
 */
7718 static int process_chunk_item(struct cache_tree *chunk_cache,
7719 struct btrfs_key *key, struct extent_buffer *eb,
7722 struct chunk_record *rec;
7723 struct btrfs_chunk *chunk;
7726 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7728 * Do extra check for this chunk item,
7730 * It's still possible one can craft a leaf with CHUNK_ITEM, with
7731 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7732 * and owner<->key_type check.
7734 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7737 error("chunk(%llu, %llu) is not valid, ignore it",
7738 key->offset, btrfs_chunk_length(eb, chunk));
7741 rec = btrfs_new_chunk_record(eb, key, slot);
7742 ret = insert_cache_extent(chunk_cache, &rec->cache);
7744 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7745 rec->offset, rec->length);
/*
 * process_device_item() - cache a device_record for a dev item.
 * @dev_cache: rb-tree of device records, ordered by device_record_compare
 * @key:       key of the dev item
 * @eb:        leaf containing the item
 * @slot:      slot of the item in @eb
 *
 * The record is filled from @key, the leaf header generation and the item's
 * id, total bytes and bytes used.  A failing rb_insert() is reported as
 * "Device[...] existed.".
 */
7752 static int process_device_item(struct rb_root *dev_cache,
7753 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7755 struct btrfs_dev_item *ptr;
7756 struct device_record *rec;
7759 ptr = btrfs_item_ptr(eb,
7760 slot, struct btrfs_dev_item);
7762 rec = malloc(sizeof(*rec));
7764 fprintf(stderr, "memory allocation failed\n");
/* devid is taken from the key here and overwritten from the item below. */
7768 rec->devid = key->offset;
7769 rec->generation = btrfs_header_generation(eb);
7771 rec->objectid = key->objectid;
7772 rec->type = key->type;
7773 rec->offset = key->offset;
7775 rec->devid = btrfs_device_id(eb, ptr);
7776 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7777 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7779 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7781 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * btrfs_new_block_group_record() - build a record from a block group item.
 * @leaf: leaf containing the block group item
 * @key:  key of the item; key->objectid becomes the cache start and
 *        key->offset the cache size
 *
 * The record is allocated zeroed and filled from @key, the leaf header
 * generation and the item's flags.
 */
7788 struct block_group_record *
7789 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7792 struct btrfs_block_group_item *ptr;
7793 struct block_group_record *rec;
7795 rec = calloc(1, sizeof(*rec));
7797 fprintf(stderr, "memory allocation failed\n");
7801 rec->cache.start = key->objectid;
7802 rec->cache.size = key->offset;
7804 rec->generation = btrfs_header_generation(leaf);
7806 rec->objectid = key->objectid;
7807 rec->type = key->type;
7808 rec->offset = key->offset;
7810 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7811 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7813 INIT_LIST_HEAD(&rec->list);
/*
 * process_block_group_item() - cache a record for a block group item.
 * @block_group_cache: tree receiving the record
 * @key:               key of the block group item
 * @eb:                leaf containing the item
 * @slot:              slot of the item in @eb
 *
 * A failing insert is reported as "Block Group[...] existed.".
 */
7818 static int process_block_group_item(struct block_group_tree *block_group_cache,
7819 struct btrfs_key *key,
7820 struct extent_buffer *eb, int slot)
7822 struct block_group_record *rec;
7825 rec = btrfs_new_block_group_record(eb, key, slot);
7826 ret = insert_block_group_record(block_group_cache, rec);
7828 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7829 rec->objectid, rec->offset);
/*
 * btrfs_new_device_extent_record() - build a record from a dev extent item.
 * @leaf: leaf containing the dev extent item
 * @key:  key of the item; key->objectid becomes cache.objectid and
 *        key->offset the cache start
 * @slot: slot of the item in @leaf
 *
 * The record is allocated zeroed and filled from @key, the leaf header
 * generation and the item's chunk objectid, chunk offset and length.
 * cache.size mirrors rec->length.
 */
7836 struct device_extent_record *
7837 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7838 struct btrfs_key *key, int slot)
7840 struct device_extent_record *rec;
7841 struct btrfs_dev_extent *ptr;
7843 rec = calloc(1, sizeof(*rec));
7845 fprintf(stderr, "memory allocation failed\n");
7849 rec->cache.objectid = key->objectid;
7850 rec->cache.start = key->offset;
7852 rec->generation = btrfs_header_generation(leaf);
7854 rec->objectid = key->objectid;
7855 rec->type = key->type;
7856 rec->offset = key->offset;
7858 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7859 rec->chunk_objecteid =
7860 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7862 btrfs_dev_extent_chunk_offset(leaf, ptr);
7863 rec->length = btrfs_dev_extent_length(leaf, ptr);
7864 rec->cache.size = rec->length;
7866 INIT_LIST_HEAD(&rec->chunk_list);
7867 INIT_LIST_HEAD(&rec->device_list);
/*
 * process_device_extent_item() - cache a record for a dev extent item.
 * @dev_extent_cache: tree receiving the record
 * @key:              key of the dev extent item
 * @eb:               leaf containing the item
 *
 * A failing insert is reported as "Device extent[...] existed.".
 */
7873 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7874 struct btrfs_key *key, struct extent_buffer *eb,
7877 struct device_extent_record *rec;
7880 rec = btrfs_new_device_extent_record(eb, key, slot);
7881 ret = insert_device_extent_record(dev_extent_cache, rec);
7884 "Device extent[%llu, %llu, %llu] existed.\n",
7885 rec->objectid, rec->offset, rec->length);
/*
 * process_extent_item() - record an extent item and its inline backrefs.
 * @root:         root whose fs_info supplies nodesize and sectorsize
 * @extent_cache: cache tree holding the extent records
 * @eb:           leaf containing the extent item
 * @slot:         slot of the item in @eb
 *
 * The extent length is nodesize for BTRFS_METADATA_ITEM_KEY and key.offset
 * otherwise.  Extents whose bytenr is not sector aligned, metadata extents
 * whose length differs from nodesize and data extents whose length is not
 * sector aligned are reported with error().  Items smaller than a
 * btrfs_extent_item take the v0 compat path.  For regular items an extent
 * record is added and every inline ref between the end of the extent item and
 * the end of the item is turned into a tree or data backref.
 */
7892 static int process_extent_item(struct btrfs_root *root,
7893 struct cache_tree *extent_cache,
7894 struct extent_buffer *eb, int slot)
7896 struct btrfs_extent_item *ei;
7897 struct btrfs_extent_inline_ref *iref;
7898 struct btrfs_extent_data_ref *dref;
7899 struct btrfs_shared_data_ref *sref;
7900 struct btrfs_key key;
7901 struct extent_record tmpl;
7906 u32 item_size = btrfs_item_size_nr(eb, slot);
7912 btrfs_item_key_to_cpu(eb, &key, slot);
7914 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7916 num_bytes = root->fs_info->nodesize;
7918 num_bytes = key.offset;
7921 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7922 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7923 key.objectid, root->fs_info->sectorsize);
/* Item too small for a btrfs_extent_item: v0 format path. */
7926 if (item_size < sizeof(*ei)) {
7927 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7928 struct btrfs_extent_item_v0 *ei0;
7929 BUG_ON(item_size != sizeof(*ei0));
7930 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7931 refs = btrfs_extent_refs_v0(eb, ei0);
7935 memset(&tmpl, 0, sizeof(tmpl));
7936 tmpl.start = key.objectid;
7937 tmpl.nr = num_bytes;
7938 tmpl.extent_item_refs = refs;
7939 tmpl.metadata = metadata;
7941 tmpl.max_size = num_bytes;
7943 return add_extent_rec(extent_cache, &tmpl);
7946 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7947 refs = btrfs_extent_refs(eb, ei);
7948 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7952 if (metadata && num_bytes != root->fs_info->nodesize) {
7953 error("ignore invalid metadata extent, length %llu does not equal to %u",
7954 num_bytes, root->fs_info->nodesize);
7957 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7958 error("ignore invalid data extent, length %llu is not aligned to %u",
7959 num_bytes, root->fs_info->sectorsize);
7963 memset(&tmpl, 0, sizeof(tmpl));
7964 tmpl.start = key.objectid;
7965 tmpl.nr = num_bytes;
7966 tmpl.extent_item_refs = refs;
7967 tmpl.metadata = metadata;
7969 tmpl.max_size = num_bytes;
/* NOTE(review): the return value of add_extent_rec() is not used here. */
7970 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs start right after the extent item; tree blocks keyed with
 * BTRFS_EXTENT_ITEM_KEY carry a btrfs_tree_block_info first.
 */
7972 ptr = (unsigned long)(ei + 1);
7973 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7974 key.type == BTRFS_EXTENT_ITEM_KEY)
7975 ptr += sizeof(struct btrfs_tree_block_info);
7977 end = (unsigned long)ei + item_size;
7979 iref = (struct btrfs_extent_inline_ref *)ptr;
7980 type = btrfs_extent_inline_ref_type(eb, iref);
7981 offset = btrfs_extent_inline_ref_offset(eb, iref);
7983 case BTRFS_TREE_BLOCK_REF_KEY:
7984 ret = add_tree_backref(extent_cache, key.objectid,
7988 "add_tree_backref failed (extent items tree block): %s",
7991 case BTRFS_SHARED_BLOCK_REF_KEY:
7992 ret = add_tree_backref(extent_cache, key.objectid,
7996 "add_tree_backref failed (extent items shared block): %s",
7999 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref overlays the inline ref starting at its offset field. */
8000 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8001 add_data_backref(extent_cache, key.objectid, 0,
8002 btrfs_extent_data_ref_root(eb, dref),
8003 btrfs_extent_data_ref_objectid(eb,
8005 btrfs_extent_data_ref_offset(eb, dref),
8006 btrfs_extent_data_ref_count(eb, dref),
8009 case BTRFS_SHARED_DATA_REF_KEY:
/* The shared data ref follows the inline ref; offset is the parent. */
8010 sref = (struct btrfs_shared_data_ref *)(iref + 1);
8011 add_data_backref(extent_cache, key.objectid, offset,
8013 btrfs_shared_data_ref_count(eb, sref),
8017 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
8018 key.objectid, key.type, num_bytes);
8021 ptr += btrfs_extent_inline_ref_size(type);
/*
 * check_cache_range() - verify that a free range is present in the free space
 * cache of a block group.
 * @root:   root whose fs_info is used for the super block stripe mapping
 * @cache:  block group whose free_space_ctl is searched
 * @offset: start of the range expected to be free
 * @bytes:  length of the range expected to be free
 *
 * For every super block mirror the logical stripes mapped into this block
 * group are obtained with btrfs_rmap_block() and compared with the range: a
 * stripe at the start or overlapping the start moves @offset past it, a
 * stripe in the middle splits the range (the left part is checked by a
 * recursive call, the right part continues here).  What remains must match a
 * free space entry exactly in offset and size; the entry is then unlinked
 * from the free space ctl.
 */
8028 static int check_cache_range(struct btrfs_root *root,
8029 struct btrfs_block_group_cache *cache,
8030 u64 offset, u64 bytes)
8032 struct btrfs_free_space *entry;
8038 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
8039 bytenr = btrfs_sb_offset(i);
8040 ret = btrfs_rmap_block(root->fs_info,
8041 cache->key.objectid, bytenr, 0,
8042 &logical, &nr, &stripe_len);
/* The two tests below detect stripes that do not overlap the range. */
8047 if (logical[nr] + stripe_len <= offset)
8049 if (offset + bytes <= logical[nr])
8051 if (logical[nr] == offset) {
8052 if (stripe_len >= bytes) {
8056 bytes -= stripe_len;
8057 offset += stripe_len;
8058 } else if (logical[nr] < offset) {
8059 if (logical[nr] + stripe_len >=
8064 bytes = (offset + bytes) -
8065 (logical[nr] + stripe_len);
8066 offset = logical[nr] + stripe_len;
8069 * Could be tricky, the super may land in the
8070 * middle of the area we're checking. First
8071 * check the easiest case, it's at the end.
8073 if (logical[nr] + stripe_len >=
8075 bytes = logical[nr] - offset;
8079 /* Check the left side */
8080 ret = check_cache_range(root, cache,
8082 logical[nr] - offset);
8088 /* Now we continue with the right side */
8089 bytes = (offset + bytes) -
8090 (logical[nr] + stripe_len);
8091 offset = logical[nr] + stripe_len;
/* The remaining range must be matched by exactly one free space entry. */
8098 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8100 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8101 offset, offset+bytes);
8105 if (entry->offset != offset) {
8106 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8111 if (entry->bytes != bytes) {
8112 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8113 bytes, entry->bytes, offset);
/* Matched entries are removed so leftovers can be detected by the caller. */
8117 unlink_free_space(cache->free_space_ctl, entry);
/*
 * verify_space_cache() - compare a block group's free space cache with the
 * extent tree.
 * @root:  any root of the filesystem; replaced by fs_info->extent_root
 * @cache: block group to verify
 *
 * Walks the extent and metadata items inside the block group, starting at the
 * larger of the block group start and BTRFS_SUPER_INFO_OFFSET.  Every gap
 * between the end of the previous extent (last) and the next extent is handed
 * to check_cache_range(), as is the tail up to the end of the block group.
 * Entries still present in the free space ctl afterwards are reported.
 */
8122 static int verify_space_cache(struct btrfs_root *root,
8123 struct btrfs_block_group_cache *cache)
8125 struct btrfs_path path;
8126 struct extent_buffer *leaf;
8127 struct btrfs_key key;
8131 root = root->fs_info->extent_root;
8133 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8135 btrfs_init_path(&path);
8136 key.objectid = last;
8138 key.type = BTRFS_EXTENT_ITEM_KEY;
8139 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8144 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8145 ret = btrfs_next_leaf(root, &path);
8153 leaf = path.nodes[0];
8154 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Compare the key with the end of this block group. */
8155 if (key.objectid >= cache->key.offset + cache->key.objectid)
8157 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8158 key.type != BTRFS_METADATA_ITEM_KEY) {
/* No gap before this extent: only advance last past it. */
8163 if (last == key.objectid) {
8164 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8165 last = key.objectid + key.offset;
8167 last = key.objectid + root->fs_info->nodesize;
/* The range between last and this extent is checked against the cache. */
8172 ret = check_cache_range(root, cache, last,
8173 key.objectid - last);
8176 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8177 last = key.objectid + key.offset;
8179 last = key.objectid + root->fs_info->nodesize;
/* Tail of the block group after the last extent. */
8183 if (last < cache->key.objectid + cache->key.offset)
8184 ret = check_cache_range(root, cache, last,
8185 cache->key.objectid +
8186 cache->key.offset - last);
8189 btrfs_release_path(&path);
8192 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8193 fprintf(stderr, "There are still entries left in the space "
/*
 * check_space_cache() - verify the free space information of every block
 * group.
 * @root: root whose fs_info supplies the super block and block groups
 *
 * When the super block carries a cache generation other than -1ULL that
 * differs from the super generation, a message is printed saying the space
 * cache will be invalidated.  Block groups are visited in address order
 * starting after the primary super block.  Each one gets its free space
 * loaded, from the free space tree when the FREE_SPACE_TREE compat_ro bit is
 * set (with super stripes excluded around the load), or from the free space
 * cache otherwise, and is then checked with verify_space_cache().
 *
 * Returns -EINVAL when the local error flag was set, 0 otherwise.
 */
8201 static int check_space_cache(struct btrfs_root *root)
8203 struct btrfs_block_group_cache *cache;
8204 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8208 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8209 btrfs_super_generation(root->fs_info->super_copy) !=
8210 btrfs_super_cache_generation(root->fs_info->super_copy)) {
8211 printf("cache and super generation don't match, space cache "
8212 "will be invalidated\n");
8216 if (ctx.progress_enabled) {
8217 ctx.tp = TASK_FREE_SPACE;
8218 task_start(ctx.info);
8222 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* Next lookup starts at the end of this block group. */
8226 start = cache->key.objectid + cache->key.offset;
8227 if (!cache->free_space_ctl) {
8228 if (btrfs_init_free_space_ctl(cache,
8229 root->fs_info->sectorsize)) {
8234 btrfs_remove_free_space_cache(cache);
8237 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8238 ret = exclude_super_stripes(root, cache);
8240 fprintf(stderr, "could not exclude super stripes: %s\n",
8245 ret = load_free_space_tree(root->fs_info, cache);
8246 free_excluded_extents(root, cache);
8248 fprintf(stderr, "could not load free space tree: %s\n",
8255 ret = load_free_space_cache(root->fs_info, cache);
8260 ret = verify_space_cache(root, cache);
8262 fprintf(stderr, "cache appears valid but isn't %Lu\n",
8263 cache->key.objectid);
8268 task_stop(ctx.info);
8270 return error ? -EINVAL : 0;
/*
 * check_extent_csums() - verify the data checksums of one extent range.
 * @root:        root whose fs_info supplies sectorsize, csum size and mirrors
 * @bytenr:      logical start of the data to verify
 * @num_bytes:   length of the data; tested against sectorsize alignment
 * @leaf_offset: offset inside @eb of the first stored checksum
 * @eb:          leaf holding the stored checksums
 *
 * The data is read into a buffer of @num_bytes with read_extent_data().  For
 * each sectorsize block a checksum is computed and compared with the stored
 * one at leaf_offset + (block index * csum_size).  A mismatch is printed
 * together with the mirror number, and the number of copies is looked up so
 * that another mirror can be tried.
 */
8273 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8274 u64 num_bytes, unsigned long leaf_offset,
8275 struct extent_buffer *eb) {
8277 struct btrfs_fs_info *fs_info = root->fs_info;
8279 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8281 unsigned long csum_offset;
8285 u64 data_checked = 0;
8291 if (num_bytes % fs_info->sectorsize)
8294 data = malloc(num_bytes);
8298 while (offset < num_bytes) {
8301 read_len = num_bytes - offset;
8302 /* read as much space once a time */
8303 ret = read_extent_data(fs_info, data + offset,
8304 bytenr + offset, &read_len, mirror);
8308 /* verify every 4k data's checksum */
8309 while (data_checked < read_len) {
/* tmp is the byte position of this block inside the whole range. */
8311 tmp = offset + data_checked;
8313 csum = btrfs_csum_data((char *)data + tmp,
8314 csum, fs_info->sectorsize);
8315 btrfs_csum_final(csum, (u8 *)&csum);
/* Stored checksums are packed csum_size bytes apart, one per sector. */
8317 csum_offset = leaf_offset +
8318 tmp / fs_info->sectorsize * csum_size;
8319 read_extent_buffer(eb, (char *)&csum_expected,
8320 csum_offset, csum_size);
8321 /* try another mirror */
8322 if (csum != csum_expected) {
8323 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8324 mirror, bytenr + tmp,
8325 csum, csum_expected);
8326 num_copies = btrfs_num_copies(root->fs_info,
8328 if (mirror < num_copies - 1) {
8333 data_checked += fs_info->sectorsize;
/*
 * Check that the byte range [bytenr, bytenr + num_bytes) is covered by
 * EXTENT_ITEMs in the extent tree.
 *
 * The extent root is searched starting from (bytenr, EXTENT_ITEM, -1) and the
 * path is walked backwards/forwards over neighbouring items.  Every extent
 * item that overlaps the range trims @bytenr/@num_bytes; an extent that sits
 * in the middle of the range is handled by recursing on the right-hand part
 * and continuing with the left-hand part.  If bytes remain uncovered and no
 * error was recorded in ret, a "There are no extents for csum range" message
 * is printed.
 *
 * NOTE(review): several lines (loop headers, ret assignments, returns) are
 * not present in this listing, so the exact return values are not visible.
 */
8342 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8345 struct btrfs_path path;
8346 struct extent_buffer *leaf;
8347 struct btrfs_key key;
8350 btrfs_init_path(&path);
/* highest possible EXTENT_ITEM key for this bytenr */
8351 key.objectid = bytenr;
8352 key.type = BTRFS_EXTENT_ITEM_KEY;
8353 key.offset = (u64)-1;
8356 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8359 fprintf(stderr, "Error looking up extent record %d\n", ret);
8360 btrfs_release_path(&path);
/* step back one item, moving to the previous leaf when at slot 0 */
8363 if (path.slots[0] > 0) {
8366 ret = btrfs_prev_leaf(root, &path);
8369 } else if (ret > 0) {
8376 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8379 * Block group items come before extent items if they have the same
8380 * bytenr, so walk back one more just in case. Dear future traveller,
8381 * first congrats on mastering time travel. Now if it's not too much
8382 * trouble could you go back to 2006 and tell Chris to make the
8383 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8384 * EXTENT_ITEM_KEY please?
8386 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8387 if (path.slots[0] > 0) {
8390 ret = btrfs_prev_leaf(root, &path);
8393 } else if (ret > 0) {
8398 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
/* forward scan: move to the next leaf once the current one is exhausted */
8402 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8403 ret = btrfs_next_leaf(root, &path);
8405 fprintf(stderr, "Error going to next leaf "
8407 btrfs_release_path(&path);
8413 leaf = path.nodes[0];
8414 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8415 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* extent ends before our range starts */
8419 if (key.objectid + key.offset < bytenr) {
/* extent starts after our range ends */
8423 if (key.objectid > bytenr + num_bytes)
/* extent starts exactly at bytenr: consume its length from the front */
8426 if (key.objectid == bytenr) {
8427 if (key.offset >= num_bytes) {
8431 num_bytes -= key.offset;
8432 bytenr += key.offset;
/* extent starts before bytenr: keep only what lies past its end */
8433 } else if (key.objectid < bytenr) {
8434 if (key.objectid + key.offset >= bytenr + num_bytes) {
8438 num_bytes = (bytenr + num_bytes) -
8439 (key.objectid + key.offset);
8440 bytenr = key.objectid + key.offset;
8442 if (key.objectid + key.offset < bytenr + num_bytes) {
8443 u64 new_start = key.objectid + key.offset;
8444 u64 new_bytes = bytenr + num_bytes - new_start;
8447 * Weird case, the extent is in the middle of
8448 * our range, we'll have to search one side
8449 * and then the other. Not sure if this happens
8450 * in real life, but no harm in coding it up
8451 * anyway just in case.
8453 btrfs_release_path(&path);
8454 ret = check_extent_exists(root, new_start,
8457 fprintf(stderr, "Right section didn't "
8461 num_bytes = key.objectid - bytenr;
8464 num_bytes = key.objectid - bytenr;
/* whatever is left in num_bytes was not matched by any extent item */
8471 if (num_bytes && !ret) {
8472 fprintf(stderr, "There are no extents for csum range "
8473 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8477 btrfs_release_path(&path);
/*
 * Walk the checksum tree (root->fs_info->csum_root) and cross-check it.
 *
 * For every EXTENT_CSUM item the number of data bytes it covers is derived
 * from the item size (item size / csum_size * sectorsize).  When the global
 * check_data_csum is set, the data itself is verified through
 * check_extent_csums(); otherwise that step is skipped via the
 * skip_csum_check label.  Consecutive csum items are merged into one
 * [offset, offset + num_bytes) run; when a gap is found, the finished run is
 * handed to check_extent_exists() and a message is printed if no extent
 * record backs it.
 *
 * NOTE(review): loop header, error accounting and return statements are not
 * present in this listing -- the return convention is not visible here.
 */
8481 static int check_csums(struct btrfs_root *root)
8483 struct btrfs_path path;
8484 struct extent_buffer *leaf;
8485 struct btrfs_key key;
8486 u64 offset = 0, num_bytes = 0;
8487 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8491 unsigned long leaf_offset;
/* from here on 'root' refers to the csum tree, not the caller's root */
8493 root = root->fs_info->csum_root;
8494 if (!extent_buffer_uptodate(root->node)) {
8495 fprintf(stderr, "No valid csum tree found\n");
8499 btrfs_init_path(&path);
8500 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8501 key.type = BTRFS_EXTENT_CSUM_KEY;
8503 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8505 fprintf(stderr, "Error searching csum tree %d\n", ret);
8506 btrfs_release_path(&path);
8510 if (ret > 0 && path.slots[0])
8515 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8516 ret = btrfs_next_leaf(root, &path);
8518 fprintf(stderr, "Error going to next leaf "
8525 leaf = path.nodes[0];
8527 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8528 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* one csum_size entry per sectorsize bytes of data */
8533 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8534 csum_size) * root->fs_info->sectorsize;
8535 if (!check_data_csum)
8536 goto skip_csum_check;
8537 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8538 ret = check_extent_csums(root, key.offset, data_len,
8544 offset = key.offset;
/* this item does not continue the current run: verify the run, restart */
8545 } else if (key.offset != offset + num_bytes) {
8546 ret = check_extent_exists(root, offset, num_bytes);
8548 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8549 "there is no extent record\n",
8550 offset, offset+num_bytes);
8553 offset = key.offset;
8556 num_bytes += data_len;
8560 btrfs_release_path(&path);
/*
 * Compare @key against @drop_key field by field: objectid first, then type,
 * then offset, each with a strict less-than test.
 *
 * NOTE(review): the return statements are not present in this listing;
 * presumably a non-zero result means @key sorts before @drop_key -- confirm
 * against the full source.
 */
8564 static int is_dropped_key(struct btrfs_key *key,
8565 struct btrfs_key *drop_key) {
8566 if (key->objectid < drop_key->objectid)
8568 else if (key->objectid == drop_key->objectid) {
8569 if (key->type < drop_key->type)
8571 else if (key->type == drop_key->type) {
8572 if (key->offset < drop_key->offset)
8580 * Here are the rules for FULL_BACKREF.
8582 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8583 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8585 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
8586 * if it happened after the relocation occurred since we'll have dropped the
8587 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8588 * have no real way to know for sure.
8590 * We process the blocks one root at a time, and we start from the lowest root
8591 * objectid and go to the highest. So we can just lookup the owner backref for
8592 * the record and if we don't find it then we know it doesn't exist and we have
8595 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
8596 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8597 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether BTRFS_BLOCK_FLAG_FULL_BACKREF applies to tree block @buf
 * when reached through root item @ri, and record the verdict.
 *
 * The extent record for buf->start is looked up in @extent_cache.  The code
 * then tests, in order: whether ri->objectid is below
 * BTRFS_FIRST_FREE_OBJECTID, whether @buf is the root node of @ri, whether
 * the RELOC header flag is set, whether the header owner equals
 * ri->objectid, and finally whether a tree backref for the owner exists.
 * Depending on the outcome *flags gets FULL_BACKREF or not, and
 * rec->bad_full_backref is set when the verdict disagrees with a previously
 * stored rec->flag_block_full_backref.
 *
 * NOTE(review): the branch targets (goto/return lines) are missing from this
 * listing, so which test leads to which verdict is not visible here.
 */
8599 static int calc_extent_flag(struct cache_tree *extent_cache,
8600 struct extent_buffer *buf,
8601 struct root_item_record *ri,
8604 struct extent_record *rec;
8605 struct cache_extent *cache;
8606 struct tree_backref *tback;
8609 cache = lookup_cache_extent(extent_cache, buf->start, 1);
8610 /* we have added this extent before */
8614 rec = container_of(cache, struct extent_record, cache);
8617 * Except file/reloc tree, we can not have
8620 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8625 if (buf->start == ri->bytenr)
8628 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8631 owner = btrfs_header_owner(buf);
8632 if (owner == ri->objectid)
8635 tback = find_tree_backref(rec, 0, owner);
/* verdict "not full backref": flag a conflict with a stored non-zero value */
8640 if (rec->flag_block_full_backref != FLAG_UNSET &&
8641 rec->flag_block_full_backref != 0)
8642 rec->bad_full_backref = 1;
/* verdict "full backref": flag a conflict with a stored value other than 1 */
8645 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8646 if (rec->flag_block_full_backref != FLAG_UNSET &&
8647 rec->flag_block_full_backref != 1)
8648 rec->bad_full_backref = 1;
/*
 * Print a one-line diagnostic to stderr of the form
 * "Invalid key type(<type>) found in root(<root>)", using print_key_type()
 * and print_objectid() to render @key_type and @rootid.
 */
8652 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8654 fprintf(stderr, "Invalid key type(");
8655 print_key_type(stderr, 0, key_type);
8656 fprintf(stderr, ") found in root(");
8657 print_objectid(stderr, rootid, 0);
8658 fprintf(stderr, ")\n");
8662 * Check if the key is valid with its extent buffer.
8664 * This is a early check in case invalid key exists in a extent buffer
8665 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check that an item of type @key_type may appear in the tree identified by
 * @rootid.  Visible rules:
 *   - DEV_ITEM / CHUNK_ITEM:                     chunk tree only
 *   - EXTENT_ITEM / METADATA_ITEM / BLOCK_GROUP: extent tree only
 *   - ROOT_ITEM:                                 root tree only
 *   - DEV_EXTENT:                                device tree only
 * A violation is reported through report_mismatch_key_root().
 *
 * NOTE(review): the switch header, the branch bodies and the return
 * statements are missing from this listing; the return values are not
 * visible here.
 */
8668 static int check_type_with_root(u64 rootid, u8 key_type)
8671 /* Only valid in chunk tree */
8672 case BTRFS_DEV_ITEM_KEY:
8673 case BTRFS_CHUNK_ITEM_KEY:
8674 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8677 /* valid in csum and log tree */
/*
 * NOTE(review): BTRFS_CSUM_TREE_OBJECTID is a tree objectid, while every
 * other label here is a key type.  This looks like it was meant to be
 * BTRFS_EXTENT_CSUM_KEY -- confirm, and re-check the condition below against
 * the comment above before changing it.
 */
8678 case BTRFS_CSUM_TREE_OBJECTID:
8679 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8683 case BTRFS_EXTENT_ITEM_KEY:
8684 case BTRFS_METADATA_ITEM_KEY:
8685 case BTRFS_BLOCK_GROUP_ITEM_KEY:
8686 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8689 case BTRFS_ROOT_ITEM_KEY:
8690 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8693 case BTRFS_DEV_EXTENT_KEY:
8694 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8700 report_mismatch_key_root(key_type, rootid);
/*
 * Process the next pending tree block of the scan.
 *
 * A batch of candidate blocks is chosen with pick_next_pending(); readahead
 * is issued for all of them and the first one (bits[0]) is processed:
 *   - it is looked up in the pending, reada and nodes trees for removal;
 *   - it is read with read_tree_block(), using the parent generation stored
 *     in its extent record when one exists;
 *   - its FULL_BACKREF state is determined (btrfs_lookup_extent_info() when
 *     init_extent_tree is not set, calc_extent_flag() otherwise or as a
 *     fallback) and stored in rec->flag_block_full_backref;
 *   - check_block() is run on it;
 *   - for a leaf, every item is dispatched by key type to the matching
 *     process_*() / add_*_backref() helper; orphan items are queued on the
 *     global delete_items list and file extents update the data byte
 *     counters;
 *   - for a node, every child pointer gets an extent record, a tree backref
 *     and is queued via add_pending();
 *   - the global btree size/waste statistics are updated.
 *
 * NOTE(review): many lines (error paths, gotos, returns, some parameters)
 * are missing from this listing; return values are not visible here.
 */
8704 static int run_next_block(struct btrfs_root *root,
8705 struct block_info *bits,
8708 struct cache_tree *pending,
8709 struct cache_tree *seen,
8710 struct cache_tree *reada,
8711 struct cache_tree *nodes,
8712 struct cache_tree *extent_cache,
8713 struct cache_tree *chunk_cache,
8714 struct rb_root *dev_cache,
8715 struct block_group_tree *block_group_cache,
8716 struct device_extent_tree *dev_extent_cache,
8717 struct root_item_record *ri)
8719 struct btrfs_fs_info *fs_info = root->fs_info;
8720 struct extent_buffer *buf;
8721 struct extent_record *rec = NULL;
8732 struct btrfs_key key;
8733 struct cache_extent *cache;
/* select the next batch of blocks to work on */
8736 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8737 bits_nr, &reada_bits);
/* register each picked block in the reada tree and start readahead */
8742 for(i = 0; i < nritems; i++) {
8743 ret = add_cache_extent(reada, bits[i].start,
8748 /* fixme, get the parent transid */
8749 readahead_tree_block(fs_info, bits[i].start, 0);
/* only the first picked block is processed in this call */
8752 *last = bits[0].start;
8753 bytenr = bits[0].start;
8754 size = bits[0].size;
/* take the block out of the pending / reada / nodes tracking trees */
8756 cache = lookup_cache_extent(pending, bytenr, size);
8758 remove_cache_extent(pending, cache);
8761 cache = lookup_cache_extent(reada, bytenr, size);
8763 remove_cache_extent(reada, cache);
8766 cache = lookup_cache_extent(nodes, bytenr, size);
8768 remove_cache_extent(nodes, cache);
/* use the generation recorded by the parent pointer for the read */
8771 cache = lookup_cache_extent(extent_cache, bytenr, size);
8773 rec = container_of(cache, struct extent_record, cache);
8774 gen = rec->parent_generation;
8777 /* fixme, get the real parent transid */
8778 buf = read_tree_block(root->fs_info, bytenr, gen);
8779 if (!extent_buffer_uptodate(buf)) {
8780 record_bad_block_io(root->fs_info,
8781 extent_cache, bytenr, size);
8785 nritems = btrfs_header_nritems(buf);
/* determine the extent flags, falling back to calc_extent_flag() */
8788 if (!init_extent_tree) {
8789 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8790 btrfs_header_level(buf), 1, NULL,
8793 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8795 fprintf(stderr, "Couldn't calc extent flags\n");
8796 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8801 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8803 fprintf(stderr, "Couldn't calc extent flags\n");
8804 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* sanity-check the FULL_BACKREF verdict against owner/reloc/generation */
8808 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8810 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8811 ri->objectid == btrfs_header_owner(buf)) {
8813 * Ok we got to this block from it's original owner and
8814 * we have FULL_BACKREF set. Relocation can leave
8815 * converted blocks over so this is altogether possible,
8816 * however it's not possible if the generation > the
8817 * last snapshot, so check for this case.
8819 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8820 btrfs_header_generation(buf) > ri->last_snapshot) {
8821 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8822 rec->bad_full_backref = 1;
8827 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8828 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8829 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8830 rec->bad_full_backref = 1;
/* remember the final verdict on the extent record */
8834 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8835 rec->flag_block_full_backref = 1;
8839 rec->flag_block_full_backref = 0;
8841 owner = btrfs_header_owner(buf);
8844 ret = check_block(root, extent_cache, buf, flags);
/* leaf: account free space and dispatch every item by key type */
8848 if (btrfs_is_leaf(buf)) {
8849 btree_space_waste += btrfs_leaf_free_space(root, buf);
8850 for (i = 0; i < nritems; i++) {
8851 struct btrfs_file_extent_item *fi;
8852 btrfs_item_key_to_cpu(buf, &key, i);
8854 * Check key type against the leaf owner.
8855 * Could filter quite a lot of early error if
8858 if (check_type_with_root(btrfs_header_owner(buf),
8860 fprintf(stderr, "ignoring invalid key\n");
8863 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8864 process_extent_item(root, extent_cache, buf,
8868 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8869 process_extent_item(root, extent_cache, buf,
8873 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8875 btrfs_item_size_nr(buf, i);
8878 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8879 process_chunk_item(chunk_cache, &key, buf, i);
8882 if (key.type == BTRFS_DEV_ITEM_KEY) {
8883 process_device_item(dev_cache, &key, buf, i);
8886 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8887 process_block_group_item(block_group_cache,
8891 if (key.type == BTRFS_DEV_EXTENT_KEY) {
8892 process_device_extent_item(dev_extent_cache,
8897 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8898 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8899 process_extent_ref_v0(extent_cache, buf, i);
/* keyed tree block ref: key.offset is passed as the root, parent is 0 */
8906 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8907 ret = add_tree_backref(extent_cache,
8908 key.objectid, 0, key.offset, 0);
8911 "add_tree_backref failed (leaf tree block): %s",
/* shared tree block ref: key.offset is passed as the parent, root is 0 */
8915 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8916 ret = add_tree_backref(extent_cache,
8917 key.objectid, key.offset, 0, 0);
8920 "add_tree_backref failed (leaf shared block): %s",
8924 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8925 struct btrfs_extent_data_ref *ref;
8926 ref = btrfs_item_ptr(buf, i,
8927 struct btrfs_extent_data_ref);
8928 add_data_backref(extent_cache,
8930 btrfs_extent_data_ref_root(buf, ref),
8931 btrfs_extent_data_ref_objectid(buf,
8933 btrfs_extent_data_ref_offset(buf, ref),
8934 btrfs_extent_data_ref_count(buf, ref),
8935 0, root->fs_info->sectorsize);
8938 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8939 struct btrfs_shared_data_ref *ref;
8940 ref = btrfs_item_ptr(buf, i,
8941 struct btrfs_shared_data_ref);
8942 add_data_backref(extent_cache,
8943 key.objectid, key.offset, 0, 0, 0,
8944 btrfs_shared_data_ref_count(buf, ref),
8945 0, root->fs_info->sectorsize);
/* orphan items get copied onto the global delete_items list */
8948 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8949 struct bad_item *bad;
8951 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8955 bad = malloc(sizeof(struct bad_item));
8958 INIT_LIST_HEAD(&bad->list);
8959 memcpy(&bad->key, &key,
8960 sizeof(struct btrfs_key));
8961 bad->root_id = owner;
8962 list_add_tail(&bad->list, &delete_items);
/* everything below only applies to file extent items */
8965 if (key.type != BTRFS_EXTENT_DATA_KEY)
8967 fi = btrfs_item_ptr(buf, i,
8968 struct btrfs_file_extent_item);
8969 if (btrfs_file_extent_type(buf, fi) ==
8970 BTRFS_FILE_EXTENT_INLINE)
8972 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8975 data_bytes_allocated +=
8976 btrfs_file_extent_disk_num_bytes(buf, fi);
8977 if (data_bytes_allocated < root->fs_info->sectorsize) {
8980 data_bytes_referenced +=
8981 btrfs_file_extent_num_bytes(buf, fi);
8982 add_data_backref(extent_cache,
8983 btrfs_file_extent_disk_bytenr(buf, fi),
8984 parent, owner, key.objectid, key.offset -
8985 btrfs_file_extent_offset(buf, fi), 1, 1,
8986 btrfs_file_extent_disk_num_bytes(buf, fi));
/* node: record and queue every child block pointer */
8990 struct btrfs_key first_key;
8992 first_key.objectid = 0;
8995 btrfs_item_key_to_cpu(buf, &first_key, 0);
8996 level = btrfs_header_level(buf);
8997 for (i = 0; i < nritems; i++) {
8998 struct extent_record tmpl;
9000 ptr = btrfs_node_blockptr(buf, i);
9001 size = root->fs_info->nodesize;
9002 btrfs_node_key_to_cpu(buf, &key, i);
/* at the drop level, children are tested against the root's drop_key */
9004 if ((level == ri->drop_level)
9005 && is_dropped_key(&key, &ri->drop_key)) {
9010 memset(&tmpl, 0, sizeof(tmpl));
9011 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
9012 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
9017 tmpl.max_size = size;
9018 ret = add_extent_rec(extent_cache, &tmpl);
9022 ret = add_tree_backref(extent_cache, ptr, parent,
9026 "add_tree_backref failed (non-leaf block): %s",
9032 add_pending(nodes, seen, ptr, size);
9034 add_pending(pending, seen, ptr, size);
/* unused key pointer slots count as wasted btree space */
9037 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
9038 nritems) * sizeof(struct btrfs_key_ptr);
/* global size statistics, split by owner tree */
9040 total_btree_bytes += buf->len;
9041 if (fs_root_objectid(btrfs_header_owner(buf)))
9042 total_fs_tree_bytes += buf->len;
9043 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9044 total_extent_tree_bytes += buf->len;
9046 free_extent_buffer(buf);
/*
 * Seed the scan with a tree root block.
 *
 * @buf is queued through add_pending(): on the nodes tree when its header
 * level is above 0, on the pending tree otherwise.  An extent record
 * covering [buf->start, buf->len) is added to @extent_cache, followed by a
 * tree backref.  The backref uses buf->start as parent when @objectid is
 * BTRFS_TREE_RELOC_OBJECTID or the block predates BTRFS_MIXED_BACKREF_REV;
 * otherwise it is keyed by @objectid with parent 0.
 *
 * NOTE(review): the return statement and some template fields are missing
 * from this listing; presumably the add_tree_backref() result is returned.
 * The add_extent_rec() result is not checked on the visible line.
 */
9050 static int add_root_to_pending(struct extent_buffer *buf,
9051 struct cache_tree *extent_cache,
9052 struct cache_tree *pending,
9053 struct cache_tree *seen,
9054 struct cache_tree *nodes,
9057 struct extent_record tmpl;
9060 if (btrfs_header_level(buf) > 0)
9061 add_pending(nodes, seen, buf->start, buf->len);
9063 add_pending(pending, seen, buf->start, buf->len);
9065 memset(&tmpl, 0, sizeof(tmpl));
9066 tmpl.start = buf->start;
9071 tmpl.max_size = buf->len;
9072 add_extent_rec(extent_cache, &tmpl);
9074 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9075 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9076 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9079 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9084 /* as we fix the tree, we might be deleting blocks that
9085 * we're tracking for repair. This hook makes sure we
9086 * remove any backrefs for blocks as we are fixing them.
/*
 * Keep the in-memory extent cache (root->fs_info->fsck_extent_cache) in sync
 * when an extent reference is freed.
 *
 * The extent record covering [bytenr, num_bytes) is looked up.  An owner at
 * or above BTRFS_FIRST_FREE_OBJECTID is treated as data: the matching data
 * backref has refs_to_drop subtracted from its found_ref / num_refs counters
 * and the record's refs / extent_item_refs are adjusted.  Otherwise the
 * matching tree backref has its found_ref / found_extent_tree bits cleared.
 * Finally maybe_free_extent_rec() is given the chance to drop the record.
 *
 * NOTE(review): the return statements and NULL checks are missing from this
 * listing, so the return convention is not visible here.
 */
9088 static int free_extent_hook(struct btrfs_trans_handle *trans,
9089 struct btrfs_root *root,
9090 u64 bytenr, u64 num_bytes, u64 parent,
9091 u64 root_objectid, u64 owner, u64 offset,
9094 struct extent_record *rec;
9095 struct cache_extent *cache;
9097 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
9099 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9100 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9104 rec = container_of(cache, struct extent_record, cache);
/* data extent: counters are decremented by refs_to_drop */
9106 struct data_backref *back;
9107 back = find_data_backref(rec, parent, root_objectid, owner,
9108 offset, 1, bytenr, num_bytes);
9111 if (back->node.found_ref) {
9112 back->found_ref -= refs_to_drop;
9114 rec->refs -= refs_to_drop;
9116 if (back->node.found_extent_tree) {
9117 back->num_refs -= refs_to_drop;
9118 if (rec->extent_item_refs)
9119 rec->extent_item_refs -= refs_to_drop;
9121 if (back->found_ref == 0)
9122 back->node.found_ref = 0;
9123 if (back->num_refs == 0)
9124 back->node.found_extent_tree = 0;
/*
 * NOTE(review): the backref is erased when it is absent from the extent
 * tree but found_ref is still set -- confirm this is the intended condition
 * (the same test is used for tree backrefs below).
 */
9126 if (!back->node.found_extent_tree && back->node.found_ref) {
9127 rb_erase(&back->node.node, &rec->backref_tree);
/* tree block: a backref is a single reference, so the bits are cleared */
9131 struct tree_backref *back;
9132 back = find_tree_backref(rec, parent, root_objectid);
9135 if (back->node.found_ref) {
9138 back->node.found_ref = 0;
9140 if (back->node.found_extent_tree) {
9141 if (rec->extent_item_refs)
9142 rec->extent_item_refs--;
9143 back->node.found_extent_tree = 0;
9145 if (!back->node.found_extent_tree && back->node.found_ref) {
9146 rb_erase(&back->node.node, &rec->backref_tree);
9150 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent tree items that belong to @bytenr.
 *
 * The extent root is searched from (bytenr, <type>, -1).  Items whose
 * objectid differs from @bytenr are tested for as the stop condition.  Items
 * of an extent-related type (EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF,
 * EXTENT_DATA_REF, EXTENT_REF_V0, SHARED_BLOCK_REF, SHARED_DATA_REF) are
 * removed with btrfs_del_item() after a "repair deleting extent record"
 * message; for any other type the search key is moved just below the found
 * key and the search is retried.  When an EXTENT_ITEM or METADATA_ITEM is
 * deleted, btrfs_update_block_group() is called with the extent's size
 * (key offset for EXTENT_ITEM, nodesize for METADATA_ITEM).
 *
 * NOTE(review): loop header, error handling and return statements are not
 * present in this listing; return values are not visible here.
 */
9155 static int delete_extent_records(struct btrfs_trans_handle *trans,
9156 struct btrfs_root *root,
9157 struct btrfs_path *path,
9160 struct btrfs_key key;
9161 struct btrfs_key found_key;
9162 struct extent_buffer *leaf;
9167 key.objectid = bytenr;
9169 key.offset = (u64)-1;
9172 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9179 if (path->slots[0] == 0)
9185 leaf = path->nodes[0];
9186 slot = path->slots[0];
9188 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9189 if (found_key.objectid != bytenr)
/* not an extent-related item: step the search key below it and retry */
9192 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9193 found_key.type != BTRFS_METADATA_ITEM_KEY &&
9194 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9195 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9196 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9197 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9198 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9199 btrfs_release_path(path);
9200 if (found_key.type == 0) {
9201 if (found_key.offset == 0)
9203 key.offset = found_key.offset - 1;
9204 key.type = found_key.type;
9206 key.type = found_key.type - 1;
9207 key.offset = (u64)-1;
9211 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9212 found_key.objectid, found_key.type, found_key.offset);
9214 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9217 btrfs_release_path(path);
/* the extent itself was removed: update block group accounting */
9219 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9220 found_key.type == BTRFS_METADATA_ITEM_KEY) {
9221 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9222 found_key.offset : root->fs_info->nodesize;
9224 ret = btrfs_update_block_group(trans, root, bytenr,
9231 btrfs_release_path(path);
9236 * for a single backref, this will allocate a new extent
9237 * and add the backref to it.
/*
 * Write an extent record and one of its backrefs into the extent tree.
 *
 * An EXTENT_ITEM keyed (rec->start, rec->max_size) is inserted with a ref
 * count of 0 and rec->generation.  Data extents get
 * BTRFS_EXTENT_FLAG_DATA; tree blocks additionally carry a
 * btrfs_tree_block_info (level and first key taken from the record) and get
 * BTRFS_EXTENT_FLAG_TREE_BLOCK | @flags.  btrfs_update_block_group() is then
 * called for the range.  Afterwards the reference itself is added through
 * btrfs_inc_extent_ref(): once per found reference for a data backref, once
 * for a tree backref, using the backref's parent when full_backref is set.
 *
 * NOTE(review): the conditions around the item insertion (how @allocated is
 * used), error paths and returns are missing from this listing.
 */
9239 static int record_extent(struct btrfs_trans_handle *trans,
9240 struct btrfs_fs_info *info,
9241 struct btrfs_path *path,
9242 struct extent_record *rec,
9243 struct extent_backref *back,
9244 int allocated, u64 flags)
9247 struct btrfs_root *extent_root = info->extent_root;
9248 struct extent_buffer *leaf;
9249 struct btrfs_key ins_key;
9250 struct btrfs_extent_item *ei;
9251 struct data_backref *dback;
9252 struct btrfs_tree_block_info *bi;
9255 rec->max_size = max_t(u64, rec->max_size,
9259 u32 item_size = sizeof(*ei);
/* tree blocks store a btrfs_tree_block_info right after the extent item */
9262 item_size += sizeof(*bi);
9264 ins_key.objectid = rec->start;
9265 ins_key.offset = rec->max_size;
9266 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9268 ret = btrfs_insert_empty_item(trans, extent_root, path,
9269 &ins_key, item_size);
9273 leaf = path->nodes[0];
9274 ei = btrfs_item_ptr(leaf, path->slots[0],
9275 struct btrfs_extent_item);
/* start at zero refs; the references are added further down */
9277 btrfs_set_extent_refs(leaf, ei, 0);
9278 btrfs_set_extent_generation(leaf, ei, rec->generation);
9280 if (back->is_data) {
9281 btrfs_set_extent_flags(leaf, ei,
9282 BTRFS_EXTENT_FLAG_DATA);
/*
 * NOTE(review): the declaration below ends in a stray double semicolon, and
 * the '©_key' tokens that follow look like a mis-decoded '&copy_key' (HTML
 * entity damage) -- restore from the original source.
 */
9284 struct btrfs_disk_key copy_key;;
9286 bi = (struct btrfs_tree_block_info *)(ei + 1);
9287 memset_extent_buffer(leaf, 0, (unsigned long)bi,
9290 btrfs_set_disk_key_objectid(©_key,
9291 rec->info_objectid);
9292 btrfs_set_disk_key_type(©_key, 0);
9293 btrfs_set_disk_key_offset(©_key, 0);
9295 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9296 btrfs_set_tree_block_key(leaf, bi, ©_key);
9298 btrfs_set_extent_flags(leaf, ei,
9299 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9302 btrfs_mark_buffer_dirty(leaf);
9303 ret = btrfs_update_block_group(trans, extent_root, rec->start,
9304 rec->max_size, 1, 0);
9307 btrfs_release_path(path);
/* data backref: one btrfs_inc_extent_ref() per reference that was found */
9310 if (back->is_data) {
9314 dback = to_data_backref(back);
9315 if (back->full_backref)
9316 parent = dback->parent;
9320 for (i = 0; i < dback->found_ref; i++) {
9321 /* if parent != 0, we're doing a full backref
9322 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9323 * just makes the backref allocator create a data
9326 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9327 rec->start, rec->max_size,
9331 BTRFS_FIRST_FREE_OBJECTID :
9337 fprintf(stderr, "adding new data backref"
9338 " on %llu %s %llu owner %llu"
9339 " offset %llu found %d\n",
9340 (unsigned long long)rec->start,
9341 back->full_backref ?
9343 back->full_backref ?
9344 (unsigned long long)parent :
9345 (unsigned long long)dback->root,
9346 (unsigned long long)dback->owner,
9347 (unsigned long long)dback->offset,
/* tree backref: a single reference keyed by parent or root */
9351 struct tree_backref *tback;
9353 tback = to_tree_backref(back);
9354 if (back->full_backref)
9355 parent = tback->parent;
9359 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9360 rec->start, rec->max_size,
9361 parent, tback->root, 0, 0);
9362 fprintf(stderr, "adding new tree backref on "
9363 "start %llu len %llu parent %llu root %llu\n",
9364 rec->start, rec->max_size, parent, tback->root);
9367 btrfs_release_path(path);
/*
 * Scan the @entries list for an extent_entry whose bytenr and bytes both
 * equal the given values.
 *
 * NOTE(review): the return statements are missing from this listing;
 * presumably the matching entry is returned, or NULL when none matches.
 */
9371 static struct extent_entry *find_entry(struct list_head *entries,
9372 u64 bytenr, u64 bytes)
9374 struct extent_entry *entry = NULL;
9376 list_for_each_entry(entry, entries, list) {
9377 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Pick the entry of @entries that most backrefs agree on.
 *
 * Entries whose broken count equals their total count are tested for first
 * (per the comment below, they are not to be trusted).  The remaining
 * entries are compared by their count field, with 'prev' and 'best' tracking
 * the previous and the current leading candidate; equal counts mean the
 * leader is not certain and the search has to continue.
 *
 * NOTE(review): most branch bodies and the return statement are missing from
 * this listing; presumably 'best' is returned and may be NULL when no clear
 * winner exists -- confirm against the full source.
 */
9384 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9386 struct extent_entry *entry, *best = NULL, *prev = NULL;
9388 list_for_each_entry(entry, entries, list) {
9390 * If there are as many broken entries as entries then we know
9391 * not to trust this particular entry.
9393 if (entry->broken == entry->count)
9397 * Special case, when there are only two entries and 'best' is
9407 * If our current entry == best then we can't be sure our best
9408 * is really the best, so we need to keep searching.
9410 if (best && best->count == entry->count) {
9416 /* Prev == entry, not good enough, have to keep searching */
9417 if (!prev->broken && prev->count == entry->count)
9421 best = (prev->count > entry->count) ? prev : entry;
9422 else if (best->count < entry->count)
/*
 * Rewrite the file extent item behind data backref @dback so that it matches
 * the agreed-upon extent @entry (bytenr/bytes).
 *
 * Steps visible in the code:
 *   1. read the fs root named by dback->root;
 *   2. search from (dback->owner, EXTENT_DATA, dback->offset) and walk
 *      forward until a file extent with dback's disk_bytenr/bytes is found;
 *   3. start a transaction and search again with cow enabled;
 *   4. adjust disk_bytenr and the extent offset depending on whether the
 *      ref starts at, after or before entry->bytenr, then set
 *      disk_num_bytes to entry->bytes (and ram_bytes too when the extent is
 *      not compressed);
 *   5. mark the leaf dirty and commit the transaction.
 * Compressed extents whose start does not match, and refs that fall outside
 * the entry, are reported on stderr and not handled.
 *
 * Returns ret if it is non-zero, otherwise the result of the transaction
 * commit (see the final return).  Other return paths are not visible in
 * this listing.
 */
9430 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9431 struct data_backref *dback, struct extent_entry *entry)
9433 struct btrfs_trans_handle *trans;
9434 struct btrfs_root *root;
9435 struct btrfs_file_extent_item *fi;
9436 struct extent_buffer *leaf;
9437 struct btrfs_key key;
9441 key.objectid = dback->root;
9442 key.type = BTRFS_ROOT_ITEM_KEY;
9443 key.offset = (u64)-1;
9444 root = btrfs_read_fs_root(info, &key);
9446 fprintf(stderr, "Couldn't find root for our ref\n");
9451 * The backref points to the original offset of the extent if it was
9452 * split, so we need to search down to the offset we have and then walk
9453 * forward until we find the backref we're looking for.
9455 key.objectid = dback->owner;
9456 key.type = BTRFS_EXTENT_DATA_KEY;
9457 key.offset = dback->offset;
9458 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9460 fprintf(stderr, "Error looking up ref %d\n", ret);
9465 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9466 ret = btrfs_next_leaf(root, path);
9468 fprintf(stderr, "Couldn't find our ref, next\n");
9472 leaf = path->nodes[0];
9473 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9474 if (key.objectid != dback->owner ||
9475 key.type != BTRFS_EXTENT_DATA_KEY) {
9476 fprintf(stderr, "Couldn't find our ref, search\n");
9479 fi = btrfs_item_ptr(leaf, path->slots[0],
9480 struct btrfs_file_extent_item);
9481 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9482 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* this is the file extent the backref describes */
9484 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9489 btrfs_release_path(path);
9491 trans = btrfs_start_transaction(root, 1);
9493 return PTR_ERR(trans);
9496 * Ok we have the key of the file extent we want to fix, now we can cow
9497 * down to the thing and fix it.
9499 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9501 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9502 key.objectid, key.type, key.offset, ret);
9506 fprintf(stderr, "Well that's odd, we just found this key "
9507 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9512 leaf = path->nodes[0];
9513 fi = btrfs_item_ptr(leaf, path->slots[0],
9514 struct btrfs_file_extent_item);
/* moving the start of a compressed extent is not implemented */
9516 if (btrfs_file_extent_compression(leaf, fi) &&
9517 dback->disk_bytenr != entry->bytenr) {
9518 fprintf(stderr, "Ref doesn't match the record start and is "
9519 "compressed, please take a btrfs-image of this file "
9520 "system and send it to a btrfs developer so they can "
9521 "complete this functionality for bytenr %Lu\n",
9522 dback->disk_bytenr);
/* broken backref: take the entry's bytenr, the offset is left alone */
9527 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9528 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
/* ref starts inside the entry: the ref must not run past the entry end */
9529 } else if (dback->disk_bytenr > entry->bytenr) {
9530 u64 off_diff, offset;
9532 off_diff = dback->disk_bytenr - entry->bytenr;
9533 offset = btrfs_file_extent_offset(leaf, fi);
9534 if (dback->disk_bytenr + offset +
9535 btrfs_file_extent_num_bytes(leaf, fi) >
9536 entry->bytenr + entry->bytes) {
9537 fprintf(stderr, "Ref is past the entry end, please "
9538 "take a btrfs-image of this file system and "
9539 "send it to a btrfs developer, ref %Lu\n",
9540 dback->disk_bytenr);
9545 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9546 btrfs_set_file_extent_offset(leaf, fi, offset);
/* ref starts before the entry: referenced data must begin within the entry */
9547 } else if (dback->disk_bytenr < entry->bytenr) {
9550 offset = btrfs_file_extent_offset(leaf, fi);
9551 if (dback->disk_bytenr + offset < entry->bytenr) {
9552 fprintf(stderr, "Ref is before the entry start, please"
9553 " take a btrfs-image of this file system and "
9554 "send it to a btrfs developer, ref %Lu\n",
9555 dback->disk_bytenr);
/* rebase the offset from the ref's start onto the entry's start */
9560 offset += dback->disk_bytenr;
9561 offset -= entry->bytenr;
9562 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9563 btrfs_set_file_extent_offset(leaf, fi, offset);
9566 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9569 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9570 * only do this if we aren't using compression, otherwise it's a
9573 if (!btrfs_file_extent_compression(leaf, fi))
9574 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9576 printf("ram bytes may be wrong?\n");
9577 btrfs_mark_buffer_dirty(leaf);
9579 err = btrfs_commit_transaction(trans, root);
9580 btrfs_release_path(path);
9581 return ret ? ret : err;
/*
 * Make the data backrefs of extent record @rec agree on one (bytenr, bytes)
 * pair and repair the file extents that disagree.
 *
 * Pass 1 walks rec->backref_tree, skipping full backrefs, non-data backrefs
 * and backrefs without a found reference, and groups the rest into
 * extent_entry objects keyed by (disk_bytenr, bytes) on a local list.  If
 * there is at most one entry and no mismatch with the record, nothing needs
 * fixing.  Otherwise find_most_right_entry() picks the winner; when the
 * backrefs alone cannot decide, the extent record's own (start, nr) is
 * looked up -- or added as an extra entry -- and the vote is retried.  A
 * winner whose bytenr differs from rec->start is reported and not handled.
 *
 * Pass 2 walks the backrefs again and calls repair_ref() for every one that
 * differs from the winner.  Finally the local entry list is torn down.
 *
 * NOTE(review): counters, gotos and return statements are missing from this
 * listing; return values are not visible here.
 */
9584 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9585 struct extent_record *rec)
9587 struct extent_backref *back, *tmp;
9588 struct data_backref *dback;
9589 struct extent_entry *entry, *best = NULL;
9592 int broken_entries = 0;
9597 * Metadata is easy and the backrefs should always agree on bytenr and
9598 * size, if not we've got bigger issues.
/* pass 1: group the usable data backrefs by (disk_bytenr, bytes) */
9603 rbtree_postorder_for_each_entry_safe(back, tmp,
9604 &rec->backref_tree, node) {
9605 if (back->full_backref || !back->is_data)
9608 dback = to_data_backref(back);
9611 * We only pay attention to backrefs that we found a real
9614 if (dback->found_ref == 0)
9618 * For now we only catch when the bytes don't match, not the
9619 * bytenr. We can easily do this at the same time, but I want
9620 * to have a fs image to test on before we just add repair
9621 * functionality willy-nilly so we know we won't screw up the
9625 entry = find_entry(&entries, dback->disk_bytenr,
9628 entry = malloc(sizeof(struct extent_entry));
9633 memset(entry, 0, sizeof(*entry));
9634 entry->bytenr = dback->disk_bytenr;
9635 entry->bytes = dback->bytes;
9636 list_add_tail(&entry->list, &entries);
9641 * If we only have on entry we may think the entries agree when
9642 * in reality they don't so we have to do some extra checking.
9644 if (dback->disk_bytenr != rec->start ||
9645 dback->bytes != rec->nr || back->broken)
9656 /* Yay all the backrefs agree, carry on good sir */
9657 if (nr_entries <= 1 && !mismatch)
9660 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9661 "%Lu\n", rec->start);
9664 * First we want to see if the backrefs can agree amongst themselves who
9665 * is right, so figure out which one of the entries has the highest
9668 best = find_most_right_entry(&entries);
9671 * Ok so we may have an even split between what the backrefs think, so
9672 * this is where we use the extent ref to see what it thinks.
9675 entry = find_entry(&entries, rec->start, rec->nr);
9676 if (!entry && (!broken_entries || !rec->found_rec)) {
9677 fprintf(stderr, "Backrefs don't agree with each other "
9678 "and extent record doesn't agree with anybody,"
9679 " so we can't fix bytenr %Lu bytes %Lu\n",
9680 rec->start, rec->nr);
9683 } else if (!entry) {
9685 * Ok our backrefs were broken, we'll assume this is the
9686 * correct value and add an entry for this range.
9688 entry = malloc(sizeof(struct extent_entry));
9693 memset(entry, 0, sizeof(*entry));
9694 entry->bytenr = rec->start;
9695 entry->bytes = rec->nr;
9696 list_add_tail(&entry->list, &entries);
/* vote again now that the extent record's view is part of the list */
9700 best = find_most_right_entry(&entries);
9702 fprintf(stderr, "Backrefs and extent record evenly "
9703 "split on who is right, this is going to "
9704 "require user input to fix bytenr %Lu bytes "
9705 "%Lu\n", rec->start, rec->nr);
9712 * I don't think this can happen currently as we'll abort() if we catch
9713 * this case higher up, but in case somebody removes that we still can't
9714 * deal with it properly here yet, so just bail out of that's the case.
9716 if (best->bytenr != rec->start) {
9717 fprintf(stderr, "Extent start and backref starts don't match, "
9718 "please use btrfs-image on this file system and send "
9719 "it to a btrfs developer so they can make fsck fix "
9720 "this particular case. bytenr is %Lu, bytes is %Lu\n",
9721 rec->start, rec->nr);
9727 * Ok great we all agreed on an extent record, let's go find the real
9728 * references and fix up the ones that don't match.
/* pass 2: repair every usable data backref that differs from the winner */
9730 rbtree_postorder_for_each_entry_safe(back, tmp,
9731 &rec->backref_tree, node) {
9732 if (back->full_backref || !back->is_data)
9735 dback = to_data_backref(back);
9738 * Still ignoring backrefs that don't have a real ref attached
9741 if (dback->found_ref == 0)
9744 if (dback->bytes == best->bytes &&
9745 dback->disk_bytenr == best->bytenr)
9748 ret = repair_ref(info, path, dback, best);
9754 * Ok we messed with the actual refs, which means we need to drop our
9755 * entire cache and go back and rescan. I know this is a huge pain and
9756 * adds a lot of extra work, but it's the only way to be safe. Once all
9757 * the backrefs agree we may not need to do anything to the extent
/* tear down the temporary entry list */
9762 while (!list_empty(&entries)) {
9763 entry = list_entry(entries.next, struct extent_entry, list);
9764 list_del_init(&entry->list);
/*
 * process_duplicates - promote a duplicate of @rec to be the cached record.
 *
 * @extent_cache: cache tree that currently holds @rec
 * @rec:          record whose dups list holds the duplicate to promote
 *
 * Visible flow: after an early check on rec->found_rec and
 * rec->num_duplicates > 1, @rec is removed from @extent_cache. The first
 * entry of rec->dups (good) is reinitialised, takes over rec->refs and the
 * rec->backrefs list, absorbs cached records returned by a lookup at
 * good->start, and is finally inserted into @extent_cache.
 *
 * Returns 0 when the promoted record still has duplicates recorded
 * (good->num_duplicates != 0) and 1 otherwise.
 *
 * NOTE(review): the embedded line numbers of this listing are not
 * contiguous, so some statements (the action taken by the first check, the
 * loop around the lookup, the handling of the insert result) are not
 * visible here.
 */
9770 static int process_duplicates(struct cache_tree *extent_cache,
9771 struct extent_record *rec)
9773 struct extent_record *good, *tmp;
9774 struct cache_extent *cache;
9778 * If we found a extent record for this extent then return, or if we
9779 * have more than one duplicate we are likely going to need to delete
9782 if (rec->found_rec || rec->num_duplicates > 1)
9785 /* Shouldn't happen but just in case */
9786 BUG_ON(!rec->num_duplicates);
9789 * So this happens if we end up with a backref that doesn't match the
9790 * actual extent entry. So either the backref is bad or the extent
9791 * entry is bad. Either way we want to have the extent_record actually
9792 * reflect what we found in the extent_tree, so we need to take the
9793 * duplicate out and use that as the extent_record since the only way we
9794 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9796 remove_cache_extent(extent_cache, &rec->cache);
/* The first entry on rec->dups becomes the record that is kept. */
9798 good = to_extent_record(rec->dups.next);
9799 list_del_init(&good->list);
9800 INIT_LIST_HEAD(&good->backrefs);
9801 INIT_LIST_HEAD(&good->dups);
9802 good->cache.start = good->start;
9803 good->cache.size = good->nr;
9804 good->content_checked = 0;
9805 good->owner_ref_checked = 0;
9806 good->num_duplicates = 0;
/* The promoted record inherits the ref count and backrefs of rec. */
9807 good->refs = rec->refs;
9808 list_splice_init(&rec->backrefs, &good->backrefs);
9810 cache = lookup_cache_extent(extent_cache, good->start,
9814 tmp = container_of(cache, struct extent_record, cache);
9817 * If we find another overlapping extent and it's found_rec is
9818 * set then it's a duplicate and we need to try and delete
9821 if (tmp->found_rec || tmp->num_duplicates > 0) {
9822 if (list_empty(&good->list))
9823 list_add_tail(&good->list,
9824 &duplicate_extents);
9825 good->num_duplicates += tmp->num_duplicates + 1;
9826 list_splice_init(&tmp->dups, &good->dups);
9827 list_del_init(&tmp->list);
9828 list_add_tail(&tmp->list, &good->dups);
9829 remove_cache_extent(extent_cache, &tmp->cache);
9834 * Ok we have another non extent item backed extent rec, so lets
9835 * just add it to this extent and carry on like we did above.
9837 good->refs += tmp->refs;
9838 list_splice_init(&tmp->backrefs, &good->backrefs);
9839 remove_cache_extent(extent_cache, &tmp->cache);
/* Publish the promoted record in the cache in place of rec. */
9842 ret = insert_cache_extent(extent_cache, &good->cache);
9845 return good->num_duplicates ? 0 : 1;
/*
 * delete_duplicate_records - delete duplicate extent items of @rec from the
 * extent tree.
 *
 * @root: any root of the filesystem; it is replaced by
 *        root->fs_info->extent_root before the transaction is started
 * @rec:  record whose dups list holds the duplicates
 *
 * Visible flow: rec->dups is walked comparing start and nr against a
 * candidate (good) to find the record that covers the others, and a message
 * is printed when two extents only partially overlap. @rec and entries of
 * rec->dups are then queued on a local delete_list. Inside one transaction
 * every queued record with found_rec set is looked up by the key
 * (start, BTRFS_EXTENT_ITEM_KEY, nr) and the item is deleted. Afterwards
 * delete_list and rec->dups are both emptied.
 *
 * Returns ret when it is non-zero, otherwise nr_del. rec->num_duplicates is
 * reset to 0 only when both ret and nr_del are zero.
 *
 * NOTE(review): the embedded line numbers of this listing are not
 * contiguous; the declarations of ret, err and nr_del, the updates of
 * nr_del and good, and the skip/goto statements are not visible here.
 * nr_del is presumably the number of deleted items - confirm.
 */
9848 static int delete_duplicate_records(struct btrfs_root *root,
9849 struct extent_record *rec)
9851 struct btrfs_trans_handle *trans;
9852 LIST_HEAD(delete_list);
9853 struct btrfs_path path;
9854 struct extent_record *tmp, *good, *n;
9857 struct btrfs_key key;
9859 btrfs_init_path(&path);
9862 /* Find the record that covers all of the duplicates. */
9863 list_for_each_entry(tmp, &rec->dups, list) {
9864 if (good->start < tmp->start)
9866 if (good->nr > tmp->nr)
/*
 * NOTE(review): the message below is formatted as ranges but is fed start
 * and nr (a length), not an end offset.
 */
9869 if (tmp->start + tmp->nr < good->start + good->nr) {
9870 fprintf(stderr, "Ok we have overlapping extents that "
9871 "aren't completely covered by each other, this "
9872 "is going to require more careful thought. "
9873 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9874 tmp->start, tmp->nr, good->start, good->nr);
/* Queue rec itself and then entries of its dups list for deletion. */
9881 list_add_tail(&rec->list, &delete_list);
9883 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9886 list_move_tail(&tmp->list, &delete_list);
/* All deletions go through the extent root in a single transaction. */
9889 root = root->fs_info->extent_root;
9890 trans = btrfs_start_transaction(root, 1);
9891 if (IS_ERR(trans)) {
9892 ret = PTR_ERR(trans);
9896 list_for_each_entry(tmp, &delete_list, list) {
9897 if (tmp->found_rec == 0)
9899 key.objectid = tmp->start;
9900 key.type = BTRFS_EXTENT_ITEM_KEY;
9901 key.offset = tmp->nr;
9903 /* Shouldn't happen but just in case */
9904 if (tmp->metadata) {
9905 fprintf(stderr, "Well this shouldn't happen, extent "
9906 "record overlaps but is metadata? "
9907 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9911 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9917 ret = btrfs_del_item(trans, root, &path);
9920 btrfs_release_path(&path);
9923 err = btrfs_commit_transaction(trans, root);
/* Unlink everything that was queued; the release call is not shown. */
9927 while (!list_empty(&delete_list)) {
9928 tmp = to_extent_record(delete_list.next);
9929 list_del_init(&tmp->list);
9935 while (!list_empty(&rec->dups)) {
9936 tmp = to_extent_record(rec->dups.next);
9937 list_del_init(&tmp->list);
9941 btrfs_release_path(&path);
9943 if (!ret && !nr_del)
9944 rec->num_duplicates = 0;
9946 return ret ? ret : nr_del;
/*
 * find_possible_backrefs - try to resolve data backrefs of @rec that have
 * no file extent attached yet.
 *
 * @info:         filesystem the roots are read from
 * @path:         caller-provided path used for the file extent lookups
 * @extent_cache: cache of extent records, consulted by disk bytenr
 * @rec:          record whose backref_tree is walked
 *
 * For each data backref that is not a full backref and has found_ref == 0,
 * the owning root is read with the key
 * (dback->root, BTRFS_ROOT_ITEM_KEY, -1) and the file extent item
 * (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset) is searched in it.
 * When the item exists, its disk bytenr and disk num bytes are stored in
 * the backref and found_ref is incremented. Per the comment in the body,
 * this is not done when the extent cache already holds a record with
 * found_rec set for that bytenr.
 *
 * Returns PTR_ERR(root) for a root read error other than -ENOENT; the
 * remaining return paths are not visible in this listing.
 *
 * NOTE(review): the embedded line numbers of this listing are not
 * contiguous, so the continue/goto statements and the declarations of
 * bytenr, bytes and ret are not shown.
 */
9949 static int find_possible_backrefs(struct btrfs_fs_info *info,
9950 struct btrfs_path *path,
9951 struct cache_tree *extent_cache,
9952 struct extent_record *rec)
9954 struct btrfs_root *root;
9955 struct extent_backref *back, *tmp;
9956 struct data_backref *dback;
9957 struct cache_extent *cache;
9958 struct btrfs_file_extent_item *fi;
9959 struct btrfs_key key;
9963 rbtree_postorder_for_each_entry_safe(back, tmp,
9964 &rec->backref_tree, node) {
9965 /* Don't care about full backrefs (poor unloved backrefs) */
9966 if (back->full_backref || !back->is_data)
9969 dback = to_data_backref(back);
9971 /* We found this one, we don't need to do a lookup */
9972 if (dback->found_ref)
/* Read the root that the backref claims to live in. */
9975 key.objectid = dback->root;
9976 key.type = BTRFS_ROOT_ITEM_KEY;
9977 key.offset = (u64)-1;
9979 root = btrfs_read_fs_root(info, &key);
9981 /* No root, definitely a bad ref, skip */
9982 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9984 /* Other err, exit */
9986 return PTR_ERR(root);
/* Look for the file extent item the backref describes. */
9988 key.objectid = dback->owner;
9989 key.type = BTRFS_EXTENT_DATA_KEY;
9990 key.offset = dback->offset;
9991 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9993 btrfs_release_path(path);
9996 /* Didn't find it, we can carry on */
/* Copy what the file extent item says before the path is released. */
10001 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
10002 struct btrfs_file_extent_item);
10003 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
10004 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
10005 btrfs_release_path(path);
10006 cache = lookup_cache_extent(extent_cache, bytenr, 1);
/*
 * NOTE(review): this inner tmp has a different type and shadows the tmp
 * cursor declared at function scope for the rbtree iteration.
 */
10008 struct extent_record *tmp;
10009 tmp = container_of(cache, struct extent_record, cache);
10012 * If we found an extent record for the bytenr for this
10013 * particular backref then we can't add it to our
10014 * current extent record. We only want to add backrefs
10015 * that don't have a corresponding extent item in the
10016 * extent tree since they likely belong to this record
10017 * and we need to fix it if it doesn't match bytenrs.
10019 if (tmp->found_rec)
10023 dback->found_ref += 1;
10024 dback->disk_bytenr = bytenr;
10025 dback->bytes = bytes;
10028 * Set this so the verify backref code knows not to trust the
10029 * values in this backref.
10038 * Record orphan data ref into corresponding root.
10040 * Return 0 if the extent item contains data ref and recorded.
10041 * Return 1 if the extent item contains no useful data ref
10042 * In that case, it may contain only shared_dataref or metadata backref
10043 * or the file extent exists (this should be handled by the extent bytenr
10044 * recovery routine)
10045 * Return <0 if something goes wrong.
10047 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10048 struct extent_record *rec)
10050 struct btrfs_key key;
10051 struct btrfs_root *dest_root;
10052 struct extent_backref *back, *tmp;
10053 struct data_backref *dback;
10054 struct orphan_data_extent *orphan;
10055 struct btrfs_path path;
/* Becomes 1 once at least one orphan has been queued on a root. */
10056 int recorded_data_ref = 0;
/*
 * NOTE(review): the embedded line numbers of this listing are not
 * contiguous; the declaration of ret, the continue/goto statements and the
 * check of the malloc result are not visible here.
 */
10061 btrfs_init_path(&path);
/*
 * Only data backrefs that are not full backrefs, were found in the extent
 * tree, and have no file extent found yet are considered.
 */
10062 rbtree_postorder_for_each_entry_safe(back, tmp,
10063 &rec->backref_tree, node) {
10064 if (back->full_backref || !back->is_data ||
10065 !back->found_extent_tree)
10067 dback = to_data_backref(back);
10068 if (dback->found_ref)
10070 key.objectid = dback->root;
10071 key.type = BTRFS_ROOT_ITEM_KEY;
10072 key.offset = (u64)-1;
10074 dest_root = btrfs_read_fs_root(fs_info, &key);
10076 /* For non-exist root we just skip it */
10077 if (IS_ERR(dest_root) || !dest_root)
/* Probe for the file extent; only the search result is used. */
10080 key.objectid = dback->owner;
10081 key.type = BTRFS_EXTENT_DATA_KEY;
10082 key.offset = dback->offset;
10084 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10085 btrfs_release_path(&path);
10087 * For ret < 0, it's OK since the fs-tree may be corrupted,
10088 * we need to record it for inode/file extent rebuild.
10089 * For ret > 0, we record it only for file extent rebuild.
10090 * For ret == 0, the file extent exists but only bytenr
10091 * mismatch, let the original bytenr fix routine to handle,
/* Describe the orphan by its owner (root, inode, offset) and disk range. */
10097 orphan = malloc(sizeof(*orphan));
10102 INIT_LIST_HEAD(&orphan->list);
10103 orphan->root = dback->root;
10104 orphan->objectid = dback->owner;
10105 orphan->offset = dback->offset;
10106 orphan->disk_bytenr = rec->cache.start;
10107 orphan->disk_len = rec->cache.size;
/*
 * NOTE(review): with the usual list_add(new, head) convention these two
 * arguments look swapped. As written the list head of the root is linked
 * into the freshly initialised orphan entry, which would leave only the
 * most recently added orphan reachable from the root. Confirm against the
 * list helper in use before changing it.
 */
10108 list_add(&dest_root->orphan_data_extents, &orphan->list);
10109 recorded_data_ref = 1;
10112 btrfs_release_path(&path);
/* 0 when something was recorded, 1 when nothing was. */
10114 return !recorded_data_ref;
10120 * when an incorrect extent item is found, this will delete
10121 * all of the existing entries for it and recreate them
10122 * based on what the tree scan found.
10124 static int fixup_extent_refs(struct btrfs_fs_info *info,
10125 struct cache_tree *extent_cache,
10126 struct extent_record *rec)
10128 struct btrfs_trans_handle *trans = NULL;
10130 struct btrfs_path path;
10131 struct cache_extent *cache;
10132 struct extent_backref *back, *tmp;
/*
 * NOTE(review): the embedded line numbers of this listing are not
 * contiguous; the declarations of ret, flags and allocated and the error
 * checks after each step are not visible here.
 */
/* Recreated items carry FULL_BACKREF when the record asks for it. */
10136 if (rec->flag_block_full_backref)
10137 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10139 btrfs_init_path(&path);
/* The backref search and verification only run for data records. */
10140 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10142 * Sometimes the backrefs themselves are so broken they don't
10143 * get attached to any meaningful rec, so first go back and
10144 * check any of our backrefs that we couldn't find and throw
10145 * them into the list if we find the backref so that
10146 * verify_backrefs can figure out what to do.
10148 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10153 /* step one, make sure all of the backrefs agree */
10154 ret = verify_backrefs(info, &path, rec);
/* Steps two and three below run inside one extent root transaction. */
10158 trans = btrfs_start_transaction(info->extent_root, 1);
10159 if (IS_ERR(trans)) {
10160 ret = PTR_ERR(trans);
10164 /* step two, delete all the existing records */
10165 ret = delete_extent_records(trans, info->extent_root, &path,
10171 /* was this block corrupt? If so, don't add references to it */
10172 cache = lookup_cache_extent(info->corrupt_blocks,
10173 rec->start, rec->max_size);
10179 /* step three, recreate all the refs we did find */
10180 rbtree_postorder_for_each_entry_safe(back, tmp,
10181 &rec->backref_tree, node) {
10183 * if we didn't find any references, don't create a
10184 * new extent record
10186 if (!back->found_ref)
10189 rec->bad_full_backref = 0;
10190 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10198 int err = btrfs_commit_transaction(trans, info->extent_root);
10204 fprintf(stderr, "Repaired extent references for %llu\n",
10205 (unsigned long long)rec->start);
10207 btrfs_release_path(&path);
/*
 * fixup_extent_flags - make the FULL_BACKREF flag of the extent item of
 * @rec match rec->flag_block_full_backref.
 *
 * @fs_info: filesystem whose extent root is modified
 * @rec:     record describing the extent
 *
 * The item is searched in the extent root with the key
 * (rec->start, BTRFS_METADATA_ITEM_KEY, rec->info_level) for metadata
 * records and (rec->start, BTRFS_EXTENT_ITEM_KEY, rec->max_size)
 * otherwise. BTRFS_BLOCK_FLAG_FULL_BACKREF is then set or cleared in the
 * item flags, the leaf is marked dirty and the transaction is committed.
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started; the other
 * return statements are not visible in this listing.
 *
 * NOTE(review): the embedded line numbers of this listing are not
 * contiguous, so the conditions guarding the two early exits after the
 * search are not shown. On both of those exits the result of
 * btrfs_commit_transaction() is discarded.
 */
10211 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10212 struct extent_record *rec)
10214 struct btrfs_trans_handle *trans;
10215 struct btrfs_root *root = fs_info->extent_root;
10216 struct btrfs_path path;
10217 struct btrfs_extent_item *ei;
10218 struct btrfs_key key;
/* Metadata records are keyed by level, all others by size. */
10222 key.objectid = rec->start;
10223 if (rec->metadata) {
10224 key.type = BTRFS_METADATA_ITEM_KEY;
10225 key.offset = rec->info_level;
10227 key.type = BTRFS_EXTENT_ITEM_KEY;
10228 key.offset = rec->max_size;
10231 trans = btrfs_start_transaction(root, 0);
10233 return PTR_ERR(trans);
10235 btrfs_init_path(&path);
10236 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10238 btrfs_release_path(&path);
10239 btrfs_commit_transaction(trans, root);
10242 fprintf(stderr, "Didn't find extent for %llu\n",
10243 (unsigned long long)rec->start);
10244 btrfs_release_path(&path);
10245 btrfs_commit_transaction(trans, root);
/* Read the current flags, flip only the FULL_BACKREF bit, write back. */
10249 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10250 struct btrfs_extent_item);
10251 flags = btrfs_extent_flags(path.nodes[0], ei);
10252 if (rec->flag_block_full_backref) {
10253 fprintf(stderr, "setting full backref on %llu\n",
10254 (unsigned long long)key.objectid);
10255 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10257 fprintf(stderr, "clearing full backref on %llu\n",
10258 (unsigned long long)key.objectid);
10259 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10261 btrfs_set_extent_flags(path.nodes[0], ei, flags);
10262 btrfs_mark_buffer_dirty(path.nodes[0]);
10263 btrfs_release_path(&path);
10264 ret = btrfs_commit_transaction(trans, root);
10266 fprintf(stderr, "Repaired extent flags for %llu\n",
10267 (unsigned long long)rec->start);
10272 /* right now we only prune from the extent allocation tree */
/*
 * prune_one_block - delete the pointer to one corrupt block from its parent
 * node in the extent tree.
 *
 * @trans:   running transaction used for the search
 * @info:    filesystem whose extent root is searched
 * @corrupt: corrupt block description (key, level, cache.start)
 *
 * The extent root is searched for corrupt->key with path.lowest_level set
 * to corrupt->level + 1, so the path ends at the parent of the corrupt
 * block. The slot returned by the search is checked first; when its block
 * pointer is not corrupt->cache.start, every slot of the node is scanned.
 * When a matching pointer is found it is removed with btrfs_del_ptr().
 *
 * NOTE(review): the embedded line numbers of this listing are not
 * contiguous; the declarations of ret, slot, nritems and found, the goto
 * and return statements, and the body of the root node check are not
 * visible here.
 */
10273 static int prune_one_block(struct btrfs_trans_handle *trans,
10274 struct btrfs_fs_info *info,
10275 struct btrfs_corrupt_block *corrupt)
10278 struct btrfs_path path;
10279 struct extent_buffer *eb;
10283 int level = corrupt->level + 1;
10285 btrfs_init_path(&path);
10287 /* we want to stop at the parent to our busted block */
10288 path.lowest_level = level;
10290 ret = btrfs_search_slot(trans, info->extent_root,
10291 &corrupt->key, &path, -1, 1);
10296 eb = path.nodes[level];
10303 * hopefully the search gave us the block we want to prune,
10304 * lets try that first
10306 slot = path.slots[level];
10307 found = btrfs_node_blockptr(eb, slot);
10308 if (found == corrupt->cache.start)
10311 nritems = btrfs_header_nritems(eb);
10313 /* the search failed, lets scan this node and hope we find it */
10314 for (slot = 0; slot < nritems; slot++) {
10315 found = btrfs_node_blockptr(eb, slot);
10316 if (found == corrupt->cache.start)
10320 * we couldn't find the bad block. TODO, search all the nodes for pointers
10323 if (eb == info->extent_root->node) {
10328 btrfs_release_path(&path);
/* Reached with slot naming the pointer that refers to the corrupt block. */
10333 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10334 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10337 btrfs_release_path(&path);
/*
 * prune_corrupt_blocks - prune every block recorded in info->corrupt_blocks.
 *
 * Entries are fetched with search_cache_extent(info->corrupt_blocks, 0),
 * handed to prune_one_block() and then removed from the cache. A
 * transaction on the extent root is started for this work and committed at
 * the end; the commit result is the return value on that path, and
 * PTR_ERR(trans) is returned when the transaction cannot be started.
 *
 * NOTE(review): the embedded line numbers of this listing are not
 * contiguous, so the loop construct and the conditions around the
 * transaction start are not visible. The return value of prune_one_block()
 * is ignored.
 */
10341 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10343 struct btrfs_trans_handle *trans = NULL;
10344 struct cache_extent *cache;
10345 struct btrfs_corrupt_block *corrupt;
10348 cache = search_cache_extent(info->corrupt_blocks, 0);
10352 trans = btrfs_start_transaction(info->extent_root, 1);
10354 return PTR_ERR(trans);
10356 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10357 prune_one_block(trans, info, corrupt);
10358 remove_cache_extent(info->corrupt_blocks, cache);
10361 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * reset_cached_block_groups - clear EXTENT_DIRTY ranges from
 * fs_info->free_space_cache and then walk the block groups.
 *
 * The first part repeatedly finds a dirty range starting at offset 0 and
 * clears it. The second part looks up block groups by start offset and
 * advances start to the end of each one (key.objectid + key.offset).
 *
 * NOTE(review): the embedded line numbers of this listing are not
 * contiguous; the loop constructs, the declarations of start, end and ret,
 * and whatever is done to each block group are not visible here.
 */
10365 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10367 struct btrfs_block_group_cache *cache;
10372 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10373 &start, &end, EXTENT_DIRTY);
10376 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10381 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Continue the walk right after the block group just visited. */
10386 start = cache->key.objectid + cache->key.offset;
/*
 * check_extent_refs - report, and in repair mode fix, problems found on the
 * extent records collected in @extent_cache.
 *
 * @root:         a root of the filesystem, used to reach root->fs_info
 * @extent_cache: cache tree of extent records to check; records are removed
 *                from it as they are processed
 *
 * Phases visible in this listing:
 *  1. The range of every cached record and of every corrupt block is marked
 *     dirty in fs_info->excluded_extents, corrupt blocks are pruned and the
 *     cached block groups are reset. The comment in the body ties this to
 *     repair mode.
 *  2. While repair is set and duplicate_extents is not empty, each entry is
 *     passed to process_duplicates() and delete_duplicate_records().
 *  3. Every record in @extent_cache is checked and a message is printed
 *     for: multiple extent items, ref count mismatch, backpointer mismatch,
 *     failed owner ref check, bad full backref, metadata crossing a stripe
 *     boundary, and chunk type mismatch. fixup_extent_refs() is called when
 *     both repair and fix are set, and fixup_extent_flags() is called in
 *     the bad full backref case. The record is then removed from the cache
 *     and its backrefs are freed. Its excluded range is cleared when
 *     init_extent_tree is unset, repair is set, and either cur_err is zero
 *     or fix is set.
 *  4. A failure message is printed when ret is non-zero and not -EAGAIN.
 *     The listing also shows a transaction on the extent root that runs
 *     btrfs_fix_block_accounting() and is committed.
 *
 * NOTE(review): the embedded line numbers of this listing are not
 * contiguous, so the conditions, loops, declarations (ret, fix, cur_err and
 * others) and return statements connecting these phases are not visible.
 */
10390 static int check_extent_refs(struct btrfs_root *root,
10391 struct cache_tree *extent_cache)
10393 struct extent_record *rec;
10394 struct cache_extent *cache;
10400 * if we're doing a repair, we have to make sure
10401 * we don't allocate from the problem extents.
10402 * In the worst case, this will be all the
10403 * extents in the FS
10405 cache = search_cache_extent(extent_cache, 0);
10407 rec = container_of(cache, struct extent_record, cache);
10408 set_extent_dirty(root->fs_info->excluded_extents,
10410 rec->start + rec->max_size - 1);
10411 cache = next_cache_extent(cache);
10414 /* pin down all the corrupted blocks too */
10415 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10417 set_extent_dirty(root->fs_info->excluded_extents,
10419 cache->start + cache->size - 1);
10420 cache = next_cache_extent(cache);
/* Remove pointers to corrupt blocks, then reset the block group caches. */
10422 prune_corrupt_blocks(root->fs_info);
10423 reset_cached_block_groups(root->fs_info);
10426 reset_cached_block_groups(root->fs_info);
10429 * We need to delete any duplicate entries we find first otherwise we
10430 * could mess up the extent tree when we have backrefs that actually
10431 * belong to a different extent item and not the weird duplicate one.
10433 while (repair && !list_empty(&duplicate_extents)) {
10434 rec = to_extent_record(duplicate_extents.next);
10435 list_del_init(&rec->list);
10437 /* Sometimes we can find a backref before we find an actual
10438 * extent, so we need to process it a little bit to see if there
10439 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10440 * if this is a backref screwup. If we need to delete stuff
10441 * process_duplicates() will return 0, otherwise it will return
10444 if (process_duplicates(extent_cache, rec))
10446 ret = delete_duplicate_records(root, rec);
10450 * delete_duplicate_records will return the number of entries
10451 * deleted, so if it's greater than 0 then we know we actually
10452 * did something and we need to remove.
10465 cache = search_cache_extent(extent_cache, 0);
10468 rec = container_of(cache, struct extent_record, cache);
10469 if (rec->num_duplicates) {
10470 fprintf(stderr, "extent item %llu has multiple extent "
10471 "items\n", (unsigned long long)rec->start);
10475 if (rec->refs != rec->extent_item_refs) {
10476 fprintf(stderr, "ref mismatch on [%llu %llu] ",
10477 (unsigned long long)rec->start,
10478 (unsigned long long)rec->nr);
10479 fprintf(stderr, "extent item %llu, found %llu\n",
10480 (unsigned long long)rec->extent_item_refs,
10481 (unsigned long long)rec->refs);
10482 ret = record_orphan_data_extents(root->fs_info, rec);
10488 if (all_backpointers_checked(rec, 1)) {
10489 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10490 (unsigned long long)rec->start,
10491 (unsigned long long)rec->nr);
10495 if (!rec->owner_ref_checked) {
10496 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10497 (unsigned long long)rec->start,
10498 (unsigned long long)rec->nr);
10503 if (repair && fix) {
10504 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10510 if (rec->bad_full_backref) {
10511 fprintf(stderr, "bad full backref, on [%llu]\n",
10512 (unsigned long long)rec->start);
10514 ret = fixup_extent_flags(root->fs_info, rec);
10522 * Although it's not a extent ref's problem, we reuse this
10523 * routine for error reporting.
10524 * No repair function yet.
10526 if (rec->crossing_stripes) {
10528 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10529 rec->start, rec->start + rec->max_size);
10533 if (rec->wrong_chunk_type) {
10535 "bad extent [%llu, %llu), type mismatch with chunk\n",
10536 rec->start, rec->start + rec->max_size);
10540 remove_cache_extent(extent_cache, cache);
10541 free_all_extent_backrefs(rec);
10542 if (!init_extent_tree && repair && (!cur_err || fix))
10543 clear_extent_dirty(root->fs_info->excluded_extents,
10545 rec->start + rec->max_size - 1);
10550 if (ret && ret != -EAGAIN) {
10551 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10554 struct btrfs_trans_handle *trans;
10556 root = root->fs_info->extent_root;
10557 trans = btrfs_start_transaction(root, 1);
10558 if (IS_ERR(trans)) {
10559 ret = PTR_ERR(trans);
10563 ret = btrfs_fix_block_accounting(trans, root);
10566 ret = btrfs_commit_transaction(trans, root);
/*
 * calc_stripe_length - length of one stripe of a chunk.
 *
 * @type:        block group type flags of the chunk
 * @length:      length of the chunk
 * @num_stripes: number of stripes of the chunk
 *
 * Result by profile bit found in @type:
 *   RAID0:  length / num_stripes
 *   RAID10: length * 2 / num_stripes
 *   RAID5:  length / (num_stripes - 1)
 *   RAID6:  length / (num_stripes - 2)
 * The remaining assignment, stripe_size = length, is presumably the
 * fallback for every other type; the else line itself is not part of this
 * listing.
 *
 * NOTE(review): none of the visible lines guards the divisor. A
 * num_stripes of 0 (RAID0, RAID10), 1 (RAID5) or 2 (RAID6) would divide by
 * zero, and the values come from on-disk chunk items. Confirm that callers
 * validate num_stripes first.
 */
10575 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10579 if (type & BTRFS_BLOCK_GROUP_RAID0) {
10580 stripe_size = length;
10581 stripe_size /= num_stripes;
10582 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10583 stripe_size = length * 2;
10584 stripe_size /= num_stripes;
10585 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10586 stripe_size = length;
10587 stripe_size /= (num_stripes - 1);
10588 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10589 stripe_size = length;
10590 stripe_size /= (num_stripes - 2);
10592 stripe_size = length;
10594 return stripe_size;
10598 * Check the chunk with its block group/dev list ref:
10599 * Return 0 if all refs seem valid.
10600 * Return 1 if only some refs seem valid; a later check must rebuild the
10601 * rest (e.g. a missing block group, found by searching the extent tree).
10602 * Return -1 if essential refs are missing and unable to rebuild.
10604 static int check_chunk_refs(struct chunk_record *chunk_rec,
10605 struct block_group_tree *block_group_cache,
10606 struct device_extent_tree *dev_extent_cache,
10609 struct cache_extent *block_group_item;
10610 struct block_group_record *block_group_rec;
10611 struct cache_extent *dev_extent_item;
10612 struct device_extent_record *dev_extent_rec;
10616 int metadump_v2 = 0;
/*
 * NOTE(review): the embedded line numbers of this listing are not
 * contiguous; the last parameter, several declarations, the fprintf calls
 * that own the format strings below and the statements that set the return
 * value are not visible here.
 */
/* Part one: the chunk must be matched by a block group item. */
10620 block_group_item = lookup_cache_extent(&block_group_cache->tree,
10622 chunk_rec->length);
10623 if (block_group_item) {
10624 block_group_rec = container_of(block_group_item,
10625 struct block_group_record,
10627 if (chunk_rec->length != block_group_rec->offset ||
10628 chunk_rec->offset != block_group_rec->objectid ||
10630 chunk_rec->type_flags != block_group_rec->flags)) {
10633 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10634 chunk_rec->objectid,
10639 chunk_rec->type_flags,
10640 block_group_rec->objectid,
10641 block_group_rec->type,
10642 block_group_rec->offset,
10643 block_group_rec->offset,
10644 block_group_rec->objectid,
10645 block_group_rec->flags);
/* Take the block group off its list and attach it to the chunk. */
10648 list_del_init(&block_group_rec->list);
10649 chunk_rec->bg_rec = block_group_rec;
10654 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10655 chunk_rec->objectid,
10660 chunk_rec->type_flags);
/*
 * Part two: every stripe must be matched by a device extent with the same
 * devid, offset, chunk offset and the stripe length computed here.
 */
10667 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10668 chunk_rec->num_stripes);
10669 for (i = 0; i < chunk_rec->num_stripes; ++i) {
10670 devid = chunk_rec->stripes[i].devid;
10671 offset = chunk_rec->stripes[i].offset;
10672 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10673 devid, offset, length);
10674 if (dev_extent_item) {
10675 dev_extent_rec = container_of(dev_extent_item,
10676 struct device_extent_record,
10678 if (dev_extent_rec->objectid != devid ||
10679 dev_extent_rec->offset != offset ||
10680 dev_extent_rec->chunk_offset != chunk_rec->offset ||
10681 dev_extent_rec->length != length) {
10684 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10685 chunk_rec->objectid,
10688 chunk_rec->stripes[i].devid,
10689 chunk_rec->stripes[i].offset,
10690 dev_extent_rec->objectid,
10691 dev_extent_rec->offset,
10692 dev_extent_rec->length);
/* Move the device extent onto the dextents list of the chunk. */
10695 list_move(&dev_extent_rec->chunk_list,
10696 &chunk_rec->dextents);
10701 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10702 chunk_rec->objectid,
10705 chunk_rec->stripes[i].devid,
10706 chunk_rec->stripes[i].offset);
10713 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * check_chunks - run check_chunk_refs() on every chunk in @chunk_cache.
 *
 * @chunk_cache:       cache tree of chunk records
 * @block_group_cache: block group records the chunks are matched against
 * @dev_extent_cache:  device extent records the stripes are matched against
 * @good:    optional list; chunks whose check returned 0 are appended
 * @bad:     optional list; chunks whose check returned < 0 are appended
 * @rebuild: optional list; chunks whose check returned > 0 are appended
 * @silent:  passed through to check_chunk_refs()
 *
 * A NULL list pointer simply disables that classification. After the
 * chunk walk, the entries still on block_group_cache->block_groups and on
 * dev_extent_cache->no_chunk_orphans are iterated and, per the format
 * strings, reported as having no matching chunk.
 *
 * NOTE(review): the embedded line numbers of this listing are not
 * contiguous; the declarations of err and ret, the fprintf calls and the
 * return statement are not visible here.
 */
10714 int check_chunks(struct cache_tree *chunk_cache,
10715 struct block_group_tree *block_group_cache,
10716 struct device_extent_tree *dev_extent_cache,
10717 struct list_head *good, struct list_head *bad,
10718 struct list_head *rebuild, int silent)
10720 struct cache_extent *chunk_item;
10721 struct chunk_record *chunk_rec;
10722 struct block_group_record *bg_rec;
10723 struct device_extent_record *dext_rec;
10727 chunk_item = first_cache_extent(chunk_cache);
10728 while (chunk_item) {
10729 chunk_rec = container_of(chunk_item, struct chunk_record,
10731 err = check_chunk_refs(chunk_rec, block_group_cache,
10732 dev_extent_cache, silent);
/* Sort the chunk onto the caller list that matches the check result. */
10735 if (err == 0 && good)
10736 list_add_tail(&chunk_rec->list, good);
10737 if (err > 0 && rebuild)
10738 list_add_tail(&chunk_rec->list, rebuild);
10739 if (err < 0 && bad)
10740 list_add_tail(&chunk_rec->list, bad);
10741 chunk_item = next_cache_extent(chunk_item);
10744 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10747 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10755 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10759 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10760 dext_rec->objectid,
/*
 * check_device_used - compare the bytes covered by the device extents of a
 * device with the byte_used value of its device record.
 *
 * @dev_rec:    device record being checked
 * @dext_cache: device extent tree, searched from (dev_rec->devid, 0)
 *
 * Each visited device extent is removed from its device_list and its
 * length is added to a running total. The objectid of every extent is
 * compared with dev_rec->devid, presumably to stop at the first extent of
 * another device. A message is prepared when the total differs from
 * dev_rec->byte_used.
 *
 * NOTE(review): the embedded line numbers of this listing are not
 * contiguous; the loop construct, the statement after the devid check, the
 * fprintf call and the return statements are not visible here.
 */
10770 static int check_device_used(struct device_record *dev_rec,
10771 struct device_extent_tree *dext_cache)
10773 struct cache_extent *cache;
10774 struct device_extent_record *dev_extent_rec;
10775 u64 total_byte = 0;
10777 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10779 dev_extent_rec = container_of(cache,
10780 struct device_extent_record,
10782 if (dev_extent_rec->objectid != dev_rec->devid)
/* This extent belongs to the device: unlink it and count its length. */
10785 list_del_init(&dev_extent_rec->device_list);
10786 total_byte += dev_extent_rec->length;
10787 cache = next_cache_extent(cache);
10790 if (total_byte != dev_rec->byte_used) {
10792 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10793 total_byte, dev_rec->byte_used, dev_rec->objectid,
10794 dev_rec->type, dev_rec->offset);
10801 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * check_devices - run check_device_used() on every device in @dev_cache and
 * then report orphan device extents.
 *
 * @dev_cache:        rb tree of device records, walked with rb_first() and
 *                    rb_next()
 * @dev_extent_cache: device extent tree; its no_device_orphans list is
 *                    iterated after the device walk and, per the format
 *                    string, each entry is reported as having no device
 *
 * NOTE(review): the embedded line numbers of this listing are not
 * contiguous; the loop construct, the declarations of err and ret, the
 * fprintf call and the return statement are not visible here.
 */
10802 static int check_devices(struct rb_root *dev_cache,
10803 struct device_extent_tree *dev_extent_cache)
10805 struct rb_node *dev_node;
10806 struct device_record *dev_rec;
10807 struct device_extent_record *dext_rec;
10811 dev_node = rb_first(dev_cache);
10813 dev_rec = container_of(dev_node, struct device_record, node);
10814 err = check_device_used(dev_rec, dev_extent_cache);
10818 dev_node = rb_next(dev_node);
10820 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10823 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10824 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * add_root_item_to_list - allocate a root_item_record and append it to
 * @head.
 *
 * @head:          list the new record is appended to
 * @objectid:      stored in ri_rec->objectid
 * @bytenr:        stored in ri_rec->bytenr
 * @last_snapshot: stored in ri_rec->last_snapshot
 * @level:         stored in ri_rec->level
 * @drop_level:    stored in ri_rec->drop_level
 * @drop_key:      copied into ri_rec->drop_key
 *
 * NOTE(review): the embedded line numbers of this listing are not
 * contiguous; the check of the malloc result, the return statements and
 * the line right before the memcpy are not visible. Callers in this file
 * pass NULL for @drop_key, so that hidden line is presumably a NULL guard -
 * confirm.
 */
10831 static int add_root_item_to_list(struct list_head *head,
10832 u64 objectid, u64 bytenr, u64 last_snapshot,
10833 u8 level, u8 drop_level,
10834 struct btrfs_key *drop_key)
10837 struct root_item_record *ri_rec;
10838 ri_rec = malloc(sizeof(*ri_rec));
10841 ri_rec->bytenr = bytenr;
10842 ri_rec->objectid = objectid;
10843 ri_rec->level = level;
10844 ri_rec->drop_level = drop_level;
10845 ri_rec->last_snapshot = last_snapshot;
10847 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10848 list_add_tail(&ri_rec->list, head);
/*
 * free_root_item_list - empty a list of root_item_record entries.
 *
 * Entries are taken from the front of @list and unlinked one at a time
 * until the list is empty.
 *
 * NOTE(review): the embedded line numbers of this listing are not
 * contiguous; the statement that releases each entry after it is unlinked
 * is not visible here.
 */
10853 static void free_root_item_list(struct list_head *list)
10855 struct root_item_record *ri_rec;
10857 while (!list_empty(list)) {
10858 ri_rec = list_first_entry(list, struct root_item_record,
10860 list_del_init(&ri_rec->list);
/*
 * deal_root_from_list - scan the trees described by the root item records
 * queued on @list.
 *
 * @list: list of root_item_record entries; each one is unlinked after it
 *        has been handled
 * @root: root used to reach fs_info and passed on to run_next_block()
 * @bits: block_info array passed on to run_next_block(); its size
 *        (bits_nr) is a parameter declared on a line not shown here
 * The remaining cache tree, rb tree and device extent tree parameters are
 * passed through unchanged to add_root_to_pending() and run_next_block().
 *
 * For each record the root block at rec->bytenr is read. A buffer that is
 * not up to date is released (the rest of that error path is not visible).
 * Otherwise the block is queued with add_root_to_pending() and
 * run_next_block() is called with the record. After the list has been
 * consumed, run_next_block() is called with NULL in place of a record.
 *
 * NOTE(review): the embedded line numbers of this listing are not
 * contiguous; the declarations of ret and last, the loop constructs around
 * both run_next_block() calls, the result checks and the return statement
 * are not visible here.
 */
10865 static int deal_root_from_list(struct list_head *list,
10866 struct btrfs_root *root,
10867 struct block_info *bits,
10869 struct cache_tree *pending,
10870 struct cache_tree *seen,
10871 struct cache_tree *reada,
10872 struct cache_tree *nodes,
10873 struct cache_tree *extent_cache,
10874 struct cache_tree *chunk_cache,
10875 struct rb_root *dev_cache,
10876 struct block_group_tree *block_group_cache,
10877 struct device_extent_tree *dev_extent_cache)
10882 while (!list_empty(list)) {
10883 struct root_item_record *rec;
10884 struct extent_buffer *buf;
10885 rec = list_entry(list->next,
10886 struct root_item_record, list);
10888 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10889 if (!extent_buffer_uptodate(buf)) {
10890 free_extent_buffer(buf);
10894 ret = add_root_to_pending(buf, extent_cache, pending,
10895 seen, nodes, rec->objectid);
10899 * To rebuild extent tree, we need deal with snapshot
10900 * one by one, otherwise we deal with node firstly which
10901 * can maximize readahead.
10904 ret = run_next_block(root, bits, bits_nr, &last,
10905 pending, seen, reada, nodes,
10906 extent_cache, chunk_cache,
10907 dev_cache, block_group_cache,
10908 dev_extent_cache, rec);
/* Done with this root: drop the buffer and unlink the record. */
10912 free_extent_buffer(buf);
10913 list_del(&rec->list);
10919 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10920 reada, nodes, extent_cache, chunk_cache,
10921 dev_cache, block_group_cache,
10922 dev_extent_cache, NULL);
10932 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10934 struct rb_root dev_cache;
10935 struct cache_tree chunk_cache;
10936 struct block_group_tree block_group_cache;
10937 struct device_extent_tree dev_extent_cache;
10938 struct cache_tree extent_cache;
10939 struct cache_tree seen;
10940 struct cache_tree pending;
10941 struct cache_tree reada;
10942 struct cache_tree nodes;
10943 struct extent_io_tree excluded_extents;
10944 struct cache_tree corrupt_blocks;
10945 struct btrfs_path path;
10946 struct btrfs_key key;
10947 struct btrfs_key found_key;
10949 struct block_info *bits;
10951 struct extent_buffer *leaf;
10953 struct btrfs_root_item ri;
10954 struct list_head dropping_trees;
10955 struct list_head normal_trees;
10956 struct btrfs_root *root1;
10957 struct btrfs_root *root;
10961 root = fs_info->fs_root;
10962 dev_cache = RB_ROOT;
10963 cache_tree_init(&chunk_cache);
10964 block_group_tree_init(&block_group_cache);
10965 device_extent_tree_init(&dev_extent_cache);
10967 cache_tree_init(&extent_cache);
10968 cache_tree_init(&seen);
10969 cache_tree_init(&pending);
10970 cache_tree_init(&nodes);
10971 cache_tree_init(&reada);
10972 cache_tree_init(&corrupt_blocks);
10973 extent_io_tree_init(&excluded_extents);
10974 INIT_LIST_HEAD(&dropping_trees);
10975 INIT_LIST_HEAD(&normal_trees);
10978 fs_info->excluded_extents = &excluded_extents;
10979 fs_info->fsck_extent_cache = &extent_cache;
10980 fs_info->free_extent_hook = free_extent_hook;
10981 fs_info->corrupt_blocks = &corrupt_blocks;
10985 bits = malloc(bits_nr * sizeof(struct block_info));
10991 if (ctx.progress_enabled) {
10992 ctx.tp = TASK_EXTENTS;
10993 task_start(ctx.info);
10997 root1 = fs_info->tree_root;
10998 level = btrfs_header_level(root1->node);
10999 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11000 root1->node->start, 0, level, 0, NULL);
11003 root1 = fs_info->chunk_root;
11004 level = btrfs_header_level(root1->node);
11005 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11006 root1->node->start, 0, level, 0, NULL);
11009 btrfs_init_path(&path);
11012 key.type = BTRFS_ROOT_ITEM_KEY;
11013 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
11017 leaf = path.nodes[0];
11018 slot = path.slots[0];
11019 if (slot >= btrfs_header_nritems(path.nodes[0])) {
11020 ret = btrfs_next_leaf(root, &path);
11023 leaf = path.nodes[0];
11024 slot = path.slots[0];
11026 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11027 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
11028 unsigned long offset;
11031 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
11032 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
11033 last_snapshot = btrfs_root_last_snapshot(&ri);
11034 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
11035 level = btrfs_root_level(&ri);
11036 ret = add_root_item_to_list(&normal_trees,
11037 found_key.objectid,
11038 btrfs_root_bytenr(&ri),
11039 last_snapshot, level,
11044 level = btrfs_root_level(&ri);
11045 objectid = found_key.objectid;
11046 btrfs_disk_key_to_cpu(&found_key,
11047 &ri.drop_progress);
11048 ret = add_root_item_to_list(&dropping_trees,
11050 btrfs_root_bytenr(&ri),
11051 last_snapshot, level,
11052 ri.drop_level, &found_key);
11059 btrfs_release_path(&path);
11062 * check_block can return -EAGAIN if it fixes something, please keep
11063 * this in mind when dealing with return values from these functions, if
11064 * we get -EAGAIN we want to fall through and restart the loop.
11066 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11067 &seen, &reada, &nodes, &extent_cache,
11068 &chunk_cache, &dev_cache, &block_group_cache,
11069 &dev_extent_cache);
11071 if (ret == -EAGAIN)
11075 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11076 &pending, &seen, &reada, &nodes,
11077 &extent_cache, &chunk_cache, &dev_cache,
11078 &block_group_cache, &dev_extent_cache);
11080 if (ret == -EAGAIN)
11085 ret = check_chunks(&chunk_cache, &block_group_cache,
11086 &dev_extent_cache, NULL, NULL, NULL, 0);
11088 if (ret == -EAGAIN)
11093 ret = check_extent_refs(root, &extent_cache);
11095 if (ret == -EAGAIN)
11100 ret = check_devices(&dev_cache, &dev_extent_cache);
11105 task_stop(ctx.info);
11107 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11108 extent_io_tree_cleanup(&excluded_extents);
11109 fs_info->fsck_extent_cache = NULL;
11110 fs_info->free_extent_hook = NULL;
11111 fs_info->corrupt_blocks = NULL;
11112 fs_info->excluded_extents = NULL;
11115 free_chunk_cache_tree(&chunk_cache);
11116 free_device_cache_tree(&dev_cache);
11117 free_block_group_tree(&block_group_cache);
11118 free_device_extent_tree(&dev_extent_cache);
11119 free_extent_cache_tree(&seen);
11120 free_extent_cache_tree(&pending);
11121 free_extent_cache_tree(&reada);
11122 free_extent_cache_tree(&nodes);
11123 free_root_item_list(&normal_trees);
11124 free_root_item_list(&dropping_trees);
11127 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11128 free_extent_cache_tree(&seen);
11129 free_extent_cache_tree(&pending);
11130 free_extent_cache_tree(&reada);
11131 free_extent_cache_tree(&nodes);
11132 free_chunk_cache_tree(&chunk_cache);
11133 free_block_group_tree(&block_group_cache);
11134 free_device_cache_tree(&dev_cache);
11135 free_device_extent_tree(&dev_extent_cache);
11136 free_extent_record_cache(&extent_cache);
11137 free_root_item_list(&normal_trees);
11138 free_root_item_list(&dropping_trees);
11139 extent_io_tree_cleanup(&excluded_extents);
11144 * Check backrefs of a tree block given by @bytenr or @eb.
11146 * @root: the root containing the @bytenr or @eb
11147 * @eb: tree block extent buffer, can be NULL
11148 * @bytenr: bytenr of the tree block to search
11149 * @level: tree level of the tree block
11150 * @owner: owner of the tree block
11152 * Return >0 for any error found and output error message
11153 * Return 0 for no error found
/*
 * Visible lookup order: search the extent tree for the METADATA_ITEM
 * (when SKINNY_METADATA is set) or EXTENT_ITEM of @bytenr, walk the inline
 * refs of that item, then search for a keyed TREE_BLOCK_REF item whose
 * offset is root->objectid.
 * The closing backref lost message is only printed when @eb is non-NULL.
 * NOTE(review): brace-only lines and some declarations and branches are
 * not visible in this listing; confirm the exact control flow against the
 * complete file.
 */
11155 static int check_tree_block_ref(struct btrfs_root *root,
11156 struct extent_buffer *eb, u64 bytenr,
11157 int level, u64 owner)
11159 struct btrfs_key key;
11160 struct btrfs_root *extent_root = root->fs_info->extent_root;
11161 struct btrfs_path path;
11162 struct btrfs_extent_item *ei;
11163 struct btrfs_extent_inline_ref *iref;
11164 struct extent_buffer *leaf;
11170 u32 nodesize = root->fs_info->nodesize;
11173 int tree_reloc_root = 0;
/*
 * Remember whether @bytenr is the root node of a tree reloc root; the flag
 * is tested for SHARED_BLOCK_REF entries in the inline ref loop.
 */
11178 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
11179 btrfs_header_bytenr(root->node) == bytenr)
11180 tree_reloc_root = 1;
11182 btrfs_init_path(&path);
11183 key.objectid = bytenr;
11184 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11185 key.type = BTRFS_METADATA_ITEM_KEY;
11187 key.type = BTRFS_EXTENT_ITEM_KEY;
11188 key.offset = (u64)-1;
11190 /* Search for the backref in extent tree */
11191 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11193 err |= BACKREF_MISSING;
11196 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11198 err |= BACKREF_MISSING;
11202 leaf = path.nodes[0];
11203 slot = path.slots[0];
11204 btrfs_item_key_to_cpu(leaf, &key, slot);
11206 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * With a METADATA_ITEM key the level is key.offset and the inline refs
 * start right after the extent item; in the other visible case a
 * btrfs_tree_block_info precedes them and supplies the level.
 */
11208 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11209 skinny_level = (int)key.offset;
11210 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11212 struct btrfs_tree_block_info *info;
11214 info = (struct btrfs_tree_block_info *)(ei + 1);
11215 skinny_level = btrfs_tree_block_level(leaf, info);
11216 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11223 if (!(btrfs_extent_flags(leaf, ei) &
11224 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11226 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11227 key.objectid, nodesize,
11228 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11229 err = BACKREF_MISMATCH;
11231 header_gen = btrfs_header_generation(eb);
11232 extent_gen = btrfs_extent_generation(leaf, ei);
11233 if (header_gen != extent_gen) {
11235 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11236 key.objectid, nodesize, header_gen,
11238 err = BACKREF_MISMATCH;
11240 if (level != skinny_level) {
11242 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11243 key.objectid, nodesize, level, skinny_level);
11244 err = BACKREF_MISMATCH;
11246 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11248 "extent[%llu %u] is referred by other roots than %llu",
11249 key.objectid, nodesize, root->objectid);
11250 err = BACKREF_MISMATCH;
11255 * Iterate the extent/metadata item to find the exact backref
11257 item_size = btrfs_item_size_nr(leaf, slot);
11258 ptr = (unsigned long)iref;
11259 end = (unsigned long)ei + item_size;
11260 while (ptr < end) {
11261 iref = (struct btrfs_extent_inline_ref *)ptr;
11262 type = btrfs_extent_inline_ref_type(leaf, iref);
11263 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11265 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11266 (offset == root->objectid || offset == owner)) {
11268 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11270 * Backref of tree reloc root points to itself, no need
11271 * to check backref any more.
11273 if (tree_reloc_root)
/*
 * Recursive call: the inline ref offset is passed as @bytenr with
 * level + 1 and a NULL @eb.
 */
11276 /* Check if the backref points to valid referencer */
11277 found_ref = !check_tree_block_ref(root, NULL,
11278 offset, level + 1, owner);
11283 ptr += btrfs_extent_inline_ref_size(type);
11287 * Inlined extent item doesn't have what we need, check
11288 * TREE_BLOCK_REF_KEY
11291 btrfs_release_path(&path);
11292 key.objectid = bytenr;
11293 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11294 key.offset = root->objectid;
11296 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11301 err |= BACKREF_MISSING;
11303 btrfs_release_path(&path);
11304 if (eb && (err & BACKREF_MISSING))
11305 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
11306 bytenr, nodesize, owner, level);
11311 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11313 * Return >0 for any error found and output error message
11314 * Return 0 for no error found
/*
 * Side effects visible below: disk_num_bytes is added to the file-scope
 * counter data_bytes_allocated and num_bytes to data_bytes_referenced.
 * NOTE(review): whether that happens only for aligned values depends on
 * branch lines not visible in this listing.
 * Backref lookup order: inline refs of the EXTENT_ITEM keyed by
 * (disk_bytenr, disk_num_bytes), then a keyed EXTENT_DATA_REF item, then
 * a keyed SHARED_DATA_REF item whose offset is eb->start.
 * BACKREF_MISSING is set in err when found_dbackref is still 0 at the end.
 */
11316 static int check_extent_data_item(struct btrfs_root *root,
11317 struct extent_buffer *eb, int slot)
11319 struct btrfs_file_extent_item *fi;
11320 struct btrfs_path path;
11321 struct btrfs_root *extent_root = root->fs_info->extent_root;
11322 struct btrfs_key fi_key;
11323 struct btrfs_key dbref_key;
11324 struct extent_buffer *leaf;
11325 struct btrfs_extent_item *ei;
11326 struct btrfs_extent_inline_ref *iref;
11327 struct btrfs_extent_data_ref *dref;
11330 u64 disk_num_bytes;
11331 u64 extent_num_bytes;
11338 int found_dbackref = 0;
11342 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11343 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11345 /* Nothing to check for hole and inline data extents */
11346 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11347 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11350 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11351 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11352 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11354 /* Check unaligned disk_num_bytes and num_bytes */
11355 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11357 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11358 fi_key.objectid, fi_key.offset, disk_num_bytes,
11359 root->fs_info->sectorsize);
11360 err |= BYTES_UNALIGNED;
11362 data_bytes_allocated += disk_num_bytes;
11364 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11366 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11367 fi_key.objectid, fi_key.offset, extent_num_bytes,
11368 root->fs_info->sectorsize);
11369 err |= BYTES_UNALIGNED;
11371 data_bytes_referenced += extent_num_bytes;
11373 owner = btrfs_header_owner(eb);
11375 /* Check the extent item of the file extent in extent tree */
11376 btrfs_init_path(&path);
11377 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11378 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11379 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11381 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
/* From here on the @slot parameter holds the slot found in the extent tree. */
11385 leaf = path.nodes[0];
11386 slot = path.slots[0];
11387 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11389 extent_flags = btrfs_extent_flags(leaf, ei);
11391 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11393 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11394 disk_bytenr, disk_num_bytes,
11395 BTRFS_EXTENT_FLAG_DATA);
11396 err |= BACKREF_MISMATCH;
11399 /* Check data backref inside that extent item */
11400 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11401 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11402 ptr = (unsigned long)iref;
11403 end = (unsigned long)ei + item_size;
11404 while (ptr < end) {
11405 iref = (struct btrfs_extent_inline_ref *)ptr;
11406 type = btrfs_extent_inline_ref_type(leaf, iref);
11407 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
/*
 * An EXTENT_DATA_REF matches when its root equals the header owner of @eb
 * or root->objectid; a SHARED_DATA_REF is validated through
 * check_tree_block_ref() with the inline ref offset as the block bytenr.
 */
11409 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11410 ref_root = btrfs_extent_data_ref_root(leaf, dref);
11411 if (ref_root == owner || ref_root == root->objectid)
11412 found_dbackref = 1;
11413 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
11414 found_dbackref = !check_tree_block_ref(root, NULL,
11415 btrfs_extent_inline_ref_offset(leaf, iref),
11419 if (found_dbackref)
11421 ptr += btrfs_extent_inline_ref_size(type);
11424 if (!found_dbackref) {
11425 btrfs_release_path(&path);
11427 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
11428 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11429 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
/*
 * NOTE(review): fi_key.offset is hashed directly here, while
 * check_extent_data_backref() in this file treats the dbackref offset as
 * file_offset - file_extent_offset; confirm this lookup is correct for
 * file extents with a non-zero extent offset.
 */
11430 dbref_key.offset = hash_extent_data_ref(root->objectid,
11431 fi_key.objectid, fi_key.offset);
11433 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11434 &dbref_key, &path, 0, 0);
11436 found_dbackref = 1;
11440 btrfs_release_path(&path);
11443 * Neither inlined nor EXTENT_DATA_REF found, try
11444 * SHARED_DATA_REF as last chance.
11446 dbref_key.objectid = disk_bytenr;
11447 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
11448 dbref_key.offset = eb->start;
11450 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11451 &dbref_key, &path, 0, 0);
11453 found_dbackref = 1;
11459 if (!found_dbackref)
11460 err |= BACKREF_MISSING;
11461 btrfs_release_path(&path);
11462 if (err & BACKREF_MISSING) {
11463 error("data extent[%llu %llu] backref lost",
11464 disk_bytenr, disk_num_bytes);
11470 * Get the real tree block level, e.g. for a shared block
11471 * Return >= 0 as tree level
11472 * Return <0 for error
/*
 * Two sources are compared: the level recorded in the extent tree
 * (key.offset of a METADATA_ITEM, otherwise btrfs_tree_block_level() of the
 * btrfs_tree_block_info placed after the extent item) and the level in the
 * header of the block read with the extent generation as transid.
 * header_level is returned on the path visible at the end; the values
 * returned on the failure paths are on lines not visible in this listing.
 */
11474 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11476 struct extent_buffer *eb;
11477 struct btrfs_path path;
11478 struct btrfs_key key;
11479 struct btrfs_extent_item *ei;
11486 /* Search extent tree for extent generation and level */
11487 key.objectid = bytenr;
11488 key.type = BTRFS_METADATA_ITEM_KEY;
/*
 * The search uses the largest possible offset;
 * btrfs_previous_extent_item() is called next with @bytenr.
 */
11489 key.offset = (u64)-1;
11491 btrfs_init_path(&path);
11492 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
11495 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11503 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11504 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11505 struct btrfs_extent_item);
11506 flags = btrfs_extent_flags(path.nodes[0], ei);
11507 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11512 /* Get transid for later read_tree_block() check */
11513 transid = btrfs_extent_generation(path.nodes[0], ei);
11515 /* Get backref level as one source */
11516 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11517 backref_level = key.offset;
11519 struct btrfs_tree_block_info *info;
11521 info = (struct btrfs_tree_block_info *)(ei + 1);
11522 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11524 btrfs_release_path(&path);
11526 /* Get level from tree block as an alternative source */
11527 eb = read_tree_block(fs_info, bytenr, transid);
11528 if (!extent_buffer_uptodate(eb)) {
11529 free_extent_buffer(eb);
11532 header_level = btrfs_header_level(eb);
/* The buffer is released before the two levels are compared. */
11533 free_extent_buffer(eb);
11535 if (header_level != backref_level)
11537 return header_level;
11540 btrfs_release_path(&path);
11545 * Check if a tree block backref is valid (points to a valid tree block)
11546 * if level == -1, level will be resolved
11547 * Return >0 for any error found and print error message
/*
 * @root_id is used as the objectid of a ROOT_ITEM key to load the tree via
 * btrfs_read_fs_root(). The first key of the block at @bytenr is then
 * searched in that tree with path.lowest_level set to @level, and the block
 * found at path.nodes[level] must have the same bytenr and header level.
 * REFERENCER_MISSING and REFERENCER_MISMATCH bits are accumulated in err.
 */
11549 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11550 u64 bytenr, int level)
11552 struct btrfs_root *root;
11553 struct btrfs_key key;
11554 struct btrfs_path path;
11555 struct extent_buffer *eb;
11556 struct extent_buffer *node;
11557 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11561 /* Query level for level == -1 special case */
11563 level = query_tree_block_level(fs_info, bytenr);
11565 err |= REFERENCER_MISSING;
11569 key.objectid = root_id;
11570 key.type = BTRFS_ROOT_ITEM_KEY;
11571 key.offset = (u64)-1;
11573 root = btrfs_read_fs_root(fs_info, &key);
11574 if (IS_ERR(root)) {
11575 err |= REFERENCER_MISSING;
11579 /* Read out the tree block to get item/node key */
11580 eb = read_tree_block(fs_info, bytenr, 0);
11581 if (!extent_buffer_uptodate(eb)) {
11582 err |= REFERENCER_MISSING;
11583 free_extent_buffer(eb);
11587 /* Empty tree, no need to check key */
11588 if (!btrfs_header_nritems(eb) && !level) {
11589 free_extent_buffer(eb);
/*
 * The first key is read as a node key or as an item key; NOTE(review): the
 * condition choosing between the two calls is not visible in this listing.
 */
11594 btrfs_node_key_to_cpu(eb, &key, 0);
11596 btrfs_item_key_to_cpu(eb, &key, 0);
11598 free_extent_buffer(eb);
11600 btrfs_init_path(&path);
/* The block reached by the search is read back from path.nodes[level]. */
11601 path.lowest_level = level;
11602 /* Search with the first key, to ensure we can reach it */
11603 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11605 err |= REFERENCER_MISSING;
11609 node = path.nodes[level];
11610 if (btrfs_header_bytenr(node) != bytenr) {
11612 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
11613 bytenr, nodesize, bytenr,
11614 btrfs_header_bytenr(node));
11615 err |= REFERENCER_MISMATCH;
11617 if (btrfs_header_level(node) != level) {
11619 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
11620 bytenr, nodesize, level,
11621 btrfs_header_level(node));
11622 err |= REFERENCER_MISMATCH;
11626 btrfs_release_path(&path);
/*
 * Two message variants follow, the second also prints the level.
 * NOTE(review): the condition selecting between them is not visible here.
 */
11628 if (err & REFERENCER_MISSING) {
11630 error("extent [%llu %d] lost referencer (owner: %llu)",
11631 bytenr, nodesize, root_id);
11634 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11635 bytenr, nodesize, root_id, level);
11642 * Check if tree block @eb is tree reloc root.
11643 * Return 0 if it's not or any problem happens
11644 * Return 1 if it's a tree reloc root
/*
 * The reloc root is looked up with key (BTRFS_TREE_RELOC_OBJECTID,
 * ROOT_ITEM, header owner of @eb) through btrfs_read_fs_root_no_cache() and
 * released with btrfs_free_fs_root(). The test performed is whether the
 * bytenr of its root node equals the header bytenr of @eb.
 */
11646 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11647 struct extent_buffer *eb)
11649 struct btrfs_root *tree_reloc_root;
11650 struct btrfs_key key;
11651 u64 bytenr = btrfs_header_bytenr(eb);
11652 u64 owner = btrfs_header_owner(eb);
/* The key offset carries the owner taken from the header of @eb. */
11655 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11656 key.offset = owner;
11657 key.type = BTRFS_ROOT_ITEM_KEY;
11659 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11660 if (IS_ERR(tree_reloc_root))
11663 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
11665 btrfs_free_fs_root(tree_reloc_root);
11670 * Check referencer for shared block backref
11671 * If level == -1, this function will resolve the level.
/*
 * @parent is read as a tree block and its node pointers are scanned for
 * @bytenr. Before the scan, level + 1 is compared with the header level of
 * the parent block. When @parent equals @bytenr the block is tested with
 * is_tree_reloc_root().
 * Returns REFERENCER_MISSING after printing an error when found_parent is
 * 0; the success return is on a line not visible in this listing.
 */
11673 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11674 u64 parent, u64 bytenr, int level)
11676 struct extent_buffer *eb;
11678 int found_parent = 0;
11681 eb = read_tree_block(fs_info, parent, 0);
11682 if (!extent_buffer_uptodate(eb))
11686 level = query_tree_block_level(fs_info, bytenr);
11690 /* It's possible it's a tree reloc root */
11691 if (parent == bytenr) {
11692 if (is_tree_reloc_root(fs_info, eb))
11697 if (level + 1 != btrfs_header_level(eb))
/* Scan every block pointer of the parent for @bytenr. */
11700 nr = btrfs_header_nritems(eb);
11701 for (i = 0; i < nr; i++) {
11702 if (bytenr == btrfs_node_blockptr(eb, i)) {
11708 free_extent_buffer(eb);
11709 if (!found_parent) {
11711 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
11712 bytenr, fs_info->nodesize, parent, level);
11713 return REFERENCER_MISSING;
11719 * Check referencer for normal (inlined) data ref
11720 * If len == 0, it will be resolved by searching in extent tree
/*
 * The tree @root_id is loaded with btrfs_read_fs_root() and the EXTENT_DATA
 * items keyed by @objectid are iterated with btrfs_next_item(). An item is
 * compared on disk bytenr, on disk num bytes and on key.offset minus the
 * file extent offset. found_count is finally compared with @count and
 * REFERENCER_MISSING is returned, with an error message, on mismatch.
 * NOTE(review): the statement incrementing found_count and the right-hand
 * side of the last comparison are on lines not visible in this listing.
 */
11722 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
11723 u64 root_id, u64 objectid, u64 offset,
11724 u64 bytenr, u64 len, u32 count)
11726 struct btrfs_root *root;
11727 struct btrfs_root *extent_root = fs_info->extent_root;
11728 struct btrfs_key key;
11729 struct btrfs_path path;
11730 struct extent_buffer *leaf;
11731 struct btrfs_file_extent_item *fi;
11732 u32 found_count = 0;
/*
 * Extent tree lookup of the EXTENT_ITEM for @bytenr; NOTE(review): the
 * guard around this lookup and the use of its result are not visible here.
 */
11737 key.objectid = bytenr;
11738 key.type = BTRFS_EXTENT_ITEM_KEY;
11739 key.offset = (u64)-1;
11741 btrfs_init_path(&path);
11742 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11745 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11748 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11749 if (key.objectid != bytenr ||
11750 key.type != BTRFS_EXTENT_ITEM_KEY)
11753 btrfs_release_path(&path);
11755 key.objectid = root_id;
11756 key.type = BTRFS_ROOT_ITEM_KEY;
11757 key.offset = (u64)-1;
11758 btrfs_init_path(&path);
11760 root = btrfs_read_fs_root(fs_info, &key);
11764 key.objectid = objectid;
11765 key.type = BTRFS_EXTENT_DATA_KEY;
11767 * It can be nasty as data backref offset is
11768 * file offset - file extent offset, which is smaller or
11769 * equal to original backref offset. The only special case is
11770 * overflow. So we need to special check and do further search.
/* When bit 63 of @offset is set the search starts at offset 0 instead. */
11772 key.offset = offset & (1ULL << 63) ? 0 : offset;
11774 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11779 * Search afterwards to get correct one
11780 * NOTE: As we must do a comprehensive check on the data backref to
11781 * make sure the dref count also matches, we must iterate all file
11782 * extents for that inode.
11785 leaf = path.nodes[0];
11786 slot = path.slots[0];
11788 if (slot >= btrfs_header_nritems(leaf))
11790 btrfs_item_key_to_cpu(leaf, &key, slot);
11791 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11793 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11795 * Except normal disk bytenr and disk num bytes, we still
11796 * need to do extra check on dbackref offset as
11797 * dbackref offset = file_offset - file_extent_offset
11799 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11800 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11801 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11806 ret = btrfs_next_item(root, &path);
11811 btrfs_release_path(&path);
11812 if (found_count != count) {
11814 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
11815 bytenr, len, root_id, objectid, offset, count, found_count);
11816 return REFERENCER_MISSING;
11822 * Check if the referencer of a shared data backref exists
/*
 * @parent is read as a tree block and every item in it is examined: the key
 * type is tested against EXTENT_DATA, the file extent type is tested
 * against BTRFS_FILE_EXTENT_INLINE, and the disk bytenr is compared with
 * @bytenr. REFERENCER_MISSING is returned, with an error message, when
 * found_parent is still 0 after the loop.
 * NOTE(review): the statements executed by the three tests (presumably
 * continue and a found_parent assignment) are not visible in this listing.
 */
11824 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
11825 u64 parent, u64 bytenr)
11827 struct extent_buffer *eb;
11828 struct btrfs_key key;
11829 struct btrfs_file_extent_item *fi;
11831 int found_parent = 0;
11834 eb = read_tree_block(fs_info, parent, 0);
11835 if (!extent_buffer_uptodate(eb))
11838 nr = btrfs_header_nritems(eb);
11839 for (i = 0; i < nr; i++) {
11840 btrfs_item_key_to_cpu(eb, &key, i);
11841 if (key.type != BTRFS_EXTENT_DATA_KEY)
11844 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
11845 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
11848 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
11855 free_extent_buffer(eb);
11856 if (!found_parent) {
11857 error("shared extent %llu referencer lost (parent: %llu)",
11859 return REFERENCER_MISSING;
11865 * This function will check a given extent item, including its backref and
11866 * itself (like crossing stripe boundary and type)
11868 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Accounting: the file-scope counter bytes_used grows by key.offset for an
 * EXTENT_ITEM key and by nodesize on the other visible path.
 * Every inline ref placed after the extent item (and after the
 * btrfs_tree_block_info for EXTENT_ITEM keyed metadata) is dispatched by
 * type:
 *   TREE_BLOCK_REF   -> check_tree_block_backref()
 *   SHARED_BLOCK_REF -> check_shared_block_backref()
 *   EXTENT_DATA_REF  -> check_extent_data_backref()
 *   SHARED_DATA_REF  -> check_shared_data_backref()
 * Any other type sets UNKNOWN_TYPE in err. Other bits set here are
 * CROSSING_STRIPE_BOUNDARY and ITEM_SIZE_MISMATCH.
 * NOTE(review): check_crossing_stripes() is given the file-scope
 * global_info rather than the @fs_info parameter; confirm both always
 * refer to the same object.
 */
11870 static int check_extent_item(struct btrfs_fs_info *fs_info,
11871 struct extent_buffer *eb, int slot)
11873 struct btrfs_extent_item *ei;
11874 struct btrfs_extent_inline_ref *iref;
11875 struct btrfs_extent_data_ref *dref;
11879 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11880 u32 item_size = btrfs_item_size_nr(eb, slot);
11885 struct btrfs_key key;
11889 btrfs_item_key_to_cpu(eb, &key, slot);
11890 if (key.type == BTRFS_EXTENT_ITEM_KEY)
11891 bytes_used += key.offset;
11893 bytes_used += nodesize;
11895 if (item_size < sizeof(*ei)) {
11897 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
11898 * old thing when on disk format is still un-determined.
11899 * No need to care about it anymore
11901 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
11905 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
11906 flags = btrfs_extent_flags(eb, ei);
11908 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
11910 if (metadata && check_crossing_stripes(global_info, key.objectid,
11912 error("bad metadata [%llu, %llu) crossing stripe boundary",
11913 key.objectid, key.objectid + nodesize);
11914 err |= CROSSING_STRIPE_BOUNDARY;
11917 ptr = (unsigned long)(ei + 1);
11919 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
11920 /* Old EXTENT_ITEM metadata */
11921 struct btrfs_tree_block_info *info;
11923 info = (struct btrfs_tree_block_info *)ptr;
11924 level = btrfs_tree_block_level(eb, info);
11925 ptr += sizeof(struct btrfs_tree_block_info);
11927 /* New METADATA_ITEM */
11928 level = key.offset;
11930 end = (unsigned long)ei + item_size;
11933 /* Reached extent item end normally */
11937 /* Beyond extent item end, wrong item size */
11939 err |= ITEM_SIZE_MISMATCH;
11940 error("extent item at bytenr %llu slot %d has wrong size",
11945 /* Now check every backref in this extent item */
11946 iref = (struct btrfs_extent_inline_ref *)ptr;
11947 type = btrfs_extent_inline_ref_type(eb, iref);
11948 offset = btrfs_extent_inline_ref_offset(eb, iref);
11950 case BTRFS_TREE_BLOCK_REF_KEY:
11951 ret = check_tree_block_backref(fs_info, offset, key.objectid,
11955 case BTRFS_SHARED_BLOCK_REF_KEY:
11956 ret = check_shared_block_backref(fs_info, offset, key.objectid,
11960 case BTRFS_EXTENT_DATA_REF_KEY:
11961 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11962 ret = check_extent_data_backref(fs_info,
11963 btrfs_extent_data_ref_root(eb, dref),
11964 btrfs_extent_data_ref_objectid(eb, dref),
11965 btrfs_extent_data_ref_offset(eb, dref),
11966 key.objectid, key.offset,
11967 btrfs_extent_data_ref_count(eb, dref));
11970 case BTRFS_SHARED_DATA_REF_KEY:
11971 ret = check_shared_data_backref(fs_info, offset, key.objectid);
11975 error("extent[%llu %d %llu] has unknown ref type: %d",
11976 key.objectid, key.type, key.offset, type);
11977 err |= UNKNOWN_TYPE;
11981 ptr += btrfs_extent_inline_ref_size(type);
11989 * Check if a dev extent item is referred correctly by its chunk
/*
 * The chunk is looked up in the chunk tree with key (chunk objectid from
 * the dev extent, CHUNK_ITEM, chunk offset from the dev extent). It is
 * passed to btrfs_check_chunk_valid(), btrfs_stripe_length() is compared
 * with the dev extent length, and the stripes are scanned for one whose
 * devid and offset equal the objectid and offset of the dev extent key.
 * Returns REFERENCER_MISSING after an error message when found_chunk is 0.
 */
11991 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
11992 struct extent_buffer *eb, int slot)
11994 struct btrfs_root *chunk_root = fs_info->chunk_root;
11995 struct btrfs_dev_extent *ptr;
11996 struct btrfs_path path;
11997 struct btrfs_key chunk_key;
11998 struct btrfs_key devext_key;
11999 struct btrfs_chunk *chunk;
12000 struct extent_buffer *l;
12004 int found_chunk = 0;
12007 btrfs_item_key_to_cpu(eb, &devext_key, slot);
12008 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12009 length = btrfs_dev_extent_length(eb, ptr);
/* Build the chunk key from the fields stored in the dev extent. */
12011 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12012 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12013 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12015 btrfs_init_path(&path);
12016 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12021 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12022 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
12027 if (btrfs_stripe_length(fs_info, l, chunk) != length)
12030 num_stripes = btrfs_chunk_num_stripes(l, chunk);
12031 for (i = 0; i < num_stripes; i++) {
12032 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12033 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12035 if (devid == devext_key.objectid &&
12036 offset == devext_key.offset) {
12042 btrfs_release_path(&path);
12043 if (!found_chunk) {
12045 "device extent[%llu, %llu, %llu] did not find the related chunk",
12046 devext_key.objectid, devext_key.offset, length);
12047 return REFERENCER_MISSING;
12053 * Check if the used space is correct with the dev item
/*
 * Reads the device id and bytes_used from the dev item, then walks dev tree
 * items with btrfs_next_item(), adding btrfs_dev_extent_length() of the
 * items that pass the DEV_EXTENT and device id tests to total.
 * Returns REFERENCER_MISSING on the error path right after the initial
 * search and ACCOUNTING_MISMATCH when used differs from total.
 */
12055 static int check_dev_item(struct btrfs_fs_info *fs_info,
12056 struct extent_buffer *eb, int slot)
12058 struct btrfs_root *dev_root = fs_info->dev_root;
12059 struct btrfs_dev_item *dev_item;
12060 struct btrfs_path path;
12061 struct btrfs_key key;
12062 struct btrfs_dev_extent *ptr;
12068 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12069 dev_id = btrfs_device_id(eb, dev_item);
12070 used = btrfs_device_bytes_used(eb, dev_item);
12072 key.objectid = dev_id;
12073 key.type = BTRFS_DEV_EXTENT_KEY;
12076 btrfs_init_path(&path);
12077 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* The key is reloaded from @eb so the message describes the dev item. */
12079 btrfs_item_key_to_cpu(eb, &key, slot);
12080 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12081 key.objectid, key.type, key.offset);
12082 btrfs_release_path(&path);
12083 return REFERENCER_MISSING;
12086 /* Iterate dev_extents to calculate the used space of a device */
12088 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12091 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12092 if (key.objectid > dev_id)
12094 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12097 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12098 struct btrfs_dev_extent);
12099 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12101 ret = btrfs_next_item(dev_root, &path);
12105 btrfs_release_path(&path);
/*
 * NOTE(review): the key is reloaded from @eb below, yet the message
 * arguments are BTRFS_ROOT_TREE_OBJECTID, BTRFS_DEV_EXTENT_KEY and dev_id,
 * whereas the earlier message in this function prints key.objectid,
 * key.type and key.offset. Confirm which output is intended.
 */
12107 if (used != total) {
12108 btrfs_item_key_to_cpu(eb, &key, slot);
12110 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12111 total, used, BTRFS_ROOT_TREE_OBJECTID,
12112 BTRFS_DEV_EXTENT_KEY, dev_id);
12113 return ACCOUNTING_MISMATCH;
12119 * Check a block group item with its referencer (chunk) and its used space
12120 * with extent/metadata item
/*
 * Two checks are visible:
 *  - the chunk tree is searched for (BTRFS_FIRST_CHUNK_TREE_OBJECTID,
 *    CHUNK_ITEM, bg_key.objectid) and the chunk length is compared;
 *  - extent tree items starting at (bg_key.objectid, 0, 0) are iterated,
 *    their sizes are summed into total and their DATA / TREE_BLOCK flag is
 *    compared with the block group flags; total must equal the used value
 *    read from the block group item.
 * Error bits set here: REFERENCER_MISSING, REFERENCER_MISMATCH,
 * CHUNK_TYPE_MISMATCH and ACCOUNTING_MISMATCH.
 */
12122 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12123 struct extent_buffer *eb, int slot)
12125 struct btrfs_root *extent_root = fs_info->extent_root;
12126 struct btrfs_root *chunk_root = fs_info->chunk_root;
12127 struct btrfs_block_group_item *bi;
12128 struct btrfs_block_group_item bg_item;
12129 struct btrfs_path path;
12130 struct btrfs_key bg_key;
12131 struct btrfs_key chunk_key;
12132 struct btrfs_key extent_key;
12133 struct btrfs_chunk *chunk;
12134 struct extent_buffer *leaf;
12135 struct btrfs_extent_item *ei;
12136 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12144 btrfs_item_key_to_cpu(eb, &bg_key, slot);
12145 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* The item is copied out of the extent buffer into a local struct. */
12146 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12147 used = btrfs_block_group_used(&bg_item);
12148 bg_flags = btrfs_block_group_flags(&bg_item);
12150 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12151 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12152 chunk_key.offset = bg_key.objectid;
12154 btrfs_init_path(&path);
12155 /* Search for the referencer chunk */
12156 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12159 "block group[%llu %llu] did not find the related chunk item",
12160 bg_key.objectid, bg_key.offset);
12161 err |= REFERENCER_MISSING;
12163 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12164 struct btrfs_chunk);
12165 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12168 "block group[%llu %llu] related chunk item length does not match",
12169 bg_key.objectid, bg_key.offset);
12170 err |= REFERENCER_MISMATCH;
12173 btrfs_release_path(&path);
12175 /* Search from the block group bytenr */
12176 extent_key.objectid = bg_key.objectid;
12177 extent_key.type = 0;
12178 extent_key.offset = 0;
12180 btrfs_init_path(&path);
12181 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12185 /* Iterate extent tree to account used space */
12187 leaf = path.nodes[0];
12189 /* Search slot can point to the last item beyond leaf nritems */
12190 if (path.slots[0] >= btrfs_header_nritems(leaf))
12193 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
12194 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12197 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12198 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12200 if (extent_key.objectid < bg_key.objectid)
/*
 * key.offset is added to total on the visible path. NOTE(review): the
 * amount added for METADATA_ITEM keys is on a line not visible here.
 */
12203 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12206 total += extent_key.offset;
12208 ei = btrfs_item_ptr(leaf, path.slots[0],
12209 struct btrfs_extent_item);
12210 flags = btrfs_extent_flags(leaf, ei);
12211 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12212 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12214 "bad extent[%llu, %llu) type mismatch with chunk",
12215 extent_key.objectid,
12216 extent_key.objectid + extent_key.offset);
12217 err |= CHUNK_TYPE_MISMATCH;
12219 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12220 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12221 BTRFS_BLOCK_GROUP_METADATA))) {
12223 "bad extent[%llu, %llu) type mismatch with chunk",
12224 extent_key.objectid,
12225 extent_key.objectid + nodesize);
12226 err |= CHUNK_TYPE_MISMATCH;
12230 ret = btrfs_next_item(extent_root, &path);
12236 btrfs_release_path(&path);
12238 if (total != used) {
12240 "block group[%llu %llu] used %llu but extent items used %llu",
12241 bg_key.objectid, bg_key.offset, used, total);
12242 err |= ACCOUNTING_MISMATCH;
12248 * Check a chunk item.
12249 * Including checking all referred dev_extents and block group
/*
 * Steps visible below: btrfs_check_chunk_valid() on the chunk, lookup of
 * the block group item keyed (chunk_key.offset, BLOCK_GROUP_ITEM, length)
 * in the extent tree with a flags comparison against the chunk type, then
 * one dev tree lookup per stripe keyed (stripe devid, DEV_EXTENT, stripe
 * offset) whose chunk objectid, chunk offset and length must match.
 * Error bits set here: BYTES_UNALIGNED | UNKNOWN_TYPE for an invalid chunk,
 * REFERENCER_MISSING and BACKREF_MISSING.
 */
12251 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12252 struct extent_buffer *eb, int slot)
12254 struct btrfs_root *extent_root = fs_info->extent_root;
12255 struct btrfs_root *dev_root = fs_info->dev_root;
12256 struct btrfs_path path;
12257 struct btrfs_key chunk_key;
12258 struct btrfs_key bg_key;
12259 struct btrfs_key devext_key;
12260 struct btrfs_chunk *chunk;
12261 struct extent_buffer *leaf;
12262 struct btrfs_block_group_item *bi;
12263 struct btrfs_block_group_item bg_item;
12264 struct btrfs_dev_extent *ptr;
12276 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12277 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12278 length = btrfs_chunk_length(eb, chunk);
12279 chunk_end = chunk_key.offset + length;
12280 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12283 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12285 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12288 type = btrfs_chunk_type(eb, chunk);
/* The block group key is derived from the chunk start and length. */
12290 bg_key.objectid = chunk_key.offset;
12291 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12292 bg_key.offset = length;
12294 btrfs_init_path(&path);
12295 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
12298 "chunk[%llu %llu) did not find the related block group item",
12299 chunk_key.offset, chunk_end);
12300 err |= REFERENCER_MISSING;
12302 leaf = path.nodes[0];
12303 bi = btrfs_item_ptr(leaf, path.slots[0],
12304 struct btrfs_block_group_item);
12305 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
12307 if (btrfs_block_group_flags(&bg_item) != type) {
12309 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
12310 chunk_key.offset, chunk_end, type,
12311 btrfs_block_group_flags(&bg_item));
/*
 * NOTE(review): a flags mismatch sets REFERENCER_MISSING; confirm that
 * REFERENCER_MISMATCH was not intended.
 */
12312 err |= REFERENCER_MISSING;
12316 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
12317 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
12318 for (i = 0; i < num_stripes; i++) {
/* The path is released and re-initialised at the start of every pass. */
12319 btrfs_release_path(&path);
12320 btrfs_init_path(&path);
12321 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
12322 devext_key.type = BTRFS_DEV_EXTENT_KEY;
12323 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
12325 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
12328 goto not_match_dev;
12330 leaf = path.nodes[0];
12331 ptr = btrfs_item_ptr(leaf, path.slots[0],
12332 struct btrfs_dev_extent);
12333 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
12334 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
12335 if (objectid != chunk_key.objectid ||
12336 offset != chunk_key.offset ||
12337 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
12338 goto not_match_dev;
/*
 * NOTE(review): the message below prints chunk_key.objectid as the start
 * of the range, while the other messages in this function print
 * chunk_key.offset; confirm which value is intended.
 */
12341 err |= BACKREF_MISSING;
12343 "chunk[%llu %llu) stripe %d did not find the related dev extent",
12344 chunk_key.objectid, chunk_end, i);
12347 btrfs_release_path(&path);
12353 * Main entry function to check known items and update related accounting info
/*
 * check_leaf_items() - run the per-item checks on the items of leaf @eb.
 *
 * @root: tree the leaf belongs to; root->fs_info is handed to most checkers
 * @eb:   leaf to examine
 *
 * The key at the current slot selects the checker: file extents, block
 * groups, dev items, chunks, dev extents, extent/metadata items and the four
 * keyed backref types each have a dedicated check_*() helper, while
 * BTRFS_EXTENT_CSUM_KEY items only add their size to total_csum_bytes.
 * The slot is then advanced and compared against btrfs_header_nritems(eb).
 */
12355 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
12357 struct btrfs_fs_info *fs_info = root->fs_info;
12358 struct btrfs_key key;
12361 struct btrfs_extent_data_ref *dref;
12366 btrfs_item_key_to_cpu(eb, &key, slot);
12370 case BTRFS_EXTENT_DATA_KEY:
12371 ret = check_extent_data_item(root, eb, slot);
12374 case BTRFS_BLOCK_GROUP_ITEM_KEY:
12375 ret = check_block_group_item(fs_info, eb, slot);
12378 case BTRFS_DEV_ITEM_KEY:
12379 ret = check_dev_item(fs_info, eb, slot);
12382 case BTRFS_CHUNK_ITEM_KEY:
12383 ret = check_chunk_item(fs_info, eb, slot);
12386 case BTRFS_DEV_EXTENT_KEY:
12387 ret = check_dev_extent_item(fs_info, eb, slot);
12390 case BTRFS_EXTENT_ITEM_KEY:
12391 case BTRFS_METADATA_ITEM_KEY:
12392 ret = check_extent_item(fs_info, eb, slot);
12395 case BTRFS_EXTENT_CSUM_KEY:
/* Accounting only: csum items have no dedicated checker in this switch */
12396 total_csum_bytes += btrfs_item_size_nr(eb, slot);
12398 case BTRFS_TREE_BLOCK_REF_KEY:
12399 ret = check_tree_block_backref(fs_info, key.offset,
12403 case BTRFS_EXTENT_DATA_REF_KEY:
12404 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
12405 ret = check_extent_data_backref(fs_info,
12406 btrfs_extent_data_ref_root(eb, dref),
12407 btrfs_extent_data_ref_objectid(eb, dref),
12408 btrfs_extent_data_ref_offset(eb, dref),
12410 btrfs_extent_data_ref_count(eb, dref));
12413 case BTRFS_SHARED_BLOCK_REF_KEY:
12414 ret = check_shared_block_backref(fs_info, key.offset,
12418 case BTRFS_SHARED_DATA_REF_KEY:
12419 ret = check_shared_data_backref(fs_info, key.offset,
12427 if (++slot < btrfs_header_nritems(eb))
12434 * Helper function for later fs/subvol tree check. To determine if a tree
12435 * block should be checked.
12436 * This function ensures that only the direct referencer with the lowest rootid
12437 * checks a fs/subvolume tree block.
12439 * Backref check at extent tree would detect errors like missing subvolume
12440 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * should_check() - tell whether @root is the one that should check @eb.
 *
 * @root: fs/subvolume root currently being traversed
 * @eb:   tree block about to be checked
 *
 * Looks up the extent item of @eb in the extent tree (searching with
 * BTRFS_METADATA_ITEM_KEY and offset -1, then stepping back with
 * btrfs_previous_extent_item()) and scans its inline refs.  An inline
 * BTRFS_TREE_BLOCK_REF_KEY whose offset is below root->objectid ends the
 * scan early.  Keyed tree block refs are intentionally not searched.
 */
12442 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
12444 struct btrfs_root *extent_root = root->fs_info->extent_root;
12445 struct btrfs_key key;
12446 struct btrfs_path path;
12447 struct extent_buffer *leaf;
12449 struct btrfs_extent_item *ei;
12455 struct btrfs_extent_inline_ref *iref;
12458 btrfs_init_path(&path);
12459 key.objectid = btrfs_header_bytenr(eb);
12460 key.type = BTRFS_METADATA_ITEM_KEY;
12461 key.offset = (u64)-1;
12464 * Any failure in backref resolving means we can't determine
12465 * whom the tree block belongs to.
12466 * So in that case, we need to check that tree block
12468 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12472 ret = btrfs_previous_extent_item(extent_root, &path,
12473 btrfs_header_bytenr(eb));
12477 leaf = path.nodes[0];
12478 slot = path.slots[0];
12479 btrfs_item_key_to_cpu(leaf, &key, slot);
12480 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12482 if (key.type == BTRFS_METADATA_ITEM_KEY) {
12483 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12485 struct btrfs_tree_block_info *info;
12487 info = (struct btrfs_tree_block_info *)(ei + 1);
12488 iref = (struct btrfs_extent_inline_ref *)(info + 1);
12491 item_size = btrfs_item_size_nr(leaf, slot);
12492 ptr = (unsigned long)iref;
12493 end = (unsigned long)ei + item_size;
12494 while (ptr < end) {
12495 iref = (struct btrfs_extent_inline_ref *)ptr;
12496 type = btrfs_extent_inline_ref_type(leaf, iref);
12497 offset = btrfs_extent_inline_ref_offset(leaf, iref);
12500 * We only check the tree block if current root is
12501 * the lowest referencer of it.
12503 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
12504 offset < root->objectid) {
12505 btrfs_release_path(&path);
12509 ptr += btrfs_extent_inline_ref_size(type);
12512 * Normally we should also check keyed tree block ref, but that may be
12513 * very time consuming. Inlined ref should already make us skip a lot
12514 * of refs now. So skip search keyed tree block ref.
12518 btrfs_release_path(&path);
12523 * Traversal function for tree block. We will do:
12524 * 1) Skip shared fs/subvolume tree blocks
12525 * 2) Update related bytes accounting
12526 * 3) Pre-order traversal
/*
 * traverse_tree_block() - check tree block @node and recurse into children.
 *
 * @root: tree being traversed
 * @node: tree block to check
 *
 * Calls made here, in order: should_check() for fs trees, updates of the
 * global btree byte counters, check_tree_block_ref() on the block itself,
 * btrfs_leaf_free_space() plus check_leaf_items(), and, per child pointer,
 * read_tree_block() followed by a recursive traverse_tree_block().
 * Child keys are also tested with is_dropped_key() against
 * root_item.drop_progress when the level equals root_item.drop_level.
 */
12528 static int traverse_tree_block(struct btrfs_root *root,
12529 struct extent_buffer *node)
12531 struct extent_buffer *eb;
12532 struct btrfs_key key;
12533 struct btrfs_key drop_key;
12541 * Skip shared fs/subvolume tree block, in that case they will
12542 * be checked by referencer with lowest rootid
12544 if (is_fstree(root->objectid) && !should_check(root, node))
12547 /* Update bytes accounting */
12548 total_btree_bytes += node->len;
12549 if (fs_root_objectid(btrfs_header_owner(node)))
12550 total_fs_tree_bytes += node->len;
12551 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
12552 total_extent_tree_bytes += node->len;
12554 /* pre-order traversal, check itself first */
12555 level = btrfs_header_level(node);
12556 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
12557 btrfs_header_level(node),
12558 btrfs_header_owner(node));
12562 "check %s failed root %llu bytenr %llu level %d, force continue check",
12563 level ? "node":"leaf", root->objectid,
12564 btrfs_header_bytenr(node), btrfs_header_level(node));
12567 btree_space_waste += btrfs_leaf_free_space(root, node);
12568 ret = check_leaf_items(root, node);
12573 nr = btrfs_header_nritems(node);
12574 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
/* Unused key pointer slots of the node are counted as wasted space */
12575 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
12576 sizeof(struct btrfs_key_ptr);
12578 /* Then check all its children */
12579 for (i = 0; i < nr; i++) {
12580 u64 blocknr = btrfs_node_blockptr(node, i);
12582 btrfs_node_key_to_cpu(node, &key, i);
12583 if (level == root->root_item.drop_level &&
12584 is_dropped_key(&key, &drop_key))
12588 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
12589 * to call the function itself.
12591 eb = read_tree_block(root->fs_info, blocknr, 0);
12592 if (extent_buffer_uptodate(eb)) {
12593 ret = traverse_tree_block(root, eb);
12596 free_extent_buffer(eb);
12603 * Low memory usage version check_chunks_and_extents.
12605 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
12607 struct btrfs_path path;
12608 struct btrfs_key key;
12609 struct btrfs_root *root1;
12610 struct btrfs_root *root;
12611 struct btrfs_root *cur_root;
12615 root = fs_info->fs_root;
12617 root1 = root->fs_info->chunk_root;
12618 ret = traverse_tree_block(root1, root1->node);
12621 root1 = root->fs_info->tree_root;
12622 ret = traverse_tree_block(root1, root1->node);
12625 btrfs_init_path(&path);
12626 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
12628 key.type = BTRFS_ROOT_ITEM_KEY;
12630 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
12632 error("cannot find extent treet in tree_root");
12637 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12638 if (key.type != BTRFS_ROOT_ITEM_KEY)
12640 key.offset = (u64)-1;
12642 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12643 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
12646 cur_root = btrfs_read_fs_root(root->fs_info, &key);
12647 if (IS_ERR(cur_root) || !cur_root) {
12648 error("failed to read tree: %lld", key.objectid);
12652 ret = traverse_tree_block(cur_root, cur_root->node);
12655 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12656 btrfs_free_fs_root(cur_root);
12658 ret = btrfs_next_item(root1, &path);
12664 btrfs_release_path(&path);
/*
 * do_check_chunks_and_extents() - run the extent check for the active mode.
 *
 * Prints "checking extents" to stderr unless ctx.progress_enabled is set,
 * then calls check_chunks_and_extents_v2() when check_mode is
 * CHECK_MODE_LOWMEM; check_chunks_and_extents() is the other call made here.
 */
12668 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
12672 if (!ctx.progress_enabled)
12673 fprintf(stderr, "checking extents\n");
12674 if (check_mode == CHECK_MODE_LOWMEM)
12675 ret = check_chunks_and_extents_v2(fs_info);
12677 ret = check_chunks_and_extents(fs_info);
/*
 * btrfs_fsck_reinit_root() - give @root a freshly initialised root block.
 *
 * @trans:     running transaction; its transid becomes the header generation
 * @root:      root to re-initialise
 * @overwrite: NOTE(review): its use is not visible on the lines below; the
 *             "overwrite previous root" remark suggests the existing node is
 *             reused when it is set - confirm
 *
 * A block is obtained (btrfs_alloc_free_block() is one visible source), its
 * header is zeroed and refilled (level, bytenr, generation, backref rev,
 * owner, fsid, chunk tree uuid) and it is marked dirty.  When the new block
 * starts at the same bytenr as the old root node, the root item generation
 * and level are refreshed and written with btrfs_update_root().  The old
 * node reference is dropped and @root is added to the dirty list.
 */
12682 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
12683 struct btrfs_root *root, int overwrite)
12685 struct extent_buffer *c;
12686 struct extent_buffer *old = root->node;
12689 struct btrfs_disk_key disk_key = {0,0,0};
12695 extent_buffer_get(c);
12698 c = btrfs_alloc_free_block(trans, root,
12699 root->fs_info->nodesize,
12700 root->root_key.objectid,
12701 &disk_key, level, 0, 0);
12704 extent_buffer_get(c);
/* Wipe the whole header before filling in the fields set below */
12708 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
12709 btrfs_set_header_level(c, level);
12710 btrfs_set_header_bytenr(c, c->start);
12711 btrfs_set_header_generation(c, trans->transid);
12712 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
12713 btrfs_set_header_owner(c, root->root_key.objectid);
12715 write_extent_buffer(c, root->fs_info->fsid,
12716 btrfs_header_fsid(), BTRFS_FSID_SIZE);
12718 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
12719 btrfs_header_chunk_tree_uuid(c),
12722 btrfs_mark_buffer_dirty(c);
12724 * this case can happen in the following case:
12726 * 1.overwrite previous root.
12728 * 2.reinit reloc data root, this is because we skip pin
12729 * down reloc data tree before which means we can allocate
12730 * same block bytenr here.
12732 if (old->start == c->start) {
12733 btrfs_set_root_generation(&root->root_item,
12735 root->root_item.level = btrfs_header_level(root->node);
12736 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12737 &root->root_key, &root->root_item);
12739 free_extent_buffer(c);
12743 free_extent_buffer(old);
12745 add_root_to_dirty_list(root);
/*
 * pin_down_tree_blocks() - pin @eb and the tree blocks reachable from it.
 *
 * @fs_info:   filesystem whose pinned_extents tree is updated
 * @eb:        tree block to start from
 * @tree_root: non-zero while walking the tree root itself
 *
 * The range of @eb is first tested for EXTENT_DIRTY in
 * fs_info->pinned_extents so that a block is not pinned twice, then pinned
 * with btrfs_pin_extent().  Its items are walked: BTRFS_ROOT_ITEM_KEY items
 * other than the extent root and the reloc roots have their root block read
 * and pinned recursively with tree_root == 0, while node pointers are either
 * pinned directly (level 1 and @tree_root == 0) or read and recursed into
 * with the caller's @tree_root.
 */
12749 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12750 struct extent_buffer *eb, int tree_root)
12752 struct extent_buffer *tmp;
12753 struct btrfs_root_item *ri;
12754 struct btrfs_key key;
12756 int level = btrfs_header_level(eb);
12762 * If we have pinned this block before, don't pin it again.
12763 * This can not only avoid forever loop with broken filesystem
12764 * but also give us some speedups.
12766 if (test_range_bit(&fs_info->pinned_extents, eb->start,
12767 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
12770 btrfs_pin_extent(fs_info, eb->start, eb->len);
12772 nritems = btrfs_header_nritems(eb);
12773 for (i = 0; i < nritems; i++) {
12775 btrfs_item_key_to_cpu(eb, &key, i);
12776 if (key.type != BTRFS_ROOT_ITEM_KEY)
12778 /* Skip the extent root and reloc roots */
12779 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
12780 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
12781 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
12783 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
12784 bytenr = btrfs_disk_root_bytenr(eb, ri);
12787 * If at any point we start needing the real root we
12788 * will have to build a stump root for the root we are
12789 * in, but for now this doesn't actually use the root so
12790 * just pass in extent_root.
12792 tmp = read_tree_block(fs_info, bytenr, 0);
12793 if (!extent_buffer_uptodate(tmp)) {
12794 fprintf(stderr, "Error reading root block\n");
12797 ret = pin_down_tree_blocks(fs_info, tmp, 0);
12798 free_extent_buffer(tmp);
12802 bytenr = btrfs_node_blockptr(eb, i);
12804 /* If we aren't the tree root don't read the block */
12805 if (level == 1 && !tree_root) {
12806 btrfs_pin_extent(fs_info, bytenr,
12807 fs_info->nodesize);
12811 tmp = read_tree_block(fs_info, bytenr, 0);
12812 if (!extent_buffer_uptodate(tmp)) {
12813 fprintf(stderr, "Error reading tree block\n");
12816 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
12817 free_extent_buffer(tmp);
/*
 * pin_metadata_blocks() - pin the blocks of the chunk tree and the tree root.
 *
 * The chunk root is walked first with tree_root == 0; the tree root is
 * walked with tree_root == 1 and the result of that walk is returned.
 */
12826 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
12830 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
12834 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * reset_block_groups() - rebuild the in-memory block groups from chunk items.
 *
 * Clears the avail_{data,metadata,system}_alloc_bits of @fs_info, then walks
 * the chunk tree; each BTRFS_CHUNK_ITEM_KEY item is registered with
 * btrfs_add_block_group() and the range [key.offset, key.offset + chunk
 * length] is marked dirty in fs_info->free_space_cache.  Afterwards the
 * block groups are stepped through with btrfs_lookup_first_block_group(),
 * advancing start past each one.
 */
12837 static int reset_block_groups(struct btrfs_fs_info *fs_info)
12839 struct btrfs_block_group_cache *cache;
12840 struct btrfs_path path;
12841 struct extent_buffer *leaf;
12842 struct btrfs_chunk *chunk;
12843 struct btrfs_key key;
12847 btrfs_init_path(&path);
12849 key.type = BTRFS_CHUNK_ITEM_KEY;
12851 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
12853 btrfs_release_path(&path);
12858 * We do this in case the block groups were screwed up and had alloc
12859 * bits that aren't actually set on the chunks. This happens with
12860 * restored images every time and could happen in real life I guess.
12862 fs_info->avail_data_alloc_bits = 0;
12863 fs_info->avail_metadata_alloc_bits = 0;
12864 fs_info->avail_system_alloc_bits = 0;
12866 /* First we need to create the in-memory block groups */
12868 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12869 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
12871 btrfs_release_path(&path);
12879 leaf = path.nodes[0];
12880 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12881 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
12886 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
12887 btrfs_add_block_group(fs_info, 0,
12888 btrfs_chunk_type(leaf, chunk),
12889 key.objectid, key.offset,
12890 btrfs_chunk_length(leaf, chunk));
12891 set_extent_dirty(&fs_info->free_space_cache, key.offset,
12892 key.offset + btrfs_chunk_length(leaf, chunk));
12897 cache = btrfs_lookup_first_block_group(fs_info, start);
12901 start = cache->key.objectid + cache->key.offset;
12904 btrfs_release_path(&path);
/*
 * reset_balance() - drop leftover balance state and reinit the data reloc tree.
 *
 * @trans:   running transaction used for all modifications
 * @fs_info: filesystem being repaired
 *
 * The balance item (BTRFS_BALANCE_OBJECTID / BTRFS_BALANCE_ITEM_KEY) is
 * searched in the tree root and deleted, root items with objectid
 * BTRFS_TREE_RELOC_OBJECTID are deleted in batches via btrfs_del_items(),
 * and the BTRFS_DATA_RELOC_TREE_OBJECTID root is read, recorded in the
 * transaction, re-initialised with btrfs_fsck_reinit_root() and given a
 * root directory with btrfs_make_root_dir().
 */
12908 static int reset_balance(struct btrfs_trans_handle *trans,
12909 struct btrfs_fs_info *fs_info)
12911 struct btrfs_root *root = fs_info->tree_root;
12912 struct btrfs_path path;
12913 struct extent_buffer *leaf;
12914 struct btrfs_key key;
12915 int del_slot, del_nr = 0;
12919 btrfs_init_path(&path);
12920 key.objectid = BTRFS_BALANCE_OBJECTID;
12921 key.type = BTRFS_BALANCE_ITEM_KEY;
12923 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12928 goto reinit_data_reloc;
12933 ret = btrfs_del_item(trans, root, &path);
12936 btrfs_release_path(&path);
12938 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12939 key.type = BTRFS_ROOT_ITEM_KEY;
12941 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12945 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12950 ret = btrfs_del_items(trans, root, &path,
12957 btrfs_release_path(&path);
12960 ret = btrfs_search_slot(trans, root, &key, &path,
12967 leaf = path.nodes[0];
12968 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12969 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
12971 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
12976 del_slot = path.slots[0];
12985 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
12989 btrfs_release_path(&path);
/* From here on "root" is the data reloc tree, no longer the tree root */
12992 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
12993 key.type = BTRFS_ROOT_ITEM_KEY;
12994 key.offset = (u64)-1;
12995 root = btrfs_read_fs_root(fs_info, &key);
12996 if (IS_ERR(root)) {
12997 fprintf(stderr, "Error reading data reloc tree\n");
12998 ret = PTR_ERR(root);
13001 record_root_in_trans(trans, root);
13002 ret = btrfs_fsck_reinit_root(trans, root, 0);
13005 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13007 btrfs_release_path(&path);
/*
 * reinit_extent_tree() - rebuild an empty extent tree inside @trans.
 *
 * @trans:   running transaction
 * @fs_info: filesystem being repaired
 *
 * Order of operations visible here: pin all metadata blocks, free and reset
 * the block groups, re-initialise the extent root, insert one block group
 * item per in-memory block group (each followed by btrfs_extent_post_op()),
 * and finally reset any pending balance.  Filesystems with the MIXED_GROUPS
 * incompat flag are reported as unsupported on stderr.
 */
13011 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13012 struct btrfs_fs_info *fs_info)
13018 * The only reason we don't do this is because right now we're just
13019 * walking the trees we find and pinning down their bytes, we don't look
13020 * at any of the leaves. In order to do mixed groups we'd have to check
13021 * the leaves of any fs roots and pin down the bytes for any file
13022 * extents we find. Not hard but why do it if we don't have to?
13024 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13025 fprintf(stderr, "We don't support re-initing the extent tree "
13026 "for mixed block groups yet, please notify a btrfs "
13027 "developer you want to do this so they can add this "
13028 "functionality.\n");
13033 * first we need to walk all of the trees except the extent tree and pin
13034 * down the bytes that are in use so we don't overwrite any existing
13037 ret = pin_metadata_blocks(fs_info);
13039 fprintf(stderr, "error pinning down used bytes\n");
13044 * Need to drop all the block groups since we're going to recreate all
13047 btrfs_free_block_groups(fs_info);
13048 ret = reset_block_groups(fs_info);
13050 fprintf(stderr, "error resetting the block groups\n");
13054 /* Ok we can allocate now, reinit the extent root */
13055 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13057 fprintf(stderr, "extent root initialization failed\n");
13059 * When the transaction code is updated we should end the
13060 * transaction, but for now progs only knows about commit so
13061 * just return an error.
13067 * Now we have all the in-memory block groups setup so we can make
13068 * allocations properly, and the metadata we care about is safe since we
13069 * pinned all of it above.
13072 struct btrfs_block_group_cache *cache;
13074 cache = btrfs_lookup_first_block_group(fs_info, start);
13077 start = cache->key.objectid + cache->key.offset;
13078 ret = btrfs_insert_item(trans, fs_info->extent_root,
13079 &cache->key, &cache->item,
13080 sizeof(cache->item));
13082 fprintf(stderr, "Error adding block group\n");
13085 btrfs_extent_post_op(trans, fs_info->extent_root);
13088 ret = reset_balance(trans, fs_info);
13090 fprintf(stderr, "error resetting the pending balance\n");
/*
 * recow_extent_buffer() - re-COW metadata block @eb through its owner root.
 *
 * @root: any root of the filesystem; only root->fs_info is used before the
 *        variable is reassigned to the owner root
 * @eb:   metadata block to recow
 *
 * The owner root named in the header of @eb is read, a transaction is
 * started on it, and btrfs_search_slot() is run with cow == 1 and
 * path.lowest_level set to the level of @eb, using the first key of @eb.
 * The transaction is then committed and the path released.
 *
 * NOTE(review): the return value of btrfs_commit_transaction() is not
 * captured here.
 */
13095 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13097 struct btrfs_path path;
13098 struct btrfs_trans_handle *trans;
13099 struct btrfs_key key;
13102 printf("Recowing metadata block %llu\n", eb->start);
13103 key.objectid = btrfs_header_owner(eb);
13104 key.type = BTRFS_ROOT_ITEM_KEY;
13105 key.offset = (u64)-1;
13107 root = btrfs_read_fs_root(root->fs_info, &key);
13108 if (IS_ERR(root)) {
13109 fprintf(stderr, "Couldn't find owner root %llu\n",
13111 return PTR_ERR(root);
13114 trans = btrfs_start_transaction(root, 1);
13116 return PTR_ERR(trans);
13118 btrfs_init_path(&path);
/* Stop the search at the level of eb so that eb itself is the COW target */
13119 path.lowest_level = btrfs_header_level(eb);
13120 if (path.lowest_level)
13121 btrfs_node_key_to_cpu(eb, &key, 0);
13123 btrfs_item_key_to_cpu(eb, &key, 0);
13125 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13126 btrfs_commit_transaction(trans, root);
13127 btrfs_release_path(&path);
/*
 * delete_bad_item() - remove the item described by @bad from its root.
 *
 * @root: any root of the filesystem; only root->fs_info is used before the
 *        variable is reassigned to the root named by bad->root_id
 * @bad:  root id and key of the item to delete
 *
 * The root with objectid bad->root_id is read, a transaction is started,
 * bad->key is searched with ins_len == -1 and cow == 1, the item is deleted
 * with btrfs_del_item() and the transaction is committed.
 */
13131 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13133 struct btrfs_path path;
13134 struct btrfs_trans_handle *trans;
13135 struct btrfs_key key;
13138 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13139 bad->key.type, bad->key.offset);
13140 key.objectid = bad->root_id;
13141 key.type = BTRFS_ROOT_ITEM_KEY;
13142 key.offset = (u64)-1;
13144 root = btrfs_read_fs_root(root->fs_info, &key);
13145 if (IS_ERR(root)) {
13146 fprintf(stderr, "Couldn't find owner root %llu\n",
13148 return PTR_ERR(root);
13151 trans = btrfs_start_transaction(root, 1);
13153 return PTR_ERR(trans);
13155 btrfs_init_path(&path);
13156 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13162 ret = btrfs_del_item(trans, root, &path);
13164 btrfs_commit_transaction(trans, root);
13165 btrfs_release_path(&path);
/*
 * zero_log_tree() - clear the log root recorded in the super block.
 *
 * Sets both the log root and the log root level of the in-memory super copy
 * to 0 inside a transaction started on @root, then commits it; the commit
 * result is stored in ret.
 */
13169 static int zero_log_tree(struct btrfs_root *root)
13171 struct btrfs_trans_handle *trans;
13174 trans = btrfs_start_transaction(root, 1);
13175 if (IS_ERR(trans)) {
13176 ret = PTR_ERR(trans);
13179 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13180 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13181 ret = btrfs_commit_transaction(trans, root);
/*
 * populate_csum() - checksum the data range starting at @start.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree receiving the new items
 * @buf:       bounce buffer; callers in this file allocate
 *             fs_info->sectorsize bytes for it
 * @start:     first byte of the range
 *
 * While offset is below len, one read of up to sectorsize bytes is issued
 * at @start + offset with read_extent_data(), the data is passed to
 * btrfs_csum_file_block() and offset advances by sectorsize.
 */
13185 static int populate_csum(struct btrfs_trans_handle *trans,
13186 struct btrfs_root *csum_root, char *buf, u64 start,
13189 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13194 while (offset < len) {
13195 sectorsize = fs_info->sectorsize;
13196 ret = read_extent_data(fs_info, buf, start + offset,
13200 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13201 start + offset, buf, sectorsize);
13204 offset += sectorsize;
/*
 * fill_csum_tree_from_one_fs_root() - checksum the file extents of @cur_root.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree to fill
 * @cur_root:  fs/subvolume tree whose file extents are scanned
 *
 * Walks the items of @cur_root with btrfs_next_item().  Items are tested
 * for BTRFS_EXTENT_DATA_KEY and for file extent type BTRFS_FILE_EXTENT_REG;
 * the on-disk range (disk_bytenr, disk_num_bytes) is passed to
 * populate_csum(), whose -EEXIST result is tested for explicitly.
 * A sectorsize-sized buffer is allocated for the reads.
 */
13209 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13210 struct btrfs_root *csum_root,
13211 struct btrfs_root *cur_root)
13213 struct btrfs_path path;
13214 struct btrfs_key key;
13215 struct extent_buffer *node;
13216 struct btrfs_file_extent_item *fi;
13223 buf = malloc(cur_root->fs_info->sectorsize);
13227 btrfs_init_path(&path);
13231 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13234 /* Iterate all regular file extents and fill its csum */
13236 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13238 if (key.type != BTRFS_EXTENT_DATA_KEY)
13240 node = path.nodes[0];
13241 slot = path.slots[0];
13242 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13243 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
13245 start = btrfs_file_extent_disk_bytenr(node, fi);
13246 len = btrfs_file_extent_disk_num_bytes(node, fi);
13248 ret = populate_csum(trans, csum_root, buf, start, len);
13249 if (ret == -EEXIST)
13255 * TODO: if next leaf is corrupted, jump to nearest next valid
13258 ret = btrfs_next_item(cur_root, &path);
13268 btrfs_release_path(&path);
/*
 * fill_csum_tree_from_fs() - rebuild checksums by scanning fs/subvolume trees.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree to fill
 *
 * Iterates root items in the tree root starting at BTRFS_FS_TREE_OBJECTID.
 * Keys are tested against BTRFS_LAST_FREE_OBJECTID, BTRFS_ROOT_ITEM_KEY and
 * is_fstree(); a root read with btrfs_read_fs_root() is handed to
 * fill_csum_tree_from_one_fs_root(), and a failed read is reported on
 * stderr.
 */
13273 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13274 struct btrfs_root *csum_root)
13276 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13277 struct btrfs_path path;
13278 struct btrfs_root *tree_root = fs_info->tree_root;
13279 struct btrfs_root *cur_root;
13280 struct extent_buffer *node;
13281 struct btrfs_key key;
13285 btrfs_init_path(&path);
13286 key.objectid = BTRFS_FS_TREE_OBJECTID;
13288 key.type = BTRFS_ROOT_ITEM_KEY;
13289 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13298 node = path.nodes[0];
13299 slot = path.slots[0];
13300 btrfs_item_key_to_cpu(node, &key, slot);
13301 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13303 if (key.type != BTRFS_ROOT_ITEM_KEY)
13305 if (!is_fstree(key.objectid))
13307 key.offset = (u64)-1;
13309 cur_root = btrfs_read_fs_root(fs_info, &key);
13310 if (IS_ERR(cur_root) || !cur_root) {
13311 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13315 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13320 ret = btrfs_next_item(tree_root, &path);
13330 btrfs_release_path(&path);
/*
 * fill_csum_tree_from_extent() - rebuild checksums by scanning the extent tree.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree to fill
 *
 * Walks the extent tree leaf by leaf.  Items are tested for
 * BTRFS_EXTENT_ITEM_KEY and for BTRFS_EXTENT_FLAG_DATA in their extent
 * flags, and populate_csum() is called with key.objectid as the start of
 * the range.  A sectorsize-sized buffer is allocated for the reads.
 */
13334 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13335 struct btrfs_root *csum_root)
13337 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13338 struct btrfs_path path;
13339 struct btrfs_extent_item *ei;
13340 struct extent_buffer *leaf;
13342 struct btrfs_key key;
13345 btrfs_init_path(&path);
13347 key.type = BTRFS_EXTENT_ITEM_KEY;
13349 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
13351 btrfs_release_path(&path);
13355 buf = malloc(csum_root->fs_info->sectorsize);
13357 btrfs_release_path(&path);
13362 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13363 ret = btrfs_next_leaf(extent_root, &path);
13371 leaf = path.nodes[0];
13373 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13374 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
13379 ei = btrfs_item_ptr(leaf, path.slots[0],
13380 struct btrfs_extent_item);
13381 if (!(btrfs_extent_flags(leaf, ei) &
13382 BTRFS_EXTENT_FLAG_DATA)) {
13387 ret = populate_csum(trans, csum_root, buf, key.objectid,
13394 btrfs_release_path(&path);
13400 * Recalculate the csum and put it into the csum tree.
13402 * Extent tree init will wipe out all the extent info, so in that case, we
13403 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
13404 * will use fs/subvol trees to init the csum tree.
/*
 * fill_csum_tree() - pick the source used to rebuild the checksum tree.
 *
 * @trans:          running transaction
 * @csum_root:      checksum tree to fill
 * @search_fs_tree: non-zero selects fill_csum_tree_from_fs(); the other
 *                  call made here is fill_csum_tree_from_extent()
 */
13406 static int fill_csum_tree(struct btrfs_trans_handle *trans,
13407 struct btrfs_root *csum_root,
13408 int search_fs_tree)
13410 if (search_fs_tree)
13411 return fill_csum_tree_from_fs(trans, csum_root);
13413 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * free_roots_info_cache() - empty and free the global roots_info_cache.
 *
 * The pointer is tested for NULL first.  Entries are then removed one at a
 * time, starting from first_cache_extent(), until cache_tree_empty()
 * reports true; each entry is mapped back to its root_item_info with
 * container_of().  Finally the tree itself is freed and the pointer reset
 * to NULL.
 */
13416 static void free_roots_info_cache(void)
13418 if (!roots_info_cache)
13421 while (!cache_tree_empty(roots_info_cache)) {
13422 struct cache_extent *entry;
13423 struct root_item_info *rii;
13425 entry = first_cache_extent(roots_info_cache);
13428 remove_cache_extent(roots_info_cache, entry);
13429 rii = container_of(entry, struct root_item_info, cache_extent);
13433 free(roots_info_cache);
13434 roots_info_cache = NULL;
/*
 * build_roots_info_cache() - collect per-root info from the extent tree.
 *
 * @info: filesystem whose extent root is scanned
 *
 * Allocates and initialises roots_info_cache on first use, then scans the
 * extent tree.  Items are filtered to EXTENT_ITEM/METADATA_ITEM keys that
 * describe tree blocks.  When the first inline ref is a
 * BTRFS_TREE_BLOCK_REF_KEY, its offset is used as the root id under which a
 * root_item_info entry is looked up or inserted (start = root id,
 * size = 1).  A new entry starts with level (u8)-1; whenever a block with a
 * higher level is seen (or the entry is new) the entry records that level,
 * the block's bytenr and generation, and node_count is set to 1.
 */
13437 static int build_roots_info_cache(struct btrfs_fs_info *info)
13440 struct btrfs_key key;
13441 struct extent_buffer *leaf;
13442 struct btrfs_path path;
13444 if (!roots_info_cache) {
13445 roots_info_cache = malloc(sizeof(*roots_info_cache));
13446 if (!roots_info_cache)
13448 cache_tree_init(roots_info_cache);
13451 btrfs_init_path(&path);
13453 key.type = BTRFS_EXTENT_ITEM_KEY;
13455 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
13458 leaf = path.nodes[0];
13461 struct btrfs_key found_key;
13462 struct btrfs_extent_item *ei;
13463 struct btrfs_extent_inline_ref *iref;
13464 int slot = path.slots[0];
13469 struct cache_extent *entry;
13470 struct root_item_info *rii;
13472 if (slot >= btrfs_header_nritems(leaf)) {
13473 ret = btrfs_next_leaf(info->extent_root, &path);
13480 leaf = path.nodes[0];
13481 slot = path.slots[0];
13484 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13486 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
13487 found_key.type != BTRFS_METADATA_ITEM_KEY)
13490 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
13491 flags = btrfs_extent_flags(leaf, ei);
13493 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
13494 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/* METADATA_ITEM keys carry the level in key.offset; EXTENT_ITEM keys keep
 * it in a btrfs_tree_block_info placed before the first inline ref */
13497 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
13498 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
13499 level = found_key.offset;
13501 struct btrfs_tree_block_info *binfo;
13503 binfo = (struct btrfs_tree_block_info *)(ei + 1);
13504 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
13505 level = btrfs_tree_block_level(leaf, binfo);
13509 * For a root extent, it must be of the following type and the
13510 * first (and only one) iref in the item.
13512 type = btrfs_extent_inline_ref_type(leaf, iref);
13513 if (type != BTRFS_TREE_BLOCK_REF_KEY)
13516 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
13517 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13519 rii = malloc(sizeof(struct root_item_info));
13524 rii->cache_extent.start = root_id;
13525 rii->cache_extent.size = 1;
13526 rii->level = (u8)-1;
13527 entry = &rii->cache_extent;
13528 ret = insert_cache_extent(roots_info_cache, entry);
13531 rii = container_of(entry, struct root_item_info,
13535 ASSERT(rii->cache_extent.start == root_id);
13536 ASSERT(rii->cache_extent.size == 1);
13538 if (level > rii->level || rii->level == (u8)-1) {
13539 rii->level = level;
13540 rii->bytenr = found_key.objectid;
13541 rii->gen = btrfs_extent_generation(leaf, ei);
13542 rii->node_count = 1;
13543 } else if (level == rii->level) {
13551 btrfs_release_path(&path);
/*
 * maybe_repair_root_item() - compare a root item with the cached root info.
 *
 * @path:           points at the root item (nodes[0] / slots[0] are read)
 * @root_key:       key of that root item; its objectid is the root id
 * @read_only_mode: when zero, a mismatching item is rewritten in place
 *
 * The roots_info_cache entry for the root id supplies the expected bytenr,
 * level and generation; a missing entry or a node_count other than 1 is
 * reported as an error.  A difference in any of the three fields is
 * reported (unless both @read_only_mode and the global repair flag are set)
 * and, when @read_only_mode is zero, the root item is updated and written
 * back with write_extent_buffer().  A root item generation newer than the
 * cached one gets its own message.
 */
13556 static int maybe_repair_root_item(struct btrfs_path *path,
13557 const struct btrfs_key *root_key,
13558 const int read_only_mode)
13560 const u64 root_id = root_key->objectid;
13561 struct cache_extent *entry;
13562 struct root_item_info *rii;
13563 struct btrfs_root_item ri;
13564 unsigned long offset;
13566 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13569 "Error: could not find extent items for root %llu\n",
13570 root_key->objectid);
13574 rii = container_of(entry, struct root_item_info, cache_extent);
13575 ASSERT(rii->cache_extent.start == root_id);
13576 ASSERT(rii->cache_extent.size == 1);
13578 if (rii->node_count != 1) {
13580 "Error: could not find btree root extent for root %llu\n",
13585 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
13586 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
13588 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
13589 btrfs_root_level(&ri) != rii->level ||
13590 btrfs_root_generation(&ri) != rii->gen) {
13593 * If we're in repair mode but our caller told us to not update
13594 * the root item, i.e. just check if it needs to be updated, don't
13595 * print this message, since the caller will call us again shortly
13596 * for the same root item without read only mode (the caller will
13597 * open a transaction first).
13599 if (!(read_only_mode && repair))
13601 "%sroot item for root %llu,"
13602 " current bytenr %llu, current gen %llu, current level %u,"
13603 " new bytenr %llu, new gen %llu, new level %u\n",
13604 (read_only_mode ? "" : "fixing "),
13606 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
13607 btrfs_root_level(&ri),
13608 rii->bytenr, rii->gen, rii->level);
13610 if (btrfs_root_generation(&ri) > rii->gen) {
13612 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
13613 root_id, btrfs_root_generation(&ri), rii->gen);
13617 if (!read_only_mode) {
13618 btrfs_set_root_bytenr(&ri, rii->bytenr);
13619 btrfs_set_root_level(&ri, rii->level);
13620 btrfs_set_root_generation(&ri, rii->gen);
13621 write_extent_buffer(path->nodes[0], &ri,
13622 offset, sizeof(ri));
13632 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
13633 * caused read-only snapshots to be corrupted if they were created at a moment
13634 * when the source subvolume/snapshot had orphan items. The issue was that the
13635 * on-disk root items became incorrect, referring to the pre orphan cleanup root
13636 * node instead of the post orphan cleanup root node.
13637 * So this function, and its callees, just detects and fixes those cases. Even
13638 * though the regression was for read-only snapshots, this function applies to
13639 * any snapshot/subvolume root.
13640 * This must be run before any other repair code - not doing so makes other
13641 * repair code delete or modify backrefs in the extent tree for example, which
13642 * will result in an inconsistent fs after repairing the root items.
/*
 * repair_root_items() - detect and fix stale root items in the tree root.
 *
 * @info: filesystem to scan
 *
 * Builds the roots info cache, then searches the tree root from
 * BTRFS_FIRST_FREE_OBJECTID / BTRFS_ROOT_ITEM_KEY onwards.  Items are
 * filtered to BTRFS_ROOT_ITEM_KEY and BTRFS_TREE_RELOC_OBJECTID is excluded;
 * the rest are passed to maybe_repair_root_item(), in read-only mode while
 * no transaction is open.  The cache is freed and the path released before
 * the final btrfs_commit_transaction() call.
 */
13644 static int repair_root_items(struct btrfs_fs_info *info)
13646 struct btrfs_path path;
13647 struct btrfs_key key;
13648 struct extent_buffer *leaf;
13649 struct btrfs_trans_handle *trans = NULL;
13652 int need_trans = 0;
13654 btrfs_init_path(&path);
13656 ret = build_roots_info_cache(info);
13660 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
13661 key.type = BTRFS_ROOT_ITEM_KEY;
13666 * Avoid opening and committing transactions if a leaf doesn't have
13667 * any root items that need to be fixed, so that we avoid rotating
13668 * backup roots unnecessarily.
13671 trans = btrfs_start_transaction(info->tree_root, 1);
13672 if (IS_ERR(trans)) {
13673 ret = PTR_ERR(trans);
13678 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
13682 leaf = path.nodes[0];
13685 struct btrfs_key found_key;
13687 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
13688 int no_more_keys = find_next_key(&path, &key);
13690 btrfs_release_path(&path);
13692 ret = btrfs_commit_transaction(trans,
13704 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13706 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
13708 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13711 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
13715 if (!trans && repair) {
13718 btrfs_release_path(&path);
13728 free_roots_info_cache();
13729 btrfs_release_path(&path);
13731 btrfs_commit_transaction(trans, info->tree_root);
13738 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13740 struct btrfs_trans_handle *trans;
13741 struct btrfs_block_group_cache *bg_cache;
13745 /* Clear all free space cache inodes and its extent data */
13747 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13750 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
13753 current = bg_cache->key.objectid + bg_cache->key.offset;
13756 /* Don't forget to set cache_generation to -1 */
13757 trans = btrfs_start_transaction(fs_info->tree_root, 0);
13758 if (IS_ERR(trans)) {
13759 error("failed to update super block cache generation");
13760 return PTR_ERR(trans);
13762 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
13763 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Handle a --clear-space-cache request according to clear_version.
 *
 * clear_version == 1: if the filesystem has the FREE_SPACE_TREE compat_ro
 * feature, an error telling the user to use "v2" is printed; otherwise
 * clear_free_space_cache() is called.
 * clear_version == 2: if FREE_SPACE_TREE is not set, a "nothing to clear"
 * message is printed; otherwise btrfs_clear_free_space_tree() is called.
 *
 * cmd_check() passes 1 for "v1" and 2 for "v2".
 *
 * NOTE(review): the lines that assign and return the result are not visible
 * in this listing; confirm the exact return values against the full source.
 */
13768 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
13773 if (clear_version == 1) {
/* v1 clearing is not attempted on a filesystem that uses the free space tree. */
13774 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13776 "free space cache v2 detected, use --clear-space-cache v2");
13780 printf("Clearing free space cache\n");
13781 ret = clear_free_space_cache(fs_info);
13783 error("failed to clear free space cache");
13786 printf("Free space cache cleared\n");
13788 } else if (clear_version == 2) {
/* Without the free space tree feature there is no v2 cache to remove. */
13789 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13790 printf("no free space cache v2 to clear\n");
13794 printf("Clear free space cache v2\n");
13795 ret = btrfs_clear_free_space_tree(fs_info);
13797 error("failed to clear free space cache v2: %d", ret);
13800 printf("free space cache v2 cleared\n");
/*
 * Help text for `btrfs check`. cmd_check() passes this table to usage() when
 * an unknown option is given or the positional argument count is wrong.
 */
13807 const char * const cmd_check_usage[] = {
/* Synopsis line. */
13808 "btrfs check [options] <device>",
/* The summary appears twice: once as a one-line description, once opening the longer text. */
13809 "Check structural integrity of a filesystem (unmounted).",
13810 "Check structural integrity of an unmounted filesystem. Verify internal",
13811 "trees' consistency and item connectivity. In the repair mode try to",
13812 "fix the problems found. ",
13813 "WARNING: the repair mode is considered dangerous",
/* One entry per option; keep in sync with long_options[] in cmd_check(). */
13815 "-s|--super <superblock> use this superblock copy",
13816 "-b|--backup use the first valid backup root copy",
13817 "--force skip mount checks, repair is not possible",
13818 "--repair try to repair the filesystem",
13819 "--readonly run in read-only mode (default)",
13820 "--init-csum-tree create a new CRC tree",
13821 "--init-extent-tree create a new extent tree",
13822 "--mode <MODE> allows choice of memory/IO trade-offs",
13823 " where MODE is one of:",
13824 " original - read inodes and extents to memory (requires",
13825 " more memory, does less IO)",
13826 " lowmem - try to use less memory but read blocks again",
13828 "--check-data-csum verify checksums of data blocks",
13829 "-Q|--qgroup-report print a report on qgroup consistency",
13830 "-E|--subvol-extents <subvolid>",
13831 " print subvolume extents and sharing state",
13832 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
13833 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
13834 "-p|--progress indicate progress",
13835 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of `btrfs check`.
 *
 * Visible flow, in order: parse the command line, verify the mount status of
 * the target, open the filesystem with open_ctree_fs_info(), sanity-check the
 * tree/dev/chunk roots, handle the optional --clear-space-cache, log-tree
 * zeroing (repair mode), qgroup report and subvolume extent report requests,
 * optionally re-initialize the extent and/or csum tree, and then run the
 * checks: chunks and extents, root items, free space cache/tree, fs roots,
 * csums, root refs and quota groups. A summary of the collected byte counters
 * is printed at the end.
 *
 * argc/argv: command line of the subcommand; exactly one positional argument
 * (the device/image to check) is expected after the options.
 *
 * NOTE(review): this listing omits many short lines (braces, case labels,
 * `break`/`goto`/`return` statements, some declarations), so the exact exit
 * paths and the return value are not visible here.
 */
13839 int cmd_check(int argc, char **argv)
13841 struct cache_tree root_cache;
13842 struct btrfs_root *root;
13843 struct btrfs_fs_info *info;
13846 u64 tree_root_bytenr = 0;
13847 u64 chunk_root_bytenr = 0;
13848 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
13852 int init_csum_tree = 0;
13854 int clear_space_cache = 0;
13855 int qgroup_report = 0;
13856 int qgroups_repaired = 0;
13857 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* Codes for long-only options; they start at 257, above any single-character option value. */
13862 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
13863 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
13864 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
13865 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
13866 GETOPT_VAL_FORCE };
13867 static const struct option long_options[] = {
13868 { "super", required_argument, NULL, 's' },
13869 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
13870 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
13871 { "init-csum-tree", no_argument, NULL,
13872 GETOPT_VAL_INIT_CSUM },
13873 { "init-extent-tree", no_argument, NULL,
13874 GETOPT_VAL_INIT_EXTENT },
13875 { "check-data-csum", no_argument, NULL,
13876 GETOPT_VAL_CHECK_CSUM },
13877 { "backup", no_argument, NULL, 'b' },
13878 { "subvol-extents", required_argument, NULL, 'E' },
13879 { "qgroup-report", no_argument, NULL, 'Q' },
13880 { "tree-root", required_argument, NULL, 'r' },
13881 { "chunk-root", required_argument, NULL,
13882 GETOPT_VAL_CHUNK_TREE },
13883 { "progress", no_argument, NULL, 'p' },
13884 { "mode", required_argument, NULL,
13886 { "clear-space-cache", required_argument, NULL,
13887 GETOPT_VAL_CLEAR_SPACE_CACHE},
13888 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
13889 { NULL, 0, NULL, 0}
/*
 * NOTE(review): the short option string has no ':' after 'E', while the long
 * option "subvol-extents" is declared required_argument and the usage text
 * shows "-E <subvolid>". With the short form getopt would leave optarg unset
 * before it is parsed into subvolid below; the string probably should be
 * "as:br:pE:Q". Confirm and fix in the full source.
 */
13892 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
13896 case 'a': /* ignored */ break;
13898 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
/* Superblock mirror index: must be below BTRFS_SUPER_MIRROR_MAX, then converted to a byte offset. */
13901 num = arg_strtou64(optarg);
13902 if (num >= BTRFS_SUPER_MIRROR_MAX) {
13904 "super mirror should be less than %d",
13905 BTRFS_SUPER_MIRROR_MAX);
13908 bytenr = btrfs_sb_offset(((int)num));
13909 printf("using SB copy %llu, bytenr %llu\n", num,
13910 (unsigned long long)bytenr);
13916 subvolid = arg_strtou64(optarg);
13919 tree_root_bytenr = arg_strtou64(optarg);
13921 case GETOPT_VAL_CHUNK_TREE:
13922 chunk_root_bytenr = arg_strtou64(optarg);
13925 ctx.progress_enabled = true;
13929 usage(cmd_check_usage);
13930 case GETOPT_VAL_REPAIR:
13931 printf("enabling repair mode\n");
/* Repairing requires the filesystem to be opened for writing. */
13933 ctree_flags |= OPEN_CTREE_WRITES;
13935 case GETOPT_VAL_READONLY:
13938 case GETOPT_VAL_INIT_CSUM:
13939 printf("Creating a new CRC tree\n");
13940 init_csum_tree = 1;
13942 ctree_flags |= OPEN_CTREE_WRITES;
13944 case GETOPT_VAL_INIT_EXTENT:
13945 init_extent_tree = 1;
/* Besides write access, the extent tree rebuild opens the fs without block groups. */
13946 ctree_flags |= (OPEN_CTREE_WRITES |
13947 OPEN_CTREE_NO_BLOCK_GROUPS);
13950 case GETOPT_VAL_CHECK_CSUM:
13951 check_data_csum = 1;
13953 case GETOPT_VAL_MODE:
13954 check_mode = parse_check_mode(optarg);
13955 if (check_mode == CHECK_MODE_UNKNOWN) {
13956 error("unknown mode: %s", optarg);
13960 case GETOPT_VAL_CLEAR_SPACE_CACHE:
/* Only the literal arguments "v1" and "v2" are accepted. */
13961 if (strcmp(optarg, "v1") == 0) {
13962 clear_space_cache = 1;
13963 } else if (strcmp(optarg, "v2") == 0) {
13964 clear_space_cache = 2;
13965 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
13968 "invalid argument to --clear-space-cache, must be v1 or v2");
13971 ctree_flags |= OPEN_CTREE_WRITES;
13973 case GETOPT_VAL_FORCE:
/* Exactly one positional argument is accepted. */
13979 if (check_argc_exact(argc - optind, 1))
13980 usage(cmd_check_usage);
/* Set up the progress reporting task only when -p/--progress was given. */
13982 if (ctx.progress_enabled) {
13983 ctx.tp = TASK_NOTHING;
13984 ctx.info = task_init(print_status_check, print_status_return, &ctx);
13987 /* This check is the only reason for --readonly to exist */
13988 if (readonly && repair) {
13989 error("repair options are not compatible with --readonly");
13994 * experimental and dangerous
13996 if (repair && check_mode == CHECK_MODE_LOWMEM)
13997 warning("low-memory mode repair support is only partial");
14000 cache_tree_init(&root_cache);
/* Mount status of the target decides whether the check may proceed (see --force handling below). */
14002 ret = check_mounted(argv[optind]);
14005 error("could not check mount status: %s",
14011 "%s is currently mounted, use --force if you really intend to check the filesystem",
14019 error("repair and --force is not yet supported");
14026 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14030 "filesystem mounted, continuing because of --force");
14032 /* A block device is mounted in exclusive mode by kernel */
14033 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14036 /* only allow partial opening under repair mode */
14038 ctree_flags |= OPEN_CTREE_PARTIAL;
14040 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14041 chunk_root_bytenr, ctree_flags);
14043 error("cannot open file system");
/* Publish the opened fs_info through the file-scope global_info pointer. */
14049 global_info = info;
14050 root = info->fs_root;
14051 uuid_unparse(info->super_copy->fsid, uuidbuf);
14053 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14056 * Check the bare minimum before starting anything else that could rely
14057 * on it, namely the tree roots, any local consistency checks
14059 if (!extent_buffer_uptodate(info->tree_root->node) ||
14060 !extent_buffer_uptodate(info->dev_root->node) ||
14061 !extent_buffer_uptodate(info->chunk_root->node)) {
14062 error("critical roots corrupted, unable to check the filesystem");
/* clear_space_cache holds 1 or 2 (cache version) when --clear-space-cache was given. */
14068 if (clear_space_cache) {
14069 ret = do_clear_free_space_cache(info, clear_space_cache);
14075 * repair mode will force us to commit transaction which
14076 * will make us fail to load log tree when mounting.
14078 if (repair && btrfs_super_log_root(info->super_copy)) {
14079 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14085 ret = zero_log_tree(root);
14088 error("failed to zero log tree: %d", ret);
14093 if (qgroup_report) {
14094 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14096 ret = qgroup_verify_all(info);
14103 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14104 subvolid, argv[optind], uuidbuf);
14105 ret = print_extent_state(info, subvolid);
/*
 * --init-extent-tree / --init-csum-tree: rebuild the requested tree(s) inside
 * one transaction on the extent root, committed before the regular checks.
 */
14110 if (init_extent_tree || init_csum_tree) {
14111 struct btrfs_trans_handle *trans;
14113 trans = btrfs_start_transaction(info->extent_root, 0);
14114 if (IS_ERR(trans)) {
14115 error("error starting transaction");
14116 ret = PTR_ERR(trans);
14121 if (init_extent_tree) {
14122 printf("Creating a new extent tree\n");
14123 ret = reinit_extent_tree(trans, info);
14129 if (init_csum_tree) {
14130 printf("Reinitialize checksum tree\n");
14131 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14133 error("checksum tree initialization failed: %d",
14140 ret = fill_csum_tree(trans, info->csum_root,
14144 error("checksum tree refilling failed: %d", ret);
14149 * Ok now we commit and run the normal fsck, which will add
14150 * extent entries for all of the items it finds.
14152 ret = btrfs_commit_transaction(trans, info->extent_root);
/* The extent and csum roots must be readable before the main checks start. */
14157 if (!extent_buffer_uptodate(info->extent_root->node)) {
14158 error("critical: extent_root, unable to check the filesystem");
14163 if (!extent_buffer_uptodate(info->csum_root->node)) {
14164 error("critical: csum_root, unable to check the filesystem");
14170 ret = do_check_chunks_and_extents(info);
14174 "errors found in extent allocation tree or chunk allocation");
/*
 * A negative result is an error (reported through strerror(-ret)); a positive
 * result is printed as the number of roots fixed, or found outdated.
 */
14176 ret = repair_root_items(info);
14179 error("failed to repair root items: %s", strerror(-ret));
14183 fprintf(stderr, "Fixed %d roots.\n", ret);
14185 } else if (ret > 0) {
14187 "Found %d roots with an outdated root item.\n",
14190 "Please run a filesystem check with the option --repair to fix them.\n");
/* The phase banner is printed here only when progress reporting is disabled. */
14196 if (!ctx.progress_enabled) {
14197 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14198 fprintf(stderr, "checking free space tree\n");
14200 fprintf(stderr, "checking free space cache\n");
14202 ret = check_space_cache(root);
14205 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14206 error("errors found in free space tree");
14208 error("errors found in free space cache");
14213 * We used to have to have these hole extents in between our real
14214 * extents so if we don't have this flag set we need to make sure there
14215 * are no gaps in the file extents for inodes, otherwise we can just
14216 * ignore it when this happens.
14218 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14219 ret = do_check_fs_roots(info, &root_cache);
14222 error("errors found in fs roots");
14226 fprintf(stderr, "checking csums\n");
14227 ret = check_csums(root);
14230 error("errors found in csum tree");
14234 fprintf(stderr, "checking root refs\n");
14235 /* For low memory mode, check_fs_roots_v2 handles root refs */
14236 if (check_mode != CHECK_MODE_LOWMEM) {
14237 ret = check_root_refs(root, &root_cache);
14240 error("errors found in root refs");
/* In repair mode, drain the recow_ebs list and re-COW each queued extent buffer. */
14245 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14246 struct extent_buffer *eb;
14248 eb = list_first_entry(&root->fs_info->recow_ebs,
14249 struct extent_buffer, recow);
14250 list_del_init(&eb->recow);
14251 ret = recow_extent_buffer(root, eb);
14254 error("fails to fix transid errors");
/* Drain the file-scope delete_items list, deleting each recorded bad item. */
14259 while (!list_empty(&delete_items)) {
14260 struct bad_item *bad;
14262 bad = list_first_entry(&delete_items, struct bad_item, list);
14263 list_del_init(&bad->list);
14265 ret = delete_bad_item(root, bad);
14271 if (info->quota_enabled) {
14272 fprintf(stderr, "checking quota groups\n");
14273 ret = qgroup_verify_all(info);
14276 error("failed to check quota groups");
14280 ret = repair_qgroups(info, &qgroups_repaired);
14283 error("failed to repair quota groups");
/* Entries still on recow_ebs at this point are reported as transid errors. */
14289 if (!list_empty(&root->fs_info->recow_ebs)) {
14290 error("transid errors in file system");
/* Summary: print the file-scope byte counters. */
14295 printf("found %llu bytes used, ",
14296 (unsigned long long)bytes_used);
14298 printf("error(s) found\n");
14300 printf("no error found\n");
14301 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14302 printf("total tree bytes: %llu\n",
14303 (unsigned long long)total_btree_bytes);
14304 printf("total fs tree bytes: %llu\n",
14305 (unsigned long long)total_fs_tree_bytes);
14306 printf("total extent tree bytes: %llu\n",
14307 (unsigned long long)total_extent_tree_bytes);
14308 printf("btree space waste bytes: %llu\n",
14309 (unsigned long long)btree_space_waste);
14310 printf("file data blocks allocated: %llu\n referenced %llu\n",
14311 (unsigned long long)data_bytes_allocated,
14312 (unsigned long long)data_bytes_referenced);
/* Release the qgroup accounting data and the root record cache. */
14314 free_qgroup_counts();
14315 free_root_recs_tree(&root_cache);
/* Stop the progress task if one was started. */
14319 if (ctx.progress_enabled)
14320 task_deinit(ctx.info);