2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
/* Sentinel enumerator; the earlier members of this enum are not visible in this view. */
51 TASK_NOTHING, /* must be the last element */
/*
 * Members that print_status_check() reads through its struct task_ctx
 * pointer.  The struct header is not visible in this view.
 */
56 enum task_position tp;
58 struct task_info *info;
/* File-scope state of the checker.  Every numeric counter starts at zero. */
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
/* Two initially empty lists. */
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
/*
 * Read by maybe_free_inode_rec(): when non-zero, the check that flags
 * I_ERR_FILE_EXTENT_DISCOUNT is skipped.
 */
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
/* Zero-initialised progress context (a struct task_ctx). */
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
/*
 * Check modes.  parse_check_mode() maps user-supplied strings onto these
 * values.  Only the DEFAULT alias is visible in this view; it is equal to
 * CHECK_MODE_ORIGINAL.
 */
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
/* Mode currently in effect; starts out as the default. */
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
/*
 * Common header embedded (as a member named "node") at the start of both
 * struct data_backref and struct tree_backref.  It also contains an rb-tree
 * link named "node" (used by rb_node_to_extent_backref()); that member is
 * not visible in this view.
 *
 * is_data and full_backref are the first two keys compared by
 * compare_extent_backref().
 */
87 struct extent_backref {
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
98 return rb_entry(node, struct extent_backref, node);
/*
 * Backref variant compared by compare_data_backref().  It starts with the
 * shared extent_backref header; the remaining members (parent, owner,
 * offset, ... as used by the comparator) are not visible in this view.
 */
101 struct data_backref {
102 struct extent_backref node;
/*
 * Error flags, one bit each (bits 1 through 20 are visible in this view),
 * so that several of them can be OR-ed into a single value.
 */
116 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
117 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
118 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but does not match */
119 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
120 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
121 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but does not match */
122 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
123 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
124 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
125 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
126 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
127 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
128 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM no reference */
129 #define NO_INODE_ITEM (1<<14) /* no inode_item */
130 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
131 #define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
132 #define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but does not match */
133 #define DIR_INDEX_MISSING (1<<18) /* DIR_INDEX not found */
134 #define DIR_INDEX_MISMATCH (1<<19) /* DIR_INDEX found but does not match */
135 #define DIR_COUNT_AGAIN (1<<20) /* DIR isize should be recalculated */
137 static inline struct data_backref* to_data_backref(struct extent_backref *back)
139 return container_of(back, struct data_backref, node);
/*
 * Ordering callback for two data backrefs stored in an rb-tree.
 *
 * Both nodes are converted to their data_backref containers and a warning
 * is raised if either is not flagged is_data.  The keys are then examined
 * in this order: parent (which shares storage with root), a test of
 * back1's full_backref flag, owner, offset and, guarded by both entries
 * having found_ref set, disk_bytenr followed by bytes.
 *
 * The return statements are not visible in this view of the file.
 */
142 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
144 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
145 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
146 struct data_backref *back1 = to_data_backref(ext1);
147 struct data_backref *back2 = to_data_backref(ext2);
149 WARN_ON(!ext1->is_data);
150 WARN_ON(!ext2->is_data);
152 /* parent and root are a union, so this covers both */
153 if (back1->parent > back2->parent)
155 if (back1->parent < back2->parent)
158 /* This is a full backref and the parents match. */
159 if (back1->node.full_backref)
162 if (back1->owner > back2->owner)
164 if (back1->owner < back2->owner)
167 if (back1->offset > back2->offset)
169 if (back1->offset < back2->offset)
172 if (back1->found_ref && back2->found_ref) {
173 if (back1->disk_bytenr > back2->disk_bytenr)
175 if (back1->disk_bytenr < back2->disk_bytenr)
178 if (back1->bytes > back2->bytes)
180 if (back1->bytes < back2->bytes)
188 * Much like data_backref, just removed the undetermined members
189 * and change it to use list_head.
190 * During extent scan, it is stored in root->orphan_data_extent.
191 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
/*
 * List-linked record of an orphan data extent.  The members printed by
 * print_orphan_data_extents() (objectid, offset, disk_bytenr, ...) are not
 * visible in this view.
 */
193 struct orphan_data_extent {
194 struct list_head list;
/*
 * Backref variant compared by compare_tree_backref().  It starts with the
 * shared extent_backref header; the remaining members are not visible in
 * this view.
 */
202 struct tree_backref {
203 struct extent_backref node;
210 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
212 return container_of(back, struct tree_backref, node);
/*
 * Ordering callback for two tree backrefs stored in an rb-tree.
 *
 * Warns if either node is flagged is_data, then orders the entries by
 * their parent field (which shares storage with root).  The return
 * statements are not visible in this view of the file.
 */
215 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
217 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
218 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
219 struct tree_backref *back1 = to_tree_backref(ext1);
220 struct tree_backref *back2 = to_tree_backref(ext2);
222 WARN_ON(ext1->is_data);
223 WARN_ON(ext2->is_data);
225 /* parent and root are a union, so this covers both */
226 if (back1->parent > back2->parent)
228 if (back1->parent < back2->parent)
/*
 * Top-level ordering callback for backrefs of either kind.
 *
 * Entries are ordered first by is_data, then by full_backref; remaining
 * ties are delegated to compare_data_backref() or compare_tree_backref().
 * The condition that selects between the two delegates, and the return
 * values of the early exits, are not visible in this view.
 */
234 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
236 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
237 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
239 if (ext1->is_data > ext2->is_data)
242 if (ext1->is_data < ext2->is_data)
245 if (ext1->full_backref > ext2->full_backref)
247 if (ext1->full_backref < ext2->full_backref)
251 return compare_data_backref(node1, node2);
253 return compare_tree_backref(node1, node2);
/*
 * Explicit "not yet determined" value for
 * extent_record::flag_block_full_backref.  That field is two bits wide,
 * which is what allows it to hold the value 2.
 */
257 enum { FLAG_UNSET = 2 };
/*
 * Per-extent bookkeeping record.  It carries several list links, an
 * rb-tree root for its backrefs and an embedded cache_extent.  Some members
 * are not visible in this view.
 */
259 struct extent_record {
260 struct list_head backrefs;
261 struct list_head dups;
262 struct rb_root backref_tree;
/* Link used by to_extent_record() to get back to this record. */
263 struct list_head list;
264 struct cache_extent cache;
265 struct btrfs_disk_key parent_key;
270 u64 extent_item_refs;
272 u64 parent_generation;
/* Two bits wide so that it can hold FLAG_UNSET (2) besides 0 and 1. */
276 unsigned int flag_block_full_backref:2;
277 unsigned int found_rec:1;
278 unsigned int content_checked:1;
279 unsigned int owner_ref_checked:1;
280 unsigned int is_root:1;
281 unsigned int metadata:1;
282 unsigned int bad_full_backref:1;
283 unsigned int crossing_stripes:1;
284 unsigned int wrong_chunk_type:1;
287 static inline struct extent_record* to_extent_record(struct list_head *entry)
289 return container_of(entry, struct extent_record, list);
/*
 * One name-based reference to an inode, linked into
 * inode_record::backrefs.  The three found_* bits record which item kinds
 * add_inode_backref() has seen for this reference.
 *
 * Other members (dir, index, namelen, filetype, errors, the trailing name
 * storage, ...) are used elsewhere in this file but are not visible in this
 * view.
 */
292 struct inode_backref {
293 struct list_head list;
294 unsigned int found_dir_item:1;
295 unsigned int found_dir_index:1;
296 unsigned int found_inode_ref:1;
306 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
308 return list_entry(entry, struct inode_backref, list);
/*
 * List-linked record describing a root item; only the list link and the
 * drop key are visible in this view.
 */
311 struct root_item_record {
312 struct list_head list;
318 struct btrfs_key drop_key;
/*
 * Error bits stored in a backref's "errors" field.  print_ref_error()
 * prints one message per set bit, in the order listed here.
 */
321 #define REF_ERR_NO_DIR_ITEM (1 << 0)
322 #define REF_ERR_NO_DIR_INDEX (1 << 1)
323 #define REF_ERR_NO_INODE_REF (1 << 2)
324 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
325 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
326 #define REF_ERR_DUP_INODE_REF (1 << 5)
327 #define REF_ERR_INDEX_UNMATCH (1 << 6)
328 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
329 #define REF_ERR_NAME_TOO_LONG (1 << 8) /* 0x100 */
330 #define REF_ERR_NO_ROOT_REF (1 << 9)
331 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
332 #define REF_ERR_DUP_ROOT_REF (1 << 11)
333 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
/*
 * One hole in a file's extent map, kept in an rb-tree.  The code in this
 * file uses its "node", "start" and "len" members; the member declarations
 * are not visible in this view.
 */
335 struct file_extent_hole {
/*
 * In-memory summary of everything found for one inode while walking a
 * tree.  Further members (ino, nlink, isize, nbytes, imode, errors, refs,
 * extent_start/extent_end, found_link, found_size, ...) are used elsewhere
 * in this file but are not visible in this view.
 */
341 struct inode_record {
/* List of struct inode_backref. */
342 struct list_head backrefs;
343 unsigned int checked:1;
344 unsigned int merging:1;
345 unsigned int found_inode_item:1;
346 unsigned int found_dir_item:1;
347 unsigned int found_file_extent:1;
348 unsigned int found_csum_item:1;
349 unsigned int some_csum_missing:1;
350 unsigned int nodatasum:1;
/* rb-tree of struct file_extent_hole. */
363 struct rb_root holes;
/* List of struct orphan_data_extent. */
364 struct list_head orphan_extents;
/*
 * Error bits stored in inode_record::errors.  print_inode_error() prints
 * one message per set bit, in the order listed here.
 */
369 #define I_ERR_NO_INODE_ITEM (1 << 0)
370 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
371 #define I_ERR_DUP_INODE_ITEM (1 << 2)
372 #define I_ERR_DUP_DIR_INDEX (1 << 3)
373 #define I_ERR_ODD_DIR_ITEM (1 << 4)
374 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
375 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
376 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
377 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) /* 0x100 */
378 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
379 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) /* 0x400 */
380 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
381 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
382 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
383 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
/*
 * List-linked record of one reference to a root, with one-bit flags for
 * what has been found so far.  Further members are not visible in this
 * view.
 */
385 struct root_backref {
386 struct list_head list;
387 unsigned int found_dir_item:1;
388 unsigned int found_dir_index:1;
389 unsigned int found_back_ref:1;
390 unsigned int found_forward_ref:1;
391 unsigned int reachable:1;
400 static inline struct root_backref* to_root_backref(struct list_head *entry)
402 return list_entry(entry, struct root_backref, list);
/*
 * Members of a struct whose header is not visible in this view
 * (NOTE(review): presumably the per-root record - confirm against the
 * full file).
 */
406 struct list_head backrefs;
407 struct cache_extent cache;
408 unsigned int found_root_item:1;
/*
 * Member of a struct whose header is not visible in this view
 * (NOTE(review): presumably struct ptr_node, which get_inode_rec() reaches
 * through container_of(..., struct ptr_node, cache) - confirm).
 */
414 struct cache_extent cache;
/*
 * Members of a struct whose header is not visible in this view.  The
 * member set matches what find_shared_node()/add_shared_node() and
 * splice_shared_node() use on struct shared_node: a cache_extent link, two
 * cache trees and the inode record currently being processed.
 */
419 struct cache_extent cache;
420 struct cache_tree root_cache;
421 struct cache_tree inode_cache;
422 struct inode_record *current;
/*
 * Tree-walk state: a cache tree of shared nodes plus one shared_node slot
 * per tree level.  Members used elsewhere (active_node, root_level, ...)
 * are not visible in this view.
 */
431 struct walk_control {
432 struct cache_tree shared;
433 struct shared_node *nodes[BTRFS_MAX_LEVEL];
/*
 * Members of a struct whose header is not visible in this view: a key and
 * a list link.
 */
439 struct btrfs_key key;
441 struct list_head list;
/* List-linked extent entry; its other members are not visible in this view. */
444 struct extent_entry {
449 struct list_head list;
/*
 * Cached information about a root item, indexed through an embedded
 * cache_extent.  The members that the two comments below describe are not
 * visible in this view.
 */
452 struct root_item_info {
453 /* level of the root */
455 /* number of nodes at this level, must be 1 for a root */
459 struct cache_extent cache_extent;
463 * Error bit for low memory mode check.
465 * Currently no caller cares about it yet. Just internal use for error
/*
 * Error bits for the low-memory mode check.  Each flag owns a distinct bit
 * so that several can be OR-ed into one value and still be told apart.
 *
 * CROSSING_STRIPE_BOUNDARY used to share bit 4 with REFERENCER_MISMATCH,
 * which made the two conditions indistinguishable; it now uses the first
 * free bit instead.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8) /* Chunk type mismatch */
/*
 * Progress-indicator routine with a thread-entry style signature; @p is a
 * struct task_ctx pointer.
 *
 * It starts a periodic timer of 1000 ms on priv->info, then prints the
 * label for the current position (priv->tp) followed by one of four
 * spinner characters, and waits for the next period.  The loop construct,
 * the "count" variable and the action taken for TASK_NOTHING are not
 * visible in this view.
 */
479 static void *print_status_check(void *p)
481 struct task_ctx *priv = p;
482 const char work_indicator[] = { '.', 'o', 'O', 'o' };
/* Labels indexed by priv->tp; only one entry is visible in this view. */
484 static char *task_position_string[] = {
486 "checking free space cache",
490 task_period_start(priv->info, 1000 /* 1s */);
492 if (priv->tp == TASK_NOTHING)
/* "\r" returns to the start of the line so the next update overwrites it. */
496 printf("%s [%c]\r", task_position_string[priv->tp],
497 work_indicator[count % 4]);
500 task_period_wait(priv->info);
/*
 * NOTE(review): presumably the completion counterpart of
 * print_status_check(); the body is not visible in this view.
 */
505 static int print_status_return(void *p)
513 static enum btrfs_check_mode parse_check_mode(const char *str)
515 if (strcmp(str, "lowmem") == 0)
516 return CHECK_MODE_LOWMEM;
517 if (strcmp(str, "orig") == 0)
518 return CHECK_MODE_ORIGINAL;
519 if (strcmp(str, "original") == 0)
520 return CHECK_MODE_ORIGINAL;
522 return CHECK_MODE_UNKNOWN;
/*
 * Compatibility helper that allows older code to keep working with the
 * rb-tree of holes.
 *
 * It distinguishes an empty tree from a non-empty one and, in the latter
 * case, looks at the first (lowest-ordered) hole.  Callers compare the
 * result against a file size.  The return statements are not visible in
 * this view.
 */
526 static u64 first_extent_gap(struct rb_root *holes)
528 struct file_extent_hole *hole;
530 if (RB_EMPTY_ROOT(holes))
533 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * Ordering callback used when inserting a hole into the rb-tree.
 *
 * Holes are ordered by start offset; for equal starts the lengths are
 * compared so that one of the two becomes the "merge center".  According
 * to the comment in add_file_extent_hole() this comparator never reports
 * equality.  The return statements are not visible in this view.
 */
537 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
539 struct file_extent_hole *hole1;
540 struct file_extent_hole *hole2;
542 hole1 = rb_entry(node1, struct file_extent_hole, node);
543 hole2 = rb_entry(node2, struct file_extent_hole, node);
545 if (hole1->start > hole2->start)
547 if (hole1->start < hole2->start)
549 /* Now hole1->start == hole2->start */
550 if (hole1->len >= hole2->len)
552 * Hole 1 will be merge center
553 * Same hole will be merged later
556 /* Hole 2 will be merge center */
561 * Add a hole to the record
563 * This will do hole merge for copy_file_extent_holes(),
564 * which will ensure there won't be continuous holes.
/*
 * Insert a new hole into @holes and merge it with its neighbours.
 *
 * A freshly allocated hole is inserted with compare_hole().  If the
 * previous hole touches or overlaps it, the new hole is extended backwards
 * to the previous hole's start and the previous hole is erased from the
 * tree.  The following holes are then absorbed one by one for as long as
 * they touch or overlap the (growing) new hole.
 *
 * The remaining parameters, the allocation-failure handling, the freeing
 * of absorbed holes and the return statements are not visible in this
 * view.
 */
566 static int add_file_extent_hole(struct rb_root *holes,
569 struct file_extent_hole *hole;
570 struct file_extent_hole *prev = NULL;
571 struct file_extent_hole *next = NULL;
573 hole = malloc(sizeof(*hole));
578 /* Since compare will not return 0, no -EEXIST will happen */
579 rb_insert(holes, &hole->node, compare_hole);
581 /* simple merge with previous hole */
582 if (rb_prev(&hole->node))
583 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
585 if (prev && prev->start + prev->len >= hole->start) {
/* Recompute the length first: it must still use the old hole->start. */
586 hole->len = hole->start + hole->len - prev->start;
587 hole->start = prev->start;
588 rb_erase(&prev->node, holes);
593 /* iterate merge with next holes */
595 if (!rb_next(&hole->node))
597 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
599 if (hole->start + hole->len >= next->start) {
/* Only grow when the next hole reaches beyond the current end. */
600 if (hole->start + hole->len <= next->start + next->len)
601 hole->len = next->start + next->len -
603 rb_erase(&next->node, holes);
/*
 * Search callback for rb_search(): @data is a struct file_extent_hole
 * describing the wanted range, @node is the tree entry being examined.
 *
 * The wanted start is compared against the entry: it is either below the
 * entry's start, inside [start, start + len), or (implicitly) beyond it.
 * The declaration and assignment of "start" and the return statements are
 * not visible in this view.
 */
612 static int compare_hole_range(struct rb_node *node, void *data)
614 struct file_extent_hole *hole;
617 hole = (struct file_extent_hole *)data;
/* From here on "hole" refers to the tree entry, not to the search key. */
620 hole = rb_entry(node, struct file_extent_hole, node);
621 if (start < hole->start)
623 if (start >= hole->start && start < hole->start + hole->len)
629 * Delete a hole in the record
631 * This will do the hole split and is much more restrictive than add.
/*
 * Remove the range [start, start + len) from the hole tree.
 *
 * The hole containing @start is looked up with compare_hole_range(); the
 * range is checked against the end of that hole.  The hole is then erased
 * and whatever remains in front of and/or behind the removed range is
 * re-added through add_file_extent_hole().
 *
 * The remaining parameters, the local declarations, the error paths and
 * the return statements are not visible in this view.
 */
633 static int del_file_extent_hole(struct rb_root *holes,
636 struct file_extent_hole *hole;
637 struct file_extent_hole tmp;
642 struct rb_node *node;
649 node = rb_search(holes, &tmp, compare_hole_range, NULL);
652 hole = rb_entry(node, struct file_extent_hole, node);
653 if (start + len > hole->start + hole->len)
657 * Now there will be no overlap, delete the hole and re-add the
658 * split(s) if they exists.
/* Leading remainder: part of the hole in front of the deleted range. */
660 if (start > hole->start) {
661 prev_start = hole->start;
662 prev_len = start - hole->start;
/* Trailing remainder: part of the hole behind the deleted range. */
665 if (hole->start + hole->len > start + len) {
666 next_start = start + len;
667 next_len = hole->start + hole->len - start - len;
670 rb_erase(node, holes);
673 ret = add_file_extent_hole(holes, prev_start, prev_len);
678 ret = add_file_extent_hole(holes, next_start, next_len);
/*
 * Add every hole of the source tree to @dst, in tree order, through
 * add_file_extent_hole() (so the copies are merged with whatever @dst
 * already holds).  The second parameter, the loop construct, the error
 * check and the return statement are not visible in this view.
 */
685 static int copy_file_extent_holes(struct rb_root *dst,
688 struct file_extent_hole *hole;
689 struct rb_node *node;
692 node = rb_first(src);
694 hole = rb_entry(node, struct file_extent_hole, node);
695 ret = add_file_extent_hole(dst, hole->start, hole->len);
698 node = rb_next(node);
/*
 * Empty the hole tree by repeatedly taking its first node and erasing it.
 * The loop construct and the release of each hole are not visible in this
 * view.
 */
703 static void free_file_extent_holes(struct rb_root *holes)
705 struct rb_node *node;
706 struct file_extent_hole *hole;
708 node = rb_first(holes);
710 hole = rb_entry(node, struct file_extent_hole, node);
711 rb_erase(node, holes);
/* Restart from the new first node, since the old one is gone. */
713 node = rb_first(holes);
/* Forward declaration; the definition is not in this part of the file. */
717 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
719 static void record_root_in_trans(struct btrfs_trans_handle *trans,
720 struct btrfs_root *root)
722 if (root->last_trans != trans->transid) {
723 root->track_dirty = 1;
724 root->last_trans = trans->transid;
725 root->commit_root = root->node;
726 extent_buffer_get(root->node);
730 static u8 imode_to_type(u32 imode)
733 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
734 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
735 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
736 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
737 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
738 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
739 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
740 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
743 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
/*
 * rb-tree ordering callback for device records, keyed on devid.  The
 * values returned for "greater", "less" and "equal" are not visible in
 * this view.
 */
747 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
749 struct device_record *rec1;
750 struct device_record *rec2;
752 rec1 = rb_entry(node1, struct device_record, node);
753 rec2 = rb_entry(node2, struct device_record, node);
754 if (rec1->devid > rec2->devid)
756 else if (rec1->devid < rec2->devid)
/*
 * Make a deep copy of @orig_rec.
 *
 * The record itself is copied bytewise, after which its two lists and its
 * hole tree are re-initialised and refilled with private copies of every
 * backref (including the name bytes stored behind the struct), every
 * orphan data extent and every hole.
 *
 * Returns ERR_PTR(-ENOMEM) when the record itself cannot be allocated.
 * The error checks inside the loops, the success return and the labels of
 * the cleanup code at the end are not visible in this view.
 */
762 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
764 struct inode_record *rec;
765 struct inode_backref *backref;
766 struct inode_backref *orig;
767 struct inode_backref *tmp;
768 struct orphan_data_extent *src_orphan;
769 struct orphan_data_extent *dst_orphan;
774 rec = malloc(sizeof(*rec));
776 return ERR_PTR(-ENOMEM);
777 memcpy(rec, orig_rec, sizeof(*rec));
/*
 * The memcpy() above copied the original's list heads and tree root
 * verbatim; reset them so the clone owns only its own entries.
 */
779 INIT_LIST_HEAD(&rec->backrefs);
780 INIT_LIST_HEAD(&rec->orphan_extents);
781 rec->holes = RB_ROOT;
783 list_for_each_entry(orig, &orig_rec->backrefs, list) {
/* Header plus name bytes plus the terminating NUL. */
784 size = sizeof(*orig) + orig->namelen + 1;
785 backref = malloc(size);
790 memcpy(backref, orig, size);
791 list_add_tail(&backref->list, &rec->backrefs);
793 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
794 dst_orphan = malloc(sizeof(*dst_orphan));
799 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
800 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
802 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
/* Cleanup: walk the partially built hole tree of the clone. */
809 rb = rb_first(&rec->holes);
811 struct file_extent_hole *hole;
813 hole = rb_entry(rb, struct file_extent_hole, node);
/* Cleanup: drop the backrefs already copied into the clone. */
819 if (!list_empty(&rec->backrefs))
820 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
821 list_del(&orig->list);
/*
 * NOTE(review): this loop walks the orphan_extents list, whose entries
 * are struct orphan_data_extent, using the inode_backref pointers
 * "orig" and "tmp".  It appears to rely on both structs placing "list"
 * at the same offset - consider dedicated orphan_data_extent iterators.
 */
825 if (!list_empty(&rec->orphan_extents))
826 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
827 list_del(&orig->list);
/*
 * Print every orphan data extent on @orphan_extents to stdout, preceded by
 * a header line naming the tree.  Nothing is printed for an empty list.
 * The second parameter and some printf arguments are not visible in this
 * view.
 */
836 static void print_orphan_data_extents(struct list_head *orphan_extents,
839 struct orphan_data_extent *orphan;
841 if (list_empty(orphan_extents))
843 printf("The following data extent is lost in tree %llu:\n",
845 list_for_each_entry(orphan, orphan_extents, list) {
846 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
847 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Print a one-line description of all errors recorded for @rec to stderr,
 * followed by extra detail for orphan extents and for file extent holes.
 *
 * The line has the form "root <id> inode <ino> errors <hex>" followed by
 * one ", <text>" fragment per set I_ERR_* bit.  Some lines of this
 * function (early exit, loop construct in the hole section) are not
 * visible in this view.
 */
852 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
854 u64 root_objectid = root->root_key.objectid;
855 int errors = rec->errors;
859 /* For a reloc root, print the objectid of its corresponding fs root */
860 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
861 root_objectid = root->root_key.offset;
/*
 * NOTE(review): "reloc" is emitted without a trailing space, so together
 * with the next fprintf() the output reads "relocroot ..." - confirm
 * whether a separator is intended.
 */
862 fprintf(stderr, "reloc");
864 fprintf(stderr, "root %llu inode %llu errors %x",
865 (unsigned long long) root_objectid,
866 (unsigned long long) rec->ino, rec->errors);
868 if (errors & I_ERR_NO_INODE_ITEM)
869 fprintf(stderr, ", no inode item");
870 if (errors & I_ERR_NO_ORPHAN_ITEM)
871 fprintf(stderr, ", no orphan item");
872 if (errors & I_ERR_DUP_INODE_ITEM)
873 fprintf(stderr, ", dup inode item");
874 if (errors & I_ERR_DUP_DIR_INDEX)
875 fprintf(stderr, ", dup dir index");
876 if (errors & I_ERR_ODD_DIR_ITEM)
877 fprintf(stderr, ", odd dir item");
878 if (errors & I_ERR_ODD_FILE_EXTENT)
879 fprintf(stderr, ", odd file extent");
880 if (errors & I_ERR_BAD_FILE_EXTENT)
881 fprintf(stderr, ", bad file extent");
882 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
883 fprintf(stderr, ", file extent overlap");
884 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
885 fprintf(stderr, ", file extent discount");
886 if (errors & I_ERR_DIR_ISIZE_WRONG)
887 fprintf(stderr, ", dir isize wrong");
888 if (errors & I_ERR_FILE_NBYTES_WRONG)
889 fprintf(stderr, ", nbytes wrong");
890 if (errors & I_ERR_ODD_CSUM_ITEM)
891 fprintf(stderr, ", odd csum item");
892 if (errors & I_ERR_SOME_CSUM_MISSING)
893 fprintf(stderr, ", some csum missing");
894 if (errors & I_ERR_LINK_COUNT_WRONG)
895 fprintf(stderr, ", link count wrong");
896 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
897 fprintf(stderr, ", orphan file extent");
898 fprintf(stderr, "\n");
899 /* Print the orphan extents if needed */
/*
 * NOTE(review): this passes root->objectid, whereas the header line above
 * is built from root->root_key - confirm that both identify the same
 * tree, in particular for reloc roots.
 */
900 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
901 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
903 /* Print the holes if needed */
904 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
905 struct file_extent_hole *hole;
906 struct rb_node *node;
909 node = rb_first(&rec->holes);
910 fprintf(stderr, "Found file extent holes:\n");
913 hole = rb_entry(node, struct file_extent_hole, node);
914 fprintf(stderr, "\tstart: %llu, len: %llu\n",
915 hole->start, hole->len);
916 node = rb_next(node);
/* Alternative output describing a single hole that starts at offset 0. */
919 fprintf(stderr, "\tstart: 0, len: %llu\n",
921 root->fs_info->sectorsize));
925 static void print_ref_error(int errors)
927 if (errors & REF_ERR_NO_DIR_ITEM)
928 fprintf(stderr, ", no dir item");
929 if (errors & REF_ERR_NO_DIR_INDEX)
930 fprintf(stderr, ", no dir index");
931 if (errors & REF_ERR_NO_INODE_REF)
932 fprintf(stderr, ", no inode ref");
933 if (errors & REF_ERR_DUP_DIR_ITEM)
934 fprintf(stderr, ", dup dir item");
935 if (errors & REF_ERR_DUP_DIR_INDEX)
936 fprintf(stderr, ", dup dir index");
937 if (errors & REF_ERR_DUP_INODE_REF)
938 fprintf(stderr, ", dup inode ref");
939 if (errors & REF_ERR_INDEX_UNMATCH)
940 fprintf(stderr, ", index mismatch");
941 if (errors & REF_ERR_FILETYPE_UNMATCH)
942 fprintf(stderr, ", filetype mismatch");
943 if (errors & REF_ERR_NAME_TOO_LONG)
944 fprintf(stderr, ", name too long");
945 if (errors & REF_ERR_NO_ROOT_REF)
946 fprintf(stderr, ", no root ref");
947 if (errors & REF_ERR_NO_ROOT_BACKREF)
948 fprintf(stderr, ", no root backref");
949 if (errors & REF_ERR_DUP_ROOT_REF)
950 fprintf(stderr, ", dup root ref");
951 if (errors & REF_ERR_DUP_ROOT_BACKREF)
952 fprintf(stderr, ", dup root backref");
953 fprintf(stderr, "\n");
/*
 * Look up the inode record for @ino in @inode_cache, creating it when it
 * does not exist yet.
 *
 * For an existing record that is shared (refs > 1) and about to be
 * modified (@mod set), a private clone is stored in the cache node in its
 * place.  A new record is zero-allocated, gets extent_start set to
 * (u64)-1, empty lists and an empty hole tree, and is inserted into the
 * cache with a size-1 extent at @ino.
 *
 * Returns ERR_PTR(-ENOMEM) on allocation failure and ERR_PTR(-EEXIST) when
 * the insertion fails.  Several lines (remaining parameters, branch
 * structure, success returns, special handling of BTRFS_FREE_INO_OBJECTID)
 * are not visible in this view.
 */
956 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
959 struct ptr_node *node;
960 struct cache_extent *cache;
961 struct inode_record *rec = NULL;
964 cache = lookup_cache_extent(inode_cache, ino, 1);
966 node = container_of(cache, struct ptr_node, cache);
/* Copy-before-modify: never change a record that others still reference. */
968 if (mod && rec->refs > 1) {
969 node->data = clone_inode_rec(rec);
970 if (IS_ERR(node->data))
976 rec = calloc(1, sizeof(*rec));
978 return ERR_PTR(-ENOMEM);
/* (u64)-1 serves as the "no extent seen yet" marker (see merge_inode_recs()). */
980 rec->extent_start = (u64)-1;
982 INIT_LIST_HEAD(&rec->backrefs);
983 INIT_LIST_HEAD(&rec->orphan_extents);
984 rec->holes = RB_ROOT;
986 node = malloc(sizeof(*node));
989 return ERR_PTR(-ENOMEM);
991 node->cache.start = ino;
992 node->cache.size = 1;
995 if (ino == BTRFS_FREE_INO_OBJECTID)
998 ret = insert_cache_extent(inode_cache, &node->cache);
1000 return ERR_PTR(-EEXIST);
/*
 * Empty @orphan_extents by repeatedly unlinking its first entry.  The
 * release of each unlinked entry is not visible in this view.
 */
1005 static void free_orphan_data_extents(struct list_head *orphan_extents)
1007 struct orphan_data_extent *orphan;
1009 while (!list_empty(orphan_extents)) {
1010 orphan = list_entry(orphan_extents->next,
1011 struct orphan_data_extent, list);
1012 list_del(&orphan->list);
/*
 * Drop one reference to @rec.  While other references remain, the check
 * on the decremented count takes an early path (not visible in this
 * view); otherwise all backrefs are unlinked and the orphan extents and
 * holes are released.  The final release of the record itself is not
 * visible in this view.
 */
1017 static void free_inode_rec(struct inode_record *rec)
1019 struct inode_backref *backref;
1021 if (--rec->refs > 0)
1024 while (!list_empty(&rec->backrefs)) {
1025 backref = to_inode_backref(rec->backrefs.next);
1026 list_del(&backref->list);
1029 free_orphan_data_extents(&rec->orphan_extents);
1030 free_file_extent_holes(&rec->holes);
/*
 * Test whether @rec is fully resolved: no errors, checked, inode item
 * found, the link count matches the number of links found, and no
 * backrefs are left pending.  The return statements are not visible in
 * this view.
 */
1034 static int can_free_inode_rec(struct inode_record *rec)
1036 if (!rec->errors && rec->checked && rec->found_inode_item &&
1037 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Finalise the error state of @rec and release it if nothing is wrong.
 *
 * Steps, in order:
 *  1. Without an inode item nothing can be decided yet.
 *  2. Backrefs that have both a dir item and a dir index are checked for a
 *     file type mismatch; those that are error free, have an inode ref and
 *     belong to a record with a consistent link count are unlinked.
 *  3. Unless the record is checked and not being merged, stop here.
 *  4. Directory records are checked for size mismatch and stray file
 *     extents; regular files and symlinks for stray dir items, nbytes
 *     mismatch, missing extents/holes and checksum inconsistencies.
 *  5. If can_free_inode_rec() agrees, the record is removed from
 *     @inode_cache and freed.
 *
 * The early-return statements and some releases are not visible in this
 * view.
 */
1042 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1043 struct inode_record *rec)
1045 struct cache_extent *cache;
1046 struct inode_backref *tmp, *backref;
1047 struct ptr_node *node;
1050 if (!rec->found_inode_item)
1053 filetype = imode_to_type(rec->imode);
1054 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1055 if (backref->found_dir_item && backref->found_dir_index) {
1056 if (backref->filetype != filetype)
1057 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1058 if (!backref->errors && backref->found_inode_ref &&
1059 rec->nlink == rec->found_link) {
1060 list_del(&backref->list);
1066 if (!rec->checked || rec->merging)
1069 if (S_ISDIR(rec->imode)) {
1070 if (rec->found_size != rec->isize)
1071 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1072 if (rec->found_file_extent)
1073 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1074 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1075 if (rec->found_dir_item)
1076 rec->errors |= I_ERR_ODD_DIR_ITEM;
1077 if (rec->found_size != rec->nbytes)
1078 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
/* Skipped entirely when the global no_holes flag is set. */
1079 if (rec->nlink > 0 && !no_holes &&
1080 (rec->extent_end < rec->isize ||
1081 first_extent_gap(&rec->holes) < rec->isize))
1082 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1085 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1086 if (rec->found_csum_item && rec->nodatasum)
1087 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1088 if (rec->some_csum_missing && !rec->nodatasum)
1089 rec->errors |= I_ERR_SOME_CSUM_MISSING;
/* A record that reaches this point must not be shared. */
1092 BUG_ON(rec->refs != 1);
1093 if (can_free_inode_rec(rec)) {
1094 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1095 node = container_of(cache, struct ptr_node, cache);
1096 BUG_ON(node->data != rec);
1097 remove_cache_extent(inode_cache, &node->cache);
1099 free_inode_rec(rec);
/*
 * Search @root for an orphan item (objectid BTRFS_ORPHAN_OBJECTID, type
 * BTRFS_ORPHAN_ITEM_KEY).  The search is read-only (no transaction, no
 * insert/COW arguments) and the path is released right afterwards.  The
 * use of @ino in the key and the mapping of the search result to the
 * return value are not visible in this view.
 */
1103 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1105 struct btrfs_path path;
1106 struct btrfs_key key;
1109 key.objectid = BTRFS_ORPHAN_OBJECTID;
1110 key.type = BTRFS_ORPHAN_ITEM_KEY;
1113 btrfs_init_path(&path);
1114 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1115 btrfs_release_path(&path);
/*
 * Record the contents of the INODE_ITEM at @slot of @eb in the inode
 * record currently active in @active_node.
 *
 * The current record must describe key->objectid and must not be shared.
 * A second inode item for the same inode is flagged as
 * I_ERR_DUP_INODE_ITEM.  Otherwise nlink, size, nbytes and mode are copied
 * from the item, the NODATASUM inode flag is examined, and a link count of
 * zero provisionally sets I_ERR_NO_ORPHAN_ITEM.  Finally the record is
 * handed to maybe_free_inode_rec().
 *
 * The return statements and the statement controlled by the NODATASUM test
 * are not visible in this view.
 */
1121 static int process_inode_item(struct extent_buffer *eb,
1122 int slot, struct btrfs_key *key,
1123 struct shared_node *active_node)
1125 struct inode_record *rec;
1126 struct btrfs_inode_item *item;
1128 rec = active_node->current;
1129 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1130 if (rec->found_inode_item) {
1131 rec->errors |= I_ERR_DUP_INODE_ITEM;
1134 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1135 rec->nlink = btrfs_inode_nlink(eb, item);
1136 rec->isize = btrfs_inode_size(eb, item);
1137 rec->nbytes = btrfs_inode_nbytes(eb, item);
1138 rec->imode = btrfs_inode_mode(eb, item);
1139 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1141 rec->found_inode_item = 1;
1142 if (rec->nlink == 0)
1143 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1144 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find the backref of @rec that matches (@dir, name, @namelen), or create
 * a new one.
 *
 * A new backref is allocated with room for the name plus a terminating
 * NUL; only the fixed-size header is zeroed before the name is copied in,
 * and the entry is appended to rec->backrefs.
 *
 * The name parameter, the special handling of BTRFS_MULTIPLE_OBJECTIDS,
 * the allocation-failure check and the return statements are not visible
 * in this view.
 */
1148 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1150 int namelen, u64 dir)
1152 struct inode_backref *backref;
1154 list_for_each_entry(backref, &rec->backrefs, list) {
1155 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1157 if (backref->dir != dir || backref->namelen != namelen)
1159 if (memcmp(name, backref->name, namelen))
1164 backref = malloc(sizeof(*backref) + namelen + 1);
1167 memset(backref, 0, sizeof(*backref));
1169 backref->namelen = namelen;
1170 memcpy(backref->name, name, namelen);
1171 backref->name[namelen] = '\0';
1172 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Record that an item of type @itemtype references inode @ino under the
 * given name in directory @dir.
 *
 * The inode record is fetched for modification, the matching backref is
 * found or created, and @errors is OR-ed into it.  Depending on @itemtype:
 *  - DIR_INDEX: flags duplicates, index mismatch against an earlier inode
 *    ref and file type mismatch against an earlier dir item, then stores
 *    index and file type;
 *  - DIR_ITEM: flags duplicates and file type mismatch against an earlier
 *    dir index, then stores the file type;
 *  - INODE_REF / INODE_EXTREF: flags duplicates and index mismatch against
 *    an earlier dir index, then stores the index and the ref type.
 * Finally the record is handed to maybe_free_inode_rec().
 *
 * Failure to obtain the record is fatal (BUG_ON).  Some lines, including
 * the fallback branch and the return statement, are not visible in this
 * view.
 */
1176 static int add_inode_backref(struct cache_tree *inode_cache,
1177 u64 ino, u64 dir, u64 index,
1178 const char *name, int namelen,
1179 u8 filetype, u8 itemtype, int errors)
1181 struct inode_record *rec;
1182 struct inode_backref *backref;
1184 rec = get_inode_rec(inode_cache, ino, 1);
1185 BUG_ON(IS_ERR(rec));
1186 backref = get_inode_backref(rec, name, namelen, dir);
1189 backref->errors |= errors;
1190 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1191 if (backref->found_dir_index)
1192 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1193 if (backref->found_inode_ref && backref->index != index)
1194 backref->errors |= REF_ERR_INDEX_UNMATCH;
1195 if (backref->found_dir_item && backref->filetype != filetype)
1196 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1198 backref->index = index;
1199 backref->filetype = filetype;
1200 backref->found_dir_index = 1;
1201 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1203 if (backref->found_dir_item)
1204 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1205 if (backref->found_dir_index && backref->filetype != filetype)
1206 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1208 backref->filetype = filetype;
1209 backref->found_dir_item = 1;
1210 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1211 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1212 if (backref->found_inode_ref)
1213 backref->errors |= REF_ERR_DUP_INODE_REF;
1214 if (backref->found_dir_index && backref->index != index)
1215 backref->errors |= REF_ERR_INDEX_UNMATCH;
1217 backref->index = index;
1219 backref->ref_type = itemtype;
1220 backref->found_inode_ref = 1;
1225 maybe_free_inode_rec(inode_cache, rec);
/*
 * Fold everything recorded in @src into @dst (which lives in @dst_cache).
 *
 * Every backref of @src is replayed through add_inode_backref() once per
 * item kind it was found in.  The found_* flags, hole information, link
 * and size counters, extent range and error bits are then combined, and
 * the inode item fields are copied unless @dst already has an inode item,
 * in which case I_ERR_DUP_INODE_ITEM is set.
 *
 * Several lines (the dir_count bookkeeping, error checks, the merging
 * flag handling and the return statements) are not visible in this view.
 */
1229 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1230 struct cache_tree *dst_cache)
1232 struct inode_backref *backref;
1237 list_for_each_entry(backref, &src->backrefs, list) {
1238 if (backref->found_dir_index) {
1239 add_inode_backref(dst_cache, dst->ino, backref->dir,
1240 backref->index, backref->name,
1241 backref->namelen, backref->filetype,
1242 BTRFS_DIR_INDEX_KEY, backref->errors);
1244 if (backref->found_dir_item) {
1246 add_inode_backref(dst_cache, dst->ino,
1247 backref->dir, 0, backref->name,
1248 backref->namelen, backref->filetype,
1249 BTRFS_DIR_ITEM_KEY, backref->errors);
1251 if (backref->found_inode_ref) {
1252 add_inode_backref(dst_cache, dst->ino,
1253 backref->dir, backref->index,
1254 backref->name, backref->namelen, 0,
1255 backref->ref_type, backref->errors);
1259 if (src->found_dir_item)
1260 dst->found_dir_item = 1;
1261 if (src->found_file_extent)
1262 dst->found_file_extent = 1;
1263 if (src->found_csum_item)
1264 dst->found_csum_item = 1;
1265 if (src->some_csum_missing)
1266 dst->some_csum_missing = 1;
/* Holes are copied only when src's first gap lies before dst's. */
1267 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1268 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1273 BUG_ON(src->found_link < dir_count);
1274 dst->found_link += src->found_link - dir_count;
1275 dst->found_size += src->found_size;
/* (u64)-1 in extent_start means "no extent recorded". */
1276 if (src->extent_start != (u64)-1) {
1277 if (dst->extent_start == (u64)-1) {
1278 dst->extent_start = src->extent_start;
1279 dst->extent_end = src->extent_end;
1281 if (dst->extent_end > src->extent_start)
1282 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
/* A gap between dst's end and src's start becomes a hole. */
1283 else if (dst->extent_end < src->extent_start) {
1284 ret = add_file_extent_hole(&dst->holes,
1286 src->extent_start - dst->extent_end);
1288 if (dst->extent_end < src->extent_end)
1289 dst->extent_end = src->extent_end;
1293 dst->errors |= src->errors;
1294 if (src->found_inode_item) {
1295 if (!dst->found_inode_item) {
1296 dst->nlink = src->nlink;
1297 dst->isize = src->isize;
1298 dst->nbytes = src->nbytes;
1299 dst->imode = src->imode;
1300 dst->nodatasum = src->nodatasum;
1301 dst->found_inode_item = 1;
1303 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Move the records cached in @src_node into @dst_node.
 *
 * One reference on @src_node is dropped first.  The root cache is
 * processed and then, in a second pass, the inode cache.  For every entry
 * a new ptr_node is inserted into the destination tree; if the
 * destination already has a record for that inode, the two are merged
 * with merge_inode_recs(), the surviving record is marked checked (and
 * dropped as the destination's current record if it was that), and the
 * source record is released.
 *
 * Afterwards the destination's current record is advanced to the source's
 * current inode when that inode number is larger, or when the destination
 * has no current record.
 *
 * Several lines (the loop constructs, the role of the refs check, some
 * assignments and the return statement) are not visible in this view.
 */
1311 static int splice_shared_node(struct shared_node *src_node,
1312 struct shared_node *dst_node)
1314 struct cache_extent *cache;
1315 struct ptr_node *node, *ins;
1316 struct cache_tree *src, *dst;
1317 struct inode_record *rec, *conflict;
1318 u64 current_ino = 0;
1322 if (--src_node->refs == 0)
1324 if (src_node->current)
1325 current_ino = src_node->current->ino;
1327 src = &src_node->root_cache;
1328 dst = &dst_node->root_cache;
1330 cache = search_cache_extent(src, 0);
1332 node = container_of(cache, struct ptr_node, cache);
/* Advance before the current entry may be removed from the tree. */
1334 cache = next_cache_extent(cache);
1337 remove_cache_extent(src, &node->cache);
1340 ins = malloc(sizeof(*ins));
1342 ins->cache.start = node->cache.start;
1343 ins->cache.size = node->cache.size;
1347 ret = insert_cache_extent(dst, &ins->cache);
1348 if (ret == -EEXIST) {
1349 conflict = get_inode_rec(dst, rec->ino, 1);
1350 BUG_ON(IS_ERR(conflict));
1351 merge_inode_recs(rec, conflict, dst);
1353 conflict->checked = 1;
1354 if (dst_node->current == conflict)
1355 dst_node->current = NULL;
1357 maybe_free_inode_rec(dst, conflict);
1358 free_inode_rec(rec);
/* Second pass: switch from the root caches to the inode caches. */
1365 if (src == &src_node->root_cache) {
1366 src = &src_node->inode_cache;
1367 dst = &dst_node->inode_cache;
1371 if (current_ino > 0 && (!dst_node->current ||
1372 current_ino > dst_node->current->ino)) {
1373 if (dst_node->current) {
1374 dst_node->current->checked = 1;
1375 maybe_free_inode_rec(dst, dst_node->current);
1377 dst_node->current = get_inode_rec(dst, current_ino, 1);
1378 BUG_ON(IS_ERR(dst_node->current));
/*
 * Callback handed to FREE_EXTENT_CACHE_BASED_TREE below: maps a cache_extent
 * back to its enclosing ptr_node and releases an inode_record with
 * free_inode_rec().
 */
1383 static void free_inode_ptr(struct cache_extent *cache)
1385 struct ptr_node *node;
1386 struct inode_record *rec;
1388 node = container_of(cache, struct ptr_node, cache);
1390 free_inode_rec(rec);
/*
 * Presumably expands to the free_inode_recs_tree() helper that
 * enter_shared_node() calls -- confirm against the macro definition.
 */
1394 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up bytenr (with a size of 1) in the shared cache tree and convert the
 * matching cache_extent into the shared_node that embeds it.
 */
1396 static struct shared_node *find_shared_node(struct cache_tree *shared,
1399 struct cache_extent *cache;
1400 struct shared_node *node;
1402 cache = lookup_cache_extent(shared, bytenr, 1);
1404 node = container_of(cache, struct shared_node, cache);
/*
 * Allocate a zero-filled shared_node keyed at bytenr (size 1), initialize its
 * root_cache and inode_cache, and insert it into the shared tree.
 *
 * refs is presumably stored as the initial reference count of the node --
 * TODO confirm.
 */
1410 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1413 struct shared_node *node;
1415 node = calloc(1, sizeof(*node));
1418 node->cache.start = bytenr;
1419 node->cache.size = 1;
1420 cache_tree_init(&node->root_cache);
1421 cache_tree_init(&node->inode_cache);
1424 ret = insert_cache_extent(shared, &node->cache);
/*
 * Called from walk_down_tree() for the tree block at bytenr on the given
 * level.
 *
 * A level equal to wc->active_node is handled up front; otherwise the level
 * must be below the active one (BUG_ON).  The block is looked up in
 * wc->shared.  A shared_node is created with add_shared_node() and installed
 * as the active node for this level.  In the remaining path the node is
 * either released (when the walk sits at the root level of a root whose
 * root_item has zero refs, and the last reference goes away) or its cached
 * records are spliced into the currently active node.
 */
1429 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1430 struct walk_control *wc, int level)
1432 struct shared_node *node;
1433 struct shared_node *dest;
1436 if (level == wc->active_node)
1439 BUG_ON(wc->active_node <= level);
1440 node = find_shared_node(&wc->shared, bytenr);
1442 ret = add_shared_node(&wc->shared, bytenr, refs);
1444 node = find_shared_node(&wc->shared, bytenr);
1445 wc->nodes[level] = node;
1446 wc->active_node = level;
/* Root with zero refs at its top level: drop the cached records. */
1450 if (wc->root_level == wc->active_node &&
1451 btrfs_root_refs(&root->root_item) == 0) {
1452 if (--node->refs == 0) {
1453 free_inode_recs_tree(&node->root_cache);
1454 free_inode_recs_tree(&node->inode_cache);
1455 remove_cache_extent(&wc->shared, &node->cache);
/* Otherwise merge what was collected for this block into the active node. */
1461 dest = wc->nodes[wc->active_node];
1462 splice_shared_node(node, dest);
1463 if (node->refs == 0) {
1464 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Counterpart of enter_shared_node(), called from walk_up_tree() when the
 * walk leaves the level of the active shared node.
 *
 * Nothing is moved when level is the root level.  Otherwise a level above
 * the current one is chosen as the new active node, the slot of the node
 * being left is cleared, and -- if the new active level is below the root
 * level or the root_item still has references -- the records of the node
 * being left are spliced into the new active node.
 */
1470 static int leave_shared_node(struct btrfs_root *root,
1471 struct walk_control *wc, int level)
1473 struct shared_node *node;
1474 struct shared_node *dest;
1477 if (level == wc->root_level)
1480 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1484 BUG_ON(i >= BTRFS_MAX_LEVEL);
1486 node = wc->nodes[wc->active_node];
1487 wc->nodes[wc->active_node] = NULL;
1488 wc->active_node = i;
1490 dest = wc->nodes[wc->active_node];
1491 if (wc->active_node < wc->root_level ||
1492 btrfs_root_refs(&root->root_item) > 0) {
1493 BUG_ON(node->refs <= 1);
1494 splice_shared_node(node, dest);
1496 BUG_ON(node->refs < 2);
1505 * 1 - if the root with id child_root_id is a child of root parent_root_id
1506 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1507 * has other root(s) as parent(s)
1508 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Decide how child_root_id relates to parent_root_id by consulting the tree
 * root in two steps: first a direct search for the key
 * (parent_root_id, ROOT_REF, child_root_id), then a scan over the
 * ROOT_BACKREF items of child_root_id looking for one whose offset is
 * parent_root_id.  When the scan ends without a match the result is 0 if
 * some other parent was seen and 2 if none was.
 */
1510 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1513 struct btrfs_path path;
1514 struct btrfs_key key;
1515 struct extent_buffer *leaf;
1519 btrfs_init_path(&path);
/* Step 1: forward reference stored under the parent. */
1521 key.objectid = parent_root_id;
1522 key.type = BTRFS_ROOT_REF_KEY;
1523 key.offset = child_root_id;
1524 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1528 btrfs_release_path(&path);
/* Step 2: back references stored under the child. */
1532 key.objectid = child_root_id;
1533 key.type = BTRFS_ROOT_BACKREF_KEY;
1535 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1541 leaf = path.nodes[0];
1542 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1543 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1546 leaf = path.nodes[0];
1549 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1550 if (key.objectid != child_root_id ||
1551 key.type != BTRFS_ROOT_BACKREF_KEY)
1556 if (key.offset == parent_root_id) {
1557 btrfs_release_path(&path);
1564 btrfs_release_path(&path);
1567 return has_parent ? 0 : 2;
/*
 * Record the directory entries packed into the DIR_ITEM / DIR_INDEX item at
 * slot of eb against the inode currently tracked by active_node.
 *
 * For each btrfs_dir_item in the item the name length is added to
 * rec->found_size, the name is bounds-checked against the item size and
 * BTRFS_NAME_LEN (REF_ERR_NAME_TOO_LONG), and the name is copied into
 * namebuf.  For DIR_ITEM keys the key offset must equal the hash of the name,
 * otherwise I_ERR_ODD_DIR_ITEM is set and an error is printed.  A backref is
 * then added to inode_cache (location is an INODE_ITEM), to root_cache
 * (location is a ROOT_ITEM), or under BTRFS_MULTIPLE_OBJECTIDS for any other
 * location type.  A DIR_INDEX item for which nritems exceeds 1 is flagged
 * with I_ERR_DUP_DIR_INDEX (nritems presumably counts the entries seen).
 */
1570 static int process_dir_item(struct extent_buffer *eb,
1571 int slot, struct btrfs_key *key,
1572 struct shared_node *active_node)
1582 struct btrfs_dir_item *di;
1583 struct inode_record *rec;
1584 struct cache_tree *root_cache;
1585 struct cache_tree *inode_cache;
1586 struct btrfs_key location;
1587 char namebuf[BTRFS_NAME_LEN];
1589 root_cache = &active_node->root_cache;
1590 inode_cache = &active_node->inode_cache;
1591 rec = active_node->current;
1592 rec->found_dir_item = 1;
1594 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1595 total = btrfs_item_size_nr(eb, slot);
1596 while (cur < total) {
1598 btrfs_dir_item_key_to_cpu(eb, di, &location);
1599 name_len = btrfs_dir_name_len(eb, di);
1600 data_len = btrfs_dir_data_len(eb, di);
1601 filetype = btrfs_dir_type(eb, di);
1603 rec->found_size += name_len;
/* Name must fit inside the item and inside namebuf. */
1604 if (cur + sizeof(*di) + name_len > total ||
1605 name_len > BTRFS_NAME_LEN) {
1606 error = REF_ERR_NAME_TOO_LONG;
1608 if (cur + sizeof(*di) > total)
1610 len = min_t(u32, total - cur - sizeof(*di),
1617 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
/* DIR_ITEM keys are indexed by the hash of the entry name. */
1619 if (key->type == BTRFS_DIR_ITEM_KEY &&
1620 key->offset != btrfs_name_hash(namebuf, len)) {
1621 rec->errors |= I_ERR_ODD_DIR_ITEM;
1622 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1623 key->objectid, key->offset, namebuf, len, filetype,
1624 key->offset, btrfs_name_hash(namebuf, len));
1627 if (location.type == BTRFS_INODE_ITEM_KEY) {
1628 add_inode_backref(inode_cache, location.objectid,
1629 key->objectid, key->offset, namebuf,
1630 len, filetype, key->type, error);
1631 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1632 add_inode_backref(root_cache, location.objectid,
1633 key->objectid, key->offset,
1634 namebuf, len, filetype,
1637 fprintf(stderr, "invalid location in dir item %u\n",
1639 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1640 key->objectid, key->offset, namebuf,
1641 len, filetype, key->type, error);
/* Step to the next packed entry: header + name + data. */
1644 len = sizeof(*di) + name_len + data_len;
1645 di = (struct btrfs_dir_item *)((char *)di + len);
1648 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1649 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record every btrfs_inode_ref packed into the INODE_REF item at slot of eb.
 *
 * For each ref the name length and index are read; a name that would cross
 * the item boundary or exceed BTRFS_NAME_LEN is marked with
 * REF_ERR_NAME_TOO_LONG and only the part that fits is read.  The backref is
 * added to the inode cache of active_node with key->objectid as the inode and
 * key->offset as the directory.
 */
1654 static int process_inode_ref(struct extent_buffer *eb,
1655 int slot, struct btrfs_key *key,
1656 struct shared_node *active_node)
1664 struct cache_tree *inode_cache;
1665 struct btrfs_inode_ref *ref;
1666 char namebuf[BTRFS_NAME_LEN];
1668 inode_cache = &active_node->inode_cache;
1670 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1671 total = btrfs_item_size_nr(eb, slot);
1672 while (cur < total) {
1673 name_len = btrfs_inode_ref_name_len(eb, ref);
1674 index = btrfs_inode_ref_index(eb, ref);
1676 /* inode_ref + namelen should not cross item boundary */
1677 if (cur + sizeof(*ref) + name_len > total ||
1678 name_len > BTRFS_NAME_LEN) {
1679 if (total < cur + sizeof(*ref))
1682 /* Still try to read out the remaining part */
1683 len = min_t(u32, total - cur - sizeof(*ref),
1685 error = REF_ERR_NAME_TOO_LONG;
1691 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1692 add_inode_backref(inode_cache, key->objectid, key->offset,
1693 index, namebuf, len, 0, key->type, error);
/* Step to the next packed ref: header + name. */
1695 len = sizeof(*ref) + name_len;
1696 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Record every btrfs_inode_extref packed into the INODE_EXTREF item at slot
 * of eb.
 *
 * Unlike a plain INODE_REF, the parent directory is read from the extref
 * itself rather than from the key offset.  A name longer than BTRFS_NAME_LEN
 * is read truncated to BTRFS_NAME_LEN and marked REF_ERR_NAME_TOO_LONG.
 */
1702 static int process_inode_extref(struct extent_buffer *eb,
1703 int slot, struct btrfs_key *key,
1704 struct shared_node *active_node)
1713 struct cache_tree *inode_cache;
1714 struct btrfs_inode_extref *extref;
1715 char namebuf[BTRFS_NAME_LEN];
1717 inode_cache = &active_node->inode_cache;
1719 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1720 total = btrfs_item_size_nr(eb, slot);
1721 while (cur < total) {
1722 name_len = btrfs_inode_extref_name_len(eb, extref);
1723 index = btrfs_inode_extref_index(eb, extref);
1724 parent = btrfs_inode_extref_parent(eb, extref);
1725 if (name_len <= BTRFS_NAME_LEN) {
1729 len = BTRFS_NAME_LEN;
1730 error = REF_ERR_NAME_TOO_LONG;
1732 read_extent_buffer(eb, namebuf,
1733 (unsigned long)(extref + 1), len);
1734 add_inode_backref(inode_cache, key->objectid, parent,
1735 index, namebuf, len, 0, key->type, error);
/* Step to the next packed extref: header + name. */
1737 len = sizeof(*extref) + name_len;
1738 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Walk the EXTENT_CSUM items of the csum tree that intersect the byte range
 * [start, start + len) and work out how much of it they cover; the total is
 * presumably reported through *found -- TODO confirm.
 *
 * Each csum item covers (item size / csum size) sectors starting at its key
 * offset.  Because the item covering start may begin before it, the slot in
 * front of the search position is inspected when the search is not an exact
 * match.
 */
1745 static int count_csum_range(struct btrfs_root *root, u64 start,
1746 u64 len, u64 *found)
1748 struct btrfs_key key;
1749 struct btrfs_path path;
1750 struct extent_buffer *leaf;
1755 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1757 btrfs_init_path(&path);
1759 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1761 key.type = BTRFS_EXTENT_CSUM_KEY;
1763 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* No exact match: the previous item may still overlap the range. */
1767 if (ret > 0 && path.slots[0] > 0) {
1768 leaf = path.nodes[0];
1769 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1770 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1771 key.type == BTRFS_EXTENT_CSUM_KEY)
1776 leaf = path.nodes[0];
1777 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1778 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1783 leaf = path.nodes[0];
1786 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1787 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1788 key.type != BTRFS_EXTENT_CSUM_KEY)
1791 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1792 if (key.offset >= start + len)
1795 if (key.offset > start)
1798 size = btrfs_item_size_nr(leaf, path.slots[0]);
/* End of the byte range this csum item covers. */
1799 csum_end = key.offset + (size / csum_size) *
1800 root->fs_info->sectorsize;
1801 if (csum_end > start) {
1802 size = min(csum_end - start, len);
1811 btrfs_release_path(&path);
/*
 * Account one EXTENT_DATA item (key, at slot of eb) against the inode record
 * currently tracked by active_node.
 *
 * - Tracks the contiguous range covered so far in rec->extent_start and
 *   rec->extent_end: an item starting before extent_end is an overlap
 *   (I_ERR_FILE_EXTENT_OVERLAP), one starting after it leaves a gap that is
 *   recorded with add_file_extent_hole().
 * - Validates the item by type.  Inline extents add their inline length to
 *   rec->found_size and are rounded up to the sector size.  Regular and
 *   preallocated extents must have a non-zero, sector-aligned num_bytes, must
 *   not exceed ram_bytes, and (prealloc only) must not be compressed,
 *   encrypted or otherwise encoded; they count towards found_size only when
 *   disk_bytenr is non-zero.  Violations set I_ERR_BAD_FILE_EXTENT.
 * - For extents with a disk_bytenr outside the data reloc tree, the checksum
 *   coverage is measured with count_csum_range(): regular extents set
 *   found_csum_item / some_csum_missing, preallocated extents can be flagged
 *   with I_ERR_ODD_CSUM_ITEM.
 */
1817 static int process_file_extent(struct btrfs_root *root,
1818 struct extent_buffer *eb,
1819 int slot, struct btrfs_key *key,
1820 struct shared_node *active_node)
1822 struct inode_record *rec;
1823 struct btrfs_file_extent_item *fi;
1825 u64 disk_bytenr = 0;
1826 u64 extent_offset = 0;
1827 u64 mask = root->fs_info->sectorsize - 1;
1831 rec = active_node->current;
1832 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1833 rec->found_file_extent = 1;
/* (u64)-1 marks a record that has not seen any file extent yet. */
1835 if (rec->extent_start == (u64)-1) {
1836 rec->extent_start = key->offset;
1837 rec->extent_end = key->offset;
1840 if (rec->extent_end > key->offset)
1841 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1842 else if (rec->extent_end < key->offset) {
1843 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1844 key->offset - rec->extent_end);
1849 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1850 extent_type = btrfs_file_extent_type(eb, fi);
1852 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1853 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1855 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1856 rec->found_size += num_bytes;
/* Round the inline length up to a whole sector. */
1857 num_bytes = (num_bytes + mask) & ~mask;
1858 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1859 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1860 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1861 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1862 extent_offset = btrfs_file_extent_offset(eb, fi);
1863 if (num_bytes == 0 || (num_bytes & mask))
1864 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1865 if (num_bytes + extent_offset >
1866 btrfs_file_extent_ram_bytes(eb, fi))
1867 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1868 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1869 (btrfs_file_extent_compression(eb, fi) ||
1870 btrfs_file_extent_encryption(eb, fi) ||
1871 btrfs_file_extent_other_encoding(eb, fi)))
1872 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1873 if (disk_bytenr > 0)
1874 rec->found_size += num_bytes;
1876 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1878 rec->extent_end = key->offset + num_bytes;
1881 * The data reloc tree will copy full extents into its inode and then
1882 * copy the corresponding csums. Because the extent it copied could be
1883 * a preallocated extent that hasn't been written to yet there may be no
1884 * csums to copy, ergo we won't have csums for our file extent. This is
1885 * ok so just don't bother checking csums if the inode belongs to the
1888 if (disk_bytenr > 0 &&
1889 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1891 if (btrfs_file_extent_compression(eb, fi))
1892 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1894 disk_bytenr += extent_offset;
1896 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1899 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1901 rec->found_csum_item = 1;
1902 if (found < num_bytes)
1903 rec->some_csum_missing = 1;
1904 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1906 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Feed every item of leaf eb into the inode records of the active shared
 * node.
 *
 * Items with the free space objectid and orphan items are special-cased ahead
 * of the per-inode handling.  Whenever the key objectid moves past the inode
 * currently tracked, that record is marked checked and handed to
 * maybe_free_inode_rec(), and the record for the new inode is fetched from
 * the inode cache.  The item is then dispatched by key type to the matching
 * process_*() helper.
 */
1912 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1913 struct walk_control *wc)
1915 struct btrfs_key key;
1919 struct cache_tree *inode_cache;
1920 struct shared_node *active_node;
1922 if (wc->root_level == wc->active_node &&
1923 btrfs_root_refs(&root->root_item) == 0)
1926 active_node = wc->nodes[wc->active_node];
1927 inode_cache = &active_node->inode_cache;
1928 nritems = btrfs_header_nritems(eb);
1929 for (i = 0; i < nritems; i++) {
1930 btrfs_item_key_to_cpu(eb, &key, i);
1932 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1934 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Moved on to a higher inode number: retire the previous record. */
1937 if (active_node->current == NULL ||
1938 active_node->current->ino < key.objectid) {
1939 if (active_node->current) {
1940 active_node->current->checked = 1;
1941 maybe_free_inode_rec(inode_cache,
1942 active_node->current);
1944 active_node->current = get_inode_rec(inode_cache,
1946 BUG_ON(IS_ERR(active_node->current));
1949 case BTRFS_DIR_ITEM_KEY:
1950 case BTRFS_DIR_INDEX_KEY:
1951 ret = process_dir_item(eb, i, &key, active_node);
1953 case BTRFS_INODE_REF_KEY:
1954 ret = process_inode_ref(eb, i, &key, active_node);
1956 case BTRFS_INODE_EXTREF_KEY:
1957 ret = process_inode_extref(eb, i, &key, active_node);
1959 case BTRFS_INODE_ITEM_KEY:
1960 ret = process_inode_item(eb, i, &key, active_node);
1962 case BTRFS_EXTENT_DATA_KEY:
1963 ret = process_file_extent(root, eb, i, &key,
/*
 * Per-level bookkeeping accessed through struct node_refs pointers by
 * update_nodes_refs() and the tree walkers: the last tree block seen at each
 * level, the extent reference count looked up for it, and whether that block
 * still has to be checked in the current root.
 */
1974 u64 bytenr[BTRFS_MAX_LEVEL];
1975 u64 refs[BTRFS_MAX_LEVEL];
1976 int need_check[BTRFS_MAX_LEVEL];
/* Prototypes needed by process_one_leaf_v2(), which calls both functions. */
1979 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1980 struct node_refs *nrefs, u64 level);
1981 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1982 unsigned int ext_ref);
1985 * Returns >0 Found error, not fatal, should continue
1986 * Returns <0 Fatal error, must exit the whole check
1987 * Returns 0 No errors found
/*
 * Check the inodes stored in the leaf at path->nodes[0].
 *
 * The slot is first advanced to the first INODE_ITEM or to the first change
 * of inode number.  check_inode_item() is then called repeatedly and its
 * result is OR-ed into an error bitmap.  check_inode_item() may move the
 * path to another leaf; when that happens the path is re-walked from the
 * root level down, refreshing nrefs for every level whose block changed, so
 * that blocks not needing a check in this root can be skipped.
 */
1989 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1990 struct node_refs *nrefs, int *level, int ext_ref)
1992 struct extent_buffer *cur = path->nodes[0];
1993 struct btrfs_key key;
1997 int root_level = btrfs_header_level(root->node);
1999 int ret = 0; /* Final return value */
2000 int err = 0; /* Positive error bitmap */
/* Remember which leaf we started on to detect a leaf switch later. */
2002 cur_bytenr = cur->start;
2004 /* skip to first inode item or the first inode number change */
2005 nritems = btrfs_header_nritems(cur);
2006 for (i = 0; i < nritems; i++) {
2007 btrfs_item_key_to_cpu(cur, &key, i);
2009 first_ino = key.objectid;
2010 if (key.type == BTRFS_INODE_ITEM_KEY ||
2011 (first_ino && first_ino != key.objectid))
2015 path->slots[0] = nritems;
2021 err |= check_inode_item(root, path, ext_ref);
2023 /* modify cur since check_inode_item may change path */
2024 cur = path->nodes[0];
2026 if (err & LAST_ITEM)
2029 /* still have inode items in this leaf */
2030 if (cur->start == cur_bytenr)
2034 * we have switched to another leaf, above nodes may
2035 * have changed, here walk down the path, if a node
2036 * or leaf is shared, check whether we can skip this
2039 for (i = root_level; i >= 0; i--) {
2040 if (path->nodes[i]->start == nrefs->bytenr[i])
2043 ret = update_nodes_refs(root,
2044 path->nodes[i]->start,
2049 if (!nrefs->need_check[i]) {
2055 for (i = 0; i < *level; i++) {
2056 free_extent_buffer(path->nodes[i]);
2057 path->nodes[i] = NULL;
/*
 * Issue readahead for the children of node, starting at slot and running to
 * the last pointer in the node, using each pointer's block number and
 * generation.
 */
2066 static void reada_walk_down(struct btrfs_root *root,
2067 struct extent_buffer *node, int slot)
2069 struct btrfs_fs_info *fs_info = root->fs_info;
2076 level = btrfs_header_level(node);
2080 nritems = btrfs_header_nritems(node);
2081 for (i = slot; i < nritems; i++) {
2082 bytenr = btrfs_node_blockptr(node, i);
2083 ptr_gen = btrfs_node_ptr_generation(node, i);
2084 readahead_tree_block(fs_info, bytenr, ptr_gen);
2089 * Check the child node/leaf by the following condition:
2090 * 1. the first item key of the node/leaf should be the same with the one
2092 * 2. block in parent node should match the child node/leaf.
2093 * 3. generation of parent node and child's header should be consistent.
2095 * Or the child node/leaf pointed by the key in parent is not valid.
2097 * We hope to check leaf owner too, but since subvol may share leaves,
2098 * which makes leaf owner check not so strong, key check should be
2099 * sufficient enough for that case.
/*
 * Cross-check the pointer at slot in parent against the child block it leads
 * to: first key, block number and generation must all agree.  Each mismatch
 * is reported on stderr.
 */
2101 static int check_child_node(struct extent_buffer *parent, int slot,
2102 struct extent_buffer *child)
2104 struct btrfs_key parent_key;
2105 struct btrfs_key child_key;
2108 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* The first key lives in a different place in leaves and in nodes. */
2109 if (btrfs_header_level(child) == 0)
2110 btrfs_item_key_to_cpu(child, &child_key, 0);
2112 btrfs_node_key_to_cpu(child, &child_key, 0);
2114 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2117 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2118 parent_key.objectid, parent_key.type, parent_key.offset,
2119 child_key.objectid, child_key.type, child_key.offset);
2121 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2123 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2124 btrfs_node_blockptr(parent, slot),
2125 btrfs_header_bytenr(child));
/*
 * NOTE(review): in the message below the wanted value comes from the child
 * header and the have value from the parent pointer, the reverse of the
 * key and block messages above -- confirm which order is intended.
 */
2127 if (btrfs_node_ptr_generation(parent, slot) !=
2128 btrfs_header_generation(child)) {
2130 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2131 btrfs_header_generation(child),
2132 btrfs_node_ptr_generation(parent, slot));
2138 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2139 in every fs or file tree check. Here we find all its root ids, and only check
2140 * it in the fs or file tree which has the smallest root id.
/*
 * Decide whether a tree block referenced by the given set of roots should be
 * examined while walking root.  A block with exactly one referencing root is
 * handled up front; otherwise the first entry of the ulist rb-tree is
 * compared with root->objectid.
 */
2142 static int need_check(struct btrfs_root *root, struct ulist *roots)
2144 struct rb_node *node;
2145 struct ulist_node *u;
2147 if (roots->nnodes == 1)
2150 node = rb_first(&roots->root);
2151 u = rb_entry(node, struct ulist_node, rb_node);
2153 * current root id is not smallest, we skip it and let it be checked
2154 * in the fs or file tree who hash the smallest root id.
2156 if (root->objectid != u->val)
2163 * for a tree node or leaf, we record its reference count, so later if we still
2164 * process this node or leaf, don't need to compute its reference count again.
/*
 * Refresh the cached state in nrefs for the tree block at bytenr on the
 * given level.
 *
 * Nothing is looked up when nrefs already describes this block.  Otherwise
 * the extent reference count is fetched with btrfs_lookup_extent_info() and
 * stored together with bytenr.  need_check[level] is either computed by
 * need_check() from the roots returned by btrfs_find_all_roots(), or set to 1.
 */
2166 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2167 struct node_refs *nrefs, u64 level)
2171 struct ulist *roots;
2173 if (nrefs->bytenr[level] != bytenr) {
2174 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2175 level, 1, &refs, NULL);
2179 nrefs->bytenr[level] = bytenr;
2180 nrefs->refs[level] = refs;
2182 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2187 check = need_check(root, roots);
2189 nrefs->need_check[level] = check;
2191 nrefs->need_check[level] = 1;
/*
 * Descend from path->nodes[*level] towards the leaves, processing each leaf
 * with process_one_leaf().
 *
 * At every step the reference count of the block is taken from nrefs when
 * cached there, otherwise looked up with btrfs_lookup_extent_info() and
 * cached.  Blocks can be routed through enter_shared_node() so that shared
 * subtrees are not rescanned.  A child block is taken from the block cache
 * or read from disk (with readahead of its siblings).  A block that cannot
 * be read is recorded with btrfs_add_corrupt_extent_record().  Every child
 * is validated with check_child_node() and btrfs_check_leaf() or
 * btrfs_check_node() before the walk steps into it.
 */
2198 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2199 struct walk_control *wc, int *level,
2200 struct node_refs *nrefs)
2202 enum btrfs_tree_block_status status;
2205 struct btrfs_fs_info *fs_info = root->fs_info;
2206 struct extent_buffer *next;
2207 struct extent_buffer *cur;
2211 WARN_ON(*level < 0);
2212 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reference count of the starting block, from the cache if possible. */
2214 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2215 refs = nrefs->refs[*level];
2218 ret = btrfs_lookup_extent_info(NULL, root,
2219 path->nodes[*level]->start,
2220 *level, 1, &refs, NULL);
2225 nrefs->bytenr[*level] = path->nodes[*level]->start;
2226 nrefs->refs[*level] = refs;
2230 ret = enter_shared_node(root, path->nodes[*level]->start,
2238 while (*level >= 0) {
2239 WARN_ON(*level < 0);
2240 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2241 cur = path->nodes[*level];
2243 if (btrfs_header_level(cur) != *level)
2246 if (path->slots[*level] >= btrfs_header_nritems(cur))
2249 ret = process_one_leaf(root, cur, wc);
2254 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2255 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2257 if (bytenr == nrefs->bytenr[*level - 1]) {
2258 refs = nrefs->refs[*level - 1];
2260 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2261 *level - 1, 1, &refs, NULL);
2265 nrefs->bytenr[*level - 1] = bytenr;
2266 nrefs->refs[*level - 1] = refs;
2271 ret = enter_shared_node(root, bytenr, refs,
2274 path->slots[*level]++;
/* Fetch the child from the cache, falling back to a disk read. */
2279 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2280 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2281 free_extent_buffer(next);
2282 reada_walk_down(root, cur, path->slots[*level]);
2283 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2284 if (!extent_buffer_uptodate(next)) {
2285 struct btrfs_key node_key;
2287 btrfs_node_key_to_cpu(path->nodes[*level],
2289 path->slots[*level]);
2290 btrfs_add_corrupt_extent_record(root->fs_info,
2292 path->nodes[*level]->start,
2293 root->fs_info->nodesize,
2300 ret = check_child_node(cur, path->slots[*level], next);
2302 free_extent_buffer(next);
2307 if (btrfs_is_leaf(next))
2308 status = btrfs_check_leaf(root, NULL, next);
2310 status = btrfs_check_node(root, NULL, next);
2311 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2312 free_extent_buffer(next);
/* Step down: the child replaces whatever the path held one level below. */
2317 *level = *level - 1;
2318 free_extent_buffer(path->nodes[*level]);
2319 path->nodes[*level] = next;
2320 path->slots[*level] = 0;
/* Mark the current node as fully consumed for the walk back up. */
2323 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Same prototype as the one declared ahead of process_one_leaf_v2(); a
 * harmless repeated declaration.
 */
2327 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2328 unsigned int ext_ref);
2331 * Returns >0 Found error, should continue
2332 * Returns <0 Fatal error, must exit the whole check
2333 * Returns 0 No errors found
/*
 * Variant of walk_down_tree() built on update_nodes_refs() and
 * process_one_leaf_v2() instead of the shared-node cache.
 *
 * Every block on the way down is validated with btrfs_check_leaf() or
 * btrfs_check_node().  For each child pointer nrefs is refreshed first, and
 * a child whose need_check flag is clear is skipped by advancing the slot.
 * Remaining children are fetched from the block cache or from disk, verified
 * with check_child_node(), and then entered.
 */
2335 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2336 int *level, struct node_refs *nrefs, int ext_ref)
2338 enum btrfs_tree_block_status status;
2341 struct btrfs_fs_info *fs_info = root->fs_info;
2342 struct extent_buffer *next;
2343 struct extent_buffer *cur;
2346 WARN_ON(*level < 0);
2347 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2349 ret = update_nodes_refs(root, path->nodes[*level]->start,
2354 while (*level >= 0) {
2355 WARN_ON(*level < 0);
2356 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2357 cur = path->nodes[*level];
2359 if (btrfs_header_level(cur) != *level)
2362 if (path->slots[*level] >= btrfs_header_nritems(cur))
2364 /* Don't forget to check leaf/node validation */
2366 ret = btrfs_check_leaf(root, NULL, cur);
2367 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2371 ret = process_one_leaf_v2(root, path, nrefs,
2373 cur = path->nodes[*level];
2376 ret = btrfs_check_node(root, NULL, cur);
2377 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2382 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2383 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2385 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
/* Child does not need checking in this root: move to the next pointer. */
2388 if (!nrefs->need_check[*level - 1]) {
2389 path->slots[*level]++;
2393 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2394 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2395 free_extent_buffer(next);
2396 reada_walk_down(root, cur, path->slots[*level]);
2397 next = read_tree_block(fs_info, bytenr, ptr_gen);
2398 if (!extent_buffer_uptodate(next)) {
2399 struct btrfs_key node_key;
2401 btrfs_node_key_to_cpu(path->nodes[*level],
2403 path->slots[*level]);
2404 btrfs_add_corrupt_extent_record(fs_info,
2406 path->nodes[*level]->start,
2414 ret = check_child_node(cur, path->slots[*level], next);
2418 if (btrfs_is_leaf(next))
2419 status = btrfs_check_leaf(root, NULL, next);
2421 status = btrfs_check_node(root, NULL, next);
2422 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2423 free_extent_buffer(next);
/* Step down into the verified child. */
2428 *level = *level - 1;
2429 free_extent_buffer(path->nodes[*level]);
2430 path->nodes[*level] = next;
2431 path->slots[*level] = 0;
/*
 * Climb from *level towards the root looking for a node that still has an
 * unvisited slot to its right.  Levels that are exhausted have their extent
 * buffer released and their path entry cleared; if such a level is the
 * active shared node, leave_shared_node() is called for it.
 */
2436 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2437 struct walk_control *wc, int *level)
2440 struct extent_buffer *leaf;
2442 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2443 leaf = path->nodes[i];
/* More slots remain at this level. */
2444 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2449 free_extent_buffer(path->nodes[*level]);
2450 path->nodes[*level] = NULL;
2451 BUG_ON(*level > wc->active_node);
2452 if (*level == wc->active_node)
2453 leave_shared_node(root, wc, *level);
/*
 * Same upward walk as walk_up_tree() but without any shared-node handling:
 * exhausted levels only have their extent buffer released and their path
 * entry cleared.
 */
2460 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2464 struct extent_buffer *leaf;
2466 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2467 leaf = path->nodes[i];
/* More slots remain at this level. */
2468 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2473 free_extent_buffer(path->nodes[*level]);
2474 path->nodes[*level] = NULL;
/*
 * Sanity-check the inode record of a root directory.  The visible conditions
 * examine: presence of the inode item and absence of recorded errors; nlink
 * equal to 1 with found_link equal to 0; a non-empty backref list; and a
 * first backref that is an inode ref with index 0 and the two-byte name
 * consisting of two dots, with no dir index or dir item attached to it.
 */
2481 static int check_root_dir(struct inode_record *rec)
2483 struct inode_backref *backref;
2486 if (!rec->found_inode_item || rec->errors)
2488 if (rec->nlink != 1 || rec->found_link != 0)
2490 if (list_empty(&rec->backrefs))
2492 backref = to_inode_backref(rec->backrefs.next);
2493 if (!backref->found_inode_ref)
2495 if (backref->index != 0 || backref->namelen != 2 ||
2496 memcmp(backref->name, "..", 2))
2498 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size field of the inode item of rec->ino with rec->found_size
 * and clear I_ERR_DIR_ISIZE_WRONG on the record.
 *
 * The search key uses offset (u64)-1, so the item of interest is presumably
 * the one just before the returned slot (hence the slot 0 special case and
 * the objectid re-check) -- TODO confirm.
 */
2505 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2506 struct btrfs_root *root, struct btrfs_path *path,
2507 struct inode_record *rec)
2509 struct btrfs_inode_item *ei;
2510 struct btrfs_key key;
2513 key.objectid = rec->ino;
2514 key.type = BTRFS_INODE_ITEM_KEY;
2515 key.offset = (u64)-1;
2517 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2521 if (!path->slots[0]) {
2528 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2529 if (key.objectid != rec->ino) {
2534 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2535 struct btrfs_inode_item);
2536 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2537 btrfs_mark_buffer_dirty(path->nodes[0]);
2538 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2539 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2540 root->root_key.objectid);
2542 btrfs_release_path(path);
/*
 * Insert an orphan item for rec->ino with btrfs_add_orphan_item(), release
 * the path, and clear I_ERR_NO_ORPHAN_ITEM on the record.
 */
2546 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2547 struct btrfs_root *root,
2548 struct btrfs_path *path,
2549 struct inode_record *rec)
2553 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2554 btrfs_release_path(path);
2556 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field of the inode item of rec->ino with
 * rec->found_size and clear I_ERR_FILE_NBYTES_WRONG on the record.
 */
2560 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2561 struct btrfs_root *root,
2562 struct btrfs_path *path,
2563 struct inode_record *rec)
2565 struct btrfs_inode_item *ei;
2566 struct btrfs_key key;
2569 key.objectid = rec->ino;
2570 key.type = BTRFS_INODE_ITEM_KEY;
2573 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2580 /* Since ret == 0, no need to check anything */
2581 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2582 struct btrfs_inode_item);
2583 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2584 btrfs_mark_buffer_dirty(path->nodes[0]);
2585 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2586 printf("reset nbytes for ino %llu root %llu\n",
2587 rec->ino, root->root_key.objectid);
2589 btrfs_release_path(path);
/*
 * Recreate the DIR_INDEX item described by backref for inode rec.
 *
 * Within its own transaction an empty item keyed
 * (backref->dir, DIR_INDEX, backref->index) is inserted and filled in with a
 * location pointing at the inode item of rec->ino, a file type derived from
 * rec->imode, a data length of zero, and the backref name.  Afterwards the
 * backref is marked as having a dir index, and the name length is added to
 * found_size of the parent directory record, whose I_ERR_DIR_ISIZE_WRONG
 * flag is updated to match the new total.
 */
2593 static int add_missing_dir_index(struct btrfs_root *root,
2594 struct cache_tree *inode_cache,
2595 struct inode_record *rec,
2596 struct inode_backref *backref)
2598 struct btrfs_path path;
2599 struct btrfs_trans_handle *trans;
2600 struct btrfs_dir_item *dir_item;
2601 struct extent_buffer *leaf;
2602 struct btrfs_key key;
2603 struct btrfs_disk_key disk_key;
2604 struct inode_record *dir_rec;
2605 unsigned long name_ptr;
/* The item holds the dir_item header followed directly by the name. */
2606 u32 data_size = sizeof(*dir_item) + backref->namelen;
2609 trans = btrfs_start_transaction(root, 1);
2611 return PTR_ERR(trans);
2613 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2614 (unsigned long long)rec->ino);
2616 btrfs_init_path(&path);
2617 key.objectid = backref->dir;
2618 key.type = BTRFS_DIR_INDEX_KEY;
2619 key.offset = backref->index;
2620 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2623 leaf = path.nodes[0];
2624 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2626 disk_key.objectid = cpu_to_le64(rec->ino);
2627 disk_key.type = BTRFS_INODE_ITEM_KEY;
2628 disk_key.offset = 0;
2630 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2631 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2632 btrfs_set_dir_data_len(leaf, dir_item, 0);
2633 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2634 name_ptr = (unsigned long)(dir_item + 1);
2635 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2636 btrfs_mark_buffer_dirty(leaf);
2637 btrfs_release_path(&path);
/* NOTE(review): the commit result is not checked here. */
2638 btrfs_commit_transaction(trans, root);
2640 backref->found_dir_index = 1;
2641 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2642 BUG_ON(IS_ERR(dir_rec));
/* Keep the in-memory size accounting of the directory in step. */
2645 dir_rec->found_size += backref->namelen;
2646 if (dir_rec->found_size == dir_rec->isize &&
2647 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2648 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2649 if (dir_rec->found_size != dir_rec->isize)
2650 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Remove the DIR_INDEX entry described by backref, inside its own
 * transaction.
 *
 * The entry is located with btrfs_lookup_dir_index().  One path ends the
 * transaction right after the lookup without deleting anything.  Otherwise
 * either the whole item is deleted with btrfs_del_item() or just the one
 * name with btrfs_delete_one_dir_name(), and the transaction is committed.
 */
2655 static int delete_dir_index(struct btrfs_root *root,
2656 struct inode_backref *backref)
2658 struct btrfs_trans_handle *trans;
2659 struct btrfs_dir_item *di;
2660 struct btrfs_path path;
2663 trans = btrfs_start_transaction(root, 1);
2665 return PTR_ERR(trans);
2667 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2668 (unsigned long long)backref->dir,
2669 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2670 (unsigned long long)root->objectid);
2672 btrfs_init_path(&path);
2673 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2674 backref->name, backref->namelen,
2675 backref->index, -1);
2678 btrfs_release_path(&path);
2679 btrfs_commit_transaction(trans, root);
2686 ret = btrfs_del_item(trans, root, &path);
2688 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2690 btrfs_release_path(&path);
2691 btrfs_commit_transaction(trans, root);
/*
 * Build a btrfs_inode_item on the stack from the given size, nbytes, nlink
 * and mode, stamp it with the transaction id of trans as its generation and
 * the current time as ctime and mtime (otime is zeroed), and insert it for
 * inode ino with btrfs_insert_inode().  A warning is printed telling the
 * user that the recreated inode may be incomplete.
 *
 * NOTE(review): only the nanosecond part of atime is written below; no call
 * sets the atime seconds -- confirm whether that is intended.
 */
2695 static int __create_inode_item(struct btrfs_trans_handle *trans,
2696 struct btrfs_root *root, u64 ino, u64 size,
2697 u64 nbytes, u64 nlink, u32 mode)
2699 struct btrfs_inode_item ii;
2700 time_t now = time(NULL);
2703 btrfs_set_stack_inode_size(&ii, size);
2704 btrfs_set_stack_inode_nbytes(&ii, nbytes);
2705 btrfs_set_stack_inode_nlink(&ii, nlink);
2706 btrfs_set_stack_inode_mode(&ii, mode);
2707 btrfs_set_stack_inode_generation(&ii, trans->transid);
2708 btrfs_set_stack_timespec_nsec(&ii.atime, 0);
2709 btrfs_set_stack_timespec_sec(&ii.ctime, now);
2710 btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
2711 btrfs_set_stack_timespec_sec(&ii.mtime, now);
2712 btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
2713 btrfs_set_stack_timespec_sec(&ii.otime, 0);
2714 btrfs_set_stack_timespec_nsec(&ii.otime, 0);
2716 ret = btrfs_insert_inode(trans, root, ino, &ii);
2719 warning("root %llu inode %llu recreating inode item, this may "
2720 "be incomplete, please check permissions and content after "
2721 "the fsck completes.\n", (unsigned long long)root->objectid,
2722 (unsigned long long)ino);
/*
 * Create a placeholder inode item for ino with size, nbytes and nlink all
 * zero.  The mode is a directory for BTRFS_FT_DIR and a regular file for any
 * other filetype, with permission bits 0755.
 */
2727 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2728 struct btrfs_root *root, u64 ino,
2731 u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2733 return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
/*
 * Recreate the missing inode item of rec inside its own transaction.
 *
 * nlink is 1 for a root directory and rec->found_link otherwise.  A record
 * that has a dir item becomes a directory sized by rec->found_size (with a
 * message on stderr if file extents were seen as well); any other record
 * becomes a regular file sized by rec->extent_end.  Permission bits are 0755
 * in both cases.
 */
2736 static int create_inode_item(struct btrfs_root *root,
2737 struct inode_record *rec, int root_dir)
2739 struct btrfs_trans_handle *trans;
2745 trans = btrfs_start_transaction(root, 1);
2746 if (IS_ERR(trans)) {
2747 ret = PTR_ERR(trans);
2751 nlink = root_dir ? 1 : rec->found_link;
2752 if (rec->found_dir_item) {
2753 if (rec->found_file_extent)
2754 fprintf(stderr, "root %llu inode %llu has both a dir "
2755 "item and extents, unsure if it is a dir or a "
2756 "regular file so setting it as a directory\n",
2757 (unsigned long long)root->objectid,
2758 (unsigned long long)rec->ino);
2759 mode = S_IFDIR | 0755;
2760 size = rec->found_size;
/* NOTE(review): this condition is always true here; a plain else would do. */
2761 } else if (!rec->found_dir_item) {
2762 size = rec->extent_end;
2763 mode = S_IFREG | 0755;
2766 ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
2768 btrfs_commit_transaction(trans, root);
/*
 * Walk every backref of @rec and try to repair the inconsistent ones.
 *
 * @root:        tree the inode lives in
 * @rec:         inode record whose backrefs are examined
 * @inode_cache: passed through to add_missing_dir_index()
 *
 * Several branches test a `delete` flag whose declaration is not visible in
 * this view (presumably a trailing parameter - confirm).
 *
 * Returns ret when it is non-zero, otherwise the value of `repaired`.
 */
2772 static int repair_inode_backrefs(struct btrfs_root *root,
2773 struct inode_record *rec,
2774 struct cache_tree *inode_cache,
2777 struct inode_backref *tmp, *backref;
2778 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2782 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* Root directory without an inode item: recreate it with root_dir = 1. */
2783 if (!delete && rec->ino == root_dirid) {
2784 if (!rec->found_inode_item) {
2785 ret = create_inode_item(root, rec, 1);
2792 /* Index 0 for root dir's are special, don't mess with it */
2793 if (rec->ino == root_dirid && backref->index == 0)
/*
 * A dir index with no inode ref, or one whose index disagrees with the
 * inode ref, is removed and the backref dropped from the list.
 */
2797 ((backref->found_dir_index && !backref->found_inode_ref) ||
2798 (backref->found_dir_index && backref->found_inode_ref &&
2799 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2800 ret = delete_dir_index(root, backref);
2804 list_del(&backref->list);
/* Dir item and inode ref exist but the dir index is missing: add it. */
2809 if (!delete && !backref->found_dir_index &&
2810 backref->found_dir_item && backref->found_inode_ref) {
2811 ret = add_missing_dir_index(root, inode_cache, rec,
/* A backref with all three items and no errors needs no more tracking. */
2816 if (backref->found_dir_item &&
2817 backref->found_dir_index) {
2818 if (!backref->errors &&
2819 backref->found_inode_ref) {
2820 list_del(&backref->list);
/* Only the inode ref exists: insert the missing dir index/item pair. */
2827 if (!delete && (!backref->found_dir_index &&
2828 !backref->found_dir_item &&
2829 backref->found_inode_ref)) {
2830 struct btrfs_trans_handle *trans;
2831 struct btrfs_key location;
2833 ret = check_dir_conflict(root, backref->name,
2839 * let nlink fixing routine to handle it,
2840 * which can do it better.
/* The new dir item points at the inode item key of this inode. */
2845 location.objectid = rec->ino;
2846 location.type = BTRFS_INODE_ITEM_KEY;
2847 location.offset = 0;
2849 trans = btrfs_start_transaction(root, 1);
2850 if (IS_ERR(trans)) {
2851 ret = PTR_ERR(trans);
2854 fprintf(stderr, "adding missing dir index/item pair "
2856 (unsigned long long)rec->ino);
2857 ret = btrfs_insert_dir_item(trans, root, backref->name,
2859 backref->dir, &location,
2860 imode_to_type(rec->imode),
2863 btrfs_commit_transaction(trans, root);
/*
 * All three ref items exist and their indexes agree, but the inode item
 * itself is missing: recreate it (not as a root dir).
 */
2867 if (!delete && (backref->found_inode_ref &&
2868 backref->found_dir_index &&
2869 backref->found_dir_item &&
2870 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2871 !rec->found_inode_item)) {
2872 ret = create_inode_item(root, rec, 0);
2879 return ret ? ret : repaired;
/*
 * Determine the file type for nlink/inode_item repair.
 *
 * Return 0 if the file type is found; the BTRFS_FT_* value is stored in @type.
 * Return -ENOENT if the file type is not found.
 */
/*
 * Work out the BTRFS_FT_* type of the inode described by @rec.
 *
 * @rec:  inode record to inspect
 * @type: output, written when a type is found
 *
 * The mode of the inode item wins when the item exists; otherwise the
 * filetype recorded in a backref that has a dir index or dir item is used.
 */
2888 static int find_file_type(struct inode_record *rec, u8 *type)
2890 struct inode_backref *backref;
2892 /* For inode item recovered case */
2893 if (rec->found_inode_item) {
2894 *type = imode_to_type(rec->imode);
/* No inode item: fall back to the type stored in the directory entries. */
2898 list_for_each_entry(backref, &rec->backrefs, list) {
2899 if (backref->found_dir_index || backref->found_dir_item) {
2900 *type = backref->filetype;
/*
 * Determine the file name for nlink repair.
 *
 * Return 0 if a file name is found; @name and @namelen are set.
 * Return -ENOENT if no file name is found.
 */
/*
 * Copy the name of a backref of @rec that has a dir index, dir item or
 * inode ref into @name and store its length in @namelen.
 *
 * The copy is backref->namelen bytes long and the visible code does not
 * NUL-terminate it.
 * NOTE(review): @name must be large enough for that - confirm at the callers.
 */
2913 static int find_file_name(struct inode_record *rec,
2914 char *name, int *namelen)
2916 struct inode_backref *backref;
2918 list_for_each_entry(backref, &rec->backrefs, list) {
2919 if (backref->found_dir_index || backref->found_dir_item ||
2920 backref->found_inode_ref) {
2921 memcpy(name, backref->name, backref->namelen);
2922 *namelen = backref->namelen;
/*
 * Rebuild the link count of @rec from scratch:
 *   1. unlink every backref (incomplete ones are also dropped from the list),
 *   2. write nlink = 0 into the inode item,
 *   3. re-add the backrefs that are still listed with btrfs_add_link().
 *
 * @trans: running transaction
 * @root:  tree the inode lives in
 * @path:  scratch path for the inode item lookup; released before returning
 * @rec:   inode record; rec->found_link is reset to 0 first
 */
2929 /* Reset the nlink of the inode to the correct one */
2930 static int reset_nlink(struct btrfs_trans_handle *trans,
2931 struct btrfs_root *root,
2932 struct btrfs_path *path,
2933 struct inode_record *rec)
2935 struct inode_backref *backref;
2936 struct inode_backref *tmp;
2937 struct btrfs_key key;
2938 struct btrfs_inode_item *inode_item;
2941 /* We don't believe this either, reset it and iterate backref */
2942 rec->found_link = 0;
2944 /* Remove all backref including the valid ones */
2945 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2946 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2947 backref->index, backref->name,
2948 backref->namelen, 0);
2952 /* remove invalid backref, so it won't be added back */
2953 if (!(backref->found_dir_index &&
2954 backref->found_dir_item &&
2955 backref->found_inode_ref)) {
2956 list_del(&backref->list);
2963 /* Set nlink to 0 */
2964 key.objectid = rec->ino;
2965 key.type = BTRFS_INODE_ITEM_KEY;
/* cow = 1: the leaf is about to be modified in place. */
2967 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2974 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2975 struct btrfs_inode_item);
2976 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2977 btrfs_mark_buffer_dirty(path->nodes[0]);
2978 btrfs_release_path(path);
2981 * Add back valid inode_ref/dir_item/dir_index,
2982 * add_link() will handle the nlink inc, so new nlink must be correct
2984 list_for_each_entry(backref, &rec->backrefs, list) {
2985 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2986 backref->name, backref->namelen,
2987 backref->filetype, &backref->index, 1, 0);
2992 btrfs_release_path(path);
/*
 * Store in *highest_ino the objectid of the key that sits just before the
 * position where (BTRFS_LAST_FREE_OBJECTID, BTRFS_INODE_ITEM_KEY, ...) would
 * be found in @root.
 *
 * @path is initialised here and released before returning. The declaration
 * of highest_ino is not visible in this view (presumably a u64 * parameter).
 */
2996 static int get_highest_inode(struct btrfs_trans_handle *trans,
2997 struct btrfs_root *root,
2998 struct btrfs_path *path,
3001 struct btrfs_key key, found_key;
3004 btrfs_init_path(path);
3005 key.objectid = BTRFS_LAST_FREE_OBJECTID;
3007 key.type = BTRFS_INODE_ITEM_KEY;
3008 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
/*
 * NOTE(review): slots[0] - 1 presumes the search did not land on slot 0;
 * the guarding condition is on a line not shown here - confirm.
 */
3010 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3011 path->slots[0] - 1);
3012 *highest_ino = found_key.objectid;
/* An objectid at or beyond the last free one is treated specially below. */
3015 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3017 btrfs_release_path(path);
/*
 * Link the inode to the 'lost+found' directory and increase @ref_count.
 *
 * Returns 0 on success.
 * Returns <0 on failure.
 */
/*
 * Create the "lost+found" directory under BTRFS_FIRST_FREE_OBJECTID via
 * btrfs_mkdir() and link inode @ino into it under the name in @namebuf.
 *
 * @trans:     running transaction
 * @root:      tree the inode lives in
 * @path:      scratch path; released on entry and again near the end
 * @ino:       inode to link
 * @namebuf:   name to use; may be extended in place with ".INO" suffixes
 * @name_len:  current length of the name in @namebuf
 * @filetype:  BTRFS_FT_* type passed to btrfs_add_link()
 * @ref_count: link counter of the caller (its update is not on a visible line)
 */
3027 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
3028 struct btrfs_root *root,
3029 struct btrfs_path *path,
3030 u64 ino, char *namebuf, u32 name_len,
3031 u8 filetype, u64 *ref_count)
3033 char *dir_name = "lost+found";
3038 btrfs_release_path(path);
/* The result seeds the inode number handed to btrfs_mkdir() below. */
3039 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3044 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3045 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3048 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3051 ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3052 namebuf, name_len, filetype, NULL, 1, 0);
3054 * Add ".INO" suffix several times to handle case where
3055 * "FILENAME.INO" is already taken by another file.
3057 while (ret == -EEXIST) {
3059 * Conflicting file name, add ".INO" as suffix * +1 for '.'
/* Give up extending once the suffixed name would exceed BTRFS_NAME_LEN. */
3061 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
3065 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3067 name_len += count_digits(ino) + 1;
3068 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3069 name_len, filetype, NULL, 1, 0);
3072 error("failed to link the inode %llu to %s dir: %s",
3073 ino, dir_name, strerror(-ret));
3078 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3079 name_len, namebuf, dir_name);
3081 btrfs_release_path(path);
3083 error("failed to move file '%.*s' to '%s' dir", name_len,
/*
 * Repair a wrong link count for @rec.
 *
 * The file name and type are captured first (with the inode number and
 * BTRFS_FT_REG_FILE as fallbacks), then reset_nlink() rebuilds the links.
 * If no link is left afterwards (rec->found_link == 0) the inode is linked
 * into lost+found. I_ERR_LINK_COUNT_WRONG is cleared and @path released on
 * the visible tail of the function.
 */
3088 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3089 struct btrfs_root *root,
3090 struct btrfs_path *path,
3091 struct inode_record *rec)
3093 char namebuf[BTRFS_NAME_LEN] = {0};
3096 int name_recovered = 0;
3097 int type_recovered = 0;
3101 * Get file name and type first before these invalid inode ref
3102 * are deleted by remove_all_invalid_backref()
/* Both helpers return 0 on success, hence the negation. */
3104 name_recovered = !find_file_name(rec, namebuf, &namelen);
3105 type_recovered = !find_file_type(rec, &type);
3107 if (!name_recovered) {
3108 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3109 rec->ino, rec->ino);
/* Fallback name: the decimal inode number. */
3110 namelen = count_digits(rec->ino);
3111 sprintf(namebuf, "%llu", rec->ino);
3114 if (!type_recovered) {
3115 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3117 type = BTRFS_FT_REG_FILE;
3121 ret = reset_nlink(trans, root, path, rec);
3124 "Failed to reset nlink for inode %llu: %s\n",
3125 rec->ino, strerror(-ret));
/* Nothing references the inode any more: park it in lost+found. */
3129 if (rec->found_link == 0) {
3130 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3131 namebuf, namelen, type,
3132 (u64 *)&rec->found_link);
3136 printf("Fixed the nlink of inode %llu\n", rec->ino);
3139 * Clear the flag anyway, or we will loop forever for the same inode
3140 * as it will not be removed from the bad inode list and the dead loop
3143 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3144 btrfs_release_path(path);
/*
 * Check if there is any normal (regular or prealloc) file extent for the
 * given inode.
 * This is used to determine the file type when neither its dir_index/item nor
 * its inode_item exists.
 *
 * This will *NOT* report errors: if any error happens, the inode is simply
 * considered to have no normal file extent.
 */
/*
 * Look for a file extent item of inode @ino in @root whose type is not
 * BTRFS_FILE_EXTENT_INLINE.
 *
 * The search runs without a transaction and without COW, so the tree is
 * only read. The return statement is not visible in this view; presumably
 * non-zero means such an extent was found - confirm.
 */
3157 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3159 struct btrfs_path path;
3160 struct btrfs_key key;
3161 struct btrfs_key found_key;
3162 struct btrfs_file_extent_item *fi;
3166 btrfs_init_path(&path);
3168 key.type = BTRFS_EXTENT_DATA_KEY;
3171 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* No exact match and the slot is past the end of the leaf: move on. */
3176 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3177 ret = btrfs_next_leaf(root, &path);
3184 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
/* Only EXTENT_DATA keys that belong to @ino are of interest. */
3186 if (found_key.objectid != ino ||
3187 found_key.type != BTRFS_EXTENT_DATA_KEY)
3189 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3190 struct btrfs_file_extent_item);
3191 type = btrfs_file_extent_type(path.nodes[0], fi);
3192 if (type != BTRFS_FILE_EXTENT_INLINE) {
3198 btrfs_release_path(&path);
3202 static u32 btrfs_type_to_imode(u8 type)
3204 static u32 imode_by_btrfs_type[] = {
3205 [BTRFS_FT_REG_FILE] = S_IFREG,
3206 [BTRFS_FT_DIR] = S_IFDIR,
3207 [BTRFS_FT_CHRDEV] = S_IFCHR,
3208 [BTRFS_FT_BLKDEV] = S_IFBLK,
3209 [BTRFS_FT_FIFO] = S_IFIFO,
3210 [BTRFS_FT_SOCK] = S_IFSOCK,
3211 [BTRFS_FT_SYMLINK] = S_IFLNK,
3214 return imode_by_btrfs_type[(type)];
/*
 * Rebuild a missing inode item for @rec with btrfs_new_inode().
 *
 * The file type comes from find_file_type(); when that fails it is guessed:
 * regular file if a non-inline file extent exists, directory if a dir item
 * was found, regular file if orphan extents exist, and regular file (with a
 * message) as the last resort.
 *
 * On the visible tail the record is updated: I_ERR_NO_INODE_ITEM is cleared
 * and I_ERR_LINK_COUNT_WRONG is set so that the nlink repair runs later.
 */
3217 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3218 struct btrfs_root *root,
3219 struct btrfs_path *path,
3220 struct inode_record *rec)
3224 int type_recovered = 0;
3227 printf("Trying to rebuild inode:%llu\n", rec->ino);
/* find_file_type() returns 0 on success, hence the negation. */
3229 type_recovered = !find_file_type(rec, &filetype);
3232 * Try to determine inode type if type not found.
3234 * For found regular file extent, it must be FILE.
3235 * For found dir_item/index, it must be DIR.
3237 * For undetermined one, use FILE as fallback.
3240 * 1. If found backref(inode_index/item is already handled) to it,
3242 * Need new inode-inode ref structure to allow search for that.
3244 if (!type_recovered) {
3245 if (rec->found_file_extent &&
3246 find_normal_file_extent(root, rec->ino)) {
3248 filetype = BTRFS_FT_REG_FILE;
3249 } else if (rec->found_dir_item) {
3251 filetype = BTRFS_FT_DIR;
3252 } else if (!list_empty(&rec->orphan_extents)) {
3254 filetype = BTRFS_FT_REG_FILE;
3256 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3259 filetype = BTRFS_FT_REG_FILE;
/* `mode` holds the permission bits; the type bits are OR-ed in here. */
3263 ret = btrfs_new_inode(trans, root, rec->ino,
3264 mode | btrfs_type_to_imode(filetype));
3269 * Here inode rebuild is done, we only rebuild the inode item,
3270 * don't repair the nlink(like move to lost+found).
3271 * That is the job of nlink repair.
3273 * We just fill the record and return
3275 rec->found_dir_item = 1;
3276 rec->imode = mode | btrfs_type_to_imode(filetype);
3278 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3279 /* Ensure the inode_nlinks repair function will be called */
3280 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Deal with every orphan data extent attached to @rec.
 *
 * For each entry btrfs_get_extent() probes the target file range. Two
 * outcomes are visible: the orphan's disk extent is freed when it conflicts,
 * or a file extent item is inserted for it. Which one runs depends on a test
 * that sits on a line not shown here.
 *
 * After an entry is handled, found_size and the hole tree of @rec are
 * updated, the related error bits are cleared when they no longer apply, and
 * the entry is unlinked from the list. I_ERR_FILE_EXTENT_ORPHAN is cleared
 * on the visible tail.
 */
3285 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3286 struct btrfs_root *root,
3287 struct btrfs_path *path,
3288 struct inode_record *rec)
3290 struct orphan_data_extent *orphan;
3291 struct orphan_data_extent *tmp;
3294 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3296 * Check for conflicting file extents
3298 * Here we don't know whether the extents is compressed or not,
3299 * so we can only assume it not compressed nor data offset,
3300 * and use its disk_len as extent length.
3302 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3303 orphan->offset, orphan->disk_len, 0);
3304 btrfs_release_path(path);
3309 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3310 orphan->disk_bytenr, orphan->disk_len);
3311 ret = btrfs_free_extent(trans,
3312 root->fs_info->extent_root,
3313 orphan->disk_bytenr, orphan->disk_len,
3314 0, root->objectid, orphan->objectid,
/* disk_len is used for both length arguments (see the assumption above). */
3319 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3320 orphan->offset, orphan->disk_bytenr,
3321 orphan->disk_len, orphan->disk_len);
3325 /* Update file size info */
3326 rec->found_size += orphan->disk_len;
3327 if (rec->found_size == rec->nbytes)
3328 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3330 /* Update the file extent hole info too */
3331 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3335 if (RB_EMPTY_ROOT(&rec->holes))
3336 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3338 list_del(&orphan->list);
3341 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps recorded in rec->holes by punching a hole for each of them
 * with btrfs_punch_hole(), removing every handled entry from the tree.
 * I_ERR_FILE_EXTENT_DISCOUNT is cleared once the tree is empty.
 *
 * A separate visible call punches one hole from offset 0 up to the inode
 * size rounded up to the sector size; the condition guarding it is on a
 * line not shown here.
 */
3346 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3347 struct btrfs_root *root,
3348 struct btrfs_path *path,
3349 struct inode_record *rec)
3351 struct rb_node *node;
3352 struct file_extent_hole *hole;
3356 node = rb_first(&rec->holes);
3360 hole = rb_entry(node, struct file_extent_hole, node);
3361 ret = btrfs_punch_hole(trans, root, rec->ino,
3362 hole->start, hole->len);
3365 ret = del_file_extent_hole(&rec->holes, hole->start,
3369 if (RB_EMPTY_ROOT(&rec->holes))
3370 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* The tree was modified, so restart from its first node. */
3371 node = rb_first(&rec->holes);
3373 /* special case for a file losing all its file extent */
3375 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3376 round_up(rec->isize,
3377 root->fs_info->sectorsize));
3381 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3382 rec->ino, root->objectid);
/*
 * Run every applicable repair routine for @rec inside one transaction.
 *
 * Only the error bits listed in the first test are handled here. The
 * repairs run in a fixed order and each later step is skipped once an
 * earlier one has set ret. The transaction is committed and the path
 * released at the end.
 */
3387 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3389 struct btrfs_trans_handle *trans;
3390 struct btrfs_path path;
3393 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3394 I_ERR_NO_ORPHAN_ITEM |
3395 I_ERR_LINK_COUNT_WRONG |
3396 I_ERR_NO_INODE_ITEM |
3397 I_ERR_FILE_EXTENT_ORPHAN |
3398 I_ERR_FILE_EXTENT_DISCOUNT|
3399 I_ERR_FILE_NBYTES_WRONG)))
3403 * For nlink repair, it may create a dir and add link, so
3404 * 2 for parent(256)'s dir_index and dir_item
3405 * 2 for lost+found dir's inode_item and inode_ref
3406 * 1 for the new inode_ref of the file
3407 * 2 for lost+found dir's dir_index and dir_item for the file
3409 trans = btrfs_start_transaction(root, 7);
3411 return PTR_ERR(trans);
3413 btrfs_init_path(&path);
/*
 * The inode item is rebuilt first; repair_inode_no_item() also raises
 * I_ERR_LINK_COUNT_WRONG so that the nlink step below runs afterwards.
 */
3414 if (rec->errors & I_ERR_NO_INODE_ITEM)
3415 ret = repair_inode_no_item(trans, root, &path, rec);
3416 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3417 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3418 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3419 ret = repair_inode_discount_extent(trans, root, &path, rec);
3420 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3421 ret = repair_inode_isize(trans, root, &path, rec);
3422 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3423 ret = repair_inode_orphan_item(trans, root, &path, rec);
3424 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3425 ret = repair_inode_nlinks(trans, root, &path, rec);
3426 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3427 ret = repair_inode_nbytes(trans, root, &path, rec);
/* NOTE(review): the commit's return value is not used on this line. */
3428 btrfs_commit_transaction(trans, root);
3429 btrfs_release_path(&path);
/*
 * Check, and in repair mode try to fix, every inode record collected for
 * @root in @inode_cache. All records are consumed and freed.
 *
 * Visible phases:
 *   1. when `repair` is set, a staged pass over the cache that calls
 *      repair_inode_backrefs() on records that have backrefs;
 *   2. a check of the root directory record, recreating the root dir with
 *      btrfs_make_root_dir() on one visible path;
 *   3. a pass over all remaining records that flags missing inode items and
 *      wrong link counts, calls try_repair_inode(), and prints whatever is
 *      still unresolved.
 *
 * Returns -1 if `error` is positive, 0 otherwise.
 */
3433 static int check_inode_recs(struct btrfs_root *root,
3434 struct cache_tree *inode_cache)
3436 struct cache_extent *cache;
3437 struct ptr_node *node;
3438 struct inode_record *rec;
3439 struct inode_backref *backref;
3444 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* A root with zero refs is not expected to have any inode records. */
3446 if (btrfs_root_refs(&root->root_item) == 0) {
3447 if (!cache_tree_empty(inode_cache))
3448 fprintf(stderr, "warning line %d\n", __LINE__);
3453 * We need to repair backrefs first because we could change some of the
3454 * errors in the inode recs.
3456 * We also need to go through and delete invalid backrefs first and then
3457 * add the correct ones second. We do this because we may get EEXIST
3458 * when adding back the correct index because we hadn't yet deleted the
3461 * For example, if we were missing a dir index then the directories
3462 * isize would be wrong, so if we fixed the isize to what we thought it
3463 * would be and then fixed the backref we'd still have a invalid fs, so
3464 * we need to add back the dir index and then check to see if the isize
3469 if (stage == 3 && !err)
3472 cache = search_cache_extent(inode_cache, 0);
3473 while (repair && cache) {
3474 node = container_of(cache, struct ptr_node, cache);
/* Advance first: the current node may be removed from the tree below. */
3476 cache = next_cache_extent(cache);
3478 /* Need to free everything up and rescan */
3480 remove_cache_extent(inode_cache, &node->cache);
3482 free_inode_rec(rec);
3486 if (list_empty(&rec->backrefs))
3489 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Root directory check: mod = 0, so the record is only looked up. */
3503 rec = get_inode_rec(inode_cache, root_dirid, 0);
3504 BUG_ON(IS_ERR(rec));
3506 ret = check_root_dir(rec);
3508 fprintf(stderr, "root %llu root dir %llu error\n",
3509 (unsigned long long)root->root_key.objectid,
3510 (unsigned long long)root_dirid);
3511 print_inode_error(root, rec);
3516 struct btrfs_trans_handle *trans;
3518 trans = btrfs_start_transaction(root, 1);
3519 if (IS_ERR(trans)) {
3520 err = PTR_ERR(trans);
3525 "root %llu missing its root dir, recreating\n",
3526 (unsigned long long)root->objectid);
3528 ret = btrfs_make_root_dir(trans, root, root_dirid);
3531 btrfs_commit_transaction(trans, root);
3535 fprintf(stderr, "root %llu root dir %llu not found\n",
3536 (unsigned long long)root->root_key.objectid,
3537 (unsigned long long)root_dirid);
/* Final pass: every record is removed from the cache as it is visited. */
3541 cache = search_cache_extent(inode_cache, 0);
3544 node = container_of(cache, struct ptr_node, cache);
3546 remove_cache_extent(inode_cache, &node->cache);
/* The root dir was handled above; the orphan objectid is not a real inode. */
3548 if (rec->ino == root_dirid ||
3549 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3550 free_inode_rec(rec);
3554 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3555 ret = check_orphan_item(root, rec->ino);
3557 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3558 if (can_free_inode_rec(rec)) {
3559 free_inode_rec(rec);
3564 if (!rec->found_inode_item)
3565 rec->errors |= I_ERR_NO_INODE_ITEM;
3566 if (rec->found_link != rec->nlink)
3567 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3569 ret = try_repair_inode(root, rec);
3570 if (ret == 0 && can_free_inode_rec(rec)) {
3571 free_inode_rec(rec);
3577 if (!(repair && ret == 0))
3579 print_inode_error(root, rec);
/* Report every backref still attached, with its missing pieces flagged. */
3580 list_for_each_entry(backref, &rec->backrefs, list) {
3581 if (!backref->found_dir_item)
3582 backref->errors |= REF_ERR_NO_DIR_ITEM;
3583 if (!backref->found_dir_index)
3584 backref->errors |= REF_ERR_NO_DIR_INDEX;
3585 if (!backref->found_inode_ref)
3586 backref->errors |= REF_ERR_NO_INODE_REF;
3587 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3588 " namelen %u name %s filetype %d errors %x",
3589 (unsigned long long)backref->dir,
3590 (unsigned long long)backref->index,
3591 backref->namelen, backref->name,
3592 backref->filetype, backref->errors);
3593 print_ref_error(backref->errors);
3595 free_inode_rec(rec);
3597 return (error > 0) ? -1 : 0;
3600 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3603 struct cache_extent *cache;
3604 struct root_record *rec = NULL;
3607 cache = lookup_cache_extent(root_cache, objectid, 1);
3609 rec = container_of(cache, struct root_record, cache);
3611 rec = calloc(1, sizeof(*rec));
3613 return ERR_PTR(-ENOMEM);
3614 rec->objectid = objectid;
3615 INIT_LIST_HEAD(&rec->backrefs);
3616 rec->cache.start = objectid;
3617 rec->cache.size = 1;
3619 ret = insert_cache_extent(root_cache, &rec->cache);
3621 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec that matches @ref_root, @dir and the name, or
 * allocate a new zeroed one and append it to rec->backrefs.
 *
 * @index is only stored when a new backref is created; it does not take
 * part in the comparisons visible here. The name is copied and
 * NUL-terminated inside the same allocation as the backref.
 */
3626 static struct root_backref *get_root_backref(struct root_record *rec,
3627 u64 ref_root, u64 dir, u64 index,
3628 const char *name, int namelen)
3630 struct root_backref *backref;
3632 list_for_each_entry(backref, &rec->backrefs, list) {
3633 if (backref->ref_root != ref_root || backref->dir != dir ||
3634 backref->namelen != namelen)
3636 if (memcmp(name, backref->name, namelen))
/* +1 leaves room for the terminating NUL written below. */
3641 backref = calloc(1, sizeof(*backref) + namelen + 1);
3644 backref->ref_root = ref_root;
3646 backref->index = index;
3647 backref->namelen = namelen;
3648 memcpy(backref->name, name, namelen);
3649 backref->name[namelen] = '\0';
3650 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Per-entry callback for tearing down a root record cache: unlinks every
 * backref from rec->backrefs one at a time, always taking the list head.
 * The statements that release the memory are not visible in this view.
 */
3654 static void free_root_record(struct cache_extent *cache)
3656 struct root_record *rec;
3657 struct root_backref *backref;
3659 rec = container_of(cache, struct root_record, cache);
3660 while (!list_empty(&rec->backrefs)) {
3661 backref = to_root_backref(rec->backrefs.next);
3662 list_del(&backref->list);
/*
 * Instantiates the cache-tree free helper for root records with
 * free_root_record() as the per-entry callback (presumably generating
 * free_root_recs_tree() - confirm against the macro definition).
 */
3669 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that an item of type @item_type referencing root @root_id was seen.
 *
 * @root_cache: cache of root records
 * @root_id:    root being referenced
 * @ref_root:   root that holds the reference
 * @dir, @index, @name, @namelen: location and name of the reference
 * @item_type:  BTRFS_DIR_ITEM_KEY, BTRFS_DIR_INDEX_KEY, BTRFS_ROOT_REF_KEY
 *              or BTRFS_ROOT_BACKREF_KEY
 * @errors:     REF_ERR_* bits to OR into the backref
 *
 * The matching backref gets its found_* flag set; duplicate root refs and
 * index mismatches are flagged as errors. A backref is marked reachable once
 * both a forward ref and a dir item have been seen.
 */
3671 static int add_root_backref(struct cache_tree *root_cache,
3672 u64 root_id, u64 ref_root, u64 dir, u64 index,
3673 const char *name, int namelen,
3674 int item_type, int errors)
3676 struct root_record *rec;
3677 struct root_backref *backref;
3679 rec = get_root_rec(root_cache, root_id);
3680 BUG_ON(IS_ERR(rec));
3681 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3684 backref->errors |= errors;
/*
 * Item types other than dir items are checked against the index recorded
 * by an earlier dir index / root ref / root backref.
 */
3686 if (item_type != BTRFS_DIR_ITEM_KEY) {
3687 if (backref->found_dir_index || backref->found_back_ref ||
3688 backref->found_forward_ref) {
3689 if (backref->index != index)
3690 backref->errors |= REF_ERR_INDEX_UNMATCH;
3692 backref->index = index;
3696 if (item_type == BTRFS_DIR_ITEM_KEY) {
3697 if (backref->found_forward_ref)
3699 backref->found_dir_item = 1;
3700 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3701 backref->found_dir_index = 1;
3702 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3703 if (backref->found_forward_ref)
3704 backref->errors |= REF_ERR_DUP_ROOT_REF;
3705 else if (backref->found_dir_item)
3707 backref->found_forward_ref = 1;
3708 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3709 if (backref->found_back_ref)
3710 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3711 backref->found_back_ref = 1;
3716 if (backref->found_forward_ref && backref->found_dir_item)
3717 backref->reachable = 1;
/*
 * Drain the per-tree cache @src_cache and turn the dir item / dir index
 * backrefs of its records into root backrefs in @dst_cache.
 *
 * For the tree reloc root the source cache is simply freed. Every other
 * record is removed from @src_cache, tested with is_child_root(), and
 * freed after its backrefs have been forwarded.
 */
3721 static int merge_root_recs(struct btrfs_root *root,
3722 struct cache_tree *src_cache,
3723 struct cache_tree *dst_cache)
3725 struct cache_extent *cache;
3726 struct ptr_node *node;
3727 struct inode_record *rec;
3728 struct inode_backref *backref;
3731 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3732 free_inode_recs_tree(src_cache);
3737 cache = search_cache_extent(src_cache, 0);
3740 node = container_of(cache, struct ptr_node, cache);
3742 remove_cache_extent(src_cache, &node->cache);
/* rec->ino is used as the objectid of the candidate child root. */
3745 ret = is_child_root(root, root->objectid, rec->ino);
3751 list_for_each_entry(backref, &rec->backrefs, list) {
/* Records in this cache must never carry inode refs. */
3752 BUG_ON(backref->found_inode_ref);
3753 if (backref->found_dir_item)
3754 add_root_backref(dst_cache, rec->ino,
3755 root->root_key.objectid, backref->dir,
3756 backref->index, backref->name,
3757 backref->namelen, BTRFS_DIR_ITEM_KEY,
3759 if (backref->found_dir_index)
3760 add_root_backref(dst_cache, rec->ino,
3761 root->root_key.objectid, backref->dir,
3762 backref->index, backref->name,
3763 backref->namelen, BTRFS_DIR_INDEX_KEY,
3767 free_inode_rec(rec);
/*
 * Verify that every fs tree recorded in @root_cache is referenced.
 *
 * A first pass over the cache clears `reachable` on backrefs whose
 * referencing root has no reference itself. A second pass reports fs trees
 * that are unreferenced or referenced without a root item, and prints the
 * backrefs that still carry errors.
 *
 * Returns 1 if `errors` is positive, 0 otherwise.
 */
3774 static int check_root_refs(struct btrfs_root *root,
3775 struct cache_tree *root_cache)
3777 struct root_record *rec;
3778 struct root_record *ref_root;
3779 struct root_backref *backref;
3780 struct cache_extent *cache;
/* The top-level fs tree is the starting point of the walk. */
3786 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3787 BUG_ON(IS_ERR(rec));
3790 /* fixme: this can not detect circular references */
3793 cache = search_cache_extent(root_cache, 0);
3797 rec = container_of(cache, struct root_record, cache);
3798 cache = next_cache_extent(cache);
3800 if (rec->found_ref == 0)
3803 list_for_each_entry(backref, &rec->backrefs, list) {
3804 if (!backref->reachable)
3807 ref_root = get_root_rec(root_cache,
3809 BUG_ON(IS_ERR(ref_root));
/* A backref coming from a root that is itself referenced stays valid. */
3810 if (ref_root->found_ref > 0)
3813 backref->reachable = 0;
3815 if (rec->found_ref == 0)
/* Second pass: report what is left unreferenced or inconsistent. */
3821 cache = search_cache_extent(root_cache, 0);
3825 rec = container_of(cache, struct root_record, cache);
3826 cache = next_cache_extent(cache);
/* Only objectids in the regular fs tree range are checked for orphans. */
3828 if (rec->found_ref == 0 &&
3829 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3830 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3831 ret = check_orphan_item(root->fs_info->tree_root,
3837 * If we don't have a root item then we likely just have
3838 * a dir item in a snapshot for this root but no actual
3839 * ref key or anything so it's meaningless.
3841 if (!rec->found_root_item)
3844 fprintf(stderr, "fs tree %llu not referenced\n",
3845 (unsigned long long)rec->objectid);
3849 if (rec->found_ref > 0 && !rec->found_root_item)
3851 list_for_each_entry(backref, &rec->backrefs, list) {
3852 if (!backref->found_dir_item)
3853 backref->errors |= REF_ERR_NO_DIR_ITEM;
3854 if (!backref->found_dir_index)
3855 backref->errors |= REF_ERR_NO_DIR_INDEX;
3856 if (!backref->found_back_ref)
3857 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3858 if (!backref->found_forward_ref)
3859 backref->errors |= REF_ERR_NO_ROOT_REF;
3860 if (backref->reachable && backref->errors)
3867 fprintf(stderr, "fs tree %llu refs %u %s\n",
3868 (unsigned long long)rec->objectid, rec->found_ref,
3869 rec->found_root_item ? "" : "not found");
3871 list_for_each_entry(backref, &rec->backrefs, list) {
3872 if (!backref->reachable)
3874 if (!backref->errors && rec->found_root_item)
3876 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3877 " index %llu namelen %u name %s errors %x\n",
3878 (unsigned long long)backref->ref_root,
3879 (unsigned long long)backref->dir,
3880 (unsigned long long)backref->index,
3881 backref->namelen, backref->name,
3883 print_ref_error(backref->errors);
3886 return errors > 0 ? 1 : 0;
/*
 * Decode the btrfs_root_ref item at @slot of @eb and record it with
 * add_root_backref().
 *
 * For a ROOT_REF key the referenced root is key->offset and the referencing
 * root is key->objectid; for any other key type the two are swapped.
 * Names longer than BTRFS_NAME_LEN are truncated and flagged with
 * REF_ERR_NAME_TOO_LONG.
 */
3889 static int process_root_ref(struct extent_buffer *eb, int slot,
3890 struct btrfs_key *key,
3891 struct cache_tree *root_cache)
3897 struct btrfs_root_ref *ref;
3898 char namebuf[BTRFS_NAME_LEN];
3901 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3903 dirid = btrfs_root_ref_dirid(eb, ref);
3904 index = btrfs_root_ref_sequence(eb, ref);
3905 name_len = btrfs_root_ref_name_len(eb, ref);
3907 if (name_len <= BTRFS_NAME_LEN) {
3911 len = BTRFS_NAME_LEN;
3912 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes start right after the fixed-size root ref structure. */
3914 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3916 if (key->type == BTRFS_ROOT_REF_KEY) {
3917 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3918 index, namebuf, len, key->type, error);
3920 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3921 index, namebuf, len, key->type, error);
/*
 * Per-entry callback for tearing down a corrupt block cache: recovers the
 * btrfs_corrupt_block that embeds @cache. The statement that releases it is
 * not visible in this view.
 */
3926 static void free_corrupt_block(struct cache_extent *cache)
3928 struct btrfs_corrupt_block *corrupt;
3930 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/*
 * Instantiates the cache-tree free helper for corrupt blocks with
 * free_corrupt_block() as the per-entry callback (presumably the
 * free_corrupt_blocks_tree() called by check_fs_root() - confirm).
 */
3934 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
/*
 * Repair the btree of the given root.
 *
 * The fix is to remove the node keys recorded in the corrupt_blocks
 * cache_tree and then rebalance the tree.
 * After the fix, the btree should be writeable.
 */
/*
 * Remove every tree block recorded in @corrupt_blocks from the btree of
 * @root, all inside one transaction.
 *
 * Pass 1: for each entry, search down to the recorded level, delete the
 * parent pointer with btrfs_del_ptr() and free the block's extent.
 * Pass 2: search for each recorded key again with ins_len = -1 so that
 * btrfs_search_slot() rebalances the tree.
 *
 * Nothing is done when @corrupt_blocks is empty.
 */
3943 static int repair_btree(struct btrfs_root *root,
3944 struct cache_tree *corrupt_blocks)
3946 struct btrfs_trans_handle *trans;
3947 struct btrfs_path path;
3948 struct btrfs_corrupt_block *corrupt;
3949 struct cache_extent *cache;
3950 struct btrfs_key key;
3955 if (cache_tree_empty(corrupt_blocks))
3958 trans = btrfs_start_transaction(root, 1);
3959 if (IS_ERR(trans)) {
3960 ret = PTR_ERR(trans);
3961 fprintf(stderr, "Error starting transaction: %s\n",
3965 btrfs_init_path(&path);
3966 cache = first_cache_extent(corrupt_blocks);
3968 corrupt = container_of(cache, struct btrfs_corrupt_block,
/* Stop the search at the level of the corrupted block. */
3970 level = corrupt->level;
3971 path.lowest_level = level;
3972 key.objectid = corrupt->key.objectid;
3973 key.type = corrupt->key.type;
3974 key.offset = corrupt->key.offset;
3977 * Here we don't want to do any tree balance, since it may
3978 * cause a balance with corrupted brother leaf/node,
3979 * so ins_len set to 0 here.
3980 * Balance will be done after all corrupt node/leaf is deleted.
3982 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
/* Remember the block's bytenr before its pointer is removed. */
3985 offset = btrfs_node_blockptr(path.nodes[level],
3988 /* Remove the ptr */
3989 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3993 * Remove the corresponding extent
3994 * return value is not concerned.
3996 btrfs_release_path(&path);
3997 ret = btrfs_free_extent(trans, root, offset,
3998 root->fs_info->nodesize, 0,
3999 root->root_key.objectid, level - 1, 0);
4000 cache = next_cache_extent(cache);
4003 /* Balance the btree using btrfs_search_slot() */
4004 cache = first_cache_extent(corrupt_blocks);
4006 corrupt = container_of(cache, struct btrfs_corrupt_block,
4008 memcpy(&key, &corrupt->key, sizeof(key));
4009 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
4012 /* return will always >0 since it won't find the item */
4014 btrfs_release_path(&path);
4015 cache = next_cache_extent(cache);
4018 btrfs_commit_transaction(trans, root);
4019 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree.
 *
 * @root:       tree to check
 * @root_cache: global cache of root records; updated with what is found
 * @wc:         walk control state; its node array is reset here
 *
 * Visible steps: install a per-tree corrupt block cache in fs_info, mark
 * the root record as found, attach orphan data extents to their inode
 * records, validate the root block, walk the tree (starting from
 * drop_progress for a partially dropped root), report and try to repair
 * corrupted blocks, merge the collected root records into @root_cache and
 * finally check all inode records. The corrupt block cache and the orphan
 * extent list are released at the end.
 */
4023 static int check_fs_root(struct btrfs_root *root,
4024 struct cache_tree *root_cache,
4025 struct walk_control *wc)
4031 struct btrfs_path path;
4032 struct shared_node root_node;
4033 struct root_record *rec;
4034 struct btrfs_root_item *root_item = &root->root_item;
4035 struct cache_tree corrupt_blocks;
4036 struct orphan_data_extent *orphan;
4037 struct orphan_data_extent *tmp;
4038 enum btrfs_tree_block_status status;
4039 struct node_refs nrefs;
4042 * Reuse the corrupt_block cache tree to record corrupted tree block
4044 * Unlike the usage in extent tree check, here we do it in a per
4045 * fs/subvol tree base.
4047 cache_tree_init(&corrupt_blocks);
4048 root->fs_info->corrupt_blocks = &corrupt_blocks;
/* The tree reloc root gets no root record of its own. */
4050 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4051 rec = get_root_rec(root_cache, root->root_key.objectid);
4052 BUG_ON(IS_ERR(rec));
4053 if (btrfs_root_refs(root_item) > 0)
4054 rec->found_root_item = 1;
4057 btrfs_init_path(&path);
4058 memset(&root_node, 0, sizeof(root_node));
4059 cache_tree_init(&root_node.root_cache);
4060 cache_tree_init(&root_node.inode_cache);
4061 memset(&nrefs, 0, sizeof(nrefs));
4063 /* Move the orphan extent record to corresponding inode_record */
4064 list_for_each_entry_safe(orphan, tmp,
4065 &root->orphan_data_extents, list) {
4066 struct inode_record *inode;
4068 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4070 BUG_ON(IS_ERR(inode));
4071 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4072 list_move(&orphan->list, &inode->orphan_extents);
/* The walk starts at the level of the root block. */
4075 level = btrfs_header_level(root->node);
4076 memset(wc->nodes, 0, sizeof(wc->nodes));
4077 wc->nodes[level] = &root_node;
4078 wc->active_node = level;
4079 wc->root_level = level;
4081 /* We may not have checked the root block, lets do that now */
4082 if (btrfs_is_leaf(root->node))
4083 status = btrfs_check_leaf(root, NULL, root->node);
4085 status = btrfs_check_node(root, NULL, root->node);
4086 if (status != BTRFS_TREE_BLOCK_CLEAN)
/*
 * A live root, or one whose drop has not progressed, is walked from the
 * top; otherwise the walk resumes at the recorded drop_progress key.
 */
4089 if (btrfs_root_refs(root_item) > 0 ||
4090 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4091 path.nodes[level] = root->node;
4092 extent_buffer_get(root->node);
4093 path.slots[level] = 0;
4095 struct btrfs_key key;
4096 struct btrfs_disk_key found_key;
4098 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4099 level = root_item->drop_level;
4100 path.lowest_level = level;
/* drop_level comes from disk, so it is validated before use. */
4101 if (level > btrfs_header_level(root->node) ||
4102 level >= BTRFS_MAX_LEVEL) {
4103 error("ignoring invalid drop level: %u", level);
4106 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4109 btrfs_node_key(path.nodes[level], &found_key,
4111 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4112 sizeof(found_key)));
4116 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4122 wret = walk_up_tree(root, &path, wc, &level);
4129 btrfs_release_path(&path);
4131 if (!cache_tree_empty(&corrupt_blocks)) {
4132 struct cache_extent *cache;
4133 struct btrfs_corrupt_block *corrupt;
4135 printf("The following tree block(s) is corrupted in tree %llu:\n",
4136 root->root_key.objectid);
4137 cache = first_cache_extent(&corrupt_blocks);
4139 corrupt = container_of(cache,
4140 struct btrfs_corrupt_block,
4142 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4143 cache->start, corrupt->level,
4144 corrupt->key.objectid, corrupt->key.type,
4145 corrupt->key.offset);
4146 cache = next_cache_extent(cache);
4149 printf("Try to repair the btree for root %llu\n",
4150 root->root_key.objectid);
4151 ret = repair_btree(root, &corrupt_blocks);
4153 fprintf(stderr, "Failed to repair btree: %s\n",
4156 printf("Btree for root %llu is fixed\n",
4157 root->root_key.objectid);
4161 err = merge_root_recs(root, &root_node.root_cache, root_cache);
/* Mark the inode record still being collected as checked. */
4165 if (root_node.current) {
4166 root_node.current->checked = 1;
4167 maybe_free_inode_rec(&root_node.inode_cache,
4171 err = check_inode_recs(root, &root_node.inode_cache);
/* corrupt_blocks lives on this stack frame: unhook it from fs_info. */
4175 free_corrupt_blocks_tree(&corrupt_blocks);
4176 root->fs_info->corrupt_blocks = NULL;
4177 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Classify @objectid for check_fs_roots(): the two relocation tree ids are
 * tested explicitly, every other id is classified by is_fstree().
 */
4181 static int fs_root_objectid(u64 objectid)
4183 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4184 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4186 return is_fstree(objectid);
/*
 * Walk the items of the tree root: every ROOT_ITEM whose objectid is
 * accepted by fs_root_objectid() is read and handed to check_fs_root(),
 * and ROOT_REF / ROOT_BACKREF items are handed to process_root_ref().
 */
4189 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4190 struct cache_tree *root_cache)
4192 struct btrfs_path path;
4193 struct btrfs_key key;
4194 struct walk_control wc;
4195 struct extent_buffer *leaf, *tree_node;
4196 struct btrfs_root *tmp_root;
4197 struct btrfs_root *tree_root = fs_info->tree_root;
4201 if (ctx.progress_enabled) {
4202 ctx.tp = TASK_FS_ROOTS;
4203 task_start(ctx.info);
4207 * Just in case we made any changes to the extent tree that weren't
4208 * reflected into the free space cache yet.
4211 reset_cached_block_groups(fs_info);
4212 memset(&wc, 0, sizeof(wc));
4213 cache_tree_init(&wc.shared);
4214 btrfs_init_path(&path);
4219 key.type = BTRFS_ROOT_ITEM_KEY;
4220 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember which node the tree root pointed at so a later change can be detected. */
4225 tree_node = tree_root->node;
4227 if (tree_node != tree_root->node) {
4228 free_root_recs_tree(root_cache);
4229 btrfs_release_path(&path);
4232 leaf = path.nodes[0];
4233 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4234 ret = btrfs_next_leaf(tree_root, &path);
4240 leaf = path.nodes[0];
4242 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4243 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4244 fs_root_objectid(key.objectid)) {
4245 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4246 tmp_root = btrfs_read_fs_root_no_cache(
4249 key.offset = (u64)-1;
4250 tmp_root = btrfs_read_fs_root(
4253 if (IS_ERR(tmp_root)) {
4257 ret = check_fs_root(tmp_root, root_cache, &wc);
/* -EAGAIN from check_fs_root() discards the root records gathered so far. */
4258 if (ret == -EAGAIN) {
4259 free_root_recs_tree(root_cache);
4260 btrfs_release_path(&path);
/* The reloc tree root was read with the _no_cache variant above, so it is freed explicitly. */
4265 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4266 btrfs_free_fs_root(tmp_root);
4267 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4268 key.type == BTRFS_ROOT_BACKREF_KEY) {
4269 process_root_ref(leaf, path.slots[0], &key,
4276 btrfs_release_path(&path);
4278 free_extent_cache_tree(&wc.shared);
/* Sanity check: the shared cache tree is expected to be empty after being freed. */
4279 if (!cache_tree_empty(&wc.shared))
4280 fprintf(stderr, "warning line %d\n", __LINE__);
4282 task_stop(ctx.info);
4288 * Find the dir index in directory @dirid whose entry matches @location_id, the name and the file type.
4289 * Notice:time efficiency is O(N)
4291 * @root: the root of the fs/file tree
4292 * @index_ret: the index as return value
4293 * @namebuf: the name to match
4294 * @name_len: the length of name to match
4295 * @file_type: the file_type of INODE_ITEM to match
4297 * Returns 0 if found and *@index_ret will be modified with right value
4298 * Returns <0 if not found, and *@index_ret will be (u64)-1
4300 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4301 u64 *index_ret, char *namebuf, u32 name_len,
4304 struct btrfs_path path;
4305 struct extent_buffer *node;
4306 struct btrfs_dir_item *di;
4307 struct btrfs_key key;
4308 struct btrfs_key location;
4309 char name[BTRFS_NAME_LEN] = {0};
4321 /* search from the last index */
4322 key.objectid = dirid;
4323 key.offset = (u64)-1;
4324 key.type = BTRFS_DIR_INDEX_KEY;
4326 btrfs_init_path(&path);
4327 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* Step back to the previous DIR_INDEX item that belongs to @dirid. */
4332 ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
/*
 * (u64)-1 is the documented "not found" value. The cast has to name the
 * u64 type so that all 64 bits are set; a plain (64)-1 evaluates to 63.
 */
4335 *index_ret = (u64)-1;
4338 /* Check whether inode_id/filetype/name match */
4339 node = path.nodes[0];
4340 slot = path.slots[0];
4341 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4342 total = btrfs_item_size_nr(node, slot);
4343 while (cur < total) {
4345 len = btrfs_dir_name_len(node, di);
4346 data_len = btrfs_dir_data_len(node, di);
/* The entry has to point at the inode item (@location_id, INODE_ITEM, 0). */
4348 btrfs_dir_item_key_to_cpu(node, di, &location);
4349 if (location.objectid != location_id ||
4350 location.type != BTRFS_INODE_ITEM_KEY ||
4351 location.offset != 0)
4354 filetype = btrfs_dir_type(node, di);
4355 if (file_type != filetype)
/* Never copy more than the local name buffer can hold. */
4358 if (len > BTRFS_NAME_LEN)
4359 len = BTRFS_NAME_LEN;
4361 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4362 if (len != name_len || strncmp(namebuf, name, len))
/* The index of a DIR_INDEX entry is the offset of its item key. */
4365 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4366 *index_ret = key.offset;
/* Advance past this entry: header, name and data. */
4370 len += sizeof(*di) + data_len;
4371 di = (struct btrfs_dir_item *)((char *)di + len);
4377 btrfs_release_path(&path);
4382 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4383 * INODE_REF/INODE_EXTREF match.
4385 * @root: the root of the fs/file tree
4386 * @key: the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4387 * value while find index
4388 * @location_key: location key of the struct btrfs_dir_item to match
4389 * @name: the name to match
4390 * @namelen: the length of name
4391 * @file_type: the type of file to match
4393 * Return 0 if no error occurred.
4394 * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4395 * DIR_ITEM/DIR_INDEX
4396 * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4397 * and DIR_ITEM/DIR_INDEX mismatch
4399 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4400 struct btrfs_key *location_key, char *name,
4401 u32 namelen, u8 file_type)
4403 struct btrfs_path path;
4404 struct extent_buffer *node;
4405 struct btrfs_dir_item *di;
4406 struct btrfs_key location;
4407 char namebuf[BTRFS_NAME_LEN] = {0};
4416 /* get the index by traversing all index */
4417 if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4418 ret = find_dir_index(root, key->objectid,
4419 location_key->objectid, &key->offset,
4420 name, namelen, file_type);
4422 ret = DIR_INDEX_MISSING;
4426 btrfs_init_path(&path);
4427 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4429 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4434 /* Check whether inode_id/filetype/name match */
4435 node = path.nodes[0];
4436 slot = path.slots[0];
4437 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4438 total = btrfs_item_size_nr(node, slot);
4439 while (cur < total) {
/* Start each entry with the mismatch code that corresponds to the key type being checked. */
4440 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4441 DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4443 len = btrfs_dir_name_len(node, di);
4444 data_len = btrfs_dir_data_len(node, di);
/* All three fields of the entry's location key must equal @location_key. */
4446 btrfs_dir_item_key_to_cpu(node, di, &location);
4447 if (location.objectid != location_key->objectid ||
4448 location.type != location_key->type ||
4449 location.offset != location_key->offset)
4452 filetype = btrfs_dir_type(node, di);
4453 if (file_type != filetype)
4456 if (len > BTRFS_NAME_LEN) {
4457 len = BTRFS_NAME_LEN;
/* NOTE(review): len is already clamped here, so the %u below prints BTRFS_NAME_LEN rather than the on-disk length. */
4458 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4460 key->type == BTRFS_DIR_ITEM_KEY ?
4461 "DIR_ITEM" : "DIR_INDEX",
4462 key->objectid, key->offset, len);
4464 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4466 if (len != namelen || strncmp(namebuf, name, len))
/* Advance past this entry: header, name and data. */
4472 len += sizeof(*di) + data_len;
4473 di = (struct btrfs_dir_item *)((char *)di + len);
4478 btrfs_release_path(&path);
4483 * Prints inode ref error message
4485 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4486 u64 index, const char *namebuf, int name_len,
4487 u8 filetype, int err)
4492 /* root dir error */
4493 if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4495 "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4496 root->objectid, key->objectid, key->offset, namebuf);
/* DIR_ITEM problems: the item is identified by the ref key's offset and the hash of the name. */
4501 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4502 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4503 root->objectid, key->offset,
4504 btrfs_name_hash(namebuf, name_len),
4505 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
/*
 * DIR_INDEX problems: the mismatch/missing wording is chosen from the
 * DIR_INDEX bits, the same bits that select this message.
 */
4507 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4508 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4509 root->objectid, key->offset, index,
4510 err & DIR_INDEX_MISMATCH ? "mismatch" : "missing",
4515 * Insert the missing inode item.
4517 * Returns 0 means success.
4518 * Returns <0 means error.
4520 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4523 struct btrfs_key key;
4524 struct btrfs_trans_handle *trans;
4525 struct btrfs_path path;
4529 key.type = BTRFS_INODE_ITEM_KEY;
4532 btrfs_init_path(&path);
4533 trans = btrfs_start_transaction(root, 1);
4534 if (IS_ERR(trans)) {
4539 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
/* ret < 0 (search error) and ret == 0 (key already present) are handled together here. */
4540 if (ret < 0 || !ret)
4543 /* insert inode item */
/* NOTE(review): the return value of create_inode_item_lowmem() is not captured on this line. */
4544 create_inode_item_lowmem(trans, root, ino, filetype);
4547 btrfs_commit_transaction(trans, root);
4550 error("failed to repair root %llu INODE ITEM[%llu] missing",
4551 root->objectid, ino);
4552 btrfs_release_path(&path);
4557 * The ternary means dir item, dir index and relative inode ref.
4558 * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4559 * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4561 * If two of three is missing or mismatched, delete the existing one.
4562 * If one of three is missing or mismatched, add the missing one.
4564 * returns 0 means success.
4565 * returns non-zero on error.
4567 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4568 u64 index, char *name, int name_len, u8 filetype,
4571 struct btrfs_trans_handle *trans;
4576 * stage shall be one of following valild values:
4577 * 0: Fine, nothing to do.
4578 * 1: One of three is wrong, so add missing one.
4579 * 2: Two of three is wrong, so delete existed one.
4581 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4583 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4585 if (err & (INODE_REF_MISSING))
4588 /* stage must be smaller than 3 */
4591 trans = btrfs_start_transaction(root, 1);
4593 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
/* NOTE(review): index is passed by address, so btrfs_add_link() may update the local copy; confirm against its definition. */
4598 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4599 filetype, &index, 1, 1);
4603 btrfs_commit_transaction(trans, root);
4606 error("fail to repair inode %llu name %s filetype %u",
4607 ino, name, filetype);
/* stage == 2 is reported as a deletion, any other stage as an addition. */
4609 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
4610 stage == 2 ? "Delete" : "Add",
4611 ino, name, filetype);
4617 * Traverse the given INODE_REF and call find_dir_item() to find related
4618 * DIR_ITEM/DIR_INDEX.
4620 * @root: the root of the fs/file tree
4621 * @ref_key: the key of the INODE_REF
4622 * @path: the path that provides node and slot
4623 * @refs_ret: the count of INODE_REF
4624 * @mode: the st_mode of INODE_ITEM
4625 * @name_ret: returns with the first ref's name
4626 * @namelen_ret: len of the name_ret
4628 * Return 0 if no error occurred.
4630 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4631 struct btrfs_path *path, char *name_ret,
4632 u32 *namelen_ret, u64 *refs_ret, int mode)
4634 struct btrfs_key key;
4635 struct btrfs_key location;
4636 struct btrfs_inode_ref *ref;
4637 struct extent_buffer *node;
4638 char namebuf[BTRFS_NAME_LEN] = {0};
4648 int need_research = 0;
4656 /* since after repair, path and the dir item may be changed */
4657 if (need_research) {
4659 btrfs_release_path(path);
4660 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
4661 /* the item was deleted, let path point to the last checked item */
4663 if (path->slots[0] == 0)
4664 btrfs_prev_leaf(root, path);
/* Every dir entry for this ref has to point at the inode item (objectid, INODE_ITEM, 0). */
4672 location.objectid = ref_key->objectid;
4673 location.type = BTRFS_INODE_ITEM_KEY;
4674 location.offset = 0;
4675 node = path->nodes[0];
4676 slot = path->slots[0];
/* namebuf is a char array, so the element count computed here equals its size in bytes. */
4678 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4679 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4680 total = btrfs_item_size_nr(node, slot);
4683 /* Update inode ref count */
4686 index = btrfs_inode_ref_index(node, ref);
4687 name_len = btrfs_inode_ref_name_len(node, ref);
4689 if (name_len <= BTRFS_NAME_LEN) {
4692 len = BTRFS_NAME_LEN;
4693 warning("root %llu INODE_REF[%llu %llu] name too long",
4694 root->objectid, ref_key->objectid, ref_key->offset);
4697 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4699 /* copy the first name found to name_ret */
4700 if (refs == 1 && name_ret) {
4701 memcpy(name_ret, namebuf, len);
4705 /* Check root dir ref */
4706 if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4707 if (index != 0 || len != strlen("..") ||
4708 strncmp("..", namebuf, len) ||
4709 ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4710 /* set err bits then repair will delete the ref */
4711 err |= DIR_INDEX_MISSING;
4712 err |= DIR_ITEM_MISSING;
4717 /* Find related DIR_INDEX */
/* The ref key's offset is used as the objectid of the dir entries to look up. */
4718 key.objectid = ref_key->offset;
4719 key.type = BTRFS_DIR_INDEX_KEY;
4721 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4722 imode_to_type(mode));
4724 /* Find related dir_item */
4725 key.objectid = ref_key->offset;
4726 key.type = BTRFS_DIR_ITEM_KEY;
4727 key.offset = btrfs_name_hash(namebuf, len);
4728 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4729 imode_to_type(mode));
/*
 * NOTE(review): the repair and report calls below receive name_len, the
 * untrimmed on-item length, while namebuf only holds len bytes; confirm
 * this is safe when the name was trimmed.
 */
4731 if (tmp_err && repair) {
4732 ret = repair_ternary_lowmem(root, ref_key->offset,
4733 ref_key->objectid, index, namebuf,
4734 name_len, imode_to_type(mode),
4741 print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4742 imode_to_type(mode), tmp_err);
/* Step over this ref (header plus on-item name length) to the next one in the item. */
4744 len = sizeof(*ref) + name_len;
4745 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4756 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4757 * DIR_ITEM/DIR_INDEX.
4759 * @root: the root of the fs/file tree
4760 * @ref_key: the key of the INODE_EXTREF
4761 * @refs: the count of INODE_EXTREF
4762 * @mode: the st_mode of INODE_ITEM
4764 * Return 0 if no error occurred.
4766 static int check_inode_extref(struct btrfs_root *root,
4767 struct btrfs_key *ref_key,
4768 struct extent_buffer *node, int slot, u64 *refs,
4771 struct btrfs_key key;
4772 struct btrfs_key location;
4773 struct btrfs_inode_extref *extref;
4774 char namebuf[BTRFS_NAME_LEN] = {0};
/* Every dir entry for this extref has to point at the inode item (objectid, INODE_ITEM, 0). */
4784 location.objectid = ref_key->objectid;
4785 location.type = BTRFS_INODE_ITEM_KEY;
4786 location.offset = 0;
4788 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4789 total = btrfs_item_size_nr(node, slot);
4792 /* update inode ref count */
4794 name_len = btrfs_inode_extref_name_len(node, extref);
4795 index = btrfs_inode_extref_index(node, extref);
4796 parent = btrfs_inode_extref_parent(node, extref);
4797 if (name_len <= BTRFS_NAME_LEN) {
4800 len = BTRFS_NAME_LEN;
4801 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4802 root->objectid, ref_key->objectid, ref_key->offset);
4804 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4806 /* Check root dir ref name */
4807 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4808 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4809 root->objectid, ref_key->objectid, ref_key->offset,
4811 err |= ROOT_DIR_ERROR;
4814 /* find related dir_index */
4815 key.objectid = parent;
4816 key.type = BTRFS_DIR_INDEX_KEY;
/*
 * find_dir_item() compares its file_type argument with btrfs_dir_type(),
 * so the st_mode has to be converted with imode_to_type() first, as
 * check_inode_ref() does.
 */
4818 ret = find_dir_item(root, &key, &location, namebuf, len,
imode_to_type(mode));
4821 /* find related dir_item */
4822 key.objectid = parent;
4823 key.type = BTRFS_DIR_ITEM_KEY;
4824 key.offset = btrfs_name_hash(namebuf, len);
4825 ret = find_dir_item(root, &key, &location, namebuf, len,
imode_to_type(mode));
/* Step over this extref (header plus on-item name length) to the next one in the item. */
4828 len = sizeof(*extref) + name_len;
4829 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4839 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4840 * DIR_ITEM/DIR_INDEX match.
4841 * Return with @index_ret.
4843 * @root: the root of the fs/file tree
4844 * @key: the key of the INODE_REF/INODE_EXTREF
4845 * @name: the name in the INODE_REF/INODE_EXTREF
4846 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4847 * @index_ret: the index in the INODE_REF/INODE_EXTREF,
4848 * value (u64)-1 means do not check index
4849 * @ext_ref: the EXTENDED_IREF feature
4851 * Return 0 if no error occurred.
4852 * Return >0 for error bitmap
4854 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4855 char *name, int namelen, u64 *index_ret,
4856 unsigned int ext_ref)
4858 struct btrfs_path path;
4859 struct btrfs_inode_ref *ref;
4860 struct btrfs_inode_extref *extref;
4861 struct extent_buffer *node;
4862 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4875 btrfs_init_path(&path);
4876 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4878 ret = INODE_REF_MISSING;
4882 node = path.nodes[0];
4883 slot = path.slots[0];
4885 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4886 total = btrfs_item_size_nr(node, slot);
4888 /* Iterate all entry of INODE_REF */
4889 while (cur < total) {
4890 ret = INODE_REF_MISSING;
4892 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4893 ref_index = btrfs_inode_ref_index(node, ref);
/* An index of (u64)-1 in *index_ret disables the index comparison. */
4894 if (*index_ret != (u64)-1 && *index_ret != ref_index)
/* Guard against a name that would run past the item or exceed BTRFS_NAME_LEN. */
4897 if (cur + sizeof(*ref) + ref_namelen > total ||
4898 ref_namelen > BTRFS_NAME_LEN) {
4899 warning("root %llu INODE %s[%llu %llu] name too long",
4901 key->type == BTRFS_INODE_REF_KEY ?
4903 key->objectid, key->offset);
4905 if (cur + sizeof(*ref) > total)
4907 len = min_t(u32, total - cur - sizeof(*ref),
4913 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4916 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Report the index stored in this ref back through @index_ret. */
4919 *index_ret = ref_index;
4923 len = sizeof(*ref) + ref_namelen;
4924 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4929 /* Skip if not support EXTENDED_IREF feature */
4933 btrfs_release_path(&path);
4934 btrfs_init_path(&path);
/*
 * The caller's key is rewritten in place: its old offset (used as the
 * directory id) seeds the extref hash that becomes the new offset, and
 * the type becomes INODE_EXTREF.
 */
4936 dir_id = key->offset;
4937 key->type = BTRFS_INODE_EXTREF_KEY;
4938 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4940 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4942 ret = INODE_REF_MISSING;
4946 node = path.nodes[0];
4947 slot = path.slots[0];
4949 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4951 total = btrfs_item_size_nr(node, slot);
4953 /* Iterate all entry of INODE_EXTREF */
4954 while (cur < total) {
4955 ret = INODE_REF_MISSING;
4957 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4958 ref_index = btrfs_inode_extref_index(node, extref);
4959 parent = btrfs_inode_extref_parent(node, extref);
4960 if (*index_ret != (u64)-1 && *index_ret != ref_index)
/* Compare the parent recorded in the extref with the directory id taken from the original key offset. */
4963 if (parent != dir_id)
4966 if (ref_namelen <= BTRFS_NAME_LEN) {
4969 len = BTRFS_NAME_LEN;
4970 warning("root %llu INODE %s[%llu %llu] name too long",
4972 key->type == BTRFS_INODE_REF_KEY ?
4974 key->objectid, key->offset);
4976 read_extent_buffer(node, ref_namebuf,
4977 (unsigned long)(extref + 1), len);
4979 if (len != namelen || strncmp(ref_namebuf, name, len))
4982 *index_ret = ref_index;
4987 len = sizeof(*extref) + ref_namelen;
4988 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4993 btrfs_release_path(&path);
/*
 * Print one error line for each class of problem recorded in @err for a
 * dir entry: DIR_ITEM, DIR_INDEX, INODE_ITEM and INODE_REF. The
 * mismatch/missing wording of each line is chosen from the bits of the
 * same class that selected the line.
 */
4997 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
4998 u64 ino, u64 index, const char *namebuf,
4999 int name_len, u8 filetype, int err)
5001 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
5002 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
5003 root->objectid, key->objectid, key->offset, namebuf,
5005 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing");
5008 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
5009 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
5010 root->objectid, key->objectid, index, namebuf, filetype,
5011 err & DIR_INDEX_MISMATCH ? "mismatch" : "missing");
5014 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
5016 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
5017 root->objectid, ino, index, namebuf, filetype,
5018 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
5021 if (err & INODE_REF_MISSING)
5023 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5024 root->objectid, ino, key->objectid, namebuf, filetype);
5029 * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5031 * Returns error after repair
5033 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5034 u64 index, u8 filetype, char *namebuf, u32 name_len,
/* A missing inode item is handled first, by repair_inode_item_missing(). */
5039 if (err & INODE_ITEM_MISSING) {
5040 ret = repair_inode_item_missing(root, ino, filetype);
5042 err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
/* Any bit other than the two INODE_ITEM ones is handed to repair_ternary_lowmem(). */
5045 if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5046 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5047 name_len, filetype, err);
/* Bits cleared from err for the problems handled by repair_ternary_lowmem(). */
5049 err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5050 err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5051 err &= ~(INODE_REF_MISSING);
/*
 * Helper of count_dir_isize(), which calls it once with BTRFS_DIR_ITEM_KEY
 * and once with BTRFS_DIR_INDEX_KEY as @type. The search starts at the
 * highest possible offset, steps back with btrfs_previous_item() and walks
 * the dir entries packed in the item found.
 */
5057 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5060 struct btrfs_key key;
5061 struct btrfs_path path;
5063 struct btrfs_dir_item *di;
5073 key.offset = (u64)-1;
5075 btrfs_init_path(&path);
5076 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5081 /* if found, go to special case */
5086 ret = btrfs_previous_item(root, &path, ino, type);
5094 di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5096 total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5098 while (cur < total) {
5099 len = btrfs_dir_name_len(path.nodes[0], di);
/* The name length is capped at BTRFS_NAME_LEN before it is used. */
5100 if (len > BTRFS_NAME_LEN)
5101 len = BTRFS_NAME_LEN;
5104 len += btrfs_dir_data_len(path.nodes[0], di);
5106 di = (struct btrfs_dir_item *)((char *)di + len);
5112 btrfs_release_path(&path);
/*
 * Store in *@size the sum of the two totals reported by __count_dir_isize()
 * for directory inode @ino: one for DIR_ITEM keys and one for DIR_INDEX keys.
 */
5116 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5123 ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5127 ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5131 *size = item_size + index_size;
5135 error("failed to count root %llu INODE[%llu] root size",
5136 root->objectid, ino);
5141 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5142 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5144 * @root: the root of the fs/file tree
5145 * @di_key: the key of the DIR_ITEM/DIR_INDEX
5147 * @size: the st_size of the INODE_ITEM
5148 * @ext_ref: the EXTENDED_IREF feature
5150 * Return 0 if no error occurred.
5151 * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
5153 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5154 struct btrfs_path *path, u64 *size,
5155 unsigned int ext_ref)
5157 struct btrfs_dir_item *di;
5158 struct btrfs_inode_item *ii;
5159 struct btrfs_key key;
5160 struct btrfs_key location;
5161 struct extent_buffer *node;
5163 char namebuf[BTRFS_NAME_LEN] = {0};
5175 int need_research = 0;
5178 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5179 * ignore index check.
5181 if (di_key->type == BTRFS_DIR_INDEX_KEY)
5182 index = di_key->offset;
5189 /* since after repair, path and the dir item may be changed */
5190 if (need_research) {
/* A re-search follows a repair, so the caller is told to recount the directory size. */
5192 err |= DIR_COUNT_AGAIN;
5193 btrfs_release_path(path);
5194 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5195 /* the item was deleted, let path point the last checked item */
5197 if (path->slots[0] == 0)
5198 btrfs_prev_leaf(root, path);
5206 node = path->nodes[0];
5207 slot = path->slots[0];
5209 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5210 total = btrfs_item_size_nr(node, slot);
5211 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5213 while (cur < total) {
5214 data_len = btrfs_dir_data_len(node, di);
5217 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5219 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5220 di_key->objectid, di_key->offset, data_len);
5222 name_len = btrfs_dir_name_len(node, di);
5223 if (name_len <= BTRFS_NAME_LEN) {
5226 len = BTRFS_NAME_LEN;
5227 warning("root %llu %s[%llu %llu] name too long",
5229 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5230 di_key->objectid, di_key->offset);
/* The directory size accumulates the on-item name length, not the trimmed one. */
5232 (*size) += name_len;
5233 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5235 filetype = btrfs_dir_type(node, di);
5237 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5238 di_key->offset != btrfs_name_hash(namebuf, len)) {
5240 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5241 root->objectid, di_key->objectid, di_key->offset,
5242 namebuf, len, filetype, di_key->offset,
5243 btrfs_name_hash(namebuf, len));
5246 btrfs_dir_item_key_to_cpu(node, di, &location);
5247 /* Ignore related ROOT_ITEM check */
5248 if (location.type == BTRFS_ROOT_ITEM_KEY)
5251 btrfs_release_path(path);
5252 /* Check relative INODE_ITEM(existence/filetype) */
5253 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5255 tmp_err |= INODE_ITEM_MISSING;
5259 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5260 struct btrfs_inode_item);
5261 mode = btrfs_inode_mode(path->nodes[0], ii);
5262 if (imode_to_type(mode) != filetype) {
5263 tmp_err |= INODE_ITEM_MISMATCH;
5267 /* Check relative INODE_REF/INODE_EXTREF */
/*
 * The ref key is (inode, INODE_REF, parent dir). find_inode_ref() may
 * rewrite key.type and key.offset when it falls back to extrefs.
 */
5268 key.objectid = location.objectid;
5269 key.type = BTRFS_INODE_REF_KEY;
5270 key.offset = di_key->objectid;
5271 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5274 /* check relative INDEX/ITEM */
/*
 * The counterpart is chosen by the type of the item being checked: a
 * DIR_ITEM is cross-checked against its DIR_INDEX and a DIR_INDEX against
 * its DIR_ITEM. key.type cannot be used for this test because it holds an
 * INODE_REF/INODE_EXTREF type at this point.
 */
5275 key.objectid = di_key->objectid;
5276 if (di_key->type == BTRFS_DIR_ITEM_KEY) {
5277 key.type = BTRFS_DIR_INDEX_KEY;
5280 key.type = BTRFS_DIR_ITEM_KEY;
5281 key.offset = btrfs_name_hash(namebuf, name_len);
5284 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5285 name_len, filetype);
5286 /* find_dir_item may find index */
5287 if (key.type == BTRFS_DIR_INDEX_KEY)
5291 if (tmp_err && repair) {
5292 ret = repair_dir_item(root, di_key->objectid,
5293 location.objectid, index,
5294 imode_to_type(mode), namebuf,
5296 if (ret != tmp_err) {
5301 btrfs_release_path(path);
5302 print_dir_item_err(root, di_key, location.objectid, index,
5303 namebuf, name_len, filetype, tmp_err);
/* Step over this entry (header, on-item name length and data) to the next one. */
5305 len = sizeof(*di) + name_len + data_len;
5306 di = (struct btrfs_dir_item *)((char *)di + len);
5309 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5310 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5311 root->objectid, di_key->objectid,
/* Point the path back at the item being checked before returning to the caller. */
5318 btrfs_release_path(path);
5319 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
/* NOTE(review): a negative errno is OR-ed into the err bitmap here; confirm that callers cope with that. */
5321 err |= ret > 0 ? -ENOENT : ret;
5326 * Check file extent datasum/hole, update the size of the file extents,
5327 * check and update the last offset of the file extent.
5329 * @root: the root of fs/file tree.
5330 * @fkey: the key of the file extent.
5331 * @nodatasum: INODE_NODATASUM feature.
5332 * @size: the sum of all EXTENT_DATA items size for this inode.
5333 * @end: the offset of the last extent.
5335 * Return 0 if no error occurred.
5337 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5338 struct extent_buffer *node, int slot,
5339 unsigned int nodatasum, u64 *size, u64 *end)
5341 struct btrfs_file_extent_item *fi;
5344 u64 extent_num_bytes;
5346 u64 csum_found; /* In byte size, sectorsize aligned */
5347 u64 search_start; /* Logical range start we search for csum */
5348 u64 search_len; /* Logical range len we search for csum */
5349 unsigned int extent_type;
5350 unsigned int is_hole;
5355 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5357 /* Check inline extent */
5358 extent_type = btrfs_file_extent_type(node, fi);
5359 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5360 struct btrfs_item *e = btrfs_item_nr(slot);
5361 u32 item_inline_len;
5363 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5364 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5365 compressed = btrfs_file_extent_compression(node, fi);
5366 if (extent_num_bytes == 0) {
5368 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5369 root->objectid, fkey->objectid, fkey->offset);
5370 err |= FILE_EXTENT_ERROR;
/* The inline length comparison is skipped for compressed extents. */
5372 if (!compressed && extent_num_bytes != item_inline_len) {
5374 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5375 root->objectid, fkey->objectid, fkey->offset,
5376 extent_num_bytes, item_inline_len);
5377 err |= FILE_EXTENT_ERROR;
/* Inline extents advance both the running end offset and the accumulated size by their length. */
5379 *end += extent_num_bytes;
5380 *size += extent_num_bytes;
5384 /* Check extent type */
5385 if (extent_type != BTRFS_FILE_EXTENT_REG &&
5386 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5387 err |= FILE_EXTENT_ERROR;
5388 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5389 root->objectid, fkey->objectid, fkey->offset);
5393 /* Check REG_EXTENT/PREALLOC_EXTENT */
5394 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5395 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5396 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5397 extent_offset = btrfs_file_extent_offset(node, fi);
5398 compressed = btrfs_file_extent_compression(node, fi);
/* An extent whose disk_bytenr and disk_num_bytes are both zero is treated as a hole. */
5399 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5402 * Check EXTENT_DATA csum
5404 * For plain (uncompressed) extent, we should only check the range
5405 * we're referring to, as it's possible that part of prealloc extent
5406 * has been written, and has csum:
5408 * |<--- Original large preallocated extent A ---->|
5409 * |<- Prealloc File Extent ->|<- Regular Extent ->|
5412 * For compressed extent, we should check the whole range.
5415 search_start = disk_bytenr + extent_offset;
5416 search_len = extent_num_bytes;
5418 search_start = disk_bytenr;
5419 search_len = disk_num_bytes;
/*
 * NOTE(review): csum_found is read by the first branch below before ret is
 * examined; confirm count_csum_range() always sets it, even on failure.
 */
5421 ret = count_csum_range(root, search_start, search_len, &csum_found);
5422 if (csum_found > 0 && nodatasum) {
5423 err |= ODD_CSUM_ITEM;
5424 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5425 root->objectid, fkey->objectid, fkey->offset);
5426 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5427 !is_hole && (ret < 0 || csum_found < search_len)) {
5428 err |= CSUM_ITEM_MISSING;
5429 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5430 root->objectid, fkey->objectid, fkey->offset,
5431 csum_found, search_len);
5432 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5433 err |= ODD_CSUM_ITEM;
5434 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5435 root->objectid, fkey->objectid, fkey->offset, csum_found);
5438 /* Check EXTENT_DATA hole */
/* Unless the no_holes flag is set, each extent has to start where the previous one ended (*end). */
5439 if (!no_holes && *end != fkey->offset) {
5440 err |= FILE_EXTENT_ERROR;
5441 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
5442 root->objectid, fkey->objectid, fkey->offset);
5445 *end += extent_num_bytes;
5447 *size += extent_num_bytes;
5453 * Set inode item nbytes to @nbytes
5455 * Returns 0 on success
5456 * Returns != 0 on error
5458 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5459 struct btrfs_path *path,
5460 u64 ino, u64 nbytes)
5462 struct btrfs_trans_handle *trans;
5463 struct btrfs_inode_item *ii;
5464 struct btrfs_key key;
5465 struct btrfs_key research_key;
/* Remember the key the caller's path points at so the path can be restored at the end. */
5469 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5472 key.type = BTRFS_INODE_ITEM_KEY;
5475 trans = btrfs_start_transaction(root, 1);
5476 if (IS_ERR(trans)) {
5477 ret = PTR_ERR(trans);
/* Re-search inside the transaction, with the cow argument set, before modifying the leaf. */
5482 btrfs_release_path(path);
5483 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5491 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5492 struct btrfs_inode_item);
5493 btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5494 btrfs_mark_buffer_dirty(path->nodes[0]);
5496 btrfs_commit_transaction(trans, root);
5499 error("failed to set nbytes in inode %llu root %llu",
5500 ino, root->root_key.objectid);
/* The newline terminates the message instead of splitting it in the middle. */
5502 printf("Set nbytes in inode item %llu root %llu to %llu\n", ino,
5503 root->root_key.objectid, nbytes);
/* Restore the caller's path to the item it pointed at on entry. */
5506 btrfs_release_path(path);
5507 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5514 * Set directory inode isize to @isize.
5516 * Returns 0 on success.
5517 * Returns != 0 on error.
/*
 * Store @isize into the size field of an INODE_ITEM of @root inside a
 * transaction of its own; the messages below report the inode as ino.
 *
 * @path is released and reused for the lookup. The key it pointed at on entry
 * is saved first and searched again at the end.
 */
5519 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5520 struct btrfs_path *path,
5523 struct btrfs_trans_handle *trans;
5524 struct btrfs_inode_item *ii;
5525 struct btrfs_key key;
5526 struct btrfs_key research_key;
/* Remember which item @path points at so it can be searched again later. */
5530 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5533 key.type = BTRFS_INODE_ITEM_KEY;
5536 trans = btrfs_start_transaction(root, 1);
5537 if (IS_ERR(trans)) {
5538 ret = PTR_ERR(trans);
5543 btrfs_release_path(path);
5544 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Update the field in the leaf and mark the buffer dirty before committing. */
5552 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5553 struct btrfs_inode_item);
5554 btrfs_set_inode_size(path->nodes[0], ii, isize);
5555 btrfs_mark_buffer_dirty(path->nodes[0]);
5557 btrfs_commit_transaction(trans, root);
5560 error("failed to set isize in inode %llu root %llu",
5561 ino, root->root_key.objectid);
5563 printf("Set isize in inode %llu root %llu to %llu\n",
5564 ino, root->root_key.objectid, isize);
/* Search again for the key saved on entry. */
5566 btrfs_release_path(path);
5567 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5574 * Wrapper function for btrfs_add_orphan_item().
5576 * Returns 0 on success.
5577 * Returns != 0 on error.
/*
 * Add an orphan item for inode @ino to @root via btrfs_add_orphan_item(),
 * inside a transaction of its own.
 *
 * @path is released and handed to the helper. The key it pointed at on entry
 * is saved first and searched again at the end.
 */
5579 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5580 struct btrfs_path *path, u64 ino)
5582 struct btrfs_trans_handle *trans;
5583 struct btrfs_key research_key;
/* Remember which item @path points at so it can be searched again later. */
5587 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5589 trans = btrfs_start_transaction(root, 1);
5590 if (IS_ERR(trans)) {
5591 ret = PTR_ERR(trans);
5596 btrfs_release_path(path);
5597 ret = btrfs_add_orphan_item(trans, root, path, ino);
5599 btrfs_commit_transaction(trans, root);
5602 error("failed to add inode %llu as orphan item root %llu",
5603 ino, root->root_key.objectid);
5605 printf("Added inode %llu as orphan item root %llu\n",
5606 ino, root->root_key.objectid);
/* Search again for the key saved on entry. */
5608 btrfs_release_path(path);
5609 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5615 /* Set inode_item nlink to @ref_count.
5616 * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
5618 * Returns 0 on success
/*
 * Rewrite the nlink field of an INODE_ITEM of @root to @ref_count inside a
 * transaction of its own; the messages below report the inode as @ino.
 *
 * @name/@namelen: file name used in messages and for the lost+found link;
 *                 when absent, the decimal inode number is used instead.
 * @ref_count:     number of references found; when it is 0 the inode is first
 *                 handed to link_inode_to_lostfound(), which receives a pointer
 *                 to @ref_count.
 * @filetype:      passed to link_inode_to_lostfound() and printed in messages.
 *
 * @path is released and reused. The key it pointed at on entry is saved in
 * old_key and searched again at the end.
 */
5620 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
5621 struct btrfs_path *path, u64 ino,
5622 const char *name, u32 namelen,
5623 u64 ref_count, u8 filetype, u64 *nlink)
5625 struct btrfs_trans_handle *trans;
5626 struct btrfs_inode_item *ii;
5627 struct btrfs_key key;
5628 struct btrfs_key old_key;
5629 char namebuf[BTRFS_NAME_LEN] = {0};
/* Remember which item @path points at so it can be searched again later. */
5635 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
/*
 * NOTE(review): when namelen == BTRFS_NAME_LEN the copy fills namebuf
 * completely, leaving no terminating NUL for the %s conversions below.
 * Confirm whether callers can pass a name of that length.
 */
5637 if (name && namelen) {
5638 ASSERT(namelen <= BTRFS_NAME_LEN);
5639 memcpy(namebuf, name, namelen);
5642 sprintf(namebuf, "%llu", ino);
5643 name_len = count_digits(ino);
5644 printf("Can't find file name for inode %llu, use %s instead\n",
5648 trans = btrfs_start_transaction(root, 1);
5649 if (IS_ERR(trans)) {
5650 ret = PTR_ERR(trans);
5654 btrfs_release_path(path);
5655 /* if refs is 0, put it into lostfound */
5656 if (ref_count == 0) {
5657 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
5658 name_len, filetype, &ref_count);
5663 /* reset inode_item's nlink to ref_count */
5665 key.type = BTRFS_INODE_ITEM_KEY;
5668 btrfs_release_path(path);
5669 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5675 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5676 struct btrfs_inode_item);
5677 btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
5678 btrfs_mark_buffer_dirty(path->nodes[0]);
5683 btrfs_commit_transaction(trans, root);
/* Argument order follows the format: inode number first, then root id. */
5687 "fail to repair nlink of inode %llu root %llu name %s filetype %u",
5688 ino, root->objectid, namebuf, filetype);
5690 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
5691 ino, root->objectid, namebuf, filetype);
/* Search again for the key saved on entry; the result goes to ret2. */
5694 btrfs_release_path(path);
5695 ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
5702 * Check INODE_ITEM and related ITEMs (the same inode number)
5703 * 1. check link count
5704 * 2. check inode ref/extref
5705 * 3. check dir item/index
5707 * @ext_ref: the EXTENDED_IREF feature
5709 * Return 0 if no error occurred.
5710 * Return >0 (an error bitmap) if an error was found or the traversal is done.
/*
 * Read the INODE_ITEM that @path points at, then step through the following
 * items, dispatching each one on its key type to the matching checker, and
 * finally compare the inode's nlink/isize/nbytes against the values gathered
 * along the way. With repair set, the lowmem repair helpers are invoked for
 * mismatches.
 */
5712 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5713 unsigned int ext_ref)
5715 struct extent_buffer *node;
5716 struct btrfs_inode_item *ii;
5717 struct btrfs_key key;
5726 u64 extent_size = 0;
5728 unsigned int nodatasum;
5732 char namebuf[BTRFS_NAME_LEN] = {0};
5735 node = path->nodes[0];
5736 slot = path->slots[0];
5738 btrfs_item_key_to_cpu(node, &key, slot);
5739 inode_id = key.objectid;
/* Items under the orphan objectid are stepped over with btrfs_next_item(). */
5741 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5742 ret = btrfs_next_item(root, path);
/* Cache the inode fields that the later comparisons need. */
5748 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5749 isize = btrfs_inode_size(node, ii);
5750 nbytes = btrfs_inode_nbytes(node, ii);
5751 mode = btrfs_inode_mode(node, ii);
5752 dir = imode_to_type(mode) == BTRFS_FT_DIR;
5753 nlink = btrfs_inode_nlink(node, ii);
5754 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5757 ret = btrfs_next_item(root, path);
5759 /* out will fill 'err' using current statistics */
5761 } else if (ret > 0) {
5766 node = path->nodes[0];
5767 slot = path->slots[0];
5768 btrfs_item_key_to_cpu(node, &key, slot);
/* A different objectid means this inode's items are exhausted. */
5769 if (key.objectid != inode_id)
5773 case BTRFS_INODE_REF_KEY:
5774 ret = check_inode_ref(root, &key, path, namebuf,
5775 &name_len, &refs, mode);
5778 case BTRFS_INODE_EXTREF_KEY:
5779 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5780 warning("root %llu EXTREF[%llu %llu] isn't supported",
5781 root->objectid, key.objectid,
5783 ret = check_inode_extref(root, &key, node, slot, &refs,
5787 case BTRFS_DIR_ITEM_KEY:
5788 case BTRFS_DIR_INDEX_KEY:
5790 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5791 root->objectid, inode_id,
5792 imode_to_type(mode), key.objectid,
5795 ret = check_dir_item(root, &key, path, &size, ext_ref);
5798 case BTRFS_EXTENT_DATA_KEY:
5800 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5801 root->objectid, inode_id, key.objectid,
5804 ret = check_file_extent(root, &key, node, slot,
5805 nodatasum, &extent_size,
5809 case BTRFS_XATTR_ITEM_KEY:
5812 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5813 key.objectid, key.type, key.offset);
5818 /* verify INODE_ITEM nlink/isize/nbytes */
/* In repair mode a pending DIR_COUNT_AGAIN flag triggers a recount of size. */
5820 if (repair && (err & DIR_COUNT_AGAIN)) {
5821 err &= ~DIR_COUNT_AGAIN;
5822 count_dir_isize(root, inode_id, &size);
5825 if ((nlink != 1 || refs != 1) && repair) {
5826 ret = repair_inode_nlinks_lowmem(root, path, inode_id,
5827 namebuf, name_len, refs, imode_to_type(mode),
5832 err |= LINK_COUNT_ERROR;
5833 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5834 root->objectid, inode_id, nlink);
5838 * Just a warning, as dir inode nbytes is just an
5839 * instructive value.
5841 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5842 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5843 root->objectid, inode_id,
5844 root->fs_info->nodesize);
5847 if (isize != size) {
5849 ret = repair_dir_isize_lowmem(root, path,
5851 if (!repair || ret) {
5854 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5855 root->objectid, inode_id, isize, size);
5859 if (nlink != refs) {
5861 ret = repair_inode_nlinks_lowmem(root, path,
5862 inode_id, namebuf, name_len, refs,
5863 imode_to_type(mode), &nlink);
5864 if (!repair || ret) {
5865 err |= LINK_COUNT_ERROR;
5867 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5868 root->objectid, inode_id, nlink, refs);
5870 } else if (!nlink) {
5872 ret = repair_inode_orphan_item_lowmem(root,
5874 if (!repair || ret) {
5876 error("root %llu INODE[%llu] is orphan item",
5877 root->objectid, inode_id);
5881 if (!nbytes && !no_holes && extent_end < isize) {
5882 err |= NBYTES_ERROR;
5883 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5884 root->objectid, inode_id, isize);
5887 if (nbytes != extent_size) {
5889 ret = repair_inode_nbytes_lowmem(root, path,
5890 inode_id, extent_size);
5891 if (!repair || ret) {
5892 err |= NBYTES_ERROR;
5894 "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5895 root->objectid, inode_id, nbytes,
5905 * Insert the missing inode item and inode ref.
5907 * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref dir.
5908 * Root dir should be handled specially because root dir is the root of fs.
5910 * returns err (>0 or 0) after repair
/*
 * Repair the first inode (BTRFS_FIRST_FREE_OBJECTID) of @root according to
 * the bits set in @err: INODE_REF_MISSING inserts a ".." INODE_REF that points
 * back at the inode itself, INODE_ITEM_MISSING calls
 * repair_inode_item_missing() with a directory file type. Repaired bits are
 * cleared from @err.
 */
5912 static int repair_fs_first_inode(struct btrfs_root *root, int err)
5914 struct btrfs_trans_handle *trans;
5915 struct btrfs_key key;
5916 struct btrfs_path path;
5917 int filetype = BTRFS_FT_DIR;
5920 btrfs_init_path(&path);
5922 if (err & INODE_REF_MISSING) {
/* The ref's objectid and offset are both the first inode's own id. */
5923 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5924 key.type = BTRFS_INODE_REF_KEY;
5925 key.offset = BTRFS_FIRST_FREE_OBJECTID;
5927 trans = btrfs_start_transaction(root, 1);
5928 if (IS_ERR(trans)) {
5929 ret = PTR_ERR(trans);
5933 btrfs_release_path(&path);
5934 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
5938 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
5939 BTRFS_FIRST_FREE_OBJECTID,
5940 BTRFS_FIRST_FREE_OBJECTID, 0);
5944 printf("Add INODE_REF[%llu %llu] name %s\n",
5945 BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
5947 err &= ~INODE_REF_MISSING;
5950 error("fail to insert first inode's ref");
5951 btrfs_commit_transaction(trans, root);
5954 if (err & INODE_ITEM_MISSING) {
5955 ret = repair_inode_item_missing(root,
5956 BTRFS_FIRST_FREE_OBJECTID, filetype);
5959 err &= ~INODE_ITEM_MISSING;
5963 error("fail to repair first inode");
5964 btrfs_release_path(&path);
5969 * check first root dir's inode_item and inode_ref
5971 * returns 0 means no error
5972 * returns >0 means error
5973 * returns <0 means fatal error
/*
 * Verify that @root has an INODE_ITEM for BTRFS_FIRST_FREE_OBJECTID whose mode
 * is a directory, and that a ".." inode ref for it can be found. Problems are
 * collected as bits in err and reported. repair_fs_first_inode() is called on
 * the collected bits before reporting (NOTE(review): presumably only in repair
 * mode).
 *
 * @ext_ref is forwarded to find_inode_ref().
 */
5975 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5977 struct btrfs_path path;
5978 struct btrfs_key key;
5979 struct btrfs_inode_item *ii;
5985 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5986 key.type = BTRFS_INODE_ITEM_KEY;
5989 /* For root being dropped, we don't need to check first inode */
5990 if (btrfs_root_refs(&root->root_item) == 0 &&
5991 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5992 BTRFS_FIRST_FREE_OBJECTID)
5995 btrfs_init_path(&path);
5996 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6001 err |= INODE_ITEM_MISSING;
/* The first inode must have a directory mode. */
6003 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6004 struct btrfs_inode_item);
6005 mode = btrfs_inode_mode(path.nodes[0], ii);
6006 if (imode_to_type(mode) != BTRFS_FT_DIR)
6007 err |= INODE_ITEM_MISMATCH;
6010 /* lookup first inode ref */
6011 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6012 key.type = BTRFS_INODE_REF_KEY;
6013 /* special index value */
6016 ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6022 btrfs_release_path(&path);
6025 err = repair_fs_first_inode(root, err);
6027 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6028 error("root dir INODE_ITEM is %s",
6029 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6030 if (err & INODE_REF_MISSING)
6031 error("root dir INODE_REF is missing");
/* A negative ret takes precedence over the error bitmap. */
6033 return ret < 0 ? ret : err;
/*
 * Look up a tree backref in @rec's backref rb-tree. A template entry is
 * filled in (with @parent and the full_backref bit in the lines visible here)
 * and handed to rb_search() with compare_extent_backref() as the comparator.
 * The result pointer starts out as NULL.
 */
6036 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6037 u64 parent, u64 root)
6039 struct rb_node *node;
6040 struct tree_backref *back = NULL;
6041 struct tree_backref match = {
6048 match.parent = parent;
6049 match.node.full_backref = 1;
6054 node = rb_search(&rec->backref_tree, &match.node.node,
6055 (rb_compare_keys)compare_extent_backref, NULL);
/* Convert the rb-tree node back to its enclosing tree_backref. */
6057 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Look up a data backref in @rec's backref rb-tree. A template entry is
 * filled in from the arguments (found_ref, disk_bytenr, @parent and the
 * full_backref bit in the lines visible here) and handed to rb_search() with
 * compare_extent_backref() as the comparator. The result pointer starts out
 * as NULL.
 */
6062 static struct data_backref *find_data_backref(struct extent_record *rec,
6063 u64 parent, u64 root,
6064 u64 owner, u64 offset,
6066 u64 disk_bytenr, u64 bytes)
6068 struct rb_node *node;
6069 struct data_backref *back = NULL;
6070 struct data_backref match = {
6077 .found_ref = found_ref,
6078 .disk_bytenr = disk_bytenr,
6082 match.parent = parent;
6083 match.node.full_backref = 1;
6088 node = rb_search(&rec->backref_tree, &match.node.node,
6089 (rb_compare_keys)compare_extent_backref, NULL);
/* Convert the rb-tree node back to its enclosing data_backref. */
6091 back = to_data_backref(rb_node_to_extent_backref(node));
6096 * Iterate all items on the tree and call check_inode_item() to check.
6098 * @root: the root of the tree to be checked.
6099 * @ext_ref: the EXTENDED_IREF feature
6101 * Return 0 if no error found.
6102 * Return <0 for error.
/*
 * Check one fs/subvolume tree: first the first inode via
 * check_fs_first_inode(), then the tree itself by alternating
 * walk_down_tree_v2() and walk_up_tree_v2() over a local path.
 *
 * The walk starts at the root node when the root still has references or its
 * drop_progress objectid is 0; otherwise it starts at the drop_progress key
 * on level drop_level.
 */
6104 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6106 struct btrfs_path path;
6107 struct node_refs nrefs;
6108 struct btrfs_root_item *root_item = &root->root_item;
6114 * We need to manually check the first inode item(256)
6115 * As the following traversal function will only start from
6116 * the first inode item in the leaf, if inode item(256) is missing
6117 * we will just skip it forever.
6119 ret = check_fs_first_inode(root, ext_ref);
6124 memset(&nrefs, 0, sizeof(nrefs));
6125 level = btrfs_header_level(root->node);
6126 btrfs_init_path(&path);
6128 if (btrfs_root_refs(root_item) > 0 ||
6129 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
/* Seed the path with the root node; the extra get is for the path's use. */
6130 path.nodes[level] = root->node;
6131 path.slots[level] = 0;
6132 extent_buffer_get(root->node);
6134 struct btrfs_key key;
/* Start the search at the recorded drop progress instead. */
6136 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6137 level = root_item->drop_level;
6138 path.lowest_level = level;
6139 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6146 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
6149 /* if ret is negative, walk shall stop */
6155 ret = walk_up_tree_v2(root, &path, &level);
6157 /* Normal exit, reset ret to err */
6164 btrfs_release_path(&path);
6169 * Find the relative ref for root_ref and root_backref.
6171 * @root: the root of the root tree.
6172 * @ref_key: the key of the root ref.
6174 * Return 0 if no error occurred.
/*
 * Cross-check one ROOT_REF or ROOT_BACKREF item against its counterpart.
 *
 * @root:    tree searched for the counterpart item.
 * @ref_key: key of the item being checked.
 * @node:    leaf holding the item.
 * @slot:    slot of the item in @node.
 *
 * The counterpart is searched for with objectid and offset swapped and the
 * key type flipped. ROOT_REF_MISSING is recorded if the search fails, and
 * ROOT_REF_MISMATCH if dirid, sequence, name length or name differ.
 */
6176 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6177 struct extent_buffer *node, int slot)
6179 struct btrfs_path path;
6180 struct btrfs_key key;
6181 struct btrfs_root_ref *ref;
6182 struct btrfs_root_ref *backref;
6183 char ref_name[BTRFS_NAME_LEN] = {0};
6184 char backref_name[BTRFS_NAME_LEN] = {0};
6190 u32 backref_namelen;
6195 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6196 ref_dirid = btrfs_root_ref_dirid(node, ref);
6197 ref_seq = btrfs_root_ref_sequence(node, ref);
6198 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* Names longer than BTRFS_NAME_LEN are clamped so they fit the buffer. */
6200 if (ref_namelen <= BTRFS_NAME_LEN) {
6203 len = BTRFS_NAME_LEN;
6204 warning("%s[%llu %llu] ref_name too long",
6205 ref_key->type == BTRFS_ROOT_REF_KEY ?
6206 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
/* The name is read from directly after the btrfs_root_ref structure. */
6209 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6211 /* Find relative root_ref */
6212 key.objectid = ref_key->offset;
/* Sum minus own type yields the other one of ROOT_REF / ROOT_BACKREF. */
6213 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6214 key.offset = ref_key->objectid;
6216 btrfs_init_path(&path);
6217 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6219 err |= ROOT_REF_MISSING;
6220 error("%s[%llu %llu] couldn't find relative ref",
6221 ref_key->type == BTRFS_ROOT_REF_KEY ?
6222 "ROOT_REF" : "ROOT_BACKREF",
6223 ref_key->objectid, ref_key->offset);
6227 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6228 struct btrfs_root_ref);
6229 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6230 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6231 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6233 if (backref_namelen <= BTRFS_NAME_LEN) {
6234 len = backref_namelen;
6236 len = BTRFS_NAME_LEN;
6237 warning("%s[%llu %llu] ref_name too long",
6238 key.type == BTRFS_ROOT_REF_KEY ?
6239 "ROOT_REF" : "ROOT_BACKREF",
6240 key.objectid, key.offset);
6242 read_extent_buffer(path.nodes[0], backref_name,
6243 (unsigned long)(backref + 1), len);
/* len holds the clamped counterpart name length at this point. */
6245 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6246 ref_namelen != backref_namelen ||
6247 strncmp(ref_name, backref_name, len)) {
6248 err |= ROOT_REF_MISMATCH;
6249 error("%s[%llu %llu] mismatch relative ref",
6250 ref_key->type == BTRFS_ROOT_REF_KEY ?
6251 "ROOT_REF" : "ROOT_BACKREF",
6252 ref_key->objectid, ref_key->offset);
6255 btrfs_release_path(&path);
6260 * Check all fs/file tree in low_memory mode.
6262 * 1. for fs tree root item, call check_fs_root_v2()
6263 * 2. for fs tree root ref/backref, call check_root_ref()
6265 * Return 0 if no error occurred.
/*
 * Walk the tree root starting at BTRFS_FS_TREE_OBJECTID. ROOT_ITEMs whose
 * objectid passes fs_root_objectid() are read and checked with
 * check_fs_root_v2(); ROOT_REF and ROOT_BACKREF items are checked with
 * check_root_ref(). ext_ref is taken from the EXTENDED_IREF incompat flag.
 */
6267 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6269 struct btrfs_root *tree_root = fs_info->tree_root;
6270 struct btrfs_root *cur_root = NULL;
6271 struct btrfs_path path;
6272 struct btrfs_key key;
6273 struct extent_buffer *node;
6274 unsigned int ext_ref;
6279 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6281 btrfs_init_path(&path);
6282 key.objectid = BTRFS_FS_TREE_OBJECTID;
6284 key.type = BTRFS_ROOT_ITEM_KEY;
6286 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6290 } else if (ret > 0) {
6296 node = path.nodes[0];
6297 slot = path.slots[0];
6298 btrfs_item_key_to_cpu(node, &key, slot);
/* Objectids above BTRFS_LAST_FREE_OBJECTID are outside the range of interest. */
6299 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6301 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6302 fs_root_objectid(key.objectid)) {
/* Reloc trees are read without the cache and freed again further down. */
6303 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6304 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6307 key.offset = (u64)-1;
6308 cur_root = btrfs_read_fs_root(fs_info, &key);
6311 if (IS_ERR(cur_root)) {
6312 error("Fail to read fs/subvol tree: %lld",
6318 ret = check_fs_root_v2(cur_root, ext_ref);
6321 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6322 btrfs_free_fs_root(cur_root);
6323 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6324 key.type == BTRFS_ROOT_BACKREF_KEY) {
6325 ret = check_root_ref(tree_root, &key, node, slot);
6329 ret = btrfs_next_item(tree_root, &path);
6339 btrfs_release_path(&path);
/*
 * Entry point for the fs-roots pass: dispatch to check_fs_roots_v2() when
 * check_mode is CHECK_MODE_LOWMEM, otherwise to check_fs_roots() with
 * @root_cache. A banner goes to stderr unless progress reporting is enabled.
 */
6343 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6344 struct cache_tree *root_cache)
6348 if (!ctx.progress_enabled)
6349 fprintf(stderr, "checking fs roots\n");
6350 if (check_mode == CHECK_MODE_LOWMEM)
6351 ret = check_fs_roots_v2(fs_info);
6353 ret = check_fs_roots(fs_info, root_cache);
/*
 * Walk every backref of @rec and look for inconsistencies:
 *  - a backref not found in the extent tree,
 *  - a tree backref that was never referenced back,
 *  - a data backref whose found_ref differs from num_refs,
 *  - a data backref whose disk_bytenr or bytes differ from the record,
 *  - a total found count that differs from rec->refs.
 * Each problem has a diagnostic written to stderr.
 *
 * NOTE(review): @print_errs presumably gates the diagnostics, and the return
 * value presumably is non-zero when a problem was seen;
 * maybe_free_extent_rec() treats a zero return as "nothing wrong". Confirm
 * against the full function body.
 */
6358 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6360 struct extent_backref *back, *tmp;
6361 struct tree_backref *tback;
6362 struct data_backref *dback;
6366 rbtree_postorder_for_each_entry_safe(back, tmp,
6367 &rec->backref_tree, node) {
6368 if (!back->found_extent_tree) {
6372 if (back->is_data) {
6373 dback = to_data_backref(back);
6374 fprintf(stderr, "Data backref %llu %s %llu"
6375 " owner %llu offset %llu num_refs %lu"
6376 " not found in extent tree\n",
6377 (unsigned long long)rec->start,
6378 back->full_backref ?
6380 back->full_backref ?
6381 (unsigned long long)dback->parent:
6382 (unsigned long long)dback->root,
6383 (unsigned long long)dback->owner,
6384 (unsigned long long)dback->offset,
6385 (unsigned long)dback->num_refs);
6387 tback = to_tree_backref(back);
6388 fprintf(stderr, "Tree backref %llu parent %llu"
6389 " root %llu not found in extent tree\n",
6390 (unsigned long long)rec->start,
6391 (unsigned long long)tback->parent,
6392 (unsigned long long)tback->root);
6395 if (!back->is_data && !back->found_ref) {
6399 tback = to_tree_backref(back);
6400 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6401 (unsigned long long)rec->start,
6402 back->full_backref ? "parent" : "root",
6403 back->full_backref ?
6404 (unsigned long long)tback->parent :
6405 (unsigned long long)tback->root, back);
6407 if (back->is_data) {
6408 dback = to_data_backref(back);
6409 if (dback->found_ref != dback->num_refs) {
6413 fprintf(stderr, "Incorrect local backref count"
6414 " on %llu %s %llu owner %llu"
6415 " offset %llu found %u wanted %u back %p\n",
6416 (unsigned long long)rec->start,
6417 back->full_backref ?
6419 back->full_backref ?
6420 (unsigned long long)dback->parent:
6421 (unsigned long long)dback->root,
6422 (unsigned long long)dback->owner,
6423 (unsigned long long)dback->offset,
6424 dback->found_ref, dback->num_refs, back);
6426 if (dback->disk_bytenr != rec->start) {
6430 fprintf(stderr, "Backref disk bytenr does not"
6431 " match extent record, bytenr=%llu, "
6432 "ref bytenr=%llu\n",
6433 (unsigned long long)rec->start,
6434 (unsigned long long)dback->disk_bytenr);
6437 if (dback->bytes != rec->nr) {
6441 fprintf(stderr, "Backref bytes do not match "
6442 "extent backref, bytenr=%llu, ref "
6443 "bytes=%llu, backref bytes=%llu\n",
6444 (unsigned long long)rec->start,
6445 (unsigned long long)rec->nr,
6446 (unsigned long long)dback->bytes);
/* Accumulate the per-backref counts for the global comparison below. */
6449 if (!back->is_data) {
6452 dback = to_data_backref(back);
6453 found += dback->found_ref;
6456 if (found != rec->refs) {
6460 fprintf(stderr, "Incorrect global backref count "
6461 "on %llu found %llu wanted %llu\n",
6462 (unsigned long long)rec->start,
6463 (unsigned long long)found,
6464 (unsigned long long)rec->refs);
/*
 * Per-node callback passed to rb_free_nodes(): converts the rb-tree @node
 * back to its enclosing extent_backref.
 * NOTE(review): presumably frees that backref afterwards; confirm.
 */
6470 static void __free_one_backref(struct rb_node *node)
6472 struct extent_backref *back = rb_node_to_extent_backref(node);
/* Run __free_one_backref() on every node of @rec's backref rb-tree. */
6477 static void free_all_extent_backrefs(struct extent_record *rec)
6479 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Tear down @extent_cache: take the first cache entry, remove it from the
 * tree and release the backrefs of the extent_record that embeds it.
 * NOTE(review): presumably loops until the cache is empty and frees each
 * record; confirm.
 */
6482 static void free_extent_record_cache(struct cache_tree *extent_cache)
6484 struct cache_extent *cache;
6485 struct extent_record *rec;
6488 cache = first_cache_extent(extent_cache);
6491 rec = container_of(cache, struct extent_record, cache);
6492 remove_cache_extent(extent_cache, cache);
6493 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing remains to be verified for it:
 * content and owner ref are checked, the extent item ref count equals the
 * found refs (and is non-zero), there are no duplicates,
 * all_backpointers_checked() returns 0, and none of the bad_full_backref,
 * crossing_stripes or wrong_chunk_type flags is set.
 */
6498 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6499 struct extent_record *rec)
6501 if (rec->content_checked && rec->owner_ref_checked &&
6502 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6503 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6504 !rec->bad_full_backref && !rec->crossing_stripes &&
6505 !rec->wrong_chunk_type) {
/* Unlink the record from the cache, its backrefs and its list. */
6506 remove_cache_extent(extent_cache, &rec->cache);
6507 free_all_extent_backrefs(rec);
6508 list_del_init(&rec->list);
/*
 * Check that the owner recorded in @buf's header is backed by a reference.
 *
 * First the backrefs of @rec are scanned for one whose root equals the header
 * owner. Failing that, the owner's fs tree is read and searched down to the
 * level above @buf, to see whether the parent's block pointer there refers to
 * @buf.
 *
 * Returns 0 when such a reference was found by the tree search, 1 otherwise.
 */
6514 static int check_owner_ref(struct btrfs_root *root,
6515 struct extent_record *rec,
6516 struct extent_buffer *buf)
6518 struct extent_backref *node, *tmp;
6519 struct tree_backref *back;
6520 struct btrfs_root *ref_root;
6521 struct btrfs_key key;
6522 struct btrfs_path path;
6523 struct extent_buffer *parent;
6528 rbtree_postorder_for_each_entry_safe(node, tmp,
6529 &rec->backref_tree, node) {
6532 if (!node->found_ref)
6534 if (node->full_backref)
6536 back = to_tree_backref(node);
6537 if (btrfs_header_owner(buf) == back->root)
6540 BUG_ON(rec->is_root);
6542 /* try to find the block by search corresponding fs tree */
6543 key.objectid = btrfs_header_owner(buf);
6544 key.type = BTRFS_ROOT_ITEM_KEY;
6545 key.offset = (u64)-1;
6547 ref_root = btrfs_read_fs_root(root->fs_info, &key);
6548 if (IS_ERR(ref_root))
/* Search for the first key of @buf, stopping one level above it. */
6551 level = btrfs_header_level(buf);
6553 btrfs_item_key_to_cpu(buf, &key, 0);
6555 btrfs_node_key_to_cpu(buf, &key, 0);
6557 btrfs_init_path(&path);
6558 path.lowest_level = level + 1;
6559 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6563 parent = path.nodes[level + 1];
6564 if (parent && buf->start == btrfs_node_blockptr(parent,
6565 path.slots[level + 1]))
6568 btrfs_release_path(&path);
6569 return found ? 0 : 1;
/*
 * Scan the backrefs of @rec for a tree backref whose root is
 * BTRFS_EXTENT_TREE_OBJECTID; full backrefs get separate handling.
 * NOTE(review): the result for each branch is not visible here; confirm what
 * is returned for full backrefs.
 */
6572 static int is_extent_tree_record(struct extent_record *rec)
6574 struct extent_backref *node, *tmp;
6575 struct tree_backref *back;
6578 rbtree_postorder_for_each_entry_safe(node, tmp,
6579 &rec->backref_tree, node) {
6582 back = to_tree_backref(node);
6583 if (node->full_backref)
6585 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Look up the extent record covering [start, start + len) in @extent_cache
 * and test it with is_extent_tree_record(). The corrupt-extent record is
 * added under the record's parent key via btrfs_add_corrupt_extent_record(),
 * whose result is returned.
 */
6592 static int record_bad_block_io(struct btrfs_fs_info *info,
6593 struct cache_tree *extent_cache,
6596 struct extent_record *rec;
6597 struct cache_extent *cache;
6598 struct btrfs_key key;
6600 cache = lookup_cache_extent(extent_cache, start, len);
6604 rec = container_of(cache, struct extent_record, cache);
6605 if (!is_extent_tree_record(rec))
6608 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6609 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot + 1 of @buf.
 *
 * For a node (non-zero header level) the two key pointers are exchanged and
 * btrfs_fixup_low_keys() is called with the node's first key. For a leaf the
 * item data, the item offsets and sizes, and the item keys are exchanged
 * through temporary heap buffers.
 */
6612 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6613 struct extent_buffer *buf, int slot)
6615 if (btrfs_header_level(buf)) {
6616 struct btrfs_key_ptr ptr1, ptr2;
/* Read both key pointers, then write each one back into the other slot. */
6618 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6619 sizeof(struct btrfs_key_ptr));
6620 read_extent_buffer(buf, &ptr2,
6621 btrfs_node_key_ptr_offset(slot + 1),
6622 sizeof(struct btrfs_key_ptr));
6623 write_extent_buffer(buf, &ptr1,
6624 btrfs_node_key_ptr_offset(slot + 1),
6625 sizeof(struct btrfs_key_ptr));
6626 write_extent_buffer(buf, &ptr2,
6627 btrfs_node_key_ptr_offset(slot),
6628 sizeof(struct btrfs_key_ptr));
6630 struct btrfs_disk_key key;
6631 btrfs_node_key(buf, &key, 0);
6632 btrfs_fixup_low_keys(root, path, &key,
6633 btrfs_header_level(buf) + 1);
6636 struct btrfs_item *item1, *item2;
6637 struct btrfs_key k1, k2;
6638 char *item1_data, *item2_data;
6639 u32 item1_offset, item2_offset, item1_size, item2_size;
6641 item1 = btrfs_item_nr(slot);
6642 item2 = btrfs_item_nr(slot + 1);
6643 btrfs_item_key_to_cpu(buf, &k1, slot);
6644 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6645 item1_offset = btrfs_item_offset(buf, item1);
6646 item2_offset = btrfs_item_offset(buf, item2);
6647 item1_size = btrfs_item_size(buf, item1);
6648 item2_size = btrfs_item_size(buf, item2);
6650 item1_data = malloc(item1_size);
6653 item2_data = malloc(item2_size);
6659 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6660 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): item1_data holds item1_size bytes but is written with
 * item2_size (and vice versa). If the two sizes differ this reads past the
 * smaller buffer; confirm the intended pairing of data, offset and size.
 */
6662 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6663 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6667 btrfs_set_item_offset(buf, item1, item2_offset);
6668 btrfs_set_item_offset(buf, item2, item1_offset);
6669 btrfs_set_item_size(buf, item1, item2_size);
6670 btrfs_set_item_size(buf, item2, item1_size);
/* Point the path at each slot in turn to rewrite its key. */
6672 path->slots[0] = slot;
6673 btrfs_set_item_key_unsafe(root, path, &k2);
6674 path->slots[0] = slot + 1;
6675 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Walk adjacent key pairs of the block at path->lowest_level. Pairs that
 * already compare as strictly ascending are skipped; the others are exchanged
 * with swap_values() and the buffer is marked dirty.
 */
6680 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6682 struct extent_buffer *buf;
6683 struct btrfs_key k1, k2;
6685 int level = path->lowest_level;
6688 buf = path->nodes[level];
6689 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
/* Keys are read with the node or the item accessor. */
6691 btrfs_node_key_to_cpu(buf, &k1, i);
6692 btrfs_node_key_to_cpu(buf, &k2, i + 1);
6694 btrfs_item_key_to_cpu(buf, &k1, i);
6695 btrfs_item_key_to_cpu(buf, &k2, i + 1);
6697 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6699 ret = swap_values(root, path, buf, i);
6702 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf by moving the following
 * headers down one position and decrementing nritems. Only the key types
 * listed below are eligible. btrfs_fixup_low_keys() is called with the leaf's
 * first key (NOTE(review): presumably only when slot 0 was removed; confirm).
 */
6708 static int delete_bogus_item(struct btrfs_root *root,
6709 struct btrfs_path *path,
6710 struct extent_buffer *buf, int slot)
6712 struct btrfs_key key;
6713 int nritems = btrfs_header_nritems(buf);
6715 btrfs_item_key_to_cpu(buf, &key, slot);
6717 /* These are all the keys we can deal with missing. */
6718 if (key.type != BTRFS_DIR_INDEX_KEY &&
6719 key.type != BTRFS_EXTENT_ITEM_KEY &&
6720 key.type != BTRFS_METADATA_ITEM_KEY &&
6721 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6722 key.type != BTRFS_EXTENT_DATA_REF_KEY)
6725 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6726 (unsigned long long)key.objectid, key.type,
6727 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array. */
6728 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6729 btrfs_item_nr_offset(slot + 1),
6730 sizeof(struct btrfs_item) *
6731 (nritems - slot - 1));
6732 btrfs_set_header_nritems(buf, nritems - 1);
6734 struct btrfs_disk_key disk_key;
6736 btrfs_item_key(buf, &disk_key, 0);
6737 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6739 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * The end of item 0 is compared with BTRFS_LEAF_DATA_SIZE(root), and the end
 * of every later item with the offset of the item before it. An item that
 * ends beyond that limit is passed to delete_bogus_item(); an item that ends
 * short of it has its data moved by the size of the gap and its offset
 * updated.
 */
6743 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6745 struct extent_buffer *buf;
6749 /* We should only get this for leaves */
6750 BUG_ON(path->lowest_level);
6751 buf = path->nodes[0];
6753 for (i = 0; i < btrfs_header_nritems(buf); i++) {
6754 unsigned int shift = 0, offset;
6756 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6757 BTRFS_LEAF_DATA_SIZE(root)) {
6758 if (btrfs_item_end_nr(buf, i) >
6759 BTRFS_LEAF_DATA_SIZE(root)) {
6760 ret = delete_bogus_item(root, path, buf, i);
6763 fprintf(stderr, "item is off the end of the "
6764 "leaf, can't fix\n");
/* Gap between the end of the first item and the end of the data area. */
6768 shift = BTRFS_LEAF_DATA_SIZE(root) -
6769 btrfs_item_end_nr(buf, i);
6770 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6771 btrfs_item_offset_nr(buf, i - 1)) {
6772 if (btrfs_item_end_nr(buf, i) >
6773 btrfs_item_offset_nr(buf, i - 1)) {
6774 ret = delete_bogus_item(root, path, buf, i);
6777 fprintf(stderr, "items overlap, can't fix\n");
/* Gap between this item's end and the previous item's start. */
6781 shift = btrfs_item_offset_nr(buf, i - 1) -
6782 btrfs_item_end_nr(buf, i);
6787 printf("Shifting item nr %d by %u bytes in block %llu\n",
6788 i, shift, (unsigned long long)buf->start);
6789 offset = btrfs_item_offset_nr(buf, i);
6790 memmove_extent_buffer(buf,
6791 btrfs_leaf_data(buf) + offset + shift,
6792 btrfs_leaf_data(buf) + offset,
6793 btrfs_item_size_nr(buf, i));
6794 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6796 btrfs_mark_buffer_dirty(buf);
6800 * We may have moved things, in which case we want to exit so we don't
6801 * write those changes out. Once we have proper abort functionality in
6802 * progs this can be changed to something nicer.
6809 * Attempt to fix basic block failures. If we can't fix it for whatever reason
6810 * then just return -EIO.
/*
 * Try to repair @buf for the two statuses handled here,
 * BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS.
 *
 * Every root that btrfs_find_all_roots() reports for the block is read, a
 * transaction is started on it, and the block is located with a search that
 * has block checking disabled (skip_check_block) and cow requested. Then
 * fix_key_order() or fix_item_offset() is applied, depending on @status.
 */
6812 static int try_to_fix_bad_block(struct btrfs_root *root,
6813 struct extent_buffer *buf,
6814 enum btrfs_tree_block_status status)
6816 struct btrfs_trans_handle *trans;
6817 struct ulist *roots;
6818 struct ulist_node *node;
6819 struct btrfs_root *search_root;
6820 struct btrfs_path path;
6821 struct ulist_iterator iter;
6822 struct btrfs_key root_key, key;
6825 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6826 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6829 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6833 btrfs_init_path(&path);
6834 ULIST_ITER_INIT(&iter);
6835 while ((node = ulist_next(roots, &iter))) {
6836 root_key.objectid = node->val;
6837 root_key.type = BTRFS_ROOT_ITEM_KEY;
6838 root_key.offset = (u64)-1;
6840 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6847 trans = btrfs_start_transaction(search_root, 0);
6848 if (IS_ERR(trans)) {
6849 ret = PTR_ERR(trans);
/* Stop the search at the bad block's own level, using its first key. */
6853 path.lowest_level = btrfs_header_level(buf);
6854 path.skip_check_block = 1;
6855 if (path.lowest_level)
6856 btrfs_node_key_to_cpu(buf, &key, 0);
6858 btrfs_item_key_to_cpu(buf, &key, 0);
6859 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6862 btrfs_commit_transaction(trans, search_root);
6865 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6866 ret = fix_key_order(search_root, &path);
6867 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6868 ret = fix_item_offset(search_root, &path);
6870 btrfs_commit_transaction(trans, search_root);
6873 btrfs_release_path(&path);
6874 btrfs_commit_transaction(trans, search_root);
6877 btrfs_release_path(&path);
/*
 * check_block - validate one tree block and update its extent record.
 * @root:         root passed to the leaf/node check and to the repair attempt
 * @extent_cache: cache that holds the extent_record covering @buf
 * @buf:          tree block to validate
 * @flags:        block flags; BTRFS_BLOCK_FLAG_FULL_BACKREF is tested below
 *
 * Stores the block's generation, first-key objectid and level in the extent
 * record, runs btrfs_check_leaf() or btrfs_check_node(), and calls
 * try_to_fix_bad_block() when the block is not clean.
 */
6881 static int check_block(struct btrfs_root *root,
6882 struct cache_tree *extent_cache,
6883 struct extent_buffer *buf, u64 flags)
6885 struct extent_record *rec;
6886 struct cache_extent *cache;
6887 struct btrfs_key key;
6888 enum btrfs_tree_block_status status;
/* Find the extent record that covers this block. */
6892 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6895 rec = container_of(cache, struct extent_record, cache);
6896 rec->generation = btrfs_header_generation(buf);
6898 level = btrfs_header_level(buf);
/* Remember the objectid of the first key, if the block has any items. */
6899 if (btrfs_header_nritems(buf) > 0) {
6902 btrfs_item_key_to_cpu(buf, &key, 0);
6904 btrfs_node_key_to_cpu(buf, &key, 0);
6906 rec->info_objectid = key.objectid;
6908 rec->info_level = level;
/* Structural check; the parent key recorded earlier is passed along. */
6910 if (btrfs_is_leaf(buf))
6911 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6913 status = btrfs_check_node(root, &rec->parent_key, buf);
6915 if (status != BTRFS_TREE_BLOCK_CLEAN) {
/*
 * NOTE(review): try_to_fix_bad_block() returns int but the result is stored
 * in the enum-typed status and compared with BTRFS_TREE_BLOCK_CLEAN; this
 * relies on that enumerator being 0 -- confirm.
 */
6917 status = try_to_fix_bad_block(root, buf, status);
6918 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6920 fprintf(stderr, "bad block %llu\n",
6921 (unsigned long long)buf->start);
6924 * Signal to callers we need to start the scan over
6925 * again since we'll have cowed blocks.
6930 rec->content_checked = 1;
/* With a full backref the owner ref is marked checked directly. */
6931 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6932 rec->owner_ref_checked = 1;
6934 ret = check_owner_ref(root, rec, buf);
6936 rec->owner_ref_checked = 1;
6940 maybe_free_extent_rec(extent_cache, rec);
/*
 * find_tree_backref - search rec->backrefs for a matching tree backref.
 * @rec:    extent record whose backref list is walked
 * @parent: parent bytenr, compared with back->parent
 * @root:   root objectid, compared with back->root
 *
 * The parent comparison is paired with a !full_backref test and the root
 * comparison with a full_backref test.
 */
6945 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6946 u64 parent, u64 root)
6948 struct list_head *cur = rec->backrefs.next;
6949 struct extent_backref *node;
6950 struct tree_backref *back;
/* Walk the list until we are back at the list head. */
6952 while(cur != &rec->backrefs) {
6953 node = to_extent_backref(cur);
6957 back = to_tree_backref(node);
6959 if (!node->full_backref)
6961 if (parent == back->parent)
6964 if (node->full_backref)
6966 if (back->root == root)
/*
 * alloc_tree_backref - allocate a tree backref for @rec.
 * @rec:    extent record the backref belongs to
 * @parent: parent bytenr, stored in ref->parent on the full-backref path
 * @root:   root objectid for the non-full-backref path
 *
 * Only the embedded ref->node is zeroed by the memset; the remaining fields
 * are assigned individually.
 */
6974 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6975 u64 parent, u64 root)
6977 struct tree_backref *ref = malloc(sizeof(*ref));
6981 memset(&ref->node, 0, sizeof(ref->node));
/* full_backref is 1 when a parent is recorded, 0 otherwise. */
6983 ref->parent = parent;
6984 ref->node.full_backref = 1;
6987 ref->node.full_backref = 0;
/*
 * find_data_backref - search rec->backrefs for a matching data backref.
 * @rec:         extent record whose backref list is walked
 * @parent:      parent bytenr, compared with back->parent
 * @root:        root objectid
 * @owner:       owning objectid
 * @offset:      file offset
 * @disk_bytenr: disk bytenr of the ref being looked up
 * @bytes:       size of the ref being looked up
 *
 * A root/owner/offset match is additionally checked against @bytes and
 * @disk_bytenr when both the caller's found_ref and node->found_ref are set.
 */
6994 static struct data_backref *find_data_backref(struct extent_record *rec,
6995 u64 parent, u64 root,
6996 u64 owner, u64 offset,
6998 u64 disk_bytenr, u64 bytes)
7000 struct list_head *cur = rec->backrefs.next;
7001 struct extent_backref *node;
7002 struct data_backref *back;
/* Walk the list until we are back at the list head. */
7004 while(cur != &rec->backrefs) {
7005 node = to_extent_backref(cur);
7009 back = to_data_backref(node);
7011 if (!node->full_backref)
7013 if (parent == back->parent)
7016 if (node->full_backref)
7018 if (back->root == root && back->owner == owner &&
7019 back->offset == offset) {
/* Same key but different size or location: a distinct ref. */
7020 if (found_ref && node->found_ref &&
7021 (back->bytes != bytes ||
7022 back->disk_bytenr != disk_bytenr))
/*
 * alloc_data_backref - allocate a data backref for @rec.
 * @rec:    extent record the backref belongs to
 * @parent: parent bytenr for the full-backref path
 * @root:   root objectid
 * @owner:  owning objectid
 * @offset: file offset, stored in ref->offset
 *
 * Also raises rec->max_size to max_size when the latter is larger.
 */
7032 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7033 u64 parent, u64 root,
7034 u64 owner, u64 offset,
7037 struct data_backref *ref = malloc(sizeof(*ref));
/* Only the embedded node is zeroed; other fields are set one by one. */
7041 memset(&ref->node, 0, sizeof(ref->node));
7042 ref->node.is_data = 1;
7045 ref->parent = parent;
7048 ref->node.full_backref = 1;
7052 ref->offset = offset;
7053 ref->node.full_backref = 0;
7055 ref->bytes = max_size;
7058 if (max_size > rec->max_size)
7059 rec->max_size = max_size;
7063 /* Check if the type of extent matches with its chunk */
/*
 * @rec: extent record to verify.  rec->wrong_chunk_type is set to 1 when the
 * flags of the block group found for rec->start do not fit the extent.
 */
7064 static void check_extent_type(struct extent_record *rec)
7066 struct btrfs_block_group_cache *bg_cache;
/* Uses the file-scope global_info to find the block group. */
7068 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7072 /* data extent, check chunk directly*/
7073 if (!rec->metadata) {
7074 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7075 rec->wrong_chunk_type = 1;
7079 /* metadata extent, check the obvious case first */
7080 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7081 BTRFS_BLOCK_GROUP_METADATA))) {
7082 rec->wrong_chunk_type = 1;
7087 * Check SYSTEM extent, as it's also marked as metadata, we can only
7088 * make sure it's a SYSTEM extent by its backref
7090 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7091 struct extent_backref *node;
7092 struct tree_backref *tback;
/* Only the first backref in the rb-tree is inspected. */
7095 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7096 if (node->is_data) {
7097 /* tree block shouldn't have data backref */
7098 rec->wrong_chunk_type = 1;
7101 tback = container_of(node, struct tree_backref, node);
/* Chunk tree blocks must live in SYSTEM, all others in METADATA. */
7103 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7104 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7106 bg_type = BTRFS_BLOCK_GROUP_METADATA;
7107 if (!(bg_cache->flags & bg_type))
7108 rec->wrong_chunk_type = 1;
7113 * Allocate a new extent record, fill default values from @tmpl and insert into
7114 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7115 * the cache, otherwise it fails.
/*
 * @extent_cache: cache tree the new record is inserted into
 * @tmpl:         template supplying start, nr, max_size, found_rec,
 *                content_checked, owner_ref_checked, metadata, is_root, refs,
 *                extent_item_refs, parent_generation and parent_key
 *
 * tmpl->max_size must be non-zero (BUG_ON).  bytes_used is increased by the
 * record's nr.
 */
7117 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7118 struct extent_record *tmpl)
7120 struct extent_record *rec;
7123 BUG_ON(tmpl->max_size == 0);
7124 rec = malloc(sizeof(*rec));
/* Fields copied from the template or reset to their defaults. */
7127 rec->start = tmpl->start;
7128 rec->max_size = tmpl->max_size;
7129 rec->nr = max(tmpl->nr, tmpl->max_size);
7130 rec->found_rec = tmpl->found_rec;
7131 rec->content_checked = tmpl->content_checked;
7132 rec->owner_ref_checked = tmpl->owner_ref_checked;
7133 rec->num_duplicates = 0;
7134 rec->metadata = tmpl->metadata;
7135 rec->flag_block_full_backref = FLAG_UNSET;
7136 rec->bad_full_backref = 0;
7137 rec->crossing_stripes = 0;
7138 rec->wrong_chunk_type = 0;
7139 rec->is_root = tmpl->is_root;
7140 rec->refs = tmpl->refs;
7141 rec->extent_item_refs = tmpl->extent_item_refs;
7142 rec->parent_generation = tmpl->parent_generation;
7143 INIT_LIST_HEAD(&rec->backrefs);
7144 INIT_LIST_HEAD(&rec->dups);
7145 INIT_LIST_HEAD(&rec->list);
7146 rec->backref_tree = RB_ROOT;
7147 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
/* The cache entry is sized by tmpl->nr, while rec->nr may be larger. */
7148 rec->cache.start = tmpl->start;
7149 rec->cache.size = tmpl->nr;
7150 ret = insert_cache_extent(extent_cache, &rec->cache);
7155 bytes_used += rec->nr;
/* The stripe-crossing check uses nodesize as the length. */
7158 rec->crossing_stripes = check_crossing_stripes(global_info,
7159 rec->start, global_info->nodesize);
7160 check_extent_type(rec);
7165 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7167 * - refs - if found, increase refs
7168 * - is_root - if found, set
7169 * - content_checked - if found, set
7170 * - owner_ref_checked - if found, set
7172 * If not found, create a new one, initialize and insert.
/*
 * @extent_cache: cache tree searched with [tmpl->start, tmpl->nr)
 * @tmpl:         template describing the extent being added
 *
 * When a found record conflicts with the template (different start, or the
 * record was already found), a duplicate entry is allocated and chained on
 * rec->dups, and rec is queued on the global duplicate_extents list.
 */
7174 static int add_extent_rec(struct cache_tree *extent_cache,
7175 struct extent_record *tmpl)
7177 struct extent_record *rec;
7178 struct cache_extent *cache;
7182 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7184 rec = container_of(cache, struct extent_record, cache);
7188 rec->nr = max(tmpl->nr, tmpl->max_size);
7191 * We need to make sure to reset nr to whatever the extent
7192 * record says was the real size, this way we can compare it to
7195 if (tmpl->found_rec) {
7196 if (tmpl->start != rec->start || rec->found_rec) {
7197 struct extent_record *tmp;
/* Queue rec on duplicate_extents only once. */
7200 if (list_empty(&rec->list))
7201 list_add_tail(&rec->list,
7202 &duplicate_extents);
7205 * We have to do this song and dance in case we
7206 * find an extent record that falls inside of
7207 * our current extent record but does not have
7208 * the same objectid.
7210 tmp = malloc(sizeof(*tmp));
7213 tmp->start = tmpl->start;
7214 tmp->max_size = tmpl->max_size;
7217 tmp->metadata = tmpl->metadata;
7218 tmp->extent_item_refs = tmpl->extent_item_refs;
7219 INIT_LIST_HEAD(&tmp->list);
7220 list_add_tail(&tmp->list, &rec->dups);
7221 rec->num_duplicates++;
/* A second extent item for a non-duplicate record is reported. */
7228 if (tmpl->extent_item_refs && !dup) {
7229 if (rec->extent_item_refs) {
7230 fprintf(stderr, "block %llu rec "
7231 "extent_item_refs %llu, passed %llu\n",
7232 (unsigned long long)tmpl->start,
7233 (unsigned long long)
7234 rec->extent_item_refs,
7235 (unsigned long long)tmpl->extent_item_refs);
7237 rec->extent_item_refs = tmpl->extent_item_refs;
/* These flags are only ever set here, never cleared. */
7241 if (tmpl->content_checked)
7242 rec->content_checked = 1;
7243 if (tmpl->owner_ref_checked)
7244 rec->owner_ref_checked = 1;
7245 memcpy(&rec->parent_key, &tmpl->parent_key,
7246 sizeof(tmpl->parent_key));
7247 if (tmpl->parent_generation)
7248 rec->parent_generation = tmpl->parent_generation;
7249 if (rec->max_size < tmpl->max_size)
7250 rec->max_size = tmpl->max_size;
7253 * A metadata extent can't cross stripe_len boundary, otherwise
7254 * kernel scrub won't be able to handle it.
7255 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7259 rec->crossing_stripes = check_crossing_stripes(
7260 global_info, rec->start,
7261 global_info->nodesize);
7262 check_extent_type(rec);
7263 maybe_free_extent_rec(extent_cache, rec);
/* Not-found path: create the record from the template. */
7267 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * add_tree_backref - record a tree backref for the extent at @bytenr.
 * @extent_cache: cache tree holding the extent records
 * @bytenr:       start of the extent the backref points to
 * @parent:       parent bytenr of the backref
 * @root:         root objectid of the backref
 * @found_ref:    selects whether found_ref or found_extent_tree is marked
 *
 * An extent record is created from a zeroed template when none covers
 * @bytenr.  A backref that is already marked is reported on stderr.
 */
7272 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7273 u64 parent, u64 root, int found_ref)
7275 struct extent_record *rec;
7276 struct tree_backref *back;
7277 struct cache_extent *cache;
7279 bool insert = false;
7281 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7283 struct extent_record tmpl;
7285 memset(&tmpl, 0, sizeof(tmpl));
7286 tmpl.start = bytenr;
7291 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7295 /* really a bug in cache_extent implement now */
7296 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7301 rec = container_of(cache, struct extent_record, cache);
/* The record found must start exactly at bytenr. */
7302 if (rec->start != bytenr) {
7304 * Several cause, from unaligned bytenr to over lapping extents
/* Reuse an existing backref, otherwise allocate a new one. */
7309 back = find_tree_backref(rec, parent, root);
7311 back = alloc_tree_backref(rec, parent, root);
7318 if (back->node.found_ref) {
7319 fprintf(stderr, "Extent back ref already exists "
7320 "for %llu parent %llu root %llu \n",
7321 (unsigned long long)bytenr,
7322 (unsigned long long)parent,
7323 (unsigned long long)root);
7325 back->node.found_ref = 1;
7327 if (back->node.found_extent_tree) {
7328 fprintf(stderr, "Extent back ref already exists "
7329 "for %llu parent %llu root %llu \n",
7330 (unsigned long long)bytenr,
7331 (unsigned long long)parent,
7332 (unsigned long long)root);
7334 back->node.found_extent_tree = 1;
/* A non-zero rb_insert() result triggers a warning. */
7337 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7338 compare_extent_backref));
7339 check_extent_type(rec);
7340 maybe_free_extent_rec(extent_cache, rec);
/*
 * add_data_backref - record a data backref for the extent at @bytenr.
 * @extent_cache: cache tree holding the extent records
 * @bytenr:       start of the data extent
 * @parent:       parent bytenr of the backref
 * @root:         root objectid
 * @owner:        owning objectid
 * @offset:       file offset
 * @num_refs:     reference count; must be 1 on the found_ref path (BUG_ON)
 * @found_ref:    selects whether found_ref or found_extent_tree is marked
 * @max_size:     extent size as known by the caller
 *
 * An extent record is created from a template when none covers @bytenr, and
 * rec->max_size is raised to @max_size if needed.
 */
7344 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7345 u64 parent, u64 root, u64 owner, u64 offset,
7346 u32 num_refs, int found_ref, u64 max_size)
7348 struct extent_record *rec;
7349 struct data_backref *back;
7350 struct cache_extent *cache;
7352 bool insert = false;
7354 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7356 struct extent_record tmpl;
7358 memset(&tmpl, 0, sizeof(tmpl));
7359 tmpl.start = bytenr;
7361 tmpl.max_size = max_size;
7363 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7367 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7372 rec = container_of(cache, struct extent_record, cache);
7373 if (rec->max_size < max_size)
7374 rec->max_size = max_size;
7377 * If found_ref is set then max_size is the real size and must match the
7378 * existing refs. So if we have already found a ref then we need to
7379 * make sure that this ref matches the existing one, otherwise we need
7380 * to add a new backref so we can notice that the backrefs don't match
7381 * and we need to figure out who is telling the truth. This is to
7382 * account for that awful fsync bug I introduced where we'd end up with
7383 * a btrfs_file_extent_item that would have its length include multiple
7384 * prealloc extents or point inside of a prealloc extent.
7386 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7389 back = alloc_data_backref(rec, parent, root, owner, offset,
/* found_ref path: each call accounts for exactly one reference. */
7396 BUG_ON(num_refs != 1);
7397 if (back->node.found_ref)
7398 BUG_ON(back->bytes != max_size);
7399 back->node.found_ref = 1;
7400 back->found_ref += 1;
/* Size or location changed: update the backref with the new values. */
7401 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7402 back->bytes = max_size;
7403 back->disk_bytenr = bytenr;
7405 /* Need to reinsert if not already in the tree */
7407 rb_erase(&back->node.node, &rec->backref_tree);
7412 rec->content_checked = 1;
7413 rec->owner_ref_checked = 1;
/* Extent-tree path: a backref already marked is reported. */
7415 if (back->node.found_extent_tree) {
7416 fprintf(stderr, "Extent back ref already exists "
7417 "for %llu parent %llu root %llu "
7418 "owner %llu offset %llu num_refs %lu\n",
7419 (unsigned long long)bytenr,
7420 (unsigned long long)parent,
7421 (unsigned long long)root,
7422 (unsigned long long)owner,
7423 (unsigned long long)offset,
7424 (unsigned long)num_refs);
7426 back->num_refs = num_refs;
7427 back->node.found_extent_tree = 1;
/* A non-zero rb_insert() result triggers a warning. */
7430 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7431 compare_extent_backref));
7433 maybe_free_extent_rec(extent_cache, rec);
/*
 * add_pending - queue the range [bytenr, bytenr + size) for processing.
 * @pending: cache tree of ranges still to be processed
 * @seen:    cache tree of ranges already queued once
 *
 * The range is added to @seen first and then to @pending.  The return value
 * of the second add_cache_extent() call is not checked.
 */
7437 static int add_pending(struct cache_tree *pending,
7438 struct cache_tree *seen, u64 bytenr, u32 size)
7441 ret = add_cache_extent(seen, bytenr, size);
7444 add_cache_extent(pending, bytenr, size);
/*
 * pick_next_pending - choose the next batch of blocks to process.
 * @pending: cache tree of pending ranges
 * @reada:   cache tree searched first; its first entry fills bits[0]
 * @nodes:   cache tree of node ranges, searched near @last
 * @last:    position used as the starting hint for the @nodes search
 * @bits:    output array receiving start/size pairs
 * @bits_nr: capacity of @bits
 *
 * Entries are copied into @bits until the tree runs out or @bits_nr is
 * reached.
 */
7448 static int pick_next_pending(struct cache_tree *pending,
7449 struct cache_tree *reada,
7450 struct cache_tree *nodes,
7451 u64 last, struct block_info *bits, int bits_nr,
/*
 * NOTE(review): last is u64 but node_start is unsigned long, which can be
 * narrower than 64 bits on some targets -- confirm this is acceptable.
 */
7454 unsigned long node_start = last;
7455 struct cache_extent *cache;
7458 cache = search_cache_extent(reada, 0);
7460 bits[0].start = cache->start;
7461 bits[0].size = cache->size;
/* Start the node search 32768 bytes before the hint when possible. */
7466 if (node_start > 32768)
7467 node_start -= 32768;
7469 cache = search_cache_extent(nodes, node_start);
7471 cache = search_cache_extent(nodes, 0);
7474 cache = search_cache_extent(pending, 0);
7479 bits[ret].start = cache->start;
7480 bits[ret].size = cache->size;
7481 cache = next_cache_extent(cache);
7483 } while (cache && ret < bits_nr);
7489 bits[ret].start = cache->start;
7490 bits[ret].size = cache->size;
7491 cache = next_cache_extent(cache);
7493 } while (cache && ret < bits_nr);
/*
 * With more than 8 free slots left, add pending ranges that follow
 * bits[0]; the gap to the previous range is compared against 32768.
 */
7495 if (bits_nr - ret > 8) {
7496 u64 lookup = bits[0].start + bits[0].size;
7497 struct cache_extent *next;
7498 next = search_cache_extent(pending, lookup);
7500 if (next->start - lookup > 32768)
7502 bits[ret].start = next->start;
7503 bits[ret].size = next->size;
7504 lookup = next->start + next->size;
7508 next = next_cache_extent(next);
/*
 * Callback for cache_tree_free_extents(): unlinks the chunk record that
 * embeds @cache from its list and dextents list heads.
 */
7516 static void free_chunk_record(struct cache_extent *cache)
7518 struct chunk_record *rec;
7520 rec = container_of(cache, struct chunk_record, cache);
7521 list_del_init(&rec->list);
7522 list_del_init(&rec->dextents);
/* Release every entry of @chunk_cache through free_chunk_record(). */
7526 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
7528 cache_tree_free_extents(chunk_cache, free_chunk_record);
/* Per-node callback: resolves the device_record that embeds @node. */
7531 static void free_device_record(struct rb_node *node)
7533 struct device_record *rec;
7535 rec = container_of(node, struct device_record, node);
/*
 * Macro invocation pairing the name device_cache with the
 * free_device_record() callback; presumably it expands to the tree-freeing
 * helper for the device cache -- see the macro definition.
 */
7539 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * insert_block_group_record - add @bg_rec to @tree.
 * @tree:   block group tree (cache tree plus block_groups list)
 * @bg_rec: record to insert
 *
 * The record is inserted into tree->tree by its cache extent and appended to
 * tree->block_groups.
 */
7541 int insert_block_group_record(struct block_group_tree *tree,
7542 struct block_group_record *bg_rec)
7546 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7550 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Callback for cache_tree_free_extents(): unlinks the block group record
 * that embeds @cache from its list.
 */
7554 static void free_block_group_record(struct cache_extent *cache)
7556 struct block_group_record *rec;
7558 rec = container_of(cache, struct block_group_record, cache);
7559 list_del_init(&rec->list);
/* Release every entry of tree->tree through free_block_group_record(). */
7563 void free_block_group_tree(struct block_group_tree *tree)
7565 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * insert_device_extent_record - add @de_rec to @tree.
 * @tree:   device extent tree
 * @de_rec: record to insert
 *
 * After insertion the record is appended to both orphan lists
 * (no_chunk_orphans and no_device_orphans) of @tree.
 */
7568 int insert_device_extent_record(struct device_extent_tree *tree,
7569 struct device_extent_record *de_rec)
7574 * Device extent is a bit different from the other extents, because
7575 * the extents which belong to the different devices may have the
7576 * same start and size, so we need use the special extent cache
7577 * search/insert functions.
7579 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7583 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7584 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Callback for cache_tree_free_extents(): unlinks the device extent record
 * that embeds @cache from chunk_list and device_list when it is linked.
 */
7588 static void free_device_extent_record(struct cache_extent *cache)
7590 struct device_extent_record *rec;
7592 rec = container_of(cache, struct device_extent_record, cache);
7593 if (!list_empty(&rec->chunk_list))
7594 list_del_init(&rec->chunk_list);
7595 if (!list_empty(&rec->device_list))
7596 list_del_init(&rec->device_list);
/* Release every entry of tree->tree through free_device_extent_record(). */
7600 void free_device_extent_tree(struct device_extent_tree *tree)
7602 cache_tree_free_extents(&tree->tree, free_device_extent_record);
7605 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * process_extent_ref_v0 - turn a v0 extent ref item into a backref.
 * @extent_cache: cache tree holding the extent records
 * @leaf:         leaf containing the item
 * @slot:         slot of the item in @leaf
 *
 * Ref objectids below BTRFS_FIRST_FREE_OBJECTID are added as tree backrefs,
 * all others as data backrefs.  key.objectid is the extent bytenr and
 * key.offset is passed as the parent.
 */
7606 static int process_extent_ref_v0(struct cache_tree *extent_cache,
7607 struct extent_buffer *leaf, int slot)
7609 struct btrfs_extent_ref_v0 *ref0;
7610 struct btrfs_key key;
7613 btrfs_item_key_to_cpu(leaf, &key, slot);
7614 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
7615 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
7616 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
7619 ret = add_data_backref(extent_cache, key.objectid, key.offset,
7620 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * btrfs_new_chunk_record - build a chunk_record from a chunk item.
 * @leaf: leaf containing the chunk item
 * @key:  key of the item; key->offset becomes the cache start
 *
 * The record is zero-allocated with room for all stripes and filled from the
 * on-disk chunk.  A failed allocation is reported on stderr.
 */
7626 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7627 struct btrfs_key *key,
7630 struct btrfs_chunk *ptr;
7631 struct chunk_record *rec;
7634 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7635 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
/* Allocation size depends on the stripe count. */
7637 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7639 fprintf(stderr, "memory allocation failed\n");
7643 INIT_LIST_HEAD(&rec->list);
7644 INIT_LIST_HEAD(&rec->dextents);
/* The cache extent covers [key->offset, key->offset + chunk length). */
7647 rec->cache.start = key->offset;
7648 rec->cache.size = btrfs_chunk_length(leaf, ptr);
7650 rec->generation = btrfs_header_generation(leaf);
7652 rec->objectid = key->objectid;
7653 rec->type = key->type;
7654 rec->offset = key->offset;
7656 rec->length = rec->cache.size;
7657 rec->owner = btrfs_chunk_owner(leaf, ptr);
7658 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7659 rec->type_flags = btrfs_chunk_type(leaf, ptr);
7660 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7661 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7662 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7663 rec->num_stripes = num_stripes;
7664 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* Copy devid, offset and device uuid of every stripe. */
7666 for (i = 0; i < rec->num_stripes; ++i) {
7667 rec->stripes[i].devid =
7668 btrfs_stripe_devid_nr(leaf, ptr, i);
7669 rec->stripes[i].offset =
7670 btrfs_stripe_offset_nr(leaf, ptr, i);
7671 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7672 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * process_chunk_item - validate a chunk item and add it to @chunk_cache.
 * @chunk_cache: cache tree of chunk records
 * @key:         key of the chunk item
 * @eb:          leaf containing the item
 *
 * The chunk is validated with btrfs_check_chunk_valid() first; an invalid
 * chunk is reported.  A chunk whose range already exists in the cache is
 * reported as "existed".
 */
7679 static int process_chunk_item(struct cache_tree *chunk_cache,
7680 struct btrfs_key *key, struct extent_buffer *eb,
7683 struct chunk_record *rec;
7684 struct btrfs_chunk *chunk;
7687 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7689 * Do extra check for this chunk item,
7691 * It's still possible one can craft a leaf with CHUNK_ITEM, with
7692 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7693 * and owner<->key_type check.
7695 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7698 error("chunk(%llu, %llu) is not valid, ignore it",
7699 key->offset, btrfs_chunk_length(eb, chunk));
7702 rec = btrfs_new_chunk_record(eb, key, slot);
7703 ret = insert_cache_extent(chunk_cache, &rec->cache);
7705 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7706 rec->offset, rec->length);
/*
 * process_device_item - build a device_record from a dev item and insert it.
 * @dev_cache: rb-tree of device records, ordered by device_record_compare
 * @key:       key of the dev item
 * @eb:        leaf containing the item
 * @slot:      slot of the item in @eb
 *
 * A failed allocation and an already existing device are both reported on
 * stderr.
 */
7713 static int process_device_item(struct rb_root *dev_cache,
7714 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7716 struct btrfs_dev_item *ptr;
7717 struct device_record *rec;
7720 ptr = btrfs_item_ptr(eb,
7721 slot, struct btrfs_dev_item);
7723 rec = malloc(sizeof(*rec));
7725 fprintf(stderr, "memory allocation failed\n");
/*
 * NOTE(review): devid is set from key->offset here and overwritten below
 * with btrfs_device_id(); the first assignment looks redundant.
 */
7729 rec->devid = key->offset;
7730 rec->generation = btrfs_header_generation(eb);
7732 rec->objectid = key->objectid;
7733 rec->type = key->type;
7734 rec->offset = key->offset;
7736 rec->devid = btrfs_device_id(eb, ptr);
7737 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7738 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7740 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7742 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * btrfs_new_block_group_record - build a record from a block group item.
 * @leaf: leaf containing the block group item
 * @key:  item key; objectid is the start and offset the size of the group
 *
 * The record is zero-allocated; a failed allocation is reported on stderr.
 */
7749 struct block_group_record *
7750 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7753 struct btrfs_block_group_item *ptr;
7754 struct block_group_record *rec;
7756 rec = calloc(1, sizeof(*rec));
7758 fprintf(stderr, "memory allocation failed\n");
/* The cache extent covers [key->objectid, key->objectid + key->offset). */
7762 rec->cache.start = key->objectid;
7763 rec->cache.size = key->offset;
7765 rec->generation = btrfs_header_generation(leaf);
7767 rec->objectid = key->objectid;
7768 rec->type = key->type;
7769 rec->offset = key->offset;
7771 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7772 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7774 INIT_LIST_HEAD(&rec->list);
/*
 * process_block_group_item - record a block group item.
 * @block_group_cache: tree the new record is inserted into
 * @key:               key of the block group item
 * @eb:                leaf containing the item
 * @slot:              slot of the item in @eb
 *
 * A block group that is already present is reported on stderr.
 */
7779 static int process_block_group_item(struct block_group_tree *block_group_cache,
7780 struct btrfs_key *key,
7781 struct extent_buffer *eb, int slot)
7783 struct block_group_record *rec;
7786 rec = btrfs_new_block_group_record(eb, key, slot);
7787 ret = insert_block_group_record(block_group_cache, rec);
7789 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7790 rec->objectid, rec->offset);
/*
 * btrfs_new_device_extent_record - build a record from a dev extent item.
 * @leaf: leaf containing the dev extent item
 * @key:  item key; objectid and offset seed the cache entry
 * @slot: slot of the item in @leaf
 *
 * The record is zero-allocated; a failed allocation is reported on stderr.
 */
7797 struct device_extent_record *
7798 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7799 struct btrfs_key *key, int slot)
7801 struct device_extent_record *rec;
7802 struct btrfs_dev_extent *ptr;
7804 rec = calloc(1, sizeof(*rec));
7806 fprintf(stderr, "memory allocation failed\n");
/* The cache entry carries an objectid in addition to the start. */
7810 rec->cache.objectid = key->objectid;
7811 rec->cache.start = key->offset;
7813 rec->generation = btrfs_header_generation(leaf);
7815 rec->objectid = key->objectid;
7816 rec->type = key->type;
7817 rec->offset = key->offset;
7819 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7820 rec->chunk_objecteid =
7821 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7823 btrfs_dev_extent_chunk_offset(leaf, ptr);
/* The on-disk length is also the size of the cache entry. */
7824 rec->length = btrfs_dev_extent_length(leaf, ptr);
7825 rec->cache.size = rec->length;
7827 INIT_LIST_HEAD(&rec->chunk_list);
7828 INIT_LIST_HEAD(&rec->device_list);
/*
 * process_device_extent_item - record a dev extent item.
 * @dev_extent_cache: tree the new record is inserted into
 * @key:              key of the dev extent item
 * @eb:               leaf containing the item
 *
 * A device extent that is already present is reported.
 */
7834 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7835 struct btrfs_key *key, struct extent_buffer *eb,
7838 struct device_extent_record *rec;
7841 rec = btrfs_new_device_extent_record(eb, key, slot);
7842 ret = insert_device_extent_record(dev_extent_cache, rec);
7845 "Device extent[%llu, %llu, %llu] existed.\n",
7846 rec->objectid, rec->offset, rec->length);
/*
 * process_extent_item - record an EXTENT_ITEM / METADATA_ITEM and its
 * inline backrefs.
 * @root:         used for root->fs_info (nodesize, sectorsize)
 * @extent_cache: cache tree holding the extent records
 * @eb:           leaf containing the item
 * @slot:         slot of the item in @eb
 *
 * The extent is added through add_extent_rec().  Each inline ref after the
 * extent item is then dispatched by type to add_tree_backref() or
 * add_data_backref().  Unaligned bytenrs and bad lengths are reported with
 * error().
 */
7853 static int process_extent_item(struct btrfs_root *root,
7854 struct cache_tree *extent_cache,
7855 struct extent_buffer *eb, int slot)
7857 struct btrfs_extent_item *ei;
7858 struct btrfs_extent_inline_ref *iref;
7859 struct btrfs_extent_data_ref *dref;
7860 struct btrfs_shared_data_ref *sref;
7861 struct btrfs_key key;
7862 struct extent_record tmpl;
7867 u32 item_size = btrfs_item_size_nr(eb, slot);
7873 btrfs_item_key_to_cpu(eb, &key, slot);
/* METADATA_ITEM extents are nodesize long; otherwise key.offset is the length. */
7875 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7877 num_bytes = root->fs_info->nodesize;
7879 num_bytes = key.offset;
7882 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7883 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7884 key.objectid, root->fs_info->sectorsize);
/* An item smaller than btrfs_extent_item is handled as the v0 format. */
7887 if (item_size < sizeof(*ei)) {
7888 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7889 struct btrfs_extent_item_v0 *ei0;
7890 BUG_ON(item_size != sizeof(*ei0));
7891 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7892 refs = btrfs_extent_refs_v0(eb, ei0);
7896 memset(&tmpl, 0, sizeof(tmpl));
7897 tmpl.start = key.objectid;
7898 tmpl.nr = num_bytes;
7899 tmpl.extent_item_refs = refs;
7900 tmpl.metadata = metadata;
7902 tmpl.max_size = num_bytes;
7904 return add_extent_rec(extent_cache, &tmpl);
7907 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7908 refs = btrfs_extent_refs(eb, ei);
7909 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
/* Length sanity checks: nodesize for metadata, sector aligned for data. */
7913 if (metadata && num_bytes != root->fs_info->nodesize) {
7914 error("ignore invalid metadata extent, length %llu does not equal to %u",
7915 num_bytes, root->fs_info->nodesize);
7918 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7919 error("ignore invalid data extent, length %llu is not aligned to %u",
7920 num_bytes, root->fs_info->sectorsize);
7924 memset(&tmpl, 0, sizeof(tmpl));
7925 tmpl.start = key.objectid;
7926 tmpl.nr = num_bytes;
7927 tmpl.extent_item_refs = refs;
7928 tmpl.metadata = metadata;
7930 tmpl.max_size = num_bytes;
/* The return value of add_extent_rec() is not checked here. */
7931 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs start right after the extent item; for a tree block stored
 * under EXTENT_ITEM_KEY a btrfs_tree_block_info is skipped first.
 */
7933 ptr = (unsigned long)(ei + 1);
7934 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7935 key.type == BTRFS_EXTENT_ITEM_KEY)
7936 ptr += sizeof(struct btrfs_tree_block_info);
7938 end = (unsigned long)ei + item_size;
7940 iref = (struct btrfs_extent_inline_ref *)ptr;
7941 type = btrfs_extent_inline_ref_type(eb, iref);
7942 offset = btrfs_extent_inline_ref_offset(eb, iref);
7944 case BTRFS_TREE_BLOCK_REF_KEY:
7945 ret = add_tree_backref(extent_cache, key.objectid,
7949 "add_tree_backref failed (extent items tree block): %s",
7952 case BTRFS_SHARED_BLOCK_REF_KEY:
7953 ret = add_tree_backref(extent_cache, key.objectid,
7957 "add_tree_backref failed (extent items shared block): %s",
7960 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref overlays the inline ref starting at its offset field. */
7961 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7962 add_data_backref(extent_cache, key.objectid, 0,
7963 btrfs_extent_data_ref_root(eb, dref),
7964 btrfs_extent_data_ref_objectid(eb,
7966 btrfs_extent_data_ref_offset(eb, dref),
7967 btrfs_extent_data_ref_count(eb, dref),
7970 case BTRFS_SHARED_DATA_REF_KEY:
/* The shared data ref follows the inline ref header. */
7971 sref = (struct btrfs_shared_data_ref *)(iref + 1);
7972 add_data_backref(extent_cache, key.objectid, offset,
7974 btrfs_shared_data_ref_count(eb, sref),
7978 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7979 key.objectid, key.type, num_bytes);
/* Advance by the size of the ref type just handled. */
7982 ptr += btrfs_extent_inline_ref_size(type);
/*
 * check_cache_range - verify that [offset, offset + bytes) is a free space
 * entry of @cache.
 * @root:   used for root->fs_info
 * @cache:  block group whose free_space_ctl is consulted
 * @offset: start of the range expected to be free
 * @bytes:  length of the range expected to be free
 *
 * The range is first trimmed around the logical addresses that
 * btrfs_rmap_block() returns for each superblock mirror; a superblock
 * stripe in the middle splits the range and the left part is checked
 * recursively.  The remaining range must match a free space entry exactly
 * in offset and size; the entry is then unlinked from the free space ctl.
 */
7989 static int check_cache_range(struct btrfs_root *root,
7990 struct btrfs_block_group_cache *cache,
7991 u64 offset, u64 bytes)
7993 struct btrfs_free_space *entry;
7999 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
8000 bytenr = btrfs_sb_offset(i);
8001 ret = btrfs_rmap_block(root->fs_info,
8002 cache->key.objectid, bytenr, 0,
8003 &logical, &nr, &stripe_len);
/* Stripe entirely before or entirely after the range. */
8008 if (logical[nr] + stripe_len <= offset)
8010 if (offset + bytes <= logical[nr])
/* Stripe starts exactly at the range start. */
8012 if (logical[nr] == offset) {
8013 if (stripe_len >= bytes) {
8017 bytes -= stripe_len;
8018 offset += stripe_len;
/* Stripe starts before the range and overlaps its head. */
8019 } else if (logical[nr] < offset) {
8020 if (logical[nr] + stripe_len >=
8025 bytes = (offset + bytes) -
8026 (logical[nr] + stripe_len);
8027 offset = logical[nr] + stripe_len;
8030 * Could be tricky, the super may land in the
8031 * middle of the area we're checking. First
8032 * check the easiest case, it's at the end.
8034 if (logical[nr] + stripe_len >=
8036 bytes = logical[nr] - offset;
8040 /* Check the left side */
8041 ret = check_cache_range(root, cache,
8043 logical[nr] - offset);
8049 /* Now we continue with the right side */
8050 bytes = (offset + bytes) -
8051 (logical[nr] + stripe_len);
8052 offset = logical[nr] + stripe_len;
/* What is left must be one free space entry with the same bounds. */
8059 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8061 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8062 offset, offset+bytes);
8066 if (entry->offset != offset) {
8067 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8072 if (entry->bytes != bytes) {
8073 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8074 bytes, entry->bytes, offset);
/* Remove the verified entry from the free space ctl. */
8078 unlink_free_space(cache->free_space_ctl, entry);
/*
 * verify_space_cache - compare the free space of @cache with the extent tree.
 * @root:  any root; it is replaced by fs_info->extent_root below
 * @cache: block group to verify
 *
 * Walks the extent/metadata items inside the block group.  Every gap between
 * the end of the previous extent (last) and the next extent is checked with
 * check_cache_range(), as is the tail of the block group.  Entries still left
 * in the free space ctl afterwards are reported.
 */
8083 static int verify_space_cache(struct btrfs_root *root,
8084 struct btrfs_block_group_cache *cache)
8086 struct btrfs_path path;
8087 struct extent_buffer *leaf;
8088 struct btrfs_key key;
8092 root = root->fs_info->extent_root;
/* Never start below the primary superblock offset. */
8094 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8096 btrfs_init_path(&path);
8097 key.objectid = last;
8099 key.type = BTRFS_EXTENT_ITEM_KEY;
8100 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8105 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8106 ret = btrfs_next_leaf(root, &path);
8114 leaf = path.nodes[0];
8115 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Compare the item against the end of the block group. */
8116 if (key.objectid >= cache->key.offset + cache->key.objectid)
8118 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8119 key.type != BTRFS_METADATA_ITEM_KEY) {
/* Extent starts where the previous one ended: no gap to check. */
8124 if (last == key.objectid) {
8125 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8126 last = key.objectid + key.offset;
8128 last = key.objectid + root->fs_info->nodesize;
/* The gap [last, key.objectid) is checked against the free space. */
8133 ret = check_cache_range(root, cache, last,
8134 key.objectid - last);
8137 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8138 last = key.objectid + key.offset;
8140 last = key.objectid + root->fs_info->nodesize;
/* Space between the last extent and the end of the block group. */
8144 if (last < cache->key.objectid + cache->key.offset)
8145 ret = check_cache_range(root, cache, last,
8146 cache->key.objectid +
8147 cache->key.offset - last);
8150 btrfs_release_path(&path);
8153 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8154 fprintf(stderr, "There are still entries left in the space "
/*
 * check_space_cache - verify the free space cache/tree of block groups.
 * @root: used for root->fs_info
 *
 * A cache generation that differs from the super generation (and is not
 * -1ULL) makes the function print that the cache will be invalidated.  For
 * each block group the free space is loaded -- from the free space tree when
 * the FREE_SPACE_TREE compat_ro bit is set, otherwise from the free space
 * cache -- and checked by verify_space_cache().
 *
 * Returns -EINVAL if error is non-zero, 0 otherwise.
 */
8162 static int check_space_cache(struct btrfs_root *root)
8164 struct btrfs_block_group_cache *cache;
8165 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8169 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8170 btrfs_super_generation(root->fs_info->super_copy) !=
8171 btrfs_super_cache_generation(root->fs_info->super_copy)) {
8172 printf("cache and super generation don't match, space cache "
8173 "will be invalidated\n");
/* Optional progress reporting through the file-scope task context. */
8177 if (ctx.progress_enabled) {
8178 ctx.tp = TASK_FREE_SPACE;
8179 task_start(ctx.info);
8183 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* Continue after this block group on the next lookup. */
8187 start = cache->key.objectid + cache->key.offset;
8188 if (!cache->free_space_ctl) {
8189 if (btrfs_init_free_space_ctl(cache,
8190 root->fs_info->sectorsize)) {
8195 btrfs_remove_free_space_cache(cache);
8198 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
/* Superblock stripes are excluded while the free space tree is loaded. */
8199 ret = exclude_super_stripes(root, cache);
8201 fprintf(stderr, "could not exclude super stripes: %s\n",
8206 ret = load_free_space_tree(root->fs_info, cache);
8207 free_excluded_extents(root, cache);
8209 fprintf(stderr, "could not load free space tree: %s\n",
8216 ret = load_free_space_cache(root->fs_info, cache);
8221 ret = verify_space_cache(root, cache);
8223 fprintf(stderr, "cache appears valid but isn't %Lu\n",
8224 cache->key.objectid);
8229 task_stop(ctx.info);
8231 return error ? -EINVAL : 0;
/*
 * check_extent_csums - verify data checksums of one extent.
 * @root:        used for root->fs_info
 * @bytenr:      logical start of the data to verify
 * @num_bytes:   number of bytes to verify; tested against sectorsize below
 * @leaf_offset: offset inside @eb where the expected checksums start
 * @eb:          leaf holding the expected checksums
 *
 * The data is read with read_extent_data() and checked one sector at a time
 * against the checksum stored in @eb.  A mismatch is reported, and the
 * mirror number is compared with btrfs_num_copies() to decide whether
 * another mirror can be tried.
 */
8234 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8235 u64 num_bytes, unsigned long leaf_offset,
8236 struct extent_buffer *eb) {
8238 struct btrfs_fs_info *fs_info = root->fs_info;
8240 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8242 unsigned long csum_offset;
8246 u64 data_checked = 0;
8252 if (num_bytes % fs_info->sectorsize)
/* One buffer for the whole range. */
8255 data = malloc(num_bytes);
8259 while (offset < num_bytes) {
8262 read_len = num_bytes - offset;
8263 /* read as much space once a time */
8264 ret = read_extent_data(fs_info, data + offset,
8265 bytenr + offset, &read_len, mirror);
8269 /* verify every 4k data's checksum */
8270 while (data_checked < read_len) {
8272 tmp = offset + data_checked;
8274 csum = btrfs_csum_data((char *)data + tmp,
8275 csum, fs_info->sectorsize);
8276 btrfs_csum_final(csum, (u8 *)&csum);
/* Expected csum: one csum_size entry per sector, starting at leaf_offset. */
8278 csum_offset = leaf_offset +
8279 tmp / fs_info->sectorsize * csum_size;
8280 read_extent_buffer(eb, (char *)&csum_expected,
8281 csum_offset, csum_size);
8282 /* try another mirror */
8283 if (csum != csum_expected) {
8284 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8285 mirror, bytenr + tmp,
8286 csum, csum_expected);
8287 num_copies = btrfs_num_copies(root->fs_info,
8289 if (mirror < num_copies - 1) {
8294 data_checked += fs_info->sectorsize;
/*
 * Look in the extent tree for EXTENT_ITEMs covering the range
 * [bytenr, bytenr + num_bytes) and report on stderr when part of the range
 * has no extent.
 *
 * The search key is (bytenr, EXTENT_ITEM, -1); the path is stepped backwards
 * first, then the items are examined and bytenr/num_bytes are trimmed as
 * covering extents are found.  An extent lying inside the range is handled by
 * calling this function again on the right-hand remainder.
 *
 * NOTE(review): lines are missing from this extract (loop heads, returns and
 * the second parameter line); exact return values cannot be confirmed here.
 */
8303 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8306 struct btrfs_path path;
8307 struct extent_buffer *leaf;
8308 struct btrfs_key key;
8311 btrfs_init_path(&path);
8312 key.objectid = bytenr;
8313 key.type = BTRFS_EXTENT_ITEM_KEY;
8314 key.offset = (u64)-1;
8317 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8320 fprintf(stderr, "Error looking up extent record %d\n", ret);
8321 btrfs_release_path(&path);
8324 if (path.slots[0] > 0) {
8327 ret = btrfs_prev_leaf(root, &path);
8330 } else if (ret > 0) {
8337 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8340 * Block group items come before extent items if they have the same
8341 * bytenr, so walk back one more just in case. Dear future traveller,
8342 * first congrats on mastering time travel. Now if it's not too much
8343 * trouble could you go back to 2006 and tell Chris to make the
8344 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8345 * EXTENT_ITEM_KEY please?
8347 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8348 if (path.slots[0] > 0) {
8351 ret = btrfs_prev_leaf(root, &path);
8354 } else if (ret > 0) {
8359 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8363 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8364 ret = btrfs_next_leaf(root, &path);
8366 fprintf(stderr, "Error going to next leaf "
8368 btrfs_release_path(&path);
8374 leaf = path.nodes[0];
8375 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8376 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* for an EXTENT_ITEM key, objectid is the start and offset the length */
8380 if (key.objectid + key.offset < bytenr) {
8384 if (key.objectid > bytenr + num_bytes)
8387 if (key.objectid == bytenr) {
8388 if (key.offset >= num_bytes) {
8392 num_bytes -= key.offset;
8393 bytenr += key.offset;
8394 } else if (key.objectid < bytenr) {
8395 if (key.objectid + key.offset >= bytenr + num_bytes) {
8399 num_bytes = (bytenr + num_bytes) -
8400 (key.objectid + key.offset);
8401 bytenr = key.objectid + key.offset;
8403 if (key.objectid + key.offset < bytenr + num_bytes) {
8404 u64 new_start = key.objectid + key.offset;
8405 u64 new_bytes = bytenr + num_bytes - new_start;
8408 * Weird case, the extent is in the middle of
8409 * our range, we'll have to search one side
8410 * and then the other. Not sure if this happens
8411 * in real life, but no harm in coding it up
8412 * anyway just in case.
8414 btrfs_release_path(&path);
8415 ret = check_extent_exists(root, new_start,
8418 fprintf(stderr, "Right section didn't "
8422 num_bytes = key.objectid - bytenr;
8425 num_bytes = key.objectid - bytenr;
/* bytes still uncovered after the walk are reported */
8432 if (num_bytes && !ret) {
8433 fprintf(stderr, "There are no extents for csum range "
8434 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8438 btrfs_release_path(&path);
/*
 * Walk the csum tree (root->fs_info->csum_root) item by item.
 *
 * For each EXTENT_CSUM item the covered data length is derived from the item
 * size: (item size / csum_size) * sectorsize.  Unless check_data_csum is
 * clear, the data itself is verified through check_extent_csums().  Adjacent
 * csum items are accumulated in offset/num_bytes; when the next item does not
 * start at offset + num_bytes the accumulated run is handed to
 * check_extent_exists().
 *
 * NOTE(review): loop heads, error paths and the return are not part of this
 * extract.
 */
8442 static int check_csums(struct btrfs_root *root)
8444 struct btrfs_path path;
8445 struct extent_buffer *leaf;
8446 struct btrfs_key key;
8447 u64 offset = 0, num_bytes = 0;
8448 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8452 unsigned long leaf_offset;
/* from here on root refers to the csum tree */
8454 root = root->fs_info->csum_root;
8455 if (!extent_buffer_uptodate(root->node)) {
8456 fprintf(stderr, "No valid csum tree found\n");
8460 btrfs_init_path(&path);
8461 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8462 key.type = BTRFS_EXTENT_CSUM_KEY;
8464 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8466 fprintf(stderr, "Error searching csum tree %d\n", ret);
8467 btrfs_release_path(&path);
8471 if (ret > 0 && path.slots[0])
8476 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8477 ret = btrfs_next_leaf(root, &path);
8479 fprintf(stderr, "Error going to next leaf "
8486 leaf = path.nodes[0];
8488 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8489 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* data bytes covered by this item: one checksum per sector */
8494 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8495 csum_size) * root->fs_info->sectorsize;
8496 if (!check_data_csum)
8497 goto skip_csum_check;
8498 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8499 ret = check_extent_csums(root, key.offset, data_len,
8505 offset = key.offset;
/* gap between csum items: check the run collected so far */
8506 } else if (key.offset != offset + num_bytes) {
8507 ret = check_extent_exists(root, offset, num_bytes);
8509 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8510 "there is no extent record\n",
8511 offset, offset+num_bytes);
8514 offset = key.offset;
8517 num_bytes += data_len;
8521 btrfs_release_path(&path);
/*
 * Compare key against drop_key field by field, in (objectid, type, offset)
 * order.
 *
 * NOTE(review): the statements executed for each comparison are not part of
 * this extract; presumably a key ordered before drop_key is reported as
 * already dropped - confirm against the full file.
 */
8525 static int is_dropped_key(struct btrfs_key *key,
8526 struct btrfs_key *drop_key) {
8527 if (key->objectid < drop_key->objectid)
8529 else if (key->objectid == drop_key->objectid) {
8530 if (key->type < drop_key->type)
8532 else if (key->type == drop_key->type) {
8533 if (key->offset < drop_key->offset)
8541 * Here are the rules for FULL_BACKREF.
8543 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8544 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8546 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
8547 * if it happened after the relocation occurred since we'll have dropped the
8548 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8549 * have no real way to know for sure.
8551 * We process the blocks one root at a time, and we start from the lowest root
8552 * objectid and go to the highest. So we can just lookup the owner backref for
8553 * the record and if we don't find it then we know it doesn't exist and we have
8556 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
8557 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8558 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether the tree block buf should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF and note inconsistencies on its record.
 *
 * The extent record for buf->start is looked up in extent_cache, then a
 * chain of tests is applied: ri->objectid below BTRFS_FIRST_FREE_OBJECTID,
 * buf being the block at ri->bytenr, the RELOC header flag, the header owner
 * being ri->objectid, and a lookup of the tree backref for that owner.
 * When the record already holds a flag_block_full_backref value other than
 * FLAG_UNSET that disagrees with the outcome, rec->bad_full_backref is set.
 *
 * NOTE(review): the branch targets of the tests and the last parameter line
 * are missing from this extract, so which test leads to which outcome is not
 * shown here.
 */
8560 static int calc_extent_flag(struct cache_tree *extent_cache,
8561 struct extent_buffer *buf,
8562 struct root_item_record *ri,
8565 struct extent_record *rec;
8566 struct cache_extent *cache;
8567 struct tree_backref *tback;
8570 cache = lookup_cache_extent(extent_cache, buf->start, 1);
8571 /* we have added this extent before */
8575 rec = container_of(cache, struct extent_record, cache);
8578 * Except file/reloc tree, we can not have
8581 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8586 if (buf->start == ri->bytenr)
8589 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8592 owner = btrfs_header_owner(buf);
8593 if (owner == ri->objectid)
8596 tback = find_tree_backref(rec, 0, owner);
/* outcome without FULL_BACKREF: a previously recorded non-zero value conflicts */
8601 if (rec->flag_block_full_backref != FLAG_UNSET &&
8602 rec->flag_block_full_backref != 0)
8603 rec->bad_full_backref = 1;
/* outcome with FULL_BACKREF: a previously recorded value other than 1 conflicts */
8606 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8607 if (rec->flag_block_full_backref != FLAG_UNSET &&
8608 rec->flag_block_full_backref != 1)
8609 rec->bad_full_backref = 1;
/*
 * Print a one-line diagnostic to stderr of the form
 * Invalid key type(<type>) found in root(<objectid>)
 * where the two values are rendered by print_key_type() and
 * print_objectid().
 */
8613 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8615 fprintf(stderr, "Invalid key type(");
8616 print_key_type(stderr, 0, key_type);
8617 fprintf(stderr, ") found in root(");
8618 print_objectid(stderr, rootid, 0);
8619 fprintf(stderr, ")\n");
8623 * Check if the key is valid with its extent buffer.
8625 * This is an early check in case an invalid key exists in an extent buffer
8626 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check that key_type is plausible for the tree identified by rootid.
 *
 * Visible pairings: DEV_ITEM and CHUNK_ITEM are compared against the chunk
 * tree, EXTENT_ITEM, METADATA_ITEM and BLOCK_GROUP_ITEM against the extent
 * tree, ROOT_ITEM against the root tree and DEV_EXTENT against the dev tree.
 * A mismatch is reported through report_mismatch_key_root().
 *
 * NOTE(review): the switch head, the branch bodies and the return statements
 * are not part of this extract.
 */
8629 static int check_type_with_root(u64 rootid, u8 key_type)
8632 /* Only valid in chunk tree */
8633 case BTRFS_DEV_ITEM_KEY:
8634 case BTRFS_CHUNK_ITEM_KEY:
8635 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8638 /* valid in csum and log tree */
/*
 * NOTE(review): this label is a tree objectid while every other label in this
 * function is a key type; BTRFS_EXTENT_CSUM_KEY may have been intended -
 * confirm before relying on this case.
 */
8639 case BTRFS_CSUM_TREE_OBJECTID:
8640 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8644 case BTRFS_EXTENT_ITEM_KEY:
8645 case BTRFS_METADATA_ITEM_KEY:
8646 case BTRFS_BLOCK_GROUP_ITEM_KEY:
8647 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8650 case BTRFS_ROOT_ITEM_KEY:
8651 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8654 case BTRFS_DEV_EXTENT_KEY:
8655 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8661 report_mismatch_key_root(key_type, rootid);
/*
 * Take the next pending tree block, read it and feed its contents into the
 * in-memory caches used by the extent check.
 *
 * Steps visible in this extract:
 *  - pick_next_pending() fills bits[]; blocks from bits[] are added to reada
 *    and readahead_tree_block() is called for them
 *  - bits[0] is removed from the pending, reada and nodes trees and read
 *    with read_tree_block(); a block that is not uptodate is handed to
 *    record_bad_block_io()
 *  - the flags come from btrfs_lookup_extent_info() or calc_extent_flag()
 *    and FULL_BACKREF is cross-checked against the header owner, the RELOC
 *    header flag and ri->last_snapshot
 *  - leaf: each item is dispatched on key.type to the matching process_*()
 *    or add_*_backref() helper; file extents update the data byte counters
 *  - node: each child pointer gets an extent record and a tree backref and
 *    is queued through add_pending()
 *  - the global btree statistics are updated and buf is released
 *
 * NOTE(review): many lines of this function are absent from this extract
 * (some parameters, conditions, error paths, the return); the summary covers
 * only what is shown.
 */
8665 static int run_next_block(struct btrfs_root *root,
8666 struct block_info *bits,
8669 struct cache_tree *pending,
8670 struct cache_tree *seen,
8671 struct cache_tree *reada,
8672 struct cache_tree *nodes,
8673 struct cache_tree *extent_cache,
8674 struct cache_tree *chunk_cache,
8675 struct rb_root *dev_cache,
8676 struct block_group_tree *block_group_cache,
8677 struct device_extent_tree *dev_extent_cache,
8678 struct root_item_record *ri)
8680 struct btrfs_fs_info *fs_info = root->fs_info;
8681 struct extent_buffer *buf;
/*
 * NOTE(review): rec starts as NULL and is assigned only from the
 * extent_cache lookup further down, yet it is dereferenced later; the guards
 * are not visible in this extract - confirm it cannot be NULL there.
 */
8682 struct extent_record *rec = NULL;
8693 struct btrfs_key key;
8694 struct cache_extent *cache;
8697 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8698 bits_nr, &reada_bits);
8703 for(i = 0; i < nritems; i++) {
8704 ret = add_cache_extent(reada, bits[i].start,
8709 /* fixme, get the parent transid */
8710 readahead_tree_block(fs_info, bits[i].start, 0);
/* the block processed by this call is bits[0] */
8713 *last = bits[0].start;
8714 bytenr = bits[0].start;
8715 size = bits[0].size;
8717 cache = lookup_cache_extent(pending, bytenr, size);
8719 remove_cache_extent(pending, cache);
8722 cache = lookup_cache_extent(reada, bytenr, size);
8724 remove_cache_extent(reada, cache);
8727 cache = lookup_cache_extent(nodes, bytenr, size);
8729 remove_cache_extent(nodes, cache);
8732 cache = lookup_cache_extent(extent_cache, bytenr, size);
8734 rec = container_of(cache, struct extent_record, cache);
8735 gen = rec->parent_generation;
8738 /* fixme, get the real parent transid */
8739 buf = read_tree_block(root->fs_info, bytenr, gen);
8740 if (!extent_buffer_uptodate(buf)) {
8741 record_bad_block_io(root->fs_info,
8742 extent_cache, bytenr, size);
8746 nritems = btrfs_header_nritems(buf);
8749 if (!init_extent_tree) {
8750 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8751 btrfs_header_level(buf), 1, NULL,
8754 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8756 fprintf(stderr, "Couldn't calc extent flags\n");
8757 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8762 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8764 fprintf(stderr, "Couldn't calc extent flags\n");
8765 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8769 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8771 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8772 ri->objectid == btrfs_header_owner(buf)) {
8774 * Ok we got to this block from it's original owner and
8775 * we have FULL_BACKREF set. Relocation can leave
8776 * converted blocks over so this is altogether possible,
8777 * however it's not possible if the generation > the
8778 * last snapshot, so check for this case.
8780 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8781 btrfs_header_generation(buf) > ri->last_snapshot) {
8782 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8783 rec->bad_full_backref = 1;
8788 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8789 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8790 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8791 rec->bad_full_backref = 1;
8795 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8796 rec->flag_block_full_backref = 1;
8800 rec->flag_block_full_backref = 0;
8802 owner = btrfs_header_owner(buf);
8805 ret = check_block(root, extent_cache, buf, flags);
8809 if (btrfs_is_leaf(buf)) {
8810 btree_space_waste += btrfs_leaf_free_space(root, buf);
8811 for (i = 0; i < nritems; i++) {
8812 struct btrfs_file_extent_item *fi;
8813 btrfs_item_key_to_cpu(buf, &key, i);
8815 * Check key type against the leaf owner.
8816 * Could filter quite a lot of early error if
8819 if (check_type_with_root(btrfs_header_owner(buf),
8821 fprintf(stderr, "ignoring invalid key\n");
8824 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8825 process_extent_item(root, extent_cache, buf,
8829 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8830 process_extent_item(root, extent_cache, buf,
8834 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8836 btrfs_item_size_nr(buf, i);
8839 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8840 process_chunk_item(chunk_cache, &key, buf, i);
8843 if (key.type == BTRFS_DEV_ITEM_KEY) {
8844 process_device_item(dev_cache, &key, buf, i);
8847 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8848 process_block_group_item(block_group_cache,
8852 if (key.type == BTRFS_DEV_EXTENT_KEY) {
8853 process_device_extent_item(dev_extent_cache,
8858 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8859 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8860 process_extent_ref_v0(extent_cache, buf, i);
/* TREE_BLOCK_REF: key.offset is passed in the root position, parent is 0 */
8867 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8868 ret = add_tree_backref(extent_cache,
8869 key.objectid, 0, key.offset, 0);
8872 "add_tree_backref failed (leaf tree block): %s",
/* SHARED_BLOCK_REF: key.offset is passed in the parent position, root is 0 */
8876 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8877 ret = add_tree_backref(extent_cache,
8878 key.objectid, key.offset, 0, 0);
8881 "add_tree_backref failed (leaf shared block): %s",
8885 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8886 struct btrfs_extent_data_ref *ref;
8887 ref = btrfs_item_ptr(buf, i,
8888 struct btrfs_extent_data_ref);
8889 add_data_backref(extent_cache,
8891 btrfs_extent_data_ref_root(buf, ref),
8892 btrfs_extent_data_ref_objectid(buf,
8894 btrfs_extent_data_ref_offset(buf, ref),
8895 btrfs_extent_data_ref_count(buf, ref),
8896 0, root->fs_info->sectorsize);
8899 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8900 struct btrfs_shared_data_ref *ref;
8901 ref = btrfs_item_ptr(buf, i,
8902 struct btrfs_shared_data_ref);
8903 add_data_backref(extent_cache,
8904 key.objectid, key.offset, 0, 0, 0,
8905 btrfs_shared_data_ref_count(buf, ref),
8906 0, root->fs_info->sectorsize);
/* orphan items are copied into a bad_item queued on delete_items */
8909 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8910 struct bad_item *bad;
8912 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8916 bad = malloc(sizeof(struct bad_item));
8919 INIT_LIST_HEAD(&bad->list);
8920 memcpy(&bad->key, &key,
8921 sizeof(struct btrfs_key));
8922 bad->root_id = owner;
8923 list_add_tail(&bad->list, &delete_items);
/* the remaining statements of the item loop deal with file extent items */
8926 if (key.type != BTRFS_EXTENT_DATA_KEY)
8928 fi = btrfs_item_ptr(buf, i,
8929 struct btrfs_file_extent_item);
8930 if (btrfs_file_extent_type(buf, fi) ==
8931 BTRFS_FILE_EXTENT_INLINE)
8933 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8936 data_bytes_allocated +=
8937 btrfs_file_extent_disk_num_bytes(buf, fi);
8938 if (data_bytes_allocated < root->fs_info->sectorsize) {
8941 data_bytes_referenced +=
8942 btrfs_file_extent_num_bytes(buf, fi);
8943 add_data_backref(extent_cache,
8944 btrfs_file_extent_disk_bytenr(buf, fi),
8945 parent, owner, key.objectid, key.offset -
8946 btrfs_file_extent_offset(buf, fi), 1, 1,
8947 btrfs_file_extent_disk_num_bytes(buf, fi));
8951 struct btrfs_key first_key;
8953 first_key.objectid = 0;
8956 btrfs_item_key_to_cpu(buf, &first_key, 0);
8957 level = btrfs_header_level(buf);
8958 for (i = 0; i < nritems; i++) {
8959 struct extent_record tmpl;
8961 ptr = btrfs_node_blockptr(buf, i);
8962 size = root->fs_info->nodesize;
8963 btrfs_node_key_to_cpu(buf, &key, i);
/* at the drop level, child keys are tested with is_dropped_key() */
8965 if ((level == ri->drop_level)
8966 && is_dropped_key(&key, &ri->drop_key)) {
8971 memset(&tmpl, 0, sizeof(tmpl));
8972 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8973 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8978 tmpl.max_size = size;
8979 ret = add_extent_rec(extent_cache, &tmpl);
8983 ret = add_tree_backref(extent_cache, ptr, parent,
8987 "add_tree_backref failed (non-leaf block): %s",
8993 add_pending(nodes, seen, ptr, size);
8995 add_pending(pending, seen, ptr, size);
/* unused key pointer slots of the node count as wasted btree space */
8998 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
8999 nritems) * sizeof(struct btrfs_key_ptr);
9001 total_btree_bytes += buf->len;
9002 if (fs_root_objectid(btrfs_header_owner(buf)))
9003 total_fs_tree_bytes += buf->len;
9004 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9005 total_extent_tree_bytes += buf->len;
9007 free_extent_buffer(buf);
/*
 * Queue a tree root block for scanning and seed its extent record.
 *
 * buf is queued with add_pending() on nodes when its header level is above
 * 0; the other add_pending() call targets pending.  An extent record starting
 * at buf->start with max_size buf->len is added to extent_cache, followed by
 * a tree backref.  For BTRFS_TREE_RELOC_OBJECTID, or a header backref
 * revision below BTRFS_MIXED_BACKREF_REV, the backref call passes buf->start
 * a second time; the other call passes 0 and objectid instead.
 *
 * NOTE(review): some parameter lines, template fields and the return are not
 * part of this extract.
 */
9011 static int add_root_to_pending(struct extent_buffer *buf,
9012 struct cache_tree *extent_cache,
9013 struct cache_tree *pending,
9014 struct cache_tree *seen,
9015 struct cache_tree *nodes,
9018 struct extent_record tmpl;
9021 if (btrfs_header_level(buf) > 0)
9022 add_pending(nodes, seen, buf->start, buf->len);
9024 add_pending(pending, seen, buf->start, buf->len);
9026 memset(&tmpl, 0, sizeof(tmpl));
9027 tmpl.start = buf->start;
9032 tmpl.max_size = buf->len;
9033 add_extent_rec(extent_cache, &tmpl);
9035 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9036 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9037 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9040 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9045 /* as we fix the tree, we might be deleting blocks that
9046 * we're tracking for repair. This hook makes sure we
9047 * remove any backrefs for blocks as we are fixing them.
/*
 * Keep the fsck extent cache in step with extent frees done during repair.
 *
 * The record covering [bytenr, bytenr + num_bytes) is looked up in
 * root->fs_info->fsck_extent_cache.  is_data is derived from
 * owner >= BTRFS_FIRST_FREE_OBJECTID.  For a data backref the reference
 * counters are lowered by refs_to_drop; for a tree backref the found flags
 * are cleared.  maybe_free_extent_rec() is called on the record afterwards.
 *
 * NOTE(review): the last parameter line, several conditions and the return
 * are not part of this extract.
 */
9049 static int free_extent_hook(struct btrfs_trans_handle *trans,
9050 struct btrfs_root *root,
9051 u64 bytenr, u64 num_bytes, u64 parent,
9052 u64 root_objectid, u64 owner, u64 offset,
9055 struct extent_record *rec;
9056 struct cache_extent *cache;
9058 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
9060 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9061 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9065 rec = container_of(cache, struct extent_record, cache);
9067 struct data_backref *back;
9068 back = find_data_backref(rec, parent, root_objectid, owner,
9069 offset, 1, bytenr, num_bytes);
9072 if (back->node.found_ref) {
9073 back->found_ref -= refs_to_drop;
9075 rec->refs -= refs_to_drop;
9077 if (back->node.found_extent_tree) {
9078 back->num_refs -= refs_to_drop;
9079 if (rec->extent_item_refs)
9080 rec->extent_item_refs -= refs_to_drop;
/* a counter that reached zero clears the matching found flag */
9082 if (back->found_ref == 0)
9083 back->node.found_ref = 0;
9084 if (back->num_refs == 0)
9085 back->node.found_extent_tree = 0;
/*
 * NOTE(review): the backref is erased while found_ref is still set; a test
 * for both flags being clear may have been intended - confirm.
 */
9087 if (!back->node.found_extent_tree && back->node.found_ref) {
9088 rb_erase(&back->node.node, &rec->backref_tree);
9092 struct tree_backref *back;
9093 back = find_tree_backref(rec, parent, root_objectid);
9096 if (back->node.found_ref) {
9099 back->node.found_ref = 0;
9101 if (back->node.found_extent_tree) {
9102 if (rec->extent_item_refs)
9103 rec->extent_item_refs--;
9104 back->node.found_extent_tree = 0;
/* NOTE(review): same condition as in the data backref branch - confirm */
9106 if (!back->node.found_extent_tree && back->node.found_ref) {
9107 rb_erase(&back->node.node, &rec->backref_tree);
9111 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent tree items whose key objectid equals bytenr.
 *
 * The extent tree is searched with key (bytenr, <type>, -1).  Items of the
 * extent and backref key types listed in the big condition below are removed
 * with btrfs_del_item(); for any other type the search key is lowered (type
 * minus one, or offset minus one when the type is already 0) after the path
 * is released.  Deleting an EXTENT_ITEM or METADATA_ITEM is followed by a
 * btrfs_update_block_group() call, with the byte count taken from the key
 * offset for EXTENT_ITEM and from fs_info->nodesize for METADATA_ITEM.
 *
 * NOTE(review): loop heads, error paths, one parameter line and the return
 * are not part of this extract.
 */
9116 static int delete_extent_records(struct btrfs_trans_handle *trans,
9117 struct btrfs_root *root,
9118 struct btrfs_path *path,
9121 struct btrfs_key key;
9122 struct btrfs_key found_key;
9123 struct extent_buffer *leaf;
9128 key.objectid = bytenr;
9130 key.offset = (u64)-1;
9133 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9140 if (path->slots[0] == 0)
9146 leaf = path->nodes[0];
9147 slot = path->slots[0];
9149 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9150 if (found_key.objectid != bytenr)
/* key types that are not extent items or backrefs are stepped over */
9153 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9154 found_key.type != BTRFS_METADATA_ITEM_KEY &&
9155 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9156 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9157 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9158 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9159 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9160 btrfs_release_path(path);
9161 if (found_key.type == 0) {
9162 if (found_key.offset == 0)
9164 key.offset = found_key.offset - 1;
9165 key.type = found_key.type;
9167 key.type = found_key.type - 1;
9168 key.offset = (u64)-1;
9172 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9173 found_key.objectid, found_key.type, found_key.offset);
9175 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9178 btrfs_release_path(path);
9180 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9181 found_key.type == BTRFS_METADATA_ITEM_KEY) {
9182 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9183 found_key.offset : root->fs_info->nodesize;
9185 ret = btrfs_update_block_group(trans, root, bytenr,
9192 btrfs_release_path(path);
9197 * for a single backref, this will allocate a new extent
9198 * and add the backref to it.
/*
 * Record one backref of rec in the extent tree.
 *
 * Visible steps: an EXTENT_ITEM keyed (rec->start, EXTENT_ITEM,
 * rec->max_size) is inserted with refs 0 and rec->generation; data extents
 * get BTRFS_EXTENT_FLAG_DATA, tree blocks get a btrfs_tree_block_info filled
 * from rec->info_objectid / rec->info_level plus
 * BTRFS_EXTENT_FLAG_TREE_BLOCK | flags.  The block group accounting is
 * updated, then btrfs_inc_extent_ref() is called once per found data ref, or
 * once for a tree backref, and the action is logged to stderr.
 *
 * NOTE(review): this extract omits lines of the function (conditions, else
 * branches, error paths, the return); they are left out here as well and are
 * not reconstructed.  The only code changes are the repair of the
 * mis-encoded copy_key address expressions and the removal of a doubled
 * semicolon on the copy_key declaration.
 */
9200 static int record_extent(struct btrfs_trans_handle *trans,
9201 struct btrfs_fs_info *info,
9202 struct btrfs_path *path,
9203 struct extent_record *rec,
9204 struct extent_backref *back,
9205 int allocated, u64 flags)
9208 struct btrfs_root *extent_root = info->extent_root;
9209 struct extent_buffer *leaf;
9210 struct btrfs_key ins_key;
9211 struct btrfs_extent_item *ei;
9212 struct data_backref *dback;
9213 struct btrfs_tree_block_info *bi;
9216 rec->max_size = max_t(u64, rec->max_size,
9220 u32 item_size = sizeof(*ei);
9223 item_size += sizeof(*bi);
9225 ins_key.objectid = rec->start;
9226 ins_key.offset = rec->max_size;
9227 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9229 ret = btrfs_insert_empty_item(trans, extent_root, path,
9230 &ins_key, item_size);
9234 leaf = path->nodes[0];
9235 ei = btrfs_item_ptr(leaf, path->slots[0],
9236 struct btrfs_extent_item);
/* the item starts with zero refs; refs are added by the calls further down */
9238 btrfs_set_extent_refs(leaf, ei, 0);
9239 btrfs_set_extent_generation(leaf, ei, rec->generation);
9241 if (back->is_data) {
9242 btrfs_set_extent_flags(leaf, ei,
9243 BTRFS_EXTENT_FLAG_DATA);
9245 struct btrfs_disk_key copy_key;
/* the tree block info sits directly behind the extent item */
9247 bi = (struct btrfs_tree_block_info *)(ei + 1);
9248 memset_extent_buffer(leaf, 0, (unsigned long)bi,
9251 btrfs_set_disk_key_objectid(&copy_key,
9252 rec->info_objectid);
9253 btrfs_set_disk_key_type(&copy_key, 0);
9254 btrfs_set_disk_key_offset(&copy_key, 0);
9256 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9257 btrfs_set_tree_block_key(leaf, bi, &copy_key);
9259 btrfs_set_extent_flags(leaf, ei,
9260 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9263 btrfs_mark_buffer_dirty(leaf);
9264 ret = btrfs_update_block_group(trans, extent_root, rec->start,
9265 rec->max_size, 1, 0);
9268 btrfs_release_path(path);
9271 if (back->is_data) {
9275 dback = to_data_backref(back);
9276 if (back->full_backref)
9277 parent = dback->parent;
9281 for (i = 0; i < dback->found_ref; i++) {
9282 /* if parent != 0, we're doing a full backref
9283 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9284 * just makes the backref allocator create a data
9287 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9288 rec->start, rec->max_size,
9292 BTRFS_FIRST_FREE_OBJECTID :
9298 fprintf(stderr, "adding new data backref"
9299 " on %llu %s %llu owner %llu"
9300 " offset %llu found %d\n",
9301 (unsigned long long)rec->start,
9302 back->full_backref ?
9304 back->full_backref ?
9305 (unsigned long long)parent :
9306 (unsigned long long)dback->root,
9307 (unsigned long long)dback->owner,
9308 (unsigned long long)dback->offset,
9312 struct tree_backref *tback;
9314 tback = to_tree_backref(back);
9315 if (back->full_backref)
9316 parent = tback->parent;
9320 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9321 rec->start, rec->max_size,
9322 parent, tback->root, 0, 0);
9323 fprintf(stderr, "adding new tree backref on "
9324 "start %llu len %llu parent %llu root %llu\n",
9325 rec->start, rec->max_size, parent, tback->root);
9328 btrfs_release_path(path);
/*
 * Walk the entries list looking for an extent_entry whose bytenr and bytes
 * both equal the arguments.
 *
 * NOTE(review): the return statements are not part of this extract;
 * presumably the matching entry is returned - confirm in the full file.
 */
9332 static struct extent_entry *find_entry(struct list_head *entries,
9333 u64 bytenr, u64 bytes)
9335 struct extent_entry *entry = NULL;
9337 list_for_each_entry(entry, entries, list) {
9338 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Choose, from the entries list, the entry that the backrefs agree on most.
 *
 * Visible rules: an entry whose broken count equals its count is tested
 * first, a current best with the same count as the entry under inspection is
 * treated as a tie, and otherwise the entry with the larger count is kept
 * (comparing against prev when no best exists yet).
 *
 * NOTE(review): several branch bodies and the return are not part of this
 * extract, so the value returned on a tie cannot be confirmed here.
 */
9345 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9347 struct extent_entry *entry, *best = NULL, *prev = NULL;
9349 list_for_each_entry(entry, entries, list) {
9351 * If there are as many broken entries as entries then we know
9352 * not to trust this particular entry.
9354 if (entry->broken == entry->count)
9358 * Special case, when there are only two entries and 'best' is
9368 * If our current entry == best then we can't be sure our best
9369 * is really the best, so we need to keep searching.
9371 if (best && best->count == entry->count) {
9377 /* Prev == entry, not good enough, have to keep searching */
9378 if (!prev->broken && prev->count == entry->count)
9382 best = (prev->count > entry->count) ? prev : entry;
9383 else if (best->count < entry->count)
/*
 * Rewrite the file extent item behind dback so that it refers to the extent
 * described by entry (entry->bytenr, entry->bytes).
 *
 * Visible steps:
 *  - the fs root dback->root is read with btrfs_read_fs_root()
 *  - a read-only search from (dback->owner, EXTENT_DATA, dback->offset)
 *    looks for the item whose disk bytenr and disk num bytes equal
 *    dback->disk_bytenr and dback->bytes
 *  - a transaction is started and the same key is searched again with cow
 *  - compressed extents whose start differs from entry->bytenr are refused
 *    with a message
 *  - disk_bytenr and the file extent offset are adjusted depending on
 *    whether the ref starts after or before the entry; disk_num_bytes is set
 *    to entry->bytes, and ram_bytes too when the extent is not compressed
 *  - the leaf is marked dirty and the transaction committed
 *
 * The final statement returns ret when it is non-zero, otherwise the result
 * of the commit.
 *
 * NOTE(review): loop heads, error paths and some statements are not part of
 * this extract.
 */
9391 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9392 struct data_backref *dback, struct extent_entry *entry)
9394 struct btrfs_trans_handle *trans;
9395 struct btrfs_root *root;
9396 struct btrfs_file_extent_item *fi;
9397 struct extent_buffer *leaf;
9398 struct btrfs_key key;
9402 key.objectid = dback->root;
9403 key.type = BTRFS_ROOT_ITEM_KEY;
9404 key.offset = (u64)-1;
9405 root = btrfs_read_fs_root(info, &key);
9407 fprintf(stderr, "Couldn't find root for our ref\n");
9412 * The backref points to the original offset of the extent if it was
9413 * split, so we need to search down to the offset we have and then walk
9414 * forward until we find the backref we're looking for.
9416 key.objectid = dback->owner;
9417 key.type = BTRFS_EXTENT_DATA_KEY;
9418 key.offset = dback->offset;
9419 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9421 fprintf(stderr, "Error looking up ref %d\n", ret);
9426 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9427 ret = btrfs_next_leaf(root, path);
9429 fprintf(stderr, "Couldn't find our ref, next\n");
9433 leaf = path->nodes[0];
9434 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9435 if (key.objectid != dback->owner ||
9436 key.type != BTRFS_EXTENT_DATA_KEY) {
9437 fprintf(stderr, "Couldn't find our ref, search\n");
9440 fi = btrfs_item_ptr(leaf, path->slots[0],
9441 struct btrfs_file_extent_item);
9442 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9443 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* this is the item the backref was built from */
9445 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9450 btrfs_release_path(path);
9452 trans = btrfs_start_transaction(root, 1);
9454 return PTR_ERR(trans);
9457 * Ok we have the key of the file extent we want to fix, now we can cow
9458 * down to the thing and fix it.
9460 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9462 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9463 key.objectid, key.type, key.offset, ret);
9467 fprintf(stderr, "Well that's odd, we just found this key "
9468 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9473 leaf = path->nodes[0];
9474 fi = btrfs_item_ptr(leaf, path->slots[0],
9475 struct btrfs_file_extent_item);
9477 if (btrfs_file_extent_compression(leaf, fi) &&
9478 dback->disk_bytenr != entry->bytenr) {
9479 fprintf(stderr, "Ref doesn't match the record start and is "
9480 "compressed, please take a btrfs-image of this file "
9481 "system and send it to a btrfs developer so they can "
9482 "complete this functionality for bytenr %Lu\n",
9483 dback->disk_bytenr);
/* three cases: broken backref, ref starting after the entry, ref starting before it */
9488 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9489 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9490 } else if (dback->disk_bytenr > entry->bytenr) {
9491 u64 off_diff, offset;
9493 off_diff = dback->disk_bytenr - entry->bytenr;
9494 offset = btrfs_file_extent_offset(leaf, fi);
9495 if (dback->disk_bytenr + offset +
9496 btrfs_file_extent_num_bytes(leaf, fi) >
9497 entry->bytenr + entry->bytes) {
9498 fprintf(stderr, "Ref is past the entry end, please "
9499 "take a btrfs-image of this file system and "
9500 "send it to a btrfs developer, ref %Lu\n",
9501 dback->disk_bytenr);
9506 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9507 btrfs_set_file_extent_offset(leaf, fi, offset);
9508 } else if (dback->disk_bytenr < entry->bytenr) {
9511 offset = btrfs_file_extent_offset(leaf, fi);
9512 if (dback->disk_bytenr + offset < entry->bytenr) {
9513 fprintf(stderr, "Ref is before the entry start, please"
9514 " take a btrfs-image of this file system and "
9515 "send it to a btrfs developer, ref %Lu\n",
9516 dback->disk_bytenr);
/* re-base the in-extent offset onto the start of the entry */
9521 offset += dback->disk_bytenr;
9522 offset -= entry->bytenr;
9523 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9524 btrfs_set_file_extent_offset(leaf, fi, offset);
9527 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9530 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9531 * only do this if we aren't using compression, otherwise it's a
9534 if (!btrfs_file_extent_compression(leaf, fi))
9535 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9537 printf("ram bytes may be wrong?\n");
9538 btrfs_mark_buffer_dirty(leaf);
9540 err = btrfs_commit_transaction(trans, root);
9541 btrfs_release_path(path);
9542 return ret ? ret : err;
/*
 * Check that the data backrefs of rec agree on (disk_bytenr, bytes) and try
 * to repair the ones that do not.
 *
 * Visible steps:
 *  - every data backref that is not a full backref and has found_ref != 0 is
 *    grouped into an extent_entry list keyed by (disk_bytenr, bytes); a
 *    backref differing from rec->start / rec->nr, or marked broken, is
 *    tested separately
 *  - with at most one entry and no mismatch nothing needs to be done
 *  - find_most_right_entry() picks the entry to trust; when needed the
 *    extent record itself (rec->start, rec->nr) is looked up or added as an
 *    entry and the choice is repeated
 *  - a chosen entry whose bytenr differs from rec->start is reported and not
 *    handled
 *  - every remaining backref that differs from the chosen entry is passed to
 *    repair_ref()
 *  - the entry list is emptied before leaving
 *
 * NOTE(review): declarations, counters, error paths and the return are not
 * part of this extract.
 */
9545 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9546 struct extent_record *rec)
9548 struct extent_backref *back, *tmp;
9549 struct data_backref *dback;
9550 struct extent_entry *entry, *best = NULL;
9553 int broken_entries = 0;
9558 * Metadata is easy and the backrefs should always agree on bytenr and
9559 * size, if not we've got bigger issues.
9564 rbtree_postorder_for_each_entry_safe(back, tmp,
9565 &rec->backref_tree, node) {
9566 if (back->full_backref || !back->is_data)
9569 dback = to_data_backref(back);
9572 * We only pay attention to backrefs that we found a real
9575 if (dback->found_ref == 0)
9579 * For now we only catch when the bytes don't match, not the
9580 * bytenr. We can easily do this at the same time, but I want
9581 * to have a fs image to test on before we just add repair
9582 * functionality willy-nilly so we know we won't screw up the
9586 entry = find_entry(&entries, dback->disk_bytenr,
9589 entry = malloc(sizeof(struct extent_entry));
9594 memset(entry, 0, sizeof(*entry));
9595 entry->bytenr = dback->disk_bytenr;
9596 entry->bytes = dback->bytes;
9597 list_add_tail(&entry->list, &entries);
9602 * If we only have on entry we may think the entries agree when
9603 * in reality they don't so we have to do some extra checking.
9605 if (dback->disk_bytenr != rec->start ||
9606 dback->bytes != rec->nr || back->broken)
9617 /* Yay all the backrefs agree, carry on good sir */
9618 if (nr_entries <= 1 && !mismatch)
9621 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9622 "%Lu\n", rec->start);
9625 * First we want to see if the backrefs can agree amongst themselves who
9626 * is right, so figure out which one of the entries has the highest
9629 best = find_most_right_entry(&entries);
9632 * Ok so we may have an even split between what the backrefs think, so
9633 * this is where we use the extent ref to see what it thinks.
9636 entry = find_entry(&entries, rec->start, rec->nr);
9637 if (!entry && (!broken_entries || !rec->found_rec)) {
9638 fprintf(stderr, "Backrefs don't agree with each other "
9639 "and extent record doesn't agree with anybody,"
9640 " so we can't fix bytenr %Lu bytes %Lu\n",
9641 rec->start, rec->nr);
9644 } else if (!entry) {
9646 * Ok our backrefs were broken, we'll assume this is the
9647 * correct value and add an entry for this range.
9649 entry = malloc(sizeof(struct extent_entry));
9654 memset(entry, 0, sizeof(*entry));
9655 entry->bytenr = rec->start;
9656 entry->bytes = rec->nr;
9657 list_add_tail(&entry->list, &entries);
9661 best = find_most_right_entry(&entries);
9663 fprintf(stderr, "Backrefs and extent record evenly "
9664 "split on who is right, this is going to "
9665 "require user input to fix bytenr %Lu bytes "
9666 "%Lu\n", rec->start, rec->nr);
9673 * I don't think this can happen currently as we'll abort() if we catch
9674 * this case higher up, but in case somebody removes that we still can't
9675 * deal with it properly here yet, so just bail out of that's the case.
9677 if (best->bytenr != rec->start) {
9678 fprintf(stderr, "Extent start and backref starts don't match, "
9679 "please use btrfs-image on this file system and send "
9680 "it to a btrfs developer so they can make fsck fix "
9681 "this particular case. bytenr is %Lu, bytes is %Lu\n",
9682 rec->start, rec->nr);
9688 * Ok great we all agreed on an extent record, let's go find the real
9689 * references and fix up the ones that don't match.
9691 rbtree_postorder_for_each_entry_safe(back, tmp,
9692 &rec->backref_tree, node) {
9693 if (back->full_backref || !back->is_data)
9696 dback = to_data_backref(back);
9699 * Still ignoring backrefs that don't have a real ref attached
9702 if (dback->found_ref == 0)
9705 if (dback->bytes == best->bytes &&
9706 dback->disk_bytenr == best->bytenr)
9709 ret = repair_ref(info, path, dback, best);
9715 * Ok we messed with the actual refs, which means we need to drop our
9716 * entire cache and go back and rescan. I know this is a huge pain and
9717 * adds a lot of extra work, but it's the only way to be safe. Once all
9718 * the backrefs agree we may not need to do anything to the extent
9723 while (!list_empty(&entries)) {
9724 entry = list_entry(entries.next, struct extent_entry, list);
9725 list_del_init(&entry->list);
/*
 * process_duplicates - make the first entry of rec->dups the cached record
 * for this range.
 *
 * @extent_cache: cache tree holding the extent records
 * @rec:          record the caller took off the duplicate_extents list
 *
 * rec is removed from @extent_cache and "good", the first record on
 * rec->dups, takes its place: good's list heads and check flags are reset,
 * its cache key is set from its own start/nr, and it inherits rec->refs and
 * rec->backrefs.  Cached records found by the lookup on good's range are then
 * handled in one of two ways:
 *   - found_rec set or num_duplicates > 0: the record (and its own dups) is
 *     chained onto good->dups, good is queued on duplicate_extents if it is
 *     not already on a list, and good->num_duplicates is increased;
 *   - otherwise: its refs and backrefs are folded into good.
 * In both cases the overlapping record is removed from @extent_cache, and
 * good is finally inserted into @extent_cache.
 *
 * The last return yields 0 when good ended up with duplicates and 1
 * otherwise.
 *
 * NOTE(review): this listing omits short lines (early returns, the loop
 * around the lookup, error checks on insert_cache_extent); confirm the exact
 * control flow against the full file.
 */
9731 static int process_duplicates(struct cache_tree *extent_cache,
9732 struct extent_record *rec)
9734 struct extent_record *good, *tmp;
9735 struct cache_extent *cache;
9739 * If we found a extent record for this extent then return, or if we
9740 * have more than one duplicate we are likely going to need to delete
9743 if (rec->found_rec || rec->num_duplicates > 1)
9746 /* Shouldn't happen but just in case */
9747 BUG_ON(!rec->num_duplicates);
9750 * So this happens if we end up with a backref that doesn't match the
9751 * actual extent entry. So either the backref is bad or the extent
9752 * entry is bad. Either way we want to have the extent_record actually
9753 * reflect what we found in the extent_tree, so we need to take the
9754 * duplicate out and use that as the extent_record since the only way we
9755 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9757 remove_cache_extent(extent_cache, &rec->cache);
9759 good = to_extent_record(rec->dups.next);
9760 list_del_init(&good->list);
9761 INIT_LIST_HEAD(&good->backrefs);
9762 INIT_LIST_HEAD(&good->dups);
9763 good->cache.start = good->start;
9764 good->cache.size = good->nr;
9765 good->content_checked = 0;
9766 good->owner_ref_checked = 0;
9767 good->num_duplicates = 0;
9768 good->refs = rec->refs;
9769 list_splice_init(&rec->backrefs, &good->backrefs);
9771 cache = lookup_cache_extent(extent_cache, good->start,
9775 tmp = container_of(cache, struct extent_record, cache);
9778 * If we find another overlapping extent and it's found_rec is
9779 * set then it's a duplicate and we need to try and delete
9782 if (tmp->found_rec || tmp->num_duplicates > 0) {
9783 if (list_empty(&good->list))
9784 list_add_tail(&good->list,
9785 &duplicate_extents);
9786 good->num_duplicates += tmp->num_duplicates + 1;
9787 list_splice_init(&tmp->dups, &good->dups);
9788 list_del_init(&tmp->list);
9789 list_add_tail(&tmp->list, &good->dups);
9790 remove_cache_extent(extent_cache, &tmp->cache);
9795 * Ok we have another non extent item backed extent rec, so lets
9796 * just add it to this extent and carry on like we did above.
9798 good->refs += tmp->refs;
9799 list_splice_init(&tmp->backrefs, &good->backrefs);
9800 remove_cache_extent(extent_cache, &tmp->cache);
9803 ret = insert_cache_extent(extent_cache, &good->cache);
9806 return good->num_duplicates ? 0 : 1;
/*
 * delete_duplicate_records - remove duplicate extent items of @rec from the
 * extent tree.
 *
 * @root: any root of the filesystem; only root->fs_info->extent_root is used
 *        for the transaction and the deletions
 * @rec:  extent record whose ->dups list holds the duplicates
 *
 * The first loop compares each entry of rec->dups against "good" to pick the
 * record covering all the others, and prints a diagnostic when two extents
 * overlap without one containing the other.  rec and its dups are then
 * gathered on a local delete_list, and every entry there that has found_rec
 * set is looked up by (start, BTRFS_EXTENT_ITEM_KEY, nr) and deleted inside a
 * single transaction.  Afterwards both delete_list and rec->dups are emptied.
 *
 * Returns ret when it is non-zero, otherwise nr_del; rec->num_duplicates is
 * cleared only when both are zero.
 *
 * NOTE(review): short lines are missing from this listing (initialisation of
 * good, the skip of good in the second loop, error branches, frees and the
 * increment of nr_del); confirm those against the full file.
 */
9809 static int delete_duplicate_records(struct btrfs_root *root,
9810 struct extent_record *rec)
9812 struct btrfs_trans_handle *trans;
9813 LIST_HEAD(delete_list);
9814 struct btrfs_path path;
9815 struct extent_record *tmp, *good, *n;
9818 struct btrfs_key key;
9820 btrfs_init_path(&path);
9823 /* Find the record that covers all of the duplicates. */
9824 list_for_each_entry(tmp, &rec->dups, list) {
9825 if (good->start < tmp->start)
9827 if (good->nr > tmp->nr)
9830 if (tmp->start + tmp->nr < good->start + good->nr) {
9831 fprintf(stderr, "Ok we have overlapping extents that "
9832 "aren't completely covered by each other, this "
9833 "is going to require more careful thought. "
9834 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9835 tmp->start, tmp->nr, good->start, good->nr);
9842 list_add_tail(&rec->list, &delete_list);
9844 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9847 list_move_tail(&tmp->list, &delete_list);
/* From here on all work is done against the extent tree root. */
9850 root = root->fs_info->extent_root;
9851 trans = btrfs_start_transaction(root, 1);
9852 if (IS_ERR(trans)) {
9853 ret = PTR_ERR(trans);
9857 list_for_each_entry(tmp, &delete_list, list) {
9858 if (tmp->found_rec == 0)
/* Key of the extent item to delete: (bytenr, EXTENT_ITEM, length). */
9860 key.objectid = tmp->start;
9861 key.type = BTRFS_EXTENT_ITEM_KEY;
9862 key.offset = tmp->nr;
9864 /* Shouldn't happen but just in case */
9865 if (tmp->metadata) {
9866 fprintf(stderr, "Well this shouldn't happen, extent "
9867 "record overlaps but is metadata? "
9868 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9872 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9878 ret = btrfs_del_item(trans, root, &path);
9881 btrfs_release_path(&path);
9884 err = btrfs_commit_transaction(trans, root);
9888 while (!list_empty(&delete_list)) {
9889 tmp = to_extent_record(delete_list.next);
9890 list_del_init(&tmp->list);
9896 while (!list_empty(&rec->dups)) {
9897 tmp = to_extent_record(rec->dups.next);
9898 list_del_init(&tmp->list);
9902 btrfs_release_path(&path);
9904 if (!ret && !nr_del)
9905 rec->num_duplicates = 0;
9907 return ret ? ret : nr_del;
/*
 * find_possible_backrefs - look up the file extents behind data backrefs
 * that were never matched to a real reference.
 *
 * @info:         filesystem being checked
 * @path:         caller-provided path, released after every search
 * @extent_cache: cache tree of extent records
 * @rec:          extent record whose backref_tree is walked
 *
 * Full backrefs, non-data backrefs and data backrefs that already have
 * found_ref set are skipped.  For each remaining backref the owning fs root
 * (dback->root) is read and the file extent item at
 * (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset) is searched for.  The
 * disk bytenr and disk num_bytes are read from that item and the bytenr is
 * looked up in @extent_cache.  The backref then gets found_ref incremented
 * and its disk_bytenr/bytes set from the file extent.
 *
 * A failure to read the root other than -ENOENT is returned as
 * PTR_ERR(root).
 *
 * Note that the inner "struct extent_record *tmp" shadows the outer
 * "struct extent_backref *tmp" used by the iteration macro.
 *
 * NOTE(review): short lines (continue/goto statements, the checks on the
 * search result and on the cache lookup, the final return) are missing from
 * this listing; confirm them against the full file.
 */
9910 static int find_possible_backrefs(struct btrfs_fs_info *info,
9911 struct btrfs_path *path,
9912 struct cache_tree *extent_cache,
9913 struct extent_record *rec)
9915 struct btrfs_root *root;
9916 struct extent_backref *back, *tmp;
9917 struct data_backref *dback;
9918 struct cache_extent *cache;
9919 struct btrfs_file_extent_item *fi;
9920 struct btrfs_key key;
9924 rbtree_postorder_for_each_entry_safe(back, tmp,
9925 &rec->backref_tree, node) {
9926 /* Don't care about full backrefs (poor unloved backrefs) */
9927 if (back->full_backref || !back->is_data)
9930 dback = to_data_backref(back);
9932 /* We found this one, we don't need to do a lookup */
9933 if (dback->found_ref)
/* Read the fs root this backref claims to belong to. */
9936 key.objectid = dback->root;
9937 key.type = BTRFS_ROOT_ITEM_KEY;
9938 key.offset = (u64)-1;
9940 root = btrfs_read_fs_root(info, &key);
9942 /* No root, definitely a bad ref, skip */
9943 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9945 /* Other err, exit */
9947 return PTR_ERR(root);
/* Search that root for the file extent item the backref describes. */
9949 key.objectid = dback->owner;
9950 key.type = BTRFS_EXTENT_DATA_KEY;
9951 key.offset = dback->offset;
9952 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9954 btrfs_release_path(path);
9957 /* Didn't find it, we can carry on */
9962 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9963 struct btrfs_file_extent_item);
9964 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9965 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9966 btrfs_release_path(path);
9967 cache = lookup_cache_extent(extent_cache, bytenr, 1);
9969 struct extent_record *tmp;
9970 tmp = container_of(cache, struct extent_record, cache);
9973 * If we found an extent record for the bytenr for this
9974 * particular backref then we can't add it to our
9975 * current extent record. We only want to add backrefs
9976 * that don't have a corresponding extent item in the
9977 * extent tree since they likely belong to this record
9978 * and we need to fix it if it doesn't match bytenrs.
9984 dback->found_ref += 1;
9985 dback->disk_bytenr = bytenr;
9986 dback->bytes = bytes;
9989 * Set this so the verify backref code knows not to trust the
9990 * values in this backref.
9999 * Record orphan data ref into corresponding root.
10001 * Return 0 if the extent item contains data ref and recorded.
10002 * Return 1 if the extent item contains no useful data ref
10003 * On that case, it may contains only shared_dataref or metadata backref
10004 * or the file extent exists(this should be handled by the extent bytenr
10005 * recovery routine)
10006 * Return <0 if something goes wrong.
10008 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10009 struct extent_record *rec)
10011 struct btrfs_key key;
10012 struct btrfs_root *dest_root;
10013 struct extent_backref *back, *tmp;
10014 struct data_backref *dback;
10015 struct orphan_data_extent *orphan;
10016 struct btrfs_path path;
10017 int recorded_data_ref = 0;
10022 btrfs_init_path(&path);
10023 rbtree_postorder_for_each_entry_safe(back, tmp,
10024 &rec->backref_tree, node) {
10025 if (back->full_backref || !back->is_data ||
10026 !back->found_extent_tree)
10028 dback = to_data_backref(back);
10029 if (dback->found_ref)
10031 key.objectid = dback->root;
10032 key.type = BTRFS_ROOT_ITEM_KEY;
10033 key.offset = (u64)-1;
10035 dest_root = btrfs_read_fs_root(fs_info, &key);
10037 /* For non-exist root we just skip it */
10038 if (IS_ERR(dest_root) || !dest_root)
10041 key.objectid = dback->owner;
10042 key.type = BTRFS_EXTENT_DATA_KEY;
10043 key.offset = dback->offset;
10045 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10046 btrfs_release_path(&path);
10048 * For ret < 0, it's OK since the fs-tree may be corrupted,
10049 * we need to record it for inode/file extent rebuild.
10050 * For ret > 0, we record it only for file extent rebuild.
10051 * For ret == 0, the file extent exists but only bytenr
10052 * mismatch, let the original bytenr fix routine to handle,
10058 orphan = malloc(sizeof(*orphan));
10063 INIT_LIST_HEAD(&orphan->list);
10064 orphan->root = dback->root;
10065 orphan->objectid = dback->owner;
10066 orphan->offset = dback->offset;
10067 orphan->disk_bytenr = rec->cache.start;
10068 orphan->disk_len = rec->cache.size;
10069 list_add(&dest_root->orphan_data_extents, &orphan->list);
10070 recorded_data_ref = 1;
10073 btrfs_release_path(&path);
10075 return !recorded_data_ref;
/*
 * fixup_extent_refs - rebuild the extent tree references of one record.
 *
 * @info:         filesystem being repaired
 * @extent_cache: cache tree of extent records, passed to
 *                find_possible_backrefs()
 * @rec:          extent record whose references are inconsistent
 *
 * For data records whose counted refs differ from the extent item refs,
 * find_possible_backrefs() and verify_backrefs() are run first.  Then, in a
 * transaction on info->extent_root, the existing extent records are deleted
 * and one reference is recreated with record_extent() for every backref that
 * has found_ref set.  The corrupt_blocks cache is consulted in between.
 * BTRFS_BLOCK_FLAG_FULL_BACKREF is added to the flags passed to
 * record_extent() when rec->flag_block_full_backref is set, and
 * rec->bad_full_backref is cleared for each backref that is recreated.
 *
 * NOTE(review): error branches, the handling of a corrupt_blocks hit, the
 * commit condition and the return statement are short lines missing from
 * this listing; confirm them against the full file.
 */
10081 * when an incorrect extent item is found, this will delete
10082 * all of the existing entries for it and recreate them
10083 * based on what the tree scan found.
10085 static int fixup_extent_refs(struct btrfs_fs_info *info,
10086 struct cache_tree *extent_cache,
10087 struct extent_record *rec)
10089 struct btrfs_trans_handle *trans = NULL;
10091 struct btrfs_path path;
10092 struct cache_extent *cache;
10093 struct extent_backref *back, *tmp;
10097 if (rec->flag_block_full_backref)
10098 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10100 btrfs_init_path(&path);
10101 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10103 * Sometimes the backrefs themselves are so broken they don't
10104 * get attached to any meaningful rec, so first go back and
10105 * check any of our backrefs that we couldn't find and throw
10106 * them into the list if we find the backref so that
10107 * verify_backrefs can figure out what to do.
10109 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10114 /* step one, make sure all of the backrefs agree */
10115 ret = verify_backrefs(info, &path, rec);
10119 trans = btrfs_start_transaction(info->extent_root, 1);
10120 if (IS_ERR(trans)) {
10121 ret = PTR_ERR(trans);
10125 /* step two, delete all the existing records */
10126 ret = delete_extent_records(trans, info->extent_root, &path,
10132 /* was this block corrupt? If so, don't add references to it */
10133 cache = lookup_cache_extent(info->corrupt_blocks,
10134 rec->start, rec->max_size);
10140 /* step three, recreate all the refs we did find */
10141 rbtree_postorder_for_each_entry_safe(back, tmp,
10142 &rec->backref_tree, node) {
10144 * if we didn't find any references, don't create a
10145 * new extent record
10147 if (!back->found_ref)
10150 rec->bad_full_backref = 0;
10151 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10159 int err = btrfs_commit_transaction(trans, info->extent_root);
10165 fprintf(stderr, "Repaired extent references for %llu\n",
10166 (unsigned long long)rec->start);
10168 btrfs_release_path(&path);
/*
 * fixup_extent_flags - set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the
 * extent item of @rec.
 *
 * @fs_info: filesystem being repaired
 * @rec:     extent record; rec->flag_block_full_backref selects whether the
 *           flag is set or cleared
 *
 * The item is looked up in the extent tree by
 * (rec->start, BTRFS_METADATA_ITEM_KEY, rec->info_level) for metadata
 * records, or (rec->start, BTRFS_EXTENT_ITEM_KEY, rec->max_size) otherwise.
 * The flags are rewritten in the leaf, the buffer is marked dirty and the
 * transaction is committed; the result of that final commit is stored in
 * ret.
 *
 * A failure to start the transaction is returned as PTR_ERR(trans).  On the
 * two early paths after the search the transaction is still committed, but
 * the return value of btrfs_commit_transaction() is not used there.
 *
 * NOTE(review): the conditions guarding those early paths, the else
 * branches and the return statements are short lines missing from this
 * listing.
 */
10172 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10173 struct extent_record *rec)
10175 struct btrfs_trans_handle *trans;
10176 struct btrfs_root *root = fs_info->extent_root;
10177 struct btrfs_path path;
10178 struct btrfs_extent_item *ei;
10179 struct btrfs_key key;
/* Build the key of the extent item that describes this record. */
10183 key.objectid = rec->start;
10184 if (rec->metadata) {
10185 key.type = BTRFS_METADATA_ITEM_KEY;
10186 key.offset = rec->info_level;
10188 key.type = BTRFS_EXTENT_ITEM_KEY;
10189 key.offset = rec->max_size;
10192 trans = btrfs_start_transaction(root, 0);
10194 return PTR_ERR(trans);
10196 btrfs_init_path(&path);
10197 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10199 btrfs_release_path(&path);
10200 btrfs_commit_transaction(trans, root);
10203 fprintf(stderr, "Didn't find extent for %llu\n",
10204 (unsigned long long)rec->start);
10205 btrfs_release_path(&path);
10206 btrfs_commit_transaction(trans, root);
/* Read the current flags, toggle FULL_BACKREF, write them back. */
10210 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10211 struct btrfs_extent_item);
10212 flags = btrfs_extent_flags(path.nodes[0], ei);
10213 if (rec->flag_block_full_backref) {
10214 fprintf(stderr, "setting full backref on %llu\n",
10215 (unsigned long long)key.objectid);
10216 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10218 fprintf(stderr, "clearing full backref on %llu\n",
10219 (unsigned long long)key.objectid);
10220 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10222 btrfs_set_extent_flags(path.nodes[0], ei, flags);
10223 btrfs_mark_buffer_dirty(path.nodes[0]);
10224 btrfs_release_path(&path);
10225 ret = btrfs_commit_transaction(trans, root);
10227 fprintf(stderr, "Repaired extent flags for %llu\n",
10228 (unsigned long long)rec->start);
/*
 * prune_one_block - drop the parent's pointer to one corrupt tree block.
 *
 * @trans:   running transaction
 * @info:    filesystem being repaired
 * @corrupt: corrupt block record; its key, level and cache.start are used
 *
 * The extent tree is searched for corrupt->key with path.lowest_level set to
 * corrupt->level + 1, so the search stops at the parent of the bad block.
 * The slot returned by the search is checked first; if its block pointer is
 * not corrupt->cache.start, every slot of that node is scanned.  When the
 * pointer is found it is removed with btrfs_del_ptr().
 *
 * NOTE(review): the error checks, goto targets, the handling of the
 * "eb == extent_root->node" case and the return statements are short lines
 * missing from this listing.
 */
10233 /* right now we only prune from the extent allocation tree */
10234 static int prune_one_block(struct btrfs_trans_handle *trans,
10235 struct btrfs_fs_info *info,
10236 struct btrfs_corrupt_block *corrupt)
10239 struct btrfs_path path;
10240 struct extent_buffer *eb;
10244 int level = corrupt->level + 1;
10246 btrfs_init_path(&path);
10248 /* we want to stop at the parent to our busted block */
10249 path.lowest_level = level;
10251 ret = btrfs_search_slot(trans, info->extent_root,
10252 &corrupt->key, &path, -1, 1);
10257 eb = path.nodes[level];
10264 * hopefully the search gave us the block we want to prune,
10265 * lets try that first
10267 slot = path.slots[level];
10268 found = btrfs_node_blockptr(eb, slot);
10269 if (found == corrupt->cache.start)
10272 nritems = btrfs_header_nritems(eb);
10274 /* the search failed, lets scan this node and hope we find it */
10275 for (slot = 0; slot < nritems; slot++) {
10276 found = btrfs_node_blockptr(eb, slot);
10277 if (found == corrupt->cache.start)
10281 * we couldn't find the bad block. TODO, search all the nodes for pointers
10284 if (eb == info->extent_root->node) {
10289 btrfs_release_path(&path);
10294 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10295 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10298 btrfs_release_path(&path);
/*
 * prune_corrupt_blocks - run prune_one_block() on every entry of
 * info->corrupt_blocks.
 *
 * Entries are fetched with search_cache_extent(info->corrupt_blocks, 0) and
 * removed from the cache after being processed.  A transaction on
 * info->extent_root is started for the work and committed at the end; a
 * failure to start it is returned as PTR_ERR(trans).
 *
 * The return value of prune_one_block() is not used.
 *
 * NOTE(review): the loop framing and the conditions around starting and
 * committing the transaction are short lines missing from this listing.
 */
10302 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10304 struct btrfs_trans_handle *trans = NULL;
10305 struct cache_extent *cache;
10306 struct btrfs_corrupt_block *corrupt;
10309 cache = search_cache_extent(info->corrupt_blocks, 0);
10313 trans = btrfs_start_transaction(info->extent_root, 1);
10315 return PTR_ERR(trans);
10317 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10318 prune_one_block(trans, info, corrupt);
10319 remove_cache_extent(info->corrupt_blocks, cache);
10322 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * reset_cached_block_groups - drop cached free-space state of @fs_info.
 *
 * Ranges flagged EXTENT_DIRTY in fs_info->free_space_cache are located with
 * find_first_extent_bit() and cleared.  Block groups are then visited in
 * address order: each lookup starts at "start", which is advanced to the end
 * of the block group just found (key.objectid + key.offset).
 *
 * NOTE(review): the loop framing and whatever is done to each block group
 * between the lookup and the advance of "start" are short lines missing from
 * this listing.
 */
10326 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10328 struct btrfs_block_group_cache *cache;
10333 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10334 &start, &end, EXTENT_DIRTY);
10337 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10342 cache = btrfs_lookup_first_block_group(fs_info, start);
10347 start = cache->key.objectid + cache->key.offset;
/*
 * check_extent_refs - verify (and optionally repair) every extent record
 * collected in @extent_cache.
 *
 * @root:         any root of the filesystem; root->fs_info is what is used
 * @extent_cache: cache tree of extent records; records are removed from it
 *                as they are processed
 *
 * Outline, as far as it is visible here:
 *  1. The range of every record in @extent_cache and every entry of
 *     fs_info->corrupt_blocks is marked dirty in fs_info->excluded_extents;
 *     per the in-code comment this keeps a repair from allocating from
 *     problem extents.  Corrupt blocks are then pruned and the cached block
 *     groups reset.
 *  2. While repairing, records on the global duplicate_extents list go
 *     through process_duplicates() and delete_duplicate_records().
 *  3. Each remaining record is checked for duplicate extent items, a ref
 *     count mismatch (orphan data extents are recorded in that case),
 *     backpointer mismatches, a failed owner ref check, a bad full backref,
 *     metadata crossing a stripe boundary and a chunk type mismatch.  With
 *     "repair && fix", fixup_extent_refs() is called; a bad full backref
 *     leads to fixup_extent_flags().
 *  4. The record is removed from the cache and its backrefs are freed; its
 *     range is cleared from excluded_extents when
 *     "!init_extent_tree && repair && (!cur_err || fix)".
 *  5. Finally block accounting is fixed in a transaction on the extent root
 *     via btrfs_fix_block_accounting().
 *
 * NOTE(review): many short lines (loop framing, the repair conditions, error
 * bookkeeping such as cur_err/fix, goto/return statements) are missing from
 * this listing, so conditions and return values must be confirmed against
 * the full file.
 */
10351 static int check_extent_refs(struct btrfs_root *root,
10352 struct cache_tree *extent_cache)
10354 struct extent_record *rec;
10355 struct cache_extent *cache;
10361 * if we're doing a repair, we have to make sure
10362 * we don't allocate from the problem extents.
10363 * In the worst case, this will be all the
10364 * extents in the FS
10366 cache = search_cache_extent(extent_cache, 0);
10368 rec = container_of(cache, struct extent_record, cache);
10369 set_extent_dirty(root->fs_info->excluded_extents,
10371 rec->start + rec->max_size - 1);
10372 cache = next_cache_extent(cache);
10375 /* pin down all the corrupted blocks too */
10376 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10378 set_extent_dirty(root->fs_info->excluded_extents,
10380 cache->start + cache->size - 1);
10381 cache = next_cache_extent(cache);
10383 prune_corrupt_blocks(root->fs_info);
10384 reset_cached_block_groups(root->fs_info);
10387 reset_cached_block_groups(root->fs_info);
10390 * We need to delete any duplicate entries we find first otherwise we
10391 * could mess up the extent tree when we have backrefs that actually
10392 * belong to a different extent item and not the weird duplicate one.
10394 while (repair && !list_empty(&duplicate_extents)) {
10395 rec = to_extent_record(duplicate_extents.next);
10396 list_del_init(&rec->list);
10398 /* Sometimes we can find a backref before we find an actual
10399 * extent, so we need to process it a little bit to see if there
10400 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10401 * if this is a backref screwup. If we need to delete stuff
10402 * process_duplicates() will return 0, otherwise it will return
10405 if (process_duplicates(extent_cache, rec))
10407 ret = delete_duplicate_records(root, rec);
10411 * delete_duplicate_records will return the number of entries
10412 * deleted, so if it's greater than 0 then we know we actually
10413 * did something and we need to remove.
10426 cache = search_cache_extent(extent_cache, 0);
10429 rec = container_of(cache, struct extent_record, cache);
10430 if (rec->num_duplicates) {
10431 fprintf(stderr, "extent item %llu has multiple extent "
10432 "items\n", (unsigned long long)rec->start);
10436 if (rec->refs != rec->extent_item_refs) {
10437 fprintf(stderr, "ref mismatch on [%llu %llu] ",
10438 (unsigned long long)rec->start,
10439 (unsigned long long)rec->nr);
10440 fprintf(stderr, "extent item %llu, found %llu\n",
10441 (unsigned long long)rec->extent_item_refs,
10442 (unsigned long long)rec->refs);
10443 ret = record_orphan_data_extents(root->fs_info, rec);
10449 if (all_backpointers_checked(rec, 1)) {
10450 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10451 (unsigned long long)rec->start,
10452 (unsigned long long)rec->nr);
10456 if (!rec->owner_ref_checked) {
10457 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10458 (unsigned long long)rec->start,
10459 (unsigned long long)rec->nr);
10464 if (repair && fix) {
10465 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10471 if (rec->bad_full_backref) {
10472 fprintf(stderr, "bad full backref, on [%llu]\n",
10473 (unsigned long long)rec->start);
10475 ret = fixup_extent_flags(root->fs_info, rec);
10483 * Although it's not a extent ref's problem, we reuse this
10484 * routine for error reporting.
10485 * No repair function yet.
10487 if (rec->crossing_stripes) {
10489 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10490 rec->start, rec->start + rec->max_size);
10494 if (rec->wrong_chunk_type) {
10496 "bad extent [%llu, %llu), type mismatch with chunk\n",
10497 rec->start, rec->start + rec->max_size);
10501 remove_cache_extent(extent_cache, cache);
10502 free_all_extent_backrefs(rec);
10503 if (!init_extent_tree && repair && (!cur_err || fix))
10504 clear_extent_dirty(root->fs_info->excluded_extents,
10506 rec->start + rec->max_size - 1);
10511 if (ret && ret != -EAGAIN) {
10512 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10515 struct btrfs_trans_handle *trans;
10517 root = root->fs_info->extent_root;
10518 trans = btrfs_start_transaction(root, 1);
10519 if (IS_ERR(trans)) {
10520 ret = PTR_ERR(trans);
10524 ret = btrfs_fix_block_accounting(trans, root);
10527 ret = btrfs_commit_transaction(trans, root);
10536 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10540 if (type & BTRFS_BLOCK_GROUP_RAID0) {
10541 stripe_size = length;
10542 stripe_size /= num_stripes;
10543 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10544 stripe_size = length * 2;
10545 stripe_size /= num_stripes;
10546 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10547 stripe_size = length;
10548 stripe_size /= (num_stripes - 1);
10549 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10550 stripe_size = length;
10551 stripe_size /= (num_stripes - 2);
10553 stripe_size = length;
10555 return stripe_size;
/*
 * check_chunk_refs - cross-check one chunk record against its block group
 * item and its device extents.
 *
 * @chunk_rec:         chunk record to verify
 * @block_group_cache: tree of block group records
 * @dev_extent_cache:  tree of device extent records
 *
 * Block group: looked up in block_group_cache->tree using the chunk's
 * length; length, offset and type flags must match the block group's
 * offset, objectid and flags.  On a match the block group record is taken
 * off its list and stored in chunk_rec->bg_rec.
 *
 * Device extents: for every stripe, a dev extent is looked up by
 * (devid, offset, stripe length from calc_stripe_length()).  Its objectid,
 * offset, chunk_offset and length must match; matching extents are moved to
 * chunk_rec->dextents.
 *
 * Mismatches and missing items are reported with the messages below.  The
 * return codes are described by the comment that follows.
 *
 * NOTE(review): the last parameter line, the conditions guarding the
 * messages, the fprintf openers and the return-code assignments are short
 * lines missing from this listing.
 */
10559 * Check the chunk with its block group/dev list ref:
10560 * Return 0 if all refs seems valid.
10561 * Return 1 if part of refs seems valid, need later check for rebuild ref
10562 * like missing block group and needs to search extent tree to rebuild them.
10563 * Return -1 if essential refs are missing and unable to rebuild.
10565 static int check_chunk_refs(struct chunk_record *chunk_rec,
10566 struct block_group_tree *block_group_cache,
10567 struct device_extent_tree *dev_extent_cache,
10570 struct cache_extent *block_group_item;
10571 struct block_group_record *block_group_rec;
10572 struct cache_extent *dev_extent_item;
10573 struct device_extent_record *dev_extent_rec;
10577 int metadump_v2 = 0;
10581 block_group_item = lookup_cache_extent(&block_group_cache->tree,
10583 chunk_rec->length);
10584 if (block_group_item) {
10585 block_group_rec = container_of(block_group_item,
10586 struct block_group_record,
10588 if (chunk_rec->length != block_group_rec->offset ||
10589 chunk_rec->offset != block_group_rec->objectid ||
10591 chunk_rec->type_flags != block_group_rec->flags)) {
10594 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10595 chunk_rec->objectid,
10600 chunk_rec->type_flags,
10601 block_group_rec->objectid,
10602 block_group_rec->type,
10603 block_group_rec->offset,
10604 block_group_rec->offset,
10605 block_group_rec->objectid,
10606 block_group_rec->flags);
10609 list_del_init(&block_group_rec->list);
10610 chunk_rec->bg_rec = block_group_rec;
10615 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10616 chunk_rec->objectid,
10621 chunk_rec->type_flags);
/* Every stripe must be backed by a dev extent of the computed length. */
10628 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10629 chunk_rec->num_stripes);
10630 for (i = 0; i < chunk_rec->num_stripes; ++i) {
10631 devid = chunk_rec->stripes[i].devid;
10632 offset = chunk_rec->stripes[i].offset;
10633 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10634 devid, offset, length);
10635 if (dev_extent_item) {
10636 dev_extent_rec = container_of(dev_extent_item,
10637 struct device_extent_record,
10639 if (dev_extent_rec->objectid != devid ||
10640 dev_extent_rec->offset != offset ||
10641 dev_extent_rec->chunk_offset != chunk_rec->offset ||
10642 dev_extent_rec->length != length) {
10645 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10646 chunk_rec->objectid,
10649 chunk_rec->stripes[i].devid,
10650 chunk_rec->stripes[i].offset,
10651 dev_extent_rec->objectid,
10652 dev_extent_rec->offset,
10653 dev_extent_rec->length);
10656 list_move(&dev_extent_rec->chunk_list,
10657 &chunk_rec->dextents);
10662 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10663 chunk_rec->objectid,
10666 chunk_rec->stripes[i].devid,
10667 chunk_rec->stripes[i].offset);
/*
 * check_chunks - run check_chunk_refs() on every chunk and sort the chunks
 * into result lists.
 *
 * @chunk_cache:       cache tree of chunk records
 * @block_group_cache: tree of block group records
 * @dev_extent_cache:  tree of device extent records
 * @good:              optional list receiving chunks with err == 0
 * @bad:               optional list receiving chunks with err < 0
 * @rebuild:           optional list receiving chunks with err > 0
 * @silent:            passed through to check_chunk_refs()
 *
 * Each of @good, @bad and @rebuild may be NULL, in which case chunks of that
 * class are not queued anywhere.  After the chunk walk, block groups still
 * on block_group_cache->block_groups and device extents still on
 * dev_extent_cache->no_chunk_orphans are iterated, with the "didn't find the
 * relative chunk" messages below.
 *
 * NOTE(review): the handling of @silent, the accumulation of the return
 * value and the remaining message arguments are short lines missing from
 * this listing.
 */
10674 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
10675 int check_chunks(struct cache_tree *chunk_cache,
10676 struct block_group_tree *block_group_cache,
10677 struct device_extent_tree *dev_extent_cache,
10678 struct list_head *good, struct list_head *bad,
10679 struct list_head *rebuild, int silent)
10681 struct cache_extent *chunk_item;
10682 struct chunk_record *chunk_rec;
10683 struct block_group_record *bg_rec;
10684 struct device_extent_record *dext_rec;
10688 chunk_item = first_cache_extent(chunk_cache);
10689 while (chunk_item) {
10690 chunk_rec = container_of(chunk_item, struct chunk_record,
10692 err = check_chunk_refs(chunk_rec, block_group_cache,
10693 dev_extent_cache, silent);
/* Sort the chunk by verdict: 0 good, > 0 rebuild, < 0 bad. */
10696 if (err == 0 && good)
10697 list_add_tail(&chunk_rec->list, good);
10698 if (err > 0 && rebuild)
10699 list_add_tail(&chunk_rec->list, rebuild);
10700 if (err < 0 && bad)
10701 list_add_tail(&chunk_rec->list, bad);
10702 chunk_item = next_cache_extent(chunk_item);
10705 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10708 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10716 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10720 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10721 dext_rec->objectid,
/*
 * check_device_used - compare a device item's byte_used with the sum of its
 * device extents.
 *
 * @dev_rec:    device record to verify
 * @dext_cache: tree of device extent records
 *
 * Starting at (dev_rec->devid, 0), device extents are walked in cache order.
 * Each extent visited is taken off its device_list and its length is added
 * to total_byte.  The objectid comparison tests whether an extent still
 * belongs to this device.  A message is printed when the total differs from
 * dev_rec->byte_used.
 *
 * NOTE(review): the loop framing, the action taken on an objectid mismatch
 * and the return statements are short lines missing from this listing.
 */
10731 static int check_device_used(struct device_record *dev_rec,
10732 struct device_extent_tree *dext_cache)
10734 struct cache_extent *cache;
10735 struct device_extent_record *dev_extent_rec;
10736 u64 total_byte = 0;
10738 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10740 dev_extent_rec = container_of(cache,
10741 struct device_extent_record,
10743 if (dev_extent_rec->objectid != dev_rec->devid)
10746 list_del_init(&dev_extent_rec->device_list);
10747 total_byte += dev_extent_rec->length;
10748 cache = next_cache_extent(cache);
10751 if (total_byte != dev_rec->byte_used) {
10753 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10754 total_byte, dev_rec->byte_used, dev_rec->objectid,
10755 dev_rec->type, dev_rec->offset);
/*
 * check_devices - run check_device_used() on every device record, then
 * report device extents that belong to no device.
 *
 * @dev_cache:        rb-tree of device records, walked with
 *                    rb_first()/rb_next()
 * @dev_extent_cache: tree of device extent records; entries still on its
 *                    no_device_orphans list get the "didn't find its
 *                    device" message
 *
 * NOTE(review): the loop framing and the accumulation of the return value
 * are short lines missing from this listing.
 */
10762 /* check btrfs_dev_item -> btrfs_dev_extent */
10763 static int check_devices(struct rb_root *dev_cache,
10764 struct device_extent_tree *dev_extent_cache)
10766 struct rb_node *dev_node;
10767 struct device_record *dev_rec;
10768 struct device_extent_record *dext_rec;
10772 dev_node = rb_first(dev_cache);
10774 dev_rec = container_of(dev_node, struct device_record, node);
10775 err = check_device_used(dev_rec, dev_extent_cache);
10779 dev_node = rb_next(dev_node);
10781 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10784 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10785 dext_rec->objectid, dext_rec->offset, dext_rec->length);
10792 static int add_root_item_to_list(struct list_head *head,
10793 u64 objectid, u64 bytenr, u64 last_snapshot,
10794 u8 level, u8 drop_level,
10795 struct btrfs_key *drop_key)
10798 struct root_item_record *ri_rec;
10799 ri_rec = malloc(sizeof(*ri_rec));
10802 ri_rec->bytenr = bytenr;
10803 ri_rec->objectid = objectid;
10804 ri_rec->level = level;
10805 ri_rec->drop_level = drop_level;
10806 ri_rec->last_snapshot = last_snapshot;
10808 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10809 list_add_tail(&ri_rec->list, head);
10814 static void free_root_item_list(struct list_head *list)
10816 struct root_item_record *ri_rec;
10818 while (!list_empty(list)) {
10819 ri_rec = list_first_entry(list, struct root_item_record,
10821 list_del_init(&ri_rec->list);
/*
 * deal_root_from_list - feed every root on @list into the block scanner.
 *
 * @list: list of root_item_record entries; entries are removed from it as
 *        they are processed
 * @root: root used for reading tree blocks (root->fs_info) and passed to
 *        run_next_block()
 * @bits: scratch array handed to run_next_block()
 * @pending, @seen, @reada, @nodes, @extent_cache, @chunk_cache, @dev_cache,
 * @block_group_cache, @dev_extent_cache: caches shared with
 *        add_root_to_pending() and run_next_block()
 *
 * For each record the root's tree block is read from rec->bytenr; if the
 * buffer is not up to date it is released.  Otherwise the block is queued
 * with add_root_to_pending().  Inside the list loop run_next_block() is
 * called with the current record; after the loop it is called with a NULL
 * record.
 *
 * NOTE(review): the bits_nr parameter line, the loop around each
 * run_next_block() call, the condition selecting the per-root pass, error
 * handling and the return statement are short lines missing from this
 * listing.
 */
10826 static int deal_root_from_list(struct list_head *list,
10827 struct btrfs_root *root,
10828 struct block_info *bits,
10830 struct cache_tree *pending,
10831 struct cache_tree *seen,
10832 struct cache_tree *reada,
10833 struct cache_tree *nodes,
10834 struct cache_tree *extent_cache,
10835 struct cache_tree *chunk_cache,
10836 struct rb_root *dev_cache,
10837 struct block_group_tree *block_group_cache,
10838 struct device_extent_tree *dev_extent_cache)
10843 while (!list_empty(list)) {
10844 struct root_item_record *rec;
10845 struct extent_buffer *buf;
10846 rec = list_entry(list->next,
10847 struct root_item_record, list);
10849 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10850 if (!extent_buffer_uptodate(buf)) {
10851 free_extent_buffer(buf);
10855 ret = add_root_to_pending(buf, extent_cache, pending,
10856 seen, nodes, rec->objectid);
10860 * To rebuild extent tree, we need deal with snapshot
10861 * one by one, otherwise we deal with node firstly which
10862 * can maximize readahead.
10865 ret = run_next_block(root, bits, bits_nr, &last,
10866 pending, seen, reada, nodes,
10867 extent_cache, chunk_cache,
10868 dev_cache, block_group_cache,
10869 dev_extent_cache, rec);
10873 free_extent_buffer(buf);
10874 list_del(&rec->list);
10880 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10881 reada, nodes, extent_cache, chunk_cache,
10882 dev_cache, block_group_cache,
10883 dev_extent_cache, NULL);
/*
 * check_chunks_and_extents - original-mode check of chunks, extents and
 * devices for @fs_info.
 *
 * Outline, as far as it is visible here:
 *  1. All local caches and lists are initialised and the fsck hooks are
 *     installed in @fs_info (excluded_extents, fsck_extent_cache,
 *     free_extent_hook, corrupt_blocks).
 *  2. The tree root and the chunk root are queued on normal_trees.
 *  3. The tree root is searched for BTRFS_ROOT_ITEM_KEY items.  A root whose
 *     drop_progress objectid is 0 goes on normal_trees; any other root goes
 *     on dropping_trees together with its drop key and drop level.
 *  4. deal_root_from_list() is run on normal_trees and then on
 *     dropping_trees, followed by check_chunks(), check_extent_refs() and
 *     check_devices().
 *  5. The hooks are cleared from @fs_info and the caches are freed.
 *
 * Two cleanup sequences appear at the end: the first also clears the hooks
 * in @fs_info, the second additionally frees the extent record cache.
 *
 * NOTE(review): labels, goto statements, the actions taken on -EAGAIN,
 * error accumulation and the return statement are short lines missing from
 * this listing; per the in-code comment, -EAGAIN is meant to restart the
 * loop.
 */
10893 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10895 struct rb_root dev_cache;
10896 struct cache_tree chunk_cache;
10897 struct block_group_tree block_group_cache;
10898 struct device_extent_tree dev_extent_cache;
10899 struct cache_tree extent_cache;
10900 struct cache_tree seen;
10901 struct cache_tree pending;
10902 struct cache_tree reada;
10903 struct cache_tree nodes;
10904 struct extent_io_tree excluded_extents;
10905 struct cache_tree corrupt_blocks;
10906 struct btrfs_path path;
10907 struct btrfs_key key;
10908 struct btrfs_key found_key;
10910 struct block_info *bits;
10912 struct extent_buffer *leaf;
10914 struct btrfs_root_item ri;
10915 struct list_head dropping_trees;
10916 struct list_head normal_trees;
10917 struct btrfs_root *root1;
10918 struct btrfs_root *root;
10922 root = fs_info->fs_root;
10923 dev_cache = RB_ROOT;
10924 cache_tree_init(&chunk_cache);
10925 block_group_tree_init(&block_group_cache);
10926 device_extent_tree_init(&dev_extent_cache);
10928 cache_tree_init(&extent_cache);
10929 cache_tree_init(&seen);
10930 cache_tree_init(&pending);
10931 cache_tree_init(&nodes);
10932 cache_tree_init(&reada);
10933 cache_tree_init(&corrupt_blocks);
10934 extent_io_tree_init(&excluded_extents);
10935 INIT_LIST_HEAD(&dropping_trees);
10936 INIT_LIST_HEAD(&normal_trees);
/* Install the hooks in fs_info; they point at the locals above. */
10939 fs_info->excluded_extents = &excluded_extents;
10940 fs_info->fsck_extent_cache = &extent_cache;
10941 fs_info->free_extent_hook = free_extent_hook;
10942 fs_info->corrupt_blocks = &corrupt_blocks;
10946 bits = malloc(bits_nr * sizeof(struct block_info));
10952 if (ctx.progress_enabled) {
10953 ctx.tp = TASK_EXTENTS;
10954 task_start(ctx.info);
/* Queue the tree root and the chunk root first. */
10958 root1 = fs_info->tree_root;
10959 level = btrfs_header_level(root1->node);
10960 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10961 root1->node->start, 0, level, 0, NULL);
10964 root1 = fs_info->chunk_root;
10965 level = btrfs_header_level(root1->node);
10966 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10967 root1->node->start, 0, level, 0, NULL);
/* Walk the root items stored in the tree root. */
10970 btrfs_init_path(&path);
10973 key.type = BTRFS_ROOT_ITEM_KEY;
10974 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10978 leaf = path.nodes[0];
10979 slot = path.slots[0];
10980 if (slot >= btrfs_header_nritems(path.nodes[0])) {
10981 ret = btrfs_next_leaf(root, &path);
10984 leaf = path.nodes[0];
10985 slot = path.slots[0];
10987 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10988 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10989 unsigned long offset;
10992 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10993 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10994 last_snapshot = btrfs_root_last_snapshot(&ri);
/* A zero drop_progress objectid puts the root on normal_trees. */
10995 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10996 level = btrfs_root_level(&ri);
10997 ret = add_root_item_to_list(&normal_trees,
10998 found_key.objectid,
10999 btrfs_root_bytenr(&ri),
11000 last_snapshot, level,
11005 level = btrfs_root_level(&ri);
11006 objectid = found_key.objectid;
11007 btrfs_disk_key_to_cpu(&found_key,
11008 &ri.drop_progress);
11009 ret = add_root_item_to_list(&dropping_trees,
11011 btrfs_root_bytenr(&ri),
11012 last_snapshot, level,
11013 ri.drop_level, &found_key);
11020 btrfs_release_path(&path);
11023 * check_block can return -EAGAIN if it fixes something, please keep
11024 * this in mind when dealing with return values from these functions, if
11025 * we get -EAGAIN we want to fall through and restart the loop.
11027 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11028 &seen, &reada, &nodes, &extent_cache,
11029 &chunk_cache, &dev_cache, &block_group_cache,
11030 &dev_extent_cache);
11032 if (ret == -EAGAIN)
11036 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11037 &pending, &seen, &reada, &nodes,
11038 &extent_cache, &chunk_cache, &dev_cache,
11039 &block_group_cache, &dev_extent_cache);
11041 if (ret == -EAGAIN)
11046 ret = check_chunks(&chunk_cache, &block_group_cache,
11047 &dev_extent_cache, NULL, NULL, NULL, 0);
11049 if (ret == -EAGAIN)
11054 ret = check_extent_refs(root, &extent_cache);
11056 if (ret == -EAGAIN)
11061 ret = check_devices(&dev_cache, &dev_extent_cache);
11066 task_stop(ctx.info);
/* First cleanup sequence: also clears the hooks installed in fs_info. */
11068 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11069 extent_io_tree_cleanup(&excluded_extents);
11070 fs_info->fsck_extent_cache = NULL;
11071 fs_info->free_extent_hook = NULL;
11072 fs_info->corrupt_blocks = NULL;
11073 fs_info->excluded_extents = NULL;
11076 free_chunk_cache_tree(&chunk_cache);
11077 free_device_cache_tree(&dev_cache);
11078 free_block_group_tree(&block_group_cache);
11079 free_device_extent_tree(&dev_extent_cache);
11080 free_extent_cache_tree(&seen);
11081 free_extent_cache_tree(&pending);
11082 free_extent_cache_tree(&reada);
11083 free_extent_cache_tree(&nodes);
11084 free_root_item_list(&normal_trees);
11085 free_root_item_list(&dropping_trees);
/* Second cleanup sequence: also frees the extent record cache. */
11088 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11089 free_extent_cache_tree(&seen);
11090 free_extent_cache_tree(&pending);
11091 free_extent_cache_tree(&reada);
11092 free_extent_cache_tree(&nodes);
11093 free_chunk_cache_tree(&chunk_cache);
11094 free_block_group_tree(&block_group_cache);
11095 free_device_cache_tree(&dev_cache);
11096 free_device_extent_tree(&dev_extent_cache);
11097 free_extent_record_cache(&extent_cache);
11098 free_root_item_list(&normal_trees);
11099 free_root_item_list(&dropping_trees);
11100 extent_io_tree_cleanup(&excluded_extents);
11105 * Check backrefs of a tree block given by @bytenr or @eb.
11107 * @root: the root containing the @bytenr or @eb
11108 * @eb: tree block extent buffer, can be NULL
11109 * @bytenr: bytenr of the tree block to search
11110 * @level: tree level of the tree block
11111 * @owner: owner of the tree block
11113 * Return >0 for any error found and output error message
11114 * Return 0 for no error found
/*
 * Looks for a backref of tree block @bytenr in the extent tree: first among
 * the inline refs stored in its EXTENT_ITEM/METADATA_ITEM, then as a separate
 * TREE_BLOCK_REF item keyed by root->objectid.  Errors are collected as
 * BACKREF_MISSING / BACKREF_MISMATCH bits in err.
 */
11116 static int check_tree_block_ref(struct btrfs_root *root,
11117 struct extent_buffer *eb, u64 bytenr,
11118 int level, u64 owner)
11120 struct btrfs_key key;
11121 struct btrfs_root *extent_root = root->fs_info->extent_root;
11122 struct btrfs_path path;
11123 struct btrfs_extent_item *ei;
11124 struct btrfs_extent_inline_ref *iref;
11125 struct extent_buffer *leaf;
11131 u32 nodesize = root->fs_info->nodesize;
11134 int tree_reloc_root = 0;
/* Remember whether @bytenr is the root node of a tree reloc root. */
11139 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
11140 btrfs_header_bytenr(root->node) == bytenr)
11141 tree_reloc_root = 1;
11143 btrfs_init_path(&path);
11144 key.objectid = bytenr;
/*
 * The key type follows the SKINNY_METADATA incompat flag.  The search uses
 * the largest possible offset and btrfs_previous_extent_item() is called
 * afterwards to step back to the extent item of @bytenr.
 */
11145 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11146 key.type = BTRFS_METADATA_ITEM_KEY;
11148 key.type = BTRFS_EXTENT_ITEM_KEY;
11149 key.offset = (u64)-1;
11151 /* Search for the backref in extent tree */
11152 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11154 err |= BACKREF_MISSING;
11157 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11159 err |= BACKREF_MISSING;
11163 leaf = path.nodes[0];
11164 slot = path.slots[0];
11165 btrfs_item_key_to_cpu(leaf, &key, slot);
11167 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * For a METADATA_ITEM the level comes from key.offset and the inline refs
 * start right after the extent item.  Otherwise a btrfs_tree_block_info
 * follows the extent item, supplies the level, and the inline refs start
 * after it.
 */
11169 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11170 skinny_level = (int)key.offset;
11171 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11173 struct btrfs_tree_block_info *info;
11175 info = (struct btrfs_tree_block_info *)(ei + 1);
11176 skinny_level = btrfs_tree_block_level(leaf, info);
11177 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/*
 * NOTE(review): the mismatch branches below assign err (err = ...) while the
 * BACKREF_MISSING paths OR into it (err |= ...) - confirm this is intended.
 */
11184 if (!(btrfs_extent_flags(leaf, ei) &
11185 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11187 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11188 key.objectid, nodesize,
11189 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11190 err = BACKREF_MISMATCH;
/* Generation in the block header must equal the one in the extent item. */
11192 header_gen = btrfs_header_generation(eb);
11193 extent_gen = btrfs_extent_generation(leaf, ei);
11194 if (header_gen != extent_gen) {
11196 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11197 key.objectid, nodesize, header_gen,
11199 err = BACKREF_MISMATCH;
11201 if (level != skinny_level) {
11203 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11204 key.objectid, nodesize, level, skinny_level);
11205 err = BACKREF_MISMATCH;
/* When @owner is not an fs tree the extent item must hold exactly one ref. */
11207 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11209 "extent[%llu %u] is referred by other roots than %llu",
11210 key.objectid, nodesize, root->objectid);
11211 err = BACKREF_MISMATCH;
11216 * Iterate the extent/metadata item to find the exact backref
11218 item_size = btrfs_item_size_nr(leaf, slot);
11219 ptr = (unsigned long)iref;
11220 end = (unsigned long)ei + item_size;
11221 while (ptr < end) {
11222 iref = (struct btrfs_extent_inline_ref *)ptr;
11223 type = btrfs_extent_inline_ref_type(leaf, iref);
11224 offset = btrfs_extent_inline_ref_offset(leaf, iref);
/* A TREE_BLOCK_REF matches when its offset is root->objectid or @owner. */
11226 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11227 (offset == root->objectid || offset == owner)) {
11229 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11231 * Backref of tree reloc root points to itself, no need
11232 * to check backref any more.
11234 if (tree_reloc_root)
11237 /* Check if the backref points to valid referencer */
/* Recurses on the parent bytenr (the ref offset) one level up, with eb NULL. */
11238 found_ref = !check_tree_block_ref(root, NULL,
11239 offset, level + 1, owner);
/* Advance by the size of this inline ref type. */
11244 ptr += btrfs_extent_inline_ref_size(type);
11248 * Inlined extent item doesn't have what we need, check
11249 * TREE_BLOCK_REF_KEY
11252 btrfs_release_path(&path);
11253 key.objectid = bytenr;
11254 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11255 key.offset = root->objectid;
11257 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11262 err |= BACKREF_MISSING;
11264 btrfs_release_path(&path);
/* The lost-backref message is printed only when an extent buffer was given. */
11265 if (eb && (err & BACKREF_MISSING))
11266 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
11267 bytenr, nodesize, owner, level);
11272 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11274 * Return >0 for any error found and output error message
11275 * Return 0 for no error found
/*
 * Validates the EXTENT_DATA item at @slot of leaf @eb: checks that its byte
 * counts are sector aligned, adds them to the global data byte counters, and
 * looks for a data backref of the extent in the extent tree (inline ref,
 * then EXTENT_DATA_REF item, then SHARED_DATA_REF item).
 */
11277 static int check_extent_data_item(struct btrfs_root *root,
11278 struct extent_buffer *eb, int slot)
11280 struct btrfs_file_extent_item *fi;
11281 struct btrfs_path path;
11282 struct btrfs_root *extent_root = root->fs_info->extent_root;
11283 struct btrfs_key fi_key;
11284 struct btrfs_key dbref_key;
11285 struct extent_buffer *leaf;
11286 struct btrfs_extent_item *ei;
11287 struct btrfs_extent_inline_ref *iref;
11288 struct btrfs_extent_data_ref *dref;
11291 u64 disk_num_bytes;
11292 u64 extent_num_bytes;
11299 int found_dbackref = 0;
11303 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11304 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11306 /* Nothing to check for hole and inline data extents */
11307 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11308 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11311 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11312 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11313 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11315 /* Check unaligned disk_num_bytes and num_bytes */
11316 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11318 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11319 fi_key.objectid, fi_key.offset, disk_num_bytes,
11320 root->fs_info->sectorsize);
11321 err |= BYTES_UNALIGNED;
/* File-level accounting: disk_num_bytes is added to the allocated total. */
11323 data_bytes_allocated += disk_num_bytes;
11325 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11327 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11328 fi_key.objectid, fi_key.offset, extent_num_bytes,
11329 root->fs_info->sectorsize);
11330 err |= BYTES_UNALIGNED;
/* File-level accounting: num_bytes is added to the referenced total. */
11332 data_bytes_referenced += extent_num_bytes;
11334 owner = btrfs_header_owner(eb);
11336 /* Check the extent item of the file extent in extent tree */
11337 btrfs_init_path(&path);
/* The extent item is keyed (disk_bytenr, EXTENT_ITEM, disk_num_bytes). */
11338 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11339 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11340 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11342 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11346 leaf = path.nodes[0];
11347 slot = path.slots[0];
11348 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11350 extent_flags = btrfs_extent_flags(leaf, ei);
11352 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11354 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11355 disk_bytenr, disk_num_bytes,
11356 BTRFS_EXTENT_FLAG_DATA);
11357 err |= BACKREF_MISMATCH;
11360 /* Check data backref inside that extent item */
11361 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11362 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11363 ptr = (unsigned long)iref;
11364 end = (unsigned long)ei + item_size;
11365 while (ptr < end) {
11366 iref = (struct btrfs_extent_inline_ref *)ptr;
11367 type = btrfs_extent_inline_ref_type(leaf, iref);
/* The data ref payload is read starting at the inline ref offset field. */
11368 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11370 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11371 ref_root = btrfs_extent_data_ref_root(leaf, dref);
/* Either the leaf header owner or root->objectid counts as a match. */
11372 if (ref_root == owner || ref_root == root->objectid)
11373 found_dbackref = 1;
11374 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/* Shared ref: the tree block named by the ref offset must itself be referenced. */
11375 found_dbackref = !check_tree_block_ref(root, NULL,
11376 btrfs_extent_inline_ref_offset(leaf, iref),
11380 if (found_dbackref)
11382 ptr += btrfs_extent_inline_ref_size(type);
11385 if (!found_dbackref) {
11386 btrfs_release_path(&path);
11388 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
/* Key offset is hash_extent_data_ref() of root objectid and the file extent key. */
11389 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11390 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
11391 dbref_key.offset = hash_extent_data_ref(root->objectid,
11392 fi_key.objectid, fi_key.offset);
11394 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11395 &dbref_key, &path, 0, 0);
11397 found_dbackref = 1;
11401 btrfs_release_path(&path);
11404 * Neither inlined nor EXTENT_DATA_REF found, try
11405 * SHARED_DATA_REF as last chance.
/* The shared data ref is keyed by the start of the leaf holding the file extent. */
11407 dbref_key.objectid = disk_bytenr;
11408 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
11409 dbref_key.offset = eb->start;
11411 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11412 &dbref_key, &path, 0, 0);
11414 found_dbackref = 1;
11420 if (!found_dbackref)
11421 err |= BACKREF_MISSING;
11422 btrfs_release_path(&path);
11423 if (err & BACKREF_MISSING) {
11424 error("data extent[%llu %llu] backref lost",
11425 disk_bytenr, disk_num_bytes);
11431 * Get real tree block level for the case like shared block
11432 * Return >= 0 as tree level
11433 * Return <0 for error
/*
 * Determines the level of tree block @bytenr from two sources: the extent
 * item found in the extent tree and the header of the block itself.  The
 * value returned on the visible success path is the header level.
 * NOTE(review): what is returned when the two levels differ should be
 * confirmed against the full function; per the header comment it is
 * presumably a negative error.
 */
11435 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11437 struct extent_buffer *eb;
11438 struct btrfs_path path;
11439 struct btrfs_key key;
11440 struct btrfs_extent_item *ei;
11447 /* Search extent tree for extent generation and level */
11448 key.objectid = bytenr;
11449 key.type = BTRFS_METADATA_ITEM_KEY;
11450 key.offset = (u64)-1;
11452 btrfs_init_path(&path);
11453 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
/* Step back from the search position to the extent item of @bytenr. */
11456 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11464 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11465 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11466 struct btrfs_extent_item);
11467 flags = btrfs_extent_flags(path.nodes[0], ei);
/* The extent item must carry the TREE_BLOCK flag. */
11468 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11473 /* Get transid for later read_tree_block() check */
11474 transid = btrfs_extent_generation(path.nodes[0], ei);
11476 /* Get backref level as one source */
/* METADATA_ITEM: level is the key offset; otherwise it is read from the tree block info. */
11477 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11478 backref_level = key.offset;
11480 struct btrfs_tree_block_info *info;
11482 info = (struct btrfs_tree_block_info *)(ei + 1);
11483 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11485 btrfs_release_path(&path);
11487 /* Get level from tree block as an alternative source */
11488 eb = read_tree_block(fs_info, bytenr, transid);
11489 if (!extent_buffer_uptodate(eb)) {
11490 free_extent_buffer(eb);
/* The header level is copied out before the buffer is released. */
11493 header_level = btrfs_header_level(eb);
11494 free_extent_buffer(eb);
11496 if (header_level != backref_level)
11498 return header_level;
11501 btrfs_release_path(&path);
11506 * Check if a tree block backref is valid (points to a valid tree block)
11507 * if level == -1, level will be resolved
11508 * Return >0 for any error found and print error message
/*
 * Confirms that root @root_id references tree block @bytenr: the block is
 * read, its first key is taken, the root is searched with that key, and the
 * node found at @level is compared with @bytenr and @level.  Problems are
 * reported as REFERENCER_MISSING / REFERENCER_MISMATCH bits in err.
 */
11510 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11511 u64 bytenr, int level)
11513 struct btrfs_root *root;
11514 struct btrfs_key key;
11515 struct btrfs_path path;
11516 struct extent_buffer *eb;
11517 struct extent_buffer *node;
11518 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11522 /* Query level for level == -1 special case */
11524 level = query_tree_block_level(fs_info, bytenr);
11526 err |= REFERENCER_MISSING;
/* Look up the referencing root by (root_id, ROOT_ITEM, -1). */
11530 key.objectid = root_id;
11531 key.type = BTRFS_ROOT_ITEM_KEY;
11532 key.offset = (u64)-1;
11534 root = btrfs_read_fs_root(fs_info, &key);
11535 if (IS_ERR(root)) {
11536 err |= REFERENCER_MISSING;
11540 /* Read out the tree block to get item/node key */
11541 eb = read_tree_block(fs_info, bytenr, 0);
11542 if (!extent_buffer_uptodate(eb)) {
11543 err |= REFERENCER_MISSING;
11544 free_extent_buffer(eb);
11548 /* Empty tree, no need to check key */
11549 if (!btrfs_header_nritems(eb) && !level) {
11550 free_extent_buffer(eb);
/* The first key is read as a node key or as an item key. */
11555 btrfs_node_key_to_cpu(eb, &key, 0);
11557 btrfs_item_key_to_cpu(eb, &key, 0);
11559 free_extent_buffer(eb);
11561 btrfs_init_path(&path);
/*
 * path.nodes[level] is inspected after the search; lowest_level is set to
 * @level beforehand (presumably so the search stops at that level - confirm).
 */
11562 path.lowest_level = level;
11563 /* Search with the first key, to ensure we can reach it */
11564 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11566 err |= REFERENCER_MISSING;
11570 node = path.nodes[level];
/*
 * NOTE(review): nodesize is declared u32 but is formatted with %d in the
 * messages of this function - confirm whether %u was intended.
 */
11571 if (btrfs_header_bytenr(node) != bytenr) {
11573 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
11574 bytenr, nodesize, bytenr,
11575 btrfs_header_bytenr(node));
11576 err |= REFERENCER_MISMATCH;
11578 if (btrfs_header_level(node) != level) {
11580 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
11581 bytenr, nodesize, level,
11582 btrfs_header_level(node));
11583 err |= REFERENCER_MISMATCH;
11587 btrfs_release_path(&path);
/* Two message variants follow: one without and one with the level. */
11589 if (err & REFERENCER_MISSING) {
11591 error("extent [%llu %d] lost referencer (owner: %llu)",
11592 bytenr, nodesize, root_id);
11595 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11596 bytenr, nodesize, root_id, level);
11603 * Check if tree block @eb is tree reloc root.
11604 * Return 0 if it's not or any problem happens
11605 * Return 1 if it's a tree reloc root
/*
 * Reads, without using the root cache, the tree reloc root keyed
 * (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM, header owner of @eb) and compares
 * the bytenr of @eb with the bytenr of that root's node.
 */
11607 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11608 struct extent_buffer *eb)
11610 struct btrfs_root *tree_reloc_root;
11611 struct btrfs_key key;
11612 u64 bytenr = btrfs_header_bytenr(eb);
11613 u64 owner = btrfs_header_owner(eb);
/* The key offset is the owner recorded in the header of @eb. */
11616 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11617 key.offset = owner;
11618 key.type = BTRFS_ROOT_ITEM_KEY;
11620 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11621 if (IS_ERR(tree_reloc_root))
11624 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
/* The root obtained through the no_cache read is released here. */
11626 btrfs_free_fs_root(tree_reloc_root);
11631 * Check referencer for shared block backref
11632 * If level == -1, this function will resolve the level.
/*
 * Reads the tree block at @parent and scans its block pointers for @bytenr.
 * Reports REFERENCER_MISSING when no parent reference is found.
 */
11634 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11635 u64 parent, u64 bytenr, int level)
11637 struct extent_buffer *eb;
11639 int found_parent = 0;
11642 eb = read_tree_block(fs_info, parent, 0);
11643 if (!extent_buffer_uptodate(eb))
/* Level lookup via the extent tree (see the level == -1 rule in the header comment). */
11647 level = query_tree_block_level(fs_info, bytenr);
11651 /* It's possible it's a tree reloc root */
11652 if (parent == bytenr) {
11653 if (is_tree_reloc_root(fs_info, eb))
/* The parent block must sit exactly one level above the child. */
11658 if (level + 1 != btrfs_header_level(eb))
/* Scan every block pointer of the parent for @bytenr. */
11661 nr = btrfs_header_nritems(eb);
11662 for (i = 0; i < nr; i++) {
11663 if (bytenr == btrfs_node_blockptr(eb, i)) {
11669 free_extent_buffer(eb);
11670 if (!found_parent) {
11672 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
11673 bytenr, fs_info->nodesize, parent, level);
11674 return REFERENCER_MISSING;
11680 * Check referencer for normal (inlined) data ref
11681 * If len == 0, it will be resolved by searching in extent tree
/*
 * Walks the EXTENT_DATA items of @objectid in root @root_id, matches them
 * against data extent (@bytenr, @len) and backref offset @offset, and
 * compares the resulting found_count with @count.  A difference is reported
 * as REFERENCER_MISSING.
 */
11683 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
11684 u64 root_id, u64 objectid, u64 offset,
11685 u64 bytenr, u64 len, u32 count)
11687 struct btrfs_root *root;
11688 struct btrfs_root *extent_root = fs_info->extent_root;
11689 struct btrfs_key key;
11690 struct btrfs_path path;
11691 struct extent_buffer *leaf;
11692 struct btrfs_file_extent_item *fi;
11693 u32 found_count = 0;
/* Extent tree lookup of the EXTENT_ITEM for @bytenr (used for the len == 0 case, see header comment). */
11698 key.objectid = bytenr;
11699 key.type = BTRFS_EXTENT_ITEM_KEY;
11700 key.offset = (u64)-1;
11702 btrfs_init_path(&path);
11703 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11706 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11709 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
/* The item stepped back to must be the EXTENT_ITEM of @bytenr. */
11710 if (key.objectid != bytenr ||
11711 key.type != BTRFS_EXTENT_ITEM_KEY)
11714 btrfs_release_path(&path);
/* Look up the referencing root by (root_id, ROOT_ITEM, -1). */
11716 key.objectid = root_id;
11717 key.type = BTRFS_ROOT_ITEM_KEY;
11718 key.offset = (u64)-1;
11719 btrfs_init_path(&path);
11721 root = btrfs_read_fs_root(fs_info, &key);
11725 key.objectid = objectid;
11726 key.type = BTRFS_EXTENT_DATA_KEY;
11728 * It can be nasty as data backref offset is
11729 * file offset - file extent offset, which is smaller or
11730 * equal to original backref offset. The only special case is
11731 * overflow. So we need to special check and do further search.
/* An offset with the top bit set is treated as the overflow case: search from 0. */
11733 key.offset = offset & (1ULL << 63) ? 0 : offset;
11735 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11740 * Search afterwards to get correct one
11741 * NOTE: As we must do a comprehensive check on the data backref to
11742 * make sure the dref count also matches, we must iterate all file
11743 * extents for that inode.
11746 leaf = path.nodes[0];
11747 slot = path.slots[0];
11749 if (slot >= btrfs_header_nritems(leaf))
11751 btrfs_item_key_to_cpu(leaf, &key, slot);
/* Tested for items that are not EXTENT_DATA of @objectid. */
11752 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11754 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11756 * Except normal disk bytenr and disk num bytes, we still
11757 * need to do extra check on dbackref offset as
11758 * dbackref offset = file_offset - file_extent_offset
11760 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11761 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11762 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11767 ret = btrfs_next_item(root, &path);
11772 btrfs_release_path(&path);
11773 if (found_count != count) {
11775 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
11776 bytenr, len, root_id, objectid, offset, count, found_count);
11777 return REFERENCER_MISSING;
11783 * Check if the referencer of a shared data backref exists
/*
 * Reads the leaf at @parent and scans its items for a file extent whose disk
 * bytenr equals @bytenr.  Items are tested for the EXTENT_DATA key type and
 * for the inline extent type before the bytenr comparison.  Reports
 * REFERENCER_MISSING when no such file extent is found.
 */
11785 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
11786 u64 parent, u64 bytenr)
11788 struct extent_buffer *eb;
11789 struct btrfs_key key;
11790 struct btrfs_file_extent_item *fi;
11792 int found_parent = 0;
11795 eb = read_tree_block(fs_info, parent, 0);
11796 if (!extent_buffer_uptodate(eb))
11799 nr = btrfs_header_nritems(eb);
11800 for (i = 0; i < nr; i++) {
11801 btrfs_item_key_to_cpu(eb, &key, i);
11802 if (key.type != BTRFS_EXTENT_DATA_KEY)
11805 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
11806 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
11809 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
11816 free_extent_buffer(eb);
11817 if (!found_parent) {
11818 error("shared extent %llu referencer lost (parent: %llu)",
11820 return REFERENCER_MISSING;
11826 * This function will check a given extent item, including its backref and
11827 * itself (like crossing stripe boundary and type)
11829 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Checks the EXTENT_ITEM/METADATA_ITEM at @slot of @eb: adds its size to the
 * file-level bytes_used counter, checks metadata extents for crossing a
 * stripe boundary, then walks the inline refs and hands each one to the
 * checker matching its type.
 */
11831 static int check_extent_item(struct btrfs_fs_info *fs_info,
11832 struct extent_buffer *eb, int slot)
11834 struct btrfs_extent_item *ei;
11835 struct btrfs_extent_inline_ref *iref;
11836 struct btrfs_extent_data_ref *dref;
11840 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11841 u32 item_size = btrfs_item_size_nr(eb, slot);
11846 struct btrfs_key key;
11850 btrfs_item_key_to_cpu(eb, &key, slot);
/* EXTENT_ITEM keys add key.offset to bytes_used; the other path adds nodesize. */
11851 if (key.type == BTRFS_EXTENT_ITEM_KEY)
11852 bytes_used += key.offset;
11854 bytes_used += nodesize;
11856 if (item_size < sizeof(*ei)) {
11858 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
11859 * old thing when on disk format is still un-determined.
11860 * No need to care about it anymore
11862 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
11866 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
11867 flags = btrfs_extent_flags(eb, ei);
11869 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
/*
 * NOTE(review): this call passes the file-level global_info although fs_info
 * is available as a parameter - confirm both refer to the same filesystem.
 */
11871 if (metadata && check_crossing_stripes(global_info, key.objectid,
11873 error("bad metadata [%llu, %llu) crossing stripe boundary",
11874 key.objectid, key.objectid + nodesize);
11875 err |= CROSSING_STRIPE_BOUNDARY;
/* Inline refs start right after the extent item ... */
11878 ptr = (unsigned long)(ei + 1);
/* ... or after the tree block info for metadata stored as EXTENT_ITEM. */
11880 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
11881 /* Old EXTENT_ITEM metadata */
11882 struct btrfs_tree_block_info *info;
11884 info = (struct btrfs_tree_block_info *)ptr;
11885 level = btrfs_tree_block_level(eb, info);
11886 ptr += sizeof(struct btrfs_tree_block_info);
11888 /* New METADATA_ITEM */
11889 level = key.offset;
11891 end = (unsigned long)ei + item_size;
11894 /* Reached extent item end normally */
11898 /* Beyond extent item end, wrong item size */
11900 err |= ITEM_SIZE_MISMATCH;
11901 error("extent item at bytenr %llu slot %d has wrong size",
11906 /* Now check every backref in this extent item */
11907 iref = (struct btrfs_extent_inline_ref *)ptr;
11908 type = btrfs_extent_inline_ref_type(eb, iref);
11909 offset = btrfs_extent_inline_ref_offset(eb, iref);
/* Dispatch on the inline ref type; offset is the root id or the parent bytenr argument. */
11911 case BTRFS_TREE_BLOCK_REF_KEY:
11912 ret = check_tree_block_backref(fs_info, offset, key.objectid,
11916 case BTRFS_SHARED_BLOCK_REF_KEY:
11917 ret = check_shared_block_backref(fs_info, offset, key.objectid,
11921 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref payload is read starting at the inline ref offset field. */
11922 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11923 ret = check_extent_data_backref(fs_info,
11924 btrfs_extent_data_ref_root(eb, dref),
11925 btrfs_extent_data_ref_objectid(eb, dref),
11926 btrfs_extent_data_ref_offset(eb, dref),
11927 key.objectid, key.offset,
11928 btrfs_extent_data_ref_count(eb, dref));
11931 case BTRFS_SHARED_DATA_REF_KEY:
11932 ret = check_shared_data_backref(fs_info, offset, key.objectid);
11936 error("extent[%llu %d %llu] has unknown ref type: %d",
11937 key.objectid, key.type, key.offset, type);
11938 err |= UNKNOWN_TYPE;
/* Advance by the size of this inline ref type. */
11942 ptr += btrfs_extent_inline_ref_size(type);
11950 * Check if a dev extent item is referred correctly by its chunk
/*
 * Looks up the chunk item named by the dev extent at @slot of @eb and checks
 * that the chunk is valid, that its stripe length equals the dev extent
 * length, and that one of its stripes has the devid and offset of this dev
 * extent.  Reports REFERENCER_MISSING when no matching chunk is found.
 */
11952 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
11953 struct extent_buffer *eb, int slot)
11955 struct btrfs_root *chunk_root = fs_info->chunk_root;
11956 struct btrfs_dev_extent *ptr;
11957 struct btrfs_path path;
11958 struct btrfs_key chunk_key;
11959 struct btrfs_key devext_key;
11960 struct btrfs_chunk *chunk;
11961 struct extent_buffer *l;
11965 int found_chunk = 0;
11968 btrfs_item_key_to_cpu(eb, &devext_key, slot);
11969 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
11970 length = btrfs_dev_extent_length(eb, ptr);
/* The chunk key is built from the chunk objectid/offset stored in the dev extent. */
11972 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
11973 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11974 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
11976 btrfs_init_path(&path);
11977 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11982 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
11983 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
/* The stripe length of the chunk must equal the dev extent length. */
11988 if (btrfs_stripe_length(fs_info, l, chunk) != length)
/* Look for the stripe whose devid/offset equal the dev extent key objectid/offset. */
11991 num_stripes = btrfs_chunk_num_stripes(l, chunk);
11992 for (i = 0; i < num_stripes; i++) {
11993 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
11994 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
11996 if (devid == devext_key.objectid &&
11997 offset == devext_key.offset) {
12003 btrfs_release_path(&path);
12004 if (!found_chunk) {
12006 "device extent[%llu, %llu, %llu] did not find the related chunk",
12007 devext_key.objectid, devext_key.offset, length);
12008 return REFERENCER_MISSING;
12014 * Check if the used space is correct with the dev item
/*
 * Sums the lengths of the DEV_EXTENT items of the device described by the
 * dev item at @slot of @eb and compares the sum with the bytes_used value
 * stored in the dev item.  Reports REFERENCER_MISSING when the dev extent
 * search fails and ACCOUNTING_MISMATCH when the totals differ.
 */
12016 static int check_dev_item(struct btrfs_fs_info *fs_info,
12017 struct extent_buffer *eb, int slot)
12019 struct btrfs_root *dev_root = fs_info->dev_root;
12020 struct btrfs_dev_item *dev_item;
12021 struct btrfs_path path;
12022 struct btrfs_key key;
12023 struct btrfs_dev_extent *ptr;
12029 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12030 dev_id = btrfs_device_id(eb, dev_item);
12031 used = btrfs_device_bytes_used(eb, dev_item);
/* Dev extents of this device are keyed with the device id as objectid. */
12033 key.objectid = dev_id;
12034 key.type = BTRFS_DEV_EXTENT_KEY;
12037 btrfs_init_path(&path);
12038 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* key is reloaded from the dev item so the message shows the dev item key. */
12040 btrfs_item_key_to_cpu(eb, &key, slot);
12041 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12042 key.objectid, key.type, key.offset);
12043 btrfs_release_path(&path);
12044 return REFERENCER_MISSING;
12047 /* Iterate dev_extents to calculate the used space of a device */
12049 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12052 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12053 if (key.objectid > dev_id)
12055 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12058 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12059 struct btrfs_dev_extent);
12060 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12062 ret = btrfs_next_item(dev_root, &path);
12066 btrfs_release_path(&path);
/*
 * NOTE(review): key is reloaded from the dev item just below, yet the message
 * prints BTRFS_ROOT_TREE_OBJECTID, BTRFS_DEV_EXTENT_KEY and dev_id instead of
 * the fields of key (unlike the earlier message) - confirm which was intended.
 */
12068 if (used != total) {
12069 btrfs_item_key_to_cpu(eb, &key, slot);
12071 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12072 total, used, BTRFS_ROOT_TREE_OBJECTID,
12073 BTRFS_DEV_EXTENT_KEY, dev_id);
12074 return ACCOUNTING_MISMATCH;
12080 * Check a block group item with its referencer (chunk) and its used space
12081 * with extent/metadata item
/*
 * Checks the block group item at @slot of @eb in two steps.  First the chunk
 * item keyed by the block group start is looked up and its length compared.
 * Then the extent tree is walked over the block group range, summing the
 * extent sizes into total and checking each extent's DATA/TREE_BLOCK flag
 * against the block group flags.  total must equal the used value of the
 * block group item.
 */
12083 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12084 struct extent_buffer *eb, int slot)
12086 struct btrfs_root *extent_root = fs_info->extent_root;
12087 struct btrfs_root *chunk_root = fs_info->chunk_root;
12088 struct btrfs_block_group_item *bi;
12089 struct btrfs_block_group_item bg_item;
12090 struct btrfs_path path;
12091 struct btrfs_key bg_key;
12092 struct btrfs_key chunk_key;
12093 struct btrfs_key extent_key;
12094 struct btrfs_chunk *chunk;
12095 struct extent_buffer *leaf;
12096 struct btrfs_extent_item *ei;
12097 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12105 btrfs_item_key_to_cpu(eb, &bg_key, slot);
12106 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* The item is copied into bg_item and the fields are read from that copy. */
12107 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12108 used = btrfs_block_group_used(&bg_item);
12109 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk item is keyed (FIRST_CHUNK_TREE_OBJECTID, CHUNK_ITEM, block group start). */
12111 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12112 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12113 chunk_key.offset = bg_key.objectid;
12115 btrfs_init_path(&path);
12116 /* Search for the referencer chunk */
12117 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12120 "block group[%llu %llu] did not find the related chunk item",
12121 bg_key.objectid, bg_key.offset);
12122 err |= REFERENCER_MISSING;
12124 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12125 struct btrfs_chunk);
12126 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12129 "block group[%llu %llu] related chunk item length does not match",
12130 bg_key.objectid, bg_key.offset);
12131 err |= REFERENCER_MISMATCH;
12134 btrfs_release_path(&path);
12136 /* Search from the block group bytenr */
12137 extent_key.objectid = bg_key.objectid;
12138 extent_key.type = 0;
12139 extent_key.offset = 0;
12141 btrfs_init_path(&path);
12142 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12146 /* Iterate extent tree to account used space */
12148 leaf = path.nodes[0];
12150 /* Search slot can point to the last item beyond leaf nritems */
12151 if (path.slots[0] >= btrfs_header_nritems(leaf))
12154 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* Tested for keys at or past the end of the block group range. */
12155 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
/* Only METADATA_ITEM and EXTENT_ITEM keys take part in the accounting. */
12158 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12159 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12161 if (extent_key.objectid < bg_key.objectid)
/* METADATA_ITEM keys are handled separately from the key.offset accounting. */
12164 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12167 total += extent_key.offset;
12169 ei = btrfs_item_ptr(leaf, path.slots[0],
12170 struct btrfs_extent_item);
12171 flags = btrfs_extent_flags(leaf, ei);
/* Data extents need a DATA block group; tree blocks need SYSTEM or METADATA. */
12172 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12173 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12175 "bad extent[%llu, %llu) type mismatch with chunk",
12176 extent_key.objectid,
12177 extent_key.objectid + extent_key.offset);
12178 err |= CHUNK_TYPE_MISMATCH;
12180 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12181 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12182 BTRFS_BLOCK_GROUP_METADATA))) {
12184 "bad extent[%llu, %llu) type mismatch with chunk",
12185 extent_key.objectid,
12186 extent_key.objectid + nodesize);
12187 err |= CHUNK_TYPE_MISMATCH;
12191 ret = btrfs_next_item(extent_root, &path);
12197 btrfs_release_path(&path);
12199 if (total != used) {
12201 "block group[%llu %llu] used %llu but extent items used %llu",
12202 bg_key.objectid, bg_key.offset, used, total);
12203 err |= ACCOUNTING_MISMATCH;
12209 * Check a chunk item.
12210 * Including checking all referred dev_extents and block group
/*
 * Checks the chunk item at @slot of @eb: validates the chunk, looks up the
 * block group item keyed (chunk offset, BLOCK_GROUP_ITEM, chunk length) and
 * compares its flags with the chunk type, then for every stripe looks up the
 * dev extent keyed (stripe devid, DEV_EXTENT, stripe offset) and compares its
 * chunk objectid, chunk offset and length.
 */
12212 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12213 struct extent_buffer *eb, int slot)
12215 struct btrfs_root *extent_root = fs_info->extent_root;
12216 struct btrfs_root *dev_root = fs_info->dev_root;
12217 struct btrfs_path path;
12218 struct btrfs_key chunk_key;
12219 struct btrfs_key bg_key;
12220 struct btrfs_key devext_key;
12221 struct btrfs_chunk *chunk;
12222 struct extent_buffer *leaf;
12223 struct btrfs_block_group_item *bi;
12224 struct btrfs_block_group_item bg_item;
12225 struct btrfs_dev_extent *ptr;
12237 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12238 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12239 length = btrfs_chunk_length(eb, chunk);
12240 chunk_end = chunk_key.offset + length;
12241 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12244 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12246 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12249 type = btrfs_chunk_type(eb, chunk);
/* The block group item is keyed (chunk offset, BLOCK_GROUP_ITEM, chunk length). */
12251 bg_key.objectid = chunk_key.offset;
12252 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12253 bg_key.offset = length;
12255 btrfs_init_path(&path);
12256 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
12259 "chunk[%llu %llu) did not find the related block group item",
12260 chunk_key.offset, chunk_end);
12261 err |= REFERENCER_MISSING;
12263 leaf = path.nodes[0];
12264 bi = btrfs_item_ptr(leaf, path.slots[0],
12265 struct btrfs_block_group_item);
12266 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
/*
 * NOTE(review): a flags mismatch is recorded as REFERENCER_MISSING here,
 * whereas check_block_group_item records its length mismatch as
 * REFERENCER_MISMATCH - confirm the intended bit.
 */
12268 if (btrfs_block_group_flags(&bg_item) != type) {
12270 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
12271 chunk_key.offset, chunk_end, type,
12272 btrfs_block_group_flags(&bg_item));
12273 err |= REFERENCER_MISSING;
12277 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
12278 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
12279 for (i = 0; i < num_stripes; i++) {
/* The path is released and re-initialised at the start of every iteration. */
12280 btrfs_release_path(&path);
12281 btrfs_init_path(&path);
12282 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
12283 devext_key.type = BTRFS_DEV_EXTENT_KEY;
12284 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
12286 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
12289 goto not_match_dev;
12291 leaf = path.nodes[0];
12292 ptr = btrfs_item_ptr(leaf, path.slots[0],
12293 struct btrfs_dev_extent);
12294 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
12295 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
/* The dev extent must point back at this chunk and have the stripe length. */
12296 if (objectid != chunk_key.objectid ||
12297 offset != chunk_key.offset ||
12298 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
12299 goto not_match_dev;
12302 err |= BACKREF_MISSING;
/*
 * NOTE(review): this message prints chunk_key.objectid as the range start,
 * while the other messages in this function print chunk_key.offset - confirm.
 */
12304 "chunk[%llu %llu) stripe %d did not find the related dev extent",
12305 chunk_key.objectid, chunk_end, i);
12308 btrfs_release_path(&path);
12314 * Main entry function to check known items and update related accounting info
/*
 * Dispatch the items of leaf @eb to the checker matching each key type:
 * extent data, block group item, dev item, chunk item, dev extent,
 * extent/metadata item and the four backref key types.
 * EXTENT_CSUM items are not passed to a checker; their item size is only
 * added to total_csum_bytes.
 */
12316 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
12318 struct btrfs_fs_info *fs_info = root->fs_info;
12319 struct btrfs_key key;
12322 struct btrfs_extent_data_ref *dref;
12327 btrfs_item_key_to_cpu(eb, &key, slot);
12331 case BTRFS_EXTENT_DATA_KEY:
12332 ret = check_extent_data_item(root, eb, slot);
12335 case BTRFS_BLOCK_GROUP_ITEM_KEY:
12336 ret = check_block_group_item(fs_info, eb, slot);
12339 case BTRFS_DEV_ITEM_KEY:
12340 ret = check_dev_item(fs_info, eb, slot);
12343 case BTRFS_CHUNK_ITEM_KEY:
12344 ret = check_chunk_item(fs_info, eb, slot);
12347 case BTRFS_DEV_EXTENT_KEY:
12348 ret = check_dev_extent_item(fs_info, eb, slot);
12351 case BTRFS_EXTENT_ITEM_KEY:
12352 case BTRFS_METADATA_ITEM_KEY:
12353 ret = check_extent_item(fs_info, eb, slot);
12356 case BTRFS_EXTENT_CSUM_KEY:
12357 total_csum_bytes += btrfs_item_size_nr(eb, slot);
12359 case BTRFS_TREE_BLOCK_REF_KEY:
12360 ret = check_tree_block_backref(fs_info, key.offset,
12364 case BTRFS_EXTENT_DATA_REF_KEY:
/* Decode root, objectid, offset and count of the data ref from the leaf */
12365 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
12366 ret = check_extent_data_backref(fs_info,
12367 btrfs_extent_data_ref_root(eb, dref),
12368 btrfs_extent_data_ref_objectid(eb, dref),
12369 btrfs_extent_data_ref_offset(eb, dref),
12371 btrfs_extent_data_ref_count(eb, dref));
12374 case BTRFS_SHARED_BLOCK_REF_KEY:
12375 ret = check_shared_block_backref(fs_info, key.offset,
12379 case BTRFS_SHARED_DATA_REF_KEY:
12380 ret = check_shared_data_backref(fs_info, key.offset,
/* Advance to the next slot while there are items left in the leaf */
12388 if (++slot < btrfs_header_nritems(eb))
12395 * Helper function for later fs/subvol tree check. To determine if a tree
12396 * block should be checked.
12397 * This function ensures that only the direct referencer with the lowest rootid
12398 * checks a fs/subvolume tree block.
12400 * Backref check at extent tree would detect errors like missing subvolume
12401 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * Look up the extent item of tree block @eb in the extent tree and walk its
 * inline refs.  The walk stops, releasing the path, at the first inline
 * TREE_BLOCK_REF whose root id is lower than root->objectid.
 * NOTE(review): the returned values are presumably "skip" for that case and
 * "check" otherwise (including lookup failures) - confirm against the caller.
 */
12403 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
12405 struct btrfs_root *extent_root = root->fs_info->extent_root;
12406 struct btrfs_key key;
12407 struct btrfs_path path;
12408 struct extent_buffer *leaf;
12410 struct btrfs_extent_item *ei;
12416 struct btrfs_extent_inline_ref *iref;
12419 btrfs_init_path(&path);
/* Search with the largest offset for this bytenr, then step back to its extent item */
12420 key.objectid = btrfs_header_bytenr(eb);
12421 key.type = BTRFS_METADATA_ITEM_KEY;
12422 key.offset = (u64)-1;
12425 * Any failure in backref resolving means we can't determine
12426 * whom the tree block belongs to.
12427 * So in that case, we need to check that tree block
12429 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12433 ret = btrfs_previous_extent_item(extent_root, &path,
12434 btrfs_header_bytenr(eb));
12438 leaf = path.nodes[0];
12439 slot = path.slots[0];
12440 btrfs_item_key_to_cpu(leaf, &key, slot);
12441 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * For a METADATA_ITEM the inline refs follow the extent item directly;
 * otherwise a btrfs_tree_block_info sits between the two.
 */
12443 if (key.type == BTRFS_METADATA_ITEM_KEY) {
12444 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12446 struct btrfs_tree_block_info *info;
12448 info = (struct btrfs_tree_block_info *)(ei + 1);
12449 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* Walk the inline refs from the first one up to the end of the item */
12452 item_size = btrfs_item_size_nr(leaf, slot);
12453 ptr = (unsigned long)iref;
12454 end = (unsigned long)ei + item_size;
12455 while (ptr < end) {
12456 iref = (struct btrfs_extent_inline_ref *)ptr;
12457 type = btrfs_extent_inline_ref_type(leaf, iref);
12458 offset = btrfs_extent_inline_ref_offset(leaf, iref);
12461 * We only check the tree block if current root is
12462 * the lowest referencer of it.
12464 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
12465 offset < root->objectid) {
12466 btrfs_release_path(&path);
12470 ptr += btrfs_extent_inline_ref_size(type);
12473 * Normally we should also check keyed tree block ref, but that may be
12474 * very time consuming. Inlined ref should already make us skip a lot
12475 * of refs now. So skip search keyed tree block ref.
12479 btrfs_release_path(&path);
12484 * Traversal function for tree block. We will do:
12485 * 1) Skip shared fs/subvolume tree blocks
12486 * 2) Update related bytes accounting
12487 * 3) Pre-order traversal
/*
 * Check tree block @node on behalf of @root and recurse into its children.
 * Side effects visible here: total_btree_bytes, total_fs_tree_bytes,
 * total_extent_tree_bytes and btree_space_waste are updated.
 */
12489 static int traverse_tree_block(struct btrfs_root *root,
12490 struct extent_buffer *node)
12492 struct extent_buffer *eb;
12493 struct btrfs_key key;
12494 struct btrfs_key drop_key;
12502 * Skip shared fs/subvolume tree block, in that case they will
12503 * be checked by referencer with lowest rootid
12505 if (is_fstree(root->objectid) && !should_check(root, node))
12508 /* Update bytes accounting */
12509 total_btree_bytes += node->len;
12510 if (fs_root_objectid(btrfs_header_owner(node)))
12511 total_fs_tree_bytes += node->len;
12512 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
12513 total_extent_tree_bytes += node->len;
12515 /* pre-order traversal, check itself first */
12516 level = btrfs_header_level(node);
12517 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
12518 btrfs_header_level(node),
12519 btrfs_header_owner(node));
12523 "check %s failed root %llu bytenr %llu level %d, force continue check",
12524 level ? "node":"leaf", root->objectid,
12525 btrfs_header_bytenr(node), btrfs_header_level(node));
/* Leaf: account the unused space and check every item */
12528 btree_space_waste += btrfs_leaf_free_space(root, node);
12529 ret = check_leaf_items(root, node);
/* Node: account the unused key pointer slots as wasted space */
12534 nr = btrfs_header_nritems(node);
12535 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
12536 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
12537 sizeof(struct btrfs_key_ptr);
12539 /* Then check all its children */
12540 for (i = 0; i < nr; i++) {
12541 u64 blocknr = btrfs_node_blockptr(node, i);
12543 btrfs_node_key_to_cpu(node, &key, i);
/* Keys at drop_level are compared against the drop_progress key of the root item */
12544 if (level == root->root_item.drop_level &&
12545 is_dropped_key(&key, &drop_key))
12549 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
12550 * to call the function itself.
12552 eb = read_tree_block(root->fs_info, blocknr, 0);
12553 if (extent_buffer_uptodate(eb)) {
12554 ret = traverse_tree_block(root, eb);
12557 free_extent_buffer(eb);
12564 * Low memory usage version of check_chunks_and_extents.
/*
 * Traverse the chunk tree and the tree root, then iterate the ROOT_ITEMs of
 * the tree root, starting the search at the extent tree objectid, and
 * traverse each tree they describe.
 */
12566 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
12568 struct btrfs_path path;
12569 struct btrfs_key key;
12570 struct btrfs_root *root1;
12571 struct btrfs_root *root;
12572 struct btrfs_root *cur_root;
12576 root = fs_info->fs_root;
12578 root1 = root->fs_info->chunk_root;
12579 ret = traverse_tree_block(root1, root1->node);
12582 root1 = root->fs_info->tree_root;
12583 ret = traverse_tree_block(root1, root1->node);
/* root1 is the tree root from here on */
12586 btrfs_init_path(&path);
12587 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
12589 key.type = BTRFS_ROOT_ITEM_KEY;
12591 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
12593 error("cannot find extent treet in tree_root");
12598 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12599 if (key.type != BTRFS_ROOT_ITEM_KEY)
12601 key.offset = (u64)-1;
/* Reloc trees are read without the root cache and freed again after the traversal */
12603 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12604 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
12607 cur_root = btrfs_read_fs_root(root->fs_info, &key);
12608 if (IS_ERR(cur_root) || !cur_root) {
12609 error("failed to read tree: %lld", key.objectid);
12613 ret = traverse_tree_block(cur_root, cur_root->node);
12616 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12617 btrfs_free_fs_root(cur_root);
12619 ret = btrfs_next_item(root1, &path);
12625 btrfs_release_path(&path);
/*
 * Run the extent check selected by check_mode: the low memory variant for
 * CHECK_MODE_LOWMEM, check_chunks_and_extents() otherwise.
 * A status line is written to stderr when progress reporting is not enabled.
 */
12629 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
12633 if (!ctx.progress_enabled)
12634 fprintf(stderr, "checking extents\n");
12635 if (check_mode == CHECK_MODE_LOWMEM)
12636 ret = check_chunks_and_extents_v2(fs_info);
12638 ret = check_chunks_and_extents(fs_info);
/*
 * Re-initialize the root node of @root inside transaction @trans.
 * Extent buffer c receives a fresh header (level, bytenr, generation,
 * backref revision, owner, fsid, chunk tree uuid) and is marked dirty.
 * When c starts at the same bytenr as the old root node, the root item is
 * updated in the tree root via btrfs_update_root().
 * NOTE(review): @overwrite presumably selects reusing the old root node
 * instead of allocating a new block - confirm.
 */
12643 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
12644 struct btrfs_root *root, int overwrite)
12646 struct extent_buffer *c;
12647 struct extent_buffer *old = root->node;
12650 struct btrfs_disk_key disk_key = {0,0,0};
12656 extent_buffer_get(c);
12659 c = btrfs_alloc_free_block(trans, root,
12660 root->fs_info->nodesize,
12661 root->root_key.objectid,
12662 &disk_key, level, 0, 0);
12665 extent_buffer_get(c);
/* Wipe the header area, then fill in the header fields one by one */
12669 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
12670 btrfs_set_header_level(c, level);
12671 btrfs_set_header_bytenr(c, c->start);
12672 btrfs_set_header_generation(c, trans->transid);
12673 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
12674 btrfs_set_header_owner(c, root->root_key.objectid);
12676 write_extent_buffer(c, root->fs_info->fsid,
12677 btrfs_header_fsid(), BTRFS_FSID_SIZE);
12679 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
12680 btrfs_header_chunk_tree_uuid(c),
12683 btrfs_mark_buffer_dirty(c);
12685 * this case can happen in the following case:
12687 * 1.overwrite previous root.
12689 * 2.reinit reloc data root, this is because we skip pin
12690 * down reloc data tree before which means we can allocate
12691 * same block bytenr here.
12693 if (old->start == c->start) {
12694 btrfs_set_root_generation(&root->root_item,
12696 root->root_item.level = btrfs_header_level(root->node);
12697 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12698 &root->root_key, &root->root_item);
12700 free_extent_buffer(c);
12704 free_extent_buffer(old);
12706 add_root_to_dirty_list(root);
/*
 * Pin the extent of tree block @eb and process what it refers to.
 * ROOT_ITEM entries (extent root and reloc roots excluded): the referenced
 * root block is read and processed recursively with tree_root == 0.
 * Node pointers: the child is pinned without being read when level == 1 and
 * @tree_root is not set, otherwise it is read and processed recursively.
 * A block whose range is already EXTENT_DIRTY in pinned_extents is detected
 * up front, before btrfs_pin_extent() is called.
 */
12710 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12711 struct extent_buffer *eb, int tree_root)
12713 struct extent_buffer *tmp;
12714 struct btrfs_root_item *ri;
12715 struct btrfs_key key;
12717 int level = btrfs_header_level(eb);
12723 * If we have pinned this block before, don't pin it again.
12724 * This can not only avoid forever loop with broken filesystem
12725 * but also give us some speedups.
12727 if (test_range_bit(&fs_info->pinned_extents, eb->start,
12728 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
12731 btrfs_pin_extent(fs_info, eb->start, eb->len);
12733 nritems = btrfs_header_nritems(eb);
12734 for (i = 0; i < nritems; i++) {
12736 btrfs_item_key_to_cpu(eb, &key, i);
12737 if (key.type != BTRFS_ROOT_ITEM_KEY)
12739 /* Skip the extent root and reloc roots */
12740 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
12741 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
12742 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
12744 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
12745 bytenr = btrfs_disk_root_bytenr(eb, ri);
12748 * If at any point we start needing the real root we
12749 * will have to build a stump root for the root we are
12750 * in, but for now this doesn't actually use the root so
12751 * just pass in extent_root.
12753 tmp = read_tree_block(fs_info, bytenr, 0);
12754 if (!extent_buffer_uptodate(tmp)) {
12755 fprintf(stderr, "Error reading root block\n");
12758 ret = pin_down_tree_blocks(fs_info, tmp, 0);
12759 free_extent_buffer(tmp);
12763 bytenr = btrfs_node_blockptr(eb, i);
12765 /* If we aren't the tree root don't read the block */
12766 if (level == 1 && !tree_root) {
12767 btrfs_pin_extent(fs_info, bytenr,
12768 fs_info->nodesize);
12772 tmp = read_tree_block(fs_info, bytenr, 0);
12773 if (!extent_buffer_uptodate(tmp)) {
12774 fprintf(stderr, "Error reading tree block\n");
12777 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
12778 free_extent_buffer(tmp);
/*
 * Pin the blocks reachable from the chunk root node (tree_root == 0), then
 * those reachable from the tree root node (tree_root == 1).
 */
12787 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
12791 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
12795 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Reset the avail_*_alloc_bits of @fs_info and recreate the in-memory block
 * groups from the CHUNK_ITEMs of the chunk tree.  The range of every chunk
 * is also marked dirty in fs_info->free_space_cache.  Afterwards the block
 * groups are walked once more through btrfs_lookup_first_block_group().
 */
12798 static int reset_block_groups(struct btrfs_fs_info *fs_info)
12800 struct btrfs_block_group_cache *cache;
12801 struct btrfs_path path;
12802 struct extent_buffer *leaf;
12803 struct btrfs_chunk *chunk;
12804 struct btrfs_key key;
12808 btrfs_init_path(&path);
12810 key.type = BTRFS_CHUNK_ITEM_KEY;
12812 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
12814 btrfs_release_path(&path);
12819 * We do this in case the block groups were screwed up and had alloc
12820 * bits that aren't actually set on the chunks. This happens with
12821 * restored images every time and could happen in real life I guess.
12823 fs_info->avail_data_alloc_bits = 0;
12824 fs_info->avail_metadata_alloc_bits = 0;
12825 fs_info->avail_system_alloc_bits = 0;
12827 /* First we need to create the in-memory block groups */
12829 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12830 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
12832 btrfs_release_path(&path);
12840 leaf = path.nodes[0];
12841 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12842 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
/* One block group per chunk: type and length come from the chunk item */
12847 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
12848 btrfs_add_block_group(fs_info, 0,
12849 btrfs_chunk_type(leaf, chunk),
12850 key.objectid, key.offset,
12851 btrfs_chunk_length(leaf, chunk));
12852 set_extent_dirty(&fs_info->free_space_cache, key.offset,
12853 key.offset + btrfs_chunk_length(leaf, chunk));
12858 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Continue the lookup right after the end of this block group */
12862 start = cache->key.objectid + cache->key.offset;
12865 btrfs_release_path(&path);
/*
 * Remove the state of a pending balance from the tree root: the balance item
 * and the TREE_RELOC root items are deleted.  Afterwards the data reloc tree
 * is re-initialized with btrfs_fsck_reinit_root() and its root directory is
 * created again with btrfs_make_root_dir().
 */
12869 static int reset_balance(struct btrfs_trans_handle *trans,
12870 struct btrfs_fs_info *fs_info)
12872 struct btrfs_root *root = fs_info->tree_root;
12873 struct btrfs_path path;
12874 struct extent_buffer *leaf;
12875 struct btrfs_key key;
12876 int del_slot, del_nr = 0;
12880 btrfs_init_path(&path);
/* Step 1: look up and delete the balance item */
12881 key.objectid = BTRFS_BALANCE_OBJECTID;
12882 key.type = BTRFS_BALANCE_ITEM_KEY;
12884 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12889 goto reinit_data_reloc;
12894 ret = btrfs_del_item(trans, root, &path);
12897 btrfs_release_path(&path);
/* Step 2: delete the TREE_RELOC root items, batching adjacent slots */
12899 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12900 key.type = BTRFS_ROOT_ITEM_KEY;
12902 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12906 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12911 ret = btrfs_del_items(trans, root, &path,
12918 btrfs_release_path(&path);
12921 ret = btrfs_search_slot(trans, root, &key, &path,
12928 leaf = path.nodes[0];
12929 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12930 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
12932 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
12937 del_slot = path.slots[0];
12946 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
12950 btrfs_release_path(&path);
/* Step 3: re-initialize the data reloc tree; root now refers to that tree */
12953 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
12954 key.type = BTRFS_ROOT_ITEM_KEY;
12955 key.offset = (u64)-1;
12956 root = btrfs_read_fs_root(fs_info, &key);
12957 if (IS_ERR(root)) {
12958 fprintf(stderr, "Error reading data reloc tree\n");
12959 ret = PTR_ERR(root);
12962 record_root_in_trans(trans, root);
12963 ret = btrfs_fsck_reinit_root(trans, root, 0);
12966 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
12968 btrfs_release_path(&path);
/*
 * Rebuild the extent tree inside transaction @trans.
 * Steps visible here: pin the metadata blocks in use, drop and recreate the
 * in-memory block groups, re-initialize the extent root, insert an item for
 * every block group into the new extent root, and reset a pending balance.
 * Filesystems with the MIXED_GROUPS incompat flag get a message on stderr
 * saying that this is not supported.
 */
12972 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
12973 struct btrfs_fs_info *fs_info)
12979 * The only reason we don't do this is because right now we're just
12980 * walking the trees we find and pinning down their bytes, we don't look
12981 * at any of the leaves. In order to do mixed groups we'd have to check
12982 * the leaves of any fs roots and pin down the bytes for any file
12983 * extents we find. Not hard but why do it if we don't have to?
12985 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
12986 fprintf(stderr, "We don't support re-initing the extent tree "
12987 "for mixed block groups yet, please notify a btrfs "
12988 "developer you want to do this so they can add this "
12989 "functionality.\n");
12994 * first we need to walk all of the trees except the extent tree and pin
12995 * down the bytes that are in use so we don't overwrite any existing
12998 ret = pin_metadata_blocks(fs_info);
13000 fprintf(stderr, "error pinning down used bytes\n");
13005 * Need to drop all the block groups since we're going to recreate all
13008 btrfs_free_block_groups(fs_info);
13009 ret = reset_block_groups(fs_info);
13011 fprintf(stderr, "error resetting the block groups\n");
13015 /* Ok we can allocate now, reinit the extent root */
13016 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13018 fprintf(stderr, "extent root initialization failed\n");
13020 * When the transaction code is updated we should end the
13021 * transaction, but for now progs only knows about commit so
13022 * just return an error.
13028 * Now we have all the in-memory block groups setup so we can make
13029 * allocations properly, and the metadata we care about is safe since we
13030 * pinned all of it above.
13033 struct btrfs_block_group_cache *cache;
13035 cache = btrfs_lookup_first_block_group(fs_info, start);
13038 start = cache->key.objectid + cache->key.offset;
13039 ret = btrfs_insert_item(trans, fs_info->extent_root,
13040 &cache->key, &cache->item,
13041 sizeof(cache->item));
13043 fprintf(stderr, "Error adding block group\n");
13046 btrfs_extent_post_op(trans, fs_info->extent_root);
13049 ret = reset_balance(trans, fs_info);
13051 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Look up the owner root of @eb (taken from the block header) and, inside a
 * new transaction, search for the first key of @eb at the level of @eb with
 * the cow argument of btrfs_search_slot() set.  The transaction is committed
 * afterwards.
 * NOTE(review): the return value of btrfs_commit_transaction() is not stored
 * on the visible line - confirm whether a commit failure should be reported.
 */
13056 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13058 struct btrfs_path path;
13059 struct btrfs_trans_handle *trans;
13060 struct btrfs_key key;
13063 printf("Recowing metadata block %llu\n", eb->start);
13064 key.objectid = btrfs_header_owner(eb);
13065 key.type = BTRFS_ROOT_ITEM_KEY;
13066 key.offset = (u64)-1;
13068 root = btrfs_read_fs_root(root->fs_info, &key);
13069 if (IS_ERR(root)) {
13070 fprintf(stderr, "Couldn't find owner root %llu\n",
13072 return PTR_ERR(root);
13075 trans = btrfs_start_transaction(root, 1);
13077 return PTR_ERR(trans);
13079 btrfs_init_path(&path);
/* Stop the search at the level of eb; the key is read as node key or item key accordingly */
13080 path.lowest_level = btrfs_header_level(eb);
13081 if (path.lowest_level)
13082 btrfs_node_key_to_cpu(eb, &key, 0);
13084 btrfs_item_key_to_cpu(eb, &key, 0);
13086 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13087 btrfs_commit_transaction(trans, root);
13088 btrfs_release_path(&path);
/*
 * Delete the item with key bad->key from the root identified by
 * bad->root_id, using a transaction of its own that is committed afterwards.
 * NOTE(review): the return value of btrfs_commit_transaction() is not stored
 * on the visible line - confirm whether a commit failure should be reported.
 */
13092 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13094 struct btrfs_path path;
13095 struct btrfs_trans_handle *trans;
13096 struct btrfs_key key;
13099 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13100 bad->key.type, bad->key.offset);
/* Resolve the root that holds the bad item */
13101 key.objectid = bad->root_id;
13102 key.type = BTRFS_ROOT_ITEM_KEY;
13103 key.offset = (u64)-1;
13105 root = btrfs_read_fs_root(root->fs_info, &key);
13106 if (IS_ERR(root)) {
13107 fprintf(stderr, "Couldn't find owner root %llu\n",
13109 return PTR_ERR(root);
13112 trans = btrfs_start_transaction(root, 1);
13114 return PTR_ERR(trans);
13116 btrfs_init_path(&path);
13117 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13123 ret = btrfs_del_item(trans, root, &path);
13125 btrfs_commit_transaction(trans, root);
13126 btrfs_release_path(&path);
/*
 * Set log_root and log_root_level of the in-memory super block copy to 0 and
 * commit a transaction on @root.
 */
13130 static int zero_log_tree(struct btrfs_root *root)
13132 struct btrfs_trans_handle *trans;
13135 trans = btrfs_start_transaction(root, 1);
13136 if (IS_ERR(trans)) {
13137 ret = PTR_ERR(trans);
13140 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13141 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13142 ret = btrfs_commit_transaction(trans, root);
/*
 * Walk the byte range beginning at @start in steps of fs_info->sectorsize
 * until offset reaches len.  Each step reads data at start + offset into
 * @buf and hands it to btrfs_csum_file_block() for @csum_root.
 */
13146 static int populate_csum(struct btrfs_trans_handle *trans,
13147 struct btrfs_root *csum_root, char *buf, u64 start,
13150 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13155 while (offset < len) {
13156 sectorsize = fs_info->sectorsize;
13157 ret = read_extent_data(fs_info, buf, start + offset,
/* start + len is passed as the end of the whole range, start + offset as this sector */
13161 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13162 start + offset, buf, sectorsize);
13165 offset += sectorsize;
/*
 * Iterate the items of @cur_root and call populate_csum() for the on-disk
 * extent (disk_bytenr, disk_num_bytes) of every EXTENT_DATA item of type
 * BTRFS_FILE_EXTENT_REG.  A buffer of one sector is allocated for the reads.
 * NOTE(review): -EEXIST from populate_csum() is tested separately, presumably
 * to tolerate extents that were already processed - confirm.
 */
13170 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13171 struct btrfs_root *csum_root,
13172 struct btrfs_root *cur_root)
13174 struct btrfs_path path;
13175 struct btrfs_key key;
13176 struct extent_buffer *node;
13177 struct btrfs_file_extent_item *fi;
13184 buf = malloc(cur_root->fs_info->sectorsize);
13188 btrfs_init_path(&path);
13192 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13195 /* Iterate all regular file extents and fill its csum */
13197 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13199 if (key.type != BTRFS_EXTENT_DATA_KEY)
13201 node = path.nodes[0];
13202 slot = path.slots[0];
13203 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13204 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
13206 start = btrfs_file_extent_disk_bytenr(node, fi);
13207 len = btrfs_file_extent_disk_num_bytes(node, fi);
13209 ret = populate_csum(trans, csum_root, buf, start, len);
13210 if (ret == -EEXIST)
13216 * TODO: if next leaf is corrupted, jump to nearest next valid
13219 ret = btrfs_next_item(cur_root, &path);
13229 btrfs_release_path(&path);
/*
 * Iterate the ROOT_ITEMs of the tree root, starting the search at
 * BTRFS_FS_TREE_OBJECTID, and call fill_csum_tree_from_one_fs_root() for the
 * trees read via btrfs_read_fs_root().  Keys are filtered on objectid
 * (against BTRFS_LAST_FREE_OBJECTID), key type and is_fstree().
 */
13234 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13235 struct btrfs_root *csum_root)
13237 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13238 struct btrfs_path path;
13239 struct btrfs_root *tree_root = fs_info->tree_root;
13240 struct btrfs_root *cur_root;
13241 struct extent_buffer *node;
13242 struct btrfs_key key;
13246 btrfs_init_path(&path);
13247 key.objectid = BTRFS_FS_TREE_OBJECTID;
13249 key.type = BTRFS_ROOT_ITEM_KEY;
13250 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13259 node = path.nodes[0];
13260 slot = path.slots[0];
13261 btrfs_item_key_to_cpu(node, &key, slot);
13262 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13264 if (key.type != BTRFS_ROOT_ITEM_KEY)
13266 if (!is_fstree(key.objectid))
/* The root is read with offset (u64)-1 in the lookup key */
13268 key.offset = (u64)-1;
13270 cur_root = btrfs_read_fs_root(fs_info, &key);
13271 if (IS_ERR(cur_root) || !cur_root) {
13272 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13276 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13281 ret = btrfs_next_item(tree_root, &path);
13291 btrfs_release_path(&path);
/*
 * Iterate the extent tree leaf by leaf and call populate_csum() for
 * EXTENT_ITEMs that carry BTRFS_EXTENT_FLAG_DATA, using the key objectid as
 * the start of the range.  A buffer of one sector is allocated for the reads.
 */
13295 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13296 struct btrfs_root *csum_root)
13298 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13299 struct btrfs_path path;
13300 struct btrfs_extent_item *ei;
13301 struct extent_buffer *leaf;
13303 struct btrfs_key key;
13306 btrfs_init_path(&path);
13308 key.type = BTRFS_EXTENT_ITEM_KEY;
13310 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
13312 btrfs_release_path(&path);
13316 buf = malloc(csum_root->fs_info->sectorsize);
13318 btrfs_release_path(&path);
/* Move on to the next leaf once the current one is exhausted */
13323 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13324 ret = btrfs_next_leaf(extent_root, &path);
13332 leaf = path.nodes[0];
13334 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13335 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
13340 ei = btrfs_item_ptr(leaf, path.slots[0],
13341 struct btrfs_extent_item);
13342 if (!(btrfs_extent_flags(leaf, ei) &
13343 BTRFS_EXTENT_FLAG_DATA)) {
13348 ret = populate_csum(trans, csum_root, buf, key.objectid,
13355 btrfs_release_path(&path);
13361 * Recalculate the csum and put it into the csum tree.
13363 * Extent tree init will wipe out all the extent info, so in that case, we
13364 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
13365 * will use fs/subvol trees to init the csum tree.
/*
 * Dispatch on @search_fs_tree: non-zero fills the csum tree from the
 * fs/subvolume trees, zero fills it from the extent tree.
 */
13367 static int fill_csum_tree(struct btrfs_trans_handle *trans,
13368 struct btrfs_root *csum_root,
13369 int search_fs_tree)
13371 if (search_fs_tree)
13372 return fill_csum_tree_from_fs(trans, csum_root);
13374 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Empty roots_info_cache entry by entry, then free the cache tree itself and
 * reset the global pointer to NULL.  Nothing is done past the initial test
 * when the cache was never allocated.
 */
13377 static void free_roots_info_cache(void)
13379 if (!roots_info_cache)
13382 while (!cache_tree_empty(roots_info_cache)) {
13383 struct cache_extent *entry;
13384 struct root_item_info *rii;
13386 entry = first_cache_extent(roots_info_cache);
13389 remove_cache_extent(roots_info_cache, entry);
/* The cache_extent is embedded in a root_item_info; recover the container */
13390 rii = container_of(entry, struct root_item_info, cache_extent);
13394 free(roots_info_cache);
13395 roots_info_cache = NULL;
/*
 * Scan the extent tree of @info and record, per root id, the tree block with
 * the highest level seen so far in roots_info_cache (allocated on first use).
 * Only items whose first inline ref is of type TREE_BLOCK_REF pass the type
 * test; the root id is the offset of that ref.  Each cache entry stores
 * level, bytenr, generation and a node count for its root.
 */
13398 static int build_roots_info_cache(struct btrfs_fs_info *info)
13401 struct btrfs_key key;
13402 struct extent_buffer *leaf;
13403 struct btrfs_path path;
13405 if (!roots_info_cache) {
13406 roots_info_cache = malloc(sizeof(*roots_info_cache));
13407 if (!roots_info_cache)
13409 cache_tree_init(roots_info_cache);
13412 btrfs_init_path(&path);
13414 key.type = BTRFS_EXTENT_ITEM_KEY;
13416 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
13419 leaf = path.nodes[0];
13422 struct btrfs_key found_key;
13423 struct btrfs_extent_item *ei;
13424 struct btrfs_extent_inline_ref *iref;
13425 int slot = path.slots[0];
13430 struct cache_extent *entry;
13431 struct root_item_info *rii;
13433 if (slot >= btrfs_header_nritems(leaf)) {
13434 ret = btrfs_next_leaf(info->extent_root, &path);
13441 leaf = path.nodes[0];
13442 slot = path.slots[0];
13445 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13447 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
13448 found_key.type != BTRFS_METADATA_ITEM_KEY)
13451 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
13452 flags = btrfs_extent_flags(leaf, ei);
13454 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
13455 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * METADATA_ITEM: the level is the key offset and the inline ref follows the
 * extent item.  Otherwise the level comes from the btrfs_tree_block_info
 * placed between the extent item and the inline ref.
 */
13458 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
13459 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
13460 level = found_key.offset;
13462 struct btrfs_tree_block_info *binfo;
13464 binfo = (struct btrfs_tree_block_info *)(ei + 1);
13465 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
13466 level = btrfs_tree_block_level(leaf, binfo);
13470 * For a root extent, it must be of the following type and the
13471 * first (and only one) iref in the item.
13473 type = btrfs_extent_inline_ref_type(leaf, iref);
13474 if (type != BTRFS_TREE_BLOCK_REF_KEY)
13477 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
13478 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/* New entries are keyed by root id with size 1; level (u8)-1 marks an entry without a candidate */
13480 rii = malloc(sizeof(struct root_item_info));
13485 rii->cache_extent.start = root_id;
13486 rii->cache_extent.size = 1;
13487 rii->level = (u8)-1;
13488 entry = &rii->cache_extent;
13489 ret = insert_cache_extent(roots_info_cache, entry);
13492 rii = container_of(entry, struct root_item_info,
13496 ASSERT(rii->cache_extent.start == root_id);
13497 ASSERT(rii->cache_extent.size == 1);
/* A higher level, or the first block seen for this root, replaces the cached candidate */
13499 if (level > rii->level || rii->level == (u8)-1) {
13500 rii->level = level;
13501 rii->bytenr = found_key.objectid;
13502 rii->gen = btrfs_extent_generation(leaf, ei);
13503 rii->node_count = 1;
13504 } else if (level == rii->level) {
13512 btrfs_release_path(&path);
/*
 * Compare the root item at @path with the entry cached for the root in
 * roots_info_cache.  When bytenr, level or generation differ, the mismatch
 * is reported and, unless @read_only_mode is set, the three fields are
 * overwritten in the leaf with the cached values.
 * A missing cache entry and a node_count other than 1 are reported as errors.
 */
13517 static int maybe_repair_root_item(struct btrfs_path *path,
13518 const struct btrfs_key *root_key,
13519 const int read_only_mode)
13521 const u64 root_id = root_key->objectid;
13522 struct cache_extent *entry;
13523 struct root_item_info *rii;
13524 struct btrfs_root_item ri;
13525 unsigned long offset;
13527 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13530 "Error: could not find extent items for root %llu\n",
13531 root_key->objectid);
13535 rii = container_of(entry, struct root_item_info, cache_extent);
13536 ASSERT(rii->cache_extent.start == root_id);
13537 ASSERT(rii->cache_extent.size == 1);
13539 if (rii->node_count != 1) {
13541 "Error: could not find btree root extent for root %llu\n",
/* Work on a local copy of the root item read out of the leaf */
13546 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
13547 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
13549 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
13550 btrfs_root_level(&ri) != rii->level ||
13551 btrfs_root_generation(&ri) != rii->gen) {
13554 * If we're in repair mode but our caller told us to not update
13555 * the root item, i.e. just check if it needs to be updated, don't
13556 * print this message, since the caller will call us again shortly
13557 * for the same root item without read only mode (the caller will
13558 * open a transaction first).
13560 if (!(read_only_mode && repair))
13562 "%sroot item for root %llu,"
13563 " current bytenr %llu, current gen %llu, current level %u,"
13564 " new bytenr %llu, new gen %llu, new level %u\n",
13565 (read_only_mode ? "" : "fixing "),
13567 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
13568 btrfs_root_level(&ri),
13569 rii->bytenr, rii->gen, rii->level);
13571 if (btrfs_root_generation(&ri) > rii->gen) {
13573 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
13574 root_id, btrfs_root_generation(&ri), rii->gen);
/* Write the corrected copy back to the same offset in the leaf */
13578 if (!read_only_mode) {
13579 btrfs_set_root_bytenr(&ri, rii->bytenr);
13580 btrfs_set_root_level(&ri, rii->level);
13581 btrfs_set_root_generation(&ri, rii->gen);
13582 write_extent_buffer(path->nodes[0], &ri,
13583 offset, sizeof(ri));
13593 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
13594 * caused read-only snapshots to be corrupted if they were created at a moment
13595 * when the source subvolume/snapshot had orphan items. The issue was that the
13596 * on-disk root items became incorrect, referring to the pre orphan cleanup root
13597 * node instead of the post orphan cleanup root node.
13598 * So this function, and its callees, just detects and fixes those cases. Even
13599 * though the regression was for read-only snapshots, this function applies to
13600 * any snapshot/subvolume root.
13601 * This must be run before any other repair code - not doing so makes other
13602 * repair code delete or modify backrefs in the extent tree for example, which
13603 * will result in an inconsistent fs after repairing the root items.
/*
 * Build the roots info cache from the extent tree, then walk the ROOT_ITEMs
 * of the tree root from BTRFS_FIRST_FREE_OBJECTID onward and call
 * maybe_repair_root_item() on each; items of the reloc tree are tested for
 * separately.  The item is examined in read-only mode while no transaction
 * is open, and without read-only mode once trans is set.
 * The cache is freed and the path released before the function finishes.
 */
13605 static int repair_root_items(struct btrfs_fs_info *info)
13607 struct btrfs_path path;
13608 struct btrfs_key key;
13609 struct extent_buffer *leaf;
13610 struct btrfs_trans_handle *trans = NULL;
13613 int need_trans = 0;
13615 btrfs_init_path(&path);
13617 ret = build_roots_info_cache(info);
13621 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
13622 key.type = BTRFS_ROOT_ITEM_KEY;
13627 * Avoid opening and committing transactions if a leaf doesn't have
13628 * any root items that need to be fixed, so that we avoid rotating
13629 * backup roots unnecessarily.
13632 trans = btrfs_start_transaction(info->tree_root, 1);
13633 if (IS_ERR(trans)) {
13634 ret = PTR_ERR(trans);
13639 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
13643 leaf = path.nodes[0];
13646 struct btrfs_key found_key;
/* End of leaf: find the key to resume from, release the path and commit */
13648 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
13649 int no_more_keys = find_next_key(&path, &key);
13651 btrfs_release_path(&path);
13653 ret = btrfs_commit_transaction(trans,
13665 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13667 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
13669 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13672 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
13676 if (!trans && repair) {
13679 btrfs_release_path(&path);
13689 free_roots_info_cache();
13690 btrfs_release_path(&path);
13692 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Call btrfs_clear_free_space_cache() for each block group found through
 * btrfs_lookup_first_block_group(), then set cache_generation in the
 * in-memory super block copy to (u64)-1 and commit a transaction on the
 * tree root.
 */
13699 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13701 struct btrfs_trans_handle *trans;
13702 struct btrfs_block_group_cache *bg_cache;
13706 /* Clear all free space cache inodes and its extent data */
13708 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13711 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
/* Continue the lookup right after the end of this block group */
13714 current = bg_cache->key.objectid + bg_cache->key.offset;
13717 /* Don't forget to set cache_generation to -1 */
13718 trans = btrfs_start_transaction(fs_info->tree_root, 0);
13719 if (IS_ERR(trans)) {
13720 error("failed to update super block cache generation");
13721 return PTR_ERR(trans);
13723 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
13724 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Dispatch on clear_version: 1 clears the v1 free space cache through
 * clear_free_space_cache(), 2 clears the free space tree through
 * btrfs_clear_free_space_tree(). Progress and errors are printed.
 *
 * NOTE(review): the second line of the signature and the exits of each
 * branch are not visible in this listing.
 */
13729 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
13734 if (clear_version == 1) {
/* The FREE_SPACE_TREE compat_ro bit is set: a message points the user to the v2 option. */
13735 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13737 "free space cache v2 detected, use --clear-space-cache v2");
13741 printf("Clearing free space cache\n");
13742 ret = clear_free_space_cache(fs_info);
13744 error("failed to clear free space cache");
13747 printf("Free space cache cleared\n");
13749 } else if (clear_version == 2) {
/* Without the FREE_SPACE_TREE compat_ro bit there is no v2 cache to clear. */
13750 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13751 printf("no free space cache v2 to clear\n");
13755 printf("Clear free space cache v2\n");
13756 ret = btrfs_clear_free_space_tree(fs_info);
13758 error("failed to clear free space cache v2: %d", ret);
13761 printf("free space cache v2 cleared\n");
/*
 * Help text of the check command as an array of strings: synopsis first,
 * then descriptions and one line per option. It is passed to usage() by
 * cmd_check() when option parsing or the argument count check fails.
 * NOTE(review): some array entries and the terminator are not visible in
 * this listing.
 */
13768 const char * const cmd_check_usage[] = {
13769 "btrfs check [options] <device>",
13770 "Check structural integrity of a filesystem (unmounted).",
13771 "Check structural integrity of an unmounted filesystem. Verify internal",
13772 "trees' consistency and item connectivity. In the repair mode try to",
13773 "fix the problems found. ",
13774 "WARNING: the repair mode is considered dangerous",
13776 "-s|--super <superblock> use this superblock copy",
13777 "-b|--backup use the first valid backup root copy",
13778 "--force skip mount checks, repair is not possible",
13779 "--repair try to repair the filesystem",
13780 "--readonly run in read-only mode (default)",
13781 "--init-csum-tree create a new CRC tree",
13782 "--init-extent-tree create a new extent tree",
13783 "--mode <MODE> allows choice of memory/IO trade-offs",
13784 " where MODE is one of:",
13785 " original - read inodes and extents to memory (requires",
13786 " more memory, does less IO)",
13787 " lowmem - try to use less memory but read blocks again",
13789 "--check-data-csum verify checksums of data blocks",
13790 "-Q|--qgroup-report print a report on qgroup consistency",
13791 "-E|--subvol-extents <subvolid>",
13792 " print subvolume extents and sharing state",
13793 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
13794 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
13795 "-p|--progress indicate progress",
13796 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * cmd_check - entry point of the btrfs check command.
 *
 * argc/argv: command line; exactly one non-option argument (the device or
 * image path) is required, otherwise usage(cmd_check_usage) is called.
 *
 * Visible flow: parse options into ctree_flags and local switches, refuse
 * conflicting options, check the mount status, open the filesystem with
 * open_ctree_fs_info(), then run the phases below in order (optional space
 * cache clearing, log tree zeroing, qgroup or extent reports, optional
 * extent/csum tree re-initialisation, chunks and extents, root items, free
 * space cache or tree, fs roots, csums, root refs, quota groups) and print
 * the accumulated byte counters.
 *
 * NOTE(review): only part of this function is visible in this listing
 * (case labels, gotos, braces and the return are missing); the comments
 * describe the visible statements only.
 */
13800 int cmd_check(int argc, char **argv)
13802 struct cache_tree root_cache;
13803 struct btrfs_root *root;
13804 struct btrfs_fs_info *info;
13807 u64 tree_root_bytenr = 0;
13808 u64 chunk_root_bytenr = 0;
13809 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
13813 int init_csum_tree = 0;
13815 int clear_space_cache = 0;
13816 int qgroup_report = 0;
13817 int qgroups_repaired = 0;
/* Open flags start as exclusive; option handlers below OR in more bits. */
13818 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* Values for long-only options, starting above the single-character range. */
13823 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
13824 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
13825 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
13826 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
13827 GETOPT_VAL_FORCE };
13828 static const struct option long_options[] = {
13829 { "super", required_argument, NULL, 's' },
13830 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
13831 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
13832 { "init-csum-tree", no_argument, NULL,
13833 GETOPT_VAL_INIT_CSUM },
13834 { "init-extent-tree", no_argument, NULL,
13835 GETOPT_VAL_INIT_EXTENT },
13836 { "check-data-csum", no_argument, NULL,
13837 GETOPT_VAL_CHECK_CSUM },
13838 { "backup", no_argument, NULL, 'b' },
13839 { "subvol-extents", required_argument, NULL, 'E' },
13840 { "qgroup-report", no_argument, NULL, 'Q' },
13841 { "tree-root", required_argument, NULL, 'r' },
13842 { "chunk-root", required_argument, NULL,
13843 GETOPT_VAL_CHUNK_TREE },
13844 { "progress", no_argument, NULL, 'p' },
13845 { "mode", required_argument, NULL,
13847 { "clear-space-cache", required_argument, NULL,
13848 GETOPT_VAL_CLEAR_SPACE_CACHE},
13849 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
13850 { NULL, 0, NULL, 0}
/*
 * The short option E takes an argument, exactly like its long form
 * subvol-extents (required_argument above), and its handler parses optarg.
 * It therefore needs a trailing colon in the optstring; without it optarg
 * is NULL when the short form is used.
 */
13853 c = getopt_long(argc, argv, "as:br:pE:Q", long_options, NULL);
13857 case 'a': /* ignored */ break;
13859 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
/* Super block copy selection: the mirror index must be below BTRFS_SUPER_MIRROR_MAX. */
13862 num = arg_strtou64(optarg);
13863 if (num >= BTRFS_SUPER_MIRROR_MAX) {
13865 "super mirror should be less than %d",
13866 BTRFS_SUPER_MIRROR_MAX);
13869 bytenr = btrfs_sb_offset(((int)num));
13870 printf("using SB copy %llu, bytenr %llu\n", num,
13871 (unsigned long long)bytenr);
13877 subvolid = arg_strtou64(optarg);
13880 tree_root_bytenr = arg_strtou64(optarg);
13882 case GETOPT_VAL_CHUNK_TREE:
13883 chunk_root_bytenr = arg_strtou64(optarg);
13886 ctx.progress_enabled = true;
13890 usage(cmd_check_usage);
/* Every option that modifies the filesystem also requests a writable open. */
13891 case GETOPT_VAL_REPAIR:
13892 printf("enabling repair mode\n");
13894 ctree_flags |= OPEN_CTREE_WRITES;
13896 case GETOPT_VAL_READONLY:
13899 case GETOPT_VAL_INIT_CSUM:
13900 printf("Creating a new CRC tree\n");
13901 init_csum_tree = 1;
13903 ctree_flags |= OPEN_CTREE_WRITES;
13905 case GETOPT_VAL_INIT_EXTENT:
13906 init_extent_tree = 1;
13907 ctree_flags |= (OPEN_CTREE_WRITES |
13908 OPEN_CTREE_NO_BLOCK_GROUPS);
13911 case GETOPT_VAL_CHECK_CSUM:
13912 check_data_csum = 1;
13914 case GETOPT_VAL_MODE:
13915 check_mode = parse_check_mode(optarg);
13916 if (check_mode == CHECK_MODE_UNKNOWN) {
13917 error("unknown mode: %s", optarg);
/* Accepts only v1 or v2; v2 additionally asks the open to invalidate the free space tree. */
13921 case GETOPT_VAL_CLEAR_SPACE_CACHE:
13922 if (strcmp(optarg, "v1") == 0) {
13923 clear_space_cache = 1;
13924 } else if (strcmp(optarg, "v2") == 0) {
13925 clear_space_cache = 2;
13926 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
13929 "invalid argument to --clear-space-cache, must be v1 or v2");
13932 ctree_flags |= OPEN_CTREE_WRITES;
13934 case GETOPT_VAL_FORCE:
/* Exactly one positional argument (the device) must remain after the options. */
13940 if (check_argc_exact(argc - optind, 1))
13941 usage(cmd_check_usage);
/* With progress reporting enabled, set up the status task before any work starts. */
13943 if (ctx.progress_enabled) {
13944 ctx.tp = TASK_NOTHING;
13945 ctx.info = task_init(print_status_check, print_status_return, &ctx);
13948 /* This check is the only reason for --readonly to exist */
13949 if (readonly && repair) {
13950 error("repair options are not compatible with --readonly");
13955 * experimental and dangerous
13957 if (repair && check_mode == CHECK_MODE_LOWMEM)
13958 warning("low-memory mode repair support is only partial");
13961 cache_tree_init(&root_cache);
/* Mount status check of the target; the messages below cover failure, mounted and forced cases. */
13963 ret = check_mounted(argv[optind]);
13966 error("could not check mount status: %s",
13972 "%s is currently mounted, use --force if you really intend to check the filesystem",
13980 error("repair and --force is not yet supported");
13987 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
13991 "filesystem mounted, continuing because of --force");
13993 /* A block device is mounted in exclusive mode by kernel */
13994 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
13997 /* only allow partial opening under repair mode */
13999 ctree_flags |= OPEN_CTREE_PARTIAL;
14001 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14002 chunk_root_bytenr, ctree_flags);
14004 error("cannot open file system");
/* Publish the opened fs_info through the file-scope global_info pointer. */
14010 global_info = info;
14011 root = info->fs_root;
14012 uuid_unparse(info->super_copy->fsid, uuidbuf);
14014 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14017 * Check the bare minimum before starting anything else that could rely
14018 * on it, namely the tree roots, any local consistency checks
14020 if (!extent_buffer_uptodate(info->tree_root->node) ||
14021 !extent_buffer_uptodate(info->dev_root->node) ||
14022 !extent_buffer_uptodate(info->chunk_root->node)) {
14023 error("critical roots corrupted, unable to check the filesystem");
/* Space cache clearing was requested: run it (what follows it is not visible here). */
14029 if (clear_space_cache) {
14030 ret = do_clear_free_space_cache(info, clear_space_cache);
14036 * repair mode will force us to commit transaction which
14037 * will make us fail to load log tree when mounting.
14039 if (repair && btrfs_super_log_root(info->super_copy)) {
14040 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14046 ret = zero_log_tree(root);
14049 error("failed to zero log tree: %d", ret);
/* Report-only modes: qgroup report and per-subvolume extent state. */
14054 if (qgroup_report) {
14055 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14057 ret = qgroup_verify_all(info);
14064 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14065 subvolid, argv[optind], uuidbuf);
14066 ret = print_extent_state(info, subvolid);
/* Tree re-initialisation runs inside one transaction on the extent root. */
14071 if (init_extent_tree || init_csum_tree) {
14072 struct btrfs_trans_handle *trans;
14074 trans = btrfs_start_transaction(info->extent_root, 0);
14075 if (IS_ERR(trans)) {
14076 error("error starting transaction");
14077 ret = PTR_ERR(trans);
14082 if (init_extent_tree) {
14083 printf("Creating a new extent tree\n");
14084 ret = reinit_extent_tree(trans, info);
14090 if (init_csum_tree) {
14091 printf("Reinitialize checksum tree\n");
14092 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14094 error("checksum tree initialization failed: %d",
14101 ret = fill_csum_tree(trans, info->csum_root,
14105 error("checksum tree refilling failed: %d", ret);
14110 * Ok now we commit and run the normal fsck, which will add
14111 * extent entries for all of the items it finds.
14113 ret = btrfs_commit_transaction(trans, info->extent_root);
/* The extent root and csum root nodes must be readable before the main checks. */
14118 if (!extent_buffer_uptodate(info->extent_root->node)) {
14119 error("critical: extent_root, unable to check the filesystem");
14124 if (!extent_buffer_uptodate(info->csum_root->node)) {
14125 error("critical: csum_root, unable to check the filesystem");
14131 ret = do_check_chunks_and_extents(info);
14135 "errors found in extent allocation tree or chunk allocation");
/* Root items: negative ret is an errno-style error, positive ret is a count of roots. */
14137 ret = repair_root_items(info);
14140 error("failed to repair root items: %s", strerror(-ret));
14144 fprintf(stderr, "Fixed %d roots.\n", ret);
14146 } else if (ret > 0) {
14148 "Found %d roots with an outdated root item.\n",
14151 "Please run a filesystem check with the option --repair to fix them.\n");
/* The phase banner is printed only when the progress task is not running. */
14157 if (!ctx.progress_enabled) {
14158 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14159 fprintf(stderr, "checking free space tree\n");
14161 fprintf(stderr, "checking free space cache\n");
14163 ret = check_space_cache(root);
14166 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14167 error("errors found in free space tree");
14169 error("errors found in free space cache");
14174 * We used to have to have these hole extents in between our real
14175 * extents so if we don't have this flag set we need to make sure there
14176 * are no gaps in the file extents for inodes, otherwise we can just
14177 * ignore it when this happens.
14179 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14180 ret = do_check_fs_roots(info, &root_cache);
14183 error("errors found in fs roots");
14187 fprintf(stderr, "checking csums\n");
14188 ret = check_csums(root);
14191 error("errors found in csum tree");
14195 fprintf(stderr, "checking root refs\n");
14196 /* For low memory mode, check_fs_roots_v2 handles root refs */
14197 if (check_mode != CHECK_MODE_LOWMEM) {
14198 ret = check_root_refs(root, &root_cache);
14201 error("errors found in root refs");
/* In repair mode, drain the recow_ebs list and re-COW each queued extent buffer. */
14206 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14207 struct extent_buffer *eb;
14209 eb = list_first_entry(&root->fs_info->recow_ebs,
14210 struct extent_buffer, recow);
14211 list_del_init(&eb->recow);
14212 ret = recow_extent_buffer(root, eb);
14215 error("fails to fix transid errors");
/* Drain the file-scope delete_items list, handing each entry to delete_bad_item(). */
14220 while (!list_empty(&delete_items)) {
14221 struct bad_item *bad;
14223 bad = list_first_entry(&delete_items, struct bad_item, list);
14224 list_del_init(&bad->list);
14226 ret = delete_bad_item(root, bad);
14232 if (info->quota_enabled) {
14233 fprintf(stderr, "checking quota groups\n");
14234 ret = qgroup_verify_all(info);
14237 error("failed to check quota groups");
14241 ret = repair_qgroups(info, &qgroups_repaired);
14244 error("failed to repair quota groups");
/* Entries still left on recow_ebs at this point are reported as transid errors. */
14250 if (!list_empty(&root->fs_info->recow_ebs)) {
14251 error("transid errors in file system");
/* Summary: print the file-scope counters accumulated by the phases above. */
14256 printf("found %llu bytes used, ",
14257 (unsigned long long)bytes_used);
14259 printf("error(s) found\n");
14261 printf("no error found\n");
14262 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14263 printf("total tree bytes: %llu\n",
14264 (unsigned long long)total_btree_bytes);
14265 printf("total fs tree bytes: %llu\n",
14266 (unsigned long long)total_fs_tree_bytes);
14267 printf("total extent tree bytes: %llu\n",
14268 (unsigned long long)total_extent_tree_bytes);
14269 printf("btree space waste bytes: %llu\n",
14270 (unsigned long long)btree_space_waste);
14271 printf("file data blocks allocated: %llu\n referenced %llu\n",
14272 (unsigned long long)data_bytes_allocated,
14273 (unsigned long long)data_bytes_referenced);
/* Cleanup of qgroup counters, the root record cache and the progress task. */
14275 free_qgroup_counts();
14276 free_root_recs_tree(&root_cache);
14280 if (ctx.progress_enabled)
14281 task_deinit(ctx.info);