2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87 struct extent_backref {
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
98 return rb_entry(node, struct extent_backref, node);
101 struct data_backref {
102 struct extent_backref node;
/* Error bits, one bit per condition so they can be OR-ed into one mask. */
#define ROOT_DIR_ERROR		(1 << 1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but does not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but does not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM has no reference */
#define NO_INODE_ITEM		(1 << 14)	/* no inode_item */
#define LAST_ITEM		(1 << 15)	/* this tree traversal is complete */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but does not match */
#define DIR_INDEX_MISSING	(1 << 18)	/* INODE_INDEX not found */
#define DIR_INDEX_MISMATCH	(1 << 19)	/* INODE_INDEX found but does not match */
#define DIR_COUNT_AGAIN		(1 << 20)	/* DIR isize should be recalculated */
137 static inline struct data_backref* to_data_backref(struct extent_backref *back)
139 return container_of(back, struct data_backref, node);
142 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
144 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
145 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
146 struct data_backref *back1 = to_data_backref(ext1);
147 struct data_backref *back2 = to_data_backref(ext2);
149 WARN_ON(!ext1->is_data);
150 WARN_ON(!ext2->is_data);
152 /* parent and root are a union, so this covers both */
153 if (back1->parent > back2->parent)
155 if (back1->parent < back2->parent)
158 /* This is a full backref and the parents match. */
159 if (back1->node.full_backref)
162 if (back1->owner > back2->owner)
164 if (back1->owner < back2->owner)
167 if (back1->offset > back2->offset)
169 if (back1->offset < back2->offset)
172 if (back1->found_ref && back2->found_ref) {
173 if (back1->disk_bytenr > back2->disk_bytenr)
175 if (back1->disk_bytenr < back2->disk_bytenr)
178 if (back1->bytes > back2->bytes)
180 if (back1->bytes < back2->bytes)
 * Much like data_backref, but with the undetermined members removed
 * and changed to use list_head.
190 * During extent scan, it is stored in root->orphan_data_extent.
191 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
193 struct orphan_data_extent {
194 struct list_head list;
202 struct tree_backref {
203 struct extent_backref node;
210 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
212 return container_of(back, struct tree_backref, node);
215 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
217 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
218 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
219 struct tree_backref *back1 = to_tree_backref(ext1);
220 struct tree_backref *back2 = to_tree_backref(ext2);
222 WARN_ON(ext1->is_data);
223 WARN_ON(ext2->is_data);
225 /* parent and root are a union, so this covers both */
226 if (back1->parent > back2->parent)
228 if (back1->parent < back2->parent)
234 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
236 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
237 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
239 if (ext1->is_data > ext2->is_data)
242 if (ext1->is_data < ext2->is_data)
245 if (ext1->full_backref > ext2->full_backref)
247 if (ext1->full_backref < ext2->full_backref)
251 return compare_data_backref(node1, node2);
253 return compare_tree_backref(node1, node2);
/*
 * Value used to explicitly initialize
 * extent_record::flag_block_full_backref.
 */
enum {
	FLAG_UNSET = 2,
};
259 struct extent_record {
260 struct list_head backrefs;
261 struct list_head dups;
262 struct rb_root backref_tree;
263 struct list_head list;
264 struct cache_extent cache;
265 struct btrfs_disk_key parent_key;
270 u64 extent_item_refs;
272 u64 parent_generation;
276 unsigned int flag_block_full_backref:2;
277 unsigned int found_rec:1;
278 unsigned int content_checked:1;
279 unsigned int owner_ref_checked:1;
280 unsigned int is_root:1;
281 unsigned int metadata:1;
282 unsigned int bad_full_backref:1;
283 unsigned int crossing_stripes:1;
284 unsigned int wrong_chunk_type:1;
287 static inline struct extent_record* to_extent_record(struct list_head *entry)
289 return container_of(entry, struct extent_record, list);
292 struct inode_backref {
293 struct list_head list;
294 unsigned int found_dir_item:1;
295 unsigned int found_dir_index:1;
296 unsigned int found_inode_ref:1;
306 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
308 return list_entry(entry, struct inode_backref, list);
311 struct root_item_record {
312 struct list_head list;
318 struct btrfs_key drop_key;
/* Backref error bits, one bit per condition so they can be OR-ed together. */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
335 struct file_extent_hole {
341 struct inode_record {
342 struct list_head backrefs;
343 unsigned int checked:1;
344 unsigned int merging:1;
345 unsigned int found_inode_item:1;
346 unsigned int found_dir_item:1;
347 unsigned int found_file_extent:1;
348 unsigned int found_csum_item:1;
349 unsigned int some_csum_missing:1;
350 unsigned int nodatasum:1;
363 struct rb_root holes;
364 struct list_head orphan_extents;
/* Inode error bits, one bit per condition so they can be OR-ed together. */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
385 struct root_backref {
386 struct list_head list;
387 unsigned int found_dir_item:1;
388 unsigned int found_dir_index:1;
389 unsigned int found_back_ref:1;
390 unsigned int found_forward_ref:1;
391 unsigned int reachable:1;
400 static inline struct root_backref* to_root_backref(struct list_head *entry)
402 return list_entry(entry, struct root_backref, list);
406 struct list_head backrefs;
407 struct cache_extent cache;
408 unsigned int found_root_item:1;
414 struct cache_extent cache;
419 struct cache_extent cache;
420 struct cache_tree root_cache;
421 struct cache_tree inode_cache;
422 struct inode_record *current;
431 struct walk_control {
432 struct cache_tree shared;
433 struct shared_node *nodes[BTRFS_MAX_LEVEL];
439 struct btrfs_key key;
441 struct list_head list;
444 struct extent_entry {
449 struct list_head list;
452 struct root_item_info {
453 /* level of the root */
455 /* number of nodes at this level, must be 1 for a root */
459 struct cache_extent cache_extent;
463 * Error bit for low memory mode check.
465 * Currently no caller cares about it yet. Just internal use for error
/*
 * Each condition owns a distinct bit so that several of them can be OR-ed
 * into one error mask and still be told apart afterwards.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
/*
 * Kept on a bit of its own rather than sharing bit 4 with
 * REFERENCER_MISMATCH: with a shared bit, a stripe-crossing report could
 * not be distinguished from a referencer mismatch in the error mask.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
479 static void *print_status_check(void *p)
481 struct task_ctx *priv = p;
482 const char work_indicator[] = { '.', 'o', 'O', 'o' };
484 static char *task_position_string[] = {
486 "checking free space cache",
490 task_period_start(priv->info, 1000 /* 1s */);
492 if (priv->tp == TASK_NOTHING)
496 printf("%s [%c]\r", task_position_string[priv->tp],
497 work_indicator[count % 4]);
500 task_period_wait(priv->info);
505 static int print_status_return(void *p)
513 static enum btrfs_check_mode parse_check_mode(const char *str)
515 if (strcmp(str, "lowmem") == 0)
516 return CHECK_MODE_LOWMEM;
517 if (strcmp(str, "orig") == 0)
518 return CHECK_MODE_ORIGINAL;
519 if (strcmp(str, "original") == 0)
520 return CHECK_MODE_ORIGINAL;
522 return CHECK_MODE_UNKNOWN;
/* Compatibility function to allow reuse of old code */
526 static u64 first_extent_gap(struct rb_root *holes)
528 struct file_extent_hole *hole;
530 if (RB_EMPTY_ROOT(holes))
533 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
537 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
539 struct file_extent_hole *hole1;
540 struct file_extent_hole *hole2;
542 hole1 = rb_entry(node1, struct file_extent_hole, node);
543 hole2 = rb_entry(node2, struct file_extent_hole, node);
545 if (hole1->start > hole2->start)
547 if (hole1->start < hole2->start)
549 /* Now hole1->start == hole2->start */
550 if (hole1->len >= hole2->len)
552 * Hole 1 will be merge center
553 * Same hole will be merged later
556 /* Hole 2 will be merge center */
561 * Add a hole to the record
563 * This will do hole merge for copy_file_extent_holes(),
564 * which will ensure there won't be continuous holes.
566 static int add_file_extent_hole(struct rb_root *holes,
569 struct file_extent_hole *hole;
570 struct file_extent_hole *prev = NULL;
571 struct file_extent_hole *next = NULL;
573 hole = malloc(sizeof(*hole));
578 /* Since compare will not return 0, no -EEXIST will happen */
579 rb_insert(holes, &hole->node, compare_hole);
581 /* simple merge with previous hole */
582 if (rb_prev(&hole->node))
583 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
585 if (prev && prev->start + prev->len >= hole->start) {
586 hole->len = hole->start + hole->len - prev->start;
587 hole->start = prev->start;
588 rb_erase(&prev->node, holes);
593 /* iterate merge with next holes */
595 if (!rb_next(&hole->node))
597 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
599 if (hole->start + hole->len >= next->start) {
600 if (hole->start + hole->len <= next->start + next->len)
601 hole->len = next->start + next->len -
603 rb_erase(&next->node, holes);
612 static int compare_hole_range(struct rb_node *node, void *data)
614 struct file_extent_hole *hole;
617 hole = (struct file_extent_hole *)data;
620 hole = rb_entry(node, struct file_extent_hole, node);
621 if (start < hole->start)
623 if (start >= hole->start && start < hole->start + hole->len)
629 * Delete a hole in the record
 * This will do the hole split and is much more restrictive than add.
633 static int del_file_extent_hole(struct rb_root *holes,
636 struct file_extent_hole *hole;
637 struct file_extent_hole tmp;
642 struct rb_node *node;
649 node = rb_search(holes, &tmp, compare_hole_range, NULL);
652 hole = rb_entry(node, struct file_extent_hole, node);
653 if (start + len > hole->start + hole->len)
657 * Now there will be no overlap, delete the hole and re-add the
658 * split(s) if they exists.
660 if (start > hole->start) {
661 prev_start = hole->start;
662 prev_len = start - hole->start;
665 if (hole->start + hole->len > start + len) {
666 next_start = start + len;
667 next_len = hole->start + hole->len - start - len;
670 rb_erase(node, holes);
673 ret = add_file_extent_hole(holes, prev_start, prev_len);
678 ret = add_file_extent_hole(holes, next_start, next_len);
685 static int copy_file_extent_holes(struct rb_root *dst,
688 struct file_extent_hole *hole;
689 struct rb_node *node;
692 node = rb_first(src);
694 hole = rb_entry(node, struct file_extent_hole, node);
695 ret = add_file_extent_hole(dst, hole->start, hole->len);
698 node = rb_next(node);
703 static void free_file_extent_holes(struct rb_root *holes)
705 struct rb_node *node;
706 struct file_extent_hole *hole;
708 node = rb_first(holes);
710 hole = rb_entry(node, struct file_extent_hole, node);
711 rb_erase(node, holes);
713 node = rb_first(holes);
717 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
719 static void record_root_in_trans(struct btrfs_trans_handle *trans,
720 struct btrfs_root *root)
722 if (root->last_trans != trans->transid) {
723 root->track_dirty = 1;
724 root->last_trans = trans->transid;
725 root->commit_root = root->node;
726 extent_buffer_get(root->node);
730 static u8 imode_to_type(u32 imode)
733 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
734 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
735 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
736 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
737 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
738 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
739 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
740 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
743 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
747 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
749 struct device_record *rec1;
750 struct device_record *rec2;
752 rec1 = rb_entry(node1, struct device_record, node);
753 rec2 = rb_entry(node2, struct device_record, node);
754 if (rec1->devid > rec2->devid)
756 else if (rec1->devid < rec2->devid)
762 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
764 struct inode_record *rec;
765 struct inode_backref *backref;
766 struct inode_backref *orig;
767 struct inode_backref *tmp;
768 struct orphan_data_extent *src_orphan;
769 struct orphan_data_extent *dst_orphan;
774 rec = malloc(sizeof(*rec));
776 return ERR_PTR(-ENOMEM);
777 memcpy(rec, orig_rec, sizeof(*rec));
779 INIT_LIST_HEAD(&rec->backrefs);
780 INIT_LIST_HEAD(&rec->orphan_extents);
781 rec->holes = RB_ROOT;
783 list_for_each_entry(orig, &orig_rec->backrefs, list) {
784 size = sizeof(*orig) + orig->namelen + 1;
785 backref = malloc(size);
790 memcpy(backref, orig, size);
791 list_add_tail(&backref->list, &rec->backrefs);
793 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
794 dst_orphan = malloc(sizeof(*dst_orphan));
799 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
800 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
802 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
809 rb = rb_first(&rec->holes);
811 struct file_extent_hole *hole;
813 hole = rb_entry(rb, struct file_extent_hole, node);
819 if (!list_empty(&rec->backrefs))
820 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
821 list_del(&orig->list);
825 if (!list_empty(&rec->orphan_extents))
826 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
827 list_del(&orig->list);
836 static void print_orphan_data_extents(struct list_head *orphan_extents,
839 struct orphan_data_extent *orphan;
841 if (list_empty(orphan_extents))
843 printf("The following data extent is lost in tree %llu:\n",
845 list_for_each_entry(orphan, orphan_extents, list) {
846 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
847 orphan->objectid, orphan->offset, orphan->disk_bytenr,
852 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
854 u64 root_objectid = root->root_key.objectid;
855 int errors = rec->errors;
859 /* reloc root errors, we print its corresponding fs root objectid*/
860 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
861 root_objectid = root->root_key.offset;
862 fprintf(stderr, "reloc");
864 fprintf(stderr, "root %llu inode %llu errors %x",
865 (unsigned long long) root_objectid,
866 (unsigned long long) rec->ino, rec->errors);
868 if (errors & I_ERR_NO_INODE_ITEM)
869 fprintf(stderr, ", no inode item");
870 if (errors & I_ERR_NO_ORPHAN_ITEM)
871 fprintf(stderr, ", no orphan item");
872 if (errors & I_ERR_DUP_INODE_ITEM)
873 fprintf(stderr, ", dup inode item");
874 if (errors & I_ERR_DUP_DIR_INDEX)
875 fprintf(stderr, ", dup dir index");
876 if (errors & I_ERR_ODD_DIR_ITEM)
877 fprintf(stderr, ", odd dir item");
878 if (errors & I_ERR_ODD_FILE_EXTENT)
879 fprintf(stderr, ", odd file extent");
880 if (errors & I_ERR_BAD_FILE_EXTENT)
881 fprintf(stderr, ", bad file extent");
882 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
883 fprintf(stderr, ", file extent overlap");
884 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
885 fprintf(stderr, ", file extent discount");
886 if (errors & I_ERR_DIR_ISIZE_WRONG)
887 fprintf(stderr, ", dir isize wrong");
888 if (errors & I_ERR_FILE_NBYTES_WRONG)
889 fprintf(stderr, ", nbytes wrong");
890 if (errors & I_ERR_ODD_CSUM_ITEM)
891 fprintf(stderr, ", odd csum item");
892 if (errors & I_ERR_SOME_CSUM_MISSING)
893 fprintf(stderr, ", some csum missing");
894 if (errors & I_ERR_LINK_COUNT_WRONG)
895 fprintf(stderr, ", link count wrong");
896 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
897 fprintf(stderr, ", orphan file extent");
898 fprintf(stderr, "\n");
899 /* Print the orphan extents if needed */
900 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
901 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
903 /* Print the holes if needed */
904 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
905 struct file_extent_hole *hole;
906 struct rb_node *node;
909 node = rb_first(&rec->holes);
910 fprintf(stderr, "Found file extent holes:\n");
913 hole = rb_entry(node, struct file_extent_hole, node);
914 fprintf(stderr, "\tstart: %llu, len: %llu\n",
915 hole->start, hole->len);
916 node = rb_next(node);
919 fprintf(stderr, "\tstart: 0, len: %llu\n",
921 root->fs_info->sectorsize));
925 static void print_ref_error(int errors)
927 if (errors & REF_ERR_NO_DIR_ITEM)
928 fprintf(stderr, ", no dir item");
929 if (errors & REF_ERR_NO_DIR_INDEX)
930 fprintf(stderr, ", no dir index");
931 if (errors & REF_ERR_NO_INODE_REF)
932 fprintf(stderr, ", no inode ref");
933 if (errors & REF_ERR_DUP_DIR_ITEM)
934 fprintf(stderr, ", dup dir item");
935 if (errors & REF_ERR_DUP_DIR_INDEX)
936 fprintf(stderr, ", dup dir index");
937 if (errors & REF_ERR_DUP_INODE_REF)
938 fprintf(stderr, ", dup inode ref");
939 if (errors & REF_ERR_INDEX_UNMATCH)
940 fprintf(stderr, ", index mismatch");
941 if (errors & REF_ERR_FILETYPE_UNMATCH)
942 fprintf(stderr, ", filetype mismatch");
943 if (errors & REF_ERR_NAME_TOO_LONG)
944 fprintf(stderr, ", name too long");
945 if (errors & REF_ERR_NO_ROOT_REF)
946 fprintf(stderr, ", no root ref");
947 if (errors & REF_ERR_NO_ROOT_BACKREF)
948 fprintf(stderr, ", no root backref");
949 if (errors & REF_ERR_DUP_ROOT_REF)
950 fprintf(stderr, ", dup root ref");
951 if (errors & REF_ERR_DUP_ROOT_BACKREF)
952 fprintf(stderr, ", dup root backref");
953 fprintf(stderr, "\n");
956 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
959 struct ptr_node *node;
960 struct cache_extent *cache;
961 struct inode_record *rec = NULL;
964 cache = lookup_cache_extent(inode_cache, ino, 1);
966 node = container_of(cache, struct ptr_node, cache);
968 if (mod && rec->refs > 1) {
969 node->data = clone_inode_rec(rec);
970 if (IS_ERR(node->data))
976 rec = calloc(1, sizeof(*rec));
978 return ERR_PTR(-ENOMEM);
980 rec->extent_start = (u64)-1;
982 INIT_LIST_HEAD(&rec->backrefs);
983 INIT_LIST_HEAD(&rec->orphan_extents);
984 rec->holes = RB_ROOT;
986 node = malloc(sizeof(*node));
989 return ERR_PTR(-ENOMEM);
991 node->cache.start = ino;
992 node->cache.size = 1;
995 if (ino == BTRFS_FREE_INO_OBJECTID)
998 ret = insert_cache_extent(inode_cache, &node->cache);
1000 return ERR_PTR(-EEXIST);
1005 static void free_orphan_data_extents(struct list_head *orphan_extents)
1007 struct orphan_data_extent *orphan;
1009 while (!list_empty(orphan_extents)) {
1010 orphan = list_entry(orphan_extents->next,
1011 struct orphan_data_extent, list);
1012 list_del(&orphan->list);
1017 static void free_inode_rec(struct inode_record *rec)
1019 struct inode_backref *backref;
1021 if (--rec->refs > 0)
1024 while (!list_empty(&rec->backrefs)) {
1025 backref = to_inode_backref(rec->backrefs.next);
1026 list_del(&backref->list);
1029 free_orphan_data_extents(&rec->orphan_extents);
1030 free_file_extent_holes(&rec->holes);
1034 static int can_free_inode_rec(struct inode_record *rec)
1036 if (!rec->errors && rec->checked && rec->found_inode_item &&
1037 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1042 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1043 struct inode_record *rec)
1045 struct cache_extent *cache;
1046 struct inode_backref *tmp, *backref;
1047 struct ptr_node *node;
1050 if (!rec->found_inode_item)
1053 filetype = imode_to_type(rec->imode);
1054 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1055 if (backref->found_dir_item && backref->found_dir_index) {
1056 if (backref->filetype != filetype)
1057 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1058 if (!backref->errors && backref->found_inode_ref &&
1059 rec->nlink == rec->found_link) {
1060 list_del(&backref->list);
1066 if (!rec->checked || rec->merging)
1069 if (S_ISDIR(rec->imode)) {
1070 if (rec->found_size != rec->isize)
1071 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1072 if (rec->found_file_extent)
1073 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1074 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1075 if (rec->found_dir_item)
1076 rec->errors |= I_ERR_ODD_DIR_ITEM;
1077 if (rec->found_size != rec->nbytes)
1078 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1079 if (rec->nlink > 0 && !no_holes &&
1080 (rec->extent_end < rec->isize ||
1081 first_extent_gap(&rec->holes) < rec->isize))
1082 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1085 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1086 if (rec->found_csum_item && rec->nodatasum)
1087 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1088 if (rec->some_csum_missing && !rec->nodatasum)
1089 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1092 BUG_ON(rec->refs != 1);
1093 if (can_free_inode_rec(rec)) {
1094 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1095 node = container_of(cache, struct ptr_node, cache);
1096 BUG_ON(node->data != rec);
1097 remove_cache_extent(inode_cache, &node->cache);
1099 free_inode_rec(rec);
1103 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1105 struct btrfs_path path;
1106 struct btrfs_key key;
1109 key.objectid = BTRFS_ORPHAN_OBJECTID;
1110 key.type = BTRFS_ORPHAN_ITEM_KEY;
1113 btrfs_init_path(&path);
1114 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1115 btrfs_release_path(&path);
1121 static int process_inode_item(struct extent_buffer *eb,
1122 int slot, struct btrfs_key *key,
1123 struct shared_node *active_node)
1125 struct inode_record *rec;
1126 struct btrfs_inode_item *item;
1128 rec = active_node->current;
1129 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1130 if (rec->found_inode_item) {
1131 rec->errors |= I_ERR_DUP_INODE_ITEM;
1134 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1135 rec->nlink = btrfs_inode_nlink(eb, item);
1136 rec->isize = btrfs_inode_size(eb, item);
1137 rec->nbytes = btrfs_inode_nbytes(eb, item);
1138 rec->imode = btrfs_inode_mode(eb, item);
1139 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1141 rec->found_inode_item = 1;
1142 if (rec->nlink == 0)
1143 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1144 maybe_free_inode_rec(&active_node->inode_cache, rec);
1148 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1150 int namelen, u64 dir)
1152 struct inode_backref *backref;
1154 list_for_each_entry(backref, &rec->backrefs, list) {
1155 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1157 if (backref->dir != dir || backref->namelen != namelen)
1159 if (memcmp(name, backref->name, namelen))
1164 backref = malloc(sizeof(*backref) + namelen + 1);
1167 memset(backref, 0, sizeof(*backref));
1169 backref->namelen = namelen;
1170 memcpy(backref->name, name, namelen);
1171 backref->name[namelen] = '\0';
1172 list_add_tail(&backref->list, &rec->backrefs);
1176 static int add_inode_backref(struct cache_tree *inode_cache,
1177 u64 ino, u64 dir, u64 index,
1178 const char *name, int namelen,
1179 u8 filetype, u8 itemtype, int errors)
1181 struct inode_record *rec;
1182 struct inode_backref *backref;
1184 rec = get_inode_rec(inode_cache, ino, 1);
1185 BUG_ON(IS_ERR(rec));
1186 backref = get_inode_backref(rec, name, namelen, dir);
1189 backref->errors |= errors;
1190 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1191 if (backref->found_dir_index)
1192 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1193 if (backref->found_inode_ref && backref->index != index)
1194 backref->errors |= REF_ERR_INDEX_UNMATCH;
1195 if (backref->found_dir_item && backref->filetype != filetype)
1196 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1198 backref->index = index;
1199 backref->filetype = filetype;
1200 backref->found_dir_index = 1;
1201 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1203 if (backref->found_dir_item)
1204 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1205 if (backref->found_dir_index && backref->filetype != filetype)
1206 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1208 backref->filetype = filetype;
1209 backref->found_dir_item = 1;
1210 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1211 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1212 if (backref->found_inode_ref)
1213 backref->errors |= REF_ERR_DUP_INODE_REF;
1214 if (backref->found_dir_index && backref->index != index)
1215 backref->errors |= REF_ERR_INDEX_UNMATCH;
1217 backref->index = index;
1219 backref->ref_type = itemtype;
1220 backref->found_inode_ref = 1;
1225 maybe_free_inode_rec(inode_cache, rec);
1229 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1230 struct cache_tree *dst_cache)
1232 struct inode_backref *backref;
1237 list_for_each_entry(backref, &src->backrefs, list) {
1238 if (backref->found_dir_index) {
1239 add_inode_backref(dst_cache, dst->ino, backref->dir,
1240 backref->index, backref->name,
1241 backref->namelen, backref->filetype,
1242 BTRFS_DIR_INDEX_KEY, backref->errors);
1244 if (backref->found_dir_item) {
1246 add_inode_backref(dst_cache, dst->ino,
1247 backref->dir, 0, backref->name,
1248 backref->namelen, backref->filetype,
1249 BTRFS_DIR_ITEM_KEY, backref->errors);
1251 if (backref->found_inode_ref) {
1252 add_inode_backref(dst_cache, dst->ino,
1253 backref->dir, backref->index,
1254 backref->name, backref->namelen, 0,
1255 backref->ref_type, backref->errors);
1259 if (src->found_dir_item)
1260 dst->found_dir_item = 1;
1261 if (src->found_file_extent)
1262 dst->found_file_extent = 1;
1263 if (src->found_csum_item)
1264 dst->found_csum_item = 1;
1265 if (src->some_csum_missing)
1266 dst->some_csum_missing = 1;
1267 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1268 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1273 BUG_ON(src->found_link < dir_count);
1274 dst->found_link += src->found_link - dir_count;
1275 dst->found_size += src->found_size;
1276 if (src->extent_start != (u64)-1) {
1277 if (dst->extent_start == (u64)-1) {
1278 dst->extent_start = src->extent_start;
1279 dst->extent_end = src->extent_end;
1281 if (dst->extent_end > src->extent_start)
1282 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1283 else if (dst->extent_end < src->extent_start) {
1284 ret = add_file_extent_hole(&dst->holes,
1286 src->extent_start - dst->extent_end);
1288 if (dst->extent_end < src->extent_end)
1289 dst->extent_end = src->extent_end;
1293 dst->errors |= src->errors;
1294 if (src->found_inode_item) {
1295 if (!dst->found_inode_item) {
1296 dst->nlink = src->nlink;
1297 dst->isize = src->isize;
1298 dst->nbytes = src->nbytes;
1299 dst->imode = src->imode;
1300 dst->nodatasum = src->nodatasum;
1301 dst->found_inode_item = 1;
1303 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Splice the inode records cached in @src_node into @dst_node.
 *
 * Two cache pairs are handled: root_cache first, then inode_cache (see the
 * switch of src/dst near the end). For every entry a fresh ptr_node is
 * allocated and inserted into the destination cache; if the destination
 * already holds an entry for the same range (-EEXIST) the record is merged
 * into the existing one instead.
 *
 * NOTE(review): several short lines of this function (braces, the loop
 * header, the assignment of rec, return statements) are not visible in this
 * listing; confirm the exact control flow against the full file.
 */
1311 static int splice_shared_node(struct shared_node *src_node,
1312 struct shared_node *dst_node)
1314 struct cache_extent *cache;
1315 struct ptr_node *node, *ins;
1316 struct cache_tree *src, *dst;
1317 struct inode_record *rec, *conflict;
1318 u64 current_ino = 0;
/* Drop one reference on the source node; the branch body is not visible here. */
1322 if (--src_node->refs == 0)
/* Remember which inode the source was working on so it can be carried over. */
1324 if (src_node->current)
1325 current_ino = src_node->current->ino;
1327 src = &src_node->root_cache;
1328 dst = &dst_node->root_cache;
1330 cache = search_cache_extent(src, 0);
1332 node = container_of(cache, struct ptr_node, cache);
/* Advance before the current entry may be unlinked from src. */
1334 cache = next_cache_extent(cache);
1337 remove_cache_extent(src, &node->cache);
1340 ins = malloc(sizeof(*ins));
1342 ins->cache.start = node->cache.start;
1343 ins->cache.size = node->cache.size;
1347 ret = insert_cache_extent(dst, &ins->cache);
/* dst already has a record for this range: merge rec into it and free rec. */
1348 if (ret == -EEXIST) {
1349 conflict = get_inode_rec(dst, rec->ino, 1);
1350 BUG_ON(IS_ERR(conflict));
1351 merge_inode_recs(rec, conflict, dst);
1353 conflict->checked = 1;
1354 if (dst_node->current == conflict)
1355 dst_node->current = NULL;
1357 maybe_free_inode_rec(dst, conflict);
1358 free_inode_rec(rec);
/* First pass handled root_cache; switch both ends to inode_cache. */
1365 if (src == &src_node->root_cache) {
1366 src = &src_node->inode_cache;
1367 dst = &dst_node->inode_cache;
/*
 * If the source was positioned on a later inode than the destination,
 * mark the destination's current record as checked and move the
 * destination's current pointer to the source's inode.
 */
1371 if (current_ino > 0 && (!dst_node->current ||
1372 current_ino > dst_node->current->ino)) {
1373 if (dst_node->current) {
1374 dst_node->current->checked = 1;
1375 maybe_free_inode_rec(dst, dst_node->current);
1377 dst_node->current = get_inode_rec(dst, current_ino, 1);
1378 BUG_ON(IS_ERR(dst_node->current));
/*
 * Per-entry destructor for a cache tree of ptr_node entries: recovers the
 * ptr_node embedding @cache and frees the inode record.
 *
 * NOTE(review): the line assigning rec (and any release of node itself) is
 * not visible in this listing.
 */
1383 static void free_inode_ptr(struct cache_extent *cache)
1385 struct ptr_node *node;
1386 struct inode_record *rec;
1388 node = container_of(cache, struct ptr_node, cache);
1390 free_inode_rec(rec);
/*
 * Instantiate a tree-freeing helper for "inode_recs" that uses
 * free_inode_ptr() on each entry. Presumably this generates
 * free_inode_recs_tree(), which enter_shared_node() calls - confirm against
 * the macro definition.
 */
1394 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the shared_node in @shared whose cache extent covers @bytenr
 * (the lookup uses a size of 1).
 *
 * NOTE(review): the return statements and the not-found check are not
 * visible in this listing.
 */
1396 static struct shared_node *find_shared_node(struct cache_tree *shared,
1399 struct cache_extent *cache;
1400 struct shared_node *node;
1402 cache = lookup_cache_extent(shared, bytenr, 1);
1404 node = container_of(cache, struct shared_node, cache);
/*
 * Allocate a zero-initialised shared_node keyed at @bytenr (size 1), set up
 * its two empty caches and insert it into @shared.
 *
 * NOTE(review): the allocation-failure check, the store of @refs and the
 * return are not visible in this listing.
 */
1410 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1413 struct shared_node *node;
1415 node = calloc(1, sizeof(*node));
1418 node->cache.start = bytenr;
1419 node->cache.size = 1;
1420 cache_tree_init(&node->root_cache);
1421 cache_tree_init(&node->inode_cache);
1424 ret = insert_cache_extent(shared, &node->cache);
/*
 * Called when the tree walk reaches the shared tree block at @bytenr on
 * @level. Either registers a new shared_node for the block and makes it the
 * active node, or consumes one reference of an already known shared_node
 * and splices its cached records into the currently active node.
 *
 * NOTE(review): branch bodies and return statements are partly missing from
 * this listing; the return value convention cannot be confirmed here.
 */
1429 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1430 struct walk_control *wc, int level)
1432 struct shared_node *node;
1433 struct shared_node *dest;
1436 if (level == wc->active_node)
/* The active node must sit above the level being entered. */
1439 BUG_ON(wc->active_node <= level);
1440 node = find_shared_node(&wc->shared, bytenr);
/* Block not tracked yet: add it and make this level the active one. */
1442 ret = add_shared_node(&wc->shared, bytenr, refs);
1444 node = find_shared_node(&wc->shared, bytenr);
1445 wc->nodes[level] = node;
1446 wc->active_node = level;
/*
 * Active node is the root level and the root item has zero refs: just
 * drop one reference and tear the node down on the last one.
 */
1450 if (wc->root_level == wc->active_node &&
1451 btrfs_root_refs(&root->root_item) == 0) {
1452 if (--node->refs == 0) {
1453 free_inode_recs_tree(&node->root_cache);
1454 free_inode_recs_tree(&node->inode_cache);
1455 remove_cache_extent(&wc->shared, &node->cache);
/* Otherwise fold the cached records into the currently active node. */
1461 dest = wc->nodes[wc->active_node];
1462 splice_shared_node(node, dest);
1463 if (node->refs == 0) {
1464 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Called when the walk leaves the shared node that is active at @level:
 * picks a new active node at a higher level and, unless the root itself is
 * going away, splices the records collected so far into it.
 *
 * NOTE(review): the body of the search loop and the final branches are not
 * fully visible in this listing.
 */
1470 static int leave_shared_node(struct btrfs_root *root,
1471 struct walk_control *wc, int level)
1473 struct shared_node *node;
1474 struct shared_node *dest;
1477 if (level == wc->root_level)
/* Search upwards for the next level to activate (loop body not visible). */
1480 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1484 BUG_ON(i >= BTRFS_MAX_LEVEL);
/* Detach the node being left and activate level i. */
1486 node = wc->nodes[wc->active_node];
1487 wc->nodes[wc->active_node] = NULL;
1488 wc->active_node = i;
1490 dest = wc->nodes[wc->active_node];
1491 if (wc->active_node < wc->root_level ||
1492 btrfs_root_refs(&root->root_item) > 0) {
1493 BUG_ON(node->refs <= 1);
1494 splice_shared_node(node, dest);
1496 BUG_ON(node->refs < 2);
1505 * 1 - if the root with id child_root_id is a child of root parent_root_id
1506 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1507 * has other root(s) as parent(s)
1508 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Decide how @child_root_id relates to @parent_root_id by consulting the
 * tree root: first an exact ROOT_REF (parent -> child) lookup, then a scan
 * over the child's ROOT_BACKREF items.
 *
 * The final return yields 0 when some parent was seen (has_parent) and 2
 * when none was; earlier returns are not visible in this listing.
 */
1510 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1513 struct btrfs_path path;
1514 struct btrfs_key key;
1515 struct extent_buffer *leaf;
1519 btrfs_init_path(&path);
/* Step 1: direct lookup of the (parent, ROOT_REF, child) item. */
1521 key.objectid = parent_root_id;
1522 key.type = BTRFS_ROOT_REF_KEY;
1523 key.offset = child_root_id;
1524 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1528 btrfs_release_path(&path);
/* Step 2: walk the child's ROOT_BACKREF items looking for the parent. */
1532 key.objectid = child_root_id;
1533 key.type = BTRFS_ROOT_BACKREF_KEY;
1535 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1541 leaf = path.nodes[0];
/* Ran off the end of this leaf: continue in the next one. */
1542 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1543 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1546 leaf = path.nodes[0];
1549 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Left the range of backref items belonging to the child. */
1550 if (key.objectid != child_root_id ||
1551 key.type != BTRFS_ROOT_BACKREF_KEY)
1556 if (key.offset == parent_root_id) {
1557 btrfs_release_path(&path);
1564 btrfs_release_path(&path);
1567 return has_parent ? 0 : 2;
/*
 * Process one DIR_ITEM / DIR_INDEX item at @slot of @eb for the inode that
 * is current in @active_node. Every name packed in the item is recorded as
 * a backref, either in the inode cache (location is an inode item) or in
 * the root cache (location is a root item).
 *
 * NOTE(review): declarations of the scalar locals, the initialisation of
 * cur/nritems and the return are not visible in this listing.
 */
1570 static int process_dir_item(struct extent_buffer *eb,
1571 int slot, struct btrfs_key *key,
1572 struct shared_node *active_node)
1582 struct btrfs_dir_item *di;
1583 struct inode_record *rec;
1584 struct cache_tree *root_cache;
1585 struct cache_tree *inode_cache;
1586 struct btrfs_key location;
1587 char namebuf[BTRFS_NAME_LEN];
1589 root_cache = &active_node->root_cache;
1590 inode_cache = &active_node->inode_cache;
1591 rec = active_node->current;
1592 rec->found_dir_item = 1;
1594 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1595 total = btrfs_item_size_nr(eb, slot);
/* An item may hold several dir entries back to back. */
1596 while (cur < total) {
1598 btrfs_dir_item_key_to_cpu(eb, di, &location);
1599 name_len = btrfs_dir_name_len(eb, di);
1600 data_len = btrfs_dir_data_len(eb, di);
1601 filetype = btrfs_dir_type(eb, di);
/* Each entry's name length is accumulated into the record's found_size. */
1603 rec->found_size += name_len;
/* Name would cross the item end or exceeds the maximum: flag and clamp. */
1604 if (cur + sizeof(*di) + name_len > total ||
1605 name_len > BTRFS_NAME_LEN) {
1606 error = REF_ERR_NAME_TOO_LONG;
1608 if (cur + sizeof(*di) > total)
1610 len = min_t(u32, total - cur - sizeof(*di),
1617 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
/* For DIR_ITEM keys the key offset must equal the hash of the name. */
1619 if (key->type == BTRFS_DIR_ITEM_KEY &&
1620 key->offset != btrfs_name_hash(namebuf, len)) {
1621 rec->errors |= I_ERR_ODD_DIR_ITEM;
1622 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1623 key->objectid, key->offset, namebuf, len, filetype,
1624 key->offset, btrfs_name_hash(namebuf, len));
1627 if (location.type == BTRFS_INODE_ITEM_KEY) {
1628 add_inode_backref(inode_cache, location.objectid,
1629 key->objectid, key->offset, namebuf,
1630 len, filetype, key->type, error);
1631 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1632 add_inode_backref(root_cache, location.objectid,
1633 key->objectid, key->offset,
1634 namebuf, len, filetype,
/* Unknown location type: report it and file the entry under a placeholder id. */
1637 fprintf(stderr, "invalid location in dir item %u\n",
1639 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1640 key->objectid, key->offset, namebuf,
1641 len, filetype, key->type, error);
/* Step over header, name and data to the next packed entry. */
1644 len = sizeof(*di) + name_len + data_len;
1645 di = (struct btrfs_dir_item *)((char *)di + len);
/* More than one entry under a DIR_INDEX key is flagged as a duplicate index. */
1648 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1649 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Process one INODE_REF item at @slot of @eb: for every (index, name) pair
 * packed in the item, record a backref for inode key->objectid with
 * key->offset as the parent directory.
 *
 * NOTE(review): scalar local declarations, the else-branch that sets len
 * for well-formed names and the return are not visible in this listing.
 */
1654 static int process_inode_ref(struct extent_buffer *eb,
1655 int slot, struct btrfs_key *key,
1656 struct shared_node *active_node)
1664 struct cache_tree *inode_cache;
1665 struct btrfs_inode_ref *ref;
1666 char namebuf[BTRFS_NAME_LEN];
1668 inode_cache = &active_node->inode_cache;
1670 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1671 total = btrfs_item_size_nr(eb, slot);
1672 while (cur < total) {
1673 name_len = btrfs_inode_ref_name_len(eb, ref);
1674 index = btrfs_inode_ref_index(eb, ref);
1676 /* inode_ref + namelen should not cross item boundary */
1677 if (cur + sizeof(*ref) + name_len > total ||
1678 name_len > BTRFS_NAME_LEN) {
/* Not even the fixed-size header fits in what is left of the item. */
1679 if (total < cur + sizeof(*ref))
1682 /* Still try to read out the remaining part */
1683 len = min_t(u32, total - cur - sizeof(*ref),
1685 error = REF_ERR_NAME_TOO_LONG;
1691 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1692 add_inode_backref(inode_cache, key->objectid, key->offset,
1693 index, namebuf, len, 0, key->type, error);
/* Advance by the on-disk name length, not the possibly clamped len. */
1695 len = sizeof(*ref) + name_len;
1696 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Process one INODE_EXTREF item at @slot of @eb: for every entry packed in
 * the item, record a backref for inode key->objectid under the parent
 * directory stored in the entry.
 *
 * NOTE(review): unlike process_inode_ref(), the visible lines only compare
 * name_len against BTRFS_NAME_LEN and do not check that the entry stays
 * inside the item (cur + sizeof(*extref) + name_len <= total). Confirm
 * whether a bound check exists in lines missing from this listing.
 */
1702 static int process_inode_extref(struct extent_buffer *eb,
1703 int slot, struct btrfs_key *key,
1704 struct shared_node *active_node)
1713 struct cache_tree *inode_cache;
1714 struct btrfs_inode_extref *extref;
1715 char namebuf[BTRFS_NAME_LEN];
1717 inode_cache = &active_node->inode_cache;
1719 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1720 total = btrfs_item_size_nr(eb, slot);
1721 while (cur < total) {
1722 name_len = btrfs_inode_extref_name_len(eb, extref);
1723 index = btrfs_inode_extref_index(eb, extref);
1724 parent = btrfs_inode_extref_parent(eb, extref);
1725 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: read only what fits in namebuf and flag the backref. */
1729 len = BTRFS_NAME_LEN;
1730 error = REF_ERR_NAME_TOO_LONG;
1732 read_extent_buffer(eb, namebuf,
1733 (unsigned long)(extref + 1), len);
1734 add_inode_backref(inode_cache, key->objectid, parent,
1735 index, namebuf, len, 0, key->type, error);
/* Advance by the on-disk name length to reach the next packed entry. */
1737 len = sizeof(*extref) + name_len;
1738 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Walk the checksum tree over the byte range [@start, @start + @len) and
 * report through @found how much of it is covered by csum items.
 *
 * NOTE(review): the accumulation into *found, the loop header and the
 * return are not visible in this listing; the description of @found is
 * inferred from the parameter name and the visible size computation.
 */
1745 static int count_csum_range(struct btrfs_root *root, u64 start,
1746 u64 len, u64 *found)
1748 struct btrfs_key key;
1749 struct btrfs_path path;
1750 struct extent_buffer *leaf;
1755 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1757 btrfs_init_path(&path);
1759 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1761 key.type = BTRFS_EXTENT_CSUM_KEY;
1763 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * No exact match: the preceding item may still cover @start, so look at
 * the previous slot when it is a csum item as well.
 */
1767 if (ret > 0 && path.slots[0] > 0) {
1768 leaf = path.nodes[0];
1769 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1770 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1771 key.type == BTRFS_EXTENT_CSUM_KEY)
1776 leaf = path.nodes[0];
1777 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1778 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1783 leaf = path.nodes[0];
1786 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Ran out of csum items. */
1787 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1788 key.type != BTRFS_EXTENT_CSUM_KEY)
1791 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* This item starts at or beyond the end of the requested range. */
1792 if (key.offset >= start + len)
1795 if (key.offset > start)
/*
 * One csum of csum_size bytes covers one sector, so the item covers
 * (size / csum_size) sectors starting at key.offset.
 */
1798 size = btrfs_item_size_nr(leaf, path.slots[0]);
1799 csum_end = key.offset + (size / csum_size) *
1800 root->fs_info->sectorsize;
/* Overlap between the item's coverage and what is left of the range. */
1801 if (csum_end > start) {
1802 size = min(csum_end - start, len);
1811 btrfs_release_path(&path);
/*
 * Process one EXTENT_DATA item at @slot of @eb for the inode that is
 * current in @active_node: track extent continuity (overlaps and holes),
 * validate the extent item fields, accumulate found_size and check that
 * checksums exist (or do not exist) as appropriate for the extent type.
 *
 * NOTE(review): several short lines (local declarations, else branches,
 * error returns) are not visible in this listing.
 */
1817 static int process_file_extent(struct btrfs_root *root,
1818 struct extent_buffer *eb,
1819 int slot, struct btrfs_key *key,
1820 struct shared_node *active_node)
1822 struct inode_record *rec;
1823 struct btrfs_file_extent_item *fi;
1825 u64 disk_bytenr = 0;
1826 u64 extent_offset = 0;
/* Used to test and round sizes against the sector size. */
1827 u64 mask = root->fs_info->sectorsize - 1;
1831 rec = active_node->current;
1832 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1833 rec->found_file_extent = 1;
/* (u64)-1 marks "no extent seen yet" for this inode. */
1835 if (rec->extent_start == (u64)-1) {
1836 rec->extent_start = key->offset;
1837 rec->extent_end = key->offset;
/* Starts before the previous extent ended: overlap; after it: a hole. */
1840 if (rec->extent_end > key->offset)
1841 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1842 else if (rec->extent_end < key->offset) {
1843 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1844 key->offset - rec->extent_end);
1849 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1850 extent_type = btrfs_file_extent_type(eb, fi);
1852 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1853 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1855 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1856 rec->found_size += num_bytes;
/* Round the inline length up to a full sector for extent_end tracking. */
1857 num_bytes = (num_bytes + mask) & ~mask;
1858 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1859 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1860 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1861 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1862 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned. */
1863 if (num_bytes == 0 || (num_bytes & mask))
1864 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced slice must lie within the extent's ram_bytes. */
1865 if (num_bytes + extent_offset >
1866 btrfs_file_extent_ram_bytes(eb, fi))
1867 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not carry any encoding. */
1868 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1869 (btrfs_file_extent_compression(eb, fi) ||
1870 btrfs_file_extent_encryption(eb, fi) ||
1871 btrfs_file_extent_other_encoding(eb, fi)))
1872 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents with a non-zero disk_bytenr are added to found_size. */
1873 if (disk_bytenr > 0)
1874 rec->found_size += num_bytes;
1876 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1878 rec->extent_end = key->offset + num_bytes;
1881 * The data reloc tree will copy full extents into its inode and then
1882 * copy the corresponding csums. Because the extent it copied could be
1883 * a preallocated extent that hasn't been written to yet there may be no
1884 * csums to copy, ergo we won't have csums for our file extent. This is
1885 * ok so just don't bother checking csums if the inode belongs to the
1888 if (disk_bytenr > 0 &&
1889 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* For compressed extents the length is switched to disk_num_bytes. */
1891 if (btrfs_file_extent_compression(eb, fi))
1892 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1894 disk_bytenr += extent_offset;
1896 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1899 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1901 rec->found_csum_item = 1;
1902 if (found < num_bytes)
1903 rec->some_csum_missing = 1;
1904 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1906 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Iterate over all items of leaf @eb and dispatch each one, by key type, to
 * the matching process_*() helper, keeping @wc's active node positioned on
 * the inode record of the item being processed.
 *
 * NOTE(review): the switch header, break/continue statements and the
 * return are not visible in this listing.
 */
1912 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1913 struct walk_control *wc)
1915 struct btrfs_key key;
1919 struct cache_tree *inode_cache;
1920 struct shared_node *active_node;
/* Same "root level is active and root has zero refs" test as enter_shared_node(). */
1922 if (wc->root_level == wc->active_node &&
1923 btrfs_root_refs(&root->root_item) == 0)
1926 active_node = wc->nodes[wc->active_node];
1927 inode_cache = &active_node->inode_cache;
1928 nritems = btrfs_header_nritems(eb);
1929 for (i = 0; i < nritems; i++) {
1930 btrfs_item_key_to_cpu(eb, &key, i);
1932 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1934 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/*
 * Moved on to a higher inode number: mark the previous record as
 * checked and fetch the record for the new inode.
 */
1937 if (active_node->current == NULL ||
1938 active_node->current->ino < key.objectid) {
1939 if (active_node->current) {
1940 active_node->current->checked = 1;
1941 maybe_free_inode_rec(inode_cache,
1942 active_node->current);
1944 active_node->current = get_inode_rec(inode_cache,
1946 BUG_ON(IS_ERR(active_node->current));
1949 case BTRFS_DIR_ITEM_KEY:
1950 case BTRFS_DIR_INDEX_KEY:
1951 ret = process_dir_item(eb, i, &key, active_node);
1953 case BTRFS_INODE_REF_KEY:
1954 ret = process_inode_ref(eb, i, &key, active_node);
1956 case BTRFS_INODE_EXTREF_KEY:
1957 ret = process_inode_extref(eb, i, &key, active_node);
1959 case BTRFS_INODE_ITEM_KEY:
1960 ret = process_inode_item(eb, i, &key, active_node);
1962 case BTRFS_EXTENT_DATA_KEY:
1963 ret = process_file_extent(root, eb, i, &key,
/*
 * Fields of the per-walk reference cache (struct node_refs, going by the
 * prototypes that follow; the struct header line is not visible here).
 * Every array is indexed by tree level.
 */
/* bytenr of the tree block last recorded at each level */
1974 u64 bytenr[BTRFS_MAX_LEVEL];
/* reference count looked up for that block */
1975 u64 refs[BTRFS_MAX_LEVEL];
/* non-zero when the block at this level has to be checked from the current root */
1976 int need_check[BTRFS_MAX_LEVEL];
1977 /* field for checking all trees */
1978 int checked[BTRFS_MAX_LEVEL];
1979 /* the corresponding extent should be marked as full backref or not */
1980 int full_backref[BTRFS_MAX_LEVEL];
/* Forward declarations: both functions are used before they are defined. */
1983 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1984 struct extent_buffer *eb, struct node_refs *nrefs,
1985 u64 level, int check_all);
1986 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1987 unsigned int ext_ref);
1990 * Returns >0 Found error, not fatal, should continue
1991 * Returns <0 Fatal error, must exit the whole check
1992 * Returns 0 No errors found
/*
 * Check every inode found in the leaf at path->nodes[0] by calling
 * check_inode_item() repeatedly. When that call moves @path onto another
 * leaf, the cached node references in @nrefs are refreshed from the root
 * level downwards.
 *
 * NOTE(review): loop headers, break/goto statements and the final return
 * are not visible in this listing.
 */
1994 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1995 struct node_refs *nrefs, int *level, int ext_ref)
1997 struct extent_buffer *cur = path->nodes[0];
1998 struct btrfs_key key;
2002 int root_level = btrfs_header_level(root->node);
2004 int ret = 0; /* Final return value */
2005 int err = 0; /* Positive error bitmap */
/* Remember which leaf we started on to detect a leaf switch later. */
2007 cur_bytenr = cur->start;
2009 /* skip to first inode item or the first inode number change */
2010 nritems = btrfs_header_nritems(cur);
2011 for (i = 0; i < nritems; i++) {
2012 btrfs_item_key_to_cpu(cur, &key, i);
2014 first_ino = key.objectid;
2015 if (key.type == BTRFS_INODE_ITEM_KEY ||
2016 (first_ino && first_ino != key.objectid))
2020 path->slots[0] = nritems;
2026 err |= check_inode_item(root, path, ext_ref);
2028 /* modify cur since check_inode_item may change path */
2029 cur = path->nodes[0];
2031 if (err & LAST_ITEM)
2034 /* still have inode items in this leaf */
2035 if (cur->start == cur_bytenr)
2039 * we have switched to another leaf, above nodes may
2040 * have changed, here walk down the path, if a node
2041 * or leaf is shared, check whether we can skip this
2044 for (i = root_level; i >= 0; i--) {
/* Level unchanged since the last update: nothing to refresh. */
2045 if (path->nodes[i]->start == nrefs->bytenr[i])
2048 ret = update_nodes_refs(root, path->nodes[i]->start,
2049 path->nodes[i], nrefs, i, 0);
2053 if (!nrefs->need_check[i]) {
/* Release the path entries below *level. */
2059 for (i = 0; i < *level; i++) {
2060 free_extent_buffer(path->nodes[i]);
2061 path->nodes[i] = NULL;
/*
 * Issue readahead for the children of @node, starting at child pointer
 * @slot and running to the last pointer in the node.
 *
 * NOTE(review): an early-return condition on the node level appears to sit
 * on lines that are not visible in this listing.
 */
2070 static void reada_walk_down(struct btrfs_root *root,
2071 struct extent_buffer *node, int slot)
2073 struct btrfs_fs_info *fs_info = root->fs_info;
2080 level = btrfs_header_level(node);
2084 nritems = btrfs_header_nritems(node);
2085 for (i = slot; i < nritems; i++) {
2086 bytenr = btrfs_node_blockptr(node, i);
2087 ptr_gen = btrfs_node_ptr_generation(node, i);
2088 readahead_tree_block(fs_info, bytenr, ptr_gen);
2093 * Check the child node/leaf by the following condition:
2094 * 1. the first item key of the node/leaf should be the same with the one
2096 * 2. block in parent node should match the child node/leaf.
2097 * 3. generation of parent node and child's header should be consistent.
2099 * Or the child node/leaf pointed by the key in parent is not valid.
2101 * We hope to check leaf owner too, but since subvol may share leaves,
2102 * which makes leaf owner check not so strong, key check should be
2103 * sufficient enough for that case.
2105 static int check_child_node(struct extent_buffer *parent, int slot,
2106 struct extent_buffer *child)
2108 struct btrfs_key parent_key;
2109 struct btrfs_key child_key;
2112 btrfs_node_key_to_cpu(parent, &parent_key, slot);
2113 if (btrfs_header_level(child) == 0)
2114 btrfs_item_key_to_cpu(child, &child_key, 0);
2116 btrfs_node_key_to_cpu(child, &child_key, 0);
2118 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2121 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2122 parent_key.objectid, parent_key.type, parent_key.offset,
2123 child_key.objectid, child_key.type, child_key.offset);
2125 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2127 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2128 btrfs_node_blockptr(parent, slot),
2129 btrfs_header_bytenr(child));
2131 if (btrfs_node_ptr_generation(parent, slot) !=
2132 btrfs_header_generation(child)) {
2134 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2135 btrfs_header_generation(child),
2136 btrfs_node_ptr_generation(parent, slot));
2142 * If a tree node or leaf is shared, we don't need to iterate it in every
2143 * fs or file tree check. Here we find all its root ids, and only check
2144 * it in the fs or file tree which has the smallest root id.
/*
 * Decide whether a tree block referenced by the roots in @roots has to be
 * checked while walking @root: the block is handled by the root whose id is
 * the first (smallest) entry of @roots.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * presumably non-zero means "check here" - confirm against the full file.
 */
2146 static int need_check(struct btrfs_root *root, struct ulist *roots)
2148 struct rb_node *node;
2149 struct ulist_node *u;
/* Only one root references the block. */
2151 if (roots->nnodes == 1)
/* First entry of the rb-tree is the smallest root id. */
2154 node = rb_first(&roots->root);
2155 u = rb_entry(node, struct ulist_node, rb_node);
2157 * current root id is not smallest, we skip it and let it be checked
2158 * in the fs or file tree who hash the smallest root id.
2160 if (root->objectid != u->val)
/*
 * Work out whether the tree block @eb, as seen from @root, should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF, and set or clear that bit in *flags_ret.
 *
 * Cheap tests on the root id, the root bytenr, the RELOC header flag and
 * the block owner come first; otherwise the block's extent item is looked
 * up in the extent tree and its flags and inline refs are inspected.
 *
 * NOTE(review): the goto targets taken by the individual tests, the
 * inline-ref loop header and the return are not visible in this listing.
 */
2166 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
2169 struct btrfs_root *extent_root = root->fs_info->extent_root;
2170 struct btrfs_root_item *ri = &root->root_item;
2171 struct btrfs_extent_inline_ref *iref;
2172 struct btrfs_extent_item *ei;
2173 struct btrfs_key key;
2174 struct btrfs_path *path = NULL;
2185 * Except file/reloc tree, we can not have FULL BACKREF MODE
2187 if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* The block is the root node of this tree. */
2191 if (eb->start == btrfs_root_bytenr(ri))
2194 if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
2197 owner = btrfs_header_owner(eb);
2198 if (owner == root->objectid)
2201 path = btrfs_alloc_path();
/* Search with the largest offset, then step back to the block's extent item. */
2205 key.objectid = btrfs_header_bytenr(eb);
2207 key.offset = (u64)-1;
2209 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2216 ret = btrfs_previous_extent_item(extent_root, path,
2222 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* From here on eb refers to the extent-tree leaf, not the checked block. */
2224 eb = path->nodes[0];
2225 slot = path->slots[0];
2226 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
2228 flags = btrfs_extent_flags(eb, ei);
2229 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
/* Inline refs follow the extent item (and the tree block info, if present). */
2232 ptr = (unsigned long)(ei + 1);
2233 end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
2235 if (key.type == BTRFS_EXTENT_ITEM_KEY)
2236 ptr += sizeof(struct btrfs_tree_block_info);
2239 /* Reached extent item ends normally */
2243 /* Beyond extent item end, wrong item size */
2245 error("extent item at bytenr %llu slot %d has wrong size",
2250 iref = (struct btrfs_extent_inline_ref *)ptr;
2251 offset = btrfs_extent_inline_ref_offset(eb, iref);
2252 type = btrfs_extent_inline_ref_type(eb, iref);
/* A plain tree block ref whose offset equals the header owner. */
2254 if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
2256 ptr += btrfs_extent_inline_ref_size(type);
2260 *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
2264 *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2266 btrfs_free_path(path);
2271 * For a tree node or leaf, we record its reference count, so that if we
2272 * process this node or leaf again later, we don't need to compute it again.
2274 * @bytenr if @bytenr == (u64)-1, only update nrefs->full_backref[level]
/*
 * Refresh the cached information in @nrefs for the tree block at @bytenr on
 * @level: its reference count, whether it needs checking from @root and,
 * when @check_all is set and @eb is given, whether it is full-backref.
 *
 * A @bytenr of (u64)-1 skips the lookup part and only updates
 * nrefs->full_backref[level].
 *
 * NOTE(review): else branches, error handling and the return are not fully
 * visible in this listing.
 */
2276 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2277 struct extent_buffer *eb, struct node_refs *nrefs,
2278 u64 level, int check_all)
2280 struct ulist *roots;
2283 int root_level = btrfs_header_level(root->node);
/* Already cached for this level. */
2287 if (nrefs->bytenr[level] == bytenr)
2290 if (bytenr != (u64)-1) {
2291 /* the return value of this function seems a mistake */
2292 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2293 level, 1, &refs, &flags);
2295 if (ret < 0 && !check_all)
/* Record the new block and reset the per-level state. */
2298 nrefs->bytenr[level] = bytenr;
2299 nrefs->refs[level] = refs;
2300 nrefs->full_backref[level] = 0;
2301 nrefs->checked[level] = 0;
/* Find all roots referencing the block to decide who checks it. */
2304 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2309 check = need_check(root, roots);
2311 nrefs->need_check[level] = check;
2314 nrefs->need_check[level] = 1;
2316 if (level == root_level) {
2317 nrefs->need_check[level] = 1;
2320 * The node refs may have not been
2321 * updated if upper needs checking (the
2322 * lowest root_objectid) the node can
2325 nrefs->need_check[level] =
2326 nrefs->need_check[level + 1];
2332 if (check_all && eb) {
2333 calc_extent_flag_v2(root, eb, &flags);
2334 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2335 nrefs->full_backref[level] = 1;
2342 * @level if @level == -1 it means an extent data item,
2343 * otherwise a normal tree block.
/*
 * Decide whether the extent at @level (or a data extent when @level is -1)
 * can be checked strictly, based on the reference counts cached in @nrefs
 * for the levels above it.
 *
 * NOTE(review): all return statements are missing from this listing.
 * Presumably a shared ancestor (refs > 1) disables strict checking -
 * confirm against the full file.
 */
2345 static int should_check_extent_strictly(struct btrfs_root *root,
2346 struct node_refs *nrefs, int level)
2348 int root_level = btrfs_header_level(root->node);
/* Reject levels outside [-1, root_level]. */
2350 if (level > root_level || level < -1)
2352 if (level == root_level)
2355 * if the upper node is marked full backref, it should contain shared
2356 * backref of the parent (except owner == root->objectid).
/* Look at every level above @level up to the root. */
2358 while (++level <= root_level)
2359 if (nrefs->refs[level] > 1)
/*
 * Descend from path->nodes[*level] towards the leaves, processing leaves
 * with process_one_leaf() and registering shared blocks (presumably those
 * with refs > 1; the condition is not visible) via enter_shared_node().
 * Each child block is read, validated against its parent pointer and
 * sanity-checked before the walk steps into it.
 *
 * NOTE(review): many short lines (conditions on refs, goto/continue/break,
 * error assignments, the return) are not visible in this listing.
 */
2365 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2366 struct walk_control *wc, int *level,
2367 struct node_refs *nrefs)
2369 enum btrfs_tree_block_status status;
2372 struct btrfs_fs_info *fs_info = root->fs_info;
2373 struct extent_buffer *next;
2374 struct extent_buffer *cur;
2378 WARN_ON(*level < 0);
2379 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Use the cached ref count for the starting block when it matches. */
2381 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2382 refs = nrefs->refs[*level];
2385 ret = btrfs_lookup_extent_info(NULL, root,
2386 path->nodes[*level]->start,
2387 *level, 1, &refs, NULL);
2392 nrefs->bytenr[*level] = path->nodes[*level]->start;
2393 nrefs->refs[*level] = refs;
2397 ret = enter_shared_node(root, path->nodes[*level]->start,
2405 while (*level >= 0) {
2406 WARN_ON(*level < 0);
2407 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2408 cur = path->nodes[*level];
2410 if (btrfs_header_level(cur) != *level)
/* All slots of this block have been consumed. */
2413 if (path->slots[*level] >= btrfs_header_nritems(cur))
2416 ret = process_one_leaf(root, cur, wc);
2421 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2422 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
/* Same caching scheme for the child one level down. */
2424 if (bytenr == nrefs->bytenr[*level - 1]) {
2425 refs = nrefs->refs[*level - 1];
2427 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2428 *level - 1, 1, &refs, NULL);
2432 nrefs->bytenr[*level - 1] = bytenr;
2433 nrefs->refs[*level - 1] = refs;
2438 ret = enter_shared_node(root, bytenr, refs,
2441 path->slots[*level]++;
/* Get the child block, reading it from disk if it is not cached and current. */
2446 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2447 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2448 free_extent_buffer(next);
2449 reada_walk_down(root, cur, path->slots[*level]);
2450 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
/* Read failed: record the parent slot as a corrupt extent. */
2451 if (!extent_buffer_uptodate(next)) {
2452 struct btrfs_key node_key;
2454 btrfs_node_key_to_cpu(path->nodes[*level],
2456 path->slots[*level]);
2457 btrfs_add_corrupt_extent_record(root->fs_info,
2459 path->nodes[*level]->start,
2460 root->fs_info->nodesize,
2467 ret = check_child_node(cur, path->slots[*level], next);
2469 free_extent_buffer(next);
2474 if (btrfs_is_leaf(next))
2475 status = btrfs_check_leaf(root, NULL, next);
2477 status = btrfs_check_node(root, NULL, next);
2478 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2479 free_extent_buffer(next);
/* Step down: the child replaces whatever the path held one level below. */
2484 *level = *level - 1;
2485 free_extent_buffer(path->nodes[*level]);
2486 path->nodes[*level] = next;
2487 path->slots[*level] = 0;
/* Mark the current block as fully consumed. */
2490 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/* Forward declaration: used by account_bytes() before its definition. */
2494 static int fs_root_objectid(u64 objectid);
2497 * Update global fs information.
/*
 * Add the tree block at path->nodes[level] to the global statistics: total
 * btree bytes, fs-tree bytes, extent-tree bytes and wasted btree space.
 *
 * NOTE(review): the leaf/node condition selecting between the two
 * btree_space_waste updates is not visible in this listing.
 */
2499 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2503 struct extent_buffer *eb = path->nodes[level];
2505 total_btree_bytes += eb->len;
2506 if (fs_root_objectid(root->objectid))
2507 total_fs_tree_bytes += eb->len;
2508 if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2509 total_extent_tree_bytes += eb->len;
/* Waste is counted as the free space reported for the leaf. */
2512 btree_space_waste += btrfs_leaf_free_space(root, eb);
/* Waste is counted as the unused key-pointer slots of the node. */
2514 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root) -
2515 btrfs_header_nritems(eb));
2516 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
/* Forward declarations of checkers called by walk_down_tree_v2() below. */
2520 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2521 unsigned int ext_ref);
2522 static int check_tree_block_ref(struct btrfs_root *root,
2523 struct extent_buffer *eb, u64 bytenr,
2524 int level, u64 owner, struct node_refs *nrefs);
2525 static int check_leaf_items(struct btrfs_trans_handle *trans,
2526 struct btrfs_root *root, struct btrfs_path *path,
2527 struct node_refs *nrefs, int account_bytes);
2530 * @trans just for lowmem repair mode
2531 * @check all if not 0 then check all tree block backrefs and items
2532 * 0 then just check relationship of items in fs tree(s)
2534 * Returns >0 Found error, should continue
2535 * Returns <0 Fatal error, must exit the whole check
2536 * Returns 0 No errors found
/*
 * Descend from path->nodes[*level] towards the leaves. Each block gets its
 * tree block ref checked and, under the condition shown in the loop, is
 * added to the global byte accounting. Leaves are handed to
 * process_one_leaf_v2() or check_leaf_items() (the selecting condition is
 * not visible); for nodes the next child is read, validated and entered.
 * @nrefs caches per-level reference information so blocks that do not need
 * checking from this root can be skipped.
 *
 * NOTE(review): many short lines (else branches, error accumulation,
 * break/continue, the return) are not visible in this listing.
 */
2538 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2539 struct btrfs_root *root, struct btrfs_path *path,
2540 int *level, struct node_refs *nrefs, int ext_ref,
2544 enum btrfs_tree_block_status status;
2547 struct btrfs_fs_info *fs_info = root->fs_info;
2548 struct extent_buffer *next;
2549 struct extent_buffer *cur;
2553 int account_file_data = 0;
2555 WARN_ON(*level < 0);
2556 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2558 ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2559 path->nodes[*level], nrefs, *level, check_all);
2563 while (*level >= 0) {
2564 WARN_ON(*level < 0);
2565 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2566 cur = path->nodes[*level];
2567 bytenr = btrfs_header_bytenr(cur);
2568 check = nrefs->need_check[*level];
2570 if (btrfs_header_level(cur) != *level)
2573 * Update bytes accounting and check tree block ref
2574 * NOTE: Doing accounting and check before checking nritems
2575 * is necessary because of empty node/leaf.
2577 if ((check_all && !nrefs->checked[*level]) ||
2578 (!check_all && nrefs->need_check[*level])) {
2579 ret = check_tree_block_ref(root, cur,
2580 btrfs_header_bytenr(cur), btrfs_header_level(cur),
2581 btrfs_header_owner(cur), nrefs);
/* Account a block only when it needs checking here and has a non-zero ref count. */
2584 if (check_all && nrefs->need_check[*level] &&
2585 nrefs->refs[*level]) {
2586 account_bytes(root, path, *level);
2587 account_file_data = 1;
2589 nrefs->checked[*level] = 1;
2592 if (path->slots[*level] >= btrfs_header_nritems(cur))
2595 /* Don't forget to check leaf/node validation */
2597 /* skip duplicate check */
2598 if (check || !check_all) {
2599 ret = btrfs_check_leaf(root, NULL, cur);
2600 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2608 ret = process_one_leaf_v2(root, path, nrefs,
2611 ret = check_leaf_items(trans, root, path,
2612 nrefs, account_file_data);
2616 if (check || !check_all) {
2617 ret = btrfs_check_node(root, NULL, cur);
2618 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2625 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2626 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2628 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2633 * check all trees in check_chunks_and_extent_v2
2634 * check shared node once in check_fs_roots
2636 if (!check_all && !nrefs->need_check[*level - 1]) {
2637 path->slots[*level]++;
/* Get the child block, reading it from disk if it is not cached and current. */
2641 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2642 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2643 free_extent_buffer(next);
2644 reada_walk_down(root, cur, path->slots[*level]);
2645 next = read_tree_block(fs_info, bytenr, ptr_gen);
/* Read failed: record the parent slot as a corrupt extent. */
2646 if (!extent_buffer_uptodate(next)) {
2647 struct btrfs_key node_key;
2649 btrfs_node_key_to_cpu(path->nodes[*level],
2651 path->slots[*level]);
2652 btrfs_add_corrupt_extent_record(fs_info,
2653 &node_key, path->nodes[*level]->start,
2654 fs_info->nodesize, *level);
2660 ret = check_child_node(cur, path->slots[*level], next);
2665 if (btrfs_is_leaf(next))
2666 status = btrfs_check_leaf(root, NULL, next);
2668 status = btrfs_check_node(root, NULL, next);
2669 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2670 free_extent_buffer(next);
/* Step down into the child and restart accounting state for it. */
2675 *level = *level - 1;
2676 free_extent_buffer(path->nodes[*level]);
2677 path->nodes[*level] = next;
2678 path->slots[*level] = 0;
2679 account_file_data = 0;
/* bytenr == (u64)-1: only refresh the full_backref flag for the new block. */
2681 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
/*
 * Climb from *level towards the root until a node with an unvisited slot is
 * found. Blocks that are fully consumed are released from @path, and the
 * shared-node bookkeeping in @wc is updated when the active node is left.
 *
 * NOTE(review): the slot increment, the *level updates and the return
 * values are not visible in this listing.
 */
2686 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2687 struct walk_control *wc, int *level)
2690 struct extent_buffer *leaf;
2692 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite the name, this may be an internal node at level i. */
2693 leaf = path->nodes[i];
/* There is at least one more slot to visit at this level. */
2694 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2699 free_extent_buffer(path->nodes[*level]);
2700 path->nodes[*level] = NULL;
2701 BUG_ON(*level > wc->active_node);
2702 if (*level == wc->active_node)
2703 leave_shared_node(root, wc, *level);
/*
 * Variant of walk_up_tree() without walk_control bookkeeping: climb from
 * *level until a node with an unvisited slot is found, releasing fully
 * consumed blocks from @path.
 *
 * NOTE(review): the slot increment, the *level updates and the return
 * values are not visible in this listing.
 */
2710 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2714 struct extent_buffer *leaf;
2716 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite the name, this may be an internal node at level i. */
2717 leaf = path->nodes[i];
2718 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2723 free_extent_buffer(path->nodes[*level]);
2724 path->nodes[*level] = NULL;
/*
 * Validate the inode record of a tree's root directory. The visible tests
 * look at: the inode item being present with no errors, nlink == 1 with no
 * found links, and a first backref that has an inode ref, index 0 and the
 * name "..", with no dir index or dir item.
 *
 * NOTE(review): the goto/return statements are not visible in this listing,
 * so which outcome each test leads to must be confirmed in the full file.
 */
2731 static int check_root_dir(struct inode_record *rec)
2733 struct inode_backref *backref;
2736 if (!rec->found_inode_item || rec->errors)
2738 if (rec->nlink != 1 || rec->found_link != 0)
2740 if (list_empty(&rec->backrefs))
/* Only the first backref on the list is examined. */
2742 backref = to_inode_backref(rec->backrefs.next);
2743 if (!backref->found_inode_ref)
2745 if (backref->index != 0 || backref->namelen != 2 ||
2746 memcmp(backref->name, "..", 2))
2748 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Repair a directory inode's size: locate the inode item of rec->ino in
 * @root and set its size to rec->found_size, then clear
 * I_ERR_DIR_ISIZE_WRONG in the record.
 *
 * NOTE(review): error handling, the slot adjustment after the search and
 * the return are not visible in this listing.
 */
2755 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2756 struct btrfs_root *root, struct btrfs_path *path,
2757 struct inode_record *rec)
2759 struct btrfs_inode_item *ei;
2760 struct btrfs_key key;
/* The search key uses the largest possible offset for this inode item. */
2763 key.objectid = rec->ino;
2764 key.type = BTRFS_INODE_ITEM_KEY;
2765 key.offset = (u64)-1;
2767 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2771 if (!path->slots[0]) {
2778 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2779 if (key.objectid != rec->ino) {
2784 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2785 struct btrfs_inode_item);
2786 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2787 btrfs_mark_buffer_dirty(path->nodes[0]);
2788 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2789 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2790 root->root_key.objectid);
2792 btrfs_release_path(path);
/*
 * Add the missing orphan item for inode rec->ino via btrfs_add_orphan_item()
 * and release @path afterwards. I_ERR_NO_ORPHAN_ITEM is cleared on the record.
 *
 * NOTE(review): the condition guarding the flag update and the return
 * statement are not visible in this listing.
 */
2796 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2797 struct btrfs_root *root,
2798 struct btrfs_path *path,
2799 struct inode_record *rec)
2803 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2804 btrfs_release_path(path);
2806 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the on-disk nbytes of inode rec->ino and clear
 * I_ERR_FILE_NBYTES_WRONG on the record.
 *
 * The value written is rec->found_size. The inode item is looked up by an
 * exact key search for (rec->ino, BTRFS_INODE_ITEM_KEY, offset).
 *
 * NOTE(review): the key offset assignment, the handling of the search result
 * and the return statement are not visible in this listing.
 */
2810 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2811 struct btrfs_root *root,
2812 struct btrfs_path *path,
2813 struct inode_record *rec)
2815 struct btrfs_inode_item *ei;
2816 struct btrfs_key key;
2819 key.objectid = rec->ino;
2820 key.type = BTRFS_INODE_ITEM_KEY;
2823 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2830 /* Since ret == 0, no need to check anything */
2831 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2832 struct btrfs_inode_item);
2833 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2834 btrfs_mark_buffer_dirty(path->nodes[0]);
2835 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2836 printf("reset nbytes for ino %llu root %llu\n",
2837 rec->ino, root->root_key.objectid);
2839 btrfs_release_path(path);
/*
 * Insert the DIR_INDEX item that @backref reports as missing for inode
 * rec->ino, using a transaction of its own.
 *
 * After the commit the in-memory state is brought in line with the disk:
 * backref->found_dir_index is set, the parent directory record
 * (backref->dir) has backref->namelen added to its found_size, and its
 * I_ERR_DIR_ISIZE_WRONG bit is recomputed from found_size versus isize.
 *
 * NOTE(review): the check of the btrfs_insert_empty_item() result and the
 * return statement are not visible in this listing; the result of
 * btrfs_commit_transaction() is not captured by the visible call.
 */
2843 static int add_missing_dir_index(struct btrfs_root *root,
2844 struct cache_tree *inode_cache,
2845 struct inode_record *rec,
2846 struct inode_backref *backref)
2848 struct btrfs_path path;
2849 struct btrfs_trans_handle *trans;
2850 struct btrfs_dir_item *dir_item;
2851 struct extent_buffer *leaf;
2852 struct btrfs_key key;
2853 struct btrfs_disk_key disk_key;
2854 struct inode_record *dir_rec;
2855 unsigned long name_ptr;
/* Item payload: the dir item header immediately followed by the name. */
2856 u32 data_size = sizeof(*dir_item) + backref->namelen;
2859 trans = btrfs_start_transaction(root, 1);
2861 return PTR_ERR(trans);
2863 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2864 (unsigned long long)rec->ino);
2866 btrfs_init_path(&path);
/* The new item is keyed by (parent dir, DIR_INDEX, index). */
2867 key.objectid = backref->dir;
2868 key.type = BTRFS_DIR_INDEX_KEY;
2869 key.offset = backref->index;
2870 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2873 leaf = path.nodes[0];
2874 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* The location stored in the dir item is the inode item of rec->ino. */
2876 disk_key.objectid = cpu_to_le64(rec->ino);
2877 disk_key.type = BTRFS_INODE_ITEM_KEY;
2878 disk_key.offset = 0;
2880 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2881 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2882 btrfs_set_dir_data_len(leaf, dir_item, 0);
2883 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name bytes are written right after the dir item header. */
2884 name_ptr = (unsigned long)(dir_item + 1);
2885 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2886 btrfs_mark_buffer_dirty(leaf);
2887 btrfs_release_path(&path);
2888 btrfs_commit_transaction(trans, root);
2890 backref->found_dir_index = 1;
2891 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2892 BUG_ON(IS_ERR(dir_rec));
/* Account the new name in the parent and re-evaluate its isize error bit. */
2895 dir_rec->found_size += backref->namelen;
2896 if (dir_rec->found_size == dir_rec->isize &&
2897 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2898 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2899 if (dir_rec->found_size != dir_rec->isize)
2900 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by @backref (dir, name, index) from
 * @root, using a transaction of its own.
 *
 * The entry is looked up with btrfs_lookup_dir_index(). Two exits are
 * visible: one that releases the path and commits right after the lookup,
 * and one that deletes through btrfs_del_item() or
 * btrfs_delete_one_dir_name() before releasing the path and committing.
 *
 * NOTE(review): the conditions selecting between those paths and the return
 * statements are not visible in this listing.
 */
2905 static int delete_dir_index(struct btrfs_root *root,
2906 struct inode_backref *backref)
2908 struct btrfs_trans_handle *trans;
2909 struct btrfs_dir_item *di;
2910 struct btrfs_path path;
2913 trans = btrfs_start_transaction(root, 1);
2915 return PTR_ERR(trans);
2917 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2918 (unsigned long long)backref->dir,
2919 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2920 (unsigned long long)root->objectid);
2922 btrfs_init_path(&path);
2923 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2924 backref->name, backref->namelen,
2925 backref->index, -1);
/* Early exit path: nothing is deleted, the transaction is still committed. */
2928 btrfs_release_path(&path);
2929 btrfs_commit_transaction(trans, root);
/* Two deletion flavours: the whole item, or a single name within the item. */
2936 ret = btrfs_del_item(trans, root, &path);
2938 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2940 btrfs_release_path(&path);
2941 btrfs_commit_transaction(trans, root);
2945 static int __create_inode_item(struct btrfs_trans_handle *trans,
2946 struct btrfs_root *root, u64 ino, u64 size,
2947 u64 nbytes, u64 nlink, u32 mode)
2949 struct btrfs_inode_item ii;
2950 time_t now = time(NULL);
2953 btrfs_set_stack_inode_size(&ii, size);
2954 btrfs_set_stack_inode_nbytes(&ii, nbytes);
2955 btrfs_set_stack_inode_nlink(&ii, nlink);
2956 btrfs_set_stack_inode_mode(&ii, mode);
2957 btrfs_set_stack_inode_generation(&ii, trans->transid);
2958 btrfs_set_stack_timespec_nsec(&ii.atime, 0);
2959 btrfs_set_stack_timespec_sec(&ii.ctime, now);
2960 btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
2961 btrfs_set_stack_timespec_sec(&ii.mtime, now);
2962 btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
2963 btrfs_set_stack_timespec_sec(&ii.otime, 0);
2964 btrfs_set_stack_timespec_nsec(&ii.otime, 0);
2966 ret = btrfs_insert_inode(trans, root, ino, &ii);
2969 warning("root %llu inode %llu recreating inode item, this may "
2970 "be incomplete, please check permissions and content after "
2971 "the fsck completes.\n", (unsigned long long)root->objectid,
2972 (unsigned long long)ino);
/*
 * Lowmem-mode helper: create an empty inode item for @ino.
 *
 * The mode is S_IFDIR when filetype is BTRFS_FT_DIR and S_IFREG for every
 * other value, always with 0755 permissions; size, nbytes and nlink are
 * passed to __create_inode_item() as 0.
 *
 * NOTE(review): the line declaring the filetype parameter is not visible in
 * this listing.
 */
2977 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2978 struct btrfs_root *root, u64 ino,
2981 u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2983 return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
2986 static int create_inode_item(struct btrfs_root *root,
2987 struct inode_record *rec, int root_dir)
2989 struct btrfs_trans_handle *trans;
2995 trans = btrfs_start_transaction(root, 1);
2996 if (IS_ERR(trans)) {
2997 ret = PTR_ERR(trans);
3001 nlink = root_dir ? 1 : rec->found_link;
3002 if (rec->found_dir_item) {
3003 if (rec->found_file_extent)
3004 fprintf(stderr, "root %llu inode %llu has both a dir "
3005 "item and extents, unsure if it is a dir or a "
3006 "regular file so setting it as a directory\n",
3007 (unsigned long long)root->objectid,
3008 (unsigned long long)rec->ino);
3009 mode = S_IFDIR | 0755;
3010 size = rec->found_size;
3011 } else if (!rec->found_dir_item) {
3012 size = rec->extent_end;
3013 mode = S_IFREG | 0755;
3016 ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
3018 btrfs_commit_transaction(trans, root);
/*
 * Repair the dir item / dir index / inode ref combinations recorded in
 * rec->backrefs.
 *
 * The additive repairs visible below are all guarded by !delete: recreating
 * a missing root directory inode item, adding a missing dir index, adding a
 * missing dir index/item pair, and recreating a missing inode item. A dir
 * index is deleted when it has no matching inode ref, or when it has one but
 * carries REF_ERR_INDEX_UNMATCH.
 *
 * Returns ret when it is non-zero, otherwise the value of the repair counter.
 *
 * NOTE(review): the last parameter line, the error checks after each helper
 * call, the counter updates and several branch headers are not visible in
 * this listing.
 */
3022 static int repair_inode_backrefs(struct btrfs_root *root,
3023 struct inode_record *rec,
3024 struct cache_tree *inode_cache,
3027 struct inode_backref *tmp, *backref;
3028 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* _safe iteration: entries are unlinked from the list inside the loop. */
3032 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3033 if (!delete && rec->ino == root_dirid) {
3034 if (!rec->found_inode_item) {
3035 ret = create_inode_item(root, rec, 1);
3042 /* Index 0 for root dir's are special, don't mess with it */
3043 if (rec->ino == root_dirid && backref->index == 0)
3047 ((backref->found_dir_index && !backref->found_inode_ref) ||
3048 (backref->found_dir_index && backref->found_inode_ref &&
3049 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
3050 ret = delete_dir_index(root, backref);
3054 list_del(&backref->list);
/* Dir item and inode ref agree, only the dir index is missing. */
3059 if (!delete && !backref->found_dir_index &&
3060 backref->found_dir_item && backref->found_inode_ref) {
3061 ret = add_missing_dir_index(root, inode_cache, rec,
3066 if (backref->found_dir_item &&
3067 backref->found_dir_index) {
3068 if (!backref->errors &&
3069 backref->found_inode_ref) {
3070 list_del(&backref->list);
/* Only the inode ref exists: both directory entries have to be added. */
3077 if (!delete && (!backref->found_dir_index &&
3078 !backref->found_dir_item &&
3079 backref->found_inode_ref)) {
3080 struct btrfs_trans_handle *trans;
3081 struct btrfs_key location;
3083 ret = check_dir_conflict(root, backref->name,
3089 * let nlink fixing routine to handle it,
3090 * which can do it better.
3095 location.objectid = rec->ino;
3096 location.type = BTRFS_INODE_ITEM_KEY;
3097 location.offset = 0;
3099 trans = btrfs_start_transaction(root, 1);
3100 if (IS_ERR(trans)) {
3101 ret = PTR_ERR(trans);
3104 fprintf(stderr, "adding missing dir index/item pair "
3106 (unsigned long long)rec->ino);
3107 ret = btrfs_insert_dir_item(trans, root, backref->name,
3109 backref->dir, &location,
3110 imode_to_type(rec->imode),
3113 btrfs_commit_transaction(trans, root);
/* All three references are consistent but the inode item itself is gone. */
3117 if (!delete && (backref->found_inode_ref &&
3118 backref->found_dir_index &&
3119 backref->found_dir_item &&
3120 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
3121 !rec->found_inode_item)) {
3122 ret = create_inode_item(root, rec, 0);
3129 return ret ? ret : repaired;
3133 * To determine the file type for nlink/inode_item repair
3135 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
3136 * Return -ENOENT if file type is not found.
/*
 * Two sources are consulted in order: the mode of the inode item when
 * rec->found_inode_item is set, then the filetype of the first backref that
 * has a dir index or a dir item.
 *
 * NOTE(review): the return statements are not visible in this listing.
 */
3138 static int find_file_type(struct inode_record *rec, u8 *type)
3140 struct inode_backref *backref;
3142 /* For inode item recovered case */
3143 if (rec->found_inode_item) {
3144 *type = imode_to_type(rec->imode);
/* Otherwise take the type recorded by a directory entry. */
3148 list_for_each_entry(backref, &rec->backrefs, list) {
3149 if (backref->found_dir_index || backref->found_dir_item) {
3150 *type = backref->filetype;
3158 * To determine the file name for nlink repair
3160 * Return 0 if file name is found, set name and namelen.
3161 * Return -ENOENT if file name is not found.
/*
 * The name is copied from the first backref that has a dir index, a dir item
 * or an inode ref. backref->namelen bytes are copied and no terminating NUL
 * is added by the visible statements.
 *
 * NOTE(review): @name must be able to hold backref->namelen bytes; no bound
 * is checked here. The return statements are not visible in this listing.
 */
3163 static int find_file_name(struct inode_record *rec,
3164 char *name, int *namelen)
3166 struct inode_backref *backref;
3168 list_for_each_entry(backref, &rec->backrefs, list) {
3169 if (backref->found_dir_index || backref->found_dir_item ||
3170 backref->found_inode_ref) {
3171 memcpy(name, backref->name, backref->namelen);
3172 *namelen = backref->namelen;
3179 /* Reset the nlink of the inode to the correct one */
/*
 * Strategy: unlink every recorded backref, drop the incomplete backrefs from
 * the list, force the on-disk nlink to 0, then re-add each remaining backref
 * with btrfs_add_link().
 *
 * rec->found_link is reset to 0 up front.
 *
 * NOTE(review): the error checks after each call, the key offset assignment,
 * any found_link accounting in the re-add loop and the return statement are
 * not visible in this listing.
 */
3180 static int reset_nlink(struct btrfs_trans_handle *trans,
3181 struct btrfs_root *root,
3182 struct btrfs_path *path,
3183 struct inode_record *rec)
3185 struct inode_backref *backref;
3186 struct inode_backref *tmp;
3187 struct btrfs_key key;
3188 struct btrfs_inode_item *inode_item;
3191 /* We don't believe this either, reset it and iterate backref */
3192 rec->found_link = 0;
3194 /* Remove all backref including the valid ones */
3195 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3196 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
3197 backref->index, backref->name,
3198 backref->namelen, 0);
3202 /* remove invalid backref, so it won't be added back */
3203 if (!(backref->found_dir_index &&
3204 backref->found_dir_item &&
3205 backref->found_inode_ref)) {
3206 list_del(&backref->list);
3213 /* Set nlink to 0 */
3214 key.objectid = rec->ino;
3215 key.type = BTRFS_INODE_ITEM_KEY;
3217 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3224 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3225 struct btrfs_inode_item);
3226 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
3227 btrfs_mark_buffer_dirty(path->nodes[0]);
3228 btrfs_release_path(path);
3231 * Add back valid inode_ref/dir_item/dir_index,
3232 * add_link() will handle the nlink inc, so new nlink must be correct
3234 list_for_each_entry(backref, &rec->backrefs, list) {
3235 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
3236 backref->name, backref->namelen,
3237 backref->filetype, &backref->index, 1, 0);
3242 btrfs_release_path(path);
/*
 * Find the highest inode number in use in @root and store it in
 * *highest_ino.
 *
 * Searches for an INODE_ITEM key with objectid BTRFS_LAST_FREE_OBJECTID and
 * reads the objectid of the item in the slot before the position returned by
 * the search. A result at or above BTRFS_LAST_FREE_OBJECTID is treated
 * specially (the statement taken is not visible here).
 *
 * NOTE(review): path->slots[0] - 1 is used without a visible check that the
 * slot is non-zero, and *highest_ino is tested even on paths where the
 * visible code has not assigned it; confirm against the full source. The
 * last parameter line, the key offset assignment and the return statement
 * are also not visible in this listing.
 */
3246 static int get_highest_inode(struct btrfs_trans_handle *trans,
3247 struct btrfs_root *root,
3248 struct btrfs_path *path,
3251 struct btrfs_key key, found_key;
3254 btrfs_init_path(path);
3255 key.objectid = BTRFS_LAST_FREE_OBJECTID;
3257 key.type = BTRFS_INODE_ITEM_KEY;
3258 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
/* The item just before the search position holds the largest objectid. */
3260 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3261 path->slots[0] - 1);
3262 *highest_ino = found_key.objectid;
3265 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3267 btrfs_release_path(path);
3272 * Link inode to dir 'lost+found'. Increase @ref_count.
3274 * Returns 0 on success.
3275 * Returns <0 on failure.
/*
 * Steps visible below: pick an inode number via get_highest_inode(), call
 * btrfs_mkdir() for "lost+found" under BTRFS_FIRST_FREE_OBJECTID, then link
 * @ino into that directory under @namebuf. While the link fails with
 * -EEXIST, ".<ino>" is appended to the name and the link is retried, as long
 * as the result still fits in BTRFS_NAME_LEN.
 *
 * @namebuf is modified in place when a suffix is appended, so it must have
 * room for BTRFS_NAME_LEN bytes.
 *
 * NOTE(review): the error branches, the update of *ref_count and the return
 * statement are not visible in this listing.
 */
3277 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
3278 struct btrfs_root *root,
3279 struct btrfs_path *path,
3280 u64 ino, char *namebuf, u32 name_len,
3281 u8 filetype, u64 *ref_count)
3283 char *dir_name = "lost+found";
3288 btrfs_release_path(path);
3289 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3294 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3295 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3298 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3301 ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3302 namebuf, name_len, filetype, NULL, 1, 0);
3304 * Add ".INO" suffix several times to handle case where
3305 * "FILENAME.INO" is already taken by another file.
3307 while (ret == -EEXIST) {
3309 * Conflicting file name, add ".INO" as suffix * +1 for '.'
3311 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
3315 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3317 name_len += count_digits(ino) + 1;
3318 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3319 name_len, filetype, NULL, 1, 0);
3322 error("failed to link the inode %llu to %s dir: %s",
3323 ino, dir_name, strerror(-ret));
3328 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3329 name_len, namebuf, dir_name);
3331 btrfs_release_path(path);
3333 error("failed to move file '%.*s' to '%s' dir", name_len,
3338 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3339 struct btrfs_root *root,
3340 struct btrfs_path *path,
3341 struct inode_record *rec)
3343 char namebuf[BTRFS_NAME_LEN] = {0};
3346 int name_recovered = 0;
3347 int type_recovered = 0;
3351 * Get file name and type first before these invalid inode ref
3352 * are deleted by remove_all_invalid_backref()
3354 name_recovered = !find_file_name(rec, namebuf, &namelen);
3355 type_recovered = !find_file_type(rec, &type);
3357 if (!name_recovered) {
3358 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3359 rec->ino, rec->ino);
3360 namelen = count_digits(rec->ino);
3361 sprintf(namebuf, "%llu", rec->ino);
3364 if (!type_recovered) {
3365 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3367 type = BTRFS_FT_REG_FILE;
3371 ret = reset_nlink(trans, root, path, rec);
3374 "Failed to reset nlink for inode %llu: %s\n",
3375 rec->ino, strerror(-ret));
3379 if (rec->found_link == 0) {
3380 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3381 namebuf, namelen, type,
3382 (u64 *)&rec->found_link);
3386 printf("Fixed the nlink of inode %llu\n", rec->ino);
3389 * Clear the flag anyway, or we will loop forever for the same inode
3390 * as it will not be removed from the bad inode list and the dead loop
3393 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3394 btrfs_release_path(path);
3399 * Check if there is any normal(reg or prealloc) file extent for given
3401 * This is used to determine the file type when neither its dir_index/item nor
3402 * inode_item exists.
3404 * This will *NOT* report error, if any error happens, just consider it does
3405 * not have any normal file extent.
/*
 * Only one item is inspected: the EXTENT_DATA search result, or the first
 * item of the next leaf when the search position is past the end of the
 * current leaf. That item counts as a normal extent when it is an
 * EXTENT_DATA item of @ino whose extent type is not
 * BTRFS_FILE_EXTENT_INLINE.
 *
 * NOTE(review): the key objectid/offset assignments, the error branches, the
 * slot argument of the key read and the return statement are not visible in
 * this listing.
 */
3407 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3409 struct btrfs_path path;
3410 struct btrfs_key key;
3411 struct btrfs_key found_key;
3412 struct btrfs_file_extent_item *fi;
3416 btrfs_init_path(&path);
3418 key.type = BTRFS_EXTENT_DATA_KEY;
/* Read-only lookup: no transaction, no insertion, no COW. */
3421 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* No exact match and the slot is past the last item: move to the next leaf. */
3426 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3427 ret = btrfs_next_leaf(root, &path);
3434 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3436 if (found_key.objectid != ino ||
3437 found_key.type != BTRFS_EXTENT_DATA_KEY)
3439 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3440 struct btrfs_file_extent_item);
3441 type = btrfs_file_extent_type(path.nodes[0], fi);
3442 if (type != BTRFS_FILE_EXTENT_INLINE) {
3448 btrfs_release_path(&path);
3452 static u32 btrfs_type_to_imode(u8 type)
3454 static u32 imode_by_btrfs_type[] = {
3455 [BTRFS_FT_REG_FILE] = S_IFREG,
3456 [BTRFS_FT_DIR] = S_IFDIR,
3457 [BTRFS_FT_CHRDEV] = S_IFCHR,
3458 [BTRFS_FT_BLKDEV] = S_IFBLK,
3459 [BTRFS_FT_FIFO] = S_IFIFO,
3460 [BTRFS_FT_SOCK] = S_IFSOCK,
3461 [BTRFS_FT_SYMLINK] = S_IFLNK,
3464 return imode_by_btrfs_type[(type)];
/*
 * Rebuild the missing inode item of rec->ino.
 *
 * The file type is taken from find_file_type() when possible. Otherwise it
 * is guessed from what the record shows: a non-inline file extent means a
 * regular file, a dir item means a directory, orphan data extents mean a
 * regular file, and a regular file is the final fallback. The item is
 * created with btrfs_new_inode() using mode | type bits.
 *
 * On the record, I_ERR_NO_INODE_ITEM is cleared and I_ERR_LINK_COUNT_WRONG is
 * set so that the nlink repair runs afterwards; this function does not touch
 * links itself.
 *
 * NOTE(review): the declarations (including the initial value of mode), the
 * error check after btrfs_new_inode() and the return statement are not
 * visible in this listing.
 */
3467 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3468 struct btrfs_root *root,
3469 struct btrfs_path *path,
3470 struct inode_record *rec)
3474 int type_recovered = 0;
3477 printf("Trying to rebuild inode:%llu\n", rec->ino);
3479 type_recovered = !find_file_type(rec, &filetype);
3482 * Try to determine inode type if type not found.
3484 * For found regular file extent, it must be FILE.
3485 * For found dir_item/index, it must be DIR.
3487 * For undetermined one, use FILE as fallback.
3490 * 1. If found backref(inode_index/item is already handled) to it,
3492 * Need new inode-inode ref structure to allow search for that.
3494 if (!type_recovered) {
3495 if (rec->found_file_extent &&
3496 find_normal_file_extent(root, rec->ino)) {
3498 filetype = BTRFS_FT_REG_FILE;
3499 } else if (rec->found_dir_item) {
3501 filetype = BTRFS_FT_DIR;
3502 } else if (!list_empty(&rec->orphan_extents)) {
3504 filetype = BTRFS_FT_REG_FILE;
3506 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3509 filetype = BTRFS_FT_REG_FILE;
3513 ret = btrfs_new_inode(trans, root, rec->ino,
3514 mode | btrfs_type_to_imode(filetype));
3519 * Here inode rebuild is done, we only rebuild the inode item,
3520 * don't repair the nlink(like move to lost+found).
3521 * That is the job of nlink repair.
3523 * We just fill the record and return
3525 rec->found_dir_item = 1;
3526 rec->imode = mode | btrfs_type_to_imode(filetype);
3528 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3529 /* Ensure the inode_nlinks repair function will be called */
3530 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Deal with every orphan data extent recorded in rec->orphan_extents.
 *
 * For each entry btrfs_get_extent() is used to probe the file range. Two
 * outcomes are visible: the orphan extent is freed with btrfs_free_extent()
 * (reported as a conflict), or a file extent item is inserted for it with
 * btrfs_insert_file_extent(), after which rec->found_size grows by disk_len
 * and the matching range is removed from rec->holes. The entry is then
 * removed from the list, and I_ERR_FILE_EXTENT_ORPHAN is cleared on the
 * record.
 *
 * NOTE(review): the conditions choosing between the two outcomes, the error
 * checks and the return statement are not visible in this listing.
 */
3535 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3536 struct btrfs_root *root,
3537 struct btrfs_path *path,
3538 struct inode_record *rec)
3540 struct orphan_data_extent *orphan;
3541 struct orphan_data_extent *tmp;
3544 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3546 * Check for conflicting file extents
3548 * Here we don't know whether the extents is compressed or not,
3549 * so we can only assume it not compressed nor data offset,
3550 * and use its disk_len as extent length.
3552 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3553 orphan->offset, orphan->disk_len, 0);
3554 btrfs_release_path(path);
3559 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3560 orphan->disk_bytenr, orphan->disk_len);
3561 ret = btrfs_free_extent(trans,
3562 root->fs_info->extent_root,
3563 orphan->disk_bytenr, orphan->disk_len,
3564 0, root->objectid, orphan->objectid,
3569 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3570 orphan->offset, orphan->disk_bytenr,
3571 orphan->disk_len, orphan->disk_len);
3575 /* Update file size info */
3576 rec->found_size += orphan->disk_len;
3577 if (rec->found_size == rec->nbytes)
3578 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3580 /* Update the file extent hole info too */
3581 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3585 if (RB_EMPTY_ROOT(&rec->holes))
3586 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3588 list_del(&orphan->list);
3591 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps between the file extents of rec->ino.
 *
 * Every range recorded in the rec->holes tree is punched with
 * btrfs_punch_hole() and then removed from the tree;
 * I_ERR_FILE_EXTENT_DISCOUNT is cleared once the tree is empty. A further
 * punch covering [0, isize rounded up to the sector size) handles a file
 * that lost all of its extents.
 *
 * NOTE(review): the loop header, the condition guarding the special case,
 * the error checks and the return statement are not visible in this listing.
 */
3596 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3597 struct btrfs_root *root,
3598 struct btrfs_path *path,
3599 struct inode_record *rec)
3601 struct rb_node *node;
3602 struct file_extent_hole *hole;
3606 node = rb_first(&rec->holes);
3610 hole = rb_entry(node, struct file_extent_hole, node);
3611 ret = btrfs_punch_hole(trans, root, rec->ino,
3612 hole->start, hole->len);
3615 ret = del_file_extent_hole(&rec->holes, hole->start,
3619 if (RB_EMPTY_ROOT(&rec->holes))
3620 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* The entry was just removed, so restart from the new first node. */
3621 node = rb_first(&rec->holes);
3623 /* special case for a file losing all its file extent */
3625 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3626 round_up(rec->isize,
3627 root->fs_info->sectorsize));
3631 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3632 rec->ino, root->objectid);
/*
 * Run the per-inode repair helpers for every repairable error bit set in
 * rec->errors, all inside one transaction.
 *
 * The helpers run in a fixed order -- missing inode item, orphan extents,
 * discount extents, directory isize, orphan item, link count, nbytes -- and
 * each one after the first is skipped once an earlier helper has returned
 * non-zero. The transaction is committed regardless of the outcome.
 *
 * NOTE(review): the statement taken when no repairable bit is set, the
 * initial value of ret and the return statement are not visible in this
 * listing; the result of btrfs_commit_transaction() is not captured by the
 * visible call.
 */
3637 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3639 struct btrfs_trans_handle *trans;
3640 struct btrfs_path path;
3643 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3644 I_ERR_NO_ORPHAN_ITEM |
3645 I_ERR_LINK_COUNT_WRONG |
3646 I_ERR_NO_INODE_ITEM |
3647 I_ERR_FILE_EXTENT_ORPHAN |
3648 I_ERR_FILE_EXTENT_DISCOUNT|
3649 I_ERR_FILE_NBYTES_WRONG)))
3653 * For nlink repair, it may create a dir and add link, so
3654 * 2 for parent(256)'s dir_index and dir_item
3655 * 2 for lost+found dir's inode_item and inode_ref
3656 * 1 for the new inode_ref of the file
3657 * 2 for lost+found dir's dir_index and dir_item for the file
3659 trans = btrfs_start_transaction(root, 7);
3661 return PTR_ERR(trans);
3663 btrfs_init_path(&path);
3664 if (rec->errors & I_ERR_NO_INODE_ITEM)
3665 ret = repair_inode_no_item(trans, root, &path, rec);
3666 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3667 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3668 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3669 ret = repair_inode_discount_extent(trans, root, &path, rec);
3670 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3671 ret = repair_inode_isize(trans, root, &path, rec);
3672 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3673 ret = repair_inode_orphan_item(trans, root, &path, rec);
3674 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3675 ret = repair_inode_nlinks(trans, root, &path, rec);
3676 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3677 ret = repair_inode_nbytes(trans, root, &path, rec);
3678 btrfs_commit_transaction(trans, root);
3679 btrfs_release_path(&path);
/*
 * Check, and in repair mode try to fix, every inode record collected for
 * @root in @inode_cache. The cache is drained by the final loop: each record
 * is removed from it and freed.
 *
 * Visible phases:
 *   1. A root with zero refs is handled up front; a warning is printed if
 *      its cache is not empty.
 *   2. In repair mode the backrefs of every record are repaired through
 *      repair_inode_backrefs(), in stages.
 *   3. The root directory record is validated with check_root_dir(), or the
 *      root directory is recreated / reported as missing.
 *   4. Every remaining record is checked: orphan items are looked up,
 *      missing inode items and wrong link counts are flagged, repair is
 *      attempted, and whatever is still wrong is printed together with its
 *      unresolved backrefs.
 *
 * Returns -1 when the error counter is positive, 0 otherwise.
 *
 * NOTE(review): local declarations, stage bookkeeping, several branch
 * headers and the error counter updates are not visible in this listing.
 */
3683 static int check_inode_recs(struct btrfs_root *root,
3684 struct cache_tree *inode_cache)
3686 struct cache_extent *cache;
3687 struct ptr_node *node;
3688 struct inode_record *rec;
3689 struct inode_backref *backref;
3694 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3696 if (btrfs_root_refs(&root->root_item) == 0) {
3697 if (!cache_tree_empty(inode_cache))
3698 fprintf(stderr, "warning line %d\n", __LINE__);
3703 * We need to repair backrefs first because we could change some of the
3704 * errors in the inode recs.
3706 * We also need to go through and delete invalid backrefs first and then
3707 * add the correct ones second. We do this because we may get EEXIST
3708 * when adding back the correct index because we hadn't yet deleted the
3711 * For example, if we were missing a dir index then the directories
3712 * isize would be wrong, so if we fixed the isize to what we thought it
3713 * would be and then fixed the backref we'd still have a invalid fs, so
3714 * we need to add back the dir index and then check to see if the isize
3719 if (stage == 3 && !err)
3722 cache = search_cache_extent(inode_cache, 0);
3723 while (repair && cache) {
3724 node = container_of(cache, struct ptr_node, cache);
3726 cache = next_cache_extent(cache);
3728 /* Need to free everything up and rescan */
3730 remove_cache_extent(inode_cache, &node->cache);
3732 free_inode_rec(rec);
3736 if (list_empty(&rec->backrefs))
3739 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Root directory: validate its record, or recreate/report it when absent. */
3753 rec = get_inode_rec(inode_cache, root_dirid, 0);
3754 BUG_ON(IS_ERR(rec));
3756 ret = check_root_dir(rec);
3758 fprintf(stderr, "root %llu root dir %llu error\n",
3759 (unsigned long long)root->root_key.objectid,
3760 (unsigned long long)root_dirid);
3761 print_inode_error(root, rec);
3766 struct btrfs_trans_handle *trans;
3768 trans = btrfs_start_transaction(root, 1);
3769 if (IS_ERR(trans)) {
3770 err = PTR_ERR(trans);
3775 "root %llu missing its root dir, recreating\n",
3776 (unsigned long long)root->objectid);
3778 ret = btrfs_make_root_dir(trans, root, root_dirid);
3781 btrfs_commit_transaction(trans, root);
3785 fprintf(stderr, "root %llu root dir %llu not found\n",
3786 (unsigned long long)root->root_key.objectid,
3787 (unsigned long long)root_dirid);
/* Final pass: drain the cache, checking each remaining record once. */
3791 cache = search_cache_extent(inode_cache, 0);
3794 node = container_of(cache, struct ptr_node, cache);
3796 remove_cache_extent(inode_cache, &node->cache);
/* The root directory and the orphan objectid are not checked here. */
3798 if (rec->ino == root_dirid ||
3799 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3800 free_inode_rec(rec);
3804 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3805 ret = check_orphan_item(root, rec->ino);
3807 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3808 if (can_free_inode_rec(rec)) {
3809 free_inode_rec(rec);
3814 if (!rec->found_inode_item)
3815 rec->errors |= I_ERR_NO_INODE_ITEM;
3816 if (rec->found_link != rec->nlink)
3817 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3819 ret = try_repair_inode(root, rec);
3820 if (ret == 0 && can_free_inode_rec(rec)) {
3821 free_inode_rec(rec);
3827 if (!(repair && ret == 0))
3829 print_inode_error(root, rec);
/* Turn each missing piece of a backref into its error bit before printing. */
3830 list_for_each_entry(backref, &rec->backrefs, list) {
3831 if (!backref->found_dir_item)
3832 backref->errors |= REF_ERR_NO_DIR_ITEM;
3833 if (!backref->found_dir_index)
3834 backref->errors |= REF_ERR_NO_DIR_INDEX;
3835 if (!backref->found_inode_ref)
3836 backref->errors |= REF_ERR_NO_INODE_REF;
3837 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3838 " namelen %u name %s filetype %d errors %x",
3839 (unsigned long long)backref->dir,
3840 (unsigned long long)backref->index,
3841 backref->namelen, backref->name,
3842 backref->filetype, backref->errors);
3843 print_ref_error(backref->errors);
3845 free_inode_rec(rec);
3847 return (error > 0) ? -1 : 0;
3850 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3853 struct cache_extent *cache;
3854 struct root_record *rec = NULL;
3857 cache = lookup_cache_extent(root_cache, objectid, 1);
3859 rec = container_of(cache, struct root_record, cache);
3861 rec = calloc(1, sizeof(*rec));
3863 return ERR_PTR(-ENOMEM);
3864 rec->objectid = objectid;
3865 INIT_LIST_HEAD(&rec->backrefs);
3866 rec->cache.start = objectid;
3867 rec->cache.size = 1;
3869 ret = insert_cache_extent(root_cache, &rec->cache);
3871 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec that matches (@ref_root, @dir, @name), or
 * allocate a new one and append it to rec->backrefs.
 *
 * A match requires equal ref_root, dir and namelen plus identical name
 * bytes; @index takes no part in the comparison. A new backref stores a
 * NUL-terminated copy of @name in the same allocation.
 *
 * NOTE(review): the statements taken on a match or mismatch, the allocation
 * failure handling, the assignment of the dir field and the return
 * statements are not visible in this listing.
 */
3876 static struct root_backref *get_root_backref(struct root_record *rec,
3877 u64 ref_root, u64 dir, u64 index,
3878 const char *name, int namelen)
3880 struct root_backref *backref;
3882 list_for_each_entry(backref, &rec->backrefs, list) {
3883 if (backref->ref_root != ref_root || backref->dir != dir ||
3884 backref->namelen != namelen)
3886 if (memcmp(name, backref->name, namelen))
/* One allocation holds the struct, the name and its terminating NUL. */
3891 backref = calloc(1, sizeof(*backref) + namelen + 1);
3894 backref->ref_root = ref_root;
3896 backref->index = index;
3897 backref->namelen = namelen;
3898 memcpy(backref->name, name, namelen);
3899 backref->name[namelen] = '\0';
3900 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Tear down one root record reached through its embedded cache extent:
 * every backref is unlinked from rec->backrefs in turn.
 *
 * NOTE(review): the statements releasing the backrefs and the record itself
 * are not visible in this listing.
 */
3904 static void free_root_record(struct cache_extent *cache)
3906 struct root_record *rec;
3907 struct root_backref *backref;
3909 rec = container_of(cache, struct root_record, cache);
3910 while (!list_empty(&rec->backrefs)) {
3911 backref = to_root_backref(rec->backrefs.next);
3912 list_del(&backref->list);
/*
 * Macro invocation pairing the name "root_recs" with free_root_record;
 * presumably it generates the tree-wide free helper -- confirm against the
 * macro definition.
 */
3919 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one piece of evidence (dir item, dir index, root ref or root
 * backref, selected by @item_type) that @ref_root references subvolume
 * @root_id through entry @name in directory @dir.
 *
 * The matching backref is fetched or created, @errors is merged into it and
 * the found_* flag for @item_type is set. For every item type except
 * DIR_ITEM the index is cross-checked: a different index already recorded
 * by another indexed item yields REF_ERR_INDEX_UNMATCH. Duplicate root refs
 * and root backrefs are flagged with the REF_ERR_DUP_* bits. The backref
 * becomes reachable once both the forward ref and the dir item were seen.
 *
 * NOTE(review): the check of the get_root_backref() result, the statements
 * that update rec->found_ref and the return statement are not visible in
 * this listing.
 */
3921 static int add_root_backref(struct cache_tree *root_cache,
3922 u64 root_id, u64 ref_root, u64 dir, u64 index,
3923 const char *name, int namelen,
3924 int item_type, int errors)
3926 struct root_record *rec;
3927 struct root_backref *backref;
3929 rec = get_root_rec(root_cache, root_id);
3930 BUG_ON(IS_ERR(rec));
3931 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3934 backref->errors |= errors;
/* DIR_ITEM entries take no part in the index cross-check. */
3936 if (item_type != BTRFS_DIR_ITEM_KEY) {
3937 if (backref->found_dir_index || backref->found_back_ref ||
3938 backref->found_forward_ref) {
3939 if (backref->index != index)
3940 backref->errors |= REF_ERR_INDEX_UNMATCH;
3942 backref->index = index;
3946 if (item_type == BTRFS_DIR_ITEM_KEY) {
3947 if (backref->found_forward_ref)
3949 backref->found_dir_item = 1;
3950 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3951 backref->found_dir_index = 1;
3952 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3953 if (backref->found_forward_ref)
3954 backref->errors |= REF_ERR_DUP_ROOT_REF;
3955 else if (backref->found_dir_item)
3957 backref->found_forward_ref = 1;
3958 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3959 if (backref->found_back_ref)
3960 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3961 backref->found_back_ref = 1;
/* Reachable means: a forward root ref plus the dir item naming it. */
3966 if (backref->found_forward_ref && backref->found_dir_item)
3967 backref->reachable = 1;
/*
 * Convert the inode records left in @src_cache for @root into root backrefs
 * in @dst_cache, draining @src_cache in the process.
 *
 * Records of the tree reloc root are simply freed. For every other record,
 * is_child_root() is called with the record's inode number, and each backref
 * that has a dir item and/or dir index is forwarded to add_root_backref()
 * with the record's inode number as the referenced root id. A backref with
 * found_inode_ref set triggers BUG_ON.
 *
 * NOTE(review): the handling of the is_child_root() result, the errors
 * argument passed to add_root_backref() and the return statements are not
 * visible in this listing.
 */
3971 static int merge_root_recs(struct btrfs_root *root,
3972 struct cache_tree *src_cache,
3973 struct cache_tree *dst_cache)
3975 struct cache_extent *cache;
3976 struct ptr_node *node;
3977 struct inode_record *rec;
3978 struct inode_backref *backref;
3981 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3982 free_inode_recs_tree(src_cache);
3987 cache = search_cache_extent(src_cache, 0);
3990 node = container_of(cache, struct ptr_node, cache);
3992 remove_cache_extent(src_cache, &node->cache);
3995 ret = is_child_root(root, root->objectid, rec->ino);
4001 list_for_each_entry(backref, &rec->backrefs, list) {
4002 BUG_ON(backref->found_inode_ref);
4003 if (backref->found_dir_item)
4004 add_root_backref(dst_cache, rec->ino,
4005 root->root_key.objectid, backref->dir,
4006 backref->index, backref->name,
4007 backref->namelen, BTRFS_DIR_ITEM_KEY,
4009 if (backref->found_dir_index)
4010 add_root_backref(dst_cache, rec->ino,
4011 root->root_key.objectid, backref->dir,
4012 backref->index, backref->name,
4013 backref->namelen, BTRFS_DIR_INDEX_KEY,
4017 free_inode_rec(rec);
/*
 * Verify the reference graph between subvolume roots recorded in
 * @root_cache.
 *
 * First pass: backrefs marked reachable are re-examined against the
 * found_ref count of the root they come from, and a backref can lose its
 * reachable flag here. Second pass: every record is reported when it is
 * unreferenced (after consulting check_orphan_item() for ids in the free
 * objectid range), when it is referenced without a root item, or when a
 * reachable backref carries errors; missing pieces of each backref are
 * turned into REF_ERR_NO_* bits before printing.
 *
 * Returns 1 when the error counter is positive, 0 otherwise.
 *
 * NOTE(review): the loop headers, the found_ref updates, the error counter
 * updates and most branch outcomes are not visible in this listing.
 */
4024 static int check_root_refs(struct btrfs_root *root,
4025 struct cache_tree *root_cache)
4027 struct root_record *rec;
4028 struct root_record *ref_root;
4029 struct root_backref *backref;
4030 struct cache_extent *cache;
/* The record of the top-level fs tree is fetched, and created if absent. */
4036 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
4037 BUG_ON(IS_ERR(rec));
4040 /* fixme: this can not detect circular references */
4043 cache = search_cache_extent(root_cache, 0);
4047 rec = container_of(cache, struct root_record, cache);
4048 cache = next_cache_extent(cache);
4050 if (rec->found_ref == 0)
4053 list_for_each_entry(backref, &rec->backrefs, list) {
4054 if (!backref->reachable)
4057 ref_root = get_root_rec(root_cache,
4059 BUG_ON(IS_ERR(ref_root));
4060 if (ref_root->found_ref > 0)
4063 backref->reachable = 0;
4065 if (rec->found_ref == 0)
/* Reporting pass over all records. */
4071 cache = search_cache_extent(root_cache, 0);
4075 rec = container_of(cache, struct root_record, cache);
4076 cache = next_cache_extent(cache);
4078 if (rec->found_ref == 0 &&
4079 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
4080 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
4081 ret = check_orphan_item(root->fs_info->tree_root,
4087 * If we don't have a root item then we likely just have
4088 * a dir item in a snapshot for this root but no actual
4089 * ref key or anything so it's meaningless.
4091 if (!rec->found_root_item)
4094 fprintf(stderr, "fs tree %llu not referenced\n",
4095 (unsigned long long)rec->objectid);
4099 if (rec->found_ref > 0 && !rec->found_root_item)
4101 list_for_each_entry(backref, &rec->backrefs, list) {
4102 if (!backref->found_dir_item)
4103 backref->errors |= REF_ERR_NO_DIR_ITEM;
4104 if (!backref->found_dir_index)
4105 backref->errors |= REF_ERR_NO_DIR_INDEX;
4106 if (!backref->found_back_ref)
4107 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
4108 if (!backref->found_forward_ref)
4109 backref->errors |= REF_ERR_NO_ROOT_REF;
4110 if (backref->reachable && backref->errors)
4117 fprintf(stderr, "fs tree %llu refs %u %s\n",
4118 (unsigned long long)rec->objectid, rec->found_ref,
4119 rec->found_root_item ? "" : "not found");
4121 list_for_each_entry(backref, &rec->backrefs, list) {
4122 if (!backref->reachable)
4124 if (!backref->errors && rec->found_root_item)
4126 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
4127 " index %llu namelen %u name %s errors %x\n",
4128 (unsigned long long)backref->ref_root,
4129 (unsigned long long)backref->dir,
4130 (unsigned long long)backref->index,
4131 backref->namelen, backref->name,
4133 print_ref_error(backref->errors);
4136 return errors > 0 ? 1 : 0;
4139 static int process_root_ref(struct extent_buffer *eb, int slot,
4140 struct btrfs_key *key,
4141 struct cache_tree *root_cache)
4147 struct btrfs_root_ref *ref;
4148 char namebuf[BTRFS_NAME_LEN];
4151 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
4153 dirid = btrfs_root_ref_dirid(eb, ref);
4154 index = btrfs_root_ref_sequence(eb, ref);
4155 name_len = btrfs_root_ref_name_len(eb, ref);
4157 if (name_len <= BTRFS_NAME_LEN) {
4161 len = BTRFS_NAME_LEN;
4162 error = REF_ERR_NAME_TOO_LONG;
4164 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
4166 if (key->type == BTRFS_ROOT_REF_KEY) {
4167 add_root_backref(root_cache, key->offset, key->objectid, dirid,
4168 index, namebuf, len, key->type, error);
4170 add_root_backref(root_cache, key->objectid, key->offset, dirid,
4171 index, namebuf, len, key->type, error);
4176 static void free_corrupt_block(struct cache_extent *cache)
4178 struct btrfs_corrupt_block *corrupt;
4180 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
4184 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
4187 * Repair the btree of the given root.
4189 * The fix is to remove the node key in corrupt_blocks cache_tree.
4190 * and rebalance the tree.
4191 * After the fix, the btree should be writeable.
4193 static int repair_btree(struct btrfs_root *root,
4194 struct cache_tree *corrupt_blocks)
4196 struct btrfs_trans_handle *trans;
4197 struct btrfs_path path;
4198 struct btrfs_corrupt_block *corrupt;
4199 struct cache_extent *cache;
4200 struct btrfs_key key;
4205 if (cache_tree_empty(corrupt_blocks))
4208 trans = btrfs_start_transaction(root, 1);
4209 if (IS_ERR(trans)) {
4210 ret = PTR_ERR(trans);
4211 fprintf(stderr, "Error starting transaction: %s\n",
4215 btrfs_init_path(&path);
4216 cache = first_cache_extent(corrupt_blocks);
4218 corrupt = container_of(cache, struct btrfs_corrupt_block,
4220 level = corrupt->level;
4221 path.lowest_level = level;
4222 key.objectid = corrupt->key.objectid;
4223 key.type = corrupt->key.type;
4224 key.offset = corrupt->key.offset;
4227 * Here we don't want to do any tree balance, since it may
4228 * cause a balance with corrupted brother leaf/node,
4229 * so ins_len set to 0 here.
4230 * Balance will be done after all corrupt node/leaf is deleted.
4232 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
4235 offset = btrfs_node_blockptr(path.nodes[level],
4238 /* Remove the ptr */
4239 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
4243 * Remove the corresponding extent
4244 * return value is not concerned.
4246 btrfs_release_path(&path);
4247 ret = btrfs_free_extent(trans, root, offset,
4248 root->fs_info->nodesize, 0,
4249 root->root_key.objectid, level - 1, 0);
4250 cache = next_cache_extent(cache);
4253 /* Balance the btree using btrfs_search_slot() */
4254 cache = first_cache_extent(corrupt_blocks);
4256 corrupt = container_of(cache, struct btrfs_corrupt_block,
4258 memcpy(&key, &corrupt->key, sizeof(key));
4259 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
4262 /* return will always >0 since it won't find the item */
4264 btrfs_release_path(&path);
4265 cache = next_cache_extent(cache);
4268 btrfs_commit_transaction(trans, root);
4269 btrfs_release_path(&path);
4273 static int check_fs_root(struct btrfs_root *root,
4274 struct cache_tree *root_cache,
4275 struct walk_control *wc)
4281 struct btrfs_path path;
4282 struct shared_node root_node;
4283 struct root_record *rec;
4284 struct btrfs_root_item *root_item = &root->root_item;
4285 struct cache_tree corrupt_blocks;
4286 struct orphan_data_extent *orphan;
4287 struct orphan_data_extent *tmp;
4288 enum btrfs_tree_block_status status;
4289 struct node_refs nrefs;
4292 * Reuse the corrupt_block cache tree to record corrupted tree block
4294 * Unlike the usage in extent tree check, here we do it in a per
4295 * fs/subvol tree base.
4297 cache_tree_init(&corrupt_blocks);
4298 root->fs_info->corrupt_blocks = &corrupt_blocks;
4300 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4301 rec = get_root_rec(root_cache, root->root_key.objectid);
4302 BUG_ON(IS_ERR(rec));
4303 if (btrfs_root_refs(root_item) > 0)
4304 rec->found_root_item = 1;
4307 btrfs_init_path(&path);
4308 memset(&root_node, 0, sizeof(root_node));
4309 cache_tree_init(&root_node.root_cache);
4310 cache_tree_init(&root_node.inode_cache);
4311 memset(&nrefs, 0, sizeof(nrefs));
4313 /* Move the orphan extent record to corresponding inode_record */
4314 list_for_each_entry_safe(orphan, tmp,
4315 &root->orphan_data_extents, list) {
4316 struct inode_record *inode;
4318 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4320 BUG_ON(IS_ERR(inode));
4321 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4322 list_move(&orphan->list, &inode->orphan_extents);
4325 level = btrfs_header_level(root->node);
4326 memset(wc->nodes, 0, sizeof(wc->nodes));
4327 wc->nodes[level] = &root_node;
4328 wc->active_node = level;
4329 wc->root_level = level;
4331 /* We may not have checked the root block, lets do that now */
4332 if (btrfs_is_leaf(root->node))
4333 status = btrfs_check_leaf(root, NULL, root->node);
4335 status = btrfs_check_node(root, NULL, root->node);
4336 if (status != BTRFS_TREE_BLOCK_CLEAN)
4339 if (btrfs_root_refs(root_item) > 0 ||
4340 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4341 path.nodes[level] = root->node;
4342 extent_buffer_get(root->node);
4343 path.slots[level] = 0;
4345 struct btrfs_key key;
4346 struct btrfs_disk_key found_key;
4348 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4349 level = root_item->drop_level;
4350 path.lowest_level = level;
4351 if (level > btrfs_header_level(root->node) ||
4352 level >= BTRFS_MAX_LEVEL) {
4353 error("ignoring invalid drop level: %u", level);
4356 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4359 btrfs_node_key(path.nodes[level], &found_key,
4361 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4362 sizeof(found_key)));
4366 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4372 wret = walk_up_tree(root, &path, wc, &level);
4379 btrfs_release_path(&path);
4381 if (!cache_tree_empty(&corrupt_blocks)) {
4382 struct cache_extent *cache;
4383 struct btrfs_corrupt_block *corrupt;
4385 printf("The following tree block(s) is corrupted in tree %llu:\n",
4386 root->root_key.objectid);
4387 cache = first_cache_extent(&corrupt_blocks);
4389 corrupt = container_of(cache,
4390 struct btrfs_corrupt_block,
4392 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4393 cache->start, corrupt->level,
4394 corrupt->key.objectid, corrupt->key.type,
4395 corrupt->key.offset);
4396 cache = next_cache_extent(cache);
4399 printf("Try to repair the btree for root %llu\n",
4400 root->root_key.objectid);
4401 ret = repair_btree(root, &corrupt_blocks);
4403 fprintf(stderr, "Failed to repair btree: %s\n",
4406 printf("Btree for root %llu is fixed\n",
4407 root->root_key.objectid);
4411 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4415 if (root_node.current) {
4416 root_node.current->checked = 1;
4417 maybe_free_inode_rec(&root_node.inode_cache,
4421 err = check_inode_recs(root, &root_node.inode_cache);
4425 free_corrupt_blocks_tree(&corrupt_blocks);
4426 root->fs_info->corrupt_blocks = NULL;
4427 free_orphan_data_extents(&root->orphan_data_extents);
4431 static int fs_root_objectid(u64 objectid)
4433 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4434 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4436 return is_fstree(objectid);
4439 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4440 struct cache_tree *root_cache)
4442 struct btrfs_path path;
4443 struct btrfs_key key;
4444 struct walk_control wc;
4445 struct extent_buffer *leaf, *tree_node;
4446 struct btrfs_root *tmp_root;
4447 struct btrfs_root *tree_root = fs_info->tree_root;
4451 if (ctx.progress_enabled) {
4452 ctx.tp = TASK_FS_ROOTS;
4453 task_start(ctx.info);
4457 * Just in case we made any changes to the extent tree that weren't
4458 * reflected into the free space cache yet.
4461 reset_cached_block_groups(fs_info);
4462 memset(&wc, 0, sizeof(wc));
4463 cache_tree_init(&wc.shared);
4464 btrfs_init_path(&path);
4469 key.type = BTRFS_ROOT_ITEM_KEY;
4470 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4475 tree_node = tree_root->node;
4477 if (tree_node != tree_root->node) {
4478 free_root_recs_tree(root_cache);
4479 btrfs_release_path(&path);
4482 leaf = path.nodes[0];
4483 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4484 ret = btrfs_next_leaf(tree_root, &path);
4490 leaf = path.nodes[0];
4492 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4493 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4494 fs_root_objectid(key.objectid)) {
4495 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4496 tmp_root = btrfs_read_fs_root_no_cache(
4499 key.offset = (u64)-1;
4500 tmp_root = btrfs_read_fs_root(
4503 if (IS_ERR(tmp_root)) {
4507 ret = check_fs_root(tmp_root, root_cache, &wc);
4508 if (ret == -EAGAIN) {
4509 free_root_recs_tree(root_cache);
4510 btrfs_release_path(&path);
4515 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4516 btrfs_free_fs_root(tmp_root);
4517 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4518 key.type == BTRFS_ROOT_BACKREF_KEY) {
4519 process_root_ref(leaf, path.slots[0], &key,
4526 btrfs_release_path(&path);
4528 free_extent_cache_tree(&wc.shared);
4529 if (!cache_tree_empty(&wc.shared))
4530 fprintf(stderr, "warning line %d\n", __LINE__);
4532 task_stop(ctx.info);
4538 * Find the @index according by @ino and name.
4539 * Notice:time efficiency is O(N)
4541 * @root: the root of the fs/file tree
4542 * @index_ret: the index as return value
4543 * @namebuf: the name to match
4544 * @name_len: the length of name to match
4545 * @file_type: the file_type of INODE_ITEM to match
4547 * Returns 0 if found and *@index_ret will be modified with right value
4548 * Returns< 0 not found and *@index_ret will be (u64)-1
4550 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4551 u64 *index_ret, char *namebuf, u32 name_len,
4554 struct btrfs_path path;
4555 struct extent_buffer *node;
4556 struct btrfs_dir_item *di;
4557 struct btrfs_key key;
4558 struct btrfs_key location;
4559 char name[BTRFS_NAME_LEN] = {0};
4571 /* search from the last index */
4572 key.objectid = dirid;
4573 key.offset = (u64)-1;
4574 key.type = BTRFS_DIR_INDEX_KEY;
4576 btrfs_init_path(&path);
4577 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4582 ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4585 *index_ret = (64)-1;
4588 /* Check whether inode_id/filetype/name match */
4589 node = path.nodes[0];
4590 slot = path.slots[0];
4591 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4592 total = btrfs_item_size_nr(node, slot);
4593 while (cur < total) {
4595 len = btrfs_dir_name_len(node, di);
4596 data_len = btrfs_dir_data_len(node, di);
4598 btrfs_dir_item_key_to_cpu(node, di, &location);
4599 if (location.objectid != location_id ||
4600 location.type != BTRFS_INODE_ITEM_KEY ||
4601 location.offset != 0)
4604 filetype = btrfs_dir_type(node, di);
4605 if (file_type != filetype)
4608 if (len > BTRFS_NAME_LEN)
4609 len = BTRFS_NAME_LEN;
4611 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4612 if (len != name_len || strncmp(namebuf, name, len))
4615 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4616 *index_ret = key.offset;
4620 len += sizeof(*di) + data_len;
4621 di = (struct btrfs_dir_item *)((char *)di + len);
4627 btrfs_release_path(&path);
4632 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4633 * INODE_REF/INODE_EXTREF match.
4635 * @root: the root of the fs/file tree
4636 * @key: the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4637 * value while find index
4638 * @location_key: location key of the struct btrfs_dir_item to match
4639 * @name: the name to match
4640 * @namelen: the length of name
4641 * @file_type: the type of file to match
4643 * Return 0 if no error occurred.
4644 * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4645 * DIR_ITEM/DIR_INDEX
4646 * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4647 * and DIR_ITEM/DIR_INDEX mismatch
4649 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4650 struct btrfs_key *location_key, char *name,
4651 u32 namelen, u8 file_type)
4653 struct btrfs_path path;
4654 struct extent_buffer *node;
4655 struct btrfs_dir_item *di;
4656 struct btrfs_key location;
4657 char namebuf[BTRFS_NAME_LEN] = {0};
4666 /* get the index by traversing all index */
4667 if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4668 ret = find_dir_index(root, key->objectid,
4669 location_key->objectid, &key->offset,
4670 name, namelen, file_type);
4672 ret = DIR_INDEX_MISSING;
4676 btrfs_init_path(&path);
4677 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4679 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4684 /* Check whether inode_id/filetype/name match */
4685 node = path.nodes[0];
4686 slot = path.slots[0];
4687 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4688 total = btrfs_item_size_nr(node, slot);
4689 while (cur < total) {
4690 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4691 DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4693 len = btrfs_dir_name_len(node, di);
4694 data_len = btrfs_dir_data_len(node, di);
4696 btrfs_dir_item_key_to_cpu(node, di, &location);
4697 if (location.objectid != location_key->objectid ||
4698 location.type != location_key->type ||
4699 location.offset != location_key->offset)
4702 filetype = btrfs_dir_type(node, di);
4703 if (file_type != filetype)
4706 if (len > BTRFS_NAME_LEN) {
4707 len = BTRFS_NAME_LEN;
4708 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4710 key->type == BTRFS_DIR_ITEM_KEY ?
4711 "DIR_ITEM" : "DIR_INDEX",
4712 key->objectid, key->offset, len);
4714 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4716 if (len != namelen || strncmp(namebuf, name, len))
4722 len += sizeof(*di) + data_len;
4723 di = (struct btrfs_dir_item *)((char *)di + len);
4728 btrfs_release_path(&path);
4733 * Prints inode ref error message
4735 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4736 u64 index, const char *namebuf, int name_len,
4737 u8 filetype, int err)
4742 /* root dir error */
4743 if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4745 "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4746 root->objectid, key->objectid, key->offset, namebuf);
4751 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4752 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4753 root->objectid, key->offset,
4754 btrfs_name_hash(namebuf, name_len),
4755 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4757 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4758 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4759 root->objectid, key->offset, index,
4760 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4765 * Insert the missing inode item.
4767 * Returns 0 means success.
4768 * Returns <0 means error.
4770 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4773 struct btrfs_key key;
4774 struct btrfs_trans_handle *trans;
4775 struct btrfs_path path;
4779 key.type = BTRFS_INODE_ITEM_KEY;
4782 btrfs_init_path(&path);
4783 trans = btrfs_start_transaction(root, 1);
4784 if (IS_ERR(trans)) {
4789 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4790 if (ret < 0 || !ret)
4793 /* insert inode item */
4794 create_inode_item_lowmem(trans, root, ino, filetype);
4797 btrfs_commit_transaction(trans, root);
4800 error("failed to repair root %llu INODE ITEM[%llu] missing",
4801 root->objectid, ino);
4802 btrfs_release_path(&path);
4807 * The ternary means dir item, dir index and relative inode ref.
4808 * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4809 * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4811 * If two of three is missing or mismatched, delete the existing one.
4812 * If one of three is missing or mismatched, add the missing one.
4814 * returns 0 means success.
4815 * returns not 0 means on error;
4817 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4818 u64 index, char *name, int name_len, u8 filetype,
4821 struct btrfs_trans_handle *trans;
4826 * stage shall be one of following valid values:
4827 * 0: Fine, nothing to do.
4828 * 1: One of three is wrong, so add missing one.
4829 * 2: Two of three is wrong, so delete existed one.
4831 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4833 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4835 if (err & (INODE_REF_MISSING))
4838 /* stage must be smaller than 3 */
4841 trans = btrfs_start_transaction(root, 1);
4843 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4848 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4849 filetype, &index, 1, 1);
4853 btrfs_commit_transaction(trans, root);
4856 error("fail to repair inode %llu name %s filetype %u",
4857 ino, name, filetype);
4859 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
4860 stage == 2 ? "Delete" : "Add",
4861 ino, name, filetype);
4867 * Traverse the given INODE_REF and call find_dir_item() to find related
4868 * DIR_ITEM/DIR_INDEX.
4870 * @root: the root of the fs/file tree
4871 * @ref_key: the key of the INODE_REF
4872 * @path the path provides node and slot
4873 * @refs: the count of INODE_REF
4874 * @mode: the st_mode of INODE_ITEM
4875 * @name_ret: returns with the first ref's name
4876 * @name_len_ret: len of the name_ret
4878 * Return 0 if no error occurred.
4880 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4881 struct btrfs_path *path, char *name_ret,
4882 u32 *namelen_ret, u64 *refs_ret, int mode)
4884 struct btrfs_key key;
4885 struct btrfs_key location;
4886 struct btrfs_inode_ref *ref;
4887 struct extent_buffer *node;
4888 char namebuf[BTRFS_NAME_LEN] = {0};
4898 int need_research = 0;
4906 /* since after repair, path and the dir item may be changed */
4907 if (need_research) {
4909 btrfs_release_path(path);
4910 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
4911 /* the item was deleted, let path point to the last checked item */
4913 if (path->slots[0] == 0)
4914 btrfs_prev_leaf(root, path);
4922 location.objectid = ref_key->objectid;
4923 location.type = BTRFS_INODE_ITEM_KEY;
4924 location.offset = 0;
4925 node = path->nodes[0];
4926 slot = path->slots[0];
4928 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4929 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4930 total = btrfs_item_size_nr(node, slot);
4933 /* Update inode ref count */
4936 index = btrfs_inode_ref_index(node, ref);
4937 name_len = btrfs_inode_ref_name_len(node, ref);
4939 if (name_len <= BTRFS_NAME_LEN) {
4942 len = BTRFS_NAME_LEN;
4943 warning("root %llu INODE_REF[%llu %llu] name too long",
4944 root->objectid, ref_key->objectid, ref_key->offset);
4947 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4949 /* copy the first name found to name_ret */
4950 if (refs == 1 && name_ret) {
4951 memcpy(name_ret, namebuf, len);
4955 /* Check root dir ref */
4956 if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4957 if (index != 0 || len != strlen("..") ||
4958 strncmp("..", namebuf, len) ||
4959 ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4960 /* set err bits then repair will delete the ref */
4961 err |= DIR_INDEX_MISSING;
4962 err |= DIR_ITEM_MISSING;
4967 /* Find related DIR_INDEX */
4968 key.objectid = ref_key->offset;
4969 key.type = BTRFS_DIR_INDEX_KEY;
4971 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4972 imode_to_type(mode));
4974 /* Find related dir_item */
4975 key.objectid = ref_key->offset;
4976 key.type = BTRFS_DIR_ITEM_KEY;
4977 key.offset = btrfs_name_hash(namebuf, len);
4978 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4979 imode_to_type(mode));
4981 if (tmp_err && repair) {
4982 ret = repair_ternary_lowmem(root, ref_key->offset,
4983 ref_key->objectid, index, namebuf,
4984 name_len, imode_to_type(mode),
4991 print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4992 imode_to_type(mode), tmp_err);
4994 len = sizeof(*ref) + name_len;
4995 ref = (struct btrfs_inode_ref *)((char *)ref + len);
5006 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
5007 * DIR_ITEM/DIR_INDEX.
5009 * @root: the root of the fs/file tree
5010 * @ref_key: the key of the INODE_EXTREF
5011 * @refs: the count of INODE_EXTREF
5012 * @mode: the st_mode of INODE_ITEM
5014 * Return 0 if no error occurred.
5016 static int check_inode_extref(struct btrfs_root *root,
5017 struct btrfs_key *ref_key,
5018 struct extent_buffer *node, int slot, u64 *refs,
5021 struct btrfs_key key;
5022 struct btrfs_key location;
5023 struct btrfs_inode_extref *extref;
5024 char namebuf[BTRFS_NAME_LEN] = {0};
5034 location.objectid = ref_key->objectid;
5035 location.type = BTRFS_INODE_ITEM_KEY;
5036 location.offset = 0;
5038 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5039 total = btrfs_item_size_nr(node, slot);
5042 /* update inode ref count */
5044 name_len = btrfs_inode_extref_name_len(node, extref);
5045 index = btrfs_inode_extref_index(node, extref);
5046 parent = btrfs_inode_extref_parent(node, extref);
5047 if (name_len <= BTRFS_NAME_LEN) {
5050 len = BTRFS_NAME_LEN;
5051 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
5052 root->objectid, ref_key->objectid, ref_key->offset);
5054 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
5056 /* Check root dir ref name */
5057 if (index == 0 && strncmp(namebuf, "..", name_len)) {
5058 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
5059 root->objectid, ref_key->objectid, ref_key->offset,
5061 err |= ROOT_DIR_ERROR;
5064 /* find related dir_index */
5065 key.objectid = parent;
5066 key.type = BTRFS_DIR_INDEX_KEY;
5068 ret = find_dir_item(root, &key, &location, namebuf, len, mode);
5071 /* find related dir_item */
5072 key.objectid = parent;
5073 key.type = BTRFS_DIR_ITEM_KEY;
5074 key.offset = btrfs_name_hash(namebuf, len);
5075 ret = find_dir_item(root, &key, &location, namebuf, len, mode);
5078 len = sizeof(*extref) + name_len;
5079 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5089 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
5090 * DIR_ITEM/DIR_INDEX match.
5091 * Return with @index_ret.
5093 * @root: the root of the fs/file tree
5094 * @key: the key of the INODE_REF/INODE_EXTREF
5095 * @name: the name in the INODE_REF/INODE_EXTREF
5096 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
5097 * @index_ret: the index in the INODE_REF/INODE_EXTREF,
5098 * value (u64)-1 means do not check index
5099 * @ext_ref: the EXTENDED_IREF feature
5101 * Return 0 if no error occurred.
5102 * Return >0 for error bitmap
5104 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
5105 char *name, int namelen, u64 *index_ret,
5106 unsigned int ext_ref)
5108 struct btrfs_path path;
5109 struct btrfs_inode_ref *ref;
5110 struct btrfs_inode_extref *extref;
5111 struct extent_buffer *node;
5112 char ref_namebuf[BTRFS_NAME_LEN] = {0};
5125 btrfs_init_path(&path);
5126 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5128 ret = INODE_REF_MISSING;
5132 node = path.nodes[0];
5133 slot = path.slots[0];
5135 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5136 total = btrfs_item_size_nr(node, slot);
5138 /* Iterate all entry of INODE_REF */
5139 while (cur < total) {
5140 ret = INODE_REF_MISSING;
5142 ref_namelen = btrfs_inode_ref_name_len(node, ref);
5143 ref_index = btrfs_inode_ref_index(node, ref);
5144 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5147 if (cur + sizeof(*ref) + ref_namelen > total ||
5148 ref_namelen > BTRFS_NAME_LEN) {
5149 warning("root %llu INODE %s[%llu %llu] name too long",
5151 key->type == BTRFS_INODE_REF_KEY ?
5153 key->objectid, key->offset);
5155 if (cur + sizeof(*ref) > total)
5157 len = min_t(u32, total - cur - sizeof(*ref),
5163 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
5166 if (len != namelen || strncmp(ref_namebuf, name, len))
5169 *index_ret = ref_index;
5173 len = sizeof(*ref) + ref_namelen;
5174 ref = (struct btrfs_inode_ref *)((char *)ref + len);
5179 /* Skip if not support EXTENDED_IREF feature */
5183 btrfs_release_path(&path);
5184 btrfs_init_path(&path);
5186 dir_id = key->offset;
5187 key->type = BTRFS_INODE_EXTREF_KEY;
5188 key->offset = btrfs_extref_hash(dir_id, name, namelen);
5190 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5192 ret = INODE_REF_MISSING;
5196 node = path.nodes[0];
5197 slot = path.slots[0];
5199 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5201 total = btrfs_item_size_nr(node, slot);
5203 /* Iterate all entry of INODE_EXTREF */
5204 while (cur < total) {
5205 ret = INODE_REF_MISSING;
5207 ref_namelen = btrfs_inode_extref_name_len(node, extref);
5208 ref_index = btrfs_inode_extref_index(node, extref);
5209 parent = btrfs_inode_extref_parent(node, extref);
5210 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5213 if (parent != dir_id)
5216 if (ref_namelen <= BTRFS_NAME_LEN) {
5219 len = BTRFS_NAME_LEN;
5220 warning("root %llu INODE %s[%llu %llu] name too long",
5222 key->type == BTRFS_INODE_REF_KEY ?
5224 key->objectid, key->offset);
5226 read_extent_buffer(node, ref_namebuf,
5227 (unsigned long)(extref + 1), len);
5229 if (len != namelen || strncmp(ref_namebuf, name, len))
5232 *index_ret = ref_index;
5237 len = sizeof(*extref) + ref_namelen;
5238 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5243 btrfs_release_path(&path);
5247 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
5248 u64 ino, u64 index, const char *namebuf,
5249 int name_len, u8 filetype, int err)
5251 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
5252 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
5253 root->objectid, key->objectid, key->offset, namebuf,
5255 err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5258 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
5259 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
5260 root->objectid, key->objectid, index, namebuf, filetype,
5261 err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5264 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
5266 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
5267 root->objectid, ino, index, namebuf, filetype,
5268 err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
5271 if (err & INODE_REF_MISSING)
5273 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5274 root->objectid, ino, key->objectid, namebuf, filetype);
5279 * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5281 * Returns error after repair
5283 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5284 u64 index, u8 filetype, char *namebuf, u32 name_len,
5289 if (err & INODE_ITEM_MISSING) {
5290 ret = repair_inode_item_missing(root, ino, filetype);
5292 err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
5295 if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5296 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5297 name_len, filetype, err);
5299 err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5300 err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5301 err &= ~(INODE_REF_MISSING);
5307 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5310 struct btrfs_key key;
5311 struct btrfs_path path;
5313 struct btrfs_dir_item *di;
5323 key.offset = (u64)-1;
5325 btrfs_init_path(&path);
5326 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5331 /* if found, go to special case */
5336 ret = btrfs_previous_item(root, &path, ino, type);
5344 di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5346 total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5348 while (cur < total) {
5349 len = btrfs_dir_name_len(path.nodes[0], di);
5350 if (len > BTRFS_NAME_LEN)
5351 len = BTRFS_NAME_LEN;
5354 len += btrfs_dir_data_len(path.nodes[0], di);
5356 di = (struct btrfs_dir_item *)((char *)di + len);
5362 btrfs_release_path(&path);
5366 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5373 ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5377 ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5381 *size = item_size + index_size;
5385 error("failed to count root %llu INODE[%llu] root size",
5386 root->objectid, ino);
5391 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5392 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5394 * @root: the root of the fs/file tree
5395 * @key: the key of the INODE_REF/INODE_EXTREF
5397 * @size: the st_size of the INODE_ITEM
5398 * @ext_ref: the EXTENDED_IREF feature
5400 * Return 0 if no error occurred.
5401 * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
5403 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5404 struct btrfs_path *path, u64 *size,
5405 unsigned int ext_ref)
5407 struct btrfs_dir_item *di;
5408 struct btrfs_inode_item *ii;
5409 struct btrfs_key key;
5410 struct btrfs_key location;
5411 struct extent_buffer *node;
5413 char namebuf[BTRFS_NAME_LEN] = {0};
5425 int need_research = 0;
5428 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5429 * ignore index check.
5431 if (di_key->type == BTRFS_DIR_INDEX_KEY)
5432 index = di_key->offset;
5439 /* since after repair, path and the dir item may be changed */
5440 if (need_research) {
5442 err |= DIR_COUNT_AGAIN;
5443 btrfs_release_path(path);
5444 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5445 /* the item was deleted, let path point the last checked item */
5447 if (path->slots[0] == 0)
5448 btrfs_prev_leaf(root, path);
5456 node = path->nodes[0];
5457 slot = path->slots[0];
5459 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5460 total = btrfs_item_size_nr(node, slot);
5461 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5463 while (cur < total) {
5464 data_len = btrfs_dir_data_len(node, di);
5467 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5469 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5470 di_key->objectid, di_key->offset, data_len);
5472 name_len = btrfs_dir_name_len(node, di);
5473 if (name_len <= BTRFS_NAME_LEN) {
5476 len = BTRFS_NAME_LEN;
5477 warning("root %llu %s[%llu %llu] name too long",
5479 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5480 di_key->objectid, di_key->offset);
5482 (*size) += name_len;
5483 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5485 filetype = btrfs_dir_type(node, di);
5487 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5488 di_key->offset != btrfs_name_hash(namebuf, len)) {
5490 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5491 root->objectid, di_key->objectid, di_key->offset,
5492 namebuf, len, filetype, di_key->offset,
5493 btrfs_name_hash(namebuf, len));
5496 btrfs_dir_item_key_to_cpu(node, di, &location);
5497 /* Ignore related ROOT_ITEM check */
5498 if (location.type == BTRFS_ROOT_ITEM_KEY)
5501 btrfs_release_path(path);
5502 /* Check relative INODE_ITEM(existence/filetype) */
5503 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5505 tmp_err |= INODE_ITEM_MISSING;
5509 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5510 struct btrfs_inode_item);
5511 mode = btrfs_inode_mode(path->nodes[0], ii);
5512 if (imode_to_type(mode) != filetype) {
5513 tmp_err |= INODE_ITEM_MISMATCH;
5517 /* Check relative INODE_REF/INODE_EXTREF */
5518 key.objectid = location.objectid;
5519 key.type = BTRFS_INODE_REF_KEY;
5520 key.offset = di_key->objectid;
5521 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5524 /* check relative INDEX/ITEM */
5525 key.objectid = di_key->objectid;
5526 if (key.type == BTRFS_DIR_ITEM_KEY) {
5527 key.type = BTRFS_DIR_INDEX_KEY;
5530 key.type = BTRFS_DIR_ITEM_KEY;
5531 key.offset = btrfs_name_hash(namebuf, name_len);
5534 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5535 name_len, filetype);
5536 /* find_dir_item may find index */
5537 if (key.type == BTRFS_DIR_INDEX_KEY)
5541 if (tmp_err && repair) {
5542 ret = repair_dir_item(root, di_key->objectid,
5543 location.objectid, index,
5544 imode_to_type(mode), namebuf,
5546 if (ret != tmp_err) {
5551 btrfs_release_path(path);
5552 print_dir_item_err(root, di_key, location.objectid, index,
5553 namebuf, name_len, filetype, tmp_err);
5555 len = sizeof(*di) + name_len + data_len;
5556 di = (struct btrfs_dir_item *)((char *)di + len);
5559 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5560 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5561 root->objectid, di_key->objectid,
5568 btrfs_release_path(path);
5569 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5571 err |= ret > 0 ? -ENOENT : ret;
5576 * Wrapper function of btrfs_punch_hole.
5578 * Returns 0 means success.
5579 * Returns not 0 means error.
/*
 * Punch a hole into inode @ino of @root by calling btrfs_punch_hole() inside
 * a transaction that this function starts and commits itself.
 */
5581 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5584 struct btrfs_trans_handle *trans;
/* Open a transaction on @root for the change. */
5587 trans = btrfs_start_transaction(root, 1);
/* The transaction pointer encodes an error; hand that code to the caller. */
5589 return PTR_ERR(trans);
5591 ret = btrfs_punch_hole(trans, root, ino, start, len);
/* Report the outcome: error() for the failure text, printf() for success. */
5593 error("failed to add hole [%llu, %llu] in inode [%llu]",
5596 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
/* Commit the transaction; the commit's return value is not stored here. */
5599 btrfs_commit_transaction(trans, root);
5604 * Check file extent datasum/hole, update the size of the file extents,
5605 * check and update the last offset of the file extent.
5607 * @root: the root of fs/file tree.
5608 * @fkey: the key of the file extent.
5609 * @nodatasum: INODE_NODATASUM feature.
5610 * @size: the sum of all EXTENT_DATA items size for this inode.
5611 * @end: the offset of the last extent.
5613 * Return 0 if no error occurred.
/*
 * Validate one EXTENT_DATA item (@fkey at @node/@slot): inline extent sanity,
 * extent type, csum coverage and holes.  Detected problems are OR-ed into a
 * local error bitmap; @size and @end are advanced by the extent's byte count.
 */
5615 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5616 struct extent_buffer *node, int slot,
5617 unsigned int nodatasum, u64 *size, u64 *end)
5619 struct btrfs_file_extent_item *fi;
5622 u64 extent_num_bytes;
5624 u64 csum_found; /* In byte size, sectorsize aligned */
5625 u64 search_start; /* Logical range start we search for csum */
5626 u64 search_len; /* Logical range len we search for csum */
5627 unsigned int extent_type;
5628 unsigned int is_hole;
5633 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5635 /* Check inline extent */
5636 extent_type = btrfs_file_extent_type(node, fi);
5637 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5638 struct btrfs_item *e = btrfs_item_nr(slot);
5639 u32 item_inline_len;
/* Length derived from the item size vs. length recorded in the extent. */
5641 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5642 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5643 compressed = btrfs_file_extent_compression(node, fi);
/* An inline extent with zero length is reported as an error. */
5644 if (extent_num_bytes == 0) {
5646 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5647 root->objectid, fkey->objectid, fkey->offset);
5648 err |= FILE_EXTENT_ERROR;
/* The two lengths are only required to match for uncompressed data. */
5650 if (!compressed && extent_num_bytes != item_inline_len) {
5652 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5653 root->objectid, fkey->objectid, fkey->offset,
5654 extent_num_bytes, item_inline_len);
5655 err |= FILE_EXTENT_ERROR;
/* Account the inline extent in both running totals. */
5657 *end += extent_num_bytes;
5658 *size += extent_num_bytes;
5662 /* Check extent type */
5663 if (extent_type != BTRFS_FILE_EXTENT_REG &&
5664 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5665 err |= FILE_EXTENT_ERROR;
5666 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5667 root->objectid, fkey->objectid, fkey->offset);
5671 /* Check REG_EXTENT/PREALLOC_EXTENT */
5672 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5673 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5674 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5675 extent_offset = btrfs_file_extent_offset(node, fi);
5676 compressed = btrfs_file_extent_compression(node, fi);
/* A hole is an extent whose disk bytenr and disk size are both zero. */
5677 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5680 * Check EXTENT_DATA csum
5682 * For plain (uncompressed) extent, we should only check the range
5683 * we're referring to, as it's possible that part of prealloc extent
5684 * has been written, and has csum:
5686 * |<--- Original large preallocated extent A ---->|
5687 * |<- Prealloc File Extent ->|<- Regular Extent ->|
5690 * For compressed extent, we should check the whole range.
/* First pair: only the referenced range; second pair: whole on-disk extent. */
5693 search_start = disk_bytenr + extent_offset;
5694 search_len = extent_num_bytes;
5696 search_start = disk_bytenr;
5697 search_len = disk_num_bytes;
5699 ret = count_csum_range(root, search_start, search_len, &csum_found);
/* Three mutually exclusive csum problems are distinguished below. */
5700 if (csum_found > 0 && nodatasum) {
5701 err |= ODD_CSUM_ITEM;
5702 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5703 root->objectid, fkey->objectid, fkey->offset);
5704 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5705 !is_hole && (ret < 0 || csum_found < search_len)) {
5706 err |= CSUM_ITEM_MISSING;
5707 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5708 root->objectid, fkey->objectid, fkey->offset,
5709 csum_found, search_len);
5710 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5711 err |= ODD_CSUM_ITEM;
5712 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5713 root->objectid, fkey->objectid, fkey->offset, csum_found);
5716 /* Check EXTENT_DATA hole */
/* With no_holes unset, a gap between *end and this extent's offset is flagged. */
5717 if (!no_holes && *end != fkey->offset) {
/* The gap [*end, fkey->offset) is what gets punched as a hole. */
5719 ret = punch_extent_hole(root, fkey->objectid,
5720 *end, fkey->offset - *end);
/* Still an error when not repairing or when the hole could not be added. */
5721 if (!repair || ret) {
5722 err |= FILE_EXTENT_ERROR;
5723 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
5724 root->objectid, fkey->objectid, fkey->offset);
5728 *end += extent_num_bytes;
5730 *size += extent_num_bytes;
5736 * Set inode item nbytes to @nbytes
5738 * Returns 0 on success
5739 * Returns != 0 on error
/*
 * Rewrite the nbytes field of the INODE_ITEM of @ino in @root to @nbytes.
 *
 * The key @path points at on entry is remembered and searched for again at
 * the end, so the caller's @path refers to that key after the repair.
 */
5741 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5742 struct btrfs_path *path,
5743 u64 ino, u64 nbytes)
5745 struct btrfs_trans_handle *trans;
5746 struct btrfs_inode_item *ii;
5747 struct btrfs_key key;
5748 struct btrfs_key research_key;
/* Remember where the caller's path currently points. */
5752 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5755 key.type = BTRFS_INODE_ITEM_KEY;
5758 trans = btrfs_start_transaction(root, 1);
5759 if (IS_ERR(trans)) {
5760 ret = PTR_ERR(trans);
/* Drop the read-only path, then search again with cow enabled. */
5765 btrfs_release_path(path);
5766 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5774 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5775 struct btrfs_inode_item);
5776 btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5777 btrfs_mark_buffer_dirty(path->nodes[0]);
5779 btrfs_commit_transaction(trans, root);
5782 error("failed to set nbytes in inode %llu root %llu",
5783 ino, root->root_key.objectid);
/*
 * The newline belongs at the end of the message; it used to sit before
 * " to %llu", which split the line and left the output unterminated.
 */
5785 printf("Set nbytes in inode item %llu root %llu to %llu\n", ino,
5786 root->root_key.objectid, nbytes);
/* Point the caller's path back at the key it referenced on entry. */
5789 btrfs_release_path(path);
5790 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5797 * Set directory inode isize to @isize.
5799 * Returns 0 on success.
5800 * Returns != 0 on error.
/*
 * Rewrite the size field of an INODE_ITEM in @root to isize, then search for
 * the key that @path referenced on entry.
 */
5802 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5803 struct btrfs_path *path,
5806 struct btrfs_trans_handle *trans;
5807 struct btrfs_inode_item *ii;
5808 struct btrfs_key key;
5809 struct btrfs_key research_key;
/* Remember where the caller's path currently points. */
5813 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5816 key.type = BTRFS_INODE_ITEM_KEY;
5819 trans = btrfs_start_transaction(root, 1);
5820 if (IS_ERR(trans)) {
5821 ret = PTR_ERR(trans);
/* Drop the old path, then search again with cow enabled. */
5826 btrfs_release_path(path);
5827 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5835 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5836 struct btrfs_inode_item);
/* Store the new size and mark the leaf dirty. */
5837 btrfs_set_inode_size(path->nodes[0], ii, isize);
5838 btrfs_mark_buffer_dirty(path->nodes[0]);
5840 btrfs_commit_transaction(trans, root);
5843 error("failed to set isize in inode %llu root %llu",
5844 ino, root->root_key.objectid);
5846 printf("Set isize in inode %llu root %llu to %llu\n",
5847 ino, root->root_key.objectid, isize);
/* Point the caller's path back at the key it referenced on entry. */
5849 btrfs_release_path(path);
5850 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5857 * Wrapper function for btrfs_add_orphan_item().
5859 * Returns 0 on success.
5860 * Returns != 0 on error.
/*
 * Add an orphan item for inode @ino in @root through btrfs_add_orphan_item(),
 * then search for the key that @path referenced on entry.
 */
5862 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5863 struct btrfs_path *path, u64 ino)
5865 struct btrfs_trans_handle *trans;
5866 struct btrfs_key research_key;
/* Remember where the caller's path currently points. */
5870 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5872 trans = btrfs_start_transaction(root, 1);
5873 if (IS_ERR(trans)) {
5874 ret = PTR_ERR(trans);
/* The path is released before being handed to the insertion helper. */
5879 btrfs_release_path(path);
5880 ret = btrfs_add_orphan_item(trans, root, path, ino);
5882 btrfs_commit_transaction(trans, root);
5885 error("failed to add inode %llu as orphan item root %llu",
5886 ino, root->root_key.objectid);
5888 printf("Added inode %llu as orphan item root %llu\n",
5889 ino, root->root_key.objectid);
/* Point the caller's path back at the key it referenced on entry. */
5891 btrfs_release_path(path);
5892 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5898 /* Set inode_item nlink to @ref_count.
5899 * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
5901 * Returns 0 on success
/*
 * Reset the nlink field of the INODE_ITEM of @ino in @root to @ref_count.
 * When @ref_count is 0 the inode is first linked into lost+found through
 * link_inode_to_lostfound(), which receives &ref_count.
 *
 * The key @path points at on entry is remembered and searched for again at
 * the end.
 */
5903 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
5904 struct btrfs_path *path, u64 ino,
5905 const char *name, u32 namelen,
5906 u64 ref_count, u8 filetype, u64 *nlink)
5908 struct btrfs_trans_handle *trans;
5909 struct btrfs_inode_item *ii;
5910 struct btrfs_key key;
5911 struct btrfs_key old_key;
/*
 * One extra byte keeps the buffer NUL-terminated even when @namelen equals
 * BTRFS_NAME_LEN; the buffer is printed with %s further down.
 */
5912 char namebuf[BTRFS_NAME_LEN + 1] = {0};
/* Remember where the caller's path currently points. */
5918 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
5920 if (name && namelen) {
5921 ASSERT(namelen <= BTRFS_NAME_LEN);
5922 memcpy(namebuf, name, namelen);
/* Without a usable name, the decimal inode number serves as the name. */
5925 sprintf(namebuf, "%llu", ino);
5926 name_len = count_digits(ino);
5927 printf("Can't find file name for inode %llu, use %s instead\n",
5931 trans = btrfs_start_transaction(root, 1);
5932 if (IS_ERR(trans)) {
5933 ret = PTR_ERR(trans);
5937 btrfs_release_path(path);
5938 /* if refs is 0, put it into lostfound */
5939 if (ref_count == 0) {
5940 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
5941 name_len, filetype, &ref_count);
5946 /* reset inode_item's nlink to ref_count */
5948 key.type = BTRFS_INODE_ITEM_KEY;
5951 btrfs_release_path(path);
5952 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5958 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5959 struct btrfs_inode_item);
5960 btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
5961 btrfs_mark_buffer_dirty(path->nodes[0]);
5966 btrfs_commit_transaction(trans, root);
/*
 * Both messages name the inode first and the root second, so the arguments
 * are passed in that order (they used to be swapped).
 */
5970 "fail to repair nlink of inode %llu root %llu name %s filetype %u",
5971 ino, root->objectid, namebuf, filetype);
5973 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
5974 ino, root->objectid, namebuf, filetype);
/* Point the caller's path back at the key it referenced on entry. */
5977 btrfs_release_path(path);
5978 ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
5985 * Check INODE_ITEM and related ITEMs (the same inode number)
5986 * 1. check link count
5987 * 2. check inode ref/extref
5988 * 3. check dir item/index
5990 * @ext_ref: the EXTENDED_IREF feature
5992 * Return 0 if no error occurred.
5993 * Return >0 for error or when the traversal is done (by error bitmap)
/*
 * Check the INODE_ITEM that @path points at together with the following
 * items that share its objectid (refs, extrefs, dir items, file extents),
 * then verify the inode's nlink/isize/nbytes against what was collected.
 */
5995 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5996 unsigned int ext_ref)
5998 struct extent_buffer *node;
5999 struct btrfs_inode_item *ii;
6000 struct btrfs_key key;
6001 struct btrfs_key last_key;
6010 u64 extent_size = 0;
6012 unsigned int nodatasum;
6016 char namebuf[BTRFS_NAME_LEN] = {0};
6019 node = path->nodes[0];
6020 slot = path->slots[0];
6022 btrfs_item_key_to_cpu(node, &key, slot);
6023 inode_id = key.objectid;
/* Items under BTRFS_ORPHAN_OBJECTID are stepped over with btrfs_next_item(). */
6025 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
6026 ret = btrfs_next_item(root, path);
/* Snapshot the fields of the inode item that are verified later. */
6032 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
6033 isize = btrfs_inode_size(node, ii);
6034 nbytes = btrfs_inode_nbytes(node, ii);
6035 mode = btrfs_inode_mode(node, ii);
6036 dir = imode_to_type(mode) == BTRFS_FT_DIR;
6037 nlink = btrfs_inode_nlink(node, ii);
6038 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
/* Record the key before advancing so it can be searched for again later. */
6041 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
6042 ret = btrfs_next_item(root, path);
6044 /* out will fill 'err' using current statistics */
6046 } else if (ret > 0) {
6051 node = path->nodes[0];
6052 slot = path->slots[0];
6053 btrfs_item_key_to_cpu(node, &key, slot);
/* A different objectid means the current item belongs to another inode. */
6054 if (key.objectid != inode_id)
/* Dispatch on the item type to the matching checker. */
6058 case BTRFS_INODE_REF_KEY:
6059 ret = check_inode_ref(root, &key, path, namebuf,
6060 &name_len, &refs, mode);
6063 case BTRFS_INODE_EXTREF_KEY:
6064 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
6065 warning("root %llu EXTREF[%llu %llu] isn't supported",
6066 root->objectid, key.objectid,
6068 ret = check_inode_extref(root, &key, node, slot, &refs,
6072 case BTRFS_DIR_ITEM_KEY:
6073 case BTRFS_DIR_INDEX_KEY:
6075 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
6076 root->objectid, inode_id,
6077 imode_to_type(mode), key.objectid,
6080 ret = check_dir_item(root, &key, path, &size, ext_ref);
6083 case BTRFS_EXTENT_DATA_KEY:
6085 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
6086 root->objectid, inode_id, key.objectid,
6089 ret = check_file_extent(root, &key, node, slot,
6090 nodatasum, &extent_size,
6094 case BTRFS_XATTR_ITEM_KEY:
6097 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
6098 key.objectid, key.type, key.offset);
/* With LAST_ITEM set, the path is re-pointed at the last recorded key. */
6103 if (err & LAST_ITEM) {
6104 btrfs_release_path(path);
6105 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
6110 /* verify INODE_ITEM nlink/isize/nbytes */
/* In repair mode a DIR_COUNT_AGAIN flag triggers a recount of the dir size. */
6112 if (repair && (err & DIR_COUNT_AGAIN)) {
6113 err &= ~DIR_COUNT_AGAIN;
6114 count_dir_isize(root, inode_id, &size);
6117 if ((nlink != 1 || refs != 1) && repair) {
6118 ret = repair_inode_nlinks_lowmem(root, path, inode_id,
6119 namebuf, name_len, refs, imode_to_type(mode),
6124 err |= LINK_COUNT_ERROR;
6125 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
6126 root->objectid, inode_id, nlink);
6130 * Just a warning, as dir inode nbytes is just an
6131 * instructive value.
6133 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
6134 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
6135 root->objectid, inode_id,
6136 root->fs_info->nodesize);
/* The recorded isize must equal the size accumulated from the dir items. */
6139 if (isize != size) {
6141 ret = repair_dir_isize_lowmem(root, path,
6143 if (!repair || ret) {
6146 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
6147 root->objectid, inode_id, isize, size);
/* The recorded nlink must equal the number of refs that were counted. */
6151 if (nlink != refs) {
6153 ret = repair_inode_nlinks_lowmem(root, path,
6154 inode_id, namebuf, name_len, refs,
6155 imode_to_type(mode), &nlink);
6156 if (!repair || ret) {
6157 err |= LINK_COUNT_ERROR;
6159 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
6160 root->objectid, inode_id, nlink, refs);
/* nlink equal to refs but zero: the inode is handled as an orphan. */
6162 } else if (!nlink) {
6164 ret = repair_inode_orphan_item_lowmem(root,
6166 if (!repair || ret) {
6168 error("root %llu INODE[%llu] is orphan item",
6169 root->objectid, inode_id);
/* Zero nbytes with extents ending before isize: the tail needs a hole. */
6173 if (!nbytes && !no_holes && extent_end < isize) {
6175 ret = punch_extent_hole(root, inode_id,
6176 extent_end, isize - extent_end);
6177 if (!repair || ret) {
6178 err |= NBYTES_ERROR;
6180 "root %llu INODE[%llu] size %llu should have a file extent hole",
6181 root->objectid, inode_id, isize);
/* The recorded nbytes must equal the summed size of the file extents. */
6185 if (nbytes != extent_size) {
6187 ret = repair_inode_nbytes_lowmem(root, path,
6188 inode_id, extent_size);
6189 if (!repair || ret) {
6190 err |= NBYTES_ERROR;
6192 "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
6193 root->objectid, inode_id, nbytes,
/* With LAST_ITEM set, step the path forward once more. */
6199 if (err & LAST_ITEM)
6200 btrfs_next_item(root, path);
6205 * Insert the missing inode item and inode ref.
6207 * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref dir.
6208 * Root dir should be handled specially because root dir is the root of fs.
6210 * returns err (>0 or 0) after repair
/*
 * Repair the first inode (BTRFS_FIRST_FREE_OBJECTID) of @root according to
 * the INODE_REF_MISSING / INODE_ITEM_MISSING bits in @err, clearing each bit
 * it manages to fix.
 */
6212 static int repair_fs_first_inode(struct btrfs_root *root, int err)
6214 struct btrfs_trans_handle *trans;
6215 struct btrfs_key key;
6216 struct btrfs_path path;
6217 int filetype = BTRFS_FT_DIR;
6220 btrfs_init_path(&path);
/* Missing INODE_REF: the first inode's ref uses its own objectid as offset. */
6222 if (err & INODE_REF_MISSING) {
6223 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6224 key.type = BTRFS_INODE_REF_KEY;
6225 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6227 trans = btrfs_start_transaction(root, 1);
6228 if (IS_ERR(trans)) {
6229 ret = PTR_ERR(trans);
6233 btrfs_release_path(&path);
6234 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
/* Insert a ".." ref whose inode and parent are both the first inode. */
6238 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
6239 BTRFS_FIRST_FREE_OBJECTID,
6240 BTRFS_FIRST_FREE_OBJECTID, 0);
6244 printf("Add INODE_REF[%llu %llu] name %s\n",
6245 BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
6247 err &= ~INODE_REF_MISSING;
6250 error("fail to insert first inode's ref");
6251 btrfs_commit_transaction(trans, root);
/* Missing INODE_ITEM: delegate to repair_inode_item_missing() as a dir. */
6254 if (err & INODE_ITEM_MISSING) {
6255 ret = repair_inode_item_missing(root,
6256 BTRFS_FIRST_FREE_OBJECTID, filetype);
6259 err &= ~INODE_ITEM_MISSING;
6263 error("fail to repair first inode");
6264 btrfs_release_path(&path);
6269 * check first root dir's inode_item and inode_ref
6271 * returns 0 means no error
6272 * returns >0 means error
6273 * returns <0 means fatal error
/*
 * Check that the first inode (BTRFS_FIRST_FREE_OBJECTID) of @root has an
 * INODE_ITEM of directory type and a ".." INODE_REF; problems are collected
 * in an error bitmap and passed to repair_fs_first_inode().
 */
6275 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
6277 struct btrfs_path path;
6278 struct btrfs_key key;
6279 struct btrfs_inode_item *ii;
6285 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6286 key.type = BTRFS_INODE_ITEM_KEY;
6289 /* For root being dropped, we don't need to check first inode */
6290 if (btrfs_root_refs(&root->root_item) == 0 &&
6291 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
6292 BTRFS_FIRST_FREE_OBJECTID)
6295 btrfs_init_path(&path);
6296 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6301 err |= INODE_ITEM_MISSING;
/* The item exists: its mode must describe a directory. */
6303 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6304 struct btrfs_inode_item);
6305 mode = btrfs_inode_mode(path.nodes[0], ii);
6306 if (imode_to_type(mode) != BTRFS_FT_DIR)
6307 err |= INODE_ITEM_MISMATCH;
6310 /* lookup first inode ref */
6311 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6312 key.type = BTRFS_INODE_REF_KEY;
6313 /* special index value */
6316 ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6322 btrfs_release_path(&path);
/* Give the repair helper the bitmap; it returns the bits still set. */
6325 err = repair_fs_first_inode(root, err);
6327 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6328 error("root dir INODE_ITEM is %s",
6329 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6330 if (err & INODE_REF_MISSING)
6331 error("root dir INODE_REF is missing");
/* A negative ret takes precedence over the error bitmap. */
6333 return ret < 0 ? ret : err;
/*
 * Look up a tree backref of @rec by building a temporary key node and
 * searching rec->backref_tree with compare_extent_backref().
 */
6336 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6337 u64 parent, u64 root)
6339 struct rb_node *node;
6340 struct tree_backref *back = NULL;
6341 struct tree_backref match = {
/* Key setup for the case where the backref is identified by its parent. */
6348 match.parent = parent;
6349 match.node.full_backref = 1;
6354 node = rb_search(&rec->backref_tree, &match.node.node,
6355 (rb_compare_keys)compare_extent_backref, NULL);
/* Convert the rb node that was found back into its tree_backref. */
6357 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Look up a data backref of @rec by building a temporary key node and
 * searching rec->backref_tree with compare_extent_backref().
 */
6362 static struct data_backref *find_data_backref(struct extent_record *rec,
6363 u64 parent, u64 root,
6364 u64 owner, u64 offset,
6366 u64 disk_bytenr, u64 bytes)
6368 struct rb_node *node;
6369 struct data_backref *back = NULL;
6370 struct data_backref match = {
6377 .found_ref = found_ref,
6378 .disk_bytenr = disk_bytenr,
/* Key setup for the case where the backref is identified by its parent. */
6382 match.parent = parent;
6383 match.node.full_backref = 1;
6388 node = rb_search(&rec->backref_tree, &match.node.node,
6389 (rb_compare_keys)compare_extent_backref, NULL);
/* Convert the rb node that was found back into its data_backref. */
6391 back = to_data_backref(rb_node_to_extent_backref(node));
6396 * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
6397 * blocks and integrity of fs tree items.
6399 * @root: the root of the tree to be checked.
6400 * @ext_ref: whether the EXTENDED_IREF feature is enabled.
6401 * @account if NOT 0 means check the tree (including tree)'s treeblocks.
6402 * otherwise means check fs tree(s) items relationship and
6403 * @root MUST be a fs tree root.
6404 * Returns 0 represents OK.
6405 * Returns not 0 represents error.
/*
 * Walk the tree of @root with walk_down_tree_v2() / walk_up_tree_v2(),
 * after first checking the root directory inode via check_fs_first_inode().
 */
6407 static int check_btrfs_root(struct btrfs_trans_handle *trans,
6408 struct btrfs_root *root, unsigned int ext_ref,
6412 struct btrfs_path path;
6413 struct node_refs nrefs;
6414 struct btrfs_root_item *root_item = &root->root_item;
6419 memset(&nrefs, 0, sizeof(nrefs));
6422 * We need to manually check the first inode item (256)
6423 * As the following traversal function will only start from
6424 * the first inode item in the leaf, if inode item (256) is
6425 * missing we will skip it forever.
6427 ret = check_fs_first_inode(root, ext_ref);
6433 level = btrfs_header_level(root->node);
6434 btrfs_init_path(&path);
/*
 * Start at the root node when the root still has refs or has no drop
 * progress recorded; otherwise start from the recorded drop_progress key.
 */
6436 if (btrfs_root_refs(root_item) > 0 ||
6437 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6438 path.nodes[level] = root->node;
6439 path.slots[level] = 0;
6440 extent_buffer_get(root->node);
6442 struct btrfs_key key;
6444 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6445 level = root_item->drop_level;
6446 path.lowest_level = level;
6447 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6454 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6455 ext_ref, check_all);
6459 /* if ret is negative, walk shall stop */
6465 ret = walk_up_tree_v2(root, &path, &level);
6467 /* Normal exit, reset ret to err */
6474 btrfs_release_path(&path);
/* Forward declaration; the definition is not part of this section. */
6478 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info);
6481 * Iterate all items in the tree and call check_inode_item() to check.
6483 * @root: the root of the tree to be checked.
6484 * @ext_ref: the EXTENDED_IREF feature
6486 * Return 0 if no error found.
6487 * Return <0 for error.
/*
 * Thin wrapper: runs check_btrfs_root() on @root with a NULL transaction
 * handle and 0 as the last argument.
 */
6489 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6491 return check_btrfs_root(NULL, root, ext_ref, 0);
6495 * Find the relative ref for root_ref and root_backref.
6497 * @root: the root of the root tree.
6498 * @ref_key: the key of the root ref.
6500 * Return 0 if no error occurred.
/*
 * For the ROOT_REF/ROOT_BACKREF item @ref_key at @node/@slot, find the
 * counterpart item in @root and compare dirid, sequence, name length and
 * name.  Problems are OR-ed into an error bitmap.
 */
6502 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6503 struct extent_buffer *node, int slot)
6505 struct btrfs_path path;
6506 struct btrfs_key key;
6507 struct btrfs_root_ref *ref;
6508 struct btrfs_root_ref *backref;
6509 char ref_name[BTRFS_NAME_LEN] = {0};
6510 char backref_name[BTRFS_NAME_LEN] = {0};
6516 u32 backref_namelen;
6521 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6522 ref_dirid = btrfs_root_ref_dirid(node, ref);
6523 ref_seq = btrfs_root_ref_sequence(node, ref);
6524 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* Names longer than BTRFS_NAME_LEN are cut to that length, with a warning. */
6526 if (ref_namelen <= BTRFS_NAME_LEN) {
6529 len = BTRFS_NAME_LEN;
6530 warning("%s[%llu %llu] ref_name too long",
6531 ref_key->type == BTRFS_ROOT_REF_KEY ?
6532 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
/* The name bytes are stored right after the btrfs_root_ref structure. */
6535 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6537 /* Find relative root_ref */
/*
 * The counterpart key swaps objectid and offset; the sum-minus-type
 * expression turns ROOT_REF into ROOT_BACKREF and vice versa.
 */
6538 key.objectid = ref_key->offset;
6539 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6540 key.offset = ref_key->objectid;
6542 btrfs_init_path(&path);
6543 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6545 err |= ROOT_REF_MISSING;
6546 error("%s[%llu %llu] couldn't find relative ref",
6547 ref_key->type == BTRFS_ROOT_REF_KEY ?
6548 "ROOT_REF" : "ROOT_BACKREF",
6549 ref_key->objectid, ref_key->offset);
/* Read the same fields from the counterpart item. */
6553 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6554 struct btrfs_root_ref);
6555 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6556 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6557 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6559 if (backref_namelen <= BTRFS_NAME_LEN) {
6560 len = backref_namelen;
6562 len = BTRFS_NAME_LEN;
6563 warning("%s[%llu %llu] ref_name too long",
6564 key.type == BTRFS_ROOT_REF_KEY ?
6565 "ROOT_REF" : "ROOT_BACKREF",
6566 key.objectid, key.offset);
6568 read_extent_buffer(path.nodes[0], backref_name,
6569 (unsigned long)(backref + 1), len);
/* Any difference in dirid, sequence, name length or name is a mismatch. */
6571 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6572 ref_namelen != backref_namelen ||
6573 strncmp(ref_name, backref_name, len)) {
6574 err |= ROOT_REF_MISMATCH;
6575 error("%s[%llu %llu] mismatch relative ref",
6576 ref_key->type == BTRFS_ROOT_REF_KEY ?
6577 "ROOT_REF" : "ROOT_BACKREF",
6578 ref_key->objectid, ref_key->offset);
6581 btrfs_release_path(&path);
6586 * Check all fs/file tree in low_memory mode.
6588 * 1. for fs tree root item, call check_fs_root_v2()
6589 * 2. for fs tree root ref/backref, call check_root_ref()
6591 * Return 0 if no error occurred.
/*
 * Iterate the items of the tree root starting at BTRFS_FS_TREE_OBJECTID:
 * fs-tree ROOT_ITEMs are read and checked with check_fs_root_v2(),
 * ROOT_REF/ROOT_BACKREF items are checked with check_root_ref().
 */
6593 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6595 struct btrfs_root *tree_root = fs_info->tree_root;
6596 struct btrfs_root *cur_root = NULL;
6597 struct btrfs_path path;
6598 struct btrfs_key key;
6599 struct extent_buffer *node;
6600 unsigned int ext_ref;
6605 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6607 btrfs_init_path(&path);
6608 key.objectid = BTRFS_FS_TREE_OBJECTID;
6610 key.type = BTRFS_ROOT_ITEM_KEY;
6612 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6616 } else if (ret > 0) {
6622 node = path.nodes[0];
6623 slot = path.slots[0];
6624 btrfs_item_key_to_cpu(node, &key, slot);
/* Objectids above BTRFS_LAST_FREE_OBJECTID are outside the checked range. */
6625 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6627 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6628 fs_root_objectid(key.objectid)) {
/* Reloc tree roots are read without the cache and freed again below. */
6629 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6630 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6633 key.offset = (u64)-1;
6634 cur_root = btrfs_read_fs_root(fs_info, &key);
6637 if (IS_ERR(cur_root)) {
6638 error("Fail to read fs/subvol tree: %lld",
6644 ret = check_fs_root_v2(cur_root, ext_ref);
6647 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6648 btrfs_free_fs_root(cur_root);
6649 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6650 key.type == BTRFS_ROOT_BACKREF_KEY) {
6651 ret = check_root_ref(tree_root, &key, node, slot);
/* Advance to the next item of the tree root. */
6655 ret = btrfs_next_item(tree_root, &path);
6665 btrfs_release_path(&path);
/*
 * Dispatch the fs-roots check by check_mode: check_fs_roots_v2() for
 * CHECK_MODE_LOWMEM, check_fs_roots() otherwise.
 */
6669 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6670 struct cache_tree *root_cache)
/* The plain status line is printed only when progress reporting is off. */
6674 if (!ctx.progress_enabled)
6675 fprintf(stderr, "checking fs roots\n");
6676 if (check_mode == CHECK_MODE_LOWMEM)
6677 ret = check_fs_roots_v2(fs_info);
6679 ret = check_fs_roots(fs_info, root_cache);
/*
 * Walk every backref of @rec and look for inconsistencies: backrefs absent
 * from the extent tree, unreferenced tree backrefs, wrong local ref counts,
 * mismatching bytenr/bytes, and a global count that differs from rec->refs.
 * Diagnostics go to stderr.
 * NOTE(review): the guards that use @print_errs are not visible here.
 */
6684 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6686 struct extent_backref *back, *tmp;
6687 struct tree_backref *tback;
6688 struct data_backref *dback;
6692 rbtree_postorder_for_each_entry_safe(back, tmp,
6693 &rec->backref_tree, node) {
/* Case 1: the backref was never seen in the extent tree. */
6694 if (!back->found_extent_tree) {
6698 if (back->is_data) {
6699 dback = to_data_backref(back);
6700 fprintf(stderr, "Data backref %llu %s %llu"
6701 " owner %llu offset %llu num_refs %lu"
6702 " not found in extent tree\n",
6703 (unsigned long long)rec->start,
6704 back->full_backref ?
6706 back->full_backref ?
6707 (unsigned long long)dback->parent:
6708 (unsigned long long)dback->root,
6709 (unsigned long long)dback->owner,
6710 (unsigned long long)dback->offset,
6711 (unsigned long)dback->num_refs);
6713 tback = to_tree_backref(back);
6714 fprintf(stderr, "Tree backref %llu parent %llu"
6715 " root %llu not found in extent tree\n",
6716 (unsigned long long)rec->start,
6717 (unsigned long long)tback->parent,
6718 (unsigned long long)tback->root);
/* Case 2: a tree backref that nothing references back. */
6721 if (!back->is_data && !back->found_ref) {
6725 tback = to_tree_backref(back);
6726 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6727 (unsigned long long)rec->start,
6728 back->full_backref ? "parent" : "root",
6729 back->full_backref ?
6730 (unsigned long long)tback->parent :
6731 (unsigned long long)tback->root, back);
/* Case 3: data backref checks (local count, bytenr, byte length). */
6733 if (back->is_data) {
6734 dback = to_data_backref(back);
6735 if (dback->found_ref != dback->num_refs) {
6739 fprintf(stderr, "Incorrect local backref count"
6740 " on %llu %s %llu owner %llu"
6741 " offset %llu found %u wanted %u back %p\n",
6742 (unsigned long long)rec->start,
6743 back->full_backref ?
6745 back->full_backref ?
6746 (unsigned long long)dback->parent:
6747 (unsigned long long)dback->root,
6748 (unsigned long long)dback->owner,
6749 (unsigned long long)dback->offset,
6750 dback->found_ref, dback->num_refs, back);
6752 if (dback->disk_bytenr != rec->start) {
6756 fprintf(stderr, "Backref disk bytenr does not"
6757 " match extent record, bytenr=%llu, "
6758 "ref bytenr=%llu\n",
6759 (unsigned long long)rec->start,
6760 (unsigned long long)dback->disk_bytenr);
6763 if (dback->bytes != rec->nr) {
6767 fprintf(stderr, "Backref bytes do not match "
6768 "extent backref, bytenr=%llu, ref "
6769 "bytes=%llu, backref bytes=%llu\n",
6770 (unsigned long long)rec->start,
6771 (unsigned long long)rec->nr,
6772 (unsigned long long)dback->bytes);
/* Accumulate the global reference count; data backrefs add found_ref. */
6775 if (!back->is_data) {
6778 dback = to_data_backref(back);
6779 found += dback->found_ref;
/* Case 4: the accumulated count must match the record's ref count. */
6782 if (found != rec->refs) {
6786 fprintf(stderr, "Incorrect global backref count "
6787 "on %llu found %llu wanted %llu\n",
6788 (unsigned long long)rec->start,
6789 (unsigned long long)found,
6790 (unsigned long long)rec->refs);
/*
 * Per-node callback passed to rb_free_nodes(); converts the rb node to its
 * extent_backref.
 */
6796 static void __free_one_backref(struct rb_node *node)
6798 struct extent_backref *back = rb_node_to_extent_backref(node);
/* Release every backref in rec->backref_tree through __free_one_backref(). */
6803 static void free_all_extent_backrefs(struct extent_record *rec)
6805 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Remove extent records from @extent_cache, freeing the backrefs of each
 * record that is removed.
 */
6808 static void free_extent_record_cache(struct cache_tree *extent_cache)
6810 struct cache_extent *cache;
6811 struct extent_record *rec;
/* Take the first remaining cache entry. */
6814 cache = first_cache_extent(extent_cache);
/* Map the cache entry to its record, unlink it and drop its backrefs. */
6817 rec = container_of(cache, struct extent_record, cache);
6818 remove_cache_extent(extent_cache, cache);
6819 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache when the record is fully verified: content
 * and owner ref checked, ref counts consistent and non-zero, no duplicates,
 * all backpointers clean, and no bad-full-backref / crossing-stripes /
 * wrong-chunk-type flag set.
 */
6824 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6825 struct extent_record *rec)
6827 if (rec->content_checked && rec->owner_ref_checked &&
6828 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6829 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6830 !rec->bad_full_backref && !rec->crossing_stripes &&
6831 !rec->wrong_chunk_type) {
/* Unlink from the cache, free the backrefs, detach from the list. */
6832 remove_cache_extent(extent_cache, &rec->cache);
6833 free_all_extent_backrefs(rec);
6834 list_del_init(&rec->list);
/*
 * Check that tree block @buf is referenced by the root named in its header:
 * first by looking at the non-full tree backrefs of @rec, then by searching
 * the owner's fs tree for a parent node that points at @buf.
 * Returns 0 when found, 1 otherwise (see the final return).
 */
6840 static int check_owner_ref(struct btrfs_root *root,
6841 struct extent_record *rec,
6842 struct extent_buffer *buf)
6844 struct extent_backref *node, *tmp;
6845 struct tree_backref *back;
6846 struct btrfs_root *ref_root;
6847 struct btrfs_key key;
6848 struct btrfs_path path;
6849 struct extent_buffer *parent;
6854 rbtree_postorder_for_each_entry_safe(node, tmp,
6855 &rec->backref_tree, node) {
/* The owner comparison applies to found, non-full backrefs. */
6858 if (!node->found_ref)
6860 if (node->full_backref)
6862 back = to_tree_backref(node);
6863 if (btrfs_header_owner(buf) == back->root)
6866 BUG_ON(rec->is_root);
6868 /* try to find the block by search corresponding fs tree */
6869 key.objectid = btrfs_header_owner(buf);
6870 key.type = BTRFS_ROOT_ITEM_KEY;
6871 key.offset = (u64)-1;
6873 ref_root = btrfs_read_fs_root(root->fs_info, &key);
6874 if (IS_ERR(ref_root))
/* Use the first key of @buf (item key or node key) as the search key. */
6877 level = btrfs_header_level(buf);
6879 btrfs_item_key_to_cpu(buf, &key, 0);
6881 btrfs_node_key_to_cpu(buf, &key, 0);
/* Stop the search one level above @buf so the parent node is in the path. */
6883 btrfs_init_path(&path);
6884 path.lowest_level = level + 1;
6885 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
/* The parent slot must hold a block pointer equal to buf->start. */
6889 parent = path.nodes[level + 1];
6890 if (parent && buf->start == btrfs_node_blockptr(parent,
6891 path.slots[level + 1]))
6894 btrfs_release_path(&path);
6895 return found ? 0 : 1;
/*
 * Inspect the backrefs of @rec for a tree backref whose root is
 * BTRFS_EXTENT_TREE_OBJECTID.  Each entry's full_backref flag is also
 * examined on the way.
 *
 * Presumably returns non-zero when such a backref identifies the record as
 * belonging to the extent tree -- TODO confirm the exact return values.
 */
6898 static int is_extent_tree_record(struct extent_record *rec)
6900 struct extent_backref *node, *tmp;
6901 struct tree_backref *back;
6904 rbtree_postorder_for_each_entry_safe(node, tmp,
6905 &rec->backref_tree, node) {
6908 back = to_tree_backref(node);
6909 if (node->full_backref)
6911 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Register a block that could not be read as a corrupt extent.
 *
 * The extent record for the start/len range is looked up in @extent_cache
 * and tested with is_extent_tree_record().  On the path that reaches the end
 * of the function the record's parent key is converted to CPU byte order and
 * handed, together with start and len, to btrfs_add_corrupt_extent_record(),
 * whose result is returned.
 */
6918 static int record_bad_block_io(struct btrfs_fs_info *info,
6919 struct cache_tree *extent_cache,
6922 struct extent_record *rec;
6923 struct cache_extent *cache;
6924 struct btrfs_key key;
6926 cache = lookup_cache_extent(extent_cache, start, len);
6930 rec = container_of(cache, struct extent_record, cache);
6931 if (!is_extent_tree_record(rec))
6934 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6935 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of tree block @buf.
 *
 * Node (header level != 0): the two btrfs_key_ptr entries are read out and
 * written back in each other's position.  The first key of the block is then
 * pushed up with btrfs_fixup_low_keys() (presumably only when @slot is 0 --
 * TODO confirm).
 *
 * Leaf: the payloads of both items are copied into temporary heap buffers
 * and written back at each other's offset, the offset and size fields of the
 * two item headers are exchanged, and the keys are rewritten through
 * btrfs_set_item_key_unsafe() with path->slots[0] pointed at each slot in
 * turn.
 */
6938 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6939 struct extent_buffer *buf, int slot)
6941 if (btrfs_header_level(buf)) {
6942 struct btrfs_key_ptr ptr1, ptr2;
6944 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6945 sizeof(struct btrfs_key_ptr));
6946 read_extent_buffer(buf, &ptr2,
6947 btrfs_node_key_ptr_offset(slot + 1),
6948 sizeof(struct btrfs_key_ptr));
6949 write_extent_buffer(buf, &ptr1,
6950 btrfs_node_key_ptr_offset(slot + 1),
6951 sizeof(struct btrfs_key_ptr));
6952 write_extent_buffer(buf, &ptr2,
6953 btrfs_node_key_ptr_offset(slot),
6954 sizeof(struct btrfs_key_ptr));
6956 struct btrfs_disk_key key;
6957 btrfs_node_key(buf, &key, 0);
6958 btrfs_fixup_low_keys(root, path, &key,
6959 btrfs_header_level(buf) + 1);
6962 struct btrfs_item *item1, *item2;
6963 struct btrfs_key k1, k2;
6964 char *item1_data, *item2_data;
6965 u32 item1_offset, item2_offset, item1_size, item2_size;
6967 item1 = btrfs_item_nr(slot);
6968 item2 = btrfs_item_nr(slot + 1);
6969 btrfs_item_key_to_cpu(buf, &k1, slot);
6970 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6971 item1_offset = btrfs_item_offset(buf, item1);
6972 item2_offset = btrfs_item_offset(buf, item2);
6973 item1_size = btrfs_item_size(buf, item1);
6974 item2_size = btrfs_item_size(buf, item2);
6976 item1_data = malloc(item1_size);
6979 item2_data = malloc(item2_size);
6985 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6986 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): each write-back uses the size of the item being overwritten,
 * not the size of the buffer being copied (item1_data holds item1_size bytes
 * but item2_size bytes are written from it, and vice versa).  This looks safe
 * only when both items have the same size -- confirm.
 */
6988 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6989 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6993 btrfs_set_item_offset(buf, item1, item2_offset);
6994 btrfs_set_item_offset(buf, item2, item1_offset);
6995 btrfs_set_item_size(buf, item1, item2_size);
6996 btrfs_set_item_size(buf, item2, item1_size);
6998 path->slots[0] = slot;
6999 btrfs_set_item_key_unsafe(root, path, &k2);
7000 path->slots[0] = slot + 1;
7001 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair the key ordering of the block at path->lowest_level.
 *
 * Neighbouring keys (i, i + 1) are read with the node or the item accessor
 * and compared with btrfs_comp_cpu_keys().  A pair is passed to swap_values()
 * after the "< 0" comparison; pairs that already compare as ordered are
 * presumably skipped -- TODO confirm.  The buffer is marked dirty after a
 * swap.
 */
7006 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
7008 struct extent_buffer *buf;
7009 struct btrfs_key k1, k2;
7011 int level = path->lowest_level;
7014 buf = path->nodes[level];
7015 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
7017 btrfs_node_key_to_cpu(buf, &k1, i);
7018 btrfs_node_key_to_cpu(buf, &k2, i + 1);
7020 btrfs_item_key_to_cpu(buf, &k1, i);
7021 btrfs_item_key_to_cpu(buf, &k2, i + 1);
7023 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
7025 ret = swap_values(root, path, buf, i);
7028 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * Only a fixed set of key types is eligible: DIR_INDEX, EXTENT_ITEM,
 * METADATA_ITEM, TREE_BLOCK_REF and EXTENT_DATA_REF.  For an eligible item a
 * message is printed, the item headers following @slot are moved down by one
 * entry, the item count in the header is decremented and the buffer is
 * marked dirty.  Only the item header array is shifted by the visible code;
 * no item data is moved.
 *
 * The first key of the leaf is propagated with btrfs_fixup_low_keys()
 * (presumably only when @slot is 0 -- TODO confirm).
 */
7034 static int delete_bogus_item(struct btrfs_root *root,
7035 struct btrfs_path *path,
7036 struct extent_buffer *buf, int slot)
7038 struct btrfs_key key;
7039 int nritems = btrfs_header_nritems(buf);
7041 btrfs_item_key_to_cpu(buf, &key, slot);
7043 /* These are all the keys we can deal with missing. */
7044 if (key.type != BTRFS_DIR_INDEX_KEY &&
7045 key.type != BTRFS_EXTENT_ITEM_KEY &&
7046 key.type != BTRFS_METADATA_ITEM_KEY &&
7047 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7048 key.type != BTRFS_EXTENT_DATA_REF_KEY)
7051 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
7052 (unsigned long long)key.objectid, key.type,
7053 (unsigned long long)key.offset, slot, buf->start);
7054 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
7055 btrfs_item_nr_offset(slot + 1),
7056 sizeof(struct btrfs_item) *
7057 (nritems - slot - 1));
7058 btrfs_set_header_nritems(buf, nritems - 1);
7060 struct btrfs_disk_key disk_key;
7062 btrfs_item_key(buf, &disk_key, 0);
7063 btrfs_fixup_low_keys(root, path, &disk_key, 1);
7065 btrfs_mark_buffer_dirty(buf);
/*
 * Repair the data offsets of the items in the leaf at path->nodes[0].
 *
 * Expected layout checked by the loop:
 *  - item 0 must end exactly at BTRFS_LEAF_DATA_SIZE(root),
 *  - every later item must end exactly where the previous item starts.
 *
 * If an item ends beyond its expected boundary, delete_bogus_item() is tried
 * on it; the "can't fix" messages belong to that branch (presumably printed
 * when the deletion does not succeed -- TODO confirm).  If the item ends
 * short of the boundary, the gap becomes @shift: the item's data is moved up
 * by that many bytes, its offset is updated and the buffer is marked dirty.
 *
 * Must only be called for leaves (BUG_ON on path->lowest_level).
 */
7069 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
7071 struct extent_buffer *buf;
7075 /* We should only get this for leaves */
7076 BUG_ON(path->lowest_level);
7077 buf = path->nodes[0];
7079 for (i = 0; i < btrfs_header_nritems(buf); i++) {
7080 unsigned int shift = 0, offset;
7082 if (i == 0 && btrfs_item_end_nr(buf, i) !=
7083 BTRFS_LEAF_DATA_SIZE(root)) {
7084 if (btrfs_item_end_nr(buf, i) >
7085 BTRFS_LEAF_DATA_SIZE(root)) {
7086 ret = delete_bogus_item(root, path, buf, i);
7089 fprintf(stderr, "item is off the end of the "
7090 "leaf, can't fix\n");
7094 shift = BTRFS_LEAF_DATA_SIZE(root) -
7095 btrfs_item_end_nr(buf, i);
7096 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
7097 btrfs_item_offset_nr(buf, i - 1)) {
7098 if (btrfs_item_end_nr(buf, i) >
7099 btrfs_item_offset_nr(buf, i - 1)) {
7100 ret = delete_bogus_item(root, path, buf, i);
7103 fprintf(stderr, "items overlap, can't fix\n");
7107 shift = btrfs_item_offset_nr(buf, i - 1) -
7108 btrfs_item_end_nr(buf, i);
7113 printf("Shifting item nr %d by %u bytes in block %llu\n",
7114 i, shift, (unsigned long long)buf->start);
7115 offset = btrfs_item_offset_nr(buf, i);
7116 memmove_extent_buffer(buf,
7117 btrfs_leaf_data(buf) + offset + shift,
7118 btrfs_leaf_data(buf) + offset,
7119 btrfs_item_size_nr(buf, i));
7120 btrfs_set_item_offset(buf, btrfs_item_nr(i),
7122 btrfs_mark_buffer_dirty(buf);
7126 * We may have moved things, in which case we want to exit so we don't
7127 * write those changes out. Once we have proper abort functionality in
7128 * progs this can be changed to something nicer.
7135 * Attempt to fix basic block failures. If we can't fix it for whatever reason
7136 * then just return -EIO.
/*
 * Try to repair tree block @buf in every root that references it.
 *
 * Only BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS
 * are handled.  The roots pointing at buf->start are collected with
 * btrfs_find_all_roots(); for each of them the fs root is read, a
 * transaction is started, and the tree is searched down to the level of @buf
 * using its first key.  The search runs with path.skip_check_block set and
 * with 1 as the last btrfs_search_slot() argument (presumably requesting
 * COW -- confirm).  Depending on @status either fix_key_order() or
 * fix_item_offset() is then applied and the transaction is committed.
 */
7138 static int try_to_fix_bad_block(struct btrfs_root *root,
7139 struct extent_buffer *buf,
7140 enum btrfs_tree_block_status status)
7142 struct btrfs_trans_handle *trans;
7143 struct ulist *roots;
7144 struct ulist_node *node;
7145 struct btrfs_root *search_root;
7146 struct btrfs_path path;
7147 struct ulist_iterator iter;
7148 struct btrfs_key root_key, key;
7151 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
7152 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7155 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
7159 btrfs_init_path(&path);
7160 ULIST_ITER_INIT(&iter);
7161 while ((node = ulist_next(roots, &iter))) {
7162 root_key.objectid = node->val;
7163 root_key.type = BTRFS_ROOT_ITEM_KEY;
7164 root_key.offset = (u64)-1;
7166 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
7173 trans = btrfs_start_transaction(search_root, 0);
7174 if (IS_ERR(trans)) {
7175 ret = PTR_ERR(trans);
7179 path.lowest_level = btrfs_header_level(buf);
7180 path.skip_check_block = 1;
7181 if (path.lowest_level)
7182 btrfs_node_key_to_cpu(buf, &key, 0);
7184 btrfs_item_key_to_cpu(buf, &key, 0);
7185 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
7188 btrfs_commit_transaction(trans, search_root);
7191 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
7192 ret = fix_key_order(search_root, &path);
7193 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7194 ret = fix_item_offset(search_root, &path);
7196 btrfs_commit_transaction(trans, search_root);
7199 btrfs_release_path(&path);
7200 btrfs_commit_transaction(trans, search_root);
7203 btrfs_release_path(&path);
/*
 * Validate tree block @buf and update its extent record.
 *
 * The record covering [buf->start, buf->len) is looked up in @extent_cache
 * and annotated with the block's generation, the objectid of its first key
 * (when the block has items) and its level.  The block is then checked with
 * btrfs_check_leaf() or btrfs_check_node() against the recorded parent key.
 * On a non-clean status try_to_fix_bad_block() may be called; if the status
 * is still not clean, "bad block" is reported.
 *
 * Afterwards the record is marked content_checked; owner_ref_checked is set
 * when @flags carries BTRFS_BLOCK_FLAG_FULL_BACKREF or, presumably, when
 * check_owner_ref() succeeds -- TODO confirm.  Finally the record is offered
 * to maybe_free_extent_rec().
 */
7207 static int check_block(struct btrfs_root *root,
7208 struct cache_tree *extent_cache,
7209 struct extent_buffer *buf, u64 flags)
7211 struct extent_record *rec;
7212 struct cache_extent *cache;
7213 struct btrfs_key key;
7214 enum btrfs_tree_block_status status;
7218 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
7221 rec = container_of(cache, struct extent_record, cache);
7222 rec->generation = btrfs_header_generation(buf);
7224 level = btrfs_header_level(buf);
7225 if (btrfs_header_nritems(buf) > 0) {
7228 btrfs_item_key_to_cpu(buf, &key, 0);
7230 btrfs_node_key_to_cpu(buf, &key, 0);
7232 rec->info_objectid = key.objectid;
7234 rec->info_level = level;
7236 if (btrfs_is_leaf(buf))
7237 status = btrfs_check_leaf(root, &rec->parent_key, buf);
7239 status = btrfs_check_node(root, &rec->parent_key, buf);
7241 if (status != BTRFS_TREE_BLOCK_CLEAN) {
/*
 * NOTE(review): try_to_fix_bad_block() is declared to return int, yet its
 * result is stored in the status enum and compared with
 * BTRFS_TREE_BLOCK_CLEAN below -- confirm the two value spaces line up.
 */
7243 status = try_to_fix_bad_block(root, buf, status);
7244 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7246 fprintf(stderr, "bad block %llu\n",
7247 (unsigned long long)buf->start);
7250 * Signal to callers we need to start the scan over
7251 * again since we'll have cowed blocks.
7256 rec->content_checked = 1;
7257 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7258 rec->owner_ref_checked = 1;
7260 ret = check_owner_ref(root, rec, buf);
7262 rec->owner_ref_checked = 1;
7266 maybe_free_extent_rec(extent_cache, rec);
/*
 * Search the rec->backrefs list of @rec for a tree backref.
 *
 * Two kinds of match are visible: full backrefs are compared on @parent and
 * non-full backrefs are compared on @root.  Which comparison applies is
 * presumably selected by whether @parent is non-zero -- TODO confirm.
 */
7271 static struct tree_backref *find_tree_backref(struct extent_record *rec,
7272 u64 parent, u64 root)
7274 struct list_head *cur = rec->backrefs.next;
7275 struct extent_backref *node;
7276 struct tree_backref *back;
7278 while(cur != &rec->backrefs) {
7279 node = to_extent_backref(cur);
7283 back = to_tree_backref(node);
7285 if (!node->full_backref)
7287 if (parent == back->parent)
7290 if (node->full_backref)
7292 if (back->root == root)
/*
 * Allocate a tree backref for @rec.
 *
 * The embedded extent_backref node is zeroed.  One visible variant stores
 * @parent and sets full_backref to 1, the other clears full_backref;
 * presumably the latter stores @root instead and the choice depends on
 * whether @parent is non-zero -- TODO confirm.
 */
7300 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
7301 u64 parent, u64 root)
7303 struct tree_backref *ref = malloc(sizeof(*ref));
7307 memset(&ref->node, 0, sizeof(ref->node));
7309 ref->parent = parent;
7310 ref->node.full_backref = 1;
7313 ref->node.full_backref = 0;
/*
 * Search the rec->backrefs list of @rec for a data backref.
 *
 * Full backrefs are compared on @parent; non-full backrefs are compared on
 * the (@root, @owner, @offset) triple.  For a triple match there is an extra
 * test: when the found_ref argument is set and the candidate already has
 * found_ref, a differing bytes or disk_bytenr value is treated specially
 * (presumably the candidate is rejected so a separate backref gets created
 * -- TODO confirm).
 */
7320 static struct data_backref *find_data_backref(struct extent_record *rec,
7321 u64 parent, u64 root,
7322 u64 owner, u64 offset,
7324 u64 disk_bytenr, u64 bytes)
7326 struct list_head *cur = rec->backrefs.next;
7327 struct extent_backref *node;
7328 struct data_backref *back;
7330 while(cur != &rec->backrefs) {
7331 node = to_extent_backref(cur);
7335 back = to_data_backref(node);
7337 if (!node->full_backref)
7339 if (parent == back->parent)
7342 if (node->full_backref)
7344 if (back->root == root && back->owner == owner &&
7345 back->offset == offset) {
7346 if (found_ref && node->found_ref &&
7347 (back->bytes != bytes ||
7348 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data backref for @rec.
 *
 * The embedded node is zeroed and flagged is_data.  One visible variant
 * stores @parent with full_backref = 1, the other stores @offset with
 * full_backref = 0 (presumably together with @root and @owner -- TODO
 * confirm).  The backref's byte count is initialised from max_size, and
 * rec->max_size is raised to max_size when that is larger.
 */
7358 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7359 u64 parent, u64 root,
7360 u64 owner, u64 offset,
7363 struct data_backref *ref = malloc(sizeof(*ref));
7367 memset(&ref->node, 0, sizeof(ref->node));
7368 ref->node.is_data = 1;
7371 ref->parent = parent;
7374 ref->node.full_backref = 1;
7378 ref->offset = offset;
7379 ref->node.full_backref = 0;
7381 ref->bytes = max_size;
7384 if (max_size > rec->max_size)
7385 rec->max_size = max_size;
7389 /* Check if the type of extent matches with its chunk */
/*
 * The block group is looked up from global_info by rec->start and
 * rec->wrong_chunk_type is set on a mismatch:
 *  - a data extent must live in a block group flagged DATA;
 *  - a metadata extent must live in a SYSTEM or METADATA block group;
 *  - if the record has backrefs, the first one in backref_tree refines the
 *    check: a data backref on a tree block is always wrong, a tree backref
 *    rooted in the chunk tree requires a SYSTEM block group, and any other
 *    root (presumably) requires METADATA.
 */
7390 static void check_extent_type(struct extent_record *rec)
7392 struct btrfs_block_group_cache *bg_cache;
7394 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7398 /* data extent, check chunk directly*/
7399 if (!rec->metadata) {
7400 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7401 rec->wrong_chunk_type = 1;
7405 /* metadata extent, check the obvious case first */
7406 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7407 BTRFS_BLOCK_GROUP_METADATA))) {
7408 rec->wrong_chunk_type = 1;
7413 * Check SYSTEM extent, as it's also marked as metadata, we can only
7414 * make sure it's a SYSTEM extent by its backref
7416 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7417 struct extent_backref *node;
7418 struct tree_backref *tback;
7421 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7422 if (node->is_data) {
7423 /* tree block shouldn't have data backref */
7424 rec->wrong_chunk_type = 1;
7427 tback = container_of(node, struct tree_backref, node);
7429 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7430 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7432 bg_type = BTRFS_BLOCK_GROUP_METADATA;
7433 if (!(bg_cache->flags & bg_type))
7434 rec->wrong_chunk_type = 1;
7439 * Allocate a new extent record, fill default values from @tmpl and insert it into
7440 * @extent_cache. The caller is supposed to make sure the range [start, start + nr)
7441 * is not already in the cache, otherwise the insertion fails.
/*
 * Build a fresh extent_record from @tmpl and insert it into @extent_cache.
 *
 * Fields copied from @tmpl: start, max_size, found_rec, content_checked,
 * owner_ref_checked, metadata, is_root, refs, extent_item_refs,
 * parent_generation and parent_key.  The duplicate counter and the
 * bad_full_backref / crossing_stripes / wrong_chunk_type flags start at 0,
 * flag_block_full_backref starts as FLAG_UNSET, and the lists and the
 * backref tree start empty.
 *
 * After a successful insert the global bytes_used counter grows by rec->nr,
 * crossing_stripes is evaluated with check_crossing_stripes() (presumably
 * only for metadata records -- TODO confirm) and the chunk type is verified
 * with check_extent_type().
 *
 * @tmpl->max_size must be non-zero (BUG_ON).
 */
7443 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7444 struct extent_record *tmpl)
7446 struct extent_record *rec;
7449 BUG_ON(tmpl->max_size == 0);
7450 rec = malloc(sizeof(*rec));
7453 rec->start = tmpl->start;
7454 rec->max_size = tmpl->max_size;
/* rec->nr may exceed the cache entry size below, which uses tmpl->nr as is */
7455 rec->nr = max(tmpl->nr, tmpl->max_size);
7456 rec->found_rec = tmpl->found_rec;
7457 rec->content_checked = tmpl->content_checked;
7458 rec->owner_ref_checked = tmpl->owner_ref_checked;
7459 rec->num_duplicates = 0;
7460 rec->metadata = tmpl->metadata;
7461 rec->flag_block_full_backref = FLAG_UNSET;
7462 rec->bad_full_backref = 0;
7463 rec->crossing_stripes = 0;
7464 rec->wrong_chunk_type = 0;
7465 rec->is_root = tmpl->is_root;
7466 rec->refs = tmpl->refs;
7467 rec->extent_item_refs = tmpl->extent_item_refs;
7468 rec->parent_generation = tmpl->parent_generation;
7469 INIT_LIST_HEAD(&rec->backrefs);
7470 INIT_LIST_HEAD(&rec->dups);
7471 INIT_LIST_HEAD(&rec->list);
7472 rec->backref_tree = RB_ROOT;
7473 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7474 rec->cache.start = tmpl->start;
7475 rec->cache.size = tmpl->nr;
7476 ret = insert_cache_extent(extent_cache, &rec->cache);
7481 bytes_used += rec->nr;
7484 rec->crossing_stripes = check_crossing_stripes(global_info,
7485 rec->start, global_info->nodesize);
7486 check_extent_type(rec);
7491 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7493 * - refs - if found, increase refs
7494 * - is_root - if found, set
7495 * - content_checked - if found, set
7496 * - owner_ref_checked - if found, set
7498 * If not found, create a new one, initialize and insert.
/*
 * Merge @tmpl into the extent record covering [tmpl->start, tmpl->nr) in
 * @extent_cache, or create a new record through add_extent_rec_nolookup()
 * when the lookup finds nothing.
 *
 * For an existing record the visible updates are:
 *  - rec->nr may be reset to max(tmpl->nr, tmpl->max_size);
 *  - when @tmpl comes from an extent item (found_rec) and either its start
 *    differs from the record's start or the record already has found_rec, a
 *    duplicate entry is allocated, queued on rec->dups and counted in
 *    num_duplicates; the record is put on the global duplicate_extents list
 *    if it is not already on a list;
 *  - extent_item_refs is taken from @tmpl when this is not a duplicate; a
 *    message is printed if the record already had a value;
 *  - content_checked / owner_ref_checked are set if set in @tmpl, parent_key
 *    is overwritten, parent_generation is copied when non-zero and max_size
 *    is only ever raised;
 *  - crossing_stripes and the chunk type are re-evaluated, then the record is
 *    offered to maybe_free_extent_rec().
 */
7500 static int add_extent_rec(struct cache_tree *extent_cache,
7501 struct extent_record *tmpl)
7503 struct extent_record *rec;
7504 struct cache_extent *cache;
7508 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7510 rec = container_of(cache, struct extent_record, cache);
7514 rec->nr = max(tmpl->nr, tmpl->max_size);
7517 * We need to make sure to reset nr to whatever the extent
7518 * record says was the real size, this way we can compare it to
7521 if (tmpl->found_rec) {
7522 if (tmpl->start != rec->start || rec->found_rec) {
7523 struct extent_record *tmp;
7526 if (list_empty(&rec->list))
7527 list_add_tail(&rec->list,
7528 &duplicate_extents);
7531 * We have to do this song and dance in case we
7532 * find an extent record that falls inside of
7533 * our current extent record but does not have
7534 * the same objectid.
7536 tmp = malloc(sizeof(*tmp));
7539 tmp->start = tmpl->start;
7540 tmp->max_size = tmpl->max_size;
7543 tmp->metadata = tmpl->metadata;
7544 tmp->extent_item_refs = tmpl->extent_item_refs;
7545 INIT_LIST_HEAD(&tmp->list);
7546 list_add_tail(&tmp->list, &rec->dups);
7547 rec->num_duplicates++;
7554 if (tmpl->extent_item_refs && !dup) {
7555 if (rec->extent_item_refs) {
7556 fprintf(stderr, "block %llu rec "
7557 "extent_item_refs %llu, passed %llu\n",
7558 (unsigned long long)tmpl->start,
7559 (unsigned long long)
7560 rec->extent_item_refs,
7561 (unsigned long long)tmpl->extent_item_refs);
7563 rec->extent_item_refs = tmpl->extent_item_refs;
7567 if (tmpl->content_checked)
7568 rec->content_checked = 1;
7569 if (tmpl->owner_ref_checked)
7570 rec->owner_ref_checked = 1;
7571 memcpy(&rec->parent_key, &tmpl->parent_key,
7572 sizeof(tmpl->parent_key));
7573 if (tmpl->parent_generation)
7574 rec->parent_generation = tmpl->parent_generation;
7575 if (rec->max_size < tmpl->max_size)
7576 rec->max_size = tmpl->max_size;
7579 * A metadata extent can't cross stripe_len boundary, otherwise
7580 * kernel scrub won't be able to handle it.
7581 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7585 rec->crossing_stripes = check_crossing_stripes(
7586 global_info, rec->start,
7587 global_info->nodesize);
7588 check_extent_type(rec);
7589 maybe_free_extent_rec(extent_cache, rec);
7593 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref (@parent or @root) for the extent at @bytenr.
 *
 * If no extent record covers @bytenr yet, one is created from a zeroed
 * template through add_extent_rec_nolookup() and looked up again.  The
 * matching backref is searched with find_tree_backref() and allocated with
 * alloc_tree_backref() when missing.
 *
 * Depending on @found_ref either node.found_ref or node.found_extent_tree is
 * set on the backref; a message is printed when the flag was already set.
 * The backref is inserted into rec->backref_tree (WARN_ON if rb_insert()
 * reports a problem; presumably only newly allocated backrefs are inserted
 * -- TODO confirm), after which the chunk type is re-checked and the record
 * is offered to maybe_free_extent_rec().
 */
7598 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7599 u64 parent, u64 root, int found_ref)
7601 struct extent_record *rec;
7602 struct tree_backref *back;
7603 struct cache_extent *cache;
7605 bool insert = false;
7607 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7609 struct extent_record tmpl;
7611 memset(&tmpl, 0, sizeof(tmpl));
7612 tmpl.start = bytenr;
7617 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7621 /* really a bug in cache_extent implement now */
7622 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7627 rec = container_of(cache, struct extent_record, cache);
7628 if (rec->start != bytenr) {
7630 * Several cause, from unaligned bytenr to over lapping extents
7635 back = find_tree_backref(rec, parent, root);
7637 back = alloc_tree_backref(rec, parent, root);
7644 if (back->node.found_ref) {
7645 fprintf(stderr, "Extent back ref already exists "
7646 "for %llu parent %llu root %llu \n",
7647 (unsigned long long)bytenr,
7648 (unsigned long long)parent,
7649 (unsigned long long)root);
7651 back->node.found_ref = 1;
7653 if (back->node.found_extent_tree) {
7654 fprintf(stderr, "Extent back ref already exists "
7655 "for %llu parent %llu root %llu \n",
7656 (unsigned long long)bytenr,
7657 (unsigned long long)parent,
7658 (unsigned long long)root);
7660 back->node.found_extent_tree = 1;
7663 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7664 compare_extent_backref));
7665 check_extent_type(rec);
7666 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * If no extent record covers @bytenr yet, one is created from a zeroed
 * template (start = @bytenr, max_size = @max_size) and looked up again; the
 * record's max_size is raised to @max_size when that is larger.  The backref
 * is searched with find_data_backref() and allocated with
 * alloc_data_backref() when missing.
 *
 * Reference-found path (presumably taken when @found_ref is set): @num_refs
 * must be 1, and a backref that was already found must carry the same byte
 * count (both BUG_ON).  The backref is marked found, its found_ref counter
 * is incremented, bytes / disk_bytenr are refreshed (with an rb_erase() so
 * the node can be reinserted) and the record is marked content_checked and
 * owner_ref_checked.
 *
 * Extent-tree path: a message is printed if the backref was already seen in
 * the extent tree, then num_refs is stored and found_extent_tree is set.
 *
 * The backref is inserted into rec->backref_tree (WARN_ON if rb_insert()
 * reports a problem) and the record is offered to maybe_free_extent_rec().
 */
7670 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7671 u64 parent, u64 root, u64 owner, u64 offset,
7672 u32 num_refs, int found_ref, u64 max_size)
7674 struct extent_record *rec;
7675 struct data_backref *back;
7676 struct cache_extent *cache;
7678 bool insert = false;
7680 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7682 struct extent_record tmpl;
7684 memset(&tmpl, 0, sizeof(tmpl));
7685 tmpl.start = bytenr;
7687 tmpl.max_size = max_size;
7689 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7693 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7698 rec = container_of(cache, struct extent_record, cache);
7699 if (rec->max_size < max_size)
7700 rec->max_size = max_size;
7703 * If found_ref is set then max_size is the real size and must match the
7704 * existing refs. So if we have already found a ref then we need to
7705 * make sure that this ref matches the existing one, otherwise we need
7706 * to add a new backref so we can notice that the backrefs don't match
7707 * and we need to figure out who is telling the truth. This is to
7708 * account for that awful fsync bug I introduced where we'd end up with
7709 * a btrfs_file_extent_item that would have its length include multiple
7710 * prealloc extents or point inside of a prealloc extent.
7712 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7715 back = alloc_data_backref(rec, parent, root, owner, offset,
7722 BUG_ON(num_refs != 1);
7723 if (back->node.found_ref)
7724 BUG_ON(back->bytes != max_size);
7725 back->node.found_ref = 1;
7726 back->found_ref += 1;
7727 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7728 back->bytes = max_size;
7729 back->disk_bytenr = bytenr;
7731 /* Need to reinsert if not already in the tree */
7733 rb_erase(&back->node.node, &rec->backref_tree);
7738 rec->content_checked = 1;
7739 rec->owner_ref_checked = 1;
7741 if (back->node.found_extent_tree) {
7742 fprintf(stderr, "Extent back ref already exists "
7743 "for %llu parent %llu root %llu "
7744 "owner %llu offset %llu num_refs %lu\n",
7745 (unsigned long long)bytenr,
7746 (unsigned long long)parent,
7747 (unsigned long long)root,
7748 (unsigned long long)owner,
7749 (unsigned long long)offset,
7750 (unsigned long)num_refs);
7752 back->num_refs = num_refs;
7753 back->node.found_extent_tree = 1;
7756 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7757 compare_extent_backref));
7759 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue the block (@bytenr, @size) for processing.
 *
 * The range is first added to @seen and then to @pending.  Presumably the
 * second insert is skipped when the first one fails, i.e. when the block was
 * already seen -- TODO confirm.  The result of the insert into @pending is
 * not captured.
 */
7763 static int add_pending(struct cache_tree *pending,
7764 struct cache_tree *seen, u64 bytenr, u32 size)
7767 ret = add_cache_extent(seen, bytenr, size);
7770 add_cache_extent(pending, bytenr, size);
/*
 * Choose the next batch of blocks to process and store them in @bits
 * (capacity @bits_nr).
 *
 * Sources are consulted in this visible order:
 *  - @reada: its first entry is copied into bits[0];
 *  - @nodes: searched from a position 32768 below @last (when @last is larger
 *    than that), falling back to a search from 0;
 *  - @pending: searched from 0.
 * Consecutive cache entries are copied into @bits until the tree or the
 * array is exhausted.
 *
 * When more than 8 slots are still free, further extents from @pending are
 * appended as long as each one starts within 32768 bytes of the end of the
 * previously accepted extent (tracked in @lookup, seeded from bits[0]).
 */
7774 static int pick_next_pending(struct cache_tree *pending,
7775 struct cache_tree *reada,
7776 struct cache_tree *nodes,
7777 u64 last, struct block_info *bits, int bits_nr,
/*
 * NOTE(review): @last is a u64 but is stored in an unsigned long here; where
 * long is 32 bits wide this truncates the value -- confirm whether that
 * matters for the supported targets.
 */
7780 unsigned long node_start = last;
7781 struct cache_extent *cache;
7784 cache = search_cache_extent(reada, 0);
7786 bits[0].start = cache->start;
7787 bits[0].size = cache->size;
7792 if (node_start > 32768)
7793 node_start -= 32768;
7795 cache = search_cache_extent(nodes, node_start);
7797 cache = search_cache_extent(nodes, 0);
7800 cache = search_cache_extent(pending, 0);
7805 bits[ret].start = cache->start;
7806 bits[ret].size = cache->size;
7807 cache = next_cache_extent(cache);
7809 } while (cache && ret < bits_nr);
7815 bits[ret].start = cache->start;
7816 bits[ret].size = cache->size;
7817 cache = next_cache_extent(cache);
7819 } while (cache && ret < bits_nr);
7821 if (bits_nr - ret > 8) {
7822 u64 lookup = bits[0].start + bits[0].size;
7823 struct cache_extent *next;
7824 next = search_cache_extent(pending, lookup);
7826 if (next->start - lookup > 32768)
7828 bits[ret].start = next->start;
7829 bits[ret].size = next->size;
7830 lookup = next->start + next->size;
7834 next = next_cache_extent(next);
/*
 * Per-entry destructor for the chunk cache: maps @cache back to its
 * chunk_record and unlinks the record from both of its lists (list and
 * dextents).  Presumably the record is freed afterwards -- TODO confirm.
 */
7842 static void free_chunk_record(struct cache_extent *cache)
7844 struct chunk_record *rec;
7846 rec = container_of(cache, struct chunk_record, cache);
7847 list_del_init(&rec->list);
7848 list_del_init(&rec->dextents);
/*
 * Release every entry of @chunk_cache, using free_chunk_record() as the
 * per-entry destructor.
 */
7852 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
7854 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node destructor for the device cache: maps the rb-tree @node back to
 * its device_record.  Presumably the record is freed afterwards -- TODO
 * confirm.
 */
7857 static void free_device_record(struct rb_node *node)
7859 struct device_record *rec;
7861 rec = container_of(node, struct device_record, node);
/*
 * Instantiate the tree-freeing helper for device_cache through the
 * FREE_RB_BASED_TREE macro, with free_device_record() as the per-node
 * destructor.  NOTE(review): the name of the generated function depends on
 * the macro definition -- confirm there.
 */
7865 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the cache tree of @tree and append it to the
 * tree->block_groups list.  Presumably the list append is skipped and the
 * error returned when the cache insert fails -- TODO confirm.
 */
7867 int insert_block_group_record(struct block_group_tree *tree,
7868 struct block_group_record *bg_rec)
7872 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7876 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Per-entry destructor for the block group tree: maps @cache back to its
 * block_group_record and unlinks it from its list.  Presumably the record is
 * freed afterwards -- TODO confirm.
 */
7880 static void free_block_group_record(struct cache_extent *cache)
7882 struct block_group_record *rec;
7884 rec = container_of(cache, struct block_group_record, cache);
7885 list_del_init(&rec->list);
/*
 * Release every entry of the block group @tree, using
 * free_block_group_record() as the per-entry destructor.
 */
7889 void free_block_group_tree(struct block_group_tree *tree)
7891 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the device extent @tree.
 *
 * insert_cache_extent2() is used instead of the plain insert (see the
 * comment in the body).  The record is then appended to both orphan lists of
 * the tree: no_chunk_orphans and no_device_orphans.  Presumably the list
 * appends are skipped when the insert fails -- TODO confirm.
 */
7894 int insert_device_extent_record(struct device_extent_tree *tree,
7895 struct device_extent_record *de_rec)
7900 * Device extent is a bit different from the other extents, because
7901 * the extents which belong to the different devices may have the
7902 * same start and size, so we need use the special extent cache
7903 * search/insert functions.
7905 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7909 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7910 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Per-entry destructor for the device extent tree: maps @cache back to its
 * device_extent_record and unlinks it from chunk_list and device_list when
 * those are non-empty.  Presumably the record is freed afterwards -- TODO
 * confirm.
 */
7914 static void free_device_extent_record(struct cache_extent *cache)
7916 struct device_extent_record *rec;
7918 rec = container_of(cache, struct device_extent_record, cache);
7919 if (!list_empty(&rec->chunk_list))
7920 list_del_init(&rec->chunk_list);
7921 if (!list_empty(&rec->device_list))
7922 list_del_init(&rec->device_list);
/*
 * Release every entry of the device extent @tree, using
 * free_device_extent_record() as the per-entry destructor.
 */
7926 void free_device_extent_tree(struct device_extent_tree *tree)
7928 cache_tree_free_extents(&tree->tree, free_device_extent_record);
7931 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item at @slot of @leaf into a backref in
 * @extent_cache (only built with BTRFS_COMPAT_EXTENT_TREE_V0).
 *
 * The item key supplies the extent bytenr (objectid) and the parent
 * (offset).  A ref objectid below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref; the other visible call records a data backref carrying the
 * v0 ref count.
 */
7932 static int process_extent_ref_v0(struct cache_tree *extent_cache,
7933 struct extent_buffer *leaf, int slot)
7935 struct btrfs_extent_ref_v0 *ref0;
7936 struct btrfs_key key;
7939 btrfs_item_key_to_cpu(leaf, &key, slot);
7940 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
7941 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
7942 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
7945 ret = add_data_backref(extent_cache, key.objectid, key.offset,
7946 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the chunk item at @slot of @leaf.
 *
 * The record is zero-allocated with room for the chunk's stripes
 * (btrfs_chunk_record_size(num_stripes)); "memory allocation failed" is
 * printed when the allocation fails.  The cache extent covers
 * [key->offset, chunk length); the key fields, the leaf generation and the
 * chunk attributes (owner, stripe_len, type, io width/align, sector size,
 * stripe counts) are copied, followed by devid, offset and device UUID of
 * every stripe.
 */
7952 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7953 struct btrfs_key *key,
7956 struct btrfs_chunk *ptr;
7957 struct chunk_record *rec;
7960 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7961 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7963 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7965 fprintf(stderr, "memory allocation failed\n");
7969 INIT_LIST_HEAD(&rec->list);
7970 INIT_LIST_HEAD(&rec->dextents);
7973 rec->cache.start = key->offset;
7974 rec->cache.size = btrfs_chunk_length(leaf, ptr);
7976 rec->generation = btrfs_header_generation(leaf);
7978 rec->objectid = key->objectid;
7979 rec->type = key->type;
7980 rec->offset = key->offset;
7982 rec->length = rec->cache.size;
7983 rec->owner = btrfs_chunk_owner(leaf, ptr);
7984 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7985 rec->type_flags = btrfs_chunk_type(leaf, ptr);
7986 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7987 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7988 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7989 rec->num_stripes = num_stripes;
7990 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
7992 for (i = 0; i < rec->num_stripes; ++i) {
7993 rec->stripes[i].devid =
7994 btrfs_stripe_devid_nr(leaf, ptr, i);
7995 rec->stripes[i].offset =
7996 btrfs_stripe_offset_nr(leaf, ptr, i);
7997 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7998 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb and add it to @chunk_cache.
 *
 * The item is first checked with btrfs_check_chunk_valid(); an invalid chunk
 * is reported and, per the message, ignored.  Otherwise a chunk_record is
 * built with btrfs_new_chunk_record() and inserted into the cache; a failed
 * insert is reported as "Chunk[...] existed."
 */
8005 static int process_chunk_item(struct cache_tree *chunk_cache,
8006 struct btrfs_key *key, struct extent_buffer *eb,
8009 struct chunk_record *rec;
8010 struct btrfs_chunk *chunk;
8013 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
8015 * Do extra check for this chunk item,
8017 * It's still possible one can craft a leaf with CHUNK_ITEM, with
8018 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
8019 * and owner<->key_type check.
8021 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
8024 error("chunk(%llu, %llu) is not valid, ignore it",
8025 key->offset, btrfs_chunk_length(eb, chunk));
8028 rec = btrfs_new_chunk_record(eb, key, slot);
8029 ret = insert_cache_extent(chunk_cache, &rec->cache);
8031 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
8032 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it into
 * the rb-tree @dev_cache, ordered by device_record_compare.
 *
 * The record carries the item key, the leaf generation, and the device id,
 * total bytes and bytes used read from the item.  "memory allocation failed"
 * is printed when the allocation fails; a failed insert is reported as
 * "Device[...] existed."
 */
8039 static int process_device_item(struct rb_root *dev_cache,
8040 struct btrfs_key *key, struct extent_buffer *eb, int slot)
8042 struct btrfs_dev_item *ptr;
8043 struct device_record *rec;
8046 ptr = btrfs_item_ptr(eb,
8047 slot, struct btrfs_dev_item);
8049 rec = malloc(sizeof(*rec));
8051 fprintf(stderr, "memory allocation failed\n");
/* devid is seeded from the key here and overwritten from the item below */
8055 rec->devid = key->offset;
8056 rec->generation = btrfs_header_generation(eb);
8058 rec->objectid = key->objectid;
8059 rec->type = key->type;
8060 rec->offset = key->offset;
8062 rec->devid = btrfs_device_id(eb, ptr);
8063 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
8064 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
8066 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
8068 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a block_group_record from the block group item at @slot of @leaf.
 *
 * The record is zero-allocated ("memory allocation failed" is printed on
 * failure).  Its cache extent starts at key->objectid with size key->offset;
 * the key fields, the leaf generation and the block group flags read from
 * the item are stored, and the list head is initialised.
 */
8075 struct block_group_record *
8076 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
8079 struct btrfs_block_group_item *ptr;
8080 struct block_group_record *rec;
8082 rec = calloc(1, sizeof(*rec));
8084 fprintf(stderr, "memory allocation failed\n");
8088 rec->cache.start = key->objectid;
8089 rec->cache.size = key->offset;
8091 rec->generation = btrfs_header_generation(leaf);
8093 rec->objectid = key->objectid;
8094 rec->type = key->type;
8095 rec->offset = key->offset;
8097 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
8098 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
8100 INIT_LIST_HEAD(&rec->list);
/*
 * Build a block_group_record for the item at @slot of @eb and insert it into
 * @block_group_cache; a failed insert is reported as
 * "Block Group[...] existed."
 */
8105 static int process_block_group_item(struct block_group_tree *block_group_cache,
8106 struct btrfs_key *key,
8107 struct extent_buffer *eb, int slot)
8109 struct block_group_record *rec;
8112 rec = btrfs_new_block_group_record(eb, key, slot);
8113 ret = insert_block_group_record(block_group_cache, rec);
8115 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
8116 rec->objectid, rec->offset);
/*
 * Build a device_extent_record from the dev extent item at @slot of @leaf.
 *
 * The record is zero-allocated ("memory allocation failed" is printed on
 * failure).  Its cache extent is keyed by key->objectid and key->offset and
 * sized by the extent length read from the item.  The key fields, the leaf
 * generation, and the chunk objectid and chunk offset from the item are
 * stored, and both list heads are initialised.
 */
8123 struct device_extent_record *
8124 btrfs_new_device_extent_record(struct extent_buffer *leaf,
8125 struct btrfs_key *key, int slot)
8127 struct device_extent_record *rec;
8128 struct btrfs_dev_extent *ptr;
8130 rec = calloc(1, sizeof(*rec));
8132 fprintf(stderr, "memory allocation failed\n");
8136 rec->cache.objectid = key->objectid;
8137 rec->cache.start = key->offset;
8139 rec->generation = btrfs_header_generation(leaf);
8141 rec->objectid = key->objectid;
8142 rec->type = key->type;
8143 rec->offset = key->offset;
8145 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
8146 rec->chunk_objecteid =
8147 btrfs_dev_extent_chunk_objectid(leaf, ptr);
8149 btrfs_dev_extent_chunk_offset(leaf, ptr);
8150 rec->length = btrfs_dev_extent_length(leaf, ptr);
8151 rec->cache.size = rec->length;
8153 INIT_LIST_HEAD(&rec->chunk_list);
8154 INIT_LIST_HEAD(&rec->device_list);
/*
 * Build a device_extent_record for the item at @slot of @eb and insert it
 * into @dev_extent_cache; a failed insert is reported as
 * "Device extent[...] existed."
 */
8160 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
8161 struct btrfs_key *key, struct extent_buffer *eb,
8164 struct device_extent_record *rec;
8167 rec = btrfs_new_device_extent_record(eb, key, slot);
8168 ret = insert_device_extent_record(dev_extent_cache, rec);
8171 "Device extent[%llu, %llu, %llu] existed.\n",
8172 rec->objectid, rec->offset, rec->length);
/*
 * Parse the extent item (EXTENT_ITEM or METADATA_ITEM) at @slot of @eb and
 * feed it into @extent_cache.
 *
 * Steps visible here:
 *  - The extent length is the filesystem nodesize for METADATA_ITEM keys and
 *    key.offset otherwise.
 *  - An extent whose bytenr is not sectorsize-aligned is reported and
 *    ignored.
 *  - Items smaller than struct btrfs_extent_item are handled as v0 extent
 *    items when BTRFS_COMPAT_EXTENT_TREE_V0 is defined: the ref count is read
 *    and the record is added directly.
 *  - For regular items the ref count and the TREE_BLOCK flag are read; a
 *    metadata extent whose length differs from nodesize and a data extent
 *    whose length is not sectorsize-aligned are reported and ignored.
 *  - The extent record is added with add_extent_rec(), then the inline refs
 *    following the item (after the tree block info, for tree blocks keyed as
 *    EXTENT_ITEM) are walked and turned into tree or data backrefs according
 *    to their type.  Unknown types are reported as a corrupt extent record.
 *
 * NOTE(review): in the visible code only the add_tree_backref() results are
 * captured and reported; the results of add_extent_rec() for regular items
 * and of both add_data_backref() calls are not -- confirm this is intended.
 */
8179 static int process_extent_item(struct btrfs_root *root,
8180 struct cache_tree *extent_cache,
8181 struct extent_buffer *eb, int slot)
8183 struct btrfs_extent_item *ei;
8184 struct btrfs_extent_inline_ref *iref;
8185 struct btrfs_extent_data_ref *dref;
8186 struct btrfs_shared_data_ref *sref;
8187 struct btrfs_key key;
8188 struct extent_record tmpl;
8193 u32 item_size = btrfs_item_size_nr(eb, slot);
8199 btrfs_item_key_to_cpu(eb, &key, slot);
8201 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8203 num_bytes = root->fs_info->nodesize;
8205 num_bytes = key.offset;
8208 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
8209 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
8210 key.objectid, root->fs_info->sectorsize);
8213 if (item_size < sizeof(*ei)) {
8214 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8215 struct btrfs_extent_item_v0 *ei0;
8216 BUG_ON(item_size != sizeof(*ei0));
8217 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
8218 refs = btrfs_extent_refs_v0(eb, ei0);
8222 memset(&tmpl, 0, sizeof(tmpl));
8223 tmpl.start = key.objectid;
8224 tmpl.nr = num_bytes;
8225 tmpl.extent_item_refs = refs;
8226 tmpl.metadata = metadata;
8228 tmpl.max_size = num_bytes;
8230 return add_extent_rec(extent_cache, &tmpl);
8233 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
8234 refs = btrfs_extent_refs(eb, ei);
8235 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
8239 if (metadata && num_bytes != root->fs_info->nodesize) {
8240 error("ignore invalid metadata extent, length %llu does not equal to %u",
8241 num_bytes, root->fs_info->nodesize);
8244 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
8245 error("ignore invalid data extent, length %llu is not aligned to %u",
8246 num_bytes, root->fs_info->sectorsize);
8250 memset(&tmpl, 0, sizeof(tmpl));
8251 tmpl.start = key.objectid;
8252 tmpl.nr = num_bytes;
8253 tmpl.extent_item_refs = refs;
8254 tmpl.metadata = metadata;
8256 tmpl.max_size = num_bytes;
8257 add_extent_rec(extent_cache, &tmpl);
/* inline refs start right after the extent item, or after the tree block info */
8259 ptr = (unsigned long)(ei + 1);
8260 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
8261 key.type == BTRFS_EXTENT_ITEM_KEY)
8262 ptr += sizeof(struct btrfs_tree_block_info);
8264 end = (unsigned long)ei + item_size;
8266 iref = (struct btrfs_extent_inline_ref *)ptr;
8267 type = btrfs_extent_inline_ref_type(eb, iref);
8268 offset = btrfs_extent_inline_ref_offset(eb, iref);
8270 case BTRFS_TREE_BLOCK_REF_KEY:
8271 ret = add_tree_backref(extent_cache, key.objectid,
8275 "add_tree_backref failed (extent items tree block): %s",
8278 case BTRFS_SHARED_BLOCK_REF_KEY:
8279 ret = add_tree_backref(extent_cache, key.objectid,
8283 "add_tree_backref failed (extent items shared block): %s",
8286 case BTRFS_EXTENT_DATA_REF_KEY:
8287 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8288 add_data_backref(extent_cache, key.objectid, 0,
8289 btrfs_extent_data_ref_root(eb, dref),
8290 btrfs_extent_data_ref_objectid(eb,
8292 btrfs_extent_data_ref_offset(eb, dref),
8293 btrfs_extent_data_ref_count(eb, dref),
8296 case BTRFS_SHARED_DATA_REF_KEY:
8297 sref = (struct btrfs_shared_data_ref *)(iref + 1);
8298 add_data_backref(extent_cache, key.objectid, offset,
8300 btrfs_shared_data_ref_count(eb, sref),
8304 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
8305 key.objectid, key.type, num_bytes);
8308 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that [@offset, @offset + @bytes) inside block group @cache is
 * described by a free space entry with exactly that offset and length,
 * after trimming away any super block stripes that overlap the range.
 * A matching entry is unlinked from cache->free_space_ctl.
 *
 * NOTE(review): braces, loop headers and return statements are missing from
 * this listing; comments describe the visible statements only.
 */
8315 static int check_cache_range(struct btrfs_root *root,
8316 struct btrfs_block_group_cache *cache,
8317 u64 offset, u64 bytes)
8319 struct btrfs_free_space *entry;
/*
 * For every super block mirror, ask btrfs_rmap_block for the logical
 * addresses (logical[], nr entries, stripe_len each) that correspond to the
 * mirror offset within this block group.
 */
8325 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
8326 bytenr = btrfs_sb_offset(i);
8327 ret = btrfs_rmap_block(root->fs_info,
8328 cache->key.objectid, bytenr, 0,
8329 &logical, &nr, &stripe_len);
/* Stripe ends at or before the range start: no overlap. */
8334 if (logical[nr] + stripe_len <= offset)
/* Stripe starts at or after the range end: no overlap. */
8336 if (offset + bytes <= logical[nr])
/* Stripe starts exactly at the range start: cut it off the front. */
8338 if (logical[nr] == offset) {
8339 if (stripe_len >= bytes) {
8343 bytes -= stripe_len;
8344 offset += stripe_len;
/* Stripe starts before the range: keep what lies past the stripe end. */
8345 } else if (logical[nr] < offset) {
8346 if (logical[nr] + stripe_len >=
8351 bytes = (offset + bytes) -
8352 (logical[nr] + stripe_len);
8353 offset = logical[nr] + stripe_len;
8356 * Could be tricky, the super may land in the
8357 * middle of the area we're checking. First
8358 * check the easiest case, it's at the end.
8360 if (logical[nr] + stripe_len >=
8362 bytes = logical[nr] - offset;
8366 /* Check the left side */
8367 ret = check_cache_range(root, cache,
8369 logical[nr] - offset);
8375 /* Now we continue with the right side */
8376 bytes = (offset + bytes) -
8377 (logical[nr] + stripe_len);
8378 offset = logical[nr] + stripe_len;
/* Whatever is left of the range must match one free space entry exactly. */
8385 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8387 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8388 offset, offset+bytes);
8392 if (entry->offset != offset) {
8393 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8398 if (entry->bytes != bytes) {
8399 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8400 bytes, entry->bytes, offset);
/* Entry verified: take it out so leftovers can be detected by the caller. */
8404 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Walk the extent tree across block group @cache and verify through
 * check_cache_range() that every gap between allocated extents is present
 * in the loaded free space entries. A message is printed when entries are
 * still left in the free space ctl afterwards.
 *
 * NOTE(review): braces, loop headers and return statements are missing from
 * this listing; comments describe the visible statements only.
 */
8409 static int verify_space_cache(struct btrfs_root *root,
8410 struct btrfs_block_group_cache *cache)
8412 struct btrfs_path path;
8413 struct extent_buffer *leaf;
8414 struct btrfs_key key;
/* All searches below run on the extent tree. */
8418 root = root->fs_info->extent_root;
/* Start at the block group start, but never below BTRFS_SUPER_INFO_OFFSET. */
8420 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8422 btrfs_init_path(&path);
8423 key.objectid = last;
8425 key.type = BTRFS_EXTENT_ITEM_KEY;
8426 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* Slot past the end of the current leaf: move on to the next leaf. */
8431 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8432 ret = btrfs_next_leaf(root, &path);
8440 leaf = path.nodes[0];
8441 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Key at or past the end of the block group: the walk is over. */
8442 if (key.objectid >= cache->key.offset + cache->key.objectid)
/* Only EXTENT_ITEM and METADATA_ITEM keys describe allocated space. */
8444 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8445 key.type != BTRFS_METADATA_ITEM_KEY) {
/*
 * Extent begins exactly at last: there is no gap, so last just moves past
 * the extent (key.offset bytes for EXTENT_ITEM, nodesize otherwise).
 */
8450 if (last == key.objectid) {
8451 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8452 last = key.objectid + key.offset;
8454 last = key.objectid + root->fs_info->nodesize;
/* The gap between last and this extent has to be recorded as free space. */
8459 ret = check_cache_range(root, cache, last,
8460 key.objectid - last);
8463 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8464 last = key.objectid + key.offset;
8466 last = key.objectid + root->fs_info->nodesize;
/* Tail of the block group behind the final extent. */
8470 if (last < cache->key.objectid + cache->key.offset)
8471 ret = check_cache_range(root, cache, last,
8472 cache->key.objectid +
8473 cache->key.offset - last);
8476 btrfs_release_path(&path);
/* Entries remaining in the rb tree were not matched by any gap. */
8479 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8480 fprintf(stderr, "There are still entries left in the space "
/*
 * Load and verify the free space information of the block groups found
 * from just behind the primary super block onward.
 *
 * Returns -EINVAL when the local error value is non-zero and 0 otherwise
 * (see the final return statement).
 *
 * NOTE(review): braces, loop headers and several short statements are
 * missing from this listing; comments describe the visible statements only.
 */
8488 static int check_space_cache(struct btrfs_root *root)
8490 struct btrfs_block_group_cache *cache;
8491 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
/*
 * A cache generation other than -1 that differs from the super block
 * generation is reported as a cache that will be invalidated.
 */
8495 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8496 btrfs_super_generation(root->fs_info->super_copy) !=
8497 btrfs_super_cache_generation(root->fs_info->super_copy)) {
8498 printf("cache and super generation don't match, space cache "
8499 "will be invalidated\n");
/* Optional progress reporting for this phase. */
8503 if (ctx.progress_enabled) {
8504 ctx.tp = TASK_FREE_SPACE;
8505 task_start(ctx.info);
/* Look up the next block group and advance start past it. */
8509 cache = btrfs_lookup_first_block_group(root->fs_info, start);
8513 start = cache->key.objectid + cache->key.offset;
/*
 * Create the free space ctl if the block group has none; the call to
 * btrfs_remove_free_space_cache presumably sits on the else path and drops
 * entries that were loaded earlier.
 */
8514 if (!cache->free_space_ctl) {
8515 if (btrfs_init_free_space_ctl(cache,
8516 root->fs_info->sectorsize)) {
8521 btrfs_remove_free_space_cache(cache);
/*
 * With the FREE_SPACE_TREE compat_ro bit set, super stripes are excluded
 * and the free space tree is loaded; load_free_space_cache below is
 * presumably the alternative branch.
 */
8524 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8525 ret = exclude_super_stripes(root, cache);
8527 fprintf(stderr, "could not exclude super stripes: %s\n",
8532 ret = load_free_space_tree(root->fs_info, cache);
8533 free_excluded_extents(root, cache);
8535 fprintf(stderr, "could not load free space tree: %s\n",
8542 ret = load_free_space_cache(root->fs_info, cache);
/* Compare what was loaded against the extent tree. */
8547 ret = verify_space_cache(root, cache);
8549 fprintf(stderr, "cache appears valid but isn't %Lu\n",
8550 cache->key.objectid);
8555 task_stop(ctx.info);
8557 return error ? -EINVAL : 0;
/*
 * Read @num_bytes of data starting at logical address @bytenr and compare
 * the checksum of every sectorsize block with the expected value stored in
 * @eb, beginning at byte @leaf_offset.
 *
 * NOTE(review): braces, declarations, error checks and returns are missing
 * from this listing; comments describe the visible statements only.
 */
8560 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8561 u64 num_bytes, unsigned long leaf_offset,
8562 struct extent_buffer *eb) {
8564 struct btrfs_fs_info *fs_info = root->fs_info;
8566 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8568 unsigned long csum_offset;
8572 u64 data_checked = 0;
/* The length has to be a whole number of sectors. */
8578 if (num_bytes % fs_info->sectorsize)
/* One buffer for the whole range; the allocation check is not visible. */
8581 data = malloc(num_bytes);
8585 while (offset < num_bytes) {
8588 read_len = num_bytes - offset;
8589 /* read as much space once a time */
8590 ret = read_extent_data(fs_info, data + offset,
8591 bytenr + offset, &read_len, mirror);
8595 /* verify every 4k data's checksum */
8596 while (data_checked < read_len) {
8598 tmp = offset + data_checked;
8600 csum = btrfs_csum_data((char *)data + tmp,
8601 csum, fs_info->sectorsize);
8602 btrfs_csum_final(csum, (u8 *)&csum);
/*
 * The expected value for this block sits at leaf_offset plus the block
 * index (tmp / sectorsize) times csum_size.
 */
8604 csum_offset = leaf_offset +
8605 tmp / fs_info->sectorsize * csum_size;
8606 read_extent_buffer(eb, (char *)&csum_expected,
8607 csum_offset, csum_size);
8608 /* try another mirror */
8609 if (csum != csum_expected) {
8610 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8611 mirror, bytenr + tmp,
8612 csum, csum_expected);
/* Ask how many copies exist to decide whether another mirror is left. */
8613 num_copies = btrfs_num_copies(root->fs_info,
8615 if (mirror < num_copies - 1) {
/* The inner loop advances one sector at a time. */
8620 data_checked += fs_info->sectorsize;
/*
 * Check that the range [@bytenr, @bytenr + @num_bytes) is covered by
 * EXTENT_ITEM records in the extent tree. A message is printed when part of
 * the range is left without an extent (see the final fprintf).
 *
 * NOTE(review): braces, loop headers, gotos and returns are missing from
 * this listing; comments describe the visible statements only.
 * NOTE(review): the search runs on fs_info->extent_root, while
 * btrfs_prev_leaf and btrfs_next_leaf below receive @root; confirm that
 * passing the caller root there is intended.
 */
8629 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8632 struct btrfs_path path;
8633 struct extent_buffer *leaf;
8634 struct btrfs_key key;
/* Search with the largest possible offset for this bytenr. */
8637 btrfs_init_path(&path);
8638 key.objectid = bytenr;
8639 key.type = BTRFS_EXTENT_ITEM_KEY;
8640 key.offset = (u64)-1;
8643 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8646 fprintf(stderr, "Error looking up extent record %d\n", ret);
8647 btrfs_release_path(&path);
/*
 * Step back to the preceding item: within the leaf when slot is above 0
 * (the decrement itself is not visible), otherwise via the previous leaf.
 */
8650 if (path.slots[0] > 0) {
8653 ret = btrfs_prev_leaf(root, &path);
8656 } else if (ret > 0) {
8663 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8666 * Block group items come before extent items if they have the same
8667 * bytenr, so walk back one more just in case. Dear future traveller,
8668 * first congrats on mastering time travel. Now if it's not too much
8669 * trouble could you go back to 2006 and tell Chris to make the
8670 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8671 * EXTENT_ITEM_KEY please?
8673 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8674 if (path.slots[0] > 0) {
8677 ret = btrfs_prev_leaf(root, &path);
8680 } else if (ret > 0) {
8685 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
/* Forward walk over the items, moving to the next leaf when needed. */
8689 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8690 ret = btrfs_next_leaf(root, &path);
8692 fprintf(stderr, "Error going to next leaf "
8694 btrfs_release_path(&path);
8700 leaf = path.nodes[0];
8701 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8702 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent ends before the range begins: irrelevant for this range. */
8706 if (key.objectid + key.offset < bytenr) {
/* Extent starts past the end of the range. */
8710 if (key.objectid > bytenr + num_bytes)
/* Extent starts at the range start: consume its length from the front. */
8713 if (key.objectid == bytenr) {
8714 if (key.offset >= num_bytes) {
8718 num_bytes -= key.offset;
8719 bytenr += key.offset;
/* Extent starts before the range: keep only the part past its end. */
8720 } else if (key.objectid < bytenr) {
8721 if (key.objectid + key.offset >= bytenr + num_bytes) {
8725 num_bytes = (bytenr + num_bytes) -
8726 (key.objectid + key.offset);
8727 bytenr = key.objectid + key.offset;
/* Extent starts inside the range and ends before the range end. */
8729 if (key.objectid + key.offset < bytenr + num_bytes) {
8730 u64 new_start = key.objectid + key.offset;
8731 u64 new_bytes = bytenr + num_bytes - new_start;
8734 * Weird case, the extent is in the middle of
8735 * our range, we'll have to search one side
8736 * and then the other. Not sure if this happens
8737 * in real life, but no harm in coding it up
8738 * anyway just in case.
8740 btrfs_release_path(&path);
8741 ret = check_extent_exists(root, new_start,
8744 fprintf(stderr, "Right section didn't "
8748 num_bytes = key.objectid - bytenr;
8751 num_bytes = key.objectid - bytenr;
/* Bytes still unaccounted for without an earlier error: nothing covers them. */
8758 if (num_bytes && !ret) {
8759 fprintf(stderr, "There are no extents for csum range "
8760 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8764 btrfs_release_path(&path);
/*
 * Walk the EXTENT_CSUM items of the csum tree. When check_data_csum is set
 * the covered data is verified by check_extent_csums(). Contiguous csum
 * ranges are accumulated in offset/num_bytes, and each finished range is
 * handed to check_extent_exists().
 *
 * NOTE(review): braces, loop headers, labels and returns are missing from
 * this listing; comments describe the visible statements only.
 */
8768 static int check_csums(struct btrfs_root *root)
8770 struct btrfs_path path;
8771 struct extent_buffer *leaf;
8772 struct btrfs_key key;
8773 u64 offset = 0, num_bytes = 0;
8774 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8778 unsigned long leaf_offset;
/* From here on root refers to the csum tree. */
8780 root = root->fs_info->csum_root;
8781 if (!extent_buffer_uptodate(root->node)) {
8782 fprintf(stderr, "No valid csum tree found\n");
8786 btrfs_init_path(&path);
8787 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8788 key.type = BTRFS_EXTENT_CSUM_KEY;
8790 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8792 fprintf(stderr, "Error searching csum tree %d\n", ret);
8793 btrfs_release_path(&path);
/* No exact match and not at slot 0 (the action taken is not visible). */
8797 if (ret > 0 && path.slots[0])
8802 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8803 ret = btrfs_next_leaf(root, &path);
8805 fprintf(stderr, "Error going to next leaf "
8812 leaf = path.nodes[0];
8814 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8815 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Every csum_size bytes of item payload stand for one sector of data. */
8820 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8821 csum_size) * root->fs_info->sectorsize;
/* Reading the data back is optional and controlled by check_data_csum. */
8822 if (!check_data_csum)
8823 goto skip_csum_check;
8824 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8825 ret = check_extent_csums(root, key.offset, data_len,
8831 offset = key.offset;
/*
 * This item does not continue the range gathered so far: check that range
 * against the extent tree and start a new one at this item.
 */
8832 } else if (key.offset != offset + num_bytes) {
8833 ret = check_extent_exists(root, offset, num_bytes);
8835 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8836 "there is no extent record\n",
8837 offset, offset+num_bytes);
8840 offset = key.offset;
8843 num_bytes += data_len;
8847 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field, in the order objectid, type,
 * offset.
 *
 * NOTE(review): the return statements are missing from this listing;
 * presumably a non-zero value is returned when @key sorts before
 * @drop_key - TODO confirm against the full source.
 */
8851 static int is_dropped_key(struct btrfs_key *key,
8852 struct btrfs_key *drop_key) {
8853 if (key->objectid < drop_key->objectid)
8855 else if (key->objectid == drop_key->objectid) {
8856 if (key->type < drop_key->type)
8858 else if (key->type == drop_key->type) {
8859 if (key->offset < drop_key->offset)
8867 * Here are the rules for FULL_BACKREF.
8869 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8870 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8872 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
8873 * if it happened after the relocation occurred since we'll have dropped the
8874 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8875 * have no real way to know for sure.
8877 * We process the blocks one root at a time, and we start from the lowest root
8878 * objectid and go to the highest. So we can just lookup the owner backref for
8879 * the record and if we don't find it then we know it doesn't exist and we have
8882 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
8883 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8884 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Decide whether tree block @buf should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF and, if so, set it in *flags. The outcome
 * is cross-checked against rec->flag_block_full_backref and
 * rec->bad_full_backref is set when the two disagree.
 *
 * NOTE(review): braces, gotos, labels and returns are missing from this
 * listing, so which test leads to which outcome is not fully visible.
 */
8886 static int calc_extent_flag(struct cache_tree *extent_cache,
8887 struct extent_buffer *buf,
8888 struct root_item_record *ri,
8891 struct extent_record *rec;
8892 struct cache_extent *cache;
8893 struct tree_backref *tback;
8896 cache = lookup_cache_extent(extent_cache, buf->start, 1);
8897 /* we have added this extent before */
8901 rec = container_of(cache, struct extent_record, cache);
8904 * Except file/reloc tree, we can not have
/* Root objectids below BTRFS_FIRST_FREE_OBJECTID. */
8907 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* buf is the block located at ri->bytenr. */
8912 if (buf->start == ri->bytenr)
/* Header carries the RELOC flag. */
8915 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
/* Header owner equals the root that is being walked. */
8918 owner = btrfs_header_owner(buf);
8919 if (owner == ri->objectid)
/* Otherwise look for a tree backref keyed by the header owner. */
8922 tback = find_tree_backref(rec, 0, owner);
/*
 * Outcome without FULL_BACKREF: a previously recorded value other than 0
 * is a conflict.
 */
8927 if (rec->flag_block_full_backref != FLAG_UNSET &&
8928 rec->flag_block_full_backref != 0)
8929 rec->bad_full_backref = 1;
/*
 * Outcome with FULL_BACKREF: set the flag; a previously recorded value
 * other than 1 is a conflict.
 */
8932 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8933 if (rec->flag_block_full_backref != FLAG_UNSET &&
8934 rec->flag_block_full_backref != 1)
8935 rec->bad_full_backref = 1;
/*
 * Print a one-line message to stderr saying that @key_type was found in the
 * root identified by @rootid. The key type and the root id are rendered by
 * print_key_type() and print_objectid().
 */
8939 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8941 fprintf(stderr, "Invalid key type(");
8942 print_key_type(stderr, 0, key_type);
8943 fprintf(stderr, ") found in root(");
8944 print_objectid(stderr, rootid, 0);
8945 fprintf(stderr, ")\n");
8949 * Check if the key is valid with its extent buffer.
8951 * This is an early check in case an invalid key exists in an extent buffer
8952 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check that an item of type @key_type may appear in the tree identified
 * by @rootid. A mismatch is reported through report_mismatch_key_root().
 *
 * NOTE(review): the switch header, the goto/break lines and the return
 * statements are missing from this listing; comments describe the visible
 * statements only.
 */
8955 static int check_type_with_root(u64 rootid, u8 key_type)
8958 /* Only valid in chunk tree */
8959 case BTRFS_DEV_ITEM_KEY:
8960 case BTRFS_CHUNK_ITEM_KEY:
8961 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8964 /* valid in csum and log tree */
/*
 * NOTE(review): BTRFS_CSUM_TREE_OBJECTID is a tree objectid, while every
 * other case label in this function is a key type. BTRFS_EXTENT_CSUM_KEY
 * may have been intended here - confirm.
 */
8965 case BTRFS_CSUM_TREE_OBJECTID:
8966 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
/* Extent, metadata and block group items: checked against the extent tree. */
8970 case BTRFS_EXTENT_ITEM_KEY:
8971 case BTRFS_METADATA_ITEM_KEY:
8972 case BTRFS_BLOCK_GROUP_ITEM_KEY:
8973 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
/* Root items: checked against the root tree. */
8976 case BTRFS_ROOT_ITEM_KEY:
8977 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
/* Device extents: checked against the device tree. */
8980 case BTRFS_DEV_EXTENT_KEY:
8981 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8987 report_mismatch_key_root(key_type, rootid);
/*
 * Pick the next pending tree block, read it and feed its contents into the
 * caches: extent items and backrefs go to @extent_cache; chunk, device,
 * block group and device extent items go to @chunk_cache, @dev_cache,
 * @block_group_cache and @dev_extent_cache. For an internal node every
 * child pointer is recorded in @extent_cache and queued in @nodes or
 * @pending.
 *
 * @bits is the work array filled by pick_next_pending(); @ri describes the
 * root that is being walked.
 *
 * NOTE(review): braces, several parameters and declarations, continue/goto
 * lines and returns are missing from this listing; comments describe the
 * visible statements only.
 */
8991 static int run_next_block(struct btrfs_root *root,
8992 struct block_info *bits,
8995 struct cache_tree *pending,
8996 struct cache_tree *seen,
8997 struct cache_tree *reada,
8998 struct cache_tree *nodes,
8999 struct cache_tree *extent_cache,
9000 struct cache_tree *chunk_cache,
9001 struct rb_root *dev_cache,
9002 struct block_group_tree *block_group_cache,
9003 struct device_extent_tree *dev_extent_cache,
9004 struct root_item_record *ri)
9006 struct btrfs_fs_info *fs_info = root->fs_info;
9007 struct extent_buffer *buf;
9008 struct extent_record *rec = NULL;
9019 struct btrfs_key key;
9020 struct cache_extent *cache;
/* Ask pick_next_pending for a batch of blocks to work on. */
9023 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
9024 bits_nr, &reada_bits);
/* Record the picked blocks in @reada and issue readahead for them. */
9029 for(i = 0; i < nritems; i++) {
9030 ret = add_cache_extent(reada, bits[i].start,
9035 /* fixme, get the parent transid */
9036 readahead_tree_block(fs_info, bits[i].start, 0);
/* Only the first picked block is processed in this call. */
9039 *last = bits[0].start;
9040 bytenr = bits[0].start;
9041 size = bits[0].size;
/* The block is handled now: drop it from the pending, reada and nodes trees. */
9043 cache = lookup_cache_extent(pending, bytenr, size);
9045 remove_cache_extent(pending, cache);
9048 cache = lookup_cache_extent(reada, bytenr, size);
9050 remove_cache_extent(reada, cache);
9053 cache = lookup_cache_extent(nodes, bytenr, size);
9055 remove_cache_extent(nodes, cache);
/* If an extent record exists, use its parent generation for the read. */
9058 cache = lookup_cache_extent(extent_cache, bytenr, size);
9060 rec = container_of(cache, struct extent_record, cache);
9061 gen = rec->parent_generation;
9064 /* fixme, get the real parent transid */
9065 buf = read_tree_block(root->fs_info, bytenr, gen);
9066 if (!extent_buffer_uptodate(buf)) {
9067 record_bad_block_io(root->fs_info,
9068 extent_cache, bytenr, size);
9072 nritems = btrfs_header_nritems(buf);
/*
 * Obtain the block flags: from the extent tree unless it is being rebuilt
 * (init_extent_tree); calc_extent_flag is used on the other visible paths.
 * When calc_extent_flag fails, FULL_BACKREF is assumed.
 */
9075 if (!init_extent_tree) {
9076 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
9077 btrfs_header_level(buf), 1, NULL,
9080 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9082 fprintf(stderr, "Couldn't calc extent flags\n");
9083 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9088 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9090 fprintf(stderr, "Couldn't calc extent flags\n");
9091 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* Plausibility check of the FULL_BACKREF decision against the header. */
9095 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9097 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
9098 ri->objectid == btrfs_header_owner(buf)) {
9100 * Ok we got to this block from it's original owner and
9101 * we have FULL_BACKREF set. Relocation can leave
9102 * converted blocks over so this is altogether possible,
9103 * however it's not possible if the generation > the
9104 * last snapshot, so check for this case.
9106 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
9107 btrfs_header_generation(buf) > ri->last_snapshot) {
9108 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9109 rec->bad_full_backref = 1;
/* Reloc root or RELOC header flag: FULL_BACKREF is forced on. */
9114 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
9115 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
9116 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9117 rec->bad_full_backref = 1;
/* Remember the final decision in the extent record. */
9121 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9122 rec->flag_block_full_backref = 1;
9126 rec->flag_block_full_backref = 0;
9128 owner = btrfs_header_owner(buf);
9131 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: examine every item according to its key type. */
9135 if (btrfs_is_leaf(buf)) {
9136 btree_space_waste += btrfs_leaf_free_space(root, buf);
9137 for (i = 0; i < nritems; i++) {
9138 struct btrfs_file_extent_item *fi;
9139 btrfs_item_key_to_cpu(buf, &key, i);
9141 * Check key type against the leaf owner.
9142 * Could filter quite a lot of early error if
9145 if (check_type_with_root(btrfs_header_owner(buf),
9147 fprintf(stderr, "ignoring invalid key\n");
9150 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
9151 process_extent_item(root, extent_cache, buf,
9155 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9156 process_extent_item(root, extent_cache, buf,
/*
 * Csum items only contribute their item size to a total (the accumulating
 * statement is not visible; presumably total_csum_bytes).
 */
9160 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
9162 btrfs_item_size_nr(buf, i);
9165 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
9166 process_chunk_item(chunk_cache, &key, buf, i);
9169 if (key.type == BTRFS_DEV_ITEM_KEY) {
9170 process_device_item(dev_cache, &key, buf, i);
9173 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
9174 process_block_group_item(block_group_cache,
9178 if (key.type == BTRFS_DEV_EXTENT_KEY) {
9179 process_device_extent_item(dev_extent_cache,
9184 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
9185 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
9186 process_extent_ref_v0(extent_cache, buf, i);
/* Keyed (non-inline) backref items. */
9193 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
9194 ret = add_tree_backref(extent_cache,
9195 key.objectid, 0, key.offset, 0);
9198 "add_tree_backref failed (leaf tree block): %s",
9202 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
9203 ret = add_tree_backref(extent_cache,
9204 key.objectid, key.offset, 0, 0);
9207 "add_tree_backref failed (leaf shared block): %s",
9211 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
9212 struct btrfs_extent_data_ref *ref;
9213 ref = btrfs_item_ptr(buf, i,
9214 struct btrfs_extent_data_ref);
9215 add_data_backref(extent_cache,
9217 btrfs_extent_data_ref_root(buf, ref),
9218 btrfs_extent_data_ref_objectid(buf,
9220 btrfs_extent_data_ref_offset(buf, ref),
9221 btrfs_extent_data_ref_count(buf, ref),
9222 0, root->fs_info->sectorsize);
9225 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
9226 struct btrfs_shared_data_ref *ref;
9227 ref = btrfs_item_ptr(buf, i,
9228 struct btrfs_shared_data_ref);
9229 add_data_backref(extent_cache,
9230 key.objectid, key.offset, 0, 0, 0,
9231 btrfs_shared_data_ref_count(buf, ref),
9232 0, root->fs_info->sectorsize);
/*
 * Orphan items: those with objectid BTRFS_ORPHAN_OBJECTID are presumably
 * skipped; the others are copied into a bad_item and queued on
 * delete_items together with the owning root id.
 */
9235 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
9236 struct bad_item *bad;
9238 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
9242 bad = malloc(sizeof(struct bad_item));
9245 INIT_LIST_HEAD(&bad->list);
9246 memcpy(&bad->key, &key,
9247 sizeof(struct btrfs_key));
9248 bad->root_id = owner;
9249 list_add_tail(&bad->list, &delete_items);
/* The remaining statements of the loop body deal with EXTENT_DATA items. */
9252 if (key.type != BTRFS_EXTENT_DATA_KEY)
9254 fi = btrfs_item_ptr(buf, i,
9255 struct btrfs_file_extent_item);
/* Inline extents and extents with disk_bytenr 0 are tested for here. */
9256 if (btrfs_file_extent_type(buf, fi) ==
9257 BTRFS_FILE_EXTENT_INLINE)
9259 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
/* Account the bytes and record a data backref for the referenced extent. */
9262 data_bytes_allocated +=
9263 btrfs_file_extent_disk_num_bytes(buf, fi);
9264 if (data_bytes_allocated < root->fs_info->sectorsize) {
9267 data_bytes_referenced +=
9268 btrfs_file_extent_num_bytes(buf, fi);
9269 add_data_backref(extent_cache,
9270 btrfs_file_extent_disk_bytenr(buf, fi),
9271 parent, owner, key.objectid, key.offset -
9272 btrfs_file_extent_offset(buf, fi), 1, 1,
9273 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Internal node: record and queue every child block. */
9277 struct btrfs_key first_key;
9279 first_key.objectid = 0;
9282 btrfs_item_key_to_cpu(buf, &first_key, 0);
9283 level = btrfs_header_level(buf);
9284 for (i = 0; i < nritems; i++) {
9285 struct extent_record tmpl;
9287 ptr = btrfs_node_blockptr(buf, i);
9288 size = root->fs_info->nodesize;
9289 btrfs_node_key_to_cpu(buf, &key, i);
/*
 * Child keys at the drop level of this root that is_dropped_key accepts
 * get special handling; the body of this branch is not visible here.
 */
9291 if ((level == ri->drop_level)
9292 && is_dropped_key(&key, &ri->drop_key)) {
/* Extent record for the child, carrying the parent key and generation. */
9297 memset(&tmpl, 0, sizeof(tmpl));
9298 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
9299 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
9304 tmpl.max_size = size;
9305 ret = add_extent_rec(extent_cache, &tmpl);
9309 ret = add_tree_backref(extent_cache, ptr, parent,
9313 "add_tree_backref failed (non-leaf block): %s",
/*
 * The child is queued in @nodes or in @pending; the selecting condition is
 * not visible (presumably based on level).
 */
9319 add_pending(nodes, seen, ptr, size);
9321 add_pending(pending, seen, ptr, size);
/* Unused key pointer slots count as wasted btree space. */
9324 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
9325 nritems) * sizeof(struct btrfs_key_ptr);
/* Global size statistics. */
9327 total_btree_bytes += buf->len;
9328 if (fs_root_objectid(btrfs_header_owner(buf)))
9329 total_fs_tree_bytes += buf->len;
9330 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9331 total_extent_tree_bytes += buf->len;
9333 free_extent_buffer(buf);
/*
 * Queue root node @buf for scanning and seed @extent_cache with an extent
 * record and a tree backref for it.
 *
 * NOTE(review): braces, one parameter line, some template fields and the
 * return statement are missing from this listing; comments describe the
 * visible statements only.
 */
9337 static int add_root_to_pending(struct extent_buffer *buf,
9338 struct cache_tree *extent_cache,
9339 struct cache_tree *pending,
9340 struct cache_tree *seen,
9341 struct cache_tree *nodes,
9344 struct extent_record tmpl;
/* Blocks above level 0 go to @nodes, the other call queues into @pending. */
9347 if (btrfs_header_level(buf) > 0)
9348 add_pending(nodes, seen, buf->start, buf->len);
9350 add_pending(pending, seen, buf->start, buf->len);
9352 memset(&tmpl, 0, sizeof(tmpl));
9353 tmpl.start = buf->start;
9358 tmpl.max_size = buf->len;
/* NOTE(review): the result of add_extent_rec is not checked here. */
9359 add_extent_rec(extent_cache, &tmpl);
/*
 * Reloc tree roots and blocks older than the mixed backref revision get a
 * backref whose parent is the block itself; the other call records a
 * backref keyed by the root objectid.
 */
9361 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9362 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9363 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9366 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9371 /* as we fix the tree, we might be deleting blocks that
9372 * we're tracking for repair. This hook makes sure we
9373 * remove any backrefs for blocks as we are fixing them.
// Hook invoked while extents are freed during repair. It looks up the fsck
// extent record that covers @bytenr in fs_info->fsck_extent_cache and
// removes the dropped references from the matching backref.
//
// NOTE(review): braces, one parameter line, declarations, null checks and
// returns are missing from this listing; comments describe the visible
// statements only.
9375 static int free_extent_hook(struct btrfs_trans_handle *trans,
9376 struct btrfs_root *root,
9377 u64 bytenr, u64 num_bytes, u64 parent,
9378 u64 root_objectid, u64 owner, u64 offset,
9381 struct extent_record *rec;
9382 struct cache_extent *cache;
9384 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
// Owners at or above BTRFS_FIRST_FREE_OBJECTID are treated as data.
9386 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9387 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9391 rec = container_of(cache, struct extent_record, cache);
// Data extent: adjust the counters of the matching data backref.
9393 struct data_backref *back;
9394 back = find_data_backref(rec, parent, root_objectid, owner,
9395 offset, 1, bytenr, num_bytes);
9398 if (back->node.found_ref) {
9399 back->found_ref -= refs_to_drop;
9401 rec->refs -= refs_to_drop;
9403 if (back->node.found_extent_tree) {
9404 back->num_refs -= refs_to_drop;
9405 if (rec->extent_item_refs)
9406 rec->extent_item_refs -= refs_to_drop;
// Counters that reached zero clear the corresponding found flags.
9408 if (back->found_ref == 0)
9409 back->node.found_ref = 0;
9410 if (back->num_refs == 0)
9411 back->node.found_extent_tree = 0;
// NOTE(review): the backref is erased while found_ref is still set;
// confirm that this is the intended condition.
9413 if (!back->node.found_extent_tree && back->node.found_ref) {
9414 rb_erase(&back->node.node, &rec->backref_tree);
// Tree block: clear the flags of the matching tree backref.
9418 struct tree_backref *back;
9419 back = find_tree_backref(rec, parent, root_objectid);
9422 if (back->node.found_ref) {
9425 back->node.found_ref = 0;
9427 if (back->node.found_extent_tree) {
9428 if (rec->extent_item_refs)
9429 rec->extent_item_refs--;
9430 back->node.found_extent_tree = 0;
9432 if (!back->node.found_extent_tree && back->node.found_ref) {
9433 rb_erase(&back->node.node, &rec->backref_tree);
// Let maybe_free_extent_rec decide whether the record can go away.
9437 maybe_free_extent_rec(extent_cache, rec);
// Delete the extent tree items whose objectid is @bytenr and whose type is
// one of the extent or backref key types listed below. When the extent or
// metadata item itself is removed, btrfs_update_block_group is called for
// its length.
//
// NOTE(review): braces, one parameter line, loop headers, breaks and
// returns are missing from this listing; comments describe the visible
// statements only.
9442 static int delete_extent_records(struct btrfs_trans_handle *trans,
9443 struct btrfs_root *root,
9444 struct btrfs_path *path,
9447 struct btrfs_key key;
9448 struct btrfs_key found_key;
9449 struct extent_buffer *leaf;
// Search key: @bytenr with the largest possible offset.
9454 key.objectid = bytenr;
9456 key.offset = (u64)-1;
9459 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
// Slot 0 is tested here; the action taken is not visible.
9466 if (path->slots[0] == 0)
9472 leaf = path->nodes[0];
9473 slot = path->slots[0];
9475 btrfs_item_key_to_cpu(leaf, &found_key, slot);
// Item belongs to a different bytenr.
9476 if (found_key.objectid != bytenr)
// Not an extent related item: release the path and move the search key
// just below the found key (one offset lower for type 0, otherwise one
// type lower with the largest offset).
9479 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9480 found_key.type != BTRFS_METADATA_ITEM_KEY &&
9481 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9482 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9483 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9484 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9485 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9486 btrfs_release_path(path);
9487 if (found_key.type == 0) {
9488 if (found_key.offset == 0)
9490 key.offset = found_key.offset - 1;
9491 key.type = found_key.type;
9493 key.type = found_key.type - 1;
9494 key.offset = (u64)-1;
// Extent related item: report it and delete it.
9498 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9499 found_key.objectid, found_key.type, found_key.offset);
9501 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9504 btrfs_release_path(path);
// For the extent or metadata item itself the block group is updated with
// the extent length: key offset for EXTENT_ITEM, nodesize for
// METADATA_ITEM.
9506 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9507 found_key.type == BTRFS_METADATA_ITEM_KEY) {
9508 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9509 found_key.offset : root->fs_info->nodesize;
9511 ret = btrfs_update_block_group(trans, root, bytenr,
9518 btrfs_release_path(path);
9523 * for a single backref, this will allocate a new extent
9524 * and add the backref to it.
// Write extent tree information for backref @back of record @rec. The
// visible code can insert a new EXTENT_ITEM for the record (flagged as data
// or as a tree block with a btrfs_tree_block_info) and then adds the
// references with btrfs_inc_extent_ref().
//
// NOTE(review): braces, declarations, the condition that guards the item
// insertion, several call arguments and returns are missing from this
// listing; comments describe the visible statements only.
9526 static int record_extent(struct btrfs_trans_handle *trans,
9527 struct btrfs_fs_info *info,
9528 struct btrfs_path *path,
9529 struct extent_record *rec,
9530 struct extent_backref *back,
9531 int allocated, u64 flags)
9534 struct btrfs_root *extent_root = info->extent_root;
9535 struct extent_buffer *leaf;
9536 struct btrfs_key ins_key;
9537 struct btrfs_extent_item *ei;
9538 struct data_backref *dback;
9539 struct btrfs_tree_block_info *bi;
// max_size is raised to at least the second argument (not visible).
9542 rec->max_size = max_t(u64, rec->max_size,
9546 u32 item_size = sizeof(*ei);
// Extra room for a btrfs_tree_block_info; the guarding condition is not
// visible (presumably the non-data case).
9549 item_size += sizeof(*bi);
// Key of the new item: (start, EXTENT_ITEM, max_size).
9551 ins_key.objectid = rec->start;
9552 ins_key.offset = rec->max_size;
9553 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9555 ret = btrfs_insert_empty_item(trans, extent_root, path,
9556 &ins_key, item_size);
9560 leaf = path->nodes[0];
9561 ei = btrfs_item_ptr(leaf, path->slots[0],
9562 struct btrfs_extent_item);
// The item starts with zero refs; references are added further below.
9564 btrfs_set_extent_refs(leaf, ei, 0);
9565 btrfs_set_extent_generation(leaf, ei, rec->generation);
9567 if (back->is_data) {
9568 btrfs_set_extent_flags(leaf, ei,
9569 BTRFS_EXTENT_FLAG_DATA);
// NOTE(review): the declaration below ends with a doubled semicolon.
9571 struct btrfs_disk_key copy_key;;
9573 bi = (struct btrfs_tree_block_info *)(ei + 1);
9574 memset_extent_buffer(leaf, 0, (unsigned long)bi,
// NOTE(review): the argument written with a copyright sign in the calls
// below looks like a mangled address-of copy_key (the local declared
// above); confirm against the upstream file.
9577 btrfs_set_disk_key_objectid(©_key,
9578 rec->info_objectid);
9579 btrfs_set_disk_key_type(©_key, 0);
9580 btrfs_set_disk_key_offset(©_key, 0);
9582 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9583 btrfs_set_tree_block_key(leaf, bi, ©_key);
9585 btrfs_set_extent_flags(leaf, ei,
9586 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
// Mark the leaf dirty and account the new extent in its block group.
9589 btrfs_mark_buffer_dirty(leaf);
9590 ret = btrfs_update_block_group(trans, extent_root, rec->start,
9591 rec->max_size, 1, 0);
9594 btrfs_release_path(path);
// Data backref: one btrfs_inc_extent_ref call per found reference.
9597 if (back->is_data) {
9601 dback = to_data_backref(back);
9602 if (back->full_backref)
9603 parent = dback->parent;
9607 for (i = 0; i < dback->found_ref; i++) {
9608 /* if parent != 0, we're doing a full backref
9609 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9610 * just makes the backref allocator create a data
9613 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9614 rec->start, rec->max_size,
9618 BTRFS_FIRST_FREE_OBJECTID :
9624 fprintf(stderr, "adding new data backref"
9625 " on %llu %s %llu owner %llu"
9626 " offset %llu found %d\n",
9627 (unsigned long long)rec->start,
9628 back->full_backref ?
9630 back->full_backref ?
9631 (unsigned long long)parent :
9632 (unsigned long long)dback->root,
9633 (unsigned long long)dback->owner,
9634 (unsigned long long)dback->offset,
// Tree backref: a single reference is added.
9638 struct tree_backref *tback;
9640 tback = to_tree_backref(back);
9641 if (back->full_backref)
9642 parent = tback->parent;
9646 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9647 rec->start, rec->max_size,
9648 parent, tback->root, 0, 0);
9649 fprintf(stderr, "adding new tree backref on "
9650 "start %llu len %llu parent %llu root %llu\n",
9651 rec->start, rec->max_size, parent, tback->root);
9654 btrfs_release_path(path);
// Scan the list @entries for an extent_entry whose bytenr and bytes both
// equal the given values.
//
// NOTE(review): the return statements are missing from this listing;
// presumably the matching entry is returned, or NULL when none matches -
// TODO confirm.
9658 static struct extent_entry *find_entry(struct list_head *entries,
9659 u64 bytenr, u64 bytes)
9661 struct extent_entry *entry = NULL;
9663 list_for_each_entry(entry, entries, list) {
9664 if (entry->bytenr == bytenr && entry->bytes == bytes)
// Walk the list @entries and keep track of a best candidate, comparing the
// count fields of the entries and skipping entries whose broken value
// equals their count.
//
// NOTE(review): braces, continue statements, assignments and the return
// statement are missing from this listing; comments describe the visible
// statements only.
9671 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9673 struct extent_entry *entry, *best = NULL, *prev = NULL;
9675 list_for_each_entry(entry, entries, list) {
9677 * If there are as many broken entries as entries then we know
9678 * not to trust this particular entry.
9680 if (entry->broken == entry->count)
9684 * Special case, when there are only two entries and 'best' is
9694 * If our current entry == best then we can't be sure our best
9695 * is really the best, so we need to keep searching.
// A tie in count with the current best.
9697 if (best && best->count == entry->count) {
9703 /* Prev == entry, not good enough, have to keep searching */
9704 if (!prev->broken && prev->count == entry->count)
// Pick the larger count between prev and entry; the else-if compares the
// current best against entry.
9708 best = (prev->count > entry->count) ? prev : entry;
9709 else if (best->count < entry->count)
/*
 * Rewrite the file extent item that data backref @dback was found through so
 * that it agrees with the extent described by @entry.
 *
 * @info:  filesystem the owning root is read from
 * @path:  caller-provided path, used for both tree searches in here
 * @dback: data backref whose file extent item gets rewritten
 * @entry: the (bytenr, bytes) pair the item is made to agree with
 *
 * The owning root is loaded from dback->root and the item is first located by
 * a search without a transaction, starting at
 * (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset).  The key found is then
 * searched again inside a transaction with COW requested so the leaf can be
 * modified.  On the final exit path the transaction is committed, the path is
 * released and a non-zero ret takes precedence over the commit result.
 *
 * NOTE(review): the error branches between the visible statements are not
 * part of this listing; the messages suggest they bail out with an error -
 * confirm against the complete source.
 */
9717 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9718 struct data_backref *dback, struct extent_entry *entry)
9720 struct btrfs_trans_handle *trans;
9721 struct btrfs_root *root;
9722 struct btrfs_file_extent_item *fi;
9723 struct extent_buffer *leaf;
9724 struct btrfs_key key;
/* Load the root recorded in the backref */
9728 key.objectid = dback->root;
9729 key.type = BTRFS_ROOT_ITEM_KEY;
9730 key.offset = (u64)-1;
9731 root = btrfs_read_fs_root(info, &key);
9733 fprintf(stderr, "Couldn't find root for our ref\n");
9738 * The backref points to the original offset of the extent if it was
9739 * split, so we need to search down to the offset we have and then walk
9740 * forward until we find the backref we're looking for.
9742 key.objectid = dback->owner;
9743 key.type = BTRFS_EXTENT_DATA_KEY;
9744 key.offset = dback->offset;
9745 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9747 fprintf(stderr, "Error looking up ref %d\n", ret);
/* Ran off the end of the current leaf, continue in the next one */
9752 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9753 ret = btrfs_next_leaf(root, path);
9755 fprintf(stderr, "Couldn't find our ref, next\n");
9759 leaf = path->nodes[0];
9760 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
/* Items of another inode or another type mean the ref is not there */
9761 if (key.objectid != dback->owner ||
9762 key.type != BTRFS_EXTENT_DATA_KEY) {
9763 fprintf(stderr, "Couldn't find our ref, search\n");
9766 fi = btrfs_item_ptr(leaf, path->slots[0],
9767 struct btrfs_file_extent_item);
9768 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9769 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* This is the item the backref was built from */
9771 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9776 btrfs_release_path(path);
9778 trans = btrfs_start_transaction(root, 1);
9780 return PTR_ERR(trans);
9783 * Ok we have the key of the file extent we want to fix, now we can cow
9784 * down to the thing and fix it.
9786 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9788 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9789 key.objectid, key.type, key.offset, ret);
9793 fprintf(stderr, "Well that's odd, we just found this key "
9794 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9799 leaf = path->nodes[0];
9800 fi = btrfs_item_ptr(leaf, path->slots[0],
9801 struct btrfs_file_extent_item);
/* Moving the start of a compressed extent is not supported here */
9803 if (btrfs_file_extent_compression(leaf, fi) &&
9804 dback->disk_bytenr != entry->bytenr) {
9805 fprintf(stderr, "Ref doesn't match the record start and is "
9806 "compressed, please take a btrfs-image of this file "
9807 "system and send it to a btrfs developer so they can "
9808 "complete this functionality for bytenr %Lu\n",
9809 dback->disk_bytenr);
/*
 * Three ways to line the item up with the entry: a broken backref simply
 * takes the entry's bytenr; otherwise the item's offset into the extent is
 * adjusted depending on whether the ref starts after or before the entry.
 */
9814 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9815 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9816 } else if (dback->disk_bytenr > entry->bytenr) {
9817 u64 off_diff, offset;
9819 off_diff = dback->disk_bytenr - entry->bytenr;
9820 offset = btrfs_file_extent_offset(leaf, fi);
9821 if (dback->disk_bytenr + offset +
9822 btrfs_file_extent_num_bytes(leaf, fi) >
9823 entry->bytenr + entry->bytes) {
9824 fprintf(stderr, "Ref is past the entry end, please "
9825 "take a btrfs-image of this file system and "
9826 "send it to a btrfs developer, ref %Lu\n",
9827 dback->disk_bytenr);
9832 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9833 btrfs_set_file_extent_offset(leaf, fi, offset);
9834 } else if (dback->disk_bytenr < entry->bytenr) {
9837 offset = btrfs_file_extent_offset(leaf, fi);
9838 if (dback->disk_bytenr + offset < entry->bytenr) {
9839 fprintf(stderr, "Ref is before the entry start, please"
9840 " take a btrfs-image of this file system and "
9841 "send it to a btrfs developer, ref %Lu\n",
9842 dback->disk_bytenr);
/* Re-express the offset relative to the entry's start */
9847 offset += dback->disk_bytenr;
9848 offset -= entry->bytenr;
9849 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9850 btrfs_set_file_extent_offset(leaf, fi, offset);
/* The on-disk length always follows the entry */
9853 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9856 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9857 * only do this if we aren't using compression, otherwise it's a
9860 if (!btrfs_file_extent_compression(leaf, fi))
9861 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9863 printf("ram bytes may be wrong?\n");
9864 btrfs_mark_buffer_dirty(leaf);
9866 err = btrfs_commit_transaction(trans, root);
9867 btrfs_release_path(path);
9868 return ret ? ret : err;
/*
 * Make the data backrefs of @rec agree on a single (bytenr, bytes) pair and
 * repair the file extent items that disagree with it.
 *
 * @info: filesystem being checked, handed on to repair_ref()
 * @path: caller-provided path, handed on to repair_ref()
 * @rec:  extent record whose backref_tree is examined
 *
 * First pass: every data backref that is not a full backref and has
 * found_ref set is grouped into an extent_entry keyed by
 * (disk_bytenr, bytes) on a local list.  If the backrefs disagree with each
 * other or with the record, find_most_right_entry() picks the value to
 * trust, with the extent record itself (rec->start, rec->nr) used as an
 * extra candidate when the backrefs alone are inconclusive.  Second pass:
 * repair_ref() is called for every such backref that differs from the chosen
 * entry.  The local entry list is emptied before returning.
 *
 * NOTE(review): counters, branch bodies and the return statements are not
 * part of this listing; the trailing comment suggests the caller is told to
 * rescan after a repair - confirm against the complete source.
 */
9871 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9872 struct extent_record *rec)
9874 struct extent_backref *back, *tmp;
9875 struct data_backref *dback;
9876 struct extent_entry *entry, *best = NULL;
9879 int broken_entries = 0;
9884 * Metadata is easy and the backrefs should always agree on bytenr and
9885 * size, if not we've got bigger issues.
9890 rbtree_postorder_for_each_entry_safe(back, tmp,
9891 &rec->backref_tree, node) {
9892 if (back->full_backref || !back->is_data)
9895 dback = to_data_backref(back);
9898 * We only pay attention to backrefs that we found a real
9901 if (dback->found_ref == 0)
9905 * For now we only catch when the bytes don't match, not the
9906 * bytenr. We can easily do this at the same time, but I want
9907 * to have a fs image to test on before we just add repair
9908 * functionality willy-nilly so we know we won't screw up the
9912 entry = find_entry(&entries, dback->disk_bytenr,
9915 entry = malloc(sizeof(struct extent_entry));
9920 memset(entry, 0, sizeof(*entry));
9921 entry->bytenr = dback->disk_bytenr;
9922 entry->bytes = dback->bytes;
9923 list_add_tail(&entry->list, &entries);
9928 * If we only have on entry we may think the entries agree when
9929 * in reality they don't so we have to do some extra checking.
9931 if (dback->disk_bytenr != rec->start ||
9932 dback->bytes != rec->nr || back->broken)
9943 /* Yay all the backrefs agree, carry on good sir */
9944 if (nr_entries <= 1 && !mismatch)
9947 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9948 "%Lu\n", rec->start);
9951 * First we want to see if the backrefs can agree amongst themselves who
9952 * is right, so figure out which one of the entries has the highest
9955 best = find_most_right_entry(&entries);
9958 * Ok so we may have an even split between what the backrefs think, so
9959 * this is where we use the extent ref to see what it thinks.
9962 entry = find_entry(&entries, rec->start, rec->nr);
9963 if (!entry && (!broken_entries || !rec->found_rec)) {
9964 fprintf(stderr, "Backrefs don't agree with each other "
9965 "and extent record doesn't agree with anybody,"
9966 " so we can't fix bytenr %Lu bytes %Lu\n",
9967 rec->start, rec->nr);
9970 } else if (!entry) {
9972 * Ok our backrefs were broken, we'll assume this is the
9973 * correct value and add an entry for this range.
9975 entry = malloc(sizeof(struct extent_entry));
9980 memset(entry, 0, sizeof(*entry));
9981 entry->bytenr = rec->start;
9982 entry->bytes = rec->nr;
9983 list_add_tail(&entry->list, &entries);
9987 best = find_most_right_entry(&entries);
9989 fprintf(stderr, "Backrefs and extent record evenly "
9990 "split on who is right, this is going to "
9991 "require user input to fix bytenr %Lu bytes "
9992 "%Lu\n", rec->start, rec->nr);
9999 * I don't think this can happen currently as we'll abort() if we catch
10000 * this case higher up, but in case somebody removes that we still can't
10001 * deal with it properly here yet, so just bail out of that's the case.
10003 if (best->bytenr != rec->start) {
10004 fprintf(stderr, "Extent start and backref starts don't match, "
10005 "please use btrfs-image on this file system and send "
10006 "it to a btrfs developer so they can make fsck fix "
10007 "this particular case. bytenr is %Lu, bytes is %Lu\n",
10008 rec->start, rec->nr);
10014 * Ok great we all agreed on an extent record, let's go find the real
10015 * references and fix up the ones that don't match.
10017 rbtree_postorder_for_each_entry_safe(back, tmp,
10018 &rec->backref_tree, node) {
10019 if (back->full_backref || !back->is_data)
10022 dback = to_data_backref(back);
10025 * Still ignoring backrefs that don't have a real ref attached
10028 if (dback->found_ref == 0)
/* Backrefs that already match the chosen entry need no repair */
10031 if (dback->bytes == best->bytes &&
10032 dback->disk_bytenr == best->bytenr)
10035 ret = repair_ref(info, path, dback, best);
10041 * Ok we messed with the actual refs, which means we need to drop our
10042 * entire cache and go back and rescan. I know this is a huge pain and
10043 * adds a lot of extra work, but it's the only way to be safe. Once all
10044 * the backrefs agree we may not need to do anything to the extent
/* Tear down the temporary candidate list */
10049 while (!list_empty(&entries)) {
10050 entry = list_entry(entries.next, struct extent_entry, list);
10051 list_del_init(&entry->list);
/*
 * Swap @rec in @extent_cache for its first duplicate record and fold any
 * overlapping cache entries into that duplicate.
 *
 * @extent_cache: cache tree holding the extent records
 * @rec:          record taken from the duplicate_extents list
 *
 * @rec is removed from the cache, the first record on rec->dups ("good")
 * inherits rec's refs and backrefs list, and every cache entry found to
 * overlap good's range is merged into it: entries that have an extent item
 * or duplicates of their own are chained on good->dups (and good is put on
 * duplicate_extents), all others just contribute their refs and backrefs.
 * good is finally inserted into the cache.
 *
 * The last statement returns 0 when good ended up with duplicates and 1
 * otherwise.
 *
 * NOTE(review): the value returned by the early exit and any handling of an
 * insert_cache_extent() failure are not part of this listing - confirm
 * against the complete source.
 */
10057 static int process_duplicates(struct cache_tree *extent_cache,
10058 struct extent_record *rec)
10060 struct extent_record *good, *tmp;
10061 struct cache_extent *cache;
10065 * If we found a extent record for this extent then return, or if we
10066 * have more than one duplicate we are likely going to need to delete
10069 if (rec->found_rec || rec->num_duplicates > 1)
10072 /* Shouldn't happen but just in case */
10073 BUG_ON(!rec->num_duplicates);
10076 * So this happens if we end up with a backref that doesn't match the
10077 * actual extent entry. So either the backref is bad or the extent
10078 * entry is bad. Either way we want to have the extent_record actually
10079 * reflect what we found in the extent_tree, so we need to take the
10080 * duplicate out and use that as the extent_record since the only way we
10081 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
10083 remove_cache_extent(extent_cache, &rec->cache);
/* Promote the first duplicate and reset its bookkeeping */
10085 good = to_extent_record(rec->dups.next);
10086 list_del_init(&good->list);
10087 INIT_LIST_HEAD(&good->backrefs);
10088 INIT_LIST_HEAD(&good->dups);
10089 good->cache.start = good->start;
10090 good->cache.size = good->nr;
10091 good->content_checked = 0;
10092 good->owner_ref_checked = 0;
10093 good->num_duplicates = 0;
10094 good->refs = rec->refs;
10095 list_splice_init(&rec->backrefs, &good->backrefs);
10097 cache = lookup_cache_extent(extent_cache, good->start,
10101 tmp = container_of(cache, struct extent_record, cache);
10104 * If we find another overlapping extent and it's found_rec is
10105 * set then it's a duplicate and we need to try and delete
10108 if (tmp->found_rec || tmp->num_duplicates > 0) {
10109 if (list_empty(&good->list))
10110 list_add_tail(&good->list,
10111 &duplicate_extents);
10112 good->num_duplicates += tmp->num_duplicates + 1;
10113 list_splice_init(&tmp->dups, &good->dups);
10114 list_del_init(&tmp->list);
10115 list_add_tail(&tmp->list, &good->dups);
10116 remove_cache_extent(extent_cache, &tmp->cache);
10121 * Ok we have another non extent item backed extent rec, so lets
10122 * just add it to this extent and carry on like we did above.
10124 good->refs += tmp->refs;
10125 list_splice_init(&tmp->backrefs, &good->backrefs);
10126 remove_cache_extent(extent_cache, &tmp->cache);
10129 ret = insert_cache_extent(extent_cache, &good->cache);
10132 return good->num_duplicates ? 0 : 1;
/*
 * Delete the extent items of the duplicate records chained on @rec from the
 * extent tree, keeping one record.
 *
 * @root: any root of the filesystem; the extent root is taken from its
 *        fs_info before the transaction is started
 * @rec:  record whose dups list holds the duplicates
 *
 * A record "good" is selected while walking rec->dups; partially overlapping
 * extents are reported as unsupported.  The remaining records are collected
 * on a local delete_list and, inside one transaction, the
 * BTRFS_EXTENT_ITEM_KEY item (start, nr) of each of them is searched for and
 * deleted.  Records without found_rec and metadata records get special
 * treatment in that loop.  Afterwards delete_list and rec->dups are emptied.
 *
 * Returns ret if it is non-zero, otherwise nr_del.  When both are zero,
 * rec->num_duplicates is reset to 0.
 *
 * NOTE(review): the initial value of good, how nr_del is counted, the
 * branch bodies and the freeing of records are not part of this listing -
 * confirm against the complete source.
 */
10135 static int delete_duplicate_records(struct btrfs_root *root,
10136 struct extent_record *rec)
10138 struct btrfs_trans_handle *trans;
10139 LIST_HEAD(delete_list);
10140 struct btrfs_path path;
10141 struct extent_record *tmp, *good, *n;
10144 struct btrfs_key key;
10146 btrfs_init_path(&path);
10149 /* Find the record that covers all of the duplicates. */
10150 list_for_each_entry(tmp, &rec->dups, list) {
10151 if (good->start < tmp->start)
10153 if (good->nr > tmp->nr)
10156 if (tmp->start + tmp->nr < good->start + good->nr) {
10157 fprintf(stderr, "Ok we have overlapping extents that "
10158 "aren't completely covered by each other, this "
10159 "is going to require more careful thought. "
10160 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
10161 tmp->start, tmp->nr, good->start, good->nr);
10168 list_add_tail(&rec->list, &delete_list);
10170 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
10173 list_move_tail(&tmp->list, &delete_list);
/* Deletions are done against the extent tree */
10176 root = root->fs_info->extent_root;
10177 trans = btrfs_start_transaction(root, 1);
10178 if (IS_ERR(trans)) {
10179 ret = PTR_ERR(trans);
10183 list_for_each_entry(tmp, &delete_list, list) {
10184 if (tmp->found_rec == 0)
10186 key.objectid = tmp->start;
10187 key.type = BTRFS_EXTENT_ITEM_KEY;
10188 key.offset = tmp->nr;
10190 /* Shouldn't happen but just in case */
10191 if (tmp->metadata) {
10192 fprintf(stderr, "Well this shouldn't happen, extent "
10193 "record overlaps but is metadata? "
10194 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
10198 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10204 ret = btrfs_del_item(trans, root, &path);
10207 btrfs_release_path(&path);
10210 err = btrfs_commit_transaction(trans, root);
10214 while (!list_empty(&delete_list)) {
10215 tmp = to_extent_record(delete_list.next);
10216 list_del_init(&tmp->list);
10222 while (!list_empty(&rec->dups)) {
10223 tmp = to_extent_record(rec->dups.next);
10224 list_del_init(&tmp->list);
10228 btrfs_release_path(&path);
10230 if (!ret && !nr_del)
10231 rec->num_duplicates = 0;
10233 return ret ? ret : nr_del;
/*
 * Try to resolve the data backrefs of @rec that have no found_ref yet by
 * looking up the file extent item each of them names.
 *
 * @info:         filesystem the owning roots are read from
 * @path:         caller-provided path used for the lookups; released again
 *                after each lookup
 * @extent_cache: cache tree consulted for a record covering the bytenr read
 *                from the file extent item
 * @rec:          extent record whose backref_tree is walked
 *
 * For each such backref the root dback->root is read and the key
 * (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset) is searched.  When an
 * item is read, its disk bytenr and disk length are stored in the backref
 * and found_ref is incremented, unless the extent cache already has a record
 * with found_rec set for that bytenr.
 *
 * A failure to read the root other than -ENOENT is returned to the caller.
 *
 * NOTE(review): the remaining return statements and the statement announced
 * by the last comment in the loop are not part of this listing - confirm
 * against the complete source.
 */
10236 static int find_possible_backrefs(struct btrfs_fs_info *info,
10237 struct btrfs_path *path,
10238 struct cache_tree *extent_cache,
10239 struct extent_record *rec)
10241 struct btrfs_root *root;
10242 struct extent_backref *back, *tmp;
10243 struct data_backref *dback;
10244 struct cache_extent *cache;
10245 struct btrfs_file_extent_item *fi;
10246 struct btrfs_key key;
10250 rbtree_postorder_for_each_entry_safe(back, tmp,
10251 &rec->backref_tree, node) {
10252 /* Don't care about full backrefs (poor unloved backrefs) */
10253 if (back->full_backref || !back->is_data)
10256 dback = to_data_backref(back);
10258 /* We found this one, we don't need to do a lookup */
10259 if (dback->found_ref)
10262 key.objectid = dback->root;
10263 key.type = BTRFS_ROOT_ITEM_KEY;
10264 key.offset = (u64)-1;
10266 root = btrfs_read_fs_root(info, &key);
10268 /* No root, definitely a bad ref, skip */
10269 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
10271 /* Other err, exit */
10273 return PTR_ERR(root);
10275 key.objectid = dback->owner;
10276 key.type = BTRFS_EXTENT_DATA_KEY;
10277 key.offset = dback->offset;
10278 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
10280 btrfs_release_path(path);
10283 /* Didn't find it, we can carry on */
/* Read what the file extent item says about its on-disk location */
10288 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
10289 struct btrfs_file_extent_item);
10290 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
10291 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
10292 btrfs_release_path(path);
10293 cache = lookup_cache_extent(extent_cache, bytenr, 1);
10295 struct extent_record *tmp;
10296 tmp = container_of(cache, struct extent_record, cache);
10299 * If we found an extent record for the bytenr for this
10300 * particular backref then we can't add it to our
10301 * current extent record. We only want to add backrefs
10302 * that don't have a corresponding extent item in the
10303 * extent tree since they likely belong to this record
10304 * and we need to fix it if it doesn't match bytenrs.
10306 if (tmp->found_rec)
10310 dback->found_ref += 1;
10311 dback->disk_bytenr = bytenr;
10312 dback->bytes = bytes;
10315 * Set this so the verify backref code knows not to trust the
10316 * values in this backref.
10325 * Record orphan data ref into corresponding root.
10327 * Return 0 if the extent item contains data ref and recorded.
10328 * Return 1 if the extent item contains no useful data ref
10329 * On that case, it may contains only shared_dataref or metadata backref
10330 * or the file extent exists(this should be handled by the extent bytenr
10331 * recovery routine)
10332 * Return <0 if something goes wrong.
10334 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10335 struct extent_record *rec)
10337 struct btrfs_key key;
10338 struct btrfs_root *dest_root;
10339 struct extent_backref *back, *tmp;
10340 struct data_backref *dback;
10341 struct orphan_data_extent *orphan;
10342 struct btrfs_path path;
10343 int recorded_data_ref = 0;
10348 btrfs_init_path(&path);
10349 rbtree_postorder_for_each_entry_safe(back, tmp,
10350 &rec->backref_tree, node) {
10351 if (back->full_backref || !back->is_data ||
10352 !back->found_extent_tree)
10354 dback = to_data_backref(back);
10355 if (dback->found_ref)
10357 key.objectid = dback->root;
10358 key.type = BTRFS_ROOT_ITEM_KEY;
10359 key.offset = (u64)-1;
10361 dest_root = btrfs_read_fs_root(fs_info, &key);
10363 /* For non-exist root we just skip it */
10364 if (IS_ERR(dest_root) || !dest_root)
10367 key.objectid = dback->owner;
10368 key.type = BTRFS_EXTENT_DATA_KEY;
10369 key.offset = dback->offset;
10371 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10372 btrfs_release_path(&path);
10374 * For ret < 0, it's OK since the fs-tree may be corrupted,
10375 * we need to record it for inode/file extent rebuild.
10376 * For ret > 0, we record it only for file extent rebuild.
10377 * For ret == 0, the file extent exists but only bytenr
10378 * mismatch, let the original bytenr fix routine to handle,
10384 orphan = malloc(sizeof(*orphan));
10389 INIT_LIST_HEAD(&orphan->list);
10390 orphan->root = dback->root;
10391 orphan->objectid = dback->owner;
10392 orphan->offset = dback->offset;
10393 orphan->disk_bytenr = rec->cache.start;
10394 orphan->disk_len = rec->cache.size;
10395 list_add(&dest_root->orphan_data_extents, &orphan->list);
10396 recorded_data_ref = 1;
10399 btrfs_release_path(&path);
10401 return !recorded_data_ref;
10407 * when an incorrect extent item is found, this will delete
10408 * all of the existing entries for it and recreate them
10409 * based on what the tree scan found.
/*
 * Rebuild the extent tree entries of @rec from the backrefs that the tree
 * scan found.
 *
 * @info:         filesystem being repaired
 * @extent_cache: cache tree of extent records, handed on to
 *                find_possible_backrefs()
 * @rec:          extent record to repair
 *
 * For data extents whose counted refs differ from the extent item's refs,
 * find_possible_backrefs() and verify_backrefs() run first.  Then, in one
 * transaction on the extent root, the existing records are deleted with
 * delete_extent_records() and record_extent() is called for the backrefs
 * that have found_ref set.  A lookup in info->corrupt_blocks for the
 * extent's range is done before re-adding references.  The path is released
 * before returning.
 *
 * NOTE(review): the error branches, the handling of a corrupt-block hit and
 * the return statements are not part of this listing - confirm against the
 * complete source.
 */
10411 static int fixup_extent_refs(struct btrfs_fs_info *info,
10412 struct cache_tree *extent_cache,
10413 struct extent_record *rec)
10415 struct btrfs_trans_handle *trans = NULL;
10417 struct btrfs_path path;
10418 struct cache_extent *cache;
10419 struct extent_backref *back, *tmp;
10423 if (rec->flag_block_full_backref)
10424 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10426 btrfs_init_path(&path);
10427 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10429 * Sometimes the backrefs themselves are so broken they don't
10430 * get attached to any meaningful rec, so first go back and
10431 * check any of our backrefs that we couldn't find and throw
10432 * them into the list if we find the backref so that
10433 * verify_backrefs can figure out what to do.
10435 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10440 /* step one, make sure all of the backrefs agree */
10441 ret = verify_backrefs(info, &path, rec);
10445 trans = btrfs_start_transaction(info->extent_root, 1);
10446 if (IS_ERR(trans)) {
10447 ret = PTR_ERR(trans);
10451 /* step two, delete all the existing records */
10452 ret = delete_extent_records(trans, info->extent_root, &path,
10458 /* was this block corrupt? If so, don't add references to it */
10459 cache = lookup_cache_extent(info->corrupt_blocks,
10460 rec->start, rec->max_size);
10466 /* step three, recreate all the refs we did find */
10467 rbtree_postorder_for_each_entry_safe(back, tmp,
10468 &rec->backref_tree, node) {
10470 * if we didn't find any references, don't create a
10471 * new extent record
10473 if (!back->found_ref)
10476 rec->bad_full_backref = 0;
10477 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10485 int err = btrfs_commit_transaction(trans, info->extent_root);
10491 fprintf(stderr, "Repaired extent references for %llu\n",
10492 (unsigned long long)rec->start);
10494 btrfs_release_path(&path);
/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF in the extent item of @rec so
 * that it matches rec->flag_block_full_backref.
 *
 * @fs_info: filesystem whose extent root holds the item
 * @rec:     extent record describing the item
 *
 * The item is looked up as (start, BTRFS_METADATA_ITEM_KEY, info_level) for
 * metadata records and as (start, BTRFS_EXTENT_ITEM_KEY, max_size)
 * otherwise, inside a transaction with COW requested.  The flags are
 * rewritten, the leaf is marked dirty and the transaction is committed.  On
 * the lookup failure paths the path is released and the transaction is
 * committed as well.
 *
 * A failure to start the transaction is returned as PTR_ERR(trans).
 *
 * NOTE(review): the other return statements are not part of this listing -
 * confirm against the complete source.
 */
10498 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10499 struct extent_record *rec)
10501 struct btrfs_trans_handle *trans;
10502 struct btrfs_root *root = fs_info->extent_root;
10503 struct btrfs_path path;
10504 struct btrfs_extent_item *ei;
10505 struct btrfs_key key;
10509 key.objectid = rec->start;
10510 if (rec->metadata) {
10511 key.type = BTRFS_METADATA_ITEM_KEY;
10512 key.offset = rec->info_level;
10514 key.type = BTRFS_EXTENT_ITEM_KEY;
10515 key.offset = rec->max_size;
10518 trans = btrfs_start_transaction(root, 0);
10520 return PTR_ERR(trans);
10522 btrfs_init_path(&path);
10523 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10525 btrfs_release_path(&path);
10526 btrfs_commit_transaction(trans, root);
10529 fprintf(stderr, "Didn't find extent for %llu\n",
10530 (unsigned long long)rec->start);
10531 btrfs_release_path(&path);
10532 btrfs_commit_transaction(trans, root);
/* Item found: update the full backref bit in its flags */
10536 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10537 struct btrfs_extent_item);
10538 flags = btrfs_extent_flags(path.nodes[0], ei);
10539 if (rec->flag_block_full_backref) {
10540 fprintf(stderr, "setting full backref on %llu\n",
10541 (unsigned long long)key.objectid);
10542 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10544 fprintf(stderr, "clearing full backref on %llu\n",
10545 (unsigned long long)key.objectid);
10546 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10548 btrfs_set_extent_flags(path.nodes[0], ei, flags);
10549 btrfs_mark_buffer_dirty(path.nodes[0]);
10550 btrfs_release_path(&path);
10551 ret = btrfs_commit_transaction(trans, root);
10553 fprintf(stderr, "Repaired extent flags for %llu\n",
10554 (unsigned long long)rec->start);
10559 /* right now we only prune from the extent allocation tree */
/*
 * Remove the pointer to one corrupt block from its parent node in the extent
 * tree.
 *
 * @trans:   running transaction used for the search
 * @info:    filesystem whose extent root is searched
 * @corrupt: record of the corrupt block (key, level and block start)
 *
 * The search for corrupt->key stops one level above the corrupt block
 * (lowest_level = corrupt->level + 1).  The slot the search ended on is
 * checked first; if its block pointer is not the corrupt block, all slots of
 * that node are scanned.  When the pointer is located it is removed with
 * btrfs_del_ptr().
 *
 * NOTE(review): the branch bodies and the return statements are not part of
 * this listing; what happens when the pointer cannot be found is therefore
 * unclear - confirm against the complete source.
 */
10560 static int prune_one_block(struct btrfs_trans_handle *trans,
10561 struct btrfs_fs_info *info,
10562 struct btrfs_corrupt_block *corrupt)
10565 struct btrfs_path path;
10566 struct extent_buffer *eb;
10570 int level = corrupt->level + 1;
10572 btrfs_init_path(&path);
10574 /* we want to stop at the parent to our busted block */
10575 path.lowest_level = level;
10577 ret = btrfs_search_slot(trans, info->extent_root,
10578 &corrupt->key, &path, -1, 1);
10583 eb = path.nodes[level];
10590 * hopefully the search gave us the block we want to prune,
10591 * lets try that first
10593 slot = path.slots[level];
10594 found = btrfs_node_blockptr(eb, slot);
10595 if (found == corrupt->cache.start)
10598 nritems = btrfs_header_nritems(eb);
10600 /* the search failed, lets scan this node and hope we find it */
10601 for (slot = 0; slot < nritems; slot++) {
10602 found = btrfs_node_blockptr(eb, slot);
10603 if (found == corrupt->cache.start)
10607 * we couldn't find the bad block. TODO, search all the nodes for pointers
10610 if (eb == info->extent_root->node) {
10615 btrfs_release_path(&path);
10620 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10621 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10624 btrfs_release_path(&path);
/*
 * Prune the blocks recorded in info->corrupt_blocks from the extent tree.
 *
 * Each cache entry found is handed to prune_one_block() and then removed
 * from the corrupt block cache.  The result of prune_one_block() is not
 * looked at.  A transaction on the extent root is started for the work and
 * its commit result is returned; a failure to start it is returned as
 * PTR_ERR(trans).
 *
 * NOTE(review): the loop construct, the condition under which the
 * transaction is started and the return value when nothing was pruned are
 * not part of this listing - confirm against the complete source.
 */
10628 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10630 struct btrfs_trans_handle *trans = NULL;
10631 struct cache_extent *cache;
10632 struct btrfs_corrupt_block *corrupt;
10635 cache = search_cache_extent(info->corrupt_blocks, 0);
10639 trans = btrfs_start_transaction(info->extent_root, 1);
10641 return PTR_ERR(trans);
10643 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10644 prune_one_block(trans, info, corrupt);
10645 remove_cache_extent(info->corrupt_blocks, cache);
10648 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Drop the cached free space state of @fs_info.
 *
 * Ranges marked EXTENT_DIRTY in fs_info->free_space_cache are located with
 * find_first_extent_bit() and cleared, then the block groups are visited in
 * address order, advancing by each group's key (objectid + offset).
 *
 * NOTE(review): the loop constructs and what is changed on each block group
 * are not part of this listing; the function name suggests its cached state
 * is reset - confirm against the complete source.
 */
10652 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10654 struct btrfs_block_group_cache *cache;
10659 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10660 &start, &end, EXTENT_DIRTY);
10663 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10668 cache = btrfs_lookup_first_block_group(fs_info, start);
10673 start = cache->key.objectid + cache->key.offset;
/*
 * Walk every record in @extent_cache, report reference problems and, in
 * repair mode, try to fix them.
 *
 * @root:         any root of the filesystem; root->fs_info is what is used
 * @extent_cache: cache tree of extent records built by the tree scan
 *
 * Preparation: the ranges of all records and of all corrupt blocks are marked
 * dirty in fs_info->excluded_extents, prune_corrupt_blocks() is called and
 * the cached block group state is reset.  Records on the global
 * duplicate_extents list are run through process_duplicates() and
 * delete_duplicate_records() first.
 *
 * Main loop, per record: duplicates, a mismatch between refs and
 * extent_item_refs, backpointer mismatches, failed owner ref checks, bad full
 * backrefs, metadata crossing a stripe boundary and chunk type mismatches are
 * reported on stderr.  fixup_extent_refs() and fixup_extent_flags() are the
 * repair helpers called from here.  Each record is then removed from the
 * cache and its backrefs are freed.
 *
 * Afterwards a transaction on the extent root runs
 * btrfs_fix_block_accounting() and is committed.
 *
 * NOTE(review): the conditions guarding the repair-only parts, the error
 * bookkeeping and the return statements are not part of this listing -
 * confirm against the complete source.
 */
10677 static int check_extent_refs(struct btrfs_root *root,
10678 struct cache_tree *extent_cache)
10680 struct extent_record *rec;
10681 struct cache_extent *cache;
10687 * if we're doing a repair, we have to make sure
10688 * we don't allocate from the problem extents.
10689 * In the worst case, this will be all the
10690 * extents in the FS
10692 cache = search_cache_extent(extent_cache, 0);
10694 rec = container_of(cache, struct extent_record, cache);
10695 set_extent_dirty(root->fs_info->excluded_extents,
10697 rec->start + rec->max_size - 1);
10698 cache = next_cache_extent(cache);
10701 /* pin down all the corrupted blocks too */
10702 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10704 set_extent_dirty(root->fs_info->excluded_extents,
10706 cache->start + cache->size - 1);
10707 cache = next_cache_extent(cache);
10709 prune_corrupt_blocks(root->fs_info);
10710 reset_cached_block_groups(root->fs_info);
10713 reset_cached_block_groups(root->fs_info);
10716 * We need to delete any duplicate entries we find first otherwise we
10717 * could mess up the extent tree when we have backrefs that actually
10718 * belong to a different extent item and not the weird duplicate one.
10720 while (repair && !list_empty(&duplicate_extents)) {
10721 rec = to_extent_record(duplicate_extents.next);
10722 list_del_init(&rec->list);
10724 /* Sometimes we can find a backref before we find an actual
10725 * extent, so we need to process it a little bit to see if there
10726 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10727 * if this is a backref screwup. If we need to delete stuff
10728 * process_duplicates() will return 0, otherwise it will return
10731 if (process_duplicates(extent_cache, rec))
10733 ret = delete_duplicate_records(root, rec);
10737 * delete_duplicate_records will return the number of entries
10738 * deleted, so if it's greater than 0 then we know we actually
10739 * did something and we need to remove.
10752 cache = search_cache_extent(extent_cache, 0);
10755 rec = container_of(cache, struct extent_record, cache);
10756 if (rec->num_duplicates) {
10757 fprintf(stderr, "extent item %llu has multiple extent "
10758 "items\n", (unsigned long long)rec->start);
10762 if (rec->refs != rec->extent_item_refs) {
10763 fprintf(stderr, "ref mismatch on [%llu %llu] ",
10764 (unsigned long long)rec->start,
10765 (unsigned long long)rec->nr);
10766 fprintf(stderr, "extent item %llu, found %llu\n",
10767 (unsigned long long)rec->extent_item_refs,
10768 (unsigned long long)rec->refs);
10769 ret = record_orphan_data_extents(root->fs_info, rec);
10775 if (all_backpointers_checked(rec, 1)) {
10776 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10777 (unsigned long long)rec->start,
10778 (unsigned long long)rec->nr);
10782 if (!rec->owner_ref_checked) {
10783 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10784 (unsigned long long)rec->start,
10785 (unsigned long long)rec->nr);
10790 if (repair && fix) {
10791 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10797 if (rec->bad_full_backref) {
10798 fprintf(stderr, "bad full backref, on [%llu]\n",
10799 (unsigned long long)rec->start);
10801 ret = fixup_extent_flags(root->fs_info, rec);
10809 * Although it's not a extent ref's problem, we reuse this
10810 * routine for error reporting.
10811 * No repair function yet.
10813 if (rec->crossing_stripes) {
10815 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10816 rec->start, rec->start + rec->max_size);
10820 if (rec->wrong_chunk_type) {
10822 "bad extent [%llu, %llu), type mismatch with chunk\n",
10823 rec->start, rec->start + rec->max_size);
10827 remove_cache_extent(extent_cache, cache);
10828 free_all_extent_backrefs(rec);
10829 if (!init_extent_tree && repair && (!cur_err || fix))
10830 clear_extent_dirty(root->fs_info->excluded_extents,
10832 rec->start + rec->max_size - 1);
10837 if (ret && ret != -EAGAIN) {
10838 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10841 struct btrfs_trans_handle *trans;
10843 root = root->fs_info->extent_root;
10844 trans = btrfs_start_transaction(root, 1);
10845 if (IS_ERR(trans)) {
10846 ret = PTR_ERR(trans);
10850 ret = btrfs_fix_block_accounting(trans, root);
10853 ret = btrfs_commit_transaction(trans, root);
10862 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10866 if (type & BTRFS_BLOCK_GROUP_RAID0) {
10867 stripe_size = length;
10868 stripe_size /= num_stripes;
10869 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10870 stripe_size = length * 2;
10871 stripe_size /= num_stripes;
10872 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10873 stripe_size = length;
10874 stripe_size /= (num_stripes - 1);
10875 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10876 stripe_size = length;
10877 stripe_size /= (num_stripes - 2);
10879 stripe_size = length;
10881 return stripe_size;
10885 * Check the chunk with its block group/dev list ref:
10886 * Return 0 if all refs seem valid.
10887 * Return 1 if only part of the refs seem valid and a later check is needed to
10888 * rebuild refs, e.g. a missing block group that requires searching the extent tree.
10889 * Return -1 if essential refs are missing and cannot be rebuilt.
/*
 * Cross-check one chunk record against the block group cache and the device
 * extent cache.
 *
 * @chunk_rec:         chunk to check
 * @block_group_cache: block group records; the matching one is unlinked from
 *                     its list and stored in chunk_rec->bg_rec
 * @dev_extent_cache:  device extent records; matching ones are moved to
 *                     chunk_rec->dextents
 *
 * Block group: the record found for the chunk's range is compared by length,
 * start and (conditionally) type flags; a mismatch or a missing record is
 * reported on stderr.
 *
 * Device extents: for each stripe the record at (devid, offset) with the
 * length from calc_stripe_length() is looked up and compared by device,
 * offset, chunk offset and length; a mismatch or a missing record is
 * reported on stderr.
 *
 * NOTE(review): the last parameter, the conditions around the messages and
 * the return statements are not part of this listing - confirm against the
 * complete source.
 */
10891 static int check_chunk_refs(struct chunk_record *chunk_rec,
10892 struct block_group_tree *block_group_cache,
10893 struct device_extent_tree *dev_extent_cache,
10896 struct cache_extent *block_group_item;
10897 struct block_group_record *block_group_rec;
10898 struct cache_extent *dev_extent_item;
10899 struct device_extent_record *dev_extent_rec;
10903 int metadump_v2 = 0;
10907 block_group_item = lookup_cache_extent(&block_group_cache->tree,
10909 chunk_rec->length);
10910 if (block_group_item) {
10911 block_group_rec = container_of(block_group_item,
10912 struct block_group_record,
10914 if (chunk_rec->length != block_group_rec->offset ||
10915 chunk_rec->offset != block_group_rec->objectid ||
10917 chunk_rec->type_flags != block_group_rec->flags)) {
10920 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10921 chunk_rec->objectid,
10926 chunk_rec->type_flags,
10927 block_group_rec->objectid,
10928 block_group_rec->type,
10929 block_group_rec->offset,
10930 block_group_rec->offset,
10931 block_group_rec->objectid,
10932 block_group_rec->flags);
/* Matching block group: claim it for this chunk */
10935 list_del_init(&block_group_rec->list);
10936 chunk_rec->bg_rec = block_group_rec;
10941 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10942 chunk_rec->objectid,
10947 chunk_rec->type_flags);
/* Every stripe must be backed by a device extent of the stripe length */
10954 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10955 chunk_rec->num_stripes);
10956 for (i = 0; i < chunk_rec->num_stripes; ++i) {
10957 devid = chunk_rec->stripes[i].devid;
10958 offset = chunk_rec->stripes[i].offset;
10959 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10960 devid, offset, length);
10961 if (dev_extent_item) {
10962 dev_extent_rec = container_of(dev_extent_item,
10963 struct device_extent_record,
10965 if (dev_extent_rec->objectid != devid ||
10966 dev_extent_rec->offset != offset ||
10967 dev_extent_rec->chunk_offset != chunk_rec->offset ||
10968 dev_extent_rec->length != length) {
10971 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10972 chunk_rec->objectid,
10975 chunk_rec->stripes[i].devid,
10976 chunk_rec->stripes[i].offset,
10977 dev_extent_rec->objectid,
10978 dev_extent_rec->offset,
10979 dev_extent_rec->length);
10982 list_move(&dev_extent_rec->chunk_list,
10983 &chunk_rec->dextents);
10988 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10989 chunk_rec->objectid,
10992 chunk_rec->stripes[i].devid,
10993 chunk_rec->stripes[i].offset);
11000 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Run check_chunk_refs() on every chunk in @chunk_cache and sort the chunks
 * by result.
 *
 * @chunk_cache:       cache tree of chunk records
 * @block_group_cache: block group records, passed to check_chunk_refs()
 * @dev_extent_cache:  device extent records, passed to check_chunk_refs()
 * @good:              if not NULL, chunks with result 0 are appended here
 * @bad:               if not NULL, chunks with a negative result are
 *                     appended here
 * @rebuild:           if not NULL, chunks with a positive result are
 *                     appended here
 * @silent:            passed through to check_chunk_refs()
 *
 * Afterwards the block groups still linked on
 * block_group_cache->block_groups and the device extents on
 * dev_extent_cache->no_chunk_orphans are walked and reported as having no
 * chunk.
 *
 * NOTE(review): how the overall return value is accumulated and how @silent
 * gates the messages are not part of this listing - confirm against the
 * complete source.
 */
11001 int check_chunks(struct cache_tree *chunk_cache,
11002 struct block_group_tree *block_group_cache,
11003 struct device_extent_tree *dev_extent_cache,
11004 struct list_head *good, struct list_head *bad,
11005 struct list_head *rebuild, int silent)
11007 struct cache_extent *chunk_item;
11008 struct chunk_record *chunk_rec;
11009 struct block_group_record *bg_rec;
11010 struct device_extent_record *dext_rec;
11014 chunk_item = first_cache_extent(chunk_cache);
11015 while (chunk_item) {
11016 chunk_rec = container_of(chunk_item, struct chunk_record,
11018 err = check_chunk_refs(chunk_rec, block_group_cache,
11019 dev_extent_cache, silent);
11022 if (err == 0 && good)
11023 list_add_tail(&chunk_rec->list, good);
11024 if (err > 0 && rebuild)
11025 list_add_tail(&chunk_rec->list, rebuild);
11026 if (err < 0 && bad)
11027 list_add_tail(&chunk_rec->list, bad);
11028 chunk_item = next_cache_extent(chunk_item);
11031 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
11034 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
11042 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
11046 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
11047 dext_rec->objectid,
/*
 * Add up the length of the device extents cached in @dext_cache for the
 * device @dev_rec->devid and compare the sum with @dev_rec->byte_used.
 * Each extent that is counted is also unlinked from its device_list.
 * A mismatch between the two totals is described by the message below.
 */
11057 static int check_device_used(struct device_record *dev_rec,
11058 struct device_extent_tree *dext_cache)
11060 struct cache_extent *cache;
11061 struct device_extent_record *dev_extent_rec;
11062 u64 total_byte = 0;
/* Look up the starting extent for this devid, searching from offset 0 */
11064 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
11066 dev_extent_rec = container_of(cache,
11067 struct device_extent_record,
/* Only extents whose objectid equals this devid belong to the device */
11069 if (dev_extent_rec->objectid != dev_rec->devid)
11072 list_del_init(&dev_extent_rec->device_list);
11073 total_byte += dev_extent_rec->length;
11074 cache = next_cache_extent(cache);
/* The accumulated extent length must match the dev item's byte_used */
11077 if (total_byte != dev_rec->byte_used) {
11079 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
11080 total_byte, dev_rec->byte_used, dev_rec->objectid,
11081 dev_rec->type, dev_rec->offset);
11088 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Call check_device_used() for each device_record in the @dev_cache rb-tree
 * (walked with rb_first()/rb_next()), then iterate
 * dev_extent_cache->no_device_orphans, whose entries the message below
 * describes as device extents that did not find their device.
 */
11089 static int check_devices(struct rb_root *dev_cache,
11090 struct device_extent_tree *dev_extent_cache)
11092 struct rb_node *dev_node;
11093 struct device_record *dev_rec;
11094 struct device_extent_record *dext_rec;
/* Visit the device records in rb-tree order */
11098 dev_node = rb_first(dev_cache);
11100 dev_rec = container_of(dev_node, struct device_record, node);
11101 err = check_device_used(dev_rec, dev_extent_cache);
11105 dev_node = rb_next(dev_node);
/* Entries on no_device_orphans are listed with objectid, offset and length */
11107 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
11110 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
11111 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record with malloc(), fill it from the arguments and
 * append it to the tail of @head.
 *
 * @objectid, @bytenr, @last_snapshot, @level and @drop_level are stored in
 * the record fields of the same name; @drop_key is copied into the record's
 * drop_key with memcpy().
 *
 * NOTE(review): callers in this file pass drop_key == NULL; confirm that the
 * memcpy() below is guarded for that case.
 */
11118 static int add_root_item_to_list(struct list_head *head,
11119 u64 objectid, u64 bytenr, u64 last_snapshot,
11120 u8 level, u8 drop_level,
11121 struct btrfs_key *drop_key)
11124 struct root_item_record *ri_rec;
11125 ri_rec = malloc(sizeof(*ri_rec));
11128 ri_rec->bytenr = bytenr;
11129 ri_rec->objectid = objectid;
11130 ri_rec->level = level;
11131 ri_rec->drop_level = drop_level;
11132 ri_rec->last_snapshot = last_snapshot;
11134 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
/* Queue the record at the tail of the list */
11135 list_add_tail(&ri_rec->list, head);
/*
 * Drain @list: while it is not empty, take the first root_item_record and
 * unlink it from the list.
 */
11140 static void free_root_item_list(struct list_head *list)
11142 struct root_item_record *ri_rec;
11144 while (!list_empty(list)) {
11145 ri_rec = list_first_entry(list, struct root_item_record,
/* Unlink before the record is disposed of */
11147 list_del_init(&ri_rec->list);
/*
 * Process the root_item_records queued on @list, one record per iteration.
 *
 * For each record the tree block at rec->bytenr is read, handed to
 * add_root_to_pending() under rec->objectid, and run_next_block() is called
 * with the record; the buffer is then released and the record is unlinked
 * from @list.  A further run_next_block() call with a NULL record appears
 * after the per-record code.
 *
 * The cache/tree arguments (@pending, @seen, @reada, @nodes, @extent_cache,
 * @chunk_cache, @dev_cache, @block_group_cache, @dev_extent_cache) are only
 * passed through to add_root_to_pending() and run_next_block() here.
 */
11152 static int deal_root_from_list(struct list_head *list,
11153 struct btrfs_root *root,
11154 struct block_info *bits,
11156 struct cache_tree *pending,
11157 struct cache_tree *seen,
11158 struct cache_tree *reada,
11159 struct cache_tree *nodes,
11160 struct cache_tree *extent_cache,
11161 struct cache_tree *chunk_cache,
11162 struct rb_root *dev_cache,
11163 struct block_group_tree *block_group_cache,
11164 struct device_extent_tree *dev_extent_cache)
11169 while (!list_empty(list)) {
11170 struct root_item_record *rec;
11171 struct extent_buffer *buf;
/* Always work on the head of the list */
11172 rec = list_entry(list->next,
11173 struct root_item_record, list);
/* Read the root's tree block; a buffer that is not uptodate is dropped */
11175 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
11176 if (!extent_buffer_uptodate(buf)) {
11177 free_extent_buffer(buf);
11181 ret = add_root_to_pending(buf, extent_cache, pending,
11182 seen, nodes, rec->objectid);
11186 * To rebuild extent tree, we need deal with snapshot
11187 * one by one, otherwise we deal with node firstly which
11188 * can maximize readahead.
11191 ret = run_next_block(root, bits, bits_nr, &last,
11192 pending, seen, reada, nodes,
11193 extent_cache, chunk_cache,
11194 dev_cache, block_group_cache,
11195 dev_extent_cache, rec);
/* Done with this root: release its buffer and take it off the list */
11199 free_extent_buffer(buf);
11200 list_del(&rec->list);
/* Same call as above, but without a specific root_item_record */
11206 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
11207 reada, nodes, extent_cache, chunk_cache,
11208 dev_cache, block_group_cache,
11209 dev_extent_cache, NULL);
/*
 * Collect chunk, block group, device, device extent and extent records for
 * @fs_info into local caches and cross-check them.
 *
 * Visible steps:
 *  - initialise the local caches and hook them into @fs_info
 *    (excluded_extents, fsck_extent_cache, free_extent_hook, corrupt_blocks);
 *  - queue the tree root and the chunk root on normal_trees;
 *  - scan the tree root for ROOT_ITEM keys; a root whose drop_progress
 *    objectid is 0 is queued on normal_trees, and the other visible
 *    add_root_item_to_list() call queues onto dropping_trees with the
 *    drop_progress key and drop_level;
 *  - run deal_root_from_list() on both lists, then check_chunks(),
 *    check_extent_refs() and check_devices();
 *  - free the caches and clear the @fs_info hooks.
 */
11219 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11221 struct rb_root dev_cache;
11222 struct cache_tree chunk_cache;
11223 struct block_group_tree block_group_cache;
11224 struct device_extent_tree dev_extent_cache;
11225 struct cache_tree extent_cache;
11226 struct cache_tree seen;
11227 struct cache_tree pending;
11228 struct cache_tree reada;
11229 struct cache_tree nodes;
11230 struct extent_io_tree excluded_extents;
11231 struct cache_tree corrupt_blocks;
11232 struct btrfs_path path;
11233 struct btrfs_key key;
11234 struct btrfs_key found_key;
11236 struct block_info *bits;
11238 struct extent_buffer *leaf;
11240 struct btrfs_root_item ri;
11241 struct list_head dropping_trees;
11242 struct list_head normal_trees;
11243 struct btrfs_root *root1;
11244 struct btrfs_root *root;
/* All local caches start out empty */
11248 root = fs_info->fs_root;
11249 dev_cache = RB_ROOT;
11250 cache_tree_init(&chunk_cache);
11251 block_group_tree_init(&block_group_cache);
11252 device_extent_tree_init(&dev_extent_cache);
11254 cache_tree_init(&extent_cache);
11255 cache_tree_init(&seen);
11256 cache_tree_init(&pending);
11257 cache_tree_init(&nodes);
11258 cache_tree_init(&reada);
11259 cache_tree_init(&corrupt_blocks);
11260 extent_io_tree_init(&excluded_extents);
11261 INIT_LIST_HEAD(&dropping_trees);
11262 INIT_LIST_HEAD(&normal_trees);
/* Expose the stack-local caches through fs_info; undone before returning */
11265 fs_info->excluded_extents = &excluded_extents;
11266 fs_info->fsck_extent_cache = &extent_cache;
11267 fs_info->free_extent_hook = free_extent_hook;
11268 fs_info->corrupt_blocks = &corrupt_blocks;
11272 bits = malloc(bits_nr * sizeof(struct block_info));
/* Progress reporting for the extent pass, when enabled */
11278 if (ctx.progress_enabled) {
11279 ctx.tp = TASK_EXTENTS;
11280 task_start(ctx.info);
/* The tree root and the chunk root are always queued as normal trees */
11284 root1 = fs_info->tree_root;
11285 level = btrfs_header_level(root1->node);
11286 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11287 root1->node->start, 0, level, 0, NULL);
11290 root1 = fs_info->chunk_root;
11291 level = btrfs_header_level(root1->node);
11292 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11293 root1->node->start, 0, level, 0, NULL);
/* Scan the tree root for ROOT_ITEM entries */
11296 btrfs_init_path(&path);
11299 key.type = BTRFS_ROOT_ITEM_KEY;
11300 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
11304 leaf = path.nodes[0];
11305 slot = path.slots[0];
/* Past the last item of this leaf: move on to the next leaf */
11306 if (slot >= btrfs_header_nritems(path.nodes[0])) {
11307 ret = btrfs_next_leaf(root, &path);
11310 leaf = path.nodes[0];
11311 slot = path.slots[0];
11313 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11314 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
11315 unsigned long offset;
/* Copy the root item out of the leaf so its fields can be read */
11318 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
11319 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
11320 last_snapshot = btrfs_root_last_snapshot(&ri);
/* drop_progress objectid of 0: queue on normal_trees */
11321 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
11322 level = btrfs_root_level(&ri);
11323 ret = add_root_item_to_list(&normal_trees,
11324 found_key.objectid,
11325 btrfs_root_bytenr(&ri),
11326 last_snapshot, level,
/* Queue on dropping_trees together with the drop_progress key */
11331 level = btrfs_root_level(&ri);
11332 objectid = found_key.objectid;
11333 btrfs_disk_key_to_cpu(&found_key,
11334 &ri.drop_progress);
11335 ret = add_root_item_to_list(&dropping_trees,
11337 btrfs_root_bytenr(&ri),
11338 last_snapshot, level,
11339 ri.drop_level, &found_key);
11346 btrfs_release_path(&path);
11349 * check_block can return -EAGAIN if it fixes something, please keep
11350 * this in mind when dealing with return values from these functions, if
11351 * we get -EAGAIN we want to fall through and restart the loop.
11353 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11354 &seen, &reada, &nodes, &extent_cache,
11355 &chunk_cache, &dev_cache, &block_group_cache,
11356 &dev_extent_cache);
11358 if (ret == -EAGAIN)
11362 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11363 &pending, &seen, &reada, &nodes,
11364 &extent_cache, &chunk_cache, &dev_cache,
11365 &block_group_cache, &dev_extent_cache);
11367 if (ret == -EAGAIN)
/* No good/bad/rebuild lists are collected here and silent is 0 */
11372 ret = check_chunks(&chunk_cache, &block_group_cache,
11373 &dev_extent_cache, NULL, NULL, NULL, 0);
11375 if (ret == -EAGAIN)
11380 ret = check_extent_refs(root, &extent_cache);
11382 if (ret == -EAGAIN)
11387 ret = check_devices(&dev_cache, &dev_extent_cache);
11392 task_stop(ctx.info);
/*
 * First teardown sequence: frees the caches and resets the fs_info hooks to
 * NULL.  NOTE(review): which control path reaches this sequence versus the
 * second one below is not visible here.
 */
11394 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11395 extent_io_tree_cleanup(&excluded_extents);
11396 fs_info->fsck_extent_cache = NULL;
11397 fs_info->free_extent_hook = NULL;
11398 fs_info->corrupt_blocks = NULL;
11399 fs_info->excluded_extents = NULL;
11402 free_chunk_cache_tree(&chunk_cache);
11403 free_device_cache_tree(&dev_cache);
11404 free_block_group_tree(&block_group_cache);
11405 free_device_extent_tree(&dev_extent_cache);
11406 free_extent_cache_tree(&seen);
11407 free_extent_cache_tree(&pending);
11408 free_extent_cache_tree(&reada);
11409 free_extent_cache_tree(&nodes);
11410 free_root_item_list(&normal_trees);
11411 free_root_item_list(&dropping_trees);
/*
 * Second teardown sequence: also frees the extent record cache, and does not
 * reset the fs_info hooks in the visible lines.
 */
11414 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11415 free_extent_cache_tree(&seen);
11416 free_extent_cache_tree(&pending);
11417 free_extent_cache_tree(&reada);
11418 free_extent_cache_tree(&nodes);
11419 free_chunk_cache_tree(&chunk_cache);
11420 free_block_group_tree(&block_group_cache);
11421 free_device_cache_tree(&dev_cache);
11422 free_device_extent_tree(&dev_extent_cache);
11423 free_extent_record_cache(&extent_cache);
11424 free_root_item_list(&normal_trees);
11425 free_root_item_list(&dropping_trees);
11426 extent_io_tree_cleanup(&excluded_extents);
11431 * Check backrefs of a tree block given by @bytenr or @eb.
11433 * @root: the root containing the @bytenr or @eb
11434 * @eb: tree block extent buffer, can be NULL
11435 * @bytenr: bytenr of the tree block to search
11436 * @level: tree level of the tree block
11437 * @owner: owner of the tree block
11439 * Return >0 for any error found and output error message
11440 * Return 0 for no error found
11442 static int check_tree_block_ref(struct btrfs_root *root,
11443 struct extent_buffer *eb, u64 bytenr,
11444 int level, u64 owner, struct node_refs *nrefs)
11446 struct btrfs_key key;
11447 struct btrfs_root *extent_root = root->fs_info->extent_root;
11448 struct btrfs_path path;
11449 struct btrfs_extent_item *ei;
11450 struct btrfs_extent_inline_ref *iref;
11451 struct extent_buffer *leaf;
11456 int root_level = btrfs_header_level(root->node);
11458 u32 nodesize = root->fs_info->nodesize;
11461 int tree_reloc_root = 0;
/* Flag the case where @bytenr is the root node of a tree reloc root */
11468 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
11469 btrfs_header_bytenr(root->node) == bytenr)
11470 tree_reloc_root = 1;
/*
 * Key type depends on the SKINNY_METADATA incompat flag.  The offset is set
 * to (u64)-1 and btrfs_previous_extent_item() is used afterwards to locate
 * the extent item for @bytenr.
 */
11471 btrfs_init_path(&path);
11472 key.objectid = bytenr;
11473 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11474 key.type = BTRFS_METADATA_ITEM_KEY;
11476 key.type = BTRFS_EXTENT_ITEM_KEY;
11477 key.offset = (u64)-1;
11479 /* Search for the backref in extent tree */
11480 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11482 err |= BACKREF_MISSING;
11485 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11487 err |= BACKREF_MISSING;
11491 leaf = path.nodes[0];
11492 slot = path.slots[0];
11493 btrfs_item_key_to_cpu(leaf, &key, slot);
11495 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * METADATA_ITEM keys carry the level in key.offset and the inline refs follow
 * the extent item directly; otherwise a btrfs_tree_block_info holding the
 * level sits between the extent item and the inline refs.
 */
11497 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11498 skinny_level = (int)key.offset;
11499 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11501 struct btrfs_tree_block_info *info;
11503 info = (struct btrfs_tree_block_info *)(ei + 1);
11504 skinny_level = btrfs_tree_block_level(leaf, info);
11505 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11514 * Due to the feature of shared tree blocks, if the upper node
11515 * is a fs root or shared node, the extent of checked node may
11516 * not be updated until the next CoW.
11519 strict = should_check_extent_strictly(root, nrefs,
/* The extent item must be flagged as a tree block */
11521 if (!(btrfs_extent_flags(leaf, ei) &
11522 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11524 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11525 key.objectid, nodesize,
11526 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11527 err = BACKREF_MISMATCH;
/* Generation in the block header must equal the one in the extent item */
11529 header_gen = btrfs_header_generation(eb);
11530 extent_gen = btrfs_extent_generation(leaf, ei);
11531 if (header_gen != extent_gen) {
11533 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11534 key.objectid, nodesize, header_gen,
11536 err = BACKREF_MISMATCH;
/* Level passed by the caller must equal the level recorded in the extent tree */
11538 if (level != skinny_level) {
11540 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11541 key.objectid, nodesize, level, skinny_level);
11542 err = BACKREF_MISMATCH;
/* For owners that are not fs trees the extent is expected to have exactly one ref */
11544 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11546 "extent[%llu %u] is referred by other roots than %llu",
11547 key.objectid, nodesize, root->objectid);
11548 err = BACKREF_MISMATCH;
11553 * Iterate the extent/metadata item to find the exact backref
/* Inline refs occupy [iref, ei + item_size) inside the leaf */
11555 item_size = btrfs_item_size_nr(leaf, slot);
11556 ptr = (unsigned long)iref;
11557 end = (unsigned long)ei + item_size;
11559 while (ptr < end) {
11560 iref = (struct btrfs_extent_inline_ref *)ptr;
11561 type = btrfs_extent_inline_ref_type(leaf, iref);
11562 offset = btrfs_extent_inline_ref_offset(leaf, iref);
/* TREE_BLOCK_REF: the ref offset is compared with the root objectid or, when not strict, with @owner */
11564 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11565 if (offset == root->objectid)
11567 if (!strict && owner == offset)
11569 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11571 * Backref of tree reloc root points to itself, no need
11572 * to check backref any more.
11574 if (tree_reloc_root) {
11578 * Check if the backref points to valid
/* Recurse on the parent block (the ref offset) one level up */
11581 found_ref = !check_tree_block_ref( root, NULL,
11582 offset, level + 1, owner,
11589 ptr += btrfs_extent_inline_ref_size(type);
11593 * Inlined extent item doesn't have what we need, check
11594 * TREE_BLOCK_REF_KEY
/* Look for a keyed (bytenr, TREE_BLOCK_REF, root objectid) item */
11597 btrfs_release_path(&path);
11598 key.objectid = bytenr;
11599 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11600 key.offset = root->objectid;
11602 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11607 err |= BACKREF_MISSING;
11609 btrfs_release_path(&path);
/* In strict mode with a full backref one level up, the message names the parent block */
11610 if (nrefs && strict &&
11611 level < root_level && nrefs->full_backref[level + 1])
11612 parent = nrefs->bytenr[level + 1];
/* The lost-backref message is only produced when an @eb was supplied */
11613 if (eb && (err & BACKREF_MISSING))
11615 "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11616 bytenr, nodesize, owner, level,
11617 parent ? "parent" : "root",
11618 parent ? parent : root->objectid);
11623 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11625 * Return >0 for any error found and output error message
11626 * Return 0 for no error found
11628 static int check_extent_data_item(struct btrfs_root *root,
11629 struct btrfs_path *pathp,
11630 struct node_refs *nrefs, int account_bytes)
11632 struct btrfs_file_extent_item *fi;
11633 struct extent_buffer *eb = pathp->nodes[0];
11634 struct btrfs_path path;
11635 struct btrfs_root *extent_root = root->fs_info->extent_root;
11636 struct btrfs_key fi_key;
11637 struct btrfs_key dbref_key;
11638 struct extent_buffer *leaf;
11639 struct btrfs_extent_item *ei;
11640 struct btrfs_extent_inline_ref *iref;
11641 struct btrfs_extent_data_ref *dref;
11644 u64 disk_num_bytes;
11645 u64 extent_num_bytes;
11652 int found_dbackref = 0;
11653 int slot = pathp->slots[0];
/* The file extent item to check is the one @pathp currently points at */
11658 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11659 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11661 /* Nothing to check for hole and inline data extents */
11662 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11663 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11666 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11667 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11668 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
/*
 * Both byte counts must be sectorsize aligned.  Aligned values are added to
 * the global data_bytes_allocated / data_bytes_referenced counters only when
 * @account_bytes is set.
 */
11670 /* Check unaligned disk_num_bytes and num_bytes */
11671 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11673 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11674 fi_key.objectid, fi_key.offset, disk_num_bytes,
11675 root->fs_info->sectorsize);
11676 err |= BYTES_UNALIGNED;
11677 } else if (account_bytes) {
11678 data_bytes_allocated += disk_num_bytes;
11680 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11682 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11683 fi_key.objectid, fi_key.offset, extent_num_bytes,
11684 root->fs_info->sectorsize);
11685 err |= BYTES_UNALIGNED;
11686 } else if (account_bytes) {
11687 data_bytes_referenced += extent_num_bytes;
/* Owner recorded in the leaf header; used for the non-strict match below */
11689 owner = btrfs_header_owner(eb);
11691 /* Check the extent item of the file extent in extent tree */
11692 btrfs_init_path(&path);
11693 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11694 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11695 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11697 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11701 leaf = path.nodes[0];
11702 slot = path.slots[0];
11703 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11705 extent_flags = btrfs_extent_flags(leaf, ei);
/* The extent item must be flagged as data */
11707 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11709 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11710 disk_bytenr, disk_num_bytes,
11711 BTRFS_EXTENT_FLAG_DATA);
11712 err |= BACKREF_MISMATCH;
11715 /* Check data backref inside that extent item */
11716 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11717 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11718 ptr = (unsigned long)iref;
11719 end = (unsigned long)ei + item_size;
11720 strict = should_check_extent_strictly(root, nrefs, -1);
/* Walk the inline refs in [ei + 1, ei + item_size) until a match is found */
11722 while (ptr < end) {
11723 iref = (struct btrfs_extent_inline_ref *)ptr;
11724 type = btrfs_extent_inline_ref_type(leaf, iref);
11725 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
/* EXTENT_DATA_REF matches on the root objectid or, when not strict, on the leaf owner */
11727 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11728 ref_root = btrfs_extent_data_ref_root(leaf, dref);
11729 if (ref_root == root->objectid)
11730 found_dbackref = 1;
11731 else if (!strict && owner == ref_root)
11732 found_dbackref = 1;
11733 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/* SHARED_DATA_REF: validate the referenced parent block via check_tree_block_ref() */
11734 found_dbackref = !check_tree_block_ref(root, NULL,
11735 btrfs_extent_inline_ref_offset(leaf, iref),
11739 if (found_dbackref)
11741 ptr += btrfs_extent_inline_ref_size(type);
11744 if (!found_dbackref) {
11745 btrfs_release_path(&path);
11747 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
11748 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11749 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
/* Keyed data refs are indexed by a hash of (root objectid, inode objectid, file offset) */
11750 dbref_key.offset = hash_extent_data_ref(root->objectid,
11751 fi_key.objectid, fi_key.offset);
11753 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11754 &dbref_key, &path, 0, 0);
11756 found_dbackref = 1;
11760 btrfs_release_path(&path);
11763 * Neither inlined nor EXTENT_DATA_REF found, try
11764 * SHARED_DATA_REF as last chance.
/* The shared ref is keyed by the bytenr of the leaf holding the file extent */
11766 dbref_key.objectid = disk_bytenr;
11767 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
11768 dbref_key.offset = eb->start;
11770 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11771 &dbref_key, &path, 0, 0);
11773 found_dbackref = 1;
11779 if (!found_dbackref)
11780 err |= BACKREF_MISSING;
11781 btrfs_release_path(&path);
11782 if (err & BACKREF_MISSING) {
11783 error("data extent[%llu %llu] backref lost",
11784 disk_bytenr, disk_num_bytes);
11790 * Get real tree block level for the case like shared block
11791 * Return >= 0 as tree level
11792 * Return <0 for error
11794 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11796 struct extent_buffer *eb;
11797 struct btrfs_path path;
11798 struct btrfs_key key;
11799 struct btrfs_extent_item *ei;
11806 /* Search extent tree for extent generation and level */
11807 key.objectid = bytenr;
11808 key.type = BTRFS_METADATA_ITEM_KEY;
11809 key.offset = (u64)-1;
/* After the search, btrfs_previous_extent_item() is used to locate the item for @bytenr */
11811 btrfs_init_path(&path);
11812 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
11815 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11823 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11824 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11825 struct btrfs_extent_item);
/* The TREE_BLOCK flag is tested on the item that was found */
11826 flags = btrfs_extent_flags(path.nodes[0], ei);
11827 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11832 /* Get transid for later read_tree_block() check */
11833 transid = btrfs_extent_generation(path.nodes[0], ei);
11835 /* Get backref level as one source */
/* METADATA_ITEM stores the level in key.offset; otherwise it comes from the tree_block_info */
11836 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11837 backref_level = key.offset;
11839 struct btrfs_tree_block_info *info;
11841 info = (struct btrfs_tree_block_info *)(ei + 1);
11842 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11844 btrfs_release_path(&path);
11846 /* Get level from tree block as an alternative source */
11847 eb = read_tree_block(fs_info, bytenr, transid);
11848 if (!extent_buffer_uptodate(eb)) {
11849 free_extent_buffer(eb);
11852 header_level = btrfs_header_level(eb);
11853 free_extent_buffer(eb);
/* The header level is returned; it is first compared with the backref level */
11855 if (header_level != backref_level)
11857 return header_level;
11860 btrfs_release_path(&path);
11865 * Check if a tree block backref is valid (points to a valid tree block)
11866 * if level == -1, level will be resolved
11867 * Return >0 for any error found and print error message
11869 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11870 u64 bytenr, int level)
11872 struct btrfs_root *root;
11873 struct btrfs_key key;
11874 struct btrfs_path path;
11875 struct extent_buffer *eb;
11876 struct extent_buffer *node;
11877 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11881 /* Query level for level == -1 special case */
11883 level = query_tree_block_level(fs_info, bytenr);
11885 err |= REFERENCER_MISSING;
/* Load the referencing root identified by @root_id */
11889 key.objectid = root_id;
11890 key.type = BTRFS_ROOT_ITEM_KEY;
11891 key.offset = (u64)-1;
11893 root = btrfs_read_fs_root(fs_info, &key);
11894 if (IS_ERR(root)) {
11895 err |= REFERENCER_MISSING;
11899 /* Read out the tree block to get item/node key */
11900 eb = read_tree_block(fs_info, bytenr, 0);
11901 if (!extent_buffer_uptodate(eb)) {
11902 err |= REFERENCER_MISSING;
11903 free_extent_buffer(eb);
11907 /* Empty tree, no need to check key */
11908 if (!btrfs_header_nritems(eb) && !level) {
11909 free_extent_buffer(eb);
/* Take the first key of the block, using the node or the item accessor */
11914 btrfs_node_key_to_cpu(eb, &key, 0);
11916 btrfs_item_key_to_cpu(eb, &key, 0);
11918 free_extent_buffer(eb);
/* lowest_level is set to @level so that path.nodes[level] can be inspected after the search */
11920 btrfs_init_path(&path);
11921 path.lowest_level = level;
11922 /* Search with the first key, to ensure we can reach it */
11923 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11925 err |= REFERENCER_MISSING;
/* The node reached at @level must be the block itself, at the same level */
11929 node = path.nodes[level];
11930 if (btrfs_header_bytenr(node) != bytenr) {
11932 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
11933 bytenr, nodesize, bytenr,
11934 btrfs_header_bytenr(node));
11935 err |= REFERENCER_MISMATCH;
11937 if (btrfs_header_level(node) != level) {
11939 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
11940 bytenr, nodesize, level,
11941 btrfs_header_level(node));
11942 err |= REFERENCER_MISMATCH;
11946 btrfs_release_path(&path);
/* Two message variants: one without and one with the level */
11948 if (err & REFERENCER_MISSING) {
11950 error("extent [%llu %d] lost referencer (owner: %llu)",
11951 bytenr, nodesize, root_id);
11954 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11955 bytenr, nodesize, root_id, level);
11962 * Check if tree block @eb is tree reloc root.
11963 * Return 0 if it's not or any problem happens
11964 * Return 1 if it's a tree reloc root
11966 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11967 struct extent_buffer *eb)
11969 struct btrfs_root *tree_reloc_root;
11970 struct btrfs_key key;
11971 u64 bytenr = btrfs_header_bytenr(eb);
11972 u64 owner = btrfs_header_owner(eb);
/* The reloc root is looked up as (TREE_RELOC_OBJECTID, ROOT_ITEM, owner of @eb) */
11975 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11976 key.offset = owner;
11977 key.type = BTRFS_ROOT_ITEM_KEY;
/* Uncached read: the root obtained here is released again with btrfs_free_fs_root() below */
11979 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11980 if (IS_ERR(tree_reloc_root))
/* @eb is the reloc root when its bytenr equals that root's node bytenr */
11983 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
11985 btrfs_free_fs_root(tree_reloc_root);
11990 * Check referencer for shared block backref
11991 * If level == -1, this function will resolve the level.
11993 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11994 u64 parent, u64 bytenr, int level)
11996 struct extent_buffer *eb;
11998 int found_parent = 0;
/* Read the claimed parent block */
12001 eb = read_tree_block(fs_info, parent, 0);
12002 if (!extent_buffer_uptodate(eb))
12006 level = query_tree_block_level(fs_info, bytenr);
12010 /* It's possible it's a tree reloc root */
12011 if (parent == bytenr) {
12012 if (is_tree_reloc_root(fs_info, eb))
/* The parent's header level is compared against level + 1 */
12017 if (level + 1 != btrfs_header_level(eb))
/* Scan the parent's block pointers for @bytenr */
12020 nr = btrfs_header_nritems(eb);
12021 for (i = 0; i < nr; i++) {
12022 if (bytenr == btrfs_node_blockptr(eb, i)) {
12028 free_extent_buffer(eb);
12029 if (!found_parent) {
12031 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
12032 bytenr, fs_info->nodesize, parent, level);
12033 return REFERENCER_MISSING;
12039 * Check referencer for normal (inlined) data ref
12040 * If len == 0, it will be resolved by searching in extent tree
12042 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
12043 u64 root_id, u64 objectid, u64 offset,
12044 u64 bytenr, u64 len, u32 count)
12046 struct btrfs_root *root;
12047 struct btrfs_root *extent_root = fs_info->extent_root;
12048 struct btrfs_key key;
12049 struct btrfs_path path;
12050 struct extent_buffer *leaf;
12051 struct btrfs_file_extent_item *fi;
12052 u32 found_count = 0;
/* Extent tree lookup for @bytenr: search with offset (u64)-1, then step back to the extent item */
12057 key.objectid = bytenr;
12058 key.type = BTRFS_EXTENT_ITEM_KEY;
12059 key.offset = (u64)-1;
12061 btrfs_init_path(&path);
12062 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12065 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
12068 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
/* The item found must be an EXTENT_ITEM for exactly this bytenr */
12069 if (key.objectid != bytenr ||
12070 key.type != BTRFS_EXTENT_ITEM_KEY)
12073 btrfs_release_path(&path);
/* Load the root that the data ref names */
12075 key.objectid = root_id;
12076 key.type = BTRFS_ROOT_ITEM_KEY;
12077 key.offset = (u64)-1;
12078 btrfs_init_path(&path);
12080 root = btrfs_read_fs_root(fs_info, &key);
12084 key.objectid = objectid;
12085 key.type = BTRFS_EXTENT_DATA_KEY;
12087 * It can be nasty as data backref offset is
12088 * file offset - file extent offset, which is smaller or
12089 * equal to original backref offset. The only special case is
12090 * overflow. So we need to special check and do further search.
/* An @offset with bit 63 set starts the search at file offset 0 instead */
12092 key.offset = offset & (1ULL << 63) ? 0 : offset;
12094 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12099 * Search afterwards to get correct one
12100 * NOTE: As we must do a comprehensive check on the data backref to
12101 * make sure the dref count also matches, we must iterate all file
12102 * extents for that inode.
12105 leaf = path.nodes[0];
12106 slot = path.slots[0];
12108 if (slot >= btrfs_header_nritems(leaf))
12110 btrfs_item_key_to_cpu(leaf, &key, slot);
/* Only EXTENT_DATA items of inode @objectid are of interest */
12111 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
12113 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
12115 * Except normal disk bytenr and disk num bytes, we still
12116 * need to do extra check on dbackref offset as
12117 * dbackref offset = file_offset - file_extent_offset
12119 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
12120 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
12121 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
12126 ret = btrfs_next_item(root, &path);
12131 btrfs_release_path(&path);
/* The number of matching file extents must equal the ref count stored in the backref */
12132 if (found_count != count) {
12134 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
12135 bytenr, len, root_id, objectid, offset, count, found_count);
12136 return REFERENCER_MISSING;
12142 * Check if the referencer of a shared data backref exists
12144 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
12145 u64 parent, u64 bytenr)
12147 struct extent_buffer *eb;
12148 struct btrfs_key key;
12149 struct btrfs_file_extent_item *fi;
12151 int found_parent = 0;
/* Read the parent block named by the shared data ref */
12154 eb = read_tree_block(fs_info, parent, 0);
12155 if (!extent_buffer_uptodate(eb))
/* Scan its items for a file extent whose disk_bytenr is @bytenr */
12158 nr = btrfs_header_nritems(eb);
12159 for (i = 0; i < nr; i++) {
12160 btrfs_item_key_to_cpu(eb, &key, i);
12161 if (key.type != BTRFS_EXTENT_DATA_KEY)
12164 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
/* The inline extent type is tested separately before disk_bytenr is compared */
12165 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
12168 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
12175 free_extent_buffer(eb);
12176 if (!found_parent) {
12177 error("shared extent %llu referencer lost (parent: %llu)",
12179 return REFERENCER_MISSING;
12185 * This function will check a given extent item, including its backref and
12186 * itself (like crossing stripe boundary and type)
12188 * Since we don't use extent_record anymore, introduce new error bit
12190 static int check_extent_item(struct btrfs_fs_info *fs_info,
12191 struct extent_buffer *eb, int slot)
12193 struct btrfs_extent_item *ei;
12194 struct btrfs_extent_inline_ref *iref;
12195 struct btrfs_extent_data_ref *dref;
12199 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12200 u32 item_size = btrfs_item_size_nr(eb, slot);
12205 struct btrfs_key key;
/* Account the extent in the global bytes_used: key.offset for EXTENT_ITEM keys, nodesize in the other visible statement */
12209 btrfs_item_key_to_cpu(eb, &key, slot);
12210 if (key.type == BTRFS_EXTENT_ITEM_KEY)
12211 bytes_used += key.offset;
12213 bytes_used += nodesize;
/* An item smaller than struct btrfs_extent_item is treated as the unsupported v0 format */
12215 if (item_size < sizeof(*ei)) {
12217 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12218 * old thing when on disk format is still un-determined.
12219 * No need to care about it anymore
12221 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12225 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12226 flags = btrfs_extent_flags(eb, ei);
12228 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
/* Metadata extents are additionally checked for crossing a stripe boundary */
12230 if (metadata && check_crossing_stripes(global_info, key.objectid,
12232 error("bad metadata [%llu, %llu) crossing stripe boundary",
12233 key.objectid, key.objectid + nodesize);
12234 err |= CROSSING_STRIPE_BOUNDARY;
/* ptr starts right after the extent item */
12237 ptr = (unsigned long)(ei + 1);
12239 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12240 /* Old EXTENT_ITEM metadata */
12241 struct btrfs_tree_block_info *info;
/* Level comes from the tree_block_info, which is then skipped */
12243 info = (struct btrfs_tree_block_info *)ptr;
12244 level = btrfs_tree_block_level(eb, info);
12245 ptr += sizeof(struct btrfs_tree_block_info);
12247 /* New METADATA_ITEM */
12248 level = key.offset;
12250 end = (unsigned long)ei + item_size;
12253 /* Reached extent item end normally */
12257 /* Beyond extent item end, wrong item size */
12259 err |= ITEM_SIZE_MISMATCH;
12260 error("extent item at bytenr %llu slot %d has wrong size",
12265 /* Now check every backref in this extent item */
12266 iref = (struct btrfs_extent_inline_ref *)ptr;
12267 type = btrfs_extent_inline_ref_type(eb, iref);
12268 offset = btrfs_extent_inline_ref_offset(eb, iref);
/* Dispatch on the inline ref type; each case calls the matching checker */
12270 case BTRFS_TREE_BLOCK_REF_KEY:
12271 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12275 case BTRFS_SHARED_BLOCK_REF_KEY:
12276 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12280 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref structure is read starting at the offset field of the inline ref */
12281 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12282 ret = check_extent_data_backref(fs_info,
12283 btrfs_extent_data_ref_root(eb, dref),
12284 btrfs_extent_data_ref_objectid(eb, dref),
12285 btrfs_extent_data_ref_offset(eb, dref),
12286 key.objectid, key.offset,
12287 btrfs_extent_data_ref_count(eb, dref));
12290 case BTRFS_SHARED_DATA_REF_KEY:
12291 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12295 error("extent[%llu %d %llu] has unknown ref type: %d",
12296 key.objectid, key.type, key.offset, type);
12297 err |= UNKNOWN_TYPE;
/* Advance by the size of the inline ref type just read */
12301 ptr += btrfs_extent_inline_ref_size(type);
12309 * Check if a dev extent item is referred correctly by its chunk
12311 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12312 struct extent_buffer *eb, int slot)
12314 struct btrfs_root *chunk_root = fs_info->chunk_root;
12315 struct btrfs_dev_extent *ptr;
12316 struct btrfs_path path;
12317 struct btrfs_key chunk_key;
12318 struct btrfs_key devext_key;
12319 struct btrfs_chunk *chunk;
12320 struct extent_buffer *l;
12324 int found_chunk = 0;
/* The dev extent at @slot supplies its own key, its length and the chunk it names */
12327 btrfs_item_key_to_cpu(eb, &devext_key, slot);
12328 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12329 length = btrfs_dev_extent_length(eb, ptr);
/* Build the CHUNK_ITEM key from the chunk objectid/offset stored in the dev extent */
12331 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12332 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12333 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12335 btrfs_init_path(&path);
12336 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12341 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12342 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
/* The chunk's per-stripe length is compared with the dev extent length */
12347 if (btrfs_stripe_length(fs_info, l, chunk) != length)
/* Look for a stripe whose (devid, offset) equals the dev extent key */
12350 num_stripes = btrfs_chunk_num_stripes(l, chunk);
12351 for (i = 0; i < num_stripes; i++) {
12352 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12353 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12355 if (devid == devext_key.objectid &&
12356 offset == devext_key.offset) {
12362 btrfs_release_path(&path);
12363 if (!found_chunk) {
12365 "device extent[%llu, %llu, %llu] did not find the related chunk",
12366 devext_key.objectid, devext_key.offset, length);
12367 return REFERENCER_MISSING;
12373 * Check if the used space is correct with the dev item
/*
 * Verify that the bytes_used recorded in a DEV_ITEM equals the sum of the
 * lengths of the DEV_EXTENT items belonging to that device.
 *
 * @fs_info: filesystem whose dev_root is searched
 * @eb:      leaf holding the dev item
 * @slot:    slot of the dev item inside @eb
 *
 * Returns REFERENCER_MISSING on the path that reports "cannot find any
 * related dev extent" and ACCOUNTING_MISMATCH when used != total.
 * NOTE(review): the condition guarding the first error path, the loop
 * construct and the final return are not present in this listing.
 */
12375 static int check_dev_item(struct btrfs_fs_info *fs_info,
12376 struct extent_buffer *eb, int slot)
12378 struct btrfs_root *dev_root = fs_info->dev_root;
12379 struct btrfs_dev_item *dev_item;
12380 struct btrfs_path path;
12381 struct btrfs_key key;
12382 struct btrfs_dev_extent *ptr;
12388 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12389 dev_id = btrfs_device_id(eb, dev_item);
12390 used = btrfs_device_bytes_used(eb, dev_item);
/* Dev extents are keyed by the device id in the dev tree */
12392 key.objectid = dev_id;
12393 key.type = BTRFS_DEV_EXTENT_KEY;
12396 btrfs_init_path(&path);
12397 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* Reload the dev item key so the message describes the item being checked */
12399 btrfs_item_key_to_cpu(eb, &key, slot);
12400 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12401 key.objectid, key.type, key.offset);
12402 btrfs_release_path(&path);
12403 return REFERENCER_MISSING;
12406 /* Iterate dev_extents to calculate the used space of a device */
12408 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12411 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12412 if (key.objectid > dev_id)
12414 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12417 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12418 struct btrfs_dev_extent);
12419 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12421 ret = btrfs_next_item(dev_root, &path);
12425 btrfs_release_path(&path);
12427 if (used != total) {
12428 btrfs_item_key_to_cpu(eb, &key, slot);
/*
 * NOTE(review): key was just reloaded from the dev item, yet the message
 * below prints the constants BTRFS_ROOT_TREE_OBJECTID and
 * BTRFS_DEV_EXTENT_KEY instead of key.objectid / key.type.  The earlier
 * message in this function prints the key fields; confirm which is intended.
 */
12430 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12431 total, used, BTRFS_ROOT_TREE_OBJECTID,
12432 BTRFS_DEV_EXTENT_KEY, dev_id);
12433 return ACCOUNTING_MISMATCH;
12439 * Check a block group item with its referencer (chunk) and its used space
12440 * with extent/metadata item
/*
 * Validate one BLOCK_GROUP_ITEM.
 *
 * @fs_info: filesystem whose chunk_root and extent_root are searched
 * @eb:      leaf holding the block group item
 * @slot:    slot of the block group item inside @eb
 *
 * Two checks are visible:
 *  1. A CHUNK_ITEM keyed by (BTRFS_FIRST_CHUNK_TREE_OBJECTID, bg_key.objectid)
 *     is looked up in the chunk tree; REFERENCER_MISSING is recorded with
 *     the "did not find" message and REFERENCER_MISMATCH with the "length
 *     does not match" message.
 *  2. The extent tree is walked from bg_key.objectid; EXTENT_ITEM and
 *     METADATA_ITEM entries below bg_key.objectid + bg_key.offset are
 *     accounted into total, and each extent's DATA / TREE_BLOCK flag is
 *     compared with the block group flags (CHUNK_TYPE_MISMATCH).  A final
 *     total != used records ACCOUNTING_MISMATCH.
 *
 * Problems are accumulated as bits in err.
 * NOTE(review): the loop construct, several branch bodies and the return
 * statement are not present in this listing; presumably err is returned.
 */
12442 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12443 struct extent_buffer *eb, int slot)
12445 struct btrfs_root *extent_root = fs_info->extent_root;
12446 struct btrfs_root *chunk_root = fs_info->chunk_root;
12447 struct btrfs_block_group_item *bi;
12448 struct btrfs_block_group_item bg_item;
12449 struct btrfs_path path;
12450 struct btrfs_key bg_key;
12451 struct btrfs_key chunk_key;
12452 struct btrfs_key extent_key;
12453 struct btrfs_chunk *chunk;
12454 struct extent_buffer *leaf;
12455 struct btrfs_extent_item *ei;
12456 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12464 btrfs_item_key_to_cpu(eb, &bg_key, slot);
12465 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* Copy the item out of the extent buffer so its fields can be read locally */
12466 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12467 used = btrfs_block_group_used(&bg_item);
12468 bg_flags = btrfs_block_group_flags(&bg_item);
12470 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12471 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12472 chunk_key.offset = bg_key.objectid;
12474 btrfs_init_path(&path);
12475 /* Search for the referencer chunk */
12476 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12479 "block group[%llu %llu] did not find the related chunk item",
12480 bg_key.objectid, bg_key.offset);
12481 err |= REFERENCER_MISSING;
12483 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12484 struct btrfs_chunk);
12485 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12488 "block group[%llu %llu] related chunk item length does not match",
12489 bg_key.objectid, bg_key.offset);
12490 err |= REFERENCER_MISMATCH;
12493 btrfs_release_path(&path);
12495 /* Search from the block group bytenr */
12496 extent_key.objectid = bg_key.objectid;
12497 extent_key.type = 0;
12498 extent_key.offset = 0;
12500 btrfs_init_path(&path);
12501 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12505 /* Iterate extent tree to account used space */
12507 leaf = path.nodes[0];
12509 /* Search slot can point to the last item beyond leaf nritems */
12510 if (path.slots[0] >= btrfs_header_nritems(leaf))
12513 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* Keys at or past the end of the block group are compared here */
12514 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12517 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12518 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12520 if (extent_key.objectid < bg_key.objectid)
/*
 * METADATA_ITEM keys are special-cased; the branch body is not visible in
 * this listing.  The visible accumulation adds extent_key.offset.
 */
12523 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12526 total += extent_key.offset;
12528 ei = btrfs_item_ptr(leaf, path.slots[0],
12529 struct btrfs_extent_item);
12530 flags = btrfs_extent_flags(leaf, ei);
/* A data extent must live in a DATA block group */
12531 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12532 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12534 "bad extent[%llu, %llu) type mismatch with chunk",
12535 extent_key.objectid,
12536 extent_key.objectid + extent_key.offset);
12537 err |= CHUNK_TYPE_MISMATCH;
/* A tree block must live in a SYSTEM or METADATA block group */
12539 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12540 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12541 BTRFS_BLOCK_GROUP_METADATA))) {
12543 "bad extent[%llu, %llu) type mismatch with chunk",
12544 extent_key.objectid,
12545 extent_key.objectid + nodesize);
12546 err |= CHUNK_TYPE_MISMATCH;
12550 ret = btrfs_next_item(extent_root, &path);
12556 btrfs_release_path(&path);
12558 if (total != used) {
12560 "block group[%llu %llu] used %llu but extent items used %llu",
12561 bg_key.objectid, bg_key.offset, used, total);
12562 err |= ACCOUNTING_MISMATCH;
12568 * Check a chunk item.
12569 * Including checking all referred dev_extents and block group
/*
 * Validate one CHUNK_ITEM together with the items that must refer to it.
 *
 * @fs_info: filesystem whose extent_root and dev_root are searched
 * @eb:      leaf holding the chunk item
 * @slot:    slot of the chunk item inside @eb
 *
 * Visible checks:
 *  - btrfs_check_chunk_valid() on the chunk itself
 *    (BYTES_UNALIGNED | UNKNOWN_TYPE on the "is invalid" path);
 *  - a BLOCK_GROUP_ITEM keyed by (chunk_key.offset, length) is looked up in
 *    the extent tree and its flags are compared with the chunk type;
 *  - for every stripe, a DEV_EXTENT keyed by (stripe devid, stripe offset)
 *    is looked up in the dev tree and must carry this chunk's objectid,
 *    offset and stripe length, otherwise BACKREF_MISSING is recorded.
 *
 * Problems are accumulated as bits in err.
 * NOTE(review): labels, several conditions and the return statement are
 * not present in this listing; presumably err is returned.
 */
12571 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12572 struct extent_buffer *eb, int slot)
12574 struct btrfs_root *extent_root = fs_info->extent_root;
12575 struct btrfs_root *dev_root = fs_info->dev_root;
12576 struct btrfs_path path;
12577 struct btrfs_key chunk_key;
12578 struct btrfs_key bg_key;
12579 struct btrfs_key devext_key;
12580 struct btrfs_chunk *chunk;
12581 struct extent_buffer *leaf;
12582 struct btrfs_block_group_item *bi;
12583 struct btrfs_block_group_item bg_item;
12584 struct btrfs_dev_extent *ptr;
12596 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12597 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12598 length = btrfs_chunk_length(eb, chunk);
12599 chunk_end = chunk_key.offset + length;
12600 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12603 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12605 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12608 type = btrfs_chunk_type(eb, chunk);
/* The matching block group is keyed by the chunk's start and length */
12610 bg_key.objectid = chunk_key.offset;
12611 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12612 bg_key.offset = length;
12614 btrfs_init_path(&path);
12615 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
12618 "chunk[%llu %llu) did not find the related block group item",
12619 chunk_key.offset, chunk_end);
12620 err |= REFERENCER_MISSING;
12622 leaf = path.nodes[0];
12623 bi = btrfs_item_ptr(leaf, path.slots[0],
12624 struct btrfs_block_group_item);
12625 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
12627 if (btrfs_block_group_flags(&bg_item) != type) {
12629 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
12630 chunk_key.offset, chunk_end, type,
12631 btrfs_block_group_flags(&bg_item));
/*
 * NOTE(review): a flags mismatch is reported with REFERENCER_MISSING, the
 * same bit used above when the block group item is not found at all;
 * check_block_group_item() uses REFERENCER_MISMATCH for its length
 * mismatch.  Confirm whether MISMATCH was intended here.
 */
12632 err |= REFERENCER_MISSING;
12636 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
12637 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
12638 for (i = 0; i < num_stripes; i++) {
/* Drop whatever the previous lookup left in path before searching again */
12639 btrfs_release_path(&path);
12640 btrfs_init_path(&path);
12641 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
12642 devext_key.type = BTRFS_DEV_EXTENT_KEY;
12643 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
12645 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
12648 goto not_match_dev;
12650 leaf = path.nodes[0];
12651 ptr = btrfs_item_ptr(leaf, path.slots[0],
12652 struct btrfs_dev_extent);
12653 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
12654 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
/* The dev extent must point back at exactly this chunk and stripe length */
12655 if (objectid != chunk_key.objectid ||
12656 offset != chunk_key.offset ||
12657 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
12658 goto not_match_dev;
12661 err |= BACKREF_MISSING;
/*
 * NOTE(review): the other messages in this function print chunk_key.offset
 * as the start of the "chunk[start end)" range; this one prints
 * chunk_key.objectid.  Confirm which value is intended.
 */
12663 "chunk[%llu %llu) stripe %d did not find the related dev extent",
12664 chunk_key.objectid, chunk_end, i);
12667 btrfs_release_path(&path);
12673 * Main entry function to check known items and update related accounting info
/*
 * Check the item at path->slots[0] of the leaf path->nodes[0] and hand it
 * to the checker matching its key type.
 *
 * @trans:         transaction handle (not used by any visible statement)
 * @root:          tree the leaf belongs to; supplies fs_info
 * @path:          path positioned on the leaf and slot to check
 * @nrefs:         passed through to check_extent_data_item()
 * @account_bytes: passed through to check_extent_data_item()
 *
 * Visible dispatch, one case label per key type:
 *   EXTENT_DATA                 -> check_extent_data_item()
 *   BLOCK_GROUP_ITEM            -> check_block_group_item()
 *   DEV_ITEM                    -> check_dev_item()
 *   CHUNK_ITEM                  -> check_chunk_item()
 *   DEV_EXTENT                  -> check_dev_extent_item()
 *   EXTENT_ITEM / METADATA_ITEM -> check_extent_item()
 *   EXTENT_CSUM                 -> adds the item size to total_csum_bytes
 *   TREE_BLOCK_REF, EXTENT_DATA_REF, SHARED_BLOCK_REF, SHARED_DATA_REF
 *                               -> the corresponding backref checker
 *
 * NOTE(review): the switch statement itself, the handling of each ret
 * value, any loop over slots and the return statement are not present in
 * this listing.
 */
12675 static int check_leaf_items(struct btrfs_trans_handle *trans,
12676 struct btrfs_root *root, struct btrfs_path *path,
12677 struct node_refs *nrefs, int account_bytes)
12679 struct btrfs_fs_info *fs_info = root->fs_info;
12680 struct btrfs_key key;
12681 struct extent_buffer *eb;
12684 struct btrfs_extent_data_ref *dref;
12689 eb = path->nodes[0];
12690 slot = path->slots[0];
/* A slot at or past nritems is reported as an empty leaf */
12691 if (slot >= btrfs_header_nritems(eb)) {
12693 error("empty leaf [%llu %u] root %llu", eb->start,
12694 root->fs_info->nodesize, root->objectid);
12700 btrfs_item_key_to_cpu(eb, &key, slot);
12704 case BTRFS_EXTENT_DATA_KEY:
12705 ret = check_extent_data_item(root, path, nrefs, account_bytes);
12708 case BTRFS_BLOCK_GROUP_ITEM_KEY:
12709 ret = check_block_group_item(fs_info, eb, slot);
12712 case BTRFS_DEV_ITEM_KEY:
12713 ret = check_dev_item(fs_info, eb, slot);
12716 case BTRFS_CHUNK_ITEM_KEY:
12717 ret = check_chunk_item(fs_info, eb, slot);
12720 case BTRFS_DEV_EXTENT_KEY:
12721 ret = check_dev_extent_item(fs_info, eb, slot);
12724 case BTRFS_EXTENT_ITEM_KEY:
12725 case BTRFS_METADATA_ITEM_KEY:
12726 ret = check_extent_item(fs_info, eb, slot);
/* Checksum items are only accounted, not verified, at this point */
12729 case BTRFS_EXTENT_CSUM_KEY:
12730 total_csum_bytes += btrfs_item_size_nr(eb, slot);
12733 case BTRFS_TREE_BLOCK_REF_KEY:
12734 ret = check_tree_block_backref(fs_info, key.offset,
12738 case BTRFS_EXTENT_DATA_REF_KEY:
12739 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
12740 ret = check_extent_data_backref(fs_info,
12741 btrfs_extent_data_ref_root(eb, dref),
12742 btrfs_extent_data_ref_objectid(eb, dref),
12743 btrfs_extent_data_ref_offset(eb, dref),
12745 btrfs_extent_data_ref_count(eb, dref));
12748 case BTRFS_SHARED_BLOCK_REF_KEY:
12749 ret = check_shared_block_backref(fs_info, key.offset,
12753 case BTRFS_SHARED_DATA_REF_KEY:
12754 ret = check_shared_data_backref(fs_info, key.offset,
/* Forward declaration: the definition appears later in this file */
12768 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info);
12771 * Low memory usage version of check_chunks_and_extents.
/*
 * Walk the chunk tree, the tree root and every tree referenced by a
 * ROOT_ITEM in the tree root, checking each with check_btrfs_root().
 *
 * @fs_info: filesystem to check
 *
 * Visible sequence:
 *  1. pin_metadata_blocks() and btrfs_start_transaction() on the extent
 *     root (the condition selecting this path is not visible; the comment
 *     near the end suggests it is tied to repair mode - confirm);
 *  2. check_btrfs_root() on chunk_root, then on tree_root;
 *  3. starting from the BTRFS_EXTENT_TREE_OBJECTID root item, each
 *     ROOT_ITEM is read as a tree and checked; reloc trees are read with
 *     btrfs_read_fs_root_no_cache() and released with btrfs_free_fs_root();
 *  4. btrfs_fix_block_accounting() and btrfs_commit_transaction().
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started.
 * NOTE(review): error accumulation, loop control and the final return are
 * not present in this listing.
 */
12773 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
12775 struct btrfs_trans_handle *trans = NULL;
12776 struct btrfs_path path;
12777 struct btrfs_key old_key;
12778 struct btrfs_key key;
12779 struct btrfs_root *root1;
12780 struct btrfs_root *root;
12781 struct btrfs_root *cur_root;
12785 root = fs_info->fs_root;
12788 /* pin every tree block to avoid extent overwrite */
12789 ret = pin_metadata_blocks(fs_info);
12791 error("failed to pin metadata blocks");
12794 trans = btrfs_start_transaction(fs_info->extent_root, 1);
12795 if (IS_ERR(trans)) {
12796 error("failed to start transaction before check");
12797 return PTR_ERR(trans);
12801 root1 = root->fs_info->chunk_root;
12802 ret = check_btrfs_root(trans, root1, 0, 1);
12805 root1 = root->fs_info->tree_root;
12806 ret = check_btrfs_root(trans, root1, 0, 1);
/* Walk the ROOT_ITEMs of the tree root, starting at the extent tree */
12809 btrfs_init_path(&path);
12810 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
12812 key.type = BTRFS_ROOT_ITEM_KEY;
12814 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
12816 error("cannot find extent tree in tree_root");
12821 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12822 if (key.type != BTRFS_ROOT_ITEM_KEY)
12825 key.offset = (u64)-1;
/* Reloc trees are read without caching and freed again below */
12827 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12828 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
12831 cur_root = btrfs_read_fs_root(root->fs_info, &key);
12832 if (IS_ERR(cur_root) || !cur_root) {
/*
 * NOTE(review): key.objectid is printed with %llu elsewhere in this file;
 * %lld here would show large object ids as negative numbers - confirm.
 */
12833 error("failed to read tree: %lld", key.objectid);
12837 ret = check_btrfs_root(trans, cur_root, 0, 1);
12840 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12841 btrfs_free_fs_root(cur_root);
/*
 * Re-position the path from old_key after releasing it.
 * NOTE(review): the statement that assigns old_key is not present in this
 * listing; verify it is set before this search.
 */
12843 btrfs_release_path(&path);
12844 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
12845 &old_key, &path, 0, 0);
12849 ret = btrfs_next_item(root1, &path);
12855 /* if repair, update block accounting */
12857 ret = btrfs_fix_block_accounting(trans, root);
12863 btrfs_commit_transaction(trans, root->fs_info->extent_root);
12865 btrfs_release_path(&path);
/*
 * Entry point for the extent check: prints "checking extents" to stderr
 * when progress reporting is disabled, then runs the low-memory
 * implementation when check_mode is CHECK_MODE_LOWMEM.
 * check_chunks_and_extents() is the other visible callee; the else and
 * the return statement are not present in this listing.
 */
12870 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
12874 if (!ctx.progress_enabled)
12875 fprintf(stderr, "checking extents\n");
12876 if (check_mode == CHECK_MODE_LOWMEM)
12877 ret = check_chunks_and_extents_v2(fs_info);
12879 ret = check_chunks_and_extents(fs_info);
/*
 * Re-initialise the root node of @root as an empty tree block.
 *
 * @trans:     running transaction; its transid becomes the header generation
 * @root:      tree whose root node is replaced or overwritten
 * @overwrite: selects between the two visible ways of obtaining the block
 *             (presumably reuse of the existing node versus a block from
 *             btrfs_alloc_free_block(); the selecting condition is not
 *             visible in this listing)
 *
 * The block header is zeroed and refilled (level, bytenr, generation,
 * backref revision, owner, fsid, chunk tree uuid) and the buffer is marked
 * dirty.  When the new block has the same start as the old root node, the
 * root item's generation and level are updated and written with
 * btrfs_update_root().  The root is added to the dirty list at the end.
 *
 * NOTE(review): error handling and the return statement are not present
 * in this listing.
 */
12884 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
12885 struct btrfs_root *root, int overwrite)
12887 struct extent_buffer *c;
12888 struct extent_buffer *old = root->node;
12891 struct btrfs_disk_key disk_key = {0,0,0};
12897 extent_buffer_get(c);
12900 c = btrfs_alloc_free_block(trans, root,
12901 root->fs_info->nodesize,
12902 root->root_key.objectid,
12903 &disk_key, level, 0, 0);
12906 extent_buffer_get(c);
/* Wipe the header, then rebuild it field by field */
12910 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
12911 btrfs_set_header_level(c, level);
12912 btrfs_set_header_bytenr(c, c->start);
12913 btrfs_set_header_generation(c, trans->transid);
12914 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
12915 btrfs_set_header_owner(c, root->root_key.objectid);
12917 write_extent_buffer(c, root->fs_info->fsid,
12918 btrfs_header_fsid(), BTRFS_FSID_SIZE);
12920 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
12921 btrfs_header_chunk_tree_uuid(c),
12924 btrfs_mark_buffer_dirty(c);
12926 * this case can happen in the following case:
12928 * 1.overwrite previous root.
12930 * 2.reinit reloc data root, this is because we skip pin
12931 * down reloc data tree before which means we can allocate
12932 * same block bytenr here.
12934 if (old->start == c->start) {
12935 btrfs_set_root_generation(&root->root_item,
12937 root->root_item.level = btrfs_header_level(root->node);
12938 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12939 &root->root_key, &root->root_item);
12941 free_extent_buffer(c);
12945 free_extent_buffer(old);
12947 add_root_to_dirty_list(root);
/*
 * Recursively pin the tree block @eb and the blocks reachable from it.
 *
 * @fs_info:   filesystem whose pinned_extents set is updated
 * @eb:        tree block to pin
 * @tree_root: non-zero while walking the tree of tree roots; in that mode
 *             ROOT_ITEMs found in a block are followed into the trees
 *             they describe
 *
 * A block whose range is already marked EXTENT_DIRTY in pinned_extents is
 * not pinned again.  Two per-item paths are visible:
 *  - ROOT_ITEM handling: root items other than the extent root, reloc
 *    roots and the data reloc root have their root block read and walked
 *    with tree_root = 0;
 *  - child pointer handling: the child bytenr is taken from
 *    btrfs_node_blockptr(); when level == 1 and !tree_root the child is
 *    pinned without being read, otherwise it is read and walked
 *    recursively.
 *
 * NOTE(review): the conditions choosing between the two paths, the error
 * returns after the "Error reading" messages and the final return are not
 * present in this listing.
 */
12951 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12952 struct extent_buffer *eb, int tree_root)
12954 struct extent_buffer *tmp;
12955 struct btrfs_root_item *ri;
12956 struct btrfs_key key;
12958 int level = btrfs_header_level(eb);
12964 * If we have pinned this block before, don't pin it again.
12965 * This can not only avoid forever loop with broken filesystem
12966 * but also give us some speedups.
12968 if (test_range_bit(&fs_info->pinned_extents, eb->start,
12969 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
12972 btrfs_pin_extent(fs_info, eb->start, eb->len);
12974 nritems = btrfs_header_nritems(eb);
12975 for (i = 0; i < nritems; i++) {
12977 btrfs_item_key_to_cpu(eb, &key, i);
12978 if (key.type != BTRFS_ROOT_ITEM_KEY)
12980 /* Skip the extent root and reloc roots */
12981 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
12982 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
12983 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
12985 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
12986 bytenr = btrfs_disk_root_bytenr(eb, ri);
12989 * If at any point we start needing the real root we
12990 * will have to build a stump root for the root we are
12991 * in, but for now this doesn't actually use the root so
12992 * just pass in extent_root.
12994 tmp = read_tree_block(fs_info, bytenr, 0);
12995 if (!extent_buffer_uptodate(tmp)) {
12996 fprintf(stderr, "Error reading root block\n");
/* Trees reached through a root item are walked as ordinary trees */
12999 ret = pin_down_tree_blocks(fs_info, tmp, 0);
13000 free_extent_buffer(tmp);
13004 bytenr = btrfs_node_blockptr(eb, i);
13006 /* If we aren't the tree root don't read the block */
13007 if (level == 1 && !tree_root) {
13008 btrfs_pin_extent(fs_info, bytenr,
13009 fs_info->nodesize);
13013 tmp = read_tree_block(fs_info, bytenr, 0);
13014 if (!extent_buffer_uptodate(tmp)) {
13015 fprintf(stderr, "Error reading tree block\n");
13018 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
13019 free_extent_buffer(tmp);
/*
 * Pin the tree blocks reachable from the chunk root (walked with
 * tree_root = 0) and then from the tree root (walked with tree_root = 1).
 * Returns the result of the tree-root walk; the check of the first ret
 * value is not present in this listing.
 */
13028 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13032 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13036 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the CHUNK_ITEMs in the chunk tree.
 *
 * @fs_info: filesystem whose block groups are recreated
 *
 * The avail_*_alloc_bits masks are cleared first.  For every CHUNK_ITEM
 * found, btrfs_add_block_group() is called with the chunk's type and
 * length, and the chunk's range is marked dirty in free_space_cache.
 * Afterwards the block groups are walked with
 * btrfs_lookup_first_block_group().
 *
 * NOTE(review): loop constructs, the per-block-group action in the second
 * walk, error handling and the return statement are not present in this
 * listing.
 */
13039 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13041 struct btrfs_block_group_cache *cache;
13042 struct btrfs_path path;
13043 struct extent_buffer *leaf;
13044 struct btrfs_chunk *chunk;
13045 struct btrfs_key key;
13049 btrfs_init_path(&path);
13051 key.type = BTRFS_CHUNK_ITEM_KEY;
13053 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13055 btrfs_release_path(&path);
13060 * We do this in case the block groups were screwed up and had alloc
13061 * bits that aren't actually set on the chunks. This happens with
13062 * restored images every time and could happen in real life I guess.
13064 fs_info->avail_data_alloc_bits = 0;
13065 fs_info->avail_metadata_alloc_bits = 0;
13066 fs_info->avail_system_alloc_bits = 0;
13068 /* First we need to create the in-memory block groups */
/* Move to the next leaf once the current one is exhausted */
13070 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13071 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13073 btrfs_release_path(&path);
13081 leaf = path.nodes[0];
13082 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13083 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13088 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13089 btrfs_add_block_group(fs_info, 0,
13090 btrfs_chunk_type(leaf, chunk),
13091 key.objectid, key.offset,
13092 btrfs_chunk_length(leaf, chunk));
/* Mark the whole chunk range dirty in free_space_cache */
13093 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13094 key.offset + btrfs_chunk_length(leaf, chunk));
13099 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Advance past the block group just visited */
13103 start = cache->key.objectid + cache->key.offset;
13106 btrfs_release_path(&path);
/*
 * Remove the traces of a pending balance and re-create the data reloc tree.
 *
 * @trans:   running transaction
 * @fs_info: filesystem whose tree_root is modified
 *
 * Visible steps:
 *  1. search the tree root for the (BTRFS_BALANCE_OBJECTID,
 *     BTRFS_BALANCE_ITEM_KEY) item and delete it with btrfs_del_item();
 *     one branch jumps straight to reinit_data_reloc;
 *  2. search for ROOT_ITEMs with objectid BTRFS_TREE_RELOC_OBJECTID and
 *     delete them in batches with btrfs_del_items();
 *  3. read the BTRFS_DATA_RELOC_TREE_OBJECTID root, record it in the
 *     transaction, reinitialise it with btrfs_fsck_reinit_root() and
 *     create its root directory with btrfs_make_root_dir().
 *
 * NOTE(review): labels, loop constructs, conditions on ret and the return
 * statement are not present in this listing.
 */
13110 static int reset_balance(struct btrfs_trans_handle *trans,
13111 struct btrfs_fs_info *fs_info)
13113 struct btrfs_root *root = fs_info->tree_root;
13114 struct btrfs_path path;
13115 struct extent_buffer *leaf;
13116 struct btrfs_key key;
13117 int del_slot, del_nr = 0;
13121 btrfs_init_path(&path);
13122 key.objectid = BTRFS_BALANCE_OBJECTID;
13123 key.type = BTRFS_BALANCE_ITEM_KEY;
13125 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13130 goto reinit_data_reloc;
13135 ret = btrfs_del_item(trans, root, &path);
13138 btrfs_release_path(&path);
/* Next, look for root items of reloc trees */
13140 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13141 key.type = BTRFS_ROOT_ITEM_KEY;
13143 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13147 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13152 ret = btrfs_del_items(trans, root, &path,
13159 btrfs_release_path(&path);
13162 ret = btrfs_search_slot(trans, root, &key, &path,
13169 leaf = path.nodes[0];
13170 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13171 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13173 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13178 del_slot = path.slots[0];
13187 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13191 btrfs_release_path(&path);
/* From here on, root refers to the data reloc tree, not the tree root */
13194 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13195 key.type = BTRFS_ROOT_ITEM_KEY;
13196 key.offset = (u64)-1;
13197 root = btrfs_read_fs_root(fs_info, &key);
13198 if (IS_ERR(root)) {
13199 fprintf(stderr, "Error reading data reloc tree\n");
13200 ret = PTR_ERR(root);
13203 record_root_in_trans(trans, root);
13204 ret = btrfs_fsck_reinit_root(trans, root, 0);
13207 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13209 btrfs_release_path(&path);
/*
 * Throw away the extent tree and build a fresh one.
 *
 * @trans:   running transaction
 * @fs_info: filesystem whose extent tree is reinitialised
 *
 * Visible steps: refuse filesystems with the MIXED_GROUPS incompat flag;
 * pin all metadata with pin_metadata_blocks(); drop and rebuild the
 * in-memory block groups (btrfs_free_block_groups(), reset_block_groups());
 * reinitialise the extent root with btrfs_fsck_reinit_root(); insert one
 * block group item per in-memory block group into the new extent root;
 * finally call reset_balance().
 *
 * NOTE(review): the conditions on ret, the loop construct and the return
 * statements are not present in this listing.
 */
13213 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13214 struct btrfs_fs_info *fs_info)
13220 * The only reason we don't do this is because right now we're just
13221 * walking the trees we find and pinning down their bytes, we don't look
13222 * at any of the leaves. In order to do mixed groups we'd have to check
13223 * the leaves of any fs roots and pin down the bytes for any file
13224 * extents we find. Not hard but why do it if we don't have to?
13226 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13227 fprintf(stderr, "We don't support re-initing the extent tree "
13228 "for mixed block groups yet, please notify a btrfs "
13229 "developer you want to do this so they can add this "
13230 "functionality.\n");
13235 * first we need to walk all of the trees except the extent tree and pin
13236 * down the bytes that are in use so we don't overwrite any existing
13239 ret = pin_metadata_blocks(fs_info);
13241 fprintf(stderr, "error pinning down used bytes\n");
13246 * Need to drop all the block groups since we're going to recreate all
13249 btrfs_free_block_groups(fs_info);
13250 ret = reset_block_groups(fs_info);
13252 fprintf(stderr, "error resetting the block groups\n");
13256 /* Ok we can allocate now, reinit the extent root */
13257 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13259 fprintf(stderr, "extent root initialization failed\n");
13261 * When the transaction code is updated we should end the
13262 * transaction, but for now progs only knows about commit so
13263 * just return an error.
13269 * Now we have all the in-memory block groups setup so we can make
13270 * allocations properly, and the metadata we care about is safe since we
13271 * pinned all of it above.
13274 struct btrfs_block_group_cache *cache;
13276 cache = btrfs_lookup_first_block_group(fs_info, start);
13279 start = cache->key.objectid + cache->key.offset;
/* Write the in-memory block group item into the fresh extent tree */
13280 ret = btrfs_insert_item(trans, fs_info->extent_root,
13281 &cache->key, &cache->item,
13282 sizeof(cache->item));
13284 fprintf(stderr, "Error adding block group\n");
13287 btrfs_extent_post_op(trans, fs_info->extent_root);
13290 ret = reset_balance(trans, fs_info);
13292 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force the metadata block @eb to be copied-on-write again.
 *
 * @root: any root of the filesystem; it is used only to reach fs_info and
 *        is then replaced by the root that owns @eb
 * @eb:   tree block to recow
 *
 * The owner root is looked up from the block header.  A transaction is
 * started on it and btrfs_search_slot() is run with cow = 1 for the first
 * key of @eb, with path.lowest_level set to the block's level.
 *
 * Returns PTR_ERR() of the failing call when the owner root cannot be read
 * or the transaction cannot be started.
 * NOTE(review): no check of the search result is visible before
 * btrfs_commit_transaction(), whose own return value is ignored; the
 * final return is not present in this listing.
 */
13297 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13299 struct btrfs_path path;
13300 struct btrfs_trans_handle *trans;
13301 struct btrfs_key key;
13304 printf("Recowing metadata block %llu\n", eb->start);
13305 key.objectid = btrfs_header_owner(eb);
13306 key.type = BTRFS_ROOT_ITEM_KEY;
13307 key.offset = (u64)-1;
13309 root = btrfs_read_fs_root(root->fs_info, &key);
13310 if (IS_ERR(root)) {
13311 fprintf(stderr, "Couldn't find owner root %llu\n",
13313 return PTR_ERR(root);
13316 trans = btrfs_start_transaction(root, 1);
13318 return PTR_ERR(trans);
13320 btrfs_init_path(&path);
/* Stop the search at the level of the block being recowed */
13321 path.lowest_level = btrfs_header_level(eb);
/* Nodes and leaves store their first key differently */
13322 if (path.lowest_level)
13323 btrfs_node_key_to_cpu(eb, &key, 0);
13325 btrfs_item_key_to_cpu(eb, &key, 0);
13327 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13328 btrfs_commit_transaction(trans, root);
13329 btrfs_release_path(&path);
/*
 * Delete the item described by @bad from the tree that owns it.
 *
 * @root: any root of the filesystem; it is used only to reach fs_info and
 *        is then replaced by the root identified by bad->root_id
 * @bad:  record holding the owning root id and the key of the item
 *
 * A transaction is started on the owning root, the key is searched with
 * ins_len = -1 and cow = 1, the item is deleted with btrfs_del_item() and
 * the transaction is committed.
 *
 * Returns PTR_ERR() of the failing call when the owner root cannot be read
 * or the transaction cannot be started.
 * NOTE(review): the handling of the search result and the final return
 * are not present in this listing.
 */
13333 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13335 struct btrfs_path path;
13336 struct btrfs_trans_handle *trans;
13337 struct btrfs_key key;
13340 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13341 bad->key.type, bad->key.offset);
13342 key.objectid = bad->root_id;
13343 key.type = BTRFS_ROOT_ITEM_KEY;
13344 key.offset = (u64)-1;
13346 root = btrfs_read_fs_root(root->fs_info, &key);
13347 if (IS_ERR(root)) {
13348 fprintf(stderr, "Couldn't find owner root %llu\n",
13350 return PTR_ERR(root);
13353 trans = btrfs_start_transaction(root, 1);
13355 return PTR_ERR(trans);
13357 btrfs_init_path(&path);
13358 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13364 ret = btrfs_del_item(trans, root, &path);
13366 btrfs_commit_transaction(trans, root);
13367 btrfs_release_path(&path);
/*
 * Clear the log tree pointer in the superblock: log_root and
 * log_root_level of super_copy are set to 0 inside a transaction that is
 * then committed.  ret receives PTR_ERR(trans) when the transaction cannot
 * be started, otherwise the result of the commit.  The return statements
 * are not present in this listing.
 */
13371 static int zero_log_tree(struct btrfs_root *root)
13373 struct btrfs_trans_handle *trans;
13376 trans = btrfs_start_transaction(root, 1);
13377 if (IS_ERR(trans)) {
13378 ret = PTR_ERR(trans);
13381 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13382 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13383 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert checksums for a byte range, one sector at a time.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree receiving the new items
 * @buf:       scratch buffer; callers in this file allocate sectorsize bytes
 * @start:     first byte of the range
 *
 * Each iteration reads sectorsize bytes at start + offset with
 * read_extent_data() and passes them to btrfs_csum_file_block(), then
 * advances offset by sectorsize until offset reaches len.
 *
 * NOTE(review): the rest of the parameter list (len is used below), the
 * checks on ret and the return statement are not present in this listing.
 */
13387 static int populate_csum(struct btrfs_trans_handle *trans,
13388 struct btrfs_root *csum_root, char *buf, u64 start,
13391 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13396 while (offset < len) {
13397 sectorsize = fs_info->sectorsize;
13398 ret = read_extent_data(fs_info, buf, start + offset,
13402 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13403 start + offset, buf, sectorsize);
13406 offset += sectorsize;
/*
 * Generate checksums for the regular file extents referenced by one
 * fs/subvolume tree.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree receiving the new items
 * @cur_root:  fs/subvolume tree to walk
 *
 * Items are visited with btrfs_next_item().  Only EXTENT_DATA items whose
 * file extent type is BTRFS_FILE_EXTENT_REG are used; their disk_bytenr
 * and disk_num_bytes are passed to populate_csum().  A populate_csum()
 * result of -EEXIST is special-cased.
 *
 * NOTE(review): the start key, the loop construct, the branch bodies
 * (including what happens on -EEXIST), the release of buf and the return
 * statement are not present in this listing.
 */
13411 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13412 struct btrfs_root *csum_root,
13413 struct btrfs_root *cur_root)
13415 struct btrfs_path path;
13416 struct btrfs_key key;
13417 struct extent_buffer *node;
13418 struct btrfs_file_extent_item *fi;
/* One sector of scratch space for populate_csum() */
13425 buf = malloc(cur_root->fs_info->sectorsize);
13429 btrfs_init_path(&path);
13433 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13436 /* Iterate all regular file extents and fill its csum */
13438 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13440 if (key.type != BTRFS_EXTENT_DATA_KEY)
13442 node = path.nodes[0];
13443 slot = path.slots[0];
13444 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13445 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* Checksums cover the on-disk extent, not the file-offset range */
13447 start = btrfs_file_extent_disk_bytenr(node, fi);
13448 len = btrfs_file_extent_disk_num_bytes(node, fi);
13450 ret = populate_csum(trans, csum_root, buf, start, len);
13451 if (ret == -EEXIST)
13457 * TODO: if next leaf is corrupted, jump to nearest next valid
13460 ret = btrfs_next_item(cur_root, &path);
13470 btrfs_release_path(&path);
/*
 * Rebuild checksums by walking every fs/subvolume tree.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree receiving the new items
 *
 * ROOT_ITEMs in the tree root are visited starting at
 * BTRFS_FS_TREE_OBJECTID.  Each visited key is tested against
 * BTRFS_LAST_FREE_OBJECTID, BTRFS_ROOT_ITEM_KEY and is_fstree(); a root
 * that reaches btrfs_read_fs_root() is handed to
 * fill_csum_tree_from_one_fs_root().
 *
 * NOTE(review): the loop construct, the branch bodies of the three tests,
 * error handling and the return statement are not present in this listing.
 */
13475 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13476 struct btrfs_root *csum_root)
13478 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13479 struct btrfs_path path;
13480 struct btrfs_root *tree_root = fs_info->tree_root;
13481 struct btrfs_root *cur_root;
13482 struct extent_buffer *node;
13483 struct btrfs_key key;
13487 btrfs_init_path(&path);
13488 key.objectid = BTRFS_FS_TREE_OBJECTID;
13490 key.type = BTRFS_ROOT_ITEM_KEY;
13491 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13500 node = path.nodes[0];
13501 slot = path.slots[0];
13502 btrfs_item_key_to_cpu(node, &key, slot);
13503 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13505 if (key.type != BTRFS_ROOT_ITEM_KEY)
13507 if (!is_fstree(key.objectid))
13509 key.offset = (u64)-1;
13511 cur_root = btrfs_read_fs_root(fs_info, &key);
13512 if (IS_ERR(cur_root) || !cur_root) {
13513 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13517 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13522 ret = btrfs_next_item(tree_root, &path);
13532 btrfs_release_path(&path);
/*
 * Rebuild checksums by walking the extent tree.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree receiving the new items
 *
 * Leaves of the extent tree are visited with btrfs_next_leaf().  The key
 * type is tested against BTRFS_EXTENT_ITEM_KEY and the extent flags
 * against BTRFS_EXTENT_FLAG_DATA; populate_csum() is called with
 * key.objectid as the start of the range.
 *
 * NOTE(review): the start key, the loop construct, the branch bodies,
 * error handling, the release of buf and the return statement are not
 * present in this listing.
 */
13536 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13537 struct btrfs_root *csum_root)
13539 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13540 struct btrfs_path path;
13541 struct btrfs_extent_item *ei;
13542 struct extent_buffer *leaf;
13544 struct btrfs_key key;
13547 btrfs_init_path(&path);
13549 key.type = BTRFS_EXTENT_ITEM_KEY;
13551 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
13553 btrfs_release_path(&path);
/* One sector of scratch space for populate_csum() */
13557 buf = malloc(csum_root->fs_info->sectorsize);
13559 btrfs_release_path(&path);
13564 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13565 ret = btrfs_next_leaf(extent_root, &path);
13573 leaf = path.nodes[0];
13575 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13576 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
13581 ei = btrfs_item_ptr(leaf, path.slots[0],
13582 struct btrfs_extent_item);
13583 if (!(btrfs_extent_flags(leaf, ei) &
13584 BTRFS_EXTENT_FLAG_DATA)) {
13589 ret = populate_csum(trans, csum_root, buf, key.objectid,
13596 btrfs_release_path(&path);
13602 * Recalculate the csum and put it into the csum tree.
13604 * Extent tree init will wipe out all the extent info, so in that case, we
13605 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
13606 * will use fs/subvol trees to init the csum tree.
/*
 * Choose the source used to regenerate checksums.
 *
 * @trans:          running transaction
 * @csum_root:      checksum tree receiving the new items
 * @search_fs_tree: non-zero to walk the fs/subvolume trees
 *                  (fill_csum_tree_from_fs()); zero to walk the extent
 *                  tree (fill_csum_tree_from_extent())
 *
 * Returns whatever the selected helper returns.
 */
13608 static int fill_csum_tree(struct btrfs_trans_handle *trans,
13609 struct btrfs_root *csum_root,
13610 int search_fs_tree)
13612 if (search_fs_tree)
13613 return fill_csum_tree_from_fs(trans, csum_root);
13615 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache: entries are removed from the
 * cache tree one by one until it is empty, then the tree itself is freed
 * and the global pointer is reset to NULL.  Does nothing visible when the
 * cache was never allocated.
 * NOTE(review): the statement releasing each rii is not present in this
 * listing; presumably it follows the container_of() line - confirm.
 */
13618 static void free_roots_info_cache(void)
13620 if (!roots_info_cache)
13623 while (!cache_tree_empty(roots_info_cache)) {
13624 struct cache_extent *entry;
13625 struct root_item_info *rii;
13627 entry = first_cache_extent(roots_info_cache);
13630 remove_cache_extent(roots_info_cache, entry);
13631 rii = container_of(entry, struct root_item_info, cache_extent);
13635 free(roots_info_cache);
13636 roots_info_cache = NULL;
/*
 * Scan the extent tree and record, per root id, the highest-level tree
 * block that carries an inline TREE_BLOCK_REF for that root.
 *
 * @info: filesystem whose extent_root is scanned
 *
 * The global roots_info_cache is allocated on first use.  For every
 * EXTENT_ITEM with the TREE_BLOCK flag and every METADATA_ITEM, the first
 * inline ref is examined; when it is a TREE_BLOCK_REF, its offset is used
 * as the root id and the matching root_item_info is created or updated:
 * a higher level (or the first sighting, marked by level == (u8)-1)
 * replaces level, bytenr and gen and sets node_count to 1.
 *
 * NOTE(review): the loop construct, branch bodies (including the
 * level == rii->level case), allocation-failure handling and the return
 * statement are not present in this listing.
 */
13639 static int build_roots_info_cache(struct btrfs_fs_info *info)
13642 struct btrfs_key key;
13643 struct extent_buffer *leaf;
13644 struct btrfs_path path;
13646 if (!roots_info_cache) {
13647 roots_info_cache = malloc(sizeof(*roots_info_cache));
13648 if (!roots_info_cache)
13650 cache_tree_init(roots_info_cache);
13653 btrfs_init_path(&path);
13655 key.type = BTRFS_EXTENT_ITEM_KEY;
13657 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
13660 leaf = path.nodes[0];
13663 struct btrfs_key found_key;
13664 struct btrfs_extent_item *ei;
13665 struct btrfs_extent_inline_ref *iref;
13666 int slot = path.slots[0];
13671 struct cache_extent *entry;
13672 struct root_item_info *rii;
/* Move to the next leaf once the current one is exhausted */
13674 if (slot >= btrfs_header_nritems(leaf)) {
13675 ret = btrfs_next_leaf(info->extent_root, &path);
13682 leaf = path.nodes[0];
13683 slot = path.slots[0];
13686 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13688 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
13689 found_key.type != BTRFS_METADATA_ITEM_KEY)
13692 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
13693 flags = btrfs_extent_flags(leaf, ei);
13695 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
13696 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * METADATA_ITEM: the level is the key offset and the inline refs follow
 * the extent item directly.  Otherwise a btrfs_tree_block_info sits
 * between the extent item and the inline refs and supplies the level.
 */
13699 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
13700 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
13701 level = found_key.offset;
13703 struct btrfs_tree_block_info *binfo;
13705 binfo = (struct btrfs_tree_block_info *)(ei + 1);
13706 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
13707 level = btrfs_tree_block_level(leaf, binfo);
13711 * For a root extent, it must be of the following type and the
13712 * first (and only one) iref in the item.
13714 type = btrfs_extent_inline_ref_type(leaf, iref);
13715 if (type != BTRFS_TREE_BLOCK_REF_KEY)
13718 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
13719 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13721 rii = malloc(sizeof(struct root_item_info));
13726 rii->cache_extent.start = root_id;
13727 rii->cache_extent.size = 1;
/* (u8)-1 marks an entry that has not recorded any block yet */
13728 rii->level = (u8)-1;
13729 entry = &rii->cache_extent;
13730 ret = insert_cache_extent(roots_info_cache, entry);
13733 rii = container_of(entry, struct root_item_info,
13737 ASSERT(rii->cache_extent.start == root_id);
13738 ASSERT(rii->cache_extent.size == 1);
13740 if (level > rii->level || rii->level == (u8)-1) {
13741 rii->level = level;
13742 rii->bytenr = found_key.objectid;
13743 rii->gen = btrfs_extent_generation(leaf, ei);
13744 rii->node_count = 1;
13745 } else if (level == rii->level) {
13753 btrfs_release_path(&path);
/*
 * Check one root item against the information cached in roots_info_cache
 * and, when allowed, rewrite it in place.
 *
 * path           - locates the root item: path->nodes[0] is the leaf and
 *                  path->slots[0] the slot that is read (and possibly
 *                  written back).
 * root_key       - key of the root item; its objectid is used as the cache
 *                  lookup key.
 * read_only_mode - when non-zero the item is only compared, never modified.
 *
 * The bytenr, level and generation stored in the item are compared with the
 * bytenr, level and gen of the cached root_item_info. On a mismatch a
 * message is printed (unless read_only_mode and repair are both set), and
 * with read_only_mode == 0 the three fields are overwritten with the cached
 * values and the item is written back into the leaf.
 *
 * NOTE(review): the return statements are not part of the lines visible
 * here, so the exact return values are not documented - confirm against the
 * complete source.
 */
13758 static int maybe_repair_root_item(struct btrfs_path *path,
13759 const struct btrfs_key *root_key,
13760 const int read_only_mode)
13762 const u64 root_id = root_key->objectid;
13763 struct cache_extent *entry;
13764 struct root_item_info *rii;
13765 struct btrfs_root_item ri;
13766 unsigned long offset;
/* The cache is keyed by root id, each entry covering a range of size 1. */
13768 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13771 "Error: could not find extent items for root %llu\n",
13772 root_key->objectid);
13776 rii = container_of(entry, struct root_item_info, cache_extent);
13777 ASSERT(rii->cache_extent.start == root_id);
13778 ASSERT(rii->cache_extent.size == 1);
/* Anything other than exactly one candidate root node is reported. */
13780 if (rii->node_count != 1) {
13782 "Error: could not find btree root extent for root %llu\n",
/* Copy sizeof(ri) bytes of the item at this slot out of the leaf into ri. */
13787 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
13788 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
13790 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
13791 btrfs_root_level(&ri) != rii->level ||
13792 btrfs_root_generation(&ri) != rii->gen) {
13795 * If we're in repair mode but our caller told us to not update
13796 * the root item, i.e. just check if it needs to be updated, don't
13797 * print this message, since the caller will call us again shortly
13798 * for the same root item without read only mode (the caller will
13799 * open a transaction first).
13801 if (!(read_only_mode && repair))
13803 "%sroot item for root %llu,"
13804 " current bytenr %llu, current gen %llu, current level %u,"
13805 " new bytenr %llu, new gen %llu, new level %u\n",
13806 (read_only_mode ? "" : "fixing "),
13808 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
13809 btrfs_root_level(&ri),
13810 rii->bytenr, rii->gen, rii->level);
13812 if (btrfs_root_generation(&ri) > rii->gen) {
13814 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
13815 root_id, btrfs_root_generation(&ri), rii->gen);
/* Only touch the leaf when the caller allows modifications. */
13819 if (!read_only_mode) {
13820 btrfs_set_root_bytenr(&ri, rii->bytenr);
13821 btrfs_set_root_level(&ri, rii->level);
13822 btrfs_set_root_generation(&ri, rii->gen);
13823 write_extent_buffer(path->nodes[0], &ri,
13824 offset, sizeof(ri));
13834 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
13835 * caused read-only snapshots to be corrupted if they were created at a moment
13836 * when the source subvolume/snapshot had orphan items. The issue was that the
13837 * on-disk root items became incorrect, referring to the pre orphan cleanup root
13838 * node instead of the post orphan cleanup root node.
13839 * So this function, and its callees, just detect and fix those cases. Even
13840 * though the regression was for read-only snapshots, this function applies to
13841 * any snapshot/subvolume root.
13842 * This must be run before any other repair code - otherwise, other repair
13843 * code may delete or modify backrefs in the extent tree, for example, which
13844 * will result in an inconsistent fs after repairing the root items.
/*
 * Walk the root items stored in info->tree_root and hand each one to
 * maybe_repair_root_item().
 *
 * info - filesystem whose tree root is scanned; info->extent_root feeds the
 *        cache built by build_roots_info_cache() before the walk, and
 *        free_roots_info_cache() is called once the walk is over.
 *
 * The search key starts at objectid BTRFS_FIRST_FREE_OBJECTID with type
 * BTRFS_ROOT_ITEM_KEY. Keys whose type is not BTRFS_ROOT_ITEM_KEY and keys
 * whose objectid is BTRFS_TREE_RELOC_OBJECTID are tested for explicitly
 * (the statements taken in those cases are not visible in this excerpt).
 * maybe_repair_root_item() is called in read-only mode (third argument 1)
 * while no transaction handle is held, and in modifying mode (0) once one
 * has been started.
 *
 * NOTE(review): the last visible btrfs_commit_transaction() call discards
 * its result - confirm whether a failed commit should be reported.
 * NOTE(review): return statements are not visible here; the caller treats a
 * negative result as an error and a positive one as a count of roots.
 */
13846 static int repair_root_items(struct btrfs_fs_info *info)
13848 struct btrfs_path path;
13849 struct btrfs_key key;
13850 struct extent_buffer *leaf;
13851 struct btrfs_trans_handle *trans = NULL;
13854 int need_trans = 0;
13856 btrfs_init_path(&path);
13858 ret = build_roots_info_cache(info);
/* Search key: first free objectid, root item type. */
13862 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
13863 key.type = BTRFS_ROOT_ITEM_KEY;
13868 * Avoid opening and committing transactions if a leaf doesn't have
13869 * any root items that need to be fixed, so that we avoid rotating
13870 * backup roots unnecessarily.
13873 trans = btrfs_start_transaction(info->tree_root, 1);
13874 if (IS_ERR(trans)) {
13875 ret = PTR_ERR(trans);
13880 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
13884 leaf = path.nodes[0];
13887 struct btrfs_key found_key;
13889 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
13890 int no_more_keys = find_next_key(&path, &key);
13892 btrfs_release_path(&path);
13894 ret = btrfs_commit_transaction(trans,
13906 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13908 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
13910 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13913 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
13917 if (!trans && repair) {
13920 btrfs_release_path(&path);
13930 free_roots_info_cache();
13931 btrfs_release_path(&path);
13933 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the free space cache of every block group and then mark the super
 * block cache generation as invalid.
 *
 * fs_info - filesystem to operate on.
 *
 * Block groups are fetched with btrfs_lookup_first_block_group() and each
 * one is passed to btrfs_clear_free_space_cache(); the lookup position is
 * then advanced to the end of that group (key.objectid + key.offset).
 * Afterwards a transaction is started on the tree root, the super block
 * cache_generation is set to (u64)-1, and the transaction is committed.
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started; the other
 * return paths are not visible in this excerpt.
 *
 * NOTE(review): the result of btrfs_commit_transaction() is discarded at
 * the visible call - confirm whether a failed commit should be reported.
 */
13940 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13942 struct btrfs_trans_handle *trans;
13943 struct btrfs_block_group_cache *bg_cache;
13947 /* Clear all free space cache inodes and its extent data */
13949 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13952 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
/* Continue the scan right after the block group just handled. */
13955 current = bg_cache->key.objectid + bg_cache->key.offset;
13958 /* Don't forget to set cache_generation to -1 */
13959 trans = btrfs_start_transaction(fs_info->tree_root, 0);
13960 if (IS_ERR(trans)) {
13961 error("failed to update super block cache generation");
13962 return PTR_ERR(trans);
13964 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
13965 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Dispatch a space cache clearing request according to clear_version.
 *
 * fs_info       - filesystem to operate on.
 * clear_version - 1 selects the free space cache handled by
 *                 clear_free_space_cache(), 2 selects the free space tree
 *                 handled by btrfs_clear_free_space_tree().
 *
 * For version 1, a filesystem that has the FREE_SPACE_TREE compat_ro bit
 * set produces a message pointing at the v2 option instead. For version 2,
 * a filesystem without that bit only gets an informational message saying
 * there is nothing to clear. Progress and failures are reported with
 * printf() and error().
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 */
13970 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
13975 if (clear_version == 1) {
13976 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13978 "free space cache v2 detected, use --clear-space-cache v2");
13982 printf("Clearing free space cache\n");
13983 ret = clear_free_space_cache(fs_info);
13985 error("failed to clear free space cache");
13988 printf("Free space cache cleared\n");
13990 } else if (clear_version == 2) {
13991 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13992 printf("no free space cache v2 to clear\n");
13996 printf("Clear free space cache v2\n");
13997 ret = btrfs_clear_free_space_tree(fs_info);
13999 error("failed to clear free space cache v2: %d", ret);
14002 printf("free space cache v2 cleared\n");
/*
 * Help text of the check command, one string per output line. The array is
 * passed to usage() by cmd_check(). The first entry is the synopsis, the
 * second a one-line summary, followed by the long description and one
 * entry per option.
 */
14009 const char * const cmd_check_usage[] = {
14010 "btrfs check [options] <device>",
14011 "Check structural integrity of a filesystem (unmounted).",
14012 "Check structural integrity of an unmounted filesystem. Verify internal",
14013 "trees' consistency and item connectivity. In the repair mode try to",
14014 "fix the problems found. ",
14015 "WARNING: the repair mode is considered dangerous",
14017 "-s|--super <superblock> use this superblock copy",
14018 "-b|--backup use the first valid backup root copy",
14019 "--force skip mount checks, repair is not possible",
14020 "--repair try to repair the filesystem",
14021 "--readonly run in read-only mode (default)",
14022 "--init-csum-tree create a new CRC tree",
14023 "--init-extent-tree create a new extent tree",
14024 "--mode <MODE> allows choice of memory/IO trade-offs",
14025 " where MODE is one of:",
14026 " original - read inodes and extents to memory (requires",
14027 " more memory, does less IO)",
14028 " lowmem - try to use less memory but read blocks again",
14030 "--check-data-csum verify checksums of data blocks",
14031 "-Q|--qgroup-report print a report on qgroup consistency",
14032 "-E|--subvol-extents <subvolid>",
14033 " print subvolume extents and sharing state",
14034 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
14035 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
14036 "-p|--progress indicate progress",
14037 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of the check command.
 *
 * argc, argv - command line; after option parsing exactly one argument
 *              (the device or image, argv[optind]) must remain.
 *
 * Visible flow, in order:
 *  - parse options with getopt_long(); usage(cmd_check_usage) is invoked
 *    from the option switch and when check_argc_exact() rejects the number
 *    of remaining arguments;
 *  - start the progress task when progress reporting was requested;
 *  - report an error when readonly and repair are both set, and warn that
 *    repair support in low-memory mode is only partial;
 *  - query check_mounted() for the target;
 *  - open the filesystem with open_ctree_fs_info() using the accumulated
 *    ctree_flags and require the tree, dev and chunk root nodes to be
 *    uptodate;
 *  - handle space cache clearing, the qgroup report, and printing the
 *    extent state of one subvolume;
 *  - optionally reinitialize the extent and/or checksum tree inside a
 *    transaction;
 *  - run do_check_chunks_and_extents(), repair_root_items(),
 *    check_space_cache(), do_check_fs_roots(), check_csums(),
 *    check_root_refs() (not in low-memory mode) and, when
 *    info->quota_enabled is set, the quota group verification;
 *  - print the file-scope byte counters and release the qgroup counts, the
 *    root record tree and the progress task.
 *
 * NOTE(review): branch bodies, jumps and the return statement are not part
 * of the lines visible here, so error propagation and the exit status are
 * not documented - confirm against the complete source.
 */
14041 int cmd_check(int argc, char **argv)
14043 struct cache_tree root_cache;
14044 struct btrfs_root *root;
14045 struct btrfs_fs_info *info;
14048 u64 tree_root_bytenr = 0;
14049 u64 chunk_root_bytenr = 0;
14050 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14054 int init_csum_tree = 0;
14056 int clear_space_cache = 0;
14057 int qgroup_report = 0;
14058 int qgroups_repaired = 0;
14059 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* Option codes for the long-only options, numbered from 257 upwards. */
14064 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14065 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14066 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14067 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14068 GETOPT_VAL_FORCE };
14069 static const struct option long_options[] = {
14070 { "super", required_argument, NULL, 's' },
14071 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14072 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14073 { "init-csum-tree", no_argument, NULL,
14074 GETOPT_VAL_INIT_CSUM },
14075 { "init-extent-tree", no_argument, NULL,
14076 GETOPT_VAL_INIT_EXTENT },
14077 { "check-data-csum", no_argument, NULL,
14078 GETOPT_VAL_CHECK_CSUM },
14079 { "backup", no_argument, NULL, 'b' },
14080 { "subvol-extents", required_argument, NULL, 'E' },
14081 { "qgroup-report", no_argument, NULL, 'Q' },
14082 { "tree-root", required_argument, NULL, 'r' },
14083 { "chunk-root", required_argument, NULL,
14084 GETOPT_VAL_CHUNK_TREE },
14085 { "progress", no_argument, NULL, 'p' },
14086 { "mode", required_argument, NULL,
14088 { "clear-space-cache", required_argument, NULL,
14089 GETOPT_VAL_CLEAR_SPACE_CACHE},
14090 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14091 { NULL, 0, NULL, 0}
/*
 * NOTE(review): the short option string lists E without a trailing colon,
 * while the long form subvol-extents is declared required_argument and a
 * handler below stores arg_strtou64(optarg) into subvolid (presumably the
 * E case - its label is not visible here). With the short form, getopt
 * would leave optarg NULL. This looks like it should be E followed by a
 * colon - confirm and fix in the complete source.
 */
14094 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
14098 case 'a': /* ignored */ break;
14100 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
14103 num = arg_strtou64(optarg);
14104 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14106 "super mirror should be less than %d",
14107 BTRFS_SUPER_MIRROR_MAX);
14110 bytenr = btrfs_sb_offset(((int)num));
14111 printf("using SB copy %llu, bytenr %llu\n", num,
14112 (unsigned long long)bytenr);
14118 subvolid = arg_strtou64(optarg);
14121 tree_root_bytenr = arg_strtou64(optarg);
14123 case GETOPT_VAL_CHUNK_TREE:
14124 chunk_root_bytenr = arg_strtou64(optarg);
14127 ctx.progress_enabled = true;
14131 usage(cmd_check_usage);
14132 case GETOPT_VAL_REPAIR:
14133 printf("enabling repair mode\n");
14135 ctree_flags |= OPEN_CTREE_WRITES;
14137 case GETOPT_VAL_READONLY:
14140 case GETOPT_VAL_INIT_CSUM:
14141 printf("Creating a new CRC tree\n");
14142 init_csum_tree = 1;
14144 ctree_flags |= OPEN_CTREE_WRITES;
14146 case GETOPT_VAL_INIT_EXTENT:
14147 init_extent_tree = 1;
14148 ctree_flags |= (OPEN_CTREE_WRITES |
14149 OPEN_CTREE_NO_BLOCK_GROUPS);
14152 case GETOPT_VAL_CHECK_CSUM:
14153 check_data_csum = 1;
14155 case GETOPT_VAL_MODE:
14156 check_mode = parse_check_mode(optarg);
14157 if (check_mode == CHECK_MODE_UNKNOWN) {
14158 error("unknown mode: %s", optarg);
14162 case GETOPT_VAL_CLEAR_SPACE_CACHE:
14163 if (strcmp(optarg, "v1") == 0) {
14164 clear_space_cache = 1;
14165 } else if (strcmp(optarg, "v2") == 0) {
14166 clear_space_cache = 2;
14167 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14170 "invalid argument to --clear-space-cache, must be v1 or v2");
14173 ctree_flags |= OPEN_CTREE_WRITES;
14175 case GETOPT_VAL_FORCE:
14181 if (check_argc_exact(argc - optind, 1))
14182 usage(cmd_check_usage);
14184 if (ctx.progress_enabled) {
14185 ctx.tp = TASK_NOTHING;
14186 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14189 /* This check is the only reason for --readonly to exist */
14190 if (readonly && repair) {
14191 error("repair options are not compatible with --readonly");
14196 * experimental and dangerous
14198 if (repair && check_mode == CHECK_MODE_LOWMEM)
14199 warning("low-memory mode repair support is only partial");
14202 cache_tree_init(&root_cache);
14204 ret = check_mounted(argv[optind]);
14207 error("could not check mount status: %s",
14213 "%s is currently mounted, use --force if you really intend to check the filesystem",
14221 error("repair and --force is not yet supported");
14228 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14232 "filesystem mounted, continuing because of --force");
14234 /* A block device is mounted in exclusive mode by kernel */
14235 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14238 /* only allow partial opening under repair mode */
14240 ctree_flags |= OPEN_CTREE_PARTIAL;
14242 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14243 chunk_root_bytenr, ctree_flags);
14245 error("cannot open file system");
14251 global_info = info;
14252 root = info->fs_root;
14253 uuid_unparse(info->super_copy->fsid, uuidbuf);
14255 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14258 * Check the bare minimum before starting anything else that could rely
14259 * on it, namely the tree roots, any local consistency checks
14261 if (!extent_buffer_uptodate(info->tree_root->node) ||
14262 !extent_buffer_uptodate(info->dev_root->node) ||
14263 !extent_buffer_uptodate(info->chunk_root->node)) {
14264 error("critical roots corrupted, unable to check the filesystem");
14270 if (clear_space_cache) {
14271 ret = do_clear_free_space_cache(info, clear_space_cache);
14277 * repair mode will force us to commit transaction which
14278 * will make us fail to load log tree when mounting.
14280 if (repair && btrfs_super_log_root(info->super_copy)) {
14281 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14287 ret = zero_log_tree(root);
14290 error("failed to zero log tree: %d", ret);
14295 if (qgroup_report) {
14296 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14298 ret = qgroup_verify_all(info);
14305 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14306 subvolid, argv[optind], uuidbuf);
14307 ret = print_extent_state(info, subvolid);
14312 if (init_extent_tree || init_csum_tree) {
14313 struct btrfs_trans_handle *trans;
14315 trans = btrfs_start_transaction(info->extent_root, 0);
14316 if (IS_ERR(trans)) {
14317 error("error starting transaction");
14318 ret = PTR_ERR(trans);
14323 if (init_extent_tree) {
14324 printf("Creating a new extent tree\n");
14325 ret = reinit_extent_tree(trans, info);
14331 if (init_csum_tree) {
14332 printf("Reinitialize checksum tree\n");
14333 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14335 error("checksum tree initialization failed: %d",
14342 ret = fill_csum_tree(trans, info->csum_root,
14346 error("checksum tree refilling failed: %d", ret);
14351 * Ok now we commit and run the normal fsck, which will add
14352 * extent entries for all of the items it finds.
14354 ret = btrfs_commit_transaction(trans, info->extent_root);
14359 if (!extent_buffer_uptodate(info->extent_root->node)) {
14360 error("critical: extent_root, unable to check the filesystem");
14365 if (!extent_buffer_uptodate(info->csum_root->node)) {
14366 error("critical: csum_root, unable to check the filesystem");
14372 ret = do_check_chunks_and_extents(info);
14376 "errors found in extent allocation tree or chunk allocation");
14378 ret = repair_root_items(info);
14381 error("failed to repair root items: %s", strerror(-ret));
14385 fprintf(stderr, "Fixed %d roots.\n", ret);
14387 } else if (ret > 0) {
14389 "Found %d roots with an outdated root item.\n",
14392 "Please run a filesystem check with the option --repair to fix them.\n");
14398 if (!ctx.progress_enabled) {
14399 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14400 fprintf(stderr, "checking free space tree\n");
14402 fprintf(stderr, "checking free space cache\n");
14404 ret = check_space_cache(root);
14407 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14408 error("errors found in free space tree");
14410 error("errors found in free space cache");
14415 * We used to have to have these hole extents in between our real
14416 * extents so if we don't have this flag set we need to make sure there
14417 * are no gaps in the file extents for inodes, otherwise we can just
14418 * ignore it when this happens.
14420 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14421 ret = do_check_fs_roots(info, &root_cache);
14424 error("errors found in fs roots");
14428 fprintf(stderr, "checking csums\n");
14429 ret = check_csums(root);
14432 error("errors found in csum tree");
14436 fprintf(stderr, "checking root refs\n");
14437 /* For low memory mode, check_fs_roots_v2 handles root refs */
14438 if (check_mode != CHECK_MODE_LOWMEM) {
14439 ret = check_root_refs(root, &root_cache);
14442 error("errors found in root refs");
14447 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14448 struct extent_buffer *eb;
14450 eb = list_first_entry(&root->fs_info->recow_ebs,
14451 struct extent_buffer, recow);
14452 list_del_init(&eb->recow);
14453 ret = recow_extent_buffer(root, eb);
14456 error("fails to fix transid errors");
14461 while (!list_empty(&delete_items)) {
14462 struct bad_item *bad;
14464 bad = list_first_entry(&delete_items, struct bad_item, list);
14465 list_del_init(&bad->list);
14467 ret = delete_bad_item(root, bad);
14473 if (info->quota_enabled) {
14474 fprintf(stderr, "checking quota groups\n");
14475 ret = qgroup_verify_all(info);
14478 error("failed to check quota groups");
14482 ret = repair_qgroups(info, &qgroups_repaired);
14485 error("failed to repair quota groups");
14491 if (!list_empty(&root->fs_info->recow_ebs)) {
14492 error("transid errors in file system");
14497 printf("found %llu bytes used, ",
14498 (unsigned long long)bytes_used);
14500 printf("error(s) found\n");
14502 printf("no error found\n");
14503 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14504 printf("total tree bytes: %llu\n",
14505 (unsigned long long)total_btree_bytes);
14506 printf("total fs tree bytes: %llu\n",
14507 (unsigned long long)total_fs_tree_bytes);
14508 printf("total extent tree bytes: %llu\n",
14509 (unsigned long long)total_extent_tree_bytes);
14510 printf("btree space waste bytes: %llu\n",
14511 (unsigned long long)btree_space_waste);
14512 printf("file data blocks allocated: %llu\n referenced %llu\n",
14513 (unsigned long long)data_bytes_allocated,
14514 (unsigned long long)data_bytes_referenced);
14516 free_qgroup_counts();
14517 free_root_recs_tree(&root_cache);
14521 if (ctx.progress_enabled)
14522 task_deinit(ctx.info);