2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87 struct extent_backref {
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
98 return rb_entry(node, struct extent_backref, node);
101 struct data_backref {
102 struct extent_backref node;
/*
 * Item error bits.  Every macro is a distinct single bit, so several of them
 * can be OR-ed together into one value.
 */
#define ROOT_DIR_ERROR		(1 << 1)	/* ROOT_DIR is bad */
#define DIR_ITEM_MISSING	(1 << 2)	/* no DIR_ITEM was found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM exists but does not match */
#define INODE_REF_MISSING	(1 << 4)	/* no INODE_REF/INODE_EXTREF was found */
#define INODE_ITEM_MISSING	(1 << 5)	/* no INODE_ITEM was found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM exists but does not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* FILE_EXTENT is bad */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* no CSUM_ITEM was found */
#define LINK_COUNT_ERROR	(1 << 10)	/* nlink count of INODE_ITEM is wrong */
#define NBYTES_ERROR		(1 << 11)	/* nbytes count of INODE_ITEM is wrong */
#define ISIZE_ERROR		(1 << 12)	/* size count of INODE_ITEM is wrong */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM without any reference */
#define NO_INODE_ITEM		(1 << 14)	/* there is no inode_item */
#define LAST_ITEM		(1 << 15)	/* this tree traversal is complete */
#define ROOT_REF_MISSING	(1 << 16)	/* no ROOT_REF was found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF exists but does not match */
#define DIR_INDEX_MISSING	(1 << 18)	/* no DIR_INDEX was found */
#define DIR_INDEX_MISMATCH	(1 << 19)	/* DIR_INDEX exists but does not match */
#define DIR_COUNT_AGAIN		(1 << 20)	/* directory isize must be recalculated */
137 static inline struct data_backref* to_data_backref(struct extent_backref *back)
139 return container_of(back, struct data_backref, node);
142 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
144 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
145 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
146 struct data_backref *back1 = to_data_backref(ext1);
147 struct data_backref *back2 = to_data_backref(ext2);
149 WARN_ON(!ext1->is_data);
150 WARN_ON(!ext2->is_data);
152 /* parent and root are a union, so this covers both */
153 if (back1->parent > back2->parent)
155 if (back1->parent < back2->parent)
158 /* This is a full backref and the parents match. */
159 if (back1->node.full_backref)
162 if (back1->owner > back2->owner)
164 if (back1->owner < back2->owner)
167 if (back1->offset > back2->offset)
169 if (back1->offset < back2->offset)
172 if (back1->found_ref && back2->found_ref) {
173 if (back1->disk_bytenr > back2->disk_bytenr)
175 if (back1->disk_bytenr < back2->disk_bytenr)
178 if (back1->bytes > back2->bytes)
180 if (back1->bytes < back2->bytes)
 * Much like data_backref, but with the undetermined members removed and
 * linked through a list_head instead.
 * During extent scan, it is stored in root->orphan_data_extent.
 * During fs tree scan, it is then moved to inode_rec->orphan_extents.
193 struct orphan_data_extent {
194 struct list_head list;
202 struct tree_backref {
203 struct extent_backref node;
210 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
212 return container_of(back, struct tree_backref, node);
215 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
217 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
218 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
219 struct tree_backref *back1 = to_tree_backref(ext1);
220 struct tree_backref *back2 = to_tree_backref(ext2);
222 WARN_ON(ext1->is_data);
223 WARN_ON(ext2->is_data);
225 /* parent and root are a union, so this covers both */
226 if (back1->parent > back2->parent)
228 if (back1->parent < back2->parent)
234 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
236 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
237 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
239 if (ext1->is_data > ext2->is_data)
242 if (ext1->is_data < ext2->is_data)
245 if (ext1->full_backref > ext2->full_backref)
247 if (ext1->full_backref < ext2->full_backref)
251 return compare_data_backref(node1, node2);
253 return compare_tree_backref(node1, node2);
/*
 * Value used to explicitly initialize
 * extent_record::flag_block_full_backref (a two-bit field).
 */
enum {
	FLAG_UNSET = 2,
};
259 struct extent_record {
260 struct list_head backrefs;
261 struct list_head dups;
262 struct rb_root backref_tree;
263 struct list_head list;
264 struct cache_extent cache;
265 struct btrfs_disk_key parent_key;
270 u64 extent_item_refs;
272 u64 parent_generation;
276 unsigned int flag_block_full_backref:2;
277 unsigned int found_rec:1;
278 unsigned int content_checked:1;
279 unsigned int owner_ref_checked:1;
280 unsigned int is_root:1;
281 unsigned int metadata:1;
282 unsigned int bad_full_backref:1;
283 unsigned int crossing_stripes:1;
284 unsigned int wrong_chunk_type:1;
287 static inline struct extent_record* to_extent_record(struct list_head *entry)
289 return container_of(entry, struct extent_record, list);
292 struct inode_backref {
293 struct list_head list;
294 unsigned int found_dir_item:1;
295 unsigned int found_dir_index:1;
296 unsigned int found_inode_ref:1;
306 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
308 return list_entry(entry, struct inode_backref, list);
311 struct root_item_record {
312 struct list_head list;
318 struct btrfs_key drop_key;
/*
 * Backref error bits, OR-ed into a backref's 'errors' field.  The trailing
 * comment on each line gives the resulting mask in hexadecimal.
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)	/* 0x0001 */
#define REF_ERR_NO_DIR_INDEX		(1 << 1)	/* 0x0002 */
#define REF_ERR_NO_INODE_REF		(1 << 2)	/* 0x0004 */
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)	/* 0x0008 */
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)	/* 0x0010 */
#define REF_ERR_DUP_INODE_REF		(1 << 5)	/* 0x0020 */
#define REF_ERR_INDEX_UNMATCH		(1 << 6)	/* 0x0040 */
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)	/* 0x0080 */
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x0100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)	/* 0x0200 */
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)	/* 0x0400 */
#define REF_ERR_DUP_ROOT_REF		(1 << 11)	/* 0x0800 */
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)	/* 0x1000 */
335 struct file_extent_hole {
341 struct inode_record {
342 struct list_head backrefs;
343 unsigned int checked:1;
344 unsigned int merging:1;
345 unsigned int found_inode_item:1;
346 unsigned int found_dir_item:1;
347 unsigned int found_file_extent:1;
348 unsigned int found_csum_item:1;
349 unsigned int some_csum_missing:1;
350 unsigned int nodatasum:1;
363 struct rb_root holes;
364 struct list_head orphan_extents;
/*
 * Inode error bits, OR-ed into inode_record 'errors'.  The trailing comment
 * on each line gives the resulting mask in hexadecimal.
 */
#define I_ERR_NO_INODE_ITEM		(1 << 0)	/* 0x0001 */
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)	/* 0x0002 */
#define I_ERR_DUP_INODE_ITEM		(1 << 2)	/* 0x0004 */
#define I_ERR_DUP_DIR_INDEX		(1 << 3)	/* 0x0008 */
#define I_ERR_ODD_DIR_ITEM		(1 << 4)	/* 0x0010 */
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)	/* 0x0020 */
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)	/* 0x0040 */
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)	/* 0x0080 */
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x0100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)	/* 0x0200 */
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x0400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)	/* 0x0800 */
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)	/* 0x1000 */
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)	/* 0x2000 */
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)	/* 0x4000 */
385 struct root_backref {
386 struct list_head list;
387 unsigned int found_dir_item:1;
388 unsigned int found_dir_index:1;
389 unsigned int found_back_ref:1;
390 unsigned int found_forward_ref:1;
391 unsigned int reachable:1;
400 static inline struct root_backref* to_root_backref(struct list_head *entry)
402 return list_entry(entry, struct root_backref, list);
406 struct list_head backrefs;
407 struct cache_extent cache;
408 unsigned int found_root_item:1;
414 struct cache_extent cache;
419 struct cache_extent cache;
420 struct cache_tree root_cache;
421 struct cache_tree inode_cache;
422 struct inode_record *current;
431 struct walk_control {
432 struct cache_tree shared;
433 struct shared_node *nodes[BTRFS_MAX_LEVEL];
439 struct btrfs_key key;
441 struct list_head list;
444 struct extent_entry {
449 struct list_head list;
452 struct root_item_info {
453 /* level of the root */
455 /* number of nodes at this level, must be 1 for a root */
459 struct cache_extent cache_extent;
463 * Error bit for low memory mode check.
465 * Currently no caller cares about it yet. Just internal use for error
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
/*
 * CROSSING_STRIPE_BOUNDARY used to be (1 << 4), the same bit as
 * REFERENCER_MISMATCH, which made the two errors indistinguishable once
 * OR-ed into a mask.  It now owns the first free bit; every other value is
 * unchanged.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
479 static void *print_status_check(void *p)
481 struct task_ctx *priv = p;
482 const char work_indicator[] = { '.', 'o', 'O', 'o' };
484 static char *task_position_string[] = {
486 "checking free space cache",
490 task_period_start(priv->info, 1000 /* 1s */);
492 if (priv->tp == TASK_NOTHING)
496 printf("%s [%c]\r", task_position_string[priv->tp],
497 work_indicator[count % 4]);
500 task_period_wait(priv->info);
505 static int print_status_return(void *p)
513 static enum btrfs_check_mode parse_check_mode(const char *str)
515 if (strcmp(str, "lowmem") == 0)
516 return CHECK_MODE_LOWMEM;
517 if (strcmp(str, "orig") == 0)
518 return CHECK_MODE_ORIGINAL;
519 if (strcmp(str, "original") == 0)
520 return CHECK_MODE_ORIGINAL;
522 return CHECK_MODE_UNKNOWN;
/* Compatibility function that allows old code to be reused */
526 static u64 first_extent_gap(struct rb_root *holes)
528 struct file_extent_hole *hole;
530 if (RB_EMPTY_ROOT(holes))
533 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
537 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
539 struct file_extent_hole *hole1;
540 struct file_extent_hole *hole2;
542 hole1 = rb_entry(node1, struct file_extent_hole, node);
543 hole2 = rb_entry(node2, struct file_extent_hole, node);
545 if (hole1->start > hole2->start)
547 if (hole1->start < hole2->start)
549 /* Now hole1->start == hole2->start */
550 if (hole1->len >= hole2->len)
552 * Hole 1 will be merge center
553 * Same hole will be merged later
556 /* Hole 2 will be merge center */
 * Add a hole to the record
 * This also performs hole merging for copy_file_extent_holes(),
 * which ensures there are no contiguous holes left in the record.
566 static int add_file_extent_hole(struct rb_root *holes,
569 struct file_extent_hole *hole;
570 struct file_extent_hole *prev = NULL;
571 struct file_extent_hole *next = NULL;
573 hole = malloc(sizeof(*hole));
578 /* Since compare will not return 0, no -EEXIST will happen */
579 rb_insert(holes, &hole->node, compare_hole);
581 /* simple merge with previous hole */
582 if (rb_prev(&hole->node))
583 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
585 if (prev && prev->start + prev->len >= hole->start) {
586 hole->len = hole->start + hole->len - prev->start;
587 hole->start = prev->start;
588 rb_erase(&prev->node, holes);
593 /* iterate merge with next holes */
595 if (!rb_next(&hole->node))
597 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
599 if (hole->start + hole->len >= next->start) {
600 if (hole->start + hole->len <= next->start + next->len)
601 hole->len = next->start + next->len -
603 rb_erase(&next->node, holes);
612 static int compare_hole_range(struct rb_node *node, void *data)
614 struct file_extent_hole *hole;
617 hole = (struct file_extent_hole *)data;
620 hole = rb_entry(node, struct file_extent_hole, node);
621 if (start < hole->start)
623 if (start >= hole->start && start < hole->start + hole->len)
 * Delete a hole in the record
 * This splits the hole if needed and is much stricter than adding one.
633 static int del_file_extent_hole(struct rb_root *holes,
636 struct file_extent_hole *hole;
637 struct file_extent_hole tmp;
642 struct rb_node *node;
649 node = rb_search(holes, &tmp, compare_hole_range, NULL);
652 hole = rb_entry(node, struct file_extent_hole, node);
653 if (start + len > hole->start + hole->len)
657 * Now there will be no overlap, delete the hole and re-add the
658 * split(s) if they exists.
660 if (start > hole->start) {
661 prev_start = hole->start;
662 prev_len = start - hole->start;
665 if (hole->start + hole->len > start + len) {
666 next_start = start + len;
667 next_len = hole->start + hole->len - start - len;
670 rb_erase(node, holes);
673 ret = add_file_extent_hole(holes, prev_start, prev_len);
678 ret = add_file_extent_hole(holes, next_start, next_len);
685 static int copy_file_extent_holes(struct rb_root *dst,
688 struct file_extent_hole *hole;
689 struct rb_node *node;
692 node = rb_first(src);
694 hole = rb_entry(node, struct file_extent_hole, node);
695 ret = add_file_extent_hole(dst, hole->start, hole->len);
698 node = rb_next(node);
703 static void free_file_extent_holes(struct rb_root *holes)
705 struct rb_node *node;
706 struct file_extent_hole *hole;
708 node = rb_first(holes);
710 hole = rb_entry(node, struct file_extent_hole, node);
711 rb_erase(node, holes);
713 node = rb_first(holes);
717 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
719 static void record_root_in_trans(struct btrfs_trans_handle *trans,
720 struct btrfs_root *root)
722 if (root->last_trans != trans->transid) {
723 root->track_dirty = 1;
724 root->last_trans = trans->transid;
725 root->commit_root = root->node;
726 extent_buffer_get(root->node);
730 static u8 imode_to_type(u32 imode)
733 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
734 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
735 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
736 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
737 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
738 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
739 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
740 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
743 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
747 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
749 struct device_record *rec1;
750 struct device_record *rec2;
752 rec1 = rb_entry(node1, struct device_record, node);
753 rec2 = rb_entry(node2, struct device_record, node);
754 if (rec1->devid > rec2->devid)
756 else if (rec1->devid < rec2->devid)
762 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
764 struct inode_record *rec;
765 struct inode_backref *backref;
766 struct inode_backref *orig;
767 struct inode_backref *tmp;
768 struct orphan_data_extent *src_orphan;
769 struct orphan_data_extent *dst_orphan;
774 rec = malloc(sizeof(*rec));
776 return ERR_PTR(-ENOMEM);
777 memcpy(rec, orig_rec, sizeof(*rec));
779 INIT_LIST_HEAD(&rec->backrefs);
780 INIT_LIST_HEAD(&rec->orphan_extents);
781 rec->holes = RB_ROOT;
783 list_for_each_entry(orig, &orig_rec->backrefs, list) {
784 size = sizeof(*orig) + orig->namelen + 1;
785 backref = malloc(size);
790 memcpy(backref, orig, size);
791 list_add_tail(&backref->list, &rec->backrefs);
793 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
794 dst_orphan = malloc(sizeof(*dst_orphan));
799 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
800 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
802 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
809 rb = rb_first(&rec->holes);
811 struct file_extent_hole *hole;
813 hole = rb_entry(rb, struct file_extent_hole, node);
819 if (!list_empty(&rec->backrefs))
820 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
821 list_del(&orig->list);
825 if (!list_empty(&rec->orphan_extents))
826 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
827 list_del(&orig->list);
836 static void print_orphan_data_extents(struct list_head *orphan_extents,
839 struct orphan_data_extent *orphan;
841 if (list_empty(orphan_extents))
843 printf("The following data extent is lost in tree %llu:\n",
845 list_for_each_entry(orphan, orphan_extents, list) {
846 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
847 orphan->objectid, orphan->offset, orphan->disk_bytenr,
852 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
854 u64 root_objectid = root->root_key.objectid;
855 int errors = rec->errors;
859 /* reloc root errors, we print its corresponding fs root objectid*/
860 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
861 root_objectid = root->root_key.offset;
862 fprintf(stderr, "reloc");
864 fprintf(stderr, "root %llu inode %llu errors %x",
865 (unsigned long long) root_objectid,
866 (unsigned long long) rec->ino, rec->errors);
868 if (errors & I_ERR_NO_INODE_ITEM)
869 fprintf(stderr, ", no inode item");
870 if (errors & I_ERR_NO_ORPHAN_ITEM)
871 fprintf(stderr, ", no orphan item");
872 if (errors & I_ERR_DUP_INODE_ITEM)
873 fprintf(stderr, ", dup inode item");
874 if (errors & I_ERR_DUP_DIR_INDEX)
875 fprintf(stderr, ", dup dir index");
876 if (errors & I_ERR_ODD_DIR_ITEM)
877 fprintf(stderr, ", odd dir item");
878 if (errors & I_ERR_ODD_FILE_EXTENT)
879 fprintf(stderr, ", odd file extent");
880 if (errors & I_ERR_BAD_FILE_EXTENT)
881 fprintf(stderr, ", bad file extent");
882 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
883 fprintf(stderr, ", file extent overlap");
884 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
885 fprintf(stderr, ", file extent discount");
886 if (errors & I_ERR_DIR_ISIZE_WRONG)
887 fprintf(stderr, ", dir isize wrong");
888 if (errors & I_ERR_FILE_NBYTES_WRONG)
889 fprintf(stderr, ", nbytes wrong");
890 if (errors & I_ERR_ODD_CSUM_ITEM)
891 fprintf(stderr, ", odd csum item");
892 if (errors & I_ERR_SOME_CSUM_MISSING)
893 fprintf(stderr, ", some csum missing");
894 if (errors & I_ERR_LINK_COUNT_WRONG)
895 fprintf(stderr, ", link count wrong");
896 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
897 fprintf(stderr, ", orphan file extent");
898 fprintf(stderr, "\n");
899 /* Print the orphan extents if needed */
900 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
901 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
903 /* Print the holes if needed */
904 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
905 struct file_extent_hole *hole;
906 struct rb_node *node;
909 node = rb_first(&rec->holes);
910 fprintf(stderr, "Found file extent holes:\n");
913 hole = rb_entry(node, struct file_extent_hole, node);
914 fprintf(stderr, "\tstart: %llu, len: %llu\n",
915 hole->start, hole->len);
916 node = rb_next(node);
919 fprintf(stderr, "\tstart: 0, len: %llu\n",
921 root->fs_info->sectorsize));
925 static void print_ref_error(int errors)
927 if (errors & REF_ERR_NO_DIR_ITEM)
928 fprintf(stderr, ", no dir item");
929 if (errors & REF_ERR_NO_DIR_INDEX)
930 fprintf(stderr, ", no dir index");
931 if (errors & REF_ERR_NO_INODE_REF)
932 fprintf(stderr, ", no inode ref");
933 if (errors & REF_ERR_DUP_DIR_ITEM)
934 fprintf(stderr, ", dup dir item");
935 if (errors & REF_ERR_DUP_DIR_INDEX)
936 fprintf(stderr, ", dup dir index");
937 if (errors & REF_ERR_DUP_INODE_REF)
938 fprintf(stderr, ", dup inode ref");
939 if (errors & REF_ERR_INDEX_UNMATCH)
940 fprintf(stderr, ", index mismatch");
941 if (errors & REF_ERR_FILETYPE_UNMATCH)
942 fprintf(stderr, ", filetype mismatch");
943 if (errors & REF_ERR_NAME_TOO_LONG)
944 fprintf(stderr, ", name too long");
945 if (errors & REF_ERR_NO_ROOT_REF)
946 fprintf(stderr, ", no root ref");
947 if (errors & REF_ERR_NO_ROOT_BACKREF)
948 fprintf(stderr, ", no root backref");
949 if (errors & REF_ERR_DUP_ROOT_REF)
950 fprintf(stderr, ", dup root ref");
951 if (errors & REF_ERR_DUP_ROOT_BACKREF)
952 fprintf(stderr, ", dup root backref");
953 fprintf(stderr, "\n");
956 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
959 struct ptr_node *node;
960 struct cache_extent *cache;
961 struct inode_record *rec = NULL;
964 cache = lookup_cache_extent(inode_cache, ino, 1);
966 node = container_of(cache, struct ptr_node, cache);
968 if (mod && rec->refs > 1) {
969 node->data = clone_inode_rec(rec);
970 if (IS_ERR(node->data))
976 rec = calloc(1, sizeof(*rec));
978 return ERR_PTR(-ENOMEM);
980 rec->extent_start = (u64)-1;
982 INIT_LIST_HEAD(&rec->backrefs);
983 INIT_LIST_HEAD(&rec->orphan_extents);
984 rec->holes = RB_ROOT;
986 node = malloc(sizeof(*node));
989 return ERR_PTR(-ENOMEM);
991 node->cache.start = ino;
992 node->cache.size = 1;
995 if (ino == BTRFS_FREE_INO_OBJECTID)
998 ret = insert_cache_extent(inode_cache, &node->cache);
1000 return ERR_PTR(-EEXIST);
1005 static void free_orphan_data_extents(struct list_head *orphan_extents)
1007 struct orphan_data_extent *orphan;
1009 while (!list_empty(orphan_extents)) {
1010 orphan = list_entry(orphan_extents->next,
1011 struct orphan_data_extent, list);
1012 list_del(&orphan->list);
1017 static void free_inode_rec(struct inode_record *rec)
1019 struct inode_backref *backref;
1021 if (--rec->refs > 0)
1024 while (!list_empty(&rec->backrefs)) {
1025 backref = to_inode_backref(rec->backrefs.next);
1026 list_del(&backref->list);
1029 free_orphan_data_extents(&rec->orphan_extents);
1030 free_file_extent_holes(&rec->holes);
1034 static int can_free_inode_rec(struct inode_record *rec)
1036 if (!rec->errors && rec->checked && rec->found_inode_item &&
1037 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1042 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1043 struct inode_record *rec)
1045 struct cache_extent *cache;
1046 struct inode_backref *tmp, *backref;
1047 struct ptr_node *node;
1050 if (!rec->found_inode_item)
1053 filetype = imode_to_type(rec->imode);
1054 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1055 if (backref->found_dir_item && backref->found_dir_index) {
1056 if (backref->filetype != filetype)
1057 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1058 if (!backref->errors && backref->found_inode_ref &&
1059 rec->nlink == rec->found_link) {
1060 list_del(&backref->list);
1066 if (!rec->checked || rec->merging)
1069 if (S_ISDIR(rec->imode)) {
1070 if (rec->found_size != rec->isize)
1071 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1072 if (rec->found_file_extent)
1073 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1074 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1075 if (rec->found_dir_item)
1076 rec->errors |= I_ERR_ODD_DIR_ITEM;
1077 if (rec->found_size != rec->nbytes)
1078 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1079 if (rec->nlink > 0 && !no_holes &&
1080 (rec->extent_end < rec->isize ||
1081 first_extent_gap(&rec->holes) < rec->isize))
1082 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1085 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1086 if (rec->found_csum_item && rec->nodatasum)
1087 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1088 if (rec->some_csum_missing && !rec->nodatasum)
1089 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1092 BUG_ON(rec->refs != 1);
1093 if (can_free_inode_rec(rec)) {
1094 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1095 node = container_of(cache, struct ptr_node, cache);
1096 BUG_ON(node->data != rec);
1097 remove_cache_extent(inode_cache, &node->cache);
1099 free_inode_rec(rec);
1103 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1105 struct btrfs_path path;
1106 struct btrfs_key key;
1109 key.objectid = BTRFS_ORPHAN_OBJECTID;
1110 key.type = BTRFS_ORPHAN_ITEM_KEY;
1113 btrfs_init_path(&path);
1114 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1115 btrfs_release_path(&path);
1121 static int process_inode_item(struct extent_buffer *eb,
1122 int slot, struct btrfs_key *key,
1123 struct shared_node *active_node)
1125 struct inode_record *rec;
1126 struct btrfs_inode_item *item;
1128 rec = active_node->current;
1129 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1130 if (rec->found_inode_item) {
1131 rec->errors |= I_ERR_DUP_INODE_ITEM;
1134 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1135 rec->nlink = btrfs_inode_nlink(eb, item);
1136 rec->isize = btrfs_inode_size(eb, item);
1137 rec->nbytes = btrfs_inode_nbytes(eb, item);
1138 rec->imode = btrfs_inode_mode(eb, item);
1139 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1141 rec->found_inode_item = 1;
1142 if (rec->nlink == 0)
1143 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1144 maybe_free_inode_rec(&active_node->inode_cache, rec);
1148 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1150 int namelen, u64 dir)
1152 struct inode_backref *backref;
1154 list_for_each_entry(backref, &rec->backrefs, list) {
1155 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1157 if (backref->dir != dir || backref->namelen != namelen)
1159 if (memcmp(name, backref->name, namelen))
1164 backref = malloc(sizeof(*backref) + namelen + 1);
1167 memset(backref, 0, sizeof(*backref));
1169 backref->namelen = namelen;
1170 memcpy(backref->name, name, namelen);
1171 backref->name[namelen] = '\0';
1172 list_add_tail(&backref->list, &rec->backrefs);
1176 static int add_inode_backref(struct cache_tree *inode_cache,
1177 u64 ino, u64 dir, u64 index,
1178 const char *name, int namelen,
1179 u8 filetype, u8 itemtype, int errors)
1181 struct inode_record *rec;
1182 struct inode_backref *backref;
1184 rec = get_inode_rec(inode_cache, ino, 1);
1185 BUG_ON(IS_ERR(rec));
1186 backref = get_inode_backref(rec, name, namelen, dir);
1189 backref->errors |= errors;
1190 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1191 if (backref->found_dir_index)
1192 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1193 if (backref->found_inode_ref && backref->index != index)
1194 backref->errors |= REF_ERR_INDEX_UNMATCH;
1195 if (backref->found_dir_item && backref->filetype != filetype)
1196 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1198 backref->index = index;
1199 backref->filetype = filetype;
1200 backref->found_dir_index = 1;
1201 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1203 if (backref->found_dir_item)
1204 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1205 if (backref->found_dir_index && backref->filetype != filetype)
1206 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1208 backref->filetype = filetype;
1209 backref->found_dir_item = 1;
1210 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1211 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1212 if (backref->found_inode_ref)
1213 backref->errors |= REF_ERR_DUP_INODE_REF;
1214 if (backref->found_dir_index && backref->index != index)
1215 backref->errors |= REF_ERR_INDEX_UNMATCH;
1217 backref->index = index;
1219 backref->ref_type = itemtype;
1220 backref->found_inode_ref = 1;
1225 maybe_free_inode_rec(inode_cache, rec);
1229 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1230 struct cache_tree *dst_cache)
1232 struct inode_backref *backref;
1237 list_for_each_entry(backref, &src->backrefs, list) {
1238 if (backref->found_dir_index) {
1239 add_inode_backref(dst_cache, dst->ino, backref->dir,
1240 backref->index, backref->name,
1241 backref->namelen, backref->filetype,
1242 BTRFS_DIR_INDEX_KEY, backref->errors);
1244 if (backref->found_dir_item) {
1246 add_inode_backref(dst_cache, dst->ino,
1247 backref->dir, 0, backref->name,
1248 backref->namelen, backref->filetype,
1249 BTRFS_DIR_ITEM_KEY, backref->errors);
1251 if (backref->found_inode_ref) {
1252 add_inode_backref(dst_cache, dst->ino,
1253 backref->dir, backref->index,
1254 backref->name, backref->namelen, 0,
1255 backref->ref_type, backref->errors);
1259 if (src->found_dir_item)
1260 dst->found_dir_item = 1;
1261 if (src->found_file_extent)
1262 dst->found_file_extent = 1;
1263 if (src->found_csum_item)
1264 dst->found_csum_item = 1;
1265 if (src->some_csum_missing)
1266 dst->some_csum_missing = 1;
1267 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1268 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1273 BUG_ON(src->found_link < dir_count);
1274 dst->found_link += src->found_link - dir_count;
1275 dst->found_size += src->found_size;
1276 if (src->extent_start != (u64)-1) {
1277 if (dst->extent_start == (u64)-1) {
1278 dst->extent_start = src->extent_start;
1279 dst->extent_end = src->extent_end;
1281 if (dst->extent_end > src->extent_start)
1282 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1283 else if (dst->extent_end < src->extent_start) {
1284 ret = add_file_extent_hole(&dst->holes,
1286 src->extent_start - dst->extent_end);
1288 if (dst->extent_end < src->extent_end)
1289 dst->extent_end = src->extent_end;
1293 dst->errors |= src->errors;
1294 if (src->found_inode_item) {
1295 if (!dst->found_inode_item) {
1296 dst->nlink = src->nlink;
1297 dst->isize = src->isize;
1298 dst->nbytes = src->nbytes;
1299 dst->imode = src->imode;
1300 dst->nodatasum = src->nodatasum;
1301 dst->found_inode_item = 1;
1303 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Splice the inode records cached in @src_node into @dst_node.
 *
 * One reference on @src_node is dropped first.  Entries are then taken from
 * src_node->root_cache into dst_node->root_cache and, afterwards, from
 * src_node->inode_cache into dst_node->inode_cache.  An entry whose range
 * already exists in the destination is merged into the existing record.
 * Finally dst_node->current is moved to the inode that was current in
 * @src_node when the destination has no current record or is behind it.
 */
1311 static int splice_shared_node(struct shared_node *src_node,
1312 struct shared_node *dst_node)
1314 struct cache_extent *cache;
1315 struct ptr_node *node, *ins;
1316 struct cache_tree *src, *dst;
1317 struct inode_record *rec, *conflict;
1318 u64 current_ino = 0;
/* Drop our reference on the source node. */
1322 if (--src_node->refs == 0)
/* Remember which inode the source was positioned on; used at the end. */
1324 if (src_node->current)
1325 current_ino = src_node->current->ino;
/* The root caches are handled first. */
1327 src = &src_node->root_cache;
1328 dst = &dst_node->root_cache;
1330 cache = search_cache_extent(src, 0);
1332 node = container_of(cache, struct ptr_node, cache);
/* Fetch the successor before the entry may be unlinked from src. */
1334 cache = next_cache_extent(cache);
1337 remove_cache_extent(src, &node->cache);
/* Allocate a separate ptr_node covering the same range as the source entry. */
1340 ins = malloc(sizeof(*ins));
1342 ins->cache.start = node->cache.start;
1343 ins->cache.size = node->cache.size;
1347 ret = insert_cache_extent(dst, &ins->cache);
/* dst already tracks this inode: fold rec into the existing record. */
1348 if (ret == -EEXIST) {
1349 conflict = get_inode_rec(dst, rec->ino, 1);
1350 BUG_ON(IS_ERR(conflict));
1351 merge_inode_recs(rec, conflict, dst);
1353 conflict->checked = 1;
1354 if (dst_node->current == conflict)
1355 dst_node->current = NULL;
1357 maybe_free_inode_rec(dst, conflict);
1358 free_inode_rec(rec);
/* Root caches done; point both cursors at the inode caches for the next pass. */
1365 if (src == &src_node->root_cache) {
1366 src = &src_node->inode_cache;
1367 dst = &dst_node->inode_cache;
/*
 * Carry the source's current inode over when the destination has none or
 * is positioned on a smaller inode number; the record it leaves behind is
 * marked checked and offered to maybe_free_inode_rec().
 */
1371 if (current_ino > 0 && (!dst_node->current ||
1372 current_ino > dst_node->current->ino)) {
1373 if (dst_node->current) {
1374 dst_node->current->checked = 1;
1375 maybe_free_inode_rec(dst, dst_node->current);
1377 dst_node->current = get_inode_rec(dst, current_ino, 1);
1378 BUG_ON(IS_ERR(dst_node->current));
/*
 * Per-entry destructor for an inode record cache: resolves the ptr_node
 * that embeds @cache and releases an inode record with free_inode_rec().
 * NOTE(review): rec is presumably taken from the ptr_node - confirm.
 */
1383 static void free_inode_ptr(struct cache_extent *cache)
1385 struct ptr_node *node;
1386 struct inode_record *rec;
1388 node = container_of(cache, struct ptr_node, cache);
1390 free_inode_rec(rec);
/*
 * Instantiate the tree-freeing helper for inode record caches, with
 * free_inode_ptr() as the per-entry callback.
 * NOTE(review): free_inode_recs_tree(), called from enter_shared_node(),
 * is presumably the function this macro generates - confirm.
 */
1394 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the shared_node registered in @shared for the one-byte range
 * starting at bytenr and return the structure embedding the cache entry.
 */
1396 static struct shared_node *find_shared_node(struct cache_tree *shared,
1399 struct cache_extent *cache;
1400 struct shared_node *node;
1402 cache = lookup_cache_extent(shared, bytenr, 1);
1404 node = container_of(cache, struct shared_node, cache);
/*
 * Allocate a zeroed shared_node for the tree block at @bytenr, initialise
 * its two record caches and insert it into @shared keyed by a one-byte
 * range at @bytenr.
 * NOTE(review): @refs is presumably stored in the new node - confirm.
 */
1410 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1413 struct shared_node *node;
1415 node = calloc(1, sizeof(*node));
1418 node->cache.start = bytenr;
1419 node->cache.size = 1;
1420 cache_tree_init(&node->root_cache);
1421 cache_tree_init(&node->inode_cache);
1424 ret = insert_cache_extent(shared, &node->cache);
/*
 * Account for reaching the tree block at @bytenr (tree level @level)
 * during the walk tracked by @wc.
 *
 * A block without a shared_node gets one via add_shared_node(); that node is
 * recorded in wc->nodes[@level] and @level becomes the active level.
 * For a block that already has a node, its cached records are either
 * released (walk is at the root level of a root whose root_item has zero
 * refs) or spliced into the currently active node.
 */
1429 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1430 struct walk_control *wc, int level)
1432 struct shared_node *node;
1433 struct shared_node *dest;
1436 if (level == wc->active_node)
/* A node being entered must sit below the currently active level. */
1439 BUG_ON(wc->active_node <= level);
1440 node = find_shared_node(&wc->shared, bytenr);
/* Register the block and make it the active node for this level. */
1442 ret = add_shared_node(&wc->shared, bytenr, refs);
1444 node = find_shared_node(&wc->shared, bytenr);
1445 wc->nodes[level] = node;
1446 wc->active_node = level;
/*
 * Root with zero refs, walked at its root level: drop one reference and,
 * when it was the last, free both record caches and unregister the node.
 */
1450 if (wc->root_level == wc->active_node &&
1451 btrfs_root_refs(&root->root_item) == 0) {
1452 if (--node->refs == 0) {
1453 free_inode_recs_tree(&node->root_cache);
1454 free_inode_recs_tree(&node->inode_cache);
1455 remove_cache_extent(&wc->shared, &node->cache);
/* Hand the records collected for this block to the active node. */
1461 dest = wc->nodes[wc->active_node];
1462 splice_shared_node(node, dest);
/* splice_shared_node() dropped a reference; unregister once none remain. */
1463 if (node->refs == 0) {
1464 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Account for the walk leaving the active shared node at @level.
 *
 * A level above @level is selected as the new active node and the departing
 * node is detached from wc->nodes[].  Its records are spliced into the new
 * active node unless that node is at the root level of a root whose
 * root_item has zero refs.
 */
1470 static int leave_shared_node(struct btrfs_root *root,
1471 struct walk_control *wc, int level)
1473 struct shared_node *node;
1474 struct shared_node *dest;
1477 if (level == wc->root_level)
/* Search upwards for the level that becomes active next. */
1480 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1484 BUG_ON(i >= BTRFS_MAX_LEVEL);
1486 node = wc->nodes[wc->active_node];
1487 wc->nodes[wc->active_node] = NULL;
1488 wc->active_node = i;
1490 dest = wc->nodes[wc->active_node];
1491 if (wc->active_node < wc->root_level ||
1492 btrfs_root_refs(&root->root_item) > 0) {
/* The departing node must still hold more than one reference here. */
1493 BUG_ON(node->refs <= 1);
1494 splice_shared_node(node, dest);
1496 BUG_ON(node->refs < 2);
1505 * 1 - if the root with id child_root_id is a child of root parent_root_id
1506 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1507 * has other root(s) as parent(s)
1508 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Determine, from the tree root reachable through @root->fs_info, how the
 * root child_root_id relates to @parent_root_id.  Both lookups are read-only
 * (no transaction handle is passed to btrfs_search_slot()).
 */
1510 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1513 struct btrfs_path path;
1514 struct btrfs_key key;
1515 struct extent_buffer *leaf;
1519 btrfs_init_path(&path);
/* Step 1: direct lookup of the (parent, ROOT_REF, child) key. */
1521 key.objectid = parent_root_id;
1522 key.type = BTRFS_ROOT_REF_KEY;
1523 key.offset = child_root_id;
1524 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1528 btrfs_release_path(&path);
/* Step 2: scan the child's ROOT_BACKREF items for the parent id. */
1532 key.objectid = child_root_id;
1533 key.type = BTRFS_ROOT_BACKREF_KEY;
1535 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1541 leaf = path.nodes[0];
/* Past the last item of this leaf: continue in the next one. */
1542 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1543 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1546 leaf = path.nodes[0];
1549 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Items for another objectid or key type are not this child's backrefs. */
1550 if (key.objectid != child_root_id ||
1551 key.type != BTRFS_ROOT_BACKREF_KEY)
/* The backref's key offset holds the referencing root id. */
1556 if (key.offset == parent_root_id) {
1557 btrfs_release_path(&path);
1564 btrfs_release_path(&path);
/* The requested parent was not matched: 0 if another parent was seen, else 2. */
1567 return has_parent ? 0 : 2;
/*
 * Record the directory entries packed into the item at @slot of @eb.
 *
 * @key is the item's key (DIR_ITEM or DIR_INDEX).  The current inode record
 * of @active_node is flagged as having a dir item, and for every entry a
 * backref is added to the inode cache or the root cache depending on the
 * type of the entry's location key.
 */
1570 static int process_dir_item(struct extent_buffer *eb,
1571 int slot, struct btrfs_key *key,
1572 struct shared_node *active_node)
1582 struct btrfs_dir_item *di;
1583 struct inode_record *rec;
1584 struct cache_tree *root_cache;
1585 struct cache_tree *inode_cache;
1586 struct btrfs_key location;
1587 char namebuf[BTRFS_NAME_LEN];
1589 root_cache = &active_node->root_cache;
1590 inode_cache = &active_node->inode_cache;
1591 rec = active_node->current;
1592 rec->found_dir_item = 1;
1594 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1595 total = btrfs_item_size_nr(eb, slot);
/* One item may hold several entries; walk them until the item size is consumed. */
1596 while (cur < total) {
1598 btrfs_dir_item_key_to_cpu(eb, di, &location);
1599 name_len = btrfs_dir_name_len(eb, di);
1600 data_len = btrfs_dir_data_len(eb, di);
1601 filetype = btrfs_dir_type(eb, di);
/* Name lengths are accumulated into the size found for this inode. */
1603 rec->found_size += name_len;
/* The header plus name must fit in the item and the name within BTRFS_NAME_LEN. */
1604 if (cur + sizeof(*di) + name_len > total ||
1605 name_len > BTRFS_NAME_LEN) {
1606 error = REF_ERR_NAME_TOO_LONG;
1608 if (cur + sizeof(*di) > total)
1610 len = min_t(u32, total - cur - sizeof(*di),
/* The name bytes follow the dir item header directly. */
1617 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
/* For DIR_ITEM keys the key offset must equal the hash of the name. */
1619 if (key->type == BTRFS_DIR_ITEM_KEY &&
1620 key->offset != btrfs_name_hash(namebuf, len)) {
1621 rec->errors |= I_ERR_ODD_DIR_ITEM;
1622 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1623 key->objectid, key->offset, namebuf, len, filetype,
1624 key->offset, btrfs_name_hash(namebuf, len));
/* Entry points at an inode, at a root, or at something invalid. */
1627 if (location.type == BTRFS_INODE_ITEM_KEY) {
1628 add_inode_backref(inode_cache, location.objectid,
1629 key->objectid, key->offset, namebuf,
1630 len, filetype, key->type, error);
1631 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1632 add_inode_backref(root_cache, location.objectid,
1633 key->objectid, key->offset,
1634 namebuf, len, filetype,
1637 fprintf(stderr, "invalid location in dir item %u\n",
1639 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1640 key->objectid, key->offset, namebuf,
1641 len, filetype, key->type, error);
/* Step over header, name and data to reach the next entry. */
1644 len = sizeof(*di) + name_len + data_len;
1645 di = (struct btrfs_dir_item *)((char *)di + len);
/* More than one entry under a DIR_INDEX key is flagged as a duplicate index. */
1648 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1649 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record the inode references packed into the INODE_REF item at @slot.
 *
 * For each reference a backref is added to the inode cache of @active_node
 * with key->objectid as the inode and key->offset as the directory.
 */
1654 static int process_inode_ref(struct extent_buffer *eb,
1655 int slot, struct btrfs_key *key,
1656 struct shared_node *active_node)
1664 struct cache_tree *inode_cache;
1665 struct btrfs_inode_ref *ref;
1666 char namebuf[BTRFS_NAME_LEN];
1668 inode_cache = &active_node->inode_cache;
1670 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1671 total = btrfs_item_size_nr(eb, slot);
/* One item may hold several references; walk until the item size is consumed. */
1672 while (cur < total) {
1673 name_len = btrfs_inode_ref_name_len(eb, ref);
1674 index = btrfs_inode_ref_index(eb, ref);
1676 /* inode_ref + namelen should not cross item boundary */
1677 if (cur + sizeof(*ref) + name_len > total ||
1678 name_len > BTRFS_NAME_LEN) {
/* Not even the fixed-size header fits in what is left of the item. */
1679 if (total < cur + sizeof(*ref))
1682 /* Still try to read out the remaining part */
1683 len = min_t(u32, total - cur - sizeof(*ref),
1685 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the ref header directly. */
1691 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1692 add_inode_backref(inode_cache, key->objectid, key->offset,
1693 index, namebuf, len, 0, key->type, error);
/* Advance past header and name to the next reference. */
1695 len = sizeof(*ref) + name_len;
1696 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Record the extended inode references packed into the INODE_EXTREF item
 * at @slot.
 *
 * Unlike INODE_REF, the parent directory is read from each entry rather
 * than from the key offset.  Each entry becomes a backref in the inode
 * cache of @active_node.
 * NOTE(review): the visible code checks name_len against BTRFS_NAME_LEN but
 * shows no check against the item boundary as process_inode_ref() has -
 * confirm whether that is intended.
 */
1702 static int process_inode_extref(struct extent_buffer *eb,
1703 int slot, struct btrfs_key *key,
1704 struct shared_node *active_node)
1713 struct cache_tree *inode_cache;
1714 struct btrfs_inode_extref *extref;
1715 char namebuf[BTRFS_NAME_LEN];
1717 inode_cache = &active_node->inode_cache;
1719 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1720 total = btrfs_item_size_nr(eb, slot);
1721 while (cur < total) {
1722 name_len = btrfs_inode_extref_name_len(eb, extref);
1723 index = btrfs_inode_extref_index(eb, extref);
1724 parent = btrfs_inode_extref_parent(eb, extref);
1725 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: clamp the copy to the buffer size and flag the entry. */
1729 len = BTRFS_NAME_LEN;
1730 error = REF_ERR_NAME_TOO_LONG;
1732 read_extent_buffer(eb, namebuf,
1733 (unsigned long)(extref + 1), len);
1734 add_inode_backref(inode_cache, key->objectid, parent,
1735 index, namebuf, len, 0, key->type, error);
/* Advance past header and name to the next entry. */
1737 len = sizeof(*extref) + name_len;
1738 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Walk the checksum tree for the byte range [@start, @start + @len) and
 * measure how much of it is covered by EXTENT_CSUM items.
 * NOTE(review): the covered byte count is presumably accumulated into
 * *@found - confirm.
 */
1745 static int count_csum_range(struct btrfs_root *root, u64 start,
1746 u64 len, u64 *found)
1748 struct btrfs_key key;
1749 struct btrfs_path path;
1750 struct extent_buffer *leaf;
1755 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1757 btrfs_init_path(&path);
1759 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1761 key.type = BTRFS_EXTENT_CSUM_KEY;
1763 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * No exact match: inspect the preceding item, which is a candidate when
 * it is also a csum item.
 */
1767 if (ret > 0 && path.slots[0] > 0) {
1768 leaf = path.nodes[0];
1769 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1770 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1771 key.type == BTRFS_EXTENT_CSUM_KEY)
1776 leaf = path.nodes[0];
/* Past the last item of this leaf: continue in the next one. */
1777 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1778 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1783 leaf = path.nodes[0];
1786 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1787 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1788 key.type != BTRFS_EXTENT_CSUM_KEY)
1791 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* This csum item begins at or after the end of the requested range. */
1792 if (key.offset >= start + len)
1795 if (key.offset > start)
/* Each csum_size bytes of item data cover one sector of disk data. */
1798 size = btrfs_item_size_nr(leaf, path.slots[0]);
1799 csum_end = key.offset + (size / csum_size) *
1800 root->fs_info->sectorsize;
1801 if (csum_end > start) {
1802 size = min(csum_end - start, len);
1811 btrfs_release_path(&path);
/*
 * Validate the EXTENT_DATA item at @slot of @eb and update the current
 * inode record of @active_node.
 *
 * Tracks the file range covered so far (overlaps become errors, gaps become
 * holes), sanity-checks the extent fields per extent type, adds to the
 * record's found_size and, for extents with a disk address outside the data
 * reloc tree, compares the extent against the checksum tree.
 */
1817 static int process_file_extent(struct btrfs_root *root,
1818 struct extent_buffer *eb,
1819 int slot, struct btrfs_key *key,
1820 struct shared_node *active_node)
1822 struct inode_record *rec;
1823 struct btrfs_file_extent_item *fi;
1825 u64 disk_bytenr = 0;
1826 u64 extent_offset = 0;
/* Low-bit mask used to test and round to sector alignment. */
1827 u64 mask = root->fs_info->sectorsize - 1;
1831 rec = active_node->current;
1832 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1833 rec->found_file_extent = 1;
/* (u64)-1 marks "no extent seen yet": start tracking at this extent. */
1835 if (rec->extent_start == (u64)-1) {
1836 rec->extent_start = key->offset;
1837 rec->extent_end = key->offset;
/* Compare the previous end with this extent's file offset. */
1840 if (rec->extent_end > key->offset)
1841 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1842 else if (rec->extent_end < key->offset) {
1843 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1844 key->offset - rec->extent_end);
1849 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1850 extent_type = btrfs_file_extent_type(eb, fi);
1852 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1853 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1855 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1856 rec->found_size += num_bytes;
/* Round the inline length up to a sector multiple for range tracking. */
1857 num_bytes = (num_bytes + mask) & ~mask;
1858 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1859 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1860 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1861 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1862 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned. */
1863 if (num_bytes == 0 || (num_bytes & mask))
1864 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced slice must lie within the extent's ram_bytes. */
1865 if (num_bytes + extent_offset >
1866 btrfs_file_extent_ram_bytes(eb, fi))
1867 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* A preallocated extent must not carry any encoding. */
1868 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1869 (btrfs_file_extent_compression(eb, fi) ||
1870 btrfs_file_extent_encryption(eb, fi) ||
1871 btrfs_file_extent_other_encoding(eb, fi)))
1872 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents with a disk address add to found_size. */
1873 if (disk_bytenr > 0)
1874 rec->found_size += num_bytes;
1876 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1878 rec->extent_end = key->offset + num_bytes;
1881 * The data reloc tree will copy full extents into its inode and then
1882 * copy the corresponding csums. Because the extent it copied could be
1883 * a preallocated extent that hasn't been written to yet there may be no
1884 * csums to copy, ergo we won't have csums for our file extent. This is
1885 * ok so just don't bother checking csums if the inode belongs to the
1888 if (disk_bytenr > 0 &&
1889 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* For compressed extents the on-disk length is what gets looked up. */
1891 if (btrfs_file_extent_compression(eb, fi))
1892 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1894 disk_bytenr += extent_offset;
1896 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
/* Regular extents: note csum presence and whether coverage is partial. */
1899 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1901 rec->found_csum_item = 1;
1902 if (found < num_bytes)
1903 rec->some_csum_missing = 1;
1904 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1906 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item of leaf @eb for the original-mode fs tree check.
 *
 * Items are dispatched by key type to the process_*() helpers, which update
 * the inode records held by the active shared node of @wc.  The node's
 * current record is switched whenever an item with a larger objectid is
 * encountered.
 */
1912 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1913 struct walk_control *wc)
1915 struct btrfs_key key;
1919 struct cache_tree *inode_cache;
1920 struct shared_node *active_node;
/* Same condition under which enter_shared_node() discards cached records. */
1922 if (wc->root_level == wc->active_node &&
1923 btrfs_root_refs(&root->root_item) == 0)
1926 active_node = wc->nodes[wc->active_node];
1927 inode_cache = &active_node->inode_cache;
1928 nritems = btrfs_header_nritems(eb);
1929 for (i = 0; i < nritems; i++) {
1930 btrfs_item_key_to_cpu(eb, &key, i);
/* Free-space objects and orphan items are filtered out here. */
1932 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1934 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/*
 * Moving on to a larger inode number: mark the previous record checked,
 * let it be freed if possible, and fetch the record for the new inode.
 */
1937 if (active_node->current == NULL ||
1938 active_node->current->ino < key.objectid) {
1939 if (active_node->current) {
1940 active_node->current->checked = 1;
1941 maybe_free_inode_rec(inode_cache,
1942 active_node->current);
1944 active_node->current = get_inode_rec(inode_cache,
1946 BUG_ON(IS_ERR(active_node->current));
/* Dispatch on the item's key type. */
1949 case BTRFS_DIR_ITEM_KEY:
1950 case BTRFS_DIR_INDEX_KEY:
1951 ret = process_dir_item(eb, i, &key, active_node);
1953 case BTRFS_INODE_REF_KEY:
1954 ret = process_inode_ref(eb, i, &key, active_node);
1956 case BTRFS_INODE_EXTREF_KEY:
1957 ret = process_inode_extref(eb, i, &key, active_node);
1959 case BTRFS_INODE_ITEM_KEY:
1960 ret = process_inode_item(eb, i, &key, active_node);
1962 case BTRFS_EXTENT_DATA_KEY:
1963 ret = process_file_extent(root, eb, i, &key,
/*
 * Per-level cache of tree block reference information (used through
 * struct node_refs pointers by update_nodes_refs() and the tree walkers).
 * Every array is indexed by tree level.
 */
/* bytenr of the tree block most recently recorded at each level */
1974 u64 bytenr[BTRFS_MAX_LEVEL];
/* reference count looked up for that tree block */
1975 u64 refs[BTRFS_MAX_LEVEL];
/* non-zero when the block at this level must be checked for the current root */
1976 int need_check[BTRFS_MAX_LEVEL];
1977 /* field for checking all trees */
1978 int checked[BTRFS_MAX_LEVEL];
1979 /* the corresponding extent should be marked as full backref or not */
1980 int full_backref[BTRFS_MAX_LEVEL];
/* Forward declarations for helpers called by process_one_leaf_v2(). */
1983 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1984 struct extent_buffer *eb, struct node_refs *nrefs,
1985 u64 level, int check_all);
1986 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1987 unsigned int ext_ref);
1990 * Returns >0 Found error, not fatal, should continue
1991 * Returns <0 Fatal error, must exit the whole check
1992 * Returns 0 No errors found
/*
 * Check the inodes stored in the leaf at path->nodes[0].
 *
 * The leaf is scanned for the first slot where an inode starts, then
 * check_inode_item() is run from there.  Because that call may advance
 * @path into another leaf, the upper levels of the path are re-validated
 * against @nrefs afterwards.
 */
1994 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1995 struct node_refs *nrefs, int *level, int ext_ref)
1997 struct extent_buffer *cur = path->nodes[0];
1998 struct btrfs_key key;
2002 int root_level = btrfs_header_level(root->node);
2004 int ret = 0; /* Final return value */
2005 int err = 0; /* Positive error bitmap */
/* Remember which leaf we started in so a leaf switch can be detected. */
2007 cur_bytenr = cur->start;
2009 /* skip to first inode item or the first inode number change */
2010 nritems = btrfs_header_nritems(cur);
2011 for (i = 0; i < nritems; i++) {
2012 btrfs_item_key_to_cpu(cur, &key, i);
2014 first_ino = key.objectid;
2015 if (key.type == BTRFS_INODE_ITEM_KEY ||
2016 (first_ino && first_ino != key.objectid))
/* Position the path past the last item of this leaf. */
2020 path->slots[0] = nritems;
2026 err |= check_inode_item(root, path, ext_ref);
2028 /* modify cur since check_inode_item may change path */
2029 cur = path->nodes[0];
2031 if (err & LAST_ITEM)
2034 /* still have inode items in this leaf */
2035 if (cur->start == cur_bytenr)
2039 * we have switched to another leaf, above nodes may
2040 * have changed, here walk down the path, if a node
2041 * or leaf is shared, check whether we can skip this
2044 for (i = root_level; i >= 0; i--) {
/* Block at this level is the one already recorded in nrefs. */
2045 if (path->nodes[i]->start == nrefs->bytenr[i])
2048 ret = update_nodes_refs(root, path->nodes[i]->start,
2049 path->nodes[i], nrefs, i, 0);
2053 if (!nrefs->need_check[i]) {
/* Release the path buffers held below *level. */
2059 for (i = 0; i < *level; i++) {
2060 free_extent_buffer(path->nodes[i]);
2061 path->nodes[i] = NULL;
/*
 * Issue readahead for the child blocks referenced by @node, starting at
 * pointer @slot and continuing to the last pointer in the node.
 */
2070 static void reada_walk_down(struct btrfs_root *root,
2071 struct extent_buffer *node, int slot)
2073 struct btrfs_fs_info *fs_info = root->fs_info;
2080 level = btrfs_header_level(node);
2084 nritems = btrfs_header_nritems(node);
2085 for (i = slot; i < nritems; i++) {
2086 bytenr = btrfs_node_blockptr(node, i);
2087 ptr_gen = btrfs_node_ptr_generation(node, i);
2088 readahead_tree_block(fs_info, bytenr, ptr_gen);
2093 * Check the child node/leaf by the following condition:
2094 * 1. the first item key of the node/leaf should be the same with the one
2096 * 2. block in parent node should match the child node/leaf.
2097 * 3. generation of parent node and child's header should be consistent.
2099 * Or the child node/leaf pointed by the key in parent is not valid.
2101 * We hope to check leaf owner too, but since subvol may share leaves,
2102 * which makes leaf owner check not so strong, key check should be
2103 * sufficient enough for that case.
/*
 * Validate @child against the pointer stored in slot @slot of @parent.
 *
 * Three properties are compared: the first key of the child against the
 * key in the parent slot, the child's header bytenr against the parent's
 * block pointer, and the child's header generation against the parent's
 * pointer generation.  Every mismatch is reported on stderr with "wanted"
 * being the value recorded in the parent and "have" the value found in
 * the child.
 */
2105 static int check_child_node(struct extent_buffer *parent, int slot,
2106 struct extent_buffer *child)
2108 struct btrfs_key parent_key;
2109 struct btrfs_key child_key;
2112 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* A level-0 child is a leaf (item keys); anything else holds node keys. */
2113 if (btrfs_header_level(child) == 0)
2114 btrfs_item_key_to_cpu(child, &child_key, 0);
2116 btrfs_node_key_to_cpu(child, &child_key, 0);
2118 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2121 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2122 parent_key.objectid, parent_key.type, parent_key.offset,
2123 child_key.objectid, child_key.type, child_key.offset);
2125 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2127 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2128 btrfs_node_blockptr(parent, slot),
2129 btrfs_header_bytenr(child));
2131 if (btrfs_node_ptr_generation(parent, slot) !=
2132 btrfs_header_generation(child)) {
/*
 * Report the parent's pointer generation as "wanted" and the child's
 * header generation as "have", matching the key and block messages above.
 */
2134 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2135 btrfs_node_ptr_generation(parent, slot),
2136 btrfs_header_generation(child));
2142 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2143 * in every fs or file tree check. Here we find all its root ids, and only check
2144 * it in the fs or file tree which has the smallest root id.
/*
 * Decide whether @root is the tree in which a shared block gets checked.
 * @roots is the set of root ids referencing the block.  The first entry of
 * its rb-tree is compared with root->objectid.
 */
2146 static int need_check(struct btrfs_root *root, struct ulist *roots)
2148 struct rb_node *node;
2149 struct ulist_node *u;
/* Referenced by a single root only: nothing to arbitrate. */
2151 if (roots->nnodes == 1)
2154 node = rb_first(&roots->root);
2155 u = rb_entry(node, struct ulist_node, rb_node);
2157 * current root id is not smallest, we skip it and let it be checked
2158 * in the fs or file tree who hash the smallest root id.
2160 if (root->objectid != u->val)
/*
 * Work out whether tree block @eb should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF when seen from @root, and set or clear
 * that bit in the caller's flags word accordingly.
 *
 * Several shortcuts are tried first (root objectid range, root node,
 * RELOC header flag, header owner).  Otherwise the block's extent item is
 * looked up in the extent tree and its inline refs are scanned for a
 * TREE_BLOCK_REF belonging to the header owner.
 */
2166 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
2169 struct btrfs_root *extent_root = root->fs_info->extent_root;
2170 struct btrfs_root_item *ri = &root->root_item;
2171 struct btrfs_extent_inline_ref *iref;
2172 struct btrfs_extent_item *ei;
2173 struct btrfs_key key;
2174 struct btrfs_path *path = NULL;
2185 * Except file/reloc tree, we can not have FULL BACKREF MODE
2187 if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* @eb is the root node of this tree. */
2191 if (eb->start == btrfs_root_bytenr(ri))
2194 if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
2197 owner = btrfs_header_owner(eb);
2198 if (owner == root->objectid)
2201 path = btrfs_alloc_path();
/* Search with the largest offset so the extent item sorts just before the result. */
2205 key.objectid = btrfs_header_bytenr(eb);
2207 key.offset = (u64)-1;
2209 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2216 ret = btrfs_previous_extent_item(extent_root, path,
2222 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* From here on eb refers to the extent tree leaf, not the checked block. */
2224 eb = path->nodes[0];
2225 slot = path->slots[0];
2226 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
2228 flags = btrfs_extent_flags(eb, ei);
2229 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
/* Inline refs start after the extent item and run to the end of the item. */
2232 ptr = (unsigned long)(ei + 1);
2233 end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
/* EXTENT_ITEM keys carry a btrfs_tree_block_info before the inline refs. */
2235 if (key.type == BTRFS_EXTENT_ITEM_KEY)
2236 ptr += sizeof(struct btrfs_tree_block_info);
2239 /* Reached extent item ends normally */
2243 /* Beyond extent item end, wrong item size */
2245 error("extent item at bytenr %llu slot %d has wrong size",
2250 iref = (struct btrfs_extent_inline_ref *)ptr;
2251 offset = btrfs_extent_inline_ref_offset(eb, iref);
2252 type = btrfs_extent_inline_ref_type(eb, iref);
/* A keyed tree block ref whose offset is the header owner. */
2254 if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
2256 ptr += btrfs_extent_inline_ref_size(type);
2260 *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
2264 *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2266 btrfs_free_path(path);
2271 * for a tree node or leaf, we record its reference count, so later if we still
2272 * process this node or leaf, don't need to compute its reference count again.
2274 * @bytenr if @bytenr == (u64)-1, only update nrefs->full_backref[level]
/*
 * Refresh the @nrefs entry for tree level @level.
 *
 * When @bytenr is a real address and differs from the cached one, the
 * block's reference count is looked up and the cached bytenr/refs are
 * replaced, with full_backref and checked reset to 0.  need_check[@level]
 * is then derived from the roots referencing the block, or inherited from
 * the level above.  With @check_all set and @eb supplied,
 * full_backref[@level] is computed via calc_extent_flag_v2().
 */
2276 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2277 struct extent_buffer *eb, struct node_refs *nrefs,
2278 u64 level, int check_all)
2280 struct ulist *roots;
2283 int root_level = btrfs_header_level(root->node);
/* Entry for this level already describes @bytenr. */
2287 if (nrefs->bytenr[level] == bytenr)
2290 if (bytenr != (u64)-1) {
2291 /* the return value of this function seems a mistake */
2292 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2293 level, 1, &refs, &flags);
/* A failed lookup is only acted on here when not checking all trees. */
2295 if (ret < 0 && !check_all)
2298 nrefs->bytenr[level] = bytenr;
2299 nrefs->refs[level] = refs;
2300 nrefs->full_backref[level] = 0;
2301 nrefs->checked[level] = 0;
/* Collect the roots referencing this block and let need_check() decide. */
2304 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2309 check = need_check(root, roots);
2311 nrefs->need_check[level] = check;
2314 nrefs->need_check[level] = 1;
/* The root node itself always needs checking. */
2316 if (level == root_level) {
2317 nrefs->need_check[level] = 1;
2320 * The node refs may have not been
2321 * updated if upper needs checking (the
2322 * lowest root_objectid) the node can
2325 nrefs->need_check[level] =
2326 nrefs->need_check[level + 1];
2332 if (check_all && eb) {
2333 calc_extent_flag_v2(root, eb, &flags);
2334 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2335 nrefs->full_backref[level] = 1;
2342 * @level if @level == -1 means extent data item
2343 * else normal tree block.
/*
 * Decide how strictly the extent at @level should be checked, based on the
 * reference counts cached in @nrefs for the levels above it.
 */
2345 static int should_check_extent_strictly(struct btrfs_root *root,
2346 struct node_refs *nrefs, int level)
2348 int root_level = btrfs_header_level(root->node);
/* @level must lie in [-1, root_level]. */
2350 if (level > root_level || level < -1)
2352 if (level == root_level)
2355 * if the upper node is marked full backref, it should contain shared
2356 * backref of the parent (except owner == root->objectid).
/* Look for an ancestor level, up to the root, with more than one reference. */
2358 while (++level <= root_level)
2359 if (nrefs->refs[level] > 1)
/*
 * Original-mode descent: walk @path downwards from *@level, processing
 * leaves with process_one_leaf() and reading, validating and stepping into
 * child blocks.  Reference counts are cached in @nrefs, and the
 * shared-node bookkeeping in @wc is updated via enter_shared_node().
 */
2365 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2366 struct walk_control *wc, int *level,
2367 struct node_refs *nrefs)
2369 enum btrfs_tree_block_status status;
2372 struct btrfs_fs_info *fs_info = root->fs_info;
2373 struct extent_buffer *next;
2374 struct extent_buffer *cur;
2378 WARN_ON(*level < 0);
2379 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached refcount for the starting block when it matches. */
2381 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2382 refs = nrefs->refs[*level];
2385 ret = btrfs_lookup_extent_info(NULL, root,
2386 path->nodes[*level]->start,
2387 *level, 1, &refs, NULL);
2392 nrefs->bytenr[*level] = path->nodes[*level]->start;
2393 nrefs->refs[*level] = refs;
2397 ret = enter_shared_node(root, path->nodes[*level]->start,
2405 while (*level >= 0) {
2406 WARN_ON(*level < 0);
2407 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2408 cur = path->nodes[*level];
/* The level stored in the block header must agree with the path depth. */
2410 if (btrfs_header_level(cur) != *level)
/* Every slot of this block has been consumed. */
2413 if (path->slots[*level] >= btrfs_header_nritems(cur))
2416 ret = process_one_leaf(root, cur, wc);
/* Interior node: fetch the child pointer at the current slot. */
2421 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2422 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
/* Same refcount caching as above, for the child level. */
2424 if (bytenr == nrefs->bytenr[*level - 1]) {
2425 refs = nrefs->refs[*level - 1];
2427 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2428 *level - 1, 1, &refs, NULL);
2432 nrefs->bytenr[*level - 1] = bytenr;
2433 nrefs->refs[*level - 1] = refs;
2438 ret = enter_shared_node(root, bytenr, refs,
2441 path->slots[*level]++;
/* Use a cached buffer if it is up to date; otherwise read the block. */
2446 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2447 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2448 free_extent_buffer(next);
2449 reada_walk_down(root, cur, path->slots[*level]);
2450 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
/* Unreadable child: record a corrupt extent keyed by the parent slot's key. */
2451 if (!extent_buffer_uptodate(next)) {
2452 struct btrfs_key node_key;
2454 btrfs_node_key_to_cpu(path->nodes[*level],
2456 path->slots[*level]);
2457 btrfs_add_corrupt_extent_record(root->fs_info,
2459 path->nodes[*level]->start,
2460 root->fs_info->nodesize,
/* The child must match what the parent pointer says about it. */
2467 ret = check_child_node(cur, path->slots[*level], next);
2469 free_extent_buffer(next);
2474 if (btrfs_is_leaf(next))
2475 status = btrfs_check_leaf(root, NULL, next);
2477 status = btrfs_check_node(root, NULL, next);
2478 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2479 free_extent_buffer(next);
/* Descend: install the child in the path one level lower, at slot 0. */
2484 *level = *level - 1;
2485 free_extent_buffer(path->nodes[*level]);
2486 path->nodes[*level] = next;
2487 path->slots[*level] = 0;
/* Mark the block at *level as fully consumed. */
2490 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/* Forward declaration; account_bytes() calls this before its definition. */
2494 static int fs_root_objectid(u64 objectid);
2497 * Update global fs information.
/*
 * Add the tree block at path->nodes[level] to the file-scope byte
 * counters: total btree bytes, fs-tree bytes (when fs_root_objectid()
 * accepts the root's objectid), extent-tree bytes (by header owner) and
 * the unused space inside the block.
 */
2499 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2503 struct extent_buffer *eb = path->nodes[level];
2505 total_btree_bytes += eb->len;
2506 if (fs_root_objectid(root->objectid))
2507 total_fs_tree_bytes += eb->len;
2508 if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2509 total_extent_tree_bytes += eb->len;
/* Waste: the free space reported for the block ... */
2512 btree_space_waste += btrfs_leaf_free_space(root, eb);
/* ... or the unused key pointer slots of a node. */
2514 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root) -
2515 btrfs_header_nritems(eb));
2516 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
/*
 * Forward declarations.  check_tree_block_ref() and check_leaf_items() are
 * called by walk_down_tree_v2(); check_inode_item() repeats an earlier
 * declaration with the same signature.
 */
2520 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2521 unsigned int ext_ref);
2522 static int check_tree_block_ref(struct btrfs_root *root,
2523 struct extent_buffer *eb, u64 bytenr,
2524 int level, u64 owner, struct node_refs *nrefs);
2525 static int check_leaf_items(struct btrfs_trans_handle *trans,
2526 struct btrfs_root *root, struct btrfs_path *path,
2527 struct node_refs *nrefs, int account_bytes);
2530 * @trans just for lowmem repair mode
2531 * @check_all if not 0 then check all tree block backrefs and items
2532 * 0 then just check relationship of items in fs tree(s)
2534 * Returns >0 Found error, should continue
2535 * Returns <0 Fatal error, must exit the whole check
2536 * Returns 0 No errors found
/*
 * Low-memory-mode descent: walk @path downwards from *@level.
 *
 * At each block the tree block backref is verified and, when checking all
 * trees, the block is added to the byte counters.  Leaves are handed to
 * process_one_leaf_v2() or check_leaf_items().  Interior nodes have the
 * child at the current slot read, validated and installed one level lower.
 * @nrefs caches per-level reference information so that shared blocks can
 * be skipped.
 */
2538 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2539 struct btrfs_root *root, struct btrfs_path *path,
2540 int *level, struct node_refs *nrefs, int ext_ref,
2544 enum btrfs_tree_block_status status;
2547 struct btrfs_fs_info *fs_info = root->fs_info;
2548 struct extent_buffer *next;
2549 struct extent_buffer *cur;
/* Set once the current block was accounted; passed on to check_leaf_items(). */
2553 int account_file_data = 0;
2555 WARN_ON(*level < 0);
2556 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2558 ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2559 path->nodes[*level], nrefs, *level, check_all);
2563 while (*level >= 0) {
2564 WARN_ON(*level < 0);
2565 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2566 cur = path->nodes[*level];
2567 bytenr = btrfs_header_bytenr(cur);
2568 check = nrefs->need_check[*level];
/* The level stored in the block header must agree with the path depth. */
2570 if (btrfs_header_level(cur) != *level)
2573 * Update bytes accounting and check tree block ref
2574 * NOTE: Doing accounting and check before checking nritems
2575 * is necessary because of empty node/leaf.
2577 if ((check_all && !nrefs->checked[*level]) ||
2578 (!check_all && nrefs->need_check[*level])) {
2579 ret = check_tree_block_ref(root, cur,
2580 btrfs_header_bytenr(cur), btrfs_header_level(cur),
2581 btrfs_header_owner(cur), nrefs);
/* Account only blocks this root is responsible for and that have refs. */
2584 if (check_all && nrefs->need_check[*level] &&
2585 nrefs->refs[*level]) {
2586 account_bytes(root, path, *level);
2587 account_file_data = 1;
2589 nrefs->checked[*level] = 1;
/* Every slot of this block has been consumed. */
2592 if (path->slots[*level] >= btrfs_header_nritems(cur))
2595 /* Don't forget to check leaf/node validation */
2597 /* skip duplicate check */
2598 if (check || !check_all) {
2599 ret = btrfs_check_leaf(root, NULL, cur);
2600 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2608 ret = process_one_leaf_v2(root, path, nrefs,
2611 ret = check_leaf_items(trans, root, path,
2612 nrefs, account_file_data);
2616 if (check || !check_all) {
2617 ret = btrfs_check_node(root, NULL, cur);
2618 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
/* Interior node: fetch the child pointer at the current slot. */
2625 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2626 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2628 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2633 * check all trees in check_chunks_and_extent_v2
2634 * check shared node once in check_fs_roots
2636 if (!check_all && !nrefs->need_check[*level - 1]) {
2637 path->slots[*level]++;
/* Use a cached buffer if it is up to date; otherwise read the block. */
2641 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2642 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2643 free_extent_buffer(next);
2644 reada_walk_down(root, cur, path->slots[*level]);
2645 next = read_tree_block(fs_info, bytenr, ptr_gen);
/* Unreadable child: record a corrupt extent keyed by the parent slot's key. */
2646 if (!extent_buffer_uptodate(next)) {
2647 struct btrfs_key node_key;
2649 btrfs_node_key_to_cpu(path->nodes[*level],
2651 path->slots[*level]);
2652 btrfs_add_corrupt_extent_record(fs_info,
2653 &node_key, path->nodes[*level]->start,
2654 fs_info->nodesize, *level);
/* The child must match what the parent pointer says about it. */
2660 ret = check_child_node(cur, path->slots[*level], next);
2665 if (btrfs_is_leaf(next))
2666 status = btrfs_check_leaf(root, NULL, next);
2668 status = btrfs_check_node(root, NULL, next);
2669 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2670 free_extent_buffer(next);
/* Descend: install the child in the path one level lower, at slot 0. */
2675 *level = *level - 1;
2676 free_extent_buffer(path->nodes[*level]);
2677 path->nodes[*level] = next;
2678 path->slots[*level] = 0;
2679 account_file_data = 0;
/* bytenr (u64)-1 asks update_nodes_refs() to refresh only full_backref[]. */
2681 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
/*
 * Original-mode ascent: starting at *@level, climb @path looking for a
 * node that still has a slot after the current one.  Exhausted levels are
 * released on the way up, and leave_shared_node() is invoked when the
 * level being released is the active shared node of @wc.
 */
2686 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2687 struct walk_control *wc, int *level)
2690 struct extent_buffer *leaf;
2692 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite the name, 'leaf' is the block at level i, leaf or node. */
2693 leaf = path->nodes[i];
/* Another slot remains at this level. */
2694 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* Level exhausted: drop its buffer from the path. */
2699 free_extent_buffer(path->nodes[*level]);
2700 path->nodes[*level] = NULL;
2701 BUG_ON(*level > wc->active_node);
2702 if (*level == wc->active_node)
2703 leave_shared_node(root, wc, *level);
/*
 * Low-memory-mode ascent: starting at *level, climb @path looking for a
 * node that still has a slot after the current one, releasing exhausted
 * levels on the way up.  Unlike walk_up_tree(), no walk_control is taken.
 */
2710 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2714 struct extent_buffer *leaf;
2716 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite the name, 'leaf' is the block at level i, leaf or node. */
2717 leaf = path->nodes[i];
/* Another slot remains at this level. */
2718 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* Level exhausted: drop its buffer from the path. */
2723 free_extent_buffer(path->nodes[*level]);
2724 path->nodes[*level] = NULL;
/*
 * Validate the inode record of a tree's root directory.
 *
 * The checks performed are: an inode item without recorded errors, nlink
 * of 1 with no links found, and a first backref that is an inode ref named
 * ".." with index 0 and no matching dir index or dir item.
 */
2731 static int check_root_dir(struct inode_record *rec)
2733 struct inode_backref *backref;
2736 if (!rec->found_inode_item || rec->errors)
2738 if (rec->nlink != 1 || rec->found_link != 0)
2740 if (list_empty(&rec->backrefs))
/* Only the first backref on the list is examined. */
2742 backref = to_inode_backref(rec->backrefs.next);
2743 if (!backref->found_inode_ref)
2745 if (backref->index != 0 || backref->namelen != 2 ||
2746 memcmp(backref->name, "..", 2))
2748 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size stored in the inode item of rec->ino with
 * rec->found_size and clear I_ERR_DIR_ISIZE_WRONG on the record.
 * @path is released before returning.
 */
2755 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2756 struct btrfs_root *root, struct btrfs_path *path,
2757 struct inode_record *rec)
2759 struct btrfs_inode_item *ei;
2760 struct btrfs_key key;
/*
 * Search with offset (u64)-1 so the result lands after the inode item.
 * NOTE(review): the slot is presumably stepped back by one before the key
 * is read below - confirm.
 */
2763 key.objectid = rec->ino;
2764 key.type = BTRFS_INODE_ITEM_KEY;
2765 key.offset = (u64)-1;
2767 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Slot 0 leaves no preceding item in this leaf. */
2771 if (!path->slots[0]) {
2778 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* The item found must belong to the inode being repaired. */
2779 if (key.objectid != rec->ino) {
2784 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2785 struct btrfs_inode_item);
2786 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2787 btrfs_mark_buffer_dirty(path->nodes[0]);
2788 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2789 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2790 root->root_key.objectid);
2792 btrfs_release_path(path);
2796 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2797 struct btrfs_root *root,
2798 struct btrfs_path *path,
2799 struct inode_record *rec)
2803 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2804 btrfs_release_path(path);
2806 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the on-disk nbytes of inode rec->ino to rec->found_size.
 *
 * After the update I_ERR_FILE_NBYTES_WRONG is cleared from rec->errors and a
 * message is printed.  @path is released before returning.
 *
 * NOTE(review): the key offset assignment and the error handling after the
 * search are not visible in this excerpt.
 */
2810 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2811 struct btrfs_root *root,
2812 struct btrfs_path *path,
2813 struct inode_record *rec)
2815 struct btrfs_inode_item *ei;
2816 struct btrfs_key key;
2819 key.objectid = rec->ino;
2820 key.type = BTRFS_INODE_ITEM_KEY;
2823 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2830 /* Since ret == 0, no need to check anything */
2831 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2832 struct btrfs_inode_item);
2833 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2834 btrfs_mark_buffer_dirty(path->nodes[0]);
2835 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2836 printf("reset nbytes for ino %llu root %llu\n",
2837 rec->ino, root->root_key.objectid);
2839 btrfs_release_path(path);
/*
 * Insert the DIR_INDEX item that is missing for @rec, in its own transaction.
 *
 * The new item is keyed (backref->dir, BTRFS_DIR_INDEX_KEY, backref->index),
 * points at the inode item of rec->ino and carries the name stored in
 * @backref with a data length of 0.
 *
 * Afterwards @backref is flagged found_dir_index and the parent directory
 * record (looked up in @inode_cache) has its found_size increased by
 * namelen; I_ERR_DIR_ISIZE_WRONG on the parent is cleared when found_size now
 * equals isize and set when it does not.
 */
2843 static int add_missing_dir_index(struct btrfs_root *root,
2844 struct cache_tree *inode_cache,
2845 struct inode_record *rec,
2846 struct inode_backref *backref)
2848 struct btrfs_path path;
2849 struct btrfs_trans_handle *trans;
2850 struct btrfs_dir_item *dir_item;
2851 struct extent_buffer *leaf;
2852 struct btrfs_key key;
2853 struct btrfs_disk_key disk_key;
2854 struct inode_record *dir_rec;
2855 unsigned long name_ptr;
/* Item payload: the dir item header immediately followed by the name */
2856 u32 data_size = sizeof(*dir_item) + backref->namelen;
2859 trans = btrfs_start_transaction(root, 1);
2861 return PTR_ERR(trans);
2863 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2864 (unsigned long long)rec->ino);
2866 btrfs_init_path(&path);
2867 key.objectid = backref->dir;
2868 key.type = BTRFS_DIR_INDEX_KEY;
2869 key.offset = backref->index;
2870 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2873 leaf = path.nodes[0];
2874 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* Location key stored in the dir item: the inode item of rec->ino */
2876 disk_key.objectid = cpu_to_le64(rec->ino);
2877 disk_key.type = BTRFS_INODE_ITEM_KEY;
2878 disk_key.offset = 0;
2880 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2881 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2882 btrfs_set_dir_data_len(leaf, dir_item, 0);
2883 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2884 name_ptr = (unsigned long)(dir_item + 1);
2885 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2886 btrfs_mark_buffer_dirty(leaf);
2887 btrfs_release_path(&path);
2888 btrfs_commit_transaction(trans, root);
2890 backref->found_dir_index = 1;
2891 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2892 BUG_ON(IS_ERR(dir_rec));
/* Account for the new index in the parent dir and re-evaluate its isize error */
2895 dir_rec->found_size += backref->namelen;
2896 if (dir_rec->found_size == dir_rec->isize &&
2897 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2898 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2899 if (dir_rec->found_size != dir_rec->isize)
2900 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the dir index entry described by @backref, in its own transaction.
 *
 * The entry is looked up with btrfs_lookup_dir_index() using backref->dir,
 * the backref name and backref->index.  Removal is done with either
 * btrfs_del_item() or btrfs_delete_one_dir_name().
 *
 * NOTE(review): the condition selecting between the two removal calls, and
 * the handling of a failed lookup, are not visible in this excerpt.
 */
2905 static int delete_dir_index(struct btrfs_root *root,
2906 struct inode_backref *backref)
2908 struct btrfs_trans_handle *trans;
2909 struct btrfs_dir_item *di;
2910 struct btrfs_path path;
2913 trans = btrfs_start_transaction(root, 1);
2915 return PTR_ERR(trans);
2917 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2918 (unsigned long long)backref->dir,
2919 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2920 (unsigned long long)root->objectid);
2922 btrfs_init_path(&path);
2923 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2924 backref->name, backref->namelen,
2925 backref->index, -1);
2928 btrfs_release_path(&path);
2929 btrfs_commit_transaction(trans, root);
2936 ret = btrfs_del_item(trans, root, &path);
2938 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2940 btrfs_release_path(&path);
2941 btrfs_commit_transaction(trans, root);
2945 static int __create_inode_item(struct btrfs_trans_handle *trans,
2946 struct btrfs_root *root, u64 ino, u64 size,
2947 u64 nbytes, u64 nlink, u32 mode)
2949 struct btrfs_inode_item ii;
2950 time_t now = time(NULL);
2953 btrfs_set_stack_inode_size(&ii, size);
2954 btrfs_set_stack_inode_nbytes(&ii, nbytes);
2955 btrfs_set_stack_inode_nlink(&ii, nlink);
2956 btrfs_set_stack_inode_mode(&ii, mode);
2957 btrfs_set_stack_inode_generation(&ii, trans->transid);
2958 btrfs_set_stack_timespec_nsec(&ii.atime, 0);
2959 btrfs_set_stack_timespec_sec(&ii.ctime, now);
2960 btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
2961 btrfs_set_stack_timespec_sec(&ii.mtime, now);
2962 btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
2963 btrfs_set_stack_timespec_sec(&ii.otime, 0);
2964 btrfs_set_stack_timespec_nsec(&ii.otime, 0);
2966 ret = btrfs_insert_inode(trans, root, ino, &ii);
2969 warning("root %llu inode %llu recreating inode item, this may "
2970 "be incomplete, please check permissions and content after "
2971 "the fsck completes.\n", (unsigned long long)root->objectid,
2972 (unsigned long long)ino);
2977 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2978 struct btrfs_root *root, u64 ino,
2981 u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2983 return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
/*
 * Recreate the inode item for @rec in its own transaction.
 *
 * nlink is 1 when @root_dir is set, otherwise rec->found_link.
 * If a dir item was seen for the inode it is recreated as a directory
 * (S_IFDIR | 0755, size rec->found_size); a message is printed first when
 * file extents were seen as well.  Otherwise it is recreated as a regular
 * file (S_IFREG | 0755, size rec->extent_end).
 */
2986 static int create_inode_item(struct btrfs_root *root,
2987 struct inode_record *rec, int root_dir)
2989 struct btrfs_trans_handle *trans;
2995 trans = btrfs_start_transaction(root, 1);
2996 if (IS_ERR(trans)) {
2997 ret = PTR_ERR(trans);
3001 nlink = root_dir ? 1 : rec->found_link;
3002 if (rec->found_dir_item) {
3003 if (rec->found_file_extent)
3004 fprintf(stderr, "root %llu inode %llu has both a dir "
3005 "item and extents, unsure if it is a dir or a "
3006 "regular file so setting it as a directory\n",
3007 (unsigned long long)root->objectid,
3008 (unsigned long long)rec->ino);
3009 mode = S_IFDIR | 0755;
3010 size = rec->found_size;
/* NOTE(review): this condition is always true in the else branch */
3011 } else if (!rec->found_dir_item) {
3012 size = rec->extent_end;
3013 mode = S_IFREG | 0755;
3016 ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
3018 btrfs_commit_transaction(trans, root);
/*
 * Walk rec->backrefs and repair inconsistent combinations of dir_index,
 * dir_item and inode_ref for one inode record.
 *
 * Cases handled, in order, for each backref:
 *  - root dir without inode item (non-delete pass): recreate the inode item;
 *  - index 0 of the root dir is left alone;
 *  - dir_index without a matching inode_ref, or with REF_ERR_INDEX_UNMATCH:
 *    delete the dir index and drop the backref from the list;
 *  - dir_item + inode_ref but no dir_index (non-delete pass): add the index;
 *  - only an inode_ref (non-delete pass): after check_dir_conflict(), insert
 *    the dir index/item pair in a new transaction;
 *  - fully matched backref but no inode item (non-delete pass): recreate
 *    the inode item.
 *
 * Return ret when it is non-zero, otherwise the `repaired` counter.
 *
 * NOTE(review): several condition and error-handling lines are not visible
 * in this excerpt; the summary above covers only the visible statements.
 */
3022 static int repair_inode_backrefs(struct btrfs_root *root,
3023 struct inode_record *rec,
3024 struct cache_tree *inode_cache,
3027 struct inode_backref *tmp, *backref;
3028 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3032 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3033 if (!delete && rec->ino == root_dirid) {
3034 if (!rec->found_inode_item) {
3035 ret = create_inode_item(root, rec, 1);
3042 /* Index 0 for root dir's are special, don't mess with it */
3043 if (rec->ino == root_dirid && backref->index == 0)
3047 ((backref->found_dir_index && !backref->found_inode_ref) ||
3048 (backref->found_dir_index && backref->found_inode_ref &&
3049 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
3050 ret = delete_dir_index(root, backref);
3054 list_del(&backref->list);
3059 if (!delete && !backref->found_dir_index &&
3060 backref->found_dir_item && backref->found_inode_ref) {
3061 ret = add_missing_dir_index(root, inode_cache, rec,
3066 if (backref->found_dir_item &&
3067 backref->found_dir_index) {
3068 if (!backref->errors &&
3069 backref->found_inode_ref) {
3070 list_del(&backref->list);
3077 if (!delete && (!backref->found_dir_index &&
3078 !backref->found_dir_item &&
3079 backref->found_inode_ref)) {
3080 struct btrfs_trans_handle *trans;
3081 struct btrfs_key location;
3083 ret = check_dir_conflict(root, backref->name,
3089 * let nlink fixing routine to handle it,
3090 * which can do it better.
3095 location.objectid = rec->ino;
3096 location.type = BTRFS_INODE_ITEM_KEY;
3097 location.offset = 0;
3099 trans = btrfs_start_transaction(root, 1);
3100 if (IS_ERR(trans)) {
3101 ret = PTR_ERR(trans);
3104 fprintf(stderr, "adding missing dir index/item pair "
3106 (unsigned long long)rec->ino);
3107 ret = btrfs_insert_dir_item(trans, root, backref->name,
3109 backref->dir, &location,
3110 imode_to_type(rec->imode),
3113 btrfs_commit_transaction(trans, root);
3117 if (!delete && (backref->found_inode_ref &&
3118 backref->found_dir_index &&
3119 backref->found_dir_item &&
3120 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
3121 !rec->found_inode_item)) {
3122 ret = create_inode_item(root, rec, 0);
3129 return ret ? ret : repaired;
3133 * To determine the file type for nlink/inode_item repair
3135 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
3136 * Return -ENOENT if file type is not found.
3138 static int find_file_type(struct inode_record *rec, u8 *type)
3140 struct inode_backref *backref;
3142 /* For inode item recovered case */
3143 if (rec->found_inode_item) {
3144 *type = imode_to_type(rec->imode);
3148 list_for_each_entry(backref, &rec->backrefs, list) {
3149 if (backref->found_dir_index || backref->found_dir_item) {
3150 *type = backref->filetype;
3158 * To determine the file name for nlink repair
3160 * Return 0 if file name is found, set name and namelen.
3161 * Return -ENOENT if file name is not found.
3163 static int find_file_name(struct inode_record *rec,
3164 char *name, int *namelen)
3166 struct inode_backref *backref;
3168 list_for_each_entry(backref, &rec->backrefs, list) {
3169 if (backref->found_dir_index || backref->found_dir_item ||
3170 backref->found_inode_ref) {
3171 memcpy(name, backref->name, backref->namelen);
3172 *namelen = backref->namelen;
/*
 * Rebuild the link count of inode rec->ino from its backrefs:
 *  1. rec->found_link is reset to 0;
 *  2. every backref is unlinked with btrfs_unlink(); backrefs that do not
 *     have all of dir_index, dir_item and inode_ref are dropped from the list;
 *  3. the on-disk nlink of the inode item is set to 0;
 *  4. the remaining backrefs are re-added with btrfs_add_link().
 * @path is released before returning.
 *
 * NOTE(review): error handling between the steps is not visible in this
 * excerpt.
 */
3179 /* Reset the nlink of the inode to the correct one */
3180 static int reset_nlink(struct btrfs_trans_handle *trans,
3181 struct btrfs_root *root,
3182 struct btrfs_path *path,
3183 struct inode_record *rec)
3185 struct inode_backref *backref;
3186 struct inode_backref *tmp;
3187 struct btrfs_key key;
3188 struct btrfs_inode_item *inode_item;
3191 /* We don't believe this either, reset it and iterate backref */
3192 rec->found_link = 0;
3194 /* Remove all backref including the valid ones */
3195 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3196 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
3197 backref->index, backref->name,
3198 backref->namelen, 0);
3202 /* remove invalid backref, so it won't be added back */
3203 if (!(backref->found_dir_index &&
3204 backref->found_dir_item &&
3205 backref->found_inode_ref)) {
3206 list_del(&backref->list);
3213 /* Set nlink to 0 */
3214 key.objectid = rec->ino;
3215 key.type = BTRFS_INODE_ITEM_KEY;
3217 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3224 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3225 struct btrfs_inode_item);
3226 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
3227 btrfs_mark_buffer_dirty(path->nodes[0]);
3228 btrfs_release_path(path);
3231 * Add back valid inode_ref/dir_item/dir_index,
3232 * add_link() will handle the nlink inc, so new nlink must be correct
3234 list_for_each_entry(backref, &rec->backrefs, list) {
3235 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
3236 backref->name, backref->namelen,
3237 backref->filetype, &backref->index, 1, 0);
3242 btrfs_release_path(path);
/*
 * Find the highest inode number in use in @root.
 *
 * Searches for an INODE_ITEM key with objectid BTRFS_LAST_FREE_OBJECTID and
 * reads the key in the slot just before the search position; its objectid is
 * stored in *highest_ino.  A result at or above BTRFS_LAST_FREE_OBJECTID is
 * treated specially.  @path is initialised here and released before
 * returning.
 *
 * NOTE(review): the remaining parameter, the condition guarding the key read
 * and the return values are not visible in this excerpt.
 */
3246 static int get_highest_inode(struct btrfs_trans_handle *trans,
3247 struct btrfs_root *root,
3248 struct btrfs_path *path,
3251 struct btrfs_key key, found_key;
3254 btrfs_init_path(path);
3255 key.objectid = BTRFS_LAST_FREE_OBJECTID;
3257 key.type = BTRFS_INODE_ITEM_KEY;
3258 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3260 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3261 path->slots[0] - 1);
3262 *highest_ino = found_key.objectid;
3265 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3267 btrfs_release_path(path);
/*
 * Steps visible below:
 *  1. get_highest_inode() is used to pick an inode number for the directory;
 *  2. btrfs_mkdir() creates "lost+found" under BTRFS_FIRST_FREE_OBJECTID;
 *  3. btrfs_add_link() links @ino into it under @namebuf; while that fails
 *     with -EEXIST a ".INO" suffix is appended to @namebuf and the link is
 *     retried, as long as the name still fits in BTRFS_NAME_LEN.
 * @namebuf is modified in place when a suffix is added, so it must have room
 * for BTRFS_NAME_LEN bytes.  @path is released on entry and before returning.
 */
3272 * Link inode to dir 'lost+found'. Increase @ref_count.
3274 * Returns 0 means success.
3275 * Returns <0 means failure.
3277 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
3278 struct btrfs_root *root,
3279 struct btrfs_path *path,
3280 u64 ino, char *namebuf, u32 name_len,
3281 u8 filetype, u64 *ref_count)
3283 char *dir_name = "lost+found";
3288 btrfs_release_path(path);
3289 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3294 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3295 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3298 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3301 ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3302 namebuf, name_len, filetype, NULL, 1, 0);
3304 * Add ".INO" suffix several times to handle case where
3305 * "FILENAME.INO" is already taken by another file.
3307 while (ret == -EEXIST) {
3309 * Conflicting file name, add ".INO" as suffix * +1 for '.'
3311 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
3315 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3317 name_len += count_digits(ino) + 1;
3318 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3319 name_len, filetype, NULL, 1, 0);
3322 error("failed to link the inode %llu to %s dir: %s",
3323 ino, dir_name, strerror(-ret));
3328 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3329 name_len, namebuf, dir_name);
3331 btrfs_release_path(path);
3333 error("failed to move file '%.*s' to '%s' dir", name_len,
3338 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3339 struct btrfs_root *root,
3340 struct btrfs_path *path,
3341 struct inode_record *rec)
3343 char namebuf[BTRFS_NAME_LEN] = {0};
3346 int name_recovered = 0;
3347 int type_recovered = 0;
3351 * Get file name and type first before these invalid inode ref
3352 * are deleted by remove_all_invalid_backref()
3354 name_recovered = !find_file_name(rec, namebuf, &namelen);
3355 type_recovered = !find_file_type(rec, &type);
3357 if (!name_recovered) {
3358 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3359 rec->ino, rec->ino);
3360 namelen = count_digits(rec->ino);
3361 sprintf(namebuf, "%llu", rec->ino);
3364 if (!type_recovered) {
3365 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3367 type = BTRFS_FT_REG_FILE;
3371 ret = reset_nlink(trans, root, path, rec);
3374 "Failed to reset nlink for inode %llu: %s\n",
3375 rec->ino, strerror(-ret));
3379 if (rec->found_link == 0) {
3380 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3381 namebuf, namelen, type,
3382 (u64 *)&rec->found_link);
3386 printf("Fixed the nlink of inode %llu\n", rec->ino);
3389 * Clear the flag anyway, or we will loop forever for the same inode
3390 * as it will not be removed from the bad inode list and the dead loop
3393 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3394 btrfs_release_path(path);
/*
 * Implementation notes: the tree is searched read-only (NULL transaction)
 * for an EXTENT_DATA key; when the search position is past the last item of
 * the leaf the next leaf is loaded.  Items are examined while their key still
 * belongs to @ino with type EXTENT_DATA, and an extent whose type is not
 * BTRFS_FILE_EXTENT_INLINE counts as a normal file extent.
 * NOTE(review): the loop structure and the return value are not visible in
 * this excerpt.
 */
3399 * Check if there is any normal(reg or prealloc) file extent for given
3401 * This is used to determine the file type when neither its dir_index/item or
3402 * inode_item exists.
3404 * This will *NOT* report error, if any error happens, just consider it does
3405 * not have any normal file extent.
3407 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3409 struct btrfs_path path;
3410 struct btrfs_key key;
3411 struct btrfs_key found_key;
3412 struct btrfs_file_extent_item *fi;
3416 btrfs_init_path(&path);
3418 key.type = BTRFS_EXTENT_DATA_KEY;
3421 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3426 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3427 ret = btrfs_next_leaf(root, &path);
3434 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3436 if (found_key.objectid != ino ||
3437 found_key.type != BTRFS_EXTENT_DATA_KEY)
3439 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3440 struct btrfs_file_extent_item);
3441 type = btrfs_file_extent_type(path.nodes[0], fi);
3442 if (type != BTRFS_FILE_EXTENT_INLINE) {
3448 btrfs_release_path(&path);
3452 static u32 btrfs_type_to_imode(u8 type)
3454 static u32 imode_by_btrfs_type[] = {
3455 [BTRFS_FT_REG_FILE] = S_IFREG,
3456 [BTRFS_FT_DIR] = S_IFDIR,
3457 [BTRFS_FT_CHRDEV] = S_IFCHR,
3458 [BTRFS_FT_BLKDEV] = S_IFBLK,
3459 [BTRFS_FT_FIFO] = S_IFIFO,
3460 [BTRFS_FT_SOCK] = S_IFSOCK,
3461 [BTRFS_FT_SYMLINK] = S_IFLNK,
3464 return imode_by_btrfs_type[(type)];
/*
 * Rebuild a missing inode item for rec->ino.
 *
 * The file type is taken from find_file_type() when possible.  Otherwise it
 * is guessed: a normal file extent or orphan extents mean regular file, a
 * found dir item means directory, and regular file is the final fallback
 * (with a message).  The inode is then created with btrfs_new_inode().
 *
 * On completion the record is updated: found_dir_item and imode are set,
 * I_ERR_NO_INODE_ITEM is cleared and I_ERR_LINK_COUNT_WRONG is set so that
 * the nlink repair runs afterwards.
 *
 * NOTE(review): the initial value of `mode` and the error handling after
 * btrfs_new_inode() are not visible in this excerpt.
 */
3467 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3468 struct btrfs_root *root,
3469 struct btrfs_path *path,
3470 struct inode_record *rec)
3474 int type_recovered = 0;
3477 printf("Trying to rebuild inode:%llu\n", rec->ino);
3479 type_recovered = !find_file_type(rec, &filetype);
3482 * Try to determine inode type if type not found.
3484 * For found regular file extent, it must be FILE.
3485 * For found dir_item/index, it must be DIR.
3487 * For undetermined one, use FILE as fallback.
3490 * 1. If found backref(inode_index/item is already handled) to it,
3492 * Need new inode-inode ref structure to allow search for that.
3494 if (!type_recovered) {
3495 if (rec->found_file_extent &&
3496 find_normal_file_extent(root, rec->ino)) {
3498 filetype = BTRFS_FT_REG_FILE;
3499 } else if (rec->found_dir_item) {
3501 filetype = BTRFS_FT_DIR;
3502 } else if (!list_empty(&rec->orphan_extents)) {
3504 filetype = BTRFS_FT_REG_FILE;
3506 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3509 filetype = BTRFS_FT_REG_FILE;
3513 ret = btrfs_new_inode(trans, root, rec->ino,
3514 mode | btrfs_type_to_imode(filetype));
3519 * Here inode rebuild is done, we only rebuild the inode item,
3520 * don't repair the nlink(like move to lost+found).
3521 * That is the job of nlink repair.
3523 * We just fill the record and return
3525 rec->found_dir_item = 1;
3526 rec->imode = mode | btrfs_type_to_imode(filetype);
3528 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3529 /* Ensure the inode_nlinks repair function will be called */
3530 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach the orphan data extents recorded in rec->orphan_extents.
 *
 * For each orphan extent btrfs_get_extent() is used to look for a
 * conflicting file extent.  A conflicting orphan is released with
 * btrfs_free_extent(); otherwise a file extent item is inserted for it,
 * rec->found_size grows by its disk_len and the matching range is removed
 * from rec->holes.  I_ERR_FILE_NBYTES_WRONG and I_ERR_FILE_EXTENT_DISCOUNT
 * are cleared when found_size matches nbytes / no hole is left.  Each
 * processed orphan is removed from the list and I_ERR_FILE_EXTENT_ORPHAN is
 * cleared at the end.
 *
 * NOTE(review): branch conditions and error handling are not visible in
 * this excerpt.
 */
3535 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3536 struct btrfs_root *root,
3537 struct btrfs_path *path,
3538 struct inode_record *rec)
3540 struct orphan_data_extent *orphan;
3541 struct orphan_data_extent *tmp;
3544 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3546 * Check for conflicting file extents
3548 * Here we don't know whether the extents is compressed or not,
3549 * so we can only assume it not compressed nor data offset,
3550 * and use its disk_len as extent length.
3552 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3553 orphan->offset, orphan->disk_len, 0);
3554 btrfs_release_path(path);
3559 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3560 orphan->disk_bytenr, orphan->disk_len);
3561 ret = btrfs_free_extent(trans,
3562 root->fs_info->extent_root,
3563 orphan->disk_bytenr, orphan->disk_len,
3564 0, root->objectid, orphan->objectid,
3569 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3570 orphan->offset, orphan->disk_bytenr,
3571 orphan->disk_len, orphan->disk_len);
3575 /* Update file size info */
3576 rec->found_size += orphan->disk_len;
3577 if (rec->found_size == rec->nbytes)
3578 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3580 /* Update the file extent hole info too */
3581 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3585 if (RB_EMPTY_ROOT(&rec->holes))
3586 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3588 list_del(&orphan->list);
3591 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps between file extents of rec->ino.
 *
 * Every range recorded in rec->holes is passed to btrfs_punch_hole() and
 * then removed from the tree; I_ERR_FILE_EXTENT_DISCOUNT is cleared once the
 * tree is empty.  A file that lost all of its extents gets one hole covering
 * [0, isize rounded up to the sector size).  A message is printed at the end.
 *
 * NOTE(review): loop and branch conditions are not visible in this excerpt.
 */
3596 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3597 struct btrfs_root *root,
3598 struct btrfs_path *path,
3599 struct inode_record *rec)
3601 struct rb_node *node;
3602 struct file_extent_hole *hole;
3606 node = rb_first(&rec->holes);
3610 hole = rb_entry(node, struct file_extent_hole, node);
3611 ret = btrfs_punch_hole(trans, root, rec->ino,
3612 hole->start, hole->len);
3615 ret = del_file_extent_hole(&rec->holes, hole->start,
3619 if (RB_EMPTY_ROOT(&rec->holes))
3620 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* The processed hole was removed above, so fetch the new first node */
3621 node = rb_first(&rec->holes);
3623 /* special case for a file losing all its file extent */
3625 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3626 round_up(rec->isize,
3627 root->fs_info->sectorsize));
3631 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3632 rec->ino, root->objectid);
/*
 * Run every applicable inode repair for @rec inside one transaction.
 *
 * Nothing is done unless rec->errors contains at least one of the repairable
 * flags tested below.  The repairs run in a fixed order (missing inode item,
 * orphan extents, extent gaps, dir isize, orphan item, nlink, nbytes); each
 * one is attempted only while all previous ones returned 0.  The transaction
 * is committed and the path released whatever the outcome.
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 */
3637 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3639 struct btrfs_trans_handle *trans;
3640 struct btrfs_path path;
3643 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3644 I_ERR_NO_ORPHAN_ITEM |
3645 I_ERR_LINK_COUNT_WRONG |
3646 I_ERR_NO_INODE_ITEM |
3647 I_ERR_FILE_EXTENT_ORPHAN |
3648 I_ERR_FILE_EXTENT_DISCOUNT|
3649 I_ERR_FILE_NBYTES_WRONG)))
3653 * For nlink repair, it may create a dir and add link, so
3654 * 2 for parent(256)'s dir_index and dir_item
3655 * 2 for lost+found dir's inode_item and inode_ref
3656 * 1 for the new inode_ref of the file
3657 * 2 for lost+found dir's dir_index and dir_item for the file
3659 trans = btrfs_start_transaction(root, 7);
3661 return PTR_ERR(trans);
3663 btrfs_init_path(&path);
3664 if (rec->errors & I_ERR_NO_INODE_ITEM)
3665 ret = repair_inode_no_item(trans, root, &path, rec);
3666 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3667 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3668 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3669 ret = repair_inode_discount_extent(trans, root, &path, rec);
3670 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3671 ret = repair_inode_isize(trans, root, &path, rec);
3672 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3673 ret = repair_inode_orphan_item(trans, root, &path, rec);
3674 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3675 ret = repair_inode_nlinks(trans, root, &path, rec);
3676 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3677 ret = repair_inode_nbytes(trans, root, &path, rec);
3678 btrfs_commit_transaction(trans, root);
3679 btrfs_release_path(&path);
/*
 * Check, and in repair mode fix, all inode records collected for @root.
 *
 * Visible phases:
 *  1. a root with 0 refs only gets a warning if records exist;
 *  2. in repair mode, repair_inode_backrefs() is run over every record that
 *     has backrefs, in several stages;
 *  3. the root directory record is validated with check_root_dir(); in
 *     repair mode a missing root dir is recreated with btrfs_make_root_dir();
 *  4. every remaining record is removed from @inode_cache; the root dir and
 *     the orphan objectid are skipped, missing orphan items are re-checked,
 *     I_ERR_NO_INODE_ITEM / I_ERR_LINK_COUNT_WRONG are set where they apply,
 *     try_repair_inode() is attempted, and whatever is still wrong is
 *     printed together with its unresolved backrefs.
 *
 * Return -1 when any error was counted, 0 otherwise.
 *
 * NOTE(review): many control-flow lines are not visible in this excerpt;
 * the phase summary is derived from the visible statements only.
 */
3683 static int check_inode_recs(struct btrfs_root *root,
3684 struct cache_tree *inode_cache)
3686 struct cache_extent *cache;
3687 struct ptr_node *node;
3688 struct inode_record *rec;
3689 struct inode_backref *backref;
3694 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3696 if (btrfs_root_refs(&root->root_item) == 0) {
3697 if (!cache_tree_empty(inode_cache))
3698 fprintf(stderr, "warning line %d\n", __LINE__);
3703 * We need to repair backrefs first because we could change some of the
3704 * errors in the inode recs.
3706 * We also need to go through and delete invalid backrefs first and then
3707 * add the correct ones second. We do this because we may get EEXIST
3708 * when adding back the correct index because we hadn't yet deleted the
3711 * For example, if we were missing a dir index then the directories
3712 * isize would be wrong, so if we fixed the isize to what we thought it
3713 * would be and then fixed the backref we'd still have a invalid fs, so
3714 * we need to add back the dir index and then check to see if the isize
3719 if (stage == 3 && !err)
3722 cache = search_cache_extent(inode_cache, 0);
3723 while (repair && cache) {
3724 node = container_of(cache, struct ptr_node, cache);
3726 cache = next_cache_extent(cache);
3728 /* Need to free everything up and rescan */
3730 remove_cache_extent(inode_cache, &node->cache);
3732 free_inode_rec(rec);
3736 if (list_empty(&rec->backrefs))
3739 ret = repair_inode_backrefs(root, rec, inode_cache,
3753 rec = get_inode_rec(inode_cache, root_dirid, 0);
3754 BUG_ON(IS_ERR(rec));
3756 ret = check_root_dir(rec);
3758 fprintf(stderr, "root %llu root dir %llu error\n",
3759 (unsigned long long)root->root_key.objectid,
3760 (unsigned long long)root_dirid);
3761 print_inode_error(root, rec);
3766 struct btrfs_trans_handle *trans;
3768 trans = btrfs_start_transaction(root, 1);
3769 if (IS_ERR(trans)) {
3770 err = PTR_ERR(trans);
3775 "root %llu missing its root dir, recreating\n",
3776 (unsigned long long)root->objectid);
3778 ret = btrfs_make_root_dir(trans, root, root_dirid);
3781 btrfs_commit_transaction(trans, root);
3785 fprintf(stderr, "root %llu root dir %llu not found\n",
3786 (unsigned long long)root->root_key.objectid,
3787 (unsigned long long)root_dirid);
3791 cache = search_cache_extent(inode_cache, 0);
3794 node = container_of(cache, struct ptr_node, cache);
3796 remove_cache_extent(inode_cache, &node->cache);
3798 if (rec->ino == root_dirid ||
3799 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3800 free_inode_rec(rec);
3804 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3805 ret = check_orphan_item(root, rec->ino);
3807 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3808 if (can_free_inode_rec(rec)) {
3809 free_inode_rec(rec);
3814 if (!rec->found_inode_item)
3815 rec->errors |= I_ERR_NO_INODE_ITEM;
3816 if (rec->found_link != rec->nlink)
3817 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3819 ret = try_repair_inode(root, rec);
3820 if (ret == 0 && can_free_inode_rec(rec)) {
3821 free_inode_rec(rec);
3827 if (!(repair && ret == 0))
3829 print_inode_error(root, rec);
3830 list_for_each_entry(backref, &rec->backrefs, list) {
3831 if (!backref->found_dir_item)
3832 backref->errors |= REF_ERR_NO_DIR_ITEM;
3833 if (!backref->found_dir_index)
3834 backref->errors |= REF_ERR_NO_DIR_INDEX;
3835 if (!backref->found_inode_ref)
3836 backref->errors |= REF_ERR_NO_INODE_REF;
3837 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3838 " namelen %u name %s filetype %d errors %x",
3839 (unsigned long long)backref->dir,
3840 (unsigned long long)backref->index,
3841 backref->namelen, backref->name,
3842 backref->filetype, backref->errors);
3843 print_ref_error(backref->errors);
3845 free_inode_rec(rec);
3847 return (error > 0) ? -1 : 0;
3850 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3853 struct cache_extent *cache;
3854 struct root_record *rec = NULL;
3857 cache = lookup_cache_extent(root_cache, objectid, 1);
3859 rec = container_of(cache, struct root_record, cache);
3861 rec = calloc(1, sizeof(*rec));
3863 return ERR_PTR(-ENOMEM);
3864 rec->objectid = objectid;
3865 INIT_LIST_HEAD(&rec->backrefs);
3866 rec->cache.start = objectid;
3867 rec->cache.size = 1;
3869 ret = insert_cache_extent(root_cache, &rec->cache);
3871 return ERR_PTR(-EEXIST);
3876 static struct root_backref *get_root_backref(struct root_record *rec,
3877 u64 ref_root, u64 dir, u64 index,
3878 const char *name, int namelen)
3880 struct root_backref *backref;
3882 list_for_each_entry(backref, &rec->backrefs, list) {
3883 if (backref->ref_root != ref_root || backref->dir != dir ||
3884 backref->namelen != namelen)
3886 if (memcmp(name, backref->name, namelen))
3891 backref = calloc(1, sizeof(*backref) + namelen + 1);
3894 backref->ref_root = ref_root;
3896 backref->index = index;
3897 backref->namelen = namelen;
3898 memcpy(backref->name, name, namelen);
3899 backref->name[namelen] = '\0';
3900 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Destructor for one root record stored in a cache tree: every backref is
 * unlinked from rec->backrefs one at a time.
 * NOTE(review): the statements releasing the memory are not visible in this
 * excerpt.
 */
3904 static void free_root_record(struct cache_extent *cache)
3906 struct root_record *rec;
3907 struct root_backref *backref;
3909 rec = container_of(cache, struct root_record, cache);
3910 while (!list_empty(&rec->backrefs)) {
3911 backref = to_root_backref(rec->backrefs.next);
3912 list_del(&backref->list);
/* Instantiates the tree-wide free helper for root_recs using the destructor above */
3919 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that an item of type @item_type referencing root @root_id was found
 * in @ref_root (directory @dir, index @index, name @name).
 *
 * The matching backref is looked up or created and @errors is OR-ed into
 * it.  For anything but a DIR_ITEM the index is compared with a previously
 * recorded one (REF_ERR_INDEX_UNMATCH on mismatch) or stored.  The found_*
 * flag for @item_type is then set; a second ROOT_REF or ROOT_BACKREF is
 * flagged as a duplicate.  A backref becomes `reachable` once both the
 * forward ref and the dir item have been seen.
 *
 * NOTE(review): the statements updating rec->found_ref and the return
 * values are not visible in this excerpt.
 */
3921 static int add_root_backref(struct cache_tree *root_cache,
3922 u64 root_id, u64 ref_root, u64 dir, u64 index,
3923 const char *name, int namelen,
3924 int item_type, int errors)
3926 struct root_record *rec;
3927 struct root_backref *backref;
3929 rec = get_root_rec(root_cache, root_id);
3930 BUG_ON(IS_ERR(rec));
3931 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3934 backref->errors |= errors;
3936 if (item_type != BTRFS_DIR_ITEM_KEY) {
3937 if (backref->found_dir_index || backref->found_back_ref ||
3938 backref->found_forward_ref) {
3939 if (backref->index != index)
3940 backref->errors |= REF_ERR_INDEX_UNMATCH;
3942 backref->index = index;
3946 if (item_type == BTRFS_DIR_ITEM_KEY) {
3947 if (backref->found_forward_ref)
3949 backref->found_dir_item = 1;
3950 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3951 backref->found_dir_index = 1;
3952 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3953 if (backref->found_forward_ref)
3954 backref->errors |= REF_ERR_DUP_ROOT_REF;
3955 else if (backref->found_dir_item)
3957 backref->found_forward_ref = 1;
3958 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3959 if (backref->found_back_ref)
3960 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3961 backref->found_back_ref = 1;
3966 if (backref->found_forward_ref && backref->found_dir_item)
3967 backref->reachable = 1;
/*
 * Convert the inode records left in @src_cache into root backrefs stored in
 * @dst_cache.
 *
 * For the tree-reloc root the source records are simply freed.  Otherwise
 * each record is removed from @src_cache and tested with is_child_root();
 * for each of its backrefs a DIR_ITEM and/or DIR_INDEX root backref is added
 * to @dst_cache, keyed by the record's inode number as root id.  A backref
 * carrying an inode_ref is a bug here (BUG_ON).  Every record is freed after
 * processing.
 *
 * NOTE(review): the handling of the is_child_root() result and the return
 * values are not visible in this excerpt.
 */
3971 static int merge_root_recs(struct btrfs_root *root,
3972 struct cache_tree *src_cache,
3973 struct cache_tree *dst_cache)
3975 struct cache_extent *cache;
3976 struct ptr_node *node;
3977 struct inode_record *rec;
3978 struct inode_backref *backref;
3981 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3982 free_inode_recs_tree(src_cache);
3987 cache = search_cache_extent(src_cache, 0);
3990 node = container_of(cache, struct ptr_node, cache);
3992 remove_cache_extent(src_cache, &node->cache);
3995 ret = is_child_root(root, root->objectid, rec->ino);
4001 list_for_each_entry(backref, &rec->backrefs, list) {
4002 BUG_ON(backref->found_inode_ref);
4003 if (backref->found_dir_item)
4004 add_root_backref(dst_cache, rec->ino,
4005 root->root_key.objectid, backref->dir,
4006 backref->index, backref->name,
4007 backref->namelen, BTRFS_DIR_ITEM_KEY,
4009 if (backref->found_dir_index)
4010 add_root_backref(dst_cache, rec->ino,
4011 root->root_key.objectid, backref->dir,
4012 backref->index, backref->name,
4013 backref->namelen, BTRFS_DIR_INDEX_KEY,
4017 free_inode_rec(rec);
/*
 * Verify that every fs tree recorded in @root_cache is properly referenced.
 *
 * First pass: reachability is propagated - a backref whose referencing root
 * has no reference itself is marked unreachable.  Second pass: unreferenced
 * roots in the free objectid range are checked for an orphan item and
 * reported; for each root the missing pieces of every backref are turned
 * into REF_ERR_* flags and problematic roots are printed together with their
 * unresolved refs.
 *
 * Return 1 when any error was counted, 0 otherwise.
 *
 * NOTE(review): loop structure, found_ref bookkeeping and error counting
 * are not visible in this excerpt.
 */
4024 static int check_root_refs(struct btrfs_root *root,
4025 struct cache_tree *root_cache)
4027 struct root_record *rec;
4028 struct root_record *ref_root;
4029 struct root_backref *backref;
4030 struct cache_extent *cache;
4036 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
4037 BUG_ON(IS_ERR(rec));
4040 /* fixme: this can not detect circular references */
4043 cache = search_cache_extent(root_cache, 0);
4047 rec = container_of(cache, struct root_record, cache);
4048 cache = next_cache_extent(cache);
4050 if (rec->found_ref == 0)
4053 list_for_each_entry(backref, &rec->backrefs, list) {
4054 if (!backref->reachable)
4057 ref_root = get_root_rec(root_cache,
4059 BUG_ON(IS_ERR(ref_root));
4060 if (ref_root->found_ref > 0)
4063 backref->reachable = 0;
4065 if (rec->found_ref == 0)
4071 cache = search_cache_extent(root_cache, 0);
4075 rec = container_of(cache, struct root_record, cache);
4076 cache = next_cache_extent(cache);
4078 if (rec->found_ref == 0 &&
4079 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
4080 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
4081 ret = check_orphan_item(root->fs_info->tree_root,
4087 * If we don't have a root item then we likely just have
4088 * a dir item in a snapshot for this root but no actual
4089 * ref key or anything so it's meaningless.
4091 if (!rec->found_root_item)
4094 fprintf(stderr, "fs tree %llu not referenced\n",
4095 (unsigned long long)rec->objectid);
4099 if (rec->found_ref > 0 && !rec->found_root_item)
4101 list_for_each_entry(backref, &rec->backrefs, list) {
4102 if (!backref->found_dir_item)
4103 backref->errors |= REF_ERR_NO_DIR_ITEM;
4104 if (!backref->found_dir_index)
4105 backref->errors |= REF_ERR_NO_DIR_INDEX;
4106 if (!backref->found_back_ref)
4107 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
4108 if (!backref->found_forward_ref)
4109 backref->errors |= REF_ERR_NO_ROOT_REF;
4110 if (backref->reachable && backref->errors)
4117 fprintf(stderr, "fs tree %llu refs %u %s\n",
4118 (unsigned long long)rec->objectid, rec->found_ref,
4119 rec->found_root_item ? "" : "not found");
4121 list_for_each_entry(backref, &rec->backrefs, list) {
4122 if (!backref->reachable)
4124 if (!backref->errors && rec->found_root_item)
4126 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
4127 " index %llu namelen %u name %s errors %x\n",
4128 (unsigned long long)backref->ref_root,
4129 (unsigned long long)backref->dir,
4130 (unsigned long long)backref->index,
4131 backref->namelen, backref->name,
4133 print_ref_error(backref->errors);
4136 return errors > 0 ? 1 : 0;
/*
 * Decode the btrfs_root_ref item stored at @slot of @eb and record it in
 * @root_cache through add_root_backref().
 *
 * @eb:         leaf holding the item
 * @slot:       slot of the item inside @eb
 * @key:        key of the item; its type is forwarded to add_root_backref()
 * @root_cache: cache tree the root backrefs are recorded in
 */
4139 static int process_root_ref(struct extent_buffer *eb, int slot,
4140 struct btrfs_key *key,
4141 struct cache_tree *root_cache)
4147 struct btrfs_root_ref *ref;
4148 char namebuf[BTRFS_NAME_LEN];
4151 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
4153 dirid = btrfs_root_ref_dirid(eb, ref);
4154 index = btrfs_root_ref_sequence(eb, ref);
4155 name_len = btrfs_root_ref_name_len(eb, ref);
/*
 * namebuf holds at most BTRFS_NAME_LEN bytes: an over-long on-disk name is
 * read truncated and flagged with REF_ERR_NAME_TOO_LONG.
 */
4157 if (name_len <= BTRFS_NAME_LEN) {
4161 len = BTRFS_NAME_LEN;
4162 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the btrfs_root_ref structure inside the item. */
4164 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
/*
 * The two add_root_backref() calls differ only in the order of
 * key->objectid and key->offset: for a ROOT_REF key the offset is passed
 * first, for the other key type the objectid is passed first.
 */
4166 if (key->type == BTRFS_ROOT_REF_KEY) {
4167 add_root_backref(root_cache, key->offset, key->objectid, dirid,
4168 index, namebuf, len, key->type, error);
4170 add_root_backref(root_cache, key->objectid, key->offset, dirid,
4171 index, namebuf, len, key->type, error);
4176 static void free_corrupt_block(struct cache_extent *cache)
4178 struct btrfs_corrupt_block *corrupt;
4180 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
4184 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
4187 * Repair the btree of the given root.
4189 * The fix is to remove the node key in corrupt_blocks cache_tree.
4190 * and rebalance the tree.
4191 * After the fix, the btree should be writeable.
4193 static int repair_btree(struct btrfs_root *root,
4194 struct cache_tree *corrupt_blocks)
4196 struct btrfs_trans_handle *trans;
4197 struct btrfs_path path;
4198 struct btrfs_corrupt_block *corrupt;
4199 struct cache_extent *cache;
4200 struct btrfs_key key;
4205 if (cache_tree_empty(corrupt_blocks))
4208 trans = btrfs_start_transaction(root, 1);
4209 if (IS_ERR(trans)) {
4210 ret = PTR_ERR(trans);
4211 fprintf(stderr, "Error starting transaction: %s\n",
4215 btrfs_init_path(&path);
4216 cache = first_cache_extent(corrupt_blocks);
4218 corrupt = container_of(cache, struct btrfs_corrupt_block,
4220 level = corrupt->level;
4221 path.lowest_level = level;
4222 key.objectid = corrupt->key.objectid;
4223 key.type = corrupt->key.type;
4224 key.offset = corrupt->key.offset;
4227 * Here we don't want to do any tree balance, since it may
4228 * cause a balance with corrupted brother leaf/node,
4229 * so ins_len set to 0 here.
4230 * Balance will be done after all corrupt node/leaf is deleted.
4232 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
4235 offset = btrfs_node_blockptr(path.nodes[level],
4238 /* Remove the ptr */
4239 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
4243 * Remove the corresponding extent
4244 * return value is not concerned.
4246 btrfs_release_path(&path);
4247 ret = btrfs_free_extent(trans, root, offset,
4248 root->fs_info->nodesize, 0,
4249 root->root_key.objectid, level - 1, 0);
4250 cache = next_cache_extent(cache);
4253 /* Balance the btree using btrfs_search_slot() */
4254 cache = first_cache_extent(corrupt_blocks);
4256 corrupt = container_of(cache, struct btrfs_corrupt_block,
4258 memcpy(&key, &corrupt->key, sizeof(key));
4259 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
4262 /* the return value is always >0 since the item won't be found */
4264 btrfs_release_path(&path);
4265 cache = next_cache_extent(cache);
4268 btrfs_commit_transaction(trans, root);
4269 btrfs_release_path(&path);
4273 static int check_fs_root(struct btrfs_root *root,
4274 struct cache_tree *root_cache,
4275 struct walk_control *wc)
4281 struct btrfs_path path;
4282 struct shared_node root_node;
4283 struct root_record *rec;
4284 struct btrfs_root_item *root_item = &root->root_item;
4285 struct cache_tree corrupt_blocks;
4286 struct orphan_data_extent *orphan;
4287 struct orphan_data_extent *tmp;
4288 enum btrfs_tree_block_status status;
4289 struct node_refs nrefs;
4292 * Reuse the corrupt_block cache tree to record corrupted tree block
4294 * Unlike the usage in extent tree check, here we do it in a per
4295 * fs/subvol tree base.
4297 cache_tree_init(&corrupt_blocks);
4298 root->fs_info->corrupt_blocks = &corrupt_blocks;
4300 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4301 rec = get_root_rec(root_cache, root->root_key.objectid);
4302 BUG_ON(IS_ERR(rec));
4303 if (btrfs_root_refs(root_item) > 0)
4304 rec->found_root_item = 1;
4307 btrfs_init_path(&path);
4308 memset(&root_node, 0, sizeof(root_node));
4309 cache_tree_init(&root_node.root_cache);
4310 cache_tree_init(&root_node.inode_cache);
4311 memset(&nrefs, 0, sizeof(nrefs));
4313 /* Move the orphan extent record to corresponding inode_record */
4314 list_for_each_entry_safe(orphan, tmp,
4315 &root->orphan_data_extents, list) {
4316 struct inode_record *inode;
4318 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4320 BUG_ON(IS_ERR(inode));
4321 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4322 list_move(&orphan->list, &inode->orphan_extents);
4325 level = btrfs_header_level(root->node);
4326 memset(wc->nodes, 0, sizeof(wc->nodes));
4327 wc->nodes[level] = &root_node;
4328 wc->active_node = level;
4329 wc->root_level = level;
4331 /* We may not have checked the root block, lets do that now */
4332 if (btrfs_is_leaf(root->node))
4333 status = btrfs_check_leaf(root, NULL, root->node);
4335 status = btrfs_check_node(root, NULL, root->node);
4336 if (status != BTRFS_TREE_BLOCK_CLEAN)
4339 if (btrfs_root_refs(root_item) > 0 ||
4340 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4341 path.nodes[level] = root->node;
4342 extent_buffer_get(root->node);
4343 path.slots[level] = 0;
4345 struct btrfs_key key;
4346 struct btrfs_disk_key found_key;
4348 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4349 level = root_item->drop_level;
4350 path.lowest_level = level;
4351 if (level > btrfs_header_level(root->node) ||
4352 level >= BTRFS_MAX_LEVEL) {
4353 error("ignoring invalid drop level: %u", level);
4356 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4359 btrfs_node_key(path.nodes[level], &found_key,
4361 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4362 sizeof(found_key)));
4366 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4372 wret = walk_up_tree(root, &path, wc, &level);
4379 btrfs_release_path(&path);
4381 if (!cache_tree_empty(&corrupt_blocks)) {
4382 struct cache_extent *cache;
4383 struct btrfs_corrupt_block *corrupt;
4385 printf("The following tree block(s) is corrupted in tree %llu:\n",
4386 root->root_key.objectid);
4387 cache = first_cache_extent(&corrupt_blocks);
4389 corrupt = container_of(cache,
4390 struct btrfs_corrupt_block,
4392 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4393 cache->start, corrupt->level,
4394 corrupt->key.objectid, corrupt->key.type,
4395 corrupt->key.offset);
4396 cache = next_cache_extent(cache);
4399 printf("Try to repair the btree for root %llu\n",
4400 root->root_key.objectid);
4401 ret = repair_btree(root, &corrupt_blocks);
4403 fprintf(stderr, "Failed to repair btree: %s\n",
4406 printf("Btree for root %llu is fixed\n",
4407 root->root_key.objectid);
4411 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4415 if (root_node.current) {
4416 root_node.current->checked = 1;
4417 maybe_free_inode_rec(&root_node.inode_cache,
4421 err = check_inode_recs(root, &root_node.inode_cache);
4425 free_corrupt_blocks_tree(&corrupt_blocks);
4426 root->fs_info->corrupt_blocks = NULL;
4427 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Tell whether the root identified by @objectid is one that check_fs_roots()
 * should hand to check_fs_root().
 *
 * The tree-reloc and data-reloc tree ids are tested explicitly; every other
 * id is decided by is_fstree().
 * NOTE(review): the explicit ids are presumably accepted (non-zero result) -
 * confirm.
 */
4431 static int fs_root_objectid(u64 objectid)
4433 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4434 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4436 return is_fstree(objectid);
4439 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4440 struct cache_tree *root_cache)
4442 struct btrfs_path path;
4443 struct btrfs_key key;
4444 struct walk_control wc;
4445 struct extent_buffer *leaf, *tree_node;
4446 struct btrfs_root *tmp_root;
4447 struct btrfs_root *tree_root = fs_info->tree_root;
4451 if (ctx.progress_enabled) {
4452 ctx.tp = TASK_FS_ROOTS;
4453 task_start(ctx.info);
4457 * Just in case we made any changes to the extent tree that weren't
4458 * reflected into the free space cache yet.
4461 reset_cached_block_groups(fs_info);
4462 memset(&wc, 0, sizeof(wc));
4463 cache_tree_init(&wc.shared);
4464 btrfs_init_path(&path);
4469 key.type = BTRFS_ROOT_ITEM_KEY;
4470 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4475 tree_node = tree_root->node;
4477 if (tree_node != tree_root->node) {
4478 free_root_recs_tree(root_cache);
4479 btrfs_release_path(&path);
4482 leaf = path.nodes[0];
4483 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4484 ret = btrfs_next_leaf(tree_root, &path);
4490 leaf = path.nodes[0];
4492 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4493 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4494 fs_root_objectid(key.objectid)) {
4495 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4496 tmp_root = btrfs_read_fs_root_no_cache(
4499 key.offset = (u64)-1;
4500 tmp_root = btrfs_read_fs_root(
4503 if (IS_ERR(tmp_root)) {
4507 ret = check_fs_root(tmp_root, root_cache, &wc);
4508 if (ret == -EAGAIN) {
4509 free_root_recs_tree(root_cache);
4510 btrfs_release_path(&path);
4515 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4516 btrfs_free_fs_root(tmp_root);
4517 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4518 key.type == BTRFS_ROOT_BACKREF_KEY) {
4519 process_root_ref(leaf, path.slots[0], &key,
4526 btrfs_release_path(&path);
4528 free_extent_cache_tree(&wc.shared);
4529 if (!cache_tree_empty(&wc.shared))
4530 fprintf(stderr, "warning line %d\n", __LINE__);
4532 task_stop(ctx.info);
4538 * Find the @index according to @ino and name.
4539 * Note: time complexity is O(N)
4541 * @root: the root of the fs/file tree
4542 * @index_ret: the index as return value
4543 * @namebuf: the name to match
4544 * @name_len: the length of name to match
4545 * @file_type: the file_type of INODE_ITEM to match
4547 * Returns 0 if found and *@index_ret will be modified with right value
4548 * Returns <0 if not found and *@index_ret will be (u64)-1
4550 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4551 u64 *index_ret, char *namebuf, u32 name_len,
4554 struct btrfs_path path;
4555 struct extent_buffer *node;
4556 struct btrfs_dir_item *di;
4557 struct btrfs_key key;
4558 struct btrfs_key location;
4559 char name[BTRFS_NAME_LEN] = {0};
4571 /* search from the last index */
4572 key.objectid = dirid;
4573 key.offset = (u64)-1;
4574 key.type = BTRFS_DIR_INDEX_KEY;
4576 btrfs_init_path(&path);
4577 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4582 ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
/*
 * "Not found" must be reported as (u64)-1, the sentinel callers compare
 * against; the former "(64)-1" is plain integer arithmetic and stored 63.
 */
4585 *index_ret = (u64)-1;
4588 /* Check whether inode_id/filetype/name match */
4589 node = path.nodes[0];
4590 slot = path.slots[0];
4591 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4592 total = btrfs_item_size_nr(node, slot);
4593 while (cur < total) {
4595 len = btrfs_dir_name_len(node, di);
4596 data_len = btrfs_dir_data_len(node, di);
4598 btrfs_dir_item_key_to_cpu(node, di, &location);
/* The entry must point at the INODE_ITEM of @location_id. */
4599 if (location.objectid != location_id ||
4600 location.type != BTRFS_INODE_ITEM_KEY ||
4601 location.offset != 0)
4604 filetype = btrfs_dir_type(node, di);
4605 if (file_type != filetype)
/* name[] holds at most BTRFS_NAME_LEN bytes, so clamp before reading. */
4608 if (len > BTRFS_NAME_LEN)
4609 len = BTRFS_NAME_LEN;
4611 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4612 if (len != name_len || strncmp(namebuf, name, len))
/* Match: the index is the offset of the DIR_INDEX key the path points at. */
4615 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4616 *index_ret = key.offset;
4620 len += sizeof(*di) + data_len;
4621 di = (struct btrfs_dir_item *)((char *)di + len);
4627 btrfs_release_path(&path);
4632 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4633 * INODE_REF/INODE_EXTREF match.
4635 * @root: the root of the fs/file tree
4636 * @key: the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4637 * value while find index
4638 * @location_key: location key of the struct btrfs_dir_item to match
4639 * @name: the name to match
4640 * @namelen: the length of name
4641 * @file_type: the type of file to match
4643 * Return 0 if no error occurred.
4644 * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4645 * DIR_ITEM/DIR_INDEX
4646 * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4647 * and DIR_ITEM/DIR_INDEX mismatch
4649 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4650 struct btrfs_key *location_key, char *name,
4651 u32 namelen, u8 file_type)
4653 struct btrfs_path path;
4654 struct extent_buffer *node;
4655 struct btrfs_dir_item *di;
4656 struct btrfs_key location;
4657 char namebuf[BTRFS_NAME_LEN] = {0};
4666 /* get the index by traversing all index */
4667 if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4668 ret = find_dir_index(root, key->objectid,
4669 location_key->objectid, &key->offset,
4670 name, namelen, file_type);
4672 ret = DIR_INDEX_MISSING;
4676 btrfs_init_path(&path);
4677 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4679 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4684 /* Check whether inode_id/filetype/name match */
4685 node = path.nodes[0];
4686 slot = path.slots[0];
4687 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4688 total = btrfs_item_size_nr(node, slot);
4689 while (cur < total) {
4690 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4691 DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4693 len = btrfs_dir_name_len(node, di);
4694 data_len = btrfs_dir_data_len(node, di);
4696 btrfs_dir_item_key_to_cpu(node, di, &location);
4697 if (location.objectid != location_key->objectid ||
4698 location.type != location_key->type ||
4699 location.offset != location_key->offset)
4702 filetype = btrfs_dir_type(node, di);
4703 if (file_type != filetype)
4706 if (len > BTRFS_NAME_LEN) {
4707 len = BTRFS_NAME_LEN;
4708 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4710 key->type == BTRFS_DIR_ITEM_KEY ?
4711 "DIR_ITEM" : "DIR_INDEX",
4712 key->objectid, key->offset, len);
4714 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4716 if (len != namelen || strncmp(namebuf, name, len))
4722 len += sizeof(*di) + data_len;
4723 di = (struct btrfs_dir_item *)((char *)di + len);
4728 btrfs_release_path(&path);
4733 * Prints inode ref error message
4735 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4736 u64 index, const char *namebuf, int name_len,
4737 u8 filetype, int err)
4742 /* root dir error */
4743 if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4745 "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4746 root->objectid, key->objectid, key->offset, namebuf);
/* DIR_ITEM problems: the item is keyed by (parent dir, hash of the name). */
4751 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4752 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4753 root->objectid, key->offset,
4754 btrfs_name_hash(namebuf, name_len),
4755 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4757 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
/*
 * The wording must follow the DIR_INDEX bits tested above; testing
 * DIR_ITEM_MISMATCH here reported the state of the DIR_ITEM instead.
 */
4758 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4759 root->objectid, key->offset, index,
4760 err & DIR_INDEX_MISMATCH ? "mismatch" : "missing",
4765 * Insert the missing inode item.
4767 * Returns 0 means success.
4768 * Returns <0 means error.
4770 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4773 struct btrfs_key key;
4774 struct btrfs_trans_handle *trans;
4775 struct btrfs_path path;
4779 key.type = BTRFS_INODE_ITEM_KEY;
4782 btrfs_init_path(&path);
4783 trans = btrfs_start_transaction(root, 1);
4784 if (IS_ERR(trans)) {
4789 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4790 if (ret < 0 || !ret)
4793 /* insert inode item */
4794 create_inode_item_lowmem(trans, root, ino, filetype);
4797 btrfs_commit_transaction(trans, root);
4800 error("failed to repair root %llu INODE ITEM[%llu] missing",
4801 root->objectid, ino);
4802 btrfs_release_path(&path);
4807 * The ternary means dir item, dir index and relative inode ref.
4808 * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4809 * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4811 * If two of the three are missing or mismatched, delete the existing one.
4812 * If one of the three is missing or mismatched, add the missing one.
4814 * returns 0 means success.
4815 * returns non-zero on error.
4817 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4818 u64 index, char *name, int name_len, u8 filetype,
4821 struct btrfs_trans_handle *trans;
4826 * stage shall be one of the following valid values:
4827 * 0: Fine, nothing to do.
4828 * 1: One of the three is wrong, so add the missing one.
4829 * 2: Two of the three are wrong, so delete the existing one.
4831 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4833 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4835 if (err & (INODE_REF_MISSING))
4838 /* stage must be smaller than 3 */
4841 trans = btrfs_start_transaction(root, 1);
4843 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4848 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4849 filetype, &index, 1, 1);
4853 btrfs_commit_transaction(trans, root);
4856 error("fail to repair inode %llu name %s filetype %u",
4857 ino, name, filetype);
4859 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
4860 stage == 2 ? "Delete" : "Add",
4861 ino, name, filetype);
4867 * Traverse the given INODE_REF and call find_dir_item() to find related
4868 * DIR_ITEM/DIR_INDEX.
4870 * @root: the root of the fs/file tree
4871 * @ref_key: the key of the INODE_REF
4872 * @path the path provides node and slot
4873 * @refs_ret: the count of INODE_REF
4874 * @mode: the st_mode of INODE_ITEM
4875 * @name_ret: returns with the first ref's name
4876 * @namelen_ret: len of the name_ret
4878 * Return 0 if no error occurred.
4880 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4881 struct btrfs_path *path, char *name_ret,
4882 u32 *namelen_ret, u64 *refs_ret, int mode)
4884 struct btrfs_key key;
4885 struct btrfs_key location;
4886 struct btrfs_inode_ref *ref;
4887 struct extent_buffer *node;
4888 char namebuf[BTRFS_NAME_LEN] = {0};
4898 int need_research = 0;
4906 /* since after repair, path and the dir item may be changed */
4907 if (need_research) {
4909 btrfs_release_path(path);
4910 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
4911 /* the item was deleted, let path point to the last checked item */
4913 if (path->slots[0] == 0)
4914 btrfs_prev_leaf(root, path);
4922 location.objectid = ref_key->objectid;
4923 location.type = BTRFS_INODE_ITEM_KEY;
4924 location.offset = 0;
4925 node = path->nodes[0];
4926 slot = path->slots[0];
4928 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4929 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4930 total = btrfs_item_size_nr(node, slot);
4933 /* Update inode ref count */
4936 index = btrfs_inode_ref_index(node, ref);
4937 name_len = btrfs_inode_ref_name_len(node, ref);
4939 if (name_len <= BTRFS_NAME_LEN) {
4942 len = BTRFS_NAME_LEN;
4943 warning("root %llu INODE_REF[%llu %llu] name too long",
4944 root->objectid, ref_key->objectid, ref_key->offset);
4947 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4949 /* copy the first name found to name_ret */
4950 if (refs == 1 && name_ret) {
4951 memcpy(name_ret, namebuf, len);
4955 /* Check root dir ref */
4956 if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4957 if (index != 0 || len != strlen("..") ||
4958 strncmp("..", namebuf, len) ||
4959 ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4960 /* set err bits then repair will delete the ref */
4961 err |= DIR_INDEX_MISSING;
4962 err |= DIR_ITEM_MISSING;
4967 /* Find related DIR_INDEX */
4968 key.objectid = ref_key->offset;
4969 key.type = BTRFS_DIR_INDEX_KEY;
4971 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4972 imode_to_type(mode));
4974 /* Find related dir_item */
4975 key.objectid = ref_key->offset;
4976 key.type = BTRFS_DIR_ITEM_KEY;
4977 key.offset = btrfs_name_hash(namebuf, len);
4978 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4979 imode_to_type(mode));
4981 if (tmp_err && repair) {
4982 ret = repair_ternary_lowmem(root, ref_key->offset,
4983 ref_key->objectid, index, namebuf,
4984 name_len, imode_to_type(mode),
4991 print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4992 imode_to_type(mode), tmp_err);
4994 len = sizeof(*ref) + name_len;
4995 ref = (struct btrfs_inode_ref *)((char *)ref + len);
5006 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
5007 * DIR_ITEM/DIR_INDEX.
5009 * @root: the root of the fs/file tree
5010 * @ref_key: the key of the INODE_EXTREF
5011 * @refs: the count of INODE_EXTREF
5012 * @mode: the st_mode of INODE_ITEM
5014 * Return 0 if no error occurred.
5016 static int check_inode_extref(struct btrfs_root *root,
5017 struct btrfs_key *ref_key,
5018 struct extent_buffer *node, int slot, u64 *refs,
5021 struct btrfs_key key;
5022 struct btrfs_key location;
5023 struct btrfs_inode_extref *extref;
5024 char namebuf[BTRFS_NAME_LEN] = {0};
5034 location.objectid = ref_key->objectid;
5035 location.type = BTRFS_INODE_ITEM_KEY;
5036 location.offset = 0;
5038 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5039 total = btrfs_item_size_nr(node, slot);
5042 /* update inode ref count */
5044 name_len = btrfs_inode_extref_name_len(node, extref);
5045 index = btrfs_inode_extref_index(node, extref);
5046 parent = btrfs_inode_extref_parent(node, extref);
5047 if (name_len <= BTRFS_NAME_LEN) {
5050 len = BTRFS_NAME_LEN;
5051 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
5052 root->objectid, ref_key->objectid, ref_key->offset);
5054 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
5056 /* Check root dir ref name */
5057 if (index == 0 && strncmp(namebuf, "..", name_len)) {
5058 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
5059 root->objectid, ref_key->objectid, ref_key->offset,
5061 err |= ROOT_DIR_ERROR;
5064 /* find related dir_index */
5065 key.objectid = parent;
5066 key.type = BTRFS_DIR_INDEX_KEY;
5068 ret = find_dir_item(root, &key, &location, namebuf, len, mode);
5071 /* find related dir_item */
5072 key.objectid = parent;
5073 key.type = BTRFS_DIR_ITEM_KEY;
5074 key.offset = btrfs_name_hash(namebuf, len);
5075 ret = find_dir_item(root, &key, &location, namebuf, len, mode);
5078 len = sizeof(*extref) + name_len;
5079 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5089 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
5090 * DIR_ITEM/DIR_INDEX match.
5091 * Return with @index_ret.
5093 * @root: the root of the fs/file tree
5094 * @key: the key of the INODE_REF/INODE_EXTREF
5095 * @name: the name in the INODE_REF/INODE_EXTREF
5096 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
5097 * @index_ret: the index in the INODE_REF/INODE_EXTREF,
5098 * value (u64)-1 means do not check index
5099 * @ext_ref: the EXTENDED_IREF feature
5101 * Return 0 if no error occurred.
5102 * Return >0 for error bitmap
5104 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
5105 char *name, int namelen, u64 *index_ret,
5106 unsigned int ext_ref)
5108 struct btrfs_path path;
5109 struct btrfs_inode_ref *ref;
5110 struct btrfs_inode_extref *extref;
5111 struct extent_buffer *node;
5112 char ref_namebuf[BTRFS_NAME_LEN] = {0};
5125 btrfs_init_path(&path);
5126 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5128 ret = INODE_REF_MISSING;
5132 node = path.nodes[0];
5133 slot = path.slots[0];
5135 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5136 total = btrfs_item_size_nr(node, slot);
5138 /* Iterate all entry of INODE_REF */
5139 while (cur < total) {
5140 ret = INODE_REF_MISSING;
5142 ref_namelen = btrfs_inode_ref_name_len(node, ref);
5143 ref_index = btrfs_inode_ref_index(node, ref);
5144 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5147 if (cur + sizeof(*ref) + ref_namelen > total ||
5148 ref_namelen > BTRFS_NAME_LEN) {
5149 warning("root %llu INODE %s[%llu %llu] name too long",
5151 key->type == BTRFS_INODE_REF_KEY ?
5153 key->objectid, key->offset);
5155 if (cur + sizeof(*ref) > total)
5157 len = min_t(u32, total - cur - sizeof(*ref),
5163 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
5166 if (len != namelen || strncmp(ref_namebuf, name, len))
5169 *index_ret = ref_index;
5173 len = sizeof(*ref) + ref_namelen;
5174 ref = (struct btrfs_inode_ref *)((char *)ref + len);
5179 /* Skip if not support EXTENDED_IREF feature */
5183 btrfs_release_path(&path);
5184 btrfs_init_path(&path);
5186 dir_id = key->offset;
5187 key->type = BTRFS_INODE_EXTREF_KEY;
5188 key->offset = btrfs_extref_hash(dir_id, name, namelen);
5190 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5192 ret = INODE_REF_MISSING;
5196 node = path.nodes[0];
5197 slot = path.slots[0];
5199 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5201 total = btrfs_item_size_nr(node, slot);
5203 /* Iterate all entry of INODE_EXTREF */
5204 while (cur < total) {
5205 ret = INODE_REF_MISSING;
5207 ref_namelen = btrfs_inode_extref_name_len(node, extref);
5208 ref_index = btrfs_inode_extref_index(node, extref);
5209 parent = btrfs_inode_extref_parent(node, extref);
5210 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5213 if (parent != dir_id)
5216 if (ref_namelen <= BTRFS_NAME_LEN) {
5219 len = BTRFS_NAME_LEN;
5220 warning("root %llu INODE %s[%llu %llu] name too long",
5222 key->type == BTRFS_INODE_REF_KEY ?
5224 key->objectid, key->offset);
5226 read_extent_buffer(node, ref_namebuf,
5227 (unsigned long)(extref + 1), len);
5229 if (len != namelen || strncmp(ref_namebuf, name, len))
5232 *index_ret = ref_index;
5237 len = sizeof(*extref) + ref_namelen;
5238 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5243 btrfs_release_path(&path);
/*
 * Report through error() every problem recorded in @err for one directory
 * entry.  One message is printed per affected item class: DIR_ITEM,
 * DIR_INDEX, INODE_ITEM and INODE_REF.
 *
 * @root:     the fs/file tree the entry belongs to
 * @key:      key of the checked DIR_ITEM/DIR_INDEX
 * @ino:      inode number the entry points at
 * @index:    dir index of the entry
 * @namebuf:  name of the entry
 * @name_len: length of @namebuf
 * @filetype: file type stored in the entry
 * @err:      error bitmap to report
 */
5247 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
5248 u64 ino, u64 index, const char *namebuf,
5249 int name_len, u8 filetype, int err)
5251 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
5252 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
5253 root->objectid, key->objectid, key->offset, namebuf,
5255 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing");
5258 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
/*
 * The wording must follow the DIR_INDEX bits tested above; testing
 * DIR_ITEM_MISMATCH here reported the state of the DIR_ITEM instead.
 */
5259 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
5260 root->objectid, key->objectid, index, namebuf, filetype,
5261 err & DIR_INDEX_MISMATCH ? "mismatch" : "missing");
5264 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
5266 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
5267 root->objectid, ino, index, namebuf, filetype,
5268 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
5271 if (err & INODE_REF_MISSING)
5273 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5274 root->objectid, ino, key->objectid, namebuf, filetype);
5279 * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5281 * Returns error after repair
/*
 * Repair the problems recorded in @err for the directory entry
 * (@dirid, @index, @namebuf) that points at inode @ino, in two stages:
 * first a missing INODE_ITEM, then the dir item / dir index / inode ref
 * triple.  Bits belonging to a stage are cleared from @err after that
 * stage's helper has been called.
 * NOTE(review): the clearing is presumably conditional on the helper
 * succeeding - confirm.
 */
5283 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5284 u64 index, u8 filetype, char *namebuf, u32 name_len,
/* Stage 1: recreate the INODE_ITEM the entry points at. */
5289 if (err & INODE_ITEM_MISSING) {
5290 ret = repair_inode_item_missing(root, ino, filetype);
5292 err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
/* Stage 2: any bit other than the INODE_ITEM ones concerns the triple. */
5295 if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5296 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5297 name_len, filetype, err);
5299 err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5300 err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5301 err &= ~(INODE_REF_MISSING);
5307 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5310 struct btrfs_key key;
5311 struct btrfs_path path;
5313 struct btrfs_dir_item *di;
5323 key.offset = (u64)-1;
5325 btrfs_init_path(&path);
5326 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5331 /* if found, go to special case */
5336 ret = btrfs_previous_item(root, &path, ino, type);
5344 di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5346 total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5348 while (cur < total) {
5349 len = btrfs_dir_name_len(path.nodes[0], di);
5350 if (len > BTRFS_NAME_LEN)
5351 len = BTRFS_NAME_LEN;
5354 len += btrfs_dir_data_len(path.nodes[0], di);
5356 di = (struct btrfs_dir_item *)((char *)di + len);
5362 btrfs_release_path(&path);
/*
 * Compute the size of directory inode @ino in @root as the sum of the two
 * values __count_dir_isize() reports for its DIR_ITEM and DIR_INDEX items,
 * and store it in *@size.  A failure is reported through error().
 */
5366 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5373 ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5377 ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5381 *size = item_size + index_size;
5385 error("failed to count root %llu INODE[%llu] root size",
5386 root->objectid, ino);
5391 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5392 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5394 * @root: the root of the fs/file tree
5395 * @di_key: the key of the DIR_ITEM/DIR_INDEX
5397 * @size: the st_size of the INODE_ITEM
5398 * @ext_ref: the EXTENDED_IREF feature
5400 * Return 0 if no error occurred.
5401 * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
5403 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5404 struct btrfs_path *path, u64 *size,
5405 unsigned int ext_ref)
5407 struct btrfs_dir_item *di;
5408 struct btrfs_inode_item *ii;
5409 struct btrfs_key key;
5410 struct btrfs_key location;
5411 struct extent_buffer *node;
5413 char namebuf[BTRFS_NAME_LEN] = {0};
5425 int need_research = 0;
5428 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5429 * ignore index check.
5431 if (di_key->type == BTRFS_DIR_INDEX_KEY)
5432 index = di_key->offset;
5439 /* since after repair, path and the dir item may be changed */
5440 if (need_research) {
5442 err |= DIR_COUNT_AGAIN;
5443 btrfs_release_path(path);
5444 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5445 /* the item was deleted, let path point the last checked item */
5447 if (path->slots[0] == 0)
5448 btrfs_prev_leaf(root, path);
5456 node = path->nodes[0];
5457 slot = path->slots[0];
5459 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5460 total = btrfs_item_size_nr(node, slot);
5461 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5463 while (cur < total) {
5464 data_len = btrfs_dir_data_len(node, di);
5467 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5469 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5470 di_key->objectid, di_key->offset, data_len);
5472 name_len = btrfs_dir_name_len(node, di);
5473 if (name_len <= BTRFS_NAME_LEN) {
5476 len = BTRFS_NAME_LEN;
5477 warning("root %llu %s[%llu %llu] name too long",
5479 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5480 di_key->objectid, di_key->offset);
5482 (*size) += name_len;
5483 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5485 filetype = btrfs_dir_type(node, di);
5487 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5488 di_key->offset != btrfs_name_hash(namebuf, len)) {
5490 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5491 root->objectid, di_key->objectid, di_key->offset,
5492 namebuf, len, filetype, di_key->offset,
5493 btrfs_name_hash(namebuf, len));
5496 btrfs_dir_item_key_to_cpu(node, di, &location);
5497 /* Ignore related ROOT_ITEM check */
5498 if (location.type == BTRFS_ROOT_ITEM_KEY)
5501 btrfs_release_path(path);
5502 /* Check relative INODE_ITEM(existence/filetype) */
5503 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5505 tmp_err |= INODE_ITEM_MISSING;
5509 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5510 struct btrfs_inode_item);
5511 mode = btrfs_inode_mode(path->nodes[0], ii);
5512 if (imode_to_type(mode) != filetype) {
5513 tmp_err |= INODE_ITEM_MISMATCH;
5517 /* Check relative INODE_REF/INODE_EXTREF */
5518 key.objectid = location.objectid;
5519 key.type = BTRFS_INODE_REF_KEY;
5520 key.offset = di_key->objectid;
5521 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5524 /* check relative INDEX/ITEM */
5525 key.objectid = di_key->objectid;
5526 if (key.type == BTRFS_DIR_ITEM_KEY) {
5527 key.type = BTRFS_DIR_INDEX_KEY;
5530 key.type = BTRFS_DIR_ITEM_KEY;
5531 key.offset = btrfs_name_hash(namebuf, name_len);
5534 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5535 name_len, filetype);
5536 /* find_dir_item may find index */
5537 if (key.type == BTRFS_DIR_INDEX_KEY)
5541 if (tmp_err && repair) {
5542 ret = repair_dir_item(root, di_key->objectid,
5543 location.objectid, index,
5544 imode_to_type(mode), namebuf,
5546 if (ret != tmp_err) {
5551 btrfs_release_path(path);
5552 print_dir_item_err(root, di_key, location.objectid, index,
5553 namebuf, name_len, filetype, tmp_err);
5555 len = sizeof(*di) + name_len + data_len;
5556 di = (struct btrfs_dir_item *)((char *)di + len);
5559 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5560 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5561 root->objectid, di_key->objectid,
5568 btrfs_release_path(path);
5569 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5571 err |= ret > 0 ? -ENOENT : ret;
5576 * Wrapper function of btrfs_punch_hole.
5578 * Returns 0 means success.
5579 * Returns not 0 means error.
/*
 * Calls btrfs_punch_hole() for inode @ino of @root inside a transaction that
 * this function starts and commits itself, and reports the outcome through
 * error() or printf().
 */
5581 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5584 struct btrfs_trans_handle *trans;
5587 trans = btrfs_start_transaction(root, 1);
/* PTR_ERR() turns the failed transaction handle into the returned error code */
5589 return PTR_ERR(trans);
5591 ret = btrfs_punch_hole(trans, root, ino, start, len);
/* the outcome is reported either as an error or as an informational line */
5593 error("failed to add hole [%llu, %llu] in inode [%llu]",
5596 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
/* commit the transaction started above */
5599 btrfs_commit_transaction(trans, root);
5604 * Check file extent datasum/hole, update the size of the file extents,
5605 * check and update the last offset of the file extent.
5607 * @root: the root of fs/file tree.
5608 * @fkey: the key of the file extent.
5609 * @nodatasum: INODE_NODATASUM feature.
5610 * @size: the sum of all EXTENT_DATA items size for this inode.
5611 * @end: the offset of the last extent.
5613 * Return 0 if no error occurred.
/*
 * Validates the EXTENT_DATA item @fkey stored at @slot of @node.
 *
 * Problems are accumulated as bits in err (FILE_EXTENT_ERROR, ODD_CSUM_ITEM,
 * CSUM_ITEM_MISSING). The running totals *@size and *@end are advanced by the
 * extent length for inline as well as regular/prealloc extents.
 */
5615 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5616 struct extent_buffer *node, int slot,
5617 unsigned int nodatasum, u64 *size, u64 *end)
5619 struct btrfs_file_extent_item *fi;
5622 u64 extent_num_bytes;
5624 u64 csum_found; /* In byte size, sectorsize aligned */
5625 u64 search_start; /* Logical range start we search for csum */
5626 u64 search_len; /* Logical range len we search for csum */
5627 unsigned int extent_type;
5628 unsigned int is_hole;
5633 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5635 /* Check inline extent */
5636 extent_type = btrfs_file_extent_type(node, fi);
5637 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5638 struct btrfs_item *e = btrfs_item_nr(slot);
5639 u32 item_inline_len;
/* length derived from the item vs. length recorded for the inline extent */
5641 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5642 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5643 compressed = btrfs_file_extent_compression(node, fi);
5644 if (extent_num_bytes == 0) {
5646 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5647 root->objectid, fkey->objectid, fkey->offset);
5648 err |= FILE_EXTENT_ERROR;
/* the two lengths are only required to agree for uncompressed inline data */
5650 if (!compressed && extent_num_bytes != item_inline_len) {
5652 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5653 root->objectid, fkey->objectid, fkey->offset,
5654 extent_num_bytes, item_inline_len);
5655 err |= FILE_EXTENT_ERROR;
/* inline extents count towards both running totals */
5657 *end += extent_num_bytes;
5658 *size += extent_num_bytes;
5662 /* Check extent type */
5663 if (extent_type != BTRFS_FILE_EXTENT_REG &&
5664 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5665 err |= FILE_EXTENT_ERROR;
5666 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5667 root->objectid, fkey->objectid, fkey->offset);
5671 /* Check REG_EXTENT/PREALLOC_EXTENT */
5672 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5673 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5674 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5675 extent_offset = btrfs_file_extent_offset(node, fi);
5676 compressed = btrfs_file_extent_compression(node, fi);
/* is_hole is set when both the disk bytenr and the disk length are 0 */
5677 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5680 * Check EXTENT_DATA csum
5682 * For plain (uncompressed) extent, we should only check the range
5683 * we're referring to, as it's possible that part of prealloc extent
5684 * has been written, and has csum:
5686 * |<--- Original large preallocated extent A ---->|
5687 * |<- Prealloc File Extent ->|<- Regular Extent ->|
5690 * For compressed extent, we should check the whole range.
5693 search_start = disk_bytenr + extent_offset;
5694 search_len = extent_num_bytes;
5696 search_start = disk_bytenr;
5697 search_len = disk_num_bytes;
/* csum_found is filled in by count_csum_range() and judged against search_len */
5699 ret = count_csum_range(root, search_start, search_len, &csum_found);
5700 if (csum_found > 0 && nodatasum) {
5701 err |= ODD_CSUM_ITEM;
5702 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5703 root->objectid, fkey->objectid, fkey->offset);
/* only regular, non-hole extents of checksummed inodes need full csum coverage */
5704 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5705 !is_hole && (ret < 0 || csum_found < search_len)) {
5706 err |= CSUM_ITEM_MISSING;
5707 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5708 root->objectid, fkey->objectid, fkey->offset,
5709 csum_found, search_len);
5710 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5711 err |= ODD_CSUM_ITEM;
5712 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5713 root->objectid, fkey->objectid, fkey->offset, csum_found);
5716 /* Check EXTENT_DATA hole */
/* without no_holes, *end must equal this extent's file offset */
5717 if (!no_holes && *end != fkey->offset) {
5719 ret = punch_extent_hole(root, fkey->objectid,
5720 *end, fkey->offset - *end);
/* the gap stays an error when repair is off or the hole punch failed */
5721 if (!repair || ret) {
5722 err |= FILE_EXTENT_ERROR;
5723 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
5724 root->objectid, fkey->objectid, fkey->offset);
5728 *end += extent_num_bytes;
5730 *size += extent_num_bytes;
5736 * Set inode item nbytes to @nbytes
5738 * Returns 0 on success
5739 * Returns != 0 on error
/*
 * Rewrites the nbytes field of the INODE_ITEM of @ino in @root to @nbytes
 * inside a transaction of its own.
 *
 * @path is released and reused for the INODE_ITEM lookup, so the key it
 * pointed at on entry is saved in research_key and searched for again before
 * returning.
 *
 * Fix: the success message used to carry its newline in the middle of the
 * format string ("... root %llu\n to %llu"); it now ends the line, matching
 * the message printed by repair_dir_isize_lowmem().
 */
5741 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5742 struct btrfs_path *path,
5743 u64 ino, u64 nbytes)
5745 struct btrfs_trans_handle *trans;
5746 struct btrfs_inode_item *ii;
5747 struct btrfs_key key;
5748 struct btrfs_key research_key;
/* remember where the caller's path pointed */
5752 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5755 key.type = BTRFS_INODE_ITEM_KEY;
5758 trans = btrfs_start_transaction(root, 1);
5759 if (IS_ERR(trans)) {
5760 ret = PTR_ERR(trans);
5765 btrfs_release_path(path);
/* search with cow = 1 because the found leaf is modified below */
5766 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5774 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5775 struct btrfs_inode_item);
5776 btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5777 btrfs_mark_buffer_dirty(path->nodes[0]);
5779 btrfs_commit_transaction(trans, root);
5782 error("failed to set nbytes in inode %llu root %llu",
5783 ino, root->root_key.objectid);
5785 printf("Set nbytes in inode item %llu root %llu to %llu\n", ino,
5786 root->root_key.objectid, nbytes);
/* put the path back on the key saved at entry */
5789 btrfs_release_path(path);
5790 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5797 * Set directory inode isize to @isize.
5799 * Returns 0 on success.
5800 * Returns != 0 on error.
/*
 * Rewrites the size field of the INODE_ITEM of @ino in @root to the new isize
 * inside a transaction of its own.
 *
 * @path is released and reused for the INODE_ITEM lookup, so the key it
 * pointed at on entry is saved in research_key and searched for again before
 * returning.
 */
5802 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5803 struct btrfs_path *path,
5806 struct btrfs_trans_handle *trans;
5807 struct btrfs_inode_item *ii;
5808 struct btrfs_key key;
5809 struct btrfs_key research_key;
/* remember where the caller's path pointed */
5813 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5816 key.type = BTRFS_INODE_ITEM_KEY;
5819 trans = btrfs_start_transaction(root, 1);
5820 if (IS_ERR(trans)) {
5821 ret = PTR_ERR(trans);
5826 btrfs_release_path(path);
/* search with cow = 1 because the found leaf is modified below */
5827 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5835 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5836 struct btrfs_inode_item);
5837 btrfs_set_inode_size(path->nodes[0], ii, isize);
5838 btrfs_mark_buffer_dirty(path->nodes[0]);
5840 btrfs_commit_transaction(trans, root);
5843 error("failed to set isize in inode %llu root %llu",
5844 ino, root->root_key.objectid);
5846 printf("Set isize in inode %llu root %llu to %llu\n",
5847 ino, root->root_key.objectid, isize);
/* put the path back on the key saved at entry */
5849 btrfs_release_path(path);
5850 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5857 * Wrapper function for btrfs_add_orphan_item().
5859 * Returns 0 on success.
5860 * Returns != 0 on error.
/*
 * Calls btrfs_add_orphan_item() for @ino in @root inside a transaction of its
 * own and reports the outcome.
 *
 * @path is handed to btrfs_add_orphan_item(), so the key it pointed at on
 * entry is saved in research_key and searched for again before returning.
 */
5862 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5863 struct btrfs_path *path, u64 ino)
5865 struct btrfs_trans_handle *trans;
5866 struct btrfs_key research_key;
/* remember where the caller's path pointed */
5870 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5872 trans = btrfs_start_transaction(root, 1);
5873 if (IS_ERR(trans)) {
5874 ret = PTR_ERR(trans);
5879 btrfs_release_path(path);
5880 ret = btrfs_add_orphan_item(trans, root, path, ino);
5882 btrfs_commit_transaction(trans, root);
5885 error("failed to add inode %llu as orphan item root %llu",
5886 ino, root->root_key.objectid);
5888 printf("Added inode %llu as orphan item root %llu\n",
5889 ino, root->root_key.objectid);
/* put the path back on the key saved at entry */
5891 btrfs_release_path(path);
5892 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5898 /* Set inode_item nlink to @ref_count.
5899 * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
5901 * Returns 0 on success
/*
 * Sets the nlink field of the INODE_ITEM of @ino to @ref_count inside a
 * transaction of its own. When @ref_count is 0 the inode is first handed to
 * link_inode_to_lostfound(), which receives &ref_count and may update it.
 *
 * @name/@namelen give the name to use; when none is supplied the decimal
 * inode number is used as the name instead.
 *
 * @path is released and reused, so the key it pointed at on entry is saved in
 * old_key and searched for again before returning.
 *
 * Fix: both result messages say "inode %llu root %llu" but were passed
 * (root->objectid, ino), printing the two numbers swapped. The arguments now
 * follow the order of the format string.
 *
 * NOTE(review): namebuf holds BTRFS_NAME_LEN bytes and is printed with %s, so
 * a name of exactly BTRFS_NAME_LEN bytes leaves it without a terminating NUL
 * - confirm whether callers can pass such a name.
 */
5903 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
5904 struct btrfs_path *path, u64 ino,
5905 const char *name, u32 namelen,
5906 u64 ref_count, u8 filetype, u64 *nlink)
5908 struct btrfs_trans_handle *trans;
5909 struct btrfs_inode_item *ii;
5910 struct btrfs_key key;
5911 struct btrfs_key old_key;
5912 char namebuf[BTRFS_NAME_LEN] = {0};
/* remember where the caller's path pointed */
5918 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
5920 if (name && namelen) {
5921 ASSERT(namelen <= BTRFS_NAME_LEN);
5922 memcpy(namebuf, name, namelen);
/* no usable name: fall back to the inode number written in decimal */
5925 sprintf(namebuf, "%llu", ino);
5926 name_len = count_digits(ino);
5927 printf("Can't find file name for inode %llu, use %s instead\n",
5931 trans = btrfs_start_transaction(root, 1);
5932 if (IS_ERR(trans)) {
5933 ret = PTR_ERR(trans);
5937 btrfs_release_path(path);
5938 /* if refs is 0, put it into lostfound */
5939 if (ref_count == 0) {
5940 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
5941 name_len, filetype, &ref_count);
5946 /* reset inode_item's nlink to ref_count */
5948 key.type = BTRFS_INODE_ITEM_KEY;
5951 btrfs_release_path(path);
/* search with cow = 1 because the found leaf is modified below */
5952 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5958 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5959 struct btrfs_inode_item);
5960 btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
5961 btrfs_mark_buffer_dirty(path->nodes[0]);
5966 btrfs_commit_transaction(trans, root);
5970 "fail to repair nlink of inode %llu root %llu name %s filetype %u",
5971 ino, root->objectid, namebuf, filetype);
5973 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
5974 ino, root->objectid, namebuf, filetype);
/* put the path back on the key saved at entry */
5977 btrfs_release_path(path);
5978 ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
5985 * Check INODE_ITEM and related ITEMs (the same inode number)
5986 * 1. check link count
5987 * 2. check inode ref/extref
5988 * 3. check dir item/index
5990 * @ext_ref: the EXTENDED_IREF feature
5992 * Return 0 if no error occurred.
5993 * Return >0 (an error bitmap) if an error was found or the traversal is done
/*
 * Checks the INODE_ITEM that @path points at together with the following
 * items that carry the same objectid (INODE_REF, INODE_EXTREF, DIR_ITEM,
 * DIR_INDEX, EXTENT_DATA, XATTR_ITEM).
 *
 * While iterating it gathers refs, size, extent_size and extent_end through
 * the per-item checkers, then compares them with the nlink, isize and nbytes
 * values read from the INODE_ITEM. When the global repair flag is set the
 * repair_*_lowmem() helpers and punch_extent_hole() are invoked for the
 * mismatches.
 */
5995 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5996 unsigned int ext_ref)
5998 struct extent_buffer *node;
5999 struct btrfs_inode_item *ii;
6000 struct btrfs_key key;
6001 struct btrfs_key last_key;
6010 u64 extent_size = 0;
6012 unsigned int nodatasum;
6016 char namebuf[BTRFS_NAME_LEN] = {0};
6019 node = path->nodes[0];
6020 slot = path->slots[0];
6022 btrfs_item_key_to_cpu(node, &key, slot);
6023 inode_id = key.objectid;
/* for BTRFS_ORPHAN_OBJECTID the path is simply advanced to the next item */
6025 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
6026 ret = btrfs_next_item(root, path);
/* read the INODE_ITEM fields that the collected statistics are compared with */
6032 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
6033 isize = btrfs_inode_size(node, ii);
6034 nbytes = btrfs_inode_nbytes(node, ii);
6035 mode = btrfs_inode_mode(node, ii);
6036 dir = imode_to_type(mode) == BTRFS_FT_DIR;
6037 nlink = btrfs_inode_nlink(node, ii);
6038 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
/* last_key records the item visited before each step forward */
6041 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
6042 ret = btrfs_next_item(root, path);
6044 /* out will fill 'err' using current statistics */
6046 } else if (ret > 0) {
6051 node = path->nodes[0];
6052 slot = path->slots[0];
6053 btrfs_item_key_to_cpu(node, &key, slot);
/* a different objectid means the item no longer belongs to this inode */
6054 if (key.objectid != inode_id)
6058 case BTRFS_INODE_REF_KEY:
6059 ret = check_inode_ref(root, &key, path, namebuf,
6060 &name_len, &refs, mode);
6063 case BTRFS_INODE_EXTREF_KEY:
6064 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
6065 warning("root %llu EXTREF[%llu %llu] isn't supported",
6066 root->objectid, key.objectid,
6068 ret = check_inode_extref(root, &key, node, slot, &refs,
6072 case BTRFS_DIR_ITEM_KEY:
6073 case BTRFS_DIR_INDEX_KEY:
6075 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
6076 root->objectid, inode_id,
6077 imode_to_type(mode), key.objectid,
6080 ret = check_dir_item(root, &key, path, &size, ext_ref);
6083 case BTRFS_EXTENT_DATA_KEY:
6085 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
6086 root->objectid, inode_id, key.objectid,
6089 ret = check_file_extent(root, &key, node, slot,
6090 nodatasum, &extent_size,
6094 case BTRFS_XATTR_ITEM_KEY:
6097 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
6098 key.objectid, key.type, key.offset);
/* with LAST_ITEM set, the path is searched back to last_key */
6103 if (err & LAST_ITEM) {
6104 btrfs_release_path(path);
6105 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
6110 /* verify INODE_ITEM nlink/isize/nbytes */
/* in repair mode a pending DIR_COUNT_AGAIN makes the directory size be recounted */
6112 if (repair && (err & DIR_COUNT_AGAIN)) {
6113 err &= ~DIR_COUNT_AGAIN;
6114 count_dir_isize(root, inode_id, &size);
6117 if ((nlink != 1 || refs != 1) && repair) {
6118 ret = repair_inode_nlinks_lowmem(root, path, inode_id,
6119 namebuf, name_len, refs, imode_to_type(mode),
6124 err |= LINK_COUNT_ERROR;
6125 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
6126 root->objectid, inode_id, nlink);
6130 * Just a warning, as dir inode nbytes is just an
6131 * instructive value.
6133 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
6134 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
6135 root->objectid, inode_id,
6136 root->fs_info->nodesize);
/* isize from the INODE_ITEM is compared with the size gathered via check_dir_item() */
6139 if (isize != size) {
6141 ret = repair_dir_isize_lowmem(root, path,
6143 if (!repair || ret) {
6146 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
6147 root->objectid, inode_id, isize, size);
/* nlink from the INODE_ITEM must match the number of refs counted above */
6151 if (nlink != refs) {
6153 ret = repair_inode_nlinks_lowmem(root, path,
6154 inode_id, namebuf, name_len, refs,
6155 imode_to_type(mode), &nlink);
6156 if (!repair || ret) {
6157 err |= LINK_COUNT_ERROR;
6159 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
6160 root->objectid, inode_id, nlink, refs);
/* nlink == refs == 0: handled through the orphan item helper */
6162 } else if (!nlink) {
6164 ret = repair_inode_orphan_item_lowmem(root,
6166 if (!repair || ret) {
6168 error("root %llu INODE[%llu] is orphan item",
6169 root->objectid, inode_id);
/* nbytes is 0 and the extents end before isize: a trailing hole is expected */
6173 if (!nbytes && !no_holes && extent_end < isize) {
6175 ret = punch_extent_hole(root, inode_id,
6176 extent_end, isize - extent_end);
6177 if (!repair || ret) {
6178 err |= NBYTES_ERROR;
6180 "root %llu INODE[%llu] size %llu should have a file extent hole",
6181 root->objectid, inode_id, isize);
/* nbytes must equal the byte total accumulated by check_file_extent() */
6185 if (nbytes != extent_size) {
6187 ret = repair_inode_nbytes_lowmem(root, path,
6188 inode_id, extent_size);
6189 if (!repair || ret) {
6190 err |= NBYTES_ERROR;
6192 "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
6193 root->objectid, inode_id, nbytes,
6199 if (err & LAST_ITEM)
6200 btrfs_next_item(root, path);
6205 * Insert the missing inode item and inode ref.
6207 * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref dir.
6208 * Root dir should be handled specially because root dir is the root of fs.
6210 * returns err (>0 or 0) after repair
/*
 * Repairs the root directory inode (objectid BTRFS_FIRST_FREE_OBJECTID) of
 * @root according to the bits set in @err:
 *   INODE_REF_MISSING  - inserts a ".." INODE_REF in a transaction of its own
 *   INODE_ITEM_MISSING - delegates to repair_inode_item_missing()
 * A bit is cleared from err once its repair has been carried out.
 */
6212 static int repair_fs_first_inode(struct btrfs_root *root, int err)
6214 struct btrfs_trans_handle *trans;
6215 struct btrfs_key key;
6216 struct btrfs_path path;
6217 int filetype = BTRFS_FT_DIR;
6220 btrfs_init_path(&path);
6222 if (err & INODE_REF_MISSING) {
/* the root dir references itself: objectid and offset are both the first free objectid */
6223 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6224 key.type = BTRFS_INODE_REF_KEY;
6225 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6227 trans = btrfs_start_transaction(root, 1);
6228 if (IS_ERR(trans)) {
6229 ret = PTR_ERR(trans);
6233 btrfs_release_path(&path);
6234 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
6238 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
6239 BTRFS_FIRST_FREE_OBJECTID,
6240 BTRFS_FIRST_FREE_OBJECTID, 0);
6244 printf("Add INODE_REF[%llu %llu] name %s\n",
6245 BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
6247 err &= ~INODE_REF_MISSING;
6250 error("fail to insert first inode's ref");
6251 btrfs_commit_transaction(trans, root);
6254 if (err & INODE_ITEM_MISSING) {
6255 ret = repair_inode_item_missing(root,
6256 BTRFS_FIRST_FREE_OBJECTID, filetype);
6259 err &= ~INODE_ITEM_MISSING;
6263 error("fail to repair first inode");
6264 btrfs_release_path(&path);
6269 * check first root dir's inode_item and inode_ref
6271 * returns 0 means no error
6272 * returns >0 means error
6273 * returns <0 means fatal error
/*
 * Verifies that @root has an INODE_ITEM for BTRFS_FIRST_FREE_OBJECTID whose
 * mode is a directory, and that the matching ".." INODE_REF can be found.
 * Findings are collected as INODE_ITEM_MISSING / INODE_ITEM_MISMATCH /
 * INODE_REF_MISSING bits in err, and repair_fs_first_inode() is given the
 * chance to clear them. A negative ret takes precedence over err in the
 * returned value.
 */
6275 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
6277 struct btrfs_path path;
6278 struct btrfs_key key;
6279 struct btrfs_inode_item *ii;
6285 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6286 key.type = BTRFS_INODE_ITEM_KEY;
6289 /* For root being dropped, we don't need to check first inode */
6290 if (btrfs_root_refs(&root->root_item) == 0 &&
6291 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
6292 BTRFS_FIRST_FREE_OBJECTID)
6295 btrfs_init_path(&path);
6296 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6301 err |= INODE_ITEM_MISSING;
/* the first inode has to be a directory */
6303 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6304 struct btrfs_inode_item);
6305 mode = btrfs_inode_mode(path.nodes[0], ii);
6306 if (imode_to_type(mode) != BTRFS_FT_DIR)
6307 err |= INODE_ITEM_MISMATCH;
6310 /* lookup first inode ref */
6311 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6312 key.type = BTRFS_INODE_REF_KEY;
6313 /* special index value */
6316 ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6322 btrfs_release_path(&path);
6325 err = repair_fs_first_inode(root, err);
/* whatever is still set in err at this point gets reported */
6327 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6328 error("root dir INODE_ITEM is %s",
6329 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6330 if (err & INODE_REF_MISSING)
6331 error("root dir INODE_REF is missing");
/* fatal (negative) search errors win over the error bitmap */
6333 return ret < 0 ? ret : err;
/*
 * Looks up a tree backref of @rec: a template tree_backref ("match") is
 * filled in and searched for in rec->backref_tree using
 * compare_extent_backref(). back starts out as NULL and is only assigned from
 * the node found by rb_search().
 */
6336 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6337 u64 parent, u64 root)
6339 struct rb_node *node;
6340 struct tree_backref *back = NULL;
6341 struct tree_backref match = {
/* a backref keyed by parent is marked as a full backref */
6348 match.parent = parent;
6349 match.node.full_backref = 1;
6354 node = rb_search(&rec->backref_tree, &match.node.node,
6355 (rb_compare_keys)compare_extent_backref, NULL);
6357 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Looks up a data backref of @rec: a template data_backref ("match") is
 * filled in (including found_ref and disk_bytenr) and searched for in
 * rec->backref_tree using compare_extent_backref(). back starts out as NULL
 * and is only assigned from the node found by rb_search().
 */
6362 static struct data_backref *find_data_backref(struct extent_record *rec,
6363 u64 parent, u64 root,
6364 u64 owner, u64 offset,
6366 u64 disk_bytenr, u64 bytes)
6368 struct rb_node *node;
6369 struct data_backref *back = NULL;
6370 struct data_backref match = {
6377 .found_ref = found_ref,
6378 .disk_bytenr = disk_bytenr,
/* a backref keyed by parent is marked as a full backref */
6382 match.parent = parent;
6383 match.node.full_backref = 1;
6388 node = rb_search(&rec->backref_tree, &match.node.node,
6389 (rb_compare_keys)compare_extent_backref, NULL);
6391 back = to_data_backref(rb_node_to_extent_backref(node));
6396 * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
6397 * blocks and integrity of fs tree items.
6399 * @root: the root of the tree to be checked.
6400 * @ext_ref: whether the EXTENDED_IREF feature is enabled.
6401 * @account if NOT 0 means check the tree (including tree)'s treeblocks.
6402 * otherwise means check fs tree(s) items relationship and
6403 * @root MUST be a fs tree root.
6404 * Returns 0 represents OK.
6405 * Returns not 0 represents error.
/*
 * Walks the tree of @root with walk_down_tree_v2() / walk_up_tree_v2() after
 * first checking the first inode through check_fs_first_inode().
 *
 * The starting position depends on the root item: a root that still has refs,
 * or whose drop_progress objectid is 0, is walked from its root node;
 * otherwise the walk starts at the recorded drop_progress key and drop_level.
 */
6407 static int check_btrfs_root(struct btrfs_trans_handle *trans,
6408 struct btrfs_root *root, unsigned int ext_ref,
6412 struct btrfs_path path;
6413 struct node_refs nrefs;
6414 struct btrfs_root_item *root_item = &root->root_item;
6419 memset(&nrefs, 0, sizeof(nrefs));
6422 * We need to manually check the first inode item (256)
6423 * As the following traversal function will only start from
6424 * the first inode item in the leaf, if inode item (256) is
6425 * missing we will skip it forever.
6427 ret = check_fs_first_inode(root, ext_ref);
6433 level = btrfs_header_level(root->node);
6434 btrfs_init_path(&path);
6436 if (btrfs_root_refs(root_item) > 0 ||
6437 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
/* start from slot 0 of the root node */
6438 path.nodes[level] = root->node;
6439 path.slots[level] = 0;
6440 extent_buffer_get(root->node);
6442 struct btrfs_key key;
/* start from the drop_progress key at drop_level */
6444 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6445 level = root_item->drop_level;
6446 path.lowest_level = level;
6447 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6454 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6455 ext_ref, check_all);
6459 /* if ret is negative, walk shall stop */
6465 ret = walk_up_tree_v2(root, &path, &level);
6467 /* Normal exit, reset ret to err */
6474 btrfs_release_path(&path);
6478 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info);
6481 * Iterate all items in the tree and call check_inode_item() to check.
6483 * @root: the root of the tree to be checked.
6484 * @ext_ref: the EXTENDED_IREF feature
6486 * Return 0 if no error found.
6487 * Return <0 for error.
/*
 * Thin wrapper: resets the cached block groups of the filesystem, then runs
 * check_btrfs_root() on @root without a transaction (NULL) and with 0 as the
 * last argument.
 */
6489 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6491 reset_cached_block_groups(root->fs_info);
6492 return check_btrfs_root(NULL, root, ext_ref, 0);
6496 * Find the relative ref for root_ref and root_backref.
6498 * @root: the root of the root tree.
6499 * @ref_key: the key of the root ref.
6501 * Return 0 if no error occurred.
/*
 * Cross-checks a ROOT_REF or ROOT_BACKREF item (@ref_key at @slot of @node)
 * against its counterpart in @root: the counterpart must exist
 * (ROOT_REF_MISSING otherwise) and carry the same dirid, sequence, name
 * length and name (ROOT_REF_MISMATCH otherwise).
 */
6503 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6504 struct extent_buffer *node, int slot)
6506 struct btrfs_path path;
6507 struct btrfs_key key;
6508 struct btrfs_root_ref *ref;
6509 struct btrfs_root_ref *backref;
6510 char ref_name[BTRFS_NAME_LEN] = {0};
6511 char backref_name[BTRFS_NAME_LEN] = {0};
6517 u32 backref_namelen;
6522 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6523 ref_dirid = btrfs_root_ref_dirid(node, ref);
6524 ref_seq = btrfs_root_ref_sequence(node, ref);
6525 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* names longer than BTRFS_NAME_LEN are read truncated, with a warning */
6527 if (ref_namelen <= BTRFS_NAME_LEN) {
6530 len = BTRFS_NAME_LEN;
6531 warning("%s[%llu %llu] ref_name too long",
6532 ref_key->type == BTRFS_ROOT_REF_KEY ?
6533 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
/* the name is stored right behind the btrfs_root_ref structure */
6536 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6538 /* Find relative root_ref */
/* objectid and offset trade places in the counterpart's key */
6539 key.objectid = ref_key->offset;
/* BACKREF + REF - type turns ROOT_REF_KEY into ROOT_BACKREF_KEY and vice versa */
6540 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6541 key.offset = ref_key->objectid;
6543 btrfs_init_path(&path);
6544 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6546 err |= ROOT_REF_MISSING;
6547 error("%s[%llu %llu] couldn't find relative ref",
6548 ref_key->type == BTRFS_ROOT_REF_KEY ?
6549 "ROOT_REF" : "ROOT_BACKREF",
6550 ref_key->objectid, ref_key->offset);
6554 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6555 struct btrfs_root_ref);
6556 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6557 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6558 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6560 if (backref_namelen <= BTRFS_NAME_LEN) {
6561 len = backref_namelen;
6563 len = BTRFS_NAME_LEN;
6564 warning("%s[%llu %llu] ref_name too long",
6565 key.type == BTRFS_ROOT_REF_KEY ?
6566 "ROOT_REF" : "ROOT_BACKREF",
6567 key.objectid, key.offset);
6569 read_extent_buffer(path.nodes[0], backref_name,
6570 (unsigned long)(backref + 1), len);
/* every field, including the name bytes, has to agree between the two items */
6572 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6573 ref_namelen != backref_namelen ||
6574 strncmp(ref_name, backref_name, len)) {
6575 err |= ROOT_REF_MISMATCH;
6576 error("%s[%llu %llu] mismatch relative ref",
6577 ref_key->type == BTRFS_ROOT_REF_KEY ?
6578 "ROOT_REF" : "ROOT_BACKREF",
6579 ref_key->objectid, ref_key->offset);
6582 btrfs_release_path(&path);
6587 * Check all fs/file tree in low_memory mode.
6589 * 1. for fs tree root item, call check_fs_root_v2()
6590 * 2. for fs tree root ref/backref, call check_root_ref()
6592 * Return 0 if no error occurred.
/*
 * Iterates the items of the tree root starting at BTRFS_FS_TREE_OBJECTID.
 * ROOT_ITEMs that fs_root_objectid() accepts are read and checked with
 * check_fs_root_v2(); ROOT_REF / ROOT_BACKREF items are checked with
 * check_root_ref(). The path is advanced with btrfs_next_item().
 */
6594 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6596 struct btrfs_root *tree_root = fs_info->tree_root;
6597 struct btrfs_root *cur_root = NULL;
6598 struct btrfs_path path;
6599 struct btrfs_key key;
6600 struct extent_buffer *node;
6601 unsigned int ext_ref;
6606 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6608 btrfs_init_path(&path);
6609 key.objectid = BTRFS_FS_TREE_OBJECTID;
6611 key.type = BTRFS_ROOT_ITEM_KEY;
6613 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6617 } else if (ret > 0) {
6623 node = path.nodes[0];
6624 slot = path.slots[0];
6625 btrfs_item_key_to_cpu(node, &key, slot);
/* objectids above BTRFS_LAST_FREE_OBJECTID are outside the checked range */
6626 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6628 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6629 fs_root_objectid(key.objectid)) {
/* reloc tree roots are read without the cache and freed again below */
6630 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6631 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6634 key.offset = (u64)-1;
6635 cur_root = btrfs_read_fs_root(fs_info, &key);
6638 if (IS_ERR(cur_root)) {
6639 error("Fail to read fs/subvol tree: %lld",
6645 ret = check_fs_root_v2(cur_root, ext_ref);
6648 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6649 btrfs_free_fs_root(cur_root);
6650 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6651 key.type == BTRFS_ROOT_BACKREF_KEY) {
6652 ret = check_root_ref(tree_root, &key, node, slot);
6656 ret = btrfs_next_item(tree_root, &path);
6666 btrfs_release_path(&path);
/*
 * Dispatches the fs-roots check according to the global check_mode:
 * check_fs_roots_v2() for CHECK_MODE_LOWMEM, check_fs_roots() otherwise.
 * A plain status line goes to stderr when progress reporting is disabled.
 */
6670 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6671 struct cache_tree *root_cache)
6675 if (!ctx.progress_enabled)
6676 fprintf(stderr, "checking fs roots\n");
6677 if (check_mode == CHECK_MODE_LOWMEM)
6678 ret = check_fs_roots_v2(fs_info);
6680 ret = check_fs_roots(fs_info, root_cache);
/*
 * Walks every backref in rec->backref_tree and looks for inconsistencies:
 *   - backref not found in the extent tree
 *   - tree backref that was never referenced back
 *   - data backref whose found_ref differs from num_refs
 *   - data backref whose disk_bytenr / bytes differ from the record
 * and finally compares the accumulated reference count ("found") with
 * rec->refs. Each finding has a diagnostic that is written to stderr.
 *
 * NOTE(review): @print_errs presumably gates the fprintf() calls - the
 * guarding conditions are not part of the lines shown here; confirm.
 */
6685 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6687 struct extent_backref *back, *tmp;
6688 struct tree_backref *tback;
6689 struct data_backref *dback;
6693 rbtree_postorder_for_each_entry_safe(back, tmp,
6694 &rec->backref_tree, node) {
6695 if (!back->found_extent_tree) {
6699 if (back->is_data) {
6700 dback = to_data_backref(back);
6701 fprintf(stderr, "Data backref %llu %s %llu"
6702 " owner %llu offset %llu num_refs %lu"
6703 " not found in extent tree\n",
6704 (unsigned long long)rec->start,
6705 back->full_backref ?
/* full backrefs are identified by parent, the others by root */
6707 back->full_backref ?
6708 (unsigned long long)dback->parent:
6709 (unsigned long long)dback->root,
6710 (unsigned long long)dback->owner,
6711 (unsigned long long)dback->offset,
6712 (unsigned long)dback->num_refs);
6714 tback = to_tree_backref(back);
6715 fprintf(stderr, "Tree backref %llu parent %llu"
6716 " root %llu not found in extent tree\n",
6717 (unsigned long long)rec->start,
6718 (unsigned long long)tback->parent,
6719 (unsigned long long)tback->root);
/* a tree backref has to have been seen from the referencing side */
6722 if (!back->is_data && !back->found_ref) {
6726 tback = to_tree_backref(back);
6727 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6728 (unsigned long long)rec->start,
6729 back->full_backref ? "parent" : "root",
6730 back->full_backref ?
6731 (unsigned long long)tback->parent :
6732 (unsigned long long)tback->root, back);
6734 if (back->is_data) {
6735 dback = to_data_backref(back);
/* per-backref count check */
6736 if (dback->found_ref != dback->num_refs) {
6740 fprintf(stderr, "Incorrect local backref count"
6741 " on %llu %s %llu owner %llu"
6742 " offset %llu found %u wanted %u back %p\n",
6743 (unsigned long long)rec->start,
6744 back->full_backref ?
6746 back->full_backref ?
6747 (unsigned long long)dback->parent:
6748 (unsigned long long)dback->root,
6749 (unsigned long long)dback->owner,
6750 (unsigned long long)dback->offset,
6751 dback->found_ref, dback->num_refs, back);
6753 if (dback->disk_bytenr != rec->start) {
6757 fprintf(stderr, "Backref disk bytenr does not"
6758 " match extent record, bytenr=%llu, "
6759 "ref bytenr=%llu\n",
6760 (unsigned long long)rec->start,
6761 (unsigned long long)dback->disk_bytenr);
6764 if (dback->bytes != rec->nr) {
6768 fprintf(stderr, "Backref bytes do not match "
6769 "extent backref, bytenr=%llu, ref "
6770 "bytes=%llu, backref bytes=%llu\n",
6771 (unsigned long long)rec->start,
6772 (unsigned long long)rec->nr,
6773 (unsigned long long)dback->bytes);
6776 if (!back->is_data) {
/* data backrefs contribute their found_ref to the global count */
6779 dback = to_data_backref(back);
6780 found += dback->found_ref;
/* global count check against the extent record */
6783 if (found != rec->refs) {
6787 fprintf(stderr, "Incorrect global backref count "
6788 "on %llu found %llu wanted %llu\n",
6789 (unsigned long long)rec->start,
6790 (unsigned long long)found,
6791 (unsigned long long)rec->refs);
/*
 * Per-node callback that free_all_extent_backrefs() hands to rb_free_nodes():
 * maps the rb_node back to the extent_backref that embeds it.
 */
6797 static void __free_one_backref(struct rb_node *node)
6799 struct extent_backref *back = rb_node_to_extent_backref(node);
/*
 * Releases every backref attached to @rec by running __free_one_backref()
 * over rec->backref_tree via rb_free_nodes().
 */
6804 static void free_all_extent_backrefs(struct extent_record *rec)
6806 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Empties @extent_cache: takes the first cache entry, maps it to its
 * extent_record, unlinks it from the cache and releases the record's
 * backrefs.
 */
6809 static void free_extent_record_cache(struct cache_tree *extent_cache)
6811 struct cache_extent *cache;
6812 struct extent_record *rec;
6815 cache = first_cache_extent(extent_cache);
/* the cache_extent is embedded in the extent_record */
6818 rec = container_of(cache, struct extent_record, cache);
6819 remove_cache_extent(extent_cache, cache);
6820 free_all_extent_backrefs(rec);
/*
 * Drops @rec from @extent_cache once nothing is left to verify on it: content
 * and owner ref checked, extent item refs equal to a non-zero refs, no
 * duplicates, all_backpointers_checked() returning 0 and none of the
 * bad_full_backref / crossing_stripes / wrong_chunk_type flags set.
 */
6825 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6826 struct extent_record *rec)
6828 if (rec->content_checked && rec->owner_ref_checked &&
6829 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6830 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6831 !rec->bad_full_backref && !rec->crossing_stripes &&
6832 !rec->wrong_chunk_type) {
/* unlink from the cache, release the backrefs, take the record off its list */
6833 remove_cache_extent(extent_cache, &rec->cache);
6834 free_all_extent_backrefs(rec);
6835 list_del_init(&rec->list);
/*
 * Checks that the owner recorded in the header of @buf is backed by a
 * reference. The backrefs of @rec are scanned first for a found, non-full
 * tree backref whose root equals the header owner. Otherwise the owner's fs
 * tree is read and searched down to the level above @buf, and the block
 * pointer found there is compared with buf->start.
 *
 * Returns 0 when a reference was found, 1 otherwise.
 */
6841 static int check_owner_ref(struct btrfs_root *root,
6842 struct extent_record *rec,
6843 struct extent_buffer *buf)
6845 struct extent_backref *node, *tmp;
6846 struct tree_backref *back;
6847 struct btrfs_root *ref_root;
6848 struct btrfs_key key;
6849 struct btrfs_path path;
6850 struct extent_buffer *parent;
6855 rbtree_postorder_for_each_entry_safe(node, tmp,
6856 &rec->backref_tree, node) {
6859 if (!node->found_ref)
6861 if (node->full_backref)
6863 back = to_tree_backref(node);
6864 if (btrfs_header_owner(buf) == back->root)
6867 BUG_ON(rec->is_root);
6869 /* try to find the block by search corresponding fs tree */
6870 key.objectid = btrfs_header_owner(buf);
6871 key.type = BTRFS_ROOT_ITEM_KEY;
6872 key.offset = (u64)-1;
6874 ref_root = btrfs_read_fs_root(root->fs_info, &key);
6875 if (IS_ERR(ref_root))
6878 level = btrfs_header_level(buf);
/* the first key of the block is the search key: item key or node key */
6880 btrfs_item_key_to_cpu(buf, &key, 0);
6882 btrfs_node_key_to_cpu(buf, &key, 0);
6884 btrfs_init_path(&path);
/* stop the search one level above buf so that its parent is in the path */
6885 path.lowest_level = level + 1;
6886 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6890 parent = path.nodes[level + 1];
6891 if (parent && buf->start == btrfs_node_blockptr(parent,
6892 path.slots[level + 1]))
6895 btrfs_release_path(&path);
6896 return found ? 0 : 1;
/*
 * Inspect the backrefs of @rec and test whether a tree backref names
 * BTRFS_EXTENT_TREE_OBJECTID as its root.  Backrefs with full_backref set
 * are tested separately before the root comparison.
 */
6899 static int is_extent_tree_record(struct extent_record *rec)
6901 struct extent_backref *node, *tmp;
6902 struct tree_backref *back;
6905 rbtree_postorder_for_each_entry_safe(node, tmp,
6906 &rec->backref_tree, node) {
6909 back = to_tree_backref(node);
6910 if (node->full_backref)
6912 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Register an unreadable block as a corrupt extent record.
 *
 * The extent record covering [start, len) is looked up in @extent_cache and
 * tested with is_extent_tree_record().  The record's parent key is converted
 * to CPU order and passed to btrfs_add_corrupt_extent_record(), whose result
 * is returned.
 */
6919 static int record_bad_block_io(struct btrfs_fs_info *info,
6920 struct cache_tree *extent_cache,
6923 struct extent_record *rec;
6924 struct cache_extent *cache;
6925 struct btrfs_key key;
6927 cache = lookup_cache_extent(extent_cache, start, len);
6931 rec = container_of(cache, struct extent_record, cache);
6932 if (!is_extent_tree_record(rec))
6935 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6936 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of tree block @buf.
 *
 * For a node (header level != 0) the two btrfs_key_ptr entries are swapped.
 * For a leaf the item data, the item offsets/sizes and the keys of the two
 * items are exchanged.
 */
6939 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6940 struct extent_buffer *buf, int slot)
6942 if (btrfs_header_level(buf)) {
6943 struct btrfs_key_ptr ptr1, ptr2;
/* Read both key pointers, then write each one back into the other's slot. */
6945 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6946 sizeof(struct btrfs_key_ptr));
6947 read_extent_buffer(buf, &ptr2,
6948 btrfs_node_key_ptr_offset(slot + 1),
6949 sizeof(struct btrfs_key_ptr));
6950 write_extent_buffer(buf, &ptr1,
6951 btrfs_node_key_ptr_offset(slot + 1),
6952 sizeof(struct btrfs_key_ptr));
6953 write_extent_buffer(buf, &ptr2,
6954 btrfs_node_key_ptr_offset(slot),
6955 sizeof(struct btrfs_key_ptr));
/* The node's first key is handed to btrfs_fixup_low_keys() for level + 1. */
6957 struct btrfs_disk_key key;
6958 btrfs_node_key(buf, &key, 0);
6959 btrfs_fixup_low_keys(root, path, &key,
6960 btrfs_header_level(buf) + 1);
6963 struct btrfs_item *item1, *item2;
6964 struct btrfs_key k1, k2;
6965 char *item1_data, *item2_data;
6966 u32 item1_offset, item2_offset, item1_size, item2_size;
6968 item1 = btrfs_item_nr(slot);
6969 item2 = btrfs_item_nr(slot + 1);
6970 btrfs_item_key_to_cpu(buf, &k1, slot);
6971 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6972 item1_offset = btrfs_item_offset(buf, item1);
6973 item2_offset = btrfs_item_offset(buf, item2);
6974 item1_size = btrfs_item_size(buf, item1);
6975 item2_size = btrfs_item_size(buf, item2);
/* Temporary copies of both item payloads. */
6977 item1_data = malloc(item1_size);
6980 item2_data = malloc(item2_size);
6986 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6987 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): item1_data holds item1_size bytes but is written with
 * item2_size (and vice versa).  If the two sizes can differ this reads past
 * the smaller buffer -- confirm.
 */
6989 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6990 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
/* Exchange the offset/size stored in the two item headers. */
6994 btrfs_set_item_offset(buf, item1, item2_offset);
6995 btrfs_set_item_offset(buf, item2, item1_offset);
6996 btrfs_set_item_size(buf, item1, item2_size);
6997 btrfs_set_item_size(buf, item2, item1_size);
/* Exchange the keys; path->slots[0] selects the slot each call rewrites. */
6999 path->slots[0] = slot;
7000 btrfs_set_item_key_unsafe(root, path, &k2);
7001 path->slots[0] = slot + 1;
7002 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Walk the block at path->nodes[path->lowest_level] and compare each pair of
 * neighbouring keys.  Pairs for which btrfs_comp_cpu_keys() does not report
 * k1 < k2 are handed to swap_values(); the buffer is marked dirty afterwards.
 */
7007 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
7009 struct extent_buffer *buf;
7010 struct btrfs_key k1, k2;
7012 int level = path->lowest_level;
7015 buf = path->nodes[level];
/*
 * NOTE(review): if btrfs_header_nritems() returns an unsigned type, the
 * bound "nritems - 1" wraps for an empty block -- confirm callers never
 * pass one.
 */
7016 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
/* Keys are read with the node or the item accessor. */
7018 btrfs_node_key_to_cpu(buf, &k1, i);
7019 btrfs_node_key_to_cpu(buf, &k2, i + 1);
7021 btrfs_item_key_to_cpu(buf, &k1, i);
7022 btrfs_item_key_to_cpu(buf, &k2, i + 1);
7024 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
7026 ret = swap_values(root, path, buf, i);
7029 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * Only the key types tested below are candidates.  The btrfs_item headers
 * following @slot are moved down by one position and the header item count
 * is decremented; the buffer is then marked dirty.
 */
7035 static int delete_bogus_item(struct btrfs_root *root,
7036 struct btrfs_path *path,
7037 struct extent_buffer *buf, int slot)
7039 struct btrfs_key key;
7040 int nritems = btrfs_header_nritems(buf);
7042 btrfs_item_key_to_cpu(buf, &key, slot);
7044 /* These are all the keys we can deal with missing. */
7045 if (key.type != BTRFS_DIR_INDEX_KEY &&
7046 key.type != BTRFS_EXTENT_ITEM_KEY &&
7047 key.type != BTRFS_METADATA_ITEM_KEY &&
7048 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7049 key.type != BTRFS_EXTENT_DATA_REF_KEY)
7052 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
7053 (unsigned long long)key.objectid, key.type,
7054 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array left by the removed slot. */
7055 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
7056 btrfs_item_nr_offset(slot + 1),
7057 sizeof(struct btrfs_item) *
7058 (nritems - slot - 1));
7059 btrfs_set_header_nritems(buf, nritems - 1);
/* Pass the leaf's (new) first key up to the parents at level 1. */
7061 struct btrfs_disk_key disk_key;
7063 btrfs_item_key(buf, &disk_key, 0);
7064 btrfs_fixup_low_keys(root, path, &disk_key, 1);
7066 btrfs_mark_buffer_dirty(buf);
/*
 * Repair the item data offsets of the leaf at path->nodes[0].
 *
 * Item 0 is expected to end at BTRFS_LEAF_DATA_SIZE(root) and every later
 * item to end where the previous item starts.  An item ending beyond that
 * bound is handed to delete_bogus_item(); an item ending short of it is
 * shifted by the size of the gap.
 */
7070 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
7072 struct extent_buffer *buf;
7076 /* We should only get this for leaves */
7077 BUG_ON(path->lowest_level);
7078 buf = path->nodes[0];
7080 for (i = 0; i < btrfs_header_nritems(buf); i++) {
7081 unsigned int shift = 0, offset;
/* First item: compare its end against the end of the leaf data area. */
7083 if (i == 0 && btrfs_item_end_nr(buf, i) !=
7084 BTRFS_LEAF_DATA_SIZE(root)) {
7085 if (btrfs_item_end_nr(buf, i) >
7086 BTRFS_LEAF_DATA_SIZE(root)) {
7087 ret = delete_bogus_item(root, path, buf, i);
7090 fprintf(stderr, "item is off the end of the "
7091 "leaf, can't fix\n");
7095 shift = BTRFS_LEAF_DATA_SIZE(root) -
7096 btrfs_item_end_nr(buf, i);
/* Later items: compare the end against the start of the previous item. */
7097 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
7098 btrfs_item_offset_nr(buf, i - 1)) {
7099 if (btrfs_item_end_nr(buf, i) >
7100 btrfs_item_offset_nr(buf, i - 1)) {
7101 ret = delete_bogus_item(root, path, buf, i);
7104 fprintf(stderr, "items overlap, can't fix\n");
7108 shift = btrfs_item_offset_nr(buf, i - 1) -
7109 btrfs_item_end_nr(buf, i);
7114 printf("Shifting item nr %d by %u bytes in block %llu\n",
7115 i, shift, (unsigned long long)buf->start);
/* Move the item payload up by `shift` bytes, then update its header. */
7116 offset = btrfs_item_offset_nr(buf, i);
7117 memmove_extent_buffer(buf,
7118 btrfs_leaf_data(buf) + offset + shift,
7119 btrfs_leaf_data(buf) + offset,
7120 btrfs_item_size_nr(buf, i));
7121 btrfs_set_item_offset(buf, btrfs_item_nr(i),
7123 btrfs_mark_buffer_dirty(buf);
7127 * We may have moved things, in which case we want to exit so we don't
7128 * write those changes out. Once we have proper abort functionality in
7129 * progs this can be changed to something nicer.
7136 * Attempt to fix basic block failures. If we can't fix it for whatever reason
7137 * then just return -EIO.
/*
 * Only BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS
 * are handled.  For every root that btrfs_find_all_roots() reports for
 * buf->start, the root is read, a transaction is started and the block is
 * located with btrfs_search_slot() (cow = 1, skip_check_block set) before
 * fix_key_order() or fix_item_offset() is run on the resulting path.
 */
7139 static int try_to_fix_bad_block(struct btrfs_root *root,
7140 struct extent_buffer *buf,
7141 enum btrfs_tree_block_status status)
7143 struct btrfs_trans_handle *trans;
7144 struct ulist *roots;
7145 struct ulist_node *node;
7146 struct btrfs_root *search_root;
7147 struct btrfs_path path;
7148 struct ulist_iterator iter;
7149 struct btrfs_key root_key, key;
7152 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
7153 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7156 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
7160 btrfs_init_path(&path);
7161 ULIST_ITER_INIT(&iter);
7162 while ((node = ulist_next(roots, &iter))) {
/* node->val is used as the objectid of the root item to read. */
7163 root_key.objectid = node->val;
7164 root_key.type = BTRFS_ROOT_ITEM_KEY;
7165 root_key.offset = (u64)-1;
7167 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
7174 trans = btrfs_start_transaction(search_root, 0);
7175 if (IS_ERR(trans)) {
7176 ret = PTR_ERR(trans);
/* Search down to the level of @buf using its first key. */
7180 path.lowest_level = btrfs_header_level(buf);
7181 path.skip_check_block = 1;
7182 if (path.lowest_level)
7183 btrfs_node_key_to_cpu(buf, &key, 0);
7185 btrfs_item_key_to_cpu(buf, &key, 0);
7186 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
7189 btrfs_commit_transaction(trans, search_root);
7192 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
7193 ret = fix_key_order(search_root, &path);
7194 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7195 ret = fix_item_offset(search_root, &path);
7197 btrfs_commit_transaction(trans, search_root);
7200 btrfs_release_path(&path);
7201 btrfs_commit_transaction(trans, search_root);
7204 btrfs_release_path(&path);
/*
 * Validate tree block @buf and update its extent record.
 *
 * The record for [buf->start, buf->len) is looked up in @extent_cache and its
 * generation, info_objectid and info_level are filled from the block header
 * and first key.  The block is checked with btrfs_check_leaf() or
 * btrfs_check_node(); on a non-clean status try_to_fix_bad_block() is
 * attempted.  content_checked is set on the record, and owner_ref_checked is
 * set either directly for BTRFS_BLOCK_FLAG_FULL_BACKREF or after calling
 * check_owner_ref().  Finally the record is offered to
 * maybe_free_extent_rec().
 */
7208 static int check_block(struct btrfs_root *root,
7209 struct cache_tree *extent_cache,
7210 struct extent_buffer *buf, u64 flags)
7212 struct extent_record *rec;
7213 struct cache_extent *cache;
7214 struct btrfs_key key;
7215 enum btrfs_tree_block_status status;
7219 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
7222 rec = container_of(cache, struct extent_record, cache);
7223 rec->generation = btrfs_header_generation(buf);
7225 level = btrfs_header_level(buf);
7226 if (btrfs_header_nritems(buf) > 0) {
/* Remember the objectid of the block's first key. */
7229 btrfs_item_key_to_cpu(buf, &key, 0);
7231 btrfs_node_key_to_cpu(buf, &key, 0);
7233 rec->info_objectid = key.objectid;
7235 rec->info_level = level;
7237 if (btrfs_is_leaf(buf))
7238 status = btrfs_check_leaf(root, &rec->parent_key, buf);
7240 status = btrfs_check_node(root, &rec->parent_key, buf);
7242 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7244 status = try_to_fix_bad_block(root, buf, status);
7245 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7247 fprintf(stderr, "bad block %llu\n",
7248 (unsigned long long)buf->start);
7251 * Signal to callers we need to start the scan over
7252 * again since we'll have cowed blocks.
7257 rec->content_checked = 1;
7258 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7259 rec->owner_ref_checked = 1;
7261 ret = check_owner_ref(root, rec, buf);
7263 rec->owner_ref_checked = 1;
7267 maybe_free_extent_rec(extent_cache, rec);
/*
 * Search the backrefs of @rec for a tree backref matching @parent or @root.
 *
 * Entries are compared on back->parent (together with a full_backref test)
 * or on back->root.
 *
 * NOTE(review): this walks the rec->backrefs list, whereas add_tree_backref()
 * inserts new entries into rec->backref_tree -- confirm both containers are
 * kept in sync.
 */
7272 static struct tree_backref *find_tree_backref(struct extent_record *rec,
7273 u64 parent, u64 root)
7275 struct list_head *cur = rec->backrefs.next;
7276 struct extent_backref *node;
7277 struct tree_backref *back;
7279 while(cur != &rec->backrefs) {
7280 node = to_extent_backref(cur);
7284 back = to_tree_backref(node);
7286 if (!node->full_backref)
7288 if (parent == back->parent)
7291 if (node->full_backref)
7293 if (back->root == root)
/*
 * Allocate a tree_backref with malloc() and zero its embedded node.
 *
 * One variant records @parent and sets full_backref to 1, the other clears
 * full_backref to 0.
 */
7301 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
7302 u64 parent, u64 root)
7304 struct tree_backref *ref = malloc(sizeof(*ref));
/* Only the embedded node is cleared; the other fields are set explicitly. */
7308 memset(&ref->node, 0, sizeof(ref->node));
7310 ref->parent = parent;
7311 ref->node.full_backref = 1;
7314 ref->node.full_backref = 0;
/*
 * Search the rec->backrefs list for a data backref.
 *
 * Entries are compared on back->parent (together with a full_backref test)
 * or on the (root, owner, offset) triple.  For a triple match with both
 * found_ref and node->found_ref set, the entry's bytes and disk_bytenr are
 * additionally compared against @bytes and @disk_bytenr.
 */
7321 static struct data_backref *find_data_backref(struct extent_record *rec,
7322 u64 parent, u64 root,
7323 u64 owner, u64 offset,
7325 u64 disk_bytenr, u64 bytes)
7327 struct list_head *cur = rec->backrefs.next;
7328 struct extent_backref *node;
7329 struct data_backref *back;
7331 while(cur != &rec->backrefs) {
7332 node = to_extent_backref(cur);
7336 back = to_data_backref(node);
7338 if (!node->full_backref)
7340 if (parent == back->parent)
7343 if (node->full_backref)
7345 if (back->root == root && back->owner == owner &&
7346 back->offset == offset) {
7347 if (found_ref && node->found_ref &&
7348 (back->bytes != bytes ||
7349 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data_backref with malloc(), zero its embedded node and mark it
 * as a data backref.
 *
 * One variant records @parent and sets full_backref to 1, the other records
 * @offset and clears full_backref.  The backref's byte count is set from
 * max_size, and rec->max_size is raised to max_size when it is smaller.
 */
7359 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7360 u64 parent, u64 root,
7361 u64 owner, u64 offset,
7364 struct data_backref *ref = malloc(sizeof(*ref));
7368 memset(&ref->node, 0, sizeof(ref->node));
7369 ref->node.is_data = 1;
7372 ref->parent = parent;
7375 ref->node.full_backref = 1;
7379 ref->offset = offset;
7380 ref->node.full_backref = 0;
7382 ref->bytes = max_size;
/* Keep the record's max_size as the largest size seen so far. */
7385 if (max_size > rec->max_size)
7386 rec->max_size = max_size;
7390 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the flags of the block group found for
 * rec->start (via global_info) do not fit the extent:
 *  - a data extent needs BTRFS_BLOCK_GROUP_DATA;
 *  - a metadata extent needs SYSTEM or METADATA; if the record has backrefs,
 *    the first one decides which of the two is required, and a data backref
 *    on a tree block is itself flagged as wrong.
 */
7391 static void check_extent_type(struct extent_record *rec)
7393 struct btrfs_block_group_cache *bg_cache;
7395 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7399 /* data extent, check chunk directly*/
7400 if (!rec->metadata) {
7401 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7402 rec->wrong_chunk_type = 1;
7406 /* metadata extent, check the obvious case first */
7407 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7408 BTRFS_BLOCK_GROUP_METADATA))) {
7409 rec->wrong_chunk_type = 1;
7414 * Check SYSTEM extent, as it's also marked as metadata, we can only
7415 * make sure it's a SYSTEM extent by its backref
7417 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7418 struct extent_backref *node;
7419 struct tree_backref *tback;
/* Only the first node of the backref tree is consulted. */
7422 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7423 if (node->is_data) {
7424 /* tree block shouldn't have data backref */
7425 rec->wrong_chunk_type = 1;
7428 tback = container_of(node, struct tree_backref, node);
/* Blocks owned by the chunk tree must live in a SYSTEM block group. */
7430 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7431 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7433 bg_type = BTRFS_BLOCK_GROUP_METADATA;
7434 if (!(bg_cache->flags & bg_type))
7435 rec->wrong_chunk_type = 1;
7440 * Allocate a new extent record, fill default values from @tmpl and insert into
7441 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7442 * the cache, otherwise it fails.
/*
 * Fields copied from @tmpl: start, max_size, found_rec, content_checked,
 * owner_ref_checked, metadata, is_root, refs, extent_item_refs,
 * parent_generation and parent_key.  rec->nr becomes the larger of tmpl->nr
 * and tmpl->max_size, while the cache entry itself spans
 * [tmpl->start, tmpl->nr).  The error flags start cleared.
 *
 * After insertion rec->nr is added to the global bytes_used counter, the
 * crossing-stripes check is run with the filesystem nodesize and
 * check_extent_type() is applied.
 */
7444 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7445 struct extent_record *tmpl)
7447 struct extent_record *rec;
7450 BUG_ON(tmpl->max_size == 0);
7451 rec = malloc(sizeof(*rec));
7454 rec->start = tmpl->start;
7455 rec->max_size = tmpl->max_size;
7456 rec->nr = max(tmpl->nr, tmpl->max_size);
7457 rec->found_rec = tmpl->found_rec;
7458 rec->content_checked = tmpl->content_checked;
7459 rec->owner_ref_checked = tmpl->owner_ref_checked;
7460 rec->num_duplicates = 0;
7461 rec->metadata = tmpl->metadata;
7462 rec->flag_block_full_backref = FLAG_UNSET;
7463 rec->bad_full_backref = 0;
7464 rec->crossing_stripes = 0;
7465 rec->wrong_chunk_type = 0;
7466 rec->is_root = tmpl->is_root;
7467 rec->refs = tmpl->refs;
7468 rec->extent_item_refs = tmpl->extent_item_refs;
7469 rec->parent_generation = tmpl->parent_generation;
7470 INIT_LIST_HEAD(&rec->backrefs);
7471 INIT_LIST_HEAD(&rec->dups);
7472 INIT_LIST_HEAD(&rec->list);
7473 rec->backref_tree = RB_ROOT;
7474 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
/* The cache entry uses tmpl->nr as its size, not the max() stored in rec->nr. */
7475 rec->cache.start = tmpl->start;
7476 rec->cache.size = tmpl->nr;
7477 ret = insert_cache_extent(extent_cache, &rec->cache);
7482 bytes_used += rec->nr;
7485 rec->crossing_stripes = check_crossing_stripes(global_info,
7486 rec->start, global_info->nodesize);
7487 check_extent_type(rec);
7492 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7494 * - refs - if found, increase refs
7495 * - is_root - if found, set
7496 * - content_checked - if found, set
7497 * - owner_ref_checked - if found, set
7499 * If not found, create a new one, initialize and insert.
/*
 * When an existing record is found and tmpl->found_rec is set with either a
 * different start or an already-found record, the existing record is put on
 * the global duplicate_extents list and a new extent_record describing @tmpl
 * is appended to rec->dups.  Otherwise the existing record is updated from
 * @tmpl, re-checked and offered to maybe_free_extent_rec().
 */
7501 static int add_extent_rec(struct cache_tree *extent_cache,
7502 struct extent_record *tmpl)
7504 struct extent_record *rec;
7505 struct cache_extent *cache;
7509 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7511 rec = container_of(cache, struct extent_record, cache);
7515 rec->nr = max(tmpl->nr, tmpl->max_size);
7518 * We need to make sure to reset nr to whatever the extent
7519 * record says was the real size, this way we can compare it to
7522 if (tmpl->found_rec) {
7523 if (tmpl->start != rec->start || rec->found_rec) {
7524 struct extent_record *tmp;
/* Queue the record for duplicate handling only once. */
7527 if (list_empty(&rec->list))
7528 list_add_tail(&rec->list,
7529 &duplicate_extents);
7532 * We have to do this song and dance in case we
7533 * find an extent record that falls inside of
7534 * our current extent record but does not have
7535 * the same objectid.
7537 tmp = malloc(sizeof(*tmp));
7540 tmp->start = tmpl->start;
7541 tmp->max_size = tmpl->max_size;
7544 tmp->metadata = tmpl->metadata;
7545 tmp->extent_item_refs = tmpl->extent_item_refs;
7546 INIT_LIST_HEAD(&tmp->list);
7547 list_add_tail(&tmp->list, &rec->dups);
7548 rec->num_duplicates++;
/* A second, conflicting extent_item_refs value is reported on stderr. */
7555 if (tmpl->extent_item_refs && !dup) {
7556 if (rec->extent_item_refs) {
7557 fprintf(stderr, "block %llu rec "
7558 "extent_item_refs %llu, passed %llu\n",
7559 (unsigned long long)tmpl->start,
7560 (unsigned long long)
7561 rec->extent_item_refs,
7562 (unsigned long long)tmpl->extent_item_refs);
7564 rec->extent_item_refs = tmpl->extent_item_refs;
7568 if (tmpl->content_checked)
7569 rec->content_checked = 1;
7570 if (tmpl->owner_ref_checked)
7571 rec->owner_ref_checked = 1;
7572 memcpy(&rec->parent_key, &tmpl->parent_key,
7573 sizeof(tmpl->parent_key));
7574 if (tmpl->parent_generation)
7575 rec->parent_generation = tmpl->parent_generation;
7576 if (rec->max_size < tmpl->max_size)
7577 rec->max_size = tmpl->max_size;
7580 * A metadata extent can't cross stripe_len boundary, otherwise
7581 * kernel scrub won't be able to handle it.
7582 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7586 rec->crossing_stripes = check_crossing_stripes(
7587 global_info, rec->start,
7588 global_info->nodesize);
7589 check_extent_type(rec);
7590 maybe_free_extent_rec(extent_cache, rec);
/* No existing record: create one from the template. */
7594 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree-block backref (@parent / @root) for the extent at @bytenr.
 *
 * If no extent record covers @bytenr yet, a placeholder is created from a
 * zeroed template through add_extent_rec_nolookup() and looked up again.  The
 * backref is located with find_tree_backref() or created with
 * alloc_tree_backref().  Either found_ref or found_extent_tree is then set on
 * the backref node; a message is printed to stderr when the flag was already
 * set.  Finally the extent type is re-checked and the record is offered to
 * maybe_free_extent_rec().
 */
7599 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7600 u64 parent, u64 root, int found_ref)
7602 struct extent_record *rec;
7603 struct tree_backref *back;
7604 struct cache_extent *cache;
7606 bool insert = false;
7608 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7610 struct extent_record tmpl;
7612 memset(&tmpl, 0, sizeof(tmpl));
7613 tmpl.start = bytenr;
7618 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7622 /* really a bug in cache_extent implement now */
7623 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7628 rec = container_of(cache, struct extent_record, cache);
/* The covering record must start exactly at @bytenr. */
7629 if (rec->start != bytenr) {
7631 * Several cause, from unaligned bytenr to over lapping extents
7636 back = find_tree_backref(rec, parent, root);
7638 back = alloc_tree_backref(rec, parent, root);
7645 if (back->node.found_ref) {
7646 fprintf(stderr, "Extent back ref already exists "
7647 "for %llu parent %llu root %llu \n",
7648 (unsigned long long)bytenr,
7649 (unsigned long long)parent,
7650 (unsigned long long)root);
7652 back->node.found_ref = 1;
7654 if (back->node.found_extent_tree) {
7655 fprintf(stderr, "Extent back ref already exists "
7656 "for %llu parent %llu root %llu \n",
7657 (unsigned long long)bytenr,
7658 (unsigned long long)parent,
7659 (unsigned long long)root);
7661 back->node.found_extent_tree = 1;
/* rb_insert() returning non-zero only triggers a warning here. */
7664 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7665 compare_extent_backref));
7666 check_extent_type(rec);
7667 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * If no extent record covers @bytenr yet, a placeholder is created from a
 * zeroed template (max_size = @max_size) and looked up again.  rec->max_size
 * is raised to @max_size when smaller.  The backref is located with
 * find_data_backref() or created with alloc_data_backref().
 *
 * In the found_ref case num_refs must be 1 (BUG_ON otherwise); the backref's
 * found_ref counter is incremented, its bytes/disk_bytenr are updated, and
 * the record is marked content- and owner-checked.  In the extent-tree case
 * num_refs is stored and found_extent_tree is set, with a message on stderr
 * if it was already set.  The record is then offered to
 * maybe_free_extent_rec().
 */
7671 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7672 u64 parent, u64 root, u64 owner, u64 offset,
7673 u32 num_refs, int found_ref, u64 max_size)
7675 struct extent_record *rec;
7676 struct data_backref *back;
7677 struct cache_extent *cache;
7679 bool insert = false;
7681 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7683 struct extent_record tmpl;
7685 memset(&tmpl, 0, sizeof(tmpl));
7686 tmpl.start = bytenr;
7688 tmpl.max_size = max_size;
7690 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7694 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7699 rec = container_of(cache, struct extent_record, cache);
7700 if (rec->max_size < max_size)
7701 rec->max_size = max_size;
7704 * If found_ref is set then max_size is the real size and must match the
7705 * existing refs. So if we have already found a ref then we need to
7706 * make sure that this ref matches the existing one, otherwise we need
7707 * to add a new backref so we can notice that the backrefs don't match
7708 * and we need to figure out who is telling the truth. This is to
7709 * account for that awful fsync bug I introduced where we'd end up with
7710 * a btrfs_file_extent_item that would have its length include multiple
7711 * prealloc extents or point inside of a prealloc extent.
7713 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7716 back = alloc_data_backref(rec, parent, root, owner, offset,
7723 BUG_ON(num_refs != 1);
7724 if (back->node.found_ref)
7725 BUG_ON(back->bytes != max_size);
7726 back->node.found_ref = 1;
7727 back->found_ref += 1;
/* Changing bytes/disk_bytenr goes together with the rb_erase() below. */
7728 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7729 back->bytes = max_size;
7730 back->disk_bytenr = bytenr;
7732 /* Need to reinsert if not already in the tree */
7734 rb_erase(&back->node.node, &rec->backref_tree);
7739 rec->content_checked = 1;
7740 rec->owner_ref_checked = 1;
7742 if (back->node.found_extent_tree) {
7743 fprintf(stderr, "Extent back ref already exists "
7744 "for %llu parent %llu root %llu "
7745 "owner %llu offset %llu num_refs %lu\n",
7746 (unsigned long long)bytenr,
7747 (unsigned long long)parent,
7748 (unsigned long long)root,
7749 (unsigned long long)owner,
7750 (unsigned long long)offset,
7751 (unsigned long)num_refs);
7753 back->num_refs = num_refs;
7754 back->node.found_extent_tree = 1;
7757 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7758 compare_extent_backref));
7760 maybe_free_extent_rec(extent_cache, rec);
/*
 * Add the range [bytenr, size) to the @seen tree and to the @pending tree.
 * The result of the insert into @seen is kept in ret; the result of the
 * insert into @pending is not examined.
 */
7764 static int add_pending(struct cache_tree *pending,
7765 struct cache_tree *seen, u64 bytenr, u32 size)
7768 ret = add_cache_extent(seen, bytenr, size);
7771 add_cache_extent(pending, bytenr, size);
/*
 * Choose the next batch of blocks to process and store them in @bits
 * (at most @bits_nr entries).
 *
 * Sources are consulted in this order: the @reada tree (first entry only),
 * the @nodes tree starting slightly before @last, and the @pending tree.
 * When more than 8 slots of @bits remain free after taking entries, nearby
 * entries from @pending (gap of at most 32768 bytes from the previous one)
 * are appended as well.
 */
7775 static int pick_next_pending(struct cache_tree *pending,
7776 struct cache_tree *reada,
7777 struct cache_tree *nodes,
7778 u64 last, struct block_info *bits, int bits_nr,
/*
 * NOTE(review): @last is u64 but node_start is unsigned long; where unsigned
 * long is 32 bits wide the value would be truncated -- confirm.
 */
7781 unsigned long node_start = last;
7782 struct cache_extent *cache;
7785 cache = search_cache_extent(reada, 0);
7787 bits[0].start = cache->start;
7788 bits[0].size = cache->size;
/* Back up by 32768 bytes before searching the nodes tree. */
7793 if (node_start > 32768)
7794 node_start -= 32768;
7796 cache = search_cache_extent(nodes, node_start);
7798 cache = search_cache_extent(nodes, 0);
7801 cache = search_cache_extent(pending, 0);
7806 bits[ret].start = cache->start;
7807 bits[ret].size = cache->size;
7808 cache = next_cache_extent(cache);
7810 } while (cache && ret < bits_nr);
7816 bits[ret].start = cache->start;
7817 bits[ret].size = cache->size;
7818 cache = next_cache_extent(cache);
7820 } while (cache && ret < bits_nr);
/* Spare room in bits[]: also pull in pending extents close to bits[0]. */
7822 if (bits_nr - ret > 8) {
7823 u64 lookup = bits[0].start + bits[0].size;
7824 struct cache_extent *next;
7825 next = search_cache_extent(pending, lookup);
7827 if (next->start - lookup > 32768)
7829 bits[ret].start = next->start;
7830 bits[ret].size = next->size;
7831 lookup = next->start + next->size;
7835 next = next_cache_extent(next);
/*
 * Per-entry release callback for the chunk cache: recover the chunk_record
 * that embeds @cache and unlink it from its `list` and `dextents` lists.
 */
7843 static void free_chunk_record(struct cache_extent *cache)
7845 struct chunk_record *rec;
7847 rec = container_of(cache, struct chunk_record, cache);
7848 list_del_init(&rec->list);
7849 list_del_init(&rec->dextents);
/* Release every entry of @chunk_cache, using free_chunk_record() per entry. */
7853 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
7855 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node release callback for the device cache: recover the device_record
 * that embeds @node.
 */
7858 static void free_device_record(struct rb_node *node)
7860 struct device_record *rec;
7862 rec = container_of(node, struct device_record, node);
/* Macro invocation naming device_cache and the per-node callback above. */
7866 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the cache tree of @tree and append it to the tree's
 * block_groups list.  ret holds the result of insert_cache_extent().
 */
7868 int insert_block_group_record(struct block_group_tree *tree,
7869 struct block_group_record *bg_rec)
7873 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7877 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Per-entry release callback for the block group tree: recover the
 * block_group_record that embeds @cache and unlink it from its list.
 */
7881 static void free_block_group_record(struct cache_extent *cache)
7883 struct block_group_record *rec;
7885 rec = container_of(cache, struct block_group_record, cache);
7886 list_del_init(&rec->list);
/* Release every entry of @tree, using free_block_group_record() per entry. */
7890 void free_block_group_tree(struct block_group_tree *tree)
7892 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the device extent tree with insert_cache_extent2() and
 * queue it on both orphan lists of @tree (no_chunk_orphans and
 * no_device_orphans).
 */
7895 int insert_device_extent_record(struct device_extent_tree *tree,
7896 struct device_extent_record *de_rec)
7901 * Device extent is a bit different from the other extents, because
7902 * the extents which belong to the different devices may have the
7903 * same start and size, so we need use the special extent cache
7904 * search/insert functions.
7906 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7910 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7911 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Per-entry release callback for the device extent tree: recover the
 * device_extent_record that embeds @cache and unlink it from chunk_list and
 * device_list when it is still linked on them.
 */
7915 static void free_device_extent_record(struct cache_extent *cache)
7917 struct device_extent_record *rec;
7919 rec = container_of(cache, struct device_extent_record, cache);
7920 if (!list_empty(&rec->chunk_list))
7921 list_del_init(&rec->chunk_list);
7922 if (!list_empty(&rec->device_list))
7923 list_del_init(&rec->device_list);
/* Release every entry of @tree, using free_device_extent_record() per entry. */
7927 void free_device_extent_tree(struct device_extent_tree *tree)
7929 cache_tree_free_extents(&tree->tree, free_device_extent_record);
/*
 * Only built with BTRFS_COMPAT_EXTENT_TREE_V0: turn a v0 extent ref item at
 * @slot of @leaf into a backref.
 *
 * A ref objectid below BTRFS_FIRST_FREE_OBJECTID leads to add_tree_backref();
 * the other call site is add_data_backref() with the v0 ref count.  In both
 * cases key.objectid is the extent bytenr and key.offset is passed as the
 * parent.
 */
7932 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7933 static int process_extent_ref_v0(struct cache_tree *extent_cache,
7934 struct extent_buffer *leaf, int slot)
7936 struct btrfs_extent_ref_v0 *ref0;
7937 struct btrfs_key key;
7940 btrfs_item_key_to_cpu(leaf, &key, slot);
7941 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
7942 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
7943 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
7946 ret = add_data_backref(extent_cache, key.objectid, key.offset,
7947 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the chunk item at @slot of @leaf.
 *
 * The record is allocated zeroed with room for the chunk's stripes
 * (btrfs_chunk_record_size(num_stripes)).  The cache range is
 * [key->offset, chunk length); the key fields, the leaf generation and the
 * chunk attributes are copied, followed by devid, offset and device UUID of
 * every stripe.  An allocation failure is reported on stderr.
 */
7953 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7954 struct btrfs_key *key,
7957 struct btrfs_chunk *ptr;
7958 struct chunk_record *rec;
7961 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7962 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7964 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7966 fprintf(stderr, "memory allocation failed\n");
7970 INIT_LIST_HEAD(&rec->list);
7971 INIT_LIST_HEAD(&rec->dextents);
7974 rec->cache.start = key->offset;
7975 rec->cache.size = btrfs_chunk_length(leaf, ptr);
7977 rec->generation = btrfs_header_generation(leaf);
7979 rec->objectid = key->objectid;
7980 rec->type = key->type;
7981 rec->offset = key->offset;
7983 rec->length = rec->cache.size;
7984 rec->owner = btrfs_chunk_owner(leaf, ptr);
7985 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7986 rec->type_flags = btrfs_chunk_type(leaf, ptr);
7987 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7988 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7989 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7990 rec->num_stripes = num_stripes;
7991 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* Copy the per-stripe device id, offset and device UUID. */
7993 for (i = 0; i < rec->num_stripes; ++i) {
7994 rec->stripes[i].devid =
7995 btrfs_stripe_devid_nr(leaf, ptr, i);
7996 rec->stripes[i].offset =
7997 btrfs_stripe_offset_nr(leaf, ptr, i);
7998 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7999 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb with btrfs_check_chunk_valid() and
 * insert a chunk_record for it into @chunk_cache.  An invalid chunk is
 * reported with error(); a failed insert is reported as "existed" on stderr.
 */
8006 static int process_chunk_item(struct cache_tree *chunk_cache,
8007 struct btrfs_key *key, struct extent_buffer *eb,
8010 struct chunk_record *rec;
8011 struct btrfs_chunk *chunk;
8014 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
8016 * Do extra check for this chunk item,
8018 * It's still possible one can craft a leaf with CHUNK_ITEM, with
8019 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
8020 * and owner<->key_type check.
8022 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
8025 error("chunk(%llu, %llu) is not valid, ignore it",
8026 key->offset, btrfs_chunk_length(eb, chunk));
8029 rec = btrfs_new_chunk_record(eb, key, slot);
8030 ret = insert_cache_extent(chunk_cache, &rec->cache);
8032 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
8033 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it into
 * the @dev_cache rb-tree using device_record_compare.  Allocation failure and
 * a failed insert ("existed") are reported on stderr.
 */
8040 static int process_device_item(struct rb_root *dev_cache,
8041 struct btrfs_key *key, struct extent_buffer *eb, int slot)
8043 struct btrfs_dev_item *ptr;
8044 struct device_record *rec;
8047 ptr = btrfs_item_ptr(eb,
8048 slot, struct btrfs_dev_item);
8050 rec = malloc(sizeof(*rec));
8052 fprintf(stderr, "memory allocation failed\n");
8056 rec->devid = key->offset;
8057 rec->generation = btrfs_header_generation(eb);
8059 rec->objectid = key->objectid;
8060 rec->type = key->type;
8061 rec->offset = key->offset;
/* Overwrites the devid taken from key->offset above with the item's value. */
8063 rec->devid = btrfs_device_id(eb, ptr);
8064 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
8065 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
8067 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
8069 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a zero-initialised block_group_record from the block group item at
 * @slot of @leaf.  The cache range is [key->objectid, key->offset); the key
 * fields, the leaf generation and the block group flags are copied.  An
 * allocation failure is reported on stderr.
 */
8076 struct block_group_record *
8077 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
8080 struct btrfs_block_group_item *ptr;
8081 struct block_group_record *rec;
8083 rec = calloc(1, sizeof(*rec));
8085 fprintf(stderr, "memory allocation failed\n");
8089 rec->cache.start = key->objectid;
8090 rec->cache.size = key->offset;
8092 rec->generation = btrfs_header_generation(leaf);
8094 rec->objectid = key->objectid;
8095 rec->type = key->type;
8096 rec->offset = key->offset;
8098 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
8099 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
8101 INIT_LIST_HEAD(&rec->list);
/*
 * Create a block_group_record for the item at @slot of @eb and insert it into
 * @block_group_cache.  A failed insert is reported as "existed" on stderr.
 */
8106 static int process_block_group_item(struct block_group_tree *block_group_cache,
8107 struct btrfs_key *key,
8108 struct extent_buffer *eb, int slot)
8110 struct block_group_record *rec;
8113 rec = btrfs_new_block_group_record(eb, key, slot);
8114 ret = insert_block_group_record(block_group_cache, rec);
8116 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
8117 rec->objectid, rec->offset);
/*
 * Build a zero-initialised device_extent_record from the dev extent item at
 * @slot of @leaf.  The cache entry is keyed by key->objectid with range
 * [key->offset, dev extent length); the key fields, the leaf generation and
 * the chunk objectid/offset of the dev extent are copied.  An allocation
 * failure is reported on stderr.
 */
8124 struct device_extent_record *
8125 btrfs_new_device_extent_record(struct extent_buffer *leaf,
8126 struct btrfs_key *key, int slot)
8128 struct device_extent_record *rec;
8129 struct btrfs_dev_extent *ptr;
8131 rec = calloc(1, sizeof(*rec));
8133 fprintf(stderr, "memory allocation failed\n");
8137 rec->cache.objectid = key->objectid;
8138 rec->cache.start = key->offset;
8140 rec->generation = btrfs_header_generation(leaf);
8142 rec->objectid = key->objectid;
8143 rec->type = key->type;
8144 rec->offset = key->offset;
8146 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
8147 rec->chunk_objecteid =
8148 btrfs_dev_extent_chunk_objectid(leaf, ptr);
8150 btrfs_dev_extent_chunk_offset(leaf, ptr);
8151 rec->length = btrfs_dev_extent_length(leaf, ptr);
8152 rec->cache.size = rec->length;
8154 INIT_LIST_HEAD(&rec->chunk_list);
8155 INIT_LIST_HEAD(&rec->device_list);
/*
 * Create a device_extent_record for the item at @slot of @eb and insert it
 * into @dev_extent_cache.  A failed insert is reported as "existed".
 */
8161 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
8162 struct btrfs_key *key, struct extent_buffer *eb,
8165 struct device_extent_record *rec;
8168 rec = btrfs_new_device_extent_record(eb, key, slot);
8169 ret = insert_device_extent_record(dev_extent_cache, rec);
8172 "Device extent[%llu, %llu, %llu] existed.\n",
8173 rec->objectid, rec->offset, rec->length);
/*
 * Parse the extent item at @slot of @eb and feed it into @extent_cache.
 *
 * The extent length is the filesystem nodesize for a METADATA_ITEM key and
 * key.offset otherwise.  Unaligned start addresses, metadata extents whose
 * length differs from nodesize and unaligned data extent lengths are reported
 * with error().  The extent is registered through add_extent_rec(); after
 * that the inline references following the extent item are walked and a tree
 * or data backref is added for each, depending on the inline ref type.
 * Items smaller than struct btrfs_extent_item are treated as v0 extent items
 * when BTRFS_COMPAT_EXTENT_TREE_V0 is defined.
 */
8180 static int process_extent_item(struct btrfs_root *root,
8181 struct cache_tree *extent_cache,
8182 struct extent_buffer *eb, int slot)
8184 struct btrfs_extent_item *ei;
8185 struct btrfs_extent_inline_ref *iref;
8186 struct btrfs_extent_data_ref *dref;
8187 struct btrfs_shared_data_ref *sref;
8188 struct btrfs_key key;
8189 struct extent_record tmpl;
8194 u32 item_size = btrfs_item_size_nr(eb, slot);
8200 btrfs_item_key_to_cpu(eb, &key, slot);
8202 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8204 num_bytes = root->fs_info->nodesize;
8206 num_bytes = key.offset;
8209 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
8210 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
8211 key.objectid, root->fs_info->sectorsize);
8214 if (item_size < sizeof(*ei)) {
8215 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8216 struct btrfs_extent_item_v0 *ei0;
8217 BUG_ON(item_size != sizeof(*ei0));
8218 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
8219 refs = btrfs_extent_refs_v0(eb, ei0);
8223 memset(&tmpl, 0, sizeof(tmpl));
8224 tmpl.start = key.objectid;
8225 tmpl.nr = num_bytes;
8226 tmpl.extent_item_refs = refs;
8227 tmpl.metadata = metadata;
8229 tmpl.max_size = num_bytes;
8231 return add_extent_rec(extent_cache, &tmpl);
8234 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
8235 refs = btrfs_extent_refs(eb, ei);
8236 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
8240 if (metadata && num_bytes != root->fs_info->nodesize) {
8241 error("ignore invalid metadata extent, length %llu does not equal to %u",
8242 num_bytes, root->fs_info->nodesize);
8245 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
8246 error("ignore invalid data extent, length %llu is not aligned to %u",
8247 num_bytes, root->fs_info->sectorsize);
8251 memset(&tmpl, 0, sizeof(tmpl));
8252 tmpl.start = key.objectid;
8253 tmpl.nr = num_bytes;
8254 tmpl.extent_item_refs = refs;
8255 tmpl.metadata = metadata;
8257 tmpl.max_size = num_bytes;
/* NOTE(review): the return value of add_extent_rec() is not examined here. */
8258 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs start right after the extent item; for a tree block stored
 * under an EXTENT_ITEM key a btrfs_tree_block_info is skipped first.
 */
8260 ptr = (unsigned long)(ei + 1);
8261 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
8262 key.type == BTRFS_EXTENT_ITEM_KEY)
8263 ptr += sizeof(struct btrfs_tree_block_info);
8265 end = (unsigned long)ei + item_size;
8267 iref = (struct btrfs_extent_inline_ref *)ptr;
8268 type = btrfs_extent_inline_ref_type(eb, iref);
8269 offset = btrfs_extent_inline_ref_offset(eb, iref);
8271 case BTRFS_TREE_BLOCK_REF_KEY:
8272 ret = add_tree_backref(extent_cache, key.objectid,
8276 "add_tree_backref failed (extent items tree block): %s",
8279 case BTRFS_SHARED_BLOCK_REF_KEY:
8280 ret = add_tree_backref(extent_cache, key.objectid,
8284 "add_tree_backref failed (extent items shared block): %s",
8287 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref structure overlays the inline ref's offset field. */
8288 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
/* NOTE(review): the add_data_backref() results below are not examined. */
8289 add_data_backref(extent_cache, key.objectid, 0,
8290 btrfs_extent_data_ref_root(eb, dref),
8291 btrfs_extent_data_ref_objectid(eb,
8293 btrfs_extent_data_ref_offset(eb, dref),
8294 btrfs_extent_data_ref_count(eb, dref),
8297 case BTRFS_SHARED_DATA_REF_KEY:
/* The shared data ref follows immediately after the inline ref header. */
8298 sref = (struct btrfs_shared_data_ref *)(iref + 1);
8299 add_data_backref(extent_cache, key.objectid, offset,
8301 btrfs_shared_data_ref_count(eb, sref),
8305 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
8306 key.objectid, key.type, num_bytes);
/* Advance by the size of the inline ref just handled. */
8309 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the range [@offset, @offset + @bytes) of block group @cache is
 * described by a free space entry with exactly that offset and length, and
 * unlink the entry once it matched.
 *
 * Before the lookup, each super block mirror is mapped with
 * btrfs_rmap_block() and the stripes found are compared against the range;
 * @offset/@bytes are adjusted around them, recursing for the left part when
 * a stripe sits in the middle of the range.
 *
 * A mismatch is reported on stderr; the return values are set on lines not
 * shown in this listing.
 */
8316 static int check_cache_range(struct btrfs_root *root,
8317 struct btrfs_block_group_cache *cache,
8318 u64 offset, u64 bytes)
8320 struct btrfs_free_space *entry;
/* Compare the range against every super block mirror's stripes. */
8326 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
8327 bytenr = btrfs_sb_offset(i);
8328 ret = btrfs_rmap_block(root->fs_info,
8329 cache->key.objectid, bytenr, 0,
8330 &logical, &nr, &stripe_len);
8335 if (logical[nr] + stripe_len <= offset)
8337 if (offset + bytes <= logical[nr])
8339 if (logical[nr] == offset) {
8340 if (stripe_len >= bytes) {
8344 bytes -= stripe_len;
8345 offset += stripe_len;
8346 } else if (logical[nr] < offset) {
8347 if (logical[nr] + stripe_len >=
8352 bytes = (offset + bytes) -
8353 (logical[nr] + stripe_len);
8354 offset = logical[nr] + stripe_len;
8357 * Could be tricky, the super may land in the
8358 * middle of the area we're checking. First
8359 * check the easiest case, it's at the end.
8361 if (logical[nr] + stripe_len >=
8363 bytes = logical[nr] - offset;
8367 /* Check the left side */
8368 ret = check_cache_range(root, cache,
8370 logical[nr] - offset);
8376 /* Now we continue with the right side */
8377 bytes = (offset + bytes) -
8378 (logical[nr] + stripe_len);
8379 offset = logical[nr] + stripe_len;
/* What is left of the range must match one free space entry exactly. */
8386 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8388 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8389 offset, offset+bytes);
8393 if (entry->offset != offset) {
8394 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8399 if (entry->bytes != bytes) {
8400 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8401 bytes, entry->bytes, offset);
/*
 * Remove the matched entry; verify_space_cache() later checks whether the
 * free space tree of the block group is empty.
 */
8405 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the loaded free space of block group @cache against the
 * extent tree.
 *
 * The extent tree is walked over the block group's range; every gap between
 * two EXTENT_ITEM/METADATA_ITEM entries (and the gap up to the end of the
 * block group) is passed to check_cache_range(). Entries still present in
 * the free space ctl afterwards are reported on stderr.
 */
8410 static int verify_space_cache(struct btrfs_root *root,
8411 struct btrfs_block_group_cache *cache)
8413 struct btrfs_path path;
8414 struct extent_buffer *leaf;
8415 struct btrfs_key key;
/* All searches below run against the extent tree. */
8419 root = root->fs_info->extent_root;
/* Start at the block group start, but never below BTRFS_SUPER_INFO_OFFSET. */
8421 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8423 btrfs_init_path(&path);
8424 key.objectid = last;
8426 key.type = BTRFS_EXTENT_ITEM_KEY;
8427 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8432 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8433 ret = btrfs_next_leaf(root, &path);
8441 leaf = path.nodes[0];
8442 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Items at or past the end of the block group are out of scope. */
8443 if (key.objectid >= cache->key.offset + cache->key.objectid)
8445 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8446 key.type != BTRFS_METADATA_ITEM_KEY) {
/* Extent begins exactly at 'last': no gap, just move 'last' past it. */
8451 if (last == key.objectid) {
8452 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8453 last = key.objectid + key.offset;
8455 last = key.objectid + root->fs_info->nodesize;
/* The gap [last, key.objectid) has to be covered by free space. */
8460 ret = check_cache_range(root, cache, last,
8461 key.objectid - last);
8464 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8465 last = key.objectid + key.offset;
8467 last = key.objectid + root->fs_info->nodesize;
/* Trailing gap between the last extent and the end of the block group. */
8471 if (last < cache->key.objectid + cache->key.offset)
8472 ret = check_cache_range(root, cache, last,
8473 cache->key.objectid +
8474 cache->key.offset - last);
8477 btrfs_release_path(&path);
8480 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8481 fprintf(stderr, "There are still entries left in the space "
/*
 * Load and verify the free space information of every block group.
 *
 * For each block group the free space ctl is (re)initialised, the free space
 * is loaded either from the free space tree (FREE_SPACE_TREE compat_ro bit)
 * or from the space cache, and the result is checked with
 * verify_space_cache().
 *
 * Returns -EINVAL when 'error' is non-zero at the end, 0 otherwise.
 */
8489 static int check_space_cache(struct btrfs_root *root)
8491 struct btrfs_block_group_cache *cache;
8492 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
/*
 * Report a space cache whose generation differs from the super block
 * generation; a cache generation of -1 is exempt from this test.
 */
8496 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8497 btrfs_super_generation(root->fs_info->super_copy) !=
8498 btrfs_super_cache_generation(root->fs_info->super_copy)) {
8499 printf("cache and super generation don't match, space cache "
8500 "will be invalidated\n");
8504 if (ctx.progress_enabled) {
8505 ctx.tp = TASK_FREE_SPACE;
8506 task_start(ctx.info);
/* Look up the first block group at or after 'start', then continue behind it. */
8510 cache = btrfs_lookup_first_block_group(root->fs_info, start);
8514 start = cache->key.objectid + cache->key.offset;
8515 if (!cache->free_space_ctl) {
8516 if (btrfs_init_free_space_ctl(cache,
8517 root->fs_info->sectorsize)) {
/* Drop anything loaded earlier before loading again below. */
8522 btrfs_remove_free_space_cache(cache);
/* The free space tree and the space cache use different loaders. */
8525 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8526 ret = exclude_super_stripes(root, cache);
8528 fprintf(stderr, "could not exclude super stripes: %s\n",
8533 ret = load_free_space_tree(root->fs_info, cache);
8534 free_excluded_extents(root, cache);
8536 fprintf(stderr, "could not load free space tree: %s\n",
8543 ret = load_free_space_cache(root->fs_info, cache);
8548 ret = verify_space_cache(root, cache);
8550 fprintf(stderr, "cache appears valid but isn't %Lu\n",
8551 cache->key.objectid);
8556 task_stop(ctx.info);
8558 return error ? -EINVAL : 0;
/*
 * Read @num_bytes of data starting at logical address @bytenr and compare
 * the checksum of every sector with the checksums stored in leaf @eb.
 *
 * @root:        supplies fs_info (sectorsize, csum size, mirrors)
 * @bytenr:      logical start of the data covered by the csum item
 * @num_bytes:   length of the data; must be a multiple of the sectorsize
 * @leaf_offset: offset of the csum item's payload inside @eb
 * @eb:          leaf containing the csum item
 *
 * A mismatch is reported on stderr together with the mirror number that was
 * read; btrfs_num_copies() is consulted to see whether another mirror is
 * available.
 */
8561 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8562 u64 num_bytes, unsigned long leaf_offset,
8563 struct extent_buffer *eb) {
8565 struct btrfs_fs_info *fs_info = root->fs_info;
8567 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8569 unsigned long csum_offset;
8573 u64 data_checked = 0;
/* Checksums are per sector, so partial sectors cannot be verified. */
8579 if (num_bytes % fs_info->sectorsize)
/* One buffer for the whole range covered by the csum item. */
8582 data = malloc(num_bytes);
8586 while (offset < num_bytes) {
8589 read_len = num_bytes - offset;
8590 /* read as much space once a time */
8591 ret = read_extent_data(fs_info, data + offset,
8592 bytenr + offset, &read_len, mirror);
8596 /* verify every 4k data's checksum */
8597 while (data_checked < read_len) {
8599 tmp = offset + data_checked;
8601 csum = btrfs_csum_data((char *)data + tmp,
8602 csum, fs_info->sectorsize);
8603 btrfs_csum_final(csum, (u8 *)&csum);
/* Expected csum: one csum_size slot per sector, indexed from @leaf_offset. */
8605 csum_offset = leaf_offset +
8606 tmp / fs_info->sectorsize * csum_size;
8607 read_extent_buffer(eb, (char *)&csum_expected,
8608 csum_offset, csum_size);
8609 /* try another mirror */
8610 if (csum != csum_expected) {
8611 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8612 mirror, bytenr + tmp,
8613 csum, csum_expected);
8614 num_copies = btrfs_num_copies(root->fs_info,
8616 if (mirror < num_copies - 1) {
8621 data_checked += fs_info->sectorsize;
/*
 * Check that the range [@bytenr, @bytenr + @num_bytes) is covered by
 * EXTENT_ITEMs in the extent tree.
 *
 * The search starts at the highest possible EXTENT_ITEM key for @bytenr and
 * steps backwards to the closest extent item, then walks forward, trimming
 * the range by every extent that overlaps it. An extent in the middle of
 * the range splits the work: the right part is checked recursively. If
 * bytes remain uncovered at the end, a message is printed on stderr.
 */
8630 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8633 struct btrfs_path path;
8634 struct extent_buffer *leaf;
8635 struct btrfs_key key;
8638 btrfs_init_path(&path);
/* Highest EXTENT_ITEM key for @bytenr; the search lands just behind it. */
8639 key.objectid = bytenr;
8640 key.type = BTRFS_EXTENT_ITEM_KEY;
8641 key.offset = (u64)-1;
8644 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8647 fprintf(stderr, "Error looking up extent record %d\n", ret);
8648 btrfs_release_path(&path);
8651 if (path.slots[0] > 0) {
8654 ret = btrfs_prev_leaf(root, &path);
8657 } else if (ret > 0) {
8664 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8667 * Block group items come before extent items if they have the same
8668 * bytenr, so walk back one more just in case. Dear future traveller,
8669 * first congrats on mastering time travel. Now if it's not too much
8670 * trouble could you go back to 2006 and tell Chris to make the
8671 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8672 * EXTENT_ITEM_KEY please?
8674 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8675 if (path.slots[0] > 0) {
8678 ret = btrfs_prev_leaf(root, &path);
8681 } else if (ret > 0) {
8686 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8690 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8691 ret = btrfs_next_leaf(root, &path);
8693 fprintf(stderr, "Error going to next leaf "
8695 btrfs_release_path(&path);
8701 leaf = path.nodes[0];
8702 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8703 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent ends before the range starts. */
8707 if (key.objectid + key.offset < bytenr) {
/* Extent starts behind the end of the range. */
8711 if (key.objectid > bytenr + num_bytes)
/* Overlap: trim the part of the range this extent covers. */
8714 if (key.objectid == bytenr) {
8715 if (key.offset >= num_bytes) {
8719 num_bytes -= key.offset;
8720 bytenr += key.offset;
8721 } else if (key.objectid < bytenr) {
8722 if (key.objectid + key.offset >= bytenr + num_bytes) {
8726 num_bytes = (bytenr + num_bytes) -
8727 (key.objectid + key.offset);
8728 bytenr = key.objectid + key.offset;
8730 if (key.objectid + key.offset < bytenr + num_bytes) {
8731 u64 new_start = key.objectid + key.offset;
8732 u64 new_bytes = bytenr + num_bytes - new_start;
8735 * Weird case, the extent is in the middle of
8736 * our range, we'll have to search one side
8737 * and then the other. Not sure if this happens
8738 * in real life, but no harm in coding it up
8739 * anyway just in case.
8741 btrfs_release_path(&path);
8742 ret = check_extent_exists(root, new_start,
8745 fprintf(stderr, "Right section didn't "
8749 num_bytes = key.objectid - bytenr;
8752 num_bytes = key.objectid - bytenr;
/* Anything still uncovered has csums but no extent. */
8759 if (num_bytes && !ret) {
8760 fprintf(stderr, "There are no extents for csum range "
8761 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8765 btrfs_release_path(&path);
/*
 * Walk all EXTENT_CSUM items of the csum tree.
 *
 * When the global check_data_csum is set, the data behind each csum item is
 * read and verified with check_extent_csums(). Independently of that,
 * consecutive csum items are merged into runs [offset, offset + num_bytes)
 * and each finished run is checked with check_extent_exists().
 */
8769 static int check_csums(struct btrfs_root *root)
8771 struct btrfs_path path;
8772 struct extent_buffer *leaf;
8773 struct btrfs_key key;
8774 u64 offset = 0, num_bytes = 0;
8775 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8779 unsigned long leaf_offset;
/* From here on 'root' is the csum tree. */
8781 root = root->fs_info->csum_root;
8782 if (!extent_buffer_uptodate(root->node)) {
8783 fprintf(stderr, "No valid csum tree found\n");
8787 btrfs_init_path(&path);
8788 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8789 key.type = BTRFS_EXTENT_CSUM_KEY;
8791 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8793 fprintf(stderr, "Error searching csum tree %d\n", ret);
8794 btrfs_release_path(&path);
8798 if (ret > 0 && path.slots[0])
8803 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8804 ret = btrfs_next_leaf(root, &path);
8806 fprintf(stderr, "Error going to next leaf "
8813 leaf = path.nodes[0];
8815 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8816 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Data bytes covered by this item: one sector per stored csum. */
8821 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8822 csum_size) * root->fs_info->sectorsize;
/* Reading and verifying the data itself is optional. */
8823 if (!check_data_csum)
8824 goto skip_csum_check;
8825 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8826 ret = check_extent_csums(root, key.offset, data_len,
8832 offset = key.offset;
8833 } else if (key.offset != offset + num_bytes) {
/* This item does not continue the run: check the finished run first. */
8834 ret = check_extent_exists(root, offset, num_bytes);
8836 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8837 "there is no extent record\n",
8838 offset, offset+num_bytes);
8841 offset = key.offset;
8844 num_bytes += data_len;
8848 btrfs_release_path(&path);
8852 static int is_dropped_key(struct btrfs_key *key,
8853 struct btrfs_key *drop_key) {
8854 if (key->objectid < drop_key->objectid)
8856 else if (key->objectid == drop_key->objectid) {
8857 if (key->type < drop_key->type)
8859 else if (key->type == drop_key->type) {
8860 if (key->offset < drop_key->offset)
8868 * Here are the rules for FULL_BACKREF.
8870 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8871 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8873 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
8874 * if it happened after the relocation occurred since we'll have dropped the
8875 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8876 * have no real way to know for sure.
8878 * We process the blocks one root at a time, and we start from the lowest root
8879 * objectid and go to the highest. So we can just lookup the owner backref for
8880 * the record and if we don't find it then we know it doesn't exist and we have
8883 * FIXME: if we ever start reclaiming root objectids then we need to fix this
8884 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8885 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether tree block @buf should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF and record the decision in the extent
 * record found for buf->start in @extent_cache.
 *
 * Inputs examined: the root objectid and root bytenr in @ri, the RELOC
 * header flag of @buf, the header owner of @buf, and whether the extent
 * record has a tree backref for that owner (find_tree_backref()).
 *
 * If the record already holds a different decision
 * (flag_block_full_backref is neither FLAG_UNSET nor the value computed
 * now), rec->bad_full_backref is set.
 */
8887 static int calc_extent_flag(struct cache_tree *extent_cache,
8888 struct extent_buffer *buf,
8889 struct root_item_record *ri,
8892 struct extent_record *rec;
8893 struct cache_extent *cache;
8894 struct tree_backref *tback;
8897 cache = lookup_cache_extent(extent_cache, buf->start, 1);
8898 /* we have added this extent before */
8902 rec = container_of(cache, struct extent_record, cache);
8905 * Except file/reloc tree, we can not have
8908 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8913 if (buf->start == ri->bytenr)
8916 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8919 owner = btrfs_header_owner(buf);
8920 if (owner == ri->objectid)
/* Look for a backref from the header owner with no parent. */
8923 tback = find_tree_backref(rec, 0, owner);
/* A previously recorded value other than 0 contradicts this result. */
8928 if (rec->flag_block_full_backref != FLAG_UNSET &&
8929 rec->flag_block_full_backref != 0)
8930 rec->bad_full_backref = 1;
8933 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* A previously recorded value other than 1 contradicts this result. */
8934 if (rec->flag_block_full_backref != FLAG_UNSET &&
8935 rec->flag_block_full_backref != 1)
8936 rec->bad_full_backref = 1;
8940 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8942 fprintf(stderr, "Invalid key type(");
8943 print_key_type(stderr, 0, key_type);
8944 fprintf(stderr, ") found in root(");
8945 print_objectid(stderr, rootid, 0);
8946 fprintf(stderr, ")\n");
8950 * Check if the key is valid with its extent buffer.
8952 * This is an early check in case an invalid key exists in an extent buffer
8953 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check that @key_type is allowed in the tree identified by @rootid.
 *
 * Only the key types listed in the switch are restricted; a mismatch is
 * reported through report_mismatch_key_root().
 *
 * NOTE(review): the "csum and log tree" case label below is
 * BTRFS_CSUM_TREE_OBJECTID, which is a tree objectid rather than a key
 * type. Presumably BTRFS_EXTENT_CSUM_KEY was intended - confirm, and
 * re-check the allowed roots of that case when fixing it.
 */
8956 static int check_type_with_root(u64 rootid, u8 key_type)
8959 /* Only valid in chunk tree */
8960 case BTRFS_DEV_ITEM_KEY:
8961 case BTRFS_CHUNK_ITEM_KEY:
8962 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8965 /* valid in csum and log tree */
8966 case BTRFS_CSUM_TREE_OBJECTID:
8967 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
/* Extent and block group items: extent tree only. */
8971 case BTRFS_EXTENT_ITEM_KEY:
8972 case BTRFS_METADATA_ITEM_KEY:
8973 case BTRFS_BLOCK_GROUP_ITEM_KEY:
8974 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
/* Root items: root tree only. */
8977 case BTRFS_ROOT_ITEM_KEY:
8978 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
/* Device extents: device tree only. */
8981 case BTRFS_DEV_EXTENT_KEY:
8982 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8988 report_mismatch_key_root(key_type, rootid);
/*
 * Process one pending tree block of the tree described by @ri.
 *
 * A batch of blocks is picked with pick_next_pending() and read ahead;
 * bits[0] is then read and handled:
 *  - its FULL_BACKREF state is determined and stored in the extent record,
 *  - it is validated with check_block(),
 *  - for a leaf, every item is dispatched by key type to the matching
 *    process_*() / add_*_backref() helper,
 *  - for a node, every child pointer is recorded as an extent with a tree
 *    backref and queued with add_pending().
 * Finally the global byte counters are updated and the buffer is released.
 *
 * @pending, @seen, @reada, @nodes: work queues / bookkeeping cache trees
 * @extent_cache, @chunk_cache, @dev_cache, @block_group_cache,
 * @dev_extent_cache: caches filled from the items found
 * @ri: root item record of the tree being walked
 */
8992 static int run_next_block(struct btrfs_root *root,
8993 struct block_info *bits,
8996 struct cache_tree *pending,
8997 struct cache_tree *seen,
8998 struct cache_tree *reada,
8999 struct cache_tree *nodes,
9000 struct cache_tree *extent_cache,
9001 struct cache_tree *chunk_cache,
9002 struct rb_root *dev_cache,
9003 struct block_group_tree *block_group_cache,
9004 struct device_extent_tree *dev_extent_cache,
9005 struct root_item_record *ri)
9007 struct btrfs_fs_info *fs_info = root->fs_info;
9008 struct extent_buffer *buf;
9009 struct extent_record *rec = NULL;
9020 struct btrfs_key key;
9021 struct cache_extent *cache;
/* Choose the next batch of blocks; bits[0] is the one handled in this call. */
9024 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
9025 bits_nr, &reada_bits);
/* Register the batch in the readahead cache and start reading it. */
9030 for(i = 0; i < nritems; i++) {
9031 ret = add_cache_extent(reada, bits[i].start,
9036 /* fixme, get the parent transid */
9037 readahead_tree_block(fs_info, bits[i].start, 0);
9040 *last = bits[0].start;
9041 bytenr = bits[0].start;
9042 size = bits[0].size;
/* The block is handled now, so look it up in each queue and remove it. */
9044 cache = lookup_cache_extent(pending, bytenr, size);
9046 remove_cache_extent(pending, cache);
9049 cache = lookup_cache_extent(reada, bytenr, size);
9051 remove_cache_extent(reada, cache);
9054 cache = lookup_cache_extent(nodes, bytenr, size);
9056 remove_cache_extent(nodes, cache);
/* Take the expected generation from the extent record. */
9059 cache = lookup_cache_extent(extent_cache, bytenr, size);
9061 rec = container_of(cache, struct extent_record, cache);
9062 gen = rec->parent_generation;
9065 /* fixme, get the real parent transid */
9066 buf = read_tree_block(root->fs_info, bytenr, gen);
9067 if (!extent_buffer_uptodate(buf)) {
9068 record_bad_block_io(root->fs_info,
9069 extent_cache, bytenr, size);
9073 nritems = btrfs_header_nritems(buf);
/*
 * Block flags come from the extent tree unless init_extent_tree is set;
 * calc_extent_flag() is used in that case and as the fallback.
 */
9076 if (!init_extent_tree) {
9077 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
9078 btrfs_header_level(buf), 1, NULL,
9081 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9083 fprintf(stderr, "Couldn't calc extent flags\n");
9084 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9089 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9091 fprintf(stderr, "Couldn't calc extent flags\n");
9092 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9096 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9098 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
9099 ri->objectid == btrfs_header_owner(buf)) {
9101 * Ok we got to this block from it's original owner and
9102 * we have FULL_BACKREF set. Relocation can leave
9103 * converted blocks over so this is altogether possible,
9104 * however it's not possible if the generation > the
9105 * last snapshot, so check for this case.
9107 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
9108 btrfs_header_generation(buf) > ri->last_snapshot) {
9109 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9110 rec->bad_full_backref = 1;
9115 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
9116 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
9117 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9118 rec->bad_full_backref = 1;
/* Store the final decision in the extent record. */
9122 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9123 rec->flag_block_full_backref = 1;
9127 rec->flag_block_full_backref = 0;
9129 owner = btrfs_header_owner(buf);
9132 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: account the unused space, then dispatch every item by key type. */
9136 if (btrfs_is_leaf(buf)) {
9137 btree_space_waste += btrfs_leaf_free_space(root, buf);
9138 for (i = 0; i < nritems; i++) {
9139 struct btrfs_file_extent_item *fi;
9140 btrfs_item_key_to_cpu(buf, &key, i);
9142 * Check key type against the leaf owner.
9143 * Could filter quite a lot of early error if
9146 if (check_type_with_root(btrfs_header_owner(buf),
9148 fprintf(stderr, "ignoring invalid key\n");
9151 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
9152 process_extent_item(root, extent_cache, buf,
9156 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9157 process_extent_item(root, extent_cache, buf,
9161 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
9163 btrfs_item_size_nr(buf, i);
9166 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
9167 process_chunk_item(chunk_cache, &key, buf, i);
9170 if (key.type == BTRFS_DEV_ITEM_KEY) {
9171 process_device_item(dev_cache, &key, buf, i);
9174 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
9175 process_block_group_item(block_group_cache,
9179 if (key.type == BTRFS_DEV_EXTENT_KEY) {
9180 process_device_extent_item(dev_extent_cache,
9185 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
9186 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
9187 process_extent_ref_v0(extent_cache, buf, i);
/* Keyed (non-inline) backref items: objectid is the extent bytenr. */
9194 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
9195 ret = add_tree_backref(extent_cache,
9196 key.objectid, 0, key.offset, 0);
9199 "add_tree_backref failed (leaf tree block): %s",
9203 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
9204 ret = add_tree_backref(extent_cache,
9205 key.objectid, key.offset, 0, 0);
9208 "add_tree_backref failed (leaf shared block): %s",
9212 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
9213 struct btrfs_extent_data_ref *ref;
9214 ref = btrfs_item_ptr(buf, i,
9215 struct btrfs_extent_data_ref);
9216 add_data_backref(extent_cache,
9218 btrfs_extent_data_ref_root(buf, ref),
9219 btrfs_extent_data_ref_objectid(buf,
9221 btrfs_extent_data_ref_offset(buf, ref),
9222 btrfs_extent_data_ref_count(buf, ref),
9223 0, root->fs_info->sectorsize);
9226 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
9227 struct btrfs_shared_data_ref *ref;
9228 ref = btrfs_item_ptr(buf, i,
9229 struct btrfs_shared_data_ref);
9230 add_data_backref(extent_cache,
9231 key.objectid, key.offset, 0, 0, 0,
9232 btrfs_shared_data_ref_count(buf, ref),
9233 0, root->fs_info->sectorsize);
9236 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
9237 struct bad_item *bad;
9239 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
/* Queue the remaining orphan items on the global delete_items list. */
9243 bad = malloc(sizeof(struct bad_item));
9246 INIT_LIST_HEAD(&bad->list);
9247 memcpy(&bad->key, &key,
9248 sizeof(struct btrfs_key));
9249 bad->root_id = owner;
9250 list_add_tail(&bad->list, &delete_items);
/* Everything below only applies to regular (non-inline, non-hole) file extents. */
9253 if (key.type != BTRFS_EXTENT_DATA_KEY)
9255 fi = btrfs_item_ptr(buf, i,
9256 struct btrfs_file_extent_item);
9257 if (btrfs_file_extent_type(buf, fi) ==
9258 BTRFS_FILE_EXTENT_INLINE)
9260 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
9263 data_bytes_allocated +=
9264 btrfs_file_extent_disk_num_bytes(buf, fi);
/*
 * NOTE(review): this compares the running total, not the size of this
 * extent, against the sectorsize - confirm that this is intended.
 */
9265 if (data_bytes_allocated < root->fs_info->sectorsize) {
9268 data_bytes_referenced +=
9269 btrfs_file_extent_num_bytes(buf, fi);
9270 add_data_backref(extent_cache,
9271 btrfs_file_extent_disk_bytenr(buf, fi),
9272 parent, owner, key.objectid, key.offset -
9273 btrfs_file_extent_offset(buf, fi), 1, 1,
9274 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Node: record every child block pointer and queue it for processing. */
9278 struct btrfs_key first_key;
9280 first_key.objectid = 0;
9283 btrfs_item_key_to_cpu(buf, &first_key, 0);
9284 level = btrfs_header_level(buf);
9285 for (i = 0; i < nritems; i++) {
9286 struct extent_record tmpl;
9288 ptr = btrfs_node_blockptr(buf, i);
9289 size = root->fs_info->nodesize;
9290 btrfs_node_key_to_cpu(buf, &key, i);
/* At ri->drop_level, keys that sort before ri->drop_key get special handling. */
9292 if ((level == ri->drop_level)
9293 && is_dropped_key(&key, &ri->drop_key)) {
9298 memset(&tmpl, 0, sizeof(tmpl));
9299 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
9300 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
9305 tmpl.max_size = size;
9306 ret = add_extent_rec(extent_cache, &tmpl);
9310 ret = add_tree_backref(extent_cache, ptr, parent,
9314 "add_tree_backref failed (non-leaf block): %s",
/* The child goes on either the 'nodes' or the 'pending' queue. */
9320 add_pending(nodes, seen, ptr, size);
9322 add_pending(pending, seen, ptr, size);
9325 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
9326 nritems) * sizeof(struct btrfs_key_ptr);
/* Global byte accounting, split by the tree that owns the block. */
9328 total_btree_bytes += buf->len;
9329 if (fs_root_objectid(btrfs_header_owner(buf)))
9330 total_fs_tree_bytes += buf->len;
9331 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9332 total_extent_tree_bytes += buf->len;
9334 free_extent_buffer(buf);
/*
 * Queue the root block @buf of a tree for processing and record it as an
 * extent with one tree backref.
 *
 * The block is queued on @nodes when its level is above 0, otherwise on
 * @pending. The backref is keyed by parent (buf->start) for the reloc tree
 * and for blocks older than BTRFS_MIXED_BACKREF_REV, and by root objectid
 * otherwise.
 */
9338 static int add_root_to_pending(struct extent_buffer *buf,
9339 struct cache_tree *extent_cache,
9340 struct cache_tree *pending,
9341 struct cache_tree *seen,
9342 struct cache_tree *nodes,
9345 struct extent_record tmpl;
9348 if (btrfs_header_level(buf) > 0)
9349 add_pending(nodes, seen, buf->start, buf->len);
9351 add_pending(pending, seen, buf->start, buf->len);
/* Record the root block itself as an extent. */
9353 memset(&tmpl, 0, sizeof(tmpl));
9354 tmpl.start = buf->start;
9359 tmpl.max_size = buf->len;
9360 add_extent_rec(extent_cache, &tmpl);
/* Choose between a parent-keyed and a root-keyed tree backref. */
9362 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9363 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9364 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9367 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9372 /* as we fix the tree, we might be deleting blocks that
9373 * we're tracking for repair. This hook makes sure we
9374 * remove any backrefs for blocks as we are fixing them.
/*
 * Hook run when an extent reference is dropped during repair: mirror the
 * drop in the fsck extent cache (root->fs_info->fsck_extent_cache).
 *
 * The extent record covering @bytenr is looked up and the matching data or
 * tree backref has its found_ref / found_extent_tree state and counters
 * reduced; maybe_free_extent_rec() is then given the chance to release the
 * record. An @owner at or above BTRFS_FIRST_FREE_OBJECTID marks a data
 * extent.
 */
9376 static int free_extent_hook(struct btrfs_trans_handle *trans,
9377 struct btrfs_root *root,
9378 u64 bytenr, u64 num_bytes, u64 parent,
9379 u64 root_objectid, u64 owner, u64 offset,
9382 struct extent_record *rec;
9383 struct cache_extent *cache;
9385 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
9387 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9388 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9392 rec = container_of(cache, struct extent_record, cache);
9394 struct data_backref *back;
9395 back = find_data_backref(rec, parent, root_objectid, owner,
9396 offset, 1, bytenr, num_bytes);
/* Data backrefs are counted, so subtract the number of refs dropped. */
9399 if (back->node.found_ref) {
9400 back->found_ref -= refs_to_drop;
9402 rec->refs -= refs_to_drop;
9404 if (back->node.found_extent_tree) {
9405 back->num_refs -= refs_to_drop;
9406 if (rec->extent_item_refs)
9407 rec->extent_item_refs -= refs_to_drop;
9409 if (back->found_ref == 0)
9410 back->node.found_ref = 0;
9411 if (back->num_refs == 0)
9412 back->node.found_extent_tree = 0;
/*
 * NOTE(review): the backref is erased only while found_ref is still set;
 * presumably "!found_ref" was meant (erase once nothing references it) -
 * confirm.
 */
9414 if (!back->node.found_extent_tree && back->node.found_ref) {
9415 rb_erase(&back->node.node, &rec->backref_tree);
9419 struct tree_backref *back;
9420 back = find_tree_backref(rec, parent, root_objectid);
9423 if (back->node.found_ref) {
9426 back->node.found_ref = 0;
9428 if (back->node.found_extent_tree) {
9429 if (rec->extent_item_refs)
9430 rec->extent_item_refs--;
9431 back->node.found_extent_tree = 0;
/*
 * NOTE(review): found_ref is cleared just above whenever it was set, so
 * this condition looks like it can never hold - confirm against the full
 * source.
 */
9433 if (!back->node.found_extent_tree && back->node.found_ref) {
9434 rb_erase(&back->node.node, &rec->backref_tree);
9438 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent tree items that belong to the extent at 'bytenr'.
 *
 * The extent tree is searched downwards from the highest key for 'bytenr'.
 * Items of the extent and backref key types are deleted one at a time and
 * logged on stderr; items of other types are stepped over by lowering the
 * search key. When the EXTENT_ITEM/METADATA_ITEM itself is deleted, the
 * block group accounting is updated for its length.
 */
9443 static int delete_extent_records(struct btrfs_trans_handle *trans,
9444 struct btrfs_root *root,
9445 struct btrfs_path *path,
9448 struct btrfs_key key;
9449 struct btrfs_key found_key;
9450 struct extent_buffer *leaf;
9455 key.objectid = bytenr;
9457 key.offset = (u64)-1;
9460 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9467 if (path->slots[0] == 0)
9473 leaf = path->nodes[0];
9474 slot = path->slots[0];
9476 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9477 if (found_key.objectid != bytenr)
/* Not an extent or backref item: lower the search key to get past it. */
9480 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9481 found_key.type != BTRFS_METADATA_ITEM_KEY &&
9482 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9483 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9484 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9485 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9486 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9487 btrfs_release_path(path);
9488 if (found_key.type == 0) {
9489 if (found_key.offset == 0)
9491 key.offset = found_key.offset - 1;
9492 key.type = found_key.type;
9494 key.type = found_key.type - 1;
9495 key.offset = (u64)-1;
9499 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9500 found_key.objectid, found_key.type, found_key.offset);
9502 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9505 btrfs_release_path(path);
/* Deleting the extent item itself also changes block group accounting. */
9507 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9508 found_key.type == BTRFS_METADATA_ITEM_KEY) {
/* METADATA_ITEM keys do not carry the length, so the nodesize is used. */
9509 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9510 found_key.offset : root->fs_info->nodesize;
9512 ret = btrfs_update_block_group(trans, root, bytenr,
9519 btrfs_release_path(path);
9524 * for a single backref, this will allocate a new extent
9525 * and add the backref to it.
9527 static int record_extent(struct btrfs_trans_handle *trans,
9528 struct btrfs_fs_info *info,
9529 struct btrfs_path *path,
9530 struct extent_record *rec,
9531 struct extent_backref *back,
9532 int allocated, u64 flags)
9535 struct btrfs_root *extent_root = info->extent_root;
9536 struct extent_buffer *leaf;
9537 struct btrfs_key ins_key;
9538 struct btrfs_extent_item *ei;
9539 struct data_backref *dback;
9540 struct btrfs_tree_block_info *bi;
9543 rec->max_size = max_t(u64, rec->max_size,
9547 u32 item_size = sizeof(*ei);
9550 item_size += sizeof(*bi);
9552 ins_key.objectid = rec->start;
9553 ins_key.offset = rec->max_size;
9554 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9556 ret = btrfs_insert_empty_item(trans, extent_root, path,
9557 &ins_key, item_size);
9561 leaf = path->nodes[0];
9562 ei = btrfs_item_ptr(leaf, path->slots[0],
9563 struct btrfs_extent_item);
9565 btrfs_set_extent_refs(leaf, ei, 0);
9566 btrfs_set_extent_generation(leaf, ei, rec->generation);
9568 if (back->is_data) {
9569 btrfs_set_extent_flags(leaf, ei,
9570 BTRFS_EXTENT_FLAG_DATA);
9572 struct btrfs_disk_key copy_key;;
9574 bi = (struct btrfs_tree_block_info *)(ei + 1);
9575 memset_extent_buffer(leaf, 0, (unsigned long)bi,
9578 btrfs_set_disk_key_objectid(©_key,
9579 rec->info_objectid);
9580 btrfs_set_disk_key_type(©_key, 0);
9581 btrfs_set_disk_key_offset(©_key, 0);
9583 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9584 btrfs_set_tree_block_key(leaf, bi, ©_key);
9586 btrfs_set_extent_flags(leaf, ei,
9587 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9590 btrfs_mark_buffer_dirty(leaf);
9591 ret = btrfs_update_block_group(trans, extent_root, rec->start,
9592 rec->max_size, 1, 0);
9595 btrfs_release_path(path);
9598 if (back->is_data) {
9602 dback = to_data_backref(back);
9603 if (back->full_backref)
9604 parent = dback->parent;
9608 for (i = 0; i < dback->found_ref; i++) {
9609 /* if parent != 0, we're doing a full backref
9610 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9611 * just makes the backref allocator create a data
9614 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9615 rec->start, rec->max_size,
9619 BTRFS_FIRST_FREE_OBJECTID :
9625 fprintf(stderr, "adding new data backref"
9626 " on %llu %s %llu owner %llu"
9627 " offset %llu found %d\n",
9628 (unsigned long long)rec->start,
9629 back->full_backref ?
9631 back->full_backref ?
9632 (unsigned long long)parent :
9633 (unsigned long long)dback->root,
9634 (unsigned long long)dback->owner,
9635 (unsigned long long)dback->offset,
9639 struct tree_backref *tback;
9641 tback = to_tree_backref(back);
9642 if (back->full_backref)
9643 parent = tback->parent;
9647 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9648 rec->start, rec->max_size,
9649 parent, tback->root, 0, 0);
9650 fprintf(stderr, "adding new tree backref on "
9651 "start %llu len %llu parent %llu root %llu\n",
9652 rec->start, rec->max_size, parent, tback->root);
9655 btrfs_release_path(path);
9659 static struct extent_entry *find_entry(struct list_head *entries,
9660 u64 bytenr, u64 bytes)
9662 struct extent_entry *entry = NULL;
9664 list_for_each_entry(entry, entries, list) {
9665 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Choose the most trustworthy entry from @entries by comparing the 'count'
 * of the candidates.
 *
 * Entries whose 'broken' count equals their 'count' are not trusted. A tie
 * in 'count' between candidates is treated as inconclusive and the search
 * continues.
 *
 * NOTE(review): the return statements are not part of this listing;
 * presumably the entry with the highest count is returned, or NULL when no
 * entry clearly wins - confirm against the full source.
 */
9672 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9674 struct extent_entry *entry, *best = NULL, *prev = NULL;
9676 list_for_each_entry(entry, entries, list) {
9678 * If there are as many broken entries as entries then we know
9679 * not to trust this particular entry.
9681 if (entry->broken == entry->count)
9685 * Special case, when there are only two entries and 'best' is
9695 * If our current entry == best then we can't be sure our best
9696 * is really the best, so we need to keep searching.
9698 if (best && best->count == entry->count) {
9704 /* Prev == entry, not good enough, have to keep searching */
9705 if (!prev->broken && prev->count == entry->count)
9709 best = (prev->count > entry->count) ? prev : entry;
9710 else if (best->count < entry->count)
/*
 * repair_ref - rewrite one file extent item so that it agrees with the extent
 * range chosen as authoritative.
 *
 * @info:  filesystem being repaired
 * @path:  caller-provided scratch path
 * @dback: data backref whose file extent item disagrees with @entry
 * @entry: the (bytenr, bytes) pair the file extent item must be made to match
 *
 * Steps, in order:
 *  1. Read the fs root named by dback->root.
 *  2. Search read-only for (dback->owner, EXTENT_DATA, dback->offset) and walk
 *     forward to the file extent item whose disk_bytenr and disk_num_bytes
 *     equal the values recorded in @dback; its key is kept.
 *  3. Start a transaction and search again for that key with cow enabled.
 *  4. Refuse to touch a compressed extent whose disk_bytenr differs from
 *     entry->bytenr (the message asks for a btrfs-image instead).
 *  5. Point disk_bytenr at entry->bytenr, adjusting the in-extent offset when
 *     the old start was above or below the new one, after checking that the
 *     referenced range still lies inside the entry.
 *  6. Set disk_num_bytes to entry->bytes and, for uncompressed extents only,
 *     ram_bytes as well; mark the leaf dirty.
 *  7. Commit the transaction and release @path.
 *
 * Returns PTR_ERR(trans) if the transaction cannot be started; on the final
 * exit path returns ret when it is non-zero, otherwise the commit result.
 *
 * NOTE(review): this listing omits a number of short lines (braces, local
 * declarations, error returns and gotos).  In particular off_diff is computed
 * in the branch for a ref above the entry start, but the line that applies it
 * to offset is not visible -- confirm it exists in the full source.
 */
9718 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9719 struct data_backref *dback, struct extent_entry *entry)
9721 struct btrfs_trans_handle *trans;
9722 struct btrfs_root *root;
9723 struct btrfs_file_extent_item *fi;
9724 struct extent_buffer *leaf;
9725 struct btrfs_key key;
9729 key.objectid = dback->root;
9730 key.type = BTRFS_ROOT_ITEM_KEY;
9731 key.offset = (u64)-1;
9732 root = btrfs_read_fs_root(info, &key);
9734 fprintf(stderr, "Couldn't find root for our ref\n");
9739 * The backref points to the original offset of the extent if it was
9740 * split, so we need to search down to the offset we have and then walk
9741 * forward until we find the backref we're looking for.
9743 key.objectid = dback->owner;
9744 key.type = BTRFS_EXTENT_DATA_KEY;
9745 key.offset = dback->offset;
9746 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9748 fprintf(stderr, "Error looking up ref %d\n", ret);
9753 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9754 ret = btrfs_next_leaf(root, path);
9756 fprintf(stderr, "Couldn't find our ref, next\n");
9760 leaf = path->nodes[0];
9761 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9762 if (key.objectid != dback->owner ||
9763 key.type != BTRFS_EXTENT_DATA_KEY) {
9764 fprintf(stderr, "Couldn't find our ref, search\n");
9767 fi = btrfs_item_ptr(leaf, path->slots[0],
9768 struct btrfs_file_extent_item);
9769 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9770 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9772 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9777 btrfs_release_path(path);
9779 trans = btrfs_start_transaction(root, 1);
9781 return PTR_ERR(trans);
9784 * Ok we have the key of the file extent we want to fix, now we can cow
9785 * down to the thing and fix it.
9787 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9789 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9790 key.objectid, key.type, key.offset, ret);
9794 fprintf(stderr, "Well that's odd, we just found this key "
9795 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9800 leaf = path->nodes[0];
9801 fi = btrfs_item_ptr(leaf, path->slots[0],
9802 struct btrfs_file_extent_item);
9804 if (btrfs_file_extent_compression(leaf, fi) &&
9805 dback->disk_bytenr != entry->bytenr) {
9806 fprintf(stderr, "Ref doesn't match the record start and is "
9807 "compressed, please take a btrfs-image of this file "
9808 "system and send it to a btrfs developer so they can "
9809 "complete this functionality for bytenr %Lu\n",
9810 dback->disk_bytenr);
9815 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9816 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9817 } else if (dback->disk_bytenr > entry->bytenr) {
9818 u64 off_diff, offset;
9820 off_diff = dback->disk_bytenr - entry->bytenr;
9821 offset = btrfs_file_extent_offset(leaf, fi);
9822 if (dback->disk_bytenr + offset +
9823 btrfs_file_extent_num_bytes(leaf, fi) >
9824 entry->bytenr + entry->bytes) {
9825 fprintf(stderr, "Ref is past the entry end, please "
9826 "take a btrfs-image of this file system and "
9827 "send it to a btrfs developer, ref %Lu\n",
9828 dback->disk_bytenr);
9833 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9834 btrfs_set_file_extent_offset(leaf, fi, offset);
9835 } else if (dback->disk_bytenr < entry->bytenr) {
9838 offset = btrfs_file_extent_offset(leaf, fi);
9839 if (dback->disk_bytenr + offset < entry->bytenr) {
9840 fprintf(stderr, "Ref is before the entry start, please"
9841 " take a btrfs-image of this file system and "
9842 "send it to a btrfs developer, ref %Lu\n",
9843 dback->disk_bytenr);
9848 offset += dback->disk_bytenr;
9849 offset -= entry->bytenr;
9850 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9851 btrfs_set_file_extent_offset(leaf, fi, offset);
9854 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9857 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9858 * only do this if we aren't using compression, otherwise it's a
9861 if (!btrfs_file_extent_compression(leaf, fi))
9862 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9864 printf("ram bytes may be wrong?\n");
9865 btrfs_mark_buffer_dirty(leaf);
9867 err = btrfs_commit_transaction(trans, root);
9868 btrfs_release_path(path);
9869 return ret ? ret : err;
/*
 * verify_backrefs - make every data backref of @rec agree on one
 * (bytenr, bytes) pair, repairing the file extent items that disagree.
 *
 * @info: filesystem being repaired
 * @path: scratch path handed through to repair_ref()
 * @rec:  extent record whose data backrefs are examined
 *
 * Pass 1 builds a temporary list of extent_entry candidates, one per distinct
 * (disk_bytenr, bytes) reported by a non-full data backref that has a real
 * ref (found_ref != 0).  A backref that differs from rec->start / rec->nr,
 * or that is flagged broken, is caught by the mismatch test.
 *
 * If at most one candidate exists and nothing mismatched there is nothing to
 * do.  Otherwise find_most_right_entry() picks the winner; the extent record
 * itself (rec->start, rec->nr) is consulted through find_entry() and, when
 * it has no candidate yet but broken backrefs and an extent item exist, it
 * is added as a further candidate before picking again.
 *
 * The winner must start at rec->start; a different start is reported and not
 * repaired.  Pass 2 calls repair_ref() for every real data backref whose
 * bytes or disk_bytenr differ from the winner.
 *
 * All candidates are unlinked from the temporary list before returning.
 *
 * NOTE(review): this listing omits short lines (braces, declarations such as
 * the entries list head and the counters, continue and goto statements, and
 * the return value).  The surviving comment near the end says the cache must
 * be dropped and rescanned after refs were changed; the return code used to
 * signal that is not visible here -- confirm against the full source.
 */
9872 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9873 struct extent_record *rec)
9875 struct extent_backref *back, *tmp;
9876 struct data_backref *dback;
9877 struct extent_entry *entry, *best = NULL;
9880 int broken_entries = 0;
9885 * Metadata is easy and the backrefs should always agree on bytenr and
9886 * size, if not we've got bigger issues.
9891 rbtree_postorder_for_each_entry_safe(back, tmp,
9892 &rec->backref_tree, node) {
9893 if (back->full_backref || !back->is_data)
9896 dback = to_data_backref(back);
9899 * We only pay attention to backrefs that we found a real
9902 if (dback->found_ref == 0)
9906 * For now we only catch when the bytes don't match, not the
9907 * bytenr. We can easily do this at the same time, but I want
9908 * to have a fs image to test on before we just add repair
9909 * functionality willy-nilly so we know we won't screw up the
9913 entry = find_entry(&entries, dback->disk_bytenr,
9916 entry = malloc(sizeof(struct extent_entry));
9921 memset(entry, 0, sizeof(*entry));
9922 entry->bytenr = dback->disk_bytenr;
9923 entry->bytes = dback->bytes;
9924 list_add_tail(&entry->list, &entries);
9929 * If we only have on entry we may think the entries agree when
9930 * in reality they don't so we have to do some extra checking.
9932 if (dback->disk_bytenr != rec->start ||
9933 dback->bytes != rec->nr || back->broken)
9944 /* Yay all the backrefs agree, carry on good sir */
9945 if (nr_entries <= 1 && !mismatch)
9948 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9949 "%Lu\n", rec->start);
9952 * First we want to see if the backrefs can agree amongst themselves who
9953 * is right, so figure out which one of the entries has the highest
9956 best = find_most_right_entry(&entries);
9959 * Ok so we may have an even split between what the backrefs think, so
9960 * this is where we use the extent ref to see what it thinks.
9963 entry = find_entry(&entries, rec->start, rec->nr);
9964 if (!entry && (!broken_entries || !rec->found_rec)) {
9965 fprintf(stderr, "Backrefs don't agree with each other "
9966 "and extent record doesn't agree with anybody,"
9967 " so we can't fix bytenr %Lu bytes %Lu\n",
9968 rec->start, rec->nr);
9971 } else if (!entry) {
9973 * Ok our backrefs were broken, we'll assume this is the
9974 * correct value and add an entry for this range.
9976 entry = malloc(sizeof(struct extent_entry));
9981 memset(entry, 0, sizeof(*entry));
9982 entry->bytenr = rec->start;
9983 entry->bytes = rec->nr;
9984 list_add_tail(&entry->list, &entries);
9988 best = find_most_right_entry(&entries);
9990 fprintf(stderr, "Backrefs and extent record evenly "
9991 "split on who is right, this is going to "
9992 "require user input to fix bytenr %Lu bytes "
9993 "%Lu\n", rec->start, rec->nr);
10000 * I don't think this can happen currently as we'll abort() if we catch
10001 * this case higher up, but in case somebody removes that we still can't
10002 * deal with it properly here yet, so just bail out of that's the case.
10004 if (best->bytenr != rec->start) {
10005 fprintf(stderr, "Extent start and backref starts don't match, "
10006 "please use btrfs-image on this file system and send "
10007 "it to a btrfs developer so they can make fsck fix "
10008 "this particular case. bytenr is %Lu, bytes is %Lu\n",
10009 rec->start, rec->nr);
10015 * Ok great we all agreed on an extent record, let's go find the real
10016 * references and fix up the ones that don't match.
10018 rbtree_postorder_for_each_entry_safe(back, tmp,
10019 &rec->backref_tree, node) {
10020 if (back->full_backref || !back->is_data)
10023 dback = to_data_backref(back);
10026 * Still ignoring backrefs that don't have a real ref attached
10029 if (dback->found_ref == 0)
10032 if (dback->bytes == best->bytes &&
10033 dback->disk_bytenr == best->bytenr)
10036 ret = repair_ref(info, path, dback, best);
10042 * Ok we messed with the actual refs, which means we need to drop our
10043 * entire cache and go back and rescan. I know this is a huge pain and
10044 * adds a lot of extra work, but it's the only way to be safe. Once all
10045 * the backrefs agree we may not need to do anything to the extent
10050 while (!list_empty(&entries)) {
10051 entry = list_entry(entries.next, struct extent_entry, list);
10052 list_del_init(&entry->list);
/*
 * process_duplicates - decide whether @rec, taken from duplicate_extents,
 * really has several extent items or merely collected a backref before its
 * extent item was seen.
 *
 * @extent_cache: cache tree holding all extent records
 * @rec:          record taken from the duplicate list
 *
 * The first test singles out records that already have an extent item or
 * more than one duplicate; zero duplicates is a BUG_ON.  For the remaining
 * case @rec is removed from the cache and its first duplicate, called good,
 * takes its place: good inherits rec->refs and rec->backrefs, has its check
 * flags and duplicate count reset and is inserted into @extent_cache with
 * start good->start and size good->nr.
 *
 * Before the insert, records found by the overlap lookup are folded in:
 *  - one backed by an extent item, or carrying duplicates of its own, is
 *    moved onto good->dups together with its duplicates, and good is queued
 *    on duplicate_extents if it is not on a list already;
 *  - one without an extent item just donates its refs and backrefs.
 *
 * Returns 0 when good ended up with duplicates and 1 when it has none.
 *
 * NOTE(review): short lines are missing from this listing (braces, the loop
 * around the overlap lookup, the early returns, what happens to @rec and to
 * the absorbed records afterwards, handling of the insert result) -- confirm
 * against the full source.
 */
10058 static int process_duplicates(struct cache_tree *extent_cache,
10059 struct extent_record *rec)
10061 struct extent_record *good, *tmp;
10062 struct cache_extent *cache;
10066 * If we found a extent record for this extent then return, or if we
10067 * have more than one duplicate we are likely going to need to delete
10070 if (rec->found_rec || rec->num_duplicates > 1)
10073 /* Shouldn't happen but just in case */
10074 BUG_ON(!rec->num_duplicates);
10077 * So this happens if we end up with a backref that doesn't match the
10078 * actual extent entry. So either the backref is bad or the extent
10079 * entry is bad. Either way we want to have the extent_record actually
10080 * reflect what we found in the extent_tree, so we need to take the
10081 * duplicate out and use that as the extent_record since the only way we
10082 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
10084 remove_cache_extent(extent_cache, &rec->cache);
10086 good = to_extent_record(rec->dups.next);
10087 list_del_init(&good->list);
10088 INIT_LIST_HEAD(&good->backrefs);
10089 INIT_LIST_HEAD(&good->dups);
10090 good->cache.start = good->start;
10091 good->cache.size = good->nr;
10092 good->content_checked = 0;
10093 good->owner_ref_checked = 0;
10094 good->num_duplicates = 0;
10095 good->refs = rec->refs;
10096 list_splice_init(&rec->backrefs, &good->backrefs);
10098 cache = lookup_cache_extent(extent_cache, good->start,
10102 tmp = container_of(cache, struct extent_record, cache);
10105 * If we find another overlapping extent and it's found_rec is
10106 * set then it's a duplicate and we need to try and delete
10109 if (tmp->found_rec || tmp->num_duplicates > 0) {
10110 if (list_empty(&good->list))
10111 list_add_tail(&good->list,
10112 &duplicate_extents);
10113 good->num_duplicates += tmp->num_duplicates + 1;
10114 list_splice_init(&tmp->dups, &good->dups);
10115 list_del_init(&tmp->list);
10116 list_add_tail(&tmp->list, &good->dups);
10117 remove_cache_extent(extent_cache, &tmp->cache);
10122 * Ok we have another non extent item backed extent rec, so lets
10123 * just add it to this extent and carry on like we did above.
10125 good->refs += tmp->refs;
10126 list_splice_init(&tmp->backrefs, &good->backrefs);
10127 remove_cache_extent(extent_cache, &tmp->cache);
10130 ret = insert_cache_extent(extent_cache, &good->cache);
10133 return good->num_duplicates ? 0 : 1;
/*
 * delete_duplicate_records - remove superfluous EXTENT_ITEMs of a record that
 * has duplicates.
 *
 * @root: any root of the filesystem; the extent root is taken from its fs_info
 * @rec:  record whose ->dups list holds the overlapping duplicates
 *
 * The first loop looks for the record covering all the others and complains
 * about overlaps that are not fully contained in one another.  Records are
 * then collected on a local delete_list and, inside one transaction on the
 * extent root, every collected record is screened on found_rec and its item
 * (start, EXTENT_ITEM, nr) is searched for and deleted.  A metadata record on
 * the list is reported as unexpected.
 *
 * Afterwards both delete_list and rec->dups are emptied.
 *
 * Returns ret when it is non-zero, otherwise nr_del; when both are zero
 * rec->num_duplicates is reset to 0.
 *
 * NOTE(review): short lines are not visible here (declarations of ret, err
 * and nr_del, the initialisation of good, which records are skipped while
 * filling delete_list, the error branches, the freeing of unlinked records).
 * Also note that the overlap message prints start and length inside brackets
 * that read like a start-end range.
 */
10136 static int delete_duplicate_records(struct btrfs_root *root,
10137 struct extent_record *rec)
10139 struct btrfs_trans_handle *trans;
10140 LIST_HEAD(delete_list);
10141 struct btrfs_path path;
10142 struct extent_record *tmp, *good, *n;
10145 struct btrfs_key key;
10147 btrfs_init_path(&path);
10150 /* Find the record that covers all of the duplicates. */
10151 list_for_each_entry(tmp, &rec->dups, list) {
10152 if (good->start < tmp->start)
10154 if (good->nr > tmp->nr)
10157 if (tmp->start + tmp->nr < good->start + good->nr) {
10158 fprintf(stderr, "Ok we have overlapping extents that "
10159 "aren't completely covered by each other, this "
10160 "is going to require more careful thought. "
10161 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
10162 tmp->start, tmp->nr, good->start, good->nr);
10169 list_add_tail(&rec->list, &delete_list);
10171 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
10174 list_move_tail(&tmp->list, &delete_list);
10177 root = root->fs_info->extent_root;
10178 trans = btrfs_start_transaction(root, 1);
10179 if (IS_ERR(trans)) {
10180 ret = PTR_ERR(trans);
10184 list_for_each_entry(tmp, &delete_list, list) {
10185 if (tmp->found_rec == 0)
10187 key.objectid = tmp->start;
10188 key.type = BTRFS_EXTENT_ITEM_KEY;
10189 key.offset = tmp->nr;
10191 /* Shouldn't happen but just in case */
10192 if (tmp->metadata) {
10193 fprintf(stderr, "Well this shouldn't happen, extent "
10194 "record overlaps but is metadata? "
10195 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
10199 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10205 ret = btrfs_del_item(trans, root, &path);
10208 btrfs_release_path(&path);
10211 err = btrfs_commit_transaction(trans, root);
10215 while (!list_empty(&delete_list)) {
10216 tmp = to_extent_record(delete_list.next);
10217 list_del_init(&tmp->list);
10223 while (!list_empty(&rec->dups)) {
10224 tmp = to_extent_record(rec->dups.next);
10225 list_del_init(&tmp->list);
10229 btrfs_release_path(&path);
10231 if (!ret && !nr_del)
10232 rec->num_duplicates = 0;
10234 return ret ? ret : nr_del;
/*
 * find_possible_backrefs - try to locate the file extent item behind every
 * data backref of @rec that has not been matched to a real ref yet.
 *
 * @info:         filesystem being checked
 * @path:         scratch path, released after each lookup
 * @extent_cache: cache tree of extent records, used to see who owns a bytenr
 * @rec:          record whose unmatched backrefs are examined
 *
 * Full backrefs, non-data backrefs and backrefs with found_ref already set
 * are not looked up.  For the others the owning fs root is read (-ENOENT is
 * tested separately from any other error, which is returned) and the key
 * (owner, EXTENT_DATA, offset) is searched read-only.  From the item found
 * the disk_bytenr and disk_num_bytes are read.  The extent cache is then
 * probed at that bytenr and the found_rec flag of a record living there is
 * tested: per the comment below, a backref whose bytenr already has its own
 * extent item must not be attached to @rec.  Otherwise found_ref is bumped
 * and the backref takes over the on-disk bytenr and length so that
 * verify_backrefs() can judge it.
 *
 * NOTE(review): short lines are missing from this listing (declarations,
 * continue statements, the handling of the search result, the line that
 * flags the backref as not to be trusted, and the final return).  The inner
 * declaration of tmp shadows the loop cursor of the same name -- confirm
 * against the full source.
 */
10237 static int find_possible_backrefs(struct btrfs_fs_info *info,
10238 struct btrfs_path *path,
10239 struct cache_tree *extent_cache,
10240 struct extent_record *rec)
10242 struct btrfs_root *root;
10243 struct extent_backref *back, *tmp;
10244 struct data_backref *dback;
10245 struct cache_extent *cache;
10246 struct btrfs_file_extent_item *fi;
10247 struct btrfs_key key;
10251 rbtree_postorder_for_each_entry_safe(back, tmp,
10252 &rec->backref_tree, node) {
10253 /* Don't care about full backrefs (poor unloved backrefs) */
10254 if (back->full_backref || !back->is_data)
10257 dback = to_data_backref(back);
10259 /* We found this one, we don't need to do a lookup */
10260 if (dback->found_ref)
10263 key.objectid = dback->root;
10264 key.type = BTRFS_ROOT_ITEM_KEY;
10265 key.offset = (u64)-1;
10267 root = btrfs_read_fs_root(info, &key);
10269 /* No root, definitely a bad ref, skip */
10270 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
10272 /* Other err, exit */
10274 return PTR_ERR(root);
10276 key.objectid = dback->owner;
10277 key.type = BTRFS_EXTENT_DATA_KEY;
10278 key.offset = dback->offset;
10279 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
10281 btrfs_release_path(path);
10284 /* Didn't find it, we can carry on */
10289 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
10290 struct btrfs_file_extent_item);
10291 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
10292 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
10293 btrfs_release_path(path);
10294 cache = lookup_cache_extent(extent_cache, bytenr, 1);
10296 struct extent_record *tmp;
10297 tmp = container_of(cache, struct extent_record, cache);
10300 * If we found an extent record for the bytenr for this
10301 * particular backref then we can't add it to our
10302 * current extent record. We only want to add backrefs
10303 * that don't have a corresponding extent item in the
10304 * extent tree since they likely belong to this record
10305 * and we need to fix it if it doesn't match bytenrs.
10307 if (tmp->found_rec)
10311 dback->found_ref += 1;
10312 dback->disk_bytenr = bytenr;
10313 dback->bytes = bytes;
10316 * Set this so the verify backref code knows not to trust the
10317 * values in this backref.
10326 * Record orphan data ref into corresponding root.
10328 * Return 0 if the extent item contains data ref and recorded.
10329 * Return 1 if the extent item contains no useful data ref
10330 * On that case, it may contains only shared_dataref or metadata backref
10331 * or the file extent exists(this should be handled by the extent bytenr
10332 * recovery routine)
10333 * Return <0 if something goes wrong.
10335 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10336 struct extent_record *rec)
10338 struct btrfs_key key;
10339 struct btrfs_root *dest_root;
10340 struct extent_backref *back, *tmp;
10341 struct data_backref *dback;
10342 struct orphan_data_extent *orphan;
10343 struct btrfs_path path;
10344 int recorded_data_ref = 0;
10349 btrfs_init_path(&path);
10350 rbtree_postorder_for_each_entry_safe(back, tmp,
10351 &rec->backref_tree, node) {
10352 if (back->full_backref || !back->is_data ||
10353 !back->found_extent_tree)
10355 dback = to_data_backref(back);
10356 if (dback->found_ref)
10358 key.objectid = dback->root;
10359 key.type = BTRFS_ROOT_ITEM_KEY;
10360 key.offset = (u64)-1;
10362 dest_root = btrfs_read_fs_root(fs_info, &key);
10364 /* For non-exist root we just skip it */
10365 if (IS_ERR(dest_root) || !dest_root)
10368 key.objectid = dback->owner;
10369 key.type = BTRFS_EXTENT_DATA_KEY;
10370 key.offset = dback->offset;
10372 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10373 btrfs_release_path(&path);
10375 * For ret < 0, it's OK since the fs-tree may be corrupted,
10376 * we need to record it for inode/file extent rebuild.
10377 * For ret > 0, we record it only for file extent rebuild.
10378 * For ret == 0, the file extent exists but only bytenr
10379 * mismatch, let the original bytenr fix routine to handle,
10385 orphan = malloc(sizeof(*orphan));
10390 INIT_LIST_HEAD(&orphan->list);
10391 orphan->root = dback->root;
10392 orphan->objectid = dback->owner;
10393 orphan->offset = dback->offset;
10394 orphan->disk_bytenr = rec->cache.start;
10395 orphan->disk_len = rec->cache.size;
10396 list_add(&dest_root->orphan_data_extents, &orphan->list);
10397 recorded_data_ref = 1;
10400 btrfs_release_path(&path);
10402 return !recorded_data_ref;
10408 * when an incorrect extent item is found, this will delete
10409 * all of the existing entries for it and recreate them
10410 * based on what the tree scan found.
/*
 * fixup_extent_refs - rebuild the extent tree entries of @rec.
 *
 * @info:         filesystem being repaired
 * @extent_cache: cache tree of extent records
 * @rec:          record whose extent item or backrefs are wrong
 *
 * For data extents whose counted refs differ from the extent item, stray
 * backrefs are first collected with find_possible_backrefs() and the
 * backrefs are reconciled with verify_backrefs().  Then, inside one
 * transaction on the extent root:
 *   step two   - delete_extent_records() drops the existing items;
 *                info->corrupt_blocks is consulted for the range of @rec
 *                before any ref is re-added;
 *   step three - record_extent() is called for backrefs screened on
 *                found_ref, with BTRFS_BLOCK_FLAG_FULL_BACKREF included in
 *                flags when rec->flag_block_full_backref is set.
 * The transaction is committed and a message about the repaired extent is
 * printed to stderr.
 *
 * NOTE(review): short lines are missing from this listing (declarations of
 * ret, flags and allocated, every error branch, the effect of a corrupt
 * block hit, and the return statement) -- confirm against the full source.
 */
10412 static int fixup_extent_refs(struct btrfs_fs_info *info,
10413 struct cache_tree *extent_cache,
10414 struct extent_record *rec)
10416 struct btrfs_trans_handle *trans = NULL;
10418 struct btrfs_path path;
10419 struct cache_extent *cache;
10420 struct extent_backref *back, *tmp;
10424 if (rec->flag_block_full_backref)
10425 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10427 btrfs_init_path(&path);
10428 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10430 * Sometimes the backrefs themselves are so broken they don't
10431 * get attached to any meaningful rec, so first go back and
10432 * check any of our backrefs that we couldn't find and throw
10433 * them into the list if we find the backref so that
10434 * verify_backrefs can figure out what to do.
10436 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10441 /* step one, make sure all of the backrefs agree */
10442 ret = verify_backrefs(info, &path, rec);
10446 trans = btrfs_start_transaction(info->extent_root, 1);
10447 if (IS_ERR(trans)) {
10448 ret = PTR_ERR(trans);
10452 /* step two, delete all the existing records */
10453 ret = delete_extent_records(trans, info->extent_root, &path,
10459 /* was this block corrupt? If so, don't add references to it */
10460 cache = lookup_cache_extent(info->corrupt_blocks,
10461 rec->start, rec->max_size);
10467 /* step three, recreate all the refs we did find */
10468 rbtree_postorder_for_each_entry_safe(back, tmp,
10469 &rec->backref_tree, node) {
10471 * if we didn't find any references, don't create a
10472 * new extent record
10474 if (!back->found_ref)
10477 rec->bad_full_backref = 0;
10478 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10486 int err = btrfs_commit_transaction(trans, info->extent_root);
10492 fprintf(stderr, "Repaired extent references for %llu\n",
10493 (unsigned long long)rec->start);
10495 btrfs_release_path(&path);
/*
 * fixup_extent_flags - bring BTRFS_BLOCK_FLAG_FULL_BACKREF in the extent item
 * of @rec in line with rec->flag_block_full_backref.
 *
 * @fs_info: filesystem being repaired
 * @rec:     extent record whose on-disk flags are wrong
 *
 * The lookup key in the extent root uses rec->start as objectid, with
 * (METADATA_ITEM, info_level) set up under the rec->metadata test and
 * (EXTENT_ITEM, max_size) as the other variant.  The flags of the item are
 * read, the full-backref bit is set or cleared, the leaf is marked dirty and
 * the transaction is committed.  Two early-exit sequences release the path
 * and commit the transaction without modifying anything; the second one
 * reports that the extent was not found.
 *
 * Returns PTR_ERR(trans) if the transaction cannot be started.
 *
 * NOTE(review): short lines are missing from this listing (declarations of
 * ret and flags, the conditions guarding the two early-exit sequences and
 * their return values, the else keywords, the final return).  The result of
 * btrfs_commit_transaction() is not used on the early-exit sequences.
 */
10499 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10500 struct extent_record *rec)
10502 struct btrfs_trans_handle *trans;
10503 struct btrfs_root *root = fs_info->extent_root;
10504 struct btrfs_path path;
10505 struct btrfs_extent_item *ei;
10506 struct btrfs_key key;
10510 key.objectid = rec->start;
10511 if (rec->metadata) {
10512 key.type = BTRFS_METADATA_ITEM_KEY;
10513 key.offset = rec->info_level;
10515 key.type = BTRFS_EXTENT_ITEM_KEY;
10516 key.offset = rec->max_size;
10519 trans = btrfs_start_transaction(root, 0);
10521 return PTR_ERR(trans);
10523 btrfs_init_path(&path);
10524 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10526 btrfs_release_path(&path);
10527 btrfs_commit_transaction(trans, root);
10530 fprintf(stderr, "Didn't find extent for %llu\n",
10531 (unsigned long long)rec->start);
10532 btrfs_release_path(&path);
10533 btrfs_commit_transaction(trans, root);
10537 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10538 struct btrfs_extent_item);
10539 flags = btrfs_extent_flags(path.nodes[0], ei);
10540 if (rec->flag_block_full_backref) {
10541 fprintf(stderr, "setting full backref on %llu\n",
10542 (unsigned long long)key.objectid);
10543 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10545 fprintf(stderr, "clearing full backref on %llu\n",
10546 (unsigned long long)key.objectid);
10547 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10549 btrfs_set_extent_flags(path.nodes[0], ei, flags);
10550 btrfs_mark_buffer_dirty(path.nodes[0]);
10551 btrfs_release_path(&path);
10552 ret = btrfs_commit_transaction(trans, root);
10554 fprintf(stderr, "Repaired extent flags for %llu\n",
10555 (unsigned long long)rec->start);
10560 /* right now we only prune from the extent allocation tree */
/*
 * prune_one_block - delete the pointer to a corrupt tree block from its
 * parent node in the extent tree.
 *
 * @trans:   running transaction
 * @info:    filesystem being repaired
 * @corrupt: description of the bad block (key, level, cache.start)
 *
 * The search for corrupt->key stops one level above the bad block
 * (lowest_level = corrupt->level + 1).  The slot the search landed on is
 * tried first; failing that, every slot of that node is compared against
 * corrupt->cache.start.  The pointer is removed with btrfs_del_ptr() and the
 * block number is printed.
 *
 * NOTE(review): short lines are missing from this listing (declarations,
 * labels and gotos, the handling of a failed search, and what happens
 * depending on whether the node reached is the extent root node) -- confirm
 * against the full source.
 */
10561 static int prune_one_block(struct btrfs_trans_handle *trans,
10562 struct btrfs_fs_info *info,
10563 struct btrfs_corrupt_block *corrupt)
10566 struct btrfs_path path;
10567 struct extent_buffer *eb;
10571 int level = corrupt->level + 1;
10573 btrfs_init_path(&path);
10575 /* we want to stop at the parent to our busted block */
10576 path.lowest_level = level;
10578 ret = btrfs_search_slot(trans, info->extent_root,
10579 &corrupt->key, &path, -1, 1);
10584 eb = path.nodes[level];
10591 * hopefully the search gave us the block we want to prune,
10592 * lets try that first
10594 slot = path.slots[level];
10595 found = btrfs_node_blockptr(eb, slot);
10596 if (found == corrupt->cache.start)
10599 nritems = btrfs_header_nritems(eb);
10601 /* the search failed, lets scan this node and hope we find it */
10602 for (slot = 0; slot < nritems; slot++) {
10603 found = btrfs_node_blockptr(eb, slot);
10604 if (found == corrupt->cache.start)
10608 * we couldn't find the bad block. TODO, search all the nodes for pointers
10611 if (eb == info->extent_root->node) {
10616 btrfs_release_path(&path);
10621 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10622 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10625 btrfs_release_path(&path);
/*
 * prune_corrupt_blocks - run prune_one_block() on entries of
 * info->corrupt_blocks.
 *
 * An entry is fetched with search_cache_extent() starting at 0, handed to
 * prune_one_block() and then removed from the cache.  The work happens in a
 * transaction on the extent root.
 *
 * Returns PTR_ERR(trans) if the transaction cannot be started; the visible
 * exit path returns the result of btrfs_commit_transaction().
 *
 * NOTE(review): the return value of prune_one_block() is ignored, so a block
 * that could not be pruned is still dropped from the cache.  The loop
 * construct, its exit test and the return used when no transaction was
 * started are among the short lines missing from this listing.
 */
10629 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10631 struct btrfs_trans_handle *trans = NULL;
10632 struct cache_extent *cache;
10633 struct btrfs_corrupt_block *corrupt;
10636 cache = search_cache_extent(info->corrupt_blocks, 0);
10640 trans = btrfs_start_transaction(info->extent_root, 1);
10642 return PTR_ERR(trans);
10644 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10645 prune_one_block(trans, info, corrupt);
10646 remove_cache_extent(info->corrupt_blocks, cache);
10649 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * reset_cached_block_groups - two-part reset of cached allocation state.
 *
 * First, a range marked EXTENT_DIRTY in fs_info->free_space_cache is looked
 * up with find_first_extent_bit() from offset 0 and cleared with
 * clear_extent_dirty().  Second, block groups are looked up with
 * btrfs_lookup_first_block_group() and start is advanced past each group
 * (key.objectid + key.offset).
 *
 * NOTE(review): the loops, their exit tests and whatever is done to each
 * block group are on short lines missing from this listing -- confirm
 * against the full source before relying on this description.
 */
10653 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10655 struct btrfs_block_group_cache *cache;
10660 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10661 &start, &end, EXTENT_DIRTY);
10664 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10669 cache = btrfs_lookup_first_block_group(fs_info, start);
10674 start = cache->key.objectid + cache->key.offset;
/*
 * check_extent_refs - report, and in repair mode fix, the inconsistent
 * extent records in @extent_cache.
 *
 * @root:         any root of the filesystem (fs_info is taken from it)
 * @extent_cache: cache tree of extent records built by the tree scan
 *
 * Repair preparation: the range of every record and of every corrupt block
 * is marked dirty in fs_info->excluded_extents so that nothing is allocated
 * from them, corrupt blocks are pruned and cached block groups are reset.
 *
 * Records queued on duplicate_extents are handled first, in repair mode,
 * through process_duplicates() and delete_duplicate_records().
 *
 * Main loop, per record: the following problems are printed to stderr
 *   - several extent items for one extent,
 *   - ref count mismatch (record_orphan_data_extents() is called as well),
 *   - backpointer mismatch,
 *   - failed owner ref check,
 *   - bad full backref (fixup_extent_flags() is called),
 *   - metadata crossing a stripe boundary,
 *   - extent type mismatch with its chunk,
 * and fixup_extent_refs() is called when repairing and a fix is wanted.  The
 * record is then removed from the cache, its backrefs are freed and, under
 * the condition visible below, its range is cleared from excluded_extents.
 *
 * In the closing section block accounting is fixed in a transaction of its
 * own, and an error other than -EAGAIN prints a failure message.
 *
 * NOTE(review): short lines are missing from this listing (declarations,
 * loop headers, the repair guards, the lines that set cur_err and fix, gotos
 * and labels, the return values).  The result of prune_corrupt_blocks() is
 * not checked.
 */
10678 static int check_extent_refs(struct btrfs_root *root,
10679 struct cache_tree *extent_cache)
10681 struct extent_record *rec;
10682 struct cache_extent *cache;
10688 * if we're doing a repair, we have to make sure
10689 * we don't allocate from the problem extents.
10690 * In the worst case, this will be all the
10691 * extents in the FS
10693 cache = search_cache_extent(extent_cache, 0);
10695 rec = container_of(cache, struct extent_record, cache);
10696 set_extent_dirty(root->fs_info->excluded_extents,
10698 rec->start + rec->max_size - 1);
10699 cache = next_cache_extent(cache);
10702 /* pin down all the corrupted blocks too */
10703 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10705 set_extent_dirty(root->fs_info->excluded_extents,
10707 cache->start + cache->size - 1);
10708 cache = next_cache_extent(cache);
10710 prune_corrupt_blocks(root->fs_info);
10711 reset_cached_block_groups(root->fs_info);
10714 reset_cached_block_groups(root->fs_info);
10717 * We need to delete any duplicate entries we find first otherwise we
10718 * could mess up the extent tree when we have backrefs that actually
10719 * belong to a different extent item and not the weird duplicate one.
10721 while (repair && !list_empty(&duplicate_extents)) {
10722 rec = to_extent_record(duplicate_extents.next);
10723 list_del_init(&rec->list);
10725 /* Sometimes we can find a backref before we find an actual
10726 * extent, so we need to process it a little bit to see if there
10727 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10728 * if this is a backref screwup. If we need to delete stuff
10729 * process_duplicates() will return 0, otherwise it will return
10732 if (process_duplicates(extent_cache, rec))
10734 ret = delete_duplicate_records(root, rec);
10738 * delete_duplicate_records will return the number of entries
10739 * deleted, so if it's greater than 0 then we know we actually
10740 * did something and we need to remove.
10753 cache = search_cache_extent(extent_cache, 0);
10756 rec = container_of(cache, struct extent_record, cache);
10757 if (rec->num_duplicates) {
10758 fprintf(stderr, "extent item %llu has multiple extent "
10759 "items\n", (unsigned long long)rec->start);
10763 if (rec->refs != rec->extent_item_refs) {
10764 fprintf(stderr, "ref mismatch on [%llu %llu] ",
10765 (unsigned long long)rec->start,
10766 (unsigned long long)rec->nr);
10767 fprintf(stderr, "extent item %llu, found %llu\n",
10768 (unsigned long long)rec->extent_item_refs,
10769 (unsigned long long)rec->refs);
10770 ret = record_orphan_data_extents(root->fs_info, rec);
10776 if (all_backpointers_checked(rec, 1)) {
10777 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10778 (unsigned long long)rec->start,
10779 (unsigned long long)rec->nr);
10783 if (!rec->owner_ref_checked) {
10784 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10785 (unsigned long long)rec->start,
10786 (unsigned long long)rec->nr);
10791 if (repair && fix) {
10792 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10798 if (rec->bad_full_backref) {
10799 fprintf(stderr, "bad full backref, on [%llu]\n",
10800 (unsigned long long)rec->start);
10802 ret = fixup_extent_flags(root->fs_info, rec);
10810 * Although it's not a extent ref's problem, we reuse this
10811 * routine for error reporting.
10812 * No repair function yet.
10814 if (rec->crossing_stripes) {
10816 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10817 rec->start, rec->start + rec->max_size);
10821 if (rec->wrong_chunk_type) {
10823 "bad extent [%llu, %llu), type mismatch with chunk\n",
10824 rec->start, rec->start + rec->max_size);
10828 remove_cache_extent(extent_cache, cache);
10829 free_all_extent_backrefs(rec);
10830 if (!init_extent_tree && repair && (!cur_err || fix))
10831 clear_extent_dirty(root->fs_info->excluded_extents,
10833 rec->start + rec->max_size - 1);
10838 if (ret && ret != -EAGAIN) {
10839 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10842 struct btrfs_trans_handle *trans;
10844 root = root->fs_info->extent_root;
10845 trans = btrfs_start_transaction(root, 1);
10846 if (IS_ERR(trans)) {
10847 ret = PTR_ERR(trans);
10851 ret = btrfs_fix_block_accounting(trans, root);
10854 ret = btrfs_commit_transaction(trans, root);
10863 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10867 if (type & BTRFS_BLOCK_GROUP_RAID0) {
10868 stripe_size = length;
10869 stripe_size /= num_stripes;
10870 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10871 stripe_size = length * 2;
10872 stripe_size /= num_stripes;
10873 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10874 stripe_size = length;
10875 stripe_size /= (num_stripes - 1);
10876 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10877 stripe_size = length;
10878 stripe_size /= (num_stripes - 2);
10880 stripe_size = length;
10882 return stripe_size;
10886 * Check the chunk with its block group/dev list ref:
10887 * Return 0 if all refs seem valid.
10888 * Return 1 if only part of the refs seem valid and a later pass must rebuild
10889 * the rest, e.g. a missing block group that requires searching the extent tree.
10890 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * check_chunk_refs - cross-check one chunk record against the block group
 * cache and the device extent cache.
 *
 * @chunk_rec:         chunk being verified
 * @block_group_cache: block group records, looked up by the chunk range
 * @dev_extent_cache:  device extent records, looked up per stripe
 *
 * Block group: a record found for the chunk is compared with it on length,
 * offset and type flags.  A record that passes is unlinked from its list and
 * attached as chunk_rec->bg_rec; a mismatch or a missing block group has its
 * own message.
 *
 * Device extents: the expected per-device length comes from
 * calc_stripe_length().  For each stripe the dev extent looked up by
 * (devid, offset, length) is compared on devid, offset, chunk offset and
 * length; an extent that passes is moved to chunk_rec->dextents, and a
 * mismatch or a missing extent has its own message.
 *
 * NOTE(review): short lines are missing from this listing (the fourth
 * parameter, declarations, the guards around the messages, the assignments
 * of the result code and the return).  metadump_v2 is initialised to 0 here
 * and presumably relaxes the type comparison -- confirm against the full
 * source.
 */
10892 static int check_chunk_refs(struct chunk_record *chunk_rec,
10893 struct block_group_tree *block_group_cache,
10894 struct device_extent_tree *dev_extent_cache,
10897 struct cache_extent *block_group_item;
10898 struct block_group_record *block_group_rec;
10899 struct cache_extent *dev_extent_item;
10900 struct device_extent_record *dev_extent_rec;
10904 int metadump_v2 = 0;
10908 block_group_item = lookup_cache_extent(&block_group_cache->tree,
10910 chunk_rec->length);
10911 if (block_group_item) {
10912 block_group_rec = container_of(block_group_item,
10913 struct block_group_record,
10915 if (chunk_rec->length != block_group_rec->offset ||
10916 chunk_rec->offset != block_group_rec->objectid ||
10918 chunk_rec->type_flags != block_group_rec->flags)) {
10921 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10922 chunk_rec->objectid,
10927 chunk_rec->type_flags,
10928 block_group_rec->objectid,
10929 block_group_rec->type,
10930 block_group_rec->offset,
10931 block_group_rec->offset,
10932 block_group_rec->objectid,
10933 block_group_rec->flags);
10936 list_del_init(&block_group_rec->list);
10937 chunk_rec->bg_rec = block_group_rec;
10942 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10943 chunk_rec->objectid,
10948 chunk_rec->type_flags);
10955 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10956 chunk_rec->num_stripes);
10957 for (i = 0; i < chunk_rec->num_stripes; ++i) {
10958 devid = chunk_rec->stripes[i].devid;
10959 offset = chunk_rec->stripes[i].offset;
10960 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10961 devid, offset, length);
10962 if (dev_extent_item) {
10963 dev_extent_rec = container_of(dev_extent_item,
10964 struct device_extent_record,
10966 if (dev_extent_rec->objectid != devid ||
10967 dev_extent_rec->offset != offset ||
10968 dev_extent_rec->chunk_offset != chunk_rec->offset ||
10969 dev_extent_rec->length != length) {
10972 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10973 chunk_rec->objectid,
10976 chunk_rec->stripes[i].devid,
10977 chunk_rec->stripes[i].offset,
10978 dev_extent_rec->objectid,
10979 dev_extent_rec->offset,
10980 dev_extent_rec->length);
10983 list_move(&dev_extent_rec->chunk_list,
10984 &chunk_rec->dextents);
10989 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10990 chunk_rec->objectid,
10993 chunk_rec->stripes[i].devid,
10994 chunk_rec->stripes[i].offset);
11001 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * check_chunks - verify every chunk in @chunk_cache and sort the chunks by
 * outcome.
 *
 * @chunk_cache:       all chunk records
 * @block_group_cache: block group records
 * @dev_extent_cache:  device extent records
 * @good:    optional list receiving chunks for which check_chunk_refs()
 *           returned 0
 * @bad:     optional list receiving chunks with a negative result
 * @rebuild: optional list receiving chunks with a positive result
 * @silent:  passed through to check_chunk_refs()
 *
 * Each of the three lists may be NULL, in which case chunks of that class
 * are not collected.  After the walk, messages exist for block group records
 * still linked on block_group_cache->block_groups and for device extents
 * still linked on dev_extent_cache->no_chunk_orphans: neither found its
 * chunk.
 *
 * NOTE(review): short lines are missing from this listing (declarations of
 * err and ret, how err is folded into the return value, the guards around
 * the two messages and the return) -- confirm against the full source.
 */
11002 int check_chunks(struct cache_tree *chunk_cache,
11003 struct block_group_tree *block_group_cache,
11004 struct device_extent_tree *dev_extent_cache,
11005 struct list_head *good, struct list_head *bad,
11006 struct list_head *rebuild, int silent)
11008 struct cache_extent *chunk_item;
11009 struct chunk_record *chunk_rec;
11010 struct block_group_record *bg_rec;
11011 struct device_extent_record *dext_rec;
11015 chunk_item = first_cache_extent(chunk_cache);
11016 while (chunk_item) {
11017 chunk_rec = container_of(chunk_item, struct chunk_record,
11019 err = check_chunk_refs(chunk_rec, block_group_cache,
11020 dev_extent_cache, silent);
11023 if (err == 0 && good)
11024 list_add_tail(&chunk_rec->list, good);
11025 if (err > 0 && rebuild)
11026 list_add_tail(&chunk_rec->list, rebuild);
11027 if (err < 0 && bad)
11028 list_add_tail(&chunk_rec->list, bad);
11029 chunk_item = next_cache_extent(chunk_item);
11032 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
11035 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
11043 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
11047 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
11048 dext_rec->objectid,
/*
 * Sum the lengths of the device extent records belonging to @dev_rec and
 * compare the total with dev_rec->byte_used.
 *
 * The walk starts at the first cache entry returned by
 * search_cache_extent2() for (dev_rec->devid, 0) and advances with
 * next_cache_extent(). Every extent that is counted is also unlinked from
 * its device_list. A total that differs from byte_used is reported with the
 * format string below.
 * NOTE(review): the loop header, the exit taken when the objectid no longer
 * matches the devid, and the return statements are not visible here.
 */
11058 static int check_device_used(struct device_record *dev_rec,
11059 struct device_extent_tree *dext_cache)
11061 struct cache_extent *cache;
11062 struct device_extent_record *dev_extent_rec;
11063 u64 total_byte = 0;
11065 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
11067 dev_extent_rec = container_of(cache,
11068 struct device_extent_record,
11070 if (dev_extent_rec->objectid != dev_rec->devid)
11073 list_del_init(&dev_extent_rec->device_list);
11074 total_byte += dev_extent_rec->length;
11075 cache = next_cache_extent(cache);
/* Compare the summed extent length with the usage stored in the device record. */
11078 if (total_byte != dev_rec->byte_used) {
11080 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
11081 total_byte, dev_rec->byte_used, dev_rec->objectid,
11082 dev_rec->type, dev_rec->offset);
11089 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Run check_device_used() on every device record in the @dev_cache rbtree
 * (iterated with rb_first()/rb_next()), then report each entry on
 * dev_extent_cache->no_device_orphans with the format string below.
 * NOTE(review): how the per-device results and the orphan reports feed the
 * return value is not visible here.
 */
11090 static int check_devices(struct rb_root *dev_cache,
11091 struct device_extent_tree *dev_extent_cache)
11093 struct rb_node *dev_node;
11094 struct device_record *dev_rec;
11095 struct device_extent_record *dext_rec;
11099 dev_node = rb_first(dev_cache);
11101 dev_rec = container_of(dev_node, struct device_record, node);
11102 err = check_device_used(dev_rec, dev_extent_cache);
11106 dev_node = rb_next(dev_node);
/* Report the device extents listed as having no owning device. */
11108 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
11111 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
11112 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record, fill it from the arguments and append it to
 * the tail of @head.
 *
 * @objectid, @bytenr, @last_snapshot, @level, @drop_level: copied verbatim
 *	into the new record.
 * @drop_key: copied into the record with memcpy().
 *
 * NOTE(review): callers in this file pass NULL for @drop_key, and malloc()
 * can fail; the guards for both cases and the return statements are not
 * visible here, so confirm them.
 */
11119 static int add_root_item_to_list(struct list_head *head,
11120 u64 objectid, u64 bytenr, u64 last_snapshot,
11121 u8 level, u8 drop_level,
11122 struct btrfs_key *drop_key)
11125 struct root_item_record *ri_rec;
11126 ri_rec = malloc(sizeof(*ri_rec));
11129 ri_rec->bytenr = bytenr;
11130 ri_rec->objectid = objectid;
11131 ri_rec->level = level;
11132 ri_rec->drop_level = drop_level;
11133 ri_rec->last_snapshot = last_snapshot;
11135 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
11136 list_add_tail(&ri_rec->list, head);
/*
 * Empty @list: repeatedly take the first root_item_record and unlink it
 * until the list is empty.
 * NOTE(review): the statement releasing the record memory is not visible
 * here; presumably each record is freed after being unlinked.
 */
11141 static void free_root_item_list(struct list_head *list)
11143 struct root_item_record *ri_rec;
11145 while (!list_empty(list)) {
11146 ri_rec = list_first_entry(list, struct root_item_record,
11148 list_del_init(&ri_rec->list);
/*
 * Drain @list of root_item_record entries.
 *
 * For each record the root tree block at rec->bytenr is read, registered
 * with add_root_to_pending() and handed to run_next_block() together with
 * the record. The buffer is then released and the record unlinked from the
 * list. After the list is empty, run_next_block() is called again with a
 * NULL record.
 *
 * @root: its fs_info is used for reading tree blocks; also forwarded to
 *	run_next_block().
 * @bits and the cache arguments (@pending, @seen, @reada, @nodes,
 *	@extent_cache, @chunk_cache, @dev_cache, @block_group_cache,
 *	@dev_extent_cache): forwarded to run_next_block().
 *
 * NOTE(review): the conditions and loops around the run_next_block() calls
 * and the error exits are not visible here.
 */
11153 static int deal_root_from_list(struct list_head *list,
11154 struct btrfs_root *root,
11155 struct block_info *bits,
11157 struct cache_tree *pending,
11158 struct cache_tree *seen,
11159 struct cache_tree *reada,
11160 struct cache_tree *nodes,
11161 struct cache_tree *extent_cache,
11162 struct cache_tree *chunk_cache,
11163 struct rb_root *dev_cache,
11164 struct block_group_tree *block_group_cache,
11165 struct device_extent_tree *dev_extent_cache)
11170 while (!list_empty(list)) {
11171 struct root_item_record *rec;
11172 struct extent_buffer *buf;
11173 rec = list_entry(list->next,
11174 struct root_item_record, list);
/* Read the root tree block recorded for this entry. */
11176 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
11177 if (!extent_buffer_uptodate(buf)) {
11178 free_extent_buffer(buf);
11182 ret = add_root_to_pending(buf, extent_cache, pending,
11183 seen, nodes, rec->objectid);
11187 * To rebuild extent tree, we need deal with snapshot
11188 * one by one, otherwise we deal with node firstly which
11189 * can maximize readahead.
11192 ret = run_next_block(root, bits, bits_nr, &last,
11193 pending, seen, reada, nodes,
11194 extent_cache, chunk_cache,
11195 dev_cache, block_group_cache,
11196 dev_extent_cache, rec);
11200 free_extent_buffer(buf);
11201 list_del(&rec->list);
11207 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
11208 reada, nodes, extent_cache, chunk_cache,
11209 dev_cache, block_group_cache,
11210 dev_extent_cache, NULL);
/*
 * Top-level chunk/extent check for @fs_info.
 *
 * Steps visible in this function:
 *  - initialise the local caches and lists, and hook excluded_extents,
 *    fsck_extent_cache, free_extent_hook and corrupt_blocks into @fs_info;
 *  - queue fs_info->tree_root and fs_info->chunk_root on normal_trees;
 *  - walk the items of the tree root; a ROOT_ITEM whose drop_progress
 *    objectid is 0 is queued on normal_trees, and a ROOT_ITEM is queued on
 *    dropping_trees together with its drop key and drop level (presumably
 *    in the else branch, which is not fully visible);
 *  - process both lists with deal_root_from_list(), then run
 *    check_chunks(), check_extent_refs() and check_devices();
 *  - reset the fs_info hooks to NULL and release the caches and lists.
 *
 * Several results are compared against -EAGAIN; see the comment before the
 * first deal_root_from_list() call.
 * NOTE(review): labels, goto targets and return statements are not visible
 * here, so the exact control flow between the steps must be confirmed.
 */
11220 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11222 struct rb_root dev_cache;
11223 struct cache_tree chunk_cache;
11224 struct block_group_tree block_group_cache;
11225 struct device_extent_tree dev_extent_cache;
11226 struct cache_tree extent_cache;
11227 struct cache_tree seen;
11228 struct cache_tree pending;
11229 struct cache_tree reada;
11230 struct cache_tree nodes;
11231 struct extent_io_tree excluded_extents;
11232 struct cache_tree corrupt_blocks;
11233 struct btrfs_path path;
11234 struct btrfs_key key;
11235 struct btrfs_key found_key;
11237 struct block_info *bits;
11239 struct extent_buffer *leaf;
11241 struct btrfs_root_item ri;
11242 struct list_head dropping_trees;
11243 struct list_head normal_trees;
11244 struct btrfs_root *root1;
11245 struct btrfs_root *root;
11249 root = fs_info->fs_root;
11250 dev_cache = RB_ROOT;
11251 cache_tree_init(&chunk_cache);
11252 block_group_tree_init(&block_group_cache);
11253 device_extent_tree_init(&dev_extent_cache);
11255 cache_tree_init(&extent_cache);
11256 cache_tree_init(&seen);
11257 cache_tree_init(&pending);
11258 cache_tree_init(&nodes);
11259 cache_tree_init(&reada);
11260 cache_tree_init(&corrupt_blocks);
11261 extent_io_tree_init(&excluded_extents);
11262 INIT_LIST_HEAD(&dropping_trees);
11263 INIT_LIST_HEAD(&normal_trees);
/* Publish the local caches and the free-extent hook through fs_info. */
11266 fs_info->excluded_extents = &excluded_extents;
11267 fs_info->fsck_extent_cache = &extent_cache;
11268 fs_info->free_extent_hook = free_extent_hook;
11269 fs_info->corrupt_blocks = &corrupt_blocks;
11273 bits = malloc(bits_nr * sizeof(struct block_info));
11279 if (ctx.progress_enabled) {
11280 ctx.tp = TASK_EXTENTS;
11281 task_start(ctx.info);
/* Seed the work list with the tree root, then the chunk root. */
11285 root1 = fs_info->tree_root;
11286 level = btrfs_header_level(root1->node);
11287 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11288 root1->node->start, 0, level, 0, NULL);
11291 root1 = fs_info->chunk_root;
11292 level = btrfs_header_level(root1->node);
11293 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11294 root1->node->start, 0, level, 0, NULL);
/* Walk the tree root to collect the ROOT_ITEMs of the other trees. */
11297 btrfs_init_path(&path);
11300 key.type = BTRFS_ROOT_ITEM_KEY;
11301 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
11305 leaf = path.nodes[0];
11306 slot = path.slots[0];
11307 if (slot >= btrfs_header_nritems(path.nodes[0])) {
11308 ret = btrfs_next_leaf(root, &path);
11311 leaf = path.nodes[0];
11312 slot = path.slots[0];
11314 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11315 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
11316 unsigned long offset;
11319 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
11320 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
11321 last_snapshot = btrfs_root_last_snapshot(&ri);
11322 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
11323 level = btrfs_root_level(&ri);
11324 ret = add_root_item_to_list(&normal_trees,
11325 found_key.objectid,
11326 btrfs_root_bytenr(&ri),
11327 last_snapshot, level,
11332 level = btrfs_root_level(&ri);
11333 objectid = found_key.objectid;
11334 btrfs_disk_key_to_cpu(&found_key,
11335 &ri.drop_progress);
11336 ret = add_root_item_to_list(&dropping_trees,
11338 btrfs_root_bytenr(&ri),
11339 last_snapshot, level,
11340 ri.drop_level, &found_key);
11347 btrfs_release_path(&path);
11350 * check_block can return -EAGAIN if it fixes something, please keep
11351 * this in mind when dealing with return values from these functions, if
11352 * we get -EAGAIN we want to fall through and restart the loop.
11354 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11355 &seen, &reada, &nodes, &extent_cache,
11356 &chunk_cache, &dev_cache, &block_group_cache,
11357 &dev_extent_cache);
11359 if (ret == -EAGAIN)
11363 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11364 &pending, &seen, &reada, &nodes,
11365 &extent_cache, &chunk_cache, &dev_cache,
11366 &block_group_cache, &dev_extent_cache);
11368 if (ret == -EAGAIN)
11373 ret = check_chunks(&chunk_cache, &block_group_cache,
11374 &dev_extent_cache, NULL, NULL, NULL, 0);
11376 if (ret == -EAGAIN)
11381 ret = check_extent_refs(root, &extent_cache);
11383 if (ret == -EAGAIN)
11388 ret = check_devices(&dev_cache, &dev_extent_cache);
11393 task_stop(ctx.info);
/* Teardown: drop the fs_info hooks, then release the caches and lists. */
11395 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11396 extent_io_tree_cleanup(&excluded_extents);
11397 fs_info->fsck_extent_cache = NULL;
11398 fs_info->free_extent_hook = NULL;
11399 fs_info->corrupt_blocks = NULL;
11400 fs_info->excluded_extents = NULL;
11403 free_chunk_cache_tree(&chunk_cache);
11404 free_device_cache_tree(&dev_cache);
11405 free_block_group_tree(&block_group_cache);
11406 free_device_extent_tree(&dev_extent_cache);
11407 free_extent_cache_tree(&seen);
11408 free_extent_cache_tree(&pending);
11409 free_extent_cache_tree(&reada);
11410 free_extent_cache_tree(&nodes);
11411 free_root_item_list(&normal_trees);
11412 free_root_item_list(&dropping_trees);
/*
 * NOTE(review): second release sequence, which also frees extent_cache.
 * Presumably this is the -EAGAIN restart path; its label and the jump that
 * follows are not visible here.
 */
11415 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11416 free_extent_cache_tree(&seen);
11417 free_extent_cache_tree(&pending);
11418 free_extent_cache_tree(&reada);
11419 free_extent_cache_tree(&nodes);
11420 free_chunk_cache_tree(&chunk_cache);
11421 free_block_group_tree(&block_group_cache);
11422 free_device_cache_tree(&dev_cache);
11423 free_device_extent_tree(&dev_extent_cache);
11424 free_extent_record_cache(&extent_cache);
11425 free_root_item_list(&normal_trees);
11426 free_root_item_list(&dropping_trees);
11427 extent_io_tree_cleanup(&excluded_extents);
11432 * Check backrefs of a tree block given by @bytenr or @eb.
11434 * @root: the root containing the @bytenr or @eb
11435 * @eb: tree block extent buffer, can be NULL
11436 * @bytenr: bytenr of the tree block to search
11437 * @level: tree level of the tree block
11438 * @owner: owner of the tree block
11440 * Return >0 for any error found and output error message
11441 * Return 0 for no error found
/*
 * Implementation notes for check_tree_block_ref():
 *
 *  - The extent tree is searched for (bytenr, METADATA_ITEM, -1) when the
 *    SKINNY_METADATA incompat flag is set and for (bytenr, EXTENT_ITEM, -1)
 *    otherwise; btrfs_previous_extent_item() then locates the item.
 *  - The level comes from key.offset for a METADATA_ITEM and from the
 *    btrfs_tree_block_info that follows the extent item otherwise.
 *  - Flags, generation, level and refs mismatches set BACKREF_MISMATCH.
 *  - Inline refs are scanned first: a TREE_BLOCK_REF whose offset equals
 *    root->objectid (or equals @owner when the check is not strict), or a
 *    SHARED_BLOCK_REF validated by a recursive call at level + 1.
 *  - A separate (bytenr, TREE_BLOCK_REF, root->objectid) item is searched
 *    as a fallback; BACKREF_MISSING is set on failure paths.
 *
 * @nrefs: passed to should_check_extent_strictly(); it is NULL-checked
 *	before being used to look up the parent bytenr for the message.
 * NOTE(review): the conditions guarding several of these steps are not
 * visible here and need to be confirmed.
 */
11443 static int check_tree_block_ref(struct btrfs_root *root,
11444 struct extent_buffer *eb, u64 bytenr,
11445 int level, u64 owner, struct node_refs *nrefs)
11447 struct btrfs_key key;
11448 struct btrfs_root *extent_root = root->fs_info->extent_root;
11449 struct btrfs_path path;
11450 struct btrfs_extent_item *ei;
11451 struct btrfs_extent_inline_ref *iref;
11452 struct extent_buffer *leaf;
11457 int root_level = btrfs_header_level(root->node);
11459 u32 nodesize = root->fs_info->nodesize;
11462 int tree_reloc_root = 0;
11469 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
11470 btrfs_header_bytenr(root->node) == bytenr)
11471 tree_reloc_root = 1;
11472 btrfs_init_path(&path);
11473 key.objectid = bytenr;
11474 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11475 key.type = BTRFS_METADATA_ITEM_KEY;
11477 key.type = BTRFS_EXTENT_ITEM_KEY;
11478 key.offset = (u64)-1;
11480 /* Search for the backref in extent tree */
11481 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11483 err |= BACKREF_MISSING;
11486 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11488 err |= BACKREF_MISSING;
11492 leaf = path.nodes[0];
11493 slot = path.slots[0];
11494 btrfs_item_key_to_cpu(leaf, &key, slot);
11496 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11498 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11499 skinny_level = (int)key.offset;
11500 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11502 struct btrfs_tree_block_info *info;
11504 info = (struct btrfs_tree_block_info *)(ei + 1);
11505 skinny_level = btrfs_tree_block_level(leaf, info);
11506 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11515 * Due to the feature of shared tree blocks, if the upper node
11516 * is a fs root or shared node, the extent of checked node may
11517 * not be updated until the next CoW.
11520 strict = should_check_extent_strictly(root, nrefs,
11522 if (!(btrfs_extent_flags(leaf, ei) &
11523 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11525 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11526 key.objectid, nodesize,
11527 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11528 err = BACKREF_MISMATCH;
11530 header_gen = btrfs_header_generation(eb);
11531 extent_gen = btrfs_extent_generation(leaf, ei);
11532 if (header_gen != extent_gen) {
11534 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11535 key.objectid, nodesize, header_gen,
11537 err = BACKREF_MISMATCH;
11539 if (level != skinny_level) {
11541 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11542 key.objectid, nodesize, level, skinny_level);
11543 err = BACKREF_MISMATCH;
11545 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11547 "extent[%llu %u] is referred by other roots than %llu",
11548 key.objectid, nodesize, root->objectid);
11549 err = BACKREF_MISMATCH;
11554 * Iterate the extent/metadata item to find the exact backref
11556 item_size = btrfs_item_size_nr(leaf, slot);
11557 ptr = (unsigned long)iref;
11558 end = (unsigned long)ei + item_size;
11560 while (ptr < end) {
11561 iref = (struct btrfs_extent_inline_ref *)ptr;
11562 type = btrfs_extent_inline_ref_type(leaf, iref);
11563 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11565 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11566 if (offset == root->objectid)
11568 if (!strict && owner == offset)
11570 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11572 * Backref of tree reloc root points to itself, no need
11573 * to check backref any more.
11575 if (tree_reloc_root) {
11579 * Check if the backref points to valid
11582 found_ref = !check_tree_block_ref( root, NULL,
11583 offset, level + 1, owner,
11590 ptr += btrfs_extent_inline_ref_size(type);
11594 * Inlined extent item doesn't have what we need, check
11595 * TREE_BLOCK_REF_KEY
11598 btrfs_release_path(&path);
11599 key.objectid = bytenr;
11600 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11601 key.offset = root->objectid;
11603 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11608 err |= BACKREF_MISSING;
11610 btrfs_release_path(&path);
11611 if (nrefs && strict &&
11612 level < root_level && nrefs->full_backref[level + 1])
11613 parent = nrefs->bytenr[level + 1];
11614 if (eb && (err & BACKREF_MISSING))
11616 "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11617 bytenr, nodesize, owner, level,
11618 parent ? "parent" : "root",
11619 parent ? parent : root->objectid);
11624 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11626 * Return >0 any error found and output error message
11627 * Return 0 for no error found
/*
 * Implementation notes for check_extent_data_item():
 *
 * @pathp: nodes[0]/slots[0] identify the EXTENT_DATA item to check.
 * @nrefs: forwarded to should_check_extent_strictly().
 * @account_bytes: when non-zero, an aligned disk_num_bytes is added to
 *	data_bytes_allocated and an aligned num_bytes to
 *	data_bytes_referenced.
 *
 *  - disk_num_bytes and num_bytes that are not sectorsize-aligned set
 *    BYTES_UNALIGNED.
 *  - The (disk_bytenr, EXTENT_ITEM, disk_num_bytes) item is looked up in
 *    the extent tree; a missing DATA flag sets BACKREF_MISMATCH.
 *  - Inline refs are scanned for an EXTENT_DATA_REF whose root equals
 *    root->objectid (or the leaf owner when the check is not strict), or a
 *    SHARED_DATA_REF validated through check_tree_block_ref().
 *  - If none is found, a keyed EXTENT_DATA_REF item and then a keyed
 *    SHARED_DATA_REF item (offset = eb->start) are searched.
 *  - BACKREF_MISSING is set and reported when no backref is found.
 * NOTE(review): the early exits and the return statement are not visible
 * here.
 */
11629 static int check_extent_data_item(struct btrfs_root *root,
11630 struct btrfs_path *pathp,
11631 struct node_refs *nrefs, int account_bytes)
11633 struct btrfs_file_extent_item *fi;
11634 struct extent_buffer *eb = pathp->nodes[0];
11635 struct btrfs_path path;
11636 struct btrfs_root *extent_root = root->fs_info->extent_root;
11637 struct btrfs_key fi_key;
11638 struct btrfs_key dbref_key;
11639 struct extent_buffer *leaf;
11640 struct btrfs_extent_item *ei;
11641 struct btrfs_extent_inline_ref *iref;
11642 struct btrfs_extent_data_ref *dref;
11645 u64 disk_num_bytes;
11646 u64 extent_num_bytes;
11653 int found_dbackref = 0;
11654 int slot = pathp->slots[0];
11659 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11660 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11662 /* Nothing to check for hole and inline data extents */
11663 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11664 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11667 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11668 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11669 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11671 /* Check unaligned disk_num_bytes and num_bytes */
11672 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11674 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11675 fi_key.objectid, fi_key.offset, disk_num_bytes,
11676 root->fs_info->sectorsize);
11677 err |= BYTES_UNALIGNED;
11678 } else if (account_bytes) {
11679 data_bytes_allocated += disk_num_bytes;
11681 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11683 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11684 fi_key.objectid, fi_key.offset, extent_num_bytes,
11685 root->fs_info->sectorsize);
11686 err |= BYTES_UNALIGNED;
11687 } else if (account_bytes) {
11688 data_bytes_referenced += extent_num_bytes;
/* Owner from the leaf header; compared with the ref root when not strict. */
11690 owner = btrfs_header_owner(eb);
11692 /* Check the extent item of the file extent in extent tree */
11693 btrfs_init_path(&path);
11694 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11695 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11696 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11698 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11702 leaf = path.nodes[0];
11703 slot = path.slots[0];
11704 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11706 extent_flags = btrfs_extent_flags(leaf, ei);
11708 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11710 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11711 disk_bytenr, disk_num_bytes,
11712 BTRFS_EXTENT_FLAG_DATA);
11713 err |= BACKREF_MISMATCH;
11716 /* Check data backref inside that extent item */
11717 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11718 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11719 ptr = (unsigned long)iref;
11720 end = (unsigned long)ei + item_size;
11721 strict = should_check_extent_strictly(root, nrefs, -1);
11723 while (ptr < end) {
11724 iref = (struct btrfs_extent_inline_ref *)ptr;
11725 type = btrfs_extent_inline_ref_type(leaf, iref);
11726 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11728 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11729 ref_root = btrfs_extent_data_ref_root(leaf, dref);
11730 if (ref_root == root->objectid)
11731 found_dbackref = 1;
11732 else if (!strict && owner == ref_root)
11733 found_dbackref = 1;
11734 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
11735 found_dbackref = !check_tree_block_ref(root, NULL,
11736 btrfs_extent_inline_ref_offset(leaf, iref),
11740 if (found_dbackref)
11742 ptr += btrfs_extent_inline_ref_size(type);
11745 if (!found_dbackref) {
11746 btrfs_release_path(&path);
11748 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
11749 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11750 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
11751 dbref_key.offset = hash_extent_data_ref(root->objectid,
11752 fi_key.objectid, fi_key.offset);
11754 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11755 &dbref_key, &path, 0, 0);
11757 found_dbackref = 1;
11761 btrfs_release_path(&path);
11764 * Neither inlined nor EXTENT_DATA_REF found, try
11765 * SHARED_DATA_REF as last chance.
11767 dbref_key.objectid = disk_bytenr;
11768 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
11769 dbref_key.offset = eb->start;
11771 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11772 &dbref_key, &path, 0, 0);
11774 found_dbackref = 1;
11780 if (!found_dbackref)
11781 err |= BACKREF_MISSING;
11782 btrfs_release_path(&path);
11783 if (err & BACKREF_MISSING) {
11784 error("data extent[%llu %llu] backref lost",
11785 disk_bytenr, disk_num_bytes);
11791 * Get the real level of a tree block, for cases such as shared blocks
11792 * Return >= 0 as the tree level
11793 * Return <0 on error
/*
 * Implementation notes for query_tree_block_level():
 *
 *  - Searches the extent tree for (bytenr, METADATA_ITEM, -1) and steps to
 *    the item with btrfs_previous_extent_item().
 *  - The item must carry BTRFS_EXTENT_FLAG_TREE_BLOCK.
 *  - The backref level is key.offset for a METADATA_ITEM, otherwise it is
 *    read from the btrfs_tree_block_info that follows the extent item.
 *  - The tree block is then read with the generation stored in the extent
 *    item, and its header level is compared with the backref level; on
 *    agreement header_level is returned.
 * NOTE(review): the statements executed on the failure paths (including the
 * level mismatch) are not visible here.
 */
11795 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11797 struct extent_buffer *eb;
11798 struct btrfs_path path;
11799 struct btrfs_key key;
11800 struct btrfs_extent_item *ei;
11807 /* Search extent tree for extent generation and level */
11808 key.objectid = bytenr;
11809 key.type = BTRFS_METADATA_ITEM_KEY;
11810 key.offset = (u64)-1;
11812 btrfs_init_path(&path);
11813 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
11816 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11824 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11825 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11826 struct btrfs_extent_item);
11827 flags = btrfs_extent_flags(path.nodes[0], ei);
11828 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11833 /* Get transid for later read_tree_block() check */
11834 transid = btrfs_extent_generation(path.nodes[0], ei);
11836 /* Get backref level as one source */
11837 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11838 backref_level = key.offset;
11840 struct btrfs_tree_block_info *info;
11842 info = (struct btrfs_tree_block_info *)(ei + 1);
11843 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11845 btrfs_release_path(&path);
11847 /* Get level from tree block as an alternative source */
11848 eb = read_tree_block(fs_info, bytenr, transid);
11849 if (!extent_buffer_uptodate(eb)) {
11850 free_extent_buffer(eb);
11853 header_level = btrfs_header_level(eb);
11854 free_extent_buffer(eb);
11856 if (header_level != backref_level)
11858 return header_level;
11861 btrfs_release_path(&path);
11866 * Check if a tree block backref is valid (points to a valid tree block)
11867 * if level == -1, level will be resolved
11868 * Return >0 for any error found and print error message
/*
 * Implementation notes for check_tree_block_backref():
 *
 * @root_id: objectid of the root expected to reference the block; the root
 *	is read with the key (root_id, ROOT_ITEM, -1).
 * @bytenr: start of the tree block being checked.
 * @level: expected level; resolved through query_tree_block_level() for
 *	the -1 special case.
 *
 * The block is read to obtain its first key (node key or item key), the
 * buffer is released, and the root is searched for that key with
 * path.lowest_level set to @level. The block found at path.nodes[level]
 * must match @bytenr and @level, otherwise REFERENCER_MISMATCH is set.
 * Failures on the way set REFERENCER_MISSING, which is reported at the end.
 * An empty block at level 0 has no key to check.
 * NOTE(review): the exits taken after each failure and the return statement
 * are not visible here.
 */
11870 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11871 u64 bytenr, int level)
11873 struct btrfs_root *root;
11874 struct btrfs_key key;
11875 struct btrfs_path path;
11876 struct extent_buffer *eb;
11877 struct extent_buffer *node;
11878 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11882 /* Query level for level == -1 special case */
11884 level = query_tree_block_level(fs_info, bytenr);
11886 err |= REFERENCER_MISSING;
11890 key.objectid = root_id;
11891 key.type = BTRFS_ROOT_ITEM_KEY;
11892 key.offset = (u64)-1;
11894 root = btrfs_read_fs_root(fs_info, &key);
11895 if (IS_ERR(root)) {
11896 err |= REFERENCER_MISSING;
11900 /* Read out the tree block to get item/node key */
11901 eb = read_tree_block(fs_info, bytenr, 0);
11902 if (!extent_buffer_uptodate(eb)) {
11903 err |= REFERENCER_MISSING;
11904 free_extent_buffer(eb);
11908 /* Empty tree, no need to check key */
11909 if (!btrfs_header_nritems(eb) && !level) {
11910 free_extent_buffer(eb);
11915 btrfs_node_key_to_cpu(eb, &key, 0);
11917 btrfs_item_key_to_cpu(eb, &key, 0);
11919 free_extent_buffer(eb);
/* Stop the search at @level so that path.nodes[level] is the candidate block. */
11921 btrfs_init_path(&path);
11922 path.lowest_level = level;
11923 /* Search with the first key, to ensure we can reach it */
11924 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11926 err |= REFERENCER_MISSING;
11930 node = path.nodes[level];
11931 if (btrfs_header_bytenr(node) != bytenr) {
11933 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
11934 bytenr, nodesize, bytenr,
11935 btrfs_header_bytenr(node));
11936 err |= REFERENCER_MISMATCH;
11938 if (btrfs_header_level(node) != level) {
11940 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
11941 bytenr, nodesize, level,
11942 btrfs_header_level(node));
11943 err |= REFERENCER_MISMATCH;
11947 btrfs_release_path(&path);
11949 if (err & REFERENCER_MISSING) {
11951 error("extent [%llu %d] lost referencer (owner: %llu)",
11952 bytenr, nodesize, root_id);
11955 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11956 bytenr, nodesize, root_id, level);
11963 * Check if tree block @eb is a tree reloc root.
11964 * Return 0 if it is not, or if any problem occurs
11965 * Return 1 if it is a tree reloc root
/*
 * Implementation notes for is_tree_reloc_root():
 *
 * Builds the key (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM, owner of @eb),
 * reads that root with btrfs_read_fs_root_no_cache() and compares the
 * bytenr of its node with the bytenr in the header of @eb. The root read
 * here is released with btrfs_free_fs_root().
 */
11967 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11968 struct extent_buffer *eb)
11970 struct btrfs_root *tree_reloc_root;
11971 struct btrfs_key key;
11972 u64 bytenr = btrfs_header_bytenr(eb);
11973 u64 owner = btrfs_header_owner(eb);
11976 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11977 key.offset = owner;
11978 key.type = BTRFS_ROOT_ITEM_KEY;
11980 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11981 if (IS_ERR(tree_reloc_root))
11984 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
11986 btrfs_free_fs_root(tree_reloc_root);
11991 * Check referencer for shared block backref
11992 * If level == -1, this function will resolve the level.
/*
 * Implementation notes for check_shared_block_backref():
 *
 * @parent: bytenr of the tree block expected to point at @bytenr.
 * @bytenr: start of the shared tree block being checked.
 * @level: level of @bytenr; resolved through query_tree_block_level() when
 *	needed.
 *
 * The parent block is read; when @parent equals @bytenr the block is tested
 * with is_tree_reloc_root(). Otherwise the parent must sit at level + 1 and
 * one of its block pointers must equal @bytenr. REFERENCER_MISSING is
 * returned, after an error message, when no parent reference is found.
 * NOTE(review): the exits after the individual checks and the success
 * return are not visible here.
 */
11994 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11995 u64 parent, u64 bytenr, int level)
11997 struct extent_buffer *eb;
11999 int found_parent = 0;
12002 eb = read_tree_block(fs_info, parent, 0);
12003 if (!extent_buffer_uptodate(eb))
12007 level = query_tree_block_level(fs_info, bytenr);
12011 /* It's possible it's a tree reloc root */
12012 if (parent == bytenr) {
12013 if (is_tree_reloc_root(fs_info, eb))
12018 if (level + 1 != btrfs_header_level(eb))
12021 nr = btrfs_header_nritems(eb);
12022 for (i = 0; i < nr; i++) {
12023 if (bytenr == btrfs_node_blockptr(eb, i)) {
12029 free_extent_buffer(eb);
12030 if (!found_parent) {
12032 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
12033 bytenr, fs_info->nodesize, parent, level);
12034 return REFERENCER_MISSING;
12040 * Check referencer for normal (inlined) data ref
12041 * If len == 0, it will be resolved by searching in extent tree
/*
 * Implementation notes for check_extent_data_backref():
 *
 * @root_id: objectid of the root that should own the referencing file.
 * @objectid: objectid used as the EXTENT_DATA key objectid in that root.
 * @offset: data backref offset (file offset - file extent offset).
 * @bytenr, @len: the data extent; see the header comment for len == 0.
 * @count: number of references the backref claims.
 *
 * EXTENT_DATA items of @objectid in the referencing root are iterated with
 * btrfs_next_item(); an item is compared on disk bytenr, disk num bytes and
 * (key.offset - file extent offset). REFERENCER_MISSING is returned, after
 * an error message, when the number found differs from @count.
 * NOTE(review): the increment of found_count, the loop header and the other
 * return statements are not visible here.
 */
12043 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
12044 u64 root_id, u64 objectid, u64 offset,
12045 u64 bytenr, u64 len, u32 count)
12047 struct btrfs_root *root;
12048 struct btrfs_root *extent_root = fs_info->extent_root;
12049 struct btrfs_key key;
12050 struct btrfs_path path;
12051 struct extent_buffer *leaf;
12052 struct btrfs_file_extent_item *fi;
12053 u32 found_count = 0;
12058 key.objectid = bytenr;
12059 key.type = BTRFS_EXTENT_ITEM_KEY;
12060 key.offset = (u64)-1;
12062 btrfs_init_path(&path);
12063 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12066 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
12069 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12070 if (key.objectid != bytenr ||
12071 key.type != BTRFS_EXTENT_ITEM_KEY)
12074 btrfs_release_path(&path);
/* Look up the referencing root through its ROOT_ITEM key. */
12076 key.objectid = root_id;
12077 key.type = BTRFS_ROOT_ITEM_KEY;
12078 key.offset = (u64)-1;
12079 btrfs_init_path(&path);
12081 root = btrfs_read_fs_root(fs_info, &key);
12085 key.objectid = objectid;
12086 key.type = BTRFS_EXTENT_DATA_KEY;
12088 * It can be nasty as data backref offset is
12089 * file offset - file extent offset, which is smaller or
12090 * equal to original backref offset. The only special case is
12091 * overflow. So we need to special check and do further search.
12093 key.offset = offset & (1ULL << 63) ? 0 : offset;
12095 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12100 * Search afterwards to get correct one
12101 * NOTE: As we must do a comprehensive check on the data backref to
12102 * make sure the dref count also matches, we must iterate all file
12103 * extents for that inode.
12106 leaf = path.nodes[0];
12107 slot = path.slots[0];
12109 if (slot >= btrfs_header_nritems(leaf))
12111 btrfs_item_key_to_cpu(leaf, &key, slot);
12112 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
12114 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
12116 * Except normal disk bytenr and disk num bytes, we still
12117 * need to do extra check on dbackref offset as
12118 * dbackref offset = file_offset - file_extent_offset
12120 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
12121 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
12122 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
12127 ret = btrfs_next_item(root, &path);
12132 btrfs_release_path(&path);
12133 if (found_count != count) {
12135 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
12136 bytenr, len, root_id, objectid, offset, count, found_count);
12137 return REFERENCER_MISSING;
12143 * Check if the referencer of a shared data backref exists
/*
 * Implementation notes for check_shared_data_backref():
 *
 * @parent: bytenr of the tree block expected to hold the referencing file
 *	extent item.
 * @bytenr: disk bytenr of the shared data extent.
 *
 * The parent block is read and its items are scanned: only EXTENT_DATA
 * items are considered, inline file extents are tested for separately, and
 * a remaining item matches when its disk bytenr equals @bytenr.
 * REFERENCER_MISSING is returned, after an error message, when no match is
 * found.
 * NOTE(review): the statements following the individual checks and the
 * success return are not visible here.
 */
12145 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
12146 u64 parent, u64 bytenr)
12148 struct extent_buffer *eb;
12149 struct btrfs_key key;
12150 struct btrfs_file_extent_item *fi;
12152 int found_parent = 0;
12155 eb = read_tree_block(fs_info, parent, 0);
12156 if (!extent_buffer_uptodate(eb))
12159 nr = btrfs_header_nritems(eb);
12160 for (i = 0; i < nr; i++) {
12161 btrfs_item_key_to_cpu(eb, &key, i);
12162 if (key.type != BTRFS_EXTENT_DATA_KEY)
12165 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
12166 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
12169 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
12176 free_extent_buffer(eb);
12177 if (!found_parent) {
12178 error("shared extent %llu referencer lost (parent: %llu)",
12180 return REFERENCER_MISSING;
12186 * For now, only deletes the backref (on REFERENCER_MISSING or REFERENCER_MISMATCH)
12188 * Returns <0 if the extent was deleted
12189 * Returns >0 if the backref was deleted but the extent still exists; the
12190 * returned value is the error left after repair
12191 * Returns 0 if nothing happened
/*
 * Implementation notes for repair_extent_item():
 *
 * @path: positioned at the extent item; it is released and looked up again
 *	with the saved key before returning.
 * @bytenr, @num_bytes, @parent, @root_objectid, @owner, @offset: describe
 *	the backref and are passed to btrfs_free_extent().
 * @err: error bits found for this backref.
 *
 * When @err contains REFERENCER_MISSING or REFERENCER_MISMATCH, the backref
 * is dropped with btrfs_free_extent(); REFERENCER_MISSING is cleared from
 * err next to the "Delete backref" message, and the "fail to delete"
 * message covers the other outcome.
 * NOTE(review): the branch conditions around those messages and the return
 * statements are not visible here.
 */
12193 static int repair_extent_item(struct btrfs_trans_handle *trans,
12194 struct btrfs_root *root, struct btrfs_path *path,
12195 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
12196 u64 owner, u64 offset, int err)
12198 struct btrfs_key old_key;
/* Save the key at the current slot so the path can be looked up again below. */
12202 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
12204 if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
12205 /* delete the backref */
12206 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
12207 num_bytes, parent, root_objectid, owner, offset);
12210 err &= ~REFERENCER_MISSING;
12211 printf("Delete backref in extent [%llu %llu]\n",
12212 bytenr, num_bytes);
12214 error("fail to delete backref in extent [%llu %llu]",
12215 bytenr, num_bytes);
12219 /* btrfs_free_extent may delete the extent */
12220 btrfs_release_path(path);
12221 ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
12231 * This function will check a given extent item, including its backref and
12232 * itself (like crossing stripe boundary and type)
12234 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Implementation notes for check_extent_item():
 *
 * @trans: passed to repair_extent_item() on the repair path.
 * @path: nodes[0]/slots[0] identify the EXTENT_ITEM or METADATA_ITEM.
 *
 *  - bytes_used is increased by key.offset for an EXTENT_ITEM and by
 *    nodesize otherwise.
 *  - An item smaller than struct btrfs_extent_item is reported as the
 *    unsupported COMPAT_EXTENT_TREE_V0 format.
 *  - Metadata crossing a stripe boundary sets CROSSING_STRIPE_BOUNDARY.
 *  - The level comes from the btrfs_tree_block_info for an old-style
 *    metadata EXTENT_ITEM and from key.offset for a METADATA_ITEM.
 *  - Each inline ref is dispatched on its type to
 *    check_tree_block_backref(), check_shared_block_backref(),
 *    check_extent_data_backref() or check_shared_data_backref(); any other
 *    type yields UNKNOWN_TYPE.
 *  - With err set and repair enabled, repair_extent_item() is called.
 * NOTE(review): the loop construct, the accumulation of results into err and
 * the return statement are not visible here.
 */
12236 static int check_extent_item(struct btrfs_trans_handle *trans,
12237 struct btrfs_fs_info *fs_info,
12238 struct btrfs_path *path)
12240 struct btrfs_extent_item *ei;
12241 struct btrfs_extent_inline_ref *iref;
12242 struct btrfs_extent_data_ref *dref;
12243 struct extent_buffer *eb = path->nodes[0];
12246 int slot = path->slots[0];
12248 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12249 u32 item_size = btrfs_item_size_nr(eb, slot);
12259 struct btrfs_key key;
12263 btrfs_item_key_to_cpu(eb, &key, slot);
/* Account the extent size: key.offset for EXTENT_ITEM, nodesize otherwise. */
12264 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12265 bytes_used += key.offset;
12266 num_bytes = key.offset;
12268 bytes_used += nodesize;
12269 num_bytes = nodesize;
12272 if (item_size < sizeof(*ei)) {
12274 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12275 * old thing when on disk format is still un-determined.
12276 * No need to care about it anymore
12278 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12282 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12283 flags = btrfs_extent_flags(eb, ei);
12285 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
12287 if (metadata && check_crossing_stripes(global_info, key.objectid,
12289 error("bad metadata [%llu, %llu) crossing stripe boundary",
12290 key.objectid, key.objectid + nodesize);
12291 err |= CROSSING_STRIPE_BOUNDARY;
12294 ptr = (unsigned long)(ei + 1);
12296 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12297 /* Old EXTENT_ITEM metadata */
12298 struct btrfs_tree_block_info *info;
12300 info = (struct btrfs_tree_block_info *)ptr;
12301 level = btrfs_tree_block_level(eb, info);
12302 ptr += sizeof(struct btrfs_tree_block_info);
12304 /* New METADATA_ITEM */
12305 level = key.offset;
12307 end = (unsigned long)ei + item_size;
12310 /* Reached extent item end normally */
12314 /* Beyond extent item end, wrong item size */
12316 err |= ITEM_SIZE_MISMATCH;
12317 error("extent item at bytenr %llu slot %d has wrong size",
12326 /* Now check every backref in this extent item */
12327 iref = (struct btrfs_extent_inline_ref *)ptr;
12328 type = btrfs_extent_inline_ref_type(eb, iref);
12329 offset = btrfs_extent_inline_ref_offset(eb, iref);
12331 case BTRFS_TREE_BLOCK_REF_KEY:
12332 root_objectid = offset;
12334 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12338 case BTRFS_SHARED_BLOCK_REF_KEY:
12340 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12344 case BTRFS_EXTENT_DATA_REF_KEY:
12345 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12346 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12347 owner = btrfs_extent_data_ref_objectid(eb, dref);
12348 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12349 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12350 owner_offset, key.objectid, key.offset,
12351 btrfs_extent_data_ref_count(eb, dref));
12354 case BTRFS_SHARED_DATA_REF_KEY:
12356 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12360 error("extent[%llu %d %llu] has unknown ref type: %d",
12361 key.objectid, key.type, key.offset, type);
12362 ret = UNKNOWN_TYPE;
12367 if (err && repair) {
12368 ret = repair_extent_item(trans, fs_info->extent_root, path,
12369 key.objectid, num_bytes, parent, root_objectid,
12370 owner, owner_offset, ret);
12379 ptr += btrfs_extent_inline_ref_size(type);
12387 * Check if a dev extent item is referred correctly by its chunk
/*
 * @fs_info: filesystem whose chunk root is searched
 * @eb:      leaf holding the dev extent item
 * @slot:    slot of the dev extent item in @eb
 *
 * Looks up the chunk item named by the dev extent, then scans the chunk
 * stripes for one whose devid and offset equal the dev extent key.
 * Returns REFERENCER_MISSING when no matching chunk stripe was found.
 */
12389 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12390 struct extent_buffer *eb, int slot)
12392 struct btrfs_root *chunk_root = fs_info->chunk_root;
12393 struct btrfs_dev_extent *ptr;
12394 struct btrfs_path path;
12395 struct btrfs_key chunk_key;
12396 struct btrfs_key devext_key;
12397 struct btrfs_chunk *chunk;
12398 struct extent_buffer *l;
12402 int found_chunk = 0;
12405 btrfs_item_key_to_cpu(eb, &devext_key, slot);
12406 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12407 length = btrfs_dev_extent_length(eb, ptr);
/* Build the key of the chunk item this dev extent points back to */
12409 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12410 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12411 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12413 btrfs_init_path(&path);
12414 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12419 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12420 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
/* The stripe length of the chunk is compared with the dev extent length */
12425 if (btrfs_stripe_length(fs_info, l, chunk) != length)
/* Look for a stripe whose (devid, offset) equals the dev extent key */
12428 num_stripes = btrfs_chunk_num_stripes(l, chunk);
12429 for (i = 0; i < num_stripes; i++) {
12430 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12431 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12433 if (devid == devext_key.objectid &&
12434 offset == devext_key.offset) {
12440 btrfs_release_path(&path);
12441 if (!found_chunk) {
12443 "device extent[%llu, %llu, %llu] did not find the related chunk",
12444 devext_key.objectid, devext_key.offset, length);
12445 return REFERENCER_MISSING;
12451 * Check if the used space is correct with the dev item
/*
 * @fs_info: filesystem whose dev root is searched
 * @eb:      leaf holding the dev item
 * @slot:    slot of the dev item in @eb
 *
 * Sums the length of every DEV_EXTENT item whose objectid is the device id
 * and compares the total with the bytes used recorded in the dev item.
 * Returns REFERENCER_MISSING when the dev extent lookup fails and
 * ACCOUNTING_MISMATCH when the two byte counts differ.
 */
12453 static int check_dev_item(struct btrfs_fs_info *fs_info,
12454 struct extent_buffer *eb, int slot)
12456 struct btrfs_root *dev_root = fs_info->dev_root;
12457 struct btrfs_dev_item *dev_item;
12458 struct btrfs_path path;
12459 struct btrfs_key key;
12460 struct btrfs_dev_extent *ptr;
12466 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12467 dev_id = btrfs_device_id(eb, dev_item);
12468 used = btrfs_device_bytes_used(eb, dev_item);
/* Dev extents of a device are keyed by its device id */
12470 key.objectid = dev_id;
12471 key.type = BTRFS_DEV_EXTENT_KEY;
12474 btrfs_init_path(&path);
12475 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* The search key is overwritten with the dev item key for the report */
12477 btrfs_item_key_to_cpu(eb, &key, slot);
12478 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12479 key.objectid, key.type, key.offset);
12480 btrfs_release_path(&path);
12481 return REFERENCER_MISSING;
12484 /* Iterate dev_extents to calculate the used space of a device */
12486 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12489 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12490 if (key.objectid > dev_id)
12492 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12495 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12496 struct btrfs_dev_extent);
12497 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12499 ret = btrfs_next_item(dev_root, &path);
12503 btrfs_release_path(&path);
12505 if (used != total) {
/*
 * Report the key of the dev item itself, as the lookup failure path does.
 * Printing fixed constants here named the wrong key type in the message.
 */
12506 btrfs_item_key_to_cpu(eb, &key, slot);
12508 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12509 total, used, key.objectid,
12510 key.type, key.offset);
12511 return ACCOUNTING_MISMATCH;
12517 * Check a block group item with its referencer (chunk) and its used space
12518 * with extent/metadata item
/*
 * @fs_info: filesystem whose chunk root and extent root are searched
 * @eb:      leaf holding the block group item
 * @slot:    slot of the block group item in @eb
 *
 * Two checks are made:
 *  - the chunk tree is searched for the chunk item keyed by the block group
 *    start and the chunk length is checked (REFERENCER_MISSING,
 *    REFERENCER_MISMATCH);
 *  - the extent tree is walked from the block group start, accumulating the
 *    size of the extent and metadata items into a total that must equal the
 *    used bytes of the block group item (ACCOUNTING_MISMATCH). Extents whose
 *    DATA or TREE_BLOCK flag does not fit the block group flags are reported
 *    as CHUNK_TYPE_MISMATCH.
 */
12520 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12521 struct extent_buffer *eb, int slot)
12523 struct btrfs_root *extent_root = fs_info->extent_root;
12524 struct btrfs_root *chunk_root = fs_info->chunk_root;
12525 struct btrfs_block_group_item *bi;
12526 struct btrfs_block_group_item bg_item;
12527 struct btrfs_path path;
12528 struct btrfs_key bg_key;
12529 struct btrfs_key chunk_key;
12530 struct btrfs_key extent_key;
12531 struct btrfs_chunk *chunk;
12532 struct extent_buffer *leaf;
12533 struct btrfs_extent_item *ei;
12534 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12542 btrfs_item_key_to_cpu(eb, &bg_key, slot);
/* Copy the item out of the leaf so its fields can be read from bg_item */
12543 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
12544 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12545 used = btrfs_block_group_used(&bg_item);
12546 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk item is looked up with the block group start as key offset */
12548 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12549 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12550 chunk_key.offset = bg_key.objectid;
12552 btrfs_init_path(&path);
12553 /* Search for the referencer chunk */
12554 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12557 "block group[%llu %llu] did not find the related chunk item",
12558 bg_key.objectid, bg_key.offset);
12559 err |= REFERENCER_MISSING;
12561 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12562 struct btrfs_chunk);
12563 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12566 "block group[%llu %llu] related chunk item length does not match",
12567 bg_key.objectid, bg_key.offset);
12568 err |= REFERENCER_MISMATCH;
12571 btrfs_release_path(&path);
12573 /* Search from the block group bytenr */
12574 extent_key.objectid = bg_key.objectid;
12575 extent_key.type = 0;
12576 extent_key.offset = 0;
12578 btrfs_init_path(&path);
12579 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12583 /* Iterate extent tree to account used space */
12585 leaf = path.nodes[0];
12587 /* Search slot can point to the last item beyond leaf nritems */
12588 if (path.slots[0] >= btrfs_header_nritems(leaf))
12591 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* Keys at or past the end of the block group range end the accounting */
12592 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12595 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12596 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12598 if (extent_key.objectid < bg_key.objectid)
12601 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12604 total += extent_key.offset;
/* The extent flag has to fit the type of the block group */
12606 ei = btrfs_item_ptr(leaf, path.slots[0],
12607 struct btrfs_extent_item);
12608 flags = btrfs_extent_flags(leaf, ei);
12609 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12610 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12612 "bad extent[%llu, %llu) type mismatch with chunk",
12613 extent_key.objectid,
12614 extent_key.objectid + extent_key.offset);
12615 err |= CHUNK_TYPE_MISMATCH;
12617 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12618 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12619 BTRFS_BLOCK_GROUP_METADATA))) {
12621 "bad extent[%llu, %llu) type mismatch with chunk",
12622 extent_key.objectid,
12623 extent_key.objectid + nodesize);
12624 err |= CHUNK_TYPE_MISMATCH;
12628 ret = btrfs_next_item(extent_root, &path);
12634 btrfs_release_path(&path);
12636 if (total != used) {
12638 "block group[%llu %llu] used %llu but extent items used %llu",
12639 bg_key.objectid, bg_key.offset, used, total);
12640 err |= ACCOUNTING_MISMATCH;
12646 * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
12647 * FIXME: We still need to repair error of dev_item.
12649 * Returns error after repair.
/*
 * @trans:      transaction handle passed to btrfs_make_block_group()
 * @chunk_root: root that provides the fs_info
 * @path:       path whose leaf and slot point at the chunk item
 * @err:        error bits reported for this chunk item
 *
 * When @err has REFERENCER_MISSING set, a block group is created from the
 * chunk type, chunk key and chunk length. The bit is cleared from @err once
 * the block group has been added.
 */
12651 static int repair_chunk_item(struct btrfs_trans_handle *trans,
12652 struct btrfs_root *chunk_root,
12653 struct btrfs_path *path, int err)
12655 struct btrfs_chunk *chunk;
12656 struct btrfs_key chunk_key;
12657 struct extent_buffer *eb = path->nodes[0];
12659 int slot = path->slots[0];
12663 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12664 if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
12666 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
/* path->nodes[0] is the same buffer as eb */
12667 type = btrfs_chunk_type(path->nodes[0], chunk);
12668 length = btrfs_chunk_length(eb, chunk);
12670 if (err & REFERENCER_MISSING) {
12671 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
12672 type, chunk_key.objectid, chunk_key.offset, length);
12674 error("fail to add block group item[%llu %llu]",
12675 chunk_key.offset, length);
12678 err &= ~REFERENCER_MISSING;
12679 printf("Added block group item[%llu %llu]\n",
12680 chunk_key.offset, length);
12689 * Check a chunk item.
12690 * Including checking all referred dev_extents and block group
/*
 * @fs_info: filesystem whose extent root and dev root are searched
 * @eb:      leaf holding the chunk item
 * @slot:    slot of the chunk item in @eb
 *
 * Checks made on the chunk item:
 *  - btrfs_check_chunk_valid() must accept it (otherwise BYTES_UNALIGNED and
 *    UNKNOWN_TYPE are set);
 *  - a BLOCK_GROUP_ITEM keyed (chunk start, chunk length) must exist in the
 *    extent root and carry the same flags as the chunk type;
 *  - every stripe must have a DEV_EXTENT at (devid, stripe offset) that
 *    points back to this chunk and has the stripe length (BACKREF_MISSING).
 *
 * All messages identify the chunk by its logical range
 * [chunk_key.offset, chunk_end).
 */
12692 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12693 struct extent_buffer *eb, int slot)
12695 struct btrfs_root *extent_root = fs_info->extent_root;
12696 struct btrfs_root *dev_root = fs_info->dev_root;
12697 struct btrfs_path path;
12698 struct btrfs_key chunk_key;
12699 struct btrfs_key bg_key;
12700 struct btrfs_key devext_key;
12701 struct btrfs_chunk *chunk;
12702 struct extent_buffer *leaf;
12703 struct btrfs_block_group_item *bi;
12704 struct btrfs_block_group_item bg_item;
12705 struct btrfs_dev_extent *ptr;
12717 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12718 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12719 length = btrfs_chunk_length(eb, chunk);
12720 chunk_end = chunk_key.offset + length;
12721 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12724 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12726 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12729 type = btrfs_chunk_type(eb, chunk);
/* The block group of a chunk is keyed by (chunk start, chunk length) */
12731 bg_key.objectid = chunk_key.offset;
12732 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12733 bg_key.offset = length;
12735 btrfs_init_path(&path);
12736 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
12739 "chunk[%llu %llu) did not find the related block group item",
12740 chunk_key.offset, chunk_end);
12741 err |= REFERENCER_MISSING;
12743 leaf = path.nodes[0];
12744 bi = btrfs_item_ptr(leaf, path.slots[0],
12745 struct btrfs_block_group_item);
12746 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
/*
 * NOTE(review): a flags mismatch is recorded as REFERENCER_MISSING and not
 * as REFERENCER_MISMATCH - confirm this is intended.
 */
12748 if (btrfs_block_group_flags(&bg_item) != type) {
12750 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
12751 chunk_key.offset, chunk_end, type,
12752 btrfs_block_group_flags(&bg_item));
12753 err |= REFERENCER_MISSING;
12757 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
12758 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
12759 for (i = 0; i < num_stripes; i++) {
/* The path is reused for every stripe lookup */
12760 btrfs_release_path(&path);
12761 btrfs_init_path(&path);
12762 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
12763 devext_key.type = BTRFS_DEV_EXTENT_KEY;
12764 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
12766 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
12769 goto not_match_dev;
12771 leaf = path.nodes[0];
12772 ptr = btrfs_item_ptr(leaf, path.slots[0],
12773 struct btrfs_dev_extent);
12774 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
12775 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
12776 if (objectid != chunk_key.objectid ||
12777 offset != chunk_key.offset ||
12778 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
12779 goto not_match_dev;
12782 err |= BACKREF_MISSING;
/* The range starts at chunk_key.offset, as in the other messages */
12784 "chunk[%llu %llu) stripe %d did not find the related dev extent",
12785 chunk_key.offset, chunk_end, i);
12788 btrfs_release_path(&path);
/*
 * Delete the item that @path currently points at from @root.
 *
 * @trans: transaction handle used for the search and the deletion
 * @root:  tree the item is deleted from
 * @path:  path pointing at the item; it is released and searched again
 *
 * The item key is saved before the path is released, so the same item can be
 * found again with @trans and removed with btrfs_del_item(). The outcome is
 * reported with error() or printf().
 */
12793 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
12794 struct btrfs_root *root,
12795 struct btrfs_path *path)
12797 struct btrfs_key key;
12800 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
12801 btrfs_release_path(path);
12802 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
12808 ret = btrfs_del_item(trans, root, path);
/* When the path is left at slot 0 it is moved to the previous leaf */
12812 if (path->slots[0] == 0)
12813 btrfs_prev_leaf(root, path);
12818 error("failed to delete root %llu item[%llu, %u, %llu]",
12819 root->objectid, key.objectid, key.type, key.offset);
12821 printf("Deleted root %llu item[%llu, %u, %llu]\n",
12822 root->objectid, key.objectid, key.type, key.offset);
12827 * Main entry function to check known items and update related accounting info
/*
 * @trans:         transaction handle forwarded to the repair helpers
 * @root:          tree that owns the leaf
 * @path:          path whose nodes[0] and slots[0] select the leaf and item
 * @nrefs:         forwarded to check_extent_data_item()
 * @account_bytes: forwarded to check_extent_data_item()
 *
 * Dispatches on the key type of each item to the matching check helper.
 * Block group items reporting REFERENCER_MISSING, and backref items
 * reporting REFERENCER_MISMATCH or REFERENCER_MISSING, are handed to
 * delete_extent_tree_item(). Chunk items are handed to repair_chunk_item().
 * EXTENT_CSUM items only add their item size to total_csum_bytes.
 */
12829 static int check_leaf_items(struct btrfs_trans_handle *trans,
12830 struct btrfs_root *root, struct btrfs_path *path,
12831 struct node_refs *nrefs, int account_bytes)
12833 struct btrfs_fs_info *fs_info = root->fs_info;
12834 struct btrfs_key key;
12835 struct extent_buffer *eb;
12838 struct btrfs_extent_data_ref *dref;
12843 eb = path->nodes[0];
12844 slot = path->slots[0];
12845 if (slot >= btrfs_header_nritems(eb)) {
12847 error("empty leaf [%llu %u] root %llu", eb->start,
12848 root->fs_info->nodesize, root->objectid);
12854 btrfs_item_key_to_cpu(eb, &key, slot);
12858 case BTRFS_EXTENT_DATA_KEY:
12859 ret = check_extent_data_item(root, path, nrefs, account_bytes);
12862 case BTRFS_BLOCK_GROUP_ITEM_KEY:
12863 ret = check_block_group_item(fs_info, eb, slot);
12865 ret & REFERENCER_MISSING)
12866 ret = delete_extent_tree_item(trans, root, path);
12869 case BTRFS_DEV_ITEM_KEY:
12870 ret = check_dev_item(fs_info, eb, slot);
12873 case BTRFS_CHUNK_ITEM_KEY:
12874 ret = check_chunk_item(fs_info, eb, slot);
12876 ret = repair_chunk_item(trans, root, path, ret);
12879 case BTRFS_DEV_EXTENT_KEY:
12880 ret = check_dev_extent_item(fs_info, eb, slot);
12883 case BTRFS_EXTENT_ITEM_KEY:
12884 case BTRFS_METADATA_ITEM_KEY:
12885 ret = check_extent_item(trans, fs_info, path);
12888 case BTRFS_EXTENT_CSUM_KEY:
12889 total_csum_bytes += btrfs_item_size_nr(eb, slot);
12892 case BTRFS_TREE_BLOCK_REF_KEY:
12893 ret = check_tree_block_backref(fs_info, key.offset,
12896 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12897 ret = delete_extent_tree_item(trans, root, path);
12900 case BTRFS_EXTENT_DATA_REF_KEY:
12901 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
12902 ret = check_extent_data_backref(fs_info,
12903 btrfs_extent_data_ref_root(eb, dref),
12904 btrfs_extent_data_ref_objectid(eb, dref),
12905 btrfs_extent_data_ref_offset(eb, dref),
12907 btrfs_extent_data_ref_count(eb, dref));
12909 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12910 ret = delete_extent_tree_item(trans, root, path);
12913 case BTRFS_SHARED_BLOCK_REF_KEY:
12914 ret = check_shared_block_backref(fs_info, key.offset,
12917 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12918 ret = delete_extent_tree_item(trans, root, path);
12921 case BTRFS_SHARED_DATA_REF_KEY:
12922 ret = check_shared_data_backref(fs_info, key.offset,
12925 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12926 ret = delete_extent_tree_item(trans, root, path);
12939 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info);
12942 * Low memory usage version check_chunks_and_extents.
/*
 * @fs_info: filesystem to check
 *
 * Steps, in order: pin every metadata block, start a transaction on the
 * extent root, check the chunk root and the tree root with
 * check_btrfs_root(), then iterate the ROOT_ITEMs of the tree root starting
 * at the extent tree objectid and check each tree they describe. Block
 * accounting is updated for repair and the transaction is committed on the
 * extent root.
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started.
 */
12944 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
12946 struct btrfs_trans_handle *trans = NULL;
12947 struct btrfs_path path;
12948 struct btrfs_key old_key;
12949 struct btrfs_key key;
12950 struct btrfs_root *root1;
12951 struct btrfs_root *root;
12952 struct btrfs_root *cur_root;
12956 root = fs_info->fs_root;
12959 /* pin every tree block to avoid extent overwrite */
12960 ret = pin_metadata_blocks(fs_info);
12962 error("failed to pin metadata blocks");
12965 trans = btrfs_start_transaction(fs_info->extent_root, 1);
12966 if (IS_ERR(trans)) {
12967 error("failed to start transaction before check");
12968 return PTR_ERR(trans);
12972 root1 = root->fs_info->chunk_root;
12973 ret = check_btrfs_root(trans, root1, 0, 1);
12976 root1 = root->fs_info->tree_root;
12977 ret = check_btrfs_root(trans, root1, 0, 1);
/* Position the path at the root item of the extent tree */
12980 btrfs_init_path(&path);
12981 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
12983 key.type = BTRFS_ROOT_ITEM_KEY;
12985 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
12987 error("cannot find extent tree in tree_root");
12992 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12993 if (key.type != BTRFS_ROOT_ITEM_KEY)
12996 key.offset = (u64)-1;
/* Reloc tree roots are read without the root cache and freed after use */
12998 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12999 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
13002 cur_root = btrfs_read_fs_root(root->fs_info, &key);
13003 if (IS_ERR(cur_root) || !cur_root) {
13004 error("failed to read tree: %lld", key.objectid);
13008 ret = check_btrfs_root(trans, cur_root, 0, 1);
13011 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13012 btrfs_free_fs_root(cur_root);
/* The tree root is searched again with old_key before moving on */
13014 btrfs_release_path(&path);
13015 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
13016 &old_key, &path, 0, 0);
13020 ret = btrfs_next_item(root1, &path);
13026 /* if repair, update block accounting */
13028 ret = btrfs_fix_block_accounting(trans, root);
13034 btrfs_commit_transaction(trans, root->fs_info->extent_root);
13036 btrfs_release_path(&path);
/*
 * Entry point of the extent check.
 *
 * Prints a banner to stderr when progress reporting is disabled, then runs
 * check_chunks_and_extents_v2() if check_mode is CHECK_MODE_LOWMEM, or
 * check_chunks_and_extents().
 */
13041 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
13045 if (!ctx.progress_enabled)
13046 fprintf(stderr, "checking extents\n");
13047 if (check_mode == CHECK_MODE_LOWMEM)
13048 ret = check_chunks_and_extents_v2(fs_info);
13050 ret = check_chunks_and_extents(fs_info);
/*
 * Give @root a root node with a freshly written header.
 *
 * @trans:     running transaction; its transid becomes the header generation
 * @root:      tree whose root node is reinitialised
 * @overwrite: NOTE(review): presumably selects reuse of the current root
 *             node instead of allocating a new block - confirm
 *
 * The block header is zeroed and rewritten (level, bytenr, generation,
 * backref revision, owner, fsid, chunk tree uuid) and the buffer is marked
 * dirty. When the block has the same start as the old root node, the root
 * item generation and level are updated through btrfs_update_root().
 * Finally the root is added to the dirty list.
 */
13055 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
13056 struct btrfs_root *root, int overwrite)
13058 struct extent_buffer *c;
13059 struct extent_buffer *old = root->node;
13062 struct btrfs_disk_key disk_key = {0,0,0};
13068 extent_buffer_get(c);
13071 c = btrfs_alloc_free_block(trans, root,
13072 root->fs_info->nodesize,
13073 root->root_key.objectid,
13074 &disk_key, level, 0, 0);
13077 extent_buffer_get(c);
/* Wipe the header, then set every header field again */
13081 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
13082 btrfs_set_header_level(c, level);
13083 btrfs_set_header_bytenr(c, c->start);
13084 btrfs_set_header_generation(c, trans->transid);
13085 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
13086 btrfs_set_header_owner(c, root->root_key.objectid);
13088 write_extent_buffer(c, root->fs_info->fsid,
13089 btrfs_header_fsid(), BTRFS_FSID_SIZE);
13091 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
13092 btrfs_header_chunk_tree_uuid(c),
13095 btrfs_mark_buffer_dirty(c);
13097 * this case can happen in the following case:
13099 * 1.overwrite previous root.
13101 * 2.reinit reloc data root, this is because we skip pin
13102 * down reloc data tree before which means we can allocate
13103 * same block bytenr here.
13105 if (old->start == c->start) {
13106 btrfs_set_root_generation(&root->root_item,
13108 root->root_item.level = btrfs_header_level(root->node);
13109 ret = btrfs_update_root(trans, root->fs_info->tree_root,
13110 &root->root_key, &root->root_item);
13112 free_extent_buffer(c);
13116 free_extent_buffer(old);
13118 add_root_to_dirty_list(root);
/*
 * Recursively pin the extent of @eb and of the tree blocks reachable from it.
 *
 * @fs_info:   filesystem whose pinned_extents tree is tested and updated
 * @eb:        tree block to start from
 * @tree_root: passed as 1 by pin_metadata_blocks() for the tree root and as
 *             0 for the chunk root; the recursion into a root found through
 *             a ROOT_ITEM passes 0
 *
 * A block whose range is already EXTENT_DIRTY in pinned_extents is not
 * pinned again. ROOT_ITEMs are followed to their root block, except for the
 * extent root and the two reloc roots. Child pointers met when level is 1
 * and @tree_root is 0 are pinned by bytenr with fs_info->nodesize and are
 * not read. Other children are read and walked recursively.
 *
 * NOTE(review): the comment before the first read_tree_block() call still
 * talks about passing in extent_root, but the call takes fs_info - the
 * comment looks stale, confirm.
 */
13122 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
13123 struct extent_buffer *eb, int tree_root)
13125 struct extent_buffer *tmp;
13126 struct btrfs_root_item *ri;
13127 struct btrfs_key key;
13129 int level = btrfs_header_level(eb);
13135 * If we have pinned this block before, don't pin it again.
13136 * This can not only avoid forever loop with broken filesystem
13137 * but also give us some speedups.
13139 if (test_range_bit(&fs_info->pinned_extents, eb->start,
13140 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
13143 btrfs_pin_extent(fs_info, eb->start, eb->len);
13145 nritems = btrfs_header_nritems(eb);
13146 for (i = 0; i < nritems; i++) {
13148 btrfs_item_key_to_cpu(eb, &key, i);
13149 if (key.type != BTRFS_ROOT_ITEM_KEY)
13151 /* Skip the extent root and reloc roots */
13152 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
13153 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
13154 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
13156 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
13157 bytenr = btrfs_disk_root_bytenr(eb, ri);
13160 * If at any point we start needing the real root we
13161 * will have to build a stump root for the root we are
13162 * in, but for now this doesn't actually use the root so
13163 * just pass in extent_root.
13165 tmp = read_tree_block(fs_info, bytenr, 0);
13166 if (!extent_buffer_uptodate(tmp)) {
13167 fprintf(stderr, "Error reading root block\n");
13170 ret = pin_down_tree_blocks(fs_info, tmp, 0);
13171 free_extent_buffer(tmp);
13175 bytenr = btrfs_node_blockptr(eb, i);
13177 /* If we aren't the tree root don't read the block */
13178 if (level == 1 && !tree_root) {
13179 btrfs_pin_extent(fs_info, bytenr,
13180 fs_info->nodesize);
13184 tmp = read_tree_block(fs_info, bytenr, 0);
13185 if (!extent_buffer_uptodate(tmp)) {
13186 fprintf(stderr, "Error reading tree block\n");
13189 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
13190 free_extent_buffer(tmp);
/*
 * Pin the tree blocks reachable from the chunk root node, then those
 * reachable from the tree root node (walked with tree_root set to 1).
 * Returns the result of the tree root walk.
 */
13199 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13203 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13207 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Recreate the in-memory block groups from the chunk tree.
 *
 * @fs_info: filesystem whose chunk root is walked
 *
 * The avail_data, avail_metadata and avail_system alloc bits of @fs_info are
 * cleared first. Every CHUNK_ITEM found in the chunk root is then added with
 * btrfs_add_block_group() and its range is marked dirty in
 * fs_info->free_space_cache. A last pass iterates the block groups with
 * btrfs_lookup_first_block_group().
 */
13210 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13212 struct btrfs_block_group_cache *cache;
13213 struct btrfs_path path;
13214 struct extent_buffer *leaf;
13215 struct btrfs_chunk *chunk;
13216 struct btrfs_key key;
13220 btrfs_init_path(&path);
13222 key.type = BTRFS_CHUNK_ITEM_KEY;
13224 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13226 btrfs_release_path(&path);
13231 * We do this in case the block groups were screwed up and had alloc
13232 * bits that aren't actually set on the chunks. This happens with
13233 * restored images every time and could happen in real life I guess.
13235 fs_info->avail_data_alloc_bits = 0;
13236 fs_info->avail_metadata_alloc_bits = 0;
13237 fs_info->avail_system_alloc_bits = 0;
13239 /* First we need to create the in-memory block groups */
13241 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13242 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13244 btrfs_release_path(&path);
13252 leaf = path.nodes[0];
13253 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13254 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13259 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13260 btrfs_add_block_group(fs_info, 0,
13261 btrfs_chunk_type(leaf, chunk),
13262 key.objectid, key.offset,
13263 btrfs_chunk_length(leaf, chunk));
13264 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13265 key.offset + btrfs_chunk_length(leaf, chunk));
13270 cache = btrfs_lookup_first_block_group(fs_info, start);
13274 start = cache->key.objectid + cache->key.offset;
13277 btrfs_release_path(&path);
/*
 * Remove the on-disk state of a pending balance.
 *
 * @trans:   running transaction
 * @fs_info: filesystem whose tree root is modified
 *
 * Steps, in order: delete the balance item (BTRFS_BALANCE_OBJECTID) from the
 * tree root, delete the items whose objectid is BTRFS_TREE_RELOC_OBJECTID in
 * batches with btrfs_del_items(), then read the data reloc tree,
 * reinitialise its root node and recreate its root directory with
 * btrfs_make_root_dir().
 */
13281 static int reset_balance(struct btrfs_trans_handle *trans,
13282 struct btrfs_fs_info *fs_info)
13284 struct btrfs_root *root = fs_info->tree_root;
13285 struct btrfs_path path;
13286 struct extent_buffer *leaf;
13287 struct btrfs_key key;
13288 int del_slot, del_nr = 0;
13292 btrfs_init_path(&path);
13293 key.objectid = BTRFS_BALANCE_OBJECTID;
13294 key.type = BTRFS_BALANCE_ITEM_KEY;
13296 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13301 goto reinit_data_reloc;
13306 ret = btrfs_del_item(trans, root, &path);
13309 btrfs_release_path(&path);
/* Now look for the root items of the reloc trees */
13311 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13312 key.type = BTRFS_ROOT_ITEM_KEY;
13314 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13318 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13323 ret = btrfs_del_items(trans, root, &path,
13330 btrfs_release_path(&path);
13333 ret = btrfs_search_slot(trans, root, &key, &path,
13340 leaf = path.nodes[0];
13341 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13342 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13344 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13349 del_slot = path.slots[0];
13358 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13362 btrfs_release_path(&path);
/* From here on root refers to the data reloc tree, not the tree root */
13365 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13366 key.type = BTRFS_ROOT_ITEM_KEY;
13367 key.offset = (u64)-1;
13368 root = btrfs_read_fs_root(fs_info, &key);
13369 if (IS_ERR(root)) {
13370 fprintf(stderr, "Error reading data reloc tree\n");
13371 ret = PTR_ERR(root);
13374 record_root_in_trans(trans, root);
13375 ret = btrfs_fsck_reinit_root(trans, root, 0);
13378 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13380 btrfs_release_path(&path);
/*
 * Rebuild the extent tree.
 *
 * @trans:   running transaction
 * @fs_info: filesystem being repaired; the MIXED_GROUPS incompat feature is
 *           reported as unsupported
 *
 * Steps, in order: pin the metadata blocks in use, free and recreate the
 * in-memory block groups, reinitialise the extent root node, insert a block
 * group item into the extent root for every cached block group (followed by
 * btrfs_extent_post_op()), and reset any pending balance.
 */
13384 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13385 struct btrfs_fs_info *fs_info)
13391 * The only reason we don't do this is because right now we're just
13392 * walking the trees we find and pinning down their bytes, we don't look
13393 * at any of the leaves. In order to do mixed groups we'd have to check
13394 * the leaves of any fs roots and pin down the bytes for any file
13395 * extents we find. Not hard but why do it if we don't have to?
13397 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13398 fprintf(stderr, "We don't support re-initing the extent tree "
13399 "for mixed block groups yet, please notify a btrfs "
13400 "developer you want to do this so they can add this "
13401 "functionality.\n");
13406 * first we need to walk all of the trees except the extent tree and pin
13407 * down the bytes that are in use so we don't overwrite any existing
13410 ret = pin_metadata_blocks(fs_info);
13412 fprintf(stderr, "error pinning down used bytes\n");
13417 * Need to drop all the block groups since we're going to recreate all
13420 btrfs_free_block_groups(fs_info);
13421 ret = reset_block_groups(fs_info);
13423 fprintf(stderr, "error resetting the block groups\n");
13427 /* Ok we can allocate now, reinit the extent root */
13428 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13430 fprintf(stderr, "extent root initialization failed\n");
13432 * When the transaction code is updated we should end the
13433 * transaction, but for now progs only knows about commit so
13434 * just return an error.
13440 * Now we have all the in-memory block groups setup so we can make
13441 * allocations properly, and the metadata we care about is safe since we
13442 * pinned all of it above.
13445 struct btrfs_block_group_cache *cache;
13447 cache = btrfs_lookup_first_block_group(fs_info, start);
13450 start = cache->key.objectid + cache->key.offset;
13451 ret = btrfs_insert_item(trans, fs_info->extent_root,
13452 &cache->key, &cache->item,
13453 sizeof(cache->item));
13455 fprintf(stderr, "Error adding block group\n");
13458 btrfs_extent_post_op(trans, fs_info->extent_root);
13461 ret = reset_balance(trans, fs_info);
13463 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Recow the metadata block @eb inside the tree that owns it.
 *
 * @root: only root->fs_info is used, to read the owner root; the parameter
 *        is then reassigned to that owner root
 * @eb:   block to recow
 *
 * The owner root is read using the header owner of @eb. The first key of
 * @eb is then searched with path.lowest_level set to the level of @eb, and
 * the transaction is committed. Returns PTR_ERR() of the owner root or of
 * the transaction when either cannot be obtained.
 */
13468 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13470 struct btrfs_path path;
13471 struct btrfs_trans_handle *trans;
13472 struct btrfs_key key;
13475 printf("Recowing metadata block %llu\n", eb->start);
13476 key.objectid = btrfs_header_owner(eb);
13477 key.type = BTRFS_ROOT_ITEM_KEY;
13478 key.offset = (u64)-1;
13480 root = btrfs_read_fs_root(root->fs_info, &key);
13481 if (IS_ERR(root)) {
13482 fprintf(stderr, "Couldn't find owner root %llu\n",
13484 return PTR_ERR(root);
13487 trans = btrfs_start_transaction(root, 1);
13489 return PTR_ERR(trans);
/* Nodes and leaves store their first key differently */
13491 btrfs_init_path(&path);
13492 path.lowest_level = btrfs_header_level(eb);
13493 if (path.lowest_level)
13494 btrfs_node_key_to_cpu(eb, &key, 0);
13496 btrfs_item_key_to_cpu(eb, &key, 0);
13498 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13499 btrfs_commit_transaction(trans, root);
13500 btrfs_release_path(&path);
/*
 * Delete the item described by @bad from the tree named by bad->root_id.
 *
 * @root: only root->fs_info is used, to read the owner root; the parameter
 *        is then reassigned to that owner root
 * @bad:  key of the item to delete and id of the tree that holds it
 *
 * The item is searched in its own transaction, removed with
 * btrfs_del_item(), and the transaction is committed. Returns PTR_ERR() of
 * the owner root or of the transaction when either cannot be obtained.
 */
13504 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13506 struct btrfs_path path;
13507 struct btrfs_trans_handle *trans;
13508 struct btrfs_key key;
13511 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13512 bad->key.type, bad->key.offset);
/* key names the root item of the owning tree, not the bad item */
13513 key.objectid = bad->root_id;
13514 key.type = BTRFS_ROOT_ITEM_KEY;
13515 key.offset = (u64)-1;
13517 root = btrfs_read_fs_root(root->fs_info, &key);
13518 if (IS_ERR(root)) {
13519 fprintf(stderr, "Couldn't find owner root %llu\n",
13521 return PTR_ERR(root);
13524 trans = btrfs_start_transaction(root, 1);
13526 return PTR_ERR(trans);
13528 btrfs_init_path(&path);
13529 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13535 ret = btrfs_del_item(trans, root, &path);
13537 btrfs_commit_transaction(trans, root);
13538 btrfs_release_path(&path);
/*
 * Clear the log root and the log root level in the super block copy of
 * root->fs_info, then commit a transaction on @root so the change is
 * written. ret holds PTR_ERR(trans) if the transaction cannot be started,
 * and the commit result otherwise.
 */
13542 static int zero_log_tree(struct btrfs_root *root)
13544 struct btrfs_trans_handle *trans;
13547 trans = btrfs_start_transaction(root, 1);
13548 if (IS_ERR(trans)) {
13549 ret = PTR_ERR(trans);
13552 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13553 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13554 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert checksums for the data range that begins at @start.
 *
 * @trans:     running transaction
 * @csum_root: csum tree receiving the checksums
 * @buf:       scratch buffer the data is read into; the callers in this file
 *             allocate fs_info->sectorsize bytes for it
 * @start:     first byte of the range
 *
 * The range is walked one sector at a time: each sector is read into @buf
 * with read_extent_data() and handed to btrfs_csum_file_block().
 */
13558 static int populate_csum(struct btrfs_trans_handle *trans,
13559 struct btrfs_root *csum_root, char *buf, u64 start,
13562 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13567 while (offset < len) {
13568 sectorsize = fs_info->sectorsize;
13569 ret = read_extent_data(fs_info, buf, start + offset,
13573 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13574 start + offset, buf, sectorsize);
13577 offset += sectorsize;
/*
 * Fill the csum tree from the file extents of one fs or subvolume tree.
 *
 * @trans:     running transaction
 * @csum_root: csum tree receiving the checksums
 * @cur_root:  tree whose file extents are scanned
 *
 * A buffer of one sector is allocated for the reads. Every EXTENT_DATA item
 * of type BTRFS_FILE_EXTENT_REG is passed to populate_csum() with its disk
 * bytenr and disk num bytes. A result of -EEXIST from populate_csum() is
 * special-cased.
 */
13582 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13583 struct btrfs_root *csum_root,
13584 struct btrfs_root *cur_root)
13586 struct btrfs_path path;
13587 struct btrfs_key key;
13588 struct extent_buffer *node;
13589 struct btrfs_file_extent_item *fi;
13596 buf = malloc(cur_root->fs_info->sectorsize);
13600 btrfs_init_path(&path);
13604 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13607 /* Iterate all regular file extents and fill its csum */
13609 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13611 if (key.type != BTRFS_EXTENT_DATA_KEY)
13613 node = path.nodes[0];
13614 slot = path.slots[0];
13615 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13616 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* The on-disk extent is checksummed, not the file range */
13618 start = btrfs_file_extent_disk_bytenr(node, fi);
13619 len = btrfs_file_extent_disk_num_bytes(node, fi);
13621 ret = populate_csum(trans, csum_root, buf, start, len);
13622 if (ret == -EEXIST)
13628 * TODO: if next leaf is corrupted, jump to nearest next valid
13631 ret = btrfs_next_item(cur_root, &path);
13641 btrfs_release_path(&path);
/*
 * Fill the csum tree by scanning the fs and subvolume trees.
 *
 * @trans:     running transaction
 * @csum_root: csum tree receiving the checksums
 *
 * Iterates the items of the tree root starting at BTRFS_FS_TREE_OBJECTID.
 * Each ROOT_ITEM whose objectid is accepted by is_fstree() is read with
 * btrfs_read_fs_root() and passed to fill_csum_tree_from_one_fs_root().
 * Objectids above BTRFS_LAST_FREE_OBJECTID are tested for separately.
 */
13646 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13647 struct btrfs_root *csum_root)
13649 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13650 struct btrfs_path path;
13651 struct btrfs_root *tree_root = fs_info->tree_root;
13652 struct btrfs_root *cur_root;
13653 struct extent_buffer *node;
13654 struct btrfs_key key;
13658 btrfs_init_path(&path);
13659 key.objectid = BTRFS_FS_TREE_OBJECTID;
13661 key.type = BTRFS_ROOT_ITEM_KEY;
13662 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13671 node = path.nodes[0];
13672 slot = path.slots[0];
13673 btrfs_item_key_to_cpu(node, &key, slot);
13674 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13676 if (key.type != BTRFS_ROOT_ITEM_KEY)
13678 if (!is_fstree(key.objectid))
/* The key offset is set to (u64)-1 before the root is read */
13680 key.offset = (u64)-1;
13682 cur_root = btrfs_read_fs_root(fs_info, &key);
13683 if (IS_ERR(cur_root) || !cur_root) {
13684 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13688 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13693 ret = btrfs_next_item(tree_root, &path);
13703 btrfs_release_path(&path);
/*
 * Fill the csum tree by scanning the extent tree.
 *
 * @trans:     running transaction
 * @csum_root: csum tree receiving the checksums; its fs_info provides the
 *             extent root and the sector size of the read buffer
 *
 * Walks the extent root leaf by leaf. Every EXTENT_ITEM whose flags include
 * BTRFS_EXTENT_FLAG_DATA is passed to populate_csum(), starting at the
 * extent bytenr (key.objectid).
 */
13707 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13708 struct btrfs_root *csum_root)
13710 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13711 struct btrfs_path path;
13712 struct btrfs_extent_item *ei;
13713 struct extent_buffer *leaf;
13715 struct btrfs_key key;
13718 btrfs_init_path(&path);
13720 key.type = BTRFS_EXTENT_ITEM_KEY;
13722 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
13724 btrfs_release_path(&path);
13728 buf = malloc(csum_root->fs_info->sectorsize);
13730 btrfs_release_path(&path);
/* Move to the next leaf once the current one is exhausted */
13735 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13736 ret = btrfs_next_leaf(extent_root, &path);
13744 leaf = path.nodes[0];
13746 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13747 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
13752 ei = btrfs_item_ptr(leaf, path.slots[0],
13753 struct btrfs_extent_item);
13754 if (!(btrfs_extent_flags(leaf, ei) &
13755 BTRFS_EXTENT_FLAG_DATA)) {
13760 ret = populate_csum(trans, csum_root, buf, key.objectid,
13767 btrfs_release_path(&path);
13773 * Recalculate the csum and put it into the csum tree.
13775 * Extent tree init will wipe out all the extent info, so in that case, we
13776 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
13777 * will use fs/subvol trees to init the csum tree.
/*
 * Dispatcher for csum tree refilling.
 *
 * @trans:          transaction handle, forwarded unchanged
 * @csum_root:      csum tree root, forwarded unchanged
 * @search_fs_tree: non-zero selects fill_csum_tree_from_fs(); the other
 *                  visible path is fill_csum_tree_from_extent()
 *
 * Returns whatever the selected helper returns.
 */
13779 static int fill_csum_tree(struct btrfs_trans_handle *trans,
13780 struct btrfs_root *csum_root,
13781 int search_fs_tree)
13783 if (search_fs_tree)
13784 return fill_csum_tree_from_fs(trans, csum_root);
13786 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the file-scope roots_info_cache.
 *
 * Removes entries one at a time until the tree is empty, then frees the
 * cache_tree itself and resets the global pointer to NULL so that a later
 * build_roots_info_cache() allocates a fresh one.
 */
13789 static void free_roots_info_cache(void)
/* Tests for a cache that was never allocated. */
13791 if (!roots_info_cache)
13794 while (!cache_tree_empty(roots_info_cache)) {
13795 struct cache_extent *entry;
13796 struct root_item_info *rii;
13798 entry = first_cache_extent(roots_info_cache);
13801 remove_cache_extent(roots_info_cache, entry);
/*
 * Recover the containing root_item_info from its embedded cache_extent.
 * NOTE(review): presumably freed on a line not visible here - confirm.
 */
13802 rii = container_of(entry, struct root_item_info, cache_extent);
13806 free(roots_info_cache);
13807 roots_info_cache = NULL;
/*
 * Populate roots_info_cache from the extent tree.
 *
 * Scans EXTENT_ITEM and METADATA_ITEM entries of the extent tree. For an item
 * whose first inline ref is of type BTRFS_TREE_BLOCK_REF_KEY, the ref offset
 * is used as the root id, and a root_item_info keyed by that id records the
 * highest-level tree block seen so far (bytenr, generation, level).
 *
 * @info: filesystem whose extent root is scanned
 *
 * NOTE(review): loop header, error branches and the return statement are not
 * visible in this listing.
 */
13810 static int build_roots_info_cache(struct btrfs_fs_info *info)
13813 struct btrfs_key key;
13814 struct extent_buffer *leaf;
13815 struct btrfs_path path;
/* Allocate and initialise the global cache on first use. */
13817 if (!roots_info_cache) {
13818 roots_info_cache = malloc(sizeof(*roots_info_cache));
13819 if (!roots_info_cache)
13821 cache_tree_init(roots_info_cache);
13824 btrfs_init_path(&path);
13826 key.type = BTRFS_EXTENT_ITEM_KEY;
13828 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
13831 leaf = path.nodes[0];
13834 struct btrfs_key found_key;
13835 struct btrfs_extent_item *ei;
13836 struct btrfs_extent_inline_ref *iref;
13837 int slot = path.slots[0];
13842 struct cache_extent *entry;
13843 struct root_item_info *rii;
/* Past the last item of this leaf: step to the next leaf and reload. */
13845 if (slot >= btrfs_header_nritems(leaf)) {
13846 ret = btrfs_next_leaf(info->extent_root, &path);
13853 leaf = path.nodes[0];
13854 slot = path.slots[0];
13857 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
/* Only the two extent item key types are of interest. */
13859 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
13860 found_key.type != BTRFS_METADATA_ITEM_KEY)
13863 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
13864 flags = btrfs_extent_flags(leaf, ei);
/* An EXTENT_ITEM without the TREE_BLOCK flag is tested for here. */
13866 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
13867 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * METADATA_ITEM: the inline ref directly follows the extent item and the
 * level comes from the key offset. Otherwise a btrfs_tree_block_info sits
 * between the extent item and the inline ref and supplies the level.
 */
13870 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
13871 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
13872 level = found_key.offset;
13874 struct btrfs_tree_block_info *binfo;
13876 binfo = (struct btrfs_tree_block_info *)(ei + 1);
13877 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
13878 level = btrfs_tree_block_level(leaf, binfo);
13882 * For a root extent, it must be of the following type and the
13883 * first (and only one) iref in the item.
13885 type = btrfs_extent_inline_ref_type(leaf, iref);
13886 if (type != BTRFS_TREE_BLOCK_REF_KEY)
/* The inline ref offset is used as the owning root id (cache key). */
13889 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
13890 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/* New cache entry: range [root_id, root_id + 1), level (u8)-1 as "unset". */
13892 rii = malloc(sizeof(struct root_item_info));
13897 rii->cache_extent.start = root_id;
13898 rii->cache_extent.size = 1;
13899 rii->level = (u8)-1;
13900 entry = &rii->cache_extent;
13901 ret = insert_cache_extent(roots_info_cache, entry);
13904 rii = container_of(entry, struct root_item_info,
13908 ASSERT(rii->cache_extent.start == root_id);
13909 ASSERT(rii->cache_extent.size == 1);
/*
 * A higher level than recorded, or an entry still at the (u8)-1 sentinel,
 * replaces the recorded candidate and resets node_count to 1.
 */
13911 if (level > rii->level || rii->level == (u8)-1) {
13912 rii->level = level;
13913 rii->bytenr = found_key.objectid;
13914 rii->gen = btrfs_extent_generation(leaf, ei);
13915 rii->node_count = 1;
/* NOTE(review): body for the equal-level case is not visible here. */
13916 } else if (level == rii->level) {
13924 btrfs_release_path(&path);
/*
 * Compare one on-disk root item with the information collected in
 * roots_info_cache and, unless running read-only, rewrite it in the leaf.
 *
 * @path:           path whose nodes[0]/slots[0] point at the root item
 * @root_key:       key of that root item; its objectid is the cache lookup key
 * @read_only_mode: non-zero means only report; zero means bytenr, level and
 *                  generation are written back into the leaf buffer
 *
 * NOTE(review): the return statements are not visible in this listing, so
 * the return value convention must be confirmed against the full source.
 */
13929 static int maybe_repair_root_item(struct btrfs_path *path,
13930 const struct btrfs_key *root_key,
13931 const int read_only_mode)
13933 const u64 root_id = root_key->objectid;
13934 struct cache_extent *entry;
13935 struct root_item_info *rii;
13936 struct btrfs_root_item ri;
13937 unsigned long offset;
13939 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13942 "Error: could not find extent items for root %llu\n",
13943 root_key->objectid);
13947 rii = container_of(entry, struct root_item_info, cache_extent);
13948 ASSERT(rii->cache_extent.start == root_id);
13949 ASSERT(rii->cache_extent.size == 1);
/* Anything other than exactly one candidate node is reported as an error. */
13951 if (rii->node_count != 1) {
13953 "Error: could not find btree root extent for root %llu\n",
/* Work on a local copy of the root item read out of the leaf. */
13958 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
13959 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
/* Mismatch in any of bytenr, level or generation. */
13961 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
13962 btrfs_root_level(&ri) != rii->level ||
13963 btrfs_root_generation(&ri) != rii->gen) {
13966 * If we're in repair mode but our caller told us to not update
13967 * the root item, i.e. just check if it needs to be updated, don't
13968 * print this message, since the caller will call us again shortly
13969 * for the same root item without read only mode (the caller will
13970 * open a transaction first).
13972 if (!(read_only_mode && repair))
13974 "%sroot item for root %llu,"
13975 " current bytenr %llu, current gen %llu, current level %u,"
13976 " new bytenr %llu, new gen %llu, new level %u\n",
13977 (read_only_mode ? "" : "fixing "),
13979 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
13980 btrfs_root_level(&ri),
13981 rii->bytenr, rii->gen, rii->level);
/* A root item newer than the node that was found gets its own message. */
13983 if (btrfs_root_generation(&ri) > rii->gen) {
13985 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
13986 root_id, btrfs_root_generation(&ri), rii->gen);
/* Patch the local copy, then write it back over the item in the leaf. */
13990 if (!read_only_mode) {
13991 btrfs_set_root_bytenr(&ri, rii->bytenr);
13992 btrfs_set_root_level(&ri, rii->level);
13993 btrfs_set_root_generation(&ri, rii->gen);
13994 write_extent_buffer(path->nodes[0], &ri,
13995 offset, sizeof(ri));
14005 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
14006 * caused read-only snapshots to be corrupted if they were created at a moment
14007 * when the source subvolume/snapshot had orphan items. The issue was that the
14008 * on-disk root items became incorrect, referring to the pre orphan cleanup root
14009 * node instead of the post orphan cleanup root node.
14010 * So this function, and its callees, just detects and fixes those cases. Even
14011 * though the regression was for read-only snapshots, this function applies to
14012 * any snapshot/subvolume root.
14013 * This must be run before any other repair code. Otherwise, other repair
14014 * code may delete or modify backrefs in the extent tree, for example, which
14015 * will result in an inconsistent fs after repairing the root items.
/*
 * Check, and in repair mode fix, the root items stored in the tree root.
 *
 * Builds roots_info_cache from the extent tree, then walks the tree root
 * starting at (BTRFS_FIRST_FREE_OBJECTID, ROOT_ITEM) and passes each
 * ROOT_ITEM to maybe_repair_root_item(). The cache is freed before leaving.
 *
 * @info: filesystem to check
 *
 * NOTE(review): the restart logic around need_trans, the counters and the
 * return statement are not visible in this listing.
 */
14017 static int repair_root_items(struct btrfs_fs_info *info)
14019 struct btrfs_path path;
14020 struct btrfs_key key;
14021 struct extent_buffer *leaf;
14022 struct btrfs_trans_handle *trans = NULL;
14025 int need_trans = 0;
14027 btrfs_init_path(&path);
14029 ret = build_roots_info_cache(info);
14033 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
14034 key.type = BTRFS_ROOT_ITEM_KEY;
14039 * Avoid opening and committing transactions if a leaf doesn't have
14040 * any root items that need to be fixed, so that we avoid rotating
14041 * backup roots unnecessarily.
14044 trans = btrfs_start_transaction(info->tree_root, 1);
14045 if (IS_ERR(trans)) {
14046 ret = PTR_ERR(trans);
/* trans is NULL on a check-only pass and a live handle on a fixing pass. */
14051 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
14055 leaf = path.nodes[0];
14058 struct btrfs_key found_key;
/* End of leaf: compute the next key to search from, then drop the path. */
14060 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
14061 int no_more_keys = find_next_key(&path, &key);
14063 btrfs_release_path(&path);
14065 ret = btrfs_commit_transaction(trans,
14077 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
/* Item filters: key type, and the tree reloc root objectid. */
14079 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
14081 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* Without an open transaction the helper is called in read-only mode. */
14084 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
/* Repair requested but no transaction is open yet for this leaf. */
14088 if (!trans && repair) {
14091 btrfs_release_path(&path);
14101 free_roots_info_cache();
14102 btrfs_release_path(&path);
/* NOTE(review): the result of this commit is not assigned to anything. */
14104 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the free space cache of the block groups, then set the super block
 * cache generation to (u64)-1.
 *
 * Block groups are looked up starting at "current", which is advanced to the
 * end (objectid + offset) of the block group just processed.
 *
 * @fs_info: filesystem to operate on
 *
 * Returns PTR_ERR(trans) if the final transaction cannot be started.
 * NOTE(review): the loop exits and the final return are not visible here.
 */
14111 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
14113 struct btrfs_trans_handle *trans;
14114 struct btrfs_block_group_cache *bg_cache;
14118 /* Clear all free space cache inodes and its extent data */
14120 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
14123 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
/* Continue the lookup right after this block group. */
14126 current = bg_cache->key.objectid + bg_cache->key.offset;
14129 /* Don't forget to set cache_generation to -1 */
14130 trans = btrfs_start_transaction(fs_info->tree_root, 0);
14131 if (IS_ERR(trans)) {
14132 error("failed to update super block cache generation");
14133 return PTR_ERR(trans);
14135 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
/* NOTE(review): the result of this commit is not assigned to anything. */
14136 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Clear the free space cache in the requested format.
 *
 * @fs_info:       filesystem to operate on
 * clear_version:  1 clears the v1 cache via clear_free_space_cache();
 *                 2 clears the free space tree via
 *                 btrfs_clear_free_space_tree()
 *
 * For version 1 the FREE_SPACE_TREE compat_ro bit is checked first and the
 * user is pointed at "--clear-space-cache v2" when it is set. For version 2
 * a filesystem without that bit only gets an informational message.
 *
 * NOTE(review): the return statements are not visible in this listing.
 */
14141 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
14146 if (clear_version == 1) {
14147 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14149 "free space cache v2 detected, use --clear-space-cache v2");
14153 printf("Clearing free space cache\n");
14154 ret = clear_free_space_cache(fs_info);
14156 error("failed to clear free space cache");
14159 printf("Free space cache cleared\n");
14161 } else if (clear_version == 2) {
14162 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14163 printf("no free space cache v2 to clear\n");
14167 printf("Clear free space cache v2\n");
14168 ret = btrfs_clear_free_space_tree(fs_info);
14170 error("failed to clear free space cache v2: %d", ret);
14173 printf("free space cache v2 cleared\n");
/*
 * Usage/help text for "btrfs check", one string per output line. It is
 * passed to usage() by cmd_check() when option parsing fails. Keep the
 * option list in sync with long_options[] in cmd_check().
 */
14180 const char * const cmd_check_usage[] = {
14181 "btrfs check [options] <device>",
14182 "Check structural integrity of a filesystem (unmounted).",
14183 "Check structural integrity of an unmounted filesystem. Verify internal",
14184 "trees' consistency and item connectivity. In the repair mode try to",
14185 "fix the problems found. ",
14186 "WARNING: the repair mode is considered dangerous",
14188 "-s|--super <superblock> use this superblock copy",
14189 "-b|--backup use the first valid backup root copy",
14190 "--force skip mount checks, repair is not possible",
14191 "--repair try to repair the filesystem",
14192 "--readonly run in read-only mode (default)",
14193 "--init-csum-tree create a new CRC tree",
14194 "--init-extent-tree create a new extent tree",
14195 "--mode <MODE> allows choice of memory/IO trade-offs",
14196 " where MODE is one of:",
14197 " original - read inodes and extents to memory (requires",
14198 " more memory, does less IO)",
14199 " lowmem - try to use less memory but read blocks again",
14201 "--check-data-csum verify checksums of data blocks",
14202 "-Q|--qgroup-report print a report on qgroup consistency",
14203 "-E|--subvol-extents <subvolid>",
14204 " print subvolume extents and sharing state",
14205 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
14206 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
14207 "-p|--progress indicate progress",
14208 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of "btrfs check".
 *
 * Parses the command line, checks the mount status of the single positional
 * device argument and opens the filesystem. Depending on the options it can
 * clear the free space cache, zero the log tree (repair mode), print a qgroup
 * report or a subvolume extent report, and re-initialise the extent and/or
 * csum trees. The check passes visible below run in this order: chunks and
 * extents, root items, free space cache/tree, fs roots, csums, root refs,
 * pending recow and bad-item deletion, quota groups. Finally the summary
 * counters are printed.
 *
 * @argc, @argv: arguments of the subcommand
 *
 * NOTE(review): branch targets, early exits and the return statement are on
 * lines not visible in this listing.
 */
14212 int cmd_check(int argc, char **argv)
14214 struct cache_tree root_cache;
14215 struct btrfs_root *root;
14216 struct btrfs_fs_info *info;
14219 u64 tree_root_bytenr = 0;
14220 u64 chunk_root_bytenr = 0;
14221 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14225 int init_csum_tree = 0;
14227 int clear_space_cache = 0;
14228 int qgroup_report = 0;
14229 int qgroups_repaired = 0;
14230 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* Long-only options get values above the char range, starting at 257. */
14235 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14236 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14237 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14238 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14239 GETOPT_VAL_FORCE };
14240 static const struct option long_options[] = {
14241 { "super", required_argument, NULL, 's' },
14242 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14243 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14244 { "init-csum-tree", no_argument, NULL,
14245 GETOPT_VAL_INIT_CSUM },
14246 { "init-extent-tree", no_argument, NULL,
14247 GETOPT_VAL_INIT_EXTENT },
14248 { "check-data-csum", no_argument, NULL,
14249 GETOPT_VAL_CHECK_CSUM },
14250 { "backup", no_argument, NULL, 'b' },
14251 { "subvol-extents", required_argument, NULL, 'E' },
14252 { "qgroup-report", no_argument, NULL, 'Q' },
14253 { "tree-root", required_argument, NULL, 'r' },
14254 { "chunk-root", required_argument, NULL,
14255 GETOPT_VAL_CHUNK_TREE },
14256 { "progress", no_argument, NULL, 'p' },
14257 { "mode", required_argument, NULL,
14259 { "clear-space-cache", required_argument, NULL,
14260 GETOPT_VAL_CLEAR_SPACE_CACHE},
14261 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14262 { NULL, 0, NULL, 0}
/*
 * 'E' takes an argument ("subvol-extents" is required_argument and its
 * handler parses optarg), so the short option string must say "E:".
 * Without the colon the short form -E leaves optarg NULL.
 */
14265 c = getopt_long(argc, argv, "as:br:pE:Q", long_options, NULL);
14269 case 'a': /* ignored */ break;
14271 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
/* Superblock mirror index, bounded by BTRFS_SUPER_MIRROR_MAX. */
14274 num = arg_strtou64(optarg);
14275 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14277 "super mirror should be less than %d",
14278 BTRFS_SUPER_MIRROR_MAX);
14281 bytenr = btrfs_sb_offset(((int)num));
14282 printf("using SB copy %llu, bytenr %llu\n", num,
14283 (unsigned long long)bytenr);
14289 subvolid = arg_strtou64(optarg);
14292 tree_root_bytenr = arg_strtou64(optarg);
14294 case GETOPT_VAL_CHUNK_TREE:
14295 chunk_root_bytenr = arg_strtou64(optarg);
14298 ctx.progress_enabled = true;
14302 usage(cmd_check_usage);
14303 case GETOPT_VAL_REPAIR:
14304 printf("enabling repair mode\n");
14306 ctree_flags |= OPEN_CTREE_WRITES;
14308 case GETOPT_VAL_READONLY:
14311 case GETOPT_VAL_INIT_CSUM:
14312 printf("Creating a new CRC tree\n");
14313 init_csum_tree = 1;
14315 ctree_flags |= OPEN_CTREE_WRITES;
14317 case GETOPT_VAL_INIT_EXTENT:
14318 init_extent_tree = 1;
14319 ctree_flags |= (OPEN_CTREE_WRITES |
14320 OPEN_CTREE_NO_BLOCK_GROUPS);
14323 case GETOPT_VAL_CHECK_CSUM:
14324 check_data_csum = 1;
14326 case GETOPT_VAL_MODE:
14327 check_mode = parse_check_mode(optarg);
14328 if (check_mode == CHECK_MODE_UNKNOWN) {
14329 error("unknown mode: %s", optarg);
/* Only the literal strings "v1" and "v2" are accepted. */
14333 case GETOPT_VAL_CLEAR_SPACE_CACHE:
14334 if (strcmp(optarg, "v1") == 0) {
14335 clear_space_cache = 1;
14336 } else if (strcmp(optarg, "v2") == 0) {
14337 clear_space_cache = 2;
14338 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14341 "invalid argument to --clear-space-cache, must be v1 or v2");
14344 ctree_flags |= OPEN_CTREE_WRITES;
14346 case GETOPT_VAL_FORCE:
/* Exactly one positional argument (the device) must remain. */
14352 if (check_argc_exact(argc - optind, 1))
14353 usage(cmd_check_usage);
14355 if (ctx.progress_enabled) {
14356 ctx.tp = TASK_NOTHING;
14357 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14360 /* This check is the only reason for --readonly to exist */
14361 if (readonly && repair) {
14362 error("repair options are not compatible with --readonly");
14367 * experimental and dangerous
14369 if (repair && check_mode == CHECK_MODE_LOWMEM)
14370 warning("low-memory mode repair support is only partial");
14373 cache_tree_init(&root_cache);
14375 ret = check_mounted(argv[optind]);
14378 error("could not check mount status: %s",
14384 "%s is currently mounted, use --force if you really intend to check the filesystem",
14392 error("repair and --force is not yet supported");
14399 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14403 "filesystem mounted, continuing because of --force");
14405 /* A block device is mounted in exclusive mode by kernel */
14406 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14409 /* only allow partial opening under repair mode */
14411 ctree_flags |= OPEN_CTREE_PARTIAL;
14413 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14414 chunk_root_bytenr, ctree_flags);
14416 error("cannot open file system");
/* Publish the opened filesystem through the file-scope global_info. */
14422 global_info = info;
14423 root = info->fs_root;
14424 uuid_unparse(info->super_copy->fsid, uuidbuf);
14426 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14429 * Check the bare minimum before starting anything else that could rely
14430 * on it, namely the tree roots, any local consistency checks
14432 if (!extent_buffer_uptodate(info->tree_root->node) ||
14433 !extent_buffer_uptodate(info->dev_root->node) ||
14434 !extent_buffer_uptodate(info->chunk_root->node)) {
14435 error("critical roots corrupted, unable to check the filesystem");
14441 if (clear_space_cache) {
14442 ret = do_clear_free_space_cache(info, clear_space_cache);
14448 * repair mode will force us to commit transaction which
14449 * will make us fail to load log tree when mounting.
14451 if (repair && btrfs_super_log_root(info->super_copy)) {
14452 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14458 ret = zero_log_tree(root);
14461 error("failed to zero log tree: %d", ret);
14466 if (qgroup_report) {
14467 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14469 ret = qgroup_verify_all(info);
14476 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14477 subvolid, argv[optind], uuidbuf);
14478 ret = print_extent_state(info, subvolid);
/* Tree re-initialisation runs inside a single transaction. */
14483 if (init_extent_tree || init_csum_tree) {
14484 struct btrfs_trans_handle *trans;
14486 trans = btrfs_start_transaction(info->extent_root, 0);
14487 if (IS_ERR(trans)) {
14488 error("error starting transaction");
14489 ret = PTR_ERR(trans);
14494 if (init_extent_tree) {
14495 printf("Creating a new extent tree\n");
14496 ret = reinit_extent_tree(trans, info);
14502 if (init_csum_tree) {
14503 printf("Reinitialize checksum tree\n");
14504 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14506 error("checksum tree initialization failed: %d",
14513 ret = fill_csum_tree(trans, info->csum_root,
14517 error("checksum tree refilling failed: %d", ret);
14522 * Ok now we commit and run the normal fsck, which will add
14523 * extent entries for all of the items it finds.
14525 ret = btrfs_commit_transaction(trans, info->extent_root);
14530 if (!extent_buffer_uptodate(info->extent_root->node)) {
14531 error("critical: extent_root, unable to check the filesystem");
14536 if (!extent_buffer_uptodate(info->csum_root->node)) {
14537 error("critical: csum_root, unable to check the filesystem");
14543 ret = do_check_chunks_and_extents(info);
14547 "errors found in extent allocation tree or chunk allocation");
/* A positive result from repair_root_items() is printed as a root count. */
14549 ret = repair_root_items(info);
14552 error("failed to repair root items: %s", strerror(-ret));
14556 fprintf(stderr, "Fixed %d roots.\n", ret);
14558 } else if (ret > 0) {
14560 "Found %d roots with an outdated root item.\n",
14563 "Please run a filesystem check with the option --repair to fix them.\n");
/* The pass title is printed here only when progress reporting is off. */
14569 if (!ctx.progress_enabled) {
14570 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14571 fprintf(stderr, "checking free space tree\n");
14573 fprintf(stderr, "checking free space cache\n");
14575 ret = check_space_cache(root);
14578 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14579 error("errors found in free space tree");
14581 error("errors found in free space cache");
14586 * We used to have to have these hole extents in between our real
14587 * extents so if we don't have this flag set we need to make sure there
14588 * are no gaps in the file extents for inodes, otherwise we can just
14589 * ignore it when this happens.
14591 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14592 ret = do_check_fs_roots(info, &root_cache);
14595 error("errors found in fs roots");
14599 fprintf(stderr, "checking csums\n");
14600 ret = check_csums(root);
14603 error("errors found in csum tree");
14607 fprintf(stderr, "checking root refs\n");
14608 /* For low memory mode, check_fs_roots_v2 handles root refs */
14609 if (check_mode != CHECK_MODE_LOWMEM) {
14610 ret = check_root_refs(root, &root_cache);
14613 error("errors found in root refs");
/* In repair mode, drain the list of extent buffers queued for re-COW. */
14618 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14619 struct extent_buffer *eb;
14621 eb = list_first_entry(&root->fs_info->recow_ebs,
14622 struct extent_buffer, recow);
14623 list_del_init(&eb->recow);
14624 ret = recow_extent_buffer(root, eb);
14627 error("fails to fix transid errors");
/* Drain the file-scope delete_items list, deleting each bad item. */
14632 while (!list_empty(&delete_items)) {
14633 struct bad_item *bad;
14635 bad = list_first_entry(&delete_items, struct bad_item, list);
14636 list_del_init(&bad->list);
14638 ret = delete_bad_item(root, bad);
14644 if (info->quota_enabled) {
14645 fprintf(stderr, "checking quota groups\n");
14646 ret = qgroup_verify_all(info);
14649 error("failed to check quota groups");
14653 ret = repair_qgroups(info, &qgroups_repaired);
14656 error("failed to repair quota groups");
/* Buffers still queued for re-COW at this point are reported as errors. */
14662 if (!list_empty(&root->fs_info->recow_ebs)) {
14663 error("transid errors in file system");
/* Summary of the file-scope counters. */
14668 printf("found %llu bytes used, ",
14669 (unsigned long long)bytes_used);
14671 printf("error(s) found\n");
14673 printf("no error found\n");
14674 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14675 printf("total tree bytes: %llu\n",
14676 (unsigned long long)total_btree_bytes);
14677 printf("total fs tree bytes: %llu\n",
14678 (unsigned long long)total_fs_tree_bytes);
14679 printf("total extent tree bytes: %llu\n",
14680 (unsigned long long)total_extent_tree_bytes);
14681 printf("btree space waste bytes: %llu\n",
14682 (unsigned long long)btree_space_waste);
14683 printf("file data blocks allocated: %llu\n referenced %llu\n",
14684 (unsigned long long)data_bytes_allocated,
14685 (unsigned long long)data_bytes_referenced);
14687 free_qgroup_counts();
14688 free_root_recs_tree(&root_cache);
14692 if (ctx.progress_enabled)
14693 task_deinit(ctx.info);